1 /*============================================================================
2  * Utilitary functions for sparse matrixes.
3  *============================================================================*/
4 
5 /*
6   This file is part of Code_Saturne, a general-purpose CFD tool.
7 
8   Copyright (C) 1998-2021 EDF S.A.
9 
10   This program is free software; you can redistribute it and/or modify it under
11   the terms of the GNU General Public License as published by the Free Software
12   Foundation; either version 2 of the License, or (at your option) any later
13   version.
14 
15   This program is distributed in the hope that it will be useful, but WITHOUT
16   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
18   details.
19 
20   You should have received a copy of the GNU General Public License along with
21   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22   Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24 
25 /*----------------------------------------------------------------------------*/
26 
27 #include "cs_defs.h"
28 
29 /*----------------------------------------------------------------------------
30  * Standard C library headers
31  *----------------------------------------------------------------------------*/
32 
33 #include <stdarg.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38 #include <math.h>
39 
40 #if defined(HAVE_MPI)
41 #include <mpi.h>
42 #endif
43 
44 /*----------------------------------------------------------------------------
45  * Local headers
46  *----------------------------------------------------------------------------*/
47 
48 #include "bft_mem.h"
49 #include "bft_error.h"
50 #include "bft_printf.h"
51 
52 #include "fvm_io_num.h"
53 
54 #include "cs_base.h"
55 #include "cs_blas.h"
56 #include "cs_file.h"
57 #include "cs_halo.h"
58 #include "cs_log.h"
59 #include "cs_numbering.h"
60 #include "cs_order.h"
61 #include "cs_parall.h"
62 #include "cs_part_to_block.h"
63 #include "cs_prototypes.h"
64 #include "cs_timer.h"
65 
66 /*----------------------------------------------------------------------------
67  *  Header for the current file
68  *----------------------------------------------------------------------------*/
69 
70 #include "cs_matrix_util.h"
71 
72 #include "cs_matrix_priv.h"
73 
74 /*----------------------------------------------------------------------------*/
75 
76 BEGIN_C_DECLS
77 
78 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
79 
80 /*=============================================================================
81  * Local Macro Definitions
82  *============================================================================*/
83 
84 /*=============================================================================
85  * Local Type Definitions
86  *============================================================================*/
87 
88 /*============================================================================
89  *  Global variables
90  *============================================================================*/
91 
92 /*============================================================================
93  * Private function definitions
94  *============================================================================*/
95 
96 /*----------------------------------------------------------------------------
97  * y[i] = abs(da[i]), with da possibly NULL.
98  *
99  * parameters:
100  *   da         <-- Pointer to coefficients array (usually matrix diagonal)
101  *   dd         --> Resulting vector
102  *   n_rows     <-- Number of rows
103  *   n_cols_ext <-- Number of columns + ghost columns
104 *----------------------------------------------------------------------------*/
105 
106 static void
_diag_dom_diag_contrib(const cs_real_t * restrict da,cs_real_t * restrict dd,cs_lnum_t n_rows,cs_lnum_t n_cols_ext)107 _diag_dom_diag_contrib(const cs_real_t  *restrict da,
108                        cs_real_t        *restrict dd,
109                        cs_lnum_t         n_rows,
110                        cs_lnum_t         n_cols_ext)
111 {
112   cs_lnum_t  ii;
113 
114   if (da != NULL) {
115 #   pragma omp parallel for
116     for (ii = 0; ii < n_rows; ii++)
117       dd[ii] = fabs(da[ii]);
118     for (ii = n_rows; ii < n_cols_ext; ii++)
119       dd[ii] = 0.0;
120   }
121   else {
122 #   pragma omp parallel for
123     for (ii = 0; ii < n_cols_ext; ii++)
124       dd[ii] = 0.0;
125   }
126 
127 }
128 
129 /*----------------------------------------------------------------------------
130  * Block diagonal contribution to diagonal dominance, with da possibly NULL.
131  *
132  * parameters:
133  *   da         <-- Pointer to coefficients array (usually matrix diagonal)
134  *   dd         --> Resulting vector
135  *   n_rows     <-- Number of rows
136  *   n_cols_ext <-- Number of colmuns + ghost columns
137  *   b_size     <-- block size, including padding:
138  *                  b_size[0]: useful block size
139  *                  b_size[1]: vector block extents
140  *                  b_size[2]: matrix line extents
141  *                  b_size[3]: matrix line*column (block) extents
142  *----------------------------------------------------------------------------*/
143 
144 static void
_b_diag_dom_diag_contrib(const cs_real_t * restrict da,cs_real_t * restrict dd,cs_lnum_t n_rows,cs_lnum_t n_cols_ext,const cs_lnum_t b_size[4])145 _b_diag_dom_diag_contrib(const cs_real_t  *restrict da,
146                          cs_real_t        *restrict dd,
147                          cs_lnum_t         n_rows,
148                          cs_lnum_t         n_cols_ext,
149                          const cs_lnum_t   b_size[4])
150 {
151   cs_lnum_t  ii, jj, kk;
152   double  sign;
153   const cs_lnum_t  dd_size = n_cols_ext*b_size[1];
154 
155 # pragma omp parallel for
156   for (ii = 0; ii < dd_size; ii++)
157     dd[ii] = 0.0;
158 
159   if (da != NULL) {
160 #   pragma omp parallel for private(jj, kk, sign)
161     for (ii = 0; ii < n_rows; ii++) {
162       for (jj = 0; jj < b_size[1]; jj++)
163         dd[ii*b_size[1] + jj] = 0.0;
164       for (jj = 0; jj < b_size[0]; jj++) {
165         for (kk = 0; kk < b_size[0]; kk++) {
166           sign = (jj == kk) ? 1. : -1.;
167           dd[ii*b_size[1] + kk]
168             += sign*fabs(da[ii*b_size[3] + jj*b_size[2] + kk]);
169         }
170       }
171     }
172   }
173 }
174 
175 /*----------------------------------------------------------------------------
176  * Normalize diagonal dominance, with da possibly NULL.
177  *
178  * parameters:
179  *   da         <-- Pointer to coefficients array (usually matrix diagonal)
180  *   dd         --> Resulting vector
181  *   n_rows     <-- Number of rows
182 *----------------------------------------------------------------------------*/
183 
184 static void
_diag_dom_diag_normalize(const cs_real_t * restrict da,cs_real_t * restrict dd,cs_lnum_t n_rows)185 _diag_dom_diag_normalize(const cs_real_t  *restrict da,
186                          cs_real_t        *restrict dd,
187                          cs_lnum_t         n_rows)
188 {
189   cs_lnum_t  ii;
190 
191   if (da != NULL) {
192 #   pragma omp parallel for
193     for (ii = 0; ii < n_rows; ii++) {
194       if (fabs(da[ii]) > 1.e-18)
195         dd[ii] /= fabs(da[ii]);
196       else if (dd[ii] > -1.e-18)
197         dd[ii] = -1.e18;
198       else
199         dd[ii] = 0;
200     }
201   }
202   else {
203 #   pragma omp parallel for
204     for (ii = 0; ii < n_rows; ii++) {
205       if (dd[ii] > -1.e-18)
206         dd[ii] = -1.e18;
207       else
208         dd[ii] = 0;
209     }
210   }
211 }
212 
213 /*----------------------------------------------------------------------------
214  * Normalize block diagonal dominance, with da possibly NULL.
215  *
216  * parameters:
217  *   da      <-- Pointer to coefficients array (usually matrix diagonal)
218  *   dd      --> Resulting vector
219  *   n_rows  <-- Number of rows
220  *   b_size  <-- block size, including padding:
221  *               b_size[0]: useful block size
222  *               b_size[1]: vector block extents
223  *               b_size[2]: matrix line extents
224  *               b_size[3]: matrix line*column (block) extents
225  *----------------------------------------------------------------------------*/
226 
227 static void
_b_diag_dom_diag_normalize(const cs_real_t * restrict da,cs_real_t * restrict dd,cs_lnum_t n_rows,const cs_lnum_t b_size[4])228 _b_diag_dom_diag_normalize(const cs_real_t  *restrict da,
229                            cs_real_t        *restrict dd,
230                            cs_lnum_t         n_rows,
231                            const cs_lnum_t   b_size[4])
232 {
233   cs_lnum_t  ii, jj;
234   double  d_val;
235 
236   if (da != NULL) {
237 #   pragma omp parallel for private(jj, d_val)
238     for (ii = 0; ii < n_rows; ii++) {
239       for (jj = 0; jj < b_size[0]; jj++) {
240         d_val = fabs(da[ii*b_size[3] + jj*b_size[2] + jj]);
241         if (d_val > 1.e-18)
242           dd[ii*b_size[1] + jj] /= d_val;
243         else if (dd[ii*b_size[1] + jj] > -1.e-18)
244           dd[ii*b_size[1] + jj] = -1.e18;
245         else
246           dd[ii*b_size[1] + jj] = 0;
247       }
248     }
249   }
250 }
251 
252 /*----------------------------------------------------------------------------
253  * Measure Diagonal dominance of native matrix.
254  *
255  * parameters:
256  *   matrix <-- Pointer to matrix structure
257  *   dd     --> Resulting vector
258  *----------------------------------------------------------------------------*/
259 
260 static void
_diag_dom_native(const cs_matrix_t * matrix,cs_real_t * restrict dd)261 _diag_dom_native(const cs_matrix_t  *matrix,
262                  cs_real_t          *restrict dd)
263 {
264   cs_lnum_t  ii, jj, edge_id;
265 
266   const cs_matrix_struct_native_t  *ms = matrix->structure;
267   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
268 
269   const cs_real_t  *restrict xa = mc->xa;
270 
271   /* diagonal contribution */
272 
273   _diag_dom_diag_contrib(mc->da, dd, ms->n_rows, ms->n_cols_ext);
274 
275   /* non-diagonal terms */
276 
277   if (mc->xa != NULL) {
278 
279     const cs_lnum_2_t *restrict face_cel_p = ms->edges;
280 
281     if (mc->symmetric) {
282 
283       for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
284         ii = face_cel_p[edge_id][0];
285         jj = face_cel_p[edge_id][1];
286         dd[ii] -= fabs(xa[edge_id]);
287         dd[jj] -= fabs(xa[edge_id]);
288       }
289 
290     }
291     else {
292 
293       for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
294         ii = face_cel_p[edge_id][0];
295         jj = face_cel_p[edge_id][1];
296         dd[ii] -= fabs(xa[2*edge_id]);
297         dd[jj] -= fabs(xa[2*edge_id + 1]);
298       }
299 
300     }
301 
302   }
303 
304   _diag_dom_diag_normalize(mc->da, dd, ms->n_rows);
305 }
306 
307 /*----------------------------------------------------------------------------
308  * Measure Diagonal dominance of native matrix with block diagonal.
309  *
310  * parameters:
311  *   matrix <-- Pointer to matrix structure
312  *   dd     --> Resulting vector
313  *----------------------------------------------------------------------------*/
314 
315 static void
_b_diag_dom_native(const cs_matrix_t * matrix,cs_real_t * restrict dd)316 _b_diag_dom_native(const cs_matrix_t  *matrix,
317                    cs_real_t          *restrict dd)
318 {
319   cs_lnum_t  ii, jj, kk, edge_id;
320 
321   const cs_matrix_struct_native_t  *ms = matrix->structure;
322   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
323 
324   const cs_real_t  *restrict xa = mc->xa;
325   const cs_lnum_t *db_size = matrix->db_size;
326 
327   /* block diagonal contribution */
328 
329   _b_diag_dom_diag_contrib(mc->da, dd, ms->n_rows, ms->n_cols_ext, db_size);
330 
331   /* non-diagonal terms */
332 
333   if (mc->xa != NULL) {
334 
335     const cs_lnum_2_t *restrict face_cel_p = ms->edges;
336 
337     if (mc->symmetric) {
338 
339       for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
340         ii = face_cel_p[edge_id][0];
341         jj = face_cel_p[edge_id][1];
342         for (kk = 0; kk < db_size[0]; kk++) {
343           dd[ii*db_size[1] + kk] -= fabs(xa[edge_id]);
344           dd[jj*db_size[1] + kk] -= fabs(xa[edge_id]);
345         }
346       }
347     }
348     else {
349 
350       for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
351         ii = face_cel_p[edge_id][0];
352         jj = face_cel_p[edge_id][1];
353         for (kk = 0; kk < db_size[0]; kk++) {
354           dd[ii*db_size[1] + kk] -= fabs(xa[2*edge_id]);
355           dd[jj*db_size[1] + kk] -= fabs(xa[2*edge_id + 1]);
356         }
357       }
358 
359     }
360 
361   }
362 
363   _b_diag_dom_diag_normalize(mc->da, dd, ms->n_rows, db_size);
364 }
365 
366 /*----------------------------------------------------------------------------
367  * Measure Diagonal dominance of native block matrix.
368  *
369  * parameters:
370  *   matrix <-- Pointer to matrix structure
371  *   dd     --> Resulting vector
372  *----------------------------------------------------------------------------*/
373 
374 static void
_bb_diag_dom_native(const cs_matrix_t * matrix,cs_real_t * restrict dd)375 _bb_diag_dom_native(const cs_matrix_t  *matrix,
376                     cs_real_t          *restrict dd)
377 {
378   cs_lnum_t  ii, jj, kk, ll, edge_id;
379 
380   const cs_matrix_struct_native_t  *ms = matrix->structure;
381   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
382 
383   const cs_real_t  *restrict xa = mc->xa;
384   const cs_lnum_t *db_size = matrix->db_size;
385   const cs_lnum_t *eb_size = matrix->eb_size;
386 
387   /* block diagonal contribution */
388 
389   _b_diag_dom_diag_contrib(mc->da, dd, ms->n_rows, ms->n_cols_ext, db_size);
390 
391   /* non-diagonal terms */
392 
393   if (mc->xa != NULL) {
394 
395     const cs_lnum_2_t *restrict face_cel_p = ms->edges;
396 
397     if (mc->symmetric) {
398 
399       for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
400         ii = face_cel_p[edge_id][0];
401         jj = face_cel_p[edge_id][1];
402         for (kk = 0; kk < eb_size[0]; kk++) {
403           for (ll = 0; ll < eb_size[0]; ll++) {
404             cs_lnum_t si = edge_id*eb_size[3] + kk*eb_size[2] + ll;
405             dd[ii*db_size[1] + kk] -= fabs(xa[si]);
406             dd[jj*db_size[1] + kk] -= fabs(xa[si]);
407           }
408         }
409       }
410     }
411     else {
412 
413       for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
414         ii = face_cel_p[edge_id][0];
415         jj = face_cel_p[edge_id][1];
416         for (kk = 0; kk < db_size[0]; kk++) {
417           for (ll = 0; ll < eb_size[0]; ll++) {
418             cs_lnum_t si0 = 2*edge_id*eb_size[3] + kk*eb_size[2] + ll;
419             cs_lnum_t si1 = (2*edge_id+1)*eb_size[3] + kk*eb_size[2] + ll;
420             dd[ii*db_size[1] + kk] -= fabs(xa[si0]);
421             dd[jj*db_size[1] + kk] -= fabs(xa[si1]);
422           }
423         }
424       }
425 
426     }
427 
428   }
429 
430   _b_diag_dom_diag_normalize(mc->da, dd, ms->n_rows, db_size);
431 }
432 
433 /*----------------------------------------------------------------------------
434  * Measure Diagonal dominance of CSR matrix.
435  *
436  * parameters:
437  *   matrix <-- Pointer to matrix structure
438  *   dd     --> Resulting vector
439  *----------------------------------------------------------------------------*/
440 
441 static void
_diag_dom_csr(const cs_matrix_t * matrix,cs_real_t * restrict dd)442 _diag_dom_csr(const cs_matrix_t  *matrix,
443               cs_real_t          *restrict dd)
444 {
445   cs_lnum_t  ii, jj, n_cols;
446   double  sii, sign, d_val;
447   const cs_lnum_t  *restrict col_id;
448   const cs_real_t  *restrict m_row;
449 
450   const cs_matrix_struct_csr_t  *ms = matrix->structure;
451   const cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
452   cs_lnum_t  n_rows = ms->n_rows;
453 
454   /* Standard case */
455 
456 # pragma omp parallel for private(jj, col_id, m_row, n_cols, sii, sign, d_val)
457   for (ii = 0; ii < n_rows; ii++) {
458     col_id = ms->col_id + ms->row_index[ii];
459     m_row = mc->val + ms->row_index[ii];
460     n_cols = ms->row_index[ii+1] - ms->row_index[ii];
461     sii = 0.0;
462     d_val = 0.0;
463     for (jj = 0; jj < n_cols; jj++) {
464       if (col_id[jj] == ii) {
465         sign = 1.;
466         d_val = fabs(m_row[jj]);
467       }
468       else
469         sign = -1.;
470       sii += sign * fabs(m_row[jj]);
471     }
472     if (d_val > 1.e-18)
473       sii /= d_val;
474     else if (sii > -1.e-18)
475       sii = -1.e18;
476     else
477       sii = 0;
478     dd[ii] = sii;
479   }
480 }
481 
482 /*----------------------------------------------------------------------------
483  * Measure Diagonal dominance of MSR matrix.
484  *
485  * parameters:
486  *   matrix <-- Pointer to matrix structure
487  *   dd     --> Resulting vector
488  *----------------------------------------------------------------------------*/
489 
490 static void
_diag_dom_msr(const cs_matrix_t * matrix,cs_real_t * restrict dd)491 _diag_dom_msr(const cs_matrix_t  *matrix,
492               cs_real_t          *restrict dd)
493 {
494   cs_lnum_t  ii, jj, n_cols;
495   cs_real_t  sii;
496   const cs_real_t  *restrict m_row;
497 
498   const cs_matrix_struct_csr_t  *ms = matrix->structure;
499   const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
500   cs_lnum_t  n_rows = ms->n_rows;
501 
502   /* diagonal contribution */
503 
504   _diag_dom_diag_contrib(mc->d_val, dd, ms->n_rows, ms->n_cols_ext);
505 
506   /* extra-diagonal contribution */
507 
508   if (mc->x_val != NULL) {
509 
510 #   pragma omp parallel for private(jj, m_row, n_cols, sii)
511     for (ii = 0; ii < n_rows; ii++) {
512       m_row = mc->x_val + ms->row_index[ii];
513       n_cols = ms->row_index[ii+1] - ms->row_index[ii];
514       sii = 0.0;
515       for (jj = 0; jj < n_cols; jj++)
516         sii -= fabs(m_row[jj]);
517       dd[ii] += sii;
518     }
519 
520   }
521 
522   _diag_dom_diag_normalize(mc->d_val, dd, n_rows);
523 }
524 
525 /*----------------------------------------------------------------------------
526  * Measure Diagonal dominance of MSR matrix, blocked version.
527  *
528  * parameters:
529  *   matrix <-- Pointer to matrix structure
530  *   dd     --> Resulting vector
531  *----------------------------------------------------------------------------*/
532 
533 static void
_b_diag_dom_msr(const cs_matrix_t * matrix,cs_real_t * restrict dd)534 _b_diag_dom_msr(const cs_matrix_t  *matrix,
535                 cs_real_t          *restrict dd)
536 {
537   cs_lnum_t  ii, jj, kk, n_cols;
538   const cs_real_t  *restrict m_row;
539 
540   const cs_matrix_struct_csr_t  *ms = matrix->structure;
541   const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
542   const cs_lnum_t *db_size = matrix->db_size;
543   const cs_lnum_t  n_rows = ms->n_rows;
544 
545   /* diagonal contribution */
546 
547   _b_diag_dom_diag_contrib(mc->d_val, dd, ms->n_rows, ms->n_cols_ext, db_size);
548 
549   /* extra-diagonal contribution */
550 
551   if (mc->x_val != NULL) {
552 
553 #   pragma omp parallel for private(jj, kk, m_row, n_cols)
554     for (ii = 0; ii < n_rows; ii++) {
555       m_row = mc->x_val + ms->row_index[ii];
556       n_cols = ms->row_index[ii+1] - ms->row_index[ii];
557       for (jj = 0; jj < n_cols; jj++) {
558         for (kk = 0; kk < db_size[0]; kk++)
559           dd[ii*db_size[1] + kk] -= fabs(m_row[jj]);
560       }
561     }
562 
563   }
564 
565   _b_diag_dom_diag_normalize(mc->d_val, dd, ms->n_rows, db_size);
566 }
567 
568 /*----------------------------------------------------------------------------
569  * Diagonal contribution to matrix dump.
570  *
571  * parameters:
572  *   da         <-- Pointer to coefficients array (usually matrix diagonal)
573  *   m_coo      <-- Matrix coefficient coordinates array
574  *   m_val      <-- Matrix coefficient values array
575  *   g_coo_num  <-- Global coordinate numbers
576  *   n_rows     <-- Number of rows
577 *----------------------------------------------------------------------------*/
578 
579 static void
_pre_dump_diag_contrib(const cs_real_t * restrict da,cs_gnum_t * restrict m_coo,cs_real_t * restrict m_val,const cs_gnum_t * restrict g_coo_num,cs_lnum_t n_rows)580 _pre_dump_diag_contrib(const cs_real_t  *restrict da,
581                        cs_gnum_t        *restrict m_coo,
582                        cs_real_t        *restrict m_val,
583                        const cs_gnum_t  *restrict g_coo_num,
584                        cs_lnum_t         n_rows)
585 {
586   cs_lnum_t  ii;
587 
588   if (da != NULL) {
589 #   pragma omp parallel for
590     for (ii = 0; ii < n_rows; ii++) {
591       m_coo[ii*2] = g_coo_num[ii];
592       m_coo[ii*2 + 1] = g_coo_num[ii];
593       m_val[ii] = da[ii];
594     }
595   }
596   else {
597 #   pragma omp parallel for
598     for (ii = 0; ii < n_rows; ii++) {
599       m_coo[ii*2] = g_coo_num[ii];
600       m_coo[ii*2 + 1] = g_coo_num[ii];
601       m_val[ii] = 0.0;
602     }
603   }
604 }
605 
606 /*----------------------------------------------------------------------------
607  * Block diagonal contribution to matrix dump.
608  *
609  * parameters:
610  *   da        <-- Pointer to coefficients array (usually matrix diagonal)
611  *   m_coo     <-- Matrix coefficient coordinates array
612  *   m_val     <-- Matrix coefficient values array
613  *   g_coo_num <-- Global coordinate numbers
614  *   n_rows    <-- Number of rows
615  *   b_size    <-- block size, including padding:
616  *                 b_size[0]: useful block size
617  *                 b_size[1]: vector block extents
618  *                 b_size[2]: matrix line extents
619  *                 b_size[3]: matrix line*column (block) extents
620  *----------------------------------------------------------------------------*/
621 
622 static void
_b_pre_dump_diag_contrib(const cs_real_t * restrict da,cs_gnum_t * restrict m_coo,cs_real_t * restrict m_val,const cs_gnum_t * restrict g_coo_num,cs_lnum_t n_rows,const cs_lnum_t b_size[4])623 _b_pre_dump_diag_contrib(const cs_real_t  *restrict da,
624                          cs_gnum_t        *restrict m_coo,
625                          cs_real_t        *restrict m_val,
626                          const cs_gnum_t  *restrict g_coo_num,
627                          cs_lnum_t         n_rows,
628                          const cs_lnum_t   b_size[4])
629 {
630   cs_lnum_t  ii, jj, kk;
631   cs_lnum_t  db_size[2] = {b_size[0], b_size[0]*b_size[0]};
632 
633   if (da != NULL) {
634 #   pragma omp parallel for private(jj, kk)
635     for (ii = 0; ii < n_rows; ii++) {
636       for (jj = 0; jj < b_size[0]; jj++) {
637         for (kk = 0; kk < b_size[0]; kk++) {
638           m_coo[(ii*db_size[1] + jj*db_size[0] + kk)*2]
639             = g_coo_num[ii]*b_size[0] + jj;
640           m_coo[(ii*db_size[1] + jj*db_size[0] + kk)*2 + 1]
641             = g_coo_num[ii]*b_size[0] + kk;
642           m_val[ii*db_size[1] + jj*db_size[0] + kk]
643             = da[ii*b_size[3] + jj*b_size[2] + kk];
644         }
645       }
646     }
647   }
648   else {
649 #   pragma omp parallel for private(jj, kk)
650     for (ii = 0; ii < n_rows; ii++) {
651       for (jj = 0; jj < b_size[0]; jj++) {
652         for (kk = 0; kk < b_size[0]; kk++) {
653           m_coo[(ii*db_size[1] + jj*db_size[0] + kk)*2]
654             = g_coo_num[ii]*b_size[0] + jj;
655           m_coo[(ii*db_size[1] + jj*db_size[0] + kk)*2 + 1]
656             = g_coo_num[ii]*b_size[0] + kk;
657           m_val[ii*db_size[1] + jj*db_size[0] + kk] = 0.0;
658         }
659       }
660     }
661   }
662 }
663 
664 /*----------------------------------------------------------------------------
665  * Prepare dump of native matrix.
666  *
667  * parameters:
668  *   matrix    <-- Pointer to matrix structure
669  *   g_coo_num <-- Global coordinate numbers
670  *   m_coo     --> Matrix coefficient coordinates array
671  *   m_val     --> Matrix coefficient values array
672  *
673  * returns:
674  *   number of matrix entries
675  *----------------------------------------------------------------------------*/
676 
677 static cs_lnum_t
_pre_dump_native(const cs_matrix_t * matrix,const cs_gnum_t * g_coo_num,cs_gnum_t ** m_coo,cs_real_t ** m_val)678 _pre_dump_native(const cs_matrix_t   *matrix,
679                  const cs_gnum_t     *g_coo_num,
680                  cs_gnum_t          **m_coo,
681                  cs_real_t          **m_val)
682 {
683   cs_lnum_t  ii, jj, edge_id, dump_id;
684 
685   cs_gnum_t   *restrict _m_coo;
686   cs_real_t   *restrict _m_val;
687 
688   const cs_matrix_struct_native_t  *ms = matrix->structure;
689   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
690 
691   const cs_real_t  *restrict xa = mc->xa;
692 
693   cs_lnum_t  n_entries = ms->n_rows + ms->n_edges*2;
694 
695   /* Allocate arrays */
696 
697   BFT_MALLOC(_m_coo, n_entries*2, cs_gnum_t);
698   BFT_MALLOC(_m_val, n_entries, double);
699 
700   *m_coo = _m_coo;
701   *m_val = _m_val;
702 
703   /* diagonal contribution */
704 
705   _pre_dump_diag_contrib(mc->da, _m_coo, _m_val, g_coo_num, ms->n_rows);
706 
707   /* non-diagonal terms */
708 
709   dump_id = ms->n_rows;
710 
711   if (mc->xa != NULL) {
712 
713     const cs_lnum_2_t *restrict face_cel_p
714       = (const cs_lnum_2_t *restrict)(ms->edges);
715 
716     if (mc->symmetric) {
717 
718       for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
719         ii = face_cel_p[edge_id][0];
720         jj = face_cel_p[edge_id][1];
721         _m_coo[dump_id*2] = g_coo_num[ii];
722         _m_coo[dump_id*2 + 1] = g_coo_num[jj];
723         _m_val[dump_id] = xa[edge_id];
724         _m_coo[dump_id*2 + 2] = g_coo_num[jj];
725         _m_coo[dump_id*2 + 3] = g_coo_num[ii];
726         _m_val[dump_id + 1] = xa[edge_id];
727         dump_id += 2;
728       }
729 
730     }
731     else {
732 
733       for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
734         ii = face_cel_p[edge_id][0];
735         jj = face_cel_p[edge_id][1];
736         _m_coo[dump_id*2] = g_coo_num[ii];
737         _m_coo[dump_id*2 + 1] = g_coo_num[jj];
738         _m_val[dump_id] = xa[2*edge_id];
739         _m_coo[dump_id*2 + 2] = g_coo_num[jj];
740         _m_coo[dump_id*2 + 3] = g_coo_num[ii];
741         _m_val[dump_id + 1] = xa[2*edge_id + 1];
742         dump_id += 2;
743       }
744 
745     }
746 
747   }
748 
749   return n_entries;
750 }
751 
752 /*----------------------------------------------------------------------------
753  * Prepare dump of native matrix with block diagonal.
754  *
755  * parameters:
756  *   matrix    <-- Pointer to matrix structure
757  *   g_coo_num <-- Global coordinate numbers
758  *   m_coo     --> Matrix coefficient coordinates array
759  *   m_val     --> Matrix coefficient values array
760  *
761  * returns:
762  *   number of matrix entries
763  *----------------------------------------------------------------------------*/
764 
765 static cs_lnum_t
_b_pre_dump_native(const cs_matrix_t * matrix,const cs_gnum_t * g_coo_num,cs_gnum_t ** m_coo,cs_real_t ** m_val)766 _b_pre_dump_native(const cs_matrix_t  *matrix,
767                    const cs_gnum_t     *g_coo_num,
768                    cs_gnum_t          **m_coo,
769                    cs_real_t          **m_val)
770 {
771   cs_lnum_t  ii, jj, kk, edge_id, dump_id;
772 
773   cs_gnum_t   *restrict _m_coo;
774   cs_real_t   *restrict _m_val;
775 
776   const cs_matrix_struct_native_t  *ms = matrix->structure;
777   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
778 
779   const cs_real_t  *restrict xa = mc->xa;
780   const cs_lnum_t *db_size = matrix->db_size;
781 
782   cs_lnum_t  n_entries = (ms->n_rows*db_size[0] + ms->n_edges*2) * db_size[0];
783 
784   /* Allocate arrays */
785 
786   BFT_MALLOC(_m_coo, n_entries*2, cs_gnum_t);
787   BFT_MALLOC(_m_val, n_entries, double);
788 
789   *m_coo = _m_coo;
790   *m_val = _m_val;
791 
792   /* block diagonal contribution */
793 
794   _b_pre_dump_diag_contrib(mc->da, _m_coo, _m_val,
795                            g_coo_num, ms->n_rows, db_size);
796 
797   /* non-diagonal terms */
798 
799   dump_id = ms->n_rows*db_size[0]*db_size[0];
800 
801   if (mc->xa != NULL) {
802 
803     const cs_lnum_2_t *restrict face_cel_p
804       = (const cs_lnum_2_t *restrict)(ms->edges);
805 
806     if (mc->symmetric) {
807 
808       for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
809         ii = face_cel_p[edge_id][0];
810         jj = face_cel_p[edge_id][1];
811         for (kk = 0; kk < db_size[0]; kk++) {
812           _m_coo[dump_id*2] = g_coo_num[ii]*db_size[0] + kk;
813           _m_coo[dump_id*2 + 1] = g_coo_num[jj]*db_size[0] + kk;
814           _m_val[dump_id] = xa[edge_id];
815           _m_coo[dump_id*2 + 2] = g_coo_num[jj]*db_size[0] + kk;
816           _m_coo[dump_id*2 + 3] = g_coo_num[ii]*db_size[0] + kk;
817           _m_val[dump_id + 1] = xa[edge_id];
818           dump_id += 2;
819         }
820       }
821     }
822     else {
823 
824       for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
825         ii = face_cel_p[edge_id][0];
826         jj = face_cel_p[edge_id][1];
827         for (kk = 0; kk < db_size[0]; kk++) {
828           _m_coo[dump_id*2] = g_coo_num[ii]*db_size[0] + kk;
829           _m_coo[dump_id*2 + 1] = g_coo_num[jj]*db_size[0] + kk;
830           _m_val[dump_id] = xa[edge_id*2];
831           _m_coo[dump_id*2 + 2] = g_coo_num[jj]*db_size[0] + kk;
832           _m_coo[dump_id*2 + 3] = g_coo_num[ii]*db_size[0] + kk;
833           _m_val[dump_id + 1] = xa[edge_id*2 + 1];
834           dump_id += 2;
835         }
836       }
837 
838     }
839 
840   }
841 
842   return n_entries;
843 }
844 
845 /*----------------------------------------------------------------------------
846  * Prepare dump of CSR matrix.
847  *
848  * parameters:
849  *   matrix    <-- Pointer to matrix structure
850  *   g_coo_num <-- Global coordinate numbers
851  *   m_coo     --> Matrix coefficient coordinates array
852  *   m_val     --> Matrix coefficient values array
853  *
854  * returns:
855  *   number of matrix entries
856  *----------------------------------------------------------------------------*/
857 
858 static cs_lnum_t
_pre_dump_csr(const cs_matrix_t * matrix,const cs_gnum_t * g_coo_num,cs_gnum_t ** m_coo,cs_real_t ** m_val)859 _pre_dump_csr(const cs_matrix_t   *matrix,
860               const cs_gnum_t     *g_coo_num,
861               cs_gnum_t          **m_coo,
862               cs_real_t          **m_val)
863 {
864   cs_lnum_t  ii, jj, n_cols, dump_id;
865   const cs_lnum_t  *restrict col_id;
866   const cs_real_t  *restrict m_row;
867 
868   cs_gnum_t   *restrict _m_coo;
869   cs_real_t   *restrict _m_val;
870 
871   const cs_matrix_struct_csr_t  *ms = matrix->structure;
872   const cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
873   cs_lnum_t  dump_id_shift = 0;
874   cs_lnum_t  n_rows = ms->n_rows;
875 
876   cs_lnum_t  n_entries = ms->row_index[n_rows];
877 
878   /* Allocate arrays */
879 
880   BFT_MALLOC(_m_coo, n_entries*2, cs_gnum_t);
881   BFT_MALLOC(_m_val, n_entries, double);
882 
883   *m_coo = _m_coo;
884   *m_val = _m_val;
885 
886   /* Contribution */
887 
888   if (ms->have_diag == false) {
889     assert(0);
890 #   pragma omp parallel for
891     for (ii = 0; ii < n_rows; ii++) {
892       _m_coo[ii*2] = g_coo_num[ii];
893       _m_coo[ii*2+1] = g_coo_num[ii];
894       _m_val[ii] = 0.0;
895     }
896     dump_id_shift = n_rows;
897   }
898 
899 # pragma omp parallel for private(jj, dump_id, col_id, m_row, n_cols)
900   for (ii = 0; ii < n_rows; ii++) {
901     col_id = ms->col_id + ms->row_index[ii];
902     m_row = mc->val + ms->row_index[ii];
903     n_cols = ms->row_index[ii+1] - ms->row_index[ii];
904     for (jj = 0; jj < n_cols; jj++) {
905       dump_id = ms->row_index[ii] + jj + dump_id_shift;
906       _m_coo[dump_id*2] = g_coo_num[ii];
907       _m_coo[dump_id*2+1] = g_coo_num[col_id[jj]];
908       _m_val[dump_id] = m_row[jj];
909     }
910   }
911 
912   return n_entries;
913 }
914 
915 /*----------------------------------------------------------------------------
916  * Prepare dump of MSR matrix.
917  *
918  * parameters:
919  *   matrix    <-- Pointer to matrix structure
920  *   g_coo_num <-- Global coordinate numbers
921  *   m_coo     --> Matrix coefficient coordinates array
922  *   m_val     --> Matrix coefficient values array
923  *
924  * returns:
925  *   number of matrix entries
926  *----------------------------------------------------------------------------*/
927 
928 static cs_lnum_t
_pre_dump_msr(const cs_matrix_t * matrix,const cs_gnum_t * g_coo_num,cs_gnum_t ** m_coo,cs_real_t ** m_val)929 _pre_dump_msr(const cs_matrix_t   *matrix,
930               const cs_gnum_t     *g_coo_num,
931               cs_gnum_t          **m_coo,
932               cs_real_t          **m_val)
933 {
934   cs_lnum_t  ii, jj, n_cols, dump_id;
935   const cs_lnum_t  *restrict col_id;
936   const cs_real_t  *restrict m_row;
937 
938   cs_gnum_t   *restrict _m_coo;
939   cs_real_t   *restrict _m_val;
940 
941   const cs_matrix_struct_csr_t  *ms = matrix->structure;
942   const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
943   const cs_lnum_t  n_rows = ms->n_rows;
944 
945   cs_lnum_t  n_entries = ms->row_index[n_rows] + ms->n_rows;
946 
947   /* Allocate arrays */
948 
949   BFT_MALLOC(_m_coo, n_entries*2, cs_gnum_t);
950   BFT_MALLOC(_m_val, n_entries, double);
951 
952   *m_coo = _m_coo;
953   *m_val = _m_val;
954 
955   /* diagonal contribution */
956 
957   _pre_dump_diag_contrib(mc->d_val, _m_coo, _m_val, g_coo_num, ms->n_rows);
958 
959   /* extra-diagonal contribution */
960 
961   if (mc->x_val != NULL) {
962 #   pragma omp parallel for private(jj, dump_id, col_id, m_row, n_cols)
963     for (ii = 0; ii < n_rows; ii++) {
964       col_id = ms->col_id + ms->row_index[ii];
965       m_row = mc->x_val + ms->row_index[ii];
966       n_cols = ms->row_index[ii+1] - ms->row_index[ii];
967       for (jj = 0; jj < n_cols; jj++) {
968         dump_id = ms->row_index[ii] + jj + ms->n_rows;
969         _m_coo[dump_id*2] = g_coo_num[ii];
970         _m_coo[dump_id*2+1] = g_coo_num[col_id[jj]];
971         _m_val[dump_id] = m_row[jj];
972       }
973     }
974   }
975   else {
976 #   pragma omp parallel for private(jj, dump_id, col_id, n_cols)
977     for (ii = 0; ii < n_rows; ii++) {
978       col_id = ms->col_id + ms->row_index[ii];
979       n_cols = ms->row_index[ii+1] - ms->row_index[ii];
980       for (jj = 0; jj < n_cols; jj++) {
981         dump_id = ms->row_index[ii] + jj + ms->n_rows;
982         _m_coo[dump_id*2] = g_coo_num[ii];
983         _m_coo[dump_id*2+1] = g_coo_num[col_id[jj]];
984         _m_val[dump_id] = 0.0;
985       }
986     }
987   }
988 
989   return n_entries;
990 }
991 
992 /*----------------------------------------------------------------------------
993  * Prepare dump of MSR matrix, blocked version.
994  *
995  * parameters:
996  *   matrix    <-- Pointer to matrix structure
997  *   g_coo_num <-- Global coordinate numbers
998  *   m_coo     --> Matrix coefficient coordinates array
999  *   m_val     --> Matrix coefficient values array
1000  *
1001  * returns:
1002  *   number of matrix entries
1003  *----------------------------------------------------------------------------*/
1004 
1005 static cs_lnum_t
_b_pre_dump_msr(const cs_matrix_t * matrix,const cs_gnum_t * g_coo_num,cs_gnum_t ** m_coo,cs_real_t ** m_val)1006 _b_pre_dump_msr(const cs_matrix_t   *matrix,
1007                 const cs_gnum_t     *g_coo_num,
1008                 cs_gnum_t          **m_coo,
1009                 cs_real_t          **m_val)
1010 {
1011   cs_lnum_t  ii, jj, kk, n_cols, dump_id;
1012   const cs_lnum_t  *restrict col_id;
1013   const cs_real_t  *restrict m_row;
1014 
1015   cs_gnum_t   *restrict _m_coo;
1016   cs_real_t   *restrict _m_val;
1017 
1018   const cs_matrix_struct_csr_t  *ms = matrix->structure;
1019   const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
1020   const cs_lnum_t  *db_size = matrix->db_size;
1021   const cs_lnum_t  n_rows = ms->n_rows;
1022   const cs_lnum_t  dump_id_shift = ms->n_rows*db_size[0]*db_size[0];
1023 
1024   cs_lnum_t  n_entries =   ms->row_index[n_rows]*db_size[0]
1025                          + ms->n_rows*db_size[0]*db_size[0];
1026 
1027   /* Allocate arrays */
1028 
1029   BFT_MALLOC(_m_coo, n_entries*2, cs_gnum_t);
1030   BFT_MALLOC(_m_val, n_entries, double);
1031 
1032   *m_coo = _m_coo;
1033   *m_val = _m_val;
1034 
1035   /* diagonal contribution */
1036 
1037   _b_pre_dump_diag_contrib(mc->d_val, _m_coo, _m_val,
1038                            g_coo_num, ms->n_rows, db_size);
1039 
1040 
1041   /* extra-diagonal contribution */
1042 
1043   if (mc->x_val != NULL) {
1044 #   pragma omp parallel for private(jj, kk, dump_id, col_id, m_row, n_cols)
1045     for (ii = 0; ii < n_rows; ii++) {
1046       col_id = ms->col_id + ms->row_index[ii];
1047       m_row = mc->x_val + ms->row_index[ii];
1048       n_cols = ms->row_index[ii+1] - ms->row_index[ii];
1049       for (jj = 0; jj < n_cols; jj++) {
1050         for (kk = 0; kk < db_size[0]; kk++) {
1051           dump_id = (ms->row_index[ii] + jj)*db_size[0] + kk + dump_id_shift;
1052           _m_coo[dump_id*2] = g_coo_num[ii]*db_size[0] + kk;
1053           _m_coo[dump_id*2+1] = g_coo_num[col_id[jj]]*db_size[0] + kk;
1054           _m_val[dump_id] = m_row[jj];
1055         }
1056       }
1057     }
1058   }
1059   else {
1060 #   pragma omp parallel for private(jj, kk, dump_id, col_id, n_cols)
1061     for (ii = 0; ii < n_rows; ii++) {
1062       col_id = ms->col_id + ms->row_index[ii];
1063       n_cols = ms->row_index[ii+1] - ms->row_index[ii];
1064       for (jj = 0; jj < n_cols; jj++) {
1065         for (kk = 0; kk < db_size[0]; kk++) {
1066           dump_id = (ms->row_index[ii] + jj)*db_size[0] + kk + dump_id_shift;
1067           _m_coo[dump_id*2] = g_coo_num[ii]*db_size[0] + kk;
1068           _m_coo[dump_id*2+1] = g_coo_num[col_id[jj]]*db_size[0] + kk;
1069           _m_val[dump_id] = 0.0;
1070         }
1071       }
1072     }
1073   }
1074 
1075   return n_entries;
1076 }
1077 
1078 /*----------------------------------------------------------------------------
1079  * Write header for dump of matrix to native file.
1080  *
1081  * 3 characters are used to define (in order):
1082  * - the size of cs_gnum_t (int, long, or long long)
1083  * - the size of floating point values (double, should be size _)
1084  * - 'l' for little-endian files, 'b' for big-endian files.
1085  *
1086  * parameters:
1087  *   f <-- Pointer to file structure
1088  *----------------------------------------------------------------------------*/
1089 
1090 static void
_write_header_simple(cs_file_t * f)1091 _write_header_simple(cs_file_t  *f)
1092 {
1093   unsigned char flags[3] = {(unsigned char) sizeof(cs_gnum_t),
1094                             (unsigned char) sizeof(double),
1095                             'b'};
1096 
1097   /* Check if system is "big-endian" or "little-endian" */
1098   {
1099     unsigned  int_endian = 0;
1100     *((char *)(&int_endian)) = '\1';
1101     if (int_endian == 1)
1102       flags[2] = 'l';
1103   }
1104 
1105   assert(sizeof(cs_gnum_t) < 255);
1106   assert(sizeof(double) < 255); /* should always be 8 */
1107 
1108   cs_file_write_global(f, flags, 1, 3);
1109 }
1110 
1111 /*----------------------------------------------------------------------------
1112  * Locally sort matrix dump data.
1113  *
1114  * parameters:
1115  *   n_entries <-- Number of local matrix entries
1116  *   m_coords  <-> Matrix coefficient coordinates (interlaced)
1117  *   m_vals    <-> Matrix coefficient values
1118  *----------------------------------------------------------------------------*/
1119 
1120 static void
_sort_matrix_dump_data(cs_lnum_t n_entries,cs_gnum_t * m_coords,double * m_vals)1121 _sort_matrix_dump_data(cs_lnum_t   n_entries,
1122                        cs_gnum_t  *m_coords,
1123                        double     *m_vals)
1124 {
1125   cs_lnum_t ii, jj;
1126   cs_gnum_t *_m_coords;
1127   double *_m_vals;
1128 
1129   cs_lnum_t *order = cs_order_gnum_s(NULL, m_coords, 2, n_entries);
1130 
1131   BFT_MALLOC(_m_coords, n_entries*2, cs_gnum_t);
1132 
1133   for (ii = 0; ii < n_entries; ii++) {
1134     jj = order[ii];
1135     _m_coords[ii*2] = m_coords[jj*2];
1136     _m_coords[ii*2+1] = m_coords[jj*2+1];
1137   }
1138   memcpy(m_coords, _m_coords, n_entries*2*sizeof(cs_gnum_t));
1139 
1140   BFT_FREE(_m_coords);
1141 
1142   BFT_MALLOC(_m_vals, n_entries, double);
1143 
1144   for (ii = 0; ii < n_entries; ii++)
1145     _m_vals[ii] = m_vals[order[ii]];
1146 
1147   memcpy(m_vals, _m_vals, n_entries*sizeof(double));
1148 
1149   BFT_FREE(_m_vals);
1150 
1151   BFT_FREE(order);
1152 }
1153 
1154 /*----------------------------------------------------------------------------
1155  * Prepare data for matrix dump.
1156  *
1157  * Build local arrays with global matrix coordinates and coefficients.
1158  *
1159  * parameters:
1160  *   m         <-- Pointer to matrix structure
1161  *   n_entries --> Number of local matrix entries
1162  *   m_coords  --> Matrix coefficient coordinates (interlaced)
1163  *   m_vals    --> Matrix coefficient values
1164  *----------------------------------------------------------------------------*/
1165 
1166 static void
_prepare_matrix_dump_data(const cs_matrix_t * m,cs_lnum_t * n_entries,cs_gnum_t ** m_coords,double ** m_vals)1167 _prepare_matrix_dump_data(const cs_matrix_t   *m,
1168                           cs_lnum_t           *n_entries,
1169                           cs_gnum_t          **m_coords,
1170                           double             **m_vals)
1171 {
1172   cs_lnum_t ii, jj;
1173   cs_lnum_t _n_entries = 0;
1174   cs_gnum_t coo_shift = 1, n_g_rows = 0;
1175 
1176   cs_gnum_t *g_coo_num = NULL;
1177   cs_gnum_t *_m_coords = NULL;
1178   double *_m_vals = NULL;
1179 
1180   BFT_MALLOC(g_coo_num, m->n_cols_ext, cs_gnum_t);
1181 
1182   n_g_rows = m->n_rows;
1183 
1184 #if defined(HAVE_MPI)
1185   if (cs_glob_n_ranks > 1) {
1186     cs_gnum_t loc_shift = m->n_rows;
1187     MPI_Scan(&loc_shift, &coo_shift, 1, CS_MPI_GNUM, MPI_SUM, cs_glob_mpi_comm);
1188     coo_shift = coo_shift + 1 - loc_shift;
1189   }
1190 #endif
1191 
1192   for (ii = 0; ii < m->n_rows; ii++)
1193     g_coo_num[ii] = ii + coo_shift;
1194 
1195   if (m->halo != NULL)
1196     cs_halo_sync_untyped(m->halo,
1197                          CS_HALO_STANDARD,
1198                          sizeof(cs_gnum_t),
1199                          g_coo_num);
1200 
1201   /* Populate arrays based on matrix type */
1202 
1203   switch(m->type) {
1204   case CS_MATRIX_NATIVE:
1205     if (m->db_size[3] == 1)
1206       _n_entries = _pre_dump_native(m, g_coo_num, &_m_coords, &_m_vals);
1207     else
1208       _n_entries = _b_pre_dump_native(m, g_coo_num, &_m_coords, &_m_vals);
1209     break;
1210   case CS_MATRIX_CSR:
1211     assert(m->db_size[3] == 1);
1212     _n_entries = _pre_dump_csr(m, g_coo_num, &_m_coords, &_m_vals);
1213     break;
1214   case CS_MATRIX_MSR:
1215     if (m->db_size[3] == 1)
1216       _n_entries = _pre_dump_msr(m, g_coo_num, &_m_coords, &_m_vals);
1217     else
1218       _n_entries = _b_pre_dump_msr(m, g_coo_num, &_m_coords, &_m_vals);
1219     break;
1220   default:
1221     bft_error(__FILE__, __LINE__, 0,
1222               _("Dump of matrixes in %s format\n"
1223                 "is not operational yet."),
1224               _(cs_matrix_get_type_name(m)));
1225     break;
1226   }
1227 
1228   BFT_FREE(g_coo_num);
1229 
1230   /* Sort values */
1231 
1232   _sort_matrix_dump_data(_n_entries, _m_coords, _m_vals);
1233 
1234   /* Remove references to extra periodicity ghost values if present */
1235 
1236   for (ii = 0, jj = 0; ii < _n_entries; ii++) {
1237     if (_m_coords[ii*2] <= n_g_rows && _m_coords[ii*2+1] <= n_g_rows) {
1238       _m_coords[jj*2] = _m_coords[ii*2];
1239       _m_coords[jj*2+1] = _m_coords[ii*2+1];
1240       _m_vals[jj] = _m_vals[ii];
1241       jj += 1;
1242     }
1243   }
1244   _n_entries = jj;
1245 
1246   /* Return arrays */
1247 
1248   *n_entries = _n_entries;
1249   *m_coords = _m_coords;
1250   *m_vals = _m_vals;
1251 }
1252 
1253 #if defined(HAVE_MPI)
1254 
1255 /*----------------------------------------------------------------------------
1256  * Write matrix data to file in parallel mode.
1257  *
1258  * Arrays written are freed by this function.
1259  *
1260  * The full matrix is written, whether symmetric or not. Row and column
1261  * coordinats are 1-to-n based.
1262  *
1263  * File format:
1264  *   - number of matrix entries (1 integer of type cs_gnum_t).
1265  *   - array of row coordinates (n_entries integers of type cs_gnum_t)
1266  *   - array of column coordinates (n_entries integers of type cs_gnum_t)
1267  *   - array of matrix coefficients (n_entries values of type double)
1268  *
1269  * parameters:
1270  *   m <-- Pointer to matrix structure
1271  *   f <-> associated file pointer
1272  *----------------------------------------------------------------------------*/
1273 
1274 static void
_write_matrix_g(const cs_matrix_t * m,cs_file_t * f)1275 _write_matrix_g(const cs_matrix_t  *m,
1276                 cs_file_t          *f)
1277 {
1278   int  block_rank_step = 1, min_block_size = 0;
1279   cs_lnum_t  block_size = 0;
1280   cs_gnum_t  n_glob_ents = 0;
1281 
1282   cs_gnum_t  *b_coords = NULL, *c_coords = NULL, *r_coords = NULL;
1283   double  *b_vals = NULL;
1284 
1285   cs_block_dist_info_t bi;
1286 
1287   fvm_io_num_t *io_num = NULL;
1288   cs_part_to_block_t *d = NULL;
1289 
1290   cs_lnum_t  n_entries = 0;
1291   cs_gnum_t  *m_coords = NULL;
1292   double  *m_vals = NULL;
1293 
1294   const cs_datatype_t gnum_type
1295     = (sizeof(cs_gnum_t) == 8) ? CS_UINT64 : CS_UINT32;
1296 
1297   /* Initialization for matrix coefficients */
1298 
1299   _prepare_matrix_dump_data(m, &n_entries, &m_coords, &m_vals);
1300 
1301   /* Initialization for redistribution */
1302 
1303   io_num = fvm_io_num_create_from_adj_s(NULL, m_coords, n_entries, 2);
1304 
1305   n_glob_ents = fvm_io_num_get_global_count(io_num);
1306 
1307   cs_file_get_default_comm(&block_rank_step, NULL, NULL);
1308 
1309   bi = cs_block_dist_compute_sizes(cs_glob_rank_id,
1310                                    cs_glob_n_ranks,
1311                                    block_rank_step,
1312                                    min_block_size/2,
1313                                    n_glob_ents);
1314 
1315   d = cs_part_to_block_create_by_gnum(cs_glob_mpi_comm,
1316                                       bi,
1317                                       n_entries,
1318                                       fvm_io_num_get_global_num(io_num));
1319 
1320   /* Write number of entries */
1321 
1322   cs_file_write_global(f, &n_glob_ents, sizeof(cs_gnum_t), 1);
1323 
1324   /* Distribute to blocks */
1325 
1326   block_size = (bi.gnum_range[1] - bi.gnum_range[0]);
1327 
1328   /* Distribute coordinate blocks on ranks */
1329 
1330   if (block_size > 0)
1331     BFT_MALLOC(b_coords, block_size*2, cs_gnum_t);
1332 
1333   cs_part_to_block_copy_array(d, gnum_type, 2, m_coords, b_coords);
1334 
1335   BFT_FREE(m_coords);
1336 
1337   /* De-interlace coordinates */
1338 
1339   if (block_size > 0) {
1340    cs_lnum_t  ii;
1341    BFT_MALLOC(r_coords, block_size, cs_gnum_t);
1342    BFT_MALLOC(c_coords, block_size, cs_gnum_t);
1343    for (ii = 0; ii < block_size; ii++) {
1344      r_coords[ii] = b_coords[ii*2];
1345      c_coords[ii] = b_coords[ii*2+1];
1346    }
1347    BFT_FREE(b_coords);
1348   }
1349 
1350   /* Write coordinate blocks */
1351 
1352   cs_file_write_block_buffer(f,
1353                              r_coords,
1354                              sizeof(cs_gnum_t),
1355                              1,
1356                              bi.gnum_range[0],
1357                              bi.gnum_range[1]);
1358 
1359   cs_file_write_block_buffer(f,
1360                              c_coords,
1361                              sizeof(cs_gnum_t),
1362                              1,
1363                              bi.gnum_range[0],
1364                              bi.gnum_range[1]);
1365 
1366   BFT_FREE(c_coords);
1367   BFT_FREE(r_coords);
1368 
1369   /* Distribute value blocks on ranks */
1370 
1371   if (block_size > 0)
1372     BFT_MALLOC(b_vals, block_size, double);
1373 
1374   cs_part_to_block_copy_array(d, CS_DOUBLE, 1, m_vals, b_vals);
1375 
1376   BFT_FREE(m_vals);
1377 
1378   /* Write value blocks */
1379 
1380   cs_file_write_block_buffer(f,
1381                              b_vals,
1382                              sizeof(double),
1383                              1,
1384                              bi.gnum_range[0],
1385                              bi.gnum_range[1]);
1386 
1387   BFT_FREE(b_vals);
1388 
1389   /* Free matrix coefficient distribution structures */
1390 
1391   cs_part_to_block_destroy(&d);
1392   io_num = fvm_io_num_destroy(io_num);
1393 }
1394 
1395 #endif /* #if defined(HAVE_MPI) */
1396 
1397 /*----------------------------------------------------------------------------
1398  * Write matrix data to file in serial mode.
1399  *
1400  * Arrays written are freed by this function.
1401  *
1402  * The full matrix is written, whether symmetric or not. Row and column
1403  * coordinats are 1-to-n based.
1404  *
1405  * File format:
1406  *   - number of matrix entries (1 integer of type cs_gnum_t).
1407  *   - array of row coordinates (n_entries integers of type cs_gnum_t)
1408  *   - array of column coordinates (n_entries integers of type cs_gnum_t)
1409  *   - array of matrix coefficients (n_entries values of type double)
1410  *
1411  * parameters:
1412  *   m <-- Pointer to matrix structure
1413  *   f <-> associated file pointer
1414  *----------------------------------------------------------------------------*/
1415 
1416 static void
_write_matrix_l(const cs_matrix_t * m,cs_file_t * f)1417 _write_matrix_l(const cs_matrix_t  *m,
1418                 cs_file_t          *f)
1419 {
1420   cs_lnum_t  ii;
1421   cs_gnum_t  n_glob_ents = 0;
1422 
1423   cs_gnum_t  *c_coords = NULL, *r_coords = NULL;
1424 
1425   cs_lnum_t  n_entries = 0;
1426   cs_gnum_t  *m_coords = NULL;
1427   double  *m_vals = NULL;
1428 
1429   /* Initialization */
1430 
1431   _prepare_matrix_dump_data(m, &n_entries, &m_coords, &m_vals);
1432 
1433   n_glob_ents = n_entries;
1434 
1435   /* Write number of entries */
1436 
1437   cs_file_write_global(f, &n_glob_ents, sizeof(cs_gnum_t), 1);
1438 
1439   /* De-interlace coordinates */
1440 
1441   BFT_MALLOC(r_coords, n_entries, cs_gnum_t);
1442   BFT_MALLOC(c_coords, n_entries, cs_gnum_t);
1443   for (ii = 0; ii < n_entries; ii++) {
1444     r_coords[ii] = m_coords[ii*2];
1445     c_coords[ii] = m_coords[ii*2+1];
1446   }
1447   BFT_FREE(m_coords);
1448 
1449   /* Write coordinate blocks */
1450 
1451   cs_file_write_global(f, r_coords, sizeof(cs_gnum_t), n_entries);
1452   cs_file_write_global(f, c_coords, sizeof(cs_gnum_t), n_entries);
1453 
1454   BFT_FREE(r_coords);
1455   BFT_FREE(c_coords);
1456 
1457   /* Write value blocks */
1458 
1459   cs_file_write_global(f, m_vals, sizeof(double), n_entries);
1460 
1461   BFT_FREE(m_vals);
1462 }
1463 
1464 #if defined(HAVE_MPI)
1465 
1466 /*----------------------------------------------------------------------------
1467  * Write vector data to file in parallel mode.
1468  *
1469  * File format:
1470  *   - vector size matrix entries (1 integer of type cs_gnum_t).
1471  *   - array of vector values (n_elts values of type double)
1472  *
1473  * parameters:
1474  *   n_elts <-- local number of matrix entries
1475  *   vals   <-- vector element values
1476  *   f      <-> associated file pointer
1477  *----------------------------------------------------------------------------*/
1478 
1479 static void
_write_vector_g(cs_lnum_t n_elts,const cs_real_t * vals,cs_file_t * f)1480 _write_vector_g(cs_lnum_t         n_elts,
1481                 const cs_real_t  *vals,
1482                 cs_file_t        *f)
1483 {
1484   cs_lnum_t  ii;
1485 
1486   int        block_rank_step = 1, min_block_size = 0;
1487   cs_lnum_t  block_size = 0;
1488   cs_gnum_t  coo_shift = 1;
1489   cs_gnum_t  local_max = 0, n_glob_ents = 0;
1490 
1491   cs_gnum_t  *g_elt_num = NULL;
1492   double  *b_vals = NULL;
1493 
1494   cs_block_dist_info_t bi;
1495 
1496   cs_part_to_block_t *d = NULL;
1497 
1498   cs_gnum_t loc_shift = n_elts;
1499   MPI_Scan(&loc_shift, &coo_shift, 1, CS_MPI_GNUM, MPI_SUM, cs_glob_mpi_comm);
1500   coo_shift = coo_shift - loc_shift;
1501 
1502   /* Get maximum global number value */
1503 
1504   if (n_elts > 0)
1505     local_max = n_elts + coo_shift;
1506 
1507   MPI_Allreduce(&local_max, &n_glob_ents, 1, CS_MPI_GNUM, MPI_MAX,
1508                 cs_glob_mpi_comm);
1509 
1510   BFT_MALLOC(g_elt_num, n_elts, cs_gnum_t);
1511 
1512   for (ii = 0; ii < n_elts; ii++)
1513     g_elt_num[ii] = ii + coo_shift + 1;
1514 
1515   cs_file_get_default_comm(&block_rank_step, NULL, NULL);
1516 
1517   /* Redistribution structures */
1518 
1519   bi = cs_block_dist_compute_sizes(cs_glob_rank_id,
1520                                    cs_glob_n_ranks,
1521                                    block_rank_step,
1522                                    min_block_size/2,
1523                                    n_glob_ents);
1524 
1525   d = cs_part_to_block_create_by_gnum(cs_glob_mpi_comm,
1526                                       bi,
1527                                       n_elts,
1528                                       g_elt_num);
1529 
1530   /* Write number of entries */
1531 
1532   cs_file_write_global(f, &n_glob_ents, sizeof(cs_gnum_t), 1);
1533 
1534   /* Distribute to blocks */
1535 
1536   block_size = (bi.gnum_range[1] - bi.gnum_range[0]);
1537 
1538   if (block_size > 0)
1539     BFT_MALLOC(b_vals, block_size, double);
1540 
1541   if (sizeof(cs_real_t) != sizeof(double)) {
1542     double  *p_vals = NULL;
1543     BFT_MALLOC(p_vals, n_elts, double);
1544     for (ii = 0; ii < n_elts; ii++)
1545       p_vals[ii] = vals[ii];
1546     cs_part_to_block_copy_array(d, CS_DOUBLE, 1, p_vals, b_vals);
1547     BFT_FREE(p_vals);
1548   }
1549   else
1550     cs_part_to_block_copy_array(d, CS_DOUBLE, 1, vals, b_vals);
1551 
1552   /* Write value blocks */
1553 
1554   cs_file_write_block_buffer(f,
1555                              b_vals,
1556                              sizeof(double),
1557                              1,
1558                              bi.gnum_range[0],
1559                              bi.gnum_range[1]);
1560 
1561   BFT_FREE(b_vals);
1562 
1563   /* Free distribution structures */
1564 
1565   cs_part_to_block_destroy(&d);
1566 
1567   BFT_FREE(g_elt_num);
1568 }
1569 
1570 #endif /* #if defined(HAVE_MPI) */
1571 
1572 /*----------------------------------------------------------------------------
1573  * Write vector data to file in serial mode.
1574  *
1575  * File format:
1576  *   - vector size matrix entries (1 integer of type cs_gnum_t).
1577  *   - array of vector values (n_elts values of type double)
1578  *
1579  * parameters:
1580  *   n_elts <-- local number of matrix entries
1581  *   vals   <-- vector element values
1582  *   f      <-> associated file pointer
1583  *----------------------------------------------------------------------------*/
1584 
1585 static void
_write_vector_l(cs_lnum_t n_elts,const cs_real_t * vals,cs_file_t * f)1586 _write_vector_l(cs_lnum_t         n_elts,
1587                 const cs_real_t  *vals,
1588                 cs_file_t        *f)
1589 {
1590   cs_gnum_t  n_glob_ents = n_elts;
1591 
1592   /* Write number of entries */
1593 
1594   cs_file_write_global(f, &n_glob_ents, sizeof(cs_gnum_t), 1);
1595 
1596   /* Write values */
1597 
1598   if (sizeof(cs_real_t) != sizeof(double)) {
1599     cs_lnum_t ii;
1600     double  *p_vals = NULL;
1601     BFT_MALLOC(p_vals, n_elts, double);
1602     for (ii = 0; ii < n_elts; ii++)
1603       p_vals[ii] = vals[ii];
1604     cs_file_write_global(f, p_vals, sizeof(double), n_elts);
1605     BFT_FREE(p_vals);
1606   }
1607   else
1608     cs_file_write_global(f, vals, sizeof(double), n_elts);
1609 }
1610 
1611 /*----------------------------------------------------------------------------
1612  * Compute the Frobenius norm of a matrix.
1613  *
1614  * parameters:
1615  *   m <-- pointer to matrix structure
1616  *
1617  * returns:
1618  *    matrix Frobenius norm (or -1 if not computable)
1619  *----------------------------------------------------------------------------*/
1620 
1621 static double
_frobenius_norm(const cs_matrix_t * m)1622 _frobenius_norm(const cs_matrix_t  *m)
1623 {
1624   double retval = -1.;
1625 
1626   if (m == NULL)
1627     return retval;
1628 
1629   cs_matrix_fill_type_t ft = m->fill_type;
1630 
1631   switch(m->type) {
1632 
1633   case CS_MATRIX_NATIVE:
1634     {
1635       if (   (m->eb_size[0]*m->eb_size[0] == m->eb_size[3])
1636           && (m->db_size[0]*m->db_size[0] == m->db_size[3])) {
1637 
1638         cs_lnum_t  d_stride = m->db_size[3];
1639         cs_lnum_t  e_stride = m->eb_size[3];
1640         const cs_matrix_struct_native_t  *ms = m->structure;
1641         const cs_matrix_coeff_native_t  *mc = m->coeffs;
1642         double e_mult = (m->eb_size[3] == 1) ? m->db_size[0] : 1;
1643         if (mc->symmetric)
1644           e_mult *= 2;
1645         else
1646           e_stride *= 2;
1647 
1648         retval = cs_dot_xx(d_stride*m->n_rows, mc->da);
1649 
1650         double ed_contrib = 0.;
1651         const cs_real_t  *restrict xa = mc->xa;
1652 #       pragma omp parallel reduction(+:ed_contrib) if (ms->n_edges > CS_THR_MIN)
1653         {
1654           double c = 0; /* Use Kahan compensated summation for
1655                            sum of block contributions (but not for local
1656                            block sums, for simplicity and performance) */
1657 #         pragma omp for
1658           for (cs_lnum_t edge_id = 0; edge_id < ms->n_edges; edge_id++) {
1659             cs_lnum_t ii = ms->edges[edge_id][0];
1660             if (ii < ms->n_rows) {
1661               double bsum = 0;
1662               for (cs_lnum_t kk = 0; kk < e_stride; kk++) {
1663                 bsum  += (  xa[edge_id*e_stride + kk]
1664                           * xa[edge_id*e_stride + kk]);
1665               }
1666               double z = bsum - c;
1667               double t = ed_contrib + z;
1668               c = (t - ed_contrib) - z;
1669               ed_contrib = t;
1670             }
1671           }
1672         }
1673         retval += ed_contrib*e_mult;
1674 
1675         cs_parall_sum(1, CS_DOUBLE, &retval);
1676       }
1677     }
1678     break;
1679 
1680   case CS_MATRIX_CSR:
1681     assert(   ft == CS_MATRIX_SCALAR
1682            || ft == CS_MATRIX_SCALAR_SYM
1683            || ft == CS_MATRIX_BLOCK);
1684     if (m->eb_size[0]*m->eb_size[0] == m->eb_size[3]) {
1685       cs_lnum_t  stride = m->eb_size[3];
1686       const cs_matrix_struct_csr_t  *ms = m->structure;
1687       const cs_matrix_coeff_csr_t  *mc = m->coeffs;
1688       cs_lnum_t n_vals = ms->row_index[m->n_rows];
1689       retval = cs_dot_xx(stride*n_vals, mc->val);
1690       cs_parall_sum(1, CS_DOUBLE, &retval);
1691     }
1692     break;
1693 
1694   case CS_MATRIX_MSR:
1695     if (   (m->eb_size[0]*m->eb_size[0] == m->eb_size[3])
1696         && (m->db_size[0]*m->db_size[0] == m->db_size[3])) {
1697       cs_lnum_t  d_stride = m->db_size[3];
1698       cs_lnum_t  e_stride = m->eb_size[3];
1699       const cs_matrix_struct_csr_t  *ms = m->structure;
1700       const cs_matrix_coeff_msr_t  *mc = m->coeffs;
1701       cs_lnum_t n_vals = ms->row_index[m->n_rows];
1702       double d_mult = (m->eb_size[3] == 1) ? m->db_size[0] : 1;
1703       retval = cs_dot_xx(d_stride*m->n_rows, mc->d_val);
1704       retval += d_mult * cs_dot_xx(e_stride*n_vals, mc->x_val);
1705       cs_parall_sum(1, CS_DOUBLE, &retval);
1706     }
1707     break;
1708 
1709     default:
1710       retval = -1;
1711   }
1712 
1713   if (retval > 0)
1714     retval = sqrt(retval);
1715 
1716   return retval;
1717 }
1718 
1719 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
1720 
1721 /*============================================================================
1722  * Public function definitions
1723  *============================================================================*/
1724 
1725 /*----------------------------------------------------------------------------
1726  * Compute diagonal dominance metric.
1727  *
1728  * parameters:
1729  *   matrix <-- pointer to matrix structure
1730  *   dd     --> diagonal dominance (normalized)
1731  *----------------------------------------------------------------------------*/
1732 
1733 void
cs_matrix_diag_dominance(const cs_matrix_t * matrix,cs_real_t dd[])1734 cs_matrix_diag_dominance(const cs_matrix_t  *matrix,
1735                          cs_real_t           dd[])
1736 {
1737   const cs_halo_t *halo = matrix->halo;
1738 
1739   switch(matrix->type) {
1740   case CS_MATRIX_NATIVE:
1741     if (matrix->db_size[3] == 1)
1742       _diag_dom_native(matrix, dd);
1743     else if (matrix->eb_size[3] == 1)
1744       _b_diag_dom_native(matrix, dd);
1745     else
1746       _bb_diag_dom_native(matrix, dd);
1747     break;
1748   case CS_MATRIX_CSR:
1749     assert(matrix->db_size[3] == 1);
1750     _diag_dom_csr(matrix, dd);
1751     break;
1752   case CS_MATRIX_MSR:
1753     if (matrix->db_size[3] == 1)
1754       _diag_dom_msr(matrix, dd);
1755     else
1756       _b_diag_dom_msr(matrix, dd);
1757     break;
1758     break;
1759   default:
1760     bft_error(__FILE__, __LINE__, 0,
1761               _("Extraction of diagonal dominance of matrixes in %s format\n"
1762                 "is not operational yet."),
1763               _(cs_matrix_get_type_name(matrix)));
1764     break;
1765   }
1766 
1767   /* Sync ghost rows as a precaution */
1768 
1769   if (halo != NULL) {
1770     if (matrix->db_size[3] == 1)
1771       cs_halo_sync_var(halo, CS_HALO_STANDARD, dd);
1772     else {
1773       cs_halo_sync_var_strided(halo, CS_HALO_STANDARD, dd, matrix->db_size[1]);
1774       if (halo->n_transforms > 0 && matrix->db_size[0] == 3)
1775         cs_halo_perio_sync_var_vect(halo,
1776                                     CS_HALO_STANDARD,
1777                                     dd,
1778                                     matrix->db_size[1]);
1779     }
1780   }
1781 }
1782 
1783 /*----------------------------------------------------------------------------
1784  * Dump a linear system matrix and right-hand side to file.
1785  *
1786  * parameters:
1787  *   matrix <-- pointer to matrix structure
1788  *   rhs    <-- right hand side vector
1789  *   name   <-- identifier string used in file name
1790  *----------------------------------------------------------------------------*/
1791 
1792 void
cs_matrix_dump_linear_system(const cs_matrix_t * matrix,const cs_real_t rhs[],const char * name)1793 cs_matrix_dump_linear_system(const cs_matrix_t  *matrix,
1794                              const cs_real_t     rhs[],
1795                              const char         *name)
1796 {
1797   char filename[64];
1798   cs_gnum_t n_g_rows = matrix->n_rows;
1799 
1800   cs_file_t  *f = NULL;
1801 
1802 #if defined(HAVE_MPI)
1803   if (cs_glob_n_ranks > 1) {
1804     cs_gnum_t n_l_rows = matrix->n_rows;
1805     MPI_Allreduce(&n_l_rows, &n_g_rows, 1, CS_MPI_GNUM, MPI_SUM,
1806                   cs_glob_mpi_comm);
1807   }
1808 #endif
1809 
1810   snprintf(filename, 63, "%s_%010llu", name, (unsigned long long)n_g_rows);
1811   filename[63] = '\0';
1812 
1813   f = cs_file_open_default(filename, CS_FILE_MODE_WRITE);
1814 
1815   _write_header_simple(f);
1816 
1817 #if defined(HAVE_MPI)
1818   if (cs_glob_n_ranks > 1) {
1819     _write_matrix_g(matrix, f);
1820     _write_vector_g(matrix->n_rows, rhs, f);
1821   }
1822 #endif
1823   if (cs_glob_n_ranks == 1) {
1824     _write_matrix_l(matrix, f);
1825     _write_vector_l(matrix->n_rows, rhs, f);
1826   }
1827 
1828   f = cs_file_free(f);
1829 }
1830 
1831 /*----------------------------------------------------------------------------*/
1832 /*!
1833  * \brief Log general info relative to matrix.
1834  *
1835  * \param[in]  matrix     pointer to matrix structure
1836  * \param[in]  verbosity  verbosity level
1837  */
1838 /*----------------------------------------------------------------------------*/
1839 
1840 void
cs_matrix_log_info(const cs_matrix_t * matrix,int verbosity)1841 cs_matrix_log_info(const cs_matrix_t  *matrix,
1842                    int                 verbosity)
1843 {
1844   cs_log_t l = CS_LOG_DEFAULT;
1845 
1846   if (matrix == NULL)
1847     bft_error(__FILE__, __LINE__, 0,
1848               _("The matrix is not defined."));
1849 
1850   cs_log_printf(l,
1851                 _("\n"
1852                   " Matrix info:\n"
1853                   "   type: %s\n"),
1854                 cs_matrix_get_type_fullname(matrix));
1855 
1856   if (matrix->fill_type == CS_MATRIX_N_FILL_TYPES)
1857     return;
1858 
1859   cs_log_printf(l,
1860                 _("   fill type: %s\n"),
1861                 cs_matrix_fill_type_name[matrix->fill_type]);
1862 
1863   if (verbosity > 1) {
1864     double fnorm = _frobenius_norm(matrix);
1865     if (fnorm > -1)
1866       cs_log_printf(l,
1867                     _("   Frobenius norm: %11.4e\n"), fnorm);
1868   }
1869 
1870   cs_log_printf(l, "\n");
1871 }
1872 
1873 /*----------------------------------------------------------------------------
1874  * Test matrix dump operations.
1875  *
1876  * parameters:
1877  *   n_rows     <-- number of local rows
1878  *   n_cols_ext <-- number of columns including ghost columns (array size)
1879  *   n_edges    <-- local number of graph edges
1880  *   edges      <-- graph edges connectivity
1881  *   halo       <-- cell halo structure
1882  *   numbering  <-- vectorization or thread-related numbering info, or NULL
1883  *----------------------------------------------------------------------------*/
1884 
1885 void
cs_matrix_dump_test(cs_lnum_t n_rows,cs_lnum_t n_cols_ext,cs_lnum_t n_edges,const cs_lnum_2_t * edges,const cs_halo_t * halo,const cs_numbering_t * numbering)1886 cs_matrix_dump_test(cs_lnum_t              n_rows,
1887                     cs_lnum_t              n_cols_ext,
1888                     cs_lnum_t              n_edges,
1889                     const cs_lnum_2_t     *edges,
1890                     const cs_halo_t       *halo,
1891                     const cs_numbering_t  *numbering)
1892 {
1893   cs_lnum_t  ii;
1894   int  test_id;
1895 
1896   cs_real_t  *da = NULL, *xa = NULL, *rhs = NULL;
1897   cs_lnum_t  diag_block_size[4] = {3, 3, 3, 9};
1898   cs_lnum_t  extra_diag_block_size[4] = {1, 1, 1, 1};
1899 
1900   const int n_tests = 7;
1901   const char *name[] = {"matrix_native",
1902                         "matrix_native_sym",
1903                         "matrix_native_block",
1904                         "matrix_csr",
1905                         "matrix_msr",
1906                         "matrix_msr_block"};
1907   const cs_matrix_type_t type[] = {CS_MATRIX_NATIVE,
1908                                    CS_MATRIX_NATIVE,
1909                                    CS_MATRIX_NATIVE,
1910                                    CS_MATRIX_CSR,
1911                                    CS_MATRIX_MSR,
1912                                    CS_MATRIX_MSR};
1913   const bool sym_flag[] = {false, true, false, false, false, false};
1914   const int block_flag[] = {0, 0, 1, 0, 0, 1};
1915 
1916   /* Allocate and initialize  working arrays */
1917   /*-----------------------------------------*/
1918 
1919   BFT_MALLOC(rhs, n_cols_ext*diag_block_size[1], cs_real_t);
1920 
1921   BFT_MALLOC(da, n_cols_ext*diag_block_size[3], cs_real_t);
1922   BFT_MALLOC(xa, n_edges*2, cs_real_t);
1923 
1924 # pragma omp parallel for
1925   for (ii = 0; ii < n_cols_ext*diag_block_size[3]; ii++)
1926     da[ii] = 1.0 + ii*0.1/n_cols_ext;
1927 # pragma omp parallel for
1928   for (ii = 0; ii < n_cols_ext*diag_block_size[1]; ii++)
1929     rhs[ii] = ii*0.1/n_cols_ext;
1930 
1931 # pragma omp parallel for
1932   for (ii = 0; ii < n_edges; ii++) {
1933     xa[ii*2] = 0.5*(1.0 + ii*1.0/n_edges);
1934     xa[ii*2 + 1] = -0.5*(1.0 + ii*1.0/n_edges);
1935   }
1936 
1937   /* Loop on variant types */
1938   /*-----------------------*/
1939 
1940   for (test_id = 0; test_id < n_tests; test_id++) {
1941 
1942     cs_lnum_t *_diag_block_size = (block_flag[test_id]) ? diag_block_size : NULL;
1943     cs_lnum_t *_extra_diag_block_size = (block_flag[test_id]-1) ?
1944       extra_diag_block_size : NULL;
1945 
1946     cs_matrix_structure_t
1947       *ms = cs_matrix_structure_create(type[test_id],
1948                                        true,
1949                                        n_rows,
1950                                        n_cols_ext,
1951                                        n_edges,
1952                                        edges,
1953                                        halo,
1954                                        numbering);
1955     cs_matrix_t *m = cs_matrix_create(ms);
1956 
1957     cs_matrix_set_coefficients(m,
1958                                sym_flag[test_id],
1959                                _diag_block_size,
1960                                _extra_diag_block_size,
1961                                n_edges,
1962                                edges,
1963                                da,
1964                                xa);
1965 
1966     cs_matrix_dump_linear_system(m, rhs, name[test_id]);
1967 
1968     cs_matrix_release_coefficients(m);
1969 
1970     cs_matrix_destroy(&m);
1971     cs_matrix_structure_destroy(&ms);
1972   }
1973 
1974   BFT_FREE(rhs);
1975 
1976   BFT_FREE(da);
1977   BFT_FREE(xa);
1978 }
1979 
1980 /*----------------------------------------------------------------------------*/
1981 
1982 END_C_DECLS
1983