1 /*============================================================================
2  * Set of operations to handle Small Dense Matrices (SDM)
3  *============================================================================*/
4 
5 /*
6   This file is part of Code_Saturne, a general-purpose CFD tool.
7 
8   Copyright (C) 1998-2021 EDF S.A.
9 
10   This program is free software; you can redistribute it and/or modify it under
11   the terms of the GNU General Public License as published by the Free Software
12   Foundation; either version 2 of the License, or (at your option) any later
13   version.
14 
15   This program is distributed in the hope that it will be useful, but WITHOUT
16   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
18   details.
19 
20   You should have received a copy of the GNU General Public License along with
21   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22   Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24 
25 /*----------------------------------------------------------------------------*/
26 
27 /*----------------------------------------------------------------------------
28  * Standard C library headers
29  *----------------------------------------------------------------------------*/
30 
31 #include <stdlib.h>
32 #include <assert.h>
33 #include <float.h>
34 #include <limits.h>
35 
36 /*----------------------------------------------------------------------------
37  *  Local headers
38  *----------------------------------------------------------------------------*/
39 
40 #include <bft_mem.h>
41 
42 #include "cs_blas.h"
43 #include "cs_log.h"
44 #include "cs_math.h"
45 #include "cs_parall.h"
46 #include "cs_sort.h"
47 
48 /*----------------------------------------------------------------------------
49  *  Header for the current file
50  *----------------------------------------------------------------------------*/
51 
52 #include "cs_sdm.h"
53 
54 /*----------------------------------------------------------------------------*/
55 
56 BEGIN_C_DECLS
57 
58 /*============================================================================
59  * Static definitions
60  *============================================================================*/
61 
62 static const char  _msg_small_p[] =
63   " %s: Very small or null pivot.\n Stop inversion.";
64 
65 /*============================================================================
66  * Private function prototypes
67  *============================================================================*/
68 
69 /*----------------------------------------------------------------------------*/
70 /*!
71  * \brief   Generic way to allocate a cs_sdm_t structure
72  *
73  * \param[in]  flag         metadata related to a cs_sdm_t structure
74  * \param[in]  n_max_rows   max number of rows
75  * \param[in]  n_max_cols   max number of columns
76  * \param[in]  n_max_rows   max number of rows
77  * \param[in]  n_max_cols   max number of columns
78  *
79  *
80  * \return  a new allocated cs_sdm_t structure
81  */
82 /*----------------------------------------------------------------------------*/
83 
84 static cs_sdm_t *
_create_sdm(cs_flag_t flag,int n_max_rows,int n_max_cols)85 _create_sdm(cs_flag_t   flag,
86             int         n_max_rows,
87             int         n_max_cols)
88 {
89   cs_sdm_t  *mat = NULL;
90 
91   BFT_MALLOC(mat, 1, cs_sdm_t);
92 
93   mat->flag = flag;
94   mat->n_max_rows = n_max_rows;
95   mat->n_max_cols = n_max_cols;
96   mat->n_rows = n_max_rows;
97   mat->n_cols = n_max_cols;
98 
99   BFT_MALLOC(mat->val, mat->n_max_rows*mat->n_max_cols, cs_real_t);
100   memset(mat->val, 0, sizeof(cs_real_t)*mat->n_max_rows*mat->n_max_cols);
101 
102   if (flag & CS_SDM_BY_BLOCK) {
103 
104     cs_sdm_block_t  *bd = NULL;
105 
106     BFT_MALLOC(bd, 1, cs_sdm_block_t);
107     bd->blocks = NULL;
108     bd->n_max_blocks_by_row = bd->n_max_blocks_by_col = 0;
109     bd->n_row_blocks = bd->n_col_blocks = 0;
110     mat->block_desc = bd;
111 
112   }
113   else
114     mat->block_desc = NULL;
115 
116   return mat;
117 }
118 
119 /*============================================================================
120  * Public function prototypes
121  *============================================================================*/
122 
123 /*----------------------------------------------------------------------------*/
124 /*!
125  * \brief   Allocate and initialize a cs_sdm_t structure
126  *          Most generic function to create a cs_sdm_t structure without block
127  *          description
128  *
129  * \param[in]  flag         metadata related to a cs_sdm_t structure
130  * \param[in]  n_max_rows   max number of rows
131  * \param[in]  n_max_cols   max number of columns
132  *
133  * \return  a new allocated cs_sdm_t structure
134  */
135 /*----------------------------------------------------------------------------*/
136 
137 cs_sdm_t *
cs_sdm_create(cs_flag_t flag,int n_max_rows,int n_max_cols)138 cs_sdm_create(cs_flag_t   flag,
139               int         n_max_rows,
140               int         n_max_cols)
141 {
142   return _create_sdm(flag, n_max_rows, n_max_cols);
143 }
144 
145 /*----------------------------------------------------------------------------*/
146 /*!
147  * \brief   Allocate and initialize a cs_sdm_t structure
148  *          Case of a square matrix
149  *
150  * \param[in]  n_max_rows   max number of rows
151  *
152  * \return  a new allocated cs_sdm_t structure
153  */
154 /*----------------------------------------------------------------------------*/
155 
156 cs_sdm_t *
cs_sdm_square_create(int n_max_rows)157 cs_sdm_square_create(int   n_max_rows)
158 {
159   return _create_sdm(0, n_max_rows, n_max_rows);
160 }
161 
162 /*----------------------------------------------------------------------------*/
163 /*!
164  * \brief   Allocate a cs_sdm_t structure and initialized it with the
165  *          copy of the matrix m in input
166  *
167  * \param[in]     m     pointer to a cs_sdm_t structure to copy
168  *
169  * \return  a pointer to a new allocated cs_sdm_t structure
170  */
171 /*----------------------------------------------------------------------------*/
172 
173 cs_sdm_t *
cs_sdm_create_copy(const cs_sdm_t * m)174 cs_sdm_create_copy(const cs_sdm_t   *m)
175 {
176   cs_sdm_t  *c = _create_sdm(m->flag, m->n_max_rows, m->n_max_cols);
177 
178   c->n_rows = m->n_rows;
179   c->n_cols = m->n_cols;
180   memcpy(c->val, m->val, sizeof(cs_real_t)*m->n_rows*m->n_cols);
181 
182   return c;
183 }
184 
185 /*----------------------------------------------------------------------------*/
186 /*!
187  * \brief   Define a new matrix which is its transpose.
188  *
189  * \param[in] mat   local matrix to transpose
190  *
191  * \return a pointer to the new allocated transposed matrix
192  */
193 /*----------------------------------------------------------------------------*/
194 
195 cs_sdm_t *
cs_sdm_create_transpose(cs_sdm_t * mat)196 cs_sdm_create_transpose(cs_sdm_t  *mat)
197 {
198   /* Sanity check */
199   assert(mat != NULL);
200 
201   cs_sdm_t  *tr = _create_sdm(mat->flag, mat->n_max_cols, mat->n_max_rows);
202 
203   tr->n_rows = mat->n_cols;
204   tr->n_cols = mat->n_rows;
205 
206   for (short int i = 0; i < mat->n_rows; i++) {
207 
208     const cs_real_t  *mval_i = mat->val + i*mat->n_cols;
209     for (short int j = 0; j < mat->n_cols; j++)
210       tr->val[j*tr->n_cols+i] = mval_i[j];
211 
212   }
213 
214   return tr;
215 }
216 
217 /*----------------------------------------------------------------------------*/
218 /*!
219  * \brief   Allocate and initialize a cs_sdm_t structure
220  *
221  * \param[in]  n_max_blocks_by_row    max number of blocks in a row
222  * \param[in]  n_max_blocks_by_col    max number of blocks in a column
223  * \param[in]  max_row_block_sizes    max number of rows by block in a column
224  * \param[in]  max_col_block_sizes    max number of columns by block in a row
225  *
226  * \return  a new allocated cs_sdm_t structure
227  */
228 /*----------------------------------------------------------------------------*/
229 
230 cs_sdm_t *
cs_sdm_block_create(int n_max_blocks_by_row,int n_max_blocks_by_col,const int max_row_block_sizes[],const int max_col_block_sizes[])231 cs_sdm_block_create(int          n_max_blocks_by_row,
232                     int          n_max_blocks_by_col,
233                     const int    max_row_block_sizes[],
234                     const int    max_col_block_sizes[])
235 {
236   cs_sdm_t  *m = NULL;
237 
238   if (n_max_blocks_by_row < 1 || n_max_blocks_by_col < 1)
239     return m;
240 
241   int  row_size = 0, col_size = 0;
242   for (int i = 0; i < n_max_blocks_by_row; i++)
243     row_size += max_row_block_sizes[i];
244   for (int j = 0; j < n_max_blocks_by_col; j++)
245     col_size += max_col_block_sizes[j];
246 
247   m = _create_sdm(CS_SDM_BY_BLOCK, row_size, col_size);
248 
249   /* Define the block description */
250   m->block_desc->n_max_blocks_by_row = n_max_blocks_by_row;
251   m->block_desc->n_max_blocks_by_col = n_max_blocks_by_col;
252   m->block_desc->n_row_blocks = n_max_blocks_by_row;
253   m->block_desc->n_col_blocks = n_max_blocks_by_col;
254   BFT_MALLOC(m->block_desc->blocks,
255              n_max_blocks_by_row*n_max_blocks_by_col, cs_sdm_t);
256 
257   cs_real_t  *p_val = m->val;
258   int  shift = 0;
259   for (int i = 0; i < n_max_blocks_by_row; i++) {
260     const short int  n_rows_i = max_row_block_sizes[i];
261     for (int j = 0; j < n_max_blocks_by_col; j++) {
262       const short int  n_cols_j = max_col_block_sizes[j];
263 
264       /* Set the block (i,j) */
265       cs_sdm_t  *b_ij = m->block_desc->blocks + shift;
266       int  _size = n_rows_i*n_cols_j;
267 
268       cs_sdm_map_array(n_rows_i, n_cols_j, b_ij, p_val);
269       shift++;
270       p_val += _size;
271 
272     }
273   }
274 
275   return m;
276 }
277 
278 /*----------------------------------------------------------------------------*/
279 /*!
280  * \brief   Allocate and initialize a cs_sdm_t structure by block when the
281  *          block size is constant and equal to 3
282  *
283  * \param[in]  n_max_blocks_by_row    max number of blocks in a row
284  * \param[in]  n_max_blocks_by_col    max number of blocks in a column
285  *
286  * \return  a new allocated cs_sdm_t structure
287  */
288 /*----------------------------------------------------------------------------*/
289 
290 cs_sdm_t *
cs_sdm_block33_create(int n_max_blocks_by_row,int n_max_blocks_by_col)291 cs_sdm_block33_create(int      n_max_blocks_by_row,
292                       int      n_max_blocks_by_col)
293 {
294   cs_sdm_t  *m = NULL;
295 
296   if (n_max_blocks_by_row < 1 || n_max_blocks_by_col < 1)
297     return m;
298 
299   m = _create_sdm(CS_SDM_BY_BLOCK,
300                   3*n_max_blocks_by_row,
301                   3*n_max_blocks_by_col);
302 
303   /* Define the block description */
304   m->block_desc->n_max_blocks_by_row = n_max_blocks_by_row;
305   m->block_desc->n_max_blocks_by_col = n_max_blocks_by_col;
306   m->block_desc->n_row_blocks = n_max_blocks_by_row;
307   m->block_desc->n_col_blocks = n_max_blocks_by_col;
308   BFT_MALLOC(m->block_desc->blocks,
309              n_max_blocks_by_row*n_max_blocks_by_col, cs_sdm_t);
310 
311   cs_real_t  *p_val = m->val;
312   for (int i = 0; i < n_max_blocks_by_row*n_max_blocks_by_col; i++) {
313     cs_sdm_map_array(3, 3, m->block_desc->blocks + i, p_val);
314       p_val += 9;
315   }
316 
317   return m;
318 }
319 
320 /*----------------------------------------------------------------------------*/
321 /*!
322  * \brief   Free a cs_sdm_t structure
323  *
324  * \param[in]  mat    pointer to a cs_sdm_t struct. to free
325  *
326  * \return  a NULL pointer
327  */
328 /*----------------------------------------------------------------------------*/
329 
330 cs_sdm_t *
cs_sdm_free(cs_sdm_t * mat)331 cs_sdm_free(cs_sdm_t  *mat)
332 {
333   if (mat == NULL)
334     return mat;
335 
336   if ((mat->flag & CS_SDM_SHARED_VAL) == 0)
337     BFT_FREE(mat->val);
338 
339   if (mat->flag & CS_SDM_BY_BLOCK) {
340     BFT_FREE(mat->block_desc->blocks);
341     BFT_FREE(mat->block_desc);
342   }
343 
344   BFT_FREE(mat);
345 
346   return NULL;
347 }
348 
349 /*----------------------------------------------------------------------------*/
350 /*!
351  * \brief   Initialize the pattern of cs_sdm_t structure defined by block
352  *          The matrix should have been allocated before calling this function
353  *
354  * \param[in, out] m
355  * \param[in]      n_row_blocks      number of blocks in a row
356  * \param[in]      n_col_blocks      number of blocks in a column
357  * \param[in]      row_block_sizes   number of rows by block in a column
358  * \param[in]      col_block_sizes   number of columns by block in a row
359  */
360 /*----------------------------------------------------------------------------*/
361 
362 void
cs_sdm_block_init(cs_sdm_t * m,int n_row_blocks,int n_col_blocks,const int row_block_sizes[],const int col_block_sizes[])363 cs_sdm_block_init(cs_sdm_t      *m,
364                   int            n_row_blocks,
365                   int            n_col_blocks,
366                   const int      row_block_sizes[],
367                   const int      col_block_sizes[])
368 {
369   /* Sanity checks */
370   assert(m != NULL && row_block_sizes != NULL && col_block_sizes != NULL);
371   assert(m->flag & CS_SDM_BY_BLOCK);
372   assert(m->block_desc != NULL);
373 
374   cs_sdm_block_t  *bd = m->block_desc;
375 
376   assert(n_row_blocks <= bd->n_max_blocks_by_row);
377   assert(n_col_blocks <= bd->n_max_blocks_by_col);
378   bd->n_row_blocks = n_row_blocks;
379   bd->n_col_blocks = n_col_blocks;
380   m->n_rows = 0;
381   for (int i = 0; i < n_row_blocks; i++)
382     m->n_rows += row_block_sizes[i];
383   m->n_cols = 0;
384   for (int i = 0; i < n_col_blocks; i++)
385     m->n_cols += col_block_sizes[i];
386 
387   memset(m->val, 0, m->n_rows*m->n_cols*sizeof(cs_real_t));
388 
389   cs_real_t  *p_val = m->val;
390   int  shift = 0;
391   for (int i = 0; i < bd->n_row_blocks; i++) {
392     const short int  n_rows_i = row_block_sizes[i];
393     for (int j = 0; j < bd->n_col_blocks; j++) {
394       const short int  n_cols_j = col_block_sizes[j];
395 
396       /* Set the block (i,j) */
397       cs_sdm_t  *b_ij = bd->blocks + shift;
398 
399       cs_sdm_map_array(n_rows_i, n_cols_j, b_ij, p_val);
400       p_val += n_rows_i*n_cols_j;
401       shift++;
402 
403     }
404   }
405 }
406 
407 /*----------------------------------------------------------------------------*/
408 /*!
409  * \brief   Initialize the pattern of cs_sdm_t structure defined by 3x3 block
410  *          The matrix should have been allocated before calling this function
411  *
412  * \param[in, out] m
413  * \param[in]      n_row_blocks      number of blocks in a row
414  * \param[in]      n_col_blocks      number of blocks in a column
415  */
416 /*----------------------------------------------------------------------------*/
417 
418 void
cs_sdm_block33_init(cs_sdm_t * m,int n_row_blocks,int n_col_blocks)419 cs_sdm_block33_init(cs_sdm_t     *m,
420                     int           n_row_blocks,
421                     int           n_col_blocks)
422 {
423   /* Sanity checks */
424   assert(m != NULL);
425   assert(m->flag & CS_SDM_BY_BLOCK);
426   assert(m->block_desc != NULL);
427   assert(n_row_blocks <= m->block_desc->n_max_blocks_by_row);
428   assert(n_col_blocks <= m->block_desc->n_max_blocks_by_col);
429 
430   cs_sdm_block_t  *bd = m->block_desc;
431 
432   bd->n_row_blocks = n_row_blocks;
433   bd->n_col_blocks = n_col_blocks;
434   m->n_rows = 3*n_row_blocks;
435   m->n_cols = 3*n_col_blocks;
436 
437   memset(m->val, 0, m->n_rows*m->n_cols*sizeof(cs_real_t));
438 
439   cs_real_t  *p_val = m->val;
440   for (int i = 0; i < bd->n_row_blocks*bd->n_col_blocks; i++) {
441     cs_sdm_map_array(3, 3, bd->blocks + i, p_val);
442     p_val += 9; /* Each 3x3 block has 9 entries */
443   }
444 }
445 
446 /*----------------------------------------------------------------------------*/
447 /*!
448  * \brief   Convert a matrix with each entry is a 3x3 block into a matrix
449  *          with a block for each component x,y,z.
450  *
451  * \param[in]      mb33        pointer to a matrix
452  * \param[in, out] mbxyz       pointer to a matrix to build (but allocated)
453  */
454 /*----------------------------------------------------------------------------*/
455 
456 void
cs_sdm_block_33_to_xyz(const cs_sdm_t * mb33,cs_sdm_t * mbxyz)457 cs_sdm_block_33_to_xyz(const cs_sdm_t   *mb33,
458                        cs_sdm_t         *mbxyz)
459 {
460   if (mb33 == NULL)
461     return;
462 
463   assert(mb33->flag & CS_SDM_BY_BLOCK);
464   assert(mb33->block_desc != NULL);
465 
466   const cs_sdm_block_t  *mb33_desc = mb33->block_desc;
467   const int  n_cols = mb33_desc->n_col_blocks;
468   assert(n_cols == mb33_desc->n_row_blocks);
469 
470   /* Reshape the block matrix to fit the shape requested by mb33 */
471   int block_sizes[3];
472   for (int i = 0; i < 3; i++) block_sizes[i] = n_cols;
473   cs_sdm_block_init(mbxyz, 3, 3, block_sizes, block_sizes);
474 
475   cs_sdm_t  *mxyz[3][3];
476   for (int i = 0; i < 3; i++)
477     for (int j = 0; j < 3; j++)
478       mxyz[i][j] = cs_sdm_get_block(mbxyz, i, j);
479 
480   for (int bi = 0; bi < n_cols; bi++) {
481     for (int bj = 0; bj < n_cols; bj++) {
482 
483       cs_sdm_t  *m33_ij = cs_sdm_get_block(mb33, bi, bj);
484 
485       for (int _i = 0; _i < 3; _i++)
486         for (int _j = 0; _j < 3; _j++)
487           mxyz[_i][_j]->val[bi*n_cols+bj] = m33_ij->val[3*_i+_j];
488 
489     } /* Loop on column blocks */
490   } /* Loop on row blocks */
491 
492 }
493 
494 /*----------------------------------------------------------------------------*/
495 /*!
496  * \brief   Allocate and initialize a cs_sdm_t structure w.r.t. to a given
497  *          matrix
498  *
499  * \param[in]  mref       pointer to a matrix to copy
500  *
501  * \return  a new allocated cs_sdm_t structure which is a copy of mref
502  */
503 /*----------------------------------------------------------------------------*/
504 
505 cs_sdm_t *
cs_sdm_block_create_copy(const cs_sdm_t * mref)506 cs_sdm_block_create_copy(const cs_sdm_t   *mref)
507 {
508   cs_sdm_t  *m = NULL;
509 
510   if (mref == NULL)
511     return m;
512 
513   if (mref->n_max_rows < 1 || mref->n_max_cols < 1)
514     return m;
515 
516   const cs_sdm_block_t  *bd_ref = mref->block_desc;
517 
518   int  row_size = 0, col_size = 0;
519   for (int bi = 0; bi < bd_ref->n_row_blocks; bi++) {
520     const cs_sdm_t  *bI0 = cs_sdm_get_block(mref, bi, 0);
521     row_size += bI0->n_max_rows;
522   }
523   for (int bj = 0; bj < bd_ref->n_col_blocks; bj++) {
524     const cs_sdm_t  *b0J = cs_sdm_get_block(mref, 0, bj);
525     col_size += b0J->n_max_cols;
526   }
527 
528   m = _create_sdm(CS_SDM_BY_BLOCK, row_size, col_size);
529 
530   /* Copy values */
531   memcpy(m->val, mref->val, sizeof(cs_real_t)*m->n_max_rows*m->n_max_cols);
532 
533   /* Define the block description */
534   cs_sdm_block_t  *bd = m->block_desc;
535 
536   bd->n_max_blocks_by_row = bd_ref->n_max_blocks_by_row;
537   bd->n_max_blocks_by_col = bd_ref->n_max_blocks_by_col;
538   bd->n_row_blocks = bd_ref->n_row_blocks;
539   bd->n_col_blocks = bd_ref->n_col_blocks;
540 
541   BFT_MALLOC(bd->blocks,
542              bd_ref->n_max_blocks_by_row*bd_ref->n_max_blocks_by_col,
543              cs_sdm_t);
544 
545   cs_real_t  *p_val = m->val;
546   int  shift = 0;
547   for (int bi = 0; bi < bd_ref->n_row_blocks; bi++) {
548     for (int bj = 0; bj < bd_ref->n_col_blocks; bj++) {
549 
550       const cs_sdm_t  *ref_IJ = cs_sdm_get_block(mref, bi, bj);
551 
552       /* Set the block (i,j) */
553       cs_sdm_t  *b_ij = bd->blocks + shift;
554       int  _size = ref_IJ->n_rows*ref_IJ->n_cols;
555 
556       cs_sdm_map_array(ref_IJ->n_rows, ref_IJ->n_cols, b_ij, p_val);
557       shift++;
558       p_val += _size;
559 
560     } /* Column blocks */
561   }   /* Row blocks */
562 
563   return m;
564 }
565 
566 /*----------------------------------------------------------------------------*/
567 /*!
568  * \brief   Compute a local dense matrix-product c = a*b
569  *          c has been previously allocated
570  *
571  * \param[in]      a     local dense matrix to use
572  * \param[in]      b     local dense matrix to use
573  * \param[in, out] c     result of the local matrix-product
574  *                       is updated
575  */
576 /*----------------------------------------------------------------------------*/
577 
578 void
cs_sdm_multiply(const cs_sdm_t * a,const cs_sdm_t * b,cs_sdm_t * c)579 cs_sdm_multiply(const cs_sdm_t   *a,
580                 const cs_sdm_t   *b,
581                 cs_sdm_t         *c)
582 {
583   /* Sanity checks */
584   assert(a != NULL && b != NULL && c != NULL);
585   assert(a->n_cols == b->n_rows &&
586          a->n_rows == c->n_rows &&
587          c->n_cols == b->n_cols);
588 
589   const cs_real_t *bv = b->val;
590 
591   for (short int i = 0; i < a->n_rows; i++) {
592 
593     const cs_real_t *av_i = a->val + i*a->n_cols;
594     cs_real_t  *cv_i = c->val + i*b->n_cols;
595 
596     for (short int j = 0; j < b->n_cols; j++){
597       cs_real_t p = 0.0;
598       for (short int k = 0; k < a->n_cols; k++)
599         p += av_i[k] * bv[k*b->n_cols + j];
600       cv_i[j] += p;
601 
602     } /* Loop on b columns */
603   } /* Loop on a rows */
604 
605 }
606 
607 /*----------------------------------------------------------------------------*/
608 /*!
609  * \brief    Compute a row-row matrix product of a and b. It is basically equal
610  *           to the classical a*b^T. It is a fast (matrices are row-major) way
611  *           of computing a*b if b is symmetric or if b^T is given.
612  *           Generic version: all compatible sizes
613  *
614  * \param[in]      a    local matrix to use
615  * \param[in]      b    local matrix to use
616  * \param[in, out] c    result of the local matrix-product (already allocated)
617  *                      is updated.
618  */
619 /*----------------------------------------------------------------------------*/
620 
621 void
cs_sdm_multiply_rowrow(const cs_sdm_t * a,const cs_sdm_t * b,cs_sdm_t * c)622 cs_sdm_multiply_rowrow(const cs_sdm_t   *a,
623                        const cs_sdm_t   *b,
624                        cs_sdm_t         *c)
625 {
626   /* Sanity check */
627   assert(a != NULL && b != NULL && c != NULL);
628   assert(a->n_cols == b->n_cols &&
629          a->n_rows == c->n_rows &&
630          c->n_cols == b->n_rows);
631 
632   for (short int i = 0; i < a->n_rows; i++) {
633 
634     const cs_real_t  *av_i = a->val + i*a->n_cols;
635 
636     cs_real_t  *cv_i = c->val + i*b->n_rows;
637 
638     for (short int j = 0; j < b->n_rows; j++) {
639 
640       const cs_real_t  *bv_j = b->val + j * b->n_cols;
641 
642       cs_real_t  dp = 0;
643       for (short int k = 0; k < a->n_cols; k++)
644         dp += av_i[k] * bv_j[k];
645       cv_i[j] += dp;
646 
647     } /* Loop on b rows */
648   } /* Loop on a rows */
649 
650 }
651 
652 /*----------------------------------------------------------------------------*/
653 /*!
654  * \brief    Compute a row-row matrix product of a and b. It is basically equal
655  *           to the classical a*b^T. It is a fast (matrices are row-major) way
656  *           of computing a*b if b is symmetric or if b^T is given.
657  *           Generic version: all compatible sizes
658  *           Result is known to be symmetric.
659  *
660  * \param[in]      a    local matrix to use
661  * \param[in]      b    local matrix to use
662  * \param[in, out] c    result of the local matrix-product (already allocated)
663  *                      is updated.
664  */
665 /*----------------------------------------------------------------------------*/
666 
667 void
cs_sdm_multiply_rowrow_sym(const cs_sdm_t * a,const cs_sdm_t * b,cs_sdm_t * c)668 cs_sdm_multiply_rowrow_sym(const cs_sdm_t   *a,
669                            const cs_sdm_t   *b,
670                            cs_sdm_t         *c)
671 {
672   /* Sanity check */
673   assert(a != NULL && b != NULL && c != NULL);
674   assert(a->n_cols == b->n_cols &&
675          a->n_rows == c->n_rows &&
676          c->n_cols == b->n_rows);
677 
678   for (short int i = 0; i < a->n_rows; i++) {
679 
680     const cs_real_t  *av_i = a->val + i*a->n_cols;
681 
682     cs_real_t  *cv_i = c->val + i*b->n_rows;
683 
684     for (short int j = i; j < b->n_rows; j++) {
685 
686       const cs_real_t  *bv_j = b->val + j * b->n_cols;
687 
688       cs_real_t  dp = 0;
689       for (short int k = 0; k < a->n_cols; k++)
690         dp += av_i[k] * bv_j[k];
691       cv_i[j] += dp;
692 
693       if (j > i)
694         (c->val + j*b->n_rows)[i] += dp;
695 
696     } /* Loop on b rows */
697   } /* Loop on a rows */
698 
699 }
700 
701 /*----------------------------------------------------------------------------*/
702 /*!
703  * \brief    Compute a row-row matrix product of a and b. It is basically equal
704  *           to the classical a*b^T. It is a fast (matrices are row-major) way
705  *           of computing a*b if b is symmetric or if b^T is given.
706  *           Case of matrices defined by block.
707  *
708  * \param[in]      a    local matrix to use
709  * \param[in]      b    local matrix to use
710  * \param[in, out] c    result of the local matrix-product (already allocated)
711  *                      is updated
712  */
713 /*----------------------------------------------------------------------------*/
714 
715 void
cs_sdm_block_multiply_rowrow(const cs_sdm_t * a,const cs_sdm_t * b,cs_sdm_t * c)716 cs_sdm_block_multiply_rowrow(const cs_sdm_t   *a,
717                              const cs_sdm_t   *b,
718                              cs_sdm_t         *c)
719 {
720   /* Sanity check */
721   assert(a != NULL && b != NULL && c != NULL);
722   assert(a->flag & CS_SDM_BY_BLOCK);
723   assert(b->flag & CS_SDM_BY_BLOCK);
724   assert(c->flag & CS_SDM_BY_BLOCK);
725   assert(a->n_cols == b->n_cols &&
726          a->n_rows == c->n_rows &&
727          c->n_cols == b->n_rows);
728 
729   const cs_sdm_block_t  *a_desc = a->block_desc;
730   const cs_sdm_block_t  *b_desc = b->block_desc;
731   const cs_sdm_block_t  *c_desc = c->block_desc;
732 
733   CS_UNUSED(c_desc);            /* Only in debug mode */
734   assert(a_desc->n_col_blocks == b_desc->n_col_blocks &&
735          a_desc->n_row_blocks == c_desc->n_row_blocks &&
736          c_desc->n_col_blocks == b_desc->n_row_blocks);
737 
738   for (short int i = 0; i < a_desc->n_row_blocks; i++) {
739     for (short int j = 0; j < b_desc->n_row_blocks; j++) {
740 
741       cs_sdm_t  *cIJ = cs_sdm_get_block(c, i, j);
742 
743       for (short int k = 0; k < a_desc->n_col_blocks; k++) {
744 
745         cs_sdm_t  *aIK = cs_sdm_get_block(a, i, k);
746         cs_sdm_t  *bJK = cs_sdm_get_block(b, j, k);
747 
748         cs_sdm_multiply_rowrow(aIK, bJK, cIJ);
749 
750       } /* Loop on common blocks between a and b */
751     } /* Loop on b row blocks */
752   } /* Loop on a row blocks */
753 
754 }
755 
756 /*----------------------------------------------------------------------------*/
757 /*!
758  * \brief    Compute a row-row matrix product of a and b. It is basically equal
759  *           to the classical a*b^T. It is a fast (matrices are row-major) way
760  *           of computing a*b if b is symmetric or if b^T is given.
761  *           Case of matrices defined by block.
762  *           Results is known to be symmetric.
763  *
764  * \param[in]      a    local matrix to use
765  * \param[in]      b    local matrix to use
766  * \param[in, out] c    result of the local matrix-product (already allocated)
767  *                      is updated
768  */
769 /*----------------------------------------------------------------------------*/
770 
771 void
cs_sdm_block_multiply_rowrow_sym(const cs_sdm_t * a,const cs_sdm_t * b,cs_sdm_t * c)772 cs_sdm_block_multiply_rowrow_sym(const cs_sdm_t   *a,
773                                  const cs_sdm_t   *b,
774                                  cs_sdm_t         *c)
775 {
776   /* Sanity check */
777   assert(a != NULL && b != NULL && c != NULL);
778   assert(a->flag & CS_SDM_BY_BLOCK);
779   assert(b->flag & CS_SDM_BY_BLOCK);
780   assert(c->flag & CS_SDM_BY_BLOCK);
781   assert(a->n_cols == b->n_cols &&
782          a->n_rows == c->n_rows &&
783          c->n_cols == b->n_rows);
784 
785   const cs_sdm_block_t  *a_desc = a->block_desc;
786   const cs_sdm_block_t  *b_desc = b->block_desc;
787 
788   assert(a_desc->n_col_blocks == b_desc->n_col_blocks &&
789          a_desc->n_row_blocks == c->block_desc->n_row_blocks &&
790          c->block_desc->n_col_blocks == b_desc->n_row_blocks);
791 
792   for (short int i = 0; i < a_desc->n_row_blocks; i++) {
793 
794     for (short int j = i; j < b_desc->n_row_blocks; j++) {
795 
796       cs_sdm_t  *cIJ = cs_sdm_get_block(c, i, j);
797 
798       for (short int k = 0; k < a_desc->n_col_blocks; k++) {
799 
800         cs_sdm_t  *aIK = cs_sdm_get_block(a, i, k);
801         cs_sdm_t  *bJK = cs_sdm_get_block(b, j, k);
802 
803         cs_sdm_multiply_rowrow(aIK, bJK, cIJ);
804 
805       } /* Loop on common blocks between a and b */
806 
807     } /* Loop on b row blocks */
808   } /* Loop on a row blocks */
809 
810   for (short int i = 0; i < a_desc->n_row_blocks; i++)
811     for (short int j = i + 1; j < b_desc->n_row_blocks; j++)
812       cs_sdm_transpose_and_update(cs_sdm_get_block(c, i, j),
813                                   cs_sdm_get_block(c, j, i));
814 
815 }
816 
817 /*----------------------------------------------------------------------------*/
818 /*!
819  * \brief   Compute a dense matrix-vector product for a small square matrix
820  *          mv has been previously allocated
821  *
822  * \param[in]      mat    local matrix to use
823  * \param[in]      vec    local vector to use
824  * \param[in, out] mv result of the local matrix-vector product
825  */
826 /*----------------------------------------------------------------------------*/
827 
828 void
cs_sdm_square_matvec(const cs_sdm_t * mat,const cs_real_t * vec,cs_real_t * mv)829 cs_sdm_square_matvec(const cs_sdm_t    *mat,
830                      const cs_real_t   *vec,
831                      cs_real_t         *mv)
832 {
833   /* Sanity checks */
834   assert(mat != NULL && vec != NULL && mv != NULL);
835   assert(mat->n_rows == mat->n_cols);
836 
837   const int  n = mat->n_rows;
838 
839   /* Initialize mv */
840   const cs_real_t  v = vec[0];
841   for (short int i = 0; i < n; i++)
842     mv[i] = v*mat->val[i*n];
843 
844   /* Increment mv */
845   for (short int i = 0; i < n; i++) {
846     const cs_real_t *m_i = mat->val + i*n;
847     for (short int j = 1; j < n; j++)
848       mv[i] += m_i[j] * vec[j];
849   }
850 
851 }
852 
853 /*----------------------------------------------------------------------------*/
854 /*!
855  * \brief   Compute a dense matrix-vector product for a rectangular matrix
856  *          mv has been previously allocated
857  *
858  * \param[in]      mat    local matrix to use
859  * \param[in]      vec    local vector to use (size = mat->n_cols)
860  * \param[in, out] mv     result of the operation (size = mat->n_rows)
861  */
862 /*----------------------------------------------------------------------------*/
863 
864 void
cs_sdm_matvec(const cs_sdm_t * mat,const cs_real_t * vec,cs_real_t * mv)865 cs_sdm_matvec(const cs_sdm_t    *mat,
866               const cs_real_t   *vec,
867               cs_real_t         *mv)
868 {
869   /* Sanity checks */
870   assert(mat != NULL && vec != NULL && mv != NULL);
871 
872   if (mat->n_rows == mat->n_cols) {
873     cs_sdm_square_matvec(mat, vec, mv);
874     return;
875   }
876 
877   const short int  nr = mat->n_rows;
878   const short int  nc = mat->n_cols;
879 
880   /* Initialize mv with the first column */
881   const cs_real_t  v = vec[0];
882   for (short int i = 0; i < nr; i++)
883     mv[i] = v*mat->val[i*nc];
884 
885   /* Increment mv */
886   for (short int i = 0; i < nr; i++) {
887     cs_real_t *m_i = mat->val + i*nc;
888     for (short int j = 1; j < nc; j++)
889       mv[i] +=  m_i[j] * vec[j];
890   }
891 
892 }
893 
894 /*----------------------------------------------------------------------------*/
895 /*!
896  * \brief   Compute a dense matrix-vector product for a rectangular matrix
897  *          mv has been previously allocated and initialized
898  *          Thus mv is updated inside this function
899  *
900  * \param[in]      mat    local matrix to use
901  * \param[in]      vec    local vector to use (size = mat->n_cols)
902  * \param[in, out] mv     result of the operation (size = mat->n_rows)
903  */
904 /*----------------------------------------------------------------------------*/
905 
906 void
cs_sdm_update_matvec(const cs_sdm_t * mat,const cs_real_t * vec,cs_real_t * mv)907 cs_sdm_update_matvec(const cs_sdm_t    *mat,
908                      const cs_real_t   *vec,
909                      cs_real_t         *mv)
910 {
911   /* Sanity checks */
912   assert(mat != NULL && vec != NULL && mv != NULL);
913 
914   const short int  nr = mat->n_rows;
915   const short int  nc = mat->n_cols;
916 
917   /* Update mv */
918   for (short int i = 0; i < nr; i++) {
919     cs_real_t *m_i = mat->val + i*nc;
920     for (short int j = 0; j < nc; j++)
921       mv[i] +=  m_i[j] * vec[j];
922   }
923 
924 }
925 
926 /*----------------------------------------------------------------------------*/
927 /*!
928  * \brief   Compute a dense matrix-vector product for a rectangular matrix
929  *          which is transposed.
930  *          mv has been previously allocated. mv is updated inside this
931  *          function. Don't forget to initialize mv if needed.
932  *
933  * \param[in]      mat    local matrix to use
934  * \param[in]      vec    local vector to use (size = mat->n_cols)
935  * \param[in, out] mv     result of the operation (size = mat->n_rows)
936  */
937 /*----------------------------------------------------------------------------*/
938 
939 void
cs_sdm_matvec_transposed(const cs_sdm_t * mat,const cs_real_t * vec,cs_real_t * mv)940 cs_sdm_matvec_transposed(const cs_sdm_t    *mat,
941                          const cs_real_t   *vec,
942                          cs_real_t         *mv)
943 {
944   /* Sanity checks */
945   assert(mat != NULL && vec != NULL && mv != NULL);
946 
947   const short int  nr = mat->n_rows;
948   const short int  nc = mat->n_cols;
949 
950   /* Update mv */
951   for (short int i = 0; i < nr; i++) {
952     const cs_real_t *m_i = mat->val + i*nc;
953     const cs_real_t v = vec[i];
954     for (short int j = 0; j < nc; j++)
955       mv[j] +=  m_i[j] * v;
956   }
957 
958 }
959 
960 /*----------------------------------------------------------------------------*/
961 /*!
962  * \brief   Add two matrices defined by block: loc += add
963  *
964  * \param[in, out] mat   local matrix storing the result
965  * \param[in]      add   values to add to mat
966  */
967 /*----------------------------------------------------------------------------*/
968 
969 void
cs_sdm_block_add(cs_sdm_t * mat,const cs_sdm_t * add)970 cs_sdm_block_add(cs_sdm_t        *mat,
971                  const cs_sdm_t  *add)
972 {
973   if (mat == NULL || add == NULL)
974     return;
975 
976   const cs_sdm_block_t  *mat_desc = mat->block_desc;
977 
978   assert(add->block_desc != NULL && mat_desc != NULL);
979   assert(add->block_desc->n_row_blocks == mat_desc->n_row_blocks);
980   assert(add->block_desc->n_col_blocks == mat_desc->n_col_blocks);
981 
982   for (short int bi = 0; bi < mat_desc->n_row_blocks; bi++) {
983     for (short int bj = 0; bj < mat_desc->n_col_blocks; bj++) {
984 
985       cs_sdm_t  *mat_ij = cs_sdm_get_block(mat, bi, bj);
986       const cs_sdm_t  *add_ij = cs_sdm_get_block(add, bi, bj);
987 
988       cs_sdm_add(mat_ij, add_ij);
989 
990     } /* Loop on column blocks */
991   } /* Loop on row blocks */
992 
993 }
994 
995 /*----------------------------------------------------------------------------*/
996 /*!
997  * \brief   Add two matrices defined by block: loc += mult_coef * add
998  *
999  * \param[in, out] mat         local matrix storing the result
1000  * \param[in]      mult_coef   multiplicative coefficient
1001  * \param[in]      add         values to add to mat
1002  */
1003 /*----------------------------------------------------------------------------*/
1004 
1005 void
cs_sdm_block_add_mult(cs_sdm_t * mat,cs_real_t mult_coef,const cs_sdm_t * add)1006 cs_sdm_block_add_mult(cs_sdm_t        *mat,
1007                       cs_real_t        mult_coef,
1008                       const cs_sdm_t  *add)
1009 {
1010   if (mat == NULL || add == NULL)
1011     return;
1012 
1013   const cs_sdm_block_t  *add_desc = add->block_desc;
1014   const cs_sdm_block_t  *mat_desc = mat->block_desc;
1015 
1016   CS_UNUSED(add_desc);          /* Only in debug mode */
1017   assert(add_desc != NULL && mat_desc != NULL);
1018   assert(add_desc->n_row_blocks == mat_desc->n_row_blocks);
1019   assert(add_desc->n_col_blocks == mat_desc->n_col_blocks);
1020 
1021   for (short int bi = 0; bi < mat_desc->n_row_blocks; bi++) {
1022     for (short int bj = 0; bj < mat_desc->n_col_blocks; bj++) {
1023 
1024       cs_sdm_t  *mat_ij = cs_sdm_get_block(mat, bi, bj);
1025       const cs_sdm_t  *add_ij = cs_sdm_get_block(add, bi, bj);
1026 
1027       cs_sdm_add_mult(mat_ij, mult_coef, add_ij);
1028 
1029     } /* Loop on column blocks */
1030   } /* Loop on row blocks */
1031 
1032 }
1033 
1034 /*----------------------------------------------------------------------------*/
1035 /*!
1036  * \brief   Compute a dense matrix-vector product for a rectangular matrix
1037  *          defined by block
1038  *          mv has been previously allocated
1039  *
1040  * \param[in]      mat    local matrix to use
1041  * \param[in]      vec    local vector to use (size = mat->n_cols)
1042  * \param[in, out] mv     result of the operation (size = mat->n_rows)
1043  */
1044 /*----------------------------------------------------------------------------*/
1045 
1046 void
cs_sdm_block_matvec(const cs_sdm_t * mat,const cs_real_t * vec,cs_real_t * mv)1047 cs_sdm_block_matvec(const cs_sdm_t    *mat,
1048                     const cs_real_t   *vec,
1049                     cs_real_t         *mv)
1050 {
1051   /* Sanity checks */
1052   assert(mat != NULL && vec != NULL && mv != NULL);
1053 
1054   if (mat == NULL)
1055     return;
1056 
1057   const cs_sdm_block_t  *mat_desc = mat->block_desc;
1058 
1059   assert(mat_desc != NULL);
1060   memset(mv, 0, mat->n_rows * sizeof(cs_real_t));
1061 
1062   int  c_shift = 0, r_shift = 0;
1063 
1064   for (short int bi = 0; bi < mat_desc->n_row_blocks; bi++) {
1065 
1066     cs_real_t  *_mv = mv + r_shift;
1067     int  n_rows = 0;
1068 
1069     c_shift = 0;
1070     for (short int bj = 0; bj < mat_desc->n_col_blocks; bj++) {
1071 
1072       cs_sdm_t  *mat_ij = cs_sdm_get_block(mat, bi, bj);
1073 
1074       cs_sdm_update_matvec(mat_ij, vec + c_shift, _mv);
1075       c_shift += mat_ij->n_cols;
1076       n_rows = mat_ij->n_rows;
1077 
1078     } /* Loop on column blocks */
1079 
1080     r_shift += n_rows;
1081 
1082   } /* Loop on row blocks */
1083 
1084   assert(r_shift == mat->n_rows);
1085   assert(c_shift == mat->n_cols);
1086 
1087 }
1088 
1089 /*----------------------------------------------------------------------------*/
1090 /*!
1091  * \brief   Add two small dense matrices: loc += add
1092  *
1093  * \param[in, out] mat   local matrix storing the result
1094  * \param[in]      add   values to add to mat
1095  */
1096 /*----------------------------------------------------------------------------*/
1097 
1098 void
cs_sdm_add(cs_sdm_t * mat,const cs_sdm_t * add)1099 cs_sdm_add(cs_sdm_t        *mat,
1100            const cs_sdm_t  *add)
1101 {
1102   /* Sanity checks */
1103   assert(mat != NULL && add != NULL);
1104   assert(mat->n_rows == add->n_rows);
1105   assert(mat->n_cols == add->n_cols);
1106 
1107   for (int i = 0; i < mat->n_rows*mat->n_cols; i++)
1108     mat->val[i] += add->val[i];
1109 }
1110 
1111 /*----------------------------------------------------------------------------*/
1112 /*!
1113  * \brief   Add two small dense matrices: loc += alpha*add
1114  *
1115  * \param[in, out] mat     local matrix storing the result
1116  * \param[in]      alpha   multiplicative coefficient
1117  * \param[in]      add     values to add to mat
1118  */
1119 /*----------------------------------------------------------------------------*/
1120 
1121 void
cs_sdm_add_mult(cs_sdm_t * mat,cs_real_t alpha,const cs_sdm_t * add)1122 cs_sdm_add_mult(cs_sdm_t        *mat,
1123                 cs_real_t        alpha,
1124                 const cs_sdm_t  *add)
1125 {
1126   /* Sanity checks */
1127   assert(mat != NULL && add != NULL);
1128   assert(mat->n_rows == add->n_rows);
1129   assert(mat->n_cols == add->n_cols);
1130 
1131   if (fabs(alpha) < FLT_MIN)
1132     return;
1133 
1134   for (int i = 0; i < mat->n_rows*mat->n_cols; i++)
1135     mat->val[i] += alpha * add->val[i];
1136 }
1137 
1138 /*----------------------------------------------------------------------------*/
1139 /*!
1140  * \brief   Define a new matrix by adding the given matrix with its transpose.
1141  *          Keep the transposed matrix for a future use.
1142  *
1143  * \param[in, out] mat   local matrix to transpose and add
1144  * \param[in, out] tr    transposed of the local matrix
1145  */
1146 /*----------------------------------------------------------------------------*/
1147 
1148 void
cs_sdm_square_add_transpose(cs_sdm_t * mat,cs_sdm_t * tr)1149 cs_sdm_square_add_transpose(cs_sdm_t  *mat,
1150                             cs_sdm_t  *tr)
1151 {
1152   /* Sanity check */
1153   assert(mat != NULL && tr != NULL);
1154   assert(mat->n_rows <= tr->n_max_cols && mat->n_cols <= tr->n_max_rows);
1155   assert(mat->n_rows == mat->n_cols);
1156 
1157   if (mat->n_rows < 1 || mat->n_cols < 1)
1158     return;
1159 
1160   tr->n_rows = mat->n_cols;
1161   tr->n_cols = mat->n_rows;
1162 
1163   for (short int i = 0; i < mat->n_rows; i++) {
1164 
1165     const int  ii = i*mat->n_cols + i;
1166     tr->val[ii] = mat->val[ii];
1167     mat->val[ii] *= 2;
1168 
1169     for (short int j = i+1; j < mat->n_cols; j++) {
1170 
1171       const int  ij = i*mat->n_cols + j;
1172       const int  ji = j*mat->n_cols + i;
1173 
1174       tr->val[ji] = mat->val[ij];
1175       tr->val[ij] = mat->val[ji];
1176       mat->val[ij] += tr->val[ij];
1177       mat->val[ji] += tr->val[ji];
1178 
1179     }
1180   }
1181 
1182 }
1183 
1184 /*----------------------------------------------------------------------------*/
1185 /*!
1186  * \brief   Set the given matrix to two times its symmetric part
1187  *          mat --> mat + mat_tr = 2*symm(mat)
1188  *
1189  * \param[in, out] mat   small dense matrix to transform
1190  */
1191 /*----------------------------------------------------------------------------*/
1192 
1193 void
cs_sdm_square_2symm(cs_sdm_t * mat)1194 cs_sdm_square_2symm(cs_sdm_t   *mat)
1195 {
1196   /* Sanity check */
1197   assert(mat != NULL);
1198   assert(mat->n_rows == mat->n_cols);
1199 
1200   if (mat->n_rows < 1)
1201     return;
1202 
1203   for (short int i = 0; i < mat->n_rows; i++) {
1204 
1205     cs_real_t  *mi = mat->val + i*mat->n_cols;
1206     for (short int j = i; j < mat->n_cols; j++) {
1207       int  ji = j*mat->n_rows + i;
1208       mi[j] += mat->val[ji];
1209       mat->val[ji] = mi[j];
1210     } /* col j */
1211 
1212   } /* row i */
1213 
1214 }
1215 
1216 /*----------------------------------------------------------------------------*/
1217 /*!
1218  * \brief   Set the given matrix into its anti-symmetric part
1219  *
1220  * \param[in, out] mat   small dense matrix to transform
1221  */
1222 /*----------------------------------------------------------------------------*/
1223 
1224 void
cs_sdm_square_asymm(cs_sdm_t * mat)1225 cs_sdm_square_asymm(cs_sdm_t   *mat)
1226 {
1227   /* Sanity check */
1228   assert(mat != NULL);
1229   assert(mat->n_rows == mat->n_cols);
1230 
1231   if (mat->n_rows < 1)
1232     return;
1233 
1234   for (short int i = 0; i < mat->n_rows; i++) {
1235 
1236     cs_real_t  *mi = mat->val + i*mat->n_cols;
1237 
1238     mi[i] = 0;
1239 
1240     for (short int j = i+1; j < mat->n_cols; j++) {
1241 
1242       int  ji = j*mat->n_rows + i;
1243 
1244       mi[j] = 0.5*(mi[j] - mat->val[ji]);
1245       mat->val[ji] = -mi[j];
1246 
1247     }
1248   }
1249 
1250 }
1251 
1252 /*----------------------------------------------------------------------------*/
1253 /*!
1254  * \brief   Set the given block matrix into its anti-symmetric part
1255  *
1256  * \param[in, out] mat   small dense matrix defined by block to transform
1257  */
1258 /*----------------------------------------------------------------------------*/
1259 
1260 void
cs_sdm_block_square_asymm(cs_sdm_t * mat)1261 cs_sdm_block_square_asymm(cs_sdm_t   *mat)
1262 {
1263   /* Sanity check */
1264   assert(mat != NULL);
1265   assert(mat->flag & CS_SDM_BY_BLOCK);
1266   assert(mat->n_rows == mat->n_cols);
1267 
1268   if (mat->n_rows < 1)
1269     return;
1270 
1271   const cs_sdm_block_t  *matb = mat->block_desc;
1272   if (matb->n_row_blocks < 1)
1273     return;
1274   assert(matb->n_row_blocks == matb->n_col_blocks);
1275 
1276   for (int bi = 0; bi < matb->n_row_blocks; bi++) {
1277 
1278     /* Diagonal block */
1279     cs_sdm_t  *bII = cs_sdm_get_block(mat, bi, bi);
1280 
1281     cs_sdm_square_asymm(bII);
1282 
1283     for (int bj = bi+1; bj < matb->n_col_blocks; bj++) {
1284 
1285       cs_sdm_t  *bIJ = cs_sdm_get_block(mat, bi, bj);
1286       cs_sdm_t  *bJI = cs_sdm_get_block(mat, bj, bi);
1287 
1288       assert(bIJ->n_rows == bJI->n_cols);
1289       assert(bIJ->n_cols == bJI->n_rows);
1290 
1291       for (short int i = 0; i < bIJ->n_rows; i++) {
1292         cs_real_t  *bIJ_i = bIJ->val + i*bIJ->n_cols;
1293         for (short int j = 0; j < bIJ->n_cols; j++) {
1294 
1295           int  ji = j*bIJ->n_rows + i;
1296 
1297           bIJ_i[j] = 0.5*(bIJ_i[j] - bJI->val[ji]);
1298           bJI->val[ji] = -bIJ_i[j];
1299 
1300         } /* bIJ columns */
1301       } /* bIJ rows */
1302 
1303     } /* Loop on column blocks */
1304 
1305   } /* Loop on row blocks */
1306 
1307 }
1308 
1309 /*----------------------------------------------------------------------------*/
1310 /*!
1311  * \brief  Decompose a matrix into the matrix product Q.R
1312  *         Case of a 3x3 symmetric matrix
1313  *
1314  * \param[in]      m      matrix values
1315  * \param[in, out] Qt     transposed of matrix Q
1316  * \param[in, out] R      vector of the coefficient of the decomposition
1317  *
1318  * \note: R is an upper triangular matrix. Stored in a compact way.
1319  *
1320  *    j= 0, 1, 2
1321  *  i=0| 0| 1| 2|
1322  *  i=1   | 4| 5|
1323  *  i=2        6|
1324  */
1325 /*----------------------------------------------------------------------------*/
1326 
1327 void
cs_sdm_33_sym_qr_compute(const cs_real_t m[9],cs_real_t Qt[9],cs_real_t R[6])1328 cs_sdm_33_sym_qr_compute(const cs_real_t   m[9],
1329                          cs_real_t         Qt[9],
1330                          cs_real_t         R[6])
1331 {
1332   /* Sanity checks */
1333   assert(m != NULL && Qt != NULL && R != NULL);
1334 
1335   /* Work as if Q is defined column by column (instead of row). At the end, we
1336      transpose */
1337 
1338   cs_nvec3_t  tmp;
1339   cs_real_3_t  qhat;
1340 
1341   cs_nvec3(m, &tmp); /* normalize the first row (i.e first column) */
1342   R[0] = tmp.meas; /* R00 */
1343   assert(tmp.meas > cs_math_zero_threshold);
1344   cs_real_t  *q0 = Qt;
1345   for (int k = 0; k < 3; k++)
1346     q0[k] = tmp.unitv[k];
1347 
1348   const cs_real_t  *m1 = m + 3;    /* qhat = m1 */
1349   R[1] = cs_math_3_dot_product(q0, m1); /* R01 */
1350   for (int k = 0; k < 3; k++)
1351     qhat[k] = m1[k] - R[1]*q0[k];
1352 
1353   cs_nvec3(qhat, &tmp);
1354   R[3] = tmp.meas; /* R11 */
1355   assert(tmp.meas > cs_math_zero_threshold);
1356   cs_real_t  *q1 = Qt + 3;
1357   for (int k = 0; k < 3; k++)
1358     q1[k] = tmp.unitv[k];
1359 
1360   const cs_real_t  *m2 = m + 6;    /* qhat = m2 */
1361   R[2] = cs_math_3_dot_product(q0, m2); /* R02 */
1362   for (int k = 0; k < 3; k++)
1363     qhat[k] = m2[k] - R[2]*q0[k];
1364   R[4] = cs_math_3_dot_product(q1, qhat); /* R12 */
1365   for (int k = 0; k < 3; k++)
1366     qhat[k] -= R[4]*q1[k];
1367 
1368   cs_nvec3(qhat, &tmp);
1369   R[5] = tmp.meas;
1370   assert(tmp.meas > cs_math_zero_threshold);
1371   cs_real_t *q2 = Qt + 6;
1372   for (int k = 0; k < 3; k++)
1373     q2[k] = tmp.unitv[k];
1374 }
1375 
1376 /*----------------------------------------------------------------------------*/
1377 /*!
1378  * \brief  LU factorization of a small dense 3x3 matrix.
1379  *
1380  * \param[in]      m        pointer to a cs_sdm_t structure
1381  * \param[in, out] facto    compact storage of coefficients for the LU
1382  *                          factorization
1383  *
1384  * \note: facto stores L the lower triangular matrix (without its diagonal
1385  *        entries assumed to be equal to 1) and U the upper triangular matrix.
1386  */
1387 /*----------------------------------------------------------------------------*/
1388 
1389 void
cs_sdm_33_lu_compute(const cs_sdm_t * m,cs_real_t facto[9])1390 cs_sdm_33_lu_compute(const cs_sdm_t   *m,
1391                      cs_real_t         facto[9])
1392 {
1393   /* Sanity check */
1394   assert(m != NULL && facto != NULL);
1395   assert(m->n_cols == m->n_rows);
1396 
1397   /* j=0: first row */
1398   const cs_real_t  d00 = m->val[0];
1399   if (fabs(d00) < cs_math_zero_threshold)
1400     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1401   const cs_real_t  invd00 = 1./d00;
1402 
1403   facto[0] = d00, facto[1] = m->val[1], facto[2] = m->val[2];     /* 1st row */
1404   facto[3] =  m->val[3]*invd00;                                   /* L10 */
1405   facto[4] =  m->val[4] - facto[3]*facto[1];                      /* U11 */
1406   facto[5] =  m->val[5] - facto[3]*facto[2];                      /* U12 */
1407   facto[6] =  m->val[6]*invd00;                                   /* L20 */
1408   facto[7] = (m->val[7] - facto[6]*m->val[1])/facto[4];           /* L21 */
1409   facto[8] =  m->val[8] - facto[6]*m->val[2] - facto[7]*facto[5]; /* U22 */
1410 }
1411 
1412 /*----------------------------------------------------------------------------*/
1413 /*!
1414  * \brief  LU factorization of a small dense matrix. Small means that the
1415  *         number m->n_rows is less than 100 for instance.
1416  *
1417  * \param[in]      m        pointer to a cs_sdm_t structure
1418  * \param[in, out] facto    compact storage of coefficients for the LU
1419  *                          factorization (should be allocated to the right
1420  *                          size, i.e. m->n_rows*m->n_rows)
1421  *
1422  * \note: facto stores L the lower triangular matrix (without its diagonal
1423  *        entries assumed to be equal to 1) and U the upper triangular matrix.
1424  */
1425 /*----------------------------------------------------------------------------*/
1426 
1427 void
cs_sdm_lu_compute(const cs_sdm_t * m,cs_real_t facto[])1428 cs_sdm_lu_compute(const cs_sdm_t   *m,
1429                   cs_real_t         facto[])
1430 {
1431   /* Sanity check */
1432   assert(m != NULL && facto != NULL);
1433   assert(m->n_cols == m->n_rows);
1434 
1435   const cs_lnum_t  n = m->n_rows;
1436 
1437   /* Initialization */
1438   memcpy(facto, m->val, n*n*sizeof(cs_real_t));
1439 
1440   /* Each step work on a smaller block */
1441   for (cs_lnum_t k = 0; k < n-1; k++) {
1442 
1443     /* Pivot */
1444     cs_real_t  pivot = facto[k*(n+1)];
1445     if (fabs(pivot) < cs_math_zero_threshold)
1446       bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1447     const cs_real_t  invp = 1./pivot;
1448 
1449     for (cs_lnum_t i = k+1; i < m->n_rows; i++) { /* Loop on rows */
1450 
1451       cs_real_t  *pr_fact = facto + (i-1)*n;
1452       cs_real_t  *cr_fact = pr_fact + n;
1453 
1454       /* L-part: lower part of the (i,i-1) entry */
1455       cr_fact[k] *= invp;
1456       const double  lval = cr_fact[k];
1457 
1458       /* U-part */
1459       for (cs_lnum_t j = k+1; j < n; j++) {
1460         cr_fact[j] -= lval*pr_fact[j];
1461 
1462       } /* Loop on j (columns) */
1463 
1464     } /* Loop on i (rows) */
1465 
1466   } /* Loop on k (block size) */
1467 
1468 }
1469 
1470 /*----------------------------------------------------------------------------*/
1471 /*!
1472  * \brief  Solve a system A.sol = rhs using a LU factorization of A (a small
1473  *         3x3 dense matrix).
1474  *
1475  * \param[in]       facto    compact storage of coefficients for the LU
1476  *                           factorization (should be allocated to the right
1477  *                           size, i.e. n_rows*n_rows)
1478  * \param[in]       rhs      right-hand side
1479  * \param[in, out]  sol      solution
1480  *
1481  * \note: facto stores L the lower triangular matrix (without its diagonal
1482  *        entries assumed to be equal to 1) and U the upper triangular matrix.
1483  */
1484 /*----------------------------------------------------------------------------*/
1485 
1486 void
cs_sdm_33_lu_solve(const cs_real_t facto[9],const cs_real_t rhs[3],cs_real_t sol[3])1487 cs_sdm_33_lu_solve(const cs_real_t    facto[9],
1488                    const cs_real_t    rhs[3],
1489                    cs_real_t          sol[3])
1490 {
1491   /* Sanity check */
1492   assert(facto != NULL && rhs != NULL && sol != NULL);
1493 
1494   /* Forward pass */
1495   sol[0] = rhs[0];
1496   sol[1] = rhs[1] - facto[3]*sol[0];
1497   sol[2] = rhs[2] - facto[6]*sol[0] - facto[7]*sol[1];
1498 
1499   /* Backward pass */
1500   sol[2] = sol[2]/facto[8];
1501   sol[1] = (sol[1] - facto[5]*sol[2])/facto[4];
1502   sol[0] = (sol[0] - facto[2]*sol[2] - facto[1]*sol[1])/facto[0];
1503 }
1504 
1505 /*----------------------------------------------------------------------------*/
1506 /*!
1507  * \brief  Solve a system A.sol = rhs using a LU factorization of A (a small
1508  *         dense matrix).
1509  *
1510  * \param[in]       n_rows   dimension of the system to solve
1511  * \param[in]       facto    compact storage of coefficients for the LU
1512  *                           factorization (should be allocated to the right
1513  *                           size, i.e. n_rows*n_rows)
1514  * \param[in]       rhs      right-hand side
1515  * \param[in, out]  sol      solution
1516  *
1517  * \note: facto stores L the lower triangular matrix (without its diagonal
1518  *        entries assumed to be equal to 1) and U the upper triangular matrix.
1519  */
1520 /*----------------------------------------------------------------------------*/
1521 
1522 void
cs_sdm_lu_solve(cs_lnum_t n_rows,const cs_real_t facto[],const cs_real_t * rhs,cs_real_t * sol)1523 cs_sdm_lu_solve(cs_lnum_t          n_rows,
1524                 const cs_real_t    facto[],
1525                 const cs_real_t   *rhs,
1526                 cs_real_t         *sol)
1527 {
1528   /* Sanity check */
1529   assert(facto != NULL && rhs != NULL && sol != NULL);
1530 
1531   /* Forward pass: L.y = rhs (sol stores the values for y) */
1532   for (cs_lnum_t  i = 0; i < n_rows; i++) {
1533 
1534     sol[i] = rhs[i];
1535     const cs_real_t  *_fact = facto + i*n_rows;
1536     for (cs_lnum_t j = 0; j < i; j++) {
1537       sol[i] -= sol[j]*_fact[j];
1538     }
1539 
1540   }
1541 
1542   /* Backward pass: U.sol = y */
1543   for (cs_lnum_t i = n_rows-1; i >= 0; i--) { /* Loop on rows */
1544     for (cs_lnum_t j = n_rows-1; j > i; j--) { /* Loop on columns */
1545 
1546       sol[i] -= sol[j]*facto[i*n_rows + j];
1547 
1548     }
1549     sol[i] /= facto[i*(n_rows + 1)];
1550   }
1551 
1552 }
1553 
1554 /*----------------------------------------------------------------------------*/
1555 /*!
1556  * \brief  LDL^T: Modified Cholesky decomposition of a 3x3 SPD matrix.
1557  *         For more reference, see for instance
1558  *   http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1559  *
1560  * \param[in]      m        pointer to a cs_sdm_t structure
1561  * \param[in, out] facto    vector of the coefficient of the decomposition
1562  *
1563  * \note: facto is a lower triangular matrix. The first value of the
1564  *        j-th (zero-based) row is easily accessed: its index is j*(j+1)/2
1565  *        (cf sum of the first j natural numbers). Instead of 1 on the diagonal
1566  *        we store the inverse of D mat in the L.D.L^T decomposition
1567  */
1568 /*----------------------------------------------------------------------------*/
1569 
1570 void
cs_sdm_33_ldlt_compute(const cs_sdm_t * m,cs_real_t facto[6])1571 cs_sdm_33_ldlt_compute(const cs_sdm_t   *m,
1572                        cs_real_t         facto[6])
1573 {
1574   /* Sanity check */
1575   assert(m != NULL && facto != NULL);
1576   assert(m->n_cols == m->n_rows && m->n_cols == 3);
1577 
1578   /* j=0: first row */
1579   const cs_real_t  d00 = m->val[0];
1580   if (fabs(d00) < cs_math_zero_threshold)
1581     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1582 
1583   facto[0] = 1. / d00;
1584   const cs_real_t  l10 = facto[1] = m->val[1] * facto[0];
1585   const cs_real_t  l20 = facto[3] = m->val[2] * facto[0];
1586 
1587   /* j=1: second row */
1588   const cs_real_t  d11 = m->val[4] - l10*l10 * d00;
1589   if (fabs(d11) < cs_math_zero_threshold)
1590     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1591   facto[2] = 1. / d11;
1592   const cs_real_t l21 = facto[4] = (m->val[5] - l20*d00*l10) * facto[2];
1593 
1594   /* j=2: third row */
1595   const cs_real_t  d22 = m->val[8] - l20*l20*d00 - l21*l21*d11;
1596   if (fabs(d22) < cs_math_zero_threshold)
1597     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1598   facto[5] = 1. / d22;
1599 }
1600 
1601 /*----------------------------------------------------------------------------*/
1602 /*!
1603  * \brief  LDL^T: Modified Cholesky decomposition of a 4x4 SPD matrix.
1604  *         For more reference, see for instance
1605  *   http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1606  *
1607  * \param[in]      m        pointer to a cs_sdm_t structure
1608  * \param[in, out] facto    vector of the coefficient of the decomposition
1609  *
1610  * \note: facto is a lower triangular matrix. The first value of the
1611  *        j-th (zero-based) row is easily accessed: its index is j*(j+1)/2
1612  *        (cf sum of the first j natural numbers). Instead of 1 on the diagonal
1613  *        we store the inverse of D mat in the L.D.L^T decomposition
1614  */
1615 /*----------------------------------------------------------------------------*/
1616 
1617 void
cs_sdm_44_ldlt_compute(const cs_sdm_t * m,cs_real_t facto[10])1618 cs_sdm_44_ldlt_compute(const cs_sdm_t   *m,
1619                        cs_real_t         facto[10])
1620 {
1621   /* Sanity check */
1622   assert(m != NULL && facto != NULL);
1623   assert(m->n_cols == m->n_rows && m->n_cols == 4);
1624 
1625   /* j=0: first row */
1626   const cs_real_t  d00 = m->val[0];
1627   if (fabs(d00) < cs_math_zero_threshold)
1628     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1629 
1630   facto[0] = 1. / d00;
1631   const cs_real_t  l10 = facto[1] = m->val[1] * facto[0];
1632   const cs_real_t  l20 = facto[3] = m->val[2] * facto[0];
1633   const cs_real_t  l30 = facto[6] = m->val[3] * facto[0];
1634 
1635   /* j=1: second row */
1636   const cs_real_t  d11 = m->val[5] - l10*l10 * d00;
1637   if (fabs(d11) < cs_math_zero_threshold)
1638     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1639   facto[2] = 1. / d11;
1640   const cs_real_t  l21 = facto[4] = (m->val[6] - l20*d00*l10) * facto[2];
1641   const cs_real_t  l31 = facto[7] = (m->val[7] - l30*d00*l10) * facto[2];
1642 
1643   /* j=2: third row */
1644   const cs_real_t  d22 = m->val[10] - l20*l20*d00 - l21*l21*d11;
1645   if (fabs(d22) < cs_math_zero_threshold)
1646     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1647   facto[5] = 1. / d22;
1648   const cs_real_t  l32 = facto[8] =
1649     (m->val[11] - l30*d00*l20 - l31*d11*l21) * facto[5];
1650 
1651   /* j=3: row 4 */
1652   const cs_real_t  d33 = m->val[15] - l30*l30*d00 - l31*l31*d11 - l32*l32*d22;
1653   if (fabs(d33) < cs_math_zero_threshold)
1654     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1655   facto[9] = 1. / d33;
1656 }
1657 
1658 /*----------------------------------------------------------------------------*/
1659 /*!
1660  * \brief  LDL^T: Modified Cholesky decomposition of a 6x6 SPD matrix.
1661  *         For more reference, see for instance
1662  *   http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1663  *
1664  * \param[in]      m        pointer to a cs_sdm_t structure
1665  * \param[in, out] facto    vector of the coefficient of the decomposition
1666  *
1667  * \note: facto is a lower triangular matrix. The first value of the
1668  *        j-th (zero-based) row is easily accessed: its index is j*(j+1)/2
1669  *        (cf sum of the first j natural numbers). Instead of 1 on the diagonal
1670  *        we store the inverse of D mat in the L.D.L^T decomposition
1671  */
1672 /*----------------------------------------------------------------------------*/
1673 
1674 void
cs_sdm_66_ldlt_compute(const cs_sdm_t * m,cs_real_t facto[21])1675 cs_sdm_66_ldlt_compute(const cs_sdm_t   *m,
1676                        cs_real_t         facto[21])
1677 {
1678   /* Sanity check */
1679   assert(m != NULL && facto != NULL);
1680   assert(m->n_cols == m->n_rows && m->n_cols == 6);
1681 
1682   /* j=0: first row */
1683   const cs_real_t  d00 = m->val[0];
1684   if (fabs(d00) < cs_math_zero_threshold)
1685     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1686 
1687   facto[0] = 1. / d00;
1688   const cs_real_t  l10 = facto[ 1] = m->val[1] * facto[0];
1689   const cs_real_t  l20 = facto[ 3] = m->val[2] * facto[0];
1690   const cs_real_t  l30 = facto[ 6] = m->val[3] * facto[0];
1691   const cs_real_t  l40 = facto[10] = m->val[4] * facto[0];
1692   const cs_real_t  l50 = facto[15] = m->val[5] * facto[0];
1693 
1694   /* j=1: second row */
1695   const cs_real_t  d11 = m->val[7] - l10*l10 * d00;
1696   if (fabs(d11) < cs_math_zero_threshold)
1697     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1698   facto[2] = 1. / d11;
1699   const cs_real_t  d0l10 = d00*l10;
1700   const cs_real_t  l21 = facto[ 4] = (m->val[ 8] - l20*d0l10) * facto[2];
1701   const cs_real_t  l31 = facto[ 7] = (m->val[ 9] - l30*d0l10) * facto[2];
1702   const cs_real_t  l41 = facto[11] = (m->val[10] - l40*d0l10) * facto[2];
1703   const cs_real_t  l51 = facto[16] = (m->val[11] - l50*d0l10) * facto[2];
1704 
1705   /* j=2: third row */
1706   const cs_real_t  d22 = m->val[14] - l20*l20*d00 - l21*l21*d11;
1707   if (fabs(d22) < cs_math_zero_threshold)
1708     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1709   facto[5] = 1. / d22;
1710   const cs_real_t  d1l21 = d11*l21, d0l20 = d00*l20;
1711   const cs_real_t  l32 = facto[ 8] =
1712     (m->val[15] - l30*d0l20 - l31*d1l21) * facto[5];
1713   const cs_real_t  l42 = facto[12] =
1714     (m->val[16] - l30*d0l20 - l31*d1l21) * facto[5];
1715   const cs_real_t  l52 = facto[17] =
1716     (m->val[17] - l30*d0l20 - l31*d1l21) * facto[5];
1717 
1718   /* j=3: row 4 */
1719   const cs_real_t  d33 = m->val[21] - l30*l30*d00 - l31*l31*d11 - l32*l32*d22;
1720   if (fabs(d33) < cs_math_zero_threshold)
1721     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1722   facto[9] = 1. / d33;
1723   const cs_real_t  d1l31 = d11*l31, d0l30 = d00*l30, d2l32 = d22*l32;
1724   const cs_real_t  l43 = facto[13] =
1725     (m->val[22] - l40*d0l30 - l41*d1l31 - l42*d2l32) * facto[9];
1726   const cs_real_t  l53 = facto[18] =
1727     (m->val[23] - l50*d0l30 - l51*d1l31 - l52*d2l32) * facto[9];
1728 
1729   /* j=4: row 5 */
1730   const cs_real_t  d44 =
1731     m->val[28] - l40*l40*d00 - l41*l41*d11 - l42*l42*d22 - l43*l43*d33;
1732   if (fabs(d44) < cs_math_zero_threshold)
1733     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1734   facto[14] = 1. / d44;
1735   const cs_real_t  l54 = facto[19] = facto[14] *
1736     (m->val[29] - l50*d00*l40 - l51*d11*l41 - l52*d22*l42 - l53*d33*l43);
1737 
1738   /* j=5: row 6 */
1739   const cs_real_t  d55 = m->val[35]
1740     - l50*l50*d00 - l51*l51*d11 - l52*l52*d22 - l53*l53*d33 - l54*l54*d44;
1741   if (fabs(d55) < cs_math_zero_threshold)
1742     bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1743   facto[20] = 1. / d55;
1744 
1745 }
1746 
1747 /*----------------------------------------------------------------------------*/
1748 /*!
1749  * \brief  LDL^T: Modified Cholesky decomposition of a SPD matrix.
1750  *         For more reference, see for instance
1751  *   http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1752  *
1753  * \param[in]      m        pointer to a cs_sdm_t structure
1754  * \param[in, out] facto    vector of the coefficient of the decomposition
1755  * \param[in, out] dkk      store temporary the diagonal (size = n_rows)
1756  *
1757  * \note: facto is a lower triangular matrix. The first value of the
1758  *        j-th (zero-based) row is easily accessed: its index is j*(j+1)/2
1759  *        (cf sum of the first j natural numbers). Instead of 1 on the diagonal
1760  *        we store the inverse of D mat in the L.D.L^T decomposition
1761  */
1762 /*----------------------------------------------------------------------------*/
1763 
1764 void
cs_sdm_ldlt_compute(const cs_sdm_t * m,cs_real_t * facto,cs_real_t * dkk)1765 cs_sdm_ldlt_compute(const cs_sdm_t     *m,
1766                     cs_real_t          *facto,
1767                     cs_real_t          *dkk)
1768 {
1769   /* Sanity checks */
1770   assert(m != NULL && facto != NULL);
1771   assert(m->n_cols == m->n_rows);
1772 
1773   const short int n = m->n_cols;
1774 
1775   if (n == 1) {
1776     facto[0] = 1. / m->val[0];
1777     return;
1778   }
1779 
1780   int  rowj_idx = 0;
1781 
1782   /* Factorization (column-major algorithm) */
1783   for (short int j = 0; j < n; j++) {
1784 
1785     rowj_idx += j;
1786     const int  djj_idx = rowj_idx + j;
1787 
1788     switch (j) {
1789 
1790     case 0:  /* Optimization for the first colum */
1791       {
1792         dkk[0] = m->val[0]; /* d00 */
1793 
1794         if (fabs(dkk[0]) < cs_math_zero_threshold)
1795           bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1796         const cs_real_t  inv_d00 = facto[0] = 1. / dkk[0];
1797 
1798         /* l_i0 = a_i0 / d_00 */
1799         short int rowi_idx = rowj_idx;
1800         const cs_real_t  *a_0 = m->val;  /* a_ij = a_ji */
1801         for (short int i = j+1; i < n; i++) { /* Loop on rows */
1802 
1803           rowi_idx += i;
1804           cs_real_t  *l_i = facto + rowi_idx;
1805           l_i[0] = a_0[i] * inv_d00;
1806 
1807         }
1808 
1809       }
1810       break;
1811 
1812     case 1:  /* Optimization for the second colum */
1813       {
1814         /* d_11 = a_11 - l_10^2 * d_00 */
1815         cs_real_t  *l_1 = facto + rowj_idx;
1816 
1817         const cs_real_t  d11 = dkk[1] = m->val[n+1] - l_1[0]*l_1[0]*dkk[0];
1818         if (fabs(d11) < cs_math_zero_threshold)
1819           bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1820 
1821         const cs_real_t  inv_d11 = facto[djj_idx] = 1. / d11;
1822 
1823         /* l_i1 = (a_i1 - l_i0 * d_00 * l_10 ) / d_11 */
1824         short int rowi_idx = rowj_idx;
1825         const cs_real_t  *a_1 = m->val + n;  /* a_i1 = a_1i */
1826         for (short int i = 2; i < n; i++) { /* Loop on rows */
1827 
1828           rowi_idx += i;
1829           cs_real_t  *l_i = facto + rowi_idx;
1830           l_i[1] = (a_1[i] - l_i[0] *  dkk[0] * l_1[0]) * inv_d11;
1831 
1832         }
1833 
1834       }
1835       break;
1836 
1837     default:
1838       {
1839         /* d_jj = a_jj - \sum_{k=0}^{j-1} l_jk^2 * d_kk */
1840         cs_real_t  *l_j = facto + rowj_idx;
1841 
1842         cs_real_t  sum = 0.;
1843         for (short int k = 0; k < j; k++)
1844           sum += l_j[k]*l_j[k] * dkk[k];
1845         const cs_real_t  djj = dkk[j] = m->val[j*n+j] - sum;
1846 
1847         if (fabs(djj) < cs_math_zero_threshold)
1848           bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1849 
1850         const cs_real_t  inv_djj = facto[djj_idx] = 1. / djj;
1851 
1852         /* l_ij = (a_ij - \sum_{k=1}^{j-1} l_ik * d_kk * l_jk ) / d_jj */
1853         short int rowi_idx = rowj_idx;
1854         const cs_real_t  *a_j = m->val + j*n;  /* a_ij = a_ji */
1855         for (short int i = j+1; i < n; i++) { /* Loop on rows */
1856 
1857           rowi_idx += i;
1858           cs_real_t  *l_i = facto + rowi_idx;
1859           sum = 0.;
1860           for (short int k = 0; k < j; k++)
1861             sum += l_i[k] *  dkk[k] * l_j[k];
1862           l_i[j] = (a_j[i] - sum) * inv_djj;
1863 
1864         }
1865 
1866       }
1867       break;
1868     } /* End of switch */
1869 
1870   } /* Loop on column j */
1871 
1872 }
1873 
1874 /*----------------------------------------------------------------------------*/
1875 /*!
1876  * \brief  Solve a 3x3 matrix with a modified Cholesky decomposition (L.D.L^T)
1877  *         The solution should be already allocated
1878  * Ref. http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1879  *
1880  * \param[in]      facto   vector of the coefficients of the decomposition
1881  * \param[in]      rhs     right-hand side
1882  * \param[in,out]  sol     solution
1883  */
1884 /*----------------------------------------------------------------------------*/
1885 
1886 void
cs_sdm_33_ldlt_solve(const cs_real_t facto[6],const cs_real_t rhs[3],cs_real_t sol[3])1887 cs_sdm_33_ldlt_solve(const cs_real_t    facto[6],
1888                      const cs_real_t    rhs[3],
1889                      cs_real_t          sol[3])
1890 {
1891   /* Sanity check */
1892   assert(facto != NULL && rhs != NULL && sol != NULL);
1893 
1894   sol[0] = rhs[0];
1895   sol[1] = rhs[1] - sol[0]*facto[1];
1896   sol[2] =(rhs[2] - sol[0]*facto[3] - sol[1]*facto[4]) * facto[5];
1897 
1898   sol[1] = sol[1] * facto[2] - facto[4]*sol[2];
1899   sol[0] = sol[0] * facto[0] - facto[1]*sol[1] - facto[3]*sol[2];
1900 }
1901 
1902 /*----------------------------------------------------------------------------*/
1903 /*!
1904  * \brief  Solve a 4x4 matrix with a modified Cholesky decomposition (L.D.L^T)
1905  *         The solution should be already allocated
1906  * Ref. http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1907  *
1908  * \param[in]      facto   vector of the coefficients of the decomposition
1909  * \param[in]      rhs     right-hand side
1910  * \param[in,out]  x       solution
1911  */
1912 /*----------------------------------------------------------------------------*/
1913 
1914 void
cs_sdm_44_ldlt_solve(const cs_real_t facto[10],const cs_real_t rhs[4],cs_real_t x[4])1915 cs_sdm_44_ldlt_solve(const cs_real_t    facto[10],
1916                      const cs_real_t    rhs[4],
1917                      cs_real_t          x[4])
1918 {
1919   /* Sanity check */
1920   assert(facto != NULL && rhs != NULL && x != NULL);
1921 
1922   x[0] = rhs[0];
1923   x[1] = rhs[1] - x[0]*facto[1];
1924   x[2] = rhs[2] - x[0]*facto[3] - x[1]*facto[4];
1925   x[3] = rhs[3] - x[0]*facto[6] - x[1]*facto[7] - x[2]*facto[8];
1926 
1927   x[3] = x[3]*facto[9];
1928   x[2] = x[2]*facto[5] - facto[8]*x[3];
1929   x[1] = x[1]*facto[2] - facto[7]*x[3] - facto[4]*x[2];
1930   x[0] = x[0]*facto[0] - facto[6]*x[3] - facto[3]*x[2] - facto[1]*x[1];
1931 }
1932 
1933 /*----------------------------------------------------------------------------*/
1934 /*!
1935  * \brief  Solve a 6x6 matrix with a modified Cholesky decomposition (L.D.L^T)
1936  *         The solution should be already allocated
1937  * Ref. http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1938  *
1939  * \param[in]      f    vector of the coefficients of the decomposition
1940  * \param[in]      b    right-hand side
1941  * \param[in,out]  x    solution
1942  */
1943 /*----------------------------------------------------------------------------*/
1944 
1945 void
cs_sdm_66_ldlt_solve(const cs_real_t f[21],const cs_real_t b[6],cs_real_t x[6])1946 cs_sdm_66_ldlt_solve(const cs_real_t    f[21],
1947                      const cs_real_t    b[6],
1948                      cs_real_t          x[6])
1949 {
1950   /* Sanity check */
1951   assert(f != NULL && b != NULL && x != NULL);
1952 
1953   x[0] = b[0];
1954   x[1] = b[1] - x[0]*f[1];
1955   x[2] = b[2] - x[0]*f[3]  - x[1]*f[4];
1956   x[3] = b[3] - x[0]*f[6]  - x[1]*f[7]  - x[2]*f[8];
1957   x[4] = b[4] - x[0]*f[10] - x[1]*f[11] - x[2]*f[12] - x[3]*f[13];
1958   x[5] = b[5] - x[0]*f[15] - x[1]*f[16] - x[2]*f[17] - x[3]*f[18] - x[4]*f[19];
1959 
1960   x[5] = x[5]*f[20];
1961   x[4] = x[4]*f[14] -f[19]*x[5];
1962   x[3] = x[3]*f[9]  -f[18]*x[5] -f[13]*x[4];
1963   x[2] = x[2]*f[5]  -f[17]*x[5] -f[12]*x[4] -f[8]*x[3];
1964   x[1] = x[1]*f[2]  -f[16]*x[5] -f[11]*x[4] -f[7]*x[3] -f[4]*x[2];
1965   x[0] = x[0]*f[0]  -f[15]*x[5] -f[10]*x[4] -f[6]*x[3] -f[3]*x[2] -f[1]*x[1];
1966 }
1967 
1968 /*----------------------------------------------------------------------------*/
1969 /*!
1970  * \brief  Solve a SPD matrix with a L.D.L^T (Modified Cholesky decomposition)
1971  *         The solution should be already allocated
1972  * Reference:
1973  * http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1974  *
1975  * \param[in]       n_rows   dimension of the system to solve
1976  * \param[in]       facto    vector of the coefficients of the decomposition
1977  * \param[in]       rhs      right-hand side
1978  * \param[in, out]  sol      solution
1979  */
1980 /*----------------------------------------------------------------------------*/
1981 
1982 void
cs_sdm_ldlt_solve(int n_rows,const cs_real_t * facto,const cs_real_t * rhs,cs_real_t * sol)1983 cs_sdm_ldlt_solve(int                n_rows,
1984                   const cs_real_t   *facto,
1985                   const cs_real_t   *rhs,
1986                   cs_real_t         *sol)
1987 {
1988   /* Sanity check */
1989   assert(facto != NULL && rhs != NULL && sol != NULL);
1990 
1991   if (n_rows == 1) {
1992     sol[0] = rhs[0] * facto[0];
1993     return;
1994   }
1995 
1996   /* 1 - Solving Lz = b with forward substitution :
1997    *     z_i = b_i - \sum_{k=0}^{i-1} l_ik * z_k
1998    */
1999 
2000   sol[0] = rhs[0]; /* case i = 0 */
2001 
2002   short int rowi_idx = 0;
2003   for (short int i = 1; i < n_rows; i++){
2004 
2005     rowi_idx += i;
2006 
2007     const cs_real_t  *l_i = facto + rowi_idx;
2008     cs_real_t  sum = 0.;
2009     for (short int k = 0; k < i; k++)
2010       sum += sol[k] * l_i[k];
2011     sol[i] = rhs[i] - sum;
2012 
2013   } /* forward substitution */
2014 
2015   /* 2 - Solving Dy = z and facto^Tx=y with backwards substitution
2016    *     x_i = z_i/d_ii - \sum_{k=i+1}^{n} l_ki * x_k
2017    */
2018 
2019   const short int  last_row_id = n_rows - 1;
2020   const int  shift = n_rows*(last_row_id)/2;   /* idx with n_rows - 1 */
2021   int  diagi_idx = shift + last_row_id;        /* last entry of the facto. */
2022   sol[last_row_id] *= facto[diagi_idx];        /* 1 / d_nn */
2023 
2024   for (short int i = last_row_id - 1; i >= 0; i--) {
2025 
2026     diagi_idx -= (i+2);
2027     sol[i] *= facto[diagi_idx];
2028 
2029     short int  rowk_idx = shift;
2030     cs_real_t  sum = 0.0;
2031     for (short int k = last_row_id; k > i; k--) {
2032       /*sol[i] -= facto[k*(k+1)/2+i] * sol[k];*/
2033       const cs_real_t  *l_k = facto + rowk_idx;
2034       sum += l_k[i] * sol[k];
2035       rowk_idx -= k;
2036     }
2037     sol[i] -= sum;
2038 
2039   } /* backward substitution */
2040 
2041 }
2042 
2043 /*----------------------------------------------------------------------------*/
2044 /*!
2045  * \brief   Test if a matrix is symmetric. Return 0. if the extradiagonal
2046  *          differences are lower thann the machine precision.
2047  *
2048  * \param[in]  mat         pointer to the cs_sdm_t structure to test
2049  *
2050  * \return  0 if the matrix is symmetric at the machine tolerance otherwise
2051  *          the absolute max. value between two transposed terms
2052  */
2053 /*----------------------------------------------------------------------------*/
2054 
2055 double
cs_sdm_test_symmetry(const cs_sdm_t * mat)2056 cs_sdm_test_symmetry(const cs_sdm_t     *mat)
2057 {
2058   double  sym_eval = 0.;
2059 
2060   if (mat == NULL)
2061     return sym_eval;
2062 
2063   if (mat->flag & CS_SDM_BY_BLOCK) {
2064 
2065     cs_sdm_t  *copy = cs_sdm_block_create_copy(mat);
2066 
2067     cs_sdm_block_square_asymm(copy);
2068 
2069     const cs_sdm_block_t  *bd = copy->block_desc;
2070     for (int bi = 0; bi < bd->n_row_blocks; bi++) {
2071       for (int bj = bi; bj < bd->n_col_blocks; bj++) {
2072 
2073         cs_sdm_t  *bIJ = cs_sdm_get_block(copy, bi, bj);
2074 
2075         for (int i = 0; i < bIJ->n_rows*bIJ->n_cols; i++)
2076           if (fabs(bIJ->val[i]) > sym_eval)
2077             sym_eval = fabs(bIJ->val[i]);
2078 
2079       } /* Loop on column blocks */
2080 
2081     } /* Loop on row blocks */
2082 
2083     copy = cs_sdm_free(copy);
2084 
2085   }
2086   else {
2087 
2088     cs_sdm_t  *copy = cs_sdm_create_copy(mat);
2089 
2090     cs_sdm_square_asymm(copy);
2091 
2092     for (int i = 0; i < copy->n_rows*copy->n_cols; i++)
2093       if (fabs(copy->val[i]) > sym_eval)
2094         sym_eval = fabs(copy->val[i]);
2095 
2096     copy = cs_sdm_free(copy);
2097 
2098   }
2099 
2100   return 2*sym_eval;
2101 }
2102 
2103 /*----------------------------------------------------------------------------*/
2104 /*!
2105  * \brief   Dump a small dense matrix
2106  *
2107  * \param[in]  mat         pointer to the cs_sdm_t structure
2108  */
2109 /*----------------------------------------------------------------------------*/
2110 
2111 void
cs_sdm_simple_dump(const cs_sdm_t * mat)2112 cs_sdm_simple_dump(const cs_sdm_t     *mat)
2113 {
2114   if (mat == NULL)
2115     return;
2116 
2117   if (mat->n_rows < 1 || mat->n_cols < 1) {
2118     cs_log_printf(CS_LOG_DEFAULT, " No value.\n");
2119     return;
2120   }
2121 
2122   for (short int i = 0; i < mat->n_rows; i++) {
2123     for (short int j = 0; j < mat->n_cols; j++)
2124       cs_log_printf(CS_LOG_DEFAULT, " % .4e", mat->val[i*mat->n_cols+j]);
2125     cs_log_printf(CS_LOG_DEFAULT, "\n");
2126   }
2127 }
2128 
2129 /*----------------------------------------------------------------------------*/
2130 /*!
2131  * \brief   Dump a small dense matrix
2132  *
2133  * \param[in]  parent_id   id of the related parent entity
2134  * \param[in]  row_ids     list of ids related to associated entities (or NULL)
2135  * \param[in]  col_ids     list of ids related to associated entities (or NULL)
2136  * \param[in]  mat         pointer to the cs_sdm_t structure
2137  */
2138 /*----------------------------------------------------------------------------*/
2139 
2140 void
cs_sdm_dump(cs_lnum_t parent_id,const cs_lnum_t * row_ids,const cs_lnum_t * col_ids,const cs_sdm_t * mat)2141 cs_sdm_dump(cs_lnum_t           parent_id,
2142             const cs_lnum_t    *row_ids,
2143             const cs_lnum_t    *col_ids,
2144             const cs_sdm_t     *mat)
2145 {
2146   if (mat == NULL) {
2147     cs_log_printf(CS_LOG_DEFAULT,
2148                   "<< MATRIX is set to NULL (parent id: %ld)>>\n",
2149                   (long)parent_id);
2150     return;
2151   }
2152 
2153   cs_log_printf(CS_LOG_DEFAULT, "<< MATRIX parent id: %ld >>\n",
2154                 (long)parent_id);
2155 
2156   if (mat->n_rows < 1 || mat->n_cols < 1) {
2157     cs_log_printf(CS_LOG_DEFAULT, " No value.\n");
2158     return;
2159   }
2160 
2161   if (row_ids == NULL || col_ids == NULL)
2162     cs_sdm_simple_dump(mat);
2163 
2164   else {
2165 
2166     cs_log_printf(CS_LOG_DEFAULT, " %8s %11ld", " ", (long)col_ids[0]);
2167     for (short int i = 1; i < mat->n_cols; i++)
2168       cs_log_printf(CS_LOG_DEFAULT, " %11ld", (long)col_ids[i]);
2169     cs_log_printf(CS_LOG_DEFAULT, "\n");
2170 
2171     for (short int i = 0; i < mat->n_rows; i++) {
2172       cs_log_printf(CS_LOG_DEFAULT, " %8ld ", (long)row_ids[i]);
2173       for (short int j = 0; j < mat->n_cols; j++)
2174         cs_log_printf(CS_LOG_DEFAULT, " % .4e", mat->val[i*mat->n_cols+j]);
2175       cs_log_printf(CS_LOG_DEFAULT, "\n");
2176     }
2177 
2178   }
2179 
2180 }
2181 
2182 /*----------------------------------------------------------------------------*/
2183 /*!
2184  * \brief   Print a cs_sdm_t structure not defined by block
2185  *          Print into the file f if given otherwise open a new file named
2186  *          fname if given otherwise print into the standard output
2187  *          The usage of threshold allows one to compare more easier matrices
2188  *          without taking into account numerical roundoff.
2189  *
2190  * \param[in]  fp         pointer to a file structure or NULL
2191  * \param[in]  fname      filename or NULL
2192  * \param[in]  thd        threshold (below this value --> set 0)
2193  * \param[in]  m          pointer to the cs_sdm_t structure
2194  */
2195 /*----------------------------------------------------------------------------*/
2196 
2197 void
cs_sdm_fprintf(FILE * fp,const char * fname,cs_real_t thd,const cs_sdm_t * m)2198 cs_sdm_fprintf(FILE             *fp,
2199                const char       *fname,
2200                cs_real_t         thd,
2201                const cs_sdm_t   *m)
2202 {
2203   FILE  *fout = stdout;
2204   if (fp != NULL)
2205     fout = fp;
2206   else if (fname != NULL) {
2207     fout = fopen(fname, "w");
2208   }
2209 
2210   fprintf(fout, "cs_sdm_t %p\n", (const void *)m);
2211 
2212   if (m == NULL)
2213     return;
2214 
2215   if (m->n_rows < 1 || m->n_cols < 1) {
2216     fprintf(fout, " No value.\n");
2217     return;
2218   }
2219 
2220   for (int i = 0; i < m->n_rows; i++) {
2221 
2222     const cs_real_t  *mval_i = m->val + i*m->n_cols;
2223     for (int j = 0; j < m->n_cols; j++) {
2224       if (fabs(mval_i[j]) > thd)
2225         fprintf(fout, " % -9.5e", mval_i[j]);
2226       else
2227         fprintf(fout, " % -9.5e", 0.);
2228     }
2229     fprintf(fout, "\n");
2230 
2231   }
2232 
2233   if (fout != stdout && fout != fp)
2234     fclose(fout);
2235 }
2236 
2237 /*----------------------------------------------------------------------------*/
2238 /*!
2239  * \brief   Dump a small dense matrix defined by blocks
2240  *
2241  * \param[in]  parent_id   id of the related parent entity
2242  * \param[in]  mat         pointer to the cs_sdm_t structure
2243  */
2244 /*----------------------------------------------------------------------------*/
2245 
2246 void
cs_sdm_block_dump(cs_lnum_t parent_id,const cs_sdm_t * mat)2247 cs_sdm_block_dump(cs_lnum_t           parent_id,
2248                   const cs_sdm_t     *mat)
2249 {
2250   if (mat == NULL)
2251     return;
2252 
2253   if ((mat->flag & CS_SDM_BY_BLOCK) == 0) {
2254     cs_sdm_simple_dump(mat);
2255     return;
2256   }
2257   assert(mat->block_desc != NULL);
2258 
2259   cs_log_printf(CS_LOG_DEFAULT, "\n << BLOCK MATRIX parent id: %ld >>\n",
2260                 (long)parent_id);
2261 
2262   const int  n_b_rows = mat->block_desc->n_row_blocks;
2263   const int  n_b_cols = mat->block_desc->n_col_blocks;
2264   const cs_sdm_t  *blocks = mat->block_desc->blocks;
2265 
2266   if (n_b_rows < 1 || n_b_cols < 1) {
2267     cs_log_printf(CS_LOG_DEFAULT, " No block\n");
2268     return;
2269   }
2270   cs_log_printf(CS_LOG_DEFAULT, " n_row_blocks: %d; n_col_blocks: %d\n",
2271                 n_b_rows, n_b_cols);
2272 
2273   const char _sep[] = "------------------------------------------------------";
2274   for (short int bi = 0; bi < n_b_rows; bi++) {
2275 
2276     const cs_sdm_t  *bi0 = blocks + bi*n_b_cols;
2277     const int n_rows = bi0->n_rows;
2278 
2279     for (int i = 0; i < n_rows; i++) {
2280 
2281       for (short int bj = 0; bj < n_b_cols; bj++) {
2282 
2283         const cs_sdm_t  *bij = blocks + bi*n_b_cols + bj;
2284         const int  n_cols = bij->n_cols;
2285         const cs_real_t  *mval_i = bij->val + i*n_cols;
2286 
2287         for (int j = 0; j < n_cols; j++)
2288           cs_log_printf(CS_LOG_DEFAULT, " % -6.3e", mval_i[j]);
2289         cs_log_printf(CS_LOG_DEFAULT, " |");
2290 
2291       } /* Block j */
2292       cs_log_printf(CS_LOG_DEFAULT, "\n");
2293 
2294     } /* Loop on rows */
2295     cs_log_printf(CS_LOG_DEFAULT, "%s%s%s\n", _sep, _sep, _sep);
2296 
2297   } /* Block i */
2298 
2299 }
2300 
2301 /*----------------------------------------------------------------------------*/
2302 /*!
2303  * \brief   Print a cs_sdm_t structure which is defined by block
2304  *          Print into the file f if given otherwise open a new file named
2305  *          fname if given otherwise print into the standard output
2306  *          The usage of threshold allows one to compare more easier matrices
2307  *          without taking into account numerical roundoff.
2308  *
2309  * \param[in]  fp         pointer to a file structure or NULL
2310  * \param[in]  fname      filename or NULL
2311  * \param[in]  thd        threshold (below this value --> set 0)
2312  * \param[in]  m          pointer to the cs_sdm_t structure
2313  */
2314 /*----------------------------------------------------------------------------*/
2315 
2316 void
cs_sdm_block_fprintf(FILE * fp,const char * fname,cs_real_t thd,const cs_sdm_t * m)2317 cs_sdm_block_fprintf(FILE             *fp,
2318                      const char       *fname,
2319                      cs_real_t         thd,
2320                      const cs_sdm_t   *m)
2321 {
2322   FILE  *fout = stdout;
2323   if (fp != NULL)
2324     fout = fp;
2325   else if (fname != NULL) {
2326     fout = fopen(fname, "w");
2327   }
2328 
2329   fprintf(fout, "cs_sdm_t %p\n", (const void *)m);
2330 
2331   if (m == NULL)
2332     return;
2333 
2334   assert(m->block_desc != NULL);
2335   const int  n_b_rows = m->block_desc->n_row_blocks;
2336   const int  n_b_cols = m->block_desc->n_col_blocks;
2337   const cs_sdm_t  *blocks = m->block_desc->blocks;
2338 
2339   for (short int bi = 0; bi < n_b_rows; bi++) {
2340 
2341     const cs_sdm_t  *bi0 = blocks + bi*n_b_cols;
2342     const int n_rows = bi0->n_rows;
2343 
2344     for (int i = 0; i < n_rows; i++) {
2345 
2346       for (short int bj = 0; bj < n_b_cols; bj++) {
2347 
2348         const cs_sdm_t  *bij = blocks + bi*n_b_cols + bj;
2349         const int  n_cols = bij->n_cols;
2350         const cs_real_t  *mval_i = bij->val + i*n_cols;
2351 
2352         for (int j = 0; j < n_cols; j++) {
2353           if (fabs(mval_i[j]) > thd)
2354             fprintf(fout, " % -9.5e", mval_i[j]);
2355           else
2356             fprintf(fout, " % -9.5e", 0.);
2357         }
2358 
2359       } /* Block j */
2360       fprintf(fout, "\n");
2361 
2362     } /* Loop on rows */
2363 
2364   } /* Block i */
2365 
2366   if (fout != stdout && fout != fp)
2367     fclose(fout);
2368 }
2369 
2370 /*----------------------------------------------------------------------------*/
2371 
2372 END_C_DECLS
2373