1 /*============================================================================
2 * Set of operations to handle Small Dense Matrices (SDM)
3 *============================================================================*/
4
5 /*
6 This file is part of Code_Saturne, a general-purpose CFD tool.
7
8 Copyright (C) 1998-2021 EDF S.A.
9
10 This program is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free Software
12 Foundation; either version 2 of the License, or (at your option) any later
13 version.
14
15 This program is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 details.
19
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22 Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24
25 /*----------------------------------------------------------------------------*/
26
27 /*----------------------------------------------------------------------------
28 * Standard C library headers
29 *----------------------------------------------------------------------------*/
30
31 #include <stdlib.h>
32 #include <assert.h>
33 #include <float.h>
34 #include <limits.h>
35
36 /*----------------------------------------------------------------------------
37 * Local headers
38 *----------------------------------------------------------------------------*/
39
40 #include <bft_mem.h>
41
42 #include "cs_blas.h"
43 #include "cs_log.h"
44 #include "cs_math.h"
45 #include "cs_parall.h"
46 #include "cs_sort.h"
47
48 /*----------------------------------------------------------------------------
49 * Header for the current file
50 *----------------------------------------------------------------------------*/
51
52 #include "cs_sdm.h"
53
54 /*----------------------------------------------------------------------------*/
55
56 BEGIN_C_DECLS
57
58 /*============================================================================
59 * Static definitions
60 *============================================================================*/
61
62 static const char _msg_small_p[] =
63 " %s: Very small or null pivot.\n Stop inversion.";
64
65 /*============================================================================
66 * Private function prototypes
67 *============================================================================*/
68
69 /*----------------------------------------------------------------------------*/
70 /*!
71 * \brief Generic way to allocate a cs_sdm_t structure
72 *
73 * \param[in] flag metadata related to a cs_sdm_t structure
74 * \param[in] n_max_rows max number of rows
75 * \param[in] n_max_cols max number of columns
76 * \param[in] n_max_rows max number of rows
77 * \param[in] n_max_cols max number of columns
78 *
79 *
80 * \return a new allocated cs_sdm_t structure
81 */
82 /*----------------------------------------------------------------------------*/
83
84 static cs_sdm_t *
_create_sdm(cs_flag_t flag,int n_max_rows,int n_max_cols)85 _create_sdm(cs_flag_t flag,
86 int n_max_rows,
87 int n_max_cols)
88 {
89 cs_sdm_t *mat = NULL;
90
91 BFT_MALLOC(mat, 1, cs_sdm_t);
92
93 mat->flag = flag;
94 mat->n_max_rows = n_max_rows;
95 mat->n_max_cols = n_max_cols;
96 mat->n_rows = n_max_rows;
97 mat->n_cols = n_max_cols;
98
99 BFT_MALLOC(mat->val, mat->n_max_rows*mat->n_max_cols, cs_real_t);
100 memset(mat->val, 0, sizeof(cs_real_t)*mat->n_max_rows*mat->n_max_cols);
101
102 if (flag & CS_SDM_BY_BLOCK) {
103
104 cs_sdm_block_t *bd = NULL;
105
106 BFT_MALLOC(bd, 1, cs_sdm_block_t);
107 bd->blocks = NULL;
108 bd->n_max_blocks_by_row = bd->n_max_blocks_by_col = 0;
109 bd->n_row_blocks = bd->n_col_blocks = 0;
110 mat->block_desc = bd;
111
112 }
113 else
114 mat->block_desc = NULL;
115
116 return mat;
117 }
118
119 /*============================================================================
120 * Public function prototypes
121 *============================================================================*/
122
123 /*----------------------------------------------------------------------------*/
124 /*!
125 * \brief Allocate and initialize a cs_sdm_t structure
126 * Most generic function to create a cs_sdm_t structure without block
127 * description
128 *
129 * \param[in] flag metadata related to a cs_sdm_t structure
130 * \param[in] n_max_rows max number of rows
131 * \param[in] n_max_cols max number of columns
132 *
133 * \return a new allocated cs_sdm_t structure
134 */
135 /*----------------------------------------------------------------------------*/
136
137 cs_sdm_t *
cs_sdm_create(cs_flag_t flag,int n_max_rows,int n_max_cols)138 cs_sdm_create(cs_flag_t flag,
139 int n_max_rows,
140 int n_max_cols)
141 {
142 return _create_sdm(flag, n_max_rows, n_max_cols);
143 }
144
145 /*----------------------------------------------------------------------------*/
146 /*!
147 * \brief Allocate and initialize a cs_sdm_t structure
148 * Case of a square matrix
149 *
150 * \param[in] n_max_rows max number of rows
151 *
152 * \return a new allocated cs_sdm_t structure
153 */
154 /*----------------------------------------------------------------------------*/
155
156 cs_sdm_t *
cs_sdm_square_create(int n_max_rows)157 cs_sdm_square_create(int n_max_rows)
158 {
159 return _create_sdm(0, n_max_rows, n_max_rows);
160 }
161
162 /*----------------------------------------------------------------------------*/
163 /*!
164 * \brief Allocate a cs_sdm_t structure and initialized it with the
165 * copy of the matrix m in input
166 *
167 * \param[in] m pointer to a cs_sdm_t structure to copy
168 *
169 * \return a pointer to a new allocated cs_sdm_t structure
170 */
171 /*----------------------------------------------------------------------------*/
172
173 cs_sdm_t *
cs_sdm_create_copy(const cs_sdm_t * m)174 cs_sdm_create_copy(const cs_sdm_t *m)
175 {
176 cs_sdm_t *c = _create_sdm(m->flag, m->n_max_rows, m->n_max_cols);
177
178 c->n_rows = m->n_rows;
179 c->n_cols = m->n_cols;
180 memcpy(c->val, m->val, sizeof(cs_real_t)*m->n_rows*m->n_cols);
181
182 return c;
183 }
184
185 /*----------------------------------------------------------------------------*/
186 /*!
187 * \brief Define a new matrix which is its transpose.
188 *
189 * \param[in] mat local matrix to transpose
190 *
191 * \return a pointer to the new allocated transposed matrix
192 */
193 /*----------------------------------------------------------------------------*/
194
195 cs_sdm_t *
cs_sdm_create_transpose(cs_sdm_t * mat)196 cs_sdm_create_transpose(cs_sdm_t *mat)
197 {
198 /* Sanity check */
199 assert(mat != NULL);
200
201 cs_sdm_t *tr = _create_sdm(mat->flag, mat->n_max_cols, mat->n_max_rows);
202
203 tr->n_rows = mat->n_cols;
204 tr->n_cols = mat->n_rows;
205
206 for (short int i = 0; i < mat->n_rows; i++) {
207
208 const cs_real_t *mval_i = mat->val + i*mat->n_cols;
209 for (short int j = 0; j < mat->n_cols; j++)
210 tr->val[j*tr->n_cols+i] = mval_i[j];
211
212 }
213
214 return tr;
215 }
216
217 /*----------------------------------------------------------------------------*/
218 /*!
219 * \brief Allocate and initialize a cs_sdm_t structure
220 *
221 * \param[in] n_max_blocks_by_row max number of blocks in a row
222 * \param[in] n_max_blocks_by_col max number of blocks in a column
223 * \param[in] max_row_block_sizes max number of rows by block in a column
224 * \param[in] max_col_block_sizes max number of columns by block in a row
225 *
226 * \return a new allocated cs_sdm_t structure
227 */
228 /*----------------------------------------------------------------------------*/
229
230 cs_sdm_t *
cs_sdm_block_create(int n_max_blocks_by_row,int n_max_blocks_by_col,const int max_row_block_sizes[],const int max_col_block_sizes[])231 cs_sdm_block_create(int n_max_blocks_by_row,
232 int n_max_blocks_by_col,
233 const int max_row_block_sizes[],
234 const int max_col_block_sizes[])
235 {
236 cs_sdm_t *m = NULL;
237
238 if (n_max_blocks_by_row < 1 || n_max_blocks_by_col < 1)
239 return m;
240
241 int row_size = 0, col_size = 0;
242 for (int i = 0; i < n_max_blocks_by_row; i++)
243 row_size += max_row_block_sizes[i];
244 for (int j = 0; j < n_max_blocks_by_col; j++)
245 col_size += max_col_block_sizes[j];
246
247 m = _create_sdm(CS_SDM_BY_BLOCK, row_size, col_size);
248
249 /* Define the block description */
250 m->block_desc->n_max_blocks_by_row = n_max_blocks_by_row;
251 m->block_desc->n_max_blocks_by_col = n_max_blocks_by_col;
252 m->block_desc->n_row_blocks = n_max_blocks_by_row;
253 m->block_desc->n_col_blocks = n_max_blocks_by_col;
254 BFT_MALLOC(m->block_desc->blocks,
255 n_max_blocks_by_row*n_max_blocks_by_col, cs_sdm_t);
256
257 cs_real_t *p_val = m->val;
258 int shift = 0;
259 for (int i = 0; i < n_max_blocks_by_row; i++) {
260 const short int n_rows_i = max_row_block_sizes[i];
261 for (int j = 0; j < n_max_blocks_by_col; j++) {
262 const short int n_cols_j = max_col_block_sizes[j];
263
264 /* Set the block (i,j) */
265 cs_sdm_t *b_ij = m->block_desc->blocks + shift;
266 int _size = n_rows_i*n_cols_j;
267
268 cs_sdm_map_array(n_rows_i, n_cols_j, b_ij, p_val);
269 shift++;
270 p_val += _size;
271
272 }
273 }
274
275 return m;
276 }
277
278 /*----------------------------------------------------------------------------*/
279 /*!
280 * \brief Allocate and initialize a cs_sdm_t structure by block when the
281 * block size is constant and equal to 3
282 *
283 * \param[in] n_max_blocks_by_row max number of blocks in a row
284 * \param[in] n_max_blocks_by_col max number of blocks in a column
285 *
286 * \return a new allocated cs_sdm_t structure
287 */
288 /*----------------------------------------------------------------------------*/
289
290 cs_sdm_t *
cs_sdm_block33_create(int n_max_blocks_by_row,int n_max_blocks_by_col)291 cs_sdm_block33_create(int n_max_blocks_by_row,
292 int n_max_blocks_by_col)
293 {
294 cs_sdm_t *m = NULL;
295
296 if (n_max_blocks_by_row < 1 || n_max_blocks_by_col < 1)
297 return m;
298
299 m = _create_sdm(CS_SDM_BY_BLOCK,
300 3*n_max_blocks_by_row,
301 3*n_max_blocks_by_col);
302
303 /* Define the block description */
304 m->block_desc->n_max_blocks_by_row = n_max_blocks_by_row;
305 m->block_desc->n_max_blocks_by_col = n_max_blocks_by_col;
306 m->block_desc->n_row_blocks = n_max_blocks_by_row;
307 m->block_desc->n_col_blocks = n_max_blocks_by_col;
308 BFT_MALLOC(m->block_desc->blocks,
309 n_max_blocks_by_row*n_max_blocks_by_col, cs_sdm_t);
310
311 cs_real_t *p_val = m->val;
312 for (int i = 0; i < n_max_blocks_by_row*n_max_blocks_by_col; i++) {
313 cs_sdm_map_array(3, 3, m->block_desc->blocks + i, p_val);
314 p_val += 9;
315 }
316
317 return m;
318 }
319
320 /*----------------------------------------------------------------------------*/
321 /*!
322 * \brief Free a cs_sdm_t structure
323 *
324 * \param[in] mat pointer to a cs_sdm_t struct. to free
325 *
326 * \return a NULL pointer
327 */
328 /*----------------------------------------------------------------------------*/
329
330 cs_sdm_t *
cs_sdm_free(cs_sdm_t * mat)331 cs_sdm_free(cs_sdm_t *mat)
332 {
333 if (mat == NULL)
334 return mat;
335
336 if ((mat->flag & CS_SDM_SHARED_VAL) == 0)
337 BFT_FREE(mat->val);
338
339 if (mat->flag & CS_SDM_BY_BLOCK) {
340 BFT_FREE(mat->block_desc->blocks);
341 BFT_FREE(mat->block_desc);
342 }
343
344 BFT_FREE(mat);
345
346 return NULL;
347 }
348
349 /*----------------------------------------------------------------------------*/
350 /*!
351 * \brief Initialize the pattern of cs_sdm_t structure defined by block
352 * The matrix should have been allocated before calling this function
353 *
354 * \param[in, out] m
355 * \param[in] n_row_blocks number of blocks in a row
356 * \param[in] n_col_blocks number of blocks in a column
357 * \param[in] row_block_sizes number of rows by block in a column
358 * \param[in] col_block_sizes number of columns by block in a row
359 */
360 /*----------------------------------------------------------------------------*/
361
362 void
cs_sdm_block_init(cs_sdm_t * m,int n_row_blocks,int n_col_blocks,const int row_block_sizes[],const int col_block_sizes[])363 cs_sdm_block_init(cs_sdm_t *m,
364 int n_row_blocks,
365 int n_col_blocks,
366 const int row_block_sizes[],
367 const int col_block_sizes[])
368 {
369 /* Sanity checks */
370 assert(m != NULL && row_block_sizes != NULL && col_block_sizes != NULL);
371 assert(m->flag & CS_SDM_BY_BLOCK);
372 assert(m->block_desc != NULL);
373
374 cs_sdm_block_t *bd = m->block_desc;
375
376 assert(n_row_blocks <= bd->n_max_blocks_by_row);
377 assert(n_col_blocks <= bd->n_max_blocks_by_col);
378 bd->n_row_blocks = n_row_blocks;
379 bd->n_col_blocks = n_col_blocks;
380 m->n_rows = 0;
381 for (int i = 0; i < n_row_blocks; i++)
382 m->n_rows += row_block_sizes[i];
383 m->n_cols = 0;
384 for (int i = 0; i < n_col_blocks; i++)
385 m->n_cols += col_block_sizes[i];
386
387 memset(m->val, 0, m->n_rows*m->n_cols*sizeof(cs_real_t));
388
389 cs_real_t *p_val = m->val;
390 int shift = 0;
391 for (int i = 0; i < bd->n_row_blocks; i++) {
392 const short int n_rows_i = row_block_sizes[i];
393 for (int j = 0; j < bd->n_col_blocks; j++) {
394 const short int n_cols_j = col_block_sizes[j];
395
396 /* Set the block (i,j) */
397 cs_sdm_t *b_ij = bd->blocks + shift;
398
399 cs_sdm_map_array(n_rows_i, n_cols_j, b_ij, p_val);
400 p_val += n_rows_i*n_cols_j;
401 shift++;
402
403 }
404 }
405 }
406
407 /*----------------------------------------------------------------------------*/
408 /*!
409 * \brief Initialize the pattern of cs_sdm_t structure defined by 3x3 block
410 * The matrix should have been allocated before calling this function
411 *
412 * \param[in, out] m
413 * \param[in] n_row_blocks number of blocks in a row
414 * \param[in] n_col_blocks number of blocks in a column
415 */
416 /*----------------------------------------------------------------------------*/
417
418 void
cs_sdm_block33_init(cs_sdm_t * m,int n_row_blocks,int n_col_blocks)419 cs_sdm_block33_init(cs_sdm_t *m,
420 int n_row_blocks,
421 int n_col_blocks)
422 {
423 /* Sanity checks */
424 assert(m != NULL);
425 assert(m->flag & CS_SDM_BY_BLOCK);
426 assert(m->block_desc != NULL);
427 assert(n_row_blocks <= m->block_desc->n_max_blocks_by_row);
428 assert(n_col_blocks <= m->block_desc->n_max_blocks_by_col);
429
430 cs_sdm_block_t *bd = m->block_desc;
431
432 bd->n_row_blocks = n_row_blocks;
433 bd->n_col_blocks = n_col_blocks;
434 m->n_rows = 3*n_row_blocks;
435 m->n_cols = 3*n_col_blocks;
436
437 memset(m->val, 0, m->n_rows*m->n_cols*sizeof(cs_real_t));
438
439 cs_real_t *p_val = m->val;
440 for (int i = 0; i < bd->n_row_blocks*bd->n_col_blocks; i++) {
441 cs_sdm_map_array(3, 3, bd->blocks + i, p_val);
442 p_val += 9; /* Each 3x3 block has 9 entries */
443 }
444 }
445
446 /*----------------------------------------------------------------------------*/
447 /*!
448 * \brief Convert a matrix with each entry is a 3x3 block into a matrix
449 * with a block for each component x,y,z.
450 *
451 * \param[in] mb33 pointer to a matrix
452 * \param[in, out] mbxyz pointer to a matrix to build (but allocated)
453 */
454 /*----------------------------------------------------------------------------*/
455
456 void
cs_sdm_block_33_to_xyz(const cs_sdm_t * mb33,cs_sdm_t * mbxyz)457 cs_sdm_block_33_to_xyz(const cs_sdm_t *mb33,
458 cs_sdm_t *mbxyz)
459 {
460 if (mb33 == NULL)
461 return;
462
463 assert(mb33->flag & CS_SDM_BY_BLOCK);
464 assert(mb33->block_desc != NULL);
465
466 const cs_sdm_block_t *mb33_desc = mb33->block_desc;
467 const int n_cols = mb33_desc->n_col_blocks;
468 assert(n_cols == mb33_desc->n_row_blocks);
469
470 /* Reshape the block matrix to fit the shape requested by mb33 */
471 int block_sizes[3];
472 for (int i = 0; i < 3; i++) block_sizes[i] = n_cols;
473 cs_sdm_block_init(mbxyz, 3, 3, block_sizes, block_sizes);
474
475 cs_sdm_t *mxyz[3][3];
476 for (int i = 0; i < 3; i++)
477 for (int j = 0; j < 3; j++)
478 mxyz[i][j] = cs_sdm_get_block(mbxyz, i, j);
479
480 for (int bi = 0; bi < n_cols; bi++) {
481 for (int bj = 0; bj < n_cols; bj++) {
482
483 cs_sdm_t *m33_ij = cs_sdm_get_block(mb33, bi, bj);
484
485 for (int _i = 0; _i < 3; _i++)
486 for (int _j = 0; _j < 3; _j++)
487 mxyz[_i][_j]->val[bi*n_cols+bj] = m33_ij->val[3*_i+_j];
488
489 } /* Loop on column blocks */
490 } /* Loop on row blocks */
491
492 }
493
494 /*----------------------------------------------------------------------------*/
495 /*!
496 * \brief Allocate and initialize a cs_sdm_t structure w.r.t. to a given
497 * matrix
498 *
499 * \param[in] mref pointer to a matrix to copy
500 *
501 * \return a new allocated cs_sdm_t structure which is a copy of mref
502 */
503 /*----------------------------------------------------------------------------*/
504
505 cs_sdm_t *
cs_sdm_block_create_copy(const cs_sdm_t * mref)506 cs_sdm_block_create_copy(const cs_sdm_t *mref)
507 {
508 cs_sdm_t *m = NULL;
509
510 if (mref == NULL)
511 return m;
512
513 if (mref->n_max_rows < 1 || mref->n_max_cols < 1)
514 return m;
515
516 const cs_sdm_block_t *bd_ref = mref->block_desc;
517
518 int row_size = 0, col_size = 0;
519 for (int bi = 0; bi < bd_ref->n_row_blocks; bi++) {
520 const cs_sdm_t *bI0 = cs_sdm_get_block(mref, bi, 0);
521 row_size += bI0->n_max_rows;
522 }
523 for (int bj = 0; bj < bd_ref->n_col_blocks; bj++) {
524 const cs_sdm_t *b0J = cs_sdm_get_block(mref, 0, bj);
525 col_size += b0J->n_max_cols;
526 }
527
528 m = _create_sdm(CS_SDM_BY_BLOCK, row_size, col_size);
529
530 /* Copy values */
531 memcpy(m->val, mref->val, sizeof(cs_real_t)*m->n_max_rows*m->n_max_cols);
532
533 /* Define the block description */
534 cs_sdm_block_t *bd = m->block_desc;
535
536 bd->n_max_blocks_by_row = bd_ref->n_max_blocks_by_row;
537 bd->n_max_blocks_by_col = bd_ref->n_max_blocks_by_col;
538 bd->n_row_blocks = bd_ref->n_row_blocks;
539 bd->n_col_blocks = bd_ref->n_col_blocks;
540
541 BFT_MALLOC(bd->blocks,
542 bd_ref->n_max_blocks_by_row*bd_ref->n_max_blocks_by_col,
543 cs_sdm_t);
544
545 cs_real_t *p_val = m->val;
546 int shift = 0;
547 for (int bi = 0; bi < bd_ref->n_row_blocks; bi++) {
548 for (int bj = 0; bj < bd_ref->n_col_blocks; bj++) {
549
550 const cs_sdm_t *ref_IJ = cs_sdm_get_block(mref, bi, bj);
551
552 /* Set the block (i,j) */
553 cs_sdm_t *b_ij = bd->blocks + shift;
554 int _size = ref_IJ->n_rows*ref_IJ->n_cols;
555
556 cs_sdm_map_array(ref_IJ->n_rows, ref_IJ->n_cols, b_ij, p_val);
557 shift++;
558 p_val += _size;
559
560 } /* Column blocks */
561 } /* Row blocks */
562
563 return m;
564 }
565
566 /*----------------------------------------------------------------------------*/
567 /*!
568 * \brief Compute a local dense matrix-product c = a*b
569 * c has been previously allocated
570 *
571 * \param[in] a local dense matrix to use
572 * \param[in] b local dense matrix to use
573 * \param[in, out] c result of the local matrix-product
574 * is updated
575 */
576 /*----------------------------------------------------------------------------*/
577
578 void
cs_sdm_multiply(const cs_sdm_t * a,const cs_sdm_t * b,cs_sdm_t * c)579 cs_sdm_multiply(const cs_sdm_t *a,
580 const cs_sdm_t *b,
581 cs_sdm_t *c)
582 {
583 /* Sanity checks */
584 assert(a != NULL && b != NULL && c != NULL);
585 assert(a->n_cols == b->n_rows &&
586 a->n_rows == c->n_rows &&
587 c->n_cols == b->n_cols);
588
589 const cs_real_t *bv = b->val;
590
591 for (short int i = 0; i < a->n_rows; i++) {
592
593 const cs_real_t *av_i = a->val + i*a->n_cols;
594 cs_real_t *cv_i = c->val + i*b->n_cols;
595
596 for (short int j = 0; j < b->n_cols; j++){
597 cs_real_t p = 0.0;
598 for (short int k = 0; k < a->n_cols; k++)
599 p += av_i[k] * bv[k*b->n_cols + j];
600 cv_i[j] += p;
601
602 } /* Loop on b columns */
603 } /* Loop on a rows */
604
605 }
606
607 /*----------------------------------------------------------------------------*/
608 /*!
609 * \brief Compute a row-row matrix product of a and b. It is basically equal
610 * to the classical a*b^T. It is a fast (matrices are row-major) way
611 * of computing a*b if b is symmetric or if b^T is given.
612 * Generic version: all compatible sizes
613 *
614 * \param[in] a local matrix to use
615 * \param[in] b local matrix to use
616 * \param[in, out] c result of the local matrix-product (already allocated)
617 * is updated.
618 */
619 /*----------------------------------------------------------------------------*/
620
621 void
cs_sdm_multiply_rowrow(const cs_sdm_t * a,const cs_sdm_t * b,cs_sdm_t * c)622 cs_sdm_multiply_rowrow(const cs_sdm_t *a,
623 const cs_sdm_t *b,
624 cs_sdm_t *c)
625 {
626 /* Sanity check */
627 assert(a != NULL && b != NULL && c != NULL);
628 assert(a->n_cols == b->n_cols &&
629 a->n_rows == c->n_rows &&
630 c->n_cols == b->n_rows);
631
632 for (short int i = 0; i < a->n_rows; i++) {
633
634 const cs_real_t *av_i = a->val + i*a->n_cols;
635
636 cs_real_t *cv_i = c->val + i*b->n_rows;
637
638 for (short int j = 0; j < b->n_rows; j++) {
639
640 const cs_real_t *bv_j = b->val + j * b->n_cols;
641
642 cs_real_t dp = 0;
643 for (short int k = 0; k < a->n_cols; k++)
644 dp += av_i[k] * bv_j[k];
645 cv_i[j] += dp;
646
647 } /* Loop on b rows */
648 } /* Loop on a rows */
649
650 }
651
652 /*----------------------------------------------------------------------------*/
653 /*!
654 * \brief Compute a row-row matrix product of a and b. It is basically equal
655 * to the classical a*b^T. It is a fast (matrices are row-major) way
656 * of computing a*b if b is symmetric or if b^T is given.
657 * Generic version: all compatible sizes
658 * Result is known to be symmetric.
659 *
660 * \param[in] a local matrix to use
661 * \param[in] b local matrix to use
662 * \param[in, out] c result of the local matrix-product (already allocated)
663 * is updated.
664 */
665 /*----------------------------------------------------------------------------*/
666
667 void
cs_sdm_multiply_rowrow_sym(const cs_sdm_t * a,const cs_sdm_t * b,cs_sdm_t * c)668 cs_sdm_multiply_rowrow_sym(const cs_sdm_t *a,
669 const cs_sdm_t *b,
670 cs_sdm_t *c)
671 {
672 /* Sanity check */
673 assert(a != NULL && b != NULL && c != NULL);
674 assert(a->n_cols == b->n_cols &&
675 a->n_rows == c->n_rows &&
676 c->n_cols == b->n_rows);
677
678 for (short int i = 0; i < a->n_rows; i++) {
679
680 const cs_real_t *av_i = a->val + i*a->n_cols;
681
682 cs_real_t *cv_i = c->val + i*b->n_rows;
683
684 for (short int j = i; j < b->n_rows; j++) {
685
686 const cs_real_t *bv_j = b->val + j * b->n_cols;
687
688 cs_real_t dp = 0;
689 for (short int k = 0; k < a->n_cols; k++)
690 dp += av_i[k] * bv_j[k];
691 cv_i[j] += dp;
692
693 if (j > i)
694 (c->val + j*b->n_rows)[i] += dp;
695
696 } /* Loop on b rows */
697 } /* Loop on a rows */
698
699 }
700
701 /*----------------------------------------------------------------------------*/
702 /*!
703 * \brief Compute a row-row matrix product of a and b. It is basically equal
704 * to the classical a*b^T. It is a fast (matrices are row-major) way
705 * of computing a*b if b is symmetric or if b^T is given.
706 * Case of matrices defined by block.
707 *
708 * \param[in] a local matrix to use
709 * \param[in] b local matrix to use
710 * \param[in, out] c result of the local matrix-product (already allocated)
711 * is updated
712 */
713 /*----------------------------------------------------------------------------*/
714
715 void
cs_sdm_block_multiply_rowrow(const cs_sdm_t * a,const cs_sdm_t * b,cs_sdm_t * c)716 cs_sdm_block_multiply_rowrow(const cs_sdm_t *a,
717 const cs_sdm_t *b,
718 cs_sdm_t *c)
719 {
720 /* Sanity check */
721 assert(a != NULL && b != NULL && c != NULL);
722 assert(a->flag & CS_SDM_BY_BLOCK);
723 assert(b->flag & CS_SDM_BY_BLOCK);
724 assert(c->flag & CS_SDM_BY_BLOCK);
725 assert(a->n_cols == b->n_cols &&
726 a->n_rows == c->n_rows &&
727 c->n_cols == b->n_rows);
728
729 const cs_sdm_block_t *a_desc = a->block_desc;
730 const cs_sdm_block_t *b_desc = b->block_desc;
731 const cs_sdm_block_t *c_desc = c->block_desc;
732
733 CS_UNUSED(c_desc); /* Only in debug mode */
734 assert(a_desc->n_col_blocks == b_desc->n_col_blocks &&
735 a_desc->n_row_blocks == c_desc->n_row_blocks &&
736 c_desc->n_col_blocks == b_desc->n_row_blocks);
737
738 for (short int i = 0; i < a_desc->n_row_blocks; i++) {
739 for (short int j = 0; j < b_desc->n_row_blocks; j++) {
740
741 cs_sdm_t *cIJ = cs_sdm_get_block(c, i, j);
742
743 for (short int k = 0; k < a_desc->n_col_blocks; k++) {
744
745 cs_sdm_t *aIK = cs_sdm_get_block(a, i, k);
746 cs_sdm_t *bJK = cs_sdm_get_block(b, j, k);
747
748 cs_sdm_multiply_rowrow(aIK, bJK, cIJ);
749
750 } /* Loop on common blocks between a and b */
751 } /* Loop on b row blocks */
752 } /* Loop on a row blocks */
753
754 }
755
756 /*----------------------------------------------------------------------------*/
757 /*!
758 * \brief Compute a row-row matrix product of a and b. It is basically equal
759 * to the classical a*b^T. It is a fast (matrices are row-major) way
760 * of computing a*b if b is symmetric or if b^T is given.
761 * Case of matrices defined by block.
762 * Results is known to be symmetric.
763 *
764 * \param[in] a local matrix to use
765 * \param[in] b local matrix to use
766 * \param[in, out] c result of the local matrix-product (already allocated)
767 * is updated
768 */
769 /*----------------------------------------------------------------------------*/
770
771 void
cs_sdm_block_multiply_rowrow_sym(const cs_sdm_t * a,const cs_sdm_t * b,cs_sdm_t * c)772 cs_sdm_block_multiply_rowrow_sym(const cs_sdm_t *a,
773 const cs_sdm_t *b,
774 cs_sdm_t *c)
775 {
776 /* Sanity check */
777 assert(a != NULL && b != NULL && c != NULL);
778 assert(a->flag & CS_SDM_BY_BLOCK);
779 assert(b->flag & CS_SDM_BY_BLOCK);
780 assert(c->flag & CS_SDM_BY_BLOCK);
781 assert(a->n_cols == b->n_cols &&
782 a->n_rows == c->n_rows &&
783 c->n_cols == b->n_rows);
784
785 const cs_sdm_block_t *a_desc = a->block_desc;
786 const cs_sdm_block_t *b_desc = b->block_desc;
787
788 assert(a_desc->n_col_blocks == b_desc->n_col_blocks &&
789 a_desc->n_row_blocks == c->block_desc->n_row_blocks &&
790 c->block_desc->n_col_blocks == b_desc->n_row_blocks);
791
792 for (short int i = 0; i < a_desc->n_row_blocks; i++) {
793
794 for (short int j = i; j < b_desc->n_row_blocks; j++) {
795
796 cs_sdm_t *cIJ = cs_sdm_get_block(c, i, j);
797
798 for (short int k = 0; k < a_desc->n_col_blocks; k++) {
799
800 cs_sdm_t *aIK = cs_sdm_get_block(a, i, k);
801 cs_sdm_t *bJK = cs_sdm_get_block(b, j, k);
802
803 cs_sdm_multiply_rowrow(aIK, bJK, cIJ);
804
805 } /* Loop on common blocks between a and b */
806
807 } /* Loop on b row blocks */
808 } /* Loop on a row blocks */
809
810 for (short int i = 0; i < a_desc->n_row_blocks; i++)
811 for (short int j = i + 1; j < b_desc->n_row_blocks; j++)
812 cs_sdm_transpose_and_update(cs_sdm_get_block(c, i, j),
813 cs_sdm_get_block(c, j, i));
814
815 }
816
817 /*----------------------------------------------------------------------------*/
818 /*!
819 * \brief Compute a dense matrix-vector product for a small square matrix
820 * mv has been previously allocated
821 *
822 * \param[in] mat local matrix to use
823 * \param[in] vec local vector to use
824 * \param[in, out] mv result of the local matrix-vector product
825 */
826 /*----------------------------------------------------------------------------*/
827
828 void
cs_sdm_square_matvec(const cs_sdm_t * mat,const cs_real_t * vec,cs_real_t * mv)829 cs_sdm_square_matvec(const cs_sdm_t *mat,
830 const cs_real_t *vec,
831 cs_real_t *mv)
832 {
833 /* Sanity checks */
834 assert(mat != NULL && vec != NULL && mv != NULL);
835 assert(mat->n_rows == mat->n_cols);
836
837 const int n = mat->n_rows;
838
839 /* Initialize mv */
840 const cs_real_t v = vec[0];
841 for (short int i = 0; i < n; i++)
842 mv[i] = v*mat->val[i*n];
843
844 /* Increment mv */
845 for (short int i = 0; i < n; i++) {
846 const cs_real_t *m_i = mat->val + i*n;
847 for (short int j = 1; j < n; j++)
848 mv[i] += m_i[j] * vec[j];
849 }
850
851 }
852
853 /*----------------------------------------------------------------------------*/
854 /*!
855 * \brief Compute a dense matrix-vector product for a rectangular matrix
856 * mv has been previously allocated
857 *
858 * \param[in] mat local matrix to use
859 * \param[in] vec local vector to use (size = mat->n_cols)
860 * \param[in, out] mv result of the operation (size = mat->n_rows)
861 */
862 /*----------------------------------------------------------------------------*/
863
864 void
cs_sdm_matvec(const cs_sdm_t * mat,const cs_real_t * vec,cs_real_t * mv)865 cs_sdm_matvec(const cs_sdm_t *mat,
866 const cs_real_t *vec,
867 cs_real_t *mv)
868 {
869 /* Sanity checks */
870 assert(mat != NULL && vec != NULL && mv != NULL);
871
872 if (mat->n_rows == mat->n_cols) {
873 cs_sdm_square_matvec(mat, vec, mv);
874 return;
875 }
876
877 const short int nr = mat->n_rows;
878 const short int nc = mat->n_cols;
879
880 /* Initialize mv with the first column */
881 const cs_real_t v = vec[0];
882 for (short int i = 0; i < nr; i++)
883 mv[i] = v*mat->val[i*nc];
884
885 /* Increment mv */
886 for (short int i = 0; i < nr; i++) {
887 cs_real_t *m_i = mat->val + i*nc;
888 for (short int j = 1; j < nc; j++)
889 mv[i] += m_i[j] * vec[j];
890 }
891
892 }
893
894 /*----------------------------------------------------------------------------*/
895 /*!
896 * \brief Compute a dense matrix-vector product for a rectangular matrix
897 * mv has been previously allocated and initialized
898 * Thus mv is updated inside this function
899 *
900 * \param[in] mat local matrix to use
901 * \param[in] vec local vector to use (size = mat->n_cols)
902 * \param[in, out] mv result of the operation (size = mat->n_rows)
903 */
904 /*----------------------------------------------------------------------------*/
905
906 void
cs_sdm_update_matvec(const cs_sdm_t * mat,const cs_real_t * vec,cs_real_t * mv)907 cs_sdm_update_matvec(const cs_sdm_t *mat,
908 const cs_real_t *vec,
909 cs_real_t *mv)
910 {
911 /* Sanity checks */
912 assert(mat != NULL && vec != NULL && mv != NULL);
913
914 const short int nr = mat->n_rows;
915 const short int nc = mat->n_cols;
916
917 /* Update mv */
918 for (short int i = 0; i < nr; i++) {
919 cs_real_t *m_i = mat->val + i*nc;
920 for (short int j = 0; j < nc; j++)
921 mv[i] += m_i[j] * vec[j];
922 }
923
924 }
925
926 /*----------------------------------------------------------------------------*/
927 /*!
928 * \brief Compute a dense matrix-vector product for a rectangular matrix
929 * which is transposed.
930 * mv has been previously allocated. mv is updated inside this
931 * function. Don't forget to initialize mv if needed.
932 *
933 * \param[in] mat local matrix to use
934 * \param[in] vec local vector to use (size = mat->n_cols)
935 * \param[in, out] mv result of the operation (size = mat->n_rows)
936 */
937 /*----------------------------------------------------------------------------*/
938
939 void
cs_sdm_matvec_transposed(const cs_sdm_t * mat,const cs_real_t * vec,cs_real_t * mv)940 cs_sdm_matvec_transposed(const cs_sdm_t *mat,
941 const cs_real_t *vec,
942 cs_real_t *mv)
943 {
944 /* Sanity checks */
945 assert(mat != NULL && vec != NULL && mv != NULL);
946
947 const short int nr = mat->n_rows;
948 const short int nc = mat->n_cols;
949
950 /* Update mv */
951 for (short int i = 0; i < nr; i++) {
952 const cs_real_t *m_i = mat->val + i*nc;
953 const cs_real_t v = vec[i];
954 for (short int j = 0; j < nc; j++)
955 mv[j] += m_i[j] * v;
956 }
957
958 }
959
960 /*----------------------------------------------------------------------------*/
961 /*!
962 * \brief Add two matrices defined by block: loc += add
963 *
964 * \param[in, out] mat local matrix storing the result
965 * \param[in] add values to add to mat
966 */
967 /*----------------------------------------------------------------------------*/
968
969 void
cs_sdm_block_add(cs_sdm_t * mat,const cs_sdm_t * add)970 cs_sdm_block_add(cs_sdm_t *mat,
971 const cs_sdm_t *add)
972 {
973 if (mat == NULL || add == NULL)
974 return;
975
976 const cs_sdm_block_t *mat_desc = mat->block_desc;
977
978 assert(add->block_desc != NULL && mat_desc != NULL);
979 assert(add->block_desc->n_row_blocks == mat_desc->n_row_blocks);
980 assert(add->block_desc->n_col_blocks == mat_desc->n_col_blocks);
981
982 for (short int bi = 0; bi < mat_desc->n_row_blocks; bi++) {
983 for (short int bj = 0; bj < mat_desc->n_col_blocks; bj++) {
984
985 cs_sdm_t *mat_ij = cs_sdm_get_block(mat, bi, bj);
986 const cs_sdm_t *add_ij = cs_sdm_get_block(add, bi, bj);
987
988 cs_sdm_add(mat_ij, add_ij);
989
990 } /* Loop on column blocks */
991 } /* Loop on row blocks */
992
993 }
994
995 /*----------------------------------------------------------------------------*/
996 /*!
997 * \brief Add two matrices defined by block: loc += mult_coef * add
998 *
999 * \param[in, out] mat local matrix storing the result
1000 * \param[in] mult_coef multiplicative coefficient
1001 * \param[in] add values to add to mat
1002 */
1003 /*----------------------------------------------------------------------------*/
1004
1005 void
cs_sdm_block_add_mult(cs_sdm_t * mat,cs_real_t mult_coef,const cs_sdm_t * add)1006 cs_sdm_block_add_mult(cs_sdm_t *mat,
1007 cs_real_t mult_coef,
1008 const cs_sdm_t *add)
1009 {
1010 if (mat == NULL || add == NULL)
1011 return;
1012
1013 const cs_sdm_block_t *add_desc = add->block_desc;
1014 const cs_sdm_block_t *mat_desc = mat->block_desc;
1015
1016 CS_UNUSED(add_desc); /* Only in debug mode */
1017 assert(add_desc != NULL && mat_desc != NULL);
1018 assert(add_desc->n_row_blocks == mat_desc->n_row_blocks);
1019 assert(add_desc->n_col_blocks == mat_desc->n_col_blocks);
1020
1021 for (short int bi = 0; bi < mat_desc->n_row_blocks; bi++) {
1022 for (short int bj = 0; bj < mat_desc->n_col_blocks; bj++) {
1023
1024 cs_sdm_t *mat_ij = cs_sdm_get_block(mat, bi, bj);
1025 const cs_sdm_t *add_ij = cs_sdm_get_block(add, bi, bj);
1026
1027 cs_sdm_add_mult(mat_ij, mult_coef, add_ij);
1028
1029 } /* Loop on column blocks */
1030 } /* Loop on row blocks */
1031
1032 }
1033
1034 /*----------------------------------------------------------------------------*/
1035 /*!
1036 * \brief Compute a dense matrix-vector product for a rectangular matrix
1037 * defined by block
1038 * mv has been previously allocated
1039 *
1040 * \param[in] mat local matrix to use
1041 * \param[in] vec local vector to use (size = mat->n_cols)
1042 * \param[in, out] mv result of the operation (size = mat->n_rows)
1043 */
1044 /*----------------------------------------------------------------------------*/
1045
1046 void
cs_sdm_block_matvec(const cs_sdm_t * mat,const cs_real_t * vec,cs_real_t * mv)1047 cs_sdm_block_matvec(const cs_sdm_t *mat,
1048 const cs_real_t *vec,
1049 cs_real_t *mv)
1050 {
1051 /* Sanity checks */
1052 assert(mat != NULL && vec != NULL && mv != NULL);
1053
1054 if (mat == NULL)
1055 return;
1056
1057 const cs_sdm_block_t *mat_desc = mat->block_desc;
1058
1059 assert(mat_desc != NULL);
1060 memset(mv, 0, mat->n_rows * sizeof(cs_real_t));
1061
1062 int c_shift = 0, r_shift = 0;
1063
1064 for (short int bi = 0; bi < mat_desc->n_row_blocks; bi++) {
1065
1066 cs_real_t *_mv = mv + r_shift;
1067 int n_rows = 0;
1068
1069 c_shift = 0;
1070 for (short int bj = 0; bj < mat_desc->n_col_blocks; bj++) {
1071
1072 cs_sdm_t *mat_ij = cs_sdm_get_block(mat, bi, bj);
1073
1074 cs_sdm_update_matvec(mat_ij, vec + c_shift, _mv);
1075 c_shift += mat_ij->n_cols;
1076 n_rows = mat_ij->n_rows;
1077
1078 } /* Loop on column blocks */
1079
1080 r_shift += n_rows;
1081
1082 } /* Loop on row blocks */
1083
1084 assert(r_shift == mat->n_rows);
1085 assert(c_shift == mat->n_cols);
1086
1087 }
1088
1089 /*----------------------------------------------------------------------------*/
1090 /*!
1091 * \brief Add two small dense matrices: loc += add
1092 *
1093 * \param[in, out] mat local matrix storing the result
1094 * \param[in] add values to add to mat
1095 */
1096 /*----------------------------------------------------------------------------*/
1097
1098 void
cs_sdm_add(cs_sdm_t * mat,const cs_sdm_t * add)1099 cs_sdm_add(cs_sdm_t *mat,
1100 const cs_sdm_t *add)
1101 {
1102 /* Sanity checks */
1103 assert(mat != NULL && add != NULL);
1104 assert(mat->n_rows == add->n_rows);
1105 assert(mat->n_cols == add->n_cols);
1106
1107 for (int i = 0; i < mat->n_rows*mat->n_cols; i++)
1108 mat->val[i] += add->val[i];
1109 }
1110
1111 /*----------------------------------------------------------------------------*/
1112 /*!
1113 * \brief Add two small dense matrices: loc += alpha*add
1114 *
1115 * \param[in, out] mat local matrix storing the result
1116 * \param[in] alpha multiplicative coefficient
1117 * \param[in] add values to add to mat
1118 */
1119 /*----------------------------------------------------------------------------*/
1120
1121 void
cs_sdm_add_mult(cs_sdm_t * mat,cs_real_t alpha,const cs_sdm_t * add)1122 cs_sdm_add_mult(cs_sdm_t *mat,
1123 cs_real_t alpha,
1124 const cs_sdm_t *add)
1125 {
1126 /* Sanity checks */
1127 assert(mat != NULL && add != NULL);
1128 assert(mat->n_rows == add->n_rows);
1129 assert(mat->n_cols == add->n_cols);
1130
1131 if (fabs(alpha) < FLT_MIN)
1132 return;
1133
1134 for (int i = 0; i < mat->n_rows*mat->n_cols; i++)
1135 mat->val[i] += alpha * add->val[i];
1136 }
1137
1138 /*----------------------------------------------------------------------------*/
1139 /*!
1140 * \brief Define a new matrix by adding the given matrix with its transpose.
1141 * Keep the transposed matrix for a future use.
1142 *
1143 * \param[in, out] mat local matrix to transpose and add
1144 * \param[in, out] tr transposed of the local matrix
1145 */
1146 /*----------------------------------------------------------------------------*/
1147
1148 void
cs_sdm_square_add_transpose(cs_sdm_t * mat,cs_sdm_t * tr)1149 cs_sdm_square_add_transpose(cs_sdm_t *mat,
1150 cs_sdm_t *tr)
1151 {
1152 /* Sanity check */
1153 assert(mat != NULL && tr != NULL);
1154 assert(mat->n_rows <= tr->n_max_cols && mat->n_cols <= tr->n_max_rows);
1155 assert(mat->n_rows == mat->n_cols);
1156
1157 if (mat->n_rows < 1 || mat->n_cols < 1)
1158 return;
1159
1160 tr->n_rows = mat->n_cols;
1161 tr->n_cols = mat->n_rows;
1162
1163 for (short int i = 0; i < mat->n_rows; i++) {
1164
1165 const int ii = i*mat->n_cols + i;
1166 tr->val[ii] = mat->val[ii];
1167 mat->val[ii] *= 2;
1168
1169 for (short int j = i+1; j < mat->n_cols; j++) {
1170
1171 const int ij = i*mat->n_cols + j;
1172 const int ji = j*mat->n_cols + i;
1173
1174 tr->val[ji] = mat->val[ij];
1175 tr->val[ij] = mat->val[ji];
1176 mat->val[ij] += tr->val[ij];
1177 mat->val[ji] += tr->val[ji];
1178
1179 }
1180 }
1181
1182 }
1183
1184 /*----------------------------------------------------------------------------*/
1185 /*!
1186 * \brief Set the given matrix to two times its symmetric part
1187 * mat --> mat + mat_tr = 2*symm(mat)
1188 *
1189 * \param[in, out] mat small dense matrix to transform
1190 */
1191 /*----------------------------------------------------------------------------*/
1192
1193 void
cs_sdm_square_2symm(cs_sdm_t * mat)1194 cs_sdm_square_2symm(cs_sdm_t *mat)
1195 {
1196 /* Sanity check */
1197 assert(mat != NULL);
1198 assert(mat->n_rows == mat->n_cols);
1199
1200 if (mat->n_rows < 1)
1201 return;
1202
1203 for (short int i = 0; i < mat->n_rows; i++) {
1204
1205 cs_real_t *mi = mat->val + i*mat->n_cols;
1206 for (short int j = i; j < mat->n_cols; j++) {
1207 int ji = j*mat->n_rows + i;
1208 mi[j] += mat->val[ji];
1209 mat->val[ji] = mi[j];
1210 } /* col j */
1211
1212 } /* row i */
1213
1214 }
1215
1216 /*----------------------------------------------------------------------------*/
1217 /*!
1218 * \brief Set the given matrix into its anti-symmetric part
1219 *
1220 * \param[in, out] mat small dense matrix to transform
1221 */
1222 /*----------------------------------------------------------------------------*/
1223
1224 void
cs_sdm_square_asymm(cs_sdm_t * mat)1225 cs_sdm_square_asymm(cs_sdm_t *mat)
1226 {
1227 /* Sanity check */
1228 assert(mat != NULL);
1229 assert(mat->n_rows == mat->n_cols);
1230
1231 if (mat->n_rows < 1)
1232 return;
1233
1234 for (short int i = 0; i < mat->n_rows; i++) {
1235
1236 cs_real_t *mi = mat->val + i*mat->n_cols;
1237
1238 mi[i] = 0;
1239
1240 for (short int j = i+1; j < mat->n_cols; j++) {
1241
1242 int ji = j*mat->n_rows + i;
1243
1244 mi[j] = 0.5*(mi[j] - mat->val[ji]);
1245 mat->val[ji] = -mi[j];
1246
1247 }
1248 }
1249
1250 }
1251
1252 /*----------------------------------------------------------------------------*/
1253 /*!
1254 * \brief Set the given block matrix into its anti-symmetric part
1255 *
1256 * \param[in, out] mat small dense matrix defined by block to transform
1257 */
1258 /*----------------------------------------------------------------------------*/
1259
1260 void
cs_sdm_block_square_asymm(cs_sdm_t * mat)1261 cs_sdm_block_square_asymm(cs_sdm_t *mat)
1262 {
1263 /* Sanity check */
1264 assert(mat != NULL);
1265 assert(mat->flag & CS_SDM_BY_BLOCK);
1266 assert(mat->n_rows == mat->n_cols);
1267
1268 if (mat->n_rows < 1)
1269 return;
1270
1271 const cs_sdm_block_t *matb = mat->block_desc;
1272 if (matb->n_row_blocks < 1)
1273 return;
1274 assert(matb->n_row_blocks == matb->n_col_blocks);
1275
1276 for (int bi = 0; bi < matb->n_row_blocks; bi++) {
1277
1278 /* Diagonal block */
1279 cs_sdm_t *bII = cs_sdm_get_block(mat, bi, bi);
1280
1281 cs_sdm_square_asymm(bII);
1282
1283 for (int bj = bi+1; bj < matb->n_col_blocks; bj++) {
1284
1285 cs_sdm_t *bIJ = cs_sdm_get_block(mat, bi, bj);
1286 cs_sdm_t *bJI = cs_sdm_get_block(mat, bj, bi);
1287
1288 assert(bIJ->n_rows == bJI->n_cols);
1289 assert(bIJ->n_cols == bJI->n_rows);
1290
1291 for (short int i = 0; i < bIJ->n_rows; i++) {
1292 cs_real_t *bIJ_i = bIJ->val + i*bIJ->n_cols;
1293 for (short int j = 0; j < bIJ->n_cols; j++) {
1294
1295 int ji = j*bIJ->n_rows + i;
1296
1297 bIJ_i[j] = 0.5*(bIJ_i[j] - bJI->val[ji]);
1298 bJI->val[ji] = -bIJ_i[j];
1299
1300 } /* bIJ columns */
1301 } /* bIJ rows */
1302
1303 } /* Loop on column blocks */
1304
1305 } /* Loop on row blocks */
1306
1307 }
1308
1309 /*----------------------------------------------------------------------------*/
1310 /*!
1311 * \brief Decompose a matrix into the matrix product Q.R
1312 * Case of a 3x3 symmetric matrix
1313 *
1314 * \param[in] m matrix values
1315 * \param[in, out] Qt transposed of matrix Q
1316 * \param[in, out] R vector of the coefficient of the decomposition
1317 *
1318 * \note: R is an upper triangular matrix. Stored in a compact way.
1319 *
1320 * j= 0, 1, 2
1321 * i=0| 0| 1| 2|
1322 * i=1 | 4| 5|
1323 * i=2 6|
1324 */
1325 /*----------------------------------------------------------------------------*/
1326
1327 void
cs_sdm_33_sym_qr_compute(const cs_real_t m[9],cs_real_t Qt[9],cs_real_t R[6])1328 cs_sdm_33_sym_qr_compute(const cs_real_t m[9],
1329 cs_real_t Qt[9],
1330 cs_real_t R[6])
1331 {
1332 /* Sanity checks */
1333 assert(m != NULL && Qt != NULL && R != NULL);
1334
1335 /* Work as if Q is defined column by column (instead of row). At the end, we
1336 transpose */
1337
1338 cs_nvec3_t tmp;
1339 cs_real_3_t qhat;
1340
1341 cs_nvec3(m, &tmp); /* normalize the first row (i.e first column) */
1342 R[0] = tmp.meas; /* R00 */
1343 assert(tmp.meas > cs_math_zero_threshold);
1344 cs_real_t *q0 = Qt;
1345 for (int k = 0; k < 3; k++)
1346 q0[k] = tmp.unitv[k];
1347
1348 const cs_real_t *m1 = m + 3; /* qhat = m1 */
1349 R[1] = cs_math_3_dot_product(q0, m1); /* R01 */
1350 for (int k = 0; k < 3; k++)
1351 qhat[k] = m1[k] - R[1]*q0[k];
1352
1353 cs_nvec3(qhat, &tmp);
1354 R[3] = tmp.meas; /* R11 */
1355 assert(tmp.meas > cs_math_zero_threshold);
1356 cs_real_t *q1 = Qt + 3;
1357 for (int k = 0; k < 3; k++)
1358 q1[k] = tmp.unitv[k];
1359
1360 const cs_real_t *m2 = m + 6; /* qhat = m2 */
1361 R[2] = cs_math_3_dot_product(q0, m2); /* R02 */
1362 for (int k = 0; k < 3; k++)
1363 qhat[k] = m2[k] - R[2]*q0[k];
1364 R[4] = cs_math_3_dot_product(q1, qhat); /* R12 */
1365 for (int k = 0; k < 3; k++)
1366 qhat[k] -= R[4]*q1[k];
1367
1368 cs_nvec3(qhat, &tmp);
1369 R[5] = tmp.meas;
1370 assert(tmp.meas > cs_math_zero_threshold);
1371 cs_real_t *q2 = Qt + 6;
1372 for (int k = 0; k < 3; k++)
1373 q2[k] = tmp.unitv[k];
1374 }
1375
1376 /*----------------------------------------------------------------------------*/
1377 /*!
1378 * \brief LU factorization of a small dense 3x3 matrix.
1379 *
1380 * \param[in] m pointer to a cs_sdm_t structure
1381 * \param[in, out] facto compact storage of coefficients for the LU
1382 * factorization
1383 *
1384 * \note: facto stores L the lower triangular matrix (without its diagonal
1385 * entries assumed to be equal to 1) and U the upper triangular matrix.
1386 */
1387 /*----------------------------------------------------------------------------*/
1388
1389 void
cs_sdm_33_lu_compute(const cs_sdm_t * m,cs_real_t facto[9])1390 cs_sdm_33_lu_compute(const cs_sdm_t *m,
1391 cs_real_t facto[9])
1392 {
1393 /* Sanity check */
1394 assert(m != NULL && facto != NULL);
1395 assert(m->n_cols == m->n_rows);
1396
1397 /* j=0: first row */
1398 const cs_real_t d00 = m->val[0];
1399 if (fabs(d00) < cs_math_zero_threshold)
1400 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1401 const cs_real_t invd00 = 1./d00;
1402
1403 facto[0] = d00, facto[1] = m->val[1], facto[2] = m->val[2]; /* 1st row */
1404 facto[3] = m->val[3]*invd00; /* L10 */
1405 facto[4] = m->val[4] - facto[3]*facto[1]; /* U11 */
1406 facto[5] = m->val[5] - facto[3]*facto[2]; /* U12 */
1407 facto[6] = m->val[6]*invd00; /* L20 */
1408 facto[7] = (m->val[7] - facto[6]*m->val[1])/facto[4]; /* L21 */
1409 facto[8] = m->val[8] - facto[6]*m->val[2] - facto[7]*facto[5]; /* U22 */
1410 }
1411
1412 /*----------------------------------------------------------------------------*/
1413 /*!
1414 * \brief LU factorization of a small dense matrix. Small means that the
1415 * number m->n_rows is less than 100 for instance.
1416 *
1417 * \param[in] m pointer to a cs_sdm_t structure
1418 * \param[in, out] facto compact storage of coefficients for the LU
1419 * factorization (should be allocated to the right
1420 * size, i.e. m->n_rows*m->n_rows)
1421 *
1422 * \note: facto stores L the lower triangular matrix (without its diagonal
1423 * entries assumed to be equal to 1) and U the upper triangular matrix.
1424 */
1425 /*----------------------------------------------------------------------------*/
1426
1427 void
cs_sdm_lu_compute(const cs_sdm_t * m,cs_real_t facto[])1428 cs_sdm_lu_compute(const cs_sdm_t *m,
1429 cs_real_t facto[])
1430 {
1431 /* Sanity check */
1432 assert(m != NULL && facto != NULL);
1433 assert(m->n_cols == m->n_rows);
1434
1435 const cs_lnum_t n = m->n_rows;
1436
1437 /* Initialization */
1438 memcpy(facto, m->val, n*n*sizeof(cs_real_t));
1439
1440 /* Each step work on a smaller block */
1441 for (cs_lnum_t k = 0; k < n-1; k++) {
1442
1443 /* Pivot */
1444 cs_real_t pivot = facto[k*(n+1)];
1445 if (fabs(pivot) < cs_math_zero_threshold)
1446 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1447 const cs_real_t invp = 1./pivot;
1448
1449 for (cs_lnum_t i = k+1; i < m->n_rows; i++) { /* Loop on rows */
1450
1451 cs_real_t *pr_fact = facto + (i-1)*n;
1452 cs_real_t *cr_fact = pr_fact + n;
1453
1454 /* L-part: lower part of the (i,i-1) entry */
1455 cr_fact[k] *= invp;
1456 const double lval = cr_fact[k];
1457
1458 /* U-part */
1459 for (cs_lnum_t j = k+1; j < n; j++) {
1460 cr_fact[j] -= lval*pr_fact[j];
1461
1462 } /* Loop on j (columns) */
1463
1464 } /* Loop on i (rows) */
1465
1466 } /* Loop on k (block size) */
1467
1468 }
1469
1470 /*----------------------------------------------------------------------------*/
1471 /*!
1472 * \brief Solve a system A.sol = rhs using a LU factorization of A (a small
1473 * 3x3 dense matrix).
1474 *
1475 * \param[in] facto compact storage of coefficients for the LU
1476 * factorization (should be allocated to the right
1477 * size, i.e. n_rows*n_rows)
1478 * \param[in] rhs right-hand side
1479 * \param[in, out] sol solution
1480 *
1481 * \note: facto stores L the lower triangular matrix (without its diagonal
1482 * entries assumed to be equal to 1) and U the upper triangular matrix.
1483 */
1484 /*----------------------------------------------------------------------------*/
1485
1486 void
cs_sdm_33_lu_solve(const cs_real_t facto[9],const cs_real_t rhs[3],cs_real_t sol[3])1487 cs_sdm_33_lu_solve(const cs_real_t facto[9],
1488 const cs_real_t rhs[3],
1489 cs_real_t sol[3])
1490 {
1491 /* Sanity check */
1492 assert(facto != NULL && rhs != NULL && sol != NULL);
1493
1494 /* Forward pass */
1495 sol[0] = rhs[0];
1496 sol[1] = rhs[1] - facto[3]*sol[0];
1497 sol[2] = rhs[2] - facto[6]*sol[0] - facto[7]*sol[1];
1498
1499 /* Backward pass */
1500 sol[2] = sol[2]/facto[8];
1501 sol[1] = (sol[1] - facto[5]*sol[2])/facto[4];
1502 sol[0] = (sol[0] - facto[2]*sol[2] - facto[1]*sol[1])/facto[0];
1503 }
1504
1505 /*----------------------------------------------------------------------------*/
1506 /*!
1507 * \brief Solve a system A.sol = rhs using a LU factorization of A (a small
1508 * dense matrix).
1509 *
1510 * \param[in] n_rows dimension of the system to solve
1511 * \param[in] facto compact storage of coefficients for the LU
1512 * factorization (should be allocated to the right
1513 * size, i.e. n_rows*n_rows)
1514 * \param[in] rhs right-hand side
1515 * \param[in, out] sol solution
1516 *
1517 * \note: facto stores L the lower triangular matrix (without its diagonal
1518 * entries assumed to be equal to 1) and U the upper triangular matrix.
1519 */
1520 /*----------------------------------------------------------------------------*/
1521
1522 void
cs_sdm_lu_solve(cs_lnum_t n_rows,const cs_real_t facto[],const cs_real_t * rhs,cs_real_t * sol)1523 cs_sdm_lu_solve(cs_lnum_t n_rows,
1524 const cs_real_t facto[],
1525 const cs_real_t *rhs,
1526 cs_real_t *sol)
1527 {
1528 /* Sanity check */
1529 assert(facto != NULL && rhs != NULL && sol != NULL);
1530
1531 /* Forward pass: L.y = rhs (sol stores the values for y) */
1532 for (cs_lnum_t i = 0; i < n_rows; i++) {
1533
1534 sol[i] = rhs[i];
1535 const cs_real_t *_fact = facto + i*n_rows;
1536 for (cs_lnum_t j = 0; j < i; j++) {
1537 sol[i] -= sol[j]*_fact[j];
1538 }
1539
1540 }
1541
1542 /* Backward pass: U.sol = y */
1543 for (cs_lnum_t i = n_rows-1; i >= 0; i--) { /* Loop on rows */
1544 for (cs_lnum_t j = n_rows-1; j > i; j--) { /* Loop on columns */
1545
1546 sol[i] -= sol[j]*facto[i*n_rows + j];
1547
1548 }
1549 sol[i] /= facto[i*(n_rows + 1)];
1550 }
1551
1552 }
1553
1554 /*----------------------------------------------------------------------------*/
1555 /*!
1556 * \brief LDL^T: Modified Cholesky decomposition of a 3x3 SPD matrix.
1557 * For more reference, see for instance
1558 * http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1559 *
1560 * \param[in] m pointer to a cs_sdm_t structure
1561 * \param[in, out] facto vector of the coefficient of the decomposition
1562 *
1563 * \note: facto is a lower triangular matrix. The first value of the
1564 * j-th (zero-based) row is easily accessed: its index is j*(j+1)/2
1565 * (cf sum of the first j natural numbers). Instead of 1 on the diagonal
1566 * we store the inverse of D mat in the L.D.L^T decomposition
1567 */
1568 /*----------------------------------------------------------------------------*/
1569
1570 void
cs_sdm_33_ldlt_compute(const cs_sdm_t * m,cs_real_t facto[6])1571 cs_sdm_33_ldlt_compute(const cs_sdm_t *m,
1572 cs_real_t facto[6])
1573 {
1574 /* Sanity check */
1575 assert(m != NULL && facto != NULL);
1576 assert(m->n_cols == m->n_rows && m->n_cols == 3);
1577
1578 /* j=0: first row */
1579 const cs_real_t d00 = m->val[0];
1580 if (fabs(d00) < cs_math_zero_threshold)
1581 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1582
1583 facto[0] = 1. / d00;
1584 const cs_real_t l10 = facto[1] = m->val[1] * facto[0];
1585 const cs_real_t l20 = facto[3] = m->val[2] * facto[0];
1586
1587 /* j=1: second row */
1588 const cs_real_t d11 = m->val[4] - l10*l10 * d00;
1589 if (fabs(d11) < cs_math_zero_threshold)
1590 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1591 facto[2] = 1. / d11;
1592 const cs_real_t l21 = facto[4] = (m->val[5] - l20*d00*l10) * facto[2];
1593
1594 /* j=2: third row */
1595 const cs_real_t d22 = m->val[8] - l20*l20*d00 - l21*l21*d11;
1596 if (fabs(d22) < cs_math_zero_threshold)
1597 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1598 facto[5] = 1. / d22;
1599 }
1600
1601 /*----------------------------------------------------------------------------*/
1602 /*!
1603 * \brief LDL^T: Modified Cholesky decomposition of a 4x4 SPD matrix.
1604 * For more reference, see for instance
1605 * http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1606 *
1607 * \param[in] m pointer to a cs_sdm_t structure
1608 * \param[in, out] facto vector of the coefficient of the decomposition
1609 *
1610 * \note: facto is a lower triangular matrix. The first value of the
1611 * j-th (zero-based) row is easily accessed: its index is j*(j+1)/2
1612 * (cf sum of the first j natural numbers). Instead of 1 on the diagonal
1613 * we store the inverse of D mat in the L.D.L^T decomposition
1614 */
1615 /*----------------------------------------------------------------------------*/
1616
1617 void
cs_sdm_44_ldlt_compute(const cs_sdm_t * m,cs_real_t facto[10])1618 cs_sdm_44_ldlt_compute(const cs_sdm_t *m,
1619 cs_real_t facto[10])
1620 {
1621 /* Sanity check */
1622 assert(m != NULL && facto != NULL);
1623 assert(m->n_cols == m->n_rows && m->n_cols == 4);
1624
1625 /* j=0: first row */
1626 const cs_real_t d00 = m->val[0];
1627 if (fabs(d00) < cs_math_zero_threshold)
1628 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1629
1630 facto[0] = 1. / d00;
1631 const cs_real_t l10 = facto[1] = m->val[1] * facto[0];
1632 const cs_real_t l20 = facto[3] = m->val[2] * facto[0];
1633 const cs_real_t l30 = facto[6] = m->val[3] * facto[0];
1634
1635 /* j=1: second row */
1636 const cs_real_t d11 = m->val[5] - l10*l10 * d00;
1637 if (fabs(d11) < cs_math_zero_threshold)
1638 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1639 facto[2] = 1. / d11;
1640 const cs_real_t l21 = facto[4] = (m->val[6] - l20*d00*l10) * facto[2];
1641 const cs_real_t l31 = facto[7] = (m->val[7] - l30*d00*l10) * facto[2];
1642
1643 /* j=2: third row */
1644 const cs_real_t d22 = m->val[10] - l20*l20*d00 - l21*l21*d11;
1645 if (fabs(d22) < cs_math_zero_threshold)
1646 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1647 facto[5] = 1. / d22;
1648 const cs_real_t l32 = facto[8] =
1649 (m->val[11] - l30*d00*l20 - l31*d11*l21) * facto[5];
1650
1651 /* j=3: row 4 */
1652 const cs_real_t d33 = m->val[15] - l30*l30*d00 - l31*l31*d11 - l32*l32*d22;
1653 if (fabs(d33) < cs_math_zero_threshold)
1654 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1655 facto[9] = 1. / d33;
1656 }
1657
1658 /*----------------------------------------------------------------------------*/
1659 /*!
1660 * \brief LDL^T: Modified Cholesky decomposition of a 6x6 SPD matrix.
1661 * For more reference, see for instance
1662 * http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1663 *
1664 * \param[in] m pointer to a cs_sdm_t structure
1665 * \param[in, out] facto vector of the coefficient of the decomposition
1666 *
1667 * \note: facto is a lower triangular matrix. The first value of the
1668 * j-th (zero-based) row is easily accessed: its index is j*(j+1)/2
1669 * (cf sum of the first j natural numbers). Instead of 1 on the diagonal
1670 * we store the inverse of D mat in the L.D.L^T decomposition
1671 */
1672 /*----------------------------------------------------------------------------*/
1673
1674 void
cs_sdm_66_ldlt_compute(const cs_sdm_t * m,cs_real_t facto[21])1675 cs_sdm_66_ldlt_compute(const cs_sdm_t *m,
1676 cs_real_t facto[21])
1677 {
1678 /* Sanity check */
1679 assert(m != NULL && facto != NULL);
1680 assert(m->n_cols == m->n_rows && m->n_cols == 6);
1681
1682 /* j=0: first row */
1683 const cs_real_t d00 = m->val[0];
1684 if (fabs(d00) < cs_math_zero_threshold)
1685 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1686
1687 facto[0] = 1. / d00;
1688 const cs_real_t l10 = facto[ 1] = m->val[1] * facto[0];
1689 const cs_real_t l20 = facto[ 3] = m->val[2] * facto[0];
1690 const cs_real_t l30 = facto[ 6] = m->val[3] * facto[0];
1691 const cs_real_t l40 = facto[10] = m->val[4] * facto[0];
1692 const cs_real_t l50 = facto[15] = m->val[5] * facto[0];
1693
1694 /* j=1: second row */
1695 const cs_real_t d11 = m->val[7] - l10*l10 * d00;
1696 if (fabs(d11) < cs_math_zero_threshold)
1697 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1698 facto[2] = 1. / d11;
1699 const cs_real_t d0l10 = d00*l10;
1700 const cs_real_t l21 = facto[ 4] = (m->val[ 8] - l20*d0l10) * facto[2];
1701 const cs_real_t l31 = facto[ 7] = (m->val[ 9] - l30*d0l10) * facto[2];
1702 const cs_real_t l41 = facto[11] = (m->val[10] - l40*d0l10) * facto[2];
1703 const cs_real_t l51 = facto[16] = (m->val[11] - l50*d0l10) * facto[2];
1704
1705 /* j=2: third row */
1706 const cs_real_t d22 = m->val[14] - l20*l20*d00 - l21*l21*d11;
1707 if (fabs(d22) < cs_math_zero_threshold)
1708 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1709 facto[5] = 1. / d22;
1710 const cs_real_t d1l21 = d11*l21, d0l20 = d00*l20;
1711 const cs_real_t l32 = facto[ 8] =
1712 (m->val[15] - l30*d0l20 - l31*d1l21) * facto[5];
1713 const cs_real_t l42 = facto[12] =
1714 (m->val[16] - l30*d0l20 - l31*d1l21) * facto[5];
1715 const cs_real_t l52 = facto[17] =
1716 (m->val[17] - l30*d0l20 - l31*d1l21) * facto[5];
1717
1718 /* j=3: row 4 */
1719 const cs_real_t d33 = m->val[21] - l30*l30*d00 - l31*l31*d11 - l32*l32*d22;
1720 if (fabs(d33) < cs_math_zero_threshold)
1721 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1722 facto[9] = 1. / d33;
1723 const cs_real_t d1l31 = d11*l31, d0l30 = d00*l30, d2l32 = d22*l32;
1724 const cs_real_t l43 = facto[13] =
1725 (m->val[22] - l40*d0l30 - l41*d1l31 - l42*d2l32) * facto[9];
1726 const cs_real_t l53 = facto[18] =
1727 (m->val[23] - l50*d0l30 - l51*d1l31 - l52*d2l32) * facto[9];
1728
1729 /* j=4: row 5 */
1730 const cs_real_t d44 =
1731 m->val[28] - l40*l40*d00 - l41*l41*d11 - l42*l42*d22 - l43*l43*d33;
1732 if (fabs(d44) < cs_math_zero_threshold)
1733 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1734 facto[14] = 1. / d44;
1735 const cs_real_t l54 = facto[19] = facto[14] *
1736 (m->val[29] - l50*d00*l40 - l51*d11*l41 - l52*d22*l42 - l53*d33*l43);
1737
1738 /* j=5: row 6 */
1739 const cs_real_t d55 = m->val[35]
1740 - l50*l50*d00 - l51*l51*d11 - l52*l52*d22 - l53*l53*d33 - l54*l54*d44;
1741 if (fabs(d55) < cs_math_zero_threshold)
1742 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1743 facto[20] = 1. / d55;
1744
1745 }
1746
1747 /*----------------------------------------------------------------------------*/
1748 /*!
1749 * \brief LDL^T: Modified Cholesky decomposition of a SPD matrix.
1750 * For more reference, see for instance
1751 * http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1752 *
1753 * \param[in] m pointer to a cs_sdm_t structure
1754 * \param[in, out] facto vector of the coefficient of the decomposition
1755 * \param[in, out] dkk store temporary the diagonal (size = n_rows)
1756 *
1757 * \note: facto is a lower triangular matrix. The first value of the
1758 * j-th (zero-based) row is easily accessed: its index is j*(j+1)/2
1759 * (cf sum of the first j natural numbers). Instead of 1 on the diagonal
1760 * we store the inverse of D mat in the L.D.L^T decomposition
1761 */
1762 /*----------------------------------------------------------------------------*/
1763
1764 void
cs_sdm_ldlt_compute(const cs_sdm_t * m,cs_real_t * facto,cs_real_t * dkk)1765 cs_sdm_ldlt_compute(const cs_sdm_t *m,
1766 cs_real_t *facto,
1767 cs_real_t *dkk)
1768 {
1769 /* Sanity checks */
1770 assert(m != NULL && facto != NULL);
1771 assert(m->n_cols == m->n_rows);
1772
1773 const short int n = m->n_cols;
1774
1775 if (n == 1) {
1776 facto[0] = 1. / m->val[0];
1777 return;
1778 }
1779
1780 int rowj_idx = 0;
1781
1782 /* Factorization (column-major algorithm) */
1783 for (short int j = 0; j < n; j++) {
1784
1785 rowj_idx += j;
1786 const int djj_idx = rowj_idx + j;
1787
1788 switch (j) {
1789
1790 case 0: /* Optimization for the first colum */
1791 {
1792 dkk[0] = m->val[0]; /* d00 */
1793
1794 if (fabs(dkk[0]) < cs_math_zero_threshold)
1795 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1796 const cs_real_t inv_d00 = facto[0] = 1. / dkk[0];
1797
1798 /* l_i0 = a_i0 / d_00 */
1799 short int rowi_idx = rowj_idx;
1800 const cs_real_t *a_0 = m->val; /* a_ij = a_ji */
1801 for (short int i = j+1; i < n; i++) { /* Loop on rows */
1802
1803 rowi_idx += i;
1804 cs_real_t *l_i = facto + rowi_idx;
1805 l_i[0] = a_0[i] * inv_d00;
1806
1807 }
1808
1809 }
1810 break;
1811
1812 case 1: /* Optimization for the second colum */
1813 {
1814 /* d_11 = a_11 - l_10^2 * d_00 */
1815 cs_real_t *l_1 = facto + rowj_idx;
1816
1817 const cs_real_t d11 = dkk[1] = m->val[n+1] - l_1[0]*l_1[0]*dkk[0];
1818 if (fabs(d11) < cs_math_zero_threshold)
1819 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1820
1821 const cs_real_t inv_d11 = facto[djj_idx] = 1. / d11;
1822
1823 /* l_i1 = (a_i1 - l_i0 * d_00 * l_10 ) / d_11 */
1824 short int rowi_idx = rowj_idx;
1825 const cs_real_t *a_1 = m->val + n; /* a_i1 = a_1i */
1826 for (short int i = 2; i < n; i++) { /* Loop on rows */
1827
1828 rowi_idx += i;
1829 cs_real_t *l_i = facto + rowi_idx;
1830 l_i[1] = (a_1[i] - l_i[0] * dkk[0] * l_1[0]) * inv_d11;
1831
1832 }
1833
1834 }
1835 break;
1836
1837 default:
1838 {
1839 /* d_jj = a_jj - \sum_{k=0}^{j-1} l_jk^2 * d_kk */
1840 cs_real_t *l_j = facto + rowj_idx;
1841
1842 cs_real_t sum = 0.;
1843 for (short int k = 0; k < j; k++)
1844 sum += l_j[k]*l_j[k] * dkk[k];
1845 const cs_real_t djj = dkk[j] = m->val[j*n+j] - sum;
1846
1847 if (fabs(djj) < cs_math_zero_threshold)
1848 bft_error(__FILE__, __LINE__, 0, _msg_small_p, __func__);
1849
1850 const cs_real_t inv_djj = facto[djj_idx] = 1. / djj;
1851
1852 /* l_ij = (a_ij - \sum_{k=1}^{j-1} l_ik * d_kk * l_jk ) / d_jj */
1853 short int rowi_idx = rowj_idx;
1854 const cs_real_t *a_j = m->val + j*n; /* a_ij = a_ji */
1855 for (short int i = j+1; i < n; i++) { /* Loop on rows */
1856
1857 rowi_idx += i;
1858 cs_real_t *l_i = facto + rowi_idx;
1859 sum = 0.;
1860 for (short int k = 0; k < j; k++)
1861 sum += l_i[k] * dkk[k] * l_j[k];
1862 l_i[j] = (a_j[i] - sum) * inv_djj;
1863
1864 }
1865
1866 }
1867 break;
1868 } /* End of switch */
1869
1870 } /* Loop on column j */
1871
1872 }
1873
1874 /*----------------------------------------------------------------------------*/
1875 /*!
1876 * \brief Solve a 3x3 matrix with a modified Cholesky decomposition (L.D.L^T)
1877 * The solution should be already allocated
1878 * Ref. http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1879 *
1880 * \param[in] facto vector of the coefficients of the decomposition
1881 * \param[in] rhs right-hand side
1882 * \param[in,out] sol solution
1883 */
1884 /*----------------------------------------------------------------------------*/
1885
1886 void
cs_sdm_33_ldlt_solve(const cs_real_t facto[6],const cs_real_t rhs[3],cs_real_t sol[3])1887 cs_sdm_33_ldlt_solve(const cs_real_t facto[6],
1888 const cs_real_t rhs[3],
1889 cs_real_t sol[3])
1890 {
1891 /* Sanity check */
1892 assert(facto != NULL && rhs != NULL && sol != NULL);
1893
1894 sol[0] = rhs[0];
1895 sol[1] = rhs[1] - sol[0]*facto[1];
1896 sol[2] =(rhs[2] - sol[0]*facto[3] - sol[1]*facto[4]) * facto[5];
1897
1898 sol[1] = sol[1] * facto[2] - facto[4]*sol[2];
1899 sol[0] = sol[0] * facto[0] - facto[1]*sol[1] - facto[3]*sol[2];
1900 }
1901
1902 /*----------------------------------------------------------------------------*/
1903 /*!
1904 * \brief Solve a 4x4 matrix with a modified Cholesky decomposition (L.D.L^T)
1905 * The solution should be already allocated
1906 * Ref. http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1907 *
1908 * \param[in] facto vector of the coefficients of the decomposition
1909 * \param[in] rhs right-hand side
1910 * \param[in,out] x solution
1911 */
1912 /*----------------------------------------------------------------------------*/
1913
1914 void
cs_sdm_44_ldlt_solve(const cs_real_t facto[10],const cs_real_t rhs[4],cs_real_t x[4])1915 cs_sdm_44_ldlt_solve(const cs_real_t facto[10],
1916 const cs_real_t rhs[4],
1917 cs_real_t x[4])
1918 {
1919 /* Sanity check */
1920 assert(facto != NULL && rhs != NULL && x != NULL);
1921
1922 x[0] = rhs[0];
1923 x[1] = rhs[1] - x[0]*facto[1];
1924 x[2] = rhs[2] - x[0]*facto[3] - x[1]*facto[4];
1925 x[3] = rhs[3] - x[0]*facto[6] - x[1]*facto[7] - x[2]*facto[8];
1926
1927 x[3] = x[3]*facto[9];
1928 x[2] = x[2]*facto[5] - facto[8]*x[3];
1929 x[1] = x[1]*facto[2] - facto[7]*x[3] - facto[4]*x[2];
1930 x[0] = x[0]*facto[0] - facto[6]*x[3] - facto[3]*x[2] - facto[1]*x[1];
1931 }
1932
1933 /*----------------------------------------------------------------------------*/
1934 /*!
1935 * \brief Solve a 6x6 matrix with a modified Cholesky decomposition (L.D.L^T)
1936 * The solution should be already allocated
1937 * Ref. http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1938 *
1939 * \param[in] f vector of the coefficients of the decomposition
1940 * \param[in] b right-hand side
1941 * \param[in,out] x solution
1942 */
1943 /*----------------------------------------------------------------------------*/
1944
1945 void
cs_sdm_66_ldlt_solve(const cs_real_t f[21],const cs_real_t b[6],cs_real_t x[6])1946 cs_sdm_66_ldlt_solve(const cs_real_t f[21],
1947 const cs_real_t b[6],
1948 cs_real_t x[6])
1949 {
1950 /* Sanity check */
1951 assert(f != NULL && b != NULL && x != NULL);
1952
1953 x[0] = b[0];
1954 x[1] = b[1] - x[0]*f[1];
1955 x[2] = b[2] - x[0]*f[3] - x[1]*f[4];
1956 x[3] = b[3] - x[0]*f[6] - x[1]*f[7] - x[2]*f[8];
1957 x[4] = b[4] - x[0]*f[10] - x[1]*f[11] - x[2]*f[12] - x[3]*f[13];
1958 x[5] = b[5] - x[0]*f[15] - x[1]*f[16] - x[2]*f[17] - x[3]*f[18] - x[4]*f[19];
1959
1960 x[5] = x[5]*f[20];
1961 x[4] = x[4]*f[14] -f[19]*x[5];
1962 x[3] = x[3]*f[9] -f[18]*x[5] -f[13]*x[4];
1963 x[2] = x[2]*f[5] -f[17]*x[5] -f[12]*x[4] -f[8]*x[3];
1964 x[1] = x[1]*f[2] -f[16]*x[5] -f[11]*x[4] -f[7]*x[3] -f[4]*x[2];
1965 x[0] = x[0]*f[0] -f[15]*x[5] -f[10]*x[4] -f[6]*x[3] -f[3]*x[2] -f[1]*x[1];
1966 }
1967
1968 /*----------------------------------------------------------------------------*/
1969 /*!
1970 * \brief Solve a SPD matrix with a L.D.L^T (Modified Cholesky decomposition)
1971 * The solution should be already allocated
1972 * Reference:
1973 * http://mathforcollege.com/nm/mws/gen/04sle/mws_gen_sle_txt_cholesky.pdf
1974 *
1975 * \param[in] n_rows dimension of the system to solve
1976 * \param[in] facto vector of the coefficients of the decomposition
1977 * \param[in] rhs right-hand side
1978 * \param[in, out] sol solution
1979 */
1980 /*----------------------------------------------------------------------------*/
1981
1982 void
cs_sdm_ldlt_solve(int n_rows,const cs_real_t * facto,const cs_real_t * rhs,cs_real_t * sol)1983 cs_sdm_ldlt_solve(int n_rows,
1984 const cs_real_t *facto,
1985 const cs_real_t *rhs,
1986 cs_real_t *sol)
1987 {
1988 /* Sanity check */
1989 assert(facto != NULL && rhs != NULL && sol != NULL);
1990
1991 if (n_rows == 1) {
1992 sol[0] = rhs[0] * facto[0];
1993 return;
1994 }
1995
1996 /* 1 - Solving Lz = b with forward substitution :
1997 * z_i = b_i - \sum_{k=0}^{i-1} l_ik * z_k
1998 */
1999
2000 sol[0] = rhs[0]; /* case i = 0 */
2001
2002 short int rowi_idx = 0;
2003 for (short int i = 1; i < n_rows; i++){
2004
2005 rowi_idx += i;
2006
2007 const cs_real_t *l_i = facto + rowi_idx;
2008 cs_real_t sum = 0.;
2009 for (short int k = 0; k < i; k++)
2010 sum += sol[k] * l_i[k];
2011 sol[i] = rhs[i] - sum;
2012
2013 } /* forward substitution */
2014
2015 /* 2 - Solving Dy = z and facto^Tx=y with backwards substitution
2016 * x_i = z_i/d_ii - \sum_{k=i+1}^{n} l_ki * x_k
2017 */
2018
2019 const short int last_row_id = n_rows - 1;
2020 const int shift = n_rows*(last_row_id)/2; /* idx with n_rows - 1 */
2021 int diagi_idx = shift + last_row_id; /* last entry of the facto. */
2022 sol[last_row_id] *= facto[diagi_idx]; /* 1 / d_nn */
2023
2024 for (short int i = last_row_id - 1; i >= 0; i--) {
2025
2026 diagi_idx -= (i+2);
2027 sol[i] *= facto[diagi_idx];
2028
2029 short int rowk_idx = shift;
2030 cs_real_t sum = 0.0;
2031 for (short int k = last_row_id; k > i; k--) {
2032 /*sol[i] -= facto[k*(k+1)/2+i] * sol[k];*/
2033 const cs_real_t *l_k = facto + rowk_idx;
2034 sum += l_k[i] * sol[k];
2035 rowk_idx -= k;
2036 }
2037 sol[i] -= sum;
2038
2039 } /* backward substitution */
2040
2041 }
2042
2043 /*----------------------------------------------------------------------------*/
2044 /*!
2045 * \brief Test if a matrix is symmetric. Return 0. if the extradiagonal
2046 * differences are lower thann the machine precision.
2047 *
2048 * \param[in] mat pointer to the cs_sdm_t structure to test
2049 *
2050 * \return 0 if the matrix is symmetric at the machine tolerance otherwise
2051 * the absolute max. value between two transposed terms
2052 */
2053 /*----------------------------------------------------------------------------*/
2054
2055 double
cs_sdm_test_symmetry(const cs_sdm_t * mat)2056 cs_sdm_test_symmetry(const cs_sdm_t *mat)
2057 {
2058 double sym_eval = 0.;
2059
2060 if (mat == NULL)
2061 return sym_eval;
2062
2063 if (mat->flag & CS_SDM_BY_BLOCK) {
2064
2065 cs_sdm_t *copy = cs_sdm_block_create_copy(mat);
2066
2067 cs_sdm_block_square_asymm(copy);
2068
2069 const cs_sdm_block_t *bd = copy->block_desc;
2070 for (int bi = 0; bi < bd->n_row_blocks; bi++) {
2071 for (int bj = bi; bj < bd->n_col_blocks; bj++) {
2072
2073 cs_sdm_t *bIJ = cs_sdm_get_block(copy, bi, bj);
2074
2075 for (int i = 0; i < bIJ->n_rows*bIJ->n_cols; i++)
2076 if (fabs(bIJ->val[i]) > sym_eval)
2077 sym_eval = fabs(bIJ->val[i]);
2078
2079 } /* Loop on column blocks */
2080
2081 } /* Loop on row blocks */
2082
2083 copy = cs_sdm_free(copy);
2084
2085 }
2086 else {
2087
2088 cs_sdm_t *copy = cs_sdm_create_copy(mat);
2089
2090 cs_sdm_square_asymm(copy);
2091
2092 for (int i = 0; i < copy->n_rows*copy->n_cols; i++)
2093 if (fabs(copy->val[i]) > sym_eval)
2094 sym_eval = fabs(copy->val[i]);
2095
2096 copy = cs_sdm_free(copy);
2097
2098 }
2099
2100 return 2*sym_eval;
2101 }
2102
2103 /*----------------------------------------------------------------------------*/
2104 /*!
2105 * \brief Dump a small dense matrix
2106 *
2107 * \param[in] mat pointer to the cs_sdm_t structure
2108 */
2109 /*----------------------------------------------------------------------------*/
2110
2111 void
cs_sdm_simple_dump(const cs_sdm_t * mat)2112 cs_sdm_simple_dump(const cs_sdm_t *mat)
2113 {
2114 if (mat == NULL)
2115 return;
2116
2117 if (mat->n_rows < 1 || mat->n_cols < 1) {
2118 cs_log_printf(CS_LOG_DEFAULT, " No value.\n");
2119 return;
2120 }
2121
2122 for (short int i = 0; i < mat->n_rows; i++) {
2123 for (short int j = 0; j < mat->n_cols; j++)
2124 cs_log_printf(CS_LOG_DEFAULT, " % .4e", mat->val[i*mat->n_cols+j]);
2125 cs_log_printf(CS_LOG_DEFAULT, "\n");
2126 }
2127 }
2128
2129 /*----------------------------------------------------------------------------*/
2130 /*!
2131 * \brief Dump a small dense matrix
2132 *
2133 * \param[in] parent_id id of the related parent entity
2134 * \param[in] row_ids list of ids related to associated entities (or NULL)
2135 * \param[in] col_ids list of ids related to associated entities (or NULL)
2136 * \param[in] mat pointer to the cs_sdm_t structure
2137 */
2138 /*----------------------------------------------------------------------------*/
2139
2140 void
cs_sdm_dump(cs_lnum_t parent_id,const cs_lnum_t * row_ids,const cs_lnum_t * col_ids,const cs_sdm_t * mat)2141 cs_sdm_dump(cs_lnum_t parent_id,
2142 const cs_lnum_t *row_ids,
2143 const cs_lnum_t *col_ids,
2144 const cs_sdm_t *mat)
2145 {
2146 if (mat == NULL) {
2147 cs_log_printf(CS_LOG_DEFAULT,
2148 "<< MATRIX is set to NULL (parent id: %ld)>>\n",
2149 (long)parent_id);
2150 return;
2151 }
2152
2153 cs_log_printf(CS_LOG_DEFAULT, "<< MATRIX parent id: %ld >>\n",
2154 (long)parent_id);
2155
2156 if (mat->n_rows < 1 || mat->n_cols < 1) {
2157 cs_log_printf(CS_LOG_DEFAULT, " No value.\n");
2158 return;
2159 }
2160
2161 if (row_ids == NULL || col_ids == NULL)
2162 cs_sdm_simple_dump(mat);
2163
2164 else {
2165
2166 cs_log_printf(CS_LOG_DEFAULT, " %8s %11ld", " ", (long)col_ids[0]);
2167 for (short int i = 1; i < mat->n_cols; i++)
2168 cs_log_printf(CS_LOG_DEFAULT, " %11ld", (long)col_ids[i]);
2169 cs_log_printf(CS_LOG_DEFAULT, "\n");
2170
2171 for (short int i = 0; i < mat->n_rows; i++) {
2172 cs_log_printf(CS_LOG_DEFAULT, " %8ld ", (long)row_ids[i]);
2173 for (short int j = 0; j < mat->n_cols; j++)
2174 cs_log_printf(CS_LOG_DEFAULT, " % .4e", mat->val[i*mat->n_cols+j]);
2175 cs_log_printf(CS_LOG_DEFAULT, "\n");
2176 }
2177
2178 }
2179
2180 }
2181
2182 /*----------------------------------------------------------------------------*/
2183 /*!
2184 * \brief Print a cs_sdm_t structure not defined by block
2185 * Print into the file f if given otherwise open a new file named
2186 * fname if given otherwise print into the standard output
2187 * The usage of threshold allows one to compare more easier matrices
2188 * without taking into account numerical roundoff.
2189 *
2190 * \param[in] fp pointer to a file structure or NULL
2191 * \param[in] fname filename or NULL
2192 * \param[in] thd threshold (below this value --> set 0)
2193 * \param[in] m pointer to the cs_sdm_t structure
2194 */
2195 /*----------------------------------------------------------------------------*/
2196
2197 void
cs_sdm_fprintf(FILE * fp,const char * fname,cs_real_t thd,const cs_sdm_t * m)2198 cs_sdm_fprintf(FILE *fp,
2199 const char *fname,
2200 cs_real_t thd,
2201 const cs_sdm_t *m)
2202 {
2203 FILE *fout = stdout;
2204 if (fp != NULL)
2205 fout = fp;
2206 else if (fname != NULL) {
2207 fout = fopen(fname, "w");
2208 }
2209
2210 fprintf(fout, "cs_sdm_t %p\n", (const void *)m);
2211
2212 if (m == NULL)
2213 return;
2214
2215 if (m->n_rows < 1 || m->n_cols < 1) {
2216 fprintf(fout, " No value.\n");
2217 return;
2218 }
2219
2220 for (int i = 0; i < m->n_rows; i++) {
2221
2222 const cs_real_t *mval_i = m->val + i*m->n_cols;
2223 for (int j = 0; j < m->n_cols; j++) {
2224 if (fabs(mval_i[j]) > thd)
2225 fprintf(fout, " % -9.5e", mval_i[j]);
2226 else
2227 fprintf(fout, " % -9.5e", 0.);
2228 }
2229 fprintf(fout, "\n");
2230
2231 }
2232
2233 if (fout != stdout && fout != fp)
2234 fclose(fout);
2235 }
2236
2237 /*----------------------------------------------------------------------------*/
2238 /*!
2239 * \brief Dump a small dense matrix defined by blocks
2240 *
2241 * \param[in] parent_id id of the related parent entity
2242 * \param[in] mat pointer to the cs_sdm_t structure
2243 */
2244 /*----------------------------------------------------------------------------*/
2245
2246 void
cs_sdm_block_dump(cs_lnum_t parent_id,const cs_sdm_t * mat)2247 cs_sdm_block_dump(cs_lnum_t parent_id,
2248 const cs_sdm_t *mat)
2249 {
2250 if (mat == NULL)
2251 return;
2252
2253 if ((mat->flag & CS_SDM_BY_BLOCK) == 0) {
2254 cs_sdm_simple_dump(mat);
2255 return;
2256 }
2257 assert(mat->block_desc != NULL);
2258
2259 cs_log_printf(CS_LOG_DEFAULT, "\n << BLOCK MATRIX parent id: %ld >>\n",
2260 (long)parent_id);
2261
2262 const int n_b_rows = mat->block_desc->n_row_blocks;
2263 const int n_b_cols = mat->block_desc->n_col_blocks;
2264 const cs_sdm_t *blocks = mat->block_desc->blocks;
2265
2266 if (n_b_rows < 1 || n_b_cols < 1) {
2267 cs_log_printf(CS_LOG_DEFAULT, " No block\n");
2268 return;
2269 }
2270 cs_log_printf(CS_LOG_DEFAULT, " n_row_blocks: %d; n_col_blocks: %d\n",
2271 n_b_rows, n_b_cols);
2272
2273 const char _sep[] = "------------------------------------------------------";
2274 for (short int bi = 0; bi < n_b_rows; bi++) {
2275
2276 const cs_sdm_t *bi0 = blocks + bi*n_b_cols;
2277 const int n_rows = bi0->n_rows;
2278
2279 for (int i = 0; i < n_rows; i++) {
2280
2281 for (short int bj = 0; bj < n_b_cols; bj++) {
2282
2283 const cs_sdm_t *bij = blocks + bi*n_b_cols + bj;
2284 const int n_cols = bij->n_cols;
2285 const cs_real_t *mval_i = bij->val + i*n_cols;
2286
2287 for (int j = 0; j < n_cols; j++)
2288 cs_log_printf(CS_LOG_DEFAULT, " % -6.3e", mval_i[j]);
2289 cs_log_printf(CS_LOG_DEFAULT, " |");
2290
2291 } /* Block j */
2292 cs_log_printf(CS_LOG_DEFAULT, "\n");
2293
2294 } /* Loop on rows */
2295 cs_log_printf(CS_LOG_DEFAULT, "%s%s%s\n", _sep, _sep, _sep);
2296
2297 } /* Block i */
2298
2299 }
2300
2301 /*----------------------------------------------------------------------------*/
2302 /*!
2303 * \brief Print a cs_sdm_t structure which is defined by block
2304 * Print into the file f if given otherwise open a new file named
2305 * fname if given otherwise print into the standard output
2306 * The usage of threshold allows one to compare more easier matrices
2307 * without taking into account numerical roundoff.
2308 *
2309 * \param[in] fp pointer to a file structure or NULL
2310 * \param[in] fname filename or NULL
2311 * \param[in] thd threshold (below this value --> set 0)
2312 * \param[in] m pointer to the cs_sdm_t structure
2313 */
2314 /*----------------------------------------------------------------------------*/
2315
2316 void
cs_sdm_block_fprintf(FILE * fp,const char * fname,cs_real_t thd,const cs_sdm_t * m)2317 cs_sdm_block_fprintf(FILE *fp,
2318 const char *fname,
2319 cs_real_t thd,
2320 const cs_sdm_t *m)
2321 {
2322 FILE *fout = stdout;
2323 if (fp != NULL)
2324 fout = fp;
2325 else if (fname != NULL) {
2326 fout = fopen(fname, "w");
2327 }
2328
2329 fprintf(fout, "cs_sdm_t %p\n", (const void *)m);
2330
2331 if (m == NULL)
2332 return;
2333
2334 assert(m->block_desc != NULL);
2335 const int n_b_rows = m->block_desc->n_row_blocks;
2336 const int n_b_cols = m->block_desc->n_col_blocks;
2337 const cs_sdm_t *blocks = m->block_desc->blocks;
2338
2339 for (short int bi = 0; bi < n_b_rows; bi++) {
2340
2341 const cs_sdm_t *bi0 = blocks + bi*n_b_cols;
2342 const int n_rows = bi0->n_rows;
2343
2344 for (int i = 0; i < n_rows; i++) {
2345
2346 for (short int bj = 0; bj < n_b_cols; bj++) {
2347
2348 const cs_sdm_t *bij = blocks + bi*n_b_cols + bj;
2349 const int n_cols = bij->n_cols;
2350 const cs_real_t *mval_i = bij->val + i*n_cols;
2351
2352 for (int j = 0; j < n_cols; j++) {
2353 if (fabs(mval_i[j]) > thd)
2354 fprintf(fout, " % -9.5e", mval_i[j]);
2355 else
2356 fprintf(fout, " % -9.5e", 0.);
2357 }
2358
2359 } /* Block j */
2360 fprintf(fout, "\n");
2361
2362 } /* Loop on rows */
2363
2364 } /* Block i */
2365
2366 if (fout != stdout && fout != fp)
2367 fclose(fout);
2368 }
2369
2370 /*----------------------------------------------------------------------------*/
2371
2372 END_C_DECLS
2373