1 /*============================================================================
2 * Assembly of local cellwise system into a cs_matrix_t structure through
3 * the cs_matrix_assembler_t and its related structures
4 *============================================================================*/
5
6 /*
7 This file is part of Code_Saturne, a general-purpose CFD tool.
8
9 Copyright (C) 1998-2021 EDF S.A.
10
11 This program is free software; you can redistribute it and/or modify it under
12 the terms of the GNU General Public License as published by the Free Software
13 Foundation; either version 2 of the License, or (at your option) any later
14 version.
15
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19 details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
23 Street, Fifth Floor, Boston, MA 02110-1301, USA.
24 */
25
26 /*----------------------------------------------------------------------------*/
27
28 #include "cs_defs.h"
29
30 /*----------------------------------------------------------------------------
31 * Standard C library headers
32 *----------------------------------------------------------------------------*/
33
34 #include <assert.h>
35
36 /*----------------------------------------------------------------------------
37 * Local headers
38 *----------------------------------------------------------------------------*/
39
40 #include "bft_error.h"
41 #include "bft_mem.h"
42
43 #include "cs_defs.h"
44 #include "cs_log.h"
45 #include "cs_matrix_priv.h"
46 #include "cs_matrix_assembler_priv.h"
47 #include "cs_matrix_assembler.h"
48 #include "cs_param_cdo.h"
49 #include "cs_parall.h"
50 #include "cs_sort.h"
51 #include "cs_timer.h"
52
53 #if defined(DEBUG) && !defined(NDEBUG)
54 #include "cs_dbg.h"
55 #endif
56
57 /*----------------------------------------------------------------------------
58 * Header for the current file
59 *----------------------------------------------------------------------------*/
60
61 #include "cs_equation_assemble.h"
62
63 /*----------------------------------------------------------------------------*/
64
65 BEGIN_C_DECLS
66
67 /*=============================================================================
68 * Additional doxygen documentation
69 *============================================================================*/
70
71 /*!
72 \file cs_equation_assemble.c
73
74 \brief Assembly of local cellwise system into a cs_matrix_t structure through
75 the cs_matrix_assembler_t and its related structures.
76
These functions are specific to CDO schemes. Thus one can assume a more
specific behavior in order to get a more optimized version of the standard
assembly process.
80
81 */
82
83 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
84
85 /*============================================================================
86 * Type definitions and macros
87 *============================================================================*/
88
89 #define CS_EQUATION_ASSEMBLE_DBG 0 /* Debug level */
90
91 /*============================================================================
92 * Local private variables
93 *============================================================================*/
94
95 /* Store the matrix structure and its assembler structures for each family
96 of space discretizations */
97 static cs_matrix_assembler_t **cs_equation_assemble_ma = NULL;
98 static cs_matrix_structure_t **cs_equation_assemble_ms = NULL;
99 static cs_equation_assemble_t **cs_equation_assemble = NULL;
100
101 static cs_timer_counter_t cs_equation_ms_time;
102
103 /*=============================================================================
104 * Local function pointer definitions
105 *============================================================================*/
106
107 /*=============================================================================
108 * Local structure definitions
109 *============================================================================*/
110
111 typedef struct {
112
113 /* Row numberings */
114 cs_gnum_t g_id; /* Global row numbering */
115 cs_lnum_t l_id; /* Local range set numbering */
116 int i; /* Cellwise system numbering */
117
118 /* Column-related members */
119 int n_cols; /* Number of columns (cellwise system) */
120 cs_gnum_t *col_g_id; /* Global numbering of columns */
121 int *col_idx; /* Array to build and to give as parameter
122 for add_vals() function */
123
124 const cs_real_t *val; /* Row values */
125 cs_real_t *expval; /* Expanded row values */
126
127 } cs_equation_assemble_row_t;
128
129 struct _cs_equation_assemble_t {
130
131 int ddim; /* Number of real values related to each diagonal
132 entry */
133 int edim; /* Number of real values related to each
134 extra-diagonal entry */
135
136 cs_equation_assemble_row_t *row;
137 };
138
139 /*============================================================================
140 * Static inline private function prototypes
141 *============================================================================*/
142
143 /*----------------------------------------------------------------------------*/
144 /*!
145 * \brief Choose which function will be used to perform the matrix assembly
146 * Case of scalar-valued matrices.
147 *
148 * \return a pointer to a function
149 */
150 /*----------------------------------------------------------------------------*/
151
152 static inline cs_equation_assembly_t *
_set_scalar_assembly_func(void)153 _set_scalar_assembly_func(void)
154 {
155 #if defined(HAVE_MPI)
156
157 if (cs_glob_n_ranks > 1) { /* Parallel */
158
159 if (cs_glob_n_threads < 2) /* Without OpenMP */
160 return cs_equation_assemble_matrix_mpis;
161 else /* With OpenMP */
162 return cs_equation_assemble_matrix_mpit;
163
164 }
165
166 #endif /* defined(HAVE_MPI) */
167
168 if (cs_glob_n_ranks <= 1) { /* Sequential */
169
170 if (cs_glob_n_threads < 2) /* Without OpenMP */
171 return cs_equation_assemble_matrix_seqs;
172 else /* With OpenMP */
173 return cs_equation_assemble_matrix_seqt;
174
175 }
176
177 return NULL; /* Case not handled */
178 }
179
180 /*----------------------------------------------------------------------------*/
181 /*!
182 * \brief Choose which function will be used to perform the matrix assembly
183 * Case of block 3x3 matrices.
184 *
185 * \return a pointer to a function
186 */
187 /*----------------------------------------------------------------------------*/
188
189 static inline cs_equation_assembly_t *
_set_block33_assembly_func(void)190 _set_block33_assembly_func(void)
191 {
192 #if defined(HAVE_MPI)
193
194 if (cs_glob_n_ranks > 1) { /* Parallel */
195
196 if (cs_glob_n_threads < 2) /* Without OpenMP */
197 return cs_equation_assemble_eblock33_matrix_mpis;
198 else /* With OpenMP */
199 return cs_equation_assemble_eblock33_matrix_mpit;
200
201 }
202
203 #endif /* defined(HAVE_MPI) */
204
205 if (cs_glob_n_ranks <= 1) { /* Sequential */
206
207 if (cs_glob_n_threads < 2) /* Without OpenMP */
208 return cs_equation_assemble_eblock33_matrix_seqs;
209 else /* With OpenMP */
210 return cs_equation_assemble_eblock33_matrix_seqt;
211
212 }
213
214 return NULL; /* Case not handled */
215 }
216
217 /*----------------------------------------------------------------------------*/
218 /*!
219 * \brief Choose which function will be used to perform the matrix assembly
220 * Case of block NxN matrices.
221 *
222 * \return a pointer to a function
223 */
224 /*----------------------------------------------------------------------------*/
225
226 static inline cs_equation_assembly_t *
_set_block_assembly_func(void)227 _set_block_assembly_func(void)
228 {
229 #if defined(HAVE_MPI)
230
231 if (cs_glob_n_ranks > 1) { /* Parallel */
232
233 if (cs_glob_n_threads < 2) /* Without OpenMP */
234 return cs_equation_assemble_eblock_matrix_mpis;
235 else /* With OpenMP */
236 return cs_equation_assemble_eblock_matrix_mpit;
237
238 }
239
240 #endif /* defined(HAVE_MPI) */
241
242 if (cs_glob_n_ranks <= 1) { /* Sequential */
243
244 if (cs_glob_n_threads < 2) /* Without OpenMP */
245 return cs_equation_assemble_eblock_matrix_seqs;
246 else /* With OpenMP */
247 return cs_equation_assemble_eblock_matrix_seqt;
248
249 }
250
251 return NULL; /* Case not handled */
252 }
253
254 /*----------------------------------------------------------------------------*/
255 /*!
 * \brief Binary search for a given local id in a given array of
 *        ordered local ids, when the id might not be present
 *
 * \param[in]  start_id         begin the search at this index
 * \param[in]  l_id_array_size  size of l_id_array
 * \param[in]  l_id             local id to search for
 * \param[in]  l_id_array       ordered unique local ids array
 *
 * \return index of l_id in l_id_array, or -1 if not found
267 */
268 /*----------------------------------------------------------------------------*/
269
270 static inline int
_l_binary_search(int start_id,int l_id_array_size,cs_lnum_t l_id,const cs_lnum_t l_id_array[])271 _l_binary_search(int start_id,
272 int l_id_array_size,
273 cs_lnum_t l_id,
274 const cs_lnum_t l_id_array[])
275 {
276 assert(start_id > -1);
277 int end_id = l_id_array_size - 1;
278
279 while (start_id <= end_id) {
280
281 const int mid_id = (start_id + end_id)/2;
282 const cs_lnum_t test_val = l_id_array[mid_id];
283
284 if (test_val < l_id)
285 start_id = mid_id + 1;
286 else if (test_val > l_id)
287 end_id = mid_id - 1;
288 else
289 return mid_id;
290
291 }
292
293 return -1; /* Not found */
294 }
295
296 /*----------------------------------------------------------------------------*/
297 /*!
 * \brief Binary search for a given global id in a given array of
 *        ordered global ids
 *
 * \param[in]  g_id_array_size  size of g_id_array
 * \param[in]  g_id             global id to search for
 * \param[in]  g_id_array       ordered unique global ids array
 *
 * \return index of g_id in g_id_array, or -1 if not found.
308 */
309 /*----------------------------------------------------------------------------*/
310
311 static inline int
_g_binary_search(int g_id_array_size,cs_gnum_t g_id,const cs_gnum_t g_id_array[])312 _g_binary_search(int g_id_array_size,
313 cs_gnum_t g_id,
314 const cs_gnum_t g_id_array[])
315 {
316 int start_id = 0;
317 int end_id = g_id_array_size - 1;
318
319 while (start_id <= end_id) {
320
321 const int mid_id = (end_id + start_id) / 2;
322 const cs_gnum_t mid_val = g_id_array[mid_id];
323
324 if (mid_val < g_id)
325 start_id = mid_id + 1;
326 else if (mid_val > g_id)
327 end_id = mid_id - 1;
328 else
329 return mid_id;
330
331 }
332
333 return -1; /* Not found */
334 }
335
336 /*----------------------------------------------------------------------------*/
337 /*!
338 * \brief Function pointer for adding values to a MSR matrix.
339 *
340 * Specific case:
341 * CDO schemes with no openMP and scalar-valued quantities
342 *
343 * \warning The matrix pointer must point to valid data when the selection
344 * function is called, so the life cycle of the data pointed to
345 * should be at least as long as that of the assembler values
346 * structure.
347 *
348 * \remark Note that we pass column indexes (not ids) here; as the
349 * caller is already assumed to have identified the index
350 * matching a given column id.
351 *
352 * \param[in] row pointer to a cs_equation_assemble_row_t type
353 * \param[in, out] matrix_p untyped pointer to matrix description structure
354 */
355 /*----------------------------------------------------------------------------*/
356
inline static void
_add_scal_values_single(const cs_equation_assemble_row_t   *row,
                        void                               *matrix_p)
{
  /* Single-thread variant: no synchronization is needed around the updates */
  assert(row->l_id > -1);

  cs_matrix_t  *matrix = (cs_matrix_t *)matrix_p;
  cs_matrix_coeff_msr_t  *mc = matrix->coeffs;

  const cs_matrix_struct_csr_t  *ms = matrix->structure;

  /* Update the diagonal value: with MSR storage the diagonal is kept in a
     separate array (_d_val) indexed directly by the local row id */
  mc->_d_val[row->l_id] += row->val[row->i];

  /* Update the extra-diagonal values: xvals points to the start of this row
     in the CSR extra-diagonal array; col_idx[] holds in-row positions
     previously computed by the caller (indexes, not ids) */
  cs_real_t  *xvals = mc->_x_val + ms->row_index[row->l_id];
  for (int j = 0; j < row->i; j++) /* Lower part */
    xvals[row->col_idx[j]] += row->val[j];
  for (int j = row->i+1; j < row->n_cols; j++) /* Upper part */
    xvals[row->col_idx[j]] += row->val[j];
}
378
379 /*----------------------------------------------------------------------------*/
380 /*!
381 * \brief Function pointer for adding values to a MSR matrix.
382 *
383 * Specific case:
384 * CDO schemes with openMP atomic section and scalar-valued quantities
385 *
386 * \warning The matrix pointer must point to valid data when the selection
387 * function is called, so the life cycle of the data pointed to
388 * should be at least as long as that of the assembler values
389 * structure.
390 *
391 * \remark Note that we pass column indexes (not ids) here; as the
392 * caller is already assumed to have identified the index
393 * matching a given column id.
394 *
395 * \param[in] row pointer to a cs_equation_assemble_row_t type
396 * \param[in, out] matrix_p untyped pointer to matrix description structure
397 */
398 /*----------------------------------------------------------------------------*/
399
inline static void
_add_scal_values_atomic(const cs_equation_assemble_row_t   *row,
                        void                               *matrix_p)
{
  /* OpenMP variant: each scalar update is protected by an atomic directive
     since several threads may assemble into the same matrix row */
  assert(row->l_id > -1);

  cs_matrix_t  *matrix = (cs_matrix_t *)matrix_p;
  cs_matrix_coeff_msr_t  *mc = matrix->coeffs;

  const cs_matrix_struct_csr_t  *ms = matrix->structure;

  /* Update the diagonal value (MSR storage: diagonal kept separately) */
# pragma omp atomic
  mc->_d_val[row->l_id] += row->val[row->i];

  /* Update the extra-diagonal values; j == row->i is the diagonal entry,
     already handled above, hence the skip */
  cs_real_t  *xvals = mc->_x_val + ms->row_index[row->l_id];
  for (int j = 0; j < row->n_cols; j++) {
    if (j != row->i) {
#     pragma omp atomic
      xvals[row->col_idx[j]] += row->val[j];
    }
  }
}
424
425 /*----------------------------------------------------------------------------*/
426 /*!
427 * \brief Function pointer for adding values to a MSR matrix.
428 *
429 * Specific case:
430 * CDO schemes with openMP critical section and scalar-valued quantities
431 *
432 * \warning The matrix pointer must point to valid data when the selection
433 * function is called, so the life cycle of the data pointed to
434 * should be at least as long as that of the assembler values
435 * structure.
436 *
437 * \remark Note that we pass column indexes (not ids) here; as the
438 * caller is already assumed to have identified the index
439 * matching a given column id.
440 *
441 * \param[in] row pointer to a cs_equation_assemble_row_t type
442 * \param[in, out] matrix_p untyped pointer to matrix description structure
443 */
444 /*----------------------------------------------------------------------------*/
445
inline static void
_add_scal_values_critical(const cs_equation_assemble_row_t   *row,
                          void                               *matrix_p)
{
  /* OpenMP variant: the whole row update is serialized inside a single
     critical section (coarser-grained than the atomic variant) */
  assert(row->l_id > -1);

  cs_matrix_t  *matrix = (cs_matrix_t *)matrix_p;
  cs_matrix_coeff_msr_t  *mc = matrix->coeffs;

  const cs_matrix_struct_csr_t  *ms = matrix->structure;

# pragma omp critical
  {
    /* Update the diagonal value (MSR storage: diagonal kept separately) */
    mc->_d_val[row->l_id] += row->val[row->i];

    /* Update the extra-diagonal values; j == row->i is the diagonal entry,
       already handled above, hence the skip */
    cs_real_t  *xvals = mc->_x_val + ms->row_index[row->l_id];
    for (int j = 0; j < row->n_cols; j++)
      if (j != row->i)
        xvals[row->col_idx[j]] += row->val[j];
  }
}
469
470 /*----------------------------------------------------------------------------*/
471 /*!
 * \brief Add values to a matrix assembler values structure using global
 *        row and column ids.
 *        Case where the row belongs to the local rank and all its columns too.
475 *
476 * See \ref cs_matrix_assembler_values_add_g which performs the same operations
477 * In the specific case of CDO system, one assumes predefined choices in
478 * order to get a more optimized version of this function
479 *
480 * \param[in, out] ma pointer to matrix assembler values structure
481 * \param[in, out] row pointer to a cs_equation_assemble_row_t structure
482 */
483 /*----------------------------------------------------------------------------*/
484
485 inline static void
_assemble_row_scal_l(const cs_matrix_assembler_t * ma,cs_equation_assemble_row_t * row)486 _assemble_row_scal_l(const cs_matrix_assembler_t *ma,
487 cs_equation_assemble_row_t *row)
488 {
489 const cs_lnum_t l_r_id = row->l_id; /* g_r_id - ma->l_range[0]; */
490 const cs_lnum_t l_start = ma->r_idx[l_r_id], l_end = ma->r_idx[l_r_id+1];
491 const int n_l_cols = l_end - l_start;
492 const cs_lnum_t *col_ids = ma->c_id + l_start;
493
494 /* Loop on columns to fill col_idx for extra-diag entries
495 * Diagonal is treated separately */
496 for (int j = 0; j < row->i; j++) { /* Lower part */
497 row->col_idx[j] = _l_binary_search(0,
498 n_l_cols,
499 /* l_c_id */ row->col_g_id[j]-ma->l_range[0],
500 col_ids);
501 assert(row->col_idx[j] > -1);
502 }
503 for (int j = row->i + 1; j < row->n_cols; j++) { /* Upper part */
504 row->col_idx[j] = _l_binary_search(0,
505 n_l_cols,
506 /* l_c_id */ row->col_g_id[j]-ma->l_range[0],
507 col_ids);
508 assert(row->col_idx[j] > -1);
509 }
510 }
511
512 #if defined(HAVE_MPI)
513
514 /*----------------------------------------------------------------------------*/
515 /*!
 * \brief Add values to a matrix assembler values structure using global
 *        row and column ids.
 *        Case where the row belongs to the local rank but a part of the
 *        columns belong to a distant rank. Hence the naming *_ld
520 *
521 * See \ref cs_matrix_assembler_values_add_g which performs the same operations
522 * In the specific case of CDO system, one assumes predefined choices in
523 * order to get a more optimized version of this function
524 *
525 * \param[in, out] ma pointer to matrix assembler values structure
526 * \param[in, out] row pointer to a cs_equation_assemble_row_t structure
527 */
528 /*----------------------------------------------------------------------------*/
529
530 inline static void
_assemble_row_scal_ld(const cs_matrix_assembler_t * ma,cs_equation_assemble_row_t * row)531 _assemble_row_scal_ld(const cs_matrix_assembler_t *ma,
532 cs_equation_assemble_row_t *row)
533 {
534 assert(ma->d_r_idx != NULL); /* local-id-based function, need to adapt */
535
536 const cs_lnum_t l_r_id = row->l_id; /* g_r_id - ma->l_range[0]; */
537 const cs_lnum_t l_start = ma->r_idx[l_r_id], l_end = ma->r_idx[l_r_id+1];
538 const cs_lnum_t d_start = ma->d_r_idx[l_r_id], d_end = ma->d_r_idx[l_r_id+1];
539 const int n_d_cols = d_end - d_start;
540 const int n_l_cols = l_end - l_start - n_d_cols;
541
542 /* Loop on columns to fill col_idx for extra-diag entries */
543 for (int j = 0; j < row->i; j++) {
544
545 const cs_gnum_t g_c_id = row->col_g_id[j];
546
547 if (g_c_id >= ma->l_range[0] && g_c_id < ma->l_range[1]) { /* Local part */
548
549 row->col_idx[j] = _l_binary_search(0,
550 n_l_cols,
551 g_c_id - ma->l_range[0], /* l_c_id */
552 ma->c_id + l_start);
553 assert(row->col_idx[j] > -1);
554
555 }
556 else { /* Distant part */
557
558 /* column ids start and end of local row, so add n_l_cols */
559 row->col_idx[j] = n_l_cols + _g_binary_search(n_d_cols,
560 g_c_id,
561 ma->d_g_c_id + d_start);
562
563 }
564
565 } /* Loop on columns of the row */
566
567 for (int j = row->i + 1; j < row->n_cols; j++) {
568
569 const cs_gnum_t g_c_id = row->col_g_id[j];
570
571 if (g_c_id >= ma->l_range[0] && g_c_id < ma->l_range[1]) { /* Local part */
572
573 row->col_idx[j] = _l_binary_search(0,
574 n_l_cols,
575 g_c_id-ma->l_range[0], /* l_c_id */
576 ma->c_id + l_start);
577 assert(row->col_idx[j] > -1);
578
579 }
580 else { /* Distant part */
581
582 /* column ids start and end of local row, so add n_l_cols */
583 row->col_idx[j] = n_l_cols + _g_binary_search(n_d_cols,
584 g_c_id,
585 ma->d_g_c_id + d_start);
586
587 }
588
589 } /* Loop on columns of the row */
590
591 }
592
593 /*----------------------------------------------------------------------------*/
594 /*!
595 * \brief Add values to a matrix assembler values structure using global
596 * row and column ids.
597 * Case where the row does not belong to the local rank.
598 *
599 * See \ref cs_matrix_assembler_values_add_g which performs the same operations
600 * In the specific case of CDO system, one assumes predefined choices in
601 * order to get a more optimized version of this function
602 *
603 * \param[in, out] mav pointer to matrix assembler values structure
604 * \param[in] ma pointer to matrix assembler values structure
605 * \param[in] row pointer to a cs_equation_assemble_row_t structure
606 */
607 /*----------------------------------------------------------------------------*/
608
inline static void
_assemble_row_scal_dt(cs_matrix_assembler_values_t       *mav,
                      const cs_matrix_assembler_t        *ma,
                      const cs_equation_assemble_row_t   *row)
{
  /* Case where the coefficient is handled by another rank. No need to call
     an add_values() function: contributions are accumulated into the
     mav->coeff_send buffer, to be exchanged later by the assembler.
     Threaded variant: updates are protected by atomic directives. */

  assert(row->g_id < ma->l_range[0] || row->g_id >= ma->l_range[1]);

  /* Locate the row inside the send-buffer metadata */
  const cs_lnum_t  e_r_id = _g_binary_search(ma->coeff_send_n_rows,
                                             row->g_id,
                                             ma->coeff_send_row_g_id);
  const cs_lnum_t  r_start = ma->coeff_send_index[e_r_id];
  const int  n_e_rows = ma->coeff_send_index[e_r_id+1] - r_start;
  const cs_gnum_t  *coeff_send_g_id = ma->coeff_send_col_g_id + r_start;

  /* Diagonal term: its column id equals the row global id */
  const cs_lnum_t  e_diag_id = r_start + _g_binary_search(n_e_rows,
                                                          row->g_id,
                                                          coeff_send_g_id);

  /* Now add values to send coefficients */
# pragma omp atomic
  mav->coeff_send[e_diag_id] += row->val[row->i];

  /* Loop on extra-diagonal entries */
  for (int j = 0; j < row->i; j++) { /* Lower-part */

    const cs_lnum_t  e_id = r_start + _g_binary_search(n_e_rows,
                                                       row->col_g_id[j],
                                                       coeff_send_g_id);

    /* Now add values to send coefficients */
#   pragma omp atomic
    mav->coeff_send[e_id] += row->val[j];

  }

  for (int j = row->i + 1; j < row->n_cols; j++) { /* Upper-part */

    const cs_lnum_t  e_id = r_start + _g_binary_search(n_e_rows,
                                                       row->col_g_id[j],
                                                       coeff_send_g_id);

    /* Now add values to send coefficients */
#   pragma omp atomic
    mav->coeff_send[e_id] += row->val[j];

  }
}
660
661 /*----------------------------------------------------------------------------*/
662 /*!
663 * \brief Add values to a matrix assembler values structure using global
664 * row and column ids.
665 * Case where the row does not belong to the local rank. No openMP.
666 *
667 * See \ref cs_matrix_assembler_values_add_g which performs the same operations
668 * In the specific case of CDO system, one assumes predefined choices in
669 * order to get a more optimized version of this function
670 *
671 * \param[in, out] mav pointer to matrix assembler values structure
672 * \param[in] ma pointer to matrix assembler values structure
673 * \param[in] row pointer to a cs_equation_assemble_row_t structure
674 */
675 /*----------------------------------------------------------------------------*/
676
677 inline static void
_assemble_row_scal_ds(cs_matrix_assembler_values_t * mav,const cs_matrix_assembler_t * ma,const cs_equation_assemble_row_t * row)678 _assemble_row_scal_ds(cs_matrix_assembler_values_t *mav,
679 const cs_matrix_assembler_t *ma,
680 const cs_equation_assemble_row_t *row)
681 {
682 /* Case where coefficient is handled by other rank. No need to call
683 add_values() function in this case */
684
685 assert(row->g_id < ma->l_range[0] || row->g_id >= ma->l_range[1]);
686
687 const cs_lnum_t e_r_id = _g_binary_search(ma->coeff_send_n_rows,
688 row->g_id,
689 ma->coeff_send_row_g_id);
690 const cs_lnum_t r_start = ma->coeff_send_index[e_r_id];
691 const int n_e_rows = ma->coeff_send_index[e_r_id+1] - r_start;
692 const cs_gnum_t *coeff_send_g_id = ma->coeff_send_col_g_id + r_start;
693
694 /* Diagonal term */
695 const cs_lnum_t e_diag_id = r_start + _g_binary_search(n_e_rows,
696 row->g_id,
697 coeff_send_g_id);
698
699 /* Now add values to send coefficients */
700 mav->coeff_send[e_diag_id] += row->val[row->i];
701
702 /* Loop on extra-diagonal entries */
703 for (int j = 0; j < row->i; j++) { /* Lower-part */
704
705 const cs_lnum_t e_id = r_start + _g_binary_search(n_e_rows,
706 row->col_g_id[j],
707 coeff_send_g_id);
708
709 /* Now add values to send coefficients */
710 mav->coeff_send[e_id] += row->val[j];
711
712 }
713
714 for (int j = row->i + 1; j < row->n_cols; j++) { /* Upper-part */
715
716 const cs_lnum_t e_id = r_start + _g_binary_search(n_e_rows,
717 row->col_g_id[j],
718 coeff_send_g_id);
719
720 /* Now add values to send coefficients */
721 mav->coeff_send[e_id] += row->val[j];
722
723 }
724 }
725
726 #endif /* defined(HAVE_MPI) */
727
728 /*============================================================================
729 * Private function definitions
730 *============================================================================*/
731
732 /*----------------------------------------------------------------------------*/
733 /*!
734 * \brief Allocate and initialize a cs_equation_assemble_t structure
735 *
736 * \param[in] max_ddim max. dim of the diagonal entries
737 * \param[in] max_edim max. dim of the extra-diagonal entries
738 * \param[in] n_max_cw_dofs max. number of DoF to be handled
739 *
740 * \return a pointer to a new allocated cs_equation_assemble_t structure
741 */
742 /*----------------------------------------------------------------------------*/
743
static cs_equation_assemble_t *
_init_equation_assembler_struct(int      max_ddim,
                                int      max_edim,
                                int      n_max_cw_dofs)
{
  cs_equation_assemble_t  *eqa = NULL;

  BFT_MALLOC(eqa, 1, cs_equation_assemble_t);

  eqa->ddim = max_ddim;
  eqa->edim = max_edim;

  BFT_MALLOC(eqa->row, 1, cs_equation_assemble_row_t);
  if (max_ddim < 2) {
    /* Scalar-valued case: one DoF per cellwise entity. Note that expval is
       NOT allocated here; the free function only releases it when ddim > 1,
       consistently with this branch */
    BFT_MALLOC(eqa->row->col_g_id, n_max_cw_dofs, cs_gnum_t);
    BFT_MALLOC(eqa->row->col_idx, n_max_cw_dofs, int);
  }
  else {
    n_max_cw_dofs *= max_ddim; /* Temporary (until the global matrix is not
                                  defined by block) */

    BFT_MALLOC(eqa->row->col_g_id, n_max_cw_dofs, cs_gnum_t);
    BFT_MALLOC(eqa->row->col_idx, n_max_cw_dofs, int);
    /* NOTE(review): expval gets max_ddim * (max_ddim * n_max_cw_dofs)
       entries since n_max_cw_dofs was just scaled above — presumably room
       for one expanded block row; confirm the extra factor is intended */
    BFT_MALLOC(eqa->row->expval, max_ddim*n_max_cw_dofs, cs_real_t);
  }

  return eqa;
}
772
773 /*----------------------------------------------------------------------------*/
774 /*!
775 * \brief Free a cs_equation_assemble_t structure
776 *
777 * \param[in, out] p_eqa pointer to a structure pointer to be freed
778 */
779 /*----------------------------------------------------------------------------*/
780
781 static void
_free_equation_assembler_struct(cs_equation_assemble_t ** p_eqa)782 _free_equation_assembler_struct(cs_equation_assemble_t **p_eqa)
783 {
784 if (*p_eqa == NULL)
785 return;
786
787 cs_equation_assemble_t *eqa = *p_eqa;
788
789 if (eqa->ddim > 1)
790 BFT_FREE(eqa->row->expval);
791 BFT_FREE(eqa->row->col_g_id);
792 BFT_FREE(eqa->row->col_idx);
793 BFT_FREE(eqa->row);
794
795 BFT_FREE(eqa);
796 *p_eqa = NULL;
797 }
798
799 /*----------------------------------------------------------------------------*/
800 /*!
801 * \brief Allocate and define a cs_matrix_assembler_t structure
802 *
803 * \param[in] n_elts number of elements
804 * \param[in] n_dofbyx number of DoFs by element
805 * \param[in] x2x pointer to a cs_adjacency_t structure
806 * \param[in] rs pointer to a range set or NULL if sequential
807 *
808 * \return a pointer to a new allocated cs_matrix_assembler_t structure
809 */
810 /*----------------------------------------------------------------------------*/
811
812 static cs_matrix_assembler_t *
_build_matrix_assembler(cs_lnum_t n_elts,int n_dofbyx,const cs_adjacency_t * x2x,const cs_range_set_t * rs)813 _build_matrix_assembler(cs_lnum_t n_elts,
814 int n_dofbyx,
815 const cs_adjacency_t *x2x,
816 const cs_range_set_t *rs)
817 {
818 cs_gnum_t *grows = NULL, *gcols = NULL;
819
820 /* The second paramter is set to "true" meaning that the diagonal is stored
821 separately --> MSR storage */
822 cs_matrix_assembler_t *ma = cs_matrix_assembler_create(rs->l_range, true);
823
824 /* First loop to count max size of the buffer */
825 cs_lnum_t max_size = 0;
826 for (cs_lnum_t id = 0; id < n_elts; id++)
827 max_size = CS_MAX(max_size, x2x->idx[id+1] - x2x->idx[id]);
828
829 /* We increment max_size to take into account the diagonal entry */
830 int buf_size = n_dofbyx * n_dofbyx * (max_size + 1);
831 BFT_MALLOC(grows, buf_size, cs_gnum_t);
832 BFT_MALLOC(gcols, buf_size, cs_gnum_t);
833
834 if (n_dofbyx == 1) { /* Simplified version */
835
836 for (cs_lnum_t row_id = 0; row_id < n_elts; row_id++) {
837
838 const cs_gnum_t grow_id = rs->g_id[row_id];
839 const cs_lnum_t start = x2x->idx[row_id];
840 const cs_lnum_t end = x2x->idx[row_id+1];
841
842 /* Diagonal term is excluded in this connectivity. Add it "manually" */
843 grows[0] = grow_id, gcols[0] = grow_id;
844
845 /* Extra diagonal couples */
846 for (cs_lnum_t j = start, i = 1; j < end; j++, i++) {
847 grows[i] = grow_id;
848 gcols[i] = rs->g_id[x2x->ids[j]];
849 }
850
851 cs_matrix_assembler_add_g_ids(ma, end - start + 1, grows, gcols);
852
853 } /* Loop on entities */
854
855 }
856 else {
857
858 for (cs_lnum_t row_id = 0; row_id < n_elts; row_id++) {
859
860 const cs_lnum_t start = x2x->idx[row_id];
861 const cs_lnum_t end = x2x->idx[row_id+1];
862 const int n_entries = (end - start + 1) * n_dofbyx * n_dofbyx;
863 const cs_gnum_t *grow_ids = rs->g_id + row_id*n_dofbyx;
864
865 int shift = 0;
866
867 /* Diagonal term is excluded in this connectivity. Add it "manually" */
868 for (int dof_i = 0; dof_i < n_dofbyx; dof_i++) {
869 const cs_gnum_t grow_id = grow_ids[dof_i];
870 for (int dof_j = 0; dof_j < n_dofbyx; dof_j++) {
871 grows[shift] = grow_id;
872 gcols[shift] = grow_ids[dof_j];
873 shift++;
874 }
875 }
876
877 /* Extra diagonal couples */
878 for (cs_lnum_t j = start; j < end; j++) {
879
880 const cs_lnum_t col_id = x2x->ids[j];
881 const cs_gnum_t *gcol_ids = rs->g_id + col_id*n_dofbyx;
882
883 for (int dof_i = 0; dof_i < n_dofbyx; dof_i++) {
884 const cs_gnum_t grow_id = grow_ids[dof_i];
885 for (int dof_j = 0; dof_j < n_dofbyx; dof_j++) {
886 grows[shift] = grow_id;
887 gcols[shift] = gcol_ids[dof_j];
888 shift++;
889 }
890 }
891
892 } /* Loop on number of DoFs by entity */
893
894 assert(shift == n_entries);
895 cs_matrix_assembler_add_g_ids(ma, n_entries, grows, gcols);
896
897 } /* Loop on entities */
898
899 }
900
901 /* Now compute structure */
902 cs_matrix_assembler_compute(ma);
903
904 /* Free temporary buffers */
905 BFT_FREE(grows);
906 BFT_FREE(gcols);
907
908 return ma;
909 }
910
911 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
912
913 /*============================================================================
914 * Public function definitions
915 *============================================================================*/
916
917 /*----------------------------------------------------------------------------*/
918 /*!
919 * \brief Retrieve the pointer to a requested \ref cs_matrix_structure_t
920 * structure
921 *
922 * \param[in] flag_id id in the array of matrix structures
923 *
924 * \return a pointer to a cs_matrix_structure_t
925 */
926 /*----------------------------------------------------------------------------*/
927
928 cs_matrix_structure_t *
cs_equation_get_matrix_structure(int flag)929 cs_equation_get_matrix_structure(int flag)
930 {
931 if (cs_equation_assemble_ms == NULL || flag < 0)
932 return NULL;
933
934 if (flag < CS_CDO_CONNECT_N_CASES)
935 return cs_equation_assemble_ms[flag];
936 else
937 return NULL;
938 }
939
940 /*----------------------------------------------------------------------------*/
941 /*!
942 * \brief Get a pointer to a cs_equation_assemble_t structure related
943 * to a given thread
944 *
945 * \param[in] t_id id in the array of pointer
946 *
947 * \return a pointer to a cs_equation_assemble_t structure
948 */
949 /*----------------------------------------------------------------------------*/
950
951 cs_equation_assemble_t *
cs_equation_assemble_get(int t_id)952 cs_equation_assemble_get(int t_id)
953 {
954 if (t_id < 0 || t_id >= cs_glob_n_threads)
955 return NULL;
956
957 return cs_equation_assemble[t_id];
958 }
959
960 /*----------------------------------------------------------------------------*/
961 /*!
962 * \brief Allocate and initialize matrix-related structures according to
963 * the type of discretization used for this simulation
964 *
 * \param[in]  connect    pointer to a cs_cdo_connect_t structure
 * \param[in]  eb_flag    metadata for Edge-based schemes
968 * \param[in] fb_flag metadata for Face-based schemes
969 * \param[in] vb_flag metadata for Vertex-based schemes
 * \param[in]  vcb_flag   metadata for Vertex+Cell-based schemes
971 * \param[in] hho_flag metadata for HHO schemes
972 */
973 /*----------------------------------------------------------------------------*/
974
void
cs_equation_assemble_init(const cs_cdo_connect_t       *connect,
                          cs_flag_t                     eb_flag,
                          cs_flag_t                     fb_flag,
                          cs_flag_t                     vb_flag,
                          cs_flag_t                     vcb_flag,
                          cs_flag_t                     hho_flag)
{
  assert(connect != NULL); /* Sanity check */

  cs_timer_t  t0, t1;

  /* Reset the counter monitoring the time spent building matrix
     structures (reported in cs_equation_assemble_finalize) */
  CS_TIMER_COUNTER_INIT(cs_equation_ms_time);

  /* Two types of matrix assembler are considered:
   * - The one related to matrix based on vertices
   * - The one related to matrix based on faces
   * Allocate one slot per connectivity case; unused slots stay NULL */
  BFT_MALLOC(cs_equation_assemble_ma,
             CS_CDO_CONNECT_N_CASES, cs_matrix_assembler_t *);
  for (int i = 0; i < CS_CDO_CONNECT_N_CASES; i++)
    cs_equation_assemble_ma[i] = NULL;

  BFT_MALLOC(cs_equation_assemble_ms,
             CS_CDO_CONNECT_N_CASES, cs_matrix_structure_t *);
  for (int i = 0; i < CS_CDO_CONNECT_N_CASES; i++)
    cs_equation_assemble_ms[i] = NULL;

  const cs_lnum_t  n_faces = connect->n_faces[CS_ALL_FACES];
  const cs_lnum_t  n_vertices = connect->n_vertices;
  const cs_lnum_t  n_edges = connect->n_edges;

  /* Allocate shared buffer and initialize shared structures
     Number max of DoFs at the cell level: n_max_cw_dofs
     Greatest dimension of the diagonal block: max_ddim
     Greatest dimension of the extra-diagonal block: max_edim
   */
  int  n_max_cw_dofs = 0, max_ddim = 1, max_edim = 1;

  /* Allocate and initialize matrix assembler and matrix structures
     for vertex-based and vertex+cell-based schemes (both rely on the
     vertex-to-vertex adjacency) */
  if (vb_flag > 0 || vcb_flag > 0) {

    const cs_adjacency_t  *v2v = connect->v2v;

    assert(v2v != NULL);
    n_max_cw_dofs = CS_MAX(n_max_cw_dofs, connect->n_max_vbyc);

    if (vb_flag & CS_FLAG_SCHEME_SCALAR || vcb_flag & CS_FLAG_SCHEME_SCALAR) {

      t0 = cs_timer_time();

      /* Build the matrix structure and the matrix assembler structure */
      const cs_range_set_t  *rs = connect->range_sets[CS_CDO_CONNECT_VTX_SCAL];

      cs_matrix_assembler_t  *ma = _build_matrix_assembler(n_vertices, 1, v2v,
                                                           rs);
      cs_matrix_structure_t  *ms
        = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma);

      cs_equation_assemble_ma[CS_CDO_CONNECT_VTX_SCAL] = ma;
      cs_equation_assemble_ms[CS_CDO_CONNECT_VTX_SCAL] = ms;

      t1 = cs_timer_time();
      cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);

    } /* scalar-valued DoFs */

    if (vb_flag & CS_FLAG_SCHEME_VECTOR || vcb_flag & CS_FLAG_SCHEME_VECTOR) {

      t0 = cs_timer_time();

      /* 3 DoFs per vertex (vector-valued unknown) */
      const cs_range_set_t  *rs = connect->range_sets[CS_CDO_CONNECT_VTX_VECT];

      cs_matrix_assembler_t  *ma = _build_matrix_assembler(n_vertices, 3, v2v,
                                                           rs);
      cs_matrix_structure_t  *ms
        = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma);

      cs_equation_assemble_ma[CS_CDO_CONNECT_VTX_VECT] = ma;
      cs_equation_assemble_ms[CS_CDO_CONNECT_VTX_VECT] = ms;

      t1 = cs_timer_time();
      cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);

      max_ddim = CS_MAX(max_ddim, 3);
      max_edim = CS_MAX(max_edim, 3);

    } /* vector-valued DoFs */

  } /* Vertex-based schemes and related ones */

  /* Allocate and initialize matrix assembler and matrix structures
     for edge-based schemes (edge-to-edge adjacency) */
  if (eb_flag > 0) {

    const cs_adjacency_t  *e2e = connect->e2e;

    assert(e2e != NULL);
    n_max_cw_dofs = CS_MAX(n_max_cw_dofs, connect->n_max_ebyc);

    if (eb_flag & CS_FLAG_SCHEME_SCALAR) {

      t0 = cs_timer_time();

      /* Build the matrix structure and the matrix assembler structure */
      const cs_range_set_t  *rs = connect->range_sets[CS_CDO_CONNECT_EDGE_SCAL];

      cs_matrix_assembler_t  *ma = _build_matrix_assembler(n_edges, 1, e2e, rs);
      cs_matrix_structure_t  *ms
        = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma);

      cs_equation_assemble_ma[CS_CDO_CONNECT_EDGE_SCAL] = ma;
      cs_equation_assemble_ms[CS_CDO_CONNECT_EDGE_SCAL] = ms;

      t1 = cs_timer_time();
      cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);

    } /* scalar-valued DoFs */

  } /* Edge-based schemes and related ones */

  /* Face-based (CDO-Fb) and HHO schemes share the face-to-face adjacency;
     the number of DoFs per face depends on the polynomial order */
  if (fb_flag > 0 || hho_flag > 0) {

    cs_matrix_structure_t  *ms0 = NULL, *ms1 = NULL, *ms2 = NULL;
    cs_matrix_assembler_t  *ma0 = NULL, *ma1 = NULL, *ma2 = NULL;

    n_max_cw_dofs = CS_MAX(n_max_cw_dofs, connect->n_max_fbyc);

    const cs_adjacency_t  *f2f = connect->f2f;

    if (cs_flag_test(fb_flag, CS_FLAG_SCHEME_POLY0 | CS_FLAG_SCHEME_SCALAR) ||
        cs_flag_test(hho_flag, CS_FLAG_SCHEME_POLY0 | CS_FLAG_SCHEME_SCALAR)) {

      t0 = cs_timer_time();

      /* 1 DoF per face */
      const cs_range_set_t  *rs = connect->range_sets[CS_CDO_CONNECT_FACE_SP0];

      ma0 = _build_matrix_assembler(n_faces, 1, f2f, rs);
      ms0 = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma0);

      cs_equation_assemble_ma[CS_CDO_CONNECT_FACE_SP0] = ma0;
      cs_equation_assemble_ms[CS_CDO_CONNECT_FACE_SP0] = ms0;

      t1 = cs_timer_time();
      cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);

    } /* Scalar-valued CDO-Fb or HHO-P0 */

    if (cs_flag_test(fb_flag, CS_FLAG_SCHEME_POLY0 | CS_FLAG_SCHEME_VECTOR) ||
        cs_flag_test(hho_flag, CS_FLAG_SCHEME_POLY1 | CS_FLAG_SCHEME_SCALAR) ||
        cs_flag_test(hho_flag, CS_FLAG_SCHEME_POLY0 | CS_FLAG_SCHEME_VECTOR)) {

      t0 = cs_timer_time();

      const cs_range_set_t  *rs = connect->range_sets[CS_CDO_CONNECT_FACE_SP1];

      ma1 = _build_matrix_assembler(n_faces, CS_N_FACE_DOFS_1ST, f2f, rs);
      ms1 = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma1);

      /* These three cases share the same assembler/structure slot */
      assert((CS_CDO_CONNECT_FACE_SP1 == CS_CDO_CONNECT_FACE_VP0) &&
             (CS_CDO_CONNECT_FACE_SP1 == CS_CDO_CONNECT_FACE_VHP0));

      cs_equation_assemble_ma[CS_CDO_CONNECT_FACE_SP1] = ma1;
      cs_equation_assemble_ms[CS_CDO_CONNECT_FACE_SP1] = ms1;

      t1 = cs_timer_time();
      cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);

      max_ddim = CS_MAX(max_ddim, 3);
      max_edim = CS_MAX(max_edim, 3);

    } /* Vector CDO-Fb or HHO-P1 or vector HHO-P0 */

    if (cs_flag_test(hho_flag,
                     CS_FLAG_SCHEME_POLY2 | CS_FLAG_SCHEME_SCALAR)) {

      t0 = cs_timer_time();

      const cs_range_set_t  *rs = connect->range_sets[CS_CDO_CONNECT_FACE_SP2];

      ma2 = _build_matrix_assembler(n_faces, CS_N_FACE_DOFS_2ND, f2f, rs);
      ms2 = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma2);

      cs_equation_assemble_ma[CS_CDO_CONNECT_FACE_SP2] = ma2;
      cs_equation_assemble_ms[CS_CDO_CONNECT_FACE_SP2] = ms2;

      t1 = cs_timer_time();
      cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);

      max_ddim = CS_MAX(max_ddim, CS_N_FACE_DOFS_2ND);
      max_edim = CS_MAX(max_edim, CS_N_FACE_DOFS_2ND);

    }

    /* For vector equations and HHO */
    if (cs_flag_test(hho_flag, CS_FLAG_SCHEME_VECTOR | CS_FLAG_SCHEME_POLY1) ||
        cs_flag_test(hho_flag, CS_FLAG_SCHEME_VECTOR | CS_FLAG_SCHEME_POLY2)) {

      if (hho_flag & CS_FLAG_SCHEME_POLY1) {

        t0 = cs_timer_time();

        const cs_range_set_t  *rs
          = connect->range_sets[CS_CDO_CONNECT_FACE_VHP1];

        /* 3 components x CS_N_FACE_DOFS_1ST scalar DoFs per face */
        ma1 = _build_matrix_assembler(n_faces, 3*CS_N_FACE_DOFS_1ST, f2f, rs);
        ms1 = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma1);

        cs_equation_assemble_ma[CS_CDO_CONNECT_FACE_VHP1] = ma1;
        cs_equation_assemble_ms[CS_CDO_CONNECT_FACE_VHP1] = ms1;

        t1 = cs_timer_time();
        cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);

        max_ddim = CS_MAX(max_ddim, 3*CS_N_FACE_DOFS_1ST);
        max_edim = CS_MAX(max_edim, 3*CS_N_FACE_DOFS_1ST);

      }
      else if (hho_flag & CS_FLAG_SCHEME_POLY2) {

        t0 = cs_timer_time();

        const cs_range_set_t  *rs
          = connect->range_sets[CS_CDO_CONNECT_FACE_VHP2];

        ma2 = _build_matrix_assembler(n_faces, 3*CS_N_FACE_DOFS_2ND, f2f, rs);
        ms2 = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma2);

        cs_equation_assemble_ma[CS_CDO_CONNECT_FACE_VHP2] = ma2;
        cs_equation_assemble_ms[CS_CDO_CONNECT_FACE_VHP2] = ms2;

        t1 = cs_timer_time();
        cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);

        /* 18 = 3*6 = 3*CS_N_FACE_DOFS_2ND */
        max_ddim = CS_MAX(max_ddim, 3*CS_N_FACE_DOFS_2ND);
        max_edim = CS_MAX(max_edim, 3*CS_N_FACE_DOFS_2ND);

      }

    }

  } /* Face-based schemes (CDO or HHO) */

  /* Common buffers for assemble usage: one cs_equation_assemble_t
     structure per thread, sized by the largest block dimensions found */
  const int  n_threads = cs_glob_n_threads;
  BFT_MALLOC(cs_equation_assemble, n_threads, cs_equation_assemble_t *);
  for (int i = 0; i < n_threads; i++)
    cs_equation_assemble[i] = NULL;

#if defined(HAVE_OPENMP) /* Allocate one assembly structure per thread */
#pragma omp parallel
  {
    /* Each thread allocate its part. This yields a better memory affinity */
    int  t_id = omp_get_thread_num();
    cs_equation_assemble[t_id] = _init_equation_assembler_struct(max_ddim,
                                                                 max_edim,
                                                                 n_max_cw_dofs);
  }
#else
  cs_equation_assemble[0] = _init_equation_assembler_struct(max_ddim,
                                                            max_edim,
                                                            n_max_cw_dofs);
#endif
}
1238
1239 /*----------------------------------------------------------------------------*/
1240 /*!
1241 * \brief Free matrix-related structures used during the simulation.
1242 * Display overall statistic about the assembly stage for CDO schemes
1243 */
1244 /*----------------------------------------------------------------------------*/
1245
1246 void
cs_equation_assemble_finalize(void)1247 cs_equation_assemble_finalize(void)
1248 {
1249 /* Display profiling/performance information */
1250 cs_log_printf(CS_LOG_PERFORMANCE, " <CDO/Assembly> structure: %5.3e\n",
1251 cs_equation_ms_time.nsec*1e-9);
1252
1253 /* Free common assemble buffers */
1254 #if defined(HAVE_OPENMP) /* Determine the default number of OpenMP threads */
1255 #pragma omp parallel
1256 {
1257 int t_id = omp_get_thread_num();
1258 cs_equation_assemble_t *eqa = cs_equation_assemble[t_id];
1259 _free_equation_assembler_struct(&eqa);
1260 }
1261 #else
1262 cs_equation_assemble_t *eqa = cs_equation_assemble[0];
1263 _free_equation_assembler_struct(&eqa);
1264 #endif
1265 BFT_FREE(cs_equation_assemble);
1266
1267 /* Free matrix structures */
1268 for (int i = 0; i < CS_CDO_CONNECT_N_CASES; i++)
1269 cs_matrix_structure_destroy(&(cs_equation_assemble_ms[i]));
1270 BFT_FREE(cs_equation_assemble_ms);
1271
1272 /* Free matrix assemblers */
1273 for (int i = 0; i < CS_CDO_CONNECT_N_CASES; i++)
1274 cs_matrix_assembler_destroy(&(cs_equation_assemble_ma[i]));
1275 BFT_FREE(cs_equation_assemble_ma);
1276 }
1277
1278 /*----------------------------------------------------------------------------*/
1279 /*!
1280 * \brief Define the function pointer used to assemble the algebraic system
1281 *
1282 * \param[in] scheme space discretization scheme
1283 * \param[in] ma_id id in the array of matrix assembler
1284 *
1285 * \return a function pointer cs_equation_assembly_t
1286 */
1287 /*----------------------------------------------------------------------------*/
1288
1289 cs_equation_assembly_t *
cs_equation_assemble_set(cs_param_space_scheme_t scheme,int ma_id)1290 cs_equation_assemble_set(cs_param_space_scheme_t scheme,
1291 int ma_id)
1292 {
1293 switch (scheme) {
1294
1295 case CS_SPACE_SCHEME_CDOVB:
1296 if (ma_id == CS_CDO_CONNECT_VTX_SCAL)
1297 return _set_scalar_assembly_func();
1298 else if (ma_id == CS_CDO_CONNECT_VTX_VECT)
1299 return _set_block33_assembly_func();
1300 break;
1301
1302 case CS_SPACE_SCHEME_CDOVCB:
1303 if (ma_id == CS_CDO_CONNECT_VTX_SCAL)
1304 return _set_scalar_assembly_func();
1305 break;
1306
1307 case CS_SPACE_SCHEME_HHO_P0:
1308 case CS_SPACE_SCHEME_CDOFB:
1309 if (ma_id == CS_CDO_CONNECT_FACE_SP0)
1310 return _set_scalar_assembly_func();
1311 else if (ma_id == CS_CDO_CONNECT_FACE_VP0)
1312 return _set_block33_assembly_func();
1313 break;
1314
1315 case CS_SPACE_SCHEME_HHO_P1:
1316 case CS_SPACE_SCHEME_HHO_P2:
1317 if (ma_id == CS_CDO_CONNECT_FACE_SP1)
1318 return _set_block33_assembly_func();
1319 else
1320 return _set_block_assembly_func();
1321 break;
1322
1323 case CS_SPACE_SCHEME_CDOEB:
1324 if (ma_id == CS_CDO_CONNECT_EDGE_SCAL)
1325 return _set_scalar_assembly_func();
1326 break;
1327
1328 default:
1329 return NULL; /* Case not handle */
1330 }
1331
1332 return NULL;
1333 }
1334
1335 #if defined(HAVE_MPI)
1336
1337 /*----------------------------------------------------------------------------*/
1338 /*!
1339 * \brief Assemble a cellwise matrix into the global matrix
1340 * Scalar-valued case. Parallel and with openMP threading.
1341 *
1342 * \param[in] m cellwise view of the algebraic system
1343 * \param[in] dof_ids local DoF numbering
1344 * \param[in] rset pointer to a cs_range_set_t structure
1345 * \param[in, out] eqa pointer to a matrix assembler buffers
1346 * \param[in, out] mav pointer to a matrix assembler structure
1347 */
1348 /*----------------------------------------------------------------------------*/
1349
1350 void
cs_equation_assemble_matrix_mpit(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1351 cs_equation_assemble_matrix_mpit(const cs_sdm_t *m,
1352 const cs_lnum_t *dof_ids,
1353 const cs_range_set_t *rset,
1354 cs_equation_assemble_t *eqa,
1355 cs_matrix_assembler_values_t *mav)
1356 {
1357 const cs_matrix_assembler_t *ma = mav->ma;
1358
1359 cs_equation_assemble_row_t *row = eqa->row;
1360
1361 row->n_cols = m->n_rows;
1362
1363 /* Switch to the global numbering */
1364 for (int i = 0; i < row->n_cols; i++)
1365 row->col_g_id[i] = rset->g_id[dof_ids[i]];
1366
1367 /* Push each row of the cellwise matrix into the assembler */
1368 for (int i = 0; i < row->n_cols; i++) {
1369
1370 row->i = i; /* cellwise numbering */
1371 row->g_id = row->col_g_id[i]; /* global numbering */
1372 row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1373 row->val = m->val + i*row->n_cols;
1374
1375 if (row->l_id < 0 || row->l_id >= rset->n_elts[0])
1376 _assemble_row_scal_dt(mav, ma, row);
1377
1378 else {
1379
1380 _assemble_row_scal_ld(ma, row);
1381
1382 #if CS_CDO_OMP_SYNC_SECTIONS > 0 /* OpenMP with critical section */
1383 _add_scal_values_critical(row, mav->matrix);
1384 #else
1385 _add_scal_values_atomic(row, mav->matrix);
1386 #endif
1387
1388 }
1389
1390 }
1391
1392 }
1393
1394 /*----------------------------------------------------------------------------*/
1395 /*!
1396 * \brief Assemble a cellwise matrix into the global matrix
1397 * Scalar-valued case. Parallel without openMP threading.
1398 *
1399 * \param[in] m cellwise view of the algebraic system
1400 * \param[in] dof_ids local DoF numbering
1401 * \param[in] rset pointer to a cs_range_set_t structure
1402 * \param[in, out] eqa pointer to a matrix assembler buffers
1403 * \param[in, out] mav pointer to a matrix assembler structure
1404 */
1405 /*----------------------------------------------------------------------------*/
1406
1407 void
cs_equation_assemble_matrix_mpis(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1408 cs_equation_assemble_matrix_mpis(const cs_sdm_t *m,
1409 const cs_lnum_t *dof_ids,
1410 const cs_range_set_t *rset,
1411 cs_equation_assemble_t *eqa,
1412 cs_matrix_assembler_values_t *mav)
1413 {
1414 const cs_matrix_assembler_t *ma = mav->ma;
1415
1416 cs_equation_assemble_row_t *row = eqa->row;
1417
1418 row->n_cols = m->n_rows;
1419
1420 /* Switch to the global numbering */
1421 for (int i = 0; i < row->n_cols; i++)
1422 row->col_g_id[i] = rset->g_id[dof_ids[i]];
1423
1424 /* Push each row of the cellwise matrix into the assembler */
1425 for (int i = 0; i < row->n_cols; i++) {
1426
1427 row->i = i; /* cellwise numbering */
1428 row->g_id = row->col_g_id[i]; /* global numbering */
1429 row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1430 row->val = m->val + i*row->n_cols;
1431
1432 if (row->l_id < 0 || row->l_id >= rset->n_elts[0])
1433 _assemble_row_scal_ds(mav, ma, row);
1434
1435 else {
1436
1437 _assemble_row_scal_ld(ma, row);
1438 _add_scal_values_single(row, mav->matrix);
1439
1440 }
1441
1442 }
1443
1444 }
1445
1446 #endif /* defined(HAVE_MPI) */
1447
1448 /*----------------------------------------------------------------------------*/
1449 /*!
1450 * \brief Assemble a cellwise matrix into the global matrix
1451 * Scalar-valued case. Sequential and with openMP threading.
1452 *
1453 * \param[in] m cellwise view of the algebraic system
1454 * \param[in] dof_ids local DoF numbering
1455 * \param[in] rset pointer to a cs_range_set_t structure
1456 * \param[in, out] eqa pointer to a matrix assembler buffers
1457 * \param[in, out] mav pointer to a matrix assembler structure
1458 */
1459 /*----------------------------------------------------------------------------*/
1460
1461 void
cs_equation_assemble_matrix_seqt(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1462 cs_equation_assemble_matrix_seqt(const cs_sdm_t *m,
1463 const cs_lnum_t *dof_ids,
1464 const cs_range_set_t *rset,
1465 cs_equation_assemble_t *eqa,
1466 cs_matrix_assembler_values_t *mav)
1467 {
1468 const cs_matrix_assembler_t *ma = mav->ma;
1469
1470 cs_equation_assemble_row_t *row = eqa->row;
1471
1472 row->n_cols = m->n_rows;
1473
1474 /* Switch to the global numbering */
1475 for (int i = 0; i < row->n_cols; i++)
1476 row->col_g_id[i] = rset->g_id[dof_ids[i]];
1477
1478 /* Push each row of the cellwise matrix into the assembler */
1479 for (int i = 0; i < row->n_cols; i++) {
1480
1481 row->i = i; /* cellwise numbering */
1482 row->g_id = row->col_g_id[i]; /* global numbering */
1483 row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1484 row->val = m->val + i*row->n_cols;
1485
1486 _assemble_row_scal_l(ma, row);
1487
1488 #if CS_CDO_OMP_SYNC_SECTIONS > 0 /* OpenMP with critical section */
1489 _add_scal_values_critical(row, mav->matrix);
1490 #else
1491 _add_scal_values_atomic(row, mav->matrix);
1492 #endif
1493
1494 }
1495
1496 }
1497
1498 /*----------------------------------------------------------------------------*/
1499 /*!
1500 * \brief Assemble a cellwise matrix into the global matrix
1501 * Scalar-valued case. Sequential and without openMP.
1502 *
1503 * \param[in] m cellwise view of the algebraic system
1504 * \param[in] dof_ids local DoF numbering
1505 * \param[in] rset pointer to a cs_range_set_t structure
1506 * \param[in, out] eqa pointer to a matrix assembler buffers
1507 * \param[in, out] mav pointer to a matrix assembler structure
1508 */
1509 /*----------------------------------------------------------------------------*/
1510
1511 void
cs_equation_assemble_matrix_seqs(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1512 cs_equation_assemble_matrix_seqs(const cs_sdm_t *m,
1513 const cs_lnum_t *dof_ids,
1514 const cs_range_set_t *rset,
1515 cs_equation_assemble_t *eqa,
1516 cs_matrix_assembler_values_t *mav)
1517 {
1518 const cs_matrix_assembler_t *ma = mav->ma;
1519
1520 cs_equation_assemble_row_t *row = eqa->row;
1521
1522 row->n_cols = m->n_rows;
1523
1524 /* Switch to the global numbering */
1525 for (int i = 0; i < row->n_cols; i++)
1526 row->col_g_id[i] = rset->g_id[dof_ids[i]];
1527
1528 /* Push each row of the cellwise matrix into the assembler */
1529 for (int i = 0; i < row->n_cols; i++) {
1530
1531 row->i = i; /* cellwise numbering */
1532 row->g_id = row->col_g_id[i]; /* global numbering */
1533 row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1534 row->val = m->val + i*row->n_cols;
1535
1536 _assemble_row_scal_l(ma, row);
1537 _add_scal_values_single(row, mav->matrix);
1538
1539 } /* Loop on rows */
1540
1541 }
1542
1543 /*----------------------------------------------------------------------------*/
1544 /*!
1545 * \brief Assemble a cellwise matrix into the global matrix
1546 * Case of a block 3x3 entries. Expand each row.
1547 * Sequential run without openMP threading.
1548 *
1549 * \param[in] m cellwise view of the algebraic system
1550 * \param[in] dof_ids local DoF numbering
1551 * \param[in] rset pointer to a cs_range_set_t structure
1552 * \param[in, out] eqa pointer to an equation assembly structure
1553 * \param[in, out] mav pointer to a matrix assembler structure
1554 */
1555 /*----------------------------------------------------------------------------*/
1556
1557 void
cs_equation_assemble_eblock33_matrix_seqs(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1558 cs_equation_assemble_eblock33_matrix_seqs(const cs_sdm_t *m,
1559 const cs_lnum_t *dof_ids,
1560 const cs_range_set_t *rset,
1561 cs_equation_assemble_t *eqa,
1562 cs_matrix_assembler_values_t *mav)
1563 {
1564 const cs_sdm_block_t *bd = m->block_desc;
1565 const cs_matrix_assembler_t *ma = mav->ma;
1566
1567 cs_equation_assemble_row_t *row = eqa->row;
1568
1569 /* Sanity checks */
1570 assert(m->flag & CS_SDM_BY_BLOCK);
1571 assert(m->block_desc != NULL);
1572 assert(bd->n_row_blocks == bd->n_col_blocks);
1573 assert(eqa->ddim >= 3);
1574 assert(row->expval != NULL);
1575
1576 /* Expand the values for a bundle of rows */
1577 cs_real_t *_vxyz[3] = {row->expval,
1578 row->expval + m->n_rows,
1579 row->expval + 2*m->n_rows };
1580
1581 assert(m->n_rows == m->n_rows);
1582 row->n_cols = m->n_rows;
1583
1584 /* Switch to the global numbering */
1585 for (int i = 0; i < row->n_cols; i++)
1586 row->col_g_id[i] = rset->g_id[dof_ids[i]];
1587
1588 for (int bi = 0; bi < bd->n_row_blocks; bi++) {
1589
1590 /* Expand all the blocks for this row */
1591 for (int bj = 0; bj < bd->n_col_blocks; bj++) {
1592
1593 /* mIJ matrices are small square matrices of size 3 */
1594 const cs_sdm_t *const mIJ = cs_sdm_get_block(m, bi, bj);
1595 const cs_real_t *const mvals = mIJ->val;
1596 for (int k = 0; k < 3; k++) {
1597 _vxyz[0][3*bj+k] = mvals[ k];
1598 _vxyz[1][3*bj+k] = mvals[3+k];
1599 _vxyz[2][3*bj+k] = mvals[6+k];
1600 }
1601
1602 } /* Loop on column-wise blocks */
1603
1604 /* dof_ids is an interlaced array (get access to the next 3 values) */
1605 for (int k = 0; k < 3; k++) {
1606 row->i = 3*bi+k; /* cellwise numbering */
1607 row->g_id = row->col_g_id[3*bi+k]; /* global numbering */
1608 row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1609 row->val = _vxyz[k];
1610
1611 _assemble_row_scal_l(ma, row);
1612 _add_scal_values_single(row, mav->matrix);
1613
1614 } /* Push each row of the block */
1615
1616 } /* Loop on row-wise blocks */
1617
1618 }
1619
1620 /*----------------------------------------------------------------------------*/
1621 /*!
1622 * \brief Assemble a cellwise matrix into the global matrix
1623 * Case of a block 3x3 entries. Expand each row.
1624 * Sequential run with openMP threading.
1625 *
1626 * \param[in] m cellwise view of the algebraic system
1627 * \param[in] dof_ids local DoF numbering
1628 * \param[in] rset pointer to a cs_range_set_t structure
1629 * \param[in, out] eqa pointer to an equation assembly structure
1630 * \param[in, out] mav pointer to a matrix assembler structure
1631 */
1632 /*----------------------------------------------------------------------------*/
1633
1634 void
cs_equation_assemble_eblock33_matrix_seqt(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1635 cs_equation_assemble_eblock33_matrix_seqt(const cs_sdm_t *m,
1636 const cs_lnum_t *dof_ids,
1637 const cs_range_set_t *rset,
1638 cs_equation_assemble_t *eqa,
1639 cs_matrix_assembler_values_t *mav)
1640 {
1641 const cs_sdm_block_t *bd = m->block_desc;
1642 const cs_matrix_assembler_t *ma = mav->ma;
1643
1644 cs_equation_assemble_row_t *row = eqa->row;
1645
1646 /* Sanity checks */
1647 assert(m->flag & CS_SDM_BY_BLOCK);
1648 assert(m->block_desc != NULL);
1649 assert(bd->n_row_blocks == bd->n_col_blocks);
1650 assert(eqa->ddim >= 3);
1651 assert(row->expval != NULL);
1652
1653 /* Expand the values for a bundle of rows */
1654 cs_real_t *_vxyz[3] = {row->expval,
1655 row->expval + m->n_rows,
1656 row->expval + 2*m->n_rows };
1657
1658 assert(m->n_rows == m->n_rows);
1659 row->n_cols = m->n_rows;
1660
1661 /* Switch to the global numbering */
1662 for (int i = 0; i < row->n_cols; i++)
1663 row->col_g_id[i] = rset->g_id[dof_ids[i]];
1664
1665 for (int bi = 0; bi < bd->n_row_blocks; bi++) {
1666
1667 /* Expand all the blocks for this row */
1668 for (int bj = 0; bj < bd->n_col_blocks; bj++) {
1669
1670 /* mIJ matrices are small square matrices of size 3 */
1671 const cs_sdm_t *const mIJ = cs_sdm_get_block(m, bi, bj);
1672 const cs_real_t *const mvals = mIJ->val;
1673 for (int k = 0; k < 3; k++) {
1674 _vxyz[0][3*bj+k] = mvals[ k];
1675 _vxyz[1][3*bj+k] = mvals[3+k];
1676 _vxyz[2][3*bj+k] = mvals[6+k];
1677 }
1678
1679 } /* Loop on column-wise blocks */
1680
1681 /* dof_ids is an interlaced array (get access to the next 3 values */
1682 for (int k = 0; k < 3; k++) {
1683 row->i = 3*bi+k; /* cellwise numbering */
1684 row->g_id = row->col_g_id[3*bi+k]; /* global numbering */
1685 row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1686 row->val = _vxyz[k];
1687
1688 _assemble_row_scal_l(ma, row);
1689
1690 #if CS_CDO_OMP_SYNC_SECTIONS > 0 /* OpenMP with critical section */
1691 _add_scal_values_critical(row, mav->matrix);
1692 #else
1693 _add_scal_values_atomic(row, mav->matrix);
1694 #endif
1695
1696 } /* Push each row of the block */
1697
1698 } /* Loop on row-wise blocks */
1699 }
1700
1701 #if defined(HAVE_MPI)
1702
1703 /*----------------------------------------------------------------------------*/
1704 /*!
1705 * \brief Assemble a cellwise matrix into the global matrix
1706 * Case of a block 3x3 entries. Expand each row.
1707 * Parallel run without openMP threading.
1708 *
1709 * \param[in] m cellwise view of the algebraic system
1710 * \param[in] dof_ids local DoF numbering
1711 * \param[in] rset pointer to a cs_range_set_t structure
1712 * \param[in, out] eqa pointer to an equation assembly structure
1713 * \param[in, out] mav pointer to a matrix assembler structure
1714 */
1715 /*----------------------------------------------------------------------------*/
1716
void
cs_equation_assemble_eblock33_matrix_mpis(const cs_sdm_t               *m,
                                          const cs_lnum_t              *dof_ids,
                                          const cs_range_set_t         *rset,
                                          cs_equation_assemble_t       *eqa,
                                          cs_matrix_assembler_values_t *mav)
{
  const cs_sdm_block_t  *bd = m->block_desc;
  const cs_matrix_assembler_t  *ma = mav->ma;

  cs_equation_assemble_row_t  *row = eqa->row;

  /* Sanity checks */
  assert(m->flag & CS_SDM_BY_BLOCK);
  assert(m->block_desc != NULL);
  assert(bd->n_row_blocks == bd->n_col_blocks);
  assert(eqa->ddim >= 3);
  assert(row->expval != NULL);

  /* Expand the values for a bundle of rows: _vxyz[k] receives the k-th
     scalar row of the current 3-row bundle */

  cs_real_t  *_vxyz[3] = {row->expval,
                          row->expval + m->n_rows,
                          row->expval + 2*m->n_rows };

  row->n_cols = m->n_rows;

  /* Switch to the global numbering */

  for (int i = 0; i < row->n_cols; i++)
    row->col_g_id[i] = rset->g_id[dof_ids[i]];

  for (int bi = 0; bi < bd->n_row_blocks; bi++) {

    /* Expand all the blocks for this row */

    for (int bj = 0; bj < bd->n_col_blocks; bj++) {

      /* mIJ matrices are small square matrices of size 3 (mvals is
         stored row-major: rows at offsets 0, 3 and 6) */

      const cs_sdm_t *const  mIJ = cs_sdm_get_block(m, bi, bj);
      const cs_real_t *const  mvals = mIJ->val;
      for (int k = 0; k < 3; k++) {
        _vxyz[0][3*bj+k] = mvals[  k];
        _vxyz[1][3*bj+k] = mvals[3+k];
        _vxyz[2][3*bj+k] = mvals[6+k];
      }

    } /* Loop on column-wise blocks */

    /* dof_ids is an interlaced array (get access to the next 3 values) */

    for (int k = 0; k < 3; k++) {

      row->i = 3*bi+k;                          /* cellwise numbering */
      row->g_id = row->col_g_id[3*bi+k];        /* global numbering */
      row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
      row->val = _vxyz[k];

      /* A row outside [0, n_elts[0][ is owned by a distant rank */
      if (row->l_id < 0 || row->l_id >= rset->n_elts[0])
        _assemble_row_scal_ds(mav, ma, row);

      else {

        _assemble_row_scal_ld(ma, row);
        _add_scal_values_single(row, mav->matrix);

      }

    } /* Push each row of the block */

  } /* Loop on row-wise blocks */
}
1789
1790 /*----------------------------------------------------------------------------*/
1791 /*!
1792 * \brief Assemble a cellwise matrix into the global matrix
1793 * Case of a block 3x3 entries. Expand each row.
1794 * Parallel run with openMP threading.
1795 *
1796 * \param[in] m cellwise view of the algebraic system
1797 * \param[in] dof_ids local DoF numbering
1798 * \param[in] rset pointer to a cs_range_set_t structure
1799 * \param[in, out] eqa pointer to an equation assembly structure
1800 * \param[in, out] mav pointer to a matrix assembler structure
1801 */
1802 /*----------------------------------------------------------------------------*/
1803
void
cs_equation_assemble_eblock33_matrix_mpit(const cs_sdm_t               *m,
                                          const cs_lnum_t              *dof_ids,
                                          const cs_range_set_t         *rset,
                                          cs_equation_assemble_t       *eqa,
                                          cs_matrix_assembler_values_t *mav)
{
  const cs_sdm_block_t  *bd = m->block_desc;
  const cs_matrix_assembler_t  *ma = mav->ma;

  cs_equation_assemble_row_t  *row = eqa->row;

  /* Sanity checks */
  assert(m->flag & CS_SDM_BY_BLOCK);
  assert(m->block_desc != NULL);
  assert(bd->n_row_blocks == bd->n_col_blocks);
  assert(eqa->ddim >= 3);
  assert(row->expval != NULL);

  /* Expand the values for a bundle of rows: _vxyz[k] receives the k-th
     scalar row of the current 3-row bundle */

  cs_real_t  *_vxyz[3] = {row->expval,
                          row->expval + m->n_rows,
                          row->expval + 2*m->n_rows};

  row->n_cols = m->n_rows;

  /* Switch to the global numbering */

  for (int i = 0; i < row->n_cols; i++)
    row->col_g_id[i] = rset->g_id[dof_ids[i]];

  for (int bi = 0; bi < bd->n_row_blocks; bi++) {

    /* Expand all the blocks for this row */

    for (int bj = 0; bj < bd->n_col_blocks; bj++) {

      /* mIJ matrices are small square matrices of size 3 (mvals is
         stored row-major: rows at offsets 0, 3 and 6) */

      const cs_sdm_t *const  mIJ = cs_sdm_get_block(m, bi, bj);
      const cs_real_t *const  mvals = mIJ->val;
      for (int k = 0; k < 3; k++) {
        _vxyz[0][3*bj+k] = mvals[  k];
        _vxyz[1][3*bj+k] = mvals[3+k];
        _vxyz[2][3*bj+k] = mvals[6+k];
      }

    } /* Loop on column-wise blocks */

    /* dof_ids is an interlaced array (get access to the next 3 values) */

    for (int k = 0; k < 3; k++) {

      row->i = 3*bi+k;                          /* cellwise numbering */
      row->g_id = row->col_g_id[3*bi+k];        /* global numbering */
      row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
      row->val = _vxyz[k];

      /* A row outside [0, n_elts[0][ is owned by a distant rank;
         the "_dt" variant is safe under OpenMP threading */
      if (row->l_id < 0 || row->l_id >= rset->n_elts[0])
        _assemble_row_scal_ds(mav, ma, row);

      else {

        _assemble_row_scal_ld(ma, row);

#if CS_CDO_OMP_SYNC_SECTIONS > 0 /* OpenMP with critical section */
        _add_scal_values_critical(row, mav->matrix);
#else
        _add_scal_values_atomic(row, mav->matrix);
#endif

      }

    } /* Push each row of the block */

  } /* Loop on row-wise blocks */
}
1881
1882 #endif /* defined(HAVE_MPI) */
1883
1884 /*----------------------------------------------------------------------------*/
1885 /*!
1886 * \brief Assemble a cellwise matrix into the global matrix
1887 * Case of a block NxN entries. Expand each row.
1888 * Sequential run without openMP threading.
1889 *
1890 * \param[in] m cellwise view of the algebraic system
1891 * \param[in] dof_ids local DoF numbering
1892 * \param[in] rset pointer to a cs_range_set_t structure
1893 * \param[in, out] eqa pointer to an equation assembly structure
1894 * \param[in, out] mav pointer to a matrix assembler structure
1895 */
1896 /*----------------------------------------------------------------------------*/
1897
1898 void
cs_equation_assemble_eblock_matrix_seqs(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1899 cs_equation_assemble_eblock_matrix_seqs(const cs_sdm_t *m,
1900 const cs_lnum_t *dof_ids,
1901 const cs_range_set_t *rset,
1902 cs_equation_assemble_t *eqa,
1903 cs_matrix_assembler_values_t *mav)
1904 {
1905 const cs_sdm_block_t *bd = m->block_desc;
1906 const cs_matrix_assembler_t *ma = mav->ma;
1907
1908 cs_equation_assemble_row_t *row = eqa->row;
1909
1910 /* Sanity checks */
1911 assert(m->flag & CS_SDM_BY_BLOCK);
1912 assert(m->block_desc != NULL);
1913 assert(bd->n_row_blocks == bd->n_col_blocks);
1914 assert(eqa->ddim < 19);
1915 assert(eqa->edim == eqa->ddim);
1916 assert(row->expval != NULL);
1917
1918 const int dim = eqa->ddim;
1919
1920 /* Expand the values for a bundle of rows */
1921 cs_real_t *_vpointer[18];
1922
1923 for (int k = 0; k < dim; k++)
1924 _vpointer[k] = row->expval + k*m->n_rows;
1925
1926 row->n_cols = m->n_rows;
1927
1928 /* Switch to the global numbering */
1929 for (int i = 0; i < row->n_cols; i++)
1930 row->col_g_id[i] = rset->g_id[dof_ids[i]];
1931
1932 for (int bi = 0; bi < bd->n_row_blocks; bi++) {
1933
1934 /* Expand all the blocks for this row */
1935 for (int bj = 0; bj < bd->n_col_blocks; bj++) {
1936
1937 /* mIJ matrices are small square matrices of size "dim" */
1938 const cs_sdm_t *const mIJ = cs_sdm_get_block(m, bi, bj);
1939
1940 for (int ki = 0; ki < dim; ki++) {
1941 const cs_real_t *const mvals = mIJ->val + ki*dim;
1942 cs_real_t *buf = _vpointer[ki] + dim*bj;
1943 for (int kj = 0; kj < dim; kj++)
1944 buf[kj] = mvals[kj];
1945 }
1946
1947 } /* Loop on column blocks */
1948
1949 /* dof_ids is an interlaced array (get access to the next "dim" values */
1950 for (int ki = 0; ki < dim; ki++) {
1951 row->i = dim*bi+ki; /* cellwise numbering */
1952 row->g_id = row->col_g_id[dim*bi+ki]; /* global numbering */
1953 row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1954 row->val = _vpointer[ki];
1955
1956 _assemble_row_scal_l(ma, row);
1957 _add_scal_values_single(row, mav->matrix);
1958 }
1959
1960 } /* Loop on row-wise blocks */
1961
1962 }
1963
1964 /*----------------------------------------------------------------------------*/
1965 /*!
1966 * \brief Assemble a cellwise system into the global algebraic system.
1967 * Case of a block NxN entries. Expand each row.
1968 * Sequential run with openMP threading.
1969 *
1970 * \param[in] m cellwise view of the algebraic system
1971 * \param[in] dof_ids local DoF numbering
1972 * \param[in] rset pointer to a cs_range_set_t structure
1973 * \param[in, out] eqa pointer to an equation assembly structure
1974 * \param[in, out] mav pointer to a matrix assembler structure
1975 */
1976 /*----------------------------------------------------------------------------*/
1977
1978 void
cs_equation_assemble_eblock_matrix_seqt(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1979 cs_equation_assemble_eblock_matrix_seqt(const cs_sdm_t *m,
1980 const cs_lnum_t *dof_ids,
1981 const cs_range_set_t *rset,
1982 cs_equation_assemble_t *eqa,
1983 cs_matrix_assembler_values_t *mav)
1984 {
1985 const cs_sdm_block_t *bd = m->block_desc;
1986 const cs_matrix_assembler_t *ma = mav->ma;
1987
1988 cs_equation_assemble_row_t *row = eqa->row;
1989
1990 /* Sanity checks */
1991 assert(m->flag & CS_SDM_BY_BLOCK);
1992 assert(m->block_desc != NULL);
1993 assert(bd->n_row_blocks == bd->n_col_blocks);
1994 assert(eqa->ddim < 19);
1995 assert(eqa->edim == eqa->ddim);
1996 assert(row->expval != NULL);
1997
1998 const int dim = eqa->ddim;
1999
2000 /* Expand the values for a bundle of rows */
2001 cs_real_t *_vpointer[18];
2002
2003 for (int k = 0; k < dim; k++)
2004 _vpointer[k] = row->expval + k*m->n_rows;
2005
2006 row->n_cols = m->n_rows;
2007
2008 /* Switch to the global numbering */
2009 for (int i = 0; i < row->n_cols; i++)
2010 row->col_g_id[i] = rset->g_id[dof_ids[i]];
2011
2012 for (int bi = 0; bi < bd->n_row_blocks; bi++) {
2013
2014 /* Expand all the blocks for this row */
2015 for (int bj = 0; bj < bd->n_col_blocks; bj++) {
2016
2017 /* mIJ matrices are small square matrices of size "dim" */
2018 const cs_sdm_t *const mIJ = cs_sdm_get_block(m, bi, bj);
2019
2020 for (int ki = 0; ki < dim; ki++) {
2021 const cs_real_t *const mvals = mIJ->val + ki*dim;
2022 cs_real_t *buf = _vpointer[ki] + dim*bj;
2023 for (int kj = 0; kj < dim; kj++)
2024 buf[kj] = mvals[kj];
2025 }
2026
2027 } /* Loop on column blocks */
2028
2029 /* dof_ids is an interlaced array (get access to the next "dim" values */
2030 for (int ki = 0; ki < dim; ki++) {
2031 row->i = dim*bi+ki; /* cellwise numbering */
2032 row->g_id = row->col_g_id[dim*bi+ki]; /* global numbering */
2033 row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
2034 row->val = _vpointer[ki];
2035
2036 _assemble_row_scal_l(ma, row);
2037
2038 #if CS_CDO_OMP_SYNC_SECTIONS > 0 /* OpenMP with critical section */
2039 _add_scal_values_critical(row, mav->matrix);
2040 #else
2041 _add_scal_values_atomic(row, mav->matrix);
2042 #endif
2043 } /* Push each row of the block */
2044
2045 } /* Loop on row-wise blocks */
2046
2047 }
2048
2049 #if defined(HAVE_MPI)
2050
2051 /*----------------------------------------------------------------------------*/
2052 /*!
2053 * \brief Assemble a cellwise matrix into the global matrix
2054 * Case of a block NxN entries. Expand each row.
2055 * Parallel run without openMP threading.
2056 *
2057 * \param[in] m cellwise view of the algebraic system
2058 * \param[in] dof_ids local DoF numbering
2059 * \param[in] rset pointer to a cs_range_set_t structure
2060 * \param[in, out] eqa pointer to an equation assembly structure
2061 * \param[in, out] mav pointer to a matrix assembler structure
2062 */
2063 /*----------------------------------------------------------------------------*/
2064
2065 void
cs_equation_assemble_eblock_matrix_mpis(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)2066 cs_equation_assemble_eblock_matrix_mpis(const cs_sdm_t *m,
2067 const cs_lnum_t *dof_ids,
2068 const cs_range_set_t *rset,
2069 cs_equation_assemble_t *eqa,
2070 cs_matrix_assembler_values_t *mav)
2071 {
2072 const cs_sdm_block_t *bd = m->block_desc;
2073 const cs_matrix_assembler_t *ma = mav->ma;
2074
2075 cs_equation_assemble_row_t *row = eqa->row;
2076
2077 /* Sanity checks */
2078 assert(m->flag & CS_SDM_BY_BLOCK);
2079 assert(m->block_desc != NULL);
2080 assert(bd->n_row_blocks == bd->n_col_blocks);
2081 assert(eqa->ddim < 19);
2082 assert(eqa->edim == eqa->ddim);
2083 assert(row->expval != NULL);
2084
2085 const int dim = eqa->ddim;
2086
2087 /* Expand the values for a bundle of rows */
2088 cs_real_t *_vpointer[18];
2089
2090 for (int k = 0; k < dim; k++)
2091 _vpointer[k] = row->expval + k*m->n_rows;
2092
2093 row->n_cols = m->n_rows;
2094
2095 /* Switch to the global numbering */
2096 for (int i = 0; i < row->n_cols; i++)
2097 row->col_g_id[i] = rset->g_id[dof_ids[i]];
2098
2099 for (int bi = 0; bi < bd->n_row_blocks; bi++) {
2100
2101 /* Expand all the blocks for this row */
2102 for (int bj = 0; bj < bd->n_col_blocks; bj++) {
2103
2104 /* mIJ matrices are small square matrices of size "dim" */
2105 const cs_sdm_t *const mIJ = cs_sdm_get_block(m, bi, bj);
2106
2107 for (int ki = 0; ki < dim; ki++) {
2108 const cs_real_t *const mvals = mIJ->val + ki*dim;
2109 cs_real_t *buf = _vpointer[ki] + dim*bj;
2110 for (int kj = 0; kj < dim; kj++)
2111 buf[kj] = mvals[kj];
2112 }
2113
2114 } /* Loop on column blocks */
2115
2116 /* dof_ids is an interlaced array (get access to the next "dim" values */
2117 for (int ki = 0; ki < dim; ki++) {
2118
2119 row->i = dim*bi+ki; /* cellwise numbering */
2120 row->g_id = row->col_g_id[dim*bi+ki]; /* global numbering */
2121 row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
2122 row->val = _vpointer[ki];
2123
2124 if (row->l_id < 0 || row->l_id >= rset->n_elts[0])
2125 _assemble_row_scal_ds(mav, ma, row);
2126
2127 else {
2128
2129 _assemble_row_scal_ld(ma, row);
2130 _add_scal_values_single(row, mav->matrix);
2131
2132 }
2133
2134 } /* Push each row of the block */
2135
2136 } /* Loop on row-wise blocks */
2137
2138 }
2139
2140 /*----------------------------------------------------------------------------*/
2141 /*!
2142 * \brief Assemble a cellwise matrix into the global matrix
2143 * Case of a block NxN entries. Expand each row.
2144 * Parallel run with openMP threading.
2145 *
2146 * \param[in] m cellwise view of the algebraic system
2147 * \param[in] dof_ids local DoF numbering
2148 * \param[in] rset pointer to a cs_range_set_t structure
2149 * \param[in, out] eqa pointer to an equation assembly structure
2150 * \param[in, out] mav pointer to a matrix assembler structure
2151 */
2152 /*----------------------------------------------------------------------------*/
2153
2154 void
cs_equation_assemble_eblock_matrix_mpit(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)2155 cs_equation_assemble_eblock_matrix_mpit(const cs_sdm_t *m,
2156 const cs_lnum_t *dof_ids,
2157 const cs_range_set_t *rset,
2158 cs_equation_assemble_t *eqa,
2159 cs_matrix_assembler_values_t *mav)
2160 {
2161 const cs_sdm_block_t *bd = m->block_desc;
2162 const cs_matrix_assembler_t *ma = mav->ma;
2163
2164 cs_equation_assemble_row_t *row = eqa->row;
2165
2166 /* Sanity checks */
2167 assert(m->flag & CS_SDM_BY_BLOCK);
2168 assert(m->block_desc != NULL);
2169 assert(bd->n_row_blocks == bd->n_col_blocks);
2170 assert(eqa->ddim < 19);
2171 assert(eqa->edim == eqa->ddim);
2172 assert(row->expval != NULL);
2173
2174 const int dim = eqa->ddim;
2175
2176 /* Expand the values for a bundle of rows */
2177 cs_real_t *_vpointer[18];
2178
2179 for (int k = 0; k < dim; k++)
2180 _vpointer[k] = row->expval + k*m->n_rows;
2181
2182 row->n_cols = m->n_rows;
2183
2184 /* Switch to the global numbering */
2185 for (int i = 0; i < row->n_cols; i++)
2186 row->col_g_id[i] = rset->g_id[dof_ids[i]];
2187
2188 for (int bi = 0; bi < bd->n_row_blocks; bi++) {
2189
2190 /* Expand all the blocks for this row */
2191 for (int bj = 0; bj < bd->n_col_blocks; bj++) {
2192
2193 /* mIJ matrices are small square matrices of size "dim" */
2194 const cs_sdm_t *const mIJ = cs_sdm_get_block(m, bi, bj);
2195
2196 for (int ki = 0; ki < dim; ki++) {
2197 const cs_real_t *const mvals = mIJ->val + ki*dim;
2198 cs_real_t *buf = _vpointer[ki] + dim*bj;
2199 for (int kj = 0; kj < dim; kj++)
2200 buf[kj] = mvals[kj];
2201 }
2202
2203 } /* Loop on column blocks */
2204
2205 /* dof_ids is an interlaced array (get access to the next "dim" values */
2206 for (int ki = 0; ki < dim; ki++) {
2207
2208 row->i = dim*bi+ki; /* cellwise numbering */
2209 row->g_id = row->col_g_id[dim*bi+ki]; /* global numbering */
2210 row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
2211 row->val = _vpointer[ki];
2212
2213 if (row->l_id < 0 || row->l_id >= rset->n_elts[0])
2214 _assemble_row_scal_ds(mav, ma, row);
2215
2216 else {
2217
2218 _assemble_row_scal_ld(ma, row);
2219
2220 #if CS_CDO_OMP_SYNC_SECTIONS > 0 /* OpenMP with critical section */
2221 _add_scal_values_critical(row, mav->matrix);
2222 #else
2223 _add_scal_values_atomic(row, mav->matrix);
2224 #endif
2225
2226 }
2227
2228 } /* Push each row of the block */
2229
2230 } /* Loop on row-wise blocks */
2231
2232 }
2233
2234 #endif /* defined(HAVE_MPI) */
2235
2236 /*----------------------------------------------------------------------------*/
2237
2238 END_C_DECLS
2239