1 /*============================================================================
2  * Assembly of local cellwise system into a cs_matrix_t structure through
3  * the cs_matrix_assembler_t and its related structures
4  *============================================================================*/
5 
6 /*
7   This file is part of Code_Saturne, a general-purpose CFD tool.
8 
9   Copyright (C) 1998-2021 EDF S.A.
10 
11   This program is free software; you can redistribute it and/or modify it under
12   the terms of the GNU General Public License as published by the Free Software
13   Foundation; either version 2 of the License, or (at your option) any later
14   version.
15 
16   This program is distributed in the hope that it will be useful, but WITHOUT
17   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
19   details.
20 
21   You should have received a copy of the GNU General Public License along with
22   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
23   Street, Fifth Floor, Boston, MA 02110-1301, USA.
24 */
25 
26 /*----------------------------------------------------------------------------*/
27 
28 #include "cs_defs.h"
29 
30 /*----------------------------------------------------------------------------
31  * Standard C library headers
32  *----------------------------------------------------------------------------*/
33 
34 #include <assert.h>
35 
36 /*----------------------------------------------------------------------------
37  * Local headers
38  *----------------------------------------------------------------------------*/
39 
40 #include "bft_error.h"
41 #include "bft_mem.h"
42 
43 #include "cs_defs.h"
44 #include "cs_log.h"
45 #include "cs_matrix_priv.h"
46 #include "cs_matrix_assembler_priv.h"
47 #include "cs_matrix_assembler.h"
48 #include "cs_param_cdo.h"
49 #include "cs_parall.h"
50 #include "cs_sort.h"
51 #include "cs_timer.h"
52 
53 #if defined(DEBUG) && !defined(NDEBUG)
54 #include "cs_dbg.h"
55 #endif
56 
57 /*----------------------------------------------------------------------------
58  *  Header for the current file
59  *----------------------------------------------------------------------------*/
60 
61 #include "cs_equation_assemble.h"
62 
63 /*----------------------------------------------------------------------------*/
64 
65 BEGIN_C_DECLS
66 
67 /*=============================================================================
68  * Additional doxygen documentation
69  *============================================================================*/
70 
71 /*!
72   \file cs_equation_assemble.c
73 
74   \brief Assembly of local cellwise system into a cs_matrix_t structure through
75   the cs_matrix_assembler_t and its related structures.
76 
77   This function are specific to CDO schemes. Thus one can assume a more specific
78   behavior in order to get a more optimzed version of the standard assembly
79   process.
80 
81 */
82 
83 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
84 
85 /*============================================================================
86  * Type definitions and macros
87  *============================================================================*/
88 
89 #define CS_EQUATION_ASSEMBLE_DBG          0 /* Debug level */
90 
91 /*============================================================================
92  * Local private variables
93  *============================================================================*/
94 
95 /* Store the matrix structure and its assembler structures for each family
96    of space discretizations */
97 static cs_matrix_assembler_t  **cs_equation_assemble_ma = NULL;
98 static cs_matrix_structure_t  **cs_equation_assemble_ms = NULL;
99 static cs_equation_assemble_t  **cs_equation_assemble = NULL;
100 
101 static cs_timer_counter_t  cs_equation_ms_time;
102 
103 /*=============================================================================
104  * Local function pointer definitions
105  *============================================================================*/
106 
107 /*=============================================================================
108  * Local structure definitions
109  *============================================================================*/
110 
111 typedef struct {
112 
113   /* Row numberings */
114   cs_gnum_t   g_id;             /* Global row numbering */
115   cs_lnum_t   l_id;             /* Local range set numbering */
116   int         i;                /* Cellwise system numbering */
117 
118   /* Column-related members */
119   int                 n_cols;    /* Number of columns (cellwise system) */
120   cs_gnum_t          *col_g_id;  /* Global numbering of columns */
121   int                *col_idx;   /* Array to build and to give as parameter
122                                     for add_vals() function */
123 
124   const cs_real_t    *val;       /* Row values */
125   cs_real_t          *expval;    /* Expanded row values */
126 
127 } cs_equation_assemble_row_t;
128 
129 struct _cs_equation_assemble_t {
130 
131   int         ddim;         /* Number of real values related to each diagonal
132                                entry */
133   int         edim;         /* Number of real values related to each
134                                extra-diagonal entry */
135 
136   cs_equation_assemble_row_t    *row;
137 };
138 
139 /*============================================================================
140  * Static inline private function prototypes
141  *============================================================================*/
142 
143 /*----------------------------------------------------------------------------*/
144 /*!
145  * \brief  Choose which function will be used to perform the matrix assembly
146  *         Case of scalar-valued matrices.
147  *
148  * \return  a pointer to a function
149  */
150 /*----------------------------------------------------------------------------*/
151 
152 static inline cs_equation_assembly_t *
_set_scalar_assembly_func(void)153 _set_scalar_assembly_func(void)
154 {
155 #if defined(HAVE_MPI)
156 
157   if (cs_glob_n_ranks > 1) {  /* Parallel */
158 
159     if (cs_glob_n_threads < 2) /* Without OpenMP */
160       return cs_equation_assemble_matrix_mpis;
161     else                       /* With OpenMP */
162       return cs_equation_assemble_matrix_mpit;
163 
164   }
165 
166 #endif /* defined(HAVE_MPI) */
167 
168   if (cs_glob_n_ranks <= 1) { /* Sequential */
169 
170     if (cs_glob_n_threads < 2) /* Without OpenMP */
171       return cs_equation_assemble_matrix_seqs;
172     else                       /* With OpenMP */
173       return cs_equation_assemble_matrix_seqt;
174 
175   }
176 
177   return NULL; /* Case not handled */
178 }
179 
180 /*----------------------------------------------------------------------------*/
181 /*!
182  * \brief  Choose which function will be used to perform the matrix assembly
183  *         Case of block 3x3 matrices.
184  *
185  * \return  a pointer to a function
186  */
187 /*----------------------------------------------------------------------------*/
188 
189 static inline cs_equation_assembly_t *
_set_block33_assembly_func(void)190 _set_block33_assembly_func(void)
191 {
192 #if defined(HAVE_MPI)
193 
194   if (cs_glob_n_ranks > 1) {  /* Parallel */
195 
196     if (cs_glob_n_threads < 2) /* Without OpenMP */
197       return cs_equation_assemble_eblock33_matrix_mpis;
198     else                      /* With OpenMP */
199       return cs_equation_assemble_eblock33_matrix_mpit;
200 
201   }
202 
203 #endif /* defined(HAVE_MPI) */
204 
205   if (cs_glob_n_ranks <= 1) {  /* Sequential */
206 
207     if (cs_glob_n_threads < 2) /* Without OpenMP */
208       return cs_equation_assemble_eblock33_matrix_seqs;
209     else                      /* With OpenMP */
210       return cs_equation_assemble_eblock33_matrix_seqt;
211 
212   }
213 
214   return NULL; /* Case not handled */
215 }
216 
217 /*----------------------------------------------------------------------------*/
218 /*!
219  * \brief  Choose which function will be used to perform the matrix assembly
220  *         Case of block NxN matrices.
221  *
222  * \return  a pointer to a function
223  */
224 /*----------------------------------------------------------------------------*/
225 
226 static inline cs_equation_assembly_t *
_set_block_assembly_func(void)227 _set_block_assembly_func(void)
228 {
229 #if defined(HAVE_MPI)
230 
231   if (cs_glob_n_ranks > 1) {  /* Parallel */
232 
233     if (cs_glob_n_threads < 2) /* Without OpenMP */
234       return cs_equation_assemble_eblock_matrix_mpis;
235     else                      /* With OpenMP */
236       return cs_equation_assemble_eblock_matrix_mpit;
237 
238   }
239 
240 #endif /* defined(HAVE_MPI) */
241 
242   if (cs_glob_n_ranks <= 1) {  /* Sequential */
243 
244     if (cs_glob_n_threads < 2) /* Without OpenMP */
245       return cs_equation_assemble_eblock_matrix_seqs;
246     else                      /* With OpenMP */
247       return cs_equation_assemble_eblock_matrix_seqt;
248 
249   }
250 
251   return NULL; /* Case not handled */
252 }
253 
254 /*----------------------------------------------------------------------------*/
255 /*!
256  * \brief Binary search for a given local id in a given array of
257  *        ordered local ids, when the id might not be present
258  *
259  * We assume the id is present in the array.
260  *
261  * \param[in]  start_id         begin search with this id
262  * \param[in]  l_id_array size  array_size
263  * \param[in]  l_id             local id to search for
264  * \param[in]  l_id_array       ordered unique local ids array
265  *
266  * \return  index of l_id in l_id_array, or -1 if not found
267  */
268 /*----------------------------------------------------------------------------*/
269 
270 static inline int
_l_binary_search(int start_id,int l_id_array_size,cs_lnum_t l_id,const cs_lnum_t l_id_array[])271 _l_binary_search(int              start_id,
272                  int              l_id_array_size,
273                  cs_lnum_t        l_id,
274                  const cs_lnum_t  l_id_array[])
275 {
276   assert(start_id > -1);
277   int  end_id = l_id_array_size - 1;
278 
279   while (start_id <= end_id) {
280 
281     const int  mid_id = (start_id + end_id)/2;
282     const cs_lnum_t test_val = l_id_array[mid_id];
283 
284     if (test_val < l_id)
285       start_id = mid_id + 1;
286     else if (test_val > l_id)
287       end_id = mid_id - 1;
288     else
289       return mid_id;
290 
291   }
292 
293   return -1; /* Not found */
294 }
295 
296 /*----------------------------------------------------------------------------*/
297 /*!
298  * \brief Binary search for a given global id in a given array of
299  *        ordered global ids
300  *
301  * We assume the id is present in the array.
302  *
303  * \param[in]  g_id_array size  array_size
304  * \param[in]  g_id             global id to search for
305  * \param[in]  g_id_array       ordered unique global ids array
306  *
307  * \return  index of g_id in g_id_array or -1 if not found.
308  */
309 /*----------------------------------------------------------------------------*/
310 
311 static inline int
_g_binary_search(int g_id_array_size,cs_gnum_t g_id,const cs_gnum_t g_id_array[])312 _g_binary_search(int              g_id_array_size,
313                  cs_gnum_t        g_id,
314                  const cs_gnum_t  g_id_array[])
315 {
316   int  start_id = 0;
317   int  end_id = g_id_array_size - 1;
318 
319   while (start_id <= end_id) {
320 
321     const int  mid_id = (end_id + start_id) / 2;
322     const cs_gnum_t mid_val = g_id_array[mid_id];
323 
324     if (mid_val < g_id)
325       start_id = mid_id + 1;
326     else if (mid_val > g_id)
327       end_id = mid_id - 1;
328     else
329       return mid_id;
330 
331   }
332 
333   return -1; /* Not found */
334 }
335 
336 /*----------------------------------------------------------------------------*/
337 /*!
338  * \brief Function pointer for adding values to a MSR matrix.
339  *
340  *  Specific case:
341  *        CDO schemes with no openMP and scalar-valued quantities
342  *
343  * \warning  The matrix pointer must point to valid data when the selection
344  *           function is called, so the life cycle of the data pointed to
345  *           should be at least as long as that of the assembler values
346  *           structure.
347  *
348  * \remark  Note that we pass column indexes (not ids) here; as the
349  *          caller is already assumed to have identified the index
350  *          matching a given column id.
351  *
352  * \param[in]      row         pointer to a cs_equation_assemble_row_t type
353  * \param[in, out] matrix_p    untyped pointer to matrix description structure
354  */
355 /*----------------------------------------------------------------------------*/
356 
357 inline static void
_add_scal_values_single(const cs_equation_assemble_row_t * row,void * matrix_p)358 _add_scal_values_single(const cs_equation_assemble_row_t   *row,
359                         void                               *matrix_p)
360 {
361   assert(row->l_id > -1);
362 
363   cs_matrix_t  *matrix = (cs_matrix_t *)matrix_p;
364   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
365 
366   const cs_matrix_struct_csr_t  *ms = matrix->structure;
367 
368   /* Update the diagonal value */
369   mc->_d_val[row->l_id] += row->val[row->i];
370 
371   /* Update the extra-diagonal values */
372   cs_real_t  *xvals = mc->_x_val + ms->row_index[row->l_id];
373   for (int j = 0; j < row->i; j++) /* Lower part */
374     xvals[row->col_idx[j]] += row->val[j];
375   for (int j = row->i+1; j < row->n_cols; j++) /* Upper part */
376     xvals[row->col_idx[j]] += row->val[j];
377 }
378 
379 /*----------------------------------------------------------------------------*/
380 /*!
381  * \brief Function pointer for adding values to a MSR matrix.
382  *
383  *  Specific case:
384  *        CDO schemes with openMP atomic section and scalar-valued quantities
385  *
386  * \warning  The matrix pointer must point to valid data when the selection
387  *           function is called, so the life cycle of the data pointed to
388  *           should be at least as long as that of the assembler values
389  *           structure.
390  *
391  * \remark  Note that we pass column indexes (not ids) here; as the
392  *          caller is already assumed to have identified the index
393  *          matching a given column id.
394  *
395  * \param[in]      row         pointer to a cs_equation_assemble_row_t type
396  * \param[in, out] matrix_p    untyped pointer to matrix description structure
397  */
398 /*----------------------------------------------------------------------------*/
399 
400 inline static void
_add_scal_values_atomic(const cs_equation_assemble_row_t * row,void * matrix_p)401 _add_scal_values_atomic(const cs_equation_assemble_row_t   *row,
402                         void                               *matrix_p)
403 {
404   assert(row->l_id > -1);
405 
406   cs_matrix_t  *matrix = (cs_matrix_t *)matrix_p;
407   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
408 
409   const cs_matrix_struct_csr_t  *ms = matrix->structure;
410 
411   /* Update the diagonal value */
412 # pragma omp atomic
413   mc->_d_val[row->l_id] += row->val[row->i];
414 
415   /* Update the extra-diagonal values */
416   cs_real_t  *xvals = mc->_x_val + ms->row_index[row->l_id];
417   for (int j = 0; j < row->n_cols; j++) {
418     if (j != row->i) {
419 #     pragma omp atomic
420       xvals[row->col_idx[j]] += row->val[j];
421     }
422   }
423 }
424 
425 /*----------------------------------------------------------------------------*/
426 /*!
427  * \brief Function pointer for adding values to a MSR matrix.
428  *
429  *  Specific case:
430  *        CDO schemes with openMP critical section and scalar-valued quantities
431  *
432  * \warning  The matrix pointer must point to valid data when the selection
433  *           function is called, so the life cycle of the data pointed to
434  *           should be at least as long as that of the assembler values
435  *           structure.
436  *
437  * \remark  Note that we pass column indexes (not ids) here; as the
438  *          caller is already assumed to have identified the index
439  *          matching a given column id.
440  *
441  * \param[in]      row         pointer to a cs_equation_assemble_row_t type
442  * \param[in, out] matrix_p    untyped pointer to matrix description structure
443  */
444 /*----------------------------------------------------------------------------*/
445 
446 inline static void
_add_scal_values_critical(const cs_equation_assemble_row_t * row,void * matrix_p)447 _add_scal_values_critical(const cs_equation_assemble_row_t   *row,
448                           void                               *matrix_p)
449 {
450   assert(row->l_id > -1);
451 
452   cs_matrix_t  *matrix = (cs_matrix_t *)matrix_p;
453   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
454 
455   const cs_matrix_struct_csr_t  *ms = matrix->structure;
456 
457   /* Update the diagonal value */
458 # pragma omp critical
459   {
460     mc->_d_val[row->l_id] += row->val[row->i];
461 
462     /* Update the extra-diagonal values */
463     cs_real_t  *xvals = mc->_x_val + ms->row_index[row->l_id];
464     for (int j = 0; j < row->n_cols; j++)
465       if (j != row->i)
466         xvals[row->col_idx[j]] += row->val[j];
467   }
468 }
469 
470 /*----------------------------------------------------------------------------*/
471 /*!
472  * \brief Add values to a matrix assembler values structure using global
473  *        row and column ids.
474  *        Case where the row belong to the local rank and all its colums too.
475  *
476  * See \ref cs_matrix_assembler_values_add_g which performs the same operations
477  * In the specific case of CDO system, one assumes predefined choices in
478  * order to get a more optimized version of this function
479  *
480  * \param[in, out]  ma     pointer to matrix assembler values structure
481  * \param[in, out]  row    pointer to a cs_equation_assemble_row_t structure
482  */
483 /*----------------------------------------------------------------------------*/
484 
485 inline static void
_assemble_row_scal_l(const cs_matrix_assembler_t * ma,cs_equation_assemble_row_t * row)486 _assemble_row_scal_l(const cs_matrix_assembler_t     *ma,
487                      cs_equation_assemble_row_t      *row)
488 {
489   const cs_lnum_t  l_r_id = row->l_id; /* g_r_id - ma->l_range[0]; */
490   const cs_lnum_t  l_start = ma->r_idx[l_r_id], l_end = ma->r_idx[l_r_id+1];
491   const int  n_l_cols = l_end - l_start;
492   const cs_lnum_t  *col_ids = ma->c_id + l_start;
493 
494   /* Loop on columns to fill col_idx for extra-diag entries
495    * Diagonal is treated separately */
496   for (int j = 0; j < row->i; j++) { /* Lower part */
497     row->col_idx[j] = _l_binary_search(0,
498                                        n_l_cols,
499                          /* l_c_id */  row->col_g_id[j]-ma->l_range[0],
500                                        col_ids);
501     assert(row->col_idx[j] > -1);
502   }
503   for (int j = row->i + 1; j < row->n_cols; j++) { /* Upper part */
504     row->col_idx[j] = _l_binary_search(0,
505                                        n_l_cols,
506                          /* l_c_id */  row->col_g_id[j]-ma->l_range[0],
507                                        col_ids);
508     assert(row->col_idx[j] > -1);
509   }
510 }
511 
512 #if defined(HAVE_MPI)
513 
514 /*----------------------------------------------------------------------------*/
515 /*!
516  * \brief Add values to a matrix assembler values structure using global
517  *        row and column ids.
518  *        Case where the row belong to the local rank but a part of the columns
519  *        belong to a distant rank. Hence the naming *_ld
520  *
521  * See \ref cs_matrix_assembler_values_add_g which performs the same operations
522  * In the specific case of CDO system, one assumes predefined choices in
523  * order to get a more optimized version of this function
524  *
525  * \param[in, out]  ma     pointer to matrix assembler values structure
526  * \param[in, out]  row    pointer to a cs_equation_assemble_row_t structure
527  */
528 /*----------------------------------------------------------------------------*/
529 
530 inline static void
_assemble_row_scal_ld(const cs_matrix_assembler_t * ma,cs_equation_assemble_row_t * row)531 _assemble_row_scal_ld(const cs_matrix_assembler_t      *ma,
532                       cs_equation_assemble_row_t       *row)
533 {
534   assert(ma->d_r_idx != NULL); /* local-id-based function, need to adapt */
535 
536   const cs_lnum_t l_r_id = row->l_id; /* g_r_id - ma->l_range[0]; */
537   const cs_lnum_t l_start = ma->r_idx[l_r_id], l_end = ma->r_idx[l_r_id+1];
538   const cs_lnum_t d_start = ma->d_r_idx[l_r_id], d_end = ma->d_r_idx[l_r_id+1];
539   const int n_d_cols = d_end - d_start;
540   const int n_l_cols = l_end - l_start - n_d_cols;
541 
542   /* Loop on columns to fill col_idx for extra-diag entries */
543   for (int j = 0; j < row->i; j++) {
544 
545     const cs_gnum_t g_c_id = row->col_g_id[j];
546 
547     if (g_c_id >= ma->l_range[0] && g_c_id < ma->l_range[1]) { /* Local part */
548 
549       row->col_idx[j] = _l_binary_search(0,
550                                          n_l_cols,
551                                          g_c_id - ma->l_range[0], /* l_c_id */
552                                          ma->c_id + l_start);
553       assert(row->col_idx[j] > -1);
554 
555     }
556     else { /* Distant part */
557 
558       /* column ids start and end of local row, so add n_l_cols */
559       row->col_idx[j] = n_l_cols + _g_binary_search(n_d_cols,
560                                                     g_c_id,
561                                                     ma->d_g_c_id + d_start);
562 
563     }
564 
565   } /* Loop on columns of the row */
566 
567   for (int j = row->i + 1; j < row->n_cols; j++) {
568 
569     const cs_gnum_t g_c_id = row->col_g_id[j];
570 
571     if (g_c_id >= ma->l_range[0] && g_c_id < ma->l_range[1]) { /* Local part */
572 
573       row->col_idx[j] = _l_binary_search(0,
574                                          n_l_cols,
575                                          g_c_id-ma->l_range[0], /* l_c_id */
576                                          ma->c_id + l_start);
577       assert(row->col_idx[j] > -1);
578 
579     }
580     else { /* Distant part */
581 
582       /* column ids start and end of local row, so add n_l_cols */
583       row->col_idx[j] = n_l_cols + _g_binary_search(n_d_cols,
584                                                     g_c_id,
585                                                     ma->d_g_c_id + d_start);
586 
587     }
588 
589   } /* Loop on columns of the row */
590 
591 }
592 
593 /*----------------------------------------------------------------------------*/
594 /*!
595  * \brief Add values to a matrix assembler values structure using global
596  *        row and column ids.
597  *        Case where the row does not belong to the local rank.
598  *
599  * See \ref cs_matrix_assembler_values_add_g which performs the same operations
600  * In the specific case of CDO system, one assumes predefined choices in
601  * order to get a more optimized version of this function
602  *
603  * \param[in, out]  mav    pointer to matrix assembler values structure
604  * \param[in]       ma     pointer to matrix assembler values structure
605  * \param[in]       row    pointer to a cs_equation_assemble_row_t structure
606  */
607 /*----------------------------------------------------------------------------*/
608 
609 inline static void
_assemble_row_scal_dt(cs_matrix_assembler_values_t * mav,const cs_matrix_assembler_t * ma,const cs_equation_assemble_row_t * row)610 _assemble_row_scal_dt(cs_matrix_assembler_values_t         *mav,
611                       const cs_matrix_assembler_t          *ma,
612                       const cs_equation_assemble_row_t     *row)
613 {
614   /* Case where coefficient is handled by other rank. No need to call
615      add_values() function in this case */
616 
617   assert(row->g_id < ma->l_range[0] || row->g_id >= ma->l_range[1]);
618 
619   const cs_lnum_t  e_r_id = _g_binary_search(ma->coeff_send_n_rows,
620                                              row->g_id,
621                                              ma->coeff_send_row_g_id);
622   const cs_lnum_t  r_start = ma->coeff_send_index[e_r_id];
623   const int  n_e_rows = ma->coeff_send_index[e_r_id+1] - r_start;
624   const cs_gnum_t  *coeff_send_g_id = ma->coeff_send_col_g_id + r_start;
625 
626   /* Diagonal term */
627   const cs_lnum_t  e_diag_id = r_start + _g_binary_search(n_e_rows,
628                                                           row->g_id,
629                                                           coeff_send_g_id);
630 
631   /* Now add values to send coefficients */
632 # pragma omp atomic
633   mav->coeff_send[e_diag_id] += row->val[row->i];
634 
635   /* Loop on extra-diagonal entries */
636   for (int j = 0; j < row->i; j++) { /* Lower-part */
637 
638     const cs_lnum_t e_id = r_start + _g_binary_search(n_e_rows,
639                                                       row->col_g_id[j],
640                                                       coeff_send_g_id);
641 
642     /* Now add values to send coefficients */
643 #   pragma omp atomic
644     mav->coeff_send[e_id] += row->val[j];
645 
646   }
647 
648   for (int j = row->i + 1; j < row->n_cols; j++) { /* Upper-part */
649 
650     const cs_lnum_t e_id = r_start + _g_binary_search(n_e_rows,
651                                                       row->col_g_id[j],
652                                                       coeff_send_g_id);
653 
654     /* Now add values to send coefficients */
655 #   pragma omp atomic
656     mav->coeff_send[e_id] += row->val[j];
657 
658   }
659 }
660 
661 /*----------------------------------------------------------------------------*/
662 /*!
663  * \brief Add values to a matrix assembler values structure using global
664  *        row and column ids.
665  *        Case where the row does not belong to the local rank. No openMP.
666  *
667  * See \ref cs_matrix_assembler_values_add_g which performs the same operations
668  * In the specific case of CDO system, one assumes predefined choices in
669  * order to get a more optimized version of this function
670  *
671  * \param[in, out]  mav    pointer to matrix assembler values structure
672  * \param[in]       ma     pointer to matrix assembler values structure
673  * \param[in]       row    pointer to a cs_equation_assemble_row_t structure
674  */
675 /*----------------------------------------------------------------------------*/
676 
677 inline static void
_assemble_row_scal_ds(cs_matrix_assembler_values_t * mav,const cs_matrix_assembler_t * ma,const cs_equation_assemble_row_t * row)678 _assemble_row_scal_ds(cs_matrix_assembler_values_t         *mav,
679                       const cs_matrix_assembler_t          *ma,
680                       const cs_equation_assemble_row_t     *row)
681 {
682   /* Case where coefficient is handled by other rank. No need to call
683      add_values() function in this case */
684 
685   assert(row->g_id < ma->l_range[0] || row->g_id >= ma->l_range[1]);
686 
687   const cs_lnum_t  e_r_id = _g_binary_search(ma->coeff_send_n_rows,
688                                              row->g_id,
689                                              ma->coeff_send_row_g_id);
690   const cs_lnum_t  r_start = ma->coeff_send_index[e_r_id];
691   const int  n_e_rows = ma->coeff_send_index[e_r_id+1] - r_start;
692   const cs_gnum_t  *coeff_send_g_id = ma->coeff_send_col_g_id + r_start;
693 
694   /* Diagonal term */
695   const cs_lnum_t  e_diag_id = r_start + _g_binary_search(n_e_rows,
696                                                           row->g_id,
697                                                           coeff_send_g_id);
698 
699   /* Now add values to send coefficients */
700   mav->coeff_send[e_diag_id] += row->val[row->i];
701 
702   /* Loop on extra-diagonal entries */
703   for (int j = 0; j < row->i; j++) { /* Lower-part */
704 
705     const cs_lnum_t e_id = r_start + _g_binary_search(n_e_rows,
706                                                       row->col_g_id[j],
707                                                       coeff_send_g_id);
708 
709     /* Now add values to send coefficients */
710     mav->coeff_send[e_id] += row->val[j];
711 
712   }
713 
714   for (int j = row->i + 1; j < row->n_cols; j++) { /* Upper-part */
715 
716     const cs_lnum_t e_id = r_start + _g_binary_search(n_e_rows,
717                                                       row->col_g_id[j],
718                                                       coeff_send_g_id);
719 
720     /* Now add values to send coefficients */
721     mav->coeff_send[e_id] += row->val[j];
722 
723   }
724 }
725 
726 #endif /* defined(HAVE_MPI) */
727 
728 /*============================================================================
729  * Private function definitions
730  *============================================================================*/
731 
732 /*----------------------------------------------------------------------------*/
733 /*!
734  * \brief  Allocate and initialize a cs_equation_assemble_t structure
735  *
736  * \param[in]  max_ddim        max. dim of the diagonal entries
737  * \param[in]  max_edim        max. dim of the extra-diagonal entries
738  * \param[in]  n_max_cw_dofs   max. number of DoF to be handled
739  *
740  * \return a pointer to a new allocated cs_equation_assemble_t structure
741  */
742 /*----------------------------------------------------------------------------*/
743 
744 static cs_equation_assemble_t *
_init_equation_assembler_struct(int max_ddim,int max_edim,int n_max_cw_dofs)745 _init_equation_assembler_struct(int      max_ddim,
746                                 int      max_edim,
747                                 int      n_max_cw_dofs)
748 {
749   cs_equation_assemble_t  *eqa = NULL;
750 
751   BFT_MALLOC(eqa, 1, cs_equation_assemble_t);
752 
753   eqa->ddim = max_ddim;
754   eqa->edim = max_edim;
755 
756   BFT_MALLOC(eqa->row, 1, cs_equation_assemble_row_t);
757   if (max_ddim < 2) {
758     BFT_MALLOC(eqa->row->col_g_id, n_max_cw_dofs, cs_gnum_t);
759     BFT_MALLOC(eqa->row->col_idx, n_max_cw_dofs, int);
760   }
761   else {
762     n_max_cw_dofs *= max_ddim; /* Temporary (until the global matrix is not
763                                   defined by block) */
764 
765     BFT_MALLOC(eqa->row->col_g_id, n_max_cw_dofs, cs_gnum_t);
766     BFT_MALLOC(eqa->row->col_idx, n_max_cw_dofs, int);
767     BFT_MALLOC(eqa->row->expval, max_ddim*n_max_cw_dofs, cs_real_t);
768   }
769 
770   return eqa;
771 }
772 
773 /*----------------------------------------------------------------------------*/
774 /*!
775  * \brief  Free a cs_equation_assemble_t structure
776  *
777  * \param[in, out]  p_eqa    pointer to a structure pointer to be freed
778  */
779 /*----------------------------------------------------------------------------*/
780 
781 static void
_free_equation_assembler_struct(cs_equation_assemble_t ** p_eqa)782 _free_equation_assembler_struct(cs_equation_assemble_t  **p_eqa)
783 {
784   if (*p_eqa == NULL)
785     return;
786 
787   cs_equation_assemble_t  *eqa = *p_eqa;
788 
789   if (eqa->ddim > 1)
790     BFT_FREE(eqa->row->expval);
791   BFT_FREE(eqa->row->col_g_id);
792   BFT_FREE(eqa->row->col_idx);
793   BFT_FREE(eqa->row);
794 
795   BFT_FREE(eqa);
796   *p_eqa = NULL;
797 }
798 
799 /*----------------------------------------------------------------------------*/
800 /*!
801  * \brief  Allocate and define a cs_matrix_assembler_t structure
802  *
803  * \param[in]  n_elts     number of elements
804  * \param[in]  n_dofbyx   number of DoFs by element
805  * \param[in]  x2x        pointer to a cs_adjacency_t structure
806  * \param[in]  rs         pointer to a range set or NULL if sequential
807  *
808  * \return a pointer to a new allocated cs_matrix_assembler_t structure
809  */
810 /*----------------------------------------------------------------------------*/
811 
812 static cs_matrix_assembler_t *
_build_matrix_assembler(cs_lnum_t n_elts,int n_dofbyx,const cs_adjacency_t * x2x,const cs_range_set_t * rs)813 _build_matrix_assembler(cs_lnum_t                n_elts,
814                         int                      n_dofbyx,
815                         const cs_adjacency_t    *x2x,
816                         const cs_range_set_t    *rs)
817 {
818   cs_gnum_t  *grows = NULL, *gcols = NULL;
819 
820   /* The second paramter is set to "true" meaning that the diagonal is stored
821      separately --> MSR storage */
822   cs_matrix_assembler_t  *ma = cs_matrix_assembler_create(rs->l_range, true);
823 
824   /* First loop to count max size of the buffer */
825   cs_lnum_t  max_size = 0;
826   for (cs_lnum_t id = 0; id < n_elts; id++)
827     max_size = CS_MAX(max_size, x2x->idx[id+1] - x2x->idx[id]);
828 
829   /* We increment max_size to take into account the diagonal entry */
830   int  buf_size = n_dofbyx * n_dofbyx * (max_size + 1);
831   BFT_MALLOC(grows, buf_size, cs_gnum_t);
832   BFT_MALLOC(gcols, buf_size, cs_gnum_t);
833 
834   if (n_dofbyx == 1)  { /* Simplified version */
835 
836     for (cs_lnum_t row_id = 0; row_id < n_elts; row_id++) {
837 
838       const cs_gnum_t  grow_id = rs->g_id[row_id];
839       const cs_lnum_t  start = x2x->idx[row_id];
840       const cs_lnum_t  end = x2x->idx[row_id+1];
841 
842       /* Diagonal term is excluded in this connectivity. Add it "manually" */
843       grows[0] = grow_id, gcols[0] = grow_id;
844 
845       /* Extra diagonal couples */
846       for (cs_lnum_t j = start, i = 1; j < end; j++, i++) {
847         grows[i] = grow_id;
848         gcols[i] = rs->g_id[x2x->ids[j]];
849       }
850 
851       cs_matrix_assembler_add_g_ids(ma, end - start + 1, grows, gcols);
852 
853     } /* Loop on entities */
854 
855   }
856   else {
857 
858     for (cs_lnum_t row_id = 0; row_id < n_elts; row_id++) {
859 
860       const cs_lnum_t  start = x2x->idx[row_id];
861       const cs_lnum_t  end = x2x->idx[row_id+1];
862       const int  n_entries = (end - start + 1) * n_dofbyx * n_dofbyx;
863       const cs_gnum_t  *grow_ids = rs->g_id + row_id*n_dofbyx;
864 
865       int shift = 0;
866 
867       /* Diagonal term is excluded in this connectivity. Add it "manually" */
868       for (int dof_i = 0; dof_i < n_dofbyx; dof_i++) {
869         const cs_gnum_t  grow_id = grow_ids[dof_i];
870         for (int dof_j = 0; dof_j < n_dofbyx; dof_j++) {
871           grows[shift] = grow_id;
872           gcols[shift] = grow_ids[dof_j];
873           shift++;
874         }
875       }
876 
877       /* Extra diagonal couples */
878       for (cs_lnum_t j = start; j < end; j++) {
879 
880         const cs_lnum_t  col_id = x2x->ids[j];
881         const cs_gnum_t  *gcol_ids = rs->g_id + col_id*n_dofbyx;
882 
883         for (int dof_i = 0; dof_i < n_dofbyx; dof_i++) {
884           const cs_gnum_t  grow_id = grow_ids[dof_i];
885           for (int dof_j = 0; dof_j < n_dofbyx; dof_j++) {
886             grows[shift] = grow_id;
887             gcols[shift] = gcol_ids[dof_j];
888             shift++;
889           }
890         }
891 
892       } /* Loop on number of DoFs by entity */
893 
894       assert(shift == n_entries);
895       cs_matrix_assembler_add_g_ids(ma, n_entries, grows, gcols);
896 
897     } /* Loop on entities */
898 
899   }
900 
901   /* Now compute structure */
902   cs_matrix_assembler_compute(ma);
903 
904   /* Free temporary buffers */
905   BFT_FREE(grows);
906   BFT_FREE(gcols);
907 
908   return ma;
909 }
910 
911 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
912 
913 /*============================================================================
914  * Public function definitions
915  *============================================================================*/
916 
917 /*----------------------------------------------------------------------------*/
918 /*!
919  * \brief  Retrieve the pointer to a requested \ref cs_matrix_structure_t
920  *         structure
921  *
922  * \param[in]  flag_id       id in the array of matrix structures
923  *
924  * \return  a pointer to a cs_matrix_structure_t
925  */
926 /*----------------------------------------------------------------------------*/
927 
928 cs_matrix_structure_t *
cs_equation_get_matrix_structure(int flag)929 cs_equation_get_matrix_structure(int  flag)
930 {
931   if (cs_equation_assemble_ms == NULL || flag < 0)
932     return NULL;
933 
934   if (flag < CS_CDO_CONNECT_N_CASES)
935     return cs_equation_assemble_ms[flag];
936   else
937     return NULL;
938 }
939 
940 /*----------------------------------------------------------------------------*/
941 /*!
942  * \brief  Get a pointer to a cs_equation_assemble_t structure related
943  *         to a given thread
944  *
945  * \param[in]  t_id    id in the array of pointer
946  *
947  * \return a pointer to a cs_equation_assemble_t structure
948  */
949 /*----------------------------------------------------------------------------*/
950 
951 cs_equation_assemble_t *
cs_equation_assemble_get(int t_id)952 cs_equation_assemble_get(int    t_id)
953 {
954   if (t_id < 0 || t_id >= cs_glob_n_threads)
955     return NULL;
956 
957   return cs_equation_assemble[t_id];
958 }
959 
960 /*----------------------------------------------------------------------------*/
961 /*!
962  * \brief  Allocate and initialize matrix-related structures according to
963  *         the type of discretization used for this simulation
964  *
965  * \param[in]  connect      pointer to a cs_cdo_connect_t structure
966  * \param[in]  time_step    pointer to a time step structure
967  * \param[in]  eb_flag      metadata for Edge-based schemes
968  * \param[in]  fb_flag      metadata for Face-based schemes
969  * \param[in]  vb_flag      metadata for Vertex-based schemes
970  * \param[in]  vcb_flag     metadata for Vertex+Cell-basde schemes
971  * \param[in]  hho_flag     metadata for HHO schemes
972  */
973 /*----------------------------------------------------------------------------*/
974 
975 void
cs_equation_assemble_init(const cs_cdo_connect_t * connect,cs_flag_t eb_flag,cs_flag_t fb_flag,cs_flag_t vb_flag,cs_flag_t vcb_flag,cs_flag_t hho_flag)976 cs_equation_assemble_init(const cs_cdo_connect_t       *connect,
977                           cs_flag_t                     eb_flag,
978                           cs_flag_t                     fb_flag,
979                           cs_flag_t                     vb_flag,
980                           cs_flag_t                     vcb_flag,
981                           cs_flag_t                     hho_flag)
982 {
983   assert(connect != NULL); /* Sanity check */
984 
985   cs_timer_t t0, t1;
986   CS_TIMER_COUNTER_INIT(cs_equation_ms_time);
987 
988   /* Two types of matrix assembler are considered:
989    *  - The one related to matrix based on vertices
990    *  - The one related to matrix based on faces
991    */
992   BFT_MALLOC(cs_equation_assemble_ma,
993              CS_CDO_CONNECT_N_CASES, cs_matrix_assembler_t *);
994   for (int i = 0; i < CS_CDO_CONNECT_N_CASES; i++)
995     cs_equation_assemble_ma[i] = NULL;
996 
997   BFT_MALLOC(cs_equation_assemble_ms,
998              CS_CDO_CONNECT_N_CASES, cs_matrix_structure_t *);
999   for (int i = 0; i < CS_CDO_CONNECT_N_CASES; i++)
1000     cs_equation_assemble_ms[i] = NULL;
1001 
1002   const cs_lnum_t  n_faces = connect->n_faces[CS_ALL_FACES];
1003   const cs_lnum_t  n_vertices = connect->n_vertices;
1004   const cs_lnum_t  n_edges = connect->n_edges;
1005 
1006   /* Allocate shared buffer and initialize shared structures
1007      Number max of DoFs at the cell level: n_max_cw_dofs
1008      Greatest dimension of the diagonal block: max_ddim
1009      Greatest dimension of the extra-diagonal block: max_edim
1010    */
1011   int  n_max_cw_dofs = 0, max_ddim = 1, max_edim = 1;
1012 
1013   /* Allocate and initialize matrix assembler and matrix structures */
1014   if (vb_flag > 0 || vcb_flag > 0) {
1015 
1016     const cs_adjacency_t  *v2v = connect->v2v;
1017 
1018     assert(v2v != NULL);
1019     n_max_cw_dofs = CS_MAX(n_max_cw_dofs, connect->n_max_vbyc);
1020 
1021     if (vb_flag & CS_FLAG_SCHEME_SCALAR || vcb_flag & CS_FLAG_SCHEME_SCALAR) {
1022 
1023       t0 = cs_timer_time();
1024 
1025       /* Build the matrix structure and the matrix assembler structure */
1026       const cs_range_set_t  *rs = connect->range_sets[CS_CDO_CONNECT_VTX_SCAL];
1027 
1028       cs_matrix_assembler_t  *ma = _build_matrix_assembler(n_vertices, 1, v2v,
1029                                                            rs);
1030       cs_matrix_structure_t  *ms
1031         = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma);
1032 
1033       cs_equation_assemble_ma[CS_CDO_CONNECT_VTX_SCAL] = ma;
1034       cs_equation_assemble_ms[CS_CDO_CONNECT_VTX_SCAL] = ms;
1035 
1036       t1 = cs_timer_time();
1037       cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);
1038 
1039     } /* scalar-valued DoFs */
1040 
1041     if (vb_flag & CS_FLAG_SCHEME_VECTOR || vcb_flag & CS_FLAG_SCHEME_VECTOR) {
1042 
1043       t0 = cs_timer_time();
1044 
1045       const cs_range_set_t  *rs = connect->range_sets[CS_CDO_CONNECT_VTX_VECT];
1046 
1047       cs_matrix_assembler_t  *ma = _build_matrix_assembler(n_vertices, 3, v2v,
1048                                                            rs);
1049       cs_matrix_structure_t  *ms
1050         = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma);
1051 
1052       cs_equation_assemble_ma[CS_CDO_CONNECT_VTX_VECT] = ma;
1053       cs_equation_assemble_ms[CS_CDO_CONNECT_VTX_VECT] = ms;
1054 
1055       t1 = cs_timer_time();
1056       cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);
1057 
1058       max_ddim = CS_MAX(max_ddim, 3);
1059       max_edim = CS_MAX(max_edim, 3);
1060 
1061     } /* vector-valued DoFs */
1062 
1063   } /* Vertex-based schemes and related ones */
1064 
1065   /* Allocate and initialize matrix assembler and matrix structures */
1066   if (eb_flag > 0) {
1067 
1068     const cs_adjacency_t  *e2e = connect->e2e;
1069 
1070     assert(e2e != NULL);
1071     n_max_cw_dofs = CS_MAX(n_max_cw_dofs, connect->n_max_ebyc);
1072 
1073     if (eb_flag & CS_FLAG_SCHEME_SCALAR) {
1074 
1075       t0 = cs_timer_time();
1076 
1077       /* Build the matrix structure and the matrix assembler structure */
1078       const cs_range_set_t  *rs = connect->range_sets[CS_CDO_CONNECT_EDGE_SCAL];
1079 
1080       cs_matrix_assembler_t  *ma = _build_matrix_assembler(n_edges, 1, e2e, rs);
1081       cs_matrix_structure_t  *ms
1082         = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma);
1083 
1084       cs_equation_assemble_ma[CS_CDO_CONNECT_EDGE_SCAL] = ma;
1085       cs_equation_assemble_ms[CS_CDO_CONNECT_EDGE_SCAL] = ms;
1086 
1087       t1 = cs_timer_time();
1088       cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);
1089 
1090     } /* scalar-valued DoFs */
1091 
1092   } /* Edge-based schemes and related ones */
1093 
1094   if (fb_flag > 0 || hho_flag > 0) {
1095 
1096     cs_matrix_structure_t  *ms0 = NULL, *ms1 = NULL, *ms2 = NULL;
1097     cs_matrix_assembler_t  *ma0 = NULL, *ma1 = NULL, *ma2 = NULL;
1098 
1099     n_max_cw_dofs = CS_MAX(n_max_cw_dofs, connect->n_max_fbyc);
1100 
1101     const cs_adjacency_t  *f2f = connect->f2f;
1102 
1103     if (cs_flag_test(fb_flag, CS_FLAG_SCHEME_POLY0 | CS_FLAG_SCHEME_SCALAR) ||
1104         cs_flag_test(hho_flag, CS_FLAG_SCHEME_POLY0 | CS_FLAG_SCHEME_SCALAR)) {
1105 
1106       t0 = cs_timer_time();
1107 
1108       const cs_range_set_t  *rs = connect->range_sets[CS_CDO_CONNECT_FACE_SP0];
1109 
1110       ma0 = _build_matrix_assembler(n_faces, 1, f2f, rs);
1111       ms0 = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma0);
1112 
1113       cs_equation_assemble_ma[CS_CDO_CONNECT_FACE_SP0] = ma0;
1114       cs_equation_assemble_ms[CS_CDO_CONNECT_FACE_SP0] = ms0;
1115 
1116       t1 = cs_timer_time();
1117       cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);
1118 
1119     } /* Scalar-valued CDO-Fb or HHO-P0 */
1120 
1121     if (cs_flag_test(fb_flag, CS_FLAG_SCHEME_POLY0 | CS_FLAG_SCHEME_VECTOR) ||
1122         cs_flag_test(hho_flag, CS_FLAG_SCHEME_POLY1 | CS_FLAG_SCHEME_SCALAR) ||
1123         cs_flag_test(hho_flag, CS_FLAG_SCHEME_POLY0 | CS_FLAG_SCHEME_VECTOR)) {
1124 
1125       t0 = cs_timer_time();
1126 
1127       const cs_range_set_t  *rs = connect->range_sets[CS_CDO_CONNECT_FACE_SP1];
1128 
1129       ma1 = _build_matrix_assembler(n_faces, CS_N_FACE_DOFS_1ST, f2f, rs);
1130       ms1 = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma1);
1131 
1132       assert((CS_CDO_CONNECT_FACE_SP1 == CS_CDO_CONNECT_FACE_VP0) &&
1133              (CS_CDO_CONNECT_FACE_SP1 == CS_CDO_CONNECT_FACE_VHP0));
1134 
1135       cs_equation_assemble_ma[CS_CDO_CONNECT_FACE_SP1] = ma1;
1136       cs_equation_assemble_ms[CS_CDO_CONNECT_FACE_SP1] = ms1;
1137 
1138       t1 = cs_timer_time();
1139       cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);
1140 
1141       max_ddim = CS_MAX(max_ddim, 3);
1142       max_edim = CS_MAX(max_edim, 3);
1143 
1144     } /* Vector CDO-Fb or HHO-P1 or vector HHO-P0 */
1145 
1146     if (cs_flag_test(hho_flag,
1147                      CS_FLAG_SCHEME_POLY2 | CS_FLAG_SCHEME_SCALAR)) {
1148 
1149       t0 = cs_timer_time();
1150 
1151       const cs_range_set_t  *rs = connect->range_sets[CS_CDO_CONNECT_FACE_SP2];
1152 
1153       ma2 = _build_matrix_assembler(n_faces, CS_N_FACE_DOFS_2ND, f2f, rs);
1154       ms2 = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma2);
1155 
1156       cs_equation_assemble_ma[CS_CDO_CONNECT_FACE_SP2] = ma2;
1157       cs_equation_assemble_ms[CS_CDO_CONNECT_FACE_SP2] = ms2;
1158 
1159       t1 = cs_timer_time();
1160       cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);
1161 
1162       max_ddim = CS_MAX(max_ddim, CS_N_FACE_DOFS_2ND);
1163       max_edim = CS_MAX(max_edim, CS_N_FACE_DOFS_2ND);
1164 
1165     }
1166 
1167     /* For vector equations and HHO */
1168     if (cs_flag_test(hho_flag, CS_FLAG_SCHEME_VECTOR | CS_FLAG_SCHEME_POLY1) ||
1169         cs_flag_test(hho_flag, CS_FLAG_SCHEME_VECTOR | CS_FLAG_SCHEME_POLY2)) {
1170 
1171       if  (hho_flag & CS_FLAG_SCHEME_POLY1) {
1172 
1173         t0 = cs_timer_time();
1174 
1175         const cs_range_set_t  *rs
1176           = connect->range_sets[CS_CDO_CONNECT_FACE_VHP1];
1177 
1178         ma1 = _build_matrix_assembler(n_faces, 3*CS_N_FACE_DOFS_1ST, f2f, rs);
1179         ms1 = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma1);
1180 
1181         cs_equation_assemble_ma[CS_CDO_CONNECT_FACE_VHP1] = ma1;
1182         cs_equation_assemble_ms[CS_CDO_CONNECT_FACE_VHP1] = ms1;
1183 
1184         t1 = cs_timer_time();
1185         cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);
1186 
1187         max_ddim = CS_MAX(max_ddim, 3*CS_N_FACE_DOFS_1ST);
1188         max_edim = CS_MAX(max_edim, 3*CS_N_FACE_DOFS_1ST);
1189 
1190       }
1191       else if  (hho_flag & CS_FLAG_SCHEME_POLY2) {
1192 
1193         t0 = cs_timer_time();
1194 
1195         const cs_range_set_t  *rs
1196           = connect->range_sets[CS_CDO_CONNECT_FACE_VHP2];
1197 
1198         ma2 = _build_matrix_assembler(n_faces, 3*CS_N_FACE_DOFS_2ND, f2f, rs);
1199         ms2 = cs_matrix_structure_create_from_assembler(CS_MATRIX_MSR, ma2);
1200 
1201         cs_equation_assemble_ma[CS_CDO_CONNECT_FACE_VHP2] = ma2;
1202         cs_equation_assemble_ms[CS_CDO_CONNECT_FACE_VHP2] = ms2;
1203 
1204         t1 = cs_timer_time();
1205         cs_timer_counter_add_diff(&cs_equation_ms_time, &t0, &t1);
1206 
1207         /* 18 = 3*6 =  3*CS_N_FACE_DOFS_2ND */
1208         max_ddim = CS_MAX(max_ddim, 3*CS_N_FACE_DOFS_2ND);
1209         max_edim = CS_MAX(max_edim, 3*CS_N_FACE_DOFS_2ND);
1210 
1211       }
1212 
1213     }
1214 
1215   } /* Face-based schemes (CDO or HHO) */
1216 
1217   /* Common buffers for assemble usage */
1218   const int  n_threads = cs_glob_n_threads;
1219   BFT_MALLOC(cs_equation_assemble, n_threads, cs_equation_assemble_t *);
1220   for (int i = 0; i < n_threads; i++)
1221     cs_equation_assemble[i] = NULL;
1222 
1223 #if defined(HAVE_OPENMP) /* Determine the default number of OpenMP threads */
1224 #pragma omp parallel
1225   {
1226     /* Each thread allocate its part. This yields a better memory affinity */
1227     int  t_id = omp_get_thread_num();
1228     cs_equation_assemble[t_id] = _init_equation_assembler_struct(max_ddim,
1229                                                                  max_edim,
1230                                                                  n_max_cw_dofs);
1231   }
1232 #else
1233   cs_equation_assemble[0] = _init_equation_assembler_struct(max_ddim,
1234                                                             max_edim,
1235                                                             n_max_cw_dofs);
1236 #endif
1237 }
1238 
1239 /*----------------------------------------------------------------------------*/
1240 /*!
1241  * \brief  Free matrix-related structures used during the simulation.
1242  *         Display overall statistic about the assembly stage for CDO schemes
1243  */
1244 /*----------------------------------------------------------------------------*/
1245 
1246 void
cs_equation_assemble_finalize(void)1247 cs_equation_assemble_finalize(void)
1248 {
1249   /* Display profiling/performance information */
1250   cs_log_printf(CS_LOG_PERFORMANCE, " <CDO/Assembly> structure: %5.3e\n",
1251                 cs_equation_ms_time.nsec*1e-9);
1252 
1253   /* Free common assemble buffers */
1254 #if defined(HAVE_OPENMP) /* Determine the default number of OpenMP threads */
1255 #pragma omp parallel
1256   {
1257     int  t_id = omp_get_thread_num();
1258     cs_equation_assemble_t  *eqa = cs_equation_assemble[t_id];
1259     _free_equation_assembler_struct(&eqa);
1260   }
1261 #else
1262   cs_equation_assemble_t  *eqa = cs_equation_assemble[0];
1263   _free_equation_assembler_struct(&eqa);
1264 #endif
1265   BFT_FREE(cs_equation_assemble);
1266 
1267   /* Free matrix structures */
1268   for (int i = 0; i < CS_CDO_CONNECT_N_CASES; i++)
1269     cs_matrix_structure_destroy(&(cs_equation_assemble_ms[i]));
1270   BFT_FREE(cs_equation_assemble_ms);
1271 
1272   /* Free matrix assemblers */
1273   for (int i = 0; i < CS_CDO_CONNECT_N_CASES; i++)
1274     cs_matrix_assembler_destroy(&(cs_equation_assemble_ma[i]));
1275   BFT_FREE(cs_equation_assemble_ma);
1276 }
1277 
1278 /*----------------------------------------------------------------------------*/
1279 /*!
1280  * \brief  Define the function pointer used to assemble the algebraic system
1281  *
1282  * \param[in] scheme     space discretization scheme
1283  * \param[in] ma_id      id in the array of matrix assembler
1284  *
1285  * \return a function pointer cs_equation_assembly_t
1286  */
1287 /*----------------------------------------------------------------------------*/
1288 
1289 cs_equation_assembly_t *
cs_equation_assemble_set(cs_param_space_scheme_t scheme,int ma_id)1290 cs_equation_assemble_set(cs_param_space_scheme_t    scheme,
1291                          int                        ma_id)
1292 {
1293   switch (scheme) {
1294 
1295   case CS_SPACE_SCHEME_CDOVB:
1296     if (ma_id == CS_CDO_CONNECT_VTX_SCAL)
1297       return _set_scalar_assembly_func();
1298     else if (ma_id == CS_CDO_CONNECT_VTX_VECT)
1299       return _set_block33_assembly_func();
1300     break;
1301 
1302   case CS_SPACE_SCHEME_CDOVCB:
1303     if (ma_id == CS_CDO_CONNECT_VTX_SCAL)
1304       return _set_scalar_assembly_func();
1305     break;
1306 
1307   case CS_SPACE_SCHEME_HHO_P0:
1308   case CS_SPACE_SCHEME_CDOFB:
1309     if (ma_id == CS_CDO_CONNECT_FACE_SP0)
1310       return _set_scalar_assembly_func();
1311     else if (ma_id == CS_CDO_CONNECT_FACE_VP0)
1312       return _set_block33_assembly_func();
1313     break;
1314 
1315   case CS_SPACE_SCHEME_HHO_P1:
1316   case CS_SPACE_SCHEME_HHO_P2:
1317     if (ma_id == CS_CDO_CONNECT_FACE_SP1)
1318       return _set_block33_assembly_func();
1319     else
1320       return _set_block_assembly_func();
1321     break;
1322 
1323   case CS_SPACE_SCHEME_CDOEB:
1324     if (ma_id == CS_CDO_CONNECT_EDGE_SCAL)
1325       return _set_scalar_assembly_func();
1326     break;
1327 
1328   default:
1329     return NULL; /* Case not handle */
1330   }
1331 
1332   return NULL;
1333 }
1334 
1335 #if defined(HAVE_MPI)
1336 
1337 /*----------------------------------------------------------------------------*/
1338 /*!
1339  * \brief  Assemble a cellwise matrix into the global matrix
1340  *         Scalar-valued case. Parallel and with openMP threading.
1341  *
1342  * \param[in]      m        cellwise view of the algebraic system
1343  * \param[in]      dof_ids  local DoF numbering
1344  * \param[in]      rset     pointer to a cs_range_set_t structure
1345  * \param[in, out] eqa      pointer to a matrix assembler buffers
1346  * \param[in, out] mav      pointer to a matrix assembler structure
1347  */
1348 /*----------------------------------------------------------------------------*/
1349 
1350 void
cs_equation_assemble_matrix_mpit(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1351 cs_equation_assemble_matrix_mpit(const cs_sdm_t                   *m,
1352                                  const cs_lnum_t                  *dof_ids,
1353                                  const cs_range_set_t             *rset,
1354                                  cs_equation_assemble_t           *eqa,
1355                                  cs_matrix_assembler_values_t     *mav)
1356 {
1357   const cs_matrix_assembler_t  *ma = mav->ma;
1358 
1359   cs_equation_assemble_row_t  *row = eqa->row;
1360 
1361   row->n_cols = m->n_rows;
1362 
1363   /* Switch to the global numbering */
1364   for (int i = 0; i < row->n_cols; i++)
1365     row->col_g_id[i] = rset->g_id[dof_ids[i]];
1366 
1367   /* Push each row of the cellwise matrix into the assembler */
1368   for (int i = 0; i < row->n_cols; i++) {
1369 
1370     row->i = i;                               /* cellwise numbering */
1371     row->g_id = row->col_g_id[i];             /* global numbering */
1372     row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1373     row->val = m->val + i*row->n_cols;
1374 
1375     if (row->l_id < 0 || row->l_id >= rset->n_elts[0])
1376       _assemble_row_scal_dt(mav, ma, row);
1377 
1378     else {
1379 
1380       _assemble_row_scal_ld(ma, row);
1381 
1382 #if CS_CDO_OMP_SYNC_SECTIONS > 0 /* OpenMP with critical section */
1383       _add_scal_values_critical(row, mav->matrix);
1384 #else
1385       _add_scal_values_atomic(row, mav->matrix);
1386 #endif
1387 
1388     }
1389 
1390   }
1391 
1392 }
1393 
1394 /*----------------------------------------------------------------------------*/
1395 /*!
1396  * \brief  Assemble a cellwise matrix into the global matrix
1397  *         Scalar-valued case. Parallel without openMP threading.
1398  *
1399  * \param[in]      m        cellwise view of the algebraic system
1400  * \param[in]      dof_ids  local DoF numbering
1401  * \param[in]      rset     pointer to a cs_range_set_t structure
1402  * \param[in, out] eqa      pointer to a matrix assembler buffers
1403  * \param[in, out] mav      pointer to a matrix assembler structure
1404  */
1405 /*----------------------------------------------------------------------------*/
1406 
1407 void
cs_equation_assemble_matrix_mpis(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1408 cs_equation_assemble_matrix_mpis(const cs_sdm_t                   *m,
1409                                  const cs_lnum_t                  *dof_ids,
1410                                  const cs_range_set_t             *rset,
1411                                  cs_equation_assemble_t           *eqa,
1412                                  cs_matrix_assembler_values_t     *mav)
1413 {
1414   const cs_matrix_assembler_t  *ma = mav->ma;
1415 
1416   cs_equation_assemble_row_t  *row = eqa->row;
1417 
1418   row->n_cols = m->n_rows;
1419 
1420   /* Switch to the global numbering */
1421   for (int i = 0; i < row->n_cols; i++)
1422     row->col_g_id[i] = rset->g_id[dof_ids[i]];
1423 
1424   /* Push each row of the cellwise matrix into the assembler */
1425   for (int i = 0; i < row->n_cols; i++) {
1426 
1427     row->i = i;                               /* cellwise numbering */
1428     row->g_id = row->col_g_id[i];             /* global numbering */
1429     row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1430     row->val = m->val + i*row->n_cols;
1431 
1432     if (row->l_id < 0 || row->l_id >= rset->n_elts[0])
1433       _assemble_row_scal_ds(mav, ma, row);
1434 
1435     else {
1436 
1437       _assemble_row_scal_ld(ma, row);
1438       _add_scal_values_single(row, mav->matrix);
1439 
1440     }
1441 
1442   }
1443 
1444 }
1445 
1446 #endif /* defined(HAVE_MPI) */
1447 
1448 /*----------------------------------------------------------------------------*/
1449 /*!
1450  * \brief  Assemble a cellwise matrix into the global matrix
1451  *         Scalar-valued case. Sequential and with openMP threading.
1452  *
1453  * \param[in]      m        cellwise view of the algebraic system
1454  * \param[in]      dof_ids  local DoF numbering
1455  * \param[in]      rset     pointer to a cs_range_set_t structure
1456  * \param[in, out] eqa      pointer to a matrix assembler buffers
1457  * \param[in, out] mav      pointer to a matrix assembler structure
1458  */
1459 /*----------------------------------------------------------------------------*/
1460 
1461 void
cs_equation_assemble_matrix_seqt(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1462 cs_equation_assemble_matrix_seqt(const cs_sdm_t                  *m,
1463                                  const cs_lnum_t                 *dof_ids,
1464                                  const cs_range_set_t            *rset,
1465                                  cs_equation_assemble_t          *eqa,
1466                                  cs_matrix_assembler_values_t    *mav)
1467 {
1468   const cs_matrix_assembler_t  *ma = mav->ma;
1469 
1470   cs_equation_assemble_row_t  *row = eqa->row;
1471 
1472   row->n_cols = m->n_rows;
1473 
1474   /* Switch to the global numbering */
1475   for (int i = 0; i < row->n_cols; i++)
1476     row->col_g_id[i] = rset->g_id[dof_ids[i]];
1477 
1478   /* Push each row of the cellwise matrix into the assembler */
1479   for (int i = 0; i < row->n_cols; i++) {
1480 
1481     row->i = i;                               /* cellwise numbering */
1482     row->g_id = row->col_g_id[i];             /* global numbering */
1483     row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1484     row->val = m->val + i*row->n_cols;
1485 
1486     _assemble_row_scal_l(ma, row);
1487 
1488 #if CS_CDO_OMP_SYNC_SECTIONS > 0 /* OpenMP with critical section */
1489     _add_scal_values_critical(row, mav->matrix);
1490 #else
1491     _add_scal_values_atomic(row, mav->matrix);
1492 #endif
1493 
1494   }
1495 
1496 }
1497 
1498 /*----------------------------------------------------------------------------*/
1499 /*!
1500  * \brief  Assemble a cellwise matrix into the global matrix
1501  *         Scalar-valued case. Sequential and without openMP.
1502  *
1503  * \param[in]      m        cellwise view of the algebraic system
1504  * \param[in]      dof_ids  local DoF numbering
1505  * \param[in]      rset     pointer to a cs_range_set_t structure
1506  * \param[in, out] eqa      pointer to a matrix assembler buffers
1507  * \param[in, out] mav      pointer to a matrix assembler structure
1508  */
1509 /*----------------------------------------------------------------------------*/
1510 
1511 void
cs_equation_assemble_matrix_seqs(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1512 cs_equation_assemble_matrix_seqs(const cs_sdm_t                  *m,
1513                                  const cs_lnum_t                 *dof_ids,
1514                                  const cs_range_set_t            *rset,
1515                                  cs_equation_assemble_t          *eqa,
1516                                  cs_matrix_assembler_values_t    *mav)
1517 {
1518   const cs_matrix_assembler_t  *ma = mav->ma;
1519 
1520   cs_equation_assemble_row_t  *row = eqa->row;
1521 
1522   row->n_cols = m->n_rows;
1523 
1524   /* Switch to the global numbering */
1525   for (int i = 0; i < row->n_cols; i++)
1526     row->col_g_id[i] = rset->g_id[dof_ids[i]];
1527 
1528   /* Push each row of the cellwise matrix into the assembler */
1529   for (int i = 0; i < row->n_cols; i++) {
1530 
1531     row->i = i;                               /* cellwise numbering */
1532     row->g_id = row->col_g_id[i];             /* global numbering */
1533     row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1534     row->val = m->val + i*row->n_cols;
1535 
1536     _assemble_row_scal_l(ma, row);
1537     _add_scal_values_single(row, mav->matrix);
1538 
1539   } /* Loop on rows */
1540 
1541 }
1542 
1543 /*----------------------------------------------------------------------------*/
1544 /*!
1545  * \brief  Assemble a cellwise matrix into the global matrix
1546  *         Case of a block 3x3 entries. Expand each row.
1547  *         Sequential run without openMP threading.
1548  *
1549  * \param[in]      m        cellwise view of the algebraic system
1550  * \param[in]      dof_ids  local DoF numbering
1551  * \param[in]      rset     pointer to a cs_range_set_t structure
1552  * \param[in, out] eqa      pointer to an equation assembly structure
1553  * \param[in, out] mav      pointer to a matrix assembler structure
1554  */
1555 /*----------------------------------------------------------------------------*/
1556 
1557 void
cs_equation_assemble_eblock33_matrix_seqs(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1558 cs_equation_assemble_eblock33_matrix_seqs(const cs_sdm_t               *m,
1559                                           const cs_lnum_t              *dof_ids,
1560                                           const cs_range_set_t         *rset,
1561                                           cs_equation_assemble_t       *eqa,
1562                                           cs_matrix_assembler_values_t *mav)
1563 {
1564   const cs_sdm_block_t  *bd = m->block_desc;
1565   const cs_matrix_assembler_t  *ma = mav->ma;
1566 
1567   cs_equation_assemble_row_t  *row = eqa->row;
1568 
1569   /* Sanity checks */
1570   assert(m->flag & CS_SDM_BY_BLOCK);
1571   assert(m->block_desc != NULL);
1572   assert(bd->n_row_blocks == bd->n_col_blocks);
1573   assert(eqa->ddim >= 3);
1574   assert(row->expval != NULL);
1575 
1576   /* Expand the values for a bundle of rows */
1577   cs_real_t  *_vxyz[3] = {row->expval,
1578                           row->expval + m->n_rows,
1579                           row->expval + 2*m->n_rows };
1580 
1581   assert(m->n_rows == m->n_rows);
1582   row->n_cols = m->n_rows;
1583 
1584   /* Switch to the global numbering */
1585   for (int i = 0; i < row->n_cols; i++)
1586     row->col_g_id[i] = rset->g_id[dof_ids[i]];
1587 
1588   for (int bi = 0; bi < bd->n_row_blocks; bi++) {
1589 
1590     /* Expand all the blocks for this row */
1591     for (int bj = 0; bj < bd->n_col_blocks; bj++) {
1592 
1593       /* mIJ matrices are small square matrices of size 3 */
1594       const cs_sdm_t  *const mIJ = cs_sdm_get_block(m, bi, bj);
1595       const cs_real_t  *const mvals = mIJ->val;
1596       for (int k = 0; k < 3; k++) {
1597         _vxyz[0][3*bj+k] = mvals[  k];
1598         _vxyz[1][3*bj+k] = mvals[3+k];
1599         _vxyz[2][3*bj+k] = mvals[6+k];
1600       }
1601 
1602     } /* Loop on column-wise blocks */
1603 
1604     /* dof_ids is an interlaced array (get access to the next 3 values) */
1605     for (int k = 0; k < 3; k++) {
1606       row->i = 3*bi+k;                          /* cellwise numbering */
1607       row->g_id = row->col_g_id[3*bi+k];        /* global numbering */
1608       row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1609       row->val = _vxyz[k];
1610 
1611       _assemble_row_scal_l(ma, row);
1612       _add_scal_values_single(row, mav->matrix);
1613 
1614     } /* Push each row of the block */
1615 
1616   } /* Loop on row-wise blocks */
1617 
1618 }
1619 
1620 /*----------------------------------------------------------------------------*/
1621 /*!
1622  * \brief  Assemble a cellwise matrix into the global matrix
1623  *         Case of a block 3x3 entries. Expand each row.
1624  *         Sequential run with openMP threading.
1625  *
1626  * \param[in]      m        cellwise view of the algebraic system
1627  * \param[in]      dof_ids  local DoF numbering
1628  * \param[in]      rset     pointer to a cs_range_set_t structure
1629  * \param[in, out] eqa      pointer to an equation assembly structure
1630  * \param[in, out] mav      pointer to a matrix assembler structure
1631  */
1632 /*----------------------------------------------------------------------------*/
1633 
1634 void
cs_equation_assemble_eblock33_matrix_seqt(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1635 cs_equation_assemble_eblock33_matrix_seqt(const cs_sdm_t                *m,
1636                                           const cs_lnum_t               *dof_ids,
1637                                           const cs_range_set_t          *rset,
1638                                           cs_equation_assemble_t        *eqa,
1639                                           cs_matrix_assembler_values_t  *mav)
1640 {
1641   const cs_sdm_block_t  *bd = m->block_desc;
1642   const cs_matrix_assembler_t  *ma = mav->ma;
1643 
1644   cs_equation_assemble_row_t  *row = eqa->row;
1645 
1646   /* Sanity checks */
1647   assert(m->flag & CS_SDM_BY_BLOCK);
1648   assert(m->block_desc != NULL);
1649   assert(bd->n_row_blocks == bd->n_col_blocks);
1650   assert(eqa->ddim >= 3);
1651   assert(row->expval != NULL);
1652 
1653   /* Expand the values for a bundle of rows */
1654   cs_real_t  *_vxyz[3] = {row->expval,
1655                           row->expval + m->n_rows,
1656                           row->expval + 2*m->n_rows };
1657 
1658   assert(m->n_rows == m->n_rows);
1659   row->n_cols = m->n_rows;
1660 
1661   /* Switch to the global numbering */
1662   for (int i = 0; i < row->n_cols; i++)
1663     row->col_g_id[i] = rset->g_id[dof_ids[i]];
1664 
1665   for (int bi = 0; bi < bd->n_row_blocks; bi++) {
1666 
1667     /* Expand all the blocks for this row */
1668     for (int bj = 0; bj < bd->n_col_blocks; bj++) {
1669 
1670       /* mIJ matrices are small square matrices of size 3 */
1671       const cs_sdm_t  *const mIJ = cs_sdm_get_block(m, bi, bj);
1672       const cs_real_t  *const mvals = mIJ->val;
1673       for (int k = 0; k < 3; k++) {
1674         _vxyz[0][3*bj+k] = mvals[  k];
1675         _vxyz[1][3*bj+k] = mvals[3+k];
1676         _vxyz[2][3*bj+k] = mvals[6+k];
1677       }
1678 
1679     } /* Loop on column-wise blocks */
1680 
1681     /* dof_ids is an interlaced array (get access to the next 3 values */
1682     for (int k = 0; k < 3; k++) {
1683       row->i = 3*bi+k;                          /* cellwise numbering */
1684       row->g_id = row->col_g_id[3*bi+k];        /* global numbering */
1685       row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1686       row->val = _vxyz[k];
1687 
1688       _assemble_row_scal_l(ma, row);
1689 
1690 #if CS_CDO_OMP_SYNC_SECTIONS > 0 /* OpenMP with critical section */
1691       _add_scal_values_critical(row, mav->matrix);
1692 #else
1693       _add_scal_values_atomic(row, mav->matrix);
1694 #endif
1695 
1696     } /* Push each row of the block */
1697 
1698   } /* Loop on row-wise blocks */
1699 }
1700 
1701 #if defined(HAVE_MPI)
1702 
1703 /*----------------------------------------------------------------------------*/
1704 /*!
1705  * \brief  Assemble a cellwise matrix into the global matrix
1706  *         Case of a block 3x3 entries. Expand each row.
1707  *         Parallel run without openMP threading.
1708  *
1709  * \param[in]      m        cellwise view of the algebraic system
1710  * \param[in]      dof_ids  local DoF numbering
1711  * \param[in]      rset     pointer to a cs_range_set_t structure
1712  * \param[in, out] eqa      pointer to an equation assembly structure
1713  * \param[in, out] mav      pointer to a matrix assembler structure
1714  */
1715 /*----------------------------------------------------------------------------*/
1716 
1717 void
cs_equation_assemble_eblock33_matrix_mpis(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1718 cs_equation_assemble_eblock33_matrix_mpis(const cs_sdm_t               *m,
1719                                           const cs_lnum_t              *dof_ids,
1720                                           const cs_range_set_t         *rset,
1721                                           cs_equation_assemble_t       *eqa,
1722                                           cs_matrix_assembler_values_t *mav)
1723 {
1724   const cs_sdm_block_t  *bd = m->block_desc;
1725   const cs_matrix_assembler_t  *ma = mav->ma;
1726 
1727   cs_equation_assemble_row_t  *row = eqa->row;
1728 
1729   assert(m->flag & CS_SDM_BY_BLOCK);
1730   assert(m->block_desc != NULL);
1731   assert(bd->n_row_blocks == bd->n_col_blocks);
1732   assert(eqa->ddim >= 3);
1733   assert(row->expval != NULL);
1734 
1735   /* Expand the values for a bundle of rows */
1736 
1737   cs_real_t  *_vxyz[3] = {row->expval,
1738                           row->expval + m->n_rows,
1739                           row->expval + 2*m->n_rows };
1740 
1741   row->n_cols = m->n_rows;
1742 
1743   /* Switch to the global numbering */
1744 
1745   for (int i = 0; i < row->n_cols; i++)
1746     row->col_g_id[i] = rset->g_id[dof_ids[i]];
1747 
1748   for (int bi = 0; bi < bd->n_row_blocks; bi++) {
1749 
1750     /* Expand all the blocks for this row */
1751 
1752     for (int bj = 0; bj < bd->n_col_blocks; bj++) {
1753 
1754       /* mIJ matrices are small square matrices of size 3 */
1755 
1756       const cs_sdm_t  *const mIJ = cs_sdm_get_block(m, bi, bj);
1757       const cs_real_t  *const mvals = mIJ->val;
1758       for (int k = 0; k < 3; k++) {
1759         _vxyz[0][3*bj+k] = mvals[  k];
1760         _vxyz[1][3*bj+k] = mvals[3+k];
1761         _vxyz[2][3*bj+k] = mvals[6+k];
1762       }
1763 
1764     } /* Loop on column-wise blocks */
1765 
1766     /* dof_ids is an interlaced array (get access to the next 3 values */
1767 
1768     for (int k = 0; k < 3; k++) {
1769 
1770       row->i = 3*bi+k;                          /* cellwise numbering */
1771       row->g_id = row->col_g_id[3*bi+k];        /* global numbering */
1772       row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1773       row->val = _vxyz[k];
1774 
1775       if (row->l_id < 0 || row->l_id >= rset->n_elts[0])
1776         _assemble_row_scal_ds(mav, ma, row);
1777 
1778       else {
1779 
1780         _assemble_row_scal_ld(ma, row);
1781         _add_scal_values_single(row, mav->matrix);
1782 
1783       }
1784 
1785     } /* Push each row of the block */
1786 
1787   } /* Loop on row-wise blocks */
1788 }
1789 
1790 /*----------------------------------------------------------------------------*/
1791 /*!
1792  * \brief  Assemble a cellwise matrix into the global matrix
1793  *         Case of a block 3x3 entries. Expand each row.
1794  *         Parallel run with openMP threading.
1795  *
1796  * \param[in]      m        cellwise view of the algebraic system
1797  * \param[in]      dof_ids  local DoF numbering
1798  * \param[in]      rset     pointer to a cs_range_set_t structure
1799  * \param[in, out] eqa      pointer to an equation assembly structure
1800  * \param[in, out] mav      pointer to a matrix assembler structure
1801  */
1802 /*----------------------------------------------------------------------------*/
1803 
1804 void
cs_equation_assemble_eblock33_matrix_mpit(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1805 cs_equation_assemble_eblock33_matrix_mpit(const cs_sdm_t               *m,
1806                                           const cs_lnum_t              *dof_ids,
1807                                           const cs_range_set_t         *rset,
1808                                           cs_equation_assemble_t       *eqa,
1809                                           cs_matrix_assembler_values_t *mav)
1810 {
1811   const cs_sdm_block_t  *bd = m->block_desc;
1812   const cs_matrix_assembler_t  *ma = mav->ma;
1813 
1814   cs_equation_assemble_row_t  *row = eqa->row;
1815 
1816   assert(m->flag & CS_SDM_BY_BLOCK);
1817   assert(m->block_desc != NULL);
1818   assert(bd->n_row_blocks == bd->n_col_blocks);
1819   assert(eqa->ddim >= 3);
1820   assert(row->expval != NULL);
1821 
1822   /* Expand the values for a bundle of rows */
1823 
1824   cs_real_t  *_vxyz[3] = {row->expval,
1825                           row->expval + m->n_rows,
1826                           row->expval + 2*m->n_rows};
1827 
1828   row->n_cols = m->n_rows;
1829 
1830   /* Switch to the global numbering */
1831 
1832   for (int i = 0; i < row->n_cols; i++)
1833     row->col_g_id[i] = rset->g_id[dof_ids[i]];
1834 
1835   for (int bi = 0; bi < bd->n_row_blocks; bi++) {
1836 
1837     /* Expand all the blocks for this row */
1838 
1839     for (int bj = 0; bj < bd->n_col_blocks; bj++) {
1840 
1841       /* mIJ matrices are small square matrices of size 3 */
1842 
1843       const cs_sdm_t  *const mIJ = cs_sdm_get_block(m, bi, bj);
1844       const cs_real_t  *const mvals = mIJ->val;
1845       for (int k = 0; k < 3; k++) {
1846         _vxyz[0][3*bj+k] = mvals[  k];
1847         _vxyz[1][3*bj+k] = mvals[3+k];
1848         _vxyz[2][3*bj+k] = mvals[6+k];
1849       }
1850 
1851     } /* Loop on column-wise blocks */
1852 
1853     /* dof_ids is an interlaced array (get access to the next 3 values */
1854 
1855     for (int k = 0; k < 3; k++) {
1856 
1857       row->i = 3*bi+k;                          /* cellwise numbering */
1858       row->g_id = row->col_g_id[3*bi+k];        /* global numbering */
1859       row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1860       row->val = _vxyz[k];
1861 
1862       if (row->l_id < 0 || row->l_id >= rset->n_elts[0])
1863         _assemble_row_scal_ds(mav, ma, row);
1864 
1865       else {
1866 
1867         _assemble_row_scal_ld(ma, row);
1868 
1869 #if CS_CDO_OMP_SYNC_SECTIONS > 0 /* OpenMP with critical section */
1870         _add_scal_values_critical(row, mav->matrix);
1871 #else
1872         _add_scal_values_atomic(row, mav->matrix);
1873 #endif
1874 
1875       }
1876 
1877     } /* Push each row of the block */
1878 
1879   } /* Loop on row-wise blocks */
1880 }
1881 
1882 #endif /* defined(HAVE_MPI) */
1883 
1884 /*----------------------------------------------------------------------------*/
1885 /*!
1886  * \brief  Assemble a cellwise matrix into the global matrix
1887  *         Case of a block NxN entries. Expand each row.
1888  *         Sequential run without openMP threading.
1889  *
1890  * \param[in]      m        cellwise view of the algebraic system
1891  * \param[in]      dof_ids  local DoF numbering
1892  * \param[in]      rset     pointer to a cs_range_set_t structure
1893  * \param[in, out] eqa      pointer to an equation assembly structure
1894  * \param[in, out] mav      pointer to a matrix assembler structure
1895  */
1896 /*----------------------------------------------------------------------------*/
1897 
1898 void
cs_equation_assemble_eblock_matrix_seqs(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1899 cs_equation_assemble_eblock_matrix_seqs(const cs_sdm_t                *m,
1900                                         const cs_lnum_t               *dof_ids,
1901                                         const cs_range_set_t          *rset,
1902                                         cs_equation_assemble_t        *eqa,
1903                                         cs_matrix_assembler_values_t  *mav)
1904 {
1905   const cs_sdm_block_t  *bd = m->block_desc;
1906   const cs_matrix_assembler_t  *ma = mav->ma;
1907 
1908   cs_equation_assemble_row_t  *row = eqa->row;
1909 
1910   /* Sanity checks */
1911   assert(m->flag & CS_SDM_BY_BLOCK);
1912   assert(m->block_desc != NULL);
1913   assert(bd->n_row_blocks == bd->n_col_blocks);
1914   assert(eqa->ddim < 19);
1915   assert(eqa->edim == eqa->ddim);
1916   assert(row->expval != NULL);
1917 
1918   const int  dim = eqa->ddim;
1919 
1920   /* Expand the values for a bundle of rows */
1921   cs_real_t  *_vpointer[18];
1922 
1923   for (int k = 0; k < dim; k++)
1924     _vpointer[k] = row->expval + k*m->n_rows;
1925 
1926   row->n_cols = m->n_rows;
1927 
1928   /* Switch to the global numbering */
1929   for (int i = 0; i < row->n_cols; i++)
1930     row->col_g_id[i] = rset->g_id[dof_ids[i]];
1931 
1932   for (int bi = 0; bi < bd->n_row_blocks; bi++) {
1933 
1934     /* Expand all the blocks for this row */
1935     for (int bj = 0; bj < bd->n_col_blocks; bj++) {
1936 
1937       /* mIJ matrices are small square matrices of size "dim" */
1938       const cs_sdm_t  *const mIJ = cs_sdm_get_block(m, bi, bj);
1939 
1940       for (int ki = 0; ki < dim; ki++) {
1941         const cs_real_t  *const mvals = mIJ->val + ki*dim;
1942         cs_real_t  *buf = _vpointer[ki] + dim*bj;
1943         for (int kj = 0; kj < dim; kj++)
1944           buf[kj] = mvals[kj];
1945       }
1946 
1947     } /* Loop on column blocks */
1948 
1949     /* dof_ids is an interlaced array (get access to the next "dim" values */
1950     for (int ki = 0; ki < dim; ki++) {
1951       row->i = dim*bi+ki;                       /* cellwise numbering */
1952       row->g_id = row->col_g_id[dim*bi+ki];     /* global numbering */
1953       row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
1954       row->val = _vpointer[ki];
1955 
1956       _assemble_row_scal_l(ma, row);
1957       _add_scal_values_single(row, mav->matrix);
1958     }
1959 
1960   } /* Loop on row-wise blocks */
1961 
1962 }
1963 
1964 /*----------------------------------------------------------------------------*/
1965 /*!
1966  * \brief  Assemble a cellwise system into the global algebraic system.
1967  *         Case of a block NxN entries. Expand each row.
1968  *         Sequential run with openMP threading.
1969  *
1970  * \param[in]      m        cellwise view of the algebraic system
1971  * \param[in]      dof_ids  local DoF numbering
1972  * \param[in]      rset     pointer to a cs_range_set_t structure
1973  * \param[in, out] eqa      pointer to an equation assembly structure
1974  * \param[in, out] mav      pointer to a matrix assembler structure
1975  */
1976 /*----------------------------------------------------------------------------*/
1977 
1978 void
cs_equation_assemble_eblock_matrix_seqt(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)1979 cs_equation_assemble_eblock_matrix_seqt(const cs_sdm_t                *m,
1980                                         const cs_lnum_t               *dof_ids,
1981                                         const cs_range_set_t          *rset,
1982                                         cs_equation_assemble_t        *eqa,
1983                                         cs_matrix_assembler_values_t  *mav)
1984 {
1985   const cs_sdm_block_t  *bd = m->block_desc;
1986   const cs_matrix_assembler_t  *ma = mav->ma;
1987 
1988   cs_equation_assemble_row_t  *row = eqa->row;
1989 
1990   /* Sanity checks */
1991   assert(m->flag & CS_SDM_BY_BLOCK);
1992   assert(m->block_desc != NULL);
1993   assert(bd->n_row_blocks == bd->n_col_blocks);
1994   assert(eqa->ddim < 19);
1995   assert(eqa->edim == eqa->ddim);
1996   assert(row->expval != NULL);
1997 
1998   const int  dim = eqa->ddim;
1999 
2000   /* Expand the values for a bundle of rows */
2001   cs_real_t  *_vpointer[18];
2002 
2003   for (int k = 0; k < dim; k++)
2004     _vpointer[k] = row->expval + k*m->n_rows;
2005 
2006   row->n_cols = m->n_rows;
2007 
2008   /* Switch to the global numbering */
2009   for (int i = 0; i < row->n_cols; i++)
2010     row->col_g_id[i] = rset->g_id[dof_ids[i]];
2011 
2012   for (int bi = 0; bi < bd->n_row_blocks; bi++) {
2013 
2014     /* Expand all the blocks for this row */
2015     for (int bj = 0; bj < bd->n_col_blocks; bj++) {
2016 
2017       /* mIJ matrices are small square matrices of size "dim" */
2018       const cs_sdm_t  *const mIJ = cs_sdm_get_block(m, bi, bj);
2019 
2020       for (int ki = 0; ki < dim; ki++) {
2021         const cs_real_t  *const mvals = mIJ->val + ki*dim;
2022         cs_real_t  *buf = _vpointer[ki] + dim*bj;
2023         for (int kj = 0; kj < dim; kj++)
2024           buf[kj] = mvals[kj];
2025       }
2026 
2027     } /* Loop on column blocks */
2028 
2029     /* dof_ids is an interlaced array (get access to the next "dim" values */
2030     for (int ki = 0; ki < dim; ki++) {
2031       row->i = dim*bi+ki;                       /* cellwise numbering */
2032       row->g_id = row->col_g_id[dim*bi+ki];     /* global numbering */
2033       row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
2034       row->val = _vpointer[ki];
2035 
2036       _assemble_row_scal_l(ma, row);
2037 
2038 #if CS_CDO_OMP_SYNC_SECTIONS > 0 /* OpenMP with critical section */
2039       _add_scal_values_critical(row, mav->matrix);
2040 #else
2041       _add_scal_values_atomic(row, mav->matrix);
2042 #endif
2043     } /* Push each row of the block */
2044 
2045   } /* Loop on row-wise blocks */
2046 
2047 }
2048 
2049 #if defined(HAVE_MPI)
2050 
2051 /*----------------------------------------------------------------------------*/
2052 /*!
2053  * \brief  Assemble a cellwise matrix into the global matrix
2054  *         Case of a block NxN entries. Expand each row.
2055  *         Parallel run without openMP threading.
2056  *
2057  * \param[in]      m        cellwise view of the algebraic system
2058  * \param[in]      dof_ids  local DoF numbering
2059  * \param[in]      rset     pointer to a cs_range_set_t structure
2060  * \param[in, out] eqa      pointer to an equation assembly structure
2061  * \param[in, out] mav      pointer to a matrix assembler structure
2062  */
2063 /*----------------------------------------------------------------------------*/
2064 
2065 void
cs_equation_assemble_eblock_matrix_mpis(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)2066 cs_equation_assemble_eblock_matrix_mpis(const cs_sdm_t                *m,
2067                                         const cs_lnum_t               *dof_ids,
2068                                         const cs_range_set_t          *rset,
2069                                         cs_equation_assemble_t        *eqa,
2070                                         cs_matrix_assembler_values_t  *mav)
2071 {
2072   const cs_sdm_block_t  *bd = m->block_desc;
2073   const cs_matrix_assembler_t  *ma = mav->ma;
2074 
2075   cs_equation_assemble_row_t  *row = eqa->row;
2076 
2077   /* Sanity checks */
2078   assert(m->flag & CS_SDM_BY_BLOCK);
2079   assert(m->block_desc != NULL);
2080   assert(bd->n_row_blocks == bd->n_col_blocks);
2081   assert(eqa->ddim < 19);
2082   assert(eqa->edim == eqa->ddim);
2083   assert(row->expval != NULL);
2084 
2085   const int  dim = eqa->ddim;
2086 
2087   /* Expand the values for a bundle of rows */
2088   cs_real_t  *_vpointer[18];
2089 
2090   for (int k = 0; k < dim; k++)
2091     _vpointer[k] = row->expval + k*m->n_rows;
2092 
2093   row->n_cols = m->n_rows;
2094 
2095   /* Switch to the global numbering */
2096   for (int i = 0; i < row->n_cols; i++)
2097     row->col_g_id[i] = rset->g_id[dof_ids[i]];
2098 
2099   for (int bi = 0; bi < bd->n_row_blocks; bi++) {
2100 
2101     /* Expand all the blocks for this row */
2102     for (int bj = 0; bj < bd->n_col_blocks; bj++) {
2103 
2104       /* mIJ matrices are small square matrices of size "dim" */
2105       const cs_sdm_t  *const mIJ = cs_sdm_get_block(m, bi, bj);
2106 
2107       for (int ki = 0; ki < dim; ki++) {
2108         const cs_real_t  *const mvals = mIJ->val + ki*dim;
2109         cs_real_t  *buf = _vpointer[ki] + dim*bj;
2110         for (int kj = 0; kj < dim; kj++)
2111           buf[kj] = mvals[kj];
2112       }
2113 
2114     } /* Loop on column blocks */
2115 
2116     /* dof_ids is an interlaced array (get access to the next "dim" values */
2117     for (int ki = 0; ki < dim; ki++) {
2118 
2119       row->i = dim*bi+ki;                       /* cellwise numbering */
2120       row->g_id = row->col_g_id[dim*bi+ki];     /* global numbering */
2121       row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
2122       row->val = _vpointer[ki];
2123 
2124       if (row->l_id < 0 || row->l_id >= rset->n_elts[0])
2125         _assemble_row_scal_ds(mav, ma, row);
2126 
2127       else {
2128 
2129         _assemble_row_scal_ld(ma, row);
2130         _add_scal_values_single(row, mav->matrix);
2131 
2132       }
2133 
2134     } /* Push each row of the block */
2135 
2136   } /* Loop on row-wise blocks */
2137 
2138 }
2139 
2140 /*----------------------------------------------------------------------------*/
2141 /*!
2142  * \brief  Assemble a cellwise matrix into the global matrix
2143  *         Case of a block NxN entries. Expand each row.
2144  *         Parallel run with openMP threading.
2145  *
2146  * \param[in]      m        cellwise view of the algebraic system
2147  * \param[in]      dof_ids  local DoF numbering
2148  * \param[in]      rset     pointer to a cs_range_set_t structure
2149  * \param[in, out] eqa      pointer to an equation assembly structure
2150  * \param[in, out] mav      pointer to a matrix assembler structure
2151  */
2152 /*----------------------------------------------------------------------------*/
2153 
2154 void
cs_equation_assemble_eblock_matrix_mpit(const cs_sdm_t * m,const cs_lnum_t * dof_ids,const cs_range_set_t * rset,cs_equation_assemble_t * eqa,cs_matrix_assembler_values_t * mav)2155 cs_equation_assemble_eblock_matrix_mpit(const cs_sdm_t                *m,
2156                                         const cs_lnum_t               *dof_ids,
2157                                         const cs_range_set_t          *rset,
2158                                         cs_equation_assemble_t        *eqa,
2159                                         cs_matrix_assembler_values_t  *mav)
2160 {
2161   const cs_sdm_block_t  *bd = m->block_desc;
2162   const cs_matrix_assembler_t  *ma = mav->ma;
2163 
2164   cs_equation_assemble_row_t  *row = eqa->row;
2165 
2166   /* Sanity checks */
2167   assert(m->flag & CS_SDM_BY_BLOCK);
2168   assert(m->block_desc != NULL);
2169   assert(bd->n_row_blocks == bd->n_col_blocks);
2170   assert(eqa->ddim < 19);
2171   assert(eqa->edim == eqa->ddim);
2172   assert(row->expval != NULL);
2173 
2174   const int  dim = eqa->ddim;
2175 
2176   /* Expand the values for a bundle of rows */
2177   cs_real_t  *_vpointer[18];
2178 
2179   for (int k = 0; k < dim; k++)
2180     _vpointer[k] = row->expval + k*m->n_rows;
2181 
2182   row->n_cols = m->n_rows;
2183 
2184   /* Switch to the global numbering */
2185   for (int i = 0; i < row->n_cols; i++)
2186     row->col_g_id[i] = rset->g_id[dof_ids[i]];
2187 
2188   for (int bi = 0; bi < bd->n_row_blocks; bi++) {
2189 
2190     /* Expand all the blocks for this row */
2191     for (int bj = 0; bj < bd->n_col_blocks; bj++) {
2192 
2193       /* mIJ matrices are small square matrices of size "dim" */
2194       const cs_sdm_t  *const mIJ = cs_sdm_get_block(m, bi, bj);
2195 
2196       for (int ki = 0; ki < dim; ki++) {
2197         const cs_real_t  *const mvals = mIJ->val + ki*dim;
2198         cs_real_t  *buf = _vpointer[ki] + dim*bj;
2199         for (int kj = 0; kj < dim; kj++)
2200           buf[kj] = mvals[kj];
2201       }
2202 
2203     } /* Loop on column blocks */
2204 
2205     /* dof_ids is an interlaced array (get access to the next "dim" values */
2206     for (int ki = 0; ki < dim; ki++) {
2207 
2208       row->i = dim*bi+ki;                       /* cellwise numbering */
2209       row->g_id = row->col_g_id[dim*bi+ki];     /* global numbering */
2210       row->l_id = row->g_id - rset->l_range[0]; /* range set numbering */
2211       row->val = _vpointer[ki];
2212 
2213       if (row->l_id < 0 || row->l_id >= rset->n_elts[0])
2214         _assemble_row_scal_ds(mav, ma, row);
2215 
2216       else {
2217 
2218         _assemble_row_scal_ld(ma, row);
2219 
2220 #if CS_CDO_OMP_SYNC_SECTIONS > 0 /* OpenMP with critical section */
2221         _add_scal_values_critical(row, mav->matrix);
2222 #else
2223         _add_scal_values_atomic(row, mav->matrix);
2224 #endif
2225 
2226       }
2227 
2228     } /* Push each row of the block */
2229 
2230   } /* Loop on row-wise blocks */
2231 
2232 }
2233 
2234 #endif /* defined(HAVE_MPI) */
2235 
2236 /*----------------------------------------------------------------------------*/
2237 
2238 END_C_DECLS
2239