1 /*============================================================================
2  * Mathematical base functions.
3  *============================================================================*/
4 
5 /*
6   This file is part of Code_Saturne, a general-purpose CFD tool.
7 
8   Copyright (C) 1998-2021 EDF S.A.
9 
10   This program is free software; you can redistribute it and/or modify it under
11   the terms of the GNU General Public License as published by the Free Software
12   Foundation; either version 2 of the License, or (at your option) any later
13   version.
14 
15   This program is distributed in the hope that it will be useful, but WITHOUT
16   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
18   details.
19 
20   You should have received a copy of the GNU General Public License along with
21   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22   Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24 
25 /*----------------------------------------------------------------------------*/
26 
27 #include "cs_defs.h"
28 
29 /*----------------------------------------------------------------------------
30  * Standard C library headers
31  *----------------------------------------------------------------------------*/
32 
33 #include <assert.h>
34 #include <errno.h>
35 #include <stdio.h>
36 #include <stdarg.h>
37 #include <string.h>
38 #include <float.h>
39 
40 #if defined(HAVE_MPI)
41 #include <mpi.h>
42 #endif
43 
44 /*----------------------------------------------------------------------------
45  *  Local headers
46  *----------------------------------------------------------------------------*/
47 
48 #include "bft_error.h"
49 #include "bft_mem.h"
50 
51 /*----------------------------------------------------------------------------
52  *  Header for the current file
53  *----------------------------------------------------------------------------*/
54 
55 #include "cs_math.h"
56 
57 /*----------------------------------------------------------------------------*/
58 
59 BEGIN_C_DECLS
60 
61 /*=============================================================================
62  * Additional Doxygen documentation
63  *============================================================================*/
64 
65 /*! \file  cs_math.c
66            Mathematical base functions.
67 */
68 
69 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
70 
71 /*=============================================================================
72  * Local type definitions
73  *============================================================================*/
74 
75 /*============================================================================
76  * Global variables
77  *============================================================================*/
78 
79 static cs_real_t  _machine_epsilon = 1.11e-16;
80 
81 /* Numerical constants */
82 
83 const cs_real_t cs_math_zero_threshold = FLT_MIN;
84 const cs_real_t cs_math_1ov3 = 1./3.;
85 const cs_real_t cs_math_2ov3 = 2./3.;
86 const cs_real_t cs_math_4ov3 = 4./3.;
87 const cs_real_t cs_math_5ov3 = 5./3.;
88 const cs_real_t cs_math_1ov6 = 1./6.;
89 const cs_real_t cs_math_1ov12 = 1./12.;
90 const cs_real_t cs_math_1ov24 = 1./24.;
91 
92 /*! epsilon \f$ 10^{-12}\f$ */
93 const cs_real_t cs_math_epzero = 1e-12;
94 
95 /*! infinite \f$ 10^{+30}\f$ */
96 const cs_real_t cs_math_infinite_r = 1.e30;
97 
98 /*! big value \f$ 10^{+12}\f$ */
99 const cs_real_t cs_math_big_r = 1.e12;
100 
101 /*! \f$ \pi \f$ value with 20 digits */
102 const cs_real_t cs_math_pi = 3.14159265358979323846;
103 
104 /*============================================================================
105  * Private function definitions
106  *============================================================================*/
107 
108 /*----------------------------------------------------------------------------*/
109 /*!
110  * \brief Compute a rotation of the elements in a 3x3 real values matrix
111  *
112  * \param[in, out]  m   matrix of 3x3 real values
113  * \param[in]       i   1st index
114  * \param[in]       j   2nd index
115  * \param[in]       k   3rd index
116  * \param[in]       l   4th index
117  * \param[in]       s   rate
118  * \param[in]       t   rate
119  */
120 /*----------------------------------------------------------------------------*/
121 
122 static inline void
_rotate_ind_33(cs_real_t m[3][3],cs_lnum_t i,cs_lnum_t j,cs_lnum_t k,cs_lnum_t l,cs_real_t s,cs_real_t t)123 _rotate_ind_33(cs_real_t  m[3][3],
124                cs_lnum_t  i,
125                cs_lnum_t  j,
126                cs_lnum_t  k,
127                cs_lnum_t  l,
128                cs_real_t  s,
129                cs_real_t  t)
130 {
131   /* Save values of m[i][j] and m[k][l] */
132   cs_real_t m_ij = m[i][j];
133   cs_real_t m_kl = m[k][l];
134 
135   /* Modify the values of (i,j) and (k,l) */
136   m[i][j] = m_ij - s*(m_kl + m_ij*t);
137   m[k][l] = m_kl + s*(m_ij - m_kl*t);
138 }
139 
140 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
141 
142 /*============================================================================
143  * Prototypes for functions intended for use only by Fortran wrappers.
144  * (descriptions follow, with function bodies).
145  *============================================================================*/
146 
147 void
148 cs_f_math_sym_33_inv_cramer(const cs_real_t s[6],
149                             cs_real_t       sout[6]);
150 
151 void
152 cs_f_math_sym_33_product(const cs_real_t  s1[6],
153                          const cs_real_t  s2[6],
154                          cs_real_t        sout[6]);
155 
156 void
157 cs_f_math_reduce_sym_prod_33_to_66(const cs_real_t  s[3][3],
158                                    cs_real_t        sout[6][6]);
159 
160 void
161 cs_f_math_3_normalize(const cs_real_t vin[3],
162                       cs_real_t       vout[3]);
163 
164 /*============================================================================
165  * Fortran wrapper function definitions
166  *============================================================================*/
167 
168 /*----------------------------------------------------------------------------
169  * Wrapper to cs_math_sym_33_inv_cramer
170  *----------------------------------------------------------------------------*/
171 
172 void
cs_f_math_sym_33_inv_cramer(const cs_real_t s[6],cs_real_t sout[6])173 cs_f_math_sym_33_inv_cramer(const cs_real_t s[6],
174                             cs_real_t       sout[6])
175 {
176   cs_math_sym_33_inv_cramer(s, sout);
177 }
178 
179 /*----------------------------------------------------------------------------
180  * Wrapper to cs_math_sym_33_product
181  *----------------------------------------------------------------------------*/
182 
183 void
cs_f_math_sym_33_product(const cs_real_t s1[6],const cs_real_t s2[6],cs_real_t sout[6])184 cs_f_math_sym_33_product(const cs_real_t  s1[6],
185                          const cs_real_t  s2[6],
186                          cs_real_t        sout[6])
187 {
188   cs_math_sym_33_product(s1, s2, sout);
189 }
190 
191 /*----------------------------------------------------------------------------
192  * Wrapper to cs_math_reduce_sym_prod_33_to_66
193  *----------------------------------------------------------------------------*/
194 
195 void
cs_f_math_reduce_sym_prod_33_to_66(const cs_real_t s[3][3],cs_real_t sout[6][6])196 cs_f_math_reduce_sym_prod_33_to_66(const cs_real_t  s[3][3],
197                                    cs_real_t        sout[6][6])
198 {
199   cs_math_reduce_sym_prod_33_to_66(s, sout);
200 }
201 
202 /*----------------------------------------------------------------------------
203  * Wrapper
204  *----------------------------------------------------------------------------*/
205 
206 void
cs_f_math_3_normalize(const cs_real_t vin[3],cs_real_t vout[3])207 cs_f_math_3_normalize(const cs_real_t vin[3],
208                       cs_real_t       vout[3])
209 {
210   cs_math_3_normalize(vin, vout);
211 }
212 
213 /*============================================================================
214  * Public function definitions
215  *============================================================================*/
216 
217 /*----------------------------------------------------------------------------*/
218 /*!
219  * \brief  Compute the value related to the machine precision
220  */
221 /*----------------------------------------------------------------------------*/
222 
223 void
cs_math_set_machine_epsilon(void)224 cs_math_set_machine_epsilon(void)
225 {
226   double  eps = 5e-16;
227   double  y = 1.0 + eps;
228 
229   while (y > 1.0) {
230     eps /= 2.0;
231     y = 1.0 + eps;
232   }
233   eps *= 2.0;
234 
235   _machine_epsilon = eps;
236 }
237 
238 /*----------------------------------------------------------------------------*/
239 /*!
240  * \brief  Get the value related to the machine precision
241  */
242 /*----------------------------------------------------------------------------*/
243 
244 double
cs_math_get_machine_epsilon(void)245 cs_math_get_machine_epsilon(void)
246 {
247   return _machine_epsilon;
248 }
249 
250 /*----------------------------------------------------------------------------*/
251 /*!
252  * \brief Compute all eigenvalues of a 3x3 symmetric matrix
253  *        with symmetric storage.
254  *
255  * Based on: Oliver K. Smith "eigenvalues of a symmetric 3x3 matrix",
256  *           Communication of the ACM (April 1961)
257  *           (Wikipedia article entitled "Eigenvalue algorithm")
258  *
259  * \param[in]  m          3x3 symmetric matrix (m11, m22, m33, m12, m23, m13)
260  * \param[out] eig_vals   size 3 vector
261  */
262 /*----------------------------------------------------------------------------*/
263 
264 void
cs_math_sym_33_eigen(const cs_real_t m[6],cs_real_t eig_vals[3])265 cs_math_sym_33_eigen(const cs_real_t  m[6],
266                      cs_real_t        eig_vals[3])
267 {
268   cs_real_t  e, e1, e2, e3;
269 
270   cs_real_t  p1 = cs_math_3_square_norm((const cs_real_t *)(m+3));
271   cs_real_t  d2 = cs_math_3_square_norm((const cs_real_t *)m);
272 
273   if (p1 > cs_math_epzero*d2) { /* m is not diagonal */
274 
275     cs_real_6_t  n;
276     cs_real_t  tr = (m[0] + m[1] + m[2]);
277     cs_real_t  tr_third = cs_math_1ov3 * tr;
278 
279     e1 = m[0] - tr_third, e2 = m[1] - tr_third, e3 = m[2] - tr_third;
280     cs_real_t  p2 = e1*e1 + e2*e2 + e3*e3 + 2.*p1;
281 
282     cs_real_t  p = sqrt(p2*cs_math_1ov6);
283     assert(p > 0.);
284     cs_real_t  ovp = 1./p;
285 
286     for (int  i = 0; i < 3; i++) {
287       /* Diagonal */
288       n[i] = ovp * (m[i] - tr_third);
289       /* Extra diagonal */
290       n[3 + i] = ovp * m[3 + i];
291     }
292 
293     /* r should be between -1 and 1 but truncation error and bad conditioning
294        can lead to slightly under/over-shoot */
295     cs_real_t  r = 0.5 * cs_math_sym_33_determinant(n);
296 
297     cs_real_t  cos_theta, cos_theta_2pi3;
298     if (r <= -1.) {
299       cos_theta = 0.5; /* theta = pi/3; */
300       cos_theta_2pi3 = -1.;
301     }
302     else if (r >= 1.) {
303       cos_theta = 1.; /* theta = 0.; */
304       cos_theta_2pi3 = -0.5;
305     }
306     else {
307       cos_theta = cos(cs_math_1ov3*acos(r));
308       cos_theta_2pi3 = cos(cs_math_1ov3*(acos(r) + 2.*cs_math_pi));
309     }
310 
311     /* eigenvalues computed should satisfy e1 < e2 < e3 */
312     e3 = tr_third + 2.*p*cos_theta;
313     e1 = tr_third + 2.*p*cos_theta_2pi3;
314     e2 = tr - e1 -e3; // since tr(m) = e1 + e2 + e3
315 
316   }
317   else { /* m is diagonal */
318 
319     e1 = m[0], e2 = m[1], e3 = m[2];
320 
321   } /* diagonal or not */
322 
323   if (e3 < e2) e = e3, e3 = e2, e2 = e;
324   if (e3 < e1) e = e3, e3 = e1, e1 = e2, e2 = e;
325   else {
326     if (e2 < e1) e = e2, e2 = e1, e1 = e;
327   }
328 
329   /* Return values */
330   eig_vals[0] = e1;
331   eig_vals[1] = e2;
332   eig_vals[2] = e3;
333 }
334 
335 /*----------------------------------------------------------------------------*/
336 /*!
337  * \brief  Compute max/min eigenvalues ratio and max. eigenvalue of a 3x3
338  *         symmetric matrix with non-symmetric storage.
339  *
340  * Based on: Oliver K. Smith "eigenvalues of a symmetric 3x3 matrix",
341  *           Communication of the ACM (April 1961)
342  *           (Wikipedia article entitled "Eigenvalue algorithm")
343  *
344  * \param[in]  m          3x3 matrix
345  * \param[out] eig_ratio  max/min
346  * \param[out] eig_max    max. eigenvalue
347  */
348 /*----------------------------------------------------------------------------*/
349 
350 void
cs_math_33_eigen(const cs_real_t m[3][3],cs_real_t * eig_ratio,cs_real_t * eig_max)351 cs_math_33_eigen(const cs_real_t     m[3][3],
352                  cs_real_t          *eig_ratio,
353                  cs_real_t          *eig_max)
354 {
355   cs_real_t  e, e1, e2, e3;
356 
357 #if defined(DEBUG) && !defined(NDEBUG) /* Sanity check */
358   e1 = m[0][1]-m[1][0], e2 = m[0][2]-m[2][0], e3 = m[1][2]-m[2][1];
359   if (e1*e1 + e2*e2 + e3*e3 > 0.0)
360     bft_error(__FILE__, __LINE__, 0,
361               " The given 3x3 matrix is not symmetric.\n"
362               " Stop computing eigenvalues of a 3x3 matrix since the"
363               " algorithm is only dedicated to symmetric matrix.");
364 #endif
365 
366   cs_real_t  p1 = m[0][1]*m[0][1] + m[0][2]*m[0][2] + m[1][2]*m[1][2];
367 
368   if (p1 > 0.0) { /* m is not diagonal */
369 
370     cs_real_t  theta;
371     cs_real_t  n[3][3];
372     cs_real_t  tr = cs_math_1ov3*(m[0][0] + m[1][1] + m[2][2]);
373 
374     e1 = m[0][0] - tr, e2 = m[1][1] - tr, e3 = m[2][2] - tr;
375     cs_real_t  p2 = e1*e1 + e2*e2 + e3*e3 + 2*p1;
376 
377     assert(p2 > 0);
378     cs_real_t  p = sqrt(p2*cs_math_1ov6);
379     cs_real_t  ovp = 1./p;
380 
381     for (int  i = 0; i < 3; i++) {
382       n[i][i] = ovp * (m[i][i] - tr);
383       for (int j = i + 1; j < 3; j++) {
384         n[i][j] = ovp*m[i][j];
385         n[j][i] = n[i][j];
386       }
387     }
388 
389     /* r should be between -1 and 1 but truncation error and bad conditioning
390        can lead to slightly under/over-shoot */
391     cs_real_t  r = 0.5 * cs_math_33_determinant((const cs_real_t (*)[3])n);
392 
393     if (r <= -1)
394       theta = cs_math_1ov3*cs_math_pi;
395     else if (r >= 1)
396       theta = 0.;
397     else
398       theta = cs_math_1ov3*acos(r);
399 
400     /* eigenvalues computed should satisfy e1 < e2 < e3 */
401     e3 = tr + 2*p*cos(theta);
402     e1 = tr + 2*p*cos(theta + 2*cs_math_pi*cs_math_1ov3);
403     e2 = 3*tr - e1 -e3; // since tr(m) = e1 + e2 + e3
404 
405   }
406   else { /* m is diagonal */
407 
408     e1 = m[0][0], e2 = m[1][1], e3 = m[2][2];
409     if (e3 < e2) e = e3, e3 = e2, e2 = e;
410     if (e3 < e1) e = e3, e3 = e1, e1 = e2, e2 = e;
411     else {
412       if (e2 < e1) e = e2, e2 = e1, e1 = e;
413     }
414 
415   } /* diagonal or not */
416 
417   /* Return values */
418   if (fabs(e1) > 0)
419     *eig_ratio = e3/e1;
420   else
421     *eig_ratio = 1;
422   *eig_max = e3;
423 }
424 
425 /*----------------------------------------------------------------------------*/
426 /*!
427  * \brief  Compute the length (Euclidean norm) between two points xa and xb in
428  *         a Cartesian coordinate system of dimension 3
429  *
430  * \param[in]   xa       coordinate of the first extremity
431  * \param[in]   xb       coordinate of the second extremity
432  * \param[out]  len      pointer to the length of the vector va -> vb
433  * \param[out]  unitv    unitary vector along va -> vb
434  */
435 /*----------------------------------------------------------------------------*/
436 
437 inline void
cs_math_3_length_unitv(const cs_real_t xa[3],const cs_real_t xb[3],cs_real_t * len,cs_real_3_t unitv)438 cs_math_3_length_unitv(const cs_real_t    xa[3],
439                        const cs_real_t    xb[3],
440                        cs_real_t         *len,
441                        cs_real_3_t        unitv)
442 {
443   cs_real_t  invl;
444   cs_real_3_t  diff;
445 
446   diff[0] = xb[0] - xa[0];
447   diff[1] = xb[1] - xa[1];
448   diff[2] = xb[2] - xa[2];
449 
450   *len = cs_math_3_norm(diff), invl = 1/(*len);
451 
452   unitv[0] = invl*diff[0];
453   unitv[1] = invl*diff[1];
454   unitv[2] = invl*diff[2];
455 }
456 
457 /*----------------------------------------------------------------------------*/
458 /*!
459  * \brief  Compute the area of the convex_hull generated by 3 points.
460  *         This corresponds to the computation of the surface of a triangle
461  *
462  * \param[in]  xv  coordinates of the first vertex
463  * \param[in]  xe  coordinates of the second vertex
464  * \param[in]  xf  coordinates of the third vertex
465  *
466  * \return the surface of a triangle
467  */
468 /*----------------------------------------------------------------------------*/
469 
470 inline double
cs_math_surftri(const cs_real_t xv[3],const cs_real_t xe[3],const cs_real_t xf[3])471 cs_math_surftri(const cs_real_t  xv[3],
472                 const cs_real_t  xe[3],
473                 const cs_real_t  xf[3])
474 {
475   cs_real_3_t  u, v, cp;
476 
477   for (int k = 0; k < 3; k++) {
478     u[k] = xe[k] - xv[k];
479     v[k] = xf[k] - xv[k];
480   }
481   cs_math_3_cross_product(u, v, cp);
482 
483   return  0.5 * cs_math_3_norm(cp);
484 }
485 
486 /*----------------------------------------------------------------------------*/
487 /*!
488  * \brief  Compute the volume of the convex_hull generated by 4 points.
489  *         This is equivalent to the computation of the volume of a tetrahedron
490  *
491  * \param[in]  xv  coordinates of the first vertex
492  * \param[in]  xe  coordinates of the second vertex
493  * \param[in]  xf  coordinates of the third vertex
494  * \param[in]  xc  coordinates of the fourth vertex
495  *
496  * \return the volume of the tetrahedron.
497  */
498 /*----------------------------------------------------------------------------*/
499 
500 double
cs_math_voltet(const cs_real_t xv[3],const cs_real_t xe[3],const cs_real_t xf[3],const cs_real_t xc[3])501 cs_math_voltet(const cs_real_t   xv[3],
502                const cs_real_t   xe[3],
503                const cs_real_t   xf[3],
504                const cs_real_t   xc[3])
505 {
506   double  lev, lef, lec;
507   cs_real_3_t  uev, uef, uec, ucp;
508 
509   cs_math_3_length_unitv(xe, xv, &lev, uev);
510   cs_math_3_length_unitv(xe, xf, &lef, uef);
511   cs_math_3_length_unitv(xe, xc, &lec, uec);
512   cs_math_3_cross_product(uev, uef, ucp);
513 
514   return  cs_math_1ov6 *lev*lef*lec* fabs(cs_math_3_dot_product(ucp, uec));
515 }
516 
517 /*----------------------------------------------------------------------------*/
518 /*!
519  * \brief  Evaluate eigenvalues and eigenvectors
520  *         of a real symmetric matrix m1[3,3]: m1*m2 = lambda*m2
521  *
522  * Use of Jacobi method for symmetric matrices
523  * (adapted from the book Numerical Recipes in C, Chapter 11.1)
524  *
525  * \param[in]     m_in         matrix of 3x3 real values (initial)
526  * \param[in]     tol_err      absolute tolerance (sum of off-diagonal elements)
527  * \param[out]    eig_val      vector of 3 real values (eigenvalues)
528  * \param[out]    eig_vec      matrix of 3x3 real values (eigenvectors)
529  */
530 /*----------------------------------------------------------------------------*/
531 
532 void
cs_math_33_eig_val_vec(const cs_real_t m_in[3][3],const cs_real_t tol_err,cs_real_t eig_val[restrict3],cs_real_t eig_vec[restrict3][3])533 cs_math_33_eig_val_vec(const cs_real_t  m_in[3][3],
534                        const cs_real_t  tol_err,
535                        cs_real_t        eig_val[restrict 3],
536                        cs_real_t        eig_vec[restrict 3][3])
537 {
538   /* Declaration of local variables
539    * vec1, vec2:  vectors of 3 real values (copies of diagonal values)
540    * m:           matrix of 3x3 real values (copy of m_in)
541    * epsilon:     error (like machine epsilon for floats) */
542   cs_real_t vec1[3], vec2[3];
543   cs_real_t m[3][3] = {{m_in[0][0], m_in[0][1], m_in[0][2]},
544                        {m_in[1][0], m_in[1][1], m_in[1][2]},
545                        {m_in[2][0], m_in[2][1], m_in[2][2]}};
546   cs_real_t epsilon = 1.0e-16;
547 
548   for (int id = 0; id < 3; id++) {
549     eig_val[id] = m_in[id][id];
550     vec1[id] = eig_val[id];
551     vec2[id] = 0.0;
552   }
553 
554   /* The strategy here is to adopt a cyclic Jacobi method
555    * where the diagonalisation is carried out by several sweeps
556    * (each sweep to increase the precision of the result)
557    * Here, we perform up to 50 sweeps (the algorithm stops when
558    * reaching the given tolerance) */
559 
560   // Error = sum off-diagonal elements
561   cs_real_t error
562     = cs_math_fabs(m[0][1]) + cs_math_fabs(m[0][2]) + cs_math_fabs(m[1][2]);
563 
564   // Iterative solution
565   for (int i_sweep = 0; (i_sweep < 50) && (error > tol_err); i_sweep++) {
566 
567     // Start loop on off-diagonal elements
568     for (int id1 = 0; id1 < 2; id1 ++) {
569       for (int id2 = id1+1; id2 < 3; id2 ++) {
570         // After 4 sweeps, skip rotation if off-diagonal element is small
571         if (cs_math_fabs(m[id1][id2]) < epsilon) {
572           if (i_sweep > 4)
573             m[id1][id2] = 0.0;
574         }
575         // Otherwise, ...
576         else { // if (abs(m[id1][id2]) >= epsilon)
577           cs_real_t val1, val2, val3, val4, val5;    // Declare five variables val.
578           // Get val1
579           cs_real_t diff = eig_val[id2] - eig_val[id1];
580           if (cs_math_fabs(m[id1][id2]) < epsilon)
581             val1 = m[id1][id2] / diff;
582           else {
583             cs_real_t theta = 0.5 * diff / m[id1][id2];
584             val1 = 1.0 / (cs_math_fabs(theta)+sqrt(1.0+cs_math_pow2(theta)));
585             if ( theta < 0 )
586               val1 = -val1;
587           }
588           // Get val2, val3 and val4
589           val3 = 1.0 / sqrt(1.0+cs_math_pow2(val1));
590           val2 = val1*val3;
591           val4 = val2 / (1.0 + val3);
592           val5 = val1 * m[id1][id2];
593           // Accumulate correction to diagonal elements
594           vec2[id1] -= val5;
595           vec2[id2] += val5;
596           eig_val[id1] -= val5;
597           eig_val[id2] += val5;
598 
599           m[id1][id2] = 0.0;
600           // Rotate
601           for (int id3 = 0; id3 <= id1-1; id3++) // Rotations 0 <= j < p
602             _rotate_ind_33(m, id3, id1, id3, id2, val2, val4);
603           for (int id3 = id1+1; id3 <= id2-1; id3++) // Rotations p < j < q
604             _rotate_ind_33(m, id1, id3, id3, id2, val2, val4);
605           for (int id3 = id2+1; id3 < 3; id3++) // Rotations q < j <= n
606             _rotate_ind_33(m, id1, id3, id2, id3, val2, val4);
607           for (int id3 = 0; id3 < 3; id3++)
608             _rotate_ind_33(eig_vec, id3, id1, id3, id2, val2, val4);
609         }
610       }
611     }
612 
613     // Update d and reinitialize z
614     for (int id = 0; id < 3; id++ ) {
615       vec1[id] += vec2[id];
616       eig_val[id] = vec1[id];
617       vec2[id] = 0.0;
618     }
619 
620     // Update the error
621     error = cs_math_fabs(m[0][1]) + cs_math_fabs(m[0][2]) + cs_math_fabs(m[1][2]);
622   }
623 
624   /* Sort eigenvalues and eigenvectors with ascending order */
625   for (int id1 = 0; id1 < 2; id1++) {
626     cs_lnum_t ind_min = id1;
627     for (int id2 = id1+1; id2 < 3; id2++) {
628       if ( eig_val[id2] < eig_val[id1] )
629         ind_min = id2;
630     }
631     if ( ind_min != id1 ) {
632       cs_real_t temp = eig_val[ind_min];
633       eig_val[ind_min] = eig_val[id1];
634       eig_val[id1] = temp;
635       for (int id2 = 0; id2 < 3; id2++) {
636         temp = eig_vec[id2][ind_min];
637         eig_vec[id2][ind_min] = eig_vec[id2][id1];
638         eig_vec[id2][id1] = temp;
639       }
640     }
641   }
642 }
643 
644 /*----------------------------------------------------------------------------*/
645 /*!
646  * \brief Compute LU factorization of an array of dense matrices
647  *        of identical size.
648  *
649  * \param[in]   n_blocks  number of blocks
650  * \param[in]   b_size    block size
651  * \param[in]   a         matrix blocks
652  * \param[out]  a_lu      LU factorizations of matrix blocks
653  */
654 /*----------------------------------------------------------------------------*/
655 
656 void
cs_math_fact_lu(cs_lnum_t n_blocks,int b_size,const cs_real_t * a,cs_real_t * a_lu)657 cs_math_fact_lu(cs_lnum_t         n_blocks,
658                 int               b_size,
659                 const cs_real_t  *a,
660                 cs_real_t        *a_lu)
661 {
662 # pragma omp parallel for if(n_blocks > CS_THR_MIN)
663   for (cs_lnum_t i = 0; i < n_blocks; i++) {
664 
665     cs_real_t *restrict _a_lu = &a_lu[b_size*b_size*i];
666     const cs_real_t *restrict  _a = &a[b_size*b_size*i];
667 
668     _a_lu[0] = _a[0];
669     for (cs_lnum_t ii = 1; ii < b_size; ii++) {
670       _a_lu[ii] = _a[ii];
671       _a_lu[ii*b_size] = _a[ii*b_size]/_a[0];
672     }
673     for (cs_lnum_t ii = 1; ii < b_size - 1; ii++) {
674       _a_lu[ii + ii*b_size] = _a[ii + ii*b_size];
675       for (cs_lnum_t kk = 0; kk < ii; kk++) {
676         _a_lu[ii + ii*b_size] -= _a_lu[ii*b_size + kk]
677                                 *_a_lu[kk*b_size + ii];
678       }
679 
680       for (cs_lnum_t jj = ii + 1; jj < b_size; jj++) {
681         _a_lu[ii*b_size + jj] = _a[ii*b_size + jj];
682         _a_lu[jj*b_size + ii] =   _a[jj*b_size + ii]
683                                 / _a_lu[ii*b_size + ii];
684         for (cs_lnum_t kk = 0; kk < ii; kk++) {
685           _a_lu[ii*b_size + jj] -=  _a_lu[ii*b_size + kk]
686                                    *_a_lu[kk*b_size + jj];
687           _a_lu[jj*b_size + ii] -=  _a_lu[jj*b_size + kk]
688                                    *_a_lu[kk*b_size + ii]
689                                    /_a_lu[ii*b_size + ii];
690         }
691       }
692     }
693     _a_lu[b_size*b_size -1] = _a[b_size*b_size - 1];
694 
695     for (cs_lnum_t kk = 0; kk < b_size - 1; kk++) {
696       _a_lu[b_size*b_size - 1] -=  _a_lu[(b_size-1)*b_size + kk]
697                                   *_a_lu[kk*b_size + b_size -1];
698     }
699   }
700 }
701 
702 /*----------------------------------------------------------------------------*/
703 /*!
704  * \brief  Block Jacobi utilities.
705  *         Compute forward and backward to solve an LU P*P system.
706  *
707  * \param[in]   a_lu   matrix LU factorization
708  * \param[in]   n      matrix size
709  * \param[out]  x      solution
710  * \param[out]  b      right hand side
711  */
712 /*----------------------------------------------------------------------------*/
713 
714 void
cs_math_fw_and_bw_lu(const cs_real_t a_lu[],int n,cs_real_t x[],const cs_real_t b[])715 cs_math_fw_and_bw_lu(const cs_real_t  a_lu[],
716                      int              n,
717                      cs_real_t        x[],
718                      const cs_real_t  b[])
719 {
720   cs_real_t  _aux[256];
721   cs_real_t  *aux = _aux;
722 
723   if (n > 256)
724     BFT_MALLOC(aux, n, cs_real_t);
725 
726   /* forward */
727   for (int ii = 0; ii < n; ii++) {
728     aux[ii] = b[ii];
729     for (int jj = 0; jj < ii; jj++) {
730       aux[ii] -= aux[jj]*a_lu[ii*n + jj];
731     }
732   }
733 
734   /* backward */
735   for (int ii = n - 1; ii >= 0; ii-=1) {
736     x[ii] = aux[ii];
737     for (int jj = n - 1; jj > ii; jj-=1) {
738       x[ii] -= x[jj]*a_lu[ii*n + jj];
739     }
740     x[ii] /= a_lu[ii*(n + 1)];
741   }
742 
743   if (n > 256)
744     BFT_FREE(aux);
745 }
746 
747 /*----------------------------------------------------------------------------*/
748 
749 END_C_DECLS
750