1 /* Siconos is a program dedicated to modeling, simulation and control
2  * of non smooth dynamical systems.
3  *
4  * Copyright 2021 INRIA.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17 */
18 
19 /*!\file op3x3.h
20  * \brief linear algebra operations in 3D*/
21 
22 #ifndef _op3x3_h_
23 #define _op3x3_h_
24 
25 #include <math.h>
26 #include <float.h>
27 #include <stdlib.h>
28 
29 #ifndef M_PI
30 #define M_PI        3.14159265358979323846264338327950288   /* pi             */
31 #define M_PI_2      1.57079632679489661923132169163975144   /* pi/2           */
32 #endif
33 
34 #ifndef MAYBE_UNUSED
35 #ifdef __GNUC__
36 #define MAYBE_UNUSED __attribute__((unused))
37 #else
38 #define MAYBE_UNUSED
39 #endif
40 #endif
41 
42 #ifndef WARN_RESULT_IGNORED
43 #ifdef __GNUC__
44 #define WARN_RESULT_IGNORED __attribute__ ((warn_unused_result))
45 #else
46 #define WARN_RESULT_IGNORED
47 #endif
48 #endif
49 
50 #ifdef __cplusplus
51 #undef restrict
52 #include <sys/cdefs.h>  // for __restrict
53 #define restrict __restrict
54 #endif
55 
56 #define SOLVE_3X3_GEPP(MAT, X) \
57 { \
58   int res = solve_3x3_gepp(mat, X); \
59   if (res) { printf("%s :: Call solve_3x3_gepp(" #MAT ", " #X " failed!. "\
60                     "No pivot could be selected for column %d\n", __func__, res); } }
61 
62 
63 /** OP3X3(EXPR) do EXPR 9 times
64  * \param EXPR a C expression that should contains self incrementing
65  *        pointers on arrays[9] */
66 #define OP3X3(EXPR)                             \
67   do                                            \
68   {                                             \
69     EXPR;                                       \
70     EXPR;                                       \
71     EXPR;                                       \
72     EXPR;                                       \
73     EXPR;                                       \
74     EXPR;                                       \
75     EXPR;                                       \
76     EXPR;                                       \
77     EXPR;                                       \
78   } while(0)                                    \
79 
80 /** OP3(EXPR) do EXPR 3 times
81  * \param EXPR a C expression that should contains self incrementing
82  *        pointers on arrays[9] */
83 #define OP3(EXPR)                               \
84   do                                            \
85   {                                             \
86     EXPR;                                       \
87     EXPR;                                       \
88     EXPR;                                       \
89   } while(0)                                    \
90 
91 /** SET3X3 : set pointers on a 3x3 matrix a (*a00 *a01 *a10 etc.)
92  * warning the pointer a is modified (use a00 instead) and is ready
93  * for a next SET3X3
94  */
95 #define SET3X3(V)                                                       \
96   double* V##00 MAYBE_UNUSED = V++;                                     \
97   double* V##10 MAYBE_UNUSED = V++;                                     \
98   double* V##20 MAYBE_UNUSED = V++;                                     \
99   double* V##01 MAYBE_UNUSED = V++;                                     \
100   double* V##11 MAYBE_UNUSED = V++;                                     \
101   double* V##21 MAYBE_UNUSED = V++;                                     \
102   double* V##02 MAYBE_UNUSED = V++;                                     \
103   double* V##12 MAYBE_UNUSED = V++;                                     \
104   double* V##22 MAYBE_UNUSED = V++;
105 #define SET3X3MAYBE(V)                                       \
106   double* V##00 MAYBE_UNUSED = 0;                            \
107   double* V##10 MAYBE_UNUSED = 0;                            \
108   double* V##20 MAYBE_UNUSED = 0;                            \
109   double* V##01 MAYBE_UNUSED = 0;                            \
110   double* V##11 MAYBE_UNUSED = 0;                            \
111   double* V##21 MAYBE_UNUSED = 0;                            \
112   double* V##02 MAYBE_UNUSED = 0;                            \
113   double* V##12 MAYBE_UNUSED = 0;                            \
114   double* V##22 MAYBE_UNUSED = 0;                            \
115   if (V)                                        \
116   {                                             \
117     V##00 = V++;                                \
118     V##10 = V++;                                \
119     V##20 = V++;                                \
120     V##01 = V++;                                \
121     V##11 = V++;                                \
122     V##21 = V++;                                \
123     V##02 = V++;                                \
124     V##12 = V++;                                \
125     V##22 = V++;                                \
126   }
127 
128 /** SET3 : set pointers on a vector3 v (*v0 *v1 *v2)
129  * Warning: the pointer v is modified and is ready for a next SET3
130  * use *v0 if you need *v
131  */
132 #define SET3(V)                                 \
133   double* V##0 MAYBE_UNUSED = V++;                           \
134   double* V##1 MAYBE_UNUSED = V++;                           \
135   double* V##2 MAYBE_UNUSED = V++;
136 
137 /** SET3MAYBE : set pointers on a vector3 v (*v0 *v1 *v2) only if v is
138  * non null.
139  * Warning: the pointer v is modified and is ready for a next SET3
140  * use *v0 if you need *v
141  */
142 #define SET3MAYBE(V)                                 \
143   double* V##0 MAYBE_UNUSED = 0;                                  \
144   double* V##1 MAYBE_UNUSED = 0;                                  \
145   double* V##2 MAYBE_UNUSED = 0;                                  \
146   if (V)                                             \
147   {                                                  \
148     V##0 = V++;                                      \
149     V##1 = V++;                                      \
150     V##2 = V++;                                      \
151   }
152 
153 /** copy a 3x3 matrix or a vector[9]
154  * \param[in] a  a[9]
155  * \param[out] b  b[9]*/
cpy3x3(double * restrict a,double * restrict b)156 static inline void cpy3x3(double* restrict a, double* restrict b)
157 {
158   OP3X3(*b++ = *a++);
159 }
160 
161 /** add a 3x3 matrix or a vector[9]
162  *\param[in] a a[9]
163  *\param[in,out]  b b[9]*/
add3x3(double a[9],double b[9])164 static inline void add3x3(double a[9], double b[9])
165 {
166   OP3X3(*b++ += *a++);
167 }
168 
169 /** sub a 3x3 matrix or a vector[9]
170  *\param[in] a a[9]
171  *\param[in,out] b b[9]*/
sub3x3(double a[9],double b[9])172 static inline void sub3x3(double a[9], double b[9])
173 {
174   OP3X3(*b++ -= *a++);
175 }
176 
177 /** copy a vector[3]
178  * \param[in] a a[3]
179  * \param[out] b b[3]
180  */
cpy3(double a[3],double b[3])181 static inline void cpy3(double a[3], double b[3])
182 {
183   OP3(*b++ = *a++);
184 }
185 
186 /** add a vector[3]
187  * \param[in] a a[3]
188  * \param[in,out] b b[3]*/
add3(double a[3],double b[3])189 static inline void add3(double a[3], double b[3])
190 {
191   OP3(*b++ += *a++);
192 }
193 
194 /** sub a vector[3]
195  * \param[in] a a[3]
196  * \param[in,out] b  b[3]
197  */
sub3(double a[3],double b[3])198 static inline void sub3(double a[3], double b[3])
199 {
200   OP3(*b++ -= *a++);
201 }
202 
203 /** scalar multiplication of a matrix3x3
204  * \param[in] scal double scalar
205  * \param[in,out] m  m[9]
206  */
scal3x3(double scal,double m[9])207 static inline void scal3x3(double scal, double m[9])
208 {
209   OP3X3(*m++ *= scal);
210 }
211 
212 /** diagonal scaling of a vector
213  * \param[in] scal_coeffs diagonal part of a matrix
214  * \param[in,out] v a 3D vector
215  */
diag_scal3(double * restrict scal_coeffs,double * restrict v)216 static inline void diag_scal3(double* restrict scal_coeffs, double* restrict v)
217 {
218   OP3(*v++ *= *scal_coeffs++);
219 }
220 
221 /** scalar multiplication of a vector3
222  * \param[in] scal double scalar
223  * \param[in,out] v v[3]
224  */
scal3(double scal,double * v)225 static inline void scal3(double scal, double* v)
226 {
227   OP3(*v++ *= scal);
228 }
229 
230 
231 /** copy & transpose a matrix
232  * \param[in] a *a
233  * \param[out] b transpose(*a)
234  */
cpytr3x3(double * restrict a,double * restrict b)235 static inline void cpytr3x3(double* restrict a, double* restrict b)
236 {
237   SET3X3(a);
238   SET3X3(b);
239   *b00 = *a00;
240   *b10 = *a01;
241   *b20 = *a02;
242   *b01 = *a10;
243   *b11 = *a11;
244   *b21 = *a12;
245   *b02 = *a20;
246   *b12 = *a21;
247   *b22 = *a22;
248 }
249 
250 /** matrix vector multiplication
251  * \param[in] a 3 by 3 matrix in col-major
252  * \param[in] v 3 dimensional vector
253  * \param[out] r \f$r = a*v\f$
254  */
mv3x3(double * restrict a,double * restrict v,double * restrict r)255 static inline void mv3x3(double* restrict a, double* restrict v, double* restrict r)
256 {
257 
258   double* pr;
259 
260   pr = r;
261 
262   *pr++ = *a++ * *v;
263   *pr++ = *a++ * *v;
264   *pr++ = *a++ * *v++;
265 
266   pr = r;
267 
268   *pr++ += *a++ * *v;
269   *pr++ += *a++ * *v;
270   *pr++ += *a++ * *v++;
271 
272   pr = r;
273 
274   *pr++ += *a++ * *v;
275   *pr++ += *a++ * *v;
276   *pr++ += *a++ * *v++;
277 }
278 
279 /** transpose(matrix) vector multiplication
280  * \param[in] a 3 by 3 matrix in col-major
281  * \param[in] v 3 dimensional vector
282  * \param[out] r \f$r = a^T*v\f$
283  */
mtv3x3(double * restrict a,double * restrict v,double * restrict r)284 static inline void mtv3x3(double* restrict a, double* restrict v, double* restrict r)
285 {
286 
287   double* pv = v;
288 
289   *r = *a++ * *pv++;
290   *r += *a++ * *pv++;
291   *r++ += *a++ * *pv;
292 
293   pv = v;
294 
295   *r = *a++ * *pv++;
296   *r += *a++ * *pv++;
297   *r++ += *a++ * *pv;
298 
299   pv = v;
300 
301   *r = *a++ * *pv++;
302   *r += *a++ * *pv++;
303   *r += *a * *pv;
304 }
305 
306 /** add a matrix vector multiplication
307  * \param[in] a a[9]
308  * \param[in] v v[3]
309  * \param[out] r r[3] the result of r += av
310  */
mvp3x3(const double * restrict a,const double * restrict v,double * restrict r)311 static inline void mvp3x3(const double* restrict a, const double* restrict v, double* restrict r)
312 {
313 
314   double* pr;
315 
316   pr = r;
317 
318   *pr++ += *a++ * *v;
319   *pr++ += *a++ * *v;
320   *pr++ += *a++ * *v++;
321 
322   pr = r;
323 
324   *pr++ += *a++ * *v;
325   *pr++ += *a++ * *v;
326   *pr++ += *a++ * *v++;
327 
328   pr = r;
329 
330   *pr++ += *a++ * *v;
331   *pr++ += *a++ * *v;
332   *pr++ += *a++ * *v++;
333 }
334 
mvp5x5(const double * restrict a,const double * restrict v,double * restrict r)335 static inline void mvp5x5(const double* restrict a, const double* restrict v, double* restrict r)
336 {
337 
338   double* pr;
339 
340   pr = r;
341 
342   *pr++ += *a++ * *v;
343   *pr++ += *a++ * *v;
344   *pr++ += *a++ * *v;
345   *pr++ += *a++ * *v;
346   *pr++ += *a++ * *v++;
347 
348   pr = r;
349 
350   *pr++ += *a++ * *v;
351   *pr++ += *a++ * *v;
352   *pr++ += *a++ * *v;
353   *pr++ += *a++ * *v;
354   *pr++ += *a++ * *v++;
355 
356   pr = r;
357 
358   *pr++ += *a++ * *v;
359   *pr++ += *a++ * *v;
360   *pr++ += *a++ * *v;
361   *pr++ += *a++ * *v;
362   *pr++ += *a++ * *v++;
363 
364   pr = r;
365 
366   *pr++ += *a++ * *v;
367   *pr++ += *a++ * *v;
368   *pr++ += *a++ * *v;
369   *pr++ += *a++ * *v;
370   *pr++ += *a++ * *v++;
371 
372   pr = r;
373 
374   *pr++ += *a++ * *v;
375   *pr++ += *a++ * *v;
376   *pr++ += *a++ * *v;
377   *pr++ += *a++ * *v;
378   *pr++ += *a++ * *v++;
379 
380 
381 }
382 
383 /** add a matrix vector multiplication scaled by alpha
384  * \param[in] alpha scalar coeff
385  * \param[in] a a[9]
386  * \param[in] v v[3]
387  * \param[out] r r[3] the result of r += av
388  */
mvp_alpha3x3(double alpha,const double * restrict a,const double * restrict v,double * restrict r)389 static inline void mvp_alpha3x3(double alpha, const double* restrict a, const double* restrict v, double* restrict r)
390 {
391 
392   double* pr;
393 
394   pr = r;
395 
396   *pr++ += alpha * *a++ * *v;
397   *pr++ += alpha * *a++ * *v;
398   *pr++ += alpha * *a++ * *v++;
399 
400   pr = r;
401 
402   *pr++ += alpha *  *a++ * *v;
403   *pr++ += alpha *  *a++ * *v;
404   *pr++ += alpha *  *a++ * *v++;
405 
406   pr = r;
407 
408   *pr++ += alpha *  *a++ * *v;
409   *pr++ += alpha *  *a++ * *v;
410   *pr++ += alpha *  *a++ * *v++;
411 }
412 /** minux the result a matrix vector multiplication
413  * \param[in] a matrix
414  * \param[in] v vector
415  * \param[out] r the result of r -= av
416  */
mvm3x3(double * restrict a,double * restrict v,double * restrict r)417 static inline void mvm3x3(double* restrict a, double* restrict v, double* restrict r)
418 {
419 
420   double* pr;
421 
422   pr = r;
423 
424   *pr++ -= *a++ * *v;
425   *pr++ -= *a++ * *v;
426   *pr++ -= *a++ * *v++;
427 
428   pr = r;
429 
430   *pr++ -= *a++ * *v;
431   *pr++ -= *a++ * *v;
432   *pr++ -= *a++ * *v++;
433 
434   pr = r;
435 
436   *pr++ -= *a++ * *v;
437   *pr++ -= *a++ * *v;
438   *pr++ -= *a++ * *v++;
439 }
440 
441 
442 /** transpose(matrix) vector multiplication
443  * \param[in] a 3 by 3 matrix in col-major
444  * \param[in] v 3 dimensional vector
445  * \param[out] r \f$r = a^T*v\f$
446  */
mtvm3x3(double * restrict a,double * restrict v,double * restrict r)447 static inline void mtvm3x3(double* restrict a, double* restrict v, double* restrict r)
448 {
449 
450   double* pv = v;
451 
452   *r -= *a++ * *pv++;
453   *r -= *a++ * *pv++;
454   *r++ -= *a++ * *pv;
455 
456   pv = v;
457 
458   *r -= *a++ * *pv++;
459   *r -= *a++ * *pv++;
460   *r++ -= *a++ * *pv;
461 
462   pv = v;
463 
464   *r -= *a++ * *pv++;
465   *r -= *a++ * *pv++;
466   *r -= *a * *pv;
467 }
468 /** matrix matrix multiplication : c = a * b
469  * \param[in] a  a[9]
470  * \param[in] b b[9]
471  * \param[out] c c[9]
472  */
mm3x3(double * restrict a,double * restrict b,double * restrict c)473 static inline void mm3x3(double* restrict a, double* restrict b, double* restrict c)
474 {
475 
476   SET3X3(a);
477   SET3X3(b);
478   SET3X3(c);
479 
480   *c00 = *a00 * *b00 + *a01 * *b10 + *a02 * *b20;
481   *c01 = *a00 * *b01 + *a01 * *b11 + *a02 * *b21;
482   *c02 = *a00 * *b02 + *a01 * *b12 + *a02 * *b22;
483 
484   *c10 = *a10 * *b00 + *a11 * *b10 + *a12 * *b20;
485   *c11 = *a10 * *b01 + *a11 * *b11 + *a12 * *b21;
486   *c12 = *a10 * *b02 + *a11 * *b12 + *a12 * *b22;
487 
488   *c20 = *a20 * *b00 + *a21 * *b10 + *a22 * *b20;
489   *c21 = *a20 * *b01 + *a21 * *b11 + *a22 * *b21;
490   *c22 = *a20 * *b02 + *a21 * *b12 + *a22 * *b22;
491 
492 }
493 
494 /** add a matrix matrix multiplication : c += a*b
495  * \param[in] a a[9]
496  * \param[in] b b[9]
497  * \param[out] c c[9]
498  */
mmp3x3(double * restrict a,double * restrict b,double * restrict c)499 static inline void mmp3x3(double* restrict a, double* restrict b, double* restrict c)
500 {
501 
502   SET3X3(a);
503   SET3X3(b);
504   SET3X3(c);
505 
506   *c00 += *a00 * *b00 + *a01 * *b10 + *a02 * *b20;
507   *c01 += *a00 * *b01 + *a01 * *b11 + *a02 * *b21;
508   *c02 += *a00 * *b02 + *a01 * *b12 + *a02 * *b22;
509 
510   *c10 += *a10 * *b00 + *a11 * *b10 + *a12 * *b20;
511   *c11 += *a10 * *b01 + *a11 * *b11 + *a12 * *b21;
512   *c12 += *a10 * *b02 + *a11 * *b12 + *a12 * *b22;
513 
514   *c20 += *a20 * *b00 + *a21 * *b10 + *a22 * *b20;
515   *c21 += *a20 * *b01 + *a21 * *b11 + *a22 * *b21;
516   *c22 += *a20 * *b02 + *a21 * *b12 + *a22 * *b22;
517 
518 }
519 
520 /** sub a matrix matrix multiplication : c -= a*b
521  * \param[in] a a[9]
522  * \param[in] b b[9]
523  * \param[out] c c[9]
524  */
mmm3x3(double * restrict a,double * restrict b,double * restrict c)525 static inline void mmm3x3(double* restrict a, double* restrict b, double* restrict c)
526 {
527 
528   SET3X3(a);
529   SET3X3(b);
530   SET3X3(c);
531 
532   *c00 -= *a00 * *b00 + *a01 * *b10 + *a02 * *b20;
533   *c01 -= *a00 * *b01 + *a01 * *b11 + *a02 * *b21;
534   *c02 -= *a00 * *b02 + *a01 * *b12 + *a02 * *b22;
535 
536   *c10 -= *a10 * *b00 + *a11 * *b10 + *a12 * *b20;
537   *c11 -= *a10 * *b01 + *a11 * *b11 + *a12 * *b21;
538   *c12 -= *a10 * *b02 + *a11 * *b12 + *a12 * *b22;
539 
540   *c20 -= *a20 * *b00 + *a21 * *b10 + *a22 * *b20;
541   *c21 -= *a20 * *b01 + *a21 * *b11 + *a22 * *b21;
542   *c22 -= *a20 * *b02 + *a21 * *b12 + *a22 * *b22;
543 
544 }
545 
546 /** determinant
547  * \param[in] a double* a
548  * \return the value of the determinant
549  */
det3x3(double * a)550 static inline double det3x3(double* a)
551 {
552   SET3X3(a);
553 
554   return
555     *a00 * *a11 * *a22 + *a01 * *a12 * *a20 + *a02 * *a10 * *a21 -
556     *a00 * *a12 * *a21 - *a01 * *a10 * *a22 - *a02 * *a11 * *a20;
557 }
558 
559 
560 /** system resolution : x <- sol(Ax = b)
561  * \param[in] a double a[9]
562  * \param[out] x double x[3]
563  * \param[in] b double b[3]
564  * \return 0 if success, 1 if failed
565  */
566 WARN_RESULT_IGNORED
solv3x3(double * restrict a,double * restrict x,double * restrict b)567 static inline int solv3x3(double* restrict a, double* restrict x, double* restrict b)
568 {
569 
570   SET3X3(a);
571   double* b0 = b++;
572   double* b1 = b++;
573   double* b2 = b;
574 
575   double det =
576     *a00 * *a11 * *a22 + *a01 * *a12 * *a20 + *a02 * *a10 * *a21 -
577     *a00 * *a12 * *a21 - *a01 * *a10 * *a22 - *a02 * *a11 * *a20;
578 
579   if (fabs(det) > DBL_EPSILON)
580   {
581     double idet = 1.0 / det;
582     *x++ = idet * (*a01 * *a12 * *b2 +  *a02 * *a21 * *b1 +
583                    *a11 * *a22 * *b0 -  *a01 * *a22 * *b1 -
584                    *a02 * *a11 * *b2 -  *a12 * *a21 * *b0);
585     *x++ = idet * (*a00 * *a22 * *b1 +  *a02 * *a10 * *b2 +
586                    *a12 * *a20 * *b0 -  *a00 * *a12 * *b2 -
587                    *a02 * *a20 * *b1 -  *a10 * *a22 * *b0);
588     *x   = idet * (*a00 * *a11 * *b2 +  *a01 * *a20 * *b1 +
589                    *a10 * *a21 * *b0 -  *a00 * *a21 * *b1 -
590                    *a01 * *a10 * *b2 -  *a11 * *a20 * *b0);
591   }
592   else
593   {
594     *x++ = NAN;
595     *x++ = NAN;
596     *x   = NAN;
597     return 1;
598   }
599   return 0;
600 }
601 
602 /** check equality : a[9] == b[9]
603  * \param[in] a double a[9]
604  * \param[in] b double b[9]
605  * \return 1 if true, 0 if false
606  */
equal3x3(double * restrict a,double * restrict b)607 static inline int equal3x3(double* restrict a, double* restrict b)
608 {
609   return *a++ == *b++ &&
610          *a++ == *b++ &&
611          *a++ == *b++ &&
612          *a++ == *b++ &&
613          *a++ == *b++ &&
614          *a++ == *b++ &&
615          *a++ == *b++ &&
616          *a++ == *b++ &&
617          *a == *b;
618 }
619 
620 /** check equality : a[3] == b[3]
621  * \param[in] a double a[3]
622  * \param[in] b double b[3]
623  * \return 1 if true, 0 if false
624  */
equal3(double * restrict a,double * restrict b)625 static inline int equal3(double* restrict a, double* restrict b)
626 {
627   return *a++ == *b++ &&
628          *a++ == *b++ &&
629          *a == *b;
630 }
631 
632 /** scalar product : c <- a.b
633  * \param[in] a double a[3]
634  * \param[in] b double b[3]
635  * \return the scalar product
636  */
dot3(double * restrict a,double * restrict b)637 static inline double dot3(double* restrict a, double* restrict b)
638 {
639   double r;
640   r = *a++ * * b++;
641   r += *a++ * * b++;
642   r += *a * *b;
643   return r;
644 }
645 
646 /** cross product : c <- a x b
647  * \param[in] a double a[3]
648  * \param[in] b double b[3]
649  * \param[out] c double c[3]
650  */
cross3(double * restrict a,double * restrict b,double * restrict c)651 static inline void cross3(double* restrict a, double* restrict b, double* restrict c)
652 {
653   double* a0 = a++;
654   double* a1 = a++;
655   double* a2 = a;
656   double* b0 = b++;
657   double* b1 = b++;
658   double* b2 = b;
659 
660   *c++ = *a1 * *b2 - *a2 * *b1;
661   *c++ = *a2 * *b0 - *a0 * *b2;
662   *c   = *a0 * *b1 - *a1 * *b0;
663 }
664 
665 /** norm : || a ||
666  *  may underflow & overflow
667  * \param[in] a a[2]
668  * \return the norm
669  */
hypot2(double * a)670 static inline double hypot2(double* a)
671 {
672   double r;
673 
674   r = *a * *a;
675   a++;
676   r += *a * *a;
677   return sqrt(r);
678 }
679 
680 
681 /** norm : || a ||
682  *  may underflow & overflow
683  * \param[in] a a[3]
684  * \return the norm
685  */
hypot3(double * a)686 static inline double hypot3(double* a)
687 {
688   double r;
689 
690   r = *a * *a;
691   a++;
692   r += *a * *a;
693   a++;
694   r += *a * *a;
695   return sqrt(r);
696 }
hypot5(double * a)697 static inline double hypot5(double* a)
698 {
699   double r;
700 
701   r = *a * *a;
702   a++;
703   r += *a * *a;
704   a++;
705   r += *a * *a;
706   a++;
707   r += *a * *a;
708   a++;
709   r += *a * *a;
710 
711   return sqrt(r);
712 }
hypot9(double * a)713 static inline double hypot9(double* a)
714 {
715   double r;
716 
717   r = *a * *a;
718   a++;
719   r += *a * *a;
720   a++;
721   r += *a * *a;
722   a++;
723   r += *a * *a;
724   a++;
725   r += *a * *a;
726   a++;
727   r += *a * *a;
728   a++;
729   r += *a * *a;
730   a++;
731   r += *a * *a;
732   a++;
733   r += *a * *a;
734 
735   return sqrt(r);
736 }
737 
738 
739 /** extract3x3 : copy a sub 3x3 matrix of *a into *b */
740 /* \param[in] n row numbers of matrix a
741  * \param[in] i0 row of first element
742  * \param[in] j0 column of first element
743  * \param[in] a a[n,n] matrix
744  * \param[out] b b[9] a 3x3 matrix */
extract3x3(int n,int i0,int j0,double * restrict a,double * restrict b)745 static inline void extract3x3(int n, int i0, int j0, double* restrict a, double* restrict b)
746 {
747 
748   int k0 = i0 + n * j0;
749 
750   int nm3 = n - 3;
751 
752   a += k0;
753 
754   *b++ = *a++;
755   *b++ = *a++;
756   *b++ = *a++;
757   a += nm3;
758   *b++ = *a++;
759   *b++ = *a++;
760   *b++ = *a++;
761   a += nm3;
762   *b++ = *a++;
763   *b++ = *a++;
764   *b   = *a;
765 }
766 
767 /** insert3x3 : insert a 3x3 matrix *b into *a */
768 /* \param[in] n row numbers of matrix a
769  * \param[in] i0 row of first element
770  * \param[in] j0 column of first element
771  * \param[in,out] a  a[n,n] matrix
772  * \param[in] b b[9] a 3x3 matrix */
insert3x3(int n,int i0,int j0,double * restrict a,double * restrict b)773 static inline void insert3x3(int n, int i0, int j0, double* restrict a, double* restrict b)
774 {
775 
776   int k0 = i0 + n * j0;
777 
778   int nm3 = n - 3;
779 
780   a += k0;
781 
782   *a++ = *b++;
783   *a++ = *b++;
784   *a++ = *b++;
785   a += nm3;
786   *a++ = *b++;
787   *a++ = *b++;
788   *a++ = *b++;
789   a += nm3;
790   *a++ = *b++;
791   *a++ = *b++;
792   *a = *b;
793 }
794 
795 /** print a matrix
796  * \param mat the matrix
797  */
798 void print3x3(double* mat);
799 
800 
801 /** print a vector
802  * \param[in] v the vector
803  */
804 void print3(double* v);
805 
806 /** orthoBaseFromVector : From a vector A, build a matrix (A,A1,A2) such that it is an
807  * orthonormal.
808  * \param[in,out] Ax first component of the vector A
809  * \param[in,out] Ay second component of the vector A
810  * \param[in,out] Az third component of the vector A
811  * \param[out] A1x first component of the vector A
812  * \param[out] A1y second component of the vector A
813  * \param[out] A1z third component of the vector A
814  * \param[out] A2x first component of the vector A
815  * \param[out] A2y second component of the vector A
816  * \param[out] A2z third component of the vector A
817  *
818  * \return 0 if success, 1 if there is a problem
819 */
820 WARN_RESULT_IGNORED
orthoBaseFromVector(double * Ax,double * Ay,double * Az,double * A1x,double * A1y,double * A1z,double * A2x,double * A2y,double * A2z)821 static inline int orthoBaseFromVector(double *Ax, double *Ay, double *Az,
822                                       double *A1x, double *A1y, double *A1z,
823                                       double *A2x, double *A2y, double *A2z)
824 {
825 
826   double normA = sqrt((*Ax) * (*Ax) + (*Ay) * (*Ay) + (*Az) * (*Az));
827   if (normA == 0.)
828   {
829     (*Ax) = NAN;
830     (*Ay) = NAN;
831     (*Az) = NAN;
832     (*A1x) = NAN;
833     (*A1y) = NAN;
834     (*A1z) = NAN;
835     (*A2x) = NAN;
836     (*A2y) = NAN;
837     (*A2z) = NAN;
838     return 1;
839   }
840   (*Ax) /= normA;
841   (*Ay) /= normA;
842   (*Az) /= normA;
843   /*build (*A1*/
844   if (fabs(*Ax) > fabs(*Ay))
845   {
846     if (fabs((*Ax)) > fabs((*Az))) /*(*Ax is the bigest*/
847     {
848       (*A1x) = (*Ay);
849       (*A1y) = -(*Ax);
850       (*A1z) = 0;
851     }
852     else /*(*Az is the biggest*/
853     {
854       (*A1z) = (*Ax);
855       (*A1y) = -(*Az);
856       (*A1x) = 0;
857     }
858   }
859   else if (fabs(*Ay) > fabs(*Az)) /*(*Ay is the bigest*/
860   {
861     (*A1y) = (*Ax);
862     (*A1x) = -(*Ay);
863     (*A1z) = 0;
864   }
865   else /*(*Az is the biggest*/
866   {
867     (*A1z) = (*Ax);
868     (*A1y) = -(*Az);
869     (*A1x) = 0;
870   }
871   double normA1 = sqrt((*A1x) * (*A1x) + (*A1y) * (*A1y) + (*A1z) * (*A1z));
872   (*A1x) /= normA1;
873   (*A1y) /= normA1;
874   (*A1z) /= normA1;
875 
876   (*A2x) = *Ay * *A1z - *Az * *A1y;
877   (*A2y) = *Az * *A1x - *Ax * *A1z;
878   (*A2z) = *Ax * *A1y - *Ay * *A1x;
879   return 0;
880 }
881 
882 
883 /** solve Ax = b by partial pivoting Gaussian elimination. This function is 10
884  * to 20 times faster than calling LAPACK (tested with netlib).
885  *
886  * \param a column-major matrix (not modified)
887  * \param[in,out] b on input, the right-hand side; on output the solution x
888  *
889  * \return 0 if ok, otherwise the column where no pivot could be selected
890  */
891 WARN_RESULT_IGNORED
solve_3x3_gepp(const double * restrict a,double * restrict b)892 static inline int solve_3x3_gepp(const double* restrict a, double* restrict b)
893 {
894   double lp0, lp1, lp2, lm1, lm2, ln1, ln2;
895   double bl, bm, bn;
896   double factor1, factor2;
897   double sol0, sol1, sol2;
898   double alp0;
899   double a0 = a[0];
900   double a1 = a[1];
901   double a2 = a[2];
902   double aa0 = fabs(a0);
903   double aa1 = fabs(a1);
904   double aa2 = fabs(a2);
905   /* do partial search of pivot */
906   int pivot2, pivot1 = aa0 >= aa1 ? aa0 >= aa2 ? 0 : 20 : aa1 >= aa2 ? 10 : 20;
907   int info = 0;
908 
909   /* We are going to put the system in the form
910    * | lp0 lp1 lp2 ; bl |
911    * |  0  lm1 lm2 ; bm |
912    * |  0  ln1 ln2 ; bn |
913    */
914   switch (pivot1)
915   {
916     case 0: /* first element is pivot, first line does not change */
917       factor1 = a1/a0;
918       factor2 = a2/a0;
919       lp0 = a0;
920       alp0 = fabs(a0);
921       lp1 = a[3];
922       lp2 = a[6];
923       lm1 = a[4] - factor1*lp1;
924       lm2 = a[7] - factor1*lp2;
925       ln1 = a[5] - factor2*lp1;
926       ln2 = a[8] - factor2*lp2;
927       bl = b[0];
928       bm = b[1] - factor1*bl;
929       bn = b[2] - factor2*bl;
930       break;
931     case 10: /* first element is pivot, first line does not change */
932       factor1 = a0/a1;
933       factor2 = a2/a1;
934       lp0 = a1;
935       alp0 = fabs(a1);
936       lp1 = a[4];
937       lp2 = a[7];
938       lm1 = a[3] - factor1*lp1;
939       lm2 = a[6] - factor1*lp2;
940       ln1 = a[5] - factor2*lp1;
941       ln2 = a[8] - factor2*lp2;
942       bl = b[1];
943       bm = b[0] - factor1*bl;
944       bn = b[2] - factor2*bl;
945       break;
946     case 20: /* first element is pivot, first line does not change */
947       factor1 = a0/a2;
948       factor2 = a1/a2;
949       lp0 = a2;
950       alp0 = fabs(a2);
951       lp1 = a[5];
952       lp2 = a[8];
953       lm1 = a[3] - factor1*lp1;
954       lm2 = a[6] - factor1*lp2;
955       ln1 = a[4] - factor2*lp1;
956       ln2 = a[7] - factor2*lp2;
957       bl = b[2];
958       bm = b[0] - factor1*bl;
959       bn = b[1] - factor2*bl;
960       break;
961     default:
962       exit(EXIT_FAILURE);
963   }
964 
965   if (alp0 <= DBL_EPSILON)
966   {
967     info = 1;
968     return info;
969   }
970 
971   double alm1 = fabs(lm1);
972   double aln1 = fabs(ln1);
973   pivot2 = alm1 >= aln1 ? 0 : 1;
974 
975   /* We now solve the system
976    * | lm1 lm2 ; bm |
977    * | ln1 ln2 ; bn |
978    */
979   switch (pivot2)
980   {
981     case 0:
982       if (alm1 < DBL_EPSILON)
983       {
984         info = 1;
985         return info;
986       }
987       factor1 = ln1/lm1;
988       sol2 = (bn - factor1*bm)/(ln2 - factor1*lm2);
989       sol1 = (bm - lm2*sol2)/lm1;
990       break;
991     case 1:
992       if (aln1 < DBL_EPSILON)
993       {
994         info = 1;
995         return info;
996       }
997       factor1 = lm1/ln1;
998       sol2 = (bm - factor1*bn)/(lm2 - factor1*ln2);
999       sol1 = (bn - ln2*sol2)/ln1;
1000       break;
1001     default:
1002       exit(EXIT_FAILURE);
1003   }
1004   sol0 = (bl - sol1*lp1 - sol2*lp2)/lp0;
1005 
1006   b[0] = sol0;
1007   b[1] = sol1;
1008   b[2] = sol2;
1009 
1010   return info;
1011 }
1012 
1013 
1014 #define mat_elem(a, y, x, n) (a + ((y) * (n) + (x)))
1015 
swap_row(double * a,double * b,int r1,int r2,int n)1016 static void swap_row(double *a, double *b, int r1, int r2, int n)
1017 {
1018 	double tmp, *p1, *p2;
1019 	int i;
1020 
1021 	if (r1 == r2) return;
1022 	for (i = 0; i < n; i++) {
1023 		p1 = mat_elem(a, r1, i, n);
1024 		p2 = mat_elem(a, r2, i, n);
1025 		tmp = *p1, *p1 = *p2, *p2 = tmp;
1026 	}
1027 	tmp = b[r1], b[r1] = b[r2], b[r2] = tmp;
1028 }
1029 
solve_nxn_gepp(int n,double * a,double * b,double * x)1030 static inline  void solve_nxn_gepp(int n, double *a, double *b, double *x)
1031 {
1032 #define A(y, x) (*mat_elem(a, y, x, n))
1033 	int  j, col, row, max_row,dia;
1034 	double max, tmp;
1035 
1036 	for (dia = 0; dia < n; dia++) {
1037 		max_row = dia, max = A(dia, dia);
1038 
1039 		for (row = dia + 1; row < n; row++)
1040 			if ((tmp = fabs(A(row, dia))) > max)
1041 				max_row = row, max = tmp;
1042 
1043 		swap_row(a, b, dia, max_row, n);
1044 
1045 		for (row = dia + 1; row < n; row++) {
1046 			tmp = A(row, dia) / A(dia, dia);
1047 			for (col = dia+1; col < n; col++)
1048 				A(row, col) -= tmp * A(dia, col);
1049 			A(row, dia) = 0;
1050 			b[row] -= tmp * b[dia];
1051 		}
1052 	}
1053 	for (row = n - 1; row >= 0; row--) {
1054 		tmp = b[row];
1055 		for (j = n - 1; j > row; j--)
1056 			tmp -= x[j] * A(row, j);
1057 		x[row] = tmp / A(row, row);
1058 	}
1059 #undef A
1060 }
1061 
1062 /** Computation of the eigenvalues of a symmetric 3x3 real matrix
1063  *
1064  * \param a symmetric column-major matrix (not modified)
1065  * \param b a 3x3 work matrix
1066  * \param[in,out] eig eigenvalie in decreasing order
1067  *
1068  * \return 0 all the time
1069  */
1070 WARN_RESULT_IGNORED
eig_3x3(double * restrict a,double * restrict b,double * restrict eig)1071 static inline int eig_3x3(double* restrict a, double* restrict b, double* restrict eig)
1072 {
1073   SET3X3(a);
1074   SET3X3(b);
1075   double pi = M_PI;
1076   double p1 =  *a01* *a01 +  *a02* *a02 +  *a12* *a12;
1077   if (p1 == 0)
1078   {
1079     /* A is diagonal. */
1080     eig[0] =  *a00;
1081     eig[1] =  *a11;
1082     eig[2] =  *a22;
1083   }
1084   else
1085   {
1086     double q = ( *a00+ *a11+ *a22)/3.0;
1087     double  p2 = ( *a00 - q)*( *a00 - q) + ( *a11 - q)*( *a11 - q) + ( *a22 - q)*( *a22 - q) + 2 * p1;
1088     double p = sqrt(p2 / 6.0);
1089     *b00 = (1 / p) * ( *a00 - q);
1090     *b11 = (1 / p) * ( *a11 - q);
1091     *b22 = (1 / p) * ( *a22 - q);
1092     *b01 = (1 / p) * ( *a01);
1093     *b02 = (1 / p) * ( *a02);
1094     *b10 = (1 / p) * ( *a10);
1095     *b12 = (1 / p) * ( *a12);
1096     *b20 = (1 / p) * ( *a20);
1097     *b21 = (1 / p) * ( *a21);
1098     //B = (1 / p) * ( *A - q * I)       % I is the identity matrix
1099     double det =
1100       *b00 * *b11 * *b22 + *b01 * *b12 * *b20 + *b02 * *b10 * *b21 -
1101       *b00 * *b12 * *b21 - *b01 * *b10 * *b22 - *b02 * *b11 * *b20;
1102     double r = det/2.0;
1103 
1104     /* % In exact arithmetic for a symmetric matrix  -1 <= r <= 1 */
1105     /* % but computation error can leave it slightly outside this range. */
1106     double phi;
1107     if (r <= -1)
1108       phi = pi / 3.0;
1109     else if (r >= 1)
1110       phi = 0.0;
1111     else
1112       phi = acos(r) / 3;
1113 
1114    /* % the eigenvalues satisfy eig3 <= eig2 <= eig1 */
1115     eig[0] = q + 2 * p * cos(phi);
1116     eig[2] = q + 2 * p * cos(phi + (2*pi/3));
1117     eig[1] = 3 * q - eig[0] - eig[2];    /* % since trace(A) = eig1 + eig2 + eig3; */
1118   }
1119 
1120   return 0;
1121 }
1122 
1123 
1124 
1125 #endif
1126