1 #ifndef _ALBERT_INLINES_H_
2 #define _ALBERT_INLINES_H_
3 
4 /*--------------------------------------------------------------------------*/
5 /* ALBERT:   an Adaptive multi Level finite element toolbox using           */
6 /*           Bisectioning refinement and Error control by Residual          */
7 /*           Techniques                                                     */
8 /*                                                                          */
9 /* www.alberta-fem.de                                                       */
10 /*                                                                          */
11 /*--------------------------------------------------------------------------*/
12 /*                                                                          */
13 /* file: albert_inlines.h                                                   */
14 /*                                                                          */
15 /*                                                                          */
16 /* description: Blas-like inline functions for REAL_Ds and REAL_DDs,        */
17 /* REAL_Bs & friends.                                                       */
18 /*                                                                          */
19 /*--------------------------------------------------------------------------*/
20 /*                                                                          */
21 /*  authors:   Alfred Schmidt                                               */
22 /*             Zentrum fuer Technomathematik                                */
23 /*             Fachbereich 3 Mathematik/Informatik                          */
24 /*             Universitaet Bremen                                          */
25 /*             Bibliothekstr. 2                                             */
26 /*             D-28359 Bremen, Germany                                      */
27 /*                                                                          */
28 /*             Kunibert G. Siebert                                          */
29 /*             Istitut fuer Mathematik                                     */
30 /*             Universitaet Augsburg                                        */
31 /*             Universitaetsstr. 14                                         */
32 /*             D-86159 Augsburg, Germany                                    */
33 /*                                                                          */
34 /*             Claus-Justus Heine                                           */
35 /*             Abteilung fuer Angewandte Mathematik                         */
36 /*             Albert-Ludwigs-Universitaet Freiburg                         */
37 /*             Hermann-Herder-Str. 10                                       */
38 /*             D-79104 Freiburg im Breisgau, Germany                        */
39 /*                                                                          */
40 /*                                                                          */
41 /*  (c) by A. Schmidt, K.G. Siebert, C.-J. Heine (1996-2007)                */
42 /*                                                                          */
43 /*--------------------------------------------------------------------------*/
44 
45 #include "alberta.h" /* essentially a no-op when included from alberta.h */
46 
47 #ifndef DIM_OF_WORLD
48 # error Need to know the dimension of the World :)
49 #endif
50 
51 /* multiple invocations of macro-arguments can be harmful, if the macro
52  * argument is, e.g., a function-call.
53  *
54  * NOTE: as DIM_OF_WORLD is a constant, the C-compiler should unroll all
55  * loops when compiling with optimizations, so there should be no need
56  * for hand-unrolling, except in some simple 1D cases.
57  *
58  * Also, all modern compilers do function inlining, so the
59  * function-call  over-head is _not_ a problem.
60  *
61  * Note: the function may be nested, they return the address of the
62  * _modified_ operand. So AXPY(a, AX(b, x), y) is valid.
63  */
64 
65 /* In addition to BLAS-like routines for REAL_D vectors and REAL_DD
66  * matrices this file also defines the access to the per-element
67  * quadrature and geometry caches:
68  *
69  * fill_el_geom_cache()
70  * fill_quad_el_cache()
71  */
72 
73 /* The following functions are defined here:
74  *
75  *   AX(a, x)              --  x *= a (alias SCAL_DOW is also defined)
76  *   AXEY(a, x, y)         --  y  = a x
77  *   AXPBY(a, x, b, y, z)  --  z  = a x + by
78  *   AXPBYPCZ(a, x, b, y, c, z, w)  --  w  = a x + by + cz
79  *   AXPBYP(a, x, b, y, z) --  z += a x + by
80  *   AXPY(a, x, y)         --  y += a x
81  *   COPY(src, dst)        --  dst := src
82  *   DIST(x, y)            --  sqrt(DST2(x, y))
83  *   DST2(x, y)            --  SCP(x-y, x-y)
84  *   NRM2(x)               --  SCP(x, x)
85  *   NORM(x)               --  sqrt(NRM2(x))
86  *   NORM1(x)              --  fabs(x[0]) + ... + fabs(x[DOW-1])
87  *   DIST1(x, y)           --  NORM1(x-y)
88  *   PNORMP(x, p)          --  (pow(fabs(x[0]), p) + ... + pow(fabs(x[0]), p))
89  *   NORMP(x, p)           --  pow(PNORMP(x), 1.0/p)
90  *   NORM8(x)              --  max{fabs(x[0]), ..., fabs(x[DOW-1])} (8==infty)
91  *   DIST8(x, y)           --  NORM8(x-y)
92  *   SUM(x)                --  x[0] + ... + x[DOW-1]
93  *   MTV(m, v, b)          --  b += m^t v
94  *   MV(m, v, b)           --  b += m v   (m is a matrix)
95  *   MDIV(m, v, b)         --  scale v by the inverse of the diagonal -> b
96  *   SCP(x, y)             --  <x, y>
97  *   SET(val, x)           --  x[i] = val, i=1, ..., DOW
98  *   WEDGE(x, y, n)        --  n  = x /\ y             in 3D
99  *   WEDGE(x, y)           --  x0 * y1 - x1 * y0       in 2D
100  *
101  * The actual function named is generated by adding a _DOW() suffix.
102  *
103  * Prefix        Version
104  * none          REAL_D
105  * M             REAL_DD
106  * DM            diagonal matrix, diagonal stored in REAL_D vector
107  * SCM           scalar matrix, data type REAL (albert.h)
108  *
109  * Further:
110  * Macros EXPAND and FORMAT (with named pre- and suffixes) for easier
111  * print-out of REAL_D and REAL_DD, use like this:
112  *
113  * printf("text"MFORMAT_DOW"more text\n", MEXPAND_DOW(m));
114  *
115  * Some more functions for barycentric coordinates. NOTE: works best
116  * with constant dim. Use with care w.r.t. to optimization.
117  *
118  * SET_BAR(dim, a, x)
119  * SCAL_BAR(dim, a, x)
120  * SCP_BAR(dim, x, y)
121  * AXPY_BAR(dim, a, x, y)
122  * AXPBY_BAR(dim, a, x, b, y, z)
123  * COPY_BAR(dim, from, to)
124  * GRAD_DOW(dim, Lambda, b_grd, x_grd) -- conversion from barycentric
125  *      to cartesian gradients. The function computes x_grd = b_grd Lambda.
126  *
127  * Some of these functions are also available as matrix versions
128  * (e.g. MAXPY_BAR()).
129  *
130  * Further: to add matrices of higher symmetry to those of lower
131  * symmetry the following functions exist. The first prefix is alway
132  * the type of the destination matrix. Only the well defined functions
133  * are implemented. Grin. (Hint: the limiting factor is the structure
134  * of the destination matrix ...)
135  *
136  * {M,DM,SCM}{M,DM,SCM}AXPY_DOW(alpha, a, b)    b += alpha*a
137  * {M,DM,SCM}{M,DM,SCM}AXEY_DOW(alpha, a, b)    b  = alpha*a
138  * {M,DM,SCM}{M,DM,SCM}COPY_DOW(a, b)           b  = a
139  *
140  * Some more: DOW x DOW matrix multiplication etc.:
141  *
142  * MM_DOW(), MMT_DOW(), MTM_DOW(), MDET_DOW(), MINVERT_DOW()
143  *
144  */
145 
POW_DOW(REAL a)146 static inline REAL POW_DOW(REAL a)
147 {
148 #if DIM_OF_WORLD == 0
149   return 1.0;
150 #elif DIM_OF_WORLD == 1
151   return a;
152 #elif DIM_OF_WORLD == 2
153   return a*a;
154 #elif DIM_OF_WORLD == 3
155   return a*a*a;
156 #else
157   int i;
158   REAL res = a;
159 
160   for (i = 1; i < DIM_OF_WORLD; i++) {
161     res *= a;
162   }
163   return res;
164 #endif
165 }
166 
167 #define SCAL_DOW(a, x) AX_DOW(a, x)
AX_DOW(REAL a,REAL_D x)168 static inline REAL *AX_DOW(REAL a, REAL_D x)
169 {
170   int i;
171 
172   for (i = 0; i < DIM_OF_WORLD; i++) {
173     x[i] *= a;
174   }
175   return x;
176 }
177 
178 #define MSCAL_DOW(a, m) MAX_DOW(a, m)
MAX_DOW(REAL a,REAL_DD m)179 static inline REAL_D *MAX_DOW(REAL a, REAL_DD m)
180 {
181   int i;
182 
183   for (i = 0; i < DIM_OF_WORLD; i++) {
184     AX_DOW(a, m[i]);
185   }
186   return m;
187 }
188 
189 #define DMAX_DOW(a, m)   AX_DOW(a, m)
190 #define DMSCAL_DOW(a, m) DMAX_DOW(a, m)
191 
192 #define SCMAX_DOW(a, m)   (m) *= (a)
193 #define SCMSCAL_DOW(a, m) SCMAX_DOW(a, m)
194 
AXEY_DOW(REAL a,const REAL_D x,REAL_D y)195 static inline REAL *AXEY_DOW(REAL a, const REAL_D x, REAL_D y)
196 {
197   int i;
198 
199   for (i = 0; i < DIM_OF_WORLD; i++) {
200     y[i] = a * x[i];
201   }
202   return y;
203 }
204 
MAXEY_DOW(REAL a,const REAL_DD x,REAL_DD y)205 static inline REAL_D *MAXEY_DOW(REAL a, const REAL_DD x, REAL_DD y)
206 {
207   int i;
208 
209   for (i = 0; i < DIM_OF_WORLD; i++) {
210     AXEY_DOW(a, x[i], y[i]);
211   }
212   return y;
213 }
214 
215 #define DMAXEY_DOW(a, x, y)  AXEY_DOW(a, x, y)
216 #define SCMAXEY_DOW(a, x, y) (y) = (a)*(x)
217 
AXPY_DOW(REAL a,const REAL_D x,REAL_D y)218 static inline REAL *AXPY_DOW(REAL a, const REAL_D x, REAL_D y)
219 {
220   int i;
221 
222   for (i = 0; i < DIM_OF_WORLD; i++) {
223     y[i] += a * x[i];
224   }
225   return y;
226 }
227 
MAXPY_DOW(REAL a,const REAL_DD x,REAL_DD y)228 static inline REAL_D *MAXPY_DOW(REAL a, const REAL_DD x, REAL_DD y)
229 {
230   int i;
231 
232   for (i = 0; i < DIM_OF_WORLD; i++) {
233     AXPY_DOW(a, x[i], y[i]);
234   }
235   return y;
236 }
237 
238 /* same as above, but add the transposed matrix to y */
MAXTPY_DOW(REAL a,const REAL_DD x,REAL_DD y)239 static inline REAL_D *MAXTPY_DOW(REAL a, const REAL_DD x, REAL_DD y)
240 {
241   REAL tmp;
242   int i, j;
243 
244   for (i = 0; i < DIM_OF_WORLD; i++) {
245     y[i][i] += a*x[i][i];
246     for (j = i+1; j < DIM_OF_WORLD; j++) {
247       tmp = x[i][j];
248       y[i][j] += a*x[j][i];
249       y[j][i] += a*tmp;
250     }
251   }
252   return y;
253 }
254 
255 #define DMAXPY_DOW(a, x, y)  AXPY_DOW(a, x, y)
256 #define DMAXTPY_DOW(a, x, y) DMAXPY_DOW(a, x, y) /* transpose of diagonal matrix :) */
257 #define SCMAXPY_DOW(a, x, y)  (y) += (a)*(x)
258 #define SCMAXTPY_DOW(a, x, y) SCMAXPY_DOW(a, x, y)
259 
AXPBY_DOW(REAL a,const REAL_D x,REAL b,const REAL_D y,REAL_D z)260 static inline REAL *AXPBY_DOW(REAL a, const REAL_D x, REAL b, const REAL_D y,
261 			      REAL_D z)
262 {
263   int i;
264 
265   for (i = 0; i < DIM_OF_WORLD; i++) {
266     z[i] = b*y[i] + a * x[i];
267   }
268   return z;
269 }
270 
MAXPBY_DOW(REAL a,const REAL_DD x,REAL b,const REAL_DD y,REAL_DD z)271 static inline REAL_D *MAXPBY_DOW(REAL a, const REAL_DD x,
272 				 REAL b, const REAL_DD y,
273 				 REAL_DD z)
274 {
275   int i;
276 
277   for (i = 0; i < DIM_OF_WORLD; i++) {
278     AXPBY_DOW(a, x[i], b, y[i], z[i]);
279   }
280   return z;
281 }
282 
283 #define DMAXPBY_DOW(a, x, b, y, z)  AXPBY_DOW(a, x, b, y, z)
284 #define SCMAXPBY_DOW(a, x, b, y, z) (z) = (a)*(x) + (b)*(y)
285 
AXPBYPCZ_DOW(REAL a,const REAL_D x,REAL b,const REAL_D y,REAL c,const REAL_D z,REAL_D w)286 static inline REAL *AXPBYPCZ_DOW(REAL a, const REAL_D x,
287 				 REAL b, const REAL_D y,
288 				 REAL c, const REAL_D z,
289 				 REAL_D w)
290 {
291   int i;
292 
293   for (i = 0; i < DIM_OF_WORLD; i++) {
294     w[i] = c*z[i] + b*y[i] + a * x[i];
295   }
296   return w;
297 }
298 
MAXPBYPCZ_DOW(REAL a,const REAL_DD x,REAL b,const REAL_DD y,REAL c,const REAL_DD z,REAL_DD w)299 static inline REAL_D *MAXPBYPCZ_DOW(REAL a, const REAL_DD x,
300 				    REAL b, const REAL_DD y,
301 				    REAL c, const REAL_DD z,
302 				    REAL_DD w)
303 {
304   int i;
305 
306   for (i = 0; i < DIM_OF_WORLD; i++) {
307     AXPBYPCZ_DOW(a, x[i], b, y[i], c, z[i], w[i]);
308   }
309   return w;
310 }
311 
312 #define DMAXPBYPCZ_DOW(a, x, b, y, c, z, w)  AXPBYPCZ_DOW(a, x, b, y, c, z, w)
313 #define SCMAXPBYPCZ_DOW(a, x, b, y, c, z, w) ((w) = (a)*(x) + (b)*(y) + (c)*(z))
314 
AXPBYP_DOW(REAL a,const REAL_D x,REAL b,const REAL_D y,REAL_D z)315 static inline REAL *AXPBYP_DOW(REAL a, const REAL_D x, REAL b, const REAL_D y,
316 			       REAL_D z)
317 {
318   int i;
319 
320   for (i = 0; i < DIM_OF_WORLD; i++) {
321     z[i] += b*y[i] + a * x[i];
322   }
323   return z;
324 }
325 
MAXPBYP_DOW(REAL a,const REAL_DD x,REAL b,const REAL_DD y,REAL_DD z)326 static inline REAL_D *MAXPBYP_DOW(REAL a, const REAL_DD x,
327 				  REAL b, const REAL_DD y,
328 				  REAL_DD z)
329 {
330   int i;
331 
332   for (i = 0; i < DIM_OF_WORLD; i++) {
333     AXPBYP_DOW(a, x[i], b, y[i], z[i]);
334   }
335   return z;
336 }
337 
338 #define DMAXPBYP_DOW(a, x, b, y, z)  AXPBYP_DOW(a, x, b, y, z)
339 #define SCMAXPBYP_DOW(a, x, b, y, z) ((z) += (a)*(x) + (b)*(y))
340 
AXPBYPCZP_DOW(REAL a,const REAL_D x,REAL b,const REAL_D y,REAL c,const REAL_D z,REAL_D w)341 static inline REAL *AXPBYPCZP_DOW(REAL a, const REAL_D x,
342 				  REAL b, const REAL_D y,
343 				  REAL c, const REAL_D z,
344 				  REAL_D w)
345 {
346   int i;
347 
348   for (i = 0; i < DIM_OF_WORLD; i++) {
349     w[i] += c*z[i] + b*y[i] + a*x[i];
350   }
351   return w;
352 }
353 
MAXPBYPCZP_DOW(REAL a,const REAL_DD x,REAL b,const REAL_DD y,REAL c,const REAL_DD z,REAL_DD w)354 static inline REAL_D *MAXPBYPCZP_DOW(REAL a, const REAL_DD x,
355 				     REAL b, const REAL_DD y,
356 				     REAL c, const REAL_DD z,
357 				     REAL_DD w)
358 {
359   int i;
360 
361   for (i = 0; i < DIM_OF_WORLD; i++) {
362     AXPBYPCZP_DOW(a, x[i], b, y[i], c, z[i], w[i]);
363   }
364   return w;
365 }
366 
367 #define DMAXPBYPCZP_DOW(a, x, b, y, z)		\
368   AXPBYPCZP_DOW(a, x, b, y, z)
369 #define SCMAXPBYPCZP_DOW(a, x, b, y, c, z, w)	\
370   ((w) += (a)*(x) + (b)*(y) + (c)*(z))
371 
372 /***********************/
373 /* Matrix - Matrix addition */
374 #define MMAXPY_DOW(s, a, b)      MAXPY_DOW(s, a, b)
375 #define DMDMAXPY_DOW(s, a, b)    DMAXPY_DOW(s, a, b)
376 #define SCMSCMAXPY_DOW(s, a, b)  SCMAXPY_DOW(s, a, b)
377 
378 /* Transpose addition, only different for the MM case */
379 #define MMAXTPY_DOW(s, a, b)     MAXTPY_DOW(s, a, b)
380 #define MDMAXTPY_DOW(s, a, b)    MDMAXPY_DOW(s, a, b)
381 #define MSCMAXTPY_DOW(s, a, b)   MSCMAXPY_DOW(s, a, b)
382 
383 #define DMDMAXTPY_DOW(s, a, b)   DMAXPY_DOW(s, a, b)
384 #define DMSCMAXTPY_DOW(s, a, b)  DMSCMAXPY_DOW(s, a, b)
385 
386 #define SCMSCMAXTPY_DOW(s, a, b) SCMAXPY_DOW(s, a, b)
387 
MDMAXPY_DOW(REAL a,const REAL_D x,REAL_DD y)388 static inline REAL_D *MDMAXPY_DOW(REAL a, const REAL_D x, REAL_DD y)
389 {
390   int i;
391 
392   for (i = 0; i < DIM_OF_WORLD; i++) {
393     y[i][i] += a * x[i];
394   }
395 
396   return y;
397 }
398 
MSCMAXPY_DOW(REAL a,const REAL x,REAL_DD y)399 static inline REAL_D *MSCMAXPY_DOW(REAL a, const REAL x, REAL_DD y)
400 {
401   int i;
402 
403   for (i = 0; i < DIM_OF_WORLD; i++) {
404     y[i][i] += a * x;
405   }
406 
407   return y;
408 }
409 
DMSCMAXPY_DOW(REAL a,REAL x,REAL * y)410 static inline REAL *DMSCMAXPY_DOW(REAL a, REAL x, REAL *y)
411 {
412   int i;
413 
414   for (i = 0; i < DIM_OF_WORLD; i++) {
415     y[i] += a*x;
416   }
417 
418   return y;
419 }
420 
421 /* Matrix - Matrix initialization */
422 #define MMAXEY_DOW(s, a, b)     MAXEY_DOW(s, a, b)
423 #define DMDMAXEY_DOW(s, a, b)   DMAXEY_DOW(s, a, b)
424 #define SCMSCMAXEY_DOW(s, a, b) SCMAXEY_DOW(s, a, b)
425 
MDMAXEY_DOW(REAL a,const REAL_D x,REAL_DD y)426 static inline REAL_D *MDMAXEY_DOW(REAL a, const REAL_D x, REAL_DD y)
427 {
428   int i, j;
429 
430   for (i = 0; i < DIM_OF_WORLD; i++) {
431     y[i][i] = a * x[i];
432     for (j = i+1; j < DIM_OF_WORLD; j++) {
433       y[i][j] = y[j][i] = 0.0;
434     }
435   }
436 
437   return y;
438 }
439 
MSCMAXEY_DOW(REAL a,const REAL x,REAL_DD y)440 static inline REAL_D *MSCMAXEY_DOW(REAL a, const REAL x, REAL_DD y)
441 {
442   int i, j;
443 
444   for (i = 0; i < DIM_OF_WORLD; i++) {
445     y[i][i] = a * x;
446     for (j = i+1; j < DIM_OF_WORLD; j++) {
447       y[i][j] = y[j][i] = 0.0;
448     }
449   }
450 
451   return y;
452 }
453 
DMSCMAXEY_DOW(REAL a,REAL x,REAL * y)454 static inline REAL *DMSCMAXEY_DOW(REAL a, REAL x, REAL *y)
455 {
456   int i;
457 
458   for (i = 0; i < DIM_OF_WORLD; i++) {
459     y[i] = a*x;
460   }
461 
462   return y;
463 }
464 
465 /* Matrix - Matrix initialization */
466 #define MMMAXPBY_DOW(a, x, b, y, z)      MAXPBY_DOW(a, x, b, y, z)
467 #define DMDMDMAXPBY_DOW(a, x, b, y, z)    DMAXPBY_DOW(a, x, b, y, z)
468 #define SCMSCMSCMAXPBY_DOW(a, x, b, y, z)  SCMAXPBY_DOW(a, x, b, y, z)
469 
MMDMAXPBY_DOW(REAL a,const REAL_DD x,REAL b,const REAL_D y,REAL_DD z)470 static inline REAL_D *MMDMAXPBY_DOW(REAL a, const REAL_DD x,
471 				REAL b, const REAL_D y,
472 				REAL_DD z)
473 {
474   int i, j;
475 
476   for (i = 0; i < DIM_OF_WORLD; i++) {
477     z[i][i] = a * x[i][i] + b * y[i];
478     for (j = i+1; j < DIM_OF_WORLD; j++) {
479     	z[i][j] = a * x[i][j];
480     	z[j][i] = a * x[j][i];
481     }
482   }
483 
484   return z;
485 }
486 
MMSCMAXPBY_DOW(REAL a,const REAL_DD x,REAL b,const REAL y,REAL_DD z)487 static inline REAL_D *MMSCMAXPBY_DOW(REAL a, const REAL_DD x,
488 				REAL b, const REAL y,
489 				REAL_DD z)
490 {
491   int i, j;
492 
493   for (i = 0; i < DIM_OF_WORLD; i++) {
494     z[i][i] = a * x[i][i] + b * y;
495     for (j = i+1; j < DIM_OF_WORLD; j++) {
496     	z[i][j] = a * x[i][j];
497     	z[j][i] = a * x[j][i];
498     }
499   }
500 
501   return z;
502 }
503 
MDMDMAXPBY_DOW(REAL a,const REAL_D x,REAL b,const REAL_D y,REAL_DD z)504 static inline REAL_D *MDMDMAXPBY_DOW(REAL a, const REAL_D x,
505 				REAL b, const REAL_D y,
506 				REAL_DD z)
507 {
508   int i, j;
509 
510   for (i = 0; i < DIM_OF_WORLD; i++) {
511     z[i][i] = a * x[i] + b * y[i];
512     for (j = i+1; j < DIM_OF_WORLD; j++) {
513     	z[i][j] = z[j][i] = 0.0;
514     }
515   }
516 
517   return z;
518 }
519 
MDMSCMAXPBY_DOW(REAL a,REAL * x,REAL b,REAL y,REAL_DD z)520 static inline REAL_D *MDMSCMAXPBY_DOW(REAL a, REAL *x,
521 				REAL b, REAL y,
522 				REAL_DD z)
523 {
524   int i, j;
525 
526   for (i = 0; i < DIM_OF_WORLD; i++) {
527     z[i][i] = a * x[i] + b * y;
528     for (j = i+1; j < DIM_OF_WORLD; j++) {
529     	z[i][j] = z[j][i] = 0.0;
530     }
531   }
532 
533   return z;
534 }
535 
MSCMSCMAXPBY_DOW(REAL a,REAL x,REAL b,REAL y,REAL_DD z)536 static inline REAL_D *MSCMSCMAXPBY_DOW(REAL a, REAL x,
537 				REAL b, REAL y,
538 				REAL_DD z)
539 {
540   int i, j;
541 
542   for (i = 0; i < DIM_OF_WORLD; i++) {
543     z[i][i] = a * x + b * y;
544     for (j = i+1; j < DIM_OF_WORLD; j++) {
545     	z[i][j] = z[j][i] = 0.0;
546     }
547   }
548 
549   return z;
550 }
551 
DMDMSCMAXPBY_DOW(REAL a,const REAL_D x,REAL b,const REAL y,REAL_D z)552 static inline REAL *DMDMSCMAXPBY_DOW(REAL a, const REAL_D x,
553 				REAL b, const REAL y,
554 				REAL_D z)
555 {
556   int i;
557 
558   for (i = 0; i < DIM_OF_WORLD; i++) {
559     z[i] = a * x[i] + b * y;
560   }
561   return z;
562 }
563 
DMSCMSCMAXPBY_DOW(REAL a,const REAL x,REAL b,const REAL y,REAL_D z)564 static inline REAL *DMSCMSCMAXPBY_DOW(REAL a, const REAL x,
565 				REAL b, const REAL y,
566 				REAL_D z)
567 {
568   int i;
569 
570   for (i = 0; i < DIM_OF_WORLD; i++) {
571     z[i] = a * x + b * y;
572   }
573   return z;
574 }
575 
576 /* Matrix - Matrix copy */
577 #define MMCOPY_DOW(a, b)     MCOPY_DOW(a, b)
578 #define MDMCOPY_DOW(a, b)    MDMAXEY_DOW(1.0, a, b)
579 #define MSCMCOPY_DOW(a, b)   MSCMAXEY_DOW(1.0, a, b)
580 
581 #define DMDMCOPY_DOW(a, b)   DMCOPY_DOW(a, b)
582 #define DMSCMCOPY_DOW(a, b)  DMSCMAXEY_DOW(1.0, a, b)
583 
584 #define SCMSCMCOPY_DOW(a, b) (b) = (a)
585 
586 /***********************/
587 
COPY_DOW(const REAL_D x,REAL_D y)588 static inline REAL *COPY_DOW(const REAL_D x, REAL_D y)
589 {
590   memcpy(y, x, sizeof(REAL_D));
591   return y;
592 }
593 
MCOPY_DOW(const REAL_DD x,REAL_DD y)594 static inline REAL_D *MCOPY_DOW(const REAL_DD x, REAL_DD y)
595 {
596   memcpy(y, x, sizeof(REAL_DD));
597   return y;
598 }
599 
600 #define DMCOPY_DOW(src, dst) COPY_DOW(src, dst)
601 
DST2_DOW(const REAL_D x,const REAL_D y)602 static inline REAL DST2_DOW(const REAL_D x, const REAL_D y)
603 {
604 # if DIM_OF_WORLD == 1
605   return SQR(ABS(x[0] - y[0]));
606 # else
607   int i;
608   REAL accu;
609 
610   accu = SQR(x[0] - y[0]);
611   for (i = 1; i < DIM_OF_WORLD; i++) {
612     accu += SQR(x[i] - y[i]);
613   }
614   return accu;
615 # endif
616 }
617 
MDST2_DOW(const REAL_DD a,const REAL_DD b)618 static inline REAL MDST2_DOW(const REAL_DD a, const REAL_DD b)
619 {
620   int  i;
621   REAL res;
622 
623   res = DST2_DOW(a[0], b[0]);
624   for (i = 1; i < DIM_OF_WORLD; i++) {
625     res += DST2_DOW(a[i], b[i]);
626   }
627   return res;
628 }
629 
630 #define DMDST2_DOW(x, y) DST2_DOW(x, y)
631 
NRM2_DOW(const REAL_D x)632 static inline REAL NRM2_DOW(const REAL_D x)
633 {
634   int i;
635   REAL accu;
636 
637   accu = SQR(x[0]);
638   for (i = 1; i < DIM_OF_WORLD; i++) {
639     accu += SQR(x[i]);
640   }
641   return accu;
642 }
643 
MNRM2_DOW(const REAL_DD m)644 static inline REAL MNRM2_DOW(const REAL_DD m)
645 {
646   int  i;
647   REAL res;
648 
649   res = NRM2_DOW(m[0]);
650   for (i = 1; i < DIM_OF_WORLD; i++) {
651     res += NRM2_DOW(m[i]);
652   }
653   return res;
654 }
655 
656 #define DMNRM2_DOW(m) NRM2_DOW(x)
657 
NORM1_DOW(const REAL_D x)658 static inline REAL NORM1_DOW(const REAL_D x)
659 {
660   int i;
661   REAL sum;
662 
663   sum = fabs(x[0]);
664   for (i = 1; i < DIM_OF_WORLD; i++) {
665     sum += fabs(x[i]);
666   }
667 
668   return sum;
669 }
670 
DIST1_DOW(const REAL_D x,const REAL_D y)671 static inline REAL DIST1_DOW(const REAL_D x, const REAL_D y)
672 {
673   int i;
674   REAL sum;
675 
676   sum = fabs(x[0]-y[0]);
677   for (i = 1; i < DIM_OF_WORLD; i++) {
678     sum += fabs(x[i]-y[i]);
679   }
680 
681   return sum;
682 }
683 
NORM8_DOW(const REAL_D x)684 static inline REAL NORM8_DOW(const REAL_D x)
685 {
686   int i;
687   REAL max;
688 
689   max = fabs(x[0]);
690   for (i = 1; i < DIM_OF_WORLD; i++) {
691     max = MAX(max, fabs(x[i]));
692   }
693 
694   return max;
695 }
696 
DIST8_DOW(const REAL_D x,const REAL_D y)697 static inline REAL DIST8_DOW(const REAL_D x, const REAL_D y)
698 {
699   int i;
700   REAL max;
701 
702   max = fabs(x[0]-y[0]);
703   for (i = 1; i < DIM_OF_WORLD; i++) {
704     max = MAX(max, fabs(x[i]-y[i]));
705   }
706 
707   return max;
708 }
709 
SUM_DOW(const REAL_D x)710 static inline REAL SUM_DOW(const REAL_D x)
711 {
712   int i;
713   REAL sum;
714 
715   sum = x[0];
716   for (i = 1; i < DIM_OF_WORLD; i++) {
717     sum += x[i];
718   }
719 
720   return sum;
721 }
722 
PNRMP_DOW(const REAL_D x,REAL p)723 static inline REAL PNRMP_DOW(const REAL_D x, REAL p)
724 {
725   int i;
726   REAL sum;
727 
728   sum = pow(fabs(x[0]), p);
729   for (i = 1; i < DIM_OF_WORLD; i++) {
730     sum += pow(fabs(x[i]), p);
731   }
732 
733   return sum;
734 }
735 
NRMP_DOW(const REAL_D x,REAL p)736 static inline REAL NRMP_DOW(const REAL_D x, REAL p)
737 {
738   return pow(PNRMP_DOW(x, p), 1.0/p);
739 }
740 
MNORM1_DOW(const REAL_DD x)741 static inline REAL MNORM1_DOW(const REAL_DD x)
742 {
743   int i;
744   REAL sum;
745 
746   sum = NORM1_DOW(x[0]);
747   for (i = 1; i < DIM_OF_WORLD; i++) {
748     sum += NORM1_DOW(x[0]);
749   }
750 
751   return sum;
752 }
753 
MDIST1_DOW(const REAL_DD x,const REAL_DD y)754 static inline REAL MDIST1_DOW(const REAL_DD x, const REAL_DD y)
755 {
756   int i;
757   REAL sum;
758 
759   sum = DIST1_DOW(x[0], y[0]);
760   for (i = 1; i < DIM_OF_WORLD; i++) {
761     sum += DIST1_DOW(x[i], y[i]);
762   }
763 
764   return sum;
765 }
766 
MNORM8_DOW(const REAL_DD x)767 static inline REAL MNORM8_DOW(const REAL_DD x)
768 {
769   int i;
770   REAL max;
771 
772   max = NORM8_DOW(x[0]);
773   for (i = 1; i < DIM_OF_WORLD; i++) {
774     max = MAX(max, NORM8_DOW(x[i]));
775   }
776 
777   return max;
778 }
779 
MDIST8_DOW(const REAL_DD x,const REAL_DD y)780 static inline REAL MDIST8_DOW(const REAL_DD x, const REAL_DD y)
781 {
782   int i;
783   REAL max;
784 
785   max = DIST8_DOW(x[0], y[0]);
786   for (i = 1; i < DIM_OF_WORLD; i++) {
787     max = MAX(max, DIST8_DOW(x[i], y[i]));
788   }
789 
790   return max;
791 }
792 
MSUM_DOW(const REAL_DD x)793 static inline REAL MSUM_DOW(const REAL_DD x)
794 {
795   int i;
796   REAL sum;
797 
798   sum = SUM_DOW(x[0]);
799   for (i = 1; i < DIM_OF_WORLD; i++) {
800     sum += SUM_DOW(x[0]);
801   }
802 
803   return sum;
804 }
805 
MPNRMP_DOW(const REAL_DD x,REAL p)806 static inline REAL MPNRMP_DOW(const REAL_DD x, REAL p)
807 {
808   int i;
809   REAL sum;
810 
811   sum = PNRMP_DOW(x[0], p);
812   for (i = 1; i < DIM_OF_WORLD; i++) {
813     sum += PNRMP_DOW(x[i], p);
814   }
815 
816   return sum;
817 }
818 
MNRMP_DOW(const REAL_DD x,REAL p)819 static inline REAL MNRMP_DOW(const REAL_DD x, REAL p)
820 {
821   return pow(MPNRMP_DOW(x, p), 1.0/p);
822 }
823 
SCP_DOW(const REAL_D x,const REAL_D y)824 static inline REAL SCP_DOW(const REAL_D x, const REAL_D y)
825 {
826   REAL res;
827   int  i;
828 
829   res = x[0] * y[0];
830   for (i = 1; i < DIM_OF_WORLD; i++) {
831     res += x[i]*y[i];
832 
833   }
834   return res;
835 }
836 
GRAMSCP_DOW(const REAL_DD M,const REAL_D x,const REAL_D y)837 static inline REAL GRAMSCP_DOW(const REAL_DD M, const REAL_D x, const REAL_D y)
838 {
839   REAL res = 0.0;
840   int i, j;
841 
842   for (i = 0; i < DIM_OF_WORLD; i++) {
843     for (j = 0; j < DIM_OF_WORLD; j++) {
844       res += x[i] * M[i][j] * y[j];
845     }
846   }
847   return res;
848 }
849 #define MGRAMSCP_DOW(M, x, y) GRAMSCP_DOW(M, x, y)
850 
DMGRAMSCP_DOW(const REAL_D M,const REAL_D x,const REAL_D y)851 static inline REAL DMGRAMSCP_DOW(const REAL_D M, const REAL_D x, const REAL_D y)
852 {
853   REAL res = 0.0;
854   int i;
855 
856   for (i = 0; i < DIM_OF_WORLD; i++) {
857     res += x[i] * M[i] * y[i];
858   }
859   return res;
860 }
861 
SCMGRAMSCP_DOW(REAL s,const REAL_D x,const REAL_D y)862 static inline REAL SCMGRAMSCP_DOW(REAL s, const REAL_D x, const REAL_D y)
863 {
864   return s*SCP_DOW(x, y);
865 }
866 
MTV_DOW(const REAL_DD m,const REAL_D v,REAL_D b)867 static inline REAL *MTV_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
868 {
869   int i, j;
870 
871   for (i = 0; i < DIM_OF_WORLD; i++) {
872     for (j = 0; j < DIM_OF_WORLD; j++) {
873       b[i] += m[j][i] * v[j];
874     }
875   }
876   return b;
877 }
878 
879 #define SCMV_DOW(m, v, b)  AXPY_DOW(m, v, b)
880 #define DMTV_DOW(m, v, b)  DMV_DOW(m, v, b)
881 #define SCMTV_DOW(m, v, b) SCMV_DOW(m, v, b)
882 
MDIV_DOW(const REAL_DD m,const REAL_D v,REAL_D b)883 static inline REAL *MDIV_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
884 {
885   int i;
886 
887   for (i = 0; i < DIM_OF_WORLD; i++) {
888     b[i] = v[i] / m[i][i];
889   }
890   return b;
891 }
892 
DMDIV_DOW(const REAL_D m,const REAL_D y,REAL_D r)893 static inline REAL *DMDIV_DOW(const REAL_D m, const REAL_D y, REAL_D r)
894 {
895   int i;
896   for (i = 0; i < DIM_OF_WORLD; i++) {
897     r[i] = y[i] / m[i];
898   }
899   return r;
900 }
901 
902 #define SCMDIV_DOW(m, y, r) AXEY_DOW(1.0/(m), y, r)
903 
DMV_DOW(const REAL_D x,const REAL_D y,REAL_D r)904 static inline REAL *DMV_DOW(const REAL_D x, const REAL_D y, REAL_D r)
905 {
906   int i;
907   for (i = 0; i < DIM_OF_WORLD; i++) {
908     r[i] += x[i]*y[i];
909   }
910   return r;
911 }
912 
MV_DOW(const REAL_DD m,const REAL_D v,REAL_D b)913 static inline REAL *MV_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
914 {
915   int i;
916 
917   for (i = 0; i < DIM_OF_WORLD; i++) {
918     b[i] += SCP_DOW(m[i], v);
919   }
920   return b;
921 }
922 
MVEQ_DOW(const REAL_DD m,const REAL_D v,REAL_D b)923 static inline REAL *MVEQ_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
924 {
925   int i;
926 
927   for (i = 0; i < DIM_OF_WORLD; i++) {
928     b[i] = SCP_DOW(m[i], v);
929   }
930   return b;
931 }
932 
MTVEQ_DOW(const REAL_DD m,const REAL_D v,REAL_D b)933 static inline REAL *MTVEQ_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
934 {
935   int i, j;
936 
937   for (i = 0; i < DIM_OF_WORLD; i++) {
938     b[i] = 0.0;
939     for (j = 0; j < DIM_OF_WORLD; j++) {
940       b[i] += m[j][i] * v[j];
941     }
942   }
943   return b;
944 }
945 
DMVEQ_DOW(const REAL_D x,const REAL_D y,REAL_D r)946 static inline REAL *DMVEQ_DOW(const REAL_D x, const REAL_D y, REAL_D r)
947 {
948   int i;
949   for (i = 0; i < DIM_OF_WORLD; i++) {
950     r[i] = x[i]*y[i];
951   }
952   return r;
953 }
954 
955 #define SCMVEQ_DOW(m, v, b)  AXEY_DOW(m, v, b)
956 
957 static inline REAL *
MMBIMV_DOW(REAL a,const REAL_DD A,REAL b,const REAL_DD B,const REAL_D v,REAL c,REAL_D w)958 MMBIMV_DOW(REAL a, const REAL_DD A, REAL b, const REAL_DD B, const REAL_D v,
959 	   REAL c, REAL_D w)
960 {
961   int i, j;
962   REAL sum;
963 
964   for (i = 0; i < DIM_OF_WORLD; i++) {
965     sum = 0.0;
966     for (j = 0; j < DIM_OF_WORLD; j++) {
967       sum += (a * A[i][j] + b * B[i][j]) * v[j];
968     }
969     w[i] = c*w[i] + sum;
970   }
971   return w;
972 }
973 
974 static inline REAL *
MDMBIMV_DOW(REAL a,const REAL_DD A,REAL b,const REAL_D B,const REAL_D v,REAL c,REAL_D w)975 MDMBIMV_DOW(REAL a, const REAL_DD A, REAL b, const REAL_D B, const REAL_D v,
976 	    REAL c, REAL_D w)
977 {
978   int i, j;
979   REAL sum;
980 
981   for (i = 0; i < DIM_OF_WORLD; i++) {
982     sum = 0.0;
983     for (j = 0; j < DIM_OF_WORLD; j++) {
984       sum += a * A[i][j] * v[j];
985     }
986     w[i] = c*w[i] + sum + b * B[i] * v[i];
987   }
988   return w;
989 }
990 
991 static inline REAL *
DMMBIMV_DOW(REAL a,const REAL_D A,REAL b,const REAL_DD B,const REAL_D v,REAL c,REAL_D w)992 DMMBIMV_DOW(REAL a, const REAL_D A, REAL b, const REAL_DD B, const REAL_D v,
993 	    REAL c, REAL_D w)
994 {
995   return MDMBIMV_DOW(b, B, a, A, v, c, w);
996 }
997 
998 static inline REAL *
MSCMBIMV_DOW(REAL a,const REAL_DD A,REAL b,REAL B,const REAL_D v,REAL c,REAL_D w)999 MSCMBIMV_DOW(REAL a, const REAL_DD A, REAL b, REAL B, const REAL_D v,
1000 	     REAL c, REAL_D w)
1001 {
1002   int i, j;
1003   REAL sum;
1004 
1005   for (i = 0; i < DIM_OF_WORLD; i++) {
1006     sum = 0.0;
1007     for (j = 0; j < DIM_OF_WORLD; j++) {
1008       sum += a * A[i][j] * v[j];
1009     }
1010     w[i] = c*w[i] + sum + b * B * v[i];
1011   }
1012   return w;
1013 }
1014 
1015 
1016 static inline REAL *
SCMMBIMV_DOW(REAL a,REAL A,REAL b,const REAL_DD B,const REAL_D v,REAL c,REAL_D w)1017 SCMMBIMV_DOW(REAL a, REAL A, REAL b, const REAL_DD B, const REAL_D v,
1018 	     REAL c, REAL_D w)
1019 {
1020   return MSCMBIMV_DOW(b, B, a, A, v, c, w);
1021 }
1022 
1023 static inline REAL *
DMDMBIMV_DOW(REAL a,const REAL_D A,REAL b,const REAL_D B,const REAL_D v,REAL c,REAL_D w)1024 DMDMBIMV_DOW(REAL a, const REAL_D A, REAL b, const REAL_D B, const REAL_D v,
1025 	     REAL c, REAL_D w)
1026 {
1027   int i;
1028 
1029   for (i = 0; i < DIM_OF_WORLD; i++) {
1030     w[i] = c*w[i] + (a * A[i] + b * B[i]) * v[i];
1031   }
1032   return w;
1033 }
1034 
1035 static inline REAL *
DMSCMBIMV_DOW(REAL a,const REAL_D A,REAL b,REAL B,const REAL_D v,REAL c,REAL_D w)1036 DMSCMBIMV_DOW(REAL a, const REAL_D A, REAL b, REAL B, const REAL_D v,
1037 	      REAL c, REAL_D w)
1038 {
1039   int i;
1040 
1041   for (i = 0; i < DIM_OF_WORLD; i++) {
1042     w[i] = c*w[i] + (a * A[i] + b * B) * v[i];
1043   }
1044   return w;
1045 }
1046 
1047 static inline REAL *
SCMDMBIMV_DOW(REAL a,REAL A,REAL b,const REAL_D B,const REAL_D v,REAL c,REAL_D w)1048 SCMDMBIMV_DOW(REAL a, REAL A, REAL b, const REAL_D B, const REAL_D v,
1049 	      REAL c, REAL_D w)
1050 {
1051   return DMSCMBIMV_DOW(b, B, a, A, v, c, w);
1052 }
1053 
1054 static inline REAL *
SCMSCMBIMV_DOW(REAL a,REAL A,REAL b,REAL B,const REAL_D v,REAL c,REAL_D w)1055 SCMSCMBIMV_DOW(REAL a, REAL A, REAL b, REAL B, const REAL_D v,
1056 	       REAL c, REAL_D w)
1057 {
1058   return AXPBY_DOW(a*A + b*B, v, c, w, w);
1059 }
1060 
MGEMV_DOW(REAL a,const REAL_DD m,const REAL_D v,REAL beta,REAL_D b)1061 static inline REAL *MGEMV_DOW(REAL a, const REAL_DD m,
1062 			      const REAL_D v, REAL beta,
1063 			      REAL_D b)
1064 {
1065   int i;
1066 
1067   for (i = 0; i < DIM_OF_WORLD; i++) {
1068     b[i] = beta*b[i] + a * SCP_DOW(m[i], v);
1069   }
1070   return b;
1071 }
1072 
1073 /* Same as above, but without diagonal. */
MGEMV_ND_DOW(REAL a,const REAL_DD m,const REAL_D v,REAL beta,REAL_D b)1074 static inline REAL *MGEMV_ND_DOW(REAL a, const REAL_DD m,
1075 			      const REAL_D v, REAL beta,
1076 			      REAL_D b)
1077 {
1078   int i, j;
1079 
1080   for (i = 0; i < DIM_OF_WORLD; i++) {
1081     REAL tmp = 0.0;
1082     for (j = 0; j < DIM_OF_WORLD; j++) {
1083       if (i == j) {
1084         continue;
1085       }
1086       tmp += m[i][j] * v[j];
1087     }
1088     b[i] = beta*b[i] + a * tmp;
1089   }
1090   return b;
1091 }
1092 
1093 #define GEMV_DOW(a, m, v, beta, b) MGEMV_DOW(a, m, v, beta, b)
1094 #define GEMV_ND_DOW(a, m, v, beta, b) MGEMV_ND_DOW(a, m, v, beta, b)
1095 
DMGEMV_DOW(REAL a,const REAL_D x,const REAL_D y,REAL beta,REAL_D r)1096 static inline REAL *DMGEMV_DOW(REAL a, const REAL_D x, const REAL_D y,
1097 			       REAL beta, REAL_D r)
1098 {
1099   int i;
1100   for (i = 0; i < DIM_OF_WORLD; i++) {
1101     r[i] = beta*r[i] + a*x[i]*y[i];
1102   }
1103   return r;
1104 }
1105 
DMGEMV_ND_DOW(REAL a,const REAL_D x,const REAL_D y,REAL beta,REAL_D r)1106 static inline REAL *DMGEMV_ND_DOW(REAL a, const REAL_D x, const REAL_D y,
1107                                   REAL beta, REAL_D r)
1108 {
1109   int i;
1110   for (i = 0; i < DIM_OF_WORLD; i++) {
1111     r[i] *= beta;
1112   }
1113   return r;
1114 }
1115 
MGEMTV_DOW(REAL a,const REAL_DD m,const REAL_D v,REAL beta,REAL_D b)1116 static inline REAL *MGEMTV_DOW(REAL a, const REAL_DD m,
1117 			       const REAL_D v, REAL beta,
1118 			       REAL_D b)
1119 {
1120   int i, j;
1121   REAL tmp;
1122 
1123   for (i = 0; i < DIM_OF_WORLD; i++) {
1124     b[i] *= beta;
1125     tmp = m[0][i] * v[0];
1126     for (j = 1; j < DIM_OF_WORLD; j++) {
1127       tmp += m[j][i] * v[j];
1128     }
1129     b[i] += a*tmp;
1130   }
1131   return b;
1132 }
1133 
1134 #define GEMTV_DOW(a, m, v, beta, b) MGEMTV_DOW(a, m, v, beta, b)
1135 
SCMGEMV_DOW(REAL a,REAL m,const REAL_D v,REAL beta,REAL_D b)1136 static inline REAL *SCMGEMV_DOW(REAL a, REAL m, const REAL_D v, REAL beta,
1137 				REAL_D b)
1138 {
1139   int i;
1140 
1141   m *= a;
1142   for (i = 0; i < DIM_OF_WORLD; i++) {
1143     b[i] *= beta;
1144     b[i] += m*v[i];
1145   }
1146   return b;
1147 }
1148 
SCMGEMV_ND_DOW(REAL a,REAL m,const REAL_D v,REAL beta,REAL_D b)1149 static inline REAL *SCMGEMV_ND_DOW(REAL a, REAL m, const REAL_D v, REAL beta,
1150                                    REAL_D b)
1151 {
1152   int i;
1153 
1154   for (i = 0; i < DIM_OF_WORLD; i++) {
1155     b[i] *= beta;
1156   }
1157   return b;
1158 }
1159 
1160 #define DMGEMTV_DOW(a, m, v, beta, b)  DMGEMV_DOW(a, m, v, beta, b)
1161 #define SCMGEMTV_DOW(a, m, v, beta, b) SCMGEMV_DOW(a, m, v, beta, b)
1162 
MSCP_DOW(const REAL_DD x,const REAL_DD y)1163 static inline REAL MSCP_DOW(const REAL_DD x, const REAL_DD y)
1164 {
1165   REAL res;
1166   int  i;
1167 
1168   res = SCP_DOW(x[0], y[0]);
1169   for (i = 1; i < DIM_OF_WORLD; i++) {
1170     res += SCP_DOW(x[i], y[i]);
1171   }
1172   return res;
1173 }
1174 
1175 #define DMSCP_DOW(x, y) SCP_DOW(x, y)
1176 
SET_DOW(REAL val,REAL_D x)1177 static inline REAL *SET_DOW(REAL val, REAL_D x)
1178 {
1179   int i;
1180   for (i = 0; i < DIM_OF_WORLD; i++) {
1181     x[i] = val;
1182   }
1183   return x;
1184 }
1185 
MSET_DOW(REAL val,REAL_DD m)1186 static inline REAL_D *MSET_DOW(REAL val, REAL_DD m)
1187 {
1188   int i, j;
1189 
1190   for (i = 0; i < DIM_OF_WORLD; i++) {
1191     m[i][i] = val;
1192     for (j = i+1; j < DIM_OF_WORLD; j++) {
1193       m[j][i] = m[i][j] = 0.0;
1194     }
1195   }
1196   return m;
1197 }
1198 
1199 #define DMSET_DOW(val, m) SET_DOW(val, m)
1200 #define SCMSET_DOW(val, m) (m) = (val)
1201 
CMP_DOW(REAL val,const REAL_D a)1202 static inline bool CMP_DOW(REAL val, const REAL_D a)
1203 {
1204   int i;
1205 
1206   for (i = 0; i < DIM_OF_WORLD; i++) {
1207     if (a[i] != val) {
1208       return false;
1209     }
1210   }
1211   return true;
1212 }
1213 
MCMP_DOW(REAL val,const REAL_DD a)1214 static inline bool MCMP_DOW(REAL val, const REAL_DD a)
1215 {
1216   int i, j;
1217 
1218   for (i = 0; i < DIM_OF_WORLD; i++) {
1219     if (a[i][i] != val) {
1220       return false;
1221     }
1222     for (j = i+1; j < DIM_OF_WORLD; j++) {
1223       if (a[i][j] != 0.0 || a[j][i] != 0.0) {
1224 	return false;
1225       }
1226     }
1227   }
1228   return true;
1229 }
1230 
1231 #define DMCMP_DOW(val, m) CMP_DOW(val, m)
1232 #define SCMCMP_DOW(val, m) ((m) == (val))
1233 
1234 #if DIM_OF_WORLD == 2
WEDGE_DOW(const REAL_D a,const REAL_D b)1235 static inline REAL WEDGE_DOW(const REAL_D a, const REAL_D b)
1236 {
1237   return a[0]*b[1] - a[1]*b[0];
1238 }
1239 #endif
1240 
1241 #if DIM_OF_WORLD == 3
WEDGE_DOW(const REAL_D a,const REAL_D b,REAL_D r)1242 static inline REAL *WEDGE_DOW(const REAL_D a, const REAL_D b, REAL_D r)
1243 {
1244   r[0] = a[1]*b[2] - a[2]*b[1];
1245   r[1] = a[2]*b[0] - a[0]*b[2];
1246   r[2] = a[0]*b[1] - a[1]*b[0];
1247   return r;
1248 }
1249 #endif
1250 
1251 #define MAT_SWITCH_TYPE(type, body_f, body_d, body_sc)		\
1252   switch (type) {						\
1253   case MATENT_REAL_DD: body_f; break;				\
1254   case MATENT_REAL_D: body_d; break;				\
1255   case MATENT_REAL: body_sc; break;				\
1256   default: ERROR_EXIT("Unknown MATENT_TYPE (%d)\n", type);	\
1257 }
1258 
1259 /* MAT_BODY(PFX, CONSTCAST, CAST, SUF, TYPE) is supposed to be a
1260  * "multiplex" macro where BLAS routines are accessed via
1261 " * PFX##AXPY_DOW(..., CONSTCAST var##SUF, ...)
1262  *
1263  * PFX is one of M, DM, SCM
1264  * CAST and CONSTCAST specify type-casts s.t. the ...._DOW() functions
1265  *   compile without error.
1266  * SUF is the suffix attached to some block-matrix types, one of
1267  *   real, real_d, real_dd
1268  * TYPE is the actual type corresponding to SUF, one of
1269  *   REAL, RELA_D, REAL_DD
1270  */
1271 #define MAT_EMIT_BODY_SWITCH(type)				\
1272   MAT_SWITCH_TYPE(						\
1273     type,							\
1274     MAT_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD),	\
1275     MAT_BODY(DM,                 ,           , real_d, REAL_D),	\
1276     MAT_BODY(SCM,                ,           , real, REAL))
1277 
1278 
1279 /* BI_MAT_BODY(PFX1, CONSTCAST1, CAST1, SUF1, TYPE1,
1280  *             PFX2, CONSTCAST2, CAST2, SUF2, TYPE2)
1281  *
1282  * is supposed to be a "multiplex" macro where BLAS routines are
1283  * accessed via " * PFX##AXPY_DOW(..., CONSTCAST var##SUF, ...) etc.
1284  */
1285 #define MAT_EMIT_BI_BODY_SWITCH(type1, type2)				\
1286   MAT_SWITCH_TYPE(							\
1287     type1,								\
1288     MAT_SWITCH_TYPE(							\
1289       type2,								\
1290       MAT_BI_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD,	\
1291 		  M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD),	\
1292       MAT_BI_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD,	\
1293 		  DM,                 ,           , real_d, REAL_D),	\
1294       MAT_BI_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD,	\
1295 		  SCM,                ,           , real, REAL)),	\
1296     MAT_SWITCH_TYPE(							\
1297       type2,								\
1298       MAT_BI_BODY(DM,                 ,           , real_d, REAL_D,	\
1299 		  M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD),	\
1300       MAT_BI_BODY(DM,                 ,           , real_d, REAL_D,	\
1301 		  DM,                 ,           , real_d, REAL_D),	\
1302       MAT_BI_BODY(DM,                 ,           , real_d, REAL_D,	\
1303 		  SCM,                ,           , real, REAL)),	\
1304     MAT_SWITCH_TYPE(							\
1305       type2,								\
1306       MAT_BI_BODY(SCM,                ,           , real, REAL,		\
1307 		  M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD),	\
1308       MAT_BI_BODY(SCM,                ,           , real, REAL,		\
1309 		  DM,                 ,           , real_d, REAL_D),	\
1310       MAT_BI_BODY(SCM,                ,           , real, REAL,		\
1311 		  SCM,                ,           , real, REAL)))
1312 
1313 /* TRI_MAT_BODY(PFX1, CONSTCAST1, CAST1, SUF1, TYPE1,
1314  *              PFX2, CONSTCAST2, CAST2, SUF2, TYPE2)
1315  *
1316  * is supposed to be a "multiplex" macro where BLAS routines are
1317  * accessed via " * PFX##AXPY_DOW(..., CONSTCAST var##SUF, ...) etc.
1318  */
1319 #define MAT_EMIT_TRI_BODY_SWITCH(type1, type2)				\
1320   MAT_SWITCH_TYPE(							\
1321     type1,								\
1322     MAT_SWITCH_TYPE(							\
1323       type2,								\
1324       MAT_TRI_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD,	\
1325 		   M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD),	\
1326       MAT_TRI_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD,	\
1327 		   DM,                 ,           , real_d, REAL_D),	\
1328       MAT_TRI_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD,	\
1329 		   SCM,                ,           , real, REAL)),	\
1330     if (type2 == MATENT_REAL_D) {					\
1331       MAT_TRI_BODY(DM,                 ,           , real_d, REAL_D,	\
1332 		   DM,                 ,           , real_d, REAL_D);	\
1333     } else if (type2 == MATENT_REAL) {					\
1334       MAT_TRI_BODY(DM,                 ,           , real_d, REAL_D,	\
1335 		   SCM,                ,           , real, REAL);	\
1336     },									\
1337     if (type2 == MATENT_REAL) {						\
1338       MAT_TRI_BODY(SCM,                ,           , real, REAL,	\
1339 		   SCM,                ,           , real, REAL);	\
1340     })
1341 
1342 
1343 /* defines where only DOW == 1 plays a special role */
1344 # if DIM_OF_WORLD == 1
1345 #  define DIST_DOW(x,y)    ABS((x)[0]-(y)[0])
1346 #  define NORM_DOW(x)      ABS((x)[0])
1347 #  define MNRM_DOW(m)      ABS((m)[0][0])
1348 #  define DMNRM_DOW(m)     NRM_DOW(m)
1349 #  define MDIST_DOW(a,b)   ABS((a)[0][0] - (b)[0][0])
1350 #  define DMDIST_DOW(a,b)  DIST_DOW(a, b)
1351 #  define SCMDIST_DOW(a,b) ABS((a)-(b))
1352 # else
1353 #  define NORM_DOW(x)      sqrt(NRM2_DOW(x))
1354 #  define DIST_DOW(x,y)    sqrt(DST2_DOW(x, y))
1355 #  define MNORM_DOW(m)     sqrt(MNRM2_DOW(m))
1356 #  define DMNORM_DOW(m)    sqrt(DMNRM2_DOW(m))
1357 #  define MDIST_DOW(a,b)   sqrt(MDST2_DOW(a, b))
1358 #  define DMDIST_DOW(a,b)  sqrt(DMDST2_DOW(a, b))
1359 #  define SCMDIST_DOW(a,b) sqrt(SCMDST2_DOW(a, b))
1360 # endif
1361 
1362 /* defines different for all DOWs */
1363 # if   DIM_OF_WORLD == 1
1364 #  define EXPAND_DOW(x)       (x)[0]
1365 #  define FORMAT_DOW          "%10.5le"
1366 #  define SCAN_FORMAT_DOW     "%f"
1367 #  define SCAN_EXPAND_DOW(v)  &(v)[0]
1368 #  define MEXPAND_DOW(m)      (m)[0][0]
1369 #  define SCAN_MFORMAT_DOW    "%f %f"
1370 #  define SCAN_MEXPAND_DOW(m) &(m)[0][0]
1371 #  define MFORMAT_DOW         FORMAT_DOW
1372 #  define DMEXPAND_DOW(m)     EXPAND_DOW(m)
1373 #  define DMFORMAT_DOW        FORMAT_DOW
1374 #  define SCMEXPAND_DOW(m)    (m)
1375 #  define SCMFORMAT_DOW       "[%10.5le]"
1376 # elif DIM_OF_WORLD == 2
1377 #  define EXPAND_DOW(x)       (x)[0], (x)[1]
1378 #  define FORMAT_DOW          "[%10.5le, %10.5le]"
1379 #  define SCAN_FORMAT_DOW     "%f %f"
1380 #  define SCAN_EXPAND_DOW(v)  &(v)[0], &(v)[1]
1381 #  define MEXPAND_DOW(m)      EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1])
1382 #  define SCAN_MEXPAND_DOW(m) SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1])
1383 #  define SCAN_MFORMAT_DOW    SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1384 #  define MFORMAT_DOW         "[" FORMAT_DOW ", " FORMAT_DOW "]"
1385 #  define DMEXPAND_DOW(m)     EXPAND_DOW(m)
1386 #  define DMFORMAT_DOW        FORMAT_DOW
1387 #  define SCMEXPAND_DOW(m)    (m)
1388 #  define SCMFORMAT_DOW       "[%10.5le]"
1389 # elif DIM_OF_WORLD == 3
1390 #  define EXPAND_DOW(x)      (x)[0], (x)[1], (x)[2]
1391 #  define FORMAT_DOW         "[%10.5le, %10.5le, %10.5le]"
1392 #  define SCAN_FORMAT_DOW    "%f %f %f"
1393 #  define SCAN_EXPAND_DOW(v) &(v)[0], &(v)[1], &(v)[2]
1394 #  define MEXPAND_DOW(m)					\
1395   EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2])
1396 #  define SCAN_MEXPAND_DOW(m)						\
1397   SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2])
1398 #  define MFORMAT_DOW      "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1399 #  define SCAN_MFORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1400 #  define DMEXPAND_DOW(m)  EXPAND_DOW(m)
1401 #  define DMFORMAT_DOW     FORMAT_DOW
1402 #  define SCMEXPAND_DOW(m) (m)
1403 #  define SCMFORMAT_DOW    "[%10.5le]"
1404 # elif DIM_OF_WORLD == 4
1405 #  define EXPAND_DOW(x)  (x)[0], (x)[1], (x)[2], (x)[3]
1406 #  define FORMAT_DOW     "[%10.5le, %10.5le, %10.5le, %10.5le]"
1407 #  define SCAN_FORMAT_DOW     "%f %f %f %f"
1408 #  define SCAN_EXPAND_DOW(v) &(v)[0], &(v)[1], &(v)[2], &(v)[3]
1409 #  define MEXPAND_DOW(m)						\
1410   EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]), EXPAND_DOW((m)[3])
1411 #  define SCAN_MEXPAND_DOW(m)				\
1412   SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]),	\
1413     SCAN_EXPAND_DOW((m)[2]), SCAN_EXPAND_DOW((m)[3])
1414 #  define MFORMAT_DOW						\
1415   "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1416 #  define SCAN_MFORMAT_DOW				\
1417   SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1418 #  define DMEXPAND_DOW(m) EXPAND_DOW(m)
1419 #  define DMFORMAT_DOW    FORMAT_DOW
1420 #  define SCMEXPAND_DOW(m) (m)
1421 #  define SCMFORMAT_DOW   "[%10.5le]"
1422 # elif DIM_OF_WORLD == 5
1423 #  define EXPAND_DOW(x)      (x)[0], (x)[1], (x)[2], (x)[3], (x)[4]
1424 #  define FORMAT_DOW         "[%10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
1425 #  define SCAN_FORMAT_DOW    "%f %f %f %f %f"
1426 #  define SCAN_EXPAND_DOW(v) &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4]
1427 #  define MEXPAND_DOW(m)					\
1428   EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]),	\
1429     EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4])
1430 #  define SCAN_MEXPAND_DOW(m)						\
1431   SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
1432     SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4])
1433 #  define MFORMAT_DOW							\
1434  "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1435 #  define SCAN_MFORMAT_DOW				\
1436   SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW	\
1437   SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1438 #  define DMEXPAND_DOW(m) EXPAND_DOW(m)
1439 #  define DMFORMAT_DOW    FORMAT_DOW
1440 #  define SCMEXPAND_DOW(m) (m)
1441 #  define SCMFORMAT_DOW   "[%10.5le]"
1442 # elif DIM_OF_WORLD == 6
1443 #  define EXPAND_DOW(x)  (x)[0], (x)[1], (x)[2], (x)[3], (x)[4], (x)[5]
1444 #  define FORMAT_DOW						\
1445 #  define SCAN_FORMAT_DOW     "%f %f %f %f %f %f"
1446 #  define SCAN_EXPAND_DOW(v)				\
1447   &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4], &(v)[5]
1448 #  define MFORMAT_DOW						\
1449   "[%10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
1450 #  define MEXPAND_DOW(m) \
1451   EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]),	\
1452     EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4]), EXPAND_DOW((m)[5])
1453 #  define SCAN_MEXPAND_DOW(m) \
1454   SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
1455     SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4]), SCAN_EXPAND_DOW((m)[5])
1456 #  define MFORMAT_DOW				\
1457   "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", "	\
1458       FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1459 #  define SCAN_MFORMAT_DOW				\
1460   SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW	\
1461   SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1462 #  define DMEXPAND_DOW(m)  EXPAND_DOW(m)
1463 #  define DMFORMAT_DOW     FORMAT_DOW
1464 #  define SCMEXPAND_DOW(m) (m)
1465 #  define SCMFORMAT_DOW    "[%10.5le]"
1466 # elif DIM_OF_WORLD == 7
1467 #  define EXPAND_DOW(x)						\
1468   (x)[0], (x)[1], (x)[2], (x)[3], (x)[4], (x)[5], (x)[6]
1469 #  define FORMAT_DOW							\
1470   "[%10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
1471 #  define SCAN_FORMAT_DOW     "%f %f %f %f %f %f %f"
1472 #  define SCAN_EXPAND_DOW(v)					\
1473   &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4], &(v)[5], &(v)[6]
1474 #  define MEXPAND_DOW(m)						\
1475   EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]),		\
1476     EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4]), EXPAND_DOW((m)[5]),		\
1477     EXPAND_DOW((m)[6])
1478 #  define SCAN_MEXPAND_DOW(m)						\
1479   SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
1480     SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4]), SCAN_EXPAND_DOW((m)[5]) \
1481     SCAN_EXPAND_DOW((m)[6])
1482 #  define MFORMAT_DOW						\
1483   "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", "	\
1484       FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1485 #  define SCAN_MFORMAT_DOW						\
1486   SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW	\
1487   SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1488 #  define DMEXPAND_DOW(m) EXPAND_DOW(m)
1489 #  define DMFORMAT_DOW    FORMAT_DOW
1490 #  define SCMEXPAND_DOW(m) (m)
1491 #  define SCMFORMAT_DOW   "[%10.5le]"
1492 # elif DIM_OF_WORLD == 8
1493 #  define EXPAND_DOW(x)						\
1494   (x)[0], (x)[1], (x)[2], (x)[3], (x)[4], (x)[5], (x)[6], (x)[7]
1495 #  define FORMAT_DOW							\
1496   "[%10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
1497 #  define SCAN_FORMAT_DOW     "%f %f %f %f %f %f %f %f"
1498 #  define SCAN_EXPAND_DOW(v)				\
1499   &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4], &(v)[5], &(v)[6], &(v)[7]
1500 #  define MEXPAND_DOW(m)						\
1501   EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]),		\
1502     EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4]), EXPAND_DOW((m)[5]),		\
1503     EXPAND_DOW((m)[6]), EXPAND_DOW((m)[7])
1504 #  define SCAN_MEXPAND_DOW(m)						\
1505   SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
1506     SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4]), SCAN_EXPAND_DOW((m)[5]) \
1507     SCAN_EXPAND_DOW((m)[6]), SCAN_EXPAND_DOW((m)[7])
1508 #  define MFORMAT_DOW				\
1509   "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", "\
1510       FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1511 #  define SCAN_MFORMAT_DOW						\
1512   SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW	\
1513   SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1514 #  define DMEXPAND_DOW(m)  EXPAND_DOW(m)
1515 #  define DMFORMAT_DOW     FORMAT_DOW
1516 #  define SCMEXPAND_DOW(m) (m)
1517 #  define SCMFORMAT_DOW    "[%10.5le]"
1518 # elif DIM_OF_WORLD == 9
1519 #  define EXPAND_DOW(x)						\
1520   (x)[0], (x)[1], (x)[2], (x)[3], (x)[4], (x)[5], (x)[6], (x)[7], (x)[8]
1521 #  define FORMAT_DOW							\
1522   "[%10.5le, %10.5le, %10.5le, %10.5le, "				\
1523   "%10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
1524 #  define SCAN_FORMAT_DOW     "%f %f %f %f %f %f %f %f %f"
1525 #  define SCAN_EXPAND_DOW(v)				\
1526   &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4],		\
1527     &(v)[5], &(v)[6], &(v)[7], &(v)[8]
1528 #  define MEXPAND_DOW(m)						\
1529   EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]),		\
1530     EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4]), EXPAND_DOW((m)[5]),		\
1531     EXPAND_DOW((m)[6]), EXPAND_DOW((m)[7]), EXPAND_DOW((m)[8])
1532 #  define SCAN_MEXPAND_DOW(m)						\
1533   SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
1534     SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4]), SCAN_EXPAND_DOW((m)[5]) \
1535     SCAN_EXPAND_DOW((m)[6]), SCAN_EXPAND_DOW((m)[7]), SCAN_EXPAND_DOW((m)[8])
1536 #  define MFORMAT_DOW				\
1537   "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", "\
1538       FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1539 #  define SCAN_MFORMAT_DOW						\
1540   SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW	\
1541   SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW	\
1542   SCAN_FORMAT_DOW
1543 #  define DMEXPAND_DOW(m) EXPAND_DOW(m)
1544 #  define DMFORMAT_DOW    FORMAT_DOW
1545 #  define SCMEXPAND_DOW(m) (m)
1546 #  define SCMFORMAT_DOW   "[%10.5le]"
1547 # endif
1548 
1549 /* Some inline functions for barycentric coordinates, and conversion
1550  * between barycentric gradients and cartesian gradients.
1551  */
1552 
1553 /** x = a */
SET_BAR(int dim,REAL a,REAL_B x)1554 static inline const REAL *SET_BAR(int dim, REAL a, REAL_B x)
1555 {
1556   int i;
1557 
1558   for (i = 0; i < N_LAMBDA(dim); i++) {
1559     x[i] = a;
1560   }
1561   for (; i < N_LAMBDA_MAX; i++) {
1562     x[i] = 0.0;
1563   }
1564   return x;
1565 }
1566 
MSET_BAR(int dim,REAL a,REAL_BB x)1567 static inline const REAL_B *MSET_BAR(int dim, REAL a, REAL_BB x)
1568 {
1569   int i;
1570 
1571   for (i = 0; i < N_LAMBDA(dim); i++) {
1572     SET_BAR(dim, a, x[i]);
1573   }
1574   return (const REAL_B *)x;
1575 }
1576 
1577 /** x *= a */
SCAL_BAR(int dim,REAL a,REAL_B x)1578 static inline const REAL *SCAL_BAR(int dim, REAL a, REAL_B x)
1579 {
1580   int i;
1581 
1582   for (i = 0; i < N_LAMBDA(dim); i++) {
1583     x[i] *= a;
1584   }
1585   return x;
1586 }
1587 
MSCAL_BAR(int dim,REAL a,REAL_BB x)1588 static inline const REAL_B *MSCAL_BAR(int dim, REAL a, REAL_BB x)
1589 {
1590   int i;
1591 
1592   for (i = 0; i < N_LAMBDA(dim); i++) {
1593     SCAL_BAR(dim, a, x[i]);
1594   }
1595   return (const REAL_B *)x;
1596 }
1597 
1598 /** z = a*x + b*y, x, y, z are barycentric coordinate tuples.
1599  */
SCP_BAR(int dim,const REAL_B x,const REAL_B y)1600 static inline REAL SCP_BAR(int dim, const REAL_B x, const REAL_B y)
1601 {
1602   int i;
1603   REAL res;
1604 
1605   res = x[0] * y[0];
1606   for (i = 1; i < N_LAMBDA(dim); i++) {
1607     res += x[i] * y[i];
1608   }
1609   return res;
1610 }
1611 
1612 /** y = a*x,  x and y are barycentric coordinate tuples.
1613  */
AXEY_BAR(int dim,REAL a,const REAL_B x,REAL_B y)1614 static inline const REAL *AXEY_BAR(int dim, REAL a, const REAL_B x, REAL_B y)
1615 {
1616   int i;
1617 
1618   for (i = 0; i < N_LAMBDA(dim); i++) {
1619     y[i] = a * x[i];
1620   }
1621   return y;
1622 }
1623 
MAXEY_BAR(int dim,REAL a,const REAL_BB x,REAL_BB y)1624 static inline const REAL_B *MAXEY_BAR(int dim,
1625 				      REAL a, const REAL_BB x, REAL_BB y)
1626 {
1627   int i;
1628 
1629   for (i = 0; i < N_LAMBDA(dim); i++) {
1630     AXEY_BAR(dim, a, x[i], y[i]);
1631   }
1632   return (const REAL_B *)y;
1633 }
1634 
1635 /** y += a*x,  x and y are barycentric coordinate tuples.
1636  */
AXPY_BAR(int dim,REAL a,const REAL_B x,REAL_B y)1637 static inline const REAL *AXPY_BAR(int dim, REAL a, const REAL_B x, REAL_B y)
1638 {
1639   int i;
1640 
1641   for (i = 0; i < N_LAMBDA(dim); i++) {
1642     y[i] += a * x[i];
1643   }
1644   return y;
1645 }
1646 
MAXPY_BAR(int dim,REAL a,const REAL_BB x,REAL_BB y)1647 static inline const REAL_B *MAXPY_BAR(int dim,
1648 				      REAL a, const REAL_BB x, REAL_BB y)
1649 {
1650   int i;
1651 
1652   for (i = 0; i < N_LAMBDA(dim); i++) {
1653     AXPY_BAR(dim, a, x[i], y[i]);
1654   }
1655   return (const REAL_B *)y;
1656 }
1657 
1658 /** z = a*x + b*y, x, y, z are barycentric coordinate tuples.
1659  */
AXPBY_BAR(int dim,REAL a,const REAL_B x,REAL b,const REAL_B y,REAL_B z)1660 static inline const REAL *AXPBY_BAR(int dim,
1661 				    REAL a, const REAL_B x,
1662 				    REAL b, const REAL_B y,
1663 				    REAL_B z)
1664 {
1665   int i;
1666 
1667   for (i = 0; i < N_LAMBDA(dim); i++) {
1668     z[i] = b*y[i] + a * x[i];
1669   }
1670   return z;
1671 }
1672 
MAXPBY_BAR(int dim,REAL a,const REAL_BB x,REAL b,const REAL_BB y,REAL_BB z)1673 static inline const REAL_B *MAXPBY_BAR(int dim,
1674 				       REAL a, const REAL_BB x,
1675 				       REAL b, const REAL_BB y,
1676 				       REAL_BB z)
1677 {
1678   int i;
1679 
1680   for (i = 0; i < N_LAMBDA(dim); i++) {
1681     AXPBY_BAR(dim, a, x[i], b, y[i], z[i]);
1682   }
1683   return (const REAL_B *)z;
1684 }
1685 
AXPBYPCZ_BAR(int dim,REAL a,const REAL_B x,REAL b,const REAL_B y,REAL c,const REAL_B z,REAL_B w)1686 static inline const REAL *AXPBYPCZ_BAR(int dim,
1687 				       REAL a, const REAL_B x,
1688 				       REAL b, const REAL_B y,
1689 				       REAL c, const REAL_B z,
1690 				       REAL_B w)
1691 {
1692   int i;
1693 
1694   for (i = 0; i < N_LAMBDA(dim); i++) {
1695     w[i] = a * x[i] + b*y[i] + c*z[i];
1696   }
1697   return w;
1698 }
1699 
1700 /** b = a, a and b are barycentric coordinate tuples.
1701  */
COPY_BAR(int dim,const REAL_B a,REAL_B b)1702 static inline const REAL *COPY_BAR(int dim, const REAL_B a, REAL_B b)
1703 {
1704   int i;
1705 
1706   for (i = 0; i < N_LAMBDA(dim); i++) {
1707     b[i] = a[i];
1708   }
1709   return b;
1710 }
1711 
MCOPY_BAR(int dim,const REAL_BB a,REAL_BB b)1712 static inline const REAL_B *MCOPY_BAR(int dim, const REAL_BB a, REAL_BB b)
1713 {
1714   int i;
1715 
1716   for (i = 0; i < N_LAMBDA(dim); i++) {
1717     COPY_BAR(dim, a[i], b[i]);
1718   }
1719   return (const REAL_B *)b;
1720 }
1721 
1722 /** Convert a barycentric gradient to a world gradient, given the
1723  * gradient of the transformation to the reference element. (VM means
1724  * vector-matrix).
1725  */
1726 __FORCE_INLINE_ATTRIBUTE__
GRAD_DOW(int dim,const REAL_BD Lambda,const REAL_B b_grd,REAL_D x_grd)1727 static inline const REAL *GRAD_DOW(int dim,
1728 				   const REAL_BD Lambda,
1729 				   const REAL_B b_grd,
1730 				   REAL_D x_grd)
1731 {
1732   static REAL_D res;
1733   int i, j;
1734 
1735   if (__UNLIKELY__(x_grd == NULL)) {
1736     x_grd = res;
1737   }
1738 
1739   for (i = 0; i < DIM_OF_WORLD; i++) {
1740     x_grd[i] = b_grd[0] * Lambda[0][i];
1741     for (j = 1; j < N_LAMBDA(dim); j++) {
1742       x_grd[i] += b_grd[j] * Lambda[j][i];
1743     }
1744   }
1745 
1746   return x_grd;
1747 }
1748 
1749 __FORCE_INLINE_ATTRIBUTE__
MGRAD_DOW(int dim,const REAL_BD Lambda,const REAL_DB b_grd,REAL_DD x_grd)1750 static inline const REAL_D *MGRAD_DOW(int dim,
1751 				      const REAL_BD Lambda,
1752 				      const REAL_DB b_grd,
1753 				      REAL_DD x_grd)
1754 {
1755   static REAL_DD res;
1756   int i;
1757 
1758   if (__UNLIKELY__(x_grd == NULL)) {
1759     x_grd = res;
1760   }
1761 
1762   for (i = 0; i < DIM_OF_WORLD; i++) {
1763     GRAD_DOW(dim, Lambda, b_grd[i], x_grd[i]);
1764   }
1765 
1766   return (const REAL_D *)x_grd;
1767 }
1768 
1769 /* Compute the divergence */
DIV_DOW(int dim,const REAL_BD Lambda,const REAL_DB b_grd)1770 static inline REAL DIV_DOW(int dim, const REAL_BD Lambda, const REAL_DB b_grd)
1771 {
1772   REAL div = 0.0;
1773   int i, alpha;
1774 
1775   for (i = 0; i < DIM_OF_WORLD; i++) {
1776     for (alpha = 0; alpha < N_LAMBDA(dim); ++alpha) {
1777       div += Lambda[alpha][i] * b_grd[i][alpha];
1778     }
1779   }
1780 
1781   return div;
1782 }
1783 
1784 __FORCE_INLINE_ATTRIBUTE__
GRAD_P_DOW(int dim,const REAL_BD Lambda,const REAL_B b_grd,REAL_D x_grd)1785 static inline const REAL *GRAD_P_DOW(int dim,
1786 				     const REAL_BD Lambda,
1787 				     const REAL_B b_grd,
1788 				     REAL_D x_grd)
1789 {
1790   static REAL_D res;
1791   int i, j;
1792 
1793   if (__UNLIKELY__(x_grd == NULL)) {
1794     x_grd = res;
1795   }
1796 
1797   for (i = 0; i < DIM_OF_WORLD; i++) {
1798     for (j = 0; j < N_LAMBDA(dim); j++) {
1799       x_grd[i] += b_grd[j] * Lambda[j][i];
1800     }
1801   }
1802 
1803   return x_grd;
1804 }
1805 
1806 __FORCE_INLINE_ATTRIBUTE__
MGRAD_P_DOW(int dim,const REAL_BD Lambda,const REAL_DB b_grd,REAL_DD x_grd)1807 static inline const REAL_D *MGRAD_P_DOW(int dim,
1808 					const REAL_BD Lambda,
1809 					const REAL_DB b_grd,
1810 					REAL_DD x_grd)
1811 {
1812   static REAL_DD res;
1813   int i;
1814 
1815   if (__UNLIKELY__(x_grd == NULL)) {
1816     x_grd = res;
1817   }
1818 
1819   for (i = 0; i < DIM_OF_WORLD; i++) {
1820     GRAD_P_DOW(dim, Lambda, b_grd[i], x_grd[i]);
1821   }
1822 
1823   return (const REAL_D *)x_grd;
1824 }
1825 
1826 
1827 /** Convert a barycentric Hesse matrix to a world Hesse matrix, given
1828  * the gradient of the transformation to the reference element.
1829  */
1830 __FORCE_INLINE_ATTRIBUTE__
D2_DOW(int dim,const REAL_BD Lambda,const REAL_BB b_hesse,REAL_DD x_hesse)1831 static inline const REAL_D *D2_DOW(int dim,
1832 				   const REAL_BD Lambda,
1833 				   const REAL_BB b_hesse,
1834 				   REAL_DD x_hesse)
1835 {
1836   static REAL_DD res;
1837   int i, j, k, l;
1838 
1839   if (__UNLIKELY__(x_hesse == NULL)) {
1840     x_hesse = res;
1841   }
1842 
1843   for (i = 0; i < DIM_OF_WORLD; i++) {
1844     x_hesse[i][i] = 0.0;
1845     for (k = 0; k < N_LAMBDA(dim); k++) {
1846       x_hesse[i][i] += Lambda[k][i] * b_hesse[k][k] * Lambda[k][i];
1847       for (l = k+1; l < N_LAMBDA(dim); l++) {
1848 	x_hesse[i][i] +=  2.0 * Lambda[k][i] * b_hesse[k][l] * Lambda[l][i];
1849       }
1850     }
1851     for (j = i+1; j < DIM_OF_WORLD; j++) {
1852       x_hesse[i][j] = 0.0;
1853       for (k = 0; k < N_LAMBDA(dim); k++) {
1854 	x_hesse[i][j] += Lambda[k][i] * b_hesse[k][k] * Lambda[k][j];
1855 	for (l = k+1; l < N_LAMBDA(dim); l++) {
1856 	  x_hesse[i][j] += b_hesse[k][l]*(Lambda[k][i] * Lambda[l][j]
1857 					  +
1858 					  Lambda[l][i] * Lambda[k][j]);
1859 	}
1860       }
1861       x_hesse[j][i] = x_hesse[i][j];
1862     }
1863   }
1864 
1865   return (const REAL_D *)x_hesse;
1866 }
1867 
1868 __FORCE_INLINE_ATTRIBUTE__
MD2_DOW(int dim,const REAL_BD Lambda,const REAL_BB * b_hesse,REAL_DDD x_hesse)1869 static inline const REAL_DD *MD2_DOW(int dim,
1870 				     const REAL_BD Lambda,
1871 				     const REAL_BB *b_hesse,
1872 				     REAL_DDD x_hesse)
1873 {
1874   static REAL_DDD res;
1875   int i;
1876 
1877   if (__UNLIKELY__(x_hesse == NULL)) {
1878     x_hesse = res;
1879   }
1880 
1881   for (i = 0; i < DIM_OF_WORLD; i++) {
1882     D2_DOW(dim, Lambda, b_hesse[i], x_hesse[i]);
1883   }
1884 
1885   return (const REAL_DD *)x_hesse;
1886 }
1887 
1888 __FORCE_INLINE_ATTRIBUTE__
D2_P_DOW(int dim,const REAL_BD Lambda,const REAL_BB b_hesse,REAL_DD x_hesse)1889 static inline const REAL_D *D2_P_DOW(int dim,
1890 				     const REAL_BD Lambda,
1891 				     const REAL_BB b_hesse,
1892 				     REAL_DD x_hesse)
1893 {
1894   static REAL_DD res;
1895   int i, j, k, l;
1896 
1897   if (__UNLIKELY__(x_hesse == NULL)) {
1898     x_hesse = res;
1899   }
1900 
1901   for (i = 0; i < DIM_OF_WORLD; i++) {
1902     for (k = 0; k < N_LAMBDA(dim); k++) {
1903       x_hesse[i][i] += Lambda[k][i] * b_hesse[k][k] * Lambda[k][i];
1904       for (l = k+1; l < N_LAMBDA(dim); l++) {
1905 	x_hesse[i][i] +=  2.0 * Lambda[k][i] * b_hesse[k][l] * Lambda[l][i];
1906       }
1907     }
1908     for (j = i+1; j < DIM_OF_WORLD; j++) {
1909       REAL tmp = 0.0;
1910       for (k = 0; k < N_LAMBDA(dim); k++) {
1911 	tmp += Lambda[k][i] * b_hesse[k][k] * Lambda[k][j];
1912 	for (l = k+1; l < N_LAMBDA(dim); l++) {
1913 	  tmp += b_hesse[k][l]*(Lambda[k][i] * Lambda[l][j]
1914 					  +
1915 					  Lambda[l][i] * Lambda[k][j]);
1916 	}
1917       }
1918       x_hesse[i][j] += tmp;
1919       x_hesse[j][i] += tmp;
1920     }
1921   }
1922 
1923   return (const REAL_D *)x_hesse;
1924 }
1925 
1926 __FORCE_INLINE_ATTRIBUTE__
MD2_P_DOW(int dim,const REAL_BD Lambda,const REAL_BB * b_hesse,REAL_DDD x_hesse)1927 static inline const REAL_DD *MD2_P_DOW(int dim,
1928 				       const REAL_BD Lambda,
1929 				       const REAL_BB *b_hesse,
1930 				       REAL_DDD x_hesse)
1931 {
1932   static REAL_DDD res;
1933   int i;
1934 
1935   if (__UNLIKELY__(x_hesse == NULL)) {
1936     x_hesse = res;
1937   }
1938 
1939   for (i = 0; i < DIM_OF_WORLD; i++) {
1940     D2_P_DOW(dim, Lambda, b_hesse[i], x_hesse[i]);
1941   }
1942 
1943   return (const REAL_DD *)x_hesse;
1944 }
1945 
1946 /**Compute the Laplacian from a given Hessian in barycentric co-ordinates. */
LAPLACE_DOW(int dim,const REAL_BD Lambda,const REAL_BB b_hesse)1947 static inline REAL LAPLACE_DOW(int dim,
1948 			       const REAL_BD Lambda,
1949 			       const REAL_BB b_hesse)
1950 {
1951   REAL res = 0.0;
1952   int i, k, l;
1953 
1954   for (i = 0; i < DIM_OF_WORLD; i++) {
1955     for (k = 0; k < N_LAMBDA(dim); k++) {
1956       res += Lambda[k][i] * b_hesse[k][k] * Lambda[k][i];
1957       for (l = k+1; l < N_LAMBDA(dim); l++) {
1958 	res +=  2.0 * Lambda[k][i] * b_hesse[k][l] * Lambda[l][i];
1959       }
1960     }
1961   }
1962 
1963   return res;
1964 }
1965 
1966 __FORCE_INLINE_ATTRIBUTE__
MLAPLACE_DOW(int dim,const REAL_BD Lambda,const REAL_BB * b_hesse,REAL_D laplace)1967 static inline const REAL *MLAPLACE_DOW(int dim,
1968 				       const REAL_BD Lambda,
1969 				       const REAL_BB *b_hesse,
1970 				       REAL_D laplace)
1971 {
1972   static REAL_D res;
1973   int i;
1974 
1975   if (__UNLIKELY__(laplace == NULL)) {
1976     laplace = res;
1977   }
1978 
1979   for (i = 0; i < DIM_OF_WORLD; i++) {
1980     laplace[i] = LAPLACE_DOW(dim, Lambda, b_hesse[i]);
1981   }
1982 
1983   return (const REAL *)laplace;
1984 }
1985 
1986 __FORCE_INLINE_ATTRIBUTE__
MLAPLACE_P_DOW(int dim,const REAL_BD Lambda,const REAL_BB * b_hesse,REAL_D laplace)1987 static inline const REAL *MLAPLACE_P_DOW(int dim,
1988 					 const REAL_BD Lambda,
1989 					 const REAL_BB *b_hesse,
1990 					 REAL_D laplace)
1991 {
1992   static REAL_D res;
1993   int i;
1994 
1995   if (__UNLIKELY__(laplace == NULL)) {
1996     laplace = res;
1997   }
1998 
1999   for (i = 0; i < DIM_OF_WORLD; i++) {
2000     laplace[i] += LAPLACE_DOW(dim, Lambda, b_hesse[i]);
2001   }
2002 
2003   return (const REAL *)laplace;
2004 }
2005 
2006 /** Convert a cartesian gradient to a barycentric gradient, given the
2007  * vertices of the element (in the parametric case "coords" is just
2008  * the barycentric gradient of the coordinate functions).
2009  */
2010 __FORCE_INLINE_ATTRIBUTE__
GRAD_BAR(int dim,const REAL_D * coords,const REAL_D x_grd,REAL_B b_grd)2011 static inline const REAL *GRAD_BAR(int dim,
2012 				   const REAL_D *coords,
2013 				   const REAL_D x_grd,
2014 				   REAL_B b_grd)
2015 {
2016   static REAL_B res;
2017   int i;
2018 
2019   if (__UNLIKELY__(b_grd == NULL)) {
2020     b_grd = res;
2021   }
2022 
2023   for (i = 0; i < N_LAMBDA(dim); i++) {
2024     b_grd[i] = SCP_DOW(x_grd, coords[i]);
2025   }
2026 
2027   return b_grd;
2028 }
2029 
2030 __FORCE_INLINE_ATTRIBUTE__
MGRAD_BAR(int dim,const REAL_D * coords,const REAL_DD x_grd,REAL_DB b_grd)2031 static inline const REAL_B *MGRAD_BAR(int dim,
2032 				      const REAL_D *coords,
2033 				      const REAL_DD x_grd,
2034 				      REAL_DB b_grd)
2035 {
2036   static REAL_B res[DIM_OF_WORLD];
2037   int i;
2038 
2039   if (__UNLIKELY__(b_grd == NULL)) {
2040     b_grd = res;
2041   }
2042 
2043   for (i = 0; i < DIM_OF_WORLD; i++) {
2044     GRAD_BAR(dim, coords, x_grd[i], b_grd[i]);
2045   }
2046 
2047   return (const REAL_B *)b_grd;
2048 }
2049 
2050 __FORCE_INLINE_ATTRIBUTE__
D2_BAR(int dim,const REAL_D * coords,const REAL_DD x_D2,REAL_BB b_D2)2051 static inline const REAL_B *D2_BAR(int dim,
2052 				   const REAL_D *coords,
2053 				   const REAL_DD x_D2,
2054 				   REAL_BB b_D2)
2055 {
2056   static REAL_BB res;
2057   int i, j, k, l;
2058 
2059   if (__UNLIKELY__(b_D2 == NULL)) {
2060     b_D2 = res;
2061   }
2062 
2063   for (i = 0; i < N_LAMBDA(dim); i++) {
2064     b_D2[i][i] = 0.0;
2065     for (k = 0; k < DIM_OF_WORLD; k++) {
2066       b_D2[i][i] += SQR(coords[i][k])*x_D2[k][k];
2067       for (l = k+1; l < DIM_OF_WORLD; l++) {
2068 	b_D2[i][i] += coords[i][k]*2.0*x_D2[k][l]*coords[i][l];
2069       }
2070     }
2071     for (j = i+1; j < N_LAMBDA(dim); j++) {
2072       b_D2[i][j] = 0.0;
2073       for (k = 0; k < DIM_OF_WORLD; k++) {
2074 	b_D2[i][j] += coords[i][k]*x_D2[k][k]*coords[j][k];
2075 	for (l = k+1; l < DIM_OF_WORLD; l++) {
2076 	  b_D2[i][j] += x_D2[k][l] * (coords[i][k] * coords[j][l]
2077 				      +
2078 				      coords[i][l] * coords[j][k]);
2079 	}
2080       }
2081       b_D2[j][i] = b_D2[i][j];
2082     }
2083   }
2084   return (const REAL_B *)b_D2;
2085 }
2086 
2087 __FORCE_INLINE_ATTRIBUTE__
MD2_BAR(int dim,const REAL_D * coords,const REAL_DD * x_D2,REAL_BB * b_D2)2088 static inline const REAL_BB *MD2_BAR(int dim,
2089 				     const REAL_D *coords,
2090 				     const REAL_DD *x_D2,
2091 				     REAL_BB *b_D2)
2092 {
2093   static REAL_BB res[DIM_OF_WORLD];
2094   int i;
2095 
2096   if (__UNLIKELY__(b_D2 == NULL)) {
2097     b_D2 = res;
2098   }
2099 
2100   for (i = 0; i < DIM_OF_WORLD; i++) {
2101     D2_BAR(dim, coords, x_D2[i], b_D2[i]);
2102   }
2103 
2104   return (const REAL_BB *)b_D2;
2105 }
2106 
MM_DOW(const REAL_DD a,const REAL_DD b,REAL_DD c)2107 static inline REAL_D *MM_DOW(const REAL_DD a,
2108 			     const REAL_DD b, REAL_DD c)
2109 {
2110   int i, j, k;
2111 
2112   for (i = 0; i < DIM_OF_WORLD; i++) {
2113     for (j = 0; j < DIM_OF_WORLD; j++) {
2114       c[i][j] = 0.0;
2115       for (k = 0; k < DIM_OF_WORLD; k++) {
2116 	c[i][j] += a[i][k]*b[k][j];
2117       }
2118     }
2119   }
2120   return c;
2121 }
MMT_DOW(const REAL_DD a,const REAL_DD b,REAL_DD c)2122 static inline REAL_D *MMT_DOW(const REAL_DD a,
2123 			      const REAL_DD b, REAL_DD c)
2124 {
2125   int i, j, k;
2126 
2127   for (i = 0; i < DIM_OF_WORLD; i++) {
2128     for (j = 0; j < DIM_OF_WORLD; j++) {
2129       c[i][j] = 0.0;
2130       for (k = 0; k < DIM_OF_WORLD; k++) {
2131 	c[i][j] += a[i][k]*b[j][k];
2132       }
2133     }
2134   }
2135   return c;
2136 }
MTM_DOW(const REAL_DD a,const REAL_DD b,REAL_DD c)2137 static inline REAL_D *MTM_DOW(const REAL_DD a,
2138 			      const REAL_DD b,
2139 			      REAL_DD c)
2140 {
2141   int i, j, k;
2142 
2143   for (i = 0; i < DIM_OF_WORLD; i++) {
2144     for (j = 0; j < DIM_OF_WORLD; j++) {
2145       c[i][j] = a[0][i]*b[j][0];
2146       for (k = 1; k < DIM_OF_WORLD; k++) {
2147 	c[i][j] += a[k][i]*b[j][k];
2148       }
2149     }
2150   }
2151   return c;
2152 }
2153 
2154 #if DIM_OF_WORLD == 1
MDET_DOW(const REAL_DD m)2155 static inline REAL MDET_DOW(const REAL_DD m)
2156 {
2157   return m[0][0];
2158 }
MINVERT_DOW(const REAL_DD m,REAL_DD mi)2159 static inline REAL MINVERT_DOW(const REAL_DD m, REAL_DD mi)
2160 {
2161   if (mi[0][0] != 0.0) {
2162     mi[0][0] = 1.0/m[0][0];
2163   }
2164 
2165   return m[0][0];
2166 }
2167 #elif DIM_OF_WORLD == 2
MDET_DOW(const REAL_DD m)2168 static inline REAL MDET_DOW(const REAL_DD m)
2169 {
2170   return m[0][0]*m[1][1] - m[1][0]*m[0][1];
2171 }
MINVERT_DOW(const REAL_DD m,REAL_DD mi)2172 static inline REAL MINVERT_DOW(const REAL_DD m, REAL_DD mi)
2173 {
2174   REAL det = m[0][0]*m[1][1] - m[1][0]*m[0][1];
2175 
2176   if (det != 0.0) {
2177     mi[0][0] = m[1][1] / det;
2178     mi[1][1] = m[0][0] / det;
2179     mi[0][1] = -m[0][1] / det;
2180     mi[1][0] = -m[1][0] / det;
2181   }
2182 
2183   return det;
2184 }
2185 #elif DIM_OF_WORLD == 3
MDET_DOW(const REAL_DD m)2186 static inline REAL MDET_DOW(const REAL_DD m)
2187 {
2188 #if 0
2189   int j;
2190   REAL_D tmp;
2191   REAL det;
2192 
2193   det = 0;
2194   for (j = 0; j < DIM_OF_WORLD; j++) {
2195     WEDGE_DOW(m[(j+1)%DIM_OF_WORLD], m[(j+2)%DIM_OF_WORLD], tmp);
2196     det += SCP_DOW(tmp, m[j]);
2197   }
2198   return det / (REAL)DIM_OF_WORLD;
2199 #else
2200   return
2201     +(m[1][1]*m[2][2] - m[2][1]*m[1][2]) * m[0][0]
2202     -(m[0][1]*m[2][2] - m[2][1]*m[0][2]) * m[1][0]
2203     +(m[0][1]*m[1][2] - m[1][1]*m[0][2]) * m[2][0];
2204 #endif
2205 }
MINVERT_DOW(const REAL_DD m,REAL_DD mi)2206 static inline REAL MINVERT_DOW(const REAL_DD m, REAL_DD mi)
2207 {
2208 #if 0
2209   int i, j;
2210   REAL_D tmp;
2211 
2212   for (j = 0; j < DIM_OF_WORLD; j++) {
2213     WEDGE_DOW(m[(j+1)%DIM_OF_WORLD], m[(j+2)%DIM_OF_WORLD], tmp);
2214     SCAL_DOW(1.0/SCP_DOW(tmp, m[j]), tmp);
2215     for (i = 0; i < DIM_OF_WORLD; i++) {
2216       mi[i][j] = tmp[i];
2217     }
2218   }
2219   return mi;
2220 #else
2221   REAL det;
2222   int i;
2223 
2224   mi[0][0] = +(m[1][1]*m[2][2] - m[2][1]*m[1][2]);
2225   mi[0][1] = -(m[0][1]*m[2][2] - m[2][1]*m[0][2]);
2226   mi[0][2] = +(m[0][1]*m[1][2] - m[1][1]*m[0][2]);
2227 
2228   det = mi[0][0] * m[0][0] + mi[0][1] * m[1][0] + mi[0][2]*m[2][0];
2229 
2230   if (det != 0.0) {
2231     for (i = 0; i < DIM_OF_WORLD; i++) {
2232       mi[0][i] /= det;
2233     }
2234 
2235     mi[1][0] = (m[1][2]*m[2][0] - m[1][0]*m[2][2])/det;
2236     mi[1][1] = (m[0][0]*m[2][2] - m[2][0]*m[0][2])/det;
2237     mi[1][2] = (m[1][0]*m[0][2] - m[0][0]*m[1][2])/det;
2238 
2239     mi[2][0] = (m[1][0]*m[2][1] - m[1][1]*m[2][0])/det;
2240     mi[2][1] = (m[2][0]*m[0][1] - m[0][0]*m[2][1])/det;
2241     mi[2][2] = (m[0][0]*m[1][1] - m[0][1]*m[1][0])/det;
2242   }
2243 
2244   return det;
2245 #endif
2246 }
2247 #else
MDET_DOW(const REAL_DD m)2248 static inline REAL MDET_DOW(const REAL_DD m)
2249 {
2250   FUNCNAME("MDET_DOW");
2251   REAL_DD tmpM;
2252   REAL_D  b;
2253   REAL det;
2254   int i;
2255 
2256   SET_DOW(0.0, b);    /* dummy */
2257   MCOPY_DOW(m, tmpM); /* destructive Gauss destroys M */
2258 
2259   square_gauss((REAL *)tmpM, b, b, DIM_OF_WORLD, 1);
2260 
2261   det = 1.0;
2262   for (i = 0; i < DIM_OF_WORLD; ++i) {
2263     det *= tmpM[i][i];
2264   }
2265 
2266   return det;
2267 }
2268 
MINVERT_DOW(const REAL_DD m,REAL_DD mi)2269 static inline REAL_D *MINVERT_DOW(const REAL_DD m, REAL_DD mi)
2270 {
2271   FUNCNAME("MINVERT_DOW");
2272   REAL_DD tmpM, b;
2273 
2274   MSET_DOW(1.0, b);   /* unit matrix */
2275   MCOPY_DOW(m, tmpM); /* destructive Gauss destroys M */
2276 
2277   square_gauss((REAL *)tmpM, (REAL *)b, (REAL *)mi, DIM_OF_WORLD, DIM_OF_WORLD);
2278 
2279   return mi;
2280 }
2281 #endif
2282 
2283 static const AFF_TRAFO aff_identity = {
2284   { { 1.0, },
2285 #if DIM_OF_WORLD > 1
2286     { 0.0, 1.0, },
2287 #endif
2288 #if DIM_OF_WORLD > 2
2289     { 0.0, 0.0, 1.0, },
2290 #endif
2291 #if DIM_OF_WORLD > 3
2292     { 0.0, 0.0, 0.0, 1.0, },
2293 #endif
2294 #if DIM_OF_WORLD > 4
2295     { 0.0, 0.0, 0.0, 0.0, 1.0, },
2296 #endif
2297 #if DIM_OF_WORLD > 5
2298     { 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, },
2299 #endif
2300 #if DIM_OF_WORLD > 6
2301     { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, },
2302 #endif
2303 #if DIM_OF_WORLD > 7
2304     { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, },
2305 #endif
2306 #if DIM_OF_WORLD > 8
2307     { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, },
2308 #endif
2309   },
2310   { 0.0 }
2311 };
2312 
AFFINE_IDENTITY(void)2313 static inline const AFF_TRAFO *AFFINE_IDENTITY(void)
2314 {
2315   return &aff_identity;
2316 }
2317 
GET_AFF_TRAFO(int info,const char * key,AFF_TRAFO * T)2318 static inline void GET_AFF_TRAFO(int info, const char *key, AFF_TRAFO *T)
2319 {
2320   GET_PARAMETER(info, key,
2321 		SCAN_MFORMAT_DOW SCAN_FORMAT_DOW,
2322 		SCAN_MEXPAND_DOW(T->M), SCAN_EXPAND_DOW(T->t));
2323 }
2324 
AFFINE_DOW(const AFF_TRAFO * trafo,const REAL_D x,REAL_D y)2325 static inline REAL *AFFINE_DOW(const AFF_TRAFO *trafo,
2326 			       const REAL_D x,
2327 			       REAL_D y)
2328 {
2329   SET_DOW(0.0, y);
2330   MV_DOW((const REAL_D *)trafo->M, x, y);
2331   AXPY_DOW(1.0, trafo->t, y);
2332 
2333   return y;
2334 }
2335 
2336 /* Apply the inverse of the affine transformation. trafo->M is
2337  * assumed to be orthogonal.
2338  */
AFFINV_DOW(const AFF_TRAFO * trafo,const REAL_D x,REAL_D y)2339 static inline REAL *AFFINV_DOW(const AFF_TRAFO *trafo,
2340 			       const REAL_D x,
2341 			       REAL_D y)
2342 {
2343   REAL_D tmp = { 0.0, };
2344 
2345   MTV_DOW((const REAL_D *)trafo->M, trafo->t, tmp);
2346   SET_DOW(0.0, y);
2347   MTV_DOW((const REAL_D *)trafo->M, x, y);
2348   AXPY_DOW(-1.0, tmp, y);
2349 
2350   return y;
2351 }
2352 
INVAFF_DOW(const AFF_TRAFO * A,AFF_TRAFO * B)2353 static inline AFF_TRAFO *INVAFF_DOW(const AFF_TRAFO *A,
2354 				    AFF_TRAFO *B)
2355 {
2356   int i, j;
2357 
2358   SET_DOW(0.0, B->t);
2359   MTV_DOW((const REAL_D *)A->M, A->t, B->t);
2360   SCAL_DOW(-1.0, B->t);
2361   for (i = 0; i < DIM_OF_WORLD; i++ ) {
2362     for (j = 0; j < DIM_OF_WORLD; j++) {
2363       B->M[i][j] = A->M[j][i];
2364     }
2365   }
2366   return B;
2367 }
2368 
AFFAFF_DOW(const AFF_TRAFO * A,const AFF_TRAFO * B,AFF_TRAFO * C)2369 static inline AFF_TRAFO *AFFAFF_DOW(const AFF_TRAFO *A,
2370 				    const AFF_TRAFO *B,
2371 				    AFF_TRAFO *C)
2372 {
2373   MM_DOW(A->M, B->M, C->M);
2374   COPY_DOW(A->t, C->t);
2375   MV_DOW(A->M, B->t, C->t);
2376 
2377   return C;
2378 }
2379 
2380 /* filling of the geometry and the quadrature cache */
2381 
2382 static inline const EL_GEOM_CACHE *
fill_el_geom_cache(const EL_INFO * el_info,FLAGS fill_flag)2383 fill_el_geom_cache(const EL_INFO *el_info, FLAGS fill_flag)
2384 {
2385   EL_GEOM_CACHE *elgc;
2386   FLAGS need;
2387   int dim, wall;
2388 
2389   elgc = (EL_GEOM_CACHE *)&el_info->el_geom_cache;
2390 
2391   if (elgc->current_el != el_info->el) {
2392     elgc->fill_flag = 0;
2393     elgc->current_el = el_info->el;
2394   }
2395 
2396   if (!(need = (elgc->fill_flag ^ fill_flag) & fill_flag)) {
2397     return elgc;
2398   }
2399 
2400   dim = el_info->mesh->dim;
2401 
2402   if (need & FILL_EL_LAMBDA) {
2403     elgc->det = el_grd_lambda_dim(dim, el_info, elgc->Lambda);
2404     elgc->fill_flag |= FILL_EL_LAMBDA|FILL_EL_DET;
2405   } else if (need & FILL_EL_DET) {
2406     elgc->det = el_det_dim(dim, el_info);
2407     elgc->fill_flag |= FILL_EL_DET;
2408   }
2409 
2410   for (wall = 0; wall < N_WALLS_MAX; wall++) {
2411     if (need & FILL_EL_WALL_ORIENTATION(wall)) {
2412       EL  *el    = el_info->el;
2413       elgc->orientation[wall][0] = wall_orientation(dim, el, wall);
2414       if ((el_info->fill_flag & FILL_NEIGH) && el_info->neigh[wall]) {
2415 	EL  *neigh = el_info->neigh[wall];
2416 	int oppv   = el_info->opp_vertex[wall];
2417 
2418 	elgc->orientation[wall][1] = wall_orientation(dim, neigh, oppv);
2419       } else {
2420 	elgc->orientation[wall][1] = -1;
2421       }
2422       elgc->fill_flag |= FILL_EL_WALL_ORIENTATION(wall);
2423     }
2424     if (need & FILL_EL_WALL_REL_ORIENTATION(wall)) {
2425       DEBUG_TEST_FLAG(FILL_NEIGH, el_info);
2426       if (el_info->neigh[wall]) {
2427 	EL  *el    = el_info->el;
2428 	EL  *neigh = el_info->neigh[wall];
2429 	int oppv   = el_info->opp_vertex[wall];
2430 	elgc->rel_orientation[wall] =
2431 	  wall_rel_orientation(dim, el, neigh, wall, oppv);
2432 	elgc->fill_flag |= FILL_EL_WALL_REL_ORIENTATION(wall);
2433       }
2434     }
2435     if (need & (FILL_EL_WALL_NORMAL(wall)|FILL_EL_WALL_DET(wall))) {
2436       elgc->wall_det[wall] =
2437 	get_wall_normal_dim(dim, el_info, wall, elgc->wall_normal[wall]);
2438       elgc->fill_flag |= FILL_EL_WALL_NORMAL(wall)|FILL_EL_WALL_DET(wall);
2439     }
2440   }
2441 
2442   return elgc;
2443 }
2444 
2445 /* Fill the quadrature cache for the given element. Maybe this should
2446  * be made an inline function. This function does not call
2447  * parametric->init_element(); it is assumed that this has been done
2448  * before if necessary. We also do not call any INIT_ELEMENT() method.
2449  */
fill_quad_el_cache(const EL_INFO * el_info,const QUAD * quad,FLAGS fill)2450 static inline const QUAD_EL_CACHE *fill_quad_el_cache(const EL_INFO *el_info,
2451 						      const QUAD *quad,
2452 						      FLAGS fill)
2453 {
2454   QUAD_EL_CACHE *qelc = (QUAD_EL_CACHE *)quad->metadata;
2455   FLAGS need;
2456   int iq, wall;
2457 
2458   if (qelc->current_el != el_info->el) {
2459     qelc->fill_flag = 0;
2460     qelc->current_el = el_info->el;
2461     INIT_ELEMENT(el_info, quad);
2462   }
2463 
2464   if (!(need = (qelc->fill_flag ^ fill) & fill)) {
2465     return qelc;
2466   }
2467 
2468   if (el_info->fill_flag & FILL_COORDS) {
2469     if (need & FILL_EL_QUAD_WORLD) {
2470       for (iq = 0; iq < quad->n_points; iq++) {
2471 	coord_to_world(el_info, quad->lambda[iq], qelc->world[iq]);
2472       }
2473       qelc->fill_flag |= FILL_EL_QUAD_WORLD;
2474     }
2475   } else {
2476     PARAMETRIC *parametric = el_info->mesh->parametric;
2477 
2478     TEST_EXIT(parametric,
2479 	      "FILL_COORDS not set in el_info->fill_flag and "
2480 	      "not on a parametric mesh.\n");
2481 
2482     if (need & FILL_EL_QUAD_WORLD) {
2483       parametric->coord_to_world(el_info, quad, -1, NULL, qelc->world);
2484     }
2485     if (need &
2486 	(FILL_EL_QUAD_GRD_WORLD|FILL_EL_QUAD_D2_WORLD|FILL_EL_QUAD_D3_WORLD)) {
2487       parametric->grd_world(el_info, quad, -1, NULL,
2488 			    (need & FILL_EL_QUAD_GRD_WORLD)
2489 			    ? qelc->param.grd_world : NULL,
2490 			    (need & FILL_EL_QUAD_D2_WORLD)
2491 			    ? qelc->param.D2_world : NULL,
2492 			    (need & FILL_EL_QUAD_D3_WORLD)
2493 			    ? qelc->param.D3_world : NULL);
2494       qelc->fill_flag |= need &
2495 	(FILL_EL_QUAD_GRD_WORLD|FILL_EL_QUAD_D2_WORLD|FILL_EL_QUAD_D3_WORLD);
2496     }
2497     if (need & FILL_EL_QUAD_DLAMBDA) {
2498       parametric->grd_lambda(el_info, quad, -1, NULL,
2499 			     qelc->param.Lambda,
2500 			     qelc->param.DLambda,
2501 			     qelc->param.det);
2502       qelc->fill_flag |=
2503 	FILL_EL_QUAD_DLAMBDA|FILL_EL_QUAD_LAMBDA|FILL_EL_QUAD_DET;
2504     } else if (need & FILL_EL_QUAD_LAMBDA) {
2505       parametric->grd_lambda(el_info, quad, -1, NULL,
2506 			     qelc->param.Lambda, NULL, qelc->param.det);
2507       qelc->fill_flag |= FILL_EL_QUAD_LAMBDA|FILL_EL_QUAD_DET;
2508     } else if (need & FILL_EL_QUAD_DET) {
2509       parametric->det(el_info, quad, -1, NULL, qelc->param.det);
2510       qelc->fill_flag |= FILL_EL_QUAD_DET;
2511     }
2512 
2513     if (need & (FILL_EL_QUAD_WALL_DET |
2514 		FILL_EL_QUAD_WALL_NORMAL |
2515 		FILL_EL_QUAD_GRD_NORMAL |
2516 		FILL_EL_QUAD_D2_NORMAL)) {
2517       DEBUG_TEST_EXIT(quad->codim == 1,
2518 		      "Wall normals make only sense for co-dim 1.\n");
2519 
2520       wall = quad->subsplx;
2521 
2522       if (need & FILL_EL_QUAD_D2_NORMAL) {
2523 	parametric->wall_normal(el_info, wall, quad, -1, NULL,
2524 				qelc->param.wall_normal,
2525 				qelc->param.grd_normal,
2526 				qelc->param.D2_normal,
2527 				qelc->param.wall_det);
2528 	qelc->fill_flag |=
2529 	  (FILL_EL_QUAD_WALL_DET |
2530 	   FILL_EL_QUAD_WALL_NORMAL |
2531 	   FILL_EL_QUAD_GRD_NORMAL |
2532 	   FILL_EL_QUAD_D2_NORMAL);
2533       } else if (need & FILL_EL_QUAD_GRD_NORMAL) {
2534 	parametric->wall_normal(el_info, wall, quad, -1, NULL,
2535 				qelc->param.wall_normal,
2536 				qelc->param.grd_normal,
2537 				NULL,
2538 				qelc->param.wall_det);
2539 	qelc->fill_flag |=
2540 	  (FILL_EL_QUAD_WALL_DET |
2541 	   FILL_EL_QUAD_WALL_NORMAL |
2542 	   FILL_EL_QUAD_GRD_NORMAL);
2543       } else if (need & FILL_EL_QUAD_WALL_NORMAL) {
2544 	parametric->wall_normal(el_info, wall, quad, -1, NULL,
2545 				qelc->param.wall_normal, NULL, NULL,
2546 				qelc->param.wall_det);
2547 	qelc->fill_flag |= FILL_EL_QUAD_WALL_DET|FILL_EL_QUAD_WALL_NORMAL;
2548       } else {
2549 	parametric->wall_normal(el_info, wall, quad, -1, NULL,
2550 				NULL /* no normals */, NULL, NULL,
2551 				qelc->param.wall_det);
2552 	qelc->fill_flag |= FILL_EL_QUAD_WALL_DET;
2553       }
2554     }
2555   }
2556 
2557   return qelc;
2558 }
2559 
2560 /* Compute the value of some really vector-valued basis function. */
2561 static inline const REAL *
phi_dow(REAL_D result,int i,const REAL_B lambda,const BAS_FCTS * thisptr)2562 phi_dow(REAL_D result,
2563 	int i, const REAL_B lambda, const BAS_FCTS *thisptr)
2564 {
2565   AXEY_DOW(PHI(thisptr, i, lambda), PHI_D(thisptr, i, lambda), result);
2566   return result;
2567 }
2568 
2569 /* Compute the barycentric gradient of some really vector-valued basis
2570  * function.
2571  */
2572 static inline const REAL_B *
grd_phi_dow(REAL_DB result,int i,const REAL_B lambda,const BAS_FCTS * thisptr)2573 grd_phi_dow(REAL_DB result,
2574 	    int i, const REAL_B lambda, const BAS_FCTS *thisptr)
2575 {
2576   int n;
2577   const REAL *grd_phi = GRD_PHI(thisptr, i, lambda);
2578   const REAL *phi_d   = PHI_D(thisptr, i, lambda);
2579 
2580   for (n = 0; n < DIM_OF_WORLD; n++) {
2581     AXEY_BAR(DIM_MAX, phi_d[n], grd_phi, result[n]);
2582   }
2583 
2584   if (!thisptr->dir_pw_const) {
2585     REAL phi = PHI(thisptr, i, lambda);
2586     const REAL_B *grd_phi_d = GRD_PHI_D(thisptr, i, lambda);
2587 
2588     for (n = 0; n < DIM_OF_WORLD; n++) {
2589       AXPY_BAR(DIM_MAX, phi, grd_phi_d[n], result[n]);
2590     }
2591   }
2592 
2593   return (const REAL_B *)result;
2594 }
2595 
2596 /* Compute the barycentric second derivatives of some really
2597  * vector-valued basis functions.
2598  */
2599 
2600 #endif /* _ALBERT_INLINES_H_ */
2601