1 #ifndef _ALBERT_INLINES_H_
2 #define _ALBERT_INLINES_H_
3
4 /*--------------------------------------------------------------------------*/
5 /* ALBERT: an Adaptive multi Level finite element toolbox using */
6 /* Bisectioning refinement and Error control by Residual */
7 /* Techniques */
8 /* */
9 /* www.alberta-fem.de */
10 /* */
11 /*--------------------------------------------------------------------------*/
12 /* */
13 /* file: albert_inlines.h */
14 /* */
15 /* */
16 /* description: Blas-like inline functions for REAL_Ds and REAL_DDs, */
17 /* REAL_Bs & friends. */
18 /* */
19 /*--------------------------------------------------------------------------*/
20 /* */
21 /* authors: Alfred Schmidt */
22 /* Zentrum fuer Technomathematik */
23 /* Fachbereich 3 Mathematik/Informatik */
24 /* Universitaet Bremen */
25 /* Bibliothekstr. 2 */
26 /* D-28359 Bremen, Germany */
27 /* */
28 /* Kunibert G. Siebert */
29 /* Istitut fuer Mathematik */
30 /* Universitaet Augsburg */
31 /* Universitaetsstr. 14 */
32 /* D-86159 Augsburg, Germany */
33 /* */
34 /* Claus-Justus Heine */
35 /* Abteilung fuer Angewandte Mathematik */
36 /* Albert-Ludwigs-Universitaet Freiburg */
37 /* Hermann-Herder-Str. 10 */
38 /* D-79104 Freiburg im Breisgau, Germany */
39 /* */
40 /* */
41 /* (c) by A. Schmidt, K.G. Siebert, C.-J. Heine (1996-2007) */
42 /* */
43 /*--------------------------------------------------------------------------*/
44
45 #include "alberta.h" /* essentially a no-op when included from alberta.h */
46
47 #ifndef DIM_OF_WORLD
48 # error Need to know the dimension of the World :)
49 #endif
50
51 /* multiple invocations of macro-arguments can be harmful, if the macro
52 * argument is, e.g., a function-call.
53 *
54 * NOTE: as DIM_OF_WORLD is a constant, the C-compiler should unroll all
55 * loops when compiling with optimizations, so there should be no need
56 * for hand-unrolling, except in some simple 1D cases.
57 *
58 * Also, all modern compilers do function inlining, so the
59 * function-call over-head is _not_ a problem.
60 *
61 * Note: the function may be nested, they return the address of the
62 * _modified_ operand. So AXPY(a, AX(b, x), y) is valid.
63 */
64
65 /* In addition to BLAS-like routines for REAL_D vectors and REAL_DD
66 * matrices this file also defines the access to the per-element
67 * quadrature and geometry caches:
68 *
69 * fill_el_geom_cache()
70 * fill_quad_el_cache()
71 */
72
73 /* The following functions are defined here:
74 *
75 * AX(a, x) -- x *= a (alias SCAL_DOW is also defined)
76 * AXEY(a, x, y) -- y = a x
77 * AXPBY(a, x, b, y, z) -- z = a x + by
78 * AXPBYPCZ(a, x, b, y, c, z, w) -- w = a x + by + cz
79 * AXPBYP(a, x, b, y, z) -- z += a x + by
80 * AXPY(a, x, y) -- y += a x
81 * COPY(src, dst) -- dst := src
82 * DIST(x, y) -- sqrt(DST2(x, y))
83 * DST2(x, y) -- SCP(x-y, x-y)
84 * NRM2(x) -- SCP(x, x)
85 * NORM(x) -- sqrt(NRM2(x))
86 * NORM1(x) -- fabs(x[0]) + ... + fabs(x[DOW-1])
87 * DIST1(x, y) -- NORM1(x-y)
88 * PNORMP(x, p) -- (pow(fabs(x[0]), p) + ... + pow(fabs(x[0]), p))
89 * NORMP(x, p) -- pow(PNORMP(x), 1.0/p)
90 * NORM8(x) -- max{fabs(x[0]), ..., fabs(x[DOW-1])} (8==infty)
91 * DIST8(x, y) -- NORM8(x-y)
92 * SUM(x) -- x[0] + ... + x[DOW-1]
93 * MTV(m, v, b) -- b += m^t v
94 * MV(m, v, b) -- b += m v (m is a matrix)
95 * MDIV(m, v, b) -- scale v by the inverse of the diagonal -> b
96 * SCP(x, y) -- <x, y>
97 * SET(val, x) -- x[i] = val, i=1, ..., DOW
98 * WEDGE(x, y, n) -- n = x /\ y in 3D
99 * WEDGE(x, y) -- x0 * y1 - x1 * y0 in 2D
100 *
101 * The actual function named is generated by adding a _DOW() suffix.
102 *
103 * Prefix Version
104 * none REAL_D
105 * M REAL_DD
106 * DM diagonal matrix, diagonal stored in REAL_D vector
107 * SCM scalar matrix, data type REAL (albert.h)
108 *
109 * Further:
110 * Macros EXPAND and FORMAT (with named pre- and suffixes) for easier
111 * print-out of REAL_D and REAL_DD, use like this:
112 *
113 * printf("text"MFORMAT_DOW"more text\n", MEXPAND_DOW(m));
114 *
115 * Some more functions for barycentric coordinates. NOTE: works best
116 * with constant dim. Use with care w.r.t. to optimization.
117 *
118 * SET_BAR(dim, a, x)
119 * SCAL_BAR(dim, a, x)
120 * SCP_BAR(dim, x, y)
121 * AXPY_BAR(dim, a, x, y)
122 * AXPBY_BAR(dim, a, x, b, y, z)
123 * COPY_BAR(dim, from, to)
124 * GRAD_DOW(dim, Lambda, b_grd, x_grd) -- conversion from barycentric
125 * to cartesian gradients. The function computes x_grd = b_grd Lambda.
126 *
127 * Some of these functions are also available as matrix versions
128 * (e.g. MAXPY_BAR()).
129 *
130 * Further: to add matrices of higher symmetry to those of lower
131 * symmetry the following functions exist. The first prefix is alway
132 * the type of the destination matrix. Only the well defined functions
133 * are implemented. Grin. (Hint: the limiting factor is the structure
134 * of the destination matrix ...)
135 *
136 * {M,DM,SCM}{M,DM,SCM}AXPY_DOW(alpha, a, b) b += alpha*a
137 * {M,DM,SCM}{M,DM,SCM}AXEY_DOW(alpha, a, b) b = alpha*a
138 * {M,DM,SCM}{M,DM,SCM}COPY_DOW(a, b) b = a
139 *
140 * Some more: DOW x DOW matrix multiplication etc.:
141 *
142 * MM_DOW(), MMT_DOW(), MTM_DOW(), MDET_DOW(), MINVERT_DOW()
143 *
144 */
145
POW_DOW(REAL a)146 static inline REAL POW_DOW(REAL a)
147 {
148 #if DIM_OF_WORLD == 0
149 return 1.0;
150 #elif DIM_OF_WORLD == 1
151 return a;
152 #elif DIM_OF_WORLD == 2
153 return a*a;
154 #elif DIM_OF_WORLD == 3
155 return a*a*a;
156 #else
157 int i;
158 REAL res = a;
159
160 for (i = 1; i < DIM_OF_WORLD; i++) {
161 res *= a;
162 }
163 return res;
164 #endif
165 }
166
167 #define SCAL_DOW(a, x) AX_DOW(a, x)
AX_DOW(REAL a,REAL_D x)168 static inline REAL *AX_DOW(REAL a, REAL_D x)
169 {
170 int i;
171
172 for (i = 0; i < DIM_OF_WORLD; i++) {
173 x[i] *= a;
174 }
175 return x;
176 }
177
178 #define MSCAL_DOW(a, m) MAX_DOW(a, m)
MAX_DOW(REAL a,REAL_DD m)179 static inline REAL_D *MAX_DOW(REAL a, REAL_DD m)
180 {
181 int i;
182
183 for (i = 0; i < DIM_OF_WORLD; i++) {
184 AX_DOW(a, m[i]);
185 }
186 return m;
187 }
188
189 #define DMAX_DOW(a, m) AX_DOW(a, m)
190 #define DMSCAL_DOW(a, m) DMAX_DOW(a, m)
191
192 #define SCMAX_DOW(a, m) (m) *= (a)
193 #define SCMSCAL_DOW(a, m) SCMAX_DOW(a, m)
194
AXEY_DOW(REAL a,const REAL_D x,REAL_D y)195 static inline REAL *AXEY_DOW(REAL a, const REAL_D x, REAL_D y)
196 {
197 int i;
198
199 for (i = 0; i < DIM_OF_WORLD; i++) {
200 y[i] = a * x[i];
201 }
202 return y;
203 }
204
MAXEY_DOW(REAL a,const REAL_DD x,REAL_DD y)205 static inline REAL_D *MAXEY_DOW(REAL a, const REAL_DD x, REAL_DD y)
206 {
207 int i;
208
209 for (i = 0; i < DIM_OF_WORLD; i++) {
210 AXEY_DOW(a, x[i], y[i]);
211 }
212 return y;
213 }
214
215 #define DMAXEY_DOW(a, x, y) AXEY_DOW(a, x, y)
216 #define SCMAXEY_DOW(a, x, y) (y) = (a)*(x)
217
AXPY_DOW(REAL a,const REAL_D x,REAL_D y)218 static inline REAL *AXPY_DOW(REAL a, const REAL_D x, REAL_D y)
219 {
220 int i;
221
222 for (i = 0; i < DIM_OF_WORLD; i++) {
223 y[i] += a * x[i];
224 }
225 return y;
226 }
227
MAXPY_DOW(REAL a,const REAL_DD x,REAL_DD y)228 static inline REAL_D *MAXPY_DOW(REAL a, const REAL_DD x, REAL_DD y)
229 {
230 int i;
231
232 for (i = 0; i < DIM_OF_WORLD; i++) {
233 AXPY_DOW(a, x[i], y[i]);
234 }
235 return y;
236 }
237
238 /* same as above, but add the transposed matrix to y */
MAXTPY_DOW(REAL a,const REAL_DD x,REAL_DD y)239 static inline REAL_D *MAXTPY_DOW(REAL a, const REAL_DD x, REAL_DD y)
240 {
241 REAL tmp;
242 int i, j;
243
244 for (i = 0; i < DIM_OF_WORLD; i++) {
245 y[i][i] += a*x[i][i];
246 for (j = i+1; j < DIM_OF_WORLD; j++) {
247 tmp = x[i][j];
248 y[i][j] += a*x[j][i];
249 y[j][i] += a*tmp;
250 }
251 }
252 return y;
253 }
254
255 #define DMAXPY_DOW(a, x, y) AXPY_DOW(a, x, y)
256 #define DMAXTPY_DOW(a, x, y) DMAXPY_DOW(a, x, y) /* transpose of diagonal matrix :) */
257 #define SCMAXPY_DOW(a, x, y) (y) += (a)*(x)
258 #define SCMAXTPY_DOW(a, x, y) SCMAXPY_DOW(a, x, y)
259
AXPBY_DOW(REAL a,const REAL_D x,REAL b,const REAL_D y,REAL_D z)260 static inline REAL *AXPBY_DOW(REAL a, const REAL_D x, REAL b, const REAL_D y,
261 REAL_D z)
262 {
263 int i;
264
265 for (i = 0; i < DIM_OF_WORLD; i++) {
266 z[i] = b*y[i] + a * x[i];
267 }
268 return z;
269 }
270
MAXPBY_DOW(REAL a,const REAL_DD x,REAL b,const REAL_DD y,REAL_DD z)271 static inline REAL_D *MAXPBY_DOW(REAL a, const REAL_DD x,
272 REAL b, const REAL_DD y,
273 REAL_DD z)
274 {
275 int i;
276
277 for (i = 0; i < DIM_OF_WORLD; i++) {
278 AXPBY_DOW(a, x[i], b, y[i], z[i]);
279 }
280 return z;
281 }
282
283 #define DMAXPBY_DOW(a, x, b, y, z) AXPBY_DOW(a, x, b, y, z)
284 #define SCMAXPBY_DOW(a, x, b, y, z) (z) = (a)*(x) + (b)*(y)
285
AXPBYPCZ_DOW(REAL a,const REAL_D x,REAL b,const REAL_D y,REAL c,const REAL_D z,REAL_D w)286 static inline REAL *AXPBYPCZ_DOW(REAL a, const REAL_D x,
287 REAL b, const REAL_D y,
288 REAL c, const REAL_D z,
289 REAL_D w)
290 {
291 int i;
292
293 for (i = 0; i < DIM_OF_WORLD; i++) {
294 w[i] = c*z[i] + b*y[i] + a * x[i];
295 }
296 return w;
297 }
298
MAXPBYPCZ_DOW(REAL a,const REAL_DD x,REAL b,const REAL_DD y,REAL c,const REAL_DD z,REAL_DD w)299 static inline REAL_D *MAXPBYPCZ_DOW(REAL a, const REAL_DD x,
300 REAL b, const REAL_DD y,
301 REAL c, const REAL_DD z,
302 REAL_DD w)
303 {
304 int i;
305
306 for (i = 0; i < DIM_OF_WORLD; i++) {
307 AXPBYPCZ_DOW(a, x[i], b, y[i], c, z[i], w[i]);
308 }
309 return w;
310 }
311
312 #define DMAXPBYPCZ_DOW(a, x, b, y, c, z, w) AXPBYPCZ_DOW(a, x, b, y, c, z, w)
313 #define SCMAXPBYPCZ_DOW(a, x, b, y, c, z, w) ((w) = (a)*(x) + (b)*(y) + (c)*(z))
314
AXPBYP_DOW(REAL a,const REAL_D x,REAL b,const REAL_D y,REAL_D z)315 static inline REAL *AXPBYP_DOW(REAL a, const REAL_D x, REAL b, const REAL_D y,
316 REAL_D z)
317 {
318 int i;
319
320 for (i = 0; i < DIM_OF_WORLD; i++) {
321 z[i] += b*y[i] + a * x[i];
322 }
323 return z;
324 }
325
MAXPBYP_DOW(REAL a,const REAL_DD x,REAL b,const REAL_DD y,REAL_DD z)326 static inline REAL_D *MAXPBYP_DOW(REAL a, const REAL_DD x,
327 REAL b, const REAL_DD y,
328 REAL_DD z)
329 {
330 int i;
331
332 for (i = 0; i < DIM_OF_WORLD; i++) {
333 AXPBYP_DOW(a, x[i], b, y[i], z[i]);
334 }
335 return z;
336 }
337
338 #define DMAXPBYP_DOW(a, x, b, y, z) AXPBYP_DOW(a, x, b, y, z)
339 #define SCMAXPBYP_DOW(a, x, b, y, z) ((z) += (a)*(x) + (b)*(y))
340
AXPBYPCZP_DOW(REAL a,const REAL_D x,REAL b,const REAL_D y,REAL c,const REAL_D z,REAL_D w)341 static inline REAL *AXPBYPCZP_DOW(REAL a, const REAL_D x,
342 REAL b, const REAL_D y,
343 REAL c, const REAL_D z,
344 REAL_D w)
345 {
346 int i;
347
348 for (i = 0; i < DIM_OF_WORLD; i++) {
349 w[i] += c*z[i] + b*y[i] + a*x[i];
350 }
351 return w;
352 }
353
MAXPBYPCZP_DOW(REAL a,const REAL_DD x,REAL b,const REAL_DD y,REAL c,const REAL_DD z,REAL_DD w)354 static inline REAL_D *MAXPBYPCZP_DOW(REAL a, const REAL_DD x,
355 REAL b, const REAL_DD y,
356 REAL c, const REAL_DD z,
357 REAL_DD w)
358 {
359 int i;
360
361 for (i = 0; i < DIM_OF_WORLD; i++) {
362 AXPBYPCZP_DOW(a, x[i], b, y[i], c, z[i], w[i]);
363 }
364 return w;
365 }
366
367 #define DMAXPBYPCZP_DOW(a, x, b, y, z) \
368 AXPBYPCZP_DOW(a, x, b, y, z)
369 #define SCMAXPBYPCZP_DOW(a, x, b, y, c, z, w) \
370 ((w) += (a)*(x) + (b)*(y) + (c)*(z))
371
372 /***********************/
373 /* Matrix - Matrix addition */
374 #define MMAXPY_DOW(s, a, b) MAXPY_DOW(s, a, b)
375 #define DMDMAXPY_DOW(s, a, b) DMAXPY_DOW(s, a, b)
376 #define SCMSCMAXPY_DOW(s, a, b) SCMAXPY_DOW(s, a, b)
377
378 /* Transpose addition, only different for the MM case */
379 #define MMAXTPY_DOW(s, a, b) MAXTPY_DOW(s, a, b)
380 #define MDMAXTPY_DOW(s, a, b) MDMAXPY_DOW(s, a, b)
381 #define MSCMAXTPY_DOW(s, a, b) MSCMAXPY_DOW(s, a, b)
382
383 #define DMDMAXTPY_DOW(s, a, b) DMAXPY_DOW(s, a, b)
384 #define DMSCMAXTPY_DOW(s, a, b) DMSCMAXPY_DOW(s, a, b)
385
386 #define SCMSCMAXTPY_DOW(s, a, b) SCMAXPY_DOW(s, a, b)
387
MDMAXPY_DOW(REAL a,const REAL_D x,REAL_DD y)388 static inline REAL_D *MDMAXPY_DOW(REAL a, const REAL_D x, REAL_DD y)
389 {
390 int i;
391
392 for (i = 0; i < DIM_OF_WORLD; i++) {
393 y[i][i] += a * x[i];
394 }
395
396 return y;
397 }
398
MSCMAXPY_DOW(REAL a,const REAL x,REAL_DD y)399 static inline REAL_D *MSCMAXPY_DOW(REAL a, const REAL x, REAL_DD y)
400 {
401 int i;
402
403 for (i = 0; i < DIM_OF_WORLD; i++) {
404 y[i][i] += a * x;
405 }
406
407 return y;
408 }
409
DMSCMAXPY_DOW(REAL a,REAL x,REAL * y)410 static inline REAL *DMSCMAXPY_DOW(REAL a, REAL x, REAL *y)
411 {
412 int i;
413
414 for (i = 0; i < DIM_OF_WORLD; i++) {
415 y[i] += a*x;
416 }
417
418 return y;
419 }
420
421 /* Matrix - Matrix initialization */
422 #define MMAXEY_DOW(s, a, b) MAXEY_DOW(s, a, b)
423 #define DMDMAXEY_DOW(s, a, b) DMAXEY_DOW(s, a, b)
424 #define SCMSCMAXEY_DOW(s, a, b) SCMAXEY_DOW(s, a, b)
425
MDMAXEY_DOW(REAL a,const REAL_D x,REAL_DD y)426 static inline REAL_D *MDMAXEY_DOW(REAL a, const REAL_D x, REAL_DD y)
427 {
428 int i, j;
429
430 for (i = 0; i < DIM_OF_WORLD; i++) {
431 y[i][i] = a * x[i];
432 for (j = i+1; j < DIM_OF_WORLD; j++) {
433 y[i][j] = y[j][i] = 0.0;
434 }
435 }
436
437 return y;
438 }
439
MSCMAXEY_DOW(REAL a,const REAL x,REAL_DD y)440 static inline REAL_D *MSCMAXEY_DOW(REAL a, const REAL x, REAL_DD y)
441 {
442 int i, j;
443
444 for (i = 0; i < DIM_OF_WORLD; i++) {
445 y[i][i] = a * x;
446 for (j = i+1; j < DIM_OF_WORLD; j++) {
447 y[i][j] = y[j][i] = 0.0;
448 }
449 }
450
451 return y;
452 }
453
DMSCMAXEY_DOW(REAL a,REAL x,REAL * y)454 static inline REAL *DMSCMAXEY_DOW(REAL a, REAL x, REAL *y)
455 {
456 int i;
457
458 for (i = 0; i < DIM_OF_WORLD; i++) {
459 y[i] = a*x;
460 }
461
462 return y;
463 }
464
465 /* Matrix - Matrix initialization */
466 #define MMMAXPBY_DOW(a, x, b, y, z) MAXPBY_DOW(a, x, b, y, z)
467 #define DMDMDMAXPBY_DOW(a, x, b, y, z) DMAXPBY_DOW(a, x, b, y, z)
468 #define SCMSCMSCMAXPBY_DOW(a, x, b, y, z) SCMAXPBY_DOW(a, x, b, y, z)
469
MMDMAXPBY_DOW(REAL a,const REAL_DD x,REAL b,const REAL_D y,REAL_DD z)470 static inline REAL_D *MMDMAXPBY_DOW(REAL a, const REAL_DD x,
471 REAL b, const REAL_D y,
472 REAL_DD z)
473 {
474 int i, j;
475
476 for (i = 0; i < DIM_OF_WORLD; i++) {
477 z[i][i] = a * x[i][i] + b * y[i];
478 for (j = i+1; j < DIM_OF_WORLD; j++) {
479 z[i][j] = a * x[i][j];
480 z[j][i] = a * x[j][i];
481 }
482 }
483
484 return z;
485 }
486
MMSCMAXPBY_DOW(REAL a,const REAL_DD x,REAL b,const REAL y,REAL_DD z)487 static inline REAL_D *MMSCMAXPBY_DOW(REAL a, const REAL_DD x,
488 REAL b, const REAL y,
489 REAL_DD z)
490 {
491 int i, j;
492
493 for (i = 0; i < DIM_OF_WORLD; i++) {
494 z[i][i] = a * x[i][i] + b * y;
495 for (j = i+1; j < DIM_OF_WORLD; j++) {
496 z[i][j] = a * x[i][j];
497 z[j][i] = a * x[j][i];
498 }
499 }
500
501 return z;
502 }
503
MDMDMAXPBY_DOW(REAL a,const REAL_D x,REAL b,const REAL_D y,REAL_DD z)504 static inline REAL_D *MDMDMAXPBY_DOW(REAL a, const REAL_D x,
505 REAL b, const REAL_D y,
506 REAL_DD z)
507 {
508 int i, j;
509
510 for (i = 0; i < DIM_OF_WORLD; i++) {
511 z[i][i] = a * x[i] + b * y[i];
512 for (j = i+1; j < DIM_OF_WORLD; j++) {
513 z[i][j] = z[j][i] = 0.0;
514 }
515 }
516
517 return z;
518 }
519
MDMSCMAXPBY_DOW(REAL a,REAL * x,REAL b,REAL y,REAL_DD z)520 static inline REAL_D *MDMSCMAXPBY_DOW(REAL a, REAL *x,
521 REAL b, REAL y,
522 REAL_DD z)
523 {
524 int i, j;
525
526 for (i = 0; i < DIM_OF_WORLD; i++) {
527 z[i][i] = a * x[i] + b * y;
528 for (j = i+1; j < DIM_OF_WORLD; j++) {
529 z[i][j] = z[j][i] = 0.0;
530 }
531 }
532
533 return z;
534 }
535
MSCMSCMAXPBY_DOW(REAL a,REAL x,REAL b,REAL y,REAL_DD z)536 static inline REAL_D *MSCMSCMAXPBY_DOW(REAL a, REAL x,
537 REAL b, REAL y,
538 REAL_DD z)
539 {
540 int i, j;
541
542 for (i = 0; i < DIM_OF_WORLD; i++) {
543 z[i][i] = a * x + b * y;
544 for (j = i+1; j < DIM_OF_WORLD; j++) {
545 z[i][j] = z[j][i] = 0.0;
546 }
547 }
548
549 return z;
550 }
551
DMDMSCMAXPBY_DOW(REAL a,const REAL_D x,REAL b,const REAL y,REAL_D z)552 static inline REAL *DMDMSCMAXPBY_DOW(REAL a, const REAL_D x,
553 REAL b, const REAL y,
554 REAL_D z)
555 {
556 int i;
557
558 for (i = 0; i < DIM_OF_WORLD; i++) {
559 z[i] = a * x[i] + b * y;
560 }
561 return z;
562 }
563
DMSCMSCMAXPBY_DOW(REAL a,const REAL x,REAL b,const REAL y,REAL_D z)564 static inline REAL *DMSCMSCMAXPBY_DOW(REAL a, const REAL x,
565 REAL b, const REAL y,
566 REAL_D z)
567 {
568 int i;
569
570 for (i = 0; i < DIM_OF_WORLD; i++) {
571 z[i] = a * x + b * y;
572 }
573 return z;
574 }
575
576 /* Matrix - Matrix copy */
577 #define MMCOPY_DOW(a, b) MCOPY_DOW(a, b)
578 #define MDMCOPY_DOW(a, b) MDMAXEY_DOW(1.0, a, b)
579 #define MSCMCOPY_DOW(a, b) MSCMAXEY_DOW(1.0, a, b)
580
581 #define DMDMCOPY_DOW(a, b) DMCOPY_DOW(a, b)
582 #define DMSCMCOPY_DOW(a, b) DMSCMAXEY_DOW(1.0, a, b)
583
584 #define SCMSCMCOPY_DOW(a, b) (b) = (a)
585
586 /***********************/
587
COPY_DOW(const REAL_D x,REAL_D y)588 static inline REAL *COPY_DOW(const REAL_D x, REAL_D y)
589 {
590 memcpy(y, x, sizeof(REAL_D));
591 return y;
592 }
593
MCOPY_DOW(const REAL_DD x,REAL_DD y)594 static inline REAL_D *MCOPY_DOW(const REAL_DD x, REAL_DD y)
595 {
596 memcpy(y, x, sizeof(REAL_DD));
597 return y;
598 }
599
600 #define DMCOPY_DOW(src, dst) COPY_DOW(src, dst)
601
DST2_DOW(const REAL_D x,const REAL_D y)602 static inline REAL DST2_DOW(const REAL_D x, const REAL_D y)
603 {
604 # if DIM_OF_WORLD == 1
605 return SQR(ABS(x[0] - y[0]));
606 # else
607 int i;
608 REAL accu;
609
610 accu = SQR(x[0] - y[0]);
611 for (i = 1; i < DIM_OF_WORLD; i++) {
612 accu += SQR(x[i] - y[i]);
613 }
614 return accu;
615 # endif
616 }
617
MDST2_DOW(const REAL_DD a,const REAL_DD b)618 static inline REAL MDST2_DOW(const REAL_DD a, const REAL_DD b)
619 {
620 int i;
621 REAL res;
622
623 res = DST2_DOW(a[0], b[0]);
624 for (i = 1; i < DIM_OF_WORLD; i++) {
625 res += DST2_DOW(a[i], b[i]);
626 }
627 return res;
628 }
629
630 #define DMDST2_DOW(x, y) DST2_DOW(x, y)
631
NRM2_DOW(const REAL_D x)632 static inline REAL NRM2_DOW(const REAL_D x)
633 {
634 int i;
635 REAL accu;
636
637 accu = SQR(x[0]);
638 for (i = 1; i < DIM_OF_WORLD; i++) {
639 accu += SQR(x[i]);
640 }
641 return accu;
642 }
643
MNRM2_DOW(const REAL_DD m)644 static inline REAL MNRM2_DOW(const REAL_DD m)
645 {
646 int i;
647 REAL res;
648
649 res = NRM2_DOW(m[0]);
650 for (i = 1; i < DIM_OF_WORLD; i++) {
651 res += NRM2_DOW(m[i]);
652 }
653 return res;
654 }
655
656 #define DMNRM2_DOW(m) NRM2_DOW(x)
657
NORM1_DOW(const REAL_D x)658 static inline REAL NORM1_DOW(const REAL_D x)
659 {
660 int i;
661 REAL sum;
662
663 sum = fabs(x[0]);
664 for (i = 1; i < DIM_OF_WORLD; i++) {
665 sum += fabs(x[i]);
666 }
667
668 return sum;
669 }
670
DIST1_DOW(const REAL_D x,const REAL_D y)671 static inline REAL DIST1_DOW(const REAL_D x, const REAL_D y)
672 {
673 int i;
674 REAL sum;
675
676 sum = fabs(x[0]-y[0]);
677 for (i = 1; i < DIM_OF_WORLD; i++) {
678 sum += fabs(x[i]-y[i]);
679 }
680
681 return sum;
682 }
683
NORM8_DOW(const REAL_D x)684 static inline REAL NORM8_DOW(const REAL_D x)
685 {
686 int i;
687 REAL max;
688
689 max = fabs(x[0]);
690 for (i = 1; i < DIM_OF_WORLD; i++) {
691 max = MAX(max, fabs(x[i]));
692 }
693
694 return max;
695 }
696
DIST8_DOW(const REAL_D x,const REAL_D y)697 static inline REAL DIST8_DOW(const REAL_D x, const REAL_D y)
698 {
699 int i;
700 REAL max;
701
702 max = fabs(x[0]-y[0]);
703 for (i = 1; i < DIM_OF_WORLD; i++) {
704 max = MAX(max, fabs(x[i]-y[i]));
705 }
706
707 return max;
708 }
709
SUM_DOW(const REAL_D x)710 static inline REAL SUM_DOW(const REAL_D x)
711 {
712 int i;
713 REAL sum;
714
715 sum = x[0];
716 for (i = 1; i < DIM_OF_WORLD; i++) {
717 sum += x[i];
718 }
719
720 return sum;
721 }
722
PNRMP_DOW(const REAL_D x,REAL p)723 static inline REAL PNRMP_DOW(const REAL_D x, REAL p)
724 {
725 int i;
726 REAL sum;
727
728 sum = pow(fabs(x[0]), p);
729 for (i = 1; i < DIM_OF_WORLD; i++) {
730 sum += pow(fabs(x[i]), p);
731 }
732
733 return sum;
734 }
735
NRMP_DOW(const REAL_D x,REAL p)736 static inline REAL NRMP_DOW(const REAL_D x, REAL p)
737 {
738 return pow(PNRMP_DOW(x, p), 1.0/p);
739 }
740
MNORM1_DOW(const REAL_DD x)741 static inline REAL MNORM1_DOW(const REAL_DD x)
742 {
743 int i;
744 REAL sum;
745
746 sum = NORM1_DOW(x[0]);
747 for (i = 1; i < DIM_OF_WORLD; i++) {
748 sum += NORM1_DOW(x[0]);
749 }
750
751 return sum;
752 }
753
MDIST1_DOW(const REAL_DD x,const REAL_DD y)754 static inline REAL MDIST1_DOW(const REAL_DD x, const REAL_DD y)
755 {
756 int i;
757 REAL sum;
758
759 sum = DIST1_DOW(x[0], y[0]);
760 for (i = 1; i < DIM_OF_WORLD; i++) {
761 sum += DIST1_DOW(x[i], y[i]);
762 }
763
764 return sum;
765 }
766
MNORM8_DOW(const REAL_DD x)767 static inline REAL MNORM8_DOW(const REAL_DD x)
768 {
769 int i;
770 REAL max;
771
772 max = NORM8_DOW(x[0]);
773 for (i = 1; i < DIM_OF_WORLD; i++) {
774 max = MAX(max, NORM8_DOW(x[i]));
775 }
776
777 return max;
778 }
779
MDIST8_DOW(const REAL_DD x,const REAL_DD y)780 static inline REAL MDIST8_DOW(const REAL_DD x, const REAL_DD y)
781 {
782 int i;
783 REAL max;
784
785 max = DIST8_DOW(x[0], y[0]);
786 for (i = 1; i < DIM_OF_WORLD; i++) {
787 max = MAX(max, DIST8_DOW(x[i], y[i]));
788 }
789
790 return max;
791 }
792
MSUM_DOW(const REAL_DD x)793 static inline REAL MSUM_DOW(const REAL_DD x)
794 {
795 int i;
796 REAL sum;
797
798 sum = SUM_DOW(x[0]);
799 for (i = 1; i < DIM_OF_WORLD; i++) {
800 sum += SUM_DOW(x[0]);
801 }
802
803 return sum;
804 }
805
MPNRMP_DOW(const REAL_DD x,REAL p)806 static inline REAL MPNRMP_DOW(const REAL_DD x, REAL p)
807 {
808 int i;
809 REAL sum;
810
811 sum = PNRMP_DOW(x[0], p);
812 for (i = 1; i < DIM_OF_WORLD; i++) {
813 sum += PNRMP_DOW(x[i], p);
814 }
815
816 return sum;
817 }
818
MNRMP_DOW(const REAL_DD x,REAL p)819 static inline REAL MNRMP_DOW(const REAL_DD x, REAL p)
820 {
821 return pow(MPNRMP_DOW(x, p), 1.0/p);
822 }
823
SCP_DOW(const REAL_D x,const REAL_D y)824 static inline REAL SCP_DOW(const REAL_D x, const REAL_D y)
825 {
826 REAL res;
827 int i;
828
829 res = x[0] * y[0];
830 for (i = 1; i < DIM_OF_WORLD; i++) {
831 res += x[i]*y[i];
832
833 }
834 return res;
835 }
836
GRAMSCP_DOW(const REAL_DD M,const REAL_D x,const REAL_D y)837 static inline REAL GRAMSCP_DOW(const REAL_DD M, const REAL_D x, const REAL_D y)
838 {
839 REAL res = 0.0;
840 int i, j;
841
842 for (i = 0; i < DIM_OF_WORLD; i++) {
843 for (j = 0; j < DIM_OF_WORLD; j++) {
844 res += x[i] * M[i][j] * y[j];
845 }
846 }
847 return res;
848 }
849 #define MGRAMSCP_DOW(M, x, y) GRAMSCP_DOW(M, x, y)
850
DMGRAMSCP_DOW(const REAL_D M,const REAL_D x,const REAL_D y)851 static inline REAL DMGRAMSCP_DOW(const REAL_D M, const REAL_D x, const REAL_D y)
852 {
853 REAL res = 0.0;
854 int i;
855
856 for (i = 0; i < DIM_OF_WORLD; i++) {
857 res += x[i] * M[i] * y[i];
858 }
859 return res;
860 }
861
SCMGRAMSCP_DOW(REAL s,const REAL_D x,const REAL_D y)862 static inline REAL SCMGRAMSCP_DOW(REAL s, const REAL_D x, const REAL_D y)
863 {
864 return s*SCP_DOW(x, y);
865 }
866
MTV_DOW(const REAL_DD m,const REAL_D v,REAL_D b)867 static inline REAL *MTV_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
868 {
869 int i, j;
870
871 for (i = 0; i < DIM_OF_WORLD; i++) {
872 for (j = 0; j < DIM_OF_WORLD; j++) {
873 b[i] += m[j][i] * v[j];
874 }
875 }
876 return b;
877 }
878
879 #define SCMV_DOW(m, v, b) AXPY_DOW(m, v, b)
880 #define DMTV_DOW(m, v, b) DMV_DOW(m, v, b)
881 #define SCMTV_DOW(m, v, b) SCMV_DOW(m, v, b)
882
MDIV_DOW(const REAL_DD m,const REAL_D v,REAL_D b)883 static inline REAL *MDIV_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
884 {
885 int i;
886
887 for (i = 0; i < DIM_OF_WORLD; i++) {
888 b[i] = v[i] / m[i][i];
889 }
890 return b;
891 }
892
DMDIV_DOW(const REAL_D m,const REAL_D y,REAL_D r)893 static inline REAL *DMDIV_DOW(const REAL_D m, const REAL_D y, REAL_D r)
894 {
895 int i;
896 for (i = 0; i < DIM_OF_WORLD; i++) {
897 r[i] = y[i] / m[i];
898 }
899 return r;
900 }
901
902 #define SCMDIV_DOW(m, y, r) AXEY_DOW(1.0/(m), y, r)
903
DMV_DOW(const REAL_D x,const REAL_D y,REAL_D r)904 static inline REAL *DMV_DOW(const REAL_D x, const REAL_D y, REAL_D r)
905 {
906 int i;
907 for (i = 0; i < DIM_OF_WORLD; i++) {
908 r[i] += x[i]*y[i];
909 }
910 return r;
911 }
912
MV_DOW(const REAL_DD m,const REAL_D v,REAL_D b)913 static inline REAL *MV_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
914 {
915 int i;
916
917 for (i = 0; i < DIM_OF_WORLD; i++) {
918 b[i] += SCP_DOW(m[i], v);
919 }
920 return b;
921 }
922
MVEQ_DOW(const REAL_DD m,const REAL_D v,REAL_D b)923 static inline REAL *MVEQ_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
924 {
925 int i;
926
927 for (i = 0; i < DIM_OF_WORLD; i++) {
928 b[i] = SCP_DOW(m[i], v);
929 }
930 return b;
931 }
932
MTVEQ_DOW(const REAL_DD m,const REAL_D v,REAL_D b)933 static inline REAL *MTVEQ_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
934 {
935 int i, j;
936
937 for (i = 0; i < DIM_OF_WORLD; i++) {
938 b[i] = 0.0;
939 for (j = 0; j < DIM_OF_WORLD; j++) {
940 b[i] += m[j][i] * v[j];
941 }
942 }
943 return b;
944 }
945
DMVEQ_DOW(const REAL_D x,const REAL_D y,REAL_D r)946 static inline REAL *DMVEQ_DOW(const REAL_D x, const REAL_D y, REAL_D r)
947 {
948 int i;
949 for (i = 0; i < DIM_OF_WORLD; i++) {
950 r[i] = x[i]*y[i];
951 }
952 return r;
953 }
954
955 #define SCMVEQ_DOW(m, v, b) AXEY_DOW(m, v, b)
956
957 static inline REAL *
MMBIMV_DOW(REAL a,const REAL_DD A,REAL b,const REAL_DD B,const REAL_D v,REAL c,REAL_D w)958 MMBIMV_DOW(REAL a, const REAL_DD A, REAL b, const REAL_DD B, const REAL_D v,
959 REAL c, REAL_D w)
960 {
961 int i, j;
962 REAL sum;
963
964 for (i = 0; i < DIM_OF_WORLD; i++) {
965 sum = 0.0;
966 for (j = 0; j < DIM_OF_WORLD; j++) {
967 sum += (a * A[i][j] + b * B[i][j]) * v[j];
968 }
969 w[i] = c*w[i] + sum;
970 }
971 return w;
972 }
973
974 static inline REAL *
MDMBIMV_DOW(REAL a,const REAL_DD A,REAL b,const REAL_D B,const REAL_D v,REAL c,REAL_D w)975 MDMBIMV_DOW(REAL a, const REAL_DD A, REAL b, const REAL_D B, const REAL_D v,
976 REAL c, REAL_D w)
977 {
978 int i, j;
979 REAL sum;
980
981 for (i = 0; i < DIM_OF_WORLD; i++) {
982 sum = 0.0;
983 for (j = 0; j < DIM_OF_WORLD; j++) {
984 sum += a * A[i][j] * v[j];
985 }
986 w[i] = c*w[i] + sum + b * B[i] * v[i];
987 }
988 return w;
989 }
990
991 static inline REAL *
DMMBIMV_DOW(REAL a,const REAL_D A,REAL b,const REAL_DD B,const REAL_D v,REAL c,REAL_D w)992 DMMBIMV_DOW(REAL a, const REAL_D A, REAL b, const REAL_DD B, const REAL_D v,
993 REAL c, REAL_D w)
994 {
995 return MDMBIMV_DOW(b, B, a, A, v, c, w);
996 }
997
998 static inline REAL *
MSCMBIMV_DOW(REAL a,const REAL_DD A,REAL b,REAL B,const REAL_D v,REAL c,REAL_D w)999 MSCMBIMV_DOW(REAL a, const REAL_DD A, REAL b, REAL B, const REAL_D v,
1000 REAL c, REAL_D w)
1001 {
1002 int i, j;
1003 REAL sum;
1004
1005 for (i = 0; i < DIM_OF_WORLD; i++) {
1006 sum = 0.0;
1007 for (j = 0; j < DIM_OF_WORLD; j++) {
1008 sum += a * A[i][j] * v[j];
1009 }
1010 w[i] = c*w[i] + sum + b * B * v[i];
1011 }
1012 return w;
1013 }
1014
1015
1016 static inline REAL *
SCMMBIMV_DOW(REAL a,REAL A,REAL b,const REAL_DD B,const REAL_D v,REAL c,REAL_D w)1017 SCMMBIMV_DOW(REAL a, REAL A, REAL b, const REAL_DD B, const REAL_D v,
1018 REAL c, REAL_D w)
1019 {
1020 return MSCMBIMV_DOW(b, B, a, A, v, c, w);
1021 }
1022
1023 static inline REAL *
DMDMBIMV_DOW(REAL a,const REAL_D A,REAL b,const REAL_D B,const REAL_D v,REAL c,REAL_D w)1024 DMDMBIMV_DOW(REAL a, const REAL_D A, REAL b, const REAL_D B, const REAL_D v,
1025 REAL c, REAL_D w)
1026 {
1027 int i;
1028
1029 for (i = 0; i < DIM_OF_WORLD; i++) {
1030 w[i] = c*w[i] + (a * A[i] + b * B[i]) * v[i];
1031 }
1032 return w;
1033 }
1034
1035 static inline REAL *
DMSCMBIMV_DOW(REAL a,const REAL_D A,REAL b,REAL B,const REAL_D v,REAL c,REAL_D w)1036 DMSCMBIMV_DOW(REAL a, const REAL_D A, REAL b, REAL B, const REAL_D v,
1037 REAL c, REAL_D w)
1038 {
1039 int i;
1040
1041 for (i = 0; i < DIM_OF_WORLD; i++) {
1042 w[i] = c*w[i] + (a * A[i] + b * B) * v[i];
1043 }
1044 return w;
1045 }
1046
1047 static inline REAL *
SCMDMBIMV_DOW(REAL a,REAL A,REAL b,const REAL_D B,const REAL_D v,REAL c,REAL_D w)1048 SCMDMBIMV_DOW(REAL a, REAL A, REAL b, const REAL_D B, const REAL_D v,
1049 REAL c, REAL_D w)
1050 {
1051 return DMSCMBIMV_DOW(b, B, a, A, v, c, w);
1052 }
1053
1054 static inline REAL *
SCMSCMBIMV_DOW(REAL a,REAL A,REAL b,REAL B,const REAL_D v,REAL c,REAL_D w)1055 SCMSCMBIMV_DOW(REAL a, REAL A, REAL b, REAL B, const REAL_D v,
1056 REAL c, REAL_D w)
1057 {
1058 return AXPBY_DOW(a*A + b*B, v, c, w, w);
1059 }
1060
MGEMV_DOW(REAL a,const REAL_DD m,const REAL_D v,REAL beta,REAL_D b)1061 static inline REAL *MGEMV_DOW(REAL a, const REAL_DD m,
1062 const REAL_D v, REAL beta,
1063 REAL_D b)
1064 {
1065 int i;
1066
1067 for (i = 0; i < DIM_OF_WORLD; i++) {
1068 b[i] = beta*b[i] + a * SCP_DOW(m[i], v);
1069 }
1070 return b;
1071 }
1072
1073 /* Same as above, but without diagonal. */
MGEMV_ND_DOW(REAL a,const REAL_DD m,const REAL_D v,REAL beta,REAL_D b)1074 static inline REAL *MGEMV_ND_DOW(REAL a, const REAL_DD m,
1075 const REAL_D v, REAL beta,
1076 REAL_D b)
1077 {
1078 int i, j;
1079
1080 for (i = 0; i < DIM_OF_WORLD; i++) {
1081 REAL tmp = 0.0;
1082 for (j = 0; j < DIM_OF_WORLD; j++) {
1083 if (i == j) {
1084 continue;
1085 }
1086 tmp += m[i][j] * v[j];
1087 }
1088 b[i] = beta*b[i] + a * tmp;
1089 }
1090 return b;
1091 }
1092
1093 #define GEMV_DOW(a, m, v, beta, b) MGEMV_DOW(a, m, v, beta, b)
1094 #define GEMV_ND_DOW(a, m, v, beta, b) MGEMV_ND_DOW(a, m, v, beta, b)
1095
DMGEMV_DOW(REAL a,const REAL_D x,const REAL_D y,REAL beta,REAL_D r)1096 static inline REAL *DMGEMV_DOW(REAL a, const REAL_D x, const REAL_D y,
1097 REAL beta, REAL_D r)
1098 {
1099 int i;
1100 for (i = 0; i < DIM_OF_WORLD; i++) {
1101 r[i] = beta*r[i] + a*x[i]*y[i];
1102 }
1103 return r;
1104 }
1105
DMGEMV_ND_DOW(REAL a,const REAL_D x,const REAL_D y,REAL beta,REAL_D r)1106 static inline REAL *DMGEMV_ND_DOW(REAL a, const REAL_D x, const REAL_D y,
1107 REAL beta, REAL_D r)
1108 {
1109 int i;
1110 for (i = 0; i < DIM_OF_WORLD; i++) {
1111 r[i] *= beta;
1112 }
1113 return r;
1114 }
1115
MGEMTV_DOW(REAL a,const REAL_DD m,const REAL_D v,REAL beta,REAL_D b)1116 static inline REAL *MGEMTV_DOW(REAL a, const REAL_DD m,
1117 const REAL_D v, REAL beta,
1118 REAL_D b)
1119 {
1120 int i, j;
1121 REAL tmp;
1122
1123 for (i = 0; i < DIM_OF_WORLD; i++) {
1124 b[i] *= beta;
1125 tmp = m[0][i] * v[0];
1126 for (j = 1; j < DIM_OF_WORLD; j++) {
1127 tmp += m[j][i] * v[j];
1128 }
1129 b[i] += a*tmp;
1130 }
1131 return b;
1132 }
1133
1134 #define GEMTV_DOW(a, m, v, beta, b) MGEMTV_DOW(a, m, v, beta, b)
1135
SCMGEMV_DOW(REAL a,REAL m,const REAL_D v,REAL beta,REAL_D b)1136 static inline REAL *SCMGEMV_DOW(REAL a, REAL m, const REAL_D v, REAL beta,
1137 REAL_D b)
1138 {
1139 int i;
1140
1141 m *= a;
1142 for (i = 0; i < DIM_OF_WORLD; i++) {
1143 b[i] *= beta;
1144 b[i] += m*v[i];
1145 }
1146 return b;
1147 }
1148
SCMGEMV_ND_DOW(REAL a,REAL m,const REAL_D v,REAL beta,REAL_D b)1149 static inline REAL *SCMGEMV_ND_DOW(REAL a, REAL m, const REAL_D v, REAL beta,
1150 REAL_D b)
1151 {
1152 int i;
1153
1154 for (i = 0; i < DIM_OF_WORLD; i++) {
1155 b[i] *= beta;
1156 }
1157 return b;
1158 }
1159
1160 #define DMGEMTV_DOW(a, m, v, beta, b) DMGEMV_DOW(a, m, v, beta, b)
1161 #define SCMGEMTV_DOW(a, m, v, beta, b) SCMGEMV_DOW(a, m, v, beta, b)
1162
MSCP_DOW(const REAL_DD x,const REAL_DD y)1163 static inline REAL MSCP_DOW(const REAL_DD x, const REAL_DD y)
1164 {
1165 REAL res;
1166 int i;
1167
1168 res = SCP_DOW(x[0], y[0]);
1169 for (i = 1; i < DIM_OF_WORLD; i++) {
1170 res += SCP_DOW(x[i], y[i]);
1171 }
1172 return res;
1173 }
1174
1175 #define DMSCP_DOW(x, y) SCP_DOW(x, y)
1176
SET_DOW(REAL val,REAL_D x)1177 static inline REAL *SET_DOW(REAL val, REAL_D x)
1178 {
1179 int i;
1180 for (i = 0; i < DIM_OF_WORLD; i++) {
1181 x[i] = val;
1182 }
1183 return x;
1184 }
1185
MSET_DOW(REAL val,REAL_DD m)1186 static inline REAL_D *MSET_DOW(REAL val, REAL_DD m)
1187 {
1188 int i, j;
1189
1190 for (i = 0; i < DIM_OF_WORLD; i++) {
1191 m[i][i] = val;
1192 for (j = i+1; j < DIM_OF_WORLD; j++) {
1193 m[j][i] = m[i][j] = 0.0;
1194 }
1195 }
1196 return m;
1197 }
1198
1199 #define DMSET_DOW(val, m) SET_DOW(val, m)
1200 #define SCMSET_DOW(val, m) (m) = (val)
1201
CMP_DOW(REAL val,const REAL_D a)1202 static inline bool CMP_DOW(REAL val, const REAL_D a)
1203 {
1204 int i;
1205
1206 for (i = 0; i < DIM_OF_WORLD; i++) {
1207 if (a[i] != val) {
1208 return false;
1209 }
1210 }
1211 return true;
1212 }
1213
MCMP_DOW(REAL val,const REAL_DD a)1214 static inline bool MCMP_DOW(REAL val, const REAL_DD a)
1215 {
1216 int i, j;
1217
1218 for (i = 0; i < DIM_OF_WORLD; i++) {
1219 if (a[i][i] != val) {
1220 return false;
1221 }
1222 for (j = i+1; j < DIM_OF_WORLD; j++) {
1223 if (a[i][j] != 0.0 || a[j][i] != 0.0) {
1224 return false;
1225 }
1226 }
1227 }
1228 return true;
1229 }
1230
1231 #define DMCMP_DOW(val, m) CMP_DOW(val, m)
1232 #define SCMCMP_DOW(val, m) ((m) == (val))
1233
1234 #if DIM_OF_WORLD == 2
WEDGE_DOW(const REAL_D a,const REAL_D b)1235 static inline REAL WEDGE_DOW(const REAL_D a, const REAL_D b)
1236 {
1237 return a[0]*b[1] - a[1]*b[0];
1238 }
1239 #endif
1240
1241 #if DIM_OF_WORLD == 3
WEDGE_DOW(const REAL_D a,const REAL_D b,REAL_D r)1242 static inline REAL *WEDGE_DOW(const REAL_D a, const REAL_D b, REAL_D r)
1243 {
1244 r[0] = a[1]*b[2] - a[2]*b[1];
1245 r[1] = a[2]*b[0] - a[0]*b[2];
1246 r[2] = a[0]*b[1] - a[1]*b[0];
1247 return r;
1248 }
1249 #endif
1250
1251 #define MAT_SWITCH_TYPE(type, body_f, body_d, body_sc) \
1252 switch (type) { \
1253 case MATENT_REAL_DD: body_f; break; \
1254 case MATENT_REAL_D: body_d; break; \
1255 case MATENT_REAL: body_sc; break; \
1256 default: ERROR_EXIT("Unknown MATENT_TYPE (%d)\n", type); \
1257 }
1258
1259 /* MAT_BODY(PFX, CONSTCAST, CAST, SUF, TYPE) is supposed to be a
1260 * "multiplex" macro where BLAS routines are accessed via
1261 " * PFX##AXPY_DOW(..., CONSTCAST var##SUF, ...)
1262 *
1263 * PFX is one of M, DM, SCM
1264 * CAST and CONSTCAST specify type-casts s.t. the ...._DOW() functions
1265 * compile without error.
1266 * SUF is the suffix attached to some block-matrix types, one of
1267 * real, real_d, real_dd
1268 * TYPE is the actual type corresponding to SUF, one of
1269 * REAL, RELA_D, REAL_DD
1270 */
1271 #define MAT_EMIT_BODY_SWITCH(type) \
1272 MAT_SWITCH_TYPE( \
1273 type, \
1274 MAT_BODY(M, (const REAL_D *), (REAL_D *), real_dd, REAL_DD), \
1275 MAT_BODY(DM, , , real_d, REAL_D), \
1276 MAT_BODY(SCM, , , real, REAL))
1277
1278
1279 /* BI_MAT_BODY(PFX1, CONSTCAST1, CAST1, SUF1, TYPE1,
1280 * PFX2, CONSTCAST2, CAST2, SUF2, TYPE2)
1281 *
1282 * is supposed to be a "multiplex" macro where BLAS routines are
1283 * accessed via " * PFX##AXPY_DOW(..., CONSTCAST var##SUF, ...) etc.
1284 */
1285 #define MAT_EMIT_BI_BODY_SWITCH(type1, type2) \
1286 MAT_SWITCH_TYPE( \
1287 type1, \
1288 MAT_SWITCH_TYPE( \
1289 type2, \
1290 MAT_BI_BODY(M, (const REAL_D *), (REAL_D *), real_dd, REAL_DD, \
1291 M, (const REAL_D *), (REAL_D *), real_dd, REAL_DD), \
1292 MAT_BI_BODY(M, (const REAL_D *), (REAL_D *), real_dd, REAL_DD, \
1293 DM, , , real_d, REAL_D), \
1294 MAT_BI_BODY(M, (const REAL_D *), (REAL_D *), real_dd, REAL_DD, \
1295 SCM, , , real, REAL)), \
1296 MAT_SWITCH_TYPE( \
1297 type2, \
1298 MAT_BI_BODY(DM, , , real_d, REAL_D, \
1299 M, (const REAL_D *), (REAL_D *), real_dd, REAL_DD), \
1300 MAT_BI_BODY(DM, , , real_d, REAL_D, \
1301 DM, , , real_d, REAL_D), \
1302 MAT_BI_BODY(DM, , , real_d, REAL_D, \
1303 SCM, , , real, REAL)), \
1304 MAT_SWITCH_TYPE( \
1305 type2, \
1306 MAT_BI_BODY(SCM, , , real, REAL, \
1307 M, (const REAL_D *), (REAL_D *), real_dd, REAL_DD), \
1308 MAT_BI_BODY(SCM, , , real, REAL, \
1309 DM, , , real_d, REAL_D), \
1310 MAT_BI_BODY(SCM, , , real, REAL, \
1311 SCM, , , real, REAL)))
1312
1313 /* TRI_MAT_BODY(PFX1, CONSTCAST1, CAST1, SUF1, TYPE1,
1314 * PFX2, CONSTCAST2, CAST2, SUF2, TYPE2)
1315 *
1316 * is supposed to be a "multiplex" macro where BLAS routines are
1317 * accessed via " * PFX##AXPY_DOW(..., CONSTCAST var##SUF, ...) etc.
1318 */
1319 #define MAT_EMIT_TRI_BODY_SWITCH(type1, type2) \
1320 MAT_SWITCH_TYPE( \
1321 type1, \
1322 MAT_SWITCH_TYPE( \
1323 type2, \
1324 MAT_TRI_BODY(M, (const REAL_D *), (REAL_D *), real_dd, REAL_DD, \
1325 M, (const REAL_D *), (REAL_D *), real_dd, REAL_DD), \
1326 MAT_TRI_BODY(M, (const REAL_D *), (REAL_D *), real_dd, REAL_DD, \
1327 DM, , , real_d, REAL_D), \
1328 MAT_TRI_BODY(M, (const REAL_D *), (REAL_D *), real_dd, REAL_DD, \
1329 SCM, , , real, REAL)), \
1330 if (type2 == MATENT_REAL_D) { \
1331 MAT_TRI_BODY(DM, , , real_d, REAL_D, \
1332 DM, , , real_d, REAL_D); \
1333 } else if (type2 == MATENT_REAL) { \
1334 MAT_TRI_BODY(DM, , , real_d, REAL_D, \
1335 SCM, , , real, REAL); \
1336 }, \
1337 if (type2 == MATENT_REAL) { \
1338 MAT_TRI_BODY(SCM, , , real, REAL, \
1339 SCM, , , real, REAL); \
1340 })
1341
1342
1343 /* defines where only DOW == 1 plays a special role */
1344 # if DIM_OF_WORLD == 1
1345 # define DIST_DOW(x,y) ABS((x)[0]-(y)[0])
1346 # define NORM_DOW(x) ABS((x)[0])
1347 # define MNRM_DOW(m) ABS((m)[0][0])
1348 # define DMNRM_DOW(m) NRM_DOW(m)
1349 # define MDIST_DOW(a,b) ABS((a)[0][0] - (b)[0][0])
1350 # define DMDIST_DOW(a,b) DIST_DOW(a, b)
1351 # define SCMDIST_DOW(a,b) ABS((a)-(b))
1352 # else
1353 # define NORM_DOW(x) sqrt(NRM2_DOW(x))
1354 # define DIST_DOW(x,y) sqrt(DST2_DOW(x, y))
1355 # define MNORM_DOW(m) sqrt(MNRM2_DOW(m))
1356 # define DMNORM_DOW(m) sqrt(DMNRM2_DOW(m))
1357 # define MDIST_DOW(a,b) sqrt(MDST2_DOW(a, b))
1358 # define DMDIST_DOW(a,b) sqrt(DMDST2_DOW(a, b))
1359 # define SCMDIST_DOW(a,b) sqrt(SCMDST2_DOW(a, b))
1360 # endif
1361
1362 /* defines different for all DOWs */
1363 # if DIM_OF_WORLD == 1
1364 # define EXPAND_DOW(x) (x)[0]
1365 # define FORMAT_DOW "%10.5le"
1366 # define SCAN_FORMAT_DOW "%f"
1367 # define SCAN_EXPAND_DOW(v) &(v)[0]
1368 # define MEXPAND_DOW(m) (m)[0][0]
1369 # define SCAN_MFORMAT_DOW "%f %f"
1370 # define SCAN_MEXPAND_DOW(m) &(m)[0][0]
1371 # define MFORMAT_DOW FORMAT_DOW
1372 # define DMEXPAND_DOW(m) EXPAND_DOW(m)
1373 # define DMFORMAT_DOW FORMAT_DOW
1374 # define SCMEXPAND_DOW(m) (m)
1375 # define SCMFORMAT_DOW "[%10.5le]"
1376 # elif DIM_OF_WORLD == 2
1377 # define EXPAND_DOW(x) (x)[0], (x)[1]
1378 # define FORMAT_DOW "[%10.5le, %10.5le]"
1379 # define SCAN_FORMAT_DOW "%f %f"
1380 # define SCAN_EXPAND_DOW(v) &(v)[0], &(v)[1]
1381 # define MEXPAND_DOW(m) EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1])
1382 # define SCAN_MEXPAND_DOW(m) SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1])
1383 # define SCAN_MFORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1384 # define MFORMAT_DOW "[" FORMAT_DOW ", " FORMAT_DOW "]"
1385 # define DMEXPAND_DOW(m) EXPAND_DOW(m)
1386 # define DMFORMAT_DOW FORMAT_DOW
1387 # define SCMEXPAND_DOW(m) (m)
1388 # define SCMFORMAT_DOW "[%10.5le]"
1389 # elif DIM_OF_WORLD == 3
1390 # define EXPAND_DOW(x) (x)[0], (x)[1], (x)[2]
1391 # define FORMAT_DOW "[%10.5le, %10.5le, %10.5le]"
1392 # define SCAN_FORMAT_DOW "%f %f %f"
1393 # define SCAN_EXPAND_DOW(v) &(v)[0], &(v)[1], &(v)[2]
1394 # define MEXPAND_DOW(m) \
1395 EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2])
1396 # define SCAN_MEXPAND_DOW(m) \
1397 SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2])
1398 # define MFORMAT_DOW "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1399 # define SCAN_MFORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1400 # define DMEXPAND_DOW(m) EXPAND_DOW(m)
1401 # define DMFORMAT_DOW FORMAT_DOW
1402 # define SCMEXPAND_DOW(m) (m)
1403 # define SCMFORMAT_DOW "[%10.5le]"
1404 # elif DIM_OF_WORLD == 4
1405 # define EXPAND_DOW(x) (x)[0], (x)[1], (x)[2], (x)[3]
1406 # define FORMAT_DOW "[%10.5le, %10.5le, %10.5le, %10.5le]"
1407 # define SCAN_FORMAT_DOW "%f %f %f %f"
1408 # define SCAN_EXPAND_DOW(v) &(v)[0], &(v)[1], &(v)[2], &(v)[3]
1409 # define MEXPAND_DOW(m) \
1410 EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]), EXPAND_DOW((m)[3])
1411 # define SCAN_MEXPAND_DOW(m) \
1412 SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), \
1413 SCAN_EXPAND_DOW((m)[2]), SCAN_EXPAND_DOW((m)[3])
1414 # define MFORMAT_DOW \
1415 "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1416 # define SCAN_MFORMAT_DOW \
1417 SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1418 # define DMEXPAND_DOW(m) EXPAND_DOW(m)
1419 # define DMFORMAT_DOW FORMAT_DOW
1420 # define SCMEXPAND_DOW(m) (m)
1421 # define SCMFORMAT_DOW "[%10.5le]"
1422 # elif DIM_OF_WORLD == 5
1423 # define EXPAND_DOW(x) (x)[0], (x)[1], (x)[2], (x)[3], (x)[4]
1424 # define FORMAT_DOW "[%10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
1425 # define SCAN_FORMAT_DOW "%f %f %f %f %f"
1426 # define SCAN_EXPAND_DOW(v) &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4]
1427 # define MEXPAND_DOW(m) \
1428 EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]), \
1429 EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4])
1430 # define SCAN_MEXPAND_DOW(m) \
1431 SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
1432 SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4])
1433 # define MFORMAT_DOW \
1434 "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1435 # define SCAN_MFORMAT_DOW \
1436 SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW \
1437 SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1438 # define DMEXPAND_DOW(m) EXPAND_DOW(m)
1439 # define DMFORMAT_DOW FORMAT_DOW
1440 # define SCMEXPAND_DOW(m) (m)
1441 # define SCMFORMAT_DOW "[%10.5le]"
1442 # elif DIM_OF_WORLD == 6
1443 # define EXPAND_DOW(x) (x)[0], (x)[1], (x)[2], (x)[3], (x)[4], (x)[5]
1444 # define FORMAT_DOW \
1445 # define SCAN_FORMAT_DOW "%f %f %f %f %f %f"
1446 # define SCAN_EXPAND_DOW(v) \
1447 &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4], &(v)[5]
1448 # define MFORMAT_DOW \
1449 "[%10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
1450 # define MEXPAND_DOW(m) \
1451 EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]), \
1452 EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4]), EXPAND_DOW((m)[5])
1453 # define SCAN_MEXPAND_DOW(m) \
1454 SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
1455 SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4]), SCAN_EXPAND_DOW((m)[5])
1456 # define MFORMAT_DOW \
1457 "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " \
1458 FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1459 # define SCAN_MFORMAT_DOW \
1460 SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW \
1461 SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1462 # define DMEXPAND_DOW(m) EXPAND_DOW(m)
1463 # define DMFORMAT_DOW FORMAT_DOW
1464 # define SCMEXPAND_DOW(m) (m)
1465 # define SCMFORMAT_DOW "[%10.5le]"
1466 # elif DIM_OF_WORLD == 7
1467 # define EXPAND_DOW(x) \
1468 (x)[0], (x)[1], (x)[2], (x)[3], (x)[4], (x)[5], (x)[6]
1469 # define FORMAT_DOW \
1470 "[%10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
1471 # define SCAN_FORMAT_DOW "%f %f %f %f %f %f %f"
1472 # define SCAN_EXPAND_DOW(v) \
1473 &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4], &(v)[5], &(v)[6]
1474 # define MEXPAND_DOW(m) \
1475 EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]), \
1476 EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4]), EXPAND_DOW((m)[5]), \
1477 EXPAND_DOW((m)[6])
1478 # define SCAN_MEXPAND_DOW(m) \
1479 SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
1480 SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4]), SCAN_EXPAND_DOW((m)[5]) \
1481 SCAN_EXPAND_DOW((m)[6])
1482 # define MFORMAT_DOW \
1483 "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " \
1484 FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1485 # define SCAN_MFORMAT_DOW \
1486 SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW \
1487 SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1488 # define DMEXPAND_DOW(m) EXPAND_DOW(m)
1489 # define DMFORMAT_DOW FORMAT_DOW
1490 # define SCMEXPAND_DOW(m) (m)
1491 # define SCMFORMAT_DOW "[%10.5le]"
1492 # elif DIM_OF_WORLD == 8
1493 # define EXPAND_DOW(x) \
1494 (x)[0], (x)[1], (x)[2], (x)[3], (x)[4], (x)[5], (x)[6], (x)[7]
1495 # define FORMAT_DOW \
1496 "[%10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
1497 # define SCAN_FORMAT_DOW "%f %f %f %f %f %f %f %f"
1498 # define SCAN_EXPAND_DOW(v) \
1499 &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4], &(v)[5], &(v)[6], &(v)[7]
1500 # define MEXPAND_DOW(m) \
1501 EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]), \
1502 EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4]), EXPAND_DOW((m)[5]), \
1503 EXPAND_DOW((m)[6]), EXPAND_DOW((m)[7])
1504 # define SCAN_MEXPAND_DOW(m) \
1505 SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
1506 SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4]), SCAN_EXPAND_DOW((m)[5]) \
1507 SCAN_EXPAND_DOW((m)[6]), SCAN_EXPAND_DOW((m)[7])
1508 # define MFORMAT_DOW \
1509 "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", "\
1510 FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1511 # define SCAN_MFORMAT_DOW \
1512 SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW \
1513 SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
1514 # define DMEXPAND_DOW(m) EXPAND_DOW(m)
1515 # define DMFORMAT_DOW FORMAT_DOW
1516 # define SCMEXPAND_DOW(m) (m)
1517 # define SCMFORMAT_DOW "[%10.5le]"
1518 # elif DIM_OF_WORLD == 9
1519 # define EXPAND_DOW(x) \
1520 (x)[0], (x)[1], (x)[2], (x)[3], (x)[4], (x)[5], (x)[6], (x)[7], (x)[8]
1521 # define FORMAT_DOW \
1522 "[%10.5le, %10.5le, %10.5le, %10.5le, " \
1523 "%10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
1524 # define SCAN_FORMAT_DOW "%f %f %f %f %f %f %f %f %f"
1525 # define SCAN_EXPAND_DOW(v) \
1526 &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4], \
1527 &(v)[5], &(v)[6], &(v)[7], &(v)[8]
1528 # define MEXPAND_DOW(m) \
1529 EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]), \
1530 EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4]), EXPAND_DOW((m)[5]), \
1531 EXPAND_DOW((m)[6]), EXPAND_DOW((m)[7]), EXPAND_DOW((m)[8])
1532 # define SCAN_MEXPAND_DOW(m) \
1533 SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
1534 SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4]), SCAN_EXPAND_DOW((m)[5]) \
1535 SCAN_EXPAND_DOW((m)[6]), SCAN_EXPAND_DOW((m)[7]), SCAN_EXPAND_DOW((m)[8])
1536 # define MFORMAT_DOW \
1537 "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", "\
1538 FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
1539 # define SCAN_MFORMAT_DOW \
1540 SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW \
1541 SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW \
1542 SCAN_FORMAT_DOW
1543 # define DMEXPAND_DOW(m) EXPAND_DOW(m)
1544 # define DMFORMAT_DOW FORMAT_DOW
1545 # define SCMEXPAND_DOW(m) (m)
1546 # define SCMFORMAT_DOW "[%10.5le]"
1547 # endif
1548
1549 /* Some inline functions for barycentric coordinates, and conversion
1550 * between barycentric gradients and cartesian gradients.
1551 */
1552
1553 /** x = a */
SET_BAR(int dim,REAL a,REAL_B x)1554 static inline const REAL *SET_BAR(int dim, REAL a, REAL_B x)
1555 {
1556 int i;
1557
1558 for (i = 0; i < N_LAMBDA(dim); i++) {
1559 x[i] = a;
1560 }
1561 for (; i < N_LAMBDA_MAX; i++) {
1562 x[i] = 0.0;
1563 }
1564 return x;
1565 }
1566
MSET_BAR(int dim,REAL a,REAL_BB x)1567 static inline const REAL_B *MSET_BAR(int dim, REAL a, REAL_BB x)
1568 {
1569 int i;
1570
1571 for (i = 0; i < N_LAMBDA(dim); i++) {
1572 SET_BAR(dim, a, x[i]);
1573 }
1574 return (const REAL_B *)x;
1575 }
1576
1577 /** x *= a */
SCAL_BAR(int dim,REAL a,REAL_B x)1578 static inline const REAL *SCAL_BAR(int dim, REAL a, REAL_B x)
1579 {
1580 int i;
1581
1582 for (i = 0; i < N_LAMBDA(dim); i++) {
1583 x[i] *= a;
1584 }
1585 return x;
1586 }
1587
MSCAL_BAR(int dim,REAL a,REAL_BB x)1588 static inline const REAL_B *MSCAL_BAR(int dim, REAL a, REAL_BB x)
1589 {
1590 int i;
1591
1592 for (i = 0; i < N_LAMBDA(dim); i++) {
1593 SCAL_BAR(dim, a, x[i]);
1594 }
1595 return (const REAL_B *)x;
1596 }
1597
1598 /** z = a*x + b*y, x, y, z are barycentric coordinate tuples.
1599 */
SCP_BAR(int dim,const REAL_B x,const REAL_B y)1600 static inline REAL SCP_BAR(int dim, const REAL_B x, const REAL_B y)
1601 {
1602 int i;
1603 REAL res;
1604
1605 res = x[0] * y[0];
1606 for (i = 1; i < N_LAMBDA(dim); i++) {
1607 res += x[i] * y[i];
1608 }
1609 return res;
1610 }
1611
1612 /** y = a*x, x and y are barycentric coordinate tuples.
1613 */
AXEY_BAR(int dim,REAL a,const REAL_B x,REAL_B y)1614 static inline const REAL *AXEY_BAR(int dim, REAL a, const REAL_B x, REAL_B y)
1615 {
1616 int i;
1617
1618 for (i = 0; i < N_LAMBDA(dim); i++) {
1619 y[i] = a * x[i];
1620 }
1621 return y;
1622 }
1623
MAXEY_BAR(int dim,REAL a,const REAL_BB x,REAL_BB y)1624 static inline const REAL_B *MAXEY_BAR(int dim,
1625 REAL a, const REAL_BB x, REAL_BB y)
1626 {
1627 int i;
1628
1629 for (i = 0; i < N_LAMBDA(dim); i++) {
1630 AXEY_BAR(dim, a, x[i], y[i]);
1631 }
1632 return (const REAL_B *)y;
1633 }
1634
1635 /** y += a*x, x and y are barycentric coordinate tuples.
1636 */
AXPY_BAR(int dim,REAL a,const REAL_B x,REAL_B y)1637 static inline const REAL *AXPY_BAR(int dim, REAL a, const REAL_B x, REAL_B y)
1638 {
1639 int i;
1640
1641 for (i = 0; i < N_LAMBDA(dim); i++) {
1642 y[i] += a * x[i];
1643 }
1644 return y;
1645 }
1646
MAXPY_BAR(int dim,REAL a,const REAL_BB x,REAL_BB y)1647 static inline const REAL_B *MAXPY_BAR(int dim,
1648 REAL a, const REAL_BB x, REAL_BB y)
1649 {
1650 int i;
1651
1652 for (i = 0; i < N_LAMBDA(dim); i++) {
1653 AXPY_BAR(dim, a, x[i], y[i]);
1654 }
1655 return (const REAL_B *)y;
1656 }
1657
1658 /** z = a*x + b*y, x, y, z are barycentric coordinate tuples.
1659 */
AXPBY_BAR(int dim,REAL a,const REAL_B x,REAL b,const REAL_B y,REAL_B z)1660 static inline const REAL *AXPBY_BAR(int dim,
1661 REAL a, const REAL_B x,
1662 REAL b, const REAL_B y,
1663 REAL_B z)
1664 {
1665 int i;
1666
1667 for (i = 0; i < N_LAMBDA(dim); i++) {
1668 z[i] = b*y[i] + a * x[i];
1669 }
1670 return z;
1671 }
1672
MAXPBY_BAR(int dim,REAL a,const REAL_BB x,REAL b,const REAL_BB y,REAL_BB z)1673 static inline const REAL_B *MAXPBY_BAR(int dim,
1674 REAL a, const REAL_BB x,
1675 REAL b, const REAL_BB y,
1676 REAL_BB z)
1677 {
1678 int i;
1679
1680 for (i = 0; i < N_LAMBDA(dim); i++) {
1681 AXPBY_BAR(dim, a, x[i], b, y[i], z[i]);
1682 }
1683 return (const REAL_B *)z;
1684 }
1685
AXPBYPCZ_BAR(int dim,REAL a,const REAL_B x,REAL b,const REAL_B y,REAL c,const REAL_B z,REAL_B w)1686 static inline const REAL *AXPBYPCZ_BAR(int dim,
1687 REAL a, const REAL_B x,
1688 REAL b, const REAL_B y,
1689 REAL c, const REAL_B z,
1690 REAL_B w)
1691 {
1692 int i;
1693
1694 for (i = 0; i < N_LAMBDA(dim); i++) {
1695 w[i] = a * x[i] + b*y[i] + c*z[i];
1696 }
1697 return w;
1698 }
1699
1700 /** b = a, a and b are barycentric coordinate tuples.
1701 */
COPY_BAR(int dim,const REAL_B a,REAL_B b)1702 static inline const REAL *COPY_BAR(int dim, const REAL_B a, REAL_B b)
1703 {
1704 int i;
1705
1706 for (i = 0; i < N_LAMBDA(dim); i++) {
1707 b[i] = a[i];
1708 }
1709 return b;
1710 }
1711
MCOPY_BAR(int dim,const REAL_BB a,REAL_BB b)1712 static inline const REAL_B *MCOPY_BAR(int dim, const REAL_BB a, REAL_BB b)
1713 {
1714 int i;
1715
1716 for (i = 0; i < N_LAMBDA(dim); i++) {
1717 COPY_BAR(dim, a[i], b[i]);
1718 }
1719 return (const REAL_B *)b;
1720 }
1721
1722 /** Convert a barycentric gradient to a world gradient, given the
1723 * gradient of the transformation to the reference element. (VM means
1724 * vector-matrix).
1725 */
1726 __FORCE_INLINE_ATTRIBUTE__
GRAD_DOW(int dim,const REAL_BD Lambda,const REAL_B b_grd,REAL_D x_grd)1727 static inline const REAL *GRAD_DOW(int dim,
1728 const REAL_BD Lambda,
1729 const REAL_B b_grd,
1730 REAL_D x_grd)
1731 {
1732 static REAL_D res;
1733 int i, j;
1734
1735 if (__UNLIKELY__(x_grd == NULL)) {
1736 x_grd = res;
1737 }
1738
1739 for (i = 0; i < DIM_OF_WORLD; i++) {
1740 x_grd[i] = b_grd[0] * Lambda[0][i];
1741 for (j = 1; j < N_LAMBDA(dim); j++) {
1742 x_grd[i] += b_grd[j] * Lambda[j][i];
1743 }
1744 }
1745
1746 return x_grd;
1747 }
1748
1749 __FORCE_INLINE_ATTRIBUTE__
MGRAD_DOW(int dim,const REAL_BD Lambda,const REAL_DB b_grd,REAL_DD x_grd)1750 static inline const REAL_D *MGRAD_DOW(int dim,
1751 const REAL_BD Lambda,
1752 const REAL_DB b_grd,
1753 REAL_DD x_grd)
1754 {
1755 static REAL_DD res;
1756 int i;
1757
1758 if (__UNLIKELY__(x_grd == NULL)) {
1759 x_grd = res;
1760 }
1761
1762 for (i = 0; i < DIM_OF_WORLD; i++) {
1763 GRAD_DOW(dim, Lambda, b_grd[i], x_grd[i]);
1764 }
1765
1766 return (const REAL_D *)x_grd;
1767 }
1768
1769 /* Compute the divergence */
DIV_DOW(int dim,const REAL_BD Lambda,const REAL_DB b_grd)1770 static inline REAL DIV_DOW(int dim, const REAL_BD Lambda, const REAL_DB b_grd)
1771 {
1772 REAL div = 0.0;
1773 int i, alpha;
1774
1775 for (i = 0; i < DIM_OF_WORLD; i++) {
1776 for (alpha = 0; alpha < N_LAMBDA(dim); ++alpha) {
1777 div += Lambda[alpha][i] * b_grd[i][alpha];
1778 }
1779 }
1780
1781 return div;
1782 }
1783
1784 __FORCE_INLINE_ATTRIBUTE__
GRAD_P_DOW(int dim,const REAL_BD Lambda,const REAL_B b_grd,REAL_D x_grd)1785 static inline const REAL *GRAD_P_DOW(int dim,
1786 const REAL_BD Lambda,
1787 const REAL_B b_grd,
1788 REAL_D x_grd)
1789 {
1790 static REAL_D res;
1791 int i, j;
1792
1793 if (__UNLIKELY__(x_grd == NULL)) {
1794 x_grd = res;
1795 }
1796
1797 for (i = 0; i < DIM_OF_WORLD; i++) {
1798 for (j = 0; j < N_LAMBDA(dim); j++) {
1799 x_grd[i] += b_grd[j] * Lambda[j][i];
1800 }
1801 }
1802
1803 return x_grd;
1804 }
1805
1806 __FORCE_INLINE_ATTRIBUTE__
MGRAD_P_DOW(int dim,const REAL_BD Lambda,const REAL_DB b_grd,REAL_DD x_grd)1807 static inline const REAL_D *MGRAD_P_DOW(int dim,
1808 const REAL_BD Lambda,
1809 const REAL_DB b_grd,
1810 REAL_DD x_grd)
1811 {
1812 static REAL_DD res;
1813 int i;
1814
1815 if (__UNLIKELY__(x_grd == NULL)) {
1816 x_grd = res;
1817 }
1818
1819 for (i = 0; i < DIM_OF_WORLD; i++) {
1820 GRAD_P_DOW(dim, Lambda, b_grd[i], x_grd[i]);
1821 }
1822
1823 return (const REAL_D *)x_grd;
1824 }
1825
1826
1827 /** Convert a barycentric Hesse matrix to a world Hesse matrix, given
1828 * the gradient of the transformation to the reference element.
1829 */
1830 __FORCE_INLINE_ATTRIBUTE__
D2_DOW(int dim,const REAL_BD Lambda,const REAL_BB b_hesse,REAL_DD x_hesse)1831 static inline const REAL_D *D2_DOW(int dim,
1832 const REAL_BD Lambda,
1833 const REAL_BB b_hesse,
1834 REAL_DD x_hesse)
1835 {
1836 static REAL_DD res;
1837 int i, j, k, l;
1838
1839 if (__UNLIKELY__(x_hesse == NULL)) {
1840 x_hesse = res;
1841 }
1842
1843 for (i = 0; i < DIM_OF_WORLD; i++) {
1844 x_hesse[i][i] = 0.0;
1845 for (k = 0; k < N_LAMBDA(dim); k++) {
1846 x_hesse[i][i] += Lambda[k][i] * b_hesse[k][k] * Lambda[k][i];
1847 for (l = k+1; l < N_LAMBDA(dim); l++) {
1848 x_hesse[i][i] += 2.0 * Lambda[k][i] * b_hesse[k][l] * Lambda[l][i];
1849 }
1850 }
1851 for (j = i+1; j < DIM_OF_WORLD; j++) {
1852 x_hesse[i][j] = 0.0;
1853 for (k = 0; k < N_LAMBDA(dim); k++) {
1854 x_hesse[i][j] += Lambda[k][i] * b_hesse[k][k] * Lambda[k][j];
1855 for (l = k+1; l < N_LAMBDA(dim); l++) {
1856 x_hesse[i][j] += b_hesse[k][l]*(Lambda[k][i] * Lambda[l][j]
1857 +
1858 Lambda[l][i] * Lambda[k][j]);
1859 }
1860 }
1861 x_hesse[j][i] = x_hesse[i][j];
1862 }
1863 }
1864
1865 return (const REAL_D *)x_hesse;
1866 }
1867
1868 __FORCE_INLINE_ATTRIBUTE__
MD2_DOW(int dim,const REAL_BD Lambda,const REAL_BB * b_hesse,REAL_DDD x_hesse)1869 static inline const REAL_DD *MD2_DOW(int dim,
1870 const REAL_BD Lambda,
1871 const REAL_BB *b_hesse,
1872 REAL_DDD x_hesse)
1873 {
1874 static REAL_DDD res;
1875 int i;
1876
1877 if (__UNLIKELY__(x_hesse == NULL)) {
1878 x_hesse = res;
1879 }
1880
1881 for (i = 0; i < DIM_OF_WORLD; i++) {
1882 D2_DOW(dim, Lambda, b_hesse[i], x_hesse[i]);
1883 }
1884
1885 return (const REAL_DD *)x_hesse;
1886 }
1887
1888 __FORCE_INLINE_ATTRIBUTE__
D2_P_DOW(int dim,const REAL_BD Lambda,const REAL_BB b_hesse,REAL_DD x_hesse)1889 static inline const REAL_D *D2_P_DOW(int dim,
1890 const REAL_BD Lambda,
1891 const REAL_BB b_hesse,
1892 REAL_DD x_hesse)
1893 {
1894 static REAL_DD res;
1895 int i, j, k, l;
1896
1897 if (__UNLIKELY__(x_hesse == NULL)) {
1898 x_hesse = res;
1899 }
1900
1901 for (i = 0; i < DIM_OF_WORLD; i++) {
1902 for (k = 0; k < N_LAMBDA(dim); k++) {
1903 x_hesse[i][i] += Lambda[k][i] * b_hesse[k][k] * Lambda[k][i];
1904 for (l = k+1; l < N_LAMBDA(dim); l++) {
1905 x_hesse[i][i] += 2.0 * Lambda[k][i] * b_hesse[k][l] * Lambda[l][i];
1906 }
1907 }
1908 for (j = i+1; j < DIM_OF_WORLD; j++) {
1909 REAL tmp = 0.0;
1910 for (k = 0; k < N_LAMBDA(dim); k++) {
1911 tmp += Lambda[k][i] * b_hesse[k][k] * Lambda[k][j];
1912 for (l = k+1; l < N_LAMBDA(dim); l++) {
1913 tmp += b_hesse[k][l]*(Lambda[k][i] * Lambda[l][j]
1914 +
1915 Lambda[l][i] * Lambda[k][j]);
1916 }
1917 }
1918 x_hesse[i][j] += tmp;
1919 x_hesse[j][i] += tmp;
1920 }
1921 }
1922
1923 return (const REAL_D *)x_hesse;
1924 }
1925
1926 __FORCE_INLINE_ATTRIBUTE__
MD2_P_DOW(int dim,const REAL_BD Lambda,const REAL_BB * b_hesse,REAL_DDD x_hesse)1927 static inline const REAL_DD *MD2_P_DOW(int dim,
1928 const REAL_BD Lambda,
1929 const REAL_BB *b_hesse,
1930 REAL_DDD x_hesse)
1931 {
1932 static REAL_DDD res;
1933 int i;
1934
1935 if (__UNLIKELY__(x_hesse == NULL)) {
1936 x_hesse = res;
1937 }
1938
1939 for (i = 0; i < DIM_OF_WORLD; i++) {
1940 D2_P_DOW(dim, Lambda, b_hesse[i], x_hesse[i]);
1941 }
1942
1943 return (const REAL_DD *)x_hesse;
1944 }
1945
1946 /**Compute the Laplacian from a given Hessian in barycentric co-ordinates. */
LAPLACE_DOW(int dim,const REAL_BD Lambda,const REAL_BB b_hesse)1947 static inline REAL LAPLACE_DOW(int dim,
1948 const REAL_BD Lambda,
1949 const REAL_BB b_hesse)
1950 {
1951 REAL res = 0.0;
1952 int i, k, l;
1953
1954 for (i = 0; i < DIM_OF_WORLD; i++) {
1955 for (k = 0; k < N_LAMBDA(dim); k++) {
1956 res += Lambda[k][i] * b_hesse[k][k] * Lambda[k][i];
1957 for (l = k+1; l < N_LAMBDA(dim); l++) {
1958 res += 2.0 * Lambda[k][i] * b_hesse[k][l] * Lambda[l][i];
1959 }
1960 }
1961 }
1962
1963 return res;
1964 }
1965
1966 __FORCE_INLINE_ATTRIBUTE__
MLAPLACE_DOW(int dim,const REAL_BD Lambda,const REAL_BB * b_hesse,REAL_D laplace)1967 static inline const REAL *MLAPLACE_DOW(int dim,
1968 const REAL_BD Lambda,
1969 const REAL_BB *b_hesse,
1970 REAL_D laplace)
1971 {
1972 static REAL_D res;
1973 int i;
1974
1975 if (__UNLIKELY__(laplace == NULL)) {
1976 laplace = res;
1977 }
1978
1979 for (i = 0; i < DIM_OF_WORLD; i++) {
1980 laplace[i] = LAPLACE_DOW(dim, Lambda, b_hesse[i]);
1981 }
1982
1983 return (const REAL *)laplace;
1984 }
1985
1986 __FORCE_INLINE_ATTRIBUTE__
MLAPLACE_P_DOW(int dim,const REAL_BD Lambda,const REAL_BB * b_hesse,REAL_D laplace)1987 static inline const REAL *MLAPLACE_P_DOW(int dim,
1988 const REAL_BD Lambda,
1989 const REAL_BB *b_hesse,
1990 REAL_D laplace)
1991 {
1992 static REAL_D res;
1993 int i;
1994
1995 if (__UNLIKELY__(laplace == NULL)) {
1996 laplace = res;
1997 }
1998
1999 for (i = 0; i < DIM_OF_WORLD; i++) {
2000 laplace[i] += LAPLACE_DOW(dim, Lambda, b_hesse[i]);
2001 }
2002
2003 return (const REAL *)laplace;
2004 }
2005
2006 /** Convert a cartesian gradient to a barycentric gradient, given the
2007 * vertices of the element (in the parametric case "coords" is just
2008 * the barycentric gradient of the coordinate functions).
2009 */
2010 __FORCE_INLINE_ATTRIBUTE__
GRAD_BAR(int dim,const REAL_D * coords,const REAL_D x_grd,REAL_B b_grd)2011 static inline const REAL *GRAD_BAR(int dim,
2012 const REAL_D *coords,
2013 const REAL_D x_grd,
2014 REAL_B b_grd)
2015 {
2016 static REAL_B res;
2017 int i;
2018
2019 if (__UNLIKELY__(b_grd == NULL)) {
2020 b_grd = res;
2021 }
2022
2023 for (i = 0; i < N_LAMBDA(dim); i++) {
2024 b_grd[i] = SCP_DOW(x_grd, coords[i]);
2025 }
2026
2027 return b_grd;
2028 }
2029
2030 __FORCE_INLINE_ATTRIBUTE__
MGRAD_BAR(int dim,const REAL_D * coords,const REAL_DD x_grd,REAL_DB b_grd)2031 static inline const REAL_B *MGRAD_BAR(int dim,
2032 const REAL_D *coords,
2033 const REAL_DD x_grd,
2034 REAL_DB b_grd)
2035 {
2036 static REAL_B res[DIM_OF_WORLD];
2037 int i;
2038
2039 if (__UNLIKELY__(b_grd == NULL)) {
2040 b_grd = res;
2041 }
2042
2043 for (i = 0; i < DIM_OF_WORLD; i++) {
2044 GRAD_BAR(dim, coords, x_grd[i], b_grd[i]);
2045 }
2046
2047 return (const REAL_B *)b_grd;
2048 }
2049
2050 __FORCE_INLINE_ATTRIBUTE__
D2_BAR(int dim,const REAL_D * coords,const REAL_DD x_D2,REAL_BB b_D2)2051 static inline const REAL_B *D2_BAR(int dim,
2052 const REAL_D *coords,
2053 const REAL_DD x_D2,
2054 REAL_BB b_D2)
2055 {
2056 static REAL_BB res;
2057 int i, j, k, l;
2058
2059 if (__UNLIKELY__(b_D2 == NULL)) {
2060 b_D2 = res;
2061 }
2062
2063 for (i = 0; i < N_LAMBDA(dim); i++) {
2064 b_D2[i][i] = 0.0;
2065 for (k = 0; k < DIM_OF_WORLD; k++) {
2066 b_D2[i][i] += SQR(coords[i][k])*x_D2[k][k];
2067 for (l = k+1; l < DIM_OF_WORLD; l++) {
2068 b_D2[i][i] += coords[i][k]*2.0*x_D2[k][l]*coords[i][l];
2069 }
2070 }
2071 for (j = i+1; j < N_LAMBDA(dim); j++) {
2072 b_D2[i][j] = 0.0;
2073 for (k = 0; k < DIM_OF_WORLD; k++) {
2074 b_D2[i][j] += coords[i][k]*x_D2[k][k]*coords[j][k];
2075 for (l = k+1; l < DIM_OF_WORLD; l++) {
2076 b_D2[i][j] += x_D2[k][l] * (coords[i][k] * coords[j][l]
2077 +
2078 coords[i][l] * coords[j][k]);
2079 }
2080 }
2081 b_D2[j][i] = b_D2[i][j];
2082 }
2083 }
2084 return (const REAL_B *)b_D2;
2085 }
2086
2087 __FORCE_INLINE_ATTRIBUTE__
MD2_BAR(int dim,const REAL_D * coords,const REAL_DD * x_D2,REAL_BB * b_D2)2088 static inline const REAL_BB *MD2_BAR(int dim,
2089 const REAL_D *coords,
2090 const REAL_DD *x_D2,
2091 REAL_BB *b_D2)
2092 {
2093 static REAL_BB res[DIM_OF_WORLD];
2094 int i;
2095
2096 if (__UNLIKELY__(b_D2 == NULL)) {
2097 b_D2 = res;
2098 }
2099
2100 for (i = 0; i < DIM_OF_WORLD; i++) {
2101 D2_BAR(dim, coords, x_D2[i], b_D2[i]);
2102 }
2103
2104 return (const REAL_BB *)b_D2;
2105 }
2106
MM_DOW(const REAL_DD a,const REAL_DD b,REAL_DD c)2107 static inline REAL_D *MM_DOW(const REAL_DD a,
2108 const REAL_DD b, REAL_DD c)
2109 {
2110 int i, j, k;
2111
2112 for (i = 0; i < DIM_OF_WORLD; i++) {
2113 for (j = 0; j < DIM_OF_WORLD; j++) {
2114 c[i][j] = 0.0;
2115 for (k = 0; k < DIM_OF_WORLD; k++) {
2116 c[i][j] += a[i][k]*b[k][j];
2117 }
2118 }
2119 }
2120 return c;
2121 }
MMT_DOW(const REAL_DD a,const REAL_DD b,REAL_DD c)2122 static inline REAL_D *MMT_DOW(const REAL_DD a,
2123 const REAL_DD b, REAL_DD c)
2124 {
2125 int i, j, k;
2126
2127 for (i = 0; i < DIM_OF_WORLD; i++) {
2128 for (j = 0; j < DIM_OF_WORLD; j++) {
2129 c[i][j] = 0.0;
2130 for (k = 0; k < DIM_OF_WORLD; k++) {
2131 c[i][j] += a[i][k]*b[j][k];
2132 }
2133 }
2134 }
2135 return c;
2136 }
MTM_DOW(const REAL_DD a,const REAL_DD b,REAL_DD c)2137 static inline REAL_D *MTM_DOW(const REAL_DD a,
2138 const REAL_DD b,
2139 REAL_DD c)
2140 {
2141 int i, j, k;
2142
2143 for (i = 0; i < DIM_OF_WORLD; i++) {
2144 for (j = 0; j < DIM_OF_WORLD; j++) {
2145 c[i][j] = a[0][i]*b[j][0];
2146 for (k = 1; k < DIM_OF_WORLD; k++) {
2147 c[i][j] += a[k][i]*b[j][k];
2148 }
2149 }
2150 }
2151 return c;
2152 }
2153
2154 #if DIM_OF_WORLD == 1
MDET_DOW(const REAL_DD m)2155 static inline REAL MDET_DOW(const REAL_DD m)
2156 {
2157 return m[0][0];
2158 }
MINVERT_DOW(const REAL_DD m,REAL_DD mi)2159 static inline REAL MINVERT_DOW(const REAL_DD m, REAL_DD mi)
2160 {
2161 if (mi[0][0] != 0.0) {
2162 mi[0][0] = 1.0/m[0][0];
2163 }
2164
2165 return m[0][0];
2166 }
2167 #elif DIM_OF_WORLD == 2
MDET_DOW(const REAL_DD m)2168 static inline REAL MDET_DOW(const REAL_DD m)
2169 {
2170 return m[0][0]*m[1][1] - m[1][0]*m[0][1];
2171 }
MINVERT_DOW(const REAL_DD m,REAL_DD mi)2172 static inline REAL MINVERT_DOW(const REAL_DD m, REAL_DD mi)
2173 {
2174 REAL det = m[0][0]*m[1][1] - m[1][0]*m[0][1];
2175
2176 if (det != 0.0) {
2177 mi[0][0] = m[1][1] / det;
2178 mi[1][1] = m[0][0] / det;
2179 mi[0][1] = -m[0][1] / det;
2180 mi[1][0] = -m[1][0] / det;
2181 }
2182
2183 return det;
2184 }
2185 #elif DIM_OF_WORLD == 3
MDET_DOW(const REAL_DD m)2186 static inline REAL MDET_DOW(const REAL_DD m)
2187 {
2188 #if 0
2189 int j;
2190 REAL_D tmp;
2191 REAL det;
2192
2193 det = 0;
2194 for (j = 0; j < DIM_OF_WORLD; j++) {
2195 WEDGE_DOW(m[(j+1)%DIM_OF_WORLD], m[(j+2)%DIM_OF_WORLD], tmp);
2196 det += SCP_DOW(tmp, m[j]);
2197 }
2198 return det / (REAL)DIM_OF_WORLD;
2199 #else
2200 return
2201 +(m[1][1]*m[2][2] - m[2][1]*m[1][2]) * m[0][0]
2202 -(m[0][1]*m[2][2] - m[2][1]*m[0][2]) * m[1][0]
2203 +(m[0][1]*m[1][2] - m[1][1]*m[0][2]) * m[2][0];
2204 #endif
2205 }
MINVERT_DOW(const REAL_DD m,REAL_DD mi)2206 static inline REAL MINVERT_DOW(const REAL_DD m, REAL_DD mi)
2207 {
2208 #if 0
2209 int i, j;
2210 REAL_D tmp;
2211
2212 for (j = 0; j < DIM_OF_WORLD; j++) {
2213 WEDGE_DOW(m[(j+1)%DIM_OF_WORLD], m[(j+2)%DIM_OF_WORLD], tmp);
2214 SCAL_DOW(1.0/SCP_DOW(tmp, m[j]), tmp);
2215 for (i = 0; i < DIM_OF_WORLD; i++) {
2216 mi[i][j] = tmp[i];
2217 }
2218 }
2219 return mi;
2220 #else
2221 REAL det;
2222 int i;
2223
2224 mi[0][0] = +(m[1][1]*m[2][2] - m[2][1]*m[1][2]);
2225 mi[0][1] = -(m[0][1]*m[2][2] - m[2][1]*m[0][2]);
2226 mi[0][2] = +(m[0][1]*m[1][2] - m[1][1]*m[0][2]);
2227
2228 det = mi[0][0] * m[0][0] + mi[0][1] * m[1][0] + mi[0][2]*m[2][0];
2229
2230 if (det != 0.0) {
2231 for (i = 0; i < DIM_OF_WORLD; i++) {
2232 mi[0][i] /= det;
2233 }
2234
2235 mi[1][0] = (m[1][2]*m[2][0] - m[1][0]*m[2][2])/det;
2236 mi[1][1] = (m[0][0]*m[2][2] - m[2][0]*m[0][2])/det;
2237 mi[1][2] = (m[1][0]*m[0][2] - m[0][0]*m[1][2])/det;
2238
2239 mi[2][0] = (m[1][0]*m[2][1] - m[1][1]*m[2][0])/det;
2240 mi[2][1] = (m[2][0]*m[0][1] - m[0][0]*m[2][1])/det;
2241 mi[2][2] = (m[0][0]*m[1][1] - m[0][1]*m[1][0])/det;
2242 }
2243
2244 return det;
2245 #endif
2246 }
2247 #else
MDET_DOW(const REAL_DD m)2248 static inline REAL MDET_DOW(const REAL_DD m)
2249 {
2250 FUNCNAME("MDET_DOW");
2251 REAL_DD tmpM;
2252 REAL_D b;
2253 REAL det;
2254 int i;
2255
2256 SET_DOW(0.0, b); /* dummy */
2257 MCOPY_DOW(m, tmpM); /* destructive Gauss destroys M */
2258
2259 square_gauss((REAL *)tmpM, b, b, DIM_OF_WORLD, 1);
2260
2261 det = 1.0;
2262 for (i = 0; i < DIM_OF_WORLD; ++i) {
2263 det *= tmpM[i][i];
2264 }
2265
2266 return det;
2267 }
2268
MINVERT_DOW(const REAL_DD m,REAL_DD mi)2269 static inline REAL_D *MINVERT_DOW(const REAL_DD m, REAL_DD mi)
2270 {
2271 FUNCNAME("MINVERT_DOW");
2272 REAL_DD tmpM, b;
2273
2274 MSET_DOW(1.0, b); /* unit matrix */
2275 MCOPY_DOW(m, tmpM); /* destructive Gauss destroys M */
2276
2277 square_gauss((REAL *)tmpM, (REAL *)b, (REAL *)mi, DIM_OF_WORLD, DIM_OF_WORLD);
2278
2279 return mi;
2280 }
2281 #endif
2282
2283 static const AFF_TRAFO aff_identity = {
2284 { { 1.0, },
2285 #if DIM_OF_WORLD > 1
2286 { 0.0, 1.0, },
2287 #endif
2288 #if DIM_OF_WORLD > 2
2289 { 0.0, 0.0, 1.0, },
2290 #endif
2291 #if DIM_OF_WORLD > 3
2292 { 0.0, 0.0, 0.0, 1.0, },
2293 #endif
2294 #if DIM_OF_WORLD > 4
2295 { 0.0, 0.0, 0.0, 0.0, 1.0, },
2296 #endif
2297 #if DIM_OF_WORLD > 5
2298 { 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, },
2299 #endif
2300 #if DIM_OF_WORLD > 6
2301 { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, },
2302 #endif
2303 #if DIM_OF_WORLD > 7
2304 { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, },
2305 #endif
2306 #if DIM_OF_WORLD > 8
2307 { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, },
2308 #endif
2309 },
2310 { 0.0 }
2311 };
2312
AFFINE_IDENTITY(void)2313 static inline const AFF_TRAFO *AFFINE_IDENTITY(void)
2314 {
2315 return &aff_identity;
2316 }
2317
GET_AFF_TRAFO(int info,const char * key,AFF_TRAFO * T)2318 static inline void GET_AFF_TRAFO(int info, const char *key, AFF_TRAFO *T)
2319 {
2320 GET_PARAMETER(info, key,
2321 SCAN_MFORMAT_DOW SCAN_FORMAT_DOW,
2322 SCAN_MEXPAND_DOW(T->M), SCAN_EXPAND_DOW(T->t));
2323 }
2324
AFFINE_DOW(const AFF_TRAFO * trafo,const REAL_D x,REAL_D y)2325 static inline REAL *AFFINE_DOW(const AFF_TRAFO *trafo,
2326 const REAL_D x,
2327 REAL_D y)
2328 {
2329 SET_DOW(0.0, y);
2330 MV_DOW((const REAL_D *)trafo->M, x, y);
2331 AXPY_DOW(1.0, trafo->t, y);
2332
2333 return y;
2334 }
2335
2336 /* Apply the inverse of the affine transformation. trafo->M is
2337 * assumed to be orthogonal.
2338 */
AFFINV_DOW(const AFF_TRAFO * trafo,const REAL_D x,REAL_D y)2339 static inline REAL *AFFINV_DOW(const AFF_TRAFO *trafo,
2340 const REAL_D x,
2341 REAL_D y)
2342 {
2343 REAL_D tmp = { 0.0, };
2344
2345 MTV_DOW((const REAL_D *)trafo->M, trafo->t, tmp);
2346 SET_DOW(0.0, y);
2347 MTV_DOW((const REAL_D *)trafo->M, x, y);
2348 AXPY_DOW(-1.0, tmp, y);
2349
2350 return y;
2351 }
2352
INVAFF_DOW(const AFF_TRAFO * A,AFF_TRAFO * B)2353 static inline AFF_TRAFO *INVAFF_DOW(const AFF_TRAFO *A,
2354 AFF_TRAFO *B)
2355 {
2356 int i, j;
2357
2358 SET_DOW(0.0, B->t);
2359 MTV_DOW((const REAL_D *)A->M, A->t, B->t);
2360 SCAL_DOW(-1.0, B->t);
2361 for (i = 0; i < DIM_OF_WORLD; i++ ) {
2362 for (j = 0; j < DIM_OF_WORLD; j++) {
2363 B->M[i][j] = A->M[j][i];
2364 }
2365 }
2366 return B;
2367 }
2368
AFFAFF_DOW(const AFF_TRAFO * A,const AFF_TRAFO * B,AFF_TRAFO * C)2369 static inline AFF_TRAFO *AFFAFF_DOW(const AFF_TRAFO *A,
2370 const AFF_TRAFO *B,
2371 AFF_TRAFO *C)
2372 {
2373 MM_DOW(A->M, B->M, C->M);
2374 COPY_DOW(A->t, C->t);
2375 MV_DOW(A->M, B->t, C->t);
2376
2377 return C;
2378 }
2379
2380 /* filling of the geometry and the quadrature cache */
2381
2382 static inline const EL_GEOM_CACHE *
fill_el_geom_cache(const EL_INFO * el_info,FLAGS fill_flag)2383 fill_el_geom_cache(const EL_INFO *el_info, FLAGS fill_flag)
2384 {
2385 EL_GEOM_CACHE *elgc;
2386 FLAGS need;
2387 int dim, wall;
2388
2389 elgc = (EL_GEOM_CACHE *)&el_info->el_geom_cache;
2390
2391 if (elgc->current_el != el_info->el) {
2392 elgc->fill_flag = 0;
2393 elgc->current_el = el_info->el;
2394 }
2395
2396 if (!(need = (elgc->fill_flag ^ fill_flag) & fill_flag)) {
2397 return elgc;
2398 }
2399
2400 dim = el_info->mesh->dim;
2401
2402 if (need & FILL_EL_LAMBDA) {
2403 elgc->det = el_grd_lambda_dim(dim, el_info, elgc->Lambda);
2404 elgc->fill_flag |= FILL_EL_LAMBDA|FILL_EL_DET;
2405 } else if (need & FILL_EL_DET) {
2406 elgc->det = el_det_dim(dim, el_info);
2407 elgc->fill_flag |= FILL_EL_DET;
2408 }
2409
2410 for (wall = 0; wall < N_WALLS_MAX; wall++) {
2411 if (need & FILL_EL_WALL_ORIENTATION(wall)) {
2412 EL *el = el_info->el;
2413 elgc->orientation[wall][0] = wall_orientation(dim, el, wall);
2414 if ((el_info->fill_flag & FILL_NEIGH) && el_info->neigh[wall]) {
2415 EL *neigh = el_info->neigh[wall];
2416 int oppv = el_info->opp_vertex[wall];
2417
2418 elgc->orientation[wall][1] = wall_orientation(dim, neigh, oppv);
2419 } else {
2420 elgc->orientation[wall][1] = -1;
2421 }
2422 elgc->fill_flag |= FILL_EL_WALL_ORIENTATION(wall);
2423 }
2424 if (need & FILL_EL_WALL_REL_ORIENTATION(wall)) {
2425 DEBUG_TEST_FLAG(FILL_NEIGH, el_info);
2426 if (el_info->neigh[wall]) {
2427 EL *el = el_info->el;
2428 EL *neigh = el_info->neigh[wall];
2429 int oppv = el_info->opp_vertex[wall];
2430 elgc->rel_orientation[wall] =
2431 wall_rel_orientation(dim, el, neigh, wall, oppv);
2432 elgc->fill_flag |= FILL_EL_WALL_REL_ORIENTATION(wall);
2433 }
2434 }
2435 if (need & (FILL_EL_WALL_NORMAL(wall)|FILL_EL_WALL_DET(wall))) {
2436 elgc->wall_det[wall] =
2437 get_wall_normal_dim(dim, el_info, wall, elgc->wall_normal[wall]);
2438 elgc->fill_flag |= FILL_EL_WALL_NORMAL(wall)|FILL_EL_WALL_DET(wall);
2439 }
2440 }
2441
2442 return elgc;
2443 }
2444
2445 /* Fill the quadrature cache for the given element. Maybe this should
2446 * be made an inline function. This function does not call
2447 * parametric->init_element(); it is assumed that this has been done
2448 * before if necessary. We also do not call any INIT_ELEMENT() method.
2449 */
fill_quad_el_cache(const EL_INFO * el_info,const QUAD * quad,FLAGS fill)2450 static inline const QUAD_EL_CACHE *fill_quad_el_cache(const EL_INFO *el_info,
2451 const QUAD *quad,
2452 FLAGS fill)
2453 {
2454 QUAD_EL_CACHE *qelc = (QUAD_EL_CACHE *)quad->metadata;
2455 FLAGS need;
2456 int iq, wall;
2457
2458 if (qelc->current_el != el_info->el) {
2459 qelc->fill_flag = 0;
2460 qelc->current_el = el_info->el;
2461 INIT_ELEMENT(el_info, quad);
2462 }
2463
2464 if (!(need = (qelc->fill_flag ^ fill) & fill)) {
2465 return qelc;
2466 }
2467
2468 if (el_info->fill_flag & FILL_COORDS) {
2469 if (need & FILL_EL_QUAD_WORLD) {
2470 for (iq = 0; iq < quad->n_points; iq++) {
2471 coord_to_world(el_info, quad->lambda[iq], qelc->world[iq]);
2472 }
2473 qelc->fill_flag |= FILL_EL_QUAD_WORLD;
2474 }
2475 } else {
2476 PARAMETRIC *parametric = el_info->mesh->parametric;
2477
2478 TEST_EXIT(parametric,
2479 "FILL_COORDS not set in el_info->fill_flag and "
2480 "not on a parametric mesh.\n");
2481
2482 if (need & FILL_EL_QUAD_WORLD) {
2483 parametric->coord_to_world(el_info, quad, -1, NULL, qelc->world);
2484 }
2485 if (need &
2486 (FILL_EL_QUAD_GRD_WORLD|FILL_EL_QUAD_D2_WORLD|FILL_EL_QUAD_D3_WORLD)) {
2487 parametric->grd_world(el_info, quad, -1, NULL,
2488 (need & FILL_EL_QUAD_GRD_WORLD)
2489 ? qelc->param.grd_world : NULL,
2490 (need & FILL_EL_QUAD_D2_WORLD)
2491 ? qelc->param.D2_world : NULL,
2492 (need & FILL_EL_QUAD_D3_WORLD)
2493 ? qelc->param.D3_world : NULL);
2494 qelc->fill_flag |= need &
2495 (FILL_EL_QUAD_GRD_WORLD|FILL_EL_QUAD_D2_WORLD|FILL_EL_QUAD_D3_WORLD);
2496 }
2497 if (need & FILL_EL_QUAD_DLAMBDA) {
2498 parametric->grd_lambda(el_info, quad, -1, NULL,
2499 qelc->param.Lambda,
2500 qelc->param.DLambda,
2501 qelc->param.det);
2502 qelc->fill_flag |=
2503 FILL_EL_QUAD_DLAMBDA|FILL_EL_QUAD_LAMBDA|FILL_EL_QUAD_DET;
2504 } else if (need & FILL_EL_QUAD_LAMBDA) {
2505 parametric->grd_lambda(el_info, quad, -1, NULL,
2506 qelc->param.Lambda, NULL, qelc->param.det);
2507 qelc->fill_flag |= FILL_EL_QUAD_LAMBDA|FILL_EL_QUAD_DET;
2508 } else if (need & FILL_EL_QUAD_DET) {
2509 parametric->det(el_info, quad, -1, NULL, qelc->param.det);
2510 qelc->fill_flag |= FILL_EL_QUAD_DET;
2511 }
2512
2513 if (need & (FILL_EL_QUAD_WALL_DET |
2514 FILL_EL_QUAD_WALL_NORMAL |
2515 FILL_EL_QUAD_GRD_NORMAL |
2516 FILL_EL_QUAD_D2_NORMAL)) {
2517 DEBUG_TEST_EXIT(quad->codim == 1,
2518 "Wall normals make only sense for co-dim 1.\n");
2519
2520 wall = quad->subsplx;
2521
2522 if (need & FILL_EL_QUAD_D2_NORMAL) {
2523 parametric->wall_normal(el_info, wall, quad, -1, NULL,
2524 qelc->param.wall_normal,
2525 qelc->param.grd_normal,
2526 qelc->param.D2_normal,
2527 qelc->param.wall_det);
2528 qelc->fill_flag |=
2529 (FILL_EL_QUAD_WALL_DET |
2530 FILL_EL_QUAD_WALL_NORMAL |
2531 FILL_EL_QUAD_GRD_NORMAL |
2532 FILL_EL_QUAD_D2_NORMAL);
2533 } else if (need & FILL_EL_QUAD_GRD_NORMAL) {
2534 parametric->wall_normal(el_info, wall, quad, -1, NULL,
2535 qelc->param.wall_normal,
2536 qelc->param.grd_normal,
2537 NULL,
2538 qelc->param.wall_det);
2539 qelc->fill_flag |=
2540 (FILL_EL_QUAD_WALL_DET |
2541 FILL_EL_QUAD_WALL_NORMAL |
2542 FILL_EL_QUAD_GRD_NORMAL);
2543 } else if (need & FILL_EL_QUAD_WALL_NORMAL) {
2544 parametric->wall_normal(el_info, wall, quad, -1, NULL,
2545 qelc->param.wall_normal, NULL, NULL,
2546 qelc->param.wall_det);
2547 qelc->fill_flag |= FILL_EL_QUAD_WALL_DET|FILL_EL_QUAD_WALL_NORMAL;
2548 } else {
2549 parametric->wall_normal(el_info, wall, quad, -1, NULL,
2550 NULL /* no normals */, NULL, NULL,
2551 qelc->param.wall_det);
2552 qelc->fill_flag |= FILL_EL_QUAD_WALL_DET;
2553 }
2554 }
2555 }
2556
2557 return qelc;
2558 }
2559
2560 /* Compute the value of some really vector-valued basis function. */
2561 static inline const REAL *
phi_dow(REAL_D result,int i,const REAL_B lambda,const BAS_FCTS * thisptr)2562 phi_dow(REAL_D result,
2563 int i, const REAL_B lambda, const BAS_FCTS *thisptr)
2564 {
2565 AXEY_DOW(PHI(thisptr, i, lambda), PHI_D(thisptr, i, lambda), result);
2566 return result;
2567 }
2568
2569 /* Compute the barycentric gradient of some really vector-valued basis
2570 * function.
2571 */
2572 static inline const REAL_B *
grd_phi_dow(REAL_DB result,int i,const REAL_B lambda,const BAS_FCTS * thisptr)2573 grd_phi_dow(REAL_DB result,
2574 int i, const REAL_B lambda, const BAS_FCTS *thisptr)
2575 {
2576 int n;
2577 const REAL *grd_phi = GRD_PHI(thisptr, i, lambda);
2578 const REAL *phi_d = PHI_D(thisptr, i, lambda);
2579
2580 for (n = 0; n < DIM_OF_WORLD; n++) {
2581 AXEY_BAR(DIM_MAX, phi_d[n], grd_phi, result[n]);
2582 }
2583
2584 if (!thisptr->dir_pw_const) {
2585 REAL phi = PHI(thisptr, i, lambda);
2586 const REAL_B *grd_phi_d = GRD_PHI_D(thisptr, i, lambda);
2587
2588 for (n = 0; n < DIM_OF_WORLD; n++) {
2589 AXPY_BAR(DIM_MAX, phi, grd_phi_d[n], result[n]);
2590 }
2591 }
2592
2593 return (const REAL_B *)result;
2594 }
2595
2596 /* Compute the barycentric second derivatives of some really
2597 * vector-valued basis functions.
2598 */
2599
2600 #endif /* _ALBERT_INLINES_H_ */
2601