alberta_inlines.h - OpenGrok cross reference for /dports/math/alberta/alberta3-920315ae1bbfd1b1fb6672d916619ac37a411e95/alberta/src/Common/alberta_inlines.h

#ifndef _ALBERT_INLINES_H_
#define _ALBERT_INLINES_H_

/*--------------------------------------------------------------------------*/
/* ALBERT:   an Adaptive multi Level finite element toolbox using           */
/*           Bisectioning refinement and Error control by Residual          */
/*           Techniques                                                     */
/*                                                                          */
/* www.alberta-fem.de                                                       */
/*                                                                          */
/*--------------------------------------------------------------------------*/
/*                                                                          */
/* file: albert_inlines.h                                                   */
/*                                                                          */
/*                                                                          */
/* description: Blas-like inline functions for REAL_Ds and REAL_DDs,        */
/* REAL_Bs & friends.                                                       */
/*                                                                          */
/*--------------------------------------------------------------------------*/
/*                                                                          */
/*  authors:   Alfred Schmidt                                               */
/*             Zentrum fuer Technomathematik                                */
/*             Fachbereich 3 Mathematik/Informatik                          */
/*             Universitaet Bremen                                          */
/*             Bibliothekstr. 2                                             */
/*             D-28359 Bremen, Germany                                      */
/*                                                                          */
/*             Kunibert G. Siebert                                          */
/*             Institut fuer Mathematik                                     */
/*             Universitaet Augsburg                                        */
/*             Universitaetsstr. 14                                         */
/*             D-86159 Augsburg, Germany                                    */
/*                                                                          */
/*             Claus-Justus Heine                                           */
/*             Abteilung fuer Angewandte Mathematik                         */
/*             Albert-Ludwigs-Universitaet Freiburg                         */
/*             Hermann-Herder-Str. 10                                       */
/*             D-79104 Freiburg im Breisgau, Germany                        */
/*                                                                          */
/*                                                                          */
/*  (c) by A. Schmidt, K.G. Siebert, C.-J. Heine (1996-2007)                */
/*                                                                          */
/*--------------------------------------------------------------------------*/

#include "alberta.h" /* essentially a no-op when included from alberta.h */

#ifndef DIM_OF_WORLD
# error Need to know the dimension of the World :)
#endif

/* multiple invocations of macro-arguments can be harmful, if the macro
 * argument is, e.g., a function-call.
 *
 * NOTE: as DIM_OF_WORLD is a constant, the C-compiler should unroll all
 * loops when compiling with optimizations, so there should be no need
 * for hand-unrolling, except in some simple 1D cases.
 *
 * Also, all modern compilers do function inlining, so the
 * function-call  over-head is _not_ a problem.
 *
 * Note: the functions may be nested; they return the address of the
 * _modified_ operand. So AXPY(a, AX(b, x), y) is valid.
 */

/* In addition to BLAS-like routines for REAL_D vectors and REAL_DD
 * matrices this file also defines the access to the per-element
 * quadrature and geometry caches:
 *
 * fill_el_geom_cache()
 * fill_quad_el_cache()
 */

/* The following functions are defined here:
 *
 *   AX(a, x)              --  x *= a (alias SCAL_DOW is also defined)
 *   AXEY(a, x, y)         --  y  = a x
 *   AXPBY(a, x, b, y, z)  --  z  = a x + by
 *   AXPBYPCZ(a, x, b, y, c, z, w)  --  w  = a x + by + cz
 *   AXPBYP(a, x, b, y, z) --  z += a x + by
 *   AXPY(a, x, y)         --  y += a x
 *   COPY(src, dst)        --  dst := src
 *   DIST(x, y)            --  sqrt(DST2(x, y))
 *   DST2(x, y)            --  SCP(x-y, x-y)
 *   NRM2(x)               --  SCP(x, x)
 *   NORM(x)               --  sqrt(NRM2(x))
 *   NORM1(x)              --  fabs(x[0]) + ... + fabs(x[DOW-1])
 *   DIST1(x, y)           --  NORM1(x-y)
 *   PNRMP(x, p)           --  (pow(fabs(x[0]), p) + ... + pow(fabs(x[DOW-1]), p))
 *   NRMP(x, p)            --  pow(PNRMP(x, p), 1.0/p)
 *   NORM8(x)              --  max{fabs(x[0]), ..., fabs(x[DOW-1])} (8==infty)
 *   DIST8(x, y)           --  NORM8(x-y)
 *   SUM(x)                --  x[0] + ... + x[DOW-1]
 *   MTV(m, v, b)          --  b += m^t v
 *   MV(m, v, b)           --  b += m v   (m is a matrix)
 *   MDIV(m, v, b)         --  scale v by the inverse of the diagonal -> b
 *   SCP(x, y)             --  <x, y>
 *   SET(val, x)           --  x[i] = val, i=1, ..., DOW
 *   WEDGE(x, y, n)        --  n  = x /\ y             in 3D
 *   WEDGE(x, y)           --  x0 * y1 - x1 * y0       in 2D
 *
 * The actual function named is generated by adding a _DOW() suffix.
 *
 * Prefix        Version
 * none          REAL_D
 * M             REAL_DD
 * DM            diagonal matrix, diagonal stored in REAL_D vector
 * SCM           scalar matrix, data type REAL (albert.h)
 *
 * Further:
 * Macros EXPAND and FORMAT (with named pre- and suffixes) for easier
 * print-out of REAL_D and REAL_DD, use like this:
 *
 * printf("text"MFORMAT_DOW"more text\n", MEXPAND_DOW(m));
 *
 * Some more functions for barycentric coordinates. NOTE: works best
 * with constant dim. Use with care w.r.t. to optimization.
 *
 * SET_BAR(dim, a, x)
 * SCAL_BAR(dim, a, x)
 * SCP_BAR(dim, x, y)
 * AXPY_BAR(dim, a, x, y)
 * AXPBY_BAR(dim, a, x, b, y, z)
 * COPY_BAR(dim, from, to)
 * GRAD_DOW(dim, Lambda, b_grd, x_grd) -- conversion from barycentric
 *      to cartesian gradients. The function computes x_grd = b_grd Lambda.
 *
 * Some of these functions are also available as matrix versions
 * (e.g. MAXPY_BAR()).
 *
 * Further: to add matrices of higher symmetry to those of lower
 * symmetry the following functions exist. The first prefix is always
 * the type of the destination matrix. Only the well defined functions
 * are implemented. Grin. (Hint: the limiting factor is the structure
 * of the destination matrix ...)
 *
 * {M,DM,SCM}{M,DM,SCM}AXPY_DOW(alpha, a, b)    b += alpha*a
 * {M,DM,SCM}{M,DM,SCM}AXEY_DOW(alpha, a, b)    b  = alpha*a
 * {M,DM,SCM}{M,DM,SCM}COPY_DOW(a, b)           b  = a
 *
 * Some more: DOW x DOW matrix multiplication etc.:
 *
 * MM_DOW(), MMT_DOW(), MTM_DOW(), MDET_DOW(), MINVERT_DOW()
 *
 */

/* Raise a to the power DIM_OF_WORLD.  Dimensions 0..3 are written out
 * explicitly; every other dimension uses a plain multiplication loop.
 */
static inline REAL POW_DOW(REAL a)
{
#if DIM_OF_WORLD == 0
  return 1.0;
#elif DIM_OF_WORLD == 1
  return a;
#elif DIM_OF_WORLD == 2
  return a*a;
#elif DIM_OF_WORLD == 3
  return a*a*a;
#else
  REAL power = a;
  int  n_factors = 1;

  while (n_factors < DIM_OF_WORLD) {
    power *= a;
    n_factors++;
  }
  return power;
#endif
}

/* SCAL_DOW() is an alias for AX_DOW(). */
#define SCAL_DOW(a, x) AX_DOW(a, x)
/* x *= a, component by component.  Returns x so calls can be nested. */
static inline REAL *AX_DOW(REAL a, REAL_D x)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    x[d] *= a;
    d++;
  }
  return x;
}

/* MSCAL_DOW() is an alias for MAX_DOW(). */
#define MSCAL_DOW(a, m) MAX_DOW(a, m)
/* m *= a for every entry of the full matrix m.  Returns m. */
static inline REAL_D *MAX_DOW(REAL a, REAL_DD m)
{
  int row, col;

  for (row = 0; row < DIM_OF_WORLD; row++) {
    for (col = 0; col < DIM_OF_WORLD; col++) {
      m[row][col] *= a;
    }
  }
  return m;
}

/* Diagonal matrix: the diagonal is handed to AX_DOW(), i.e. the vector
 * routine is re-used.
 */
#define DMAX_DOW(a, m)   AX_DOW(a, m)
#define DMSCAL_DOW(a, m) DMAX_DOW(a, m)

/* Scalar matrix: m *= a, where m must be a modifiable lvalue.  The whole
 * expansion is parenthesized so that the macro can be used as an operand
 * of a larger expression, like the other SCM...() macros in this file.
 */
#define SCMAX_DOW(a, m)   ((m) *= (a))
#define SCMSCAL_DOW(a, m) SCMAX_DOW(a, m)

/* y = a x, component by component.  Returns y. */
static inline REAL *AXEY_DOW(REAL a, const REAL_D x, REAL_D y)
{
  int d;

  for (d = 0; d != DIM_OF_WORLD; ++d) {
    y[d] = a * x[d];
  }
  return y;
}

/* y = a x for full matrices, entry by entry in row-major order.
 * Returns y.
 */
static inline REAL_D *MAXEY_DOW(REAL a, const REAL_DD x, REAL_DD y)
{
  int row, col;

  for (row = 0; row < DIM_OF_WORLD; row++) {
    for (col = 0; col < DIM_OF_WORLD; col++) {
      y[row][col] = a * x[row][col];
    }
  }
  return y;
}

/* Diagonal matrix: y = a x, re-using the vector routine. */
#define DMAXEY_DOW(a, x, y)  AXEY_DOW(a, x, y)
/* Scalar matrix: y = a x, where y must be a modifiable lvalue.  Fully
 * parenthesized so that the expansion is a single operand when the macro
 * is used inside a larger expression.
 */
#define SCMAXEY_DOW(a, x, y) ((y) = (a)*(x))

/* y += a x, component by component.  Returns y. */
static inline REAL *AXPY_DOW(REAL a, const REAL_D x, REAL_D y)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    y[d] += a * x[d];
    ++d;
  }
  return y;
}

/* y += a x for full matrices, entry by entry in row-major order.
 * Returns y.
 */
static inline REAL_D *MAXPY_DOW(REAL a, const REAL_DD x, REAL_DD y)
{
  int row, col;

  for (row = 0; row < DIM_OF_WORLD; row++) {
    for (col = 0; col < DIM_OF_WORLD; col++) {
      y[row][col] += a * x[row][col];
    }
  }
  return y;
}

/* y += a x^t: like MAXPY_DOW(), but the transpose of x is added.
 *
 * Each off-diagonal pair (r,c)/(c,r) is handled together; x[r][c] is
 * saved in a temporary before y[r][c] is written.
 */
static inline REAL_D *MAXTPY_DOW(REAL a, const REAL_DD x, REAL_DD y)
{
  REAL upper;
  int r, c;

  for (r = 0; r < DIM_OF_WORLD; r++) {
    y[r][r] += a*x[r][r];
    c = r + 1;
    while (c < DIM_OF_WORLD) {
      upper = x[r][c];
      y[r][c] += a*x[c][r];
      y[c][r] += a*upper;
      c++;
    }
  }
  return y;
}

/* Diagonal matrix: y += a x, re-using the vector routine. */
#define DMAXPY_DOW(a, x, y)  AXPY_DOW(a, x, y)
#define DMAXTPY_DOW(a, x, y) DMAXPY_DOW(a, x, y) /* transpose of diagonal matrix :) */
/* Scalar matrix: y += a x, where y must be a modifiable lvalue.  Fully
 * parenthesized so that the expansion is a single operand when the macro
 * is used inside a larger expression.
 */
#define SCMAXPY_DOW(a, x, y)  ((y) += (a)*(x))
#define SCMAXTPY_DOW(a, x, y) SCMAXPY_DOW(a, x, y)

/* z = a x + b y, component by component.  Returns z. */
static inline REAL *AXPBY_DOW(REAL a, const REAL_D x, REAL b, const REAL_D y,
                              REAL_D z)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    z[d] = b*y[d] + a * x[d];
    d++;
  }
  return z;
}

/* z = a x + b y for full matrices, entry by entry in row-major order.
 * Returns z.
 */
static inline REAL_D *MAXPBY_DOW(REAL a, const REAL_DD x,
                                 REAL b, const REAL_DD y,
                                 REAL_DD z)
{
  int row, col;

  for (row = 0; row < DIM_OF_WORLD; row++) {
    for (col = 0; col < DIM_OF_WORLD; col++) {
      z[row][col] = b*y[row][col] + a * x[row][col];
    }
  }
  return z;
}

/* Diagonal matrix: z = a x + b y, re-using the vector routine. */
#define DMAXPBY_DOW(a, x, b, y, z)  AXPBY_DOW(a, x, b, y, z)
/* Scalar matrix: z = a x + b y, where z must be a modifiable lvalue.
 * Fully parenthesized, matching SCMAXPBYPCZ_DOW() and friends.
 */
#define SCMAXPBY_DOW(a, x, b, y, z) ((z) = (a)*(x) + (b)*(y))

/* w = a x + b y + c z, component by component.  Returns w. */
static inline REAL *AXPBYPCZ_DOW(REAL a, const REAL_D x,
                                 REAL b, const REAL_D y,
                                 REAL c, const REAL_D z,
                                 REAL_D w)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    w[d] = c*z[d] + b*y[d] + a * x[d];
    d++;
  }
  return w;
}

/* w = a x + b y + c z for full matrices, entry by entry in row-major
 * order.  Returns w.
 */
static inline REAL_D *MAXPBYPCZ_DOW(REAL a, const REAL_DD x,
                                    REAL b, const REAL_DD y,
                                    REAL c, const REAL_DD z,
                                    REAL_DD w)
{
  int row, col;

  for (row = 0; row < DIM_OF_WORLD; row++) {
    for (col = 0; col < DIM_OF_WORLD; col++) {
      w[row][col] = c*z[row][col] + b*y[row][col] + a * x[row][col];
    }
  }
  return w;
}

/* Diagonal matrix: w = a x + b y + c z, forwarded to the vector routine. */
#define DMAXPBYPCZ_DOW(a, x, b, y, c, z, w)  AXPBYPCZ_DOW(a, x, b, y, c, z, w)
/* Scalar matrix: w = a x + b y + c z; w must be a modifiable lvalue. */
#define SCMAXPBYPCZ_DOW(a, x, b, y, c, z, w) ((w) = (a)*(x) + (b)*(y) + (c)*(z))

/* z += a x + b y, component by component.  Returns z. */
static inline REAL *AXPBYP_DOW(REAL a, const REAL_D x, REAL b, const REAL_D y,
                               REAL_D z)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    z[d] += b*y[d] + a * x[d];
    d++;
  }
  return z;
}

/* z += a x + b y for full matrices, entry by entry in row-major order.
 * Returns z.
 */
static inline REAL_D *MAXPBYP_DOW(REAL a, const REAL_DD x,
                                  REAL b, const REAL_DD y,
                                  REAL_DD z)
{
  int row, col;

  for (row = 0; row < DIM_OF_WORLD; row++) {
    for (col = 0; col < DIM_OF_WORLD; col++) {
      z[row][col] += b*y[row][col] + a * x[row][col];
    }
  }
  return z;
}

/* Diagonal matrix: z += a x + b y, forwarded to the vector routine. */
#define DMAXPBYP_DOW(a, x, b, y, z)  AXPBYP_DOW(a, x, b, y, z)
/* Scalar matrix: z += a x + b y; z must be a modifiable lvalue. */
#define SCMAXPBYP_DOW(a, x, b, y, z) ((z) += (a)*(x) + (b)*(y))

/* w += a x + b y + c z, component by component.  Returns w. */
static inline REAL *AXPBYPCZP_DOW(REAL a, const REAL_D x,
                                  REAL b, const REAL_D y,
                                  REAL c, const REAL_D z,
                                  REAL_D w)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    w[d] += c*z[d] + b*y[d] + a*x[d];
    d++;
  }
  return w;
}

/* w += a x + b y + c z for full matrices, entry by entry in row-major
 * order.  Returns w.
 */
static inline REAL_D *MAXPBYPCZP_DOW(REAL a, const REAL_DD x,
                                     REAL b, const REAL_DD y,
                                     REAL c, const REAL_DD z,
                                     REAL_DD w)
{
  int row, col;

  for (row = 0; row < DIM_OF_WORLD; row++) {
    for (col = 0; col < DIM_OF_WORLD; col++) {
      w[row][col] += c*z[row][col] + b*y[row][col] + a*x[row][col];
    }
  }
  return w;
}

/* Diagonal matrix: w += a x + b y + c z, forwarded to the vector routine.
 * The macro takes the same seven arguments as AXPBYPCZP_DOW(); the
 * former five-argument form could not expand to a valid call.
 */
#define DMAXPBYPCZP_DOW(a, x, b, y, c, z, w)	\
  AXPBYPCZP_DOW(a, x, b, y, c, z, w)
/* Scalar matrix: w += a x + b y + c z; w must be a modifiable lvalue. */
#define SCMAXPBYPCZP_DOW(a, x, b, y, c, z, w)	\
  ((w) += (a)*(x) + (b)*(y) + (c)*(z))

/***********************/
/* Matrix - Matrix addition */
/* Naming: the first prefix names the destination type, the second the
 * source type (M = REAL_DD, DM = diagonal in a REAL_D, SCM = REAL).
 * Same-type combinations simply forward to the single-prefix routine.
 */
#define MMAXPY_DOW(s, a, b)      MAXPY_DOW(s, a, b)
#define DMDMAXPY_DOW(s, a, b)    DMAXPY_DOW(s, a, b)
#define SCMSCMAXPY_DOW(s, a, b)  SCMAXPY_DOW(s, a, b)

/* Transpose addition, only different for the MM case */
#define MMAXTPY_DOW(s, a, b)     MAXTPY_DOW(s, a, b)
#define MDMAXTPY_DOW(s, a, b)    MDMAXPY_DOW(s, a, b)
#define MSCMAXTPY_DOW(s, a, b)   MSCMAXPY_DOW(s, a, b)

#define DMDMAXTPY_DOW(s, a, b)   DMAXPY_DOW(s, a, b)
#define DMSCMAXTPY_DOW(s, a, b)  DMSCMAXPY_DOW(s, a, b)

#define SCMSCMAXTPY_DOW(s, a, b) SCMAXPY_DOW(s, a, b)

/* y += a diag(x): only the diagonal of the full matrix y is touched.
 * Returns y.
 */
static inline REAL_D *MDMAXPY_DOW(REAL a, const REAL_D x, REAL_DD y)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    y[d][d] += a * x[d];
    d++;
  }

  return y;
}

/* y += a x Id: the scalar a*x is added to every diagonal entry of the
 * full matrix y.  Returns y.
 */
static inline REAL_D *MSCMAXPY_DOW(REAL a, const REAL x, REAL_DD y)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    y[d][d] += a * x;
    d++;
  }

  return y;
}

/* y += a x Id for a diagonal matrix y: a*x is added to each of the
 * DIM_OF_WORLD stored diagonal entries.  Returns y.
 */
static inline REAL *DMSCMAXPY_DOW(REAL a, REAL x, REAL *y)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    y[d] += a*x;
    d++;
  }

  return y;
}

/* Matrix - Matrix initialization */
/* b = s a for source and destination of the same kind; each macro
 * forwards to the corresponding single-prefix routine.
 */
#define MMAXEY_DOW(s, a, b)     MAXEY_DOW(s, a, b)
#define DMDMAXEY_DOW(s, a, b)   DMAXEY_DOW(s, a, b)
#define SCMSCMAXEY_DOW(s, a, b) SCMAXEY_DOW(s, a, b)

/* y = a diag(x): the diagonal of the full matrix y is set from x, every
 * off-diagonal entry is set to zero.  Returns y.
 */
static inline REAL_D *MDMAXEY_DOW(REAL a, const REAL_D x, REAL_DD y)
{
  int r, c;

  for (r = 0; r < DIM_OF_WORLD; r++) {
    y[r][r] = a * x[r];
    c = r + 1;
    while (c < DIM_OF_WORLD) {
      y[r][c] = y[c][r] = 0.0;
      c++;
    }
  }

  return y;
}

/* y = a x Id: every diagonal entry of the full matrix y becomes a*x,
 * every off-diagonal entry becomes zero.  Returns y.
 */
static inline REAL_D *MSCMAXEY_DOW(REAL a, const REAL x, REAL_DD y)
{
  int r, c;

  for (r = 0; r < DIM_OF_WORLD; r++) {
    y[r][r] = a * x;
    c = r + 1;
    while (c < DIM_OF_WORLD) {
      y[r][c] = y[c][r] = 0.0;
      c++;
    }
  }

  return y;
}

/* y = a x Id for a diagonal matrix y: each of the DIM_OF_WORLD stored
 * diagonal entries becomes a*x.  Returns y.
 */
static inline REAL *DMSCMAXEY_DOW(REAL a, REAL x, REAL *y)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    y[d] = a*x;
    d++;
  }

  return y;
}

/* Matrix - Matrix linear combination: z = a x + b y.  The three prefixes
 * name the types of z, x and y in that order; the all-equal combinations
 * forward to the single-prefix routine.
 */
#define MMMAXPBY_DOW(a, x, b, y, z)      MAXPBY_DOW(a, x, b, y, z)
#define DMDMDMAXPBY_DOW(a, x, b, y, z)    DMAXPBY_DOW(a, x, b, y, z)
#define SCMSCMSCMAXPBY_DOW(a, x, b, y, z)  SCMAXPBY_DOW(a, x, b, y, z)

/* z = a x + b diag(y) with full matrices z and x and a diagonal matrix y.
 * Off-diagonal entries of z only receive the a*x contribution.
 * Returns z.
 */
static inline REAL_D *MMDMAXPBY_DOW(REAL a, const REAL_DD x,
                                    REAL b, const REAL_D y,
                                    REAL_DD z)
{
  int r, c;

  for (r = 0; r < DIM_OF_WORLD; r++) {
    z[r][r] = a * x[r][r] + b * y[r];
    c = r + 1;
    while (c < DIM_OF_WORLD) {
      z[r][c] = a * x[r][c];
      z[c][r] = a * x[c][r];
      c++;
    }
  }

  return z;
}

/* z = a x + b y Id with full matrices z and x and a scalar matrix y.
 * Off-diagonal entries of z only receive the a*x contribution.
 * Returns z.
 */
static inline REAL_D *MMSCMAXPBY_DOW(REAL a, const REAL_DD x,
                                     REAL b, const REAL y,
                                     REAL_DD z)
{
  int r, c;

  for (r = 0; r < DIM_OF_WORLD; r++) {
    z[r][r] = a * x[r][r] + b * y;
    c = r + 1;
    while (c < DIM_OF_WORLD) {
      z[r][c] = a * x[r][c];
      z[c][r] = a * x[c][r];
      c++;
    }
  }

  return z;
}

/* z = a diag(x) + b diag(y) stored into the full matrix z; all
 * off-diagonal entries of z are set to zero.  Returns z.
 */
static inline REAL_D *MDMDMAXPBY_DOW(REAL a, const REAL_D x,
                                     REAL b, const REAL_D y,
                                     REAL_DD z)
{
  int r, c;

  for (r = 0; r < DIM_OF_WORLD; r++) {
    z[r][r] = a * x[r] + b * y[r];
    c = r + 1;
    while (c < DIM_OF_WORLD) {
      z[r][c] = z[c][r] = 0.0;
      c++;
    }
  }

  return z;
}

/* z = a diag(x) + b y Id stored into the full matrix z; all off-diagonal
 * entries of z are set to zero.  Returns z.
 *
 * x is only read, so it is const-qualified like the diagonal-matrix
 * arguments of the sibling routines (e.g. MDMDMAXPBY_DOW()); callers
 * holding a const diagonal can now pass it without a cast.
 */
static inline REAL_D *MDMSCMAXPBY_DOW(REAL a, const REAL *x,
				REAL b, REAL y,
				REAL_DD z)
{
  int i, j;

  for (i = 0; i < DIM_OF_WORLD; i++) {
    z[i][i] = a * x[i] + b * y;
    for (j = i+1; j < DIM_OF_WORLD; j++) {
      z[i][j] = z[j][i] = 0.0;
    }
  }

  return z;
}

/* z = (a x + b y) Id stored into the full matrix z; all off-diagonal
 * entries of z are set to zero.  Returns z.
 */
static inline REAL_D *MSCMSCMAXPBY_DOW(REAL a, REAL x,
                                       REAL b, REAL y,
                                       REAL_DD z)
{
  int r, c;

  for (r = 0; r < DIM_OF_WORLD; r++) {
    z[r][r] = a * x + b * y;
    c = r + 1;
    while (c < DIM_OF_WORLD) {
      z[r][c] = z[c][r] = 0.0;
      c++;
    }
  }

  return z;
}

/* z = a diag(x) + b y Id for diagonal matrices z and x and a scalar
 * matrix y.  Returns z.
 */
static inline REAL *DMDMSCMAXPBY_DOW(REAL a, const REAL_D x,
                                     REAL b, const REAL y,
                                     REAL_D z)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    z[d] = a * x[d] + b * y;
    d++;
  }
  return z;
}

/* z = (a x + b y) Id for a diagonal matrix z and scalar matrices x, y.
 * Returns z.
 */
static inline REAL *DMSCMSCMAXPBY_DOW(REAL a, const REAL x,
                                      REAL b, const REAL y,
                                      REAL_D z)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    z[d] = a * x + b * y;
    d++;
  }
  return z;
}

/* Matrix - Matrix copy: b = a.  The first prefix names the destination
 * type, the second the source type.  Copies into a "larger" type are
 * expressed through the ...AXEY_DOW() routines with a factor of 1.0.
 */
#define MMCOPY_DOW(a, b)     MCOPY_DOW(a, b)
#define MDMCOPY_DOW(a, b)    MDMAXEY_DOW(1.0, a, b)
#define MSCMCOPY_DOW(a, b)   MSCMAXEY_DOW(1.0, a, b)

#define DMDMCOPY_DOW(a, b)   DMCOPY_DOW(a, b)
#define DMSCMCOPY_DOW(a, b)  DMSCMAXEY_DOW(1.0, a, b)

/* Scalar copy; b must be a modifiable lvalue.  Fully parenthesized so
 * that the expansion is a single operand inside a larger expression.
 */
#define SCMSCMCOPY_DOW(a, b) ((b) = (a))

/***********************/

/* Copy the vector x into y with a single memcpy() of sizeof(REAL_D)
 * bytes.  Returns y.
 */
static inline REAL *COPY_DOW(const REAL_D x, REAL_D y)
{
  const size_t n_bytes = sizeof(REAL_D);

  memcpy(y, x, n_bytes);

  return y;
}

/* Copy the full matrix x into y with a single memcpy() of
 * sizeof(REAL_DD) bytes.  Returns y.
 */
static inline REAL_D *MCOPY_DOW(const REAL_DD x, REAL_DD y)
{
  const size_t n_bytes = sizeof(REAL_DD);

  memcpy(y, x, n_bytes);

  return y;
}

/* Diagonal matrices are copied with the vector routine. */
#define DMCOPY_DOW(src, dst) COPY_DOW(src, dst)

/* Sum of the squared component differences of x and y. */
static inline REAL DST2_DOW(const REAL_D x, const REAL_D y)
{
# if DIM_OF_WORLD == 1
  return SQR(ABS(x[0] - y[0]));
# else
  REAL sq_dist = SQR(x[0] - y[0]);
  int  d = 1;

  while (d < DIM_OF_WORLD) {
    sq_dist += SQR(x[d] - y[d]);
    d++;
  }
  return sq_dist;
# endif
}

/* Sum of DST2_DOW() over the rows of the full matrices a and b. */
static inline REAL MDST2_DOW(const REAL_DD a, const REAL_DD b)
{
  REAL sq_dist = DST2_DOW(a[0], b[0]);
  int  row = 1;

  while (row < DIM_OF_WORLD) {
    sq_dist += DST2_DOW(a[row], b[row]);
    row++;
  }
  return sq_dist;
}

/* Diagonal matrices use the vector routine on their stored diagonals. */
#define DMDST2_DOW(x, y) DST2_DOW(x, y)

/* Sum of the squared components of x. */
static inline REAL NRM2_DOW(const REAL_D x)
{
  REAL sq_norm = SQR(x[0]);
  int  d = 1;

  while (d < DIM_OF_WORLD) {
    sq_norm += SQR(x[d]);
    d++;
  }
  return sq_norm;
}

/* Sum of NRM2_DOW() over the rows of the full matrix m. */
static inline REAL MNRM2_DOW(const REAL_DD m)
{
  REAL sq_norm = NRM2_DOW(m[0]);
  int  row = 1;

  while (row < DIM_OF_WORLD) {
    sq_norm += NRM2_DOW(m[row]);
    row++;
  }
  return sq_norm;
}

/* Diagonal matrix: NRM2_DOW() applied to the stored diagonal.  The macro
 * forwards its own argument m; the previous body referenced an unrelated
 * identifier x.
 */
#define DMNRM2_DOW(m) NRM2_DOW(m)

/* Sum of fabs() of the components of x. */
static inline REAL NORM1_DOW(const REAL_D x)
{
  REAL total = fabs(x[0]);
  int  d = 1;

  while (d < DIM_OF_WORLD) {
    total += fabs(x[d]);
    d++;
  }

  return total;
}

/* Sum of fabs() of the component differences of x and y. */
static inline REAL DIST1_DOW(const REAL_D x, const REAL_D y)
{
  REAL total = fabs(x[0]-y[0]);
  int  d = 1;

  while (d < DIM_OF_WORLD) {
    total += fabs(x[d]-y[d]);
    d++;
  }

  return total;
}

/* Maximum of fabs() over the components of x, folded with MAX(). */
static inline REAL NORM8_DOW(const REAL_D x)
{
  REAL largest = fabs(x[0]);
  int  d = 1;

  while (d < DIM_OF_WORLD) {
    largest = MAX(largest, fabs(x[d]));
    d++;
  }

  return largest;
}

/* Maximum of fabs() over the component differences of x and y, folded
 * with MAX().
 */
static inline REAL DIST8_DOW(const REAL_D x, const REAL_D y)
{
  REAL largest = fabs(x[0]-y[0]);
  int  d = 1;

  while (d < DIM_OF_WORLD) {
    largest = MAX(largest, fabs(x[d]-y[d]));
    d++;
  }

  return largest;
}

/* Sum of the components of x, starting from x[0]. */
static inline REAL SUM_DOW(const REAL_D x)
{
  REAL total = x[0];
  int  d = 1;

  while (d < DIM_OF_WORLD) {
    total += x[d];
    d++;
  }

  return total;
}

/* Sum of pow(fabs(x[d]), p) over all components; no final root is
 * taken here.
 */
static inline REAL PNRMP_DOW(const REAL_D x, REAL p)
{
  REAL total = pow(fabs(x[0]), p);
  int  d = 1;

  while (d < DIM_OF_WORLD) {
    total += pow(fabs(x[d]), p);
    d++;
  }

  return total;
}

/* pow(PNRMP_DOW(x, p), 1.0/p). */
static inline REAL NRMP_DOW(const REAL_D x, REAL p)
{
  const REAL power_sum = PNRMP_DOW(x, p);

  return pow(power_sum, 1.0/p);
}

/* Sum of NORM1_DOW() over all rows of the full matrix x, i.e. the sum
 * of fabs() over every entry.
 *
 * The loop adds row i; it previously added row 0 on every iteration, so
 * the result was DIM_OF_WORLD times the 1-norm of the first row.
 */
static inline REAL MNORM1_DOW(const REAL_DD x)
{
  int i;
  REAL sum;

  sum = NORM1_DOW(x[0]);
  for (i = 1; i < DIM_OF_WORLD; i++) {
    sum += NORM1_DOW(x[i]);
  }

  return sum;
}

/* Sum of DIST1_DOW() over the rows of the full matrices x and y. */
static inline REAL MDIST1_DOW(const REAL_DD x, const REAL_DD y)
{
  REAL total = DIST1_DOW(x[0], y[0]);
  int  row = 1;

  while (row < DIM_OF_WORLD) {
    total += DIST1_DOW(x[row], y[row]);
    row++;
  }

  return total;
}

/* Maximum of NORM8_DOW() over the rows of the full matrix x, folded
 * with MAX().
 */
static inline REAL MNORM8_DOW(const REAL_DD x)
{
  REAL largest = NORM8_DOW(x[0]);
  int  row = 1;

  while (row < DIM_OF_WORLD) {
    largest = MAX(largest, NORM8_DOW(x[row]));
    row++;
  }

  return largest;
}

/* Maximum of DIST8_DOW() over the rows of the full matrices x and y,
 * folded with MAX().
 */
static inline REAL MDIST8_DOW(const REAL_DD x, const REAL_DD y)
{
  REAL largest = DIST8_DOW(x[0], y[0]);
  int  row = 1;

  while (row < DIM_OF_WORLD) {
    largest = MAX(largest, DIST8_DOW(x[row], y[row]));
    row++;
  }

  return largest;
}

/* Sum of SUM_DOW() over all rows of the full matrix x, i.e. the sum of
 * every entry.
 *
 * The loop adds row i; it previously added row 0 on every iteration, so
 * the result was DIM_OF_WORLD times the sum of the first row.
 */
static inline REAL MSUM_DOW(const REAL_DD x)
{
  int i;
  REAL sum;

  sum = SUM_DOW(x[0]);
  for (i = 1; i < DIM_OF_WORLD; i++) {
    sum += SUM_DOW(x[i]);
  }

  return sum;
}

/* Sum of PNRMP_DOW(row, p) over the rows of the full matrix x. */
static inline REAL MPNRMP_DOW(const REAL_DD x, REAL p)
{
  REAL total = PNRMP_DOW(x[0], p);
  int  row = 1;

  while (row < DIM_OF_WORLD) {
    total += PNRMP_DOW(x[row], p);
    row++;
  }

  return total;
}

/* pow(MPNRMP_DOW(x, p), 1.0/p). */
static inline REAL MNRMP_DOW(const REAL_DD x, REAL p)
{
  const REAL power_sum = MPNRMP_DOW(x, p);

  return pow(power_sum, 1.0/p);
}

/* Scalar product <x, y>: sum of x[d]*y[d], starting from d = 0. */
static inline REAL SCP_DOW(const REAL_D x, const REAL_D y)
{
  REAL dot = x[0] * y[0];
  int  d = 1;

  while (d < DIM_OF_WORLD) {
    dot += x[d]*y[d];
    d++;
  }
  return dot;
}

/* Bilinear form x^t M y with a full matrix M: the sum of
 * x[r] * M[r][c] * y[c] over all index pairs, in row-major order.
 */
static inline REAL GRAMSCP_DOW(const REAL_DD M, const REAL_D x, const REAL_D y)
{
  REAL form = 0.0;
  int r, c;

  for (r = 0; r < DIM_OF_WORLD; r++) {
    c = 0;
    while (c < DIM_OF_WORLD) {
      form += x[r] * M[r][c] * y[c];
      c++;
    }
  }
  return form;
}
/* M-prefixed alias of GRAMSCP_DOW(). */
#define MGRAMSCP_DOW(M, x, y) GRAMSCP_DOW(M, x, y)

/* Bilinear form x^t diag(M) y: the sum of x[d] * M[d] * y[d]. */
static inline REAL DMGRAMSCP_DOW(const REAL_D M, const REAL_D x, const REAL_D y)
{
  REAL form = 0.0;
  int  d = 0;

  while (d < DIM_OF_WORLD) {
    form += x[d] * M[d] * y[d];
    d++;
  }
  return form;
}

/* Bilinear form for a scalar matrix: s times SCP_DOW(x, y). */
static inline REAL SCMGRAMSCP_DOW(REAL s, const REAL_D x, const REAL_D y)
{
  const REAL dot = SCP_DOW(x, y);

  return s*dot;
}

/* b += m^t v: column r of m is contracted with v and accumulated
 * directly into b[r].  Returns b.
 */
static inline REAL *MTV_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
{
  int r, c;

  for (r = 0; r < DIM_OF_WORLD; r++) {
    c = 0;
    while (c < DIM_OF_WORLD) {
      b[r] += m[c][r] * v[c];
      c++;
    }
  }
  return b;
}

/* Scalar matrix times vector, b += m v: the same operation as AXPY. */
#define SCMV_DOW(m, v, b)  AXPY_DOW(m, v, b)
/* The transposed products of diagonal and scalar matrices forward to
 * the untransposed routines.
 */
#define DMTV_DOW(m, v, b)  DMV_DOW(m, v, b)
#define SCMTV_DOW(m, v, b) SCMV_DOW(m, v, b)

/* b[d] = v[d] / m[d][d]: divide v by the diagonal of the full matrix m.
 * No check for zero diagonal entries is made.  Returns b.
 */
static inline REAL *MDIV_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    b[d] = v[d] / m[d][d];
    d++;
  }
  return b;
}

/* r[d] = y[d] / m[d]: divide y by the stored diagonal m.  No check for
 * zero entries is made.  Returns r.
 */
static inline REAL *DMDIV_DOW(const REAL_D m, const REAL_D y, REAL_D r)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    r[d] = y[d] / m[d];
    d++;
  }
  return r;
}

/* Scalar matrix: r = (1/m) y via AXEY_DOW(). */
#define SCMDIV_DOW(m, y, r) AXEY_DOW(1.0/(m), y, r)

/* r += diag(x) y, i.e. r[d] += x[d]*y[d].  Returns r. */
static inline REAL *DMV_DOW(const REAL_D x, const REAL_D y, REAL_D r)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    r[d] += x[d]*y[d];
    d++;
  }
  return r;
}

/* b += m v: each row of m is contracted with v through SCP_DOW() and
 * added to the matching component of b.  Returns b.
 */
static inline REAL *MV_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
{
  int row = 0;

  while (row < DIM_OF_WORLD) {
    b[row] += SCP_DOW(m[row], v);
    row++;
  }
  return b;
}

/* b = m v: each component of b is overwritten with SCP_DOW() of the
 * matching row of m and v.  Returns b.
 */
static inline REAL *MVEQ_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
{
  int row = 0;

  while (row < DIM_OF_WORLD) {
    b[row] = SCP_DOW(m[row], v);
    row++;
  }
  return b;
}

/* b = m^t v: b[r] is zeroed and then accumulates column r of m
 * contracted with v.  Returns b.
 */
static inline REAL *MTVEQ_DOW(const REAL_DD m, const REAL_D v, REAL_D b)
{
  int r, c;

  for (r = 0; r < DIM_OF_WORLD; r++) {
    b[r] = 0.0;
    c = 0;
    while (c < DIM_OF_WORLD) {
      b[r] += m[c][r] * v[c];
      c++;
    }
  }
  return b;
}

/* r = diag(x) y, i.e. r[d] = x[d]*y[d].  Returns r. */
static inline REAL *DMVEQ_DOW(const REAL_D x, const REAL_D y, REAL_D r)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    r[d] = x[d]*y[d];
    d++;
  }
  return r;
}

/* Scalar matrix: b = m v via AXEY_DOW(). */
#define SCMVEQ_DOW(m, v, b)  AXEY_DOW(m, v, b)

/* w = c w + (a A + b B) v with full matrices A and B.  Returns w. */
static inline REAL *
MMBIMV_DOW(REAL a, const REAL_DD A, REAL b, const REAL_DD B, const REAL_D v,
           REAL c, REAL_D w)
{
  int row, col;
  REAL row_sum;

  for (row = 0; row < DIM_OF_WORLD; row++) {
    row_sum = 0.0;
    col = 0;
    while (col < DIM_OF_WORLD) {
      row_sum += (a * A[row][col] + b * B[row][col]) * v[col];
      col++;
    }
    w[row] = c*w[row] + row_sum;
  }
  return w;
}

/* w = c w + (a A + b diag(B)) v with a full matrix A and a diagonal
 * matrix B.  Returns w.
 */
static inline REAL *
MDMBIMV_DOW(REAL a, const REAL_DD A, REAL b, const REAL_D B, const REAL_D v,
            REAL c, REAL_D w)
{
  int row, col;
  REAL row_sum;

  for (row = 0; row < DIM_OF_WORLD; row++) {
    row_sum = 0.0;
    col = 0;
    while (col < DIM_OF_WORLD) {
      row_sum += a * A[row][col] * v[col];
      col++;
    }
    w[row] = c*w[row] + row_sum + b * B[row] * v[row];
  }
  return w;
}

/* w = c w + (a diag(A) + b B) v: swaps the two (factor, matrix) pairs
 * and delegates to MDMBIMV_DOW().  Returns w.
 */
static inline REAL *
DMMBIMV_DOW(REAL a, const REAL_D A, REAL b, const REAL_DD B, const REAL_D v,
            REAL c, REAL_D w)
{
  REAL *result = MDMBIMV_DOW(b, B, a, A, v, c, w);

  return result;
}

/* w = c w + (a A + b B Id) v with a full matrix A and a scalar matrix B.
 * Returns w.
 */
static inline REAL *
MSCMBIMV_DOW(REAL a, const REAL_DD A, REAL b, REAL B, const REAL_D v,
             REAL c, REAL_D w)
{
  int row, col;
  REAL row_sum;

  for (row = 0; row < DIM_OF_WORLD; row++) {
    row_sum = 0.0;
    col = 0;
    while (col < DIM_OF_WORLD) {
      row_sum += a * A[row][col] * v[col];
      col++;
    }
    w[row] = c*w[row] + row_sum + b * B * v[row];
  }
  return w;
}


/* w = c w + (a A Id + b B) v: swaps the two (factor, matrix) pairs and
 * delegates to MSCMBIMV_DOW().  Returns w.
 */
static inline REAL *
SCMMBIMV_DOW(REAL a, REAL A, REAL b, const REAL_DD B, const REAL_D v,
             REAL c, REAL_D w)
{
  REAL *result = MSCMBIMV_DOW(b, B, a, A, v, c, w);

  return result;
}

/* w = c w + (a diag(A) + b diag(B)) v for two diagonal matrices.
 * Returns w.
 */
static inline REAL *
DMDMBIMV_DOW(REAL a, const REAL_D A, REAL b, const REAL_D B, const REAL_D v,
             REAL c, REAL_D w)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    w[d] = c*w[d] + (a * A[d] + b * B[d]) * v[d];
    d++;
  }
  return w;
}

/* w = c w + (a diag(A) + b B Id) v for a diagonal matrix A and a scalar
 * matrix B.  Returns w.
 */
static inline REAL *
DMSCMBIMV_DOW(REAL a, const REAL_D A, REAL b, REAL B, const REAL_D v,
              REAL c, REAL_D w)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    w[d] = c*w[d] + (a * A[d] + b * B) * v[d];
    d++;
  }
  return w;
}

/* w = c w + (a A Id + b diag(B)) v: swaps the two (factor, matrix)
 * pairs and delegates to DMSCMBIMV_DOW().  Returns w.
 */
static inline REAL *
SCMDMBIMV_DOW(REAL a, REAL A, REAL b, const REAL_D B, const REAL_D v,
              REAL c, REAL_D w)
{
  REAL *result = DMSCMBIMV_DOW(b, B, a, A, v, c, w);

  return result;
}

/* w = c w + (a A + b B) v for two scalar matrices, expressed through
 * AXPBY_DOW() with w as both input and output.  Returns w.
 */
static inline REAL *
SCMSCMBIMV_DOW(REAL a, REAL A, REAL b, REAL B, const REAL_D v,
               REAL c, REAL_D w)
{
  const REAL factor = a*A + b*B;

  return AXPBY_DOW(factor, v, c, w, w);
}

/* b = beta b + a m v with a full matrix m; the row products are taken
 * with SCP_DOW().  Returns b.
 */
static inline REAL *MGEMV_DOW(REAL a, const REAL_DD m,
                              const REAL_D v, REAL beta,
                              REAL_D b)
{
  int row = 0;

  while (row < DIM_OF_WORLD) {
    b[row] = beta*b[row] + a * SCP_DOW(m[row], v);
    row++;
  }
  return b;
}

/* Like MGEMV_DOW(), but the diagonal of m is left out of the product:
 * b = beta b + a (m - diag(m)) v.  Returns b.
 */
static inline REAL *MGEMV_ND_DOW(REAL a, const REAL_DD m,
                                 const REAL_D v, REAL beta,
                                 REAL_D b)
{
  int row, col;

  for (row = 0; row < DIM_OF_WORLD; row++) {
    REAL off_diag = 0.0;
    for (col = 0; col < DIM_OF_WORLD; col++) {
      if (col != row) {
        off_diag += m[row][col] * v[col];
      }
    }
    b[row] = beta*b[row] + a * off_diag;
  }
  return b;
}

/* Prefix-less aliases for the full-matrix routines. */
#define GEMV_DOW(a, m, v, beta, b) MGEMV_DOW(a, m, v, beta, b)
#define GEMV_ND_DOW(a, m, v, beta, b) MGEMV_ND_DOW(a, m, v, beta, b)

/* r = beta r + a diag(x) y.  Returns r. */
static inline REAL *DMGEMV_DOW(REAL a, const REAL_D x, const REAL_D y,
                               REAL beta, REAL_D r)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    r[d] = beta*r[d] + a*x[d]*y[d];
    d++;
  }
  return r;
}

/* "No diagonal" variant for a diagonal matrix: the matrix contributes
 * nothing, so only r *= beta remains.  a, x and y are not used.
 * Returns r.
 */
static inline REAL *DMGEMV_ND_DOW(REAL a, const REAL_D x, const REAL_D y,
                                  REAL beta, REAL_D r)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    r[d] *= beta;
    d++;
  }
  return r;
}

/* b = beta b + a m^t v with a full matrix m.  For each component, b is
 * scaled first, then column r of m is contracted with v and the scaled
 * product is added.  Returns b.
 */
static inline REAL *MGEMTV_DOW(REAL a, const REAL_DD m,
                               const REAL_D v, REAL beta,
                               REAL_D b)
{
  int r, c;
  REAL col_dot;

  for (r = 0; r < DIM_OF_WORLD; r++) {
    b[r] *= beta;
    col_dot = m[0][r] * v[0];
    c = 1;
    while (c < DIM_OF_WORLD) {
      col_dot += m[c][r] * v[c];
      c++;
    }
    b[r] += a*col_dot;
  }
  return b;
}

/* Prefix-less alias for the full-matrix routine. */
#define GEMTV_DOW(a, m, v, beta, b) MGEMTV_DOW(a, m, v, beta, b)

/* b = beta b + a m v for a scalar matrix m.  The factor m*a is formed
 * once; each component is scaled by beta and then updated.  Returns b.
 */
static inline REAL *SCMGEMV_DOW(REAL a, REAL m, const REAL_D v, REAL beta,
                                REAL_D b)
{
  const REAL factor = m * a;
  int d = 0;

  while (d < DIM_OF_WORLD) {
    b[d] *= beta;
    b[d] += factor*v[d];
    d++;
  }
  return b;
}

/* "No diagonal" variant for a scalar matrix: the matrix contributes
 * nothing, so only b *= beta remains.  a, m and v are not used.
 * Returns b.
 */
static inline REAL *SCMGEMV_ND_DOW(REAL a, REAL m, const REAL_D v, REAL beta,
                                   REAL_D b)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    b[d] *= beta;
    d++;
  }
  return b;
}

/* The transposed products of diagonal and scalar matrices forward to
 * the untransposed routines.
 */
#define DMGEMTV_DOW(a, m, v, beta, b)  DMGEMV_DOW(a, m, v, beta, b)
#define SCMGEMTV_DOW(a, m, v, beta, b) SCMGEMV_DOW(a, m, v, beta, b)

/* Sum of SCP_DOW() over the rows of the full matrices x and y, i.e. the
 * entry-wise scalar product.
 */
static inline REAL MSCP_DOW(const REAL_DD x, const REAL_DD y)
{
  REAL dot = SCP_DOW(x[0], y[0]);
  int  row = 1;

  while (row < DIM_OF_WORLD) {
    dot += SCP_DOW(x[row], y[row]);
    row++;
  }
  return dot;
}

/* Diagonal matrices use the vector routine on their stored diagonals. */
#define DMSCP_DOW(x, y) SCP_DOW(x, y)

/* Assign val to every component of x.  Returns x. */
static inline REAL *SET_DOW(REAL val, REAL_D x)
{
  int d = 0;

  while (d < DIM_OF_WORLD) {
    x[d] = val;
    d++;
  }
  return x;
}

/* m = val Id: the diagonal of the full matrix m is set to val and every
 * off-diagonal entry to zero.  Returns m.
 */
static inline REAL_D *MSET_DOW(REAL val, REAL_DD m)
{
  int r, c;

  for (r = 0; r < DIM_OF_WORLD; r++) {
    m[r][r] = val;
    c = r + 1;
    while (c < DIM_OF_WORLD) {
      m[c][r] = m[r][c] = 0.0;
      c++;
    }
  }
  return m;
}

/* Diagonal matrix: every stored diagonal entry is set via SET_DOW(). */
#define DMSET_DOW(val, m) SET_DOW(val, m)
/* Scalar matrix: m = val, where m must be a modifiable lvalue.  Fully
 * parenthesized so that the expansion is a single operand inside a
 * larger expression.
 */
#define SCMSET_DOW(val, m) ((m) = (val))

/* True iff every component of a compares equal (==) to val. */
static inline bool CMP_DOW(REAL val, const REAL_D a)
{
  int d = 0;

  /* Stop at the first component that differs from val. */
  while (d < DIM_OF_WORLD && !(a[d] != val)) {
    d++;
  }
  return d < DIM_OF_WORLD ? false : true;
}

/* True iff the full matrix a equals val Id: every diagonal entry
 * compares equal to val and every off-diagonal entry compares equal
 * to 0.0.
 */
static inline bool MCMP_DOW(REAL val, const REAL_DD a)
{
  int r, c;

  for (r = 0; r < DIM_OF_WORLD; r++) {
    if (a[r][r] != val) {
      return false;
    }
    for (c = r+1; c < DIM_OF_WORLD; c++) {
      if (a[r][c] != 0.0) {
        return false;
      }
      if (a[c][r] != 0.0) {
        return false;
      }
    }
  }
  return true;
}

/* Diagonal matrix: compare every stored diagonal entry via CMP_DOW(). */
#define DMCMP_DOW(val, m) CMP_DOW(val, m)
/* Scalar matrix: plain equality test. */
#define SCMCMP_DOW(val, m) ((m) == (val))

#if DIM_OF_WORLD == 2
/* 2D wedge product: a[0]*b[1] - a[1]*b[0]. */
static inline REAL WEDGE_DOW(const REAL_D a, const REAL_D b)
{
  const REAL wedge = a[0]*b[1] - a[1]*b[0];

  return wedge;
}
#endif

#if DIM_OF_WORLD == 3
/* 3D wedge (cross) product r = a /\ b.  The components of r are written
 * in the order 0, 1, 2.  Returns r.
 */
static inline REAL *WEDGE_DOW(const REAL_D a, const REAL_D b, REAL_D r)
{
  r[0] = a[1]*b[2] - a[2]*b[1];

  r[1] = a[2]*b[0] - a[0]*b[2];

  r[2] = a[0]*b[1] - a[1]*b[0];

  return r;
}
#endif

/* Dispatch on a matrix-entry type tag: run body_f for MATENT_REAL_DD,
 * body_d for MATENT_REAL_D and body_sc for MATENT_REAL; any other value
 * goes to ERROR_EXIT().  The expansion is a bare switch statement (not
 * wrapped in do { } while (0)), and the bodies are pasted in as
 * statements followed by "; break;".
 */
#define MAT_SWITCH_TYPE(type, body_f, body_d, body_sc)		\
  switch (type) {						\
  case MATENT_REAL_DD: body_f; break;				\
  case MATENT_REAL_D: body_d; break;				\
  case MATENT_REAL: body_sc; break;				\
  default: ERROR_EXIT("Unknown MATENT_TYPE (%d)\n", type);	\
}

/* MAT_BODY(PFX, CONSTCAST, CAST, SUF, TYPE) is supposed to be a
 * "multiplex" macro where BLAS routines are accessed via
 * PFX##AXPY_DOW(..., CONSTCAST var##SUF, ...)
 *
 * PFX is one of M, DM, SCM
 * CAST and CONSTCAST specify type-casts s.t. the ...._DOW() functions
 *   compile without error.
 * SUF is the suffix attached to some block-matrix types, one of
 *   real, real_d, real_dd
 * TYPE is the actual type corresponding to SUF, one of
 *   REAL, REAL_D, REAL_DD
 *
 * MAT_BODY itself is not defined here; it has to be provided at the
 * place where MAT_EMIT_BODY_SWITCH() is expanded.  For the DM and SCM
 * cases both cast arguments are passed empty.
 */
#define MAT_EMIT_BODY_SWITCH(type)				\
  MAT_SWITCH_TYPE(						\
    type,							\
    MAT_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD),	\
    MAT_BODY(DM,                 ,           , real_d, REAL_D),	\
    MAT_BODY(SCM,                ,           , real, REAL))


/* MAT_BI_BODY(PFX1, CONSTCAST1, CAST1, SUF1, TYPE1,
 *             PFX2, CONSTCAST2, CAST2, SUF2, TYPE2)
 *
 * is supposed to be a "multiplex" macro where BLAS routines are
 * accessed via PFX##AXPY_DOW(..., CONSTCAST var##SUF, ...) etc.
 *
 * MAT_EMIT_BI_BODY_SWITCH() nests two MAT_SWITCH_TYPE() dispatches
 * (outer on type1, inner on type2) and so emits MAT_BI_BODY for all nine
 * combinations of M, DM and SCM.
 */
#define MAT_EMIT_BI_BODY_SWITCH(type1, type2)				\
  MAT_SWITCH_TYPE(							\
    type1,								\
    MAT_SWITCH_TYPE(							\
      type2,								\
      MAT_BI_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD,	\
		  M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD),	\
      MAT_BI_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD,	\
		  DM,                 ,           , real_d, REAL_D),	\
      MAT_BI_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD,	\
		  SCM,                ,           , real, REAL)),	\
    MAT_SWITCH_TYPE(							\
      type2,								\
      MAT_BI_BODY(DM,                 ,           , real_d, REAL_D,	\
		  M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD),	\
      MAT_BI_BODY(DM,                 ,           , real_d, REAL_D,	\
		  DM,                 ,           , real_d, REAL_D),	\
      MAT_BI_BODY(DM,                 ,           , real_d, REAL_D,	\
		  SCM,                ,           , real, REAL)),	\
    MAT_SWITCH_TYPE(							\
      type2,								\
      MAT_BI_BODY(SCM,                ,           , real, REAL,		\
		  M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD),	\
      MAT_BI_BODY(SCM,                ,           , real, REAL,		\
		  DM,                 ,           , real_d, REAL_D),	\
      MAT_BI_BODY(SCM,                ,           , real, REAL,		\
		  SCM,                ,           , real, REAL)))

/* MAT_TRI_BODY(PFX1, CONSTCAST1, CAST1, SUF1, TYPE1,
 *              PFX2, CONSTCAST2, CAST2, SUF2, TYPE2)
 *
 * is supposed to be a "multiplex" macro where BLAS routines are
 * accessed via PFX##AXPY_DOW(..., CONSTCAST var##SUF, ...) etc.
 *
 * Only the "triangular" combinations are emitted: for type1 == M all
 * three type2 cases, for type1 == DM only type2 == DM or SCM, and for
 * type1 == SCM only type2 == SCM.  In the DM and SCM branches any other
 * type2 value matches no "if" and therefore does nothing.
 */
#define MAT_EMIT_TRI_BODY_SWITCH(type1, type2)				\
  MAT_SWITCH_TYPE(							\
    type1,								\
    MAT_SWITCH_TYPE(							\
      type2,								\
      MAT_TRI_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD,	\
		   M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD),	\
      MAT_TRI_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD,	\
		   DM,                 ,           , real_d, REAL_D),	\
      MAT_TRI_BODY(M,  (const REAL_D *), (REAL_D *), real_dd, REAL_DD,	\
		   SCM,                ,           , real, REAL)),	\
    if (type2 == MATENT_REAL_D) {					\
      MAT_TRI_BODY(DM,                 ,           , real_d, REAL_D,	\
		   DM,                 ,           , real_d, REAL_D);	\
    } else if (type2 == MATENT_REAL) {					\
      MAT_TRI_BODY(DM,                 ,           , real_d, REAL_D,	\
		   SCM,                ,           , real, REAL);	\
    },									\
    if (type2 == MATENT_REAL) {						\
      MAT_TRI_BODY(SCM,                ,           , real, REAL,	\
		   SCM,                ,           , real, REAL);	\
    })


/* Norms and distances: defines where only DOW == 1 plays a special
 * role.  For DIM_OF_WORLD == 1 they reduce to absolute values; for
 * all other dimensions they are the square roots of the respective
 * xxxNRM2_DOW()/xxxDST2_DOW() macros.
 *
 * Both branches provide the same set of names:
 *
 *   NORM_DOW, DIST_DOW, MNORM_DOW, DMNORM_DOW,
 *   MDIST_DOW, DMDIST_DOW, SCMDIST_DOW
 *
 * The 1d branch formerly spelled the matrix norms MNRM_DOW/DMNRM_DOW,
 * so that MNORM_DOW/DMNORM_DOW were missing for DIM_OF_WORLD == 1.
 * The old spellings are kept as aliases for backward compatibility.
 */
# if DIM_OF_WORLD == 1
#  define DIST_DOW(x,y)    ABS((x)[0]-(y)[0])
#  define NORM_DOW(x)      ABS((x)[0])
#  define MNORM_DOW(m)     ABS((m)[0][0])
#  define DMNORM_DOW(m)    NORM_DOW(m)
#  define MDIST_DOW(a,b)   ABS((a)[0][0] - (b)[0][0])
#  define DMDIST_DOW(a,b)  DIST_DOW(a, b)
#  define SCMDIST_DOW(a,b) ABS((a)-(b))
/* legacy spellings, DIM_OF_WORLD == 1 only */
#  define MNRM_DOW(m)      MNORM_DOW(m)
#  define DMNRM_DOW(m)     DMNORM_DOW(m)
# else
#  define NORM_DOW(x)      sqrt(NRM2_DOW(x))
#  define DIST_DOW(x,y)    sqrt(DST2_DOW(x, y))
#  define MNORM_DOW(m)     sqrt(MNRM2_DOW(m))
#  define DMNORM_DOW(m)    sqrt(DMNRM2_DOW(m))
#  define MDIST_DOW(a,b)   sqrt(MDST2_DOW(a, b))
#  define DMDIST_DOW(a,b)  sqrt(DMDST2_DOW(a, b))
#  define SCMDIST_DOW(a,b) sqrt(SCMDST2_DOW(a, b))
# endif

/* defines different for all DOWs
 *
 * For each supported DIM_OF_WORLD (1 ... 9) this provides
 *
 *   EXPAND_DOW(x)        the components of the vector x as argument list
 *   FORMAT_DOW           output format matching EXPAND_DOW()
 *   SCAN_EXPAND_DOW(v)   the addresses of the components of v
 *   SCAN_FORMAT_DOW      input format matching SCAN_EXPAND_DOW()
 *   MEXPAND_DOW(m), MFORMAT_DOW, SCAN_MEXPAND_DOW(m), SCAN_MFORMAT_DOW
 *                        the same for a DOW x DOW matrix, row by row
 *   DMEXPAND_DOW(m), DMFORMAT_DOW
 *                        the same for a diagonal matrix (stored as vector)
 *   SCMEXPAND_DOW(m), SCMFORMAT_DOW
 *                        the same for a scalar
 *
 * Invariant: every xxxFORMAT_DOW contains exactly as many conversions
 * as the matching xxxEXPAND_DOW() produces arguments.
 */
# if   DIM_OF_WORLD == 1
#  define EXPAND_DOW(x)       (x)[0]
#  define FORMAT_DOW          "%10.5le"
#  define SCAN_FORMAT_DOW     "%f"
#  define SCAN_EXPAND_DOW(v)  &(v)[0]
#  define MEXPAND_DOW(m)      (m)[0][0]
/* a 1x1 matrix has a single entry, SCAN_MEXPAND_DOW() yields one pointer */
#  define SCAN_MFORMAT_DOW    SCAN_FORMAT_DOW
#  define SCAN_MEXPAND_DOW(m) &(m)[0][0]
#  define MFORMAT_DOW         FORMAT_DOW
#  define DMEXPAND_DOW(m)     EXPAND_DOW(m)
#  define DMFORMAT_DOW        FORMAT_DOW
#  define SCMEXPAND_DOW(m)    (m)
#  define SCMFORMAT_DOW       "[%10.5le]"
# elif DIM_OF_WORLD == 2
#  define EXPAND_DOW(x)       (x)[0], (x)[1]
#  define FORMAT_DOW          "[%10.5le, %10.5le]"
#  define SCAN_FORMAT_DOW     "%f %f"
#  define SCAN_EXPAND_DOW(v)  &(v)[0], &(v)[1]
#  define MEXPAND_DOW(m)      EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1])
#  define SCAN_MEXPAND_DOW(m) SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1])
#  define SCAN_MFORMAT_DOW    SCAN_FORMAT_DOW SCAN_FORMAT_DOW
#  define MFORMAT_DOW         "[" FORMAT_DOW ", " FORMAT_DOW "]"
#  define DMEXPAND_DOW(m)     EXPAND_DOW(m)
#  define DMFORMAT_DOW        FORMAT_DOW
#  define SCMEXPAND_DOW(m)    (m)
#  define SCMFORMAT_DOW       "[%10.5le]"
# elif DIM_OF_WORLD == 3
#  define EXPAND_DOW(x)      (x)[0], (x)[1], (x)[2]
#  define FORMAT_DOW         "[%10.5le, %10.5le, %10.5le]"
#  define SCAN_FORMAT_DOW    "%f %f %f"
#  define SCAN_EXPAND_DOW(v) &(v)[0], &(v)[1], &(v)[2]
#  define MEXPAND_DOW(m)					\
  EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2])
#  define SCAN_MEXPAND_DOW(m)						\
  SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2])
#  define MFORMAT_DOW      "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
#  define SCAN_MFORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
#  define DMEXPAND_DOW(m)  EXPAND_DOW(m)
#  define DMFORMAT_DOW     FORMAT_DOW
#  define SCMEXPAND_DOW(m) (m)
#  define SCMFORMAT_DOW    "[%10.5le]"
# elif DIM_OF_WORLD == 4
#  define EXPAND_DOW(x)  (x)[0], (x)[1], (x)[2], (x)[3]
#  define FORMAT_DOW     "[%10.5le, %10.5le, %10.5le, %10.5le]"
#  define SCAN_FORMAT_DOW     "%f %f %f %f"
#  define SCAN_EXPAND_DOW(v) &(v)[0], &(v)[1], &(v)[2], &(v)[3]
#  define MEXPAND_DOW(m)						\
  EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]), EXPAND_DOW((m)[3])
#  define SCAN_MEXPAND_DOW(m)				\
  SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]),	\
    SCAN_EXPAND_DOW((m)[2]), SCAN_EXPAND_DOW((m)[3])
#  define MFORMAT_DOW						\
  "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
#  define SCAN_MFORMAT_DOW				\
  SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
#  define DMEXPAND_DOW(m) EXPAND_DOW(m)
#  define DMFORMAT_DOW    FORMAT_DOW
#  define SCMEXPAND_DOW(m) (m)
#  define SCMFORMAT_DOW   "[%10.5le]"
# elif DIM_OF_WORLD == 5
#  define EXPAND_DOW(x)      (x)[0], (x)[1], (x)[2], (x)[3], (x)[4]
#  define FORMAT_DOW         "[%10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
#  define SCAN_FORMAT_DOW    "%f %f %f %f %f"
#  define SCAN_EXPAND_DOW(v) &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4]
#  define MEXPAND_DOW(m)					\
  EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]),	\
    EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4])
#  define SCAN_MEXPAND_DOW(m)						\
  SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
    SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4])
#  define MFORMAT_DOW							\
 "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
#  define SCAN_MFORMAT_DOW				\
  SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW	\
  SCAN_FORMAT_DOW SCAN_FORMAT_DOW
#  define DMEXPAND_DOW(m) EXPAND_DOW(m)
#  define DMFORMAT_DOW    FORMAT_DOW
#  define SCMEXPAND_DOW(m) (m)
#  define SCMFORMAT_DOW   "[%10.5le]"
# elif DIM_OF_WORLD == 6
#  define EXPAND_DOW(x)  (x)[0], (x)[1], (x)[2], (x)[3], (x)[4], (x)[5]
#  define FORMAT_DOW						\
  "[%10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
#  define SCAN_FORMAT_DOW     "%f %f %f %f %f %f"
#  define SCAN_EXPAND_DOW(v)				\
  &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4], &(v)[5]
#  define MEXPAND_DOW(m) \
  EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]),	\
    EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4]), EXPAND_DOW((m)[5])
#  define SCAN_MEXPAND_DOW(m) \
  SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
    SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4]), SCAN_EXPAND_DOW((m)[5])
#  define MFORMAT_DOW				\
  "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", "	\
      FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
#  define SCAN_MFORMAT_DOW				\
  SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW	\
  SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
#  define DMEXPAND_DOW(m)  EXPAND_DOW(m)
#  define DMFORMAT_DOW     FORMAT_DOW
#  define SCMEXPAND_DOW(m) (m)
#  define SCMFORMAT_DOW    "[%10.5le]"
# elif DIM_OF_WORLD == 7
#  define EXPAND_DOW(x)						\
  (x)[0], (x)[1], (x)[2], (x)[3], (x)[4], (x)[5], (x)[6]
#  define FORMAT_DOW							\
  "[%10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
#  define SCAN_FORMAT_DOW     "%f %f %f %f %f %f %f"
#  define SCAN_EXPAND_DOW(v)					\
  &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4], &(v)[5], &(v)[6]
#  define MEXPAND_DOW(m)						\
  EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]),		\
    EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4]), EXPAND_DOW((m)[5]),		\
    EXPAND_DOW((m)[6])
#  define SCAN_MEXPAND_DOW(m)						\
  SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
    SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4]), SCAN_EXPAND_DOW((m)[5]), \
    SCAN_EXPAND_DOW((m)[6])
#  define MFORMAT_DOW						\
  "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", "	\
      FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
#  define SCAN_MFORMAT_DOW						\
  SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW	\
  SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
#  define DMEXPAND_DOW(m) EXPAND_DOW(m)
#  define DMFORMAT_DOW    FORMAT_DOW
#  define SCMEXPAND_DOW(m) (m)
#  define SCMFORMAT_DOW   "[%10.5le]"
# elif DIM_OF_WORLD == 8
#  define EXPAND_DOW(x)						\
  (x)[0], (x)[1], (x)[2], (x)[3], (x)[4], (x)[5], (x)[6], (x)[7]
#  define FORMAT_DOW							\
  "[%10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
#  define SCAN_FORMAT_DOW     "%f %f %f %f %f %f %f %f"
#  define SCAN_EXPAND_DOW(v)				\
  &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4], &(v)[5], &(v)[6], &(v)[7]
#  define MEXPAND_DOW(m)						\
  EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]),		\
    EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4]), EXPAND_DOW((m)[5]),		\
    EXPAND_DOW((m)[6]), EXPAND_DOW((m)[7])
#  define SCAN_MEXPAND_DOW(m)						\
  SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
    SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4]), SCAN_EXPAND_DOW((m)[5]), \
    SCAN_EXPAND_DOW((m)[6]), SCAN_EXPAND_DOW((m)[7])
#  define MFORMAT_DOW							\
  "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", "	\
      FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW "]"
#  define SCAN_MFORMAT_DOW						\
  SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW	\
  SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW
#  define DMEXPAND_DOW(m)  EXPAND_DOW(m)
#  define DMFORMAT_DOW     FORMAT_DOW
#  define SCMEXPAND_DOW(m) (m)
#  define SCMFORMAT_DOW    "[%10.5le]"
# elif DIM_OF_WORLD == 9
#  define EXPAND_DOW(x)						\
  (x)[0], (x)[1], (x)[2], (x)[3], (x)[4], (x)[5], (x)[6], (x)[7], (x)[8]
#  define FORMAT_DOW							\
  "[%10.5le, %10.5le, %10.5le, %10.5le, "				\
  "%10.5le, %10.5le, %10.5le, %10.5le, %10.5le]"
#  define SCAN_FORMAT_DOW     "%f %f %f %f %f %f %f %f %f"
#  define SCAN_EXPAND_DOW(v)				\
  &(v)[0], &(v)[1], &(v)[2], &(v)[3], &(v)[4],		\
    &(v)[5], &(v)[6], &(v)[7], &(v)[8]
#  define MEXPAND_DOW(m)						\
  EXPAND_DOW((m)[0]), EXPAND_DOW((m)[1]), EXPAND_DOW((m)[2]),		\
    EXPAND_DOW((m)[3]), EXPAND_DOW((m)[4]), EXPAND_DOW((m)[5]),		\
    EXPAND_DOW((m)[6]), EXPAND_DOW((m)[7]), EXPAND_DOW((m)[8])
#  define SCAN_MEXPAND_DOW(m)						\
  SCAN_EXPAND_DOW((m)[0]), SCAN_EXPAND_DOW((m)[1]), SCAN_EXPAND_DOW((m)[2]), \
    SCAN_EXPAND_DOW((m)[3]), SCAN_EXPAND_DOW((m)[4]), SCAN_EXPAND_DOW((m)[5]), \
    SCAN_EXPAND_DOW((m)[6]), SCAN_EXPAND_DOW((m)[7]), SCAN_EXPAND_DOW((m)[8])
#  define MFORMAT_DOW							\
  "[" FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", "	\
      FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", " FORMAT_DOW ", "	\
      FORMAT_DOW "]"
#  define SCAN_MFORMAT_DOW						\
  SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW	\
  SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW SCAN_FORMAT_DOW	\
  SCAN_FORMAT_DOW
#  define DMEXPAND_DOW(m) EXPAND_DOW(m)
#  define DMFORMAT_DOW    FORMAT_DOW
#  define SCMEXPAND_DOW(m) (m)
#  define SCMFORMAT_DOW   "[%10.5le]"
# endif

/* Some inline functions for barycentric coordinates, and conversion
 * between barycentric gradients and cartesian gradients.
 */

/** x = a: the first N_LAMBDA(dim) entries of x are set to a, the
 * remaining entries up to N_LAMBDA_MAX are cleared. Returns x.
 */
static inline const REAL *SET_BAR(int dim, REAL a, REAL_B x)
{
  const int n_used = N_LAMBDA(dim);
  int k = 0;

  while (k < n_used) {
    x[k++] = a;
  }
  /* zero padding for the unused barycentric coordinates */
  while (k < N_LAMBDA_MAX) {
    x[k++] = 0.0;
  }
  return x;
}

/** Matrix version of SET_BAR(): SET_BAR(dim, a, .) is applied to each
 * of the first N_LAMBDA(dim) rows of x. Returns x.
 */
static inline const REAL_B *MSET_BAR(int dim, REAL a, REAL_BB x)
{
  const int n_rows = N_LAMBDA(dim);
  int row;

  for (row = 0; row < n_rows; ++row) {
    SET_BAR(dim, a, x[row]);
  }
  return (const REAL_B *)x;
}

/** x *= a, applied to the first N_LAMBDA(dim) entries of x. Returns x. */
static inline const REAL *SCAL_BAR(int dim, REAL a, REAL_B x)
{
  const int n_lambda = N_LAMBDA(dim);
  int k = 0;

  while (k < n_lambda) {
    x[k] *= a;
    ++k;
  }
  return x;
}

/** Matrix version of SCAL_BAR(): each of the first N_LAMBDA(dim) rows
 * of x is scaled by a. Returns x.
 */
static inline const REAL_B *MSCAL_BAR(int dim, REAL a, REAL_BB x)
{
  const int n_rows = N_LAMBDA(dim);
  int row;

  for (row = 0; row < n_rows; ++row) {
    SCAL_BAR(dim, a, x[row]);
  }
  return (const REAL_B *)x;
}

/** Returns the scalar product x[0]*y[0] + ... of the first
 * N_LAMBDA(dim) entries of the barycentric coordinate tuples x and y.
 */
static inline REAL SCP_BAR(int dim, const REAL_B x, const REAL_B y)
{
  const int n_lambda = N_LAMBDA(dim);
  REAL sum = x[0] * y[0];
  int k;

  for (k = 1; k < n_lambda; ++k) {
    sum += x[k] * y[k];
  }
  return sum;
}

/** y = a*x on the first N_LAMBDA(dim) entries; x and y are barycentric
 * coordinate tuples. Returns y.
 */
static inline const REAL *AXEY_BAR(int dim, REAL a, const REAL_B x, REAL_B y)
{
  const int n_lambda = N_LAMBDA(dim);
  int k = 0;

  while (k < n_lambda) {
    y[k] = a * x[k];
    ++k;
  }
  return y;
}

/** Matrix version of AXEY_BAR(): y[row] = a*x[row] for the first
 * N_LAMBDA(dim) rows. Returns y.
 */
static inline const REAL_B *MAXEY_BAR(int dim,
				      REAL a, const REAL_BB x, REAL_BB y)
{
  const int n_rows = N_LAMBDA(dim);
  int row;

  for (row = 0; row < n_rows; ++row) {
    AXEY_BAR(dim, a, x[row], y[row]);
  }
  return (const REAL_B *)y;
}

/** y += a*x on the first N_LAMBDA(dim) entries; x and y are barycentric
 * coordinate tuples. Returns y.
 */
static inline const REAL *AXPY_BAR(int dim, REAL a, const REAL_B x, REAL_B y)
{
  const int n_lambda = N_LAMBDA(dim);
  int k = 0;

  while (k < n_lambda) {
    y[k] += a * x[k];
    ++k;
  }
  return y;
}

/** Matrix version of AXPY_BAR(): y[row] += a*x[row] for the first
 * N_LAMBDA(dim) rows. Returns y.
 */
static inline const REAL_B *MAXPY_BAR(int dim,
				      REAL a, const REAL_BB x, REAL_BB y)
{
  const int n_rows = N_LAMBDA(dim);
  int row;

  for (row = 0; row < n_rows; ++row) {
    AXPY_BAR(dim, a, x[row], y[row]);
  }
  return (const REAL_B *)y;
}

/** z = a*x + b*y on the first N_LAMBDA(dim) entries; x, y and z are
 * barycentric coordinate tuples. Returns z.
 */
static inline const REAL *AXPBY_BAR(int dim,
				    REAL a, const REAL_B x,
				    REAL b, const REAL_B y,
				    REAL_B z)
{
  const int n_lambda = N_LAMBDA(dim);
  int k = 0;

  while (k < n_lambda) {
    z[k] = b*y[k] + a * x[k];
    ++k;
  }
  return z;
}

/** Matrix version of AXPBY_BAR(): z[row] = a*x[row] + b*y[row] for the
 * first N_LAMBDA(dim) rows. Returns z.
 */
static inline const REAL_B *MAXPBY_BAR(int dim,
				       REAL a, const REAL_BB x,
				       REAL b, const REAL_BB y,
				       REAL_BB z)
{
  const int n_rows = N_LAMBDA(dim);
  int row;

  for (row = 0; row < n_rows; ++row) {
    AXPBY_BAR(dim, a, x[row], b, y[row], z[row]);
  }
  return (const REAL_B *)z;
}

/** w = a*x + b*y + c*z on the first N_LAMBDA(dim) entries; x, y, z and
 * w are barycentric coordinate tuples. Returns w.
 */
static inline const REAL *AXPBYPCZ_BAR(int dim,
				       REAL a, const REAL_B x,
				       REAL b, const REAL_B y,
				       REAL c, const REAL_B z,
				       REAL_B w)
{
  const int n_lambda = N_LAMBDA(dim);
  int k = 0;

  while (k < n_lambda) {
    w[k] = a * x[k] + b*y[k] + c*z[k];
    ++k;
  }
  return w;
}

/** b = a on the first N_LAMBDA(dim) entries; a and b are barycentric
 * coordinate tuples. Returns b.
 */
static inline const REAL *COPY_BAR(int dim, const REAL_B a, REAL_B b)
{
  const int n_lambda = N_LAMBDA(dim);
  int k = 0;

  while (k < n_lambda) {
    b[k] = a[k];
    ++k;
  }
  return b;
}

/** Matrix version of COPY_BAR(): b[row] = a[row] for the first
 * N_LAMBDA(dim) rows. Returns b.
 */
static inline const REAL_B *MCOPY_BAR(int dim, const REAL_BB a, REAL_BB b)
{
  const int n_rows = N_LAMBDA(dim);
  int row;

  for (row = 0; row < n_rows; ++row) {
    COPY_BAR(dim, a[row], b[row]);
  }
  return (const REAL_B *)b;
}

/** Barycentric gradient -> world gradient:
 *
 *   x_grd[i] = sum_j b_grd[j] * Lambda[j][i],   j < N_LAMBDA(dim),
 *
 * where Lambda is the gradient of the transformation to the reference
 * element. If x_grd is NULL the result is stored in (and a pointer
 * returned to) a function-local static buffer, which is overwritten by
 * the next such call.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL *GRAD_DOW(int dim,
				   const REAL_BD Lambda,
				   const REAL_B b_grd,
				   REAL_D x_grd)
{
  static REAL_D fallback;
  const int n_lambda = N_LAMBDA(dim);
  REAL *out = x_grd;
  int d, k;

  if (__UNLIKELY__(x_grd == NULL)) {
    out = fallback;
  }

  for (d = 0; d < DIM_OF_WORLD; d++) {
    out[d] = b_grd[0] * Lambda[0][d];
    for (k = 1; k < n_lambda; k++) {
      out[d] += b_grd[k] * Lambda[k][d];
    }
  }

  return out;
}

/** Row-wise version of GRAD_DOW(): x_grd[i] = GRAD_DOW(b_grd[i]) for
 * all DIM_OF_WORLD rows. A NULL x_grd selects a function-local static
 * result buffer.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL_D *MGRAD_DOW(int dim,
				      const REAL_BD Lambda,
				      const REAL_DB b_grd,
				      REAL_DD x_grd)
{
  static REAL_DD fallback;
  int row = 0;

  if (__UNLIKELY__(x_grd == NULL)) {
    x_grd = fallback;
  }

  while (row < DIM_OF_WORLD) {
    GRAD_DOW(dim, Lambda, b_grd[row], x_grd[row]);
    ++row;
  }

  return (const REAL_D *)x_grd;
}

/* Compute the divergence:
 *
 *   sum_i sum_alpha Lambda[alpha][i] * b_grd[i][alpha]
 *
 * with i < DIM_OF_WORLD and alpha < N_LAMBDA(dim).
 */
static inline REAL DIV_DOW(int dim, const REAL_BD Lambda, const REAL_DB b_grd)
{
  const int n_lambda = N_LAMBDA(dim);
  REAL sum = 0.0;
  int comp, bary;

  for (comp = 0; comp < DIM_OF_WORLD; comp++) {
    for (bary = 0; bary < n_lambda; bary++) {
      sum += Lambda[bary][comp] * b_grd[comp][bary];
    }
  }

  return sum;
}

/** Accumulating version of GRAD_DOW():
 *
 *   x_grd[i] += sum_j b_grd[j] * Lambda[j][i],   j < N_LAMBDA(dim).
 *
 * x_grd is not cleared here. If x_grd is NULL the contributions are
 * added to a function-local static buffer.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL *GRAD_P_DOW(int dim,
				     const REAL_BD Lambda,
				     const REAL_B b_grd,
				     REAL_D x_grd)
{
  static REAL_D fallback;
  const int n_lambda = N_LAMBDA(dim);
  REAL *out = x_grd;
  int d, k;

  if (__UNLIKELY__(x_grd == NULL)) {
    out = fallback;
  }

  for (d = 0; d < DIM_OF_WORLD; d++) {
    for (k = 0; k < n_lambda; k++) {
      out[d] += b_grd[k] * Lambda[k][d];
    }
  }

  return out;
}

/** Row-wise version of GRAD_P_DOW(): the world gradient of b_grd[i] is
 * added to x_grd[i] for all DIM_OF_WORLD rows. A NULL x_grd selects a
 * function-local static buffer.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL_D *MGRAD_P_DOW(int dim,
					const REAL_BD Lambda,
					const REAL_DB b_grd,
					REAL_DD x_grd)
{
  static REAL_DD fallback;
  int row = 0;

  if (__UNLIKELY__(x_grd == NULL)) {
    x_grd = fallback;
  }

  while (row < DIM_OF_WORLD) {
    GRAD_P_DOW(dim, Lambda, b_grd[row], x_grd[row]);
    ++row;
  }

  return (const REAL_D *)x_grd;
}


/** Barycentric Hesse matrix -> world Hesse matrix,
 *
 *   x_hesse = Lambda^T * b_hesse * Lambda,
 *
 * where Lambda is the gradient of the transformation to the reference
 * element. Only the upper triangle of b_hesse (including the diagonal)
 * is read, i.e. b_hesse is treated as symmetric; the upper triangle of
 * the result is computed and mirrored to the lower one. A NULL x_hesse
 * selects a function-local static result buffer.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL_D *D2_DOW(int dim,
				   const REAL_BD Lambda,
				   const REAL_BB b_hesse,
				   REAL_DD x_hesse)
{
  static REAL_DD fallback;
  const int n_lambda = N_LAMBDA(dim);
  int row, col, p, q;

  if (__UNLIKELY__(x_hesse == NULL)) {
    x_hesse = fallback;
  }

  for (row = 0; row < DIM_OF_WORLD; row++) {
    /* diagonal entry */
    x_hesse[row][row] = 0.0;
    for (p = 0; p < n_lambda; p++) {
      x_hesse[row][row] += Lambda[p][row] * b_hesse[p][p] * Lambda[p][row];
      for (q = p+1; q < n_lambda; q++) {
	x_hesse[row][row] +=
	  2.0 * Lambda[p][row] * b_hesse[p][q] * Lambda[q][row];
      }
    }
    /* strict upper triangle, copied to the lower triangle */
    for (col = row+1; col < DIM_OF_WORLD; col++) {
      x_hesse[row][col] = 0.0;
      for (p = 0; p < n_lambda; p++) {
	x_hesse[row][col] += Lambda[p][row] * b_hesse[p][p] * Lambda[p][col];
	for (q = p+1; q < n_lambda; q++) {
	  x_hesse[row][col] += b_hesse[p][q]*(Lambda[p][row] * Lambda[q][col]
					      +
					      Lambda[q][row] * Lambda[p][col]);
	}
      }
      x_hesse[col][row] = x_hesse[row][col];
    }
  }

  return (const REAL_D *)x_hesse;
}

/** Component-wise version of D2_DOW(): x_hesse[i] = D2_DOW(b_hesse[i])
 * for all DIM_OF_WORLD components. A NULL x_hesse selects a
 * function-local static result buffer.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL_DD *MD2_DOW(int dim,
				     const REAL_BD Lambda,
				     const REAL_BB *b_hesse,
				     REAL_DDD x_hesse)
{
  static REAL_DDD fallback;
  int comp = 0;

  if (__UNLIKELY__(x_hesse == NULL)) {
    x_hesse = fallback;
  }

  while (comp < DIM_OF_WORLD) {
    D2_DOW(dim, Lambda, b_hesse[comp], x_hesse[comp]);
    ++comp;
  }

  return (const REAL_DD *)x_hesse;
}

/** Accumulating version of D2_DOW():
 *
 *   x_hesse += Lambda^T * b_hesse * Lambda.
 *
 * Only the upper triangle of b_hesse (including the diagonal) is read.
 * Each off-diagonal contribution is computed once and added to both
 * x_hesse[i][j] and x_hesse[j][i]. x_hesse is not cleared here; a NULL
 * x_hesse selects a function-local static buffer.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL_D *D2_P_DOW(int dim,
				     const REAL_BD Lambda,
				     const REAL_BB b_hesse,
				     REAL_DD x_hesse)
{
  static REAL_DD fallback;
  const int n_lambda = N_LAMBDA(dim);
  int row, col, p, q;

  if (__UNLIKELY__(x_hesse == NULL)) {
    x_hesse = fallback;
  }

  for (row = 0; row < DIM_OF_WORLD; row++) {
    /* diagonal entry */
    for (p = 0; p < n_lambda; p++) {
      x_hesse[row][row] += Lambda[p][row] * b_hesse[p][p] * Lambda[p][row];
      for (q = p+1; q < n_lambda; q++) {
	x_hesse[row][row] +=
	  2.0 * Lambda[p][row] * b_hesse[p][q] * Lambda[q][row];
      }
    }
    /* off-diagonal entries, symmetric update */
    for (col = row+1; col < DIM_OF_WORLD; col++) {
      REAL contrib = 0.0;

      for (p = 0; p < n_lambda; p++) {
	contrib += Lambda[p][row] * b_hesse[p][p] * Lambda[p][col];
	for (q = p+1; q < n_lambda; q++) {
	  contrib += b_hesse[p][q]*(Lambda[p][row] * Lambda[q][col]
				    +
				    Lambda[q][row] * Lambda[p][col]);
	}
      }
      x_hesse[row][col] += contrib;
      x_hesse[col][row] += contrib;
    }
  }

  return (const REAL_D *)x_hesse;
}

/** Component-wise version of D2_P_DOW(): the world Hesse matrix of
 * b_hesse[i] is added to x_hesse[i] for all DIM_OF_WORLD components.
 * A NULL x_hesse selects a function-local static buffer.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL_DD *MD2_P_DOW(int dim,
				       const REAL_BD Lambda,
				       const REAL_BB *b_hesse,
				       REAL_DDD x_hesse)
{
  static REAL_DDD fallback;
  int comp = 0;

  if (__UNLIKELY__(x_hesse == NULL)) {
    x_hesse = fallback;
  }

  while (comp < DIM_OF_WORLD) {
    D2_P_DOW(dim, Lambda, b_hesse[comp], x_hesse[comp]);
    ++comp;
  }

  return (const REAL_DD *)x_hesse;
}

/** Compute the Laplacian from a given Hessian in barycentric
 * co-ordinates: the sum over i < DIM_OF_WORLD of the i-th diagonal
 * entry of Lambda^T * b_hesse * Lambda. Only the upper triangle of
 * b_hesse (including the diagonal) is read.
 */
static inline REAL LAPLACE_DOW(int dim,
			       const REAL_BD Lambda,
			       const REAL_BB b_hesse)
{
  const int n_lambda = N_LAMBDA(dim);
  REAL trace = 0.0;
  int d, p, q;

  for (d = 0; d < DIM_OF_WORLD; d++) {
    for (p = 0; p < n_lambda; p++) {
      trace += Lambda[p][d] * b_hesse[p][p] * Lambda[p][d];
      for (q = p+1; q < n_lambda; q++) {
	trace +=  2.0 * Lambda[p][d] * b_hesse[p][q] * Lambda[q][d];
      }
    }
  }

  return trace;
}

/** Component-wise Laplacian: laplace[i] = LAPLACE_DOW(b_hesse[i]) for
 * all DIM_OF_WORLD components. A NULL laplace selects a function-local
 * static result buffer.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL *MLAPLACE_DOW(int dim,
				       const REAL_BD Lambda,
				       const REAL_BB *b_hesse,
				       REAL_D laplace)
{
  static REAL_D fallback;
  REAL *out = laplace;
  int comp;

  if (__UNLIKELY__(laplace == NULL)) {
    out = fallback;
  }

  for (comp = 0; comp < DIM_OF_WORLD; ++comp) {
    out[comp] = LAPLACE_DOW(dim, Lambda, b_hesse[comp]);
  }

  return (const REAL *)out;
}

/** Accumulating version of MLAPLACE_DOW():
 * laplace[i] += LAPLACE_DOW(b_hesse[i]) for all DIM_OF_WORLD
 * components. laplace is not cleared here; a NULL laplace selects a
 * function-local static buffer.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL *MLAPLACE_P_DOW(int dim,
					 const REAL_BD Lambda,
					 const REAL_BB *b_hesse,
					 REAL_D laplace)
{
  static REAL_D fallback;
  REAL *out = laplace;
  int comp;

  if (__UNLIKELY__(laplace == NULL)) {
    out = fallback;
  }

  for (comp = 0; comp < DIM_OF_WORLD; ++comp) {
    out[comp] += LAPLACE_DOW(dim, Lambda, b_hesse[comp]);
  }

  return (const REAL *)out;
}

/** Cartesian gradient -> barycentric gradient:
 *
 *   b_grd[i] = SCP_DOW(x_grd, coords[i]),   i < N_LAMBDA(dim),
 *
 * where coords holds the vertices of the element (in the parametric
 * case "coords" is just the barycentric gradient of the coordinate
 * functions). A NULL b_grd selects a function-local static result
 * buffer.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL *GRAD_BAR(int dim,
				   const REAL_D *coords,
				   const REAL_D x_grd,
				   REAL_B b_grd)
{
  static REAL_B fallback;
  const int n_lambda = N_LAMBDA(dim);
  REAL *out = b_grd;
  int k;

  if (__UNLIKELY__(b_grd == NULL)) {
    out = fallback;
  }

  for (k = 0; k < n_lambda; ++k) {
    out[k] = SCP_DOW(x_grd, coords[k]);
  }

  return out;
}

/** Row-wise version of GRAD_BAR(): b_grd[i] = GRAD_BAR(x_grd[i]) for
 * all DIM_OF_WORLD rows. A NULL b_grd selects a function-local static
 * result buffer.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL_B *MGRAD_BAR(int dim,
				      const REAL_D *coords,
				      const REAL_DD x_grd,
				      REAL_DB b_grd)
{
  static REAL_B fallback[DIM_OF_WORLD];
  int row = 0;

  if (__UNLIKELY__(b_grd == NULL)) {
    b_grd = fallback;
  }

  while (row < DIM_OF_WORLD) {
    GRAD_BAR(dim, coords, x_grd[row], b_grd[row]);
    ++row;
  }

  return (const REAL_B *)b_grd;
}

/** Cartesian Hesse matrix -> barycentric Hesse matrix,
 *
 *   b_D2[i][j] = sum_{k,l} coords[i][k] * x_D2[k][l] * coords[j][l],
 *
 * for i, j < N_LAMBDA(dim). Only the upper triangle of x_D2 (including
 * the diagonal) is read, i.e. x_D2 is treated as symmetric; the upper
 * triangle of the result is computed and mirrored to the lower one.
 * A NULL b_D2 selects a function-local static result buffer.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL_B *D2_BAR(int dim,
				   const REAL_D *coords,
				   const REAL_DD x_D2,
				   REAL_BB b_D2)
{
  static REAL_BB fallback;
  const int n_lambda = N_LAMBDA(dim);
  int row, col, p, q;

  if (__UNLIKELY__(b_D2 == NULL)) {
    b_D2 = fallback;
  }

  for (row = 0; row < n_lambda; row++) {
    /* diagonal entry */
    b_D2[row][row] = 0.0;
    for (p = 0; p < DIM_OF_WORLD; p++) {
      b_D2[row][row] += SQR(coords[row][p])*x_D2[p][p];
      for (q = p+1; q < DIM_OF_WORLD; q++) {
	b_D2[row][row] += coords[row][p]*2.0*x_D2[p][q]*coords[row][q];
      }
    }
    /* strict upper triangle, copied to the lower triangle */
    for (col = row+1; col < n_lambda; col++) {
      b_D2[row][col] = 0.0;
      for (p = 0; p < DIM_OF_WORLD; p++) {
	b_D2[row][col] += coords[row][p]*x_D2[p][p]*coords[col][p];
	for (q = p+1; q < DIM_OF_WORLD; q++) {
	  b_D2[row][col] += x_D2[p][q] * (coords[row][p] * coords[col][q]
					  +
					  coords[row][q] * coords[col][p]);
	}
      }
      b_D2[col][row] = b_D2[row][col];
    }
  }
  return (const REAL_B *)b_D2;
}

/** Component-wise version of D2_BAR(): b_D2[i] = D2_BAR(x_D2[i]) for
 * all DIM_OF_WORLD components. A NULL b_D2 selects a function-local
 * static result buffer.
 */
__FORCE_INLINE_ATTRIBUTE__
static inline const REAL_BB *MD2_BAR(int dim,
				     const REAL_D *coords,
				     const REAL_DD *x_D2,
				     REAL_BB *b_D2)
{
  static REAL_BB fallback[DIM_OF_WORLD];
  int comp = 0;

  if (__UNLIKELY__(b_D2 == NULL)) {
    b_D2 = fallback;
  }

  while (comp < DIM_OF_WORLD) {
    D2_BAR(dim, coords, x_D2[comp], b_D2[comp]);
    ++comp;
  }

  return (const REAL_BB *)b_D2;
}

/** c = a * b (matrix-matrix product). c is overwritten entry by entry
 * while a and b are still being read, so c must not alias a or b.
 * Returns c.
 */
static inline REAL_D *MM_DOW(const REAL_DD a,
			     const REAL_DD b, REAL_DD c)
{
  int row, col, s;

  for (row = 0; row < DIM_OF_WORLD; row++) {
    for (col = 0; col < DIM_OF_WORLD; col++) {
      REAL *entry = &c[row][col];

      *entry = 0.0;
      for (s = 0; s < DIM_OF_WORLD; s++) {
	*entry += a[row][s]*b[s][col];
      }
    }
  }
  return c;
}
/** c = a * b^T (matrix times transposed matrix). c is overwritten entry
 * by entry while a and b are still being read, so c must not alias a
 * or b. Returns c.
 */
static inline REAL_D *MMT_DOW(const REAL_DD a,
			      const REAL_DD b, REAL_DD c)
{
  int row, col, s;

  for (row = 0; row < DIM_OF_WORLD; row++) {
    for (col = 0; col < DIM_OF_WORLD; col++) {
      REAL *entry = &c[row][col];

      *entry = 0.0;
      for (s = 0; s < DIM_OF_WORLD; s++) {
	*entry += a[row][s]*b[col][s];
      }
    }
  }
  return c;
}
/** c = a^T * b (transposed matrix times matrix):
 *
 *   c[i][j] = sum_k a[k][i] * b[k][j].
 *
 * The previous implementation indexed b as b[j][k] and thus computed
 * a^T * b^T, which only coincides with a^T * b for symmetric b.
 *
 * c is overwritten entry by entry while a and b are still being read,
 * so c must not alias a or b. Returns c.
 */
static inline REAL_D *MTM_DOW(const REAL_DD a,
			      const REAL_DD b,
			      REAL_DD c)
{
  int i, j, k;

  for (i = 0; i < DIM_OF_WORLD; i++) {
    for (j = 0; j < DIM_OF_WORLD; j++) {
      c[i][j] = a[0][i]*b[0][j];
      for (k = 1; k < DIM_OF_WORLD; k++) {
	c[i][j] += a[k][i]*b[k][j];
      }
    }
  }
  return c;
}

/* Determinant and inverse of a DIM_OF_WORLD x DIM_OF_WORLD matrix.
 *
 * MDET_DOW(m) returns the determinant of m.
 *
 * MINVERT_DOW(m, mi) stores the inverse of m in mi. For
 * DIM_OF_WORLD <= 3 closed formulas are used and the determinant of m
 * is returned; the inverse is only computed if the determinant is
 * non-zero. mi must not alias m.
 */
#if DIM_OF_WORLD == 1
static inline REAL MDET_DOW(const REAL_DD m)
{
  return m[0][0];
}
static inline REAL MINVERT_DOW(const REAL_DD m, REAL_DD mi)
{
  /* The singularity test has to look at the input m; mi is a pure
   * output argument and may be uninitialised on entry.
   */
  if (m[0][0] != 0.0) {
    mi[0][0] = 1.0/m[0][0];
  }

  return m[0][0];
}
#elif DIM_OF_WORLD == 2
static inline REAL MDET_DOW(const REAL_DD m)
{
  return m[0][0]*m[1][1] - m[1][0]*m[0][1];
}
static inline REAL MINVERT_DOW(const REAL_DD m, REAL_DD mi)
{
  REAL det = m[0][0]*m[1][1] - m[1][0]*m[0][1];

  /* adjugate divided by the determinant; mi is left untouched for a
   * singular matrix
   */
  if (det != 0.0) {
    mi[0][0] = m[1][1] / det;
    mi[1][1] = m[0][0] / det;
    mi[0][1] = -m[0][1] / det;
    mi[1][0] = -m[1][0] / det;
  }

  return det;
}
#elif DIM_OF_WORLD == 3
static inline REAL MDET_DOW(const REAL_DD m)
{
  /* cofactor expansion along the first column */
  return
    +(m[1][1]*m[2][2] - m[2][1]*m[1][2]) * m[0][0]
    -(m[0][1]*m[2][2] - m[2][1]*m[0][2]) * m[1][0]
    +(m[0][1]*m[1][2] - m[1][1]*m[0][2]) * m[2][0];
}
static inline REAL MINVERT_DOW(const REAL_DD m, REAL_DD mi)
{
  REAL det;
  int i;

  /* The first row of the adjugate doubles as scratch space for the
   * determinant. Consequently mi[0][.] is overwritten even if m turns
   * out to be singular.
   */
  mi[0][0] = +(m[1][1]*m[2][2] - m[2][1]*m[1][2]);
  mi[0][1] = -(m[0][1]*m[2][2] - m[2][1]*m[0][2]);
  mi[0][2] = +(m[0][1]*m[1][2] - m[1][1]*m[0][2]);

  det = mi[0][0] * m[0][0] + mi[0][1] * m[1][0] + mi[0][2]*m[2][0];

  if (det != 0.0) {
    for (i = 0; i < DIM_OF_WORLD; i++) {
      mi[0][i] /= det;
    }

    mi[1][0] = (m[1][2]*m[2][0] - m[1][0]*m[2][2])/det;
    mi[1][1] = (m[0][0]*m[2][2] - m[2][0]*m[0][2])/det;
    mi[1][2] = (m[1][0]*m[0][2] - m[0][0]*m[1][2])/det;

    mi[2][0] = (m[1][0]*m[2][1] - m[1][1]*m[2][0])/det;
    mi[2][1] = (m[2][0]*m[0][1] - m[0][0]*m[2][1])/det;
    mi[2][2] = (m[0][0]*m[1][1] - m[0][1]*m[1][0])/det;
  }

  return det;
}
#else
/* General case: both functions work on a copy of m which is handed to
 * square_gauss().
 */
static inline REAL MDET_DOW(const REAL_DD m)
{
  FUNCNAME("MDET_DOW");
  REAL_DD tmpM;
  REAL_D  b;
  REAL det;
  int i;

  SET_DOW(0.0, b);    /* dummy */
  MCOPY_DOW(m, tmpM); /* destructive Gauss destroys M */

  square_gauss((REAL *)tmpM, b, b, DIM_OF_WORLD, 1);

  /* NOTE(review): the determinant is taken as the product of the
   * diagonal left behind by square_gauss(); this presumes that
   * square_gauss() leaves an (unscaled) triangular factor in tmpM and
   * does not change the sign by pivoting -- confirm.
   */
  det = 1.0;
  for (i = 0; i < DIM_OF_WORLD; ++i) {
    det *= tmpM[i][i];
  }

  return det;
}

/* NOTE: in contrast to the variants for DIM_OF_WORLD <= 3 this version
 * returns mi and not the determinant.
 */
static inline REAL_D *MINVERT_DOW(const REAL_DD m, REAL_DD mi)
{
  FUNCNAME("MINVERT_DOW");
  REAL_DD tmpM, b;

  MSET_DOW(1.0, b);   /* unit matrix */
  MCOPY_DOW(m, tmpM); /* destructive Gauss destroys M */

  square_gauss((REAL *)tmpM, (REAL *)b, (REAL *)mi, DIM_OF_WORLD, DIM_OF_WORLD);

  return mi;
}
#endif

/* The identity transformation. The first member is initialised with
 * the DIM_OF_WORLD x DIM_OF_WORLD unit matrix (entries of a row behind
 * the diagonal are implicitly zero-initialised), the second member
 * with zeros.
 */
static const AFF_TRAFO aff_identity = {
  {
    { 1.0 },
#if DIM_OF_WORLD >= 2
    { 0.0, 1.0 },
#endif
#if DIM_OF_WORLD >= 3
    { 0.0, 0.0, 1.0 },
#endif
#if DIM_OF_WORLD >= 4
    { 0.0, 0.0, 0.0, 1.0 },
#endif
#if DIM_OF_WORLD >= 5
    { 0.0, 0.0, 0.0, 0.0, 1.0 },
#endif
#if DIM_OF_WORLD >= 6
    { 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 },
#endif
#if DIM_OF_WORLD >= 7
    { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 },
#endif
#if DIM_OF_WORLD >= 8
    { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 },
#endif
#if DIM_OF_WORLD >= 9
    { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 },
#endif
  },
  { 0.0 }
};

static inline const AFF_TRAFO *AFFINE_IDENTITY(void)
{
  return &aff_identity;
}

/** Read an affine transformation via GET_PARAMETER(info, key, ...).
 *
 * The format is the concatenation of SCAN_MFORMAT_DOW and
 * SCAN_FORMAT_DOW, the argument list consists of the addresses of the
 * entries of T->M (row by row) followed by the addresses of the
 * entries of T->t. Hence the parameter value has to list the matrix
 * entries first and the translation afterwards.
 *
 * NOTE(review): the result of GET_PARAMETER() is not checked here, so
 * the caller cannot tell from this function whether T was filled --
 * confirm against GET_PARAMETER()'s contract.
 */
static inline void GET_AFF_TRAFO(int info, const char *key, AFF_TRAFO *T)
{
  GET_PARAMETER(info, key,
		SCAN_MFORMAT_DOW SCAN_FORMAT_DOW,
		SCAN_MEXPAND_DOW(T->M), SCAN_EXPAND_DOW(T->t));
}

/** Apply the affine transformation: y = trafo->M * x + trafo->t.
 * y is cleared first, then MV_DOW() and AXPY_DOW() are applied to it.
 * Returns y.
 */
static inline REAL *AFFINE_DOW(const AFF_TRAFO *trafo,
			       const REAL_D x,
			       REAL_D y)
{
  /* start from zero, then add the linear and the constant part */
  SET_DOW(0.0, y);

  MV_DOW((const REAL_D *)trafo->M, x, y);

  AXPY_DOW(1.0, trafo->t, y);
  return y;
}

/* Apply the inverse of the affine transformation,
 *
 *   y = M^T * x - M^T * t,
 *
 * which is the inverse only if trafo->M is orthogonal; this is assumed
 * and not checked. Returns y.
 */
static inline REAL *AFFINV_DOW(const AFF_TRAFO *trafo,
			       const REAL_D x,
			       REAL_D y)
{
  REAL_D shift = { 0.0, };

  /* shift = M^T * t */
  MTV_DOW((const REAL_D *)trafo->M, trafo->t, shift);

  /* y = M^T * x */
  SET_DOW(0.0, y);
  MTV_DOW((const REAL_D *)trafo->M, x, y);

  /* y -= shift */
  AXPY_DOW(-1.0, shift, y);
  return y;
}

/** Store in B the transformation with B->M = A->M^T and
 * B->t = -A->M^T * A->t. The translation is computed first, from the
 * untransposed A->M. B must not alias A. Returns B.
 */
static inline AFF_TRAFO *INVAFF_DOW(const AFF_TRAFO *A,
				    AFF_TRAFO *B)
{
  int row, col;

  /* B->t = -(A->M^T * A->t) */
  SET_DOW(0.0, B->t);
  MTV_DOW((const REAL_D *)A->M, A->t, B->t);
  SCAL_DOW(-1.0, B->t);

  /* B->M = A->M^T */
  for (row = 0; row < DIM_OF_WORLD; ++row) {
    for (col = 0; col < DIM_OF_WORLD; ++col) {
      B->M[row][col] = A->M[col][row];
    }
  }

  return B;
}

/** Composition of two affine transformations, C = A o B:
 *
 *   C->M = A->M * B->M,   C->t = A->t + A->M * B->t.
 *
 * Returns C.
 */
static inline AFF_TRAFO *AFFAFF_DOW(const AFF_TRAFO *A,
				    const AFF_TRAFO *B,
				    AFF_TRAFO *C)
{
  /* linear part */
  MM_DOW(A->M, B->M, C->M);

  /* translation: start with A->t, then MV_DOW() with A->M and B->t */
  COPY_DOW(A->t, C->t);
  MV_DOW(A->M, B->t, C->t);
  return C;
}

/* filling of the geometry and the quadrature cache */

/* Fill the per-element geometry cache embedded in el_info with the
 * quantities requested through fill_flag and return it.
 *
 * The cache is reset whenever el_info->el differs from the element the
 * cache was last filled for. Only quantities which are requested and
 * not yet flagged as present in the cache's fill_flag are computed.
 *
 *  - FILL_EL_LAMBDA computes Lambda and det in one go and marks both;
 *    otherwise FILL_EL_DET computes det alone.
 *  - FILL_EL_WALL_ORIENTATION(wall): orientation[wall][0] for the
 *    element itself, orientation[wall][1] for the neighbour, or -1 if
 *    FILL_NEIGH is not set in el_info->fill_flag or there is no
 *    neighbour.
 *  - FILL_EL_WALL_REL_ORIENTATION(wall): only filled (and flagged) if a
 *    neighbour across that wall exists.
 *  - FILL_EL_WALL_NORMAL(wall) and FILL_EL_WALL_DET(wall) are always
 *    computed and flagged together.
 */
static inline const EL_GEOM_CACHE *
fill_el_geom_cache(const EL_INFO *el_info, FLAGS fill_flag)
{
  /* The cache lives inside the const EL_INFO; constness is cast away
   * so that it can be updated.
   */
  EL_GEOM_CACHE *cache = (EL_GEOM_CACHE *)&el_info->el_geom_cache;
  FLAGS missing;
  int dim, w;

  /* different element: nothing cached so far is valid any longer */
  if (cache->current_el != el_info->el) {
    cache->fill_flag  = 0;
    cache->current_el = el_info->el;
  }

  /* requested bits which are not yet present in the cache */
  missing = (cache->fill_flag ^ fill_flag) & fill_flag;
  if (!missing) {
    return cache;
  }

  dim = el_info->mesh->dim;

  if (missing & FILL_EL_LAMBDA) {
    cache->det = el_grd_lambda_dim(dim, el_info, cache->Lambda);
    cache->fill_flag |= FILL_EL_LAMBDA|FILL_EL_DET;
  } else if (missing & FILL_EL_DET) {
    cache->det = el_det_dim(dim, el_info);
    cache->fill_flag |= FILL_EL_DET;
  }

  for (w = 0; w < N_WALLS_MAX; w++) {
    if (missing & FILL_EL_WALL_ORIENTATION(w)) {
      EL *self = el_info->el;

      cache->orientation[w][0] = wall_orientation(dim, self, w);
      if (!(el_info->fill_flag & FILL_NEIGH) || !el_info->neigh[w]) {
	cache->orientation[w][1] = -1;
      } else {
	EL  *other = el_info->neigh[w];
	int opp_v  = el_info->opp_vertex[w];

	cache->orientation[w][1] = wall_orientation(dim, other, opp_v);
      }
      cache->fill_flag |= FILL_EL_WALL_ORIENTATION(w);
    }

    if (missing & FILL_EL_WALL_REL_ORIENTATION(w)) {
      DEBUG_TEST_FLAG(FILL_NEIGH, el_info);
      if (el_info->neigh[w]) {
	EL  *self  = el_info->el;
	EL  *other = el_info->neigh[w];
	int opp_v  = el_info->opp_vertex[w];

	cache->rel_orientation[w] =
	  wall_rel_orientation(dim, self, other, w, opp_v);
	cache->fill_flag |= FILL_EL_WALL_REL_ORIENTATION(w);
      }
    }

    if (missing & (FILL_EL_WALL_NORMAL(w)|FILL_EL_WALL_DET(w))) {
      cache->wall_det[w] =
	get_wall_normal_dim(dim, el_info, w, cache->wall_normal[w]);
      cache->fill_flag |= FILL_EL_WALL_NORMAL(w)|FILL_EL_WALL_DET(w);
    }
  }

  return cache;
}

/* Fill the per-element quadrature cache stored in quad->metadata and
 * return it.
 *
 * el_info: the element the cached quantities belong to.
 * quad:    the quadrature rule; its metadata pointer is used as the
 *          QUAD_EL_CACHE.
 * fill:    bit-mask of FILL_EL_QUAD_* flags naming the quantities the
 *          caller needs.
 *
 * The cache remembers the element it was last filled for. When
 * el_info->el differs from that element the cache is invalidated and
 * INIT_ELEMENT(el_info, quad) is invoked once for the new element.
 * Only quantities which are requested in "fill" and not yet marked as
 * present in the cache are computed.
 *
 * If FILL_COORDS is set in el_info->fill_flag only
 * FILL_EL_QUAD_WORLD is handled here (via coord_to_world() for each
 * quadrature point); all other flags are serviced only on the
 * parametric branch, which requires el_info->mesh->parametric to be
 * non-NULL.
 *
 * This function does not call parametric->init_element(); it is
 * assumed that this has been done before if necessary.
 */
static inline const QUAD_EL_CACHE *fill_quad_el_cache(const EL_INFO *el_info,
                                                      const QUAD *quad,
                                                      FLAGS fill)
{
  QUAD_EL_CACHE *qelc = (QUAD_EL_CACHE *)quad->metadata;
  FLAGS need;
  int iq, wall;

  /* A new element invalidates everything which was cached so far. */
  if (qelc->current_el != el_info->el) {
    qelc->fill_flag = 0;
    qelc->current_el = el_info->el;
    INIT_ELEMENT(el_info, quad);
  }

  /* need := requested bits which are not yet present in the cache. */
  if (!(need = (qelc->fill_flag ^ fill) & fill)) {
    return qelc;
  }

  if (el_info->fill_flag & FILL_COORDS) {
    if (need & FILL_EL_QUAD_WORLD) {
      for (iq = 0; iq < quad->n_points; iq++) {
        coord_to_world(el_info, quad->lambda[iq], qelc->world[iq]);
      }
      qelc->fill_flag |= FILL_EL_QUAD_WORLD;
    }
  } else {
    PARAMETRIC *parametric = el_info->mesh->parametric;

    TEST_EXIT(parametric,
              "FILL_COORDS not set in el_info->fill_flag and "
              "not on a parametric mesh.\n");

    if (need & FILL_EL_QUAD_WORLD) {
      parametric->coord_to_world(el_info, quad, -1, NULL, qelc->world);
      /* Record the result like every other cached quantity; without
       * this the world coordinates would be recomputed on each call
       * for the same element.
       */
      qelc->fill_flag |= FILL_EL_QUAD_WORLD;
    }
    if (need &
        (FILL_EL_QUAD_GRD_WORLD|FILL_EL_QUAD_D2_WORLD|FILL_EL_QUAD_D3_WORLD)) {
      /* One call computes all requested derivatives; a NULL pointer
       * is passed for each derivative which is not needed.
       */
      parametric->grd_world(el_info, quad, -1, NULL,
                            (need & FILL_EL_QUAD_GRD_WORLD)
                            ? qelc->param.grd_world : NULL,
                            (need & FILL_EL_QUAD_D2_WORLD)
                            ? qelc->param.D2_world : NULL,
                            (need & FILL_EL_QUAD_D3_WORLD)
                            ? qelc->param.D3_world : NULL);
      qelc->fill_flag |= need &
        (FILL_EL_QUAD_GRD_WORLD|FILL_EL_QUAD_D2_WORLD|FILL_EL_QUAD_D3_WORLD);
    }

    /* DLambda implies Lambda and det, Lambda implies det; pick the
     * cheapest call which delivers everything that is needed.
     */
    if (need & FILL_EL_QUAD_DLAMBDA) {
      parametric->grd_lambda(el_info, quad, -1, NULL,
                             qelc->param.Lambda,
                             qelc->param.DLambda,
                             qelc->param.det);
      qelc->fill_flag |=
        FILL_EL_QUAD_DLAMBDA|FILL_EL_QUAD_LAMBDA|FILL_EL_QUAD_DET;
    } else if (need & FILL_EL_QUAD_LAMBDA) {
      parametric->grd_lambda(el_info, quad, -1, NULL,
                             qelc->param.Lambda, NULL, qelc->param.det);
      qelc->fill_flag |= FILL_EL_QUAD_LAMBDA|FILL_EL_QUAD_DET;
    } else if (need & FILL_EL_QUAD_DET) {
      parametric->det(el_info, quad, -1, NULL, qelc->param.det);
      qelc->fill_flag |= FILL_EL_QUAD_DET;
    }

    if (need & (FILL_EL_QUAD_WALL_DET |
                FILL_EL_QUAD_WALL_NORMAL |
                FILL_EL_QUAD_GRD_NORMAL |
                FILL_EL_QUAD_D2_NORMAL)) {
      DEBUG_TEST_EXIT(quad->codim == 1,
                      "Wall normals make only sense for co-dim 1.\n");

      wall = quad->subsplx;

      /* Same cascade as above: the highest requested derivative of
       * the normal determines which output buffers are passed, the
       * wall determinant is always computed.
       */
      if (need & FILL_EL_QUAD_D2_NORMAL) {
        parametric->wall_normal(el_info, wall, quad, -1, NULL,
                                qelc->param.wall_normal,
                                qelc->param.grd_normal,
                                qelc->param.D2_normal,
                                qelc->param.wall_det);
        qelc->fill_flag |=
          (FILL_EL_QUAD_WALL_DET |
           FILL_EL_QUAD_WALL_NORMAL |
           FILL_EL_QUAD_GRD_NORMAL |
           FILL_EL_QUAD_D2_NORMAL);
      } else if (need & FILL_EL_QUAD_GRD_NORMAL) {
        parametric->wall_normal(el_info, wall, quad, -1, NULL,
                                qelc->param.wall_normal,
                                qelc->param.grd_normal,
                                NULL,
                                qelc->param.wall_det);
        qelc->fill_flag |=
          (FILL_EL_QUAD_WALL_DET |
           FILL_EL_QUAD_WALL_NORMAL |
           FILL_EL_QUAD_GRD_NORMAL);
      } else if (need & FILL_EL_QUAD_WALL_NORMAL) {
        parametric->wall_normal(el_info, wall, quad, -1, NULL,
                                qelc->param.wall_normal, NULL, NULL,
                                qelc->param.wall_det);
        qelc->fill_flag |= FILL_EL_QUAD_WALL_DET|FILL_EL_QUAD_WALL_NORMAL;
      } else {
        parametric->wall_normal(el_info, wall, quad, -1, NULL,
                                NULL /* no normals */, NULL, NULL,
                                qelc->param.wall_det);
        qelc->fill_flag |= FILL_EL_QUAD_WALL_DET;
      }
    }
  }

  return qelc;
}

/* Evaluate the i-th basis function of a really vector-valued basis
 * function set at the barycentric point lambda.
 *
 * The scalar factor PHI() and the direction PHI_D() are combined via
 * AXEY_DOW() into the caller-supplied vector "result", which is also
 * the return value.
 */
static inline const REAL *
phi_dow(REAL_D result,
        int i, const REAL_B lambda, const BAS_FCTS *thisptr)
{
  REAL        scalar    = PHI(thisptr, i, lambda);
  const REAL *direction = PHI_D(thisptr, i, lambda);

  AXEY_DOW(scalar, direction, result);

  return result;
}

/* Barycentric gradient of the i-th basis function of a really
 * vector-valued basis function set, evaluated at lambda.
 *
 * For each of the DIM_OF_WORLD components the scalar gradient
 * GRD_PHI() is combined with the matching component of the direction
 * PHI_D() by AXEY_BAR(). Unless thisptr->dir_pw_const is set, the
 * gradient of the direction GRD_PHI_D(), weighted by the scalar value
 * PHI(), is folded in afterwards by AXPY_BAR(). The outcome is stored
 * in the caller-supplied "result", which is also returned.
 */
static inline const REAL_B *
grd_phi_dow(REAL_DB result,
            int i, const REAL_B lambda, const BAS_FCTS *thisptr)
{
  const REAL   *scalar_grd = GRD_PHI(thisptr, i, lambda);
  const REAL   *direction  = PHI_D(thisptr, i, lambda);
  const REAL_B *direction_grd;
  REAL         scalar;
  int          comp;

  /* Contribution of the scalar factor's gradient. */
  for (comp = 0; comp < DIM_OF_WORLD; comp++) {
    AXEY_BAR(DIM_MAX, direction[comp], scalar_grd, result[comp]);
  }

  /* With a piecewise constant direction there is nothing to add. */
  if (thisptr->dir_pw_const) {
    return (const REAL_B *)result;
  }

  /* Contribution of the direction's gradient. */
  scalar        = PHI(thisptr, i, lambda);
  direction_grd = GRD_PHI_D(thisptr, i, lambda);

  for (comp = 0; comp < DIM_OF_WORLD; comp++) {
    AXPY_BAR(DIM_MAX, scalar, direction_grd[comp], result[comp]);
  }

  return (const REAL_B *)result;
}

/* Compute the barycentric second derivatives of some really
 * vector-valued basis functions.
 */

#endif /* _ALBERT_INLINES_H_ */