1 /////////////////////////////////////////////////////////////////////////////
2 //  einspline:  a library for creating and evaluating B-splines            //
3 //  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
4 //  Released under the BSD-3-clause license                                //
5 /////////////////////////////////////////////////////////////////////////////
6 
7 #include <cmath>
8 #include <algorithm>
9 #include "bspline_base.h"
10 #include "multi_bspline_structs.h"
11 #include "multi_bspline_eval_d.h"
12 
13 extern const double* restrict   Ad;
14 extern const double* restrict  dAd;
15 extern const double* restrict d2Ad;
16 extern const double* restrict d3Ad;
17 
18 /************************************************************/
19 /* 1D double-precision, real evaluation functions        */
20 /************************************************************/
21 void
eval_multi_UBspline_1d_d(const multi_UBspline_1d_d * spline,double x,double * restrict vals)22 eval_multi_UBspline_1d_d (const multi_UBspline_1d_d *spline,
23 			  double x,
24 			  double* restrict vals)
25 {
26   x -= spline->x_grid.start;
27   double ux = x*spline->x_grid.delta_inv;
28   double ipartx, tx;
29   tx = std::modf (ux, &ipartx);
30   // This protects from overflow reads of coefs when x goes out of [start, end)
31   // in simulation systems with an open boundary condition.
32   // This protection has no effect on simulation systems with PBC and anti-PBC
33   // condition because x has been reduced to [start, end) before the call
34   // The protection is correct for PERIODIC/ANTIPERIODIC BC splines
35   // but not sufficient for NATURAL BC splines (need grid.num-2).
36   // With this protection ix is pulled to the boundary, tx is not modified.
37   // This is not correct but no more relevant to QMCPACK since
38   // we moved to C++ versions of this function already.
39   int ix = std::min(std::max(0,(int) ipartx),spline->x_grid.num-1);
40 
41   double tpx[4], a[4];
42   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
43 
44   a[0]  = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
45   a[1]  = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
46   a[2]  = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
47   a[3]  = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
48 
49   intptr_t xs = spline->x_stride;
50 
51   double* restrict coefs0 = spline->coefs + ((ix  )*xs);
52   double* restrict coefs1 = spline->coefs + ((ix+1)*xs);
53   double* restrict coefs2 = spline->coefs + ((ix+2)*xs);
54   double* restrict coefs3 = spline->coefs + ((ix+3)*xs);
55 
56   for (int n=0; n<spline->num_splines; n++)
57     vals[n] = a[0] * coefs0[n] + a[1] * coefs1[n] + a[2] * coefs2[n] + a[3] * coefs3[n];
58 }
59 
60 
61 
62 void
eval_multi_UBspline_1d_d_vg(const multi_UBspline_1d_d * spline,double x,double * restrict vals,double * restrict grads)63 eval_multi_UBspline_1d_d_vg (const multi_UBspline_1d_d *spline,
64 			     double x,
65 			     double* restrict vals,
66 			     double* restrict grads)
67 {
68   x -= spline->x_grid.start;
69   double ux = x*spline->x_grid.delta_inv;
70   double ipartx, tx;
71   tx = std::modf (ux, &ipartx);  int ix = std::min(std::max(0,(int) ipartx),spline->x_grid.num-1);
72 
73   double tpx[4], a[4], da[4];
74   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
75 
76   a[0]  = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
77   a[1]  = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
78   a[2]  = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
79   a[3]  = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
80   da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
81   da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
82   da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
83   da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
84 
85   intptr_t xs = spline->x_stride;
86 
87   for (int n=0; n<spline->num_splines; n++) {
88     vals[n]  = 0.0;
89     grads[n] = 0.0;
90   }
91 
92   for (int i=0; i<4; i++) {
93     double* restrict coefs = spline->coefs + ((ix+i)*xs);
94     for (int n=0; n<spline->num_splines; n++) {
95       vals[n]  +=   a[i] * coefs[n];
96       grads[n] +=  da[i] * coefs[n];
97     }
98   }
99 
100   double dxInv = spline->x_grid.delta_inv;
101   for (int n=0; n<spline->num_splines; n++)
102     grads[n] *= dxInv;
103 }
104 
105 
106 void
eval_multi_UBspline_1d_d_vgl(const multi_UBspline_1d_d * spline,double x,double * restrict vals,double * restrict grads,double * restrict lapl)107 eval_multi_UBspline_1d_d_vgl (const multi_UBspline_1d_d *spline,
108 			      double x,
109 			      double* restrict vals,
110 			      double* restrict grads,
111 			      double* restrict lapl)
112 {
113   x -= spline->x_grid.start;
114   double ux = x*spline->x_grid.delta_inv;
115   double ipartx, tx;
116   tx = std::modf (ux, &ipartx);  int ix = std::min(std::max(0,(int) ipartx),spline->x_grid.num-1);
117 
118   double tpx[4], a[4], da[4], d2a[4];
119   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
120 
121   a[0]  = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
122   a[1]  = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
123   a[2]  = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
124   a[3]  = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
125   da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
126   da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
127   da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
128   da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
129   d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
130   d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
131   d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
132   d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
133 
134   intptr_t xs = spline->x_stride;
135   double dxInv = spline->x_grid.delta_inv;
136 
137   double* restrict coefs0 = spline->coefs + ((ix  )*xs);
138   double* restrict coefs1 = spline->coefs + ((ix+1)*xs);
139   double* restrict coefs2 = spline->coefs + ((ix+2)*xs);
140   double* restrict coefs3 = spline->coefs + ((ix+3)*xs);
141 
142   for (int n=0; n<spline->num_splines; n++)
143   {
144     vals[n]  = a[0] * coefs0[n] + a[1] * coefs1[n] + a[2] * coefs2[n] + a[3] * coefs3[n];
145     grads[n] = (da[0] * coefs0[n] + da[1] * coefs1[n] + da[2] * coefs2[n] + da[3] * coefs3[n])*dxInv;
146     lapl[n]  = (d2a[0] * coefs0[n] + d2a[1] * coefs1[n] + d2a[2] * coefs2[n] + d2a[3] * coefs3[n])*dxInv*dxInv;
147   }
148 }
149 
150 
151 void
eval_multi_UBspline_1d_d_vgh(const multi_UBspline_1d_d * spline,double x,double * restrict vals,double * restrict grads,double * restrict hess)152 eval_multi_UBspline_1d_d_vgh (const multi_UBspline_1d_d *spline,
153 			      double x,
154 			      double* restrict vals,
155 			      double* restrict grads,
156 			      double* restrict hess)
157 {
158   eval_multi_UBspline_1d_d_vgl (spline, x, vals, grads, hess);
159 }
160 
161 
162 
163 /************************************************************/
164 /* 2D double-precision, real evaluation functions        */
165 /************************************************************/
166 void
eval_multi_UBspline_2d_d(const multi_UBspline_2d_d * spline,double x,double y,double * restrict vals)167 eval_multi_UBspline_2d_d (const multi_UBspline_2d_d *spline,
168 			  double x, double y,
169 			  double* restrict vals)
170 {
171   x -= spline->x_grid.start;
172   y -= spline->y_grid.start;
173   double ux = x*spline->x_grid.delta_inv;
174   double uy = y*spline->y_grid.delta_inv;
175   double ipartx, iparty, tx, ty;
176   tx = std::modf (ux, &ipartx);  int ix = (int) ipartx;
177   ty = std::modf (uy, &iparty);  int iy = (int) iparty;
178 
179   double tpx[4], tpy[4], a[4], b[4];
180   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
181   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
182 
183   a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
184   a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
185   a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
186   a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
187 
188   b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
189   b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
190   b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
191   b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
192 
193   intptr_t xs = spline->x_stride;
194   intptr_t ys = spline->y_stride;
195 
196   for (int n=0; n<spline->num_splines; n++)
197     vals[n] = 0.0;
198 
199   for (int i=0; i<4; i++)
200     for (int j=0; j<4; j++) {
201       double prefactor = a[i]*b[j];
202       double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
203       for (int n=0; n<spline->num_splines; n++)
204 	vals[n] += prefactor*coefs[n];
205     }
206 }
207 
208 
209 void
eval_multi_UBspline_2d_d_vg(const multi_UBspline_2d_d * spline,double x,double y,double * restrict vals,double * restrict grads)210 eval_multi_UBspline_2d_d_vg (const multi_UBspline_2d_d *spline,
211 			     double x, double y,
212 			     double* restrict vals,
213 			     double* restrict grads)
214 {
215   x -= spline->x_grid.start;
216   y -= spline->y_grid.start;
217   double ux = x*spline->x_grid.delta_inv;
218   double uy = y*spline->y_grid.delta_inv;
219   double ipartx, iparty, tx, ty;
220   tx = std::modf (ux, &ipartx);  int ix = (int) ipartx;
221   ty = std::modf (uy, &iparty);  int iy = (int) iparty;
222 
223   double tpx[4], tpy[4], a[4], b[4], da[4], db[4];
224   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
225   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
226 
227   a[0]  = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
228   a[1]  = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
229   a[2]  = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
230   a[3]  = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
231   da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
232   da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
233   da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
234   da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
235 
236   b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
237   b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
238   b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
239   b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
240   db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
241   db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
242   db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
243   db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
244 
245   intptr_t xs = spline->x_stride;
246   intptr_t ys = spline->y_stride;
247 
248   for (int n=0; n<spline->num_splines; n++) {
249     vals[n] = 0.0;
250     grads[2*n+0] = grads[2*n+1] = grads[2*n+2] = 0.0;
251   }
252 
253   for (int i=0; i<4; i++)
254     for (int j=0; j<4; j++) {
255       double ab = a[i]*b[j];
256       double dab[2];
257       dab[0] = da[i]* b[j];
258       dab[1] =  a[i]*db[j];
259 
260       double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
261       for (int n=0; n<spline->num_splines; n++) {
262 	vals [n]     +=   ab   *coefs[n];
263 	grads[2*n+0] +=  dab[0]*coefs[n];
264 	grads[2*n+1] +=  dab[1]*coefs[n];
265       }
266     }
267 
268   double dxInv = spline->x_grid.delta_inv;
269   double dyInv = spline->y_grid.delta_inv;
270   for (int n=0; n<spline->num_splines; n++) {
271     grads[2*n+0] *= dxInv;
272     grads[2*n+1] *= dyInv;
273   }
274 }
275 
276 void
eval_multi_UBspline_2d_d_vgl(const multi_UBspline_2d_d * spline,double x,double y,double * restrict vals,double * restrict grads,double * restrict lapl)277 eval_multi_UBspline_2d_d_vgl (const multi_UBspline_2d_d *spline,
278 			      double x, double y,
279 			      double* restrict vals,
280 			      double* restrict grads,
281 			      double* restrict lapl)
282 {
283   x -= spline->x_grid.start;
284   y -= spline->y_grid.start;
285   double ux = x*spline->x_grid.delta_inv;
286   double uy = y*spline->y_grid.delta_inv;
287   double ipartx, iparty, tx, ty;
288   tx = std::modf (ux, &ipartx);  int ix = (int) ipartx;
289   ty = std::modf (uy, &iparty);  int iy = (int) iparty;
290 
291   double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4];
292   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
293   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
294 
295   a[0]  = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
296   a[1]  = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
297   a[2]  = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
298   a[3]  = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
299   da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
300   da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
301   da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
302   da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
303   d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
304   d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
305   d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
306   d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
307 
308   b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
309   b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
310   b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
311   b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
312   db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
313   db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
314   db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
315   db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
316   d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]);
317   d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]);
318   d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]);
319   d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]);
320 
321   intptr_t xs = spline->x_stride;
322   intptr_t ys = spline->y_stride;
323 
324   double* lapl2 = new double[2*spline->num_splines];
325   for (int n=0; n<spline->num_splines; n++) {
326     vals[n] = 0.0;
327     grads[2*n+0] = grads[2*n+1] = 0.0;
328     lapl2[2*n+0] = lapl2[2*n+1] = 0.0;
329   }
330 
331   for (int i=0; i<4; i++)
332     for (int j=0; j<4; j++) {
333       double ab = a[i]*b[j];
334       double dab[2], d2ab[2];
335       dab[0] = da[i]* b[j];
336       dab[1] =  a[i]*db[j];
337       d2ab[0] = d2a[i]*  b[j];
338       d2ab[1] =   a[i]*d2b[j];
339 
340       double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
341       for (int n=0; n<spline->num_splines; n++) {
342 	vals[n]      +=   ab   *coefs[n];
343 	grads[2*n+0] +=  dab[0]*coefs[n];
344 	grads[2*n+1] +=  dab[1]*coefs[n];
345 	lapl2[2*n+0] += d2ab[0]*coefs[n];
346 	lapl2[2*n+1] += d2ab[1]*coefs[n];
347       }
348     }
349 
350   double dxInv = spline->x_grid.delta_inv;
351   double dyInv = spline->y_grid.delta_inv;
352   for (int n=0; n<spline->num_splines; n++) {
353     grads[2*n+0] *= dxInv;
354     grads[2*n+1] *= dyInv;
355     lapl2[2*n+0] *= dxInv*dxInv;
356     lapl2[2*n+1] *= dyInv*dyInv;
357     lapl[n] = lapl2[2*n+0] + lapl2[2*n+1];
358   }
359   delete[] lapl2;
360 }
361 
362 
363 
364 
365 void
eval_multi_UBspline_2d_d_vgh(const multi_UBspline_2d_d * spline,double x,double y,double * restrict vals,double * restrict grads,double * restrict hess)366 eval_multi_UBspline_2d_d_vgh (const multi_UBspline_2d_d *spline,
367 			      double x, double y,
368 			      double* restrict vals,
369 			      double* restrict grads,
370 			      double* restrict hess)
371 {
372   x -= spline->x_grid.start;
373   y -= spline->y_grid.start;
374   double ux = x*spline->x_grid.delta_inv;
375   double uy = y*spline->y_grid.delta_inv;
376   double ipartx, iparty, tx, ty;
377   tx = std::modf (ux, &ipartx);  int ix = (int) ipartx;
378   ty = std::modf (uy, &iparty);  int iy = (int) iparty;
379 
380   double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4];
381   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
382   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
383 
384   a[0]  = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
385   a[1]  = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
386   a[2]  = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
387   a[3]  = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
388   da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
389   da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
390   da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
391   da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
392   d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
393   d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
394   d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
395   d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
396 
397   b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
398   b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
399   b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
400   b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
401   db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
402   db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
403   db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
404   db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
405   d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]);
406   d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]);
407   d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]);
408   d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]);
409 
410   intptr_t xs = spline->x_stride;
411   intptr_t ys = spline->y_stride;
412 
413   for (int n=0; n<spline->num_splines; n++) {
414     vals[n] = 0.0;
415     grads[2*n+0] = grads[2*n+1] = 0.0;
416     for (int i=0; i<4; i++)
417       hess[4*n+i] = 0.0;
418   }
419 
420   for (int i=0; i<4; i++)
421     for (int j=0; j<4; j++){
422       double ab = a[i]*b[j];
423       double dab[2], d2ab[3];
424       dab[0] = da[i]* b[j];
425       dab[1] =  a[i]*db[j];
426       d2ab[0] = d2a[i] *   b[j];
427       d2ab[1] =  da[i] *  db[j];
428       d2ab[2] =   a[i] * d2b[j];
429 
430       double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
431       for (int n=0; n<spline->num_splines; n++) {
432 	vals[n]      +=   ab   *coefs[n];
433 	grads[2*n+0] +=  dab[0]*coefs[n];
434 	grads[2*n+1] +=  dab[1]*coefs[n];
435 	hess [4*n+0] += d2ab[0]*coefs[n];
436 	hess [4*n+1] += d2ab[1]*coefs[n];
437 	hess [4*n+3] += d2ab[2]*coefs[n];
438       }
439     }
440 
441   double dxInv = spline->x_grid.delta_inv;
442   double dyInv = spline->y_grid.delta_inv;
443   for (int n=0; n<spline->num_splines; n++) {
444     grads[2*n+0] *= dxInv;
445     grads[2*n+1] *= dyInv;
446     hess[4*n+0] *= dxInv*dxInv;
447     hess[4*n+1] *= dxInv*dyInv;
448     hess[4*n+3] *= dyInv*dyInv;
449     // Copy hessian elements into lower half of 3x3 matrix
450     hess[4*n+2] = hess[4*n+1];
451   }
452 }
453 
454 
455 
456 
457 /************************************************************/
458 /* 3D double-precision, real evaluation functions           */
459 /************************************************************/
460 void
eval_multi_UBspline_3d_d(const multi_UBspline_3d_d * spline,double x,double y,double z,double * restrict vals)461 eval_multi_UBspline_3d_d (const multi_UBspline_3d_d *spline,
462 			  double x, double y, double z,
463 			  double* restrict vals)
464 {
465   // algorithm modified by Ye Luo, Mar. 12th 2015
466   x -= spline->x_grid.start;
467   y -= spline->y_grid.start;
468   z -= spline->z_grid.start;
469   double ux = x*spline->x_grid.delta_inv;
470   double uy = y*spline->y_grid.delta_inv;
471   double uz = z*spline->z_grid.delta_inv;
472   double ipartx, iparty, ipartz, tx, ty, tz;
473   tx = std::modf (ux, &ipartx);  int ix = std::min(std::max(0,(int) ipartx),spline->x_grid.num-1);
474   ty = std::modf (uy, &iparty);  int iy = std::min(std::max(0,(int) iparty),spline->y_grid.num-1);
475   tz = std::modf (uz, &ipartz);  int iz = std::min(std::max(0,(int) ipartz),spline->z_grid.num-1);
476 
477   double  a[4], b[4], c[4];
478 
479   a[0] = ( ( Ad[0]  * tx + Ad[1] ) * tx + Ad[2] ) * tx + Ad[3];
480   a[1] = ( ( Ad[4]  * tx + Ad[5] ) * tx + Ad[6] ) * tx + Ad[7];
481   a[2] = ( ( Ad[8]  * tx + Ad[9] ) * tx + Ad[10] ) * tx + Ad[11];
482   a[3] = ( ( Ad[12] * tx + Ad[13] ) * tx + Ad[14] ) * tx + Ad[15];
483 
484   b[0] = ( ( Ad[0]  * ty + Ad[1] ) * ty + Ad[2] ) * ty + Ad[3];
485   b[1] = ( ( Ad[4]  * ty + Ad[5] ) * ty + Ad[6] ) * ty + Ad[7];
486   b[2] = ( ( Ad[8]  * ty + Ad[9] ) * ty + Ad[10] ) * ty + Ad[11];
487   b[3] = ( ( Ad[12] * ty + Ad[13] ) * ty + Ad[14] ) * ty + Ad[15];
488 
489   c[0] = ( ( Ad[0]  * tz + Ad[1] ) * tz + Ad[2] ) * tz + Ad[3];
490   c[1] = ( ( Ad[4]  * tz + Ad[5] ) * tz + Ad[6] ) * tz + Ad[7];
491   c[2] = ( ( Ad[8]  * tz + Ad[9] ) * tz + Ad[10] ) * tz + Ad[11];
492   c[3] = ( ( Ad[12] * tz + Ad[13] ) * tz + Ad[14] ) * tz + Ad[15];
493 
494   intptr_t xs = spline->x_stride;
495   intptr_t ys = spline->y_stride;
496   intptr_t zs = spline->z_stride;
497 
498   for (int n=0; n<spline->num_splines; n++)
499     vals[n] = 0.0;
500 
501   for (int i=0; i<4; i++)
502     for (int j=0; j<4; j++){
503       double pre00 =  a[i]*b[j];
504       double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + iz*zs);
505       for(int n=0; n<spline->num_splines; n++)
506           vals[n] += pre00*(c[0]*coefs[n] + c[1]*coefs[n+zs] + c[2]*coefs[n+2*zs] + c[3]*coefs[n+3*zs]);
507     }
508 }
509 
510 void
eval_multi_UBspline_3d_d_vgh(const multi_UBspline_3d_d * spline,double x,double y,double z,double * restrict vals,double * restrict grads,double * restrict hess)511 eval_multi_UBspline_3d_d_vgh (const multi_UBspline_3d_d *spline,
512 			      double x, double y, double z,
513 			      double* restrict vals,
514 			      double* restrict grads,
515 			      double* restrict hess)
516 {
517   // algorithm modified by Ye Luo, Mar. 12th 2015
518   x -= spline->x_grid.start;
519   y -= spline->y_grid.start;
520   z -= spline->z_grid.start;
521   double ux = x*spline->x_grid.delta_inv;
522   double uy = y*spline->y_grid.delta_inv;
523   double uz = z*spline->z_grid.delta_inv;
524   double ipartx, iparty, ipartz, tx, ty, tz;
525 
526   tx = std::modf (ux, &ipartx);  int ix = std::min(std::max(0,(int) ipartx),spline->x_grid.num-1);
527   ty = std::modf (uy, &iparty);  int iy = std::min(std::max(0,(int) iparty),spline->y_grid.num-1);
528   tz = std::modf (uz, &ipartz);  int iz = std::min(std::max(0,(int) ipartz),spline->z_grid.num-1);
529 
530   double a[4], b[4], c[4],da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
531 
532   a[0] = ( ( Ad[0]  * tx + Ad[1] ) * tx + Ad[2] ) * tx + Ad[3];
533   a[1] = ( ( Ad[4]  * tx + Ad[5] ) * tx + Ad[6] ) * tx + Ad[7];
534   a[2] = ( ( Ad[8]  * tx + Ad[9] ) * tx + Ad[10] ) * tx + Ad[11];
535   a[3] = ( ( Ad[12] * tx + Ad[13] ) * tx + Ad[14] ) * tx + Ad[15];
536   da[0] = ( ( dAd[0]  * tx + dAd[1] ) * tx + dAd[2] ) * tx + dAd[3];
537   da[1] = ( ( dAd[4]  * tx + dAd[5] ) * tx + dAd[6] ) * tx + dAd[7];
538   da[2] = ( ( dAd[8]  * tx + dAd[9] ) * tx + dAd[10] ) * tx + dAd[11];
539   da[3] = ( ( dAd[12] * tx + dAd[13] ) * tx + dAd[14] ) * tx + dAd[15];
540   d2a[0] = ( ( d2Ad[0]  * tx + d2Ad[1] ) * tx + d2Ad[2] ) * tx + d2Ad[3];
541   d2a[1] = ( ( d2Ad[4]  * tx + d2Ad[5] ) * tx + d2Ad[6] ) * tx + d2Ad[7];
542   d2a[2] = ( ( d2Ad[8]  * tx + d2Ad[9] ) * tx + d2Ad[10] ) * tx + d2Ad[11];
543   d2a[3] = ( ( d2Ad[12] * tx + d2Ad[13] ) * tx + d2Ad[14] ) * tx + d2Ad[15];
544 
545   b[0] = ( ( Ad[0]  * ty + Ad[1] ) * ty + Ad[2] ) * ty + Ad[3];
546   b[1] = ( ( Ad[4]  * ty + Ad[5] ) * ty + Ad[6] ) * ty + Ad[7];
547   b[2] = ( ( Ad[8]  * ty + Ad[9] ) * ty + Ad[10] ) * ty + Ad[11];
548   b[3] = ( ( Ad[12] * ty + Ad[13] ) * ty + Ad[14] ) * ty + Ad[15];
549   db[0] = ( ( dAd[0]  * ty + dAd[1] ) * ty + dAd[2] ) * ty + dAd[3];
550   db[1] = ( ( dAd[4]  * ty + dAd[5] ) * ty + dAd[6] ) * ty + dAd[7];
551   db[2] = ( ( dAd[8]  * ty + dAd[9] ) * ty + dAd[10] ) * ty + dAd[11];
552   db[3] = ( ( dAd[12] * ty + dAd[13] ) * ty + dAd[14] ) * ty + dAd[15];
553   d2b[0] = ( ( d2Ad[0]  * ty + d2Ad[1] ) * ty + d2Ad[2] ) * ty + d2Ad[3];
554   d2b[1] = ( ( d2Ad[4]  * ty + d2Ad[5] ) * ty + d2Ad[6] ) * ty + d2Ad[7];
555   d2b[2] = ( ( d2Ad[8]  * ty + d2Ad[9] ) * ty + d2Ad[10] ) * ty + d2Ad[11];
556   d2b[3] = ( ( d2Ad[12] * ty + d2Ad[13] ) * ty + d2Ad[14] ) * ty + d2Ad[15];
557 
558   c[0] = ( ( Ad[0]  * tz + Ad[1] ) * tz + Ad[2] ) * tz + Ad[3];
559   c[1] = ( ( Ad[4]  * tz + Ad[5] ) * tz + Ad[6] ) * tz + Ad[7];
560   c[2] = ( ( Ad[8]  * tz + Ad[9] ) * tz + Ad[10] ) * tz + Ad[11];
561   c[3] = ( ( Ad[12] * tz + Ad[13] ) * tz + Ad[14] ) * tz + Ad[15];
562   dc[0] = ( ( dAd[0]  * tz + dAd[1] ) * tz + dAd[2] ) * tz + dAd[3];
563   dc[1] = ( ( dAd[4]  * tz + dAd[5] ) * tz + dAd[6] ) * tz + dAd[7];
564   dc[2] = ( ( dAd[8]  * tz + dAd[9] ) * tz + dAd[10] ) * tz + dAd[11];
565   dc[3] = ( ( dAd[12] * tz + dAd[13] ) * tz + dAd[14] ) * tz + dAd[15];
566   d2c[0] = ( ( d2Ad[0]  * tz + d2Ad[1] ) * tz + d2Ad[2] ) * tz + d2Ad[3];
567   d2c[1] = ( ( d2Ad[4]  * tz + d2Ad[5] ) * tz + d2Ad[6] ) * tz + d2Ad[7];
568   d2c[2] = ( ( d2Ad[8]  * tz + d2Ad[9] ) * tz + d2Ad[10] ) * tz + d2Ad[11];
569   d2c[3] = ( ( d2Ad[12] * tz + d2Ad[13] ) * tz + d2Ad[14] ) * tz + d2Ad[15];
570 
571   intptr_t xs = spline->x_stride;
572   intptr_t ys = spline->y_stride;
573   intptr_t zs = spline->z_stride;
574 
575   for (int n=0; n<spline->num_splines; n++) {
576     vals[n] = 0.0;
577     grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
578     for (int i=0; i<9; i++)
579       hess[9*n+i] = 0.0;
580   }
581 
582   for (int i=0; i<4; i++)
583     for (int j=0; j<4; j++){
584       double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + iz*zs);
585 
586       double pre20 = d2a[i]*  b[j];
587       double pre10 =  da[i]*  b[j];
588       double pre00 =   a[i]*  b[j];
589       double pre11 =  da[i]* db[j];
590       double pre01 =   a[i]* db[j];
591       double pre02 =   a[i]*d2b[j];
592 
593       for (int n=0; n<spline->num_splines; n++) {
594         double sum0 =   c[0] * coefs[n] +   c[1] * coefs[n+zs] +   c[2] * coefs[n+2*zs] +   c[3] * coefs[n+3*zs];
595         double sum1 =  dc[0] * coefs[n] +  dc[1] * coefs[n+zs] +  dc[2] * coefs[n+2*zs] +  dc[3] * coefs[n+3*zs];
596         double sum2 = d2c[0] * coefs[n] + d2c[1] * coefs[n+zs] + d2c[2] * coefs[n+2*zs] + d2c[3] * coefs[n+3*zs];
597         hess [9*n  ] += pre20 * sum0;
598         hess [9*n+1] += pre11 * sum0;
599         hess [9*n+2] += pre10 * sum1;
600         hess [9*n+4] += pre02 * sum0;
601         hess [9*n+5] += pre01 * sum1;
602         hess [9*n+8] += pre00 * sum2;
603         grads[3*n  ] += pre10 * sum0;
604         grads[3*n+1] += pre01 * sum0;
605         grads[3*n+2] += pre00 * sum1;
606         vals[n]      += pre00 * sum0;
607       }
608     }
609 
610   double dxInv = spline->x_grid.delta_inv;
611   double dyInv = spline->y_grid.delta_inv;
612   double dzInv = spline->z_grid.delta_inv;
613 
614   for (int n=0; n<spline->num_splines; n++) {
615 
616 
617     grads[3*n+0] *= dxInv;
618     grads[3*n+1] *= dyInv;
619     grads[3*n+2] *= dzInv;
620     hess [9*n+0] *= dxInv*dxInv;
621     hess [9*n+4] *= dyInv*dyInv;
622     hess [9*n+8] *= dzInv*dzInv;
623     hess [9*n+1] *= dxInv*dyInv;
624     hess [9*n+2] *= dxInv*dzInv;
625     hess [9*n+5] *= dyInv*dzInv;
626     // Copy hessian elements into lower half of 3x3 matrix
627     hess [9*n+3] = hess[9*n+1];
628     hess [9*n+6] = hess[9*n+2];
629     hess [9*n+7] = hess[9*n+5];
630   }
631 }
632 
633 
634 void
eval_multi_UBspline_3d_d_vg(const multi_UBspline_3d_d * spline,double x,double y,double z,double * restrict vals,double * restrict grads)635 eval_multi_UBspline_3d_d_vg (const multi_UBspline_3d_d *spline,
636 			     double x, double y, double z,
637 			     double* restrict vals,
638 			     double* restrict grads)
639 {
640   x -= spline->x_grid.start;
641   y -= spline->y_grid.start;
642   z -= spline->z_grid.start;
643   double ux = x*spline->x_grid.delta_inv;
644   double uy = y*spline->y_grid.delta_inv;
645   double uz = z*spline->z_grid.delta_inv;
646   double ipartx, iparty, ipartz, tx, ty, tz;
647   tx = std::modf (ux, &ipartx);  int ix = (int) ipartx;
648   ty = std::modf (uy, &iparty);  int iy = (int) iparty;
649   tz = std::modf (uz, &ipartz);  int iz = (int) ipartz;
650 
651   double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
652     da[4], db[4], dc[4];
653   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
654   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
655   tpz[0] = tz*tz*tz;  tpz[1] = tz*tz;  tpz[2] = tz;  tpz[3] = 1.0;
656 
657   a[0]  = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
658   a[1]  = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
659   a[2]  = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
660   a[3]  = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
661   da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
662   da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
663   da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
664   da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
665 
666   b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
667   b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
668   b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
669   b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
670   db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
671   db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
672   db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
673   db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
674 
675   c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]);
676   c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]);
677   c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]);
678   c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]);
679   dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]);
680   dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]);
681   dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]);
682   dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]);
683 
684   intptr_t xs = spline->x_stride;
685   intptr_t ys = spline->y_stride;
686   intptr_t zs = spline->z_stride;
687 
688   for (int n=0; n<spline->num_splines; n++) {
689     vals[n] = 0.0;
690     grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
691   }
692 
693   for (int i=0; i<4; i++)
694     for (int j=0; j<4; j++)
695       for (int k=0; k<4; k++) {
696 	double abc = a[i]*b[j]*c[k];
697 	double dabc[3];
698 	dabc[0] = da[i]* b[j]* c[k];
699 	dabc[1] =  a[i]*db[j]* c[k];
700 	dabc[2] =  a[i]* b[j]*dc[k];
701 
702 	double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
703 	for (int n=0; n<spline->num_splines; n++) {
704 	  vals[n]      +=   abc   *coefs[n];
705 	  grads[3*n+0] +=  dabc[0]*coefs[n];
706 	  grads[3*n+1] +=  dabc[1]*coefs[n];
707 	  grads[3*n+2] +=  dabc[2]*coefs[n];
708 	}
709       }
710 
711   double dxInv = spline->x_grid.delta_inv;
712   double dyInv = spline->y_grid.delta_inv;
713   double dzInv = spline->z_grid.delta_inv;
714   for (int n=0; n<spline->num_splines; n++) {
715     grads[3*n+0] *= dxInv;
716     grads[3*n+1] *= dyInv;
717     grads[3*n+2] *= dzInv;
718   }
719 }
720 
721 
722 
723 void
eval_multi_UBspline_3d_d_vgl(const multi_UBspline_3d_d * spline,double x,double y,double z,double * restrict vals,double * restrict grads,double * restrict lapl)724 eval_multi_UBspline_3d_d_vgl (const multi_UBspline_3d_d *spline,
725 			      double x, double y, double z,
726 			      double* restrict vals,
727 			      double* restrict grads,
728 			      double* restrict lapl)
729 {
730   x -= spline->x_grid.start;
731   y -= spline->y_grid.start;
732   z -= spline->z_grid.start;
733   double ux = x*spline->x_grid.delta_inv;
734   double uy = y*spline->y_grid.delta_inv;
735   double uz = z*spline->z_grid.delta_inv;
736   double ipartx, iparty, ipartz, tx, ty, tz;
737   tx = std::modf (ux, &ipartx);  int ix = std::min(std::max(0,(int) ipartx),spline->x_grid.num-1);
738   ty = std::modf (uy, &iparty);  int iy = std::min(std::max(0,(int) iparty),spline->y_grid.num-1);
739   tz = std::modf (uz, &ipartz);  int iz = std::min(std::max(0,(int) ipartz),spline->z_grid.num-1);
740 
741   double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
742     da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
743   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
744   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
745   tpz[0] = tz*tz*tz;  tpz[1] = tz*tz;  tpz[2] = tz;  tpz[3] = 1.0;
746 
747   a[0]  = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
748   a[1]  = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
749   a[2]  = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
750   a[3]  = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
751   da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
752   da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
753   da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
754   da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
755   d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
756   d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
757   d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
758   d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
759 
760   b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
761   b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
762   b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
763   b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
764   db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
765   db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
766   db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
767   db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
768   d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]);
769   d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]);
770   d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]);
771   d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]);
772 
773   c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]);
774   c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]);
775   c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]);
776   c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]);
777   dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]);
778   dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]);
779   dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]);
780   dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]);
781   d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]);
782   d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]);
783   d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]);
784   d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]);
785 
786   intptr_t xs = spline->x_stride;
787   intptr_t ys = spline->y_stride;
788   intptr_t zs = spline->z_stride;
789 
790   double* lapl3 = new double[3*spline->num_splines];
791   for (int n=0; n<spline->num_splines; n++) {
792     vals[n] = 0.0;
793     grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
794     lapl3[3*n+0] = lapl3[3*n+1] = lapl3[3*n+2] = 0.0;
795   }
796 
797   for (int i=0; i<4; i++)
798     for (int j=0; j<4; j++)
799       for (int k=0; k<4; k++) {
800 	double abc = a[i]*b[j]*c[k];
801 	double dabc[3], d2abc[3];
802 	dabc[0] = da[i]* b[j]* c[k];
803 	dabc[1] =  a[i]*db[j]* c[k];
804 	dabc[2] =  a[i]* b[j]*dc[k];
805 	d2abc[0] = d2a[i]*  b[j]*  c[k];
806 	d2abc[1] =   a[i]*d2b[j]*  c[k];
807 	d2abc[2] =   a[i]*  b[j]*d2c[k];
808 
809 	double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
810 	for (int n=0; n<spline->num_splines; n++) {
811 	  vals[n]      +=   abc   *coefs[n];
812 	  grads[3*n+0] +=  dabc[0]*coefs[n];
813 	  grads[3*n+1] +=  dabc[1]*coefs[n];
814 	  grads[3*n+2] +=  dabc[2]*coefs[n];
815 	  lapl3[3*n+0] += d2abc[0]*coefs[n];
816 	  lapl3[3*n+1] += d2abc[1]*coefs[n];
817 	  lapl3[3*n+2] += d2abc[2]*coefs[n];
818 	}
819       }
820 
821   double dxInv = spline->x_grid.delta_inv;
822   double dyInv = spline->y_grid.delta_inv;
823   double dzInv = spline->z_grid.delta_inv;
824   for (int n=0; n<spline->num_splines; n++) {
825     grads[3*n+0] *= dxInv;
826     grads[3*n+1] *= dyInv;
827     grads[3*n+2] *= dzInv;
828     lapl3[3*n+0] *= dxInv*dxInv;
829     lapl3[3*n+1] *= dyInv*dyInv;
830     lapl3[3*n+2] *= dzInv*dzInv;
831     lapl[n] = lapl3[3*n+0] + lapl3[3*n+1] + lapl3[3*n+2];
832   }
833   delete[] lapl3;
834 }
835 
836 
837 void
eval_multi_UBspline_3d_d_vghgh(const multi_UBspline_3d_d * spline,double x,double y,double z,double * restrict vals,double * restrict grads,double * restrict hess,double * restrict gradhess)838 eval_multi_UBspline_3d_d_vghgh (const multi_UBspline_3d_d *spline,
839                double x, double y, double z,
840                double* restrict vals,
841                double* restrict grads,
842                double* restrict hess,
843                double* restrict gradhess)
844 {
845   x -= spline->x_grid.start;
846   y -= spline->y_grid.start;
847   z -= spline->z_grid.start;
848   double ux = x*spline->x_grid.delta_inv;
849   double uy = y*spline->y_grid.delta_inv;
850   double uz = z*spline->z_grid.delta_inv;
851   double ipartx, iparty, ipartz, tx, ty, tz;
852   tx = std::modf (ux, &ipartx);  int ix = (int) ipartx;
853   ty = std::modf (uy, &iparty);  int iy = (int) iparty;
854   tz = std::modf (uz, &ipartz);  int iz = (int) ipartz;
855 
856   double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
857     da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4],
858     d3a[4], d3b[4], d3c[4];
859   tpx[0] = tx*tx*tx;  tpx[1] = tx*tx;  tpx[2] = tx;  tpx[3] = 1.0;
860   tpy[0] = ty*ty*ty;  tpy[1] = ty*ty;  tpy[2] = ty;  tpy[3] = 1.0;
861   tpz[0] = tz*tz*tz;  tpz[1] = tz*tz;  tpz[2] = tz;  tpz[3] = 1.0;
862 
863   a[0]  = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
864   a[1]  = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
865   a[2]  = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
866   a[3]  = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
867   da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
868   da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
869   da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
870   da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
871   d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
872   d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
873   d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
874   d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
875   d3a[0] = (/*d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] +*/ d3Ad[ 3]*tpx[3]);
876   d3a[1] = (/*d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] +*/ d3Ad[ 7]*tpx[3]);
877   d3a[2] = (/*d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] +*/ d3Ad[11]*tpx[3]);
878   d3a[3] = (/*d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] +*/ d3Ad[15]*tpx[3]);
879 
880   b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
881   b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
882   b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
883   b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
884   db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
885   db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
886   db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
887   db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
888   d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]);
889   d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]);
890   d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]);
891   d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]);
892   d3b[0] = (/*d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] +*/ d3Ad[ 3]*tpy[3]);
893   d3b[1] = (/*d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] +*/ d3Ad[ 7]*tpy[3]);
894   d3b[2] = (/*d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] +*/ d3Ad[11]*tpy[3]);
895   d3b[3] = (/*d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] +*/ d3Ad[15]*tpy[3]);
896 
897   c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]);
898   c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]);
899   c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]);
900   c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]);
901   dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]);
902   dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]);
903   dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]);
904   dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]);
905   d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]);
906   d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]);
907   d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]);
908   d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]);
909   d3c[0] = (/*d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] +*/ d3Ad[ 3]*tpz[3]);
910   d3c[1] = (/*d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] +*/ d3Ad[ 7]*tpz[3]);
911   d3c[2] = (/*d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] +*/ d3Ad[11]*tpz[3]);
912   d3c[3] = (/*d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] +*/ d3Ad[15]*tpz[3]);
913 
914   intptr_t xs = spline->x_stride;
915   intptr_t ys = spline->y_stride;
916   intptr_t zs = spline->z_stride;
917 
918   for (int n=0; n<spline->num_splines; n++) {
919     vals[n] = 0.0;
920     grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
921     for (int i=0; i<9; i++)
922       hess[9*n+i] = 0.0;
923     for (int i=0; i<27; i++)
924       gradhess[27*n+i] = 0.0;
925   }
926 
927   for (int i=0; i<4; i++)
928     for (int j=0; j<4; j++)
929       for (int k=0; k<4; k++) {
930    double abc = a[i]*b[j]*c[k];
931    double dabc[3], d2abc[6], d3abc[10];
932    dabc[0] = da[i]* b[j]* c[k];
933    dabc[1] =  a[i]*db[j]* c[k];
934    dabc[2] =  a[i]* b[j]*dc[k];
935    d2abc[0] = d2a[i]*  b[j]*  c[k];
936    d2abc[1] =  da[i]* db[j]*  c[k];
937    d2abc[2] =  da[i]*  b[j]* dc[k];
938    d2abc[3] =   a[i]*d2b[j]*  c[k];
939    d2abc[4] =   a[i]* db[j]* dc[k];
940    d2abc[5] =   a[i]*  b[j]*d2c[k];
941 
942    d3abc[0] = d3a[i]*  b[j]*  c[k];
943    d3abc[1] = d2a[i]* db[j]*  c[k];
944    d3abc[2] = d2a[i]*  b[j]* dc[k];
945    d3abc[3] =  da[i]*d2b[j]*  c[k];
946    d3abc[4] =  da[i]* db[j]* dc[k];
947    d3abc[5] =  da[i]*  b[j]*d2c[k];
948    d3abc[6] =   a[i]*d3b[j]*  c[k];
949    d3abc[7] =   a[i]*d2b[j]* dc[k];
950    d3abc[8] =   a[i]* db[j]*d2c[k];
951    d3abc[9] =   a[i]*  b[j]*d3c[k];
952 
953    double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
954    for (int n=0; n<spline->num_splines; n++) {
955      vals[n]      +=   abc   *coefs[n];
956      grads[3*n+0] +=  dabc[0]*coefs[n];
957      grads[3*n+1] +=  dabc[1]*coefs[n];
958      grads[3*n+2] +=  dabc[2]*coefs[n];
959      hess [9*n+0] += d2abc[0]*coefs[n];
960      hess [9*n+1] += d2abc[1]*coefs[n];
961      hess [9*n+2] += d2abc[2]*coefs[n];
962      hess [9*n+4] += d2abc[3]*coefs[n];
963      hess [9*n+5] += d2abc[4]*coefs[n];
964      hess [9*n+8] += d2abc[5]*coefs[n];
965 
966      gradhess [27*n+0 ] += d3abc[0]*coefs[n];
967      gradhess [27*n+1 ] += d3abc[1]*coefs[n];
968      gradhess [27*n+2 ] += d3abc[2]*coefs[n];
969      gradhess [27*n+4 ] += d3abc[3]*coefs[n];
970      gradhess [27*n+5 ] += d3abc[4]*coefs[n];
971      gradhess [27*n+8 ] += d3abc[5]*coefs[n];
972      gradhess [27*n+13] += d3abc[6]*coefs[n];
973      gradhess [27*n+14] += d3abc[7]*coefs[n];
974      gradhess [27*n+17] += d3abc[8]*coefs[n];
975      gradhess [27*n+26] += d3abc[9]*coefs[n];
976    }
977       }
978 
979   double dxInv = spline->x_grid.delta_inv;
980   double dyInv = spline->y_grid.delta_inv;
981   double dzInv = spline->z_grid.delta_inv;
982   for (int n=0; n<spline->num_splines; n++) {
983     grads[3*n+0] *= dxInv;
984     grads[3*n+1] *= dyInv;
985     grads[3*n+2] *= dzInv;
986     hess [9*n+0] *= dxInv*dxInv;
987     hess [9*n+4] *= dyInv*dyInv;
988     hess [9*n+8] *= dzInv*dzInv;
989     hess [9*n+1] *= dxInv*dyInv;
990     hess [9*n+2] *= dxInv*dzInv;
991     hess [9*n+5] *= dyInv*dzInv;
992     // Copy hessian elements into lower half of 3x3 matrix
993     hess [9*n+3] = hess[9*n+1];
994     hess [9*n+6] = hess[9*n+2];
995     hess [9*n+7] = hess[9*n+5];
996 
997     gradhess [27*n+0 ] *= dxInv*dxInv*dxInv;
998     gradhess [27*n+1 ] *= dxInv*dxInv*dyInv;
999     gradhess [27*n+2 ] *= dxInv*dxInv*dzInv;
1000     gradhess [27*n+4 ] *= dxInv*dyInv*dyInv;
1001     gradhess [27*n+5 ] *= dxInv*dyInv*dzInv;
1002     gradhess [27*n+8 ] *= dxInv*dzInv*dzInv;
1003     gradhess [27*n+13] *= dyInv*dyInv*dyInv;
1004     gradhess [27*n+14] *= dyInv*dyInv*dzInv;
1005     gradhess [27*n+17] *= dyInv*dzInv*dzInv;
1006     gradhess [27*n+26] *= dzInv*dzInv*dzInv;
1007 
1008     // Copy gradhess elements into rest of tensor
1009     gradhess [27*n+9  ] = gradhess [27*n+3  ] = gradhess [27*n+1 ];
1010     gradhess [27*n+18 ] = gradhess [27*n+6  ] = gradhess [27*n+2 ];
1011     gradhess [27*n+22 ] = gradhess [27*n+16 ] = gradhess [27*n+14];
1012     gradhess [27*n+12 ] = gradhess [27*n+10 ] = gradhess [27*n+4 ];
1013     gradhess [27*n+24 ] = gradhess [27*n+20 ] = gradhess [27*n+8 ];
1014     gradhess [27*n+25 ] = gradhess [27*n+23 ] = gradhess [27*n+17];
1015     gradhess [27*n+21 ] = gradhess [27*n+19 ] = gradhess [27*n+15] = gradhess [27*n+11 ] = gradhess [27*n+7 ] = gradhess [27*n+5];
1016 
1017   }
1018 }
1019