1 /////////////////////////////////////////////////////////////////////////////
2 // einspline: a library for creating and evaluating B-splines //
3 // Copyright (C) 2007 Kenneth P. Esler, Jr. //
4 // Released under the BSD-3-clause license //
5 /////////////////////////////////////////////////////////////////////////////
6
7 #include <cmath>
8 #include <algorithm>
9 #include "bspline_base.h"
10 #include "multi_bspline_structs.h"
11 #include "multi_bspline_eval_d.h"
12
13 extern const double* restrict Ad;
14 extern const double* restrict dAd;
15 extern const double* restrict d2Ad;
16 extern const double* restrict d3Ad;
17
18 /************************************************************/
19 /* 1D double-precision, real evaluation functions */
20 /************************************************************/
21 void
eval_multi_UBspline_1d_d(const multi_UBspline_1d_d * spline,double x,double * restrict vals)22 eval_multi_UBspline_1d_d (const multi_UBspline_1d_d *spline,
23 double x,
24 double* restrict vals)
25 {
26 x -= spline->x_grid.start;
27 double ux = x*spline->x_grid.delta_inv;
28 double ipartx, tx;
29 tx = std::modf (ux, &ipartx);
30 // This protects from overflow reads of coefs when x goes out of [start, end)
31 // in simulation systems with an open boundary condition.
32 // This protection has no effect on simulation systems with PBC and anti-PBC
33 // condition because x has been reduced to [start, end) before the call
34 // The protection is correct for PERIODIC/ANTIPERIODIC BC splines
35 // but not sufficient for NATURAL BC splines (need grid.num-2).
36 // With this protection ix is pulled to the boundary, tx is not modified.
37 // This is not correct but no more relevant to QMCPACK since
38 // we moved to C++ versions of this function already.
39 int ix = std::min(std::max(0,(int) ipartx),spline->x_grid.num-1);
40
41 double tpx[4], a[4];
42 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
43
44 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
45 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
46 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
47 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
48
49 intptr_t xs = spline->x_stride;
50
51 double* restrict coefs0 = spline->coefs + ((ix )*xs);
52 double* restrict coefs1 = spline->coefs + ((ix+1)*xs);
53 double* restrict coefs2 = spline->coefs + ((ix+2)*xs);
54 double* restrict coefs3 = spline->coefs + ((ix+3)*xs);
55
56 for (int n=0; n<spline->num_splines; n++)
57 vals[n] = a[0] * coefs0[n] + a[1] * coefs1[n] + a[2] * coefs2[n] + a[3] * coefs3[n];
58 }
59
60
61
62 void
eval_multi_UBspline_1d_d_vg(const multi_UBspline_1d_d * spline,double x,double * restrict vals,double * restrict grads)63 eval_multi_UBspline_1d_d_vg (const multi_UBspline_1d_d *spline,
64 double x,
65 double* restrict vals,
66 double* restrict grads)
67 {
68 x -= spline->x_grid.start;
69 double ux = x*spline->x_grid.delta_inv;
70 double ipartx, tx;
71 tx = std::modf (ux, &ipartx); int ix = std::min(std::max(0,(int) ipartx),spline->x_grid.num-1);
72
73 double tpx[4], a[4], da[4];
74 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
75
76 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
77 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
78 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
79 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
80 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
81 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
82 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
83 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
84
85 intptr_t xs = spline->x_stride;
86
87 for (int n=0; n<spline->num_splines; n++) {
88 vals[n] = 0.0;
89 grads[n] = 0.0;
90 }
91
92 for (int i=0; i<4; i++) {
93 double* restrict coefs = spline->coefs + ((ix+i)*xs);
94 for (int n=0; n<spline->num_splines; n++) {
95 vals[n] += a[i] * coefs[n];
96 grads[n] += da[i] * coefs[n];
97 }
98 }
99
100 double dxInv = spline->x_grid.delta_inv;
101 for (int n=0; n<spline->num_splines; n++)
102 grads[n] *= dxInv;
103 }
104
105
106 void
eval_multi_UBspline_1d_d_vgl(const multi_UBspline_1d_d * spline,double x,double * restrict vals,double * restrict grads,double * restrict lapl)107 eval_multi_UBspline_1d_d_vgl (const multi_UBspline_1d_d *spline,
108 double x,
109 double* restrict vals,
110 double* restrict grads,
111 double* restrict lapl)
112 {
113 x -= spline->x_grid.start;
114 double ux = x*spline->x_grid.delta_inv;
115 double ipartx, tx;
116 tx = std::modf (ux, &ipartx); int ix = std::min(std::max(0,(int) ipartx),spline->x_grid.num-1);
117
118 double tpx[4], a[4], da[4], d2a[4];
119 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
120
121 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
122 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
123 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
124 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
125 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
126 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
127 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
128 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
129 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
130 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
131 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
132 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
133
134 intptr_t xs = spline->x_stride;
135 double dxInv = spline->x_grid.delta_inv;
136
137 double* restrict coefs0 = spline->coefs + ((ix )*xs);
138 double* restrict coefs1 = spline->coefs + ((ix+1)*xs);
139 double* restrict coefs2 = spline->coefs + ((ix+2)*xs);
140 double* restrict coefs3 = spline->coefs + ((ix+3)*xs);
141
142 for (int n=0; n<spline->num_splines; n++)
143 {
144 vals[n] = a[0] * coefs0[n] + a[1] * coefs1[n] + a[2] * coefs2[n] + a[3] * coefs3[n];
145 grads[n] = (da[0] * coefs0[n] + da[1] * coefs1[n] + da[2] * coefs2[n] + da[3] * coefs3[n])*dxInv;
146 lapl[n] = (d2a[0] * coefs0[n] + d2a[1] * coefs1[n] + d2a[2] * coefs2[n] + d2a[3] * coefs3[n])*dxInv*dxInv;
147 }
148 }
149
150
151 void
eval_multi_UBspline_1d_d_vgh(const multi_UBspline_1d_d * spline,double x,double * restrict vals,double * restrict grads,double * restrict hess)152 eval_multi_UBspline_1d_d_vgh (const multi_UBspline_1d_d *spline,
153 double x,
154 double* restrict vals,
155 double* restrict grads,
156 double* restrict hess)
157 {
158 eval_multi_UBspline_1d_d_vgl (spline, x, vals, grads, hess);
159 }
160
161
162
163 /************************************************************/
164 /* 2D double-precision, real evaluation functions */
165 /************************************************************/
166 void
eval_multi_UBspline_2d_d(const multi_UBspline_2d_d * spline,double x,double y,double * restrict vals)167 eval_multi_UBspline_2d_d (const multi_UBspline_2d_d *spline,
168 double x, double y,
169 double* restrict vals)
170 {
171 x -= spline->x_grid.start;
172 y -= spline->y_grid.start;
173 double ux = x*spline->x_grid.delta_inv;
174 double uy = y*spline->y_grid.delta_inv;
175 double ipartx, iparty, tx, ty;
176 tx = std::modf (ux, &ipartx); int ix = (int) ipartx;
177 ty = std::modf (uy, &iparty); int iy = (int) iparty;
178
179 double tpx[4], tpy[4], a[4], b[4];
180 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
181 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
182
183 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
184 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
185 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
186 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
187
188 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
189 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
190 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
191 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
192
193 intptr_t xs = spline->x_stride;
194 intptr_t ys = spline->y_stride;
195
196 for (int n=0; n<spline->num_splines; n++)
197 vals[n] = 0.0;
198
199 for (int i=0; i<4; i++)
200 for (int j=0; j<4; j++) {
201 double prefactor = a[i]*b[j];
202 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
203 for (int n=0; n<spline->num_splines; n++)
204 vals[n] += prefactor*coefs[n];
205 }
206 }
207
208
209 void
eval_multi_UBspline_2d_d_vg(const multi_UBspline_2d_d * spline,double x,double y,double * restrict vals,double * restrict grads)210 eval_multi_UBspline_2d_d_vg (const multi_UBspline_2d_d *spline,
211 double x, double y,
212 double* restrict vals,
213 double* restrict grads)
214 {
215 x -= spline->x_grid.start;
216 y -= spline->y_grid.start;
217 double ux = x*spline->x_grid.delta_inv;
218 double uy = y*spline->y_grid.delta_inv;
219 double ipartx, iparty, tx, ty;
220 tx = std::modf (ux, &ipartx); int ix = (int) ipartx;
221 ty = std::modf (uy, &iparty); int iy = (int) iparty;
222
223 double tpx[4], tpy[4], a[4], b[4], da[4], db[4];
224 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
225 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
226
227 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
228 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
229 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
230 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
231 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
232 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
233 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
234 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
235
236 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
237 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
238 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
239 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
240 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
241 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
242 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
243 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
244
245 intptr_t xs = spline->x_stride;
246 intptr_t ys = spline->y_stride;
247
248 for (int n=0; n<spline->num_splines; n++) {
249 vals[n] = 0.0;
250 grads[2*n+0] = grads[2*n+1] = grads[2*n+2] = 0.0;
251 }
252
253 for (int i=0; i<4; i++)
254 for (int j=0; j<4; j++) {
255 double ab = a[i]*b[j];
256 double dab[2];
257 dab[0] = da[i]* b[j];
258 dab[1] = a[i]*db[j];
259
260 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
261 for (int n=0; n<spline->num_splines; n++) {
262 vals [n] += ab *coefs[n];
263 grads[2*n+0] += dab[0]*coefs[n];
264 grads[2*n+1] += dab[1]*coefs[n];
265 }
266 }
267
268 double dxInv = spline->x_grid.delta_inv;
269 double dyInv = spline->y_grid.delta_inv;
270 for (int n=0; n<spline->num_splines; n++) {
271 grads[2*n+0] *= dxInv;
272 grads[2*n+1] *= dyInv;
273 }
274 }
275
276 void
eval_multi_UBspline_2d_d_vgl(const multi_UBspline_2d_d * spline,double x,double y,double * restrict vals,double * restrict grads,double * restrict lapl)277 eval_multi_UBspline_2d_d_vgl (const multi_UBspline_2d_d *spline,
278 double x, double y,
279 double* restrict vals,
280 double* restrict grads,
281 double* restrict lapl)
282 {
283 x -= spline->x_grid.start;
284 y -= spline->y_grid.start;
285 double ux = x*spline->x_grid.delta_inv;
286 double uy = y*spline->y_grid.delta_inv;
287 double ipartx, iparty, tx, ty;
288 tx = std::modf (ux, &ipartx); int ix = (int) ipartx;
289 ty = std::modf (uy, &iparty); int iy = (int) iparty;
290
291 double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4];
292 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
293 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
294
295 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
296 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
297 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
298 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
299 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
300 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
301 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
302 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
303 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
304 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
305 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
306 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
307
308 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
309 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
310 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
311 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
312 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
313 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
314 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
315 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
316 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]);
317 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]);
318 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]);
319 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]);
320
321 intptr_t xs = spline->x_stride;
322 intptr_t ys = spline->y_stride;
323
324 double* lapl2 = new double[2*spline->num_splines];
325 for (int n=0; n<spline->num_splines; n++) {
326 vals[n] = 0.0;
327 grads[2*n+0] = grads[2*n+1] = 0.0;
328 lapl2[2*n+0] = lapl2[2*n+1] = 0.0;
329 }
330
331 for (int i=0; i<4; i++)
332 for (int j=0; j<4; j++) {
333 double ab = a[i]*b[j];
334 double dab[2], d2ab[2];
335 dab[0] = da[i]* b[j];
336 dab[1] = a[i]*db[j];
337 d2ab[0] = d2a[i]* b[j];
338 d2ab[1] = a[i]*d2b[j];
339
340 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
341 for (int n=0; n<spline->num_splines; n++) {
342 vals[n] += ab *coefs[n];
343 grads[2*n+0] += dab[0]*coefs[n];
344 grads[2*n+1] += dab[1]*coefs[n];
345 lapl2[2*n+0] += d2ab[0]*coefs[n];
346 lapl2[2*n+1] += d2ab[1]*coefs[n];
347 }
348 }
349
350 double dxInv = spline->x_grid.delta_inv;
351 double dyInv = spline->y_grid.delta_inv;
352 for (int n=0; n<spline->num_splines; n++) {
353 grads[2*n+0] *= dxInv;
354 grads[2*n+1] *= dyInv;
355 lapl2[2*n+0] *= dxInv*dxInv;
356 lapl2[2*n+1] *= dyInv*dyInv;
357 lapl[n] = lapl2[2*n+0] + lapl2[2*n+1];
358 }
359 delete[] lapl2;
360 }
361
362
363
364
365 void
eval_multi_UBspline_2d_d_vgh(const multi_UBspline_2d_d * spline,double x,double y,double * restrict vals,double * restrict grads,double * restrict hess)366 eval_multi_UBspline_2d_d_vgh (const multi_UBspline_2d_d *spline,
367 double x, double y,
368 double* restrict vals,
369 double* restrict grads,
370 double* restrict hess)
371 {
372 x -= spline->x_grid.start;
373 y -= spline->y_grid.start;
374 double ux = x*spline->x_grid.delta_inv;
375 double uy = y*spline->y_grid.delta_inv;
376 double ipartx, iparty, tx, ty;
377 tx = std::modf (ux, &ipartx); int ix = (int) ipartx;
378 ty = std::modf (uy, &iparty); int iy = (int) iparty;
379
380 double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4];
381 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
382 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
383
384 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
385 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
386 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
387 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
388 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
389 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
390 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
391 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
392 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
393 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
394 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
395 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
396
397 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
398 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
399 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
400 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
401 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
402 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
403 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
404 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
405 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]);
406 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]);
407 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]);
408 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]);
409
410 intptr_t xs = spline->x_stride;
411 intptr_t ys = spline->y_stride;
412
413 for (int n=0; n<spline->num_splines; n++) {
414 vals[n] = 0.0;
415 grads[2*n+0] = grads[2*n+1] = 0.0;
416 for (int i=0; i<4; i++)
417 hess[4*n+i] = 0.0;
418 }
419
420 for (int i=0; i<4; i++)
421 for (int j=0; j<4; j++){
422 double ab = a[i]*b[j];
423 double dab[2], d2ab[3];
424 dab[0] = da[i]* b[j];
425 dab[1] = a[i]*db[j];
426 d2ab[0] = d2a[i] * b[j];
427 d2ab[1] = da[i] * db[j];
428 d2ab[2] = a[i] * d2b[j];
429
430 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
431 for (int n=0; n<spline->num_splines; n++) {
432 vals[n] += ab *coefs[n];
433 grads[2*n+0] += dab[0]*coefs[n];
434 grads[2*n+1] += dab[1]*coefs[n];
435 hess [4*n+0] += d2ab[0]*coefs[n];
436 hess [4*n+1] += d2ab[1]*coefs[n];
437 hess [4*n+3] += d2ab[2]*coefs[n];
438 }
439 }
440
441 double dxInv = spline->x_grid.delta_inv;
442 double dyInv = spline->y_grid.delta_inv;
443 for (int n=0; n<spline->num_splines; n++) {
444 grads[2*n+0] *= dxInv;
445 grads[2*n+1] *= dyInv;
446 hess[4*n+0] *= dxInv*dxInv;
447 hess[4*n+1] *= dxInv*dyInv;
448 hess[4*n+3] *= dyInv*dyInv;
449 // Copy hessian elements into lower half of 3x3 matrix
450 hess[4*n+2] = hess[4*n+1];
451 }
452 }
453
454
455
456
457 /************************************************************/
458 /* 3D double-precision, real evaluation functions */
459 /************************************************************/
460 void
eval_multi_UBspline_3d_d(const multi_UBspline_3d_d * spline,double x,double y,double z,double * restrict vals)461 eval_multi_UBspline_3d_d (const multi_UBspline_3d_d *spline,
462 double x, double y, double z,
463 double* restrict vals)
464 {
465 // algorithm modified by Ye Luo, Mar. 12th 2015
466 x -= spline->x_grid.start;
467 y -= spline->y_grid.start;
468 z -= spline->z_grid.start;
469 double ux = x*spline->x_grid.delta_inv;
470 double uy = y*spline->y_grid.delta_inv;
471 double uz = z*spline->z_grid.delta_inv;
472 double ipartx, iparty, ipartz, tx, ty, tz;
473 tx = std::modf (ux, &ipartx); int ix = std::min(std::max(0,(int) ipartx),spline->x_grid.num-1);
474 ty = std::modf (uy, &iparty); int iy = std::min(std::max(0,(int) iparty),spline->y_grid.num-1);
475 tz = std::modf (uz, &ipartz); int iz = std::min(std::max(0,(int) ipartz),spline->z_grid.num-1);
476
477 double a[4], b[4], c[4];
478
479 a[0] = ( ( Ad[0] * tx + Ad[1] ) * tx + Ad[2] ) * tx + Ad[3];
480 a[1] = ( ( Ad[4] * tx + Ad[5] ) * tx + Ad[6] ) * tx + Ad[7];
481 a[2] = ( ( Ad[8] * tx + Ad[9] ) * tx + Ad[10] ) * tx + Ad[11];
482 a[3] = ( ( Ad[12] * tx + Ad[13] ) * tx + Ad[14] ) * tx + Ad[15];
483
484 b[0] = ( ( Ad[0] * ty + Ad[1] ) * ty + Ad[2] ) * ty + Ad[3];
485 b[1] = ( ( Ad[4] * ty + Ad[5] ) * ty + Ad[6] ) * ty + Ad[7];
486 b[2] = ( ( Ad[8] * ty + Ad[9] ) * ty + Ad[10] ) * ty + Ad[11];
487 b[3] = ( ( Ad[12] * ty + Ad[13] ) * ty + Ad[14] ) * ty + Ad[15];
488
489 c[0] = ( ( Ad[0] * tz + Ad[1] ) * tz + Ad[2] ) * tz + Ad[3];
490 c[1] = ( ( Ad[4] * tz + Ad[5] ) * tz + Ad[6] ) * tz + Ad[7];
491 c[2] = ( ( Ad[8] * tz + Ad[9] ) * tz + Ad[10] ) * tz + Ad[11];
492 c[3] = ( ( Ad[12] * tz + Ad[13] ) * tz + Ad[14] ) * tz + Ad[15];
493
494 intptr_t xs = spline->x_stride;
495 intptr_t ys = spline->y_stride;
496 intptr_t zs = spline->z_stride;
497
498 for (int n=0; n<spline->num_splines; n++)
499 vals[n] = 0.0;
500
501 for (int i=0; i<4; i++)
502 for (int j=0; j<4; j++){
503 double pre00 = a[i]*b[j];
504 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + iz*zs);
505 for(int n=0; n<spline->num_splines; n++)
506 vals[n] += pre00*(c[0]*coefs[n] + c[1]*coefs[n+zs] + c[2]*coefs[n+2*zs] + c[3]*coefs[n+3*zs]);
507 }
508 }
509
510 void
eval_multi_UBspline_3d_d_vgh(const multi_UBspline_3d_d * spline,double x,double y,double z,double * restrict vals,double * restrict grads,double * restrict hess)511 eval_multi_UBspline_3d_d_vgh (const multi_UBspline_3d_d *spline,
512 double x, double y, double z,
513 double* restrict vals,
514 double* restrict grads,
515 double* restrict hess)
516 {
517 // algorithm modified by Ye Luo, Mar. 12th 2015
518 x -= spline->x_grid.start;
519 y -= spline->y_grid.start;
520 z -= spline->z_grid.start;
521 double ux = x*spline->x_grid.delta_inv;
522 double uy = y*spline->y_grid.delta_inv;
523 double uz = z*spline->z_grid.delta_inv;
524 double ipartx, iparty, ipartz, tx, ty, tz;
525
526 tx = std::modf (ux, &ipartx); int ix = std::min(std::max(0,(int) ipartx),spline->x_grid.num-1);
527 ty = std::modf (uy, &iparty); int iy = std::min(std::max(0,(int) iparty),spline->y_grid.num-1);
528 tz = std::modf (uz, &ipartz); int iz = std::min(std::max(0,(int) ipartz),spline->z_grid.num-1);
529
530 double a[4], b[4], c[4],da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
531
532 a[0] = ( ( Ad[0] * tx + Ad[1] ) * tx + Ad[2] ) * tx + Ad[3];
533 a[1] = ( ( Ad[4] * tx + Ad[5] ) * tx + Ad[6] ) * tx + Ad[7];
534 a[2] = ( ( Ad[8] * tx + Ad[9] ) * tx + Ad[10] ) * tx + Ad[11];
535 a[3] = ( ( Ad[12] * tx + Ad[13] ) * tx + Ad[14] ) * tx + Ad[15];
536 da[0] = ( ( dAd[0] * tx + dAd[1] ) * tx + dAd[2] ) * tx + dAd[3];
537 da[1] = ( ( dAd[4] * tx + dAd[5] ) * tx + dAd[6] ) * tx + dAd[7];
538 da[2] = ( ( dAd[8] * tx + dAd[9] ) * tx + dAd[10] ) * tx + dAd[11];
539 da[3] = ( ( dAd[12] * tx + dAd[13] ) * tx + dAd[14] ) * tx + dAd[15];
540 d2a[0] = ( ( d2Ad[0] * tx + d2Ad[1] ) * tx + d2Ad[2] ) * tx + d2Ad[3];
541 d2a[1] = ( ( d2Ad[4] * tx + d2Ad[5] ) * tx + d2Ad[6] ) * tx + d2Ad[7];
542 d2a[2] = ( ( d2Ad[8] * tx + d2Ad[9] ) * tx + d2Ad[10] ) * tx + d2Ad[11];
543 d2a[3] = ( ( d2Ad[12] * tx + d2Ad[13] ) * tx + d2Ad[14] ) * tx + d2Ad[15];
544
545 b[0] = ( ( Ad[0] * ty + Ad[1] ) * ty + Ad[2] ) * ty + Ad[3];
546 b[1] = ( ( Ad[4] * ty + Ad[5] ) * ty + Ad[6] ) * ty + Ad[7];
547 b[2] = ( ( Ad[8] * ty + Ad[9] ) * ty + Ad[10] ) * ty + Ad[11];
548 b[3] = ( ( Ad[12] * ty + Ad[13] ) * ty + Ad[14] ) * ty + Ad[15];
549 db[0] = ( ( dAd[0] * ty + dAd[1] ) * ty + dAd[2] ) * ty + dAd[3];
550 db[1] = ( ( dAd[4] * ty + dAd[5] ) * ty + dAd[6] ) * ty + dAd[7];
551 db[2] = ( ( dAd[8] * ty + dAd[9] ) * ty + dAd[10] ) * ty + dAd[11];
552 db[3] = ( ( dAd[12] * ty + dAd[13] ) * ty + dAd[14] ) * ty + dAd[15];
553 d2b[0] = ( ( d2Ad[0] * ty + d2Ad[1] ) * ty + d2Ad[2] ) * ty + d2Ad[3];
554 d2b[1] = ( ( d2Ad[4] * ty + d2Ad[5] ) * ty + d2Ad[6] ) * ty + d2Ad[7];
555 d2b[2] = ( ( d2Ad[8] * ty + d2Ad[9] ) * ty + d2Ad[10] ) * ty + d2Ad[11];
556 d2b[3] = ( ( d2Ad[12] * ty + d2Ad[13] ) * ty + d2Ad[14] ) * ty + d2Ad[15];
557
558 c[0] = ( ( Ad[0] * tz + Ad[1] ) * tz + Ad[2] ) * tz + Ad[3];
559 c[1] = ( ( Ad[4] * tz + Ad[5] ) * tz + Ad[6] ) * tz + Ad[7];
560 c[2] = ( ( Ad[8] * tz + Ad[9] ) * tz + Ad[10] ) * tz + Ad[11];
561 c[3] = ( ( Ad[12] * tz + Ad[13] ) * tz + Ad[14] ) * tz + Ad[15];
562 dc[0] = ( ( dAd[0] * tz + dAd[1] ) * tz + dAd[2] ) * tz + dAd[3];
563 dc[1] = ( ( dAd[4] * tz + dAd[5] ) * tz + dAd[6] ) * tz + dAd[7];
564 dc[2] = ( ( dAd[8] * tz + dAd[9] ) * tz + dAd[10] ) * tz + dAd[11];
565 dc[3] = ( ( dAd[12] * tz + dAd[13] ) * tz + dAd[14] ) * tz + dAd[15];
566 d2c[0] = ( ( d2Ad[0] * tz + d2Ad[1] ) * tz + d2Ad[2] ) * tz + d2Ad[3];
567 d2c[1] = ( ( d2Ad[4] * tz + d2Ad[5] ) * tz + d2Ad[6] ) * tz + d2Ad[7];
568 d2c[2] = ( ( d2Ad[8] * tz + d2Ad[9] ) * tz + d2Ad[10] ) * tz + d2Ad[11];
569 d2c[3] = ( ( d2Ad[12] * tz + d2Ad[13] ) * tz + d2Ad[14] ) * tz + d2Ad[15];
570
571 intptr_t xs = spline->x_stride;
572 intptr_t ys = spline->y_stride;
573 intptr_t zs = spline->z_stride;
574
575 for (int n=0; n<spline->num_splines; n++) {
576 vals[n] = 0.0;
577 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
578 for (int i=0; i<9; i++)
579 hess[9*n+i] = 0.0;
580 }
581
582 for (int i=0; i<4; i++)
583 for (int j=0; j<4; j++){
584 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + iz*zs);
585
586 double pre20 = d2a[i]* b[j];
587 double pre10 = da[i]* b[j];
588 double pre00 = a[i]* b[j];
589 double pre11 = da[i]* db[j];
590 double pre01 = a[i]* db[j];
591 double pre02 = a[i]*d2b[j];
592
593 for (int n=0; n<spline->num_splines; n++) {
594 double sum0 = c[0] * coefs[n] + c[1] * coefs[n+zs] + c[2] * coefs[n+2*zs] + c[3] * coefs[n+3*zs];
595 double sum1 = dc[0] * coefs[n] + dc[1] * coefs[n+zs] + dc[2] * coefs[n+2*zs] + dc[3] * coefs[n+3*zs];
596 double sum2 = d2c[0] * coefs[n] + d2c[1] * coefs[n+zs] + d2c[2] * coefs[n+2*zs] + d2c[3] * coefs[n+3*zs];
597 hess [9*n ] += pre20 * sum0;
598 hess [9*n+1] += pre11 * sum0;
599 hess [9*n+2] += pre10 * sum1;
600 hess [9*n+4] += pre02 * sum0;
601 hess [9*n+5] += pre01 * sum1;
602 hess [9*n+8] += pre00 * sum2;
603 grads[3*n ] += pre10 * sum0;
604 grads[3*n+1] += pre01 * sum0;
605 grads[3*n+2] += pre00 * sum1;
606 vals[n] += pre00 * sum0;
607 }
608 }
609
610 double dxInv = spline->x_grid.delta_inv;
611 double dyInv = spline->y_grid.delta_inv;
612 double dzInv = spline->z_grid.delta_inv;
613
614 for (int n=0; n<spline->num_splines; n++) {
615
616
617 grads[3*n+0] *= dxInv;
618 grads[3*n+1] *= dyInv;
619 grads[3*n+2] *= dzInv;
620 hess [9*n+0] *= dxInv*dxInv;
621 hess [9*n+4] *= dyInv*dyInv;
622 hess [9*n+8] *= dzInv*dzInv;
623 hess [9*n+1] *= dxInv*dyInv;
624 hess [9*n+2] *= dxInv*dzInv;
625 hess [9*n+5] *= dyInv*dzInv;
626 // Copy hessian elements into lower half of 3x3 matrix
627 hess [9*n+3] = hess[9*n+1];
628 hess [9*n+6] = hess[9*n+2];
629 hess [9*n+7] = hess[9*n+5];
630 }
631 }
632
633
634 void
eval_multi_UBspline_3d_d_vg(const multi_UBspline_3d_d * spline,double x,double y,double z,double * restrict vals,double * restrict grads)635 eval_multi_UBspline_3d_d_vg (const multi_UBspline_3d_d *spline,
636 double x, double y, double z,
637 double* restrict vals,
638 double* restrict grads)
639 {
640 x -= spline->x_grid.start;
641 y -= spline->y_grid.start;
642 z -= spline->z_grid.start;
643 double ux = x*spline->x_grid.delta_inv;
644 double uy = y*spline->y_grid.delta_inv;
645 double uz = z*spline->z_grid.delta_inv;
646 double ipartx, iparty, ipartz, tx, ty, tz;
647 tx = std::modf (ux, &ipartx); int ix = (int) ipartx;
648 ty = std::modf (uy, &iparty); int iy = (int) iparty;
649 tz = std::modf (uz, &ipartz); int iz = (int) ipartz;
650
651 double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
652 da[4], db[4], dc[4];
653 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
654 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
655 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0;
656
657 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
658 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
659 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
660 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
661 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
662 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
663 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
664 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
665
666 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
667 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
668 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
669 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
670 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
671 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
672 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
673 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
674
675 c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]);
676 c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]);
677 c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]);
678 c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]);
679 dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]);
680 dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]);
681 dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]);
682 dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]);
683
684 intptr_t xs = spline->x_stride;
685 intptr_t ys = spline->y_stride;
686 intptr_t zs = spline->z_stride;
687
688 for (int n=0; n<spline->num_splines; n++) {
689 vals[n] = 0.0;
690 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
691 }
692
693 for (int i=0; i<4; i++)
694 for (int j=0; j<4; j++)
695 for (int k=0; k<4; k++) {
696 double abc = a[i]*b[j]*c[k];
697 double dabc[3];
698 dabc[0] = da[i]* b[j]* c[k];
699 dabc[1] = a[i]*db[j]* c[k];
700 dabc[2] = a[i]* b[j]*dc[k];
701
702 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
703 for (int n=0; n<spline->num_splines; n++) {
704 vals[n] += abc *coefs[n];
705 grads[3*n+0] += dabc[0]*coefs[n];
706 grads[3*n+1] += dabc[1]*coefs[n];
707 grads[3*n+2] += dabc[2]*coefs[n];
708 }
709 }
710
711 double dxInv = spline->x_grid.delta_inv;
712 double dyInv = spline->y_grid.delta_inv;
713 double dzInv = spline->z_grid.delta_inv;
714 for (int n=0; n<spline->num_splines; n++) {
715 grads[3*n+0] *= dxInv;
716 grads[3*n+1] *= dyInv;
717 grads[3*n+2] *= dzInv;
718 }
719 }
720
721
722
723 void
eval_multi_UBspline_3d_d_vgl(const multi_UBspline_3d_d * spline,double x,double y,double z,double * restrict vals,double * restrict grads,double * restrict lapl)724 eval_multi_UBspline_3d_d_vgl (const multi_UBspline_3d_d *spline,
725 double x, double y, double z,
726 double* restrict vals,
727 double* restrict grads,
728 double* restrict lapl)
729 {
730 x -= spline->x_grid.start;
731 y -= spline->y_grid.start;
732 z -= spline->z_grid.start;
733 double ux = x*spline->x_grid.delta_inv;
734 double uy = y*spline->y_grid.delta_inv;
735 double uz = z*spline->z_grid.delta_inv;
736 double ipartx, iparty, ipartz, tx, ty, tz;
737 tx = std::modf (ux, &ipartx); int ix = std::min(std::max(0,(int) ipartx),spline->x_grid.num-1);
738 ty = std::modf (uy, &iparty); int iy = std::min(std::max(0,(int) iparty),spline->y_grid.num-1);
739 tz = std::modf (uz, &ipartz); int iz = std::min(std::max(0,(int) ipartz),spline->z_grid.num-1);
740
741 double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
742 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
743 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
744 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
745 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0;
746
747 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
748 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
749 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
750 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
751 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
752 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
753 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
754 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
755 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
756 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
757 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
758 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
759
760 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
761 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
762 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
763 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
764 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
765 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
766 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
767 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
768 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]);
769 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]);
770 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]);
771 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]);
772
773 c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]);
774 c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]);
775 c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]);
776 c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]);
777 dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]);
778 dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]);
779 dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]);
780 dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]);
781 d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]);
782 d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]);
783 d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]);
784 d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]);
785
786 intptr_t xs = spline->x_stride;
787 intptr_t ys = spline->y_stride;
788 intptr_t zs = spline->z_stride;
789
790 double* lapl3 = new double[3*spline->num_splines];
791 for (int n=0; n<spline->num_splines; n++) {
792 vals[n] = 0.0;
793 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
794 lapl3[3*n+0] = lapl3[3*n+1] = lapl3[3*n+2] = 0.0;
795 }
796
797 for (int i=0; i<4; i++)
798 for (int j=0; j<4; j++)
799 for (int k=0; k<4; k++) {
800 double abc = a[i]*b[j]*c[k];
801 double dabc[3], d2abc[3];
802 dabc[0] = da[i]* b[j]* c[k];
803 dabc[1] = a[i]*db[j]* c[k];
804 dabc[2] = a[i]* b[j]*dc[k];
805 d2abc[0] = d2a[i]* b[j]* c[k];
806 d2abc[1] = a[i]*d2b[j]* c[k];
807 d2abc[2] = a[i]* b[j]*d2c[k];
808
809 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
810 for (int n=0; n<spline->num_splines; n++) {
811 vals[n] += abc *coefs[n];
812 grads[3*n+0] += dabc[0]*coefs[n];
813 grads[3*n+1] += dabc[1]*coefs[n];
814 grads[3*n+2] += dabc[2]*coefs[n];
815 lapl3[3*n+0] += d2abc[0]*coefs[n];
816 lapl3[3*n+1] += d2abc[1]*coefs[n];
817 lapl3[3*n+2] += d2abc[2]*coefs[n];
818 }
819 }
820
821 double dxInv = spline->x_grid.delta_inv;
822 double dyInv = spline->y_grid.delta_inv;
823 double dzInv = spline->z_grid.delta_inv;
824 for (int n=0; n<spline->num_splines; n++) {
825 grads[3*n+0] *= dxInv;
826 grads[3*n+1] *= dyInv;
827 grads[3*n+2] *= dzInv;
828 lapl3[3*n+0] *= dxInv*dxInv;
829 lapl3[3*n+1] *= dyInv*dyInv;
830 lapl3[3*n+2] *= dzInv*dzInv;
831 lapl[n] = lapl3[3*n+0] + lapl3[3*n+1] + lapl3[3*n+2];
832 }
833 delete[] lapl3;
834 }
835
836
837 void
eval_multi_UBspline_3d_d_vghgh(const multi_UBspline_3d_d * spline,double x,double y,double z,double * restrict vals,double * restrict grads,double * restrict hess,double * restrict gradhess)838 eval_multi_UBspline_3d_d_vghgh (const multi_UBspline_3d_d *spline,
839 double x, double y, double z,
840 double* restrict vals,
841 double* restrict grads,
842 double* restrict hess,
843 double* restrict gradhess)
844 {
845 x -= spline->x_grid.start;
846 y -= spline->y_grid.start;
847 z -= spline->z_grid.start;
848 double ux = x*spline->x_grid.delta_inv;
849 double uy = y*spline->y_grid.delta_inv;
850 double uz = z*spline->z_grid.delta_inv;
851 double ipartx, iparty, ipartz, tx, ty, tz;
852 tx = std::modf (ux, &ipartx); int ix = (int) ipartx;
853 ty = std::modf (uy, &iparty); int iy = (int) iparty;
854 tz = std::modf (uz, &ipartz); int iz = (int) ipartz;
855
856 double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
857 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4],
858 d3a[4], d3b[4], d3c[4];
859 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
860 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
861 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0;
862
863 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
864 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
865 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
866 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
867 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
868 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
869 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
870 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
871 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
872 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
873 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
874 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
875 d3a[0] = (/*d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] +*/ d3Ad[ 3]*tpx[3]);
876 d3a[1] = (/*d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] +*/ d3Ad[ 7]*tpx[3]);
877 d3a[2] = (/*d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] +*/ d3Ad[11]*tpx[3]);
878 d3a[3] = (/*d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] +*/ d3Ad[15]*tpx[3]);
879
880 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
881 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
882 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
883 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
884 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
885 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
886 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
887 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
888 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]);
889 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]);
890 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]);
891 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]);
892 d3b[0] = (/*d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] +*/ d3Ad[ 3]*tpy[3]);
893 d3b[1] = (/*d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] +*/ d3Ad[ 7]*tpy[3]);
894 d3b[2] = (/*d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] +*/ d3Ad[11]*tpy[3]);
895 d3b[3] = (/*d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] +*/ d3Ad[15]*tpy[3]);
896
897 c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]);
898 c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]);
899 c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]);
900 c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]);
901 dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]);
902 dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]);
903 dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]);
904 dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]);
905 d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]);
906 d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]);
907 d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]);
908 d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]);
909 d3c[0] = (/*d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] +*/ d3Ad[ 3]*tpz[3]);
910 d3c[1] = (/*d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] +*/ d3Ad[ 7]*tpz[3]);
911 d3c[2] = (/*d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] +*/ d3Ad[11]*tpz[3]);
912 d3c[3] = (/*d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] +*/ d3Ad[15]*tpz[3]);
913
914 intptr_t xs = spline->x_stride;
915 intptr_t ys = spline->y_stride;
916 intptr_t zs = spline->z_stride;
917
918 for (int n=0; n<spline->num_splines; n++) {
919 vals[n] = 0.0;
920 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
921 for (int i=0; i<9; i++)
922 hess[9*n+i] = 0.0;
923 for (int i=0; i<27; i++)
924 gradhess[27*n+i] = 0.0;
925 }
926
927 for (int i=0; i<4; i++)
928 for (int j=0; j<4; j++)
929 for (int k=0; k<4; k++) {
930 double abc = a[i]*b[j]*c[k];
931 double dabc[3], d2abc[6], d3abc[10];
932 dabc[0] = da[i]* b[j]* c[k];
933 dabc[1] = a[i]*db[j]* c[k];
934 dabc[2] = a[i]* b[j]*dc[k];
935 d2abc[0] = d2a[i]* b[j]* c[k];
936 d2abc[1] = da[i]* db[j]* c[k];
937 d2abc[2] = da[i]* b[j]* dc[k];
938 d2abc[3] = a[i]*d2b[j]* c[k];
939 d2abc[4] = a[i]* db[j]* dc[k];
940 d2abc[5] = a[i]* b[j]*d2c[k];
941
942 d3abc[0] = d3a[i]* b[j]* c[k];
943 d3abc[1] = d2a[i]* db[j]* c[k];
944 d3abc[2] = d2a[i]* b[j]* dc[k];
945 d3abc[3] = da[i]*d2b[j]* c[k];
946 d3abc[4] = da[i]* db[j]* dc[k];
947 d3abc[5] = da[i]* b[j]*d2c[k];
948 d3abc[6] = a[i]*d3b[j]* c[k];
949 d3abc[7] = a[i]*d2b[j]* dc[k];
950 d3abc[8] = a[i]* db[j]*d2c[k];
951 d3abc[9] = a[i]* b[j]*d3c[k];
952
953 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
954 for (int n=0; n<spline->num_splines; n++) {
955 vals[n] += abc *coefs[n];
956 grads[3*n+0] += dabc[0]*coefs[n];
957 grads[3*n+1] += dabc[1]*coefs[n];
958 grads[3*n+2] += dabc[2]*coefs[n];
959 hess [9*n+0] += d2abc[0]*coefs[n];
960 hess [9*n+1] += d2abc[1]*coefs[n];
961 hess [9*n+2] += d2abc[2]*coefs[n];
962 hess [9*n+4] += d2abc[3]*coefs[n];
963 hess [9*n+5] += d2abc[4]*coefs[n];
964 hess [9*n+8] += d2abc[5]*coefs[n];
965
966 gradhess [27*n+0 ] += d3abc[0]*coefs[n];
967 gradhess [27*n+1 ] += d3abc[1]*coefs[n];
968 gradhess [27*n+2 ] += d3abc[2]*coefs[n];
969 gradhess [27*n+4 ] += d3abc[3]*coefs[n];
970 gradhess [27*n+5 ] += d3abc[4]*coefs[n];
971 gradhess [27*n+8 ] += d3abc[5]*coefs[n];
972 gradhess [27*n+13] += d3abc[6]*coefs[n];
973 gradhess [27*n+14] += d3abc[7]*coefs[n];
974 gradhess [27*n+17] += d3abc[8]*coefs[n];
975 gradhess [27*n+26] += d3abc[9]*coefs[n];
976 }
977 }
978
979 double dxInv = spline->x_grid.delta_inv;
980 double dyInv = spline->y_grid.delta_inv;
981 double dzInv = spline->z_grid.delta_inv;
982 for (int n=0; n<spline->num_splines; n++) {
983 grads[3*n+0] *= dxInv;
984 grads[3*n+1] *= dyInv;
985 grads[3*n+2] *= dzInv;
986 hess [9*n+0] *= dxInv*dxInv;
987 hess [9*n+4] *= dyInv*dyInv;
988 hess [9*n+8] *= dzInv*dzInv;
989 hess [9*n+1] *= dxInv*dyInv;
990 hess [9*n+2] *= dxInv*dzInv;
991 hess [9*n+5] *= dyInv*dzInv;
992 // Copy hessian elements into lower half of 3x3 matrix
993 hess [9*n+3] = hess[9*n+1];
994 hess [9*n+6] = hess[9*n+2];
995 hess [9*n+7] = hess[9*n+5];
996
997 gradhess [27*n+0 ] *= dxInv*dxInv*dxInv;
998 gradhess [27*n+1 ] *= dxInv*dxInv*dyInv;
999 gradhess [27*n+2 ] *= dxInv*dxInv*dzInv;
1000 gradhess [27*n+4 ] *= dxInv*dyInv*dyInv;
1001 gradhess [27*n+5 ] *= dxInv*dyInv*dzInv;
1002 gradhess [27*n+8 ] *= dxInv*dzInv*dzInv;
1003 gradhess [27*n+13] *= dyInv*dyInv*dyInv;
1004 gradhess [27*n+14] *= dyInv*dyInv*dzInv;
1005 gradhess [27*n+17] *= dyInv*dzInv*dzInv;
1006 gradhess [27*n+26] *= dzInv*dzInv*dzInv;
1007
1008 // Copy gradhess elements into rest of tensor
1009 gradhess [27*n+9 ] = gradhess [27*n+3 ] = gradhess [27*n+1 ];
1010 gradhess [27*n+18 ] = gradhess [27*n+6 ] = gradhess [27*n+2 ];
1011 gradhess [27*n+22 ] = gradhess [27*n+16 ] = gradhess [27*n+14];
1012 gradhess [27*n+12 ] = gradhess [27*n+10 ] = gradhess [27*n+4 ];
1013 gradhess [27*n+24 ] = gradhess [27*n+20 ] = gradhess [27*n+8 ];
1014 gradhess [27*n+25 ] = gradhess [27*n+23 ] = gradhess [27*n+17];
1015 gradhess [27*n+21 ] = gradhess [27*n+19 ] = gradhess [27*n+15] = gradhess [27*n+11 ] = gradhess [27*n+7 ] = gradhess [27*n+5];
1016
1017 }
1018 }
1019