1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2018 QMCPACK developers.
6 //
7 // File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
8 //
9 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
10 //////////////////////////////////////////////////////////////////////////////////////
11 // -*- C++ -*-
12 /**@file MultiBsplineStd.hpp
13 *
14 * Literal port of einspline/multi_bspline_eval_d_std3.cpp by Ye at ANL
15 * Modified to handle both float/double and blocked operations
16 * - template parameter T for the precision
17 * - MUB spline object created by einspline allocators.
18 * Function signatures modified anticipating its use by a class that can perform data parallel execution
19 * - evaluate(...., int first, int last)
20 */
21 #ifndef SPLINE2_MULTIEINSPLINE_VGHGH_HPP
22 #define SPLINE2_MULTIEINSPLINE_VGHGH_HPP
23
24 namespace spline2
25 {
26 template<typename T>
evaluate_vghgh_impl(const typename qmcplusplus::bspline_traits<T,3>::SplineType * restrict spline_m,T x,T y,T z,T * restrict vals,T * restrict grads,T * restrict hess,T * restrict ghess,size_t out_offset,int first,int last)27 inline void evaluate_vghgh_impl(const typename qmcplusplus::bspline_traits<T, 3>::SplineType* restrict spline_m,
28 T x,
29 T y,
30 T z,
31 T* restrict vals,
32 T* restrict grads,
33 T* restrict hess,
34 T* restrict ghess,
35 size_t out_offset,
36 int first,
37 int last)
38 {
39 int ix, iy, iz;
40 T tx, ty, tz;
41 T a[4], b[4], c[4];
42 T da[4], db[4], dc[4];
43 T d2a[4], d2b[4], d2c[4];
44 T d3a[4], d3b[4], d3c[4];
45
46 x -= spline_m->x_grid.start;
47 y -= spline_m->y_grid.start;
48 z -= spline_m->z_grid.start;
49 getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1);
50 getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1);
51 getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1);
52
53 MultiBsplineData<T>::compute_prefactors(a, da, d2a, d3a, tx);
54 MultiBsplineData<T>::compute_prefactors(b, db, d2b, d3b, ty);
55 MultiBsplineData<T>::compute_prefactors(c, dc, d2c, d3c, tz);
56
57 const intptr_t xs = spline_m->x_stride;
58 const intptr_t ys = spline_m->y_stride;
59 const intptr_t zs = spline_m->z_stride;
60
61 const int num_splines = last - first;
62
63 T* restrict gx = grads;
64 T* restrict gy = grads + out_offset;
65 T* restrict gz = grads + 2 * out_offset;
66
67 T* restrict hxx = hess;
68 T* restrict hxy = hess + out_offset;
69 T* restrict hxz = hess + 2 * out_offset;
70 T* restrict hyy = hess + 3 * out_offset;
71 T* restrict hyz = hess + 4 * out_offset;
72 T* restrict hzz = hess + 5 * out_offset;
73
74 T* restrict gh_xxx = ghess;
75 T* restrict gh_xxy = ghess + out_offset;
76 T* restrict gh_xxz = ghess + 2 * out_offset;
77 T* restrict gh_xyy = ghess + 3 * out_offset;
78 T* restrict gh_xyz = ghess + 4 * out_offset;
79 T* restrict gh_xzz = ghess + 5 * out_offset;
80 T* restrict gh_yyy = ghess + 6 * out_offset;
81 T* restrict gh_yyz = ghess + 7 * out_offset;
82 T* restrict gh_yzz = ghess + 8 * out_offset;
83 T* restrict gh_zzz = ghess + 9 * out_offset;
84
85 std::fill(vals, vals + num_splines, T());
86 std::fill(gx, gx + num_splines, T());
87 std::fill(gy, gy + num_splines, T());
88 std::fill(gz, gz + num_splines, T());
89 std::fill(hxx, hxx + num_splines, T());
90 std::fill(hxy, hxy + num_splines, T());
91 std::fill(hxz, hxz + num_splines, T());
92 std::fill(hyy, hyy + num_splines, T());
93 std::fill(hyz, hyz + num_splines, T());
94 std::fill(hzz, hzz + num_splines, T());
95
96 std::fill(gh_xxx, gh_xxx + num_splines, T());
97 std::fill(gh_xxy, gh_xxy + num_splines, T());
98 std::fill(gh_xxz, gh_xxz + num_splines, T());
99 std::fill(gh_xyy, gh_xyy + num_splines, T());
100 std::fill(gh_xyz, gh_xyz + num_splines, T());
101 std::fill(gh_xzz, gh_xzz + num_splines, T());
102 std::fill(gh_yyy, gh_yyy + num_splines, T());
103 std::fill(gh_yyz, gh_yyz + num_splines, T());
104 std::fill(gh_yzz, gh_yzz + num_splines, T());
105 std::fill(gh_zzz, gh_zzz + num_splines, T());
106
107 for (int i = 0; i < 4; i++)
108 for (int j = 0; j < 4; j++)
109 {
110 const T* restrict coefs = spline_m->coefs + ((ix + i) * xs + (iy + j) * ys + iz * zs) + first;
111 const T* restrict coefszs = coefs + zs;
112 const T* restrict coefs2zs = coefs + 2 * zs;
113 const T* restrict coefs3zs = coefs + 3 * zs;
114
115 const T pre20 = d2a[i] * b[j];
116 const T pre10 = da[i] * b[j];
117 const T pre00 = a[i] * b[j];
118 const T pre11 = da[i] * db[j];
119 const T pre01 = a[i] * db[j];
120 const T pre02 = a[i] * d2b[j];
121
122 const T pre30 = d3a[i] * b[j];
123 const T pre21 = d2a[i] * db[j];
124 const T pre12 = da[i] * d2b[j];
125 const T pre03 = a[i] * d3b[j];
126
127
128 #pragma omp simd aligned(coefs, coefszs, coefs2zs, coefs3zs, gx, gy, gz, hxx, hxy, hxz, hyy, hyz, hzz, gh_xxx, gh_xxy, \
129 gh_xxz, gh_xyy, gh_xyz, gh_xzz, gh_yyy, gh_yyz, gh_yzz, gh_zzz, vals: QMC_SIMD_ALIGNMENT)
130 for (int n = 0; n < num_splines; n++)
131 {
132 T coefsv = coefs[n];
133 T coefsvzs = coefszs[n];
134 T coefsv2zs = coefs2zs[n];
135 T coefsv3zs = coefs3zs[n];
136
137 T sum0 = c[0] * coefsv + c[1] * coefsvzs + c[2] * coefsv2zs + c[3] * coefsv3zs;
138 T sum1 = dc[0] * coefsv + dc[1] * coefsvzs + dc[2] * coefsv2zs + dc[3] * coefsv3zs;
139 T sum2 = d2c[0] * coefsv + d2c[1] * coefsvzs + d2c[2] * coefsv2zs + d2c[3] * coefsv3zs;
140 T sum3 = d3c[0] * coefsv + d3c[1] * coefsvzs + d3c[2] * coefsv2zs + d3c[3] * coefsv3zs;
141
142 gh_xxx[n] += pre30 * sum0;
143 gh_xxy[n] += pre21 * sum0;
144 gh_xxz[n] += pre20 * sum1;
145 gh_xyy[n] += pre12 * sum0;
146 gh_xyz[n] += pre11 * sum1;
147 gh_xzz[n] += pre10 * sum2;
148 gh_yyy[n] += pre03 * sum0;
149 gh_yyz[n] += pre02 * sum1;
150 gh_yzz[n] += pre01 * sum2;
151 gh_zzz[n] += pre00 * sum3;
152
153 hxx[n] += pre20 * sum0;
154 hxy[n] += pre11 * sum0;
155 hxz[n] += pre10 * sum1;
156 hyy[n] += pre02 * sum0;
157 hyz[n] += pre01 * sum1;
158 hzz[n] += pre00 * sum2;
159 gx[n] += pre10 * sum0;
160 gy[n] += pre01 * sum0;
161 gz[n] += pre00 * sum1;
162 vals[n] += pre00 * sum0;
163 }
164 }
165
166 const T dxInv = spline_m->x_grid.delta_inv;
167 const T dyInv = spline_m->y_grid.delta_inv;
168 const T dzInv = spline_m->z_grid.delta_inv;
169 const T dxx = dxInv * dxInv;
170 const T dyy = dyInv * dyInv;
171 const T dzz = dzInv * dzInv;
172 const T dxy = dxInv * dyInv;
173 const T dxz = dxInv * dzInv;
174 const T dyz = dyInv * dzInv;
175
176 const T dxxx = dxInv * dxInv * dxInv;
177 const T dxxy = dxInv * dxInv * dyInv;
178 const T dxxz = dxInv * dxInv * dzInv;
179 const T dxyy = dxInv * dyInv * dyInv;
180 const T dxyz = dxInv * dyInv * dzInv;
181 const T dxzz = dxInv * dzInv * dzInv;
182 const T dyyy = dyInv * dyInv * dyInv;
183 const T dyyz = dyInv * dyInv * dzInv;
184 const T dyzz = dyInv * dzInv * dzInv;
185 const T dzzz = dzInv * dzInv * dzInv;
186
187 #pragma omp simd aligned(gx, gy, gz, hxx, hxy, hxz, hyy, hyz, hzz, gh_xxx, gh_xxy, gh_xxz, gh_xyy, gh_xyz, gh_xzz, \
188 gh_yyy, gh_yyz, gh_yzz, gh_zzz: QMC_SIMD_ALIGNMENT)
189 for (int n = 0; n < num_splines; n++)
190 {
191 gx[n] *= dxInv;
192 gy[n] *= dyInv;
193 gz[n] *= dzInv;
194 hxx[n] *= dxx;
195 hyy[n] *= dyy;
196 hzz[n] *= dzz;
197 hxy[n] *= dxy;
198 hxz[n] *= dxz;
199 hyz[n] *= dyz;
200
201 gh_xxx[n] *= dxxx;
202 gh_xxy[n] *= dxxy;
203 gh_xxz[n] *= dxxz;
204 gh_xyy[n] *= dxyy;
205 gh_xyz[n] *= dxyz;
206 gh_xzz[n] *= dxzz;
207 gh_yyy[n] *= dyyy;
208 gh_yyz[n] *= dyyz;
209 gh_yzz[n] *= dyzz;
210 gh_zzz[n] *= dzzz;
211 }
212 }
213
214 } // namespace spline2
215 #endif
216