1 /* Copyright 2014-2018 The PySCF Developers. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14
15 *
16 * Author: Qiming Sun <osirpt.sun@gmail.com>
17 */
18
19 #include <stdlib.h>
20 #include <math.h>
21 #include <complex.h>
22 #include "config.h"
23 #include "grid_ao_drv.h"
24
25 #define MIN(X,Y) ((X)<(Y)?(X):(Y))
26 #define MAX(X,Y) ((X)>(Y)?(X):(Y))
27
28 double CINTcommon_fac_sp(int l);
29
GTOnabla1(double * fx1,double * fy1,double * fz1,double * fx0,double * fy0,double * fz0,int l,double a)30 void GTOnabla1(double *fx1, double *fy1, double *fz1,
31 double *fx0, double *fy0, double *fz0, int l, double a)
32 {
33 int i, n;
34 double a2 = -2 * a;
35 for (n = 0; n < SIMDD; n++) {
36 fx1[n] = a2*fx0[SIMDD+n];
37 fy1[n] = a2*fy0[SIMDD+n];
38 fz1[n] = a2*fz0[SIMDD+n];
39 }
40 for (i = 1; i <= l; i++) {
41 for (n = 0; n < SIMDD; n++) {
42 fx1[i*SIMDD+n] = i*fx0[(i-1)*SIMDD+n] + a2*fx0[(i+1)*SIMDD+n];
43 fy1[i*SIMDD+n] = i*fy0[(i-1)*SIMDD+n] + a2*fy0[(i+1)*SIMDD+n];
44 fz1[i*SIMDD+n] = i*fz0[(i-1)*SIMDD+n] + a2*fz0[(i+1)*SIMDD+n];
45 } }
46 }
47
48 /*
49 * r - R_O = (r-R_i) + ri, ri = (x,y,z) = R_i - R_O
50 */
GTOx1(double * fx1,double * fy1,double * fz1,double * fx0,double * fy0,double * fz0,int l,double * ri)51 void GTOx1(double *fx1, double *fy1, double *fz1,
52 double *fx0, double *fy0, double *fz0, int l, double *ri)
53 {
54 int i, n;
55 for (i = 0; i <= l; i++) {
56 for (n = 0; n < SIMDD; n++) {
57 fx1[i*SIMDD+n] = ri[0] * fx0[i*SIMDD+n] + fx0[(i+1)*SIMDD+n];
58 fy1[i*SIMDD+n] = ri[1] * fy0[i*SIMDD+n] + fy0[(i+1)*SIMDD+n];
59 fz1[i*SIMDD+n] = ri[2] * fz0[i*SIMDD+n] + fz0[(i+1)*SIMDD+n];
60 } }
61 }
62
GTOprim_exp(double * eprim,double * coord,double * alpha,double * coeff,int l,int nprim,int nctr,size_t ngrids,double fac)63 int GTOprim_exp(double *eprim, double *coord, double *alpha, double *coeff,
64 int l, int nprim, int nctr, size_t ngrids, double fac)
65 {
66 int i, j;
67 double arr, maxc;
68 double logcoeff[nprim];
69 double rr[ngrids];
70 double *gridx = coord;
71 double *gridy = coord+BLKSIZE;
72 double *gridz = coord+BLKSIZE*2;
73 int not0 = 0;
74
75 // the maximum value of the coefficients for each pGTO
76 for (j = 0; j < nprim; j++) {
77 maxc = 0;
78 for (i = 0; i < nctr; i++) {
79 maxc = MAX(maxc, fabs(coeff[i*nprim+j]));
80 }
81 logcoeff[j] = log(maxc);
82 }
83
84 for (i = 0; i < ngrids; i++) {
85 rr[i] = gridx[i]*gridx[i] + gridy[i]*gridy[i] + gridz[i]*gridz[i];
86 }
87
88 for (j = 0; j < nprim; j++) {
89 for (i = 0; i < ngrids; i++) {
90 arr = alpha[j] * rr[i];
91 if (arr-logcoeff[j] < EXPCUTOFF) {
92 eprim[j*BLKSIZE+i] = exp(-arr) * fac;
93 not0 = 1;
94 } else {
95 eprim[j*BLKSIZE+i] = 0;
96 }
97 }
98 }
99 return not0;
100 }
101
102
103 // grid2atm[atm_id,xyz,grid_id]
_fill_grid2atm(double * grid2atm,double * coord,size_t bgrids,size_t ngrids,int * atm,int natm,int * bas,int nbas,double * env)104 static void _fill_grid2atm(double *grid2atm, double *coord, size_t bgrids, size_t ngrids,
105 int *atm, int natm, int *bas, int nbas, double *env)
106 {
107 int atm_id;
108 size_t ig;
109 double *r_atm;
110 for (atm_id = 0; atm_id < natm; atm_id++) {
111 r_atm = env + atm[PTR_COORD+atm_id*ATM_SLOTS];
112 for (ig = 0; ig < bgrids; ig++) {
113 grid2atm[0*BLKSIZE+ig] = coord[0*ngrids+ig] - r_atm[0];
114 }
115 for (ig = 0; ig < bgrids; ig++) {
116 grid2atm[1*BLKSIZE+ig] = coord[1*ngrids+ig] - r_atm[1];
117 }
118 for (ig = 0; ig < bgrids; ig++) {
119 grid2atm[2*BLKSIZE+ig] = coord[2*ngrids+ig] - r_atm[2];
120 }
121 grid2atm += 3*BLKSIZE;
122 }
123 }
124
125
_dset0(double * out,size_t odim,size_t bgrids,int counts)126 static void _dset0(double *out, size_t odim, size_t bgrids, int counts)
127 {
128 size_t i, j;
129 for (i = 0; i < counts; i++) {
130 for (j = 0; j < bgrids; j++) {
131 out[i*odim+j] = 0;
132 }
133 }
134 }
135
_zset0(double complex * out,size_t odim,size_t bgrids,int counts)136 static void _zset0(double complex *out, size_t odim, size_t bgrids, int counts)
137 {
138 size_t i, j;
139 for (i = 0; i < counts; i++) {
140 for (j = 0; j < bgrids; j++) {
141 out[i*odim+j] = 0;
142 }
143 }
144 }
145
GTOeval_sph_iter(FPtr_eval feval,FPtr_exp fexp,double fac,size_t nao,size_t ngrids,size_t bgrids,int param[],int * shls_slice,int * ao_loc,double * buf,double * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)146 void GTOeval_sph_iter(FPtr_eval feval, FPtr_exp fexp, double fac,
147 size_t nao, size_t ngrids, size_t bgrids,
148 int param[], int *shls_slice, int *ao_loc, double *buf,
149 double *ao, double *coord, char *non0table,
150 int *atm, int natm, int *bas, int nbas, double *env)
151 {
152 const int ncomp = param[TENSOR];
153 const int sh0 = shls_slice[0];
154 const int sh1 = shls_slice[1];
155 const int atmstart = bas[sh0*BAS_SLOTS+ATOM_OF];
156 const int atmend = bas[(sh1-1)*BAS_SLOTS+ATOM_OF]+1;
157 const int atmcount = atmend - atmstart;
158 int i, k, l, np, nc, atm_id, bas_id, deg, dcart, ao_id;
159 size_t di;
160 double fac1;
161 double *p_exp, *pcoeff, *pcoord, *pcart, *ri, *pao;
162 double *grid2atm = ALIGN8_UP(buf); // [atm_id,xyz,grid]
163 double *eprim = grid2atm + atmcount*3*BLKSIZE;
164 double *cart_gto = eprim + NPRIMAX*BLKSIZE*2;
165
166 _fill_grid2atm(grid2atm, coord, bgrids, ngrids,
167 atm+atmstart*ATM_SLOTS, atmcount, bas, nbas, env);
168
169 for (bas_id = sh0; bas_id < sh1; bas_id++) {
170 np = bas[bas_id*BAS_SLOTS+NPRIM_OF];
171 nc = bas[bas_id*BAS_SLOTS+NCTR_OF ];
172 l = bas[bas_id*BAS_SLOTS+ANG_OF ];
173 deg = l * 2 + 1;
174 fac1 = fac * CINTcommon_fac_sp(l);
175 p_exp = env + bas[bas_id*BAS_SLOTS+PTR_EXP];
176 pcoeff = env + bas[bas_id*BAS_SLOTS+PTR_COEFF];
177 atm_id = bas[bas_id*BAS_SLOTS+ATOM_OF];
178 pcoord = grid2atm + (atm_id - atmstart) * 3*BLKSIZE;
179 ao_id = ao_loc[bas_id] - ao_loc[sh0];
180 if (non0table[bas_id] &&
181 (*fexp)(eprim, pcoord, p_exp, pcoeff, l, np, nc, bgrids, fac1)) {
182 dcart = (l+1)*(l+2)/2;
183 di = nc * dcart;
184 ri = env + atm[PTR_COORD+atm_id*ATM_SLOTS];
185 if (l <= 1) { // s, p functions
186 (*feval)(ao+ao_id*ngrids, ri, eprim, pcoord, p_exp, pcoeff,
187 env, l, np, nc, nao, ngrids, bgrids);
188 } else {
189 (*feval)(cart_gto, ri, eprim, pcoord, p_exp, pcoeff,
190 env, l, np, nc, di, bgrids, bgrids);
191 pcart = cart_gto;
192 for (i = 0; i < ncomp; i++) {
193 pao = ao + (i*nao+ao_id)*ngrids;
194 for (k = 0; k < nc; k++) {
195 CINTc2s_ket_sph1(pao, pcart,
196 ngrids, bgrids, l);
197 pao += deg * ngrids;
198 pcart += dcart * bgrids;
199 }
200 }
201 }
202 } else {
203 for (i = 0; i < ncomp; i++) {
204 _dset0(ao+(i*nao+ao_id)*ngrids, ngrids, bgrids, nc*deg);
205 }
206 }
207 }
208 }
209
GTOeval_cart_iter(FPtr_eval feval,FPtr_exp fexp,double fac,size_t nao,size_t ngrids,size_t bgrids,int param[],int * shls_slice,int * ao_loc,double * buf,double * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)210 void GTOeval_cart_iter(FPtr_eval feval, FPtr_exp fexp, double fac,
211 size_t nao, size_t ngrids, size_t bgrids,
212 int param[], int *shls_slice, int *ao_loc, double *buf,
213 double *ao, double *coord, char *non0table,
214 int *atm, int natm, int *bas, int nbas, double *env)
215 {
216 const int ncomp = param[TENSOR];
217 const int sh0 = shls_slice[0];
218 const int sh1 = shls_slice[1];
219 const int atmstart = bas[sh0*BAS_SLOTS+ATOM_OF];
220 const int atmend = bas[(sh1-1)*BAS_SLOTS+ATOM_OF]+1;
221 const int atmcount = atmend - atmstart;
222 int i, l, np, nc, atm_id, bas_id, deg, ao_id;
223 double fac1;
224 double *p_exp, *pcoeff, *pcoord, *ri;
225 double *grid2atm = ALIGN8_UP(buf); // [atm_id,xyz,grid]
226 double *eprim = grid2atm + atmcount*3*BLKSIZE;
227
228 _fill_grid2atm(grid2atm, coord, bgrids, ngrids,
229 atm+atmstart*ATM_SLOTS, atmcount, bas, nbas, env);
230
231 for (bas_id = sh0; bas_id < sh1; bas_id++) {
232 np = bas[bas_id*BAS_SLOTS+NPRIM_OF];
233 nc = bas[bas_id*BAS_SLOTS+NCTR_OF ];
234 l = bas[bas_id*BAS_SLOTS+ANG_OF ];
235 deg = (l+1)*(l+2)/2;
236 fac1 = fac * CINTcommon_fac_sp(l);
237 p_exp = env + bas[bas_id*BAS_SLOTS+PTR_EXP];
238 pcoeff = env + bas[bas_id*BAS_SLOTS+PTR_COEFF];
239 atm_id = bas[bas_id*BAS_SLOTS+ATOM_OF];
240 pcoord = grid2atm + (atm_id - atmstart) * 3*BLKSIZE;
241 ao_id = ao_loc[bas_id] - ao_loc[sh0];
242 if (non0table[bas_id] &&
243 (*fexp)(eprim, pcoord, p_exp, pcoeff, l, np, nc, bgrids, fac1)) {
244 ri = env + atm[PTR_COORD+atm_id*ATM_SLOTS];
245 (*feval)(ao+ao_id*ngrids, ri, eprim, pcoord, p_exp, pcoeff,
246 env, l, np, nc, nao, ngrids, bgrids);
247 } else {
248 for (i = 0; i < ncomp; i++) {
249 _dset0(ao+(i*nao+ao_id)*ngrids, ngrids, bgrids, nc*deg);
250 }
251 }
252 }
253 }
254
GTOeval_spinor_iter(FPtr_eval feval,FPtr_exp fexp,void (* c2s)(),double fac,size_t nao,size_t ngrids,size_t bgrids,int param[],int * shls_slice,int * ao_loc,double * buf,double complex * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)255 void GTOeval_spinor_iter(FPtr_eval feval, FPtr_exp fexp, void (*c2s)(), double fac,
256 size_t nao, size_t ngrids, size_t bgrids,
257 int param[], int *shls_slice, int *ao_loc, double *buf,
258 double complex *ao, double *coord, char *non0table,
259 int *atm, int natm, int *bas, int nbas, double *env)
260 {
261 const int ncomp_e1 = param[POS_E1];
262 const int ncomp = param[TENSOR];
263 const int sh0 = shls_slice[0];
264 const int sh1 = shls_slice[1];
265 const int atmstart = bas[sh0*BAS_SLOTS+ATOM_OF];
266 const int atmend = bas[(sh1-1)*BAS_SLOTS+ATOM_OF]+1;
267 const int atmcount = atmend - atmstart;
268 int i, l, np, nc, atm_id, bas_id, deg, kappa, dcart, ao_id;
269 size_t off, di;
270 double fac1;
271 double *p_exp, *pcoeff, *pcoord, *pcart, *ri;
272 double complex *aoa = ao;
273 double complex *aob = ao + ncomp*nao*ngrids;
274 double *grid2atm = ALIGN8_UP(buf); // [atm_id,xyz,grid]
275 double *eprim = grid2atm + atmcount*3*BLKSIZE;
276 double *cart_gto = eprim + NPRIMAX*BLKSIZE*2;
277
278 _fill_grid2atm(grid2atm, coord, bgrids, ngrids,
279 atm+atmstart*ATM_SLOTS, atmcount, bas, nbas, env);
280
281 for (bas_id = sh0; bas_id < sh1; bas_id++) {
282 np = bas[bas_id*BAS_SLOTS+NPRIM_OF];
283 nc = bas[bas_id*BAS_SLOTS+NCTR_OF ];
284 l = bas[bas_id*BAS_SLOTS+ANG_OF ];
285 deg = CINTlen_spinor(bas_id, bas);
286 fac1 = fac * CINTcommon_fac_sp(l);
287 p_exp = env + bas[bas_id*BAS_SLOTS+PTR_EXP];
288 pcoeff = env + bas[bas_id*BAS_SLOTS+PTR_COEFF];
289 atm_id = bas[bas_id*BAS_SLOTS+ATOM_OF];
290 pcoord = grid2atm + (atm_id - atmstart) * 3*BLKSIZE;
291 ao_id = ao_loc[bas_id] - ao_loc[sh0];
292 if (non0table[bas_id] &&
293 (*fexp)(eprim, pcoord, p_exp, pcoeff, l, np, nc, bgrids, fac1)) {
294 kappa = bas[bas_id*BAS_SLOTS+KAPPA_OF];
295 dcart = (l+1)*(l+2)/2;
296 di = nc * dcart;
297 ri = env + atm[PTR_COORD+atm_id*ATM_SLOTS];
298 (*feval)(cart_gto, ri, eprim, pcoord, p_exp, pcoeff,
299 env, l, np, nc, di, bgrids, bgrids);
300 for (i = 0; i < ncomp; i++) {
301 pcart = cart_gto + i * di*bgrids*ncomp_e1;
302 off = (i*nao+ao_id)*ngrids;
303 (*c2s)(aoa+off, aob+off, pcart,
304 ngrids, bgrids, nc, kappa, l);
305 }
306 } else {
307 for (i = 0; i < ncomp; i++) {
308 off = (i*nao+ao_id)*ngrids;
309 _zset0(aoa+off, ngrids, bgrids, nc*deg);
310 _zset0(aob+off, ngrids, bgrids, nc*deg);
311 }
312 }
313 }
314 }
315
GTOshloc_by_atom(int * shloc,int * shls_slice,int * ao_loc,int * atm,int * bas)316 int GTOshloc_by_atom(int *shloc, int *shls_slice, int *ao_loc, int *atm, int *bas)
317 {
318 const int sh0 = shls_slice[0];
319 const int sh1 = shls_slice[1];
320 int ish, nshblk, lastatm;
321 shloc[0] = sh0;
322 nshblk = 1;
323 lastatm = bas[BAS_SLOTS*sh0+ATOM_OF];
324 for (ish = sh0; ish < sh1; ish++) {
325 if (lastatm != bas[BAS_SLOTS*ish+ATOM_OF]) {
326 lastatm = bas[BAS_SLOTS*ish+ATOM_OF];
327 shloc[nshblk] = ish;
328 nshblk++;
329 }
330 }
331 shloc[nshblk] = sh1;
332 return nshblk;
333 }
334
335 /*
336 * non0table[ngrids/blksize,natm] is the T/F table for ao values to
337 * screen the ao evaluation for each shell
338 */
GTOeval_loop(void (* fiter)(),FPtr_eval feval,FPtr_exp fexp,double fac,int ngrids,int param[],int * shls_slice,int * ao_loc,double * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)339 void GTOeval_loop(void (*fiter)(), FPtr_eval feval, FPtr_exp fexp, double fac,
340 int ngrids, int param[], int *shls_slice, int *ao_loc,
341 double *ao, double *coord, char *non0table,
342 int *atm, int natm, int *bas, int nbas, double *env)
343 {
344 int shloc[shls_slice[1]-shls_slice[0]+1];
345 const int nshblk = GTOshloc_by_atom(shloc, shls_slice, ao_loc, atm, bas);
346 const int nblk = (ngrids+BLKSIZE-1) / BLKSIZE;
347 const size_t Ngrids = ngrids;
348
349 #pragma omp parallel
350 {
351 const int sh0 = shls_slice[0];
352 const int sh1 = shls_slice[1];
353 const size_t nao = ao_loc[sh1] - ao_loc[sh0];
354 int ip, ib, k, iloc, ish;
355 size_t aoff, bgrids;
356 int ncart = NCTR_CART * param[TENSOR] * param[POS_E1];
357 double *buf = malloc(sizeof(double) * BLKSIZE*(NPRIMAX*2+ncart));
358 #pragma omp for schedule(dynamic, 4)
359 for (k = 0; k < nblk*nshblk; k++) {
360 iloc = k / nblk;
361 ish = shloc[iloc];
362 aoff = ao_loc[ish] - ao_loc[sh0];
363 ib = k - iloc * nblk;
364 ip = ib * BLKSIZE;
365 bgrids = MIN(ngrids-ip, BLKSIZE);
366 (*fiter)(feval, fexp, fac, nao, Ngrids, bgrids,
367 param, shloc+iloc, ao_loc, buf, ao+aoff*Ngrids+ip,
368 coord+ip, non0table+ib*nbas,
369 atm, natm, bas, nbas, env);
370 }
371 free(buf);
372 }
373 }
374
GTOeval_sph_drv(FPtr_eval feval,FPtr_exp fexp,double fac,int ngrids,int param[],int * shls_slice,int * ao_loc,double * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)375 void GTOeval_sph_drv(FPtr_eval feval, FPtr_exp fexp, double fac, int ngrids,
376 int param[], int *shls_slice, int *ao_loc,
377 double *ao, double *coord, char *non0table,
378 int *atm, int natm, int *bas, int nbas, double *env)
379 {
380 GTOeval_loop(GTOeval_sph_iter, feval, fexp, fac, ngrids,
381 param, shls_slice, ao_loc,
382 ao, coord, non0table, atm, natm, bas, nbas, env);
383 }
384
GTOeval_cart_drv(FPtr_eval feval,FPtr_exp fexp,double fac,int ngrids,int param[],int * shls_slice,int * ao_loc,double * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)385 void GTOeval_cart_drv(FPtr_eval feval, FPtr_exp fexp, double fac, int ngrids,
386 int param[], int *shls_slice, int *ao_loc,
387 double *ao, double *coord, char *non0table,
388 int *atm, int natm, int *bas, int nbas, double *env)
389 {
390 GTOeval_loop(GTOeval_cart_iter, feval, fexp, fac, ngrids,
391 param, shls_slice, ao_loc,
392 ao, coord, non0table, atm, natm, bas, nbas, env);
393 }
394
GTOeval_spinor_drv(FPtr_eval feval,FPtr_exp fexp,void (* c2s)(),double fac,int ngrids,int param[],int * shls_slice,int * ao_loc,double complex * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)395 void GTOeval_spinor_drv(FPtr_eval feval, FPtr_exp fexp, void (*c2s)(), double fac,
396 int ngrids, int param[], int *shls_slice, int *ao_loc,
397 double complex *ao, double *coord, char *non0table,
398 int *atm, int natm, int *bas, int nbas, double *env)
399 {
400 int shloc[shls_slice[1]-shls_slice[0]+1];
401 const int nshblk = GTOshloc_by_atom(shloc, shls_slice, ao_loc, atm, bas);
402 const int nblk = (ngrids+BLKSIZE-1) / BLKSIZE;
403 const size_t Ngrids = ngrids;
404
405 #pragma omp parallel
406 {
407 const int sh0 = shls_slice[0];
408 const int sh1 = shls_slice[1];
409 const size_t nao = ao_loc[sh1] - ao_loc[sh0];
410 int ip, ib, k, iloc, ish;
411 size_t aoff, bgrids;
412 int ncart = NCTR_CART * param[TENSOR] * param[POS_E1];
413 double *buf = malloc(sizeof(double) * BLKSIZE*(NPRIMAX*2+ncart));
414 #pragma omp for schedule(dynamic, 4)
415 for (k = 0; k < nblk*nshblk; k++) {
416 iloc = k / nblk;
417 ish = shloc[iloc];
418 aoff = ao_loc[ish] - ao_loc[sh0];
419 ib = k - iloc * nblk;
420 ip = ib * BLKSIZE;
421 bgrids = MIN(ngrids-ip, BLKSIZE);
422 GTOeval_spinor_iter(feval, fexp, c2s, fac,
423 nao, Ngrids, bgrids,
424 param, shloc+iloc, ao_loc, buf, ao+aoff*Ngrids+ip,
425 coord+ip, non0table+ib*nbas,
426 atm, natm, bas, nbas, env);
427 }
428 free(buf);
429 }
430 }
431
432