1 /* Copyright 2014-2018 The PySCF Developers. All Rights Reserved.
2 
3    Licensed under the Apache License, Version 2.0 (the "License");
4     you may not use this file except in compliance with the License.
5     You may obtain a copy of the License at
6 
7         http://www.apache.org/licenses/LICENSE-2.0
8 
9     Unless required by applicable law or agreed to in writing, software
10     distributed under the License is distributed on an "AS IS" BASIS,
11     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12     See the License for the specific language governing permissions and
13     limitations under the License.
14 
15  *
16  * Author: Qiming Sun <osirpt.sun@gmail.com>
17  */
18 
19 #include <stdlib.h>
20 #include <math.h>
21 #include <complex.h>
22 #include "config.h"
23 #include "grid_ao_drv.h"
24 
25 #define MIN(X,Y)        ((X)<(Y)?(X):(Y))
26 #define MAX(X,Y)        ((X)>(Y)?(X):(Y))
27 
28 double CINTcommon_fac_sp(int l);
29 
GTOnabla1(double * fx1,double * fy1,double * fz1,double * fx0,double * fy0,double * fz0,int l,double a)30 void GTOnabla1(double *fx1, double *fy1, double *fz1,
31                double *fx0, double *fy0, double *fz0, int l, double a)
32 {
33         int i, n;
34         double a2 = -2 * a;
35         for (n = 0; n < SIMDD; n++) {
36                 fx1[n] = a2*fx0[SIMDD+n];
37                 fy1[n] = a2*fy0[SIMDD+n];
38                 fz1[n] = a2*fz0[SIMDD+n];
39         }
40         for (i = 1; i <= l; i++) {
41         for (n = 0; n < SIMDD; n++) {
42                 fx1[i*SIMDD+n] = i*fx0[(i-1)*SIMDD+n] + a2*fx0[(i+1)*SIMDD+n];
43                 fy1[i*SIMDD+n] = i*fy0[(i-1)*SIMDD+n] + a2*fy0[(i+1)*SIMDD+n];
44                 fz1[i*SIMDD+n] = i*fz0[(i-1)*SIMDD+n] + a2*fz0[(i+1)*SIMDD+n];
45         } }
46 }
47 
48 /*
49  * r - R_O = (r-R_i) + ri, ri = (x,y,z) = R_i - R_O
50  */
GTOx1(double * fx1,double * fy1,double * fz1,double * fx0,double * fy0,double * fz0,int l,double * ri)51 void GTOx1(double *fx1, double *fy1, double *fz1,
52            double *fx0, double *fy0, double *fz0, int l, double *ri)
53 {
54         int i, n;
55         for (i = 0; i <= l; i++) {
56         for (n = 0; n < SIMDD; n++) {
57                 fx1[i*SIMDD+n] = ri[0] * fx0[i*SIMDD+n] + fx0[(i+1)*SIMDD+n];
58                 fy1[i*SIMDD+n] = ri[1] * fy0[i*SIMDD+n] + fy0[(i+1)*SIMDD+n];
59                 fz1[i*SIMDD+n] = ri[2] * fz0[i*SIMDD+n] + fz0[(i+1)*SIMDD+n];
60         } }
61 }
62 
GTOprim_exp(double * eprim,double * coord,double * alpha,double * coeff,int l,int nprim,int nctr,size_t ngrids,double fac)63 int GTOprim_exp(double *eprim, double *coord, double *alpha, double *coeff,
64                 int l, int nprim, int nctr, size_t ngrids, double fac)
65 {
66         int i, j;
67         double arr, maxc;
68         double logcoeff[nprim];
69         double rr[ngrids];
70         double *gridx = coord;
71         double *gridy = coord+BLKSIZE;
72         double *gridz = coord+BLKSIZE*2;
73         int not0 = 0;
74 
75         // the maximum value of the coefficients for each pGTO
76         for (j = 0; j < nprim; j++) {
77                 maxc = 0;
78                 for (i = 0; i < nctr; i++) {
79                         maxc = MAX(maxc, fabs(coeff[i*nprim+j]));
80                 }
81                 logcoeff[j] = log(maxc);
82         }
83 
84         for (i = 0; i < ngrids; i++) {
85                 rr[i] = gridx[i]*gridx[i] + gridy[i]*gridy[i] + gridz[i]*gridz[i];
86         }
87 
88         for (j = 0; j < nprim; j++) {
89                 for (i = 0; i < ngrids; i++) {
90                         arr = alpha[j] * rr[i];
91                         if (arr-logcoeff[j] < EXPCUTOFF) {
92                                 eprim[j*BLKSIZE+i] = exp(-arr) * fac;
93                                 not0 = 1;
94                         } else {
95                                 eprim[j*BLKSIZE+i] = 0;
96                         }
97                 }
98         }
99         return not0;
100 }
101 
102 
103 // grid2atm[atm_id,xyz,grid_id]
_fill_grid2atm(double * grid2atm,double * coord,size_t bgrids,size_t ngrids,int * atm,int natm,int * bas,int nbas,double * env)104 static void _fill_grid2atm(double *grid2atm, double *coord, size_t bgrids, size_t ngrids,
105                            int *atm, int natm, int *bas, int nbas, double *env)
106 {
107         int atm_id;
108         size_t ig;
109         double *r_atm;
110         for (atm_id = 0; atm_id < natm; atm_id++) {
111                 r_atm = env + atm[PTR_COORD+atm_id*ATM_SLOTS];
112                 for (ig = 0; ig < bgrids; ig++) {
113                         grid2atm[0*BLKSIZE+ig] = coord[0*ngrids+ig] - r_atm[0];
114                 }
115                 for (ig = 0; ig < bgrids; ig++) {
116                         grid2atm[1*BLKSIZE+ig] = coord[1*ngrids+ig] - r_atm[1];
117                 }
118                 for (ig = 0; ig < bgrids; ig++) {
119                         grid2atm[2*BLKSIZE+ig] = coord[2*ngrids+ig] - r_atm[2];
120                 }
121                 grid2atm += 3*BLKSIZE;
122         }
123 }
124 
125 
_dset0(double * out,size_t odim,size_t bgrids,int counts)126 static void _dset0(double *out, size_t odim, size_t bgrids, int counts)
127 {
128         size_t i, j;
129         for (i = 0; i < counts; i++) {
130                 for (j = 0; j < bgrids; j++) {
131                         out[i*odim+j] = 0;
132                 }
133         }
134 }
135 
_zset0(double complex * out,size_t odim,size_t bgrids,int counts)136 static void _zset0(double complex *out, size_t odim, size_t bgrids, int counts)
137 {
138         size_t i, j;
139         for (i = 0; i < counts; i++) {
140                 for (j = 0; j < bgrids; j++) {
141                         out[i*odim+j] = 0;
142                 }
143         }
144 }
145 
GTOeval_sph_iter(FPtr_eval feval,FPtr_exp fexp,double fac,size_t nao,size_t ngrids,size_t bgrids,int param[],int * shls_slice,int * ao_loc,double * buf,double * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)146 void GTOeval_sph_iter(FPtr_eval feval,  FPtr_exp fexp, double fac,
147                       size_t nao, size_t ngrids, size_t bgrids,
148                       int param[], int *shls_slice, int *ao_loc, double *buf,
149                       double *ao, double *coord, char *non0table,
150                       int *atm, int natm, int *bas, int nbas, double *env)
151 {
152         const int ncomp = param[TENSOR];
153         const int sh0 = shls_slice[0];
154         const int sh1 = shls_slice[1];
155         const int atmstart = bas[sh0*BAS_SLOTS+ATOM_OF];
156         const int atmend = bas[(sh1-1)*BAS_SLOTS+ATOM_OF]+1;
157         const int atmcount = atmend - atmstart;
158         int i, k, l, np, nc, atm_id, bas_id, deg, dcart, ao_id;
159         size_t di;
160         double fac1;
161         double *p_exp, *pcoeff, *pcoord, *pcart, *ri, *pao;
162         double *grid2atm = ALIGN8_UP(buf); // [atm_id,xyz,grid]
163         double *eprim = grid2atm + atmcount*3*BLKSIZE;
164         double *cart_gto = eprim + NPRIMAX*BLKSIZE*2;
165 
166         _fill_grid2atm(grid2atm, coord, bgrids, ngrids,
167                        atm+atmstart*ATM_SLOTS, atmcount, bas, nbas, env);
168 
169         for (bas_id = sh0; bas_id < sh1; bas_id++) {
170                 np = bas[bas_id*BAS_SLOTS+NPRIM_OF];
171                 nc = bas[bas_id*BAS_SLOTS+NCTR_OF ];
172                 l  = bas[bas_id*BAS_SLOTS+ANG_OF  ];
173                 deg = l * 2 + 1;
174                 fac1 = fac * CINTcommon_fac_sp(l);
175                 p_exp  = env + bas[bas_id*BAS_SLOTS+PTR_EXP];
176                 pcoeff = env + bas[bas_id*BAS_SLOTS+PTR_COEFF];
177                 atm_id = bas[bas_id*BAS_SLOTS+ATOM_OF];
178                 pcoord = grid2atm + (atm_id - atmstart) * 3*BLKSIZE;
179                 ao_id = ao_loc[bas_id] - ao_loc[sh0];
180                 if (non0table[bas_id] &&
181                     (*fexp)(eprim, pcoord, p_exp, pcoeff, l, np, nc, bgrids, fac1)) {
182                         dcart = (l+1)*(l+2)/2;
183                         di = nc * dcart;
184                         ri = env + atm[PTR_COORD+atm_id*ATM_SLOTS];
185                         if (l <= 1) { // s, p functions
186                                 (*feval)(ao+ao_id*ngrids, ri, eprim, pcoord, p_exp, pcoeff,
187                                          env, l, np, nc, nao, ngrids, bgrids);
188                         } else {
189                                 (*feval)(cart_gto, ri, eprim, pcoord, p_exp, pcoeff,
190                                          env, l, np, nc, di, bgrids, bgrids);
191                                 pcart = cart_gto;
192                                 for (i = 0; i < ncomp; i++) {
193                                         pao = ao + (i*nao+ao_id)*ngrids;
194                                         for (k = 0; k < nc; k++) {
195                                                 CINTc2s_ket_sph1(pao, pcart,
196                                                                  ngrids, bgrids, l);
197                                                 pao += deg * ngrids;
198                                                 pcart += dcart * bgrids;
199                                         }
200                                 }
201                         }
202                 } else {
203                         for (i = 0; i < ncomp; i++) {
204                                 _dset0(ao+(i*nao+ao_id)*ngrids, ngrids, bgrids, nc*deg);
205                         }
206                 }
207         }
208 }
209 
GTOeval_cart_iter(FPtr_eval feval,FPtr_exp fexp,double fac,size_t nao,size_t ngrids,size_t bgrids,int param[],int * shls_slice,int * ao_loc,double * buf,double * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)210 void GTOeval_cart_iter(FPtr_eval feval,  FPtr_exp fexp, double fac,
211                        size_t nao, size_t ngrids, size_t bgrids,
212                        int param[], int *shls_slice, int *ao_loc, double *buf,
213                        double *ao, double *coord, char *non0table,
214                        int *atm, int natm, int *bas, int nbas, double *env)
215 {
216         const int ncomp = param[TENSOR];
217         const int sh0 = shls_slice[0];
218         const int sh1 = shls_slice[1];
219         const int atmstart = bas[sh0*BAS_SLOTS+ATOM_OF];
220         const int atmend = bas[(sh1-1)*BAS_SLOTS+ATOM_OF]+1;
221         const int atmcount = atmend - atmstart;
222         int i, l, np, nc, atm_id, bas_id, deg, ao_id;
223         double fac1;
224         double *p_exp, *pcoeff, *pcoord, *ri;
225         double *grid2atm = ALIGN8_UP(buf); // [atm_id,xyz,grid]
226         double *eprim = grid2atm + atmcount*3*BLKSIZE;
227 
228         _fill_grid2atm(grid2atm, coord, bgrids, ngrids,
229                        atm+atmstart*ATM_SLOTS, atmcount, bas, nbas, env);
230 
231         for (bas_id = sh0; bas_id < sh1; bas_id++) {
232                 np = bas[bas_id*BAS_SLOTS+NPRIM_OF];
233                 nc = bas[bas_id*BAS_SLOTS+NCTR_OF ];
234                 l  = bas[bas_id*BAS_SLOTS+ANG_OF  ];
235                 deg = (l+1)*(l+2)/2;
236                 fac1 = fac * CINTcommon_fac_sp(l);
237                 p_exp  = env + bas[bas_id*BAS_SLOTS+PTR_EXP];
238                 pcoeff = env + bas[bas_id*BAS_SLOTS+PTR_COEFF];
239                 atm_id = bas[bas_id*BAS_SLOTS+ATOM_OF];
240                 pcoord = grid2atm + (atm_id - atmstart) * 3*BLKSIZE;
241                 ao_id = ao_loc[bas_id] - ao_loc[sh0];
242                 if (non0table[bas_id] &&
243                     (*fexp)(eprim, pcoord, p_exp, pcoeff, l, np, nc, bgrids, fac1)) {
244                         ri = env + atm[PTR_COORD+atm_id*ATM_SLOTS];
245                         (*feval)(ao+ao_id*ngrids, ri, eprim, pcoord, p_exp, pcoeff,
246                                  env, l, np, nc, nao, ngrids, bgrids);
247                 } else {
248                         for (i = 0; i < ncomp; i++) {
249                                 _dset0(ao+(i*nao+ao_id)*ngrids, ngrids, bgrids, nc*deg);
250                         }
251                 }
252         }
253 }
254 
GTOeval_spinor_iter(FPtr_eval feval,FPtr_exp fexp,void (* c2s)(),double fac,size_t nao,size_t ngrids,size_t bgrids,int param[],int * shls_slice,int * ao_loc,double * buf,double complex * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)255 void GTOeval_spinor_iter(FPtr_eval feval, FPtr_exp fexp, void (*c2s)(), double fac,
256                          size_t nao, size_t ngrids, size_t bgrids,
257                          int param[], int *shls_slice, int *ao_loc, double *buf,
258                          double complex *ao, double *coord, char *non0table,
259                          int *atm, int natm, int *bas, int nbas, double *env)
260 {
261         const int ncomp_e1 = param[POS_E1];
262         const int ncomp = param[TENSOR];
263         const int sh0 = shls_slice[0];
264         const int sh1 = shls_slice[1];
265         const int atmstart = bas[sh0*BAS_SLOTS+ATOM_OF];
266         const int atmend = bas[(sh1-1)*BAS_SLOTS+ATOM_OF]+1;
267         const int atmcount = atmend - atmstart;
268         int i, l, np, nc, atm_id, bas_id, deg, kappa, dcart, ao_id;
269         size_t off, di;
270         double fac1;
271         double *p_exp, *pcoeff, *pcoord, *pcart, *ri;
272         double complex *aoa = ao;
273         double complex *aob = ao + ncomp*nao*ngrids;
274         double *grid2atm = ALIGN8_UP(buf); // [atm_id,xyz,grid]
275         double *eprim = grid2atm + atmcount*3*BLKSIZE;
276         double *cart_gto = eprim + NPRIMAX*BLKSIZE*2;
277 
278         _fill_grid2atm(grid2atm, coord, bgrids, ngrids,
279                        atm+atmstart*ATM_SLOTS, atmcount, bas, nbas, env);
280 
281         for (bas_id = sh0; bas_id < sh1; bas_id++) {
282                 np = bas[bas_id*BAS_SLOTS+NPRIM_OF];
283                 nc = bas[bas_id*BAS_SLOTS+NCTR_OF ];
284                 l  = bas[bas_id*BAS_SLOTS+ANG_OF  ];
285                 deg = CINTlen_spinor(bas_id, bas);
286                 fac1 = fac * CINTcommon_fac_sp(l);
287                 p_exp  = env + bas[bas_id*BAS_SLOTS+PTR_EXP];
288                 pcoeff = env + bas[bas_id*BAS_SLOTS+PTR_COEFF];
289                 atm_id = bas[bas_id*BAS_SLOTS+ATOM_OF];
290                 pcoord = grid2atm + (atm_id - atmstart) * 3*BLKSIZE;
291                 ao_id = ao_loc[bas_id] - ao_loc[sh0];
292                 if (non0table[bas_id] &&
293                     (*fexp)(eprim, pcoord, p_exp, pcoeff, l, np, nc, bgrids, fac1)) {
294                         kappa = bas[bas_id*BAS_SLOTS+KAPPA_OF];
295                         dcart = (l+1)*(l+2)/2;
296                         di = nc * dcart;
297                         ri = env + atm[PTR_COORD+atm_id*ATM_SLOTS];
298                         (*feval)(cart_gto, ri, eprim, pcoord, p_exp, pcoeff,
299                                  env, l, np, nc, di, bgrids, bgrids);
300                         for (i = 0; i < ncomp; i++) {
301                                 pcart = cart_gto + i * di*bgrids*ncomp_e1;
302                                 off = (i*nao+ao_id)*ngrids;
303                                 (*c2s)(aoa+off, aob+off, pcart,
304                                        ngrids, bgrids, nc, kappa, l);
305                         }
306                 } else {
307                         for (i = 0; i < ncomp; i++) {
308                                 off = (i*nao+ao_id)*ngrids;
309                                 _zset0(aoa+off, ngrids, bgrids, nc*deg);
310                                 _zset0(aob+off, ngrids, bgrids, nc*deg);
311                         }
312                 }
313         }
314 }
315 
GTOshloc_by_atom(int * shloc,int * shls_slice,int * ao_loc,int * atm,int * bas)316 int GTOshloc_by_atom(int *shloc, int *shls_slice, int *ao_loc, int *atm, int *bas)
317 {
318         const int sh0 = shls_slice[0];
319         const int sh1 = shls_slice[1];
320         int ish, nshblk, lastatm;
321         shloc[0] = sh0;
322         nshblk = 1;
323         lastatm = bas[BAS_SLOTS*sh0+ATOM_OF];
324         for (ish = sh0; ish < sh1; ish++) {
325                 if (lastatm != bas[BAS_SLOTS*ish+ATOM_OF]) {
326                         lastatm = bas[BAS_SLOTS*ish+ATOM_OF];
327                         shloc[nshblk] = ish;
328                         nshblk++;
329                 }
330         }
331         shloc[nshblk] = sh1;
332         return nshblk;
333 }
334 
335 /*
336  * non0table[ngrids/blksize,natm] is the T/F table for ao values to
337  * screen the ao evaluation for each shell
338  */
GTOeval_loop(void (* fiter)(),FPtr_eval feval,FPtr_exp fexp,double fac,int ngrids,int param[],int * shls_slice,int * ao_loc,double * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)339 void GTOeval_loop(void (*fiter)(), FPtr_eval feval, FPtr_exp fexp, double fac,
340                   int ngrids, int param[], int *shls_slice, int *ao_loc,
341                   double *ao, double *coord, char *non0table,
342                   int *atm, int natm, int *bas, int nbas, double *env)
343 {
344         int shloc[shls_slice[1]-shls_slice[0]+1];
345         const int nshblk = GTOshloc_by_atom(shloc, shls_slice, ao_loc, atm, bas);
346         const int nblk = (ngrids+BLKSIZE-1) / BLKSIZE;
347         const size_t Ngrids = ngrids;
348 
349 #pragma omp parallel
350 {
351         const int sh0 = shls_slice[0];
352         const int sh1 = shls_slice[1];
353         const size_t nao = ao_loc[sh1] - ao_loc[sh0];
354         int ip, ib, k, iloc, ish;
355         size_t aoff, bgrids;
356         int ncart = NCTR_CART * param[TENSOR] * param[POS_E1];
357         double *buf = malloc(sizeof(double) * BLKSIZE*(NPRIMAX*2+ncart));
358 #pragma omp for schedule(dynamic, 4)
359         for (k = 0; k < nblk*nshblk; k++) {
360                 iloc = k / nblk;
361                 ish = shloc[iloc];
362                 aoff = ao_loc[ish] - ao_loc[sh0];
363                 ib = k - iloc * nblk;
364                 ip = ib * BLKSIZE;
365                 bgrids = MIN(ngrids-ip, BLKSIZE);
366                 (*fiter)(feval, fexp, fac, nao, Ngrids, bgrids,
367                          param, shloc+iloc, ao_loc, buf, ao+aoff*Ngrids+ip,
368                          coord+ip, non0table+ib*nbas,
369                          atm, natm, bas, nbas, env);
370         }
371         free(buf);
372 }
373 }
374 
GTOeval_sph_drv(FPtr_eval feval,FPtr_exp fexp,double fac,int ngrids,int param[],int * shls_slice,int * ao_loc,double * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)375 void GTOeval_sph_drv(FPtr_eval feval, FPtr_exp fexp, double fac, int ngrids,
376                      int param[], int *shls_slice, int *ao_loc,
377                      double *ao, double *coord, char *non0table,
378                      int *atm, int natm, int *bas, int nbas, double *env)
379 {
380         GTOeval_loop(GTOeval_sph_iter, feval, fexp, fac, ngrids,
381                      param, shls_slice, ao_loc,
382                      ao, coord, non0table, atm, natm, bas, nbas, env);
383 }
384 
GTOeval_cart_drv(FPtr_eval feval,FPtr_exp fexp,double fac,int ngrids,int param[],int * shls_slice,int * ao_loc,double * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)385 void GTOeval_cart_drv(FPtr_eval feval, FPtr_exp fexp, double fac, int ngrids,
386                       int param[], int *shls_slice, int *ao_loc,
387                       double *ao, double *coord, char *non0table,
388                       int *atm, int natm, int *bas, int nbas, double *env)
389 {
390         GTOeval_loop(GTOeval_cart_iter, feval, fexp, fac, ngrids,
391                      param, shls_slice, ao_loc,
392                      ao, coord, non0table, atm, natm, bas, nbas, env);
393 }
394 
GTOeval_spinor_drv(FPtr_eval feval,FPtr_exp fexp,void (* c2s)(),double fac,int ngrids,int param[],int * shls_slice,int * ao_loc,double complex * ao,double * coord,char * non0table,int * atm,int natm,int * bas,int nbas,double * env)395 void GTOeval_spinor_drv(FPtr_eval feval, FPtr_exp fexp, void (*c2s)(), double fac,
396                         int ngrids, int param[], int *shls_slice, int *ao_loc,
397                         double complex *ao, double *coord, char *non0table,
398                         int *atm, int natm, int *bas, int nbas, double *env)
399 {
400         int shloc[shls_slice[1]-shls_slice[0]+1];
401         const int nshblk = GTOshloc_by_atom(shloc, shls_slice, ao_loc, atm, bas);
402         const int nblk = (ngrids+BLKSIZE-1) / BLKSIZE;
403         const size_t Ngrids = ngrids;
404 
405 #pragma omp parallel
406 {
407         const int sh0 = shls_slice[0];
408         const int sh1 = shls_slice[1];
409         const size_t nao = ao_loc[sh1] - ao_loc[sh0];
410         int ip, ib, k, iloc, ish;
411         size_t aoff, bgrids;
412         int ncart = NCTR_CART * param[TENSOR] * param[POS_E1];
413         double *buf = malloc(sizeof(double) * BLKSIZE*(NPRIMAX*2+ncart));
414 #pragma omp for schedule(dynamic, 4)
415         for (k = 0; k < nblk*nshblk; k++) {
416                 iloc = k / nblk;
417                 ish = shloc[iloc];
418                 aoff = ao_loc[ish] - ao_loc[sh0];
419                 ib = k - iloc * nblk;
420                 ip = ib * BLKSIZE;
421                 bgrids = MIN(ngrids-ip, BLKSIZE);
422                 GTOeval_spinor_iter(feval, fexp, c2s, fac,
423                                     nao, Ngrids, bgrids,
424                                     param, shloc+iloc, ao_loc, buf, ao+aoff*Ngrids+ip,
425                                     coord+ip, non0table+ib*nbas,
426                                     atm, natm, bas, nbas, env);
427         }
428         free(buf);
429 }
430 }
431 
432