1 /*
2  * Copyright (C) 2013  Qiming Sun <osirpt.sun@gmail.com>
3  *
4  * optimizer for 2e integrals.  Note if CINT2e_drv is only called a few
5  * hundred times, this optimizer cannot really speed up the integration.
6  */
7 
8 #include <stdlib.h>
9 #include <math.h>
10 #include <string.h>
11 #include "config.h"
12 #include "cint_bas.h"
13 #include "g1e.h"
14 #include "g1e_grids.h"
15 #include "g2e.h"
16 #include "g3c1e.h"
17 #include "optimizer.h"
18 #include "misc.h"
19 
20 // generate caller to CINTinit_2e_optimizer for each type of function
CINTinit_2e_optimizer(CINTOpt ** opt,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)21 void CINTinit_2e_optimizer(CINTOpt **opt, FINT *atm, FINT natm,
22                            FINT *bas, FINT nbas, double *env)
23 {
24         CINTOpt *opt0 = (CINTOpt *)malloc(sizeof(CINTOpt));
25         opt0->index_xyz_array = NULL;
26         opt0->non0ctr = NULL;
27         opt0->sortedidx = NULL;
28         opt0->nbas = nbas;
29         opt0->log_max_coeff = NULL;
30         opt0->pairdata = NULL;
31         *opt = opt0;
32 }
CINTinit_optimizer(CINTOpt ** opt,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)33 void CINTinit_optimizer(CINTOpt **opt, FINT *atm, FINT natm,
34                         FINT *bas, FINT nbas, double *env)
35 {
36         CINTinit_2e_optimizer(opt, atm, natm, bas, nbas, env);
37 }
38 
CINTdel_2e_optimizer(CINTOpt ** opt)39 void CINTdel_2e_optimizer(CINTOpt **opt)
40 {
41         CINTOpt *opt0 = *opt;
42         if (opt0 == NULL) { // when opt is created by CINTno_optimizer
43                 return;
44         }
45 
46         if (opt0->index_xyz_array != NULL) {
47                 free(opt0->index_xyz_array[0]);
48                 free(opt0->index_xyz_array);
49         }
50 
51         if (opt0->non0ctr != NULL) {
52                 free(opt0->sortedidx[0]);
53                 free(opt0->sortedidx);
54                 free(opt0->non0ctr[0]);
55                 free(opt0->non0ctr);
56         }
57 
58         if (opt0->log_max_coeff != NULL) {
59                 free(opt0->log_max_coeff[0]);
60                 free(opt0->log_max_coeff);
61         }
62 
63         CINTdel_pairdata_optimizer(opt0);
64 
65         free(opt0);
66         *opt = NULL;
67 }
CINTdel_optimizer(CINTOpt ** opt)68 void CINTdel_optimizer(CINTOpt **opt)
69 {
70         CINTdel_2e_optimizer(opt);
71 }
72 
CINTno_optimizer(CINTOpt ** opt,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)73 void CINTno_optimizer(CINTOpt **opt, FINT *atm, FINT natm,
74                       FINT *bas, FINT nbas, double *env)
75 {
76         *opt = NULL;
77 }
78 
_make_fakebas(FINT * fakebas,FINT * bas,FINT nbas,double * env)79 static FINT _make_fakebas(FINT *fakebas, FINT *bas, FINT nbas, double *env)
80 {
81         FINT i;
82         FINT max_l = 0;
83         for (i = 0; i < nbas; i++) {
84                 max_l = MAX(max_l, bas(ANG_OF,i));
85         }
86 
87         FINT fakenbas = max_l + 1;
88         for (i = 0; i < BAS_SLOTS*fakenbas; i++) {
89                 fakebas[i] = 0;
90         }
91         // fakebas only initializes ANG_OF, since the others does not
92         // affect index_xyz
93         for (i = 0; i <= max_l; i++) {
94                 fakebas[BAS_SLOTS*i+ANG_OF] = i;
95         }
96         return max_l;
97 }
_allocate_index_xyz(CINTOpt * opt,FINT max_l,FINT order)98 static FINT *_allocate_index_xyz(CINTOpt *opt, FINT max_l, FINT order)
99 {
100         FINT i;
101         FINT cumcart = (max_l+1) * (max_l+2) * (max_l+3) / 6;
102         FINT ll = max_l + 1;
103         FINT cc = cumcart;
104         for (i = 1; i < order; i++) {
105                 ll *= LMAX1;
106                 cc *= cumcart;
107         }
108         FINT *buf = malloc(sizeof(FINT) * cc * 3);
109         FINT **ppbuf = malloc(sizeof(FINT*) * ll);
110         ppbuf[0] = buf;
111         for (i = 1; i < ll; i++) {
112                 ppbuf[i] = NULL;
113         }
114         opt->index_xyz_array = ppbuf;
115         return buf;
116 }
gen_idx(CINTOpt * opt,void (* finit)(),void (* findex_xyz)(),FINT order,FINT max_l,FINT * ng,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)117 static void gen_idx(CINTOpt *opt, void (*finit)(), void (*findex_xyz)(),
118                     FINT order, FINT max_l, FINT *ng,
119                     FINT *atm, FINT natm, FINT *bas, FINT nbas, double *env)
120 {
121         FINT i, j, k, l, ptr;
122         FINT fakebas[BAS_SLOTS*LMAX1];
123         FINT max_l1 = _make_fakebas(fakebas, bas, nbas, env);
124         if (max_l == 0) {
125                 max_l = max_l1;
126         } else {
127                 max_l = MIN(max_l, max_l1);
128         }
129         FINT fakenbas = max_l+1;
130         FINT *buf = _allocate_index_xyz(opt, max_l, order);
131 
132         CINTEnvVars envs;
133         FINT shls[4];
134         if (order == 2) {
135                 for (i = 0; i <= max_l; i++) {
136                 for (j = 0; j <= max_l; j++) {
137                         shls[0] = i; shls[1] = j;
138                         (*finit)(&envs, ng, shls, atm, natm, fakebas, fakenbas, env);
139                         ptr = i*LMAX1 + j;
140                         opt->index_xyz_array[ptr] = buf;
141                         (*findex_xyz)(opt->index_xyz_array[ptr], &envs);
142                         buf += envs.nf * 3;
143                 } }
144 
145         } else if (order == 3) {
146                 for (i = 0; i <= max_l; i++) {
147                 for (j = 0; j <= max_l; j++) {
148                 for (k = 0; k <= max_l; k++) {
149                         shls[0] = i; shls[1] = j; shls[2] = k;
150                         (*finit)(&envs, ng, shls, atm, natm, fakebas, fakenbas, env);
151                         ptr = i*LMAX1*LMAX1 + j*LMAX1 + k;
152                         opt->index_xyz_array[ptr] = buf;
153                         (*findex_xyz)(opt->index_xyz_array[ptr], &envs);
154                         buf += envs.nf * 3;
155                 } } }
156 
157         } else {
158                 for (i = 0; i <= max_l; i++) {
159                 for (j = 0; j <= max_l; j++) {
160                 for (k = 0; k <= max_l; k++) {
161                 for (l = 0; l <= max_l; l++) {
162                         shls[0] = i; shls[1] = j; shls[2] = k; shls[3] = l;
163                         (*finit)(&envs, ng, shls, atm, natm, fakebas, fakenbas, env);
164                         ptr = i*LMAX1*LMAX1*LMAX1
165                             + j*LMAX1*LMAX1
166                             + k*LMAX1
167                             + l;
168                         opt->index_xyz_array[ptr] = buf;
169                         (*findex_xyz)(opt->index_xyz_array[ptr], &envs);
170                         buf += envs.nf * 3;
171                 } } } }
172         }
173 }
174 
CINTall_1e_optimizer(CINTOpt ** opt,FINT * ng,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)175 void CINTall_1e_optimizer(CINTOpt **opt, FINT *ng,
176                           FINT *atm, FINT natm, FINT *bas, FINT nbas, double *env)
177 {
178         CINTinit_2e_optimizer(opt, atm, natm, bas, nbas, env);
179         CINTOpt_set_log_maxc(*opt, atm, natm, bas, nbas, env);
180         CINTOpt_set_non0coeff(*opt, atm, natm, bas, nbas, env);
181         gen_idx(*opt, &CINTinit_int1e_EnvVars, &CINTg1e_index_xyz,
182                 2, 0, ng, atm, natm, bas, nbas, env);
183 }
184 
CINTall_2e_optimizer(CINTOpt ** opt,FINT * ng,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)185 void CINTall_2e_optimizer(CINTOpt **opt, FINT *ng,
186                           FINT *atm, FINT natm, FINT *bas, FINT nbas, double *env)
187 {
188         CINTinit_2e_optimizer(opt, atm, natm, bas, nbas, env);
189         CINTOpt_setij(*opt, ng, atm, natm, bas, nbas, env);
190         CINTOpt_set_non0coeff(*opt, atm, natm, bas, nbas, env);
191         gen_idx(*opt, &CINTinit_int2e_EnvVars, &CINTg2e_index_xyz,
192                 4, 0, ng, atm, natm, bas, nbas, env);
193 }
194 
CINTall_3c2e_optimizer(CINTOpt ** opt,FINT * ng,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)195 void CINTall_3c2e_optimizer(CINTOpt **opt, FINT *ng,
196                             FINT *atm, FINT natm, FINT *bas, FINT nbas, double *env)
197 {
198         CINTinit_2e_optimizer(opt, atm, natm, bas, nbas, env);
199         CINTOpt_setij(*opt, ng, atm, natm, bas, nbas, env);
200         CINTOpt_set_non0coeff(*opt, atm, natm, bas, nbas, env);
201         gen_idx(*opt, &CINTinit_int3c2e_EnvVars, &CINTg2e_index_xyz,
202                 3, 0, ng, atm, natm, bas, nbas, env);
203 }
204 
CINTall_2c2e_optimizer(CINTOpt ** opt,FINT * ng,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)205 void CINTall_2c2e_optimizer(CINTOpt **opt, FINT *ng,
206                             FINT *atm, FINT natm, FINT *bas, FINT nbas, double *env)
207 {
208         CINTinit_2e_optimizer(opt, atm, natm, bas, nbas, env);
209         CINTOpt_set_log_maxc(*opt, atm, natm, bas, nbas, env);
210         CINTOpt_set_non0coeff(*opt, atm, natm, bas, nbas, env);
211         gen_idx(*opt, &CINTinit_int2c2e_EnvVars, &CINTg1e_index_xyz,
212                 2, 0, ng, atm, natm, bas, nbas, env);
213 }
214 
215 void CINTg3c1e_index_xyz(FINT *idx, const CINTEnvVars *envs);
CINTall_3c1e_optimizer(CINTOpt ** opt,FINT * ng,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)216 void CINTall_3c1e_optimizer(CINTOpt **opt, FINT *ng,
217                             FINT *atm, FINT natm, FINT *bas, FINT nbas, double *env)
218 {
219         CINTinit_2e_optimizer(opt, atm, natm, bas, nbas, env);
220         CINTOpt_setij(*opt, ng, atm, natm, bas, nbas, env);
221         CINTOpt_set_non0coeff(*opt, atm, natm, bas, nbas, env);
222         gen_idx(*opt, &CINTinit_int3c1e_EnvVars, &CINTg3c1e_index_xyz,
223                 3, 0, ng, atm, natm, bas, nbas, env);
224 }
225 
CINTall_1e_grids_optimizer(CINTOpt ** opt,FINT * ng,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)226 void CINTall_1e_grids_optimizer(CINTOpt **opt, FINT *ng,
227                                 FINT *atm, FINT natm, FINT *bas, FINT nbas, double *env)
228 {
229         CINTinit_2e_optimizer(opt, atm, natm, bas, nbas, env);
230         CINTOpt_set_log_maxc(*opt, atm, natm, bas, nbas, env);
231         CINTOpt_set_non0coeff(*opt, atm, natm, bas, nbas, env);
232         gen_idx(*opt, &CINTinit_int1e_grids_EnvVars, &CINTg1e_index_xyz,
233                 2, 0, ng, atm, natm, bas, nbas, env);
234 }
235 
236 #ifdef WITH_F12
237 void CINTinit_int2e_stg_EnvVars(CINTEnvVars *envs, FINT *ng, FINT *shls,
238                            FINT *atm, FINT natm, FINT *bas, FINT nbas, double *env);
CINTall_2e_stg_optimizer(CINTOpt ** opt,FINT * ng,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)239 void CINTall_2e_stg_optimizer(CINTOpt **opt, FINT *ng,
240                               FINT *atm, FINT natm, FINT *bas, FINT nbas, double *env)
241 {
242         CINTinit_2e_optimizer(opt, atm, natm, bas, nbas, env);
243         CINTOpt_setij(*opt, ng, atm, natm, bas, nbas, env);
244         CINTOpt_set_non0coeff(*opt, atm, natm, bas, nbas, env);
245         gen_idx(*opt, &CINTinit_int2e_stg_EnvVars, &CINTg2e_index_xyz,
246                 4, 0, ng, atm, natm, bas, nbas, env);
247 }
248 #endif
249 
250 #ifdef WITH_GTG
CINTall_2e_gtg_optimizer(CINTOpt ** opt,FINT * ng,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)251 void CINTall_2e_gtg_optimizer(CINTOpt **opt, FINT *ng,
252                               FINT *atm, FINT natm, FINT *bas, FINT nbas, double *env)
253 {
254         CINTinit_2e_optimizer(opt, atm, natm, bas, nbas, env);
255         CINTOpt_setij(*opt, ng, atm, natm, bas, nbas, env);
256         CINTOpt_set_non0coeff(*opt, atm, natm, bas, nbas, env);
257         gen_idx(*opt, &CINTinit_int2e_gtg_EnvVars, &CINTg2e_index_xyz,
258                 4, 0, ng, atm, natm, bas, nbas, env);
259 }
260 
CINTall_3c2e_gtg_optimizer(CINTOpt ** opt,FINT * ng,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)261 void CINTall_3c2e_gtg_optimizer(CINTOpt **opt, FINT *ng,
262                                 FINT *atm, FINT natm, FINT *bas, FINT nbas, double *env)
263 {
264         CINTinit_2e_optimizer(opt, atm, natm, bas, nbas, env);
265         CINTOpt_setij(*opt, ng, atm, natm, bas, nbas, env);
266         CINTOpt_set_non0coeff(*opt, atm, natm, bas, nbas, env);
267         gen_idx(*opt, &CINTinit_int3c2e_gtg_EnvVars, &CINTg2e_index_xyz,
268                 3, 0, ng, atm, natm, bas, nbas, env);
269 }
270 
CINTall_2c2e_gtg_optimizer(CINTOpt ** opt,FINT * ng,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)271 void CINTall_2c2e_gtg_optimizer(CINTOpt **opt, FINT *ng,
272                                 FINT *atm, FINT natm, FINT *bas, FINT nbas, double *env)
273 {
274         CINTinit_2e_optimizer(opt, atm, natm, bas, nbas, env);
275         CINTOpt_set_log_maxc(*opt, atm, natm, bas, nbas, env);
276         CINTOpt_set_non0coeff(*opt, atm, natm, bas, nbas, env);
277         gen_idx(*opt, &CINTinit_int2c2e_gtg_EnvVars, &CINTg1e_index_xyz,
278                 2, 0, ng, atm, natm, bas, nbas, env);
279 }
280 #endif
281 
282 
283 // little endian on x86
284 typedef union {
285     double d;
286     unsigned short s[4];
287 } type_IEEE754;
288 // ~4 times faster than built-in log
approx_log(double x)289 static inline double approx_log(double x)
290 {
291         type_IEEE754 y;
292         y.d = x;
293         return ((y.s[3] >> 4) - 1023 + 1) * 0.693145751953125;
294 }
295 
CINTOpt_log_max_pgto_coeff(double * log_maxc,double * coeff,FINT nprim,FINT nctr)296 void CINTOpt_log_max_pgto_coeff(double *log_maxc, double *coeff, FINT nprim, FINT nctr)
297 {
298         FINT i, ip;
299         double maxc;
300         for (ip = 0; ip < nprim; ip++) {
301                 maxc = 0;
302                 for (i = 0; i < nctr; i++) {
303                         maxc = MAX(maxc, fabs(coeff[i*nprim+ip]));
304                 }
305                 log_maxc[ip] = approx_log(maxc);
306         }
307 }
308 
309 
CINTOpt_set_log_maxc(CINTOpt * opt,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)310 void CINTOpt_set_log_maxc(CINTOpt *opt, FINT *atm, FINT natm,
311                           FINT *bas, FINT nbas, double *env)
312 {
313         FINT i, iprim, ictr;
314         double *ci;
315         size_t tot_prim = 0;
316         for (i = 0; i < nbas; i++) {
317                 tot_prim += bas(NPRIM_OF, i);
318         }
319         if (tot_prim == 0) {
320                 return;
321         }
322 
323         opt->log_max_coeff = malloc(sizeof(double *) * MAX(nbas, 1));
324         double *plog_maxc = malloc(sizeof(double) * tot_prim);
325         opt->log_max_coeff[0] = plog_maxc;
326         for (i = 0; i < nbas; i++) {
327                 iprim = bas(NPRIM_OF, i);
328                 ictr = bas(NCTR_OF, i);
329                 ci = env + bas(PTR_COEFF, i);
330                 opt->log_max_coeff[i] = plog_maxc;
331                 CINTOpt_log_max_pgto_coeff(plog_maxc, ci, iprim, ictr);
332                 plog_maxc += iprim;
333         }
334 }
335 
CINTset_pairdata(PairData * pairdata,double * ai,double * aj,double * ri,double * rj,double * log_maxci,double * log_maxcj,FINT li_ceil,FINT lj_ceil,FINT iprim,FINT jprim,double rr_ij,double expcutoff)336 FINT CINTset_pairdata(PairData *pairdata, double *ai, double *aj, double *ri, double *rj,
337                      double *log_maxci, double *log_maxcj,
338                      FINT li_ceil, FINT lj_ceil, FINT iprim, FINT jprim,
339                      double rr_ij, double expcutoff)
340 {
341         FINT ip, jp, n;
342         double aij, eij, cceij;
343         // This estimation is based on the assumption that the two gaussian charge
344         // distributions are separated in space. If two gaussians are too close (the
345         // distance between gaussian product ij and gaussian product kl < 1), rr
346         double log_rr_ij = (li_ceil+lj_ceil+1) * approx_log(rr_ij+1) / 2;
347         PairData *pdata;
348 
349         FINT empty = 1;
350         for (n = 0, jp = 0; jp < jprim; jp++) {
351                 for (ip = 0; ip < iprim; ip++, n++) {
352                         aij = 1/(ai[ip] + aj[jp]);
353                         eij = rr_ij * ai[ip] * aj[jp] * aij;
354                         cceij = eij - log_rr_ij - log_maxci[ip] - log_maxcj[jp];
355                         pdata = pairdata + n;
356                         pdata->cceij = cceij;
357                         if (cceij < expcutoff) {
358                                 empty = 0;
359                                 pdata->rij[0] = (ai[ip]*ri[0] + aj[jp]*rj[0]) * aij;
360                                 pdata->rij[1] = (ai[ip]*ri[1] + aj[jp]*rj[1]) * aij;
361                                 pdata->rij[2] = (ai[ip]*ri[2] + aj[jp]*rj[2]) * aij;
362                                 pdata->eij = exp(-eij);
363                         } else {
364                                 pdata->rij[0] = 0;
365                                 pdata->rij[1] = 0;
366                                 pdata->rij[2] = 0;
367                                 pdata->eij = 0;
368                         }
369                 }
370         }
371         return empty;
372 }
373 
CINTOpt_setij(CINTOpt * opt,FINT * ng,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)374 void CINTOpt_setij(CINTOpt *opt, FINT *ng,
375                    FINT *atm, FINT natm, FINT *bas, FINT nbas, double *env)
376 {
377         FINT i, j, ip, jp;
378         FINT iprim, jprim, li, lj;
379         double *ai, *aj, *ri, *rj;
380         double expcutoff;
381         if (env[PTR_EXPCUTOFF] == 0) {
382                 expcutoff = EXPCUTOFF;
383         } else {
384                 expcutoff = MAX(MIN_EXPCUTOFF, env[PTR_EXPCUTOFF]);
385         }
386 
387         if (opt->log_max_coeff == NULL) {
388                 CINTOpt_set_log_maxc(opt, atm, natm, bas, nbas, env);
389         }
390         double **log_max_coeff = opt->log_max_coeff;
391         double *log_maxci, *log_maxcj;
392 
393         size_t tot_prim = 0;
394         for (i = 0; i < nbas; i++) {
395                 tot_prim += bas(NPRIM_OF, i);
396         }
397         if (tot_prim == 0 || tot_prim > MAX_PGTO_FOR_PAIRDATA) {
398                 return;
399         }
400         opt->pairdata = malloc(sizeof(PairData *) * MAX(nbas * nbas, 1));
401         PairData *pdata = malloc(sizeof(PairData) * tot_prim * tot_prim);
402         opt->pairdata[0] = pdata;
403 
404         FINT ijkl_inc;
405         if ((ng[IINC]+ng[JINC]) > (ng[KINC]+ng[LINC])) {
406                 ijkl_inc = ng[IINC] + ng[JINC];
407         } else {
408                 ijkl_inc = ng[KINC] + ng[LINC];
409         }
410 
411         FINT empty;
412         double rr;
413         PairData *pdata0;
414         for (i = 0; i < nbas; i++) {
415                 ri = env + atm(PTR_COORD,bas(ATOM_OF,i));
416                 ai = env + bas(PTR_EXP,i);
417                 iprim = bas(NPRIM_OF,i);
418                 li = bas(ANG_OF,i);
419                 log_maxci = log_max_coeff[i];
420 
421                 for (j = 0; j <= i; j++) {
422                         rj = env + atm(PTR_COORD,bas(ATOM_OF,j));
423                         aj = env + bas(PTR_EXP,j);
424                         jprim = bas(NPRIM_OF,j);
425                         lj = bas(ANG_OF,j);
426                         log_maxcj = log_max_coeff[j];
427                         rr = (ri[0]-rj[0])*(ri[0]-rj[0])
428                            + (ri[1]-rj[1])*(ri[1]-rj[1])
429                            + (ri[2]-rj[2])*(ri[2]-rj[2]);
430 
431                         empty = CINTset_pairdata(pdata, ai, aj, ri, rj, log_maxci, log_maxcj,
432                                                  li+ijkl_inc, lj, iprim, jprim, rr, expcutoff);
433                         if (i == 0 && j == 0) {
434                                 opt->pairdata[0] = pdata;
435                                 pdata += iprim * jprim;
436                         } else if (!empty) {
437                                 opt->pairdata[i*nbas+j] = pdata;
438                                 pdata += iprim * jprim;
439                                 if (i != j) {
440                                         opt->pairdata[j*nbas+i] = pdata;
441                                         pdata0 = opt->pairdata[i*nbas+j];
442                                         // transpose pairdata
443                                         for (ip = 0; ip < iprim; ip++) {
444                                         for (jp = 0; jp < jprim; jp++, pdata++) {
445                                                 memcpy(pdata, pdata0+jp*iprim+ip,
446                                                        sizeof(PairData));
447                                         } }
448                                 }
449                         } else {
450                                 opt->pairdata[i*nbas+j] = NOVALUE;
451                                 opt->pairdata[j*nbas+i] = NOVALUE;
452                         }
453                 }
454         }
455 }
456 
CINTdel_pairdata_optimizer(CINTOpt * cintopt)457 void CINTdel_pairdata_optimizer(CINTOpt *cintopt)
458 {
459         if (cintopt != NULL && cintopt->pairdata != NULL) {
460                 free(cintopt->pairdata[0]);
461                 free(cintopt->pairdata);
462                 cintopt->pairdata = NULL;
463         }
464 }
465 
CINTOpt_non0coeff_byshell(FINT * sortedidx,FINT * non0ctr,double * ci,FINT iprim,FINT ictr)466 void CINTOpt_non0coeff_byshell(FINT *sortedidx, FINT *non0ctr, double *ci,
467                                FINT iprim, FINT ictr)
468 {
469         FINT ip, j, k, kp;
470         FINT zeroidx[ictr];
471         for (ip = 0; ip < iprim; ip++) {
472                 for (j = 0, k = 0, kp = 0; j < ictr; j++) {
473                         if (ci[iprim*j+ip] != 0) {
474                                 sortedidx[k] = j;
475                                 k++;
476                         } else {
477                                 zeroidx[kp] = j;
478                                 kp++;
479                         }
480                 }
481 // Append the index of zero-coeff to sortedidx for function CINTprim_to_ctr_0
482                 for (j = 0; j < kp; j++) {
483                         sortedidx[k+j] = zeroidx[j];
484                 }
485                 non0ctr[ip] = k;
486                 sortedidx += ictr;
487         }
488 }
489 
CINTOpt_set_non0coeff(CINTOpt * opt,FINT * atm,FINT natm,FINT * bas,FINT nbas,double * env)490 void CINTOpt_set_non0coeff(CINTOpt *opt, FINT *atm, FINT natm,
491                            FINT *bas, FINT nbas, double *env)
492 {
493         FINT i, iprim, ictr;
494         double *ci;
495         size_t tot_prim = 0;
496         size_t tot_prim_ctr = 0;
497         for (i = 0; i < nbas; i++) {
498                 tot_prim += bas(NPRIM_OF, i);
499                 tot_prim_ctr += bas(NPRIM_OF, i) * bas(NCTR_OF,i);
500         }
501         if (tot_prim == 0) {
502                 return;
503         }
504 
505         opt->non0ctr = malloc(sizeof(FINT *) * MAX(nbas, 1));
506         opt->sortedidx = malloc(sizeof(FINT *) * MAX(nbas, 1));
507         FINT *pnon0ctr = malloc(sizeof(FINT) * tot_prim);
508         FINT *psortedidx = malloc(sizeof(FINT) * tot_prim_ctr);
509         opt->non0ctr[0] = pnon0ctr;
510         opt->sortedidx[0] = psortedidx;
511         for (i = 0; i < nbas; i++) {
512                 iprim = bas(NPRIM_OF, i);
513                 ictr = bas(NCTR_OF, i);
514                 ci = env + bas(PTR_COEFF, i);
515                 opt->non0ctr[i] = pnon0ctr;
516                 opt->sortedidx[i] = psortedidx;
517                 CINTOpt_non0coeff_byshell(psortedidx, pnon0ctr, ci, iprim, ictr);
518                 pnon0ctr += iprim;
519                 psortedidx += iprim * ictr;
520         }
521 }
522