1 /* ----------------------------------------------------------------------
2   eri_ll.c
3 
4   Low-level functions of LIBERI
5 
6   Coded by TOYODA Masayuki, June 19, 2009
7 ----------------------------------------------------------------------*/
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <math.h>
11 #include "eri_def.h"
12 #include "eri.h"
13 #include "eri_sf.h"
14 #include "eri_interpolate.h"
15 #include "eri_gtbl.h"
16 #include "sbt/linear/eri_linfsbt.h"
17 #include "sbt/log/eri_logfsbt.h"
18 
19 
20 #define LOOP_UNROLLING 1
21 
22 
23 #define WORKSPACE_AT_STACK 0
24 #define MAXNUMR 1024
25 
26 /*--- Switches for speeding-up ---*/
27 #define SUMSKIP    1
28 #define SUMSKIP_THRESHOLD 1e-10
29 
30 /* index table */
31 static int g_itbl_flag = 0;
32 static int g_itbl_j2l[ERI_LMAXMAX*ERI_LMAXMAX];
33 static int g_itbl_j2m[ERI_LMAXMAX*ERI_LMAXMAX];
34 
35 static char* my_version = "GAMMA-091210";
36 
37 
38 struct ERI_Struct {
39   int lmax;
40   int lmax_gl;
41   int rcsh;
42 
43   /* Gauss-Laguerre quadrature */
44   int glq_n;
45   double *glq_x;
46   double *glq_w;
47   double *glq_j;
48   double *glq_dj;
49 
50   /* SBT */
51   int sbttype;
52   ERI_SBT_t  *sbt;
53 
54   /* Gaunt-coefficients */
55   ERI_Gtbl_t *gtbl;
56 
57   ERI_Init_Misc info;
58 
59 
60 #if WORKSPACE_AT_STACK
61 #else
62   double *ws_in;  /* [ERI_NGRIDMAX*2] */
63   double *ws_out; /* [ERI_NGRIDMAX*2] */
64   double *ws_sb;  /* [(ERI_LMAXMAX*2)*ERI_NGRIDMAX] */
65   double *ws_dsb; /* [(ERI_LMAXMAX*2)*ERI_NGRIDMAX] */
66 #endif
67 };
68 
69 
70 
71 
72 /*----------------------------------------------------------------------
73   INTERNAL SUBROUTINES
74 ----------------------------------------------------------------------*/
phase(int l,double * a)75 static void phase(int l, double *a)
76 {
77   double re = a[0], im = a[1];
78 
79   switch ((l%4+4)%4) {
80   case 0: break;
81   case 1: a[0] = -im; a[1] =  re; break;
82   case 2: a[0] = -re; a[1] = -im; break;
83   case 3: a[0] =  im; a[1] = -re; break;
84   }
85 }
86 
87 
88 
ERI_Version(void)89 const char* ERI_Version(void)
90 {
91   return my_version;
92 }
93 
94 
95 
96 
97 /*----------------------------------------------------------------------
98   ERI_Init
99 ----------------------------------------------------------------------*/
ERI_Init(int lmax,int lmax_gl,int ngrid,int ngl,int rcsh,const ERI_Init_Misc * info)100 ERI_t* ERI_Init(
101   int lmax,
102   int lmax_gl,
103   int ngrid,
104   int ngl,
105   int rcsh,
106   const ERI_Init_Misc *info
107 )
108 {
109   int j, l, m, jmax;
110   double dt;
111   ERI_t *solver;
112 
113   STEPTRACE("ERI_Init: in");
114 
115   jmax  = lmax*lmax;
116 
117   /* allocation */
118   solver = (ERI_t*)malloc(sizeof(ERI_t));
119   if (NULL==solver) { return NULL; }
120 
121   solver->glq_x  = (double*)malloc(sizeof(double)*ngl);
122   solver->glq_w  = (double*)malloc(sizeof(double)*ngl);
123   solver->glq_j  = (double*)malloc(sizeof(double)*ngl*lmax);
124   solver->glq_dj = (double*)malloc(sizeof(double)*ngl*lmax);
125 #if WORKSPACE_AT_STACK
126 #else
127   solver->ws_in  = (double*)malloc(sizeof(double)*ngrid*2);
128   solver->ws_out = (double*)malloc(sizeof(double)*ngrid*2);
129   solver->ws_sb  = (double*)malloc(sizeof(double)*lmax*ngrid*2);
130   solver->ws_dsb = (double*)malloc(sizeof(double)*lmax*ngrid*2);
131 #endif
132   if (NULL==solver->glq_x || NULL==solver->glq_w
133     || NULL==solver->glq_j || NULL==solver->glq_dj ) {
134     ERI_Free(solver);
135     return NULL;
136   }
137 
138   /* j <-> (l,m) conversion table */
139   if (0==g_itbl_flag) {
140     for (j=0; j<jmax; j++) {
141       l = (int)sqrt((double)j);
142       m = j-l*(l+1);
143       g_itbl_j2l[j] = l;
144       g_itbl_j2m[j] = m;
145     }
146   }
147 
148   /* abscissas and weights for Gauss-Laguerre integration */
149   ERI_GLQ(solver->glq_x, solver->glq_w, ngl);
150 
151   /* Gaunt coefficients */
152   solver->gtbl = ERI_Gtbl_Init(lmax, rcsh);
153 
154   /* additional parameters */
155   if (NULL==info) {
156     /* default values */
157     solver->info.sbttype = ERI_SBT_LINEAR;
158     solver->info.rmax    = 100.0;
159     solver->info.rho0    = -10.0;
160     solver->info.qmin    = 1e-4;
161     solver->info.qmax    = 1e-2;
162     solver->info.nq      = 3;
163   } else {
164     solver->info.sbttype = info->sbttype;
165     solver->info.rmax    = info->rmax;
166     solver->info.rho0    = info->rho0;
167     solver->info.qmin    = info->qmin;
168     solver->info.qmax    = info->qmax;
169     solver->info.nq      = info->nq;
170   }
171   dt = 2.0*PI/(log(solver->info.rmax)-(solver->info.rho0));
172 
173   switch (solver->info.sbttype) {
174   case ERI_SBT_LINEAR:
175     solver->sbt = ERI_LinFSBT_Init(ngrid, lmax, solver->info.rmax);
176     break;
177   case ERI_SBT_LOG:
178     solver->sbt = ERI_LogFSBT_Init(lmax, ngrid, solver->info.rho0, dt,
179       solver->info.qmin, solver->info.qmax, solver->info.nq);
180     break;
181   }
182   if (NULL==solver->sbt) {
183     ERI_Free(solver);
184     return NULL;
185   }
186 
187   solver->lmax    = lmax;
188   solver->lmax_gl = lmax_gl;
189   solver->glq_n   = ngl;
190   solver->rcsh    = rcsh;
191 
192   return solver;
193 }
194 
195 
196 
197 
198 /*----------------------------------------------------------------------
199   ERI_Required_Size
200 ----------------------------------------------------------------------*/
ERI_Required_Size(int lmax,int lmax_gl,int ngrid,int ngl,int rcsh,const ERI_Init_Misc * info)201 size_t ERI_Required_Size(
202   int lmax,
203   int lmax_gl,
204   int ngrid,
205   int ngl,
206   int rcsh,
207   const ERI_Init_Misc *info
208 )
209 {
210   int nq, type, jmax;
211   size_t sz;
212 
213   jmax = lmax*lmax;
214 
215   sz = sizeof(ERI_t)
216        + ERI_Gtbl_Required_Size(lmax, 0)
217        + sizeof(double)*ngl            /* glx */
218        + sizeof(double)*ngl            /* glw */
219        + sizeof(double)*ngl*lmax       /* glq_j */
220        + sizeof(double)*ngl*lmax       /* glq_dj */
221     ;
222 
223   /* Gaunt coefficients */
224   switch (rcsh) {
225   case ERI_SH_COMPLEX:
226     sz += ERI_Gtbl_Required_Size(lmax, 0);
227     break;
228   case ERI_SH_REAL:
229     sz += ERI_Gtbl_Required_Size(lmax, 1);
230     break;
231   default:
232     fprintf(stderr, "*** undefined rcsh value(%d)\n", rcsh);
233     abort();
234   }
235 
236   type = info ? info->sbttype : ERI_SBT_LINEAR;
237   switch (type) {
238   case ERI_SBT_LOG:
239     if (NULL==info) {
240       nq = 3;
241     } else {
242       nq = info->nq;
243     }
244     sz += ERI_LogFSBT_Required_Size(lmax, ngrid, nq);
245     break;
246   case ERI_SBT_LINEAR:
247     sz += ERI_LinFSBT_Required_Size(ngrid, lmax);
248     break;
249   default:
250     fprintf(stderr, "*** undefined type value(%d)\n", type);
251     abort();
252   }
253 
254   return sz;
255 }
256 
257 
258 
259 
260 /*----------------------------------------------------------------------
261   ERI_Free
262 ----------------------------------------------------------------------*/
ERI_Free(ERI_t * solver)263 void ERI_Free(ERI_t *solver)
264 {
265   if (solver) {
266     if (solver->glq_x)  { free(solver->glq_x); }
267     if (solver->glq_w)  { free(solver->glq_w); }
268     if (solver->glq_j)  { free(solver->glq_j); }
269     if (solver->sbt)    { ERI_SBT_Free(solver->sbt); }
270     if (solver->gtbl)   { ERI_Gtbl_Free(solver->gtbl); }
271     if (solver->ws_in)  { free(solver->ws_in); }
272     if (solver->ws_out) { free(solver->ws_out); }
273     if (solver->ws_sb)  { free(solver->ws_sb); }
274     if (solver->ws_dsb) { free(solver->ws_dsb); }
275     free(solver);
276   }
277 }
278 
279 
280 
ERI_lmax(const ERI_t * solver)281 int ERI_lmax(const ERI_t *solver)  { return solver->lmax; }
ERI_lmax_gl(const ERI_t * solver)282 int ERI_lmax_gl(const ERI_t *solver) { return solver->lmax_gl; }
ERI_ngl(const ERI_t * solver)283 int ERI_ngl(const ERI_t *solver)   { return solver->glq_n; }
ERI_rcsh(const ERI_t * solver)284 int ERI_rcsh(const ERI_t *solver) { return solver->rcsh; }
285 
ERI_Mesh_Array_glx(const ERI_t * solver)286 const double* ERI_Mesh_Array_glx(const ERI_t *solver)
287 {
288   return solver->glq_x;
289 }
290 
ERI_Mesh_Array_glw(const ERI_t * solver)291 const double* ERI_Mesh_Array_glw(const ERI_t *solver)
292 {
293   return solver->glq_w;
294 }
295 
ERI_ngrid(const ERI_t * solver)296 int ERI_ngrid(const ERI_t *solver)
297 {
298   return ERI_SBT_ngrid(solver->sbt);
299 }
300 
ERI_Mesh_r(const ERI_t * solver,int i)301 double ERI_Mesh_r(const ERI_t *solver, int i)
302 {
303   return ERI_SBT_Mesh_r(solver->sbt, i);
304 }
305 
306 
ERI_Mesh_k(const ERI_t * solver,int i)307 double ERI_Mesh_k(const ERI_t *solver, int i)
308 {
309   return ERI_SBT_Mesh_k(solver->sbt, i);
310 }
311 
312 
ERI_Mesh_Array_r(const ERI_t * solver)313 const double* ERI_Mesh_Array_r(const ERI_t *solver)
314 {
315   return ERI_SBT_Mesh_Array_r(solver->sbt);
316 }
317 
318 
ERI_Mesh_Array_k(const ERI_t * solver)319 const double* ERI_Mesh_Array_k(const ERI_t *solver)
320 {
321   return ERI_SBT_Mesh_Array_k(solver->sbt);
322 }
323 
324 
ERI_Mesh_dr(const ERI_t * solver,int i)325 double ERI_Mesh_dr(const ERI_t *solver, int i)
326 {
327   return ERI_SBT_Mesh_dr(solver->sbt, i);
328 }
329 
330 
ERI_Mesh_dk(const ERI_t * solver,int i)331 double ERI_Mesh_dk(const ERI_t *solver, int i)
332 {
333   return ERI_SBT_Mesh_dk(solver->sbt, i);
334 }
335 
336 
337 
338 /*----------------------------------------------------------------------
339   SIZE Calculators
340 ----------------------------------------------------------------------*/
ERI_Size_of_Orbital(const ERI_t * solver)341 size_t ERI_Size_of_Orbital(const ERI_t *solver)
342 {
343   int ngrid = ERI_ngrid(solver);
344   return sizeof(double)*ngrid;
345 }
346 
347 
ERI_Size_of_Gamma(const ERI_t * solver)348 size_t ERI_Size_of_Gamma(const ERI_t *solver)
349 {
350   int ngrid = ERI_ngrid(solver);
351   int lmax  = ERI_lmax(solver);
352   int jmax  = lmax*lmax;
353 
354 #if SUMSKIP
355   return sizeof(double)*(ngrid*jmax + jmax);
356 #else
357   return sizeof(double)*(ngrid*jmax);
358 #endif
359 }
360 
361 
ERI_Size_of_Alpha(const ERI_t * solver)362 size_t ERI_Size_of_Alpha(const ERI_t *solver)
363 {
364   int ndalp;
365   int ngrid = ERI_ngrid(solver);
366   int lmax  = ERI_lmax(solver);
367   int jmax  = lmax*lmax;
368 
369   switch (solver->rcsh) {
370   case ERI_SH_REAL:    ndalp = ngrid*jmax; break;
371   case ERI_SH_COMPLEX: ndalp = ngrid*jmax*2; break;
372   }
373 
374 #if SUMSKIP
375   return sizeof(double)*(ndalp + jmax);
376 #else
377   return sizeof(double)*(ndalp);
378 #endif
379 }
380 
381 
ERI_Size_of_Overlap(const ERI_t * solver)382 size_t ERI_Size_of_Overlap(const ERI_t *solver)
383 {
384   int ndp;
385   int ngrid = ERI_ngrid(solver);
386   int lmax0 = ERI_lmax_gl(solver);
387   int jmax0 = lmax0*lmax0;
388 
389   switch (solver->rcsh) {
390   case ERI_SH_COMPLEX: ndp = 2*ngrid*jmax0; break;
391   case ERI_SH_REAL:    ndp = ngrid*jmax0; break;
392   }
393 
394 #if SUMSKIP
395   return sizeof(double)*(ndp + jmax0);
396 #else
397   return sizeof(double)*ndp;
398 #endif
399 }
400 
401 
ERI_Size_of_GLF(const ERI_t * solver)402 size_t ERI_Size_of_GLF(const ERI_t *solver)
403 {
404   int ndglf;
405   int ngl   = ERI_ngl(solver);
406   int lmax0 = ERI_lmax_gl(solver);
407   int jmax0 = lmax0*lmax0;
408 
409   switch (solver->rcsh) {
410   case ERI_SH_COMPLEX: ndglf = 2*ngl*jmax0; break;
411   case ERI_SH_REAL:    ndglf = ngl*jmax0; break;
412   }
413 
414 #if SUMSKIP
415   return sizeof(double)*(ndglf + jmax0);
416 #else
417   return sizeof(double)*ndglf;
418 #endif
419 }
420 
421 
422 
423 
424 /*----------------------------------------------------------------------
425   ERI_Orbital
426 
427   For a given orbital function, this performs sph. Bessel transform.
428 ----------------------------------------------------------------------*/
ERI_Transform_Orbital(ERI_t * solver,double * fk,const double * fr,const double * xr,int n,int l)429 void ERI_Transform_Orbital(
430   ERI_t        *solver,
431   double       *fk,     /* (OUT) [ngrid] transformed orbital */
432   const double *fr,     /* (IN)  [ngrid] radial orbital function */
433   const double *xr,     /* (IN)  [ngrid] radial grid for fr */
434   int           n,      /* (IN)  number of radial grid points */
435   int           l       /* (IN)  angular momentum */
436 )
437 {
438   int i, ngrid;
439   const double *rmesh;
440   ERI_CSpline_t *cs;
441 
442 #if WORKSPACE_AT_STACK
443   double in[ERI_NGRIDMAX*2], out[ERI_NGRIDMAX*2];
444 #else
445   double *in = solver->ws_in;
446   double *out = solver->ws_out;
447 #endif
448 
449   ngrid = ERI_ngrid(solver);
450   rmesh = ERI_Mesh_Array_r(solver);
451 
452   /* change radial grid */
453   cs = ERI_CSpline_Init(xr, fr, n);
454   for (i=0; i<ngrid; i++) {
455     in[2*i+0] = ERI_CSpline_Eval(rmesh[i], cs);
456     in[2*i+1] = 0.0;
457   }
458   ERI_CSpline_Free(cs);
459 
460   /* transform */
461   ERI_SBT_Transform(solver->sbt, out, in, l, ERI_SBT_FORWARD);
462 
463   for (i=0; i<ngrid; i++) {
464     fk[i] = out[2*i+0];
465     /* imag part out[2*i+1] is thrown away */
466   }
467 }
468 
469 
470 
471 /*----------------------------------------------------------------------
472   ERI_Gamma
473   g  : Gamma factor (out)
474   dg : Deribative of Gamma factor (out)
475   fk : Transformed wave function
476   R  : Displacement
477 ----------------------------------------------------------------------*/
ERI_LL_Gamma(ERI_t * solver,double * g,double * dg,const double * fk,double rd)478 void ERI_LL_Gamma(
479   ERI_t        *solver,
480   double       *g,
481   double       *dg,
482   const double *fk,
483   double        rd
484 )
485 {
486   int ngrid, lmax, jmax;
487   int l1, l2, ik, ig, igbase, j;
488   const double *kmesh;
489 
490 #if SUMSKIP
491   double *gmax  = NULL;
492   double *dgmax = NULL;
493 #endif
494 
495 #if WORKSPACE_AT_STACK
496   double sb[(ERI_LMAXMAX*2)*ERI_NGRIDMAX];
497   double dsb[(ERI_LMAXMAX*2)*ERI_NGRIDMAX];
498   double in[ERI_NGRIDMAX*2];
499   double out[ERI_NGRIDMAX*2];
500 #else
501   double *sb  = solver->ws_sb;
502   double *dsb = solver->ws_dsb;
503   double *in  = solver->ws_in;
504   double *out = solver->ws_out;
505 #endif
506 
507   ngrid = ERI_ngrid(solver);
508   lmax  = ERI_lmax(solver);
509   jmax  = lmax*lmax;
510   kmesh = ERI_Mesh_Array_k(solver);
511 
512 #if SUMSKIP
513   gmax  =  &g[ngrid*jmax];
514   for (j=0; j<jmax; j++) { gmax[j] = 0.0; }
515   if (dg) {
516     dgmax =  &dg[ngrid*jmax];
517     for (j=0; j<jmax; j++) { dgmax[j] = 0.0; }
518   }
519 #endif
520 
521   /*--- the spherical Bessel functions ---*/
522   for (ik=0; ik<ngrid; ik++) {
523     ERI_Spherical_Bessel(kmesh[ik]*rd, lmax, &sb[ik*lmax], &dsb[ik*lmax]);
524   }
525 
526   for (l2=0; l2<lmax; l2++) {
527     for (ik=0; ik<ngrid; ik++) {
528       in[2*ik+0] = fk[ik]*sb[ik*lmax+l2];
529       in[2*ik+1] = 0.0;
530     }
531     ERI_SBT_Transform_Input(solver->sbt, in, ERI_SBT_BACKWARD);
532     for (l1=0; l1<lmax; l1++) {
533       j = l1*lmax+l2;
534       igbase = j*ngrid;
535       ERI_SBT_Transform_Output(solver->sbt, out, l1);
536       for (ik=0; ik<ngrid; ik++) {
537         ig = igbase+ik;
538         g[ig] = out[2*ik+0]*2.0/PI;
539         /* imag part out[2*ik+1] is thworn away */
540 #if SUMSKIP
541         if (fabs(g[ig])>gmax[j]) { gmax[j] = fabs(g[ig]); }
542 #endif
543       } /* loop of ik */
544     } /* loop of l1 */
545   } /* loop of l2 */
546 
547   if (dg) {
548     /*--- deribative ---*/
549     for (l2=0; l2<lmax; l2++) {
550       for (ik=0; ik<ngrid; ik++) {
551         in[2*ik+0] = fk[ik]*dsb[ik*lmax+l2]*kmesh[ik];
552         in[2*ik+1] = 0.0;
553       }
554       ERI_SBT_Transform_Input(solver->sbt, in, ERI_SBT_BACKWARD);
555       for (l1=0; l1<lmax; l1++) {
556         j = l1*lmax+l2;
557         igbase = j*ngrid;
558         ERI_SBT_Transform_Output(solver->sbt, out, l1);
559         for (ik=0; ik<ngrid; ik++) {
560           ig = igbase+ik;
561           dg[ig] = out[2*ik+0]*2.0/PI;
562           /* imag part out[2*ik+1] is thrown away */
563 #if SUMSKIP
564           if (fabs(dg[ig])>dgmax[j]) { dgmax[j] = fabs(dg[ig]); }
565 #endif
566         } /* loop of ik */
567       } /* loop of l1 */
568     } /* loop of l2 */
569   } /* end if */
570 }
571 
572 
573 
574 
575 /*----------------------------------------------------------------------
576   Alpha term
577 
578   Alpha term.
579   If you need the ferivatives as well, you should call ERI_Alpha_d.
580 
581   This routine uses the complex spherical harmonic functions.
582 
583   alpha[l1][m2](r) = 4 pi sum[l2][m2] i^(l-l1+l2) G(l,m,l1,m1,l2,m2)
584                                         Y[l2][m2](anlge R) g[l1][l2](r)
585 ----------------------------------------------------------------------*/
ERI_LL_Alpha(ERI_t * solver,double * alp,const double * g,double theta,double phi,int l,int m)586 void ERI_LL_Alpha(
587   ERI_t        *solver,
588   double       *alp,   /* (OUT) Alpha term*/
589   const double *g,     /* (IN) Gamma term */
590   double        theta, /*      in the spherical coordinates. */
591   double        phi,
592   int           l,     /* (IN) Angular momentum of the orbital */
593   int           m
594 )
595 {
596   int ngrid, lmax, jmax;
597   int j, j1, j2, ir, l1, m1, l2, m2;
598   int ia, ia_base, ig, ig_base;
599   double sh[2], dsht[2], dshp[2], fpg, fpgsh[2];
600 
601   const int *j2l   = g_itbl_j2l;
602   const int *j2m   = g_itbl_j2m;
603 
604   int ndalp;
605 
606   int i, n, igtbl;
607   const int *gtbl_n;
608   const long *gtbl_j1j2;
609   const double *gtbl_gc;
610 
611 #if SUMSKIP
612   double *amax = NULL;
613   const double *gmax = NULL;
614 #endif
615 
616   ngrid = ERI_ngrid(solver);
617   lmax  = ERI_lmax(solver);
618   jmax  = lmax*lmax;
619 
620   gtbl_n    = ERI_Gtbl_n(solver->gtbl);
621   gtbl_j1j2 = ERI_Gtbl_j1j2(solver->gtbl);
622   gtbl_gc   = ERI_Gtbl_gc(solver->gtbl);
623 
624   STEPTRACE("ERI_Alpha: in ");
625 
626   switch (solver->rcsh) {
627   case ERI_SH_REAL:    ndalp = ngrid*jmax; break;
628   case ERI_SH_COMPLEX: ndalp = ngrid*jmax*2; break;
629   }
630 
631 #if SUMSKIP
632   amax  = &alp[ndalp];
633   gmax  = &g[ngrid*jmax];
634   for (j1=0; j1<jmax; j1++) { amax[j1] = 0.0; }
635 #endif
636 
637   j = l*(l+1)+m;
638 
639   /* zero reset */
640   for (ia=0; ia<ndalp; ia++) { alp[ia] = 0.0; }
641 
642   igtbl = ERI_Gtbl_index(solver->gtbl, j);
643   n = gtbl_n[j];
644 
645   switch (solver->rcsh) {
646   case ERI_SH_COMPLEX:
647     for (i=0; i<n; i++) {
648       fpg = 4.0*PI*gtbl_gc[igtbl];
649       j1 = ERI_GTBL_UNPACK_J1( gtbl_j1j2[igtbl] );
650       j2 = ERI_GTBL_UNPACK_J2( gtbl_j1j2[igtbl] );
651       igtbl++;
652 
653       l1 = j2l[j1];
654       m1 = j2m[j1];
655       l2 = j2l[j2];
656       m2 = j2m[j2];
657 #if SUMSKIP
658       if (gmax[l1*lmax+l2]>SUMSKIP_THRESHOLD)
659 #endif
660       {
661         ERI_Spherical_Harmonics(l2, m2, theta, phi, sh, dsht, dshp);
662         fpgsh[0] = fpg*sh[0];
663         fpgsh[1] = fpg*sh[1];
664         phase(l-l1+l2, fpgsh);
665 
666         ia_base = j1*ngrid;
667         ig_base = (l1*lmax+l2)*ngrid;
668         for (ir=0; ir<ngrid; ir++) {
669           ia = 2*(ia_base+ir);
670           ig = ig_base+ir;
671           alp[ia+0] += fpgsh[0]*g[ig];
672           alp[ia+1] += fpgsh[1]*g[ig];
673         }
674       }
675     } /* loop of i */
676 #if SUMSKIP
677     for (j1=0; j1<jmax; j1++) {
678       ia_base = j1*ngrid;
679       for (ir=0; ir<ngrid; ir++) {
680         ia = 2*(ia_base+ir);
681         if (fabs(alp[ia+0])>amax[j1]) { amax[j1] = fabs(alp[ia+0]); }
682         if (fabs(alp[ia+1])>amax[j1]) { amax[j1] = fabs(alp[ia+1]); }
683       }
684     }
685 #endif
686     break;
687 
688   case ERI_SH_REAL:
689     for (i=0; i<n; i++) {
690       fpg = 4.0*PI*gtbl_gc[igtbl];
691       j1 = ERI_GTBL_UNPACK_J1( gtbl_j1j2[igtbl] );
692       j2 = ERI_GTBL_UNPACK_J2( gtbl_j1j2[igtbl] );
693       igtbl++;
694 
695       l1 = j2l[j1];
696       m1 = j2m[j1];
697       l2 = j2l[j2];
698       m2 = j2m[j2];
699 #if SUMSKIP
700       if (gmax[l1*lmax+l2]>SUMSKIP_THRESHOLD)
701 #endif
702       {
703         ERI_Real_Spherical_Harmonics(l2, m2, theta, phi, sh, dsht, dshp);
704         fpgsh[0] = fpg*sh[0];
705         fpgsh[1] = 0.0;
706         phase(l-l1+l2, fpgsh);
707 
708         ia_base = j1*ngrid;
709         ig_base = (l1*lmax+l2)*ngrid;
710         for (ir=0; ir<ngrid; ir++) {
711           ia = ia_base+ir;
712           ig = ig_base+ir;
713           alp[ia] += fpgsh[0]*g[ig];
714         }
715       }
716     } /* loop of i */
717 #if SUMSKIP
718     for (j1=0; j1<jmax; j1++) {
719       ia_base = j1*ngrid;
720       for (ir=0; ir<ngrid; ir++) {
721         ia = ia_base+ir;
722         if (fabs(alp[ia])>amax[j1]) { amax[j1] = fabs(alp[ia]); }
723       }
724     }
725 #endif
726     break;
727   }
728 
729   STEPTRACE("ERI_Alpha: out");
730 }
731 
732 
733 
734 /*----------------------------------------------------------------------
735   ERI_Alpha_d
736 
737   This calculates the Alpha term and its derivatives.
738   If you don't need the derivatives, you may call ERI_Alpha which is
739   faster than this routine.
740 ----------------------------------------------------------------------*/
ERI_LL_Alpha_d(ERI_t * solver,double * alp,double * dalp[3],const double * g,const double * dg,double rd,double theta,double phi,int l,int m)741 void ERI_LL_Alpha_d(
742   ERI_t        *solver,
743   double       *alp,     /* (OUT) Alpha term */
744   double       *dalp[3], /* (OUT) Derivatives of Alpha term */
745   const double *g,       /* (IN)  Gamma term */
746   const double *dg,      /* (IN)  Derivatives of Gamma term */
747   double        rd,      /* (IN)  Displacement from the expansion center */
748   double        theta,   /*       in the spherical coordinates. */
749   double        phi,
750   int           l,       /* (IN)  Angular mementum of the orbital */
751   int           m
752 )
753 {
754   int ngrid, lmax, jmax;
755   int i, j, ir, j1, j2, l1, m1, l2, m2;
756   int ia, ia_base, ig, ig_base;
757 
758   double sh[2], dsht[2], dshp[2], fpg, fpgsh[2], fpgdsht[2], fpgdshp[2];
759   double dgsh[2], gdsht[2], gdshp[2];
760 
761   double ct = cos(theta);
762   double st = sin(theta);
763   double cp = cos(phi);
764   double sp = sin(phi);
765   double x1, y1, z1, x2, y2, z2, x3, y3;
766 
767   const int *j2l   = g_itbl_j2l;
768   const int *j2m   = g_itbl_j2m;
769 
770   int ndalp;
771 
772   int n, igtbl;
773   const int* gtbl_n;
774   const long* gtbl_j1j2;
775   const double* gtbl_gc;
776 
777 #if SUMSKIP
778   double *amax, *damax[3];
779   const double *gmax, *dgmax;
780 #endif
781 
782   const double rd_thresh = 1e-10;
783 
784   ngrid = ERI_ngrid(solver);
785   lmax  = ERI_lmax(solver);
786   jmax  = lmax*lmax;
787 
788   gtbl_n    = ERI_Gtbl_n(solver->gtbl);
789   gtbl_j1j2 = ERI_Gtbl_j1j2(solver->gtbl);
790   gtbl_gc   = ERI_Gtbl_gc(solver->gtbl);
791 
792   switch (solver->rcsh) {
793   case ERI_SH_REAL:    ndalp = ngrid*jmax; break;
794   case ERI_SH_COMPLEX: ndalp = ngrid*jmax*2; break;
795   }
796 
797 #if SUMSKIP
798   amax     = &alp[ndalp];
799   damax[0] = &dalp[0][ndalp];
800   damax[1] = &dalp[1][ndalp];
801   damax[2] = &dalp[2][ndalp];
802   gmax     = &g[ngrid*jmax];
803   dgmax    = &dg[ngrid*jmax];
804   for (j1=0; j1<jmax; j1++) {
805     amax[j1] = 0.0;
806     damax[0][j1] = 0.0;
807     damax[1][j1] = 0.0;
808     damax[2][j1] = 0.0;
809   }
810 #endif
811 
812   /* zero reset */
813   for (ia=0; ia<ndalp; ia++) {
814     alp[ia] = 0.0;
815     dalp[0][ia] = 0.0;
816     dalp[1][ia] = 0.0;
817     dalp[2][ia] = 0.0;
818   }
819 
820   if (fabs(rd)<rd_thresh) {
821     /* The both orbitals are on a same site,
822        The derivatives are not to be calculated
823        but just set to zero. */
824     ERI_LL_Alpha(solver, alp, g, theta, phi, l, m);
825   } else {
826     /* The orbitals are on different sites.
827        The derivatives are calculated as well. */
828 
829     j = l*(l+1)+m;
830 
831     x1 = st*cp;     y1 = st*sp;    z1 = ct;
832     x2 = ct*cp/rd;  y2 = ct*sp/rd; z2 = -st/rd;
833     x3 = -sp/rd/st; y3 = cp/rd/st;
834 
835     igtbl = ERI_Gtbl_index(solver->gtbl, j);
836     n = gtbl_n[j];
837 
838     switch (solver->rcsh) {
839     case ERI_SH_COMPLEX:
840       for (i=0; i<n; i++) {
841         fpg = 4.0*PI*gtbl_gc[igtbl];
842         j1  = ERI_GTBL_UNPACK_J1( gtbl_j1j2[igtbl] );
843         j2  = ERI_GTBL_UNPACK_J2( gtbl_j1j2[igtbl] );
844         igtbl++;
845 
846         l1 = j2l[j1];
847         m1 = j2m[j1];
848         l2 = j2l[j2];
849         m2 = j2m[j2];
850 
851         ERI_Spherical_Harmonics(l2, m2, theta, phi, sh, dsht, dshp);
852         fpgsh[0]   = fpg*sh[0];
853         fpgsh[1]   = fpg*sh[1];
854         fpgdsht[0] = fpg*dsht[0];
855         fpgdsht[1] = fpg*dsht[1];
856         fpgdshp[0] = fpg*dshp[0];
857         fpgdshp[1] = fpg*dshp[1];
858         phase(l-l1+l2, fpgsh);
859         phase(l-l1+l2, fpgdsht);
860         phase(l-l1+l2, fpgdshp);
861 
862         ia_base  = j1*ngrid;
863         ig_base  = (l1*lmax+l2)*ngrid;
864 #if SUMSKIP
865         if (gmax[l1*lmax+l2]>SUMSKIP_THRESHOLD)
866 #endif
867         {
868           for (ir=0; ir<ngrid; ir++) {
869             ia = 2*(ia_base+ir);
870             ig = ig_base+ir;
871             alp[ia+0] += fpgsh[0]*g[ig];
872             alp[ia+1] += fpgsh[1]*g[ig];
873             /* derivatives */
874             gdsht[0] = g[ig]*fpgdsht[0];
875             gdsht[1] = g[ig]*fpgdsht[1];
876             gdshp[0] = g[ig]*fpgdshp[0];
877             gdshp[1] = g[ig]*fpgdshp[1];
878 
879             dalp[0][ia+0] += x2*gdsht[0] + x3*gdshp[0];
880             dalp[0][ia+1] += x2*gdsht[1] + x3*gdshp[1];
881             dalp[1][ia+0] += y2*gdsht[0] + y3*gdshp[0];
882             dalp[1][ia+1] += y2*gdsht[1] + y3*gdshp[1];
883             dalp[2][ia+0] += z2*gdsht[0];
884             dalp[2][ia+1] += z2*gdsht[1];
885           } /* --- loop of ir --- */
886         }
887 #if SUMSKIP
888         if (dgmax[l1*lmax+l2]>SUMSKIP_THRESHOLD)
889 #endif
890         {
891           for (ir=0; ir<ngrid; ir++) {
892             /* derivatives */
893             ia = 2*(ia_base+ir);
894             ig = ig_base+ir;
895             dgsh[0] = dg[ig]*fpgsh[0];
896             dgsh[1] = dg[ig]*fpgsh[1];
897             dalp[0][ia+0] += x1*dgsh[0];
898             dalp[0][ia+1] += x1*dgsh[1];
899             dalp[1][ia+0] += y1*dgsh[0];
900             dalp[1][ia+1] += y1*dgsh[1];
901             dalp[2][ia+0] += z1*dgsh[0];
902             dalp[2][ia+1] += z1*dgsh[1];
903           } /* --- loop of ir --- */
904         }
905       } /* --- loop of i ---*/
906 #if SUMSKIP
907       for (j1=0; j1<jmax; j1++) {
908         ia_base  = j1*ngrid;
909         for (ir=0; ir<ngrid; ir++) {
910           ia = 2*(ia_base+ir);
911           if (fabs(alp[ia+0])>amax[j1]) { amax[j1] = fabs(alp[ia+0]); }
912           if (fabs(alp[ia+1])>amax[j1]) { amax[j1] = fabs(alp[ia+1]); }
913           if (fabs(dalp[0][ia+0])>damax[0][j1]) {
914             damax[0][j1] = fabs(dalp[0][ia+0]);
915           }
916           if (fabs(dalp[0][ia+1])>damax[0][j1]) {
917             damax[0][j1] = fabs(dalp[0][ia+1]);
918           }
919           if (fabs(dalp[1][ia+0])>damax[1][j1]) {
920             damax[1][j1] = fabs(dalp[1][ia+0]);
921           }
922           if (fabs(dalp[1][ia+1])>damax[1][j1]) {
923             damax[1][j1] = fabs(dalp[1][ia+1]);
924           }
925           if (fabs(dalp[2][ia+0])>damax[2][j1]) {
926             damax[2][j1] = fabs(dalp[2][ia+0]);
927           }
928           if (fabs(dalp[2][ia+1])>damax[2][j1]) {
929             damax[2][j1] = fabs(dalp[2][ia+1]);
930           }
931         } /* loop of ir */
932       } /* loop of j1 */
933 #endif
934       break;
935 
936     case ERI_SH_REAL:
937       for (i=0; i<n; i++) {
938         fpg = 4.0*PI*gtbl_gc[igtbl];
939         j1  = ERI_GTBL_UNPACK_J1( gtbl_j1j2[igtbl] );
940         j2  = ERI_GTBL_UNPACK_J2( gtbl_j1j2[igtbl] );
941         igtbl++;
942 
943         l1 = j2l[j1];
944         m1 = j2m[j1];
945         l2 = j2l[j2];
946         m2 = j2m[j2];
947 
948         ERI_Real_Spherical_Harmonics(l2, m2, theta, phi, sh, dsht, dshp);
949         fpgsh[0]   = fpg*sh[0];
950         fpgsh[1]   = 0.0;
951         fpgdsht[0] = fpg*dsht[0];
952         fpgdsht[1] = 0.0;
953         fpgdshp[0] = fpg*dshp[0];
954         fpgdshp[1] = 0.0;
955         phase(l-l1+l2, fpgsh);
956         phase(l-l1+l2, fpgdsht);
957         phase(l-l1+l2, fpgdshp);
958 
959         ia_base  = j1*ngrid;
960         ig_base  = (l1*lmax+l2)*ngrid;
961 #if SUMSKIP
962         if (gmax[l1*lmax+l2]>SUMSKIP_THRESHOLD)
963 #endif
964         {
965           for (ir=0; ir<ngrid; ir++) {
966             ia = ia_base+ir;
967             ig = ig_base+ir;
968             alp[ia] += fpgsh[0]*g[ig];
969             /* derivatives */
970             gdsht[0] = g[ig]*fpgdsht[0];
971             gdshp[0] = g[ig]*fpgdshp[0];
972 
973             dalp[0][ia] += x2*gdsht[0] + x3*gdshp[0];
974             dalp[1][ia] += y2*gdsht[0] + y3*gdshp[0];
975             dalp[2][ia] += z2*gdsht[0];
976           } /* --- loop of ir --- */
977         }
978 #if SUMSKIP
979         if (dgmax[l1*lmax+l2]>SUMSKIP_THRESHOLD)
980 #endif
981         {
982           for (ir=0; ir<ngrid; ir++) {
983             /* derivatives */
984             ia = ia_base+ir;
985             ig = ig_base+ir;
986             dgsh[0] = dg[ig]*fpgsh[0];
987             dalp[0][ia] += x1*dgsh[0];
988             dalp[1][ia] += y1*dgsh[0];
989             dalp[2][ia] += z1*dgsh[0];
990           } /* --- loop of ir --- */
991         }
992       } /* --- loop of i ---*/
993 #if SUMSKIP
994       for (j1=0; j1<jmax; j1++) {
995         ia_base  = j1*ngrid;
996         for (ir=0; ir<ngrid; ir++) {
997           ia = ia_base+ir;
998           if (fabs(alp[ia])>amax[j1]) { amax[j1] = fabs(alp[ia]); }
999           if (fabs(dalp[0][ia])>damax[0][j1]) { damax[0][j1] = fabs(dalp[0][ia]); }
1000           if (fabs(dalp[1][ia])>damax[1][j1]) { damax[1][j1] = fabs(dalp[1][ia]); }
1001           if (fabs(dalp[2][ia])>damax[2][j1]) { damax[2][j1] = fabs(dalp[2][ia]); }
1002         } /* loop of ir */
1003       } /* loop of j1 */
1004 #endif
1005       break;
1006     }
1007   }
1008 }
1009 
1010 
1011 
1012 
1013 
1014 
1015 /*----------------------------------------------------------------------
1016   ERI_LL_Overlap
1017 
1018   Product of two orbital
1019 ----------------------------------------------------------------------*/
ERI_LL_Overlap(ERI_t * solver,double * p,const double * a1,const double * a2)1020 void ERI_LL_Overlap(
1021   ERI_t        *solver,
1022   double       *p,   /* (OUT) P term */
1023   const double *a1,  /* (IN)  Alpha term of the orbtial #1 */
1024   const double *a2   /* (IN)  Alpha term of the orbital #2 */
1025 )
1026 {
1027   int ngrid, lmax, jmax, lmax0, jmax0;
1028   int j, j1, j2, ir, ia1, ia2, ip;
1029   int ia1_base, ia2_base, ip_base;
1030   double gnt, a1a2[2];
1031 
1032   const int *j2l   = g_itbl_j2l;
1033   const int *j2m   = g_itbl_j2m;
1034 
1035   int ndp, ndalp;
1036 
1037   int i, n, igtbl;
1038   const int* gtbl_n;
1039   const long* gtbl_j1j2;
1040   const double* gtbl_gc;
1041 
1042 #if SUMSKIP
1043   const double *amax1, *amax2;
1044   double *pmax;
1045 #endif
1046 
1047   ngrid = ERI_ngrid(solver);
1048   lmax  = ERI_lmax(solver);
1049   jmax  = lmax*lmax;
1050   lmax0 = ERI_lmax_gl(solver);
1051   jmax0 = lmax0*lmax0;
1052 
1053   gtbl_n    = ERI_Gtbl_n(solver->gtbl);
1054   gtbl_j1j2 = ERI_Gtbl_j1j2(solver->gtbl);
1055   gtbl_gc   = ERI_Gtbl_gc(solver->gtbl);
1056 
1057   STEPTRACE("ERI_LL_Overlap: in\n");
1058 
1059   switch (solver->rcsh) {
1060   case ERI_SH_COMPLEX:
1061     ndalp = 2*ngrid*jmax;
1062     ndp   = 2*ngrid*jmax0;
1063     break;
1064   case ERI_SH_REAL:
1065     ndalp = ngrid*jmax;
1066     ndp   = ngrid*jmax0;
1067     break;
1068   }
1069 
1070 #if SUMSKIP
1071   amax1 = &a1[ndalp];
1072   amax2 = &a2[ndalp];
1073   pmax  = &p[ndp];
1074 #endif
1075 
1076   /* zero rest */
1077   for (j=0; j<ndp; j++) { p[j] = 0.0; }
1078 #if SUMSKIP
1079   for (j=0; j<jmax0; j++) { pmax[j] = 0.0; }
1080 #endif
1081 
1082   switch (solver->rcsh) {
1083   case ERI_SH_COMPLEX:
1084     igtbl=0;
1085     for (j=0; j<jmax0; j++) {
1086       n = gtbl_n[j];
1087 
1088       ip_base = j*ngrid;
1089       for (i=0; i<n; i++) {
1090         gnt = gtbl_gc[igtbl];
1091         j1 = ERI_GTBL_UNPACK_J1( gtbl_j1j2[igtbl] );
1092         j2 = ERI_GTBL_UNPACK_J2( gtbl_j1j2[igtbl] );
1093         igtbl++;
1094 
1095         ia1_base = j1*ngrid;
1096         ia2_base = j2*ngrid;
1097 #if SUMSKIP
1098         if (amax1[j1]*amax2[j2]>SUMSKIP_THRESHOLD)
1099 #endif
1100         {
1101           for (ir=0; ir<ngrid; ir++) {
1102             ia1 = 2*(ia1_base+ir);
1103             ia2 = 2*(ia2_base+ir);
1104             ip  = 2*(ip_base+ir);
1105             a1a2[0] = a1[ia1+0]*a2[ia2+0] - a1[ia1+1]*a2[ia2+1];
1106             a1a2[1] = a1[ia1+0]*a2[ia2+1] + a1[ia1+1]*a2[ia2+0];
1107             p[ip+0] += gnt*a1a2[0];
1108             p[ip+1] += gnt*a1a2[1];
1109           } /*--- loop of ir ---*/
1110         }
1111       } /*--- loop of i ---*/
1112 #if SUMSKIP
1113       for (ir=0; ir<ngrid; ir++) {
1114         ip = 2*(ip_base+ir);
1115         if (fabs(p[ip+0])>pmax[j]) { pmax[j] = fabs(p[ip+0]); }
1116         if (fabs(p[ip+1])>pmax[j]) { pmax[j] = fabs(p[ip+1]); }
1117       }
1118 #endif
1119     } /*--- loop of j ---*/
1120     break;
1121 
1122   case ERI_SH_REAL:
1123     igtbl=0;
1124     for (j=0; j<jmax0; j++) {
1125       n = gtbl_n[j];
1126 
1127       ip_base = j*ngrid;
1128       for (i=0; i<n; i++) {
1129         gnt = gtbl_gc[igtbl];
1130         j1 = ERI_GTBL_UNPACK_J1( gtbl_j1j2[igtbl] );
1131         j2 = ERI_GTBL_UNPACK_J2( gtbl_j1j2[igtbl] );
1132         igtbl++;
1133 
1134         ia1_base = j1*ngrid;
1135         ia2_base = j2*ngrid;
1136 #if SUMSKIP
1137         if (amax1[j1]*amax2[j2]>SUMSKIP_THRESHOLD)
1138 #endif
1139         {
1140           for (ir=0; ir<ngrid; ir++) {
1141             ia1 = ia1_base+ir;
1142             ia2 = ia2_base+ir;
1143             ip  = ip_base+ir;
1144             a1a2[0] = a1[ia1]*a2[ia2];
1145             p[ip] += gnt*a1a2[0];
1146             /* imag part should be zero */
1147           } /*--- loop of ir ---*/
1148         }
1149       } /*--- loop of i ---*/
1150 #if SUMSKIP
1151       for (ir=0; ir<ngrid; ir++) {
1152         ip = ip_base+ir;
1153         if (fabs(p[ip])>pmax[j]) { pmax[j] = fabs(p[ip]); }
1154       }
1155 #endif
1156     } /*--- loop of j ---*/
1157     break;
1158   }
1159 
1160   STEPTRACE("ERI_Overlap: out\n");
1161 }
1162 
1163 
1164 
1165 /*--------------------------------------------------------------------*/
ERI_LL_Overlap_d(ERI_t * solver,double * p,double * dp[3],const double * a1,const double * da1[3],const double * a2,const double * da2[3],double x)1166 void ERI_LL_Overlap_d(
1167   ERI_t        *solver,
1168   double       *p,      /* (OUT) overlap function */
1169   double       *dp[3],  /* (OUT) derivatives */
1170   const double *a1,     /* (IN) alpha for orb 1 */
1171   const double *da1[3], /* (IN) derivatives */
1172   const double *a2,     /* (IN) alpha for orb 2 */
1173   const double *da2[3], /* (IN) derivatives */
1174   double        x       /* (IN) */
1175 )
1176 {
1177   int ngrid, lmax, jmax, lmax0, jmax0;
1178   int i, j, j1, j2, ir,  ia1, ia2, ip, ixyz, l, m;
1179   int ia1_base, ia2_base, ip_base;
1180   double gnt, a1a2[2], da1a2[3][2], a1da2[3][2];
1181 
1182   const int *j2l = g_itbl_j2l;
1183   const int *j2m = g_itbl_j2m;
1184 
1185   int ndp, ndalp;
1186 
1187   int n, igtbl;
1188   const int* gtbl_n;
1189   const long* gtbl_j1j2;
1190   const double* gtbl_gc;
1191 
1192 #if SUMSKIP
1193   double *pmax, *dpmax[3];
1194   const double *amax1, *damax1[3], *amax2, *damax2[3];
1195 #endif
1196 
1197   ngrid = ERI_ngrid(solver);
1198   lmax  = ERI_lmax(solver);
1199   jmax  = lmax*lmax;
1200   lmax0 = ERI_lmax_gl(solver);
1201   jmax0 = lmax0*lmax0;
1202 
1203   gtbl_n    = ERI_Gtbl_n(solver->gtbl);
1204   gtbl_j1j2 = ERI_Gtbl_j1j2(solver->gtbl);
1205   gtbl_gc   = ERI_Gtbl_gc(solver->gtbl);
1206 
1207   switch (solver->rcsh) {
1208   case ERI_SH_COMPLEX:
1209     ndalp = 2*ngrid*jmax;
1210     ndp   = 2*ngrid*jmax0;
1211     break;
1212   case ERI_SH_REAL:
1213     ndalp = ngrid*jmax;
1214     ndp   = ngrid*jmax0;
1215     break;
1216   }
1217 
1218 #if SUMSKIP
1219   pmax      = &p[ndp];
1220   amax1     = &a1[ndalp];
1221   amax2     = &a2[ndalp];
1222   for (i=0; i<3; i++) {
1223     dpmax[i]  = &(dp[i][ndp]);
1224     damax1[i] = &(da1[i][ndalp]);
1225     damax2[i] = &(da2[i][ndalp]);
1226   }
1227 #endif
1228   STEPTRACE("MCP_P_d: begin\n");
1229 
1230   /* zero reset */
1231   for (j=0; j<ndp; j++) {
1232     p[j] = 0.0;
1233     dp[0][j] = 0.0;
1234     dp[1][j] = 0.0;
1235     dp[2][j] = 0.0;
1236   }
1237 #if SUMSKIP
1238   for (j=0; j<jmax0; j++) {
1239     pmax[j]  = 0.0;
1240     dpmax[0][j]  = 0.0;
1241     dpmax[1][j]  = 0.0;
1242     dpmax[2][j]  = 0.0;
1243   }
1244 #endif
1245   STEPTRACE("MCP_P_d: step 1\n");
1246 
1247   switch (solver->rcsh) {
1248   case ERI_SH_COMPLEX:
1249     igtbl=0;
1250     for (j=0; j<jmax0; j++) {
1251       l = j2l[j];
1252       m = j2m[j];
1253       ip_base = j*ngrid;
1254       n = gtbl_n[j];
1255       for (i=0; i<n; i++) {
1256         gnt = gtbl_gc[igtbl];
1257         j1  = ERI_GTBL_UNPACK_J1( gtbl_j1j2[igtbl] );
1258         j2  = ERI_GTBL_UNPACK_J2( gtbl_j1j2[igtbl] );
1259         igtbl++;
1260 
1261         {
1262           ia1_base = j1*ngrid;
1263           ia2_base = j2*ngrid;
1264 #if SUMSKIP
1265           if (amax1[j1]*amax2[j2]>SUMSKIP_THRESHOLD) {
1266             for (ir=0; ir<ngrid; ir++) {
1267               ia1 = 2*(ia1_base+ir);
1268               ia2 = 2*(ia2_base+ir);
1269               ip  = 2*(ip_base+ir);
1270               a1a2[0] = a1[ia1+0]*a2[ia2+0] - a1[ia1+1]*a2[ia2+1];
1271               a1a2[1] = a1[ia1+0]*a2[ia2+1] + a1[ia1+1]*a2[ia2+0];
1272               p[ip+0] += gnt*a1a2[0];
1273               p[ip+1] += gnt*a1a2[1];
1274             } /* loop of ir */
1275           }
1276           for (ixyz=0; ixyz<3; ixyz++) {
1277             if (amax2[j2]*damax1[ixyz][j1]>SUMSKIP_THRESHOLD) {
1278               for (ir=0; ir<ngrid; ir++) {
1279                 ia1 = 2*(ia1_base+ir);
1280                 ia2 = 2*(ia2_base+ir);
1281                 ip  = 2*(ip_base+ir);
1282                 da1a2[ixyz][0] = da1[ixyz][ia1+0]*a2[ia2+0]
1283                                  -da1[ixyz][ia1+1]*a2[ia2+1];
1284                 da1a2[ixyz][1] = da1[ixyz][ia1+0]*a2[ia2+1]
1285                                  +da1[ixyz][ia1+1]*a2[ia2+0];
1286                 dp[ixyz][ip+0] += gnt*(1.0-x)*da1a2[ixyz][0];
1287                 dp[ixyz][ip+1] += gnt*(1.0-x)*da1a2[ixyz][1];
1288               } /* loop of ir */
1289             }
1290           } /* loop of ixyz */
1291 
1292           for (ixyz=0; ixyz<3; ixyz++) {
1293             if (amax1[j1]*damax2[ixyz][j2]>SUMSKIP_THRESHOLD) {
1294               for (ir=0; ir<ngrid; ir++) {
1295                 ia1 = 2*(ia1_base+ir);
1296                 ia2 = 2*(ia2_base+ir);
1297                 ip  = 2*(ip_base+ir);
1298                 a1da2[ixyz][0] = a1[ia1+0]*da2[ixyz][ia2+0]
1299                                  - a1[ia1+1]*da2[ixyz][ia2+1];
1300                 a1da2[ixyz][1] = a1[ia1+0]*da2[ixyz][ia2+1]
1301                                  + a1[ia1+1]*da2[ixyz][ia2+0];
1302                 dp[ixyz][ip+0] += -gnt*x*a1da2[ixyz][0];
1303                 dp[ixyz][ip+1] += -gnt*x*a1da2[ixyz][1];
1304               } /* loop of ir */
1305             }
1306           } /* loop of ixyz */
1307 #else /* SUMSKIP */
1308           for (ir=0; ir<ngrid; ir++) {
1309             ia1 = 2*(ia1_base+ir);
1310             ia2 = 2*(ia2_base+ir);
1311             ip  = 2*(ip_base+ir);
1312 
1313             a1a2[0] = a1[ia1+0]*a2[ia2+0]-a1[ia1+1]*a2[ia2+1];
1314             a1a2[1] = a1[ia1+0]*a2[ia2+1]+a1[ia1+1]*a2[ia2+0];
1315             p[ip+0] += gnt*a1a2[0];
1316             p[ip+1] += gnt*a1a2[1];
1317 
1318             da1a2[0][0] = da1[0][ia1+0]*a2[ia2+0]-da1[0][ia1+1]*a2[ia2+1];
1319             da1a2[0][1] = da1[0][ia1+0]*a2[ia2+1]+da1[0][ia1+1]*a2[ia2+0];
1320             da1a2[1][0] = da1[1][ia1+0]*a2[ia2+0]-da1[1][ia1+1]*a2[ia2+1];
1321             da1a2[1][1] = da1[1][ia1+0]*a2[ia2+1]+da1[1][ia1+1]*a2[ia2+0];
1322             da1a2[2][0] = da1[2][ia1+0]*a2[ia2+0]-da1[2][ia1+1]*a2[ia2+1];
1323             da1a2[2][1] = da1[2][ia1+0]*a2[ia2+1]+da1[2][ia1+1]*a2[ia2+0];
1324 
1325             a1da2[0][0] = a1[ia1+0]*da2[0][ia2+0]-a1[ia1+1]*da2[0][ia2+1];
1326             a1da2[0][1] = a1[ia1+0]*da2[0][ia2+1]+a1[ia1+1]*da2[0][ia2+0];
1327             a1da2[1][0] = a1[ia1+0]*da2[1][ia2+0]-a1[ia1+1]*da2[1][ia2+1];
1328             a1da2[1][1] = a1[ia1+0]*da2[1][ia2+1]+a1[ia1+1]*da2[1][ia2+0];
1329             a1da2[2][0] = a1[ia1+0]*da2[2][ia2+0]-a1[ia1+1]*da2[2][ia2+1];
1330             a1da2[2][1] = a1[ia1+0]*da2[2][ia2+1]+a1[ia1+1]*da2[2][ia2+0];
1331 
1332             dp[0][ip+0] += gnt*((1.0-x)*da1a2[0][0] - x*a1da2[0][0]);
1333             dp[0][ip+1] += gnt*((1.0-x)*da1a2[0][1] - x*a1da2[0][1]);
1334             dp[1][ip+0] += gnt*((1.0-x)*da1a2[1][0] - x*a1da2[1][0]);
1335             dp[1][ip+1] += gnt*((1.0-x)*da1a2[1][1] - x*a1da2[1][1]);
1336             dp[2][ip+0] += gnt*((1.0-x)*da1a2[2][0] - x*a1da2[2][0]);
1337             dp[2][ip+1] += gnt*((1.0-x)*da1a2[2][1] - x*a1da2[2][1]);
1338           } /*--- loop of ir ---*/
1339 #endif /* SUMSKIP */
1340         }
1341       } /*--- loop of i ---*/
1342 #if SUMSKIP
1343       for (ir=0; ir<ngrid; ir++) {
1344         ip = 2*(ip_base+ir);
1345         if (fabs(p[ip+0])>pmax[j]) { pmax[j] = fabs(p[ip+0]); }
1346         if (fabs(p[ip+1])>pmax[j]) { pmax[j] = fabs(p[ip+1]); }
1347         for (i=0; i<3; i++) {
1348           if (fabs(dp[i][ip+0])>dpmax[i][j]) {
1349             dpmax[i][j] = fabs(dp[i][ip+0]);
1350           }
1351           if (fabs(dp[i][ip+1])>dpmax[i][j]) {
1352             dpmax[i][j] = fabs(dp[i][ip+1]);
1353           }
1354         } /* loop of i */
1355       } /* loop of ir */
1356 #endif
1357     } /*--- loop of j ---*/
1358     break;
1359 
1360   case ERI_SH_REAL:
1361     igtbl=0;
1362     for (j=0; j<jmax0; j++) {
1363       l = j2l[j];
1364       m = j2m[j];
1365       ip_base = j*ngrid;
1366       n = gtbl_n[j];
1367       for (i=0; i<n; i++) {
1368         gnt = gtbl_gc[igtbl];
1369         j1  = ERI_GTBL_UNPACK_J1( gtbl_j1j2[igtbl] );
1370         j2  = ERI_GTBL_UNPACK_J2( gtbl_j1j2[igtbl] );
1371         igtbl++;
1372 
1373         {
1374           ia1_base = j1*ngrid;
1375           ia2_base = j2*ngrid;
1376 #if SUMSKIP
1377           if (amax1[j1]*amax2[j2]>SUMSKIP_THRESHOLD) {
1378             for (ir=0; ir<ngrid; ir++) {
1379               ia1 = ia1_base+ir;
1380               ia2 = ia2_base+ir;
1381               ip  = ip_base+ir;
1382               a1a2[0] = a1[ia1]*a2[ia2];
1383               p[ip] += gnt*a1a2[0];
1384               /* imag part should be zero */
1385             } /* loop of ir */
1386           }
1387           for (ixyz=0; ixyz<3; ixyz++) {
1388             if (amax2[j2]*damax1[ixyz][j1]>SUMSKIP_THRESHOLD) {
1389               for (ir=0; ir<ngrid; ir++) {
1390                 ia1 = ia1_base+ir;
1391                 ia2 = ia2_base+ir;
1392                 ip  = ip_base+ir;
1393                 da1a2[ixyz][0] = da1[ixyz][ia1]*a2[ia2];
1394                 dp[ixyz][ip] += gnt*(1.0-x)*da1a2[ixyz][0];
1395                 /* imag part should be zero */
1396               } /* loop of ir */
1397             }
1398           } /* loop of ixyz */
1399 
1400           for (ixyz=0; ixyz<3; ixyz++) {
1401             if (amax1[j1]*damax2[ixyz][j2]>SUMSKIP_THRESHOLD) {
1402               for (ir=0; ir<ngrid; ir++) {
1403                 ia1 = ia1_base+ir;
1404                 ia2 = ia2_base+ir;
1405                 ip  = ip_base+ir;
1406                 a1da2[ixyz][0] = a1[ia1]*da2[ixyz][ia2];
1407                 dp[ixyz][ip] += -gnt*x*a1da2[ixyz][0];
1408                 /* imag part should be zero */
1409               } /* loop of ir */
1410             }
1411           } /* loop of ixyz */
1412 #else /* SUMSKIP */
1413           for (ir=0; ir<ngrid; ir++) {
1414             ia1 = ia1_base+ir;
1415             ia2 = ia2_base+ir;
1416             ip  = ip_base+ir;
1417 
1418             a1a2[0] = a1[ia1]*a2[ia2];
1419             p[ip] += gnt*a1a2[0];
1420             /* imag part should be zero */
1421 
1422             da1a2[0][0] = da1[0][ia1]*a2[ia2];
1423             da1a2[1][0] = da1[1][ia1]*a2[ia2];
1424             da1a2[2][0] = da1[2][ia1]*a2[ia2];
1425 
1426             a1da2[0][0] = a1[ia1]*da2[0][ia2];
1427             a1da2[1][0] = a1[ia1]*da2[1][ia2];
1428             a1da2[2][0] = a1[ia1]*da2[2][ia2];
1429 
1430             dp[0][ip] += gnt*((1.0-x)*da1a2[0][0] - x*a1da2[0][0]);
1431             /* imag part should be zero */
1432             dp[1][ip] += gnt*((1.0-x)*da1a2[1][0] - x*a1da2[1][0]);
1433             /* imag part should be zero */
1434             dp[2][ip] += gnt*((1.0-x)*da1a2[2][0] - x*a1da2[2][0]);
1435             /* imag part should be zero */
1436           } /*--- loop of ir ---*/
1437 #endif /* SUMSKIP */
1438         }
1439       } /*--- loop of i ---*/
1440 #if SUMSKIP
1441       for (ir=0; ir<ngrid; ir++) {
1442         ip = ip_base+ir;
1443         if (fabs(p[ip])>pmax[j]) { pmax[j] = fabs(p[ip]); }
1444         for (i=0; i<3; i++) {
1445           if (fabs(dp[i][ip])>dpmax[i][j]) { dpmax[i][j] = fabs(dp[i][ip]); }
1446         } /* loop of i */
1447       } /* loop of ir */
1448 #endif
1449     } /*--- loop of j ---*/
1450   }
1451 }
1452 
1453 
1454 
1455 
ERI_Transform_Overlap(ERI_t * solver,double * F,const double * P)1456 void ERI_Transform_Overlap(
1457   ERI_t        *solver,
1458   double       *F,
1459   const double *P
1460 )
1461 {
1462   int ngrid, lmax0, jmax0;
1463   int j, ip, ip_base, l;
1464 
1465 #if WORKSPACE_AT_STACK
1466   double in[ERI_NGRIDMAX*2], out[ERI_NGRIDMAX*2];
1467 #else
1468   double *in  = solver->ws_in;
1469   double *out = solver->ws_out;
1470 #endif
1471 
1472   int ndp;
1473 
1474 #if SUMSKIP
1475   int ik;
1476   double *fmax;
1477   const double *pmax;
1478 #endif
1479 
1480   ngrid = ERI_ngrid(solver);
1481   lmax0 = ERI_lmax_gl(solver);
1482   jmax0 = lmax0*lmax0;
1483 
1484   STEPTRACE("ERI_Transform_Overlap: in");
1485 
1486   switch (solver->rcsh) {
1487   case ERI_SH_COMPLEX: ndp = 2*ngrid*jmax0; break;
1488   case ERI_SH_REAL:    ndp = ngrid*jmax0; break;
1489   }
1490 
1491 #if SUMSKIP
1492   pmax = &P[ndp];
1493   fmax = &F[ndp];
1494   for (j=0; j<jmax0; j++) { fmax[j] = 0.0; }
1495 #endif
1496 
1497   switch (solver->rcsh) {
1498   case ERI_SH_COMPLEX:
1499     for (j=0; j<jmax0; j++) {
1500 #if SUMSKIP
1501       if (pmax[j]>SUMSKIP_THRESHOLD)
1502 #endif
1503       {
1504         l=(int)sqrt((double)j);
1505         ip_base = j*ngrid;
1506         ERI_SBT_Transform(solver->sbt, &F[2*ip_base], &P[2*ip_base],
1507                           l, ERI_SBT_FORWARD);
1508 #if SUMSKIP
1509         for (ik=0; ik<ngrid; ik++) {
1510           ip = 2*(ip_base + ik);
1511           if (fabs(F[ip+0])>fmax[j]) { fmax[j] = fabs(F[ip+0]); }
1512           if (fabs(F[ip+1])>fmax[j]) { fmax[j] = fabs(F[ip+1]); }
1513         }
1514 #endif
1515       }
1516     } /* loop of j */
1517     break;
1518 
1519   case ERI_SH_REAL:
1520     for (j=0; j<jmax0; j++) {
1521 #if SUMSKIP
1522       if (pmax[j]>SUMSKIP_THRESHOLD)
1523 #endif
1524       {
1525         l=(int)sqrt((double)j);
1526         ip_base = j*ngrid;
1527         for (ik=0; ik<ngrid; ik++) {
1528           ip = ip_base+ik;
1529           in[2*ik+0] = P[ip];
1530           in[2*ik+1] = 0.0;
1531         }
1532         ERI_SBT_Transform(solver->sbt, out, in, l, ERI_SBT_FORWARD);
1533         for (ik=0; ik<ngrid; ik++) {
1534           ip = ip_base+ik;
1535           F[ip] = out[2*ik+0];
1536           /* imag part out[2*ir+1] is thrown away */
1537 #if SUMSKIP
1538           if (fabs(F[ip])>fmax[j]) { fmax[j] = fabs(F[ip]); }
1539 #endif
1540         }
1541       }
1542     } /* loop of j */
1543     break;
1544   }
1545 
1546   STEPTRACE("ERI_Transofrm_Overlap: out");
1547 }
1548 
1549 
1550 
1551 
1552 /*----------------------------------------------------------------------
1553   ERI_GL_Interpolate
1554 ----------------------------------------------------------------------*/
ERI_GL_Interpolate(ERI_t * solver,double * glF,const double * F)1555 void ERI_GL_Interpolate(
1556   ERI_t        *solver,
1557   double       *glF, /* (IN) Overlap matrix */
1558   const double *F    /* (IN) Overlap matrix */
1559 )
1560 {
1561   int ngrid, ngl, lmax0, jmax0;
1562   int j, ik, ig;
1563   double k, y[2];
1564 
1565   const double *kmesh, *glx;
1566 
1567 #if SUMSKIP
1568   int ndf, ndglf;
1569   double *glfmax;
1570   const double *fmax;
1571 #endif
1572 
1573   ERI_CSpline_t *cs;
1574 
1575   ngrid = ERI_ngrid(solver);
1576   ngl   = ERI_ngl(solver);
1577   lmax0 = ERI_lmax_gl(solver);
1578   jmax0 = lmax0*lmax0;
1579 
1580   kmesh = ERI_Mesh_Array_k(solver);
1581   glx   = ERI_Mesh_Array_glx(solver);
1582 
1583 #if SUMSKIP
1584   switch (solver->rcsh) {
1585   case ERI_SH_COMPLEX:
1586     ndglf = ngl*jmax0*2;
1587     ndf   = ngrid*jmax0*2;
1588     break;
1589   case ERI_SH_REAL:
1590     ndglf = ngl*jmax0;
1591     ndf   = ngrid*jmax0;
1592     break;
1593   }
1594   glfmax = &glF[ndglf];
1595   fmax   = &F[ndf];
1596   for (j=0; j<jmax0; j++) { glfmax[j] = 0.0; }
1597 #endif
1598 
1599   switch (solver->rcsh) {
1600   case ERI_SH_COMPLEX:
1601 
1602     for (j=0; j<jmax0; j++) {
1603 #if SUMSKIP
1604       if (fmax[j]>SUMSKIP_THRESHOLD)
1605 #endif
1606       {
1607         cs = ERI_CSpline_Init_Complex(kmesh,
1608           (const double*)&F[j*ngrid*2], ngrid);
1609         for (ik=0; ik<ngl; ik++) {
1610           k = glx[ik];
1611           ig = 2*(j*ngl+ik);
1612           ERI_CSpline_Eval_Complex(y, k, cs);
1613           glF[ig+0] = y[0];
1614           glF[ig+1] = y[1];
1615 #if SUMSKIP
1616           if (fabs(glF[ig+0])>glfmax[j]) { glfmax[j]=fabs(glF[ig+0]); }
1617           if (fabs(glF[ig+1])>glfmax[j]) { glfmax[j]=fabs(glF[ig+1]); }
1618 #endif
1619         } /* loop of ik */
1620         ERI_CSpline_Free(cs);
1621       }
1622     } /* loop of j */
1623     break;
1624 
1625   case ERI_SH_REAL:
1626     for (j=0; j<jmax0; j++) {
1627 #if SUMSKIP
1628       if (fmax[j]>SUMSKIP_THRESHOLD)
1629 #endif
1630       {
1631         cs = ERI_CSpline_Init(kmesh, (const double*)&F[j*ngrid], ngrid);
1632         for (ik=0; ik<ngl; ik++) {
1633           k = glx[ik];
1634           ig = j*ngl+ik;
1635           glF[ig] = ERI_CSpline_Eval(k, cs);
1636 #if SUMSKIP
1637           if (fabs(glF[ig])>glfmax[j]) { glfmax[j]=fabs(glF[ig]); }
1638 #endif
1639         } /* loop of ik */
1640         ERI_CSpline_Free(cs);
1641       }
1642     } /* loop of j */
1643     break;
1644   }
1645 
1646 }
1647 
1648 
1649 
1650 /*----------------------------------------------------------------------
1651   ERI_Integral_GL
1652 
1653   If you need the derivatives, you should call ERI_I4_d.
1654 ----------------------------------------------------------------------*/
ERI_Integral_GL(ERI_t * solver,double I4[2],const double * F1,const double * F2,double R,double theta,double phi,double omega,int lmax1)1655 void ERI_Integral_GL(
1656   ERI_t        *solver,
1657   double        I4[2], /* (OUT) */
1658   const double *F1,    /* (IN) Overlap matrix */
1659   const double *F2,    /* (IN) */
1660   double        R,     /* (IN) Displacement of two expansion centers */
1661   double        theta,
1662   double        phi,
1663   double        omega,  /* (IN) screening parameter */
1664   int           lmax1
1665 )
1666 {
1667   int ngl, lmax, lmax0, jmax0, jmax1;
1668   int i, j, j1, j2, ik, l, m, l1, l2;
1669   int gl_ibase1, gl_ibase2, igl1, igl2;
1670 
1671   double sh[2], dsht[2], dshp[2];
1672   double k, gnt, gnt_r[2], sum[2], FG[2], int1[2], sum2[2];
1673 
1674 #if WORKSPACE_AT_STACK
1675   double tmp_sb[(ERI_LMAXMAX*2)*ERI_NGRIDMAX];
1676   double tmp_dsb[(ERI_LMAXMAX*2)*ERI_NGRIDMAX];
1677 #else
1678   double *tmp_sb = solver->ws_sb;
1679   double *tmp_dsb = solver->ws_dsb;
1680 #endif
1681 
1682   const double *glx, *glw;
1683   double *glj;
1684 
1685   const int *j2l = g_itbl_j2l;
1686   const int *j2m = g_itbl_j2m;
1687 
1688   double scr, scr_a;
1689 
1690   int n, igtbl;
1691   const int* gtbl_n;
1692   const long* gtbl_j1j2;
1693   const double* gtbl_gc;
1694 
1695   int lphase;
1696 
1697 #if SUMSKIP
1698   int ndglf;
1699   const double *fmax1, *fmax2;
1700 #endif
1701 
1702   ngl   = ERI_ngl(solver);
1703   lmax  = ERI_lmax(solver);
1704   lmax0 = ERI_lmax_gl(solver);
1705   jmax0 = lmax0*lmax0;
1706   jmax1 = lmax1*lmax1;
1707 
1708   glx  = solver->glq_x;
1709   glw  = solver->glq_w;
1710   glj  = solver->glq_j;
1711 
1712   gtbl_n    = ERI_Gtbl_n(solver->gtbl);
1713   gtbl_j1j2 = ERI_Gtbl_j1j2(solver->gtbl);
1714   gtbl_gc   = ERI_Gtbl_gc(solver->gtbl);
1715 
1716 #if SUMSKIP
1717   switch (solver->rcsh) {
1718   case ERI_SH_COMPLEX: ndglf = 2*ngl*jmax0; break;
1719   case ERI_SH_REAL:    ndglf = ngl*jmax0; break;
1720   }
1721   fmax1 = &F1[ndglf];
1722   fmax2 = &F2[ndglf];
1723 #endif
1724 
1725   /* spherical Bessel function */
1726   if (omega<0.0) {
1727     /* no screening */
1728     for (ik=0; ik<ngl; ik++) {
1729       k = glx[ik];
1730       ERI_Spherical_Bessel(k*R, lmax, tmp_sb, tmp_dsb);
1731       for (l=0; l<lmax; l++) {
1732         i = l*ngl + ik;
1733         glj[i] = glw[ik]*exp(k)*tmp_sb[l]*32.0*PI;
1734       }
1735     }
1736   } else {
1737     /* screening */
1738     scr_a = 0.25/omega/omega;
1739     for (ik=0; ik<ngl; ik++) {
1740       k = glx[ik];
1741       ERI_Spherical_Bessel(k*R, lmax, tmp_sb, tmp_dsb);
1742       scr = 1.0-exp(-k*k*scr_a);
1743       for (l=0; l<lmax; l++) {
1744         i = l*ngl + ik;
1745         glj[i] = glw[ik]*exp(k)*tmp_sb[l]*32.0*PI*scr;
1746       }
1747     }
1748   }
1749 
1750   I4[0] = 0.0;
1751   I4[1] = 0.0;
1752 
1753   switch (solver->rcsh) {
1754   case ERI_SH_COMPLEX:
1755     igtbl=0;
1756     for (j=0; j<jmax1; j++) {
1757       l = j2l[j];
1758       m = j2m[j];
1759       ERI_Spherical_Harmonics(l, m, theta, phi, sh, dsht, dshp);
1760       int1[0] = 0.0;
1761       int1[1] = 0.0;
1762       n = gtbl_n[j];
1763       for (i=0; i<n; i++) {
1764         gnt = gtbl_gc[igtbl];
1765         j1 = ERI_GTBL_UNPACK_J1( gtbl_j1j2[igtbl] );
1766         j2 = ERI_GTBL_UNPACK_J2( gtbl_j1j2[igtbl] );
1767         igtbl++;
1768 
1769         if (j1<jmax1 && j2<jmax1) {
1770           sum2[0] = 0.0;
1771           sum2[1] = 0.0;
1772 #if SUMSKIP
1773           if (fmax1[j1]*fmax2[j2]>SUMSKIP_THRESHOLD)
1774 #endif
1775           {
1776             l1 = j2l[j1];
1777             l2 = j2l[j2];
1778             sum[0] = 0.0;
1779             sum[1] = 0.0;
1780             gl_ibase1 = j1*ngl;
1781             gl_ibase2 = j2*ngl;
1782             for (ik=0; ik<ngl; ik++) {
1783               igl1 = 2*(gl_ibase1+ik);
1784               igl2 = 2*(gl_ibase2+ik);
1785               FG[0] = F1[igl1+0]*F2[igl2+0] - F1[igl1+1]*F2[igl2+1];
1786               FG[1] = F1[igl1+0]*F2[igl2+1] + F1[igl1+1]*F2[igl2+0];
1787               sum[0] += glj[l*ngl+ik]*FG[0];
1788               sum[1] += glj[l*ngl+ik]*FG[1];
1789             }
1790             phase(l2+l-l1, sum);
1791             sum2[0] += sum[0];
1792             sum2[1] += sum[1];
1793           }
1794           int1[0] += sum2[0]*gnt;
1795           int1[1] += sum2[1]*gnt;
1796         }
1797       }
1798       I4[0] += int1[0]*sh[0] - int1[1]*sh[1];
1799       I4[1] += int1[1]*sh[0] + int1[0]*sh[1];
1800     } /* loop of j */
1801     break;
1802 
1803   case ERI_SH_REAL:
1804     igtbl=0;
1805     for (j=0; j<jmax1; j++) {
1806       l = j2l[j];
1807       m = j2m[j];
1808       ERI_Real_Spherical_Harmonics(l, m, theta, phi, sh, dsht, dshp);
1809       sh[1] = 0.0;
1810 
1811       int1[0] = 0.0;
1812       int1[1] = 0.0;
1813       n = gtbl_n[j];
1814       for (i=0; i<n; i++) {
1815         gnt = gtbl_gc[igtbl];
1816         j1 = ERI_GTBL_UNPACK_J1( gtbl_j1j2[igtbl] );
1817         j2 = ERI_GTBL_UNPACK_J2( gtbl_j1j2[igtbl] );
1818         igtbl++;
1819 
1820         if (j1>=jmax1 || j2>=jmax1) { continue; }
1821 #if SUMSKIP
1822         if (fmax1[j1]*fmax2[j2]<SUMSKIP_THRESHOLD) { continue; }
1823 #endif
1824         l1 = j2l[j1];
1825         l2 = j2l[j2];
1826         lphase = l2+l-l1;
1827         if (1==lphase%2) { continue; }
1828 
1829         sum[0] = 0.0;
1830         /* sum[1] = 0.0; */
1831 
1832         gl_ibase1 = j1*ngl;
1833         gl_ibase2 = j2*ngl;
1834         for (ik=0; ik<ngl; ik++) {
1835           igl1 = gl_ibase1+ik;
1836           igl2 = gl_ibase2+ik;
1837           FG[0] = F1[igl1]*F2[igl2];
1838           sum[0] += glj[l*ngl+ik]*FG[0];
1839         }
1840 
1841         if (2==lphase%4) { sum[0] = -sum[0]; }
1842         int1[0] += sum[0]*gnt;
1843       }
1844       I4[0] += int1[0]*sh[0];
1845       /* I4[1] += int1[1]*sh[0]; */
1846     } /* loop of j */
1847     break;
1848   }
1849 }
1850 
1851 
1852 
1853 /*----------------------------------------------------------------------
1854   ERI_Integral_GL_d
1855 ----------------------------------------------------------------------*/
ERI_Integral_GL_d(ERI_t * solver,double I4[2],double dI4[4][3][2],const double * F1,const double * F2,const double * dF1[3],const double * dF2[3],double R,double theta,double phi,double cx12,double cx34,double delta,double omega,int lmax1)1856 void ERI_Integral_GL_d(
1857   ERI_t        *solver,
1858   double        I4[2],        /* (OUT) integrals */
1859   double        dI4[4][3][2], /* (OUT) derivatives */
1860   const double *F1,           /* (IN) overlap matrix 1 */
1861   const double *F2,           /* (IN) overlap matrix 2 */
1862   const double *dF1[3],       /* (IN) derivatives of overlap 1 */
1863   const double *dF2[3],       /* (IN) derivatives of overlap 2 */
1864   double        R,            /* (IN) displacement */
1865   double        theta,
1866   double        phi,
1867   double        cx12,         /* (IN) */
1868   double        cx34,
1869   double        delta,
1870   double        omega,  /* (IN) screening parameter */
1871   int           lmax1
1872 )
1873 {
1874   int ngl, lmax, lmax0, jmax0, jmax1;
1875   int i, j, j1, j2, ik, ixyz, l, m, l1, l2;
1876   int gl_ibase1, gl_ibase2, igl1, igl2;
1877 
1878   double sh[2], dsht[2], dshp[2];
1879   double k, gnt;
1880   double sum[2], dsum[2];
1881   double FG[2], dFG[3][2], FdG[3][2];
1882   double int1[2], int2[2], int3[3][2], int4[3][2];
1883   double q1[2], q2[2], q3[2], q4[2], q5[3][2], q6[2], q7[2];
1884   double ct, st, cp, sp;
1885   double x1, y1, z1, x2, y2, z2, x3, y3, Rd;
1886 
1887 #if WORKSPACE_AT_STACK
1888   double tmp_sb[(ERI_LMAXMAX*2)*ERI_NGRIDMAX];
1889   double tmp_dsb[(ERI_LMAXMAX*2)*ERI_NGRIDMAX];
1890 #else
1891   double *tmp_sb  = solver->ws_sb;
1892   double *tmp_dsb = solver->ws_dsb;
1893 #endif
1894 
1895   const double *glx, *glw;
1896   double *glj, *gldj;
1897 
1898   const int *j2l  = g_itbl_j2l;
1899   const int *j2m  = g_itbl_j2m;
1900 
1901   double scr, scr_a;
1902 
1903   int n, igtbl;
1904   const int* gtbl_n;
1905   const long* gtbl_j1j2;
1906   const double* gtbl_gc;
1907 #if 0
1908   int m1, m2;
1909 #endif
1910 
1911 #if SUMSKIP
1912   int ndglf;
1913   const double *fmax1, *fmax2;
1914   const double *dfmax1[3], *dfmax2[3];
1915 #endif
1916 
1917   ngl   = ERI_ngl(solver);
1918   lmax  = ERI_lmax(solver);
1919   lmax0 = ERI_lmax_gl(solver);
1920   jmax0 = lmax0*lmax0;
1921   jmax1 = lmax1*lmax1;
1922 
1923   glx  = solver->glq_x;
1924   glw  = solver->glq_w;
1925   glj  = solver->glq_j;
1926   gldj = solver->glq_dj;
1927 
1928   gtbl_n    = ERI_Gtbl_n(solver->gtbl);
1929   gtbl_j1j2 = ERI_Gtbl_j1j2(solver->gtbl);
1930   gtbl_gc   = ERI_Gtbl_gc(solver->gtbl);
1931 
1932 #if SUMSKIP
1933   switch (solver->rcsh) {
1934   case ERI_SH_COMPLEX: ndglf = 2*ngl*jmax0; break;
1935   case ERI_SH_REAL:    ndglf = ngl*jmax0; break;
1936   }
1937 
1938   fmax1 = &F1[ndglf];
1939   fmax2 = &F2[ndglf];
1940   dfmax1[0] = &dF1[0][ndglf];
1941   dfmax1[1] = &dF1[1][ndglf];
1942   dfmax1[2] = &dF1[2][ndglf];
1943   dfmax2[0] = &dF2[0][ndglf];
1944   dfmax2[1] = &dF2[1][ndglf];
1945   dfmax2[2] = &dF2[2][ndglf];
1946 #endif
1947 
1948   Rd = sqrt(R*R+delta*delta*exp(-R*R/delta/delta));
1949   ct = cos(theta);
1950   st = sin(theta);
1951   cp = cos(phi);
1952   sp = sin(phi);
1953 
1954   x1 = st*cp;     y1 = st*sp;    z1 = ct;
1955   x2 = ct*cp/Rd;  y2 = ct*sp/Rd; z2 = -st/Rd;
1956   x3 = -sp/Rd/st; y3 = cp/Rd/st;
1957 
1958   if (fabs(theta)<1e-10) { x3 = 0.0; y3 = 0.0; }
1959 
1960   /* spherical Bessel function */
1961   if (omega<0.0) {
1962     /* no screening */
1963     for (ik=0; ik<ngl; ik++) {
1964       k = glx[ik];
1965       ERI_Spherical_Bessel(k*R, lmax, tmp_sb, tmp_dsb);
1966       for (l=0; l<lmax; l++) {
1967         i = l*ngl + ik;
1968         glj[i]  = glw[ik]*exp(k)*tmp_sb[l]*32.0*PI;
1969         gldj[i] = glw[ik]*exp(k)*tmp_dsb[l]*k*32.0*PI;
1970       }
1971     }
1972   } else {
1973     /* screening */
1974     scr_a = 0.25/omega/omega;
1975     for (ik=0; ik<ngl; ik++) {
1976       k = glx[ik];
1977       ERI_Spherical_Bessel(k*R, lmax, tmp_sb, tmp_dsb);
1978       scr = 1.0-exp(-k*k*scr_a);
1979       for (l=0; l<lmax; l++) {
1980         i = l*ngl + ik;
1981         glj[i]  = glw[ik]*exp(k)*tmp_sb[l]*32.0*PI*scr;
1982         gldj[i] = glw[ik]*exp(k)*tmp_dsb[l]*k*32.0*PI*scr;
1983       }
1984     }
1985   }
1986 
1987   I4[0] = 0.0;
1988   I4[1] = 0.0;
1989   for (i=0; i<4; i++) {
1990     for (ixyz=0; ixyz<3; ixyz++) {
1991       dI4[i][ixyz][0] = 0.0;
1992       dI4[i][ixyz][1] = 0.0;
1993     }
1994   }
1995 
1996   switch (solver->rcsh) {
1997   case ERI_SH_COMPLEX:
1998     igtbl=0;
1999     for (j=0; j<jmax1; j++) {
2000       l = j2l[j];
2001       m = j2m[j];
2002       ERI_Spherical_Harmonics(l, m, theta, phi, sh, dsht, dshp);
2003       int1[0] = 0.0;
2004       int1[1] = 0.0;
2005       int2[0] = 0.0;
2006       int2[1] = 0.0;
2007       for (ixyz=0; ixyz<3; ixyz++) {
2008         int3[ixyz][0] = 0.0;
2009         int3[ixyz][1] = 0.0;
2010         int4[ixyz][0] = 0.0;
2011         int4[ixyz][1] = 0.0;
2012       }
2013       n = gtbl_n[j];
2014       for (i=0; i<n; i++) {
2015         gnt = gtbl_gc[igtbl];
2016         j1  = ERI_GTBL_UNPACK_J1( gtbl_j1j2[igtbl] );
2017         j2  = ERI_GTBL_UNPACK_J2( gtbl_j1j2[igtbl] );
2018         igtbl++;
2019 
2020         if (j1<jmax1 && j2<jmax1) {
2021           l1 = j2l[j1];
2022           l2 = j2l[j2];
2023           gl_ibase1 = j1*ngl;
2024           gl_ibase2 = j2*ngl;
2025 #if SUMSKIP
2026           if (fmax1[j1]*fmax2[j2]>SUMSKIP_THRESHOLD)
2027 #endif
2028           {
2029             sum[0] = 0.0;
2030             sum[1] = 0.0;
2031             dsum[0] = 0.0;
2032             dsum[1] = 0.0;
2033             for (ik=0; ik<ngl; ik++) {
2034               igl1 = 2*(gl_ibase1+ik);
2035               igl2 = 2*(gl_ibase2+ik);
2036               FG[0] = F1[igl1+0]*F2[igl2+0] - F1[igl1+1]*F2[igl2+1];
2037               FG[1] = F1[igl1+0]*F2[igl2+1] + F1[igl1+1]*F2[igl2+0];
2038               sum[0] += glj[l*ngl+ik]*FG[0];
2039               sum[1] += glj[l*ngl+ik]*FG[1];
2040               dsum[0] += gldj[l*ngl+ik]*FG[0];
2041               dsum[1] += gldj[l*ngl+ik]*FG[1];
2042             }
2043             phase(l2+l-l1, sum);
2044             phase(l2+l-l1, dsum);
2045 
2046             int1[0] += sum[0]*gnt;
2047             int1[1] += sum[1]*gnt;
2048 
2049             int2[0] += dsum[0]*gnt;
2050             int2[1] += dsum[1]*gnt;
2051           }
2052           for (ixyz=0; ixyz<3; ixyz++) {
2053 #if SUMSKIP
2054             if (dfmax1[ixyz][j1]*fmax2[j2]>SUMSKIP_THRESHOLD)
2055 #endif
2056             {
2057               dsum[0] = 0.0;
2058               dsum[1] = 0.0;
2059               for (ik=0; ik<ngl; ik++) {
2060                 igl1 = 2*(gl_ibase1+ik);
2061                 igl2 = 2*(gl_ibase2+ik);
2062                 dFG[ixyz][0] = dF1[ixyz][igl1+0]*F2[igl2+0]
2063                                -dF1[ixyz][igl1+1]*F2[igl2+1];
2064                 dFG[ixyz][1] = dF1[ixyz][igl1+0]*F2[igl2+1]
2065                                +dF1[ixyz][igl1+1]*F2[igl2+0];
2066                 dsum[0] += glj[l*ngl+ik]*dFG[ixyz][0];
2067                 dsum[1] += glj[l*ngl+ik]*dFG[ixyz][1];
2068               }
2069               phase(l2+l-l1, dsum);
2070               int3[ixyz][0] += dsum[0]*gnt;
2071               int3[ixyz][1] += dsum[1]*gnt;
2072             }
2073 #if SUMSKIP
2074             if (fmax1[j1]*dfmax2[ixyz][j2]>SUMSKIP_THRESHOLD)
2075 #endif
2076             {
2077               dsum[0] = 0.0;
2078               dsum[1] = 0.0;
2079               for (ik=0; ik<ngl; ik++) {
2080                 igl1 = 2*(gl_ibase1+ik);
2081                 igl2 = 2*(gl_ibase2+ik);
2082                 FdG[ixyz][0] = F1[igl1+0]*dF2[ixyz][igl2+0]
2083                                -F1[igl1+1]*dF2[ixyz][igl2+1];
2084                 FdG[ixyz][1] = F1[igl1+0]*dF2[ixyz][igl2+1]
2085                                +F1[igl1+1]*dF2[ixyz][igl2+0];
2086                 dsum[0] += glj[l*ngl+ik]*FdG[ixyz][0];
2087                 dsum[1] += glj[l*ngl+ik]*FdG[ixyz][1];
2088               }
2089               phase(l2+l-l1, dsum);
2090               int4[ixyz][0] += dsum[0]*gnt;
2091               int4[ixyz][1] += dsum[1]*gnt;
2092             }
2093           } /* loop of ixyz */
2094         }
2095       }
2096 
2097       q1[0] = int1[0]*sh[0] - int1[1]*sh[1];
2098       q1[1] = int1[1]*sh[0] + int1[0]*sh[1];
2099 
2100       I4[0] += q1[0];
2101       I4[1] += q1[1];
2102 
2103       q2[0] = int2[0]*sh[0] - int2[1]*sh[1];
2104       q2[1] = int2[1]*sh[0] + int2[0]*sh[1];
2105 
2106       q3[0] = int1[0]*dsht[0] - int1[1]*dsht[1];
2107       q3[1] = int1[1]*dsht[0] + int1[0]*dsht[1];
2108       q4[0] = int1[0]*dshp[0] - int1[1]*dshp[1];
2109       q4[1] = int1[1]*dshp[0] + int1[0]*dshp[1];
2110 
2111       q5[0][0] = x1*q2[0] + x2*q3[0] + x3*q4[0];
2112       q5[0][1] = x1*q2[1] + x2*q3[1] + x3*q4[1];
2113       q5[1][0] = y1*q2[0] + y2*q3[0] + y3*q4[0];
2114       q5[1][1] = y1*q2[1] + y2*q3[1] + y3*q4[1];
2115       q5[2][0] = z1*q2[0] + z2*q3[0];
2116       q5[2][1] = z1*q2[1] + z2*q3[1];
2117 
2118       for (ixyz=0; ixyz<3; ixyz++) {
2119         q6[0] = int3[ixyz][0]*sh[0] - int3[ixyz][1]*sh[1];
2120         q6[1] = int3[ixyz][1]*sh[0] + int3[ixyz][0]*sh[1];
2121         q7[0] = int4[ixyz][0]*sh[0] - int4[ixyz][1]*sh[1];
2122         q7[1] = int4[ixyz][1]*sh[0] + int4[ixyz][0]*sh[1];
2123 
2124         dI4[0][ixyz][0] +=      -cx12*q5[ixyz][0] + q6[0];
2125         dI4[0][ixyz][1] +=      -cx12*q5[ixyz][1] + q6[1];
2126         dI4[1][ixyz][0] += (cx12-1.0)*q5[ixyz][0] - q6[0];
2127         dI4[1][ixyz][1] += (cx12-1.0)*q5[ixyz][1] - q6[1];
2128         dI4[2][ixyz][0] +=       cx34*q5[ixyz][0] + q7[0];
2129         dI4[2][ixyz][1] +=       cx34*q5[ixyz][1] + q7[1];
2130         dI4[3][ixyz][0] += (1.0-cx34)*q5[ixyz][0] - q7[0];
2131         dI4[3][ixyz][1] += (1.0-cx34)*q5[ixyz][1] - q7[1];
2132       }
2133     } /* loop of j */
2134     break;
2135 
2136   case ERI_SH_REAL:
2137     abort();
2138   }
2139 
2140 }
2141 
2142 
2143 
2144 
ERI_Integral_GL_PrejY(ERI_t * solver,double * R,double * theta,double * phi,int numR,double omega,double * prej,double * preY,int * mul_j2,double * mul_gc,int * mul_n,int * minimalR,int * num_minimalR)2145 void ERI_Integral_GL_PrejY(
2146   ERI_t        *solver,
2147   double       *R,     /* (IN) Displacement of two expansion centers */
2148   double       *theta,
2149   double       *phi,
2150   int           numR,
2151   double        omega,  /* (IN) screening parameter */
2152   double       *prej,   /* [lmax*ngl*numR] */
2153   double       *preY,   /* [numR*jmax1] */
2154   int          *mul_j2, /* [jmax1*jmax1*jmax1] */
2155   double       *mul_gc, /* [jmax1*jmax1*jmax1] */
2156   int          *mul_n,  /* [jmax1*jmax1] */
2157   int          *minimalR,     /* [numR] */
2158   int          *num_minimalR
2159 )
2160 {
2161   int ngl, lmax, jmax;
2162   int i, j, j1, j2, ik, l, m, l1, l2;
2163 
2164   double sh[2], dsht[2], dshp[2];
2165   double k, z;
2166 
2167 #if WORKSPACE_AT_STACK
2168   double tmp_sb[(ERI_LMAXMAX*2)*ERI_NGRIDMAX];
2169   double tmp_dsb[(ERI_LMAXMAX*2)*ERI_NGRIDMAX];
2170 #else
2171   double *tmp_sb = solver->ws_sb;
2172   double *tmp_dsb = solver->ws_dsb;
2173 #endif
2174 
2175   const double *glx, *glw;
2176 
2177   const int *j2l = g_itbl_j2l;
2178   const int *j2m = g_itbl_j2m;
2179 
2180   double scr;
2181 
2182   int iR, imR, num_mR;
2183   int nmul;
2184 
2185   int mR2iR[MAXNUMR];
2186   double R0;
2187 
2188   int n, igtbl, igtbl0;
2189   const int* gtbl_n;
2190   const long* gtbl_j1j2;
2191   const double* gtbl_gc;
2192 
2193   int lphase;
2194 
2195   ngl  = ERI_ngl(solver);
2196   lmax = ERI_lmax_gl(solver);
2197   jmax = lmax*lmax;
2198 
2199   glx  = solver->glq_x;
2200   glw  = solver->glq_w;
2201 
2202   gtbl_n    = ERI_Gtbl_n(solver->gtbl);
2203   gtbl_j1j2 = ERI_Gtbl_j1j2(solver->gtbl);
2204   gtbl_gc   = ERI_Gtbl_gc(solver->gtbl);
2205 
2206   ngl  = ERI_ngl(solver);
2207   lmax = ERI_lmax_gl(solver);
2208   jmax = lmax*lmax;
2209 
2210   glx  = solver->glq_x;
2211   glw  = solver->glq_w;
2212 
2213   /* minimal set of R */
2214   num_mR = 0;
2215   for (i=0; i<numR; i++) {
2216     for (j=0; j<num_mR; j++) {
2217       if (fabs(R[mR2iR[j]]-R[i])<1e-10) { break; }
2218     }
2219     minimalR[i] = j;
2220     if (j==num_mR) {
2221       if (num_mR>=MAXNUMR) {
2222         fprintf(stderr, "***ERROR in %s (%d)\n", __FILE__, __LINE__);
2223         fprintf(stderr, "   MAXNUMR is too small!\n");
2224         abort();
2225       }
2226       mR2iR[num_mR] = i;
2227       num_mR++;
2228     }
2229   }
2230   *num_minimalR = num_mR;
2231 
2232   /* spherical Bessel function */
2233   for (imR=0; imR<num_mR; imR++) {
2234     R0 = R[mR2iR[imR]];
2235     for (ik=0; ik<ngl; ik++) {
2236       k = glx[ik];
2237       ERI_Spherical_Bessel(k*R0, lmax, tmp_sb, tmp_dsb);
2238       if (omega<0.0) {
2239         scr = 1.0; /* no screening */
2240       } else {
2241         scr = 1.0-exp(-k*k*0.25/omega/omega); /* screening */
2242       }
2243       for (l=0; l<lmax; l++) {
2244         prej[(imR*lmax+l)*ngl+ik] = glw[ik]*exp(k)*tmp_sb[l]*scr;
2245       }
2246     }
2247   }
2248 
2249   /* Spherical Harominics */
2250   for (iR=0; iR<numR; iR++) {
2251     for (j=0; j<jmax; j++) {
2252       l = j2l[j];
2253       m = j2m[j];
2254       ERI_Real_Spherical_Harmonics(l, m, theta[iR], phi[iR], sh, dsht, dshp);
2255       preY[iR*jmax+j] = sh[0];
2256     }
2257   }
2258 
2259   igtbl0 = 0;
2260   for (j=0; j<jmax; j++) {
2261     l = j2l[j];
2262     n = gtbl_n[j];
2263     for (j1=0; j1<jmax; j1++) {
2264       l1 = j2l[j1];
2265       nmul = 0;
2266       for (i=0; i<n; i++) {
2267         igtbl = igtbl0 + i;
2268         if (j1 != ERI_GTBL_UNPACK_J1( gtbl_j1j2[igtbl] )) { continue; }
2269 
2270         j2 = ERI_GTBL_UNPACK_J2( gtbl_j1j2[igtbl] );
2271         if (j2>=jmax) { continue; }
2272 
2273         l2 = j2l[j2];
2274         lphase = l2+l-l1;
2275         if (1==lphase%2) { continue; }
2276 
2277         mul_j2[(j*jmax+j1)*jmax+nmul] = j2;
2278         if (2==lphase%4) {
2279           mul_gc[(j*jmax+j1)*jmax+nmul] = -gtbl_gc[igtbl];
2280         } else {
2281           mul_gc[(j*jmax+j1)*jmax+nmul] = gtbl_gc[igtbl];
2282         }
2283         nmul++;
2284       } /* i */
2285       mul_n [j*jmax+j1] = nmul;
2286     } /* j1 */
2287     igtbl0 += n;
2288   } /* j */
2289 }
2290 
2291 
2292 
2293 
2294 /*----------------------------------------------------------------------
2295   ERI_Integral_GL
2296 
2297   If you need the derivatives, you should call ERI_I4_d.
2298 ----------------------------------------------------------------------*/
ERI_Integral_GL_Post(ERI_t * solver,double * I4,const double * F1,const double * F2,int numR,double * prej,double * preY,int * mul_j2,double * mul_gc,int * mul_n,int * minimalR,int num_minimalR)2299 void ERI_Integral_GL_Post(
2300   ERI_t        *solver,
2301   double       *I4,    /* (OUT) [numR] */
2302   const double *F1,    /* (IN) Overlap matrix */
2303   const double *F2,    /* (IN) */
2304   int           numR,
2305   double       *prej,   /* [lmax*ngl*numR] */
2306   double       *preY,   /* [numR*jmax1] */
2307   int          *mul_j2, /* [jmax1*jmax1*jmax1] */
2308   double       *mul_gc, /* [jmax1*jmax1*jmax1] */
2309   int          *mul_n,  /* [jmax1*jmax1] */
2310   int          *minimalR,    /* [numR] */
2311   int           num_minimalR
2312 )
2313 {
2314   int ngl, lmax, jmax, nmul, lphase;
2315   int iR, imR, i, j, j1, j2, ik, l, l1, l2;
2316   double gnt, sh, sum;
2317   const int *j2l = g_itbl_j2l;
2318   double int1[MAXNUMR];
2319 
2320 #if SUMSKIP
2321   int ndglf;
2322   const double *fmax1, *fmax2;
2323 #endif
2324 
2325   ngl  = ERI_ngl(solver);
2326   lmax = ERI_lmax_gl(solver);
2327   jmax = lmax*lmax;
2328 
2329 #if SUMSKIP
2330   ndglf = ngl*jmax;
2331   fmax1 = &F1[ndglf];
2332   fmax2 = &F2[ndglf];
2333 #endif
2334 
2335   for (iR=0; iR<numR; iR++) { I4[iR]=0.0; }
2336 
2337   for (j=0; j<jmax; j++) {
2338     l = j2l[j];
2339     for (imR=0; imR<num_minimalR; imR++) { int1[imR]=0.0; }
2340 
2341     for (j1=0; j1<jmax; j1++) {
2342       nmul =  mul_n[j*jmax+j1];
2343       for (i=0; i<nmul; i++) {
2344         gnt = mul_gc[(j*jmax+j1)*jmax+i];
2345         j2  = mul_j2[(j*jmax+j1)*jmax+i];
2346 #if SUMSKIP
2347         if (fmax1[j1]*fmax2[j2]<SUMSKIP_THRESHOLD) { continue; }
2348 #endif
2349 
2350 #if LOOP_UNROLLING
2351         for (imR=0; imR<num_minimalR; imR++) {
2352           sum = 0.0;
2353           for (ik=0; ik<ngl-3; ik+=4) {
2354             sum += prej[((imR)*lmax+l)*ngl+ik+0]
2355                     *F1[j1*ngl+ik+0]*F2[j2*ngl+ik+0]
2356                  + prej[((imR)*lmax+l)*ngl+ik+1]
2357                     *F1[j1*ngl+ik+1]*F2[j2*ngl+ik+1]
2358                  + prej[((imR)*lmax+l)*ngl+ik+2]
2359                     *F1[j1*ngl+ik+2]*F2[j2*ngl+ik+2]
2360                  + prej[((imR)*lmax+l)*ngl+ik+3]
2361                     *F1[j1*ngl+ik+3]*F2[j2*ngl+ik+3];
2362           }
2363           for (; ik<ngl; ik++) {
2364             sum += prej[((imR)*lmax+l)*ngl+ik]
2365                     *F1[j1*ngl+ik]*F2[j2*ngl+ik];
2366           }
2367           int1[imR] += sum*gnt;
2368         } /* loop of imR */
2369 #else
2370         for (imR=0; imR<num_minimalR; imR++) {
2371           sum = 0.0;
2372           for (ik=0; ik<ngl; ik++) {
2373             sum += prej[(imR*lmax+l)*ngl+ik]
2374                     *F1[j1*ngl+ik]*F2[j2*ngl+ik];
2375           }
2376           int1[imR] += sum*gnt;
2377         } /* loop of imR */
2378 #endif
2379       } /* loop of i */
2380     } /* loop of j1 */
2381 
2382     for (iR=0; iR<numR; iR++) {
2383       imR = minimalR[iR];
2384       I4[iR] += 32.0*PI*int1[imR]*preY[iR*jmax+j];
2385     }
2386   } /* loop of j */
2387 }
2388 
2389 
2390 
2391 /*----------------------------------------------------------------------
2392   ERI_Integral_GL
2393 
2394   If you need the derivatives, you should call ERI_I4_d.
2395 ----------------------------------------------------------------------*/
ERI_Integral_GL_Post2(ERI_t * solver,double * I4,const double * F1,const double * F2,int numR,double * prej,double * preY,int * mul_j2,double * mul_gc,int * mul_n,int * minimalR,int num_minimalR)2396 void ERI_Integral_GL_Post2(
2397   ERI_t        *solver,
2398   double       *I4,    /* (OUT) [numR] */
2399   const double *F1,    /* (IN) Overlap matrix */
2400   const double *F2,    /* (IN) */
2401   int           numR,
2402   double       *prej,   /* [lmax*ngl*numR] */
2403   double       *preY,   /* [numR*jmax1] */
2404   int          *mul_j2, /* [jmax1*jmax1*jmax1] */
2405   double       *mul_gc, /* [jmax1*jmax1*jmax1] */
2406   int          *mul_n,  /* [jmax1*jmax1] */
2407   int          *minimalR,    /* [numR] */
2408   int           num_minimalR
2409 )
2410 {
2411   int ngl, lmax, jmax, nmul, lphase;
2412   int iR, imR, i, j, j1, j2, ik, l, l1, l2;
2413   double gnt, sh, sum;
2414   const int *j2l = g_itbl_j2l;
2415   double int1[MAXNUMR*ERI_LMAXMAX*ERI_LMAXMAX];
2416 
2417 #if SUMSKIP
2418   int ndglf;
2419   const double *fmax1, *fmax2;
2420 #endif
2421 
2422   ngl  = ERI_ngl(solver);
2423   lmax = ERI_lmax_gl(solver);
2424   jmax = lmax*lmax;
2425 
2426 #if SUMSKIP
2427   ndglf = ngl*jmax;
2428   fmax1 = &F1[ndglf];
2429   fmax2 = &F2[ndglf];
2430 #endif
2431 
2432   for (iR=0; iR<numR; iR++) { I4[iR]=0.0; }
2433   for (i=0; i<MAXNUMR*ERI_LMAXMAX*ERI_LMAXMAX; i++) { int1[i]=0.0; }
2434 
2435   for (j=0; j<jmax; j++) {
2436     l = j2l[j];
2437     for (j1=0; j1<jmax; j1++) {
2438       nmul =  mul_n[j*jmax+j1];
2439       for (i=0; i<nmul; i++) {
2440         gnt = mul_gc[(j*jmax+j1)*jmax+i];
2441         j2  = mul_j2[(j*jmax+j1)*jmax+i];
2442 #if SUMSKIP
2443         if (fmax1[j1]*fmax2[j2]<SUMSKIP_THRESHOLD) { continue; }
2444 #endif
2445 
2446         for (imR=0; imR<num_minimalR; imR++) {
2447           sum = 0.0;
2448           for (ik=0; ik<ngl; ik++) {
2449             sum += prej[(imR*lmax+l)*ngl+ik]
2450                     *F1[j1*ngl+ik]*F2[j2*ngl+ik];
2451           }
2452           int1[j*numR+imR] += sum*gnt;
2453         } /* loop of imR */
2454       } /* loop of i */
2455     } /* loop of j1 */
2456   } /* loop of l */
2457 
2458   for (j=0; j<jmax; j++) {
2459     for (iR=0; iR<numR; iR++) {
2460       imR = minimalR[iR];
2461       I4[iR] += 32.0*PI*int1[j*numR+imR]*preY[iR*jmax+j];
2462     }
2463   } /* loop of j */
2464 }
2465 
2466 
2467 
2468 /*----------------------------------------------------------------------
2469   ERI_Integral_GL_X
2470 
2471   If you need the derivatives, you should call ERI_I4_d.
2472 ----------------------------------------------------------------------*/
ERI_Integral_GL_X(ERI_t * solver,double * X,const double * F1,const double * F2,int numR,double * prej,int * mul_j2,double * mul_gc,int * mul_n,int * minimalR,int num_minimalR)2473 void ERI_Integral_GL_X(
2474   ERI_t        *solver,
2475   double       *X,      /* (OUT) [numR*lmax] */
2476   const double *F1,    /* (IN) Overlap matrix */
2477   const double *F2,    /* (IN) */
2478   int           numR,
2479   double       *prej,   /* [lmax*ngl*numR] */
2480   int          *mul_j2,      /* [jmax1*jmax1*jmax1] */
2481   double       *mul_gc,      /* [jmax1*jmax1*jmax1] */
2482   int          *mul_n,       /* [jmax1*jmax1] */
2483   int          *minimalR,    /* [numR] */
2484   int           num_minimalR
2485 )
2486 {
2487   int ngl, lmax, jmax, nmul, lphase;
2488   int iR, imR, i, j, j1, j2, ik, l, l1, l2;
2489   double gnt, sum;
2490   const int *j2l = g_itbl_j2l;
2491 
2492 #if SUMSKIP
2493   int ndglf;
2494   const double *fmax1, *fmax2;
2495 #endif
2496 
2497   ngl  = ERI_ngl(solver);
2498   lmax = ERI_lmax_gl(solver);
2499   jmax = lmax*lmax;
2500 
2501 #if SUMSKIP
2502   ndglf = ngl*jmax;
2503   fmax1 = &F1[ndglf];
2504   fmax2 = &F2[ndglf];
2505 #endif
2506 
2507 
2508   for (i=0; i<numR*lmax; i++) { X[i]=0.0; }
2509 
2510   for (j=0; j<jmax; j++) {
2511     l = j2l[j];
2512     for (j1=0; j1<jmax; j1++) {
2513       nmul =  mul_n[j*jmax+j1];
2514       for (i=0; i<nmul; i++) {
2515         gnt = mul_gc[(j*jmax+j1)*jmax+i];
2516         j2  = mul_j2[(j*jmax+j1)*jmax+i];
2517 #if SUMSKIP
2518         if (fmax1[j1]*fmax2[j2]<SUMSKIP_THRESHOLD) { continue; }
2519 #endif
2520 
2521         for (imR=0; imR<num_minimalR; imR++) {
2522           sum = 0.0;
2523           for (ik=0; ik<ngl; ik++) {
2524             sum += prej[(imR*lmax+l)*ngl+ik]
2525                     *F1[j1*ngl+ik]*F2[j2*ngl+ik];
2526           }
2527           X[l*numR+imR] += sum*gnt;
2528         } /* loop of imR */
2529       } /* loop of i */
2530     } /* loop of j1 */
2531   } /* loop of j */
2532 }
2533 
2534 
2535 /*----------------------------------------------------------------------
2536   ERI_Integral_GL
2537 
2538   If you need the derivatives, you should call ERI_I4_d.
2539 ----------------------------------------------------------------------*/
ERI_Integral_GL_X_Post(ERI_t * solver,double * I4,const double * X,int numR,const double * preY,const int * minimalR)2540 void ERI_Integral_GL_X_Post(
2541   ERI_t        *solver,
2542   double       *I4,       /* (OUT) [numR] */
2543   const double *X,        /* (IN)  [numR*lmax1] */
2544   int           numR,
2545   const double *preY,     /* (IN)  [numR*jmax1] */
2546   const int    *minimalR  /* (IN)  [numR] */
2547 )
2548 {
2549   int lmax, jmax;
2550   int iR, imR, j, l;
2551   double sum;
2552   const int *j2l   = g_itbl_j2l;
2553 
2554   lmax = ERI_lmax_gl(solver);
2555   jmax = lmax*lmax;
2556 
2557   for (iR=0; iR<numR; iR++) {
2558     imR = minimalR[iR];
2559     sum = 0.0;
2560     for (j=0; j<jmax; j++) {
2561       l = j2l[j];
2562       sum += X[l*numR+imR]*preY[iR*jmax+j];
2563     } /* loop of j */
2564     I4[iR] = 32.0*PI*sum;
2565   } /* loop of iR */
2566 }
2567 
2568 /* EOF */
2569