1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6 
7 
ostei_f_p_g_p(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__f_p_g_p)8 int ostei_f_p_g_p(struct simint_multi_shellpair const P,
9                   struct simint_multi_shellpair const Q,
10                   double screen_tol,
11                   double * const restrict work,
12                   double * const restrict INT__f_p_g_p)
13 {
14 
15     SIMINT_ASSUME_ALIGN_DBL(work);
16     SIMINT_ASSUME_ALIGN_DBL(INT__f_p_g_p);
17     int ab, cd, abcd;
18     int istart, jstart;
19     int iprimcd, nprim_icd, icd;
20     const int check_screen = (screen_tol > 0.0);
21     int i, j;
22     int n;
23     int not_screened;
24     int real_abcd;
25     int iket;
26     int ibra;
27 
28     // partition workspace
29     double * const INT__f_s_g_s = work + (SIMINT_NSHELL_SIMD * 0);
30     double * const INT__f_s_h_s = work + (SIMINT_NSHELL_SIMD * 150);
31     double * const INT__g_s_g_s = work + (SIMINT_NSHELL_SIMD * 360);
32     double * const INT__g_s_h_s = work + (SIMINT_NSHELL_SIMD * 585);
33     SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*900);
34     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
35     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_p_s = primwork + 10;
36     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_d_s = primwork + 37;
37     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_f_s = primwork + 85;
38     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_g_s = primwork + 155;
39     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_h_s = primwork + 245;
40     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_p_s = primwork + 350;
41     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_d_s = primwork + 386;
42     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_f_s = primwork + 458;
43     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_g_s = primwork + 578;
44     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_h_s = primwork + 758;
45     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_d_s = primwork + 1010;
46     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_f_s = primwork + 1118;
47     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_g_s = primwork + 1298;
48     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_h_s = primwork + 1568;
49     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_f_s = primwork + 1946;
50     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_g_s = primwork + 2146;
51     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_h_s = primwork + 2446;
52     SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_g_s = primwork + 2866;
53     SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_h_s = primwork + 3091;
54     double * const hrrwork = (double *)(primwork + 3406);
55     double * const HRR_INT__f_p_g_s = hrrwork + 0;
56     double * const HRR_INT__f_p_h_s = hrrwork + 450;
57 
58 
59     // Create constants
60     const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
61     const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
62     const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
63     const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
64     const SIMINT_DBLTYPE const_5 = SIMINT_DBLSET1(5);
65     const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
66 
67 
68     ////////////////////////////////////////
69     // Loop over shells and primitives
70     ////////////////////////////////////////
71 
72     real_abcd = 0;
73     istart = 0;
74     for(ab = 0; ab < P.nshell12_clip; ++ab)
75     {
76         const int iend = istart + P.nprim12[ab];
77 
78         cd = 0;
79         jstart = 0;
80 
81         for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
82         {
83             const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
84             int jend = jstart;
85             for(i = 0; i < nshellbatch; i++)
86                 jend += Q.nprim12[cd+i];
87 
88             // Clear the beginning of the workspace (where we are accumulating integrals)
89             memset(work, 0, SIMINT_NSHELL_SIMD * 900 * sizeof(double));
90             abcd = 0;
91 
92 
93             for(i = istart; i < iend; ++i)
94             {
95                 SIMINT_DBLTYPE bra_screen_max;  // only used if check_screen
96 
97                 if(check_screen)
98                 {
99                     // Skip this whole thing if always insignificant
100                     if((P.screen[i] * Q.screen_max) < screen_tol)
101                         continue;
102                     bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
103                 }
104 
105                 icd = 0;
106                 iprimcd = 0;
107                 nprim_icd = Q.nprim12[cd];
108                 double * restrict PRIM_PTR_INT__f_s_g_s = INT__f_s_g_s + abcd * 150;
109                 double * restrict PRIM_PTR_INT__f_s_h_s = INT__f_s_h_s + abcd * 210;
110                 double * restrict PRIM_PTR_INT__g_s_g_s = INT__g_s_g_s + abcd * 225;
111                 double * restrict PRIM_PTR_INT__g_s_h_s = INT__g_s_h_s + abcd * 315;
112 
113 
114 
115                 // Load these one per loop over i
116                 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
117                 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
118                 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
119 
120                 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
121 
122                 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
123                 {
124                     // calculate the shell offsets
125                     // these are the offset from the shell pointed to by cd
126                     // for each element
127                     int shelloffsets[SIMINT_SIMD_LEN] = {0};
128                     int lastoffset = 0;
129                     const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
130 
131                     if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
132                     {
133                         // Handle if the first element of the vector is a new shell
134                         if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
135                         {
136                             nprim_icd += Q.nprim12[cd + (++icd)];
137                             PRIM_PTR_INT__f_s_g_s += 150;
138                             PRIM_PTR_INT__f_s_h_s += 210;
139                             PRIM_PTR_INT__g_s_g_s += 225;
140                             PRIM_PTR_INT__g_s_h_s += 315;
141                         }
142                         iprimcd++;
143                         for(n = 1; n < SIMINT_SIMD_LEN; ++n)
144                         {
145                             if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
146                             {
147                                 shelloffsets[n] = shelloffsets[n-1] + 1;
148                                 lastoffset++;
149                                 nprim_icd += Q.nprim12[cd + (++icd)];
150                             }
151                             else
152                                 shelloffsets[n] = shelloffsets[n-1];
153                             iprimcd++;
154                         }
155                     }
156                     else
157                         iprimcd += SIMINT_SIMD_LEN;
158 
159                     // Do we have to compute this vector (or has it been screened out)?
160                     // (not_screened != 0 means we have to do this vector)
161                     if(check_screen)
162                     {
163                         const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
164                         if(vmax < screen_tol)
165                         {
166                             PRIM_PTR_INT__f_s_g_s += lastoffset*150;
167                             PRIM_PTR_INT__f_s_h_s += lastoffset*210;
168                             PRIM_PTR_INT__g_s_g_s += lastoffset*225;
169                             PRIM_PTR_INT__g_s_h_s += lastoffset*315;
170                             continue;
171                         }
172                     }
173 
174                     const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
175                     const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
176                     const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
177                     const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
178 
179 
180                     /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
181                     SIMINT_DBLTYPE PQ[3];
182                     PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
183                     PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
184                     PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
185                     SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
186                     R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
187                     R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
188 
189                     const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
190                     const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
191                     const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
192                     const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
193                     const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
194                     const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
195                     const SIMINT_DBLTYPE Q_PA[3] = { SIMINT_DBLLOAD(Q.PA_x, j), SIMINT_DBLLOAD(Q.PA_y, j), SIMINT_DBLLOAD(Q.PA_z, j) };
196 
197                     // NOTE: Minus sign!
198                     const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
199                     SIMINT_DBLTYPE aop_PQ[3];
200                     aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
201                     aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
202                     aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
203 
204                     SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
205                     SIMINT_DBLTYPE aoq_PQ[3];
206                     aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
207                     aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
208                     aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
209                     // Put a minus sign here so we don't have to in RR routines
210                     a_over_q = SIMINT_NEG(a_over_q);
211 
212 
213                     //////////////////////////////////////////////
214                     // Fjt function section
215                     // Maximum v value: 9
216                     //////////////////////////////////////////////
217                     // The parameter to the Fjt function
218                     const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
219 
220 
221                     const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
222 
223 
224                     boys_F_split(PRIM_INT__s_s_s_s, F_x, 9);
225                     SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
226                     prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
227                     for(n = 0; n <= 9; n++)
228                         PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
229 
230                     //////////////////////////////////////////////
231                     // Primitive integrals: Vertical recurrance
232                     //////////////////////////////////////////////
233 
234                     const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
235                     const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
236                     const SIMINT_DBLTYPE vrr_const_3_over_2p = SIMINT_MUL(const_3, one_over_2p);
237                     const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
238                     const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
239                     const SIMINT_DBLTYPE vrr_const_3_over_2q = SIMINT_MUL(const_3, one_over_2q);
240                     const SIMINT_DBLTYPE vrr_const_4_over_2q = SIMINT_MUL(const_4, one_over_2q);
241                     const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
242                     const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
243                     const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
244                     const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
245                     const SIMINT_DBLTYPE vrr_const_5_over_2pq = SIMINT_MUL(const_5, one_over_2pq);
246 
247 
248 
249                     // Forming PRIM_INT__s_s_p_s[9 * 3];
250                     for(n = 0; n < 9; ++n)  // loop over orders of auxiliary function
251                     {
252 
253                         PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
254                         PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]);
255 
256                         PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
257                         PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 1]);
258 
259                         PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
260                         PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 2]);
261 
262                     }
263 
264 
265 
266                     // Forming PRIM_INT__s_s_d_s[8 * 6];
267                     for(n = 0; n < 8; ++n)  // loop over orders of auxiliary function
268                     {
269 
270                         PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_p_s[n * 3 + 0]);
271                         PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 0]);
272                         PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 0]);
273 
274                         PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 0]);
275                         PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 1]);
276 
277                         PRIM_INT__s_s_d_s[n * 6 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 0]);
278                         PRIM_INT__s_s_d_s[n * 6 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 2]);
279 
280                         PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 1]);
281                         PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_d_s[n * 6 + 3]);
282                         PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 3]);
283 
284                         PRIM_INT__s_s_d_s[n * 6 + 4] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 1]);
285                         PRIM_INT__s_s_d_s[n * 6 + 4] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_d_s[n * 6 + 4]);
286 
287                         PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 2]);
288                         PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_d_s[n * 6 + 5]);
289                         PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 5]);
290 
291                     }
292 
293 
294 
295                     // Forming PRIM_INT__s_s_f_s[7 * 10];
296                     for(n = 0; n < 7; ++n)  // loop over orders of auxiliary function
297                     {
298 
299                         PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 0]);
300                         PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 0]);
301                         PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]), PRIM_INT__s_s_f_s[n * 10 + 0]);
302 
303                         PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 0]);
304                         PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 1]);
305 
306                         PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 0]);
307                         PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 2]);
308 
309                         PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 3]);
310                         PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 3]);
311 
312                         PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 1]);
313                         PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__s_s_f_s[n * 10 + 4]);
314 
315                         PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 5]);
316                         PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 5]);
317 
318                         PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 3]);
319                         PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 6]);
320                         PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_p_s[n * 3 + 1]), PRIM_INT__s_s_f_s[n * 10 + 6]);
321 
322                         PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 3]);
323                         PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 7]);
324 
325                         PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 5]);
326                         PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 8]);
327 
328                         PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 5]);
329                         PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 9]);
330                         PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_p_s[n * 3 + 2]), PRIM_INT__s_s_f_s[n * 10 + 9]);
331 
332                     }
333 
334 
335                     VRR_K_s_s_g_s(
336                             PRIM_INT__s_s_g_s,
337                             PRIM_INT__s_s_f_s,
338                             PRIM_INT__s_s_d_s,
339                             Q_PA,
340                             a_over_q,
341                             aoq_PQ,
342                             one_over_2q,
343                             6);
344 
345 
346                     VRR_I_p_s_g_s(
347                             PRIM_INT__p_s_g_s,
348                             PRIM_INT__s_s_g_s,
349                             PRIM_INT__s_s_f_s,
350                             P_PA,
351                             aop_PQ,
352                             one_over_2pq,
353                             4);
354 
355 
356                     VRR_I_p_s_f_s(
357                             PRIM_INT__p_s_f_s,
358                             PRIM_INT__s_s_f_s,
359                             PRIM_INT__s_s_d_s,
360                             P_PA,
361                             aop_PQ,
362                             one_over_2pq,
363                             4);
364 
365 
366                     ostei_general_vrr_I(2, 0, 4, 0, 3,
367                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
368                             PRIM_INT__p_s_g_s, PRIM_INT__s_s_g_s, NULL, PRIM_INT__p_s_f_s, NULL, PRIM_INT__d_s_g_s);
369 
370 
371 
372                     // Forming PRIM_INT__p_s_d_s[4 * 18];
373                     for(n = 0; n < 4; ++n)  // loop over orders of auxiliary function
374                     {
375 
376                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 0]);
377                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__p_s_d_s[n * 18 + 0]);
378                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 0]);
379 
380                         PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 1]);
381                         PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__p_s_d_s[n * 18 + 1]);
382                         PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 1]);
383 
384                         PRIM_INT__p_s_d_s[n * 18 + 2] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 2]);
385                         PRIM_INT__p_s_d_s[n * 18 + 2] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__p_s_d_s[n * 18 + 2]);
386                         PRIM_INT__p_s_d_s[n * 18 + 2] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 2]);
387 
388                         PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 3]);
389                         PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__p_s_d_s[n * 18 + 3]);
390 
391                         PRIM_INT__p_s_d_s[n * 18 + 4] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 4]);
392                         PRIM_INT__p_s_d_s[n * 18 + 4] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__p_s_d_s[n * 18 + 4]);
393 
394                         PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 5]);
395                         PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__p_s_d_s[n * 18 + 5]);
396 
397                         PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 0]);
398                         PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__p_s_d_s[n * 18 + 6]);
399 
400                         PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 1]);
401                         PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__p_s_d_s[n * 18 + 7]);
402                         PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 7]);
403 
404                         PRIM_INT__p_s_d_s[n * 18 + 8] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 2]);
405                         PRIM_INT__p_s_d_s[n * 18 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__p_s_d_s[n * 18 + 8]);
406 
407                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 3]);
408                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__p_s_d_s[n * 18 + 9]);
409                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 9]);
410 
411                         PRIM_INT__p_s_d_s[n * 18 + 10] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 4]);
412                         PRIM_INT__p_s_d_s[n * 18 + 10] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__p_s_d_s[n * 18 + 10]);
413                         PRIM_INT__p_s_d_s[n * 18 + 10] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 10]);
414 
415                         PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 5]);
416                         PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__p_s_d_s[n * 18 + 11]);
417 
418                         PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 0]);
419                         PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__p_s_d_s[n * 18 + 12]);
420 
421                         PRIM_INT__p_s_d_s[n * 18 + 13] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 1]);
422                         PRIM_INT__p_s_d_s[n * 18 + 13] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__p_s_d_s[n * 18 + 13]);
423 
424                         PRIM_INT__p_s_d_s[n * 18 + 14] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 2]);
425                         PRIM_INT__p_s_d_s[n * 18 + 14] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__p_s_d_s[n * 18 + 14]);
426                         PRIM_INT__p_s_d_s[n * 18 + 14] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 14]);
427 
428                         PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 3]);
429                         PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__p_s_d_s[n * 18 + 15]);
430 
431                         PRIM_INT__p_s_d_s[n * 18 + 16] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 4]);
432                         PRIM_INT__p_s_d_s[n * 18 + 16] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__p_s_d_s[n * 18 + 16]);
433                         PRIM_INT__p_s_d_s[n * 18 + 16] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 16]);
434 
435                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 5]);
436                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__p_s_d_s[n * 18 + 17]);
437                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 17]);
438 
439                     }
440 
441 
442                     VRR_I_d_s_f_s(
443                             PRIM_INT__d_s_f_s,
444                             PRIM_INT__p_s_f_s,
445                             PRIM_INT__s_s_f_s,
446                             PRIM_INT__p_s_d_s,
447                             P_PA,
448                             a_over_p,
449                             aop_PQ,
450                             one_over_2p,
451                             one_over_2pq,
452                             3);
453 
454 
455                     ostei_general_vrr_I(3, 0, 4, 0, 2,
456                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
457                             PRIM_INT__d_s_g_s, PRIM_INT__p_s_g_s, NULL, PRIM_INT__d_s_f_s, NULL, PRIM_INT__f_s_g_s);
458 
459 
460                     VRR_K_s_s_h_s(
461                             PRIM_INT__s_s_h_s,
462                             PRIM_INT__s_s_g_s,
463                             PRIM_INT__s_s_f_s,
464                             Q_PA,
465                             a_over_q,
466                             aoq_PQ,
467                             one_over_2q,
468                             5);
469 
470 
471                     ostei_general_vrr_I(1, 0, 5, 0, 4,
472                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
473                             PRIM_INT__s_s_h_s, NULL, NULL, PRIM_INT__s_s_g_s, NULL, PRIM_INT__p_s_h_s);
474 
475 
476                     ostei_general_vrr_I(2, 0, 5, 0, 3,
477                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
478                             PRIM_INT__p_s_h_s, PRIM_INT__s_s_h_s, NULL, PRIM_INT__p_s_g_s, NULL, PRIM_INT__d_s_h_s);
479 
480 
481                     ostei_general_vrr_I(3, 0, 5, 0, 2,
482                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
483                             PRIM_INT__d_s_h_s, PRIM_INT__p_s_h_s, NULL, PRIM_INT__d_s_g_s, NULL, PRIM_INT__f_s_h_s);
484 
485 
486 
487                     // Forming PRIM_INT__p_s_p_s[4 * 9];
488                     for(n = 0; n < 4; ++n)  // loop over orders of auxiliary function
489                     {
490 
491                         PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_p_s[n * 3 + 0]);
492                         PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 0]);
493                         PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 0]);
494 
495                         PRIM_INT__p_s_p_s[n * 9 + 1] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_p_s[n * 3 + 1]);
496                         PRIM_INT__p_s_p_s[n * 9 + 1] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 1]);
497 
498                         PRIM_INT__p_s_p_s[n * 9 + 2] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_p_s[n * 3 + 2]);
499                         PRIM_INT__p_s_p_s[n * 9 + 2] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 2]);
500 
501                         PRIM_INT__p_s_p_s[n * 9 + 3] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_p_s[n * 3 + 0]);
502                         PRIM_INT__p_s_p_s[n * 9 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 3]);
503 
504                         PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_p_s[n * 3 + 1]);
505                         PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 4]);
506                         PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 4]);
507 
508                         PRIM_INT__p_s_p_s[n * 9 + 5] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_p_s[n * 3 + 2]);
509                         PRIM_INT__p_s_p_s[n * 9 + 5] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 5]);
510 
511                         PRIM_INT__p_s_p_s[n * 9 + 6] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_p_s[n * 3 + 0]);
512                         PRIM_INT__p_s_p_s[n * 9 + 6] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 6]);
513 
514                         PRIM_INT__p_s_p_s[n * 9 + 7] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_p_s[n * 3 + 1]);
515                         PRIM_INT__p_s_p_s[n * 9 + 7] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 7]);
516 
517                         PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_p_s[n * 3 + 2]);
518                         PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 8]);
519                         PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 8]);
520 
521                     }
522 
523 
524                     VRR_I_d_s_d_s(
525                             PRIM_INT__d_s_d_s,
526                             PRIM_INT__p_s_d_s,
527                             PRIM_INT__s_s_d_s,
528                             PRIM_INT__p_s_p_s,
529                             P_PA,
530                             a_over_p,
531                             aop_PQ,
532                             one_over_2p,
533                             one_over_2pq,
534                             3);
535 
536 
537                     ostei_general_vrr_I(3, 0, 3, 0, 2,
538                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
539                             PRIM_INT__d_s_f_s, PRIM_INT__p_s_f_s, NULL, PRIM_INT__d_s_d_s, NULL, PRIM_INT__f_s_f_s);
540 
541 
542                     ostei_general_vrr_I(4, 0, 4, 0, 1,
543                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
544                             PRIM_INT__f_s_g_s, PRIM_INT__d_s_g_s, NULL, PRIM_INT__f_s_f_s, NULL, PRIM_INT__g_s_g_s);
545 
546 
547                     ostei_general_vrr_I(4, 0, 5, 0, 1,
548                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
549                             PRIM_INT__f_s_h_s, PRIM_INT__d_s_h_s, NULL, PRIM_INT__f_s_g_s, NULL, PRIM_INT__g_s_h_s);
550 
551 
552 
553 
554                     ////////////////////////////////////
555                     // Accumulate contracted integrals
556                     ////////////////////////////////////
557                     if(lastoffset == 0)
558                     {
559                         contract_all(150, PRIM_INT__f_s_g_s, PRIM_PTR_INT__f_s_g_s);
560                         contract_all(210, PRIM_INT__f_s_h_s, PRIM_PTR_INT__f_s_h_s);
561                         contract_all(225, PRIM_INT__g_s_g_s, PRIM_PTR_INT__g_s_g_s);
562                         contract_all(315, PRIM_INT__g_s_h_s, PRIM_PTR_INT__g_s_h_s);
563                     }
564                     else
565                     {
566                         contract(150, shelloffsets, PRIM_INT__f_s_g_s, PRIM_PTR_INT__f_s_g_s);
567                         contract(210, shelloffsets, PRIM_INT__f_s_h_s, PRIM_PTR_INT__f_s_h_s);
568                         contract(225, shelloffsets, PRIM_INT__g_s_g_s, PRIM_PTR_INT__g_s_g_s);
569                         contract(315, shelloffsets, PRIM_INT__g_s_h_s, PRIM_PTR_INT__g_s_h_s);
570                         PRIM_PTR_INT__f_s_g_s += lastoffset*150;
571                         PRIM_PTR_INT__f_s_h_s += lastoffset*210;
572                         PRIM_PTR_INT__g_s_g_s += lastoffset*225;
573                         PRIM_PTR_INT__g_s_h_s += lastoffset*315;
574                     }
575 
576                 }  // close loop over j
577             }  // close loop over i
578 
579             //Advance to the next batch
580             jstart = SIMINT_SIMD_ROUND(jend);
581 
582             //////////////////////////////////////////////
583             // Contracted integrals: Horizontal recurrance
584             //////////////////////////////////////////////
585 
586 
587             const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
588 
589 
590             for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
591             {
592                 const double hCD[3] = { Q.AB_x[cd+abcd], Q.AB_y[cd+abcd], Q.AB_z[cd+abcd] };
593 
594                 // set up HRR pointers
595                 double const * restrict HRR_INT__f_s_g_s = INT__f_s_g_s + abcd * 150;
596                 double const * restrict HRR_INT__f_s_h_s = INT__f_s_h_s + abcd * 210;
597                 double const * restrict HRR_INT__g_s_g_s = INT__g_s_g_s + abcd * 225;
598                 double const * restrict HRR_INT__g_s_h_s = INT__g_s_h_s + abcd * 315;
599                 double * restrict HRR_INT__f_p_g_p = INT__f_p_g_p + real_abcd * 1350;
600 
601                 // form INT__f_p_g_s
602                 HRR_J_f_p(
603                     HRR_INT__f_p_g_s,
604                     HRR_INT__f_s_g_s,
605                     HRR_INT__g_s_g_s,
606                     hAB, 15);
607 
608                 // form INT__f_p_h_s
609                 HRR_J_f_p(
610                     HRR_INT__f_p_h_s,
611                     HRR_INT__f_s_h_s,
612                     HRR_INT__g_s_h_s,
613                     hAB, 21);
614 
615                 // form INT__f_p_g_p
616                 HRR_L_g_p(
617                     HRR_INT__f_p_g_p,
618                     HRR_INT__f_p_g_s,
619                     HRR_INT__f_p_h_s,
620                     hCD, 30);
621 
622 
623             }  // close HRR loop
624 
625 
626         }   // close loop cdbatch
627 
628         istart = iend;
629     }  // close loop over ab
630 
631     return P.nshell12_clip * Q.nshell12_clip;
632 }
633 
ostei_p_f_g_p(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__p_f_g_p)634 int ostei_p_f_g_p(struct simint_multi_shellpair const P,
635                   struct simint_multi_shellpair const Q,
636                   double screen_tol,
637                   double * const restrict work,
638                   double * const restrict INT__p_f_g_p)
639 {
640     double P_AB[3*P.nshell12];
641     struct simint_multi_shellpair P_tmp = P;
642     P_tmp.PA_x = P.PB_x;  P_tmp.PA_y = P.PB_y;  P_tmp.PA_z = P.PB_z;
643     P_tmp.PB_x = P.PA_x;  P_tmp.PB_y = P.PA_y;  P_tmp.PB_z = P.PA_z;
644     P_tmp.AB_x = P_AB;
645     P_tmp.AB_y = P_AB + P.nshell12;
646     P_tmp.AB_z = P_AB + 2*P.nshell12;
647 
648     for(int i = 0; i < P.nshell12; i++)
649     {
650         P_tmp.AB_x[i] = -P.AB_x[i];
651         P_tmp.AB_y[i] = -P.AB_y[i];
652         P_tmp.AB_z[i] = -P.AB_z[i];
653     }
654 
655     int ret = ostei_f_p_g_p(P_tmp, Q, screen_tol, work, INT__p_f_g_p);
656     double buffer[1350] SIMINT_ALIGN_ARRAY_DBL;
657 
658     for(int q = 0; q < ret; q++)
659     {
660         int idx = 0;
661         for(int a = 0; a < 3; ++a)
662         for(int b = 0; b < 10; ++b)
663         for(int c = 0; c < 15; ++c)
664         for(int d = 0; d < 3; ++d)
665             buffer[idx++] = INT__p_f_g_p[q*1350+b*135+a*45+c*3+d];
666 
667         memcpy(INT__p_f_g_p+q*1350, buffer, 1350*sizeof(double));
668     }
669 
670     return ret;
671 }
672 
ostei_f_p_p_g(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__f_p_p_g)673 int ostei_f_p_p_g(struct simint_multi_shellpair const P,
674                   struct simint_multi_shellpair const Q,
675                   double screen_tol,
676                   double * const restrict work,
677                   double * const restrict INT__f_p_p_g)
678 {
679     double Q_AB[3*Q.nshell12];
680     struct simint_multi_shellpair Q_tmp = Q;
681     Q_tmp.PA_x = Q.PB_x;  Q_tmp.PA_y = Q.PB_y;  Q_tmp.PA_z = Q.PB_z;
682     Q_tmp.PB_x = Q.PA_x;  Q_tmp.PB_y = Q.PA_y;  Q_tmp.PB_z = Q.PA_z;
683     Q_tmp.AB_x = Q_AB;
684     Q_tmp.AB_y = Q_AB + Q.nshell12;
685     Q_tmp.AB_z = Q_AB + 2*Q.nshell12;
686 
687     for(int i = 0; i < Q.nshell12; i++)
688     {
689         Q_tmp.AB_x[i] = -Q.AB_x[i];
690         Q_tmp.AB_y[i] = -Q.AB_y[i];
691         Q_tmp.AB_z[i] = -Q.AB_z[i];
692     }
693 
694     int ret = ostei_f_p_g_p(P, Q_tmp, screen_tol, work, INT__f_p_p_g);
695     double buffer[1350] SIMINT_ALIGN_ARRAY_DBL;
696 
697     for(int q = 0; q < ret; q++)
698     {
699         int idx = 0;
700         for(int a = 0; a < 10; ++a)
701         for(int b = 0; b < 3; ++b)
702         for(int c = 0; c < 3; ++c)
703         for(int d = 0; d < 15; ++d)
704             buffer[idx++] = INT__f_p_p_g[q*1350+a*135+b*45+d*3+c];
705 
706         memcpy(INT__f_p_p_g+q*1350, buffer, 1350*sizeof(double));
707     }
708 
709     return ret;
710 }
711 
ostei_p_f_p_g(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__p_f_p_g)712 int ostei_p_f_p_g(struct simint_multi_shellpair const P,
713                   struct simint_multi_shellpair const Q,
714                   double screen_tol,
715                   double * const restrict work,
716                   double * const restrict INT__p_f_p_g)
717 {
718     double P_AB[3*P.nshell12];
719     struct simint_multi_shellpair P_tmp = P;
720     P_tmp.PA_x = P.PB_x;  P_tmp.PA_y = P.PB_y;  P_tmp.PA_z = P.PB_z;
721     P_tmp.PB_x = P.PA_x;  P_tmp.PB_y = P.PA_y;  P_tmp.PB_z = P.PA_z;
722     P_tmp.AB_x = P_AB;
723     P_tmp.AB_y = P_AB + P.nshell12;
724     P_tmp.AB_z = P_AB + 2*P.nshell12;
725 
726     for(int i = 0; i < P.nshell12; i++)
727     {
728         P_tmp.AB_x[i] = -P.AB_x[i];
729         P_tmp.AB_y[i] = -P.AB_y[i];
730         P_tmp.AB_z[i] = -P.AB_z[i];
731     }
732 
733     double Q_AB[3*Q.nshell12];
734     struct simint_multi_shellpair Q_tmp = Q;
735     Q_tmp.PA_x = Q.PB_x;  Q_tmp.PA_y = Q.PB_y;  Q_tmp.PA_z = Q.PB_z;
736     Q_tmp.PB_x = Q.PA_x;  Q_tmp.PB_y = Q.PA_y;  Q_tmp.PB_z = Q.PA_z;
737     Q_tmp.AB_x = Q_AB;
738     Q_tmp.AB_y = Q_AB + Q.nshell12;
739     Q_tmp.AB_z = Q_AB + 2*Q.nshell12;
740 
741     for(int i = 0; i < Q.nshell12; i++)
742     {
743         Q_tmp.AB_x[i] = -Q.AB_x[i];
744         Q_tmp.AB_y[i] = -Q.AB_y[i];
745         Q_tmp.AB_z[i] = -Q.AB_z[i];
746     }
747 
748     int ret = ostei_f_p_g_p(P_tmp, Q_tmp, screen_tol, work, INT__p_f_p_g);
749     double buffer[1350] SIMINT_ALIGN_ARRAY_DBL;
750 
751     for(int q = 0; q < ret; q++)
752     {
753         int idx = 0;
754         for(int a = 0; a < 3; ++a)
755         for(int b = 0; b < 10; ++b)
756         for(int c = 0; c < 3; ++c)
757         for(int d = 0; d < 15; ++d)
758             buffer[idx++] = INT__p_f_p_g[q*1350+b*135+a*45+d*3+c];
759 
760         memcpy(INT__p_f_p_g+q*1350, buffer, 1350*sizeof(double));
761     }
762 
763     return ret;
764 }
765 
766