1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6 
7 
ostei_d_s_f_p(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_s_f_p)8 int ostei_d_s_f_p(struct simint_multi_shellpair const P,
9                   struct simint_multi_shellpair const Q,
10                   double screen_tol,
11                   double * const restrict work,
12                   double * const restrict INT__d_s_f_p)
13 {
14 
15     SIMINT_ASSUME_ALIGN_DBL(work);
16     SIMINT_ASSUME_ALIGN_DBL(INT__d_s_f_p);
17     int ab, cd, abcd;
18     int istart, jstart;
19     int iprimcd, nprim_icd, icd;
20     const int check_screen = (screen_tol > 0.0);
21     int i, j;
22     int n;
23     int not_screened;
24     int real_abcd;
25     int ibra;
26 
27     // partition workspace
28     double * const INT__d_s_f_s = work + (SIMINT_NSHELL_SIMD * 0);
29     double * const INT__d_s_g_s = work + (SIMINT_NSHELL_SIMD * 60);
30     SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*150);
31     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
32     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_p_s = primwork + 7;
33     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_d_s = primwork + 25;
34     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_f_s = primwork + 55;
35     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_g_s = primwork + 95;
36     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_d_s = primwork + 140;
37     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_f_s = primwork + 176;
38     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_g_s = primwork + 236;
39     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_f_s = primwork + 326;
40     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_g_s = primwork + 386;
41     double * const hrrwork = (double *)(primwork + 476);
42 
43 
44     // Create constants
45     const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
46     const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
47     const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
48     const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
49     const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
50 
51 
52     ////////////////////////////////////////
53     // Loop over shells and primitives
54     ////////////////////////////////////////
55 
56     real_abcd = 0;
57     istart = 0;
58     for(ab = 0; ab < P.nshell12_clip; ++ab)
59     {
60         const int iend = istart + P.nprim12[ab];
61 
62         cd = 0;
63         jstart = 0;
64 
65         for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
66         {
67             const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
68             int jend = jstart;
69             for(i = 0; i < nshellbatch; i++)
70                 jend += Q.nprim12[cd+i];
71 
72             // Clear the beginning of the workspace (where we are accumulating integrals)
73             memset(work, 0, SIMINT_NSHELL_SIMD * 150 * sizeof(double));
74             abcd = 0;
75 
76 
77             for(i = istart; i < iend; ++i)
78             {
79                 SIMINT_DBLTYPE bra_screen_max;  // only used if check_screen
80 
81                 if(check_screen)
82                 {
83                     // Skip this whole thing if always insignificant
84                     if((P.screen[i] * Q.screen_max) < screen_tol)
85                         continue;
86                     bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
87                 }
88 
89                 icd = 0;
90                 iprimcd = 0;
91                 nprim_icd = Q.nprim12[cd];
92                 double * restrict PRIM_PTR_INT__d_s_f_s = INT__d_s_f_s + abcd * 60;
93                 double * restrict PRIM_PTR_INT__d_s_g_s = INT__d_s_g_s + abcd * 90;
94 
95 
96 
97                 // Load these one per loop over i
98                 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
99                 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
100                 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
101 
102                 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
103 
104                 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
105                 {
106                     // calculate the shell offsets
107                     // these are the offset from the shell pointed to by cd
108                     // for each element
109                     int shelloffsets[SIMINT_SIMD_LEN] = {0};
110                     int lastoffset = 0;
111                     const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
112 
113                     if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
114                     {
115                         // Handle if the first element of the vector is a new shell
116                         if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
117                         {
118                             nprim_icd += Q.nprim12[cd + (++icd)];
119                             PRIM_PTR_INT__d_s_f_s += 60;
120                             PRIM_PTR_INT__d_s_g_s += 90;
121                         }
122                         iprimcd++;
123                         for(n = 1; n < SIMINT_SIMD_LEN; ++n)
124                         {
125                             if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
126                             {
127                                 shelloffsets[n] = shelloffsets[n-1] + 1;
128                                 lastoffset++;
129                                 nprim_icd += Q.nprim12[cd + (++icd)];
130                             }
131                             else
132                                 shelloffsets[n] = shelloffsets[n-1];
133                             iprimcd++;
134                         }
135                     }
136                     else
137                         iprimcd += SIMINT_SIMD_LEN;
138 
139                     // Do we have to compute this vector (or has it been screened out)?
140                     // (not_screened != 0 means we have to do this vector)
141                     if(check_screen)
142                     {
143                         const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
144                         if(vmax < screen_tol)
145                         {
146                             PRIM_PTR_INT__d_s_f_s += lastoffset*60;
147                             PRIM_PTR_INT__d_s_g_s += lastoffset*90;
148                             continue;
149                         }
150                     }
151 
152                     const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
153                     const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
154                     const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
155                     const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
156 
157 
158                     /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
159                     SIMINT_DBLTYPE PQ[3];
160                     PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
161                     PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
162                     PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
163                     SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
164                     R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
165                     R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
166 
167                     const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
168                     const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
169                     const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
170                     const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
171                     const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
172                     const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
173                     const SIMINT_DBLTYPE Q_PA[3] = { SIMINT_DBLLOAD(Q.PA_x, j), SIMINT_DBLLOAD(Q.PA_y, j), SIMINT_DBLLOAD(Q.PA_z, j) };
174 
175                     // NOTE: Minus sign!
176                     const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
177                     SIMINT_DBLTYPE aop_PQ[3];
178                     aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
179                     aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
180                     aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
181 
182                     SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
183                     SIMINT_DBLTYPE aoq_PQ[3];
184                     aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
185                     aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
186                     aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
187                     // Put a minus sign here so we don't have to in RR routines
188                     a_over_q = SIMINT_NEG(a_over_q);
189 
190 
191                     //////////////////////////////////////////////
192                     // Fjt function section
193                     // Maximum v value: 6
194                     //////////////////////////////////////////////
195                     // The parameter to the Fjt function
196                     const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
197 
198 
199                     const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
200 
201 
202                     boys_F_split(PRIM_INT__s_s_s_s, F_x, 6);
203                     SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
204                     prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
205                     for(n = 0; n <= 6; n++)
206                         PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
207 
208                     //////////////////////////////////////////////
209                     // Primitive integrals: Vertical recurrance
210                     //////////////////////////////////////////////
211 
212                     const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
213                     const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
214                     const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
215                     const SIMINT_DBLTYPE vrr_const_3_over_2q = SIMINT_MUL(const_3, one_over_2q);
216                     const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
217                     const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
218                     const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
219                     const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
220 
221 
222 
223                     // Forming PRIM_INT__s_s_p_s[6 * 3];
224                     for(n = 0; n < 6; ++n)  // loop over orders of auxiliary function
225                     {
226 
227                         PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
228                         PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]);
229 
230                         PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
231                         PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 1]);
232 
233                         PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
234                         PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 2]);
235 
236                     }
237 
238 
239 
240                     // Forming PRIM_INT__s_s_d_s[5 * 6];
241                     for(n = 0; n < 5; ++n)  // loop over orders of auxiliary function
242                     {
243 
244                         PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_p_s[n * 3 + 0]);
245                         PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 0]);
246                         PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 0]);
247 
248                         PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 0]);
249                         PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 1]);
250 
251                         PRIM_INT__s_s_d_s[n * 6 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 0]);
252                         PRIM_INT__s_s_d_s[n * 6 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 2]);
253 
254                         PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 1]);
255                         PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_d_s[n * 6 + 3]);
256                         PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 3]);
257 
258                         PRIM_INT__s_s_d_s[n * 6 + 4] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 1]);
259                         PRIM_INT__s_s_d_s[n * 6 + 4] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_d_s[n * 6 + 4]);
260 
261                         PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 2]);
262                         PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_d_s[n * 6 + 5]);
263                         PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 5]);
264 
265                     }
266 
267 
268 
269                     // Forming PRIM_INT__s_s_f_s[4 * 10];
270                     for(n = 0; n < 4; ++n)  // loop over orders of auxiliary function
271                     {
272 
273                         PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 0]);
274                         PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 0]);
275                         PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]), PRIM_INT__s_s_f_s[n * 10 + 0]);
276 
277                         PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 0]);
278                         PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 1]);
279 
280                         PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 0]);
281                         PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 2]);
282 
283                         PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 3]);
284                         PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 3]);
285 
286                         PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 1]);
287                         PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__s_s_f_s[n * 10 + 4]);
288 
289                         PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 5]);
290                         PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 5]);
291 
292                         PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 3]);
293                         PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 6]);
294                         PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_p_s[n * 3 + 1]), PRIM_INT__s_s_f_s[n * 10 + 6]);
295 
296                         PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 3]);
297                         PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 7]);
298 
299                         PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 5]);
300                         PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 8]);
301 
302                         PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 5]);
303                         PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 9]);
304                         PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_p_s[n * 3 + 2]), PRIM_INT__s_s_f_s[n * 10 + 9]);
305 
306                     }
307 
308 
309                     VRR_I_p_s_f_s(
310                             PRIM_INT__p_s_f_s,
311                             PRIM_INT__s_s_f_s,
312                             PRIM_INT__s_s_d_s,
313                             P_PA,
314                             aop_PQ,
315                             one_over_2pq,
316                             2);
317 
318 
319 
320                     // Forming PRIM_INT__p_s_d_s[2 * 18];
321                     for(n = 0; n < 2; ++n)  // loop over orders of auxiliary function
322                     {
323 
324                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 0]);
325                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__p_s_d_s[n * 18 + 0]);
326                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 0]);
327 
328                         PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 1]);
329                         PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__p_s_d_s[n * 18 + 1]);
330                         PRIM_INT__p_s_d_s[n * 18 + 1] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 1]);
331 
332                         PRIM_INT__p_s_d_s[n * 18 + 2] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 2]);
333                         PRIM_INT__p_s_d_s[n * 18 + 2] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__p_s_d_s[n * 18 + 2]);
334                         PRIM_INT__p_s_d_s[n * 18 + 2] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 2]);
335 
336                         PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 3]);
337                         PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__p_s_d_s[n * 18 + 3]);
338 
339                         PRIM_INT__p_s_d_s[n * 18 + 4] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 4]);
340                         PRIM_INT__p_s_d_s[n * 18 + 4] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__p_s_d_s[n * 18 + 4]);
341 
342                         PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_d_s[n * 6 + 5]);
343                         PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__p_s_d_s[n * 18 + 5]);
344 
345                         PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 0]);
346                         PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__p_s_d_s[n * 18 + 6]);
347 
348                         PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 1]);
349                         PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__p_s_d_s[n * 18 + 7]);
350                         PRIM_INT__p_s_d_s[n * 18 + 7] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 7]);
351 
352                         PRIM_INT__p_s_d_s[n * 18 + 8] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 2]);
353                         PRIM_INT__p_s_d_s[n * 18 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__p_s_d_s[n * 18 + 8]);
354 
355                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 3]);
356                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__p_s_d_s[n * 18 + 9]);
357                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 9]);
358 
359                         PRIM_INT__p_s_d_s[n * 18 + 10] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 4]);
360                         PRIM_INT__p_s_d_s[n * 18 + 10] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__p_s_d_s[n * 18 + 10]);
361                         PRIM_INT__p_s_d_s[n * 18 + 10] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 10]);
362 
363                         PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_d_s[n * 6 + 5]);
364                         PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__p_s_d_s[n * 18 + 11]);
365 
366                         PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 0]);
367                         PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__p_s_d_s[n * 18 + 12]);
368 
369                         PRIM_INT__p_s_d_s[n * 18 + 13] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 1]);
370                         PRIM_INT__p_s_d_s[n * 18 + 13] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__p_s_d_s[n * 18 + 13]);
371 
372                         PRIM_INT__p_s_d_s[n * 18 + 14] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 2]);
373                         PRIM_INT__p_s_d_s[n * 18 + 14] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 2], PRIM_INT__p_s_d_s[n * 18 + 14]);
374                         PRIM_INT__p_s_d_s[n * 18 + 14] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 14]);
375 
376                         PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 3]);
377                         PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__p_s_d_s[n * 18 + 15]);
378 
379                         PRIM_INT__p_s_d_s[n * 18 + 16] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 4]);
380                         PRIM_INT__p_s_d_s[n * 18 + 16] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 4], PRIM_INT__p_s_d_s[n * 18 + 16]);
381                         PRIM_INT__p_s_d_s[n * 18 + 16] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 16]);
382 
383                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_d_s[n * 6 + 5]);
384                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__p_s_d_s[n * 18 + 17]);
385                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 17]);
386 
387                     }
388 
389 
390                     VRR_I_d_s_f_s(
391                             PRIM_INT__d_s_f_s,
392                             PRIM_INT__p_s_f_s,
393                             PRIM_INT__s_s_f_s,
394                             PRIM_INT__p_s_d_s,
395                             P_PA,
396                             a_over_p,
397                             aop_PQ,
398                             one_over_2p,
399                             one_over_2pq,
400                             1);
401 
402 
403                     VRR_K_s_s_g_s(
404                             PRIM_INT__s_s_g_s,
405                             PRIM_INT__s_s_f_s,
406                             PRIM_INT__s_s_d_s,
407                             Q_PA,
408                             a_over_q,
409                             aoq_PQ,
410                             one_over_2q,
411                             3);
412 
413 
414                     VRR_I_p_s_g_s(
415                             PRIM_INT__p_s_g_s,
416                             PRIM_INT__s_s_g_s,
417                             PRIM_INT__s_s_f_s,
418                             P_PA,
419                             aop_PQ,
420                             one_over_2pq,
421                             2);
422 
423 
424                     ostei_general_vrr_I(2, 0, 4, 0, 1,
425                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
426                             PRIM_INT__p_s_g_s, PRIM_INT__s_s_g_s, NULL, PRIM_INT__p_s_f_s, NULL, PRIM_INT__d_s_g_s);
427 
428 
429 
430 
431                     ////////////////////////////////////
432                     // Accumulate contracted integrals
433                     ////////////////////////////////////
434                     if(lastoffset == 0)
435                     {
436                         contract_all(60, PRIM_INT__d_s_f_s, PRIM_PTR_INT__d_s_f_s);
437                         contract_all(90, PRIM_INT__d_s_g_s, PRIM_PTR_INT__d_s_g_s);
438                     }
439                     else
440                     {
441                         contract(60, shelloffsets, PRIM_INT__d_s_f_s, PRIM_PTR_INT__d_s_f_s);
442                         contract(90, shelloffsets, PRIM_INT__d_s_g_s, PRIM_PTR_INT__d_s_g_s);
443                         PRIM_PTR_INT__d_s_f_s += lastoffset*60;
444                         PRIM_PTR_INT__d_s_g_s += lastoffset*90;
445                     }
446 
447                 }  // close loop over j
448             }  // close loop over i
449 
450             //Advance to the next batch
451             jstart = SIMINT_SIMD_ROUND(jend);
452 
453             //////////////////////////////////////////////
454             // Contracted integrals: Horizontal recurrance
455             //////////////////////////////////////////////
456 
457 
458 
459 
460             for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
461             {
462                 const double hCD[3] = { Q.AB_x[cd+abcd], Q.AB_y[cd+abcd], Q.AB_z[cd+abcd] };
463 
464                 // set up HRR pointers
465                 double const * restrict HRR_INT__d_s_f_s = INT__d_s_f_s + abcd * 60;
466                 double const * restrict HRR_INT__d_s_g_s = INT__d_s_g_s + abcd * 90;
467                 double * restrict HRR_INT__d_s_f_p = INT__d_s_f_p + real_abcd * 180;
468 
469                 // form INT__d_s_f_p
470                 HRR_L_f_p(
471                     HRR_INT__d_s_f_p,
472                     HRR_INT__d_s_f_s,
473                     HRR_INT__d_s_g_s,
474                     hCD, 6);
475 
476 
477             }  // close HRR loop
478 
479 
480         }   // close loop cdbatch
481 
482         istart = iend;
483     }  // close loop over ab
484 
485     return P.nshell12_clip * Q.nshell12_clip;
486 }
487 
488