1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6 
7 
ostei_f_f_s_s(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__f_f_s_s)8 int ostei_f_f_s_s(struct simint_multi_shellpair const P,
9                   struct simint_multi_shellpair const Q,
10                   double screen_tol,
11                   double * const restrict work,
12                   double * const restrict INT__f_f_s_s)
13 {
14 
15     SIMINT_ASSUME_ALIGN_DBL(work);
16     SIMINT_ASSUME_ALIGN_DBL(INT__f_f_s_s);
17     int ab, cd, abcd;
18     int istart, jstart;
19     int iprimcd, nprim_icd, icd;
20     const int check_screen = (screen_tol > 0.0);
21     int i, j;
22     int n;
23     int not_screened;
24     int real_abcd;
25     int iket;
26 
27     // partition workspace
28     double * const INT__f_s_s_s = work + (SIMINT_NSHELL_SIMD * 0);
29     double * const INT__g_s_s_s = work + (SIMINT_NSHELL_SIMD * 10);
30     double * const INT__h_s_s_s = work + (SIMINT_NSHELL_SIMD * 25);
31     double * const INT__i_s_s_s = work + (SIMINT_NSHELL_SIMD * 46);
32     SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*74);
33     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
34     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_s_s = primwork + 7;
35     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_s = primwork + 25;
36     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_s = primwork + 55;
37     SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_s = primwork + 95;
38     SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_s_s = primwork + 140;
39     SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_s_s = primwork + 182;
40     double * const hrrwork = (double *)(primwork + 210);
41     double * const HRR_INT__f_p_s_s = hrrwork + 0;
42     double * const HRR_INT__f_d_s_s = hrrwork + 30;
43     double * const HRR_INT__g_p_s_s = hrrwork + 90;
44     double * const HRR_INT__g_d_s_s = hrrwork + 135;
45     double * const HRR_INT__h_p_s_s = hrrwork + 225;
46 
47 
48     // Create constants
49     const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
50     const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
51     const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
52     const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
53     const SIMINT_DBLTYPE const_5 = SIMINT_DBLSET1(5);
54     const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
55 
56 
57     ////////////////////////////////////////
58     // Loop over shells and primitives
59     ////////////////////////////////////////
60 
61     real_abcd = 0;
62     istart = 0;
63     for(ab = 0; ab < P.nshell12_clip; ++ab)
64     {
65         const int iend = istart + P.nprim12[ab];
66 
67         cd = 0;
68         jstart = 0;
69 
70         for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
71         {
72             const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
73             int jend = jstart;
74             for(i = 0; i < nshellbatch; i++)
75                 jend += Q.nprim12[cd+i];
76 
77             // Clear the beginning of the workspace (where we are accumulating integrals)
78             memset(work, 0, SIMINT_NSHELL_SIMD * 74 * sizeof(double));
79             abcd = 0;
80 
81 
82             for(i = istart; i < iend; ++i)
83             {
84                 SIMINT_DBLTYPE bra_screen_max;  // only used if check_screen
85 
86                 if(check_screen)
87                 {
88                     // Skip this whole thing if always insignificant
89                     if((P.screen[i] * Q.screen_max) < screen_tol)
90                         continue;
91                     bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
92                 }
93 
94                 icd = 0;
95                 iprimcd = 0;
96                 nprim_icd = Q.nprim12[cd];
97                 double * restrict PRIM_PTR_INT__f_s_s_s = INT__f_s_s_s + abcd * 10;
98                 double * restrict PRIM_PTR_INT__g_s_s_s = INT__g_s_s_s + abcd * 15;
99                 double * restrict PRIM_PTR_INT__h_s_s_s = INT__h_s_s_s + abcd * 21;
100                 double * restrict PRIM_PTR_INT__i_s_s_s = INT__i_s_s_s + abcd * 28;
101 
102 
103 
104                 // Load these one per loop over i
105                 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
106                 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
107                 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
108 
109                 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
110 
111                 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
112                 {
113                     // calculate the shell offsets
114                     // these are the offset from the shell pointed to by cd
115                     // for each element
116                     int shelloffsets[SIMINT_SIMD_LEN] = {0};
117                     int lastoffset = 0;
118                     const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
119 
120                     if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
121                     {
122                         // Handle if the first element of the vector is a new shell
123                         if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
124                         {
125                             nprim_icd += Q.nprim12[cd + (++icd)];
126                             PRIM_PTR_INT__f_s_s_s += 10;
127                             PRIM_PTR_INT__g_s_s_s += 15;
128                             PRIM_PTR_INT__h_s_s_s += 21;
129                             PRIM_PTR_INT__i_s_s_s += 28;
130                         }
131                         iprimcd++;
132                         for(n = 1; n < SIMINT_SIMD_LEN; ++n)
133                         {
134                             if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
135                             {
136                                 shelloffsets[n] = shelloffsets[n-1] + 1;
137                                 lastoffset++;
138                                 nprim_icd += Q.nprim12[cd + (++icd)];
139                             }
140                             else
141                                 shelloffsets[n] = shelloffsets[n-1];
142                             iprimcd++;
143                         }
144                     }
145                     else
146                         iprimcd += SIMINT_SIMD_LEN;
147 
148                     // Do we have to compute this vector (or has it been screened out)?
149                     // (not_screened != 0 means we have to do this vector)
150                     if(check_screen)
151                     {
152                         const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
153                         if(vmax < screen_tol)
154                         {
155                             PRIM_PTR_INT__f_s_s_s += lastoffset*10;
156                             PRIM_PTR_INT__g_s_s_s += lastoffset*15;
157                             PRIM_PTR_INT__h_s_s_s += lastoffset*21;
158                             PRIM_PTR_INT__i_s_s_s += lastoffset*28;
159                             continue;
160                         }
161                     }
162 
163                     const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
164                     const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
165                     const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
166                     const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
167 
168 
169                     /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
170                     SIMINT_DBLTYPE PQ[3];
171                     PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
172                     PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
173                     PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
174                     SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
175                     R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
176                     R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
177 
178                     const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
179                     const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
180                     const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
181                     const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
182                     const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
183                     const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
184 
185                     // NOTE: Minus sign!
186                     const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
187                     SIMINT_DBLTYPE aop_PQ[3];
188                     aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
189                     aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
190                     aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
191 
192 
193                     //////////////////////////////////////////////
194                     // Fjt function section
195                     // Maximum v value: 6
196                     //////////////////////////////////////////////
197                     // The parameter to the Fjt function
198                     const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
199 
200 
201                     const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
202 
203 
204                     boys_F_split(PRIM_INT__s_s_s_s, F_x, 6);
205                     SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
206                     prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
207                     for(n = 0; n <= 6; n++)
208                         PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
209 
210                     //////////////////////////////////////////////
211                     // Primitive integrals: Vertical recurrance
212                     //////////////////////////////////////////////
213 
214                     const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
215                     const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
216                     const SIMINT_DBLTYPE vrr_const_3_over_2p = SIMINT_MUL(const_3, one_over_2p);
217                     const SIMINT_DBLTYPE vrr_const_4_over_2p = SIMINT_MUL(const_4, one_over_2p);
218                     const SIMINT_DBLTYPE vrr_const_5_over_2p = SIMINT_MUL(const_5, one_over_2p);
219 
220 
221 
222                     // Forming PRIM_INT__p_s_s_s[6 * 3];
223                     for(n = 0; n < 6; ++n)  // loop over orders of auxiliary function
224                     {
225 
226                         PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
227                         PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]);
228 
229                         PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
230                         PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 1]);
231 
232                         PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
233                         PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 2]);
234 
235                     }
236 
237 
238 
239                     // Forming PRIM_INT__d_s_s_s[5 * 6];
240                     for(n = 0; n < 5; ++n)  // loop over orders of auxiliary function
241                     {
242 
243                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__p_s_s_s[n * 3 + 0]);
244                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 0]);
245                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 0]);
246 
247                         PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 0]);
248                         PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 1]);
249 
250                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 1]);
251                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 3]);
252                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 3]);
253 
254                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 2]);
255                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_s[n * 6 + 5]);
256                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 5]);
257 
258                     }
259 
260 
261 
262                     // Forming PRIM_INT__f_s_s_s[4 * 10];
263                     for(n = 0; n < 4; ++n)  // loop over orders of auxiliary function
264                     {
265 
266                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
267                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 0]);
268                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__f_s_s_s[n * 10 + 0]);
269 
270                         PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
271                         PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 1]);
272 
273                         PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
274                         PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 2]);
275 
276                         PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
277                         PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 3]);
278 
279                         PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
280                         PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__f_s_s_s[n * 10 + 4]);
281 
282                         PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
283                         PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 5]);
284 
285                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
286                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 6]);
287                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__f_s_s_s[n * 10 + 6]);
288 
289                         PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
290                         PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 7]);
291 
292                         PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
293                         PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 8]);
294 
295                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
296                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 9]);
297                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__f_s_s_s[n * 10 + 9]);
298 
299                     }
300 
301 
302                     VRR_I_g_s_s_s(
303                             PRIM_INT__g_s_s_s,
304                             PRIM_INT__f_s_s_s,
305                             PRIM_INT__d_s_s_s,
306                             P_PA,
307                             a_over_p,
308                             aop_PQ,
309                             one_over_2p,
310                             3);
311 
312 
313                     VRR_I_h_s_s_s(
314                             PRIM_INT__h_s_s_s,
315                             PRIM_INT__g_s_s_s,
316                             PRIM_INT__f_s_s_s,
317                             P_PA,
318                             a_over_p,
319                             aop_PQ,
320                             one_over_2p,
321                             2);
322 
323 
324                     ostei_general_vrr1_I(6, 1,
325                             one_over_2p, a_over_p, aop_PQ, P_PA,
326                             PRIM_INT__h_s_s_s, PRIM_INT__g_s_s_s, PRIM_INT__i_s_s_s);
327 
328 
329 
330 
331                     ////////////////////////////////////
332                     // Accumulate contracted integrals
333                     ////////////////////////////////////
334                     if(lastoffset == 0)
335                     {
336                         contract_all(10, PRIM_INT__f_s_s_s, PRIM_PTR_INT__f_s_s_s);
337                         contract_all(15, PRIM_INT__g_s_s_s, PRIM_PTR_INT__g_s_s_s);
338                         contract_all(21, PRIM_INT__h_s_s_s, PRIM_PTR_INT__h_s_s_s);
339                         contract_all(28, PRIM_INT__i_s_s_s, PRIM_PTR_INT__i_s_s_s);
340                     }
341                     else
342                     {
343                         contract(10, shelloffsets, PRIM_INT__f_s_s_s, PRIM_PTR_INT__f_s_s_s);
344                         contract(15, shelloffsets, PRIM_INT__g_s_s_s, PRIM_PTR_INT__g_s_s_s);
345                         contract(21, shelloffsets, PRIM_INT__h_s_s_s, PRIM_PTR_INT__h_s_s_s);
346                         contract(28, shelloffsets, PRIM_INT__i_s_s_s, PRIM_PTR_INT__i_s_s_s);
347                         PRIM_PTR_INT__f_s_s_s += lastoffset*10;
348                         PRIM_PTR_INT__g_s_s_s += lastoffset*15;
349                         PRIM_PTR_INT__h_s_s_s += lastoffset*21;
350                         PRIM_PTR_INT__i_s_s_s += lastoffset*28;
351                     }
352 
353                 }  // close loop over j
354             }  // close loop over i
355 
356             //Advance to the next batch
357             jstart = SIMINT_SIMD_ROUND(jend);
358 
359             //////////////////////////////////////////////
360             // Contracted integrals: Horizontal recurrance
361             //////////////////////////////////////////////
362 
363 
364             const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
365 
366 
367             for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
368             {
369 
370                 // set up HRR pointers
371                 double const * restrict HRR_INT__f_s_s_s = INT__f_s_s_s + abcd * 10;
372                 double const * restrict HRR_INT__g_s_s_s = INT__g_s_s_s + abcd * 15;
373                 double const * restrict HRR_INT__h_s_s_s = INT__h_s_s_s + abcd * 21;
374                 double const * restrict HRR_INT__i_s_s_s = INT__i_s_s_s + abcd * 28;
375                 double * restrict HRR_INT__f_f_s_s = INT__f_f_s_s + real_abcd * 100;
376 
377                 // form INT__f_p_s_s
378                 HRR_J_f_p(
379                     HRR_INT__f_p_s_s,
380                     HRR_INT__f_s_s_s,
381                     HRR_INT__g_s_s_s,
382                     hAB, 1);
383 
384                 // form INT__g_p_s_s
385                 HRR_J_g_p(
386                     HRR_INT__g_p_s_s,
387                     HRR_INT__g_s_s_s,
388                     HRR_INT__h_s_s_s,
389                     hAB, 1);
390 
391                 // form INT__h_p_s_s
392                 ostei_general_hrr_J(5, 1, 0, 0, hAB, HRR_INT__i_s_s_s, HRR_INT__h_s_s_s, HRR_INT__h_p_s_s);
393 
394                 // form INT__f_d_s_s
395                 HRR_J_f_d(
396                     HRR_INT__f_d_s_s,
397                     HRR_INT__f_p_s_s,
398                     HRR_INT__g_p_s_s,
399                     hAB, 1);
400 
401                 // form INT__g_d_s_s
402                 ostei_general_hrr_J(4, 2, 0, 0, hAB, HRR_INT__h_p_s_s, HRR_INT__g_p_s_s, HRR_INT__g_d_s_s);
403 
404                 // form INT__f_f_s_s
405                 ostei_general_hrr_J(3, 3, 0, 0, hAB, HRR_INT__g_d_s_s, HRR_INT__f_d_s_s, HRR_INT__f_f_s_s);
406 
407 
408             }  // close HRR loop
409 
410 
411         }   // close loop cdbatch
412 
413         istart = iend;
414     }  // close loop over ab
415 
416     return P.nshell12_clip * Q.nshell12_clip;
417 }
418 
419