1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6 
7 
ostei_k_k_s_s(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__k_k_s_s)8 int ostei_k_k_s_s(struct simint_multi_shellpair const P,
9                   struct simint_multi_shellpair const Q,
10                   double screen_tol,
11                   double * const restrict work,
12                   double * const restrict INT__k_k_s_s)
13 {
14 
15     SIMINT_ASSUME_ALIGN_DBL(work);
16     SIMINT_ASSUME_ALIGN_DBL(INT__k_k_s_s);
17     int ab, cd, abcd;
18     int istart, jstart;
19     int iprimcd, nprim_icd, icd;
20     const int check_screen = (screen_tol > 0.0);
21     int i, j;
22     int n;
23     int not_screened;
24     int real_abcd;
25     int iket;
26 
27     // partition workspace
28     double * const INT__k_s_s_s = work + (SIMINT_NSHELL_SIMD * 0);
29     double * const INT__l_s_s_s = work + (SIMINT_NSHELL_SIMD * 36);
30     double * const INT__m_s_s_s = work + (SIMINT_NSHELL_SIMD * 81);
31     double * const INT__n_s_s_s = work + (SIMINT_NSHELL_SIMD * 136);
32     double * const INT__o_s_s_s = work + (SIMINT_NSHELL_SIMD * 202);
33     double * const INT__q_s_s_s = work + (SIMINT_NSHELL_SIMD * 280);
34     double * const INT__r_s_s_s = work + (SIMINT_NSHELL_SIMD * 371);
35     double * const INT__t_s_s_s = work + (SIMINT_NSHELL_SIMD * 476);
36     SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*596);
37     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
38     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_s_s = primwork + 15;
39     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_s = primwork + 57;
40     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_s = primwork + 135;
41     SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_s = primwork + 255;
42     SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_s_s = primwork + 420;
43     SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_s_s = primwork + 630;
44     SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_s_s = primwork + 882;
45     SIMINT_DBLTYPE * const restrict PRIM_INT__l_s_s_s = primwork + 1170;
46     SIMINT_DBLTYPE * const restrict PRIM_INT__m_s_s_s = primwork + 1485;
47     SIMINT_DBLTYPE * const restrict PRIM_INT__n_s_s_s = primwork + 1815;
48     SIMINT_DBLTYPE * const restrict PRIM_INT__o_s_s_s = primwork + 2145;
49     SIMINT_DBLTYPE * const restrict PRIM_INT__q_s_s_s = primwork + 2457;
50     SIMINT_DBLTYPE * const restrict PRIM_INT__r_s_s_s = primwork + 2730;
51     SIMINT_DBLTYPE * const restrict PRIM_INT__t_s_s_s = primwork + 2940;
52     double * const hrrwork = (double *)(primwork + 3060);
53     double * const HRR_INT__k_p_s_s = hrrwork + 0;
54     double * const HRR_INT__k_d_s_s = hrrwork + 108;
55     double * const HRR_INT__k_f_s_s = hrrwork + 324;
56     double * const HRR_INT__k_g_s_s = hrrwork + 684;
57     double * const HRR_INT__k_h_s_s = hrrwork + 1224;
58     double * const HRR_INT__k_i_s_s = hrrwork + 1980;
59     double * const HRR_INT__l_p_s_s = hrrwork + 2988;
60     double * const HRR_INT__l_d_s_s = hrrwork + 3123;
61     double * const HRR_INT__l_f_s_s = hrrwork + 3393;
62     double * const HRR_INT__l_g_s_s = hrrwork + 3843;
63     double * const HRR_INT__l_h_s_s = hrrwork + 4518;
64     double * const HRR_INT__l_i_s_s = hrrwork + 5463;
65     double * const HRR_INT__m_p_s_s = hrrwork + 6723;
66     double * const HRR_INT__m_d_s_s = hrrwork + 6888;
67     double * const HRR_INT__m_f_s_s = hrrwork + 7218;
68     double * const HRR_INT__m_g_s_s = hrrwork + 7768;
69     double * const HRR_INT__m_h_s_s = hrrwork + 8593;
70     double * const HRR_INT__n_p_s_s = hrrwork + 9748;
71     double * const HRR_INT__n_d_s_s = hrrwork + 9946;
72     double * const HRR_INT__n_f_s_s = hrrwork + 10342;
73     double * const HRR_INT__n_g_s_s = hrrwork + 11002;
74     double * const HRR_INT__o_p_s_s = hrrwork + 11992;
75     double * const HRR_INT__o_d_s_s = hrrwork + 12226;
76     double * const HRR_INT__o_f_s_s = hrrwork + 12694;
77     double * const HRR_INT__q_p_s_s = hrrwork + 13474;
78     double * const HRR_INT__q_d_s_s = hrrwork + 13747;
79     double * const HRR_INT__r_p_s_s = hrrwork + 14293;
80 
81 
82     // Create constants
83     const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
84     const SIMINT_DBLTYPE const_10 = SIMINT_DBLSET1(10);
85     const SIMINT_DBLTYPE const_11 = SIMINT_DBLSET1(11);
86     const SIMINT_DBLTYPE const_12 = SIMINT_DBLSET1(12);
87     const SIMINT_DBLTYPE const_13 = SIMINT_DBLSET1(13);
88     const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
89     const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
90     const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
91     const SIMINT_DBLTYPE const_5 = SIMINT_DBLSET1(5);
92     const SIMINT_DBLTYPE const_6 = SIMINT_DBLSET1(6);
93     const SIMINT_DBLTYPE const_7 = SIMINT_DBLSET1(7);
94     const SIMINT_DBLTYPE const_8 = SIMINT_DBLSET1(8);
95     const SIMINT_DBLTYPE const_9 = SIMINT_DBLSET1(9);
96     const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
97 
98 
99     ////////////////////////////////////////
100     // Loop over shells and primitives
101     ////////////////////////////////////////
102 
103     real_abcd = 0;
104     istart = 0;
105     for(ab = 0; ab < P.nshell12_clip; ++ab)
106     {
107         const int iend = istart + P.nprim12[ab];
108 
109         cd = 0;
110         jstart = 0;
111 
112         for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
113         {
114             const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
115             int jend = jstart;
116             for(i = 0; i < nshellbatch; i++)
117                 jend += Q.nprim12[cd+i];
118 
119             // Clear the beginning of the workspace (where we are accumulating integrals)
120             memset(work, 0, SIMINT_NSHELL_SIMD * 596 * sizeof(double));
121             abcd = 0;
122 
123 
124             for(i = istart; i < iend; ++i)
125             {
126                 SIMINT_DBLTYPE bra_screen_max;  // only used if check_screen
127 
128                 if(check_screen)
129                 {
130                     // Skip this whole thing if always insignificant
131                     if((P.screen[i] * Q.screen_max) < screen_tol)
132                         continue;
133                     bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
134                 }
135 
136                 icd = 0;
137                 iprimcd = 0;
138                 nprim_icd = Q.nprim12[cd];
139                 double * restrict PRIM_PTR_INT__k_s_s_s = INT__k_s_s_s + abcd * 36;
140                 double * restrict PRIM_PTR_INT__l_s_s_s = INT__l_s_s_s + abcd * 45;
141                 double * restrict PRIM_PTR_INT__m_s_s_s = INT__m_s_s_s + abcd * 55;
142                 double * restrict PRIM_PTR_INT__n_s_s_s = INT__n_s_s_s + abcd * 66;
143                 double * restrict PRIM_PTR_INT__o_s_s_s = INT__o_s_s_s + abcd * 78;
144                 double * restrict PRIM_PTR_INT__q_s_s_s = INT__q_s_s_s + abcd * 91;
145                 double * restrict PRIM_PTR_INT__r_s_s_s = INT__r_s_s_s + abcd * 105;
146                 double * restrict PRIM_PTR_INT__t_s_s_s = INT__t_s_s_s + abcd * 120;
147 
148 
149 
150                 // Load these one per loop over i
151                 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
152                 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
153                 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
154 
155                 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
156 
157                 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
158                 {
159                     // calculate the shell offsets
160                     // these are the offset from the shell pointed to by cd
161                     // for each element
162                     int shelloffsets[SIMINT_SIMD_LEN] = {0};
163                     int lastoffset = 0;
164                     const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
165 
166                     if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
167                     {
168                         // Handle if the first element of the vector is a new shell
169                         if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
170                         {
171                             nprim_icd += Q.nprim12[cd + (++icd)];
172                             PRIM_PTR_INT__k_s_s_s += 36;
173                             PRIM_PTR_INT__l_s_s_s += 45;
174                             PRIM_PTR_INT__m_s_s_s += 55;
175                             PRIM_PTR_INT__n_s_s_s += 66;
176                             PRIM_PTR_INT__o_s_s_s += 78;
177                             PRIM_PTR_INT__q_s_s_s += 91;
178                             PRIM_PTR_INT__r_s_s_s += 105;
179                             PRIM_PTR_INT__t_s_s_s += 120;
180                         }
181                         iprimcd++;
182                         for(n = 1; n < SIMINT_SIMD_LEN; ++n)
183                         {
184                             if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
185                             {
186                                 shelloffsets[n] = shelloffsets[n-1] + 1;
187                                 lastoffset++;
188                                 nprim_icd += Q.nprim12[cd + (++icd)];
189                             }
190                             else
191                                 shelloffsets[n] = shelloffsets[n-1];
192                             iprimcd++;
193                         }
194                     }
195                     else
196                         iprimcd += SIMINT_SIMD_LEN;
197 
198                     // Do we have to compute this vector (or has it been screened out)?
199                     // (not_screened != 0 means we have to do this vector)
200                     if(check_screen)
201                     {
202                         const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
203                         if(vmax < screen_tol)
204                         {
205                             PRIM_PTR_INT__k_s_s_s += lastoffset*36;
206                             PRIM_PTR_INT__l_s_s_s += lastoffset*45;
207                             PRIM_PTR_INT__m_s_s_s += lastoffset*55;
208                             PRIM_PTR_INT__n_s_s_s += lastoffset*66;
209                             PRIM_PTR_INT__o_s_s_s += lastoffset*78;
210                             PRIM_PTR_INT__q_s_s_s += lastoffset*91;
211                             PRIM_PTR_INT__r_s_s_s += lastoffset*105;
212                             PRIM_PTR_INT__t_s_s_s += lastoffset*120;
213                             continue;
214                         }
215                     }
216 
217                     const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
218                     const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
219                     const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
220                     const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
221 
222 
223                     /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
224                     SIMINT_DBLTYPE PQ[3];
225                     PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
226                     PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
227                     PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
228                     SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
229                     R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
230                     R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
231 
232                     const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
233                     const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
234                     const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
235                     const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
236                     const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
237                     const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
238 
239                     // NOTE: Minus sign!
240                     const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
241                     SIMINT_DBLTYPE aop_PQ[3];
242                     aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
243                     aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
244                     aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
245 
246 
247                     //////////////////////////////////////////////
248                     // Fjt function section
249                     // Maximum v value: 14
250                     //////////////////////////////////////////////
251                     // The parameter to the Fjt function
252                     const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
253 
254 
255                     const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
256 
257 
258                     boys_F_split(PRIM_INT__s_s_s_s, F_x, 14);
259                     SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
260                     prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
261                     for(n = 0; n <= 14; n++)
262                         PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
263 
264                     //////////////////////////////////////////////
265                     // Primitive integrals: Vertical recurrance
266                     //////////////////////////////////////////////
267 
268                     const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
269                     const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
270                     const SIMINT_DBLTYPE vrr_const_3_over_2p = SIMINT_MUL(const_3, one_over_2p);
271                     const SIMINT_DBLTYPE vrr_const_4_over_2p = SIMINT_MUL(const_4, one_over_2p);
272                     const SIMINT_DBLTYPE vrr_const_5_over_2p = SIMINT_MUL(const_5, one_over_2p);
273                     const SIMINT_DBLTYPE vrr_const_6_over_2p = SIMINT_MUL(const_6, one_over_2p);
274                     const SIMINT_DBLTYPE vrr_const_7_over_2p = SIMINT_MUL(const_7, one_over_2p);
275                     const SIMINT_DBLTYPE vrr_const_8_over_2p = SIMINT_MUL(const_8, one_over_2p);
276                     const SIMINT_DBLTYPE vrr_const_9_over_2p = SIMINT_MUL(const_9, one_over_2p);
277                     const SIMINT_DBLTYPE vrr_const_10_over_2p = SIMINT_MUL(const_10, one_over_2p);
278                     const SIMINT_DBLTYPE vrr_const_11_over_2p = SIMINT_MUL(const_11, one_over_2p);
279                     const SIMINT_DBLTYPE vrr_const_12_over_2p = SIMINT_MUL(const_12, one_over_2p);
280                     const SIMINT_DBLTYPE vrr_const_13_over_2p = SIMINT_MUL(const_13, one_over_2p);
281 
282 
283 
284                     // Forming PRIM_INT__p_s_s_s[14 * 3];
285                     for(n = 0; n < 14; ++n)  // loop over orders of auxiliary function
286                     {
287 
288                         PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
289                         PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]);
290 
291                         PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
292                         PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 1]);
293 
294                         PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
295                         PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 2]);
296 
297                     }
298 
299 
300 
301                     // Forming PRIM_INT__d_s_s_s[13 * 6];
302                     for(n = 0; n < 13; ++n)  // loop over orders of auxiliary function
303                     {
304 
305                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__p_s_s_s[n * 3 + 0]);
306                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 0]);
307                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 0]);
308 
309                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 1]);
310                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 3]);
311                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 3]);
312 
313                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 2]);
314                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_s[n * 6 + 5]);
315                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 5]);
316 
317                     }
318 
319 
320 
321                     // Forming PRIM_INT__f_s_s_s[12 * 10];
322                     for(n = 0; n < 12; ++n)  // loop over orders of auxiliary function
323                     {
324 
325                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
326                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 0]);
327                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__f_s_s_s[n * 10 + 0]);
328 
329                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
330                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 6]);
331                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__f_s_s_s[n * 10 + 6]);
332 
333                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
334                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 9]);
335                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__f_s_s_s[n * 10 + 9]);
336 
337                     }
338 
339 
340                     VRR_I_g_s_s_s(
341                             PRIM_INT__g_s_s_s,
342                             PRIM_INT__f_s_s_s,
343                             PRIM_INT__d_s_s_s,
344                             P_PA,
345                             a_over_p,
346                             aop_PQ,
347                             one_over_2p,
348                             11);
349 
350 
351                     VRR_I_h_s_s_s(
352                             PRIM_INT__h_s_s_s,
353                             PRIM_INT__g_s_s_s,
354                             PRIM_INT__f_s_s_s,
355                             P_PA,
356                             a_over_p,
357                             aop_PQ,
358                             one_over_2p,
359                             10);
360 
361 
362                     ostei_general_vrr1_I(6, 9,
363                             one_over_2p, a_over_p, aop_PQ, P_PA,
364                             PRIM_INT__h_s_s_s, PRIM_INT__g_s_s_s, PRIM_INT__i_s_s_s);
365 
366 
367                     ostei_general_vrr1_I(7, 8,
368                             one_over_2p, a_over_p, aop_PQ, P_PA,
369                             PRIM_INT__i_s_s_s, PRIM_INT__h_s_s_s, PRIM_INT__k_s_s_s);
370 
371 
372                     ostei_general_vrr1_I(8, 7,
373                             one_over_2p, a_over_p, aop_PQ, P_PA,
374                             PRIM_INT__k_s_s_s, PRIM_INT__i_s_s_s, PRIM_INT__l_s_s_s);
375 
376 
377                     ostei_general_vrr1_I(9, 6,
378                             one_over_2p, a_over_p, aop_PQ, P_PA,
379                             PRIM_INT__l_s_s_s, PRIM_INT__k_s_s_s, PRIM_INT__m_s_s_s);
380 
381 
382                     ostei_general_vrr1_I(10, 5,
383                             one_over_2p, a_over_p, aop_PQ, P_PA,
384                             PRIM_INT__m_s_s_s, PRIM_INT__l_s_s_s, PRIM_INT__n_s_s_s);
385 
386 
387                     ostei_general_vrr1_I(11, 4,
388                             one_over_2p, a_over_p, aop_PQ, P_PA,
389                             PRIM_INT__n_s_s_s, PRIM_INT__m_s_s_s, PRIM_INT__o_s_s_s);
390 
391 
392                     ostei_general_vrr1_I(12, 3,
393                             one_over_2p, a_over_p, aop_PQ, P_PA,
394                             PRIM_INT__o_s_s_s, PRIM_INT__n_s_s_s, PRIM_INT__q_s_s_s);
395 
396 
397                     ostei_general_vrr1_I(13, 2,
398                             one_over_2p, a_over_p, aop_PQ, P_PA,
399                             PRIM_INT__q_s_s_s, PRIM_INT__o_s_s_s, PRIM_INT__r_s_s_s);
400 
401 
402                     ostei_general_vrr1_I(14, 1,
403                             one_over_2p, a_over_p, aop_PQ, P_PA,
404                             PRIM_INT__r_s_s_s, PRIM_INT__q_s_s_s, PRIM_INT__t_s_s_s);
405 
406 
407 
408 
409                     ////////////////////////////////////
410                     // Accumulate contracted integrals
411                     ////////////////////////////////////
412                     if(lastoffset == 0)
413                     {
414                         contract_all(36, PRIM_INT__k_s_s_s, PRIM_PTR_INT__k_s_s_s);
415                         contract_all(45, PRIM_INT__l_s_s_s, PRIM_PTR_INT__l_s_s_s);
416                         contract_all(55, PRIM_INT__m_s_s_s, PRIM_PTR_INT__m_s_s_s);
417                         contract_all(66, PRIM_INT__n_s_s_s, PRIM_PTR_INT__n_s_s_s);
418                         contract_all(78, PRIM_INT__o_s_s_s, PRIM_PTR_INT__o_s_s_s);
419                         contract_all(91, PRIM_INT__q_s_s_s, PRIM_PTR_INT__q_s_s_s);
420                         contract_all(105, PRIM_INT__r_s_s_s, PRIM_PTR_INT__r_s_s_s);
421                         contract_all(120, PRIM_INT__t_s_s_s, PRIM_PTR_INT__t_s_s_s);
422                     }
423                     else
424                     {
425                         contract(36, shelloffsets, PRIM_INT__k_s_s_s, PRIM_PTR_INT__k_s_s_s);
426                         contract(45, shelloffsets, PRIM_INT__l_s_s_s, PRIM_PTR_INT__l_s_s_s);
427                         contract(55, shelloffsets, PRIM_INT__m_s_s_s, PRIM_PTR_INT__m_s_s_s);
428                         contract(66, shelloffsets, PRIM_INT__n_s_s_s, PRIM_PTR_INT__n_s_s_s);
429                         contract(78, shelloffsets, PRIM_INT__o_s_s_s, PRIM_PTR_INT__o_s_s_s);
430                         contract(91, shelloffsets, PRIM_INT__q_s_s_s, PRIM_PTR_INT__q_s_s_s);
431                         contract(105, shelloffsets, PRIM_INT__r_s_s_s, PRIM_PTR_INT__r_s_s_s);
432                         contract(120, shelloffsets, PRIM_INT__t_s_s_s, PRIM_PTR_INT__t_s_s_s);
433                         PRIM_PTR_INT__k_s_s_s += lastoffset*36;
434                         PRIM_PTR_INT__l_s_s_s += lastoffset*45;
435                         PRIM_PTR_INT__m_s_s_s += lastoffset*55;
436                         PRIM_PTR_INT__n_s_s_s += lastoffset*66;
437                         PRIM_PTR_INT__o_s_s_s += lastoffset*78;
438                         PRIM_PTR_INT__q_s_s_s += lastoffset*91;
439                         PRIM_PTR_INT__r_s_s_s += lastoffset*105;
440                         PRIM_PTR_INT__t_s_s_s += lastoffset*120;
441                     }
442 
443                 }  // close loop over j
444             }  // close loop over i
445 
446             //Advance to the next batch
447             jstart = SIMINT_SIMD_ROUND(jend);
448 
449             //////////////////////////////////////////////
450             // Contracted integrals: Horizontal recurrance
451             //////////////////////////////////////////////
452 
453 
454             const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
455 
456 
457             for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
458             {
459 
460                 // set up HRR pointers
461                 double const * restrict HRR_INT__k_s_s_s = INT__k_s_s_s + abcd * 36;
462                 double const * restrict HRR_INT__l_s_s_s = INT__l_s_s_s + abcd * 45;
463                 double const * restrict HRR_INT__m_s_s_s = INT__m_s_s_s + abcd * 55;
464                 double const * restrict HRR_INT__n_s_s_s = INT__n_s_s_s + abcd * 66;
465                 double const * restrict HRR_INT__o_s_s_s = INT__o_s_s_s + abcd * 78;
466                 double const * restrict HRR_INT__q_s_s_s = INT__q_s_s_s + abcd * 91;
467                 double const * restrict HRR_INT__r_s_s_s = INT__r_s_s_s + abcd * 105;
468                 double const * restrict HRR_INT__t_s_s_s = INT__t_s_s_s + abcd * 120;
469                 double * restrict HRR_INT__k_k_s_s = INT__k_k_s_s + real_abcd * 1296;
470 
471                 // form INT__k_p_s_s
472                 ostei_general_hrr_J(7, 1, 0, 0, hAB, HRR_INT__l_s_s_s, HRR_INT__k_s_s_s, HRR_INT__k_p_s_s);
473 
474                 // form INT__l_p_s_s
475                 ostei_general_hrr_J(8, 1, 0, 0, hAB, HRR_INT__m_s_s_s, HRR_INT__l_s_s_s, HRR_INT__l_p_s_s);
476 
477                 // form INT__m_p_s_s
478                 ostei_general_hrr_J(9, 1, 0, 0, hAB, HRR_INT__n_s_s_s, HRR_INT__m_s_s_s, HRR_INT__m_p_s_s);
479 
480                 // form INT__n_p_s_s
481                 ostei_general_hrr_J(10, 1, 0, 0, hAB, HRR_INT__o_s_s_s, HRR_INT__n_s_s_s, HRR_INT__n_p_s_s);
482 
483                 // form INT__o_p_s_s
484                 ostei_general_hrr_J(11, 1, 0, 0, hAB, HRR_INT__q_s_s_s, HRR_INT__o_s_s_s, HRR_INT__o_p_s_s);
485 
486                 // form INT__q_p_s_s
487                 ostei_general_hrr_J(12, 1, 0, 0, hAB, HRR_INT__r_s_s_s, HRR_INT__q_s_s_s, HRR_INT__q_p_s_s);
488 
489                 // form INT__r_p_s_s
490                 ostei_general_hrr_J(13, 1, 0, 0, hAB, HRR_INT__t_s_s_s, HRR_INT__r_s_s_s, HRR_INT__r_p_s_s);
491 
492                 // form INT__k_d_s_s
493                 ostei_general_hrr_J(7, 2, 0, 0, hAB, HRR_INT__l_p_s_s, HRR_INT__k_p_s_s, HRR_INT__k_d_s_s);
494 
495                 // form INT__l_d_s_s
496                 ostei_general_hrr_J(8, 2, 0, 0, hAB, HRR_INT__m_p_s_s, HRR_INT__l_p_s_s, HRR_INT__l_d_s_s);
497 
498                 // form INT__m_d_s_s
499                 ostei_general_hrr_J(9, 2, 0, 0, hAB, HRR_INT__n_p_s_s, HRR_INT__m_p_s_s, HRR_INT__m_d_s_s);
500 
501                 // form INT__n_d_s_s
502                 ostei_general_hrr_J(10, 2, 0, 0, hAB, HRR_INT__o_p_s_s, HRR_INT__n_p_s_s, HRR_INT__n_d_s_s);
503 
504                 // form INT__o_d_s_s
505                 ostei_general_hrr_J(11, 2, 0, 0, hAB, HRR_INT__q_p_s_s, HRR_INT__o_p_s_s, HRR_INT__o_d_s_s);
506 
507                 // form INT__q_d_s_s
508                 ostei_general_hrr_J(12, 2, 0, 0, hAB, HRR_INT__r_p_s_s, HRR_INT__q_p_s_s, HRR_INT__q_d_s_s);
509 
510                 // form INT__k_f_s_s
511                 ostei_general_hrr_J(7, 3, 0, 0, hAB, HRR_INT__l_d_s_s, HRR_INT__k_d_s_s, HRR_INT__k_f_s_s);
512 
513                 // form INT__l_f_s_s
514                 ostei_general_hrr_J(8, 3, 0, 0, hAB, HRR_INT__m_d_s_s, HRR_INT__l_d_s_s, HRR_INT__l_f_s_s);
515 
516                 // form INT__m_f_s_s
517                 ostei_general_hrr_J(9, 3, 0, 0, hAB, HRR_INT__n_d_s_s, HRR_INT__m_d_s_s, HRR_INT__m_f_s_s);
518 
519                 // form INT__n_f_s_s
520                 ostei_general_hrr_J(10, 3, 0, 0, hAB, HRR_INT__o_d_s_s, HRR_INT__n_d_s_s, HRR_INT__n_f_s_s);
521 
522                 // form INT__o_f_s_s
523                 ostei_general_hrr_J(11, 3, 0, 0, hAB, HRR_INT__q_d_s_s, HRR_INT__o_d_s_s, HRR_INT__o_f_s_s);
524 
525                 // form INT__k_g_s_s
526                 ostei_general_hrr_J(7, 4, 0, 0, hAB, HRR_INT__l_f_s_s, HRR_INT__k_f_s_s, HRR_INT__k_g_s_s);
527 
528                 // form INT__l_g_s_s
529                 ostei_general_hrr_J(8, 4, 0, 0, hAB, HRR_INT__m_f_s_s, HRR_INT__l_f_s_s, HRR_INT__l_g_s_s);
530 
531                 // form INT__m_g_s_s
532                 ostei_general_hrr_J(9, 4, 0, 0, hAB, HRR_INT__n_f_s_s, HRR_INT__m_f_s_s, HRR_INT__m_g_s_s);
533 
534                 // form INT__n_g_s_s
535                 ostei_general_hrr_J(10, 4, 0, 0, hAB, HRR_INT__o_f_s_s, HRR_INT__n_f_s_s, HRR_INT__n_g_s_s);
536 
537                 // form INT__k_h_s_s
538                 ostei_general_hrr_J(7, 5, 0, 0, hAB, HRR_INT__l_g_s_s, HRR_INT__k_g_s_s, HRR_INT__k_h_s_s);
539 
540                 // form INT__l_h_s_s
541                 ostei_general_hrr_J(8, 5, 0, 0, hAB, HRR_INT__m_g_s_s, HRR_INT__l_g_s_s, HRR_INT__l_h_s_s);
542 
543                 // form INT__m_h_s_s
544                 ostei_general_hrr_J(9, 5, 0, 0, hAB, HRR_INT__n_g_s_s, HRR_INT__m_g_s_s, HRR_INT__m_h_s_s);
545 
546                 // form INT__k_i_s_s
547                 ostei_general_hrr_J(7, 6, 0, 0, hAB, HRR_INT__l_h_s_s, HRR_INT__k_h_s_s, HRR_INT__k_i_s_s);
548 
549                 // form INT__l_i_s_s
550                 ostei_general_hrr_J(8, 6, 0, 0, hAB, HRR_INT__m_h_s_s, HRR_INT__l_h_s_s, HRR_INT__l_i_s_s);
551 
552                 // form INT__k_k_s_s
553                 ostei_general_hrr_J(7, 7, 0, 0, hAB, HRR_INT__l_i_s_s, HRR_INT__k_i_s_s, HRR_INT__k_k_s_s);
554 
555 
556             }  // close HRR loop
557 
558 
559         }   // close loop cdbatch
560 
561         istart = iend;
562     }  // close loop over ab
563 
564     return P.nshell12_clip * Q.nshell12_clip;
565 }
566 
567