1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6 
7 
ostei_d_p_i_f(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_p_i_f)8 int ostei_d_p_i_f(struct simint_multi_shellpair const P,
9                   struct simint_multi_shellpair const Q,
10                   double screen_tol,
11                   double * const restrict work,
12                   double * const restrict INT__d_p_i_f)
13 {
14 
15     SIMINT_ASSUME_ALIGN_DBL(work);
16     SIMINT_ASSUME_ALIGN_DBL(INT__d_p_i_f);
17     int ab, cd, abcd;
18     int istart, jstart;
19     int iprimcd, nprim_icd, icd;
20     const int check_screen = (screen_tol > 0.0);
21     int i, j;
22     int n;
23     int not_screened;
24     int real_abcd;
25     int iket;
26     int ibra;
27 
28     // partition workspace
29     double * const INT__d_s_i_s = work + (SIMINT_NSHELL_SIMD * 0);
30     double * const INT__d_s_k_s = work + (SIMINT_NSHELL_SIMD * 168);
31     double * const INT__d_s_l_s = work + (SIMINT_NSHELL_SIMD * 384);
32     double * const INT__d_s_m_s = work + (SIMINT_NSHELL_SIMD * 654);
33     double * const INT__f_s_i_s = work + (SIMINT_NSHELL_SIMD * 984);
34     double * const INT__f_s_k_s = work + (SIMINT_NSHELL_SIMD * 1264);
35     double * const INT__f_s_l_s = work + (SIMINT_NSHELL_SIMD * 1624);
36     double * const INT__f_s_m_s = work + (SIMINT_NSHELL_SIMD * 2074);
37     SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*2624);
38     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
39     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_p_s = primwork + 13;
40     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_d_s = primwork + 49;
41     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_f_s = primwork + 115;
42     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_g_s = primwork + 215;
43     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_h_s = primwork + 350;
44     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_i_s = primwork + 518;
45     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_k_s = primwork + 714;
46     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_l_s = primwork + 930;
47     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_m_s = primwork + 1155;
48     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_g_s = primwork + 1375;
49     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_h_s = primwork + 1510;
50     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_i_s = primwork + 1699;
51     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_k_s = primwork + 1951;
52     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_l_s = primwork + 2275;
53     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_m_s = primwork + 2680;
54     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_h_s = primwork + 3175;
55     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_i_s = primwork + 3427;
56     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_k_s = primwork + 3763;
57     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_l_s = primwork + 4195;
58     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_m_s = primwork + 4735;
59     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_i_s = primwork + 5395;
60     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_k_s = primwork + 5675;
61     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_l_s = primwork + 6035;
62     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_m_s = primwork + 6485;
63     double * const hrrwork = (double *)(primwork + 7035);
64     double * const HRR_INT__d_p_i_s = hrrwork + 0;
65     double * const HRR_INT__d_p_i_p = hrrwork + 504;
66     double * const HRR_INT__d_p_i_d = hrrwork + 2016;
67     double * const HRR_INT__d_p_k_s = hrrwork + 5040;
68     double * const HRR_INT__d_p_k_p = hrrwork + 5688;
69     double * const HRR_INT__d_p_k_d = hrrwork + 7632;
70     double * const HRR_INT__d_p_l_s = hrrwork + 11520;
71     double * const HRR_INT__d_p_l_p = hrrwork + 12330;
72     double * const HRR_INT__d_p_m_s = hrrwork + 14760;
73 
74 
75     // Create constants
76     const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
77     const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
78     const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
79     const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
80     const SIMINT_DBLTYPE const_5 = SIMINT_DBLSET1(5);
81     const SIMINT_DBLTYPE const_6 = SIMINT_DBLSET1(6);
82     const SIMINT_DBLTYPE const_7 = SIMINT_DBLSET1(7);
83     const SIMINT_DBLTYPE const_8 = SIMINT_DBLSET1(8);
84     const SIMINT_DBLTYPE const_9 = SIMINT_DBLSET1(9);
85     const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
86 
87 
88     ////////////////////////////////////////
89     // Loop over shells and primitives
90     ////////////////////////////////////////
91 
92     real_abcd = 0;
93     istart = 0;
94     for(ab = 0; ab < P.nshell12_clip; ++ab)
95     {
96         const int iend = istart + P.nprim12[ab];
97 
98         cd = 0;
99         jstart = 0;
100 
101         for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
102         {
103             const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
104             int jend = jstart;
105             for(i = 0; i < nshellbatch; i++)
106                 jend += Q.nprim12[cd+i];
107 
108             // Clear the beginning of the workspace (where we are accumulating integrals)
109             memset(work, 0, SIMINT_NSHELL_SIMD * 2624 * sizeof(double));
110             abcd = 0;
111 
112 
113             for(i = istart; i < iend; ++i)
114             {
115                 SIMINT_DBLTYPE bra_screen_max;  // only used if check_screen
116 
117                 if(check_screen)
118                 {
119                     // Skip this whole thing if always insignificant
120                     if((P.screen[i] * Q.screen_max) < screen_tol)
121                         continue;
122                     bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
123                 }
124 
125                 icd = 0;
126                 iprimcd = 0;
127                 nprim_icd = Q.nprim12[cd];
128                 double * restrict PRIM_PTR_INT__d_s_i_s = INT__d_s_i_s + abcd * 168;
129                 double * restrict PRIM_PTR_INT__d_s_k_s = INT__d_s_k_s + abcd * 216;
130                 double * restrict PRIM_PTR_INT__d_s_l_s = INT__d_s_l_s + abcd * 270;
131                 double * restrict PRIM_PTR_INT__d_s_m_s = INT__d_s_m_s + abcd * 330;
132                 double * restrict PRIM_PTR_INT__f_s_i_s = INT__f_s_i_s + abcd * 280;
133                 double * restrict PRIM_PTR_INT__f_s_k_s = INT__f_s_k_s + abcd * 360;
134                 double * restrict PRIM_PTR_INT__f_s_l_s = INT__f_s_l_s + abcd * 450;
135                 double * restrict PRIM_PTR_INT__f_s_m_s = INT__f_s_m_s + abcd * 550;
136 
137 
138 
139                 // Load these one per loop over i
140                 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
141                 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
142                 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
143 
144                 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
145 
146                 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
147                 {
148                     // calculate the shell offsets
149                     // these are the offset from the shell pointed to by cd
150                     // for each element
151                     int shelloffsets[SIMINT_SIMD_LEN] = {0};
152                     int lastoffset = 0;
153                     const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
154 
155                     if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
156                     {
157                         // Handle if the first element of the vector is a new shell
158                         if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
159                         {
160                             nprim_icd += Q.nprim12[cd + (++icd)];
161                             PRIM_PTR_INT__d_s_i_s += 168;
162                             PRIM_PTR_INT__d_s_k_s += 216;
163                             PRIM_PTR_INT__d_s_l_s += 270;
164                             PRIM_PTR_INT__d_s_m_s += 330;
165                             PRIM_PTR_INT__f_s_i_s += 280;
166                             PRIM_PTR_INT__f_s_k_s += 360;
167                             PRIM_PTR_INT__f_s_l_s += 450;
168                             PRIM_PTR_INT__f_s_m_s += 550;
169                         }
170                         iprimcd++;
171                         for(n = 1; n < SIMINT_SIMD_LEN; ++n)
172                         {
173                             if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
174                             {
175                                 shelloffsets[n] = shelloffsets[n-1] + 1;
176                                 lastoffset++;
177                                 nprim_icd += Q.nprim12[cd + (++icd)];
178                             }
179                             else
180                                 shelloffsets[n] = shelloffsets[n-1];
181                             iprimcd++;
182                         }
183                     }
184                     else
185                         iprimcd += SIMINT_SIMD_LEN;
186 
187                     // Do we have to compute this vector (or has it been screened out)?
188                     // (not_screened != 0 means we have to do this vector)
189                     if(check_screen)
190                     {
191                         const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
192                         if(vmax < screen_tol)
193                         {
194                             PRIM_PTR_INT__d_s_i_s += lastoffset*168;
195                             PRIM_PTR_INT__d_s_k_s += lastoffset*216;
196                             PRIM_PTR_INT__d_s_l_s += lastoffset*270;
197                             PRIM_PTR_INT__d_s_m_s += lastoffset*330;
198                             PRIM_PTR_INT__f_s_i_s += lastoffset*280;
199                             PRIM_PTR_INT__f_s_k_s += lastoffset*360;
200                             PRIM_PTR_INT__f_s_l_s += lastoffset*450;
201                             PRIM_PTR_INT__f_s_m_s += lastoffset*550;
202                             continue;
203                         }
204                     }
205 
206                     const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
207                     const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
208                     const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
209                     const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
210 
211 
212                     /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
213                     SIMINT_DBLTYPE PQ[3];
214                     PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
215                     PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
216                     PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
217                     SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
218                     R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
219                     R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
220 
221                     const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
222                     const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
223                     const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
224                     const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
225                     const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
226                     const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
227                     const SIMINT_DBLTYPE Q_PA[3] = { SIMINT_DBLLOAD(Q.PA_x, j), SIMINT_DBLLOAD(Q.PA_y, j), SIMINT_DBLLOAD(Q.PA_z, j) };
228 
229                     // NOTE: Minus sign!
230                     const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
231                     SIMINT_DBLTYPE aop_PQ[3];
232                     aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
233                     aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
234                     aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
235 
236                     SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
237                     SIMINT_DBLTYPE aoq_PQ[3];
238                     aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
239                     aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
240                     aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
241                     // Put a minus sign here so we don't have to in RR routines
242                     a_over_q = SIMINT_NEG(a_over_q);
243 
244 
245                     //////////////////////////////////////////////
246                     // Fjt function section
247                     // Maximum v value: 12
248                     //////////////////////////////////////////////
249                     // The parameter to the Fjt function
250                     const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
251 
252 
253                     const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
254 
255 
256                     boys_F_split(PRIM_INT__s_s_s_s, F_x, 12);
257                     SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
258                     prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
259                     for(n = 0; n <= 12; n++)
260                         PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
261 
262                     //////////////////////////////////////////////
263                     // Primitive integrals: Vertical recurrance
264                     //////////////////////////////////////////////
265 
266                     const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
267                     const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
268                     const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
269                     const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
270                     const SIMINT_DBLTYPE vrr_const_3_over_2q = SIMINT_MUL(const_3, one_over_2q);
271                     const SIMINT_DBLTYPE vrr_const_4_over_2q = SIMINT_MUL(const_4, one_over_2q);
272                     const SIMINT_DBLTYPE vrr_const_5_over_2q = SIMINT_MUL(const_5, one_over_2q);
273                     const SIMINT_DBLTYPE vrr_const_6_over_2q = SIMINT_MUL(const_6, one_over_2q);
274                     const SIMINT_DBLTYPE vrr_const_7_over_2q = SIMINT_MUL(const_7, one_over_2q);
275                     const SIMINT_DBLTYPE vrr_const_8_over_2q = SIMINT_MUL(const_8, one_over_2q);
276                     const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
277                     const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
278                     const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
279                     const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
280                     const SIMINT_DBLTYPE vrr_const_5_over_2pq = SIMINT_MUL(const_5, one_over_2pq);
281                     const SIMINT_DBLTYPE vrr_const_6_over_2pq = SIMINT_MUL(const_6, one_over_2pq);
282                     const SIMINT_DBLTYPE vrr_const_7_over_2pq = SIMINT_MUL(const_7, one_over_2pq);
283                     const SIMINT_DBLTYPE vrr_const_8_over_2pq = SIMINT_MUL(const_8, one_over_2pq);
284                     const SIMINT_DBLTYPE vrr_const_9_over_2pq = SIMINT_MUL(const_9, one_over_2pq);
285 
286 
287 
288                     // Forming PRIM_INT__s_s_p_s[12 * 3];
289                     for(n = 0; n < 12; ++n)  // loop over orders of auxiliary function
290                     {
291 
292                         PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
293                         PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]);
294 
295                         PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
296                         PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 1]);
297 
298                         PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
299                         PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 2]);
300 
301                     }
302 
303 
304 
305                     // Forming PRIM_INT__s_s_d_s[11 * 6];
306                     for(n = 0; n < 11; ++n)  // loop over orders of auxiliary function
307                     {
308 
309                         PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_p_s[n * 3 + 0]);
310                         PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 0]);
311                         PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 0]);
312 
313                         PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 0]);
314                         PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 1]);
315 
316                         PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 1]);
317                         PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_d_s[n * 6 + 3]);
318                         PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 3]);
319 
320                         PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 2]);
321                         PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_d_s[n * 6 + 5]);
322                         PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 5]);
323 
324                     }
325 
326 
327 
328                     // Forming PRIM_INT__s_s_f_s[10 * 10];
329                     for(n = 0; n < 10; ++n)  // loop over orders of auxiliary function
330                     {
331 
332                         PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 0]);
333                         PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 0]);
334                         PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]), PRIM_INT__s_s_f_s[n * 10 + 0]);
335 
336                         PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 0]);
337                         PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 1]);
338 
339                         PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 0]);
340                         PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 2]);
341 
342                         PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 3]);
343                         PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 3]);
344 
345                         PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 1]);
346                         PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__s_s_f_s[n * 10 + 4]);
347 
348                         PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 5]);
349                         PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 5]);
350 
351                         PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 3]);
352                         PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 6]);
353                         PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_p_s[n * 3 + 1]), PRIM_INT__s_s_f_s[n * 10 + 6]);
354 
355                         PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 3]);
356                         PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 7]);
357 
358                         PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 5]);
359                         PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 8]);
360 
361                         PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 5]);
362                         PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 9]);
363                         PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_p_s[n * 3 + 2]), PRIM_INT__s_s_f_s[n * 10 + 9]);
364 
365                     }
366 
367 
368                     VRR_K_s_s_g_s(
369                             PRIM_INT__s_s_g_s,
370                             PRIM_INT__s_s_f_s,
371                             PRIM_INT__s_s_d_s,
372                             Q_PA,
373                             a_over_q,
374                             aoq_PQ,
375                             one_over_2q,
376                             9);
377 
378 
379                     VRR_K_s_s_h_s(
380                             PRIM_INT__s_s_h_s,
381                             PRIM_INT__s_s_g_s,
382                             PRIM_INT__s_s_f_s,
383                             Q_PA,
384                             a_over_q,
385                             aoq_PQ,
386                             one_over_2q,
387                             8);
388 
389 
390                     ostei_general_vrr1_K(6, 7,
391                             one_over_2q, a_over_q, aoq_PQ, Q_PA,
392                             PRIM_INT__s_s_h_s, PRIM_INT__s_s_g_s, PRIM_INT__s_s_i_s);
393 
394 
395                     ostei_general_vrr_I(1, 0, 6, 0, 3,
396                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
397                             PRIM_INT__s_s_i_s, NULL, NULL, PRIM_INT__s_s_h_s, NULL, PRIM_INT__p_s_i_s);
398 
399 
400                     ostei_general_vrr_I(1, 0, 5, 0, 3,
401                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
402                             PRIM_INT__s_s_h_s, NULL, NULL, PRIM_INT__s_s_g_s, NULL, PRIM_INT__p_s_h_s);
403 
404 
405                     ostei_general_vrr_I(2, 0, 6, 0, 2,
406                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
407                             PRIM_INT__p_s_i_s, PRIM_INT__s_s_i_s, NULL, PRIM_INT__p_s_h_s, NULL, PRIM_INT__d_s_i_s);
408 
409 
410                     ostei_general_vrr1_K(7, 6,
411                             one_over_2q, a_over_q, aoq_PQ, Q_PA,
412                             PRIM_INT__s_s_i_s, PRIM_INT__s_s_h_s, PRIM_INT__s_s_k_s);
413 
414 
415                     ostei_general_vrr_I(1, 0, 7, 0, 3,
416                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
417                             PRIM_INT__s_s_k_s, NULL, NULL, PRIM_INT__s_s_i_s, NULL, PRIM_INT__p_s_k_s);
418 
419 
420                     ostei_general_vrr_I(2, 0, 7, 0, 2,
421                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
422                             PRIM_INT__p_s_k_s, PRIM_INT__s_s_k_s, NULL, PRIM_INT__p_s_i_s, NULL, PRIM_INT__d_s_k_s);
423 
424 
425                     VRR_I_p_s_g_s(
426                             PRIM_INT__p_s_g_s,
427                             PRIM_INT__s_s_g_s,
428                             PRIM_INT__s_s_f_s,
429                             P_PA,
430                             aop_PQ,
431                             one_over_2pq,
432                             3);
433 
434 
435                     ostei_general_vrr_I(2, 0, 5, 0, 2,
436                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
437                             PRIM_INT__p_s_h_s, PRIM_INT__s_s_h_s, NULL, PRIM_INT__p_s_g_s, NULL, PRIM_INT__d_s_h_s);
438 
439 
440                     ostei_general_vrr_I(3, 0, 6, 0, 1,
441                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
442                             PRIM_INT__d_s_i_s, PRIM_INT__p_s_i_s, NULL, PRIM_INT__d_s_h_s, NULL, PRIM_INT__f_s_i_s);
443 
444 
445                     ostei_general_vrr1_K(8, 5,
446                             one_over_2q, a_over_q, aoq_PQ, Q_PA,
447                             PRIM_INT__s_s_k_s, PRIM_INT__s_s_i_s, PRIM_INT__s_s_l_s);
448 
449 
450                     ostei_general_vrr_I(1, 0, 8, 0, 3,
451                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
452                             PRIM_INT__s_s_l_s, NULL, NULL, PRIM_INT__s_s_k_s, NULL, PRIM_INT__p_s_l_s);
453 
454 
455                     ostei_general_vrr_I(2, 0, 8, 0, 2,
456                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
457                             PRIM_INT__p_s_l_s, PRIM_INT__s_s_l_s, NULL, PRIM_INT__p_s_k_s, NULL, PRIM_INT__d_s_l_s);
458 
459 
460                     ostei_general_vrr_I(3, 0, 7, 0, 1,
461                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
462                             PRIM_INT__d_s_k_s, PRIM_INT__p_s_k_s, NULL, PRIM_INT__d_s_i_s, NULL, PRIM_INT__f_s_k_s);
463 
464 
465                     ostei_general_vrr1_K(9, 4,
466                             one_over_2q, a_over_q, aoq_PQ, Q_PA,
467                             PRIM_INT__s_s_l_s, PRIM_INT__s_s_k_s, PRIM_INT__s_s_m_s);
468 
469 
470                     ostei_general_vrr_I(1, 0, 9, 0, 3,
471                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
472                             PRIM_INT__s_s_m_s, NULL, NULL, PRIM_INT__s_s_l_s, NULL, PRIM_INT__p_s_m_s);
473 
474 
475                     ostei_general_vrr_I(2, 0, 9, 0, 2,
476                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
477                             PRIM_INT__p_s_m_s, PRIM_INT__s_s_m_s, NULL, PRIM_INT__p_s_l_s, NULL, PRIM_INT__d_s_m_s);
478 
479 
480                     ostei_general_vrr_I(3, 0, 8, 0, 1,
481                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
482                             PRIM_INT__d_s_l_s, PRIM_INT__p_s_l_s, NULL, PRIM_INT__d_s_k_s, NULL, PRIM_INT__f_s_l_s);
483 
484 
485                     ostei_general_vrr_I(3, 0, 9, 0, 1,
486                             one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
487                             PRIM_INT__d_s_m_s, PRIM_INT__p_s_m_s, NULL, PRIM_INT__d_s_l_s, NULL, PRIM_INT__f_s_m_s);
488 
489 
490 
491 
492                     ////////////////////////////////////
493                     // Accumulate contracted integrals
494                     ////////////////////////////////////
495                     if(lastoffset == 0)
496                     {
497                         contract_all(168, PRIM_INT__d_s_i_s, PRIM_PTR_INT__d_s_i_s);
498                         contract_all(216, PRIM_INT__d_s_k_s, PRIM_PTR_INT__d_s_k_s);
499                         contract_all(270, PRIM_INT__d_s_l_s, PRIM_PTR_INT__d_s_l_s);
500                         contract_all(330, PRIM_INT__d_s_m_s, PRIM_PTR_INT__d_s_m_s);
501                         contract_all(280, PRIM_INT__f_s_i_s, PRIM_PTR_INT__f_s_i_s);
502                         contract_all(360, PRIM_INT__f_s_k_s, PRIM_PTR_INT__f_s_k_s);
503                         contract_all(450, PRIM_INT__f_s_l_s, PRIM_PTR_INT__f_s_l_s);
504                         contract_all(550, PRIM_INT__f_s_m_s, PRIM_PTR_INT__f_s_m_s);
505                     }
506                     else
507                     {
508                         contract(168, shelloffsets, PRIM_INT__d_s_i_s, PRIM_PTR_INT__d_s_i_s);
509                         contract(216, shelloffsets, PRIM_INT__d_s_k_s, PRIM_PTR_INT__d_s_k_s);
510                         contract(270, shelloffsets, PRIM_INT__d_s_l_s, PRIM_PTR_INT__d_s_l_s);
511                         contract(330, shelloffsets, PRIM_INT__d_s_m_s, PRIM_PTR_INT__d_s_m_s);
512                         contract(280, shelloffsets, PRIM_INT__f_s_i_s, PRIM_PTR_INT__f_s_i_s);
513                         contract(360, shelloffsets, PRIM_INT__f_s_k_s, PRIM_PTR_INT__f_s_k_s);
514                         contract(450, shelloffsets, PRIM_INT__f_s_l_s, PRIM_PTR_INT__f_s_l_s);
515                         contract(550, shelloffsets, PRIM_INT__f_s_m_s, PRIM_PTR_INT__f_s_m_s);
516                         PRIM_PTR_INT__d_s_i_s += lastoffset*168;
517                         PRIM_PTR_INT__d_s_k_s += lastoffset*216;
518                         PRIM_PTR_INT__d_s_l_s += lastoffset*270;
519                         PRIM_PTR_INT__d_s_m_s += lastoffset*330;
520                         PRIM_PTR_INT__f_s_i_s += lastoffset*280;
521                         PRIM_PTR_INT__f_s_k_s += lastoffset*360;
522                         PRIM_PTR_INT__f_s_l_s += lastoffset*450;
523                         PRIM_PTR_INT__f_s_m_s += lastoffset*550;
524                     }
525 
526                 }  // close loop over j
527             }  // close loop over i
528 
529             //Advance to the next batch
530             jstart = SIMINT_SIMD_ROUND(jend);
531 
532             //////////////////////////////////////////////
533             // Contracted integrals: Horizontal recurrance
534             //////////////////////////////////////////////
535 
536 
537             const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
538 
539 
540             for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
541             {
542                 const double hCD[3] = { Q.AB_x[cd+abcd], Q.AB_y[cd+abcd], Q.AB_z[cd+abcd] };
543 
544                 // set up HRR pointers
545                 double const * restrict HRR_INT__d_s_i_s = INT__d_s_i_s + abcd * 168;
546                 double const * restrict HRR_INT__d_s_k_s = INT__d_s_k_s + abcd * 216;
547                 double const * restrict HRR_INT__d_s_l_s = INT__d_s_l_s + abcd * 270;
548                 double const * restrict HRR_INT__d_s_m_s = INT__d_s_m_s + abcd * 330;
549                 double const * restrict HRR_INT__f_s_i_s = INT__f_s_i_s + abcd * 280;
550                 double const * restrict HRR_INT__f_s_k_s = INT__f_s_k_s + abcd * 360;
551                 double const * restrict HRR_INT__f_s_l_s = INT__f_s_l_s + abcd * 450;
552                 double const * restrict HRR_INT__f_s_m_s = INT__f_s_m_s + abcd * 550;
553                 double * restrict HRR_INT__d_p_i_f = INT__d_p_i_f + real_abcd * 5040;
554 
555                 // form INT__d_p_i_s
556                 for(iket = 0; iket < 28; ++iket)
557                 {
558                     HRR_INT__d_p_i_s[0 * 28 + iket] = HRR_INT__f_s_i_s[0 * 28 + iket] + ( hAB[0] * HRR_INT__d_s_i_s[0 * 28 + iket] );
559 
560                     HRR_INT__d_p_i_s[1 * 28 + iket] = HRR_INT__f_s_i_s[1 * 28 + iket] + ( hAB[1] * HRR_INT__d_s_i_s[0 * 28 + iket] );
561 
562                     HRR_INT__d_p_i_s[2 * 28 + iket] = HRR_INT__f_s_i_s[2 * 28 + iket] + ( hAB[2] * HRR_INT__d_s_i_s[0 * 28 + iket] );
563 
564                     HRR_INT__d_p_i_s[3 * 28 + iket] = HRR_INT__f_s_i_s[1 * 28 + iket] + ( hAB[0] * HRR_INT__d_s_i_s[1 * 28 + iket] );
565 
566                     HRR_INT__d_p_i_s[4 * 28 + iket] = HRR_INT__f_s_i_s[3 * 28 + iket] + ( hAB[1] * HRR_INT__d_s_i_s[1 * 28 + iket] );
567 
568                     HRR_INT__d_p_i_s[5 * 28 + iket] = HRR_INT__f_s_i_s[4 * 28 + iket] + ( hAB[2] * HRR_INT__d_s_i_s[1 * 28 + iket] );
569 
570                     HRR_INT__d_p_i_s[6 * 28 + iket] = HRR_INT__f_s_i_s[2 * 28 + iket] + ( hAB[0] * HRR_INT__d_s_i_s[2 * 28 + iket] );
571 
572                     HRR_INT__d_p_i_s[7 * 28 + iket] = HRR_INT__f_s_i_s[4 * 28 + iket] + ( hAB[1] * HRR_INT__d_s_i_s[2 * 28 + iket] );
573 
574                     HRR_INT__d_p_i_s[8 * 28 + iket] = HRR_INT__f_s_i_s[5 * 28 + iket] + ( hAB[2] * HRR_INT__d_s_i_s[2 * 28 + iket] );
575 
576                     HRR_INT__d_p_i_s[9 * 28 + iket] = HRR_INT__f_s_i_s[3 * 28 + iket] + ( hAB[0] * HRR_INT__d_s_i_s[3 * 28 + iket] );
577 
578                     HRR_INT__d_p_i_s[10 * 28 + iket] = HRR_INT__f_s_i_s[6 * 28 + iket] + ( hAB[1] * HRR_INT__d_s_i_s[3 * 28 + iket] );
579 
580                     HRR_INT__d_p_i_s[11 * 28 + iket] = HRR_INT__f_s_i_s[7 * 28 + iket] + ( hAB[2] * HRR_INT__d_s_i_s[3 * 28 + iket] );
581 
582                     HRR_INT__d_p_i_s[12 * 28 + iket] = HRR_INT__f_s_i_s[4 * 28 + iket] + ( hAB[0] * HRR_INT__d_s_i_s[4 * 28 + iket] );
583 
584                     HRR_INT__d_p_i_s[13 * 28 + iket] = HRR_INT__f_s_i_s[7 * 28 + iket] + ( hAB[1] * HRR_INT__d_s_i_s[4 * 28 + iket] );
585 
586                     HRR_INT__d_p_i_s[14 * 28 + iket] = HRR_INT__f_s_i_s[8 * 28 + iket] + ( hAB[2] * HRR_INT__d_s_i_s[4 * 28 + iket] );
587 
588                     HRR_INT__d_p_i_s[15 * 28 + iket] = HRR_INT__f_s_i_s[5 * 28 + iket] + ( hAB[0] * HRR_INT__d_s_i_s[5 * 28 + iket] );
589 
590                     HRR_INT__d_p_i_s[16 * 28 + iket] = HRR_INT__f_s_i_s[8 * 28 + iket] + ( hAB[1] * HRR_INT__d_s_i_s[5 * 28 + iket] );
591 
592                     HRR_INT__d_p_i_s[17 * 28 + iket] = HRR_INT__f_s_i_s[9 * 28 + iket] + ( hAB[2] * HRR_INT__d_s_i_s[5 * 28 + iket] );
593 
594                 }
595 
596 
597                 // form INT__d_p_k_s
598                 for(iket = 0; iket < 36; ++iket)
599                 {
600                     HRR_INT__d_p_k_s[0 * 36 + iket] = HRR_INT__f_s_k_s[0 * 36 + iket] + ( hAB[0] * HRR_INT__d_s_k_s[0 * 36 + iket] );
601 
602                     HRR_INT__d_p_k_s[1 * 36 + iket] = HRR_INT__f_s_k_s[1 * 36 + iket] + ( hAB[1] * HRR_INT__d_s_k_s[0 * 36 + iket] );
603 
604                     HRR_INT__d_p_k_s[2 * 36 + iket] = HRR_INT__f_s_k_s[2 * 36 + iket] + ( hAB[2] * HRR_INT__d_s_k_s[0 * 36 + iket] );
605 
606                     HRR_INT__d_p_k_s[3 * 36 + iket] = HRR_INT__f_s_k_s[1 * 36 + iket] + ( hAB[0] * HRR_INT__d_s_k_s[1 * 36 + iket] );
607 
608                     HRR_INT__d_p_k_s[4 * 36 + iket] = HRR_INT__f_s_k_s[3 * 36 + iket] + ( hAB[1] * HRR_INT__d_s_k_s[1 * 36 + iket] );
609 
610                     HRR_INT__d_p_k_s[5 * 36 + iket] = HRR_INT__f_s_k_s[4 * 36 + iket] + ( hAB[2] * HRR_INT__d_s_k_s[1 * 36 + iket] );
611 
612                     HRR_INT__d_p_k_s[6 * 36 + iket] = HRR_INT__f_s_k_s[2 * 36 + iket] + ( hAB[0] * HRR_INT__d_s_k_s[2 * 36 + iket] );
613 
614                     HRR_INT__d_p_k_s[7 * 36 + iket] = HRR_INT__f_s_k_s[4 * 36 + iket] + ( hAB[1] * HRR_INT__d_s_k_s[2 * 36 + iket] );
615 
616                     HRR_INT__d_p_k_s[8 * 36 + iket] = HRR_INT__f_s_k_s[5 * 36 + iket] + ( hAB[2] * HRR_INT__d_s_k_s[2 * 36 + iket] );
617 
618                     HRR_INT__d_p_k_s[9 * 36 + iket] = HRR_INT__f_s_k_s[3 * 36 + iket] + ( hAB[0] * HRR_INT__d_s_k_s[3 * 36 + iket] );
619 
620                     HRR_INT__d_p_k_s[10 * 36 + iket] = HRR_INT__f_s_k_s[6 * 36 + iket] + ( hAB[1] * HRR_INT__d_s_k_s[3 * 36 + iket] );
621 
622                     HRR_INT__d_p_k_s[11 * 36 + iket] = HRR_INT__f_s_k_s[7 * 36 + iket] + ( hAB[2] * HRR_INT__d_s_k_s[3 * 36 + iket] );
623 
624                     HRR_INT__d_p_k_s[12 * 36 + iket] = HRR_INT__f_s_k_s[4 * 36 + iket] + ( hAB[0] * HRR_INT__d_s_k_s[4 * 36 + iket] );
625 
626                     HRR_INT__d_p_k_s[13 * 36 + iket] = HRR_INT__f_s_k_s[7 * 36 + iket] + ( hAB[1] * HRR_INT__d_s_k_s[4 * 36 + iket] );
627 
628                     HRR_INT__d_p_k_s[14 * 36 + iket] = HRR_INT__f_s_k_s[8 * 36 + iket] + ( hAB[2] * HRR_INT__d_s_k_s[4 * 36 + iket] );
629 
630                     HRR_INT__d_p_k_s[15 * 36 + iket] = HRR_INT__f_s_k_s[5 * 36 + iket] + ( hAB[0] * HRR_INT__d_s_k_s[5 * 36 + iket] );
631 
632                     HRR_INT__d_p_k_s[16 * 36 + iket] = HRR_INT__f_s_k_s[8 * 36 + iket] + ( hAB[1] * HRR_INT__d_s_k_s[5 * 36 + iket] );
633 
634                     HRR_INT__d_p_k_s[17 * 36 + iket] = HRR_INT__f_s_k_s[9 * 36 + iket] + ( hAB[2] * HRR_INT__d_s_k_s[5 * 36 + iket] );
635 
636                 }
637 
638 
639                 // form INT__d_p_l_s
640                 for(iket = 0; iket < 45; ++iket)
641                 {
642                     HRR_INT__d_p_l_s[0 * 45 + iket] = HRR_INT__f_s_l_s[0 * 45 + iket] + ( hAB[0] * HRR_INT__d_s_l_s[0 * 45 + iket] );
643 
644                     HRR_INT__d_p_l_s[1 * 45 + iket] = HRR_INT__f_s_l_s[1 * 45 + iket] + ( hAB[1] * HRR_INT__d_s_l_s[0 * 45 + iket] );
645 
646                     HRR_INT__d_p_l_s[2 * 45 + iket] = HRR_INT__f_s_l_s[2 * 45 + iket] + ( hAB[2] * HRR_INT__d_s_l_s[0 * 45 + iket] );
647 
648                     HRR_INT__d_p_l_s[3 * 45 + iket] = HRR_INT__f_s_l_s[1 * 45 + iket] + ( hAB[0] * HRR_INT__d_s_l_s[1 * 45 + iket] );
649 
650                     HRR_INT__d_p_l_s[4 * 45 + iket] = HRR_INT__f_s_l_s[3 * 45 + iket] + ( hAB[1] * HRR_INT__d_s_l_s[1 * 45 + iket] );
651 
652                     HRR_INT__d_p_l_s[5 * 45 + iket] = HRR_INT__f_s_l_s[4 * 45 + iket] + ( hAB[2] * HRR_INT__d_s_l_s[1 * 45 + iket] );
653 
654                     HRR_INT__d_p_l_s[6 * 45 + iket] = HRR_INT__f_s_l_s[2 * 45 + iket] + ( hAB[0] * HRR_INT__d_s_l_s[2 * 45 + iket] );
655 
656                     HRR_INT__d_p_l_s[7 * 45 + iket] = HRR_INT__f_s_l_s[4 * 45 + iket] + ( hAB[1] * HRR_INT__d_s_l_s[2 * 45 + iket] );
657 
658                     HRR_INT__d_p_l_s[8 * 45 + iket] = HRR_INT__f_s_l_s[5 * 45 + iket] + ( hAB[2] * HRR_INT__d_s_l_s[2 * 45 + iket] );
659 
660                     HRR_INT__d_p_l_s[9 * 45 + iket] = HRR_INT__f_s_l_s[3 * 45 + iket] + ( hAB[0] * HRR_INT__d_s_l_s[3 * 45 + iket] );
661 
662                     HRR_INT__d_p_l_s[10 * 45 + iket] = HRR_INT__f_s_l_s[6 * 45 + iket] + ( hAB[1] * HRR_INT__d_s_l_s[3 * 45 + iket] );
663 
664                     HRR_INT__d_p_l_s[11 * 45 + iket] = HRR_INT__f_s_l_s[7 * 45 + iket] + ( hAB[2] * HRR_INT__d_s_l_s[3 * 45 + iket] );
665 
666                     HRR_INT__d_p_l_s[12 * 45 + iket] = HRR_INT__f_s_l_s[4 * 45 + iket] + ( hAB[0] * HRR_INT__d_s_l_s[4 * 45 + iket] );
667 
668                     HRR_INT__d_p_l_s[13 * 45 + iket] = HRR_INT__f_s_l_s[7 * 45 + iket] + ( hAB[1] * HRR_INT__d_s_l_s[4 * 45 + iket] );
669 
670                     HRR_INT__d_p_l_s[14 * 45 + iket] = HRR_INT__f_s_l_s[8 * 45 + iket] + ( hAB[2] * HRR_INT__d_s_l_s[4 * 45 + iket] );
671 
672                     HRR_INT__d_p_l_s[15 * 45 + iket] = HRR_INT__f_s_l_s[5 * 45 + iket] + ( hAB[0] * HRR_INT__d_s_l_s[5 * 45 + iket] );
673 
674                     HRR_INT__d_p_l_s[16 * 45 + iket] = HRR_INT__f_s_l_s[8 * 45 + iket] + ( hAB[1] * HRR_INT__d_s_l_s[5 * 45 + iket] );
675 
676                     HRR_INT__d_p_l_s[17 * 45 + iket] = HRR_INT__f_s_l_s[9 * 45 + iket] + ( hAB[2] * HRR_INT__d_s_l_s[5 * 45 + iket] );
677 
678                 }
679 
680 
681                 // form INT__d_p_m_s
682                 for(iket = 0; iket < 55; ++iket)
683                 {
684                     HRR_INT__d_p_m_s[0 * 55 + iket] = HRR_INT__f_s_m_s[0 * 55 + iket] + ( hAB[0] * HRR_INT__d_s_m_s[0 * 55 + iket] );
685 
686                     HRR_INT__d_p_m_s[1 * 55 + iket] = HRR_INT__f_s_m_s[1 * 55 + iket] + ( hAB[1] * HRR_INT__d_s_m_s[0 * 55 + iket] );
687 
688                     HRR_INT__d_p_m_s[2 * 55 + iket] = HRR_INT__f_s_m_s[2 * 55 + iket] + ( hAB[2] * HRR_INT__d_s_m_s[0 * 55 + iket] );
689 
690                     HRR_INT__d_p_m_s[3 * 55 + iket] = HRR_INT__f_s_m_s[1 * 55 + iket] + ( hAB[0] * HRR_INT__d_s_m_s[1 * 55 + iket] );
691 
692                     HRR_INT__d_p_m_s[4 * 55 + iket] = HRR_INT__f_s_m_s[3 * 55 + iket] + ( hAB[1] * HRR_INT__d_s_m_s[1 * 55 + iket] );
693 
694                     HRR_INT__d_p_m_s[5 * 55 + iket] = HRR_INT__f_s_m_s[4 * 55 + iket] + ( hAB[2] * HRR_INT__d_s_m_s[1 * 55 + iket] );
695 
696                     HRR_INT__d_p_m_s[6 * 55 + iket] = HRR_INT__f_s_m_s[2 * 55 + iket] + ( hAB[0] * HRR_INT__d_s_m_s[2 * 55 + iket] );
697 
698                     HRR_INT__d_p_m_s[7 * 55 + iket] = HRR_INT__f_s_m_s[4 * 55 + iket] + ( hAB[1] * HRR_INT__d_s_m_s[2 * 55 + iket] );
699 
700                     HRR_INT__d_p_m_s[8 * 55 + iket] = HRR_INT__f_s_m_s[5 * 55 + iket] + ( hAB[2] * HRR_INT__d_s_m_s[2 * 55 + iket] );
701 
702                     HRR_INT__d_p_m_s[9 * 55 + iket] = HRR_INT__f_s_m_s[3 * 55 + iket] + ( hAB[0] * HRR_INT__d_s_m_s[3 * 55 + iket] );
703 
704                     HRR_INT__d_p_m_s[10 * 55 + iket] = HRR_INT__f_s_m_s[6 * 55 + iket] + ( hAB[1] * HRR_INT__d_s_m_s[3 * 55 + iket] );
705 
706                     HRR_INT__d_p_m_s[11 * 55 + iket] = HRR_INT__f_s_m_s[7 * 55 + iket] + ( hAB[2] * HRR_INT__d_s_m_s[3 * 55 + iket] );
707 
708                     HRR_INT__d_p_m_s[12 * 55 + iket] = HRR_INT__f_s_m_s[4 * 55 + iket] + ( hAB[0] * HRR_INT__d_s_m_s[4 * 55 + iket] );
709 
710                     HRR_INT__d_p_m_s[13 * 55 + iket] = HRR_INT__f_s_m_s[7 * 55 + iket] + ( hAB[1] * HRR_INT__d_s_m_s[4 * 55 + iket] );
711 
712                     HRR_INT__d_p_m_s[14 * 55 + iket] = HRR_INT__f_s_m_s[8 * 55 + iket] + ( hAB[2] * HRR_INT__d_s_m_s[4 * 55 + iket] );
713 
714                     HRR_INT__d_p_m_s[15 * 55 + iket] = HRR_INT__f_s_m_s[5 * 55 + iket] + ( hAB[0] * HRR_INT__d_s_m_s[5 * 55 + iket] );
715 
716                     HRR_INT__d_p_m_s[16 * 55 + iket] = HRR_INT__f_s_m_s[8 * 55 + iket] + ( hAB[1] * HRR_INT__d_s_m_s[5 * 55 + iket] );
717 
718                     HRR_INT__d_p_m_s[17 * 55 + iket] = HRR_INT__f_s_m_s[9 * 55 + iket] + ( hAB[2] * HRR_INT__d_s_m_s[5 * 55 + iket] );
719 
720                 }
721 
722 
723                 // form INT__d_p_i_p
724                 ostei_general_hrr_L(2, 1, 6, 1, hCD, HRR_INT__d_p_k_s, HRR_INT__d_p_i_s, HRR_INT__d_p_i_p);
725 
726                 // form INT__d_p_k_p
727                 ostei_general_hrr_L(2, 1, 7, 1, hCD, HRR_INT__d_p_l_s, HRR_INT__d_p_k_s, HRR_INT__d_p_k_p);
728 
729                 // form INT__d_p_l_p
730                 ostei_general_hrr_L(2, 1, 8, 1, hCD, HRR_INT__d_p_m_s, HRR_INT__d_p_l_s, HRR_INT__d_p_l_p);
731 
732                 // form INT__d_p_i_d
733                 ostei_general_hrr_L(2, 1, 6, 2, hCD, HRR_INT__d_p_k_p, HRR_INT__d_p_i_p, HRR_INT__d_p_i_d);
734 
735                 // form INT__d_p_k_d
736                 ostei_general_hrr_L(2, 1, 7, 2, hCD, HRR_INT__d_p_l_p, HRR_INT__d_p_k_p, HRR_INT__d_p_k_d);
737 
738                 // form INT__d_p_i_f
739                 ostei_general_hrr_L(2, 1, 6, 3, hCD, HRR_INT__d_p_k_d, HRR_INT__d_p_i_d, HRR_INT__d_p_i_f);
740 
741 
742             }  // close HRR loop
743 
744 
745         }   // close loop cdbatch
746 
747         istart = iend;
748     }  // close loop over ab
749 
750     return P.nshell12_clip * Q.nshell12_clip;
751 }
752 
ostei_p_d_i_f(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__p_d_i_f)753 int ostei_p_d_i_f(struct simint_multi_shellpair const P,
754                   struct simint_multi_shellpair const Q,
755                   double screen_tol,
756                   double * const restrict work,
757                   double * const restrict INT__p_d_i_f)
758 {
759     double P_AB[3*P.nshell12];
760     struct simint_multi_shellpair P_tmp = P;
761     P_tmp.PA_x = P.PB_x;  P_tmp.PA_y = P.PB_y;  P_tmp.PA_z = P.PB_z;
762     P_tmp.PB_x = P.PA_x;  P_tmp.PB_y = P.PA_y;  P_tmp.PB_z = P.PA_z;
763     P_tmp.AB_x = P_AB;
764     P_tmp.AB_y = P_AB + P.nshell12;
765     P_tmp.AB_z = P_AB + 2*P.nshell12;
766 
767     for(int i = 0; i < P.nshell12; i++)
768     {
769         P_tmp.AB_x[i] = -P.AB_x[i];
770         P_tmp.AB_y[i] = -P.AB_y[i];
771         P_tmp.AB_z[i] = -P.AB_z[i];
772     }
773 
774     int ret = ostei_d_p_i_f(P_tmp, Q, screen_tol, work, INT__p_d_i_f);
775     double buffer[5040] SIMINT_ALIGN_ARRAY_DBL;
776 
777     for(int q = 0; q < ret; q++)
778     {
779         int idx = 0;
780         for(int a = 0; a < 3; ++a)
781         for(int b = 0; b < 6; ++b)
782         for(int c = 0; c < 28; ++c)
783         for(int d = 0; d < 10; ++d)
784             buffer[idx++] = INT__p_d_i_f[q*5040+b*840+a*280+c*10+d];
785 
786         memcpy(INT__p_d_i_f+q*5040, buffer, 5040*sizeof(double));
787     }
788 
789     return ret;
790 }
791 
ostei_d_p_f_i(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_p_f_i)792 int ostei_d_p_f_i(struct simint_multi_shellpair const P,
793                   struct simint_multi_shellpair const Q,
794                   double screen_tol,
795                   double * const restrict work,
796                   double * const restrict INT__d_p_f_i)
797 {
798     double Q_AB[3*Q.nshell12];
799     struct simint_multi_shellpair Q_tmp = Q;
800     Q_tmp.PA_x = Q.PB_x;  Q_tmp.PA_y = Q.PB_y;  Q_tmp.PA_z = Q.PB_z;
801     Q_tmp.PB_x = Q.PA_x;  Q_tmp.PB_y = Q.PA_y;  Q_tmp.PB_z = Q.PA_z;
802     Q_tmp.AB_x = Q_AB;
803     Q_tmp.AB_y = Q_AB + Q.nshell12;
804     Q_tmp.AB_z = Q_AB + 2*Q.nshell12;
805 
806     for(int i = 0; i < Q.nshell12; i++)
807     {
808         Q_tmp.AB_x[i] = -Q.AB_x[i];
809         Q_tmp.AB_y[i] = -Q.AB_y[i];
810         Q_tmp.AB_z[i] = -Q.AB_z[i];
811     }
812 
813     int ret = ostei_d_p_i_f(P, Q_tmp, screen_tol, work, INT__d_p_f_i);
814     double buffer[5040] SIMINT_ALIGN_ARRAY_DBL;
815 
816     for(int q = 0; q < ret; q++)
817     {
818         int idx = 0;
819         for(int a = 0; a < 6; ++a)
820         for(int b = 0; b < 3; ++b)
821         for(int c = 0; c < 10; ++c)
822         for(int d = 0; d < 28; ++d)
823             buffer[idx++] = INT__d_p_f_i[q*5040+a*840+b*280+d*10+c];
824 
825         memcpy(INT__d_p_f_i+q*5040, buffer, 5040*sizeof(double));
826     }
827 
828     return ret;
829 }
830 
ostei_p_d_f_i(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__p_d_f_i)831 int ostei_p_d_f_i(struct simint_multi_shellpair const P,
832                   struct simint_multi_shellpair const Q,
833                   double screen_tol,
834                   double * const restrict work,
835                   double * const restrict INT__p_d_f_i)
836 {
837     double P_AB[3*P.nshell12];
838     struct simint_multi_shellpair P_tmp = P;
839     P_tmp.PA_x = P.PB_x;  P_tmp.PA_y = P.PB_y;  P_tmp.PA_z = P.PB_z;
840     P_tmp.PB_x = P.PA_x;  P_tmp.PB_y = P.PA_y;  P_tmp.PB_z = P.PA_z;
841     P_tmp.AB_x = P_AB;
842     P_tmp.AB_y = P_AB + P.nshell12;
843     P_tmp.AB_z = P_AB + 2*P.nshell12;
844 
845     for(int i = 0; i < P.nshell12; i++)
846     {
847         P_tmp.AB_x[i] = -P.AB_x[i];
848         P_tmp.AB_y[i] = -P.AB_y[i];
849         P_tmp.AB_z[i] = -P.AB_z[i];
850     }
851 
852     double Q_AB[3*Q.nshell12];
853     struct simint_multi_shellpair Q_tmp = Q;
854     Q_tmp.PA_x = Q.PB_x;  Q_tmp.PA_y = Q.PB_y;  Q_tmp.PA_z = Q.PB_z;
855     Q_tmp.PB_x = Q.PA_x;  Q_tmp.PB_y = Q.PA_y;  Q_tmp.PB_z = Q.PA_z;
856     Q_tmp.AB_x = Q_AB;
857     Q_tmp.AB_y = Q_AB + Q.nshell12;
858     Q_tmp.AB_z = Q_AB + 2*Q.nshell12;
859 
860     for(int i = 0; i < Q.nshell12; i++)
861     {
862         Q_tmp.AB_x[i] = -Q.AB_x[i];
863         Q_tmp.AB_y[i] = -Q.AB_y[i];
864         Q_tmp.AB_z[i] = -Q.AB_z[i];
865     }
866 
867     int ret = ostei_d_p_i_f(P_tmp, Q_tmp, screen_tol, work, INT__p_d_f_i);
868     double buffer[5040] SIMINT_ALIGN_ARRAY_DBL;
869 
870     for(int q = 0; q < ret; q++)
871     {
872         int idx = 0;
873         for(int a = 0; a < 3; ++a)
874         for(int b = 0; b < 6; ++b)
875         for(int c = 0; c < 10; ++c)
876         for(int d = 0; d < 28; ++d)
877             buffer[idx++] = INT__p_d_f_i[q*5040+b*840+a*280+d*10+c];
878 
879         memcpy(INT__p_d_f_i+q*5040, buffer, 5040*sizeof(double));
880     }
881 
882     return ret;
883 }
884 
885