1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6 
7 
ostei_h_d_f_s(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__h_d_f_s)8 int ostei_h_d_f_s(struct simint_multi_shellpair const P,
9                   struct simint_multi_shellpair const Q,
10                   double screen_tol,
11                   double * const restrict work,
12                   double * const restrict INT__h_d_f_s)
13 {
14 
15     SIMINT_ASSUME_ALIGN_DBL(work);
16     SIMINT_ASSUME_ALIGN_DBL(INT__h_d_f_s);
17     int ab, cd, abcd;
18     int istart, jstart;
19     int iprimcd, nprim_icd, icd;
20     const int check_screen = (screen_tol > 0.0);
21     int i, j;
22     int n;
23     int not_screened;
24     int real_abcd;
25     int iket;
26 
27     // partition workspace
28     double * const INT__h_s_f_s = work + (SIMINT_NSHELL_SIMD * 0);
29     double * const INT__i_s_f_s = work + (SIMINT_NSHELL_SIMD * 210);
30     double * const INT__k_s_f_s = work + (SIMINT_NSHELL_SIMD * 490);
31     SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*850);
32     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
33     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_s_s = primwork + 11;
34     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_s = primwork + 41;
35     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_s = primwork + 95;
36     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_p_s = primwork + 175;
37     SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_s = primwork + 265;
38     SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_p_s = primwork + 370;
39     SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_d_s = primwork + 505;
40     SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_s_s = primwork + 685;
41     SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_p_s = primwork + 811;
42     SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_d_s = primwork + 1000;
43     SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_f_s = primwork + 1252;
44     SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_s_s = primwork + 1462;
45     SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_p_s = primwork + 1602;
46     SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_d_s = primwork + 1854;
47     SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_f_s = primwork + 2190;
48     SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_s_s = primwork + 2470;
49     SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_p_s = primwork + 2614;
50     SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_d_s = primwork + 2938;
51     SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_f_s = primwork + 3370;
52     double * const hrrwork = (double *)(primwork + 3730);
53     double * const HRR_INT__h_p_f_s = hrrwork + 0;
54     double * const HRR_INT__i_p_f_s = hrrwork + 630;
55 
56 
57     // Create constants
58     const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
59     const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
60     const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
61     const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
62     const SIMINT_DBLTYPE const_5 = SIMINT_DBLSET1(5);
63     const SIMINT_DBLTYPE const_6 = SIMINT_DBLSET1(6);
64     const SIMINT_DBLTYPE const_7 = SIMINT_DBLSET1(7);
65     const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
66 
67 
68     ////////////////////////////////////////
69     // Loop over shells and primitives
70     ////////////////////////////////////////
71 
72     real_abcd = 0;
73     istart = 0;
74     for(ab = 0; ab < P.nshell12_clip; ++ab)
75     {
76         const int iend = istart + P.nprim12[ab];
77 
78         cd = 0;
79         jstart = 0;
80 
81         for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
82         {
83             const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
84             int jend = jstart;
85             for(i = 0; i < nshellbatch; i++)
86                 jend += Q.nprim12[cd+i];
87 
88             // Clear the beginning of the workspace (where we are accumulating integrals)
89             memset(work, 0, SIMINT_NSHELL_SIMD * 850 * sizeof(double));
90             abcd = 0;
91 
92 
93             for(i = istart; i < iend; ++i)
94             {
95                 SIMINT_DBLTYPE bra_screen_max;  // only used if check_screen
96 
97                 if(check_screen)
98                 {
99                     // Skip this whole thing if always insignificant
100                     if((P.screen[i] * Q.screen_max) < screen_tol)
101                         continue;
102                     bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
103                 }
104 
105                 icd = 0;
106                 iprimcd = 0;
107                 nprim_icd = Q.nprim12[cd];
108                 double * restrict PRIM_PTR_INT__h_s_f_s = INT__h_s_f_s + abcd * 210;
109                 double * restrict PRIM_PTR_INT__i_s_f_s = INT__i_s_f_s + abcd * 280;
110                 double * restrict PRIM_PTR_INT__k_s_f_s = INT__k_s_f_s + abcd * 360;
111 
112 
113 
114                 // Load these one per loop over i
115                 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
116                 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
117                 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
118 
119                 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
120 
121                 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
122                 {
123                     // calculate the shell offsets
124                     // these are the offset from the shell pointed to by cd
125                     // for each element
126                     int shelloffsets[SIMINT_SIMD_LEN] = {0};
127                     int lastoffset = 0;
128                     const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
129 
130                     if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
131                     {
132                         // Handle if the first element of the vector is a new shell
133                         if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
134                         {
135                             nprim_icd += Q.nprim12[cd + (++icd)];
136                             PRIM_PTR_INT__h_s_f_s += 210;
137                             PRIM_PTR_INT__i_s_f_s += 280;
138                             PRIM_PTR_INT__k_s_f_s += 360;
139                         }
140                         iprimcd++;
141                         for(n = 1; n < SIMINT_SIMD_LEN; ++n)
142                         {
143                             if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
144                             {
145                                 shelloffsets[n] = shelloffsets[n-1] + 1;
146                                 lastoffset++;
147                                 nprim_icd += Q.nprim12[cd + (++icd)];
148                             }
149                             else
150                                 shelloffsets[n] = shelloffsets[n-1];
151                             iprimcd++;
152                         }
153                     }
154                     else
155                         iprimcd += SIMINT_SIMD_LEN;
156 
157                     // Do we have to compute this vector (or has it been screened out)?
158                     // (not_screened != 0 means we have to do this vector)
159                     if(check_screen)
160                     {
161                         const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
162                         if(vmax < screen_tol)
163                         {
164                             PRIM_PTR_INT__h_s_f_s += lastoffset*210;
165                             PRIM_PTR_INT__i_s_f_s += lastoffset*280;
166                             PRIM_PTR_INT__k_s_f_s += lastoffset*360;
167                             continue;
168                         }
169                     }
170 
171                     const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
172                     const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
173                     const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
174                     const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
175 
176 
177                     /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
178                     SIMINT_DBLTYPE PQ[3];
179                     PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
180                     PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
181                     PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
182                     SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
183                     R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
184                     R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
185 
186                     const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
187                     const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
188                     const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
189                     const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
190                     const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
191                     const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
192                     const SIMINT_DBLTYPE Q_PA[3] = { SIMINT_DBLLOAD(Q.PA_x, j), SIMINT_DBLLOAD(Q.PA_y, j), SIMINT_DBLLOAD(Q.PA_z, j) };
193 
194                     // NOTE: Minus sign!
195                     const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
196                     SIMINT_DBLTYPE aop_PQ[3];
197                     aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
198                     aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
199                     aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
200 
201                     SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
202                     SIMINT_DBLTYPE aoq_PQ[3];
203                     aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
204                     aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
205                     aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
206                     // Put a minus sign here so we don't have to in RR routines
207                     a_over_q = SIMINT_NEG(a_over_q);
208 
209 
210                     //////////////////////////////////////////////
211                     // Fjt function section
212                     // Maximum v value: 10
213                     //////////////////////////////////////////////
214                     // The parameter to the Fjt function
215                     const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
216 
217 
218                     const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
219 
220 
221                     boys_F_split(PRIM_INT__s_s_s_s, F_x, 10);
222                     SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
223                     prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
224                     for(n = 0; n <= 10; n++)
225                         PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
226 
227                     //////////////////////////////////////////////
228                     // Primitive integrals: Vertical recurrance
229                     //////////////////////////////////////////////
230 
231                     const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
232                     const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
233                     const SIMINT_DBLTYPE vrr_const_3_over_2p = SIMINT_MUL(const_3, one_over_2p);
234                     const SIMINT_DBLTYPE vrr_const_4_over_2p = SIMINT_MUL(const_4, one_over_2p);
235                     const SIMINT_DBLTYPE vrr_const_5_over_2p = SIMINT_MUL(const_5, one_over_2p);
236                     const SIMINT_DBLTYPE vrr_const_6_over_2p = SIMINT_MUL(const_6, one_over_2p);
237                     const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
238                     const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
239                     const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
240                     const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
241                     const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
242                     const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
243                     const SIMINT_DBLTYPE vrr_const_5_over_2pq = SIMINT_MUL(const_5, one_over_2pq);
244                     const SIMINT_DBLTYPE vrr_const_6_over_2pq = SIMINT_MUL(const_6, one_over_2pq);
245                     const SIMINT_DBLTYPE vrr_const_7_over_2pq = SIMINT_MUL(const_7, one_over_2pq);
246 
247 
248 
249                     // Forming PRIM_INT__p_s_s_s[10 * 3];
250                     for(n = 0; n < 10; ++n)  // loop over orders of auxiliary function
251                     {
252 
253                         PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
254                         PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]);
255 
256                         PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
257                         PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 1]);
258 
259                         PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
260                         PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 2]);
261 
262                     }
263 
264 
265 
266                     // Forming PRIM_INT__d_s_s_s[9 * 6];
267                     for(n = 0; n < 9; ++n)  // loop over orders of auxiliary function
268                     {
269 
270                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__p_s_s_s[n * 3 + 0]);
271                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 0]);
272                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 0]);
273 
274                         PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 0]);
275                         PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 1]);
276 
277                         PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 0]);
278                         PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 2]);
279 
280                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 1]);
281                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 3]);
282                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 3]);
283 
284                         PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 1]);
285                         PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 4]);
286 
287                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 2]);
288                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_s[n * 6 + 5]);
289                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 5]);
290 
291                     }
292 
293 
294 
295                     // Forming PRIM_INT__f_s_s_s[8 * 10];
296                     for(n = 0; n < 8; ++n)  // loop over orders of auxiliary function
297                     {
298 
299                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
300                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 0]);
301                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__f_s_s_s[n * 10 + 0]);
302 
303                         PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
304                         PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 1]);
305 
306                         PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
307                         PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 2]);
308 
309                         PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
310                         PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 3]);
311 
312                         PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
313                         PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__f_s_s_s[n * 10 + 4]);
314 
315                         PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
316                         PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 5]);
317 
318                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
319                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 6]);
320                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__f_s_s_s[n * 10 + 6]);
321 
322                         PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
323                         PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 7]);
324 
325                         PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
326                         PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 8]);
327 
328                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
329                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 9]);
330                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__f_s_s_s[n * 10 + 9]);
331 
332                     }
333 
334 
335                     VRR_I_g_s_s_s(
336                             PRIM_INT__g_s_s_s,
337                             PRIM_INT__f_s_s_s,
338                             PRIM_INT__d_s_s_s,
339                             P_PA,
340                             a_over_p,
341                             aop_PQ,
342                             one_over_2p,
343                             7);
344 
345 
346                     VRR_I_h_s_s_s(
347                             PRIM_INT__h_s_s_s,
348                             PRIM_INT__g_s_s_s,
349                             PRIM_INT__f_s_s_s,
350                             P_PA,
351                             a_over_p,
352                             aop_PQ,
353                             one_over_2p,
354                             6);
355 
356 
357                     ostei_general_vrr_K(5, 0, 1, 0, 3,
358                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
359                             PRIM_INT__h_s_s_s, NULL, NULL, PRIM_INT__g_s_s_s, NULL, PRIM_INT__h_s_p_s);
360 
361 
362                     VRR_K_g_s_p_s(
363                             PRIM_INT__g_s_p_s,
364                             PRIM_INT__g_s_s_s,
365                             PRIM_INT__f_s_s_s,
366                             Q_PA,
367                             aoq_PQ,
368                             one_over_2pq,
369                             3);
370 
371 
372                     ostei_general_vrr_K(5, 0, 2, 0, 2,
373                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
374                             PRIM_INT__h_s_p_s, PRIM_INT__h_s_s_s, NULL, PRIM_INT__g_s_p_s, NULL, PRIM_INT__h_s_d_s);
375 
376 
377                     VRR_K_f_s_p_s(
378                             PRIM_INT__f_s_p_s,
379                             PRIM_INT__f_s_s_s,
380                             PRIM_INT__d_s_s_s,
381                             Q_PA,
382                             aoq_PQ,
383                             one_over_2pq,
384                             3);
385 
386 
387                     ostei_general_vrr_K(4, 0, 2, 0, 2,
388                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
389                             PRIM_INT__g_s_p_s, PRIM_INT__g_s_s_s, NULL, PRIM_INT__f_s_p_s, NULL, PRIM_INT__g_s_d_s);
390 
391 
392                     ostei_general_vrr_K(5, 0, 3, 0, 1,
393                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
394                             PRIM_INT__h_s_d_s, PRIM_INT__h_s_p_s, NULL, PRIM_INT__g_s_d_s, NULL, PRIM_INT__h_s_f_s);
395 
396 
397                     ostei_general_vrr1_I(6, 5,
398                             one_over_2p, a_over_p, aop_PQ, P_PA,
399                             PRIM_INT__h_s_s_s, PRIM_INT__g_s_s_s, PRIM_INT__i_s_s_s);
400 
401 
402                     ostei_general_vrr_K(6, 0, 1, 0, 3,
403                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
404                             PRIM_INT__i_s_s_s, NULL, NULL, PRIM_INT__h_s_s_s, NULL, PRIM_INT__i_s_p_s);
405 
406 
407                     ostei_general_vrr_K(6, 0, 2, 0, 2,
408                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
409                             PRIM_INT__i_s_p_s, PRIM_INT__i_s_s_s, NULL, PRIM_INT__h_s_p_s, NULL, PRIM_INT__i_s_d_s);
410 
411 
412                     ostei_general_vrr_K(6, 0, 3, 0, 1,
413                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
414                             PRIM_INT__i_s_d_s, PRIM_INT__i_s_p_s, NULL, PRIM_INT__h_s_d_s, NULL, PRIM_INT__i_s_f_s);
415 
416 
417                     ostei_general_vrr1_I(7, 4,
418                             one_over_2p, a_over_p, aop_PQ, P_PA,
419                             PRIM_INT__i_s_s_s, PRIM_INT__h_s_s_s, PRIM_INT__k_s_s_s);
420 
421 
422                     ostei_general_vrr_K(7, 0, 1, 0, 3,
423                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
424                             PRIM_INT__k_s_s_s, NULL, NULL, PRIM_INT__i_s_s_s, NULL, PRIM_INT__k_s_p_s);
425 
426 
427                     ostei_general_vrr_K(7, 0, 2, 0, 2,
428                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
429                             PRIM_INT__k_s_p_s, PRIM_INT__k_s_s_s, NULL, PRIM_INT__i_s_p_s, NULL, PRIM_INT__k_s_d_s);
430 
431 
432                     ostei_general_vrr_K(7, 0, 3, 0, 1,
433                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
434                             PRIM_INT__k_s_d_s, PRIM_INT__k_s_p_s, NULL, PRIM_INT__i_s_d_s, NULL, PRIM_INT__k_s_f_s);
435 
436 
437 
438 
439                     ////////////////////////////////////
440                     // Accumulate contracted integrals
441                     ////////////////////////////////////
442                     if(lastoffset == 0)
443                     {
444                         contract_all(210, PRIM_INT__h_s_f_s, PRIM_PTR_INT__h_s_f_s);
445                         contract_all(280, PRIM_INT__i_s_f_s, PRIM_PTR_INT__i_s_f_s);
446                         contract_all(360, PRIM_INT__k_s_f_s, PRIM_PTR_INT__k_s_f_s);
447                     }
448                     else
449                     {
450                         contract(210, shelloffsets, PRIM_INT__h_s_f_s, PRIM_PTR_INT__h_s_f_s);
451                         contract(280, shelloffsets, PRIM_INT__i_s_f_s, PRIM_PTR_INT__i_s_f_s);
452                         contract(360, shelloffsets, PRIM_INT__k_s_f_s, PRIM_PTR_INT__k_s_f_s);
453                         PRIM_PTR_INT__h_s_f_s += lastoffset*210;
454                         PRIM_PTR_INT__i_s_f_s += lastoffset*280;
455                         PRIM_PTR_INT__k_s_f_s += lastoffset*360;
456                     }
457 
458                 }  // close loop over j
459             }  // close loop over i
460 
461             //Advance to the next batch
462             jstart = SIMINT_SIMD_ROUND(jend);
463 
464             //////////////////////////////////////////////
465             // Contracted integrals: Horizontal recurrance
466             //////////////////////////////////////////////
467 
468 
469             const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
470 
471 
472             for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
473             {
474 
475                 // set up HRR pointers
476                 double const * restrict HRR_INT__h_s_f_s = INT__h_s_f_s + abcd * 210;
477                 double const * restrict HRR_INT__i_s_f_s = INT__i_s_f_s + abcd * 280;
478                 double const * restrict HRR_INT__k_s_f_s = INT__k_s_f_s + abcd * 360;
479                 double * restrict HRR_INT__h_d_f_s = INT__h_d_f_s + real_abcd * 1260;
480 
481                 // form INT__h_p_f_s
482                 ostei_general_hrr_J(5, 1, 3, 0, hAB, HRR_INT__i_s_f_s, HRR_INT__h_s_f_s, HRR_INT__h_p_f_s);
483 
484                 // form INT__i_p_f_s
485                 ostei_general_hrr_J(6, 1, 3, 0, hAB, HRR_INT__k_s_f_s, HRR_INT__i_s_f_s, HRR_INT__i_p_f_s);
486 
487                 // form INT__h_d_f_s
488                 ostei_general_hrr_J(5, 2, 3, 0, hAB, HRR_INT__i_p_f_s, HRR_INT__h_p_f_s, HRR_INT__h_d_f_s);
489 
490 
491             }  // close HRR loop
492 
493 
494         }   // close loop cdbatch
495 
496         istart = iend;
497     }  // close loop over ab
498 
499     return P.nshell12_clip * Q.nshell12_clip;
500 }
501 
ostei_d_h_f_s(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_h_f_s)502 int ostei_d_h_f_s(struct simint_multi_shellpair const P,
503                   struct simint_multi_shellpair const Q,
504                   double screen_tol,
505                   double * const restrict work,
506                   double * const restrict INT__d_h_f_s)
507 {
508     double P_AB[3*P.nshell12];
509     struct simint_multi_shellpair P_tmp = P;
510     P_tmp.PA_x = P.PB_x;  P_tmp.PA_y = P.PB_y;  P_tmp.PA_z = P.PB_z;
511     P_tmp.PB_x = P.PA_x;  P_tmp.PB_y = P.PA_y;  P_tmp.PB_z = P.PA_z;
512     P_tmp.AB_x = P_AB;
513     P_tmp.AB_y = P_AB + P.nshell12;
514     P_tmp.AB_z = P_AB + 2*P.nshell12;
515 
516     for(int i = 0; i < P.nshell12; i++)
517     {
518         P_tmp.AB_x[i] = -P.AB_x[i];
519         P_tmp.AB_y[i] = -P.AB_y[i];
520         P_tmp.AB_z[i] = -P.AB_z[i];
521     }
522 
523     int ret = ostei_h_d_f_s(P_tmp, Q, screen_tol, work, INT__d_h_f_s);
524     double buffer[1260] SIMINT_ALIGN_ARRAY_DBL;
525 
526     for(int q = 0; q < ret; q++)
527     {
528         int idx = 0;
529         for(int a = 0; a < 6; ++a)
530         for(int b = 0; b < 21; ++b)
531         for(int c = 0; c < 10; ++c)
532         for(int d = 0; d < 1; ++d)
533             buffer[idx++] = INT__d_h_f_s[q*1260+b*60+a*10+c*1+d];
534 
535         memcpy(INT__d_h_f_s+q*1260, buffer, 1260*sizeof(double));
536     }
537 
538     return ret;
539 }
540 
ostei_h_d_s_f(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__h_d_s_f)541 int ostei_h_d_s_f(struct simint_multi_shellpair const P,
542                   struct simint_multi_shellpair const Q,
543                   double screen_tol,
544                   double * const restrict work,
545                   double * const restrict INT__h_d_s_f)
546 {
547     double Q_AB[3*Q.nshell12];
548     struct simint_multi_shellpair Q_tmp = Q;
549     Q_tmp.PA_x = Q.PB_x;  Q_tmp.PA_y = Q.PB_y;  Q_tmp.PA_z = Q.PB_z;
550     Q_tmp.PB_x = Q.PA_x;  Q_tmp.PB_y = Q.PA_y;  Q_tmp.PB_z = Q.PA_z;
551     Q_tmp.AB_x = Q_AB;
552     Q_tmp.AB_y = Q_AB + Q.nshell12;
553     Q_tmp.AB_z = Q_AB + 2*Q.nshell12;
554 
555     for(int i = 0; i < Q.nshell12; i++)
556     {
557         Q_tmp.AB_x[i] = -Q.AB_x[i];
558         Q_tmp.AB_y[i] = -Q.AB_y[i];
559         Q_tmp.AB_z[i] = -Q.AB_z[i];
560     }
561 
562     int ret = ostei_h_d_f_s(P, Q_tmp, screen_tol, work, INT__h_d_s_f);
563     double buffer[1260] SIMINT_ALIGN_ARRAY_DBL;
564 
565     for(int q = 0; q < ret; q++)
566     {
567         int idx = 0;
568         for(int a = 0; a < 21; ++a)
569         for(int b = 0; b < 6; ++b)
570         for(int c = 0; c < 1; ++c)
571         for(int d = 0; d < 10; ++d)
572             buffer[idx++] = INT__h_d_s_f[q*1260+a*60+b*10+d*1+c];
573 
574         memcpy(INT__h_d_s_f+q*1260, buffer, 1260*sizeof(double));
575     }
576 
577     return ret;
578 }
579 
ostei_d_h_s_f(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_h_s_f)580 int ostei_d_h_s_f(struct simint_multi_shellpair const P,
581                   struct simint_multi_shellpair const Q,
582                   double screen_tol,
583                   double * const restrict work,
584                   double * const restrict INT__d_h_s_f)
585 {
586     double P_AB[3*P.nshell12];
587     struct simint_multi_shellpair P_tmp = P;
588     P_tmp.PA_x = P.PB_x;  P_tmp.PA_y = P.PB_y;  P_tmp.PA_z = P.PB_z;
589     P_tmp.PB_x = P.PA_x;  P_tmp.PB_y = P.PA_y;  P_tmp.PB_z = P.PA_z;
590     P_tmp.AB_x = P_AB;
591     P_tmp.AB_y = P_AB + P.nshell12;
592     P_tmp.AB_z = P_AB + 2*P.nshell12;
593 
594     for(int i = 0; i < P.nshell12; i++)
595     {
596         P_tmp.AB_x[i] = -P.AB_x[i];
597         P_tmp.AB_y[i] = -P.AB_y[i];
598         P_tmp.AB_z[i] = -P.AB_z[i];
599     }
600 
601     double Q_AB[3*Q.nshell12];
602     struct simint_multi_shellpair Q_tmp = Q;
603     Q_tmp.PA_x = Q.PB_x;  Q_tmp.PA_y = Q.PB_y;  Q_tmp.PA_z = Q.PB_z;
604     Q_tmp.PB_x = Q.PA_x;  Q_tmp.PB_y = Q.PA_y;  Q_tmp.PB_z = Q.PA_z;
605     Q_tmp.AB_x = Q_AB;
606     Q_tmp.AB_y = Q_AB + Q.nshell12;
607     Q_tmp.AB_z = Q_AB + 2*Q.nshell12;
608 
609     for(int i = 0; i < Q.nshell12; i++)
610     {
611         Q_tmp.AB_x[i] = -Q.AB_x[i];
612         Q_tmp.AB_y[i] = -Q.AB_y[i];
613         Q_tmp.AB_z[i] = -Q.AB_z[i];
614     }
615 
616     int ret = ostei_h_d_f_s(P_tmp, Q_tmp, screen_tol, work, INT__d_h_s_f);
617     double buffer[1260] SIMINT_ALIGN_ARRAY_DBL;
618 
619     for(int q = 0; q < ret; q++)
620     {
621         int idx = 0;
622         for(int a = 0; a < 6; ++a)
623         for(int b = 0; b < 21; ++b)
624         for(int c = 0; c < 1; ++c)
625         for(int d = 0; d < 10; ++d)
626             buffer[idx++] = INT__d_h_s_f[q*1260+b*60+a*10+d*1+c];
627 
628         memcpy(INT__d_h_s_f+q*1260, buffer, 1260*sizeof(double));
629     }
630 
631     return ret;
632 }
633 
634