1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6
7
ostei_h_f_f_s(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__h_f_f_s)8 int ostei_h_f_f_s(struct simint_multi_shellpair const P,
9 struct simint_multi_shellpair const Q,
10 double screen_tol,
11 double * const restrict work,
12 double * const restrict INT__h_f_f_s)
13 {
14
15 SIMINT_ASSUME_ALIGN_DBL(work);
16 SIMINT_ASSUME_ALIGN_DBL(INT__h_f_f_s);
17 int ab, cd, abcd;
18 int istart, jstart;
19 int iprimcd, nprim_icd, icd;
20 const int check_screen = (screen_tol > 0.0);
21 int i, j;
22 int n;
23 int not_screened;
24 int real_abcd;
25 int iket;
26
27 // partition workspace
28 double * const INT__h_s_f_s = work + (SIMINT_NSHELL_SIMD * 0);
29 double * const INT__i_s_f_s = work + (SIMINT_NSHELL_SIMD * 210);
30 double * const INT__k_s_f_s = work + (SIMINT_NSHELL_SIMD * 490);
31 double * const INT__l_s_f_s = work + (SIMINT_NSHELL_SIMD * 850);
32 SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*1300);
33 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
34 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_s_s = primwork + 12;
35 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_s = primwork + 45;
36 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_s = primwork + 105;
37 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_p_s = primwork + 195;
38 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_s = primwork + 285;
39 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_p_s = primwork + 405;
40 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_d_s = primwork + 540;
41 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_s_s = primwork + 720;
42 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_p_s = primwork + 867;
43 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_d_s = primwork + 1056;
44 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_f_s = primwork + 1308;
45 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_s_s = primwork + 1518;
46 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_p_s = primwork + 1686;
47 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_d_s = primwork + 1938;
48 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_f_s = primwork + 2274;
49 SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_s_s = primwork + 2554;
50 SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_p_s = primwork + 2734;
51 SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_d_s = primwork + 3058;
52 SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_f_s = primwork + 3490;
53 SIMINT_DBLTYPE * const restrict PRIM_INT__l_s_s_s = primwork + 3850;
54 SIMINT_DBLTYPE * const restrict PRIM_INT__l_s_p_s = primwork + 4030;
55 SIMINT_DBLTYPE * const restrict PRIM_INT__l_s_d_s = primwork + 4435;
56 SIMINT_DBLTYPE * const restrict PRIM_INT__l_s_f_s = primwork + 4975;
57 double * const hrrwork = (double *)(primwork + 5425);
58 double * const HRR_INT__h_p_f_s = hrrwork + 0;
59 double * const HRR_INT__h_d_f_s = hrrwork + 630;
60 double * const HRR_INT__i_p_f_s = hrrwork + 1890;
61 double * const HRR_INT__i_d_f_s = hrrwork + 2730;
62 double * const HRR_INT__k_p_f_s = hrrwork + 4410;
63
64
65 // Create constants
66 const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
67 const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
68 const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
69 const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
70 const SIMINT_DBLTYPE const_5 = SIMINT_DBLSET1(5);
71 const SIMINT_DBLTYPE const_6 = SIMINT_DBLSET1(6);
72 const SIMINT_DBLTYPE const_7 = SIMINT_DBLSET1(7);
73 const SIMINT_DBLTYPE const_8 = SIMINT_DBLSET1(8);
74 const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
75
76
77 ////////////////////////////////////////
78 // Loop over shells and primitives
79 ////////////////////////////////////////
80
81 real_abcd = 0;
82 istart = 0;
83 for(ab = 0; ab < P.nshell12_clip; ++ab)
84 {
85 const int iend = istart + P.nprim12[ab];
86
87 cd = 0;
88 jstart = 0;
89
90 for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
91 {
92 const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
93 int jend = jstart;
94 for(i = 0; i < nshellbatch; i++)
95 jend += Q.nprim12[cd+i];
96
97 // Clear the beginning of the workspace (where we are accumulating integrals)
98 memset(work, 0, SIMINT_NSHELL_SIMD * 1300 * sizeof(double));
99 abcd = 0;
100
101
102 for(i = istart; i < iend; ++i)
103 {
104 SIMINT_DBLTYPE bra_screen_max; // only used if check_screen
105
106 if(check_screen)
107 {
108 // Skip this whole thing if always insignificant
109 if((P.screen[i] * Q.screen_max) < screen_tol)
110 continue;
111 bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
112 }
113
114 icd = 0;
115 iprimcd = 0;
116 nprim_icd = Q.nprim12[cd];
117 double * restrict PRIM_PTR_INT__h_s_f_s = INT__h_s_f_s + abcd * 210;
118 double * restrict PRIM_PTR_INT__i_s_f_s = INT__i_s_f_s + abcd * 280;
119 double * restrict PRIM_PTR_INT__k_s_f_s = INT__k_s_f_s + abcd * 360;
120 double * restrict PRIM_PTR_INT__l_s_f_s = INT__l_s_f_s + abcd * 450;
121
122
123
124 // Load these one per loop over i
125 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
126 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
127 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
128
129 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
130
131 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
132 {
133 // calculate the shell offsets
134 // these are the offset from the shell pointed to by cd
135 // for each element
136 int shelloffsets[SIMINT_SIMD_LEN] = {0};
137 int lastoffset = 0;
138 const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
139
140 if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
141 {
142 // Handle if the first element of the vector is a new shell
143 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
144 {
145 nprim_icd += Q.nprim12[cd + (++icd)];
146 PRIM_PTR_INT__h_s_f_s += 210;
147 PRIM_PTR_INT__i_s_f_s += 280;
148 PRIM_PTR_INT__k_s_f_s += 360;
149 PRIM_PTR_INT__l_s_f_s += 450;
150 }
151 iprimcd++;
152 for(n = 1; n < SIMINT_SIMD_LEN; ++n)
153 {
154 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
155 {
156 shelloffsets[n] = shelloffsets[n-1] + 1;
157 lastoffset++;
158 nprim_icd += Q.nprim12[cd + (++icd)];
159 }
160 else
161 shelloffsets[n] = shelloffsets[n-1];
162 iprimcd++;
163 }
164 }
165 else
166 iprimcd += SIMINT_SIMD_LEN;
167
168 // Do we have to compute this vector (or has it been screened out)?
169 // (not_screened != 0 means we have to do this vector)
170 if(check_screen)
171 {
172 const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
173 if(vmax < screen_tol)
174 {
175 PRIM_PTR_INT__h_s_f_s += lastoffset*210;
176 PRIM_PTR_INT__i_s_f_s += lastoffset*280;
177 PRIM_PTR_INT__k_s_f_s += lastoffset*360;
178 PRIM_PTR_INT__l_s_f_s += lastoffset*450;
179 continue;
180 }
181 }
182
183 const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
184 const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
185 const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
186 const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
187
188
189 /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
190 SIMINT_DBLTYPE PQ[3];
191 PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
192 PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
193 PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
194 SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
195 R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
196 R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
197
198 const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
199 const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
200 const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
201 const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
202 const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
203 const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
204 const SIMINT_DBLTYPE Q_PA[3] = { SIMINT_DBLLOAD(Q.PA_x, j), SIMINT_DBLLOAD(Q.PA_y, j), SIMINT_DBLLOAD(Q.PA_z, j) };
205
206 // NOTE: Minus sign!
207 const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
208 SIMINT_DBLTYPE aop_PQ[3];
209 aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
210 aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
211 aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
212
213 SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
214 SIMINT_DBLTYPE aoq_PQ[3];
215 aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
216 aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
217 aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
218 // Put a minus sign here so we don't have to in RR routines
219 a_over_q = SIMINT_NEG(a_over_q);
220
221
222 //////////////////////////////////////////////
223 // Fjt function section
224 // Maximum v value: 11
225 //////////////////////////////////////////////
226 // The parameter to the Fjt function
227 const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
228
229
230 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
231
232
233 boys_F_split(PRIM_INT__s_s_s_s, F_x, 11);
234 SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
235 prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
236 for(n = 0; n <= 11; n++)
237 PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
238
239 //////////////////////////////////////////////
240 // Primitive integrals: Vertical recurrance
241 //////////////////////////////////////////////
242
243 const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
244 const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
245 const SIMINT_DBLTYPE vrr_const_3_over_2p = SIMINT_MUL(const_3, one_over_2p);
246 const SIMINT_DBLTYPE vrr_const_4_over_2p = SIMINT_MUL(const_4, one_over_2p);
247 const SIMINT_DBLTYPE vrr_const_5_over_2p = SIMINT_MUL(const_5, one_over_2p);
248 const SIMINT_DBLTYPE vrr_const_6_over_2p = SIMINT_MUL(const_6, one_over_2p);
249 const SIMINT_DBLTYPE vrr_const_7_over_2p = SIMINT_MUL(const_7, one_over_2p);
250 const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
251 const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
252 const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
253 const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
254 const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
255 const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
256 const SIMINT_DBLTYPE vrr_const_5_over_2pq = SIMINT_MUL(const_5, one_over_2pq);
257 const SIMINT_DBLTYPE vrr_const_6_over_2pq = SIMINT_MUL(const_6, one_over_2pq);
258 const SIMINT_DBLTYPE vrr_const_7_over_2pq = SIMINT_MUL(const_7, one_over_2pq);
259 const SIMINT_DBLTYPE vrr_const_8_over_2pq = SIMINT_MUL(const_8, one_over_2pq);
260
261
262
263 // Forming PRIM_INT__p_s_s_s[11 * 3];
264 for(n = 0; n < 11; ++n) // loop over orders of auxiliary function
265 {
266
267 PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
268 PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]);
269
270 PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
271 PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 1]);
272
273 PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
274 PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 2]);
275
276 }
277
278
279
280 // Forming PRIM_INT__d_s_s_s[10 * 6];
281 for(n = 0; n < 10; ++n) // loop over orders of auxiliary function
282 {
283
284 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__p_s_s_s[n * 3 + 0]);
285 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 0]);
286 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 0]);
287
288 PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 0]);
289 PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 1]);
290
291 PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 0]);
292 PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 2]);
293
294 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 1]);
295 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 3]);
296 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 3]);
297
298 PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 1]);
299 PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 4]);
300
301 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 2]);
302 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_s[n * 6 + 5]);
303 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 5]);
304
305 }
306
307
308
309 // Forming PRIM_INT__f_s_s_s[9 * 10];
310 for(n = 0; n < 9; ++n) // loop over orders of auxiliary function
311 {
312
313 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
314 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 0]);
315 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__f_s_s_s[n * 10 + 0]);
316
317 PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
318 PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 1]);
319
320 PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
321 PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 2]);
322
323 PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
324 PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 3]);
325
326 PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
327 PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__f_s_s_s[n * 10 + 4]);
328
329 PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
330 PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 5]);
331
332 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
333 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 6]);
334 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__f_s_s_s[n * 10 + 6]);
335
336 PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
337 PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 7]);
338
339 PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
340 PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 8]);
341
342 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
343 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 9]);
344 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__f_s_s_s[n * 10 + 9]);
345
346 }
347
348
349 VRR_I_g_s_s_s(
350 PRIM_INT__g_s_s_s,
351 PRIM_INT__f_s_s_s,
352 PRIM_INT__d_s_s_s,
353 P_PA,
354 a_over_p,
355 aop_PQ,
356 one_over_2p,
357 8);
358
359
360 VRR_I_h_s_s_s(
361 PRIM_INT__h_s_s_s,
362 PRIM_INT__g_s_s_s,
363 PRIM_INT__f_s_s_s,
364 P_PA,
365 a_over_p,
366 aop_PQ,
367 one_over_2p,
368 7);
369
370
371 ostei_general_vrr_K(5, 0, 1, 0, 3,
372 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
373 PRIM_INT__h_s_s_s, NULL, NULL, PRIM_INT__g_s_s_s, NULL, PRIM_INT__h_s_p_s);
374
375
376 VRR_K_g_s_p_s(
377 PRIM_INT__g_s_p_s,
378 PRIM_INT__g_s_s_s,
379 PRIM_INT__f_s_s_s,
380 Q_PA,
381 aoq_PQ,
382 one_over_2pq,
383 3);
384
385
386 ostei_general_vrr_K(5, 0, 2, 0, 2,
387 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
388 PRIM_INT__h_s_p_s, PRIM_INT__h_s_s_s, NULL, PRIM_INT__g_s_p_s, NULL, PRIM_INT__h_s_d_s);
389
390
391 VRR_K_f_s_p_s(
392 PRIM_INT__f_s_p_s,
393 PRIM_INT__f_s_s_s,
394 PRIM_INT__d_s_s_s,
395 Q_PA,
396 aoq_PQ,
397 one_over_2pq,
398 3);
399
400
401 ostei_general_vrr_K(4, 0, 2, 0, 2,
402 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
403 PRIM_INT__g_s_p_s, PRIM_INT__g_s_s_s, NULL, PRIM_INT__f_s_p_s, NULL, PRIM_INT__g_s_d_s);
404
405
406 ostei_general_vrr_K(5, 0, 3, 0, 1,
407 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
408 PRIM_INT__h_s_d_s, PRIM_INT__h_s_p_s, NULL, PRIM_INT__g_s_d_s, NULL, PRIM_INT__h_s_f_s);
409
410
411 ostei_general_vrr1_I(6, 6,
412 one_over_2p, a_over_p, aop_PQ, P_PA,
413 PRIM_INT__h_s_s_s, PRIM_INT__g_s_s_s, PRIM_INT__i_s_s_s);
414
415
416 ostei_general_vrr_K(6, 0, 1, 0, 3,
417 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
418 PRIM_INT__i_s_s_s, NULL, NULL, PRIM_INT__h_s_s_s, NULL, PRIM_INT__i_s_p_s);
419
420
421 ostei_general_vrr_K(6, 0, 2, 0, 2,
422 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
423 PRIM_INT__i_s_p_s, PRIM_INT__i_s_s_s, NULL, PRIM_INT__h_s_p_s, NULL, PRIM_INT__i_s_d_s);
424
425
426 ostei_general_vrr_K(6, 0, 3, 0, 1,
427 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
428 PRIM_INT__i_s_d_s, PRIM_INT__i_s_p_s, NULL, PRIM_INT__h_s_d_s, NULL, PRIM_INT__i_s_f_s);
429
430
431 ostei_general_vrr1_I(7, 5,
432 one_over_2p, a_over_p, aop_PQ, P_PA,
433 PRIM_INT__i_s_s_s, PRIM_INT__h_s_s_s, PRIM_INT__k_s_s_s);
434
435
436 ostei_general_vrr_K(7, 0, 1, 0, 3,
437 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
438 PRIM_INT__k_s_s_s, NULL, NULL, PRIM_INT__i_s_s_s, NULL, PRIM_INT__k_s_p_s);
439
440
441 ostei_general_vrr_K(7, 0, 2, 0, 2,
442 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
443 PRIM_INT__k_s_p_s, PRIM_INT__k_s_s_s, NULL, PRIM_INT__i_s_p_s, NULL, PRIM_INT__k_s_d_s);
444
445
446 ostei_general_vrr_K(7, 0, 3, 0, 1,
447 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
448 PRIM_INT__k_s_d_s, PRIM_INT__k_s_p_s, NULL, PRIM_INT__i_s_d_s, NULL, PRIM_INT__k_s_f_s);
449
450
451 ostei_general_vrr1_I(8, 4,
452 one_over_2p, a_over_p, aop_PQ, P_PA,
453 PRIM_INT__k_s_s_s, PRIM_INT__i_s_s_s, PRIM_INT__l_s_s_s);
454
455
456 ostei_general_vrr_K(8, 0, 1, 0, 3,
457 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
458 PRIM_INT__l_s_s_s, NULL, NULL, PRIM_INT__k_s_s_s, NULL, PRIM_INT__l_s_p_s);
459
460
461 ostei_general_vrr_K(8, 0, 2, 0, 2,
462 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
463 PRIM_INT__l_s_p_s, PRIM_INT__l_s_s_s, NULL, PRIM_INT__k_s_p_s, NULL, PRIM_INT__l_s_d_s);
464
465
466 ostei_general_vrr_K(8, 0, 3, 0, 1,
467 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
468 PRIM_INT__l_s_d_s, PRIM_INT__l_s_p_s, NULL, PRIM_INT__k_s_d_s, NULL, PRIM_INT__l_s_f_s);
469
470
471
472
473 ////////////////////////////////////
474 // Accumulate contracted integrals
475 ////////////////////////////////////
476 if(lastoffset == 0)
477 {
478 contract_all(210, PRIM_INT__h_s_f_s, PRIM_PTR_INT__h_s_f_s);
479 contract_all(280, PRIM_INT__i_s_f_s, PRIM_PTR_INT__i_s_f_s);
480 contract_all(360, PRIM_INT__k_s_f_s, PRIM_PTR_INT__k_s_f_s);
481 contract_all(450, PRIM_INT__l_s_f_s, PRIM_PTR_INT__l_s_f_s);
482 }
483 else
484 {
485 contract(210, shelloffsets, PRIM_INT__h_s_f_s, PRIM_PTR_INT__h_s_f_s);
486 contract(280, shelloffsets, PRIM_INT__i_s_f_s, PRIM_PTR_INT__i_s_f_s);
487 contract(360, shelloffsets, PRIM_INT__k_s_f_s, PRIM_PTR_INT__k_s_f_s);
488 contract(450, shelloffsets, PRIM_INT__l_s_f_s, PRIM_PTR_INT__l_s_f_s);
489 PRIM_PTR_INT__h_s_f_s += lastoffset*210;
490 PRIM_PTR_INT__i_s_f_s += lastoffset*280;
491 PRIM_PTR_INT__k_s_f_s += lastoffset*360;
492 PRIM_PTR_INT__l_s_f_s += lastoffset*450;
493 }
494
495 } // close loop over j
496 } // close loop over i
497
498 //Advance to the next batch
499 jstart = SIMINT_SIMD_ROUND(jend);
500
501 //////////////////////////////////////////////
502 // Contracted integrals: Horizontal recurrance
503 //////////////////////////////////////////////
504
505
506 const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
507
508
509 for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
510 {
511
512 // set up HRR pointers
513 double const * restrict HRR_INT__h_s_f_s = INT__h_s_f_s + abcd * 210;
514 double const * restrict HRR_INT__i_s_f_s = INT__i_s_f_s + abcd * 280;
515 double const * restrict HRR_INT__k_s_f_s = INT__k_s_f_s + abcd * 360;
516 double const * restrict HRR_INT__l_s_f_s = INT__l_s_f_s + abcd * 450;
517 double * restrict HRR_INT__h_f_f_s = INT__h_f_f_s + real_abcd * 2100;
518
519 // form INT__h_p_f_s
520 ostei_general_hrr_J(5, 1, 3, 0, hAB, HRR_INT__i_s_f_s, HRR_INT__h_s_f_s, HRR_INT__h_p_f_s);
521
522 // form INT__i_p_f_s
523 ostei_general_hrr_J(6, 1, 3, 0, hAB, HRR_INT__k_s_f_s, HRR_INT__i_s_f_s, HRR_INT__i_p_f_s);
524
525 // form INT__k_p_f_s
526 ostei_general_hrr_J(7, 1, 3, 0, hAB, HRR_INT__l_s_f_s, HRR_INT__k_s_f_s, HRR_INT__k_p_f_s);
527
528 // form INT__h_d_f_s
529 ostei_general_hrr_J(5, 2, 3, 0, hAB, HRR_INT__i_p_f_s, HRR_INT__h_p_f_s, HRR_INT__h_d_f_s);
530
531 // form INT__i_d_f_s
532 ostei_general_hrr_J(6, 2, 3, 0, hAB, HRR_INT__k_p_f_s, HRR_INT__i_p_f_s, HRR_INT__i_d_f_s);
533
534 // form INT__h_f_f_s
535 ostei_general_hrr_J(5, 3, 3, 0, hAB, HRR_INT__i_d_f_s, HRR_INT__h_d_f_s, HRR_INT__h_f_f_s);
536
537
538 } // close HRR loop
539
540
541 } // close loop cdbatch
542
543 istart = iend;
544 } // close loop over ab
545
546 return P.nshell12_clip * Q.nshell12_clip;
547 }
548
ostei_f_h_f_s(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__f_h_f_s)549 int ostei_f_h_f_s(struct simint_multi_shellpair const P,
550 struct simint_multi_shellpair const Q,
551 double screen_tol,
552 double * const restrict work,
553 double * const restrict INT__f_h_f_s)
554 {
555 double P_AB[3*P.nshell12];
556 struct simint_multi_shellpair P_tmp = P;
557 P_tmp.PA_x = P.PB_x; P_tmp.PA_y = P.PB_y; P_tmp.PA_z = P.PB_z;
558 P_tmp.PB_x = P.PA_x; P_tmp.PB_y = P.PA_y; P_tmp.PB_z = P.PA_z;
559 P_tmp.AB_x = P_AB;
560 P_tmp.AB_y = P_AB + P.nshell12;
561 P_tmp.AB_z = P_AB + 2*P.nshell12;
562
563 for(int i = 0; i < P.nshell12; i++)
564 {
565 P_tmp.AB_x[i] = -P.AB_x[i];
566 P_tmp.AB_y[i] = -P.AB_y[i];
567 P_tmp.AB_z[i] = -P.AB_z[i];
568 }
569
570 int ret = ostei_h_f_f_s(P_tmp, Q, screen_tol, work, INT__f_h_f_s);
571 double buffer[2100] SIMINT_ALIGN_ARRAY_DBL;
572
573 for(int q = 0; q < ret; q++)
574 {
575 int idx = 0;
576 for(int a = 0; a < 10; ++a)
577 for(int b = 0; b < 21; ++b)
578 for(int c = 0; c < 10; ++c)
579 for(int d = 0; d < 1; ++d)
580 buffer[idx++] = INT__f_h_f_s[q*2100+b*100+a*10+c*1+d];
581
582 memcpy(INT__f_h_f_s+q*2100, buffer, 2100*sizeof(double));
583 }
584
585 return ret;
586 }
587
ostei_h_f_s_f(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__h_f_s_f)588 int ostei_h_f_s_f(struct simint_multi_shellpair const P,
589 struct simint_multi_shellpair const Q,
590 double screen_tol,
591 double * const restrict work,
592 double * const restrict INT__h_f_s_f)
593 {
594 double Q_AB[3*Q.nshell12];
595 struct simint_multi_shellpair Q_tmp = Q;
596 Q_tmp.PA_x = Q.PB_x; Q_tmp.PA_y = Q.PB_y; Q_tmp.PA_z = Q.PB_z;
597 Q_tmp.PB_x = Q.PA_x; Q_tmp.PB_y = Q.PA_y; Q_tmp.PB_z = Q.PA_z;
598 Q_tmp.AB_x = Q_AB;
599 Q_tmp.AB_y = Q_AB + Q.nshell12;
600 Q_tmp.AB_z = Q_AB + 2*Q.nshell12;
601
602 for(int i = 0; i < Q.nshell12; i++)
603 {
604 Q_tmp.AB_x[i] = -Q.AB_x[i];
605 Q_tmp.AB_y[i] = -Q.AB_y[i];
606 Q_tmp.AB_z[i] = -Q.AB_z[i];
607 }
608
609 int ret = ostei_h_f_f_s(P, Q_tmp, screen_tol, work, INT__h_f_s_f);
610 double buffer[2100] SIMINT_ALIGN_ARRAY_DBL;
611
612 for(int q = 0; q < ret; q++)
613 {
614 int idx = 0;
615 for(int a = 0; a < 21; ++a)
616 for(int b = 0; b < 10; ++b)
617 for(int c = 0; c < 1; ++c)
618 for(int d = 0; d < 10; ++d)
619 buffer[idx++] = INT__h_f_s_f[q*2100+a*100+b*10+d*1+c];
620
621 memcpy(INT__h_f_s_f+q*2100, buffer, 2100*sizeof(double));
622 }
623
624 return ret;
625 }
626
ostei_f_h_s_f(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__f_h_s_f)627 int ostei_f_h_s_f(struct simint_multi_shellpair const P,
628 struct simint_multi_shellpair const Q,
629 double screen_tol,
630 double * const restrict work,
631 double * const restrict INT__f_h_s_f)
632 {
633 double P_AB[3*P.nshell12];
634 struct simint_multi_shellpair P_tmp = P;
635 P_tmp.PA_x = P.PB_x; P_tmp.PA_y = P.PB_y; P_tmp.PA_z = P.PB_z;
636 P_tmp.PB_x = P.PA_x; P_tmp.PB_y = P.PA_y; P_tmp.PB_z = P.PA_z;
637 P_tmp.AB_x = P_AB;
638 P_tmp.AB_y = P_AB + P.nshell12;
639 P_tmp.AB_z = P_AB + 2*P.nshell12;
640
641 for(int i = 0; i < P.nshell12; i++)
642 {
643 P_tmp.AB_x[i] = -P.AB_x[i];
644 P_tmp.AB_y[i] = -P.AB_y[i];
645 P_tmp.AB_z[i] = -P.AB_z[i];
646 }
647
648 double Q_AB[3*Q.nshell12];
649 struct simint_multi_shellpair Q_tmp = Q;
650 Q_tmp.PA_x = Q.PB_x; Q_tmp.PA_y = Q.PB_y; Q_tmp.PA_z = Q.PB_z;
651 Q_tmp.PB_x = Q.PA_x; Q_tmp.PB_y = Q.PA_y; Q_tmp.PB_z = Q.PA_z;
652 Q_tmp.AB_x = Q_AB;
653 Q_tmp.AB_y = Q_AB + Q.nshell12;
654 Q_tmp.AB_z = Q_AB + 2*Q.nshell12;
655
656 for(int i = 0; i < Q.nshell12; i++)
657 {
658 Q_tmp.AB_x[i] = -Q.AB_x[i];
659 Q_tmp.AB_y[i] = -Q.AB_y[i];
660 Q_tmp.AB_z[i] = -Q.AB_z[i];
661 }
662
663 int ret = ostei_h_f_f_s(P_tmp, Q_tmp, screen_tol, work, INT__f_h_s_f);
664 double buffer[2100] SIMINT_ALIGN_ARRAY_DBL;
665
666 for(int q = 0; q < ret; q++)
667 {
668 int idx = 0;
669 for(int a = 0; a < 10; ++a)
670 for(int b = 0; b < 21; ++b)
671 for(int c = 0; c < 1; ++c)
672 for(int d = 0; d < 10; ++d)
673 buffer[idx++] = INT__f_h_s_f[q*2100+b*100+a*10+d*1+c];
674
675 memcpy(INT__f_h_s_f+q*2100, buffer, 2100*sizeof(double));
676 }
677
678 return ret;
679 }
680
681