1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6
7
ostei_k_p_d_d(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__k_p_d_d)8 int ostei_k_p_d_d(struct simint_multi_shellpair const P,
9 struct simint_multi_shellpair const Q,
10 double screen_tol,
11 double * const restrict work,
12 double * const restrict INT__k_p_d_d)
13 {
14
15 SIMINT_ASSUME_ALIGN_DBL(work);
16 SIMINT_ASSUME_ALIGN_DBL(INT__k_p_d_d);
17 int ab, cd, abcd;
18 int istart, jstart;
19 int iprimcd, nprim_icd, icd;
20 const int check_screen = (screen_tol > 0.0);
21 int i, j;
22 int n;
23 int not_screened;
24 int real_abcd;
25 int iket;
26 int ibra;
27
28 // partition workspace
29 double * const INT__k_s_d_s = work + (SIMINT_NSHELL_SIMD * 0);
30 double * const INT__k_s_f_s = work + (SIMINT_NSHELL_SIMD * 216);
31 double * const INT__k_s_g_s = work + (SIMINT_NSHELL_SIMD * 576);
32 double * const INT__l_s_d_s = work + (SIMINT_NSHELL_SIMD * 1116);
33 double * const INT__l_s_f_s = work + (SIMINT_NSHELL_SIMD * 1386);
34 double * const INT__l_s_g_s = work + (SIMINT_NSHELL_SIMD * 1836);
35 SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*2511);
36 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
37 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_s_s = primwork + 13;
38 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_s = primwork + 49;
39 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_s = primwork + 115;
40 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_s = primwork + 215;
41 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_p_s = primwork + 350;
42 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_s_s = primwork + 530;
43 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_p_s = primwork + 698;
44 SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_d_s = primwork + 950;
45 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_s_s = primwork + 1328;
46 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_p_s = primwork + 1524;
47 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_d_s = primwork + 1860;
48 SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_f_s = primwork + 2364;
49 SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_s_s = primwork + 2924;
50 SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_p_s = primwork + 3140;
51 SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_d_s = primwork + 3572;
52 SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_f_s = primwork + 4220;
53 SIMINT_DBLTYPE * const restrict PRIM_INT__k_s_g_s = primwork + 4940;
54 SIMINT_DBLTYPE * const restrict PRIM_INT__l_s_s_s = primwork + 5480;
55 SIMINT_DBLTYPE * const restrict PRIM_INT__l_s_p_s = primwork + 5705;
56 SIMINT_DBLTYPE * const restrict PRIM_INT__l_s_d_s = primwork + 6245;
57 SIMINT_DBLTYPE * const restrict PRIM_INT__l_s_f_s = primwork + 7055;
58 SIMINT_DBLTYPE * const restrict PRIM_INT__l_s_g_s = primwork + 7955;
59 double * const hrrwork = (double *)(primwork + 8630);
60 double * const HRR_INT__k_p_d_s = hrrwork + 0;
61 double * const HRR_INT__k_p_d_p = hrrwork + 648;
62 double * const HRR_INT__k_p_f_s = hrrwork + 2592;
63 double * const HRR_INT__k_p_f_p = hrrwork + 3672;
64 double * const HRR_INT__k_p_g_s = hrrwork + 6912;
65
66
67 // Create constants
68 const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
69 const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
70 const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
71 const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
72 const SIMINT_DBLTYPE const_5 = SIMINT_DBLSET1(5);
73 const SIMINT_DBLTYPE const_6 = SIMINT_DBLSET1(6);
74 const SIMINT_DBLTYPE const_7 = SIMINT_DBLSET1(7);
75 const SIMINT_DBLTYPE const_8 = SIMINT_DBLSET1(8);
76 const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
77
78
79 ////////////////////////////////////////
80 // Loop over shells and primitives
81 ////////////////////////////////////////
82
83 real_abcd = 0;
84 istart = 0;
85 for(ab = 0; ab < P.nshell12_clip; ++ab)
86 {
87 const int iend = istart + P.nprim12[ab];
88
89 cd = 0;
90 jstart = 0;
91
92 for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
93 {
94 const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
95 int jend = jstart;
96 for(i = 0; i < nshellbatch; i++)
97 jend += Q.nprim12[cd+i];
98
99 // Clear the beginning of the workspace (where we are accumulating integrals)
100 memset(work, 0, SIMINT_NSHELL_SIMD * 2511 * sizeof(double));
101 abcd = 0;
102
103
104 for(i = istart; i < iend; ++i)
105 {
106 SIMINT_DBLTYPE bra_screen_max; // only used if check_screen
107
108 if(check_screen)
109 {
110 // Skip this whole thing if always insignificant
111 if((P.screen[i] * Q.screen_max) < screen_tol)
112 continue;
113 bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
114 }
115
116 icd = 0;
117 iprimcd = 0;
118 nprim_icd = Q.nprim12[cd];
119 double * restrict PRIM_PTR_INT__k_s_d_s = INT__k_s_d_s + abcd * 216;
120 double * restrict PRIM_PTR_INT__k_s_f_s = INT__k_s_f_s + abcd * 360;
121 double * restrict PRIM_PTR_INT__k_s_g_s = INT__k_s_g_s + abcd * 540;
122 double * restrict PRIM_PTR_INT__l_s_d_s = INT__l_s_d_s + abcd * 270;
123 double * restrict PRIM_PTR_INT__l_s_f_s = INT__l_s_f_s + abcd * 450;
124 double * restrict PRIM_PTR_INT__l_s_g_s = INT__l_s_g_s + abcd * 675;
125
126
127
128 // Load these one per loop over i
129 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
130 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
131 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
132
133 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
134
135 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
136 {
137 // calculate the shell offsets
138 // these are the offset from the shell pointed to by cd
139 // for each element
140 int shelloffsets[SIMINT_SIMD_LEN] = {0};
141 int lastoffset = 0;
142 const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
143
144 if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
145 {
146 // Handle if the first element of the vector is a new shell
147 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
148 {
149 nprim_icd += Q.nprim12[cd + (++icd)];
150 PRIM_PTR_INT__k_s_d_s += 216;
151 PRIM_PTR_INT__k_s_f_s += 360;
152 PRIM_PTR_INT__k_s_g_s += 540;
153 PRIM_PTR_INT__l_s_d_s += 270;
154 PRIM_PTR_INT__l_s_f_s += 450;
155 PRIM_PTR_INT__l_s_g_s += 675;
156 }
157 iprimcd++;
158 for(n = 1; n < SIMINT_SIMD_LEN; ++n)
159 {
160 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
161 {
162 shelloffsets[n] = shelloffsets[n-1] + 1;
163 lastoffset++;
164 nprim_icd += Q.nprim12[cd + (++icd)];
165 }
166 else
167 shelloffsets[n] = shelloffsets[n-1];
168 iprimcd++;
169 }
170 }
171 else
172 iprimcd += SIMINT_SIMD_LEN;
173
174 // Do we have to compute this vector (or has it been screened out)?
175 // (not_screened != 0 means we have to do this vector)
176 if(check_screen)
177 {
178 const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
179 if(vmax < screen_tol)
180 {
181 PRIM_PTR_INT__k_s_d_s += lastoffset*216;
182 PRIM_PTR_INT__k_s_f_s += lastoffset*360;
183 PRIM_PTR_INT__k_s_g_s += lastoffset*540;
184 PRIM_PTR_INT__l_s_d_s += lastoffset*270;
185 PRIM_PTR_INT__l_s_f_s += lastoffset*450;
186 PRIM_PTR_INT__l_s_g_s += lastoffset*675;
187 continue;
188 }
189 }
190
191 const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
192 const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
193 const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
194 const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
195
196
197 /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
198 SIMINT_DBLTYPE PQ[3];
199 PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
200 PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
201 PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
202 SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
203 R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
204 R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
205
206 const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
207 const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
208 const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
209 const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
210 const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
211 const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
212 const SIMINT_DBLTYPE Q_PA[3] = { SIMINT_DBLLOAD(Q.PA_x, j), SIMINT_DBLLOAD(Q.PA_y, j), SIMINT_DBLLOAD(Q.PA_z, j) };
213
214 // NOTE: Minus sign!
215 const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
216 SIMINT_DBLTYPE aop_PQ[3];
217 aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
218 aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
219 aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
220
221 SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
222 SIMINT_DBLTYPE aoq_PQ[3];
223 aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
224 aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
225 aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
226 // Put a minus sign here so we don't have to in RR routines
227 a_over_q = SIMINT_NEG(a_over_q);
228
229
230 //////////////////////////////////////////////
231 // Fjt function section
232 // Maximum v value: 12
233 //////////////////////////////////////////////
234 // The parameter to the Fjt function
235 const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
236
237
238 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
239
240
241 boys_F_split(PRIM_INT__s_s_s_s, F_x, 12);
242 SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
243 prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
244 for(n = 0; n <= 12; n++)
245 PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
246
247 //////////////////////////////////////////////
248 // Primitive integrals: Vertical recurrance
249 //////////////////////////////////////////////
250
251 const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
252 const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
253 const SIMINT_DBLTYPE vrr_const_3_over_2p = SIMINT_MUL(const_3, one_over_2p);
254 const SIMINT_DBLTYPE vrr_const_4_over_2p = SIMINT_MUL(const_4, one_over_2p);
255 const SIMINT_DBLTYPE vrr_const_5_over_2p = SIMINT_MUL(const_5, one_over_2p);
256 const SIMINT_DBLTYPE vrr_const_6_over_2p = SIMINT_MUL(const_6, one_over_2p);
257 const SIMINT_DBLTYPE vrr_const_7_over_2p = SIMINT_MUL(const_7, one_over_2p);
258 const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
259 const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
260 const SIMINT_DBLTYPE vrr_const_3_over_2q = SIMINT_MUL(const_3, one_over_2q);
261 const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
262 const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
263 const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
264 const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
265 const SIMINT_DBLTYPE vrr_const_5_over_2pq = SIMINT_MUL(const_5, one_over_2pq);
266 const SIMINT_DBLTYPE vrr_const_6_over_2pq = SIMINT_MUL(const_6, one_over_2pq);
267 const SIMINT_DBLTYPE vrr_const_7_over_2pq = SIMINT_MUL(const_7, one_over_2pq);
268 const SIMINT_DBLTYPE vrr_const_8_over_2pq = SIMINT_MUL(const_8, one_over_2pq);
269
270
271
272 // Forming PRIM_INT__p_s_s_s[12 * 3];
273 for(n = 0; n < 12; ++n) // loop over orders of auxiliary function
274 {
275
276 PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
277 PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]);
278
279 PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
280 PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 1]);
281
282 PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
283 PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 2]);
284
285 }
286
287
288
289 // Forming PRIM_INT__d_s_s_s[11 * 6];
290 for(n = 0; n < 11; ++n) // loop over orders of auxiliary function
291 {
292
293 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__p_s_s_s[n * 3 + 0]);
294 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 0]);
295 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 0]);
296
297 PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 0]);
298 PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 1]);
299
300 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 1]);
301 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 3]);
302 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 3]);
303
304 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 2]);
305 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_s[n * 6 + 5]);
306 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 5]);
307
308 }
309
310
311
312 // Forming PRIM_INT__f_s_s_s[10 * 10];
313 for(n = 0; n < 10; ++n) // loop over orders of auxiliary function
314 {
315
316 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
317 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 0]);
318 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__f_s_s_s[n * 10 + 0]);
319
320 PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
321 PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 1]);
322
323 PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
324 PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 2]);
325
326 PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
327 PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 3]);
328
329 PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
330 PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__f_s_s_s[n * 10 + 4]);
331
332 PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
333 PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 5]);
334
335 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
336 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 6]);
337 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__f_s_s_s[n * 10 + 6]);
338
339 PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
340 PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 7]);
341
342 PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
343 PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 8]);
344
345 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
346 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 9]);
347 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__f_s_s_s[n * 10 + 9]);
348
349 }
350
351
352 VRR_I_g_s_s_s(
353 PRIM_INT__g_s_s_s,
354 PRIM_INT__f_s_s_s,
355 PRIM_INT__d_s_s_s,
356 P_PA,
357 a_over_p,
358 aop_PQ,
359 one_over_2p,
360 9);
361
362
363 VRR_I_h_s_s_s(
364 PRIM_INT__h_s_s_s,
365 PRIM_INT__g_s_s_s,
366 PRIM_INT__f_s_s_s,
367 P_PA,
368 a_over_p,
369 aop_PQ,
370 one_over_2p,
371 8);
372
373
374 ostei_general_vrr1_I(6, 7,
375 one_over_2p, a_over_p, aop_PQ, P_PA,
376 PRIM_INT__h_s_s_s, PRIM_INT__g_s_s_s, PRIM_INT__i_s_s_s);
377
378
379 ostei_general_vrr1_I(7, 6,
380 one_over_2p, a_over_p, aop_PQ, P_PA,
381 PRIM_INT__i_s_s_s, PRIM_INT__h_s_s_s, PRIM_INT__k_s_s_s);
382
383
384 ostei_general_vrr_K(7, 0, 1, 0, 4,
385 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
386 PRIM_INT__k_s_s_s, NULL, NULL, PRIM_INT__i_s_s_s, NULL, PRIM_INT__k_s_p_s);
387
388
389 ostei_general_vrr_K(6, 0, 1, 0, 4,
390 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
391 PRIM_INT__i_s_s_s, NULL, NULL, PRIM_INT__h_s_s_s, NULL, PRIM_INT__i_s_p_s);
392
393
394 ostei_general_vrr_K(7, 0, 2, 0, 3,
395 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
396 PRIM_INT__k_s_p_s, PRIM_INT__k_s_s_s, NULL, PRIM_INT__i_s_p_s, NULL, PRIM_INT__k_s_d_s);
397
398
399 ostei_general_vrr_K(5, 0, 1, 0, 4,
400 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
401 PRIM_INT__h_s_s_s, NULL, NULL, PRIM_INT__g_s_s_s, NULL, PRIM_INT__h_s_p_s);
402
403
404 ostei_general_vrr_K(6, 0, 2, 0, 3,
405 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
406 PRIM_INT__i_s_p_s, PRIM_INT__i_s_s_s, NULL, PRIM_INT__h_s_p_s, NULL, PRIM_INT__i_s_d_s);
407
408
409 ostei_general_vrr_K(7, 0, 3, 0, 2,
410 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
411 PRIM_INT__k_s_d_s, PRIM_INT__k_s_p_s, NULL, PRIM_INT__i_s_d_s, NULL, PRIM_INT__k_s_f_s);
412
413
414 ostei_general_vrr1_I(8, 5,
415 one_over_2p, a_over_p, aop_PQ, P_PA,
416 PRIM_INT__k_s_s_s, PRIM_INT__i_s_s_s, PRIM_INT__l_s_s_s);
417
418
419 ostei_general_vrr_K(8, 0, 1, 0, 4,
420 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
421 PRIM_INT__l_s_s_s, NULL, NULL, PRIM_INT__k_s_s_s, NULL, PRIM_INT__l_s_p_s);
422
423
424 ostei_general_vrr_K(8, 0, 2, 0, 3,
425 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
426 PRIM_INT__l_s_p_s, PRIM_INT__l_s_s_s, NULL, PRIM_INT__k_s_p_s, NULL, PRIM_INT__l_s_d_s);
427
428
429 VRR_K_g_s_p_s(
430 PRIM_INT__g_s_p_s,
431 PRIM_INT__g_s_s_s,
432 PRIM_INT__f_s_s_s,
433 Q_PA,
434 aoq_PQ,
435 one_over_2pq,
436 4);
437
438
439 ostei_general_vrr_K(5, 0, 2, 0, 3,
440 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
441 PRIM_INT__h_s_p_s, PRIM_INT__h_s_s_s, NULL, PRIM_INT__g_s_p_s, NULL, PRIM_INT__h_s_d_s);
442
443
444 ostei_general_vrr_K(6, 0, 3, 0, 2,
445 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
446 PRIM_INT__i_s_d_s, PRIM_INT__i_s_p_s, NULL, PRIM_INT__h_s_d_s, NULL, PRIM_INT__i_s_f_s);
447
448
449 ostei_general_vrr_K(7, 0, 4, 0, 1,
450 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
451 PRIM_INT__k_s_f_s, PRIM_INT__k_s_d_s, NULL, PRIM_INT__i_s_f_s, NULL, PRIM_INT__k_s_g_s);
452
453
454 ostei_general_vrr_K(8, 0, 3, 0, 2,
455 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
456 PRIM_INT__l_s_d_s, PRIM_INT__l_s_p_s, NULL, PRIM_INT__k_s_d_s, NULL, PRIM_INT__l_s_f_s);
457
458
459 ostei_general_vrr_K(8, 0, 4, 0, 1,
460 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
461 PRIM_INT__l_s_f_s, PRIM_INT__l_s_d_s, NULL, PRIM_INT__k_s_f_s, NULL, PRIM_INT__l_s_g_s);
462
463
464
465
466 ////////////////////////////////////
467 // Accumulate contracted integrals
468 ////////////////////////////////////
469 if(lastoffset == 0)
470 {
471 contract_all(216, PRIM_INT__k_s_d_s, PRIM_PTR_INT__k_s_d_s);
472 contract_all(360, PRIM_INT__k_s_f_s, PRIM_PTR_INT__k_s_f_s);
473 contract_all(540, PRIM_INT__k_s_g_s, PRIM_PTR_INT__k_s_g_s);
474 contract_all(270, PRIM_INT__l_s_d_s, PRIM_PTR_INT__l_s_d_s);
475 contract_all(450, PRIM_INT__l_s_f_s, PRIM_PTR_INT__l_s_f_s);
476 contract_all(675, PRIM_INT__l_s_g_s, PRIM_PTR_INT__l_s_g_s);
477 }
478 else
479 {
480 contract(216, shelloffsets, PRIM_INT__k_s_d_s, PRIM_PTR_INT__k_s_d_s);
481 contract(360, shelloffsets, PRIM_INT__k_s_f_s, PRIM_PTR_INT__k_s_f_s);
482 contract(540, shelloffsets, PRIM_INT__k_s_g_s, PRIM_PTR_INT__k_s_g_s);
483 contract(270, shelloffsets, PRIM_INT__l_s_d_s, PRIM_PTR_INT__l_s_d_s);
484 contract(450, shelloffsets, PRIM_INT__l_s_f_s, PRIM_PTR_INT__l_s_f_s);
485 contract(675, shelloffsets, PRIM_INT__l_s_g_s, PRIM_PTR_INT__l_s_g_s);
486 PRIM_PTR_INT__k_s_d_s += lastoffset*216;
487 PRIM_PTR_INT__k_s_f_s += lastoffset*360;
488 PRIM_PTR_INT__k_s_g_s += lastoffset*540;
489 PRIM_PTR_INT__l_s_d_s += lastoffset*270;
490 PRIM_PTR_INT__l_s_f_s += lastoffset*450;
491 PRIM_PTR_INT__l_s_g_s += lastoffset*675;
492 }
493
494 } // close loop over j
495 } // close loop over i
496
497 //Advance to the next batch
498 jstart = SIMINT_SIMD_ROUND(jend);
499
500 //////////////////////////////////////////////
501 // Contracted integrals: Horizontal recurrance
502 //////////////////////////////////////////////
503
504
505 const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
506
507
508 for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
509 {
510 const double hCD[3] = { Q.AB_x[cd+abcd], Q.AB_y[cd+abcd], Q.AB_z[cd+abcd] };
511
512 // set up HRR pointers
513 double const * restrict HRR_INT__k_s_d_s = INT__k_s_d_s + abcd * 216;
514 double const * restrict HRR_INT__k_s_f_s = INT__k_s_f_s + abcd * 360;
515 double const * restrict HRR_INT__k_s_g_s = INT__k_s_g_s + abcd * 540;
516 double const * restrict HRR_INT__l_s_d_s = INT__l_s_d_s + abcd * 270;
517 double const * restrict HRR_INT__l_s_f_s = INT__l_s_f_s + abcd * 450;
518 double const * restrict HRR_INT__l_s_g_s = INT__l_s_g_s + abcd * 675;
519 double * restrict HRR_INT__k_p_d_d = INT__k_p_d_d + real_abcd * 3888;
520
521 // form INT__k_p_d_s
522 ostei_general_hrr_J(7, 1, 2, 0, hAB, HRR_INT__l_s_d_s, HRR_INT__k_s_d_s, HRR_INT__k_p_d_s);
523
524 // form INT__k_p_f_s
525 ostei_general_hrr_J(7, 1, 3, 0, hAB, HRR_INT__l_s_f_s, HRR_INT__k_s_f_s, HRR_INT__k_p_f_s);
526
527 // form INT__k_p_g_s
528 ostei_general_hrr_J(7, 1, 4, 0, hAB, HRR_INT__l_s_g_s, HRR_INT__k_s_g_s, HRR_INT__k_p_g_s);
529
530 // form INT__k_p_d_p
531 for(ibra = 0; ibra < 108; ++ibra)
532 {
533 HRR_INT__k_p_d_p[ibra * 18 + 0] = HRR_INT__k_p_f_s[ibra * 10 + 0] + ( hCD[0] * HRR_INT__k_p_d_s[ibra * 6 + 0] );
534
535 HRR_INT__k_p_d_p[ibra * 18 + 1] = HRR_INT__k_p_f_s[ibra * 10 + 1] + ( hCD[1] * HRR_INT__k_p_d_s[ibra * 6 + 0] );
536
537 HRR_INT__k_p_d_p[ibra * 18 + 2] = HRR_INT__k_p_f_s[ibra * 10 + 2] + ( hCD[2] * HRR_INT__k_p_d_s[ibra * 6 + 0] );
538
539 HRR_INT__k_p_d_p[ibra * 18 + 3] = HRR_INT__k_p_f_s[ibra * 10 + 1] + ( hCD[0] * HRR_INT__k_p_d_s[ibra * 6 + 1] );
540
541 HRR_INT__k_p_d_p[ibra * 18 + 4] = HRR_INT__k_p_f_s[ibra * 10 + 3] + ( hCD[1] * HRR_INT__k_p_d_s[ibra * 6 + 1] );
542
543 HRR_INT__k_p_d_p[ibra * 18 + 5] = HRR_INT__k_p_f_s[ibra * 10 + 4] + ( hCD[2] * HRR_INT__k_p_d_s[ibra * 6 + 1] );
544
545 HRR_INT__k_p_d_p[ibra * 18 + 6] = HRR_INT__k_p_f_s[ibra * 10 + 2] + ( hCD[0] * HRR_INT__k_p_d_s[ibra * 6 + 2] );
546
547 HRR_INT__k_p_d_p[ibra * 18 + 7] = HRR_INT__k_p_f_s[ibra * 10 + 4] + ( hCD[1] * HRR_INT__k_p_d_s[ibra * 6 + 2] );
548
549 HRR_INT__k_p_d_p[ibra * 18 + 8] = HRR_INT__k_p_f_s[ibra * 10 + 5] + ( hCD[2] * HRR_INT__k_p_d_s[ibra * 6 + 2] );
550
551 HRR_INT__k_p_d_p[ibra * 18 + 9] = HRR_INT__k_p_f_s[ibra * 10 + 3] + ( hCD[0] * HRR_INT__k_p_d_s[ibra * 6 + 3] );
552
553 HRR_INT__k_p_d_p[ibra * 18 + 10] = HRR_INT__k_p_f_s[ibra * 10 + 6] + ( hCD[1] * HRR_INT__k_p_d_s[ibra * 6 + 3] );
554
555 HRR_INT__k_p_d_p[ibra * 18 + 11] = HRR_INT__k_p_f_s[ibra * 10 + 7] + ( hCD[2] * HRR_INT__k_p_d_s[ibra * 6 + 3] );
556
557 HRR_INT__k_p_d_p[ibra * 18 + 12] = HRR_INT__k_p_f_s[ibra * 10 + 4] + ( hCD[0] * HRR_INT__k_p_d_s[ibra * 6 + 4] );
558
559 HRR_INT__k_p_d_p[ibra * 18 + 13] = HRR_INT__k_p_f_s[ibra * 10 + 7] + ( hCD[1] * HRR_INT__k_p_d_s[ibra * 6 + 4] );
560
561 HRR_INT__k_p_d_p[ibra * 18 + 14] = HRR_INT__k_p_f_s[ibra * 10 + 8] + ( hCD[2] * HRR_INT__k_p_d_s[ibra * 6 + 4] );
562
563 HRR_INT__k_p_d_p[ibra * 18 + 15] = HRR_INT__k_p_f_s[ibra * 10 + 5] + ( hCD[0] * HRR_INT__k_p_d_s[ibra * 6 + 5] );
564
565 HRR_INT__k_p_d_p[ibra * 18 + 16] = HRR_INT__k_p_f_s[ibra * 10 + 8] + ( hCD[1] * HRR_INT__k_p_d_s[ibra * 6 + 5] );
566
567 HRR_INT__k_p_d_p[ibra * 18 + 17] = HRR_INT__k_p_f_s[ibra * 10 + 9] + ( hCD[2] * HRR_INT__k_p_d_s[ibra * 6 + 5] );
568
569 }
570
571 // form INT__k_p_f_p
572 HRR_L_f_p(
573 HRR_INT__k_p_f_p,
574 HRR_INT__k_p_f_s,
575 HRR_INT__k_p_g_s,
576 hCD, 108);
577
578 // form INT__k_p_d_d
579 HRR_L_d_d(
580 HRR_INT__k_p_d_d,
581 HRR_INT__k_p_d_p,
582 HRR_INT__k_p_f_p,
583 hCD, 108);
584
585
586 } // close HRR loop
587
588
589 } // close loop cdbatch
590
591 istart = iend;
592 } // close loop over ab
593
594 return P.nshell12_clip * Q.nshell12_clip;
595 }
596
ostei_p_k_d_d(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__p_k_d_d)597 int ostei_p_k_d_d(struct simint_multi_shellpair const P,
598 struct simint_multi_shellpair const Q,
599 double screen_tol,
600 double * const restrict work,
601 double * const restrict INT__p_k_d_d)
602 {
603 double P_AB[3*P.nshell12];
604 struct simint_multi_shellpair P_tmp = P;
605 P_tmp.PA_x = P.PB_x; P_tmp.PA_y = P.PB_y; P_tmp.PA_z = P.PB_z;
606 P_tmp.PB_x = P.PA_x; P_tmp.PB_y = P.PA_y; P_tmp.PB_z = P.PA_z;
607 P_tmp.AB_x = P_AB;
608 P_tmp.AB_y = P_AB + P.nshell12;
609 P_tmp.AB_z = P_AB + 2*P.nshell12;
610
611 for(int i = 0; i < P.nshell12; i++)
612 {
613 P_tmp.AB_x[i] = -P.AB_x[i];
614 P_tmp.AB_y[i] = -P.AB_y[i];
615 P_tmp.AB_z[i] = -P.AB_z[i];
616 }
617
618 int ret = ostei_k_p_d_d(P_tmp, Q, screen_tol, work, INT__p_k_d_d);
619 double buffer[3888] SIMINT_ALIGN_ARRAY_DBL;
620
621 for(int q = 0; q < ret; q++)
622 {
623 int idx = 0;
624 for(int a = 0; a < 3; ++a)
625 for(int b = 0; b < 36; ++b)
626 for(int c = 0; c < 6; ++c)
627 for(int d = 0; d < 6; ++d)
628 buffer[idx++] = INT__p_k_d_d[q*3888+b*108+a*36+c*6+d];
629
630 memcpy(INT__p_k_d_d+q*3888, buffer, 3888*sizeof(double));
631 }
632
633 return ret;
634 }
635
636