1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6
7
ostei_d_s_h_g(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_s_h_g)8 int ostei_d_s_h_g(struct simint_multi_shellpair const P,
9 struct simint_multi_shellpair const Q,
10 double screen_tol,
11 double * const restrict work,
12 double * const restrict INT__d_s_h_g)
13 {
14
15 SIMINT_ASSUME_ALIGN_DBL(work);
16 SIMINT_ASSUME_ALIGN_DBL(INT__d_s_h_g);
17 int ab, cd, abcd;
18 int istart, jstart;
19 int iprimcd, nprim_icd, icd;
20 const int check_screen = (screen_tol > 0.0);
21 int i, j;
22 int n;
23 int not_screened;
24 int real_abcd;
25 int ibra;
26
27 // partition workspace
28 double * const INT__d_s_h_s = work + (SIMINT_NSHELL_SIMD * 0);
29 double * const INT__d_s_i_s = work + (SIMINT_NSHELL_SIMD * 126);
30 double * const INT__d_s_k_s = work + (SIMINT_NSHELL_SIMD * 294);
31 double * const INT__d_s_l_s = work + (SIMINT_NSHELL_SIMD * 510);
32 double * const INT__d_s_m_s = work + (SIMINT_NSHELL_SIMD * 780);
33 SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*1110);
34 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
35 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_p_s = primwork + 12;
36 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_d_s = primwork + 45;
37 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_f_s = primwork + 105;
38 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_g_s = primwork + 195;
39 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_h_s = primwork + 315;
40 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_i_s = primwork + 462;
41 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_k_s = primwork + 630;
42 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_l_s = primwork + 810;
43 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_m_s = primwork + 990;
44 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_g_s = primwork + 1155;
45 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_h_s = primwork + 1245;
46 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_i_s = primwork + 1371;
47 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_k_s = primwork + 1539;
48 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_l_s = primwork + 1755;
49 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_m_s = primwork + 2025;
50 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_h_s = primwork + 2355;
51 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_i_s = primwork + 2481;
52 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_k_s = primwork + 2649;
53 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_l_s = primwork + 2865;
54 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_m_s = primwork + 3135;
55 double * const hrrwork = (double *)(primwork + 3465);
56 double * const HRR_INT__d_s_h_p = hrrwork + 0;
57 double * const HRR_INT__d_s_h_d = hrrwork + 378;
58 double * const HRR_INT__d_s_h_f = hrrwork + 1134;
59 double * const HRR_INT__d_s_i_p = hrrwork + 2394;
60 double * const HRR_INT__d_s_i_d = hrrwork + 2898;
61 double * const HRR_INT__d_s_i_f = hrrwork + 3906;
62 double * const HRR_INT__d_s_k_p = hrrwork + 5586;
63 double * const HRR_INT__d_s_k_d = hrrwork + 6234;
64 double * const HRR_INT__d_s_l_p = hrrwork + 7530;
65
66
67 // Create constants
68 const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
69 const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
70 const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
71 const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
72 const SIMINT_DBLTYPE const_5 = SIMINT_DBLSET1(5);
73 const SIMINT_DBLTYPE const_6 = SIMINT_DBLSET1(6);
74 const SIMINT_DBLTYPE const_7 = SIMINT_DBLSET1(7);
75 const SIMINT_DBLTYPE const_8 = SIMINT_DBLSET1(8);
76 const SIMINT_DBLTYPE const_9 = SIMINT_DBLSET1(9);
77 const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
78
79
80 ////////////////////////////////////////
81 // Loop over shells and primitives
82 ////////////////////////////////////////
83
84 real_abcd = 0;
85 istart = 0;
86 for(ab = 0; ab < P.nshell12_clip; ++ab)
87 {
88 const int iend = istart + P.nprim12[ab];
89
90 cd = 0;
91 jstart = 0;
92
93 for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
94 {
95 const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
96 int jend = jstart;
97 for(i = 0; i < nshellbatch; i++)
98 jend += Q.nprim12[cd+i];
99
100 // Clear the beginning of the workspace (where we are accumulating integrals)
101 memset(work, 0, SIMINT_NSHELL_SIMD * 1110 * sizeof(double));
102 abcd = 0;
103
104
105 for(i = istart; i < iend; ++i)
106 {
107 SIMINT_DBLTYPE bra_screen_max; // only used if check_screen
108
109 if(check_screen)
110 {
111 // Skip this whole thing if always insignificant
112 if((P.screen[i] * Q.screen_max) < screen_tol)
113 continue;
114 bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
115 }
116
117 icd = 0;
118 iprimcd = 0;
119 nprim_icd = Q.nprim12[cd];
120 double * restrict PRIM_PTR_INT__d_s_h_s = INT__d_s_h_s + abcd * 126;
121 double * restrict PRIM_PTR_INT__d_s_i_s = INT__d_s_i_s + abcd * 168;
122 double * restrict PRIM_PTR_INT__d_s_k_s = INT__d_s_k_s + abcd * 216;
123 double * restrict PRIM_PTR_INT__d_s_l_s = INT__d_s_l_s + abcd * 270;
124 double * restrict PRIM_PTR_INT__d_s_m_s = INT__d_s_m_s + abcd * 330;
125
126
127
128 // Load these one per loop over i
129 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
130 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
131 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
132
133 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
134
135 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
136 {
137 // calculate the shell offsets
138 // these are the offset from the shell pointed to by cd
139 // for each element
140 int shelloffsets[SIMINT_SIMD_LEN] = {0};
141 int lastoffset = 0;
142 const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
143
144 if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
145 {
146 // Handle if the first element of the vector is a new shell
147 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
148 {
149 nprim_icd += Q.nprim12[cd + (++icd)];
150 PRIM_PTR_INT__d_s_h_s += 126;
151 PRIM_PTR_INT__d_s_i_s += 168;
152 PRIM_PTR_INT__d_s_k_s += 216;
153 PRIM_PTR_INT__d_s_l_s += 270;
154 PRIM_PTR_INT__d_s_m_s += 330;
155 }
156 iprimcd++;
157 for(n = 1; n < SIMINT_SIMD_LEN; ++n)
158 {
159 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
160 {
161 shelloffsets[n] = shelloffsets[n-1] + 1;
162 lastoffset++;
163 nprim_icd += Q.nprim12[cd + (++icd)];
164 }
165 else
166 shelloffsets[n] = shelloffsets[n-1];
167 iprimcd++;
168 }
169 }
170 else
171 iprimcd += SIMINT_SIMD_LEN;
172
173 // Do we have to compute this vector (or has it been screened out)?
174 // (not_screened != 0 means we have to do this vector)
175 if(check_screen)
176 {
177 const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
178 if(vmax < screen_tol)
179 {
180 PRIM_PTR_INT__d_s_h_s += lastoffset*126;
181 PRIM_PTR_INT__d_s_i_s += lastoffset*168;
182 PRIM_PTR_INT__d_s_k_s += lastoffset*216;
183 PRIM_PTR_INT__d_s_l_s += lastoffset*270;
184 PRIM_PTR_INT__d_s_m_s += lastoffset*330;
185 continue;
186 }
187 }
188
189 const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
190 const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
191 const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
192 const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
193
194
195 /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
196 SIMINT_DBLTYPE PQ[3];
197 PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
198 PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
199 PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
200 SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
201 R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
202 R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
203
204 const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
205 const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
206 const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
207 const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
208 const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
209 const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
210 const SIMINT_DBLTYPE Q_PA[3] = { SIMINT_DBLLOAD(Q.PA_x, j), SIMINT_DBLLOAD(Q.PA_y, j), SIMINT_DBLLOAD(Q.PA_z, j) };
211
212 // NOTE: Minus sign!
213 const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
214 SIMINT_DBLTYPE aop_PQ[3];
215 aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
216 aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
217 aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
218
219 SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
220 SIMINT_DBLTYPE aoq_PQ[3];
221 aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
222 aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
223 aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
224 // Put a minus sign here so we don't have to in RR routines
225 a_over_q = SIMINT_NEG(a_over_q);
226
227
228 //////////////////////////////////////////////
229 // Fjt function section
230 // Maximum v value: 11
231 //////////////////////////////////////////////
232 // The parameter to the Fjt function
233 const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
234
235
236 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
237
238
239 boys_F_split(PRIM_INT__s_s_s_s, F_x, 11);
240 SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
241 prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
242 for(n = 0; n <= 11; n++)
243 PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
244
245 //////////////////////////////////////////////
246 // Primitive integrals: Vertical recurrance
247 //////////////////////////////////////////////
248
249 const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
250 const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
251 const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
252 const SIMINT_DBLTYPE vrr_const_3_over_2q = SIMINT_MUL(const_3, one_over_2q);
253 const SIMINT_DBLTYPE vrr_const_4_over_2q = SIMINT_MUL(const_4, one_over_2q);
254 const SIMINT_DBLTYPE vrr_const_5_over_2q = SIMINT_MUL(const_5, one_over_2q);
255 const SIMINT_DBLTYPE vrr_const_6_over_2q = SIMINT_MUL(const_6, one_over_2q);
256 const SIMINT_DBLTYPE vrr_const_7_over_2q = SIMINT_MUL(const_7, one_over_2q);
257 const SIMINT_DBLTYPE vrr_const_8_over_2q = SIMINT_MUL(const_8, one_over_2q);
258 const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
259 const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
260 const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
261 const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
262 const SIMINT_DBLTYPE vrr_const_5_over_2pq = SIMINT_MUL(const_5, one_over_2pq);
263 const SIMINT_DBLTYPE vrr_const_6_over_2pq = SIMINT_MUL(const_6, one_over_2pq);
264 const SIMINT_DBLTYPE vrr_const_7_over_2pq = SIMINT_MUL(const_7, one_over_2pq);
265 const SIMINT_DBLTYPE vrr_const_8_over_2pq = SIMINT_MUL(const_8, one_over_2pq);
266 const SIMINT_DBLTYPE vrr_const_9_over_2pq = SIMINT_MUL(const_9, one_over_2pq);
267
268
269
270 // Forming PRIM_INT__s_s_p_s[11 * 3];
271 for(n = 0; n < 11; ++n) // loop over orders of auxiliary function
272 {
273
274 PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
275 PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]);
276
277 PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
278 PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 1]);
279
280 PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
281 PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 2]);
282
283 }
284
285
286
287 // Forming PRIM_INT__s_s_d_s[10 * 6];
288 for(n = 0; n < 10; ++n) // loop over orders of auxiliary function
289 {
290
291 PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_p_s[n * 3 + 0]);
292 PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 0]);
293 PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 0]);
294
295 PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 0]);
296 PRIM_INT__s_s_d_s[n * 6 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 1]);
297
298 PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 1]);
299 PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_d_s[n * 6 + 3]);
300 PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 3]);
301
302 PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 2]);
303 PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_d_s[n * 6 + 5]);
304 PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 5]);
305
306 }
307
308
309
310 // Forming PRIM_INT__s_s_f_s[9 * 10];
311 for(n = 0; n < 9; ++n) // loop over orders of auxiliary function
312 {
313
314 PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 0]);
315 PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 0]);
316 PRIM_INT__s_s_f_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]), PRIM_INT__s_s_f_s[n * 10 + 0]);
317
318 PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 0]);
319 PRIM_INT__s_s_f_s[n * 10 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 1]);
320
321 PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 0]);
322 PRIM_INT__s_s_f_s[n * 10 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 0], PRIM_INT__s_s_f_s[n * 10 + 2]);
323
324 PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 3]);
325 PRIM_INT__s_s_f_s[n * 10 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 3]);
326
327 PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 1]);
328 PRIM_INT__s_s_f_s[n * 10 + 4] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 1], PRIM_INT__s_s_f_s[n * 10 + 4]);
329
330 PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_d_s[n * 6 + 5]);
331 PRIM_INT__s_s_f_s[n * 10 + 5] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 5]);
332
333 PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 3]);
334 PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 6]);
335 PRIM_INT__s_s_f_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_p_s[n * 3 + 1]), PRIM_INT__s_s_f_s[n * 10 + 6]);
336
337 PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 3]);
338 PRIM_INT__s_s_f_s[n * 10 + 7] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 3], PRIM_INT__s_s_f_s[n * 10 + 7]);
339
340 PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_d_s[n * 6 + 5]);
341 PRIM_INT__s_s_f_s[n * 10 + 8] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 8]);
342
343 PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_d_s[n * 6 + 5]);
344 PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_d_s[(n+1) * 6 + 5], PRIM_INT__s_s_f_s[n * 10 + 9]);
345 PRIM_INT__s_s_f_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_p_s[n * 3 + 2]), PRIM_INT__s_s_f_s[n * 10 + 9]);
346
347 }
348
349
350 VRR_K_s_s_g_s(
351 PRIM_INT__s_s_g_s,
352 PRIM_INT__s_s_f_s,
353 PRIM_INT__s_s_d_s,
354 Q_PA,
355 a_over_q,
356 aoq_PQ,
357 one_over_2q,
358 8);
359
360
361 VRR_K_s_s_h_s(
362 PRIM_INT__s_s_h_s,
363 PRIM_INT__s_s_g_s,
364 PRIM_INT__s_s_f_s,
365 Q_PA,
366 a_over_q,
367 aoq_PQ,
368 one_over_2q,
369 7);
370
371
372 ostei_general_vrr_I(1, 0, 5, 0, 2,
373 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
374 PRIM_INT__s_s_h_s, NULL, NULL, PRIM_INT__s_s_g_s, NULL, PRIM_INT__p_s_h_s);
375
376
377 VRR_I_p_s_g_s(
378 PRIM_INT__p_s_g_s,
379 PRIM_INT__s_s_g_s,
380 PRIM_INT__s_s_f_s,
381 P_PA,
382 aop_PQ,
383 one_over_2pq,
384 2);
385
386
387 ostei_general_vrr_I(2, 0, 5, 0, 1,
388 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
389 PRIM_INT__p_s_h_s, PRIM_INT__s_s_h_s, NULL, PRIM_INT__p_s_g_s, NULL, PRIM_INT__d_s_h_s);
390
391
392 ostei_general_vrr1_K(6, 6,
393 one_over_2q, a_over_q, aoq_PQ, Q_PA,
394 PRIM_INT__s_s_h_s, PRIM_INT__s_s_g_s, PRIM_INT__s_s_i_s);
395
396
397 ostei_general_vrr_I(1, 0, 6, 0, 2,
398 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
399 PRIM_INT__s_s_i_s, NULL, NULL, PRIM_INT__s_s_h_s, NULL, PRIM_INT__p_s_i_s);
400
401
402 ostei_general_vrr_I(2, 0, 6, 0, 1,
403 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
404 PRIM_INT__p_s_i_s, PRIM_INT__s_s_i_s, NULL, PRIM_INT__p_s_h_s, NULL, PRIM_INT__d_s_i_s);
405
406
407 ostei_general_vrr1_K(7, 5,
408 one_over_2q, a_over_q, aoq_PQ, Q_PA,
409 PRIM_INT__s_s_i_s, PRIM_INT__s_s_h_s, PRIM_INT__s_s_k_s);
410
411
412 ostei_general_vrr_I(1, 0, 7, 0, 2,
413 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
414 PRIM_INT__s_s_k_s, NULL, NULL, PRIM_INT__s_s_i_s, NULL, PRIM_INT__p_s_k_s);
415
416
417 ostei_general_vrr_I(2, 0, 7, 0, 1,
418 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
419 PRIM_INT__p_s_k_s, PRIM_INT__s_s_k_s, NULL, PRIM_INT__p_s_i_s, NULL, PRIM_INT__d_s_k_s);
420
421
422 ostei_general_vrr1_K(8, 4,
423 one_over_2q, a_over_q, aoq_PQ, Q_PA,
424 PRIM_INT__s_s_k_s, PRIM_INT__s_s_i_s, PRIM_INT__s_s_l_s);
425
426
427 ostei_general_vrr_I(1, 0, 8, 0, 2,
428 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
429 PRIM_INT__s_s_l_s, NULL, NULL, PRIM_INT__s_s_k_s, NULL, PRIM_INT__p_s_l_s);
430
431
432 ostei_general_vrr_I(2, 0, 8, 0, 1,
433 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
434 PRIM_INT__p_s_l_s, PRIM_INT__s_s_l_s, NULL, PRIM_INT__p_s_k_s, NULL, PRIM_INT__d_s_l_s);
435
436
437 ostei_general_vrr1_K(9, 3,
438 one_over_2q, a_over_q, aoq_PQ, Q_PA,
439 PRIM_INT__s_s_l_s, PRIM_INT__s_s_k_s, PRIM_INT__s_s_m_s);
440
441
442 ostei_general_vrr_I(1, 0, 9, 0, 2,
443 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
444 PRIM_INT__s_s_m_s, NULL, NULL, PRIM_INT__s_s_l_s, NULL, PRIM_INT__p_s_m_s);
445
446
447 ostei_general_vrr_I(2, 0, 9, 0, 1,
448 one_over_2p, a_over_p, one_over_2pq, aop_PQ, P_PA,
449 PRIM_INT__p_s_m_s, PRIM_INT__s_s_m_s, NULL, PRIM_INT__p_s_l_s, NULL, PRIM_INT__d_s_m_s);
450
451
452
453
454 ////////////////////////////////////
455 // Accumulate contracted integrals
456 ////////////////////////////////////
457 if(lastoffset == 0)
458 {
459 contract_all(126, PRIM_INT__d_s_h_s, PRIM_PTR_INT__d_s_h_s);
460 contract_all(168, PRIM_INT__d_s_i_s, PRIM_PTR_INT__d_s_i_s);
461 contract_all(216, PRIM_INT__d_s_k_s, PRIM_PTR_INT__d_s_k_s);
462 contract_all(270, PRIM_INT__d_s_l_s, PRIM_PTR_INT__d_s_l_s);
463 contract_all(330, PRIM_INT__d_s_m_s, PRIM_PTR_INT__d_s_m_s);
464 }
465 else
466 {
467 contract(126, shelloffsets, PRIM_INT__d_s_h_s, PRIM_PTR_INT__d_s_h_s);
468 contract(168, shelloffsets, PRIM_INT__d_s_i_s, PRIM_PTR_INT__d_s_i_s);
469 contract(216, shelloffsets, PRIM_INT__d_s_k_s, PRIM_PTR_INT__d_s_k_s);
470 contract(270, shelloffsets, PRIM_INT__d_s_l_s, PRIM_PTR_INT__d_s_l_s);
471 contract(330, shelloffsets, PRIM_INT__d_s_m_s, PRIM_PTR_INT__d_s_m_s);
472 PRIM_PTR_INT__d_s_h_s += lastoffset*126;
473 PRIM_PTR_INT__d_s_i_s += lastoffset*168;
474 PRIM_PTR_INT__d_s_k_s += lastoffset*216;
475 PRIM_PTR_INT__d_s_l_s += lastoffset*270;
476 PRIM_PTR_INT__d_s_m_s += lastoffset*330;
477 }
478
479 } // close loop over j
480 } // close loop over i
481
482 //Advance to the next batch
483 jstart = SIMINT_SIMD_ROUND(jend);
484
485 //////////////////////////////////////////////
486 // Contracted integrals: Horizontal recurrance
487 //////////////////////////////////////////////
488
489
490
491
492 for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
493 {
494 const double hCD[3] = { Q.AB_x[cd+abcd], Q.AB_y[cd+abcd], Q.AB_z[cd+abcd] };
495
496 // set up HRR pointers
497 double const * restrict HRR_INT__d_s_h_s = INT__d_s_h_s + abcd * 126;
498 double const * restrict HRR_INT__d_s_i_s = INT__d_s_i_s + abcd * 168;
499 double const * restrict HRR_INT__d_s_k_s = INT__d_s_k_s + abcd * 216;
500 double const * restrict HRR_INT__d_s_l_s = INT__d_s_l_s + abcd * 270;
501 double const * restrict HRR_INT__d_s_m_s = INT__d_s_m_s + abcd * 330;
502 double * restrict HRR_INT__d_s_h_g = INT__d_s_h_g + real_abcd * 1890;
503
504 // form INT__d_s_h_p
505 ostei_general_hrr_L(2, 0, 5, 1, hCD, HRR_INT__d_s_i_s, HRR_INT__d_s_h_s, HRR_INT__d_s_h_p);
506
507 // form INT__d_s_i_p
508 ostei_general_hrr_L(2, 0, 6, 1, hCD, HRR_INT__d_s_k_s, HRR_INT__d_s_i_s, HRR_INT__d_s_i_p);
509
510 // form INT__d_s_k_p
511 ostei_general_hrr_L(2, 0, 7, 1, hCD, HRR_INT__d_s_l_s, HRR_INT__d_s_k_s, HRR_INT__d_s_k_p);
512
513 // form INT__d_s_l_p
514 ostei_general_hrr_L(2, 0, 8, 1, hCD, HRR_INT__d_s_m_s, HRR_INT__d_s_l_s, HRR_INT__d_s_l_p);
515
516 // form INT__d_s_h_d
517 ostei_general_hrr_L(2, 0, 5, 2, hCD, HRR_INT__d_s_i_p, HRR_INT__d_s_h_p, HRR_INT__d_s_h_d);
518
519 // form INT__d_s_i_d
520 ostei_general_hrr_L(2, 0, 6, 2, hCD, HRR_INT__d_s_k_p, HRR_INT__d_s_i_p, HRR_INT__d_s_i_d);
521
522 // form INT__d_s_k_d
523 ostei_general_hrr_L(2, 0, 7, 2, hCD, HRR_INT__d_s_l_p, HRR_INT__d_s_k_p, HRR_INT__d_s_k_d);
524
525 // form INT__d_s_h_f
526 ostei_general_hrr_L(2, 0, 5, 3, hCD, HRR_INT__d_s_i_d, HRR_INT__d_s_h_d, HRR_INT__d_s_h_f);
527
528 // form INT__d_s_i_f
529 ostei_general_hrr_L(2, 0, 6, 3, hCD, HRR_INT__d_s_k_d, HRR_INT__d_s_i_d, HRR_INT__d_s_i_f);
530
531 // form INT__d_s_h_g
532 ostei_general_hrr_L(2, 0, 5, 4, hCD, HRR_INT__d_s_i_f, HRR_INT__d_s_h_f, HRR_INT__d_s_h_g);
533
534
535 } // close HRR loop
536
537
538 } // close loop cdbatch
539
540 istart = iend;
541 } // close loop over ab
542
543 return P.nshell12_clip * Q.nshell12_clip;
544 }
545
ostei_s_d_h_g(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__s_d_h_g)546 int ostei_s_d_h_g(struct simint_multi_shellpair const P,
547 struct simint_multi_shellpair const Q,
548 double screen_tol,
549 double * const restrict work,
550 double * const restrict INT__s_d_h_g)
551 {
552 double P_AB[3*P.nshell12];
553 struct simint_multi_shellpair P_tmp = P;
554 P_tmp.PA_x = P.PB_x; P_tmp.PA_y = P.PB_y; P_tmp.PA_z = P.PB_z;
555 P_tmp.PB_x = P.PA_x; P_tmp.PB_y = P.PA_y; P_tmp.PB_z = P.PA_z;
556 P_tmp.AB_x = P_AB;
557 P_tmp.AB_y = P_AB + P.nshell12;
558 P_tmp.AB_z = P_AB + 2*P.nshell12;
559
560 for(int i = 0; i < P.nshell12; i++)
561 {
562 P_tmp.AB_x[i] = -P.AB_x[i];
563 P_tmp.AB_y[i] = -P.AB_y[i];
564 P_tmp.AB_z[i] = -P.AB_z[i];
565 }
566
567 int ret = ostei_d_s_h_g(P_tmp, Q, screen_tol, work, INT__s_d_h_g);
568 double buffer[1890] SIMINT_ALIGN_ARRAY_DBL;
569
570 for(int q = 0; q < ret; q++)
571 {
572 int idx = 0;
573 for(int a = 0; a < 1; ++a)
574 for(int b = 0; b < 6; ++b)
575 for(int c = 0; c < 21; ++c)
576 for(int d = 0; d < 15; ++d)
577 buffer[idx++] = INT__s_d_h_g[q*1890+b*315+a*315+c*15+d];
578
579 memcpy(INT__s_d_h_g+q*1890, buffer, 1890*sizeof(double));
580 }
581
582 return ret;
583 }
584
ostei_d_s_g_h(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_s_g_h)585 int ostei_d_s_g_h(struct simint_multi_shellpair const P,
586 struct simint_multi_shellpair const Q,
587 double screen_tol,
588 double * const restrict work,
589 double * const restrict INT__d_s_g_h)
590 {
591 double Q_AB[3*Q.nshell12];
592 struct simint_multi_shellpair Q_tmp = Q;
593 Q_tmp.PA_x = Q.PB_x; Q_tmp.PA_y = Q.PB_y; Q_tmp.PA_z = Q.PB_z;
594 Q_tmp.PB_x = Q.PA_x; Q_tmp.PB_y = Q.PA_y; Q_tmp.PB_z = Q.PA_z;
595 Q_tmp.AB_x = Q_AB;
596 Q_tmp.AB_y = Q_AB + Q.nshell12;
597 Q_tmp.AB_z = Q_AB + 2*Q.nshell12;
598
599 for(int i = 0; i < Q.nshell12; i++)
600 {
601 Q_tmp.AB_x[i] = -Q.AB_x[i];
602 Q_tmp.AB_y[i] = -Q.AB_y[i];
603 Q_tmp.AB_z[i] = -Q.AB_z[i];
604 }
605
606 int ret = ostei_d_s_h_g(P, Q_tmp, screen_tol, work, INT__d_s_g_h);
607 double buffer[1890] SIMINT_ALIGN_ARRAY_DBL;
608
609 for(int q = 0; q < ret; q++)
610 {
611 int idx = 0;
612 for(int a = 0; a < 6; ++a)
613 for(int b = 0; b < 1; ++b)
614 for(int c = 0; c < 15; ++c)
615 for(int d = 0; d < 21; ++d)
616 buffer[idx++] = INT__d_s_g_h[q*1890+a*315+b*315+d*15+c];
617
618 memcpy(INT__d_s_g_h+q*1890, buffer, 1890*sizeof(double));
619 }
620
621 return ret;
622 }
623
ostei_s_d_g_h(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__s_d_g_h)624 int ostei_s_d_g_h(struct simint_multi_shellpair const P,
625 struct simint_multi_shellpair const Q,
626 double screen_tol,
627 double * const restrict work,
628 double * const restrict INT__s_d_g_h)
629 {
630 double P_AB[3*P.nshell12];
631 struct simint_multi_shellpair P_tmp = P;
632 P_tmp.PA_x = P.PB_x; P_tmp.PA_y = P.PB_y; P_tmp.PA_z = P.PB_z;
633 P_tmp.PB_x = P.PA_x; P_tmp.PB_y = P.PA_y; P_tmp.PB_z = P.PA_z;
634 P_tmp.AB_x = P_AB;
635 P_tmp.AB_y = P_AB + P.nshell12;
636 P_tmp.AB_z = P_AB + 2*P.nshell12;
637
638 for(int i = 0; i < P.nshell12; i++)
639 {
640 P_tmp.AB_x[i] = -P.AB_x[i];
641 P_tmp.AB_y[i] = -P.AB_y[i];
642 P_tmp.AB_z[i] = -P.AB_z[i];
643 }
644
645 double Q_AB[3*Q.nshell12];
646 struct simint_multi_shellpair Q_tmp = Q;
647 Q_tmp.PA_x = Q.PB_x; Q_tmp.PA_y = Q.PB_y; Q_tmp.PA_z = Q.PB_z;
648 Q_tmp.PB_x = Q.PA_x; Q_tmp.PB_y = Q.PA_y; Q_tmp.PB_z = Q.PA_z;
649 Q_tmp.AB_x = Q_AB;
650 Q_tmp.AB_y = Q_AB + Q.nshell12;
651 Q_tmp.AB_z = Q_AB + 2*Q.nshell12;
652
653 for(int i = 0; i < Q.nshell12; i++)
654 {
655 Q_tmp.AB_x[i] = -Q.AB_x[i];
656 Q_tmp.AB_y[i] = -Q.AB_y[i];
657 Q_tmp.AB_z[i] = -Q.AB_z[i];
658 }
659
660 int ret = ostei_d_s_h_g(P_tmp, Q_tmp, screen_tol, work, INT__s_d_g_h);
661 double buffer[1890] SIMINT_ALIGN_ARRAY_DBL;
662
663 for(int q = 0; q < ret; q++)
664 {
665 int idx = 0;
666 for(int a = 0; a < 1; ++a)
667 for(int b = 0; b < 6; ++b)
668 for(int c = 0; c < 15; ++c)
669 for(int d = 0; d < 21; ++d)
670 buffer[idx++] = INT__s_d_g_h[q*1890+b*315+a*315+d*15+c];
671
672 memcpy(INT__s_d_g_h+q*1890, buffer, 1890*sizeof(double));
673 }
674
675 return ret;
676 }
677
678