1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6
7
ostei_d_d_p_f(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__d_d_p_f)8 int ostei_d_d_p_f(struct simint_multi_shellpair const P,
9 struct simint_multi_shellpair const Q,
10 double screen_tol,
11 double * const restrict work,
12 double * const restrict INT__d_d_p_f)
13 {
14
15 SIMINT_ASSUME_ALIGN_DBL(work);
16 SIMINT_ASSUME_ALIGN_DBL(INT__d_d_p_f);
17 int ab, cd, abcd;
18 int istart, jstart;
19 int iprimcd, nprim_icd, icd;
20 const int check_screen = (screen_tol > 0.0);
21 int i, j;
22 int n;
23 int not_screened;
24 int real_abcd;
25 int iket;
26 int ibra;
27
28 // partition workspace
29 double * const INT__d_s_s_f = work + (SIMINT_NSHELL_SIMD * 0);
30 double * const INT__d_s_s_g = work + (SIMINT_NSHELL_SIMD * 60);
31 double * const INT__f_s_s_f = work + (SIMINT_NSHELL_SIMD * 150);
32 double * const INT__f_s_s_g = work + (SIMINT_NSHELL_SIMD * 250);
33 double * const INT__g_s_s_f = work + (SIMINT_NSHELL_SIMD * 400);
34 double * const INT__g_s_s_g = work + (SIMINT_NSHELL_SIMD * 550);
35 SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*775);
36 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
37 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_p = primwork + 9;
38 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_d = primwork + 21;
39 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_s_s = primwork + 39;
40 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_s_p = primwork + 63;
41 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_s_d = primwork + 99;
42 SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_s_f = primwork + 153;
43 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_s = primwork + 213;
44 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_p = primwork + 255;
45 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_d = primwork + 327;
46 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_f = primwork + 435;
47 SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_g = primwork + 555;
48 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_s = primwork + 645;
49 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_p = primwork + 705;
50 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_d = primwork + 825;
51 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_f = primwork + 1005;
52 SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_g = primwork + 1205;
53 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_s = primwork + 1355;
54 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_p = primwork + 1430;
55 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_d = primwork + 1610;
56 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_f = primwork + 1880;
57 SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_g = primwork + 2180;
58 double * const hrrwork = (double *)(primwork + 2405);
59 double * const HRR_INT__d_p_s_f = hrrwork + 0;
60 double * const HRR_INT__d_p_s_g = hrrwork + 180;
61 double * const HRR_INT__d_d_s_f = hrrwork + 450;
62 double * const HRR_INT__d_d_s_g = hrrwork + 810;
63 double * const HRR_INT__f_p_s_f = hrrwork + 1350;
64 double * const HRR_INT__f_p_s_g = hrrwork + 1650;
65
66
67 // Create constants
68 const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
69 const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
70 const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
71 const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
72 const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
73
74
75 ////////////////////////////////////////
76 // Loop over shells and primitives
77 ////////////////////////////////////////
78
79 real_abcd = 0;
80 istart = 0;
81 for(ab = 0; ab < P.nshell12_clip; ++ab)
82 {
83 const int iend = istart + P.nprim12[ab];
84
85 cd = 0;
86 jstart = 0;
87
88 for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
89 {
90 const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
91 int jend = jstart;
92 for(i = 0; i < nshellbatch; i++)
93 jend += Q.nprim12[cd+i];
94
95 // Clear the beginning of the workspace (where we are accumulating integrals)
96 memset(work, 0, SIMINT_NSHELL_SIMD * 775 * sizeof(double));
97 abcd = 0;
98
99
100 for(i = istart; i < iend; ++i)
101 {
102 SIMINT_DBLTYPE bra_screen_max; // only used if check_screen
103
104 if(check_screen)
105 {
106 // Skip this whole thing if always insignificant
107 if((P.screen[i] * Q.screen_max) < screen_tol)
108 continue;
109 bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
110 }
111
112 icd = 0;
113 iprimcd = 0;
114 nprim_icd = Q.nprim12[cd];
115 double * restrict PRIM_PTR_INT__d_s_s_f = INT__d_s_s_f + abcd * 60;
116 double * restrict PRIM_PTR_INT__d_s_s_g = INT__d_s_s_g + abcd * 90;
117 double * restrict PRIM_PTR_INT__f_s_s_f = INT__f_s_s_f + abcd * 100;
118 double * restrict PRIM_PTR_INT__f_s_s_g = INT__f_s_s_g + abcd * 150;
119 double * restrict PRIM_PTR_INT__g_s_s_f = INT__g_s_s_f + abcd * 150;
120 double * restrict PRIM_PTR_INT__g_s_s_g = INT__g_s_s_g + abcd * 225;
121
122
123
124 // Load these once per loop over i
125 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
126 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
127 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
128
129 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
130
131 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
132 {
133 // calculate the shell offsets
134 // these are the offset from the shell pointed to by cd
135 // for each element
136 int shelloffsets[SIMINT_SIMD_LEN] = {0};
137 int lastoffset = 0;
138 const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
139
140 if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
141 {
142 // Handle if the first element of the vector is a new shell
143 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
144 {
145 nprim_icd += Q.nprim12[cd + (++icd)];
146 PRIM_PTR_INT__d_s_s_f += 60;
147 PRIM_PTR_INT__d_s_s_g += 90;
148 PRIM_PTR_INT__f_s_s_f += 100;
149 PRIM_PTR_INT__f_s_s_g += 150;
150 PRIM_PTR_INT__g_s_s_f += 150;
151 PRIM_PTR_INT__g_s_s_g += 225;
152 }
153 iprimcd++;
154 for(n = 1; n < SIMINT_SIMD_LEN; ++n)
155 {
156 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
157 {
158 shelloffsets[n] = shelloffsets[n-1] + 1;
159 lastoffset++;
160 nprim_icd += Q.nprim12[cd + (++icd)];
161 }
162 else
163 shelloffsets[n] = shelloffsets[n-1];
164 iprimcd++;
165 }
166 }
167 else
168 iprimcd += SIMINT_SIMD_LEN;
169
170 // Do we have to compute this vector (or has it been screened out)?
171 // (not_screened != 0 means we have to do this vector)
172 if(check_screen)
173 {
174 const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
175 if(vmax < screen_tol)
176 {
177 PRIM_PTR_INT__d_s_s_f += lastoffset*60;
178 PRIM_PTR_INT__d_s_s_g += lastoffset*90;
179 PRIM_PTR_INT__f_s_s_f += lastoffset*100;
180 PRIM_PTR_INT__f_s_s_g += lastoffset*150;
181 PRIM_PTR_INT__g_s_s_f += lastoffset*150;
182 PRIM_PTR_INT__g_s_s_g += lastoffset*225;
183 continue;
184 }
185 }
186
187 const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
188 const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
189 const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
190 const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
191
192
193 /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
194 SIMINT_DBLTYPE PQ[3];
195 PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
196 PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
197 PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
198 SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
199 R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
200 R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
201
202 const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
203 const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
204 const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
205 const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
206 const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
207 const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
208 const SIMINT_DBLTYPE Q_PB[3] = { SIMINT_DBLLOAD(Q.PB_x, j), SIMINT_DBLLOAD(Q.PB_y, j), SIMINT_DBLLOAD(Q.PB_z, j) };
209
210 // NOTE: Minus sign!
211 const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
212 SIMINT_DBLTYPE aop_PQ[3];
213 aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
214 aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
215 aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
216
217 SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
218 SIMINT_DBLTYPE aoq_PQ[3];
219 aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
220 aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
221 aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
222 // Put a minus sign here so we don't have to in RR routines
223 a_over_q = SIMINT_NEG(a_over_q);
224
225
226 //////////////////////////////////////////////
227 // Fjt function section
228 // Maximum v value: 8
229 //////////////////////////////////////////////
230 // The parameter to the Fjt function
231 const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
232
233
234 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
235
236
237 boys_F_split(PRIM_INT__s_s_s_s, F_x, 8);
238 SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
239 prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
240 for(n = 0; n <= 8; n++)
241 PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
242
243 //////////////////////////////////////////////
244 // Primitive integrals: Vertical recurrence
245 //////////////////////////////////////////////
246
247 const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
248 const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
249 const SIMINT_DBLTYPE vrr_const_3_over_2p = SIMINT_MUL(const_3, one_over_2p);
250 const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
251 const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
252 const SIMINT_DBLTYPE vrr_const_3_over_2q = SIMINT_MUL(const_3, one_over_2q);
253 const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
254 const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
255 const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
256 const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
257
258
259
260 // Forming PRIM_INT__p_s_s_s[8 * 3];
261 for(n = 0; n < 8; ++n) // loop over orders of auxiliary function
262 {
263
264 PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
265 PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]);
266
267 PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
268 PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 1]);
269
270 PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
271 PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 2]);
272
273 }
274
275
276
277 // Forming PRIM_INT__d_s_s_s[7 * 6];
278 for(n = 0; n < 7; ++n) // loop over orders of auxiliary function
279 {
280
281 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__p_s_s_s[n * 3 + 0]);
282 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 0]);
283 PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 0]);
284
285 PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 0]);
286 PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 1]);
287
288 PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 0]);
289 PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 2]);
290
291 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 1]);
292 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 3]);
293 PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 3]);
294
295 PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 1]);
296 PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 4]);
297
298 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 2]);
299 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_s[n * 6 + 5]);
300 PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 5]);
301
302 }
303
304
305
306 // Forming PRIM_INT__d_s_s_p[4 * 18];
307 for(n = 0; n < 4; ++n) // loop over orders of auxiliary function
308 {
309
310 PRIM_INT__d_s_s_p[n * 18 + 0] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
311 PRIM_INT__d_s_s_p[n * 18 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_s_p[n * 18 + 0]);
312 PRIM_INT__d_s_s_p[n * 18 + 0] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_p[n * 18 + 0]);
313
314 PRIM_INT__d_s_s_p[n * 18 + 1] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
315 PRIM_INT__d_s_s_p[n * 18 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_s_p[n * 18 + 1]);
316
317 PRIM_INT__d_s_s_p[n * 18 + 2] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
318 PRIM_INT__d_s_s_p[n * 18 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_s_p[n * 18 + 2]);
319
320 PRIM_INT__d_s_s_p[n * 18 + 3] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 1]);
321 PRIM_INT__d_s_s_p[n * 18 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_s_p[n * 18 + 3]);
322 PRIM_INT__d_s_s_p[n * 18 + 3] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_p[n * 18 + 3]);
323
324 PRIM_INT__d_s_s_p[n * 18 + 4] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 1]);
325 PRIM_INT__d_s_s_p[n * 18 + 4] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_s_p[n * 18 + 4]);
326 PRIM_INT__d_s_s_p[n * 18 + 4] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_p[n * 18 + 4]);
327
328 PRIM_INT__d_s_s_p[n * 18 + 5] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
329 PRIM_INT__d_s_s_p[n * 18 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_s_p[n * 18 + 5]);
330
331 PRIM_INT__d_s_s_p[n * 18 + 6] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 2]);
332 PRIM_INT__d_s_s_p[n * 18 + 6] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_s_p[n * 18 + 6]);
333 PRIM_INT__d_s_s_p[n * 18 + 6] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_p[n * 18 + 6]);
334
335 PRIM_INT__d_s_s_p[n * 18 + 7] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 2]);
336 PRIM_INT__d_s_s_p[n * 18 + 7] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_s_p[n * 18 + 7]);
337
338 PRIM_INT__d_s_s_p[n * 18 + 8] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 2]);
339 PRIM_INT__d_s_s_p[n * 18 + 8] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_s_p[n * 18 + 8]);
340 PRIM_INT__d_s_s_p[n * 18 + 8] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_p[n * 18 + 8]);
341
342 PRIM_INT__d_s_s_p[n * 18 + 9] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
343 PRIM_INT__d_s_s_p[n * 18 + 9] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_s_p[n * 18 + 9]);
344
345 PRIM_INT__d_s_s_p[n * 18 + 10] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
346 PRIM_INT__d_s_s_p[n * 18 + 10] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_s_p[n * 18 + 10]);
347 PRIM_INT__d_s_s_p[n * 18 + 10] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_p[n * 18 + 10]);
348
349 PRIM_INT__d_s_s_p[n * 18 + 11] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
350 PRIM_INT__d_s_s_p[n * 18 + 11] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_s_p[n * 18 + 11]);
351
352 PRIM_INT__d_s_s_p[n * 18 + 12] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 4]);
353 PRIM_INT__d_s_s_p[n * 18 + 12] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_s_p[n * 18 + 12]);
354
355 PRIM_INT__d_s_s_p[n * 18 + 13] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 4]);
356 PRIM_INT__d_s_s_p[n * 18 + 13] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_s_p[n * 18 + 13]);
357 PRIM_INT__d_s_s_p[n * 18 + 13] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_p[n * 18 + 13]);
358
359 PRIM_INT__d_s_s_p[n * 18 + 14] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 4]);
360 PRIM_INT__d_s_s_p[n * 18 + 14] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_s_p[n * 18 + 14]);
361 PRIM_INT__d_s_s_p[n * 18 + 14] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_p[n * 18 + 14]);
362
363 PRIM_INT__d_s_s_p[n * 18 + 15] = SIMINT_MUL(Q_PB[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
364 PRIM_INT__d_s_s_p[n * 18 + 15] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_s_p[n * 18 + 15]);
365
366 PRIM_INT__d_s_s_p[n * 18 + 16] = SIMINT_MUL(Q_PB[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
367 PRIM_INT__d_s_s_p[n * 18 + 16] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_s_p[n * 18 + 16]);
368
369 PRIM_INT__d_s_s_p[n * 18 + 17] = SIMINT_MUL(Q_PB[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
370 PRIM_INT__d_s_s_p[n * 18 + 17] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_s_p[n * 18 + 17]);
371 PRIM_INT__d_s_s_p[n * 18 + 17] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_p[n * 18 + 17]);
372
373 }
374
375
376
377 // Forming PRIM_INT__p_s_s_p[4 * 9];
378 for(n = 0; n < 4; ++n) // loop over orders of auxiliary function
379 {
380
381 PRIM_INT__p_s_s_p[n * 9 + 0] = SIMINT_MUL(Q_PB[0], PRIM_INT__p_s_s_s[n * 3 + 0]);
382 PRIM_INT__p_s_s_p[n * 9 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_p[n * 9 + 0]);
383 PRIM_INT__p_s_s_p[n * 9 + 0] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_p[n * 9 + 0]);
384
385 PRIM_INT__p_s_s_p[n * 9 + 1] = SIMINT_MUL(Q_PB[1], PRIM_INT__p_s_s_s[n * 3 + 0]);
386 PRIM_INT__p_s_s_p[n * 9 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_p[n * 9 + 1]);
387
388 PRIM_INT__p_s_s_p[n * 9 + 2] = SIMINT_MUL(Q_PB[2], PRIM_INT__p_s_s_s[n * 3 + 0]);
389 PRIM_INT__p_s_s_p[n * 9 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_p[n * 9 + 2]);
390
391 PRIM_INT__p_s_s_p[n * 9 + 3] = SIMINT_MUL(Q_PB[0], PRIM_INT__p_s_s_s[n * 3 + 1]);
392 PRIM_INT__p_s_s_p[n * 9 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_p[n * 9 + 3]);
393
394 PRIM_INT__p_s_s_p[n * 9 + 4] = SIMINT_MUL(Q_PB[1], PRIM_INT__p_s_s_s[n * 3 + 1]);
395 PRIM_INT__p_s_s_p[n * 9 + 4] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_p[n * 9 + 4]);
396 PRIM_INT__p_s_s_p[n * 9 + 4] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_p[n * 9 + 4]);
397
398 PRIM_INT__p_s_s_p[n * 9 + 5] = SIMINT_MUL(Q_PB[2], PRIM_INT__p_s_s_s[n * 3 + 1]);
399 PRIM_INT__p_s_s_p[n * 9 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_p[n * 9 + 5]);
400
401 PRIM_INT__p_s_s_p[n * 9 + 6] = SIMINT_MUL(Q_PB[0], PRIM_INT__p_s_s_s[n * 3 + 2]);
402 PRIM_INT__p_s_s_p[n * 9 + 6] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_p[n * 9 + 6]);
403
404 PRIM_INT__p_s_s_p[n * 9 + 7] = SIMINT_MUL(Q_PB[1], PRIM_INT__p_s_s_s[n * 3 + 2]);
405 PRIM_INT__p_s_s_p[n * 9 + 7] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_p[n * 9 + 7]);
406
407 PRIM_INT__p_s_s_p[n * 9 + 8] = SIMINT_MUL(Q_PB[2], PRIM_INT__p_s_s_s[n * 3 + 2]);
408 PRIM_INT__p_s_s_p[n * 9 + 8] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_p[n * 9 + 8]);
409 PRIM_INT__p_s_s_p[n * 9 + 8] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_p[n * 9 + 8]);
410
411 }
412
413
414 VRR_L_d_s_s_d(
415 PRIM_INT__d_s_s_d,
416 PRIM_INT__d_s_s_p,
417 PRIM_INT__d_s_s_s,
418 PRIM_INT__p_s_s_p,
419 Q_PB,
420 a_over_q,
421 aoq_PQ,
422 one_over_2pq,
423 one_over_2q,
424 3);
425
426
427
428 // Forming PRIM_INT__s_s_s_p[4 * 3];
429 for(n = 0; n < 4; ++n) // loop over orders of auxiliary function
430 {
431
432 PRIM_INT__s_s_s_p[n * 3 + 0] = SIMINT_MUL(Q_PB[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
433 PRIM_INT__s_s_s_p[n * 3 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_p[n * 3 + 0]);
434
435 PRIM_INT__s_s_s_p[n * 3 + 1] = SIMINT_MUL(Q_PB[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
436 PRIM_INT__s_s_s_p[n * 3 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_p[n * 3 + 1]);
437
438 PRIM_INT__s_s_s_p[n * 3 + 2] = SIMINT_MUL(Q_PB[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
439 PRIM_INT__s_s_s_p[n * 3 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_p[n * 3 + 2]);
440
441 }
442
443
444
445 // Forming PRIM_INT__p_s_s_d[3 * 18];
446 for(n = 0; n < 3; ++n) // loop over orders of auxiliary function
447 {
448
449 PRIM_INT__p_s_s_d[n * 18 + 0] = SIMINT_MUL(Q_PB[0], PRIM_INT__p_s_s_p[n * 9 + 0]);
450 PRIM_INT__p_s_s_d[n * 18 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_s_p[(n+1) * 9 + 0], PRIM_INT__p_s_s_d[n * 18 + 0]);
451 PRIM_INT__p_s_s_d[n * 18 + 0] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__p_s_s_d[n * 18 + 0]);
452 PRIM_INT__p_s_s_d[n * 18 + 0] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_p[(n+1) * 3 + 0], PRIM_INT__p_s_s_d[n * 18 + 0]);
453
454 PRIM_INT__p_s_s_d[n * 18 + 1] = SIMINT_MUL(Q_PB[1], PRIM_INT__p_s_s_p[n * 9 + 0]);
455 PRIM_INT__p_s_s_d[n * 18 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_p[(n+1) * 9 + 0], PRIM_INT__p_s_s_d[n * 18 + 1]);
456
457 PRIM_INT__p_s_s_d[n * 18 + 3] = SIMINT_MUL(Q_PB[1], PRIM_INT__p_s_s_p[n * 9 + 1]);
458 PRIM_INT__p_s_s_d[n * 18 + 3] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_p[(n+1) * 9 + 1], PRIM_INT__p_s_s_d[n * 18 + 3]);
459 PRIM_INT__p_s_s_d[n * 18 + 3] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__p_s_s_d[n * 18 + 3]);
460
461 PRIM_INT__p_s_s_d[n * 18 + 5] = SIMINT_MUL(Q_PB[2], PRIM_INT__p_s_s_p[n * 9 + 2]);
462 PRIM_INT__p_s_s_d[n * 18 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_s_p[(n+1) * 9 + 2], PRIM_INT__p_s_s_d[n * 18 + 5]);
463 PRIM_INT__p_s_s_d[n * 18 + 5] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__p_s_s_d[n * 18 + 5]);
464
465 PRIM_INT__p_s_s_d[n * 18 + 6] = SIMINT_MUL(Q_PB[0], PRIM_INT__p_s_s_p[n * 9 + 3]);
466 PRIM_INT__p_s_s_d[n * 18 + 6] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_s_p[(n+1) * 9 + 3], PRIM_INT__p_s_s_d[n * 18 + 6]);
467 PRIM_INT__p_s_s_d[n * 18 + 6] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__p_s_s_d[n * 18 + 6]);
468
469 PRIM_INT__p_s_s_d[n * 18 + 7] = SIMINT_MUL(Q_PB[1], PRIM_INT__p_s_s_p[n * 9 + 3]);
470 PRIM_INT__p_s_s_d[n * 18 + 7] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_p[(n+1) * 9 + 3], PRIM_INT__p_s_s_d[n * 18 + 7]);
471 PRIM_INT__p_s_s_d[n * 18 + 7] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_p[(n+1) * 3 + 0], PRIM_INT__p_s_s_d[n * 18 + 7]);
472
473 PRIM_INT__p_s_s_d[n * 18 + 9] = SIMINT_MUL(Q_PB[1], PRIM_INT__p_s_s_p[n * 9 + 4]);
474 PRIM_INT__p_s_s_d[n * 18 + 9] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_p[(n+1) * 9 + 4], PRIM_INT__p_s_s_d[n * 18 + 9]);
475 PRIM_INT__p_s_s_d[n * 18 + 9] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__p_s_s_d[n * 18 + 9]);
476 PRIM_INT__p_s_s_d[n * 18 + 9] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_p[(n+1) * 3 + 1], PRIM_INT__p_s_s_d[n * 18 + 9]);
477
478 PRIM_INT__p_s_s_d[n * 18 + 11] = SIMINT_MUL(Q_PB[2], PRIM_INT__p_s_s_p[n * 9 + 5]);
479 PRIM_INT__p_s_s_d[n * 18 + 11] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_s_p[(n+1) * 9 + 5], PRIM_INT__p_s_s_d[n * 18 + 11]);
480 PRIM_INT__p_s_s_d[n * 18 + 11] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__p_s_s_d[n * 18 + 11]);
481
482 PRIM_INT__p_s_s_d[n * 18 + 12] = SIMINT_MUL(Q_PB[0], PRIM_INT__p_s_s_p[n * 9 + 6]);
483 PRIM_INT__p_s_s_d[n * 18 + 12] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_s_p[(n+1) * 9 + 6], PRIM_INT__p_s_s_d[n * 18 + 12]);
484 PRIM_INT__p_s_s_d[n * 18 + 12] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__p_s_s_d[n * 18 + 12]);
485
486 PRIM_INT__p_s_s_d[n * 18 + 13] = SIMINT_MUL(Q_PB[1], PRIM_INT__p_s_s_p[n * 9 + 6]);
487 PRIM_INT__p_s_s_d[n * 18 + 13] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_p[(n+1) * 9 + 6], PRIM_INT__p_s_s_d[n * 18 + 13]);
488
489 PRIM_INT__p_s_s_d[n * 18 + 15] = SIMINT_MUL(Q_PB[1], PRIM_INT__p_s_s_p[n * 9 + 7]);
490 PRIM_INT__p_s_s_d[n * 18 + 15] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_p[(n+1) * 9 + 7], PRIM_INT__p_s_s_d[n * 18 + 15]);
491 PRIM_INT__p_s_s_d[n * 18 + 15] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__p_s_s_d[n * 18 + 15]);
492
493 PRIM_INT__p_s_s_d[n * 18 + 17] = SIMINT_MUL(Q_PB[2], PRIM_INT__p_s_s_p[n * 9 + 8]);
494 PRIM_INT__p_s_s_d[n * 18 + 17] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_s_p[(n+1) * 9 + 8], PRIM_INT__p_s_s_d[n * 18 + 17]);
495 PRIM_INT__p_s_s_d[n * 18 + 17] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__p_s_s_d[n * 18 + 17]);
496 PRIM_INT__p_s_s_d[n * 18 + 17] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_p[(n+1) * 3 + 2], PRIM_INT__p_s_s_d[n * 18 + 17]);
497
498 }
499
500
501 VRR_L_d_s_s_f(
502 PRIM_INT__d_s_s_f,
503 PRIM_INT__d_s_s_d,
504 PRIM_INT__d_s_s_p,
505 PRIM_INT__p_s_s_d,
506 Q_PB,
507 a_over_q,
508 aoq_PQ,
509 one_over_2pq,
510 one_over_2q,
511 2);
512
513
514
515 // Forming PRIM_INT__s_s_s_d[3 * 6];
516 for(n = 0; n < 3; ++n) // loop over orders of auxiliary function
517 {
518
519 PRIM_INT__s_s_s_d[n * 6 + 0] = SIMINT_MUL(Q_PB[0], PRIM_INT__s_s_s_p[n * 3 + 0]);
520 PRIM_INT__s_s_s_d[n * 6 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_s_p[(n+1) * 3 + 0], PRIM_INT__s_s_s_d[n * 6 + 0]);
521 PRIM_INT__s_s_s_d[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_s_d[n * 6 + 0]);
522
523 PRIM_INT__s_s_s_d[n * 6 + 3] = SIMINT_MUL(Q_PB[1], PRIM_INT__s_s_s_p[n * 3 + 1]);
524 PRIM_INT__s_s_s_d[n * 6 + 3] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_s_p[(n+1) * 3 + 1], PRIM_INT__s_s_s_d[n * 6 + 3]);
525 PRIM_INT__s_s_s_d[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_s_d[n * 6 + 3]);
526
527 PRIM_INT__s_s_s_d[n * 6 + 5] = SIMINT_MUL(Q_PB[2], PRIM_INT__s_s_s_p[n * 3 + 2]);
528 PRIM_INT__s_s_s_d[n * 6 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_s_p[(n+1) * 3 + 2], PRIM_INT__s_s_s_d[n * 6 + 5]);
529 PRIM_INT__s_s_s_d[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_s_d[n * 6 + 5]);
530
531 }
532
533
534 VRR_L_p_s_s_f(
535 PRIM_INT__p_s_s_f,
536 PRIM_INT__p_s_s_d,
537 PRIM_INT__p_s_s_p,
538 PRIM_INT__s_s_s_d,
539 Q_PB,
540 a_over_q,
541 aoq_PQ,
542 one_over_2pq,
543 one_over_2q,
544 2);
545
546
547 ostei_general_vrr_L(2, 0, 0, 4, 1,
548 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PB,
549 PRIM_INT__d_s_s_f, NULL, PRIM_INT__d_s_s_d, PRIM_INT__p_s_s_f, NULL, PRIM_INT__d_s_s_g);
550
551
552
553 // Forming PRIM_INT__f_s_s_s[6 * 10];
554 for(n = 0; n < 6; ++n) // loop over orders of auxiliary function
555 {
556
557 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
558 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 0]);
559 PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__f_s_s_s[n * 10 + 0]);
560
561 PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
562 PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 1]);
563
564 PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
565 PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 2]);
566
567 PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
568 PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 3]);
569
570 PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
571 PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__f_s_s_s[n * 10 + 4]);
572
573 PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
574 PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 5]);
575
576 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
577 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 6]);
578 PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__f_s_s_s[n * 10 + 6]);
579
580 PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
581 PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 7]);
582
583 PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
584 PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 8]);
585
586 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
587 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 9]);
588 PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__f_s_s_s[n * 10 + 9]);
589
590 }
591
592
593 VRR_L_f_s_s_p(
594 PRIM_INT__f_s_s_p,
595 PRIM_INT__f_s_s_s,
596 PRIM_INT__d_s_s_s,
597 Q_PB,
598 aoq_PQ,
599 one_over_2pq,
600 4);
601
602
603 VRR_L_f_s_s_d(
604 PRIM_INT__f_s_s_d,
605 PRIM_INT__f_s_s_p,
606 PRIM_INT__f_s_s_s,
607 PRIM_INT__d_s_s_p,
608 Q_PB,
609 a_over_q,
610 aoq_PQ,
611 one_over_2pq,
612 one_over_2q,
613 3);
614
615
616 ostei_general_vrr_L(3, 0, 0, 3, 2,
617 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PB,
618 PRIM_INT__f_s_s_d, NULL, PRIM_INT__f_s_s_p, PRIM_INT__d_s_s_d, NULL, PRIM_INT__f_s_s_f);
619
620
621 ostei_general_vrr_L(3, 0, 0, 4, 1,
622 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PB,
623 PRIM_INT__f_s_s_f, NULL, PRIM_INT__f_s_s_d, PRIM_INT__d_s_s_f, NULL, PRIM_INT__f_s_s_g);
624
625
626 VRR_I_g_s_s_s(
627 PRIM_INT__g_s_s_s,
628 PRIM_INT__f_s_s_s,
629 PRIM_INT__d_s_s_s,
630 P_PA,
631 a_over_p,
632 aop_PQ,
633 one_over_2p,
634 5);
635
636
637 VRR_L_g_s_s_p(
638 PRIM_INT__g_s_s_p,
639 PRIM_INT__g_s_s_s,
640 PRIM_INT__f_s_s_s,
641 Q_PB,
642 aoq_PQ,
643 one_over_2pq,
644 4);
645
646
647 ostei_general_vrr_L(4, 0, 0, 2, 3,
648 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PB,
649 PRIM_INT__g_s_s_p, NULL, PRIM_INT__g_s_s_s, PRIM_INT__f_s_s_p, NULL, PRIM_INT__g_s_s_d);
650
651
652 ostei_general_vrr_L(4, 0, 0, 3, 2,
653 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PB,
654 PRIM_INT__g_s_s_d, NULL, PRIM_INT__g_s_s_p, PRIM_INT__f_s_s_d, NULL, PRIM_INT__g_s_s_f);
655
656
657 ostei_general_vrr_L(4, 0, 0, 4, 1,
658 one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PB,
659 PRIM_INT__g_s_s_f, NULL, PRIM_INT__g_s_s_d, PRIM_INT__f_s_s_f, NULL, PRIM_INT__g_s_s_g);
660
661
662
663
664 ////////////////////////////////////
665 // Accumulate contracted integrals
666 ////////////////////////////////////
667 if(lastoffset == 0)
668 {
669 contract_all(60, PRIM_INT__d_s_s_f, PRIM_PTR_INT__d_s_s_f);
670 contract_all(90, PRIM_INT__d_s_s_g, PRIM_PTR_INT__d_s_s_g);
671 contract_all(100, PRIM_INT__f_s_s_f, PRIM_PTR_INT__f_s_s_f);
672 contract_all(150, PRIM_INT__f_s_s_g, PRIM_PTR_INT__f_s_s_g);
673 contract_all(150, PRIM_INT__g_s_s_f, PRIM_PTR_INT__g_s_s_f);
674 contract_all(225, PRIM_INT__g_s_s_g, PRIM_PTR_INT__g_s_s_g);
675 }
676 else
677 {
678 contract(60, shelloffsets, PRIM_INT__d_s_s_f, PRIM_PTR_INT__d_s_s_f);
679 contract(90, shelloffsets, PRIM_INT__d_s_s_g, PRIM_PTR_INT__d_s_s_g);
680 contract(100, shelloffsets, PRIM_INT__f_s_s_f, PRIM_PTR_INT__f_s_s_f);
681 contract(150, shelloffsets, PRIM_INT__f_s_s_g, PRIM_PTR_INT__f_s_s_g);
682 contract(150, shelloffsets, PRIM_INT__g_s_s_f, PRIM_PTR_INT__g_s_s_f);
683 contract(225, shelloffsets, PRIM_INT__g_s_s_g, PRIM_PTR_INT__g_s_s_g);
684 PRIM_PTR_INT__d_s_s_f += lastoffset*60;
685 PRIM_PTR_INT__d_s_s_g += lastoffset*90;
686 PRIM_PTR_INT__f_s_s_f += lastoffset*100;
687 PRIM_PTR_INT__f_s_s_g += lastoffset*150;
688 PRIM_PTR_INT__g_s_s_f += lastoffset*150;
689 PRIM_PTR_INT__g_s_s_g += lastoffset*225;
690 }
691
692 } // close loop over j
693 } // close loop over i
694
695 //Advance to the next batch
696 jstart = SIMINT_SIMD_ROUND(jend);
697
698 //////////////////////////////////////////////
699 // Contracted integrals: Horizontal recurrence
700 //////////////////////////////////////////////
701
702
703 const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
704
705
706 for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
707 {
708 const double hCD[3] = { Q.AB_x[cd+abcd], Q.AB_y[cd+abcd], Q.AB_z[cd+abcd] };
709
710 // set up HRR pointers
711 double const * restrict HRR_INT__d_s_s_f = INT__d_s_s_f + abcd * 60;
712 double const * restrict HRR_INT__d_s_s_g = INT__d_s_s_g + abcd * 90;
713 double const * restrict HRR_INT__f_s_s_f = INT__f_s_s_f + abcd * 100;
714 double const * restrict HRR_INT__f_s_s_g = INT__f_s_s_g + abcd * 150;
715 double const * restrict HRR_INT__g_s_s_f = INT__g_s_s_f + abcd * 150;
716 double const * restrict HRR_INT__g_s_s_g = INT__g_s_s_g + abcd * 225;
717 double * restrict HRR_INT__d_d_p_f = INT__d_d_p_f + real_abcd * 1080;
718
719 // form INT__d_p_s_f
720 for(iket = 0; iket < 10; ++iket)
721 {
722 HRR_INT__d_p_s_f[0 * 10 + iket] = HRR_INT__f_s_s_f[0 * 10 + iket] + ( hAB[0] * HRR_INT__d_s_s_f[0 * 10 + iket] );
723
724 HRR_INT__d_p_s_f[1 * 10 + iket] = HRR_INT__f_s_s_f[1 * 10 + iket] + ( hAB[1] * HRR_INT__d_s_s_f[0 * 10 + iket] );
725
726 HRR_INT__d_p_s_f[2 * 10 + iket] = HRR_INT__f_s_s_f[2 * 10 + iket] + ( hAB[2] * HRR_INT__d_s_s_f[0 * 10 + iket] );
727
728 HRR_INT__d_p_s_f[3 * 10 + iket] = HRR_INT__f_s_s_f[1 * 10 + iket] + ( hAB[0] * HRR_INT__d_s_s_f[1 * 10 + iket] );
729
730 HRR_INT__d_p_s_f[4 * 10 + iket] = HRR_INT__f_s_s_f[3 * 10 + iket] + ( hAB[1] * HRR_INT__d_s_s_f[1 * 10 + iket] );
731
732 HRR_INT__d_p_s_f[5 * 10 + iket] = HRR_INT__f_s_s_f[4 * 10 + iket] + ( hAB[2] * HRR_INT__d_s_s_f[1 * 10 + iket] );
733
734 HRR_INT__d_p_s_f[6 * 10 + iket] = HRR_INT__f_s_s_f[2 * 10 + iket] + ( hAB[0] * HRR_INT__d_s_s_f[2 * 10 + iket] );
735
736 HRR_INT__d_p_s_f[7 * 10 + iket] = HRR_INT__f_s_s_f[4 * 10 + iket] + ( hAB[1] * HRR_INT__d_s_s_f[2 * 10 + iket] );
737
738 HRR_INT__d_p_s_f[8 * 10 + iket] = HRR_INT__f_s_s_f[5 * 10 + iket] + ( hAB[2] * HRR_INT__d_s_s_f[2 * 10 + iket] );
739
740 HRR_INT__d_p_s_f[9 * 10 + iket] = HRR_INT__f_s_s_f[3 * 10 + iket] + ( hAB[0] * HRR_INT__d_s_s_f[3 * 10 + iket] );
741
742 HRR_INT__d_p_s_f[10 * 10 + iket] = HRR_INT__f_s_s_f[6 * 10 + iket] + ( hAB[1] * HRR_INT__d_s_s_f[3 * 10 + iket] );
743
744 HRR_INT__d_p_s_f[11 * 10 + iket] = HRR_INT__f_s_s_f[7 * 10 + iket] + ( hAB[2] * HRR_INT__d_s_s_f[3 * 10 + iket] );
745
746 HRR_INT__d_p_s_f[12 * 10 + iket] = HRR_INT__f_s_s_f[4 * 10 + iket] + ( hAB[0] * HRR_INT__d_s_s_f[4 * 10 + iket] );
747
748 HRR_INT__d_p_s_f[13 * 10 + iket] = HRR_INT__f_s_s_f[7 * 10 + iket] + ( hAB[1] * HRR_INT__d_s_s_f[4 * 10 + iket] );
749
750 HRR_INT__d_p_s_f[14 * 10 + iket] = HRR_INT__f_s_s_f[8 * 10 + iket] + ( hAB[2] * HRR_INT__d_s_s_f[4 * 10 + iket] );
751
752 HRR_INT__d_p_s_f[15 * 10 + iket] = HRR_INT__f_s_s_f[5 * 10 + iket] + ( hAB[0] * HRR_INT__d_s_s_f[5 * 10 + iket] );
753
754 HRR_INT__d_p_s_f[16 * 10 + iket] = HRR_INT__f_s_s_f[8 * 10 + iket] + ( hAB[1] * HRR_INT__d_s_s_f[5 * 10 + iket] );
755
756 HRR_INT__d_p_s_f[17 * 10 + iket] = HRR_INT__f_s_s_f[9 * 10 + iket] + ( hAB[2] * HRR_INT__d_s_s_f[5 * 10 + iket] );
757
758 }
759
760
761 // form INT__d_p_s_g
762 for(iket = 0; iket < 15; ++iket)
763 {
764 HRR_INT__d_p_s_g[0 * 15 + iket] = HRR_INT__f_s_s_g[0 * 15 + iket] + ( hAB[0] * HRR_INT__d_s_s_g[0 * 15 + iket] );
765
766 HRR_INT__d_p_s_g[1 * 15 + iket] = HRR_INT__f_s_s_g[1 * 15 + iket] + ( hAB[1] * HRR_INT__d_s_s_g[0 * 15 + iket] );
767
768 HRR_INT__d_p_s_g[2 * 15 + iket] = HRR_INT__f_s_s_g[2 * 15 + iket] + ( hAB[2] * HRR_INT__d_s_s_g[0 * 15 + iket] );
769
770 HRR_INT__d_p_s_g[3 * 15 + iket] = HRR_INT__f_s_s_g[1 * 15 + iket] + ( hAB[0] * HRR_INT__d_s_s_g[1 * 15 + iket] );
771
772 HRR_INT__d_p_s_g[4 * 15 + iket] = HRR_INT__f_s_s_g[3 * 15 + iket] + ( hAB[1] * HRR_INT__d_s_s_g[1 * 15 + iket] );
773
774 HRR_INT__d_p_s_g[5 * 15 + iket] = HRR_INT__f_s_s_g[4 * 15 + iket] + ( hAB[2] * HRR_INT__d_s_s_g[1 * 15 + iket] );
775
776 HRR_INT__d_p_s_g[6 * 15 + iket] = HRR_INT__f_s_s_g[2 * 15 + iket] + ( hAB[0] * HRR_INT__d_s_s_g[2 * 15 + iket] );
777
778 HRR_INT__d_p_s_g[7 * 15 + iket] = HRR_INT__f_s_s_g[4 * 15 + iket] + ( hAB[1] * HRR_INT__d_s_s_g[2 * 15 + iket] );
779
780 HRR_INT__d_p_s_g[8 * 15 + iket] = HRR_INT__f_s_s_g[5 * 15 + iket] + ( hAB[2] * HRR_INT__d_s_s_g[2 * 15 + iket] );
781
782 HRR_INT__d_p_s_g[9 * 15 + iket] = HRR_INT__f_s_s_g[3 * 15 + iket] + ( hAB[0] * HRR_INT__d_s_s_g[3 * 15 + iket] );
783
784 HRR_INT__d_p_s_g[10 * 15 + iket] = HRR_INT__f_s_s_g[6 * 15 + iket] + ( hAB[1] * HRR_INT__d_s_s_g[3 * 15 + iket] );
785
786 HRR_INT__d_p_s_g[11 * 15 + iket] = HRR_INT__f_s_s_g[7 * 15 + iket] + ( hAB[2] * HRR_INT__d_s_s_g[3 * 15 + iket] );
787
788 HRR_INT__d_p_s_g[12 * 15 + iket] = HRR_INT__f_s_s_g[4 * 15 + iket] + ( hAB[0] * HRR_INT__d_s_s_g[4 * 15 + iket] );
789
790 HRR_INT__d_p_s_g[13 * 15 + iket] = HRR_INT__f_s_s_g[7 * 15 + iket] + ( hAB[1] * HRR_INT__d_s_s_g[4 * 15 + iket] );
791
792 HRR_INT__d_p_s_g[14 * 15 + iket] = HRR_INT__f_s_s_g[8 * 15 + iket] + ( hAB[2] * HRR_INT__d_s_s_g[4 * 15 + iket] );
793
794 HRR_INT__d_p_s_g[15 * 15 + iket] = HRR_INT__f_s_s_g[5 * 15 + iket] + ( hAB[0] * HRR_INT__d_s_s_g[5 * 15 + iket] );
795
796 HRR_INT__d_p_s_g[16 * 15 + iket] = HRR_INT__f_s_s_g[8 * 15 + iket] + ( hAB[1] * HRR_INT__d_s_s_g[5 * 15 + iket] );
797
798 HRR_INT__d_p_s_g[17 * 15 + iket] = HRR_INT__f_s_s_g[9 * 15 + iket] + ( hAB[2] * HRR_INT__d_s_s_g[5 * 15 + iket] );
799
800 }
801
802
803 // form INT__f_p_s_f
804 HRR_J_f_p(
805 HRR_INT__f_p_s_f,
806 HRR_INT__f_s_s_f,
807 HRR_INT__g_s_s_f,
808 hAB, 10);
809
810 // form INT__f_p_s_g
811 HRR_J_f_p(
812 HRR_INT__f_p_s_g,
813 HRR_INT__f_s_s_g,
814 HRR_INT__g_s_s_g,
815 hAB, 15);
816
817 // form INT__d_d_s_f
818 HRR_J_d_d(
819 HRR_INT__d_d_s_f,
820 HRR_INT__d_p_s_f,
821 HRR_INT__f_p_s_f,
822 hAB, 10);
823
824 // form INT__d_d_s_g
825 HRR_J_d_d(
826 HRR_INT__d_d_s_g,
827 HRR_INT__d_p_s_g,
828 HRR_INT__f_p_s_g,
829 hAB, 15);
830
831 // form INT__d_d_p_f
832 HRR_K_p_f(
833 HRR_INT__d_d_p_f,
834 HRR_INT__d_d_s_f,
835 HRR_INT__d_d_s_g,
836 hCD, 36);
837
838
839 } // close HRR loop
840
841
842 } // close loop cdbatch
843
844 istart = iend;
845 } // close loop over ab
846
847 return P.nshell12_clip * Q.nshell12_clip;
848 }
849
850