1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6 
7 
ostei_f_f_h_s(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__f_f_h_s)8 int ostei_f_f_h_s(struct simint_multi_shellpair const P,
9                   struct simint_multi_shellpair const Q,
10                   double screen_tol,
11                   double * const restrict work,
12                   double * const restrict INT__f_f_h_s)
13 {
14 
15     SIMINT_ASSUME_ALIGN_DBL(work);
16     SIMINT_ASSUME_ALIGN_DBL(INT__f_f_h_s);
17     int ab, cd, abcd;
18     int istart, jstart;
19     int iprimcd, nprim_icd, icd;
20     const int check_screen = (screen_tol > 0.0);
21     int i, j;
22     int n;
23     int not_screened;
24     int real_abcd;
25     int iket;
26 
27     // partition workspace
28     double * const INT__f_s_h_s = work + (SIMINT_NSHELL_SIMD * 0);
29     double * const INT__g_s_h_s = work + (SIMINT_NSHELL_SIMD * 210);
30     double * const INT__h_s_h_s = work + (SIMINT_NSHELL_SIMD * 525);
31     double * const INT__i_s_h_s = work + (SIMINT_NSHELL_SIMD * 966);
32     SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*1554);
33     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
34     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_p_s = primwork + 12;
35     SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_d_s = primwork + 27;
36     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_s_s = primwork + 51;
37     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_p_s = primwork + 84;
38     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_d_s = primwork + 129;
39     SIMINT_DBLTYPE * const restrict PRIM_INT__p_s_f_s = primwork + 201;
40     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_s_s = primwork + 291;
41     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_p_s = primwork + 351;
42     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_d_s = primwork + 441;
43     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_f_s = primwork + 585;
44     SIMINT_DBLTYPE * const restrict PRIM_INT__d_s_g_s = primwork + 765;
45     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_s_s = primwork + 945;
46     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_p_s = primwork + 1035;
47     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_d_s = primwork + 1185;
48     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_f_s = primwork + 1425;
49     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_g_s = primwork + 1725;
50     SIMINT_DBLTYPE * const restrict PRIM_INT__f_s_h_s = primwork + 2025;
51     SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_s_s = primwork + 2235;
52     SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_p_s = primwork + 2355;
53     SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_d_s = primwork + 2580;
54     SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_f_s = primwork + 2940;
55     SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_g_s = primwork + 3390;
56     SIMINT_DBLTYPE * const restrict PRIM_INT__g_s_h_s = primwork + 3840;
57     SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_s_s = primwork + 4155;
58     SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_p_s = primwork + 4302;
59     SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_d_s = primwork + 4617;
60     SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_f_s = primwork + 5121;
61     SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_g_s = primwork + 5751;
62     SIMINT_DBLTYPE * const restrict PRIM_INT__h_s_h_s = primwork + 6381;
63     SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_s_s = primwork + 6822;
64     SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_p_s = primwork + 6990;
65     SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_d_s = primwork + 7410;
66     SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_f_s = primwork + 8082;
67     SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_g_s = primwork + 8922;
68     SIMINT_DBLTYPE * const restrict PRIM_INT__i_s_h_s = primwork + 9762;
69     double * const hrrwork = (double *)(primwork + 10350);
70     double * const HRR_INT__f_p_h_s = hrrwork + 0;
71     double * const HRR_INT__f_d_h_s = hrrwork + 630;
72     double * const HRR_INT__g_p_h_s = hrrwork + 1890;
73     double * const HRR_INT__g_d_h_s = hrrwork + 2835;
74     double * const HRR_INT__h_p_h_s = hrrwork + 4725;
75 
76 
77     // Create constants
78     const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
79     const SIMINT_DBLTYPE const_2 = SIMINT_DBLSET1(2);
80     const SIMINT_DBLTYPE const_3 = SIMINT_DBLSET1(3);
81     const SIMINT_DBLTYPE const_4 = SIMINT_DBLSET1(4);
82     const SIMINT_DBLTYPE const_5 = SIMINT_DBLSET1(5);
83     const SIMINT_DBLTYPE const_6 = SIMINT_DBLSET1(6);
84     const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
85 
86 
87     ////////////////////////////////////////
88     // Loop over shells and primitives
89     ////////////////////////////////////////
90 
91     real_abcd = 0;
92     istart = 0;
93     for(ab = 0; ab < P.nshell12_clip; ++ab)
94     {
95         const int iend = istart + P.nprim12[ab];
96 
97         cd = 0;
98         jstart = 0;
99 
100         for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
101         {
102             const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
103             int jend = jstart;
104             for(i = 0; i < nshellbatch; i++)
105                 jend += Q.nprim12[cd+i];
106 
107             // Clear the beginning of the workspace (where we are accumulating integrals)
108             memset(work, 0, SIMINT_NSHELL_SIMD * 1554 * sizeof(double));
109             abcd = 0;
110 
111 
112             for(i = istart; i < iend; ++i)
113             {
114                 SIMINT_DBLTYPE bra_screen_max;  // only used if check_screen
115 
116                 if(check_screen)
117                 {
118                     // Skip this whole thing if always insignificant
119                     if((P.screen[i] * Q.screen_max) < screen_tol)
120                         continue;
121                     bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
122                 }
123 
124                 icd = 0;
125                 iprimcd = 0;
126                 nprim_icd = Q.nprim12[cd];
127                 double * restrict PRIM_PTR_INT__f_s_h_s = INT__f_s_h_s + abcd * 210;
128                 double * restrict PRIM_PTR_INT__g_s_h_s = INT__g_s_h_s + abcd * 315;
129                 double * restrict PRIM_PTR_INT__h_s_h_s = INT__h_s_h_s + abcd * 441;
130                 double * restrict PRIM_PTR_INT__i_s_h_s = INT__i_s_h_s + abcd * 588;
131 
132 
133 
134                 // Load these one per loop over i
135                 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
136                 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
137                 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
138 
139                 const SIMINT_DBLTYPE P_PA[3] = { SIMINT_DBLSET1(P.PA_x[i]), SIMINT_DBLSET1(P.PA_y[i]), SIMINT_DBLSET1(P.PA_z[i]) };
140 
141                 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
142                 {
143                     // calculate the shell offsets
144                     // these are the offset from the shell pointed to by cd
145                     // for each element
146                     int shelloffsets[SIMINT_SIMD_LEN] = {0};
147                     int lastoffset = 0;
148                     const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
149 
150                     if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
151                     {
152                         // Handle if the first element of the vector is a new shell
153                         if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
154                         {
155                             nprim_icd += Q.nprim12[cd + (++icd)];
156                             PRIM_PTR_INT__f_s_h_s += 210;
157                             PRIM_PTR_INT__g_s_h_s += 315;
158                             PRIM_PTR_INT__h_s_h_s += 441;
159                             PRIM_PTR_INT__i_s_h_s += 588;
160                         }
161                         iprimcd++;
162                         for(n = 1; n < SIMINT_SIMD_LEN; ++n)
163                         {
164                             if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
165                             {
166                                 shelloffsets[n] = shelloffsets[n-1] + 1;
167                                 lastoffset++;
168                                 nprim_icd += Q.nprim12[cd + (++icd)];
169                             }
170                             else
171                                 shelloffsets[n] = shelloffsets[n-1];
172                             iprimcd++;
173                         }
174                     }
175                     else
176                         iprimcd += SIMINT_SIMD_LEN;
177 
178                     // Do we have to compute this vector (or has it been screened out)?
179                     // (not_screened != 0 means we have to do this vector)
180                     if(check_screen)
181                     {
182                         const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
183                         if(vmax < screen_tol)
184                         {
185                             PRIM_PTR_INT__f_s_h_s += lastoffset*210;
186                             PRIM_PTR_INT__g_s_h_s += lastoffset*315;
187                             PRIM_PTR_INT__h_s_h_s += lastoffset*441;
188                             PRIM_PTR_INT__i_s_h_s += lastoffset*588;
189                             continue;
190                         }
191                     }
192 
193                     const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
194                     const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
195                     const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
196                     const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
197 
198 
199                     /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
200                     SIMINT_DBLTYPE PQ[3];
201                     PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
202                     PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
203                     PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
204                     SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
205                     R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
206                     R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
207 
208                     const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
209                     const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
210                     const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
211                     const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
212                     const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
213                     const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
214                     const SIMINT_DBLTYPE Q_PA[3] = { SIMINT_DBLLOAD(Q.PA_x, j), SIMINT_DBLLOAD(Q.PA_y, j), SIMINT_DBLLOAD(Q.PA_z, j) };
215 
216                     // NOTE: Minus sign!
217                     const SIMINT_DBLTYPE a_over_p = SIMINT_MUL(SIMINT_NEG(alpha), one_over_p);
218                     SIMINT_DBLTYPE aop_PQ[3];
219                     aop_PQ[0] = SIMINT_MUL(a_over_p, PQ[0]);
220                     aop_PQ[1] = SIMINT_MUL(a_over_p, PQ[1]);
221                     aop_PQ[2] = SIMINT_MUL(a_over_p, PQ[2]);
222 
223                     SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
224                     SIMINT_DBLTYPE aoq_PQ[3];
225                     aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
226                     aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
227                     aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
228                     // Put a minus sign here so we don't have to in RR routines
229                     a_over_q = SIMINT_NEG(a_over_q);
230 
231 
232                     //////////////////////////////////////////////
233                     // Fjt function section
234                     // Maximum v value: 11
235                     //////////////////////////////////////////////
236                     // The parameter to the Fjt function
237                     const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
238 
239 
240                     const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
241 
242 
243                     boys_F_split(PRIM_INT__s_s_s_s, F_x, 11);
244                     SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
245                     prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
246                     for(n = 0; n <= 11; n++)
247                         PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
248 
249                     //////////////////////////////////////////////
250                     // Primitive integrals: Vertical recurrance
251                     //////////////////////////////////////////////
252 
253                     const SIMINT_DBLTYPE vrr_const_1_over_2p = one_over_2p;
254                     const SIMINT_DBLTYPE vrr_const_2_over_2p = SIMINT_MUL(const_2, one_over_2p);
255                     const SIMINT_DBLTYPE vrr_const_3_over_2p = SIMINT_MUL(const_3, one_over_2p);
256                     const SIMINT_DBLTYPE vrr_const_4_over_2p = SIMINT_MUL(const_4, one_over_2p);
257                     const SIMINT_DBLTYPE vrr_const_5_over_2p = SIMINT_MUL(const_5, one_over_2p);
258                     const SIMINT_DBLTYPE vrr_const_1_over_2q = one_over_2q;
259                     const SIMINT_DBLTYPE vrr_const_2_over_2q = SIMINT_MUL(const_2, one_over_2q);
260                     const SIMINT_DBLTYPE vrr_const_3_over_2q = SIMINT_MUL(const_3, one_over_2q);
261                     const SIMINT_DBLTYPE vrr_const_4_over_2q = SIMINT_MUL(const_4, one_over_2q);
262                     const SIMINT_DBLTYPE vrr_const_1_over_2pq = one_over_2pq;
263                     const SIMINT_DBLTYPE vrr_const_2_over_2pq = SIMINT_MUL(const_2, one_over_2pq);
264                     const SIMINT_DBLTYPE vrr_const_3_over_2pq = SIMINT_MUL(const_3, one_over_2pq);
265                     const SIMINT_DBLTYPE vrr_const_4_over_2pq = SIMINT_MUL(const_4, one_over_2pq);
266                     const SIMINT_DBLTYPE vrr_const_5_over_2pq = SIMINT_MUL(const_5, one_over_2pq);
267                     const SIMINT_DBLTYPE vrr_const_6_over_2pq = SIMINT_MUL(const_6, one_over_2pq);
268 
269 
270 
271                     // Forming PRIM_INT__p_s_s_s[11 * 3];
272                     for(n = 0; n < 11; ++n)  // loop over orders of auxiliary function
273                     {
274 
275                         PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
276                         PRIM_INT__p_s_s_s[n * 3 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]);
277 
278                         PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
279                         PRIM_INT__p_s_s_s[n * 3 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 1]);
280 
281                         PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
282                         PRIM_INT__p_s_s_s[n * 3 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_s_s[n * 3 + 2]);
283 
284                     }
285 
286 
287 
288                     // Forming PRIM_INT__d_s_s_s[10 * 6];
289                     for(n = 0; n < 10; ++n)  // loop over orders of auxiliary function
290                     {
291 
292                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__p_s_s_s[n * 3 + 0]);
293                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 0]);
294                         PRIM_INT__d_s_s_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 0]);
295 
296                         PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 0]);
297                         PRIM_INT__d_s_s_s[n * 6 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 1]);
298 
299                         PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 0]);
300                         PRIM_INT__d_s_s_s[n * 6 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_s_s[n * 6 + 2]);
301 
302                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_MUL(P_PA[1], PRIM_INT__p_s_s_s[n * 3 + 1]);
303                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 3]);
304                         PRIM_INT__d_s_s_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 3]);
305 
306                         PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 1]);
307                         PRIM_INT__d_s_s_s[n * 6 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_s_s[n * 6 + 4]);
308 
309                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_MUL(P_PA[2], PRIM_INT__p_s_s_s[n * 3 + 2]);
310                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_s_s[n * 6 + 5]);
311                         PRIM_INT__d_s_s_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__d_s_s_s[n * 6 + 5]);
312 
313                     }
314 
315 
316 
317                     // Forming PRIM_INT__f_s_s_s[9 * 10];
318                     for(n = 0; n < 9; ++n)  // loop over orders of auxiliary function
319                     {
320 
321                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
322                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 0]);
323                         PRIM_INT__f_s_s_s[n * 10 + 0] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__f_s_s_s[n * 10 + 0]);
324 
325                         PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
326                         PRIM_INT__f_s_s_s[n * 10 + 1] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 1]);
327 
328                         PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
329                         PRIM_INT__f_s_s_s[n * 10 + 2] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__f_s_s_s[n * 10 + 2]);
330 
331                         PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
332                         PRIM_INT__f_s_s_s[n * 10 + 3] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 3]);
333 
334                         PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
335                         PRIM_INT__f_s_s_s[n * 10 + 4] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__f_s_s_s[n * 10 + 4]);
336 
337                         PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_MUL(P_PA[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
338                         PRIM_INT__f_s_s_s[n * 10 + 5] = SIMINT_FMADD( aop_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 5]);
339 
340                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
341                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 6]);
342                         PRIM_INT__f_s_s_s[n * 10 + 6] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__f_s_s_s[n * 10 + 6]);
343 
344                         PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
345                         PRIM_INT__f_s_s_s[n * 10 + 7] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__f_s_s_s[n * 10 + 7]);
346 
347                         PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_MUL(P_PA[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
348                         PRIM_INT__f_s_s_s[n * 10 + 8] = SIMINT_FMADD( aop_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 8]);
349 
350                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_MUL(P_PA[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
351                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( aop_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__f_s_s_s[n * 10 + 9]);
352                         PRIM_INT__f_s_s_s[n * 10 + 9] = SIMINT_FMADD( vrr_const_2_over_2p, SIMINT_FMADD(a_over_p, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__f_s_s_s[n * 10 + 9]);
353 
354                     }
355 
356 
357                     VRR_K_f_s_p_s(
358                             PRIM_INT__f_s_p_s,
359                             PRIM_INT__f_s_s_s,
360                             PRIM_INT__d_s_s_s,
361                             Q_PA,
362                             aoq_PQ,
363                             one_over_2pq,
364                             5);
365 
366 
367 
368                     // Forming PRIM_INT__d_s_p_s[5 * 18];
369                     for(n = 0; n < 5; ++n)  // loop over orders of auxiliary function
370                     {
371 
372                         PRIM_INT__d_s_p_s[n * 18 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__d_s_s_s[n * 6 + 0]);
373                         PRIM_INT__d_s_p_s[n * 18 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_p_s[n * 18 + 0]);
374                         PRIM_INT__d_s_p_s[n * 18 + 0] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_p_s[n * 18 + 0]);
375 
376                         PRIM_INT__d_s_p_s[n * 18 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__d_s_s_s[n * 6 + 0]);
377                         PRIM_INT__d_s_p_s[n * 18 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_p_s[n * 18 + 1]);
378 
379                         PRIM_INT__d_s_p_s[n * 18 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__d_s_s_s[n * 6 + 0]);
380                         PRIM_INT__d_s_p_s[n * 18 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 0], PRIM_INT__d_s_p_s[n * 18 + 2]);
381 
382                         PRIM_INT__d_s_p_s[n * 18 + 3] = SIMINT_MUL(Q_PA[0], PRIM_INT__d_s_s_s[n * 6 + 1]);
383                         PRIM_INT__d_s_p_s[n * 18 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_p_s[n * 18 + 3]);
384                         PRIM_INT__d_s_p_s[n * 18 + 3] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_p_s[n * 18 + 3]);
385 
386                         PRIM_INT__d_s_p_s[n * 18 + 4] = SIMINT_MUL(Q_PA[1], PRIM_INT__d_s_s_s[n * 6 + 1]);
387                         PRIM_INT__d_s_p_s[n * 18 + 4] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_p_s[n * 18 + 4]);
388                         PRIM_INT__d_s_p_s[n * 18 + 4] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_p_s[n * 18 + 4]);
389 
390                         PRIM_INT__d_s_p_s[n * 18 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__d_s_s_s[n * 6 + 1]);
391                         PRIM_INT__d_s_p_s[n * 18 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 1], PRIM_INT__d_s_p_s[n * 18 + 5]);
392 
393                         PRIM_INT__d_s_p_s[n * 18 + 6] = SIMINT_MUL(Q_PA[0], PRIM_INT__d_s_s_s[n * 6 + 2]);
394                         PRIM_INT__d_s_p_s[n * 18 + 6] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_p_s[n * 18 + 6]);
395                         PRIM_INT__d_s_p_s[n * 18 + 6] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_p_s[n * 18 + 6]);
396 
397                         PRIM_INT__d_s_p_s[n * 18 + 7] = SIMINT_MUL(Q_PA[1], PRIM_INT__d_s_s_s[n * 6 + 2]);
398                         PRIM_INT__d_s_p_s[n * 18 + 7] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_p_s[n * 18 + 7]);
399 
400                         PRIM_INT__d_s_p_s[n * 18 + 8] = SIMINT_MUL(Q_PA[2], PRIM_INT__d_s_s_s[n * 6 + 2]);
401                         PRIM_INT__d_s_p_s[n * 18 + 8] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 2], PRIM_INT__d_s_p_s[n * 18 + 8]);
402                         PRIM_INT__d_s_p_s[n * 18 + 8] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__d_s_p_s[n * 18 + 8]);
403 
404                         PRIM_INT__d_s_p_s[n * 18 + 9] = SIMINT_MUL(Q_PA[0], PRIM_INT__d_s_s_s[n * 6 + 3]);
405                         PRIM_INT__d_s_p_s[n * 18 + 9] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_p_s[n * 18 + 9]);
406 
407                         PRIM_INT__d_s_p_s[n * 18 + 10] = SIMINT_MUL(Q_PA[1], PRIM_INT__d_s_s_s[n * 6 + 3]);
408                         PRIM_INT__d_s_p_s[n * 18 + 10] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_p_s[n * 18 + 10]);
409                         PRIM_INT__d_s_p_s[n * 18 + 10] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_p_s[n * 18 + 10]);
410 
411                         PRIM_INT__d_s_p_s[n * 18 + 11] = SIMINT_MUL(Q_PA[2], PRIM_INT__d_s_s_s[n * 6 + 3]);
412                         PRIM_INT__d_s_p_s[n * 18 + 11] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 3], PRIM_INT__d_s_p_s[n * 18 + 11]);
413 
414                         PRIM_INT__d_s_p_s[n * 18 + 12] = SIMINT_MUL(Q_PA[0], PRIM_INT__d_s_s_s[n * 6 + 4]);
415                         PRIM_INT__d_s_p_s[n * 18 + 12] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_p_s[n * 18 + 12]);
416 
417                         PRIM_INT__d_s_p_s[n * 18 + 13] = SIMINT_MUL(Q_PA[1], PRIM_INT__d_s_s_s[n * 6 + 4]);
418                         PRIM_INT__d_s_p_s[n * 18 + 13] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_p_s[n * 18 + 13]);
419                         PRIM_INT__d_s_p_s[n * 18 + 13] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_p_s[n * 18 + 13]);
420 
421                         PRIM_INT__d_s_p_s[n * 18 + 14] = SIMINT_MUL(Q_PA[2], PRIM_INT__d_s_s_s[n * 6 + 4]);
422                         PRIM_INT__d_s_p_s[n * 18 + 14] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 4], PRIM_INT__d_s_p_s[n * 18 + 14]);
423                         PRIM_INT__d_s_p_s[n * 18 + 14] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__d_s_p_s[n * 18 + 14]);
424 
425                         PRIM_INT__d_s_p_s[n * 18 + 15] = SIMINT_MUL(Q_PA[0], PRIM_INT__d_s_s_s[n * 6 + 5]);
426                         PRIM_INT__d_s_p_s[n * 18 + 15] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_p_s[n * 18 + 15]);
427 
428                         PRIM_INT__d_s_p_s[n * 18 + 16] = SIMINT_MUL(Q_PA[1], PRIM_INT__d_s_s_s[n * 6 + 5]);
429                         PRIM_INT__d_s_p_s[n * 18 + 16] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_p_s[n * 18 + 16]);
430 
431                         PRIM_INT__d_s_p_s[n * 18 + 17] = SIMINT_MUL(Q_PA[2], PRIM_INT__d_s_s_s[n * 6 + 5]);
432                         PRIM_INT__d_s_p_s[n * 18 + 17] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__d_s_s_s[(n+1) * 6 + 5], PRIM_INT__d_s_p_s[n * 18 + 17]);
433                         PRIM_INT__d_s_p_s[n * 18 + 17] = SIMINT_FMADD( vrr_const_2_over_2pq, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__d_s_p_s[n * 18 + 17]);
434 
435                     }
436 
437 
438                     VRR_K_f_s_d_s(
439                             PRIM_INT__f_s_d_s,
440                             PRIM_INT__f_s_p_s,
441                             PRIM_INT__f_s_s_s,
442                             PRIM_INT__d_s_p_s,
443                             Q_PA,
444                             a_over_q,
445                             aoq_PQ,
446                             one_over_2pq,
447                             one_over_2q,
448                             4);
449 
450 
451 
452                     // Forming PRIM_INT__p_s_p_s[5 * 9];
453                     for(n = 0; n < 5; ++n)  // loop over orders of auxiliary function
454                     {
455 
456                         PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__p_s_s_s[n * 3 + 0]);
457                         PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 0]);
458                         PRIM_INT__p_s_p_s[n * 9 + 0] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 0]);
459 
460                         PRIM_INT__p_s_p_s[n * 9 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_s_s[n * 3 + 0]);
461                         PRIM_INT__p_s_p_s[n * 9 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 1]);
462 
463                         PRIM_INT__p_s_p_s[n * 9 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__p_s_s_s[n * 3 + 0]);
464                         PRIM_INT__p_s_p_s[n * 9 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_p_s[n * 9 + 2]);
465 
466                         PRIM_INT__p_s_p_s[n * 9 + 3] = SIMINT_MUL(Q_PA[0], PRIM_INT__p_s_s_s[n * 3 + 1]);
467                         PRIM_INT__p_s_p_s[n * 9 + 3] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 3]);
468 
469                         PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_s_s[n * 3 + 1]);
470                         PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 4]);
471                         PRIM_INT__p_s_p_s[n * 9 + 4] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 4]);
472 
473                         PRIM_INT__p_s_p_s[n * 9 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__p_s_s_s[n * 3 + 1]);
474                         PRIM_INT__p_s_p_s[n * 9 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_p_s[n * 9 + 5]);
475 
476                         PRIM_INT__p_s_p_s[n * 9 + 6] = SIMINT_MUL(Q_PA[0], PRIM_INT__p_s_s_s[n * 3 + 2]);
477                         PRIM_INT__p_s_p_s[n * 9 + 6] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 6]);
478 
479                         PRIM_INT__p_s_p_s[n * 9 + 7] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_s_s[n * 3 + 2]);
480                         PRIM_INT__p_s_p_s[n * 9 + 7] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 7]);
481 
482                         PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_MUL(Q_PA[2], PRIM_INT__p_s_s_s[n * 3 + 2]);
483                         PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_p_s[n * 9 + 8]);
484                         PRIM_INT__p_s_p_s[n * 9 + 8] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__p_s_p_s[n * 9 + 8]);
485 
486                     }
487 
488 
489                     VRR_K_d_s_d_s(
490                             PRIM_INT__d_s_d_s,
491                             PRIM_INT__d_s_p_s,
492                             PRIM_INT__d_s_s_s,
493                             PRIM_INT__p_s_p_s,
494                             Q_PA,
495                             a_over_q,
496                             aoq_PQ,
497                             one_over_2pq,
498                             one_over_2q,
499                             4);
500 
501 
502                     ostei_general_vrr_K(3, 0, 3, 0, 3,
503                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
504                             PRIM_INT__f_s_d_s, PRIM_INT__f_s_p_s, NULL, PRIM_INT__d_s_d_s, NULL, PRIM_INT__f_s_f_s);
505 
506 
507 
508                     // Forming PRIM_INT__s_s_p_s[5 * 3];
509                     for(n = 0; n < 5; ++n)  // loop over orders of auxiliary function
510                     {
511 
512                         PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
513                         PRIM_INT__s_s_p_s[n * 3 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 0]);
514 
515                         PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
516                         PRIM_INT__s_s_p_s[n * 3 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 1]);
517 
518                         PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
519                         PRIM_INT__s_s_p_s[n * 3 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_p_s[n * 3 + 2]);
520 
521                     }
522 
523 
524 
525                     // Forming PRIM_INT__p_s_d_s[4 * 18];
526                     for(n = 0; n < 4; ++n)  // loop over orders of auxiliary function
527                     {
528 
529                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__p_s_p_s[n * 9 + 0]);
530                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_p_s[(n+1) * 9 + 0], PRIM_INT__p_s_d_s[n * 18 + 0]);
531                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__p_s_d_s[n * 18 + 0]);
532                         PRIM_INT__p_s_d_s[n * 18 + 0] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__p_s_d_s[n * 18 + 0]);
533 
534                         PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_p_s[n * 9 + 1]);
535                         PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_p_s[(n+1) * 9 + 1], PRIM_INT__p_s_d_s[n * 18 + 3]);
536                         PRIM_INT__p_s_d_s[n * 18 + 3] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__p_s_d_s[n * 18 + 3]);
537 
538                         PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__p_s_p_s[n * 9 + 2]);
539                         PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_p_s[(n+1) * 9 + 2], PRIM_INT__p_s_d_s[n * 18 + 5]);
540                         PRIM_INT__p_s_d_s[n * 18 + 5] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 0], PRIM_INT__p_s_s_s[n * 3 + 0]), PRIM_INT__p_s_d_s[n * 18 + 5]);
541 
542                         PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_MUL(Q_PA[0], PRIM_INT__p_s_p_s[n * 9 + 3]);
543                         PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_p_s[(n+1) * 9 + 3], PRIM_INT__p_s_d_s[n * 18 + 6]);
544                         PRIM_INT__p_s_d_s[n * 18 + 6] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__p_s_d_s[n * 18 + 6]);
545 
546                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_p_s[n * 9 + 4]);
547                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_p_s[(n+1) * 9 + 4], PRIM_INT__p_s_d_s[n * 18 + 9]);
548                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__p_s_d_s[n * 18 + 9]);
549                         PRIM_INT__p_s_d_s[n * 18 + 9] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__p_s_d_s[n * 18 + 9]);
550 
551                         PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_MUL(Q_PA[2], PRIM_INT__p_s_p_s[n * 9 + 5]);
552                         PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_p_s[(n+1) * 9 + 5], PRIM_INT__p_s_d_s[n * 18 + 11]);
553                         PRIM_INT__p_s_d_s[n * 18 + 11] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 1], PRIM_INT__p_s_s_s[n * 3 + 1]), PRIM_INT__p_s_d_s[n * 18 + 11]);
554 
555                         PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_MUL(Q_PA[0], PRIM_INT__p_s_p_s[n * 9 + 6]);
556                         PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__p_s_p_s[(n+1) * 9 + 6], PRIM_INT__p_s_d_s[n * 18 + 12]);
557                         PRIM_INT__p_s_d_s[n * 18 + 12] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__p_s_d_s[n * 18 + 12]);
558 
559                         PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_MUL(Q_PA[1], PRIM_INT__p_s_p_s[n * 9 + 7]);
560                         PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__p_s_p_s[(n+1) * 9 + 7], PRIM_INT__p_s_d_s[n * 18 + 15]);
561                         PRIM_INT__p_s_d_s[n * 18 + 15] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__p_s_d_s[n * 18 + 15]);
562 
563                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_MUL(Q_PA[2], PRIM_INT__p_s_p_s[n * 9 + 8]);
564                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__p_s_p_s[(n+1) * 9 + 8], PRIM_INT__p_s_d_s[n * 18 + 17]);
565                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__p_s_s_s[(n+1) * 3 + 2], PRIM_INT__p_s_s_s[n * 3 + 2]), PRIM_INT__p_s_d_s[n * 18 + 17]);
566                         PRIM_INT__p_s_d_s[n * 18 + 17] = SIMINT_FMADD( vrr_const_1_over_2pq, PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__p_s_d_s[n * 18 + 17]);
567 
568                     }
569 
570 
571                     VRR_K_d_s_f_s(
572                             PRIM_INT__d_s_f_s,
573                             PRIM_INT__d_s_d_s,
574                             PRIM_INT__d_s_p_s,
575                             PRIM_INT__p_s_d_s,
576                             Q_PA,
577                             a_over_q,
578                             aoq_PQ,
579                             one_over_2pq,
580                             one_over_2q,
581                             3);
582 
583 
584                     ostei_general_vrr_K(3, 0, 4, 0, 2,
585                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
586                             PRIM_INT__f_s_f_s, PRIM_INT__f_s_d_s, NULL, PRIM_INT__d_s_f_s, NULL, PRIM_INT__f_s_g_s);
587 
588 
589 
590                     // Forming PRIM_INT__s_s_d_s[4 * 6];
591                     for(n = 0; n < 4; ++n)  // loop over orders of auxiliary function
592                     {
593 
594                         PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_MUL(Q_PA[0], PRIM_INT__s_s_p_s[n * 3 + 0]);
595                         PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_p_s[(n+1) * 3 + 0], PRIM_INT__s_s_d_s[n * 6 + 0]);
596                         PRIM_INT__s_s_d_s[n * 6 + 0] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 0]);
597 
598                         PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_MUL(Q_PA[1], PRIM_INT__s_s_p_s[n * 3 + 1]);
599                         PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_p_s[(n+1) * 3 + 1], PRIM_INT__s_s_d_s[n * 6 + 3]);
600                         PRIM_INT__s_s_d_s[n * 6 + 3] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 3]);
601 
602                         PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_MUL(Q_PA[2], PRIM_INT__s_s_p_s[n * 3 + 2]);
603                         PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_p_s[(n+1) * 3 + 2], PRIM_INT__s_s_d_s[n * 6 + 5]);
604                         PRIM_INT__s_s_d_s[n * 6 + 5] = SIMINT_FMADD( vrr_const_1_over_2q, SIMINT_FMADD(a_over_q, PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_s[n * 1 + 0]), PRIM_INT__s_s_d_s[n * 6 + 5]);
605 
606                     }
607 
608 
609                     VRR_K_p_s_f_s(
610                             PRIM_INT__p_s_f_s,
611                             PRIM_INT__p_s_d_s,
612                             PRIM_INT__p_s_p_s,
613                             PRIM_INT__s_s_d_s,
614                             Q_PA,
615                             a_over_q,
616                             aoq_PQ,
617                             one_over_2pq,
618                             one_over_2q,
619                             3);
620 
621 
622                     ostei_general_vrr_K(2, 0, 4, 0, 2,
623                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
624                             PRIM_INT__d_s_f_s, PRIM_INT__d_s_d_s, NULL, PRIM_INT__p_s_f_s, NULL, PRIM_INT__d_s_g_s);
625 
626 
627                     ostei_general_vrr_K(3, 0, 5, 0, 1,
628                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
629                             PRIM_INT__f_s_g_s, PRIM_INT__f_s_f_s, NULL, PRIM_INT__d_s_g_s, NULL, PRIM_INT__f_s_h_s);
630 
631 
632                     VRR_I_g_s_s_s(
633                             PRIM_INT__g_s_s_s,
634                             PRIM_INT__f_s_s_s,
635                             PRIM_INT__d_s_s_s,
636                             P_PA,
637                             a_over_p,
638                             aop_PQ,
639                             one_over_2p,
640                             8);
641 
642 
643                     VRR_K_g_s_p_s(
644                             PRIM_INT__g_s_p_s,
645                             PRIM_INT__g_s_s_s,
646                             PRIM_INT__f_s_s_s,
647                             Q_PA,
648                             aoq_PQ,
649                             one_over_2pq,
650                             5);
651 
652 
653                     ostei_general_vrr_K(4, 0, 2, 0, 4,
654                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
655                             PRIM_INT__g_s_p_s, PRIM_INT__g_s_s_s, NULL, PRIM_INT__f_s_p_s, NULL, PRIM_INT__g_s_d_s);
656 
657 
658                     ostei_general_vrr_K(4, 0, 3, 0, 3,
659                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
660                             PRIM_INT__g_s_d_s, PRIM_INT__g_s_p_s, NULL, PRIM_INT__f_s_d_s, NULL, PRIM_INT__g_s_f_s);
661 
662 
663                     ostei_general_vrr_K(4, 0, 4, 0, 2,
664                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
665                             PRIM_INT__g_s_f_s, PRIM_INT__g_s_d_s, NULL, PRIM_INT__f_s_f_s, NULL, PRIM_INT__g_s_g_s);
666 
667 
668                     ostei_general_vrr_K(4, 0, 5, 0, 1,
669                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
670                             PRIM_INT__g_s_g_s, PRIM_INT__g_s_f_s, NULL, PRIM_INT__f_s_g_s, NULL, PRIM_INT__g_s_h_s);
671 
672 
673                     VRR_I_h_s_s_s(
674                             PRIM_INT__h_s_s_s,
675                             PRIM_INT__g_s_s_s,
676                             PRIM_INT__f_s_s_s,
677                             P_PA,
678                             a_over_p,
679                             aop_PQ,
680                             one_over_2p,
681                             7);
682 
683 
684                     ostei_general_vrr_K(5, 0, 1, 0, 5,
685                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
686                             PRIM_INT__h_s_s_s, NULL, NULL, PRIM_INT__g_s_s_s, NULL, PRIM_INT__h_s_p_s);
687 
688 
689                     ostei_general_vrr_K(5, 0, 2, 0, 4,
690                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
691                             PRIM_INT__h_s_p_s, PRIM_INT__h_s_s_s, NULL, PRIM_INT__g_s_p_s, NULL, PRIM_INT__h_s_d_s);
692 
693 
694                     ostei_general_vrr_K(5, 0, 3, 0, 3,
695                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
696                             PRIM_INT__h_s_d_s, PRIM_INT__h_s_p_s, NULL, PRIM_INT__g_s_d_s, NULL, PRIM_INT__h_s_f_s);
697 
698 
699                     ostei_general_vrr_K(5, 0, 4, 0, 2,
700                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
701                             PRIM_INT__h_s_f_s, PRIM_INT__h_s_d_s, NULL, PRIM_INT__g_s_f_s, NULL, PRIM_INT__h_s_g_s);
702 
703 
704                     ostei_general_vrr_K(5, 0, 5, 0, 1,
705                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
706                             PRIM_INT__h_s_g_s, PRIM_INT__h_s_f_s, NULL, PRIM_INT__g_s_g_s, NULL, PRIM_INT__h_s_h_s);
707 
708 
709                     ostei_general_vrr1_I(6, 6,
710                             one_over_2p, a_over_p, aop_PQ, P_PA,
711                             PRIM_INT__h_s_s_s, PRIM_INT__g_s_s_s, PRIM_INT__i_s_s_s);
712 
713 
714                     ostei_general_vrr_K(6, 0, 1, 0, 5,
715                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
716                             PRIM_INT__i_s_s_s, NULL, NULL, PRIM_INT__h_s_s_s, NULL, PRIM_INT__i_s_p_s);
717 
718 
719                     ostei_general_vrr_K(6, 0, 2, 0, 4,
720                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
721                             PRIM_INT__i_s_p_s, PRIM_INT__i_s_s_s, NULL, PRIM_INT__h_s_p_s, NULL, PRIM_INT__i_s_d_s);
722 
723 
724                     ostei_general_vrr_K(6, 0, 3, 0, 3,
725                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
726                             PRIM_INT__i_s_d_s, PRIM_INT__i_s_p_s, NULL, PRIM_INT__h_s_d_s, NULL, PRIM_INT__i_s_f_s);
727 
728 
729                     ostei_general_vrr_K(6, 0, 4, 0, 2,
730                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
731                             PRIM_INT__i_s_f_s, PRIM_INT__i_s_d_s, NULL, PRIM_INT__h_s_f_s, NULL, PRIM_INT__i_s_g_s);
732 
733 
734                     ostei_general_vrr_K(6, 0, 5, 0, 1,
735                             one_over_2q, a_over_q, one_over_2pq, aoq_PQ, Q_PA,
736                             PRIM_INT__i_s_g_s, PRIM_INT__i_s_f_s, NULL, PRIM_INT__h_s_g_s, NULL, PRIM_INT__i_s_h_s);
737 
738 
739 
740 
741                     ////////////////////////////////////
742                     // Accumulate contracted integrals
743                     ////////////////////////////////////
744                     if(lastoffset == 0)
745                     {
746                         contract_all(210, PRIM_INT__f_s_h_s, PRIM_PTR_INT__f_s_h_s);
747                         contract_all(315, PRIM_INT__g_s_h_s, PRIM_PTR_INT__g_s_h_s);
748                         contract_all(441, PRIM_INT__h_s_h_s, PRIM_PTR_INT__h_s_h_s);
749                         contract_all(588, PRIM_INT__i_s_h_s, PRIM_PTR_INT__i_s_h_s);
750                     }
751                     else
752                     {
753                         contract(210, shelloffsets, PRIM_INT__f_s_h_s, PRIM_PTR_INT__f_s_h_s);
754                         contract(315, shelloffsets, PRIM_INT__g_s_h_s, PRIM_PTR_INT__g_s_h_s);
755                         contract(441, shelloffsets, PRIM_INT__h_s_h_s, PRIM_PTR_INT__h_s_h_s);
756                         contract(588, shelloffsets, PRIM_INT__i_s_h_s, PRIM_PTR_INT__i_s_h_s);
757                         PRIM_PTR_INT__f_s_h_s += lastoffset*210;
758                         PRIM_PTR_INT__g_s_h_s += lastoffset*315;
759                         PRIM_PTR_INT__h_s_h_s += lastoffset*441;
760                         PRIM_PTR_INT__i_s_h_s += lastoffset*588;
761                     }
762 
763                 }  // close loop over j
764             }  // close loop over i
765 
766             //Advance to the next batch
767             jstart = SIMINT_SIMD_ROUND(jend);
768 
769             //////////////////////////////////////////////
770             // Contracted integrals: Horizontal recurrance
771             //////////////////////////////////////////////
772 
773 
774             const double hAB[3] = { P.AB_x[ab], P.AB_y[ab], P.AB_z[ab] };
775 
776 
777             for(abcd = 0; abcd < nshellbatch; ++abcd, ++real_abcd)
778             {
779 
780                 // set up HRR pointers
781                 double const * restrict HRR_INT__f_s_h_s = INT__f_s_h_s + abcd * 210;
782                 double const * restrict HRR_INT__g_s_h_s = INT__g_s_h_s + abcd * 315;
783                 double const * restrict HRR_INT__h_s_h_s = INT__h_s_h_s + abcd * 441;
784                 double const * restrict HRR_INT__i_s_h_s = INT__i_s_h_s + abcd * 588;
785                 double * restrict HRR_INT__f_f_h_s = INT__f_f_h_s + real_abcd * 2100;
786 
787                 // form INT__f_p_h_s
788                 HRR_J_f_p(
789                     HRR_INT__f_p_h_s,
790                     HRR_INT__f_s_h_s,
791                     HRR_INT__g_s_h_s,
792                     hAB, 21);
793 
794                 // form INT__g_p_h_s
795                 HRR_J_g_p(
796                     HRR_INT__g_p_h_s,
797                     HRR_INT__g_s_h_s,
798                     HRR_INT__h_s_h_s,
799                     hAB, 21);
800 
801                 // form INT__h_p_h_s
802                 ostei_general_hrr_J(5, 1, 5, 0, hAB, HRR_INT__i_s_h_s, HRR_INT__h_s_h_s, HRR_INT__h_p_h_s);
803 
804                 // form INT__f_d_h_s
805                 HRR_J_f_d(
806                     HRR_INT__f_d_h_s,
807                     HRR_INT__f_p_h_s,
808                     HRR_INT__g_p_h_s,
809                     hAB, 21);
810 
811                 // form INT__g_d_h_s
812                 ostei_general_hrr_J(4, 2, 5, 0, hAB, HRR_INT__h_p_h_s, HRR_INT__g_p_h_s, HRR_INT__g_d_h_s);
813 
814                 // form INT__f_f_h_s
815                 ostei_general_hrr_J(3, 3, 5, 0, hAB, HRR_INT__g_d_h_s, HRR_INT__f_d_h_s, HRR_INT__f_f_h_s);
816 
817 
818             }  // close HRR loop
819 
820 
821         }   // close loop cdbatch
822 
823         istart = iend;
824     }  // close loop over ab
825 
826     return P.nshell12_clip * Q.nshell12_clip;
827 }
828 
ostei_f_f_s_h(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__f_f_s_h)829 int ostei_f_f_s_h(struct simint_multi_shellpair const P,
830                   struct simint_multi_shellpair const Q,
831                   double screen_tol,
832                   double * const restrict work,
833                   double * const restrict INT__f_f_s_h)
834 {
835     double Q_AB[3*Q.nshell12];
836     struct simint_multi_shellpair Q_tmp = Q;
837     Q_tmp.PA_x = Q.PB_x;  Q_tmp.PA_y = Q.PB_y;  Q_tmp.PA_z = Q.PB_z;
838     Q_tmp.PB_x = Q.PA_x;  Q_tmp.PB_y = Q.PA_y;  Q_tmp.PB_z = Q.PA_z;
839     Q_tmp.AB_x = Q_AB;
840     Q_tmp.AB_y = Q_AB + Q.nshell12;
841     Q_tmp.AB_z = Q_AB + 2*Q.nshell12;
842 
843     for(int i = 0; i < Q.nshell12; i++)
844     {
845         Q_tmp.AB_x[i] = -Q.AB_x[i];
846         Q_tmp.AB_y[i] = -Q.AB_y[i];
847         Q_tmp.AB_z[i] = -Q.AB_z[i];
848     }
849 
850     int ret = ostei_f_f_h_s(P, Q_tmp, screen_tol, work, INT__f_f_s_h);
851     double buffer[2100] SIMINT_ALIGN_ARRAY_DBL;
852 
853     for(int q = 0; q < ret; q++)
854     {
855         int idx = 0;
856         for(int a = 0; a < 10; ++a)
857         for(int b = 0; b < 10; ++b)
858         for(int c = 0; c < 1; ++c)
859         for(int d = 0; d < 21; ++d)
860             buffer[idx++] = INT__f_f_s_h[q*2100+a*210+b*21+d*1+c];
861 
862         memcpy(INT__f_f_s_h+q*2100, buffer, 2100*sizeof(double));
863     }
864 
865     return ret;
866 }
867 
868