1 #include "simint/boys/boys.h"
2 #include "simint/ostei/gen/ostei_generated.h"
3 #include "simint/vectorization/vectorization.h"
4 #include <math.h>
5 #include <string.h>
6
7
ostei_s_s_s_p(struct simint_multi_shellpair const P,struct simint_multi_shellpair const Q,double screen_tol,double * const restrict work,double * const restrict INT__s_s_s_p)8 int ostei_s_s_s_p(struct simint_multi_shellpair const P,
9 struct simint_multi_shellpair const Q,
10 double screen_tol,
11 double * const restrict work,
12 double * const restrict INT__s_s_s_p)
13 {
14
15 SIMINT_ASSUME_ALIGN_DBL(work);
16 SIMINT_ASSUME_ALIGN_DBL(INT__s_s_s_p);
17 memset(INT__s_s_s_p, 0, P.nshell12_clip * Q.nshell12_clip * 3 * sizeof(double));
18
19 int ab, cd, abcd;
20 int istart, jstart;
21 int iprimcd, nprim_icd, icd;
22 const int check_screen = (screen_tol > 0.0);
23 int i, j;
24 int n;
25 int not_screened;
26
27 // partition workspace
28 SIMINT_DBLTYPE * const primwork = (SIMINT_DBLTYPE *)(work + SIMINT_NSHELL_SIMD*0);
29 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_s = primwork + 0;
30 SIMINT_DBLTYPE * const restrict PRIM_INT__s_s_s_p = primwork + 2;
31 double * const hrrwork = (double *)(primwork + 5);
32
33
34 // Create constants
35 const SIMINT_DBLTYPE const_1 = SIMINT_DBLSET1(1);
36 const SIMINT_DBLTYPE one_half = SIMINT_DBLSET1(0.5);
37
38
39 ////////////////////////////////////////
40 // Loop over shells and primitives
41 ////////////////////////////////////////
42
43 abcd = 0;
44 istart = 0;
45 for(ab = 0; ab < P.nshell12_clip; ++ab)
46 {
47 const int iend = istart + P.nprim12[ab];
48
49 cd = 0;
50 jstart = 0;
51
52 for(cd = 0; cd < Q.nshell12_clip; cd += SIMINT_NSHELL_SIMD)
53 {
54 const int nshellbatch = ((cd + SIMINT_NSHELL_SIMD) > Q.nshell12_clip) ? Q.nshell12_clip - cd : SIMINT_NSHELL_SIMD;
55 int jend = jstart;
56 for(i = 0; i < nshellbatch; i++)
57 jend += Q.nprim12[cd+i];
58
59
60 for(i = istart; i < iend; ++i)
61 {
62 SIMINT_DBLTYPE bra_screen_max; // only used if check_screen
63
64 if(check_screen)
65 {
66 // Skip this whole thing if always insignificant
67 if((P.screen[i] * Q.screen_max) < screen_tol)
68 continue;
69 bra_screen_max = SIMINT_DBLSET1(P.screen[i]);
70 }
71
72 icd = 0;
73 iprimcd = 0;
74 nprim_icd = Q.nprim12[cd];
75 double * restrict PRIM_PTR_INT__s_s_s_p = INT__s_s_s_p + abcd * 3;
76
77
78
79 // Load these one per loop over i
80 const SIMINT_DBLTYPE P_alpha = SIMINT_DBLSET1(P.alpha[i]);
81 const SIMINT_DBLTYPE P_prefac = SIMINT_DBLSET1(P.prefac[i]);
82 const SIMINT_DBLTYPE Pxyz[3] = { SIMINT_DBLSET1(P.x[i]), SIMINT_DBLSET1(P.y[i]), SIMINT_DBLSET1(P.z[i]) };
83
84
85 for(j = jstart; j < jend; j += SIMINT_SIMD_LEN)
86 {
87 // calculate the shell offsets
88 // these are the offset from the shell pointed to by cd
89 // for each element
90 int shelloffsets[SIMINT_SIMD_LEN] = {0};
91 int lastoffset = 0;
92 const int nlane = ( ((j + SIMINT_SIMD_LEN) < jend) ? SIMINT_SIMD_LEN : (jend - j));
93
94 if((iprimcd + SIMINT_SIMD_LEN) >= nprim_icd)
95 {
96 // Handle if the first element of the vector is a new shell
97 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
98 {
99 nprim_icd += Q.nprim12[cd + (++icd)];
100 PRIM_PTR_INT__s_s_s_p += 3;
101 }
102 iprimcd++;
103 for(n = 1; n < SIMINT_SIMD_LEN; ++n)
104 {
105 if(iprimcd >= nprim_icd && ((icd+1) < nshellbatch))
106 {
107 shelloffsets[n] = shelloffsets[n-1] + 1;
108 lastoffset++;
109 nprim_icd += Q.nprim12[cd + (++icd)];
110 }
111 else
112 shelloffsets[n] = shelloffsets[n-1];
113 iprimcd++;
114 }
115 }
116 else
117 iprimcd += SIMINT_SIMD_LEN;
118
119 // Do we have to compute this vector (or has it been screened out)?
120 // (not_screened != 0 means we have to do this vector)
121 if(check_screen)
122 {
123 const double vmax = vector_max(SIMINT_MUL(bra_screen_max, SIMINT_DBLLOAD(Q.screen, j)));
124 if(vmax < screen_tol)
125 {
126 PRIM_PTR_INT__s_s_s_p += lastoffset*3;
127 continue;
128 }
129 }
130
131 const SIMINT_DBLTYPE Q_alpha = SIMINT_DBLLOAD(Q.alpha, j);
132 const SIMINT_DBLTYPE PQalpha_mul = SIMINT_MUL(P_alpha, Q_alpha);
133 const SIMINT_DBLTYPE PQalpha_sum = SIMINT_ADD(P_alpha, Q_alpha);
134 const SIMINT_DBLTYPE one_over_PQalpha_sum = SIMINT_DIV(const_1, PQalpha_sum);
135
136
137 /* construct R2 = (Px - Qx)**2 + (Py - Qy)**2 + (Pz -Qz)**2 */
138 SIMINT_DBLTYPE PQ[3];
139 PQ[0] = SIMINT_SUB(Pxyz[0], SIMINT_DBLLOAD(Q.x, j));
140 PQ[1] = SIMINT_SUB(Pxyz[1], SIMINT_DBLLOAD(Q.y, j));
141 PQ[2] = SIMINT_SUB(Pxyz[2], SIMINT_DBLLOAD(Q.z, j));
142 SIMINT_DBLTYPE R2 = SIMINT_MUL(PQ[0], PQ[0]);
143 R2 = SIMINT_FMADD(PQ[1], PQ[1], R2);
144 R2 = SIMINT_FMADD(PQ[2], PQ[2], R2);
145
146 const SIMINT_DBLTYPE alpha = SIMINT_MUL(PQalpha_mul, one_over_PQalpha_sum); // alpha from MEST
147 const SIMINT_DBLTYPE one_over_p = SIMINT_DIV(const_1, P_alpha);
148 const SIMINT_DBLTYPE one_over_q = SIMINT_DIV(const_1, Q_alpha);
149 const SIMINT_DBLTYPE one_over_2p = SIMINT_MUL(one_half, one_over_p);
150 const SIMINT_DBLTYPE one_over_2q = SIMINT_MUL(one_half, one_over_q);
151 const SIMINT_DBLTYPE one_over_2pq = SIMINT_MUL(one_half, one_over_PQalpha_sum);
152 const SIMINT_DBLTYPE Q_PB[3] = { SIMINT_DBLLOAD(Q.PB_x, j), SIMINT_DBLLOAD(Q.PB_y, j), SIMINT_DBLLOAD(Q.PB_z, j) };
153
154 SIMINT_DBLTYPE a_over_q = SIMINT_MUL(alpha, one_over_q);
155 SIMINT_DBLTYPE aoq_PQ[3];
156 aoq_PQ[0] = SIMINT_MUL(a_over_q, PQ[0]);
157 aoq_PQ[1] = SIMINT_MUL(a_over_q, PQ[1]);
158 aoq_PQ[2] = SIMINT_MUL(a_over_q, PQ[2]);
159 // Put a minus sign here so we don't have to in RR routines
160 a_over_q = SIMINT_NEG(a_over_q);
161
162
163 //////////////////////////////////////////////
164 // Fjt function section
165 // Maximum v value: 1
166 //////////////////////////////////////////////
167 // The parameter to the Fjt function
168 const SIMINT_DBLTYPE F_x = SIMINT_MUL(R2, alpha);
169
170
171 const SIMINT_DBLTYPE Q_prefac = mask_load(nlane, Q.prefac + j);
172
173
174 boys_F_split(PRIM_INT__s_s_s_s, F_x, 1);
175 SIMINT_DBLTYPE prefac = SIMINT_SQRT(one_over_PQalpha_sum);
176 prefac = SIMINT_MUL(SIMINT_MUL(P_prefac, Q_prefac), prefac);
177 for(n = 0; n <= 1; n++)
178 PRIM_INT__s_s_s_s[n] = SIMINT_MUL(PRIM_INT__s_s_s_s[n], prefac);
179
180 //////////////////////////////////////////////
181 // Primitive integrals: Vertical recurrance
182 //////////////////////////////////////////////
183
184
185
186
187 // Forming PRIM_INT__s_s_s_p[1 * 3];
188 for(n = 0; n < 1; ++n) // loop over orders of auxiliary function
189 {
190
191 PRIM_INT__s_s_s_p[n * 3 + 0] = SIMINT_MUL(Q_PB[0], PRIM_INT__s_s_s_s[n * 1 + 0]);
192 PRIM_INT__s_s_s_p[n * 3 + 0] = SIMINT_FMADD( aoq_PQ[0], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_p[n * 3 + 0]);
193
194 PRIM_INT__s_s_s_p[n * 3 + 1] = SIMINT_MUL(Q_PB[1], PRIM_INT__s_s_s_s[n * 1 + 0]);
195 PRIM_INT__s_s_s_p[n * 3 + 1] = SIMINT_FMADD( aoq_PQ[1], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_p[n * 3 + 1]);
196
197 PRIM_INT__s_s_s_p[n * 3 + 2] = SIMINT_MUL(Q_PB[2], PRIM_INT__s_s_s_s[n * 1 + 0]);
198 PRIM_INT__s_s_s_p[n * 3 + 2] = SIMINT_FMADD( aoq_PQ[2], PRIM_INT__s_s_s_s[(n+1) * 1 + 0], PRIM_INT__s_s_s_p[n * 3 + 2]);
199
200 }
201
202
203
204
205 ////////////////////////////////////
206 // Accumulate contracted integrals
207 ////////////////////////////////////
208 if(lastoffset == 0)
209 {
210 contract_all(3, PRIM_INT__s_s_s_p, PRIM_PTR_INT__s_s_s_p);
211 }
212 else
213 {
214 contract(3, shelloffsets, PRIM_INT__s_s_s_p, PRIM_PTR_INT__s_s_s_p);
215 PRIM_PTR_INT__s_s_s_p += lastoffset*3;
216 }
217
218 } // close loop over j
219 } // close loop over i
220
221 //Advance to the next batch
222 jstart = SIMINT_SIMD_ROUND(jend);
223 abcd += nshellbatch;
224
225 } // close loop cdbatch
226
227 istart = iend;
228 } // close loop over ab
229
230 return P.nshell12_clip * Q.nshell12_clip;
231 }
232
233