1 /* Copyright 2014-2018 The PySCF Developers. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14
15 *
16 * Incore version of non-relativistic integrals JK contraction
17 * ic in CVHFic... is short for incore
18 */
19
20 #include <stdlib.h>
21 #include <math.h>
22 //#include <omp.h>
23 #include "config.h"
24 #include "cvhf.h"
25 #include "np_helper/np_helper.h"
26 #include "fblas.h"
27
28
29 /*
30 * J
31 */
CVHFics8_ij_s2kl_o0(double * eri,double * dm,double * vj,int nao,int ic,int jc)32 void CVHFics8_ij_s2kl_o0(double *eri, double *dm, double *vj,
33 int nao, int ic, int jc)
34 {
35 int i, j, ij;
36 double dm_ij;
37 double vj_ij = 0;
38
39 if (ic > jc) {
40 dm_ij = dm[ic*nao+jc] + dm[jc*nao+ic];
41 } else if (ic == jc) {
42 dm_ij = dm[ic*nao+ic];
43 } else {
44 return;
45 }
46
47 for (i = 0, ij = 0; i < ic; i++) {
48 for (j = 0; j < i; j++, ij++) {
49 vj_ij += eri[ij] *(dm[i*nao+j]+dm[j*nao+i]);
50 vj[i*nao+j] += eri[ij] * dm_ij;
51 }
52 vj_ij += eri[ij] * dm[i*nao+i];
53 vj[i*nao+i] += eri[ij] * dm_ij;
54 ij++;
55 }
56 // i == ic
57 for (j = 0; j < jc; j++, ij++) {
58 vj_ij += eri[ij] *(dm[i*nao+j]+dm[j*nao+i]);
59 vj[i*nao+j] += eri[ij] * dm_ij;
60 }
61 vj_ij += eri[ij] * dm_ij;
62
63 vj[ic*nao+jc] += vj_ij;
64 }
65
CVHFics4_ij_s2kl_o0(double * eri,double * dm,double * vj,int nao,int ic,int jc)66 void CVHFics4_ij_s2kl_o0(double *eri, double *dm, double *vj,
67 int nao, int ic, int jc)
68 {
69 int i, j, ij;
70 double dm_ij;
71
72 if (ic > jc) {
73 dm_ij = dm[ic*nao+jc] + dm[jc*nao+ic];
74 } else if (ic == jc) {
75 dm_ij = dm[ic*nao+ic];
76 } else {
77 return;
78 }
79
80 for (i = 0, ij = 0; i < nao; i++) {
81 for (j = 0; j <= i; j++, ij++) {
82 vj[i*nao+j] += eri[ij] * dm_ij;
83 }
84 }
85 }
86
CVHFics2kl_kl_s1ij_o0(double * eri,double * dm,double * vj,int nao,int ic,int jc)87 void CVHFics2kl_kl_s1ij_o0(double *eri, double *dm, double *vj,
88 int nao, int ic, int jc)
89 {
90 int i, j, ij;
91 double vj_ij = 0;
92 for (i = 0, ij = 0; i < nao; i++) {
93 for (j = 0; j < i; j++, ij++) {
94 vj_ij += eri[ij] *(dm[i*nao+j]+dm[j*nao+i]);
95 }
96 vj_ij += eri[ij] * dm[i*nao+i];
97 ij++;
98 }
99 vj[ic*nao+jc] += vj_ij;
100 }
101
102
103
104 /*
105 * K
106 */
CVHFics8_jk_s1il_o0(double * eri,double * dm,double * vk,int nao,int ic,int jc)107 void CVHFics8_jk_s1il_o0(double *eri, double *dm, double *vk,
108 int nao, int ic, int jc)
109 {
110 int k, l, kl;
111 if (ic > jc) {
112 for (k = 0, kl = 0; k < ic; k++) {
113 for (l = 0; l < k; l++, kl++) {
114 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
115 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
116 vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
117 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
118 vk[l*nao+jc] += eri[kl] * dm[k*nao+ic];
119 vk[k*nao+jc] += eri[kl] * dm[l*nao+ic];
120 vk[l*nao+ic] += eri[kl] * dm[k*nao+jc];
121 vk[k*nao+ic] += eri[kl] * dm[l*nao+jc];
122 }
123 vk[jc*nao+k] += eri[kl] * dm[ic*nao+k];
124 vk[ic*nao+k] += eri[kl] * dm[jc*nao+k];
125 vk[k*nao+jc] += eri[kl] * dm[k*nao+ic];
126 vk[k*nao+ic] += eri[kl] * dm[k*nao+jc];
127 kl++;
128 }
129 k = ic;
130 for (l = 0; l < jc; l++, kl++) { // l<k
131 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
132 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
133 vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
134 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
135 vk[l*nao+jc] += eri[kl] * dm[k*nao+ic];
136 vk[k*nao+jc] += eri[kl] * dm[l*nao+ic];
137 vk[l*nao+ic] += eri[kl] * dm[k*nao+jc];
138 vk[k*nao+ic] += eri[kl] * dm[l*nao+jc];
139 }
140 // ic = k, jc = l;
141 vk[jc*nao+jc] += eri[kl] * dm[ic*nao+ic];
142 vk[ic*nao+jc] += eri[kl] * dm[jc*nao+ic];
143 vk[jc*nao+ic] += eri[kl] * dm[ic*nao+jc];
144 vk[ic*nao+ic] += eri[kl] * dm[jc*nao+jc];
145 } else if (ic == jc) {
146 for (k = 0, kl = 0; k < ic; k++) {
147 for (l = 0; l < k; l++, kl++) {
148 vk[ic*nao+l] += eri[kl] * dm[ic*nao+k];
149 vk[ic*nao+k] += eri[kl] * dm[ic*nao+l];
150 vk[l*nao+ic] += eri[kl] * dm[k*nao+ic];
151 vk[k*nao+ic] += eri[kl] * dm[l*nao+ic];
152 }
153 vk[ic*nao+k] += eri[kl] * dm[ic*nao+k];
154 vk[k*nao+ic] += eri[kl] * dm[k*nao+ic];
155 kl++;
156 }
157 k = ic;
158 for (l = 0; l < k; l++, kl++) { // l<k
159 vk[ic*nao+l] += eri[kl] * dm[ic*nao+ic];
160 vk[l*nao+ic] += eri[kl] * dm[ic*nao+ic];
161 vk[ic*nao+ic] += eri[kl] * dm[ic*nao+l];
162 vk[ic*nao+ic] += eri[kl] * dm[l*nao+ic];
163 }
164 // ic = jc = k = l
165 vk[ic*nao+ic] += eri[kl] * dm[ic*nao+ic];
166 }
167 }
168
CVHFics8_jk_s2il_o0(double * eri,double * dm,double * vk,int nao,int ic,int jc)169 void CVHFics8_jk_s2il_o0(double *eri, double *dm, double *vk,
170 int nao, int ic, int jc)
171 {
172 int k, l;
173 //double vk_jj = 0;
174 //double vk_ij = 0;
175 if (ic > jc) {
176 // k < jc
177 for (k=0; k < jc; k++) {
178 for (l = 0; l < k; l++) {
179 vk[jc*nao+l] += eri[l] * dm[ic*nao+k];
180 vk[jc*nao+k] += eri[l] * dm[ic*nao+l];
181 vk[ic*nao+l] += eri[l] * dm[jc*nao+k];
182 vk[ic*nao+k] += eri[l] * dm[jc*nao+l];
183 }
184 // l = k
185 vk[jc*nao+k] += eri[k] * dm[ic*nao+k];
186 vk[ic*nao+k] += eri[k] * dm[jc*nao+k];
187 eri += k + 1;
188 }
189 // k = jc
190 for (l = 0; l < k; l++) {
191 vk[jc*nao+l ] += eri[l] * dm[ic*nao+jc];
192 vk[ic*nao+l ] += eri[l] * dm[jc*nao+jc];
193 vk[jc*nao+jc] += eri[l] *(dm[ic*nao+l] + dm[l*nao+ic]);
194 vk[ic*nao+jc] += eri[l] * dm[jc*nao+l];
195 }
196 // l = k = jc
197 vk[jc*nao+jc] += eri[l] *(dm[ic*nao+jc] + dm[jc*nao+ic]);
198 vk[ic*nao+jc] += eri[l] * dm[jc*nao+jc];
199 eri += k + 1;
200 // k > jc
201 for (k=jc+1; k < ic; k++) {
202 // l < jc
203 for (l = 0; l < jc; l++) {
204 vk[jc*nao+l] += eri[l] * dm[ic*nao+k];
205 vk[ic*nao+l] += eri[l] * dm[jc*nao+k];
206 vk[ic*nao+k] += eri[l] * dm[jc*nao+l];
207 vk[k*nao+jc] += eri[l] * dm[l*nao+ic];
208 }
209 // l = jc
210 vk[jc*nao+jc] += eri[l] *(dm[ic*nao+k] + dm[k*nao+ic]);
211 vk[ic*nao+jc] += eri[l] * dm[jc*nao+k];
212 vk[ic*nao+k] += eri[l] * dm[jc*nao+jc];
213 vk[k*nao+jc] += eri[l] * dm[jc*nao+ic];
214 //eri += jc+1;
215 // l > jc
216 for (l = jc+1; l < k; l++) {
217 vk[ic*nao+l] += eri[l] * dm[jc*nao+k];
218 vk[ic*nao+k] += eri[l] * dm[jc*nao+l];
219 vk[l*nao+jc] += eri[l] * dm[k*nao+ic];
220 vk[k*nao+jc] += eri[l] * dm[l*nao+ic];
221 }
222 // l = k
223 vk[jc*nao+k] += eri[l] * dm[ic*nao+k];
224 vk[ic*nao+k] += eri[l] * dm[jc*nao+k];
225 vk[k*nao+jc] += eri[l] * dm[k*nao+ic];
226 eri += k + 1;
227 }
228 // k = ic
229 for (l = 0; l < jc; l++) {
230 vk[jc*nao+l] += eri[l] * dm[ic*nao+ic];
231 vk[ic*nao+l] += eri[l] * dm[jc*nao+ic];
232 vk[ic*nao+ic] += eri[l] *(dm[jc*nao+l] + dm[l*nao+jc]);
233 vk[ic*nao+jc] += eri[l] * dm[l*nao+ic];
234 }
235 // ic = k, jc = l;
236 vk[jc*nao+jc] += eri[l] * dm[ic*nao+ic];
237 vk[ic*nao+jc] += eri[l] * dm[jc*nao+ic];
238 vk[ic*nao+ic] += eri[l] * dm[jc*nao+jc];
239 eri += jc + 1;
240 } else if (ic == jc) {
241 for (k = 0; k < ic-1; k+=2) {
242 for (l = 0; l < k; l++) {
243 vk[ic*nao+l] += eri[l] * dm[ic*nao+k];
244 vk[ic*nao+k] += eri[l] * dm[ic*nao+l];
245 vk[ic*nao+l ] += eri[l+k+1] * dm[ic*nao+k+1];
246 vk[ic*nao+k+1] += eri[l+k+1] * dm[ic*nao+l ];
247 }
248 vk[ic*nao+k] += eri[k] * dm[ic*nao+k];
249 eri += k+1;
250 vk[ic*nao+k ] += eri[k] * dm[ic*nao+k+1];
251 vk[ic*nao+k+1] += eri[k] * dm[ic*nao+k ];
252 vk[ic*nao+k+1] += eri[k+1] * dm[ic*nao+k+1];
253 eri += k+2;
254 }
255 for (; k < ic; k++) {
256 for (l = 0; l < k; l++) {
257 vk[ic*nao+l] += eri[l] * dm[ic*nao+k];
258 vk[ic*nao+k] += eri[l] * dm[ic*nao+l];
259 }
260 vk[ic*nao+k] += eri[k] * dm[ic*nao+k];
261 eri += k+1;
262 }
263 for (l = 0; l < k; l++) { // l<k
264 vk[ic*nao+l] += eri[l] * dm[ic*nao+ic];
265 vk[ic*nao+ic] += eri[l] *(dm[ic*nao+l] + dm[l*nao+ic]);
266 }
267 // ic = jc = k = l
268 vk[ic*nao+ic] += eri[l] * dm[ic*nao+ic];
269 eri += k + 1;
270 }
271 }
272
CVHFics4_jk_s1il_o0(double * eri,double * dm,double * vk,int nao,int ic,int jc)273 void CVHFics4_jk_s1il_o0(double *eri, double *dm, double *vk,
274 int nao, int ic, int jc)
275 {
276 int k, l, kl;
277 if (ic > jc) {
278 for (k = 0, kl = 0; k < nao; k++) {
279 for (l = 0; l < k; l++, kl++) {
280 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
281 vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
282 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
283 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
284 }
285 vk[jc*nao+k] += eri[kl] * dm[ic*nao+k];
286 vk[ic*nao+k] += eri[kl] * dm[jc*nao+k];
287 kl++;
288 }
289 } else if (ic == jc) {
290 for (k = 0, kl = 0; k < nao; k++) {
291 for (l = 0; l < k; l++, kl++) {
292 vk[ic*nao+l] += eri[kl] * dm[ic*nao+k];
293 vk[ic*nao+k] += eri[kl] * dm[ic*nao+l];
294 }
295 vk[ic*nao+k] += eri[kl] * dm[ic*nao+k];
296 kl++;
297 }
298 }
299 }
300
CVHFics4_il_s1jk_o0(double * eri,double * dm,double * vk,int nao,int ic,int jc)301 void CVHFics4_il_s1jk_o0(double *eri, double *dm, double *vk,
302 int nao, int ic, int jc)
303 {
304 CVHFics4_jk_s1il_o0(eri, dm, vk, nao, ic, jc);
305 }
306
CVHFics4_jk_s2il_o0(double * eri,double * dm,double * vk,int nao,int ic,int jc)307 void CVHFics4_jk_s2il_o0(double *eri, double *dm, double *vk,
308 int nao, int ic, int jc)
309 {
310 int k, l, kl;
311 if (ic > jc) {
312 for (k = 0, kl = 0; k <= jc; k++) {
313 for (l = 0; l < k; l++, kl++) {
314 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
315 vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
316 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
317 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
318 }
319 vk[jc*nao+k] += eri[kl] * dm[ic*nao+k];
320 vk[ic*nao+k] += eri[kl] * dm[jc*nao+k];
321 kl++;
322 }
323 for (k = jc+1; k <= ic; k++) {
324 for (l = 0; l <= jc; l++, kl++) {
325 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
326 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
327 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
328 }
329 for (l = jc+1; l < k; l++, kl++) {
330 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
331 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
332 }
333 vk[ic*nao+k] += eri[kl] * dm[jc*nao+k];
334 kl++;
335 }
336 for (k = ic+1; k < nao; k++) {
337 for (l = 0, kl = k*(k+1)/2; l <= jc; l++, kl++) {
338 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
339 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
340 }
341 for (l = jc+1; l <= ic; l++, kl++) {
342 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
343 }
344 }
345 } else if (ic == jc) {
346 for (k = 0, kl = 0; k <= ic; k++) {
347 for (l = 0; l < k; l++, kl++) {
348 vk[ic*nao+l] += eri[kl] * dm[ic*nao+k];
349 vk[ic*nao+k] += eri[kl] * dm[ic*nao+l];
350 }
351 vk[ic*nao+k] += eri[kl] * dm[ic*nao+k];
352 kl++;
353 }
354 for (k = ic+1; k < nao; k++) {
355 for (l = 0, kl = k*(k+1)/2; l <= ic; l++, kl++) {
356 vk[ic*nao+l] += eri[kl] * dm[ic*nao+k];
357 }
358 }
359 }
360 }
361
CVHFics4_il_s2jk_o0(double * eri,double * dm,double * vk,int nao,int ic,int jc)362 void CVHFics4_il_s2jk_o0(double *eri, double *dm, double *vk,
363 int nao, int ic, int jc)
364 {
365 CVHFics4_jk_s2il_o0(eri, dm, vk, nao, ic, jc);
366 }
367
368
369
370 /*
371 * einsum ijkl,ij->(s2)kl
372 * 8-fold symmetry for eri: i>=j,k>=l,ij>=kl
373 * input address eri of the first element for pair ij=ic*(ic+1)/2+jc
374 * i.e. ~ &eri_ao[ij*(ij+1)/2]
375 * dm can be non-Hermitian,
376 * output vk might not be Hermitian
377 *
378 * NOTE all _s2kl (nrs8_, nrs4_, nrs2kl_) assumes the tril part of eri
379 * being stored in C-order *contiguously*. so call CVHFunpack_nrblock2tril
380 * to generate eris
381 */
CVHFics8_ij_s2kl(double * eri,double * dm,double * vj,int nao,int ic,int jc)382 void CVHFics8_ij_s2kl(double *eri, double *dm, double *vj,
383 int nao, int ic, int jc)
384 {
385 CVHFics8_ij_s2kl_o0(eri, dm, vj, nao, ic, jc);
386 }
387 // tri_dm: fold upper triangular dm to lower triangle,
388 // tri_dm[i*(i+1)/2+j] = dm[i*nao+j] + dm[j*nao+i] for i > j
CVHFics8_tridm_vj(double * eri,double * tri_dm,double * vj,int nao,int ic,int jc)389 void CVHFics8_tridm_vj(double *eri, double *tri_dm, double *vj,
390 int nao, int ic, int jc)
391 {
392 int i, j, ij;
393 double dm_ijc = tri_dm[ic*(ic+1)/2+jc];
394 double vj_ij = 0;
395 const int INC1 = 1;
396 int i1;
397
398 for (i = 0, ij = 0; i < ic; i++) {
399 i1 = i + 1;
400 vj_ij += ddot_(&i1, eri+ij, &INC1, tri_dm+ij, &INC1);
401 daxpy_(&i1, &dm_ijc, eri+ij, &INC1, vj+i*nao, &INC1);
402 ij += i1;
403 }
404 // i == ic
405 for (j = 0; j < jc; j++, ij++) {
406 vj_ij += eri[ij] * tri_dm[ij];
407 vj[i*nao+j] += eri[ij] * dm_ijc;
408 }
409 vj_ij += eri[ij] * dm_ijc;
410
411 vj[ic*nao+jc] += vj_ij;
412 }
CVHFics8_jk_s1il(double * eri,double * dm,double * vk,int nao,int ic,int jc)413 void CVHFics8_jk_s1il(double *eri, double *dm, double *vk,
414 int nao, int ic, int jc)
415 {
416 CVHFics8_jk_s1il_o0(eri, dm, vk, nao, ic, jc);
417 }
418 /*
419 * einsum ijkl,jk->(s2)il
420 * output vk should be Hermitian
421 */
CVHFics8_jk_s2il(double * eri,double * dm,double * vk,int nao,int ic,int jc)422 void CVHFics8_jk_s2il(double *eri, double *dm, double *vk,
423 int nao, int ic, int jc)
424 {
425 CVHFics8_jk_s2il_o0(eri, dm, vk, nao, ic, jc);
426 }
427
428
429 /*
430 * einsum ijkl,jk->il
431 * 4-fold symmetry for eri: i>=j,k>=l
432 */
CVHFics4_jk_s1il(double * eri,double * dm,double * vk,int nao,int ic,int jc)433 void CVHFics4_jk_s1il(double *eri, double *dm, double *vk,
434 int nao, int ic, int jc)
435 {
436 CVHFics4_jk_s1il_o0(eri, dm, vk, nao, ic, jc);
437 }
CVHFics4_il_s1jk(double * eri,double * dm,double * vk,int nao,int ic,int jc)438 void CVHFics4_il_s1jk(double *eri, double *dm, double *vk,
439 int nao, int ic, int jc)
440 {
441 CVHFics4_jk_s1il_o0(eri, dm, vk, nao, ic, jc);
442 }
443 /*
444 * output vk should be Hermitian
445 */
CVHFics4_jk_s2il(double * eri,double * dm,double * vk,int nao,int ic,int jc)446 void CVHFics4_jk_s2il(double *eri, double *dm, double *vk,
447 int nao, int ic, int jc)
448 {
449 CVHFics4_jk_s2il_o0(eri, dm, vk, nao, ic, jc);
450 }
CVHFics4_il_s2jk(double * eri,double * dm,double * vk,int nao,int ic,int jc)451 void CVHFics4_il_s2jk(double *eri, double *dm, double *vk,
452 int nao, int ic, int jc)
453 {
454 CVHFics4_jk_s2il_o0(eri, dm, vk, nao, ic, jc);
455 }
CVHFics4_ij_s2kl(double * eri,double * dm,double * vj,int nao,int ic,int jc)456 void CVHFics4_ij_s2kl(double *eri, double *dm, double *vj,
457 int nao, int ic, int jc)
458 {
459 CVHFics4_ij_s2kl_o0(eri, dm, vj, nao, ic, jc);
460 }
CVHFics4_kl_s2ij(double * eri,double * dm,double * vj,int nao,int ic,int jc)461 void CVHFics4_kl_s2ij(double *eri, double *dm, double *vj,
462 int nao, int ic, int jc)
463 {
464 if (ic >= jc) {
465 CVHFics2kl_kl_s1ij_o0(eri, dm, vj, nao, ic, jc);
466 }
467 }
468
469
CVHFics1_ij_s1kl(double * eri,double * dm,double * vj,int nao,int ic,int jc)470 void CVHFics1_ij_s1kl(double *eri, double *dm, double *vj,
471 int nao, int ic, int jc)
472 {
473 int i;
474 double dm_ij = dm[ic*nao+jc];
475 for (i = 0; i < nao*nao; i++) {
476 vj[i] += eri[i] * dm_ij;
477 }
478 }
CVHFics1_kl_s1ij(double * eri,double * dm,double * vj,int nao,int ic,int jc)479 void CVHFics1_kl_s1ij(double *eri, double *dm, double *vj,
480 int nao, int ic, int jc)
481 {
482 const int INC1 = 1;
483 int nn = nao * nao;
484 vj[ic*nao+jc] += ddot_(&nn, eri, &INC1, dm, &INC1);
485 }
CVHFics1_jk_s1il(double * eri,double * dm,double * vk,int nao,int ic,int jc)486 void CVHFics1_jk_s1il(double *eri, double *dm, double *vk,
487 int nao, int ic, int jc)
488 {
489 int k, l, kl;
490 for (k = 0, kl = 0; k < nao; k++) {
491 for (l = 0; l < nao; l++, kl++) {
492 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
493 } }
494 }
CVHFics1_il_s1jk(double * eri,double * dm,double * vk,int nao,int ic,int jc)495 void CVHFics1_il_s1jk(double *eri, double *dm, double *vk,
496 int nao, int ic, int jc)
497 {
498 int k, l, kl;
499 for (k = 0, kl = 0; k < nao; k++) {
500 for (l = 0; l < nao; l++, kl++) {
501 vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
502 } }
503 }
504
505
CVHFics2ij_ij_s1kl(double * eri,double * dm,double * vj,int nao,int ic,int jc)506 void CVHFics2ij_ij_s1kl(double *eri, double *dm, double *vj,
507 int nao, int ic, int jc)
508 {
509 int i;
510 double dm_ij;
511 if (ic > jc) {
512 dm_ij = dm[ic*nao+jc] + dm[jc*nao+ic];
513 } else if (ic == jc) {
514 dm_ij = dm[ic*nao+ic];
515 } else {
516 return;
517 }
518
519 for (i = 0; i < nao*nao; i++) {
520 vj[i] += eri[i] * dm_ij;
521 }
522 }
CVHFics2ij_kl_s2ij(double * eri,double * dm,double * vj,int nao,int ic,int jc)523 void CVHFics2ij_kl_s2ij(double *eri, double *dm, double *vj,
524 int nao, int ic, int jc)
525 {
526 if (ic < jc) {
527 return;
528 }
529 CVHFics1_kl_s1ij(eri, dm, vj, nao, ic, jc);
530 }
CVHFics2ij_jk_s1il(double * eri,double * dm,double * vk,int nao,int ic,int jc)531 void CVHFics2ij_jk_s1il(double *eri, double *dm, double *vk,
532 int nao, int ic, int jc)
533 {
534 int k, l, kl;
535 if (ic > jc) {
536 for (k = 0, kl = 0; k < nao; k++) {
537 for (l = 0; l < nao; l++, kl++) {
538 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
539 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
540 }
541 }
542 } else if (ic == jc) {
543 for (k = 0, kl = 0; k < nao; k++) {
544 for (l = 0; l < nao; l++, kl++) {
545 vk[ic*nao+l] += eri[kl] * dm[ic*nao+k];
546 }
547 }
548 }
549 }
CVHFics2ij_il_s1jk(double * eri,double * dm,double * vk,int nao,int ic,int jc)550 void CVHFics2ij_il_s1jk(double *eri, double *dm, double *vk,
551 int nao, int ic, int jc)
552 {
553 int k, l, kl;
554 if (ic > jc) {
555 for (k = 0, kl = 0; k < nao; k++) {
556 for (l = 0; l < nao; l++, kl++) {
557 vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
558 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
559 }
560 }
561 } else if (ic == jc) {
562 for (k = 0, kl = 0; k < nao; k++) {
563 for (l = 0; l < nao; l++, kl++) {
564 vk[ic*nao+k] += eri[kl] * dm[ic*nao+l];
565 }
566 }
567 }
568 }
569
570
CVHFics2kl_ij_s2kl(double * eri,double * dm,double * vj,int nao,int ic,int jc)571 void CVHFics2kl_ij_s2kl(double *eri, double *dm, double *vj,
572 int nao, int ic, int jc)
573 {
574 int i, j, ij;
575 double dm_ij = dm[ic*nao+jc];
576 for (i = 0, ij = 0; i < nao; i++) {
577 for (j = 0; j <= i; j++, ij++) {
578 vj[i*nao+j] += eri[ij] * dm_ij;
579 }
580 }
581 }
CVHFics2kl_kl_s1ij(double * eri,double * dm,double * vj,int nao,int ic,int jc)582 void CVHFics2kl_kl_s1ij(double *eri, double *dm, double *vj,
583 int nao, int ic, int jc)
584 {
585 CVHFics2kl_kl_s1ij_o0(eri, dm, vj, nao, ic, jc);
586 }
CVHFics2kl_jk_s1il(double * eri,double * dm,double * vk,int nao,int ic,int jc)587 void CVHFics2kl_jk_s1il(double *eri, double *dm, double *vk,
588 int nao, int ic, int jc)
589 {
590 int k, l, kl;
591 for (k = 0, kl = 0; k < nao; k++) {
592 for (l = 0; l < k; l++, kl++) {
593 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
594 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
595 }
596 vk[ic*nao+k] += eri[kl] * dm[jc*nao+k];
597 kl++;
598 }
599 }
CVHFics2kl_il_s1jk(double * eri,double * dm,double * vk,int nao,int ic,int jc)600 void CVHFics2kl_il_s1jk(double *eri, double *dm, double *vk,
601 int nao, int ic, int jc)
602 {
603 int k, l, kl;
604 for (k = 0, kl = 0; k < nao; k++) {
605 for (l = 0; l < k; l++, kl++) {
606 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
607 vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
608 }
609 vk[jc*nao+k] += eri[kl] * dm[ic*nao+k];
610 kl++;
611 }
612 }
613
614
615 /**************************************************
616 * s8 8-fold symmetry: i>=j,k>=l,ij>=kl
617 * s4 4-fold symmetry: i>=j,k>=l
618 * s2ij 2-fold symmetry: i>=j
619 * s2kl 2-fold symmetry: k>=l
620 * s1 no permutation symmetry
621 **************************************************/
622 typedef void (*FjkPtr)(double *eri, double *dm, double *vk,
623 int nao, int ic, int jc);
CVHFnrs8_incore_drv(double * eri,double ** dms,double ** vjk,int n_dm,int nao,void (** fjk)())624 void CVHFnrs8_incore_drv(double *eri, double **dms, double **vjk,
625 int n_dm, int nao, void (**fjk)())
626 {
627 #pragma omp parallel default(none) \
628 shared(eri, dms, vjk, n_dm, nao, fjk)
629 {
630 int i, j, ic;
631 size_t ij, off;
632 size_t npair = nao*(nao+1)/2;
633 size_t nn = nao * nao;
634 double *v_priv = calloc(nn*n_dm, sizeof(double));
635 FjkPtr pf;
636 double *pv;
637 #pragma omp for nowait schedule(dynamic, 4)
638 for (ij = 0; ij < npair; ij++) {
639 i = (int)(sqrt(2*ij+.25) - .5 + 1e-7);
640 j = ij - i*(i+1)/2;
641 off = ij*(ij+1)/2;
642 for (ic = 0; ic < n_dm; ic++) {
643 pf = fjk[ic];
644 pv = v_priv + ic*nn;
645 (*pf)(eri+off, dms[ic], pv, nao, i, j);
646 }
647 }
648 #pragma omp critical
649 {
650 for (ic = 0; ic < n_dm; ic++) {
651 pv = vjk[ic];
652 for (i = 0; i < nn; i++) {
653 pv[i] += v_priv[ic*nn+i];
654 }
655 }
656 }
657 free(v_priv);
658 }
659 }
660
CVHFnrs4_incore_drv(double * eri,double ** dms,double ** vjk,int n_dm,int nao,void (** fjk)())661 void CVHFnrs4_incore_drv(double *eri, double **dms, double **vjk,
662 int n_dm, int nao, void (**fjk)())
663 {
664 #pragma omp parallel default(none) \
665 shared(eri, dms, vjk, n_dm, nao, fjk)
666 {
667 int i, j, ic;
668 size_t ij, off;
669 size_t npair = nao*(nao+1)/2;
670 size_t nn = nao * nao;
671 double *v_priv = calloc(nn*n_dm, sizeof(double));
672 FjkPtr pf;
673 double *pv;
674 #pragma omp for nowait schedule(dynamic, 4)
675 for (ij = 0; ij < npair; ij++) {
676 i = (int)(sqrt(2*ij+.25) - .5 + 1e-7);
677 j = ij - i*(i+1)/2;
678 off = ij * npair;
679 for (ic = 0; ic < n_dm; ic++) {
680 pf = fjk[ic];
681 pv = v_priv + ic*nn;
682 (*pf)(eri+off, dms[ic], pv, nao, i, j);
683 }
684 }
685 #pragma omp critical
686 {
687 for (ic = 0; ic < n_dm; ic++) {
688 pv = vjk[ic];
689 for (i = 0; i < nn; i++) {
690 pv[i] += v_priv[ic*nn+i];
691 }
692 }
693 }
694 free(v_priv);
695 }
696 }
697
CVHFnrs2ij_incore_drv(double * eri,double ** dms,double ** vjk,int n_dm,int nao,void (** fjk)())698 void CVHFnrs2ij_incore_drv(double *eri, double **dms, double **vjk,
699 int n_dm, int nao, void (**fjk)())
700 {
701 #pragma omp parallel default(none) \
702 shared(eri, dms, vjk, n_dm, nao, fjk)
703 {
704 int i, j, ic;
705 size_t ij, off;
706 size_t npair = nao*(nao+1)/2;
707 size_t nn = nao * nao;
708 double *v_priv = calloc(nn*n_dm, sizeof(double));
709 FjkPtr pf;
710 double *pv;
711 #pragma omp for nowait schedule(dynamic, 4)
712 for (ij = 0; ij < npair; ij++) {
713 i = (int)(sqrt(2*ij+.25) - .5 + 1e-7);
714 j = ij - i*(i+1)/2;
715 off = ij * nn;
716 for (ic = 0; ic < n_dm; ic++) {
717 pf = fjk[ic];
718 pv = v_priv + ic*nn;
719 (*pf)(eri+off, dms[ic], pv, nao, i, j);
720 }
721 }
722 #pragma omp critical
723 {
724 for (ic = 0; ic < n_dm; ic++) {
725 pv = vjk[ic];
726 for (i = 0; i < nn; i++) {
727 pv[i] += v_priv[ic*nn+i];
728 }
729 }
730 }
731 free(v_priv);
732 }
733 }
734
CVHFnrs2kl_incore_drv(double * eri,double ** dms,double ** vjk,int n_dm,int nao,void (** fjk)())735 void CVHFnrs2kl_incore_drv(double *eri, double **dms, double **vjk,
736 int n_dm, int nao, void (**fjk)())
737 {
738 #pragma omp parallel default(none) \
739 shared(eri, dms, vjk, n_dm, nao, fjk)
740 {
741 int i, j, ic;
742 size_t ij, off;
743 size_t npair = nao*(nao+1)/2;
744 size_t nn = nao * nao;
745 double *v_priv = calloc(nn*n_dm, sizeof(double));
746 FjkPtr pf;
747 double *pv;
748 #pragma omp for nowait schedule(dynamic, 4)
749 for (ij = 0; ij < nn; ij++) {
750 i = ij / nao;
751 j = ij - i * nao;
752 off = ij * npair;
753 for (ic = 0; ic < n_dm; ic++) {
754 pf = fjk[ic];
755 pv = v_priv + ic*nn;
756 (*pf)(eri+off, dms[ic], pv, nao, i, j);
757 }
758 }
759 #pragma omp critical
760 {
761 for (ic = 0; ic < n_dm; ic++) {
762 pv = vjk[ic];
763 for (i = 0; i < nn; i++) {
764 pv[i] += v_priv[ic*nn+i];
765 }
766 }
767 }
768 free(v_priv);
769 }
770 }
771
CVHFnrs1_incore_drv(double * eri,double ** dms,double ** vjk,int n_dm,int nao,void (** fjk)())772 void CVHFnrs1_incore_drv(double *eri, double **dms, double **vjk,
773 int n_dm, int nao, void (**fjk)())
774 {
775 #pragma omp parallel default(none) \
776 shared(eri, dms, vjk, n_dm, nao, fjk)
777 {
778 int i, j, ic;
779 size_t ij, off;
780 size_t nn = nao * nao;
781 double *v_priv = calloc(nn*n_dm, sizeof(double));
782 FjkPtr pf;
783 double *pv;
784 #pragma omp for nowait schedule(dynamic, 4)
785 for (ij = 0; ij < nn; ij++) {
786 i = ij / nao;
787 j = ij - i * nao;
788 off = ij * nn;
789 for (ic = 0; ic < n_dm; ic++) {
790 pf = fjk[ic];
791 pv = v_priv + ic*nn;
792 (*pf)(eri+off, dms[ic], pv, nao, i, j);
793 }
794 }
795 #pragma omp critical
796 {
797 for (ic = 0; ic < n_dm; ic++) {
798 pv = vjk[ic];
799 for (i = 0; i < nn; i++) {
800 pv[i] += v_priv[ic*nn+i];
801 }
802 }
803 }
804 free(v_priv);
805 }
806 }
807