1 /* Copyright 2014-2018 The PySCF Developers. All Rights Reserved.
2 
3    Licensed under the Apache License, Version 2.0 (the "License");
4     you may not use this file except in compliance with the License.
5     You may obtain a copy of the License at
6 
7         http://www.apache.org/licenses/LICENSE-2.0
8 
9     Unless required by applicable law or agreed to in writing, software
10     distributed under the License is distributed on an "AS IS" BASIS,
11     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12     See the License for the specific language governing permissions and
13     limitations under the License.
14 
15  *
16  * Incore version of non-relativistic integrals JK contraction
17  * ic in CVHFic... is short for incore
18  */
19 
20 #include <stdlib.h>
21 #include <math.h>
22 //#include <omp.h>
23 #include "config.h"
24 #include "cvhf.h"
25 #include "np_helper/np_helper.h"
26 #include "fblas.h"
27 
28 
29 /*
30  * J
31  */
CVHFics8_ij_s2kl_o0(double * eri,double * dm,double * vj,int nao,int ic,int jc)32 void CVHFics8_ij_s2kl_o0(double *eri, double *dm, double *vj,
33                          int nao, int ic, int jc)
34 {
35         int i, j, ij;
36         double dm_ij;
37         double vj_ij = 0;
38 
39         if (ic > jc) {
40                 dm_ij = dm[ic*nao+jc] + dm[jc*nao+ic];
41         } else if (ic == jc) {
42                 dm_ij = dm[ic*nao+ic];
43         } else {
44                 return;
45         }
46 
47         for (i = 0, ij = 0; i < ic; i++) {
48                 for (j = 0; j < i; j++, ij++) {
49                         vj_ij += eri[ij] *(dm[i*nao+j]+dm[j*nao+i]);
50                         vj[i*nao+j] += eri[ij] * dm_ij;
51                 }
52                 vj_ij += eri[ij] * dm[i*nao+i];
53                 vj[i*nao+i] += eri[ij] * dm_ij;
54                 ij++;
55         }
56         // i == ic
57         for (j = 0; j < jc; j++, ij++) {
58                 vj_ij += eri[ij] *(dm[i*nao+j]+dm[j*nao+i]);
59                 vj[i*nao+j] += eri[ij] * dm_ij;
60         }
61         vj_ij += eri[ij] * dm_ij;
62 
63         vj[ic*nao+jc] += vj_ij;
64 }
65 
CVHFics4_ij_s2kl_o0(double * eri,double * dm,double * vj,int nao,int ic,int jc)66 void CVHFics4_ij_s2kl_o0(double *eri, double *dm, double *vj,
67                          int nao, int ic, int jc)
68 {
69         int i, j, ij;
70         double dm_ij;
71 
72         if (ic > jc) {
73                 dm_ij = dm[ic*nao+jc] + dm[jc*nao+ic];
74         } else if (ic == jc) {
75                 dm_ij = dm[ic*nao+ic];
76         } else {
77                 return;
78         }
79 
80         for (i = 0, ij = 0; i < nao; i++) {
81                 for (j = 0; j <= i; j++, ij++) {
82                         vj[i*nao+j] += eri[ij] * dm_ij;
83                 }
84         }
85 }
86 
CVHFics2kl_kl_s1ij_o0(double * eri,double * dm,double * vj,int nao,int ic,int jc)87 void CVHFics2kl_kl_s1ij_o0(double *eri, double *dm, double *vj,
88                            int nao, int ic, int jc)
89 {
90         int i, j, ij;
91         double vj_ij = 0;
92         for (i = 0, ij = 0; i < nao; i++) {
93                 for (j = 0; j < i; j++, ij++) {
94                         vj_ij += eri[ij] *(dm[i*nao+j]+dm[j*nao+i]);
95                 }
96                 vj_ij += eri[ij] * dm[i*nao+i];
97                 ij++;
98         }
99         vj[ic*nao+jc] += vj_ij;
100 }
101 
102 
103 
104 /*
105  * K
106  */
CVHFics8_jk_s1il_o0(double * eri,double * dm,double * vk,int nao,int ic,int jc)107 void CVHFics8_jk_s1il_o0(double *eri, double *dm, double *vk,
108                          int nao, int ic, int jc)
109 {
110         int k, l, kl;
111         if (ic > jc) {
112                 for (k = 0, kl = 0; k < ic; k++) {
113                         for (l = 0; l < k; l++, kl++) {
114                                 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
115                                 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
116                                 vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
117                                 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
118                                 vk[l*nao+jc] += eri[kl] * dm[k*nao+ic];
119                                 vk[k*nao+jc] += eri[kl] * dm[l*nao+ic];
120                                 vk[l*nao+ic] += eri[kl] * dm[k*nao+jc];
121                                 vk[k*nao+ic] += eri[kl] * dm[l*nao+jc];
122                         }
123                         vk[jc*nao+k] += eri[kl] * dm[ic*nao+k];
124                         vk[ic*nao+k] += eri[kl] * dm[jc*nao+k];
125                         vk[k*nao+jc] += eri[kl] * dm[k*nao+ic];
126                         vk[k*nao+ic] += eri[kl] * dm[k*nao+jc];
127                         kl++;
128                 }
129                 k = ic;
130                 for (l = 0; l < jc; l++, kl++) { // l<k
131                         vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
132                         vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
133                         vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
134                         vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
135                         vk[l*nao+jc] += eri[kl] * dm[k*nao+ic];
136                         vk[k*nao+jc] += eri[kl] * dm[l*nao+ic];
137                         vk[l*nao+ic] += eri[kl] * dm[k*nao+jc];
138                         vk[k*nao+ic] += eri[kl] * dm[l*nao+jc];
139                 }
140                 // ic = k, jc = l;
141                 vk[jc*nao+jc] += eri[kl] * dm[ic*nao+ic];
142                 vk[ic*nao+jc] += eri[kl] * dm[jc*nao+ic];
143                 vk[jc*nao+ic] += eri[kl] * dm[ic*nao+jc];
144                 vk[ic*nao+ic] += eri[kl] * dm[jc*nao+jc];
145         } else if (ic == jc) {
146                 for (k = 0, kl = 0; k < ic; k++) {
147                         for (l = 0; l < k; l++, kl++) {
148                                 vk[ic*nao+l] += eri[kl] * dm[ic*nao+k];
149                                 vk[ic*nao+k] += eri[kl] * dm[ic*nao+l];
150                                 vk[l*nao+ic] += eri[kl] * dm[k*nao+ic];
151                                 vk[k*nao+ic] += eri[kl] * dm[l*nao+ic];
152                         }
153                         vk[ic*nao+k] += eri[kl] * dm[ic*nao+k];
154                         vk[k*nao+ic] += eri[kl] * dm[k*nao+ic];
155                         kl++;
156                 }
157                 k = ic;
158                 for (l = 0; l < k; l++, kl++) { // l<k
159                         vk[ic*nao+l] += eri[kl] * dm[ic*nao+ic];
160                         vk[l*nao+ic] += eri[kl] * dm[ic*nao+ic];
161                         vk[ic*nao+ic] += eri[kl] * dm[ic*nao+l];
162                         vk[ic*nao+ic] += eri[kl] * dm[l*nao+ic];
163                 }
164                 // ic = jc = k = l
165                 vk[ic*nao+ic] += eri[kl] * dm[ic*nao+ic];
166         }
167 }
168 
CVHFics8_jk_s2il_o0(double * eri,double * dm,double * vk,int nao,int ic,int jc)169 void CVHFics8_jk_s2il_o0(double *eri, double *dm, double *vk,
170                          int nao, int ic, int jc)
171 {
172         int k, l;
173         //double vk_jj = 0;
174         //double vk_ij = 0;
175         if (ic > jc) {
176                 // k < jc
177                 for (k=0; k < jc; k++) {
178                         for (l = 0; l < k; l++) {
179                                 vk[jc*nao+l] += eri[l] * dm[ic*nao+k];
180                                 vk[jc*nao+k] += eri[l] * dm[ic*nao+l];
181                                 vk[ic*nao+l] += eri[l] * dm[jc*nao+k];
182                                 vk[ic*nao+k] += eri[l] * dm[jc*nao+l];
183                         }
184                         // l = k
185                         vk[jc*nao+k] += eri[k] * dm[ic*nao+k];
186                         vk[ic*nao+k] += eri[k] * dm[jc*nao+k];
187                         eri += k + 1;
188                 }
189                 // k = jc
190                 for (l = 0; l < k; l++) {
191                         vk[jc*nao+l ] += eri[l] * dm[ic*nao+jc];
192                         vk[ic*nao+l ] += eri[l] * dm[jc*nao+jc];
193                         vk[jc*nao+jc] += eri[l] *(dm[ic*nao+l] + dm[l*nao+ic]);
194                         vk[ic*nao+jc] += eri[l] * dm[jc*nao+l];
195                 }
196                 // l = k = jc
197                 vk[jc*nao+jc] += eri[l] *(dm[ic*nao+jc] + dm[jc*nao+ic]);
198                 vk[ic*nao+jc] += eri[l] * dm[jc*nao+jc];
199                 eri += k + 1;
200                 // k > jc
201                 for (k=jc+1; k < ic; k++) {
202                         // l < jc
203                         for (l = 0; l < jc; l++) {
204                                 vk[jc*nao+l] += eri[l] * dm[ic*nao+k];
205                                 vk[ic*nao+l] += eri[l] * dm[jc*nao+k];
206                                 vk[ic*nao+k] += eri[l] * dm[jc*nao+l];
207                                 vk[k*nao+jc] += eri[l] * dm[l*nao+ic];
208                         }
209                         // l = jc
210                         vk[jc*nao+jc] += eri[l] *(dm[ic*nao+k] + dm[k*nao+ic]);
211                         vk[ic*nao+jc] += eri[l] * dm[jc*nao+k];
212                         vk[ic*nao+k] += eri[l] * dm[jc*nao+jc];
213                         vk[k*nao+jc] += eri[l] * dm[jc*nao+ic];
214                         //eri += jc+1;
215                         // l > jc
216                         for (l = jc+1; l < k; l++) {
217                                 vk[ic*nao+l] += eri[l] * dm[jc*nao+k];
218                                 vk[ic*nao+k] += eri[l] * dm[jc*nao+l];
219                                 vk[l*nao+jc] += eri[l] * dm[k*nao+ic];
220                                 vk[k*nao+jc] += eri[l] * dm[l*nao+ic];
221                         }
222                         // l = k
223                         vk[jc*nao+k] += eri[l] * dm[ic*nao+k];
224                         vk[ic*nao+k] += eri[l] * dm[jc*nao+k];
225                         vk[k*nao+jc] += eri[l] * dm[k*nao+ic];
226                         eri += k + 1;
227                 }
228                 // k = ic
229                 for (l = 0; l < jc; l++) {
230                         vk[jc*nao+l] += eri[l] * dm[ic*nao+ic];
231                         vk[ic*nao+l] += eri[l] * dm[jc*nao+ic];
232                         vk[ic*nao+ic] += eri[l] *(dm[jc*nao+l] + dm[l*nao+jc]);
233                         vk[ic*nao+jc] += eri[l] * dm[l*nao+ic];
234                 }
235                 // ic = k, jc = l;
236                 vk[jc*nao+jc] += eri[l] * dm[ic*nao+ic];
237                 vk[ic*nao+jc] += eri[l] * dm[jc*nao+ic];
238                 vk[ic*nao+ic] += eri[l] * dm[jc*nao+jc];
239                 eri += jc + 1;
240         } else if (ic == jc) {
241                 for (k = 0; k < ic-1; k+=2) {
242                         for (l = 0; l < k; l++) {
243                                 vk[ic*nao+l] += eri[l] * dm[ic*nao+k];
244                                 vk[ic*nao+k] += eri[l] * dm[ic*nao+l];
245                                 vk[ic*nao+l  ] += eri[l+k+1] * dm[ic*nao+k+1];
246                                 vk[ic*nao+k+1] += eri[l+k+1] * dm[ic*nao+l  ];
247                         }
248                         vk[ic*nao+k] += eri[k] * dm[ic*nao+k];
249                         eri += k+1;
250                         vk[ic*nao+k  ] += eri[k] * dm[ic*nao+k+1];
251                         vk[ic*nao+k+1] += eri[k] * dm[ic*nao+k  ];
252                         vk[ic*nao+k+1] += eri[k+1] * dm[ic*nao+k+1];
253                         eri += k+2;
254                 }
255                 for (; k < ic; k++) {
256                         for (l = 0; l < k; l++) {
257                                 vk[ic*nao+l] += eri[l] * dm[ic*nao+k];
258                                 vk[ic*nao+k] += eri[l] * dm[ic*nao+l];
259                         }
260                         vk[ic*nao+k] += eri[k] * dm[ic*nao+k];
261                         eri += k+1;
262                 }
263                 for (l = 0; l < k; l++) { // l<k
264                         vk[ic*nao+l] += eri[l] * dm[ic*nao+ic];
265                         vk[ic*nao+ic] += eri[l] *(dm[ic*nao+l] + dm[l*nao+ic]);
266                 }
267                 // ic = jc = k = l
268                 vk[ic*nao+ic] += eri[l] * dm[ic*nao+ic];
269                 eri += k + 1;
270         }
271 }
272 
CVHFics4_jk_s1il_o0(double * eri,double * dm,double * vk,int nao,int ic,int jc)273 void CVHFics4_jk_s1il_o0(double *eri, double *dm, double *vk,
274                          int nao, int ic, int jc)
275 {
276         int k, l, kl;
277         if (ic > jc) {
278                 for (k = 0, kl = 0; k < nao; k++) {
279                         for (l = 0; l < k; l++, kl++) {
280                                 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
281                                 vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
282                                 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
283                                 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
284                         }
285                         vk[jc*nao+k] += eri[kl] * dm[ic*nao+k];
286                         vk[ic*nao+k] += eri[kl] * dm[jc*nao+k];
287                         kl++;
288                 }
289         } else if (ic == jc) {
290                 for (k = 0, kl = 0; k < nao; k++) {
291                         for (l = 0; l < k; l++, kl++) {
292                                 vk[ic*nao+l] += eri[kl] * dm[ic*nao+k];
293                                 vk[ic*nao+k] += eri[kl] * dm[ic*nao+l];
294                         }
295                         vk[ic*nao+k] += eri[kl] * dm[ic*nao+k];
296                         kl++;
297                 }
298         }
299 }
300 
CVHFics4_il_s1jk_o0(double * eri,double * dm,double * vk,int nao,int ic,int jc)301 void CVHFics4_il_s1jk_o0(double *eri, double *dm, double *vk,
302                          int nao, int ic, int jc)
303 {
304         CVHFics4_jk_s1il_o0(eri, dm, vk, nao, ic, jc);
305 }
306 
CVHFics4_jk_s2il_o0(double * eri,double * dm,double * vk,int nao,int ic,int jc)307 void CVHFics4_jk_s2il_o0(double *eri, double *dm, double *vk,
308                          int nao, int ic, int jc)
309 {
310         int k, l, kl;
311         if (ic > jc) {
312                 for (k = 0, kl = 0; k <= jc; k++) {
313                         for (l = 0; l < k; l++, kl++) {
314                                 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
315                                 vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
316                                 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
317                                 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
318                         }
319                         vk[jc*nao+k] += eri[kl] * dm[ic*nao+k];
320                         vk[ic*nao+k] += eri[kl] * dm[jc*nao+k];
321                         kl++;
322                 }
323                 for (k = jc+1; k <= ic; k++) {
324                         for (l = 0; l <= jc; l++, kl++) {
325                                 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
326                                 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
327                                 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
328                         }
329                         for (l = jc+1; l < k; l++, kl++) {
330                                 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
331                                 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
332                         }
333                         vk[ic*nao+k] += eri[kl] * dm[jc*nao+k];
334                         kl++;
335                 }
336                 for (k = ic+1; k < nao; k++) {
337                         for (l = 0, kl = k*(k+1)/2; l <= jc; l++, kl++) {
338                                 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
339                                 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
340                         }
341                         for (l = jc+1; l <= ic; l++, kl++) {
342                                 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
343                         }
344                 }
345         } else if (ic == jc) {
346                 for (k = 0, kl = 0; k <= ic; k++) {
347                         for (l = 0; l < k; l++, kl++) {
348                                 vk[ic*nao+l] += eri[kl] * dm[ic*nao+k];
349                                 vk[ic*nao+k] += eri[kl] * dm[ic*nao+l];
350                         }
351                         vk[ic*nao+k] += eri[kl] * dm[ic*nao+k];
352                         kl++;
353                 }
354                 for (k = ic+1; k < nao; k++) {
355                         for (l = 0, kl = k*(k+1)/2; l <= ic; l++, kl++) {
356                                 vk[ic*nao+l] += eri[kl] * dm[ic*nao+k];
357                         }
358                 }
359         }
360 }
361 
CVHFics4_il_s2jk_o0(double * eri,double * dm,double * vk,int nao,int ic,int jc)362 void CVHFics4_il_s2jk_o0(double *eri, double *dm, double *vk,
363                          int nao, int ic, int jc)
364 {
365         CVHFics4_jk_s2il_o0(eri, dm, vk, nao, ic, jc);
366 }
367 
368 
369 
370 /*
371  * einsum ijkl,ij->(s2)kl
372  * 8-fold symmetry for eri: i>=j,k>=l,ij>=kl
373  * input address eri of the first element for pair ij=ic*(ic+1)/2+jc
374  * i.e. ~ &eri_ao[ij*(ij+1)/2]
375  * dm can be non-Hermitian,
376  * output vk might not be Hermitian
377  *
378  * NOTE all _s2kl (nrs8_, nrs4_, nrs2kl_) assumes the tril part of eri
379  * being stored in C-order *contiguously*.  so call CVHFunpack_nrblock2tril
380  * to generate eris
381  */
CVHFics8_ij_s2kl(double * eri,double * dm,double * vj,int nao,int ic,int jc)382 void CVHFics8_ij_s2kl(double *eri, double *dm, double *vj,
383                       int nao, int ic, int jc)
384 {
385         CVHFics8_ij_s2kl_o0(eri, dm, vj, nao, ic, jc);
386 }
387 // tri_dm: fold upper triangular dm to lower triangle,
388 // tri_dm[i*(i+1)/2+j] = dm[i*nao+j] + dm[j*nao+i]  for i > j
CVHFics8_tridm_vj(double * eri,double * tri_dm,double * vj,int nao,int ic,int jc)389 void CVHFics8_tridm_vj(double *eri, double *tri_dm, double *vj,
390                        int nao, int ic, int jc)
391 {
392         int i, j, ij;
393         double dm_ijc = tri_dm[ic*(ic+1)/2+jc];
394         double vj_ij = 0;
395         const int INC1 = 1;
396         int i1;
397 
398         for (i = 0, ij = 0; i < ic; i++) {
399                 i1 = i + 1;
400                 vj_ij += ddot_(&i1, eri+ij, &INC1, tri_dm+ij, &INC1);
401                 daxpy_(&i1, &dm_ijc, eri+ij, &INC1, vj+i*nao, &INC1);
402                 ij += i1;
403         }
404         // i == ic
405         for (j = 0; j < jc; j++, ij++) {
406                 vj_ij += eri[ij] * tri_dm[ij];
407                 vj[i*nao+j] += eri[ij] * dm_ijc;
408         }
409         vj_ij += eri[ij] * dm_ijc;
410 
411         vj[ic*nao+jc] += vj_ij;
412 }
CVHFics8_jk_s1il(double * eri,double * dm,double * vk,int nao,int ic,int jc)413 void CVHFics8_jk_s1il(double *eri, double *dm, double *vk,
414                       int nao, int ic, int jc)
415 {
416         CVHFics8_jk_s1il_o0(eri, dm, vk, nao, ic, jc);
417 }
418 /*
419  * einsum ijkl,jk->(s2)il
420  * output vk should be Hermitian
421  */
CVHFics8_jk_s2il(double * eri,double * dm,double * vk,int nao,int ic,int jc)422 void CVHFics8_jk_s2il(double *eri, double *dm, double *vk,
423                       int nao, int ic, int jc)
424 {
425         CVHFics8_jk_s2il_o0(eri, dm, vk, nao, ic, jc);
426 }
427 
428 
429 /*
430  * einsum ijkl,jk->il
431  * 4-fold symmetry for eri: i>=j,k>=l
432  */
CVHFics4_jk_s1il(double * eri,double * dm,double * vk,int nao,int ic,int jc)433 void CVHFics4_jk_s1il(double *eri, double *dm, double *vk,
434                       int nao, int ic, int jc)
435 {
436         CVHFics4_jk_s1il_o0(eri, dm, vk, nao, ic, jc);
437 }
CVHFics4_il_s1jk(double * eri,double * dm,double * vk,int nao,int ic,int jc)438 void CVHFics4_il_s1jk(double *eri, double *dm, double *vk,
439                       int nao, int ic, int jc)
440 {
441         CVHFics4_jk_s1il_o0(eri, dm, vk, nao, ic, jc);
442 }
443 /*
444  * output vk should be Hermitian
445  */
CVHFics4_jk_s2il(double * eri,double * dm,double * vk,int nao,int ic,int jc)446 void CVHFics4_jk_s2il(double *eri, double *dm, double *vk,
447                       int nao, int ic, int jc)
448 {
449         CVHFics4_jk_s2il_o0(eri, dm, vk, nao, ic, jc);
450 }
CVHFics4_il_s2jk(double * eri,double * dm,double * vk,int nao,int ic,int jc)451 void CVHFics4_il_s2jk(double *eri, double *dm, double *vk,
452                       int nao, int ic, int jc)
453 {
454         CVHFics4_jk_s2il_o0(eri, dm, vk, nao, ic, jc);
455 }
CVHFics4_ij_s2kl(double * eri,double * dm,double * vj,int nao,int ic,int jc)456 void CVHFics4_ij_s2kl(double *eri, double *dm, double *vj,
457                       int nao, int ic, int jc)
458 {
459         CVHFics4_ij_s2kl_o0(eri, dm, vj, nao, ic, jc);
460 }
CVHFics4_kl_s2ij(double * eri,double * dm,double * vj,int nao,int ic,int jc)461 void CVHFics4_kl_s2ij(double *eri, double *dm, double *vj,
462                       int nao, int ic, int jc)
463 {
464         if (ic >= jc) {
465                 CVHFics2kl_kl_s1ij_o0(eri, dm, vj, nao, ic, jc);
466         }
467 }
468 
469 
CVHFics1_ij_s1kl(double * eri,double * dm,double * vj,int nao,int ic,int jc)470 void CVHFics1_ij_s1kl(double *eri, double *dm, double *vj,
471                       int nao, int ic, int jc)
472 {
473         int i;
474         double dm_ij = dm[ic*nao+jc];
475         for (i = 0; i < nao*nao; i++) {
476                 vj[i] += eri[i] * dm_ij;
477         }
478 }
CVHFics1_kl_s1ij(double * eri,double * dm,double * vj,int nao,int ic,int jc)479 void CVHFics1_kl_s1ij(double *eri, double *dm, double *vj,
480                       int nao, int ic, int jc)
481 {
482         const int INC1 = 1;
483         int nn = nao * nao;
484         vj[ic*nao+jc] += ddot_(&nn, eri, &INC1, dm, &INC1);
485 }
CVHFics1_jk_s1il(double * eri,double * dm,double * vk,int nao,int ic,int jc)486 void CVHFics1_jk_s1il(double *eri, double *dm, double *vk,
487                       int nao, int ic, int jc)
488 {
489         int k, l, kl;
490         for (k = 0, kl = 0; k < nao; k++) {
491         for (l = 0; l < nao; l++, kl++) {
492                 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
493         } }
494 }
CVHFics1_il_s1jk(double * eri,double * dm,double * vk,int nao,int ic,int jc)495 void CVHFics1_il_s1jk(double *eri, double *dm, double *vk,
496                       int nao, int ic, int jc)
497 {
498         int k, l, kl;
499         for (k = 0, kl = 0; k < nao; k++) {
500         for (l = 0; l < nao; l++, kl++) {
501                 vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
502         } }
503 }
504 
505 
CVHFics2ij_ij_s1kl(double * eri,double * dm,double * vj,int nao,int ic,int jc)506 void CVHFics2ij_ij_s1kl(double *eri, double *dm, double *vj,
507                         int nao, int ic, int jc)
508 {
509         int i;
510         double dm_ij;
511         if (ic > jc) {
512                 dm_ij = dm[ic*nao+jc] + dm[jc*nao+ic];
513         } else if (ic == jc) {
514                 dm_ij = dm[ic*nao+ic];
515         } else {
516                 return;
517         }
518 
519         for (i = 0; i < nao*nao; i++) {
520                 vj[i] += eri[i] * dm_ij;
521         }
522 }
CVHFics2ij_kl_s2ij(double * eri,double * dm,double * vj,int nao,int ic,int jc)523 void CVHFics2ij_kl_s2ij(double *eri, double *dm, double *vj,
524                         int nao, int ic, int jc)
525 {
526         if (ic < jc) {
527                 return;
528         }
529         CVHFics1_kl_s1ij(eri, dm, vj, nao, ic, jc);
530 }
CVHFics2ij_jk_s1il(double * eri,double * dm,double * vk,int nao,int ic,int jc)531 void CVHFics2ij_jk_s1il(double *eri, double *dm, double *vk,
532                         int nao, int ic, int jc)
533 {
534         int k, l, kl;
535         if (ic > jc) {
536                 for (k = 0, kl = 0; k < nao; k++) {
537                         for (l = 0; l < nao; l++, kl++) {
538                                 vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
539                                 vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
540                         }
541                 }
542         } else if (ic == jc) {
543                 for (k = 0, kl = 0; k < nao; k++) {
544                         for (l = 0; l < nao; l++, kl++) {
545                                 vk[ic*nao+l] += eri[kl] * dm[ic*nao+k];
546                         }
547                 }
548         }
549 }
CVHFics2ij_il_s1jk(double * eri,double * dm,double * vk,int nao,int ic,int jc)550 void CVHFics2ij_il_s1jk(double *eri, double *dm, double *vk,
551                         int nao, int ic, int jc)
552 {
553         int k, l, kl;
554         if (ic > jc) {
555                 for (k = 0, kl = 0; k < nao; k++) {
556                         for (l = 0; l < nao; l++, kl++) {
557                                 vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
558                                 vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
559                         }
560                 }
561         } else if (ic == jc) {
562                 for (k = 0, kl = 0; k < nao; k++) {
563                         for (l = 0; l < nao; l++, kl++) {
564                                 vk[ic*nao+k] += eri[kl] * dm[ic*nao+l];
565                         }
566                 }
567         }
568 }
569 
570 
CVHFics2kl_ij_s2kl(double * eri,double * dm,double * vj,int nao,int ic,int jc)571 void CVHFics2kl_ij_s2kl(double *eri, double *dm, double *vj,
572                         int nao, int ic, int jc)
573 {
574         int i, j, ij;
575         double dm_ij = dm[ic*nao+jc];
576         for (i = 0, ij = 0; i < nao; i++) {
577                 for (j = 0; j <= i; j++, ij++) {
578                         vj[i*nao+j] += eri[ij] * dm_ij;
579                 }
580         }
581 }
CVHFics2kl_kl_s1ij(double * eri,double * dm,double * vj,int nao,int ic,int jc)582 void CVHFics2kl_kl_s1ij(double *eri, double *dm, double *vj,
583                         int nao, int ic, int jc)
584 {
585         CVHFics2kl_kl_s1ij_o0(eri, dm, vj, nao, ic, jc);
586 }
CVHFics2kl_jk_s1il(double * eri,double * dm,double * vk,int nao,int ic,int jc)587 void CVHFics2kl_jk_s1il(double *eri, double *dm, double *vk,
588                         int nao, int ic, int jc)
589 {
590         int k, l, kl;
591         for (k = 0, kl = 0; k < nao; k++) {
592                 for (l = 0; l < k; l++, kl++) {
593                         vk[ic*nao+l] += eri[kl] * dm[jc*nao+k];
594                         vk[ic*nao+k] += eri[kl] * dm[jc*nao+l];
595                 }
596                 vk[ic*nao+k] += eri[kl] * dm[jc*nao+k];
597                 kl++;
598         }
599 }
CVHFics2kl_il_s1jk(double * eri,double * dm,double * vk,int nao,int ic,int jc)600 void CVHFics2kl_il_s1jk(double *eri, double *dm, double *vk,
601                         int nao, int ic, int jc)
602 {
603         int k, l, kl;
604         for (k = 0, kl = 0; k < nao; k++) {
605                 for (l = 0; l < k; l++, kl++) {
606                         vk[jc*nao+l] += eri[kl] * dm[ic*nao+k];
607                         vk[jc*nao+k] += eri[kl] * dm[ic*nao+l];
608                 }
609                 vk[jc*nao+k] += eri[kl] * dm[ic*nao+k];
610                 kl++;
611         }
612 }
613 
614 
615 /**************************************************
616  * s8   8-fold symmetry: i>=j,k>=l,ij>=kl
617  * s4   4-fold symmetry: i>=j,k>=l
618  * s2ij 2-fold symmetry: i>=j
619  * s2kl 2-fold symmetry: k>=l
620  * s1   no permutation symmetry
621  **************************************************/
622 typedef void (*FjkPtr)(double *eri, double *dm, double *vk,
623                        int nao, int ic, int jc);
CVHFnrs8_incore_drv(double * eri,double ** dms,double ** vjk,int n_dm,int nao,void (** fjk)())624 void CVHFnrs8_incore_drv(double *eri, double **dms, double **vjk,
625                          int n_dm, int nao, void (**fjk)())
626 {
627 #pragma omp parallel default(none) \
628         shared(eri, dms, vjk, n_dm, nao, fjk)
629         {
630                 int i, j, ic;
631                 size_t ij, off;
632                 size_t npair = nao*(nao+1)/2;
633                 size_t nn = nao * nao;
634                 double *v_priv = calloc(nn*n_dm, sizeof(double));
635                 FjkPtr pf;
636                 double *pv;
637 #pragma omp for nowait schedule(dynamic, 4)
638                 for (ij = 0; ij < npair; ij++) {
639                         i = (int)(sqrt(2*ij+.25) - .5 + 1e-7);
640                         j = ij - i*(i+1)/2;
641                         off = ij*(ij+1)/2;
642                         for (ic = 0; ic < n_dm; ic++) {
643                                 pf = fjk[ic];
644                                 pv = v_priv + ic*nn;
645                                 (*pf)(eri+off, dms[ic], pv, nao, i, j);
646                         }
647                 }
648 #pragma omp critical
649                 {
650                         for (ic = 0; ic < n_dm; ic++) {
651                                 pv = vjk[ic];
652                                 for (i = 0; i < nn; i++) {
653                                         pv[i] += v_priv[ic*nn+i];
654                                 }
655                         }
656                 }
657                 free(v_priv);
658         }
659 }
660 
CVHFnrs4_incore_drv(double * eri,double ** dms,double ** vjk,int n_dm,int nao,void (** fjk)())661 void CVHFnrs4_incore_drv(double *eri, double **dms, double **vjk,
662                          int n_dm, int nao, void (**fjk)())
663 {
664 #pragma omp parallel default(none) \
665         shared(eri, dms, vjk, n_dm, nao, fjk)
666         {
667                 int i, j, ic;
668                 size_t ij, off;
669                 size_t npair = nao*(nao+1)/2;
670                 size_t nn = nao * nao;
671                 double *v_priv = calloc(nn*n_dm, sizeof(double));
672                 FjkPtr pf;
673                 double *pv;
674 #pragma omp for nowait schedule(dynamic, 4)
675                 for (ij = 0; ij < npair; ij++) {
676                         i = (int)(sqrt(2*ij+.25) - .5 + 1e-7);
677                         j = ij - i*(i+1)/2;
678                         off = ij * npair;
679                         for (ic = 0; ic < n_dm; ic++) {
680                                 pf = fjk[ic];
681                                 pv = v_priv + ic*nn;
682                                 (*pf)(eri+off, dms[ic], pv, nao, i, j);
683                         }
684                 }
685 #pragma omp critical
686                 {
687                         for (ic = 0; ic < n_dm; ic++) {
688                                 pv = vjk[ic];
689                                 for (i = 0; i < nn; i++) {
690                                         pv[i] += v_priv[ic*nn+i];
691                                 }
692                         }
693                 }
694                 free(v_priv);
695         }
696 }
697 
CVHFnrs2ij_incore_drv(double * eri,double ** dms,double ** vjk,int n_dm,int nao,void (** fjk)())698 void CVHFnrs2ij_incore_drv(double *eri, double **dms, double **vjk,
699                            int n_dm, int nao, void (**fjk)())
700 {
701 #pragma omp parallel default(none) \
702         shared(eri, dms, vjk, n_dm, nao, fjk)
703         {
704                 int i, j, ic;
705                 size_t ij, off;
706                 size_t npair = nao*(nao+1)/2;
707                 size_t nn = nao * nao;
708                 double *v_priv = calloc(nn*n_dm, sizeof(double));
709                 FjkPtr pf;
710                 double *pv;
711 #pragma omp for nowait schedule(dynamic, 4)
712                 for (ij = 0; ij < npair; ij++) {
713                         i = (int)(sqrt(2*ij+.25) - .5 + 1e-7);
714                         j = ij - i*(i+1)/2;
715                         off = ij * nn;
716                         for (ic = 0; ic < n_dm; ic++) {
717                                 pf = fjk[ic];
718                                 pv = v_priv + ic*nn;
719                                 (*pf)(eri+off, dms[ic], pv, nao, i, j);
720                         }
721                 }
722 #pragma omp critical
723                 {
724                         for (ic = 0; ic < n_dm; ic++) {
725                                 pv = vjk[ic];
726                                 for (i = 0; i < nn; i++) {
727                                         pv[i] += v_priv[ic*nn+i];
728                                 }
729                         }
730                 }
731                 free(v_priv);
732         }
733 }
734 
CVHFnrs2kl_incore_drv(double * eri,double ** dms,double ** vjk,int n_dm,int nao,void (** fjk)())735 void CVHFnrs2kl_incore_drv(double *eri, double **dms, double **vjk,
736                            int n_dm, int nao, void (**fjk)())
737 {
738 #pragma omp parallel default(none) \
739         shared(eri, dms, vjk, n_dm, nao, fjk)
740         {
741                 int i, j, ic;
742                 size_t ij, off;
743                 size_t npair = nao*(nao+1)/2;
744                 size_t nn = nao * nao;
745                 double *v_priv = calloc(nn*n_dm, sizeof(double));
746                 FjkPtr pf;
747                 double *pv;
748 #pragma omp for nowait schedule(dynamic, 4)
749                 for (ij = 0; ij < nn; ij++) {
750                         i = ij / nao;
751                         j = ij - i * nao;
752                         off = ij * npair;
753                         for (ic = 0; ic < n_dm; ic++) {
754                                 pf = fjk[ic];
755                                 pv = v_priv + ic*nn;
756                                 (*pf)(eri+off, dms[ic], pv, nao, i, j);
757                         }
758                 }
759 #pragma omp critical
760                 {
761                         for (ic = 0; ic < n_dm; ic++) {
762                                 pv = vjk[ic];
763                                 for (i = 0; i < nn; i++) {
764                                         pv[i] += v_priv[ic*nn+i];
765                                 }
766                         }
767                 }
768                 free(v_priv);
769         }
770 }
771 
CVHFnrs1_incore_drv(double * eri,double ** dms,double ** vjk,int n_dm,int nao,void (** fjk)())772 void CVHFnrs1_incore_drv(double *eri, double **dms, double **vjk,
773                          int n_dm, int nao, void (**fjk)())
774 {
775 #pragma omp parallel default(none) \
776         shared(eri, dms, vjk, n_dm, nao, fjk)
777         {
778                 int i, j, ic;
779                 size_t ij, off;
780                 size_t nn = nao * nao;
781                 double *v_priv = calloc(nn*n_dm, sizeof(double));
782                 FjkPtr pf;
783                 double *pv;
784 #pragma omp for nowait schedule(dynamic, 4)
785                 for (ij = 0; ij < nn; ij++) {
786                         i = ij / nao;
787                         j = ij - i * nao;
788                         off = ij * nn;
789                         for (ic = 0; ic < n_dm; ic++) {
790                                 pf = fjk[ic];
791                                 pv = v_priv + ic*nn;
792                                 (*pf)(eri+off, dms[ic], pv, nao, i, j);
793                         }
794                 }
795 #pragma omp critical
796                 {
797                         for (ic = 0; ic < n_dm; ic++) {
798                                 pv = vjk[ic];
799                                 for (i = 0; i < nn; i++) {
800                                         pv[i] += v_priv[ic*nn+i];
801                                 }
802                         }
803                 }
804                 free(v_priv);
805         }
806 }
807