1 /*----------------------------------------------------------------------
2 exx_interface_openmx.c:
3
4 07/JAN/2010 Coded by M. Toyoda
5 ----------------------------------------------------------------------*/
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <math.h>
9 #include <time.h>
10 #include "openmx_common.h"
11 #include "exx.h"
12 #include "exx_log.h"
13 #include "exx_debug.h"
14 #include "exx_interface_openmx.h"
15 #include "exx_step1.h"
16 #include "exx_step2.h"
17 #include "exx_vector.h"
18 #include "exx_index.h"
19 #include "exx_file_eri.h"
20 #include "exx_rhox.h"
21
22 #define PATHLEN 256
23
24 static int g_step = 0;
25
26 MPI_Comm g_exx_mpicomm = MPI_COMM_NULL;
27
28 dcomplex *****g_exx_DM;
29 double g_exx_U[2];
30 int g_exx_switch;
31 int g_exx_switch_output_DM;
32 int g_exx_switch_rhox;
33
34 static double time_step1;
35 static double time_step2;
36
37
38 /*----------------------------------------------------------------------
39 Initialization of EXX for OpenMX
40 ----------------------------------------------------------------------*/
EXX_on_OpenMX_Init(MPI_Comm comm)41 void EXX_on_OpenMX_Init(MPI_Comm comm)
42 {
43 int i, j, l, m, k, system_type, myrank, nproc;
44
45 double pvec[9]; /* primitive translational vectors */
46 double v[3];
47 int natom, nspec;
48 double *atom_v, *spec_rc;
49 int *atom_sp, *spec_nb;
50 double w_scr, rc_cut;
51
52 int nbmax;
53 int **spec_l, **spec_m, **spec_n, *spec_nmesh;
54 double ****pao_fr, **pao_xr;
55
56 int lmax, mul, nmesh, nb, nmul, nm;
57
58 int iep, nep, Gc_AN, Cwan, tno0, Gh_AN, Hwan, tno1;
59 const int *exx_atm1, *exx_atm2;
60
61 double time0, time1;
62
63 /* MPI information */
64 g_exx_mpicomm = comm;
65 MPI_Comm_size(g_exx_mpicomm, &nproc);
66 MPI_Comm_rank(g_exx_mpicomm, &myrank);
67
68 /* start logging */
69 EXX_Log_Open();
70
71 #ifdef EXX_USE_MPI
72 EXX_Log_Print("MPI MODE (NPROC= %d)\n", nproc);
73 #else
74 EXX_Log_Print("SERIAL MODE\n");
75 #endif
76 EXX_Log_Print(" MYRANK= %d\n", myrank);
77 EXX_Log_Print("\n");
78
79 /* solver type */
80 switch (Solver) {
81 case 2: /* cluster */
82 EXX_Log_Print("SOLVER= CLUSTER\n");
83 system_type = EXX_SYSTEM_CLUSTER;
84 rc_cut = 0.0;
85 w_scr = 0.0;
86 break;
87 case 3: /* band */
88 EXX_Log_Print("SOLVER= PERIODIC\n");
89 system_type = EXX_SYSTEM_PERIODIC;
90 rc_cut = 10.0 / BohrR; /* 10 Angstrom */
91 w_scr = 0.0;
92 break;
93 default:
94 EXX_ERROR("EXX is available for 'CLUSTER' or 'BAND' solvers.");
95 }
96
97 /* user values */
98 if (g_exx_rc_cut > 1e-10) { rc_cut = g_exx_rc_cut; }
99 if (g_exx_w_scr > 1e-10) { w_scr = g_exx_w_scr; }
100
101 if (EXX_SYSTEM_PERIODIC==system_type && rc_cut <1e-10) {
102 EXX_ERROR("finite rc_cut is required for periodic mode.");
103 }
104
105 /* swithces */
106 EXX_Log_Print("SKIPPING STEP1 = %d\n", g_exx_skip1);
107 EXX_Log_Print("SKIPPING STEP2 = %d\n", g_exx_skip2);
108 EXX_Log_Print("CACHDIR= %s\n", g_exx_cachedir);
109 EXX_Log_Print("LIBERI_LMAX= %d\n", g_exx_liberi_lmax);
110 EXX_Log_Print("LIBERI_NGRID= %d\n", g_exx_liberi_ngrid);
111 EXX_Log_Print("LIBERI_NGL= %d\n", g_exx_liberi_ngl);
112 EXX_Log_Print("OUTPUT DM= %d\n", g_exx_switch_output_DM);
113 EXX_Log_Print("\n");
114
115 /* primitive translational vectors */
116 pvec[0] = tv[1][1]; pvec[1] = tv[1][2]; pvec[2] = tv[1][3];
117 pvec[3] = tv[2][1]; pvec[4] = tv[2][2]; pvec[5] = tv[2][3];
118 pvec[6] = tv[3][1]; pvec[7] = tv[3][2]; pvec[8] = tv[3][3];
119
120 natom = atomnum;
121 nspec = SpeciesNum;
122
123 /* allocation */
124 atom_v = (double*)malloc(sizeof(double)*3*natom);
125 atom_sp = (int*)malloc(sizeof(int)*natom);
126 spec_rc = (double*)malloc(sizeof(double)*nspec);
127 spec_nb = (int*)malloc(sizeof(int)*nspec);
128
129 /* atomic positions */
130 for (i=0; i<natom; i++) {
131 v[0] = Gxyz[i+1][1];
132 v[1] = Gxyz[i+1][2];
133 v[2] = Gxyz[i+1][3];
134
135 EXX_Vector_C2F(&atom_v[3*i], v, pvec);
136 }
137
138 /* species */
139 for (i=0; i<natom; i++) { atom_sp[i] = WhatSpecies[i+1]; }
140
141 /* cut-off radii */
142 for (i=0; i<nspec; i++) { spec_rc[i] = Spe_Atom_Cut1[i]; }
143
144 /* number of basis */
145 nbmax = 0;
146 for (i=0; i<nspec; i++) {
147 spec_nb[i] = 0;
148 for (l=0; l<=Spe_MaxL_Basis[i]; l++) {
149 mul = Spe_Num_Basis[i][l];
150 spec_nb[i] += mul * (2*l+1);
151 }
152 if (nbmax<spec_nb[i]) { nbmax = spec_nb[i]; }
153 }
154
155 /* PAO wavefunction */
156 spec_l = (int**)malloc(sizeof(int*)*nspec);
157 spec_m = (int**)malloc(sizeof(int*)*nspec);
158 spec_n = (int**)malloc(sizeof(int*)*nspec);
159 for (i=0; i<nspec; i++) {
160 spec_l[i] = (int*)malloc(sizeof(int)*nbmax);
161 spec_m[i] = (int*)malloc(sizeof(int)*nbmax);
162 spec_n[i] = (int*)malloc(sizeof(int)*nbmax);
163 }
164 spec_nmesh = (int*)malloc(sizeof(int)*nspec);
165 pao_fr = (double****)malloc(sizeof(double***)*nspec);
166 pao_xr = (double**)malloc(sizeof(double*)*nspec);
167 for (i=0; i<nspec; i++) {
168 lmax = Spe_PAO_LMAX[i]+1;
169 mul = Spe_PAO_Mul[i];
170 nmesh = Spe_Num_Mesh_PAO[i];
171 pao_fr[i] = (double***)malloc(sizeof(double**)*lmax);
172 for (l=0; l<lmax; l++) {
173 nb = mul*(2*l+1);
174 pao_fr[i][l] = (double**)malloc(sizeof(double*)*nb);
175 for (m=0; m<nb; m++) {
176 pao_fr[i][l][m] = (double*)malloc(sizeof(double)*nmesh);
177 }
178 }
179 pao_xr[i] = (double*)malloc(sizeof(double)*nmesh);
180 }
181
182 /* rotating angular momentum quantum numbers */
183 for (i=0; i<nspec; i++) {
184 j = 0;
185 for (l=0; l<=Spe_MaxL_Basis[i]; l++) {
186 nmul = Spe_Num_Basis[i][l];
187 nm = 2*l+1;
188 for (mul=0; mul<nmul; mul++) {
189 for (m=0; m<nm; m++) {
190 spec_l[i][j] = l;
191 if (l==1) {
192 switch (m) {
193 case 0: spec_m[i][j] = +1; break;
194 case 1: spec_m[i][j] = -1; break;
195 case 2: spec_m[i][j] = 0; break;
196 default:
197 EXX_ERROR("");
198 }
199 } else if (l==2) {
200 switch (m) {
201 case 0: spec_m[i][j] = 0; break;
202 case 1: spec_m[i][j] = +2; break;
203 case 2: spec_m[i][j] = -2; break;
204 case 3: spec_m[i][j] = +1; break;
205 case 4: spec_m[i][j] = -1; break;
206 default:
207 EXX_ERROR("");
208 }
209 } else {
210 if (m==0) {
211 spec_m[i][j] = 0;
212 } else {
213 if (0==m%2) {
214 spec_m[i][j] = -(m+1)/2;
215 } else {
216 spec_m[i][j] = +(m+1)/2;
217 }
218 }
219 }
220 spec_n[i][j] = mul;
221 j++;
222 }
223 }
224 }
225 spec_nmesh[i] = Spe_Num_Mesh_PAO[i];
226
227 lmax = Spe_PAO_LMAX[i]+1;
228 mul = Spe_PAO_Mul[i];
229 nmesh = Spe_Num_Mesh_PAO[i];
230
231 for (l=0; l<lmax; l++) {
232 for (m=0; m<mul; m++) {
233 for (k=0; k<nmesh; k++) {
234 pao_fr[i][l][m][k] = Spe_PAO_RWF[i][l][m][k];
235 }
236 }
237 }
238
239 for (k=0; k<nmesh; k++) {
240 pao_xr[i][k] = Spe_PAO_RV[i][k];
241 }
242 }
243
244 /* logging */
245 EXX_Log_Print("Species:\n");
246 for (i=0; i<nspec; i++) {
247 for (j=0; j<spec_nb[i]; j++) {
248 EXX_Log_Print(" %3d, %3d : %3d %3d %3d\n",
249 i, j, spec_l[i][j], spec_m[i][j], spec_n[i][j]);
250 }
251 }
252 EXX_Log_Print("\n");
253
254 MPI_Barrier(g_exx_mpicomm);
255
256 g_exx = EXX_New(natom, atom_v, atom_sp, nspec, spec_rc, spec_nb,
257 pvec, w_scr, rc_cut, system_type, g_exx_cachedir);
258
259 if (Host_ID==myrank) { printf("<EXX_Init> EXX Initialized\n"); }
260
261 EXX_Log_Print("EXX Initiatlized:\n");
262 EXX_Log_Print(" natom= %d\n", EXX_natom(g_exx));
263 EXX_Log_Print(" nbmax= %d\n", EXX_nbmax(g_exx));
264 EXX_Log_Print("\n");
265
266 EXX_Log_Print("Atoms:\n");
267 EXX_Log_Print(" # : X Y Z RC NB\n");
268 for (i=0; i<EXX_natom(g_exx); i++) {
269 EXX_Log_Print(" %3d : %8.4f %8.4f %8.4f %8.4f %2d\n",
270 i, EXX_atom_v(g_exx)[3*i+0], EXX_atom_v(g_exx)[3*i+1],
271 EXX_atom_v(g_exx)[3*i+2], EXX_atom_rc(g_exx)[i],
272 EXX_atom_nb(g_exx)[i]);
273 }
274 EXX_Log_Print("\n");
275
276 EXX_Log_Print("Translational Vectors:\n");
277 EXX_Log_Print(" a= %8.4f %8.4f %8.4f\n",
278 EXX_pvec(g_exx)[0], EXX_pvec(g_exx)[1], EXX_pvec(g_exx)[2]);
279 EXX_Log_Print(" b= %8.4f %8.4f %8.4f\n",
280 EXX_pvec(g_exx)[3], EXX_pvec(g_exx)[4], EXX_pvec(g_exx)[5]);
281 EXX_Log_Print(" c= %8.4f %8.4f %8.4f\n",
282 EXX_pvec(g_exx)[6], EXX_pvec(g_exx)[7], EXX_pvec(g_exx)[8]);
283 EXX_Log_Print(" w_scr= %8.4f\n", EXX_w_scr(g_exx));
284 EXX_Log_Print(" rc_cut= %8.4f\n", EXX_rc_cut(g_exx));
285 EXX_Log_Print("\n");
286
287 EXX_Log_Print("Overlapping Pairs:\n");
288 EXX_Log_Print(" nshell_op= %d\n", EXX_Number_of_OP_Shells(g_exx));
289 EXX_Log_Print(" nop= %d\n", EXX_Number_of_OP(g_exx));
290 EXX_Log_Print(" # : A1 A2 CELL \n");
291 for (i=0; i<EXX_Number_of_OP(g_exx); i++) {
292 EXX_Log_Print(" %3d : %3d %3d %5d\n",
293 i, EXX_Array_OP_Atom1(g_exx)[i], EXX_Array_OP_Atom2(g_exx)[i],
294 EXX_Array_OP_Cell(g_exx)[i]);
295 }
296 EXX_Log_Print("\n");
297
298 EXX_Log_Print("Exchanging Pairs:\n");
299 EXX_Log_Print(" nshell_ep= %d\n", EXX_Number_of_EP_Shells(g_exx));
300 EXX_Log_Print(" nep= %d\n", EXX_Number_of_EP(g_exx));
301 EXX_Log_Print(" # : A1 A2 CELL \n");
302 for (i=0; i<EXX_Number_of_EP(g_exx); i++) {
303 EXX_Log_Print(" %3d : %3d %3d %5d\n",
304 i, EXX_Array_EP_Atom1(g_exx)[i], EXX_Array_EP_Atom2(g_exx)[i],
305 EXX_Array_EP_Cell(g_exx)[i]);
306 }
307 EXX_Log_Print("\n");
308
309 /* DM */
310 exx_atm1 = EXX_Array_EP_Atom1(g_exx);
311 exx_atm2 = EXX_Array_EP_Atom2(g_exx);
312 nep = EXX_Number_of_EP(g_exx);
313
314 g_exx_DM = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[16]);
315 for (m=0; m<List_YOUSO[16]; m++){
316 g_exx_DM[m] = (dcomplex****)malloc(sizeof(dcomplex***)*(SpinP_switch+1));
317 for (k=0; k<=SpinP_switch; k++){
318 g_exx_DM[m][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(nep));
319 for (iep=0; iep<nep; iep++) {
320 Gc_AN = exx_atm1[iep]+1;
321 Cwan = WhatSpecies[Gc_AN];
322 tno0 = Spe_Total_NO[Cwan];
323
324 Gh_AN = exx_atm2[iep]+1;
325 Hwan = WhatSpecies[Gh_AN];
326 tno1 = Spe_Total_NO[Hwan];
327
328 g_exx_DM[m][k][iep] = (dcomplex**)malloc(sizeof(dcomplex*)*tno0);
329 for (i=0; i<tno0; i++){
330 g_exx_DM[m][k][iep][i] = (dcomplex*)malloc(sizeof(dcomplex)*tno1);
331 for (j=0; j<tno1; j++) {
332 g_exx_DM[m][k][iep][i][j].r = 0.0;
333 g_exx_DM[m][k][iep][i][j].i = 0.0;
334 }
335 }
336 }
337 }
338 }
339
340 /* STEP1: calculation of overlaps */
341
342 dtime(&time0);
343 {
344 EXX_Step1(g_exx, natom, atom_sp, nspec,
345 spec_nb, spec_rc, spec_l, spec_m, spec_n,
346 pao_fr, pao_xr, spec_nmesh);
347 }
348 dtime(&time1);
349 time_step1 = time1 - time0;
350
351 if (Host_ID==myrank) {
352 printf("<EXX_Init> Overlaps (NOP=%5d)\n", EXX_Number_of_OP(g_exx));
353 }
354
355 /* STEP2: calculation of ERIs */
356
357 dtime(&time0);
358 {
359 EXX_Step2(g_exx);
360 }
361 dtime(&time1);
362 time_step2 = time1 - time0;
363
364 if (Host_ID==myrank) {
365 printf("<EXX_Init> ERIs (NEP=%10d)\n", EXX_Number_of_EP(g_exx));
366 }
367
368 /* free memory */
369 free(atom_v);
370 free(atom_sp);
371 free(spec_rc);
372 free(spec_nb);
373
374 for (i=0; i<nspec; i++) {
375 free(spec_l[i]);
376 free(spec_m[i]);
377 free(spec_n[i]);
378 }
379 free(spec_l);
380 free(spec_m);
381 free(spec_n);
382 free(spec_nmesh);
383
384 for (i=0; i<nspec; i++) {
385 lmax = Spe_PAO_LMAX[i]+1;
386 mul = Spe_PAO_Mul[i];
387 for (l=0; l<lmax; l++) {
388 for (m=0; m<mul; m++) {
389 free(pao_fr[i][l][m]);
390 }
391 free(pao_fr[i][l]);
392 }
393 free(pao_fr[i]);
394 free(pao_xr[i]);
395 }
396 free(pao_fr);
397 free(pao_xr);
398 }
399
400
EXX_Time_Report(void)401 void EXX_Time_Report(void)
402 {
403 printf(" EXX_Step1 = %10.5f\n",time_step1);
404 printf(" EXX_Step2 = %10.5f\n",time_step2);
405 }
406
407
EXX_on_OpenMX_Free(void)408 void EXX_on_OpenMX_Free(void)
409 {
410 int m, k, iep, nep, i, Gc_AN, Cwan, tno0;
411 const int *exx_atm1;
412
413 exx_atm1 = EXX_Array_EP_Atom1(g_exx);
414 nep = EXX_Number_of_EP(g_exx);
415
416 for (m=0; m<List_YOUSO[16]; m++){
417 for (k=0; k<=SpinP_switch; k++){
418 for (iep=0; iep<nep; iep++) {
419 Gc_AN = exx_atm1[iep]+1;
420 Cwan = WhatSpecies[Gc_AN];
421 tno0 = Spe_Total_NO[Cwan];
422 for (i=0; i<tno0; i++){
423 free(g_exx_DM[m][k][iep][i]);
424 }
425 free(g_exx_DM[m][k][iep]);
426 }
427 free(g_exx_DM[m][k]);
428 }
429 free(g_exx_DM[m]);
430 }
431 free(g_exx_DM);
432
433 EXX_Free(g_exx);
434 g_exx = NULL;
435
436 EXX_Log_Close();
437 }
438
439
find_EP(EXX_t * exx,int ia1,int ia2,int r[3])440 static int find_EP(
441 EXX_t *exx,
442 int ia1,
443 int ia2,
444 int r[3]
445 )
446 {
447 int i, ic, nep, nshep;
448 const int *ep_atom1, *ep_atom2, *ep_cell;
449
450 nep = EXX_Number_of_EP(exx);
451 nshep = EXX_Number_of_EP_Shells(exx);
452 ep_atom1 = EXX_Array_EP_Atom1(exx);
453 ep_atom2 = EXX_Array_EP_Atom2(exx);
454 ep_cell = EXX_Array_EP_Cell(exx);
455
456 ic = EXX_Index_XYZ2Cell(r, nshep);
457
458 for (i=0; i<nep; i++) {
459 if (ia1==ep_atom1[i] && ia2==ep_atom2[i] && ic==ep_cell[i]) { break; }
460 }
461
462 if (i==nep) {
463 fprintf(stderr, "EP not found!\n");
464 fprintf(stderr, " ia1= %d\n", ia1);
465 fprintf(stderr, " ia2= %d\n", ia2);
466 fprintf(stderr, " ic = %d ( %3d %3d %3d)\n", ic, r[0], r[1], r[2]);
467 abort();
468 }
469
470 return i;
471 }
472
473
EXX_OP2EP_Band(EXX_t * exx,int iop1,int iop2,int icell,int * iep1,int * iep2,int * iRd,int * mul)474 static void EXX_OP2EP_Band(
475 EXX_t *exx,
476 int iop1,
477 int iop2,
478 int icell,
479 int *iep1, /* [8] */
480 int *iep2, /* [8] */
481 int *iRd, /* [8] */
482 int *mul
483 )
484 {
485 int m, i, j, k, l, ic1, ic2;
486 int nshop, nshep;
487 int r1[3], r2[3], rd[3];
488 int ep_r1[3], ep_r2[3], ep_rd[3];
489 const int *op_atom1, *op_atom2, *op_cell;
490 int Rd_is_0, R1_is_0, R2_is_0;
491
492 nshop = EXX_Number_of_OP_Shells(exx);
493 nshep = EXX_Number_of_EP_Shells(exx);
494
495 op_atom1 = EXX_Array_OP_Atom1(exx);
496 op_atom2 = EXX_Array_OP_Atom2(exx);
497 op_cell = EXX_Array_OP_Cell(exx);
498
499 i = op_atom1[iop1];
500 j = op_atom2[iop1];
501 k = op_atom1[iop2];
502 l = op_atom2[iop2];
503 ic1 = op_cell[iop1];
504 ic2 = op_cell[iop2];
505
506 EXX_Index_Cell2XYZ(ic1, nshop, r1);
507 EXX_Index_Cell2XYZ(ic2, nshop, r2);
508 EXX_Index_Cell2XYZ(icell, nshep, rd);
509
510 /* 0: (i,k,Rd), (j,l,R2+Rd-R1), R1 */
511 for (m=0; m<3; m++) {
512 ep_r1[m] = rd[m];
513 ep_r2[m] = rd[m]+r2[m]-r1[m];
514 ep_rd[m] = r1[m];
515 }
516 iep1[0] = find_EP(exx, i, k, ep_r1);
517 iep2[0] = find_EP(exx, j, l, ep_r2);
518 iRd[0] = EXX_Index_XYZ2Cell(ep_rd, nshep);
519
520 /* 1: (j,k,Rd-R1), (i,l,R2+Rd), -R1 */
521 for (m=0; m<3; m++) {
522 ep_r1[m] = rd[m]-r1[m];
523 ep_r2[m] = rd[m]+r2[m];
524 ep_rd[m] = -r1[m];
525 }
526 iep1[1] = find_EP(exx, j, k, ep_r1);
527 iep2[1] = find_EP(exx, i, l, ep_r2);
528 iRd[1] = EXX_Index_XYZ2Cell(ep_rd, nshep);
529
530 /* 2: (i,l,R2+Rd), (j,k,Rd-R1), R1 */
531 for (m=0; m<3; m++) {
532 ep_r1[m] = rd[m]+r2[m];
533 ep_r2[m] = rd[m]-r1[m];
534 ep_rd[m] = r1[m];
535 }
536 iep1[2] = find_EP(exx, i, l, ep_r1);
537 iep2[2] = find_EP(exx, j, k, ep_r2);
538 iRd[2] = EXX_Index_XYZ2Cell(ep_rd, nshep);
539
540 /* 3: (j,l,R2+Rd-R1), (i,k,Rd), -R1 */
541 for (m=0; m<3; m++) {
542 ep_r1[m] = rd[m]+r2[m]-r1[m];
543 ep_r2[m] = rd[m];
544 ep_rd[m] = -r1[m];
545 }
546 iep1[3] = find_EP(exx, j, l, ep_r1);
547 iep2[3] = find_EP(exx, i, k, ep_r2);
548 iRd[3] = EXX_Index_XYZ2Cell(ep_rd, nshep);
549
550 /* 4: (k,i,-Rd), (l,j,R1-R2-Rd), R2 */
551 for (m=0; m<3; m++) {
552 ep_r1[m] = -rd[m];
553 ep_r2[m] = r1[m]-r2[m]-rd[m];
554 ep_rd[m] = r2[m];
555 }
556 iep1[4] = find_EP(exx, k, i, ep_r1);
557 iep2[4] = find_EP(exx, l, j, ep_r2);
558 iRd[4] = EXX_Index_XYZ2Cell(ep_rd, nshep);
559
560 /* 5: (k,j,R1-Rd), (l,i,-R2-Rd), R2 */
561 for (m=0; m<3; m++) {
562 ep_r1[m] = r1[m]-rd[m];
563 ep_r2[m] = -r2[m]-rd[m];
564 ep_rd[m] = r2[m];
565 }
566 iep1[5] = find_EP(exx, k, j, ep_r1);
567 iep2[5] = find_EP(exx, l, i, ep_r2);
568 iRd[5] = EXX_Index_XYZ2Cell(ep_rd, nshep);
569
570 /* 6: (l,i,-R2-Rd), (k,j,R1-Rd), -R2 */
571 for (m=0; m<3; m++) {
572 ep_r1[m] = -r2[m]-rd[m];
573 ep_r2[m] = r1[m]-rd[m];
574 ep_rd[m] = -r2[m];
575 }
576 iep1[6] = find_EP(exx, l, i, ep_r1);
577 iep2[6] = find_EP(exx, k, j, ep_r2);
578 iRd[6] = EXX_Index_XYZ2Cell(ep_rd, nshep);
579
580 /* 7: (l,j,R1-R2-Rd), (k,i,-Rd), -R2 */
581 for (m=0; m<3; m++) {
582 ep_r1[m] = r1[m]-r2[m]-rd[m];
583 ep_r2[m] = -rd[m];
584 ep_rd[m] = -r2[m];
585 }
586 iep1[7] = find_EP(exx, l, j, ep_r1);
587 iep2[7] = find_EP(exx, k, i, ep_r2);
588 iRd[7] = EXX_Index_XYZ2Cell(ep_rd, nshep);
589
590 /* multiplicity */
591 Rd_is_0 = (0==rd[0]) && (0==rd[1]) && (0==rd[2]);
592 R1_is_0 = (0==r1[0]) && (0==r1[1]) && (0==r1[2]);
593 R2_is_0 = (0==r2[0]) && (0==r2[1]) && (0==r2[2]);
594 if (Rd_is_0 && iop1==iop2) {
595 if (R1_is_0 && i==j) { *mul = 8; } else { *mul = 2; }
596 } else {
597 if (R1_is_0 && i==j) {
598 if (R2_is_0 && k==l) { *mul = 4; } else { *mul = 2; }
599 } else {
600 if (R2_is_0 && k==l) { *mul = 2; } else { *mul = 1; }
601 }
602 }
603 }
604
605
find_EP_cluster(EXX_t * exx,int ia1,int ia2)606 static int find_EP_cluster(
607 EXX_t *exx,
608 int ia1,
609 int ia2
610 )
611 {
612 int i, nep;
613 const int *ep_atom1, *ep_atom2;
614
615 nep = EXX_Number_of_EP(exx);
616 ep_atom1 = EXX_Array_EP_Atom1(exx);
617 ep_atom2 = EXX_Array_EP_Atom2(exx);
618
619 for (i=0; i<nep; i++) {
620 if (ia1==ep_atom1[i] && ia2==ep_atom2[i]) { break; }
621 }
622
623 if (i==nep) {
624 fprintf(stderr, "***Error at %s (%d)\n", __FILE__, __LINE__);
625 fprintf(stderr, " EP not found!\n");
626 abort();
627 }
628
629 return i;
630 }
631
632
633
EXX_OP2EP_Cluster(EXX_t * exx,int iop1,int iop2,int * iep1,int * iep2,int * mul)634 static void EXX_OP2EP_Cluster(
635 EXX_t *exx,
636 int iop1,
637 int iop2,
638 int *iep1, /* [8] */
639 int *iep2, /* [8] */
640 int *mul
641 )
642 {
643 int i, j, k, l, nep;
644 const int *op_atom1, *op_atom2;
645
646 nep = EXX_Number_of_OP(exx);
647 op_atom1 = EXX_Array_OP_Atom1(exx);
648 op_atom2 = EXX_Array_OP_Atom2(exx);
649
650 i = op_atom1[iop1];
651 j = op_atom2[iop1];
652 k = op_atom1[iop2];
653 l = op_atom2[iop2];
654
655 /* 0: (i,k), (j,l) */
656 iep1[0] = find_EP_cluster(exx, i, k);
657 iep2[0] = find_EP_cluster(exx, j, l);
658
659 /* 1: (j,k), (i,l) */
660 iep1[1] = find_EP_cluster(exx, j, k);
661 iep2[1] = find_EP_cluster(exx, i, l);
662
663 /* 2: (i,l), (j,k) */
664 iep1[2] = find_EP_cluster(exx, i, l);
665 iep2[2] = find_EP_cluster(exx, j, k);
666
667 /* 3: (j,l), (i,k) */
668 iep1[3] = find_EP_cluster(exx, j, l);
669 iep2[3] = find_EP_cluster(exx, i, k);
670
671 /* 4: (k,i), (l,j) */
672 iep1[4] = find_EP_cluster(exx, k, i);
673 iep2[4] = find_EP_cluster(exx, l, j);
674
675 /* 5: (k,j), (l,i) */
676 iep1[5] = find_EP_cluster(exx, k, j);
677 iep2[5] = find_EP_cluster(exx, l, i);
678
679 /* 6: (l,i), (k,j) */
680 iep1[6] = find_EP_cluster(exx, l, i);
681 iep2[6] = find_EP_cluster(exx, k, j);
682
683 /* 7: (l,j), (k,i) */
684 iep1[7] = find_EP_cluster(exx, l, j);
685 iep2[7] = find_EP_cluster(exx, k, i);
686
687 /* multiplicity */
688 if (iop1==iop2) {
689 if (i==j) { *mul = 8; } else { *mul = 2; }
690 } else {
691 if (i==j) {
692 if (k==l) { *mul = 4;} else { *mul = 2; }
693 } else {
694 if (k==l) { *mul = 2;} else { *mul = 1; }
695 }
696 }
697 }
698
699
EXX_Basis_Index(int ib1,int ib2,int ib3,int ib4,int * ib1_op,int * ib2_op,int * ib3_op,int * ib4_op,int mul)700 static void EXX_Basis_Index(
701 int ib1,
702 int ib2,
703 int ib3,
704 int ib4,
705 int *ib1_op,
706 int *ib2_op,
707 int *ib3_op,
708 int *ib4_op,
709 int mul
710 )
711 {
712 switch (mul) {
713 case 0: *ib1_op = ib1; *ib2_op = ib3; *ib3_op = ib2; *ib4_op = ib4; break;
714 case 1: *ib1_op = ib3; *ib2_op = ib1; *ib3_op = ib2; *ib4_op = ib4; break;
715 case 2: *ib1_op = ib1; *ib2_op = ib3; *ib3_op = ib4; *ib4_op = ib2; break;
716 case 3: *ib1_op = ib3; *ib2_op = ib1; *ib3_op = ib4; *ib4_op = ib2; break;
717 case 4: *ib1_op = ib2; *ib2_op = ib4; *ib3_op = ib1; *ib4_op = ib3; break;
718 case 5: *ib1_op = ib4; *ib2_op = ib2; *ib3_op = ib1; *ib4_op = ib3; break;
719 case 6: *ib1_op = ib2; *ib2_op = ib4; *ib3_op = ib3; *ib4_op = ib1; break;
720 case 7: *ib1_op = ib4; *ib2_op = ib2; *ib3_op = ib3; *ib4_op = ib1; break;
721 #if 0
722 case 0: *ib1_op = ib1; *ib2_op = ib3; *ib3_op = ib2; *ib4_op = ib4; break;
723 case 1: *ib1_op = ib2; *ib2_op = ib3; *ib3_op = ib1; *ib4_op = ib4; break;
724 case 2: *ib1_op = ib1; *ib2_op = ib4; *ib3_op = ib2; *ib4_op = ib3; break;
725 case 3: *ib1_op = ib2; *ib2_op = ib4; *ib3_op = ib1; *ib4_op = ib3; break;
726 case 4: *ib1_op = ib3; *ib2_op = ib1; *ib3_op = ib4; *ib4_op = ib2; break;
727 case 5: *ib1_op = ib3; *ib2_op = ib2; *ib3_op = ib4; *ib4_op = ib1; break;
728 case 6: *ib1_op = ib4; *ib2_op = ib1; *ib3_op = ib3; *ib4_op = ib2; break;
729 case 7: *ib1_op = ib4; *ib2_op = ib2; *ib3_op = ib3; *ib4_op = ib1; break;
730 #endif
731 }
732 }
733
734
EXX_Fock_Cluster(double ** H,double * Ux,EXX_t * exx,dcomplex *** exx_CDM,const int * MP)735 void EXX_Fock_Cluster(
736 double **H,
737 double *Ux,
738 EXX_t *exx,
739 dcomplex ***exx_CDM,
740 const int *MP
741 )
742 {
743 double *local_H;
744 /* double *buffer_H;*/
745 int H_n;
746 int i, j, n, ir, nr, irn, nep;
747
748 int iop1, iop2, nb1, nb2, nb3, nb4, nrn;
749 int ia1, ia2, ia3, ia4, ib1, ib2, ib3, ib4;
750 int ib1_ep, ib2_ep, ib3_ep, ib4_ep;
751 int nb1_ep, nb2_ep, nb3_ep, nb4_ep;
752 const int *ep_atom1, *ep_atom2, *atom_nb;
753 const int *op_atom1, *op_atom2;
754
755 int iep1[8], iep2[8], mul;
756 double w;
757
758 int GA_AN, wanA, tnoA, Anum, GB_AN, wanB, tnoB, Bnum;
759
760 double *eri;
761 int neri;
762 double sum, den;
763
764 int nproc, myid, iproc;
765 MPI_Status stat;
766
767 double local_Ux;
768 /*double buffer_Ux;*/
769
770 MPI_Comm_size(g_exx_mpicomm, &nproc);
771 MPI_Comm_rank(g_exx_mpicomm, &myid);
772
773 op_atom1 = EXX_Array_OP_Atom1(exx);
774 op_atom2 = EXX_Array_OP_Atom2(exx);
775 ep_atom1 = EXX_Array_EP_Atom1(exx);
776 ep_atom2 = EXX_Array_EP_Atom2(exx);
777 atom_nb = EXX_atom_nb(exx);
778
779 /* matrix size */
780 H_n = 0;
781 for (i=1; i<=atomnum; i++){
782 wanA = WhatSpecies[i];
783 H_n += Spe_Total_CNO[wanA];
784 }
785
786 /* allocation */
787 local_H = (double*)malloc(sizeof(double)*H_n*H_n);
788 /*buffer_H = (double*)malloc(sizeof(double)*H_n*H_n);*/
789 for (i=0; i<H_n*H_n; i++) { local_H[i] = 0.0; }
790
791 nr = EXX_File_ERI_Read_NRecord(exx);
792 local_Ux = 0.0;
793
794 for (ir=0; ir<nr; ir++) {
795 EXX_File_ERI_Read_Data_Head(exx, ir, &iop1, &iop2,
796 &nb1, &nb2, &nb3, &nb4, &nrn);
797
798 /*EXX_Log_Print("ir= %2d iop1= %2d iop2= %2d ", ir, iop1, iop2);*/
799 /*EXX_Log_Print("atom= %1d %1d %1d %1d\n", */
800 /* op_atom1[iop1], op_atom2[iop1], op_atom1[iop2], op_atom2[iop2]);*/
801
802 if (nrn != 1) {
803 fprintf(stderr, "***Error at %s (%d)\n", __FILE__, __LINE__);
804 fprintf(stderr, " file is broken.\n");
805 abort();
806 }
807
808 neri = nb1*nb2*nb3*nb4*nrn;
809 eri = (double*)malloc(sizeof(double)*neri);
810 EXX_File_ERI_Read(exx, ir, eri, NULL, iop1, iop2, nrn, neri);
811
812
813 #if 0
814 if (0==g_step) {
815 EXX_Log_Print("step3: ir= %d\n", ir);
816
817 for (ib1=0; ib1<nb1; ib1++) {
818 for (ib2=0; ib2<nb2; ib2++) {
819 for (ib3=0; ib3<nb3; ib3++) {
820 for (ib4=0; ib4<nb4; ib4++) {
821 for (irn=0; irn<nrn; irn++) {
822 EXX_Log_Print("IB1= %2d IB2= %2d ", ib1, ib2);
823 EXX_Log_Print("IB3= %2d IB4= %2d ", ib3, ib4);
824 EXX_Log_Print("IRN= %3d ", irn);
825 i = (((ib1*nb2+ib2)*nb3+ib3)*nb4+ib4)*nrn+irn;
826 EXX_Log_Print(" ERI= %10.6f\n", eri[i]);
827 }
828 }
829 }
830 }
831 }
832 }
833 #endif
834
835 EXX_OP2EP_Cluster(exx, iop1, iop2, iep1, iep2, &mul);
836 /*if (0==SpinP_switch) { mul *= 2; }*/
837 w = 1.0/(double)mul;
838
839 for (j=0; j<8; j++) {
840 ia1 = ep_atom1[iep1[j]]; /* i */
841 ia2 = ep_atom2[iep1[j]]; /* j */
842 ia3 = ep_atom1[iep2[j]]; /* k */
843 ia4 = ep_atom2[iep2[j]]; /* l */
844 /*EXX_Log_Print(" j=%1d iep1= %2d iep2= %2d ", j, iep1[j], iep2[j]);*/
845 /*EXX_Log_Print("atom= %1d %1d %1d %1d\n", ia1, ia2, ia3, ia4);*/
846
847 nb1_ep = atom_nb[ia1];
848 nb2_ep = atom_nb[ia2];
849 nb3_ep = atom_nb[ia3];
850 nb4_ep = atom_nb[ia4];
851
852 /* EXX_Log_Print(" nb= %2d %2d %2d %2d\n", nb1_ep, nb2_ep, nb3_ep, nb4_ep);*/
853
854 GA_AN = ia1+1;
855 wanA = WhatSpecies[GA_AN];
856 tnoA = Spe_Total_CNO[wanA];
857 Anum = MP[GA_AN];
858
859 GB_AN = ia2+1;
860 wanB = WhatSpecies[GB_AN];
861 tnoB = Spe_Total_CNO[wanB];
862 Bnum = MP[GB_AN];
863
864 for (ib1_ep=0; ib1_ep<nb1_ep; ib1_ep++) {
865 for (ib2_ep=0; ib2_ep<nb2_ep; ib2_ep++) {
866 for (ib3_ep=0; ib3_ep<nb3_ep; ib3_ep++) {
867 for (ib4_ep=0; ib4_ep<nb4_ep; ib4_ep++) {
868 EXX_Basis_Index(ib1_ep, ib2_ep, ib3_ep, ib4_ep,
869 &ib1, &ib2, &ib3, &ib4, j);
870 /*EXX_Log_Print(" basis(OP)= %2d %2d %2d %2d",*/
871 /* ib1, ib2, ib3, ib4);*/
872 /*EXX_Log_Print(" basis(EP)= %2d %2d %2d %2d\n",*/
873 /* ib1_ep, ib2_ep, ib3_ep, ib4_ep);*/
874 i = (((ib1*nb2+ib2)*nb3+ib3)*nb4+ib4)*nrn;
875 den = exx_CDM[iep2[j]][ib3_ep][ib4_ep].r;
876 /*if (den<0.0) { den = 0.0; }*/
877 /*if (den>1.0) { den = 1.0; }*/
878 sum = eri[i] * den;
879 /*i = (Anum+ib1-1)*H_n + (Bnum+ib2-1);*/
880 i = (Anum+ib1_ep-1)*H_n + (Bnum+ib2_ep-1);
881 local_H[i] += -w*sum;
882 den = exx_CDM[iep1[j]][ib1_ep][ib2_ep].r;
883 /*if (den<0.0) { den = 0.0; }*/
884 /*if (den>1.0) { den = 1.0; }*/
885 local_Ux += -0.5*w*sum*den;
886 }
887 }
888 /*local_H[i] += -0.5*w*sum;*/
889 /*local_Ux += -0.5*w*sum*exx_CDM[iep1[j]][ib1_ep][ib2_ep];*/
890 }
891 }
892 } /* loop of j */
893
894 free(eri);
895 }
896
897
898 #if 0
899 /* gather data */
900 if (EXX_ROOT_RANK==myid) {
901 for (iproc=0; iproc<nproc; iproc++) {
902 if (EXX_ROOT_RANK==iproc) { continue; }
903 /* Fock matrix */
904 MPI_Recv(buffer_H, H_n*H_n, MPI_DOUBLE, iproc, 0, g_exx_mpicomm, &stat);
905 for (i=0; i<H_n*H_n; i++) { local_H[i] += buffer_H[i]; }
906 /* energy */
907 MPI_Recv(&buffer_Ux, 1, MPI_DOUBLE, iproc, 1, g_exx_mpicomm, &stat);
908 local_Ux += buffer_Ux;
909 } /* iproc */
910 } else {
911 MPI_Send(local_H, H_n*H_n, MPI_DOUBLE, EXX_ROOT_RANK, 0, g_exx_mpicomm);
912 MPI_Send(&local_Ux, 1, MPI_DOUBLE, EXX_ROOT_RANK, 1, g_exx_mpicomm);
913 }
914 #endif
915 /* all-to-all communication */
916 MPI_Allreduce(local_H, local_H, H_n*H_n, MPI_DOUBLE,
917 MPI_SUM, g_exx_mpicomm);
918 MPI_Allreduce(&local_Ux, &local_Ux, 1, MPI_DOUBLE,
919 MPI_SUM, g_exx_mpicomm);
920
921
922 #if 0
923 #if 0
924 if (2==g_step || 3==g_step) { EXX_Log_Print("Local_H:\n"); }
925 #endif
926
927 for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
928 wanA = WhatSpecies[GA_AN];
929 tnoA = Spe_Total_CNO[wanA];
930 Anum = MP[GA_AN];
931 for (GB_AN=1; GB_AN<=atomnum; GB_AN++) {
932 wanB = WhatSpecies[GB_AN];
933 tnoB = Spe_Total_CNO[wanB];
934 Bnum = MP[GB_AN];
935
936 for (ib1=0; ib1<tnoA; ib1++) {
937 for (ib2=0; ib2<tnoB; ib2++) {
938 i = (Anum+ib1-1)*H_n + (Bnum+ib2-1);
939 H[Anum+ib1][Bnum+ib2] += local_H[i];
940 #if 0
941 if (2==g_step || 3==g_step) {
942 EXX_Log_Print(" %2d-%2d %2d-%2d %15.8f\n",
943 GA_AN, ib1, GB_AN, ib2, local_H[i]);
944 }
945 #endif
946 }
947 }
948 }
949 }
950 #if 0
951 EXX_Log_Print("\n");
952 #endif
953 #endif
954 *Ux = local_Ux;
955
956 g_step++;
957
958 /* free*/
959 free(local_H);
960 }
961
962
EXX_Simple_Mixing_DM(EXX_t * exx,double mix_wgt,dcomplex *** exx_CDM,dcomplex *** exx_PDM,dcomplex *** exx_P2DM)963 void EXX_Simple_Mixing_DM(
964 EXX_t *exx,
965 double mix_wgt,
966 dcomplex ***exx_CDM,
967 dcomplex ***exx_PDM,
968 dcomplex ***exx_P2DM
969 )
970 {
971 int iep, nep, Gc_AN, Gh_AN, ian, jan, m, n;
972 const int *ep_atom1, *ep_atom2;
973 double w1, w2;
974
975 nep = EXX_Number_of_EP(exx);
976 ep_atom1 = EXX_Array_EP_Atom1(exx);
977 ep_atom2 = EXX_Array_EP_Atom2(exx);
978
979 w1 = mix_wgt;
980 w2 = 1.0 - mix_wgt;
981
982 #if EXX_MIX_DM
983 for (iep=0; iep<nep; iep++) {
984 Gc_AN = ep_atom1[iep]+1;
985 ian = Spe_Total_CNO[WhatSpecies[Gc_AN]];
986 Gh_AN = ep_atom2[iep]+1;
987 jan = Spe_Total_CNO[WhatSpecies[Gh_AN]];
988 for (m=0; m<ian; m++){
989 for (n=0; n<jan; n++){
990 exx_CDM[iep][m][n].r = w1 * exx_CDM[iep][m][n].r
991 + w2 * exx_PDM[iep][m][n].r;
992 exx_CDM[iep][m][n].i = w1 * exx_CDM[iep][m][n].i
993 + w2 * exx_PDM[iep][m][n].i;
994 exx_P2DM[iep][m][n].r = exx_PDM[iep][m][n].r;
995 exx_P2DM[iep][m][n].i = exx_PDM[iep][m][n].i;
996 exx_PDM[iep][m][n].r = exx_CDM[iep][m][n].r;
997 exx_PDM[iep][m][n].i = exx_CDM[iep][m][n].i;
998 }
999 }
1000 } /* loop of iep */
1001 #endif
1002 }
1003
1004
1005
1006
1007
EXX_Fock_Band(dcomplex ** H,EXX_t * exx,dcomplex **** exx_CDM,int * MP,double k1,double k2,double k3,int spin)1008 void EXX_Fock_Band(
1009 dcomplex **H,
1010 EXX_t *exx,
1011 dcomplex ****exx_CDM,
1012 int *MP,
1013 double k1,
1014 double k2,
1015 double k3,
1016 int spin
1017 )
1018 {
1019 double *local_H, *mpi_H;
1020 int H_n, nbuf;
1021 int i, j, n, ir, nr, irn, nep;
1022
1023 int iop1, iop2, nb1, nb2, nb3, nb4, nrn;
1024 int ia1, ia2, ia3, ia4, ib1, ib2, ib3, ib4, icell1, icell2;
1025 int ib1_ep, ib2_ep, ib3_ep, ib4_ep;
1026 int nb1_ep, nb2_ep, nb3_ep, nb4_ep;
1027 const int *ep_atom1, *ep_atom2, *ep_cell, *atom_nb;
1028 const int *op_atom1, *op_atom2;
1029
1030 int iep1[8], iep2[8], icell3[8], mul;
1031 double w;
1032
1033 int GA_AN, Anum, GB_AN, Bnum, tnoA, tnoB;
1034
1035 int neri;
1036 double *eri_list;
1037 int *iRm;
1038
1039 double eri;
1040 dcomplex den;
1041
1042 int nproc, myid, iproc;
1043 MPI_Status stat;
1044
1045 int ncd, nshell_ep;
1046 int iRn_x, iRn_y, iRn_z, iRmp_x, iRmp_y, iRmp_z;
1047 double kRn, kRmp, co1, si1, co2, si2;
1048
1049 MPI_Comm comm;
1050 double *k1_list, *k2_list, *k3_list;
1051 int *spin_list;
1052
1053 comm = g_exx_mpicomm;
1054
1055 MPI_Comm_size(comm, &nproc);
1056 MPI_Comm_rank(comm, &myid);
1057
1058 k1_list = (double*)malloc(sizeof(double)*nproc);
1059 k2_list = (double*)malloc(sizeof(double)*nproc);
1060 k3_list = (double*)malloc(sizeof(double)*nproc);
1061 spin_list = (int*)malloc(sizeof(int)*nproc);
1062
1063 /* all-to-all */
1064 MPI_Allgather(&k1, 1, MPI_DOUBLE, k1_list, 1, MPI_DOUBLE, comm);
1065 MPI_Allgather(&k2, 1, MPI_DOUBLE, k2_list, 1, MPI_DOUBLE, comm);
1066 MPI_Allgather(&k3, 1, MPI_DOUBLE, k3_list, 1, MPI_DOUBLE, comm);
1067 MPI_Allgather(&spin, 1, MPI_INT, spin_list, 1, MPI_INT, comm);
1068
1069 op_atom1 = EXX_Array_OP_Atom1(exx);
1070 op_atom2 = EXX_Array_OP_Atom2(exx);
1071 ep_atom1 = EXX_Array_EP_Atom1(exx);
1072 ep_atom2 = EXX_Array_EP_Atom2(exx);
1073 ep_cell = EXX_Array_EP_Cell(exx);
1074 atom_nb = EXX_atom_nb(exx);
1075 nshell_ep = EXX_Number_of_EP_Shells(exx);
1076
1077 ncd = 2*nshell_ep+1;
1078
1079 /* matrix size */
1080 H_n = 0;
1081 for (i=1; i<=atomnum; i++){ H_n += Spe_Total_CNO[WhatSpecies[i]]; }
1082
1083 /* allocation */
1084 nbuf = H_n*H_n*2;
1085 local_H = (double*)malloc(sizeof(double)*nbuf);
1086 mpi_H = (double*)malloc(sizeof(double)*nbuf);
1087
1088 nr = EXX_File_ERI_Read_NRecord(exx);
1089
1090 /* clear buffer */
1091 for (i=0; i<nbuf; i++) { local_H[i] = 0.0; }
1092
1093 for (ir=0; ir<nr; ir++) {
1094 EXX_File_ERI_Read_Data_Head(exx, ir, &iop1, &iop2,
1095 &nb1, &nb2, &nb3, &nb4, &nrn);
1096
1097 neri = nb1*nb2*nb3*nb4*nrn;
1098 eri_list = (double*)malloc(sizeof(double)*neri);
1099 iRm = (int*)malloc(sizeof(int)*nrn); /* Rm */
1100 EXX_File_ERI_Read(exx, ir, eri_list, iRm, iop1, iop2, nrn, neri);
1101
1102 for (iproc=0; iproc<nproc; iproc++) {
1103 /* clear buffer */
1104 for (i=0; i<nbuf; i++) { mpi_H[i] = 0.0; }
1105
1106 k1 = k1_list[iproc];
1107 k2 = k2_list[iproc];
1108 k3 = k3_list[iproc];
1109 spin = spin_list[iproc];
1110
1111 for (irn=0; irn<nrn; irn++) {
1112 EXX_OP2EP_Band(exx, iop1, iop2, iRm[irn],
1113 &iep1[0], &iep2[0], &icell3[0], &mul);
1114 w = 1.0/(double)mul;
1115
1116 for (j=0; j<8; j++) {
1117 ia1 = ep_atom1[iep1[j]]; /* i */
1118 ia2 = ep_atom2[iep1[j]]; /* j */
1119 ia3 = ep_atom1[iep2[j]]; /* k */
1120 ia4 = ep_atom2[iep2[j]]; /* l */
1121 icell1 = ep_cell[iep1[j]]; /* Rn */
1122 icell2 = ep_cell[iep2[j]]; /* Rm' */
1123
1124 nb1_ep = atom_nb[ia1];
1125 nb2_ep = atom_nb[ia2];
1126 nb3_ep = atom_nb[ia3];
1127 nb4_ep = atom_nb[ia4];
1128
1129 GA_AN = ia1+1;
1130 Anum = MP[GA_AN];
1131
1132 GB_AN = ia2+1;
1133 Bnum = MP[GB_AN];
1134
1135 /* phase */
1136 iRn_x = icell1%ncd - nshell_ep;
1137 iRn_y = (icell1/ncd)%ncd - nshell_ep;
1138 iRn_z = (icell1/ncd/ncd)%ncd - nshell_ep;
1139 kRn = k1*(double)iRn_x + k2*(double)iRn_y + k3*(double)iRn_z;
1140 si1 = sin(2.0*PI*kRn);
1141 co1 = cos(2.0*PI*kRn);
1142
1143 iRmp_x = icell2%ncd - nshell_ep;
1144 iRmp_y = (icell2/ncd)%ncd - nshell_ep;
1145 iRmp_z = (icell2/ncd/ncd)%ncd - nshell_ep;
1146
1147 kRmp = k1*(double)iRmp_x + k2*(double)iRmp_y + k3*(double)iRmp_z;
1148 si2 = sin(2.0*PI*kRmp);
1149 co2 = cos(2.0*PI*kRmp);
1150
1151 for (ib1_ep=0; ib1_ep<nb1_ep; ib1_ep++) {
1152 for (ib2_ep=0; ib2_ep<nb2_ep; ib2_ep++) {
1153 for (ib3_ep=0; ib3_ep<nb3_ep; ib3_ep++) {
1154 for (ib4_ep=0; ib4_ep<nb4_ep; ib4_ep++) {
1155 EXX_Basis_Index(ib1_ep, ib2_ep, ib3_ep, ib4_ep,
1156 &ib1, &ib2, &ib3, &ib4, j);
1157 i = (((ib1*nb2+ib2)*nb3+ib3)*nb4+ib4)*nrn+irn;
1158 eri = eri_list[i];
1159 den.r = exx_CDM[spin][iep2[j]][ib3_ep][ib4_ep].r;
1160 den.i = exx_CDM[spin][iep2[j]][ib3_ep][ib4_ep].i;
1161 i = (Anum+ib1_ep-1)*H_n + (Bnum+ib2_ep-1);
1162 mpi_H[2*i+0] += -w*eri*(co1*den.r - si1*den.i); /* real */
1163 mpi_H[2*i+1] += -w*eri*(co1*den.i + si1*den.r); /* imag */
1164 }
1165 }
1166 }
1167 }
1168 } /* loop of j */
1169 } /* loop of irn */
1170
1171 /* reduce to the iproc-th proc */
1172 MPI_Reduce(mpi_H, mpi_H, nbuf, MPI_DOUBLE, MPI_SUM, iproc, comm);
1173
1174 if (myid==iproc) {
1175 for (i=0; i<nbuf; i++) { local_H[i] = mpi_H[i]; }
1176 }
1177 } /* loop of iproc */
1178
1179 free(eri_list);
1180 free(iRm);
1181 } /* loop of irn */
1182
1183
1184 #if 1
1185 for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
1186 tnoA = Spe_Total_CNO[WhatSpecies[GA_AN]];
1187 Anum = MP[GA_AN];
1188 for (GB_AN=1; GB_AN<=atomnum; GB_AN++) {
1189 tnoB = Spe_Total_CNO[WhatSpecies[GB_AN]];
1190 Bnum = MP[GB_AN];
1191 for (ib1=0; ib1<tnoA; ib1++) {
1192 for (ib2=0; ib2<tnoB; ib2++) {
1193 i = (Anum+ib1-1)*H_n + (Bnum+ib2-1);
1194 H[Anum+ib1][Bnum+ib2].r += local_H[2*i+0];
1195 H[Anum+ib1][Bnum+ib2].i += local_H[2*i+1];
1196 }
1197 }
1198 }
1199 }
1200 #endif
1201
1202 /* free*/
1203 free(local_H);
1204 free(mpi_H);
1205 }
1206
1207
1208 /*----------------------------------------------------------------------
1209 EXX_Energy_Band
1210 ----------------------------------------------------------------------*/
EXX_Energy_Band(double * Ux,EXX_t * exx,dcomplex **** exx_CDM,int * MP)1211 void EXX_Energy_Band(
1212 double *Ux,
1213 EXX_t *exx,
1214 dcomplex ****exx_CDM,
1215 int *MP
1216 )
1217 {
1218 int myrank, nproc, iproc;
1219 int i, j, n, ir, nr, irn, nep, spin;
1220
1221 int GA_AN,Anum, GB_AN, Bnum;
1222 int iop1, iop2, nb1, nb2, nb3, nb4, nrn;
1223 int iatm1, iatm2, iatm3, iatm4, ib1, ib2, ib3, ib4, icell1, icell2;
1224 int ib1_ep, ib2_ep, ib3_ep, ib4_ep;
1225 int nb1_ep, nb2_ep, nb3_ep, nb4_ep;
1226 const int *ep_atom1, *ep_atom2, *ep_cell, *atom_nb;
1227 const int *op_atom1, *op_atom2;
1228 int iep1[8], iep2[8], icell3[8], mul;
1229 double w;
1230
1231 int neri;
1232 double *eri_list;
1233 double eri;
1234 int *iRm;
1235 dcomplex den1, den2;
1236
1237 MPI_Status stat;
1238
1239 double local_Ux[2];
1240
1241 MPI_Comm comm;
1242
1243 /* MPI information */
1244 comm = g_exx_mpicomm;
1245 MPI_Comm_size(comm, &nproc);
1246 MPI_Comm_rank(comm, &myrank);
1247
1248 /* Index information */
1249 op_atom1 = EXX_Array_OP_Atom1(exx);
1250 op_atom2 = EXX_Array_OP_Atom2(exx);
1251 ep_atom1 = EXX_Array_EP_Atom1(exx);
1252 ep_atom2 = EXX_Array_EP_Atom2(exx);
1253 ep_cell = EXX_Array_EP_Cell(exx);
1254 atom_nb = EXX_atom_nb(exx);
1255
1256 /* saved datapath */
1257 nr = EXX_File_ERI_Read_NRecord(exx);
1258
1259 local_Ux[0] = 0.0;
1260 local_Ux[1] = 0.0;
1261
1262 for (ir=0; ir<nr; ir++) {
1263 /* read data */
1264 EXX_File_ERI_Read_Data_Head(exx, ir, &iop1, &iop2,
1265 &nb1, &nb2, &nb3, &nb4, &nrn);
1266 neri = nb1*nb2*nb3*nb4*nrn;
1267 eri_list = (double*)malloc(sizeof(double)*neri);
1268 iRm = (int*)malloc(sizeof(int)*nrn); /* Rm */
1269 EXX_File_ERI_Read(exx, ir, eri_list, iRm, iop1, iop2, nrn, neri);
1270
1271 for (irn=0; irn<nrn; irn++) {
1272 EXX_OP2EP_Band(exx, iop1, iop2, iRm[irn],
1273 &iep1[0], &iep2[0], &icell3[0], &mul);
1274 w = 1.0/(double)mul;
1275
1276 for (j=0; j<8; j++) {
1277 iatm1 = ep_atom1[iep1[j]]; /* i */
1278 iatm2 = ep_atom2[iep1[j]]; /* j */
1279 iatm3 = ep_atom1[iep2[j]]; /* k */
1280 iatm4 = ep_atom2[iep2[j]]; /* l */
1281 icell1 = ep_cell[iep1[j]]; /* Rn */
1282 icell2 = ep_cell[iep2[j]]; /* Rm' */
1283
1284 nb1_ep = atom_nb[iatm1];
1285 nb2_ep = atom_nb[iatm2];
1286 nb3_ep = atom_nb[iatm3];
1287 nb4_ep = atom_nb[iatm4];
1288
1289 Anum = MP[iatm1+1];
1290 Bnum = MP[iatm2+1];
1291
1292 for (ib1_ep=0; ib1_ep<nb1_ep; ib1_ep++) {
1293 for (ib2_ep=0; ib2_ep<nb2_ep; ib2_ep++) {
1294 for (ib3_ep=0; ib3_ep<nb3_ep; ib3_ep++) {
1295 for (ib4_ep=0; ib4_ep<nb4_ep; ib4_ep++) {
1296 /* rotate basis index */
1297 EXX_Basis_Index(ib1_ep, ib2_ep, ib3_ep, ib4_ep,
1298 &ib1, &ib2, &ib3, &ib4, j);
1299
1300 i = (((ib1*nb2+ib2)*nb3+ib3)*nb4+ib4)*nrn+irn;
1301 eri = eri_list[i];
1302
1303 for (spin=0; spin<=SpinP_switch; spin++) {
1304 den1.r = exx_CDM[spin][iep2[j]][ib3_ep][ib4_ep].r;
1305 den1.i = exx_CDM[spin][iep2[j]][ib3_ep][ib4_ep].i;
1306 den2.r = exx_CDM[spin][iep1[j]][ib1_ep][ib2_ep].r;
1307 den2.i = exx_CDM[spin][iep1[j]][ib1_ep][ib2_ep].i;
1308 local_Ux[spin] +=
1309 -0.5*w*eri*(den1.r*den1.r - den2.i*den2.i);
1310 }
1311 }
1312 }
1313 }
1314 }
1315
1316 } /* loop of j */
1317 } /* loop of irn */
1318
1319 MPI_Allreduce(local_Ux, local_Ux, 2, MPI_DOUBLE, MPI_SUM, comm);
1320
1321 free(eri_list);
1322 free(iRm);
1323 } /* loop of irn */
1324
1325 Ux[0] = local_Ux[0];
1326 Ux[1] = local_Ux[1];
1327 }
1328
1329
1330
1331
1332 /*----------------------------------------------------------------------
1333 EXX_Reduce_DM
1334 ----------------------------------------------------------------------*/
EXX_Reduce_DM(EXX_t * exx,dcomplex *** exx_DM)1335 void EXX_Reduce_DM(EXX_t *exx, dcomplex ***exx_DM)
1336 {
1337 int i, j, iatm1, iatm2, iep, ib, myrank, nproc;
1338 int nep, nb, nbmax, nbuf, nb1, nb2;
1339 const int *ep_atom1, *ep_atom2;
1340 double *buffer;
1341 MPI_Comm comm;
1342
1343 comm = g_exx_mpicomm;
1344
1345 nep = EXX_Number_of_EP(exx);
1346 ep_atom1 = EXX_Array_EP_Atom1(exx);
1347 ep_atom2 = EXX_Array_EP_Atom2(exx);
1348
1349 MPI_Comm_rank(comm, &myrank);
1350 MPI_Comm_size(comm, &nproc);
1351
1352 /* max number of basis */
1353 nbmax = 0;
1354 for (i=1; i<=atomnum; i++) {
1355 nb = Spe_Total_CNO[WhatSpecies[i]];
1356 if (nb>nbmax) { nbmax=nb; }
1357 }
1358
1359 /* 1d array */
1360 nbuf = nbmax*nbmax*2;
1361 buffer = (double*)malloc(sizeof(double)*nbuf);
1362
1363 for (iep=0; iep<nep; iep++) {
1364 iatm1 = ep_atom1[iep]+1;
1365 iatm2 = ep_atom2[iep]+1;
1366 nb1 = Spe_Total_CNO[WhatSpecies[iatm1]];
1367 nb2 = Spe_Total_CNO[WhatSpecies[iatm2]];
1368
1369 /* clear array */
1370 for (i=0; i<nbuf; i++) { buffer[i] = 0.0; }
1371
1372 /* put DM on array */
1373 for (i=0; i<nb1; i++){
1374 for (j=0; j<nb2; j++){
1375 ib = i*nbmax + j;
1376 buffer[2*ib+0] = exx_DM[iep][i][j].r;
1377 buffer[2*ib+1] = exx_DM[iep][i][j].i;
1378 }
1379 }
1380
1381 /* all-to-all communication */
1382 MPI_Allreduce(buffer, buffer, nbuf, MPI_DOUBLE, MPI_SUM, comm);
1383
1384 /* get back from array */
1385 for (i=0; i<nb1; i++){
1386 for (j=0; j<nb2; j++){
1387 ib = i*nbmax + j;
1388 exx_DM[iep][i][j].r = buffer[2*ib+0];
1389 exx_DM[iep][i][j].i = buffer[2*ib+1];
1390 }
1391 }
1392 } /* loop of iep */
1393
1394 free(buffer);
1395 }
1396
1397
1398
1399