1 /*----------------------------------------------------------------------
2   exx_interface_openmx.c:
3 
4   07/JAN/2010 Coded by M. Toyoda
5 ----------------------------------------------------------------------*/
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <math.h>
9 #include <time.h>
10 #include "openmx_common.h"
11 #include "exx.h"
12 #include "exx_log.h"
13 #include "exx_debug.h"
14 #include "exx_interface_openmx.h"
15 #include "exx_step1.h"
16 #include "exx_step2.h"
17 #include "exx_vector.h"
18 #include "exx_index.h"
19 #include "exx_file_eri.h"
20 #include "exx_rhox.h"
21 
22 #define PATHLEN 256
23 
24 static int g_step = 0;
25 
26 MPI_Comm g_exx_mpicomm = MPI_COMM_NULL;
27 
28 dcomplex *****g_exx_DM;
29 double g_exx_U[2];
30 int g_exx_switch;
31 int g_exx_switch_output_DM;
32 int g_exx_switch_rhox;
33 
34 static double time_step1;
35 static double time_step2;
36 
37 
38 /*----------------------------------------------------------------------
39   Initialization of EXX for OpenMX
40 ----------------------------------------------------------------------*/
EXX_on_OpenMX_Init(MPI_Comm comm)41 void EXX_on_OpenMX_Init(MPI_Comm comm)
42 {
43   int i, j, l, m, k, system_type, myrank, nproc;
44 
45   double pvec[9]; /* primitive translational vectors */
46   double v[3];
47   int natom, nspec;
48   double *atom_v, *spec_rc;
49   int *atom_sp, *spec_nb;
50   double w_scr, rc_cut;
51 
52   int nbmax;
53   int **spec_l, **spec_m, **spec_n, *spec_nmesh;
54   double ****pao_fr, **pao_xr;
55 
56   int lmax, mul, nmesh, nb, nmul, nm;
57 
58   int iep, nep, Gc_AN, Cwan, tno0, Gh_AN, Hwan, tno1;
59   const int *exx_atm1, *exx_atm2;
60 
61   double time0, time1;
62 
63   /* MPI information */
64   g_exx_mpicomm = comm;
65   MPI_Comm_size(g_exx_mpicomm, &nproc);
66   MPI_Comm_rank(g_exx_mpicomm, &myrank);
67 
68   /* start logging */
69   EXX_Log_Open();
70 
71 #ifdef EXX_USE_MPI
72   EXX_Log_Print("MPI MODE (NPROC= %d)\n", nproc);
73 #else
74   EXX_Log_Print("SERIAL MODE\n");
75 #endif
76   EXX_Log_Print("  MYRANK= %d\n", myrank);
77   EXX_Log_Print("\n");
78 
79   /* solver type */
80   switch (Solver) {
81   case 2: /* cluster */
82     EXX_Log_Print("SOLVER= CLUSTER\n");
83     system_type = EXX_SYSTEM_CLUSTER;
84     rc_cut = 0.0;
85     w_scr  = 0.0;
86     break;
87   case 3: /* band */
88     EXX_Log_Print("SOLVER= PERIODIC\n");
89     system_type = EXX_SYSTEM_PERIODIC;
90     rc_cut = 10.0 / BohrR; /* 10 Angstrom */
91     w_scr  = 0.0;
92     break;
93   default:
94     EXX_ERROR("EXX is available for 'CLUSTER' or 'BAND' solvers.");
95   }
96 
97   /* user values */
98   if (g_exx_rc_cut > 1e-10) { rc_cut = g_exx_rc_cut; }
99   if (g_exx_w_scr > 1e-10) { w_scr = g_exx_w_scr; }
100 
101   if (EXX_SYSTEM_PERIODIC==system_type && rc_cut <1e-10) {
102     EXX_ERROR("finite rc_cut is required for periodic mode.");
103   }
104 
105   /* swithces */
106   EXX_Log_Print("SKIPPING STEP1 = %d\n", g_exx_skip1);
107   EXX_Log_Print("SKIPPING STEP2 = %d\n", g_exx_skip2);
108   EXX_Log_Print("CACHDIR= %s\n", g_exx_cachedir);
109   EXX_Log_Print("LIBERI_LMAX= %d\n", g_exx_liberi_lmax);
110   EXX_Log_Print("LIBERI_NGRID= %d\n", g_exx_liberi_ngrid);
111   EXX_Log_Print("LIBERI_NGL= %d\n", g_exx_liberi_ngl);
112   EXX_Log_Print("OUTPUT DM= %d\n", g_exx_switch_output_DM);
113   EXX_Log_Print("\n");
114 
115   /* primitive translational vectors */
116   pvec[0] = tv[1][1]; pvec[1] = tv[1][2]; pvec[2] = tv[1][3];
117   pvec[3] = tv[2][1]; pvec[4] = tv[2][2]; pvec[5] = tv[2][3];
118   pvec[6] = tv[3][1]; pvec[7] = tv[3][2]; pvec[8] = tv[3][3];
119 
120   natom = atomnum;
121   nspec = SpeciesNum;
122 
123   /* allocation */
124   atom_v = (double*)malloc(sizeof(double)*3*natom);
125   atom_sp = (int*)malloc(sizeof(int)*natom);
126   spec_rc = (double*)malloc(sizeof(double)*nspec);
127   spec_nb = (int*)malloc(sizeof(int)*nspec);
128 
129   /* atomic positions */
130   for (i=0; i<natom; i++) {
131     v[0] = Gxyz[i+1][1];
132     v[1] = Gxyz[i+1][2];
133     v[2] = Gxyz[i+1][3];
134 
135     EXX_Vector_C2F(&atom_v[3*i], v, pvec);
136   }
137 
138   /* species */
139   for (i=0; i<natom; i++) { atom_sp[i] = WhatSpecies[i+1]; }
140 
141   /* cut-off radii */
142   for (i=0; i<nspec; i++) { spec_rc[i] = Spe_Atom_Cut1[i]; }
143 
144   /* number of basis */
145   nbmax = 0;
146   for (i=0; i<nspec; i++) {
147     spec_nb[i] = 0;
148     for (l=0; l<=Spe_MaxL_Basis[i]; l++) {
149       mul = Spe_Num_Basis[i][l];
150       spec_nb[i] += mul * (2*l+1);
151     }
152     if (nbmax<spec_nb[i]) { nbmax = spec_nb[i]; }
153   }
154 
155   /* PAO wavefunction */
156   spec_l = (int**)malloc(sizeof(int*)*nspec);
157   spec_m = (int**)malloc(sizeof(int*)*nspec);
158   spec_n = (int**)malloc(sizeof(int*)*nspec);
159   for (i=0; i<nspec; i++) {
160     spec_l[i] = (int*)malloc(sizeof(int)*nbmax);
161     spec_m[i] = (int*)malloc(sizeof(int)*nbmax);
162     spec_n[i] = (int*)malloc(sizeof(int)*nbmax);
163   }
164   spec_nmesh = (int*)malloc(sizeof(int)*nspec);
165   pao_fr = (double****)malloc(sizeof(double***)*nspec);
166   pao_xr = (double**)malloc(sizeof(double*)*nspec);
167   for (i=0; i<nspec; i++) {
168     lmax  = Spe_PAO_LMAX[i]+1;
169     mul   = Spe_PAO_Mul[i];
170     nmesh = Spe_Num_Mesh_PAO[i];
171     pao_fr[i] = (double***)malloc(sizeof(double**)*lmax);
172     for (l=0; l<lmax; l++) {
173       nb = mul*(2*l+1);
174       pao_fr[i][l] = (double**)malloc(sizeof(double*)*nb);
175       for (m=0; m<nb; m++) {
176         pao_fr[i][l][m] = (double*)malloc(sizeof(double)*nmesh);
177       }
178     }
179     pao_xr[i] = (double*)malloc(sizeof(double)*nmesh);
180   }
181 
182   /* rotating angular momentum quantum numbers */
183   for (i=0; i<nspec; i++) {
184     j = 0;
185     for (l=0; l<=Spe_MaxL_Basis[i]; l++) {
186       nmul = Spe_Num_Basis[i][l];
187       nm = 2*l+1;
188       for (mul=0; mul<nmul; mul++) {
189         for (m=0; m<nm; m++) {
190           spec_l[i][j] = l;
191           if (l==1) {
192             switch (m) {
193             case 0: spec_m[i][j] = +1; break;
194             case 1: spec_m[i][j] = -1; break;
195             case 2: spec_m[i][j] = 0; break;
196             default:
197               EXX_ERROR("");
198             }
199           } else if (l==2) {
200             switch (m) {
201             case 0: spec_m[i][j] = 0; break;
202             case 1: spec_m[i][j] = +2; break;
203             case 2: spec_m[i][j] = -2; break;
204             case 3: spec_m[i][j] = +1; break;
205             case 4: spec_m[i][j] = -1; break;
206             default:
207               EXX_ERROR("");
208             }
209           } else {
210             if (m==0) {
211               spec_m[i][j] = 0;
212             } else {
213               if (0==m%2) {
214                 spec_m[i][j] = -(m+1)/2;
215               } else {
216                 spec_m[i][j] = +(m+1)/2;
217               }
218             }
219           }
220           spec_n[i][j] = mul;
221           j++;
222         }
223       }
224     }
225     spec_nmesh[i] = Spe_Num_Mesh_PAO[i];
226 
227     lmax  = Spe_PAO_LMAX[i]+1;
228     mul   = Spe_PAO_Mul[i];
229     nmesh = Spe_Num_Mesh_PAO[i];
230 
231     for (l=0; l<lmax; l++) {
232       for (m=0; m<mul; m++) {
233         for (k=0; k<nmesh; k++) {
234           pao_fr[i][l][m][k] = Spe_PAO_RWF[i][l][m][k];
235         }
236       }
237     }
238 
239     for (k=0; k<nmesh; k++) {
240       pao_xr[i][k] = Spe_PAO_RV[i][k];
241     }
242   }
243 
244   /* logging */
245   EXX_Log_Print("Species:\n");
246   for (i=0; i<nspec; i++) {
247     for (j=0; j<spec_nb[i]; j++) {
248       EXX_Log_Print("  %3d, %3d : %3d %3d %3d\n",
249         i, j, spec_l[i][j], spec_m[i][j], spec_n[i][j]);
250     }
251   }
252   EXX_Log_Print("\n");
253 
254   MPI_Barrier(g_exx_mpicomm);
255 
256   g_exx = EXX_New(natom, atom_v, atom_sp, nspec, spec_rc, spec_nb,
257                 pvec, w_scr, rc_cut, system_type, g_exx_cachedir);
258 
259   if (Host_ID==myrank) { printf("<EXX_Init> EXX Initialized\n"); }
260 
261   EXX_Log_Print("EXX Initiatlized:\n");
262   EXX_Log_Print("  natom= %d\n", EXX_natom(g_exx));
263   EXX_Log_Print("  nbmax= %d\n", EXX_nbmax(g_exx));
264   EXX_Log_Print("\n");
265 
266   EXX_Log_Print("Atoms:\n");
267   EXX_Log_Print("   #  : X         Y         Z         RC        NB\n");
268   for (i=0; i<EXX_natom(g_exx); i++) {
269     EXX_Log_Print("  %3d : %8.4f  %8.4f  %8.4f  %8.4f  %2d\n",
270       i, EXX_atom_v(g_exx)[3*i+0], EXX_atom_v(g_exx)[3*i+1],
271       EXX_atom_v(g_exx)[3*i+2], EXX_atom_rc(g_exx)[i],
272       EXX_atom_nb(g_exx)[i]);
273   }
274   EXX_Log_Print("\n");
275 
276   EXX_Log_Print("Translational Vectors:\n");
277   EXX_Log_Print("  a= %8.4f  %8.4f  %8.4f\n",
278     EXX_pvec(g_exx)[0], EXX_pvec(g_exx)[1], EXX_pvec(g_exx)[2]);
279   EXX_Log_Print("  b= %8.4f  %8.4f  %8.4f\n",
280     EXX_pvec(g_exx)[3], EXX_pvec(g_exx)[4], EXX_pvec(g_exx)[5]);
281   EXX_Log_Print("  c= %8.4f  %8.4f  %8.4f\n",
282     EXX_pvec(g_exx)[6], EXX_pvec(g_exx)[7], EXX_pvec(g_exx)[8]);
283   EXX_Log_Print("  w_scr= %8.4f\n", EXX_w_scr(g_exx));
284   EXX_Log_Print("  rc_cut= %8.4f\n", EXX_rc_cut(g_exx));
285   EXX_Log_Print("\n");
286 
287   EXX_Log_Print("Overlapping Pairs:\n");
288   EXX_Log_Print("  nshell_op= %d\n", EXX_Number_of_OP_Shells(g_exx));
289   EXX_Log_Print("  nop= %d\n", EXX_Number_of_OP(g_exx));
290   EXX_Log_Print("   #  : A1  A2  CELL \n");
291   for (i=0; i<EXX_Number_of_OP(g_exx); i++) {
292     EXX_Log_Print("  %3d : %3d %3d %5d\n",
293       i, EXX_Array_OP_Atom1(g_exx)[i], EXX_Array_OP_Atom2(g_exx)[i],
294       EXX_Array_OP_Cell(g_exx)[i]);
295   }
296   EXX_Log_Print("\n");
297 
298   EXX_Log_Print("Exchanging Pairs:\n");
299   EXX_Log_Print("  nshell_ep= %d\n", EXX_Number_of_EP_Shells(g_exx));
300   EXX_Log_Print("  nep= %d\n", EXX_Number_of_EP(g_exx));
301   EXX_Log_Print("   #  : A1  A2  CELL \n");
302   for (i=0; i<EXX_Number_of_EP(g_exx); i++) {
303     EXX_Log_Print("  %3d : %3d %3d %5d\n",
304       i, EXX_Array_EP_Atom1(g_exx)[i], EXX_Array_EP_Atom2(g_exx)[i],
305       EXX_Array_EP_Cell(g_exx)[i]);
306   }
307   EXX_Log_Print("\n");
308 
309   /* DM */
310   exx_atm1 = EXX_Array_EP_Atom1(g_exx);
311   exx_atm2 = EXX_Array_EP_Atom2(g_exx);
312   nep      = EXX_Number_of_EP(g_exx);
313 
314   g_exx_DM = (dcomplex*****)malloc(sizeof(dcomplex****)*List_YOUSO[16]);
315   for (m=0; m<List_YOUSO[16]; m++){
316     g_exx_DM[m] = (dcomplex****)malloc(sizeof(dcomplex***)*(SpinP_switch+1));
317     for (k=0; k<=SpinP_switch; k++){
318       g_exx_DM[m][k] = (dcomplex***)malloc(sizeof(dcomplex**)*(nep));
319       for (iep=0; iep<nep; iep++) {
320         Gc_AN = exx_atm1[iep]+1;
321         Cwan = WhatSpecies[Gc_AN];
322         tno0 = Spe_Total_NO[Cwan];
323 
324 	Gh_AN = exx_atm2[iep]+1;
325 	Hwan = WhatSpecies[Gh_AN];
326 	tno1 = Spe_Total_NO[Hwan];
327 
328         g_exx_DM[m][k][iep] = (dcomplex**)malloc(sizeof(dcomplex*)*tno0);
329         for (i=0; i<tno0; i++){
330 	  g_exx_DM[m][k][iep][i] = (dcomplex*)malloc(sizeof(dcomplex)*tno1);
331   	  for (j=0; j<tno1; j++) {
332             g_exx_DM[m][k][iep][i][j].r = 0.0;
333             g_exx_DM[m][k][iep][i][j].i = 0.0;
334 	  }
335 	}
336       }
337     }
338   }
339 
340   /* STEP1: calculation of overlaps */
341 
342   dtime(&time0);
343   {
344     EXX_Step1(g_exx, natom, atom_sp, nspec,
345       spec_nb, spec_rc, spec_l, spec_m, spec_n,
346       pao_fr, pao_xr, spec_nmesh);
347   }
348   dtime(&time1);
349   time_step1 = time1 - time0;
350 
351   if (Host_ID==myrank) {
352     printf("<EXX_Init> Overlaps (NOP=%5d)\n", EXX_Number_of_OP(g_exx));
353   }
354 
355   /* STEP2: calculation of ERIs */
356 
357   dtime(&time0);
358   {
359     EXX_Step2(g_exx);
360   }
361   dtime(&time1);
362   time_step2 = time1 - time0;
363 
364   if (Host_ID==myrank) {
365     printf("<EXX_Init> ERIs (NEP=%10d)\n", EXX_Number_of_EP(g_exx));
366   }
367 
368   /* free memory */
369   free(atom_v);
370   free(atom_sp);
371   free(spec_rc);
372   free(spec_nb);
373 
374   for (i=0; i<nspec; i++) {
375     free(spec_l[i]);
376     free(spec_m[i]);
377     free(spec_n[i]);
378   }
379   free(spec_l);
380   free(spec_m);
381   free(spec_n);
382   free(spec_nmesh);
383 
384   for (i=0; i<nspec; i++) {
385     lmax  = Spe_PAO_LMAX[i]+1;
386     mul   = Spe_PAO_Mul[i];
387     for (l=0; l<lmax; l++) {
388       for (m=0; m<mul; m++) {
389         free(pao_fr[i][l][m]);
390       }
391       free(pao_fr[i][l]);
392     }
393     free(pao_fr[i]);
394     free(pao_xr[i]);
395   }
396   free(pao_fr);
397   free(pao_xr);
398 }
399 
400 
EXX_Time_Report(void)401 void EXX_Time_Report(void)
402 {
403   printf("  EXX_Step1         = %10.5f\n",time_step1);
404   printf("  EXX_Step2         = %10.5f\n",time_step2);
405 }
406 
407 
EXX_on_OpenMX_Free(void)408 void EXX_on_OpenMX_Free(void)
409 {
410   int m, k, iep, nep, i, Gc_AN, Cwan, tno0;
411   const int *exx_atm1;
412 
413   exx_atm1 = EXX_Array_EP_Atom1(g_exx);
414   nep      = EXX_Number_of_EP(g_exx);
415 
416   for (m=0; m<List_YOUSO[16]; m++){
417     for (k=0; k<=SpinP_switch; k++){
418       for (iep=0; iep<nep; iep++) {
419         Gc_AN = exx_atm1[iep]+1;
420         Cwan = WhatSpecies[Gc_AN];
421         tno0 = Spe_Total_NO[Cwan];
422         for (i=0; i<tno0; i++){
423 	  free(g_exx_DM[m][k][iep][i]);
424 	}
425         free(g_exx_DM[m][k][iep]);
426       }
427       free(g_exx_DM[m][k]);
428     }
429     free(g_exx_DM[m]);
430   }
431   free(g_exx_DM);
432 
433   EXX_Free(g_exx);
434   g_exx = NULL;
435 
436   EXX_Log_Close();
437 }
438 
439 
find_EP(EXX_t * exx,int ia1,int ia2,int r[3])440 static int find_EP(
441   EXX_t *exx,
442   int ia1,
443   int ia2,
444   int r[3]
445 )
446 {
447   int i, ic, nep, nshep;
448   const int *ep_atom1, *ep_atom2, *ep_cell;
449 
450   nep      = EXX_Number_of_EP(exx);
451   nshep    = EXX_Number_of_EP_Shells(exx);
452   ep_atom1 = EXX_Array_EP_Atom1(exx);
453   ep_atom2 = EXX_Array_EP_Atom2(exx);
454   ep_cell  = EXX_Array_EP_Cell(exx);
455 
456   ic = EXX_Index_XYZ2Cell(r, nshep);
457 
458   for (i=0; i<nep; i++) {
459     if (ia1==ep_atom1[i] && ia2==ep_atom2[i] && ic==ep_cell[i]) { break; }
460   }
461 
462   if (i==nep) {
463     fprintf(stderr, "EP not found!\n");
464     fprintf(stderr, "  ia1= %d\n", ia1);
465     fprintf(stderr, "  ia2= %d\n", ia2);
466     fprintf(stderr, "  ic = %d ( %3d %3d %3d)\n", ic, r[0], r[1], r[2]);
467     abort();
468   }
469 
470   return i;
471 }
472 
473 
EXX_OP2EP_Band(EXX_t * exx,int iop1,int iop2,int icell,int * iep1,int * iep2,int * iRd,int * mul)474 static void EXX_OP2EP_Band(
475   EXX_t *exx,
476   int iop1,
477   int iop2,
478   int icell,
479   int *iep1, /* [8] */
480   int *iep2, /* [8] */
481   int *iRd,  /* [8] */
482   int *mul
483 )
484 {
485   int m, i, j, k, l, ic1, ic2;
486   int nshop, nshep;
487   int r1[3], r2[3], rd[3];
488   int ep_r1[3], ep_r2[3], ep_rd[3];
489   const int *op_atom1, *op_atom2, *op_cell;
490   int Rd_is_0, R1_is_0, R2_is_0;
491 
492   nshop    = EXX_Number_of_OP_Shells(exx);
493   nshep    = EXX_Number_of_EP_Shells(exx);
494 
495   op_atom1 = EXX_Array_OP_Atom1(exx);
496   op_atom2 = EXX_Array_OP_Atom2(exx);
497   op_cell  = EXX_Array_OP_Cell(exx);
498 
499   i = op_atom1[iop1];
500   j = op_atom2[iop1];
501   k = op_atom1[iop2];
502   l = op_atom2[iop2];
503   ic1 = op_cell[iop1];
504   ic2 = op_cell[iop2];
505 
506   EXX_Index_Cell2XYZ(ic1, nshop, r1);
507   EXX_Index_Cell2XYZ(ic2, nshop, r2);
508   EXX_Index_Cell2XYZ(icell, nshep, rd);
509 
510   /* 0: (i,k,Rd), (j,l,R2+Rd-R1), R1 */
511   for (m=0; m<3; m++) {
512     ep_r1[m] = rd[m];
513     ep_r2[m] = rd[m]+r2[m]-r1[m];
514     ep_rd[m] = r1[m];
515   }
516   iep1[0] = find_EP(exx, i, k, ep_r1);
517   iep2[0] = find_EP(exx, j, l, ep_r2);
518   iRd[0]  = EXX_Index_XYZ2Cell(ep_rd, nshep);
519 
520   /* 1: (j,k,Rd-R1), (i,l,R2+Rd), -R1 */
521   for (m=0; m<3; m++) {
522     ep_r1[m] = rd[m]-r1[m];
523     ep_r2[m] = rd[m]+r2[m];
524     ep_rd[m] = -r1[m];
525   }
526   iep1[1] = find_EP(exx, j, k, ep_r1);
527   iep2[1] = find_EP(exx, i, l, ep_r2);
528   iRd[1]  = EXX_Index_XYZ2Cell(ep_rd, nshep);
529 
530   /* 2: (i,l,R2+Rd), (j,k,Rd-R1), R1 */
531   for (m=0; m<3; m++) {
532     ep_r1[m] = rd[m]+r2[m];
533     ep_r2[m] = rd[m]-r1[m];
534     ep_rd[m] = r1[m];
535   }
536   iep1[2] = find_EP(exx, i, l, ep_r1);
537   iep2[2] = find_EP(exx, j, k, ep_r2);
538   iRd[2]  = EXX_Index_XYZ2Cell(ep_rd, nshep);
539 
540   /* 3: (j,l,R2+Rd-R1), (i,k,Rd), -R1 */
541   for (m=0; m<3; m++) {
542     ep_r1[m] = rd[m]+r2[m]-r1[m];
543     ep_r2[m] = rd[m];
544     ep_rd[m] = -r1[m];
545   }
546   iep1[3] = find_EP(exx, j, l, ep_r1);
547   iep2[3] = find_EP(exx, i, k, ep_r2);
548   iRd[3]  = EXX_Index_XYZ2Cell(ep_rd, nshep);
549 
550   /* 4: (k,i,-Rd), (l,j,R1-R2-Rd), R2 */
551   for (m=0; m<3; m++) {
552     ep_r1[m] = -rd[m];
553     ep_r2[m] = r1[m]-r2[m]-rd[m];
554     ep_rd[m] = r2[m];
555   }
556   iep1[4] = find_EP(exx, k, i, ep_r1);
557   iep2[4] = find_EP(exx, l, j, ep_r2);
558   iRd[4]  = EXX_Index_XYZ2Cell(ep_rd, nshep);
559 
560   /* 5: (k,j,R1-Rd), (l,i,-R2-Rd), R2 */
561   for (m=0; m<3; m++) {
562     ep_r1[m] = r1[m]-rd[m];
563     ep_r2[m] = -r2[m]-rd[m];
564     ep_rd[m] = r2[m];
565   }
566   iep1[5] = find_EP(exx, k, j, ep_r1);
567   iep2[5] = find_EP(exx, l, i, ep_r2);
568   iRd[5]  = EXX_Index_XYZ2Cell(ep_rd, nshep);
569 
570   /* 6: (l,i,-R2-Rd), (k,j,R1-Rd), -R2 */
571   for (m=0; m<3; m++) {
572     ep_r1[m] = -r2[m]-rd[m];
573     ep_r2[m] = r1[m]-rd[m];
574     ep_rd[m] = -r2[m];
575   }
576   iep1[6] = find_EP(exx, l, i, ep_r1);
577   iep2[6] = find_EP(exx, k, j, ep_r2);
578   iRd[6]  = EXX_Index_XYZ2Cell(ep_rd, nshep);
579 
580   /* 7: (l,j,R1-R2-Rd), (k,i,-Rd), -R2 */
581   for (m=0; m<3; m++) {
582     ep_r1[m] = r1[m]-r2[m]-rd[m];
583     ep_r2[m] = -rd[m];
584     ep_rd[m] = -r2[m];
585   }
586   iep1[7] = find_EP(exx, l, j, ep_r1);
587   iep2[7] = find_EP(exx, k, i, ep_r2);
588   iRd[7]  = EXX_Index_XYZ2Cell(ep_rd, nshep);
589 
590   /* multiplicity */
591   Rd_is_0 = (0==rd[0]) && (0==rd[1]) && (0==rd[2]);
592   R1_is_0 = (0==r1[0]) && (0==r1[1]) && (0==r1[2]);
593   R2_is_0 = (0==r2[0]) && (0==r2[1]) && (0==r2[2]);
594   if (Rd_is_0 && iop1==iop2) {
595     if (R1_is_0 && i==j) { *mul = 8; } else { *mul = 2; }
596   } else {
597     if (R1_is_0 && i==j) {
598       if (R2_is_0 && k==l) { *mul = 4; } else { *mul = 2; }
599     } else {
600       if (R2_is_0 && k==l) { *mul = 2; } else { *mul = 1; }
601     }
602   }
603 }
604 
605 
find_EP_cluster(EXX_t * exx,int ia1,int ia2)606 static int find_EP_cluster(
607   EXX_t *exx,
608   int ia1,
609   int ia2
610 )
611 {
612   int i, nep;
613   const int *ep_atom1, *ep_atom2;
614 
615   nep      = EXX_Number_of_EP(exx);
616   ep_atom1 = EXX_Array_EP_Atom1(exx);
617   ep_atom2 = EXX_Array_EP_Atom2(exx);
618 
619   for (i=0; i<nep; i++) {
620     if (ia1==ep_atom1[i] && ia2==ep_atom2[i]) { break; }
621   }
622 
623   if (i==nep) {
624     fprintf(stderr, "***Error at %s (%d)\n", __FILE__, __LINE__);
625     fprintf(stderr, "   EP not found!\n");
626     abort();
627   }
628 
629   return i;
630 }
631 
632 
633 
EXX_OP2EP_Cluster(EXX_t * exx,int iop1,int iop2,int * iep1,int * iep2,int * mul)634 static void EXX_OP2EP_Cluster(
635   EXX_t *exx,
636   int iop1,
637   int iop2,
638   int *iep1, /* [8] */
639   int *iep2, /* [8] */
640   int *mul
641 )
642 {
643   int i, j, k, l, nep;
644   const int *op_atom1, *op_atom2;
645 
646   nep      = EXX_Number_of_OP(exx);
647   op_atom1 = EXX_Array_OP_Atom1(exx);
648   op_atom2 = EXX_Array_OP_Atom2(exx);
649 
650   i = op_atom1[iop1];
651   j = op_atom2[iop1];
652   k = op_atom1[iop2];
653   l = op_atom2[iop2];
654 
655   /* 0: (i,k), (j,l) */
656   iep1[0] = find_EP_cluster(exx, i, k);
657   iep2[0] = find_EP_cluster(exx, j, l);
658 
659   /* 1: (j,k), (i,l) */
660   iep1[1] = find_EP_cluster(exx, j, k);
661   iep2[1] = find_EP_cluster(exx, i, l);
662 
663   /* 2: (i,l), (j,k) */
664   iep1[2] = find_EP_cluster(exx, i, l);
665   iep2[2] = find_EP_cluster(exx, j, k);
666 
667   /* 3: (j,l), (i,k) */
668   iep1[3] = find_EP_cluster(exx, j, l);
669   iep2[3] = find_EP_cluster(exx, i, k);
670 
671   /* 4: (k,i), (l,j) */
672   iep1[4] = find_EP_cluster(exx, k, i);
673   iep2[4] = find_EP_cluster(exx, l, j);
674 
675   /* 5: (k,j), (l,i) */
676   iep1[5] = find_EP_cluster(exx, k, j);
677   iep2[5] = find_EP_cluster(exx, l, i);
678 
679   /* 6: (l,i), (k,j) */
680   iep1[6] = find_EP_cluster(exx, l, i);
681   iep2[6] = find_EP_cluster(exx, k, j);
682 
683   /* 7: (l,j), (k,i) */
684   iep1[7] = find_EP_cluster(exx, l, j);
685   iep2[7] = find_EP_cluster(exx, k, i);
686 
687   /* multiplicity */
688   if (iop1==iop2) {
689     if (i==j) { *mul = 8; } else { *mul = 2; }
690   } else {
691     if (i==j) {
692       if (k==l) { *mul = 4;} else { *mul = 2; }
693     } else {
694       if (k==l) { *mul = 2;} else { *mul = 1; }
695     }
696   }
697 }
698 
699 
EXX_Basis_Index(int ib1,int ib2,int ib3,int ib4,int * ib1_op,int * ib2_op,int * ib3_op,int * ib4_op,int mul)700 static void EXX_Basis_Index(
701   int ib1,
702   int ib2,
703   int ib3,
704   int ib4,
705   int *ib1_op,
706   int *ib2_op,
707   int *ib3_op,
708   int *ib4_op,
709   int mul
710 )
711 {
712   switch (mul) {
713   case 0: *ib1_op = ib1; *ib2_op = ib3; *ib3_op = ib2; *ib4_op = ib4; break;
714   case 1: *ib1_op = ib3; *ib2_op = ib1; *ib3_op = ib2; *ib4_op = ib4; break;
715   case 2: *ib1_op = ib1; *ib2_op = ib3; *ib3_op = ib4; *ib4_op = ib2; break;
716   case 3: *ib1_op = ib3; *ib2_op = ib1; *ib3_op = ib4; *ib4_op = ib2; break;
717   case 4: *ib1_op = ib2; *ib2_op = ib4; *ib3_op = ib1; *ib4_op = ib3; break;
718   case 5: *ib1_op = ib4; *ib2_op = ib2; *ib3_op = ib1; *ib4_op = ib3; break;
719   case 6: *ib1_op = ib2; *ib2_op = ib4; *ib3_op = ib3; *ib4_op = ib1; break;
720   case 7: *ib1_op = ib4; *ib2_op = ib2; *ib3_op = ib3; *ib4_op = ib1; break;
721 #if 0
722   case 0: *ib1_op = ib1; *ib2_op = ib3; *ib3_op = ib2; *ib4_op = ib4; break;
723   case 1: *ib1_op = ib2; *ib2_op = ib3; *ib3_op = ib1; *ib4_op = ib4; break;
724   case 2: *ib1_op = ib1; *ib2_op = ib4; *ib3_op = ib2; *ib4_op = ib3; break;
725   case 3: *ib1_op = ib2; *ib2_op = ib4; *ib3_op = ib1; *ib4_op = ib3; break;
726   case 4: *ib1_op = ib3; *ib2_op = ib1; *ib3_op = ib4; *ib4_op = ib2; break;
727   case 5: *ib1_op = ib3; *ib2_op = ib2; *ib3_op = ib4; *ib4_op = ib1; break;
728   case 6: *ib1_op = ib4; *ib2_op = ib1; *ib3_op = ib3; *ib4_op = ib2; break;
729   case 7: *ib1_op = ib4; *ib2_op = ib2; *ib3_op = ib3; *ib4_op = ib1; break;
730 #endif
731   }
732 }
733 
734 
EXX_Fock_Cluster(double ** H,double * Ux,EXX_t * exx,dcomplex *** exx_CDM,const int * MP)735 void EXX_Fock_Cluster(
736   double **H,
737   double *Ux,
738   EXX_t *exx,
739   dcomplex ***exx_CDM,
740   const int *MP
741 )
742 {
743   double *local_H;
744   /* double *buffer_H;*/
745   int H_n;
746   int i, j, n, ir, nr, irn, nep;
747 
748   int iop1, iop2, nb1, nb2, nb3, nb4, nrn;
749   int ia1, ia2, ia3, ia4, ib1, ib2, ib3, ib4;
750   int ib1_ep, ib2_ep, ib3_ep, ib4_ep;
751   int nb1_ep, nb2_ep, nb3_ep, nb4_ep;
752   const int *ep_atom1, *ep_atom2, *atom_nb;
753   const int *op_atom1, *op_atom2;
754 
755   int iep1[8], iep2[8], mul;
756   double w;
757 
758   int GA_AN, wanA, tnoA, Anum, GB_AN, wanB, tnoB, Bnum;
759 
760   double *eri;
761   int     neri;
762   double sum, den;
763 
764   int nproc, myid, iproc;
765   MPI_Status stat;
766 
767   double local_Ux;
768   /*double buffer_Ux;*/
769 
770   MPI_Comm_size(g_exx_mpicomm, &nproc);
771   MPI_Comm_rank(g_exx_mpicomm, &myid);
772 
773   op_atom1 = EXX_Array_OP_Atom1(exx);
774   op_atom2 = EXX_Array_OP_Atom2(exx);
775   ep_atom1 = EXX_Array_EP_Atom1(exx);
776   ep_atom2 = EXX_Array_EP_Atom2(exx);
777   atom_nb  = EXX_atom_nb(exx);
778 
779   /* matrix size */
780   H_n = 0;
781   for (i=1; i<=atomnum; i++){
782     wanA  = WhatSpecies[i];
783     H_n += Spe_Total_CNO[wanA];
784   }
785 
786   /* allocation */
787   local_H = (double*)malloc(sizeof(double)*H_n*H_n);
788   /*buffer_H = (double*)malloc(sizeof(double)*H_n*H_n);*/
789   for (i=0; i<H_n*H_n; i++) { local_H[i] = 0.0; }
790 
791   nr = EXX_File_ERI_Read_NRecord(exx);
792   local_Ux = 0.0;
793 
794   for (ir=0; ir<nr; ir++) {
795     EXX_File_ERI_Read_Data_Head(exx, ir, &iop1, &iop2,
796       &nb1, &nb2, &nb3, &nb4, &nrn);
797 
798     /*EXX_Log_Print("ir= %2d  iop1= %2d  iop2= %2d  ", ir, iop1, iop2);*/
799     /*EXX_Log_Print("atom= %1d %1d %1d %1d\n", */
800     /*  op_atom1[iop1], op_atom2[iop1], op_atom1[iop2], op_atom2[iop2]);*/
801 
802     if (nrn != 1) {
803       fprintf(stderr, "***Error at %s (%d)\n", __FILE__, __LINE__);
804       fprintf(stderr, "   file is broken.\n");
805       abort();
806     }
807 
808     neri = nb1*nb2*nb3*nb4*nrn;
809     eri = (double*)malloc(sizeof(double)*neri);
810     EXX_File_ERI_Read(exx, ir, eri, NULL, iop1, iop2, nrn, neri);
811 
812 
813 #if 0
814     if (0==g_step) {
815       EXX_Log_Print("step3: ir= %d\n", ir);
816 
817       for (ib1=0; ib1<nb1; ib1++) {
818         for (ib2=0; ib2<nb2; ib2++) {
819           for (ib3=0; ib3<nb3; ib3++) {
820             for (ib4=0; ib4<nb4; ib4++) {
821               for (irn=0; irn<nrn; irn++) {
822                 EXX_Log_Print("IB1= %2d  IB2= %2d  ", ib1, ib2);
823                 EXX_Log_Print("IB3= %2d  IB4= %2d  ", ib3, ib4);
824                 EXX_Log_Print("IRN= %3d  ", irn);
825                 i = (((ib1*nb2+ib2)*nb3+ib3)*nb4+ib4)*nrn+irn;
826                 EXX_Log_Print("  ERI= %10.6f\n", eri[i]);
827               }
828             }
829           }
830         }
831       }
832     }
833 #endif
834 
835     EXX_OP2EP_Cluster(exx, iop1, iop2, iep1, iep2, &mul);
836     /*if (0==SpinP_switch) { mul *= 2; }*/
837     w = 1.0/(double)mul;
838 
839     for (j=0; j<8; j++) {
840       ia1 = ep_atom1[iep1[j]]; /* i */
841       ia2 = ep_atom2[iep1[j]]; /* j */
842       ia3 = ep_atom1[iep2[j]]; /* k */
843       ia4 = ep_atom2[iep2[j]]; /* l */
844       /*EXX_Log_Print("  j=%1d  iep1= %2d  iep2= %2d  ", j, iep1[j], iep2[j]);*/
845       /*EXX_Log_Print("atom= %1d %1d %1d %1d\n", ia1, ia2, ia3, ia4);*/
846 
847       nb1_ep = atom_nb[ia1];
848       nb2_ep = atom_nb[ia2];
849       nb3_ep = atom_nb[ia3];
850       nb4_ep = atom_nb[ia4];
851 
852       /* EXX_Log_Print("  nb= %2d %2d %2d %2d\n", nb1_ep, nb2_ep, nb3_ep, nb4_ep);*/
853 
854       GA_AN = ia1+1;
855       wanA = WhatSpecies[GA_AN];
856       tnoA = Spe_Total_CNO[wanA];
857       Anum = MP[GA_AN];
858 
859       GB_AN = ia2+1;
860       wanB = WhatSpecies[GB_AN];
861       tnoB = Spe_Total_CNO[wanB];
862       Bnum = MP[GB_AN];
863 
864       for (ib1_ep=0; ib1_ep<nb1_ep; ib1_ep++) {
865         for (ib2_ep=0; ib2_ep<nb2_ep; ib2_ep++) {
866           for (ib3_ep=0; ib3_ep<nb3_ep; ib3_ep++) {
867             for (ib4_ep=0; ib4_ep<nb4_ep; ib4_ep++) {
868               EXX_Basis_Index(ib1_ep, ib2_ep, ib3_ep, ib4_ep,
869                 &ib1, &ib2, &ib3, &ib4, j);
870               /*EXX_Log_Print("  basis(OP)= %2d %2d %2d %2d",*/
871               /*  ib1, ib2, ib3, ib4);*/
872               /*EXX_Log_Print("  basis(EP)= %2d %2d %2d %2d\n",*/
873               /*  ib1_ep, ib2_ep, ib3_ep, ib4_ep);*/
874               i = (((ib1*nb2+ib2)*nb3+ib3)*nb4+ib4)*nrn;
875               den = exx_CDM[iep2[j]][ib3_ep][ib4_ep].r;
876               /*if (den<0.0) { den = 0.0; }*/
877               /*if (den>1.0) { den = 1.0; }*/
878               sum = eri[i] * den;
879               /*i = (Anum+ib1-1)*H_n + (Bnum+ib2-1);*/
880               i = (Anum+ib1_ep-1)*H_n + (Bnum+ib2_ep-1);
881               local_H[i] += -w*sum;
882               den = exx_CDM[iep1[j]][ib1_ep][ib2_ep].r;
883               /*if (den<0.0) { den = 0.0; }*/
884               /*if (den>1.0) { den = 1.0; }*/
885               local_Ux += -0.5*w*sum*den;
886             }
887           }
888           /*local_H[i] += -0.5*w*sum;*/
889           /*local_Ux += -0.5*w*sum*exx_CDM[iep1[j]][ib1_ep][ib2_ep];*/
890 	}
891       }
892     } /* loop of j */
893 
894     free(eri);
895   }
896 
897 
898 #if 0
899   /* gather data */
900   if (EXX_ROOT_RANK==myid) {
901     for (iproc=0; iproc<nproc; iproc++) {
902       if (EXX_ROOT_RANK==iproc) { continue; }
903       /* Fock matrix */
904       MPI_Recv(buffer_H, H_n*H_n, MPI_DOUBLE, iproc, 0, g_exx_mpicomm, &stat);
905       for (i=0; i<H_n*H_n; i++) { local_H[i] += buffer_H[i]; }
906       /* energy */
907       MPI_Recv(&buffer_Ux, 1, MPI_DOUBLE, iproc, 1, g_exx_mpicomm, &stat);
908       local_Ux += buffer_Ux;
909     } /* iproc */
910   } else {
911     MPI_Send(local_H, H_n*H_n, MPI_DOUBLE, EXX_ROOT_RANK, 0, g_exx_mpicomm);
912     MPI_Send(&local_Ux, 1, MPI_DOUBLE, EXX_ROOT_RANK, 1, g_exx_mpicomm);
913   }
914 #endif
915   /* all-to-all communication */
916   MPI_Allreduce(local_H, local_H, H_n*H_n, MPI_DOUBLE,
917     MPI_SUM, g_exx_mpicomm);
918   MPI_Allreduce(&local_Ux, &local_Ux, 1, MPI_DOUBLE,
919     MPI_SUM, g_exx_mpicomm);
920 
921 
922 #if 0
923 #if 0
924   if (2==g_step || 3==g_step) { EXX_Log_Print("Local_H:\n"); }
925 #endif
926 
927   for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
928     wanA = WhatSpecies[GA_AN];
929     tnoA = Spe_Total_CNO[wanA];
930     Anum = MP[GA_AN];
931     for (GB_AN=1; GB_AN<=atomnum; GB_AN++) {
932       wanB = WhatSpecies[GB_AN];
933       tnoB = Spe_Total_CNO[wanB];
934       Bnum = MP[GB_AN];
935 
936       for (ib1=0; ib1<tnoA; ib1++) {
937         for (ib2=0; ib2<tnoB; ib2++) {
938           i = (Anum+ib1-1)*H_n + (Bnum+ib2-1);
939           H[Anum+ib1][Bnum+ib2] += local_H[i];
940 #if 0
941           if (2==g_step || 3==g_step) {
942             EXX_Log_Print("  %2d-%2d  %2d-%2d  %15.8f\n",
943               GA_AN, ib1, GB_AN, ib2, local_H[i]);
944           }
945 #endif
946         }
947       }
948     }
949   }
950 #if 0
951   EXX_Log_Print("\n");
952 #endif
953 #endif
954   *Ux = local_Ux;
955 
956   g_step++;
957 
958   /* free*/
959   free(local_H);
960 }
961 
962 
EXX_Simple_Mixing_DM(EXX_t * exx,double mix_wgt,dcomplex *** exx_CDM,dcomplex *** exx_PDM,dcomplex *** exx_P2DM)963 void EXX_Simple_Mixing_DM(
964   EXX_t *exx,
965   double mix_wgt,
966   dcomplex ***exx_CDM,
967   dcomplex ***exx_PDM,
968   dcomplex ***exx_P2DM
969 )
970 {
971   int iep, nep, Gc_AN, Gh_AN, ian, jan, m, n;
972   const int *ep_atom1, *ep_atom2;
973   double w1, w2;
974 
975   nep = EXX_Number_of_EP(exx);
976   ep_atom1 = EXX_Array_EP_Atom1(exx);
977   ep_atom2 = EXX_Array_EP_Atom2(exx);
978 
979   w1 = mix_wgt;
980   w2 = 1.0 - mix_wgt;
981 
982 #if EXX_MIX_DM
983   for (iep=0; iep<nep; iep++) {
984     Gc_AN = ep_atom1[iep]+1;
985     ian = Spe_Total_CNO[WhatSpecies[Gc_AN]];
986     Gh_AN = ep_atom2[iep]+1;
987     jan = Spe_Total_CNO[WhatSpecies[Gh_AN]];
988     for (m=0; m<ian; m++){
989       for (n=0; n<jan; n++){
990         exx_CDM[iep][m][n].r = w1 * exx_CDM[iep][m][n].r
991  	                      + w2 * exx_PDM[iep][m][n].r;
992         exx_CDM[iep][m][n].i = w1 * exx_CDM[iep][m][n].i
993  	                      + w2 * exx_PDM[iep][m][n].i;
994         exx_P2DM[iep][m][n].r = exx_PDM[iep][m][n].r;
995         exx_P2DM[iep][m][n].i = exx_PDM[iep][m][n].i;
996         exx_PDM[iep][m][n].r  = exx_CDM[iep][m][n].r;
997         exx_PDM[iep][m][n].i  = exx_CDM[iep][m][n].i;
998       }
999     }
1000   } /* loop of iep */
1001 #endif
1002 }
1003 
1004 
1005 
1006 
1007 
EXX_Fock_Band(dcomplex ** H,EXX_t * exx,dcomplex **** exx_CDM,int * MP,double k1,double k2,double k3,int spin)1008 void EXX_Fock_Band(
1009   dcomplex **H,
1010   EXX_t *exx,
1011   dcomplex ****exx_CDM,
1012   int    *MP,
1013   double k1,
1014   double k2,
1015   double k3,
1016   int    spin
1017 )
1018 {
1019   double *local_H, *mpi_H;
1020   int H_n, nbuf;
1021   int i, j, n, ir, nr, irn, nep;
1022 
1023   int iop1, iop2, nb1, nb2, nb3, nb4, nrn;
1024   int ia1, ia2, ia3, ia4, ib1, ib2, ib3, ib4, icell1, icell2;
1025   int ib1_ep, ib2_ep, ib3_ep, ib4_ep;
1026   int nb1_ep, nb2_ep, nb3_ep, nb4_ep;
1027   const int *ep_atom1, *ep_atom2, *ep_cell, *atom_nb;
1028   const int *op_atom1, *op_atom2;
1029 
1030   int iep1[8], iep2[8], icell3[8], mul;
1031   double w;
1032 
1033   int GA_AN, Anum, GB_AN, Bnum, tnoA, tnoB;
1034 
1035   int     neri;
1036   double *eri_list;
1037   int *iRm;
1038 
1039   double eri;
1040   dcomplex den;
1041 
1042   int nproc, myid, iproc;
1043   MPI_Status stat;
1044 
1045   int ncd, nshell_ep;
1046   int iRn_x, iRn_y, iRn_z, iRmp_x, iRmp_y, iRmp_z;
1047   double kRn, kRmp, co1, si1, co2, si2;
1048 
1049   MPI_Comm comm;
1050   double *k1_list, *k2_list, *k3_list;
1051   int *spin_list;
1052 
1053   comm = g_exx_mpicomm;
1054 
1055   MPI_Comm_size(comm, &nproc);
1056   MPI_Comm_rank(comm, &myid);
1057 
1058   k1_list = (double*)malloc(sizeof(double)*nproc);
1059   k2_list = (double*)malloc(sizeof(double)*nproc);
1060   k3_list = (double*)malloc(sizeof(double)*nproc);
1061   spin_list = (int*)malloc(sizeof(int)*nproc);
1062 
1063   /* all-to-all */
1064   MPI_Allgather(&k1,   1, MPI_DOUBLE, k1_list,   1, MPI_DOUBLE, comm);
1065   MPI_Allgather(&k2,   1, MPI_DOUBLE, k2_list,   1, MPI_DOUBLE, comm);
1066   MPI_Allgather(&k3,   1, MPI_DOUBLE, k3_list,   1, MPI_DOUBLE, comm);
1067   MPI_Allgather(&spin, 1, MPI_INT,    spin_list, 1, MPI_INT,    comm);
1068 
1069   op_atom1 = EXX_Array_OP_Atom1(exx);
1070   op_atom2 = EXX_Array_OP_Atom2(exx);
1071   ep_atom1 = EXX_Array_EP_Atom1(exx);
1072   ep_atom2 = EXX_Array_EP_Atom2(exx);
1073   ep_cell  = EXX_Array_EP_Cell(exx);
1074   atom_nb  = EXX_atom_nb(exx);
1075   nshell_ep = EXX_Number_of_EP_Shells(exx);
1076 
1077   ncd = 2*nshell_ep+1;
1078 
1079   /* matrix size */
1080   H_n = 0;
1081   for (i=1; i<=atomnum; i++){ H_n += Spe_Total_CNO[WhatSpecies[i]]; }
1082 
1083   /* allocation */
1084   nbuf = H_n*H_n*2;
1085   local_H = (double*)malloc(sizeof(double)*nbuf);
1086   mpi_H   = (double*)malloc(sizeof(double)*nbuf);
1087 
1088   nr = EXX_File_ERI_Read_NRecord(exx);
1089 
1090   /* clear buffer */
1091   for (i=0; i<nbuf; i++) { local_H[i] = 0.0; }
1092 
1093   for (ir=0; ir<nr; ir++) {
1094     EXX_File_ERI_Read_Data_Head(exx, ir, &iop1, &iop2,
1095       &nb1, &nb2, &nb3, &nb4, &nrn);
1096 
1097     neri = nb1*nb2*nb3*nb4*nrn;
1098     eri_list = (double*)malloc(sizeof(double)*neri);
1099     iRm = (int*)malloc(sizeof(int)*nrn); /* Rm */
1100     EXX_File_ERI_Read(exx, ir, eri_list, iRm, iop1, iop2, nrn, neri);
1101 
1102     for (iproc=0; iproc<nproc; iproc++) {
1103       /* clear buffer */
1104       for (i=0; i<nbuf; i++) { mpi_H[i] = 0.0; }
1105 
1106       k1   = k1_list[iproc];
1107       k2   = k2_list[iproc];
1108       k3   = k3_list[iproc];
1109       spin = spin_list[iproc];
1110 
1111       for (irn=0; irn<nrn; irn++) {
1112         EXX_OP2EP_Band(exx, iop1, iop2, iRm[irn],
1113           &iep1[0], &iep2[0], &icell3[0], &mul);
1114         w = 1.0/(double)mul;
1115 
1116         for (j=0; j<8; j++) {
1117           ia1 = ep_atom1[iep1[j]]; /* i */
1118           ia2 = ep_atom2[iep1[j]]; /* j */
1119           ia3 = ep_atom1[iep2[j]]; /* k */
1120           ia4 = ep_atom2[iep2[j]]; /* l */
1121           icell1 = ep_cell[iep1[j]]; /* Rn */
1122           icell2 = ep_cell[iep2[j]]; /* Rm' */
1123 
1124           nb1_ep = atom_nb[ia1];
1125           nb2_ep = atom_nb[ia2];
1126           nb3_ep = atom_nb[ia3];
1127           nb4_ep = atom_nb[ia4];
1128 
1129           GA_AN = ia1+1;
1130           Anum = MP[GA_AN];
1131 
1132           GB_AN = ia2+1;
1133           Bnum = MP[GB_AN];
1134 
1135           /* phase */
1136           iRn_x = icell1%ncd - nshell_ep;
1137           iRn_y = (icell1/ncd)%ncd - nshell_ep;
1138           iRn_z = (icell1/ncd/ncd)%ncd - nshell_ep;
1139           kRn = k1*(double)iRn_x + k2*(double)iRn_y + k3*(double)iRn_z;
1140           si1 = sin(2.0*PI*kRn);
1141           co1 = cos(2.0*PI*kRn);
1142 
1143           iRmp_x = icell2%ncd - nshell_ep;
1144           iRmp_y = (icell2/ncd)%ncd - nshell_ep;
1145           iRmp_z = (icell2/ncd/ncd)%ncd - nshell_ep;
1146 
1147           kRmp = k1*(double)iRmp_x + k2*(double)iRmp_y + k3*(double)iRmp_z;
1148           si2 = sin(2.0*PI*kRmp);
1149           co2 = cos(2.0*PI*kRmp);
1150 
1151           for (ib1_ep=0; ib1_ep<nb1_ep; ib1_ep++) {
1152             for (ib2_ep=0; ib2_ep<nb2_ep; ib2_ep++) {
1153               for (ib3_ep=0; ib3_ep<nb3_ep; ib3_ep++) {
1154                 for (ib4_ep=0; ib4_ep<nb4_ep; ib4_ep++) {
1155                   EXX_Basis_Index(ib1_ep, ib2_ep, ib3_ep, ib4_ep,
1156                     &ib1, &ib2, &ib3, &ib4, j);
1157                   i = (((ib1*nb2+ib2)*nb3+ib3)*nb4+ib4)*nrn+irn;
1158                   eri = eri_list[i];
1159                   den.r = exx_CDM[spin][iep2[j]][ib3_ep][ib4_ep].r;
1160                   den.i = exx_CDM[spin][iep2[j]][ib3_ep][ib4_ep].i;
1161                   i = (Anum+ib1_ep-1)*H_n + (Bnum+ib2_ep-1);
1162                   mpi_H[2*i+0] += -w*eri*(co1*den.r - si1*den.i); /* real */
1163                   mpi_H[2*i+1] += -w*eri*(co1*den.i + si1*den.r); /* imag */
1164                 }
1165               }
1166   	    }
1167           }
1168         } /* loop of j */
1169       } /* loop of irn */
1170 
1171       /* reduce to the iproc-th proc */
1172       MPI_Reduce(mpi_H, mpi_H, nbuf, MPI_DOUBLE, MPI_SUM, iproc, comm);
1173 
1174       if (myid==iproc) {
1175         for (i=0; i<nbuf; i++) { local_H[i] = mpi_H[i]; }
1176       }
1177     } /* loop of iproc */
1178 
1179     free(eri_list);
1180     free(iRm);
1181   } /* loop of irn */
1182 
1183 
1184 #if 1
1185   for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
1186     tnoA = Spe_Total_CNO[WhatSpecies[GA_AN]];
1187     Anum = MP[GA_AN];
1188     for (GB_AN=1; GB_AN<=atomnum; GB_AN++) {
1189       tnoB = Spe_Total_CNO[WhatSpecies[GB_AN]];
1190       Bnum = MP[GB_AN];
1191       for (ib1=0; ib1<tnoA; ib1++) {
1192         for (ib2=0; ib2<tnoB; ib2++) {
1193           i = (Anum+ib1-1)*H_n + (Bnum+ib2-1);
1194           H[Anum+ib1][Bnum+ib2].r += local_H[2*i+0];
1195           H[Anum+ib1][Bnum+ib2].i += local_H[2*i+1];
1196         }
1197       }
1198     }
1199   }
1200 #endif
1201 
1202   /* free*/
1203   free(local_H);
1204   free(mpi_H);
1205 }
1206 
1207 
1208 /*----------------------------------------------------------------------
1209   EXX_Energy_Band
1210 ----------------------------------------------------------------------*/
EXX_Energy_Band(double * Ux,EXX_t * exx,dcomplex **** exx_CDM,int * MP)1211 void EXX_Energy_Band(
1212   double *Ux,
1213   EXX_t *exx,
1214   dcomplex ****exx_CDM,
1215   int    *MP
1216 )
1217 {
1218   int myrank, nproc, iproc;
1219   int i, j, n, ir, nr, irn, nep, spin;
1220 
1221   int GA_AN,Anum, GB_AN, Bnum;
1222   int iop1, iop2, nb1, nb2, nb3, nb4, nrn;
1223   int iatm1, iatm2, iatm3, iatm4, ib1, ib2, ib3, ib4, icell1, icell2;
1224   int ib1_ep, ib2_ep, ib3_ep, ib4_ep;
1225   int nb1_ep, nb2_ep, nb3_ep, nb4_ep;
1226   const int *ep_atom1, *ep_atom2, *ep_cell, *atom_nb;
1227   const int *op_atom1, *op_atom2;
1228   int iep1[8], iep2[8], icell3[8], mul;
1229   double w;
1230 
1231   int neri;
1232   double *eri_list;
1233   double eri;
1234   int *iRm;
1235   dcomplex den1, den2;
1236 
1237   MPI_Status stat;
1238 
1239   double local_Ux[2];
1240 
1241   MPI_Comm comm;
1242 
1243   /* MPI information */
1244   comm = g_exx_mpicomm;
1245   MPI_Comm_size(comm, &nproc);
1246   MPI_Comm_rank(comm, &myrank);
1247 
1248   /* Index information */
1249   op_atom1 = EXX_Array_OP_Atom1(exx);
1250   op_atom2 = EXX_Array_OP_Atom2(exx);
1251   ep_atom1 = EXX_Array_EP_Atom1(exx);
1252   ep_atom2 = EXX_Array_EP_Atom2(exx);
1253   ep_cell  = EXX_Array_EP_Cell(exx);
1254   atom_nb  = EXX_atom_nb(exx);
1255 
1256   /* saved datapath */
1257   nr = EXX_File_ERI_Read_NRecord(exx);
1258 
1259   local_Ux[0] = 0.0;
1260   local_Ux[1] = 0.0;
1261 
1262   for (ir=0; ir<nr; ir++) {
1263     /* read data */
1264     EXX_File_ERI_Read_Data_Head(exx, ir, &iop1, &iop2,
1265       &nb1, &nb2, &nb3, &nb4, &nrn);
1266     neri = nb1*nb2*nb3*nb4*nrn;
1267     eri_list = (double*)malloc(sizeof(double)*neri);
1268     iRm = (int*)malloc(sizeof(int)*nrn); /* Rm */
1269     EXX_File_ERI_Read(exx, ir, eri_list, iRm, iop1, iop2, nrn, neri);
1270 
1271     for (irn=0; irn<nrn; irn++) {
1272       EXX_OP2EP_Band(exx, iop1, iop2, iRm[irn],
1273         &iep1[0], &iep2[0], &icell3[0], &mul);
1274       w = 1.0/(double)mul;
1275 
1276       for (j=0; j<8; j++) {
1277         iatm1  = ep_atom1[iep1[j]]; /* i */
1278         iatm2  = ep_atom2[iep1[j]]; /* j */
1279         iatm3  = ep_atom1[iep2[j]]; /* k */
1280         iatm4  = ep_atom2[iep2[j]]; /* l */
1281         icell1 = ep_cell[iep1[j]];  /* Rn */
1282         icell2 = ep_cell[iep2[j]];  /* Rm' */
1283 
1284         nb1_ep = atom_nb[iatm1];
1285         nb2_ep = atom_nb[iatm2];
1286         nb3_ep = atom_nb[iatm3];
1287         nb4_ep = atom_nb[iatm4];
1288 
1289         Anum = MP[iatm1+1];
1290         Bnum = MP[iatm2+1];
1291 
1292         for (ib1_ep=0; ib1_ep<nb1_ep; ib1_ep++) {
1293           for (ib2_ep=0; ib2_ep<nb2_ep; ib2_ep++) {
1294             for (ib3_ep=0; ib3_ep<nb3_ep; ib3_ep++) {
1295               for (ib4_ep=0; ib4_ep<nb4_ep; ib4_ep++) {
1296                 /* rotate basis index */
1297                 EXX_Basis_Index(ib1_ep, ib2_ep, ib3_ep, ib4_ep,
1298                   &ib1, &ib2, &ib3, &ib4, j);
1299 
1300                 i = (((ib1*nb2+ib2)*nb3+ib3)*nb4+ib4)*nrn+irn;
1301                 eri = eri_list[i];
1302 
1303                 for (spin=0; spin<=SpinP_switch; spin++) {
1304                   den1.r = exx_CDM[spin][iep2[j]][ib3_ep][ib4_ep].r;
1305                   den1.i = exx_CDM[spin][iep2[j]][ib3_ep][ib4_ep].i;
1306                   den2.r = exx_CDM[spin][iep1[j]][ib1_ep][ib2_ep].r;
1307                   den2.i = exx_CDM[spin][iep1[j]][ib1_ep][ib2_ep].i;
1308                   local_Ux[spin] +=
1309                     -0.5*w*eri*(den1.r*den1.r - den2.i*den2.i);
1310                 }
1311               }
1312             }
1313           }
1314         }
1315 
1316       } /* loop of j */
1317     } /* loop of irn */
1318 
1319     MPI_Allreduce(local_Ux, local_Ux, 2, MPI_DOUBLE, MPI_SUM, comm);
1320 
1321     free(eri_list);
1322     free(iRm);
1323   } /* loop of irn */
1324 
1325   Ux[0] = local_Ux[0];
1326   Ux[1] = local_Ux[1];
1327 }
1328 
1329 
1330 
1331 
1332 /*----------------------------------------------------------------------
1333   EXX_Reduce_DM
1334 ----------------------------------------------------------------------*/
EXX_Reduce_DM(EXX_t * exx,dcomplex *** exx_DM)1335 void EXX_Reduce_DM(EXX_t *exx, dcomplex ***exx_DM)
1336 {
1337   int i, j, iatm1, iatm2, iep, ib,  myrank, nproc;
1338   int nep, nb, nbmax, nbuf, nb1, nb2;
1339   const int *ep_atom1, *ep_atom2;
1340   double *buffer;
1341   MPI_Comm comm;
1342 
1343   comm = g_exx_mpicomm;
1344 
1345   nep = EXX_Number_of_EP(exx);
1346   ep_atom1 = EXX_Array_EP_Atom1(exx);
1347   ep_atom2 = EXX_Array_EP_Atom2(exx);
1348 
1349   MPI_Comm_rank(comm, &myrank);
1350   MPI_Comm_size(comm, &nproc);
1351 
1352   /* max number of basis */
1353   nbmax = 0;
1354   for (i=1; i<=atomnum; i++) {
1355     nb = Spe_Total_CNO[WhatSpecies[i]];
1356     if (nb>nbmax) { nbmax=nb; }
1357   }
1358 
1359   /* 1d array */
1360   nbuf = nbmax*nbmax*2;
1361   buffer = (double*)malloc(sizeof(double)*nbuf);
1362 
1363   for (iep=0; iep<nep; iep++) {
1364     iatm1 = ep_atom1[iep]+1;
1365     iatm2 = ep_atom2[iep]+1;
1366     nb1 = Spe_Total_CNO[WhatSpecies[iatm1]];
1367     nb2 = Spe_Total_CNO[WhatSpecies[iatm2]];
1368 
1369     /* clear array */
1370     for (i=0; i<nbuf; i++) { buffer[i] = 0.0; }
1371 
1372     /* put DM on array */
1373     for (i=0; i<nb1; i++){
1374       for (j=0; j<nb2; j++){
1375         ib = i*nbmax + j;
1376         buffer[2*ib+0] = exx_DM[iep][i][j].r;
1377         buffer[2*ib+1] = exx_DM[iep][i][j].i;
1378       }
1379     }
1380 
1381     /* all-to-all communication */
1382     MPI_Allreduce(buffer, buffer, nbuf, MPI_DOUBLE, MPI_SUM, comm);
1383 
1384     /* get back from array */
1385     for (i=0; i<nb1; i++){
1386       for (j=0; j<nb2; j++){
1387         ib = i*nbmax + j;
1388         exx_DM[iep][i][j].r = buffer[2*ib+0];
1389         exx_DM[iep][i][j].i = buffer[2*ib+1];
1390       }
1391     }
1392   } /* loop of iep */
1393 
1394   free(buffer);
1395 }
1396 
1397 
1398 
1399