1 /**********************************************************************
2   TRAN_DFT_Dosout.c:
3 
4   TRAN_DFT_Dosout.c is a subroutine to calculate density of states
5   of a central region with left and right infinite leads based on
6   a non-equilibrium Green's function method.
7 
8   Log of TRAN_DFT_Dosout.c:
9 
10      24/July/2008  Released by T.Ozaki
11 
12 ***********************************************************************/
13 /* revised by Y. Xiao for Noncollinear NEGF calculations */
14 
15 #define MEASURE_TIME  0
16 
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <math.h>
20 #include <time.h>
21 #include <mpi.h>
22 #include "tran_prototypes.h"
23 #include "tran_variables.h"
24 
25 
26 void dtime(double *);
27 
28 int Get_OneD_HS_Col(int set_flag, double ****RH, double *H1, int *MP,
29                     int *order_GA, int *My_NZeros, int *is1, int *is2);
30 
31 void Make_Comm_Worlds(
32    MPI_Comm MPI_Curret_Comm_WD,
33    int myid0,
34    int numprocs0,
35    int Num_Comm_World,
36    int *myworld1,
37    MPI_Comm *MPI_CommWD,     /* size: Num_Comm_World */
38    int *NPROCS1_ID,          /* size: numprocs0 */
39    int *Comm_World1,         /* size: numprocs0 */
40    int *NPROCS1_WD,          /* size: Num_Comm_World */
41    int *Comm_World_StartID   /* size: Num_Comm_World */
42    );
43 
44 
45 
46 
TRAN_DFT_Kdependent_NC(MPI_Comm comm1,int parallel_mode,int numprocs,int myid,int level_stdout,int iter,int SpinP_switch,double k2,double k3,int k_op,int * order_GA,double ** H1,double ** H2,double * S1,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int ** atv_ijk,int * List_YOUSO,float *** Dos,double ***** EDM,double Eele0[2],double Eele1[2])47 static void TRAN_DFT_Kdependent_NC(
48 			  /* input */
49 			  MPI_Comm comm1,
50                           int parallel_mode,
51                           int numprocs,
52                           int myid,
53 			  int level_stdout,
54 			  int iter,
55 			  int SpinP_switch,
56                           double k2,
57                           double k3,
58                           int k_op,
59                           int *order_GA,
60                           double **H1,
61                           double **H2,
62                           double *S1,
63 			  double *****nh,  /* H */
64 			  double *****ImNL, /* not used, s-o coupling */
65 			  double ****CntOLP,
66 			  int atomnum,
67 			  int Matomnum,
68 			  int *WhatSpecies,
69 			  int *Spe_Total_CNO,
70 			  int *FNAN,
71 			  int **natn,
72 			  int **ncn,
73 			  int *M2G,
74 			  int *G2ID,
75 			  int **atv_ijk,
76 			  int *List_YOUSO,
77 			  /* output */
78 			  float ***Dos,    /* output, DOS */
79 			  double *****EDM,  /* not used */
80 			  double Eele0[2], double Eele1[2]) /* not used */
81 
82 
83 {
84   int i,j,k,q,AN,iside;
85   int *MP,*MP_e[2];
86   int iw,iw_method;
87   dcomplex w, w_weight;
88   dcomplex *GCR,*GCA;
89   dcomplex *GRL,*GRR,*SigmaL, *SigmaR;
90   dcomplex *GCL_R,*GCR_R,*GCL_A,*GCR_A;
91   dcomplex *v1;
92   double dum,sum,tmpr,tmpi;
93   double co,si,kRn;
94   double TStime,TEtime;
95   int MA_AN, GA_AN, wanA, tnoA, Anum;
96   int LB_AN, GB_AN, wanB, tnoB, Bnum;
97   int l1,l2,l3,Rn,direction;
98   int LB_AN_e,GB_AN_e,GA_AN_e,Rn_e;
99 
100   double sum1,sum2;
101 
102   int ID;
103   int **iwIdx, Miwmax, Miw,iw0;
104   double *r_energy,de;
105 
106   /* parallel setup */
107 
108   Miwmax = (tran_dos_energydiv)/numprocs+1;
109 
110   iwIdx=(int**)malloc(sizeof(int*)*numprocs);
111   for (i=0;i<numprocs;i++) {
112     iwIdx[i]=(int*)malloc(sizeof(int)*Miwmax);
113   }
114 
115   TRAN_Distribute_Node_Idx(0, tran_dos_energydiv-1, numprocs, Miwmax,
116                            iwIdx); /* output */
117 
118   /* set up energies where DOS is calculated */
119   r_energy = (double*)malloc(sizeof(double)*tran_dos_energydiv);
120 
121   de = (tran_dos_energyrange[1]-tran_dos_energyrange[0])/(double)tran_dos_energydiv;
122   for (i=0; i<tran_dos_energydiv; i++) {
123     r_energy[i] = tran_dos_energyrange[0] + de*(double)i + ChemP_e[0];
124   }
125 
126   /* setup MP */
127   MP = (int*)malloc(sizeof(int)*(atomnum+1));
128   TRAN_Set_MP(1, atomnum, WhatSpecies, Spe_Total_CNO, &i, MP);
129   NUM_c  = i*2;
130 
131   MP_e[0] = (int*)malloc(sizeof(int)*(atomnum_e[0]+1));
132   TRAN_Set_MP( 1,  atomnum_e[0], WhatSpecies_e[0], Spe_Total_CNO_e[0], &i, MP_e[0]);
133   NUM_e[0]  = i*2;
134 
135   MP_e[1] = (int*)malloc(sizeof(int)*(atomnum_e[1]+1));
136   TRAN_Set_MP( 1,  atomnum_e[1], WhatSpecies_e[1], Spe_Total_CNO_e[1], &i, MP_e[1]);
137   NUM_e[1]  = i*2;
138 
139   /* initialize */
140   TRAN_Set_Value_double(SCC_nc,NUM_c*NUM_c,    0.0,0.0);
141   TRAN_Set_Value_double(SCL_nc,NUM_c*NUM_e[0], 0.0,0.0);
142   TRAN_Set_Value_double(SCR_nc,NUM_c*NUM_e[1], 0.0,0.0);
143 
144   TRAN_Set_Value_double(HCC_nc,NUM_c*NUM_c,    0.0,0.0);
145   TRAN_Set_Value_double(HCL_nc,NUM_c*NUM_e[0], 0.0,0.0);
146   TRAN_Set_Value_double(HCR_nc,NUM_c*NUM_e[1], 0.0,0.0);
147 
148 
149   /* set Hamiltonian and overlap matrices of left and right leads */
150 
151   TRAN_Set_SurfOverlap_NC(comm1,"left", k2, k3);
152   TRAN_Set_SurfOverlap_NC(comm1,"right",k2, k3);
153 
154   /* set CC, CL and CR */
155 
156   TRAN_Set_CentOverlap_NC(comm1,
157                           3,
158                           SpinP_switch,
159                           k2,
160                           k3,
161                           order_GA,
162                           H1,
163                           H2,
164                           S1,
165                           nh, /* input */
166                           CntOLP, /* input */
167                           atomnum,
168 			  Matomnum,
169 			  M2G,
170 			  G2ID,
171                           WhatSpecies,
172                           Spe_Total_CNO,
173                           FNAN,
174                           natn,
175                           ncn,
176                           atv_ijk);
177 
178   GCR    = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
179   GCA    = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
180   GRL    = (dcomplex*)malloc(sizeof(dcomplex)*NUM_e[0]* NUM_e[0]);
181   GRR    = (dcomplex*)malloc(sizeof(dcomplex)*NUM_e[1]* NUM_e[1]);
182   SigmaL = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
183   SigmaR = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
184   v1     = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
185   GCL_R  = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c*NUM_e[0]);
186   GCR_R  = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c*NUM_e[1]);
187   GCL_A  = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c*NUM_e[0]);
188   GCR_A  = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c*NUM_e[1]);
189 
190   /* parallel global iw 0: tran_dos_energydiv-1 */
191   /* parallel local  Miw 0:Miwmax-1             */
192   /* parllel variable iw=iwIdx[myid][Miw]       */
193 
194   for (Miw=0; Miw<Miwmax; Miw++) {
195 
196     iw = iwIdx[myid][Miw];
197 
198       if (iw>=0) {
199 
200         /**************************************
201                     w = w.r + i w.i
202         **************************************/
203 
204         w.r = r_energy[iw];
205         w.i = tran_dos_energyrange[2];
206 
207         iside = 0;
208 
209         TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_nc_e[iside], H01_nc_e[iside],
210 				   S00_nc_e[iside], S01_nc_e[iside], tran_surfgreen_iteration_max,
211                                    tran_surfgreen_eps, GRL);
212 
213         TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRL, NUM_c, HCL_nc, SCL_nc, SigmaL);
214 
215         iside = 1;
216 
217         TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_nc_e[iside],H01_nc_e[iside],
218 				   S00_nc_e[iside], S01_nc_e[iside], tran_surfgreen_iteration_max,
219                                    tran_surfgreen_eps, GRR);
220 
221         TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRR, NUM_c, HCR_nc, SCR_nc, SigmaR);
222 
223         TRAN_Calc_CentGreen(w, NUM_c, SigmaL,SigmaR, HCC_nc, SCC_nc, GCR);
224 
225         /* GCL_R and GCR_R */
226 
227         iside = 0;
228         TRAN_Calc_Hopping_G(w, NUM_e[iside], NUM_c, GRL, GCR, HCL_nc, SCL_nc, GCL_R);
229 
230         iside = 1;
231         TRAN_Calc_Hopping_G(w, NUM_e[iside], NUM_c, GRR, GCR, HCR_nc, SCR_nc, GCR_R);
232 
233         /**************************************
234                     w = w.r - i w.i
235         **************************************/
236 
237           w.r = r_energy[iw];
238           w.i =-tran_dos_energyrange[2];
239 
240 	  iside=0;
241 
242 	  TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_nc_e[iside],H01_nc_e[iside],
243 				     S00_nc_e[iside], S01_nc_e[iside], tran_surfgreen_iteration_max,
244 				     tran_surfgreen_eps, GRL);
245 
246 	  TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRL, NUM_c, HCL_nc, SCL_nc, SigmaL);
247 
248 	  iside=1;
249 
250 	  TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_nc_e[iside],H01_nc_e[iside],
251 				     S00_nc_e[iside], S01_nc_e[iside], tran_surfgreen_iteration_max,
252 				     tran_surfgreen_eps, GRR);
253 
254 	  TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRR, NUM_c, HCR_nc, SCR_nc, SigmaR);
255 
256 	  TRAN_Calc_CentGreen(w, NUM_c, SigmaL,SigmaR, HCC_nc, SCC_nc, GCA);
257 
258           /* GCL_R and GCR_R */
259 
260           iside = 0;
261           TRAN_Calc_Hopping_G(w, NUM_e[iside], NUM_c, GRL, GCA, HCL_nc, SCL_nc, GCL_A);
262 
263           iside = 1;
264           TRAN_Calc_Hopping_G(w, NUM_e[iside], NUM_c, GRR, GCA, HCR_nc, SCR_nc, GCR_A);
265 
266         /***********************************************
267           calculate density of states from the center G
268         ***********************************************/
269 
270         q = 0;
271 
272 	for (AN=1; AN<=atomnum; AN++) {
273 
274           GA_AN = order_GA[AN];
275 	  wanA = WhatSpecies[GA_AN];
276 	  tnoA = Spe_Total_CNO[wanA];
277 	  Anum = MP[GA_AN];
278 
279 	  for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
280 
281 	    GB_AN = natn[GA_AN][LB_AN];
282 	    Rn = ncn[GA_AN][LB_AN];
283 	    wanB = WhatSpecies[GB_AN];
284 	    tnoB = Spe_Total_CNO[wanB];
285 	    Bnum = MP[GB_AN];
286 
287 	    l1 = atv_ijk[Rn][1];
288 	    l2 = atv_ijk[Rn][2];
289 	    l3 = atv_ijk[Rn][3];
290 
291 	    kRn = -(k2*(double)l2 + k3*(double)l3);
292 	    si = sin(2.0*PI*kRn);
293 	    co = cos(2.0*PI*kRn);
294 
295 	    /* note that SCC includes information on the phase factor of translational cells */
296 
297 	      for (i=0; i<tnoA; i++) {
298 
299   	        sum1 = 0.0;
300                 sum2 = 0.0;
301 		for (j=0; j<tnoB; j++) {
302                   /*
303                   tmpr =-0.5*(GCR[ v_idx( Anum+i, Bnum+j ) ].i - GCA[ v_idx( Anum+i, Bnum+j ) ].i);
304                   tmpi = 0.5*(GCR[ v_idx( Anum+i, Bnum+j ) ].r - GCA[ v_idx( Anum+i, Bnum+j ) ].r);
305                   sum += (double)(l1==0)*S1[q]*(tmpr*co - tmpi*si);
306                   */
307                              /* up-up spin contribution */
308 		  tmpr =-0.5*(GCR[ (Bnum+j-1)*NUM_c+(Anum+i)-1 ].i - GCA[ (Bnum+j-1)*NUM_c+(Anum+i)-1 ].i);
309 		  tmpi = 0.5*(GCR[ (Bnum+j-1)*NUM_c+(Anum+i)-1 ].r - GCA[ (Bnum+j-1)*NUM_c+(Anum+i)-1 ].r);
310 		  sum1 += (double)(l1==0)*S1[q]*(tmpr*co - tmpi*si);
311                              /* down-down spin contribution */
312                   tmpr =-0.5*(GCR[ (Bnum+j-1+NUM_c/2)*NUM_c+(Anum+i+NUM_c/2)-1 ].i - GCA[ (Bnum+j-1+NUM_c/2)*NUM_c+(Anum+i+NUM_c/2)-1 ].i);
313                   tmpi = 0.5*(GCR[ (Bnum+j-1+NUM_c/2)*NUM_c+(Anum+i+NUM_c/2)-1 ].r - GCA[ (Bnum+j-1+NUM_c/2)*NUM_c+(Anum+i+NUM_c/2)-1 ].r);
314                   sum2 += (double)(l1==0)*S1[q]*(tmpr*co - tmpi*si);
315 
316 		  q++;
317 		}
318 
319  	        Dos[iw][0][Anum+i-1] += (float)k_op*sum1/PI;
320                 Dos[iw][1][Anum+i-1] += (float)k_op*sum2/PI;
321 
322 	      }
323 
324 	  } /* LB_AN */
325 	} /* AN */
326 
327         /*******************************************************
328          calculate density of states contributed from the
329          off-diagonal G between the Central and Left regions
330         *******************************************************/
331 
332         iside = 0;
333         direction = -1;
334 
335         for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
336 
337           wanA = WhatSpecies[GA_AN];
338           tnoA = Spe_Total_CNO[wanA];
339           Anum = MP[GA_AN];
340 
341           if (TRAN_region[GA_AN]%10==2){
342 
343 	    GA_AN_e =  TRAN_Original_Id[GA_AN];
344 
345 	    for (i=0; i<tnoA; i++){
346 
347 	      sum1 = 0.0;
348               sum2 = 0.0;
349 
350 	      for (LB_AN_e=0; LB_AN_e<=FNAN_e[iside][GA_AN_e]; LB_AN_e++){
351 
352 		GB_AN_e = natn_e[iside][GA_AN_e][LB_AN_e];
353 		Rn_e = ncn_e[iside][GA_AN_e][LB_AN_e];
354 		wanB = WhatSpecies_e[iside][GB_AN_e];
355 		tnoB = Spe_Total_CNO_e[iside][wanB];
356 		Bnum = MP_e[iside][GB_AN_e];
357 
358 		l1 = atv_ijk_e[iside][Rn_e][1];
359 		l2 = atv_ijk_e[iside][Rn_e][2];
360 		l3 = atv_ijk_e[iside][Rn_e][3];
361 
362   	        kRn = -(k2*(double)l2 + k3*(double)l3);
363 		si = sin(2.0*PI*kRn);
364 		co = cos(2.0*PI*kRn);
365 
366 		if (l1==direction) {
367 		  for (j=0; j<tnoB; j++){
368 
369 		      tmpr =-0.5*(GCL_R[ (Bnum+j-1)*NUM_c+(Anum+i)-1 ].i - GCL_A[ (Bnum+j-1)*NUM_c+(Anum+i)-1 ].i);
370 		      tmpi = 0.5*(GCL_R[ (Bnum+j-1)*NUM_c+(Anum+i)-1 ].r - GCL_A[ (Bnum+j-1)*NUM_c+(Anum+i)-1 ].r);
371 		      sum1 += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*(tmpr*co - tmpi*si);
372 
373                       tmpr =-0.5*(GCL_R[ (Bnum+j-1+NUM_e[0]/2)*NUM_c+(Anum+i+NUM_c/2)-1 ].i - GCL_A[ (Bnum+j-1+NUM_e[0]/2)*NUM_c+(Anum+i+NUM_c/2)-1 ].i);
374                       tmpi = 0.5*(GCL_R[ (Bnum+j-1+NUM_e[0]/2)*NUM_c+(Anum+i+NUM_c/2)-1 ].r - GCL_A[ (Bnum+j-1+NUM_e[0]/2)*NUM_c+(Anum+i+NUM_c/2)-1 ].r);
375                       sum2 += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*(tmpr*co - tmpi*si);
376 
377 		  } /* j<tnoB */
378 		}
379 	      } /* LB_AN */
380 
381 	      Dos[iw][0][Anum+i-1] += (float)k_op*sum1/PI;
382               Dos[iw][1][Anum+i-1] += (float)k_op*sum2/PI;
383 
384 	    } /* i */
385 	  }
386 	}
387 
388         /*******************************************************
389          calculate density of states contributed from the
390          off-diagonal G between the Central and Right regions
391         *******************************************************/
392 
393         iside = 1;
394         direction = 1;
395 
396         for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
397 
398           wanA = WhatSpecies[GA_AN];
399           tnoA = Spe_Total_CNO[wanA];
400           Anum = MP[GA_AN];
401 
402           if (TRAN_region[GA_AN]%10==3){
403 
404 	    GA_AN_e =  TRAN_Original_Id[GA_AN];
405 
406 	    for (i=0; i<tnoA; i++){
407 
408 	      sum1 = 0.0;
409               sum2 = 0.0;
410 
411 	      for (LB_AN_e=0; LB_AN_e<=FNAN_e[iside][GA_AN_e]; LB_AN_e++){
412 
413 		GB_AN_e = natn_e[iside][GA_AN_e][LB_AN_e];
414 		Rn_e = ncn_e[iside][GA_AN_e][LB_AN_e];
415 		wanB = WhatSpecies_e[iside][GB_AN_e];
416 		tnoB = Spe_Total_CNO_e[iside][wanB];
417 		Bnum = MP_e[iside][GB_AN_e];
418 
419 		l1 = atv_ijk_e[iside][Rn_e][1];
420 		l2 = atv_ijk_e[iside][Rn_e][2];
421 		l3 = atv_ijk_e[iside][Rn_e][3];
422 
423   	        kRn = -(k2*(double)l2 + k3*(double)l3);
424 		si = sin(2.0*PI*kRn);
425 		co = cos(2.0*PI*kRn);
426 
427 		if (l1==direction) {
428 		  for (j=0; j<tnoB; j++){
429                       /*
430                       tmpr =-0.5*(GCR_R[ v_idx( Anum+i, Bnum+j) ].i - GCR_A[ v_idx( Anum+i, Bnum+j) ].i);
431                       tmpi = 0.5*(GCR_R[ v_idx( Anum+i, Bnum+j) ].r - GCR_A[ v_idx( Anum+i, Bnum+j) ].r);
432                       */
433 
434 		      tmpr =-0.5*(GCR_R[ (Bnum+j-1)*NUM_c+(Anum+i)-1 ].i - GCR_A[ (Bnum+j-1)*NUM_c+(Anum+i)-1 ].i);
435 		      tmpi = 0.5*(GCR_R[ (Bnum+j-1)*NUM_c+(Anum+i)-1 ].r - GCR_A[ (Bnum+j-1)*NUM_c+(Anum+i)-1 ].r);
436 		      sum1 += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*(tmpr*co - tmpi*si);
437 
438                       tmpr =-0.5*(GCR_R[ (Bnum+j-1+NUM_e[1]/2)*NUM_c+(Anum+i+NUM_c/2)-1 ].i - GCR_A[ (Bnum+j-1+NUM_e[1]/2)*NUM_c+(Anum+i+NUM_c/2)-1 ].i);
439                       tmpi = 0.5*(GCR_R[ (Bnum+j-1+NUM_e[1]/2)*NUM_c+(Anum+i+NUM_c/2)-1 ].r - GCR_A[ (Bnum+j-1+NUM_e[1]/2)*NUM_c+(Anum+i+NUM_c/2)-1 ].r);
440                       sum2 += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*(tmpr*co - tmpi*si);
441 
442 		  } /* j<tnoB */
443 		}
444 	      } /* LB_AN */
445 
446 	      Dos[iw][0][Anum+i-1] += (float)k_op*sum1/PI;
447               Dos[iw][1][Anum+i-1] += (float)k_op*sum2/PI;
448 
449 	    } /* i */
450 	  }
451 	}
452 
453       } /* iw>=0 */
454   }     /* Miw   */
455 
456   /* free arrays */
457 
458   free(GCR_A);
459   free(GCL_A);
460   free(GCR_R);
461   free(GCL_R);
462   free(v1);
463   free(SigmaR);
464   free(SigmaL);
465   free(GRR);
466   free(GRL);
467   free(GCR);
468   free(GCA);
469   free(MP_e[1]);
470   free(MP_e[0]);
471   free(MP);
472 
473   for (i=0;i<numprocs;i++) {
474     free(iwIdx[i]);
475   }
476   free(iwIdx);
477 
478   free(r_energy);
479 }
480 
481 
482 
483 
484 
485 
486 
487 
488 
489 
TRAN_DFT_Dosout_NC(MPI_Comm comm1,int level_stdout,int iter,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int ** atv_ijk,int * List_YOUSO,int ** Spe_Num_CBasis,int SpeciesNum,char * filename,char * filepath,double ***** CDM,double ***** EDM,double Eele0[2],double Eele1[2])490 static double TRAN_DFT_Dosout_NC(
491 		/* input */
492                 MPI_Comm comm1,
493                 int level_stdout,
494 		int iter,
495 		int SpinP_switch,
496 		double *****nh,   /* H */
497 		double *****ImNL, /* not used, s-o coupling */
498 		double ****CntOLP,
499 		int atomnum,
500 		int Matomnum,
501 		int *WhatSpecies,
502 		int *Spe_Total_CNO,
503 		int *FNAN,
504 		int **natn,
505 		int **ncn,
506 		int *M2G,
507 		int *G2ID,
508 		int **atv_ijk,
509 		int *List_YOUSO,
510                 int **Spe_Num_CBasis,
511                 int SpeciesNum,
512                 char *filename,
513                 char *filepath,
514 		/* output */
515 		double *****CDM,  /* not used */
516 		double *****EDM,  /* not used */
517 		double Eele0[2], double Eele1[2]) /* not used */
518 {
519   int numprocs0,numprocs1,myid0,myid1,ID;
520   int myworld1,i2,i3,k_op,ik;
521   int T_knum,kloop0,kloop,Anum;
522   int i,j,spin,MA_AN,GA_AN,wanA,tnoA;
523   int LB_AN,GB_AN,wanB,tnoB,k;
524   int E_knum,S_knum,num_kloop0,parallel_mode;
525   int **op_flag,*T_op_flag,*T_k_ID;
526   double *T_KGrids2,*T_KGrids3;
527   double k2,k3,tmp;
528   double TStime,TEtime;
529   float ***Dos;
530 
531   int *MP;
532   int *order_GA;
533   int *My_NZeros;
534   int *SP_NZeros;
535   int *SP_Atoms;
536   int size_H1;
537   double **H1,*S1;
538   double **H2;
539 
540   int Num_Comm_World1;
541   int *NPROCS_ID1;
542   int *Comm_World1;
543   int *NPROCS_WD1;
544   int *Comm_World_StartID1;
545   MPI_Comm *MPI_CommWD1;
546 
547   MPI_Comm_size(comm1,&numprocs0);
548   MPI_Comm_rank(comm1,&myid0);
549 
550   dtime(&TStime);
551 
552   if (myid0==Host_ID){
553     printf("<TRAN_DFT_Dosout>\n"); fflush(stdout);
554   }
555 
556   /* allocate Dos */
557 
558   TRAN_Set_MP(0, atomnum, WhatSpecies, Spe_Total_CNO, &NUM_c, &i);
559 
560   Dos = (float***)malloc(sizeof(float**)*tran_dos_energydiv);
561   for (ik=0; ik<tran_dos_energydiv; ik++) {
562     Dos[ik] = (float**)malloc(sizeof(float*)*(1+1) );
563     for (spin=0; spin<=1; spin++) {
564       Dos[ik][spin] = (float*)malloc(sizeof(float)*NUM_c);
565       for (i=0; i<NUM_c; i++)  Dos[ik][spin][i] = 0.0;
566     }
567   }
568 
569   /***********************************
570         set up operation flag
571   ************************************/
572 
573   op_flag = (int**)malloc(sizeof(int*)*TRAN_dos_Kspace_grid2);
574   for (i2=0; i2<TRAN_dos_Kspace_grid2; i2++){
575     op_flag[i2] = (int*)malloc(sizeof(int)*TRAN_dos_Kspace_grid3);
576     for (i3=0; i3<TRAN_dos_Kspace_grid3; i3++){
577       op_flag[i2][i3] = 1;
578     }
579   }
580 
581   /***********************************
582        one-dimentionalize for MPI
583   ************************************/
584 
585   T_knum = 0;
586   for (i2=0; i2<TRAN_dos_Kspace_grid2; i2++){
587     for (i3=0; i3<TRAN_dos_Kspace_grid3; i3++){
588       if (0<op_flag[i2][i3]) T_knum++;
589     }
590   }
591 
592   T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
593   T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
594   T_op_flag = (int*)malloc(sizeof(int)*T_knum);
595   T_k_ID = (int*)malloc(sizeof(int)*T_knum);
596 
597   T_knum = 0;
598 
599   for (i2=0; i2<TRAN_dos_Kspace_grid2; i2++){
600 
601     k2 = -0.5 + (2.0*(double)i2+1.0)/(2.0*(double)TRAN_dos_Kspace_grid2) + Shift_K_Point;
602 
603     for (i3=0; i3<TRAN_dos_Kspace_grid3; i3++){
604 
605       k3 = -0.5 + (2.0*(double)i3+1.0)/(2.0*(double)TRAN_dos_Kspace_grid3) - Shift_K_Point;
606 
607       if (0<op_flag[i2][i3]){
608 
609         T_KGrids2[T_knum] = k2;
610         T_KGrids3[T_knum] = k3;
611         T_op_flag[T_knum] = op_flag[i2][i3];
612 
613         T_knum++;
614       }
615     }
616   }
617 
618   /***************************************************
619    allocate calculations of k-points into processors
620   ***************************************************/
621 
622   if (numprocs0<T_knum){
623 
624     /* set parallel_mode */
625     parallel_mode = 0;
626 
627     /* allocation of kloop to ID */
628 
629     for (ID=0; ID<numprocs0; ID++){
630 
631       tmp = (double)T_knum/(double)numprocs0;
632       S_knum = (int)((double)ID*(tmp+1.0e-12));
633       E_knum = (int)((double)(ID+1)*(tmp+1.0e-12)) - 1;
634       if (ID==(numprocs0-1)) E_knum = T_knum - 1;
635       if (E_knum<0)          E_knum = 0;
636 
637       for (k=S_knum; k<=E_knum; k++){
638         /* ID in the first level world */
639         T_k_ID[k] = ID;
640       }
641     }
642 
643     /* find own informations */
644 
645     tmp = (double)T_knum/(double)numprocs0;
646     S_knum = (int)((double)myid0*(tmp+1.0e-12));
647     E_knum = (int)((double)(myid0+1)*(tmp+1.0e-12)) - 1;
648     if (myid0==(numprocs0-1)) E_knum = T_knum - 1;
649     if (E_knum<0)             E_knum = 0;
650 
651     num_kloop0 = E_knum - S_knum + 1;
652 
653   }
654 
655   else {
656 
657     /* set parallel_mode */
658     parallel_mode = 1;
659     num_kloop0 = 1;
660 
661     Num_Comm_World1 = T_knum;
662 
663     NPROCS_ID1 = (int*)malloc(sizeof(int)*numprocs0);
664     Comm_World1 = (int*)malloc(sizeof(int)*numprocs0);
665     NPROCS_WD1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
666     Comm_World_StartID1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
667     MPI_CommWD1 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World1);
668 
669     Make_Comm_Worlds(comm1, myid0, numprocs0, Num_Comm_World1, &myworld1, MPI_CommWD1,
670 		     NPROCS_ID1, Comm_World1, NPROCS_WD1, Comm_World_StartID1);
671 
672     MPI_Comm_size(MPI_CommWD1[myworld1],&numprocs1);
673     MPI_Comm_rank(MPI_CommWD1[myworld1],&myid1);
674 
675     S_knum = myworld1;
676 
677     /* allocate k-points into processors */
678 
679     for (k=0; k<T_knum; k++){
680       /* ID in the first level world */
681       T_k_ID[k] = Comm_World_StartID1[k];
682     }
683 
684   }
685 
686   /*************************************************************
687    one-dimensitonalize H and S and store them in a compact form
688   *************************************************************/
689 
690   MP = (int*)malloc(sizeof(int)*(atomnum+1));
691   order_GA = (int*)malloc(sizeof(int)*(atomnum+1));
692 
693   My_NZeros = (int*)malloc(sizeof(int)*numprocs0);
694   SP_NZeros = (int*)malloc(sizeof(int)*numprocs0);
695   SP_Atoms = (int*)malloc(sizeof(int)*numprocs0);
696 
697   size_H1 = Get_OneD_HS_Col(0, nh[0], &tmp, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
698 
699   H1 = (double**)malloc(sizeof(double*)*(SpinP_switch+1));
700   for (spin=0; spin<(SpinP_switch+1); spin++){
701     H1[spin] = (double*)malloc(sizeof(double)*size_H1);
702   }
703 
704   H2 = (double**)malloc(sizeof(double*)*(2+1));
705   for (k=0; k<(2+1); k++){
706     H2[k] = (double*)malloc(sizeof(double)*size_H1);
707   }
708 
709   S1 = (double*)malloc(sizeof(double)*size_H1);
710 
711   for (spin=0; spin<(SpinP_switch+1); spin++){
712     size_H1 = Get_OneD_HS_Col(1, nh[spin], H1[spin], MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
713   }
714 
715   for (k=0; k<(2+1); k++){
716     size_H1 = Get_OneD_HS_Col(1, ImNL[k], H2[k], MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
717   }
718 
719   size_H1 = Get_OneD_HS_Col(1, CntOLP, S1, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
720 
721   /***********************************************************
722    start "kloop0"
723   ***********************************************************/
724 
725   for (kloop0=0; kloop0<num_kloop0; kloop0++){
726 
727     kloop = S_knum + kloop0;
728 
729     k2 = T_KGrids2[kloop];
730     k3 = T_KGrids3[kloop];
731     k_op = T_op_flag[kloop];
732 
733     if (parallel_mode){
734 
735         TRAN_DFT_Kdependent_NC(MPI_CommWD1[myworld1],
736   			    parallel_mode, numprocs1, myid1,
737                             level_stdout, iter, SpinP_switch, k2, k3, k_op, order_GA,
738                             H1, H2, S1,
739                             nh, ImNL, CntOLP,
740 	  	  	    atomnum, Matomnum, WhatSpecies, Spe_Total_CNO, FNAN,
741 			    natn, ncn, M2G, G2ID, atv_ijk, List_YOUSO, Dos, EDM, Eele0, Eele1);
742     }
743 
744     else{
745 
746         TRAN_DFT_Kdependent_NC(comm1,
747  			    parallel_mode, 1, 0,
748                             level_stdout, iter, SpinP_switch, k2, k3, k_op, order_GA,
749                             H1, H2, S1,
750                             nh, ImNL, CntOLP,
751 	  	  	    atomnum, Matomnum, WhatSpecies, Spe_Total_CNO, FNAN,
752 			    natn, ncn, M2G, G2ID, atv_ijk, List_YOUSO, Dos, EDM, Eele0, Eele1);
753     }
754   }
755 
756   /*******************************************************
757           summ up Dos by MPI and send it Host_ID
758   *******************************************************/
759 
760   MPI_Barrier(comm1);
761 
762   /* in TRAN_DFT_Kdependent(), NUM_c is doubled. So, recalculate it */
763   TRAN_Set_MP(0, atomnum, WhatSpecies, Spe_Total_CNO, &NUM_c, &i);
764 
765   {
766     float *Dos0;
767 
768     Dos0 = (float*)malloc(sizeof(float)*NUM_c);
769 
770     tmp = 1.0/(double)(TRAN_dos_Kspace_grid2*TRAN_dos_Kspace_grid3);
771 
772     for (ik=0; ik<tran_dos_energydiv; ik++) {
773       for (spin=0; spin<=1; spin++) {
774 
775 	MPI_Reduce(&Dos[ik][spin][0], &Dos0[0], NUM_c, MPI_FLOAT, MPI_SUM, Host_ID, comm1);
776         MPI_Barrier(comm1);
777         for (i=0; i<NUM_c; i++)  Dos[ik][spin][i] = Dos0[i]*tmp;
778       }
779     }
780 
781     free(Dos0);
782   }
783 
784   /**********************************************************
785                      save Dos to a file
786   **********************************************************/
787 
788   if (myid0==Host_ID){
789 
790     FILE *fp_eig, *fp_ev;
791     char file_eig[YOUSO10],file_ev[YOUSO10];
792     int l,MaxL;
793     double *r_energy,de;
794     int i_vec[10];
795 
796     r_energy = (double*)malloc(sizeof(double)*tran_dos_energydiv);
797 
798     de = (tran_dos_energyrange[1]-tran_dos_energyrange[0])/(double)tran_dos_energydiv;
799     for (i=0; i<tran_dos_energydiv; i++) {
800       r_energy[i] = tran_dos_energyrange[0] + de*(double)i + ChemP_e[0];
801     }
802 
803     /* write *.Dos.val */
804 
805     sprintf(file_eig,"%s%s.Dos.val",filepath,filename);
806 
807     if ( (fp_eig=fopen(file_eig,"w"))==NULL ) {
808       printf("can not open a file %s\n",file_eig);
809     }
810     else {
811 
812       printf("  write eigenvalues\n");
813 
814       fprintf(fp_eig,"mode        6\n");
815       fprintf(fp_eig,"NonCol      0\n");
816       fprintf(fp_eig,"N           %d\n",NUM_c);
817       fprintf(fp_eig,"Nspin       %d\n",1); /* SpinP_switch has been changed to 1 */
818       fprintf(fp_eig,"Erange      %lf %lf\n",tran_dos_energyrange[0],tran_dos_energyrange[1]);
819       fprintf(fp_eig,"Kgrid       %d %d %d\n",1,1,1);
820       fprintf(fp_eig,"atomnum     %d\n",atomnum);
821       fprintf(fp_eig,"<WhatSpecies\n");
822       for (i=1; i<=atomnum; i++) {
823         fprintf(fp_eig,"%d ",WhatSpecies[i]);
824       }
825       fprintf(fp_eig,"\nWhatSpecies>\n");
826       fprintf(fp_eig,"SpeciesNum     %d\n",SpeciesNum);
827       fprintf(fp_eig,"<Spe_Total_CNO\n");
828       for (i=0;i<SpeciesNum;i++) {
829         fprintf(fp_eig,"%d ",Spe_Total_CNO[i]);
830       }
831       fprintf(fp_eig,"\nSpe_Total_CNO>\n");
832       MaxL=4;
833       fprintf(fp_eig,"MaxL           %d\n",4);
834       fprintf(fp_eig,"<Spe_Num_CBasis\n");
835       for (i=0;i<SpeciesNum;i++) {
836         for (l=0;l<=MaxL;l++) {
837 	  fprintf(fp_eig,"%d ",Spe_Num_CBasis[i][l]);
838         }
839         fprintf(fp_eig,"\n");
840       }
841       fprintf(fp_eig,"Spe_Num_CBasis>\n");
842       fprintf(fp_eig,"ChemP       %lf\n",ChemP_e[0]);
843 
844       fprintf(fp_eig,"irange      %d %d\n",0,tran_dos_energydiv-1);
845       fprintf(fp_eig,"<Eigenvalues\n");
846       for (spin=0; spin<=1; spin++) {
847         fprintf(fp_eig,"%d %d %d ",0,0,0);
848         for (ik=0; ik<tran_dos_energydiv; ik++) {
849           fprintf(fp_eig,"%lf ",r_energy[ik]);
850 	}
851         fprintf(fp_eig,"\n");
852       }
853       fprintf(fp_eig,"Eigenvalues>\n");
854 
855       fclose(fp_eig);
856     }
857 
858     /* write *.Dos.vec */
859 
860     printf("  write eigenvectors\n");
861 
862     sprintf(file_ev,"%s%s.Dos.vec",filepath,filename);
863 
864     if ( (fp_ev=fopen(file_ev,"w"))==NULL ) {
865       printf("can not open a file %s\n",file_ev);
866     }
867     else {
868 
869       for (spin=0; spin<=1; spin++) {
870         for (ik=0; ik<tran_dos_energydiv; ik++) {
871 
872           i_vec[0]=i_vec[1]=i_vec[2]=0;
873           if (myid0==Host_ID) fwrite(i_vec,sizeof(int),3,fp_ev);
874 
875           for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
876 	    wanA = WhatSpecies[GA_AN];
877 	    tnoA = Spe_Total_CNO[wanA];
878             Anum = MP[GA_AN];
879             fwrite(&Dos[ik][spin][Anum-1],sizeof(float),tnoA,fp_ev);
880 	  }
881 	}
882       }
883 
884       fclose(fp_ev);
885     }
886 
887     /* free arrays */
888 
889     free(r_energy);
890   }
891 
892   /* free arrays */
893 
894   for (ik=0; ik<tran_dos_energydiv; ik++) {
895     for (spin=0; spin<=1; spin++) {
896       free(Dos[ik][spin]);
897     }
898     free(Dos[ik]);
899   }
900   free(Dos);
901 
902   for (i2=0; i2<TRAN_dos_Kspace_grid2; i2++){
903     free(op_flag[i2]);
904   }
905   free(op_flag);
906 
907   free(T_KGrids2);
908   free(T_KGrids3);
909   free(T_op_flag);
910   free(T_k_ID);
911 
912   if (T_knum<=numprocs0){
913 
914     if (Num_Comm_World1<=numprocs0){
915       MPI_Comm_free(&MPI_CommWD1[myworld1]);
916     }
917 
918     free(NPROCS_ID1);
919     free(Comm_World1);
920     free(NPROCS_WD1);
921     free(Comm_World_StartID1);
922     free(MPI_CommWD1);
923   }
924 
925   free(MP);
926   free(order_GA);
927 
928   free(My_NZeros);
929   free(SP_NZeros);
930   free(SP_Atoms);
931 
932   for (spin=0; spin<(SpinP_switch+1); spin++){
933     free(H1[spin]);
934   }
935   free(H1);
936 
937   for (spin=0; spin<(2+1); spin++){
938     free(H2[spin]);
939   }
940   free(H2);
941 
942   free(S1);
943 
944   /* for elapsed time */
945   dtime(&TEtime);
946 
947   /*
948   if (myid==Host_ID){
949     printf("TRAN_DFT_Dosout time=%12.7f\n",TEtime - TStime);
950   }
951   */
952 
953   return TEtime - TStime;
954 }
955 
956 
957 
958 
TRAN_DFT_Kdependent_Col(MPI_Comm comm1,int parallel_mode,int numprocs,int myid,int level_stdout,int iter,int SpinP_switch,double k2,double k3,int k_op,int * order_GA,double ** H1,double * S1,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int ** atv_ijk,int * List_YOUSO,float *** Dos,double ***** EDM,double Eele0[2],double Eele1[2])959 static void TRAN_DFT_Kdependent_Col(
960 			  /* input */
961 			  MPI_Comm comm1,
962                           int parallel_mode,
963                           int numprocs,
964                           int myid,
965 			  int level_stdout,
966 			  int iter,
967 			  int SpinP_switch,
968                           double k2,
969                           double k3,
970                           int k_op,
971                           int *order_GA,
972                           double **H1,
973                           double *S1,
974 			  double *****nh,  /* H */
975 			  double *****ImNL, /* not used, s-o coupling */
976 			  double ****CntOLP,
977 			  int atomnum,
978 			  int Matomnum,
979 			  int *WhatSpecies,
980 			  int *Spe_Total_CNO,
981 			  int *FNAN,
982 			  int **natn,
983 			  int **ncn,
984 			  int *M2G,
985 			  int *G2ID,
986 			  int **atv_ijk,
987 			  int *List_YOUSO,
988 			  /* output */
989 			  float ***Dos,    /* output, DOS */
990 			  double *****EDM,  /* not used */
991 			  double Eele0[2], double Eele1[2]) /* not used */
992 
993 #define GC_ref(i,j) GC[ NUM_c*((j)-1) + (i)-1 ]
994 #define v_idx(i,j)   ( ((j)-1)*NUM_c + (i)-1 )
995 
996 {
997   int i,j,k,q,AN,iside;
998   int *MP,*MP_e[2];
999   int iw,iw_method;
1000   dcomplex w, w_weight;
1001   dcomplex *GCR,*GCA;
1002   dcomplex *GRL,*GRR,*SigmaL, *SigmaR;
1003   dcomplex *GCL_R,*GCR_R,*GCL_A,*GCR_A;
1004   dcomplex *v1;
1005   double dum,sum,tmpr,tmpi;
1006   double co,si,kRn;
1007   double TStime,TEtime;
1008   int MA_AN, GA_AN, wanA, tnoA, Anum;
1009   int LB_AN, GB_AN, wanB, tnoB, Bnum;
1010   int l1,l2,l3,Rn,direction;
1011   int LB_AN_e,GB_AN_e,GA_AN_e,Rn_e;
1012 
1013   int ID;
1014   int **iwIdx, Miwmax, Miw,iw0;
1015   double *r_energy,de;
1016 
1017   /* parallel setup */
1018 
1019   Miwmax = (tran_dos_energydiv)/numprocs+1;
1020 
1021   iwIdx=(int**)malloc(sizeof(int*)*numprocs);
1022   for (i=0;i<numprocs;i++) {
1023     iwIdx[i]=(int*)malloc(sizeof(int)*Miwmax);
1024   }
1025 
1026   TRAN_Distribute_Node_Idx(0, tran_dos_energydiv-1, numprocs, Miwmax,
1027                            iwIdx); /* output */
1028 
1029   /* set up energies where DOS is calculated */
1030   r_energy = (double*)malloc(sizeof(double)*tran_dos_energydiv);
1031 
1032   de = (tran_dos_energyrange[1]-tran_dos_energyrange[0])/(double)tran_dos_energydiv;
1033   for (i=0; i<tran_dos_energydiv; i++) {
1034     r_energy[i] = tran_dos_energyrange[0] + de*(double)i + ChemP_e[0];
1035   }
1036 
1037   /* setup MP */
1038   TRAN_Set_MP(0, atomnum, WhatSpecies, Spe_Total_CNO, &NUM_c, &i);
1039   MP = (int*)malloc(sizeof(int)*(NUM_c+1));
1040   TRAN_Set_MP(1, atomnum, WhatSpecies, Spe_Total_CNO, &NUM_c, MP);
1041 
1042   MP_e[0] = (int*)malloc(sizeof(int)*(NUM_e[0]+1));
1043   TRAN_Set_MP( 1,  atomnum_e[0], WhatSpecies_e[0], Spe_Total_CNO_e[0], &i, MP_e[0]);
1044 
1045   MP_e[1] = (int*)malloc(sizeof(int)*(NUM_e[1]+1));
1046   TRAN_Set_MP( 1,  atomnum_e[1], WhatSpecies_e[1], Spe_Total_CNO_e[1], &i, MP_e[1]);
1047 
1048   /* initialize */
1049   TRAN_Set_Value_double(SCC,NUM_c*NUM_c,    0.0,0.0);
1050   TRAN_Set_Value_double(SCL,NUM_c*NUM_e[0], 0.0,0.0);
1051   TRAN_Set_Value_double(SCR,NUM_c*NUM_e[1], 0.0,0.0);
1052   for (k=0; k<=SpinP_switch; k++) {
1053     TRAN_Set_Value_double(HCC[k],NUM_c*NUM_c,    0.0,0.0);
1054     TRAN_Set_Value_double(HCL[k],NUM_c*NUM_e[0], 0.0,0.0);
1055     TRAN_Set_Value_double(HCR[k],NUM_c*NUM_e[1], 0.0,0.0);
1056   }
1057 
1058   /* set Hamiltonian and overlap matrices of left and right leads */
1059 
1060   TRAN_Set_SurfOverlap(comm1,"left", k2, k3);
1061   TRAN_Set_SurfOverlap(comm1,"right",k2, k3);
1062 
1063   /* set CC, CL and CR */
1064 
1065   TRAN_Set_CentOverlap(   comm1,
1066                           3,
1067                           SpinP_switch,
1068                           k2,
1069                           k3,
1070                           order_GA,
1071                           H1,
1072                           S1,
1073                           nh, /* input */
1074                           CntOLP, /* input */
1075                           atomnum,
1076 			  Matomnum,
1077 			  M2G,
1078 			  G2ID,
1079                           WhatSpecies,
1080                           Spe_Total_CNO,
1081                           FNAN,
1082                           natn,
1083                           ncn,
1084                           atv_ijk);
1085 
1086   GCR    = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1087   GCA    = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1088   GRL    = (dcomplex*)malloc(sizeof(dcomplex)*NUM_e[0]* NUM_e[0]);
1089   GRR    = (dcomplex*)malloc(sizeof(dcomplex)*NUM_e[1]* NUM_e[1]);
1090   SigmaL = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1091   SigmaR = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1092   v1     = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1093   GCL_R  = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c*NUM_e[0]);
1094   GCR_R  = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c*NUM_e[1]);
1095   GCL_A  = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c*NUM_e[0]);
1096   GCR_A  = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c*NUM_e[1]);
1097 
1098   /* parallel global iw 0: tran_dos_energydiv-1 */
1099   /* parallel local  Miw 0:Miwmax-1             */
1100   /* parllel variable iw=iwIdx[myid][Miw]       */
1101 
1102   for (Miw=0; Miw<Miwmax; Miw++) {
1103 
1104     iw = iwIdx[myid][Miw];
1105 
1106     for (k=0; k<=SpinP_switch; k++) {
1107 
1108       if (iw>=0) {
1109 
1110         /**************************************
1111                     w = w.r + i w.i
1112         **************************************/
1113 
1114         w.r = r_energy[iw];
1115         w.i = tran_dos_energyrange[2];
1116 
1117         iside = 0;
1118 
1119         TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_e[iside][k], H01_e[iside][k],
1120 				   S00_e[iside], S01_e[iside], tran_surfgreen_iteration_max,
1121                                    tran_surfgreen_eps, GRL);
1122 
1123         TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRL, NUM_c, HCL[k], SCL, SigmaL);
1124 
1125         iside = 1;
1126 
1127         TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_e[iside][k],H01_e[iside][k],
1128 				   S00_e[iside], S01_e[iside], tran_surfgreen_iteration_max,
1129                                    tran_surfgreen_eps, GRR);
1130 
1131         TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRR, NUM_c, HCR[k], SCR, SigmaR);
1132 
1133         TRAN_Calc_CentGreen(w, NUM_c, SigmaL,SigmaR, HCC[k], SCC, GCR);
1134 
1135         /* GCL_R and GCR_R */
1136 
1137         iside = 0;
1138         TRAN_Calc_Hopping_G(w, NUM_e[iside], NUM_c, GRL, GCR, HCL[k], SCL, GCL_R);
1139 
1140         iside = 1;
1141         TRAN_Calc_Hopping_G(w, NUM_e[iside], NUM_c, GRR, GCR, HCR[k], SCR, GCR_R);
1142 
1143         /**************************************
1144                     w = w.r - i w.i
1145         **************************************/
1146 
1147         if (TRAN_dos_Kspace_grid2!=1 || TRAN_dos_Kspace_grid3!=1){
1148 
1149           w.r = r_energy[iw];
1150           w.i =-tran_dos_energyrange[2];
1151 
1152 	  iside=0;
1153 
1154 	  TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_e[iside][k],H01_e[iside][k],
1155 				     S00_e[iside], S01_e[iside], tran_surfgreen_iteration_max,
1156 				     tran_surfgreen_eps, GRL);
1157 
1158 	  TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRL, NUM_c, HCL[k], SCL, SigmaL);
1159 
1160 	  iside=1;
1161 
1162 	  TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_e[iside][k],H01_e[iside][k],
1163 				     S00_e[iside], S01_e[iside], tran_surfgreen_iteration_max,
1164 				     tran_surfgreen_eps, GRR);
1165 
1166 	  TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRR, NUM_c, HCR[k], SCR, SigmaR);
1167 
1168 	  TRAN_Calc_CentGreen(w, NUM_c, SigmaL,SigmaR, HCC[k], SCC, GCA);
1169 
1170           /* GCL_R and GCR_R */
1171 
1172           iside = 0;
1173           TRAN_Calc_Hopping_G(w, NUM_e[iside], NUM_c, GRL, GCA, HCL[k], SCL, GCL_A);
1174 
1175           iside = 1;
1176           TRAN_Calc_Hopping_G(w, NUM_e[iside], NUM_c, GRR, GCA, HCR[k], SCR, GCR_A);
1177 	}
1178 
1179         /***********************************************
1180           calculate density of states from the center G
1181         ***********************************************/
1182 
1183         q = 0;
1184 
1185 	for (AN=1; AN<=atomnum; AN++) {
1186 
1187           GA_AN = order_GA[AN];
1188 	  wanA = WhatSpecies[GA_AN];
1189 	  tnoA = Spe_Total_CNO[wanA];
1190 	  Anum = MP[GA_AN];
1191 
1192 	  for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1193 
1194 	    GB_AN = natn[GA_AN][LB_AN];
1195 	    Rn = ncn[GA_AN][LB_AN];
1196 	    wanB = WhatSpecies[GB_AN];
1197 	    tnoB = Spe_Total_CNO[wanB];
1198 	    Bnum = MP[GB_AN];
1199 
1200 	    l1 = atv_ijk[Rn][1];
1201 	    l2 = atv_ijk[Rn][2];
1202 	    l3 = atv_ijk[Rn][3];
1203 
1204 	    kRn = -(k2*(double)l2 + k3*(double)l3);
1205 	    si = sin(2.0*PI*kRn);
1206 	    co = cos(2.0*PI*kRn);
1207 
1208 	    /* note that SCC includes information on the phase factor of translational cells */
1209 
1210 	    if (TRAN_dos_Kspace_grid2==1 && TRAN_dos_Kspace_grid3==1){
1211 
1212 	      for (i=0; i<tnoA; i++) {
1213 
1214   	        sum = 0.0;
1215 		for (j=0; j<tnoB; j++) {
1216 
1217 		  sum += (double)(l1==0)*S1[q]*(-GCR[ v_idx( Anum+i, Bnum+j) ].i);
1218 
1219 		  q++;
1220 		}
1221 
1222  	        Dos[iw][k][Anum+i-1] += (float)k_op*sum/PI;
1223 
1224 	      }
1225 	    }
1226 
1227 	    else {
1228 
1229 	      for (i=0; i<tnoA; i++) {
1230 
1231   	        sum = 0.0;
1232 		for (j=0; j<tnoB; j++) {
1233 
1234 		  tmpr =-0.5*(GCR[ v_idx( Anum+i, Bnum+j ) ].i - GCA[ v_idx( Anum+i, Bnum+j ) ].i);
1235 		  tmpi = 0.5*(GCR[ v_idx( Anum+i, Bnum+j ) ].r - GCA[ v_idx( Anum+i, Bnum+j ) ].r);
1236 		  sum += (double)(l1==0)*S1[q]*(tmpr*co - tmpi*si);
1237 		  q++;
1238 		}
1239 
1240  	        Dos[iw][k][Anum+i-1] += (float)k_op*sum/PI;
1241 
1242 	      }
1243 
1244 	    }
1245 	  }
1246 
1247 
1248 	} /* AN */
1249 
1250         /*******************************************************
1251          calculate density of states contributed from the
1252          off-diagonal G between the Central and Left regions
1253         *******************************************************/
1254 
1255         iside = 0;
1256         direction = -1;
1257 
1258         for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
1259 
1260           wanA = WhatSpecies[GA_AN];
1261           tnoA = Spe_Total_CNO[wanA];
1262           Anum = MP[GA_AN];
1263 
1264           if (TRAN_region[GA_AN]%10==2){
1265 
1266 	    GA_AN_e =  TRAN_Original_Id[GA_AN];
1267 
1268 	    for (i=0; i<tnoA; i++){
1269 
1270 	      sum = 0.0;
1271 
1272 	      for (LB_AN_e=0; LB_AN_e<=FNAN_e[iside][GA_AN_e]; LB_AN_e++){
1273 
1274 		GB_AN_e = natn_e[iside][GA_AN_e][LB_AN_e];
1275 		Rn_e = ncn_e[iside][GA_AN_e][LB_AN_e];
1276 		wanB = WhatSpecies_e[iside][GB_AN_e];
1277 		tnoB = Spe_Total_CNO_e[iside][wanB];
1278 		Bnum = MP_e[iside][GB_AN_e];
1279 
1280 		l1 = atv_ijk_e[iside][Rn_e][1];
1281 		l2 = atv_ijk_e[iside][Rn_e][2];
1282 		l3 = atv_ijk_e[iside][Rn_e][3];
1283 
1284   	        kRn = -(k2*(double)l2 + k3*(double)l3);
1285 		si = sin(2.0*PI*kRn);
1286 		co = cos(2.0*PI*kRn);
1287 
1288 		if (l1==direction) {
1289 		  for (j=0; j<tnoB; j++){
1290 
1291 		    if (TRAN_dos_Kspace_grid2==1 && TRAN_dos_Kspace_grid3==1){
1292 		      sum += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*(-GCL_R[ v_idx( Anum+i, Bnum+j) ].i);
1293 		    }
1294 		    else {
1295 		      tmpr =-0.5*(GCL_R[ v_idx( Anum+i, Bnum+j) ].i - GCL_A[ v_idx( Anum+i, Bnum+j) ].i);
1296 		      tmpi = 0.5*(GCL_R[ v_idx( Anum+i, Bnum+j) ].r - GCL_A[ v_idx( Anum+i, Bnum+j) ].r);
1297 		      sum += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*(tmpr*co - tmpi*si);
1298 		    }
1299 		  }
1300 		}
1301 	      } /* LB_AN */
1302 
1303 	      Dos[iw][k][Anum+i-1] += (float)k_op*sum/PI;
1304 
1305 	    } /* i */
1306 	  }
1307 	}
1308 
1309         /*******************************************************
1310          calculate density of states contributed from the
1311          off-diagonal G between the Central and Right regions
1312         *******************************************************/
1313 
1314         iside = 1;
1315         direction = 1;
1316 
1317         for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
1318 
1319           wanA = WhatSpecies[GA_AN];
1320           tnoA = Spe_Total_CNO[wanA];
1321           Anum = MP[GA_AN];
1322 
1323           if (TRAN_region[GA_AN]%10==3){
1324 
1325 	    GA_AN_e =  TRAN_Original_Id[GA_AN];
1326 
1327 	    for (i=0; i<tnoA; i++){
1328 
1329 	      sum = 0.0;
1330 
1331 	      for (LB_AN_e=0; LB_AN_e<=FNAN_e[iside][GA_AN_e]; LB_AN_e++){
1332 
1333 		GB_AN_e = natn_e[iside][GA_AN_e][LB_AN_e];
1334 		Rn_e = ncn_e[iside][GA_AN_e][LB_AN_e];
1335 		wanB = WhatSpecies_e[iside][GB_AN_e];
1336 		tnoB = Spe_Total_CNO_e[iside][wanB];
1337 		Bnum = MP_e[iside][GB_AN_e];
1338 
1339 		l1 = atv_ijk_e[iside][Rn_e][1];
1340 		l2 = atv_ijk_e[iside][Rn_e][2];
1341 		l3 = atv_ijk_e[iside][Rn_e][3];
1342 
1343   	        kRn = -(k2*(double)l2 + k3*(double)l3);
1344 		si = sin(2.0*PI*kRn);
1345 		co = cos(2.0*PI*kRn);
1346 
1347 		if (l1==direction) {
1348 		  for (j=0; j<tnoB; j++){
1349 
1350 		    if (TRAN_dos_Kspace_grid2==1 && TRAN_dos_Kspace_grid3==1){
1351 		      sum += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*(-GCR_R[ v_idx( Anum+i, Bnum+j) ].i);
1352 		    }
1353 		    else {
1354 		      tmpr =-0.5*(GCR_R[ v_idx( Anum+i, Bnum+j) ].i - GCR_A[ v_idx( Anum+i, Bnum+j) ].i);
1355 		      tmpi = 0.5*(GCR_R[ v_idx( Anum+i, Bnum+j) ].r - GCR_A[ v_idx( Anum+i, Bnum+j) ].r);
1356 		      sum += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*(tmpr*co - tmpi*si);
1357 		    }
1358 		  }
1359 		}
1360 	      } /* LB_AN */
1361 
1362 	      Dos[iw][k][Anum+i-1] += (float)k_op*sum/PI;
1363 
1364 	    } /* i */
1365 	  }
1366 	}
1367 
1368       } /* iw>=0 */
1369     }   /* for k */
1370   }     /* Miw   */
1371 
1372   /* free arrays */
1373 
1374   free(GCR_A);
1375   free(GCL_A);
1376   free(GCR_R);
1377   free(GCL_R);
1378   free(v1);
1379   free(SigmaR);
1380   free(SigmaL);
1381   free(GRR);
1382   free(GRL);
1383   free(GCR);
1384   free(GCA);
1385   free(MP_e[1]);
1386   free(MP_e[0]);
1387   free(MP);
1388 
1389   for (i=0;i<numprocs;i++) {
1390     free(iwIdx[i]);
1391   }
1392   free(iwIdx);
1393 
1394   free(r_energy);
1395 }
1396 
1397 
1398 
1399 
1400 
1401 
1402 
1403 
1404 
1405 
TRAN_DFT_Dosout_Col(MPI_Comm comm1,int level_stdout,int iter,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int ** atv_ijk,int * List_YOUSO,int ** Spe_Num_CBasis,int SpeciesNum,char * filename,char * filepath,double ***** CDM,double ***** EDM,double Eele0[2],double Eele1[2])1406 static double TRAN_DFT_Dosout_Col(
1407 		/* input */
1408                 MPI_Comm comm1,
1409                 int level_stdout,
1410 		int iter,
1411 		int SpinP_switch,
1412 		double *****nh,   /* H */
1413 		double *****ImNL, /* not used, s-o coupling */
1414 		double ****CntOLP,
1415 		int atomnum,
1416 		int Matomnum,
1417 		int *WhatSpecies,
1418 		int *Spe_Total_CNO,
1419 		int *FNAN,
1420 		int **natn,
1421 		int **ncn,
1422 		int *M2G,
1423 		int *G2ID,
1424 		int **atv_ijk,
1425 		int *List_YOUSO,
1426                 int **Spe_Num_CBasis,
1427                 int SpeciesNum,
1428                 char *filename,
1429                 char *filepath,
1430 		/* output */
1431 		double *****CDM,  /* not used */
1432 		double *****EDM,  /* not used */
1433 		double Eele0[2], double Eele1[2]) /* not used */
1434 {
1435   int numprocs0,numprocs1,myid0,myid1,ID;
1436   int myworld1,i2,i3,k_op,ik;
1437   int T_knum,kloop0,kloop,Anum;
1438   int i,j,spin,MA_AN,GA_AN,wanA,tnoA;
1439   int LB_AN,GB_AN,wanB,tnoB,k;
1440   int E_knum,S_knum,num_kloop0,parallel_mode;
1441   int **op_flag,*T_op_flag,*T_k_ID;
1442   double *T_KGrids2,*T_KGrids3;
1443   double k2,k3,tmp;
1444   double TStime,TEtime;
1445   float ***Dos;
1446 
1447   int *MP;
1448   int *order_GA;
1449   int *My_NZeros;
1450   int *SP_NZeros;
1451   int *SP_Atoms;
1452   int size_H1;
1453   double **H1,*S1;
1454 
1455   int Num_Comm_World1;
1456   int *NPROCS_ID1;
1457   int *Comm_World1;
1458   int *NPROCS_WD1;
1459   int *Comm_World_StartID1;
1460   MPI_Comm *MPI_CommWD1;
1461 
1462   MPI_Comm_size(comm1,&numprocs0);
1463   MPI_Comm_rank(comm1,&myid0);
1464 
1465   dtime(&TStime);
1466 
1467   if (myid0==Host_ID){
1468     printf("<TRAN_DFT_Dosout>\n"); fflush(stdout);
1469   }
1470 
1471   /* allocate Dos */
1472 
1473   TRAN_Set_MP(0, atomnum, WhatSpecies, Spe_Total_CNO, &NUM_c, &i);
1474 
1475   Dos = (float***)malloc(sizeof(float**)*tran_dos_energydiv);
1476   for (ik=0; ik<tran_dos_energydiv; ik++) {
1477     Dos[ik] = (float**)malloc(sizeof(float*)*(SpinP_switch+1) );
1478     for (spin=0; spin<=SpinP_switch; spin++) {
1479       Dos[ik][spin] = (float*)malloc(sizeof(float)*NUM_c);
1480       for (i=0; i<NUM_c; i++)  Dos[ik][spin][i] = 0.0;
1481     }
1482   }
1483 
1484   /***********************************
1485         set up operation flag
1486   ************************************/
1487 
1488   op_flag = (int**)malloc(sizeof(int*)*TRAN_dos_Kspace_grid2);
1489   for (i2=0; i2<TRAN_dos_Kspace_grid2; i2++){
1490     op_flag[i2] = (int*)malloc(sizeof(int)*TRAN_dos_Kspace_grid3);
1491     for (i3=0; i3<TRAN_dos_Kspace_grid3; i3++){
1492       op_flag[i2][i3] = -999;
1493     }
1494   }
1495 
1496   for (i2=0; i2<TRAN_dos_Kspace_grid2; i2++){
1497     for (i3=0; i3<TRAN_dos_Kspace_grid3; i3++){
1498 
1499       if (op_flag[i2][i3]<0){
1500 
1501 	if ( (TRAN_dos_Kspace_grid2-1-i2)==i2 && (TRAN_dos_Kspace_grid3-1-i3)==i3 ){
1502 	  op_flag[i2][i3] = 1;
1503 	}
1504 	else{
1505 	  op_flag[i2][i3] = 2;
1506 	  op_flag[TRAN_dos_Kspace_grid2-1-i2][TRAN_dos_Kspace_grid3-1-i3] = 0;
1507 	}
1508       }
1509 
1510     }
1511   }
1512 
1513   /***********************************
1514        one-dimentionalize for MPI
1515   ************************************/
1516 
1517   T_knum = 0;
1518   for (i2=0; i2<TRAN_dos_Kspace_grid2; i2++){
1519     for (i3=0; i3<TRAN_dos_Kspace_grid3; i3++){
1520       if (0<op_flag[i2][i3]) T_knum++;
1521     }
1522   }
1523 
1524   T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
1525   T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
1526   T_op_flag = (int*)malloc(sizeof(int)*T_knum);
1527   T_k_ID = (int*)malloc(sizeof(int)*T_knum);
1528 
1529   T_knum = 0;
1530 
1531   for (i2=0; i2<TRAN_dos_Kspace_grid2; i2++){
1532 
1533     k2 = -0.5 + (2.0*(double)i2+1.0)/(2.0*(double)TRAN_dos_Kspace_grid2) + Shift_K_Point;
1534 
1535     for (i3=0; i3<TRAN_dos_Kspace_grid3; i3++){
1536 
1537       k3 = -0.5 + (2.0*(double)i3+1.0)/(2.0*(double)TRAN_dos_Kspace_grid3) - Shift_K_Point;
1538 
1539       if (0<op_flag[i2][i3]){
1540 
1541         T_KGrids2[T_knum] = k2;
1542         T_KGrids3[T_knum] = k3;
1543         T_op_flag[T_knum] = op_flag[i2][i3];
1544 
1545         T_knum++;
1546       }
1547     }
1548   }
1549 
1550   /***************************************************
1551    allocate calculations of k-points into processors
1552   ***************************************************/
1553 
1554   if (numprocs0<T_knum){
1555 
1556     /* set parallel_mode */
1557     parallel_mode = 0;
1558 
1559     /* allocation of kloop to ID */
1560 
1561     for (ID=0; ID<numprocs0; ID++){
1562 
1563       tmp = (double)T_knum/(double)numprocs0;
1564       S_knum = (int)((double)ID*(tmp+1.0e-12));
1565       E_knum = (int)((double)(ID+1)*(tmp+1.0e-12)) - 1;
1566       if (ID==(numprocs0-1)) E_knum = T_knum - 1;
1567       if (E_knum<0)          E_knum = 0;
1568 
1569       for (k=S_knum; k<=E_knum; k++){
1570         /* ID in the first level world */
1571         T_k_ID[k] = ID;
1572       }
1573     }
1574 
1575     /* find own informations */
1576 
1577     tmp = (double)T_knum/(double)numprocs0;
1578     S_knum = (int)((double)myid0*(tmp+1.0e-12));
1579     E_knum = (int)((double)(myid0+1)*(tmp+1.0e-12)) - 1;
1580     if (myid0==(numprocs0-1)) E_knum = T_knum - 1;
1581     if (E_knum<0)             E_knum = 0;
1582 
1583     num_kloop0 = E_knum - S_knum + 1;
1584 
1585   }
1586 
1587   else {
1588 
1589     /* set parallel_mode */
1590     parallel_mode = 1;
1591     num_kloop0 = 1;
1592 
1593     Num_Comm_World1 = T_knum;
1594 
1595     NPROCS_ID1 = (int*)malloc(sizeof(int)*numprocs0);
1596     Comm_World1 = (int*)malloc(sizeof(int)*numprocs0);
1597     NPROCS_WD1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
1598     Comm_World_StartID1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
1599     MPI_CommWD1 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World1);
1600 
1601     Make_Comm_Worlds(comm1, myid0, numprocs0, Num_Comm_World1, &myworld1, MPI_CommWD1,
1602 		     NPROCS_ID1, Comm_World1, NPROCS_WD1, Comm_World_StartID1);
1603 
1604     MPI_Comm_size(MPI_CommWD1[myworld1],&numprocs1);
1605     MPI_Comm_rank(MPI_CommWD1[myworld1],&myid1);
1606 
1607     S_knum = myworld1;
1608 
1609     /* allocate k-points into processors */
1610 
1611     for (k=0; k<T_knum; k++){
1612       /* ID in the first level world */
1613       T_k_ID[k] = Comm_World_StartID1[k];
1614     }
1615 
1616   }
1617 
1618   /*************************************************************
1619    one-dimensitonalize H and S and store them in a compact form
1620   *************************************************************/
1621 
1622   MP = (int*)malloc(sizeof(int)*(atomnum+1));
1623   order_GA = (int*)malloc(sizeof(int)*(atomnum+1));
1624 
1625   My_NZeros = (int*)malloc(sizeof(int)*numprocs0);
1626   SP_NZeros = (int*)malloc(sizeof(int)*numprocs0);
1627   SP_Atoms = (int*)malloc(sizeof(int)*numprocs0);
1628 
1629   size_H1 = Get_OneD_HS_Col(0, nh[0], &tmp, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
1630 
1631   H1 = (double**)malloc(sizeof(double*)*(SpinP_switch+1));
1632   for (spin=0; spin<(SpinP_switch+1); spin++){
1633     H1[spin] = (double*)malloc(sizeof(double)*size_H1);
1634   }
1635 
1636   S1 = (double*)malloc(sizeof(double)*size_H1);
1637 
1638   for (spin=0; spin<(SpinP_switch+1); spin++){
1639     size_H1 = Get_OneD_HS_Col(1, nh[spin], H1[spin], MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
1640   }
1641 
1642   size_H1 = Get_OneD_HS_Col(1, CntOLP, S1, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
1643 
1644   /***********************************************************
1645    start "kloop0"
1646   ***********************************************************/
1647 
1648   for (kloop0=0; kloop0<num_kloop0; kloop0++){
1649 
1650     kloop = S_knum + kloop0;
1651 
1652     k2 = T_KGrids2[kloop];
1653     k3 = T_KGrids3[kloop];
1654     k_op = T_op_flag[kloop];
1655 
1656     if (parallel_mode){
1657 
1658         TRAN_DFT_Kdependent_Col(MPI_CommWD1[myworld1],
1659   			    parallel_mode, numprocs1, myid1,
1660                             level_stdout, iter, SpinP_switch, k2, k3, k_op, order_GA,
1661                             H1, S1,
1662                             nh, ImNL, CntOLP,
1663 	  	  	    atomnum, Matomnum, WhatSpecies, Spe_Total_CNO, FNAN,
1664 			    natn, ncn, M2G, G2ID, atv_ijk, List_YOUSO, Dos, EDM, Eele0, Eele1);
1665     }
1666 
1667     else{
1668 
1669         TRAN_DFT_Kdependent_Col(comm1,
1670  			    parallel_mode, 1, 0,
1671                             level_stdout, iter, SpinP_switch, k2, k3, k_op, order_GA,
1672                             H1, S1,
1673                             nh, ImNL, CntOLP,
1674 	  	  	    atomnum, Matomnum, WhatSpecies, Spe_Total_CNO, FNAN,
1675 			    natn, ncn, M2G, G2ID, atv_ijk, List_YOUSO, Dos, EDM, Eele0, Eele1);
1676     }
1677   }
1678 
1679   /*******************************************************
1680           summ up Dos by MPI and send it Host_ID
1681   *******************************************************/
1682 
1683   MPI_Barrier(comm1);
1684 
1685   {
1686     float *Dos0;
1687 
1688     Dos0 = (float*)malloc(sizeof(float)*NUM_c);
1689 
1690     tmp = 1.0/(double)(TRAN_dos_Kspace_grid2*TRAN_dos_Kspace_grid3);
1691 
1692     for (ik=0; ik<tran_dos_energydiv; ik++) {
1693       for (spin=0; spin<=SpinP_switch; spin++) {
1694 
1695 	MPI_Reduce(&Dos[ik][spin][0], &Dos0[0], NUM_c, MPI_FLOAT, MPI_SUM, Host_ID, comm1);
1696         MPI_Barrier(comm1);
1697         for (i=0; i<NUM_c; i++)  Dos[ik][spin][i] = Dos0[i]*tmp;
1698       }
1699     }
1700 
1701     free(Dos0);
1702   }
1703 
1704   /**********************************************************
1705                      save Dos to a file
1706   **********************************************************/
1707 
1708   if (myid0==Host_ID){
1709 
1710     FILE *fp_eig, *fp_ev;
1711     char file_eig[YOUSO10],file_ev[YOUSO10];
1712     int l,MaxL;
1713     double *r_energy,de;
1714     int i_vec[10];
1715 
1716     r_energy = (double*)malloc(sizeof(double)*tran_dos_energydiv);
1717 
1718     de = (tran_dos_energyrange[1]-tran_dos_energyrange[0])/(double)tran_dos_energydiv;
1719     for (i=0; i<tran_dos_energydiv; i++) {
1720       r_energy[i] = tran_dos_energyrange[0] + de*(double)i + ChemP_e[0];
1721     }
1722 
1723     /* write *.Dos.val */
1724 
1725     sprintf(file_eig,"%s%s.Dos.val",filepath,filename);
1726 
1727     if ( (fp_eig=fopen(file_eig,"w"))==NULL ) {
1728       printf("can not open a file %s\n",file_eig);
1729     }
1730     else {
1731 
1732       printf("  write eigenvalues\n");
1733 
1734       fprintf(fp_eig,"mode        6\n");
1735       fprintf(fp_eig,"NonCol      0\n");
1736       fprintf(fp_eig,"N           %d\n",NUM_c);
1737       fprintf(fp_eig,"Nspin       %d\n",SpinP_switch);
1738       fprintf(fp_eig,"Erange      %lf %lf\n",tran_dos_energyrange[0],tran_dos_energyrange[1]);
1739       fprintf(fp_eig,"Kgrid       %d %d %d\n",1,1,1);
1740       fprintf(fp_eig,"atomnum     %d\n",atomnum);
1741       fprintf(fp_eig,"<WhatSpecies\n");
1742       for (i=1; i<=atomnum; i++) {
1743         fprintf(fp_eig,"%d ",WhatSpecies[i]);
1744       }
1745       fprintf(fp_eig,"\nWhatSpecies>\n");
1746       fprintf(fp_eig,"SpeciesNum     %d\n",SpeciesNum);
1747       fprintf(fp_eig,"<Spe_Total_CNO\n");
1748       for (i=0;i<SpeciesNum;i++) {
1749         fprintf(fp_eig,"%d ",Spe_Total_CNO[i]);
1750       }
1751       fprintf(fp_eig,"\nSpe_Total_CNO>\n");
1752       MaxL=4;
1753       fprintf(fp_eig,"MaxL           %d\n",4);
1754       fprintf(fp_eig,"<Spe_Num_CBasis\n");
1755       for (i=0;i<SpeciesNum;i++) {
1756         for (l=0;l<=MaxL;l++) {
1757 	  fprintf(fp_eig,"%d ",Spe_Num_CBasis[i][l]);
1758         }
1759         fprintf(fp_eig,"\n");
1760       }
1761       fprintf(fp_eig,"Spe_Num_CBasis>\n");
1762       fprintf(fp_eig,"ChemP       %lf\n",ChemP_e[0]);
1763 
1764       fprintf(fp_eig,"irange      %d %d\n",0,tran_dos_energydiv-1);
1765       fprintf(fp_eig,"<Eigenvalues\n");
1766       for (spin=0; spin<=SpinP_switch; spin++) {
1767         fprintf(fp_eig,"%d %d %d ",0,0,0);
1768         for (ik=0; ik<tran_dos_energydiv; ik++) {
1769           fprintf(fp_eig,"%lf ",r_energy[ik]);
1770 	}
1771         fprintf(fp_eig,"\n");
1772       }
1773       fprintf(fp_eig,"Eigenvalues>\n");
1774 
1775       fclose(fp_eig);
1776     }
1777 
1778     /* write *.Dos.vec */
1779 
1780     printf("  write eigenvectors\n");
1781 
1782     sprintf(file_ev,"%s%s.Dos.vec",filepath,filename);
1783 
1784     if ( (fp_ev=fopen(file_ev,"w"))==NULL ) {
1785       printf("can not open a file %s\n",file_ev);
1786     }
1787     else {
1788 
1789       for (spin=0; spin<=SpinP_switch; spin++) {
1790         for (ik=0; ik<tran_dos_energydiv; ik++) {
1791 
1792           i_vec[0]=i_vec[1]=i_vec[2]=0;
1793           if (myid0==Host_ID) fwrite(i_vec,sizeof(int),3,fp_ev);
1794 
1795           for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
1796 	    wanA = WhatSpecies[GA_AN];
1797 	    tnoA = Spe_Total_CNO[wanA];
1798             Anum = MP[GA_AN];
1799             fwrite(&Dos[ik][spin][Anum-1],sizeof(float),tnoA,fp_ev);
1800 	  }
1801 	}
1802       }
1803 
1804       fclose(fp_ev);
1805     }
1806 
1807     /* free arrays */
1808 
1809     free(r_energy);
1810   }
1811 
1812   /* free arrays */
1813 
1814   for (ik=0; ik<tran_dos_energydiv; ik++) {
1815     for (spin=0; spin<=SpinP_switch; spin++) {
1816       free(Dos[ik][spin]);
1817     }
1818     free(Dos[ik]);
1819   }
1820   free(Dos);
1821 
1822   for (i2=0; i2<TRAN_dos_Kspace_grid2; i2++){
1823     free(op_flag[i2]);
1824   }
1825   free(op_flag);
1826 
1827   free(T_KGrids2);
1828   free(T_KGrids3);
1829   free(T_op_flag);
1830   free(T_k_ID);
1831 
1832   if (T_knum<=numprocs0){
1833 
1834     if (Num_Comm_World1<=numprocs0){
1835       MPI_Comm_free(&MPI_CommWD1[myworld1]);
1836     }
1837 
1838     free(NPROCS_ID1);
1839     free(Comm_World1);
1840     free(NPROCS_WD1);
1841     free(Comm_World_StartID1);
1842     free(MPI_CommWD1);
1843   }
1844 
1845   free(MP);
1846   free(order_GA);
1847 
1848   free(My_NZeros);
1849   free(SP_NZeros);
1850   free(SP_Atoms);
1851 
1852   for (spin=0; spin<(SpinP_switch+1); spin++){
1853     free(H1[spin]);
1854   }
1855   free(H1);
1856 
1857   free(S1);
1858 
1859   /* for elapsed time */
1860   dtime(&TEtime);
1861 
1862   /*
1863   if (myid==Host_ID){
1864     printf("TRAN_DFT_Dosout time=%12.7f\n",TEtime - TStime);
1865   }
1866   */
1867 
1868   return TEtime - TStime;
1869 }
1870 
1871 
1872 /* revised by Y. Xiao for Noncollinear NEGF calculations */
TRAN_DFT_Dosout(MPI_Comm comm1,int level_stdout,int iter,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int ** atv_ijk,int * List_YOUSO,int ** Spe_Num_CBasis,int SpeciesNum,char * filename,char * filepath,double ***** CDM,double ***** EDM,double Eele0[2],double Eele1[2])1873 double TRAN_DFT_Dosout(
1874                 /* input */
1875                 MPI_Comm comm1,
1876                 int level_stdout,
1877                 int iter,
1878                 int SpinP_switch,
1879                 double *****nh,   /* H */
1880                 double *****ImNL, /* not used, s-o coupling */
1881                 double ****CntOLP,
1882                 int atomnum,
1883                 int Matomnum,
1884                 int *WhatSpecies,
1885                 int *Spe_Total_CNO,
1886                 int *FNAN,
1887                 int **natn,
1888                 int **ncn,
1889                 int *M2G,
1890                 int *G2ID,
1891                 int **atv_ijk,
1892                 int *List_YOUSO,
1893                 int **Spe_Num_CBasis,
1894                 int SpeciesNum,
1895                 char *filename,
1896                 char *filepath,
1897                 /* output */
1898                 double *****CDM,  /* not used */
1899                 double *****EDM,  /* not used */
1900                 double Eele0[2], double Eele1[2]) /* not used */
1901 {
1902   double TStime,TEtime,time5;
1903 
1904   dtime(&TStime);
1905 
1906      if ( SpinP_switch < 2 ) {
1907                 TRAN_DFT_Dosout_Col( comm1, level_stdout, iter, SpinP_switch, nh, ImNL, CntOLP,
1908                 atomnum, Matomnum, WhatSpecies, Spe_Total_CNO, FNAN, natn, ncn, M2G, G2ID, atv_ijk,
1909                 List_YOUSO, Spe_Num_CBasis, SpeciesNum, filename, filepath, CDM, EDM, Eele0, Eele1);
1910      } else {
1911 
1912                 TRAN_DFT_Dosout_NC( comm1, level_stdout, iter, SpinP_switch, nh, ImNL, CntOLP,
1913                 atomnum, Matomnum, WhatSpecies, Spe_Total_CNO, FNAN, natn, ncn, M2G, G2ID, atv_ijk,
1914                 List_YOUSO, Spe_Num_CBasis, SpeciesNum, filename, filepath, CDM, EDM, Eele0, Eele1);
1915 
1916      }
1917 
1918   dtime(&TEtime);
1919   return TEtime - TStime;
1920 }
1921 
1922