1 /**********************************************************************
2   TRAN_DFT.c:
3 
4   TRAN_DFT.c is a subroutine to perform self-consistent calculations
5   of a central region with left and right infinite leads based on
6   a non-equilibrium Green's function method.
7 
8   Log of TRAN_DFT.c:
9 
10      11/Dec/2005   Released by H.Kino
11      24/July/2008  Modified by T.Ozaki
12 
13 ***********************************************************************/
14 
15 #define MEASURE_TIME  0
16 
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <math.h>
20 #include <time.h>
21 #include <mpi.h>
22 #include <omp.h>
23 #include "tran_prototypes.h"
24 #include "tran_variables.h"
25 
26 void dtime(double *);
27 
28 void k_inversion(int i,  int j,  int k,
29                  int mi, int mj, int mk,
30                  int *ii, int *ij, int *ik );
31 
32 double Band_DFT_Col(int SCF_iter,
33                     int knum_i, int knum_j, int knum_k,
34 		    int SpinP_switch,
35 		    double *****nh,
36 		    double *****ImNL,
37 		    double ****CntOLP,
38 		    double *****CDM,
39 		    double *****EDM,
40 		    double Eele0[2],
41                     double Eele1[2],
42 		    int *MP,
43 		    int *order_GA,
44 		    double *ko,
45 		    double *koS,
46 		    double ***EIGEN,
47 		    double *H1,
48 		    double *S1,
49 		    double *CDM1,
50 		    double *EDM1,
51 		    dcomplex **H,
52 		    dcomplex **S,
53 		    dcomplex **C,
54                     dcomplex *BLAS_S,
55 		    int ***k_op,
56 		    int *T_k_op,
57 		    int **T_k_ID,
58 		    double *T_KGrids1,
59 		    double *T_KGrids2,
60 		    double *T_KGrids3,
61                     int myworld1,
62 		    int *NPROCS_ID1,
63 		    int *Comm_World1,
64 		    int *NPROCS_WD1,
65 		    int *Comm_World_StartID1,
66 		    MPI_Comm *MPI_CommWD1,
67                     int myworld2,
68 		    int *NPROCS_ID2,
69 		    int *NPROCS_WD2,
70 		    int *Comm_World2,
71 		    int *Comm_World_StartID2,
72 		    MPI_Comm *MPI_CommWD2);
73 
74 void Make_Comm_Worlds(
75    MPI_Comm MPI_Curret_Comm_WD,
76    int myid0,
77    int numprocs0,
78    int Num_Comm_World,
79    int *myworld1,
80    MPI_Comm *MPI_CommWD,     /* size: Num_Comm_World */
81    int *NPROCS1_ID,          /* size: numprocs0 */
82    int *Comm_World1,         /* size: numprocs0 */
83    int *NPROCS1_WD,          /* size: Num_Comm_World */
84    int *Comm_World_StartID   /* size: Num_Comm_World */
85    );
86 
87 
88 
89 int Get_OneD_HS_Col(int set_flag, double ****RH, double *H1, int *MP,
90                     int *order_GA, int *My_NZeros, int *is1, int *is2);
91 
92 static void TRAN_Add_MAT(
93     int mode,
94     int NUM_c,
95     dcomplex w_weight,
96     dcomplex *v,
97     dcomplex *out
98     );
99 
100 static double TRAN_DFT_Original(
101 		/* input */
102                 MPI_Comm comm1,
103                 int level_stdout,
104 		int iter,
105 		int SpinP_switch,
106 		double *****nh,  /* H */
107 		double *****ImNL, /* not used, s-o coupling */
108 		double ****CntOLP,
109 		int atomnum,
110 		int Matomnum,
111 		int *WhatSpecies,
112 		int *Spe_Total_CNO,
113 		int *FNAN,
114 		int **natn,
115 		int **ncn,
116 		int *M2G,
117 		int *G2ID,
118                 int *F_G2M,
119 		int **atv_ijk,
120 		int *List_YOUSO,
121 		/* output */
122 		double *****CDM,  /* output, density matrix */
123 		double *****EDM,  /* not used */
124                 double ***TRAN_DecMulP, /* output, partial DecMulP */
125 		double Eele0[2], double Eele1[2],
126                 double ChemP_e0[2]);
127 
128 
129 static void TRAN_DFT_Kdependent(
130 			  /* input */
131 			  MPI_Comm comm1,
132                           int parallel_mode,
133                           int numprocs,
134                           int myid,
135 			  int level_stdout,
136 			  int iter,
137 			  int SpinP_switch,
138                           double k2,
139                           double k3,
140                           int k_op,
141                           int *order_GA,
142                           double **DM1,
143                           double **H1,
144                           double *S1,
145 			  double *****nh,  /* H */
146 			  double *****ImNL, /* not used, s-o coupling */
147 			  double ****CntOLP,
148 			  int atomnum,
149 			  int Matomnum,
150 			  int *WhatSpecies,
151 			  int *Spe_Total_CNO,
152 			  int *FNAN,
153 			  int **natn,
154 			  int **ncn,
155 			  int *M2G,
156 			  int *G2ID,
157 			  int **atv_ijk,
158 			  int *List_YOUSO,
159 			  /* output */
160 			  double *****CDM,  /* output, charge density */
161 			  double *****EDM,  /* not used */
162 			  double Eele0[2], double Eele1[2]); /* not used */
163 
164 
165 
TRAN_DFT(MPI_Comm comm1,int SucceedReadingDMfile,int level_stdout,int iter,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int * F_G2M,int ** atv_ijk,int * List_YOUSO,double ***** CDM,double ***** EDM,double *** TRAN_DecMulP,double Eele0[2],double Eele1[2],double ChemP_e0[2])166 double TRAN_DFT(
167 		/* input */
168                 MPI_Comm comm1,
169                 int SucceedReadingDMfile,
170                 int level_stdout,
171 		int iter,
172 		int SpinP_switch,
173 		double *****nh,  /* H */
174 		double *****ImNL, /* not used, s-o coupling */
175 		double ****CntOLP,
176 		int atomnum,
177 		int Matomnum,
178 		int *WhatSpecies,
179 		int *Spe_Total_CNO,
180 		int *FNAN,
181 		int **natn,
182 		int **ncn,
183 		int *M2G,
184 		int *G2ID,
185                 int *F_G2M,
186 		int **atv_ijk,
187 		int *List_YOUSO,
188 		/* output */
189 		double *****CDM,  /* output, density matrix */
190 		double *****EDM,  /* not used */
191                 double ***TRAN_DecMulP, /* output, partial DecMulP */
192 		double Eele0[2], double Eele1[2],
193                 double ChemP_e0[2])
194 {
195   double TStime,TEtime,time5;
196 
197   dtime(&TStime);
198 
199   /******************************************************
200     if TRAN_Iter_Initial_Band<SCF_iter, employ TRAN_DFT
201   ******************************************************/
202 
203   if (TRAN_SCF_Iter_Band<iter || SucceedReadingDMfile==1){
204 
205     TRAN_DFT_Original( comm1,
206 		       level_stdout,
207 		       iter,
208 		       SpinP_switch,
209 		       nh,   /* H */
210 		       ImNL, /* not used, s-o coupling */
211 		       CntOLP,
212 		       atomnum,
213 		       Matomnum,
214 		       WhatSpecies,
215 		       Spe_Total_CNO,
216 		       FNAN,
217 		       natn,
218 		       ncn,
219 		       M2G,
220 		       G2ID,
221 		       F_G2M,
222 		       atv_ijk,
223 		       List_YOUSO,
224 		       /* output */
225 		       CDM,  /* output, density matrix */
226 		       EDM,  /* not used */
227 		       TRAN_DecMulP, /* output, partial DecMulP */
228 		       Eele0, Eele1,
229 		       ChemP_e0);
230   }
231 
232   /*************************************************
233     if SCF_iter<=TRAN_SCF_Iter_Band,
234     employ the band diagonalization
235   *************************************************/
236 
237   else {
238 
239     int i,j,k,n,n2,wanA;
240     int ii,ij,ik,T_knum,size_H1;
241     int Kspace_grid1,Kspace_grid2,Kspace_grid3;
242     int *MP;
243     int *order_GA;
244     int *My_NZeros;
245     int *SP_NZeros;
246     int *SP_Atoms;
247     double *ko;
248     double *koS;
249     double tmp;
250     dcomplex **H_Band_Col;
251     dcomplex **S_Band;
252     dcomplex **C_Band_Col;
253     dcomplex *BLAS_S;
254     double *H1_Band_Col;
255     double *S1_Band_Col;
256     double *CDM1_Band_Col;
257     double *EDM1_Band_Col;
258     int ***k_op;
259     int *T_k_op;
260     int **T_k_ID;
261     double *T_KGrids1,*T_KGrids2,*T_KGrids3;
262     double ***EIGEN_Band_Col;
263 
264     int numprocs0,myid0;
265     int numprocs1,myid1;
266 
267     int Num_Comm_World1;
268     int Num_Comm_World2;
269 
270     int myworld1;
271     int myworld2;
272 
273     int *NPROCS_ID1;
274     int *Comm_World1;
275     int *NPROCS_WD1;
276     int *Comm_World_StartID1;
277     MPI_Comm *MPI_CommWD1;
278 
279     int *NPROCS_ID2;
280     int *NPROCS_WD2;
281     int *Comm_World2;
282     int *Comm_World_StartID2;
283     MPI_Comm *MPI_CommWD2;
284 
285     /* MPI */
286     MPI_Comm_size(comm1,&numprocs0);
287     MPI_Comm_rank(comm1,&myid0);
288 
289     Kspace_grid1 = 1;
290     Kspace_grid2 = TRAN_Kspace_grid2;
291     Kspace_grid3 = TRAN_Kspace_grid3;
292 
293     n = 0;
294     for (i=1; i<=atomnum; i++){
295       wanA = WhatSpecies[i];
296       n += Spe_Total_CNO[wanA];
297     }
298     n2 = n + 2;
299 
300     MP = (int*)malloc(sizeof(int)*List_YOUSO[1]);
301     order_GA = (int*)malloc(sizeof(int)*(List_YOUSO[1]+1));
302 
303     My_NZeros = (int*)malloc(sizeof(int)*numprocs0);
304     SP_NZeros = (int*)malloc(sizeof(int)*numprocs0);
305     SP_Atoms = (int*)malloc(sizeof(int)*numprocs0);
306 
307     ko = (double*)malloc(sizeof(double)*(n+1));
308     koS = (double*)malloc(sizeof(double)*(n+1));
309 
310     H_Band_Col = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
311     for (j=0; j<n+1; j++){
312       H_Band_Col[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
313     }
314 
315     S_Band = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
316     for (i=0; i<n+1; i++){
317       S_Band[i] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
318     }
319 
320     C_Band_Col = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
321     for (j=0; j<n+1; j++){
322       C_Band_Col[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
323     }
324 
325     BLAS_S = (dcomplex*)malloc(sizeof(dcomplex)*n*n);
326 
327     /* find size_H1 */
328 
329     size_H1 = Get_OneD_HS_Col(0, nh[0], &tmp, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
330 
331     H1_Band_Col   = (double*)malloc(sizeof(double)*size_H1);
332     S1_Band_Col   = (double*)malloc(sizeof(double)*size_H1);
333     CDM1_Band_Col = (double*)malloc(sizeof(double)*size_H1);
334     EDM1_Band_Col = (double*)malloc(sizeof(double)*size_H1);
335 
336     k_op = (int***)malloc(sizeof(int**)*Kspace_grid1);
337     for (i=0;i<Kspace_grid1; i++) {
338       k_op[i] = (int**)malloc(sizeof(int*)*Kspace_grid2);
339       for (j=0;j<Kspace_grid2; j++) {
340 	k_op[i][j] = (int*)malloc(sizeof(int)*Kspace_grid3);
341       }
342     }
343 
344     for (i=0; i<Kspace_grid1; i++) {
345       for (j=0; j<Kspace_grid2; j++) {
346 	for (k=0; k<Kspace_grid3; k++) {
347 	  k_op[i][j][k] = -999;
348 	}
349       }
350     }
351 
352     for (i=0; i<Kspace_grid1; i++) {
353       for (j=0; j<Kspace_grid2; j++) {
354 	for (k=0; k<Kspace_grid3; k++) {
355 
356 	  if ( k_op[i][j][k]==-999 ) {
357 	    k_inversion(i,j,k,Kspace_grid1,Kspace_grid2,Kspace_grid3,&ii,&ij,&ik);
358 	    if ( i==ii && j==ij && k==ik ) {
359 	      k_op[i][j][k]    = 1;
360 	    }
361 
362 	    else {
363 	      k_op[i][j][k]    = 2;
364 	      k_op[ii][ij][ik] = 0;
365 	    }
366 	  }
367 	} /* k */
368       } /* j */
369     } /* i */
370 
371     /* find T_knum */
372 
373     T_knum = 0;
374     for (i=0; i<Kspace_grid1; i++) {
375       for (j=0; j<Kspace_grid2; j++) {
376 	for (k=0; k<Kspace_grid3; k++) {
377 	  if (0<k_op[i][j][k]){
378 	    T_knum++;
379 	  }
380 	}
381       }
382     }
383 
384     T_KGrids1 = (double*)malloc(sizeof(double)*T_knum);
385     T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
386     T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
387     T_k_op    = (int*)malloc(sizeof(int)*T_knum);
388 
389     T_k_ID    = (int**)malloc(sizeof(int*)*2);
390     for (i=0; i<2; i++){
391       T_k_ID[i] = (int*)malloc(sizeof(int)*T_knum);
392     }
393 
394     EIGEN_Band_Col  = (double***)malloc(sizeof(double**)*List_YOUSO[23]);
395     for (i=0; i<List_YOUSO[23]; i++){
396       EIGEN_Band_Col[i] = (double**)malloc(sizeof(double*)*T_knum);
397       for (j=0; j<T_knum; j++){
398 	EIGEN_Band_Col[i][j] = (double*)malloc(sizeof(double)*(n+1));
399 	for (k=0; k<(n+1); k++) EIGEN_Band_Col[i][j][k] = 1.0e+5;
400       }
401     }
402 
403     /***********************************************
404       allocation of arrays for the first world
405       and
406       make the first level worlds
407     ***********************************************/
408 
409     Num_Comm_World1 = SpinP_switch + 1;
410 
411     NPROCS_ID1 = (int*)malloc(sizeof(int)*numprocs0);
412     Comm_World1 = (int*)malloc(sizeof(int)*numprocs0);
413     NPROCS_WD1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
414     Comm_World_StartID1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
415     MPI_CommWD1 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World1);
416 
417     Make_Comm_Worlds(comm1, myid0, numprocs0, Num_Comm_World1, &myworld1, MPI_CommWD1,
418 		     NPROCS_ID1, Comm_World1, NPROCS_WD1, Comm_World_StartID1);
419 
420     MPI_Comm_size(MPI_CommWD1[myworld1],&numprocs1);
421     MPI_Comm_rank(MPI_CommWD1[myworld1],&myid1);
422 
423     /***********************************************
424         allocation of arrays for the second world
425         and
426         make the second level worlds
427     ***********************************************/
428 
429     if (T_knum<=numprocs1){
430 
431       Num_Comm_World2 = T_knum;
432 
433       NPROCS_ID2 = (int*)malloc(sizeof(int)*numprocs1);
434       Comm_World2 = (int*)malloc(sizeof(int)*numprocs1);
435       NPROCS_WD2 = (int*)malloc(sizeof(int)*Num_Comm_World2);
436       Comm_World_StartID2 = (int*)malloc(sizeof(int)*Num_Comm_World2);
437       MPI_CommWD2 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World2);
438 
439       Make_Comm_Worlds(MPI_CommWD1[myworld1], myid1, numprocs1, Num_Comm_World2, &myworld2, MPI_CommWD2,
440 		       NPROCS_ID2, Comm_World2, NPROCS_WD2, Comm_World_StartID2);
441     }
442 
443     /***********************************************
444                      call Band_DFT_Col
445      ***********************************************/
446 
447     time5 = Band_DFT_Col( 1,
448 			  Kspace_grid1,
449                           Kspace_grid2,
450                           Kspace_grid3,
451 			  SpinP_switch,
452 			  nh,
453 			  ImNL,
454 			  CntOLP,
455 			  CDM,
456 			  EDM,
457 			  Eele0,
458 			  Eele1,
459 			  MP,order_GA,ko,koS,EIGEN_Band_Col,
460 			  H1_Band_Col,S1_Band_Col,
461 			  CDM1_Band_Col,EDM1_Band_Col,
462 			  H_Band_Col,S_Band,C_Band_Col,
463 			  BLAS_S,
464 			  k_op,T_k_op,T_k_ID,
465 			  T_KGrids1,T_KGrids2,T_KGrids3,
466 			  myworld1,
467 			  NPROCS_ID1,
468 			  Comm_World1,
469 			  NPROCS_WD1,
470 			  Comm_World_StartID1,
471 			  MPI_CommWD1,
472 			  myworld2,
473 			  NPROCS_ID2,
474 			  NPROCS_WD2,
475 			  Comm_World2,
476 			  Comm_World_StartID2,
477 			  MPI_CommWD2);
478 
479     Eele0[0] = 0.0;
480     Eele0[1] = 0.0;
481 
482     Eele1[0] = 0.0;
483     Eele1[1] = 0.0;
484 
485     /***********************************************
486        freeing of arrays
487     ***********************************************/
488 
489     n = 0;
490     for (i=1; i<=atomnum; i++){
491       wanA  = WhatSpecies[i];
492       n += Spe_Total_CNO[wanA];
493     }
494     n2 = n + 2;
495 
496     free(MP);
497     free(order_GA);
498 
499     free(My_NZeros);
500     free(SP_NZeros);
501     free(SP_Atoms);
502 
503     free(ko);
504     free(koS);
505 
506     for (j=0; j<n+1; j++){
507       free(H_Band_Col[j]);
508     }
509     free(H_Band_Col);
510 
511     for (i=0; i<n+1; i++){
512       free(S_Band[i]);
513     }
514     free(S_Band);
515 
516     for (j=0; j<n+1; j++){
517       free(C_Band_Col[j]);
518     }
519     free(C_Band_Col);
520 
521     free(BLAS_S);
522 
523     free(H1_Band_Col);
524     free(S1_Band_Col);
525     free(CDM1_Band_Col);
526     free(EDM1_Band_Col);
527 
528     for (i=0; i<Kspace_grid1; i++) {
529       for (j=0; j<Kspace_grid2; j++) {
530 	free(k_op[i][j]);
531       }
532       free(k_op[i]);
533     }
534     free(k_op);
535 
536     free(T_KGrids1);
537     free(T_KGrids2);
538     free(T_KGrids3);
539     free(T_k_op);
540 
541     for (i=0; i<2; i++){
542       free(T_k_ID[i]);
543     }
544     free(T_k_ID);
545 
546     for (i=0; i<List_YOUSO[23]; i++){
547       for (j=0; j<T_knum; j++){
548 	free(EIGEN_Band_Col[i][j]);
549       }
550       free(EIGEN_Band_Col[i]);
551     }
552     free(EIGEN_Band_Col);
553 
554     /* freeing of arrays for the second world */
555 
556     if (T_knum<=numprocs1){
557 
558       if (Num_Comm_World2<=numprocs1){
559         MPI_Comm_free(&MPI_CommWD2[myworld2]);
560       }
561 
562       free(MPI_CommWD2);
563       free(Comm_World_StartID2);
564       free(NPROCS_WD2);
565       free(Comm_World2);
566       free(NPROCS_ID2);
567     }
568 
569     /* freeing of arrays for the first world */
570 
571     if (Num_Comm_World1<=numprocs0){
572       MPI_Comm_free(&MPI_CommWD1[myworld1]);
573     }
574 
575     free(MPI_CommWD1);
576     free(Comm_World_StartID1);
577     free(NPROCS_WD1);
578     free(Comm_World1);
579     free(NPROCS_ID1);
580 
581   } /* else */
582 
583   /* for elapsed time */
584   dtime(&TEtime);
585   return TEtime - TStime;
586 
587 }
588 
589 
590 
591 
592 
TRAN_DFT_Original(MPI_Comm comm1,int level_stdout,int iter,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int * F_G2M,int ** atv_ijk,int * List_YOUSO,double ***** CDM,double ***** EDM,double *** TRAN_DecMulP,double Eele0[2],double Eele1[2],double ChemP_e0[2])593 double TRAN_DFT_Original(
594 		/* input */
595                 MPI_Comm comm1,
596                 int level_stdout,
597 		int iter,
598 		int SpinP_switch,
599 		double *****nh,  /* H */
600 		double *****ImNL, /* not used, s-o coupling */
601 		double ****CntOLP,
602 		int atomnum,
603 		int Matomnum,
604 		int *WhatSpecies,
605 		int *Spe_Total_CNO,
606 		int *FNAN,
607 		int **natn,
608 		int **ncn,
609 		int *M2G,
610 		int *G2ID,
611                 int *F_G2M,
612 		int **atv_ijk,
613 		int *List_YOUSO,
614 		/* output */
615 		double *****CDM,  /* output, density matrix */
616 		double *****EDM,  /* not used */
617                 double ***TRAN_DecMulP, /* output, partial DecMulP */
618 		double Eele0[2], double Eele1[2],
619                 double ChemP_e0[2])
620 {
621   int numprocs0,myid0,ID;
622   int i2,i3,k_op,l1,l2,l3,RnB;
623   int k,E_knum,S_knum,T_knum,num_kloop0;
624   int parallel_mode,kloop,kloop0;
625   int i,j,spin,MA_AN,GA_AN,wanA,tnoA;
626   int LB_AN,GB_AN,wanB,tnoB;
627   int **op_flag,*T_op_flag,*T_k_ID;
628   double *T_KGrids2,*T_KGrids3;
629   double k2,k3,tmp;
630   double TStime,TEtime;
631 
632   int *MP;
633   int *order_GA;
634   int *My_NZeros;
635   int *SP_NZeros;
636   int *SP_Atoms;
637 
638   int size_H1;
639   int myworld1;
640   int numprocs1,myid1;
641   int Num_Comm_World1;
642   int *NPROCS_ID1;
643   int *Comm_World1;
644   int *NPROCS_WD1;
645   int *Comm_World_StartID1;
646   MPI_Comm *MPI_CommWD1;
647 
648   double **DM1,*TDM1;
649   double **H1,*S1;
650 
651   MPI_Comm_size(comm1,&numprocs0);
652   MPI_Comm_rank(comm1,&myid0);
653 
654   dtime(&TStime);
655 
656   if (myid0==Host_ID){
657     printf("<TRAN_DFT>\n");
658   }
659 
660   /***************************************
661     ChemP_e0 will be used in outputfile1
662   ***************************************/
663 
664   ChemP_e0[0] = ChemP_e[0];
665   ChemP_e0[1] = ChemP_e[1];
666 
667   /***********************************
668             initialize CDM
669   ************************************/
670 
671   for (spin=0; spin<=SpinP_switch; spin++) {
672     for (MA_AN=1; MA_AN<=Matomnum; MA_AN++) {
673       GA_AN = M2G[MA_AN];
674       wanA = WhatSpecies[GA_AN];
675       tnoA = Spe_Total_CNO[wanA];
676       for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
677 	GB_AN = natn[GA_AN][LB_AN];
678 	wanB = WhatSpecies[GB_AN];
679 	tnoB = Spe_Total_CNO[wanB];
680 	for (i=0; i<tnoA; i++){
681 	  for (j=0; j<tnoB; j++){
682 	    CDM[spin][MA_AN][LB_AN][i][j] = 0.0;
683 	  }
684 	}
685       }
686     }
687   }
688 
689   /***********************************
690         set up operation flag
691   ************************************/
692 
693   op_flag = (int**)malloc(sizeof(int*)*TRAN_Kspace_grid2);
694   for (i2=0; i2<TRAN_Kspace_grid2; i2++){
695     op_flag[i2] = (int*)malloc(sizeof(int)*TRAN_Kspace_grid3);
696     for (i3=0; i3<TRAN_Kspace_grid3; i3++){
697       op_flag[i2][i3] = -999;
698     }
699   }
700 
701   for (i2=0; i2<TRAN_Kspace_grid2; i2++){
702     for (i3=0; i3<TRAN_Kspace_grid3; i3++){
703 
704       if (op_flag[i2][i3]<0){
705 
706 	if ( (TRAN_Kspace_grid2-1-i2)==i2 && (TRAN_Kspace_grid3-1-i3)==i3 ){
707 	  op_flag[i2][i3] = 1;
708 	}
709 	else{
710 	  op_flag[i2][i3] = 2;
711 	  op_flag[TRAN_Kspace_grid2-1-i2][TRAN_Kspace_grid3-1-i3] = 0;
712 	}
713       }
714 
715     }
716   }
717 
718   /***********************************
719        one-dimentionalize for MPI
720   ************************************/
721 
722   T_knum = 0;
723   for (i2=0; i2<TRAN_Kspace_grid2; i2++){
724     for (i3=0; i3<TRAN_Kspace_grid3; i3++){
725       if (0<op_flag[i2][i3]) T_knum++;
726     }
727   }
728 
729   T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
730   T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
731   T_op_flag = (int*)malloc(sizeof(int)*T_knum);
732   T_k_ID = (int*)malloc(sizeof(int)*T_knum);
733 
734   T_knum = 0;
735 
736   for (i2=0; i2<TRAN_Kspace_grid2; i2++){
737 
738     k2 = -0.5 + (2.0*(double)i2+1.0)/(2.0*(double)TRAN_Kspace_grid2) + Shift_K_Point;
739 
740     for (i3=0; i3<TRAN_Kspace_grid3; i3++){
741 
742       k3 = -0.5 + (2.0*(double)i3+1.0)/(2.0*(double)TRAN_Kspace_grid3) - Shift_K_Point;
743 
744       if (0<op_flag[i2][i3]){
745 
746         T_KGrids2[T_knum] = k2;
747         T_KGrids3[T_knum] = k3;
748         T_op_flag[T_knum] = op_flag[i2][i3];
749 
750         T_knum++;
751       }
752     }
753   }
754 
755   /***************************************
756     print out
757   ***************************************/
758 
759   if (myid0==Host_ID){
760 
761     printf(" KGrids2: ");fflush(stdout);
762 
763     for (i=0;i<TRAN_Kspace_grid2;i++){
764       if (TRAN_Kspace_grid2==1)  k2 = 0.0;
765       else                       k2 = -0.5 + (2.0*(double)i+1.0)/(2.0*(double)TRAN_Kspace_grid2) + Shift_K_Point;
766       printf("%9.5f ",k2);fflush(stdout);
767     }
768     printf("\n");fflush(stdout);
769 
770     printf(" KGrids3: ");fflush(stdout);
771     for (i=0;i<TRAN_Kspace_grid3;i++){
772       if (TRAN_Kspace_grid3==1)  k3 = 0.0;
773       else                       k3 = -0.5 + (2.0*(double)i+1.0)/(2.0*(double)TRAN_Kspace_grid3) - Shift_K_Point;
774       printf("%9.5f ",k3);fflush(stdout);
775     }
776     printf("\n");fflush(stdout);
777   }
778 
779   /***************************************************
780    allocate calculations of k-points into processors
781   ***************************************************/
782 
783   if (numprocs0<T_knum){
784 
785     /* set parallel_mode */
786     parallel_mode = 0;
787 
788     /* allocation of kloop to ID */
789 
790     for (ID=0; ID<numprocs0; ID++){
791       tmp = (double)T_knum/(double)numprocs0;
792       S_knum = (int)((double)ID*(tmp+1.0e-12));
793       E_knum = (int)((double)(ID+1)*(tmp+1.0e-12)) - 1;
794       if (ID==(numprocs0-1)) E_knum = T_knum - 1;
795       if (E_knum<0)          E_knum = 0;
796 
797       for (k=S_knum; k<=E_knum; k++){
798         /* ID in the first level world */
799         T_k_ID[k] = ID;
800       }
801     }
802 
803     /* find own informations */
804 
805     tmp = (double)T_knum/(double)numprocs0;
806     S_knum = (int)((double)myid0*(tmp+1.0e-12));
807     E_knum = (int)((double)(myid0+1)*(tmp+1.0e-12)) - 1;
808     if (myid0==(numprocs0-1)) E_knum = T_knum - 1;
809     if (E_knum<0)             E_knum = 0;
810 
811     num_kloop0 = E_knum - S_knum + 1;
812 
813   }
814 
815   else {
816 
817     /* set parallel_mode */
818     parallel_mode = 1;
819     num_kloop0 = 1;
820 
821     Num_Comm_World1 = T_knum;
822 
823     NPROCS_ID1 = (int*)malloc(sizeof(int)*numprocs0);
824     Comm_World1 = (int*)malloc(sizeof(int)*numprocs0);
825     NPROCS_WD1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
826     Comm_World_StartID1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
827     MPI_CommWD1 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World1);
828 
829     Make_Comm_Worlds(comm1, myid0, numprocs0, Num_Comm_World1, &myworld1, MPI_CommWD1,
830 		     NPROCS_ID1, Comm_World1, NPROCS_WD1, Comm_World_StartID1);
831 
832     MPI_Comm_size(MPI_CommWD1[myworld1],&numprocs1);
833     MPI_Comm_rank(MPI_CommWD1[myworld1],&myid1);
834 
835     S_knum = myworld1;
836 
837     /* allocate k-points into processors */
838 
839     for (k=0; k<T_knum; k++){
840       /* ID in the first level world */
841       T_k_ID[k] = Comm_World_StartID1[k];
842     }
843 
844   }
845 
846   /*************************************************************
847    one-dimensitonalize H and S and store them in a compact form
848   *************************************************************/
849 
850   MP = (int*)malloc(sizeof(int)*(atomnum+1));
851   order_GA = (int*)malloc(sizeof(int)*(atomnum+1));
852 
853   My_NZeros = (int*)malloc(sizeof(int)*numprocs0);
854   SP_NZeros = (int*)malloc(sizeof(int)*numprocs0);
855   SP_Atoms = (int*)malloc(sizeof(int)*numprocs0);
856 
857   size_H1 = Get_OneD_HS_Col(0, nh[0], &tmp, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
858 
859   DM1 = (double**)malloc(sizeof(double*)*(SpinP_switch+1));
860   for (k=0; k<(SpinP_switch+1); k++){
861     DM1[k] = (double*)malloc(sizeof(double)*size_H1);
862     for (i=0; i<size_H1; i++) DM1[k][i] = 0.0;
863   }
864 
865   TDM1 = (double*)malloc(sizeof(double)*size_H1);
866 
867   H1 = (double**)malloc(sizeof(double*)*(SpinP_switch+1));
868   for (k=0; k<(SpinP_switch+1); k++){
869     H1[k] = (double*)malloc(sizeof(double)*size_H1);
870   }
871 
872   S1 = (double*)malloc(sizeof(double)*size_H1);
873 
874   for (k=0; k<(SpinP_switch+1); k++){
875     size_H1 = Get_OneD_HS_Col(1, nh[k], H1[k], MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
876   }
877 
878   size_H1 = Get_OneD_HS_Col(1, CntOLP, S1, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
879 
880   /***********************************************************
881    start "kloop0"
882   ***********************************************************/
883 
884   for (kloop0=0; kloop0<num_kloop0; kloop0++){
885 
886     kloop = S_knum + kloop0;
887 
888     k2 = T_KGrids2[kloop];
889     k3 = T_KGrids3[kloop];
890     k_op = T_op_flag[kloop];
891 
892     if (parallel_mode){
893 
894       TRAN_DFT_Kdependent(MPI_CommWD1[myworld1],
895 			  parallel_mode, numprocs1, myid1,
896 			  level_stdout, iter, SpinP_switch, k2, k3, k_op, order_GA,
897                           DM1,H1,S1,
898                           nh, ImNL, CntOLP,
899 			  atomnum, Matomnum, WhatSpecies, Spe_Total_CNO, FNAN,
900 			  natn, ncn, M2G, G2ID, atv_ijk, List_YOUSO, CDM, EDM, Eele0, Eele1);
901     }
902     else{
903 
904       TRAN_DFT_Kdependent(comm1,
905 			  parallel_mode, 1, 0,
906 			  level_stdout, iter, SpinP_switch, k2, k3, k_op, order_GA,
907                           DM1,H1,S1,
908                           nh, ImNL, CntOLP,
909 			  atomnum, Matomnum, WhatSpecies, Spe_Total_CNO, FNAN,
910 			  natn, ncn, M2G, G2ID, atv_ijk, List_YOUSO, CDM, EDM, Eele0, Eele1);
911     }
912 
913   } /* kloop0 */
914 
915   /* MPI communication of DM1 */
916 
917   tmp = 1.0/(double)(TRAN_Kspace_grid2*TRAN_Kspace_grid3);
918 
919   for (k=0; k<=SpinP_switch; k++) {
920 
921     int itot0,Anum,Bnum;
922     double co,si,kRn;
923 
924     MPI_Allreduce( DM1[k], TDM1, size_H1, MPI_DOUBLE, MPI_SUM, comm1);
925 
926     itot0 = 0;
927 
928     for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
929 
930       wanA = WhatSpecies[GA_AN];
931       tnoA = Spe_Total_CNO[wanA];
932       Anum = MP[GA_AN];
933       MA_AN = F_G2M[GA_AN];
934 
935       for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
936 
937 	GB_AN = natn[GA_AN][LB_AN];
938 	RnB = ncn[GA_AN][LB_AN];
939 	wanB = WhatSpecies[GB_AN];
940 	tnoB = Spe_Total_CNO[wanB];
941 
942 	for (i=0;i<tnoA;i++) {
943 	  for (j=0;j<tnoB;j++) {
944 
945 	    if (1<=MA_AN && MA_AN<=Matomnum){
946               CDM[k][MA_AN][LB_AN][i][j] = TDM1[itot0]*tmp;
947 	    }
948 
949             itot0++;
950 
951 	  }
952 	}
953       }
954     }
955 
956   } /* k */
957 
958   /***********************************************************
959            overwrite CMD with l1=-1 or l1=1 by zero
960   ***********************************************************/
961 
962   for (spin=0; spin<=SpinP_switch; spin++) {
963     for (MA_AN=1; MA_AN<=Matomnum; MA_AN++) {
964 
965       GA_AN = M2G[MA_AN];
966       wanA = WhatSpecies[GA_AN];
967       tnoA = Spe_Total_CNO[wanA];
968 
969       for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
970 
971 	GB_AN = natn[GA_AN][LB_AN];
972         RnB = ncn[GA_AN][LB_AN];
973 	wanB = WhatSpecies[GB_AN];
974 	tnoB = Spe_Total_CNO[wanB];
975         l1 = atv_ijk[RnB][1];
976         l2 = atv_ijk[RnB][2];
977         l3 = atv_ijk[RnB][3];
978 
979         /* DM between C-L or C-R */
980 
981         if (l1==-1 || l1==1){
982 	  for (i=0; i<tnoA; i++){
983 	    for (j=0; j<tnoB; j++){
984 	      CDM[spin][MA_AN][LB_AN][i][j] = 0.0;
985 	    }
986 	  }
987         }
988 
989       }
990     }
991   }
992 
993   /***********************************************************
994      calculate (partial) decomposed Mulliken population by
995                density matrix between CL or CR
996 
997      This contribution will be added in Mulliken_Charge.c
998   ***********************************************************/
999 
1000   {
1001     int MA_AN,GA_AN,wanA,tnoA,GA_AN_e,LB_AN_e,iside;
1002     int direction,Rn_e,GB_AN_e;
1003     double sum;
1004 
1005     /* initialize */
1006 
1007     for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){
1008 
1009       GA_AN = M2G[MA_AN];
1010       wanA = WhatSpecies[GA_AN];
1011       tnoA = Spe_Total_CNO[wanA];
1012 
1013       for (spin=0; spin<=SpinP_switch; spin++) {
1014 	for (i=0; i<tnoA; i++){
1015           TRAN_DecMulP[spin][MA_AN][i] = 0.0;
1016 	}
1017       }
1018     }
1019 
1020     /* Left lead */
1021 
1022     iside = 0;
1023     direction = -1;
1024 
1025     for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){
1026 
1027       GA_AN = M2G[MA_AN];
1028       wanA = WhatSpecies[GA_AN];
1029       tnoA = Spe_Total_CNO[wanA];
1030 
1031       if (TRAN_region[GA_AN]%10==2){
1032 
1033         GA_AN_e =  TRAN_Original_Id[GA_AN];
1034 
1035         for (spin=0; spin<=SpinP_switch; spin++) {
1036 	  for (i=0; i<tnoA; i++){
1037 
1038 	    sum = 0.0;
1039 
1040 	    for (LB_AN_e=0; LB_AN_e<=FNAN_e[iside][GA_AN_e]; LB_AN_e++){
1041 
1042 	      GB_AN_e = natn_e[iside][GA_AN_e][LB_AN_e];
1043 	      Rn_e = ncn_e[iside][GA_AN_e][LB_AN_e];
1044 	      wanB = WhatSpecies_e[iside][GB_AN_e];
1045 	      tnoB = Spe_Total_CNO_e[iside][wanB];
1046 	      l1 = atv_ijk_e[iside][Rn_e][1];
1047 
1048 	      if (l1==direction) {
1049 		for (j=0; j<tnoB; j++){
1050 		  sum += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*
1051                          DM_e[iside][0][spin][GA_AN_e][LB_AN_e][i][j];
1052 		}
1053 	      }
1054 	    }
1055 
1056 	    TRAN_DecMulP[spin][MA_AN][i] = sum;
1057 
1058 	  } /* i */
1059 	} /* spin */
1060       }
1061 
1062     } /* MA_AN */
1063 
1064     /* Right lead */
1065 
1066     iside = 1;
1067     direction = 1;
1068 
1069     for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){
1070 
1071       GA_AN = M2G[MA_AN];
1072       wanA = WhatSpecies[GA_AN];
1073       tnoA = Spe_Total_CNO[wanA];
1074 
1075       if (TRAN_region[GA_AN]%10==3){
1076 
1077         GA_AN_e = TRAN_Original_Id[GA_AN];
1078 
1079         for (spin=0; spin<=SpinP_switch; spin++) {
1080 	  for (i=0; i<tnoA; i++){
1081 
1082 	    sum = 0.0;
1083 
1084 	    for (LB_AN_e=0; LB_AN_e<=FNAN_e[iside][GA_AN_e]; LB_AN_e++){
1085 
1086 	      GB_AN_e = natn_e[iside][GA_AN_e][LB_AN_e];
1087 	      Rn_e = ncn_e[iside][GA_AN_e][LB_AN_e];
1088 	      wanB = WhatSpecies_e[iside][GB_AN_e];
1089 	      tnoB = Spe_Total_CNO_e[iside][wanB];
1090 	      l1 = atv_ijk_e[iside][Rn_e][1];
1091 
1092 	      if (l1==direction) {
1093 		for (j=0; j<tnoB; j++){
1094 		  sum += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*
1095                          DM_e[iside][0][spin][GA_AN_e][LB_AN_e][i][j];
1096 		}
1097 	      }
1098 	    }
1099 
1100 	    TRAN_DecMulP[spin][MA_AN][i] = sum;
1101 
1102 	  } /* i */
1103 	} /* spin */
1104       }
1105     } /* MA_AN */
1106 
1107   }
1108 
1109   /* set Eele as 0 */
1110 
1111   Eele0[0] = 0.0;
1112   Eele0[1] = 0.0;
1113   Eele1[0] = 0.0;
1114   Eele1[1] = 0.0;
1115 
1116   /* free arrays */
1117 
1118   for (i2=0; i2<TRAN_Kspace_grid2; i2++){
1119     free(op_flag[i2]);
1120   }
1121   free(op_flag);
1122 
1123   free(T_KGrids2);
1124   free(T_KGrids3);
1125   free(T_op_flag);
1126   free(T_k_ID);
1127 
1128   if (T_knum<=numprocs0){
1129 
1130     if (Num_Comm_World1<=numprocs0){
1131       MPI_Comm_free(&MPI_CommWD1[myworld1]);
1132     }
1133 
1134     free(NPROCS_ID1);
1135     free(Comm_World1);
1136     free(NPROCS_WD1);
1137     free(Comm_World_StartID1);
1138     free(MPI_CommWD1);
1139   }
1140 
1141   free(MP);
1142   free(order_GA);
1143 
1144   free(My_NZeros);
1145   free(SP_NZeros);
1146   free(SP_Atoms);
1147 
1148   for (k=0; k<(SpinP_switch+1); k++){
1149     free(DM1[k]);
1150   }
1151   free(DM1);
1152 
1153   free(TDM1);
1154 
1155   for (k=0; k<(SpinP_switch+1); k++){
1156     free(H1[k]);
1157   }
1158   free(H1);
1159 
1160   free(S1);
1161 
1162   /* for elapsed time */
1163   dtime(&TEtime);
1164   if (myid0==Host_ID){
1165     printf("<TRAN_DFT> time=%lf\n", TEtime-TStime);fflush(stdout);
1166   }
1167 
1168   return TEtime - TStime;
1169 }
1170 
1171 
1172 
1173 
1174 /*
1175  *   calculate CDM from nh, CntOLP, ...
1176  *
1177  *       an alternative routine for Cluster_DFT or Band_DFT
1178  *
1179  */
1180 
1181 
1182 
1183 
TRAN_DFT_Kdependent(MPI_Comm comm1,int parallel_mode,int numprocs,int myid,int level_stdout,int iter,int SpinP_switch,double k2,double k3,int k_op,int * order_GA,double ** DM1,double ** H1,double * S1,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int ** atv_ijk,int * List_YOUSO,double ***** CDM,double ***** EDM,double Eele0[2],double Eele1[2])1184 static void TRAN_DFT_Kdependent(
1185 			  /* input */
1186 			  MPI_Comm comm1,
1187                           int parallel_mode,
1188                           int numprocs,
1189                           int myid,
1190 			  int level_stdout,
1191 			  int iter,
1192 			  int SpinP_switch,
1193                           double k2,
1194                           double k3,
1195                           int k_op,
1196                           int *order_GA,
1197                           double **DM1,
1198                           double **H1,
1199                           double *S1,
1200 			  double *****nh,  /* H */
1201 			  double *****ImNL, /* not used, SO-coupling */
1202 			  double ****CntOLP,
1203 			  int atomnum,
1204 			  int Matomnum,
1205 			  int *WhatSpecies,
1206 			  int *Spe_Total_CNO,
1207 			  int *FNAN,
1208 			  int **natn,
1209 			  int **ncn,
1210 			  int *M2G,
1211 			  int *G2ID,
1212 			  int **atv_ijk,
1213 			  int *List_YOUSO,
1214 			  /* output */
1215 			  double *****CDM,  /* output, charge density */
1216 			  double *****EDM,  /* not used */
1217 			  double Eele0[2], double Eele1[2]) /* not used */
1218 
1219 #define GC_ref(i,j) GC[ NUM_c*((j)-1) + (i)-1 ]
1220 #define Gless_ref(i,j) Gless[ NUM_c*((j)-1) + (i)-1 ]
1221 
1222 {
1223   int i,j,k,iside,spinsize;
1224   int *MP;
1225   int  iw,iw_method;
1226   dcomplex w, w_weight;
1227   dcomplex *GC,*GRL,*GRR;
1228   dcomplex *SigmaL, *SigmaR;
1229   dcomplex *work1,*work2,*Gless;
1230   dcomplex **v2;
1231   double dum;
1232   double TStime,TEtime;
1233   int MA_AN, GA_AN, wanA, tnoA, Anum;
1234   int LB_AN, GB_AN, wanB, tnoB, Bnum;
1235 
1236   static int ID;
1237   int Miw,iw0;
1238   double time_a0, time_a1, time_a2;
1239 
1240   /* setup MP */
1241   TRAN_Set_MP(0, atomnum, WhatSpecies, Spe_Total_CNO, &NUM_c, &i);
1242   MP = (int*)malloc(sizeof(int)*(NUM_c+1));
1243   TRAN_Set_MP(1, atomnum, WhatSpecies, Spe_Total_CNO, &NUM_c, MP);
1244 
1245   /* initialize */
1246   TRAN_Set_Value_double(SCC,NUM_c*NUM_c,    0.0,0.0);
1247   TRAN_Set_Value_double(SCL,NUM_c*NUM_e[0], 0.0,0.0);
1248   TRAN_Set_Value_double(SCR,NUM_c*NUM_e[1], 0.0,0.0);
1249   for (k=0; k<=SpinP_switch; k++) {
1250     TRAN_Set_Value_double(HCC[k],NUM_c*NUM_c,    0.0,0.0);
1251     TRAN_Set_Value_double(HCL[k],NUM_c*NUM_e[0], 0.0,0.0);
1252     TRAN_Set_Value_double(HCR[k],NUM_c*NUM_e[1], 0.0,0.0);
1253   }
1254 
1255   /* set Hamiltonian and overlap matrices of left and right leads */
1256 
1257   TRAN_Set_SurfOverlap(comm1,"left", k2, k3);
1258   TRAN_Set_SurfOverlap(comm1,"right",k2, k3);
1259 
1260   /* set CC, CL and CR */
1261 
1262   TRAN_Set_CentOverlap(   comm1,
1263                           3,
1264                           SpinP_switch,
1265                           k2,
1266                           k3,
1267                           order_GA,
1268                           H1,
1269                           S1,
1270                           nh,      /* input */
1271                           CntOLP,  /* input */
1272                           atomnum,
1273 			  Matomnum,
1274 			  M2G,
1275 			  G2ID,
1276                           WhatSpecies,
1277                           Spe_Total_CNO,
1278                           FNAN,
1279                           natn,
1280                           ncn,
1281                           atv_ijk);
1282 
1283   if (MEASURE_TIME){
1284     dtime(&time_a0);
1285   }
1286 
1287   /* allocate */
1288 
1289   v2 = (dcomplex**)malloc(sizeof(dcomplex*)*(SpinP_switch+1));
1290 
1291   for (k=0; k<=SpinP_switch; k++) {
1292     v2[k] = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c*NUM_c);
1293     TRAN_Set_Value_double( v2[k], NUM_c*NUM_c, 0.0, 0.0);
1294   }
1295 
1296   GC = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1297 
1298   GRL = (dcomplex*)malloc(sizeof(dcomplex)*NUM_e[0]* NUM_e[0]);
1299   GRR = (dcomplex*)malloc(sizeof(dcomplex)*NUM_e[1]* NUM_e[1]);
1300 
1301   SigmaL = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1302   SigmaR = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1303 
1304   work1 = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1305   work2 = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1306 
1307   Gless = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1308 
1309   if (2<=level_stdout){
1310     printf("NUM_c=%d, NUM_e= %d %d\n",NUM_c, NUM_e[0], NUM_e[1]);
1311     printf("# of freq. to calculate G =%d\n",tran_omega_n_scf);
1312   }
1313 
1314   if      (SpinP_switch==0) spinsize = 1;
1315   else if (SpinP_switch==1) spinsize = 2;
1316   else if (SpinP_switch==3) spinsize = 1;
1317 
1318   /**************************************************************
1319              calculation of Green functions at k and iw
1320   **************************************************************/
1321 
1322   for ( Miw=myid; Miw<tran_omega_n_scf*spinsize; Miw+=numprocs ) {
1323 
1324     k = Miw/tran_omega_n_scf;
1325     iw = Miw - k*tran_omega_n_scf;
1326 
1327     w = tran_omega_scf[iw];
1328     w_weight  = tran_omega_weight_scf[iw];
1329     iw_method = tran_integ_method_scf[iw];
1330 
1331     /*
1332     printf("Miw=%3d iw=%d of %d w=%le %le weight=%le %le method=%d\n",
1333             Miw,iw,tran_omega_n_scf, w.r,w.i, w_weight.r, w_weight.i, iw_method);
1334     */
1335 
1336     /* calculation of surface Green's function and self energy from the LEFT lead */
1337 
1338     iside = 0;
1339 
1340     TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_e[iside][k], H01_e[iside][k],
1341 			       S00_e[iside], S01_e[iside], tran_surfgreen_iteration_max,
1342 			       tran_surfgreen_eps, GRL);
1343 
1344     TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRL, NUM_c, HCL[k], SCL, SigmaL);
1345 
1346     /* calculation of surface Green's function and self energy from the RIGHT lead */
1347 
1348     iside = 1;
1349 
1350     TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_e[iside][k], H01_e[iside][k],
1351 			       S00_e[iside], S01_e[iside], tran_surfgreen_iteration_max,
1352 			       tran_surfgreen_eps, GRR);
1353 
1354     TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRR, NUM_c, HCR[k], SCR, SigmaR);
1355 
1356     /* calculation of central retarded Green's function */
1357 
1358     TRAN_Calc_CentGreen(w, NUM_c, SigmaL, SigmaR, HCC[k], SCC, GC);
1359 
1360     /***********************************************
1361        The non-equilibrium part is calculated by
1362         using the lesser Green's function.
1363     ***********************************************/
1364 
1365     if (iw_method==2){
1366 
1367       /* calculation of central lesser Green's function */
1368 
1369       TRAN_Calc_CentGreenLesser( w, ChemP_e, NUM_c,
1370 				 Order_Lead_Side,
1371 				 SigmaL,
1372 				 SigmaR,
1373 				 GC,
1374 				 HCC[k], SCC,
1375 				 work1, work2, Gless);
1376     }
1377 
1378     /***********************************************
1379             add it to construct the density matrix
1380     ***********************************************/
1381 
1382     if      (iw_method==0)
1383       TRAN_Add_MAT( 0, NUM_c, w_weight, GC,     v2[k]);
1384     else if (iw_method==1)
1385       TRAN_Add_MAT( 1, NUM_c, w_weight, GC,     v2[k]);
1386     else if (iw_method==2)
1387       TRAN_Add_MAT( 1, NUM_c, w_weight, Gless,  v2[k]);
1388 
1389   } /* iw */
1390 
1391   if (MEASURE_TIME){
1392     dtime(&time_a1);
1393   }
1394 
1395   /***********************************************
1396           calculation of density matrix
1397   ***********************************************/
1398 
1399   {
1400     int l1,l2,l3,RnB;
1401     int size_v3,itot,itot0;
1402     double kRn,si,co,re,im;
1403     double *my_v3;
1404     double *v3;
1405 
1406     /* find the size of v3 */
1407 
1408     size_v3 = 0;
1409 
1410     for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
1411 
1412       wanA = WhatSpecies[GA_AN];
1413       tnoA = Spe_Total_CNO[wanA];
1414       Anum = MP[GA_AN];
1415 
1416       for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1417 
1418 	GB_AN = natn[GA_AN][LB_AN];
1419 	wanB = WhatSpecies[GB_AN];
1420 	tnoB = Spe_Total_CNO[wanB];
1421 	Bnum = MP[GB_AN];
1422 
1423 	for (i=0; i<tnoA; i++) {
1424 	  for (j=0; j<tnoB; j++) {
1425 	    size_v3++;
1426 	    size_v3++;
1427 	  }
1428 	}
1429       }
1430     }
1431 
1432     /* allocate arrays */
1433 
1434     my_v3 = (double*)malloc(sizeof(double)*size_v3);
1435     v3 = (double*)malloc(sizeof(double)*size_v3);
1436 
1437     /* set up v3 */
1438 
1439 #define v_idx(i,j)  ( ((j)-1)*NUM_c + (i)-1 )
1440 
1441     for (k=0; k<=SpinP_switch; k++) {
1442 
1443       itot = 0;
1444 
1445       for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
1446 
1447 	wanA = WhatSpecies[GA_AN];
1448 	tnoA = Spe_Total_CNO[wanA];
1449 	Anum = MP[GA_AN];
1450 
1451         for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1452 
1453 	  GB_AN = natn[GA_AN][LB_AN];
1454 	  wanB = WhatSpecies[GB_AN];
1455 	  tnoB = Spe_Total_CNO[wanB];
1456 	  Bnum = MP[GB_AN];
1457 
1458 	  for (i=0; i<tnoA; i++) {
1459 	    for (j=0; j<tnoB; j++) {
1460 	      my_v3[itot++] = v2[k][ v_idx( Anum+i, Bnum+j) ].r;
1461 	      my_v3[itot++] = v2[k][ v_idx( Anum+i, Bnum+j) ].i;
1462 	    }
1463 	  }
1464 	}
1465       }
1466 
1467       if (parallel_mode){
1468         MPI_Allreduce( my_v3, v3, itot, MPI_DOUBLE, MPI_SUM, comm1);
1469       }
1470       else {
1471         for (i=0; i<itot; i++) {
1472           v3[i] = my_v3[i];
1473 	}
1474       }
1475 
1476       /* v3 -> CDM */
1477 
1478       itot = 0;
1479       itot0 = 0;
1480 
1481       for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
1482 
1483 	wanA = WhatSpecies[GA_AN];
1484 	tnoA = Spe_Total_CNO[wanA];
1485 	Anum = MP[GA_AN];
1486 
1487 	for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1488 
1489 	  GB_AN = natn[GA_AN][LB_AN];
1490 	  RnB = ncn[GA_AN][LB_AN];
1491 	  wanB = WhatSpecies[GB_AN];
1492 	  tnoB = Spe_Total_CNO[wanB];
1493 	  Bnum = MP[GB_AN];
1494 	  l1 = atv_ijk[RnB][1];
1495 	  l2 = atv_ijk[RnB][2];
1496 	  l3 = atv_ijk[RnB][3];
1497 
1498 	  kRn = k2*(double)l2 + k3*(double)l3;
1499 	  si = (double)k_op*sin(2.0*PI*kRn);
1500 	  co = (double)k_op*cos(2.0*PI*kRn);
1501 
1502 	  for (i=0; i<tnoA; i++) {
1503 	    for (j=0; j<tnoB; j++) {
1504 	      re = v3[itot++];
1505 	      im = v3[itot++];
1506 
1507               /* divided by numprocs due to the later MPI_Allreduce  */
1508 	      DM1[k][itot0++] += (re*co + im*si)/(double)numprocs;
1509 
1510 	    }
1511 	  }
1512 	}
1513       }
1514 
1515     } /* k */
1516 
1517     /* free arrays */
1518 
1519     free(my_v3);
1520     free(v3);
1521   }
1522 
1523   if (MEASURE_TIME){
1524     MPI_Barrier(comm1);
1525     dtime(&time_a2);
1526     printf("TRAN_DFT(%d)> calculaiton (%le)\n",myid, time_a1-time_a0 );
1527   }
1528 
1529   /* free arrays */
1530 
1531   free(Gless);
1532   free(work2);
1533   free(work1);
1534   free(SigmaR);
1535   free(SigmaL);
1536   free(GRR);
1537   free(GRL);
1538   free(GC);
1539 
1540   for (k=0; k<=SpinP_switch; k++) {
1541     free(v2[k]);
1542   }
1543   free(v2);
1544 
1545   free(MP);
1546 }
1547 
1548 
1549 
TRAN_Add_MAT(int mode,int NUM_c,dcomplex w_weight,dcomplex * v,dcomplex * out)1550 static void TRAN_Add_MAT(
1551     int mode,
1552     int NUM_c,
1553     dcomplex w_weight,
1554     dcomplex *v,
1555     dcomplex *out
1556 )
1557 
1558 #define v_ref(i,j)        v[NUM_c*(j)+(i)]
1559 #define out_ref(i,j)      out[NUM_c*(j)+(i)]
1560 
1561 {
1562   int i,j;
1563   double re,im;
1564 
1565   switch (mode) {
1566 
1567   case 0:
1568 
1569     for (i=0; i<NUM_c; i++) {
1570       for (j=0; j<NUM_c; j++) {
1571         re = v_ref(i,j).r + v_ref(j,i).r;
1572         im = v_ref(i,j).i - v_ref(j,i).i;
1573         out_ref(i,j).r += re*w_weight.r - im*w_weight.i;
1574         out_ref(i,j).i += re*w_weight.i + im*w_weight.r;
1575       }
1576     }
1577 
1578     break;
1579 
1580   case 1:
1581 
1582     for (i=0; i<NUM_c*NUM_c; i++) {
1583       out[i].r += ( v[i].r*w_weight.r - v[i].i*w_weight.i );
1584       out[i].i += ( v[i].r*w_weight.i + v[i].i*w_weight.r );
1585     }
1586 
1587     break;
1588 
1589   }
1590 
1591 }
1592 
1593