1 /**********************************************************************
2 TRAN_DFT.c:
3
4 TRAN_DFT.c is a subroutine to perform self-consistent calculations
5 of a central region with left and right infinite leads based on
6 a non-equilibrium Green's function method.
7
8 Log of TRAN_DFT.c:
9
10 11/Dec/2005 Released by H.Kino
11 24/July/2008 Modified by T.Ozaki
12
13 ***********************************************************************/
14
15 #define MEASURE_TIME 0
16
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <math.h>
20 #include <time.h>
21 #include <mpi.h>
22 #include <omp.h>
23 #include "tran_prototypes.h"
24 #include "tran_variables.h"
25
26 void dtime(double *);
27
28 void k_inversion(int i, int j, int k,
29 int mi, int mj, int mk,
30 int *ii, int *ij, int *ik );
31
32 double Band_DFT_Col(int SCF_iter,
33 int knum_i, int knum_j, int knum_k,
34 int SpinP_switch,
35 double *****nh,
36 double *****ImNL,
37 double ****CntOLP,
38 double *****CDM,
39 double *****EDM,
40 double Eele0[2],
41 double Eele1[2],
42 int *MP,
43 int *order_GA,
44 double *ko,
45 double *koS,
46 double ***EIGEN,
47 double *H1,
48 double *S1,
49 double *CDM1,
50 double *EDM1,
51 dcomplex **H,
52 dcomplex **S,
53 dcomplex **C,
54 dcomplex *BLAS_S,
55 int ***k_op,
56 int *T_k_op,
57 int **T_k_ID,
58 double *T_KGrids1,
59 double *T_KGrids2,
60 double *T_KGrids3,
61 int myworld1,
62 int *NPROCS_ID1,
63 int *Comm_World1,
64 int *NPROCS_WD1,
65 int *Comm_World_StartID1,
66 MPI_Comm *MPI_CommWD1,
67 int myworld2,
68 int *NPROCS_ID2,
69 int *NPROCS_WD2,
70 int *Comm_World2,
71 int *Comm_World_StartID2,
72 MPI_Comm *MPI_CommWD2);
73
74 void Make_Comm_Worlds(
75 MPI_Comm MPI_Curret_Comm_WD,
76 int myid0,
77 int numprocs0,
78 int Num_Comm_World,
79 int *myworld1,
80 MPI_Comm *MPI_CommWD, /* size: Num_Comm_World */
81 int *NPROCS1_ID, /* size: numprocs0 */
82 int *Comm_World1, /* size: numprocs0 */
83 int *NPROCS1_WD, /* size: Num_Comm_World */
84 int *Comm_World_StartID /* size: Num_Comm_World */
85 );
86
87
88
89 int Get_OneD_HS_Col(int set_flag, double ****RH, double *H1, int *MP,
90 int *order_GA, int *My_NZeros, int *is1, int *is2);
91
92 static void TRAN_Add_MAT(
93 int mode,
94 int NUM_c,
95 dcomplex w_weight,
96 dcomplex *v,
97 dcomplex *out
98 );
99
100 static double TRAN_DFT_Original(
101 /* input */
102 MPI_Comm comm1,
103 int level_stdout,
104 int iter,
105 int SpinP_switch,
106 double *****nh, /* H */
107 double *****ImNL, /* not used, s-o coupling */
108 double ****CntOLP,
109 int atomnum,
110 int Matomnum,
111 int *WhatSpecies,
112 int *Spe_Total_CNO,
113 int *FNAN,
114 int **natn,
115 int **ncn,
116 int *M2G,
117 int *G2ID,
118 int *F_G2M,
119 int **atv_ijk,
120 int *List_YOUSO,
121 /* output */
122 double *****CDM, /* output, density matrix */
123 double *****EDM, /* not used */
124 double ***TRAN_DecMulP, /* output, partial DecMulP */
125 double Eele0[2], double Eele1[2],
126 double ChemP_e0[2]);
127
128
129 static void TRAN_DFT_Kdependent(
130 /* input */
131 MPI_Comm comm1,
132 int parallel_mode,
133 int numprocs,
134 int myid,
135 int level_stdout,
136 int iter,
137 int SpinP_switch,
138 double k2,
139 double k3,
140 int k_op,
141 int *order_GA,
142 double **DM1,
143 double **H1,
144 double *S1,
145 double *****nh, /* H */
146 double *****ImNL, /* not used, s-o coupling */
147 double ****CntOLP,
148 int atomnum,
149 int Matomnum,
150 int *WhatSpecies,
151 int *Spe_Total_CNO,
152 int *FNAN,
153 int **natn,
154 int **ncn,
155 int *M2G,
156 int *G2ID,
157 int **atv_ijk,
158 int *List_YOUSO,
159 /* output */
160 double *****CDM, /* output, charge density */
161 double *****EDM, /* not used */
162 double Eele0[2], double Eele1[2]); /* not used */
163
164
165
TRAN_DFT(MPI_Comm comm1,int SucceedReadingDMfile,int level_stdout,int iter,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int * F_G2M,int ** atv_ijk,int * List_YOUSO,double ***** CDM,double ***** EDM,double *** TRAN_DecMulP,double Eele0[2],double Eele1[2],double ChemP_e0[2])166 double TRAN_DFT(
167 /* input */
168 MPI_Comm comm1,
169 int SucceedReadingDMfile,
170 int level_stdout,
171 int iter,
172 int SpinP_switch,
173 double *****nh, /* H */
174 double *****ImNL, /* not used, s-o coupling */
175 double ****CntOLP,
176 int atomnum,
177 int Matomnum,
178 int *WhatSpecies,
179 int *Spe_Total_CNO,
180 int *FNAN,
181 int **natn,
182 int **ncn,
183 int *M2G,
184 int *G2ID,
185 int *F_G2M,
186 int **atv_ijk,
187 int *List_YOUSO,
188 /* output */
189 double *****CDM, /* output, density matrix */
190 double *****EDM, /* not used */
191 double ***TRAN_DecMulP, /* output, partial DecMulP */
192 double Eele0[2], double Eele1[2],
193 double ChemP_e0[2])
194 {
195 double TStime,TEtime,time5;
196
197 dtime(&TStime);
198
199 /******************************************************
200 if TRAN_Iter_Initial_Band<SCF_iter, employ TRAN_DFT
201 ******************************************************/
202
203 if (TRAN_SCF_Iter_Band<iter || SucceedReadingDMfile==1){
204
205 TRAN_DFT_Original( comm1,
206 level_stdout,
207 iter,
208 SpinP_switch,
209 nh, /* H */
210 ImNL, /* not used, s-o coupling */
211 CntOLP,
212 atomnum,
213 Matomnum,
214 WhatSpecies,
215 Spe_Total_CNO,
216 FNAN,
217 natn,
218 ncn,
219 M2G,
220 G2ID,
221 F_G2M,
222 atv_ijk,
223 List_YOUSO,
224 /* output */
225 CDM, /* output, density matrix */
226 EDM, /* not used */
227 TRAN_DecMulP, /* output, partial DecMulP */
228 Eele0, Eele1,
229 ChemP_e0);
230 }
231
232 /*************************************************
233 if SCF_iter<=TRAN_SCF_Iter_Band,
234 employ the band diagonalization
235 *************************************************/
236
237 else {
238
239 int i,j,k,n,n2,wanA;
240 int ii,ij,ik,T_knum,size_H1;
241 int Kspace_grid1,Kspace_grid2,Kspace_grid3;
242 int *MP;
243 int *order_GA;
244 int *My_NZeros;
245 int *SP_NZeros;
246 int *SP_Atoms;
247 double *ko;
248 double *koS;
249 double tmp;
250 dcomplex **H_Band_Col;
251 dcomplex **S_Band;
252 dcomplex **C_Band_Col;
253 dcomplex *BLAS_S;
254 double *H1_Band_Col;
255 double *S1_Band_Col;
256 double *CDM1_Band_Col;
257 double *EDM1_Band_Col;
258 int ***k_op;
259 int *T_k_op;
260 int **T_k_ID;
261 double *T_KGrids1,*T_KGrids2,*T_KGrids3;
262 double ***EIGEN_Band_Col;
263
264 int numprocs0,myid0;
265 int numprocs1,myid1;
266
267 int Num_Comm_World1;
268 int Num_Comm_World2;
269
270 int myworld1;
271 int myworld2;
272
273 int *NPROCS_ID1;
274 int *Comm_World1;
275 int *NPROCS_WD1;
276 int *Comm_World_StartID1;
277 MPI_Comm *MPI_CommWD1;
278
279 int *NPROCS_ID2;
280 int *NPROCS_WD2;
281 int *Comm_World2;
282 int *Comm_World_StartID2;
283 MPI_Comm *MPI_CommWD2;
284
285 /* MPI */
286 MPI_Comm_size(comm1,&numprocs0);
287 MPI_Comm_rank(comm1,&myid0);
288
289 Kspace_grid1 = 1;
290 Kspace_grid2 = TRAN_Kspace_grid2;
291 Kspace_grid3 = TRAN_Kspace_grid3;
292
293 n = 0;
294 for (i=1; i<=atomnum; i++){
295 wanA = WhatSpecies[i];
296 n += Spe_Total_CNO[wanA];
297 }
298 n2 = n + 2;
299
300 MP = (int*)malloc(sizeof(int)*List_YOUSO[1]);
301 order_GA = (int*)malloc(sizeof(int)*(List_YOUSO[1]+1));
302
303 My_NZeros = (int*)malloc(sizeof(int)*numprocs0);
304 SP_NZeros = (int*)malloc(sizeof(int)*numprocs0);
305 SP_Atoms = (int*)malloc(sizeof(int)*numprocs0);
306
307 ko = (double*)malloc(sizeof(double)*(n+1));
308 koS = (double*)malloc(sizeof(double)*(n+1));
309
310 H_Band_Col = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
311 for (j=0; j<n+1; j++){
312 H_Band_Col[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
313 }
314
315 S_Band = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
316 for (i=0; i<n+1; i++){
317 S_Band[i] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
318 }
319
320 C_Band_Col = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
321 for (j=0; j<n+1; j++){
322 C_Band_Col[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
323 }
324
325 BLAS_S = (dcomplex*)malloc(sizeof(dcomplex)*n*n);
326
327 /* find size_H1 */
328
329 size_H1 = Get_OneD_HS_Col(0, nh[0], &tmp, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
330
331 H1_Band_Col = (double*)malloc(sizeof(double)*size_H1);
332 S1_Band_Col = (double*)malloc(sizeof(double)*size_H1);
333 CDM1_Band_Col = (double*)malloc(sizeof(double)*size_H1);
334 EDM1_Band_Col = (double*)malloc(sizeof(double)*size_H1);
335
336 k_op = (int***)malloc(sizeof(int**)*Kspace_grid1);
337 for (i=0;i<Kspace_grid1; i++) {
338 k_op[i] = (int**)malloc(sizeof(int*)*Kspace_grid2);
339 for (j=0;j<Kspace_grid2; j++) {
340 k_op[i][j] = (int*)malloc(sizeof(int)*Kspace_grid3);
341 }
342 }
343
344 for (i=0; i<Kspace_grid1; i++) {
345 for (j=0; j<Kspace_grid2; j++) {
346 for (k=0; k<Kspace_grid3; k++) {
347 k_op[i][j][k] = -999;
348 }
349 }
350 }
351
352 for (i=0; i<Kspace_grid1; i++) {
353 for (j=0; j<Kspace_grid2; j++) {
354 for (k=0; k<Kspace_grid3; k++) {
355
356 if ( k_op[i][j][k]==-999 ) {
357 k_inversion(i,j,k,Kspace_grid1,Kspace_grid2,Kspace_grid3,&ii,&ij,&ik);
358 if ( i==ii && j==ij && k==ik ) {
359 k_op[i][j][k] = 1;
360 }
361
362 else {
363 k_op[i][j][k] = 2;
364 k_op[ii][ij][ik] = 0;
365 }
366 }
367 } /* k */
368 } /* j */
369 } /* i */
370
371 /* find T_knum */
372
373 T_knum = 0;
374 for (i=0; i<Kspace_grid1; i++) {
375 for (j=0; j<Kspace_grid2; j++) {
376 for (k=0; k<Kspace_grid3; k++) {
377 if (0<k_op[i][j][k]){
378 T_knum++;
379 }
380 }
381 }
382 }
383
384 T_KGrids1 = (double*)malloc(sizeof(double)*T_knum);
385 T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
386 T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
387 T_k_op = (int*)malloc(sizeof(int)*T_knum);
388
389 T_k_ID = (int**)malloc(sizeof(int*)*2);
390 for (i=0; i<2; i++){
391 T_k_ID[i] = (int*)malloc(sizeof(int)*T_knum);
392 }
393
394 EIGEN_Band_Col = (double***)malloc(sizeof(double**)*List_YOUSO[23]);
395 for (i=0; i<List_YOUSO[23]; i++){
396 EIGEN_Band_Col[i] = (double**)malloc(sizeof(double*)*T_knum);
397 for (j=0; j<T_knum; j++){
398 EIGEN_Band_Col[i][j] = (double*)malloc(sizeof(double)*(n+1));
399 for (k=0; k<(n+1); k++) EIGEN_Band_Col[i][j][k] = 1.0e+5;
400 }
401 }
402
403 /***********************************************
404 allocation of arrays for the first world
405 and
406 make the first level worlds
407 ***********************************************/
408
409 Num_Comm_World1 = SpinP_switch + 1;
410
411 NPROCS_ID1 = (int*)malloc(sizeof(int)*numprocs0);
412 Comm_World1 = (int*)malloc(sizeof(int)*numprocs0);
413 NPROCS_WD1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
414 Comm_World_StartID1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
415 MPI_CommWD1 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World1);
416
417 Make_Comm_Worlds(comm1, myid0, numprocs0, Num_Comm_World1, &myworld1, MPI_CommWD1,
418 NPROCS_ID1, Comm_World1, NPROCS_WD1, Comm_World_StartID1);
419
420 MPI_Comm_size(MPI_CommWD1[myworld1],&numprocs1);
421 MPI_Comm_rank(MPI_CommWD1[myworld1],&myid1);
422
423 /***********************************************
424 allocation of arrays for the second world
425 and
426 make the second level worlds
427 ***********************************************/
428
429 if (T_knum<=numprocs1){
430
431 Num_Comm_World2 = T_knum;
432
433 NPROCS_ID2 = (int*)malloc(sizeof(int)*numprocs1);
434 Comm_World2 = (int*)malloc(sizeof(int)*numprocs1);
435 NPROCS_WD2 = (int*)malloc(sizeof(int)*Num_Comm_World2);
436 Comm_World_StartID2 = (int*)malloc(sizeof(int)*Num_Comm_World2);
437 MPI_CommWD2 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World2);
438
439 Make_Comm_Worlds(MPI_CommWD1[myworld1], myid1, numprocs1, Num_Comm_World2, &myworld2, MPI_CommWD2,
440 NPROCS_ID2, Comm_World2, NPROCS_WD2, Comm_World_StartID2);
441 }
442
443 /***********************************************
444 call Band_DFT_Col
445 ***********************************************/
446
447 time5 = Band_DFT_Col( 1,
448 Kspace_grid1,
449 Kspace_grid2,
450 Kspace_grid3,
451 SpinP_switch,
452 nh,
453 ImNL,
454 CntOLP,
455 CDM,
456 EDM,
457 Eele0,
458 Eele1,
459 MP,order_GA,ko,koS,EIGEN_Band_Col,
460 H1_Band_Col,S1_Band_Col,
461 CDM1_Band_Col,EDM1_Band_Col,
462 H_Band_Col,S_Band,C_Band_Col,
463 BLAS_S,
464 k_op,T_k_op,T_k_ID,
465 T_KGrids1,T_KGrids2,T_KGrids3,
466 myworld1,
467 NPROCS_ID1,
468 Comm_World1,
469 NPROCS_WD1,
470 Comm_World_StartID1,
471 MPI_CommWD1,
472 myworld2,
473 NPROCS_ID2,
474 NPROCS_WD2,
475 Comm_World2,
476 Comm_World_StartID2,
477 MPI_CommWD2);
478
479 Eele0[0] = 0.0;
480 Eele0[1] = 0.0;
481
482 Eele1[0] = 0.0;
483 Eele1[1] = 0.0;
484
485 /***********************************************
486 freeing of arrays
487 ***********************************************/
488
489 n = 0;
490 for (i=1; i<=atomnum; i++){
491 wanA = WhatSpecies[i];
492 n += Spe_Total_CNO[wanA];
493 }
494 n2 = n + 2;
495
496 free(MP);
497 free(order_GA);
498
499 free(My_NZeros);
500 free(SP_NZeros);
501 free(SP_Atoms);
502
503 free(ko);
504 free(koS);
505
506 for (j=0; j<n+1; j++){
507 free(H_Band_Col[j]);
508 }
509 free(H_Band_Col);
510
511 for (i=0; i<n+1; i++){
512 free(S_Band[i]);
513 }
514 free(S_Band);
515
516 for (j=0; j<n+1; j++){
517 free(C_Band_Col[j]);
518 }
519 free(C_Band_Col);
520
521 free(BLAS_S);
522
523 free(H1_Band_Col);
524 free(S1_Band_Col);
525 free(CDM1_Band_Col);
526 free(EDM1_Band_Col);
527
528 for (i=0; i<Kspace_grid1; i++) {
529 for (j=0; j<Kspace_grid2; j++) {
530 free(k_op[i][j]);
531 }
532 free(k_op[i]);
533 }
534 free(k_op);
535
536 free(T_KGrids1);
537 free(T_KGrids2);
538 free(T_KGrids3);
539 free(T_k_op);
540
541 for (i=0; i<2; i++){
542 free(T_k_ID[i]);
543 }
544 free(T_k_ID);
545
546 for (i=0; i<List_YOUSO[23]; i++){
547 for (j=0; j<T_knum; j++){
548 free(EIGEN_Band_Col[i][j]);
549 }
550 free(EIGEN_Band_Col[i]);
551 }
552 free(EIGEN_Band_Col);
553
554 /* freeing of arrays for the second world */
555
556 if (T_knum<=numprocs1){
557
558 if (Num_Comm_World2<=numprocs1){
559 MPI_Comm_free(&MPI_CommWD2[myworld2]);
560 }
561
562 free(MPI_CommWD2);
563 free(Comm_World_StartID2);
564 free(NPROCS_WD2);
565 free(Comm_World2);
566 free(NPROCS_ID2);
567 }
568
569 /* freeing of arrays for the first world */
570
571 if (Num_Comm_World1<=numprocs0){
572 MPI_Comm_free(&MPI_CommWD1[myworld1]);
573 }
574
575 free(MPI_CommWD1);
576 free(Comm_World_StartID1);
577 free(NPROCS_WD1);
578 free(Comm_World1);
579 free(NPROCS_ID1);
580
581 } /* else */
582
583 /* for elapsed time */
584 dtime(&TEtime);
585 return TEtime - TStime;
586
587 }
588
589
590
591
592
TRAN_DFT_Original(MPI_Comm comm1,int level_stdout,int iter,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int * F_G2M,int ** atv_ijk,int * List_YOUSO,double ***** CDM,double ***** EDM,double *** TRAN_DecMulP,double Eele0[2],double Eele1[2],double ChemP_e0[2])593 double TRAN_DFT_Original(
594 /* input */
595 MPI_Comm comm1,
596 int level_stdout,
597 int iter,
598 int SpinP_switch,
599 double *****nh, /* H */
600 double *****ImNL, /* not used, s-o coupling */
601 double ****CntOLP,
602 int atomnum,
603 int Matomnum,
604 int *WhatSpecies,
605 int *Spe_Total_CNO,
606 int *FNAN,
607 int **natn,
608 int **ncn,
609 int *M2G,
610 int *G2ID,
611 int *F_G2M,
612 int **atv_ijk,
613 int *List_YOUSO,
614 /* output */
615 double *****CDM, /* output, density matrix */
616 double *****EDM, /* not used */
617 double ***TRAN_DecMulP, /* output, partial DecMulP */
618 double Eele0[2], double Eele1[2],
619 double ChemP_e0[2])
620 {
621 int numprocs0,myid0,ID;
622 int i2,i3,k_op,l1,l2,l3,RnB;
623 int k,E_knum,S_knum,T_knum,num_kloop0;
624 int parallel_mode,kloop,kloop0;
625 int i,j,spin,MA_AN,GA_AN,wanA,tnoA;
626 int LB_AN,GB_AN,wanB,tnoB;
627 int **op_flag,*T_op_flag,*T_k_ID;
628 double *T_KGrids2,*T_KGrids3;
629 double k2,k3,tmp;
630 double TStime,TEtime;
631
632 int *MP;
633 int *order_GA;
634 int *My_NZeros;
635 int *SP_NZeros;
636 int *SP_Atoms;
637
638 int size_H1;
639 int myworld1;
640 int numprocs1,myid1;
641 int Num_Comm_World1;
642 int *NPROCS_ID1;
643 int *Comm_World1;
644 int *NPROCS_WD1;
645 int *Comm_World_StartID1;
646 MPI_Comm *MPI_CommWD1;
647
648 double **DM1,*TDM1;
649 double **H1,*S1;
650
651 MPI_Comm_size(comm1,&numprocs0);
652 MPI_Comm_rank(comm1,&myid0);
653
654 dtime(&TStime);
655
656 if (myid0==Host_ID){
657 printf("<TRAN_DFT>\n");
658 }
659
660 /***************************************
661 ChemP_e0 will be used in outputfile1
662 ***************************************/
663
664 ChemP_e0[0] = ChemP_e[0];
665 ChemP_e0[1] = ChemP_e[1];
666
667 /***********************************
668 initialize CDM
669 ************************************/
670
671 for (spin=0; spin<=SpinP_switch; spin++) {
672 for (MA_AN=1; MA_AN<=Matomnum; MA_AN++) {
673 GA_AN = M2G[MA_AN];
674 wanA = WhatSpecies[GA_AN];
675 tnoA = Spe_Total_CNO[wanA];
676 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
677 GB_AN = natn[GA_AN][LB_AN];
678 wanB = WhatSpecies[GB_AN];
679 tnoB = Spe_Total_CNO[wanB];
680 for (i=0; i<tnoA; i++){
681 for (j=0; j<tnoB; j++){
682 CDM[spin][MA_AN][LB_AN][i][j] = 0.0;
683 }
684 }
685 }
686 }
687 }
688
689 /***********************************
690 set up operation flag
691 ************************************/
692
693 op_flag = (int**)malloc(sizeof(int*)*TRAN_Kspace_grid2);
694 for (i2=0; i2<TRAN_Kspace_grid2; i2++){
695 op_flag[i2] = (int*)malloc(sizeof(int)*TRAN_Kspace_grid3);
696 for (i3=0; i3<TRAN_Kspace_grid3; i3++){
697 op_flag[i2][i3] = -999;
698 }
699 }
700
701 for (i2=0; i2<TRAN_Kspace_grid2; i2++){
702 for (i3=0; i3<TRAN_Kspace_grid3; i3++){
703
704 if (op_flag[i2][i3]<0){
705
706 if ( (TRAN_Kspace_grid2-1-i2)==i2 && (TRAN_Kspace_grid3-1-i3)==i3 ){
707 op_flag[i2][i3] = 1;
708 }
709 else{
710 op_flag[i2][i3] = 2;
711 op_flag[TRAN_Kspace_grid2-1-i2][TRAN_Kspace_grid3-1-i3] = 0;
712 }
713 }
714
715 }
716 }
717
718 /***********************************
719 one-dimentionalize for MPI
720 ************************************/
721
722 T_knum = 0;
723 for (i2=0; i2<TRAN_Kspace_grid2; i2++){
724 for (i3=0; i3<TRAN_Kspace_grid3; i3++){
725 if (0<op_flag[i2][i3]) T_knum++;
726 }
727 }
728
729 T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
730 T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
731 T_op_flag = (int*)malloc(sizeof(int)*T_knum);
732 T_k_ID = (int*)malloc(sizeof(int)*T_knum);
733
734 T_knum = 0;
735
736 for (i2=0; i2<TRAN_Kspace_grid2; i2++){
737
738 k2 = -0.5 + (2.0*(double)i2+1.0)/(2.0*(double)TRAN_Kspace_grid2) + Shift_K_Point;
739
740 for (i3=0; i3<TRAN_Kspace_grid3; i3++){
741
742 k3 = -0.5 + (2.0*(double)i3+1.0)/(2.0*(double)TRAN_Kspace_grid3) - Shift_K_Point;
743
744 if (0<op_flag[i2][i3]){
745
746 T_KGrids2[T_knum] = k2;
747 T_KGrids3[T_knum] = k3;
748 T_op_flag[T_knum] = op_flag[i2][i3];
749
750 T_knum++;
751 }
752 }
753 }
754
755 /***************************************
756 print out
757 ***************************************/
758
759 if (myid0==Host_ID){
760
761 printf(" KGrids2: ");fflush(stdout);
762
763 for (i=0;i<TRAN_Kspace_grid2;i++){
764 if (TRAN_Kspace_grid2==1) k2 = 0.0;
765 else k2 = -0.5 + (2.0*(double)i+1.0)/(2.0*(double)TRAN_Kspace_grid2) + Shift_K_Point;
766 printf("%9.5f ",k2);fflush(stdout);
767 }
768 printf("\n");fflush(stdout);
769
770 printf(" KGrids3: ");fflush(stdout);
771 for (i=0;i<TRAN_Kspace_grid3;i++){
772 if (TRAN_Kspace_grid3==1) k3 = 0.0;
773 else k3 = -0.5 + (2.0*(double)i+1.0)/(2.0*(double)TRAN_Kspace_grid3) - Shift_K_Point;
774 printf("%9.5f ",k3);fflush(stdout);
775 }
776 printf("\n");fflush(stdout);
777 }
778
779 /***************************************************
780 allocate calculations of k-points into processors
781 ***************************************************/
782
783 if (numprocs0<T_knum){
784
785 /* set parallel_mode */
786 parallel_mode = 0;
787
788 /* allocation of kloop to ID */
789
790 for (ID=0; ID<numprocs0; ID++){
791 tmp = (double)T_knum/(double)numprocs0;
792 S_knum = (int)((double)ID*(tmp+1.0e-12));
793 E_knum = (int)((double)(ID+1)*(tmp+1.0e-12)) - 1;
794 if (ID==(numprocs0-1)) E_knum = T_knum - 1;
795 if (E_knum<0) E_knum = 0;
796
797 for (k=S_knum; k<=E_knum; k++){
798 /* ID in the first level world */
799 T_k_ID[k] = ID;
800 }
801 }
802
803 /* find own informations */
804
805 tmp = (double)T_knum/(double)numprocs0;
806 S_knum = (int)((double)myid0*(tmp+1.0e-12));
807 E_knum = (int)((double)(myid0+1)*(tmp+1.0e-12)) - 1;
808 if (myid0==(numprocs0-1)) E_knum = T_knum - 1;
809 if (E_knum<0) E_knum = 0;
810
811 num_kloop0 = E_knum - S_knum + 1;
812
813 }
814
815 else {
816
817 /* set parallel_mode */
818 parallel_mode = 1;
819 num_kloop0 = 1;
820
821 Num_Comm_World1 = T_knum;
822
823 NPROCS_ID1 = (int*)malloc(sizeof(int)*numprocs0);
824 Comm_World1 = (int*)malloc(sizeof(int)*numprocs0);
825 NPROCS_WD1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
826 Comm_World_StartID1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
827 MPI_CommWD1 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World1);
828
829 Make_Comm_Worlds(comm1, myid0, numprocs0, Num_Comm_World1, &myworld1, MPI_CommWD1,
830 NPROCS_ID1, Comm_World1, NPROCS_WD1, Comm_World_StartID1);
831
832 MPI_Comm_size(MPI_CommWD1[myworld1],&numprocs1);
833 MPI_Comm_rank(MPI_CommWD1[myworld1],&myid1);
834
835 S_knum = myworld1;
836
837 /* allocate k-points into processors */
838
839 for (k=0; k<T_knum; k++){
840 /* ID in the first level world */
841 T_k_ID[k] = Comm_World_StartID1[k];
842 }
843
844 }
845
846 /*************************************************************
847 one-dimensitonalize H and S and store them in a compact form
848 *************************************************************/
849
850 MP = (int*)malloc(sizeof(int)*(atomnum+1));
851 order_GA = (int*)malloc(sizeof(int)*(atomnum+1));
852
853 My_NZeros = (int*)malloc(sizeof(int)*numprocs0);
854 SP_NZeros = (int*)malloc(sizeof(int)*numprocs0);
855 SP_Atoms = (int*)malloc(sizeof(int)*numprocs0);
856
857 size_H1 = Get_OneD_HS_Col(0, nh[0], &tmp, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
858
859 DM1 = (double**)malloc(sizeof(double*)*(SpinP_switch+1));
860 for (k=0; k<(SpinP_switch+1); k++){
861 DM1[k] = (double*)malloc(sizeof(double)*size_H1);
862 for (i=0; i<size_H1; i++) DM1[k][i] = 0.0;
863 }
864
865 TDM1 = (double*)malloc(sizeof(double)*size_H1);
866
867 H1 = (double**)malloc(sizeof(double*)*(SpinP_switch+1));
868 for (k=0; k<(SpinP_switch+1); k++){
869 H1[k] = (double*)malloc(sizeof(double)*size_H1);
870 }
871
872 S1 = (double*)malloc(sizeof(double)*size_H1);
873
874 for (k=0; k<(SpinP_switch+1); k++){
875 size_H1 = Get_OneD_HS_Col(1, nh[k], H1[k], MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
876 }
877
878 size_H1 = Get_OneD_HS_Col(1, CntOLP, S1, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
879
880 /***********************************************************
881 start "kloop0"
882 ***********************************************************/
883
884 for (kloop0=0; kloop0<num_kloop0; kloop0++){
885
886 kloop = S_knum + kloop0;
887
888 k2 = T_KGrids2[kloop];
889 k3 = T_KGrids3[kloop];
890 k_op = T_op_flag[kloop];
891
892 if (parallel_mode){
893
894 TRAN_DFT_Kdependent(MPI_CommWD1[myworld1],
895 parallel_mode, numprocs1, myid1,
896 level_stdout, iter, SpinP_switch, k2, k3, k_op, order_GA,
897 DM1,H1,S1,
898 nh, ImNL, CntOLP,
899 atomnum, Matomnum, WhatSpecies, Spe_Total_CNO, FNAN,
900 natn, ncn, M2G, G2ID, atv_ijk, List_YOUSO, CDM, EDM, Eele0, Eele1);
901 }
902 else{
903
904 TRAN_DFT_Kdependent(comm1,
905 parallel_mode, 1, 0,
906 level_stdout, iter, SpinP_switch, k2, k3, k_op, order_GA,
907 DM1,H1,S1,
908 nh, ImNL, CntOLP,
909 atomnum, Matomnum, WhatSpecies, Spe_Total_CNO, FNAN,
910 natn, ncn, M2G, G2ID, atv_ijk, List_YOUSO, CDM, EDM, Eele0, Eele1);
911 }
912
913 } /* kloop0 */
914
915 /* MPI communication of DM1 */
916
917 tmp = 1.0/(double)(TRAN_Kspace_grid2*TRAN_Kspace_grid3);
918
919 for (k=0; k<=SpinP_switch; k++) {
920
921 int itot0,Anum,Bnum;
922 double co,si,kRn;
923
924 MPI_Allreduce( DM1[k], TDM1, size_H1, MPI_DOUBLE, MPI_SUM, comm1);
925
926 itot0 = 0;
927
928 for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
929
930 wanA = WhatSpecies[GA_AN];
931 tnoA = Spe_Total_CNO[wanA];
932 Anum = MP[GA_AN];
933 MA_AN = F_G2M[GA_AN];
934
935 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
936
937 GB_AN = natn[GA_AN][LB_AN];
938 RnB = ncn[GA_AN][LB_AN];
939 wanB = WhatSpecies[GB_AN];
940 tnoB = Spe_Total_CNO[wanB];
941
942 for (i=0;i<tnoA;i++) {
943 for (j=0;j<tnoB;j++) {
944
945 if (1<=MA_AN && MA_AN<=Matomnum){
946 CDM[k][MA_AN][LB_AN][i][j] = TDM1[itot0]*tmp;
947 }
948
949 itot0++;
950
951 }
952 }
953 }
954 }
955
956 } /* k */
957
958 /***********************************************************
959 overwrite CMD with l1=-1 or l1=1 by zero
960 ***********************************************************/
961
962 for (spin=0; spin<=SpinP_switch; spin++) {
963 for (MA_AN=1; MA_AN<=Matomnum; MA_AN++) {
964
965 GA_AN = M2G[MA_AN];
966 wanA = WhatSpecies[GA_AN];
967 tnoA = Spe_Total_CNO[wanA];
968
969 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
970
971 GB_AN = natn[GA_AN][LB_AN];
972 RnB = ncn[GA_AN][LB_AN];
973 wanB = WhatSpecies[GB_AN];
974 tnoB = Spe_Total_CNO[wanB];
975 l1 = atv_ijk[RnB][1];
976 l2 = atv_ijk[RnB][2];
977 l3 = atv_ijk[RnB][3];
978
979 /* DM between C-L or C-R */
980
981 if (l1==-1 || l1==1){
982 for (i=0; i<tnoA; i++){
983 for (j=0; j<tnoB; j++){
984 CDM[spin][MA_AN][LB_AN][i][j] = 0.0;
985 }
986 }
987 }
988
989 }
990 }
991 }
992
993 /***********************************************************
994 calculate (partial) decomposed Mulliken population by
995 density matrix between CL or CR
996
997 This contribution will be added in Mulliken_Charge.c
998 ***********************************************************/
999
1000 {
1001 int MA_AN,GA_AN,wanA,tnoA,GA_AN_e,LB_AN_e,iside;
1002 int direction,Rn_e,GB_AN_e;
1003 double sum;
1004
1005 /* initialize */
1006
1007 for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){
1008
1009 GA_AN = M2G[MA_AN];
1010 wanA = WhatSpecies[GA_AN];
1011 tnoA = Spe_Total_CNO[wanA];
1012
1013 for (spin=0; spin<=SpinP_switch; spin++) {
1014 for (i=0; i<tnoA; i++){
1015 TRAN_DecMulP[spin][MA_AN][i] = 0.0;
1016 }
1017 }
1018 }
1019
1020 /* Left lead */
1021
1022 iside = 0;
1023 direction = -1;
1024
1025 for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){
1026
1027 GA_AN = M2G[MA_AN];
1028 wanA = WhatSpecies[GA_AN];
1029 tnoA = Spe_Total_CNO[wanA];
1030
1031 if (TRAN_region[GA_AN]%10==2){
1032
1033 GA_AN_e = TRAN_Original_Id[GA_AN];
1034
1035 for (spin=0; spin<=SpinP_switch; spin++) {
1036 for (i=0; i<tnoA; i++){
1037
1038 sum = 0.0;
1039
1040 for (LB_AN_e=0; LB_AN_e<=FNAN_e[iside][GA_AN_e]; LB_AN_e++){
1041
1042 GB_AN_e = natn_e[iside][GA_AN_e][LB_AN_e];
1043 Rn_e = ncn_e[iside][GA_AN_e][LB_AN_e];
1044 wanB = WhatSpecies_e[iside][GB_AN_e];
1045 tnoB = Spe_Total_CNO_e[iside][wanB];
1046 l1 = atv_ijk_e[iside][Rn_e][1];
1047
1048 if (l1==direction) {
1049 for (j=0; j<tnoB; j++){
1050 sum += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*
1051 DM_e[iside][0][spin][GA_AN_e][LB_AN_e][i][j];
1052 }
1053 }
1054 }
1055
1056 TRAN_DecMulP[spin][MA_AN][i] = sum;
1057
1058 } /* i */
1059 } /* spin */
1060 }
1061
1062 } /* MA_AN */
1063
1064 /* Right lead */
1065
1066 iside = 1;
1067 direction = 1;
1068
1069 for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){
1070
1071 GA_AN = M2G[MA_AN];
1072 wanA = WhatSpecies[GA_AN];
1073 tnoA = Spe_Total_CNO[wanA];
1074
1075 if (TRAN_region[GA_AN]%10==3){
1076
1077 GA_AN_e = TRAN_Original_Id[GA_AN];
1078
1079 for (spin=0; spin<=SpinP_switch; spin++) {
1080 for (i=0; i<tnoA; i++){
1081
1082 sum = 0.0;
1083
1084 for (LB_AN_e=0; LB_AN_e<=FNAN_e[iside][GA_AN_e]; LB_AN_e++){
1085
1086 GB_AN_e = natn_e[iside][GA_AN_e][LB_AN_e];
1087 Rn_e = ncn_e[iside][GA_AN_e][LB_AN_e];
1088 wanB = WhatSpecies_e[iside][GB_AN_e];
1089 tnoB = Spe_Total_CNO_e[iside][wanB];
1090 l1 = atv_ijk_e[iside][Rn_e][1];
1091
1092 if (l1==direction) {
1093 for (j=0; j<tnoB; j++){
1094 sum += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*
1095 DM_e[iside][0][spin][GA_AN_e][LB_AN_e][i][j];
1096 }
1097 }
1098 }
1099
1100 TRAN_DecMulP[spin][MA_AN][i] = sum;
1101
1102 } /* i */
1103 } /* spin */
1104 }
1105 } /* MA_AN */
1106
1107 }
1108
1109 /* set Eele as 0 */
1110
1111 Eele0[0] = 0.0;
1112 Eele0[1] = 0.0;
1113 Eele1[0] = 0.0;
1114 Eele1[1] = 0.0;
1115
1116 /* free arrays */
1117
1118 for (i2=0; i2<TRAN_Kspace_grid2; i2++){
1119 free(op_flag[i2]);
1120 }
1121 free(op_flag);
1122
1123 free(T_KGrids2);
1124 free(T_KGrids3);
1125 free(T_op_flag);
1126 free(T_k_ID);
1127
1128 if (T_knum<=numprocs0){
1129
1130 if (Num_Comm_World1<=numprocs0){
1131 MPI_Comm_free(&MPI_CommWD1[myworld1]);
1132 }
1133
1134 free(NPROCS_ID1);
1135 free(Comm_World1);
1136 free(NPROCS_WD1);
1137 free(Comm_World_StartID1);
1138 free(MPI_CommWD1);
1139 }
1140
1141 free(MP);
1142 free(order_GA);
1143
1144 free(My_NZeros);
1145 free(SP_NZeros);
1146 free(SP_Atoms);
1147
1148 for (k=0; k<(SpinP_switch+1); k++){
1149 free(DM1[k]);
1150 }
1151 free(DM1);
1152
1153 free(TDM1);
1154
1155 for (k=0; k<(SpinP_switch+1); k++){
1156 free(H1[k]);
1157 }
1158 free(H1);
1159
1160 free(S1);
1161
1162 /* for elapsed time */
1163 dtime(&TEtime);
1164 if (myid0==Host_ID){
1165 printf("<TRAN_DFT> time=%lf\n", TEtime-TStime);fflush(stdout);
1166 }
1167
1168 return TEtime - TStime;
1169 }
1170
1171
1172
1173
1174 /*
1175 * calculate CDM from nh, CntOLP, ...
1176 *
1177 * an alternative routine for Cluster_DFT or Band_DFT
1178 *
1179 */
1180
1181
1182
1183
TRAN_DFT_Kdependent(MPI_Comm comm1,int parallel_mode,int numprocs,int myid,int level_stdout,int iter,int SpinP_switch,double k2,double k3,int k_op,int * order_GA,double ** DM1,double ** H1,double * S1,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int ** atv_ijk,int * List_YOUSO,double ***** CDM,double ***** EDM,double Eele0[2],double Eele1[2])1184 static void TRAN_DFT_Kdependent(
1185 /* input */
1186 MPI_Comm comm1,
1187 int parallel_mode,
1188 int numprocs,
1189 int myid,
1190 int level_stdout,
1191 int iter,
1192 int SpinP_switch,
1193 double k2,
1194 double k3,
1195 int k_op,
1196 int *order_GA,
1197 double **DM1,
1198 double **H1,
1199 double *S1,
1200 double *****nh, /* H */
1201 double *****ImNL, /* not used, SO-coupling */
1202 double ****CntOLP,
1203 int atomnum,
1204 int Matomnum,
1205 int *WhatSpecies,
1206 int *Spe_Total_CNO,
1207 int *FNAN,
1208 int **natn,
1209 int **ncn,
1210 int *M2G,
1211 int *G2ID,
1212 int **atv_ijk,
1213 int *List_YOUSO,
1214 /* output */
1215 double *****CDM, /* output, charge density */
1216 double *****EDM, /* not used */
1217 double Eele0[2], double Eele1[2]) /* not used */
1218
1219 #define GC_ref(i,j) GC[ NUM_c*((j)-1) + (i)-1 ]
1220 #define Gless_ref(i,j) Gless[ NUM_c*((j)-1) + (i)-1 ]
1221
1222 {
1223 int i,j,k,iside,spinsize;
1224 int *MP;
1225 int iw,iw_method;
1226 dcomplex w, w_weight;
1227 dcomplex *GC,*GRL,*GRR;
1228 dcomplex *SigmaL, *SigmaR;
1229 dcomplex *work1,*work2,*Gless;
1230 dcomplex **v2;
1231 double dum;
1232 double TStime,TEtime;
1233 int MA_AN, GA_AN, wanA, tnoA, Anum;
1234 int LB_AN, GB_AN, wanB, tnoB, Bnum;
1235
1236 static int ID;
1237 int Miw,iw0;
1238 double time_a0, time_a1, time_a2;
1239
1240 /* setup MP */
1241 TRAN_Set_MP(0, atomnum, WhatSpecies, Spe_Total_CNO, &NUM_c, &i);
1242 MP = (int*)malloc(sizeof(int)*(NUM_c+1));
1243 TRAN_Set_MP(1, atomnum, WhatSpecies, Spe_Total_CNO, &NUM_c, MP);
1244
1245 /* initialize */
1246 TRAN_Set_Value_double(SCC,NUM_c*NUM_c, 0.0,0.0);
1247 TRAN_Set_Value_double(SCL,NUM_c*NUM_e[0], 0.0,0.0);
1248 TRAN_Set_Value_double(SCR,NUM_c*NUM_e[1], 0.0,0.0);
1249 for (k=0; k<=SpinP_switch; k++) {
1250 TRAN_Set_Value_double(HCC[k],NUM_c*NUM_c, 0.0,0.0);
1251 TRAN_Set_Value_double(HCL[k],NUM_c*NUM_e[0], 0.0,0.0);
1252 TRAN_Set_Value_double(HCR[k],NUM_c*NUM_e[1], 0.0,0.0);
1253 }
1254
1255 /* set Hamiltonian and overlap matrices of left and right leads */
1256
1257 TRAN_Set_SurfOverlap(comm1,"left", k2, k3);
1258 TRAN_Set_SurfOverlap(comm1,"right",k2, k3);
1259
1260 /* set CC, CL and CR */
1261
1262 TRAN_Set_CentOverlap( comm1,
1263 3,
1264 SpinP_switch,
1265 k2,
1266 k3,
1267 order_GA,
1268 H1,
1269 S1,
1270 nh, /* input */
1271 CntOLP, /* input */
1272 atomnum,
1273 Matomnum,
1274 M2G,
1275 G2ID,
1276 WhatSpecies,
1277 Spe_Total_CNO,
1278 FNAN,
1279 natn,
1280 ncn,
1281 atv_ijk);
1282
1283 if (MEASURE_TIME){
1284 dtime(&time_a0);
1285 }
1286
1287 /* allocate */
1288
1289 v2 = (dcomplex**)malloc(sizeof(dcomplex*)*(SpinP_switch+1));
1290
1291 for (k=0; k<=SpinP_switch; k++) {
1292 v2[k] = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c*NUM_c);
1293 TRAN_Set_Value_double( v2[k], NUM_c*NUM_c, 0.0, 0.0);
1294 }
1295
1296 GC = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1297
1298 GRL = (dcomplex*)malloc(sizeof(dcomplex)*NUM_e[0]* NUM_e[0]);
1299 GRR = (dcomplex*)malloc(sizeof(dcomplex)*NUM_e[1]* NUM_e[1]);
1300
1301 SigmaL = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1302 SigmaR = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1303
1304 work1 = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1305 work2 = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1306
1307 Gless = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
1308
1309 if (2<=level_stdout){
1310 printf("NUM_c=%d, NUM_e= %d %d\n",NUM_c, NUM_e[0], NUM_e[1]);
1311 printf("# of freq. to calculate G =%d\n",tran_omega_n_scf);
1312 }
1313
1314 if (SpinP_switch==0) spinsize = 1;
1315 else if (SpinP_switch==1) spinsize = 2;
1316 else if (SpinP_switch==3) spinsize = 1;
1317
1318 /**************************************************************
1319 calculation of Green functions at k and iw
1320 **************************************************************/
1321
1322 for ( Miw=myid; Miw<tran_omega_n_scf*spinsize; Miw+=numprocs ) {
1323
1324 k = Miw/tran_omega_n_scf;
1325 iw = Miw - k*tran_omega_n_scf;
1326
1327 w = tran_omega_scf[iw];
1328 w_weight = tran_omega_weight_scf[iw];
1329 iw_method = tran_integ_method_scf[iw];
1330
1331 /*
1332 printf("Miw=%3d iw=%d of %d w=%le %le weight=%le %le method=%d\n",
1333 Miw,iw,tran_omega_n_scf, w.r,w.i, w_weight.r, w_weight.i, iw_method);
1334 */
1335
1336 /* calculation of surface Green's function and self energy from the LEFT lead */
1337
1338 iside = 0;
1339
1340 TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_e[iside][k], H01_e[iside][k],
1341 S00_e[iside], S01_e[iside], tran_surfgreen_iteration_max,
1342 tran_surfgreen_eps, GRL);
1343
1344 TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRL, NUM_c, HCL[k], SCL, SigmaL);
1345
1346 /* calculation of surface Green's function and self energy from the RIGHT lead */
1347
1348 iside = 1;
1349
1350 TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_e[iside][k], H01_e[iside][k],
1351 S00_e[iside], S01_e[iside], tran_surfgreen_iteration_max,
1352 tran_surfgreen_eps, GRR);
1353
1354 TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRR, NUM_c, HCR[k], SCR, SigmaR);
1355
1356 /* calculation of central retarded Green's function */
1357
1358 TRAN_Calc_CentGreen(w, NUM_c, SigmaL, SigmaR, HCC[k], SCC, GC);
1359
1360 /***********************************************
1361 The non-equilibrium part is calculated by
1362 using the lesser Green's function.
1363 ***********************************************/
1364
1365 if (iw_method==2){
1366
1367 /* calculation of central lesser Green's function */
1368
1369 TRAN_Calc_CentGreenLesser( w, ChemP_e, NUM_c,
1370 Order_Lead_Side,
1371 SigmaL,
1372 SigmaR,
1373 GC,
1374 HCC[k], SCC,
1375 work1, work2, Gless);
1376 }
1377
1378 /***********************************************
1379 add it to construct the density matrix
1380 ***********************************************/
1381
1382 if (iw_method==0)
1383 TRAN_Add_MAT( 0, NUM_c, w_weight, GC, v2[k]);
1384 else if (iw_method==1)
1385 TRAN_Add_MAT( 1, NUM_c, w_weight, GC, v2[k]);
1386 else if (iw_method==2)
1387 TRAN_Add_MAT( 1, NUM_c, w_weight, Gless, v2[k]);
1388
1389 } /* iw */
1390
1391 if (MEASURE_TIME){
1392 dtime(&time_a1);
1393 }
1394
1395 /***********************************************
1396 calculation of density matrix
1397 ***********************************************/
1398
1399 {
1400 int l1,l2,l3,RnB;
1401 int size_v3,itot,itot0;
1402 double kRn,si,co,re,im;
1403 double *my_v3;
1404 double *v3;
1405
1406 /* find the size of v3 */
1407
1408 size_v3 = 0;
1409
1410 for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
1411
1412 wanA = WhatSpecies[GA_AN];
1413 tnoA = Spe_Total_CNO[wanA];
1414 Anum = MP[GA_AN];
1415
1416 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1417
1418 GB_AN = natn[GA_AN][LB_AN];
1419 wanB = WhatSpecies[GB_AN];
1420 tnoB = Spe_Total_CNO[wanB];
1421 Bnum = MP[GB_AN];
1422
1423 for (i=0; i<tnoA; i++) {
1424 for (j=0; j<tnoB; j++) {
1425 size_v3++;
1426 size_v3++;
1427 }
1428 }
1429 }
1430 }
1431
1432 /* allocate arrays */
1433
1434 my_v3 = (double*)malloc(sizeof(double)*size_v3);
1435 v3 = (double*)malloc(sizeof(double)*size_v3);
1436
1437 /* set up v3 */
1438
1439 #define v_idx(i,j) ( ((j)-1)*NUM_c + (i)-1 )
1440
1441 for (k=0; k<=SpinP_switch; k++) {
1442
1443 itot = 0;
1444
1445 for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
1446
1447 wanA = WhatSpecies[GA_AN];
1448 tnoA = Spe_Total_CNO[wanA];
1449 Anum = MP[GA_AN];
1450
1451 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1452
1453 GB_AN = natn[GA_AN][LB_AN];
1454 wanB = WhatSpecies[GB_AN];
1455 tnoB = Spe_Total_CNO[wanB];
1456 Bnum = MP[GB_AN];
1457
1458 for (i=0; i<tnoA; i++) {
1459 for (j=0; j<tnoB; j++) {
1460 my_v3[itot++] = v2[k][ v_idx( Anum+i, Bnum+j) ].r;
1461 my_v3[itot++] = v2[k][ v_idx( Anum+i, Bnum+j) ].i;
1462 }
1463 }
1464 }
1465 }
1466
1467 if (parallel_mode){
1468 MPI_Allreduce( my_v3, v3, itot, MPI_DOUBLE, MPI_SUM, comm1);
1469 }
1470 else {
1471 for (i=0; i<itot; i++) {
1472 v3[i] = my_v3[i];
1473 }
1474 }
1475
1476 /* v3 -> CDM */
1477
1478 itot = 0;
1479 itot0 = 0;
1480
1481 for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
1482
1483 wanA = WhatSpecies[GA_AN];
1484 tnoA = Spe_Total_CNO[wanA];
1485 Anum = MP[GA_AN];
1486
1487 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1488
1489 GB_AN = natn[GA_AN][LB_AN];
1490 RnB = ncn[GA_AN][LB_AN];
1491 wanB = WhatSpecies[GB_AN];
1492 tnoB = Spe_Total_CNO[wanB];
1493 Bnum = MP[GB_AN];
1494 l1 = atv_ijk[RnB][1];
1495 l2 = atv_ijk[RnB][2];
1496 l3 = atv_ijk[RnB][3];
1497
1498 kRn = k2*(double)l2 + k3*(double)l3;
1499 si = (double)k_op*sin(2.0*PI*kRn);
1500 co = (double)k_op*cos(2.0*PI*kRn);
1501
1502 for (i=0; i<tnoA; i++) {
1503 for (j=0; j<tnoB; j++) {
1504 re = v3[itot++];
1505 im = v3[itot++];
1506
1507 /* divided by numprocs due to the later MPI_Allreduce */
1508 DM1[k][itot0++] += (re*co + im*si)/(double)numprocs;
1509
1510 }
1511 }
1512 }
1513 }
1514
1515 } /* k */
1516
1517 /* free arrays */
1518
1519 free(my_v3);
1520 free(v3);
1521 }
1522
1523 if (MEASURE_TIME){
1524 MPI_Barrier(comm1);
1525 dtime(&time_a2);
1526 printf("TRAN_DFT(%d)> calculaiton (%le)\n",myid, time_a1-time_a0 );
1527 }
1528
1529 /* free arrays */
1530
1531 free(Gless);
1532 free(work2);
1533 free(work1);
1534 free(SigmaR);
1535 free(SigmaL);
1536 free(GRR);
1537 free(GRL);
1538 free(GC);
1539
1540 for (k=0; k<=SpinP_switch; k++) {
1541 free(v2[k]);
1542 }
1543 free(v2);
1544
1545 free(MP);
1546 }
1547
1548
1549
TRAN_Add_MAT(int mode,int NUM_c,dcomplex w_weight,dcomplex * v,dcomplex * out)1550 static void TRAN_Add_MAT(
1551 int mode,
1552 int NUM_c,
1553 dcomplex w_weight,
1554 dcomplex *v,
1555 dcomplex *out
1556 )
1557
1558 #define v_ref(i,j) v[NUM_c*(j)+(i)]
1559 #define out_ref(i,j) out[NUM_c*(j)+(i)]
1560
1561 {
1562 int i,j;
1563 double re,im;
1564
1565 switch (mode) {
1566
1567 case 0:
1568
1569 for (i=0; i<NUM_c; i++) {
1570 for (j=0; j<NUM_c; j++) {
1571 re = v_ref(i,j).r + v_ref(j,i).r;
1572 im = v_ref(i,j).i - v_ref(j,i).i;
1573 out_ref(i,j).r += re*w_weight.r - im*w_weight.i;
1574 out_ref(i,j).i += re*w_weight.i + im*w_weight.r;
1575 }
1576 }
1577
1578 break;
1579
1580 case 1:
1581
1582 for (i=0; i<NUM_c*NUM_c; i++) {
1583 out[i].r += ( v[i].r*w_weight.r - v[i].i*w_weight.i );
1584 out[i].i += ( v[i].r*w_weight.i + v[i].i*w_weight.r );
1585 }
1586
1587 break;
1588
1589 }
1590
1591 }
1592
1593