1 /**********************************************************************
2 TRAN_DFT_NC.c:
3
4 TRAN_DFT_NC.c is a subroutine to perform self-consistent calculations
5 of a central region with left and right infinite leads based on
6 a non-equilibrium Green's function method coupled with NC-DFT.
7
8 Log of TRAN_DFT_NC.c:
9
10 11/Dec/2005 Released by H.Kino
11 24/July/2008 Modified by T.Ozaki
12 27/June/2016 Modified by T.Ozaki
13
14 ***********************************************************************/
15 /* revised by Y. Xiao for Noncollinear NEGF calculations */
16
17 #define MEASURE_TIME 0
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <math.h>
22 #include <time.h>
23 #include <mpi.h>
24 #include <omp.h>
25 #include "tran_prototypes.h"
26 #include "tran_variables.h"
27
28 void dtime(double *);
29
30 void k_inversion(int i, int j, int k,
31 int mi, int mj, int mk,
32 int *ii, int *ij, int *ik );
33
34 double Band_DFT_NonCol(int SCF_iter,
35 double *koS,
36 dcomplex **S,
37 int knum_i, int knum_j, int knum_k,
38 int SpinP_switch,
39 double *****nh,
40 double *****ImNL,
41 double ****CntOLP,
42 double *****CDM,
43 double *****EDM,
44 double Eele0[2], double Eele1[2]);
45
46 void Make_Comm_Worlds(
47 MPI_Comm MPI_Curret_Comm_WD,
48 int myid0,
49 int numprocs0,
50 int Num_Comm_World,
51 int *myworld1,
52 MPI_Comm *MPI_CommWD, /* size: Num_Comm_World */
53 int *NPROCS1_ID, /* size: numprocs0 */
54 int *Comm_World1, /* size: numprocs0 */
55 int *NPROCS1_WD, /* size: Num_Comm_World */
56 int *Comm_World_StartID /* size: Num_Comm_World */
57 );
58
59
60
61 int Get_OneD_HS_Col(int set_flag, double ****RH, double *H1, int *MP,
62 int *order_GA, int *My_NZeros, int *is1, int *is2);
63
64 static void TRAN_Add_MAT(
65 int mode,
66 int NUM_c,
67 dcomplex w_weight,
68 dcomplex *v,
69 dcomplex *out
70 );
71
72 static double TRAN_DFT_Original(
73 /* input */
74 MPI_Comm comm1,
75 int level_stdout,
76 int iter,
77 int SpinP_switch,
78 double *****nh, /* H */
79 double *****ImNL, /* SO coupling */
80 double ****CntOLP,
81 int atomnum,
82 int Matomnum,
83 int *WhatSpecies,
84 int *Spe_Total_CNO,
85 int *FNAN,
86 int **natn,
87 int **ncn,
88 int *M2G,
89 int *G2ID,
90 int *F_G2M,
91 int **atv_ijk,
92 int *List_YOUSO,
93 /* output */
94 double *****CDM, /* output, density matrix */
95 double *****iCDM,
96 double *****EDM, /* not used */
97 double ***TRAN_DecMulP, /* output, partial DecMulP */
98 double Eele0[2], double Eele1[2],
99 double ChemP_e0[2]);
100
101
102 static void TRAN_DFT_Kdependent(
103 /* input */
104 MPI_Comm comm1,
105 int parallel_mode,
106 int numprocs,
107 int myid,
108 int level_stdout,
109 int iter,
110 int SpinP_switch,
111 double k2,
112 double k3,
113 int k_op,
114 int *order_GA,
115 double **DM1,
116 double **DM2,
117 double **H1,
118 double **H2,
119 double *S1,
120 double *****nh, /* H */
121 double *****ImNL, /* SO coupling */
122 double ****CntOLP,
123 int atomnum,
124 int Matomnum,
125 int *WhatSpecies,
126 int *Spe_Total_CNO,
127 int *FNAN,
128 int **natn,
129 int **ncn,
130 int *M2G,
131 int *G2ID,
132 int **atv_ijk,
133 int *List_YOUSO,
134 /* output */
135 double *****CDM, /* output, charge density */
136 double *****EDM, /* not used */
137 double Eele0[2], double Eele1[2]); /* not used */
138
139
140
TRAN_DFT_NC(MPI_Comm comm1,int SucceedReadingDMfile,int level_stdout,int iter,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int * F_G2M,int ** atv_ijk,int * List_YOUSO,double * koS,dcomplex ** S,double ***** CDM,double ***** iCDM,double ***** EDM,double *** TRAN_DecMulP,double Eele0[2],double Eele1[2],double ChemP_e0[2])141 double TRAN_DFT_NC(
142 /* input */
143 MPI_Comm comm1,
144 int SucceedReadingDMfile,
145 int level_stdout,
146 int iter,
147 int SpinP_switch,
148 double *****nh, /* H */
149 double *****ImNL, /* SO coupling */
150 double ****CntOLP,
151 int atomnum,
152 int Matomnum,
153 int *WhatSpecies,
154 int *Spe_Total_CNO,
155 int *FNAN,
156 int **natn,
157 int **ncn,
158 int *M2G,
159 int *G2ID,
160 int *F_G2M,
161 int **atv_ijk,
162 int *List_YOUSO,
163 double *koS,
164 dcomplex **S,
165 /* output */
166 double *****CDM, /* output, density matrix */
167 double *****iCDM,
168 double *****EDM, /* not used */
169 double ***TRAN_DecMulP, /* output, partial DecMulP */
170 double Eele0[2], double Eele1[2],
171 double ChemP_e0[2])
172 {
173 double TStime,TEtime,time5;
174
175 dtime(&TStime);
176
177 /******************************************************
178 if TRAN_Iter_Initial_Band<SCF_iter, employ TRAN_DFT
179 ******************************************************/
180
181 if (TRAN_SCF_Iter_Band<iter || SucceedReadingDMfile==1){
182
183 TRAN_DFT_Original( comm1,
184 level_stdout,
185 iter,
186 SpinP_switch,
187 nh, /* H */
188 ImNL, /* SO coupling */
189 CntOLP,
190 atomnum,
191 Matomnum,
192 WhatSpecies,
193 Spe_Total_CNO,
194 FNAN,
195 natn,
196 ncn,
197 M2G,
198 G2ID,
199 F_G2M,
200 atv_ijk,
201 List_YOUSO,
202 /* output */
203 CDM, /* output, density matrix */
204 iCDM,
205 EDM, /* not used */
206 TRAN_DecMulP, /* output, partial DecMulP */
207 Eele0, Eele1,
208 ChemP_e0);
209 }
210
211 /*************************************************************
212 if TRAN_SCF_Iter_Band<iter, employ the band diagonalization
213 *************************************************************/
214
215 else {
216 int knum_i, knum_j, knum_k;
217
218 knum_i = 1;
219 knum_j = TRAN_Kspace_grid2;
220 knum_k = TRAN_Kspace_grid3;
221
222 time5 = Band_DFT_NonCol(iter, koS, S, knum_i, knum_j, knum_k, SpinP_switch,
223 nh, ImNL, CntOLP, CDM, EDM, Eele0, Eele1);
224
225 }
226
227 dtime(&TEtime);
228 return TEtime - TStime;
229 }
230
231
232
233
234
TRAN_DFT_Original(MPI_Comm comm1,int level_stdout,int iter,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int * F_G2M,int ** atv_ijk,int * List_YOUSO,double ***** CDM,double ***** iCDM,double ***** EDM,double *** TRAN_DecMulP,double Eele0[2],double Eele1[2],double ChemP_e0[2])235 double TRAN_DFT_Original(
236 /* input */
237 MPI_Comm comm1,
238 int level_stdout,
239 int iter,
240 int SpinP_switch,
241 double *****nh, /* H */
242 double *****ImNL, /* SO coupling */
243 double ****CntOLP,
244 int atomnum,
245 int Matomnum,
246 int *WhatSpecies,
247 int *Spe_Total_CNO,
248 int *FNAN,
249 int **natn,
250 int **ncn,
251 int *M2G,
252 int *G2ID,
253 int *F_G2M,
254 int **atv_ijk,
255 int *List_YOUSO,
256 /* output */
257 double *****CDM, /* output, density matrix */
258 double *****iCDM,
259 double *****EDM, /* not used */
260 double ***TRAN_DecMulP, /* output, partial DecMulP */
261 double Eele0[2], double Eele1[2],
262 double ChemP_e0[2])
263 {
264 int numprocs0,myid0,ID;
265 int i2,i3,k_op,l1,l2,l3,RnB;
266 int k,E_knum,S_knum,T_knum,num_kloop0;
267 int parallel_mode,kloop,kloop0;
268 int i,j,spin,MA_AN,GA_AN,wanA,tnoA;
269 int LB_AN,GB_AN,wanB,tnoB;
270 int **op_flag,*T_op_flag,*T_k_ID;
271 double *T_KGrids2,*T_KGrids3;
272 double k2,k3,tmp;
273 double TStime,TEtime;
274
275 int *MP;
276 int *order_GA;
277 int *My_NZeros;
278 int *SP_NZeros;
279 int *SP_Atoms;
280
281 int size_H1;
282 int myworld1;
283 int numprocs1,myid1;
284 int Num_Comm_World1;
285 int *NPROCS_ID1;
286 int *Comm_World1;
287 int *NPROCS_WD1;
288 int *Comm_World_StartID1;
289 MPI_Comm *MPI_CommWD1;
290
291 double **DM1,*TDM1,**DM2,*TDM2;
292 double **H1,*S1;
293 double **H2;
294
295 MPI_Comm_size(comm1,&numprocs0);
296 MPI_Comm_rank(comm1,&myid0);
297
298 dtime(&TStime);
299
300 if (myid0==Host_ID){
301 printf("<TRAN_DFT>\n");
302 }
303
304 /***************************************
305 ChemP_e0 will be used in outputfile1
306 ***************************************/
307
308 ChemP_e0[0] = ChemP_e[0];
309 ChemP_e0[1] = ChemP_e[1];
310
311 /***********************************
312 initialize CDM
313 ************************************/
314
315 for (spin=0; spin<=SpinP_switch; spin++) {
316 for (MA_AN=1; MA_AN<=Matomnum; MA_AN++) {
317 GA_AN = M2G[MA_AN];
318 wanA = WhatSpecies[GA_AN];
319 tnoA = Spe_Total_CNO[wanA];
320 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
321 GB_AN = natn[GA_AN][LB_AN];
322 wanB = WhatSpecies[GB_AN];
323 tnoB = Spe_Total_CNO[wanB];
324 for (i=0; i<tnoA; i++){
325 for (j=0; j<tnoB; j++){
326 CDM[spin][MA_AN][LB_AN][i][j] = 0.0;
327 if(spin<2) {
328 iCDM[spin][MA_AN][LB_AN][i][j] = 0.0;
329 }
330 }
331 }
332 }
333 }
334 }
335
336 /***********************************
337 set up operation flag
338 ************************************/
339
340 op_flag = (int**)malloc(sizeof(int*)*TRAN_Kspace_grid2);
341 for (i2=0; i2<TRAN_Kspace_grid2; i2++){
342 op_flag[i2] = (int*)malloc(sizeof(int)*TRAN_Kspace_grid3);
343 for (i3=0; i3<TRAN_Kspace_grid3; i3++){
344 op_flag[i2][i3] = 1;
345 }
346 }
347
348 /***********************************
349 one-dimentionalize for MPI
350 ************************************/
351
352 T_knum = 0;
353 for (i2=0; i2<TRAN_Kspace_grid2; i2++){
354 for (i3=0; i3<TRAN_Kspace_grid3; i3++){
355 if (0<op_flag[i2][i3]) T_knum++;
356 }
357 }
358
359 T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
360 T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
361 T_op_flag = (int*)malloc(sizeof(int)*T_knum);
362 T_k_ID = (int*)malloc(sizeof(int)*T_knum);
363
364 T_knum = 0;
365
366 for (i2=0; i2<TRAN_Kspace_grid2; i2++){
367
368 k2 = -0.5 + (2.0*(double)i2+1.0)/(2.0*(double)TRAN_Kspace_grid2) + Shift_K_Point;
369
370 for (i3=0; i3<TRAN_Kspace_grid3; i3++){
371
372 k3 = -0.5 + (2.0*(double)i3+1.0)/(2.0*(double)TRAN_Kspace_grid3) - Shift_K_Point;
373
374 if (0<op_flag[i2][i3]){
375
376 T_KGrids2[T_knum] = k2;
377 T_KGrids3[T_knum] = k3;
378 T_op_flag[T_knum] = op_flag[i2][i3];
379
380 T_knum++;
381 }
382 }
383 }
384
385 /***************************************
386 print out
387 ***************************************/
388
389 if (myid0==Host_ID){
390
391 printf(" KGrids2: ");fflush(stdout);
392
393 for (i=0;i<TRAN_Kspace_grid2;i++){
394 if (TRAN_Kspace_grid2==1) k2 = 0.0;
395 else k2 = -0.5 + (2.0*(double)i+1.0)/(2.0*(double)TRAN_Kspace_grid2) + Shift_K_Point;
396 printf("%9.5f ",k2);fflush(stdout);
397 }
398 printf("\n");fflush(stdout);
399
400 printf(" KGrids3: ");fflush(stdout);
401 for (i=0;i<TRAN_Kspace_grid3;i++){
402 if (TRAN_Kspace_grid3==1) k3 = 0.0;
403 else k3 = -0.5 + (2.0*(double)i+1.0)/(2.0*(double)TRAN_Kspace_grid3) - Shift_K_Point;
404 printf("%9.5f ",k3);fflush(stdout);
405 }
406 printf("\n");fflush(stdout);
407 }
408
409 /***************************************************
410 allocate calculations of k-points into processors
411 ***************************************************/
412
413 if (numprocs0<T_knum){
414
415 /* set parallel_mode */
416 parallel_mode = 0;
417
418 /* allocation of kloop to ID */
419
420 for (ID=0; ID<numprocs0; ID++){
421 tmp = (double)T_knum/(double)numprocs0;
422 S_knum = (int)((double)ID*(tmp+1.0e-12));
423 E_knum = (int)((double)(ID+1)*(tmp+1.0e-12)) - 1;
424 if (ID==(numprocs0-1)) E_knum = T_knum - 1;
425 if (E_knum<0) E_knum = 0;
426
427 for (k=S_knum; k<=E_knum; k++){
428 /* ID in the first level world */
429 T_k_ID[k] = ID;
430 }
431 }
432
433 /* find own informations */
434
435 tmp = (double)T_knum/(double)numprocs0;
436 S_knum = (int)((double)myid0*(tmp+1.0e-12));
437 E_knum = (int)((double)(myid0+1)*(tmp+1.0e-12)) - 1;
438 if (myid0==(numprocs0-1)) E_knum = T_knum - 1;
439 if (E_knum<0) E_knum = 0;
440
441 num_kloop0 = E_knum - S_knum + 1;
442
443 }
444
445 else {
446
447 /* set parallel_mode */
448 parallel_mode = 1;
449 num_kloop0 = 1;
450
451 Num_Comm_World1 = T_knum;
452
453 NPROCS_ID1 = (int*)malloc(sizeof(int)*numprocs0);
454 Comm_World1 = (int*)malloc(sizeof(int)*numprocs0);
455 NPROCS_WD1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
456 Comm_World_StartID1 = (int*)malloc(sizeof(int)*Num_Comm_World1);
457 MPI_CommWD1 = (MPI_Comm*)malloc(sizeof(MPI_Comm)*Num_Comm_World1);
458
459 Make_Comm_Worlds(comm1, myid0, numprocs0, Num_Comm_World1, &myworld1, MPI_CommWD1,
460 NPROCS_ID1, Comm_World1, NPROCS_WD1, Comm_World_StartID1);
461
462 MPI_Comm_size(MPI_CommWD1[myworld1],&numprocs1);
463 MPI_Comm_rank(MPI_CommWD1[myworld1],&myid1);
464
465 S_knum = myworld1;
466
467 /* allocate k-points into processors */
468
469 for (k=0; k<T_knum; k++){
470 /* ID in the first level world */
471 T_k_ID[k] = Comm_World_StartID1[k];
472 }
473
474 }
475
476 /*************************************************************
477 one-dimensitonalize H and S and store them in a compact form
478 *************************************************************/
479
480 MP = (int*)malloc(sizeof(int)*(atomnum+1));
481 order_GA = (int*)malloc(sizeof(int)*(atomnum+1));
482
483 My_NZeros = (int*)malloc(sizeof(int)*numprocs0);
484 SP_NZeros = (int*)malloc(sizeof(int)*numprocs0);
485 SP_Atoms = (int*)malloc(sizeof(int)*numprocs0);
486
487 size_H1 = Get_OneD_HS_Col(0, nh[0], &tmp, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
488
489 DM1 = (double**)malloc(sizeof(double*)*(SpinP_switch+1));
490 for (k=0; k<(SpinP_switch+1); k++){
491 DM1[k] = (double*)malloc(sizeof(double)*size_H1);
492 for (i=0; i<size_H1; i++) DM1[k][i] = 0.0;
493 }
494
495 TDM1 = (double*)malloc(sizeof(double)*size_H1);
496
497 DM2 = (double**)malloc(sizeof(double*)*(1+1));
498 for (k=0; k<(1+1); k++){
499 DM2[k] = (double*)malloc(sizeof(double)*size_H1);
500 for (i=0; i<size_H1; i++) DM2[k][i] = 0.0;
501 }
502
503 TDM2 = (double*)malloc(sizeof(double)*size_H1);
504
505 H1 = (double**)malloc(sizeof(double*)*(SpinP_switch+1));
506 for (k=0; k<(SpinP_switch+1); k++){
507 H1[k] = (double*)malloc(sizeof(double)*size_H1);
508 }
509
510 /* s-o coupling */
511 H2 = (double**)malloc(sizeof(double*)*(2+1));
512 for (k=0; k<(2+1); k++){
513 H2[k] = (double*)malloc(sizeof(double)*size_H1);
514 }
515
516 S1 = (double*)malloc(sizeof(double)*size_H1);
517
518 for (k=0; k<(SpinP_switch+1); k++){
519 size_H1 = Get_OneD_HS_Col(1, nh[k], H1[k], MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
520 }
521
522 /* s-o coupling */
523 for (k=0; k<(2+1); k++){
524 size_H1 = Get_OneD_HS_Col(1, ImNL[k], H2[k], MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
525 }
526
527 size_H1 = Get_OneD_HS_Col(1, CntOLP, S1, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
528
529 /***********************************************************
530 start "kloop0"
531 ***********************************************************/
532
533 for (kloop0=0; kloop0<num_kloop0; kloop0++){
534
535 kloop = S_knum + kloop0;
536
537 k2 = T_KGrids2[kloop];
538 k3 = T_KGrids3[kloop];
539 k_op = T_op_flag[kloop];
540
541 if (parallel_mode){
542
543 TRAN_DFT_Kdependent(MPI_CommWD1[myworld1],
544 parallel_mode, numprocs1, myid1,
545 level_stdout, iter, SpinP_switch, k2, k3, k_op, order_GA,
546 DM1,DM2,H1,H2,S1,
547 nh, ImNL, CntOLP,
548 atomnum, Matomnum, WhatSpecies, Spe_Total_CNO, FNAN,
549 natn, ncn, M2G, G2ID, atv_ijk, List_YOUSO, CDM, EDM, Eele0, Eele1);
550 }
551 else{
552
553 TRAN_DFT_Kdependent(comm1,
554 parallel_mode, 1, 0,
555 level_stdout, iter, SpinP_switch, k2, k3, k_op, order_GA,
556 DM1,DM2,H1,H2,S1,
557 nh, ImNL, CntOLP,
558 atomnum, Matomnum, WhatSpecies, Spe_Total_CNO, FNAN,
559 natn, ncn, M2G, G2ID, atv_ijk, List_YOUSO, CDM, EDM, Eele0, Eele1);
560 }
561
562 } /* kloop0 */
563
564 /* MPI communication of DM1 */
565
566 tmp = 1.0/(double)(TRAN_Kspace_grid2*TRAN_Kspace_grid3);
567
568 for (k=0; k<=SpinP_switch; k++) {
569
570 int itot0,Anum,Bnum;
571 double co,si,kRn;
572
573 MPI_Allreduce( DM1[k], TDM1, size_H1, MPI_DOUBLE, MPI_SUM, comm1);
574
575 if (k<2) {
576 MPI_Allreduce( DM2[k], TDM2, size_H1, MPI_DOUBLE, MPI_SUM, comm1);
577 }
578
579 itot0 = 0;
580
581 for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
582
583 wanA = WhatSpecies[GA_AN];
584 tnoA = Spe_Total_CNO[wanA];
585 Anum = MP[GA_AN];
586 MA_AN = F_G2M[GA_AN];
587
588 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
589
590 GB_AN = natn[GA_AN][LB_AN];
591 RnB = ncn[GA_AN][LB_AN];
592 wanB = WhatSpecies[GB_AN];
593 tnoB = Spe_Total_CNO[wanB];
594
595 for (i=0;i<tnoA;i++) {
596 for (j=0;j<tnoB;j++) {
597
598 if (1<=MA_AN && MA_AN<=Matomnum){
599 CDM[k][MA_AN][LB_AN][i][j] = TDM1[itot0]*tmp;
600 /* for iCDM */
601 if (k<2) {
602 iCDM[k][MA_AN][LB_AN][i][j] = TDM2[itot0]*tmp;
603 }
604 /* until here */
605 }
606
607 itot0++;
608
609 }
610 }
611 }
612 }
613
614 } /* k */
615
616 /***********************************************************
617 overwrite CMD with l1=-1 or l1=1 by zero
618 ***********************************************************/
619
620 for (spin=0; spin<=SpinP_switch; spin++) {
621 for (MA_AN=1; MA_AN<=Matomnum; MA_AN++) {
622
623 GA_AN = M2G[MA_AN];
624 wanA = WhatSpecies[GA_AN];
625 tnoA = Spe_Total_CNO[wanA];
626
627 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
628
629 GB_AN = natn[GA_AN][LB_AN];
630 RnB = ncn[GA_AN][LB_AN];
631 wanB = WhatSpecies[GB_AN];
632 tnoB = Spe_Total_CNO[wanB];
633 l1 = atv_ijk[RnB][1];
634 l2 = atv_ijk[RnB][2];
635 l3 = atv_ijk[RnB][3];
636
637 /* DM between C-L or C-R */
638
639 if (l1==-1 || l1==1){
640 for (i=0; i<tnoA; i++){
641 for (j=0; j<tnoB; j++){
642 CDM[spin][MA_AN][LB_AN][i][j] = 0.0;
643 /* for iCDM */
644 if (spin<2) {
645 iCDM[spin][MA_AN][LB_AN][i][j] = 0.0;
646 }
647 }
648 }
649 }
650
651 }
652 }
653 }
654
655 /***********************************************************
656 calculate (partial) decomposed Mulliken population by
657 density matrix between CL or CR
658
659 This contribution will be added in Mulliken_Charge.c
660 ***********************************************************/
661
662 {
663 int MA_AN,GA_AN,wanA,tnoA,GA_AN_e,LB_AN_e,iside;
664 int direction,Rn_e,GB_AN_e;
665 double sum;
666
667 /* initialize */
668
669 for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){
670
671 GA_AN = M2G[MA_AN];
672 wanA = WhatSpecies[GA_AN];
673 tnoA = Spe_Total_CNO[wanA];
674
675 for (spin=0; spin<=SpinP_switch; spin++) {
676 for (i=0; i<tnoA; i++){
677 TRAN_DecMulP[spin][MA_AN][i] = 0.0;
678 }
679 }
680 }
681
682 /* Left lead */
683
684 iside = 0;
685 direction = -1;
686
687 for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){
688
689 GA_AN = M2G[MA_AN];
690 wanA = WhatSpecies[GA_AN];
691 tnoA = Spe_Total_CNO[wanA];
692
693 if (TRAN_region[GA_AN]%10==2){
694
695 GA_AN_e = TRAN_Original_Id[GA_AN];
696
697 for (spin=0; spin<=SpinP_switch; spin++) {
698 for (i=0; i<tnoA; i++){
699
700 sum = 0.0;
701
702 for (LB_AN_e=0; LB_AN_e<=FNAN_e[iside][GA_AN_e]; LB_AN_e++){
703
704 GB_AN_e = natn_e[iside][GA_AN_e][LB_AN_e];
705 Rn_e = ncn_e[iside][GA_AN_e][LB_AN_e];
706 wanB = WhatSpecies_e[iside][GB_AN_e];
707 tnoB = Spe_Total_CNO_e[iside][wanB];
708 l1 = atv_ijk_e[iside][Rn_e][1];
709
710 if (l1==direction) {
711 for (j=0; j<tnoB; j++){
712 sum += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*
713 DM_e[iside][0][spin][GA_AN_e][LB_AN_e][i][j];
714 }
715 }
716 }
717
718 if(spin==3) {
719 TRAN_DecMulP[spin][MA_AN][i] = -sum;
720 } else {
721 TRAN_DecMulP[spin][MA_AN][i] = sum;
722 }
723
724 } /* i */
725 } /* spin */
726 }
727
728 } /* MA_AN */
729
730 /* Right lead */
731
732 iside = 1;
733 direction = 1;
734
735 for (MA_AN=1; MA_AN<=Matomnum; MA_AN++){
736
737 GA_AN = M2G[MA_AN];
738 wanA = WhatSpecies[GA_AN];
739 tnoA = Spe_Total_CNO[wanA];
740
741 if (TRAN_region[GA_AN]%10==3){
742
743 GA_AN_e = TRAN_Original_Id[GA_AN];
744
745 for (spin=0; spin<=SpinP_switch; spin++) {
746 for (i=0; i<tnoA; i++){
747
748 sum = 0.0;
749
750 for (LB_AN_e=0; LB_AN_e<=FNAN_e[iside][GA_AN_e]; LB_AN_e++){
751
752 GB_AN_e = natn_e[iside][GA_AN_e][LB_AN_e];
753 Rn_e = ncn_e[iside][GA_AN_e][LB_AN_e];
754 wanB = WhatSpecies_e[iside][GB_AN_e];
755 tnoB = Spe_Total_CNO_e[iside][wanB];
756 l1 = atv_ijk_e[iside][Rn_e][1];
757
758 if (l1==direction) {
759 for (j=0; j<tnoB; j++){
760 sum += OLP_e[iside][0][GA_AN_e][LB_AN_e][i][j]*
761 DM_e[iside][0][spin][GA_AN_e][LB_AN_e][i][j];
762 }
763 }
764 }
765
766 if(spin==3) {
767 TRAN_DecMulP[spin][MA_AN][i] = -sum;
768 } else {
769 TRAN_DecMulP[spin][MA_AN][i] = sum;
770 }
771
772 } /* i */
773 } /* spin */
774 }
775 } /* MA_AN */
776
777 }
778
779 /* set Eele as 0 */
780
781 Eele0[0] = 0.0;
782 Eele0[1] = 0.0;
783 Eele1[0] = 0.0;
784 Eele1[1] = 0.0;
785
786 /* free arrays */
787
788 for (i2=0; i2<TRAN_Kspace_grid2; i2++){
789 free(op_flag[i2]);
790 }
791 free(op_flag);
792
793 free(T_KGrids2);
794 free(T_KGrids3);
795 free(T_op_flag);
796 free(T_k_ID);
797
798 if (T_knum<=numprocs0){
799
800 if (Num_Comm_World1<=numprocs0){
801 MPI_Comm_free(&MPI_CommWD1[myworld1]);
802 }
803
804 free(NPROCS_ID1);
805 free(Comm_World1);
806 free(NPROCS_WD1);
807 free(Comm_World_StartID1);
808 free(MPI_CommWD1);
809 }
810
811 free(MP);
812 free(order_GA);
813
814 free(My_NZeros);
815 free(SP_NZeros);
816 free(SP_Atoms);
817
818 for (k=0; k<(SpinP_switch+1); k++){
819 free(DM1[k]);
820 }
821 free(DM1);
822
823 free(TDM1);
824
825 for (k=0; k<(1+1); k++){
826 free(DM2[k]);
827 }
828 free(DM2);
829
830 free(TDM2);
831
832 for (k=0; k<(SpinP_switch+1); k++){
833 free(H1[k]);
834 }
835 free(H1);
836
837 for (k=0; k<(2+1); k++){
838 free(H2[k]);
839 }
840 free(H2);
841
842 free(S1);
843
844 /* for elapsed time */
845 dtime(&TEtime);
846 if (myid0==Host_ID){
847 printf("<TRAN_DFT> time=%lf\n", TEtime-TStime);fflush(stdout);
848 }
849
850 return TEtime - TStime;
851 }
852
853
854
855
856 /*
857 * calculate CDM from nh, CntOLP, ...
858 *
859 * an alternative routine for Cluster_DFT or Band_DFT
860 *
861 */
862
863
864
865
TRAN_DFT_Kdependent(MPI_Comm comm1,int parallel_mode,int numprocs,int myid,int level_stdout,int iter,int SpinP_switch,double k2,double k3,int k_op,int * order_GA,double ** DM1,double ** DM2,double ** H1,double ** H2,double * S1,double ***** nh,double ***** ImNL,double **** CntOLP,int atomnum,int Matomnum,int * WhatSpecies,int * Spe_Total_CNO,int * FNAN,int ** natn,int ** ncn,int * M2G,int * G2ID,int ** atv_ijk,int * List_YOUSO,double ***** CDM,double ***** EDM,double Eele0[2],double Eele1[2])866 static void TRAN_DFT_Kdependent(
867 /* input */
868 MPI_Comm comm1,
869 int parallel_mode,
870 int numprocs,
871 int myid,
872 int level_stdout,
873 int iter,
874 int SpinP_switch,
875 double k2,
876 double k3,
877 int k_op,
878 int *order_GA,
879 double **DM1,
880 double **DM2,
881 double **H1,
882 double **H2,
883 double *S1,
884 double *****nh, /* H */
885 double *****ImNL, /* SO-coupling */
886 double ****CntOLP,
887 int atomnum,
888 int Matomnum,
889 int *WhatSpecies,
890 int *Spe_Total_CNO,
891 int *FNAN,
892 int **natn,
893 int **ncn,
894 int *M2G,
895 int *G2ID,
896 int **atv_ijk,
897 int *List_YOUSO,
898 /* output */
899 double *****CDM, /* output, charge density */
900 double *****EDM, /* not used */
901 double Eele0[2], double Eele1[2]) /* not used */
902 {
903 int i,j,k,iside,spinsize;
904 int *MP;
905 int iw,iw_method;
906 dcomplex w, w_weight;
907 dcomplex *GC,*GRL,*GRR;
908 dcomplex *SigmaL, *SigmaR;
909 dcomplex *work1,*work2,*Gless;
910 dcomplex **v2,*v20;
911 double dum;
912 double TStime,TEtime;
913 int MA_AN, GA_AN, wanA, tnoA, Anum;
914 int LB_AN, GB_AN, wanB, tnoB, Bnum;
915 int NUM_c0;
916
917 static int ID;
918 int Miw,iw0;
919 double time_a0, time_a1, time_a2;
920
921 /* setup MP */
922
923 MP = (int*)malloc(sizeof(int)*(atomnum+1));
924 TRAN_Set_MP(1, atomnum, WhatSpecies, Spe_Total_CNO, &NUM_c, MP);
925 NUM_c0 = NUM_c;
926 NUM_c = 2*NUM_c;
927
928 /* initialize */
929 TRAN_Set_Value_double(SCC_nc,NUM_c*NUM_c, 0.0,0.0);
930 TRAN_Set_Value_double(SCL_nc,NUM_c*NUM_e[0], 0.0,0.0);
931 TRAN_Set_Value_double(SCR_nc,NUM_c*NUM_e[1], 0.0,0.0);
932 TRAN_Set_Value_double(HCC_nc,NUM_c*NUM_c, 0.0,0.0);
933 TRAN_Set_Value_double(HCL_nc,NUM_c*NUM_e[0], 0.0,0.0);
934 TRAN_Set_Value_double(HCR_nc,NUM_c*NUM_e[1], 0.0,0.0);
935
936 /* set Hamiltonian and overlap matrices of left and right leads */
937 TRAN_Set_SurfOverlap_NC(comm1,"left", k2, k3);
938
939 TRAN_Set_SurfOverlap_NC(comm1,"right",k2, k3);
940
941 /* set CC, CL and CR */
942
943 TRAN_Set_CentOverlap_NC( comm1,
944 3,
945 SpinP_switch,
946 k2,
947 k3,
948 order_GA,
949 H1,
950 H2,
951 S1,
952 nh, /* input */
953 CntOLP, /* input */
954 atomnum,
955 Matomnum,
956 M2G,
957 G2ID,
958 WhatSpecies,
959 Spe_Total_CNO,
960 FNAN,
961 natn,
962 ncn,
963 atv_ijk);
964
965 if (MEASURE_TIME){
966 dtime(&time_a0);
967 }
968
969 /* allocate */
970
971 v2 = (dcomplex**)malloc(sizeof(dcomplex*)*(SpinP_switch+1));
972
973 for (k=0; k<=SpinP_switch; k++) {
974 v2[k] = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c0*NUM_c0);
975 TRAN_Set_Value_double( v2[k], NUM_c0*NUM_c0, 0.0, 0.0);
976 }
977
978 /* added by Y. Xiao */
979 v20 = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
980 TRAN_Set_Value_double( v20, NUM_c*NUM_c, 0.0, 0.0);
981
982 GC = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
983
984 GRL = (dcomplex*)malloc(sizeof(dcomplex)*NUM_e[0]* NUM_e[0]);
985 GRR = (dcomplex*)malloc(sizeof(dcomplex)*NUM_e[1]* NUM_e[1]);
986
987 SigmaL = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
988 SigmaR = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
989
990 work1 = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
991 work2 = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
992
993 Gless = (dcomplex*)malloc(sizeof(dcomplex)*NUM_c* NUM_c);
994
995 if (2<=level_stdout){
996 printf("NUM_c=%d, NUM_e= %d %d\n",NUM_c, NUM_e[0], NUM_e[1]);
997 printf("# of freq. to calculate G =%d\n",tran_omega_n_scf);
998 }
999
1000 if (SpinP_switch==0) spinsize = 1;
1001 else if (SpinP_switch==1) spinsize = 2;
1002 else if (SpinP_switch==3) spinsize = 1; /* The three below lines are not used for NC cal. */
1003
1004 /**************************************************************
1005 calculation of Green functions at k and iw
1006 **************************************************************/
1007
1008 for ( Miw=myid; Miw<tran_omega_n_scf; Miw+=numprocs ) {
1009
1010 iw = Miw;
1011 w = tran_omega_scf[iw];
1012 w_weight = tran_omega_weight_scf[iw];
1013 iw_method = tran_integ_method_scf[iw];
1014
1015 /*
1016 printf("Miw=%3d iw=%d of %d w=%le %le weight=%le %le method=%d\n",
1017 Miw,iw,tran_omega_n_scf, w.r,w.i, w_weight.r, w_weight.i, iw_method);
1018 */
1019
1020 /* calculation of surface Green's function and self energy from the LEFT lead */
1021
1022 iside = 0;
1023
1024 TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_nc_e[iside], H01_nc_e[iside],
1025 S00_nc_e[iside], S01_nc_e[iside], tran_surfgreen_iteration_max,
1026 tran_surfgreen_eps, GRL);
1027
1028 TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRL, NUM_c, HCL_nc, SCL_nc, SigmaL);
1029
1030 /* calculation of surface Green's function and self energy from the RIGHT lead */
1031
1032 iside = 1;
1033
1034 TRAN_Calc_SurfGreen_direct(w, NUM_e[iside], H00_nc_e[iside], H01_nc_e[iside],
1035 S00_nc_e[iside], S01_nc_e[iside], tran_surfgreen_iteration_max,
1036 tran_surfgreen_eps, GRR);
1037
1038 TRAN_Calc_SelfEnergy(w, NUM_e[iside], GRR, NUM_c, HCR_nc, SCR_nc, SigmaR);
1039
1040 /* calculation of central retarded Green's function */
1041
1042 TRAN_Calc_CentGreen(w, NUM_c, SigmaL, SigmaR, HCC_nc, SCC_nc, GC);
1043
1044 /***********************************************
1045 The non-equilibrium part is calculated by
1046 using the lesser Green's function.
1047 ***********************************************/
1048
1049 /* calculation of central advanced Green's function */
1050
1051 if (iw_method==2){
1052
1053 /* calculation of central lesser Green's function */
1054
1055 TRAN_Calc_CentGreenLesser( w, ChemP_e, NUM_c,
1056 Order_Lead_Side,
1057 SigmaL,
1058 SigmaR,
1059 GC,
1060 HCC_nc, SCC_nc,
1061 work1, work2, Gless);
1062
1063 } /* if (iw_method==2) */
1064
1065 /***********************************************
1066 add it to construct the density matrix
1067 ***********************************************/
1068
1069 if (iw_method==0)
1070 TRAN_Add_MAT( 0, NUM_c, w_weight, GC, v20);
1071 else if (iw_method==1)
1072 TRAN_Add_MAT( 1, NUM_c, w_weight, GC, v20);
1073 else if (iw_method==2)
1074 TRAN_Add_MAT( 1, NUM_c, w_weight, Gless, v20);
1075
1076 /*
1077 if (Miw==0){
1078
1079 for (i=0; i<NUM_c; i++){
1080 for (j=0; j<NUM_c; j++){
1081 printf("Miw=%3d iw_method=%2d i=%3d j=%3d re=%15.12f im=%15.12f\n",Miw,iw_method,i,j,v20[NUM_c*j+i].r,v20[NUM_c*j+i].i);
1082 }
1083 }
1084
1085 MPI_Finalize();
1086 exit(0);
1087 }
1088 */
1089
1090
1091 } /* iw */
1092
1093 if (MEASURE_TIME){
1094 dtime(&time_a1);
1095 }
1096
1097 /* distribution of spin-spin */
1098 for (i=1; i<=NUM_c0; i++) {
1099 for (j=1; j<=NUM_c0; j++) {
1100
1101 /* alpha-alpha */
1102 v2[0][(j-1)*NUM_c0+(i)-1].r = v20[(j-1)*NUM_c+(i)-1].r;
1103 v2[0][(j-1)*NUM_c0+(i)-1].i = v20[(j-1)*NUM_c+(i)-1].i;
1104 /* beta-beta */
1105 v2[1][(j-1)*NUM_c0+(i)-1].r = v20[(j+NUM_c0-1)*NUM_c+(i+NUM_c0)-1].r;
1106 v2[1][(j-1)*NUM_c0+(i)-1].i = v20[(j+NUM_c0-1)*NUM_c+(i+NUM_c0)-1].i;
1107 /* alpha-beta */
1108
1109 v2[2][(j-1)*NUM_c0+(i)-1].r = v20[(j+NUM_c0-1)*NUM_c+(i)-1].r;
1110 v2[2][(j-1)*NUM_c0+(i)-1].i = v20[(j+NUM_c0-1)*NUM_c+(i)-1].i;
1111
1112 /* beta-alpha */
1113
1114 v2[3][(j-1)*NUM_c0+(i)-1].r = v20[(j-1)*NUM_c+(i+NUM_c0)-1].r;
1115 v2[3][(j-1)*NUM_c0+(i)-1].i = v20[(j-1)*NUM_c+(i+NUM_c0)-1].i;
1116
1117
1118 }
1119 }
1120
1121 /***********************************************
1122 calculation of density matrix
1123 ***********************************************/
1124
1125 {
1126 int l1,l2,l3,RnB;
1127 int size_v3,itot,itot0,size_temp;
1128 double kRn,si,co,re,im;
1129 double *my_v3;
1130 double *v3;
1131 dcomplex **DM1_temp;
1132
1133 /* find the size of v3 */
1134
1135 size_v3 = 0;
1136 size_temp = 0;
1137
1138 for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
1139
1140 wanA = WhatSpecies[GA_AN];
1141 tnoA = Spe_Total_CNO[wanA];
1142 Anum = MP[GA_AN];
1143
1144 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1145
1146 GB_AN = natn[GA_AN][LB_AN];
1147 wanB = WhatSpecies[GB_AN];
1148 tnoB = Spe_Total_CNO[wanB];
1149 Bnum = MP[GB_AN];
1150
1151 for (i=0; i<tnoA; i++) {
1152 for (j=0; j<tnoB; j++) {
1153 size_v3++;
1154 size_v3++;
1155
1156 size_temp++;
1157 }
1158 }
1159 }
1160 }
1161
1162 /* allocate arrays */
1163
1164 my_v3 = (double*)malloc(sizeof(double)*size_v3);
1165 v3 = (double*)malloc(sizeof(double)*size_v3);
1166 /* allocate a temporal array for DM1 */
1167 DM1_temp = (dcomplex**)malloc(sizeof(dcomplex*)*(SpinP_switch+1));
1168 for (k=0; k<=SpinP_switch; k++){
1169 DM1_temp[k] = (dcomplex*)malloc(sizeof(dcomplex)*size_temp);
1170 TRAN_Set_Value_double( DM1_temp[k], size_temp, 0.0, 0.0);
1171 }
1172
1173 /* set up v3 */
1174
1175 #define v_idx(i,j) ( ((j)-1)*NUM_c0 + (i)-1 )
1176
1177 for (k=0; k<=SpinP_switch; k++) {
1178
1179 itot = 0;
1180
1181 for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
1182
1183 wanA = WhatSpecies[GA_AN];
1184 tnoA = Spe_Total_CNO[wanA];
1185 Anum = MP[GA_AN];
1186
1187 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1188
1189 GB_AN = natn[GA_AN][LB_AN];
1190 wanB = WhatSpecies[GB_AN];
1191 tnoB = Spe_Total_CNO[wanB];
1192 Bnum = MP[GB_AN];
1193
1194 for (i=0; i<tnoA; i++) {
1195 for (j=0; j<tnoB; j++) {
1196 my_v3[itot++] = v2[k][ v_idx( Anum+i, Bnum+j) ].r;
1197 my_v3[itot++] = v2[k][ v_idx( Anum+i, Bnum+j) ].i;
1198 }
1199 }
1200 }
1201 }
1202
1203 if (parallel_mode){
1204 MPI_Allreduce( my_v3, v3, itot, MPI_DOUBLE, MPI_SUM, comm1);
1205 }
1206 else {
1207 for (i=0; i<itot; i++) {
1208 v3[i] = my_v3[i];
1209 }
1210 }
1211
1212 /* v3 -> CDM */
1213
1214 itot = 0;
1215 itot0 = 0;
1216
1217 for (GA_AN=1; GA_AN<=atomnum; GA_AN++) {
1218
1219 wanA = WhatSpecies[GA_AN];
1220 tnoA = Spe_Total_CNO[wanA];
1221 Anum = MP[GA_AN];
1222
1223 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1224
1225 GB_AN = natn[GA_AN][LB_AN];
1226 RnB = ncn[GA_AN][LB_AN];
1227 wanB = WhatSpecies[GB_AN];
1228 tnoB = Spe_Total_CNO[wanB];
1229 Bnum = MP[GB_AN];
1230 l1 = atv_ijk[RnB][1];
1231 l2 = atv_ijk[RnB][2];
1232 l3 = atv_ijk[RnB][3];
1233
1234 kRn = k2*(double)l2 + k3*(double)l3;
1235 si = (double)k_op*sin(2.0*PI*kRn);
1236 co = (double)k_op*cos(2.0*PI*kRn);
1237
1238 for (i=0; i<tnoA; i++) {
1239 for (j=0; j<tnoB; j++) {
1240 re = v3[itot++];
1241 im = v3[itot++];
1242
1243 /* divided by numprocs due to the later MPI_Allreduce */
1244 DM1_temp[k][itot0].r += (re*co + im*si)/(double)numprocs;
1245 DM1_temp[k][itot0].i += (im*co - re*si)/(double)numprocs;
1246 itot0++;
1247
1248 }
1249 }
1250 }
1251 }
1252
1253 } /* k */
1254
1255 /* transfer the value of DM1_temp to DM1 */
1256 for (i=0; i<size_temp; i++) {
1257
1258 DM1[0][i] += DM1_temp[0][i].r;
1259 DM1[1][i] += DM1_temp[1][i].r;
1260 DM1[2][i] += DM1_temp[2][i].r;
1261 DM1[3][i] -= DM1_temp[2][i].i;
1262
1263 DM2[0][i] += DM1_temp[0][i].i;
1264 DM2[1][i] += DM1_temp[1][i].i;
1265
1266 }
1267
1268
1269 /* free arrays */
1270
1271 free(my_v3);
1272 free(v3);
1273
1274 for (i=0; i<=SpinP_switch; i++){
1275 free(DM1_temp[i]);
1276 }
1277 free(DM1_temp);
1278
1279 }
1280
1281 if (MEASURE_TIME){
1282 MPI_Barrier(comm1);
1283 dtime(&time_a2);
1284 printf("TRAN_DFT(%d)> calculaiton (%le)\n",myid, time_a1-time_a0 );
1285 }
1286
1287
1288 /* free arrays */
1289
1290 free(Gless);
1291 free(work2);
1292 free(work1);
1293 free(SigmaR);
1294 free(SigmaL);
1295 free(GRR);
1296 free(GRL);
1297 free(GC);
1298
1299 for (k=0; k<=SpinP_switch; k++) {
1300 free(v2[k]);
1301 }
1302 free(v2);
1303 free(v20);
1304
1305 free(MP);
1306 }
1307
1308
1309
TRAN_Add_MAT(int mode,int NUM_c,dcomplex w_weight,dcomplex * v,dcomplex * out)1310 static void TRAN_Add_MAT(
1311 int mode,
1312 int NUM_c,
1313 dcomplex w_weight,
1314 dcomplex *v,
1315 dcomplex *out
1316 )
1317
1318 #define v_ref(i,j) v[NUM_c*(j)+(i)]
1319 #define out_ref(i,j) out[NUM_c*(j)+(i)]
1320
1321 {
1322 int i,j;
1323 double re,im;
1324
1325 switch (mode) {
1326
1327 case 0:
1328
1329 for (i=0; i<NUM_c; i++) {
1330 for (j=0; j<NUM_c; j++) {
1331 re = v_ref(i,j).r + v_ref(j,i).r;
1332 im = v_ref(i,j).i - v_ref(j,i).i;
1333 out_ref(i,j).r += re*w_weight.r - im*w_weight.i;
1334 out_ref(i,j).i += re*w_weight.i + im*w_weight.r;
1335 }
1336 }
1337
1338 break;
1339
1340 case 1:
1341
1342 for (i=0; i<NUM_c*NUM_c; i++) {
1343 out[i].r += v[i].r*w_weight.r - v[i].i*w_weight.i;
1344 out[i].i += v[i].r*w_weight.i + v[i].i*w_weight.r;
1345 }
1346
1347 break;
1348
1349 }
1350
1351 }
1352
1353