1 /**********************************************************************
2 Band_DFT_Dosout.c:
3
4 Band_DFT_Dosout.c is a subroutine to calculate the density of
5 states based on the band calculation.
6
7 Log of Band_DFT_Dosout.c:
8
9 12/May/2003 Released by H.Kino
10 25/Dec/2003 a non-collinear part (added by T.Ozaki)
11
12 ***********************************************************************/
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <math.h>
18 #include <time.h>
19 #include "openmx_common.h"
20 #include "mpi.h"
21 #include <omp.h>
22
23 #define measure_time 0
24
25
26
27
28 static double Band_DFT_Dosout_Col(
29 int knum_i, int knum_j, int knum_k,
30 int SpinP_switch,
31 double *****nh,
32 double *****ImNL,
33 double ****CntOLP);
34
35 static double Band_DFT_DosoutGauss_Col(
36 int knum_i, int knum_j, int knum_k,
37 int SpinP_switch,
38 double *****nh,
39 double *****ImNL,
40 double ****CntOLP);
41
42 static double Band_DFT_Dosout_NonCol(
43 int knum_i, int knum_j, int knum_k,
44 int SpinP_switch,
45 double *****nh,
46 double *****ImNL,
47 double ****CntOLP);
48
49 static double Band_DFT_DosoutGauss_NonCol(
50 int knum_i, int knum_j, int knum_k,
51 int SpinP_switch,
52 double *****nh,
53 double *****ImNL,
54 double ****CntOLP);
55
56
57
Band_DFT_Dosout(int knum_i,int knum_j,int knum_k,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP)58 double Band_DFT_Dosout( int knum_i, int knum_j, int knum_k,
59 int SpinP_switch,
60 double *****nh,
61 double *****ImNL,
62 double ****CntOLP)
63 {
64 static double time0;
65
66
67 if ( (SpinP_switch==0 || SpinP_switch==1) && Dos_fileout){
68
69 time0 = Band_DFT_Dosout_Col( knum_i, knum_j, knum_k, SpinP_switch, nh, ImNL, CntOLP);
70 }
71
72 else if ( (SpinP_switch==0 || SpinP_switch==1) && DosGauss_fileout){
73
74 time0 = Band_DFT_DosoutGauss_Col( knum_i, knum_j, knum_k, SpinP_switch, nh, ImNL, CntOLP);
75 }
76
77 else if (SpinP_switch==3 && Dos_fileout){
78
79 time0 = Band_DFT_Dosout_NonCol( knum_i, knum_j, knum_k, SpinP_switch, nh, ImNL, CntOLP);
80 }
81
82 else if (SpinP_switch==3 && DosGauss_fileout){
83
84 time0 = Band_DFT_DosoutGauss_NonCol( knum_i, knum_j, knum_k, SpinP_switch, nh, ImNL, CntOLP);
85 }
86
87 return time0;
88 }
89
90
91
92
93
94
95
96
Band_DFT_DosoutGauss_Col(int knum_i,int knum_j,int knum_k,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP)97 static double Band_DFT_DosoutGauss_Col(
98 int knum_i, int knum_j, int knum_k,
99 int SpinP_switch,
100 double *****nh,
101 double *****ImNL,
102 double ****CntOLP)
103 {
104 int i,j,k,spin,l,i1,j1,n1;
105 int n, wanA;
106 int *MP;
107 int MA_AN, GA_AN, tnoA,Anum, LB_AN;
108 int GB_AN, wanB, tnoB, Bnum, RnB;
109 int h_AN,tno0,tno1,Gh_AN,Hwan,Gc_AN,Cwan;
110 int l1,l2,l3,ik,Rn;
111 int kloop,kloop0,S_knum,E_knum,T_knum,num_kloop0;
112 int ie,iemin,iemax,n1min,iecenter,iewidth;
113 int MaxL,e1,s1;
114 int i_vec[10];
115
116 double sum,sumi,u2,v2,uv,vu,tmp,pi2,xa,eg,x,de;
117 double factor,EV_cut0;
118 double FermiF1,FermiF2,x1,x2,diffF,max_x=50.0;
119 double kRn,si,co,Redum,Imdum,Redum2,Resum;
120 double TStime,TEtime,time0;
121 double OLP_eigen_cut=Threshold_OLP_Eigen;
122 double *****PDM;
123 double *****TPDM;
124 double *koS;
125 double **ko; int N_ko, i_ko[10];
126 dcomplex **H; int N_H, i_H[10];
127 dcomplex **S; int N_S, i_S[10];
128 double *M1,***EIGEN;
129 dcomplex **C; int N_C, i_C[10];
130 double *KGrids1, *KGrids2, *KGrids3;
131 double *SD;
132 dcomplex ***H2,**TmpM,**TmpS;
133 double *T_KGrids1,*T_KGrids2,*T_KGrids3;
134 int *Ti_KGrids1,*Tj_KGrids2,*Tk_KGrids3,*arpo;
135 dcomplex Ctmp1,Ctmp2;
136
137 double ****Dummy_ImNL;
138 double snum_i, snum_j, snum_k;
139 double k1,k2,k3;
140 double Dos_Erange_Tmp[2];
141 double *r_energy;
142 double time1,time2,time3;
143 double time4,time5,time6;
144 double time7,time8,time9,time10;
145 double Stime1,Etime1;
146 float ***Dos;
147 float *DosVec;
148 char file_ev[YOUSO10],file_eig[YOUSO10];
149 FILE *fp_ev,*fp_eig;
150 char buf1[fp_bsize]; /* setvbuf */
151 char buf2[fp_bsize]; /* setvbuf */
152 int numprocs,myid,ID,ID1,ID2,tag;
153 /* for OpenMP */
154 int OMPID,Nthrds,Nprocs;
155
156 MPI_Status stat;
157 MPI_Request request;
158
159 /* MPI */
160 MPI_Comm_size(mpi_comm_level1,&numprocs);
161 MPI_Comm_rank(mpi_comm_level1,&myid);
162 MPI_Barrier(mpi_comm_level1);
163
164 if (myid==Host_ID && 0<level_stdout){
165 printf("\n<Band_DFT_Dosout>\n"); fflush(stdout);
166 }
167
168 dtime(&TStime);
169
170 time1 = 0.0;
171 time2 = 0.0;
172 time3 = 0.0;
173 time4 = 0.0;
174 time5 = 0.0;
175 time6 = 0.0;
176 time7 = 0.0;
177 time8 = 0.0;
178 time9 = 0.0;
179 time10 = 0.0;
180
181 /****************************************************
182 calculation of the array size
183 ****************************************************/
184
185 n = 0;
186 for (i=1; i<=atomnum; i++){
187 wanA = WhatSpecies[i];
188 n += Spe_Total_CNO[wanA];
189 }
190
191 /****************************************************
192 Allocation of arrays
193 ****************************************************/
194
195 DosVec = (float*)malloc(sizeof(float)*(atomnum+1)*List_YOUSO[7]);
196
197 MP = (int*)malloc(sizeof(int)*List_YOUSO[1]);
198 arpo = (int*)malloc(sizeof(int)*numprocs);
199
200 N_ko=2; i_ko[0]=List_YOUSO[23]; i_ko[1]=n+1;
201 ko=(double**)malloc_multidimarray("double",N_ko, i_ko);
202
203 koS = (double*)malloc(sizeof(double)*(n+1));
204
205 N_H=2; i_H[0]=i_H[1]=n+1;
206 H=(dcomplex**)malloc_multidimarray("dcomplex",N_H, i_H);
207
208 N_S=2; i_S[0]=i_S[1]=n+1;
209 S=(dcomplex**)malloc_multidimarray("dcomplex",N_S, i_S);
210
211 M1 = (double*)malloc(sizeof(double)*(n+1));
212
213 N_C=2; i_C[0]=i_C[1]=n+1;
214 C=(dcomplex**)malloc_multidimarray("dcomplex",N_C, i_C);
215
216 KGrids1 = (double*)malloc(sizeof(double)*knum_i);
217 KGrids2 = (double*)malloc(sizeof(double)*knum_j);
218 KGrids3 = (double*)malloc(sizeof(double)*knum_k);
219
220 SD = (double*)malloc(sizeof(double)*(atomnum+1)*List_YOUSO[7]*2);
221
222 TmpM = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
223 for (j=0; j<n+1; j++){
224 TmpM[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
225 }
226
227 TmpS = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
228 for (j=0; j<n+1; j++){
229 TmpS[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
230 }
231
232 H2 = (dcomplex***)malloc(sizeof(dcomplex**)*List_YOUSO[23]);
233 for (i=0; i<List_YOUSO[23]; i++){
234 H2[i] = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
235 for (j=0; j<n+1; j++){
236 H2[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
237 }
238 }
239
240 /* for calculation partial density matrix */
241
242 if (cal_partial_charge){
243
244 PDM = (double*****)malloc(sizeof(double****)*2);
245 for (k=0; k<=1; k++){
246
247 PDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
248 FNAN[0] = 0;
249
250 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
251
252 if (Gc_AN==0){
253 tno0 = 1;
254 }
255 else{
256 Cwan = WhatSpecies[Gc_AN];
257 tno0 = Spe_Total_NO[Cwan];
258 }
259
260 PDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
261 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
262
263 if (Gc_AN==0){
264 tno1 = 1;
265 }
266 else{
267 Gh_AN = natn[Gc_AN][h_AN];
268 Hwan = WhatSpecies[Gh_AN];
269 tno1 = Spe_Total_NO[Hwan];
270 }
271
272 PDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
273 for (i=0; i<tno0; i++){
274 PDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
275 for (j=0; j<tno1; j++) PDM[k][Gc_AN][h_AN][i][j] = 0.0;
276 }
277 }
278 }
279 }
280
281 TPDM = (double*****)malloc(sizeof(double****)*2);
282 for (k=0; k<=1; k++){
283
284 TPDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
285 FNAN[0] = 0;
286
287 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
288
289 if (Gc_AN==0){
290 tno0 = 1;
291 }
292 else{
293 Cwan = WhatSpecies[Gc_AN];
294 tno0 = Spe_Total_NO[Cwan];
295 }
296
297 TPDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
298 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
299
300 if (Gc_AN==0){
301 tno1 = 1;
302 }
303 else{
304 Gh_AN = natn[Gc_AN][h_AN];
305 Hwan = WhatSpecies[Gh_AN];
306 tno1 = Spe_Total_NO[Hwan];
307 }
308
309 TPDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
310 for (i=0; i<tno0; i++){
311 TPDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
312 for (j=0; j<tno1; j++) TPDM[k][Gc_AN][h_AN][i][j] = 0.0;
313 }
314 }
315 }
316 }
317 }
318
319 /* allocate Dos */
320
321 Dos = (float***)malloc(sizeof(float**)*DosGauss_Num_Mesh);
322 for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
323 Dos[ik] = (float**)malloc(sizeof(float*)*(SpinP_switch+1) );
324 for (spin=0; spin<=SpinP_switch; spin++) {
325 Dos[ik][spin] = (float*)malloc(sizeof(float)*(atomnum+1)*List_YOUSO[7]);
326 for (i=0; i<(atomnum+1)*List_YOUSO[7]; i++) Dos[ik][spin][i] = 0.0;
327 }
328 }
329
330 /* allocate r_energy */
331
332 r_energy = (double*)malloc(sizeof(double)*DosGauss_Num_Mesh);
333
334 /* set up energies where DOS is calculated */
335
336 Dos_Erange_Tmp[0] = Dos_Erange[0] + ChemP;
337 Dos_Erange_Tmp[1] = Dos_Erange[1] + ChemP;
338
339 de = (Dos_Erange_Tmp[1]-Dos_Erange_Tmp[0])/(double)DosGauss_Num_Mesh;
340 for (i=0; i<DosGauss_Num_Mesh; i++) {
341 r_energy[i] = Dos_Erange_Tmp[0] + de*(double)i;
342 }
343
344 /* no spin-orbit coupling */
345 if (SO_switch==0){
346 Dummy_ImNL = (double****)malloc(sizeof(double***)*1);
347 Dummy_ImNL[0] = (double***)malloc(sizeof(double**)*1);
348 Dummy_ImNL[0][0] = (double**)malloc(sizeof(double*)*1);
349 Dummy_ImNL[0][0][0] = (double*)malloc(sizeof(double)*1);
350 }
351
352 snum_i = knum_i;
353 snum_j = knum_j;
354 snum_k = knum_k;
355
356 /* set up k-grids */
357
358 for (i=0; i<knum_i; i++){
359 k1 = (double)i/snum_i + Shift_K_Point;
360 KGrids1[i]=k1;
361 }
362 for (i=0; i<knum_j; i++){
363 k1 = (double)i/snum_j - Shift_K_Point;
364 KGrids2[i]=k1;
365 }
366 for (i=0; i<knum_k; i++){
367 k1 = (double)i/snum_k + 2.0*Shift_K_Point;
368 KGrids3[i]=k1;
369 }
370
371 if (myid==Host_ID && 0<level_stdout){
372 printf(" KGrids1: ");
373 for (i=0;i<=knum_i-1;i++) printf("%f ",KGrids1[i]);
374 printf("\n");
375 printf(" KGrids2: ");
376 for (i=0;i<=knum_j-1;i++) printf("%f ",KGrids2[i]);
377 printf("\n");
378 printf(" KGrids3: ");
379 for (i=0;i<=knum_k-1;i++) printf("%f ",KGrids3[i]);
380 printf("\n");
381 }
382
383 /***********************************
384 one-dimentionalize for MPI
385 ************************************/
386
387 T_knum = 0;
388 for (i=0; i<=(knum_i-1); i++){
389 for (j=0; j<=(knum_j-1); j++){
390 for (k=0; k<=(knum_k-1); k++){
391 T_knum++;
392 }
393 }
394 }
395
396 T_KGrids1 = (double*)malloc(sizeof(double)*T_knum);
397 T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
398 T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
399
400 Ti_KGrids1 = (int*)malloc(sizeof(int)*T_knum);
401 Tj_KGrids2 = (int*)malloc(sizeof(int)*T_knum);
402 Tk_KGrids3 = (int*)malloc(sizeof(int)*T_knum);
403
404 EIGEN = (double***)malloc(sizeof(double**)*List_YOUSO[23]);
405 for (i=0; i<List_YOUSO[23]; i++){
406 EIGEN[i] = (double**)malloc(sizeof(double*)*T_knum);
407 for (j=0; j<T_knum; j++){
408 EIGEN[i][j] = (double*)malloc(sizeof(double)*(n+1));
409 }
410 }
411
412 /* set T_KGrid1,2,3 */
413
414 T_knum = 0;
415 for (i=0; i<=(knum_i-1); i++){
416 for (j=0; j<=(knum_j-1); j++){
417 for (k=0; k<=(knum_k-1); k++){
418
419 T_KGrids1[T_knum] = KGrids1[i];
420 T_KGrids2[T_knum] = KGrids2[j];
421 T_KGrids3[T_knum] = KGrids3[k];
422
423 Ti_KGrids1[T_knum] = i;
424 Tj_KGrids2[T_knum] = j;
425 Tk_KGrids3[T_knum] = k;
426
427 T_knum++;
428 }
429 }
430 }
431
432 /* allocate k-points into proccessors */
433
434 if (T_knum<=myid){
435 S_knum = -10;
436 E_knum = -100;
437 num_kloop0 = 1;
438 }
439 else if (T_knum<numprocs) {
440 S_knum = myid;
441 E_knum = myid;
442 num_kloop0 = 1;
443 }
444 else {
445 tmp = (double)T_knum/(double)numprocs;
446 num_kloop0 = (int)tmp + 1;
447
448 S_knum = (int)((double)myid*(tmp+0.0001));
449 E_knum = (int)((double)(myid+1)*(tmp+0.0001)) - 1;
450 if (myid==(numprocs-1)) E_knum = T_knum - 1;
451 if (E_knum<0) E_knum = 0;
452 }
453
454 factor = 1.0/(double)(knum_i * knum_j * knum_k); /* normalization factor */
455 pi2 = sqrt(PI);
456
457 /* for kloop */
458
459 for (kloop0=0; kloop0<num_kloop0; kloop0++){
460
461 kloop = kloop0 + S_knum;
462 arpo[myid] = -1;
463 if (S_knum<=kloop && kloop<=E_knum) arpo[myid] = kloop;
464 for (ID=0; ID<numprocs; ID++){
465 MPI_Bcast(&arpo[ID], 1, MPI_INT, ID, mpi_comm_level1);
466 }
467
468 /* set S */
469
470 for (ID=0; ID<numprocs; ID++){
471
472 kloop = arpo[ID];
473
474 if (measure_time==1) dtime(&Stime1);
475
476 if (0<=kloop){
477
478 k1 = T_KGrids1[kloop];
479 k2 = T_KGrids2[kloop];
480 k3 = T_KGrids3[kloop];
481
482 Overlap_Band(ID,CntOLP,TmpM,MP,k1,k2,k3);
483 n = TmpM[0][0].r;
484
485 if (myid==ID){
486 for (i1=1; i1<=n; i1++){
487 for (j1=1; j1<=n; j1++){
488 S[i1][j1].r = TmpM[i1][j1].r;
489 S[i1][j1].i = TmpM[i1][j1].i;
490
491 TmpS[i1][j1].r = TmpM[i1][j1].r;
492 TmpS[i1][j1].i = TmpM[i1][j1].i;
493 }
494 }
495 }
496
497 }
498
499 if (measure_time==1){
500 dtime(&Etime1);
501 time1 += Etime1 - Stime1;
502 }
503 }
504
505 kloop = arpo[myid];
506
507 if (0<=kloop){
508
509 if (measure_time==1) dtime(&Stime1);
510
511 EigenBand_lapack(S,ko[0],n,n,1);
512
513 if (measure_time==1){
514 dtime(&Etime1);
515 time2 += Etime1 - Stime1;
516 }
517
518 if (3<=level_stdout && 0<=kloop){
519 printf(" kloop %2d k1 k2 k3 %10.6f %10.6f %10.6f\n",
520 kloop,T_KGrids1[kloop],T_KGrids2[kloop],T_KGrids3[kloop]);
521 for (i1=1; i1<=n; i1++){
522 printf(" Eigenvalues of OLP %2d %15.12f\n",i1,ko[0][i1]);
523 }
524 }
525
526 /* minus eigenvalues to 1.0e-14 */
527
528 for (l=1; l<=n; l++){
529 if (ko[0][l]<0.0) ko[0][l] = 1.0e-14;
530 koS[l] = ko[0][l];
531 }
532
533 /* calculate S*1/sqrt(ko) */
534
535 for (l=1; l<=n; l++) M1[l] = 1.0/sqrt(ko[0][l]);
536
537 /* S * M1 */
538
539 for (i1=1; i1<=n; i1++){
540 for (j1=1; j1<=n; j1++){
541 S[i1][j1].r = S[i1][j1].r*M1[j1];
542 S[i1][j1].i = S[i1][j1].i*M1[j1];
543 }
544 }
545
546 } /* if (0<=kloop) */
547
548 /* set H */
549
550 for (spin=0; spin<=SpinP_switch; spin++){
551
552 for (ID=0; ID<numprocs; ID++){
553
554 kloop = arpo[ID];
555
556 if (0<=kloop){
557 k1 = T_KGrids1[kloop];
558 k2 = T_KGrids2[kloop];
559 k3 = T_KGrids3[kloop];
560
561 Hamiltonian_Band(ID, nh[spin], TmpM, MP, k1, k2, k3);
562
563 if (myid==ID){
564 for (i1=1; i1<=n; i1++){
565 for (j1=1; j1<=n; j1++){
566 H[i1][j1].r = TmpM[i1][j1].r;
567 H[i1][j1].i = TmpM[i1][j1].i;
568 }
569 }
570 }
571 }
572 }
573
574 kloop = arpo[myid];
575
576 if (0<=kloop){
577
578 /****************************************************
579 M1 * U^t * H * U * M1
580 ****************************************************/
581
582 if (measure_time==1) dtime(&Stime1);
583
584 /* transpose S */
585
586 for (i1=1; i1<=n; i1++){
587 for (j1=i1+1; j1<=n; j1++){
588 Ctmp1 = S[i1][j1];
589 Ctmp2 = S[j1][i1];
590 S[i1][j1] = Ctmp2;
591 S[j1][i1] = Ctmp1;
592 }
593 }
594
595 /* H * U * M1 */
596
597 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,j1,sum,sumi,l)
598 {
599
600 /* get info. on OpenMP */
601
602 OMPID = omp_get_thread_num();
603 Nthrds = omp_get_num_threads();
604 Nprocs = omp_get_num_procs();
605
606 for (i1=1+OMPID; i1<=n; i1+=Nthrds){
607
608 for (j1=1; j1<=n; j1++){
609
610 sum = 0.0;
611 sumi = 0.0;
612
613 for (l=1; l<=n; l++){
614 sum += H[i1][l].r*S[j1][l].r - H[i1][l].i*S[j1][l].i;
615 sumi += H[i1][l].r*S[j1][l].i + H[i1][l].i*S[j1][l].r;
616 }
617
618 C[j1][i1].r = sum;
619 C[j1][i1].i = sumi;
620 }
621 }
622 } /* #pragma omp parallel */
623
624 /* M1 * U^+ H * U * M1 */
625
626 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,j1,sum,sumi,l)
627 {
628
629 /* get info. on OpenMP */
630
631 OMPID = omp_get_thread_num();
632 Nthrds = omp_get_num_threads();
633 Nprocs = omp_get_num_procs();
634
635 for (i1=1+OMPID; i1<=n; i1+=Nthrds){
636 for (j1=1; j1<=n; j1++){
637
638 sum = 0.0;
639 sumi = 0.0;
640
641 for (l=1; l<=n; l++){
642 sum += S[i1][l].r*C[j1][l].r + S[i1][l].i*C[j1][l].i;
643 sumi += S[i1][l].r*C[j1][l].i - S[i1][l].i*C[j1][l].r;
644 }
645 H[i1][j1].r = sum;
646 H[i1][j1].i = sumi;
647 }
648 }
649 } /* #pragma omp parallel */
650
651 /* H to C */
652
653 for (i1=1; i1<=n; i1++){
654 for (j1=1; j1<=n; j1++){
655 C[i1][j1] = H[i1][j1];
656 }
657 }
658
659 /* penalty for ill-conditioning states */
660
661 EV_cut0 = Threshold_OLP_Eigen;
662
663 for (i1=1; i1<=n; i1++){
664
665 if (koS[i1]<EV_cut0){
666 C[i1][i1].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
667 }
668
669 /* cutoff the interaction between the ill-conditioned state */
670
671 if (1.0e+3<C[i1][i1].r){
672 for (j1=1; j1<=n; j1++){
673 C[i1][j1] = Complex(0.0,0.0);
674 C[j1][i1] = Complex(0.0,0.0);
675 }
676 C[i1][i1].r = 1.0e+4;
677 }
678 }
679
680 if (measure_time==1){
681 dtime(&Etime1);
682 time3 += Etime1 - Stime1;
683 }
684
685 /* solve eigenvalue problem */
686
687 if (measure_time==1) dtime(&Stime1);
688
689 n1 = n;
690 EigenBand_lapack(C,ko[spin],n1,n1,1);
691
692 for (i1=1; i1<=n; i1++) EIGEN[spin][kloop][i1] = ko[spin][i1];
693
694 if (measure_time==1){
695 dtime(&Etime1);
696 time4 += Etime1 - Stime1;
697 }
698
699 /****************************************************
700 transformation to the original eigen vectors.
701 NOTE JRCAT-244p and JAIST-2122p
702 C = U * lambda^{-1/2} * D
703 ****************************************************/
704
705 if (measure_time==1) dtime(&Stime1);
706
707 /* transpose */
708
709 for (i1=1; i1<=n; i1++){
710 for (j1=i1+1; j1<=n; j1++){
711 Ctmp1 = S[i1][j1];
712 Ctmp2 = S[j1][i1];
713 S[i1][j1] = Ctmp2;
714 S[j1][i1] = Ctmp1;
715 }
716 }
717
718 /* transpose */
719
720 for (i1=1; i1<=n; i1++){
721 for (j1=i1+1; j1<=n; j1++){
722 Ctmp1 = C[i1][j1];
723 Ctmp2 = C[j1][i1];
724 C[i1][j1] = Ctmp2;
725 C[j1][i1] = Ctmp1;
726 }
727 }
728
729 /* shift */
730
731 for (j1=1; j1<=n; j1++){
732 for (l=n; 1<=l; l--){
733 C[j1][l] = C[j1][l];
734 }
735 }
736
737 #pragma omp parallel shared(C,S,H2,n,n1,spin) private(OMPID,Nthrds,Nprocs,i1,j1,sum,sumi,l)
738 {
739
740 /* get info. on OpenMP */
741
742 OMPID = omp_get_thread_num();
743 Nthrds = omp_get_num_threads();
744 Nprocs = omp_get_num_procs();
745
746 for (i1=1+OMPID; i1<=n; i1+=Nthrds){
747 for (j1=1; j1<=n1; j1++){
748
749 sum = 0.0;
750 sumi = 0.0;
751
752 for (l=1; l<=n; l++){
753 sum += S[i1][l].r*C[j1][l].r - S[i1][l].i*C[j1][l].i;
754 sumi += S[i1][l].r*C[j1][l].i + S[i1][l].i*C[j1][l].r;
755 }
756
757 H2[spin][j1][i1].r = sum;
758 H2[spin][j1][i1].i = sumi;
759
760 }
761 }
762 } /* #pragma omp parallel */
763
764 if (measure_time==1){
765 dtime(&Etime1);
766 time5 += Etime1 - Stime1;
767 }
768
769 } /* if (0<=kloop) */
770 } /* spin */
771
772 /****************************************************
773 store LDOS
774 ****************************************************/
775
776 kloop = arpo[myid];
777
778 if (0<=kloop){
779
780 for (spin=0; spin<=SpinP_switch; spin++){
781
782 k1 = T_KGrids1[kloop];
783 k2 = T_KGrids2[kloop];
784 k3 = T_KGrids3[kloop];
785
786 i = Ti_KGrids1[kloop];
787 j = Tj_KGrids2[kloop];
788 k = Tk_KGrids3[kloop];
789
790 for (l=1; l<=n; l++){
791
792 /* initialize */
793
794 for (i1=0; i1<(atomnum+1)*List_YOUSO[7]; i1++){
795 SD[i1] = 0.0;
796 }
797
798 /* calculate SD */
799
800 #pragma omp parallel shared(TmpS,SD,spin,l,H2,MP,Spe_Total_CNO,WhatSpecies,atomnum) private(OMPID,Nthrds,Nprocs,GA_AN,wanA,tnoA,Anum,GB_AN,wanB,tnoB,Bnum,i1,j1,u2,v2,uv,vu,Redum,Imdum)
801 {
802
803 /* get info. on OpenMP */
804
805 OMPID = omp_get_thread_num();
806 Nthrds = omp_get_num_threads();
807 Nprocs = omp_get_num_procs();
808
809 for (GA_AN=1+OMPID; GA_AN<=atomnum; GA_AN+=Nthrds){
810
811 wanA = WhatSpecies[GA_AN];
812 tnoA = Spe_Total_CNO[wanA];
813 Anum = MP[GA_AN];
814
815 for (GB_AN=1; GB_AN<=atomnum; GB_AN++){
816
817 wanB = WhatSpecies[GB_AN];
818 tnoB = Spe_Total_CNO[wanB];
819 Bnum = MP[GB_AN];
820
821 for (i1=0; i1<tnoA; i1++){
822 for (j1=0; j1<tnoB; j1++){
823
824 u2 = H2[spin][l][Anum+i1].r*H2[spin][l][Bnum+j1].r;
825 v2 = H2[spin][l][Anum+i1].i*H2[spin][l][Bnum+j1].i;
826 uv = H2[spin][l][Anum+i1].r*H2[spin][l][Bnum+j1].i;
827 vu = H2[spin][l][Anum+i1].i*H2[spin][l][Bnum+j1].r;
828
829 Redum = (u2 + v2);
830 Imdum = (uv - vu);
831
832 SD[Anum+i1] += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
833
834 } /* j1 */
835 } /* i1 */
836 } /* GB_AN */
837 } /* GA_AN */
838
839 } /* #pragma omp parallel */
840
841 /* calculate contribution to Gaussian DOS */
842
843 #pragma omp parallel shared(MP,Spe_Total_CNO,WhatSpecies,pi2,SD,factor,Dos,r_energy,DosGauss_Width,DosGauss_Num_Mesh,Dos_Erange_Tmp,l,kloop,spin,EIGEN,atomnum) private(OMPID,Nthrds,Nprocs,GA_AN,wanA,tnoA,Anum,eg,x,iecenter,iewidth,iemin,iemax,ie,xa,i1)
844 {
845
846 /* get info. on OpenMP */
847
848 OMPID = omp_get_thread_num();
849 Nthrds = omp_get_num_threads();
850 Nprocs = omp_get_num_procs();
851
852 for (GA_AN=1+OMPID; GA_AN<=atomnum; GA_AN+=Nthrds){
853
854 wanA = WhatSpecies[GA_AN];
855 tnoA = Spe_Total_CNO[wanA];
856 Anum = MP[GA_AN];
857
858 /*************************
859 exp( -(x/a)^2 )
860 a= DosGauss_Width
861 x=-3a : 3a is counted
862 **************************/
863
864 eg = EIGEN[spin][kloop][l];
865 x = (eg-Dos_Erange_Tmp[0])/(Dos_Erange_Tmp[1]-Dos_Erange_Tmp[0])*(DosGauss_Num_Mesh-1);
866 iecenter = (int)x ;
867
868 iewidth = DosGauss_Width*3.0/(Dos_Erange_Tmp[1]-Dos_Erange_Tmp[0])*(DosGauss_Num_Mesh-1)+3;
869
870 iemin = iecenter - iewidth;
871 iemax = iecenter + iewidth;
872
873 if (iemin<0) iemin=0;
874 if (iemax>=DosGauss_Num_Mesh) iemax=DosGauss_Num_Mesh-1;
875
876 if ( 0<=iemin && iemin<DosGauss_Num_Mesh && 0<=iemax && iemax<DosGauss_Num_Mesh ) {
877
878 for (ie=iemin;ie<=iemax;ie++) {
879 xa = (eg-r_energy[ie])/DosGauss_Width;
880
881 for (i1=0; i1<tnoA; i1++){
882 Dos[ie][spin][Anum+i1] += (float)(factor*SD[Anum+i1]* exp(-xa*xa)/(DosGauss_Width*pi2));
883 }
884 }
885 }
886
887 } /* GA_AN */
888
889 } /* #pragma omp parallel */
890
891 } /* l */
892 } /* spin */
893 } /* if (0<=kloop) */
894
895 /*********************************************
896 calculation of partial density matrix
897 *********************************************/
898
899 kloop = arpo[myid];
900
901 if (0<=kloop && cal_partial_charge) {
902
903 for (spin=0; spin<=SpinP_switch; spin++){
904
905 k1 = T_KGrids1[kloop];
906 k2 = T_KGrids2[kloop];
907 k3 = T_KGrids3[kloop];
908
909 for (l=1; l<=n; l++){
910
911 x1 = (ko[spin][l] - ChemP)*Beta;
912 if (x1<=-max_x) x1 = -max_x;
913 if (max_x<=x1) x1 = max_x;
914 FermiF1 = 1.0/(1.0 + exp(x1));
915
916 x2 = (ko[spin][l] - (ChemP+ene_win_partial_charge))*Beta;
917 if (x2<=-max_x) x2 = -max_x;
918 if (max_x<=x2) x2 = max_x;
919 FermiF2 = 1.0/(1.0 + exp(x2));
920
921 diffF = fabs(FermiF1-FermiF2);
922
923 for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
924
925 wanA = WhatSpecies[GA_AN];
926 tnoA = Spe_Total_CNO[wanA];
927 Anum = MP[GA_AN];
928
929 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
930 GB_AN = natn[GA_AN][LB_AN];
931 Rn = ncn[GA_AN][LB_AN];
932 wanB = WhatSpecies[GB_AN];
933 tnoB = Spe_Total_CNO[wanB];
934 Bnum = MP[GB_AN];
935
936 l1 = atv_ijk[Rn][1];
937 l2 = atv_ijk[Rn][2];
938 l3 = atv_ijk[Rn][3];
939 kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;
940
941 si = sin(2.0*PI*kRn);
942 co = cos(2.0*PI*kRn);
943
944 for (i=0; i<tnoA; i++){
945 for (j=0; j<tnoB; j++){
946
947 u2 = H2[spin][l][Anum+i].r*H2[spin][l][Bnum+j].r;
948 v2 = H2[spin][l][Anum+i].i*H2[spin][l][Bnum+j].i;
949 uv = H2[spin][l][Anum+i].r*H2[spin][l][Bnum+j].i;
950 vu = H2[spin][l][Anum+i].i*H2[spin][l][Bnum+j].r;
951 tmp = co*(u2+v2) - si*(uv-vu);
952 PDM[spin][GA_AN][LB_AN][i][j] += diffF*tmp;
953 }
954 }
955 }
956 }
957 }
958 }
959 }
960
961 /****************************************************
962 MPI: EIGEN
963 ****************************************************/
964
965 for (ID=0; ID<numprocs; ID++){
966
967 kloop = arpo[ID];
968
969 if (0<=kloop){
970 for (spin=0; spin<=SpinP_switch; spin++){
971 MPI_Bcast(&EIGEN[spin][kloop][0], n+1, MPI_DOUBLE, ID, mpi_comm_level1);
972 }
973 }
974 }
975
976 } /* kloop0 */
977
978 /****************************************************
979 MPI: PDM
980 ****************************************************/
981
982 if (cal_partial_charge) {
983
984 /* MPI: PDM */
985
986 for (spin=0; spin<=SpinP_switch; spin++){
987 for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
988
989 wanA = WhatSpecies[GA_AN];
990 tnoA = Spe_Total_CNO[wanA];
991
992 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
993
994 GB_AN = natn[GA_AN][LB_AN];
995 wanB = WhatSpecies[GB_AN];
996 tnoB = Spe_Total_CNO[wanB];
997
998 for (i=0; i<tnoA; i++){
999
1000 MPI_Allreduce(&PDM[spin][GA_AN][LB_AN][i][0],
1001 &TPDM[spin][GA_AN][LB_AN][i][0],
1002 tnoB, MPI_DOUBLE, MPI_SUM,
1003 mpi_comm_level1);
1004
1005 for (j=0; j<tnoB; j++){
1006 TPDM[spin][GA_AN][LB_AN][i][j] /= (double)T_knum;
1007 }
1008 }
1009 }
1010 }
1011 }
1012
1013 /* store TPDM to Partial_DM */
1014
1015 for (spin=0; spin<=SpinP_switch; spin++){
1016 for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
1017
1018 MA_AN = F_G2M[GA_AN];
1019 wanA = WhatSpecies[GA_AN];
1020 tnoA = Spe_Total_CNO[wanA];
1021
1022 if (1<=MA_AN && MA_AN<=Matomnum){
1023
1024 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1025
1026 GB_AN = natn[GA_AN][LB_AN];
1027 wanB = WhatSpecies[GB_AN];
1028 tnoB = Spe_Total_CNO[wanB];
1029
1030 for (i=0; i<tnoA; i++){
1031 for (j=0; j<tnoB; j++){
1032 Partial_DM[spin][MA_AN][LB_AN][i][j] = TPDM[spin][GA_AN][LB_AN][i][j];
1033 }
1034 }
1035 }
1036 }
1037 }
1038 }
1039 }
1040
1041 /****************************************************************
1042 write eigenvalues and eigenvectors
1043 ****************************************************************/
1044
1045 if (measure_time==1) dtime(&Stime1);
1046
1047 if (myid==Host_ID){
1048
1049 sprintf(file_eig,"%s%s.Dos.val",filepath,filename);
1050
1051 if ( (fp_eig=fopen(file_eig,"w"))==NULL ) {
1052 printf("can not open a file %s\n",file_eig);
1053 }
1054
1055 else{
1056
1057 /* write *.Dos.val */
1058
1059 printf("write eigenvalues\n");
1060
1061 fprintf(fp_eig,"mode 7\n");
1062 fprintf(fp_eig,"NonCol 0\n");
1063 fprintf(fp_eig,"N %d\n",n);
1064 fprintf(fp_eig,"Nspin %d\n",SpinP_switch);
1065 fprintf(fp_eig,"Erange %lf %lf\n",Dos_Erange[0],Dos_Erange[1]);
1066 fprintf(fp_eig,"Kgrid %d %d %d\n",1,1,1);
1067 fprintf(fp_eig,"atomnum %d\n",atomnum);
1068 fprintf(fp_eig,"<WhatSpecies\n");
1069 for (i=1; i<=atomnum; i++) {
1070 fprintf(fp_eig,"%d ",WhatSpecies[i]);
1071 }
1072 fprintf(fp_eig,"\nWhatSpecies>\n");
1073 fprintf(fp_eig,"SpeciesNum %d\n",SpeciesNum);
1074 fprintf(fp_eig,"<Spe_Total_CNO\n");
1075 for (i=0;i<SpeciesNum;i++) {
1076 fprintf(fp_eig,"%d ",Spe_Total_CNO[i]);
1077 }
1078 fprintf(fp_eig,"\nSpe_Total_CNO>\n");
1079
1080 MaxL = Supported_MaxL;
1081
1082 fprintf(fp_eig,"MaxL %d\n",Supported_MaxL);
1083 fprintf(fp_eig,"<Spe_Num_CBasis\n");
1084 for (i=0;i<SpeciesNum;i++) {
1085 for (l=0;l<=MaxL;l++) {
1086 fprintf(fp_eig,"%d ",Spe_Num_CBasis[i][l]);
1087 }
1088 fprintf(fp_eig,"\n");
1089 }
1090 fprintf(fp_eig,"Spe_Num_CBasis>\n");
1091 fprintf(fp_eig,"ChemP %lf\n",ChemP);
1092
1093 fprintf(fp_eig,"irange %d %d\n",0,DosGauss_Num_Mesh-1);
1094 fprintf(fp_eig,"<Eigenvalues\n");
1095 for (spin=0; spin<=SpinP_switch; spin++) {
1096 fprintf(fp_eig,"%d %d %d ",0,0,0);
1097
1098 for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
1099 fprintf(fp_eig,"%lf ",r_energy[ik]);
1100 }
1101 fprintf(fp_eig,"\n");
1102 }
1103 fprintf(fp_eig,"Eigenvalues>\n");
1104
1105 fclose(fp_eig);
1106 }
1107
1108 } /* if (myid==Host_ID) */
1109
1110 /* write *.Dos.vec */
1111
1112 if (myid==Host_ID){
1113 printf("write eigenvectors\n");
1114 sprintf(file_ev,"%s%s.Dos.vec",filepath,filename);
1115 if ( (fp_ev=fopen(file_ev,"w"))==NULL ) {
1116 printf("cannot open a file %s\n",file_ev);
1117 }
1118 }
1119
1120 for (spin=0; spin<=SpinP_switch; spin++) {
1121 for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
1122
1123 i_vec[0] = 0;
1124 i_vec[1] = 0;
1125 i_vec[2] = 0;
1126
1127 MPI_Reduce(&Dos[ik][spin][1], &DosVec[1], n, MPI_FLOAT, MPI_SUM, Host_ID, mpi_comm_level1);
1128
1129 if (myid==Host_ID){
1130 fwrite(i_vec,sizeof(int),3,fp_ev);
1131 fwrite(&DosVec[1],sizeof(float),n,fp_ev);
1132 }
1133 }
1134 }
1135
1136 if (myid==Host_ID){
1137 fclose(fp_ev);
1138 }
1139
1140 if (measure_time==1){
1141 dtime(&Etime1);
1142 time10 += Etime1 - Stime1;
1143 }
1144
1145 if (measure_time==1){
1146 printf("myid=%4d time1=%15.12f\n",myid,time1);
1147 printf("myid=%4d time2=%15.12f\n",myid,time2);
1148 printf("myid=%4d time3=%15.12f\n",myid,time3);
1149 printf("myid=%4d time4=%15.12f\n",myid,time4);
1150 printf("myid=%4d time5=%15.12f\n",myid,time5);
1151 printf("myid=%4d time6=%15.12f\n",myid,time6);
1152 printf("myid=%4d time7=%15.12f\n",myid,time7);
1153 printf("myid=%4d time8=%15.12f\n",myid,time8);
1154 printf("myid=%4d time9=%15.12f\n",myid,time9);
1155 printf("myid=%4d time10=%15.12f\n",myid,time10);
1156 }
1157
1158 /****************************************************
1159 Fermi surface viewable by XCrysDen
1160 ****************************************************/
1161
1162 if (myid==Host_ID){
1163
1164 int ii,jj,kk;
1165 int ***TD2OD;
1166 double x0,y0,z0;
1167 char file_fermi[YOUSO10];
1168 FILE *fp;
1169
1170 /* allocation of TD2OD */
1171
1172 TD2OD = (int***)malloc(sizeof(int**)*knum_i);
1173 for (i=0; i<knum_i; i++){
1174 TD2OD[i] = (int**)malloc(sizeof(int*)*knum_j);
1175 for (j=0; j<knum_j; j++){
1176 TD2OD[i][j] = (int*)malloc(sizeof(int)*knum_k);
1177 for (k=0; k<knum_k; k++) TD2OD[i][j][k] = 0;
1178 }
1179 }
1180
1181 kloop = 0;
1182 for (i=0; i<knum_i; i++){
1183 for (j=0; j<knum_j; j++){
1184 for (k=0; k<knum_k; k++){
1185 TD2OD[i][j][k] = kloop;
1186 kloop++;
1187 }
1188 }
1189 }
1190
1191 /* make bxsf files */
1192
1193 for (spin=0; spin<=SpinP_switch; spin++){
1194
1195 sprintf(file_fermi,"%s%s.FermiSurf%d.bxsf",filepath,filename,spin);
1196
1197 if ((fp=fopen(file_fermi,"w")) != NULL){
1198
1199 fprintf(fp,"BEGIN_INFO\n");
1200 fprintf(fp,"Fermi Energy: %15.12f\n",ChemP);
1201 fprintf(fp,"END_INFO\n\n");
1202
1203 fprintf(fp,"BEGIN_BLOCK_BANDGRID_3D\n");
1204 fprintf(fp,"comment_line\n");
1205 fprintf(fp,"BEGIN_BANDGRID_3D_simple_example\n");
1206 fprintf(fp,"%d\n",n);
1207 fprintf(fp,"%d %d %d\n",knum_i+1,knum_j+1,knum_k+1);
1208
1209 fprintf(fp,"0.0 0.0 0.0\n");
1210 fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[1][1],rtv[1][2],rtv[1][3]);
1211 fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[2][1],rtv[2][2],rtv[2][3]);
1212 fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[3][1],rtv[3][2],rtv[3][3]);
1213
1214 for (i1=1; i1<=n; i1++){
1215
1216 fprintf(fp,"BAND: %d\n",i1);
1217
1218 for (i=0; i<=knum_i; i++){
1219 ii = i%knum_i;
1220 for (j=0; j<=knum_j; j++){
1221 jj = j%knum_j;
1222 for (k=0; k<=knum_k; k++){
1223 kk = k%knum_k;
1224 kloop = TD2OD[ii][jj][kk];
1225 fprintf(fp,"%10.5f ",EIGEN[spin][kloop][i1]);
1226 }
1227 fprintf(fp,"\n");
1228 }
1229
1230 if (i!=knum_i) fprintf(fp,"\n");
1231
1232 }
1233 }
1234
1235 fprintf(fp,"END_BANDGRID_3D\n");
1236 fprintf(fp,"END_BLOCK_BANDGRID_3D\n");
1237
1238 fclose(fp);
1239 }
1240 else{
1241 printf("Failure in saving bxsf files.\n");
1242 }
1243
1244 } /* spin */
1245
1246 /* free TD2OD */
1247 for (i=0; i<knum_i; i++){
1248 for (j=0; j<knum_j; j++){
1249 free(TD2OD[i][j]);
1250 }
1251 free(TD2OD[i]);
1252 }
1253 free(TD2OD);
1254
1255 } /* if (myid==Host_ID) */
1256
1257 /****************************************************
1258 free arrays
1259 ****************************************************/
1260
1261 free(DosVec);
1262
1263 free(MP);
1264 free(arpo);
1265
1266 for (i=0; i<i_ko[0]; i++){
1267 free(ko[i]);
1268 }
1269 free(ko);
1270
1271 free(koS);
1272
1273 for (i=0; i<i_H[0]; i++){
1274 free(H[i]);
1275 }
1276 free(H);
1277
1278 for (i=0; i<i_S[0]; i++){
1279 free(S[i]);
1280 }
1281 free(S);
1282
1283 free(M1);
1284
1285 for (i=0; i<i_C[0]; i++){
1286 free(C[i]);
1287 }
1288 free(C);
1289
1290 free(KGrids1); free(KGrids2);free(KGrids3);
1291
1292 free(SD);
1293
1294 for (j=0; j<n+1; j++){
1295 free(TmpM[j]);
1296 }
1297 free(TmpM);
1298
1299 for (j=0; j<n+1; j++){
1300 free(TmpS[j]);
1301 }
1302 free(TmpS);
1303
1304 for (i=0; i<List_YOUSO[23]; i++){
1305 for (j=0; j<n+1; j++){
1306 free(H2[i][j]);
1307 }
1308 free(H2[i]);
1309 }
1310 free(H2);
1311
1312 /* partial density matrix */
1313
1314 if (cal_partial_charge){
1315
1316 /* PDM */
1317
1318 for (k=0; k<=1; k++){
1319
1320 FNAN[0] = 0;
1321
1322 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
1323
1324 if (Gc_AN==0){
1325 tno0 = 1;
1326 }
1327 else{
1328 Cwan = WhatSpecies[Gc_AN];
1329 tno0 = Spe_Total_NO[Cwan];
1330 }
1331
1332 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1333
1334 if (Gc_AN==0){
1335 tno1 = 1;
1336 }
1337 else{
1338 Gh_AN = natn[Gc_AN][h_AN];
1339 Hwan = WhatSpecies[Gh_AN];
1340 tno1 = Spe_Total_NO[Hwan];
1341 }
1342
1343 for (i=0; i<tno0; i++){
1344 free(PDM[k][Gc_AN][h_AN][i]);
1345 }
1346 free(PDM[k][Gc_AN][h_AN]);
1347 }
1348 free(PDM[k][Gc_AN]);
1349 }
1350 free(PDM[k]);
1351 }
1352 free(PDM);
1353
1354 /* TPDM */
1355
1356 for (k=0; k<=1; k++){
1357
1358 FNAN[0] = 0;
1359
1360 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
1361
1362 if (Gc_AN==0){
1363 tno0 = 1;
1364 }
1365 else{
1366 Cwan = WhatSpecies[Gc_AN];
1367 tno0 = Spe_Total_NO[Cwan];
1368 }
1369
1370 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1371
1372 if (Gc_AN==0){
1373 tno1 = 1;
1374 }
1375 else{
1376 Gh_AN = natn[Gc_AN][h_AN];
1377 Hwan = WhatSpecies[Gh_AN];
1378 tno1 = Spe_Total_NO[Hwan];
1379 }
1380
1381 for (i=0; i<tno0; i++){
1382 free(TPDM[k][Gc_AN][h_AN][i]);
1383 }
1384 free(TPDM[k][Gc_AN][h_AN]);
1385 }
1386 free(TPDM[k][Gc_AN]);
1387 }
1388 free(TPDM[k]);
1389 }
1390 free(TPDM);
1391 }
1392
1393 /* no spin-orbit coupling */
1394 if (SO_switch==0){
1395 free(Dummy_ImNL[0][0][0]);
1396 free(Dummy_ImNL[0][0]);
1397 free(Dummy_ImNL[0]);
1398 free(Dummy_ImNL);
1399 }
1400
1401 free(T_KGrids1);
1402 free(T_KGrids2);
1403 free(T_KGrids3);
1404
1405 free(Ti_KGrids1);
1406 free(Tj_KGrids2);
1407 free(Tk_KGrids3);
1408
1409 for (i=0; i<List_YOUSO[23]; i++){
1410 for (j=0; j<T_knum; j++){
1411 free(EIGEN[i][j]);
1412 }
1413 free(EIGEN[i]);
1414 }
1415 free(EIGEN);
1416
1417 for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
1418 for (spin=0; spin<=SpinP_switch; spin++) {
1419 free(Dos[ik][spin]);
1420 }
1421 free(Dos[ik]);
1422 }
1423 free(Dos);
1424
1425 free(r_energy);
1426
1427 /* for elapsed time */
1428
1429 dtime(&TEtime);
1430 time0 = TEtime - TStime;
1431 return time0;
1432
1433 }
1434
1435
1436
1437
1438
1439
1440
Band_DFT_DosoutGauss_NonCol(int knum_i,int knum_j,int knum_k,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP)1441 static double Band_DFT_DosoutGauss_NonCol(
1442 int knum_i, int knum_j, int knum_k,
1443 int SpinP_switch,
1444 double *****nh,
1445 double *****ImNL,
1446 double ****CntOLP)
1447 {
1448 int i,j,k,spin,l,i1,j1,ik,n1;
1449 int n, wanA,ii1,jj1,m,n2;
1450 int *MP;
1451 int MA_AN, GA_AN, tnoA,Anum, LB_AN;
1452 int GB_AN, wanB, tnoB, Bnum, RnB;
1453 int l1,l2,l3;
1454
1455 int ie,iemin,iemax,iemin0,iemax0,n1min,mn;
1456 int MaxL,e1,s1,T_knum,S_knum,E_knum;
1457 int kloop,kloop0,num_kloop0;
1458 int i_vec[10];
1459 int h_AN,tno0,tno1,Gh_AN,Hwan,Gc_AN,Cwan,Rn;
1460
1461 double *****PDM;
1462 double *****TPDM;
1463 double EV_cut0;
1464 double FermiF1,FermiF2,x1,x2,diffF,max_x=50.0;
1465 double sum,sumi,u2,v2,uv,vu,tmp;
1466 double sum_r0,sum_i0,sum_r1,sum_i1;
1467 double d0,d1,d2,d3;
1468 double kRn,si,co,Redum,Imdum;
1469 double Redum2,Resum,Imdum2;
1470 double TStime,TEtime,time0;
1471 double OLP_eigen_cut=Threshold_OLP_Eigen;
1472 double theta,phi,sit,cot,sip,cop,de;
1473 double tmp1,tmp2,tmp3,eg,x,xa,pi2,factor;
1474 double *ko; int N_ko, i_ko[10];
1475 double *koS;
1476 double *r_energy;
1477 double Dos_Erange_Tmp[2];
1478 dcomplex **H; int N_H, i_H[10];
1479 dcomplex **S; int N_S, i_S[10];
1480 dcomplex Ctmp1,Ctmp2;
1481 double *M1,**EIGEN;
1482 dcomplex **C; int N_C, i_C[10];
1483 double *KGrids1, *KGrids2, *KGrids3;
1484 double *SD;
1485 dcomplex **TmpM,**TmpS;
1486 double *T_KGrids1,*T_KGrids2,*T_KGrids3;
1487 int *Ti_KGrids1,*Tj_KGrids2,*Tk_KGrids3,*arpo;
1488 int iecenter,iewidth;
1489
1490 double *****Dummy_ImNL;
1491 float **Dos;
1492 float *DosVec;
1493 double snum_i, snum_j, snum_k;
1494 double k1,k2,k3;
1495
1496 char file_ev[YOUSO10],file_eig[YOUSO10];
1497 FILE *fp_ev,*fp_eig;
1498 char buf1[fp_bsize]; /* setvbuf */
1499 char buf2[fp_bsize]; /* setvbuf */
1500 int numprocs,myid,ID,ID1,ID2,tag;
1501
1502 /* for OpenMP */
1503 int OMPID,Nthrds,Nprocs;
1504
1505 MPI_Status stat;
1506 MPI_Request request;
1507
1508 /* MPI */
1509 MPI_Comm_size(mpi_comm_level1,&numprocs);
1510 MPI_Comm_rank(mpi_comm_level1,&myid);
1511 MPI_Barrier(mpi_comm_level1);
1512
1513 if (myid==Host_ID && 0<level_stdout){
1514 printf("\n<Band_DFT_Dosout>\n"); fflush(stdout);
1515 }
1516
1517 dtime(&TStime);
1518
1519 /****************************************************
1520 calculation of the array size
1521 ****************************************************/
1522
1523 n = 0;
1524 for (i=1; i<=atomnum; i++){
1525 wanA = WhatSpecies[i];
1526 n = n + Spe_Total_CNO[wanA];
1527 }
1528 n2 = 2*n + 2;
1529
1530 /****************************************************
1531 Allocation of arrays
1532 ****************************************************/
1533
1534 DosVec = (float*)malloc(sizeof(float)*2*(atomnum+1)*List_YOUSO[7]);
1535
1536 MP = (int*)malloc(sizeof(int)*List_YOUSO[1]);
1537
1538 arpo = (int*)malloc(sizeof(int)*numprocs);
1539
1540 ko = (double*)malloc(sizeof(double)*n2);
1541 koS = (double*)malloc(sizeof(double)*(n+1));
1542
1543 H = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
1544 for (j=0; j<n2; j++){
1545 H[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
1546 }
1547
1548 S = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
1549 for (i=0; i<n2; i++){
1550 S[i] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
1551 }
1552
1553 M1 = (double*)malloc(sizeof(double)*n2);
1554
1555 C = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
1556 for (j=0; j<n2; j++){
1557 C[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
1558 }
1559
1560 KGrids1 = (double*)malloc(sizeof(double)*knum_i);
1561 KGrids2 = (double*)malloc(sizeof(double)*knum_j);
1562 KGrids3 = (double*)malloc(sizeof(double)*knum_k);
1563
1564 SD = (double*)malloc(sizeof(double)*(atomnum+1)*List_YOUSO[7]*2);
1565
1566 TmpM = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
1567 for (j=0; j<n2; j++){
1568 TmpM[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
1569 }
1570
1571 TmpS = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
1572 for (j=0; j<n+1; j++){
1573 TmpS[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
1574 }
1575
1576 /* allocate Dos */
1577
1578 Dos = (float**)malloc(sizeof(float*)*DosGauss_Num_Mesh);
1579 for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
1580 Dos[ik] = (float*)malloc(sizeof(float)*(atomnum+1)*List_YOUSO[7]*2);
1581 for (i=0; i<(atomnum+1)*List_YOUSO[7]*2; i++) Dos[ik][i] = 0.0;
1582 }
1583
1584 /* allocate r_energy */
1585
1586 r_energy = (double*)malloc(sizeof(double)*DosGauss_Num_Mesh);
1587
1588 /* set up energies where DOS is calculated */
1589
1590 Dos_Erange_Tmp[0] = Dos_Erange[0] + ChemP;
1591 Dos_Erange_Tmp[1] = Dos_Erange[1] + ChemP;
1592
1593 de = (Dos_Erange_Tmp[1]-Dos_Erange_Tmp[0])/(double)DosGauss_Num_Mesh;
1594 for (i=0; i<DosGauss_Num_Mesh; i++) {
1595 r_energy[i] = Dos_Erange_Tmp[0] + de*(double)i;
1596 }
1597
1598 /* non-spin-orbit coupling and non-LDA+U */
1599 if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
1600 && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0){
1601
1602 Dummy_ImNL = (double*****)malloc(sizeof(double****)*1);
1603 Dummy_ImNL[0] = (double****)malloc(sizeof(double***)*1);
1604 Dummy_ImNL[0][0] = (double***)malloc(sizeof(double**)*1);
1605 Dummy_ImNL[0][0][0] = (double**)malloc(sizeof(double*)*1);
1606 Dummy_ImNL[0][0][0][0] = (double*)malloc(sizeof(double)*1);
1607 }
1608
1609 /* for calculation partial density matrix */
1610
1611 if (cal_partial_charge){
1612
1613 PDM = (double*****)malloc(sizeof(double****)*2);
1614 for (k=0; k<=1; k++){
1615
1616 PDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
1617 FNAN[0] = 0;
1618
1619 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
1620
1621 if (Gc_AN==0){
1622 tno0 = 1;
1623 }
1624 else{
1625 Cwan = WhatSpecies[Gc_AN];
1626 tno0 = Spe_Total_NO[Cwan];
1627 }
1628
1629 PDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
1630 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1631
1632 if (Gc_AN==0){
1633 tno1 = 1;
1634 }
1635 else{
1636 Gh_AN = natn[Gc_AN][h_AN];
1637 Hwan = WhatSpecies[Gh_AN];
1638 tno1 = Spe_Total_NO[Hwan];
1639 }
1640
1641 PDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
1642 for (i=0; i<tno0; i++){
1643 PDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
1644 for (j=0; j<tno1; j++) PDM[k][Gc_AN][h_AN][i][j] = 0.0;
1645 }
1646 }
1647 }
1648 }
1649
1650 TPDM = (double*****)malloc(sizeof(double****)*2);
1651 for (k=0; k<=1; k++){
1652
1653 TPDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
1654 FNAN[0] = 0;
1655
1656 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
1657
1658 if (Gc_AN==0){
1659 tno0 = 1;
1660 }
1661 else{
1662 Cwan = WhatSpecies[Gc_AN];
1663 tno0 = Spe_Total_NO[Cwan];
1664 }
1665
1666 TPDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
1667 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
1668
1669 if (Gc_AN==0){
1670 tno1 = 1;
1671 }
1672 else{
1673 Gh_AN = natn[Gc_AN][h_AN];
1674 Hwan = WhatSpecies[Gh_AN];
1675 tno1 = Spe_Total_NO[Hwan];
1676 }
1677
1678 TPDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
1679 for (i=0; i<tno0; i++){
1680 TPDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
1681 for (j=0; j<tno1; j++) TPDM[k][Gc_AN][h_AN][i][j] = 0.0;
1682 }
1683 }
1684 }
1685 }
1686 }
1687
1688 /****************************************************
1689 set up k-grids
1690 ****************************************************/
1691
1692 snum_i = knum_i;
1693 snum_j = knum_j;
1694 snum_k = knum_k;
1695
1696 for (i=0; i<knum_i; i++){
1697 k1 = (double)i/snum_i + Shift_K_Point;
1698 KGrids1[i]=k1;
1699 }
1700 for (i=0; i<knum_j; i++){
1701 k1 = (double)i/snum_j - Shift_K_Point;
1702 KGrids2[i]=k1;
1703 }
1704 for (i=0; i<knum_k; i++){
1705 k1 = (double)i/snum_k + 2.0*Shift_K_Point;
1706 KGrids3[i]=k1;
1707 }
1708
1709 if (myid==Host_ID && 0<level_stdout){
1710 printf(" KGrids1: ");
1711 for (i=0;i<=knum_i-1;i++) printf("%f ",KGrids1[i]);
1712 printf("\n");
1713 printf(" KGrids2: ");
1714 for (i=0;i<=knum_j-1;i++) printf("%f ",KGrids2[i]);
1715 printf("\n");
1716 printf(" KGrids3: ");
1717 for (i=0;i<=knum_k-1;i++) printf("%f ",KGrids3[i]);
1718 printf("\n");
1719 }
1720
1721 /***********************************
1722 one-dimentionalize for MPI
1723 ************************************/
1724
1725 T_knum = 0;
1726 for (i=0; i<=(knum_i-1); i++){
1727 for (j=0; j<=(knum_j-1); j++){
1728 for (k=0; k<=(knum_k-1); k++){
1729 T_knum++;
1730 }
1731 }
1732 }
1733
1734 T_KGrids1 = (double*)malloc(sizeof(double)*T_knum);
1735 T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
1736 T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
1737
1738 Ti_KGrids1 = (int*)malloc(sizeof(int)*T_knum);
1739 Tj_KGrids2 = (int*)malloc(sizeof(int)*T_knum);
1740 Tk_KGrids3 = (int*)malloc(sizeof(int)*T_knum);
1741
1742 EIGEN = (double**)malloc(sizeof(double*)*T_knum);
1743 for (j=0; j<T_knum; j++){
1744 EIGEN[j] = (double*)malloc(sizeof(double)*n2);
1745 }
1746
1747 /* set T_KGrid1,2,3 */
1748
1749 T_knum = 0;
1750 for (i=0; i<=(knum_i-1); i++){
1751 for (j=0; j<=(knum_j-1); j++){
1752 for (k=0; k<=(knum_k-1); k++){
1753
1754 T_KGrids1[T_knum] = KGrids1[i];
1755 T_KGrids2[T_knum] = KGrids2[j];
1756 T_KGrids3[T_knum] = KGrids3[k];
1757
1758 Ti_KGrids1[T_knum] = i;
1759 Tj_KGrids2[T_knum] = j;
1760 Tk_KGrids3[T_knum] = k;
1761
1762 T_knum++;
1763 }
1764 }
1765 }
1766
1767 /* allocate k-points into proccessors */
1768
1769 if (T_knum<=myid){
1770 S_knum = -10;
1771 E_knum = -100;
1772 num_kloop0 = 1;
1773 }
1774 else if (T_knum<numprocs) {
1775 S_knum = myid;
1776 E_knum = myid;
1777 num_kloop0 = 1;
1778 }
1779 else {
1780 tmp = (double)T_knum/(double)numprocs;
1781 num_kloop0 = (int)tmp + 1;
1782
1783 S_knum = (int)((double)myid*(tmp+0.0001));
1784 E_knum = (int)((double)(myid+1)*(tmp+0.0001)) - 1;
1785 if (myid==(numprocs-1)) E_knum = T_knum - 1;
1786 if (E_knum<0) E_knum = 0;
1787 }
1788
1789 factor = 1.0/(double)(knum_i * knum_j * knum_k); /* normalization factor */
1790 pi2 = sqrt(PI);
1791
1792 /* for kloop */
1793
1794 for (kloop0=0; kloop0<num_kloop0; kloop0++){
1795
1796 kloop = kloop0 + S_knum;
1797 arpo[myid] = -1;
1798 if (S_knum<=kloop && kloop<=E_knum) arpo[myid] = kloop;
1799 for (ID=0; ID<numprocs; ID++){
1800 MPI_Bcast(&arpo[ID], 1, MPI_INT, ID, mpi_comm_level1);
1801 }
1802
1803 /* set S */
1804
1805 for (ID=0; ID<numprocs; ID++){
1806
1807 kloop = arpo[ID];
1808
1809 if (0<=kloop){
1810
1811 k1 = T_KGrids1[kloop];
1812 k2 = T_KGrids2[kloop];
1813 k3 = T_KGrids3[kloop];
1814
1815 Overlap_Band(ID,CntOLP,TmpM,MP,k1,k2,k3);
1816 n = TmpM[0][0].r;
1817
1818 if (myid==ID){
1819 for (i1=1; i1<=n; i1++){
1820 for (j1=1; j1<=n; j1++){
1821 S[i1][j1].r = TmpM[i1][j1].r;
1822 S[i1][j1].i = TmpM[i1][j1].i;
1823
1824 TmpS[i1][j1].r = TmpM[i1][j1].r;
1825 TmpS[i1][j1].i = TmpM[i1][j1].i;
1826 }
1827 }
1828 }
1829
1830 }
1831 }
1832
1833 kloop = arpo[myid];
1834
1835 if (0<=kloop){
1836
1837 EigenBand_lapack(S,ko,n,n,1);
1838
1839 if (2<=level_stdout && 0<=kloop){
1840 printf(" kloop %2d k1 k2 k3 %10.6f %10.6f %10.6f\n",
1841 kloop,T_KGrids1[kloop],T_KGrids2[kloop],T_KGrids3[kloop]);
1842 for (i1=1; i1<=n; i1++){
1843 printf(" Eigenvalues of OLP %2d %15.12f\n",i1,ko[i1]);
1844 }
1845 }
1846
1847 /* minus eigenvalues to 1.0e-14 */
1848 for (l=1; l<=n; l++){
1849 if (ko[l]<0.0){
1850 ko[l] = 1.0e-14;
1851
1852 if (2<=level_stdout){
1853 printf("found an eigenvalue smaller than %10.8f of OLP kloop=%2d\n",
1854 Threshold_OLP_Eigen,kloop);
1855 }
1856 }
1857
1858 koS[l] = ko[l];
1859 }
1860
1861 /* calculate S*1/sqrt(ko) */
1862
1863 for (l=1; l<=n; l++) M1[l] = 1.0/sqrt(ko[l]);
1864
1865 /* S * M1 */
1866
1867 for (i1=1; i1<=n; i1++){
1868 for (j1=1; j1<=n; j1++){
1869 S[i1][j1].r = S[i1][j1].r*M1[j1];
1870 S[i1][j1].i = S[i1][j1].i*M1[j1];
1871 }
1872 }
1873
1874 }
1875
1876 /****************************************************
1877 make a full Hamiltonian matrix
1878 ****************************************************/
1879
1880 for (ID=0; ID<numprocs; ID++){
1881
1882 kloop = arpo[ID];
1883
1884 if (0<=kloop){
1885 k1 = T_KGrids1[kloop];
1886 k2 = T_KGrids2[kloop];
1887 k3 = T_KGrids3[kloop];
1888
1889 /* non-spin-orbit coupling and non-LDA+U */
1890 if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
1891 && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0)
1892 Hamiltonian_Band_NC(ID, nh, Dummy_ImNL, TmpM, MP, k1, k2, k3);
1893
1894 /* spin-orbit coupling or LDA+U */
1895 else
1896 Hamiltonian_Band_NC(ID, nh, ImNL, TmpM, MP, k1, k2, k3);
1897
1898 if (myid==ID){
1899 for (i1=1; i1<=2*n; i1++){
1900 for (j1=1; j1<=2*n; j1++){
1901 H[i1][j1].r = TmpM[i1][j1].r;
1902 H[i1][j1].i = TmpM[i1][j1].i;
1903 }
1904 }
1905 }
1906 }
1907 }
1908
1909 kloop = arpo[myid];
1910
1911 if (0<=kloop){
1912
1913 /****************************************************
1914 M1 * U^t * H * U * M1
1915 ****************************************************/
1916
1917 /* transpose S */
1918
1919 for (i1=1; i1<=n; i1++){
1920 for (j1=i1+1; j1<=n; j1++){
1921 Ctmp1 = S[i1][j1];
1922 Ctmp2 = S[j1][i1];
1923 S[i1][j1] = Ctmp2;
1924 S[j1][i1] = Ctmp1;
1925 }
1926 }
1927
1928 /* H * U * M1 */
1929
1930 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,jj1,j1,sum_r0,sum_i0,sum_r1,sum_i1,l)
1931 {
1932 /* get info. on OpenMP */
1933
1934 OMPID = omp_get_thread_num();
1935 Nthrds = omp_get_num_threads();
1936 Nprocs = omp_get_num_procs();
1937
1938 for (i1=1+OMPID; i1<=2*n; i1+=Nthrds){
1939
1940 jj1 = 1;
1941 for (j1=1; j1<=n; j1++){
1942
1943 sum_r0 = 0.0;
1944 sum_i0 = 0.0;
1945
1946 sum_r1 = 0.0;
1947 sum_i1 = 0.0;
1948
1949 for (l=1; l<=n; l++){
1950
1951 sum_r0 += H[i1][l ].r*S[j1][l].r - H[i1][l ].i*S[j1][l].i;
1952 sum_i0 += H[i1][l ].r*S[j1][l].i + H[i1][l ].i*S[j1][l].r;
1953
1954 sum_r1 += H[i1][l+n].r*S[j1][l].r - H[i1][l+n].i*S[j1][l].i;
1955 sum_i1 += H[i1][l+n].r*S[j1][l].i + H[i1][l+n].i*S[j1][l].r;
1956 }
1957
1958 C[jj1][i1].r = sum_r0;
1959 C[jj1][i1].i = sum_i0; jj1++;
1960
1961 C[jj1][i1].r = sum_r1;
1962 C[jj1][i1].i = sum_i1; jj1++;
1963
1964 }
1965 }
1966
1967 } /* #pragma omp parallel */
1968
1969 /* M1 * U^+ H * U * M1 */
1970
1971 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,ii1,j1,sum_r0,sum_i0,sum_r1,sum_i1,l)
1972 {
1973 /* get info. on OpenMP */
1974
1975 OMPID = omp_get_thread_num();
1976 Nthrds = omp_get_num_threads();
1977 Nprocs = omp_get_num_procs();
1978
1979 for (j1=1+OMPID; j1<=2*n; j1+=Nthrds){
1980
1981 ii1 = 1;
1982 for (i1=1; i1<=n; i1++){
1983
1984 sum_r0 = 0.0;
1985 sum_i0 = 0.0;
1986
1987 sum_r1 = 0.0;
1988 sum_i1 = 0.0;
1989
1990 for (l=1; l<=n; l++){
1991
1992 sum_r0 += S[i1][l].r*C[j1][l ].r + S[i1][l].i*C[j1][l ].i;
1993 sum_i0 += S[i1][l].r*C[j1][l ].i - S[i1][l].i*C[j1][l ].r;
1994
1995 sum_r1 += S[i1][l].r*C[j1][l+n].r + S[i1][l].i*C[j1][l+n].i;
1996 sum_i1 += S[i1][l].r*C[j1][l+n].i - S[i1][l].i*C[j1][l+n].r;
1997 }
1998
1999 H[ii1][j1].r = sum_r0;
2000 H[ii1][j1].i = sum_i0; ii1++;
2001
2002 H[ii1][j1].r = sum_r1;
2003 H[ii1][j1].i = sum_i1; ii1++;
2004
2005 }
2006 }
2007
2008 } /* #pragma omp parallel */
2009
2010 /* H to C */
2011
2012 for (i1=1; i1<=2*n; i1++){
2013 for (j1=1; j1<=2*n; j1++){
2014 C[i1][j1].r = H[i1][j1].r;
2015 C[i1][j1].i = H[i1][j1].i;
2016 }
2017 }
2018
2019 /* penalty for ill-conditioning states */
2020
2021 EV_cut0 = Threshold_OLP_Eigen;
2022
2023 for (i1=1; i1<=n; i1++){
2024
2025 if (koS[i1]<EV_cut0){
2026 C[2*i1-1][2*i1-1].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
2027 C[2*i1 ][2*i1 ].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
2028 }
2029
2030 /* cutoff the interaction between the ill-conditioned state */
2031
2032 if (1.0e+3<C[2*i1-1][2*i1-1].r){
2033 for (j1=1; j1<=2*n; j1++){
2034 C[2*i1-1][j1 ] = Complex(0.0,0.0);
2035 C[j1 ][2*i1-1] = Complex(0.0,0.0);
2036 C[2*i1 ][j1 ] = Complex(0.0,0.0);
2037 C[j1 ][2*i1 ] = Complex(0.0,0.0);
2038 }
2039 C[2*i1-1][2*i1-1] = Complex(1.0e+4,0.0);
2040 C[2*i1 ][2*i1 ] = Complex(1.0e+4,0.0);
2041 }
2042 }
2043
2044 /* solve eigenvalue problem */
2045
2046 n1 = 2*n;
2047 EigenBand_lapack(C,ko,n1,n1,1);
2048
2049 for (i1=1; i1<=2*n; i1++) EIGEN[kloop][i1] = ko[i1];
2050
2051 /****************************************************
2052 Transformation to the original eigenvectors.
2053 NOTE JRCAT-244p and JAIST-2122p
2054 C = U * lambda^{-1/2} * D
2055 ****************************************************/
2056
2057 /* transpose */
2058 for (i1=1; i1<=2*n; i1++){
2059 for (j1=i1+1; j1<=2*n; j1++){
2060 Ctmp1 = C[i1][j1];
2061 Ctmp2 = C[j1][i1];
2062 C[i1][j1] = Ctmp2;
2063 C[j1][i1] = Ctmp1;
2064 }
2065 }
2066
2067 /* transpose */
2068 for (i1=1; i1<=n; i1++){
2069 for (j1=i1+1; j1<=n; j1++){
2070 Ctmp1 = S[i1][j1];
2071 Ctmp2 = S[j1][i1];
2072 S[i1][j1] = Ctmp2;
2073 S[j1][i1] = Ctmp1;
2074 }
2075 }
2076
2077 #pragma omp parallel shared(C,S,H,n,n1) private(OMPID,Nthrds,Nprocs,i1,j1,l1,sum_r0,sum_i0,sum_r1,sum_i1,l)
2078 {
2079 /* get info. on OpenMP */
2080
2081 OMPID = omp_get_thread_num();
2082 Nthrds = omp_get_num_threads();
2083 Nprocs = omp_get_num_procs();
2084
2085 for (j1=1+OMPID; j1<=n1; j1+=Nthrds){
2086 for (i1=1; i1<=n; i1++){
2087
2088 sum_r0 = 0.0;
2089 sum_i0 = 0.0;
2090
2091 sum_r1 = 0.0;
2092 sum_i1 = 0.0;
2093
2094 l1 = 1;
2095 for (l=1; l<=n; l++){
2096
2097 sum_r0 += S[i1][l].r*C[j1][l1].r
2098 -S[i1][l].i*C[j1][l1].i;
2099 sum_i0 += S[i1][l].r*C[j1][l1].i
2100 +S[i1][l].i*C[j1][l1].r; l1++;
2101
2102 sum_r1 += S[i1][l].r*C[j1][l1].r
2103 -S[i1][l].i*C[j1][l1].i;
2104 sum_i1 += S[i1][l].r*C[j1][l1].i
2105 +S[i1][l].i*C[j1][l1].r; l1++;
2106 }
2107
2108 H[j1][i1 ].r = sum_r0;
2109 H[j1][i1 ].i = sum_i0;
2110
2111 H[j1][i1+n].r = sum_r1;
2112 H[j1][i1+n].i = sum_i1;
2113 }
2114 }
2115
2116 } /* #pragma omp parallel */
2117
2118 } /* if (0<=kloop) */
2119
2120 /****************************************************
2121 store LDOS
2122 ****************************************************/
2123
2124 kloop = arpo[myid];
2125
2126 if (0<=kloop){
2127
2128 k1 = T_KGrids1[kloop];
2129 k2 = T_KGrids2[kloop];
2130 k3 = T_KGrids3[kloop];
2131
2132 i = Ti_KGrids1[kloop];
2133 j = Tj_KGrids2[kloop];
2134 k = Tk_KGrids3[kloop];
2135
2136 for (l=1; l<=2*n; l++){
2137
2138 /* calculate SD */
2139
2140 #pragma omp parallel shared(TmpS,SD,H,l,MP,Spe_Total_CNO,WhatSpecies,atomnum) private(OMPID,Nthrds,Nprocs,GA_AN,wanA,tnoA,Anum,theta,phi,sit,cot,sip,cop,d0,d1,d2,d3,GB_AN,wanB,tnoB,Bnum,i1,j1,u2,v2,uv,vu,Redum,Imdum,tmp1,tmp2,tmp3)
2141 {
2142
2143 /* get info. on OpenMP */
2144
2145 OMPID = omp_get_thread_num();
2146 Nthrds = omp_get_num_threads();
2147 Nprocs = omp_get_num_procs();
2148
2149 for (GA_AN=1+OMPID; GA_AN<=atomnum; GA_AN+=Nthrds){
2150
2151 wanA = WhatSpecies[GA_AN];
2152 tnoA = Spe_Total_CNO[wanA];
2153 Anum = MP[GA_AN];
2154 theta = Angle0_Spin[GA_AN];
2155 phi = Angle1_Spin[GA_AN];
2156 sit = sin(theta);
2157 cot = cos(theta);
2158 sip = sin(phi);
2159 cop = cos(phi);
2160
2161 for (i1=0; i1<tnoA; i1++){
2162
2163 d0 = 0.0;
2164 d1 = 0.0;
2165 d2 = 0.0;
2166 d3 = 0.0;
2167
2168 for (GB_AN=1; GB_AN<=atomnum; GB_AN++){
2169
2170 wanB = WhatSpecies[GB_AN];
2171 tnoB = Spe_Total_CNO[wanB];
2172 Bnum = MP[GB_AN];
2173
2174 for (j1=0; j1<tnoB; j1++){
2175
2176 /* Re11 */
2177 u2 = H[l][Anum+i1].r*H[l][Bnum+j1].r;
2178 v2 = H[l][Anum+i1].i*H[l][Bnum+j1].i;
2179 uv = H[l][Anum+i1].r*H[l][Bnum+j1].i;
2180 vu = H[l][Anum+i1].i*H[l][Bnum+j1].r;
2181 Redum = (u2 + v2);
2182 Imdum = (uv - vu);
2183 d0 += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
2184
2185 /* Re22 */
2186 u2 = H[l][Anum+i1+n].r*H[l][Bnum+j1+n].r;
2187 v2 = H[l][Anum+i1+n].i*H[l][Bnum+j1+n].i;
2188 uv = H[l][Anum+i1+n].r*H[l][Bnum+j1+n].i;
2189 vu = H[l][Anum+i1+n].i*H[l][Bnum+j1+n].r;
2190 Redum = (u2 + v2);
2191 Imdum = (uv - vu);
2192 d1 += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
2193
2194 /* Re12 */
2195 u2 = H[l][Anum+i1].r*H[l][Bnum+j1+n].r;
2196 v2 = H[l][Anum+i1].i*H[l][Bnum+j1+n].i;
2197 uv = H[l][Anum+i1].r*H[l][Bnum+j1+n].i;
2198 vu = H[l][Anum+i1].i*H[l][Bnum+j1+n].r;
2199 Redum = (u2 + v2);
2200 Imdum = (uv - vu);
2201 d2 += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
2202
2203 /* Im12
2204 conjugate complex of Im12 due to difference in the definition
2205 between density matrix and charge density
2206 */
2207
2208 d3 += -(Imdum*TmpS[Anum+i1][Bnum+j1].r + Redum*TmpS[Anum+i1][Bnum+j1].i);
2209
2210 } /* j1 */
2211 } /* GB_AN */
2212
2213 tmp1 = 0.5*(d0 + d1);
2214 tmp2 = 0.5*cot*(d0 - d1);
2215 tmp3 = (d2*cop - d3*sip)*sit;
2216
2217 SD[2*(Anum-1)+i1] = tmp1 + tmp2 + tmp3;
2218 SD[2*(Anum-1)+tnoA+i1] = tmp1 - tmp2 - tmp3;
2219
2220 } /* i1 */
2221 } /* GA_AN */
2222
2223 } /* #pragma omp parallel */
2224
2225 /* calculate contribution to Gaussian DOS */
2226
2227 for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
2228
2229 wanA = WhatSpecies[GA_AN];
2230 tnoA = Spe_Total_CNO[wanA];
2231 Anum = MP[GA_AN];
2232
2233 /*************************
2234 exp( -(x/a)^2 )
2235 a= DosGauss_Width
2236 x=-3a : 3a is counted
2237 **************************/
2238
2239 eg = EIGEN[kloop][l];
2240 x = (eg-Dos_Erange_Tmp[0])/(Dos_Erange_Tmp[1]-Dos_Erange_Tmp[0])*(DosGauss_Num_Mesh-1);
2241 iecenter = (int)x ;
2242
2243 iewidth = DosGauss_Width*3.0/(Dos_Erange_Tmp[1]-Dos_Erange_Tmp[0])*(DosGauss_Num_Mesh-1)+3;
2244
2245 iemin = iecenter - iewidth;
2246 iemax = iecenter + iewidth;
2247
2248 if (iemin<0) iemin=0;
2249 if (iemax>=DosGauss_Num_Mesh) iemax=DosGauss_Num_Mesh-1;
2250
2251 if ( 0<=iemin && iemin<DosGauss_Num_Mesh && 0<=iemax && iemax<DosGauss_Num_Mesh ) {
2252
2253 for (ie=iemin;ie<=iemax;ie++) {
2254 xa = (eg-r_energy[ie])/DosGauss_Width;
2255
2256 for (i1=0; i1<tnoA; i1++){
2257 Dos[ie][2*(Anum-1)+i1 ] += (float)(factor*SD[2*(Anum-1)+i1 ]*exp(-xa*xa)/(DosGauss_Width*pi2));
2258 Dos[ie][2*(Anum-1)+tnoA+i1] += (float)(factor*SD[2*(Anum-1)+tnoA+i1]*exp(-xa*xa)/(DosGauss_Width*pi2));
2259 }
2260 }
2261 }
2262
2263 } /* GA_AN */
2264 } /* l */
2265 } /* if (0<=kloop) */
2266
2267 /*********************************************
2268 calculation of partial density matrix
2269 *********************************************/
2270
2271 kloop = arpo[myid];
2272
2273 if (0<=kloop && cal_partial_charge) {
2274
2275 k1 = T_KGrids1[kloop];
2276 k2 = T_KGrids2[kloop];
2277 k3 = T_KGrids3[kloop];
2278
2279 for (l=1; l<=2*n; l++){
2280
2281 x1 = (ko[l] - ChemP)*Beta;
2282 if (x1<=-max_x) x1 = -max_x;
2283 if (max_x<=x1) x1 = max_x;
2284 FermiF1 = 1.0/(1.0 + exp(x1));
2285
2286 x2 = (ko[l] - (ChemP+ene_win_partial_charge))*Beta;
2287 if (x2<=-max_x) x2 = -max_x;
2288 if (max_x<=x2) x2 = max_x;
2289 FermiF2 = 1.0/(1.0 + exp(x2));
2290
2291 diffF = fabs(FermiF1-FermiF2);
2292
2293 for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
2294
2295 wanA = WhatSpecies[GA_AN];
2296 tnoA = Spe_Total_CNO[wanA];
2297 Anum = MP[GA_AN];
2298
2299 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
2300 GB_AN = natn[GA_AN][LB_AN];
2301 Rn = ncn[GA_AN][LB_AN];
2302 wanB = WhatSpecies[GB_AN];
2303 tnoB = Spe_Total_CNO[wanB];
2304 Bnum = MP[GB_AN];
2305
2306 l1 = atv_ijk[Rn][1];
2307 l2 = atv_ijk[Rn][2];
2308 l3 = atv_ijk[Rn][3];
2309 kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;
2310
2311 si = sin(2.0*PI*kRn);
2312 co = cos(2.0*PI*kRn);
2313
2314 for (i=0; i<tnoA; i++){
2315 for (j=0; j<tnoB; j++){
2316
2317 /* Re11 */
2318 u2 = H[l][Anum+i].r*H[l][Bnum+j].r;
2319 v2 = H[l][Anum+i].i*H[l][Bnum+j].i;
2320 uv = H[l][Anum+i].r*H[l][Bnum+j].i;
2321 vu = H[l][Anum+i].i*H[l][Bnum+j].r;
2322 tmp = co*(u2+v2) - si*(uv-vu);
2323 PDM[0][GA_AN][LB_AN][i][j] += diffF*tmp;
2324
2325 /* Re22 */
2326 u2 = H[l][Anum+i+n].r*H[l][Bnum+j+n].r;
2327 v2 = H[l][Anum+i+n].i*H[l][Bnum+j+n].i;
2328 uv = H[l][Anum+i+n].r*H[l][Bnum+j+n].i;
2329 vu = H[l][Anum+i+n].i*H[l][Bnum+j+n].r;
2330 tmp = co*(u2+v2) - si*(uv-vu);
2331 PDM[1][GA_AN][LB_AN][i][j] += diffF*tmp;
2332
2333 }
2334 }
2335 }
2336 }
2337 }
2338 }
2339
2340 /****************************************************
2341 MPI: EIGEN
2342 ****************************************************/
2343
2344 for (ID=0; ID<numprocs; ID++){
2345
2346 kloop = arpo[ID];
2347
2348 if (0<=kloop){
2349 MPI_Bcast(&EIGEN[kloop][0], 2*n+1, MPI_DOUBLE, ID, mpi_comm_level1);
2350 }
2351 }
2352
2353 } /* kloop0 */
2354
2355 /****************************************************
2356 MPI: PDM
2357 ****************************************************/
2358
2359 if (cal_partial_charge) {
2360
2361 /* MPI: PDM */
2362
2363 for (spin=0; spin<=1; spin++){
2364 for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
2365
2366 wanA = WhatSpecies[GA_AN];
2367 tnoA = Spe_Total_CNO[wanA];
2368
2369 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
2370
2371 GB_AN = natn[GA_AN][LB_AN];
2372 wanB = WhatSpecies[GB_AN];
2373 tnoB = Spe_Total_CNO[wanB];
2374
2375 for (i=0; i<tnoA; i++){
2376
2377 MPI_Allreduce(&PDM[spin][GA_AN][LB_AN][i][0],
2378 &TPDM[spin][GA_AN][LB_AN][i][0],
2379 tnoB, MPI_DOUBLE, MPI_SUM,
2380 mpi_comm_level1);
2381
2382 for (j=0; j<tnoB; j++){
2383 TPDM[spin][GA_AN][LB_AN][i][j] /= (double)T_knum;
2384 }
2385 }
2386 }
2387 }
2388 }
2389
2390 /* store TPDM to Partial_DM */
2391
2392 for (spin=0; spin<=1; spin++){
2393 for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
2394
2395 MA_AN = F_G2M[GA_AN];
2396 wanA = WhatSpecies[GA_AN];
2397 tnoA = Spe_Total_CNO[wanA];
2398
2399 if (1<=MA_AN && MA_AN<=Matomnum){
2400
2401 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
2402
2403 GB_AN = natn[GA_AN][LB_AN];
2404 wanB = WhatSpecies[GB_AN];
2405 tnoB = Spe_Total_CNO[wanB];
2406
2407 for (i=0; i<tnoA; i++){
2408 for (j=0; j<tnoB; j++){
2409 Partial_DM[spin][MA_AN][LB_AN][i][j] = TPDM[spin][GA_AN][LB_AN][i][j];
2410 }
2411 }
2412 }
2413 }
2414 }
2415 }
2416 }
2417
2418
2419
2420
2421
2422
2423 /****************************************************************
2424 write eigenvalues and eigenvectors
2425 ****************************************************************/
2426
2427 if (myid==Host_ID){
2428
2429 sprintf(file_eig,"%s%s.Dos.val",filepath,filename);
2430
2431 if ( (fp_eig=fopen(file_eig,"w"))==NULL ) {
2432 printf("can not open a file %s\n",file_eig);
2433 }
2434
2435 else{
2436
2437 /* write *.Dos.val */
2438
2439 printf("write eigenvalues\n");
2440
2441 fprintf(fp_eig,"mode 7\n");
2442 fprintf(fp_eig,"NonCol 1\n");
2443 fprintf(fp_eig,"N %d\n",n);
2444 fprintf(fp_eig,"Nspin %d\n",1); /* switch to 1 */
2445 fprintf(fp_eig,"Erange %lf %lf\n",Dos_Erange[0],Dos_Erange[1]);
2446 fprintf(fp_eig,"Kgrid %d %d %d\n",1,1,1);
2447 fprintf(fp_eig,"atomnum %d\n",atomnum);
2448 fprintf(fp_eig,"<WhatSpecies\n");
2449 for (i=1; i<=atomnum; i++) {
2450 fprintf(fp_eig,"%d ",WhatSpecies[i]);
2451 }
2452 fprintf(fp_eig,"\nWhatSpecies>\n");
2453 fprintf(fp_eig,"SpeciesNum %d\n",SpeciesNum);
2454 fprintf(fp_eig,"<Spe_Total_CNO\n");
2455 for (i=0;i<SpeciesNum;i++) {
2456 fprintf(fp_eig,"%d ",Spe_Total_CNO[i]);
2457 }
2458 fprintf(fp_eig,"\nSpe_Total_CNO>\n");
2459
2460 MaxL = Supported_MaxL;
2461
2462 fprintf(fp_eig,"MaxL %d\n",Supported_MaxL);
2463 fprintf(fp_eig,"<Spe_Num_CBasis\n");
2464 for (i=0;i<SpeciesNum;i++) {
2465 for (l=0;l<=MaxL;l++) {
2466 fprintf(fp_eig,"%d ",Spe_Num_CBasis[i][l]);
2467 }
2468 fprintf(fp_eig,"\n");
2469 }
2470 fprintf(fp_eig,"Spe_Num_CBasis>\n");
2471 fprintf(fp_eig,"ChemP %lf\n",ChemP);
2472
2473 fprintf(fp_eig,"<SpinAngle\n");
2474 for (i=1; i<=atomnum; i++) {
2475 fprintf(fp_eig,"%lf %lf\n",Angle0_Spin[i],Angle1_Spin[i]);
2476 }
2477 fprintf(fp_eig,"SpinAngle>\n");
2478
2479 fprintf(fp_eig,"irange %d %d\n",0,DosGauss_Num_Mesh-1);
2480 fprintf(fp_eig,"<Eigenvalues\n");
2481 for (spin=0; spin<=1; spin++) {
2482 fprintf(fp_eig,"%d %d %d ",0,0,0);
2483
2484 for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
2485 fprintf(fp_eig,"%lf ",r_energy[ik]);
2486 }
2487 fprintf(fp_eig,"\n");
2488 }
2489 fprintf(fp_eig,"Eigenvalues>\n");
2490
2491 fclose(fp_eig);
2492 }
2493
2494 } /* if (myid==Host_ID) */
2495
2496 if (myid==Host_ID){
2497
2498 /* write *.Dos.vec */
2499 printf("write eigenvectors\n");
2500 sprintf(file_ev,"%s%s.Dos.vec",filepath,filename);
2501 if ( (fp_ev=fopen(file_ev,"w"))==NULL ) {
2502 printf("can not open a file %s\n",file_ev);
2503 }
2504 }
2505
2506 for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
2507
2508 i_vec[0] = 0;
2509 i_vec[1] = 0;
2510 i_vec[2] = 0;
2511
2512 MPI_Reduce(&Dos[ik][0], &DosVec[0], 2*n, MPI_FLOAT, MPI_SUM, Host_ID, mpi_comm_level1);
2513
2514 if (myid==Host_ID){
2515 fwrite(i_vec,sizeof(int),3,fp_ev);
2516 fwrite(&DosVec[0],sizeof(float),2*n,fp_ev);
2517 }
2518
2519 }
2520
2521 if (myid==Host_ID){
2522 fclose(fp_ev);
2523 }
2524
2525 /****************************************************
2526 Fermi surface viewable by XCrysDen
2527 ****************************************************/
2528
2529 if (myid==Host_ID){
2530
2531 int ii,jj,kk;
2532 int ***TD2OD;
2533 double x0,y0,z0;
2534 char file_fermi[YOUSO10];
2535 FILE *fp;
2536
2537 /* allocation of TD2OD */
2538
2539 TD2OD = (int***)malloc(sizeof(int**)*knum_i);
2540 for (i=0; i<knum_i; i++){
2541 TD2OD[i] = (int**)malloc(sizeof(int*)*knum_j);
2542 for (j=0; j<knum_j; j++){
2543 TD2OD[i][j] = (int*)malloc(sizeof(int)*knum_k);
2544 for (k=0; k<knum_k; k++) TD2OD[i][j][k] = 0;
2545 }
2546 }
2547
2548 kloop = 0;
2549 for (i=0; i<knum_i; i++){
2550 for (j=0; j<knum_j; j++){
2551 for (k=0; k<knum_k; k++){
2552 TD2OD[i][j][k] = kloop;
2553 kloop++;
2554 }
2555 }
2556 }
2557
2558 /* make bxsf files */
2559
2560 sprintf(file_fermi,"%s%s.FermiSurf.bxsf",filepath,filename);
2561
2562 if ((fp=fopen(file_fermi,"w")) != NULL){
2563
2564 fprintf(fp,"BEGIN_INFO\n");
2565 fprintf(fp,"Fermi Energy: %15.12f\n",ChemP);
2566 fprintf(fp,"END_INFO\n\n");
2567
2568 fprintf(fp,"BEGIN_BLOCK_BANDGRID_3D\n");
2569 fprintf(fp,"comment_line\n");
2570 fprintf(fp,"BEGIN_BANDGRID_3D_simple_example\n");
2571 fprintf(fp,"%d\n",2*n);
2572 fprintf(fp,"%d %d %d\n",knum_i+1,knum_j+1,knum_k+1);
2573
2574 fprintf(fp,"0.0 0.0 0.0\n");
2575 fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[1][1],rtv[1][2],rtv[1][3]);
2576 fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[2][1],rtv[2][2],rtv[2][3]);
2577 fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[3][1],rtv[3][2],rtv[3][3]);
2578
2579 for (i1=1; i1<=2*n; i1++){
2580
2581 fprintf(fp,"BAND: %d\n",i1);
2582
2583 for (i=0; i<=knum_i; i++){
2584 ii = i%knum_i;
2585 for (j=0; j<=knum_j; j++){
2586 jj = j%knum_j;
2587 for (k=0; k<=knum_k; k++){
2588 kk = k%knum_k;
2589 kloop = TD2OD[ii][jj][kk];
2590 fprintf(fp,"%10.5f ",EIGEN[kloop][i1]);
2591 }
2592 fprintf(fp,"\n");
2593 }
2594
2595 if (i!=knum_i) fprintf(fp,"\n");
2596
2597 }
2598 }
2599
2600 fprintf(fp,"END_BANDGRID_3D\n");
2601 fprintf(fp,"END_BLOCK_BANDGRID_3D\n");
2602
2603 fclose(fp);
2604 }
2605 else{
2606 printf("Failure in saving bxsf files.\n");
2607 }
2608
2609 /* free TD2OD */
2610 for (i=0; i<knum_i; i++){
2611 for (j=0; j<knum_j; j++){
2612 free(TD2OD[i][j]);
2613 }
2614 free(TD2OD[i]);
2615 }
2616 free(TD2OD);
2617
2618 } /* if (myid==Host_ID) */
2619
2620 /****************************************************
2621 free arrays
2622 ****************************************************/
2623
2624 free(DosVec);
2625
2626 free(MP);
2627
2628 free(arpo);
2629
2630 free(ko);
2631 free(koS);
2632
2633 for (j=0; j<n2; j++){
2634 free(H[j]);
2635 }
2636 free(H);
2637
2638 for (i=0; i<n2; i++){
2639 free(S[i]);
2640 }
2641 free(S);
2642
2643 free(M1);
2644
2645 for (j=0; j<n2; j++){
2646 free(C[j]);
2647 }
2648 free(C);
2649
2650 free(KGrids1); free(KGrids2);free(KGrids3);
2651
2652 free(SD);
2653
2654 for (j=0; j<n2; j++){
2655 free(TmpM[j]);
2656 }
2657 free(TmpM);
2658
2659 for (j=0; j<n+1; j++){
2660 free(TmpS[j]);
2661 }
2662 free(TmpS);
2663
2664 for (ik=0; ik<DosGauss_Num_Mesh; ik++) {
2665 free(Dos[ik]);
2666 }
2667 free(Dos);
2668
2669 free(r_energy);
2670
2671 /* non-spin-orbit coupling and non-LDA+U */
2672 if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
2673 && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0){
2674 free(Dummy_ImNL[0][0][0][0]);
2675 free(Dummy_ImNL[0][0][0]);
2676 free(Dummy_ImNL[0][0]);
2677 free(Dummy_ImNL[0]);
2678 free(Dummy_ImNL);
2679 }
2680
2681 /* partial density matrix */
2682
2683 if (cal_partial_charge){
2684
2685 /* PDM */
2686
2687 for (k=0; k<=1; k++){
2688
2689 FNAN[0] = 0;
2690
2691 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
2692
2693 if (Gc_AN==0){
2694 tno0 = 1;
2695 }
2696 else{
2697 Cwan = WhatSpecies[Gc_AN];
2698 tno0 = Spe_Total_NO[Cwan];
2699 }
2700
2701 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2702
2703 if (Gc_AN==0){
2704 tno1 = 1;
2705 }
2706 else{
2707 Gh_AN = natn[Gc_AN][h_AN];
2708 Hwan = WhatSpecies[Gh_AN];
2709 tno1 = Spe_Total_NO[Hwan];
2710 }
2711
2712 for (i=0; i<tno0; i++){
2713 free(PDM[k][Gc_AN][h_AN][i]);
2714 }
2715 free(PDM[k][Gc_AN][h_AN]);
2716 }
2717 free(PDM[k][Gc_AN]);
2718 }
2719 free(PDM[k]);
2720 }
2721 free(PDM);
2722
2723 /* TPDM */
2724
2725 for (k=0; k<=1; k++){
2726
2727 FNAN[0] = 0;
2728
2729 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
2730
2731 if (Gc_AN==0){
2732 tno0 = 1;
2733 }
2734 else{
2735 Cwan = WhatSpecies[Gc_AN];
2736 tno0 = Spe_Total_NO[Cwan];
2737 }
2738
2739 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2740
2741 if (Gc_AN==0){
2742 tno1 = 1;
2743 }
2744 else{
2745 Gh_AN = natn[Gc_AN][h_AN];
2746 Hwan = WhatSpecies[Gh_AN];
2747 tno1 = Spe_Total_NO[Hwan];
2748 }
2749
2750 for (i=0; i<tno0; i++){
2751 free(TPDM[k][Gc_AN][h_AN][i]);
2752 }
2753 free(TPDM[k][Gc_AN][h_AN]);
2754 }
2755 free(TPDM[k][Gc_AN]);
2756 }
2757 free(TPDM[k]);
2758 }
2759 free(TPDM);
2760 }
2761
2762 free(T_KGrids1);
2763 free(T_KGrids2);
2764 free(T_KGrids3);
2765
2766 free(Ti_KGrids1);
2767 free(Tj_KGrids2);
2768 free(Tk_KGrids3);
2769
2770 for (j=0; j<T_knum; j++){
2771 free(EIGEN[j]);
2772 }
2773 free(EIGEN);
2774
2775 /* for elapsed time */
2776
2777 dtime(&TEtime);
2778 time0 = TEtime - TStime;
2779 return time0;
2780 }
2781
2782
2783
2784
2785
2786
2787
Band_DFT_Dosout_Col(int knum_i,int knum_j,int knum_k,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP)2788 static double Band_DFT_Dosout_Col(
2789 int knum_i, int knum_j, int knum_k,
2790 int SpinP_switch,
2791 double *****nh,
2792 double *****ImNL,
2793 double ****CntOLP)
2794 {
2795 int i,j,k,spin,l,i1,j1,n1;
2796 int n, wanA;
2797 int *MP;
2798 int MA_AN, GA_AN, tnoA,Anum, LB_AN;
2799 int GB_AN, wanB, tnoB, Bnum, RnB;
2800 int l1,l2,l3,Rn;
2801 int kloop,kloop0,S_knum,E_knum,T_knum,num_kloop0;
2802 int ie,iemin,iemax,iemin0,iemax0,n1min;
2803 int MaxL,e1,s1;
2804 int h_AN,tno0,tno1,Gh_AN,Hwan,Gc_AN,Cwan;
2805 int i_vec[10];
2806 double sum,sumi,u2,v2,uv,vu,tmp;
2807 double FermiF1,FermiF2,x1,x2,diffF,max_x=50.0;
2808 double kRn,si,co,Redum,Imdum,Redum2,Resum;
2809 double TStime,TEtime,time0;
2810 double OLP_eigen_cut=Threshold_OLP_Eigen;
2811 double EV_cut0;
2812 double *****PDM;
2813 double *****TPDM;
2814 double *koS;
2815 double **ko; int N_ko, i_ko[10];
2816 dcomplex **H; int N_H, i_H[10];
2817 dcomplex **S; int N_S, i_S[10];
2818 double *M1,***EIGEN;
2819 dcomplex **C; int N_C, i_C[10];
2820 double *KGrids1, *KGrids2, *KGrids3;
2821 float *SD; int N_SD, i_SD[10];
2822 dcomplex ***H2,**TmpM,**TmpS;
2823 double *T_KGrids1,*T_KGrids2,*T_KGrids3;
2824 int *Ti_KGrids1,*Tj_KGrids2,*Tk_KGrids3,*arpo;
2825 int *num_allocated_k;
2826 dcomplex Ctmp1,Ctmp2;
2827 double ****Dummy_ImNL;
2828 double snum_i, snum_j, snum_k;
2829 double k1,k2,k3;
2830 char file_ev[YOUSO10],file_eig[YOUSO10];
2831 char file_ev0[YOUSO10];
2832 FILE *fp_ev0,*fp_ev,*fp_eig;
2833 char buf1[fp_bsize]; /* setvbuf */
2834 char buf2[fp_bsize]; /* setvbuf */
2835 int numprocs,myid,ID,ID1,ID2,tag;
2836
2837 /* for OpenMP */
2838 int OMPID,Nthrds,Nprocs;
2839
2840 MPI_Status stat;
2841 MPI_Request request;
2842
2843 /* MPI */
2844 MPI_Comm_size(mpi_comm_level1,&numprocs);
2845 MPI_Comm_rank(mpi_comm_level1,&myid);
2846 MPI_Barrier(mpi_comm_level1);
2847
2848 if (myid==Host_ID && 0<level_stdout){
2849 printf("\n<Band_DFT_Dosout>\n"); fflush(stdout);
2850 }
2851
2852 dtime(&TStime);
2853
2854 /****************************************************
2855 calculation of the array size
2856 ****************************************************/
2857
2858 n = 0;
2859 for (i=1; i<=atomnum; i++){
2860 wanA = WhatSpecies[i];
2861 n += Spe_Total_CNO[wanA];
2862 }
2863
2864 /****************************************************
2865 Allocation of arrays
2866 ****************************************************/
2867
2868 MP = (int*)malloc(sizeof(int)*List_YOUSO[1]);
2869 arpo = (int*)malloc(sizeof(int)*numprocs);
2870
2871 num_allocated_k = (int*)malloc(sizeof(int)*numprocs);
2872
2873 N_ko=2; i_ko[0]=List_YOUSO[23]; i_ko[1]=n+1;
2874 ko=(double**)malloc_multidimarray("double",N_ko, i_ko);
2875
2876 koS = (double*)malloc(sizeof(double)*(n+1));
2877
2878 N_H=2; i_H[0]=i_H[1]=n+1;
2879 H=(dcomplex**)malloc_multidimarray("dcomplex",N_H, i_H);
2880
2881 N_S=2; i_S[0]=i_S[1]=n+1;
2882 S=(dcomplex**)malloc_multidimarray("dcomplex",N_S, i_S);
2883
2884 M1 = (double*)malloc(sizeof(double)*(n+1));
2885
2886 N_C=2; i_C[0]=i_C[1]=n+1;
2887 C=(dcomplex**)malloc_multidimarray("dcomplex",N_C, i_C);
2888
2889 KGrids1 = (double*)malloc(sizeof(double)*knum_i);
2890 KGrids2 = (double*)malloc(sizeof(double)*knum_j);
2891 KGrids3 = (double*)malloc(sizeof(double)*knum_k);
2892
2893 SD=(float*)malloc(sizeof(float)*(atomnum+1)*List_YOUSO[7]);
2894
2895 TmpM = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
2896 for (j=0; j<n+1; j++){
2897 TmpM[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
2898 }
2899
2900 TmpS = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
2901 for (j=0; j<n+1; j++){
2902 TmpS[j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
2903 }
2904
2905 H2 = (dcomplex***)malloc(sizeof(dcomplex**)*List_YOUSO[23]);
2906 for (i=0; i<List_YOUSO[23]; i++){
2907 H2[i] = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
2908 for (j=0; j<n+1; j++){
2909 H2[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
2910 }
2911 }
2912
2913 /* for calculation partial density matrix */
2914
2915 if (cal_partial_charge){
2916
2917 PDM = (double*****)malloc(sizeof(double****)*2);
2918 for (k=0; k<=1; k++){
2919
2920 PDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
2921 FNAN[0] = 0;
2922
2923 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
2924
2925 if (Gc_AN==0){
2926 tno0 = 1;
2927 }
2928 else{
2929 Cwan = WhatSpecies[Gc_AN];
2930 tno0 = Spe_Total_NO[Cwan];
2931 }
2932
2933 PDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
2934 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2935
2936 if (Gc_AN==0){
2937 tno1 = 1;
2938 }
2939 else{
2940 Gh_AN = natn[Gc_AN][h_AN];
2941 Hwan = WhatSpecies[Gh_AN];
2942 tno1 = Spe_Total_NO[Hwan];
2943 }
2944
2945 PDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
2946 for (i=0; i<tno0; i++){
2947 PDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
2948 for (j=0; j<tno1; j++) PDM[k][Gc_AN][h_AN][i][j] = 0.0;
2949 }
2950 }
2951 }
2952 }
2953
2954 TPDM = (double*****)malloc(sizeof(double****)*2);
2955 for (k=0; k<=1; k++){
2956
2957 TPDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
2958 FNAN[0] = 0;
2959
2960 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
2961
2962 if (Gc_AN==0){
2963 tno0 = 1;
2964 }
2965 else{
2966 Cwan = WhatSpecies[Gc_AN];
2967 tno0 = Spe_Total_NO[Cwan];
2968 }
2969
2970 TPDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
2971 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2972
2973 if (Gc_AN==0){
2974 tno1 = 1;
2975 }
2976 else{
2977 Gh_AN = natn[Gc_AN][h_AN];
2978 Hwan = WhatSpecies[Gh_AN];
2979 tno1 = Spe_Total_NO[Hwan];
2980 }
2981
2982 TPDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
2983 for (i=0; i<tno0; i++){
2984 TPDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
2985 for (j=0; j<tno1; j++) TPDM[k][Gc_AN][h_AN][i][j] = 0.0;
2986 }
2987 }
2988 }
2989 }
2990 }
2991
2992 /* no spin-orbit coupling */
2993 if (SO_switch==0){
2994 Dummy_ImNL = (double****)malloc(sizeof(double***)*1);
2995 Dummy_ImNL[0] = (double***)malloc(sizeof(double**)*1);
2996 Dummy_ImNL[0][0] = (double**)malloc(sizeof(double*)*1);
2997 Dummy_ImNL[0][0][0] = (double*)malloc(sizeof(double)*1);
2998 }
2999
3000 snum_i = knum_i;
3001 snum_j = knum_j;
3002 snum_k = knum_k;
3003
3004 /* set up k-grids */
3005 for (i=0; i<knum_i; i++){
3006 k1 = (double)i/snum_i + Shift_K_Point;
3007 KGrids1[i]=k1;
3008 }
3009 for (i=0; i<knum_j; i++){
3010 k1 = (double)i/snum_j - Shift_K_Point;
3011 KGrids2[i]=k1;
3012 }
3013 for (i=0; i<knum_k; i++){
3014 k1 = (double)i/snum_k + 2.0*Shift_K_Point;
3015 KGrids3[i]=k1;
3016 }
3017
3018 if (myid==Host_ID && 0<level_stdout){
3019 printf(" KGrids1: ");
3020 for (i=0;i<=knum_i-1;i++) printf("%f ",KGrids1[i]);
3021 printf("\n");
3022 printf(" KGrids2: ");
3023 for (i=0;i<=knum_j-1;i++) printf("%f ",KGrids2[i]);
3024 printf("\n");
3025 printf(" KGrids3: ");
3026 for (i=0;i<=knum_k-1;i++) printf("%f ",KGrids3[i]);
3027 printf("\n");
3028 }
3029
3030 /***********************************
3031 one-dimentionalize for MPI
3032 ************************************/
3033
3034 T_knum = 0;
3035 for (i=0; i<=(knum_i-1); i++){
3036 for (j=0; j<=(knum_j-1); j++){
3037 for (k=0; k<=(knum_k-1); k++){
3038 T_knum++;
3039 }
3040 }
3041 }
3042
3043 T_KGrids1 = (double*)malloc(sizeof(double)*T_knum);
3044 T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
3045 T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
3046
3047 Ti_KGrids1 = (int*)malloc(sizeof(int)*T_knum);
3048 Tj_KGrids2 = (int*)malloc(sizeof(int)*T_knum);
3049 Tk_KGrids3 = (int*)malloc(sizeof(int)*T_knum);
3050
3051 EIGEN = (double***)malloc(sizeof(double**)*List_YOUSO[23]);
3052 for (i=0; i<List_YOUSO[23]; i++){
3053 EIGEN[i] = (double**)malloc(sizeof(double*)*T_knum);
3054 for (j=0; j<T_knum; j++){
3055 EIGEN[i][j] = (double*)malloc(sizeof(double)*(n+1));
3056 }
3057 }
3058
3059 /* set T_KGrid1,2,3 */
3060
3061 T_knum = 0;
3062 for (i=0; i<=(knum_i-1); i++){
3063 for (j=0; j<=(knum_j-1); j++){
3064 for (k=0; k<=(knum_k-1); k++){
3065
3066 T_KGrids1[T_knum] = KGrids1[i];
3067 T_KGrids2[T_knum] = KGrids2[j];
3068 T_KGrids3[T_knum] = KGrids3[k];
3069
3070 Ti_KGrids1[T_knum] = i;
3071 Tj_KGrids2[T_knum] = j;
3072 Tk_KGrids3[T_knum] = k;
3073
3074 T_knum++;
3075 }
3076 }
3077 }
3078
3079 /* allocate k-points into proccessors */
3080
3081 if (T_knum<=myid){
3082 S_knum = -10;
3083 E_knum = -100;
3084 num_kloop0 = 1;
3085 num_allocated_k[myid] = 0;
3086 }
3087 else if (T_knum<numprocs) {
3088 S_knum = myid;
3089 E_knum = myid;
3090 num_kloop0 = 1;
3091 num_allocated_k[myid] = 1;
3092 }
3093 else {
3094 tmp = (double)T_knum/(double)numprocs;
3095 num_kloop0 = (int)tmp + 1;
3096
3097 S_knum = (int)((double)myid*(tmp+0.0001));
3098 E_knum = (int)((double)(myid+1)*(tmp+0.0001)) - 1;
3099 if (myid==(numprocs-1)) E_knum = T_knum - 1;
3100 if (E_knum<0) E_knum = 0;
3101 num_allocated_k[myid] = E_knum - S_knum + 1;
3102 }
3103
3104 for (ID=0; ID<numprocs; ID++){
3105 MPI_Bcast(&num_allocated_k[ID], 1, MPI_INT, ID, mpi_comm_level1);
3106 }
3107
3108 /****************************************************************
3109 find iemin and iemax
3110 *****************************************************************/
3111
3112 iemin=n; iemax=1; n1min=1;
3113
3114 k1 = 0.0;
3115 k2 = 0.0;
3116 k3 = 0.0;
3117
3118 Overlap_Band(Host_ID,CntOLP,S,MP,k1,k2,k3);
3119
3120 if (myid==Host_ID){
3121
3122 n = S[0][0].r;
3123 EigenBand_lapack(S,ko[0],n,n,1);
3124
3125 if (3<=level_stdout){
3126 printf(" k1 k2 k3 %10.6f %10.6f %10.6f\n",k1,k2,k3);
3127 for (i1=1; i1<=n; i1++){
3128 printf(" Eigenvalues of OLP %2d %15.12f\n",i1,ko[0][i1]);
3129 }
3130 }
3131
3132 /* minus eigenvalues to 1.0e-14 */
3133
3134 for (l=1; l<=n; l++){
3135 if (ko[0][l]<0.0) ko[0][l] = 1.0e-14;
3136 koS[l] = ko[0][l];
3137 }
3138
3139 /* calculate S*1/sqrt(ko) */
3140
3141 for (l=1; l<=n; l++) M1[l] = 1.0/sqrt(ko[0][l]);
3142
3143 /* S * M1 */
3144
3145 for (i1=1; i1<=n; i1++){
3146 for (j1=1; j1<=n; j1++){
3147 S[i1][j1].r = S[i1][j1].r*M1[j1];
3148 S[i1][j1].i = S[i1][j1].i*M1[j1];
3149 }
3150 }
3151
3152 } /* if (myid==Host_ID) */
3153
3154 spin = 0;
3155
3156 /****************************************************
3157 make a full Hamiltonian matrix
3158 ****************************************************/
3159
3160 Hamiltonian_Band(Host_ID, nh[spin], H, MP, k1, k2, k3);
3161
3162 if (myid==Host_ID){
3163
3164 /****************************************************
3165 M1 * U^t * H * U * M1
3166 ****************************************************/
3167
3168 /* transpose S */
3169
3170 if (spin==0){
3171 for (i1=1; i1<=n; i1++){
3172 for (j1=i1+1; j1<=n; j1++){
3173 Ctmp1 = S[i1][j1];
3174 Ctmp2 = S[j1][i1];
3175 S[i1][j1] = Ctmp2;
3176 S[j1][i1] = Ctmp1;
3177 }
3178 }
3179 }
3180
3181 /* H * U * M1 */
3182
3183 for (i1=1; i1<=n; i1++){
3184 for (j1=1; j1<=n; j1++){
3185 sum = 0.0; sumi=0.0;
3186 for (l=1; l<=n; l++){
3187 sum += H[i1][l].r*S[j1][l].r - H[i1][l].i*S[j1][l].i;
3188 sumi += H[i1][l].r*S[j1][l].i + H[i1][l].i*S[j1][l].r;
3189 }
3190
3191 C[j1][i1].r = sum;
3192 C[j1][i1].i = sumi;
3193 }
3194 }
3195
3196 /* M1 * U^+ H * U * M1 */
3197
3198 for (i1=1; i1<=n; i1++){
3199 for (j1=1; j1<=n; j1++){
3200 sum = 0.0;
3201 sumi=0.0;
3202 for (l=1; l<=n; l++){
3203 sum += S[i1][l].r*C[j1][l].r + S[i1][l].i*C[j1][l].i;
3204 sumi += S[i1][l].r*C[j1][l].i - S[i1][l].i*C[j1][l].r;
3205 }
3206 H[i1][j1].r = sum;
3207 H[i1][j1].i = sumi;
3208 }
3209 }
3210
3211 /* H to C */
3212
3213 for (i1=1; i1<=n; i1++){
3214 for (j1=1; j1<=n; j1++){
3215 C[i1][j1] = H[i1][j1];
3216 }
3217 }
3218
3219 /* penalty for ill-conditioning states */
3220
3221 EV_cut0 = Threshold_OLP_Eigen;
3222
3223 for (i1=1; i1<=n; i1++){
3224
3225 if (koS[i1]<EV_cut0){
3226 C[i1][i1].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
3227 }
3228
3229 /* cutoff the interaction between the ill-conditioned state */
3230
3231 if (1.0e+3<C[i1][i1].r){
3232 for (j1=1; j1<=n; j1++){
3233 C[i1][j1] = Complex(0.0,0.0);
3234 C[j1][i1] = Complex(0.0,0.0);
3235 }
3236 C[i1][i1].r = 1.0e+4;
3237 }
3238 }
3239
3240 /* solve eigenvalue problem */
3241
3242 n1 = n;
3243 EigenBand_lapack(C,ko[spin],n1,n1,1);
3244
3245 if (n1min<n1) n1min=n1;
3246
3247 iemin0=1;
3248 for (i1=1;i1<=n1;i1++) {
3249 if (ko[spin][i1]>(ChemP+Dos_Erange[0]) ) {
3250 iemin0=i1-1;
3251 break;
3252 }
3253 }
3254 if (iemin0<1) iemin0=1;
3255
3256 iemax0=n1;
3257 for (i1=iemin0;i1<=n1;i1++) {
3258 if (ko[spin][i1]>(ChemP+Dos_Erange[1]) ) {
3259 iemax0=i1;
3260 break;
3261 }
3262 }
3263 if (iemax0>n1) iemax0=n1;
3264
3265 if (iemin>iemin0) iemin=iemin0;
3266 if (iemax<iemax0) iemax=iemax0;
3267
3268 } /* if (myid==Host_ID) */
3269
3270 /* add a buffer to iemin and iemax */
3271
3272 iemin -= 5;
3273 iemax += 5;
3274
3275 if (iemin<1) iemin = 1;
3276 if (n<iemax) iemax = n;
3277
3278 /* partial density matrix for STM simulation */
3279 if (cal_partial_charge){
3280 iemin = 1;
3281 iemax = n;
3282 }
3283
3284 if (myid==Host_ID){
3285 if (n1min<iemax) iemax=n1min;
3286 printf(" iemin and iemax= %d %d\n",iemin,iemax);
3287 }
3288
3289 /* MPI, iemin, iemax */
3290 MPI_Barrier(mpi_comm_level1);
3291 MPI_Bcast(&iemin, 1, MPI_INT, Host_ID, mpi_comm_level1);
3292 MPI_Bcast(&iemax, 1, MPI_INT, Host_ID, mpi_comm_level1);
3293
3294 /****************************************************************
3295 eigenvalues and eigenvectors
3296 ****************************************************************/
3297
3298 if (myid==Host_ID){
3299
3300 sprintf(file_eig,"%s%s.Dos.val",filepath,filename);
3301 if ( (fp_eig=fopen(file_eig,"w"))==NULL ) {
3302
3303 #ifdef xt3
3304 setvbuf(fp_eig,buf1,_IOFBF,fp_bsize); /* setvbuf */
3305 #endif
3306
3307 printf("can not open a file %s\n",file_eig);
3308 }
3309
3310 if ( fp_eig==NULL ) {
3311 goto Finishing;
3312 }
3313 }
3314
3315 sprintf(file_ev,"%s%s.Dos.vec%d",filepath,filename,myid);
3316 if ( (fp_ev=fopen(file_ev,"w"))==NULL ) {
3317 printf("cannot open a file %s\n",file_ev);
3318 }
3319 if ( fp_ev==NULL ) {
3320 goto Finishing;
3321 }
3322
3323 if (myid==Host_ID){
3324
3325 if (fp_eig) {
3326 fprintf(fp_eig,"mode 1\n");
3327 fprintf(fp_eig,"NonCol 0\n");
3328 fprintf(fp_eig,"N %d\n",n);
3329 fprintf(fp_eig,"Nspin %d\n",SpinP_switch);
3330 fprintf(fp_eig,"Erange %lf %lf\n",Dos_Erange[0],Dos_Erange[1]);
3331 fprintf(fp_eig,"irange %d %d\n",iemin,iemax);
3332 fprintf(fp_eig,"Kgrid %d %d %d\n",knum_i,knum_j,knum_k);
3333 fprintf(fp_eig,"atomnum %d\n",atomnum);
3334 fprintf(fp_eig,"<WhatSpecies\n");
3335 for (i=1;i<=atomnum;i++) {
3336 fprintf(fp_eig,"%d ",WhatSpecies[i]);
3337 }
3338 fprintf(fp_eig,"\nWhatSpecies>\n");
3339 fprintf(fp_eig,"SpeciesNum %d\n",SpeciesNum);
3340 fprintf(fp_eig,"<Spe_Total_CNO\n");
3341 for (i=0;i<SpeciesNum;i++) {
3342 fprintf(fp_eig,"%d ",Spe_Total_CNO[i]);
3343 }
3344 fprintf(fp_eig,"\nSpe_Total_CNO>\n");
3345
3346 MaxL=Supported_MaxL;
3347
3348 fprintf(fp_eig,"MaxL %d\n",Supported_MaxL);
3349 fprintf(fp_eig,"<Spe_Num_CBasis\n");
3350 for (i=0;i<SpeciesNum;i++) {
3351 for (l=0;l<=MaxL;l++) {
3352 fprintf(fp_eig,"%d ",Spe_Num_CBasis[i][l]);
3353 }
3354 fprintf(fp_eig,"\n");
3355 }
3356 fprintf(fp_eig,"Spe_Num_CBasis>\n");
3357
3358 fprintf(fp_eig,"ChemP %lf\n",ChemP);
3359
3360 fprintf(fp_eig,"<Eigenvalues\n");
3361
3362 }
3363 if (fp_eig) {
3364 printf(" write eigenvalues\n");
3365 }
3366 if (fp_ev) {
3367 printf(" write eigenvectors\n");
3368 }
3369 }
3370
3371 /* for kloop */
3372
3373 for (kloop0=0; kloop0<num_kloop0; kloop0++){
3374
3375 kloop = kloop0 + S_knum;
3376 arpo[myid] = -1;
3377 if (S_knum<=kloop && kloop<=E_knum) arpo[myid] = kloop;
3378 for (ID=0; ID<numprocs; ID++){
3379 MPI_Bcast(&arpo[ID], 1, MPI_INT, ID, mpi_comm_level1);
3380 }
3381
3382 /* set S */
3383
3384 for (ID=0; ID<numprocs; ID++){
3385
3386 kloop = arpo[ID];
3387
3388 if (0<=kloop){
3389
3390 k1 = T_KGrids1[kloop];
3391 k2 = T_KGrids2[kloop];
3392 k3 = T_KGrids3[kloop];
3393
3394 Overlap_Band(ID,CntOLP,TmpM,MP,k1,k2,k3);
3395 n = TmpM[0][0].r;
3396
3397 if (myid==ID){
3398 for (i1=1; i1<=n; i1++){
3399 for (j1=1; j1<=n; j1++){
3400 S[i1][j1].r = TmpM[i1][j1].r;
3401 S[i1][j1].i = TmpM[i1][j1].i;
3402
3403 TmpS[i1][j1].r = TmpM[i1][j1].r;
3404 TmpS[i1][j1].i = TmpM[i1][j1].i;
3405 }
3406 }
3407 }
3408
3409 }
3410 }
3411
3412 kloop = arpo[myid];
3413
3414 if (0<=kloop){
3415
3416 EigenBand_lapack(S,ko[0],n,n,1);
3417
3418 if (3<=level_stdout && 0<=kloop){
3419 printf(" kloop %2d k1 k2 k3 %10.6f %10.6f %10.6f\n",
3420 kloop,T_KGrids1[kloop],T_KGrids2[kloop],T_KGrids3[kloop]);
3421 for (i1=1; i1<=n; i1++){
3422 printf(" Eigenvalues of OLP %2d %15.12f\n",i1,ko[0][i1]);
3423 }
3424 }
3425
3426 /* minus eigenvalues to 1.0e-14 */
3427
3428 for (l=1; l<=n; l++){
3429 if (ko[0][l]<0.0) ko[0][l] = 1.0e-14;
3430 koS[l] = ko[0][l];
3431 }
3432
3433 /* calculate S*1/sqrt(ko) */
3434
3435 for (l=1; l<=n; l++) M1[l] = 1.0/sqrt(ko[0][l]);
3436
3437 /* S * M1 */
3438
3439 for (i1=1; i1<=n; i1++){
3440 for (j1=1; j1<=n; j1++){
3441 S[i1][j1].r = S[i1][j1].r*M1[j1];
3442 S[i1][j1].i = S[i1][j1].i*M1[j1];
3443 }
3444 }
3445
3446 } /* if (0<=kloop) */
3447
3448 /* set H */
3449
3450 for (spin=0; spin<=SpinP_switch; spin++){
3451
3452 for (ID=0; ID<numprocs; ID++){
3453
3454 kloop = arpo[ID];
3455
3456 if (0<=kloop){
3457 k1 = T_KGrids1[kloop];
3458 k2 = T_KGrids2[kloop];
3459 k3 = T_KGrids3[kloop];
3460
3461 Hamiltonian_Band(ID, nh[spin], TmpM, MP, k1, k2, k3);
3462
3463 if (myid==ID){
3464 for (i1=1; i1<=n; i1++){
3465 for (j1=1; j1<=n; j1++){
3466 H[i1][j1].r = TmpM[i1][j1].r;
3467 H[i1][j1].i = TmpM[i1][j1].i;
3468 }
3469 }
3470 }
3471 }
3472 }
3473
3474 kloop = arpo[myid];
3475
3476 if (0<=kloop){
3477
3478 /****************************************************
3479 M1 * U^t * H * U * M1
3480 ****************************************************/
3481
3482 /* transpose S */
3483
3484 for (i1=1; i1<=n; i1++){
3485 for (j1=i1+1; j1<=n; j1++){
3486 Ctmp1 = S[i1][j1];
3487 Ctmp2 = S[j1][i1];
3488 S[i1][j1] = Ctmp2;
3489 S[j1][i1] = Ctmp1;
3490 }
3491 }
3492
3493 /* H * U * M1 */
3494
3495 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,j1,sum,sumi,l)
3496 {
3497
3498 /* get info. on OpenMP */
3499
3500 OMPID = omp_get_thread_num();
3501 Nthrds = omp_get_num_threads();
3502 Nprocs = omp_get_num_procs();
3503
3504 for (i1=1+OMPID; i1<=n; i1+=Nthrds){
3505
3506 for (j1=1; j1<=n; j1++){
3507
3508 sum = 0.0;
3509 sumi = 0.0;
3510
3511 for (l=1; l<=n; l++){
3512 sum += H[i1][l].r*S[j1][l].r - H[i1][l].i*S[j1][l].i;
3513 sumi += H[i1][l].r*S[j1][l].i + H[i1][l].i*S[j1][l].r;
3514 }
3515
3516 C[j1][i1].r = sum;
3517 C[j1][i1].i = sumi;
3518 }
3519 }
3520 } /* #pragma omp parallel */
3521
3522 /* M1 * U^+ H * U * M1 */
3523
3524 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,j1,sum,sumi,l)
3525 {
3526
3527 /* get info. on OpenMP */
3528
3529 OMPID = omp_get_thread_num();
3530 Nthrds = omp_get_num_threads();
3531 Nprocs = omp_get_num_procs();
3532
3533 for (i1=1+OMPID; i1<=n; i1+=Nthrds){
3534 for (j1=1; j1<=n; j1++){
3535
3536 sum = 0.0;
3537 sumi = 0.0;
3538
3539 for (l=1; l<=n; l++){
3540 sum += S[i1][l].r*C[j1][l].r + S[i1][l].i*C[j1][l].i;
3541 sumi += S[i1][l].r*C[j1][l].i - S[i1][l].i*C[j1][l].r;
3542 }
3543 H[i1][j1].r = sum;
3544 H[i1][j1].i = sumi;
3545 }
3546 }
3547 } /* #pragma omp parallel */
3548
3549 /* H to C */
3550
3551 for (i1=1; i1<=n; i1++){
3552 for (j1=1; j1<=n; j1++){
3553 C[i1][j1] = H[i1][j1];
3554 }
3555 }
3556
3557 /* penalty for ill-conditioning states */
3558
3559 EV_cut0 = Threshold_OLP_Eigen;
3560
3561 for (i1=1; i1<=n; i1++){
3562
3563 if (koS[i1]<EV_cut0){
3564 C[i1][i1].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
3565 }
3566
3567 /* cutoff the interaction between the ill-conditioned state */
3568
3569 if (1.0e+3<C[i1][i1].r){
3570 for (j1=1; j1<=n; j1++){
3571 C[i1][j1] = Complex(0.0,0.0);
3572 C[j1][i1] = Complex(0.0,0.0);
3573 }
3574 C[i1][i1].r = 1.0e+4;
3575 }
3576 }
3577
3578 /* solve eigenvalue problem */
3579
3580 n1 = n;
3581 EigenBand_lapack(C,ko[spin],n1,n1,1);
3582
3583 for (i1=1; i1<=n; i1++) EIGEN[spin][kloop][i1] = ko[spin][i1];
3584
3585 /****************************************************
3586 transformation to the original eigen vectors.
3587 NOTE JRCAT-244p and JAIST-2122p
3588 C = U * lambda^{-1/2} * D
3589 ****************************************************/
3590
3591 /* transpose */
3592
3593 for (i1=1; i1<=n; i1++){
3594 for (j1=i1+1; j1<=n; j1++){
3595 Ctmp1 = S[i1][j1];
3596 Ctmp2 = S[j1][i1];
3597 S[i1][j1] = Ctmp2;
3598 S[j1][i1] = Ctmp1;
3599 }
3600 }
3601
3602 /* transpose */
3603
3604 for (i1=1; i1<=n; i1++){
3605 for (j1=i1+1; j1<=n; j1++){
3606 Ctmp1 = C[i1][j1];
3607 Ctmp2 = C[j1][i1];
3608 C[i1][j1] = Ctmp2;
3609 C[j1][i1] = Ctmp1;
3610 }
3611 }
3612
3613 /* shift */
3614
3615 for (j1=1; j1<=n; j1++){
3616 for (l=n; 1<=l; l--){
3617 C[j1][l] = C[j1][l];
3618 }
3619 }
3620
3621 #pragma omp parallel shared(C,S,H2,n) private(OMPID,Nthrds,Nprocs,i1,j1,sum,sumi,l)
3622 {
3623
3624 /* get info. on OpenMP */
3625
3626 OMPID = omp_get_thread_num();
3627 Nthrds = omp_get_num_threads();
3628 Nprocs = omp_get_num_procs();
3629
3630 for (i1=1+OMPID; i1<=n; i1+=Nthrds){
3631 for (j1=1; j1<=n1; j1++){
3632
3633 sum = 0.0;
3634 sumi = 0.0;
3635
3636 for (l=1; l<=n; l++){
3637 sum += S[i1][l].r*C[j1][l].r - S[i1][l].i*C[j1][l].i;
3638 sumi += S[i1][l].r*C[j1][l].i + S[i1][l].i*C[j1][l].r;
3639 }
3640
3641 H2[spin][j1][i1].r = sum;
3642 H2[spin][j1][i1].i = sumi;
3643
3644 }
3645 }
3646 } /* #pragma omp parallel */
3647
3648 } /* if (0<=kloop) */
3649 } /* spin */
3650
3651 /****************************************************
3652 store LDOS
3653 ****************************************************/
3654
3655 kloop = arpo[myid];
3656
3657 if (0<=kloop){
3658
3659 for (spin=0; spin<=SpinP_switch; spin++){
3660
3661 k1 = T_KGrids1[kloop];
3662 k2 = T_KGrids2[kloop];
3663 k3 = T_KGrids3[kloop];
3664
3665 i = Ti_KGrids1[kloop];
3666 j = Tj_KGrids2[kloop];
3667 k = Tk_KGrids3[kloop];
3668
3669 for (l=iemin; l<=iemax; l++){
3670
3671 /* initialize */
3672
3673 for (i1=0; i1<(atomnum+1)*List_YOUSO[7]; i1++){
3674 SD[i1] = 0.0;
3675 }
3676
3677 /* calculate SD */
3678
3679 #pragma omp parallel shared(TmpS,SD,spin,l,H2,MP,Spe_Total_CNO,WhatSpecies,atomnum) private(OMPID,Nthrds,Nprocs,GA_AN,wanA,tnoA,Anum,GB_AN,wanB,tnoB,Bnum,i1,j1,u2,v2,uv,vu,Redum,Imdum)
3680 {
3681
3682 /* get info. on OpenMP */
3683
3684 OMPID = omp_get_thread_num();
3685 Nthrds = omp_get_num_threads();
3686 Nprocs = omp_get_num_procs();
3687
3688 for (GA_AN=1+OMPID; GA_AN<=atomnum; GA_AN+=Nthrds){
3689
3690 wanA = WhatSpecies[GA_AN];
3691 tnoA = Spe_Total_CNO[wanA];
3692 Anum = MP[GA_AN];
3693
3694 for (GB_AN=1; GB_AN<=atomnum; GB_AN++){
3695
3696 wanB = WhatSpecies[GB_AN];
3697 tnoB = Spe_Total_CNO[wanB];
3698 Bnum = MP[GB_AN];
3699
3700 for (i1=0; i1<tnoA; i1++){
3701 for (j1=0; j1<tnoB; j1++){
3702
3703 u2 = H2[spin][l][Anum+i1].r*H2[spin][l][Bnum+j1].r;
3704 v2 = H2[spin][l][Anum+i1].i*H2[spin][l][Bnum+j1].i;
3705 uv = H2[spin][l][Anum+i1].r*H2[spin][l][Bnum+j1].i;
3706 vu = H2[spin][l][Anum+i1].i*H2[spin][l][Bnum+j1].r;
3707
3708 Redum = (u2 + v2);
3709 Imdum = (uv - vu);
3710
3711 SD[Anum+i1] += (float)(Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i);
3712
3713 } /* j1 */
3714 } /* i1 */
3715 } /* GB_AN */
3716 } /* GA_AN */
3717
3718 } /* #pragma omp parallel */
3719
3720 /*********************************************
3721 writing a binary file
3722 *********************************************/
3723
3724 i_vec[0] = Ti_KGrids1[kloop];
3725 i_vec[1] = Tj_KGrids2[kloop];
3726 i_vec[2] = Tk_KGrids3[kloop];
3727
3728 fwrite(i_vec,sizeof(int),3,fp_ev);
3729 fwrite(&SD[1],sizeof(float),n,fp_ev);
3730
3731 } /* l */
3732 } /* spin */
3733 } /* if (0<=kloop) */
3734
3735 /*********************************************
3736 calculation of partial density matrix
3737 *********************************************/
3738
3739 kloop = arpo[myid];
3740
3741 if (0<=kloop && cal_partial_charge) {
3742
3743 for (spin=0; spin<=SpinP_switch; spin++){
3744
3745 k1 = T_KGrids1[kloop];
3746 k2 = T_KGrids2[kloop];
3747 k3 = T_KGrids3[kloop];
3748
3749 for (l=iemin; l<=iemax; l++){
3750
3751 x1 = (ko[spin][l] - ChemP)*Beta;
3752 if (x1<=-max_x) x1 = -max_x;
3753 if (max_x<=x1) x1 = max_x;
3754 FermiF1 = 1.0/(1.0 + exp(x1));
3755
3756 x2 = (ko[spin][l] - (ChemP+ene_win_partial_charge))*Beta;
3757 if (x2<=-max_x) x2 = -max_x;
3758 if (max_x<=x2) x2 = max_x;
3759 FermiF2 = 1.0/(1.0 + exp(x2));
3760
3761 diffF = fabs(FermiF1-FermiF2);
3762
3763 for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
3764
3765 wanA = WhatSpecies[GA_AN];
3766 tnoA = Spe_Total_CNO[wanA];
3767 Anum = MP[GA_AN];
3768
3769 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
3770 GB_AN = natn[GA_AN][LB_AN];
3771 Rn = ncn[GA_AN][LB_AN];
3772 wanB = WhatSpecies[GB_AN];
3773 tnoB = Spe_Total_CNO[wanB];
3774 Bnum = MP[GB_AN];
3775
3776 l1 = atv_ijk[Rn][1];
3777 l2 = atv_ijk[Rn][2];
3778 l3 = atv_ijk[Rn][3];
3779 kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;
3780
3781 si = sin(2.0*PI*kRn);
3782 co = cos(2.0*PI*kRn);
3783
3784 for (i=0; i<tnoA; i++){
3785 for (j=0; j<tnoB; j++){
3786
3787 u2 = H2[spin][l][Anum+i].r*H2[spin][l][Bnum+j].r;
3788 v2 = H2[spin][l][Anum+i].i*H2[spin][l][Bnum+j].i;
3789 uv = H2[spin][l][Anum+i].r*H2[spin][l][Bnum+j].i;
3790 vu = H2[spin][l][Anum+i].i*H2[spin][l][Bnum+j].r;
3791 tmp = co*(u2+v2) - si*(uv-vu);
3792 PDM[spin][GA_AN][LB_AN][i][j] += diffF*tmp;
3793 }
3794 }
3795 }
3796 }
3797 }
3798 }
3799 }
3800
3801 /****************************************************
3802 MPI: EIGEN
3803 ****************************************************/
3804
3805 for (ID=0; ID<numprocs; ID++){
3806
3807 kloop = arpo[ID];
3808
3809 if (0<=kloop){
3810 for (spin=0; spin<=SpinP_switch; spin++){
3811 MPI_Bcast(&EIGEN[spin][kloop][0], n+1, MPI_DOUBLE, ID, mpi_comm_level1);
3812 }
3813 }
3814 }
3815
3816 } /* kloop0 */
3817
3818 /****************************************************
3819 MPI: PDM
3820 ****************************************************/
3821
3822 if (cal_partial_charge) {
3823
3824 /* MPI: PDM */
3825
3826 for (spin=0; spin<=SpinP_switch; spin++){
3827 for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
3828
3829 wanA = WhatSpecies[GA_AN];
3830 tnoA = Spe_Total_CNO[wanA];
3831
3832 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
3833
3834 GB_AN = natn[GA_AN][LB_AN];
3835 wanB = WhatSpecies[GB_AN];
3836 tnoB = Spe_Total_CNO[wanB];
3837
3838 for (i=0; i<tnoA; i++){
3839
3840 MPI_Allreduce(&PDM[spin][GA_AN][LB_AN][i][0],
3841 &TPDM[spin][GA_AN][LB_AN][i][0],
3842 tnoB, MPI_DOUBLE, MPI_SUM,
3843 mpi_comm_level1);
3844
3845 for (j=0; j<tnoB; j++){
3846 TPDM[spin][GA_AN][LB_AN][i][j] /= (double)T_knum;
3847 }
3848 }
3849 }
3850 }
3851 }
3852
3853 /* store TPDM to Partial_DM */
3854
3855 for (spin=0; spin<=SpinP_switch; spin++){
3856 for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
3857
3858 MA_AN = F_G2M[GA_AN];
3859 wanA = WhatSpecies[GA_AN];
3860 tnoA = Spe_Total_CNO[wanA];
3861
3862 if (1<=MA_AN && MA_AN<=Matomnum){
3863
3864 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
3865
3866 GB_AN = natn[GA_AN][LB_AN];
3867 wanB = WhatSpecies[GB_AN];
3868 tnoB = Spe_Total_CNO[wanB];
3869
3870 for (i=0; i<tnoA; i++){
3871 for (j=0; j<tnoB; j++){
3872 Partial_DM[spin][MA_AN][LB_AN][i][j] = TPDM[spin][GA_AN][LB_AN][i][j];
3873 }
3874 }
3875 }
3876 }
3877 }
3878 }
3879 }
3880
3881 /****************************************************
3882 write: EIGEN
3883 ****************************************************/
3884
3885 if (myid==Host_ID){
3886 if (fp_eig) {
3887
3888 for (kloop=0; kloop<T_knum; kloop++){
3889 for (spin=0; spin<=SpinP_switch; spin++){
3890
3891 i = Ti_KGrids1[kloop];
3892 j = Tj_KGrids2[kloop];
3893 k = Tk_KGrids3[kloop];
3894
3895 fprintf(fp_eig,"%d %d %d ",i,j,k);
3896 for (ie=iemin;ie<=iemax;ie++) {
3897 fprintf(fp_eig,"%lf ",EIGEN[spin][kloop][ie]);
3898 }
3899 fprintf(fp_eig,"\n");
3900
3901 }
3902 }
3903
3904 fprintf(fp_eig,"Eigenvalues>\n");
3905 }
3906 }
3907
3908 Finishing:
3909
3910 if (myid==Host_ID){
3911 if (fp_eig) {
3912 fclose(fp_eig);
3913 }
3914 }
3915
3916 if (fp_ev) {
3917 fclose(fp_ev);
3918 }
3919
3920 /****************************************************
3921 merge *.Dos.vec#
3922 ****************************************************/
3923
3924 MPI_Barrier(mpi_comm_level1);
3925
3926 if (myid==Host_ID){
3927
3928 sprintf(file_ev0,"%s%s.Dos.vec",filepath,filename);
3929 if ( (fp_ev0=fopen(file_ev0,"w"))==NULL ) {
3930 printf("cannot open a file %s\n",file_ev0);
3931 }
3932
3933 for (ID=0; ID<numprocs; ID++){
3934
3935 sprintf(file_ev,"%s%s.Dos.vec%d",filepath,filename,ID);
3936
3937 if ( 1<=num_allocated_k[ID] ){
3938
3939 if ( (fp_ev=fopen(file_ev,"r"))==NULL ) {
3940 printf("cannot open a file %s\n",file_ev);
3941 }
3942
3943 for (k=0; k<num_allocated_k[ID]; k++){
3944 for (spin=0; spin<=SpinP_switch; spin++){
3945 for (l=iemin; l<=iemax; l++){
3946 fread( i_vec, sizeof(int), 3,fp_ev);
3947 fwrite(i_vec, sizeof(int), 3,fp_ev0);
3948 fread( &SD[1], sizeof(float),n,fp_ev);
3949 fwrite(&SD[1], sizeof(float),n,fp_ev0);
3950 }
3951 }
3952 }
3953
3954 fclose(fp_ev);
3955 }
3956 }
3957
3958 fclose(fp_ev0);
3959 }
3960
3961 MPI_Barrier(mpi_comm_level1);
3962
3963 /* delete files */
3964 if (myid==Host_ID){
3965 for (ID=0; ID<numprocs; ID++){
3966 sprintf(file_ev,"%s%s.Dos.vec%d",filepath,filename,ID);
3967 remove(file_ev);
3968 }
3969 }
3970
3971 /****************************************************
3972 Fermi surface viewable by XCrysDen
3973 ****************************************************/
3974
3975 if (myid==Host_ID){
3976
3977 int ii,jj,kk;
3978 int ***TD2OD;
3979 double x0,y0,z0;
3980 char file_fermi[YOUSO10];
3981 FILE *fp;
3982
3983 /* allocation of TD2OD */
3984
3985 TD2OD = (int***)malloc(sizeof(int**)*knum_i);
3986 for (i=0; i<knum_i; i++){
3987 TD2OD[i] = (int**)malloc(sizeof(int*)*knum_j);
3988 for (j=0; j<knum_j; j++){
3989 TD2OD[i][j] = (int*)malloc(sizeof(int)*knum_k);
3990 for (k=0; k<knum_k; k++) TD2OD[i][j][k] = 0;
3991 }
3992 }
3993
3994 kloop = 0;
3995 for (i=0; i<knum_i; i++){
3996 for (j=0; j<knum_j; j++){
3997 for (k=0; k<knum_k; k++){
3998 TD2OD[i][j][k] = kloop;
3999 kloop++;
4000 }
4001 }
4002 }
4003
4004 /* make bxsf files */
4005
4006 for (spin=0; spin<=SpinP_switch; spin++){
4007
4008 sprintf(file_fermi,"%s%s.FermiSurf%d.bxsf",filepath,filename,spin);
4009
4010 if ((fp=fopen(file_fermi,"w")) != NULL){
4011
4012 fprintf(fp,"BEGIN_INFO\n");
4013 fprintf(fp,"Fermi Energy: %15.12f\n",ChemP);
4014 fprintf(fp,"END_INFO\n\n");
4015
4016 fprintf(fp,"BEGIN_BLOCK_BANDGRID_3D\n");
4017 fprintf(fp,"comment_line\n");
4018 fprintf(fp,"BEGIN_BANDGRID_3D_simple_example\n");
4019 fprintf(fp,"%d\n",n);
4020 fprintf(fp,"%d %d %d\n",knum_i+1,knum_j+1,knum_k+1);
4021
4022 fprintf(fp,"0.0 0.0 0.0\n");
4023 fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[1][1],rtv[1][2],rtv[1][3]);
4024 fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[2][1],rtv[2][2],rtv[2][3]);
4025 fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[3][1],rtv[3][2],rtv[3][3]);
4026
4027 for (i1=1; i1<=n; i1++){
4028
4029 fprintf(fp,"BAND: %d\n",i1);
4030
4031 for (i=0; i<=knum_i; i++){
4032 ii = i%knum_i;
4033 for (j=0; j<=knum_j; j++){
4034 jj = j%knum_j;
4035 for (k=0; k<=knum_k; k++){
4036 kk = k%knum_k;
4037 kloop = TD2OD[ii][jj][kk];
4038 fprintf(fp,"%10.5f ",EIGEN[spin][kloop][i1]);
4039 }
4040 fprintf(fp,"\n");
4041 }
4042
4043 if (i!=knum_i) fprintf(fp,"\n");
4044
4045 }
4046 }
4047
4048 fprintf(fp,"END_BANDGRID_3D\n");
4049 fprintf(fp,"END_BLOCK_BANDGRID_3D\n");
4050
4051 fclose(fp);
4052 }
4053 else{
4054 printf("Failure in saving bxsf files.\n");
4055 }
4056
4057 } /* spin */
4058
4059 /* free TD2OD */
4060 for (i=0; i<knum_i; i++){
4061 for (j=0; j<knum_j; j++){
4062 free(TD2OD[i][j]);
4063 }
4064 free(TD2OD[i]);
4065 }
4066 free(TD2OD);
4067
4068 } /* if (myid==Host_ID) */
4069
4070 /****************************************************
4071 free arrays
4072 ****************************************************/
4073
4074 free(MP);
4075 free(arpo);
4076 free(num_allocated_k);
4077
4078 for (i=0; i<i_ko[0]; i++){
4079 free(ko[i]);
4080 }
4081 free(ko);
4082
4083 free(koS);
4084
4085 for (i=0; i<i_H[0]; i++){
4086 free(H[i]);
4087 }
4088 free(H);
4089
4090 for (i=0; i<i_S[0]; i++){
4091 free(S[i]);
4092 }
4093 free(S);
4094
4095 free(M1);
4096
4097 for (i=0; i<i_C[0]; i++){
4098 free(C[i]);
4099 }
4100 free(C);
4101
4102 free(KGrids1); free(KGrids2);free(KGrids3);
4103
4104 free(SD);
4105
4106 for (j=0; j<n+1; j++){
4107 free(TmpM[j]);
4108 }
4109 free(TmpM);
4110
4111 for (j=0; j<n+1; j++){
4112 free(TmpS[j]);
4113 }
4114 free(TmpS);
4115
4116 for (i=0; i<List_YOUSO[23]; i++){
4117 for (j=0; j<n+1; j++){
4118 free(H2[i][j]);
4119 }
4120 free(H2[i]);
4121 }
4122 free(H2);
4123
4124 /* partial density matrix */
4125
4126 if (cal_partial_charge){
4127
4128 /* PDM */
4129
4130 for (k=0; k<=1; k++){
4131
4132 FNAN[0] = 0;
4133
4134 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
4135
4136 if (Gc_AN==0){
4137 tno0 = 1;
4138 }
4139 else{
4140 Cwan = WhatSpecies[Gc_AN];
4141 tno0 = Spe_Total_NO[Cwan];
4142 }
4143
4144 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4145
4146 if (Gc_AN==0){
4147 tno1 = 1;
4148 }
4149 else{
4150 Gh_AN = natn[Gc_AN][h_AN];
4151 Hwan = WhatSpecies[Gh_AN];
4152 tno1 = Spe_Total_NO[Hwan];
4153 }
4154
4155 for (i=0; i<tno0; i++){
4156 free(PDM[k][Gc_AN][h_AN][i]);
4157 }
4158 free(PDM[k][Gc_AN][h_AN]);
4159 }
4160 free(PDM[k][Gc_AN]);
4161 }
4162 free(PDM[k]);
4163 }
4164 free(PDM);
4165
4166 /* TPDM */
4167
4168 for (k=0; k<=1; k++){
4169
4170 FNAN[0] = 0;
4171
4172 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
4173
4174 if (Gc_AN==0){
4175 tno0 = 1;
4176 }
4177 else{
4178 Cwan = WhatSpecies[Gc_AN];
4179 tno0 = Spe_Total_NO[Cwan];
4180 }
4181
4182 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4183
4184 if (Gc_AN==0){
4185 tno1 = 1;
4186 }
4187 else{
4188 Gh_AN = natn[Gc_AN][h_AN];
4189 Hwan = WhatSpecies[Gh_AN];
4190 tno1 = Spe_Total_NO[Hwan];
4191 }
4192
4193 for (i=0; i<tno0; i++){
4194 free(TPDM[k][Gc_AN][h_AN][i]);
4195 }
4196 free(TPDM[k][Gc_AN][h_AN]);
4197 }
4198 free(TPDM[k][Gc_AN]);
4199 }
4200 free(TPDM[k]);
4201 }
4202 free(TPDM);
4203 }
4204
4205 /* no spin-orbit coupling */
4206 if (SO_switch==0){
4207 free(Dummy_ImNL[0][0][0]);
4208 free(Dummy_ImNL[0][0]);
4209 free(Dummy_ImNL[0]);
4210 free(Dummy_ImNL);
4211 }
4212
4213 free(T_KGrids1);
4214 free(T_KGrids2);
4215 free(T_KGrids3);
4216
4217 free(Ti_KGrids1);
4218 free(Tj_KGrids2);
4219 free(Tk_KGrids3);
4220
4221 for (i=0; i<List_YOUSO[23]; i++){
4222 for (j=0; j<T_knum; j++){
4223 free(EIGEN[i][j]);
4224 }
4225 free(EIGEN[i]);
4226 }
4227 free(EIGEN);
4228
4229 /* for elapsed time */
4230
4231 dtime(&TEtime);
4232 time0 = TEtime - TStime;
4233 return time0;
4234
4235 }
4236
4237
4238
4239
4240
4241
4242
Band_DFT_Dosout_NonCol(int knum_i,int knum_j,int knum_k,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP)4243 static double Band_DFT_Dosout_NonCol(
4244 int knum_i, int knum_j, int knum_k,
4245 int SpinP_switch,
4246 double *****nh,
4247 double *****ImNL,
4248 double ****CntOLP)
4249 {
4250 int i,j,k,spin,l,i1,j1,n1;
4251 int n, wanA,ii1,jj1,m,n2;
4252 int *MP;
4253 int MA_AN, GA_AN, tnoA,Anum, LB_AN;
4254 int GB_AN, wanB, tnoB, Bnum, RnB;
4255 int l1,l2,l3;
4256
4257 int ie,iemin,iemax,iemin0,iemax0,n1min,mn;
4258 int MaxL,e1,s1,T_knum,S_knum,E_knum;
4259 int kloop,kloop0,num_kloop0;
4260 int i_vec[10];
4261 int h_AN,tno0,tno1,Gh_AN,Hwan,Gc_AN,Cwan,Rn;
4262
4263 double *****PDM;
4264 double *****TPDM;
4265 double EV_cut0;
4266 double FermiF1,FermiF2,x1,x2,diffF,max_x=50.0;
4267 double sum_r0,sum_i0,sum_r1,sum_i1;
4268 double sum,sumi,u2,v2,uv,vu,tmp;
4269 double kRn,si,co,Redum,Imdum,Redum2,Resum,Imdum2;
4270 double theta,phi,sit,cot,sip,cop,tmp1,tmp2,tmp3;
4271 double TStime,TEtime,time0;
4272 double OLP_eigen_cut=Threshold_OLP_Eigen;
4273
4274 double *ko; int N_ko, i_ko[10];
4275 double *koS;
4276 double d0,d1,d2,d3;
4277 dcomplex **H; int N_H, i_H[10];
4278 dcomplex **S; int N_S, i_S[10];
4279 dcomplex **TmpS;
4280 dcomplex Ctmp1,Ctmp2;
4281 double *M1,**EIGEN;
4282 dcomplex **C; int N_C, i_C[10];
4283 double *KGrids1, *KGrids2, *KGrids3;
4284 float *SD; int N_SD, i_SD[10];
4285 dcomplex **TmpM;
4286 double *T_KGrids1,*T_KGrids2,*T_KGrids3;
4287 int *Ti_KGrids1,*Tj_KGrids2,*Tk_KGrids3,*arpo;
4288 int *num_allocated_k;
4289
4290 double *****Dummy_ImNL;
4291
4292 double snum_i, snum_j, snum_k;
4293 double k1,k2,k3;
4294
4295 char file_ev[YOUSO10],file_ev0[YOUSO10],file_eig[YOUSO10];
4296 FILE *fp_ev,*fp_ev0,*fp_eig;
4297
4298 char buf1[fp_bsize]; /* setvbuf */
4299 char buf2[fp_bsize]; /* setvbuf */
4300 int numprocs,myid,ID,ID1,ID2,tag;
4301
4302 /* for OpenMP */
4303 int OMPID,Nthrds,Nprocs;
4304
4305 MPI_Status stat;
4306 MPI_Request request;
4307
4308 /* MPI */
4309 MPI_Comm_size(mpi_comm_level1,&numprocs);
4310 MPI_Comm_rank(mpi_comm_level1,&myid);
4311 MPI_Barrier(mpi_comm_level1);
4312
4313 if (myid==Host_ID && 0<level_stdout){
4314 printf("\n<Band_DFT_Dosout>\n"); fflush(stdout);
4315 }
4316
4317 dtime(&TStime);
4318
4319 /****************************************************
4320 calculation of the array size
4321 ****************************************************/
4322
4323 n = 0;
4324 for (i=1; i<=atomnum; i++){
4325 wanA = WhatSpecies[i];
4326 n = n + Spe_Total_CNO[wanA];
4327 }
4328 n2 = 2*n + 2;
4329
4330 /****************************************************
4331 Allocation of arrays
4332 ****************************************************/
4333
4334 MP = (int*)malloc(sizeof(int)*List_YOUSO[1]);
4335
4336 arpo = (int*)malloc(sizeof(int)*numprocs);
4337 num_allocated_k = (int*)malloc(sizeof(int)*numprocs);
4338
4339 ko = (double*)malloc(sizeof(double)*n2);
4340 koS = (double*)malloc(sizeof(double)*(n+1));
4341
4342 H = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
4343 for (j=0; j<n2; j++){
4344 H[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
4345 }
4346
4347 S = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
4348 for (i=0; i<(n+1); i++){
4349 S[i] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
4350 }
4351
4352 TmpS = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
4353 for (i=0; i<(n+1); i++){
4354 TmpS[i] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
4355 }
4356
4357 M1 = (double*)malloc(sizeof(double)*n2);
4358
4359 C = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
4360 for (j=0; j<n2; j++){
4361 C[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
4362 }
4363
4364 KGrids1 = (double*)malloc(sizeof(double)*knum_i);
4365 KGrids2 = (double*)malloc(sizeof(double)*knum_j);
4366 KGrids3 = (double*)malloc(sizeof(double)*knum_k);
4367
4368 SD = (float*)malloc(sizeof(float)*(atomnum+1)*List_YOUSO[7]*2);
4369
4370 TmpM = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
4371 for (j=0; j<n2; j++){
4372 TmpM[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
4373 }
4374
4375 /* non-spin-orbit coupling and non-LDA+U */
4376 if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
4377 && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0){
4378 Dummy_ImNL = (double*****)malloc(sizeof(double****)*1);
4379 Dummy_ImNL[0] = (double****)malloc(sizeof(double***)*1);
4380 Dummy_ImNL[0][0] = (double***)malloc(sizeof(double**)*1);
4381 Dummy_ImNL[0][0][0] = (double**)malloc(sizeof(double*)*1);
4382 Dummy_ImNL[0][0][0][0] = (double*)malloc(sizeof(double)*1);
4383 }
4384
4385 /* for calculation partial density matrix */
4386
4387 if (cal_partial_charge){
4388
4389 PDM = (double*****)malloc(sizeof(double****)*2);
4390 for (k=0; k<=1; k++){
4391
4392 PDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
4393 FNAN[0] = 0;
4394
4395 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
4396
4397 if (Gc_AN==0){
4398 tno0 = 1;
4399 }
4400 else{
4401 Cwan = WhatSpecies[Gc_AN];
4402 tno0 = Spe_Total_NO[Cwan];
4403 }
4404
4405 PDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
4406 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4407
4408 if (Gc_AN==0){
4409 tno1 = 1;
4410 }
4411 else{
4412 Gh_AN = natn[Gc_AN][h_AN];
4413 Hwan = WhatSpecies[Gh_AN];
4414 tno1 = Spe_Total_NO[Hwan];
4415 }
4416
4417 PDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
4418 for (i=0; i<tno0; i++){
4419 PDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
4420 for (j=0; j<tno1; j++) PDM[k][Gc_AN][h_AN][i][j] = 0.0;
4421 }
4422 }
4423 }
4424 }
4425
4426 TPDM = (double*****)malloc(sizeof(double****)*2);
4427 for (k=0; k<=1; k++){
4428
4429 TPDM[k] = (double****)malloc(sizeof(double***)*(atomnum+1));
4430 FNAN[0] = 0;
4431
4432 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
4433
4434 if (Gc_AN==0){
4435 tno0 = 1;
4436 }
4437 else{
4438 Cwan = WhatSpecies[Gc_AN];
4439 tno0 = Spe_Total_NO[Cwan];
4440 }
4441
4442 TPDM[k][Gc_AN] = (double***)malloc(sizeof(double**)*(FNAN[Gc_AN]+1));
4443 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
4444
4445 if (Gc_AN==0){
4446 tno1 = 1;
4447 }
4448 else{
4449 Gh_AN = natn[Gc_AN][h_AN];
4450 Hwan = WhatSpecies[Gh_AN];
4451 tno1 = Spe_Total_NO[Hwan];
4452 }
4453
4454 TPDM[k][Gc_AN][h_AN] = (double**)malloc(sizeof(double*)*tno0);
4455 for (i=0; i<tno0; i++){
4456 TPDM[k][Gc_AN][h_AN][i] = (double*)malloc(sizeof(double)*tno1);
4457 for (j=0; j<tno1; j++) TPDM[k][Gc_AN][h_AN][i][j] = 0.0;
4458 }
4459 }
4460 }
4461 }
4462 }
4463
4464 /****************************************************
4465 set up k-grids
4466 ****************************************************/
4467
4468 snum_i = knum_i;
4469 snum_j = knum_j;
4470 snum_k = knum_k;
4471
4472 for (i=0; i<knum_i; i++){
4473 k1 = (double)i/snum_i + Shift_K_Point;
4474 KGrids1[i]=k1;
4475 }
4476 for (i=0; i<knum_j; i++){
4477 k1 = (double)i/snum_j - Shift_K_Point;
4478 KGrids2[i]=k1;
4479 }
4480 for (i=0; i<knum_k; i++){
4481 k1 = (double)i/snum_k + 2.0*Shift_K_Point;
4482 KGrids3[i]=k1;
4483 }
4484
4485 if (myid==Host_ID && 0<level_stdout){
4486 printf(" KGrids1: ");
4487 for (i=0;i<=knum_i-1;i++) printf("%f ",KGrids1[i]);
4488 printf("\n");
4489 printf(" KGrids2: ");
4490 for (i=0;i<=knum_j-1;i++) printf("%f ",KGrids2[i]);
4491 printf("\n");
4492 printf(" KGrids3: ");
4493 for (i=0;i<=knum_k-1;i++) printf("%f ",KGrids3[i]);
4494 printf("\n");
4495 }
4496
4497 /***********************************
4498 one-dimentionalize for MPI
4499 ************************************/
4500
4501 T_knum = 0;
4502 for (i=0; i<=(knum_i-1); i++){
4503 for (j=0; j<=(knum_j-1); j++){
4504 for (k=0; k<=(knum_k-1); k++){
4505 T_knum++;
4506 }
4507 }
4508 }
4509
4510 T_KGrids1 = (double*)malloc(sizeof(double)*T_knum);
4511 T_KGrids2 = (double*)malloc(sizeof(double)*T_knum);
4512 T_KGrids3 = (double*)malloc(sizeof(double)*T_knum);
4513
4514 Ti_KGrids1 = (int*)malloc(sizeof(int)*T_knum);
4515 Tj_KGrids2 = (int*)malloc(sizeof(int)*T_knum);
4516 Tk_KGrids3 = (int*)malloc(sizeof(int)*T_knum);
4517
4518 EIGEN = (double**)malloc(sizeof(double*)*T_knum);
4519 for (j=0; j<T_knum; j++){
4520 EIGEN[j] = (double*)malloc(sizeof(double)*n2);
4521 }
4522
4523 /* set T_KGrid1,2,3 */
4524
4525 T_knum = 0;
4526 for (i=0; i<=(knum_i-1); i++){
4527 for (j=0; j<=(knum_j-1); j++){
4528 for (k=0; k<=(knum_k-1); k++){
4529
4530 T_KGrids1[T_knum] = KGrids1[i];
4531 T_KGrids2[T_knum] = KGrids2[j];
4532 T_KGrids3[T_knum] = KGrids3[k];
4533
4534 Ti_KGrids1[T_knum] = i;
4535 Tj_KGrids2[T_knum] = j;
4536 Tk_KGrids3[T_knum] = k;
4537
4538 T_knum++;
4539 }
4540 }
4541 }
4542
4543 /* allocate k-points into proccessors */
4544
4545 if (T_knum<=myid){
4546 S_knum = -10;
4547 E_knum = -100;
4548 num_kloop0 = 1;
4549 num_allocated_k[myid] = 0;
4550 }
4551 else if (T_knum<numprocs) {
4552 S_knum = myid;
4553 E_knum = myid;
4554 num_kloop0 = 1;
4555 num_allocated_k[myid] = 1;
4556 }
4557 else {
4558 tmp = (double)T_knum/(double)numprocs;
4559 num_kloop0 = (int)tmp + 1;
4560
4561 S_knum = (int)((double)myid*(tmp+0.0001));
4562 E_knum = (int)((double)(myid+1)*(tmp+0.0001)) - 1;
4563 if (myid==(numprocs-1)) E_knum = T_knum - 1;
4564 if (E_knum<0) E_knum = 0;
4565 num_allocated_k[myid] = E_knum - S_knum + 1;
4566 }
4567
4568 for (ID=0; ID<numprocs; ID++){
4569 MPI_Bcast(&num_allocated_k[ID], 1, MPI_INT, ID, mpi_comm_level1);
4570 }
4571
4572 /****************************************************************
4573 find iemin and iemax
4574 *****************************************************************/
4575
4576 iemin=n2; iemax=1; n1min=1;
4577
4578 k1 = 0.0;
4579 k2 = 0.0;
4580 k3 = 0.0;
4581
4582 Overlap_Band(Host_ID,CntOLP,S,MP,k1,k2,k3);
4583
4584 if (myid==Host_ID){
4585
4586 n = S[0][0].r;
4587 EigenBand_lapack(S,ko,n,n,1);
4588
4589 if (2<=level_stdout){
4590 printf(" k1 k2 k3 %10.6f %10.6f %10.6f\n",k1,k2,k3);
4591 for (i1=1; i1<=n; i1++){
4592 printf(" Eigenvalues of OLP %2d %15.12f\n",i1,ko[i1]);
4593 }
4594 }
4595
4596 /* minus eigenvalues to 1.0e-14 */
4597 for (l=1; l<=n; l++){
4598 if (ko[l]<0.0){
4599 ko[l] = 1.0e-14;
4600
4601 if (2<=level_stdout){
4602 printf("found an eigenvalue smaller than %10.8f of OLP kloop=%2d\n",
4603 Threshold_OLP_Eigen,kloop);
4604 }
4605 }
4606
4607 koS[l] = ko[l];
4608 }
4609
4610 /* calculate S*1/sqrt(ko) */
4611
4612 for (l=1; l<=n; l++) M1[l] = 1.0/sqrt(ko[l]);
4613
4614 /* S * M1 */
4615
4616 for (i1=1; i1<=n; i1++){
4617 for (j1=1; j1<=n; j1++){
4618 S[i1][j1].r = S[i1][j1].r*M1[j1];
4619 S[i1][j1].i = S[i1][j1].i*M1[j1];
4620 }
4621 }
4622
4623 } /* if (myid==Host_ID) */
4624
4625 /****************************************************
4626 make a full Hamiltonian matrix
4627 ****************************************************/
4628
4629 /* non-spin-orbit coupling and non-LDA+U */
4630 if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
4631 && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0)
4632 Hamiltonian_Band_NC(Host_ID, nh, Dummy_ImNL, H, MP, k1, k2, k3);
4633
4634 /* spin-orbit coupling or LDA+U */
4635 else
4636 Hamiltonian_Band_NC(Host_ID, nh, ImNL, H, MP, k1, k2, k3);
4637
4638 if (myid==Host_ID){
4639
4640 /****************************************************
4641 M1 * U^+ * H * U * M1
4642 ****************************************************/
4643
4644 /* transpose S */
4645
4646 for (i1=1; i1<=n; i1++){
4647 for (j1=i1+1; j1<=n; j1++){
4648 Ctmp1 = S[i1][j1];
4649 Ctmp2 = S[j1][i1];
4650 S[i1][j1] = Ctmp2;
4651 S[j1][i1] = Ctmp1;
4652 }
4653 }
4654
4655 /* H * U * M1 */
4656
4657 for (i1=1; i1<=2*n; i1++){
4658
4659 jj1 = 1;
4660 for (j1=1; j1<=n; j1++){
4661
4662 sum_r0 = 0.0;
4663 sum_i0 = 0.0;
4664
4665 sum_r1 = 0.0;
4666 sum_i1 = 0.0;
4667
4668 for (l=1; l<=n; l++){
4669
4670 sum_r0 += H[i1][l ].r*S[j1][l].r - H[i1][l ].i*S[j1][l].i;
4671 sum_i0 += H[i1][l ].r*S[j1][l].i + H[i1][l ].i*S[j1][l].r;
4672
4673 sum_r1 += H[i1][l+n].r*S[j1][l].r - H[i1][l+n].i*S[j1][l].i;
4674 sum_i1 += H[i1][l+n].r*S[j1][l].i + H[i1][l+n].i*S[j1][l].r;
4675 }
4676
4677 C[jj1][i1].r = sum_r0;
4678 C[jj1][i1].i = sum_i0; jj1++;
4679
4680 C[jj1][i1].r = sum_r1;
4681 C[jj1][i1].i = sum_i1; jj1++;
4682
4683 }
4684 }
4685
4686 /* M1 * U^+ H * U * M1 */
4687
4688 for (j1=1; j1<=2*n; j1++){
4689
4690 ii1 = 1;
4691 for (i1=1; i1<=n; i1++){
4692
4693 sum_r0 = 0.0;
4694 sum_i0 = 0.0;
4695
4696 sum_r1 = 0.0;
4697 sum_i1 = 0.0;
4698
4699 for (l=1; l<=n; l++){
4700
4701 sum_r0 += S[i1][l].r*C[j1][l ].r + S[i1][l].i*C[j1][l ].i;
4702 sum_i0 += S[i1][l].r*C[j1][l ].i - S[i1][l].i*C[j1][l ].r;
4703
4704 sum_r1 += S[i1][l].r*C[j1][l+n].r + S[i1][l].i*C[j1][l+n].i;
4705 sum_i1 += S[i1][l].r*C[j1][l+n].i - S[i1][l].i*C[j1][l+n].r;
4706 }
4707
4708 H[ii1][j1].r = sum_r0;
4709 H[ii1][j1].i = sum_i0; ii1++;
4710
4711 H[ii1][j1].r = sum_r1;
4712 H[ii1][j1].i = sum_i1; ii1++;
4713
4714 }
4715 }
4716
4717 /* H to C */
4718
4719 for (i1=1; i1<=2*n; i1++){
4720 for (j1=1; j1<=2*n; j1++){
4721 C[i1][j1].r = H[i1][j1].r;
4722 C[i1][j1].i = H[i1][j1].i;
4723 }
4724 }
4725
4726 /* penalty for ill-conditioning states */
4727
4728 EV_cut0 = Threshold_OLP_Eigen;
4729
4730 for (i1=1; i1<=n; i1++){
4731
4732 if (koS[i1]<EV_cut0){
4733 C[2*i1-1][2*i1-1].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
4734 C[2*i1 ][2*i1 ].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
4735 }
4736
4737 /* cutoff the interaction between the ill-conditioned state */
4738
4739 if (1.0e+3<C[2*i1-1][2*i1-1].r){
4740 for (j1=1; j1<=2*n; j1++){
4741 C[2*i1-1][j1 ] = Complex(0.0,0.0);
4742 C[j1 ][2*i1-1] = Complex(0.0,0.0);
4743 C[2*i1 ][j1 ] = Complex(0.0,0.0);
4744 C[j1 ][2*i1 ] = Complex(0.0,0.0);
4745 }
4746 C[2*i1-1][2*i1-1] = Complex(1.0e+4,0.0);
4747 C[2*i1 ][2*i1 ] = Complex(1.0e+4,0.0);
4748 }
4749 }
4750
4751 /* solve eigenvalue problem */
4752
4753 n1 = 2*n;
4754 EigenBand_lapack(C,ko,n1,n1,1);
4755
4756 if (n1min<n1) n1min=n1;
4757
4758 iemin0=1;
4759 for (i1=1;i1<=n1;i1++) {
4760 if (ko[i1]> (ChemP+Dos_Erange[0])) {
4761 iemin0=i1-1;
4762 break;
4763 }
4764 }
4765 if (iemin0<1) iemin0=1;
4766
4767 iemax0=n1;
4768 for (i1=iemin0;i1<=n1;i1++) {
4769 if (ko[i1]> (ChemP+Dos_Erange[1])) {
4770 iemax0=i1;
4771 break;
4772 }
4773 }
4774 if (iemax0>n1) iemax0=n1;
4775
4776 if (iemin>iemin0) iemin=iemin0;
4777 if (iemax<iemax0) iemax=iemax0;
4778
4779 } /* if (myid==Host_ID) */
4780
4781 /* add a buffer to iemin and iemax */
4782
4783 iemin -= 5;
4784 iemax += 5;
4785
4786 if (iemin<1) iemin = 1;
4787 if ((2*n)<iemax) iemax = 2*n;
4788
4789 /* partial density matrix for STM simulation */
4790 if (cal_partial_charge){
4791 iemin = 1;
4792 iemax = 2*n;
4793 }
4794
4795 if (myid==Host_ID){
4796 if (n1min<iemax) iemax=n1min;
4797 printf(" iemin and iemax= %d %d\n",iemin,iemax);fflush(stdout);
4798 }
4799
4800 /* MPI, iemin, iemax */
4801 MPI_Barrier(mpi_comm_level1);
4802 MPI_Bcast(&iemin, 1, MPI_INT, Host_ID, mpi_comm_level1);
4803 MPI_Bcast(&iemax, 1, MPI_INT, Host_ID, mpi_comm_level1);
4804
4805 /****************************************************************
4806 eigenvalues and eigenvectors
4807 ****************************************************************/
4808
4809 if (myid==Host_ID){
4810
4811 sprintf(file_eig,"%s%s.Dos.val",filepath,filename);
4812 if ( (fp_eig=fopen(file_eig,"w"))==NULL ) {
4813
4814 #ifdef xt3
4815 setvbuf(fp_eig,buf1,_IOFBF,fp_bsize); /* setvbuf */
4816 #endif
4817
4818 printf("cannot open a file %s\n",file_eig);
4819 }
4820
4821 if ( fp_eig==NULL ) {
4822 goto Finishing;
4823 }
4824 }
4825
4826 sprintf(file_ev,"%s%s.Dos.vec%d",filepath,filename,myid);
4827 if ( (fp_ev=fopen(file_ev,"w"))==NULL ) {
4828 printf("cannot open a file %s\n",file_ev);
4829 }
4830 if ( fp_ev==NULL ) {
4831 goto Finishing;
4832 }
4833
4834 if (myid==Host_ID){
4835
4836 if (fp_eig) {
4837 fprintf(fp_eig,"mode 1\n");
4838 fprintf(fp_eig,"NonCol 1\n");
4839 fprintf(fp_eig,"N %d\n",n);
4840 fprintf(fp_eig,"Nspin %d\n",1); /* switch to 1 */
4841 fprintf(fp_eig,"Erange %lf %lf\n",Dos_Erange[0],Dos_Erange[1]);
4842 fprintf(fp_eig,"Kgrid %d %d %d\n",knum_i,knum_j,knum_k);
4843 fprintf(fp_eig,"atomnum %d\n",atomnum);
4844 fprintf(fp_eig,"<WhatSpecies\n");
4845 for (i=1;i<=atomnum;i++) {
4846 fprintf(fp_eig,"%d ",WhatSpecies[i]);
4847 }
4848 fprintf(fp_eig,"\nWhatSpecies>\n");
4849 fprintf(fp_eig,"SpeciesNum %d\n",SpeciesNum);
4850 fprintf(fp_eig,"<Spe_Total_CNO\n");
4851 for (i=0;i<SpeciesNum;i++) {
4852 fprintf(fp_eig,"%d ",Spe_Total_CNO[i]);
4853 }
4854 fprintf(fp_eig,"\nSpe_Total_CNO>\n");
4855
4856 MaxL=Supported_MaxL;
4857
4858 fprintf(fp_eig,"MaxL %d\n",Supported_MaxL);
4859 fprintf(fp_eig,"<Spe_Num_CBasis\n");
4860 for (i=0;i<SpeciesNum;i++) {
4861 for (l=0;l<=MaxL;l++) {
4862 fprintf(fp_eig,"%d ",Spe_Num_CBasis[i][l]);
4863 }
4864 fprintf(fp_eig,"\n");
4865 }
4866 fprintf(fp_eig,"Spe_Num_CBasis>\n");
4867
4868 fprintf(fp_eig,"ChemP %lf\n",ChemP);
4869
4870 fprintf(fp_eig,"<SpinAngle\n");
4871 for (i=1; i<=atomnum; i++) {
4872 fprintf(fp_eig,"%lf %lf\n",Angle0_Spin[i],Angle1_Spin[i]);
4873 }
4874 fprintf(fp_eig,"SpinAngle>\n");
4875
4876 printf(" write eigenvalues\n");
4877 printf(" write eigenvectors\n");
4878 }
4879 }
4880
4881 /* for kloop */
4882
4883 for (kloop0=0; kloop0<num_kloop0; kloop0++){
4884
4885 kloop = kloop0 + S_knum;
4886 arpo[myid] = -1;
4887 if (S_knum<=kloop && kloop<=E_knum) arpo[myid] = kloop;
4888 for (ID=0; ID<numprocs; ID++){
4889 MPI_Bcast(&arpo[ID], 1, MPI_INT, ID, mpi_comm_level1);
4890 }
4891
4892 /* set S */
4893
4894 for (ID=0; ID<numprocs; ID++){
4895
4896 kloop = arpo[ID];
4897
4898 if (0<=kloop){
4899
4900 k1 = T_KGrids1[kloop];
4901 k2 = T_KGrids2[kloop];
4902 k3 = T_KGrids3[kloop];
4903
4904 Overlap_Band(ID,CntOLP,TmpM,MP,k1,k2,k3);
4905 n = TmpM[0][0].r;
4906
4907 if (myid==ID){
4908 for (i1=1; i1<=n; i1++){
4909 for (j1=1; j1<=n; j1++){
4910 S[i1][j1].r = TmpM[i1][j1].r;
4911 S[i1][j1].i = TmpM[i1][j1].i;
4912
4913 TmpS[i1][j1].r = TmpM[i1][j1].r;
4914 TmpS[i1][j1].i = TmpM[i1][j1].i;
4915 }
4916 }
4917 }
4918
4919 }
4920 }
4921
4922 kloop = arpo[myid];
4923
4924 if (0<=kloop){
4925
4926 EigenBand_lapack(S,ko,n,n,1);
4927
4928 if (2<=level_stdout && 0<=kloop){
4929 printf(" kloop %2d k1 k2 k3 %10.6f %10.6f %10.6f\n",
4930 kloop,T_KGrids1[kloop],T_KGrids2[kloop],T_KGrids3[kloop]);
4931 for (i1=1; i1<=n; i1++){
4932 printf(" Eigenvalues of OLP %2d %15.12f\n",i1,ko[i1]);
4933 }
4934 }
4935
4936 /* minus eigenvalues to 1.0e-14 */
4937 for (l=1; l<=n; l++){
4938 if (ko[l]<0.0){
4939 ko[l] = 1.0e-14;
4940
4941 if (2<=level_stdout){
4942 printf("found an eigenvalue smaller than %10.8f of OLP kloop=%2d\n",
4943 Threshold_OLP_Eigen,kloop);
4944 }
4945 }
4946
4947 koS[l] = ko[l];
4948 }
4949
4950 /* calculate S*1/sqrt(ko) */
4951
4952 for (l=1; l<=n; l++) M1[l] = 1.0/sqrt(ko[l]);
4953
4954 /* S * M1 */
4955
4956 for (i1=1; i1<=n; i1++){
4957 for (j1=1; j1<=n; j1++){
4958 S[i1][j1].r = S[i1][j1].r*M1[j1];
4959 S[i1][j1].i = S[i1][j1].i*M1[j1];
4960 }
4961 }
4962
4963 }
4964
4965 /****************************************************
4966 make a full Hamiltonian matrix
4967 ****************************************************/
4968
4969 for (ID=0; ID<numprocs; ID++){
4970
4971 kloop = arpo[ID];
4972
4973 if (0<=kloop){
4974 k1 = T_KGrids1[kloop];
4975 k2 = T_KGrids2[kloop];
4976 k3 = T_KGrids3[kloop];
4977
4978 /* non-spin-orbit coupling and non-LDA+U */
4979 if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
4980 && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0)
4981 Hamiltonian_Band_NC(ID, nh, Dummy_ImNL, TmpM, MP, k1, k2, k3);
4982
4983 /* spin-orbit coupling or LDA+U */
4984 else
4985 Hamiltonian_Band_NC(ID, nh, ImNL, TmpM, MP, k1, k2, k3);
4986
4987 if (myid==ID){
4988 for (i1=1; i1<=2*n; i1++){
4989 for (j1=1; j1<=2*n; j1++){
4990 H[i1][j1].r = TmpM[i1][j1].r;
4991 H[i1][j1].i = TmpM[i1][j1].i;
4992 }
4993 }
4994 }
4995 }
4996 }
4997
4998 kloop = arpo[myid];
4999
5000 if (0<=kloop){
5001
5002 /****************************************************
5003 M1 * U^t * H * U * M1
5004 ****************************************************/
5005
5006 /* transpose S */
5007
5008 for (i1=1; i1<=n; i1++){
5009 for (j1=i1+1; j1<=n; j1++){
5010 Ctmp1 = S[i1][j1];
5011 Ctmp2 = S[j1][i1];
5012 S[i1][j1] = Ctmp2;
5013 S[j1][i1] = Ctmp1;
5014 }
5015 }
5016
5017 /* H * U * M1 */
5018
5019 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,jj1,j1,sum_r0,sum_i0,sum_r1,sum_i1,l)
5020 {
5021 /* get info. on OpenMP */
5022
5023 OMPID = omp_get_thread_num();
5024 Nthrds = omp_get_num_threads();
5025 Nprocs = omp_get_num_procs();
5026
5027 for (i1=1+OMPID; i1<=2*n; i1+=Nthrds){
5028
5029 jj1 = 1;
5030 for (j1=1; j1<=n; j1++){
5031
5032 sum_r0 = 0.0;
5033 sum_i0 = 0.0;
5034
5035 sum_r1 = 0.0;
5036 sum_i1 = 0.0;
5037
5038 for (l=1; l<=n; l++){
5039
5040 sum_r0 += H[i1][l ].r*S[j1][l].r - H[i1][l ].i*S[j1][l].i;
5041 sum_i0 += H[i1][l ].r*S[j1][l].i + H[i1][l ].i*S[j1][l].r;
5042
5043 sum_r1 += H[i1][l+n].r*S[j1][l].r - H[i1][l+n].i*S[j1][l].i;
5044 sum_i1 += H[i1][l+n].r*S[j1][l].i + H[i1][l+n].i*S[j1][l].r;
5045 }
5046
5047 C[jj1][i1].r = sum_r0;
5048 C[jj1][i1].i = sum_i0; jj1++;
5049
5050 C[jj1][i1].r = sum_r1;
5051 C[jj1][i1].i = sum_i1; jj1++;
5052
5053 }
5054 }
5055
5056 } /* #pragma omp parallel */
5057
5058 /* M1 * U^+ H * U * M1 */
5059
5060 #pragma omp parallel shared(C,S,H,n) private(OMPID,Nthrds,Nprocs,i1,ii1,j1,sum_r0,sum_i0,sum_r1,sum_i1,l)
5061 {
5062 /* get info. on OpenMP */
5063
5064 OMPID = omp_get_thread_num();
5065 Nthrds = omp_get_num_threads();
5066 Nprocs = omp_get_num_procs();
5067
5068 for (j1=1+OMPID; j1<=2*n; j1+=Nthrds){
5069
5070 ii1 = 1;
5071 for (i1=1; i1<=n; i1++){
5072
5073 sum_r0 = 0.0;
5074 sum_i0 = 0.0;
5075
5076 sum_r1 = 0.0;
5077 sum_i1 = 0.0;
5078
5079 for (l=1; l<=n; l++){
5080
5081 sum_r0 += S[i1][l].r*C[j1][l ].r + S[i1][l].i*C[j1][l ].i;
5082 sum_i0 += S[i1][l].r*C[j1][l ].i - S[i1][l].i*C[j1][l ].r;
5083
5084 sum_r1 += S[i1][l].r*C[j1][l+n].r + S[i1][l].i*C[j1][l+n].i;
5085 sum_i1 += S[i1][l].r*C[j1][l+n].i - S[i1][l].i*C[j1][l+n].r;
5086 }
5087
5088 H[ii1][j1].r = sum_r0;
5089 H[ii1][j1].i = sum_i0; ii1++;
5090
5091 H[ii1][j1].r = sum_r1;
5092 H[ii1][j1].i = sum_i1; ii1++;
5093
5094 }
5095 }
5096
5097 } /* #pragma omp parallel */
5098
5099 /* H to C */
5100
5101 for (i1=1; i1<=2*n; i1++){
5102 for (j1=1; j1<=2*n; j1++){
5103 C[i1][j1].r = H[i1][j1].r;
5104 C[i1][j1].i = H[i1][j1].i;
5105 }
5106 }
5107
5108 /* penalty for ill-conditioning states */
5109
5110 EV_cut0 = Threshold_OLP_Eigen;
5111
5112 for (i1=1; i1<=n; i1++){
5113
5114 if (koS[i1]<EV_cut0){
5115 C[2*i1-1][2*i1-1].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
5116 C[2*i1 ][2*i1 ].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
5117 }
5118
5119 /* cutoff the interaction between the ill-conditioned state */
5120
5121 if (1.0e+3<C[2*i1-1][2*i1-1].r){
5122 for (j1=1; j1<=2*n; j1++){
5123 C[2*i1-1][j1 ] = Complex(0.0,0.0);
5124 C[j1 ][2*i1-1] = Complex(0.0,0.0);
5125 C[2*i1 ][j1 ] = Complex(0.0,0.0);
5126 C[j1 ][2*i1 ] = Complex(0.0,0.0);
5127 }
5128 C[2*i1-1][2*i1-1] = Complex(1.0e+4,0.0);
5129 C[2*i1 ][2*i1 ] = Complex(1.0e+4,0.0);
5130 }
5131 }
5132
5133 /* solve eigenvalue problem */
5134
5135 n1 = 2*n;
5136 EigenBand_lapack(C,ko,n1,n1,1);
5137
5138 for (i1=1; i1<=2*n; i1++) EIGEN[kloop][i1] = ko[i1];
5139
5140 /****************************************************
5141 Transformation to the original eigenvectors.
5142 NOTE JRCAT-244p and JAIST-2122p
5143 C = U * lambda^{-1/2} * D
5144 ****************************************************/
5145
5146 /* transpose */
5147 for (i1=1; i1<=2*n; i1++){
5148 for (j1=i1+1; j1<=2*n; j1++){
5149 Ctmp1 = C[i1][j1];
5150 Ctmp2 = C[j1][i1];
5151 C[i1][j1] = Ctmp2;
5152 C[j1][i1] = Ctmp1;
5153 }
5154 }
5155
5156 /* transpose */
5157 for (i1=1; i1<=n; i1++){
5158 for (j1=i1+1; j1<=n; j1++){
5159 Ctmp1 = S[i1][j1];
5160 Ctmp2 = S[j1][i1];
5161 S[i1][j1] = Ctmp2;
5162 S[j1][i1] = Ctmp1;
5163 }
5164 }
5165
5166 #pragma omp parallel shared(C,S,H,n,n1) private(OMPID,Nthrds,Nprocs,i1,j1,l1,sum_r0,sum_i0,sum_r1,sum_i1,l)
5167 {
5168 /* get info. on OpenMP */
5169
5170 OMPID = omp_get_thread_num();
5171 Nthrds = omp_get_num_threads();
5172 Nprocs = omp_get_num_procs();
5173
5174 for (j1=1+OMPID; j1<=n1; j1+=Nthrds){
5175 for (i1=1; i1<=n; i1++){
5176
5177 sum_r0 = 0.0;
5178 sum_i0 = 0.0;
5179
5180 sum_r1 = 0.0;
5181 sum_i1 = 0.0;
5182
5183 l1 = 1;
5184 for (l=1; l<=n; l++){
5185
5186 sum_r0 += S[i1][l].r*C[j1][l1].r
5187 -S[i1][l].i*C[j1][l1].i;
5188 sum_i0 += S[i1][l].r*C[j1][l1].i
5189 +S[i1][l].i*C[j1][l1].r; l1++;
5190
5191 sum_r1 += S[i1][l].r*C[j1][l1].r
5192 -S[i1][l].i*C[j1][l1].i;
5193 sum_i1 += S[i1][l].r*C[j1][l1].i
5194 +S[i1][l].i*C[j1][l1].r; l1++;
5195 }
5196
5197 H[j1][i1 ].r = sum_r0;
5198 H[j1][i1 ].i = sum_i0;
5199
5200 H[j1][i1+n].r = sum_r1;
5201 H[j1][i1+n].i = sum_i1;
5202 }
5203 }
5204
5205 } /* #pragma omp parallel */
5206
5207 } /* if (0<=kloop) */
5208
5209 /****************************************************
5210 store LDOS
5211 ****************************************************/
5212
5213 kloop = arpo[myid];
5214
5215 if (0<=kloop){
5216
5217 k1 = T_KGrids1[kloop];
5218 k2 = T_KGrids2[kloop];
5219 k3 = T_KGrids3[kloop];
5220
5221 i = Ti_KGrids1[kloop];
5222 j = Tj_KGrids2[kloop];
5223 k = Tk_KGrids3[kloop];
5224
5225 for (l=iemin; l<=iemax; l++){
5226
5227 /* calculate SD */
5228
5229 #pragma omp parallel shared(TmpS,SD,H,l,MP,Spe_Total_CNO,WhatSpecies,atomnum) private(OMPID,Nthrds,Nprocs,GA_AN,wanA,tnoA,Anum,theta,phi,sit,cot,sip,cop,d0,d1,d2,d3,GB_AN,wanB,tnoB,Bnum,i1,j1,u2,v2,uv,vu,Redum,Imdum,tmp1,tmp2,tmp3)
5230 {
5231
5232 /* get info. on OpenMP */
5233
5234 OMPID = omp_get_thread_num();
5235 Nthrds = omp_get_num_threads();
5236 Nprocs = omp_get_num_procs();
5237
5238 for (GA_AN=1+OMPID; GA_AN<=atomnum; GA_AN+=Nthrds){
5239
5240 wanA = WhatSpecies[GA_AN];
5241 tnoA = Spe_Total_CNO[wanA];
5242 Anum = MP[GA_AN];
5243 theta = Angle0_Spin[GA_AN];
5244 phi = Angle1_Spin[GA_AN];
5245 sit = sin(theta);
5246 cot = cos(theta);
5247 sip = sin(phi);
5248 cop = cos(phi);
5249
5250 for (i1=0; i1<tnoA; i1++){
5251
5252 d0 = 0.0;
5253 d1 = 0.0;
5254 d2 = 0.0;
5255 d3 = 0.0;
5256
5257 for (GB_AN=1; GB_AN<=atomnum; GB_AN++){
5258
5259 wanB = WhatSpecies[GB_AN];
5260 tnoB = Spe_Total_CNO[wanB];
5261 Bnum = MP[GB_AN];
5262
5263 for (j1=0; j1<tnoB; j1++){
5264
5265 /* Re11 */
5266 u2 = H[l][Anum+i1].r*H[l][Bnum+j1].r;
5267 v2 = H[l][Anum+i1].i*H[l][Bnum+j1].i;
5268 uv = H[l][Anum+i1].r*H[l][Bnum+j1].i;
5269 vu = H[l][Anum+i1].i*H[l][Bnum+j1].r;
5270 Redum = (u2 + v2);
5271 Imdum = (uv - vu);
5272 d0 += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
5273
5274 /* Re22 */
5275 u2 = H[l][Anum+i1+n].r*H[l][Bnum+j1+n].r;
5276 v2 = H[l][Anum+i1+n].i*H[l][Bnum+j1+n].i;
5277 uv = H[l][Anum+i1+n].r*H[l][Bnum+j1+n].i;
5278 vu = H[l][Anum+i1+n].i*H[l][Bnum+j1+n].r;
5279 Redum = (u2 + v2);
5280 Imdum = (uv - vu);
5281 d1 += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
5282
5283 /* Re12 */
5284 u2 = H[l][Anum+i1].r*H[l][Bnum+j1+n].r;
5285 v2 = H[l][Anum+i1].i*H[l][Bnum+j1+n].i;
5286 uv = H[l][Anum+i1].r*H[l][Bnum+j1+n].i;
5287 vu = H[l][Anum+i1].i*H[l][Bnum+j1+n].r;
5288 Redum = (u2 + v2);
5289 Imdum = (uv - vu);
5290 d2 += Redum*TmpS[Anum+i1][Bnum+j1].r - Imdum*TmpS[Anum+i1][Bnum+j1].i;
5291
5292 /* Im12
5293 conjugate complex of Im12 due to difference in the definition
5294 between density matrix and charge density
5295 */
5296
5297 d3 += -(Imdum*TmpS[Anum+i1][Bnum+j1].r + Redum*TmpS[Anum+i1][Bnum+j1].i);
5298
5299 } /* j1 */
5300 } /* GB_AN */
5301
5302 tmp1 = 0.5*(d0 + d1);
5303 tmp2 = 0.5*cot*(d0 - d1);
5304 tmp3 = (d2*cop - d3*sip)*sit;
5305
5306 SD[2*(Anum-1)+i1] = (float)(tmp1 + tmp2 + tmp3);
5307 SD[2*(Anum-1)+tnoA+i1] = (float)(tmp1 - tmp2 - tmp3);
5308
5309 } /* i1 */
5310 } /* GA_AN */
5311
5312 } /* #pragma omp parallel */
5313
5314 /*********************************************
5315 writing a binary file
5316 *********************************************/
5317
5318 i_vec[0] = i;
5319 i_vec[1] = j;
5320 i_vec[2] = k;
5321
5322 fwrite(i_vec,sizeof(int),3,fp_ev);
5323 fwrite(&SD[0],sizeof(float),2*n,fp_ev);
5324
5325 } /* l */
5326 } /* if (kloop<=0) */
5327
5328 /*********************************************
5329 calculation of partial density matrix
5330 *********************************************/
5331
5332 kloop = arpo[myid];
5333
5334 if (0<=kloop && cal_partial_charge) {
5335
5336 k1 = T_KGrids1[kloop];
5337 k2 = T_KGrids2[kloop];
5338 k3 = T_KGrids3[kloop];
5339
5340 for (l=iemin; l<=iemax; l++){
5341
5342 x1 = (ko[l] - ChemP)*Beta;
5343 if (x1<=-max_x) x1 = -max_x;
5344 if (max_x<=x1) x1 = max_x;
5345 FermiF1 = 1.0/(1.0 + exp(x1));
5346
5347 x2 = (ko[l] - (ChemP+ene_win_partial_charge))*Beta;
5348 if (x2<=-max_x) x2 = -max_x;
5349 if (max_x<=x2) x2 = max_x;
5350 FermiF2 = 1.0/(1.0 + exp(x2));
5351
5352 diffF = fabs(FermiF1-FermiF2);
5353
5354 for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
5355
5356 wanA = WhatSpecies[GA_AN];
5357 tnoA = Spe_Total_CNO[wanA];
5358 Anum = MP[GA_AN];
5359
5360 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
5361 GB_AN = natn[GA_AN][LB_AN];
5362 Rn = ncn[GA_AN][LB_AN];
5363 wanB = WhatSpecies[GB_AN];
5364 tnoB = Spe_Total_CNO[wanB];
5365 Bnum = MP[GB_AN];
5366
5367 l1 = atv_ijk[Rn][1];
5368 l2 = atv_ijk[Rn][2];
5369 l3 = atv_ijk[Rn][3];
5370 kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;
5371
5372 si = sin(2.0*PI*kRn);
5373 co = cos(2.0*PI*kRn);
5374
5375 for (i=0; i<tnoA; i++){
5376 for (j=0; j<tnoB; j++){
5377
5378 /* Re11 */
5379 u2 = H[l][Anum+i].r*H[l][Bnum+j].r;
5380 v2 = H[l][Anum+i].i*H[l][Bnum+j].i;
5381 uv = H[l][Anum+i].r*H[l][Bnum+j].i;
5382 vu = H[l][Anum+i].i*H[l][Bnum+j].r;
5383 tmp = co*(u2+v2) - si*(uv-vu);
5384 PDM[0][GA_AN][LB_AN][i][j] += diffF*tmp;
5385
5386 /* Re22 */
5387 u2 = H[l][Anum+i+n].r*H[l][Bnum+j+n].r;
5388 v2 = H[l][Anum+i+n].i*H[l][Bnum+j+n].i;
5389 uv = H[l][Anum+i+n].r*H[l][Bnum+j+n].i;
5390 vu = H[l][Anum+i+n].i*H[l][Bnum+j+n].r;
5391 tmp = co*(u2+v2) - si*(uv-vu);
5392 PDM[1][GA_AN][LB_AN][i][j] += diffF*tmp;
5393
5394 }
5395 }
5396 }
5397 }
5398 }
5399 }
5400
5401 /****************************************************
5402 MPI: EIGEN
5403 ****************************************************/
5404
5405 for (ID=0; ID<numprocs; ID++){
5406
5407 kloop = arpo[ID];
5408
5409 if (0<=kloop){
5410 MPI_Bcast(&EIGEN[kloop][0], 2*n+1, MPI_DOUBLE, ID, mpi_comm_level1);
5411 }
5412 }
5413
5414 } /* kloop0 */
5415
5416 /****************************************************
5417 MPI:
5418
5419 PDM
5420 ****************************************************/
5421
5422 if (cal_partial_charge) {
5423
5424 /* MPI: PDM */
5425
5426 for (spin=0; spin<=1; spin++){
5427 for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
5428
5429 wanA = WhatSpecies[GA_AN];
5430 tnoA = Spe_Total_CNO[wanA];
5431
5432 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
5433
5434 GB_AN = natn[GA_AN][LB_AN];
5435 wanB = WhatSpecies[GB_AN];
5436 tnoB = Spe_Total_CNO[wanB];
5437
5438 for (i=0; i<tnoA; i++){
5439
5440 MPI_Allreduce(&PDM[spin][GA_AN][LB_AN][i][0],
5441 &TPDM[spin][GA_AN][LB_AN][i][0],
5442 tnoB, MPI_DOUBLE, MPI_SUM,
5443 mpi_comm_level1);
5444
5445 for (j=0; j<tnoB; j++){
5446 TPDM[spin][GA_AN][LB_AN][i][j] /= (double)T_knum;
5447 }
5448 }
5449 }
5450 }
5451 }
5452
5453 /* store TPDM to Partial_DM */
5454
5455 for (spin=0; spin<=1; spin++){
5456 for (GA_AN=1; GA_AN<=atomnum; GA_AN++){
5457
5458 MA_AN = F_G2M[GA_AN];
5459 wanA = WhatSpecies[GA_AN];
5460 tnoA = Spe_Total_CNO[wanA];
5461
5462 if (1<=MA_AN && MA_AN<=Matomnum){
5463
5464 for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
5465
5466 GB_AN = natn[GA_AN][LB_AN];
5467 wanB = WhatSpecies[GB_AN];
5468 tnoB = Spe_Total_CNO[wanB];
5469
5470 for (i=0; i<tnoA; i++){
5471 for (j=0; j<tnoB; j++){
5472 Partial_DM[spin][MA_AN][LB_AN][i][j] = TPDM[spin][GA_AN][LB_AN][i][j];
5473 }
5474 }
5475 }
5476 }
5477 }
5478 }
5479 }
5480
5481 /****************************************************
5482 write: EIGEN
5483 ****************************************************/
5484
5485 if (myid==Host_ID){
5486 if (fp_eig) {
5487
5488 fprintf(fp_eig,"irange %d %d\n",iemin,iemax);
5489 fprintf(fp_eig,"<Eigenvalues\n");
5490
5491 for (kloop=0; kloop<T_knum; kloop++){
5492 for (spin=0; spin<=1; spin++){
5493
5494 i = Ti_KGrids1[kloop];
5495 j = Tj_KGrids2[kloop];
5496 k = Tk_KGrids3[kloop];
5497
5498 fprintf(fp_eig,"%d %d %d ",i,j,k);
5499 for (ie=iemin; ie<=iemax; ie++) {
5500 fprintf(fp_eig,"%lf ",EIGEN[kloop][ie]);
5501 }
5502 fprintf(fp_eig,"\n");
5503
5504 }
5505 }
5506
5507 fprintf(fp_eig,"Eigenvalues>\n");
5508
5509 } /* fp_eig */
5510 } /* if (myid==Host_ID) */
5511
5512 Finishing:
5513
5514 if (myid==Host_ID){
5515 if (fp_eig) {
5516 fclose(fp_eig);
5517 }
5518 }
5519
5520 if (fp_ev) {
5521 fclose(fp_ev);
5522 }
5523
5524 /****************************************************
5525 merge *.Dos.vec#
5526 ****************************************************/
5527
5528 MPI_Barrier(mpi_comm_level1);
5529
5530 if (myid==Host_ID){
5531
5532 sprintf(file_ev0,"%s%s.Dos.vec",filepath,filename);
5533 if ( (fp_ev0=fopen(file_ev0,"w"))==NULL ) {
5534 printf("cannot open a file %s\n",file_ev0);
5535 }
5536
5537 for (ID=0; ID<numprocs; ID++){
5538
5539 sprintf(file_ev,"%s%s.Dos.vec%d",filepath,filename,ID);
5540
5541 if ( 1<=num_allocated_k[ID] ){
5542
5543 if ( (fp_ev=fopen(file_ev,"r"))==NULL ) {
5544 printf("cannot open a file %s\n",file_ev);
5545 }
5546
5547 for (k=0; k<num_allocated_k[ID]; k++){
5548 for (l=iemin; l<=iemax; l++){
5549 fread( i_vec, sizeof(int), 3,fp_ev);
5550 fwrite(i_vec, sizeof(int), 3,fp_ev0);
5551 fread( &SD[0], sizeof(float),2*n,fp_ev);
5552 fwrite(&SD[0], sizeof(float),2*n,fp_ev0);
5553 }
5554 }
5555
5556 fclose(fp_ev);
5557 }
5558 }
5559
5560 fclose(fp_ev0);
5561 }
5562
5563 MPI_Barrier(mpi_comm_level1);
5564
5565 /* delete files */
5566 if (myid==Host_ID){
5567 for (ID=0; ID<numprocs; ID++){
5568 sprintf(file_ev,"%s%s.Dos.vec%d",filepath,filename,ID);
5569 remove(file_ev);
5570 }
5571 }
5572
5573 /****************************************************
5574 Fermi surface viewable by XCrysDen
5575 ****************************************************/
5576
5577 if (myid==Host_ID){
5578
5579 int ii,jj,kk;
5580 int ***TD2OD;
5581 double x0,y0,z0;
5582 char file_fermi[YOUSO10];
5583 FILE *fp;
5584
5585 /* allocation of TD2OD */
5586
5587 TD2OD = (int***)malloc(sizeof(int**)*knum_i);
5588 for (i=0; i<knum_i; i++){
5589 TD2OD[i] = (int**)malloc(sizeof(int*)*knum_j);
5590 for (j=0; j<knum_j; j++){
5591 TD2OD[i][j] = (int*)malloc(sizeof(int)*knum_k);
5592 for (k=0; k<knum_k; k++) TD2OD[i][j][k] = 0;
5593 }
5594 }
5595
5596 kloop = 0;
5597 for (i=0; i<knum_i; i++){
5598 for (j=0; j<knum_j; j++){
5599 for (k=0; k<knum_k; k++){
5600 TD2OD[i][j][k] = kloop;
5601 kloop++;
5602 }
5603 }
5604 }
5605
5606 /* make bxsf files */
5607
5608 sprintf(file_fermi,"%s%s.FermiSurf.bxsf",filepath,filename);
5609
5610 if ((fp=fopen(file_fermi,"w")) != NULL){
5611
5612 fprintf(fp,"BEGIN_INFO\n");
5613 fprintf(fp,"Fermi Energy: %15.12f\n",ChemP);
5614 fprintf(fp,"END_INFO\n\n");
5615
5616 fprintf(fp,"BEGIN_BLOCK_BANDGRID_3D\n");
5617 fprintf(fp,"comment_line\n");
5618 fprintf(fp,"BEGIN_BANDGRID_3D_simple_example\n");
5619 fprintf(fp,"%d\n",2*n);
5620 fprintf(fp,"%d %d %d\n",knum_i+1,knum_j+1,knum_k+1);
5621
5622 fprintf(fp,"0.0 0.0 0.0\n");
5623 fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[1][1],rtv[1][2],rtv[1][3]);
5624 fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[2][1],rtv[2][2],rtv[2][3]);
5625 fprintf(fp,"%15.10f %15.10f %15.10f\n",rtv[3][1],rtv[3][2],rtv[3][3]);
5626
5627 for (i1=1; i1<=2*n; i1++){
5628
5629 fprintf(fp,"BAND: %d\n",i1);
5630
5631 for (i=0; i<=knum_i; i++){
5632 ii = i%knum_i;
5633 for (j=0; j<=knum_j; j++){
5634 jj = j%knum_j;
5635 for (k=0; k<=knum_k; k++){
5636 kk = k%knum_k;
5637 kloop = TD2OD[ii][jj][kk];
5638 fprintf(fp,"%10.5f ",EIGEN[kloop][i1]);
5639 }
5640 fprintf(fp,"\n");
5641 }
5642
5643 if (i!=knum_i) fprintf(fp,"\n");
5644
5645 }
5646 }
5647
5648 fprintf(fp,"END_BANDGRID_3D\n");
5649 fprintf(fp,"END_BLOCK_BANDGRID_3D\n");
5650
5651 fclose(fp);
5652 }
5653 else{
5654 printf("Failure in saving bxsf files.\n");
5655 }
5656
5657 /* free TD2OD */
5658 for (i=0; i<knum_i; i++){
5659 for (j=0; j<knum_j; j++){
5660 free(TD2OD[i][j]);
5661 }
5662 free(TD2OD[i]);
5663 }
5664 free(TD2OD);
5665
5666 } /* if (myid==Host_ID) */
5667
5668 /****************************************************
5669 free arrays
5670 ****************************************************/
5671
5672 free(MP);
5673 free(arpo);
5674 free(num_allocated_k);
5675
5676 free(ko);
5677 free(koS);
5678
5679 for (j=0; j<n2; j++){
5680 free(H[j]);
5681 }
5682 free(H);
5683
5684 free(SD);
5685
5686 for (i=0; i<(n+1); i++){
5687 free(TmpS[i]);
5688 }
5689 free(TmpS);
5690
5691 free(M1);
5692
5693 for (j=0; j<n2; j++){
5694 free(C[j]);
5695 }
5696 free(C);
5697
5698 free(KGrids1); free(KGrids2);free(KGrids3);
5699
5700 for (j=0; j<n2; j++){
5701 free(TmpM[j]);
5702 }
5703 free(TmpM);
5704
5705 /* partial density matrix */
5706
5707 if (cal_partial_charge){
5708
5709 /* PDM */
5710
5711 for (k=0; k<=1; k++){
5712
5713 FNAN[0] = 0;
5714
5715 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
5716
5717 if (Gc_AN==0){
5718 tno0 = 1;
5719 }
5720 else{
5721 Cwan = WhatSpecies[Gc_AN];
5722 tno0 = Spe_Total_NO[Cwan];
5723 }
5724
5725 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
5726
5727 if (Gc_AN==0){
5728 tno1 = 1;
5729 }
5730 else{
5731 Gh_AN = natn[Gc_AN][h_AN];
5732 Hwan = WhatSpecies[Gh_AN];
5733 tno1 = Spe_Total_NO[Hwan];
5734 }
5735
5736 for (i=0; i<tno0; i++){
5737 free(PDM[k][Gc_AN][h_AN][i]);
5738 }
5739 free(PDM[k][Gc_AN][h_AN]);
5740 }
5741 free(PDM[k][Gc_AN]);
5742 }
5743 free(PDM[k]);
5744 }
5745 free(PDM);
5746
5747 /* TPDM */
5748
5749 for (k=0; k<=1; k++){
5750
5751 FNAN[0] = 0;
5752
5753 for (Gc_AN=0; Gc_AN<=atomnum; Gc_AN++){
5754
5755 if (Gc_AN==0){
5756 tno0 = 1;
5757 }
5758 else{
5759 Cwan = WhatSpecies[Gc_AN];
5760 tno0 = Spe_Total_NO[Cwan];
5761 }
5762
5763 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
5764
5765 if (Gc_AN==0){
5766 tno1 = 1;
5767 }
5768 else{
5769 Gh_AN = natn[Gc_AN][h_AN];
5770 Hwan = WhatSpecies[Gh_AN];
5771 tno1 = Spe_Total_NO[Hwan];
5772 }
5773
5774 for (i=0; i<tno0; i++){
5775 free(TPDM[k][Gc_AN][h_AN][i]);
5776 }
5777 free(TPDM[k][Gc_AN][h_AN]);
5778 }
5779 free(TPDM[k][Gc_AN]);
5780 }
5781 free(TPDM[k]);
5782 }
5783 free(TPDM);
5784 }
5785
5786 /* non-spin-orbit coupling and non-LDA+U */
5787 if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
5788 && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0){
5789 free(Dummy_ImNL[0][0][0][0]);
5790 free(Dummy_ImNL[0][0][0]);
5791 free(Dummy_ImNL[0][0]);
5792 free(Dummy_ImNL[0]);
5793 free(Dummy_ImNL);
5794 }
5795
5796 free(T_KGrids1);
5797 free(T_KGrids2);
5798 free(T_KGrids3);
5799
5800 free(Ti_KGrids1);
5801 free(Tj_KGrids2);
5802 free(Tk_KGrids3);
5803
5804 for (j=0; j<T_knum; j++){
5805 free(EIGEN[j]);
5806 }
5807 free(EIGEN);
5808
5809 /* for elapsed time */
5810
5811 dtime(&TEtime);
5812 time0 = TEtime - TStime;
5813 return time0;
5814 }
5815