1 /**********************************************************************
2   Unfolding_Bands.c
3 
4      Unfolding_Bands.c is a subroutine to calculate unfolded weight
5      at given k-points for the file output.
6 
7   Log of Band_Unfolding.c:
8 
9       6/Jan/2016  Released by Chi-Cheng Lee
10 
11 ***********************************************************************/
12 
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <math.h>
17 #include <time.h>
18 #include "openmx_common.h"
19 #include "mpi.h"
20 #include <omp.h>
21 
22 static int NR;
23 static int Norb;
24 static int nr,natom;
25 static int* Norbperatom;
26 static int*** _MapR;
27 static int** Rlist;
28 static int** rlist;
29 static double***** Elem;
30 static int*** tabr4RN;
31 static int*** rnmap;
32 static int exitcode;
33 static int totnkpts;
34 static int* np;
35 
36 static void determine_kpts(const int nk, double** klist);
37 static double volume(const double* a,const double* b,const double* c);
38 static double dot(const double* v1,const double* v2);
39 static double distwovec(const double* a,const double* b);
40 static void getnorbperatom();
41 static void buildMapRlist();
42 static void buildtabr4RN(const double* a,const double* b,const double* c,double* origin,const int* mapN2n);
43 static void abc_by_ABC(double** S);
44 static void buildrnmap(const int* mapN2n);
45 
46 static void Unfolding_Bands_Col(
47 				int nkpoint, double **kpoint,
48 				int SpinP_switch,
49 				double *****nh,
50 				double ****CntOLP);
51 
52 static void Unfolding_Bands_NonCol(
53 				   int nkpoint, double **kpoint,
54 				   int SpinP_switch,
55 				   double *****nh,
56 				   double *****ImNL,
57 				   double ****CntOLP);
58 
59 
Unfolding_Bands(int nkpoint,double ** kpoint,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP)60 void Unfolding_Bands( int nkpoint, double **kpoint,
61 		      int SpinP_switch,
62 		      double *****nh,
63 		      double *****ImNL,
64 		      double ****CntOLP)
65 {
66   if (SpinP_switch==0 || SpinP_switch==1){
67     Unfolding_Bands_Col( nkpoint, kpoint, SpinP_switch, nh, CntOLP);
68   }
69   else if (SpinP_switch==3){
70     Unfolding_Bands_NonCol( nkpoint, kpoint, SpinP_switch, nh, ImNL, CntOLP);
71   }
72 }
73 
74 
75 
Unfolding_Bands_Col(int nkpoint,double ** kpoint,int SpinP_switch,double ***** nh,double **** CntOLP)76 static void Unfolding_Bands_Col(
77 				int nkpoint, double **kpoint,
78 				int SpinP_switch,
79 				double *****nh,
80 				double ****CntOLP)
81 {
82   double coe;
83   double* a;
84   double* b;
85   double* c;
86   double* K;
87   double* K2;
88   double* r;
89   double* r0;
90   double* kj_e;
91   int* iskj_e;
92   int countkj_e;
93   double pk1,pk2,pk3;
94   double dis2pk;
95   double kdis;
96   dcomplex** weight;
97   dcomplex*** kj_v;
98   dcomplex**** tmpelem;
99   double **fracabc;
100 
101   int i,j,k,l,n,wan;
102   int *MP,*order_GA,*My_NZeros,*SP_NZeros,*SP_Atoms;
103   int i1,j1,po,spin,n1,size_H1;
104   int num2,RnB,l1,l2,l3,kloop;
105   int ct_AN,h_AN,wanA,tnoA,wanB,tnoB;
106   int GA_AN,Anum;
107   int ii,ij,ik,Rn,AN;
108   int num0,num1,mul,m,wan1,Gc_AN;
109   int LB_AN,GB_AN,Bnum;
110   double time0,tmp,av_num;
111   double snum_i,snum_j,snum_k,k1,k2,k3,sum,sumi,Num_State,FermiF;
112   double x,Dnum,Dnum2,AcP,ChemP_MAX,ChemP_MIN,EV_cut0;
113   double **ko,*M1,**EIGEN;
114   double *koS;
115   double *S1,**H1;
116   dcomplex ***H,**S,***C;
117   dcomplex Ctmp1,Ctmp2;
118   double u2,v2,uv,vu;
119   double dum,sumE,kRn,si,co;
120   double Resum,ResumE,Redum,Redum2,Imdum;
121   double TStime,TEtime,SiloopTime,EiloopTime;
122   double FermiEps = 1.0e-14;
123   double x_cut = 30.0;
124   double OLP_eigen_cut=Threshold_OLP_Eigen;
125   char *Name_Angular[Supported_MaxL+1][2*(Supported_MaxL+1)+1];
126   char *Name_Multiple[20];
127   char file_EV[YOUSO10];
128   FILE *fp_EV0;
129   FILE *fp_EV;
130   FILE *fp_EV1;
131   FILE *fp_EV2;
132   FILE *fp_EV3;
133   char buf[fp_bsize];          /* setvbuf */
134   int numprocs,myid,ID;
135   int *is1,*ie1;
136   MPI_Status *stat_send;
137   MPI_Request *request_send;
138   MPI_Request *request_recv;
139 
140   /* MPI */
141   MPI_Comm_size(mpi_comm_level1,&numprocs);
142   MPI_Comm_rank(mpi_comm_level1,&myid);
143   MPI_Barrier(mpi_comm_level1);
144 
145   if (myid==Host_ID && 0<level_stdout) {
146     printf("\n*******************************************************\n");
147     printf("                 Unfolding of Bands \n");
148     printf("*******************************************************\n\n");fflush(stdout);
149   }
150   dtime(&TStime);
151 
152   /****************************************************
153                   allocation of arrays
154   ****************************************************/
155 
156   getnorbperatom();
157   exitcode=0;
158   buildMapRlist();
159   if (exitcode==1) {
160     for (i=0; i<3; i++) free(unfold_abc[i]); free(unfold_abc);
161     free(unfold_origin);
162     free(unfold_mapN2n);
163     for (i=0; i<unfold_Nkpoint+1; i++) free(unfold_kpoint[i]); free(unfold_kpoint);
164     return;
165   }
166 
167   a = (double*)malloc(sizeof(double)*3);
168   b = (double*)malloc(sizeof(double)*3);
169   c = (double*)malloc(sizeof(double)*3);
170   a[0]=unfold_abc[0][0];
171   a[1]=unfold_abc[0][1];
172   a[2]=unfold_abc[0][2];
173   b[0]=unfold_abc[1][0];
174   b[1]=unfold_abc[1][1];
175   b[2]=unfold_abc[1][2];
176   c[0]=unfold_abc[2][0];
177   c[1]=unfold_abc[2][1];
178   c[2]=unfold_abc[2][2];
179   buildtabr4RN(a,b,c,unfold_origin,unfold_mapN2n);
180   if (exitcode==1) {
181     for (i=0; i<3; i++) free(unfold_abc[i]); free(unfold_abc);
182     free(unfold_origin);
183     free(unfold_mapN2n);
184     for (i=0; i<unfold_Nkpoint+1; i++) free(unfold_kpoint[i]); free(unfold_kpoint);
185     free(a);
186     free(b);
187     free(c);
188     return;
189   }
190 
191   if (myid==Host_ID && 1<level_stdout) {
192     printf("Reference origin is set to (%f %f %f) (Bohr)\n",unfold_origin[0],unfold_origin[1],unfold_origin[2]);
193     printf("Supercell_lattice_vector atom Reference_lattice_vector atom\n");
194     for (i=0; i<NR; i++) for (j=0; j<atomnum; j++)
195       printf("(%i %i %i) %i <==> (%i %i %i) %i\n",Rlist[i][0],Rlist[i][1],Rlist[i][2],j+1,tabr4RN[i][j][0],tabr4RN[i][j][1],tabr4RN[i][j][2],unfold_mapN2n[j]);
196     printf("\n");fflush(stdout);
197   }
198 
199   coe=Cell_Volume/volume(a,b,c);
200   fracabc=(double**)malloc(sizeof(double*)*3);
201   for (i=0; i<3; i++) fracabc[i]=(double*)malloc(sizeof(double)*3);
202   abc_by_ABC(fracabc);
203   determine_kpts(nkpoint,kpoint);
204 
205   MP = (int*)malloc(sizeof(int)*List_YOUSO[1]);
206   order_GA = (int*)malloc(sizeof(int)*(List_YOUSO[1]+1));
207   My_NZeros = (int*)malloc(sizeof(int)*numprocs);
208   SP_NZeros = (int*)malloc(sizeof(int)*numprocs);
209   SP_Atoms = (int*)malloc(sizeof(int)*numprocs);
210 
211   n = 0;
212   for (i=1; i<=atomnum; i++){
213     wanA  = WhatSpecies[i];
214     n  = n + Spe_Total_CNO[wanA];
215   }
216 
217   ko = (double**)malloc(sizeof(double*)*List_YOUSO[23]);
218   for (i=0; i<List_YOUSO[23]; i++){
219     ko[i] = (double*)malloc(sizeof(double)*(n+1));
220   }
221 
222   koS = (double*)malloc(sizeof(double)*(n+1));
223 
224   EIGEN = (double**)malloc(sizeof(double*)*List_YOUSO[23]);
225   for (j=0; j<List_YOUSO[23]; j++){
226     EIGEN[j] = (double*)malloc(sizeof(double)*(n+1));
227   }
228 
229   H = (dcomplex***)malloc(sizeof(dcomplex**)*List_YOUSO[23]);
230   for (i=0; i<List_YOUSO[23]; i++){
231     H[i] = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
232     for (j=0; j<n+1; j++){
233       H[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
234     }
235   }
236 
237   S = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
238   for (i=0; i<n+1; i++){
239     S[i] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
240   }
241 
242   M1 = (double*)malloc(sizeof(double)*(n+1));
243 
244   C = (dcomplex***)malloc(sizeof(dcomplex**)*List_YOUSO[23]);
245   for (i=0; i<List_YOUSO[23]; i++){
246     C[i] = (dcomplex**)malloc(sizeof(dcomplex*)*(n+1));
247     for (j=0; j<n+1; j++){
248       C[i][j] = (dcomplex*)malloc(sizeof(dcomplex)*(n+1));
249     }
250   }
251 
252   /*****************************************************
253         allocation of arrays for parallelization
254   *****************************************************/
255 
256   stat_send = malloc(sizeof(MPI_Status)*numprocs);
257   request_send = malloc(sizeof(MPI_Request)*numprocs);
258   request_recv = malloc(sizeof(MPI_Request)*numprocs);
259 
260   is1 = (int*)malloc(sizeof(int)*numprocs);
261   ie1 = (int*)malloc(sizeof(int)*numprocs);
262 
263   if ( numprocs<=n ){
264 
265     av_num = (double)n/(double)numprocs;
266 
267     for (ID=0; ID<numprocs; ID++){
268       is1[ID] = (int)(av_num*(double)ID) + 1;
269       ie1[ID] = (int)(av_num*(double)(ID+1));
270     }
271 
272     is1[0] = 1;
273     ie1[numprocs-1] = n;
274 
275   }
276 
277   else{
278 
279     for (ID=0; ID<n; ID++){
280       is1[ID] = ID + 1;
281       ie1[ID] = ID + 1;
282     }
283     for (ID=n; ID<numprocs; ID++){
284       is1[ID] =  1;
285       ie1[ID] = -2;
286     }
287   }
288 
289   /* find size_H1 */
290   size_H1 = Get_OneD_HS_Col(0, CntOLP, &tmp, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
291 
292   /* allocation of S1 and H1 */
293   S1 = (double*)malloc(sizeof(double)*size_H1);
294   H1 = (double**)malloc(sizeof(double*)*(SpinP_switch+1));
295   for (spin=0; spin<(SpinP_switch+1); spin++){
296     H1[spin] = (double*)malloc(sizeof(double)*size_H1);
297   }
298 
299   /* Get S1 */
300   size_H1 = Get_OneD_HS_Col(1, CntOLP, S1, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
301 
302   if (SpinP_switch==0){
303     size_H1 = Get_OneD_HS_Col(1, nh[0], H1[0],  MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
304   }
305   else {
306     size_H1 = Get_OneD_HS_Col(1, nh[0], H1[0],  MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
307     size_H1 = Get_OneD_HS_Col(1, nh[1], H1[1],  MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
308   }
309 
310   dtime(&SiloopTime);
311 
312   /*****************************************************
313          Solve eigenvalue problem at each k-point
314   *****************************************************/
315 
316   kj_e=(double*)malloc(sizeof(double)*4);
317   iskj_e=(int*)malloc(sizeof(int)*4);
318   K=(double*)malloc(sizeof(double)*3);
319   K2=(double*)malloc(sizeof(double)*3);
320   r=(double*)malloc(sizeof(double)*3);
321   r0=(double*)malloc(sizeof(double)*3);
322   weight=(dcomplex**)malloc(sizeof(dcomplex*)*atomnum);
323   for (i=0; i<atomnum; i++) weight[i]=(dcomplex*)malloc(sizeof(dcomplex)*Norbperatom[i]);
324   kj_v=(dcomplex***)malloc(sizeof(dcomplex**)*4);
325   for (i=0; i<4; i++) {
326     kj_v[i]=(dcomplex**)malloc(sizeof(dcomplex*)*atomnum);
327     for (j=0; j<atomnum; j++) kj_v[i][j]=(dcomplex*)malloc(sizeof(dcomplex)*Norbperatom[j]);
328   }
329   tmpelem=(dcomplex****)malloc(sizeof(dcomplex***)*atomnum);
330   for (i=0; i<atomnum; i++) tmpelem[i]=(dcomplex***)malloc(sizeof(dcomplex**)*atomnum);
331   for (i=0; i<atomnum; i++) for (j=0; j<atomnum; j++) {
332     tmpelem[i][j]=(dcomplex**)malloc(sizeof(dcomplex*)*Norbperatom[i]);
333     for (k=0; k<Norbperatom[i]; k++) {
334       tmpelem[i][j][k]=(dcomplex*)malloc(sizeof(dcomplex)*Norbperatom[j]);
335     }
336   }
337 
338   Name_Angular[0][0] = "s          ";
339   Name_Angular[1][0] = "px         ";
340   Name_Angular[1][1] = "py         ";
341   Name_Angular[1][2] = "pz         ";
342   Name_Angular[2][0] = "d3z^2-r^2  ";
343   Name_Angular[2][1] = "dx^2-y^2   ";
344   Name_Angular[2][2] = "dxy        ";
345   Name_Angular[2][3] = "dxz        ";
346   Name_Angular[2][4] = "dyz        ";
347   Name_Angular[3][0] = "f5z^2-3r^2 ";
348   Name_Angular[3][1] = "f5xz^2-xr^2";
349   Name_Angular[3][2] = "f5yz^2-yr^2";
350   Name_Angular[3][3] = "fzx^2-zy^2 ";
351   Name_Angular[3][4] = "fxyz       ";
352   Name_Angular[3][5] = "fx^3-3*xy^2";
353   Name_Angular[3][6] = "f3yx^2-y^3 ";
354   Name_Angular[4][0] = "g1         ";
355   Name_Angular[4][1] = "g2         ";
356   Name_Angular[4][2] = "g3         ";
357   Name_Angular[4][3] = "g4         ";
358   Name_Angular[4][4] = "g5         ";
359   Name_Angular[4][5] = "g6         ";
360   Name_Angular[4][6] = "g7         ";
361   Name_Angular[4][7] = "g8         ";
362   Name_Angular[4][8] = "g9         ";
363 
364   Name_Multiple[0] = "0";
365   Name_Multiple[1] = "1";
366   Name_Multiple[2] = "2";
367   Name_Multiple[3] = "3";
368   Name_Multiple[4] = "4";
369   Name_Multiple[5] = "5";
370 
371   if (myid==Host_ID){
372     strcpy(file_EV,".EV");
373     fnjoint(filepath,filename,file_EV);
374     if ((fp_EV = fopen(file_EV,"a")) != NULL){
375       fprintf(fp_EV,"\n");
376       fprintf(fp_EV,"***********************************************************\n");
377       fprintf(fp_EV,"***********************************************************\n");
378       fprintf(fp_EV,"          Unfolding calculation for band structure         \n");
379       fprintf(fp_EV,"***********************************************************\n");
380       fprintf(fp_EV,"***********************************************************\n");
381       fprintf(fp_EV,"                                                                          \n");
382       fprintf(fp_EV," Origin of the Reference cell is set to (%f %f %f) (Bohr).\n\n",
383               unfold_origin[0],unfold_origin[1],unfold_origin[2]);
384       fprintf(fp_EV," Unfolded weights at specified k points are stored in System.Name.unfold_totup(dn).\n");
385       fprintf(fp_EV," Individual orbital weights are stored in System.Name.unfold_orbup(dn).\n");
386       fprintf(fp_EV," The format is: k_dis(Bohr^{-1})  energy(eV)  weight.\n\n");
387       fprintf(fp_EV," The sequence for the orbital weights in System.Name.unfold_orbup(dn) is given below.\n\n");
388 
389       i1 = 1;
390 
391       for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
392 	wan1 = WhatSpecies[Gc_AN];
393 	for (l=0; l<=Supported_MaxL; l++){
394 	  for (mul=0; mul<Spe_Num_CBasis[wan1][l]; mul++){
395 	    for (m=0; m<(2*l+1); m++){
396 	      fprintf(fp_EV,"  %4d ",i1);
397 	      if (l==0 && mul==0 && m==0)
398 		fprintf(fp_EV,"%4d %3s %s %s",
399 			Gc_AN,SpeName[wan1],Name_Multiple[mul],Name_Angular[l][m]);
400 	      else
401 		fprintf(fp_EV,"         %s %s",
402 			Name_Multiple[mul],Name_Angular[l][m]);
403 	      fprintf(fp_EV,"\n");
404 	      i1++;
405 	    }
406 	  }
407 	}
408       }
409 
410       fprintf(fp_EV,"\n");
411       fprintf(fp_EV,"\n  The total number of calculated k points is %i.\n",totnkpts);
412       fprintf(fp_EV,"  The number of calculated k points on each path is \n");
413 
414       fprintf(fp_EV,"  For each path: (");
415       for (i=0; i<nkpoint; i++){
416         fprintf(fp_EV," %i",np[i]);
417       }
418       fprintf(fp_EV," )\n\n");
419 
420       fprintf(fp_EV,"                 ka         kb         kc\n");
421 
422       kloop = 0;
423       for (i=0; i<nkpoint; i++){
424 	for (j=0; j<np[i]; j++) {
425 
426           kloop++;
427 
428 	  if (np[i]==1) {
429 	    fprintf(fp_EV,"  %3d/%3d   %10.6f %10.6f %10.6f\n",kloop,totnkpts,kpoint[i][1],kpoint[i][2],kpoint[i][3]);
430 	  }
431           else {
432 	    fprintf(fp_EV,"  %3d/%3d   %10.6f %10.6f %10.6f\n",
433                     kloop,totnkpts,
434                     kpoint[i][1]+j*(kpoint[i+1][1]-kpoint[i][1])/np[i],
435 		    kpoint[i][2]+j*(kpoint[i+1][2]-kpoint[i][2])/np[i],
436 		    kpoint[i][3]+j*(kpoint[i+1][3]-kpoint[i][3])/np[i]);
437 	  }
438 	}
439       }
440 
441       fprintf(fp_EV,"\n");
442       fclose(fp_EV);
443 
444     }
445     else{
446       printf("Failure of saving the EV file.\n");
447       fclose(fp_EV);
448     }
449 
450     if (SpinP_switch==0) {
451 
452       strcpy(file_EV,".unfold_totup");
453       fnjoint(filepath,filename,file_EV);
454 
455       fp_EV = fopen(file_EV,"w");
456       if (fp_EV == NULL) {
457 	printf("Failure of saving the System.Name.unfold_totup file.\n");
458 	fclose(fp_EV);
459       }
460 
461       strcpy(file_EV,".unfold_orbup");
462       fnjoint(filepath,filename,file_EV);
463       fp_EV1 = fopen(file_EV,"w");
464 
465       if (fp_EV1 == NULL) {
466 	printf("Failure of saving the System.Name.unfold_orbup file.\n");
467 	fclose(fp_EV1);
468       }
469 
470     }
471     else if (SpinP_switch==1) {
472       strcpy(file_EV,".unfold_totup");
473       fnjoint(filepath,filename,file_EV);
474       fp_EV = fopen(file_EV,"w");
475       if (fp_EV == NULL) {
476 	printf("Failure of saving the System.Name.unfold_totup file.\n");
477 	fclose(fp_EV);
478       }
479       strcpy(file_EV,".unfold_orbup");
480       fnjoint(filepath,filename,file_EV);
481       fp_EV1 = fopen(file_EV,"w");
482       if (fp_EV1 == NULL) {
483 	printf("Failure of saving the System.Name.unfold_orbup file.\n");
484 	fclose(fp_EV1);
485       }
486       strcpy(file_EV,".unfold_totdn");
487       fnjoint(filepath,filename,file_EV);
488       fp_EV2 = fopen(file_EV,"w");
489       if (fp_EV2 == NULL) {
490 	printf("Failure of saving the System.Name.unfold_totdn file.\n");
491 	fclose(fp_EV2);
492       }
493       strcpy(file_EV,".unfold_orbdn");
494       fnjoint(filepath,filename,file_EV);
495       fp_EV3 = fopen(file_EV,"w");
496       if (fp_EV3 == NULL) {
497 	printf("Failure of saving the System.Name.unfold_orbdn file.\n");
498 	fclose(fp_EV3);
499       }
500     }
501   }
502 
503   int kloopi,kloopj;
504   double kpt0,kpt1,kpt2;
505 
506   /* for gnuplot example */
507 
508   if (myid==Host_ID){
509     strcpy(file_EV,".unfold_plotexample");
510     fnjoint(filepath,filename,file_EV);
511     fp_EV0 = fopen(file_EV,"w");
512     if (fp_EV0 == NULL) {
513       printf("Failure of saving the System.Name.unfold_plotexample file.\n");
514       fclose(fp_EV0);
515     }
516 
517     fprintf(fp_EV0,"set yrange [%f:%f]\n",unfold_lbound*eV2Hartree,unfold_ubound*eV2Hartree);
518     fprintf(fp_EV0,"set ylabel 'Energy (eV)'\n");
519     fprintf(fp_EV0,"set xtics(");
520 
521     pk1 =  coe*kpoint[0][1]*(fracabc[1][1]*fracabc[2][2]-fracabc[1][2]*fracabc[2][1])
522           -coe*kpoint[0][2]*(fracabc[0][1]*fracabc[2][2]-fracabc[2][1]*fracabc[0][2])
523           +coe*kpoint[0][3]*(fracabc[0][1]*fracabc[1][2]-fracabc[1][1]*fracabc[0][2]);
524     pk2 = -coe*kpoint[0][1]*(fracabc[1][0]*fracabc[2][2]-fracabc[2][0]*fracabc[1][2])
525           +coe*kpoint[0][2]*(fracabc[0][0]*fracabc[2][2]-fracabc[2][0]*fracabc[0][2])
526           -coe*kpoint[0][3]*(fracabc[0][0]*fracabc[1][2]-fracabc[1][0]*fracabc[0][2]);
527     pk3 =  coe*kpoint[0][1]*(fracabc[1][0]*fracabc[2][1]-fracabc[1][1]*fracabc[2][0])
528           -coe*kpoint[0][2]*(fracabc[0][0]*fracabc[2][1]-fracabc[2][0]*fracabc[0][1])
529           +coe*kpoint[0][3]*(fracabc[0][0]*fracabc[1][1]-fracabc[1][0]*fracabc[0][1]);
530 
531     kdis=0.;
532     for (kloopi=0; kloopi<nkpoint; kloopi++) {
533 
534       kpt0=kpoint[kloopi][1];
535       kpt1=kpoint[kloopi][2];
536       kpt2=kpoint[kloopi][3];
537 
538       k1 =  coe*kpt0*(fracabc[1][1]*fracabc[2][2]-fracabc[1][2]*fracabc[2][1])
539            -coe*kpt1*(fracabc[0][1]*fracabc[2][2]-fracabc[2][1]*fracabc[0][2])
540            +coe*kpt2*(fracabc[0][1]*fracabc[1][2]-fracabc[1][1]*fracabc[0][2]);
541       k2 = -coe*kpt0*(fracabc[1][0]*fracabc[2][2]-fracabc[2][0]*fracabc[1][2])
542            +coe*kpt1*(fracabc[0][0]*fracabc[2][2]-fracabc[2][0]*fracabc[0][2])
543            -coe*kpt2*(fracabc[0][0]*fracabc[1][2]-fracabc[1][0]*fracabc[0][2]);
544       k3 = coe*kpt0*(fracabc[1][0]*fracabc[2][1]-fracabc[1][1]*fracabc[2][0])
545           -coe*kpt1*(fracabc[0][0]*fracabc[2][1]-fracabc[2][0]*fracabc[0][1])
546           +coe*kpt2*(fracabc[0][0]*fracabc[1][1]-fracabc[1][0]*fracabc[0][1]);
547 
548       K[0]=k1*rtv[1][1]+k2*rtv[2][1]+k3*rtv[3][1];
549       K[1]=k1*rtv[1][2]+k2*rtv[2][2]+k3*rtv[3][2];
550       K[2]=k1*rtv[1][3]+k2*rtv[2][3]+k3*rtv[3][3];
551       K2[0]=pk1*rtv[1][1]+pk2*rtv[2][1]+pk3*rtv[3][1];
552       K2[1]=pk1*rtv[1][2]+pk2*rtv[2][2]+pk3*rtv[3][2];
553       K2[2]=pk1*rtv[1][3]+pk2*rtv[2][3]+pk3*rtv[3][3];
554       dis2pk=distwovec(K,K2);
555       kdis+=dis2pk;
556 
557       if (kloopi==nkpoint-1)
558         fprintf(fp_EV0,"'%s' %f)\n",unfold_kpoint_name[kloopi],kdis);
559       else
560         fprintf(fp_EV0,"'%s' %f,",unfold_kpoint_name[kloopi],kdis);
561 
562       pk1=k1;
563       pk2=k2;
564       pk3=k3;
565     }
566 
567     pk1=coe*kpoint[0][1]*(fracabc[1][1]*fracabc[2][2]-fracabc[1][2]*fracabc[2][1])
568       -coe*kpoint[0][2]*(fracabc[0][1]*fracabc[2][2]-fracabc[2][1]*fracabc[0][2])
569       +coe*kpoint[0][3]*(fracabc[0][1]*fracabc[1][2]-fracabc[1][1]*fracabc[0][2]);
570     pk2=-coe*kpoint[0][1]*(fracabc[1][0]*fracabc[2][2]-fracabc[2][0]*fracabc[1][2])
571       +coe*kpoint[0][2]*(fracabc[0][0]*fracabc[2][2]-fracabc[2][0]*fracabc[0][2])
572       -coe*kpoint[0][3]*(fracabc[0][0]*fracabc[1][2]-fracabc[1][0]*fracabc[0][2]);
573     pk3=coe*kpoint[0][1]*(fracabc[1][0]*fracabc[2][1]-fracabc[1][1]*fracabc[2][0])
574       -coe*kpoint[0][2]*(fracabc[0][0]*fracabc[2][1]-fracabc[2][0]*fracabc[0][1])
575       +coe*kpoint[0][3]*(fracabc[0][0]*fracabc[1][1]-fracabc[1][0]*fracabc[0][1]);
576     fprintf(fp_EV0,"set xrange [0:%f]\n",kdis);
577     fprintf(fp_EV0,"set arrow nohead from 0,0 to %f,0\n",kdis);
578     kdis=0.;
579     for (kloopi=1; kloopi<nkpoint-1; kloopi++) {
580       fprintf(fp_EV0,"set arrow nohead from ");
581       kpt0=kpoint[kloopi][1];
582       kpt1=kpoint[kloopi][2];
583       kpt2=kpoint[kloopi][3];
584       k1= coe*kpt0*(fracabc[1][1]*fracabc[2][2]-fracabc[1][2]*fracabc[2][1])
585         -coe*kpt1*(fracabc[0][1]*fracabc[2][2]-fracabc[2][1]*fracabc[0][2])
586         +coe*kpt2*(fracabc[0][1]*fracabc[1][2]-fracabc[1][1]*fracabc[0][2]);
587       k2=-coe*kpt0*(fracabc[1][0]*fracabc[2][2]-fracabc[2][0]*fracabc[1][2])
588         +coe*kpt1*(fracabc[0][0]*fracabc[2][2]-fracabc[2][0]*fracabc[0][2])
589         -coe*kpt2*(fracabc[0][0]*fracabc[1][2]-fracabc[1][0]*fracabc[0][2]);
590       k3= coe*kpt0*(fracabc[1][0]*fracabc[2][1]-fracabc[1][1]*fracabc[2][0])
591         -coe*kpt1*(fracabc[0][0]*fracabc[2][1]-fracabc[2][0]*fracabc[0][1])
592         +coe*kpt2*(fracabc[0][0]*fracabc[1][1]-fracabc[1][0]*fracabc[0][1]);
593       K[0]=k1*rtv[1][1]+k2*rtv[2][1]+k3*rtv[3][1];
594       K[1]=k1*rtv[1][2]+k2*rtv[2][2]+k3*rtv[3][2];
595       K[2]=k1*rtv[1][3]+k2*rtv[2][3]+k3*rtv[3][3];
596       K2[0]=pk1*rtv[1][1]+pk2*rtv[2][1]+pk3*rtv[3][1];
597       K2[1]=pk1*rtv[1][2]+pk2*rtv[2][2]+pk3*rtv[3][2];
598       K2[2]=pk1*rtv[1][3]+pk2*rtv[2][3]+pk3*rtv[3][3];
599       dis2pk=distwovec(K,K2);
600       kdis+=dis2pk;
601       fprintf(fp_EV0,"%f,%f to %f,%f\n",kdis,unfold_lbound*eV2Hartree,kdis,unfold_ubound*eV2Hartree);
602       pk1=k1;
603       pk2=k2;
604       pk3=k3;
605     }
606     fprintf(fp_EV0,"set style circle radius 0\n");
607     fprintf(fp_EV0,"plot '%s.unfold_totup' using 1:2:($3)*0.05 notitle with circles lc rgb 'red'\n",filename);
608   }
609 
610   /* end gnuplot example */
611 
612   pk1=coe*kpoint[0][1]*(fracabc[1][1]*fracabc[2][2]-fracabc[1][2]*fracabc[2][1])
613     -coe*kpoint[0][2]*(fracabc[0][1]*fracabc[2][2]-fracabc[2][1]*fracabc[0][2])
614     +coe*kpoint[0][3]*(fracabc[0][1]*fracabc[1][2]-fracabc[1][1]*fracabc[0][2]);
615   pk2=-coe*kpoint[0][1]*(fracabc[1][0]*fracabc[2][2]-fracabc[2][0]*fracabc[1][2])
616     +coe*kpoint[0][2]*(fracabc[0][0]*fracabc[2][2]-fracabc[2][0]*fracabc[0][2])
617     -coe*kpoint[0][3]*(fracabc[0][0]*fracabc[1][2]-fracabc[1][0]*fracabc[0][2]);
618   pk3=coe*kpoint[0][1]*(fracabc[1][0]*fracabc[2][1]-fracabc[1][1]*fracabc[2][0])
619     -coe*kpoint[0][2]*(fracabc[0][0]*fracabc[2][1]-fracabc[2][0]*fracabc[0][1])
620     +coe*kpoint[0][3]*(fracabc[0][0]*fracabc[1][1]-fracabc[1][0]*fracabc[0][1]);
621 
622   /* for standard output */
623 
624   if (myid==Host_ID && 0<level_stdout) {
625     printf(" The number of selected k points is %i.\n",totnkpts);
626 
627     printf(" For each path: (");
628     for (i=0; i<nkpoint; i++){
629       printf(" %i",np[i]);
630     }
631     printf(" )\n\n");
632     printf("                 ka         kb         kc\n");
633   }
634 
635   /*********************************************
636                       kloopi
637   *********************************************/
638 
639   kdis=0.;
640   kloop = 0;
641   for (kloopi=0; kloopi<nkpoint; kloopi++)
642     for (kloopj=0; kloopj<np[kloopi]; kloopj++) {
643 
644       kloop++;
645 
646       if (np[kloopi]==1) {
647 	kpt0=kpoint[kloopi][1];
648 	kpt1=kpoint[kloopi][2];
649 	kpt2=kpoint[kloopi][3];
650       } else {
651 	kpt0=kpoint[kloopi][1]+kloopj*(kpoint[kloopi+1][1]-kpoint[kloopi][1])/np[kloopi];
652 	kpt1=kpoint[kloopi][2]+kloopj*(kpoint[kloopi+1][2]-kpoint[kloopi][2])/np[kloopi];
653 	kpt2=kpoint[kloopi][3]+kloopj*(kpoint[kloopi+1][3]-kpoint[kloopi][3])/np[kloopi];
654       }
655 
656       /* for standard output */
657 
658       if (myid==Host_ID && 0<level_stdout) {
659 
660         if (kloop==totnkpts)
661           printf("  %3d/%3d   %10.6f %10.6f %10.6f\n\n",kloop,totnkpts,kpt0,kpt1,kpt2);
662         else
663           printf("  %3d/%3d   %10.6f %10.6f %10.6f\n",kloop,totnkpts,kpt0,kpt1,kpt2);
664       }
665 
666       k1= coe*kpt0*(fracabc[1][1]*fracabc[2][2]-fracabc[1][2]*fracabc[2][1])
667         -coe*kpt1*(fracabc[0][1]*fracabc[2][2]-fracabc[2][1]*fracabc[0][2])
668         +coe*kpt2*(fracabc[0][1]*fracabc[1][2]-fracabc[1][1]*fracabc[0][2]);
669       k2=-coe*kpt0*(fracabc[1][0]*fracabc[2][2]-fracabc[2][0]*fracabc[1][2])
670         +coe*kpt1*(fracabc[0][0]*fracabc[2][2]-fracabc[2][0]*fracabc[0][2])
671         -coe*kpt2*(fracabc[0][0]*fracabc[1][2]-fracabc[1][0]*fracabc[0][2]);
672       k3= coe*kpt0*(fracabc[1][0]*fracabc[2][1]-fracabc[1][1]*fracabc[2][0])
673         -coe*kpt1*(fracabc[0][0]*fracabc[2][1]-fracabc[2][0]*fracabc[0][1])
674         +coe*kpt2*(fracabc[0][0]*fracabc[1][1]-fracabc[1][0]*fracabc[0][1]);
675 
676       /* make S */
677 
678       for (i1=1; i1<=n; i1++){
679 	for (j1=1; j1<=n; j1++){
680 	  S[i1][j1] = Complex(0.0,0.0);
681 	}
682       }
683 
684       k = 0;
685       for (AN=1; AN<=atomnum; AN++){
686 	GA_AN = order_GA[AN];
687 	wanA = WhatSpecies[GA_AN];
688 	tnoA = Spe_Total_CNO[wanA];
689 	Anum = MP[GA_AN];
690 
691 	for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
692 	  GB_AN = natn[GA_AN][LB_AN];
693 	  Rn = ncn[GA_AN][LB_AN];
694 	  wanB = WhatSpecies[GB_AN];
695 	  tnoB = Spe_Total_CNO[wanB];
696 	  Bnum = MP[GB_AN];
697 
698 	  l1 = atv_ijk[Rn][1];
699 	  l2 = atv_ijk[Rn][2];
700 	  l3 = atv_ijk[Rn][3];
701 	  kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;
702 
703 	  si = sin(2.0*PI*kRn);
704 	  co = cos(2.0*PI*kRn);
705 
706 	  for (i=0; i<tnoA; i++){
707 	    for (j=0; j<tnoB; j++){
708 
709 	      S[Anum+i][Bnum+j].r += S1[k]*co;
710 	      S[Anum+i][Bnum+j].i += S1[k]*si;
711 
712 	      k++;
713 	    }
714 	  }
715 	}
716       }
717 
718       /* diagonalization of S */
719       Eigen_PHH(mpi_comm_level1,S,koS,n,n,1);
720 
721       if (3<=level_stdout){
722 	printf("  kloop %i, k1 k2 k3 %10.6f %10.6f %10.6f\n",kloop,k1,k2,k3);
723 	for (i1=1; i1<=n; i1++){
724 	  printf("  Eigenvalues of OLP  %2d  %15.12f\n",i1,koS[i1]);
725 	}
726       }
727 
728       /* minus eigenvalues to 1.0e-14 */
729 
730       for (l=1; l<=n; l++){
731 	if (koS[l]<0.0) koS[l] = 1.0e-14;
732       }
733 
734       /* calculate S*1/sqrt(koS) */
735 
736       for (l=1; l<=n; l++) M1[l] = 1.0/sqrt(koS[l]);
737 
738       /* S * M1  */
739 
740       for (i1=1; i1<=n; i1++){
741 	for (j1=1; j1<=n; j1++){
742 	  S[i1][j1].r = S[i1][j1].r*M1[j1];
743 	  S[i1][j1].i = S[i1][j1].i*M1[j1];
744 	}
745       }
746 
747       /* loop for spin */
748 
749       for (spin=0; spin<=SpinP_switch; spin++){
750 
751 	/* make H */
752 
753 	for (i1=1; i1<=n; i1++){
754 	  for (j1=1; j1<=n; j1++){
755 	    H[spin][i1][j1] = Complex(0.0,0.0);
756 	  }
757 	}
758 
759 	k = 0;
760 	for (AN=1; AN<=atomnum; AN++){
761 	  GA_AN = order_GA[AN];
762 	  wanA = WhatSpecies[GA_AN];
763 	  tnoA = Spe_Total_CNO[wanA];
764 	  Anum = MP[GA_AN];
765 
766 	  for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
767 	    GB_AN = natn[GA_AN][LB_AN];
768 	    Rn = ncn[GA_AN][LB_AN];
769 	    wanB = WhatSpecies[GB_AN];
770 	    tnoB = Spe_Total_CNO[wanB];
771 	    Bnum = MP[GB_AN];
772 
773 	    l1 = atv_ijk[Rn][1];
774 	    l2 = atv_ijk[Rn][2];
775 	    l3 = atv_ijk[Rn][3];
776 	    kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;
777 
778 	    si = sin(2.0*PI*kRn);
779 	    co = cos(2.0*PI*kRn);
780 
781 	    for (i=0; i<tnoA; i++){
782 
783 	      for (j=0; j<tnoB; j++){
784 
785 		H[spin][Anum+i][Bnum+j].r += H1[spin][k]*co;
786 		H[spin][Anum+i][Bnum+j].i += H1[spin][k]*si;
787 
788 		k++;
789 
790 	      }
791 	    }
792 	  }
793 	}
794 
795 	/* first transpose of S */
796 
797 	for (i1=1; i1<=n; i1++){
798 	  for (j1=i1+1; j1<=n; j1++){
799 	    Ctmp1 = S[i1][j1];
800 	    Ctmp2 = S[j1][i1];
801 	    S[i1][j1] = Ctmp2;
802 	    S[j1][i1] = Ctmp1;
803 	  }
804 	}
805 
806 	/****************************************************
807                       M1 * U^t * H * U * M1
808 	****************************************************/
809 
810 	/* H * U * M1 */
811 
812 #pragma omp parallel for shared(spin,n,myid,is1,ie1,S,H,C) private(i1,j1,l)
813 
814 	for (j1=is1[myid]; j1<=ie1[myid]; j1++){
815 
816 	  for (i1=1; i1<=(n-1); i1+=2){
817 
818 	    double sum0  = 0.0, sum1  = 0.0;
819 	    double sumi0 = 0.0, sumi1 = 0.0;
820 
821 	    for (l=1; l<=n; l++){
822 	      sum0  += H[spin][i1+0][l].r*S[j1][l].r - H[spin][i1+0][l].i*S[j1][l].i;
823 	      sum1  += H[spin][i1+1][l].r*S[j1][l].r - H[spin][i1+1][l].i*S[j1][l].i;
824 
825 	      sumi0 += H[spin][i1+0][l].r*S[j1][l].i + H[spin][i1+0][l].i*S[j1][l].r;
826 	      sumi1 += H[spin][i1+1][l].r*S[j1][l].i + H[spin][i1+1][l].i*S[j1][l].r;
827 	    }
828 
829 	    C[spin][j1][i1+0].r = sum0;
830 	    C[spin][j1][i1+1].r = sum1;
831 
832 	    C[spin][j1][i1+0].i = sumi0;
833 	    C[spin][j1][i1+1].i = sumi1;
834 	  }
835 
836 	  for (; i1<=n; i1++){
837 
838 	    double sum  = 0.0;
839 	    double sumi = 0.0;
840 
841 	    for (l=1; l<=n; l++){
842 	      sum  += H[spin][i1][l].r*S[j1][l].r - H[spin][i1][l].i*S[j1][l].i;
843 	      sumi += H[spin][i1][l].r*S[j1][l].i + H[spin][i1][l].i*S[j1][l].r;
844 	    }
845 
846 	    C[spin][j1][i1].r = sum;
847 	    C[spin][j1][i1].i = sumi;
848 	  }
849 
850 	} /* i1 */
851 
852 	/* M1 * U^+ H * U * M1 */
853 
854 #pragma omp parallel for shared(spin,n,is1,ie1,myid,S,H,C) private(i1,j1,l)
855 
856 	for (i1=1; i1<=n; i1++){
857 	  for (j1=is1[myid]; j1<=ie1[myid]; j1++){
858 
859 	    double sum  = 0.0;
860 	    double sumi = 0.0;
861 
862 	    for (l=1; l<=n; l++){
863 	      sum  +=  S[i1][l].r*C[spin][j1][l].r + S[i1][l].i*C[spin][j1][l].i;
864 	      sumi +=  S[i1][l].r*C[spin][j1][l].i - S[i1][l].i*C[spin][j1][l].r;
865 	    }
866 
867 	    H[spin][j1][i1].r = sum;
868 	    H[spin][j1][i1].i = sumi;
869 
870 	  }
871 	}
872 
873 	/* broadcast H */
874 
875 	BroadCast_ComplexMatrix(mpi_comm_level1,H[spin],n,is1,ie1,myid,numprocs,
876 				stat_send,request_send,request_recv);
877 
878 	/* H to C */
879 
880 	for (i1=1; i1<=n; i1++){
881 	  for (j1=1; j1<=n; j1++){
882 	    C[spin][j1][i1] = H[spin][i1][j1];
883 	  }
884 	}
885 
886 	/* penalty for ill-conditioning states */
887 
888 	EV_cut0 = 1.0e-9;
889 
890 	for (i1=1; i1<=n; i1++){
891 
892 	  if (koS[i1]<EV_cut0){
893 	    C[spin][i1][i1].r += pow((koS[i1]/EV_cut0),-2.0) - 1.0;
894 	  }
895 
896 	  /* cutoff the interaction between the ill-conditioned state */
897 
898 	  if (1.0e+3<C[spin][i1][i1].r){
899 	    for (j1=1; j1<=n; j1++){
900 	      C[spin][i1][j1] = Complex(0.0,0.0);
901 	      C[spin][j1][i1] = Complex(0.0,0.0);
902 	    }
903 	    C[spin][i1][i1].r = 1.0e+4;
904 	  }
905 	}
906 
907 	/* diagonalization of C */
908 	Eigen_PHH(mpi_comm_level1,C[spin],ko[spin],n,n,0);
909 
910 	for (i1=1; i1<=n; i1++){
911 	  EIGEN[spin][i1] = ko[spin][i1];
912 	}
913 
914 	/****************************************************
915           transformation to the original eigenvectors.
916                  NOTE JRCAT-244p and JAIST-2122p
917 	****************************************************/
918 
919 	/*  The H matrix is distributed by row */
920 
921 	for (i1=1; i1<=n; i1++){
922 	  for (j1=is1[myid]; j1<=ie1[myid]; j1++){
923 	    H[spin][j1][i1] = C[spin][i1][j1];
924 	  }
925 	}
926 
927 	/* second transpose of S */
928 
929 	for (i1=1; i1<=n; i1++){
930 	  for (j1=i1+1; j1<=n; j1++){
931 	    Ctmp1 = S[i1][j1];
932 	    Ctmp2 = S[j1][i1];
933 	    S[i1][j1] = Ctmp2;
934 	    S[j1][i1] = Ctmp1;
935 	  }
936 	}
937 
938 	/* C is distributed by row in each processor */
939 
940 #pragma omp parallel for shared(spin,n,is1,ie1,myid,S,H,C) private(i1,j1,l,sum,sumi)
941 
942 	for (j1=is1[myid]; j1<=ie1[myid]; j1++){
943 	  for (i1=1; i1<=n; i1++){
944 
945 	    sum  = 0.0;
946 	    sumi = 0.0;
947 	    for (l=1; l<=n; l++){
948 	      sum  += S[i1][l].r*H[spin][j1][l].r - S[i1][l].i*H[spin][j1][l].i;
949 	      sumi += S[i1][l].r*H[spin][j1][l].i + S[i1][l].i*H[spin][j1][l].r;
950 	    }
951 
952 	    C[spin][j1][i1].r = sum;
953 	    C[spin][j1][i1].i = sumi;
954 	  }
955 	}
956 
957 	/* broadcast C:
958 	   C is distributed by row in each processor
959 	*/
960 
961 	BroadCast_ComplexMatrix(mpi_comm_level1,C[spin],n,is1,ie1,myid,numprocs,
962 				stat_send,request_send,request_recv);
963 
964       } /* spin */
965 
966       /****************************************************
967                           Output
968       ****************************************************/
969 
970       if (myid==Host_ID){
971 
972 #ifdef xt3
973         setvbuf(fp_EV,buf,_IOFBF,fp_bsize);  /* setvbuf */
974 #endif
975 
976 	K[0]=k1*rtv[1][1]+k2*rtv[2][1]+k3*rtv[3][1];
977 	K[1]=k1*rtv[1][2]+k2*rtv[2][2]+k3*rtv[3][2];
978 	K[2]=k1*rtv[1][3]+k2*rtv[2][3]+k3*rtv[3][3];
979 	K2[0]=pk1*rtv[1][1]+pk2*rtv[2][1]+pk3*rtv[3][1];
980 	K2[1]=pk1*rtv[1][2]+pk2*rtv[2][2]+pk3*rtv[3][2];
981 	K2[2]=pk1*rtv[1][3]+pk2*rtv[2][3]+pk3*rtv[3][3];
982 	dis2pk=distwovec(K,K2);
983 	kdis+=dis2pk;
984 
985 	num0 = 4;
986 	num1 = n/num0 + 1*(n%num0!=0);
987 
988 	for (spin=0; spin<=SpinP_switch; spin++){
989 	  for (i=1; i<=num1; i++){
990 
991             countkj_e=0;
992             for (j=0; j<4; j++) iskj_e[j]=0;
993 
994 	    for (i1=-2; i1<=0; i1++){
995 
996 	      for (j=1; j<=num0; j++){
997 
998 		j1 = num0*(i-1) + j;
999 
1000 		if (j1<=n){
1001 
1002 		  if (i1==-1){
1003 
1004                     if (((EIGEN[spin][j1]-ChemP)<=unfold_ubound)&&((EIGEN[spin][j1]-ChemP)>=unfold_lbound)) {
1005                       kj_e[countkj_e]=(EIGEN[spin][j1]-ChemP);
1006                       iskj_e[countkj_e]=1; }
1007                     countkj_e++;
1008 
1009 		  }
1010 
1011 		}
1012 	      }
1013 	    }
1014 
1015 	    i1 = 1;
1016             int iorb;
1017 
1018 	    for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
1019               iorb=0;
1020 
1021 	      wan1 = WhatSpecies[Gc_AN];
1022 
1023 	      for (l=0; l<=Supported_MaxL; l++){
1024 		for (mul=0; mul<Spe_Num_CBasis[wan1][l]; mul++){
1025 		  for (m=0; m<(2*l+1); m++){
1026 
1027                     countkj_e = 0;
1028 
1029 		    for (j=1; j<=num0; j++){
1030 
1031 		      j1 = num0*(i-1) + j;
1032 
1033 		      if (0<i1 && j1<=n){
1034 			kj_v[countkj_e++][Gc_AN-1][iorb]=Complex(C[spin][j1][i1].r,C[spin][j1][i1].i);
1035 		      }
1036 		    }
1037 
1038 		    i1++;
1039                     iorb++;
1040 		  }
1041 		}
1042 	      }
1043 	    }
1044 
1045             for (countkj_e=0; countkj_e<4; countkj_e++) if (iskj_e[countkj_e]==1) {
1046 
1047 	      for (k=0; k<atomnum; k++) for (j=0; j<Norbperatom[k]; j++) weight[k][j]=Complex(0.,0.);
1048 
1049 	      for (j=0; j<atomnum; j++) for (k=0; k<atomnum; k++) for (l=0; l<Norbperatom[j]; l++)
1050 		for (m=0; m<Norbperatom[k]; m++) tmpelem[j][k][l][m]=Cmul(Conjg(kj_v[countkj_e][j][l]),kj_v[countkj_e][k][m]);
1051 
1052 	      int NA,ir,MA,MO,NO;
1053 	      dcomplex dtmp;
1054 	      for (NA=0; NA<atomnum; NA++) {
1055 		int n=unfold_mapN2n[NA]; int r0x=tabr4RN[0][NA][0]; int r0y=tabr4RN[0][NA][1]; int r0z=tabr4RN[0][NA][2];
1056 		r0[0]=r0x*a[0]+r0y*b[0]+r0z*c[0];
1057 		r0[1]=r0x*a[1]+r0y*b[1]+r0z*c[1];
1058 		r0[2]=r0x*a[2]+r0y*b[2]+r0z*c[2];
1059 		dcomplex phase1=Cexp(Complex(0.,-dot(K,r0)));
1060 		for (ir=0; ir<nr; ir++) {
1061 		  if (rnmap[ir][n][1]==-1) continue;
1062 		  r[0]=rlist[ir][0]*a[0]+rlist[ir][1]*b[0]+rlist[ir][2]*c[0];
1063 		  r[1]=rlist[ir][0]*a[1]+rlist[ir][1]*b[1]+rlist[ir][2]*c[1];
1064 		  r[2]=rlist[ir][0]*a[2]+rlist[ir][1]*b[2]+rlist[ir][2]*c[2];
1065 		  dcomplex phase2=Cmul(phase1,Cexp(Complex(0.,dot(K,r))));
1066 
1067 		  for (MA=0; MA<atomnum; MA++) {
1068 		    if (Elem[rnmap[ir][n][0]][MA][rnmap[ir][n][1]][0][0]<-99999.) continue;
1069 		    for (MO=0; MO<Norbperatom[MA]; MO++) for (NO=0; NO<Norbperatom[NA]; NO++) {
1070 		      dtmp=RCmul(Elem[rnmap[ir][n][0]][MA][rnmap[ir][n][1]][MO][NO],tmpelem[MA][NA][MO][NO]);
1071 		      weight[NA][NO]=Cadd(weight[NA][NO],Cmul(phase2,dtmp));
1072 		    }}}}
1073 
1074 	      double sumallorb=0.;
1075 
1076 	      for (j=0; j<atomnum; j++){
1077                 for (k=0; k<Norbperatom[j]; k++){
1078                    sumallorb += weight[j][k].r;
1079 		}
1080 	      }
1081 
1082 	      if (spin==0)
1083                 fprintf(fp_EV,"%f %f %10.7f\n",kdis,kj_e[countkj_e]*eV2Hartree,fabs(sumallorb)/coe);
1084 	      else
1085                 fprintf(fp_EV2,"%f %f %10.7f\n",kdis,kj_e[countkj_e]*eV2Hartree,fabs(sumallorb)/coe);
1086 
1087 	      /* set negative weight to zero for plotting purpose */
1088 	      for (j=0; j<atomnum; j++){
1089                 for (k=0; k<Norbperatom[j]; k++){
1090                    if (weight[j][k].r<0.0) weight[j][k].r=0.0;
1091 		}
1092 	      }
1093 
1094 	      if (spin==0) fprintf(fp_EV1,"%f %f ",kdis,kj_e[countkj_e]*eV2Hartree);
1095 	      else fprintf(fp_EV3,"%f %f ",kdis,kj_e[countkj_e]*eV2Hartree);
1096 	      for (j=0; j<atomnum; j++) {
1097 		if (spin==0) for (k=0; k<Norbperatom[j]; k++) fprintf(fp_EV1,"%e ",weight[j][k].r/coe);
1098 		else for (k=0; k<Norbperatom[j]; k++) fprintf(fp_EV3,"%e ",weight[j][k].r/coe);
1099 	      }
1100 	      if (spin==0) fprintf(fp_EV1,"\n");
1101 	      else fprintf(fp_EV3,"\n");
1102 	    }
1103 	  }
1104 	}
1105 
1106       } /* if (myid==Host_ID) */
1107 
1108       pk1=k1;
1109       pk2=k2;
1110       pk3=k3;
1111     }  /* kloop */
1112 
1113   if (myid==Host_ID){
1114     if (fp_EV0 != NULL) fclose(fp_EV0);
1115     if (fp_EV != NULL) fclose(fp_EV);
1116     if (fp_EV1 != NULL) fclose(fp_EV1);
1117     if ((SpinP_switch==1)&&(fp_EV2 != NULL)) fclose(fp_EV2);
1118     if ((SpinP_switch==1)&&(fp_EV3 != NULL)) fclose(fp_EV3);
1119   }
1120 
1121   /****************************************************
1122                        free arrays
1123   ****************************************************/
1124 
1125   free(K);
1126   free(K2);
1127   free(r);
1128   free(r0);
1129   free(kj_e);
1130   free(iskj_e);
1131   free(unfold_mapN2n);
1132   free(a);
1133   free(b);
1134   free(c);
1135   free(unfold_origin);
1136   free(np);
1137   for (i=0; i<3; i++) free(unfold_abc[i]); free(unfold_abc);
1138   for (i=0; i<4; i++) for (j=0; j<atomnum; j++) free(kj_v[i][j]);
1139   for (i=0; i<4; i++) free(kj_v[i]); free(kj_v);
1140   for (i=0; i<atomnum; i++) free(weight[i]); free(weight);
1141   for (i=0; i<NR; i++) free(Rlist[i]); free(Rlist);
1142   for (i=0; i<nr; i++) free(rlist[i]); free(rlist);
1143   for (i=0; i<21; i++) for (j=0; j<21; j++) free(_MapR[i][j]);
1144   for (i=0; i<21; i++) free(_MapR[i]); free(_MapR);
1145   for (i=0; i<NR; i++) for (j=0; j<atomnum; j++) free(tabr4RN[i][j]);
1146   for (i=0; i<NR; i++) free(tabr4RN[i]); free(tabr4RN);
1147   for (i=0; i<nr; i++) for (j=0; j<natom; j++) free(rnmap[i][j]);
1148   for (i=0; i<nr; i++) free(rnmap[i]); free(rnmap);
1149   for (i=0; i<NR; i++) for (j=0; j<atomnum; j++) for (k=0; k<atomnum; k++) for (l=0; l<Norbperatom[j]; l++) free(Elem[i][j][k][l]);
1150   for (i=0; i<NR; i++) for (j=0; j<atomnum; j++) for (k=0; k<atomnum; k++) free(Elem[i][j][k]);
1151   for (i=0; i<NR; i++) for (j=0; j<atomnum; j++) free(Elem[i][j]);
1152   for (i=0; i<NR; i++) free(Elem[i]); free(Elem);
1153   for (i=0; i<atomnum; i++) for (j=0; j<atomnum; j++) for (k=0; k<Norbperatom[i]; k++)
1154     free(tmpelem[i][j][k]);
1155   for (i=0; i<atomnum; i++) for (j=0; j<atomnum; j++) free(tmpelem[i][j]);
1156   for (i=0; i<atomnum; i++) free(tmpelem[i]); free(tmpelem);
1157   for (i=0; i<3; i++) free(fracabc[i]); free(fracabc);
1158   free(Norbperatom);
1159   for (i=0; i<unfold_Nkpoint+1; i++) free(unfold_kpoint[i]); free(unfold_kpoint);
1160   for (i=0; i<(unfold_Nkpoint+1); i++){
1161     free(unfold_kpoint_name[i]);
1162   }
1163   free(unfold_kpoint_name);
1164 
1165   free(stat_send);
1166   free(request_send);
1167   free(request_recv);
1168 
1169   free(is1);
1170   free(ie1);
1171 
1172   free(MP);
1173   free(order_GA);
1174   free(My_NZeros);
1175   free(SP_NZeros);
1176   free(SP_Atoms);
1177 
1178   for (i=0; i<List_YOUSO[23]; i++){
1179     free(ko[i]);
1180   }
1181   free(ko);
1182 
1183   free(koS);
1184 
1185   for (j=0; j<List_YOUSO[23]; j++){
1186     free(EIGEN[j]);
1187   }
1188   free(EIGEN);
1189 
1190   for (i=0; i<List_YOUSO[23]; i++){
1191     for (j=0; j<n+1; j++){
1192       free(H[i][j]);
1193     }
1194     free(H[i]);
1195   }
1196   free(H);
1197 
1198   for (i=0; i<n+1; i++){
1199     free(S[i]);
1200   }
1201   free(S);
1202 
1203   free(M1);
1204 
1205   for (i=0; i<List_YOUSO[23]; i++){
1206     for (j=0; j<n+1; j++){
1207       free(C[i][j]);
1208     }
1209     free(C[i]);
1210   }
1211   free(C);
1212 
1213   free(S1);
1214 
1215   for (spin=0; spin<(SpinP_switch+1); spin++){
1216     free(H1[spin]);
1217   }
1218   free(H1);
1219 
1220   dtime(&TEtime);
1221 }
1222 
1223 
1224 
1225 
Unfolding_Bands_NonCol(int nkpoint,double ** kpoint,int SpinP_switch,double ***** nh,double ***** ImNL,double **** CntOLP)1226 static void Unfolding_Bands_NonCol(
1227 				   int nkpoint, double **kpoint,
1228 				   int SpinP_switch,
1229 				   double *****nh,
1230 				   double *****ImNL,
1231 				   double ****CntOLP)
1232 {
1233   double coe;
1234   double* a;
1235   double* b;
1236   double* c;
1237   double* K;
1238   double* K2;
1239   double* r;
1240   double* r0;
1241   double* kj_e;
1242   int* iskj_e;
1243   int countkj_e;
1244   double pk1,pk2,pk3;
1245   double dis2pk;
1246   double kdis;
1247   dcomplex** weight;
1248   dcomplex** weight1;
1249   dcomplex*** kj_v;
1250   dcomplex*** kj_v1;
1251   dcomplex**** tmpelem;
1252   dcomplex**** tmpelem1;
1253   double **fracabc;
1254 
1255   int i,j,k,l,n,wan,m,ii1,jj1,jj2,n2;
1256   int *MP;
1257   int *order_GA;
1258   int *My_NZeros;
1259   int *SP_NZeros;
1260   int *SP_Atoms;
1261   int i1,j1,po,spin,n1,size_H1;
1262   int num2,RnB,l1,l2,l3,kloop,AN,Rn;
1263   int ct_AN,h_AN,wanA,tnoA,wanB,tnoB;
1264   int GA_AN,Anum;
1265   int ii,ij,ik,MaxN;
1266   int wan1,mul,Gc_AN,num0,num1;
1267   int LB_AN,GB_AN,Bnum;
1268 
1269   double time0,tmp,av_num;
1270   double snum_i,snum_j,snum_k,k1,k2,k3,sum,sumi,Num_State,FermiF;
1271   double x,Dnum,Dnum2,AcP,ChemP_MAX,ChemP_MIN;
1272   double *S1;
1273   double *RH0;
1274   double *RH1;
1275   double *RH2;
1276   double *RH3;
1277   double *IH0;
1278   double *IH1;
1279   double *IH2;
1280   double *ko,*M1,*EIGEN;
1281   double *koS;
1282   double EV_cut0;
1283   dcomplex **H,**S,**C;
1284   dcomplex Ctmp1,Ctmp2;
1285   double **Ctmp;
1286   double u2,v2,uv,vu;
1287   double dum,sumE,kRn,si,co;
1288   double Resum,ResumE,Redum,Redum2,Imdum;
1289   double TStime,TEtime,SiloopTime,EiloopTime;
1290   double FermiEps = 1.0e-14;
1291   double x_cut = 30.0;
1292   double OLP_eigen_cut=Threshold_OLP_Eigen;
1293 
1294   char *Name_Angular[Supported_MaxL+1][2*(Supported_MaxL+1)+1];
1295   char *Name_Multiple[20];
1296   char file_EV[YOUSO10];
1297   FILE *fp_EV0;
1298   FILE *fp_EV;
1299   FILE *fp_EV1;
1300   char buf[fp_bsize];          /* setvbuf */
1301   int numprocs,myid,ID;
1302   int OMPID,Nthrds,Nprocs;
1303   int *is1,*ie1;
1304   int *is2,*ie2;
1305   int *is12,*ie12;
1306   MPI_Status *stat_send;
1307   MPI_Request *request_send;
1308   MPI_Request *request_recv;
1309 
1310   /* MPI */
1311   MPI_Comm_size(mpi_comm_level1,&numprocs);
1312   MPI_Comm_rank(mpi_comm_level1,&myid);
1313   MPI_Barrier(mpi_comm_level1);
1314 
1315   if (myid==Host_ID && 0<level_stdout) {
1316     printf("\n*******************************************************\n");
1317     printf("                 Unfolding of Bands \n");
1318     printf("*******************************************************\n\n");fflush(stdout);
1319   }
1320 
1321   dtime(&TStime);
1322 
1323   /****************************************************
1324              calculation of the array size
1325   ****************************************************/
1326 
1327   n = 0;
1328   for (i=1; i<=atomnum; i++){
1329     wanA  = WhatSpecies[i];
1330     n  = n + Spe_Total_CNO[wanA];
1331   }
1332   n2 = 2*n + 2;
1333 
1334   /****************************************************
1335    Allocation
1336   ****************************************************/
1337 
1338   getnorbperatom();
1339   exitcode=0;
1340   buildMapRlist();
1341   if (exitcode==1) {
1342     for (i=0; i<3; i++) free(unfold_abc[i]); free(unfold_abc);
1343     free(unfold_origin);
1344     free(unfold_mapN2n);
1345     for (i=0; i<unfold_Nkpoint+1; i++) free(unfold_kpoint[i]); free(unfold_kpoint);
1346     return;
1347   }
1348 
1349   a = (double*)malloc(sizeof(double)*3);
1350   b = (double*)malloc(sizeof(double)*3);
1351   c = (double*)malloc(sizeof(double)*3);
1352   a[0]=unfold_abc[0][0];
1353   a[1]=unfold_abc[0][1];
1354   a[2]=unfold_abc[0][2];
1355   b[0]=unfold_abc[1][0];
1356   b[1]=unfold_abc[1][1];
1357   b[2]=unfold_abc[1][2];
1358   c[0]=unfold_abc[2][0];
1359   c[1]=unfold_abc[2][1];
1360   c[2]=unfold_abc[2][2];
1361   buildtabr4RN(a,b,c,unfold_origin,unfold_mapN2n);
1362   if (exitcode==1) {
1363     for (i=0; i<3; i++) free(unfold_abc[i]); free(unfold_abc);
1364     free(unfold_origin);
1365     free(unfold_mapN2n);
1366     for (i=0; i<unfold_Nkpoint+1; i++) free(unfold_kpoint[i]); free(unfold_kpoint);
1367     free(a);
1368     free(b);
1369     free(c);
1370     return;
1371   }
1372 
1373   if (myid==Host_ID && 1<level_stdout) {
1374     printf("Reference origin is set to (%f %f %f) (Bohr)\n",unfold_origin[0],unfold_origin[1],unfold_origin[2]);
1375     printf("Supercell_lattice_vector atom Reference_lattice_vector atom\n");
1376     for (i=0; i<NR; i++) for (j=0; j<atomnum; j++)
1377       printf("(%i %i %i) %i <==> (%i %i %i) %i\n",Rlist[i][0],Rlist[i][1],Rlist[i][2],j+1,tabr4RN[i][j][0],tabr4RN[i][j][1],tabr4RN[i][j][2],unfold_mapN2n[j]);
1378     printf("\n");fflush(stdout);
1379   }
1380 
1381   coe=Cell_Volume/volume(a,b,c);
1382   fracabc=(double**)malloc(sizeof(double*)*3);
1383   for (i=0; i<3; i++) fracabc[i]=(double*)malloc(sizeof(double)*3);
1384   abc_by_ABC(fracabc);
1385   determine_kpts(nkpoint,kpoint);
1386 
1387   MP = (int*)malloc(sizeof(int)*List_YOUSO[1]);
1388   order_GA = (int*)malloc(sizeof(int)*(List_YOUSO[1]+1));
1389 
1390   My_NZeros = (int*)malloc(sizeof(int)*numprocs);
1391   SP_NZeros = (int*)malloc(sizeof(int)*numprocs);
1392   SP_Atoms = (int*)malloc(sizeof(int)*numprocs);
1393 
1394   ko = (double*)malloc(sizeof(double)*n2);
1395   koS = (double*)malloc(sizeof(double)*(n+1));
1396 
1397   EIGEN = (double*)malloc(sizeof(double)*n2);
1398 
1399   H = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
1400   for (j=0; j<n2; j++){
1401     H[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
1402   }
1403 
1404   S = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
1405   for (i=0; i<n2; i++){
1406     S[i] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
1407   }
1408 
1409   M1 = (double*)malloc(sizeof(double)*n2);
1410 
1411   C = (dcomplex**)malloc(sizeof(dcomplex*)*n2);
1412   for (j=0; j<n2; j++){
1413     C[j] = (dcomplex*)malloc(sizeof(dcomplex)*n2);
1414   }
1415 
1416   Ctmp = (double**)malloc(sizeof(double*)*n2);
1417   for (j=0; j<n2; j++){
1418     Ctmp[j] = (double*)malloc(sizeof(double)*n2);
1419   }
1420 
1421   /*****************************************************
1422         allocation of arrays for parallelization
1423   *****************************************************/
1424 
1425   stat_send = malloc(sizeof(MPI_Status)*numprocs);
1426   request_send = malloc(sizeof(MPI_Request)*numprocs);
1427   request_recv = malloc(sizeof(MPI_Request)*numprocs);
1428 
1429   is1 = (int*)malloc(sizeof(int)*numprocs);
1430   ie1 = (int*)malloc(sizeof(int)*numprocs);
1431 
1432   is12 = (int*)malloc(sizeof(int)*numprocs);
1433   ie12 = (int*)malloc(sizeof(int)*numprocs);
1434 
1435   is2 = (int*)malloc(sizeof(int)*numprocs);
1436   ie2 = (int*)malloc(sizeof(int)*numprocs);
1437 
1438   if ( numprocs<=n ){
1439 
1440     av_num = (double)n/(double)numprocs;
1441 
1442     for (ID=0; ID<numprocs; ID++){
1443       is1[ID] = (int)(av_num*(double)ID) + 1;
1444       ie1[ID] = (int)(av_num*(double)(ID+1));
1445     }
1446 
1447     is1[0] = 1;
1448     ie1[numprocs-1] = n;
1449 
1450   }
1451 
1452   else{
1453 
1454     for (ID=0; ID<n; ID++){
1455       is1[ID] = ID + 1;
1456       ie1[ID] = ID + 1;
1457     }
1458     for (ID=n; ID<numprocs; ID++){
1459       is1[ID] =  1;
1460       ie1[ID] = -2;
1461     }
1462   }
1463 
1464   for (ID=0; ID<numprocs; ID++){
1465     is12[ID] = 2*is1[ID] - 1;
1466     ie12[ID] = 2*ie1[ID];
1467   }
1468 
1469   /* make is2 and ie2 */
1470 
1471   MaxN = 2*n;
1472 
1473   if ( numprocs<=MaxN ){
1474 
1475     av_num = (double)MaxN/(double)numprocs;
1476 
1477     for (ID=0; ID<numprocs; ID++){
1478       is2[ID] = (int)(av_num*(double)ID) + 1;
1479       ie2[ID] = (int)(av_num*(double)(ID+1));
1480     }
1481 
1482     is2[0] = 1;
1483     ie2[numprocs-1] = MaxN;
1484   }
1485 
1486   else{
1487     for (ID=0; ID<MaxN; ID++){
1488       is2[ID] = ID + 1;
1489       ie2[ID] = ID + 1;
1490     }
1491     for (ID=MaxN; ID<numprocs; ID++){
1492       is2[ID] =  1;
1493       ie2[ID] = -2;
1494     }
1495   }
1496 
1497   /* find size_H1 */
1498   size_H1 = Get_OneD_HS_Col(0, CntOLP, &tmp, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
1499 
1500   /* allocation of arrays */
1501   S1  = (double*)malloc(sizeof(double)*size_H1);
1502   RH0 = (double*)malloc(sizeof(double)*size_H1);
1503   RH1 = (double*)malloc(sizeof(double)*size_H1);
1504   RH2 = (double*)malloc(sizeof(double)*size_H1);
1505   RH3 = (double*)malloc(sizeof(double)*size_H1);
1506   IH0 = (double*)malloc(sizeof(double)*size_H1);
1507   IH1 = (double*)malloc(sizeof(double)*size_H1);
1508   IH2 = (double*)malloc(sizeof(double)*size_H1);
1509 
1510   /* set S1, RH0, RH1, RH2, RH3, IH0, IH1, IH2 */
1511 
1512   size_H1 = Get_OneD_HS_Col(1, CntOLP, S1,  MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
1513   size_H1 = Get_OneD_HS_Col(1, nh[0],  RH0, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
1514   size_H1 = Get_OneD_HS_Col(1, nh[1],  RH1, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
1515   size_H1 = Get_OneD_HS_Col(1, nh[2],  RH2, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
1516   size_H1 = Get_OneD_HS_Col(1, nh[3],  RH3, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
1517 
1518   if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
1519       && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0){
1520 
1521     /* nothing is done. */
1522   }
1523   else {
1524     size_H1 = Get_OneD_HS_Col(1, ImNL[0], IH0, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
1525     size_H1 = Get_OneD_HS_Col(1, ImNL[1], IH1, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
1526     size_H1 = Get_OneD_HS_Col(1, ImNL[2], IH2, MP, order_GA, My_NZeros, SP_NZeros, SP_Atoms);
1527   }
1528 
1529   dtime(&SiloopTime);
1530 
1531   /*****************************************************
1532          Solve eigenvalue problem at each k-point
1533   *****************************************************/
1534 
1535   kj_e=(double*)malloc(sizeof(double)*2);
1536   iskj_e=(int*)malloc(sizeof(int)*2);
1537   K=(double*)malloc(sizeof(double)*3);
1538   K2=(double*)malloc(sizeof(double)*3);
1539   r=(double*)malloc(sizeof(double)*3);
1540   r0=(double*)malloc(sizeof(double)*3);
1541   weight=(dcomplex**)malloc(sizeof(dcomplex*)*atomnum);
1542   for (i=0; i<atomnum; i++) weight[i]=(dcomplex*)malloc(sizeof(dcomplex)*Norbperatom[i]);
1543   weight1=(dcomplex**)malloc(sizeof(dcomplex*)*atomnum);
1544   for (i=0; i<atomnum; i++) weight1[i]=(dcomplex*)malloc(sizeof(dcomplex)*Norbperatom[i]);
1545   kj_v=(dcomplex***)malloc(sizeof(dcomplex**)*2);
1546   for (i=0; i<2; i++) {
1547     kj_v[i]=(dcomplex**)malloc(sizeof(dcomplex*)*atomnum);
1548     for (j=0; j<atomnum; j++) kj_v[i][j]=(dcomplex*)malloc(sizeof(dcomplex)*Norbperatom[j]);
1549   }
1550   kj_v1=(dcomplex***)malloc(sizeof(dcomplex**)*2);
1551   for (i=0; i<2; i++) {
1552     kj_v1[i]=(dcomplex**)malloc(sizeof(dcomplex*)*atomnum);
1553     for (j=0; j<atomnum; j++) kj_v1[i][j]=(dcomplex*)malloc(sizeof(dcomplex)*Norbperatom[j]);
1554   }
1555   tmpelem=(dcomplex****)malloc(sizeof(dcomplex***)*atomnum);
1556   for (i=0; i<atomnum; i++) tmpelem[i]=(dcomplex***)malloc(sizeof(dcomplex**)*atomnum);
1557   for (i=0; i<atomnum; i++) for (j=0; j<atomnum; j++) {
1558     tmpelem[i][j]=(dcomplex**)malloc(sizeof(dcomplex*)*Norbperatom[i]);
1559     for (k=0; k<Norbperatom[i]; k++) {
1560       tmpelem[i][j][k]=(dcomplex*)malloc(sizeof(dcomplex)*Norbperatom[j]);
1561     }
1562   }
1563   tmpelem1=(dcomplex****)malloc(sizeof(dcomplex***)*atomnum);
1564   for (i=0; i<atomnum; i++) tmpelem1[i]=(dcomplex***)malloc(sizeof(dcomplex**)*atomnum);
1565   for (i=0; i<atomnum; i++) for (j=0; j<atomnum; j++) {
1566     tmpelem1[i][j]=(dcomplex**)malloc(sizeof(dcomplex*)*Norbperatom[i]);
1567     for (k=0; k<Norbperatom[i]; k++) {
1568       tmpelem1[i][j][k]=(dcomplex*)malloc(sizeof(dcomplex)*Norbperatom[j]);
1569     }
1570   }
1571 
1572   Name_Angular[0][0] = "s          ";
1573   Name_Angular[1][0] = "px         ";
1574   Name_Angular[1][1] = "py         ";
1575   Name_Angular[1][2] = "pz         ";
1576   Name_Angular[2][0] = "d3z^2-r^2  ";
1577   Name_Angular[2][1] = "dx^2-y^2   ";
1578   Name_Angular[2][2] = "dxy        ";
1579   Name_Angular[2][3] = "dxz        ";
1580   Name_Angular[2][4] = "dyz        ";
1581   Name_Angular[3][0] = "f5z^2-3r^2 ";
1582   Name_Angular[3][1] = "f5xz^2-xr^2";
1583   Name_Angular[3][2] = "f5yz^2-yr^2";
1584   Name_Angular[3][3] = "fzx^2-zy^2 ";
1585   Name_Angular[3][4] = "fxyz       ";
1586   Name_Angular[3][5] = "fx^3-3*xy^2";
1587   Name_Angular[3][6] = "f3yx^2-y^3 ";
1588   Name_Angular[4][0] = "g1         ";
1589   Name_Angular[4][1] = "g2         ";
1590   Name_Angular[4][2] = "g3         ";
1591   Name_Angular[4][3] = "g4         ";
1592   Name_Angular[4][4] = "g5         ";
1593   Name_Angular[4][5] = "g6         ";
1594   Name_Angular[4][6] = "g7         ";
1595   Name_Angular[4][7] = "g8         ";
1596   Name_Angular[4][8] = "g9         ";
1597 
1598   Name_Multiple[0] = "0";
1599   Name_Multiple[1] = "1";
1600   Name_Multiple[2] = "2";
1601   Name_Multiple[3] = "3";
1602   Name_Multiple[4] = "4";
1603   Name_Multiple[5] = "5";
1604 
1605   if (myid==Host_ID){
1606     strcpy(file_EV,".EV");
1607     fnjoint(filepath,filename,file_EV);
1608     if ((fp_EV = fopen(file_EV,"a")) != NULL){
1609       fprintf(fp_EV,"\n");
1610       fprintf(fp_EV,"***********************************************************\n");
1611       fprintf(fp_EV,"***********************************************************\n");
1612       fprintf(fp_EV,"          Unfolding calculation for band structure         \n");
1613       fprintf(fp_EV,"***********************************************************\n");
1614       fprintf(fp_EV,"***********************************************************\n");
1615       fprintf(fp_EV,"                                                                          \n");
1616       fprintf(fp_EV," Origin of the Reference cell is set to (%f %f %f) (Bohr).\n\n",
1617               unfold_origin[0],unfold_origin[1],unfold_origin[2]);
1618       fprintf(fp_EV," Unfolded weights at specified k points are stored in System.Name.unfold_totup(dn).\n");
1619       fprintf(fp_EV," Individual orbital weights are stored in System.Name.unfold_orbup(dn).\n");
1620       fprintf(fp_EV," The format is: k_dis(Bohr^{-1})  energy(eV)  weight.\n\n");
1621       fprintf(fp_EV," The sequence for the orbital weights in System.Name.unfold_orbup(dn) is given below.\n\n");
1622 
1623       i1 = 1;
1624 
1625       for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
1626 	wan1 = WhatSpecies[Gc_AN];
1627 	for (l=0; l<=Supported_MaxL; l++){
1628 	  for (mul=0; mul<Spe_Num_CBasis[wan1][l]; mul++){
1629 	    for (m=0; m<(2*l+1); m++){
1630 	      fprintf(fp_EV,"  %4d ",i1);
1631 	      if (l==0 && mul==0 && m==0)
1632 		fprintf(fp_EV,"%4d %3s %s %s",
1633 			Gc_AN,SpeName[wan1],Name_Multiple[mul],Name_Angular[l][m]);
1634 	      else
1635 		fprintf(fp_EV,"         %s %s",
1636 			Name_Multiple[mul],Name_Angular[l][m]);
1637 	      fprintf(fp_EV,"\n");
1638 	      i1++;
1639 	    }
1640 	  }
1641 	}
1642       }
1643 
1644       fprintf(fp_EV,"\n");
1645       fprintf(fp_EV,"\n  The total number of calculated k points is %i.\n",totnkpts);
1646       fprintf(fp_EV,"  The number of calculated k points on each path is \n");
1647 
1648       fprintf(fp_EV,"  For each path: (");
1649       for (i=0; i<nkpoint; i++){
1650         fprintf(fp_EV," %i",np[i]);
1651       }
1652       fprintf(fp_EV," )\n\n");
1653 
1654       fprintf(fp_EV,"                 ka         kb         kc\n");
1655 
1656       kloop = 0;
1657       for (i=0; i<nkpoint; i++){
1658 	for (j=0; j<np[i]; j++) {
1659 
1660           kloop++;
1661 
1662 	  if (np[i]==1) {
1663 	    fprintf(fp_EV,"  %3d/%3d   %10.6f %10.6f %10.6f\n",kloop,totnkpts,kpoint[i][1],kpoint[i][2],kpoint[i][3]);
1664 	  }
1665           else {
1666 	    fprintf(fp_EV,"  %3d/%3d   %10.6f %10.6f %10.6f\n",
1667                     kloop,totnkpts,
1668                     kpoint[i][1]+j*(kpoint[i+1][1]-kpoint[i][1])/np[i],
1669 		    kpoint[i][2]+j*(kpoint[i+1][2]-kpoint[i][2])/np[i],
1670 		    kpoint[i][3]+j*(kpoint[i+1][3]-kpoint[i][3])/np[i]);
1671 	  }
1672 
1673 	}
1674       }
1675       fprintf(fp_EV,"\n");
1676       fclose(fp_EV);
1677     }
1678     else{
1679       printf("Failure of saving the EV file.\n");
1680       fclose(fp_EV);
1681     }
1682 
1683     strcpy(file_EV,".unfold_tot");
1684     fnjoint(filepath,filename,file_EV);
1685     fp_EV = fopen(file_EV,"w");
1686     if (fp_EV == NULL) {
1687       printf("Failure of saving the System.Name.unfold_totup file.\n");
1688       fclose(fp_EV);
1689     }
1690     strcpy(file_EV,".unfold_orb");
1691     fnjoint(filepath,filename,file_EV);
1692     fp_EV1 = fopen(file_EV,"w");
1693     if (fp_EV1 == NULL) {
1694       printf("Failure of saving the System.Name.unfold_orbup file.\n");
1695       fclose(fp_EV1);
1696     }
1697   }
1698 
1699   int kloopi,kloopj;
1700   double kpt0,kpt1,kpt2;
1701 
1702   /* for gnuplot example */
1703   if (myid==Host_ID){
1704 
1705     strcpy(file_EV,".unfold_plotexample");
1706     fnjoint(filepath,filename,file_EV);
1707     fp_EV0 = fopen(file_EV,"w");
1708     if (fp_EV0 == NULL) {
1709       printf("Failure of saving the System.Name.unfold_plotexample file.\n");
1710       fclose(fp_EV0);
1711     }
1712     fprintf(fp_EV0,"set yrange [%f:%f]\n",unfold_lbound*eV2Hartree,unfold_ubound*eV2Hartree);
1713     fprintf(fp_EV0,"set ylabel 'Energy (eV)'\n");
1714     fprintf(fp_EV0,"set xtics(");
1715     pk1=coe*kpoint[0][1]*(fracabc[1][1]*fracabc[2][2]-fracabc[1][2]*fracabc[2][1])
1716       -coe*kpoint[0][2]*(fracabc[0][1]*fracabc[2][2]-fracabc[2][1]*fracabc[0][2])
1717       +coe*kpoint[0][3]*(fracabc[0][1]*fracabc[1][2]-fracabc[1][1]*fracabc[0][2]);
1718     pk2=-coe*kpoint[0][1]*(fracabc[1][0]*fracabc[2][2]-fracabc[2][0]*fracabc[1][2])
1719       +coe*kpoint[0][2]*(fracabc[0][0]*fracabc[2][2]-fracabc[2][0]*fracabc[0][2])
1720       -coe*kpoint[0][3]*(fracabc[0][0]*fracabc[1][2]-fracabc[1][0]*fracabc[0][2]);
1721     pk3=coe*kpoint[0][1]*(fracabc[1][0]*fracabc[2][1]-fracabc[1][1]*fracabc[2][0])
1722       -coe*kpoint[0][2]*(fracabc[0][0]*fracabc[2][1]-fracabc[2][0]*fracabc[0][1])
1723       +coe*kpoint[0][3]*(fracabc[0][0]*fracabc[1][1]-fracabc[1][0]*fracabc[0][1]);
1724     kdis=0.;
1725 
1726     for (kloopi=0; kloopi<nkpoint; kloopi++) {
1727 
1728       kpt0=kpoint[kloopi][1];
1729       kpt1=kpoint[kloopi][2];
1730       kpt2=kpoint[kloopi][3];
1731 
1732       k1= coe*kpt0*(fracabc[1][1]*fracabc[2][2]-fracabc[1][2]*fracabc[2][1])
1733         -coe*kpt1*(fracabc[0][1]*fracabc[2][2]-fracabc[2][1]*fracabc[0][2])
1734         +coe*kpt2*(fracabc[0][1]*fracabc[1][2]-fracabc[1][1]*fracabc[0][2]);
1735       k2=-coe*kpt0*(fracabc[1][0]*fracabc[2][2]-fracabc[2][0]*fracabc[1][2])
1736         +coe*kpt1*(fracabc[0][0]*fracabc[2][2]-fracabc[2][0]*fracabc[0][2])
1737         -coe*kpt2*(fracabc[0][0]*fracabc[1][2]-fracabc[1][0]*fracabc[0][2]);
1738       k3= coe*kpt0*(fracabc[1][0]*fracabc[2][1]-fracabc[1][1]*fracabc[2][0])
1739         -coe*kpt1*(fracabc[0][0]*fracabc[2][1]-fracabc[2][0]*fracabc[0][1])
1740         +coe*kpt2*(fracabc[0][0]*fracabc[1][1]-fracabc[1][0]*fracabc[0][1]);
1741       K[0]=k1*rtv[1][1]+k2*rtv[2][1]+k3*rtv[3][1];
1742       K[1]=k1*rtv[1][2]+k2*rtv[2][2]+k3*rtv[3][2];
1743       K[2]=k1*rtv[1][3]+k2*rtv[2][3]+k3*rtv[3][3];
1744       K2[0]=pk1*rtv[1][1]+pk2*rtv[2][1]+pk3*rtv[3][1];
1745       K2[1]=pk1*rtv[1][2]+pk2*rtv[2][2]+pk3*rtv[3][2];
1746       K2[2]=pk1*rtv[1][3]+pk2*rtv[2][3]+pk3*rtv[3][3];
1747       dis2pk=distwovec(K,K2);
1748       kdis+=dis2pk;
1749 
1750       if (kloopi==(nkpoint-1))
1751         fprintf(fp_EV0,"'%s' %f)\n",unfold_kpoint_name[kloopi],kdis);
1752       else
1753         fprintf(fp_EV0,"'%s' %f,",unfold_kpoint_name[kloopi],kdis);
1754 
1755       pk1=k1;
1756       pk2=k2;
1757       pk3=k3;
1758     }
1759 
1760     pk1=coe*kpoint[0][1]*(fracabc[1][1]*fracabc[2][2]-fracabc[1][2]*fracabc[2][1])
1761       -coe*kpoint[0][2]*(fracabc[0][1]*fracabc[2][2]-fracabc[2][1]*fracabc[0][2])
1762       +coe*kpoint[0][3]*(fracabc[0][1]*fracabc[1][2]-fracabc[1][1]*fracabc[0][2]);
1763     pk2=-coe*kpoint[0][1]*(fracabc[1][0]*fracabc[2][2]-fracabc[2][0]*fracabc[1][2])
1764       +coe*kpoint[0][2]*(fracabc[0][0]*fracabc[2][2]-fracabc[2][0]*fracabc[0][2])
1765       -coe*kpoint[0][3]*(fracabc[0][0]*fracabc[1][2]-fracabc[1][0]*fracabc[0][2]);
1766     pk3=coe*kpoint[0][1]*(fracabc[1][0]*fracabc[2][1]-fracabc[1][1]*fracabc[2][0])
1767       -coe*kpoint[0][2]*(fracabc[0][0]*fracabc[2][1]-fracabc[2][0]*fracabc[0][1])
1768       +coe*kpoint[0][3]*(fracabc[0][0]*fracabc[1][1]-fracabc[1][0]*fracabc[0][1]);
1769     fprintf(fp_EV0,"set xrange [0:%f]\n",kdis);
1770     fprintf(fp_EV0,"set arrow nohead from 0,0 to %f,0\n",kdis);
1771     kdis=0.;
1772     for (kloopi=1; kloopi<nkpoint-1; kloopi++) {
1773       fprintf(fp_EV0,"set arrow nohead from ");
1774       kpt0=kpoint[kloopi][1];
1775       kpt1=kpoint[kloopi][2];
1776       kpt2=kpoint[kloopi][3];
1777       k1= coe*kpt0*(fracabc[1][1]*fracabc[2][2]-fracabc[1][2]*fracabc[2][1])
1778         -coe*kpt1*(fracabc[0][1]*fracabc[2][2]-fracabc[2][1]*fracabc[0][2])
1779         +coe*kpt2*(fracabc[0][1]*fracabc[1][2]-fracabc[1][1]*fracabc[0][2]);
1780       k2=-coe*kpt0*(fracabc[1][0]*fracabc[2][2]-fracabc[2][0]*fracabc[1][2])
1781         +coe*kpt1*(fracabc[0][0]*fracabc[2][2]-fracabc[2][0]*fracabc[0][2])
1782         -coe*kpt2*(fracabc[0][0]*fracabc[1][2]-fracabc[1][0]*fracabc[0][2]);
1783       k3= coe*kpt0*(fracabc[1][0]*fracabc[2][1]-fracabc[1][1]*fracabc[2][0])
1784         -coe*kpt1*(fracabc[0][0]*fracabc[2][1]-fracabc[2][0]*fracabc[0][1])
1785         +coe*kpt2*(fracabc[0][0]*fracabc[1][1]-fracabc[1][0]*fracabc[0][1]);
1786       K[0]=k1*rtv[1][1]+k2*rtv[2][1]+k3*rtv[3][1];
1787       K[1]=k1*rtv[1][2]+k2*rtv[2][2]+k3*rtv[3][2];
1788       K[2]=k1*rtv[1][3]+k2*rtv[2][3]+k3*rtv[3][3];
1789       K2[0]=pk1*rtv[1][1]+pk2*rtv[2][1]+pk3*rtv[3][1];
1790       K2[1]=pk1*rtv[1][2]+pk2*rtv[2][2]+pk3*rtv[3][2];
1791       K2[2]=pk1*rtv[1][3]+pk2*rtv[2][3]+pk3*rtv[3][3];
1792       dis2pk=distwovec(K,K2);
1793       kdis+=dis2pk;
1794       fprintf(fp_EV0,"%f,%f to %f,%f\n",kdis,unfold_lbound*eV2Hartree,kdis,unfold_ubound*eV2Hartree);
1795       pk1=k1;
1796       pk2=k2;
1797       pk3=k3;
1798     }
1799     fprintf(fp_EV0,"set style circle radius 0\n");
1800     fprintf(fp_EV0,"plot '%s.unfold_tot' using 1:2:($3)*0.05 notitle with circles lc rgb 'red'\n",filename);
1801   }
1802   /* end gnuplot example */
1803 
1804   pk1=coe*kpoint[0][1]*(fracabc[1][1]*fracabc[2][2]-fracabc[1][2]*fracabc[2][1])
1805      -coe*kpoint[0][2]*(fracabc[0][1]*fracabc[2][2]-fracabc[2][1]*fracabc[0][2])
1806      +coe*kpoint[0][3]*(fracabc[0][1]*fracabc[1][2]-fracabc[1][1]*fracabc[0][2]);
1807   pk2=-coe*kpoint[0][1]*(fracabc[1][0]*fracabc[2][2]-fracabc[2][0]*fracabc[1][2])
1808      +coe*kpoint[0][2]*(fracabc[0][0]*fracabc[2][2]-fracabc[2][0]*fracabc[0][2])
1809      -coe*kpoint[0][3]*(fracabc[0][0]*fracabc[1][2]-fracabc[1][0]*fracabc[0][2]);
1810   pk3=coe*kpoint[0][1]*(fracabc[1][0]*fracabc[2][1]-fracabc[1][1]*fracabc[2][0])
1811      -coe*kpoint[0][2]*(fracabc[0][0]*fracabc[2][1]-fracabc[2][0]*fracabc[0][1])
1812      +coe*kpoint[0][3]*(fracabc[0][0]*fracabc[1][1]-fracabc[1][0]*fracabc[0][1]);
1813 
1814   /* for standard output */
1815 
1816   if (myid==Host_ID && 0<level_stdout) {
1817     printf(" The number of selected k points is %i.\n",totnkpts);
1818 
1819     printf(" For each path: (");
1820     for (i=0; i<nkpoint; i++){
1821       printf(" %i",np[i]);
1822     }
1823     printf(" )\n\n");
1824     printf("                 ka         kb         kc\n");
1825   }
1826 
1827   /*********************************************
1828                       kloopi
1829   *********************************************/
1830 
1831   kloop = 0;
1832   kdis=0.;
1833   for (kloopi=0; kloopi<nkpoint; kloopi++)
1834     for (kloopj=0; kloopj<np[kloopi]; kloopj++) {
1835 
1836       kloop++;
1837 
1838       if (np[kloopi]==1) {
1839 	kpt0=kpoint[kloopi][1];
1840 	kpt1=kpoint[kloopi][2];
1841 	kpt2=kpoint[kloopi][3];
1842       } else {
1843 	kpt0=kpoint[kloopi][1]+kloopj*(kpoint[kloopi+1][1]-kpoint[kloopi][1])/np[kloopi];
1844 	kpt1=kpoint[kloopi][2]+kloopj*(kpoint[kloopi+1][2]-kpoint[kloopi][2])/np[kloopi];
1845 	kpt2=kpoint[kloopi][3]+kloopj*(kpoint[kloopi+1][3]-kpoint[kloopi][3])/np[kloopi];
1846       }
1847 
1848       /* for standard output */
1849 
1850       if (myid==Host_ID && 0<level_stdout) {
1851 
1852         if (kloop==totnkpts)
1853           printf("  %3d/%3d   %10.6f %10.6f %10.6f\n\n",kloop,totnkpts,kpt0,kpt1,kpt2);
1854         else
1855           printf("  %3d/%3d   %10.6f %10.6f %10.6f\n",kloop,totnkpts,kpt0,kpt1,kpt2);
1856       }
1857 
1858       k1= coe*kpt0*(fracabc[1][1]*fracabc[2][2]-fracabc[1][2]*fracabc[2][1])
1859          -coe*kpt1*(fracabc[0][1]*fracabc[2][2]-fracabc[2][1]*fracabc[0][2])
1860          +coe*kpt2*(fracabc[0][1]*fracabc[1][2]-fracabc[1][1]*fracabc[0][2]);
1861       k2=-coe*kpt0*(fracabc[1][0]*fracabc[2][2]-fracabc[2][0]*fracabc[1][2])
1862          +coe*kpt1*(fracabc[0][0]*fracabc[2][2]-fracabc[2][0]*fracabc[0][2])
1863          -coe*kpt2*(fracabc[0][0]*fracabc[1][2]-fracabc[1][0]*fracabc[0][2]);
1864       k3= coe*kpt0*(fracabc[1][0]*fracabc[2][1]-fracabc[1][1]*fracabc[2][0])
1865          -coe*kpt1*(fracabc[0][0]*fracabc[2][1]-fracabc[2][0]*fracabc[0][1])
1866          +coe*kpt2*(fracabc[0][0]*fracabc[1][1]-fracabc[1][0]*fracabc[0][1]);
1867 
1868       /* make S and H */
1869 
1870       for (i=1; i<=n; i++){
1871 	for (j=1; j<=n; j++){
1872 	  S[i][j] = Complex(0.0,0.0);
1873 	}
1874       }
1875 
1876       for (i=1; i<=2*n; i++){
1877 	for (j=1; j<=2*n; j++){
1878 	  H[i][j] = Complex(0.0,0.0);
1879 	}
1880       }
1881 
1882       /* non-spin-orbit coupling and non-LDA+U */
1883       if (SO_switch==0 && Hub_U_switch==0 && Constraint_NCS_switch==0
1884 	  && Zeeman_NCS_switch==0 && Zeeman_NCO_switch==0){
1885 
1886 	k = 0;
1887 	for (AN=1; AN<=atomnum; AN++){
1888 	  GA_AN = order_GA[AN];
1889 	  wanA = WhatSpecies[GA_AN];
1890 	  tnoA = Spe_Total_CNO[wanA];
1891 	  Anum = MP[GA_AN];
1892 
1893 	  for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1894 	    GB_AN = natn[GA_AN][LB_AN];
1895 	    Rn = ncn[GA_AN][LB_AN];
1896 	    wanB = WhatSpecies[GB_AN];
1897 	    tnoB = Spe_Total_CNO[wanB];
1898 	    Bnum = MP[GB_AN];
1899 
1900 	    l1 = atv_ijk[Rn][1];
1901 	    l2 = atv_ijk[Rn][2];
1902 	    l3 = atv_ijk[Rn][3];
1903 	    kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;
1904 
1905 	    si = sin(2.0*PI*kRn);
1906 	    co = cos(2.0*PI*kRn);
1907 
1908 	    for (i=0; i<tnoA; i++){
1909 	      for (j=0; j<tnoB; j++){
1910 
1911 		H[Anum+i  ][Bnum+j  ].r += co*RH0[k];
1912 		H[Anum+i  ][Bnum+j  ].i += si*RH0[k];
1913 
1914 		H[Anum+i+n][Bnum+j+n].r += co*RH1[k];
1915 		H[Anum+i+n][Bnum+j+n].i += si*RH1[k];
1916 
1917 		H[Anum+i  ][Bnum+j+n].r += co*RH2[k] - si*RH3[k];
1918 		H[Anum+i  ][Bnum+j+n].i += si*RH2[k] + co*RH3[k];
1919 
1920 		S[Anum+i  ][Bnum+j  ].r += co*S1[k];
1921 		S[Anum+i  ][Bnum+j  ].i += si*S1[k];
1922 
1923 		k++;
1924 	      }
1925 	    }
1926 	  }
1927 	}
1928       }
1929 
1930       /* spin-orbit coupling or LDA+U */
1931       else {
1932 
1933 	k = 0;
1934 	for (AN=1; AN<=atomnum; AN++){
1935 	  GA_AN = order_GA[AN];
1936 	  wanA = WhatSpecies[GA_AN];
1937 	  tnoA = Spe_Total_CNO[wanA];
1938 	  Anum = MP[GA_AN];
1939 
1940 	  for (LB_AN=0; LB_AN<=FNAN[GA_AN]; LB_AN++){
1941 	    GB_AN = natn[GA_AN][LB_AN];
1942 	    Rn = ncn[GA_AN][LB_AN];
1943 	    wanB = WhatSpecies[GB_AN];
1944 	    tnoB = Spe_Total_CNO[wanB];
1945 	    Bnum = MP[GB_AN];
1946 
1947 	    l1 = atv_ijk[Rn][1];
1948 	    l2 = atv_ijk[Rn][2];
1949 	    l3 = atv_ijk[Rn][3];
1950 	    kRn = k1*(double)l1 + k2*(double)l2 + k3*(double)l3;
1951 
1952 	    si = sin(2.0*PI*kRn);
1953 	    co = cos(2.0*PI*kRn);
1954 
1955 	    for (i=0; i<tnoA; i++){
1956 	      for (j=0; j<tnoB; j++){
1957 
1958 		H[Anum+i  ][Bnum+j  ].r += co*RH0[k] - si*IH0[k];
1959 		H[Anum+i  ][Bnum+j  ].i += si*RH0[k] + co*IH0[k];
1960 
1961 		H[Anum+i+n][Bnum+j+n].r += co*RH1[k] - si*IH1[k];
1962 		H[Anum+i+n][Bnum+j+n].i += si*RH1[k] + co*IH1[k];
1963 
1964 		H[Anum+i  ][Bnum+j+n].r += co*RH2[k] - si*(RH3[k]+IH2[k]);
1965 		H[Anum+i  ][Bnum+j+n].i += si*RH2[k] + co*(RH3[k]+IH2[k]);
1966 
1967 		S[Anum+i  ][Bnum+j  ].r += co*S1[k];
1968 		S[Anum+i  ][Bnum+j  ].i += si*S1[k];
1969 
1970 		k++;
1971 	      }
1972 	    }
1973 	  }
1974 	}
1975       }
1976 
1977       /* set off-diagonal part */
1978 
1979       for (i=1; i<=n; i++){
1980 	for (j=1; j<=n; j++){
1981 	  H[j+n][i].r = H[i][j+n].r;
1982 	  H[j+n][i].i =-H[i][j+n].i;
1983 	}
1984       }
1985 
1986       /* diagonalization of S */
1987       Eigen_PHH(mpi_comm_level1,S,koS,n,n,1);
1988 
1989       /* minus eigenvalues to 1.0e-10 */
1990 
1991       for (l=1; l<=n; l++){
1992 	if (koS[l]<1.0e-10) koS[l] = 1.0e-10;
1993       }
1994 
1995       /* calculate S*1/sqrt(koS) */
1996 
1997       for (l=1; l<=n; l++) koS[l] = 1.0/sqrt(koS[l]);
1998 
1999       /* S * 1.0/sqrt(koS[l]) */
2000 
2001 #pragma omp parallel for shared(n,S,koS) private(i1,j1)
2002 
2003       for (i1=1; i1<=n; i1++){
2004 	for (j1=1; j1<=n; j1++){
2005 	  S[i1][j1].r = S[i1][j1].r*koS[j1];
2006 	  S[i1][j1].i = S[i1][j1].i*koS[j1];
2007 	}
2008       }
2009 
2010       /****************************************************
2011                   set H' and diagonalize it
2012       ****************************************************/
2013 
2014       /* U'^+ * H * U * M1 */
2015 
2016       /* transpose S */
2017 
2018       for (i1=1; i1<=n; i1++){
2019 	for (j1=i1+1; j1<=n; j1++){
2020 	  Ctmp1 = S[i1][j1];
2021 	  Ctmp2 = S[j1][i1];
2022 	  S[i1][j1] = Ctmp2;
2023 	  S[j1][i1] = Ctmp1;
2024 	}
2025       }
2026 
2027       /* H * U' */
2028       /* C is distributed by row in each processor */
2029 
2030 #pragma omp parallel shared(C,S,H,n,is1,ie1,myid) private(OMPID,Nthrds,Nprocs,i1,j1,l)
2031       {
2032 
2033 	/* get info. on OpenMP */
2034 
2035 	OMPID = omp_get_thread_num();
2036 	Nthrds = omp_get_num_threads();
2037 	Nprocs = omp_get_num_procs();
2038 
2039 	for (i1=1+OMPID; i1<=2*n; i1+=Nthrds){
2040 	  for (j1=is1[myid]; j1<=ie1[myid]; j1++){
2041 
2042 	    double sum_r0 = 0.0;
2043 	    double sum_i0 = 0.0;
2044 
2045 	    double sum_r1 = 0.0;
2046 	    double sum_i1 = 0.0;
2047 
2048 	    for (l=1; l<=n; l++){
2049 	      sum_r0 += H[i1][l  ].r*S[j1][l].r - H[i1][l  ].i*S[j1][l].i;
2050 	      sum_i0 += H[i1][l  ].r*S[j1][l].i + H[i1][l  ].i*S[j1][l].r;
2051 
2052 	      sum_r1 += H[i1][n+l].r*S[j1][l].r - H[i1][n+l].i*S[j1][l].i;
2053 	      sum_i1 += H[i1][n+l].r*S[j1][l].i + H[i1][n+l].i*S[j1][l].r;
2054 	    }
2055 
2056 	    C[2*j1-1][i1].r = sum_r0;
2057 	    C[2*j1-1][i1].i = sum_i0;
2058 
2059 	    C[2*j1  ][i1].r = sum_r1;
2060 	    C[2*j1  ][i1].i = sum_i1;
2061 	  }
2062 	}
2063 
2064       } /* #pragma omp parallel */
2065 
2066       /* U'^+ H * U' */
2067       /* H is distributed by row in each processor */
2068 
2069 #pragma omp parallel shared(C,S,H,n,is1,ie1,myid) private(OMPID,Nthrds,Nprocs,i1,j1,l,jj1,jj2)
2070       {
2071 
2072 	/* get info. on OpenMP */
2073 
2074 	OMPID = omp_get_thread_num();
2075 	Nthrds = omp_get_num_threads();
2076 	Nprocs = omp_get_num_procs();
2077 
2078 	for (j1=is1[myid]+OMPID; j1<=ie1[myid]; j1+=Nthrds){
2079 	  for (i1=1; i1<=n; i1++){
2080 
2081 	    double sum_r00 = 0.0;
2082 	    double sum_i00 = 0.0;
2083 
2084 	    double sum_r01 = 0.0;
2085 	    double sum_i01 = 0.0;
2086 
2087 	    double sum_r10 = 0.0;
2088 	    double sum_i10 = 0.0;
2089 
2090 	    double sum_r11 = 0.0;
2091 	    double sum_i11 = 0.0;
2092 
2093 	    jj1 = 2*j1 - 1;
2094 	    jj2 = 2*j1;
2095 
2096 	    for (l=1; l<=n; l++){
2097 
2098 	      sum_r00 += S[i1][l].r*C[jj1][l  ].r + S[i1][l].i*C[jj1][l  ].i;
2099 	      sum_i00 += S[i1][l].r*C[jj1][l  ].i - S[i1][l].i*C[jj1][l  ].r;
2100 
2101 	      sum_r01 += S[i1][l].r*C[jj1][l+n].r + S[i1][l].i*C[jj1][l+n].i;
2102 	      sum_i01 += S[i1][l].r*C[jj1][l+n].i - S[i1][l].i*C[jj1][l+n].r;
2103 
2104 	      sum_r10 += S[i1][l].r*C[jj2][l  ].r + S[i1][l].i*C[jj2][l  ].i;
2105 	      sum_i10 += S[i1][l].r*C[jj2][l  ].i - S[i1][l].i*C[jj2][l  ].r;
2106 
2107 	      sum_r11 += S[i1][l].r*C[jj2][l+n].r + S[i1][l].i*C[jj2][l+n].i;
2108 	      sum_i11 += S[i1][l].r*C[jj2][l+n].i - S[i1][l].i*C[jj2][l+n].r;
2109 	    }
2110 
2111 	    H[jj1][2*i1-1].r = sum_r00;
2112 	    H[jj1][2*i1-1].i = sum_i00;
2113 
2114 	    H[jj1][2*i1  ].r = sum_r01;
2115 	    H[jj1][2*i1  ].i = sum_i01;
2116 
2117 	    H[jj2][2*i1-1].r = sum_r10;
2118 	    H[jj2][2*i1-1].i = sum_i10;
2119 
2120 	    H[jj2][2*i1  ].r = sum_r11;
2121 	    H[jj2][2*i1  ].i = sum_i11;
2122 
2123 	  }
2124 	}
2125 
2126       } /* #pragma omp parallel */
2127 
2128       /* broadcast H */
2129 
2130       BroadCast_ComplexMatrix(mpi_comm_level1,H,2*n,is12,ie12,myid,numprocs,
2131 			      stat_send,request_send,request_recv);
2132 
2133       /* H to C (transposition) */
2134 
2135 #pragma omp parallel for shared(n,C,H)
2136 
2137       for (i1=1; i1<=2*n; i1++){
2138 	for (j1=1; j1<=2*n; j1++){
2139 	  C[j1][i1].r = H[i1][j1].r;
2140 	  C[j1][i1].i = H[i1][j1].i;
2141 	}
2142       }
2143 
2144       /* solve the standard eigenvalue problem */
2145       /*  The output C matrix is distributed by column. */
2146 
2147       Eigen_PHH(mpi_comm_level1,C,ko,2*n,MaxN,0);
2148 
2149       for (i1=1; i1<=MaxN; i1++){
2150 	EIGEN[i1] = ko[i1];
2151       }
2152 
2153       /* calculation of wave functions */
2154 
2155       /*  The H matrix is distributed by row */
2156 
2157       for (i1=1; i1<=2*n; i1++){
2158 	for (j1=is2[myid]; j1<=ie2[myid]; j1++){
2159 	  H[j1][i1] = C[i1][j1];
2160 	}
2161       }
2162 
2163       /* transpose */
2164 
2165       for (i1=1; i1<=n; i1++){
2166 	for (j1=i1+1; j1<=n; j1++){
2167 	  Ctmp1 = S[i1][j1];
2168 	  Ctmp2 = S[j1][i1];
2169 	  S[i1][j1] = Ctmp2;
2170 	  S[j1][i1] = Ctmp1;
2171 	}
2172       }
2173 
2174       /* C is distributed by row in each processor */
2175 
2176 #pragma omp parallel shared(C,S,H,n,is2,ie2,myid) private(OMPID,Nthrds,Nprocs,i1,j1,l,l1)
2177       {
2178 
2179 	/* get info. on OpenMP */
2180 
2181 	OMPID = omp_get_thread_num();
2182 	Nthrds = omp_get_num_threads();
2183 	Nprocs = omp_get_num_procs();
2184 
2185 	for (j1=is2[myid]+OMPID; j1<=ie2[myid]; j1+=Nthrds){
2186 	  for (i1=1; i1<=n; i1++){
2187 
2188 	    double sum_r0 = 0.0;
2189 	    double sum_i0 = 0.0;
2190 
2191 	    double sum_r1 = 0.0;
2192 	    double sum_i1 = 0.0;
2193 
2194 	    l1 = 0;
2195 
2196 	    for (l=1; l<=n; l++){
2197 
2198 	      l1++;
2199 
2200 	      sum_r0 +=  S[i1][l].r*H[j1][l1].r - S[i1][l].i*H[j1][l1].i;
2201 	      sum_i0 +=  S[i1][l].r*H[j1][l1].i + S[i1][l].i*H[j1][l1].r;
2202 
2203 	      l1++;
2204 
2205 	      sum_r1 +=  S[i1][l].r*H[j1][l1].r - S[i1][l].i*H[j1][l1].i;
2206 	      sum_i1 +=  S[i1][l].r*H[j1][l1].i + S[i1][l].i*H[j1][l1].r;
2207 	    }
2208 
2209 	    C[j1][i1  ].r = sum_r0;
2210 	    C[j1][i1  ].i = sum_i0;
2211 
2212 	    C[j1][i1+n].r = sum_r1;
2213 	    C[j1][i1+n].i = sum_i1;
2214 
2215 	  }
2216 	}
2217 
2218       } /* #pragma omp parallel */
2219 
2220       /* broadcast C: C is distributed by row in each processor */
2221 
2222       BroadCast_ComplexMatrix(mpi_comm_level1,C,2*n,is2,ie2,myid,numprocs,
2223 			      stat_send,request_send,request_recv);
2224 
2225       /* C to H (transposition)
2226 	 H consists of column vectors
2227       */
2228 
2229       for (i1=1; i1<=MaxN; i1++){
2230 	for (j1=1; j1<=2*n; j1++){
2231 	  H[j1][i1] = C[i1][j1];
2232 	}
2233       }
2234 
2235       /****************************************************
2236                         Output
2237       ****************************************************/
2238 
2239       if (myid==Host_ID){
2240 
2241 #ifdef xt3
2242 	setvbuf(fp_EV,buf,_IOFBF,fp_bsize);  /* setvbuf */
2243 #endif
2244 
2245 	K[0]=k1*rtv[1][1]+k2*rtv[2][1]+k3*rtv[3][1];
2246 	K[1]=k1*rtv[1][2]+k2*rtv[2][2]+k3*rtv[3][2];
2247 	K[2]=k1*rtv[1][3]+k2*rtv[2][3]+k3*rtv[3][3];
2248 	K2[0]=pk1*rtv[1][1]+pk2*rtv[2][1]+pk3*rtv[3][1];
2249 	K2[1]=pk1*rtv[1][2]+pk2*rtv[2][2]+pk3*rtv[3][2];
2250 	K2[2]=pk1*rtv[1][3]+pk2*rtv[2][3]+pk3*rtv[3][3];
2251 	dis2pk=distwovec(K,K2);
2252 	kdis+=dis2pk;
2253 
2254 	num0 = 2;
2255 	num1 = 2*n/num0 + 1*((2*n)%num0!=0);
2256 
2257 	for (i=1; i<=num1; i++){
2258 
2259           countkj_e=0;
2260           for (j=0; j<2; j++) iskj_e[j]=0;
2261 
2262 	  for (i1=-2; i1<=0; i1++){
2263 
2264 	    for (j=1; j<=num0; j++){
2265 
2266 	      j1 = num0*(i-1) + j;
2267 
2268 	      if (j1<=2*n){
2269 
2270 		if (i1==-1){
2271 
2272                   if (((EIGEN[j1]-ChemP)<=unfold_ubound)&&((EIGEN[j1]-ChemP)>=unfold_lbound)) {
2273                     kj_e[countkj_e]=(EIGEN[j1]-ChemP);
2274                     iskj_e[countkj_e]=1; }
2275                   countkj_e++;
2276 
2277 		}
2278 	      }
2279 	    }
2280 	  }
2281 
2282 	  i1 = 1;
2283           int iorb;
2284 
2285 	  for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
2286             iorb=0;
2287 
2288 	    wan1 = WhatSpecies[Gc_AN];
2289 
2290 	    for (l=0; l<=Supported_MaxL; l++){
2291 	      for (mul=0; mul<Spe_Num_CBasis[wan1][l]; mul++){
2292 		for (m=0; m<(2*l+1); m++){
2293 
2294                   countkj_e=0;
2295 
2296 		  for (j=1; j<=num0; j++){
2297 
2298 		    j1 = num0*(i-1) + j;
2299 
2300 		    if (0<i1 && j1<=2*n){
2301                       kj_v[countkj_e][Gc_AN-1][iorb]=Complex(H[i1][j1].r,H[i1][j1].i);
2302                       kj_v1[countkj_e++][Gc_AN-1][iorb]=Complex(H[i1+n][j1].r,H[i1+n][j1].i);
2303 		    }
2304 		  }
2305 
2306 		  i1++;
2307                   iorb++;
2308 		}
2309 	      }
2310 	    }
2311 	  }
2312 
2313           for (countkj_e=0; countkj_e<2; countkj_e++) if (iskj_e[countkj_e]==1) {
2314 	    for (k=0; k<atomnum; k++) for (j=0; j<Norbperatom[k]; j++) weight[k][j]=Complex(0.,0.);
2315 	    for (k=0; k<atomnum; k++) for (j=0; j<Norbperatom[k]; j++) weight1[k][j]=Complex(0.,0.);
2316 
2317 	    for (j=0; j<atomnum; j++) for (k=0; k<atomnum; k++) for (l=0; l<Norbperatom[j]; l++)
2318 	      for (m=0; m<Norbperatom[k]; m++) tmpelem[j][k][l][m]=Cmul(Conjg(kj_v[countkj_e][j][l]),kj_v[countkj_e][k][m]);
2319 	    for (j=0; j<atomnum; j++) for (k=0; k<atomnum; k++) for (l=0; l<Norbperatom[j]; l++)
2320 	      for (m=0; m<Norbperatom[k]; m++) tmpelem1[j][k][l][m]=Cmul(Conjg(kj_v1[countkj_e][j][l]),kj_v1[countkj_e][k][m]);
2321 
2322 	    int NA,ir,MA,MO,NO;
2323 	    dcomplex dtmp,dtmp1;
2324 
2325 	    for (NA=0; NA<atomnum; NA++) {
2326 
2327 	      int n=unfold_mapN2n[NA];
2328               int r0x=tabr4RN[0][NA][0];
2329               int r0y=tabr4RN[0][NA][1];
2330               int r0z=tabr4RN[0][NA][2];
2331 
2332 	      r0[0]=r0x*a[0]+r0y*b[0]+r0z*c[0];
2333 	      r0[1]=r0x*a[1]+r0y*b[1]+r0z*c[1];
2334 	      r0[2]=r0x*a[2]+r0y*b[2]+r0z*c[2];
2335 
2336 	      dcomplex phase1=Cexp(Complex(0.,-dot(K,r0)));
2337 
2338 	      for (ir=0; ir<nr; ir++) {
2339 
2340 		if (rnmap[ir][n][1]==-1) continue;
2341 
2342 		r[0]=rlist[ir][0]*a[0]+rlist[ir][1]*b[0]+rlist[ir][2]*c[0];
2343 		r[1]=rlist[ir][0]*a[1]+rlist[ir][1]*b[1]+rlist[ir][2]*c[1];
2344 		r[2]=rlist[ir][0]*a[2]+rlist[ir][1]*b[2]+rlist[ir][2]*c[2];
2345 
2346 		dcomplex phase2=Cmul(phase1,Cexp(Complex(0.,dot(K,r))));
2347 
2348 		for (MA=0; MA<atomnum; MA++) {
2349 
2350 		  if (Elem[rnmap[ir][n][0]][MA][rnmap[ir][n][1]][0][0]<-99999.) continue;
2351 
2352 		  for (MO=0; MO<Norbperatom[MA]; MO++) for (NO=0; NO<Norbperatom[NA]; NO++) {
2353 		    dtmp=RCmul(Elem[rnmap[ir][n][0]][MA][rnmap[ir][n][1]][MO][NO],tmpelem[MA][NA][MO][NO]);
2354 		    dtmp1=RCmul(Elem[rnmap[ir][n][0]][MA][rnmap[ir][n][1]][MO][NO],tmpelem1[MA][NA][MO][NO]);
2355 		    weight[NA][NO]=Cadd(weight[NA][NO],Cmul(phase2,dtmp));
2356 		    weight1[NA][NO]=Cadd(weight1[NA][NO],Cmul(phase2,dtmp1));
2357 		  }}}}
2358 
2359 	    double sumallorb=0.;
2360 	    for (j=0; j<atomnum; j++) for (k=0; k<Norbperatom[j]; k++) sumallorb+=weight[j][k].r;
2361 	    for (j=0; j<atomnum; j++) for (k=0; k<Norbperatom[j]; k++) sumallorb+=weight1[j][k].r;
2362 	    fprintf(fp_EV,"%f %f %10.7f\n",kdis,kj_e[countkj_e]*eV2Hartree,fabs(sumallorb)/coe);
2363 
2364 	    /* set negative weight to zero for plotting purpose */
2365 	    for (j=0; j<atomnum; j++){
2366               for (k=0; k<Norbperatom[j]; k++){
2367 
2368   	        if ( (weight[j][k].r+weight1[j][k].r)<0.0) {
2369                    weight[j][k].r  = 0.0;
2370                    weight1[j][k].r = 0.0;
2371                 }
2372 	      }
2373 	    }
2374 
2375 	    fprintf(fp_EV1,"%f %f ",kdis,kj_e[countkj_e]*eV2Hartree);
2376 	    for (j=0; j<atomnum; j++) {
2377 	      for (k=0; k<Norbperatom[j]; k++) fprintf(fp_EV1,"%e ",(weight[j][k].r+weight1[j][k].r)/coe);
2378 	    }
2379 	    fprintf(fp_EV1,"\n");
2380           }
2381         }
2382       } /* if (myid==Host_ID) */
2383 
2384       MPI_Barrier(mpi_comm_level1);
2385 
2386       pk1=k1;
2387       pk2=k2;
2388       pk3=k3;
2389 
2390     }  /* kloopj */
2391 
2392   if (myid==Host_ID){
2393     if (fp_EV0 != NULL) fclose(fp_EV0);
2394     if (fp_EV != NULL) fclose(fp_EV);
2395     if (fp_EV1 != NULL) fclose(fp_EV1);
2396   }
2397 
2398   /****************************************************
2399                        free arrays
2400   ****************************************************/
2401 
2402   free(K);
2403   free(K2);
2404   free(r);
2405   free(r0);
2406   free(kj_e);
2407   free(iskj_e);
2408   free(unfold_mapN2n);
2409   free(a);
2410   free(b);
2411   free(c);
2412   free(unfold_origin);
2413   free(np);
2414 
2415   for (i=0; i<3; i++) free(unfold_abc[i]); free(unfold_abc);
2416   for (i=0; i<2; i++) for (j=0; j<atomnum; j++) free(kj_v[i][j]);
2417   for (i=0; i<2; i++) free(kj_v[i]); free(kj_v);
2418   for (i=0; i<2; i++) for (j=0; j<atomnum; j++) free(kj_v1[i][j]);
2419   for (i=0; i<2; i++) free(kj_v1[i]); free(kj_v1);
2420   for (i=0; i<atomnum; i++) free(weight[i]); free(weight);
2421   for (i=0; i<atomnum; i++) free(weight1[i]); free(weight1);
2422   for (i=0; i<NR; i++) free(Rlist[i]); free(Rlist);
2423   for (i=0; i<nr; i++) free(rlist[i]); free(rlist);
2424   for (i=0; i<21; i++) for (j=0; j<21; j++) free(_MapR[i][j]);
2425   for (i=0; i<21; i++) free(_MapR[i]); free(_MapR);
2426   for (i=0; i<NR; i++) for (j=0; j<atomnum; j++) free(tabr4RN[i][j]);
2427   for (i=0; i<NR; i++) free(tabr4RN[i]); free(tabr4RN);
2428   for (i=0; i<nr; i++) for (j=0; j<natom; j++) free(rnmap[i][j]);
2429   for (i=0; i<nr; i++) free(rnmap[i]); free(rnmap);
2430   for (i=0; i<NR; i++) for (j=0; j<atomnum; j++) for (k=0; k<atomnum; k++) for (l=0; l<Norbperatom[j]; l++) free(Elem[i][j][k][l]);
2431   for (i=0; i<NR; i++) for (j=0; j<atomnum; j++) for (k=0; k<atomnum; k++) free(Elem[i][j][k]);
2432   for (i=0; i<NR; i++) for (j=0; j<atomnum; j++) free(Elem[i][j]);
2433   for (i=0; i<NR; i++) free(Elem[i]); free(Elem);
2434   for (i=0; i<atomnum; i++) for (j=0; j<atomnum; j++) for (k=0; k<Norbperatom[i]; k++)
2435     free(tmpelem[i][j][k]);
2436   for (i=0; i<atomnum; i++) for (j=0; j<atomnum; j++) free(tmpelem[i][j]);
2437   for (i=0; i<atomnum; i++) free(tmpelem[i]); free(tmpelem);
2438   for (i=0; i<atomnum; i++) for (j=0; j<atomnum; j++) for (k=0; k<Norbperatom[i]; k++)
2439     free(tmpelem1[i][j][k]);
2440   for (i=0; i<atomnum; i++) for (j=0; j<atomnum; j++) free(tmpelem1[i][j]);
2441   for (i=0; i<atomnum; i++) free(tmpelem1[i]); free(tmpelem1);
2442   for (i=0; i<3; i++) free(fracabc[i]); free(fracabc);
2443   free(Norbperatom);
2444   for (i=0; i<unfold_Nkpoint+1; i++) free(unfold_kpoint[i]); free(unfold_kpoint);
2445   for (i=0; i<(unfold_Nkpoint+1); i++){
2446     free(unfold_kpoint_name[i]);
2447   }
2448   free(unfold_kpoint_name);
2449 
2450   free(stat_send);
2451   free(request_send);
2452   free(request_recv);
2453 
2454   free(is1);
2455   free(ie1);
2456   free(is2);
2457   free(ie2);
2458   free(is12);
2459   free(ie12);
2460 
2461   free(MP);
2462   free(order_GA);
2463 
2464   free(My_NZeros);
2465   free(SP_NZeros);
2466   free(SP_Atoms);
2467 
2468   free(ko);
2469   free(koS);
2470 
2471   free(S1);
2472   free(RH0);
2473   free(RH1);
2474   free(RH2);
2475   free(RH3);
2476   free(IH0);
2477   free(IH1);
2478   free(IH2);
2479 
2480   free(EIGEN);
2481 
2482   for (j=0; j<n2; j++){
2483     free(H[j]);
2484   }
2485   free(H);
2486 
2487   for (i=0; i<n2; i++){
2488     free(S[i]);
2489   }
2490   free(S);
2491 
2492   free(M1);
2493 
2494   for (j=0; j<n2; j++){
2495     free(C[j]);
2496   }
2497   free(C);
2498 
2499   for (j=0; j<n2; j++){
2500     free(Ctmp[j]);
2501   }
2502   free(Ctmp);
2503 
2504   dtime(&TEtime);
2505 
2506 }
2507 
2508 
volume(const double * a,const double * b,const double * c)2509 static double volume(const double* a,const double* b,const double* c) {
2510   return fabs(a[0]*b[1]*c[2]+b[0]*c[1]*a[2]+c[0]*a[1]*b[2]-c[0]*b[1]*a[2]-a[1]*b[0]*c[2]-a[0]*c[1]*b[2]);}
2511 
chkr(int a,int b,int c)2512 int chkr(int a, int b, int c) {int i; for (i=0; i<nr; i++) if ((rlist[i][0]==a)&&(rlist[i][1]==b)&&(rlist[i][2]==c)) return i;
2513   return -99999;}
2514 
addr(int a,int b,int c)2515 int addr(int a, int b, int c) { int i, j, ck; ck=chkr(a,b,c);
2516   if (ck!=-99999) return ck;
2517   else {
2518     int** tmprlist;
2519     tmprlist = (int**)malloc(sizeof(int*)*nr);
2520     for (i=0; i<nr; i++) {
2521       tmprlist[i]=(int*)malloc(sizeof(int)*3);
2522       for (j=0; j<3; j++) tmprlist[i][j]=rlist[i][j];
2523     }
2524     for (i=0; i<nr; i++) free(rlist[i]); free(rlist);
2525     rlist=(int**)malloc(sizeof(int*)*++nr);
2526     for (i=0; i<nr; i++) rlist[i]=(int*)malloc(sizeof(int)*3);
2527     for (i=0; i<nr-1; i++) for (j=0; j<3; j++)
2528       rlist[i][j]=tmprlist[i][j];
2529     rlist[nr-1][0]=a; rlist[nr-1][1]=b; rlist[nr-1][2]=c;
2530     for (i=0; i<nr-1; i++) free(tmprlist[i]);
2531     free(tmprlist);
2532     return nr-1;
2533   }}
2534 
buildrnmap(const int * mapN2n)2535 void buildrnmap(const int* mapN2n) {
2536   int i, j, k;
2537   natom=-999; for (i=0; i<atomnum; i++) if (mapN2n[i]>natom) natom=mapN2n[i];
2538   natom++;
2539   rnmap=(int***)malloc(sizeof(int**)*nr);
2540   for (i=0; i<nr; i++) {
2541     rnmap[i]=(int**)malloc(sizeof(int*)*natom);
2542     for (j=0; j<natom; j++) {
2543       rnmap[i][j]=(int*)malloc(sizeof(int)*2);
2544       rnmap[i][j][1]=-1;
2545     }
2546   }
2547   int iR, iN;
2548   for (iR=0; iR<NR; iR++) for (iN=0; iN<atomnum; iN++) {
2549     int tmpi; tmpi=addr(tabr4RN[iR][iN][0],tabr4RN[iR][iN][1],tabr4RN[iR][iN][2]);
2550     rnmap[tmpi][mapN2n[iN]][0]=iR; rnmap[tmpi][mapN2n[iN]][1]=iN;
2551   }
2552 }
2553 
dis(const double * a)2554 static double dis(const double* a) {return sqrt(a[2]*a[2]+a[1]*a[1]+a[0]*a[0]);}
2555 
distwovec(const double * a,const double * b)2556 static double distwovec(const double* a, const double* b) {return sqrt((a[2]-b[2])*(a[2]-b[2])+(a[1]-b[1])*(a[1]-b[1])+(a[0]-b[0])*(a[0]-b[0]));}
2557 
det(const double * a,const double * b,const double * c)2558 static double det(const double* a,const double* b,const double* c) {
2559   return a[0]*b[1]*c[2]+b[0]*c[1]*a[2]+c[0]*a[1]*b[2]-c[0]*b[1]*a[2]-a[1]*b[0]*c[2]-a[0]*c[1]*b[2];}
2560 
2561 /* abc = S ABC */
abc_by_ABC(double ** S)2562 void abc_by_ABC(double ** S) {
2563   double detABC=tv[1][1]*tv[2][2]*tv[3][3]+tv[2][1]*tv[3][2]*tv[1][3]+tv[3][1]*tv[1][2]*tv[2][3]-tv[3][1]*tv[2][2]*tv[1][3]-tv[1][2]*tv[2][1]*tv[3][3]-tv[1][1]*tv[3][2]*tv[2][3];
2564   int i,j,k;
2565   double** inv = (double**)malloc(sizeof(double*)*3);
2566   for (i=0; i<3; i++) inv[i]=(double*)malloc(sizeof(double)*3);
2567   inv[0][0]=(tv[2][2]*tv[3][3]-tv[3][2]*tv[2][3])/detABC;
2568   inv[0][1]=(tv[1][3]*tv[3][2]-tv[3][3]*tv[1][2])/detABC;
2569   inv[0][2]=(tv[1][2]*tv[2][3]-tv[2][2]*tv[1][3])/detABC;
2570   inv[1][0]=(tv[2][3]*tv[3][1]-tv[3][3]*tv[2][1])/detABC;
2571   inv[1][1]=(tv[1][1]*tv[3][3]-tv[3][1]*tv[1][3])/detABC;
2572   inv[1][2]=(tv[1][3]*tv[2][1]-tv[2][3]*tv[1][1])/detABC;
2573   inv[2][0]=(tv[2][1]*tv[3][2]-tv[3][1]*tv[2][2])/detABC;
2574   inv[2][1]=(tv[1][2]*tv[3][1]-tv[3][2]*tv[1][1])/detABC;
2575   inv[2][2]=(tv[1][1]*tv[2][2]-tv[2][1]*tv[1][2])/detABC;
2576   for (i=0; i<3; i++) for (j=0; j<3; j++) S[i][j]=0.;
2577   for (i=0; i<3; i++) for (j=0; j<3; j++) for (k=0; k<3; k++) S[i][j]+=unfold_abc[i][k]*inv[k][j];
2578   for (i=0; i<3; i++) free(inv[i]); free(inv);
2579 }
2580 
2581 /* det(v-a-b-c,b,c) */
det1(const double * a,const double * b,const double * c,const double * v)2582 static double det1(const double* a,const double* b,const double* c,const double* v) {
2583   return (v[0]-a[0]-b[0]-c[0])*b[1]*c[2]+b[0]*c[1]*(v[2]-a[2]-b[2]-c[2])+c[0]*(v[1]-a[1]-b[1]-c[1])*b[2]-c[0]*b[1]*(v[2]-a[2]-b[2]-c[2])-(v[1]-a[1]-b[1]-c[1])*b[0]*c[2]-(v[0]-a[0]-b[0]-c[0])*c[1]*b[2];}
2584 
insidecube(const double * va,const double * vb,const double * vc,const double * vatom)2585 int insidecube(const double* va,const double* vb,const double* vc,const double* vatom) {
2586   double chk0=det(vatom,va,vb);
2587   double chk1=det(vatom,vb,vc);
2588   double chk2=det(vatom,vc,va);
2589   double chk3=det1(vc,va,vb,vatom);
2590   double chk4=det1(va,vb,vc,vatom);
2591   double chk5=det1(vb,vc,va,vatom);
2592   if (fabs(chk5)<0.0000001) return 0;
2593   if (fabs(chk4)<0.0000001) return 0;
2594   if (fabs(chk3)<0.0000001) return 0;
2595   if (fabs(chk2)<0.0000001) chk2=0.;
2596   if (fabs(chk1)<0.0000001) chk1=0.;
2597   if (fabs(chk0)<0.0000001) chk0=0.;
2598   if (chk0*chk3>0.0000001) return 0;
2599   if (chk1*chk4>0.0000001) return 0;
2600   if (chk2*chk5>0.0000001) return 0;
2601   return 1;
2602 }
2603 
dot(const double * v1,const double * v2)2604 static double dot(const double* v1,const double* v2) {
2605   double dotsum=0.;
2606   int i;
2607   for (i=0; i<3; i++) dotsum+=v1[i]*v2[i];
2608   return dotsum;
2609 }
2610 
getnorbperatom()2611 void getnorbperatom() {
2612   Norbperatom = (int*)malloc(sizeof(int)*atomnum);
2613   int ct_AN, wan1, TNO1;
2614   for (ct_AN=1; ct_AN<=atomnum; ct_AN++){
2615     wan1 = WhatSpecies[ct_AN];
2616     TNO1 = Spe_Total_CNO[wan1];
2617     Norbperatom[ct_AN-1] = TNO1;
2618   }
2619 
2620   Norb=0;
2621   int* Ibegin;
2622   Ibegin = (int*)malloc(sizeof(int)*atomnum);
2623   int i;
2624   for (i=0; i<atomnum; i++) {Ibegin[i]=Norb; Norb+=Norbperatom[i];}
2625   free(Ibegin);
2626 }
2627 
MapR(const int i,const int j,const int k)2628 static int MapR(const int i,const int j,const int k) { return _MapR[i+10][j+10][k+10];}
2629 
AddR(const int a,const int b,const int c)2630 int AddR(const int a,const int b,const int c) { if (MapR(a,b,c)==-1) {
2631     int i, j;
2632     int** tmpRlist;
2633     tmpRlist=(int**)malloc(sizeof(int*)*NR);
2634     for (i=0; i<NR; i++) {
2635       tmpRlist[i]=(int*)malloc(sizeof(int)*3);
2636       for (j=0; j<3; j++) {
2637 	tmpRlist[i][j]=Rlist[i][j];
2638       }
2639     }
2640 
2641     for (i=0; i<NR; i++) free(Rlist[i]);
2642     free(Rlist);
2643     Rlist=(int**)malloc(sizeof(int*)*++NR);
2644     for (i=0; i<NR; i++) {
2645       Rlist[i]=(int*)malloc(sizeof(int)*3);
2646     }
2647 
2648     Rlist[NR-1][0]=a; Rlist[NR-1][1]=b; Rlist[NR-1][2]=c;
2649     for (i=0; i<NR-1; i++) for (j=0; j<3; j++)
2650       Rlist[i][j]=tmpRlist[i][j];
2651     if ((a>10)||(a<-10)||(b>10)||(b<-10)||(c>10)||(c<-10)) {
2652       free(tmpRlist);
2653       for (i=0; i<NR-1; i++) free(tmpRlist[i]);
2654       free(tmpRlist);
2655       printf("R in overlap matrix is larger than expected\n");
2656       exitcode=1;
2657       return -9999;
2658     }
2659     _MapR[a+10][b+10][c+10]=NR-1;
2660 
2661     for (i=0; i<NR-1; i++) free(tmpRlist[i]);
2662     free(tmpRlist);
2663 
2664     return MapR(a,b,c);
2665   } else return MapR(a,b,c);
2666 }
2667 
buildMapRlist()2668 void buildMapRlist() {
2669   int i, j, k, l, m;
2670   _MapR=(int***)malloc(sizeof(int**)*21);
2671   for (i=0; i<21; i++) {
2672     _MapR[i]=(int**)malloc(sizeof(int*)*21);
2673     for (j=0; j<21; j++) {
2674       _MapR[i][j]=(int*)malloc(sizeof(int)*21);
2675       for (k=0; k<21; k++) _MapR[i][j][k]=-1;
2676     }
2677   }
2678   _MapR[10][10][10]=0;
2679 
2680   NR=1;
2681   Rlist=(int**)malloc(sizeof(int*)*NR);
2682   for (i=0; i<NR; i++) {
2683     Rlist[i]=(int*)malloc(sizeof(int)*3);
2684     for (j=0; j<3; j++) {
2685       Rlist[i][j]=0;
2686     }
2687   }
2688 
2689   int Gc_AN,Mc_AN,Gh_AN,h_AN;
2690   int num,wan1,wan2,TNO1,TNO2,Rn;
2691   int numprocs,myid,ID,tag=999;
2692   double *Tmp_Vec;
2693   Tmp_Vec = (double*)malloc(sizeof(double)*List_YOUSO[8]*List_YOUSO[7]*List_YOUSO[7]);
2694 
2695   for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
2696     for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2697       Gh_AN = natn[Gc_AN][h_AN];
2698       Rn = ncn[Gc_AN][h_AN];
2699       AddR(atv_ijk[Rn][1], atv_ijk[Rn][2], atv_ijk[Rn][3]);
2700       if (exitcode==1) {
2701 	for (i=0; i<21; i++) for (j=0; j<21; j++) free(_MapR[i][j]);
2702 	for (i=0; i<21; i++) free(_MapR[i]); free(_MapR);
2703 	for (i=0; i<NR; i++) free(Rlist[i]); free(Rlist);
2704 	free(Tmp_Vec);
2705 	return;
2706       }
2707     }
2708   }
2709 
2710   Elem=(double*****)malloc(sizeof(double****)*NR);
2711   for (i=0; i<NR; i++) {
2712     Elem[i]=(double****)malloc(sizeof(double***)*atomnum);
2713     for (j=0; j<atomnum; j++) {
2714       Elem[i][j]=(double***)malloc(sizeof(double**)*atomnum);
2715       for (k=0; k<atomnum; k++) {
2716 	Elem[i][j][k]=(double**)malloc(sizeof(double*)*Norbperatom[j]);
2717 	for (l=0; l<Norbperatom[j]; l++) {
2718 	  Elem[i][j][k][l]=(double*)malloc(sizeof(double)*Norbperatom[k]);
2719 	  for (m=0; m<Norbperatom[k]; m++) {
2720 	    Elem[i][j][k][l][m]=-9999999.88888;
2721 	  }}}}}
2722 
2723   MPI_Status stat;
2724   MPI_Request request;
2725 
2726   /* MPI */
2727   MPI_Comm_size(mpi_comm_level1,&numprocs);
2728   MPI_Comm_rank(mpi_comm_level1,&myid);
2729 
2730   for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
2731     ID = G2ID[Gc_AN];
2732 
2733     if (myid==ID){
2734 
2735       num = 0;
2736 
2737       Mc_AN = F_G2M[Gc_AN];
2738       wan1 = WhatSpecies[Gc_AN];
2739       TNO1 = Spe_Total_CNO[wan1];
2740       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2741         Rn = ncn[Gc_AN][h_AN];
2742         Gh_AN = natn[Gc_AN][h_AN];
2743         wan2 = WhatSpecies[Gh_AN];
2744         TNO2 = Spe_Total_CNO[wan2];
2745 
2746         if (Cnt_switch==0){
2747           for (i=0; i<TNO1; i++){
2748             for (j=0; j<TNO2; j++){
2749               Tmp_Vec[num] = OLP[0][Mc_AN][h_AN][i][j];
2750               num++;
2751             }
2752           }
2753         }
2754         else{
2755           for (i=0; i<TNO1; i++){
2756             for (j=0; j<TNO2; j++){
2757               Tmp_Vec[num] = CntOLP[0][Mc_AN][h_AN][i][j];
2758               num++;
2759             }
2760           }
2761         }
2762       }
2763 
2764       if (myid!=Host_ID){
2765         MPI_Isend(&num, 1, MPI_INT, Host_ID, tag, mpi_comm_level1, &request);
2766         MPI_Wait(&request,&stat);
2767         MPI_Isend(&Tmp_Vec[0], num, MPI_DOUBLE, Host_ID, tag, mpi_comm_level1, &request);
2768         MPI_Wait(&request,&stat);
2769       }
2770       else{
2771 	/*        fwrite(Tmp_Vec, sizeof(double), num, fp); */
2772       }
2773     }
2774 
2775     else if (ID!=myid && myid==Host_ID){
2776       MPI_Recv(&num, 1, MPI_INT, ID, tag, mpi_comm_level1, &stat);
2777       MPI_Recv(&Tmp_Vec[0], num, MPI_DOUBLE, ID, tag, mpi_comm_level1, &stat);
2778       /*      fwrite(Tmp_Vec, sizeof(double), num, fp);*/
2779     }
2780 
2781     num = 0;
2782     wan1 = WhatSpecies[Gc_AN];
2783     TNO1 = Spe_Total_CNO[wan1];
2784     for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
2785       Rn = ncn[Gc_AN][h_AN];
2786       Gh_AN = natn[Gc_AN][h_AN];
2787       wan2 = WhatSpecies[Gh_AN];
2788       TNO2 = Spe_Total_CNO[wan2];
2789       for (i=0; i<TNO1; i++){
2790 	for (j=0; j<TNO2; j++){
2791 	  Elem[MapR(atv_ijk[Rn][1],atv_ijk[Rn][2],atv_ijk[Rn][3])][Gc_AN-1][Gh_AN-1][i][j]=Tmp_Vec[num];
2792 	  num++;
2793 	}
2794       }
2795     }
2796   }
2797 
2798   free(Tmp_Vec);
2799 }
2800 
2801 /* assign each R N with r */
buildtabr4RN(const double * a,const double * b,const double * c,double * origin,const int * mapN2n)2802 void buildtabr4RN(const double* a,const double* b,const double*c,double* origin,const int* mapN2n) {
2803   double* A;
2804   double* B;
2805   double* C;
2806   double** Atoms;
2807   double*tmp;
2808   A = (double*)malloc(sizeof(double)*3);
2809   B = (double*)malloc(sizeof(double)*3);
2810   C = (double*)malloc(sizeof(double)*3);
2811   A[0]=tv[1][1];
2812   A[1]=tv[1][2];
2813   A[2]=tv[1][3];
2814   B[0]=tv[2][1];
2815   B[1]=tv[2][2];
2816   B[2]=tv[2][3];
2817   C[0]=tv[3][1];
2818   C[1]=tv[3][2];
2819   C[2]=tv[3][3];
2820   int i, j, k;
2821   int iR, iN;
2822   int esti=0,estj=0,estk=0;
2823   Atoms = (double**)malloc(sizeof(double*)*atomnum);
2824   for (i=0; i<atomnum; i++) {
2825     Atoms[i] = (double*)malloc(sizeof(double)*3);
2826     for (j=0; j<3; j++) Atoms[i][j]=Gxyz[i+1][j+1];
2827   }
2828 
2829   double X,Y,Z;
2830   tmp = (double*)malloc(sizeof(double)*3);
2831   tabr4RN=(int***)malloc(sizeof(int**)*NR);
2832   for (i=0; i<NR; i++) {
2833     tabr4RN[i]=(int**)malloc(sizeof(int*)*atomnum);
2834     for (j=0; j<atomnum; j++) {
2835       tabr4RN[i][j]=(int*)malloc(sizeof(int)*3);
2836       tabr4RN[i][j][0]=-99999;
2837     }
2838   }
2839 
2840   /*  suggesting the origin of the reference cell */
2841   if ((fabs(origin[0]+0.9999900023114)<0.00000000001)&&
2842       (fabs(origin[1]+0.9999900047614)<0.00000000001)&&
2843       (fabs(origin[2]+0.9999900058914)<0.00000000001)) {
2844     double orgx,orgy,orgz;
2845     double reforgx,reforgy,reforgz;
2846     reforgx=Atoms[0][0]-a[0]-b[0]-c[0]+0.124966998688568*(a[0]+b[0]+c[0]);
2847     reforgy=Atoms[0][1]-a[1]-b[1]-c[1]+0.124966997688568*(a[1]+b[1]+c[1]);
2848     reforgz=Atoms[0][2]-a[2]-b[2]-c[2]+0.124966996688568*(a[2]+b[2]+c[2]);
2849     int*** nrtable;
2850     nrtable=(int***)malloc(sizeof(int**)*8);
2851     for (i=0; i<8; i++) {
2852       nrtable[i]=(int**)malloc(sizeof(int*)*8);
2853       for (j=0; j<8; j++) {
2854 	nrtable[i][j]=(int*)malloc(sizeof(int)*8);
2855       }}
2856 
2857     int tmpi,tmpj,tmpk;
2858     for (tmpi=0; tmpi<8; tmpi++) for (tmpj=0; tmpj<8; tmpj++) for (tmpk=0; tmpk<8; tmpk++) {
2859       nr=1;
2860       rlist=(int**)malloc(sizeof(int*)*1);
2861       for (i=0; i<1; i++) {
2862 	rlist[i]=(int*)malloc(sizeof(int)*3);
2863 	for (j=0; j<3; j++) {
2864 	  rlist[i][j]=0;
2865 	}
2866       }
2867       for (j=0; j<atomnum; j++) tabr4RN[0][j][0]=-99999;
2868       orgx=reforgx+(double)tmpi*a[0]/8.+(double)tmpj*b[0]/8.+(double)tmpk*c[0]/8.;
2869       orgy=reforgy+(double)tmpi*a[1]/8.+(double)tmpj*b[1]/8.+(double)tmpk*c[1]/8.;
2870       orgz=reforgz+(double)tmpi*a[2]/8.+(double)tmpj*b[2]/8.+(double)tmpk*c[2]/8.;
2871 
2872       for (iN=0; iN<atomnum; iN++) {
2873         double estl=9999.;
2874 	double X=(double)Rlist[0][0]*A[0]+(double)Rlist[0][1]*B[0]+(double)Rlist[0][2]*C[0]+Atoms[iN][0];
2875 	double Y=(double)Rlist[0][0]*A[1]+(double)Rlist[0][1]*B[1]+(double)Rlist[0][2]*C[1]+Atoms[iN][1];
2876 	double Z=(double)Rlist[0][0]*A[2]+(double)Rlist[0][1]*B[2]+(double)Rlist[0][2]*C[2]+Atoms[iN][2];
2877 	for (i=-2*atomnum; i<=2*atomnum; i+=5) for (j=-2*atomnum; j<=2*atomnum; j+=5) for (k=-2*atomnum; k<=2*atomnum; k+=5) {
2878 	  double x=(double)i*a[0]+(double)j*b[0]+(double)k*c[0]+orgx;
2879 	  double y=(double)i*a[1]+(double)j*b[1]+(double)k*c[1]+orgy;
2880 	  double z=(double)i*a[2]+(double)j*b[2]+(double)k*c[2]+orgz;
2881 	  tmp[0]=X-x;
2882 	  tmp[1]=Y-y;
2883 	  tmp[2]=Z-z;
2884 	  if (dis(tmp)<estl) { estl=dis(tmp); esti=i; estj=j; estk=k; }
2885 	}
2886 	int leave=0;
2887 	for (i=esti-10; i<=esti+10; i++) {
2888 	  if (leave==1) break;
2889 	  for (j=estj-10; j<=estj+10; j++) {
2890             if (leave==1) break;
2891 	    for (k=estk-10; k<=estk+10; k++) {
2892               double x=(double)i*a[0]+(double)j*b[0]+(double)k*c[0]+orgx;
2893               double y=(double)i*a[1]+(double)j*b[1]+(double)k*c[1]+orgy;
2894               double z=(double)i*a[2]+(double)j*b[2]+(double)k*c[2]+orgz;
2895               tmp[0]=X-x;
2896               tmp[1]=Y-y;
2897               tmp[2]=Z-z;
2898 	      if (insidecube(a,b,c,tmp)) {
2899 		tabr4RN[0][iN][0]=i;
2900 		tabr4RN[0][iN][1]=j;
2901 		tabr4RN[0][iN][2]=k;
2902 		addr(i,j,k); leave=1; break;
2903 	      }
2904             }}}
2905       }
2906       nrtable[tmpi][tmpj][tmpk]=nr;
2907       for (iN=0; iN<atomnum; iN++) {
2908         int chka=tabr4RN[0][iN][0];
2909         int chkb=tabr4RN[0][iN][1];
2910         int chkc=tabr4RN[0][iN][2];
2911         int chkn=mapN2n[iN];
2912         int iNp;
2913         for (iNp=iN+1; iNp<atomnum; iNp++)
2914           if (((chka==tabr4RN[0][iNp][0])&&
2915                (chkb==tabr4RN[0][iNp][1])&&
2916                (chkc==tabr4RN[0][iNp][2])&&
2917                (chkn==mapN2n[iNp]))) nrtable[tmpi][tmpj][tmpk]=-1;
2918       }
2919       for (i=0; i<nr; i++) free(rlist[i]); free(rlist);
2920     }
2921 
2922     int smallestnr;
2923     int leave=0;
2924     for (tmpi=0; tmpi<8; tmpi++) {
2925       if (leave==1) break;
2926       for (tmpj=0; tmpj<8; tmpj++) {
2927         if (leave==1) break;
2928 	for (tmpk=0; tmpk<8; tmpk++)
2929 	  if (nrtable[tmpi][tmpj][tmpk]>0) { smallestnr=nrtable[tmpi][tmpj][tmpk]; leave=1; break; }
2930       }}
2931 
2932     for (tmpi=0; tmpi<8; tmpi++) for (tmpj=0; tmpj<8; tmpj++) for (tmpk=0; tmpk<8; tmpk++)
2933       if ((nrtable[tmpi][tmpj][tmpk]>0)&&(nrtable[tmpi][tmpj][tmpk]<smallestnr)) smallestnr=nrtable[tmpi][tmpj][tmpk];
2934     for (tmpi=0; tmpi<8; tmpi++) for (tmpj=0; tmpj<8; tmpj++) for (tmpk=0; tmpk<8; tmpk++)
2935       if (nrtable[tmpi][tmpj][tmpk]==smallestnr) {
2936         origin[0]=reforgx+(double)tmpi*a[0]/8.+(double)tmpj*b[0]/8.+(double)tmpk*c[0]/8.;
2937         origin[1]=reforgy+(double)tmpi*a[1]/8.+(double)tmpj*b[1]/8.+(double)tmpk*c[1]/8.;
2938         origin[2]=reforgz+(double)tmpi*a[2]/8.+(double)tmpj*b[2]/8.+(double)tmpk*c[2]/8.;
2939       }
2940 
2941     for (i=0; i<8; i++) for (j=0; j<8; j++) free(nrtable[i][j]);
2942     for (i=0; i<8; i++) free(nrtable[i]);
2943     free(nrtable);
2944   }
2945   /*  finish suggesting the reference origin      */
2946 
2947   nr=1;
2948   rlist=(int**)malloc(sizeof(int*)*1);
2949   for (i=0; i<1; i++) {
2950     rlist[i]=(int*)malloc(sizeof(int)*3);
2951     for (j=0; j<3; j++) {
2952       rlist[i][j]=0;
2953     }
2954   }
2955 
2956   for (j=0; j<atomnum; j++) tabr4RN[0][j][0]=-99999;
2957 
2958   int shiftorigin=0;
2959   /* try to find r vector for RN */
2960   for (iR=0; iR<NR; iR++) { for (iN=0; iN<atomnum; iN++) {
2961       double estl=9999.;
2962       double X=(double)Rlist[iR][0]*A[0]+(double)Rlist[iR][1]*B[0]+(double)Rlist[iR][2]*C[0]+Atoms[iN][0];
2963       double Y=(double)Rlist[iR][0]*A[1]+(double)Rlist[iR][1]*B[1]+(double)Rlist[iR][2]*C[1]+Atoms[iN][1];
2964       double Z=(double)Rlist[iR][0]*A[2]+(double)Rlist[iR][1]*B[2]+(double)Rlist[iR][2]*C[2]+Atoms[iN][2];
2965       for (i=-2*atomnum; i<=2*atomnum; i+=5) for (j=-2*atomnum; j<=2*atomnum; j+=5) for (k=-2*atomnum; k<=2*atomnum; k+=5) {
2966 	double x=(double)i*a[0]+(double)j*b[0]+(double)k*c[0]+origin[0];
2967 	double y=(double)i*a[1]+(double)j*b[1]+(double)k*c[1]+origin[1];
2968 	double z=(double)i*a[2]+(double)j*b[2]+(double)k*c[2]+origin[2];
2969 	tmp[0]=X-x;
2970 	tmp[1]=Y-y;
2971 	tmp[2]=Z-z;
2972 	if (dis(tmp)<estl) { estl=dis(tmp); esti=i; estj=j; estk=k; }
2973       }
2974       int leave=0;
2975       for (i=esti-10; i<=esti+10; i++) {
2976         if (leave==1) break;
2977         for (j=estj-10; j<=estj+10; j++) {
2978 	  if (leave==1) break;
2979           for (k=estk-10; k<=estk+10; k++) {
2980 	    double x=(double)i*a[0]+(double)j*b[0]+(double)k*c[0]+origin[0];
2981 	    double y=(double)i*a[1]+(double)j*b[1]+(double)k*c[1]+origin[1];
2982 	    double z=(double)i*a[2]+(double)j*b[2]+(double)k*c[2]+origin[2];
2983 	    tmp[0]=X-x;
2984 	    tmp[1]=Y-y;
2985 	    tmp[2]=Z-z;
2986             if (insidecube(a,b,c,tmp)) {
2987               tabr4RN[iR][iN][0]=i;
2988               tabr4RN[iR][iN][1]=j;
2989               tabr4RN[iR][iN][2]=k;
2990               addr(i,j,k); leave=1; break;
2991             }
2992 	  }}}
2993       if (tabr4RN[iR][iN][0]==-99999) { shiftorigin=1; break; }
2994     }
2995     if (shiftorigin==1) break;
2996   }
2997 
2998   /*
2999     checking if two same kinds of normal cell atoms are assigned to the same r vector (could be due to locating around boundaries)
3000   */
3001 
3002   for (iR=0; iR<NR; iR++) { if (shiftorigin==1) break;
3003     for (iN=0; iN<atomnum; iN++) {
3004       int chka=tabr4RN[iR][iN][0];
3005       int chkb=tabr4RN[iR][iN][1];
3006       int chkc=tabr4RN[iR][iN][2];
3007       int chkn=mapN2n[iN];
3008       int iNp, iRp;
3009       for (iNp=iN+1; iNp<atomnum; iNp++)
3010         if ((shiftorigin==1)||((chka==tabr4RN[iR][iNp][0])&&
3011 			       (chkb==tabr4RN[iR][iNp][1])&&
3012 			       (chkc==tabr4RN[iR][iNp][2])&&
3013 			       (chkn==mapN2n[iNp]))) {shiftorigin=1; break;}
3014       for (iRp=iR+1; iRp<NR; iRp++) for (iNp=0; iNp<atomnum; iNp++)
3015         if ((chka==tabr4RN[iRp][iNp][0])&&
3016             (chkb==tabr4RN[iRp][iNp][1])&&
3017             (chkc==tabr4RN[iRp][iNp][2])&&
3018             (chkn==mapN2n[iNp])) {shiftorigin=1; break;}
3019     }}
3020 
3021   if (shiftorigin==0) buildrnmap(mapN2n);
3022 
3023   free(A);
3024   free(B);
3025   free(C);
3026   free(tmp);
3027   for (i=0; i<atomnum; i++) free(Atoms[i]);
3028   free(Atoms);
3029 
3030   if (shiftorigin==1) { printf("Cannot assign atoms in the reference cell properly! Could be due to more than one same atom in the reference cell!\nCheck the input file, maybe the structure is highly disordered or you need to set the reference origin by yourself!\n\n");
3031     exitcode=1;
3032     for (i=0; i<nr; i++) free(rlist[i]); free(rlist);
3033     for (i=0; i<NR; i++) for (j=0; j<atomnum; j++) free(tabr4RN[i][j]);
3034     for (i=0; i<NR; i++) free(tabr4RN[i]); free(tabr4RN);
3035     return;
3036   }
3037 }
3038 
determine_kpts(const int nk,double ** klist)3039 void determine_kpts(const int nk, double** klist) {
3040 
3041   int i,j;
3042   double** reciplatt;
3043   reciplatt = (double**)malloc(sizeof(double*)*3);
3044   for (i=0; i<3; i++) reciplatt[i] = (double*)malloc(sizeof(double)*3);
3045   double V=2.*PI/(unfold_abc[0][0]*unfold_abc[1][1]*unfold_abc[2][2]
3046 		  +unfold_abc[1][0]*unfold_abc[2][1]*unfold_abc[0][2]+unfold_abc[2][0]*unfold_abc[1][2]*unfold_abc[0][1]
3047 		  -unfold_abc[2][0]*unfold_abc[1][1]*unfold_abc[0][2]-unfold_abc[0][1]*unfold_abc[1][0]*unfold_abc[2][2]
3048 		  -unfold_abc[0][0]*unfold_abc[2][1]*unfold_abc[1][2]);
3049   reciplatt[0][0]=V*(unfold_abc[1][1]*unfold_abc[2][2]-unfold_abc[2][1]*unfold_abc[1][2]);
3050   reciplatt[0][1]=V*(unfold_abc[2][0]*unfold_abc[1][2]-unfold_abc[1][0]*unfold_abc[2][2]);
3051   reciplatt[0][2]=V*(unfold_abc[1][0]*unfold_abc[2][1]-unfold_abc[2][0]*unfold_abc[1][1]);
3052   reciplatt[1][0]=V*(unfold_abc[2][1]*unfold_abc[0][2]-unfold_abc[0][1]*unfold_abc[2][2]);
3053   reciplatt[1][1]=V*(unfold_abc[0][0]*unfold_abc[2][2]-unfold_abc[2][0]*unfold_abc[0][2]);
3054   reciplatt[1][2]=V*(unfold_abc[2][0]*unfold_abc[0][1]-unfold_abc[0][0]*unfold_abc[2][1]);
3055   reciplatt[2][0]=V*(unfold_abc[0][1]*unfold_abc[1][2]-unfold_abc[1][1]*unfold_abc[0][2]);
3056   reciplatt[2][1]=V*(unfold_abc[1][0]*unfold_abc[0][2]-unfold_abc[0][0]*unfold_abc[1][2]);
3057   reciplatt[2][2]=V*(unfold_abc[0][0]*unfold_abc[1][1]-unfold_abc[1][0]*unfold_abc[0][1]);
3058 
3059   double dis=0.;
3060   for (i=0; i<nk-1; i++)
3061     dis+=sqrt(pow((klist[i][1]-klist[i+1][1])*reciplatt[0][0]+(klist[i][2]-klist[i+1][2])*reciplatt[1][0]+(klist[i][3]-klist[i+1][3])*reciplatt[2][0],2)+
3062               pow((klist[i][1]-klist[i+1][1])*reciplatt[0][1]+(klist[i][2]-klist[i+1][2])*reciplatt[1][1]+(klist[i][3]-klist[i+1][3])*reciplatt[2][1],2)+
3063               pow((klist[i][1]-klist[i+1][1])*reciplatt[0][2]+(klist[i][2]-klist[i+1][2])*reciplatt[1][2]+(klist[i][3]-klist[i+1][3])*reciplatt[2][2],2));
3064 
3065   np = (int*)malloc(sizeof(int)*(nk));
3066 
3067   if (unfold_nkpts<=nk) {
3068     for (i=0; i<nk; i++) np[i]=1;
3069     totnkpts=nk;
3070   }
3071   else {
3072     double intvl=dis/(unfold_nkpts-1);
3073     for (i=0; i<nk-1; i++) {
3074       np[i]=
3075 	(int)(sqrt(pow((klist[i][1]-klist[i+1][1])*reciplatt[0][0]+(klist[i][2]-klist[i+1][2])*reciplatt[1][0]+(klist[i][3]-klist[i+1][3])*reciplatt[2][0],2)+
3076 		   pow((klist[i][1]-klist[i+1][1])*reciplatt[0][1]+(klist[i][2]-klist[i+1][2])*reciplatt[1][1]+(klist[i][3]-klist[i+1][3])*reciplatt[2][1],2)+
3077 		   pow((klist[i][1]-klist[i+1][1])*reciplatt[0][2]+(klist[i][2]-klist[i+1][2])*reciplatt[1][2]+(klist[i][3]-klist[i+1][3])*reciplatt[2][2],2))/
3078               intvl);
3079       if (np[i]==0) np[i]=1;
3080     }
3081     np[nk-1]=1;
3082     totnkpts=1;
3083     for (i=0; i<nk-1; i++) totnkpts+=np[i];
3084   }
3085 
3086   for (i=0; i<3; i++) free(reciplatt[i]);
3087   free(reciplatt);
3088 }
3089 
3090 
3091 
3092 
3093 
3094 
3095 
3096 
3097 
3098 
3099 
3100 
3101 
3102 
3103 
3104 
3105 
3106 
3107 
3108