1 /**********************************************************************
2   Set_Density_Grid.c:
3 
4      Set_Density_Grid.c is a subroutine to calculate a charge density
5      on grid by one-particle wave functions.
6 
7   Log of Set_Density_Grid.c:
8 
9      22/Nov/2001  Released by T.Ozaki
10      19/Apr/2013  Modified by A.M.Ito
11 
12 ***********************************************************************/
13 
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <time.h>
17 #include <math.h>
18 #include "openmx_common.h"
19 #include "mpi.h"
20 #include <omp.h>
21 
22 #define  measure_time   0
23 
24 
25 
Set_Density_Grid(int Cnt_kind,int Calc_CntOrbital_ON,double ***** CDM)26 double Set_Density_Grid(int Cnt_kind, int Calc_CntOrbital_ON, double *****CDM)
27 {
28   static int firsttime=1;
29   int al,L0,Mul0,M0,p,size1,size2;
30   int Gc_AN,Mc_AN,Mh_AN,LN,AN,BN,CN;
31   int n1,n2,n3,k1,k2,k3,N3[4];
32   int Cwan,NO0,NO1,Rn,N,Hwan,i,j,k,n;
33   int NN_S,NN_R;
34   unsigned long long int N2D,n2D,GN;
35   int Max_Size,My_Max;
36   int size_Tmp_Den_Grid;
37   int size_Den_Snd_Grid_A2B;
38   int size_Den_Rcv_Grid_A2B;
39   int h_AN,Gh_AN,Rnh,spin,Nc,GRc,Nh,Nog;
40   int Nc_0,Nc_1,Nc_2,Nc_3,Nh_0,Nh_1,Nh_2,Nh_3;
41 
42   double threshold;
43   double tmp0,tmp1,sk1,sk2,sk3,tot_den,sum;
44   double tmp0_0,tmp0_1,tmp0_2,tmp0_3;
45   double sum_0,sum_1,sum_2,sum_3;
46   double d1,d2,d3,cop,sip,sit,cot;
47   double x,y,z,Cxyz[4];
48   double TStime,TEtime;
49   double ***Tmp_Den_Grid;
50   double **Den_Snd_Grid_A2B;
51   double **Den_Rcv_Grid_A2B;
52   double *tmp_array;
53   double *tmp_array2;
54   double *orbs0,*orbs1;
55   double *orbs0_0,*orbs0_1,*orbs0_2,*orbs0_3;
56   double *orbs1_0,*orbs1_1,*orbs1_2,*orbs1_3;
57   double ***tmp_CDM;
58   int *Snd_Size,*Rcv_Size;
59   int numprocs,myid,tag=999,ID,IDS,IDR;
60   double Stime_atom, Etime_atom;
61   double time0,time1,time2;
62 
63   MPI_Status stat;
64   MPI_Request request;
65   MPI_Status *stat_send;
66   MPI_Status *stat_recv;
67   MPI_Request *request_send;
68   MPI_Request *request_recv;
69 
70   /* for OpenMP */
71   int OMPID,Nthrds;
72 
73   /* MPI */
74   MPI_Comm_size(mpi_comm_level1,&numprocs);
75   MPI_Comm_rank(mpi_comm_level1,&myid);
76 
77   dtime(&TStime);
78 
79   /* allocation of arrays */
80 
81   size_Tmp_Den_Grid = 0;
82   Tmp_Den_Grid = (double***)malloc(sizeof(double**)*(SpinP_switch+1));
83   for (i=0; i<(SpinP_switch+1); i++){
84     Tmp_Den_Grid[i] = (double**)malloc(sizeof(double*)*(Matomnum+1));
85     Tmp_Den_Grid[i][0] = (double*)malloc(sizeof(double)*1);
86     for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
87       Gc_AN = F_M2G[Mc_AN];
88       Tmp_Den_Grid[i][Mc_AN] = (double*)malloc(sizeof(double)*GridN_Atom[Gc_AN]);
89 
90       /* AITUNE */
91       for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
92 	Tmp_Den_Grid[i][Mc_AN][Nc] = 0.0;
93       }
94 
95       size_Tmp_Den_Grid += GridN_Atom[Gc_AN];
96     }
97   }
98 
99   size_Den_Snd_Grid_A2B = 0;
100   Den_Snd_Grid_A2B = (double**)malloc(sizeof(double*)*numprocs);
101   for (ID=0; ID<numprocs; ID++){
102     Den_Snd_Grid_A2B[ID] = (double*)malloc(sizeof(double)*Num_Snd_Grid_A2B[ID]*(SpinP_switch+1));
103     size_Den_Snd_Grid_A2B += Num_Snd_Grid_A2B[ID]*(SpinP_switch+1);
104   }
105 
106   size_Den_Rcv_Grid_A2B = 0;
107   Den_Rcv_Grid_A2B = (double**)malloc(sizeof(double*)*numprocs);
108   for (ID=0; ID<numprocs; ID++){
109     Den_Rcv_Grid_A2B[ID] = (double*)malloc(sizeof(double)*Num_Rcv_Grid_A2B[ID]*(SpinP_switch+1));
110     size_Den_Rcv_Grid_A2B += Num_Rcv_Grid_A2B[ID]*(SpinP_switch+1);
111   }
112 
113   /* PrintMemory */
114 
115   if (firsttime==1){
116     PrintMemory("Set_Density_Grid: AtomDen_Grid",    sizeof(double)*size_Tmp_Den_Grid, NULL);
117     PrintMemory("Set_Density_Grid: Den_Snd_Grid_A2B",sizeof(double)*size_Den_Snd_Grid_A2B, NULL);
118     PrintMemory("Set_Density_Grid: Den_Rcv_Grid_A2B",sizeof(double)*size_Den_Rcv_Grid_A2B, NULL);
119     firsttime = 0;
120   }
121 
122   /****************************************************
123                 when orbital optimization
124   ****************************************************/
125 
126   if (Calc_CntOrbital_ON==1 && Cnt_kind==0 && Cnt_switch==1){
127 
128     for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
129 
130       dtime(&Stime_atom);
131 
132       /* COrbs_Grid */
133 
134       Gc_AN = M2G[Mc_AN];
135       Cwan = WhatSpecies[Gc_AN];
136       NO0 = Spe_Total_CNO[Cwan];
137       for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
138 
139         al = -1;
140 	for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
141 	  for (Mul0=0; Mul0<Spe_Num_CBasis[Cwan][L0]; Mul0++){
142 	    for (M0=0; M0<=2*L0; M0++){
143 
144 	      al++;
145 	      tmp0 = 0.0;
146 
147 	      for (p=0; p<Spe_Specified_Num[Cwan][al]; p++){
148 	        j = Spe_Trans_Orbital[Cwan][al][p];
149 	        tmp0 += CntCoes[Mc_AN][al][p]*Orbs_Grid[Mc_AN][Nc][j];/* AITUNE */
150 	      }
151 
152 	      COrbs_Grid[Mc_AN][al][Nc] = (Type_Orbs_Grid)tmp0;
153 	    }
154 	  }
155         }
156       }
157 
158       dtime(&Etime_atom);
159       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
160     }
161 
162     /**********************************************
163      MPI:
164 
165      COrbs_Grid
166     ***********************************************/
167 
168     /* allocation of arrays  */
169     Snd_Size = (int*)malloc(sizeof(int)*numprocs);
170     Rcv_Size = (int*)malloc(sizeof(int)*numprocs);
171 
172     /* find data size for sending and receiving */
173 
174     My_Max = -10000;
175     for (ID=0; ID<numprocs; ID++){
176 
177       IDS = (myid + ID) % numprocs;
178       IDR = (myid - ID + numprocs) % numprocs;
179 
180       if (ID!=0){
181         /*  sending size */
182         if (F_Snd_Num[IDS]!=0){
183           /* find data size */
184           size1 = 0;
185           for (n=0; n<F_Snd_Num[IDS]; n++){
186             Gc_AN = Snd_GAN[IDS][n];
187             Cwan = WhatSpecies[Gc_AN];
188             size1 += GridN_Atom[Gc_AN]*Spe_Total_CNO[Cwan];
189           }
190 
191           Snd_Size[IDS] = size1;
192           MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
193         }
194         else{
195           Snd_Size[IDS] = 0;
196         }
197 
198         /*  receiving size */
199         if (F_Rcv_Num[IDR]!=0){
200           MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
201           Rcv_Size[IDR] = size2;
202         }
203         else{
204           Rcv_Size[IDR] = 0;
205         }
206         if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);
207       }
208       else{
209         Snd_Size[IDS] = 0;
210         Rcv_Size[IDR] = 0;
211       }
212 
213       if (My_Max<Snd_Size[IDS]) My_Max = Snd_Size[IDS];
214       if (My_Max<Rcv_Size[IDR]) My_Max = Rcv_Size[IDR];
215 
216     }
217 
218     MPI_Allreduce(&My_Max, &Max_Size, 1, MPI_INT, MPI_MAX, mpi_comm_level1);
219     /* allocation of arrays */
220     tmp_array  = (double*)malloc(sizeof(double)*Max_Size);
221     tmp_array2 = (double*)malloc(sizeof(double)*Max_Size);
222 
223     /* send and receive COrbs_Grid */
224 
225     for (ID=0; ID<numprocs; ID++){
226 
227       IDS = (myid + ID) % numprocs;
228       IDR = (myid - ID + numprocs) % numprocs;
229 
230       if (ID!=0){
231 
232         /* sending of data */
233 
234         if (F_Snd_Num[IDS]!=0){
235 
236           /* find data size */
237           size1 = Snd_Size[IDS];
238 
239           /* multidimentional array to vector array */
240           k = 0;
241           for (n=0; n<F_Snd_Num[IDS]; n++){
242             Mc_AN = Snd_MAN[IDS][n];
243             Gc_AN = Snd_GAN[IDS][n];
244             Cwan = WhatSpecies[Gc_AN];
245             NO0 = Spe_Total_CNO[Cwan];
246             for (i=0; i<NO0; i++){
247               for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
248                 tmp_array[k] = COrbs_Grid[Mc_AN][i][Nc];
249                 k++;
250               }
251             }
252           }
253 
254           /* MPI_Isend */
255           MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS,
256                     tag, mpi_comm_level1, &request);
257         }
258 
259         /* receiving of block data */
260 
261         if (F_Rcv_Num[IDR]!=0){
262 
263           /* find data size */
264           size2 = Rcv_Size[IDR];
265 
266           /* MPI_Recv */
267           MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
268 
269           k = 0;
270           Mc_AN = F_TopMAN[IDR] - 1;
271           for (n=0; n<F_Rcv_Num[IDR]; n++){
272             Mc_AN++;
273             Gc_AN = Rcv_GAN[IDR][n];
274             Cwan = WhatSpecies[Gc_AN];
275             NO0 = Spe_Total_CNO[Cwan];
276 
277             for (i=0; i<NO0; i++){
278               for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
279                 COrbs_Grid[Mc_AN][i][Nc] = tmp_array2[k];
280                 k++;
281               }
282             }
283           }
284         }
285         if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);
286       }
287     }
288 
289     /* freeing of arrays  */
290     free(tmp_array);
291     free(tmp_array2);
292     free(Snd_Size);
293     free(Rcv_Size);
294   }
295 
296   /**********************************************
297               calculate Tmp_Den_Grid
298   ***********************************************/
299 
300   dtime(&time1);
301 
302 
303   /* AITUNE ========================== */
304   int OneD_Nloop = 0;
305   int ai_MaxNc = 0;
306   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
307     int Gc_AN = M2G[Mc_AN];
308     for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
309       OneD_Nloop++;
310       if(ai_MaxNc < GridN_Atom[Gc_AN]) {ai_MaxNc = GridN_Atom[Gc_AN];}
311     }
312   }
313   /* ai_MaxNc is maximum of GridN_Atom[] */
314 
315   int gNthrds;
316 #pragma omp parallel
317   {
318     gNthrds = omp_get_num_threads();
319   }
320 
321   double*** ai_tmpDG_all = (double***)malloc(sizeof(double**)*gNthrds);
322 
323   /* ========================== AITUNE */
324 
325 #pragma omp parallel shared(myid,G2ID,Orbs_Grid_FNAN,List_YOUSO,time_per_atom,Tmp_Den_Grid,Orbs_Grid,COrbs_Grid,Cnt_switch,Cnt_kind,GListTAtoms2,GListTAtoms1,NumOLG,CDM,SpinP_switch,WhatSpecies,ncn,F_G2M,natn,Spe_Total_CNO,M2G) private(OMPID,Nthrds,Mc_AN,h_AN,Stime_atom,Etime_atom,Gc_AN,Cwan,NO0,Gh_AN,Mh_AN,Rnh,Hwan,NO1,spin,i,j,tmp_CDM,Nog,Nc_0,Nc_1,Nc_2,Nc_3,Nh_0,Nh_1,Nh_2,Nh_3,orbs0_0,orbs0_1,orbs0_2,orbs0_3,orbs1_0,orbs1_1,orbs1_2,orbs1_3,sum_0,sum_1,sum_2,sum_3,tmp0_0,tmp0_1,tmp0_2,tmp0_3,Nc,Nh,orbs0,orbs1,sum,tmp0)
326   {
327 
328     orbs0 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
329     orbs1 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
330 
331     orbs0_0 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
332     orbs0_1 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
333     orbs0_2 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
334     orbs0_3 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
335     orbs1_0 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
336     orbs1_1 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
337     orbs1_2 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
338     orbs1_3 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
339 
340     tmp_CDM = (double***)malloc(sizeof(double**)*(SpinP_switch+1));
341     for (i=0; i<(SpinP_switch+1); i++){
342       tmp_CDM[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
343       for (j=0; j<List_YOUSO[7]; j++){
344 	tmp_CDM[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
345       }
346     }
347 
348     /* get info. on OpenMP */
349 
350     OMPID = omp_get_thread_num();
351     Nthrds = omp_get_num_threads();
352 
353 
354     /* AITUNE ========================== */
355 
356 
357     double *ai_tmpDGs[4];
358     {
359       int spin;
360       for (spin=0; spin<=SpinP_switch; spin++){
361 	ai_tmpDGs[spin] = (double*)malloc(sizeof(double)* ai_MaxNc);
362       }
363     }
364     ai_tmpDG_all[OMPID] = ai_tmpDGs;
365     /* ==================================== AITUNE */
366 
367 
368     /* for (Mc_AN=(OMPID+1); Mc_AN<=Matomnum; Mc_AN+=Nthrds){ AITUNE */
369     for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
370 
371       dtime(&Stime_atom);
372 
373       /* set data on Mc_AN */
374 
375       Gc_AN = M2G[Mc_AN];
376       Cwan = WhatSpecies[Gc_AN];
377       NO0 = Spe_Total_CNO[Cwan];
378 
379       int spin;
380       for (spin=0; spin<=SpinP_switch; spin++){
381 	int Nc;
382 	for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
383 	  ai_tmpDGs[spin][Nc] = 0.0;
384 	}
385       }
386 
387       for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
388 
389 	/* set data on h_AN */
390 
391 	Gh_AN = natn[Gc_AN][h_AN];
392 	Mh_AN = F_G2M[Gh_AN];
393 	Rnh = ncn[Gc_AN][h_AN];
394 	Hwan = WhatSpecies[Gh_AN];
395 	NO1 = Spe_Total_CNO[Hwan];
396 
397 	/* store CDM into tmp_CDM */
398 
399 	for (spin=0; spin<=SpinP_switch; spin++){
400 	  for (i=0; i<NO0; i++){
401 	    for (j=0; j<NO1; j++){
402 	      tmp_CDM[spin][i][j] = CDM[spin][Mc_AN][h_AN][i][j];
403 	    }
404 	  }
405 	}
406 
407 	/* summation of non-zero elements */
408 	/* for (Nog=0; Nog<NumOLG[Mc_AN][h_AN]; Nog++){ */
409 #pragma omp for
410 	for (Nog=0; Nog<NumOLG[Mc_AN][h_AN]-3; Nog+=4){
411 
412 	  Nc_0 = GListTAtoms1[Mc_AN][h_AN][Nog];
413 	  Nc_1 = GListTAtoms1[Mc_AN][h_AN][Nog+1];
414 	  Nc_2 = GListTAtoms1[Mc_AN][h_AN][Nog+2];
415 	  Nc_3 = GListTAtoms1[Mc_AN][h_AN][Nog+3];
416 
417 	  Nh_0 = GListTAtoms2[Mc_AN][h_AN][Nog];
418 	  Nh_1 = GListTAtoms2[Mc_AN][h_AN][Nog+1];
419 	  Nh_2 = GListTAtoms2[Mc_AN][h_AN][Nog+2];
420 	  Nh_3 = GListTAtoms2[Mc_AN][h_AN][Nog+3];
421 
422 	  /* Now under the orbital optimization */
423 	  if (Cnt_kind==0 && Cnt_switch==1){
424 	    for (i=0; i<NO0; i++){
425 	      orbs0_0[i] = COrbs_Grid[Mc_AN][i][Nc_0];
426 	      orbs0_1[i] = COrbs_Grid[Mc_AN][i][Nc_1];
427 	      orbs0_2[i] = COrbs_Grid[Mc_AN][i][Nc_2];
428 	      orbs0_3[i] = COrbs_Grid[Mc_AN][i][Nc_3];
429 	    }
430 	    for (j=0; j<NO1; j++){
431 	      orbs1_0[j] = COrbs_Grid[Mh_AN][j][Nh_0];
432 	      orbs1_1[j] = COrbs_Grid[Mh_AN][j][Nh_1];
433 	      orbs1_2[j] = COrbs_Grid[Mh_AN][j][Nh_2];
434 	      orbs1_3[j] = COrbs_Grid[Mh_AN][j][Nh_3];
435 	    }
436 	  }
437 	  /* else if ! "now under the orbital optimization" */
438 	  else{
439 	    for (i=0; i<NO0; i++){
440 	      orbs0_0[i] = Orbs_Grid[Mc_AN][Nc_0][i];
441 	      orbs0_1[i] = Orbs_Grid[Mc_AN][Nc_1][i];
442 	      orbs0_2[i] = Orbs_Grid[Mc_AN][Nc_2][i];
443 	      orbs0_3[i] = Orbs_Grid[Mc_AN][Nc_3][i];
444 	    }
445 
446             if (G2ID[Gh_AN]==myid){
447 	      for (j=0; j<NO1; j++){
448 		orbs1_0[j] = Orbs_Grid[Mh_AN][Nh_0][j];
449 		orbs1_1[j] = Orbs_Grid[Mh_AN][Nh_1][j];
450 		orbs1_2[j] = Orbs_Grid[Mh_AN][Nh_2][j];
451 		orbs1_3[j] = Orbs_Grid[Mh_AN][Nh_3][j];
452 	      }
453 	    }
454             else{
455 	      for (j=0; j<NO1; j++){
456 		orbs1_0[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog  ][j];
457 		orbs1_1[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog+1][j];
458 		orbs1_2[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog+2][j];
459 		orbs1_3[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog+3][j];
460 	      }
461 	    }
462 	  }
463 
464 	  for (spin=0; spin<=SpinP_switch; spin++){
465 
466 	    /* Tmp_Den_Grid */
467 
468 	    sum_0 = 0.0;
469 	    sum_1 = 0.0;
470 	    sum_2 = 0.0;
471 	    sum_3 = 0.0;
472 
473 	    for (i=0; i<NO0; i++){
474 
475 	      tmp0_0 = 0.0;
476 	      tmp0_1 = 0.0;
477 	      tmp0_2 = 0.0;
478 	      tmp0_3 = 0.0;
479 
480 	      for (j=0; j<NO1; j++){
481 		tmp0_0 += orbs1_0[j]*tmp_CDM[spin][i][j];
482 		tmp0_1 += orbs1_1[j]*tmp_CDM[spin][i][j];
483 		tmp0_2 += orbs1_2[j]*tmp_CDM[spin][i][j];
484 		tmp0_3 += orbs1_3[j]*tmp_CDM[spin][i][j];
485 	      }
486 
487 	      sum_0 += orbs0_0[i]*tmp0_0;
488 	      sum_1 += orbs0_1[i]*tmp0_1;
489 	      sum_2 += orbs0_2[i]*tmp0_2;
490 	      sum_3 += orbs0_3[i]*tmp0_3;
491 	    }
492 
493 	    ai_tmpDGs[spin][Nc_0] += sum_0;
494 	    ai_tmpDGs[spin][Nc_1] += sum_1;
495 	    ai_tmpDGs[spin][Nc_2] += sum_2;
496 	    ai_tmpDGs[spin][Nc_3] += sum_3;
497 
498 	    /*
499 	      Tmp_Den_Grid[spin][Mc_AN][Nc_0] += sum_0;
500 	      Tmp_Den_Grid[spin][Mc_AN][Nc_1] += sum_1;
501 	      Tmp_Den_Grid[spin][Mc_AN][Nc_2] += sum_2;
502 	      Tmp_Den_Grid[spin][Mc_AN][Nc_3] += sum_3;
503 	    */
504 
505 	  } /* spin */
506 	} /* Nog */
507 
508 #pragma omp for
509 	for (Nog = NumOLG[Mc_AN][h_AN] - (NumOLG[Mc_AN][h_AN] % 4); Nog<NumOLG[Mc_AN][h_AN]; Nog++){
510 	  /*for (; Nog<NumOLG[Mc_AN][h_AN]; Nog++){*/
511 
512 	  Nc = GListTAtoms1[Mc_AN][h_AN][Nog];
513 	  Nh = GListTAtoms2[Mc_AN][h_AN][Nog];
514 
515 
516 	  if (Cnt_kind==0 && Cnt_switch==1){
517 	    for (i=0; i<NO0; i++){
518 	      orbs0[i] = COrbs_Grid[Mc_AN][i][Nc];
519 	    }
520 	    for (j=0; j<NO1; j++){
521 	      orbs1[j] = COrbs_Grid[Mh_AN][j][Nh];
522 	    }
523 	  }
524 	  else{
525 	    for (i=0; i<NO0; i++){
526 	      orbs0[i] = Orbs_Grid[Mc_AN][Nc][i];
527 	    }
528 
529 	    if (G2ID[Gh_AN]==myid){
530 	      for (j=0; j<NO1; j++){
531 		orbs1[j] = Orbs_Grid[Mh_AN][Nh][j];
532 	      }
533 	    }
534 	    else{
535 	      for (j=0; j<NO1; j++){
536 		orbs1[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog][j];
537 	      }
538 	    }
539 	  }
540 
541 	  for (spin=0; spin<=SpinP_switch; spin++){
542 
543 
544 	    sum = 0.0;
545 	    for (i=0; i<NO0; i++){
546 	      tmp0 = 0.0;
547 	      for (j=0; j<NO1; j++){
548 		tmp0 += orbs1[j]*tmp_CDM[spin][i][j];
549 	      }
550 	      sum += orbs0[i]*tmp0;
551 	    }
552 
553 	    ai_tmpDGs[spin][Nc] += sum;
554 	    /*Tmp_Den_Grid[spin][Mc_AN][Nc] += sum;*/
555 	  }
556 
557 	} /* Nog */
558 
559       } /* h_AN */
560 
561       /* AITUNE   merge temporary buffer for all omp threads */
562       for (spin=0; spin<=SpinP_switch; spin++){
563 	int Nc;
564 #pragma omp for
565 	for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
566 	  double sum = 0.0;
567 	  int th;
568 	  for(th = 0; th < Nthrds; th++){
569 	    sum += ai_tmpDG_all[th][spin][Nc];
570 	  }
571 	  Tmp_Den_Grid[spin][Mc_AN][Nc] += sum;
572 	}
573       }
574 
575 
576       dtime(&Etime_atom);
577       time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
578 
579     } /* Mc_AN */
580 
581     /* freeing of arrays */
582 
583     free(orbs0);
584     free(orbs1);
585 
586     free(orbs0_0);
587     free(orbs0_1);
588     free(orbs0_2);
589     free(orbs0_3);
590     free(orbs1_0);
591     free(orbs1_1);
592     free(orbs1_2);
593     free(orbs1_3);
594 
595     for (i=0; i<(SpinP_switch+1); i++){
596       for (j=0; j<List_YOUSO[7]; j++){
597 	free(tmp_CDM[i][j]);
598       }
599       free(tmp_CDM[i]);
600 
601       free(ai_tmpDGs[i]); /* AITUNE */
602 
603     }
604     free(tmp_CDM);
605 
606 #pragma omp flush(Tmp_Den_Grid)
607 
608   } /* #pragma omp parallel */
609 
610   free(ai_tmpDG_all);
611 
612   dtime(&time2);
613   if(myid==0 && measure_time){
614     printf("Time for Part1=%18.5f\n",(time2-time1));fflush(stdout);
615   }
616 
617   /******************************************************
618       MPI communication from the partitions A to B
619   ******************************************************/
620 
621   /* copy Tmp_Den_Grid to Den_Snd_Grid_A2B */
622 
623   for (ID=0; ID<numprocs; ID++) Num_Snd_Grid_A2B[ID] = 0;
624 
625   N2D = Ngrid1*Ngrid2;
626 
627   for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
628 
629     Gc_AN = M2G[Mc_AN];
630 
631     for (AN=0; AN<GridN_Atom[Gc_AN]; AN++){
632 
633       GN = GridListAtom[Mc_AN][AN];
634       GN2N(GN,N3);
635       n2D = N3[1]*Ngrid2 + N3[2];
636       ID = (int)(n2D*(unsigned long long int)numprocs/N2D);
637 
638       if (SpinP_switch==0){
639         Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]] = Tmp_Den_Grid[0][Mc_AN][AN];
640       }
641       else if (SpinP_switch==1){
642         Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*2+0] = Tmp_Den_Grid[0][Mc_AN][AN];
643         Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*2+1] = Tmp_Den_Grid[1][Mc_AN][AN];
644       }
645       else if (SpinP_switch==3){
646         Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+0] = Tmp_Den_Grid[0][Mc_AN][AN];
647         Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+1] = Tmp_Den_Grid[1][Mc_AN][AN];
648         Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+2] = Tmp_Den_Grid[2][Mc_AN][AN];
649         Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+3] = Tmp_Den_Grid[3][Mc_AN][AN];
650       }
651 
652       Num_Snd_Grid_A2B[ID]++;
653     }
654   }
655 
656   /* MPI: A to B */
657 
658   request_send = malloc(sizeof(MPI_Request)*NN_A2B_S);
659   request_recv = malloc(sizeof(MPI_Request)*NN_A2B_R);
660   stat_send = malloc(sizeof(MPI_Status)*NN_A2B_S);
661   stat_recv = malloc(sizeof(MPI_Status)*NN_A2B_R);
662 
663   NN_S = 0;
664   NN_R = 0;
665 
666   tag = 999;
667   for (ID=1; ID<numprocs; ID++){
668 
669     IDS = (myid + ID) % numprocs;
670     IDR = (myid - ID + numprocs) % numprocs;
671 
672     if (Num_Snd_Grid_A2B[IDS]!=0){
673       MPI_Isend( &Den_Snd_Grid_A2B[IDS][0], Num_Snd_Grid_A2B[IDS]*(SpinP_switch+1),
674 	         MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
675       NN_S++;
676     }
677 
678     if (Num_Rcv_Grid_A2B[IDR]!=0){
679       MPI_Irecv( &Den_Rcv_Grid_A2B[IDR][0], Num_Rcv_Grid_A2B[IDR]*(SpinP_switch+1),
680   	         MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
681       NN_R++;
682     }
683   }
684 
685   if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
686   if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
687 
688   free(request_send);
689   free(request_recv);
690   free(stat_send);
691   free(stat_recv);
692 
693   /* for myid */
694   for (i=0; i<Num_Rcv_Grid_A2B[myid]*(SpinP_switch+1); i++){
695     Den_Rcv_Grid_A2B[myid][i] = Den_Snd_Grid_A2B[myid][i];
696   }
697 
698   /******************************************************
699    superposition of rho_i to calculate charge density
700    in the partition B.
701   ******************************************************/
702 
703   /* initialize arrays */
704 
705   for (spin=0; spin<(SpinP_switch+1); spin++){
706     for (BN=0; BN<My_NumGridB_AB; BN++){
707       Density_Grid_B[spin][BN] = 0.0;
708     }
709   }
710 
711   /* superposition of densities rho_i */
712 
713   for (ID=0; ID<numprocs; ID++){
714 
715     for (LN=0; LN<Num_Rcv_Grid_A2B[ID]; LN++){
716 
717       BN    = Index_Rcv_Grid_A2B[ID][3*LN+0];
718       Gc_AN = Index_Rcv_Grid_A2B[ID][3*LN+1];
719       GRc   = Index_Rcv_Grid_A2B[ID][3*LN+2];
720 
721       if (Solver!=4 || (Solver==4 && atv_ijk[GRc][1]==0 )){
722 
723 	/* spin collinear non-polarization */
724 	if ( SpinP_switch==0 ){
725 	  Density_Grid_B[0][BN] += Den_Rcv_Grid_A2B[ID][LN];
726 	}
727 
728 	/* spin collinear polarization */
729 	else if ( SpinP_switch==1 ){
730 	  Density_Grid_B[0][BN] += Den_Rcv_Grid_A2B[ID][LN*2  ];
731 	  Density_Grid_B[1][BN] += Den_Rcv_Grid_A2B[ID][LN*2+1];
732 	}
733 
734 	/* spin non-collinear */
735 	else if ( SpinP_switch==3 ){
736 	  Density_Grid_B[0][BN] += Den_Rcv_Grid_A2B[ID][LN*4  ];
737 	  Density_Grid_B[1][BN] += Den_Rcv_Grid_A2B[ID][LN*4+1];
738 	  Density_Grid_B[2][BN] += Den_Rcv_Grid_A2B[ID][LN*4+2];
739 	  Density_Grid_B[3][BN] += Den_Rcv_Grid_A2B[ID][LN*4+3];
740 	}
741 
742       } /* if (Solve!=4.....) */
743 
744     } /* AN */
745   } /* ID */
746 
747   /****************************************************
748    Conjugate complex of Density_Grid[3][MN] due to
749    difference in the definition between density matrix
750    and charge density
751   ****************************************************/
752 
753   if (SpinP_switch==3){
754 
755     for (BN=0; BN<My_NumGridB_AB; BN++){
756       Density_Grid_B[3][BN] = -Density_Grid_B[3][BN];
757     }
758   }
759 
760   /******************************************************
761              MPI: from the partitions B to D
762   ******************************************************/
763 
764   Density_Grid_Copy_B2D();
765 
766   /* freeing of arrays */
767 
768   for (i=0; i<(SpinP_switch+1); i++){
769     for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
770       free(Tmp_Den_Grid[i][Mc_AN]);
771     }
772     free(Tmp_Den_Grid[i]);
773   }
774   free(Tmp_Den_Grid);
775 
776   for (ID=0; ID<numprocs; ID++){
777     free(Den_Snd_Grid_A2B[ID]);
778   }
779   free(Den_Snd_Grid_A2B);
780 
781   for (ID=0; ID<numprocs; ID++){
782     free(Den_Rcv_Grid_A2B[ID]);
783   }
784   free(Den_Rcv_Grid_A2B);
785 
786   /* elapsed time */
787   dtime(&TEtime);
788   time0 = TEtime - TStime;
789   if(myid==0 && measure_time) printf("time0=%18.5f\n",time0);
790 
791   return time0;
792 }
793 
794 
795 
Data_Grid_Copy_B2C_2(double ** data_B,double ** data_C)796 void Data_Grid_Copy_B2C_2(double **data_B, double **data_C)
797 {
798   static int firsttime=1;
799   int CN,BN,LN,spin,i,gp,NN_S,NN_R;
800   double *Work_Array_Snd_Grid_B2C;
801   double *Work_Array_Rcv_Grid_B2C;
802   int numprocs,myid,tag=999,ID,IDS,IDR;
803   MPI_Status stat;
804   MPI_Request request;
805   MPI_Status *stat_send;
806   MPI_Status *stat_recv;
807   MPI_Request *request_send;
808   MPI_Request *request_recv;
809 
810   MPI_Comm_size(mpi_comm_level1,&numprocs);
811   MPI_Comm_rank(mpi_comm_level1,&myid);
812 
813   /* allocation of arrays */
814 
815   Work_Array_Snd_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_S[NN_B2C_S]*(SpinP_switch+1));
816   Work_Array_Rcv_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_R[NN_B2C_R]*(SpinP_switch+1));
817 
818   if (firsttime==1){
819     PrintMemory("Data_Grid_Copy_B2C_2: Work_Array_Snd_Grid_B2C",
820 		sizeof(double)*GP_B2C_S[NN_B2C_S]*(SpinP_switch+1), NULL);
821     PrintMemory("Data_Grid_Copy_B2C_2: Work_Array_Rcv_Grid_B2C",
822 		sizeof(double)*GP_B2C_R[NN_B2C_R]*(SpinP_switch+1), NULL);
823     firsttime = 0;
824   }
825 
826   /******************************************************
827              MPI: from the partitions B to C
828   ******************************************************/
829 
830   request_send = malloc(sizeof(MPI_Request)*NN_B2C_S);
831   request_recv = malloc(sizeof(MPI_Request)*NN_B2C_R);
832   stat_send = malloc(sizeof(MPI_Status)*NN_B2C_S);
833   stat_recv = malloc(sizeof(MPI_Status)*NN_B2C_R);
834 
835   NN_S = 0;
836   NN_R = 0;
837 
838   /* MPI_Irecv */
839 
840   for (ID=0; ID<NN_B2C_R; ID++){
841 
842     IDR = ID_NN_B2C_R[ID];
843     gp = GP_B2C_R[ID];
844 
845     if (IDR!=myid){
846       MPI_Irecv( &Work_Array_Rcv_Grid_B2C[(SpinP_switch+1)*gp], Num_Rcv_Grid_B2C[IDR]*(SpinP_switch+1),
847                  MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
848       NN_R++;
849     }
850 
851   }
852 
853   /* MPI_Isend */
854 
855   for (ID=0; ID<NN_B2C_S; ID++){
856 
857     IDS = ID_NN_B2C_S[ID];
858     gp = GP_B2C_S[ID];
859 
860     /* copy Density_Grid_B to Work_Array_Snd_Grid_B2C */
861 
862     for (LN=0; LN<Num_Snd_Grid_B2C[IDS]; LN++){
863       BN = Index_Snd_Grid_B2C[IDS][LN];
864 
865       if (SpinP_switch==0){
866         Work_Array_Snd_Grid_B2C[gp+LN]       = data_B[0][BN];
867       }
868       else if (SpinP_switch==1){
869         Work_Array_Snd_Grid_B2C[2*gp+2*LN+0] = data_B[0][BN];
870         Work_Array_Snd_Grid_B2C[2*gp+2*LN+1] = data_B[1][BN];
871       }
872       else if (SpinP_switch==3){
873         Work_Array_Snd_Grid_B2C[4*gp+4*LN+0] = data_B[0][BN];
874         Work_Array_Snd_Grid_B2C[4*gp+4*LN+1] = data_B[1][BN];
875         Work_Array_Snd_Grid_B2C[4*gp+4*LN+2] = data_B[2][BN];
876         Work_Array_Snd_Grid_B2C[4*gp+4*LN+3] = data_B[3][BN];
877       }
878     } /* LN */
879 
880     if (IDS!=myid){
881       MPI_Isend( &Work_Array_Snd_Grid_B2C[(SpinP_switch+1)*gp], Num_Snd_Grid_B2C[IDS]*(SpinP_switch+1),
882 		 MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
883       NN_S++;
884     }
885   }
886 
887   /* MPI_Waitall */
888 
889   if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
890   if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
891 
892   free(request_send);
893   free(request_recv);
894   free(stat_send);
895   free(stat_recv);
896 
897   /* copy Work_Array_Rcv_Grid_B2C to data_C */
898 
899   for (ID=0; ID<NN_B2C_R; ID++){
900 
901     IDR = ID_NN_B2C_R[ID];
902 
903     if (IDR==myid){
904 
905       gp = GP_B2C_S[ID];
906 
907       for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){
908 
909 	CN = Index_Rcv_Grid_B2C[IDR][LN];
910 
911 	if (SpinP_switch==0){
912 	  data_C[0][CN] = Work_Array_Snd_Grid_B2C[gp+LN];
913 	}
914 	else if (SpinP_switch==1){
915 	  data_C[0][CN] = Work_Array_Snd_Grid_B2C[2*gp+2*LN+0];
916 	  data_C[1][CN] = Work_Array_Snd_Grid_B2C[2*gp+2*LN+1];
917 	}
918 	else if (SpinP_switch==3){
919 	  data_C[0][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+0];
920 	  data_C[1][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+1];
921 	  data_C[2][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+2];
922 	  data_C[3][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+3];
923 	}
924       } /* LN */
925 
926     }
927     else {
928 
929       gp = GP_B2C_R[ID];
930 
931       for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){
932 	CN = Index_Rcv_Grid_B2C[IDR][LN];
933 
934 	if (SpinP_switch==0){
935 	  data_C[0][CN] = Work_Array_Rcv_Grid_B2C[gp+LN];
936 	}
937 	else if (SpinP_switch==1){
938 	  data_C[0][CN] = Work_Array_Rcv_Grid_B2C[2*gp+2*LN+0];
939 	  data_C[1][CN] = Work_Array_Rcv_Grid_B2C[2*gp+2*LN+1];
940 	}
941 	else if (SpinP_switch==3){
942 	  data_C[0][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+0];
943 	  data_C[1][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+1];
944 	  data_C[2][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+2];
945 	  data_C[3][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+3];
946 	}
947       }
948     }
949   }
950 
951   /* if (SpinP_switch==0),
952      copy data_B[0] to data_B[1]
953      copy data_C[0] to data_C[1]
954   */
955 
956   if (SpinP_switch==0){
957     for (BN=0; BN<My_NumGridB_AB; BN++){
958       data_B[1][BN] = data_B[0][BN];
959     }
960 
961     for (CN=0; CN<My_NumGridC; CN++){
962       data_C[1][CN] = data_C[0][CN];
963     }
964   }
965 
966   /* freeing of arrays */
967   free(Work_Array_Snd_Grid_B2C);
968   free(Work_Array_Rcv_Grid_B2C);
969 }
970 
971 
972 
Data_Grid_Copy_B2C_1(double * data_B,double * data_C)973 void Data_Grid_Copy_B2C_1(double *data_B, double *data_C)
974 {
975   static int firsttime=1;
976   int CN,BN,LN,spin,i,gp,NN_S,NN_R;
977   double *Work_Array_Snd_Grid_B2C;
978   double *Work_Array_Rcv_Grid_B2C;
979   int numprocs,myid,tag=999,ID,IDS,IDR;
980   MPI_Status stat;
981   MPI_Request request;
982   MPI_Status *stat_send;
983   MPI_Status *stat_recv;
984   MPI_Request *request_send;
985   MPI_Request *request_recv;
986 
987   MPI_Comm_size(mpi_comm_level1,&numprocs);
988   MPI_Comm_rank(mpi_comm_level1,&myid);
989 
990   /* allocation of arrays */
991 
992   Work_Array_Snd_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_S[NN_B2C_S]);
993   Work_Array_Rcv_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_R[NN_B2C_R]);
994 
995   if (firsttime==1){
996     PrintMemory("Data_Grid_Copy_B2C_1: Work_Array_Snd_Grid_B2C",
997 		sizeof(double)*GP_B2C_S[NN_B2C_S], NULL);
998     PrintMemory("Data_Grid_Copy_B2C_1: Work_Array_Rcv_Grid_B2C",
999 		sizeof(double)*GP_B2C_R[NN_B2C_R], NULL);
1000     firsttime = 0;
1001   }
1002 
1003   /******************************************************
1004              MPI: from the partitions B to C
1005   ******************************************************/
1006 
1007   request_send = malloc(sizeof(MPI_Request)*NN_B2C_S);
1008   request_recv = malloc(sizeof(MPI_Request)*NN_B2C_R);
1009   stat_send = malloc(sizeof(MPI_Status)*NN_B2C_S);
1010   stat_recv = malloc(sizeof(MPI_Status)*NN_B2C_R);
1011 
1012   NN_S = 0;
1013   NN_R = 0;
1014 
1015   /* MPI_Irecv */
1016 
1017   for (ID=0; ID<NN_B2C_R; ID++){
1018 
1019     IDR = ID_NN_B2C_R[ID];
1020     gp = GP_B2C_R[ID];
1021 
1022     if (IDR!=myid){
1023       MPI_Irecv( &Work_Array_Rcv_Grid_B2C[gp], Num_Rcv_Grid_B2C[IDR],
1024                  MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
1025       NN_R++;
1026     }
1027   }
1028 
1029   /* MPI_Isend */
1030 
1031   for (ID=0; ID<NN_B2C_S; ID++){
1032 
1033     IDS = ID_NN_B2C_S[ID];
1034     gp = GP_B2C_S[ID];
1035 
1036     /* copy Density_Grid_B to Work_Array_Snd_Grid_B2C */
1037 
1038     for (LN=0; LN<Num_Snd_Grid_B2C[IDS]; LN++){
1039       BN = Index_Snd_Grid_B2C[IDS][LN];
1040       Work_Array_Snd_Grid_B2C[gp+LN] = data_B[BN];
1041     }
1042 
1043     if (IDS!=myid){
1044       MPI_Isend( &Work_Array_Snd_Grid_B2C[gp], Num_Snd_Grid_B2C[IDS],
1045 		 MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
1046       NN_S++;
1047     }
1048   }
1049 
1050   /* MPI_Waitall */
1051 
1052   if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
1053   if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
1054 
1055   free(request_send);
1056   free(request_recv);
1057   free(stat_send);
1058   free(stat_recv);
1059 
1060   /* copy Work_Array_Rcv_Grid_B2C to data_C */
1061 
1062   for (ID=0; ID<NN_B2C_R; ID++){
1063 
1064     IDR = ID_NN_B2C_R[ID];
1065 
1066     if (IDR==myid){
1067       gp = GP_B2C_S[ID];
1068       for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){
1069 	CN = Index_Rcv_Grid_B2C[IDR][LN];
1070 	data_C[CN] = Work_Array_Snd_Grid_B2C[gp+LN];
1071       }
1072     }
1073     else{
1074 
1075       gp = GP_B2C_R[ID];
1076       for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){
1077 	CN = Index_Rcv_Grid_B2C[IDR][LN];
1078 	data_C[CN] = Work_Array_Rcv_Grid_B2C[gp+LN];
1079       }
1080     }
1081   }
1082 
1083   /* freeing of arrays */
1084   free(Work_Array_Snd_Grid_B2C);
1085   free(Work_Array_Rcv_Grid_B2C);
1086 }
1087 
1088 
1089 
1090 
1091 
Density_Grid_Copy_B2D()1092 void Density_Grid_Copy_B2D()
1093 {
1094   static int firsttime=1;
1095   int DN,BN,LN,spin,i,gp,NN_S,NN_R;
1096   double *Work_Array_Snd_Grid_B2D;
1097   double *Work_Array_Rcv_Grid_B2D;
1098   int numprocs,myid,tag=999,ID,IDS,IDR;
1099   MPI_Status stat;
1100   MPI_Request request;
1101   MPI_Status *stat_send;
1102   MPI_Status *stat_recv;
1103   MPI_Request *request_send;
1104   MPI_Request *request_recv;
1105 
1106   MPI_Comm_size(mpi_comm_level1,&numprocs);
1107   MPI_Comm_rank(mpi_comm_level1,&myid);
1108 
1109   /* allocation of arrays */
1110 
1111   Work_Array_Snd_Grid_B2D = (double*)malloc(sizeof(double)*GP_B2D_S[NN_B2D_S]*(SpinP_switch+1));
1112   Work_Array_Rcv_Grid_B2D = (double*)malloc(sizeof(double)*GP_B2D_R[NN_B2D_R]*(SpinP_switch+1));
1113 
1114   if (firsttime==1){
1115     PrintMemory("Set_Density_Grid: Work_Array_Snd_Grid_B2D",
1116 		sizeof(double)*GP_B2D_S[NN_B2D_S]*(SpinP_switch+1), NULL);
1117     PrintMemory("Set_Density_Grid: Work_Array_Rcv_Grid_B2D",
1118 		sizeof(double)*GP_B2D_R[NN_B2D_R]*(SpinP_switch+1), NULL);
1119     firsttime = 0;
1120   }
1121 
1122   /******************************************************
1123              MPI: from the partitions B to D
1124   ******************************************************/
1125 
1126   request_send = malloc(sizeof(MPI_Request)*NN_B2D_S);
1127   request_recv = malloc(sizeof(MPI_Request)*NN_B2D_R);
1128   stat_send = malloc(sizeof(MPI_Status)*NN_B2D_S);
1129   stat_recv = malloc(sizeof(MPI_Status)*NN_B2D_R);
1130 
1131   NN_S = 0;
1132   NN_R = 0;
1133 
1134   /* MPI_Irecv */
1135 
1136   for (ID=0; ID<NN_B2D_R; ID++){
1137 
1138     IDR = ID_NN_B2D_R[ID];
1139     gp = GP_B2D_R[ID];
1140 
1141     if (IDR!=myid){
1142       MPI_Irecv( &Work_Array_Rcv_Grid_B2D[(SpinP_switch+1)*gp], Num_Rcv_Grid_B2D[IDR]*(SpinP_switch+1),
1143                  MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
1144       NN_R++;
1145     }
1146   }
1147 
1148   /* MPI_Isend */
1149 
1150   for (ID=0; ID<NN_B2D_S; ID++){
1151 
1152     IDS = ID_NN_B2D_S[ID];
1153     gp = GP_B2D_S[ID];
1154 
1155     /* copy Density_Grid_B to Work_Array_Snd_Grid_B2D */
1156 
1157     for (LN=0; LN<Num_Snd_Grid_B2D[IDS]; LN++){
1158 
1159       BN = Index_Snd_Grid_B2D[IDS][LN];
1160 
1161       if (SpinP_switch==0){
1162         Work_Array_Snd_Grid_B2D[gp+LN]       = Density_Grid_B[0][BN];
1163       }
1164       else if (SpinP_switch==1){
1165         Work_Array_Snd_Grid_B2D[2*gp+2*LN+0] = Density_Grid_B[0][BN];
1166         Work_Array_Snd_Grid_B2D[2*gp+2*LN+1] = Density_Grid_B[1][BN];
1167       }
1168       else if (SpinP_switch==3){
1169         Work_Array_Snd_Grid_B2D[4*gp+4*LN+0] = Density_Grid_B[0][BN];
1170         Work_Array_Snd_Grid_B2D[4*gp+4*LN+1] = Density_Grid_B[1][BN];
1171         Work_Array_Snd_Grid_B2D[4*gp+4*LN+2] = Density_Grid_B[2][BN];
1172         Work_Array_Snd_Grid_B2D[4*gp+4*LN+3] = Density_Grid_B[3][BN];
1173       }
1174     } /* LN */
1175 
1176     if (IDS!=myid){
1177       MPI_Isend( &Work_Array_Snd_Grid_B2D[(SpinP_switch+1)*gp], Num_Snd_Grid_B2D[IDS]*(SpinP_switch+1),
1178 		 MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
1179       NN_S++;
1180     }
1181   }
1182 
1183   /* MPI_Waitall */
1184 
1185   if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
1186   if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
1187 
1188   free(request_send);
1189   free(request_recv);
1190   free(stat_send);
1191   free(stat_recv);
1192 
1193   /* copy Work_Array_Rcv_Grid_B2D to Density_Grid_D */
1194 
1195   for (ID=0; ID<NN_B2D_R; ID++){
1196 
1197     IDR = ID_NN_B2D_R[ID];
1198 
1199     if (IDR==myid){
1200 
1201       gp = GP_B2D_S[ID];
1202 
1203       for (LN=0; LN<Num_Rcv_Grid_B2D[IDR]; LN++){
1204 
1205 	DN = Index_Rcv_Grid_B2D[IDR][LN];
1206 
1207 	if (SpinP_switch==0){
1208 	  Density_Grid_D[0][DN] = Work_Array_Snd_Grid_B2D[gp+LN];
1209 	}
1210 	else if (SpinP_switch==1){
1211 	  Density_Grid_D[0][DN] = Work_Array_Snd_Grid_B2D[2*gp+2*LN+0];
1212 	  Density_Grid_D[1][DN] = Work_Array_Snd_Grid_B2D[2*gp+2*LN+1];
1213 	}
1214 	else if (SpinP_switch==3){
1215 	  Density_Grid_D[0][DN] = Work_Array_Snd_Grid_B2D[4*gp+4*LN+0];
1216 	  Density_Grid_D[1][DN] = Work_Array_Snd_Grid_B2D[4*gp+4*LN+1];
1217 	  Density_Grid_D[2][DN] = Work_Array_Snd_Grid_B2D[4*gp+4*LN+2];
1218 	  Density_Grid_D[3][DN] = Work_Array_Snd_Grid_B2D[4*gp+4*LN+3];
1219 	}
1220       } /* LN */
1221 
1222     }
1223 
1224     else{
1225 
1226       gp = GP_B2D_R[ID];
1227 
1228       for (LN=0; LN<Num_Rcv_Grid_B2D[IDR]; LN++){
1229 
1230 	DN = Index_Rcv_Grid_B2D[IDR][LN];
1231 
1232 	if (SpinP_switch==0){
1233 	  Density_Grid_D[0][DN] = Work_Array_Rcv_Grid_B2D[gp+LN];
1234 	}
1235 	else if (SpinP_switch==1){
1236 	  Density_Grid_D[0][DN] = Work_Array_Rcv_Grid_B2D[2*gp+2*LN+0];
1237 	  Density_Grid_D[1][DN] = Work_Array_Rcv_Grid_B2D[2*gp+2*LN+1];
1238 	}
1239 	else if (SpinP_switch==3){
1240 	  Density_Grid_D[0][DN] = Work_Array_Rcv_Grid_B2D[4*gp+4*LN+0];
1241 	  Density_Grid_D[1][DN] = Work_Array_Rcv_Grid_B2D[4*gp+4*LN+1];
1242 	  Density_Grid_D[2][DN] = Work_Array_Rcv_Grid_B2D[4*gp+4*LN+2];
1243 	  Density_Grid_D[3][DN] = Work_Array_Rcv_Grid_B2D[4*gp+4*LN+3];
1244 	}
1245       }
1246 
1247     }
1248   }
1249 
1250   /* if (SpinP_switch==0), copy Density_Grid to Density_Grid */
1251 
1252   if (SpinP_switch==0){
1253     for (BN=0; BN<My_NumGridB_AB; BN++){
1254       Density_Grid_B[1][BN] = Density_Grid_B[0][BN];
1255     }
1256 
1257     for (DN=0; DN<My_NumGridD; DN++){
1258       Density_Grid_D[1][DN] = Density_Grid_D[0][DN];
1259     }
1260   }
1261 
1262   /* freeing of arrays */
1263   free(Work_Array_Snd_Grid_B2D);
1264   free(Work_Array_Rcv_Grid_B2D);
1265 }
1266 
1267 
diagonalize_nc_density()1268 void diagonalize_nc_density()
1269 {
1270   int BN,DN,Mc_AN,Gc_AN,Nog,GRc;
1271   double Re11,Re22,Re12,Im12;
1272   double phi[2],theta[2],sit,cot,sip,cop;
1273   double d1,d2,d3,x,y,z,Cxyz[4];
1274   double Nup[2],Ndown[2];
1275   /* for OpenMP */
1276   int OMPID,Nthrds;
1277 
1278   /************************************
1279      Density_Grid in the partition B
1280   ************************************/
1281 
1282 #pragma omp parallel shared(Density_Grid_B,My_NumGridB_AB) private(OMPID,Nthrds,BN,Re11,Re22,Re12,Im12,Nup,Ndown,theta,phi) default(none)
1283   {
1284 
1285     /* get info. on OpenMP */
1286 
1287     OMPID = omp_get_thread_num();
1288     Nthrds = omp_get_num_threads();
1289 
1290     for (BN=OMPID; BN<My_NumGridB_AB; BN+=Nthrds){
1291 
1292       Re11 = Density_Grid_B[0][BN];
1293       Re22 = Density_Grid_B[1][BN];
1294       Re12 = Density_Grid_B[2][BN];
1295       Im12 = Density_Grid_B[3][BN];
1296 
1297       EulerAngle_Spin( 1, Re11, Re22, Re12, Im12, Re12, -Im12, Nup, Ndown, theta, phi );
1298 
1299       Density_Grid_B[0][BN] = Nup[0];
1300       Density_Grid_B[1][BN] = Ndown[0];
1301       Density_Grid_B[2][BN] = theta[0];
1302       Density_Grid_B[3][BN] = phi[0];
1303     }
1304 
1305 #pragma omp flush(Density_Grid_B)
1306 
1307   } /* #pragma omp parallel */
1308 
1309   /************************************
1310      Density_Grid in the partition D
1311   ************************************/
1312 
1313 #pragma omp parallel shared(Density_Grid_D,My_NumGridD) private(OMPID,Nthrds,DN,Re11,Re22,Re12,Im12,Nup,Ndown,theta,phi) default(none)
1314   {
1315 
1316     /* get info. on OpenMP */
1317 
1318     OMPID = omp_get_thread_num();
1319     Nthrds = omp_get_num_threads();
1320 
1321     for (DN=OMPID; DN<My_NumGridD; DN+=Nthrds){
1322 
1323       Re11 = Density_Grid_D[0][DN];
1324       Re22 = Density_Grid_D[1][DN];
1325       Re12 = Density_Grid_D[2][DN];
1326       Im12 = Density_Grid_D[3][DN];
1327 
1328       EulerAngle_Spin( 1, Re11, Re22, Re12, Im12, Re12, -Im12, Nup, Ndown, theta, phi );
1329 
1330       Density_Grid_D[0][DN] = Nup[0];
1331       Density_Grid_D[1][DN] = Ndown[0];
1332       Density_Grid_D[2][DN] = theta[0];
1333       Density_Grid_D[3][DN] = phi[0];
1334     }
1335 
1336 #pragma omp flush(Density_Grid_D)
1337 
1338   } /* #pragma omp parallel */
1339 
1340 }
1341