1 /**********************************************************************
2   Poisson.c:
3 
4      Poisson.c is a subrutine to solve Poisson's equation using
5      fast Fourier transformation.
6 
7   Log of Poisson.c:
8 
9      22/Nov/2001  Released by T.Ozaki
10      06/Apr/2012  Rewritten by T.Ozaki
11 
12 ***********************************************************************/
13 
14 #define  measure_time   0
15 
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <math.h>
19 #include <time.h>
20 #include "openmx_common.h"
21 #include "mpi.h"
22 #include <fftw3.h>
23 
24 
25 static void Inverse_FFT_Poisson(double *ReRhor, double *ImRhor,
26                                 double *ReRhok, double *ImRhok);
27 
28 static void FFT_Poisson(double *ReRhor, double *ImRhor,
29                         double *ReRhok, double *ImRhok);
30 
Poisson(int fft_charge_flag,double * ReRhok,double * ImRhok)31 double Poisson(int fft_charge_flag,
32                double *ReRhok, double *ImRhok)
33 {
34   int k1,k2,k3;
35   int N2D,GNs,GN,BN_CB;
36   int N3[4];
37   double time0;
38   double tmp0,sk1,sk2,sk3;
39   double Gx,Gy,Gz,fac_invG2;
40   double TStime,TEtime,etime;
41   int numprocs,myid,tag=999,ID,IDS,IDR;
42   MPI_Status stat;
43   MPI_Request request;
44 
45   /* MPI */
46   MPI_Comm_size(mpi_comm_level1,&numprocs);
47   MPI_Comm_rank(mpi_comm_level1,&myid);
48 
49   if (myid==Host_ID && 0<level_stdout){
50     printf("<Poisson>  Poisson's equation using FFT...\n");
51   }
52 
53   MPI_Barrier(mpi_comm_level1);
54   dtime(&TStime);
55 
56   /****************************************************
57             FFT of difference charge density
58   ****************************************************/
59 
60   if (fft_charge_flag==1) etime = FFT_Density(0,ReRhok,ImRhok);
61 
62   /****************************************************
63                        4*PI/G2/N^3
64   ****************************************************/
65 
66   tmp0 = 4.0*PI/(double)(Ngrid1*Ngrid2*Ngrid3);
67 
68   N2D = Ngrid3*Ngrid2;
69   GNs = ((myid*N2D+numprocs-1)/numprocs)*Ngrid1;
70 
71   for (BN_CB=0; BN_CB<My_NumGridB_CB; BN_CB++){
72 
73     GN = BN_CB + GNs;
74     k3 = GN/(Ngrid2*Ngrid1);
75     k2 = (GN - k3*Ngrid2*Ngrid1)/Ngrid1;
76     k1 = GN - k3*Ngrid2*Ngrid1 - k2*Ngrid1;
77 
78     if (k1<Ngrid1/2) sk1 = (double)k1;
79     else             sk1 = (double)(k1 - Ngrid1);
80 
81     if (k2<Ngrid2/2) sk2 = (double)k2;
82     else             sk2 = (double)(k2 - Ngrid2);
83 
84     if (k3<Ngrid3/2) sk3 = (double)k3;
85     else             sk3 = (double)(k3 - Ngrid3);
86 
87     Gx = sk1*rtv[1][1] + sk2*rtv[2][1] + sk3*rtv[3][1];
88     Gy = sk1*rtv[1][2] + sk2*rtv[2][2] + sk3*rtv[3][2];
89     Gz = sk1*rtv[1][3] + sk2*rtv[2][3] + sk3*rtv[3][3];
90     fac_invG2 = tmp0/(Gx*Gx + Gy*Gy + Gz*Gz);
91 
92     if (k1==0 && k2==0 && k3==0){
93       ReRhok[BN_CB] = 0.0;
94       ImRhok[BN_CB] = 0.0;
95     }
96     else{
97       ReRhok[BN_CB] *= fac_invG2;
98       ImRhok[BN_CB] *= fac_invG2;
99     }
100   }
101 
102   /****************************************************
103         find the Hartree potential in real space
104   ****************************************************/
105 
106   Get_Value_inReal(0,dVHart_Grid_B,dVHart_Grid_B,ReRhok,ImRhok);
107 
108   /* for time */
109   MPI_Barrier(mpi_comm_level1);
110   dtime(&TEtime);
111   time0 = TEtime - TStime;
112   return time0;
113 }
114 
115 
116 
117 
FFT_Poisson(double * ReRhor,double * ImRhor,double * ReRhok,double * ImRhok)118 void FFT_Poisson(double *ReRhor, double *ImRhor,
119                  double *ReRhok, double *ImRhok)
120 {
121   static int firsttime=1;
122   int i,BN_AB,BN_CB,BN_CA,gp,NN_S,NN_R;
123   double *array0,*array1;
124   int numprocs,myid,tag=999,ID,IDS,IDR;
125   double Stime_proc, Etime_proc;
126   MPI_Status stat;
127   MPI_Request request;
128   MPI_Status *stat_send;
129   MPI_Status *stat_recv;
130   MPI_Request *request_send;
131   MPI_Request *request_recv;
132 
133   fftw_complex *in, *out;
134   fftw_plan p;
135 
136   /* MPI */
137   MPI_Comm_size(mpi_comm_level1,&numprocs);
138   MPI_Comm_rank(mpi_comm_level1,&myid);
139 
140   /****************************************************
141     allocation of arrays:
142   ****************************************************/
143 
144   in  = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*List_YOUSO[17]);
145   out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*List_YOUSO[17]);
146 
147   /*------------------ FFT along the C-axis in the AB partition ------------------*/
148 
149   if (measure_time==1) dtime(&Stime_proc);
150 
151   p = fftw_plan_dft_1d(Ngrid3,in,out,-1,FFTW_ESTIMATE);
152 
153   for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB+=Ngrid3){
154 
155     for (i=0; i<Ngrid3; i++){
156       in[i][0] = ReRhor[BN_AB+i];
157       in[i][1] = ImRhor[BN_AB+i];
158     }
159 
160     fftw_execute(p);
161 
162     for (i=0; i<Ngrid3; i++){
163       ReRhor[BN_AB+i] = out[i][0];
164       ImRhor[BN_AB+i] = out[i][1];
165     }
166   }
167 
168   fftw_destroy_plan(p);
169 
170   if (measure_time==1){
171     dtime(&Etime_proc);
172     printf("myid=%2d  Time FFT-C  = %15.12f\n",myid,Etime_proc-Stime_proc);
173   }
174 
175   /*------------------ MPI: AB to CA partitions ------------------*/
176 
177   if (measure_time==1){
178     MPI_Barrier(mpi_comm_level1);
179     dtime(&Stime_proc);
180   }
181 
182   array0 = (double*)malloc(sizeof(double)*2*GP_B_AB2CA_S[NN_B_AB2CA_S]);
183   array1 = (double*)malloc(sizeof(double)*2*GP_B_AB2CA_R[NN_B_AB2CA_R]);
184 
185   request_send = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_AB2CA_S);
186   request_recv = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_AB2CA_R);
187   stat_send = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_AB2CA_S);
188   stat_recv = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_AB2CA_R);
189 
190   NN_S = 0;
191   NN_R = 0;
192 
193   /* MPI_Irecv */
194 
195   for (ID=0; ID<NN_B_AB2CA_R; ID++){
196 
197     IDR = ID_NN_B_AB2CA_R[ID];
198     gp = GP_B_AB2CA_R[ID];
199 
200     if (IDR!=myid){
201       MPI_Irecv( &array1[2*gp], Num_Rcv_Grid_B_AB2CA[IDR]*2,
202                  MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
203       NN_R++;
204     }
205   }
206 
207   /* MPI_Isend */
208 
209   for (ID=0; ID<NN_B_AB2CA_S; ID++){
210 
211     IDS = ID_NN_B_AB2CA_S[ID];
212     gp = GP_B_AB2CA_S[ID];
213 
214     if (IDS!=myid){
215 
216       for (i=0; i<Num_Snd_Grid_B_AB2CA[IDS]; i++){
217         BN_AB = Index_Snd_Grid_B_AB2CA[IDS][i];
218         array0[2*gp+2*i  ] = ReRhor[BN_AB];
219         array0[2*gp+2*i+1] = ImRhor[BN_AB];
220       }
221 
222       MPI_Isend( &array0[2*gp], Num_Snd_Grid_B_AB2CA[IDS]*2,
223     	         MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
224       NN_S++;
225     }
226   }
227 
228   /* MPI_Waitall */
229 
230   if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
231   if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
232 
233   /* copy them to ReRhok and ImRhok */
234 
235   for (ID=0; ID<NN_B_AB2CA_R; ID++){
236 
237     IDR = ID_NN_B_AB2CA_R[ID];
238     gp = GP_B_AB2CA_R[ID];
239 
240     if (IDR!=myid){
241       for (i=0; i<Num_Rcv_Grid_B_AB2CA[IDR]; i++){
242 	BN_CA = Index_Rcv_Grid_B_AB2CA[IDR][i];
243 	ReRhok[BN_CA] = array1[2*gp+2*i  ];
244 	ImRhok[BN_CA] = array1[2*gp+2*i+1];
245       }
246     }
247 
248     else{
249       for (i=0; i<Num_Snd_Grid_B_AB2CA[IDR]; i++){
250 	BN_AB = Index_Snd_Grid_B_AB2CA[IDR][i];
251 	BN_CA = Index_Rcv_Grid_B_AB2CA[IDR][i];
252 	ReRhok[BN_CA] = ReRhor[BN_AB];
253 	ImRhok[BN_CA] = ImRhor[BN_AB];
254       }
255     }
256   }
257 
258   free(array0);
259   free(array1);
260 
261   free(request_send);
262   free(request_recv);
263   free(stat_send);
264   free(stat_recv);
265 
266   if (measure_time==1){
267     MPI_Barrier(mpi_comm_level1);
268     dtime(&Etime_proc);
269     printf("myid=%2d  Time MPI: AB to CB = %15.12f\n",myid,Etime_proc-Stime_proc);
270   }
271 
272   /*------------------ FFT along the B-axis in the CA partition ------------------*/
273 
274   if (measure_time==1) dtime(&Stime_proc);
275 
276   p = fftw_plan_dft_1d(Ngrid2,in,out,-1,FFTW_ESTIMATE);
277 
278   for (BN_CA=0; BN_CA<My_NumGridB_CA; BN_CA+=Ngrid2){
279 
280     for (i=0; i<Ngrid2; i++){
281       in[i][0] = ReRhok[BN_CA+i];
282       in[i][1] = ImRhok[BN_CA+i];
283     }
284 
285     fftw_execute(p);
286 
287     for (i=0; i<Ngrid2; i++){
288       ReRhok[BN_CA+i] = out[i][0];
289       ImRhok[BN_CA+i] = out[i][1];
290     }
291   }
292 
293   fftw_destroy_plan(p);
294 
295   if (measure_time==1){
296     dtime(&Etime_proc);
297     printf("myid=%2d  Time FFT-B  = %15.12f\n",myid,Etime_proc-Stime_proc);
298   }
299 
300   /*------------------ MPI: CA to CB partitions ------------------*/
301 
302   if (measure_time==1){
303     MPI_Barrier(mpi_comm_level1);
304     dtime(&Stime_proc);
305   }
306 
307   array0 = (double*)malloc(sizeof(double)*2*GP_B_CA2CB_S[NN_B_CA2CB_S]);
308   array1 = (double*)malloc(sizeof(double)*2*GP_B_CA2CB_R[NN_B_CA2CB_R]);
309 
310   request_send = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_CA2CB_S);
311   request_recv = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_CA2CB_R);
312   stat_send = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_CA2CB_S);
313   stat_recv = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_CA2CB_R);
314 
315   NN_S = 0;
316   NN_R = 0;
317 
318   /* MPI_Irecv */
319 
320   for (ID=0; ID<NN_B_CA2CB_R; ID++){
321 
322     IDR = ID_NN_B_CA2CB_R[ID];
323     gp = GP_B_CA2CB_R[ID];
324 
325     if (IDR!=myid){
326       MPI_Irecv( &array1[2*gp], Num_Rcv_Grid_B_CA2CB[IDR]*2,
327                  MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
328       NN_R++;
329     }
330   }
331 
332   /* MPI_Isend */
333 
334   for (ID=0; ID<NN_B_CA2CB_S; ID++){
335 
336     IDS = ID_NN_B_CA2CB_S[ID];
337     gp = GP_B_CA2CB_S[ID];
338 
339     if (IDS!=myid){
340 
341       for (i=0; i<Num_Snd_Grid_B_CA2CB[IDS]; i++){
342         BN_CA = Index_Snd_Grid_B_CA2CB[IDS][i];
343         array0[2*gp+2*i  ] = ReRhok[BN_CA];
344         array0[2*gp+2*i+1] = ImRhok[BN_CA];
345       }
346 
347       MPI_Isend( &array0[2*gp], Num_Snd_Grid_B_CA2CB[IDS]*2,
348   	         MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
349       NN_S++;
350     }
351   }
352 
353   /* MPI_Waitall */
354 
355   if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
356   if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
357 
358   /* copy them to ReRhor and ImRhor */
359 
360   for (ID=0; ID<NN_B_CA2CB_R; ID++){
361 
362     IDR = ID_NN_B_CA2CB_R[ID];
363     gp = GP_B_CA2CB_R[ID];
364 
365     if (IDR!=myid){
366       for (i=0; i<Num_Rcv_Grid_B_CA2CB[IDR]; i++){
367         BN_CB = Index_Rcv_Grid_B_CA2CB[IDR][i];
368         ReRhor[BN_CB] = array1[2*gp+2*i  ];
369         ImRhor[BN_CB] = array1[2*gp+2*i+1];
370       }
371     }
372 
373     else{
374       for (i=0; i<Num_Snd_Grid_B_CA2CB[IDR]; i++){
375 	BN_CA = Index_Snd_Grid_B_CA2CB[IDR][i];
376 	BN_CB = Index_Rcv_Grid_B_CA2CB[IDR][i];
377 	ReRhor[BN_CB] = ReRhok[BN_CA];
378 	ImRhor[BN_CB] = ImRhok[BN_CA];
379       }
380     }
381   }
382 
383   free(array0);
384   free(array1);
385 
386   free(request_send);
387   free(request_recv);
388   free(stat_send);
389   free(stat_recv);
390 
391   if (measure_time==1){
392     MPI_Barrier(mpi_comm_level1);
393     dtime(&Etime_proc);
394     printf("myid=%2d  Time MPI: CA to CB = %15.12f\n",myid,Etime_proc-Stime_proc);
395   }
396 
397   /*------------------ FFT along the A-axis in the CB partition ------------------*/
398 
399   if (measure_time==1) dtime(&Stime_proc);
400 
401   p = fftw_plan_dft_1d(Ngrid1,in,out,-1,FFTW_ESTIMATE);
402 
403   for (BN_CB=0; BN_CB<My_NumGridB_CB; BN_CB+=Ngrid1){
404 
405     for (i=0; i<Ngrid1; i++){
406       in[i][0] = ReRhor[BN_CB+i];
407       in[i][1] = ImRhor[BN_CB+i];
408     }
409 
410     fftw_execute(p);
411 
412     for (i=0; i<Ngrid1; i++){
413       ReRhok[BN_CB+i] = out[i][0];
414       ImRhok[BN_CB+i] = out[i][1];
415     }
416   }
417 
418   fftw_destroy_plan(p);
419   fftw_cleanup();
420 
421   if (measure_time==1){
422     dtime(&Etime_proc);
423     printf("myid=%2d  Time FFT-A  = %15.12f\n",myid,Etime_proc-Stime_proc);
424   }
425 
426   /****************************************************
427     freeing of arrays:
428   ****************************************************/
429 
430   fftw_free(in);
431   fftw_free(out);
432 
433   /* PrintMemory */
434 
435   if (firsttime) {
436 
437     if (GP_B_CA2CB_S[NN_B_CA2CB_S]<GP_B_AB2CA_S[NN_B_AB2CA_S]){
438       PrintMemory("Poisson: array0",sizeof(double)*2*GP_B_AB2CA_S[NN_B_AB2CA_S],NULL);
439     }
440     else{
441       PrintMemory("Poisson: array0",sizeof(double)*2*GP_B_CA2CB_S[NN_B_CA2CB_S],NULL);
442     }
443 
444     if (GP_B_CA2CB_R[NN_B_CA2CB_R]<GP_B_AB2CA_R[NN_B_AB2CA_R]){
445       PrintMemory("Poisson: array1",sizeof(double)*2*GP_B_AB2CA_R[NN_B_AB2CA_R],NULL);
446     }
447     else{
448       PrintMemory("Poisson: array1",sizeof(double)*2*GP_B_CA2CB_R[NN_B_CA2CB_R],NULL);
449     }
450 
451     if (NN_B_CA2CB_S<NN_B_AB2CA_S){
452       PrintMemory("Poisson: request_send",sizeof(MPI_Request)*NN_B_AB2CA_S,NULL);
453       PrintMemory("Poisson: stat_send   ",sizeof(MPI_Status)*NN_B_AB2CA_S,NULL);
454     }
455     else{
456       PrintMemory("Poisson: request_send",sizeof(MPI_Request)*NN_B_CA2CB_S,NULL);
457       PrintMemory("Poisson: stat_send   ",sizeof(MPI_Status)*NN_B_CA2CB_S,NULL);
458     }
459 
460     if (NN_B_CA2CB_R<NN_B_AB2CA_R){
461       PrintMemory("Poisson: request_recv",sizeof(MPI_Request)*NN_B_AB2CA_R,NULL);
462       PrintMemory("Poisson: stat_recv   ",sizeof(MPI_Status)*NN_B_AB2CA_R,NULL);
463     }
464     else{
465       PrintMemory("Poisson: request_recv",sizeof(MPI_Request)*NN_B_CA2CB_R,NULL);
466       PrintMemory("Poisson: stat_recv   ",sizeof(MPI_Status)*NN_B_CA2CB_R,NULL);
467     }
468 
469     /* turn off firsttime flag */
470     firsttime=0;
471   }
472 
473 }
474 
475 
476 
477 
Inverse_FFT_Poisson(double * ReRhor,double * ImRhor,double * ReRhok,double * ImRhok)478 void Inverse_FFT_Poisson(double *ReRhor, double *ImRhor,
479                          double *ReRhok, double *ImRhok)
480 {
481   int i,BN_AB,BN_CB,BN_CA,gp,NN_S,NN_R;
482   double *array0,*array1;
483   int numprocs,myid,tag=999,ID,IDS,IDR;
484   double Stime_proc, Etime_proc;
485   MPI_Status stat;
486   MPI_Request request;
487   MPI_Status *stat_send;
488   MPI_Status *stat_recv;
489   MPI_Request *request_send;
490   MPI_Request *request_recv;
491 
492   fftw_complex *in, *out;
493   fftw_plan p;
494 
495   /* MPI */
496   MPI_Comm_size(mpi_comm_level1,&numprocs);
497   MPI_Comm_rank(mpi_comm_level1,&myid);
498 
499   /****************************************************
500     allocation of arrays:
501 
502     fftw_complex  in[List_YOUSO[17]];
503     fftw_complex out[List_YOUSO[17]];
504   ****************************************************/
505 
506   in  = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*List_YOUSO[17]);
507   out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*List_YOUSO[17]);
508 
509   /*------------------ Inverse FFT along the A-axis in the CB partition ------------------*/
510 
511   if (measure_time==1) dtime(&Stime_proc);
512 
513   p = fftw_plan_dft_1d(Ngrid1,in,out,1,FFTW_ESTIMATE);
514 
515   for (BN_CB=0; BN_CB<My_NumGridB_CB; BN_CB+=Ngrid1){
516 
517     for (i=0; i<Ngrid1; i++){
518       in[i][0] = ReRhok[BN_CB+i];
519       in[i][1] = ImRhok[BN_CB+i];
520     }
521 
522     fftw_execute(p);
523 
524     for (i=0; i<Ngrid1; i++){
525       ReRhok[BN_CB+i] = out[i][0];
526       ImRhok[BN_CB+i] = out[i][1];
527     }
528   }
529 
530   fftw_destroy_plan(p);
531 
532   if (measure_time==1){
533     dtime(&Etime_proc);
534     printf("myid=%2d  Time Inverse FFT-A  = %15.12f\n",myid,Etime_proc-Stime_proc);
535   }
536 
537   /*------------------ MPI: CB to CA partitions ------------------*/
538 
539   if (measure_time==1){
540     MPI_Barrier(mpi_comm_level1);
541     dtime(&Stime_proc);
542   }
543 
544   array0 = (double*)malloc(sizeof(double)*2*GP_B_CA2CB_R[NN_B_CA2CB_R]);
545   array1 = (double*)malloc(sizeof(double)*2*GP_B_CA2CB_S[NN_B_CA2CB_S]);
546 
547   request_send = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_CA2CB_R);
548   request_recv = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_CA2CB_S);
549   stat_send = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_CA2CB_R);
550   stat_recv = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_CA2CB_S);
551 
552   NN_S = 0;
553   NN_R = 0;
554 
555   /* MPI_Irecv */
556 
557   for (ID=0; ID<NN_B_CA2CB_S; ID++){
558 
559     IDR = ID_NN_B_CA2CB_S[ID];
560     gp = GP_B_CA2CB_S[ID];
561 
562     if (IDR!=myid){
563 
564       MPI_Irecv( &array1[2*gp], Num_Snd_Grid_B_CA2CB[IDR]*2,
565                  MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
566       NN_R++;
567     }
568   }
569 
570   /* MPI_Isend */
571 
572   for (ID=0; ID<NN_B_CA2CB_R; ID++){
573 
574     IDS = ID_NN_B_CA2CB_R[ID];
575     gp = GP_B_CA2CB_R[ID];
576 
577     if (IDS!=myid){
578 
579       for (i=0; i<Num_Rcv_Grid_B_CA2CB[IDS]; i++){
580         BN_CB = Index_Rcv_Grid_B_CA2CB[IDS][i];
581         array0[2*gp+2*i  ] = ReRhok[BN_CB];
582         array0[2*gp+2*i+1] = ImRhok[BN_CB];
583       }
584 
585       MPI_Isend( &array0[2*gp], Num_Rcv_Grid_B_CA2CB[IDS]*2,
586   	         MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
587       NN_S++;
588     }
589   }
590 
591   /* MPI_Waitall */
592 
593   if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
594   if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
595 
596   /* copy them to ReRhor and ImRhor */
597 
598   for (ID=0; ID<NN_B_CA2CB_S; ID++){
599 
600     IDR = ID_NN_B_CA2CB_S[ID];
601     gp = GP_B_CA2CB_S[ID];
602 
603     if (IDR!=myid){
604       for (i=0; i<Num_Snd_Grid_B_CA2CB[IDR]; i++){
605         BN_CA = Index_Snd_Grid_B_CA2CB[IDR][i];
606         ReRhor[BN_CA] = array1[2*gp+2*i  ];
607         ImRhor[BN_CA] = array1[2*gp+2*i+1];
608       }
609     }
610     else{
611       for (i=0; i<Num_Rcv_Grid_B_CA2CB[IDR]; i++){
612 	BN_CB = Index_Rcv_Grid_B_CA2CB[IDR][i];
613 	BN_CA = Index_Snd_Grid_B_CA2CB[IDR][i];
614 	ReRhor[BN_CA] = ReRhok[BN_CB];
615 	ImRhor[BN_CA] = ImRhok[BN_CB];
616       }
617     }
618   }
619 
620   free(array0);
621   free(array1);
622 
623   free(request_send);
624   free(request_recv);
625   free(stat_send);
626   free(stat_recv);
627 
628   if (measure_time==1){
629     MPI_Barrier(mpi_comm_level1);
630     dtime(&Etime_proc);
631     printf("myid=%2d  Time MPI: CB to CA = %15.12f\n",myid,Etime_proc-Stime_proc);
632   }
633 
634   /*------------------ Inverse FFT along the B-axis in the CA partition ------------------*/
635 
636   if (measure_time==1) dtime(&Stime_proc);
637 
638   p = fftw_plan_dft_1d(Ngrid2,in,out,1,FFTW_ESTIMATE);
639 
640   for (BN_CA=0; BN_CA<My_NumGridB_CA; BN_CA+=Ngrid2){
641 
642     for (i=0; i<Ngrid2; i++){
643       in[i][0] = ReRhor[BN_CA+i];
644       in[i][1] = ImRhor[BN_CA+i];
645     }
646 
647     fftw_execute(p);
648 
649     for (i=0; i<Ngrid2; i++){
650       ReRhor[BN_CA+i] = out[i][0];
651       ImRhor[BN_CA+i] = out[i][1];
652     }
653   }
654 
655   fftw_destroy_plan(p);
656 
657   if (measure_time==1){
658     dtime(&Etime_proc);
659     printf("myid=%2d  Time Inverse FFT-B  = %15.12f\n",myid,Etime_proc-Stime_proc);
660   }
661 
662   /*------------------ MPI: CA to AB partitions ------------------*/
663 
664   if (measure_time==1){
665     MPI_Barrier(mpi_comm_level1);
666     dtime(&Stime_proc);
667   }
668 
669   array0 = (double*)malloc(sizeof(double)*2*GP_B_AB2CA_R[NN_B_AB2CA_R]);
670   array1 = (double*)malloc(sizeof(double)*2*GP_B_AB2CA_S[NN_B_AB2CA_S]);
671 
672   request_send = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_AB2CA_R);
673   request_recv = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_AB2CA_S);
674   stat_send = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_AB2CA_R);
675   stat_recv = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_AB2CA_S);
676 
677   NN_S = 0;
678   NN_R = 0;
679 
680   /* MPI_Irecv */
681 
682   for (ID=0; ID<NN_B_AB2CA_S; ID++){
683 
684     IDR = ID_NN_B_AB2CA_S[ID];
685     gp = GP_B_AB2CA_S[ID];
686 
687     if (IDR!=myid){
688 
689       MPI_Irecv( &array1[2*gp], Num_Snd_Grid_B_AB2CA[IDR]*2,
690                  MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
691       NN_R++;
692     }
693   }
694 
695   /* MPI_Isend */
696 
697   for (ID=0; ID<NN_B_AB2CA_R; ID++){
698 
699     IDS = ID_NN_B_AB2CA_R[ID];
700     gp = GP_B_AB2CA_R[ID];
701 
702     if (IDS!=myid){
703 
704       for (i=0; i<Num_Rcv_Grid_B_AB2CA[IDS]; i++){
705         BN_CB = Index_Rcv_Grid_B_AB2CA[IDS][i];
706         array0[2*gp+2*i  ] = ReRhor[BN_CB];
707         array0[2*gp+2*i+1] = ImRhor[BN_CB];
708       }
709 
710       MPI_Isend( &array0[2*gp], Num_Rcv_Grid_B_AB2CA[IDS]*2,
711   	         MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
712       NN_S++;
713     }
714   }
715 
716   /* MPI_Waitall */
717 
718   if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
719   if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
720 
721   /* copy them to ReRhok and ImRhok */
722 
723   for (ID=0; ID<NN_B_AB2CA_S; ID++){
724 
725     IDR = ID_NN_B_AB2CA_S[ID];
726     gp = GP_B_AB2CA_S[ID];
727 
728     if (IDR!=myid){
729       for (i=0; i<Num_Snd_Grid_B_AB2CA[IDR]; i++){
730 	BN_AB = Index_Snd_Grid_B_AB2CA[IDR][i];
731 	ReRhok[BN_AB] = array1[2*gp+2*i  ];
732 	ImRhok[BN_AB] = array1[2*gp+2*i+1];
733       }
734     }
735     else {
736       for (i=0; i<Num_Rcv_Grid_B_AB2CA[IDR]; i++){
737 	BN_CA = Index_Rcv_Grid_B_AB2CA[IDR][i];
738 	BN_AB = Index_Snd_Grid_B_AB2CA[IDR][i];
739 	ReRhok[BN_AB] = ReRhor[BN_CA];
740 	ImRhok[BN_AB] = ImRhor[BN_CA];
741       }
742     }
743   }
744 
745   free(array0);
746   free(array1);
747 
748   free(request_send);
749   free(request_recv);
750   free(stat_send);
751   free(stat_recv);
752 
753   if (measure_time==1){
754     MPI_Barrier(mpi_comm_level1);
755     dtime(&Etime_proc);
756     printf("myid=%2d  Time MPI: CA to AB = %15.12f\n",myid,Etime_proc-Stime_proc);
757   }
758 
759   /*------------------ Inverse FFT along the C-axis in the AB partition ------------------*/
760 
761   if (measure_time==1) dtime(&Stime_proc);
762 
763   p = fftw_plan_dft_1d(Ngrid3,in,out,1,FFTW_ESTIMATE);
764 
765   for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB+=Ngrid3){
766 
767     for (i=0; i<Ngrid3; i++){
768       in[i][0] = ReRhok[BN_AB+i];
769       in[i][1] = ImRhok[BN_AB+i];
770     }
771 
772     fftw_execute(p);
773 
774     for (i=0; i<Ngrid3; i++){
775       ReRhor[BN_AB+i] = out[i][0];
776       ImRhor[BN_AB+i] = out[i][1];
777     }
778   }
779 
780   fftw_destroy_plan(p);
781   fftw_cleanup();
782 
783   if (measure_time==1){
784     dtime(&Etime_proc);
785     printf("myid=%2d  Time Inverse FFT-C  = %15.12f\n",myid,Etime_proc-Stime_proc);
786   }
787 
788   /****************************************************
789     freeing of arrays:
790 
791     fftw_complex  in[List_YOUSO[17]];
792     fftw_complex out[List_YOUSO[17]];
793   ****************************************************/
794 
795   fftw_free(in);
796   fftw_free(out);
797 }
798 
799 
800 
801 
FFT_Density(int den_flag,double * ReRhok,double * ImRhok)802 double FFT_Density(int den_flag,
803                    double *ReRhok, double *ImRhok)
804 {
805   int BN_AB;
806   int numprocs,myid;
807   double *ReRhor,*ImRhor;
808   double TStime,TEtime,time0;
809 
810   MPI_Comm_size(mpi_comm_level1,&numprocs);
811   MPI_Comm_rank(mpi_comm_level1,&myid);
812 
813   MPI_Barrier(mpi_comm_level1);
814   dtime(&TStime);
815 
816   if (4<den_flag){
817     printf("invalid den_flag for FFT2D_Density\n");
818     MPI_Finalize();
819     exit(0);
820   }
821 
822   /* allocation of arrays */
823 
824   ReRhor = (double*)malloc(sizeof(double)*My_Max_NumGridB);
825   ImRhor = (double*)malloc(sizeof(double)*My_Max_NumGridB);
826 
827   /* set ReRhor and ImRhor */
828 
829   switch(den_flag) {
830 
831     case 0:
832 
833       for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
834         ReRhor[BN_AB] = Density_Grid_B[0][BN_AB] + Density_Grid_B[1][BN_AB]
835                        -2.0*ADensity_Grid_B[BN_AB];
836         ImRhor[BN_AB] = 0.0;
837       }
838 
839     break;
840 
841     case 1:
842 
843       for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
844         ReRhor[BN_AB] = Density_Grid_B[0][BN_AB];
845         ImRhor[BN_AB] = 0.0;
846       }
847 
848     break;
849 
850     case 2:
851 
852       for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
853         ReRhor[BN_AB] = Density_Grid_B[1][BN_AB];
854         ImRhor[BN_AB] = 0.0;
855       }
856 
857     break;
858 
859     case 3:
860 
861       for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
862         ReRhor[BN_AB] = 2.0*ADensity_Grid_B[BN_AB];
863         ImRhor[BN_AB] = 0.0;
864       }
865 
866     break;
867 
868     case 4:
869 
870       for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
871         ReRhor[BN_AB] = Density_Grid_B[2][BN_AB];
872         ImRhor[BN_AB] = Density_Grid_B[3][BN_AB];
873       }
874 
875     break;
876 
877   }
878 
879   /* FFT of Density */
880 
881   FFT_Poisson(ReRhor, ImRhor, ReRhok, ImRhok);
882 
883   /* freeing of arrays */
884 
885   free(ReRhor);
886   free(ImRhor);
887 
888   /* for time */
889   MPI_Barrier(mpi_comm_level1);
890   dtime(&TEtime);
891   time0 = TEtime - TStime;
892   return time0;
893 }
894 
895 
896 
897 
898 
899 
Get_Value_inReal(int complex_flag,double * ReVr,double * ImVr,double * ReVk,double * ImVk)900 void Get_Value_inReal(int complex_flag,
901                       double *ReVr, double *ImVr,
902                       double *ReVk, double *ImVk)
903 {
904   int BN_AB;
905   double *ReTmpr,*ImTmpr;
906 
907   /* allocation of arrays */
908 
909   ReTmpr = (double*)malloc(sizeof(double)*My_Max_NumGridB);
910   ImTmpr = (double*)malloc(sizeof(double)*My_Max_NumGridB);
911 
912   /* call Inverse_FFT_Poisson */
913 
914   Inverse_FFT_Poisson(ReTmpr, ImTmpr, ReVk, ImVk);
915 
916   /* copy ReTmpr and ImTmpr into ReVr and ImVr */
917 
918   if (complex_flag==0){
919 
920     for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
921       ReVr[BN_AB] = ReTmpr[BN_AB];
922     }
923   }
924   else if (complex_flag==1){
925 
926     for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
927       ReVr[BN_AB] = ReTmpr[BN_AB];
928       ImVr[BN_AB] = ImTmpr[BN_AB];
929     }
930   }
931 
932   /* freeing of arrays */
933 
934   free(ReTmpr);
935   free(ImTmpr);
936 }
937