1 /**********************************************************************
2 Poisson.c:
3
4 Poisson.c is a subrutine to solve Poisson's equation using
5 fast Fourier transformation.
6
7 Log of Poisson.c:
8
9 22/Nov/2001 Released by T.Ozaki
10 06/Apr/2012 Rewritten by T.Ozaki
11
12 ***********************************************************************/
13
14 #define measure_time 0
15
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <math.h>
19 #include <time.h>
20 #include "openmx_common.h"
21 #include "mpi.h"
22 #include <fftw3.h>
23
24
25 static void Inverse_FFT_Poisson(double *ReRhor, double *ImRhor,
26 double *ReRhok, double *ImRhok);
27
28 static void FFT_Poisson(double *ReRhor, double *ImRhor,
29 double *ReRhok, double *ImRhok);
30
Poisson(int fft_charge_flag,double * ReRhok,double * ImRhok)31 double Poisson(int fft_charge_flag,
32 double *ReRhok, double *ImRhok)
33 {
34 int k1,k2,k3;
35 int N2D,GNs,GN,BN_CB;
36 int N3[4];
37 double time0;
38 double tmp0,sk1,sk2,sk3;
39 double Gx,Gy,Gz,fac_invG2;
40 double TStime,TEtime,etime;
41 int numprocs,myid,tag=999,ID,IDS,IDR;
42 MPI_Status stat;
43 MPI_Request request;
44
45 /* MPI */
46 MPI_Comm_size(mpi_comm_level1,&numprocs);
47 MPI_Comm_rank(mpi_comm_level1,&myid);
48
49 if (myid==Host_ID && 0<level_stdout){
50 printf("<Poisson> Poisson's equation using FFT...\n");
51 }
52
53 MPI_Barrier(mpi_comm_level1);
54 dtime(&TStime);
55
56 /****************************************************
57 FFT of difference charge density
58 ****************************************************/
59
60 if (fft_charge_flag==1) etime = FFT_Density(0,ReRhok,ImRhok);
61
62 /****************************************************
63 4*PI/G2/N^3
64 ****************************************************/
65
66 tmp0 = 4.0*PI/(double)(Ngrid1*Ngrid2*Ngrid3);
67
68 N2D = Ngrid3*Ngrid2;
69 GNs = ((myid*N2D+numprocs-1)/numprocs)*Ngrid1;
70
71 for (BN_CB=0; BN_CB<My_NumGridB_CB; BN_CB++){
72
73 GN = BN_CB + GNs;
74 k3 = GN/(Ngrid2*Ngrid1);
75 k2 = (GN - k3*Ngrid2*Ngrid1)/Ngrid1;
76 k1 = GN - k3*Ngrid2*Ngrid1 - k2*Ngrid1;
77
78 if (k1<Ngrid1/2) sk1 = (double)k1;
79 else sk1 = (double)(k1 - Ngrid1);
80
81 if (k2<Ngrid2/2) sk2 = (double)k2;
82 else sk2 = (double)(k2 - Ngrid2);
83
84 if (k3<Ngrid3/2) sk3 = (double)k3;
85 else sk3 = (double)(k3 - Ngrid3);
86
87 Gx = sk1*rtv[1][1] + sk2*rtv[2][1] + sk3*rtv[3][1];
88 Gy = sk1*rtv[1][2] + sk2*rtv[2][2] + sk3*rtv[3][2];
89 Gz = sk1*rtv[1][3] + sk2*rtv[2][3] + sk3*rtv[3][3];
90 fac_invG2 = tmp0/(Gx*Gx + Gy*Gy + Gz*Gz);
91
92 if (k1==0 && k2==0 && k3==0){
93 ReRhok[BN_CB] = 0.0;
94 ImRhok[BN_CB] = 0.0;
95 }
96 else{
97 ReRhok[BN_CB] *= fac_invG2;
98 ImRhok[BN_CB] *= fac_invG2;
99 }
100 }
101
102 /****************************************************
103 find the Hartree potential in real space
104 ****************************************************/
105
106 Get_Value_inReal(0,dVHart_Grid_B,dVHart_Grid_B,ReRhok,ImRhok);
107
108 /* for time */
109 MPI_Barrier(mpi_comm_level1);
110 dtime(&TEtime);
111 time0 = TEtime - TStime;
112 return time0;
113 }
114
115
116
117
FFT_Poisson(double * ReRhor,double * ImRhor,double * ReRhok,double * ImRhok)118 void FFT_Poisson(double *ReRhor, double *ImRhor,
119 double *ReRhok, double *ImRhok)
120 {
121 static int firsttime=1;
122 int i,BN_AB,BN_CB,BN_CA,gp,NN_S,NN_R;
123 double *array0,*array1;
124 int numprocs,myid,tag=999,ID,IDS,IDR;
125 double Stime_proc, Etime_proc;
126 MPI_Status stat;
127 MPI_Request request;
128 MPI_Status *stat_send;
129 MPI_Status *stat_recv;
130 MPI_Request *request_send;
131 MPI_Request *request_recv;
132
133 fftw_complex *in, *out;
134 fftw_plan p;
135
136 /* MPI */
137 MPI_Comm_size(mpi_comm_level1,&numprocs);
138 MPI_Comm_rank(mpi_comm_level1,&myid);
139
140 /****************************************************
141 allocation of arrays:
142 ****************************************************/
143
144 in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*List_YOUSO[17]);
145 out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*List_YOUSO[17]);
146
147 /*------------------ FFT along the C-axis in the AB partition ------------------*/
148
149 if (measure_time==1) dtime(&Stime_proc);
150
151 p = fftw_plan_dft_1d(Ngrid3,in,out,-1,FFTW_ESTIMATE);
152
153 for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB+=Ngrid3){
154
155 for (i=0; i<Ngrid3; i++){
156 in[i][0] = ReRhor[BN_AB+i];
157 in[i][1] = ImRhor[BN_AB+i];
158 }
159
160 fftw_execute(p);
161
162 for (i=0; i<Ngrid3; i++){
163 ReRhor[BN_AB+i] = out[i][0];
164 ImRhor[BN_AB+i] = out[i][1];
165 }
166 }
167
168 fftw_destroy_plan(p);
169
170 if (measure_time==1){
171 dtime(&Etime_proc);
172 printf("myid=%2d Time FFT-C = %15.12f\n",myid,Etime_proc-Stime_proc);
173 }
174
175 /*------------------ MPI: AB to CA partitions ------------------*/
176
177 if (measure_time==1){
178 MPI_Barrier(mpi_comm_level1);
179 dtime(&Stime_proc);
180 }
181
182 array0 = (double*)malloc(sizeof(double)*2*GP_B_AB2CA_S[NN_B_AB2CA_S]);
183 array1 = (double*)malloc(sizeof(double)*2*GP_B_AB2CA_R[NN_B_AB2CA_R]);
184
185 request_send = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_AB2CA_S);
186 request_recv = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_AB2CA_R);
187 stat_send = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_AB2CA_S);
188 stat_recv = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_AB2CA_R);
189
190 NN_S = 0;
191 NN_R = 0;
192
193 /* MPI_Irecv */
194
195 for (ID=0; ID<NN_B_AB2CA_R; ID++){
196
197 IDR = ID_NN_B_AB2CA_R[ID];
198 gp = GP_B_AB2CA_R[ID];
199
200 if (IDR!=myid){
201 MPI_Irecv( &array1[2*gp], Num_Rcv_Grid_B_AB2CA[IDR]*2,
202 MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
203 NN_R++;
204 }
205 }
206
207 /* MPI_Isend */
208
209 for (ID=0; ID<NN_B_AB2CA_S; ID++){
210
211 IDS = ID_NN_B_AB2CA_S[ID];
212 gp = GP_B_AB2CA_S[ID];
213
214 if (IDS!=myid){
215
216 for (i=0; i<Num_Snd_Grid_B_AB2CA[IDS]; i++){
217 BN_AB = Index_Snd_Grid_B_AB2CA[IDS][i];
218 array0[2*gp+2*i ] = ReRhor[BN_AB];
219 array0[2*gp+2*i+1] = ImRhor[BN_AB];
220 }
221
222 MPI_Isend( &array0[2*gp], Num_Snd_Grid_B_AB2CA[IDS]*2,
223 MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
224 NN_S++;
225 }
226 }
227
228 /* MPI_Waitall */
229
230 if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
231 if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
232
233 /* copy them to ReRhok and ImRhok */
234
235 for (ID=0; ID<NN_B_AB2CA_R; ID++){
236
237 IDR = ID_NN_B_AB2CA_R[ID];
238 gp = GP_B_AB2CA_R[ID];
239
240 if (IDR!=myid){
241 for (i=0; i<Num_Rcv_Grid_B_AB2CA[IDR]; i++){
242 BN_CA = Index_Rcv_Grid_B_AB2CA[IDR][i];
243 ReRhok[BN_CA] = array1[2*gp+2*i ];
244 ImRhok[BN_CA] = array1[2*gp+2*i+1];
245 }
246 }
247
248 else{
249 for (i=0; i<Num_Snd_Grid_B_AB2CA[IDR]; i++){
250 BN_AB = Index_Snd_Grid_B_AB2CA[IDR][i];
251 BN_CA = Index_Rcv_Grid_B_AB2CA[IDR][i];
252 ReRhok[BN_CA] = ReRhor[BN_AB];
253 ImRhok[BN_CA] = ImRhor[BN_AB];
254 }
255 }
256 }
257
258 free(array0);
259 free(array1);
260
261 free(request_send);
262 free(request_recv);
263 free(stat_send);
264 free(stat_recv);
265
266 if (measure_time==1){
267 MPI_Barrier(mpi_comm_level1);
268 dtime(&Etime_proc);
269 printf("myid=%2d Time MPI: AB to CB = %15.12f\n",myid,Etime_proc-Stime_proc);
270 }
271
272 /*------------------ FFT along the B-axis in the CA partition ------------------*/
273
274 if (measure_time==1) dtime(&Stime_proc);
275
276 p = fftw_plan_dft_1d(Ngrid2,in,out,-1,FFTW_ESTIMATE);
277
278 for (BN_CA=0; BN_CA<My_NumGridB_CA; BN_CA+=Ngrid2){
279
280 for (i=0; i<Ngrid2; i++){
281 in[i][0] = ReRhok[BN_CA+i];
282 in[i][1] = ImRhok[BN_CA+i];
283 }
284
285 fftw_execute(p);
286
287 for (i=0; i<Ngrid2; i++){
288 ReRhok[BN_CA+i] = out[i][0];
289 ImRhok[BN_CA+i] = out[i][1];
290 }
291 }
292
293 fftw_destroy_plan(p);
294
295 if (measure_time==1){
296 dtime(&Etime_proc);
297 printf("myid=%2d Time FFT-B = %15.12f\n",myid,Etime_proc-Stime_proc);
298 }
299
300 /*------------------ MPI: CA to CB partitions ------------------*/
301
302 if (measure_time==1){
303 MPI_Barrier(mpi_comm_level1);
304 dtime(&Stime_proc);
305 }
306
307 array0 = (double*)malloc(sizeof(double)*2*GP_B_CA2CB_S[NN_B_CA2CB_S]);
308 array1 = (double*)malloc(sizeof(double)*2*GP_B_CA2CB_R[NN_B_CA2CB_R]);
309
310 request_send = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_CA2CB_S);
311 request_recv = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_CA2CB_R);
312 stat_send = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_CA2CB_S);
313 stat_recv = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_CA2CB_R);
314
315 NN_S = 0;
316 NN_R = 0;
317
318 /* MPI_Irecv */
319
320 for (ID=0; ID<NN_B_CA2CB_R; ID++){
321
322 IDR = ID_NN_B_CA2CB_R[ID];
323 gp = GP_B_CA2CB_R[ID];
324
325 if (IDR!=myid){
326 MPI_Irecv( &array1[2*gp], Num_Rcv_Grid_B_CA2CB[IDR]*2,
327 MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
328 NN_R++;
329 }
330 }
331
332 /* MPI_Isend */
333
334 for (ID=0; ID<NN_B_CA2CB_S; ID++){
335
336 IDS = ID_NN_B_CA2CB_S[ID];
337 gp = GP_B_CA2CB_S[ID];
338
339 if (IDS!=myid){
340
341 for (i=0; i<Num_Snd_Grid_B_CA2CB[IDS]; i++){
342 BN_CA = Index_Snd_Grid_B_CA2CB[IDS][i];
343 array0[2*gp+2*i ] = ReRhok[BN_CA];
344 array0[2*gp+2*i+1] = ImRhok[BN_CA];
345 }
346
347 MPI_Isend( &array0[2*gp], Num_Snd_Grid_B_CA2CB[IDS]*2,
348 MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
349 NN_S++;
350 }
351 }
352
353 /* MPI_Waitall */
354
355 if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
356 if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
357
358 /* copy them to ReRhor and ImRhor */
359
360 for (ID=0; ID<NN_B_CA2CB_R; ID++){
361
362 IDR = ID_NN_B_CA2CB_R[ID];
363 gp = GP_B_CA2CB_R[ID];
364
365 if (IDR!=myid){
366 for (i=0; i<Num_Rcv_Grid_B_CA2CB[IDR]; i++){
367 BN_CB = Index_Rcv_Grid_B_CA2CB[IDR][i];
368 ReRhor[BN_CB] = array1[2*gp+2*i ];
369 ImRhor[BN_CB] = array1[2*gp+2*i+1];
370 }
371 }
372
373 else{
374 for (i=0; i<Num_Snd_Grid_B_CA2CB[IDR]; i++){
375 BN_CA = Index_Snd_Grid_B_CA2CB[IDR][i];
376 BN_CB = Index_Rcv_Grid_B_CA2CB[IDR][i];
377 ReRhor[BN_CB] = ReRhok[BN_CA];
378 ImRhor[BN_CB] = ImRhok[BN_CA];
379 }
380 }
381 }
382
383 free(array0);
384 free(array1);
385
386 free(request_send);
387 free(request_recv);
388 free(stat_send);
389 free(stat_recv);
390
391 if (measure_time==1){
392 MPI_Barrier(mpi_comm_level1);
393 dtime(&Etime_proc);
394 printf("myid=%2d Time MPI: CA to CB = %15.12f\n",myid,Etime_proc-Stime_proc);
395 }
396
397 /*------------------ FFT along the A-axis in the CB partition ------------------*/
398
399 if (measure_time==1) dtime(&Stime_proc);
400
401 p = fftw_plan_dft_1d(Ngrid1,in,out,-1,FFTW_ESTIMATE);
402
403 for (BN_CB=0; BN_CB<My_NumGridB_CB; BN_CB+=Ngrid1){
404
405 for (i=0; i<Ngrid1; i++){
406 in[i][0] = ReRhor[BN_CB+i];
407 in[i][1] = ImRhor[BN_CB+i];
408 }
409
410 fftw_execute(p);
411
412 for (i=0; i<Ngrid1; i++){
413 ReRhok[BN_CB+i] = out[i][0];
414 ImRhok[BN_CB+i] = out[i][1];
415 }
416 }
417
418 fftw_destroy_plan(p);
419 fftw_cleanup();
420
421 if (measure_time==1){
422 dtime(&Etime_proc);
423 printf("myid=%2d Time FFT-A = %15.12f\n",myid,Etime_proc-Stime_proc);
424 }
425
426 /****************************************************
427 freeing of arrays:
428 ****************************************************/
429
430 fftw_free(in);
431 fftw_free(out);
432
433 /* PrintMemory */
434
435 if (firsttime) {
436
437 if (GP_B_CA2CB_S[NN_B_CA2CB_S]<GP_B_AB2CA_S[NN_B_AB2CA_S]){
438 PrintMemory("Poisson: array0",sizeof(double)*2*GP_B_AB2CA_S[NN_B_AB2CA_S],NULL);
439 }
440 else{
441 PrintMemory("Poisson: array0",sizeof(double)*2*GP_B_CA2CB_S[NN_B_CA2CB_S],NULL);
442 }
443
444 if (GP_B_CA2CB_R[NN_B_CA2CB_R]<GP_B_AB2CA_R[NN_B_AB2CA_R]){
445 PrintMemory("Poisson: array1",sizeof(double)*2*GP_B_AB2CA_R[NN_B_AB2CA_R],NULL);
446 }
447 else{
448 PrintMemory("Poisson: array1",sizeof(double)*2*GP_B_CA2CB_R[NN_B_CA2CB_R],NULL);
449 }
450
451 if (NN_B_CA2CB_S<NN_B_AB2CA_S){
452 PrintMemory("Poisson: request_send",sizeof(MPI_Request)*NN_B_AB2CA_S,NULL);
453 PrintMemory("Poisson: stat_send ",sizeof(MPI_Status)*NN_B_AB2CA_S,NULL);
454 }
455 else{
456 PrintMemory("Poisson: request_send",sizeof(MPI_Request)*NN_B_CA2CB_S,NULL);
457 PrintMemory("Poisson: stat_send ",sizeof(MPI_Status)*NN_B_CA2CB_S,NULL);
458 }
459
460 if (NN_B_CA2CB_R<NN_B_AB2CA_R){
461 PrintMemory("Poisson: request_recv",sizeof(MPI_Request)*NN_B_AB2CA_R,NULL);
462 PrintMemory("Poisson: stat_recv ",sizeof(MPI_Status)*NN_B_AB2CA_R,NULL);
463 }
464 else{
465 PrintMemory("Poisson: request_recv",sizeof(MPI_Request)*NN_B_CA2CB_R,NULL);
466 PrintMemory("Poisson: stat_recv ",sizeof(MPI_Status)*NN_B_CA2CB_R,NULL);
467 }
468
469 /* turn off firsttime flag */
470 firsttime=0;
471 }
472
473 }
474
475
476
477
Inverse_FFT_Poisson(double * ReRhor,double * ImRhor,double * ReRhok,double * ImRhok)478 void Inverse_FFT_Poisson(double *ReRhor, double *ImRhor,
479 double *ReRhok, double *ImRhok)
480 {
481 int i,BN_AB,BN_CB,BN_CA,gp,NN_S,NN_R;
482 double *array0,*array1;
483 int numprocs,myid,tag=999,ID,IDS,IDR;
484 double Stime_proc, Etime_proc;
485 MPI_Status stat;
486 MPI_Request request;
487 MPI_Status *stat_send;
488 MPI_Status *stat_recv;
489 MPI_Request *request_send;
490 MPI_Request *request_recv;
491
492 fftw_complex *in, *out;
493 fftw_plan p;
494
495 /* MPI */
496 MPI_Comm_size(mpi_comm_level1,&numprocs);
497 MPI_Comm_rank(mpi_comm_level1,&myid);
498
499 /****************************************************
500 allocation of arrays:
501
502 fftw_complex in[List_YOUSO[17]];
503 fftw_complex out[List_YOUSO[17]];
504 ****************************************************/
505
506 in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*List_YOUSO[17]);
507 out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*List_YOUSO[17]);
508
509 /*------------------ Inverse FFT along the A-axis in the CB partition ------------------*/
510
511 if (measure_time==1) dtime(&Stime_proc);
512
513 p = fftw_plan_dft_1d(Ngrid1,in,out,1,FFTW_ESTIMATE);
514
515 for (BN_CB=0; BN_CB<My_NumGridB_CB; BN_CB+=Ngrid1){
516
517 for (i=0; i<Ngrid1; i++){
518 in[i][0] = ReRhok[BN_CB+i];
519 in[i][1] = ImRhok[BN_CB+i];
520 }
521
522 fftw_execute(p);
523
524 for (i=0; i<Ngrid1; i++){
525 ReRhok[BN_CB+i] = out[i][0];
526 ImRhok[BN_CB+i] = out[i][1];
527 }
528 }
529
530 fftw_destroy_plan(p);
531
532 if (measure_time==1){
533 dtime(&Etime_proc);
534 printf("myid=%2d Time Inverse FFT-A = %15.12f\n",myid,Etime_proc-Stime_proc);
535 }
536
537 /*------------------ MPI: CB to CA partitions ------------------*/
538
539 if (measure_time==1){
540 MPI_Barrier(mpi_comm_level1);
541 dtime(&Stime_proc);
542 }
543
544 array0 = (double*)malloc(sizeof(double)*2*GP_B_CA2CB_R[NN_B_CA2CB_R]);
545 array1 = (double*)malloc(sizeof(double)*2*GP_B_CA2CB_S[NN_B_CA2CB_S]);
546
547 request_send = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_CA2CB_R);
548 request_recv = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_CA2CB_S);
549 stat_send = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_CA2CB_R);
550 stat_recv = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_CA2CB_S);
551
552 NN_S = 0;
553 NN_R = 0;
554
555 /* MPI_Irecv */
556
557 for (ID=0; ID<NN_B_CA2CB_S; ID++){
558
559 IDR = ID_NN_B_CA2CB_S[ID];
560 gp = GP_B_CA2CB_S[ID];
561
562 if (IDR!=myid){
563
564 MPI_Irecv( &array1[2*gp], Num_Snd_Grid_B_CA2CB[IDR]*2,
565 MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
566 NN_R++;
567 }
568 }
569
570 /* MPI_Isend */
571
572 for (ID=0; ID<NN_B_CA2CB_R; ID++){
573
574 IDS = ID_NN_B_CA2CB_R[ID];
575 gp = GP_B_CA2CB_R[ID];
576
577 if (IDS!=myid){
578
579 for (i=0; i<Num_Rcv_Grid_B_CA2CB[IDS]; i++){
580 BN_CB = Index_Rcv_Grid_B_CA2CB[IDS][i];
581 array0[2*gp+2*i ] = ReRhok[BN_CB];
582 array0[2*gp+2*i+1] = ImRhok[BN_CB];
583 }
584
585 MPI_Isend( &array0[2*gp], Num_Rcv_Grid_B_CA2CB[IDS]*2,
586 MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
587 NN_S++;
588 }
589 }
590
591 /* MPI_Waitall */
592
593 if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
594 if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
595
596 /* copy them to ReRhor and ImRhor */
597
598 for (ID=0; ID<NN_B_CA2CB_S; ID++){
599
600 IDR = ID_NN_B_CA2CB_S[ID];
601 gp = GP_B_CA2CB_S[ID];
602
603 if (IDR!=myid){
604 for (i=0; i<Num_Snd_Grid_B_CA2CB[IDR]; i++){
605 BN_CA = Index_Snd_Grid_B_CA2CB[IDR][i];
606 ReRhor[BN_CA] = array1[2*gp+2*i ];
607 ImRhor[BN_CA] = array1[2*gp+2*i+1];
608 }
609 }
610 else{
611 for (i=0; i<Num_Rcv_Grid_B_CA2CB[IDR]; i++){
612 BN_CB = Index_Rcv_Grid_B_CA2CB[IDR][i];
613 BN_CA = Index_Snd_Grid_B_CA2CB[IDR][i];
614 ReRhor[BN_CA] = ReRhok[BN_CB];
615 ImRhor[BN_CA] = ImRhok[BN_CB];
616 }
617 }
618 }
619
620 free(array0);
621 free(array1);
622
623 free(request_send);
624 free(request_recv);
625 free(stat_send);
626 free(stat_recv);
627
628 if (measure_time==1){
629 MPI_Barrier(mpi_comm_level1);
630 dtime(&Etime_proc);
631 printf("myid=%2d Time MPI: CB to CA = %15.12f\n",myid,Etime_proc-Stime_proc);
632 }
633
634 /*------------------ Inverse FFT along the B-axis in the CA partition ------------------*/
635
636 if (measure_time==1) dtime(&Stime_proc);
637
638 p = fftw_plan_dft_1d(Ngrid2,in,out,1,FFTW_ESTIMATE);
639
640 for (BN_CA=0; BN_CA<My_NumGridB_CA; BN_CA+=Ngrid2){
641
642 for (i=0; i<Ngrid2; i++){
643 in[i][0] = ReRhor[BN_CA+i];
644 in[i][1] = ImRhor[BN_CA+i];
645 }
646
647 fftw_execute(p);
648
649 for (i=0; i<Ngrid2; i++){
650 ReRhor[BN_CA+i] = out[i][0];
651 ImRhor[BN_CA+i] = out[i][1];
652 }
653 }
654
655 fftw_destroy_plan(p);
656
657 if (measure_time==1){
658 dtime(&Etime_proc);
659 printf("myid=%2d Time Inverse FFT-B = %15.12f\n",myid,Etime_proc-Stime_proc);
660 }
661
662 /*------------------ MPI: CA to AB partitions ------------------*/
663
664 if (measure_time==1){
665 MPI_Barrier(mpi_comm_level1);
666 dtime(&Stime_proc);
667 }
668
669 array0 = (double*)malloc(sizeof(double)*2*GP_B_AB2CA_R[NN_B_AB2CA_R]);
670 array1 = (double*)malloc(sizeof(double)*2*GP_B_AB2CA_S[NN_B_AB2CA_S]);
671
672 request_send = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_AB2CA_R);
673 request_recv = (MPI_Request *)malloc(sizeof(MPI_Request)*NN_B_AB2CA_S);
674 stat_send = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_AB2CA_R);
675 stat_recv = (MPI_Status *)malloc(sizeof(MPI_Status)*NN_B_AB2CA_S);
676
677 NN_S = 0;
678 NN_R = 0;
679
680 /* MPI_Irecv */
681
682 for (ID=0; ID<NN_B_AB2CA_S; ID++){
683
684 IDR = ID_NN_B_AB2CA_S[ID];
685 gp = GP_B_AB2CA_S[ID];
686
687 if (IDR!=myid){
688
689 MPI_Irecv( &array1[2*gp], Num_Snd_Grid_B_AB2CA[IDR]*2,
690 MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
691 NN_R++;
692 }
693 }
694
695 /* MPI_Isend */
696
697 for (ID=0; ID<NN_B_AB2CA_R; ID++){
698
699 IDS = ID_NN_B_AB2CA_R[ID];
700 gp = GP_B_AB2CA_R[ID];
701
702 if (IDS!=myid){
703
704 for (i=0; i<Num_Rcv_Grid_B_AB2CA[IDS]; i++){
705 BN_CB = Index_Rcv_Grid_B_AB2CA[IDS][i];
706 array0[2*gp+2*i ] = ReRhor[BN_CB];
707 array0[2*gp+2*i+1] = ImRhor[BN_CB];
708 }
709
710 MPI_Isend( &array0[2*gp], Num_Rcv_Grid_B_AB2CA[IDS]*2,
711 MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
712 NN_S++;
713 }
714 }
715
716 /* MPI_Waitall */
717
718 if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
719 if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
720
721 /* copy them to ReRhok and ImRhok */
722
723 for (ID=0; ID<NN_B_AB2CA_S; ID++){
724
725 IDR = ID_NN_B_AB2CA_S[ID];
726 gp = GP_B_AB2CA_S[ID];
727
728 if (IDR!=myid){
729 for (i=0; i<Num_Snd_Grid_B_AB2CA[IDR]; i++){
730 BN_AB = Index_Snd_Grid_B_AB2CA[IDR][i];
731 ReRhok[BN_AB] = array1[2*gp+2*i ];
732 ImRhok[BN_AB] = array1[2*gp+2*i+1];
733 }
734 }
735 else {
736 for (i=0; i<Num_Rcv_Grid_B_AB2CA[IDR]; i++){
737 BN_CA = Index_Rcv_Grid_B_AB2CA[IDR][i];
738 BN_AB = Index_Snd_Grid_B_AB2CA[IDR][i];
739 ReRhok[BN_AB] = ReRhor[BN_CA];
740 ImRhok[BN_AB] = ImRhor[BN_CA];
741 }
742 }
743 }
744
745 free(array0);
746 free(array1);
747
748 free(request_send);
749 free(request_recv);
750 free(stat_send);
751 free(stat_recv);
752
753 if (measure_time==1){
754 MPI_Barrier(mpi_comm_level1);
755 dtime(&Etime_proc);
756 printf("myid=%2d Time MPI: CA to AB = %15.12f\n",myid,Etime_proc-Stime_proc);
757 }
758
759 /*------------------ Inverse FFT along the C-axis in the AB partition ------------------*/
760
761 if (measure_time==1) dtime(&Stime_proc);
762
763 p = fftw_plan_dft_1d(Ngrid3,in,out,1,FFTW_ESTIMATE);
764
765 for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB+=Ngrid3){
766
767 for (i=0; i<Ngrid3; i++){
768 in[i][0] = ReRhok[BN_AB+i];
769 in[i][1] = ImRhok[BN_AB+i];
770 }
771
772 fftw_execute(p);
773
774 for (i=0; i<Ngrid3; i++){
775 ReRhor[BN_AB+i] = out[i][0];
776 ImRhor[BN_AB+i] = out[i][1];
777 }
778 }
779
780 fftw_destroy_plan(p);
781 fftw_cleanup();
782
783 if (measure_time==1){
784 dtime(&Etime_proc);
785 printf("myid=%2d Time Inverse FFT-C = %15.12f\n",myid,Etime_proc-Stime_proc);
786 }
787
788 /****************************************************
789 freeing of arrays:
790
791 fftw_complex in[List_YOUSO[17]];
792 fftw_complex out[List_YOUSO[17]];
793 ****************************************************/
794
795 fftw_free(in);
796 fftw_free(out);
797 }
798
799
800
801
FFT_Density(int den_flag,double * ReRhok,double * ImRhok)802 double FFT_Density(int den_flag,
803 double *ReRhok, double *ImRhok)
804 {
805 int BN_AB;
806 int numprocs,myid;
807 double *ReRhor,*ImRhor;
808 double TStime,TEtime,time0;
809
810 MPI_Comm_size(mpi_comm_level1,&numprocs);
811 MPI_Comm_rank(mpi_comm_level1,&myid);
812
813 MPI_Barrier(mpi_comm_level1);
814 dtime(&TStime);
815
816 if (4<den_flag){
817 printf("invalid den_flag for FFT2D_Density\n");
818 MPI_Finalize();
819 exit(0);
820 }
821
822 /* allocation of arrays */
823
824 ReRhor = (double*)malloc(sizeof(double)*My_Max_NumGridB);
825 ImRhor = (double*)malloc(sizeof(double)*My_Max_NumGridB);
826
827 /* set ReRhor and ImRhor */
828
829 switch(den_flag) {
830
831 case 0:
832
833 for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
834 ReRhor[BN_AB] = Density_Grid_B[0][BN_AB] + Density_Grid_B[1][BN_AB]
835 -2.0*ADensity_Grid_B[BN_AB];
836 ImRhor[BN_AB] = 0.0;
837 }
838
839 break;
840
841 case 1:
842
843 for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
844 ReRhor[BN_AB] = Density_Grid_B[0][BN_AB];
845 ImRhor[BN_AB] = 0.0;
846 }
847
848 break;
849
850 case 2:
851
852 for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
853 ReRhor[BN_AB] = Density_Grid_B[1][BN_AB];
854 ImRhor[BN_AB] = 0.0;
855 }
856
857 break;
858
859 case 3:
860
861 for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
862 ReRhor[BN_AB] = 2.0*ADensity_Grid_B[BN_AB];
863 ImRhor[BN_AB] = 0.0;
864 }
865
866 break;
867
868 case 4:
869
870 for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
871 ReRhor[BN_AB] = Density_Grid_B[2][BN_AB];
872 ImRhor[BN_AB] = Density_Grid_B[3][BN_AB];
873 }
874
875 break;
876
877 }
878
879 /* FFT of Density */
880
881 FFT_Poisson(ReRhor, ImRhor, ReRhok, ImRhok);
882
883 /* freeing of arrays */
884
885 free(ReRhor);
886 free(ImRhor);
887
888 /* for time */
889 MPI_Barrier(mpi_comm_level1);
890 dtime(&TEtime);
891 time0 = TEtime - TStime;
892 return time0;
893 }
894
895
896
897
898
899
Get_Value_inReal(int complex_flag,double * ReVr,double * ImVr,double * ReVk,double * ImVk)900 void Get_Value_inReal(int complex_flag,
901 double *ReVr, double *ImVr,
902 double *ReVk, double *ImVk)
903 {
904 int BN_AB;
905 double *ReTmpr,*ImTmpr;
906
907 /* allocation of arrays */
908
909 ReTmpr = (double*)malloc(sizeof(double)*My_Max_NumGridB);
910 ImTmpr = (double*)malloc(sizeof(double)*My_Max_NumGridB);
911
912 /* call Inverse_FFT_Poisson */
913
914 Inverse_FFT_Poisson(ReTmpr, ImTmpr, ReVk, ImVk);
915
916 /* copy ReTmpr and ImTmpr into ReVr and ImVr */
917
918 if (complex_flag==0){
919
920 for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
921 ReVr[BN_AB] = ReTmpr[BN_AB];
922 }
923 }
924 else if (complex_flag==1){
925
926 for (BN_AB=0; BN_AB<My_NumGridB_AB; BN_AB++){
927 ReVr[BN_AB] = ReTmpr[BN_AB];
928 ImVr[BN_AB] = ImTmpr[BN_AB];
929 }
930 }
931
932 /* freeing of arrays */
933
934 free(ReTmpr);
935 free(ImTmpr);
936 }
937