1 /**********************************************************************
2 Set_Density_Grid.c:
3
4 Set_Density_Grid.c is a subroutine to calculate a charge density
5 on grid by one-particle wave functions.
6
7 Log of Set_Density_Grid.c:
8
9 22/Nov/2001 Released by T.Ozaki
10 19/Apr/2013 Modified by A.M.Ito
11
12 ***********************************************************************/
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <time.h>
17 #include <math.h>
18 #include "openmx_common.h"
19 #include "mpi.h"
20 #include <omp.h>
21
22 #define measure_time 0
23
24
25
Set_Density_Grid(int Cnt_kind,int Calc_CntOrbital_ON,double ***** CDM)26 double Set_Density_Grid(int Cnt_kind, int Calc_CntOrbital_ON, double *****CDM)
27 {
28 static int firsttime=1;
29 int al,L0,Mul0,M0,p,size1,size2;
30 int Gc_AN,Mc_AN,Mh_AN,LN,AN,BN,CN;
31 int n1,n2,n3,k1,k2,k3,N3[4];
32 int Cwan,NO0,NO1,Rn,N,Hwan,i,j,k,n;
33 int NN_S,NN_R;
34 unsigned long long int N2D,n2D,GN;
35 int Max_Size,My_Max;
36 int size_Tmp_Den_Grid;
37 int size_Den_Snd_Grid_A2B;
38 int size_Den_Rcv_Grid_A2B;
39 int h_AN,Gh_AN,Rnh,spin,Nc,GRc,Nh,Nog;
40 int Nc_0,Nc_1,Nc_2,Nc_3,Nh_0,Nh_1,Nh_2,Nh_3;
41
42 double threshold;
43 double tmp0,tmp1,sk1,sk2,sk3,tot_den,sum;
44 double tmp0_0,tmp0_1,tmp0_2,tmp0_3;
45 double sum_0,sum_1,sum_2,sum_3;
46 double d1,d2,d3,cop,sip,sit,cot;
47 double x,y,z,Cxyz[4];
48 double TStime,TEtime;
49 double ***Tmp_Den_Grid;
50 double **Den_Snd_Grid_A2B;
51 double **Den_Rcv_Grid_A2B;
52 double *tmp_array;
53 double *tmp_array2;
54 double *orbs0,*orbs1;
55 double *orbs0_0,*orbs0_1,*orbs0_2,*orbs0_3;
56 double *orbs1_0,*orbs1_1,*orbs1_2,*orbs1_3;
57 double ***tmp_CDM;
58 int *Snd_Size,*Rcv_Size;
59 int numprocs,myid,tag=999,ID,IDS,IDR;
60 double Stime_atom, Etime_atom;
61 double time0,time1,time2;
62
63 MPI_Status stat;
64 MPI_Request request;
65 MPI_Status *stat_send;
66 MPI_Status *stat_recv;
67 MPI_Request *request_send;
68 MPI_Request *request_recv;
69
70 /* for OpenMP */
71 int OMPID,Nthrds;
72
73 /* MPI */
74 MPI_Comm_size(mpi_comm_level1,&numprocs);
75 MPI_Comm_rank(mpi_comm_level1,&myid);
76
77 dtime(&TStime);
78
79 /* allocation of arrays */
80
81 size_Tmp_Den_Grid = 0;
82 Tmp_Den_Grid = (double***)malloc(sizeof(double**)*(SpinP_switch+1));
83 for (i=0; i<(SpinP_switch+1); i++){
84 Tmp_Den_Grid[i] = (double**)malloc(sizeof(double*)*(Matomnum+1));
85 Tmp_Den_Grid[i][0] = (double*)malloc(sizeof(double)*1);
86 for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
87 Gc_AN = F_M2G[Mc_AN];
88 Tmp_Den_Grid[i][Mc_AN] = (double*)malloc(sizeof(double)*GridN_Atom[Gc_AN]);
89
90 /* AITUNE */
91 for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
92 Tmp_Den_Grid[i][Mc_AN][Nc] = 0.0;
93 }
94
95 size_Tmp_Den_Grid += GridN_Atom[Gc_AN];
96 }
97 }
98
99 size_Den_Snd_Grid_A2B = 0;
100 Den_Snd_Grid_A2B = (double**)malloc(sizeof(double*)*numprocs);
101 for (ID=0; ID<numprocs; ID++){
102 Den_Snd_Grid_A2B[ID] = (double*)malloc(sizeof(double)*Num_Snd_Grid_A2B[ID]*(SpinP_switch+1));
103 size_Den_Snd_Grid_A2B += Num_Snd_Grid_A2B[ID]*(SpinP_switch+1);
104 }
105
106 size_Den_Rcv_Grid_A2B = 0;
107 Den_Rcv_Grid_A2B = (double**)malloc(sizeof(double*)*numprocs);
108 for (ID=0; ID<numprocs; ID++){
109 Den_Rcv_Grid_A2B[ID] = (double*)malloc(sizeof(double)*Num_Rcv_Grid_A2B[ID]*(SpinP_switch+1));
110 size_Den_Rcv_Grid_A2B += Num_Rcv_Grid_A2B[ID]*(SpinP_switch+1);
111 }
112
113 /* PrintMemory */
114
115 if (firsttime==1){
116 PrintMemory("Set_Density_Grid: AtomDen_Grid", sizeof(double)*size_Tmp_Den_Grid, NULL);
117 PrintMemory("Set_Density_Grid: Den_Snd_Grid_A2B",sizeof(double)*size_Den_Snd_Grid_A2B, NULL);
118 PrintMemory("Set_Density_Grid: Den_Rcv_Grid_A2B",sizeof(double)*size_Den_Rcv_Grid_A2B, NULL);
119 firsttime = 0;
120 }
121
122 /****************************************************
123 when orbital optimization
124 ****************************************************/
125
126 if (Calc_CntOrbital_ON==1 && Cnt_kind==0 && Cnt_switch==1){
127
128 for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
129
130 dtime(&Stime_atom);
131
132 /* COrbs_Grid */
133
134 Gc_AN = M2G[Mc_AN];
135 Cwan = WhatSpecies[Gc_AN];
136 NO0 = Spe_Total_CNO[Cwan];
137 for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
138
139 al = -1;
140 for (L0=0; L0<=Spe_MaxL_Basis[Cwan]; L0++){
141 for (Mul0=0; Mul0<Spe_Num_CBasis[Cwan][L0]; Mul0++){
142 for (M0=0; M0<=2*L0; M0++){
143
144 al++;
145 tmp0 = 0.0;
146
147 for (p=0; p<Spe_Specified_Num[Cwan][al]; p++){
148 j = Spe_Trans_Orbital[Cwan][al][p];
149 tmp0 += CntCoes[Mc_AN][al][p]*Orbs_Grid[Mc_AN][Nc][j];/* AITUNE */
150 }
151
152 COrbs_Grid[Mc_AN][al][Nc] = (Type_Orbs_Grid)tmp0;
153 }
154 }
155 }
156 }
157
158 dtime(&Etime_atom);
159 time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
160 }
161
162 /**********************************************
163 MPI:
164
165 COrbs_Grid
166 ***********************************************/
167
168 /* allocation of arrays */
169 Snd_Size = (int*)malloc(sizeof(int)*numprocs);
170 Rcv_Size = (int*)malloc(sizeof(int)*numprocs);
171
172 /* find data size for sending and receiving */
173
174 My_Max = -10000;
175 for (ID=0; ID<numprocs; ID++){
176
177 IDS = (myid + ID) % numprocs;
178 IDR = (myid - ID + numprocs) % numprocs;
179
180 if (ID!=0){
181 /* sending size */
182 if (F_Snd_Num[IDS]!=0){
183 /* find data size */
184 size1 = 0;
185 for (n=0; n<F_Snd_Num[IDS]; n++){
186 Gc_AN = Snd_GAN[IDS][n];
187 Cwan = WhatSpecies[Gc_AN];
188 size1 += GridN_Atom[Gc_AN]*Spe_Total_CNO[Cwan];
189 }
190
191 Snd_Size[IDS] = size1;
192 MPI_Isend(&size1, 1, MPI_INT, IDS, tag, mpi_comm_level1, &request);
193 }
194 else{
195 Snd_Size[IDS] = 0;
196 }
197
198 /* receiving size */
199 if (F_Rcv_Num[IDR]!=0){
200 MPI_Recv(&size2, 1, MPI_INT, IDR, tag, mpi_comm_level1, &stat);
201 Rcv_Size[IDR] = size2;
202 }
203 else{
204 Rcv_Size[IDR] = 0;
205 }
206 if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);
207 }
208 else{
209 Snd_Size[IDS] = 0;
210 Rcv_Size[IDR] = 0;
211 }
212
213 if (My_Max<Snd_Size[IDS]) My_Max = Snd_Size[IDS];
214 if (My_Max<Rcv_Size[IDR]) My_Max = Rcv_Size[IDR];
215
216 }
217
218 MPI_Allreduce(&My_Max, &Max_Size, 1, MPI_INT, MPI_MAX, mpi_comm_level1);
219 /* allocation of arrays */
220 tmp_array = (double*)malloc(sizeof(double)*Max_Size);
221 tmp_array2 = (double*)malloc(sizeof(double)*Max_Size);
222
223 /* send and receive COrbs_Grid */
224
225 for (ID=0; ID<numprocs; ID++){
226
227 IDS = (myid + ID) % numprocs;
228 IDR = (myid - ID + numprocs) % numprocs;
229
230 if (ID!=0){
231
232 /* sending of data */
233
234 if (F_Snd_Num[IDS]!=0){
235
236 /* find data size */
237 size1 = Snd_Size[IDS];
238
239 /* multidimentional array to vector array */
240 k = 0;
241 for (n=0; n<F_Snd_Num[IDS]; n++){
242 Mc_AN = Snd_MAN[IDS][n];
243 Gc_AN = Snd_GAN[IDS][n];
244 Cwan = WhatSpecies[Gc_AN];
245 NO0 = Spe_Total_CNO[Cwan];
246 for (i=0; i<NO0; i++){
247 for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
248 tmp_array[k] = COrbs_Grid[Mc_AN][i][Nc];
249 k++;
250 }
251 }
252 }
253
254 /* MPI_Isend */
255 MPI_Isend(&tmp_array[0], size1, MPI_DOUBLE, IDS,
256 tag, mpi_comm_level1, &request);
257 }
258
259 /* receiving of block data */
260
261 if (F_Rcv_Num[IDR]!=0){
262
263 /* find data size */
264 size2 = Rcv_Size[IDR];
265
266 /* MPI_Recv */
267 MPI_Recv(&tmp_array2[0], size2, MPI_DOUBLE, IDR, tag, mpi_comm_level1, &stat);
268
269 k = 0;
270 Mc_AN = F_TopMAN[IDR] - 1;
271 for (n=0; n<F_Rcv_Num[IDR]; n++){
272 Mc_AN++;
273 Gc_AN = Rcv_GAN[IDR][n];
274 Cwan = WhatSpecies[Gc_AN];
275 NO0 = Spe_Total_CNO[Cwan];
276
277 for (i=0; i<NO0; i++){
278 for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
279 COrbs_Grid[Mc_AN][i][Nc] = tmp_array2[k];
280 k++;
281 }
282 }
283 }
284 }
285 if (F_Snd_Num[IDS]!=0) MPI_Wait(&request,&stat);
286 }
287 }
288
289 /* freeing of arrays */
290 free(tmp_array);
291 free(tmp_array2);
292 free(Snd_Size);
293 free(Rcv_Size);
294 }
295
296 /**********************************************
297 calculate Tmp_Den_Grid
298 ***********************************************/
299
300 dtime(&time1);
301
302
303 /* AITUNE ========================== */
304 int OneD_Nloop = 0;
305 int ai_MaxNc = 0;
306 for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
307 int Gc_AN = M2G[Mc_AN];
308 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
309 OneD_Nloop++;
310 if(ai_MaxNc < GridN_Atom[Gc_AN]) {ai_MaxNc = GridN_Atom[Gc_AN];}
311 }
312 }
313 /* ai_MaxNc is maximum of GridN_Atom[] */
314
315 int gNthrds;
316 #pragma omp parallel
317 {
318 gNthrds = omp_get_num_threads();
319 }
320
321 double*** ai_tmpDG_all = (double***)malloc(sizeof(double**)*gNthrds);
322
323 /* ========================== AITUNE */
324
325 #pragma omp parallel shared(myid,G2ID,Orbs_Grid_FNAN,List_YOUSO,time_per_atom,Tmp_Den_Grid,Orbs_Grid,COrbs_Grid,Cnt_switch,Cnt_kind,GListTAtoms2,GListTAtoms1,NumOLG,CDM,SpinP_switch,WhatSpecies,ncn,F_G2M,natn,Spe_Total_CNO,M2G) private(OMPID,Nthrds,Mc_AN,h_AN,Stime_atom,Etime_atom,Gc_AN,Cwan,NO0,Gh_AN,Mh_AN,Rnh,Hwan,NO1,spin,i,j,tmp_CDM,Nog,Nc_0,Nc_1,Nc_2,Nc_3,Nh_0,Nh_1,Nh_2,Nh_3,orbs0_0,orbs0_1,orbs0_2,orbs0_3,orbs1_0,orbs1_1,orbs1_2,orbs1_3,sum_0,sum_1,sum_2,sum_3,tmp0_0,tmp0_1,tmp0_2,tmp0_3,Nc,Nh,orbs0,orbs1,sum,tmp0)
326 {
327
328 orbs0 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
329 orbs1 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
330
331 orbs0_0 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
332 orbs0_1 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
333 orbs0_2 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
334 orbs0_3 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
335 orbs1_0 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
336 orbs1_1 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
337 orbs1_2 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
338 orbs1_3 = (double*)malloc(sizeof(double)*List_YOUSO[7]);
339
340 tmp_CDM = (double***)malloc(sizeof(double**)*(SpinP_switch+1));
341 for (i=0; i<(SpinP_switch+1); i++){
342 tmp_CDM[i] = (double**)malloc(sizeof(double*)*List_YOUSO[7]);
343 for (j=0; j<List_YOUSO[7]; j++){
344 tmp_CDM[i][j] = (double*)malloc(sizeof(double)*List_YOUSO[7]);
345 }
346 }
347
348 /* get info. on OpenMP */
349
350 OMPID = omp_get_thread_num();
351 Nthrds = omp_get_num_threads();
352
353
354 /* AITUNE ========================== */
355
356
357 double *ai_tmpDGs[4];
358 {
359 int spin;
360 for (spin=0; spin<=SpinP_switch; spin++){
361 ai_tmpDGs[spin] = (double*)malloc(sizeof(double)* ai_MaxNc);
362 }
363 }
364 ai_tmpDG_all[OMPID] = ai_tmpDGs;
365 /* ==================================== AITUNE */
366
367
368 /* for (Mc_AN=(OMPID+1); Mc_AN<=Matomnum; Mc_AN+=Nthrds){ AITUNE */
369 for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
370
371 dtime(&Stime_atom);
372
373 /* set data on Mc_AN */
374
375 Gc_AN = M2G[Mc_AN];
376 Cwan = WhatSpecies[Gc_AN];
377 NO0 = Spe_Total_CNO[Cwan];
378
379 int spin;
380 for (spin=0; spin<=SpinP_switch; spin++){
381 int Nc;
382 for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
383 ai_tmpDGs[spin][Nc] = 0.0;
384 }
385 }
386
387 for (h_AN=0; h_AN<=FNAN[Gc_AN]; h_AN++){
388
389 /* set data on h_AN */
390
391 Gh_AN = natn[Gc_AN][h_AN];
392 Mh_AN = F_G2M[Gh_AN];
393 Rnh = ncn[Gc_AN][h_AN];
394 Hwan = WhatSpecies[Gh_AN];
395 NO1 = Spe_Total_CNO[Hwan];
396
397 /* store CDM into tmp_CDM */
398
399 for (spin=0; spin<=SpinP_switch; spin++){
400 for (i=0; i<NO0; i++){
401 for (j=0; j<NO1; j++){
402 tmp_CDM[spin][i][j] = CDM[spin][Mc_AN][h_AN][i][j];
403 }
404 }
405 }
406
407 /* summation of non-zero elements */
408 /* for (Nog=0; Nog<NumOLG[Mc_AN][h_AN]; Nog++){ */
409 #pragma omp for
410 for (Nog=0; Nog<NumOLG[Mc_AN][h_AN]-3; Nog+=4){
411
412 Nc_0 = GListTAtoms1[Mc_AN][h_AN][Nog];
413 Nc_1 = GListTAtoms1[Mc_AN][h_AN][Nog+1];
414 Nc_2 = GListTAtoms1[Mc_AN][h_AN][Nog+2];
415 Nc_3 = GListTAtoms1[Mc_AN][h_AN][Nog+3];
416
417 Nh_0 = GListTAtoms2[Mc_AN][h_AN][Nog];
418 Nh_1 = GListTAtoms2[Mc_AN][h_AN][Nog+1];
419 Nh_2 = GListTAtoms2[Mc_AN][h_AN][Nog+2];
420 Nh_3 = GListTAtoms2[Mc_AN][h_AN][Nog+3];
421
422 /* Now under the orbital optimization */
423 if (Cnt_kind==0 && Cnt_switch==1){
424 for (i=0; i<NO0; i++){
425 orbs0_0[i] = COrbs_Grid[Mc_AN][i][Nc_0];
426 orbs0_1[i] = COrbs_Grid[Mc_AN][i][Nc_1];
427 orbs0_2[i] = COrbs_Grid[Mc_AN][i][Nc_2];
428 orbs0_3[i] = COrbs_Grid[Mc_AN][i][Nc_3];
429 }
430 for (j=0; j<NO1; j++){
431 orbs1_0[j] = COrbs_Grid[Mh_AN][j][Nh_0];
432 orbs1_1[j] = COrbs_Grid[Mh_AN][j][Nh_1];
433 orbs1_2[j] = COrbs_Grid[Mh_AN][j][Nh_2];
434 orbs1_3[j] = COrbs_Grid[Mh_AN][j][Nh_3];
435 }
436 }
437 /* else if ! "now under the orbital optimization" */
438 else{
439 for (i=0; i<NO0; i++){
440 orbs0_0[i] = Orbs_Grid[Mc_AN][Nc_0][i];
441 orbs0_1[i] = Orbs_Grid[Mc_AN][Nc_1][i];
442 orbs0_2[i] = Orbs_Grid[Mc_AN][Nc_2][i];
443 orbs0_3[i] = Orbs_Grid[Mc_AN][Nc_3][i];
444 }
445
446 if (G2ID[Gh_AN]==myid){
447 for (j=0; j<NO1; j++){
448 orbs1_0[j] = Orbs_Grid[Mh_AN][Nh_0][j];
449 orbs1_1[j] = Orbs_Grid[Mh_AN][Nh_1][j];
450 orbs1_2[j] = Orbs_Grid[Mh_AN][Nh_2][j];
451 orbs1_3[j] = Orbs_Grid[Mh_AN][Nh_3][j];
452 }
453 }
454 else{
455 for (j=0; j<NO1; j++){
456 orbs1_0[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog ][j];
457 orbs1_1[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog+1][j];
458 orbs1_2[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog+2][j];
459 orbs1_3[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog+3][j];
460 }
461 }
462 }
463
464 for (spin=0; spin<=SpinP_switch; spin++){
465
466 /* Tmp_Den_Grid */
467
468 sum_0 = 0.0;
469 sum_1 = 0.0;
470 sum_2 = 0.0;
471 sum_3 = 0.0;
472
473 for (i=0; i<NO0; i++){
474
475 tmp0_0 = 0.0;
476 tmp0_1 = 0.0;
477 tmp0_2 = 0.0;
478 tmp0_3 = 0.0;
479
480 for (j=0; j<NO1; j++){
481 tmp0_0 += orbs1_0[j]*tmp_CDM[spin][i][j];
482 tmp0_1 += orbs1_1[j]*tmp_CDM[spin][i][j];
483 tmp0_2 += orbs1_2[j]*tmp_CDM[spin][i][j];
484 tmp0_3 += orbs1_3[j]*tmp_CDM[spin][i][j];
485 }
486
487 sum_0 += orbs0_0[i]*tmp0_0;
488 sum_1 += orbs0_1[i]*tmp0_1;
489 sum_2 += orbs0_2[i]*tmp0_2;
490 sum_3 += orbs0_3[i]*tmp0_3;
491 }
492
493 ai_tmpDGs[spin][Nc_0] += sum_0;
494 ai_tmpDGs[spin][Nc_1] += sum_1;
495 ai_tmpDGs[spin][Nc_2] += sum_2;
496 ai_tmpDGs[spin][Nc_3] += sum_3;
497
498 /*
499 Tmp_Den_Grid[spin][Mc_AN][Nc_0] += sum_0;
500 Tmp_Den_Grid[spin][Mc_AN][Nc_1] += sum_1;
501 Tmp_Den_Grid[spin][Mc_AN][Nc_2] += sum_2;
502 Tmp_Den_Grid[spin][Mc_AN][Nc_3] += sum_3;
503 */
504
505 } /* spin */
506 } /* Nog */
507
508 #pragma omp for
509 for (Nog = NumOLG[Mc_AN][h_AN] - (NumOLG[Mc_AN][h_AN] % 4); Nog<NumOLG[Mc_AN][h_AN]; Nog++){
510 /*for (; Nog<NumOLG[Mc_AN][h_AN]; Nog++){*/
511
512 Nc = GListTAtoms1[Mc_AN][h_AN][Nog];
513 Nh = GListTAtoms2[Mc_AN][h_AN][Nog];
514
515
516 if (Cnt_kind==0 && Cnt_switch==1){
517 for (i=0; i<NO0; i++){
518 orbs0[i] = COrbs_Grid[Mc_AN][i][Nc];
519 }
520 for (j=0; j<NO1; j++){
521 orbs1[j] = COrbs_Grid[Mh_AN][j][Nh];
522 }
523 }
524 else{
525 for (i=0; i<NO0; i++){
526 orbs0[i] = Orbs_Grid[Mc_AN][Nc][i];
527 }
528
529 if (G2ID[Gh_AN]==myid){
530 for (j=0; j<NO1; j++){
531 orbs1[j] = Orbs_Grid[Mh_AN][Nh][j];
532 }
533 }
534 else{
535 for (j=0; j<NO1; j++){
536 orbs1[j] = Orbs_Grid_FNAN[Mc_AN][h_AN][Nog][j];
537 }
538 }
539 }
540
541 for (spin=0; spin<=SpinP_switch; spin++){
542
543
544 sum = 0.0;
545 for (i=0; i<NO0; i++){
546 tmp0 = 0.0;
547 for (j=0; j<NO1; j++){
548 tmp0 += orbs1[j]*tmp_CDM[spin][i][j];
549 }
550 sum += orbs0[i]*tmp0;
551 }
552
553 ai_tmpDGs[spin][Nc] += sum;
554 /*Tmp_Den_Grid[spin][Mc_AN][Nc] += sum;*/
555 }
556
557 } /* Nog */
558
559 } /* h_AN */
560
561 /* AITUNE merge temporary buffer for all omp threads */
562 for (spin=0; spin<=SpinP_switch; spin++){
563 int Nc;
564 #pragma omp for
565 for (Nc=0; Nc<GridN_Atom[Gc_AN]; Nc++){
566 double sum = 0.0;
567 int th;
568 for(th = 0; th < Nthrds; th++){
569 sum += ai_tmpDG_all[th][spin][Nc];
570 }
571 Tmp_Den_Grid[spin][Mc_AN][Nc] += sum;
572 }
573 }
574
575
576 dtime(&Etime_atom);
577 time_per_atom[Gc_AN] += Etime_atom - Stime_atom;
578
579 } /* Mc_AN */
580
581 /* freeing of arrays */
582
583 free(orbs0);
584 free(orbs1);
585
586 free(orbs0_0);
587 free(orbs0_1);
588 free(orbs0_2);
589 free(orbs0_3);
590 free(orbs1_0);
591 free(orbs1_1);
592 free(orbs1_2);
593 free(orbs1_3);
594
595 for (i=0; i<(SpinP_switch+1); i++){
596 for (j=0; j<List_YOUSO[7]; j++){
597 free(tmp_CDM[i][j]);
598 }
599 free(tmp_CDM[i]);
600
601 free(ai_tmpDGs[i]); /* AITUNE */
602
603 }
604 free(tmp_CDM);
605
606 #pragma omp flush(Tmp_Den_Grid)
607
608 } /* #pragma omp parallel */
609
610 free(ai_tmpDG_all);
611
612 dtime(&time2);
613 if(myid==0 && measure_time){
614 printf("Time for Part1=%18.5f\n",(time2-time1));fflush(stdout);
615 }
616
617 /******************************************************
618 MPI communication from the partitions A to B
619 ******************************************************/
620
621 /* copy Tmp_Den_Grid to Den_Snd_Grid_A2B */
622
623 for (ID=0; ID<numprocs; ID++) Num_Snd_Grid_A2B[ID] = 0;
624
625 N2D = Ngrid1*Ngrid2;
626
627 for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
628
629 Gc_AN = M2G[Mc_AN];
630
631 for (AN=0; AN<GridN_Atom[Gc_AN]; AN++){
632
633 GN = GridListAtom[Mc_AN][AN];
634 GN2N(GN,N3);
635 n2D = N3[1]*Ngrid2 + N3[2];
636 ID = (int)(n2D*(unsigned long long int)numprocs/N2D);
637
638 if (SpinP_switch==0){
639 Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]] = Tmp_Den_Grid[0][Mc_AN][AN];
640 }
641 else if (SpinP_switch==1){
642 Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*2+0] = Tmp_Den_Grid[0][Mc_AN][AN];
643 Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*2+1] = Tmp_Den_Grid[1][Mc_AN][AN];
644 }
645 else if (SpinP_switch==3){
646 Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+0] = Tmp_Den_Grid[0][Mc_AN][AN];
647 Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+1] = Tmp_Den_Grid[1][Mc_AN][AN];
648 Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+2] = Tmp_Den_Grid[2][Mc_AN][AN];
649 Den_Snd_Grid_A2B[ID][Num_Snd_Grid_A2B[ID]*4+3] = Tmp_Den_Grid[3][Mc_AN][AN];
650 }
651
652 Num_Snd_Grid_A2B[ID]++;
653 }
654 }
655
656 /* MPI: A to B */
657
658 request_send = malloc(sizeof(MPI_Request)*NN_A2B_S);
659 request_recv = malloc(sizeof(MPI_Request)*NN_A2B_R);
660 stat_send = malloc(sizeof(MPI_Status)*NN_A2B_S);
661 stat_recv = malloc(sizeof(MPI_Status)*NN_A2B_R);
662
663 NN_S = 0;
664 NN_R = 0;
665
666 tag = 999;
667 for (ID=1; ID<numprocs; ID++){
668
669 IDS = (myid + ID) % numprocs;
670 IDR = (myid - ID + numprocs) % numprocs;
671
672 if (Num_Snd_Grid_A2B[IDS]!=0){
673 MPI_Isend( &Den_Snd_Grid_A2B[IDS][0], Num_Snd_Grid_A2B[IDS]*(SpinP_switch+1),
674 MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
675 NN_S++;
676 }
677
678 if (Num_Rcv_Grid_A2B[IDR]!=0){
679 MPI_Irecv( &Den_Rcv_Grid_A2B[IDR][0], Num_Rcv_Grid_A2B[IDR]*(SpinP_switch+1),
680 MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
681 NN_R++;
682 }
683 }
684
685 if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
686 if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
687
688 free(request_send);
689 free(request_recv);
690 free(stat_send);
691 free(stat_recv);
692
693 /* for myid */
694 for (i=0; i<Num_Rcv_Grid_A2B[myid]*(SpinP_switch+1); i++){
695 Den_Rcv_Grid_A2B[myid][i] = Den_Snd_Grid_A2B[myid][i];
696 }
697
698 /******************************************************
699 superposition of rho_i to calculate charge density
700 in the partition B.
701 ******************************************************/
702
703 /* initialize arrays */
704
705 for (spin=0; spin<(SpinP_switch+1); spin++){
706 for (BN=0; BN<My_NumGridB_AB; BN++){
707 Density_Grid_B[spin][BN] = 0.0;
708 }
709 }
710
711 /* superposition of densities rho_i */
712
713 for (ID=0; ID<numprocs; ID++){
714
715 for (LN=0; LN<Num_Rcv_Grid_A2B[ID]; LN++){
716
717 BN = Index_Rcv_Grid_A2B[ID][3*LN+0];
718 Gc_AN = Index_Rcv_Grid_A2B[ID][3*LN+1];
719 GRc = Index_Rcv_Grid_A2B[ID][3*LN+2];
720
721 if (Solver!=4 || (Solver==4 && atv_ijk[GRc][1]==0 )){
722
723 /* spin collinear non-polarization */
724 if ( SpinP_switch==0 ){
725 Density_Grid_B[0][BN] += Den_Rcv_Grid_A2B[ID][LN];
726 }
727
728 /* spin collinear polarization */
729 else if ( SpinP_switch==1 ){
730 Density_Grid_B[0][BN] += Den_Rcv_Grid_A2B[ID][LN*2 ];
731 Density_Grid_B[1][BN] += Den_Rcv_Grid_A2B[ID][LN*2+1];
732 }
733
734 /* spin non-collinear */
735 else if ( SpinP_switch==3 ){
736 Density_Grid_B[0][BN] += Den_Rcv_Grid_A2B[ID][LN*4 ];
737 Density_Grid_B[1][BN] += Den_Rcv_Grid_A2B[ID][LN*4+1];
738 Density_Grid_B[2][BN] += Den_Rcv_Grid_A2B[ID][LN*4+2];
739 Density_Grid_B[3][BN] += Den_Rcv_Grid_A2B[ID][LN*4+3];
740 }
741
742 } /* if (Solve!=4.....) */
743
744 } /* AN */
745 } /* ID */
746
747 /****************************************************
748 Conjugate complex of Density_Grid[3][MN] due to
749 difference in the definition between density matrix
750 and charge density
751 ****************************************************/
752
753 if (SpinP_switch==3){
754
755 for (BN=0; BN<My_NumGridB_AB; BN++){
756 Density_Grid_B[3][BN] = -Density_Grid_B[3][BN];
757 }
758 }
759
760 /******************************************************
761 MPI: from the partitions B to D
762 ******************************************************/
763
764 Density_Grid_Copy_B2D();
765
766 /* freeing of arrays */
767
768 for (i=0; i<(SpinP_switch+1); i++){
769 for (Mc_AN=0; Mc_AN<=Matomnum; Mc_AN++){
770 free(Tmp_Den_Grid[i][Mc_AN]);
771 }
772 free(Tmp_Den_Grid[i]);
773 }
774 free(Tmp_Den_Grid);
775
776 for (ID=0; ID<numprocs; ID++){
777 free(Den_Snd_Grid_A2B[ID]);
778 }
779 free(Den_Snd_Grid_A2B);
780
781 for (ID=0; ID<numprocs; ID++){
782 free(Den_Rcv_Grid_A2B[ID]);
783 }
784 free(Den_Rcv_Grid_A2B);
785
786 /* elapsed time */
787 dtime(&TEtime);
788 time0 = TEtime - TStime;
789 if(myid==0 && measure_time) printf("time0=%18.5f\n",time0);
790
791 return time0;
792 }
793
794
795
Data_Grid_Copy_B2C_2(double ** data_B,double ** data_C)796 void Data_Grid_Copy_B2C_2(double **data_B, double **data_C)
797 {
798 static int firsttime=1;
799 int CN,BN,LN,spin,i,gp,NN_S,NN_R;
800 double *Work_Array_Snd_Grid_B2C;
801 double *Work_Array_Rcv_Grid_B2C;
802 int numprocs,myid,tag=999,ID,IDS,IDR;
803 MPI_Status stat;
804 MPI_Request request;
805 MPI_Status *stat_send;
806 MPI_Status *stat_recv;
807 MPI_Request *request_send;
808 MPI_Request *request_recv;
809
810 MPI_Comm_size(mpi_comm_level1,&numprocs);
811 MPI_Comm_rank(mpi_comm_level1,&myid);
812
813 /* allocation of arrays */
814
815 Work_Array_Snd_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_S[NN_B2C_S]*(SpinP_switch+1));
816 Work_Array_Rcv_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_R[NN_B2C_R]*(SpinP_switch+1));
817
818 if (firsttime==1){
819 PrintMemory("Data_Grid_Copy_B2C_2: Work_Array_Snd_Grid_B2C",
820 sizeof(double)*GP_B2C_S[NN_B2C_S]*(SpinP_switch+1), NULL);
821 PrintMemory("Data_Grid_Copy_B2C_2: Work_Array_Rcv_Grid_B2C",
822 sizeof(double)*GP_B2C_R[NN_B2C_R]*(SpinP_switch+1), NULL);
823 firsttime = 0;
824 }
825
826 /******************************************************
827 MPI: from the partitions B to C
828 ******************************************************/
829
830 request_send = malloc(sizeof(MPI_Request)*NN_B2C_S);
831 request_recv = malloc(sizeof(MPI_Request)*NN_B2C_R);
832 stat_send = malloc(sizeof(MPI_Status)*NN_B2C_S);
833 stat_recv = malloc(sizeof(MPI_Status)*NN_B2C_R);
834
835 NN_S = 0;
836 NN_R = 0;
837
838 /* MPI_Irecv */
839
840 for (ID=0; ID<NN_B2C_R; ID++){
841
842 IDR = ID_NN_B2C_R[ID];
843 gp = GP_B2C_R[ID];
844
845 if (IDR!=myid){
846 MPI_Irecv( &Work_Array_Rcv_Grid_B2C[(SpinP_switch+1)*gp], Num_Rcv_Grid_B2C[IDR]*(SpinP_switch+1),
847 MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
848 NN_R++;
849 }
850
851 }
852
853 /* MPI_Isend */
854
855 for (ID=0; ID<NN_B2C_S; ID++){
856
857 IDS = ID_NN_B2C_S[ID];
858 gp = GP_B2C_S[ID];
859
860 /* copy Density_Grid_B to Work_Array_Snd_Grid_B2C */
861
862 for (LN=0; LN<Num_Snd_Grid_B2C[IDS]; LN++){
863 BN = Index_Snd_Grid_B2C[IDS][LN];
864
865 if (SpinP_switch==0){
866 Work_Array_Snd_Grid_B2C[gp+LN] = data_B[0][BN];
867 }
868 else if (SpinP_switch==1){
869 Work_Array_Snd_Grid_B2C[2*gp+2*LN+0] = data_B[0][BN];
870 Work_Array_Snd_Grid_B2C[2*gp+2*LN+1] = data_B[1][BN];
871 }
872 else if (SpinP_switch==3){
873 Work_Array_Snd_Grid_B2C[4*gp+4*LN+0] = data_B[0][BN];
874 Work_Array_Snd_Grid_B2C[4*gp+4*LN+1] = data_B[1][BN];
875 Work_Array_Snd_Grid_B2C[4*gp+4*LN+2] = data_B[2][BN];
876 Work_Array_Snd_Grid_B2C[4*gp+4*LN+3] = data_B[3][BN];
877 }
878 } /* LN */
879
880 if (IDS!=myid){
881 MPI_Isend( &Work_Array_Snd_Grid_B2C[(SpinP_switch+1)*gp], Num_Snd_Grid_B2C[IDS]*(SpinP_switch+1),
882 MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
883 NN_S++;
884 }
885 }
886
887 /* MPI_Waitall */
888
889 if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
890 if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
891
892 free(request_send);
893 free(request_recv);
894 free(stat_send);
895 free(stat_recv);
896
897 /* copy Work_Array_Rcv_Grid_B2C to data_C */
898
899 for (ID=0; ID<NN_B2C_R; ID++){
900
901 IDR = ID_NN_B2C_R[ID];
902
903 if (IDR==myid){
904
905 gp = GP_B2C_S[ID];
906
907 for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){
908
909 CN = Index_Rcv_Grid_B2C[IDR][LN];
910
911 if (SpinP_switch==0){
912 data_C[0][CN] = Work_Array_Snd_Grid_B2C[gp+LN];
913 }
914 else if (SpinP_switch==1){
915 data_C[0][CN] = Work_Array_Snd_Grid_B2C[2*gp+2*LN+0];
916 data_C[1][CN] = Work_Array_Snd_Grid_B2C[2*gp+2*LN+1];
917 }
918 else if (SpinP_switch==3){
919 data_C[0][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+0];
920 data_C[1][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+1];
921 data_C[2][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+2];
922 data_C[3][CN] = Work_Array_Snd_Grid_B2C[4*gp+4*LN+3];
923 }
924 } /* LN */
925
926 }
927 else {
928
929 gp = GP_B2C_R[ID];
930
931 for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){
932 CN = Index_Rcv_Grid_B2C[IDR][LN];
933
934 if (SpinP_switch==0){
935 data_C[0][CN] = Work_Array_Rcv_Grid_B2C[gp+LN];
936 }
937 else if (SpinP_switch==1){
938 data_C[0][CN] = Work_Array_Rcv_Grid_B2C[2*gp+2*LN+0];
939 data_C[1][CN] = Work_Array_Rcv_Grid_B2C[2*gp+2*LN+1];
940 }
941 else if (SpinP_switch==3){
942 data_C[0][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+0];
943 data_C[1][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+1];
944 data_C[2][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+2];
945 data_C[3][CN] = Work_Array_Rcv_Grid_B2C[4*gp+4*LN+3];
946 }
947 }
948 }
949 }
950
951 /* if (SpinP_switch==0),
952 copy data_B[0] to data_B[1]
953 copy data_C[0] to data_C[1]
954 */
955
956 if (SpinP_switch==0){
957 for (BN=0; BN<My_NumGridB_AB; BN++){
958 data_B[1][BN] = data_B[0][BN];
959 }
960
961 for (CN=0; CN<My_NumGridC; CN++){
962 data_C[1][CN] = data_C[0][CN];
963 }
964 }
965
966 /* freeing of arrays */
967 free(Work_Array_Snd_Grid_B2C);
968 free(Work_Array_Rcv_Grid_B2C);
969 }
970
971
972
Data_Grid_Copy_B2C_1(double * data_B,double * data_C)973 void Data_Grid_Copy_B2C_1(double *data_B, double *data_C)
974 {
975 static int firsttime=1;
976 int CN,BN,LN,spin,i,gp,NN_S,NN_R;
977 double *Work_Array_Snd_Grid_B2C;
978 double *Work_Array_Rcv_Grid_B2C;
979 int numprocs,myid,tag=999,ID,IDS,IDR;
980 MPI_Status stat;
981 MPI_Request request;
982 MPI_Status *stat_send;
983 MPI_Status *stat_recv;
984 MPI_Request *request_send;
985 MPI_Request *request_recv;
986
987 MPI_Comm_size(mpi_comm_level1,&numprocs);
988 MPI_Comm_rank(mpi_comm_level1,&myid);
989
990 /* allocation of arrays */
991
992 Work_Array_Snd_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_S[NN_B2C_S]);
993 Work_Array_Rcv_Grid_B2C = (double*)malloc(sizeof(double)*GP_B2C_R[NN_B2C_R]);
994
995 if (firsttime==1){
996 PrintMemory("Data_Grid_Copy_B2C_1: Work_Array_Snd_Grid_B2C",
997 sizeof(double)*GP_B2C_S[NN_B2C_S], NULL);
998 PrintMemory("Data_Grid_Copy_B2C_1: Work_Array_Rcv_Grid_B2C",
999 sizeof(double)*GP_B2C_R[NN_B2C_R], NULL);
1000 firsttime = 0;
1001 }
1002
1003 /******************************************************
1004 MPI: from the partitions B to C
1005 ******************************************************/
1006
1007 request_send = malloc(sizeof(MPI_Request)*NN_B2C_S);
1008 request_recv = malloc(sizeof(MPI_Request)*NN_B2C_R);
1009 stat_send = malloc(sizeof(MPI_Status)*NN_B2C_S);
1010 stat_recv = malloc(sizeof(MPI_Status)*NN_B2C_R);
1011
1012 NN_S = 0;
1013 NN_R = 0;
1014
1015 /* MPI_Irecv */
1016
1017 for (ID=0; ID<NN_B2C_R; ID++){
1018
1019 IDR = ID_NN_B2C_R[ID];
1020 gp = GP_B2C_R[ID];
1021
1022 if (IDR!=myid){
1023 MPI_Irecv( &Work_Array_Rcv_Grid_B2C[gp], Num_Rcv_Grid_B2C[IDR],
1024 MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
1025 NN_R++;
1026 }
1027 }
1028
1029 /* MPI_Isend */
1030
1031 for (ID=0; ID<NN_B2C_S; ID++){
1032
1033 IDS = ID_NN_B2C_S[ID];
1034 gp = GP_B2C_S[ID];
1035
1036 /* copy Density_Grid_B to Work_Array_Snd_Grid_B2C */
1037
1038 for (LN=0; LN<Num_Snd_Grid_B2C[IDS]; LN++){
1039 BN = Index_Snd_Grid_B2C[IDS][LN];
1040 Work_Array_Snd_Grid_B2C[gp+LN] = data_B[BN];
1041 }
1042
1043 if (IDS!=myid){
1044 MPI_Isend( &Work_Array_Snd_Grid_B2C[gp], Num_Snd_Grid_B2C[IDS],
1045 MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
1046 NN_S++;
1047 }
1048 }
1049
1050 /* MPI_Waitall */
1051
1052 if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
1053 if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
1054
1055 free(request_send);
1056 free(request_recv);
1057 free(stat_send);
1058 free(stat_recv);
1059
1060 /* copy Work_Array_Rcv_Grid_B2C to data_C */
1061
1062 for (ID=0; ID<NN_B2C_R; ID++){
1063
1064 IDR = ID_NN_B2C_R[ID];
1065
1066 if (IDR==myid){
1067 gp = GP_B2C_S[ID];
1068 for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){
1069 CN = Index_Rcv_Grid_B2C[IDR][LN];
1070 data_C[CN] = Work_Array_Snd_Grid_B2C[gp+LN];
1071 }
1072 }
1073 else{
1074
1075 gp = GP_B2C_R[ID];
1076 for (LN=0; LN<Num_Rcv_Grid_B2C[IDR]; LN++){
1077 CN = Index_Rcv_Grid_B2C[IDR][LN];
1078 data_C[CN] = Work_Array_Rcv_Grid_B2C[gp+LN];
1079 }
1080 }
1081 }
1082
1083 /* freeing of arrays */
1084 free(Work_Array_Snd_Grid_B2C);
1085 free(Work_Array_Rcv_Grid_B2C);
1086 }
1087
1088
1089
1090
1091
Density_Grid_Copy_B2D()1092 void Density_Grid_Copy_B2D()
1093 {
1094 static int firsttime=1;
1095 int DN,BN,LN,spin,i,gp,NN_S,NN_R;
1096 double *Work_Array_Snd_Grid_B2D;
1097 double *Work_Array_Rcv_Grid_B2D;
1098 int numprocs,myid,tag=999,ID,IDS,IDR;
1099 MPI_Status stat;
1100 MPI_Request request;
1101 MPI_Status *stat_send;
1102 MPI_Status *stat_recv;
1103 MPI_Request *request_send;
1104 MPI_Request *request_recv;
1105
1106 MPI_Comm_size(mpi_comm_level1,&numprocs);
1107 MPI_Comm_rank(mpi_comm_level1,&myid);
1108
1109 /* allocation of arrays */
1110
1111 Work_Array_Snd_Grid_B2D = (double*)malloc(sizeof(double)*GP_B2D_S[NN_B2D_S]*(SpinP_switch+1));
1112 Work_Array_Rcv_Grid_B2D = (double*)malloc(sizeof(double)*GP_B2D_R[NN_B2D_R]*(SpinP_switch+1));
1113
1114 if (firsttime==1){
1115 PrintMemory("Set_Density_Grid: Work_Array_Snd_Grid_B2D",
1116 sizeof(double)*GP_B2D_S[NN_B2D_S]*(SpinP_switch+1), NULL);
1117 PrintMemory("Set_Density_Grid: Work_Array_Rcv_Grid_B2D",
1118 sizeof(double)*GP_B2D_R[NN_B2D_R]*(SpinP_switch+1), NULL);
1119 firsttime = 0;
1120 }
1121
1122 /******************************************************
1123 MPI: from the partitions B to D
1124 ******************************************************/
1125
1126 request_send = malloc(sizeof(MPI_Request)*NN_B2D_S);
1127 request_recv = malloc(sizeof(MPI_Request)*NN_B2D_R);
1128 stat_send = malloc(sizeof(MPI_Status)*NN_B2D_S);
1129 stat_recv = malloc(sizeof(MPI_Status)*NN_B2D_R);
1130
1131 NN_S = 0;
1132 NN_R = 0;
1133
1134 /* MPI_Irecv */
1135
1136 for (ID=0; ID<NN_B2D_R; ID++){
1137
1138 IDR = ID_NN_B2D_R[ID];
1139 gp = GP_B2D_R[ID];
1140
1141 if (IDR!=myid){
1142 MPI_Irecv( &Work_Array_Rcv_Grid_B2D[(SpinP_switch+1)*gp], Num_Rcv_Grid_B2D[IDR]*(SpinP_switch+1),
1143 MPI_DOUBLE, IDR, tag, mpi_comm_level1, &request_recv[NN_R]);
1144 NN_R++;
1145 }
1146 }
1147
1148 /* MPI_Isend */
1149
1150 for (ID=0; ID<NN_B2D_S; ID++){
1151
1152 IDS = ID_NN_B2D_S[ID];
1153 gp = GP_B2D_S[ID];
1154
1155 /* copy Density_Grid_B to Work_Array_Snd_Grid_B2D */
1156
1157 for (LN=0; LN<Num_Snd_Grid_B2D[IDS]; LN++){
1158
1159 BN = Index_Snd_Grid_B2D[IDS][LN];
1160
1161 if (SpinP_switch==0){
1162 Work_Array_Snd_Grid_B2D[gp+LN] = Density_Grid_B[0][BN];
1163 }
1164 else if (SpinP_switch==1){
1165 Work_Array_Snd_Grid_B2D[2*gp+2*LN+0] = Density_Grid_B[0][BN];
1166 Work_Array_Snd_Grid_B2D[2*gp+2*LN+1] = Density_Grid_B[1][BN];
1167 }
1168 else if (SpinP_switch==3){
1169 Work_Array_Snd_Grid_B2D[4*gp+4*LN+0] = Density_Grid_B[0][BN];
1170 Work_Array_Snd_Grid_B2D[4*gp+4*LN+1] = Density_Grid_B[1][BN];
1171 Work_Array_Snd_Grid_B2D[4*gp+4*LN+2] = Density_Grid_B[2][BN];
1172 Work_Array_Snd_Grid_B2D[4*gp+4*LN+3] = Density_Grid_B[3][BN];
1173 }
1174 } /* LN */
1175
1176 if (IDS!=myid){
1177 MPI_Isend( &Work_Array_Snd_Grid_B2D[(SpinP_switch+1)*gp], Num_Snd_Grid_B2D[IDS]*(SpinP_switch+1),
1178 MPI_DOUBLE, IDS, tag, mpi_comm_level1, &request_send[NN_S]);
1179 NN_S++;
1180 }
1181 }
1182
1183 /* MPI_Waitall */
1184
1185 if (NN_S!=0) MPI_Waitall(NN_S,request_send,stat_send);
1186 if (NN_R!=0) MPI_Waitall(NN_R,request_recv,stat_recv);
1187
1188 free(request_send);
1189 free(request_recv);
1190 free(stat_send);
1191 free(stat_recv);
1192
1193 /* copy Work_Array_Rcv_Grid_B2D to Density_Grid_D */
1194
1195 for (ID=0; ID<NN_B2D_R; ID++){
1196
1197 IDR = ID_NN_B2D_R[ID];
1198
1199 if (IDR==myid){
1200
1201 gp = GP_B2D_S[ID];
1202
1203 for (LN=0; LN<Num_Rcv_Grid_B2D[IDR]; LN++){
1204
1205 DN = Index_Rcv_Grid_B2D[IDR][LN];
1206
1207 if (SpinP_switch==0){
1208 Density_Grid_D[0][DN] = Work_Array_Snd_Grid_B2D[gp+LN];
1209 }
1210 else if (SpinP_switch==1){
1211 Density_Grid_D[0][DN] = Work_Array_Snd_Grid_B2D[2*gp+2*LN+0];
1212 Density_Grid_D[1][DN] = Work_Array_Snd_Grid_B2D[2*gp+2*LN+1];
1213 }
1214 else if (SpinP_switch==3){
1215 Density_Grid_D[0][DN] = Work_Array_Snd_Grid_B2D[4*gp+4*LN+0];
1216 Density_Grid_D[1][DN] = Work_Array_Snd_Grid_B2D[4*gp+4*LN+1];
1217 Density_Grid_D[2][DN] = Work_Array_Snd_Grid_B2D[4*gp+4*LN+2];
1218 Density_Grid_D[3][DN] = Work_Array_Snd_Grid_B2D[4*gp+4*LN+3];
1219 }
1220 } /* LN */
1221
1222 }
1223
1224 else{
1225
1226 gp = GP_B2D_R[ID];
1227
1228 for (LN=0; LN<Num_Rcv_Grid_B2D[IDR]; LN++){
1229
1230 DN = Index_Rcv_Grid_B2D[IDR][LN];
1231
1232 if (SpinP_switch==0){
1233 Density_Grid_D[0][DN] = Work_Array_Rcv_Grid_B2D[gp+LN];
1234 }
1235 else if (SpinP_switch==1){
1236 Density_Grid_D[0][DN] = Work_Array_Rcv_Grid_B2D[2*gp+2*LN+0];
1237 Density_Grid_D[1][DN] = Work_Array_Rcv_Grid_B2D[2*gp+2*LN+1];
1238 }
1239 else if (SpinP_switch==3){
1240 Density_Grid_D[0][DN] = Work_Array_Rcv_Grid_B2D[4*gp+4*LN+0];
1241 Density_Grid_D[1][DN] = Work_Array_Rcv_Grid_B2D[4*gp+4*LN+1];
1242 Density_Grid_D[2][DN] = Work_Array_Rcv_Grid_B2D[4*gp+4*LN+2];
1243 Density_Grid_D[3][DN] = Work_Array_Rcv_Grid_B2D[4*gp+4*LN+3];
1244 }
1245 }
1246
1247 }
1248 }
1249
1250 /* if (SpinP_switch==0), copy Density_Grid to Density_Grid */
1251
1252 if (SpinP_switch==0){
1253 for (BN=0; BN<My_NumGridB_AB; BN++){
1254 Density_Grid_B[1][BN] = Density_Grid_B[0][BN];
1255 }
1256
1257 for (DN=0; DN<My_NumGridD; DN++){
1258 Density_Grid_D[1][DN] = Density_Grid_D[0][DN];
1259 }
1260 }
1261
1262 /* freeing of arrays */
1263 free(Work_Array_Snd_Grid_B2D);
1264 free(Work_Array_Rcv_Grid_B2D);
1265 }
1266
1267
diagonalize_nc_density()1268 void diagonalize_nc_density()
1269 {
1270 int BN,DN,Mc_AN,Gc_AN,Nog,GRc;
1271 double Re11,Re22,Re12,Im12;
1272 double phi[2],theta[2],sit,cot,sip,cop;
1273 double d1,d2,d3,x,y,z,Cxyz[4];
1274 double Nup[2],Ndown[2];
1275 /* for OpenMP */
1276 int OMPID,Nthrds;
1277
1278 /************************************
1279 Density_Grid in the partition B
1280 ************************************/
1281
1282 #pragma omp parallel shared(Density_Grid_B,My_NumGridB_AB) private(OMPID,Nthrds,BN,Re11,Re22,Re12,Im12,Nup,Ndown,theta,phi) default(none)
1283 {
1284
1285 /* get info. on OpenMP */
1286
1287 OMPID = omp_get_thread_num();
1288 Nthrds = omp_get_num_threads();
1289
1290 for (BN=OMPID; BN<My_NumGridB_AB; BN+=Nthrds){
1291
1292 Re11 = Density_Grid_B[0][BN];
1293 Re22 = Density_Grid_B[1][BN];
1294 Re12 = Density_Grid_B[2][BN];
1295 Im12 = Density_Grid_B[3][BN];
1296
1297 EulerAngle_Spin( 1, Re11, Re22, Re12, Im12, Re12, -Im12, Nup, Ndown, theta, phi );
1298
1299 Density_Grid_B[0][BN] = Nup[0];
1300 Density_Grid_B[1][BN] = Ndown[0];
1301 Density_Grid_B[2][BN] = theta[0];
1302 Density_Grid_B[3][BN] = phi[0];
1303 }
1304
1305 #pragma omp flush(Density_Grid_B)
1306
1307 } /* #pragma omp parallel */
1308
1309 /************************************
1310 Density_Grid in the partition D
1311 ************************************/
1312
1313 #pragma omp parallel shared(Density_Grid_D,My_NumGridD) private(OMPID,Nthrds,DN,Re11,Re22,Re12,Im12,Nup,Ndown,theta,phi) default(none)
1314 {
1315
1316 /* get info. on OpenMP */
1317
1318 OMPID = omp_get_thread_num();
1319 Nthrds = omp_get_num_threads();
1320
1321 for (DN=OMPID; DN<My_NumGridD; DN+=Nthrds){
1322
1323 Re11 = Density_Grid_D[0][DN];
1324 Re22 = Density_Grid_D[1][DN];
1325 Re12 = Density_Grid_D[2][DN];
1326 Im12 = Density_Grid_D[3][DN];
1327
1328 EulerAngle_Spin( 1, Re11, Re22, Re12, Im12, Re12, -Im12, Nup, Ndown, theta, phi );
1329
1330 Density_Grid_D[0][DN] = Nup[0];
1331 Density_Grid_D[1][DN] = Ndown[0];
1332 Density_Grid_D[2][DN] = theta[0];
1333 Density_Grid_D[3][DN] = phi[0];
1334 }
1335
1336 #pragma omp flush(Density_Grid_D)
1337
1338 } /* #pragma omp parallel */
1339
1340 }
1341