1#define NBLOCKS 3 2 3* 4* $Id$ 5* 6 7* *********************************** 8* * * 9* * D3dB_c_transpose_jk * 10* * * 11* *********************************** 12 13 subroutine D3dB_c_transpose_jk(nb,A,tmp1,tmp2) 14 15***************************************************** 16* * 17* This routine performs the operation * 18* A(i,k,j) <- A(i,j,k) * 19* * 20* np = the number of worker nodes * 21* proc#=0...(np-1) 22* * 23* this transpose uses more buffer space * 24* then transpose2 * 25***************************************************** 26 implicit none 27 integer nb 28 complex*16 A(*) 29 complex*16 tmp1(*),tmp2(*) 30 31#include "bafdecls.fh" 32 33#include "D3dB.fh" 34 35 36* **** indexing variables **** 37c integer iq_to_i1((NFFT1/2+1)*NFFT2*NSLABS) 38c integer iq_to_i2((NFFT1/2+1)*NFFT2*NSLABS) 39c integer i1_start(NFFT3+1) 40c integer i2_start(NFFT3+1) 41 integer iq_to_i1(2,NBLOCKS) 42 integer iq_to_i2(2,NBLOCKS) 43 integer i1_start(2,NBLOCKS) 44 integer i2_start(2,NBLOCKS) 45 common / trans_blk / iq_to_i1,iq_to_i2,i1_start,i2_start 46 47* **** Used to avoid asynchronous communications **** 48 integer Nchannels(NBLOCKS) 49 integer channel_proc(2,NBLOCKS) 50 integer channel_type(2,NBLOCKS) 51 common / channel_blk / channel_proc,channel_type,Nchannels 52 53#include "tcgmsg.fh" 54#include "msgtypesf.h" 55 integer rcv_len,rcv_proc 56 57* **** local variables *** 58 integer i,c 59 integer it 60 integer source 61 integer msglen 62 integer pfrom,pto 63 integer taskid,np 64 65* **** external functions **** 66 integer Parallel2d_convert_taskid_i 67 external Parallel2d_convert_taskid_i 68 69 call Parallel2d_taskid_i(taskid) 70 call Parallel2d_np_i(np) 71 72* **** pack A(i) array **** 73#ifndef CRAY 74!DIR$ ivdep 75#endif 76 do i=1,nfft3d(nb) !(nx(nb)/2+1)*ny(nb)*nq(nb) 77 tmp1(int_mb(iq_to_i1(1,nb)+i-1)) = A(i) 78 end do 79 80* **** it = 0, transpose data on same thread **** 81 msglen = int_mb(i2_start(1,nb)+2-1) - int_mb(i2_start(1,nb)+1-1) 82#ifndef CRAY 83!DIR$ ivdep 84#endif 85 do i=1,msglen 86 tmp2(int_mb(i2_start(1,nb)+1-1)+i-1) 87 > = tmp1(int_mb(i1_start(1,nb)+1-1)+i-1) 88 end do 89 90 91 do c=1,Nchannels(nb) 92* **** receive packed array data **** 93 if (int_mb(channel_type(1,nb)+c-1) .eq. 1) then 94 pfrom=int_mb(channel_proc(1,nb)+c-1) 95 it = mod((taskid+np-pfrom),np) 96 97 source=pfrom 98 msglen = (int_mb(i2_start(1,nb)+it+2-1) 99 > - int_mb(i2_start(1,nb)+it+1-1)) 100 101 if (msglen.gt.0) then 102 call RCV(9+MSGDBL, 103 > tmp2(int_mb(i2_start(1,nb)+it+1-1)), 104 > mdtob(2*msglen),rcv_len, 105 > Parallel2d_convert_taskid_i(source), 106 > rcv_proc,1) 107 end if 108 end if 109 110* **** send packed array to other processors **** 111 if (int_mb(channel_type(1,nb)+c-1) .eq. 0) then 112 pto=int_mb(channel_proc(1,nb)+c-1) 113 it = mod((pto-taskid+np),np) 114 115 msglen = (int_mb(i1_start(1,nb)+it+2-1) 116 > - int_mb(i1_start(1,nb)+it+1-1)) 117 118 if (msglen.gt.0) then 119 call SND(9+MSGDBL, 120 > tmp1(int_mb(i1_start(1,nb)+it+1-1)), 121 > mdtob(2*msglen), 122 > Parallel2d_convert_taskid_i(pto), 123 > 1) 124 end if 125 end if 126 127 end do 128 129 130* **** unpack A(i) array **** 131#ifndef CRAY 132!DIR$ ivdep 133#endif 134 do i=1,nfft3d(nb) !(nx(nb)/2+1)*ny(nb)*nq(nb) 135 A(i) = tmp2(int_mb(iq_to_i2(1,nb)+i-1)) 136 end do 137 138 139 return 140 end 141 142c* *********************************** 143c* * * 144c* * D3dB_nc_transpose_jk * 145c* * * 146c* *********************************** 147c 148c subroutine D3dB_nc_transpose_jk(nb,ne,A,tmp1,tmp2) 149c 150c***************************************************** 151c* * 152c* This routine performs the operation * 153c* A(i,k,j,n) <- A(i,j,k,n) * 154c* * 155c* np = the number of worker nodes * 156c* proc#=0...(np-1) 157c* * 158c* this transpose uses more buffer space * 159c* then transpose2 * 160c***************************************************** 161c implicit none 162c integer nb,ne 163c complex*16 A(*) 164c complex*16 tmp1(*),tmp2(*) 165c 166c#include "bafdecls.fh" 167c 168c#include "D3dB.fh" 169c 170c 171c* **** indexing variables **** 172cc integer iq_to_i1((NFFT1/2+1)*NFFT2*NSLABS) 173cc integer iq_to_i2((NFFT1/2+1)*NFFT2*NSLABS) 174cc integer i1_start(NFFT3+1) 175cc integer i2_start(NFFT3+1) 176c integer iq_to_i1(2,NBLOCKS) 177c integer iq_to_i2(2,NBLOCKS) 178c integer i1_start(2,NBLOCKS) 179c integer i2_start(2,NBLOCKS) 180c common / trans_blk / iq_to_i1,iq_to_i2,i1_start,i2_start 181c 182c* **** Used to avoid asynchronous communications **** 183c integer Nchannels(NBLOCKS) 184c integer channel_proc(2,NBLOCKS) 185c integer channel_type(2,NBLOCKS) 186c common / channel_blk / channel_proc,channel_type,Nchannels 187c 188c#include "tcgmsg.fh" 189c#include "msgtypesf.h" 190c integer rcv_len,rcv_proc 191c 192c* **** local variables *** 193c integer i,c,n,nnfft3d 194c integer it 195c integer source 196c integer msglen 197c integer pfrom,pto 198c integer taskid,np 199c 200c call Parallel_taskid(taskid) 201c call Parallel_np(np) 202c 203c !nnfft3d = (nx(nb)/2+1)*ny(nb)*nq(nb) 204c 205c* **** pack A(i) array **** 206c do i=1,nfft3d(nb) 207c#ifndef CRAY 208c!DIR$ ivdep 209c#endif 210c do n=1,ne 211c tmp1(n+(int_mb(iq_to_i1(1,nb)+i-1)-1)*ne) 212c > = A(i+(n-1)*nfft3d(nb)) 213c end do 214c end do 215c 216c* **** it = 0, transpose data on same thread **** 217c msglen = int_mb(i2_start(1,nb)+2-1) - int_mb(i2_start(1,nb)+1-1) 218c do i=1,msglen 219c#ifndef CRAY 220c!DIR$ ivdep 221c#endif 222c do n=1,ne 223c tmp2(n+(int_mb(i2_start(1,nb))+i-2)*ne) 224c > = tmp1(n+(int_mb(i1_start(1,nb))+i-2)*ne) 225c end do 226c end do 227c 228c 229c do c=1,Nchannels(nb) 230c* **** receive packed array data **** 231c if (int_mb(channel_type(1,nb)+c-1) .eq. 1) then 232c pfrom=int_mb(channel_proc(1,nb)+c-1) 233c it = mod((taskid+np-pfrom),np) 234c 235c source=pfrom 236c msglen = (int_mb(i2_start(1,nb)+it+2-1) 237c > - int_mb(i2_start(1,nb)+it+1-1))*ne 238c 239c if (msglen.gt.0) then 240c call RCV(9+MSGDBL, 241c > tmp2(1+(int_mb(i2_start(1,nb)+it+1-1)-1)*ne), 242c > mdtob(2*msglen),rcv_len, 243c > source,rcv_proc,1) 244c end if 245c end if 246c 247c* **** send packed array to other processors **** 248c if (int_mb(channel_type(1,nb)+c-1) .eq. 0) then 249c pto=int_mb(channel_proc(1,nb)+c-1) 250c it = mod((pto-taskid+np),np) 251c 252c msglen = (int_mb(i1_start(1,nb)+it+2-1) 253c > - int_mb(i1_start(1,nb)+it+1-1))*ne 254c 255c if (msglen.gt.0) then 256c call SND(9+MSGDBL, 257c > tmp1(1+(int_mb(i1_start(1,nb)+it+1-1)-1)*ne), 258c > mdtob(2*msglen),pto,1) 259c end if 260c end if 261c 262c end do 263c 264c 265c* **** unpack A(i) array **** 266c do i=1,nfft3d(nb) 267c#ifndef CRAY 268c!DIR$ ivdep 269c#endif 270c do n=1,ne 271c A(i+(n-1)*nfft3d(nb)) 272c > = tmp2(n+(int_mb(iq_to_i2(1,nb)+i-1)-1)*ne) 273c end do 274c end do 275c 276c 277c return 278c end 279c 280 281 282 283 284* *********************************** 285* * * 286* * D3dB_c_timereverse * 287* * * 288* *********************************** 289 290 subroutine D3dB_c_timereverse(nb,A,tmp1,tmp2) 291 292***************************************************** 293* * 294* This routine performs the operation * 295* A(i,j,k) <- conjugate(A(i,-j,-k)) * 296* * 297* np = the number of worker nodes * 298* proc#=0...(np-1) * 299* * 300***************************************************** 301 implicit none 302 integer nb 303 complex*16 A(*) 304 complex*16 tmp1(*) 305 complex*16 tmp2(*) 306 307#include "bafdecls.fh" 308 309#include "D3dB.fh" 310 311 312* **** indexing variables **** 313c integer iq_to_i1(2**NFFT2*NSLABS) 314c integer iq_to_i2(2**NFFT2*NSLABS) 315c integer i1_start(NFFT3+1) 316c integer i2_start(NFFT3+1) 317 integer iq_to_i1(2,NBLOCKS) 318 integer iq_to_i2(2,NBLOCKS) 319 integer i1_start(2,NBLOCKS) 320 integer i2_start(2,NBLOCKS) 321 common / timereverse_blk / iq_to_i1,iq_to_i2,i1_start,i2_start 322 323* **** Used to avoid asynchronous communications **** 324 integer Nchannels(NBLOCKS) 325 integer channel_proc(2,NBLOCKS) 326 integer channel_type(2,NBLOCKS) 327 common / channel_blk / channel_proc,channel_type,Nchannels 328 329#include "tcgmsg.fh" 330#include "msgtypesf.h" 331 integer rcv_len,rcv_proc 332 333* **** local variables *** 334 integer i,c 335 integer it 336 integer source 337 integer msglen 338 integer pfrom,pto 339 integer taskid,np 340 integer index1,index2 341 342* **** external functions **** 343 integer Parallel2d_convert_taskid_i 344 external Parallel2d_convert_taskid_i 345 346 call Parallel2d_taskid_i(taskid) 347 call Parallel2d_np_i(np) 348 349* **** pack A(i) array **** 350 do index1=int_mb(i1_start(1,nb)+1-1), 351 > (int_mb(i1_start(1,nb)+np+1-1)-1) 352 tmp1(index1) = A(int_mb(iq_to_i1(1,nb)+index1-1)) 353 end do 354 355* **** it = 0, transpose data on same thread **** 356 msglen = int_mb(i2_start(1,nb)+2-1) - int_mb(i2_start(1,nb)+1-1) 357 do i=1,msglen 358 tmp2(int_mb(i2_start(1,nb)+1-1)+i-1) 359 > = tmp1(int_mb(i1_start(1,nb)+1-1)+i-1) 360 end do 361 362 363 do c=1,Nchannels(nb) 364* **** receive packed array data **** 365 if (int_mb(channel_type(1,nb)+c-1) .eq. 1) then 366 pfrom=int_mb(channel_proc(1,nb)+c-1) 367 it = mod((taskid+np-pfrom),np) 368 369 source=pfrom 370 msglen = (int_mb(i2_start(1,nb)+it+2-1) 371 > - int_mb(i2_start(1,nb)+it+1-1)) 372 373 if (msglen.gt.0) then 374 call RCV(9+MSGDBL, 375 > tmp2(int_mb(i2_start(1,nb)+it+1-1)), 376 > mdtob(2*msglen),rcv_len, 377 > Parallel2d_convert_taskid_i(source), 378 > rcv_proc,1) 379 end if 380 end if 381 382* **** send packed array to other processors **** 383 if (int_mb(channel_type(1,nb)+c-1) .eq. 0) then 384 pto=int_mb(channel_proc(1,nb)+c-1) 385 it = mod((pto-taskid+np),np) 386 387 msglen = (int_mb(i1_start(1,nb)+it+2-1) 388 > - int_mb(i1_start(1,nb)+it+1-1)) 389 390 if (msglen.gt.0) then 391 call SND(9+MSGDBL, 392 > tmp1(int_mb(i1_start(1,nb)+it+1-1)), 393 > mdtob(2*msglen), 394 > Parallel2d_convert_taskid_i(pto), 395 > 1) 396 end if 397 end if 398 399 end do 400 401* **** unpack A(i) array **** 402 do index2=int_mb(i2_start(1,nb)+1-1), 403 > (int_mb(i2_start(1,nb)+np+1-1)-1) 404 A(int_mb(iq_to_i2(1,nb)+index2-1))=dconjg(tmp2(index2)) 405 end do 406 407 return 408 end 409 410 411 412* *********************************** 413* * * 414* * D3dB_c_transpose_ijk * 415* * * 416* *********************************** 417 418 subroutine D3dB_c_transpose_ijk(nb,op,A,tmp1,tmp2) 419 420***************************************************** 421* * 422* This routine performs the operation * 423* A(i,k,j) <- A(i,j,k) * 424* * 425* np = the number of worker nodes * 426* proc#=0...(np-1) 427* * 428* this transpose uses more buffer space * 429* then transpose2 * 430***************************************************** 431 implicit none 432 integer nb,op 433 complex*16 A(*) 434 complex*16 tmp1(*),tmp2(*) 435 436#include "bafdecls.fh" 437 438#include "D3dB.fh" 439 440 441* **** indexing variables **** 442 integer h_iq_to_i1(2,6,NBLOCKS) 443 integer h_iq_to_i2(2,6,NBLOCKS) 444 integer h_i1_start(2,6,NBLOCKS) 445 integer h_i2_start(2,6,NBLOCKS) 446 common / trans_blk_ijk / h_iq_to_i1, 447 > h_iq_to_i2, 448 > h_i1_start, 449 > h_i2_start 450 451* **** Used to avoid asynchronous communications **** 452 integer Nchannels(NBLOCKS) 453 integer channel_proc(2,NBLOCKS) 454 integer channel_type(2,NBLOCKS) 455 common / channel_blk / channel_proc,channel_type,Nchannels 456 457#include "tcgmsg.fh" 458#include "msgtypesf.h" 459 integer rcv_len,rcv_proc 460 461* **** local variables *** 462 integer i,c,nnfft3d 463 integer it 464 integer source 465 integer msglen 466 integer pfrom,pto 467 integer taskid,np 468 469* **** external functions **** 470 integer Parallel2d_convert_taskid_i 471 external Parallel2d_convert_taskid_i 472 473 call Parallel2d_taskid_i(taskid) 474 call Parallel2d_np_i(np) 475 476 477* **** pack A(i) array **** 478 if ((op.eq.1).or.(op.eq.5)) nnfft3d = (nx(nb)/2+1)*nq1(nb) 479 if ((op.eq.2).or.(op.eq.4)) nnfft3d = (ny(nb)) *nq2(nb) 480 if ((op.eq.3).or.(op.eq.6)) nnfft3d = (nz(nb)) *nq3(nb) 481#ifndef CRAY 482!DIR$ ivdep 483#endif 484 do i=1,nnfft3d 485 tmp1(int_mb(h_iq_to_i1(1,op,nb)+i-1)) = A(i) 486 end do 487 488* **** it = 0, transpose data on same thread **** 489 msglen = int_mb(h_i2_start(1,op,nb)+2-1) 490 > - int_mb(h_i2_start(1,op,nb)+1-1) 491#ifndef CRAY 492!DIR$ ivdep 493#endif 494 do i=1,msglen 495 tmp2(int_mb(h_i2_start(1,op,nb)+1-1)+i-1) 496 > = tmp1(int_mb(h_i1_start(1,op,nb)+1-1)+i-1) 497 end do 498 499 500 do c=1,Nchannels(nb) 501* **** receive packed array data **** 502 if (int_mb(channel_type(1,nb)+c-1) .eq. 1) then 503 pfrom=int_mb(channel_proc(1,nb)+c-1) 504 it = mod((taskid+np-pfrom),np) 505 506 source=pfrom 507 msglen = (int_mb(h_i2_start(1,op,nb)+it+2-1) 508 > - int_mb(h_i2_start(1,op,nb)+it+1-1)) 509 510 if (msglen.gt.0) then 511 call RCV(9+MSGDBL, 512 > tmp2(int_mb(h_i2_start(1,op,nb)+it+1-1)), 513 > mdtob(2*msglen),rcv_len, 514 > Parallel2d_convert_taskid_i(source), 515 > rcv_proc,1) 516 end if 517 end if 518 519* **** send packed array to other processors **** 520 if (int_mb(channel_type(1,nb)+c-1) .eq. 0) then 521 pto=int_mb(channel_proc(1,nb)+c-1) 522 it = mod((pto-taskid+np),np) 523 524 msglen = (int_mb(h_i1_start(1,op,nb)+it+2-1) 525 > - int_mb(h_i1_start(1,op,nb)+it+1-1)) 526 527 if (msglen.gt.0) then 528 call SND(9+MSGDBL, 529 > tmp1(int_mb(h_i1_start(1,op,nb)+it+1-1)), 530 > mdtob(2*msglen), 531 > Parallel2d_convert_taskid_i(pto),1) 532 end if 533 end if 534 535 end do 536 537 538* **** unpack A(i) array **** 539 if ((op.eq.4).or.(op.eq.6)) nnfft3d = (nx(nb)/2+1)*nq1(nb) 540 if ((op.eq.1).or.(op.eq.3)) nnfft3d = (ny(nb)) *nq2(nb) 541 if ((op.eq.2).or.(op.eq.5)) nnfft3d = (nz(nb)) *nq3(nb) 542#ifndef CRAY 543!DIR$ ivdep 544#endif 545 do i=1,nnfft3d 546 A(i) = tmp2(int_mb(h_iq_to_i2(1,op,nb)+i-1)) 547 end do 548 549 550 return 551 end 552 553 554 555* *********************************** 556* * * 557* * D3dB_t_transpose_ijk * 558* * * 559* *********************************** 560 561 subroutine D3dB_t_transpose_ijk(nb,op,A,tmp1,tmp2) 562 563***************************************************** 564* * 565* This routine performs the operation * 566* A(i,k,j) <- A(i,j,k) * 567* * 568* np = the number of worker nodes * 569* proc#=0...(np-1) 570* * 571* this transpose uses more buffer space * 572* then transpose2 * 573***************************************************** 574 implicit none 575 integer nb,op 576 real*8 A(*) 577 real*8 tmp1(*),tmp2(*) 578 579#include "bafdecls.fh" 580#include "D3dB.fh" 581 582 583* **** indexing variables **** 584 integer h_iq_to_i1(2,6,NBLOCKS) 585 integer h_iq_to_i2(2,6,NBLOCKS) 586 integer h_i1_start(2,6,NBLOCKS) 587 integer h_i2_start(2,6,NBLOCKS) 588 common / trans_blk_ijk / h_iq_to_i1, 589 > h_iq_to_i2, 590 > h_i1_start, 591 > h_i2_start 592 593* **** Used to avoid asynchronous communications **** 594 integer Nchannels(NBLOCKS) 595 integer channel_proc(2,NBLOCKS) 596 integer channel_type(2,NBLOCKS) 597 common / channel_blk / channel_proc,channel_type,Nchannels 598 599#include "tcgmsg.fh" 600#include "msgtypesf.h" 601 integer rcv_len,rcv_proc 602 603* **** local variables *** 604 integer i,c,nnfft3d 605 integer it 606 integer source 607 integer msglen 608 integer pfrom,pto 609 integer taskid,np 610 611* **** external functions **** 612 integer Parallel2d_convert_taskid_i 613 external Parallel2d_convert_taskid_i 614 615 call Parallel2d_taskid_i(taskid) 616 call Parallel2d_np_i(np) 617 618 619* **** pack A(i) array **** 620 if ((op.eq.1).or.(op.eq.5)) nnfft3d = (nx(nb)/2+1)*nq1(nb) 621 if ((op.eq.2).or.(op.eq.4)) nnfft3d = (ny(nb)) *nq2(nb) 622 if ((op.eq.3).or.(op.eq.6)) nnfft3d = (nz(nb)) *nq3(nb) 623#ifndef CRAY 624!DIR$ ivdep 625#endif 626 do i=1,nnfft3d 627 tmp1(int_mb(h_iq_to_i1(1,op,nb)+i-1)) = A(i) 628 end do 629 630* **** it = 0, transpose data on same thread **** 631 msglen = int_mb(h_i2_start(1,op,nb)+2-1) 632 > - int_mb(h_i2_start(1,op,nb)+1-1) 633#ifndef CRAY 634!DIR$ ivdep 635#endif 636 do i=1,msglen 637 tmp2(int_mb(h_i2_start(1,op,nb)+1-1)+i-1) 638 > = tmp1(int_mb(h_i1_start(1,op,nb)+1-1)+i-1) 639 end do 640 641 642 do c=1,Nchannels(nb) 643* **** receive packed array data **** 644 if (int_mb(channel_type(1,nb)+c-1) .eq. 1) then 645 pfrom=int_mb(channel_proc(1,nb)+c-1) 646 it = mod((taskid+np-pfrom),np) 647 648 source=pfrom 649 msglen = (int_mb(h_i2_start(1,op,nb)+it+2-1) 650 > - int_mb(h_i2_start(1,op,nb)+it+1-1)) 651 652 if (msglen.gt.0) then 653 call RCV(9+MSGDBL, 654 > tmp2(int_mb(h_i2_start(1,op,nb)+it+1-1)), 655 > mdtob(msglen),rcv_len, 656 > Parallel2d_convert_taskid_i(source), 657 > rcv_proc,1) 658 end if 659 end if 660 661* **** send packed array to other processors **** 662 if (int_mb(channel_type(1,nb)+c-1) .eq. 0) then 663 pto=int_mb(channel_proc(1,nb)+c-1) 664 it = mod((pto-taskid+np),np) 665 666 msglen = (int_mb(h_i1_start(1,op,nb)+it+2-1) 667 > - int_mb(h_i1_start(1,op,nb)+it+1-1)) 668 669 if (msglen.gt.0) then 670 call SND(9+MSGDBL, 671 > tmp1(int_mb(h_i1_start(1,op,nb)+it+1-1)), 672 > mdtob(msglen), 673 > Parallel2d_convert_taskid_i(pto), 674 > 1) 675 end if 676 end if 677 678 end do 679 680 681* **** unpack A(i) array **** 682 if ((op.eq.4).or.(op.eq.6)) nnfft3d = (nx(nb)/2+1)*nq1(nb) 683 if ((op.eq.1).or.(op.eq.3)) nnfft3d = (ny(nb)) *nq2(nb) 684 if ((op.eq.2).or.(op.eq.5)) nnfft3d = (nz(nb)) *nq3(nb) 685#ifndef CRAY 686!DIR$ ivdep 687#endif 688 do i=1,nnfft3d 689 A(i) = tmp2(int_mb(h_iq_to_i2(1,op,nb)+i-1)) 690 end do 691 692 693 return 694 end 695 696 697* *********************************** 698* * * 699* * D3dB_SumAll * 700* * * 701* *********************************** 702 703 subroutine D3dB_SumAll(sum) 704c implicit none 705 real*8 sum 706 707#include "tcgmsg.fh" 708#include "msgtypesf.h" 709 710* **** local variables **** 711 integer np_i 712 713* **** external functions **** 714 integer Parallel2d_comm_i 715 external Parallel2d_comm_i 716 717 call Parallel2d_np_i(np_i) 718 if (np_i.gt.1) then 719 call GA_PGROUP_DGOP(Parallel2d_comm_i(), 720 > 9+MSGDBL,sum,1,'+') 721 end if 722 723 return 724 end 725 726 727 728* *********************************** 729* * * 730* * D3dB_Vector_SumAll * 731* * * 732* *********************************** 733 734 subroutine D3dB_Vector_SumAll(n,sum) 735c implicit none 736 integer n 737 real*8 sum(*) 738 739#include "bafdecls.fh" 740#include "tcgmsg.fh" 741#include "msgtypesf.h" 742#include "errquit.fh" 743 744 745* **** temporary workspace **** 746 integer np_i 747 748* **** external functions **** 749 integer Parallel2d_comm_i 750 external Parallel2d_comm_i 751 752 call nwpw_timing_start(2) 753 754 call Parallel2d_np_i(np_i) 755 if (np_i.gt.1) then 756 call GA_PGROUP_DGOP(Parallel2d_comm_i(), 757 > 9+MSGDBL,sum,n,'+') 758 end if 759 760 call nwpw_timing_end(2) 761 762 return 763 end 764 765 766 767 768* *********************************** 769* * * 770* * D3dB_ISumAll * 771* * * 772* *********************************** 773 774 subroutine D3dB_ISumAll(sum) 775c implicit none 776 integer sum 777 778#include "tcgmsg.fh" 779#include "msgtypesf.h" 780 781* **** local variables **** 782 integer np_i 783 784* **** external functions **** 785 integer Parallel2d_comm_i 786 external Parallel2d_comm_i 787 788 789 call Parallel2d_np_i(np_i) 790 if (np_i.gt.1) then 791 call GA_PGROUP_IGOP(Parallel2d_comm_i(), 792 > 9+MSGINT,sum,1,'+') 793 end if 794 795 return 796 end 797 798* *********************************** 799* * * 800* * D3dB_Vector_ISumAll * 801* * * 802* *********************************** 803 804 subroutine D3dB_Vector_ISumAll(n,sum) 805c implicit none 806 integer n 807 integer sum(*) 808 809#include "bafdecls.fh" 810#include "errquit.fh" 811#include "tcgmsg.fh" 812#include "msgtypesf.h" 813 814* **** local variables **** 815 integer np_i 816 817* **** external functions **** 818 integer Parallel2d_comm_i 819 external Parallel2d_comm_i 820 821 call nwpw_timing_start(2) 822 823 call Parallel2d_np_i(np_i) 824 if (np_i.gt.1) then 825 call GA_PGROUP_IGOP(Parallel2d_comm_i(), 826 > 9+MSGINT,sum,n,'+') 827 end if 828 829 call nwpw_timing_end(2) 830 831 return 832 end 833 834 835 836 837 838 839* *********************************** 840* * * 841* * D3dB_c_ptranspose1_jk * 842* * * 843* *********************************** 844 845 subroutine D3dB_c_ptranspose1_jk(nbb,A,tmp1,tmp2) 846 847***************************************************** 848* * 849* This routine performs the operation * 850* A(i,k,j) <- A(i,j,k) * 851* * 852* np = the number of worker nodes * 853* proc#=0...(np-1) 854* * 855* this transpose uses more buffer space * 856* then transpose2 * 857***************************************************** 858 implicit none 859 integer nbb 860 complex*16 A(*) 861 complex*16 tmp1(*),tmp2(*) 862 863#include "bafdecls.fh" 864#include "D3dB.fh" 865 866* **** indexing variables **** 867 integer iq_to_i1(2,0:3) 868 integer iq_to_i2(2,0:3) 869 integer iz_to_i2(2,0:3) 870 integer i1_start(2,0:3) 871 integer i2_start(2,0:3) 872 common / ptrans_blk1 / iq_to_i1,iq_to_i2,iz_to_i2, 873 > i1_start,i2_start 874 875 876 877* **** Used to avoid asynchronous communications **** 878 integer Nchannels(NBLOCKS) 879 integer channel_proc(2,NBLOCKS) 880 integer channel_type(2,NBLOCKS) 881 common / channel_blk / channel_proc,channel_type,Nchannels 882 883#include "tcgmsg.fh" 884#include "msgtypesf.h" 885 integer rcv_len,rcv_proc 886 887* **** local variables *** 888 integer c,it 889 integer source 890 integer msglen 891 integer pfrom,pto 892 integer taskid,np 893 integer n1,n2,id 894 895* **** external functions **** 896 integer Parallel2d_convert_taskid_i 897 external Parallel2d_convert_taskid_i 898 899 900 id = 2*(nbb/2) + 1 901 902 call Parallel2d_taskid_i(taskid) 903 call Parallel2d_np_i(np) 904 905 906 n1 = int_mb(i1_start(1,nbb)+np) - 1 907 n2 = int_mb(i2_start(1,nbb)+np) - 1 908 909* **** pack A(i) array **** 910 call D3dB_pfft_index1_copy(n1,int_mb(iq_to_i1(1,nbb)),A,tmp1) 911 912 913* **** it = 0, transpose data on same thread **** 914 msglen = int_mb(i2_start(1,nbb)+2-1) - int_mb(i2_start(1,nbb)+1-1) 915 call dcopy(2*msglen, 916 > tmp1(int_mb(i1_start(1,nbb)+1-1)),1, 917 > tmp2(int_mb(i2_start(1,nbb)+1-1)),1) 918 919 920 do c=1,Nchannels(1) 921* **** receive packed array data **** 922 if (int_mb(channel_type(1,1)+c-1) .eq. 1) then 923 pfrom=int_mb(channel_proc(1,1)+c-1) 924 it = mod((taskid+np-pfrom),np) 925 926 source=pfrom 927 msglen = (int_mb(i2_start(1,nbb)+it+2-1) 928 > - int_mb(i2_start(1,nbb)+it+1-1)) 929 930 if (msglen.gt.0) then 931 call RCV(9+MSGDBL, 932 > tmp2(int_mb(i2_start(1,nbb)+it+1-1)), 933 > mdtob(2*msglen),rcv_len, 934 > Parallel2d_convert_taskid_i(source), 935 > rcv_proc,1) 936 end if 937 end if 938 939* **** send packed array to other processors **** 940 if (int_mb(channel_type(1,1)+c-1) .eq. 0) then 941 pto=int_mb(channel_proc(1,1)+c-1) 942 it = mod((pto-taskid+np),np) 943 944 msglen = (int_mb(i1_start(1,nbb)+it+2-1) 945 > - int_mb(i1_start(1,nbb)+it+1-1)) 946 947 if (msglen.gt.0) then 948 call SND(9+MSGDBL, 949 > tmp1(int_mb(i1_start(1,nbb)+it+1-1)), 950 > mdtob(2*msglen), 951 > Parallel2d_convert_taskid_i(pto), 952 > 1) 953 end if 954 end if 955 956 end do 957 958 959* **** unpack A(i) array **** 960 call D3dB_pfft_index2_copy(n2,int_mb(iq_to_i2(1,nbb)),tmp2,A) 961 call D3dB_pfft_index2_zero(nfft3d(id)-n2, 962 > int_mb(iz_to_i2(1,nbb)),A) 963 964 965 return 966 end 967 968 969 970* *********************************** 971* * * 972* * D3dB_c_ptranspose2_jk * 973* * * 974* *********************************** 975 976 subroutine D3dB_c_ptranspose2_jk(nbb,A,tmp1,tmp2) 977 978***************************************************** 979* * 980* This routine performs the operation * 981* A(i,k,j) <- A(i,j,k) * 982* * 983* np = the number of worker nodes * 984* proc#=0...(np-1) 985* * 986* this transpose uses more buffer space * 987* then transpose2 * 988***************************************************** 989 implicit none 990 integer nbb 991 complex*16 A(*) 992 complex*16 tmp1(*),tmp2(*) 993 994#include "bafdecls.fh" 995#include "D3dB.fh" 996 997* **** indexing variables **** 998 integer iq_to_i1(2,0:3) 999 integer iq_to_i2(2,0:3) 1000 integer iz_to_i2(2,0:3) 1001 integer i1_start(2,0:3) 1002 integer i2_start(2,0:3) 1003 common / ptrans_blk2 / iq_to_i1,iq_to_i2,iz_to_i2, 1004 > i1_start,i2_start 1005 1006 1007 1008* **** Used to avoid asynchronous communications **** 1009 integer Nchannels(NBLOCKS) 1010 integer channel_proc(2,NBLOCKS) 1011 integer channel_type(2,NBLOCKS) 1012 common / channel_blk / channel_proc,channel_type,Nchannels 1013 1014#include "tcgmsg.fh" 1015#include "msgtypesf.h" 1016 integer rcv_len,rcv_proc 1017 1018* **** local variables *** 1019 integer c,it 1020 integer source 1021 integer msglen 1022 integer pfrom,pto 1023 integer taskid,np 1024 integer n1,n2,id 1025 1026* **** external functions **** 1027 integer Parallel2d_convert_taskid_i 1028 external Parallel2d_convert_taskid_i 1029 1030 id = 2*(nbb/2) + 1 1031 1032 call Parallel2d_taskid_i(taskid) 1033 call Parallel2d_np_i(np) 1034 1035 1036 n1 = int_mb(i1_start(1,nbb)+np) - 1 1037 n2 = int_mb(i2_start(1,nbb)+np) - 1 1038 1039* **** pack A(i) array **** 1040 call D3dB_pfft_index1_copy(n1,int_mb(iq_to_i1(1,nbb)),A,tmp1) 1041 1042 1043* **** it = 0, transpose data on same thread **** 1044 msglen = int_mb(i2_start(1,nbb)+2-1) - int_mb(i2_start(1,nbb)+1-1) 1045 call dcopy(2*msglen, 1046 > tmp1(int_mb(i1_start(1,nbb)+1-1)),1, 1047 > tmp2(int_mb(i2_start(1,nbb)+1-1)),1) 1048 1049 1050 do c=1,Nchannels(1) 1051* **** receive packed array data **** 1052 if (int_mb(channel_type(1,1)+c-1) .eq. 1) then 1053 pfrom=int_mb(channel_proc(1,1)+c-1) 1054 it = mod((taskid+np-pfrom),np) 1055 1056 source=pfrom 1057 msglen = (int_mb(i2_start(1,nbb)+it+2-1) 1058 > - int_mb(i2_start(1,nbb)+it+1-1)) 1059 1060 if (msglen.gt.0) then 1061 call RCV(9+MSGDBL, 1062 > tmp2(int_mb(i2_start(1,nbb)+it+1-1)), 1063 > mdtob(2*msglen),rcv_len, 1064 > Parallel2d_convert_taskid_i(source), 1065 > rcv_proc,1) 1066 end if 1067 end if 1068 1069* **** send packed array to other processors **** 1070 if (int_mb(channel_type(1,1)+c-1) .eq. 0) then 1071 pto=int_mb(channel_proc(1,1)+c-1) 1072 it = mod((pto-taskid+np),np) 1073 1074 msglen = (int_mb(i1_start(1,nbb)+it+2-1) 1075 > - int_mb(i1_start(1,nbb)+it+1-1)) 1076 1077 if (msglen.gt.0) then 1078 call SND(9+MSGDBL, 1079 > tmp1(int_mb(i1_start(1,nbb)+it+1-1)), 1080 > mdtob(2*msglen), 1081 > Parallel2d_convert_taskid_i(pto), 1082 > 1) 1083 end if 1084 end if 1085 1086 end do 1087 1088* **** unpack A(i) array **** 1089 call D3dB_pfft_index2_copy(n2,int_mb(iq_to_i2(1,nbb)),tmp2,A) 1090 call D3dB_pfft_index2_zero(nfft3d(id)-n2, 1091 > int_mb(iz_to_i2(1,nbb)),A) 1092 1093 return 1094 end 1095 1096 1097 1098* *********************************** 1099* * * 1100* * D3dB_c_timereverse_start * 1101* * * 1102* *********************************** 1103* 1104* This routine performs the operation 1105* A(i,j,k) <- conjugate(A(i,-j,-k)) 1106* 1107* np = the number of worker nodes 1108* proc#=0...(np-1) 1109* 1110 subroutine D3dB_c_timereverse_start(nb,A,tmp1,tmp2,request,reqcnt) 1111 implicit none 1112 integer nb 1113 complex*16 A(*) 1114 complex*16 tmp1(*) 1115 complex*16 tmp2(*) 1116 integer request(*),reqcnt 1117 1118#include "bafdecls.fh" 1119#include "D3dB.fh" 1120 1121 1122* **** indexing variables **** 1123c integer iq_to_i1(2**NFFT2*NSLABS) 1124c integer iq_to_i2(2**NFFT2*NSLABS) 1125c integer i1_start(NFFT3+1) 1126c integer i2_start(NFFT3+1) 1127 integer iq_to_i1(2,NBLOCKS) 1128 integer iq_to_i2(2,NBLOCKS) 1129 integer i1_start(2,NBLOCKS) 1130 integer i2_start(2,NBLOCKS) 1131 common / timereverse_blk / iq_to_i1,iq_to_i2,i1_start,i2_start 1132 1133* **** Used to avoid asynchronous communications **** 1134 integer Nchannels(NBLOCKS) 1135 integer channel_proc(2,NBLOCKS) 1136 integer channel_type(2,NBLOCKS) 1137 common / channel_blk / channel_proc,channel_type,Nchannels 1138 1139#include "tcgmsg.fh" 1140#include "msgtypesf.h" 1141 integer rcv_len,rcv_proc 1142 1143* **** local variables *** 1144 integer i,c 1145 integer it 1146 integer source 1147 integer msglen 1148 integer pfrom,pto 1149 integer taskid,np 1150 integer index1 1151 1152* **** external functions **** 1153 integer Parallel2d_convert_taskid_i 1154 external Parallel2d_convert_taskid_i 1155 1156 1157 call Parallel2d_taskid_i(taskid) 1158 call Parallel2d_np_i(np) 1159 1160* **** pack A(i) array **** 1161 do index1=int_mb(i1_start(1,nb)+1-1), 1162 > (int_mb(i1_start(1,nb)+np+1-1)-1) 1163 tmp1(index1) = A(int_mb(iq_to_i1(1,nb)+index1-1)) 1164 end do 1165 1166* **** it = 0, transpose data on same thread **** 1167 msglen = int_mb(i2_start(1,nb)+2-1) - int_mb(i2_start(1,nb)+1-1) 1168 do i=1,msglen 1169 tmp2(int_mb(i2_start(1,nb)+1-1)+i-1) 1170 > = tmp1(int_mb(i1_start(1,nb)+1-1)+i-1) 1171 end do 1172 1173 1174 do c=1,Nchannels(nb) 1175* **** receive packed array data **** 1176 if (int_mb(channel_type(1,nb)+c-1) .eq. 1) then 1177 pfrom=int_mb(channel_proc(1,nb)+c-1) 1178 it = mod((taskid+np-pfrom),np) 1179 1180 source=pfrom 1181 msglen = (int_mb(i2_start(1,nb)+it+2-1) 1182 > - int_mb(i2_start(1,nb)+it+1-1)) 1183 1184 if (msglen.gt.0) then 1185 call RCV(9+MSGDBL, 1186 > tmp2(int_mb(i2_start(1,nb)+it+1-1)), 1187 > mdtob(2*msglen),rcv_len, 1188 > Parallel2d_convert_taskid_i(source), 1189 > rcv_proc,1) 1190 end if 1191 end if 1192 1193* **** send packed array to other processors **** 1194 if (int_mb(channel_type(1,nb)+c-1) .eq. 0) then 1195 pto=int_mb(channel_proc(1,nb)+c-1) 1196 it = mod((pto-taskid+np),np) 1197 1198 msglen = (int_mb(i1_start(1,nb)+it+2-1) 1199 > - int_mb(i1_start(1,nb)+it+1-1)) 1200 1201 if (msglen.gt.0) then 1202 call SND(9+MSGDBL, 1203 > tmp1(int_mb(i1_start(1,nb)+it+1-1)), 1204 > mdtob(2*msglen), 1205 > Parallel2d_convert_taskid_i(pto),1) 1206 end if 1207 end if 1208 1209 end do 1210 1211 1212 return 1213 end 1214 1215 1216 1217* *********************************** 1218* * * 1219* * D3dB_c_timereverse_end * 1220* * * 1221* *********************************** 1222* 1223* This routine performs the operation 1224* A(i,j,k) <- conjugate(A(i,-j,-k)) 1225* 1226* np = the number of worker nodes 1227* proc#=0...(np-1) 1228* 1229 subroutine D3dB_c_timereverse_end(nb,A,tmp1,tmp2,request,reqcnt) 1230 implicit none 1231 integer nb 1232 complex*16 A(*) 1233 complex*16 tmp1(*) 1234 complex*16 tmp2(*) 1235 integer request(*),reqcnt 1236 1237#include "bafdecls.fh" 1238#include "errquit.fh" 1239 1240 1241* **** indexing variables **** 1242 integer iq_to_i1(2,NBLOCKS) 1243 integer iq_to_i2(2,NBLOCKS) 1244 integer i1_start(2,NBLOCKS) 1245 integer i2_start(2,NBLOCKS) 1246 common / timereverse_blk / iq_to_i1,iq_to_i2,i1_start,i2_start 1247 1248 1249* **** local variables *** 1250 integer np 1251 integer index2 1252 1253 1254 call Parallel2d_np_i(np) 1255 1256* **** unpack A(i) array **** 1257#ifndef CRAY 1258!DIR$ ivdep 1259#endif 1260 do index2=int_mb(i2_start(1,nb)+1-1), 1261 > (int_mb(i2_start(1,nb)+np+1-1)-1) 1262 A(int_mb(iq_to_i2(1,nb)+index2-1))=dconjg(tmp2(index2)) 1263 end do 1264 1265 return 1266 end 1267 1268 1269* ************************************ 1270* * * 1271* * Balance_c_balance_start * 1272* * * 1273* ************************************ 1274 1275 subroutine Balance_c_balance_start(nb,A,request,reqcnt,msgtype) 1276 implicit none 1277 integer nb 1278 complex*16 A(*) 1279 integer request(*),reqcnt,msgtype 1280 1281 call Balance_c_balance(nb,A) 1282 return 1283 end 1284 1285* ************************************ 1286* * * 1287* * Balance_c_balance_end * 1288* * * 1289* ************************************ 1290* 1291 subroutine Balance_c_balance_end(nb,A,request,reqcnt) 1292 implicit none 1293 integer nb 1294 complex*16 A(*) 1295 integer request(*),reqcnt 1296 1297* *** dummy routine *** 1298 return 1299 end 1300 1301* ************************************ 1302* * * 1303* * Balance_c_unbalance_start * 1304* * * 1305* ************************************ 1306 1307 subroutine Balance_c_unbalance_start(nb,A,request,reqcnt,msgtype) 1308 implicit none 1309 integer nb 1310 complex*16 A(*) 1311 integer request(*),reqcnt,msgtype 1312 1313 call Balance_c_unbalance(nb,A) 1314 return 1315 end 1316 1317* ************************************ 1318* * * 1319* * Balance_c_unbalance_end * 1320* * * 1321* ************************************ 1322 1323 subroutine Balance_c_unbalance_end(nb,A,request,reqcnt) 1324 implicit none 1325 integer nb 1326 complex*16 A(*) 1327 integer request(*),reqcnt 1328 1329* *** dummy routine *** 1330 return 1331 end 1332 1333 1334 1335 1336* *********************************** 1337* * * 1338* * D3dB_c_ptranspose1_jk_start * 1339* * * 1340* *********************************** 1341 1342* 1343* This routine performs the operation 1344* A(i,k,j) <- A(i,j,k) 1345* 1346* np = the number of worker nodes 1347* proc#=0...(np-1) 1348* 1349* this transpose uses more buffer space 1350* then transpose2 1351* 1352 1353 subroutine D3dB_c_ptranspose1_jk_start(nbb,A,tmp1,tmp2, 1354 > request,reqcnt,msgtype) 1355 implicit none 1356 integer nbb 1357 complex*16 A(*) 1358 complex*16 tmp1(*),tmp2(*) 1359 integer request(*),reqcnt,msgtype 1360 1361#include "bafdecls.fh" 1362#include "errquit.fh" 1363#include "D3dB.fh" 1364 1365* **** indexing variables **** 1366 integer iq_to_i1(2,0:3) 1367 integer iq_to_i2(2,0:3) 1368 integer iz_to_i2(2,0:3) 1369 integer i1_start(2,0:3) 1370 integer i2_start(2,0:3) 1371 common / ptrans_blk1 / iq_to_i1,iq_to_i2,iz_to_i2, 1372 > i1_start,i2_start 1373 1374* **** Used to avoid asynchronous communications **** 1375 integer Nchannels(NBLOCKS) 1376 integer channel_proc(2,NBLOCKS) 1377 integer channel_type(2,NBLOCKS) 1378 common / channel_blk / channel_proc,channel_type,Nchannels 1379 1380#include "tcgmsg.fh" 1381#include "msgtypesf.h" 1382 integer rcv_len,rcv_proc 1383 1384 1385* **** local variables *** 1386 integer c,it 1387 integer source 1388 integer msglen 1389 integer pfrom,pto 1390 integer taskid,np 1391 integer n1 1392 1393* **** external functions **** 1394 integer Parallel2d_convert_taskid_i 1395 external Parallel2d_convert_taskid_i 1396 1397 1398 call Parallel2d_taskid_i(taskid) 1399 call Parallel2d_np_i(np) 1400 1401 1402 1403 n1 = int_mb(i1_start(1,nbb)+np) - 1 1404 1405* **** pack A(i) array **** 1406 call D3dB_pfft_index1_copy(n1,int_mb(iq_to_i1(1,nbb)),A,tmp1) 1407 1408* **** it = 0, transpose data on same thread **** 1409 msglen = int_mb(i2_start(1,nbb)+2-1) - int_mb(i2_start(1,nbb)+1-1) 1410 call dcopy(2*msglen, 1411 > tmp1(int_mb(i1_start(1,nbb)+1-1)),1, 1412 > tmp2(int_mb(i2_start(1,nbb)+1-1)),1) 1413 1414 1415 do c=1,Nchannels(1) 1416* **** receive packed array data **** 1417 if (int_mb(channel_type(1,1)+c-1) .eq. 1) then 1418 pfrom=int_mb(channel_proc(1,1)+c-1) 1419 it = mod((taskid+np-pfrom),np) 1420 1421 source=pfrom 1422 msglen = (int_mb(i2_start(1,nbb)+it+2-1) 1423 > - int_mb(i2_start(1,nbb)+it+1-1)) 1424 1425 if (msglen.gt.0) then 1426 call RCV(9+MSGDBL, 1427 > tmp2(int_mb(i2_start(1,nbb)+it+1-1)), 1428 > mdtob(2*msglen),rcv_len, 1429 > Parallel2d_convert_taskid_i(source), 1430 > rcv_proc,1) 1431 end if 1432 end if 1433 1434* **** send packed array to other processors **** 1435 if (int_mb(channel_type(1,1)+c-1) .eq. 0) then 1436 pto=int_mb(channel_proc(1,1)+c-1) 1437 it = mod((pto-taskid+np),np) 1438 1439 msglen = (int_mb(i1_start(1,nbb)+it+2-1) 1440 > - int_mb(i1_start(1,nbb)+it+1-1)) 1441 1442 if (msglen.gt.0) then 1443 call SND(9+MSGDBL, 1444 > tmp1(int_mb(i1_start(1,nbb)+it+1-1)), 1445 > mdtob(2*msglen), 1446 > Parallel2d_convert_taskid_i(pto),1) 1447 end if 1448 end if 1449 1450 end do 1451 1452 return 1453 end 1454 1455 1456* *********************************** 1457* * * 1458* * D3dB_c_ptranspose1_jk_end * 1459* * * 1460* *********************************** 1461 1462* 1463* This routine performs the operation 1464* A(i,k,j) <- A(i,j,k) 1465* 1466* np = the number of worker nodes 1467* proc#=0...(np-1) 1468* 1469* this transpose uses more buffer space 1470* then transpose2 1471* 1472 1473 subroutine D3dB_c_ptranspose1_jk_end(nbb,A,tmp2,request,reqcnt) 1474 1475 implicit none 1476 integer nbb 1477 complex*16 A(*) 1478 complex*16 tmp2(*) 1479 integer request(*),reqcnt 1480 1481#include "bafdecls.fh" 1482#include "errquit.fh" 1483#include "D3dB.fh" 1484 1485 1486* **** indexing variables **** 1487 integer iq_to_i1(2,0:3) 1488 integer iq_to_i2(2,0:3) 1489 integer iz_to_i2(2,0:3) 1490 integer i1_start(2,0:3) 1491 integer i2_start(2,0:3) 1492 common / ptrans_blk1 / iq_to_i1,iq_to_i2,iz_to_i2, 1493 > i1_start,i2_start 1494 1495* **** local variables *** 1496 integer np,n2,id 1497 1498 id = 2*(nbb/2) + 1 1499 1500 call Parallel2d_np_i(np) 1501 1502* **** unpack A(i) array **** 1503 n2 = int_mb(i2_start(1,nbb)+np) - 1 1504 call D3dB_pfft_index2_copy(n2,int_mb(iq_to_i2(1,nbb)),tmp2,A) 1505 call D3dB_pfft_index2_zero(nfft3d(id)-n2, 1506 > int_mb(iz_to_i2(1,nbb)),A) 1507 1508 return 1509 end 1510 1511 1512 1513* *********************************** 1514* * * 1515* * D3dB_c_ptranspose2_jk_start * 1516* * * 1517* *********************************** 1518 1519* 1520* This routine performs the operation 1521* A(i,k,j) <- A(i,j,k) 1522* 1523* np = the number of worker nodes 1524* proc#=0...(np-1) 1525* 1526* this transpose uses more buffer space 1527* then transpose2 1528* 1529 1530 subroutine D3dB_c_ptranspose2_jk_start(nbb,A,tmp1,tmp2, 1531 > request,reqcnt,msgtype) 1532 implicit none 1533 integer nbb 1534 complex*16 A(*) 1535 complex*16 tmp1(*),tmp2(*) 1536 integer request(*),reqcnt,msgtype 1537 1538#include "bafdecls.fh" 1539#include "errquit.fh" 1540#include "D3dB.fh" 1541 1542 1543* **** indexing variables **** 1544 integer iq_to_i1(2,0:3) 1545 integer iq_to_i2(2,0:3) 1546 integer iz_to_i2(2,0:3) 1547 integer i1_start(2,0:3) 1548 integer i2_start(2,0:3) 1549 common / ptrans_blk2 / iq_to_i1,iq_to_i2,iz_to_i2, 1550 > i1_start,i2_start 1551 1552 1553 1554* **** Used to avoid asynchronous communications **** 1555 integer Nchannels(NBLOCKS) 1556 integer channel_proc(2,NBLOCKS) 1557 integer channel_type(2,NBLOCKS) 1558 common / channel_blk / channel_proc,channel_type,Nchannels 1559 1560#include "tcgmsg.fh" 1561#include "msgtypesf.h" 1562 integer rcv_len,rcv_proc 1563 1564 1565* **** local variables *** 1566 integer c,it 1567 integer source 1568 integer msglen 1569 integer pfrom,pto 1570 integer taskid,np 1571 integer n1 1572 1573* **** external functions **** 1574 integer Parallel2d_convert_taskid_i 1575 external Parallel2d_convert_taskid_i 1576 1577 1578 call Parallel2d_taskid_i(taskid) 1579 call Parallel2d_np_i(np) 1580 1581 1582 n1 = int_mb(i1_start(1,nbb)+np) - 1 1583 1584* **** pack A(i) array **** 1585 call D3dB_pfft_index1_copy(n1,int_mb(iq_to_i1(1,nbb)),A,tmp1) 1586 1587* **** it = 0, transpose data on same thread **** 1588 msglen = int_mb(i2_start(1,nbb)+2-1) - int_mb(i2_start(1,nbb)+1-1) 1589 call dcopy(2*msglen, 1590 > tmp1(int_mb(i1_start(1,nbb)+1-1)),1, 1591 > tmp2(int_mb(i2_start(1,nbb)+1-1)),1) 1592 1593 1594 do c=1,Nchannels(1) 1595* **** receive packed array data **** 1596 if (int_mb(channel_type(1,1)+c-1) .eq. 1) then 1597 pfrom=int_mb(channel_proc(1,1)+c-1) 1598 it = mod((taskid+np-pfrom),np) 1599 1600 source=pfrom 1601 msglen = (int_mb(i2_start(1,nbb)+it+2-1) 1602 > - int_mb(i2_start(1,nbb)+it+1-1)) 1603 1604 if (msglen.gt.0) then 1605 call RCV(9+MSGDBL, 1606 > tmp2(int_mb(i2_start(1,nbb)+it+1-1)), 1607 > mdtob(2*msglen),rcv_len, 1608 > Parallel2d_convert_taskid_i(source), 1609 > rcv_proc,1) 1610 end if 1611 end if 1612 1613* **** send packed array to other processors **** 1614 if (int_mb(channel_type(1,1)+c-1) .eq. 0) then 1615 pto=int_mb(channel_proc(1,1)+c-1) 1616 it = mod((pto-taskid+np),np) 1617 1618 msglen = (int_mb(i1_start(1,nbb)+it+2-1) 1619 > - int_mb(i1_start(1,nbb)+it+1-1)) 1620 1621 if (msglen.gt.0) then 1622 call SND(9+MSGDBL, 1623 > tmp1(int_mb(i1_start(1,nbb)+it+1-1)), 1624 > mdtob(2*msglen), 1625 > Parallel2d_convert_taskid_i(pto), 1626 > 1) 1627 end if 1628 end if 1629 1630 end do 1631 1632 1633 return 1634 end 1635 1636 1637* *********************************** 1638* * * 1639* * D3dB_c_ptranspose2_jk_end * 1640* * * 1641* *********************************** 1642 1643* 1644* This routine performs the operation 1645* A(i,k,j) <- A(i,j,k) 1646* 1647* np = the number of worker nodes 1648* proc#=0...(np-1) 1649* 1650* this transpose uses more buffer space 1651* then transpose2 1652* 1653 1654 subroutine D3dB_c_ptranspose2_jk_end(nbb,A,tmp2,request,reqcnt) 1655 1656 implicit none 1657 integer nbb 1658 complex*16 A(*) 1659 complex*16 tmp2(*) 1660 integer request(*),reqcnt 1661 1662#include "bafdecls.fh" 1663#include "errquit.fh" 1664#include "D3dB.fh" 1665 1666 1667* **** indexing variables **** 1668 integer iq_to_i1(2,0:3) 1669 integer iq_to_i2(2,0:3) 1670 integer iz_to_i2(2,0:3) 1671 integer i1_start(2,0:3) 1672 integer i2_start(2,0:3) 1673 common / ptrans_blk2 / iq_to_i1,iq_to_i2,iz_to_i2, 1674 > i1_start,i2_start 1675 1676 1677* **** local variables *** 1678 integer np,n2,id 1679 1680 id = 2*(nbb/2) + 1 1681 1682 call Parallel2d_np_i(np) 1683 1684* **** unpack A(i) array **** 1685 n2 = int_mb(i2_start(1,nbb)+np) - 1 1686 call D3dB_pfft_index2_copy(n2,int_mb(iq_to_i2(1,nbb)),tmp2,A) 1687 call D3dB_pfft_index2_zero(nfft3d(id)-n2, 1688 > int_mb(iz_to_i2(1,nbb)),A) 1689 1690 return 1691 end 1692 1693 1694 1695* *********************************** 1696* * * 1697* * D3dB_c_ptranspose_ijk_start * 1698* * * 1699* *********************************** 1700* 1701* This routine performs the operation 1702* A(i,k,j) <- A(i,j,k) 1703* 1704* np = the number of worker nodes 1705* proc#=0...(np-1) 1706* 1707* this transpose uses more buffer space 1708* then transpose2 1709* 1710 1711 1712 subroutine D3dB_c_ptranspose_ijk_start(nbb,op,A,tmp1,tmp2, 1713 > request,reqcnt,msgtype) 1714 1715 implicit none 1716 integer nbb,op 1717 complex*16 A(*) 1718 complex*16 tmp1(*),tmp2(*) 1719 integer request(*),reqcnt,msgtype 1720 1721#include "bafdecls.fh" 1722#include "errquit.fh" 1723#include "D3dB.fh" 1724 1725 1726* **** indexing variables **** 1727 integer h_iq_to_i1(2,6,0:3) 1728 integer h_iq_to_i2(2,6,0:3) 1729 integer h_iz_to_i2(2,6,0:3) 1730 integer h_iz_to_i2_count(6,0:3) 1731 integer h_i1_start(2,6,0:3) 1732 integer h_i2_start(2,6,0:3) 1733 common / ptrans_blk_ijk / h_iq_to_i1, 1734 > h_iq_to_i2, 1735 > h_iz_to_i2, 1736 > h_iz_to_i2_count, 1737 > h_i1_start, 1738 > h_i2_start 1739 1740* **** Used to avoid asynchronous communications **** 1741 integer Nchannels(NBLOCKS) 1742 integer channel_proc(2,NBLOCKS) 1743 integer channel_type(2,NBLOCKS) 1744 common / channel_blk / channel_proc,channel_type,Nchannels 1745 1746#include "tcgmsg.fh" 1747#include "msgtypesf.h" 1748 integer rcv_len,rcv_proc 1749 1750 1751* **** local variables *** 1752 integer c,n1 1753 integer it 1754 integer source 1755 integer msglen 1756 integer pfrom,pto 1757 integer taskid,np 1758 1759* **** external functions **** 1760 integer Parallel2d_convert_taskid_i 1761 external Parallel2d_convert_taskid_i 1762 1763 1764 call Parallel2d_taskid_i(taskid) 1765 call Parallel2d_np_i(np) 1766 1767* **** pack A(i) array **** 1768 n1 = int_mb(h_i1_start(1,op,nbb)+np) - 1 1769 call D3dB_pfft_index1_copy(n1,int_mb(h_iq_to_i1(1,op,nbb)),A,tmp1) 1770 1771 1772* **** it = 0, transpose data on same thread **** 1773 msglen = int_mb(h_i2_start(1,op,nbb)+2-1) 1774 > - int_mb(h_i2_start(1,op,nbb)+1-1) 1775 call dcopy(2*msglen, 1776 > tmp1(int_mb(h_i1_start(1,op,nbb))),1, 1777 > tmp2(int_mb(h_i2_start(1,op,nbb))),1) 1778 1779 1780 1781 do c=1,Nchannels(1) 1782* **** receive packed array data **** 1783 if (int_mb(channel_type(1,1)+c-1) .eq. 1) then 1784 pfrom=int_mb(channel_proc(1,1)+c-1) 1785 it = mod((taskid+np-pfrom),np) 1786 1787 source=pfrom 1788 msglen = (int_mb(h_i2_start(1,op,nbb)+it+2-1) 1789 > - int_mb(h_i2_start(1,op,nbb)+it+1-1)) 1790 1791 if (msglen.gt.0) then 1792 call RCV(9+MSGDBL, 1793 > tmp2(int_mb(h_i2_start(1,op,nbb)+it+1-1)), 1794 > mdtob(2*msglen),rcv_len, 1795 > Parallel2d_convert_taskid_i(source), 1796 > rcv_proc,1) 1797 end if 1798 end if 1799 1800* **** send packed array to other processors **** 1801 if (int_mb(channel_type(1,1)+c-1) .eq. 0) then 1802 pto=int_mb(channel_proc(1,1)+c-1) 1803 it = mod((pto-taskid+np),np) 1804 1805 msglen = (int_mb(h_i1_start(1,op,nbb)+it+2-1) 1806 > - int_mb(h_i1_start(1,op,nbb)+it+1-1)) 1807 1808 if (msglen.gt.0) then 1809 call SND(9+MSGDBL, 1810 > tmp1(int_mb(h_i1_start(1,op,nbb)+it+1-1)), 1811 > mdtob(2*msglen), 1812 > Parallel2d_convert_taskid_i(pto), 1813 > 1) 1814 end if 1815 end if 1816 1817 end do 1818 1819 return 1820 end 1821 1822 1823 1824* *********************************** 1825* * * 1826* * D3dB_c_ptranspose_ijk_end * 1827* * * 1828* *********************************** 1829* 1830* This routine performs the operation 1831* A(i,k,j) <- A(i,j,k) 1832* 1833* np = the number of worker nodes 1834* proc#=0...(np-1) 1835* 1836* this transpose uses more buffer space 1837* then transpose2 1838* 1839 1840 subroutine D3dB_c_ptranspose_ijk_end(nbb,op,A,tmp2, 1841 > request,reqcnt) 1842 1843 implicit none 1844 integer nbb,op 1845 complex*16 A(*) 1846 complex*16 tmp2(*) 1847 integer request(*),reqcnt 1848 1849#include "bafdecls.fh" 1850#include "errquit.fh" 1851#include "D3dB.fh" 1852 1853 1854* **** indexing variables **** 1855 integer h_iq_to_i1(2,6,0:3) 1856 integer h_iq_to_i2(2,6,0:3) 1857 integer h_iz_to_i2(2,6,0:3) 1858 integer h_iz_to_i2_count(6,0:3) 1859 integer h_i1_start(2,6,0:3) 1860 integer h_i2_start(2,6,0:3) 1861 common / ptrans_blk_ijk / h_iq_to_i1, 1862 > h_iq_to_i2, 1863 > h_iz_to_i2, 1864 > h_iz_to_i2_count, 1865 > h_i1_start, 1866 > h_i2_start 1867 1868* **** local variables *** 1869 integer n2,n3,np 1870 1871 1872 call Parallel2d_np_i(np) 1873 1874* **** unpack A(i) array **** 1875 n2 = int_mb(h_i2_start(1,op,nbb)+np) - 1 1876 n3 = h_iz_to_i2_count(op,nbb) 1877 call D3dB_pfft_index2_copy(n2,int_mb(h_iq_to_i2(1,op,nbb)),tmp2,A) 1878 call D3dB_pfft_index2_zero(n3,int_mb(h_iz_to_i2(1,op,nbb)),A) 1879 1880 1881 return 1882 end 1883 1884 1885 1886* *********************************** 1887* * * 1888* * D3dB_c_ptranspose_ijk * 1889* * * 1890* *********************************** 1891 1892 subroutine D3dB_c_ptranspose_ijk(nbb,op,A,tmp1,tmp2) 1893 1894***************************************************** 1895* * 1896* This routine performs the operation * 1897* A(i,k,j) <- A(i,j,k) * 1898* * 1899* np = the number of worker nodes * 1900* proc#=0...(np-1) 1901* * 1902* this transpose uses more buffer space * 1903* then transpose2 * 1904***************************************************** 1905 implicit none 1906 integer nbb,op 1907 complex*16 A(*) 1908 complex*16 tmp1(*),tmp2(*) 1909 1910#include "bafdecls.fh" 1911#include "errquit.fh" 1912#include "D3dB.fh" 1913 1914 1915* **** indexing variables **** 1916 integer h_iq_to_i1(2,6,0:3) 1917 integer h_iq_to_i2(2,6,0:3) 1918 integer h_iz_to_i2(2,6,0:3) 1919 integer h_iz_to_i2_count(6,0:3) 1920 integer h_i1_start(2,6,0:3) 1921 integer h_i2_start(2,6,0:3) 1922 common / ptrans_blk_ijk / h_iq_to_i1, 1923 > h_iq_to_i2, 1924 > h_iz_to_i2, 1925 > h_iz_to_i2_count, 1926 > h_i1_start, 1927 > h_i2_start 1928 1929* **** Used to avoid asynchronous communications **** 1930 integer Nchannels(NBLOCKS) 1931 integer channel_proc(2,NBLOCKS) 1932 integer channel_type(2,NBLOCKS) 1933 common / channel_blk / channel_proc,channel_type,Nchannels 1934 1935#include "tcgmsg.fh" 1936#include "msgtypesf.h" 1937 integer rcv_len,rcv_proc 1938 1939 1940* **** local variables *** 1941 integer c,n1,n2,n3 1942 integer it 1943 integer source 1944 integer msglen 1945 integer pfrom,pto 1946 integer taskid,np 1947 1948* **** external functions **** 1949 integer Parallel2d_convert_taskid_i 1950 external Parallel2d_convert_taskid_i 1951 1952 1953 call Parallel2d_taskid_i(taskid) 1954 call Parallel2d_np_i(np) 1955 1956 n1 = int_mb(h_i1_start(1,op,nbb)+np) - 1 1957 n2 = int_mb(h_i2_start(1,op,nbb)+np) - 1 1958 n3 = h_iz_to_i2_count(op,nbb) 1959 1960 1961* **** pack A(i) array **** 1962 call D3dB_pfft_index1_copy(n1,int_mb(h_iq_to_i1(1,op,nbb)),A,tmp1) 1963 1964 1965* **** it = 0, transpose data on same thread **** 1966 msglen = int_mb(h_i2_start(1,op,nbb)+2-1) 1967 > - int_mb(h_i2_start(1,op,nbb)+1-1) 1968 call dcopy(2*msglen, 1969 > tmp1(int_mb(h_i1_start(1,op,nbb))),1, 1970 > tmp2(int_mb(h_i2_start(1,op,nbb))),1) 1971 1972 1973 do c=1,Nchannels(1) 1974* **** receive packed array data **** 1975 if (int_mb(channel_type(1,1)+c-1) .eq. 1) then 1976 pfrom=int_mb(channel_proc(1,1)+c-1) 1977 it = mod((taskid+np-pfrom),np) 1978 1979 source=pfrom 1980 msglen = (int_mb(h_i2_start(1,op,nbb)+it+2-1) 1981 > - int_mb(h_i2_start(1,op,nbb)+it+1-1)) 1982 1983 if (msglen.gt.0) then 1984 call RCV(9+MSGDBL, 1985 > tmp2(int_mb(h_i2_start(1,op,nbb)+it+1-1)), 1986 > mdtob(2*msglen),rcv_len, 1987 > Parallel2d_convert_taskid_i(source), 1988 > rcv_proc,1) 1989 end if 1990 end if 1991 1992* **** send packed array to other processors **** 1993 if (int_mb(channel_type(1,1)+c-1) .eq. 0) then 1994 pto=int_mb(channel_proc(1,1)+c-1) 1995 it = mod((pto-taskid+np),np) 1996 1997 msglen = (int_mb(h_i1_start(1,op,nbb)+it+2-1) 1998 > - int_mb(h_i1_start(1,op,nbb)+it+1-1)) 1999 2000 if (msglen.gt.0) then 2001 call SND(9+MSGDBL, 2002 > tmp1(int_mb(h_i1_start(1,op,nbb)+it+1-1)), 2003 > mdtob(2*msglen), 2004 > Parallel2d_convert_taskid_i(pto), 2005 > 1) 2006 end if 2007 end if 2008 2009 end do 2010 2011* **** unpack A(i) array **** 2012 call D3dB_pfft_index2_copy(n2,int_mb(h_iq_to_i2(1,op,nbb)),tmp2,A) 2013 call D3dB_pfft_index2_zero(n3,int_mb(h_iz_to_i2(1,op,nbb)),A) 2014 2015 2016 return 2017 end 2018 2019 2020* *********************************** 2021* * * 2022* * D3dB_channel_init * 2023* * * 2024* *********************************** 2025 2026 subroutine D3dB_channel_init(nb) 2027 implicit none 2028 integer nb 2029 2030#include "bafdecls.fh" 2031#include "errquit.fh" 2032#include "D3dB.fh" 2033 2034 2035* **** Used to avoid asynchronous communications **** 2036 integer Nchannels(NBLOCKS) 2037 integer channel_proc(2,NBLOCKS) 2038 integer channel_type(2,NBLOCKS) 2039 common / channel_blk / channel_proc,channel_type,Nchannels 2040 integer pair1(2),pair2(2) 2041 integer pair_step(2),pair_tmp(2) 2042 integer step,Nstep,icount,jcount,jcount_max 2043 2044* **** local variables **** 2045 integer np,taskid 2046 integer i,j,k 2047 logical value 2048 2049 2050 call Parallel2d_taskid_i(taskid) 2051 call Parallel2d_np_i(np) 2052* 2053* **** Define Channels - which are used to avoid **** 2054* **** asynchronous communications **** 2055 2056 value = BA_alloc_get(mt_int,(2*np), 2057 > 'channel_proc',channel_proc(2,nb),channel_proc(1,nb)) 2058 value = value.and. 2059 > BA_alloc_get(mt_int,(2*np), 2060 > 'channel_type',channel_type(2,nb),channel_type(1,nb)) 2061 if (.not. value) 2062 > call errquit('D3dB_channel_init:out of heap memory',0, MA_ERR) 2063 2064 2065 2066 value = BA_push_get(mt_int,(np*(np-1)/2), 2067 > 'pair1',pair1(2),pair1(1)) 2068 value = value.and. 2069 > BA_push_get(mt_int,(np*(np-1)/2), 2070 > 'pair2',pair2(2),pair2(1)) 2071 value = value.and. 2072 > BA_push_get(mt_int,(np*(np-1)/2), 2073 > 'pair_step',pair_step(2),pair_step(1)) 2074 value = value.and. 2075 > BA_push_get(mt_int,(np*(np-1)/2), 2076 > 'pair_tmp',pair_tmp(2),pair_tmp(1)) 2077 if (.not. value) 2078 > call errquit('D3dB_channel_init:out of stack memory',0, MA_ERR) 2079 2080* *** define pair1,pair2 **** 2081 icount = 0 2082 do i=0, (np-1) 2083 do j=(i+1), (np-1) 2084 icount = icount + 1 2085 int_mb(pair1(1)+icount-1) = i 2086 int_mb(pair2(1)+icount-1) = j 2087 end do 2088 end do 2089 2090* **** define pair_step **** 2091 do i=1,(np*(np-1)/2) 2092 int_mb(pair_step(1)+i-1) = (-1) 2093 end do 2094 2095 step=0 2096 jcount = 0 2097 jcount_max = np*(np-1)/2 2098c do while(.not. full_ps(int_mb(pair_step(1)),np)) 2099 do while(jcount.lt.jcount_max) 2100 step=step+1 2101 2102 icount = 0 2103 do i=1, (np*(np-1)/2) 2104 if (int_mb(pair_step(1)+i-1).eq.(-1)) then 2105 value=.true. 2106 do k=1,icount 2107 j = int_mb(pair_tmp(1)+k-1) 2108 2109 if 2110 > ((int_mb(pair1(1)+i-1).eq.int_mb(pair1(1)+j-1)).or. 2111 > (int_mb(pair1(1)+i-1).eq.int_mb(pair2(1)+j-1)).or. 2112 > (int_mb(pair2(1)+i-1).eq.int_mb(pair1(1)+j-1)).or. 2113 > (int_mb(pair2(1)+i-1).eq.int_mb(pair2(1)+j-1))) 2114 > value=.false. 2115 2116 end do 2117 if (value) then 2118 int_mb(pair_step(1)+i-1) = step 2119 icount = icount + 1 2120 jcount = jcount + 1 2121 int_mb(pair_tmp(1)+icount-1) = i 2122 end if 2123 end if 2124 end do 2125 end do 2126 Nstep=step 2127 2128 2129* **** define channels *** 2130 Nchannels(nb)=0 2131 do step=1,Nstep 2132 do i=1,(np*(np-1)/2) 2133 if (int_mb(pair_step(1)+i-1).eq.step) then 2134* **** send then recv **** 2135 if (int_mb(pair1(1)+i-1).eq.taskid) then 2136 Nchannels(nb)=Nchannels(nb)+1 2137 int_mb(channel_proc(1,nb)+Nchannels(nb)-1) 2138 > = int_mb(pair2(1)+i-1) 2139 int_mb(channel_type(1,nb)+Nchannels(nb)-1) = 0 2140 Nchannels(nb)=Nchannels(nb)+1 2141 int_mb(channel_proc(1,nb)+Nchannels(nb)-1) 2142 > = int_mb(pair2(1)+i-1) 2143 int_mb(channel_type(1,nb)+Nchannels(nb)-1) = 1 2144 end if 2145 2146* **** recv then send **** 2147 if (int_mb(pair2(1)+i-1).eq.taskid) then 2148 Nchannels(nb)=Nchannels(nb)+1 2149 int_mb(channel_proc(1,nb)+Nchannels(nb)-1) 2150 > = int_mb(pair1(1)+i-1) 2151 int_mb(channel_type(1,nb)+Nchannels(nb)-1) = 1 2152 Nchannels(nb)=Nchannels(nb)+1 2153 int_mb(channel_proc(1,nb)+Nchannels(nb)-1) 2154 > = int_mb(pair1(1)+i-1) 2155 int_mb(channel_type(1,nb)+Nchannels(nb)-1) = 0 2156 end if 2157 end if 2158 end do 2159 end do 2160 2161 2162 value= BA_pop_stack(pair_tmp(2)) 2163 value=value.and.BA_pop_stack(pair_step(2)) 2164 value=value.and.BA_pop_stack(pair2(2)) 2165 value=value.and.BA_pop_stack(pair1(2)) 2166 if (.not. value) 2167 > call errquit('D3dB_channel_init:popping stack',0, MA_ERR) 2168 2169 return 2170 end 2171 2172c logical function full_ps(ps,np) 2173c implicit none 2174c integer ps(*) 2175c integer np 2176c 2177c integer i 2178c logical value 2179c 2180c value=.true. 2181c do i=1,(np*(np-1)/2) 2182c if (ps(i).eq.(-1)) value=.false. 2183c end do 2184c 2185c full_ps=value 2186c return 2187c end 2188 2189* *********************************** 2190* * * 2191* * D3dB_(c,r,t)_read * 2192* * * 2193* *********************************** 2194 2195 subroutine D3dB_c_read_pio(nb,iunit,A,tmp,jcol) 2196 implicit none 2197 integer nb 2198 integer iunit 2199 complex*16 A(*) 2200 complex*16 tmp(*) 2201 integer jcol 2202 2203#include "bafdecls.fh" 2204#include "errquit.fh" 2205 2206#include "D3dB.fh" 2207 2208#include "tcgmsg.fh" 2209#include "msgtypesf.h" 2210 integer rcv_len,rcv_proc 2211 2212 2213* *** local variables *** 2214 logical value,fillcolumn,readcolumn 2215 integer MASTER,taskid,taskid_i 2216 parameter(MASTER=0) 2217 integer p_to, p_here,q 2218 integer index,k,j 2219 integer source,msglen 2220 integer tmp1(2),tmp2(2) 2221 2222 integer taskid_j,np_j 2223 integer ii,jj,jstart,jend 2224 2225* **** external functions **** 2226 integer Parallel2d_convert_taskid_ij 2227 external Parallel2d_convert_taskid_ij 2228 2229 call Parallel_taskid(taskid) 2230 call Parallel2d_taskid_i(taskid_i) 2231 call Parallel2d_taskid_j(taskid_j) 2232 2233c call Parallel2d_np_j(np_j) 2234c call Parallel2d_taskid_j(taskid_j) 2235 if (jcol.lt.0) then 2236c jstart = 0 2237c jend = np_j-1 2238 fillcolumn = .true. 2239 readcolumn = .true. 2240 else 2241c jstart = jcol 2242c jend = jcol 2243 fillcolumn = (taskid_j.eq.jcol) 2244 readcolumn = (taskid_j.eq.jcol) 2245 end if 2246 2247 if (readcolumn) then 2248 2249 2250 !********************** 2251 !**** slab mapping **** 2252 !********************** 2253 if (mapping.eq.1) then 2254* **** master node reads from file and distributes **** 2255 if (taskid_i.eq.MASTER) then 2256 do k=1,nz(nb) 2257 2258 call dread(iunit,tmp,(nx(nb)+2)*ny(nb)) 2259 2260 call D3dB_ktoqp(nb,k,q,ii) 2261 !do jj=jstart,jend 2262 p_to = Parallel2d_convert_taskid_ij(ii,taskid_j) 2263 !p_to = Parallel2d_convert_taskid_ij(ii,jj) 2264 if (ii.eq.MASTER) then 2265 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 2266 call dcopy((nx(nb)+2)*ny(nb),tmp,1,A(index),1) 2267 else 2268 msglen = (nx(nb)/2+1)*ny(nb) 2269 call SND(9+MSGDBL,tmp,mdtob(2*msglen),p_to,1) 2270 end if 2271 !end do 2272 end do 2273 2274* **** not master node **** 2275 else if (fillcolumn) then 2276 do k=1,nz(nb) 2277 call D3dB_ktoqp(nb,k,q,ii) 2278 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 2279 if (p_here.eq.taskid) then 2280 2281 msglen = (nx(nb)/2+1)*ny(nb) 2282 !source = MASTER 2283 source = Parallel2d_convert_taskid_ij(MASTER,taskid_j) 2284 2285 call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len, 2286 > source,rcv_proc,1) 2287 2288 2289 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 2290 call dcopy((nx(nb)+2)*ny(nb),tmp,1,A(index),1) 2291 2292 end if 2293 end do 2294 end if 2295 2296 !************************* 2297 !**** hilbert mapping **** 2298 !************************* 2299 else 2300 2301* **** master node reads from file and distributes **** 2302 if (taskid_i.eq.MASTER) then 2303 do k=1,nz(nb) 2304 do j=1,ny(nb) 2305 2306 call dread(iunit,tmp,(nx(nb)+2)) 2307 2308 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2309 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2310 !do jj=jstart,jend 2311 p_to = Parallel2d_convert_taskid_ij(ii,taskid_j) 2312 !p_to = Parallel2d_convert_taskid_ij(ii,jj) 2313 2314 if (ii.eq.MASTER) then 2315 index = (q-1)*(nx(nb)/2+1) + 1 2316 call dcopy((nx(nb)+2),tmp,1,A(index),1) 2317 else 2318 msglen = (nx(nb)/2+1) 2319 call SND(9+MSGDBL,tmp,mdtob(2*msglen),p_to,1) 2320 end if 2321 !end do 2322 end do 2323 end do 2324 2325* **** not master node **** 2326 else if (fillcolumn) then 2327 do k=1,nz(nb) 2328 do j=1,ny(nb) 2329 2330 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2331 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2332 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 2333 if (p_here.eq.taskid) then 2334 msglen = (nx(nb)/2+1) 2335 !source = MASTER 2336 source = Parallel2d_convert_taskid_ij(MASTER,taskid_j) 2337 call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len, 2338 > source,rcv_proc,1) 2339 2340 index = (q-1)*(nx(nb)/2+1) + 1 2341 call dcopy((nx(nb)+2),tmp,1,A(index),1) 2342 end if 2343 end do 2344 end do 2345 end if 2346 2347 2348* **** allocate temporary space **** 2349 value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1', 2350 > tmp1(2),tmp1(1)) 2351 value = value.and. 2352 > BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1)) 2353 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 2354* 2355 call D3dB_c_transpose_ijk(nb,5,A, 2356 > dcpl_mb(tmp1(1)), 2357 > dcpl_mb(tmp2(1))) !*** map1to3 operation *** 2358 2359* **** deallocate temporary space **** 2360 value = BA_pop_stack(tmp2(2)) 2361 value = value.and.BA_pop_stack(tmp1(2)) 2362 if (.not. value) call errquit('error popping stack',0, MA_ERR) 2363 2364 end if 2365 2366 2367 !*** shift filepointer by (nx(nb)+2)*ny(nb)*nz(nb) doubles **** 2368 else 2369 if (taskid_i.eq.MASTER) 2370 > call dshift_fileptr(iunit,(nx(nb)+2)*ny(nb)*nz(nb)) 2371 end if !*** readcolumn *** 2372 2373 2374 2375 return 2376 end 2377 2378 2379 2380 subroutine D3dB_r_read_pio(nb,iunit,A,tmp,jcol) 2381 implicit none 2382 integer nb 2383 integer iunit 2384 real*8 A(*) 2385 real*8 tmp(*) 2386 integer jcol 2387 2388#include "bafdecls.fh" 2389#include "D3dB.fh" 2390 2391#include "tcgmsg.fh" 2392#include "msgtypesf.h" 2393 integer rcv_len,rcv_proc 2394 2395 2396* *** local variables *** 2397 logical fillcolumn,readcolumn 2398 integer MASTER,taskid,taskid_i 2399 parameter(MASTER=0) 2400 integer p_to, p_here,q 2401 integer j,k,index,index2 2402 integer source,msglen 2403 2404 integer taskid_j,np_j 2405 integer ii,jj,jstart,jend 2406 2407* **** external functions **** 2408 integer Parallel2d_convert_taskid_ij 2409 external Parallel2d_convert_taskid_ij 2410 2411 call Parallel_taskid(taskid) 2412 call Parallel2d_taskid_i(taskid_i) 2413 call Parallel2d_taskid_j(taskid_j) 2414 2415c call Parallel2d_np_j(np_j) 2416 if (jcol.lt.0) then 2417c jstart = 0 2418c jend = np_j-1 2419 fillcolumn = .true. 2420 readcolumn = .true. 2421 else 2422c jstart = jcol 2423c jend = jcol 2424 fillcolumn = (taskid_j.eq.jcol) 2425 readcolumn = (taskid_j.eq.jcol) 2426 end if 2427 2428 if (readcolumn) then 2429 2430 !********************** 2431 !**** slab mapping **** 2432 !********************** 2433 if (mapping.eq.1) then 2434* **** master node reads from file and distributes **** 2435 if (taskid_i.eq.MASTER) then 2436 do k=1,nz(nb) 2437 2438 call dread(iunit,tmp,(nx(nb))*ny(nb)) 2439 2440 call D3dB_ktoqp(nb,k,q,ii) 2441 !do jj=jstart,jend 2442 p_to = Parallel2d_convert_taskid_ij(ii,taskid_j) 2443 if (p_to.eq.MASTER) then 2444 do j=1,ny(nb) 2445 index = (q-1)*(nx(nb)+2)*ny(nb) 2446 > + (j-1)*(nx(nb)+2) + 1 2447 index2 = (j-1)*nx(nb) + 1 2448 call dcopy(nx(nb),tmp(index2),1,A(index),1) 2449 A(index+nx(nb)) = 0.0d0 2450 A(index+nx(nb)+1) = 0.0d0 2451 end do 2452 else 2453 msglen = (nx(nb))*ny(nb) 2454 call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1) 2455 end if 2456 !end do 2457 end do 2458 2459* **** not master node **** 2460 else if (fillcolumn) then 2461 do k=1,nz(nb) 2462 2463 call D3dB_ktoqp(nb,k,q,ii) 2464 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 2465 if (p_here.eq.taskid) then 2466 2467 msglen = (nx(nb))*ny(nb) 2468 !source = MASTER 2469 source = Parallel2d_convert_taskid_ij(MASTER,taskid_j) 2470 2471 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 2472 > source,rcv_proc,1) 2473 do j=1,ny(nb) 2474 index = (q-1)*(nx(nb)+2)*ny(nb) 2475 > + (j-1)*(nx(nb)+2) + 1 2476 index2 = (j-1)*nx(nb) + 1 2477 call dcopy(nx(nb),tmp(index2),1,A(index),1) 2478 A(index+nx(nb)) = 0.0d0 2479 A(index+nx(nb)+1) = 0.0d0 2480 end do 2481 end if 2482 end do 2483 end if 2484 2485 2486 2487 !************************* 2488 !**** hilbert mapping **** 2489 !************************* 2490 else 2491* **** master node reads from file and distributes **** 2492 if (taskid.eq.MASTER) then 2493 do k=1,nz(nb) 2494 do j=1,ny(nb) 2495 2496 call dread(iunit,tmp,(nx(nb))) 2497 2498 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2499 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2500 !do jj=jstart,jend 2501 p_to = Parallel2d_convert_taskid_ij(ii,taskid_j) 2502 !p_to = Parallel2d_convert_taskid_ij(ii,jj) 2503 if (p_to.eq.MASTER) then 2504 index = (q-1)*(nx(nb)+2) + 1 2505 call dcopy((nx(nb)+2),tmp,1,A(index),1) 2506 else 2507 msglen = (nx(nb)+2) 2508 call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1) 2509 end if 2510 !end do 2511 end do 2512 end do 2513 2514* **** not master node **** 2515 else if (fillcolumn) then 2516 do k=1,nz(nb) 2517 do j=1,ny(nb) 2518 2519 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2520 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2521 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 2522 if (p_here.eq.taskid) then 2523 2524 msglen = (nx(nb)+2) 2525 !source = MASTER 2526 source = Parallel2d_convert_taskid_ij(MASTER,taskid_j) 2527 2528 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 2529 > source,rcv_proc,1) 2530 2531 index = (q-1)*(nx(nb)+2) + 1 2532 call dcopy((nx(nb)+2),tmp,1,A(index),1) 2533 2534 end if 2535 end do 2536 end do 2537 end if 2538 2539 end if 2540 2541 !*** shift filepointer by nx(nb)*ny(nb)*nz(nb) doubles **** 2542 else 2543 if (taskid_i.eq.MASTER) 2544 > call dshift_fileptr(iunit,nx(nb)*ny(nb)*nz(nb)) 2545 end if !*** readcolumn *** 2546 2547 return 2548 end 2549 2550 2551 2552 2553 subroutine D3dB_t_read_pio(nb,iunit,A,tmp,jcol) 2554 implicit none 2555 integer nb 2556 integer iunit 2557 real*8 A(*) 2558 real*8 tmp(*) 2559 integer jcol 2560 2561#include "bafdecls.fh" 2562#include "errquit.fh" 2563#include "D3dB.fh" 2564 2565 2566 2567#include "tcgmsg.fh" 2568#include "msgtypesf.h" 2569 integer rcv_len,rcv_proc 2570 2571 2572* *** local variables *** 2573 integer MASTER,taskid,taskid_i 2574 parameter(MASTER=0) 2575 2576 logical value,fillcolumn,readcolumn 2577 integer p_to, p_here,q 2578 integer j,k,index 2579 integer source,msglen 2580 integer tmp1(2),tmp2(2) 2581 2582 integer taskid_j,np_j 2583 integer ii,jj,jstart,jend 2584 2585* **** external functions **** 2586 integer Parallel2d_convert_taskid_ij 2587 external Parallel2d_convert_taskid_ij 2588 2589 call Parallel_taskid(taskid) 2590 call Parallel2d_taskid_i(taskid_i) 2591 call Parallel2d_taskid_j(taskid_j) 2592 2593c call Parallel2d_np_j(np_j) 2594 if (jcol.lt.0) then 2595c jstart = 0 2596c jend = np_j-1 2597 fillcolumn = .true. 2598 readcolumn = .true. 2599 else 2600c jstart = jcol 2601c jend = jcol 2602 fillcolumn = (taskid_j.eq.jcol) 2603 readcolumn = (taskid_j.eq.jcol) 2604 end if 2605 2606 if (readcolumn) then 2607 2608 2609 if (mapping.eq.1) then 2610* **** master node reads from file and distributes **** 2611 if (taskid_i.eq.MASTER) then 2612 do k=1,nz(nb) 2613 2614 call dread(iunit,tmp,(nx(nb)/2+1)*ny(nb)) 2615 2616 call D3dB_ktoqp(nb,k,q,ii) 2617 !do jj=jstart,jend 2618 2619 p_to = Parallel2d_convert_taskid_ij(ii,taskid_j) 2620 !p_to = Parallel2d_convert_taskid_ij(ii,jj) 2621 if (p_to.eq.MASTER) then 2622 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 2623 call dcopy((nx(nb)/2+1)*ny(nb),tmp,1,A(index),1) 2624 else 2625 msglen = (nx(nb)/2+1)*ny(nb) 2626 call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1) 2627 end if 2628 !end do 2629 end do 2630 2631* **** not master node **** 2632 else if (fillcolumn) then 2633 2634 do k=1,nz(nb) 2635 call D3dB_ktoqp(nb,k,q,ii) 2636 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 2637 if (p_here.eq.taskid) then 2638 2639 msglen = (nx(nb)/2+1)*ny(nb) 2640 !source = MASTER 2641 source = Parallel2d_convert_taskid_ij(MASTER,taskid_j) 2642 2643 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 2644 > source,rcv_proc,1) 2645 2646 2647 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 2648 call dcopy((nx(nb)/2+1)*ny(nb),tmp,1,A(index),1) 2649 2650 end if 2651 end do 2652 end if 2653 2654 2655 !************************* 2656 !**** hilbert mapping **** 2657 !************************* 2658 else 2659* **** master node reads from file and distributes **** 2660 if (taskid_i.eq.MASTER) then 2661 do k=1,nz(nb) 2662 do j=1,ny(nb) 2663 2664 call dread(iunit,tmp,(nx(nb)/2+1)) 2665 2666 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2667 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2668 !do jj=jstart,jend 2669 2670 p_to = Parallel2d_convert_taskid_ij(ii,taskid_j) 2671 !p_to = Parallel2d_convert_taskid_ij(ii,jj) 2672 if (ii.eq.MASTER) then 2673 index = (q-1)*(nx(nb)/2+1) + 1 2674 call dcopy((nx(nb)/2+1),tmp,1,A(index),1) 2675 else 2676 msglen = (nx(nb)/2+1) 2677 call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1) 2678 end if 2679 !end do 2680 end do 2681 end do 2682 2683* **** not master node **** 2684 else if (fillcolumn) then 2685 do k=1,nz(nb) 2686 do j=1,ny(nb) 2687 2688 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2689 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2690 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 2691 if (p_here.eq.taskid) then 2692 2693 msglen = (nx(nb)/2+1) 2694 !source = MASTER 2695 source = Parallel2d_convert_taskid_ij(MASTER,taskid_j) 2696 2697 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 2698 > source,rcv_proc,1) 2699 2700 2701 index = (q-1)*(nx(nb)/2+1) + 1 2702 call dcopy((nx(nb)/2+1),tmp,1,A(index),1) 2703 2704 end if 2705 end do 2706 end do 2707 end if 2708 2709* **** allocate temporary space **** 2710 value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1', 2711 > tmp1(2),tmp1(1)) 2712 value = value.and. 2713 > BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1)) 2714 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 2715* 2716 call D3dB_t_transpose_ijk(nb,5,A, 2717 > dcpl_mb(tmp1(1)), 2718 > dcpl_mb(tmp2(1))) !*** map1to3 operation *** 2719 2720* **** deallocate temporary space **** 2721 value = BA_pop_stack(tmp2(2)) 2722 value = value.and.BA_pop_stack(tmp1(2)) 2723 if (.not. value) call errquit('error popping stack',0, MA_ERR) 2724 2725 2726 2727 end if 2728 2729 2730 !*** shift filepointer by (nx(nb)/2+1)*ny(nb)*nz(nb) doubles **** 2731 else 2732 if (taskid_i.eq.MASTER) 2733 > call dshift_fileptr(iunit,(nx(nb)/2+1)*ny(nb)*nz(nb)) 2734 end if !*** readcolumn *** 2735 2736 return 2737 end 2738 2739 2740 2741 2742* *********************************** 2743* * * 2744* * D3dB_(c,r,t)_write * 2745* * * 2746* *********************************** 2747 2748 subroutine D3dB_c_write_pio(nb,iunit,A,tmp,jcol) 2749 implicit none 2750 integer nb 2751 integer iunit 2752 complex*16 A(*) 2753 complex*16 tmp(*) 2754 integer jcol 2755 2756 2757#include "bafdecls.fh" 2758#include "errquit.fh" 2759#include "D3dB.fh" 2760 2761 2762#include "tcgmsg.fh" 2763#include "msgtypesf.h" 2764 integer rcv_len,rcv_proc 2765 2766 2767* *** local variables *** 2768 logical writecolumn 2769 integer MASTER,taskid,taskid_i 2770 parameter(MASTER=0) 2771 logical value 2772 integer p_from, p_here,q 2773 integer j,k,index 2774 integer dest,source,status,msglen 2775 integer dum,dum_msglen 2776 integer tmp1(2),tmp2(2) 2777 2778 integer ii,taskid_j 2779 2780* **** external functions **** 2781 integer Parallel2d_convert_taskid_ij 2782 external Parallel2d_convert_taskid_ij 2783 2784 call Parallel_taskid(taskid) 2785 call Parallel2d_taskid_i(taskid_i) 2786 call Parallel2d_taskid_j(taskid_j) 2787 writecolumn = (taskid_j.eq.jcol) 2788 2789 if (writecolumn) then 2790 2791 !********************** 2792 !**** slab mapping **** 2793 !********************** 2794 if (mapping.eq.1) then 2795* **** master node reads from file and distributes **** 2796 if (taskid_i.eq.MASTER) then 2797 do k=1,nz(nb) 2798 2799 call D3dB_ktoqp(nb,k,q,ii) 2800 2801 p_from = Parallel2d_convert_taskid_ij(ii,taskid_j) 2802 !p_from = Parallel2d_convert_taskid_ij(ii,jcol) 2803 if (ii.eq.MASTER) then 2804 2805 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 2806 call dcopy((nx(nb)+2)*ny(nb),A(index),1,tmp,1) 2807 2808 else 2809 2810 msglen = (nx(nb)/2+1)*ny(nb) 2811 status = msglen 2812 source = p_from 2813 2814 dum = 99 2815 dum_msglen = 1 2816 call SND(9+MSGINT,dum,mitob(dum_msglen),source,1) 2817 call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len, 2818 > source,rcv_proc,1) 2819 2820 end if 2821 2822 call dwrite(iunit,tmp,(nx(nb)+2)*ny(nb)) 2823 end do 2824 2825* **** not master node **** 2826 else 2827 do k=1,nz(nb) 2828 2829 call D3dB_ktoqp(nb,k,q,ii) 2830 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 2831 if (p_here.eq.taskid) then 2832 2833 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 2834 call dcopy((nx(nb)+2)*ny(nb),A(index),1,tmp,1) 2835 2836 2837 msglen = (nx(nb)/2+1)*ny(nb) 2838 !dest = MASTER 2839 dest = Parallel2d_convert_taskid_ij(MASTER,taskid_j) 2840 2841 dum_msglen = 1 2842 call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len, 2843 > dest,rcv_proc,1) 2844 call SND(9+MSGDBL,tmp,mdtob(2*msglen),dest,1) 2845 2846 end if 2847 2848 end do 2849 end if 2850 2851 2852 !************************* 2853 !**** hilbert mapping **** 2854 !************************* 2855 else 2856 2857 if (taskid_j.eq.jcol) then 2858 2859* **** allocate temporary space **** 2860 value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1', 2861 > tmp1(2),tmp1(1)) 2862 value = value.and. 2863 > BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1)) 2864 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 2865* 2866 call D3dB_c_transpose_ijk(nb,6,A, 2867 > dcpl_mb(tmp1(1)), 2868 > dcpl_mb(tmp2(1))) !*** map3to1 operation *** 2869 2870* **** deallocate temporary space **** 2871 value = BA_pop_stack(tmp2(2)) 2872 value = value.and.BA_pop_stack(tmp1(2)) 2873 if (.not. value) call errquit('error popping stack',0, MA_ERR) 2874 2875 end if 2876 2877 2878* **** master node reads from file and distributes **** 2879 if (taskid_i.eq.MASTER) then 2880 do k=1,nz(nb) 2881 do j=1,ny(nb) 2882 2883 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2884 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2885 p_from = Parallel2d_convert_taskid_ij(ii,taskid_j) 2886 !p_from = Parallel2d_convert_taskid_ij(ii,jcol) 2887 if (ii.eq.MASTER) then 2888 2889 index = (q-1)*(nx(nb)/2+1) + 1 2890 call dcopy((nx(nb)+2),A(index),1,tmp,1) 2891 2892 else 2893 2894 msglen = (nx(nb)/2+1) 2895 status = msglen 2896 source = p_from 2897 2898 dum = 99 2899 dum_msglen = 1 2900 call SND(9+MSGINT,dum,mitob(dum_msglen),source,1) 2901 call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len, 2902 > source,rcv_proc,1) 2903 2904 end if 2905 2906 call dwrite(iunit,tmp,(nx(nb)+2)) 2907 end do 2908 end do 2909 2910* **** not master node **** 2911 else 2912 do k=1,nz(nb) 2913 do j=1,ny(nb) 2914 2915 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2916 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 2917 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 2918 if (p_here.eq.taskid) then 2919 2920 index = (q-1)*(nx(nb)/2+1) + 1 2921 call dcopy((nx(nb)+2),A(index),1,tmp,1) 2922 2923 2924 msglen = (nx(nb)/2+1) 2925 !dest = MASTER 2926 dest = Parallel2d_convert_taskid_ij(MASTER,taskid_j) 2927 2928 dum_msglen = 1 2929 call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len, 2930 > dest,rcv_proc,1) 2931 call SND(9+MSGDBL,tmp,mdtob(2*msglen),dest,1) 2932 2933 end if 2934 2935 end do 2936 end do 2937 end if 2938 2939 end if 2940 2941 2942 !*** shift filepointer by (nx(nb)+2)*ny(nb)*nz(nb) doubles **** 2943 else 2944 if (taskid_i.eq.MASTER) 2945 > call dshift_fileptr(iunit,(nx(nb)+2)*ny(nb)*nz(nb)) 2946 end if !*** writecolumn *** 2947 2948 2949 return 2950 end 2951 2952 subroutine D3dB_r_write_pio(nb,iunit,A,tmp,jcol) 2953 implicit none 2954 integer nb 2955 integer iunit 2956 real*8 A(*) 2957 real*8 tmp(*) 2958 integer jcol 2959 2960#include "bafdecls.fh" 2961#include "D3dB.fh" 2962 2963 2964 2965#include "tcgmsg.fh" 2966#include "msgtypesf.h" 2967 integer rcv_len,rcv_proc 2968 2969 2970* *** local variables *** 2971 logical writecolumn 2972 integer MASTER,taskid,taskid_i 2973 parameter(MASTER=0) 2974 integer p_from, p_here,q 2975 integer j,k,index,index2 2976 integer dest,source,status,msglen 2977 2978 integer taskid_j,ii 2979 2980* **** external functions **** 2981 integer Parallel2d_convert_taskid_ij 2982 external Parallel2d_convert_taskid_ij 2983 2984 call Parallel_taskid(taskid) 2985 call Parallel2d_taskid_i(taskid_i) 2986 call Parallel2d_taskid_j(taskid_j) 2987 writecolumn = (taskid_j.eq.jcol) 2988 2989 if (writecolumn) then 2990 2991 !********************** 2992 !**** slab mapping **** 2993 !********************** 2994 if (mapping.eq.1) then 2995* **** master node reads from file and distributes **** 2996 if (taskid_i.eq.MASTER) then 2997 do k=1,nz(nb) 2998 2999 call D3dB_ktoqp(nb,k,q,ii) 3000 p_from = Parallel2d_convert_taskid_ij(ii,taskid_j) 3001 if (ii.eq.MASTER) then 3002 3003 do j=1,ny(nb) 3004 index = (q-1)*(nx(nb)+2)*ny(nb) 3005 > + (j-1)*(nx(nb)+2) + 1 3006 index2 = (j-1)*nx(nb) + 1 3007 call dcopy(nx(nb),A(index),1,tmp(index2),1) 3008 end do 3009 3010 else 3011 3012 msglen = (nx(nb))*ny(nb) 3013 status = msglen 3014 source = p_from 3015 3016 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 3017 > source,rcv_proc,1) 3018 3019 end if 3020 3021 call dwrite(iunit,tmp,(nx(nb))*ny(nb)) 3022 end do 3023 3024* **** not master node **** 3025 else 3026 do k=1,nz(nb) 3027 3028 call D3dB_ktoqp(nb,k,q,ii) 3029 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 3030 !p_here = Parallel2d_convert_taskid_ij(ii,jcol) 3031 if (p_here.eq.taskid) then 3032 3033 do j=1,ny(nb) 3034 index = (q-1)*(nx(nb)+2)*ny(nb) 3035 > + (j-1)*(nx(nb)+2) + 1 3036 index2 = (j-1)*nx(nb) + 1 3037 call dcopy(nx(nb),A(index),1,tmp(index2),1) 3038 end do 3039 3040 3041 msglen = (nx(nb))*ny(nb) 3042 !dest = MASTER 3043 dest = Parallel2d_convert_taskid_ij(MASTER,taskid_j) 3044 3045 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 3046 3047 end if 3048 3049 end do 3050 end if 3051 3052 !************************* 3053 !**** hilbert mapping **** 3054 !************************* 3055 else 3056* **** master node reads from file and distributes **** 3057 if (taskid_i.eq.MASTER) then 3058 do k=1,nz(nb) 3059 do j=1,ny(nb) 3060 3061 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3062 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3063 p_from = Parallel2d_convert_taskid_ij(ii,taskid_j) 3064 3065 if (ii.eq.MASTER) then 3066 index = (q-1)*(nx(nb)+2) + 1 3067 call dcopy(nx(nb),A(index),1,tmp,1) 3068 3069 else 3070 3071 msglen = (nx(nb)) 3072 status = msglen 3073 source = p_from 3074 3075 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 3076 > source,rcv_proc,1) 3077 3078 end if 3079 3080 call dwrite(iunit,tmp,(nx(nb))) 3081 end do 3082 end do 3083 3084* **** not master node **** 3085 else 3086 do k=1,nz(nb) 3087 do j=1,ny(nb) 3088 3089 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3090 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3091 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 3092 if (p_here.eq.taskid) then 3093 3094 index = (q-1)*(nx(nb)+2) + 1 3095 call dcopy(nx(nb),A(index),1,tmp,1) 3096 3097 msglen = nx(nb) 3098 dest = MASTER 3099 3100 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 3101 3102 end if 3103 3104 end do 3105 end do 3106 end if 3107 3108 end if 3109 3110 !*** shift filepointer by nx(nb)*ny(nb)*nz(nb) doubles **** 3111 else 3112 if (taskid_i.eq.MASTER) 3113 > call dshift_fileptr(iunit,nx(nb)*ny(nb)*nz(nb)) 3114 end if !*** writecolumn *** 3115 3116 return 3117 end 3118 3119 subroutine D3dB_t_write_pio(nb,iunit,A,tmp,jcol) 3120 implicit none 3121 integer nb 3122 integer iunit 3123 real*8 A(*) 3124 real*8 tmp(*) 3125 integer jcol 3126 3127#include "bafdecls.fh" 3128#include "errquit.fh" 3129#include "D3dB.fh" 3130#include "tcgmsg.fh" 3131#include "msgtypesf.h" 3132 3133 integer rcv_len,rcv_proc 3134 3135 3136* *** local variables *** 3137 logical writecolumn 3138 integer MASTER,taskid,taskid_i 3139 parameter(MASTER=0) 3140 logical value 3141 integer p_from, p_here,q 3142 integer j,k,index 3143 integer dest,source,status,msglen 3144 integer dum,dum_msglen 3145 integer tmp1(2),tmp2(2) 3146 3147 integer ii,taskid_j 3148 3149* **** external functions **** 3150 integer Parallel2d_convert_taskid_ij 3151 external Parallel2d_convert_taskid_ij 3152 3153 call Parallel_taskid(taskid) 3154 call Parallel2d_taskid_i(taskid_i) 3155 call Parallel2d_taskid_j(taskid_j) 3156 writecolumn = (taskid_j.eq.jcol) 3157 3158 if (writecolumn) then 3159 3160 !********************** 3161 !**** slab mapping **** 3162 !********************** 3163 if (mapping.eq.1) then 3164* **** master node reads from file and distributes **** 3165 if (taskid_i.eq.MASTER) then 3166 do k=1,nz(nb) 3167 3168 call D3dB_ktoqp(nb,k,q,ii) 3169 3170 p_from = Parallel2d_convert_taskid_ij(ii,taskid_j) 3171 if (ii.eq.MASTER) then 3172 3173 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 3174 call dcopy((nx(nb)/2+1)*ny(nb),A(index),1,tmp,1) 3175 3176 else 3177 3178 msglen = (nx(nb)/2+1)*ny(nb) 3179 status = msglen 3180 source = p_from 3181 3182 dum = 99 3183 dum_msglen = 1 3184 call SND(9+MSGINT,dum,mitob(dum_msglen),source,1) 3185 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 3186 > source,rcv_proc,1) 3187 3188 end if 3189 3190 call dwrite(iunit,tmp,(nx(nb)/2+1)*ny(nb)) 3191 end do 3192 3193* **** not master node **** 3194 else 3195 do k=1,nz(nb) 3196 3197 call D3dB_ktoqp(nb,k,q,ii) 3198 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 3199 if (p_here.eq.taskid) then 3200 3201 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 3202 call dcopy((nx(nb)/2+1)*ny(nb),A(index),1,tmp,1) 3203 3204 3205 msglen = (nx(nb)/2+1)*ny(nb) 3206 !dest = MASTER 3207 dest = Parallel2d_convert_taskid_ij(MASTER,taskid_j) 3208 3209 dum_msglen = 1 3210 call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len, 3211 > dest,rcv_proc,1) 3212 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 3213 3214 end if 3215 3216 end do 3217 end if 3218 3219 3220 !************************* 3221 !**** hilbert mapping **** 3222 !************************* 3223 else 3224 3225 if (taskid_j.eq.jcol) then 3226 3227* **** allocate temporary space **** 3228 value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1', 3229 > tmp1(2),tmp1(1)) 3230 value = value.and. 3231 > BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1)) 3232 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 3233* 3234 call D3dB_t_transpose_ijk(nb,6,A, 3235 > dcpl_mb(tmp1(1)), 3236 > dcpl_mb(tmp2(1))) !*** map3to1 operation *** 3237 3238* **** deallocate temporary space **** 3239 value = BA_pop_stack(tmp2(2)) 3240 value = value.and.BA_pop_stack(tmp1(2)) 3241 if (.not. value) call errquit('error popping stack',0, MA_ERR) 3242 3243 end if 3244 3245 3246* **** master node reads from file and distributes **** 3247 if (taskid_i.eq.MASTER) then 3248 do k=1,nz(nb) 3249 do j=1,ny(nb) 3250 3251 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3252 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3253 p_from = Parallel2d_convert_taskid_ij(ii,taskid_j) 3254 if (ii.eq.MASTER) then 3255 3256 index = (q-1)*(nx(nb)/2+1) + 1 3257 call dcopy((nx(nb)/2+1),A(index),1,tmp,1) 3258 3259 else 3260 3261 msglen = (nx(nb)/2+1) 3262 status = msglen 3263 source = p_from 3264 3265 dum = 99 3266 dum_msglen = 1 3267 call SND(9+MSGINT,dum,mitob(dum_msglen),source,1) 3268 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 3269 > source,rcv_proc,1) 3270 3271 end if 3272 3273 call dwrite(iunit,tmp,(nx(nb)/2+1)) 3274 end do 3275 end do 3276 3277* **** not master node **** 3278 else 3279 do k=1,nz(nb) 3280 do j=1,ny(nb) 3281 3282 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3283 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3284 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 3285 if (p_here.eq.taskid) then 3286 3287 index = (q-1)*(nx(nb)/2+1) + 1 3288 call dcopy((nx(nb)/2+1),A(index),1,tmp,1) 3289 3290 3291 msglen = (nx(nb)/2+1) 3292 !dest = MASTER 3293 dest = Parallel2d_convert_taskid_ij(MASTER,taskid_j) 3294 3295 dum_msglen = 1 3296 call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len, 3297 > dest,rcv_proc,1) 3298 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 3299 3300 end if 3301 3302 end do 3303 end do 3304 end if 3305 end if 3306 3307 !*** shift filepointer by (nx(nb)/2+1)*ny(nb)*nz(nb) doubles **** 3308 else 3309 if (taskid_i.eq.MASTER) 3310 > call dshift_fileptr(iunit,(nx(nb)/2+1)*ny(nb)*nz(nb)) 3311 end if !*** writecolumn *** 3312 3313 3314 return 3315 end 3316 3317 3318 3319 3320 subroutine D3dB_c_read(nb,iunit,A,tmp,jcol) 3321 implicit none 3322 integer nb 3323 integer iunit 3324 complex*16 A(*) 3325 complex*16 tmp(*) 3326 integer jcol 3327 3328#include "bafdecls.fh" 3329#include "errquit.fh" 3330 3331#include "D3dB.fh" 3332 3333#include "tcgmsg.fh" 3334#include "msgtypesf.h" 3335 integer rcv_len,rcv_proc 3336 3337 3338* *** local variables *** 3339 logical value,fillcolumn 3340 integer MASTER,taskid 3341 parameter(MASTER=0) 3342 integer p_to, p_here,q 3343 integer index,k,j 3344 integer source,msglen 3345 integer tmp1(2),tmp2(2) 3346 3347 integer taskid_j,np_j 3348 integer ii,jj,jstart,jend 3349 3350* **** external functions **** 3351 integer Parallel2d_convert_taskid_ij 3352 external Parallel2d_convert_taskid_ij 3353 3354 call Parallel_taskid(taskid) 3355 call Parallel2d_taskid_j(taskid_j) 3356 call Parallel2d_np_j(np_j) 3357 if (jcol.lt.0) then 3358 jstart = 0 3359 jend = np_j-1 3360 fillcolumn = .true. 3361 else 3362 jstart = jcol 3363 jend = jcol 3364 fillcolumn = (taskid_j.eq.jcol) 3365 end if 3366 3367 3368 3369 !********************** 3370 !**** slab mapping **** 3371 !********************** 3372 if (mapping.eq.1) then 3373* **** master node reads from file and distributes **** 3374 if (taskid.eq.MASTER) then 3375 do k=1,nz(nb) 3376 3377 call dread(iunit,tmp,(nx(nb)+2)*ny(nb)) 3378 3379 call D3dB_ktoqp(nb,k,q,ii) 3380 do jj=jstart,jend 3381 p_to = Parallel2d_convert_taskid_ij(ii,jj) 3382 if (p_to.eq.MASTER) then 3383 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 3384 call dcopy((nx(nb)+2)*ny(nb),tmp,1,A(index),1) 3385 else 3386 msglen = (nx(nb)/2+1)*ny(nb) 3387 call SND(9+MSGDBL,tmp,mdtob(2*msglen),p_to,1) 3388 end if 3389 end do 3390 end do 3391 3392* **** not master node **** 3393 else if (fillcolumn) then 3394 do k=1,nz(nb) 3395 call D3dB_ktoqp(nb,k,q,ii) 3396 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 3397 if (p_here.eq.taskid) then 3398 3399 msglen = (nx(nb)/2+1)*ny(nb) 3400 source = MASTER 3401 3402 call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len, 3403 > source,rcv_proc,1) 3404 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 3405 call dcopy((nx(nb)+2)*ny(nb),tmp,1,A(index),1) 3406 3407 end if 3408 end do 3409 end if 3410 3411 !************************* 3412 !**** hilbert mapping **** 3413 !************************* 3414 else 3415 3416* **** master node reads from file and distributes **** 3417 if (taskid.eq.MASTER) then 3418 do k=1,nz(nb) 3419 do j=1,ny(nb) 3420 3421 call dread(iunit,tmp,(nx(nb)+2)) 3422 3423 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3424 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3425 do jj=jstart,jend 3426 p_to = Parallel2d_convert_taskid_ij(ii,jj) 3427 3428 if (p_to.eq.MASTER) then 3429 index = (q-1)*(nx(nb)/2+1) + 1 3430 call dcopy((nx(nb)+2),tmp,1,A(index),1) 3431 else 3432 msglen = (nx(nb)/2+1) 3433 call SND(9+MSGDBL,tmp,mdtob(2*msglen),p_to,1) 3434 end if 3435 end do 3436 end do 3437 end do 3438 3439* **** not master node **** 3440 else if (fillcolumn) then 3441 do k=1,nz(nb) 3442 do j=1,ny(nb) 3443 3444 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3445 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3446 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 3447 if (p_here.eq.taskid) then 3448 msglen = (nx(nb)/2+1) 3449 source = MASTER 3450 call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len, 3451 > source,rcv_proc,1) 3452 3453 index = (q-1)*(nx(nb)/2+1) + 1 3454 call dcopy((nx(nb)+2),tmp,1,A(index),1) 3455 end if 3456 end do 3457 end do 3458 end if 3459 3460 3461* **** allocate temporary space **** 3462 value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1', 3463 > tmp1(2),tmp1(1)) 3464 value = value.and. 3465 > BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1)) 3466 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 3467* 3468 call D3dB_c_transpose_ijk(nb,5,A, 3469 > dcpl_mb(tmp1(1)), 3470 > dcpl_mb(tmp2(1))) !*** map1to3 operation *** 3471 3472* **** deallocate temporary space **** 3473 value = BA_pop_stack(tmp2(2)) 3474 value = value.and.BA_pop_stack(tmp1(2)) 3475 if (.not. value) call errquit('error popping stack',0, MA_ERR) 3476 3477 end if 3478 3479 3480 return 3481 end 3482 3483 3484 3485 subroutine D3dB_r_read(nb,iunit,A,tmp,jcol) 3486 implicit none 3487 integer nb 3488 integer iunit 3489 real*8 A(*) 3490 real*8 tmp(*) 3491 integer jcol 3492 3493#include "bafdecls.fh" 3494#include "D3dB.fh" 3495 3496#include "tcgmsg.fh" 3497#include "msgtypesf.h" 3498 integer rcv_len,rcv_proc 3499 3500 3501* *** local variables *** 3502 logical fillcolumn 3503 integer MASTER,taskid 3504 parameter(MASTER=0) 3505 integer p_to, p_here,q 3506 integer j,k,index,index2 3507 integer source,msglen 3508 3509 integer taskid_j,np_j 3510 integer ii,jj,jstart,jend 3511 3512* **** external functions **** 3513 integer Parallel2d_convert_taskid_ij 3514 external Parallel2d_convert_taskid_ij 3515 3516 call Parallel_taskid(taskid) 3517 call Parallel2d_taskid_j(taskid_j) 3518 call Parallel2d_np_j(np_j) 3519 if (jcol.lt.0) then 3520 jstart = 0 3521 jend = np_j-1 3522 fillcolumn = .true. 3523 else 3524 jstart = jcol 3525 jend = jcol 3526 fillcolumn = (taskid_j.eq.jcol) 3527 end if 3528 3529 3530 !********************** 3531 !**** slab mapping **** 3532 !********************** 3533 if (mapping.eq.1) then 3534* **** master node reads from file and distributes **** 3535 if (taskid.eq.MASTER) then 3536 do k=1,nz(nb) 3537 3538 call dread(iunit,tmp,(nx(nb))*ny(nb)) 3539 3540 call D3dB_ktoqp(nb,k,q,ii) 3541 do jj=jstart,jend 3542 p_to = Parallel2d_convert_taskid_ij(ii,jj) 3543 if (p_to.eq.MASTER) then 3544 do j=1,ny(nb) 3545 index = (q-1)*(nx(nb)+2)*ny(nb) 3546 > + (j-1)*(nx(nb)+2) + 1 3547 index2 = (j-1)*nx(nb) + 1 3548 call dcopy(nx(nb),tmp(index2),1,A(index),1) 3549 A(index+nx(nb)) = 0.0d0 3550 A(index+nx(nb)+1) = 0.0d0 3551 end do 3552 else 3553 msglen = (nx(nb))*ny(nb) 3554 call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1) 3555 end if 3556 end do 3557 end do 3558 3559* **** not master node **** 3560 else if (fillcolumn) then 3561 do k=1,nz(nb) 3562 3563 call D3dB_ktoqp(nb,k,q,ii) 3564 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 3565 if (p_here.eq.taskid) then 3566 3567 msglen = (nx(nb))*ny(nb) 3568 source = MASTER 3569 3570 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 3571 > source,rcv_proc,1) 3572 do j=1,ny(nb) 3573 index = (q-1)*(nx(nb)+2)*ny(nb) 3574 > + (j-1)*(nx(nb)+2) + 1 3575 index2 = (j-1)*nx(nb) + 1 3576 call dcopy(nx(nb),tmp(index2),1,A(index),1) 3577 A(index+nx(nb)) = 0.0d0 3578 A(index+nx(nb)+1) = 0.0d0 3579 end do 3580 end if 3581 end do 3582 end if 3583 3584 3585 3586 !************************* 3587 !**** hilbert mapping **** 3588 !************************* 3589 else 3590* **** master node reads from file and distributes **** 3591 if (taskid.eq.MASTER) then 3592 do k=1,nz(nb) 3593 do j=1,ny(nb) 3594 3595 call dread(iunit,tmp,(nx(nb))) 3596 3597 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3598 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3599 do jj=jstart,jend 3600 p_to = Parallel2d_convert_taskid_ij(ii,jj) 3601 if (p_to.eq.MASTER) then 3602 index = (q-1)*(nx(nb)+2) + 1 3603 call dcopy((nx(nb)+2),tmp,1,A(index),1) 3604 else 3605 msglen = (nx(nb)+2) 3606 call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1) 3607 end if 3608 end do 3609 end do 3610 end do 3611 3612* **** not master node **** 3613 else if (fillcolumn) then 3614 do k=1,nz(nb) 3615 do j=1,ny(nb) 3616 3617 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3618 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3619 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 3620 if (p_here.eq.taskid) then 3621 3622 msglen = (nx(nb)+2) 3623 source = MASTER 3624 3625 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 3626 > source,rcv_proc,1) 3627 3628 index = (q-1)*(nx(nb)+2) + 1 3629 call dcopy((nx(nb)+2),tmp,1,A(index),1) 3630 3631 end if 3632 end do 3633 end do 3634 end if 3635 3636 end if 3637 3638 return 3639 end 3640 3641 3642 3643 subroutine D3dB_t_read(nb,iunit,A,tmp,jcol) 3644 implicit none 3645 integer nb 3646 integer iunit 3647 real*8 A(*) 3648 real*8 tmp(*) 3649 integer jcol 3650 3651#include "bafdecls.fh" 3652#include "errquit.fh" 3653#include "D3dB.fh" 3654 3655 3656 3657#include "tcgmsg.fh" 3658#include "msgtypesf.h" 3659 integer rcv_len,rcv_proc 3660 3661 3662* *** local variables *** 3663 integer MASTER,taskid 3664 parameter(MASTER=0) 3665 3666 logical value,fillcolumn 3667 integer p_to, p_here,q 3668 integer j,k,index 3669 integer source,msglen 3670 integer tmp1(2),tmp2(2) 3671 3672 integer taskid_j,np_j 3673 integer ii,jj,jstart,jend 3674 3675* **** external functions **** 3676 integer Parallel2d_convert_taskid_ij 3677 external Parallel2d_convert_taskid_ij 3678 3679 call Parallel_taskid(taskid) 3680 call Parallel2d_taskid_j(taskid_j) 3681 call Parallel2d_np_j(np_j) 3682 if (jcol.lt.0) then 3683 jstart = 0 3684 jend = np_j-1 3685 fillcolumn = .true. 3686 else 3687 jstart = jcol 3688 jend = jcol 3689 fillcolumn = (taskid_j.eq.jcol) 3690 end if 3691 3692 3693 3694 if (mapping.eq.1) then 3695* **** master node reads from file and distributes **** 3696 if (taskid.eq.MASTER) then 3697 do k=1,nz(nb) 3698 3699 call dread(iunit,tmp,(nx(nb)/2+1)*ny(nb)) 3700 3701 call D3dB_ktoqp(nb,k,q,ii) 3702 !do jj=jstart,jend 3703 3704 p_to = Parallel2d_convert_taskid_ij(ii,jj) 3705 if (p_to.eq.MASTER) then 3706 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 3707 call dcopy((nx(nb)/2+1)*ny(nb),tmp,1,A(index),1) 3708 else 3709 msglen = (nx(nb)/2+1)*ny(nb) 3710 call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1) 3711 end if 3712 !end do 3713 end do 3714 3715* **** not master node **** 3716 else if (fillcolumn) then 3717 3718 do k=1,nz(nb) 3719 call D3dB_ktoqp(nb,k,q,ii) 3720 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 3721 if (p_here.eq.taskid) then 3722 3723 msglen = (nx(nb)/2+1)*ny(nb) 3724 source = MASTER 3725 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 3726 > source,rcv_proc,1) 3727 3728 3729 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 3730 call dcopy((nx(nb)/2+1)*ny(nb),tmp,1,A(index),1) 3731 3732 end if 3733 end do 3734 end if 3735 3736 3737 !************************* 3738 !**** hilbert mapping **** 3739 !************************* 3740 else 3741* **** master node reads from file and distributes **** 3742 if (taskid.eq.MASTER) then 3743 do k=1,nz(nb) 3744 do j=1,ny(nb) 3745 3746 call dread(iunit,tmp,(nx(nb)/2+1)) 3747 3748 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3749 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3750 do jj=jstart,jend 3751 3752 p_to = Parallel2d_convert_taskid_ij(ii,jj) 3753 if (p_to.eq.MASTER) then 3754 index = (q-1)*(nx(nb)/2+1) + 1 3755 call dcopy((nx(nb)/2+1),tmp,1,A(index),1) 3756 else 3757 msglen = (nx(nb)/2+1) 3758 call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1) 3759 end if 3760 end do 3761 end do 3762 end do 3763 3764* **** not master node **** 3765 else if (fillcolumn) then 3766 do k=1,nz(nb) 3767 do j=1,ny(nb) 3768 3769 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3770 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3771 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 3772 if (p_here.eq.taskid) then 3773 3774 msglen = (nx(nb)/2+1) 3775 source = MASTER 3776 3777 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 3778 > source,rcv_proc,1) 3779 3780 index = (q-1)*(nx(nb)/2+1) + 1 3781 call dcopy((nx(nb)/2+1),tmp,1,A(index),1) 3782 3783 end if 3784 end do 3785 end do 3786 end if 3787 3788* **** allocate temporary space **** 3789 value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1', 3790 > tmp1(2),tmp1(1)) 3791 value = value.and. 3792 > BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1)) 3793 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 3794* 3795 call D3dB_t_transpose_ijk(nb,5,A, 3796 > dcpl_mb(tmp1(1)), 3797 > dcpl_mb(tmp2(1))) !*** map1to3 operation *** 3798 3799* **** deallocate temporary space **** 3800 value = BA_pop_stack(tmp2(2)) 3801 value = value.and.BA_pop_stack(tmp1(2)) 3802 if (.not. value) call errquit('error popping stack',0, MA_ERR) 3803 3804 3805 3806 end if 3807 3808 3809 return 3810 end 3811 3812 3813 3814 3815* *********************************** 3816* * * 3817* * D3dB_(c,r,t)_write * 3818* * * 3819* *********************************** 3820 3821 subroutine D3dB_c_write(nb,iunit,A,tmp,jcol) 3822 implicit none 3823 integer nb 3824 integer iunit 3825 complex*16 A(*) 3826 complex*16 tmp(*) 3827 integer jcol 3828 3829 3830#include "bafdecls.fh" 3831#include "errquit.fh" 3832#include "D3dB.fh" 3833 3834 3835#include "tcgmsg.fh" 3836#include "msgtypesf.h" 3837 integer rcv_len,rcv_proc 3838 3839 3840* *** local variables *** 3841 integer MASTER,taskid 3842 parameter(MASTER=0) 3843 logical value 3844 integer p_from, p_here,q 3845 integer j,k,index 3846 integer dest,source,status,msglen 3847 integer dum,dum_msglen 3848 integer tmp1(2),tmp2(2) 3849 3850 integer ii,taskid_j 3851 3852* **** external functions **** 3853 integer Parallel2d_convert_taskid_ij 3854 external Parallel2d_convert_taskid_ij 3855 3856 call Parallel_taskid(taskid) 3857 call Parallel2d_taskid_j(taskid_j) 3858 3859 3860 !********************** 3861 !**** slab mapping **** 3862 !********************** 3863 if (mapping.eq.1) then 3864* **** master node reads from file and distributes **** 3865 if (taskid.eq.MASTER) then 3866 do k=1,nz(nb) 3867 3868 call D3dB_ktoqp(nb,k,q,ii) 3869 3870 p_from = Parallel2d_convert_taskid_ij(ii,jcol) 3871 if (p_from.eq.MASTER) then 3872 3873 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 3874 call dcopy((nx(nb)+2)*ny(nb),A(index),1,tmp,1) 3875 3876 else 3877 3878 msglen = (nx(nb)/2+1)*ny(nb) 3879 status = msglen 3880 source = p_from 3881 3882 dum = 99 3883 dum_msglen = 1 3884 call SND(9+MSGINT,dum,mitob(dum_msglen),source,1) 3885 call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len, 3886 > source,rcv_proc,1) 3887 3888 end if 3889 3890 call dwrite(iunit,tmp,(nx(nb)+2)*ny(nb)) 3891 end do 3892 3893* **** not master node **** 3894 else 3895 do k=1,nz(nb) 3896 3897 call D3dB_ktoqp(nb,k,q,ii) 3898 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 3899 if (p_here.eq.taskid) then 3900 3901 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 3902 call dcopy((nx(nb)+2)*ny(nb),A(index),1,tmp,1) 3903 3904 3905 msglen = (nx(nb)/2+1)*ny(nb) 3906 dest = MASTER 3907 3908 dum_msglen = 1 3909 call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len, 3910 > dest,rcv_proc,1) 3911 call SND(9+MSGDBL,tmp,mdtob(2*msglen),dest,1) 3912 3913 end if 3914 3915 end do 3916 end if 3917 3918 3919 !************************* 3920 !**** hilbert mapping **** 3921 !************************* 3922 else 3923 3924 if (taskid_j.eq.jcol) then 3925 3926* **** allocate temporary space **** 3927 value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1', 3928 > tmp1(2),tmp1(1)) 3929 value = value.and. 3930 > BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1)) 3931 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 3932* 3933 call D3dB_c_transpose_ijk(nb,6,A, 3934 > dcpl_mb(tmp1(1)), 3935 > dcpl_mb(tmp2(1))) !*** map3to1 operation *** 3936 3937* **** deallocate temporary space **** 3938 value = BA_pop_stack(tmp2(2)) 3939 value = value.and.BA_pop_stack(tmp1(2)) 3940 if (.not. value) call errquit('error popping stack',0, MA_ERR) 3941 3942 end if 3943 3944 3945* **** master node reads from file and distributes **** 3946 if (taskid.eq.MASTER) then 3947 do k=1,nz(nb) 3948 do j=1,ny(nb) 3949 3950 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3951 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3952 p_from = Parallel2d_convert_taskid_ij(ii,jcol) 3953 if (p_from.eq.MASTER) then 3954 3955 index = (q-1)*(nx(nb)/2+1) + 1 3956 call dcopy((nx(nb)+2),A(index),1,tmp,1) 3957 3958 else 3959 3960 msglen = (nx(nb)/2+1) 3961 status = msglen 3962 source = p_from 3963 3964 dum = 99 3965 dum_msglen = 1 3966 call SND(9+MSGINT,dum,mitob(dum_msglen),source,1) 3967 call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len, 3968 > source,rcv_proc,1) 3969 3970 end if 3971 3972 call dwrite(iunit,tmp,(nx(nb)+2)) 3973 end do 3974 end do 3975 3976* **** not master node **** 3977 else 3978 do k=1,nz(nb) 3979 do j=1,ny(nb) 3980 3981 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3982 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 3983 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 3984 if (p_here.eq.taskid) then 3985 3986 index = (q-1)*(nx(nb)/2+1) + 1 3987 call dcopy((nx(nb)+2),A(index),1,tmp,1) 3988 3989 msglen = (nx(nb)/2+1) 3990 dest = MASTER 3991 3992 dum_msglen = 1 3993 call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len, 3994 > dest,rcv_proc,1) 3995 call SND(9+MSGDBL,tmp,mdtob(2*msglen),dest,1) 3996 3997 end if 3998 3999 end do 4000 end do 4001 end if 4002 4003 end if 4004 4005 4006 4007 return 4008 end 4009 4010 subroutine D3dB_r_write(nb,iunit,A,tmp,jcol) 4011 implicit none 4012 integer nb 4013 integer iunit 4014 real*8 A(*) 4015 real*8 tmp(*) 4016 integer jcol 4017 4018#include "bafdecls.fh" 4019#include "D3dB.fh" 4020 4021#include "tcgmsg.fh" 4022#include "msgtypesf.h" 4023 integer rcv_len,rcv_proc 4024 4025 4026* *** local variables *** 4027 integer MASTER,taskid 4028 parameter(MASTER=0) 4029 integer p_from, p_here,q 4030 integer j,k,index,index2 4031 integer dest,source,status,msglen 4032 4033 integer taskid_j,ii 4034 4035* **** external functions **** 4036 integer Parallel2d_convert_taskid_ij 4037 external Parallel2d_convert_taskid_ij 4038 4039 call Parallel_taskid(taskid) 4040 call Parallel2d_taskid_j(taskid_j) 4041 4042 4043 !********************** 4044 !**** slab mapping **** 4045 !********************** 4046 if (mapping.eq.1) then 4047* **** master node reads from file and distributes **** 4048 if (taskid.eq.MASTER) then 4049 do k=1,nz(nb) 4050 4051 call D3dB_ktoqp(nb,k,q,ii) 4052 p_from = Parallel2d_convert_taskid_ij(ii,taskid_j) 4053 if (p_from.eq.MASTER) then 4054 4055 do j=1,ny(nb) 4056 index = (q-1)*(nx(nb)+2)*ny(nb) 4057 > + (j-1)*(nx(nb)+2) + 1 4058 index2 = (j-1)*nx(nb) + 1 4059 call dcopy(nx(nb),A(index),1,tmp(index2),1) 4060 end do 4061 4062 else 4063 4064 msglen = (nx(nb))*ny(nb) 4065 status = msglen 4066 source = p_from 4067 4068 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 4069 > source,rcv_proc,1) 4070 4071 end if 4072 4073 call dwrite(iunit,tmp,(nx(nb))*ny(nb)) 4074 end do 4075 4076* **** not master node **** 4077 else 4078 do k=1,nz(nb) 4079 4080 call D3dB_ktoqp(nb,k,q,ii) 4081 p_here = Parallel2d_convert_taskid_ij(ii,jcol) 4082 if (p_here.eq.taskid) then 4083 4084 do j=1,ny(nb) 4085 index = (q-1)*(nx(nb)+2)*ny(nb) 4086 > + (j-1)*(nx(nb)+2) + 1 4087 index2 = (j-1)*nx(nb) + 1 4088 call dcopy(nx(nb),A(index),1,tmp(index2),1) 4089 end do 4090 4091 4092 msglen = (nx(nb))*ny(nb) 4093 dest = MASTER 4094 4095 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 4096 4097 end if 4098 4099 end do 4100 end if 4101 4102 !************************* 4103 !**** hilbert mapping **** 4104 !************************* 4105 else 4106* **** master node reads from file and distributes **** 4107 if (taskid.eq.MASTER) then 4108 do k=1,nz(nb) 4109 do j=1,ny(nb) 4110 4111 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 4112 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 4113 p_from = Parallel2d_convert_taskid_ij(ii,taskid_j) 4114 4115 if (p_from.eq.MASTER) then 4116 index = (q-1)*(nx(nb)+2) + 1 4117 call dcopy(nx(nb),A(index),1,tmp,1) 4118 4119 else 4120 4121 msglen = (nx(nb)) 4122 status = msglen 4123 source = p_from 4124 4125 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 4126 > source,rcv_proc,1) 4127 4128 end if 4129 4130 call dwrite(iunit,tmp,(nx(nb))) 4131 end do 4132 end do 4133 4134* **** not master node **** 4135 else 4136 do k=1,nz(nb) 4137 do j=1,ny(nb) 4138 4139 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 4140 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 4141 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 4142 if (p_here.eq.taskid) then 4143 4144 index = (q-1)*(nx(nb)+2) + 1 4145 call dcopy(nx(nb),A(index),1,tmp,1) 4146 4147 msglen = nx(nb) 4148 dest = MASTER 4149 4150 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 4151 4152 end if 4153 4154 end do 4155 end do 4156 end if 4157 4158 end if 4159 4160 return 4161 end 4162 4163 subroutine D3dB_t_write(nb,iunit,A,tmp,jcol) 4164 implicit none 4165 integer nb 4166 integer iunit 4167 real*8 A(*) 4168 real*8 tmp(*) 4169 integer jcol 4170 4171#include "bafdecls.fh" 4172#include "errquit.fh" 4173#include "D3dB.fh" 4174#include "tcgmsg.fh" 4175#include "msgtypesf.h" 4176 4177 integer rcv_len,rcv_proc 4178 4179 4180* *** local variables *** 4181 integer MASTER,taskid 4182 parameter(MASTER=0) 4183 logical value 4184 integer p_from, p_here,q 4185 integer j,k,index 4186 integer dest,source,status,msglen 4187 integer dum,dum_msglen 4188 integer tmp1(2),tmp2(2) 4189 4190 integer ii,taskid_j 4191 4192* **** external functions **** 4193 integer Parallel2d_convert_taskid_ij 4194 external Parallel2d_convert_taskid_ij 4195 4196 call Parallel_taskid(taskid) 4197 call Parallel2d_taskid_j(taskid_j) 4198 4199 4200 !********************** 4201 !**** slab mapping **** 4202 !********************** 4203 if (mapping.eq.1) then 4204* **** master node reads from file and distributes **** 4205 if (taskid.eq.MASTER) then 4206 do k=1,nz(nb) 4207 4208 call D3dB_ktoqp(nb,k,q,ii) 4209 4210 p_from = Parallel2d_convert_taskid_ij(ii,taskid_j) 4211 if (p_from.eq.MASTER) then 4212 4213 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 4214 call dcopy((nx(nb)/2+1)*ny(nb),A(index),1,tmp,1) 4215 4216 else 4217 4218 msglen = (nx(nb)/2+1)*ny(nb) 4219 status = msglen 4220 source = p_from 4221 4222 dum = 99 4223 dum_msglen = 1 4224 call SND(9+MSGINT,dum,mitob(dum_msglen),source,1) 4225 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 4226 > source,rcv_proc,1) 4227 4228 end if 4229 4230 call dwrite(iunit,tmp,(nx(nb)/2+1)*ny(nb)) 4231 end do 4232 4233* **** not master node **** 4234 else 4235 do k=1,nz(nb) 4236 4237 call D3dB_ktoqp(nb,k,q,ii) 4238 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 4239 if (p_here.eq.taskid) then 4240 4241 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1 4242 call dcopy((nx(nb)/2+1)*ny(nb),A(index),1,tmp,1) 4243 4244 4245 msglen = (nx(nb)/2+1)*ny(nb) 4246 dest = MASTER 4247 4248 dum_msglen = 1 4249 call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len, 4250 > dest,rcv_proc,1) 4251 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 4252 4253 end if 4254 4255 end do 4256 end if 4257 4258 4259 !************************* 4260 !**** hilbert mapping **** 4261 !************************* 4262 else 4263 4264 if (taskid_j.eq.jcol) then 4265 4266* **** allocate temporary space **** 4267 value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1', 4268 > tmp1(2),tmp1(1)) 4269 value = value.and. 4270 > BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1)) 4271 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 4272* 4273 call D3dB_t_transpose_ijk(nb,6,A, 4274 > dcpl_mb(tmp1(1)), 4275 > dcpl_mb(tmp2(1))) !*** map3to1 operation *** 4276 4277* **** deallocate temporary space **** 4278 value = BA_pop_stack(tmp2(2)) 4279 value = value.and.BA_pop_stack(tmp1(2)) 4280 if (.not. value) call errquit('error popping stack',0, MA_ERR) 4281 4282 end if 4283 4284 4285* **** master node reads from file and distributes **** 4286 if (taskid.eq.MASTER) then 4287 do k=1,nz(nb) 4288 do j=1,ny(nb) 4289 4290 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 4291 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 4292 p_from = Parallel2d_convert_taskid_ij(ii,taskid_j) 4293 if (p_from.eq.MASTER) then 4294 4295 index = (q-1)*(nx(nb)/2+1) + 1 4296 call dcopy((nx(nb)/2+1),A(index),1,tmp,1) 4297 4298 else 4299 4300 msglen = (nx(nb)/2+1) 4301 status = msglen 4302 source = p_from 4303 4304 dum = 99 4305 dum_msglen = 1 4306 call SND(9+MSGINT,dum,mitob(dum_msglen),source,1) 4307 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 4308 > source,rcv_proc,1) 4309 4310 end if 4311 4312 call dwrite(iunit,tmp,(nx(nb)/2+1)) 4313 end do 4314 end do 4315 4316* **** not master node **** 4317 else 4318 do k=1,nz(nb) 4319 do j=1,ny(nb) 4320 4321 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 4322 ii = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 4323 p_here = Parallel2d_convert_taskid_ij(ii,taskid_j) 4324 if (p_here.eq.taskid) then 4325 4326 index = (q-1)*(nx(nb)/2+1) + 1 4327 call dcopy((nx(nb)/2+1),A(index),1,tmp,1) 4328 4329 4330 msglen = (nx(nb)/2+1) 4331 dest = MASTER 4332 4333 dum_msglen = 1 4334 call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len, 4335 > dest,rcv_proc,1) 4336 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 4337 4338 end if 4339 4340 end do 4341 end do 4342 end if 4343 end if 4344 4345 4346 return 4347 end 4348 4349 4350 4351* *********************************** 4352* * * 4353* * D3dB_r_FormatWrite * 4354* * * 4355* *********************************** 4356 4357 subroutine D3dB_r_FormatWrite(nb,iunit,A,tmp) 4358 implicit none 4359 integer nb 4360 integer iunit 4361 real*8 A(*) 4362 real*8 tmp(*) 4363 4364#include "bafdecls.fh" 4365#include "D3dB.fh" 4366 4367 4368#include "tcgmsg.fh" 4369#include "msgtypesf.h" 4370 integer rcv_len,rcv_proc 4371 4372 4373* *** local variables *** 4374 integer MASTER,taskid 4375 parameter(MASTER=0) 4376 integer p_from, p_here,q 4377 integer i,j,k,index,index2 4378 integer dest,source,status,msglen 4379 4380 call Parallel_taskid(taskid) 4381 4382 !********************** 4383 !**** slab mapping **** 4384 !********************** 4385 if (mapping.eq.1) then 4386* **** master node reads from file and distributes **** 4387 if (taskid.eq.MASTER) then 4388 do k=1,nz(nb) 4389 4390 call D3dB_ktoqp(nb,k,q,p_from) 4391 4392 if (p_from.eq.MASTER) then 4393 4394 do j=1,ny(nb) 4395 index = (q-1)*(nx(nb)+2)*ny(nb) 4396 > + (j-1)*(nx(nb)+2) + 1 4397 index2 = (j-1)*nx(nb) + 1 4398 call dcopy(nx(nb),A(index),1,tmp(index2),1) 4399 end do 4400 4401 else 4402 4403 msglen = (nx(nb))*ny(nb) 4404 status = msglen 4405 source = p_from 4406 4407 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 4408 > source,rcv_proc,1) 4409 4410 end if 4411 4412c **** call dwrite(iunit,tmp,(nx(nb))*ny(nb)) **** 4413 do j=1,ny(nb) 4414 write(iunit,'(3E26.14)') (tmp(i+(j-1)*nx(nb)), i=1,nx(nb)) 4415 end do 4416 4417 end do 4418 4419* **** not master node **** 4420 else 4421 do k=1,nz(nb) 4422 4423 call D3dB_ktoqp(nb,k,q,p_here) 4424 if (p_here.eq.taskid) then 4425 4426 do j=1,ny(nb) 4427 index = (q-1)*(nx(nb)+2)*ny(nb) 4428 > + (j-1)*(nx(nb)+2) + 1 4429 index2 = (j-1)*nx(nb) + 1 4430 call dcopy(nx(nb),A(index),1,tmp(index2),1) 4431 end do 4432 4433 4434 msglen = (nx(nb))*ny(nb) 4435 dest = MASTER 4436 4437 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 4438 4439 end if 4440 4441 end do 4442 end if 4443 4444 4445 4446 !************************* 4447 !**** hilbert mapping **** 4448 !************************* 4449 else 4450 4451 4452* **** master node reads from file and distributes **** 4453 if (taskid.eq.MASTER) then 4454 do k=1,nz(nb) 4455 do j=1,ny(nb) 4456 4457 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 4458 p_from = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 4459 4460 4461 if (p_from.eq.MASTER) then 4462 index = (q-1)*(nx(nb)+2) + 1 4463 call dcopy(nx(nb),A(index),1,tmp,1) 4464 4465 else 4466 4467 msglen = (nx(nb)) 4468 status = msglen 4469 source = p_from 4470 4471 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 4472 > source,rcv_proc,1) 4473 4474 end if 4475 4476 !call dwrite(iunit,tmp,(nx(nb))) 4477 write(iunit,'(3E26.14)') (tmp(i), i=1,nx(nb)) 4478 end do 4479 end do 4480 4481* **** not master node **** 4482 else 4483 do k=1,nz(nb) 4484 do j=1,ny(nb) 4485 4486 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 4487 p_here = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 4488 4489 if (p_here.eq.taskid) then 4490 4491 index = (q-1)*(nx(nb)+2) + 1 4492 call dcopy(nx(nb),A(index),1,tmp,1) 4493 4494 msglen = nx(nb) 4495 dest = MASTER 4496 4497 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 4498 4499 end if 4500 4501 end do 4502 end do 4503 end if 4504 4505 4506 4507 end if 4508 4509* **** wait **** 4510 return 4511 end 4512 4513 4514* ******************************************* 4515* * * 4516* * D3dB_r_FormatWrite_reverse * 4517* * * 4518* ******************************************* 4519 4520 subroutine D3dB_r_FormatWrite_reverse(nb,iunit,A,tmp) 4521 implicit none 4522 integer nb 4523 integer iunit 4524 real*8 A(*) 4525 real*8 tmp(*) 4526 4527#include "D3dB.fh" 4528 4529#include "tcgmsg.fh" 4530#include "msgtypesf.h" 4531 integer rcv_len,rcv_proc 4532 4533 4534* *** local variables *** 4535 integer MASTER,taskid 4536 parameter(MASTER=0) 4537 integer p_from, p_here,q 4538 integer i,j,k,index 4539 integer dest,source,status,msglen,idum 4540 4541 call Parallel_taskid(taskid) 4542 4543 !********************** 4544 !**** slab mapping **** 4545 !********************** 4546 if (mapping.eq.1) then 4547* **** master node reads from file and distributes **** 4548 if (taskid.eq.MASTER) then 4549 do i=1,nx(nb) 4550 do j=1,ny(nb) 4551 4552 do k=1,nz(nb) 4553 call D3dB_ktoqp(nb,k,q,p_from) 4554 if (p_from.eq.MASTER) then 4555 index = (q-1)*(nx(nb)+2)*ny(nb) 4556 > + (j-1)*(nx(nb)+2) + i 4557 tmp(k) = A(index) 4558 else 4559 msglen = 1 4560 status = msglen 4561 source = p_from 4562 4563 call SND(9+MSGINT,idum,mitob(msglen),source,1) 4564 call RCV(9+MSGDBL,tmp(k),mdtob(msglen),rcv_len, 4565 > source,rcv_proc,1) 4566 4567 end if 4568 end do 4569 write(iunit,'(6E13.5)') (tmp(k), k=1,nz(nb)) 4570 4571 end do 4572 end do 4573 4574* **** not master node **** 4575 else 4576 do i=1,nx(nb) 4577 do j=1,ny(nb) 4578 4579 do k=1,nz(nb) 4580 call D3dB_ktoqp(nb,k,q,p_here) 4581 if (p_here.eq.taskid) then 4582 4583 index = (q-1)*(nx(nb)+2)*ny(nb) 4584 > + (j-1)*(nx(nb)+2) + i 4585 tmp(1) = A(index) 4586 4587 msglen = 1 4588 dest = MASTER 4589 4590 call RCV(9+MSGINT,idum,mitob(msglen),rcv_len, 4591 > dest,rcv_proc,1) 4592 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 4593 4594 end if 4595 end do 4596 4597 end do 4598 end do 4599 end if 4600 4601 4602 !************************* 4603 !**** hilbert mapping **** 4604 !************************* 4605 else 4606 4607* **** master node reads from file and distributes **** 4608 if (taskid.eq.MASTER) then 4609 do i=1,nx(nb) 4610 do j=1,ny(nb) 4611 4612 do k=1,nz(nb) 4613 call D3dB_ijktoindex2p(nb,i,j,k,index,p_from) 4614 if (p_from.eq.MASTER) then 4615 tmp(k) = A(index) 4616 else 4617 msglen = 1 4618 status = msglen 4619 source = p_from 4620 4621 call SND(9+MSGINT,idum,mitob(msglen),source,1) 4622 call RCV(9+MSGDBL,tmp(k),mdtob(msglen),rcv_len, 4623 > source,rcv_proc,1) 4624 4625 end if 4626 end do 4627 write(iunit,'(6E13.5)') (tmp(k), k=1,nz(nb)) 4628 4629 end do 4630 end do 4631 4632* **** not master node **** 4633 else 4634 do i=1,nx(nb) 4635 do j=1,ny(nb) 4636 4637 do k=1,nz(nb) 4638 call D3dB_ijktoindex2p(nb,i,j,k,index,p_here) 4639 if (p_here.eq.taskid) then 4640 4641 tmp(1) = A(index) 4642 4643 msglen = 1 4644 dest = MASTER 4645 4646 call RCV(9+MSGINT,idum,mitob(msglen),rcv_len, 4647 > dest,rcv_proc,1) 4648 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 4649 4650 end if 4651 end do 4652 4653 end do 4654 end do 4655 end if 4656 4657 end if 4658 4659* **** wait **** 4660 return 4661 end 4662 4663* *********************************** 4664* * * 4665* * D3dB_r_FormatWrite_paw * 4666* * * 4667* *********************************** 4668 4669 subroutine D3dB_r_FormatWrite_paw(nb,iunit,A,tmp) 4670 implicit none 4671 integer nb 4672 integer iunit 4673 real*8 A(*) 4674 real*8 tmp(*) 4675 4676#include "D3dB.fh" 4677 4678 4679#include "tcgmsg.fh" 4680#include "msgtypesf.h" 4681 integer rcv_len,rcv_proc 4682 4683 4684* *** local variables *** 4685 integer MASTER,taskid 4686 parameter(MASTER=0) 4687 integer p_from, p_here,q 4688 integer i,j,k,index 4689 integer dest,source,status,msglen 4690 4691 call Parallel_taskid(taskid) 4692 4693* **** master node reads from file and distributes **** 4694 if (taskid.eq.MASTER) then 4695 do k=1,nz(nb) 4696 do j=1,ny(nb) 4697 4698 call D3dB_ktoqp(nb,j,q,p_from) 4699 4700 if (p_from.eq.MASTER) then 4701 4702 index = (q-1)*(nx(nb)+2)*ny(nb) 4703 > + (k-1)*(nx(nb)+2) + 1 4704 call dcopy(nx(nb),A(index),1,tmp,1) 4705 4706 else 4707 4708 msglen = (nx(nb)) 4709 status = msglen 4710 source = p_from 4711 4712 call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len, 4713 > source,rcv_proc,1) 4714 4715 end if 4716 4717c **** call dwrite(iunit,tmp,(nx(nb))) **** 4718 write(iunit,'(3E26.14)') (tmp(i), i=1,nx(nb)) 4719 4720 end do 4721 end do 4722 4723* **** not master node **** 4724 else 4725 do k=1,nz(nb) 4726 do j=1,ny(nb) 4727 4728 call D3dB_ktoqp(nb,j,q,p_here) 4729 if (p_here.eq.taskid) then 4730 4731 index = (q-1)*(nx(nb)+2)*ny(nb) 4732 > + (k-1)*(nx(nb)+2) + 1 4733 call dcopy(nx(nb),A(index),1,tmp,1) 4734 4735 4736 msglen = (nx(nb)) 4737 dest = MASTER 4738 4739 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 4740 4741 end if 4742 4743 end do 4744 end do 4745 end if 4746 4747* **** wait **** 4748 return 4749 end 4750 4751 4752 4753