1* 2* $Id$ 3* 4 5* Parallel.f 6* Author - Eric Bylaska 7* 8* These routines are to be used to keep track of the parallel message 9* passing variables, as well as iniitialize and deinitialize the 10* message passing routines. 11* 12 13 14* ************************************* 15* * * 16* * Parallel_Init * 17* * * 18* ************************************* 19 20 subroutine Parallel_Init() 21 implicit none 22 23#include "Parallel.fh" 24#include "bafdecls.fh" 25#include "errquit.fh" 26 27#include "tcgmsg.fh" 28#include "global.fh" 29 30c include 'mpif.h' 31c integer mpierr 32 33* **** local variables **** 34 integer i 35 36 np = nnodes() 37 taskid = nodeid() 38 39 40* **** set up 3d processor grid = np x 1 x 1**** 41 if (.not.BA_alloc_get(mt_int,np,'procNd',procNd(2),procNd(1))) 42 > call errquit('Parallel_init:out of heap memory',0, MA_ERR) 43 44 np_i = np 45 np_j = 1 46 np_k = 1 47 do i=0,np-1 48 int_mb(procNd(1)+i) = i 49 end do 50 taskid_i = taskid 51 taskid_j = 0 52 taskid_k = 0 53 comm_i = ga_pgroup_get_world() 54 comm_j = -99 55 comm_k = -99 56 57 return 58 end 59 60 61* ************************************* 62* * * 63* * Parallel2d_Init * 64* * * 65* ************************************* 66 67* Sset up the 2d processor grid = np_i x np_j, 68* where np_i = ncolumns, and np_j = np/np_i 69* 70 subroutine Parallel2d_Init(ncolumns) 71 implicit none 72 integer ncolumns 73 74#include "Parallel.fh" 75#include "bafdecls.fh" 76#include "errquit.fh" 77#include "global.fh" 78 79* *** local variables *** 80 integer i,j,icount 81 integer tmp(2) 82 83 np_i = np/ncolumns 84 np_j = ncolumns 85 86 if (np_j.gt.1) then 87 88 icount = 0 89 do j=0,np_j-1 90 do i=0,np_i-1 91 if (icount.eq.taskid) then 92 taskid_i = i 93 taskid_j = j 94 end if 95 int_mb(procNd(1) + i + j*np_i) = icount 96 icount = mod((icount+1),np) 97 end do 98 end do 99 100 if (.not.BA_push_get(mt_int,np,'tmppp2',tmp(2),tmp(1))) 101 > call errquit('Parallel2d_init:out of stack memory',0, MA_ERR) 102 do i=0,np_i-1 103 int_mb(tmp(1)+i) = int_mb(procNd(1) + i + taskid_j*np_i) 104 end do 105 comm_i = ga_pgroup_create(int_mb(tmp(1)),np_i) 106 do j=0,np_j-1 107 int_mb(tmp(1)+j) = int_mb(procNd(1) + taskid_i + j*np_i) 108 end do 109 comm_j = ga_pgroup_create(int_mb(tmp(1)),np_j) 110 if (.not.BA_pop_stack(tmp(2))) 111 > call errquit('Parallel2d_init:popping stack memory',0, MA_ERR) 112 113 end if 114 115 return 116 end 117 118 119* ************************************* 120* * * 121* * Parallel2d_Finalize * 122* * * 123* ************************************* 124 125 subroutine Parallel2d_Finalize() 126 implicit none 127 128#include "Parallel.fh" 129#include "bafdecls.fh" 130#include "errquit.fh" 131#include "global.fh" 132 133 if (np_j.gt.1) then 134 135* **** free comm_i and comm_j communicators **** 136 if (.not.ga_pgroup_destroy(comm_i)) 137 > call errquit('Parallel2d_Finalize: error destoying comm_i',0,0) 138 if (.not.ga_pgroup_destroy(comm_j)) 139 > call errquit('Parallel2d_Finalize: error destoying comm_j',0,1) 140 141 end if 142 return 143 end 144 145 146* ************************************* 147* * * 148* * Parallel3d_Init * 149* * * 150* ************************************* 151 152* Sset up the 3d processor grid = np_i x np_j x np_k, 153* where np_i = np/(np_j*np_k), and np_j = ncolumns and np_k=nzones 154* 155 subroutine Parallel3d_Init(ncolumns,nzones) 156 implicit none 157 integer ncolumns,nzones 158 159#include "Parallel.fh" 160#include "bafdecls.fh" 161#include "errquit.fh" 162#include "global.fh" 163 164* *** local variables *** 165 integer i,j,k,icount 166 integer tmp(2) 167 168 np_i = np/(ncolumns*nzones) 169 np_j = ncolumns 170 np_k = nzones 171 172 if ((np_j.gt.1).or.(np_k.gt.1)) then 173 174 icount = 0 175 do k=0,np_k-1 176 do j=0,np_j-1 177 do i=0,np_i-1 178 if (icount.eq.taskid) then 179 taskid_i = i 180 taskid_j = j 181 taskid_k = k 182 end if 183 int_mb(procNd(1) + i + j*np_i + k*np_i*np_j) = icount 184 icount = mod((icount+1),np) 185 end do 186 end do 187 end do 188 189 if (.not.BA_push_get(mt_int,np,'tmppp2',tmp(2),tmp(1))) 190 > call errquit('Parallel3d_init:out of stack memory',0, MA_ERR) 191 do i=0,np_i-1 192 int_mb(tmp(1)+i) = int_mb(procNd(1) 193 > + i 194 > + taskid_j*np_i 195 > + taskid_k*np_i*np_j) 196 end do 197 comm_i = ga_pgroup_create(int_mb(tmp(1)),np_i) 198 do j=0,np_j-1 199 int_mb(tmp(1)+j) = int_mb(procNd(1) 200 > + taskid_i 201 > + j*np_i 202 > + taskid_k*np_i*np_j) 203 end do 204 comm_j = ga_pgroup_create(int_mb(tmp(1)),np_j) 205 do k=0,np_k-1 206 int_mb(tmp(1)+k) = int_mb(procNd(1) 207 > + taskid_i 208 > + taskid_j*np_i 209 > + k*np_i*np_j) 210 end do 211 comm_k = ga_pgroup_create(int_mb(tmp(1)),np_k) 212 if (.not.BA_pop_stack(tmp(2))) 213 > call errquit('Parallel3d_init:popping stack memory',0, MA_ERR) 214 215 end if 216 217 return 218 end 219 220 221* ************************************* 222* * * 223* * Parallel3d_Finalize * 224* * * 225* ************************************* 226 227 subroutine Parallel3d_Finalize() 228 implicit none 229 230#include "Parallel.fh" 231#include "bafdecls.fh" 232#include "errquit.fh" 233#include "global.fh" 234 235 if ((np_j.gt.1).or.(np_k.gt.1)) then 236 237* **** free comm_i and comm_j communicators **** 238 if (.not.ga_pgroup_destroy(comm_i)) 239 > call errquit('Parallel3d_Finalize: error destoying comm_i',0,0) 240 if (.not.ga_pgroup_destroy(comm_j)) 241 > call errquit('Parallel3d_Finalize: error destoying comm_j',0,1) 242 if (.not.ga_pgroup_destroy(comm_k)) 243 > call errquit('Parallel3d_Finalize: error destoying comm_k',0,2) 244 245 end if 246 return 247 end 248 249 250 251* *********************************** 252* * * 253* * Parallel_MaxAll * 254* * * 255* *********************************** 256 257 subroutine Parallel_MaxAll(sum) 258c implicit none 259 real*8 sum 260 261#include "tcgmsg.fh" 262#include "msgtypesf.h" 263#include "Parallel.fh" 264 265 if (np.gt.1) then 266 call GA_DGOP(9+MSGDBL,sum,1,'max') 267 end if 268 269 return 270 end 271 272 273 274 275 276* *********************************** 277* * * 278* * Parallel_IMaxAll * 279* * * 280* *********************************** 281 subroutine Parallel_IMaxAll(isum) 282c implicit none 283 integer isum 284 285#include "tcgmsg.fh" 286#include "msgtypesf.h" 287#include "Parallel.fh" 288 289 if (np.gt.1) then 290 call GA_IGOP(9+MSGINT,isum,1,'max') 291 end if 292 return 293 end 294 295 296 297 298* *********************************** 299* * * 300* * Parallel_SumAll * 301* * * 302* *********************************** 303 304 subroutine Parallel_SumAll(sum) 305c implicit none 306 real*8 sum 307 308#include "tcgmsg.fh" 309#include "msgtypesf.h" 310#include "Parallel.fh" 311 312 if (np.gt.1) then 313 call GA_DGOP(9+MSGDBL,sum,1,'+') 314 end if 315 316 return 317 end 318 319 320 321* *********************************** 322* * * 323* * Parallel_ISumAll * 324* * * 325* *********************************** 326 327 subroutine Parallel_ISumAll(sum) 328c implicit none 329 integer sum 330 331#include "tcgmsg.fh" 332#include "msgtypesf.h" 333#include "Parallel.fh" 334 335 if (np.gt.1) then 336 call GA_IGOP(9+MSGINT,sum,1,'+') 337 end if 338 339 return 340 end 341 342 343* *********************************** 344* * * 345* * Parallel_Vector_SumAll * 346* * * 347* *********************************** 348 subroutine Parallel_Vector_SumAll(n,sum) 349c implicit none 350 integer n 351 real*8 sum(*) 352 353#include "bafdecls.fh" 354#include "tcgmsg.fh" 355#include "msgtypesf.h" 356#include "errquit.fh" 357#include "Parallel.fh" 358 359 360 call nwpw_timing_start(2) 361 if (np.gt.1) then 362 call GA_DGOP(9+MSGDBL,sum,n,'+') 363 end if 364 call nwpw_timing_end(2) 365 return 366 end 367 368 369 370* *********************************** 371* * * 372* * Parallel_Vector_ISumAll * 373* * * 374* *********************************** 375 376 subroutine Parallel_Vector_ISumAll(n,sum) 377c implicit none 378 integer n 379 integer sum(*) 380 381#include "bafdecls.fh" 382#include "errquit.fh" 383#include "tcgmsg.fh" 384#include "msgtypesf.h" 385#include "Parallel.fh" 386 387 388 call nwpw_timing_start(2) 389 if (np.gt.1) then 390 call GA_IGOP(9+MSGINT,sum,n,'+') 391 end if 392 call nwpw_timing_end(2) 393 394 return 395 end 396 397 398 399* *********************************** 400* * * 401* * Parallel_Brdcst_value * 402* * * 403* *********************************** 404 405 subroutine Parallel_Brdcst_value(psend,sum) 406 implicit none 407 integer psend 408 real*8 sum 409 410#include "bafdecls.fh" 411#include "errquit.fh" 412#include "tcgmsg.fh" 413#include "msgtypesf.h" 414#include "Parallel.fh" 415 416* **** local variables **** 417 integer msglen 418 419 if (np.gt.1) then 420 msglen = 1 421 call BRDCST(9+MSGDBL,sum,mdtob(msglen),psend) 422 end if 423 424 return 425 end 426 427 428* *********************************** 429* * * 430* * Parallel_Brdcst_values * 431* * * 432* *********************************** 433 434 subroutine Parallel_Brdcst_values(psend,nsize,sum) 435 implicit none 436 integer psend,nsize 437 real*8 sum(*) 438 439#include "bafdecls.fh" 440#include "errquit.fh" 441#include "tcgmsg.fh" 442#include "msgtypesf.h" 443#include "Parallel.fh" 444 445 446 if (np.gt.1) then 447 call BRDCST(9+MSGDBL,sum,mdtob(nsize),psend) 448 end if 449 450 return 451 end 452 453* *********************************** 454* * * 455* * Parallel_Brdcst_ivalue * 456* * * 457* *********************************** 458 459 subroutine Parallel_Brdcst_ivalue(psend,isum) 460 implicit none 461 integer psend 462 integer isum 463 464#include "bafdecls.fh" 465#include "errquit.fh" 466#include "tcgmsg.fh" 467#include "msgtypesf.h" 468#include "Parallel.fh" 469 470* **** local variables **** 471 integer msglen 472 473 if (np.gt.1) then 474 msglen = 1 475 call BRDCST(9+MSGINT,isum,mitob(msglen),psend) 476 end if 477 478 return 479 end 480 481 482 483* *********************************** 484* * * 485* * Parallel_Brdcst_ivalues * 486* * * 487* *********************************** 488 489 subroutine Parallel_Brdcst_ivalues(psend,nsize,isum) 490 implicit none 491 integer psend,nsize 492 integer isum(*) 493 494#include "bafdecls.fh" 495#include "errquit.fh" 496#include "tcgmsg.fh" 497#include "msgtypesf.h" 498#include "Parallel.fh" 499 500 if (np.gt.1) then 501 call BRDCST(9+MSGINT,isum,mitob(nsize),psend) 502 end if 503 return 504 end 505 506 507 508 509* *********************************** 510* * * 511* * Parallela_MaxAll * 512* * * 513* *********************************** 514 515 subroutine Parallela_MaxAll(ic,sum) 516c implicit none 517 integer ic 518 real*8 sum 519 520#include "tcgmsg.fh" 521#include "msgtypesf.h" 522#include "Parallel.fh" 523 524 if (np.gt.1) then 525 if (ic.eq.1) then 526 call GA_PGROUP_DGOP(comm_i,9+MSGDBL,sum,1,'max') 527 else if (ic.eq.2) then 528 call GA_PGROUP_DGOP(comm_j,9+MSGDBL,sum,1,'max') 529 else if (ic.eq.3) then 530 call GA_PGROUP_DGOP(comm_k,9+MSGDBL,sum,1,'max') 531 else 532 call GA_DGOP(9+MSGDBL,sum,1,'max') 533 end if 534 end if 535 536 return 537 end 538 539 540 541 542* *********************************** 543* * * 544* * Parallela_SumAll * 545* * * 546* *********************************** 547 548 subroutine Parallela_SumAll(ic,sum) 549c implicit none 550 integer ic 551 real*8 sum 552 553#include "tcgmsg.fh" 554#include "msgtypesf.h" 555#include "Parallel.fh" 556 557 if (np.gt.1) then 558 if (ic.eq.1) then 559 call GA_PGROUP_DGOP(comm_i,9+MSGDBL,sum,1,'+') 560 else if (ic.eq.1) then 561 call GA_PGROUP_DGOP(comm_j,9+MSGDBL,sum,1,'+') 562 else if (ic.eq.3) then 563 call GA_PGROUP_DGOP(comm_k,9+MSGDBL,sum,1,'+') 564 else 565 call GA_DGOP(9+MSGDBL,sum,1,'+') 566 end if 567 end if 568 569 return 570 end 571 572 573 574* *********************************** 575* * * 576* * Parallela_ISumAll * 577* * * 578* *********************************** 579 580 subroutine Parallela_ISumAll(ic,sum) 581c implicit none 582 integer ic 583 integer sum 584 585#include "tcgmsg.fh" 586#include "msgtypesf.h" 587#include "Parallel.fh" 588 589 if (np.gt.1) then 590 if (ic.eq.1) then 591 call GA_PGROUP_IGOP(comm_i,9+MSGINT,sum,1,'+') 592 else if (ic.eq.2) then 593 call GA_PGROUP_IGOP(comm_j,9+MSGINT,sum,1,'+') 594 else if (ic.eq.3) then 595 call GA_PGROUP_IGOP(comm_k,9+MSGINT,sum,1,'+') 596 else 597 call GA_IGOP(9+MSGINT,sum,1,'+') 598 end if 599 end if 600 601 return 602 end 603 604 605* *********************************** 606* * * 607* * Parallela_Vector_SumAll * 608* * * 609* *********************************** 610 subroutine Parallela_Vector_SumAll(ic,n,sum) 611c implicit none 612 integer ic 613 integer n 614 real*8 sum(*) 615 616#include "bafdecls.fh" 617#include "tcgmsg.fh" 618#include "msgtypesf.h" 619#include "errquit.fh" 620#include "Parallel.fh" 621 622 623 call nwpw_timing_start(2) 624 if (np.gt.1) then 625 if (ic.eq.1) then 626 call GA_PGROUP_DGOP(comm_i,9+MSGDBL,sum,n,'+') 627 else if (ic.eq.2) then 628 call GA_PGROUP_DGOP(comm_j,9+MSGDBL,sum,n,'+') 629 else if (ic.eq.3) then 630 call GA_PGROUP_DGOP(comm_k,9+MSGDBL,sum,n,'+') 631 else 632 call GA_DGOP(9+MSGDBL,sum,n,'+') 633 end if 634 end if 635 call nwpw_timing_end(2) 636 return 637 end 638 639 640 641* *********************************** 642* * * 643* * Parallela_Vector_ISumAll * 644* * * 645* *********************************** 646 647 subroutine Parallela_Vector_ISumAll(ic,n,sum) 648c implicit none 649 integer ic 650 integer n 651 integer sum(*) 652 653#include "bafdecls.fh" 654#include "errquit.fh" 655#include "tcgmsg.fh" 656#include "msgtypesf.h" 657#include "Parallel.fh" 658 659 660 call nwpw_timing_start(2) 661 if (np.gt.1) then 662 if (ic.eq.1) then 663 call GA_PGROUP_IGOP(comm_i,9+MSGINT,sum,n,'+') 664 else if (ic.eq.2) then 665 call GA_PGROUP_IGOP(comm_j,9+MSGINT,sum,n,'+') 666 else if (ic.eq.3) then 667 call GA_PGROUP_IGOP(comm_k,9+MSGINT,sum,n,'+') 668 else 669 call GA_IGOP(9+MSGINT,sum,n,'+') 670 end if 671 end if 672 call nwpw_timing_end(2) 673 674 return 675 end 676 677 678 679* *********************************** 680* * * 681* * Parallela_Brdcst_value * 682* * * 683* *********************************** 684 685 subroutine Parallela_Brdcst_value(ic,psend,sum) 686 implicit none 687 integer ic 688 integer psend 689 real*8 sum 690 691#include "bafdecls.fh" 692#include "errquit.fh" 693#include "tcgmsg.fh" 694#include "msgtypesf.h" 695#include "Parallel.fh" 696 697* **** local variables **** 698 integer msglen 699 700 if (np.gt.1) then 701 msglen = 1 702 if (ic.eq.1) then 703 call GA_PGROUP_BRDCST(comm_i,9+MSGDBL,sum,mdtob(msglen),psend) 704 else if (ic.eq.2) then 705 call GA_PGROUP_BRDCST(comm_j,9+MSGDBL,sum,mdtob(msglen),psend) 706 else if (ic.eq.3) then 707 call GA_PGROUP_BRDCST(comm_k,9+MSGDBL,sum,mdtob(msglen),psend) 708 else 709 call GA_BRDCST(9+MSGDBL,sum,mdtob(msglen),psend) 710 end if 711 end if 712 713 return 714 end 715 716 717* *********************************** 718* * * 719* * Parallela_Brdcst_values * 720* * * 721* *********************************** 722 723 subroutine Parallela_Brdcst_values(ic,psend,nsize,sum) 724 implicit none 725 integer ic 726 integer psend,nsize 727 real*8 sum(*) 728 729#include "bafdecls.fh" 730#include "errquit.fh" 731#include "tcgmsg.fh" 732#include "msgtypesf.h" 733#include "Parallel.fh" 734 735 736 if (np.gt.1) then 737 if (ic.eq.1) then 738 call GA_PGROUP_BRDCST(comm_i,9+MSGDBL,sum,mdtob(nsize),psend) 739 else if (ic.eq.2) then 740 call GA_PGROUP_BRDCST(comm_j,9+MSGDBL,sum,mdtob(nsize),psend) 741 else if (ic.eq.3) then 742 call GA_PGROUP_BRDCST(comm_k,9+MSGDBL,sum,mdtob(nsize),psend) 743 else 744 call GA_BRDCST(9+MSGDBL,sum,mdtob(nsize),psend) 745 end if 746 end if 747 748 return 749 end 750 751 752 753* *********************************** 754* * * 755* * Parallela_Brdcst_ivalue * 756* * * 757* *********************************** 758 subroutine Parallela_Brdcst_ivalue(ic,psend,isum) 759 implicit none 760 integer ic 761 integer psend 762 integer isum 763 764#include "bafdecls.fh" 765#include "errquit.fh" 766#include "tcgmsg.fh" 767#include "msgtypesf.h" 768#include "Parallel.fh" 769 770* **** local variables **** 771 integer msglen 772 773 if (np.gt.1) then 774 msglen = 1 775 if (ic.eq.1) then 776 call GA_PGROUP_BRDCST(comm_i,9+MSGINT,isum,mitob(msglen),psend) 777 else if (ic.eq.2) then 778 call GA_PGROUP_BRDCST(comm_j,9+MSGINT,isum,mitob(msglen),psend) 779 else if (ic.eq.3) then 780 call GA_PGROUP_BRDCST(comm_k,9+MSGINT,isum,mitob(msglen),psend) 781 else 782 call GA_BRDCST(9+MSGINT,isum,mitob(msglen),psend) 783 end if 784 end if 785 786 return 787 end 788 789 790 791 792* *********************************** 793* * * 794* * Parallela_Brdcst_ivalues * 795* * * 796* *********************************** 797 798 subroutine Parallela_Brdcst_ivalues(ic,psend,nsize,isum) 799 implicit none 800 integer ic 801 integer psend,nsize 802 integer isum(*) 803 804#include "bafdecls.fh" 805#include "errquit.fh" 806#include "tcgmsg.fh" 807#include "msgtypesf.h" 808#include "Parallel.fh" 809 810 if (np.gt.1) then 811 if (ic.eq.1) then 812 call GA_PGROUP_BRDCST(comm_i,9+MSGINT,isum,mitob(nsize),psend) 813 else if (ic.eq.2) then 814 call GA_PGROUP_BRDCST(comm_j,9+MSGINT,isum,mitob(nsize),psend) 815 else if (ic.eq.3) then 816 call GA_PGROUP_BRDCST(comm_k,9+MSGINT,isum,mitob(nsize),psend) 817 else 818 call GA_BRDCST(9+MSGINT,isum,mitob(nsize),psend) 819 end if 820 end if 821 return 822 end 823 824 825 826* *********************************** 827* * * 828* * Parallela_start_rotate * 829* * * 830* *********************************** 831 832 subroutine Parallela_start_rotate(ic,shift, 833 > A1,nsize1, 834 > A2,nsize2,request) 835 implicit none 836 integer ic,shift 837 real*8 A1(*) 838 integer nsize1 839 real*8 A2(*) 840 integer nsize2 841 integer request(*) 842 843#include "bafdecls.fh" 844#include "tcgmsg.fh" 845#include "msgtypesf.h" 846#include "errquit.fh" 847 848#include "Parallel.fh" 849 850 851* **** local variables **** 852 integer i,mynp,mytaskid,msglen,msgype,mpierr,proc_to,proc_from 853 integer rcv_proc,rcv_len,psr,tmp(2),pto 854 855* ***** external functions **** 856 integer Parallel3d_convert_taskid_i 857 integer Parallel3d_convert_taskid_j 858 integer Parallel3d_convert_taskid_k 859 external Parallel3d_convert_taskid_i 860 external Parallel3d_convert_taskid_j 861 external Parallel3d_convert_taskid_k 862 863 if (ic.eq.1) then 864 mynp = np_i 865 mytaskid = taskid_i 866 proc_to = mod(mytaskid+shift+mynp,mynp) 867 proc_from = mod(mytaskid-shift+mynp,mynp) 868 869 proc_to = Parallel3d_convert_taskid_i(proc_to) 870 proc_from = Parallel3d_convert_taskid_i(proc_from) 871 else if (ic.eq.2) then 872 mynp = np_j 873 mytaskid = taskid_j 874 proc_to = mod(mytaskid+shift+mynp,mynp) 875 proc_from = mod(mytaskid-shift+mynp,mynp) 876 877 proc_to = Parallel3d_convert_taskid_j(proc_to) 878 proc_from = Parallel3d_convert_taskid_j(proc_from) 879 else if (ic.eq.3) then 880 mynp = np_k 881 mytaskid = taskid_k 882 proc_to = mod(mytaskid+shift+mynp,mynp) 883 proc_from = mod(mytaskid-shift+mynp,mynp) 884 885 proc_to = Parallel3d_convert_taskid_k(proc_to) 886 proc_from = Parallel3d_convert_taskid_k(proc_from) 887 else 888 mynp = np 889 mytaskid = taskid 890 proc_to = mod(mytaskid+shift+mynp,mynp) 891 proc_from = mod(mytaskid-shift+mynp,mynp) 892 end if 893 894* /* determine psr - should be made w/o using tmp array! */ 895 if (.not.BA_push_get(mt_int,mynp,'tmppp2',tmp(2),tmp(1))) 896 > call errquit('Parallela_start_rotate:out of stack',0,MA_ERR) 897 do i=0,np-1 898 int_mb(tmp(1)+i) = 0 899 end do 900 do i=0,np-1 901 pto = mod(i+shift+mynp,mynp) 902 if ((int_mb(tmp(1)+i).eq.0).and.(int_mb(tmp(1)+pto).eq.0)) then 903 int_mb(tmp(1)+i) = 1 904 int_mb(tmp(1)+pto) = 2 905 end if 906 end do 907 psr = int_mb(tmp(1)+mytaskid) 908 if (psr.eq.0) psr = 2 909 if (.not.BA_pop_stack(tmp(2))) 910 > call errquit('Parallela_start_rotate:popping stack',0,MA_ERR) 911 912* **** send then receive **** 913 if (psr.eq.1) then 914 915 if (nsize1.gt.0) then 916 msglen = nsize1 917 call SND(9+MSGDBL,A1,mdtob(msglen),proc_to,1) 918 request(4) = 1 919 else 920 request(4) = 0 921 end if 922 923 if (nsize2.gt.0) then 924 msglen = nsize2 925 call RCV(9+MSGDBL,A2,mdtob(msglen),rcv_len, 926 > proc_from,rcv_proc,1) 927 request(3) = 1 928 else 929 request(3) = 0 930 end if 931 932* **** receive then receive **** 933 else 934 if (nsize2.gt.0) then 935 msglen = nsize2 936 call RCV(9+MSGDBL,A2,mdtob(msglen),rcv_len, 937 > proc_from,rcv_proc,1) 938 request(3) = 1 939 else 940 request(3) = 0 941 end if 942 943 if (nsize1.gt.0) then 944 msglen = nsize1 945 call SND(9+MSGDBL,A1,mdtob(msglen),proc_to,1) 946 request(4) = 1 947 else 948 request(4) = 0 949 end if 950 end if 951 952 if ((request(3).eq.1).and.(request(4).eq.1)) then 953 request(3) = 1 954 else if (request(3).eq.1) then 955 request(3) = 2 956 else if (request(4).eq.1) then 957 request(3) = 3 958 else 959 request(3) = 4 960 end if 961 962 return 963 end 964 965 966* *********************************** 967* * * 968* * Parallela_start_Irotate * 969* * * 970* *********************************** 971 subroutine Parallela_start_Irotate(ic,shift, 972 > A1,nsize1, 973 > A2,nsize2,request) 974 implicit none 975 integer ic,shift 976 integer A1(*) 977 integer nsize1 978 integer A2(*) 979 integer nsize2 980 integer request(*) 981 982#include "bafdecls.fh" 983#include "tcgmsg.fh" 984#include "msgtypesf.h" 985#include "errquit.fh" 986 987#include "Parallel.fh" 988 989 990* **** local variables **** 991 integer i,mynp,mytaskid,msglen,msgype,mpierr,proc_to,proc_from 992 integer rcv_proc,rcv_len,psr,tmp(2),pto 993 994* ***** external functions **** 995 integer Parallel3d_convert_taskid_i 996 integer Parallel3d_convert_taskid_j 997 integer Parallel3d_convert_taskid_k 998 external Parallel3d_convert_taskid_i 999 external Parallel3d_convert_taskid_j 1000 external Parallel3d_convert_taskid_k 1001 1002 if (ic.eq.1) then 1003 mynp = np_i 1004 mytaskid = taskid_i 1005 proc_to = mod(mytaskid+shift+mynp,mynp) 1006 proc_from = mod(mytaskid-shift+mynp,mynp) 1007 1008 proc_to = Parallel3d_convert_taskid_i(proc_to) 1009 proc_from = Parallel3d_convert_taskid_i(proc_from) 1010 else if (ic.eq.2) then 1011 mynp = np_j 1012 mytaskid = taskid_j 1013 proc_to = mod(mytaskid+shift+mynp,mynp) 1014 proc_from = mod(mytaskid-shift+mynp,mynp) 1015 1016 proc_to = Parallel3d_convert_taskid_j(proc_to) 1017 proc_from = Parallel3d_convert_taskid_j(proc_from) 1018 else if (ic.eq.3) then 1019 mynp = np_k 1020 mytaskid = taskid_k 1021 proc_to = mod(mytaskid+shift+mynp,mynp) 1022 proc_from = mod(mytaskid-shift+mynp,mynp) 1023 1024 proc_to = Parallel3d_convert_taskid_k(proc_to) 1025 proc_from = Parallel3d_convert_taskid_k(proc_from) 1026 else 1027 mynp = np 1028 mytaskid = taskid 1029 proc_to = mod(mytaskid+shift+mynp,mynp) 1030 proc_from = mod(mytaskid-shift+mynp,mynp) 1031 end if 1032 1033* /* determine psr - should be made w/o using tmp array! */ 1034 if (.not.BA_push_get(mt_int,mynp,'tmppp2',tmp(2),tmp(1))) 1035 > call errquit('Parallela_start_rotate:out of stack',0,MA_ERR) 1036 do i=0,np-1 1037 int_mb(tmp(1)+i) = 0 1038 end do 1039 do i=0,np-1 1040 pto = mod(i+shift+mynp,mynp) 1041 if ((int_mb(tmp(1)+i).eq.0).and.(int_mb(tmp(1)+pto).eq.0)) then 1042 int_mb(tmp(1)+i) = 1 1043 int_mb(tmp(1)+pto) = 2 1044 end if 1045 end do 1046 psr = int_mb(tmp(1)+mytaskid) 1047 if (psr.eq.0) psr = 2 1048 if (.not.BA_pop_stack(tmp(2))) 1049 > call errquit('Parallela_start_rotate:popping stack',0,MA_ERR) 1050 1051* **** send then receive **** 1052 if (psr.eq.1) then 1053 1054 if (nsize1.gt.0) then 1055 msglen = nsize1 1056 call SND(9+MSGINT,A1,mitob(msglen),proc_to,1) 1057 request(4) = 1 1058 else 1059 request(4) = 0 1060 end if 1061 1062 if (nsize2.gt.0) then 1063 msglen = nsize2 1064 call RCV(9+MSGINT,A2,mitob(msglen),rcv_len, 1065 > proc_from,rcv_proc,1) 1066 request(3) = 1 1067 else 1068 request(3) = 0 1069 end if 1070 1071* **** receive then receive **** 1072 else 1073 if (nsize2.gt.0) then 1074 msglen = nsize2 1075 call RCV(9+MSGINT,A2,mitob(msglen),rcv_len, 1076 > proc_from,rcv_proc,1) 1077 request(3) = 1 1078 else 1079 request(3) = 0 1080 end if 1081 1082 if (nsize1.gt.0) then 1083 msglen = nsize1 1084 call SND(9+MSGINT,A1,mitob(msglen),proc_to,1) 1085 request(4) = 1 1086 else 1087 request(4) = 0 1088 end if 1089 end if 1090 1091 if ((request(3).eq.1).and.(request(4).eq.1)) then 1092 request(3) = 1 1093 else if (request(3).eq.1) then 1094 request(3) = 2 1095 else if (request(4).eq.1) then 1096 request(3) = 3 1097 else 1098 request(3) = 4 1099 end if 1100 1101 return 1102 end 1103 1104 1105 1106 1107 1108 1109* *********************************** 1110* * * 1111* * Parallela_end_rotate * 1112* * * 1113* *********************************** 1114 1115 subroutine Parallela_end_rotate(request) 1116 implicit none 1117 integer request(*) 1118 1119* **** wait for completion of mp_send, also do a sync **** 1120 !*** do nothing *** 1121 1122 return 1123 end 1124 1125 1126 1127* *********************************** 1128* * * 1129* * Parallel_send_characters * 1130* * * 1131* *********************************** 1132 subroutine Parallel_send_characters(pto,msgtype,nsize,cval) 1133 implicit none 1134 integer pto,msgtype,nsize 1135 character cval(*) 1136 1137#include "tcgmsg.fh" 1138#include "msgtypesf.h" 1139 1140 integer rcv_len,rcv_proc 1141 1142 call SND(9+MSGCHR,cval,nsize,pto,1) 1143 return 1144 end 1145 1146* *********************************** 1147* * * 1148* * Parallel_send_values * 1149* * * 1150* *********************************** 1151 subroutine Parallel_send_values(pto,msgtype,nsize,rval) 1152 implicit none 1153 integer pto,msgtype,nsize 1154 character rval(*) 1155 1156#include "tcgmsg.fh" 1157#include "msgtypesf.h" 1158 1159 call SND(9+MSGDBL,rval,mdtob(nsize),pto,1) 1160 return 1161 end 1162 1163* *********************************** 1164* * * 1165* * Parallel_send_ivalues * 1166* * * 1167* *********************************** 1168 subroutine Parallel_send_ivalues(pto,msgtype,nsize,ival) 1169 implicit none 1170 integer pto,msgtype,nsize 1171 integer ival(*) 1172 1173#include "tcgmsg.fh" 1174#include "msgtypesf.h" 1175 1176 call SND(9+MSGINT,ival,mitob(nsize),pto,1) 1177 return 1178 end 1179 1180 1181 1182 1183 1184* *********************************** 1185* * * 1186* * Parallel_recv_characters * 1187* * * 1188* *********************************** 1189 subroutine Parallel_recv_characters(pfrom,msgtype,nsize,cval) 1190 implicit none 1191 integer pfrom,msgtype,nsize 1192 character cval(*) 1193 1194#include "tcgmsg.fh" 1195#include "msgtypesf.h" 1196 1197 integer rcv_len,rcv_proc 1198 1199 call RCV(9+MSGCHR,cval,nsize,rcv_len,pfrom,rcv_proc,1) 1200 return 1201 end 1202 1203 1204* *********************************** 1205* * * 1206* * Parallel_recv_values * 1207* * * 1208* *********************************** 1209 subroutine Parallel_recv_values(pfrom,msgtype,nsize,rval) 1210 implicit none 1211 integer pfrom,msgtype,nsize 1212 real*8 rval(*) 1213 1214#include "tcgmsg.fh" 1215#include "msgtypesf.h" 1216 1217 integer rcv_len,rcv_proc 1218 1219 call RCV(9+MSGDBL,rval,mdtob(nsize),rcv_len,pfrom,rcv_proc,1) 1220 return 1221 end 1222 1223 1224* *********************************** 1225* * * 1226* * Parallel_recv_ivalues * 1227* * * 1228* *********************************** 1229 subroutine Parallel_recv_ivalues(pfrom,msgtype,nsize,ival) 1230 implicit none 1231 integer pfrom,msgtype,nsize 1232 integer ival(*) 1233 1234#include "tcgmsg.fh" 1235#include "msgtypesf.h" 1236 1237 integer rcv_len,rcv_proc 1238 1239 call RCV(9+MSGINT,ival,mitob(nsize),rcv_len,pfrom,rcv_proc,1) 1240 return 1241 end 1242 1243