1* 2* $Id$ 3* 4 5* *********************************** 6* * * 7* * Balance_Init * 8* * * 9* *********************************** 10 11 subroutine Balance_Init(maxsize0,nidb,nidb_out) 12 implicit none 13 integer maxsize0 14 integer nidb(0:maxsize0-1) 15 integer nidb_out(0:maxsize0-1) 16 17#include "bafdecls.fh" 18#include "balance_common.fh" 19#include "errquit.fh" 20 21* **** local variables **** 22 logical value 23 integer nb,np,taskid 24 integer nwave,nwave_out 25 integer dum(2) 26 27 28 maxsize = maxsize0 29 call Parallel2d_np_i(np) 30 call Parallel2d_taskid_i(taskid) 31 32* **** allocate balance memory **** 33 value = BA_alloc_get(mt_int,2*maxsize, 34 > 'psizea_list', 35 > packet_size_list(2), 36 > packet_size_list(1)) 37 value = value.and. 38 > BA_alloc_get(mt_int,2*maxsize, 39 > 'indxsa_list', 40 > indx_start_list(2), 41 > indx_start_list(1)) 42 value = value.and. 43 > BA_alloc_get(mt_int,2*maxsize, 44 > 'prctoa_list', 45 > proc_to_list(2), 46 > proc_to_list(1)) 47 value = value.and. 48 > BA_alloc_get(mt_int,2*maxsize, 49 > 'prcfra_list', 50 > proc_from_list(2), 51 > proc_from_list(1)) 52 53 value = value.and. 54 > BA_alloc_get(mt_int,maxsize, 55 > 'npacket_list', 56 > npacket_list(2), 57 > npacket_list(1)) 58 value = value.and. 59 > BA_alloc_get(mt_log,maxsize, 60 > 'receiver_list', 61 > receiver_list(2), 62 > receiver_list(1)) 63 value = value.and. 64 > BA_alloc_get(mt_log,maxsize, 65 > 'sender_list', 66 > sender_list(2), 67 > sender_list(1)) 68 69 do nb=0,maxsize-1 70 71* **** allocate balance memory **** 72 value = value.and. 73 > BA_alloc_get(mt_int,np, 74 > 'psizea',dum(2),dum(1)) 75 int_mb(packet_size_list(1)+2*nb ) = dum(1) 76 int_mb(packet_size_list(1)+2*nb+1) = dum(2) 77 78 value = value.and. 79 > BA_alloc_get(mt_int,np, 80 > 'indxsa',dum(2),dum(1)) 81 int_mb(indx_start_list(1)+2*nb ) = dum(1) 82 int_mb(indx_start_list(1)+2*nb+1) = dum(2) 83 84 value = value.and. 85 > BA_alloc_get(mt_int,np, 86 > 'prctoa',dum(2),dum(1)) 87 int_mb(proc_to_list(1)+2*nb ) = dum(1) 88 int_mb(proc_to_list(1)+2*nb+1) = dum(2) 89 90 value = value.and. 91 > BA_alloc_get(mt_int,np, 92 > 'prcfra',dum(2),dum(1)) 93 int_mb(proc_from_list(1)+2*nb ) = dum(1) 94 int_mb(proc_from_list(1)+2*nb+1) = dum(2) 95 96 end do 97 98 if (.not. value) 99 > call errquit('Balance_init: out of heap memory',0, MA_ERR) 100 101 102 103 do nb=0,maxsize-1 104 nwave = nidb(nb) 105 call Balance_Init_a(nwave,np,taskid,nwave_out, 106 > int_mb(npacket_list(1) +nb), 107 > log_mb(receiver_list(1)+nb), 108 > log_mb(sender_list(1) +nb), 109 > int_mb(int_mb(proc_to_list(1) +2*nb)), 110 > int_mb(int_mb(proc_from_list(1) +2*nb)), 111 > int_mb(int_mb(packet_size_list(1)+2*nb)), 112 > int_mb(int_mb(indx_start_list(1) +2*nb))) 113 114 nidb_out(nb) = nidb(nb) + (nwave_out-nwave) 115 end do 116 117 return 118 end 119 120 121* *********************************** 122* * * 123* * Balance_End * 124* * * 125* *********************************** 126 127 subroutine Balance_End() 128 implicit none 129 130#include "bafdecls.fh" 131#include "balance_common.fh" 132#include "errquit.fh" 133 134 135* **** local variables **** 136 logical value 137 integer nb,dum2 138 139 value = .true. 140 do nb=0,maxsize-1 141 142 dum2 = int_mb(packet_size_list(1)+2*nb+1) 143 value = value.and.BA_free_heap(dum2) 144 145 dum2 = int_mb(indx_start_list(1)+2*nb+1) 146 value = value.and.BA_free_heap(dum2) 147 148 dum2 = int_mb(proc_to_list(1)+2*nb+1) 149 value = value.and.BA_free_heap(dum2) 150 151 dum2 = int_mb(proc_from_list(1)+2*nb+1) 152 value = value.and.BA_free_heap(dum2) 153 154 end do 155 156 value = value.and.BA_free_heap(packet_size_list(2)) 157 value = value.and.BA_free_heap(indx_start_list(2)) 158 value = value.and.BA_free_heap(proc_to_list(2)) 159 value = value.and.BA_free_heap(proc_from_list(2)) 160 161 value = value.and.BA_free_heap(npacket_list(2)) 162 value = value.and.BA_free_heap(receiver_list(2)) 163 value = value.and.BA_free_heap(sender_list(2)) 164 if (.not. value) 165 > call errquit('Balance_end: error freeing heap memory',0, MA_ERR) 166 167 return 168 end 169 170* *********************************** 171* * * 172* * Balance_Init_a * 173* * * 174* *********************************** 175* This routine defines the balance data structure 176 177 subroutine Balance_Init_a(nwave,np,taskid, 178 > nwave_out, 179 > npacket,receiver,sender, 180 > proc_to,proc_from, 181 > packet_size,indx_start) 182 implicit none 183 integer nwave,np,taskid 184 integer nwave_out 185 186 integer npacket 187 logical receiver,sender 188 integer proc_to(*),proc_from(*) 189 integer packet_size(*) 190 integer indx_start(*) 191 192#include "bafdecls.fh" 193#include "errquit.fh" 194 195* ***** local variables **** 196 logical done,value 197 integer i,j 198 integer ave,short,long 199 integer above,below 200 201c integer nwave2(0:(np-1)) 202c integer indx(0:(np-1)) 203 integer nwave2(2),indx(2) 204 205* **** allocate nwave2 and indx off the stack **** 206 value = BA_push_get(mt_int,(np), 207 > 'nwave2',nwave2(2),nwave2(1)) 208 value = value.and. 209 > BA_push_get(mt_int,(np), 210 > 'indx',indx(2),indx(1)) 211 if (.not. value) 212 > call errquit('Balance_init_a:out of stack memory',0, MA_ERR) 213 214* **** define nwave2 **** 215 do i=0,np-1 216c nwave2(i) = 0 217 int_mb(nwave2(1)+i) = 0 218 end do 219c nwave2(taskid) = nwave 220 int_mb(nwave2(1)+taskid) = nwave 221c call D3dB_Vector_ISumAll(np,nwave2) 222 call D3dB_Vector_ISumAll(np,int_mb(nwave2(1))) 223 224* **** get the sorting index **** 225c call nwave2_sort(np,nwave2,indx) 226 call nwave2_sort(np,int_mb(nwave2(1)),int_mb(indx(1))) 227 228* ***** get the average **** 229 ave = 0 230 do i=0,np-1 231c ave = ave + nwave2(i) 232 ave = ave + int_mb(nwave2(1)+i) 233 end do 234 ave = ave/np 235 236* ***** get below *** 237 below = -1 238 do while (int_mb(nwave2(1) + int_mb(indx(1)+below+1)).lt.ave) 239 below = below + 1 240 end do 241 242* ***** get above *** 243 above = np 244 do while (int_mb(nwave2(1) + int_mb(indx(1)+above-1)).gt.ave) 245 above = above - 1 246 end do 247 248 249 npacket = 0 250 receiver = .false. 251 sender = .false. 252 253 if (np.gt.1) then 254 i = 0 255 j = np-1 256 done = .false. 257 if (i .gt. below) done = .true. 258 if (j .lt. above) done = .true. 259 do while (.not. done) 260 short = ave - int_mb(nwave2(1)+int_mb(indx(1)+i)) 261 long = int_mb(nwave2(1)+int_mb(indx(1)+j)) - ave 262 263 if (taskid.eq.int_mb(indx(1)+i)) then 264 npacket = npacket + 1 265 proc_from(npacket) = int_mb(indx(1)+j) 266 receiver = .true. 267 end if 268 269 if (taskid.eq.int_mb(indx(1)+j)) then 270 npacket = npacket + 1 271 proc_to(npacket) = int_mb(indx(1)+i) 272 sender = .true. 273 end if 274 275 276 if (short.eq.long) then 277 278 if (taskid.eq.int_mb(indx(1)+i)) then 279 packet_size(npacket) = short 280 indx_start(npacket) = 281 > int_mb(nwave2(1)+int_mb(indx(1)+i)) + 1 282 end if 283 284 if (taskid.eq.int_mb(indx(1)+j)) then 285 packet_size(npacket) = long 286 indx_start(npacket) = 287 > int_mb(nwave2(1)+int_mb(indx(1)+j)) - long + 1 288 end if 289 290 int_mb(nwave2(1)+int_mb(indx(1)+i)) = 291 > int_mb(nwave2(1)+int_mb(indx(1)+i)) + short 292 int_mb(nwave2(1)+int_mb(indx(1)+j)) = 293 > int_mb(nwave2(1)+int_mb(indx(1)+j)) - long 294 i = i + 1 295 j = j - 1 296 297 298 else if (short.lt.long) then 299 300 if (taskid.eq.int_mb(indx(1)+i)) then 301 packet_size(npacket) = short 302 indx_start(npacket) = 303 > int_mb(nwave2(1)+int_mb(indx(1)+i)) + 1 304 end if 305 306 if (taskid.eq.int_mb(indx(1)+j)) then 307 packet_size(npacket) = short 308 indx_start(npacket) = 309 > int_mb(nwave2(1)+int_mb(indx(1)+j)) - short + 1 310 end if 311 312 int_mb(nwave2(1)+int_mb(indx(1)+i)) = 313 > int_mb(nwave2(1)+int_mb(indx(1)+i)) + short 314 int_mb(nwave2(1)+int_mb(indx(1)+j)) = 315 > int_mb(nwave2(1)+int_mb(indx(1)+j)) - short 316 i = i + 1 317 318 319 else if (short.gt.long) then 320 if (taskid.eq.int_mb(indx(1)+i)) then 321 packet_size(npacket) = long 322 indx_start(npacket) = 323 > int_mb(nwave2(1)+int_mb(indx(1)+i)) + 1 324 end if 325 326 if (taskid.eq.int_mb(indx(1)+j)) then 327 packet_size(npacket) = long 328 indx_start(npacket) = 329 > int_mb(nwave2(1)+int_mb(indx(1)+j)) - long + 1 330 end if 331 332 int_mb(nwave2(1)+int_mb(indx(1)+i)) = 333 > int_mb(nwave2(1)+int_mb(indx(1)+i)) + long 334 int_mb(nwave2(1)+int_mb(indx(1)+j)) = 335 > int_mb(nwave2(1)+int_mb(indx(1)+j)) - long 336 j = j - 1 337 338 end if 339 340 if (i .gt. below) done = .true. 341 if (j .lt. above) done = .true. 342 343 end do 344 345 end if 346 347 nwave_out = int_mb(nwave2(1)+taskid) 348 349 value = BA_pop_stack(indx(2)) 350 value = value.and.BA_pop_stack(nwave2(2)) 351 if (.not. value) 352 > call errquit('Balance_init_a:error freeing stack memory',0, 353 & MA_ERR) 354 355 356 return 357 end 358 359 subroutine nwave2_sort(n,f,indx) 360 integer n 361 integer f(0:(n-1)) 362 integer indx(0:(n-1)) 363 364 integer i,j,idum 365 do i=0,n-1 366 indx(i) = i 367 end do 368 do i=0,(n-2) 369 do j=i+1,(n-1) 370 if (f(indx(j)).lt.f(indx(i))) then 371 idum = indx(i) 372 indx(i) = indx(j) 373 indx(j) = idum 374 end if 375 end do 376 end do 377 378 return 379 end 380 381* ************************************ 382* * * 383* * Balance_c_balance * 384* * * 385* ************************************ 386 387 subroutine Balance_c_balance(nb,A) 388 implicit none 389 integer nb 390 complex*16 A(*) 391 392#include "bafdecls.fh" 393#include "tcgmsg.fh" 394#include "msgtypesf.h" 395#include "balance_common.fh" 396 397* **** local variables **** 398 integer rcv_len,rcv_proc 399 integer j 400 integer pto,pfrom,msglen,indx 401 402* **** external functions **** 403 integer Parallel2d_convert_taskid_i 404 external Parallel2d_convert_taskid_i 405 406!$OMP MASTER 407 if (log_mb(sender_list(1)+nb)) then 408 do j=1,int_mb(npacket_list(1)+nb) 409 pto = int_mb(int_mb(proc_to_list(1) +2*nb)+j-1) 410 msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1) 411 indx = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1) 412c send data.... 413 if (msglen.gt.0) then 414 call SND(9+MSGDBL, 415 > A(indx), 416 > mdtob(2*msglen), 417 > Parallel2d_convert_taskid_i(pto), 418 > 1) 419 end if 420 421 422 end do 423 end if 424 425 if (log_mb(receiver_list(1)+nb)) then 426 do j=1,int_mb(npacket_list(1)+nb) 427 pfrom = int_mb(int_mb(proc_from_list(1) +2*nb)+j-1) 428 msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1) 429 indx = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1) 430c recieve data.... 431 if (msglen.gt.0) then 432 call RCV(9+MSGDBL, 433 > A(indx), 434 > mdtob(2*msglen),rcv_len, 435 > Parallel2d_convert_taskid_i(pfrom), 436 > rcv_proc,1) 437 end if 438 439 end do 440 end if 441!$OMP END MASTER 442!$OMP BARRIER 443 444 return 445 end 446 447 448* ************************************ 449* * * 450* * Balance_t_balance * 451* * * 452* ************************************ 453 454 subroutine Balance_t_balance(nb,A) 455 implicit none 456 integer nb 457 real*8 A(*) 458 459#include "bafdecls.fh" 460#include "tcgmsg.fh" 461#include "msgtypesf.h" 462#include "balance_common.fh" 463 464* **** local variables **** 465 integer rcv_len,rcv_proc 466 integer j 467 integer pto,pfrom,msglen,indx 468 469* **** external functions **** 470 integer Parallel2d_convert_taskid_i 471 external Parallel2d_convert_taskid_i 472 473!$OMP MASTER 474 if (log_mb(sender_list(1)+nb)) then 475 do j=1,int_mb(npacket_list(1)+nb) 476 pto = int_mb(int_mb(proc_to_list(1) +2*nb)+j-1) 477 msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1) 478 indx = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1) 479c send data.... 480 if (msglen.gt.0) then 481 call SND(9+MSGDBL, 482 > A(indx), 483 > mdtob(msglen), 484 > Parallel2d_convert_taskid_i(pto), 485 > 1) 486 end if 487 488 489 end do 490 end if 491 492 if (log_mb(receiver_list(1)+nb)) then 493 do j=1,int_mb(npacket_list(1)+nb) 494 pfrom = int_mb(int_mb(proc_from_list(1) +2*nb)+j-1) 495 msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1) 496 indx = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1) 497c recieve data.... 498 if (msglen.gt.0) then 499 call RCV(9+MSGDBL, 500 > A(indx), 501 > mdtob(msglen),rcv_len, 502 > Parallel2d_convert_taskid_i(pfrom), 503 > rcv_proc,1) 504 end if 505 506 507 end do 508 end if 509!$OMP END MASTER 510!$OMP BARRIER 511 512 return 513 end 514 515 516* ************************************ 517* * * 518* * Balance_c_unbalance * 519* * * 520* ************************************ 521 522 subroutine Balance_c_unbalance(nb,A) 523 implicit none 524 integer nb 525 complex*16 A(*) 526 527#include "bafdecls.fh" 528#include "tcgmsg.fh" 529#include "msgtypesf.h" 530#include "balance_common.fh" 531 532* **** local variables **** 533 integer rcv_len,rcv_proc 534 integer j 535 integer pto,pfrom,msglen,indx 536 537* **** external functions **** 538 integer Parallel2d_convert_taskid_i 539 external Parallel2d_convert_taskid_i 540 541!$OMP MASTER 542 if (log_mb(sender_list(1)+nb)) then 543 do j=1,int_mb(npacket_list(1)+nb) 544 pfrom = int_mb(int_mb(proc_to_list(1) +2*nb)+j-1) 545 msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1) 546 indx = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1) 547c recieve data.... 548 if (msglen.gt.0) then 549 call RCV(9+MSGDBL, 550 > A(indx), 551 > mdtob(2*msglen),rcv_len, 552 > Parallel2d_convert_taskid_i(pfrom), 553 > rcv_proc,1) 554 end if 555 556 end do 557 end if 558 559 if (log_mb(receiver_list(1)+nb)) then 560 do j=1,int_mb(npacket_list(1)+nb) 561 pto = int_mb(int_mb(proc_from_list(1) +2*nb)+j-1) 562 msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1) 563 indx = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1) 564c send data.... 565 if (msglen.gt.0) then 566 call SND(9+MSGDBL, 567 > A(indx), 568 > mdtob(2*msglen), 569 > Parallel2d_convert_taskid_i(pto),1) 570 end if 571 572 end do 573 end if 574!$OMP END MASTER 575!$OMP BARRIER 576 577 return 578 end 579 580 581* ************************************ 582* * * 583* * Balance_i_balance * 584* * * 585* ************************************ 586 587 subroutine Balance_i_balance(nb,A) 588 implicit none 589 integer nb 590 integer A(*) 591 592#include "bafdecls.fh" 593#include "tcgmsg.fh" 594#include "msgtypesf.h" 595#include "balance_common.fh" 596 597* **** local variables **** 598 integer rcv_len,rcv_proc 599 integer j 600 integer pto,pfrom,msglen,indx 601 602* **** external functions **** 603 integer Parallel2d_convert_taskid_i 604 external Parallel2d_convert_taskid_i 605 606 if (log_mb(sender_list(1)+nb)) then 607 do j=1,int_mb(npacket_list(1)+nb) 608 pto = int_mb(int_mb(proc_to_list(1) +2*nb)+j-1) 609 msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1) 610 indx = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1) 611c send data.... 612 if (msglen.gt.0) then 613 call SND(9+MSGINT, 614 > A(indx), 615 > mitob(msglen), 616 > Parallel2d_convert_taskid_i(pto),1) 617 end if 618 619 620 end do 621 end if 622 623 if (log_mb(receiver_list(1)+nb)) then 624 do j=1,int_mb(npacket_list(1)+nb) 625 pfrom = int_mb(int_mb(proc_from_list(1) +2*nb)+j-1) 626 msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1) 627 indx = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1) 628c recieve data.... 629 if (msglen.gt.0) then 630 call RCV(9+MSGINT, 631 > A(indx), 632 > mitob(msglen),rcv_len, 633 > Parallel2d_convert_taskid_i(pfrom), 634 > rcv_proc,1) 635 end if 636 637 end do 638 end if 639 640 return 641 end 642 643