1c $Id$ 2c vector boxes lack arithmetic precision 3#ifdef CRAY_YMP 4# define THRESH 1d-10 5# define THRESHF 1e-5 6#elif defined(FUJITSU) 7# define THRESH 1d-12 8# define THRESHF 1e-5 9#else 10# define THRESH 1d-13 11# define THRESHF 1e-5 12#endif 13 14#define MISMATCH(x,y) abs(x-y)/max(1d0,abs(x)).gt.THRESH 15#define MISMATCHF(x,y) abs(x-y)/max(1.0,abs(x)).gt.THRESHF 16 17 18 subroutine util_ga_test 19 implicit none 20#include "mafdecls.fh" 21#include "global.fh" 22#include "testutil.fh" 23 integer heap, stack, fudge, ma_heap, me, nproc 24 logical status 25 parameter (heap=100*100*4, fudge=100, stack=100*100) 26c 27c*** Intitialize a message passing library 28c 29#ifdef MPI 30c integer ierr 31c call mpi_init(ierr) 32#else 33c call pbeginf 34#endif 35c 36c*** Initialize GA 37c 38c There are 2 choices: ga_initialize or ga_initialize_ltd. 39c In the first case, there is no explicit limit on memory usage. 40c In the second, user can set limit (per processor) in bytes. 41c 42c call ga_initialize() 43 nproc = ga_nnodes() 44 me = ga_nodeid() 45c we can also use GA_set_memory_limit BEFORE first ga_create call 46c 47 ma_heap = heap/nproc + fudge 48c call GA_set_memory_limit(util_mdtob(ma_heap)) 49c 50c if(ga_nodeid().eq.0)then 51c print *,' GA initialized ' 52c call ffflush(6) 53c endif 54c 55c*** Initialize the MA package 56c MA must be initialized before any global array is allocated 57c 58c status = ma_init(MT_DCPL, stack, ma_heap) 59c if (.not. status) call ga_error('ma_init failed',-1) 60c 61c Uncomment the below line to register external memory allocator 62c for dynamic arrays inside GA routines. 63c call register_ext_memory() 64c 65c if(me.eq.(nproc-1))then 66c print *, 'using ', nproc,' process(es) ', ga_cluster_nnodes(), 67c $ ' cluster nodes' 68c print *,'process ', me, ' is on node ',ga_cluster_nodeid(), 69c $ ' with ', ga_cluster_nprocs(-1), ' processes' 70c call ffflush(6) 71c endif 72 if (me.eq.0) then 73 write(6,'(A,I3)') ' Number of processes ..............',nproc 74 write(6,'(A,I3)') ' Number of cluster nodes ..........', 75 1 ga_cluster_nnodes() 76 call ffflush(6) 77 endif 78 call ga_sync() 79c 80c*** Check support for double precision arrays 81c 82 if (me.eq.0) then 83 write(6,*) 84 write(6,'(A)') ' Checking doubles ' 85 write(6,*) 86 call ffflush(6) 87 endif 88 89 call check_dbl() 90c 91c*** Check support for double precision complex arrays 92c 93 if (me.eq.0) then 94 write(6,*) 95 write(6,'(A)') ' Checking double complexes' 96 write(6,*) 97 call ffflush(6) 98 endif 99 100 call check_complex() 101c 102c*** Check support for integer arrays 103c 104 if (me.eq.0) then 105 write(6,*) 106 write(6,'(A)') ' Checking integers ' 107 write(6,*) 108 call ffflush(6) 109 endif 110 111 call check_int() 112c 113c 114c*** Check support for single precision 115c 116 if (me.eq.0) then 117 write(6,*) 118 write(6,'(A)') ' Checking single precisions ' 119 write(6,*) 120 call ffflush(6) 121 endif 122 123 call check_flt() 124c 125 if (me.eq.0) then 126 write(6,*) 127 write(6,'(A)')' Checking wrappers to MP collective operations' 128 write(6,*) 129 call ffflush(6) 130 endif 131 132 call check_wrappers 133c 134c*** Check if memory limits are enforced 135c 136c if(ga_memory_limited()) 137c 1 call check_mem 138c 139c if(me.eq.0) call ga_print_stats() 140c if(me.eq.0) print *,' ' 141c if(me.eq.0) print *,'All tests succesful ' 142c 143c*** Tidy up the GA package 144c 145c call ga_terminate() 146c 147c*** Tidy up after message-passing library 148c 149#ifdef MPI 150c call mpi_finalize(ierr) 151#else 152c call pend() 153#endif 154c 155 end 156 157 158 subroutine check_dbl() 159 implicit none 160#include "mafdecls.fh" 161#include "global.fh" 162#include "testutil.fh" 163c 164 integer n,m 165 parameter (n = 128) 166 parameter (m = 2*n) 167 double precision a(n,n), b(n,n), v(m),w(m) 168 integer iv(m), jv(m) 169 logical status 170 integer g_a, g_b 171 integer i,j, loop,nloop,maxloop, ilo, ihi, jlo, jhi, itmp 172 integer nproc, me, ij, inc, ii, jj 173 parameter (maxloop = 100) 174 integer maxproc 175 parameter (maxproc = 128) 176 double precision crap, sum1, sum2, x 177 double precision nwords 178 integer iran 179 external iran 180c 181 nproc = ga_nnodes() 182 me = ga_nodeid() 183 nloop = Min(maxloop,n) 184c 185c a() is a local copy of what the global array should start as 186c 187 do j = 1, n 188 do i = 1, n 189 a(i,j) = i-1 + (j-1)*n 190 b(i,j) =-1. 191 enddo 192 enddo 193* write(6,*) ' correct ' 194* call output(a, 1, n, 1, n, n, n, 1) 195* call ffflush(6) 196c 197c Create a global array 198c 199* print *,ga_nodeid(), ' creating array' 200* call ffflush(6) 201c call setdbg(1) 202 status = ga_create(MT_DBL, n, n, 'a', 0, 0, g_a) 203 if (me.eq.0) then 204 if (status) then 205 write(6,'(A)') ' ga_create ........................ OK' 206 else 207 write(6,'(A)') ' ga_create ........................ Failed' 208 stop 209 endif 210 call ffflush(6) 211 endif 212c 213c check if handle is valid. be quiet unless error 214C 215 if(.not.ga_valid_handle(g_a)) call ga_error("invalid handle",g_a) 216c 217 call ga_distribution(g_a,me,ilo, ihi, jlo, jhi) 218 call ga_sync() 219c 220c Zero the array 221c 222 call ga_zero(g_a) 223c 224c Check that it is indeed zero 225c 226 status = .true. 227 call ga_get(g_a, 1, n, 1, n, b, n) 228 call ga_sync() 229 do i = 1, n 230 do j = 1, n 231 if (b(i,j) .ne. 0.0d0) then 232 status = .false. 233 endif 234 enddo 235 enddo 236 if (me.eq.0) then 237 if (status) then 238 write(6,'(A)') ' ga_zero .......................... OK' 239 else 240 write(6,'(A)') ' ga_zero .......................... Failed' 241 endif 242 call ffflush(6) 243 endif 244 call ga_sync() 245c 246c Each node fills in disjoint sections of the array 247c 248 call ga_sync() 249c 250 status = .true. 251 inc = (n-1)/20 + 1 252 ij = 0 253 do j = 1, n, inc 254 do i = 1, n, inc 255 if (mod(ij,nproc) .eq. me) then 256 ilo = i 257 ihi = min(i+inc, n) 258 jlo = j 259 jhi = min(j+inc, n) 260* write(6,4) me, ilo, ihi, jlo, jhi 261* 4 format(' node ',i2,' checking put ',4i4) 262* call ffflush(6) 263 call ga_put(g_a, ilo, ihi, jlo, jhi, a(ilo, jlo), n) 264 endif 265 ij = ij + 1 266 enddo 267 enddo 268 call ga_sync() 269c 270c All nodes check all of a 271c 272 call util_dfill(n*n, 0.0d0, b, 1) 273* call ga_print(g_a,1) 274 call ga_get(g_a, 1, n, 1, n, b, n) 275* write(6,*) ' after get' 276* call output(b, 1, n, 1, n, n, n, 1) 277c 278 do i = 1, n 279 do j = 1, n 280 if (b(i,j) .ne. a(i,j)) then 281 status = .false. 282 endif 283 enddo 284 enddo 285 if (me.eq.0) then 286 if (status) then 287 write(6,'(A)') ' ga_put ........................... OK' 288 else 289 write(6,'(A)') ' ga_put ........................... Failed' 290 endif 291 call ffflush(6) 292 endif 293 call ga_sync() 294c 295c Now check nloop random gets from each node 296c 297 call ga_sync() 298c 299 nwords = 0 300c 301 status = .true. 302 crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process 303 do loop = 1, nloop 304 ilo = iran(loop) 305 ihi = iran(loop) 306 if (ihi.lt. ilo) then 307 itmp = ihi 308 ihi = ilo 309 ilo = itmp 310 endif 311 jlo = iran(loop) 312 jhi = iran(loop) 313 if (jhi.lt. jlo) then 314 itmp = jhi 315 jhi = jlo 316 jlo = itmp 317 endif 318c 319 nwords = nwords + (ihi-ilo+1)*(jhi-jlo+1) 320c 321 call util_dfill(n*n, 0.0d0, b, 1) 322 call ga_get(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n) 323 sum1 = 0.0d0 324 do j = jlo, jhi 325 do i = ilo, ihi 326 sum1 = sum1 + b(i,j) 327 if (b(i,j) .ne. a(i,j)) then 328 status = .false. 329 endif 330 enddo 331 enddo 332c 333 enddo 334 if (me.eq.0) then 335 if (status) then 336 write(6,'(A)') ' ga_get ........................... OK' 337 else 338 write(6,'(A)') ' ga_get ........................... Failed' 339 endif 340 call ffflush(6) 341 endif 342 call ga_sync() 343c 344c Each node accumulates into disjoint sections of the array 345c 346 call ga_sync() 347c 348 crap = util_drand(12345) ! Same seed for each process 349 do j = 1, n 350 do i = 1, n 351c b(i,j) = util_drand(0) 352 b(i,j) = i+j 353 enddo 354 enddo 355c 356 inc = (n-1)/20 + 1 357 ij = 0 358 do j = 1, n, inc 359 do i = 1, n, inc 360c x = util_drand(0) 361 x = 10. 362 ilo = i 363 ihi = min(i+inc-1, n) 364 if(ihi.eq.n-1)ihi=n 365c ihi = min(i+inc, n) 366 jlo = j 367 jhi = min(j+inc-1, n) 368 if(jhi.eq.n-1)jhi=n 369c jhi = min(j+inc-1, n) 370* call ffflush(6) 371 if (mod(ij,nproc) .eq. me) then 372c print *, me, 'checking accumulate ',ilo,ihi,jlo,jhi,x 373* 11 format(' node ',i2,' checking accumulate ',4i4) 374* call ffflush(6) 375 call ga_acc(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n, x) 376 endif 377 ij = ij + 1 378c 379c Each process applies all updates to its local copy 380c 381 do jj = jlo, jhi 382 do ii = ilo, ihi 383 a(ii,jj) = a(ii,jj) + x * b(ii,jj) 384 enddo 385 enddo 386 enddo 387 enddo 388 call ga_sync() 389c 390c All nodes check all of a 391c 392 status = .true. 393 call ga_get(g_a, 1, n, 1, n, b, n) 394 do j = 1, n 395 do i = 1, n 396 if(MISMATCH(b(i,j),a(i,j)))then 397 status = .false. 398 endif 399 enddo 400 enddo 401 if (me.eq.0) then 402 if (status) then 403 write(6,'(A)') ' ga_acc (disjoint) ................ OK' 404 else 405 write(6,'(A)') ' ga_acc (disjoint) ................ Failed' 406 endif 407 call ffflush(6) 408 endif 409c 410c overlapping accumulate 411 status = .true. 412 call ga_sync() 413 if (.not. ga_create(MT_DBL, n, n, 'b', 0, 0, g_b)) then 414 status = .false. 415 endif 416c 417 call ga_zero(g_b) 418 call ga_acc(g_b, n/2, n/2, n/2, n/2, 1d0, 1, 1d0) 419 call ga_sync() 420 if (me.eq.0) then 421 call ga_get(g_b, n/2, n/2, n/2, n/2, b(1,1), 1) 422 x = abs(b(1,1) -1d0*nproc) 423 if(x.gt. 1d-10)then 424 status = .false. 425 endif 426 endif 427 if (me.eq.0) then 428 if (status) then 429 write(6,'(A)') ' ga_acc (overlap) ................. OK' 430 else 431 write(6,'(A)') ' ga_acc (overlap) ................. Failed' 432 endif 433 call ffflush(6) 434 endif 435c 436c Check the ga_add function 437c 438 crap = util_drand(12345) ! Everyone has same seed 439 do j = 1, n 440 do i = 1, n 441 b(i,j) = util_drand(0) 442 a(i,j) = 0.1d0*a(i,j) + 0.9d0*b(i,j) 443 enddo 444 enddo 445 446 status = .true. 447 if (me.eq.0) call ga_put(g_b, 1, n, 1, n, b, n) 448 call ga_add(0.1d0, g_a, 0.9d0, g_b, g_b) 449 call ga_get(g_b, 1, n, 1, n, b, n) 450 do j = 1, n 451 do i = 1, n 452 if(MISMATCH(b(i,j), a(i,j)))then 453 status = .false. 454 endif 455 enddo 456 enddo 457 if (me.eq.0) then 458 if (status) then 459 write(6,'(A)') ' ga_add ........................... OK' 460 else 461 write(6,'(A)') ' ga_add ........................... Failed' 462 endif 463 call ffflush(6) 464 endif 465 call ga_sync() 466c 467c Check the ddot function 468c 469 crap = util_drand(12345) ! Everyone has same seed 470 sum1 = 0.0d0 471 do j = 1, n 472 do i = 1, n 473 b(i,j) = util_drand(0) 474 sum1 = sum1 + a(i,j)*b(i,j) 475 enddo 476 enddo 477 if (me.eq.0) then 478 call ga_put(g_b, 1, n, 1, n, b, n) 479 call ga_put(g_a, 1, n, 1, n, a, n) 480 endif 481 call ga_sync() 482 sum2 = ga_ddot(g_a,g_b) 483 status = .true. 484 if(MISMATCH(sum1, sum2))then 485 status = .false. 486 endif 487 if (me.eq.0) then 488 if (status) then 489 write(6,'(A)') ' ga_ddot .......................... OK' 490 else 491 write(6,'(A)') ' ga_ddot .......................... Failed' 492 endif 493 call ffflush(6) 494 endif 495c 496c Check the ga_scale function 497c 498 call ga_scale(g_a, 0.123d0) 499 call ga_get(g_a, 1, n, 1, n, b, n) 500 status = .true. 501 do j = 1, n 502 do i = 1, n 503 a(i,j) = a(i,j)*0.123d0 504 if (MISMATCH(b(i,j), a(i,j)))then 505 status = .false. 506 endif 507 enddo 508 enddo 509 if (me.eq.0) then 510 if (status) then 511 write(6,'(A)') ' ga_scale ......................... OK' 512 else 513 write(6,'(A)') ' ga_scale ......................... Failed' 514 endif 515 call ffflush(6) 516 endif 517c 518c Check the ga_copy function 519c 520 if(me.eq.0) call ga_put(g_a, 1, n, 1, n, a, n) 521 call ga_copy(g_a, g_b) 522 call ga_get(g_b, 1, n, 1, n, b, n) 523 status = .true. 524 do j = 1, n 525 do i = 1, n 526 if (b(i,j) .ne. a(i,j)) then 527 status = .false. 528 endif 529 enddo 530 enddo 531 if (me.eq.0) then 532 if (status) then 533 write(6,'(A)') ' ga_copy .......................... OK' 534 else 535 write(6,'(A)') ' ga_copy .......................... Failed' 536 endif 537 call ffflush(6) 538 endif 539c 540 call ga_sync() 541c 542 crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process 543 status = .true. 544 do j = 1, 10 545 call ga_sync() 546 itmp = iran(nproc)-1 547 if(me.eq.itmp) then 548 do loop = 1,m 549 ilo = iran(n) 550 jlo = iran(n) 551 iv(loop) = ilo 552 jv(loop) = jlo 553 enddo 554 call ga_gather(g_a, v, iv, jv, m) 555 do loop = 1,m 556 ilo= iv(loop) 557 jlo= jv(loop) 558 call ga_get(g_a,ilo,ilo,jlo,jlo,v(loop),1) 559 if(v(loop) .ne. a(ilo,jlo))then 560 status = .false. 561 endif 562 enddo 563 endif 564 enddo 565c 566 if (me.eq.0) then 567 if (status) then 568 write(6,'(A)') ' ga_gather ........................ OK' 569 else 570 write(6,'(A)') ' ga_gather ........................ Failed' 571 endif 572 call ffflush(6) 573 endif 574c 575 status = .true. 576 do j = 1,10 577 call ga_sync() 578 if(me.eq.iran(ga_nnodes())-1) then 579 do loop = 1,m 580 ilo = iran(n) 581 jlo = iran(n) 582 iv(loop) = ilo 583 jv(loop) = jlo 584c v(loop) = DSIN(a(ilo,jlo)+b(ilo,jlo)) 585 v(loop) = 1d0 *(ilo+jlo) 586 enddo 587 call ga_scatter(g_a, v, iv, jv, m) 588 do loop = 1,m 589 ilo= iv(loop) 590 jlo= jv(loop) 591 call ga_get(g_a,ilo,ilo,jlo,jlo,w(loop),1) 592c if(v(loop) .ne. w(loop))then 593 if(w(loop) .ne. 1d0 *(ilo+jlo) )then 594 status = .false. 595 endif 596 enddo 597 endif 598 call ga_sync() 599 enddo 600c 601 if (me.eq.0) then 602 if (status) then 603 write(6,'(A)') ' ga_scatter ....................... OK' 604 else 605 write(6,'(A)') ' ga_scatter ....................... Failed' 606 endif 607 call ffflush(6) 608 endif 609c 610 call ga_sync() 611c 612c scatter-acc available in GA ver. 3.0 613#ifdef GA3 614c 615 crap = util_drand(1234) 616 call ga_zero(g_a) 617c 618 do j = 1, n 619 do i = 1, n 620 b(i,j) =0. 621 enddo 622 enddo 623c 624 status = .true. 625 x = .1d0 626 ii =n 627 do jj = 1,1 628 call ga_sync() 629 do loop = 1, ii 630 631c generate unique i,j pairs 63210 continue 633 i = iran(n) 634 j=iran(n) 635 if (found(i,j, iv, jv, loop-1) ) goto 10 636 637 iv(loop) = i 638 jv(loop) = j 639 v(loop) = 1d0 *(i+j) 640 b(i,j) = b(i,j) + nproc*x*v(loop) ! update local ref. array 641 enddo 642 call ga_scatter_acc(g_a,v,iv,jv, ii,x) 643c 644 call ga_sync() 645c 646c check the result 647c 648 call ga_get(g_a, 1, n, 1,n, a, n) 649 650 do loop = 1,ii 651 i = iv(loop) 652 j = jv(loop) 653 if(MISMATCH(a(i,j),b(i,j)))then 654 status = .false. 655* if(me.eq.0)then 656* do ii=1,loop 657* print *,'element',ii, iv(ii),jv(ii) 658* enddo 659* endif 660 status = .false. 661 endif 662 enddo 663 call ga_sync() 664 enddo 665 666 call ga_sync() 667 if (me.eq.0) then 668 if (status) then 669 write(6,'(A)') ' ga_scatter_acc ................... OK' 670 else 671 write(6,'(A)') ' ga_scatter_acc ................... Failed' 672 endif 673 call ffflush(6) 674 endif 675#endif 676c 677c Delete the global arrays 678c 679 status = ga_destroy(g_b) 680 status = status .and. ga_destroy(g_a) 681 if (me.eq.0) then 682 if (status) then 683 write(6,'(A)') ' ga_destroy ....................... OK' 684 else 685 write(6,'(A)') ' ga_destroy ....................... Failed' 686 endif 687 call ffflush(6) 688 endif 689c 690 end 691 692c----------------------------------------------------------------- 693 subroutine check_complex() 694 implicit none 695#include "mafdecls.fh" 696#include "global.fh" 697#include "testutil.fh" 698c 699 integer n,m 700 parameter (n = 60) 701 parameter (m = 2*n) 702 double complex a(n,n), b(n,n), v(m),w(m) 703 integer iv(m), jv(m) 704 logical status 705 integer g_a, g_b 706 integer i,j, loop,nloop,maxloop, ilo, ihi, jlo, jhi, itmp 707 integer nproc, me, ij, inc, ii, jj 708 parameter (maxloop = 100) 709 integer maxproc 710 parameter (maxproc = 128) 711 double precision crap 712 double precision nwords 713 double complex x, sum1, sum2, factor 714 integer iran 715 external iran 716c 717 nproc = ga_nnodes() 718 me = ga_nodeid() 719 nloop = Min(maxloop,n) 720c 721c a() is a local copy of what the global array should start as 722c 723 do j = 1, n 724 do i = 1, n 725 a(i,j) = cmplx(dble(i-1), dble((j-1)*n)) 726 b(i,j) = cmplx(-1d0,1d0) 727 enddo 728 enddo 729c 730c Create a global array 731c 732c print *,ga_nodeid(), ' creating array' 733 call ffflush(6) 734c call setdbg(1) 735 status = ga_create(MT_DCPL, n, n, 'a', 0, 0, g_a) 736 status = status .and. ga_create(MT_DCPL, n, n, 'b', 0, 0, g_b) 737 if (me.eq.0) then 738 if (status) then 739 write(6,'(A)') ' ga_create ........................ OK' 740 else 741 write(6,'(A)') ' ga_create ........................ Failed' 742 endif 743 call ffflush(6) 744 endif 745 call ga_distribution(g_a,me,ilo, ihi, jlo, jhi) 746 call ga_sync() 747c 748c Zero the array 749c 750 call ga_zero(g_a) 751c 752c Check that it is indeed zero 753c 754 call ga_get(g_a, 1, n, 1, n, b, n) 755 call ga_sync() 756 status = .true. 757 do i = 1, n 758 do j = 1, n 759 if(b(i,j).ne.(0d0,0d0)) then 760 status = .false. 761 endif 762 enddo 763 enddo 764 if (me.eq.0) then 765 if (status) then 766 write(6,'(A)') ' ga_zero .......................... OK' 767 else 768 write(6,'(A)') ' ga_zero .......................... Failed' 769 endif 770 call ffflush(6) 771 endif 772 call ga_sync() 773c 774c Each node fills in disjoint sections of the array 775c 776 call ga_sync() 777c 778 inc = (n-1)/20 + 1 779 ij = 0 780 do j = 1, n, inc 781 do i = 1, n, inc 782 if (mod(ij,nproc) .eq. me) then 783 ilo = i 784 ihi = min(i+inc, n) 785 jlo = j 786 jhi = min(j+inc, n) 787 call ga_put(g_a, ilo, ihi, jlo, jhi, a(ilo, jlo), n) 788 endif 789 ij = ij + 1 790 enddo 791 enddo 792 call ga_sync() 793c 794c All nodes check all of a 795c 796 call util_qfill(n*n, (0d0,0d0), b, 1) 797 call ga_get(g_a, 1, n, 1, n, b, n) 798c 799 status = .true. 800 do i = 1, n 801 do j = 1, n 802 if (b(i,j) .ne. a(i,j)) then 803 status = .false. 804 endif 805 enddo 806 enddo 807 if (me.eq.0) then 808 if (status) then 809 write(6,'(A)') ' ga_put ........................... OK' 810 else 811 write(6,'(A)') ' ga_put ........................... Failed' 812 endif 813 call ffflush(6) 814 endif 815 call ga_sync() 816c 817c Now check nloop random gets from each node 818c 819 call ga_sync() 820c 821 nwords = 0 822c 823 status = .true. 824 crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process 825 do loop = 1, nloop 826 ilo = iran(loop) 827 ihi = iran(loop) 828 if (ihi.lt. ilo) then 829 itmp = ihi 830 ihi = ilo 831 ilo = itmp 832 endif 833 jlo = iran(loop) 834 jhi = iran(loop) 835 if (jhi.lt. jlo) then 836 itmp = jhi 837 jhi = jlo 838 jlo = itmp 839 endif 840c 841 nwords = nwords + (ihi-ilo+1)*(jhi-jlo+1) 842c 843 call util_qfill(n*n, (0.0d0,0d0), b, 1) 844 call ga_get(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n) 845 do j = jlo, jhi 846 do i = ilo, ihi 847 if (b(i,j) .ne. a(i,j)) then 848 status = .false. 849 endif 850 enddo 851 enddo 852c 853 enddo 854 if (me.eq.0) then 855 if (status) then 856 write(6,'(A)') ' ga_get ........................... OK' 857 else 858 write(6,'(A)') ' ga_get ........................... Failed' 859 endif 860 call ffflush(6) 861 endif 862 call ga_sync() 863c 864c Each node accumulates into disjoint sections of the array 865c 866 call ga_sync() 867c 868 status = .true. 869 crap = util_drand(12345) ! Same seed for each process 870 do j = 1, n 871 do i = 1, n 872 b(i,j) = cmplx(util_drand(0),util_drand(1)) 873 enddo 874 enddo 875c 876 inc = (n-1)/20 + 1 877 ij = 0 878 do j = 1, n, inc 879 do i = 1, n, inc 880c x = cmplx(util_drand(0),0.333d0) 881c x = cmplx(0.333d0,0) 882* x = cmplx(0d0,0d0) 883 x = 0 884 ilo = i 885 ihi = min(i+inc-1, n) 886 if(ihi.eq.n-1)ihi=n 887 jlo = j 888 jhi = min(j+inc-1, n) 889 if(jhi.eq.n-1)jhi=n 890 if (mod(ij,nproc) .eq. me) then 891 call ga_acc(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n, x) 892 endif 893 ij = ij + 1 894c 895c Each process applies all updates to its local copy 896c 897 do jj = jlo, jhi 898 do ii = ilo, ihi 899 a(ii,jj) = a(ii,jj) + x * b(ii,jj) 900 enddo 901 enddo 902 enddo 903 enddo 904 call ga_sync() 905c 906c All nodes check all of a 907c 908 call ga_get(g_a, 1, n, 1, n, b, n) 909 do j = 1, n 910 do i = 1, n 911 if (MISMATCH(b(i,j), a(i,j)))then 912 status = .false. 913 endif 914 enddo 915 enddo 916 if (me.eq.0) then 917 if (status) then 918 write(6,'(A)') ' ga_acc (disjoint) ................ OK' 919 else 920 write(6,'(A)') ' ga_acc (disjoint) ................ Failed' 921 endif 922 call ffflush(6) 923 endif 924c 925c overlapping accumulate 926c 927 call ga_zero(g_b) 928 call ga_acc(g_b, n/2, n/2, n/2, n/2, (1d0,-1d0), 1, (1d0,0d0)) 929 call ga_sync() 930 status = .true. 931 if (me.eq.0) then 932 call ga_get(g_b, n/2, n/2, n/2, n/2, x, 1) 933c error = abs(x -(1d0,-1d0)*nproc) 934 if (MISMATCH(x, ((1d0,-1d0)*nproc)))then 935c if(error.gt. (1d-8))then 936 status = .false. 937 endif 938 endif 939 if (me.eq.0) then 940 if (status) then 941 write(6,'(A)') ' ga_acc (overlap) ................. OK' 942 else 943 write(6,'(A)') ' ga_acc (overlap) ................. Failed' 944 endif 945 call ffflush(6) 946 endif 947c 948c Check the ga_copy function 949c 950 status = .true. 951 call ga_sync() 952 if(me.eq.0) call ga_put(g_a, 1, n, 1, n, a, n) 953 call ga_copy(g_a, g_b) 954 call ga_get(g_b, 1, n, 1, n, b, n) 955 do j = 1, n 956 do i = 1, n 957 if (b(i,j) .ne. a(i,j)) then 958 status = .false. 959 endif 960 enddo 961 enddo 962 if (me.eq.0) then 963 if (status) then 964 write(6,'(A)') ' ga_copy .......................... OK' 965 else 966 write(6,'(A)') ' ga_copy .......................... Failed' 967 endif 968 call ffflush(6) 969 endif 970c 971c 972c Check the ga_scale function 973c 974 factor = (1d0,-1d0) 975 call ga_scale(g_a, factor) 976 call ga_get(g_a, 1, n, 1, n, b, n) 977 status = .true. 978 do j = 1, n 979 do i = 1, n 980 a(i,j) = a(i,j)*factor 981 if (MISMATCH(b(i,j), a(i,j)))then 982 status = .false. 983 endif 984 enddo 985 enddo 986 if (me.eq.0) then 987 if (status) then 988 write(6,'(A)') ' ga_scale ......................... OK' 989 else 990 write(6,'(A)') ' ga_scale ......................... Failed' 991 endif 992 call ffflush(6) 993 endif 994c 995c Check scatter&gather 996c 997 call ga_sync() 998 if(me.eq.0) call ga_put(g_a, 1, n, 1, n, a, n) 999c 1000 status = .true. 1001 crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process 1002 do j = 1, 10 1003 call ga_sync() 1004 itmp = iran(nproc)-1 1005 if(me.eq.itmp) then 1006 do loop = 1,m 1007 ilo = iran(n) 1008 jlo = iran(n) 1009 iv(loop) = ilo 1010 jv(loop) = jlo 1011 enddo 1012 call ga_gather(g_a, v, iv, jv, m) 1013 do loop = 1,m 1014 ilo= iv(loop) 1015 jlo= jv(loop) 1016 call ga_get(g_a,ilo,ilo,jlo,jlo,v(loop),1) 1017 if(v(loop) .ne. a(ilo,jlo))then 1018 status = .false. 1019 endif 1020 enddo 1021 endif 1022 enddo 1023c 1024 if (me.eq.0) then 1025 if (status) then 1026 write(6,'(A)') ' ga_gather ........................ OK' 1027 else 1028 write(6,'(A)') ' ga_gather ........................ Failed' 1029 endif 1030 call ffflush(6) 1031 endif 1032c 1033 status = .true. 1034 do j = 1,10 1035 call ga_sync() 1036 if(me.eq.iran(ga_nnodes())-1) then 1037 do loop = 1,m 1038 ilo = iran(n) 1039 jlo = iran(n) 1040 iv(loop) = ilo 1041 jv(loop) = jlo 1042 v(loop) = (1d0,-1d0) *(ilo+jlo) 1043 enddo 1044 call ga_scatter(g_a, v, iv, jv, m) 1045 do loop = 1,m 1046 ilo= iv(loop) 1047 jlo= jv(loop) 1048 call ga_get(g_a,ilo,ilo,jlo,jlo,w(loop),1) 1049 if(w(loop) .ne. (1d0,-1d0) *(ilo+jlo) )then 1050 status = .false. 1051 endif 1052 enddo 1053 endif 1054 call ga_sync() 1055 enddo 1056c 1057 if (me.eq.0) then 1058 if (status) then 1059 write(6,'(A)') ' ga_scatter ....................... OK' 1060 else 1061 write(6,'(A)') ' ga_scatter ....................... Failed' 1062 endif 1063 call ffflush(6) 1064 endif 1065c 1066c Check ga_add 1067c 1068 call ga_get(g_a, 1, n, 1, n, a, n) 1069 crap = util_drand(12345) ! Everyone has same seed 1070 do j = 1, n 1071 do i = 1, n 1072 b(i,j) = cmplx(util_drand(0), util_drand(1)) 1073 a(i,j) = (0.1d0,-.1d0)*a(i,j) + (.9d0,-.9d0)*b(i,j) 1074 enddo 1075 enddo 1076 status = .true. 1077 if (me.eq.0) call ga_put(g_b, 1, n, 1, n, b, n) 1078 call ga_add((0.1d0,-.1d0), g_a, (0.9d0,-.9d0), g_b, g_b) 1079 call ga_get(g_b, 1, n, 1, n, b, n) 1080 do j = 1, n 1081 do i = 1, n 1082 if (MISMATCH(b(i,j), a(i,j)))then 1083 status = .false. 1084 endif 1085 enddo 1086 enddo 1087 if (me.eq.0) then 1088 if (status) then 1089 write(6,'(A)') ' ga_add ........................... OK' 1090 else 1091 write(6,'(A)') ' ga_add ........................... Failed' 1092 endif 1093 call ffflush(6) 1094 endif 1095 call ga_sync() 1096c 1097c Check the zdot function 1098c 1099 crap = util_drand(12345) ! Everyone has same seed 1100 sum1 = (0.0d0,0.d0) 1101 do j = 1, n 1102 do i = 1, n 1103 b(i,j) = cmplx(util_drand(0), util_drand(1)) 1104 sum1 = sum1 + a(i,j)*b(i,j) 1105 enddo 1106 enddo 1107 if (me.eq.0) then 1108 call ga_put(g_b, 1, n, 1, n, b, n) 1109 call ga_put(g_a, 1, n, 1, n, a, n) 1110 endif 1111 call ga_sync() 1112 sum2 = ga_zdot(g_a,g_b) 1113 status = .true. 1114 if (MISMATCH(sum1, sum2))then 1115 status = .false. 1116 endif 1117 if (me.eq.0) then 1118 if (status) then 1119 write(6,'(A)') ' ga_zdot .......................... OK' 1120 else 1121 write(6,'(A)') ' ga_zdot .......................... Failed' 1122 endif 1123 call ffflush(6) 1124 endif 1125c 1126c Delete the global arrays 1127c 1128 status = ga_destroy(g_b) 1129 status = status .and. ga_destroy(g_a) 1130 if (me.eq.0) then 1131 if (status) then 1132 write(6,'(A)') ' ga_destroy ....................... OK' 1133 else 1134 write(6,'(A)') ' ga_destroy ....................... Failed' 1135 endif 1136 call ffflush(6) 1137 endif 1138c 1139 end 1140c----------------------------------------------------------------- 1141 1142 1143 1144 1145 subroutine check_int() 1146 implicit none 1147#include "mafdecls.fh" 1148#include "global.fh" 1149#include "testutil.fh" 1150c 1151 integer n 1152 parameter (n = 128) 1153 integer a(n,n), b(n,n) 1154 logical status 1155 integer g_a 1156 integer i, j, loop, nloop, ilo, ihi, jlo, jhi, itmp 1157 integer nproc, me, ij, inc, dimi,dimj,iproc, ii, jj 1158 double precision nwords 1159 parameter (nloop = 100) 1160 integer maxproc 1161 parameter (maxproc = 128) 1162 integer map(5,maxproc), found, np,k 1163 double precision crap, sum1 1164 integer buf 1165 integer iran 1166 external iran 1167c 1168 nproc = ga_nnodes() 1169 me = ga_nodeid() 1170c 1171c a() is a local copy of what the global array should start as 1172c 1173 do j = 1, n 1174 do i = 1, n 1175 a(i,j) = i-1 + (j-1)*1000 1176 enddo 1177 enddo 1178c 1179c Create a global array 1180c 1181 status = ga_create(MT_INT, n, n, 'a', 0, 0, g_a) 1182 if (me.eq.0) then 1183 if (status) then 1184 write(6,'(A)') ' ga_create ........................ OK' 1185 else 1186 write(6,'(A)') ' ga_create ........................ Failed' 1187 endif 1188 call ffflush(6) 1189 endif 1190c 1191c Zero the array 1192c 1193 call ga_zero(g_a) 1194c 1195c Check that it is indeed zero 1196c 1197 status = .true. 1198 call ga_get(g_a, 1, n, 1, n, b, n) 1199 do i = 1, n 1200 do j = 1, n 1201 if (b(i,j) .ne. 0) then 1202 status = .false. 1203 endif 1204 enddo 1205 enddo 1206 if (me.eq.0) then 1207 if (status) then 1208 write(6,'(A)') ' ga_zero .......................... OK' 1209 else 1210 write(6,'(A)') ' ga_zero .......................... Failed' 1211 endif 1212 call ffflush(6) 1213 endif 1214 call ga_sync() 1215c 1216c Each node fills in disjoint sections of the array 1217c 1218 call ga_sync() 1219c 1220 inc = (n-1)/20 + 1 1221 ij = 0 1222 do j = 1, n, inc 1223 do i = 1, n, inc 1224 if (mod(ij,nproc) .eq. me) then 1225 ilo = i 1226 ihi = min(i+inc, n) 1227 jlo = j 1228 jhi = min(j+inc, n) 1229c write(6,4) me, ilo, ihi, jlo, jhi 1230c4 format(' node ',i2,' checking put ',4i4) 1231c call ffflush(6) 1232 call ga_put(g_a, ilo, ihi, jlo, jhi, a(ilo, jlo), n) 1233 endif 1234 ij = ij + 1 1235 enddo 1236 enddo 1237 call ga_sync() 1238c 1239c All nodes check all of a 1240c 1241 status = .true. 1242 if(me.eq.0)then 1243 call ga_get(g_a, 1, n, 1, n, b, n) 1244 do i = 1, n 1245 do j = 1, n 1246 if (b(i,j) .ne. a(i,j)) then 1247 status = .false. 1248 endif 1249 enddo 1250 enddo 1251 endif 1252 call ga_sync() 1253c 1254 if (me.eq.0) then 1255 if (status) then 1256 write(6,'(A)') ' ga_put ........................... OK' 1257 else 1258 write(6,'(A)') ' ga_put ........................... Failed' 1259 endif 1260 call ffflush(6) 1261 endif 1262c 1263c Now check nloop random gets from each node 1264c 1265 call ga_sync() 1266c 1267 nwords = 0 1268c 1269 status = .true. 1270 crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process 1271 do loop = 1, nloop 1272 ilo = iran(loop) 1273 ihi = iran(loop) 1274 if (ihi.lt. ilo) then 1275 itmp = ihi 1276 ihi = ilo 1277 ilo = itmp 1278 endif 1279 jlo = iran(loop) 1280 jhi = iran(loop) 1281 if (jhi.lt. jlo) then 1282 itmp = jhi 1283 jhi = jlo 1284 jlo = itmp 1285 endif 1286c 1287 nwords = nwords + (ihi-ilo+1)*(jhi-jlo+1) 1288c 1289 call util_ifill(n*n, 0, b, 1) 1290 call ga_get(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n) 1291c 1292 sum1 = 0.0d0 1293 do j = jlo, jhi 1294 do i = ilo, ihi 1295 sum1 = sum1 + b(i,j) 1296 if (b(i,j) .ne. a(i,j)) then 1297 status = .false. 1298 endif 1299 enddo 1300 enddo 1301 enddo 1302 if (me.eq.0) then 1303 if (status) then 1304 write(6,'(A)') ' ga_get ........................... OK' 1305 else 1306 write(6,'(A)') ' ga_get ........................... Failed' 1307 endif 1308 call ffflush(6) 1309 endif 1310c 1311 call ga_sync() 1312c 1313 status = .true. 1314 crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process 1315 inc =5 1316c every processor will be operating on somebody elses data 1317c 1318 iproc = ga_nnodes()-me-1 1319c 1320 call ga_distribution(g_a,iproc,ilo,ihi,jlo,jhi) 1321c 1322 dimi = ihi-ilo 1323 dimj = jhi-jlo 1324c write(6,*) me,'..',ilo,ihi,jlo,jhi,'.',dimi,dimj 1325c call ffflush(6) 1326 call ga_sync() 1327 if(ilo .gt.0 .and. jhi .gt. 0)then 1328 do loop = 1,nloop 1329 ii= IABS(iran(dimi)) 1330 jj= IABS(iran(dimj)) 1331 i=ilo + Mod(ii,dimi) 1332 j=jlo + Mod(jj,dimj) 1333c 1334c write(6,*) me,'..',ilo,ihi,jlo,jhi,'.',dimi,dimj,'..',i,j 1335c call ffflush(6) 1336 buf = ga_read_inc(g_a,i,j,inc) 1337 if(a(i,j).ne. buf)then 1338 status = .false. 1339 endif 1340 call ga_get(g_a, i,i,j,j, buf,1) 1341 a(i,j) = a(i,j)+inc 1342 if(a(i,j).ne. buf)then 1343 status = .false. 1344 endif 1345 enddo 1346 endif 1347 call ga_sync() 1348c 1349 if (me.eq.0) then 1350 if (status) then 1351 write(6,'(A)') ' ga_read_inc ...................... OK' 1352 else 1353 write(6,'(A)') ' ga_read_inc ...................... Failed' 1354 endif 1355 call ffflush(6) 1356 endif 1357c 1358 call ga_zero(g_a) 1359c 1360c*** use ga_read_inc and elements g_a(1:2,1) to implement a lock 1361c*** compute g_a(:,n) = sum (1(:)) for P processors 1362c 1363 status = ga_create_mutexes(1) 1364 if (me.eq.0) then 1365 if (status) then 1366 write(6,'(A)') ' ga_create_mutexes ................ OK' 1367 else 1368 write(6,'(A)') ' ga_create_mutexes ................ Failed' 1369 endif 1370 call ffflush(6) 1371 endif 1372 1373 if ((n.lt.2).and.(me.eq.0)) then 1374 write(6,'(A)') ' ga_fence ........................ N/A' 1375 write(6,'(A)') ' ga_lock ......................... N/A' 1376 call ffflush(6) 1377 endif 1378 1379 call ga_lock(0) 1380c call my_lock(g_a) 1381 1382c get original values g_a(:,n) 1383 call ga_get(g_a, 1,n, n,n, b,n) 1384c add my contribution 1385 do i =1,n 1386 b(i,1)= b(i,1)+1 1387 enddo 1388c 1389c need to use fence to assure that coms complete before leaving 1390c Critical Section 1391c 1392 call ga_init_fence() 1393 call ga_put(g_a, 1,n, n,n, b,n) 1394 call ga_fence() 1395 call ga_unlock(0) 1396c call my_unlock(g_a) 1397c 1398333 status = ga_destroy_mutexes() 1399 if (me.eq.0) then 1400 if (status) then 1401 write(6,'(A)') ' ga_destroy_mutexes ............... OK' 1402 else 1403 write(6,'(A)') ' ga_destroy_mutexes ............... Failed' 1404 endif 1405 call ffflush(6) 1406 endif 1407c 1408 status = .true. 1409 call ga_sync() 1410 if (me.eq.0) then 1411 call ga_get(g_a, 1,n, n,n, b,n) 1412 do i =1,n 1413 if(b(i,1).ne. nproc)then 1414 status = .false. 1415 endif 1416 enddo 1417 endif 1418c 1419 status = ga_locate_region(g_a, 1, n, 1,n, map,np) 1420 found = 0 1421 do j=1,n 1422 do i=1,n 1423 b(i,j)=-1 1424 enddo 1425 enddo 1426 if(me.eq.0)call ga_put(g_a,1,n,1,n,b,n) 1427 call ga_sync() 1428 do k = 1, np 1429 if(map(5,k).eq.me)then 1430 if(found.eq.1) then 1431 write(6,*)'double entry in map for proc ',me 1432 call ffflush(6) 1433 endif 1434 do j= map(3,k), map(4,k) 1435 do i= map(1,k), map(2,k) 1436 b(i,j)=1*me 1437 enddo 1438 enddo 1439 call ga_put(g_a, map(1,k),map(2,k),map(3,k),map(4,k), 1440 & b(map(1,k),map(3,k)),n) 1441 found = 1 1442 endif 1443 enddo 1444 call ga_sync() 1445c 1446 do k = 1, np 1447 if(map(5,k).eq.me)then 1448 call ga_get(g_a, map(1,k),map(2,k),map(3,k),map(4,k), 1449 & a(map(1,k),map(3,k)),n) 1450 do j= map(3,k), map(4,k) 1451 do i= map(1,k), map(2,k) 1452 if(b(i,j).ne.a(i,j)) then 1453 write(6,*) 1454 & 'proc ',me, 'overlap with ',a(i,j) 1455 call ffflush(6) 1456 endif 1457 enddo 1458 enddo 1459 endif 1460 enddo 1461 call ga_sync() 1462c 1463 if(me.eq.0)then 1464 call ga_get(g_a,1,n,1,n,a,n) 1465 do j=1,n 1466 do i=1,n 1467 if(a(i,j).eq.-1)then 1468 status = .false. 1469 endif 1470 enddo 1471 enddo 1472 endif 1473 if (me.eq.0) then 1474 if (status) then 1475 write(6,'(A)') ' ga_locate_region ................. OK' 1476 else 1477 write(6,'(A)') ' ga_locate_region ................. Failed' 1478 endif 1479 call ffflush(6) 1480 endif 1481c 1482c Delete the global array 1483c 1484 status = ga_destroy(g_a) 1485 if (me.eq.0) then 1486 if (status) then 1487 write(6,'(A)') ' ga_destroy ....................... OK' 1488 else 1489 write(6,'(A)') ' ga_destroy ....................... Failed' 1490 endif 1491 call ffflush(6) 1492 endif 1493c 1494 end 1495 1496c--------------------------------------------------------------------- 1497 1498 subroutine check_flt() 1499 implicit none 1500#include "mafdecls.fh" 1501#include "global.fh" 1502#include "testutil.fh" 1503 integer n, m 1504 parameter (n =10) 1505 parameter (m=2*n) 1506 real a(n,n), b(n,n), v(m), w(m) 1507 integer iv(m), jv(m) 1508 logical status 1509 integer g_a, g_b 1510 integer i, j, loop, nloop, maxloop, ilo, ihi, jlo, jhi, itmp 1511 integer nproc, me, ij, inc, ii, jj 1512 double precision nwords 1513 parameter (maxloop = 100) 1514 integer maxproc 1515 parameter (maxproc = 128) 1516 double precision crap 1517 real x, sum1, sum2 1518 integer iran 1519 external iran 1520 1521 nproc = ga_nnodes() 1522 me = ga_nodeid() 1523 nloop = Min(maxloop,n) 1524c 1525c a() is a local copy of what the global array should start as 1526c 1527 do j = 1, n 1528 do i = 1, n 1529 a(i,j) = i-1 + (j-1)*n 1530 b(i,j) = -1. 1531 enddo 1532 enddo 1533c 1534c Create a global array 1535c 1536 status = ga_create(MT_REAL, n, n, 'a', 0, 0, g_a) 1537 if (me.eq.0) then 1538 if (status) then 1539 write(6,'(A)') ' ga_create ........................ OK' 1540 else 1541 write(6,'(A)') ' ga_create ........................ Failed' 1542 endif 1543 call ffflush(6) 1544 endif 1545c 1546c check if handle is valid. be quiet unless error 1547c 1548 status = .true. 1549 if(.not.ga_valid_handle(g_a)) status = .false. 1550c 1551 call ga_distribution(g_a,me,ilo, ihi, jlo, jhi) 1552 call ga_sync() 1553c 1554c Zero the array 1555c 1556 call ga_zero(g_a) 1557c 1558c Check that it is indeed zero 1559c 1560 call ga_get(g_a, 1, n, 1, n, b, n) 1561 call ga_sync() 1562 do i = 1, n 1563 do j = 1, n 1564 if (b(i,j) .ne. 0.0) then 1565 status = .false. 1566 endif 1567 enddo 1568 enddo 1569 if (me.eq.0) then 1570 if (status) then 1571 write(6,'(A)') ' ga_zero .......................... OK' 1572 else 1573 write(6,'(A)') ' ga_zero .......................... Failed' 1574 endif 1575 call ffflush(6) 1576 endif 1577 call ga_sync() 1578c 1579c Each node fills in disjoint sections of the array 1580c 1581 status = .true. 1582 inc = (n-1)/20 + 1 1583 ij = 0 1584 do j = 1, n, inc 1585 do i = 1, n, inc 1586 if (mod(ij,nproc) .eq. me) then 1587 ilo = i 1588 ihi = min(i+inc, n) 1589 jlo = j 1590 jhi = min(j+inc, n) 1591c write(6,4) me, ilo, ihi, jlo, jhi 1592c 4 format(' node ',i2,' checking put ',4i4) 1593c call ffflush(6) 1594 call ga_put(g_a, ilo, ihi, jlo, jhi, a(ilo, jlo), n) 1595 endif 1596 ij = ij + 1 1597 enddo 1598 enddo 1599 call ga_sync() 1600c 1601c All nodes check all of a 1602c 1603 call ga_get(g_a, 1, n, 1, n, b, n) 1604 do i = 1, n 1605 do j = 1, n 1606 if (b(i,j) .ne. a(i,j)) then 1607 status = .false. 1608 endif 1609 enddo 1610 enddo 1611 call ga_sync() 1612 if (me.eq.0) then 1613 if (status) then 1614 write(6,'(A)') ' ga_put ........................... OK' 1615 else 1616 write(6,'(A)') ' ga_put ........................... Failed' 1617 endif 1618 call ffflush(6) 1619 endif 1620c 1621 call ga_sync() 1622c 1623 nwords = 0 1624c 1625 status = .true. 1626 crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process 1627 do loop = 1, nloop 1628 ilo = iran(loop) 1629 ihi = iran(loop) 1630 if (ihi.lt. ilo) then 1631 itmp = ihi 1632 ihi = ilo 1633 ilo = itmp 1634 endif 1635 jlo = iran(loop) 1636 jhi = iran(loop) 1637 if (jhi.lt. jlo) then 1638 itmp = jhi 1639 jhi = jlo 1640 jlo = itmp 1641 endif 1642c 1643 nwords = nwords + (ihi-ilo+1)*(jhi-jlo+1) 1644c 1645 call util_rfill(n*n, 0.0, b, 1) 1646 call ga_get(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n) 1647c 1648 sum1 = 0.0d0 1649 do j = jlo, jhi 1650 do i = ilo, ihi 1651 sum1 = sum1 + b(i,j) 1652 if (b(i,j) .ne. a(i,j)) then 1653 status = .false. 1654 endif 1655 enddo 1656 enddo 1657 enddo 1658 if (me.eq.0) then 1659 if (status) then 1660 write(6,'(A)') ' ga_get ........................... OK' 1661 else 1662 write(6,'(A)') ' ga_get ........................... Failed' 1663 endif 1664 call ffflush(6) 1665 endif 1666 call ga_sync() 1667c 1668c Each node accumulates into disjoint sections of the array 1669c 1670 call ga_sync() 1671c 1672 crap = util_drand(12345) ! Same seed for each process 1673 do j = 1, n 1674 do i = 1, n 1675c b(i,j) = real(util_drand(0)) 1676 b(i,j) = i+j 1677 enddo 1678 enddo 1679c 1680 status = .true. 1681 inc = (n-1)/20 + 1 1682 ij = 0 1683 do j = 1, n, inc 1684 do i = 1, n, inc 1685c x = real(util_drand(0)) 1686 x = 10. 1687 ilo = i 1688 ihi = min(i+inc-1, n) 1689 if(ihi.eq.n-1)ihi=n 1690c ihi = min(i+inc, n) 1691 jlo = j 1692 jhi = min(j+inc-1, n) 1693 if(jhi.eq.n-1)jhi=n 1694c jhi = min(j+inc-1, n) 1695* call ffflush(6) 1696 if (mod(ij,nproc) .eq. me) then 1697c print *, me, 'checking accumulate ',ilo,ihi,jlo,jhi,x 1698* 11 format(' node ',i2,' checking accumulate ',4i4) 1699* call ffflush(6) 1700 call ga_acc(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n, x) 1701 endif 1702 ij = ij + 1 1703c 1704c Each process applies all updates to its local copy 1705c 1706 do jj = jlo, jhi 1707 do ii = ilo, ihi 1708 a(ii,jj) = a(ii,jj) + x * b(ii,jj) 1709 enddo 1710 enddo 1711 enddo 1712 enddo 1713 call ga_sync() 1714c 1715c All nodes check all of a 1716 call ga_get(g_a, 1, n, 1, n, b, n) 1717c 1718 do j = 1, n 1719 do i = 1, n 1720 if(MISMATCHF(b(i,j),a(i,j)))then 1721 status = .false. 1722 endif 1723 enddo 1724 enddo 1725 if (me.eq.0) then 1726 if (status) then 1727 write(6,'(A)') ' ga_acc (disjoint) ................ OK' 1728 else 1729 write(6,'(A)') ' ga_acc (disjoint) ................ Failed' 1730 endif 1731 call ffflush(6) 1732 endif 1733c 1734c overlapping accumulate 1735 call ga_sync() 1736 status = .true. 1737 if (.not. ga_create(MT_REAL, n, n, 'b', 0, 0, g_b)) then 1738 status = .false. 1739 endif 1740c 1741 call ga_zero(g_b) 1742 call ga_acc(g_b, n/2, n/2, n/2, n/2, 1.0, 1, 1.0) 1743 call ga_sync() 1744 if (me.eq.0) then 1745 call ga_get(g_b, n/2, n/2, n/2, n/2, b(1,1), 1) 1746 x = abs(b(1,1) -1*nproc) 1747 if(x.gt. 1e-10)then 1748 status = .false. 1749 endif 1750 endif 1751 if (me.eq.0) then 1752 if (status) then 1753 write(6,'(A)') ' ga_acc (overlap) ................. OK' 1754 else 1755 write(6,'(A)') ' ga_acc (overlap) ................. Failed' 1756 endif 1757 call ffflush(6) 1758 endif 1759c 1760c Check the ga_add function 1761c 1762 crap = util_drand(12345) ! Everyone has same seed 1763 status = .true. 1764 do j = 1, n 1765 do i = 1, n 1766 b(i,j) = real(util_drand(0)*real(i)) + 1 1767 a(i,j) = 0.1*a(i,j) + 0.9*b(i,j) 1768 enddo 1769 enddo 1770 if (me.eq.0) call ga_put(g_b, 1, n, 1, n, b, n) 1771 call ga_add(0.1, g_a, 0.9, g_b, g_b) 1772 call ga_get(g_b, 1, n, 1, n, b, n) 1773 do j = 1, n 1774 do i = 1, n 1775 if(MISMATCHF(b(i,j), a(i,j)))then 1776 status = .false. 1777 endif 1778 enddo 1779 enddo 1780 if (me.eq.0) then 1781 if (status) then 1782 write(6,'(A)') ' ga_add ........................... OK' 1783 else 1784 write(6,'(A)') ' ga_add ........................... Failed' 1785 endif 1786 call ffflush(6) 1787 endif 1788 call ga_sync() 1789c 1790 status = .true. 1791 crap = util_drand(12345) ! Everyone has same seed 1792 sum1 = 0.0 1793 do j = 1, n 1794 do i = 1, n 1795 b(i,j) = util_drand(0) 1796 sum1 = sum1 + a(i,j)*b(i,j) 1797 enddo 1798 enddo 1799 if (me.eq.0) then 1800 call ga_put(g_b, 1, n, 1, n, b, n) 1801 call ga_put(g_a, 1, n, 1, n, a, n) 1802 endif 1803 call ga_sync() 1804 sum2 = ga_sdot(g_a,g_b) 1805 if(MISMATCHF(sum1, sum2))then 1806 status = .false. 1807 endif 1808 if (me.eq.0) then 1809 if (status) then 1810 write(6,'(A)') ' ga_sdot .......................... OK' 1811 else 1812 write(6,'(A)') ' ga_sdot .......................... Failed' 1813 endif 1814 call ffflush(6) 1815 endif 1816c 1817 status = .true. 1818 call ga_scale(g_a, 0.123) 1819 call ga_get(g_a, 1, n, 1, n, b, n) 1820 do j = 1, n 1821 do i = 1, n 1822 a(i,j) = a(i,j)*0.123 1823 if (MISMATCHF(b(i,j), a(i,j)))then 1824 status = .false. 1825 endif 1826 enddo 1827 enddo 1828 if (me.eq.0) then 1829 if (status) then 1830 write(6,'(A)') ' ga_scale ......................... OK' 1831 else 1832 write(6,'(A)') ' ga_scale ......................... Failed' 1833 endif 1834 call ffflush(6) 1835 endif 1836c 1837 status = .true. 1838 if(me.eq.0) call ga_put(g_a, 1, n, 1, n, a, n) 1839 call ga_copy(g_a, g_b) 1840 call ga_get(g_b, 1, n, 1, n, b, n) 1841 do j = 1, n 1842 do i = 1, n 1843 if (b(i,j) .ne. a(i,j)) then 1844 status = .false. 1845 endif 1846 enddo 1847 enddo 1848 if (me.eq.0) then 1849 if (status) then 1850 write(6,'(A)') ' ga_copy .......................... OK' 1851 else 1852 write(6,'(A)') ' ga_copy .......................... Failed' 1853 endif 1854 call ffflush(6) 1855 endif 1856c 1857 call ga_sync() 1858 status = .true. 1859 crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process 1860 do j = 1, 10 1861 call ga_sync() 1862 itmp = iran(nproc)-1 1863 if(me.eq.itmp) then 1864 do loop = 1,m 1865 ilo = iran(n) 1866 jlo = iran(n) 1867 iv(loop) = ilo 1868 jv(loop) = jlo 1869 enddo 1870 call ga_gather(g_a, v, iv, jv, m) 1871 do loop = 1,m 1872 ilo= iv(loop) 1873 jlo= jv(loop) 1874 call ga_get(g_a,ilo,ilo,jlo,jlo,v(loop),1) 1875 if(v(loop) .ne. a(ilo,jlo))then 1876 status = .false. 1877 endif 1878 enddo 1879 endif 1880 enddo 1881c 1882 if (me.eq.0) then 1883 if (status) then 1884 write(6,'(A)') ' ga_gather ........................ OK' 1885 else 1886 write(6,'(A)') ' ga_gather ........................ Failed' 1887 endif 1888 call ffflush(6) 1889 endif 1890c 1891 status = .true. 1892 do j = 1,10 1893 call ga_sync() 1894 if(me.eq.iran(ga_nnodes())-1) then 1895 do loop = 1,m 1896 ilo = iran(n) 1897 jlo = iran(n) 1898 iv(loop) = ilo 1899 jv(loop) = jlo 1900c v(loop) = DSIN(a(ilo,jlo)+b(ilo,jlo)) 1901 v(loop) = 1.0 *(ilo+jlo) 1902 enddo 1903 call ga_scatter(g_a, v, iv, jv, m) 1904 do loop = 1,m 1905 ilo= iv(loop) 1906 jlo= jv(loop) 1907 call ga_get(g_a,ilo,ilo,jlo,jlo,w(loop),1) 1908c if(v(loop) .ne. w(loop))then 1909 if(w(loop) .ne. 1.0 *(ilo+jlo) )then 1910 status = .false. 1911 endif 1912 enddo 1913 endif 1914 call ga_sync() 1915 enddo 1916c 1917 if (me.eq.0) then 1918 if (status) then 1919 write(6,'(A)') ' ga_scatter ....................... OK' 1920 else 1921 write(6,'(A)') ' ga_scatter ....................... Failed' 1922 endif 1923 call ffflush(6) 1924 endif 1925c 1926 call ga_sync() 1927c 1928c scatter-acc available in GA ver. 3.0 1929#ifdef GA3 1930 status = .true. 1931 crap = util_drand(1234) 1932 call ga_zero(g_a) 1933c 1934 do j = 1, n 1935 do i = 1, n 1936 b(i,j) =0. 1937 enddo 1938 enddo 1939c 1940 x = .1d0 1941 ii =n 1942 do jj = 1,1 1943 call ga_sync() 1944 do loop = 1, ii 1945 1946c generate unique i,j pairs 194710 continue 1948 i = iran(n) 1949 j=iran(n) 1950 if (found(i,j, iv, jv, loop-1) ) goto 10 1951 1952 iv(loop) = i 1953 jv(loop) = j 1954 v(loop) = 1.0 *(i+j) 1955 b(i,j) = b(i,j) + nproc*x*v(loop) ! update local ref. array 1956 enddo 1957 1958 call ga_scatter_acc(g_a,v,iv,jv, ii,x) 1959 call ga_sync() 1960c 1961c check the result 1962c 1963 call ga_get(g_a, 1, n, 1,n, a, n) 1964 do loop = 1,ii 1965 i = iv(loop) 1966 j = jv(loop) 1967 if(MISMATCH(a(i,j),b(i,j)))then 1968 status = .false. 1969 endif 1970 enddo 1971 call ga_sync() 1972 enddo 1973 call ga_sync() 1974 if (me.eq.0) then 1975 if (status) then 1976 write(6,'(A)') ' ga_scatter_acc ................... OK' 1977 else 1978 write(6,'(A)') ' ga_scatter_acc ................... Failed' 1979 endif 1980 call ffflush(6) 1981 endif 1982#endif 1983c 1984c Delete the global array 1985c 1986 status = ga_destroy(g_a) 1987 status = status .and. ga_destroy(g_b) 1988 if (me.eq.0) then 1989 if (status) then 1990 write(6,'(A)') ' ga_destroy ....................... OK' 1991 else 1992 write(6,'(A)') ' ga_destroy ....................... Failed' 1993 endif 1994 call ffflush(6) 1995 endif 1996c 1997 end 1998c_____________________________________________________________ 1999 2000 subroutine check_wrappers 2001 implicit none 2002#include "mafdecls.fh" 2003#include "global.fh" 2004#include "testutil.fh" 2005 double precision sum 2006 integer isum, ibuf(2) 2007 integer me, nproc 2008 logical status 2009 real fsum 2010c 2011 nproc = ga_nnodes() 2012 me = ga_nodeid() 2013c 2014 status = .true. 2015 call ga_sync() 2016 ibuf(1) = 1 2017 ibuf(2) = me 2018 call ga_igop(10000, ibuf, 2, '+') 2019 if(ibuf(1).ne.nproc)then 2020 status = .false. 2021 endif 2022 if(ibuf(2).ne.((nproc-1)*nproc/2))then 2023 status = .false. 2024 endif 2025 call ga_sync() 2026 if (me.eq.0) then 2027 if (status) then 2028 write(6,'(A)') ' ga_igop .......................... OK' 2029 else 2030 write(6,'(A)') ' ga_igop .......................... Failed' 2031 endif 2032 call ffflush(6) 2033 endif 2034 call ga_sync() 2035c 2036 status = .true. 2037 sum = 1d0 * me 2038 call ga_dgop(10000, sum, 1, '+') 2039 if(Int(sum).ne.((nproc-1)*nproc/2))then 2040 status = .false. 2041 endif 2042 call ga_sync() 2043 if (me.eq.0) then 2044 if (status) then 2045 write(6,'(A)') ' ga_dgop .......................... OK' 2046 else 2047 write(6,'(A)') ' ga_dgop .......................... Failed' 2048 endif 2049 call ffflush(6) 2050 endif 2051c 2052 call ga_sync() 2053 status = .true. 2054 fsum = 1.0 * me 2055 call ga_sgop(10000, fsum, 1, '+') 2056 if(Int(sum).ne.((nproc-1)*nproc/2))then 2057 status = .false. 2058 endif 2059 call ga_sync() 2060 if (me.eq.0) then 2061 if (status) then 2062 write(6,'(A)') ' ga_sgop .......................... OK' 2063 else 2064 write(6,'(A)') ' ga_sgop .......................... Failed' 2065 endif 2066 call ffflush(6) 2067 endif 2068c 2069 call ga_sync() 2070 status = .true. 2071 if(me.eq.nproc-1)then 2072 ibuf(1) = me 2073 ibuf(2) = nproc 2074 endif 2075 call ga_brdcst(1000,ibuf,util_mitob(2),nproc-1) 2076 if(ibuf(1).ne.nproc-1) status = .false. 2077 if(ibuf(2).ne.nproc) status = .false. 2078 call ga_sync() 2079 if (me.eq.0) then 2080 if (status) then 2081 write(6,'(A)') ' ga_brdcst ........................ OK' 2082 else 2083 write(6,'(A)') ' ga_brdcst ........................ Failed' 2084 endif 2085 call ffflush(6) 2086 endif 2087 call ga_sync() 2088 end 2089 2090 2091 subroutine check_mem 2092 implicit none 2093 integer mem_size 2094#include "mafdecls.fh" 2095#include "global.fh" 2096#include "testutil.fh" 2097c 2098 integer n,nmax,left,need, me,procs,g_a, g_b 2099 integer stack, heap, global 2100 logical status, overify, ohardfail 2101c 2102 call input_mem_size(stack, heap, global, overify, ohardfail) 2103 mem_size = ma_sizeof(mt_dbl,global,mt_byte) 2104 write(*,*) 'mem_size = ',mem_size 2105 me = ga_nodeid() 2106 procs = ga_nnodes() 2107 nmax = int(dsqrt(dble(mem_size/util_mitob(1)))) 2108 left = mem_size/procs - ga_inquire_memory() 2109 n = nmax/2 2110 need = util_mdtob(n*n)/procs 2111c 2112 if(me.eq.0)then 2113 write(6,*)' ' 2114 if(ga_uses_ma())then 2115 write(6,*)' CHECKING GA MEMORY RESTRICTIONS (MA used)' 2116 else 2117 write(6,*)' CHECKING GA MEMORY RESTRICTIONS (MA not used)' 2118 endif 2119 write(6,*)' ' 2120 write(6,*)' ' 2121 call print_mem_info(n,left, need, mem_size/procs) 2122 endif 2123c 2124 status = ga_create(MT_DBL, n, n, 'a', 0, 0, g_a) 2125c 2126 if(me.eq.0) then 2127 if(status) then 2128 write(6,*) ' success' 2129 else 2130 write(6,*) ' failure' 2131 endif 2132 call ffflush(6) 2133 endif 2134c 2135 n = nmax 2136 left = mem_size/procs - ga_inquire_memory() 2137 need = util_mdtob(n*n)/procs 2138 if(me.eq.0)then 2139 call print_mem_info(n,left, need, mem_size/procs) 2140 endif 2141c 2142 status = ga_create(MT_DBL, n, n, 'b', 0, 0, g_b) 2143c 2144 if(me.eq.0) then 2145 if(status) then 2146 write(6,*) ' success' 2147 else 2148 write(6,*) ' failure' 2149 endif 2150 write(6,*)' ' 2151 write(6,*)' ' 2152 call ffflush(6) 2153 endif 2154 status = ga_destroy(g_a) 2155 end 2156 2157 2158 2159 subroutine print_mem_info(n,left, need, total) 2160 implicit none 2161 integer n,left, need, total 2162c 2163 write(6,*)' ' 2164 if(left - need .ge. 0) then 2165 write(6,1)n,n 21661 format('> Creating array ',i4,' by ',i4,' -- should succeed') 2167 else 2168 write(6,2)n,n 21692 format('> Creating array ',i4,' by ',i4,' -- SHOULD FAIL') 2170 endif 2171 write(6,3) need, left, total 21723 format(' (need ',i7,' and ',i7,' out of ',i7,' bytes are left)') 2173 write(6,*)' ' 2174 call ffflush(6) 2175c 2176 end 2177 2178 2179 2180 subroutine my_lock(g_b) 2181 implicit none 2182#include "global.fh" 2183 integer g_b, val, flag, i 2184 logical first_time 2185 double precision dummy 2186 common /lock/ val 2187 common /dum/ dummy 2188 data first_time /.true./ 2189c 2190c this awkward initialization is to avoid a weird problem 2191C with block data on SUN 2192 if(first_time)then 2193 first_time = .false. 2194 dummy = .0 2195 endif 2196c 2197 val = ga_read_inc(g_b,1,1, 1) 219810 call ga_get(g_b, 2,2,1,1, flag, 1) 2199 if(flag.eq.val) return 2200c 2201c to reduce memory stress, wait a while before retrying 2202 do i = 1, 100 2203 dummy = dummy + .1 2204 enddo 2205 goto 10 2206 end 2207 2208 2209 subroutine my_unlock(g_b) 2210 implicit none 2211#include "global.fh" 2212 integer g_b, val 2213 common /lock/ val 2214c 2215 call ga_put(g_b, 2,2,1,1, val+1, 1) 2216 end 2217 2218 2219 logical function found(i,j, iv, jv, n) 2220 integer n 2221 integer i,j, iv(n), jv(n) 2222 integer loop 2223 2224 found = .false. 2225 do loop = 1, n 2226 if(i .eq. iv(loop) .and. j .eq.jv(loop))then 2227 found = .true. 2228 goto 99 2229 endif 2230 enddo 223199 continue 2232 return 2233 end 2234 2235 2236 subroutine proc_remap() 2237 implicit none 2238#include "global.fh" 2239 integer proc(100),nproc,i 2240 nproc = ga_nnodes() 2241 if(nproc.gt.100) 2242 $ call ga_error("remap requires<=100 processes",nproc) 2243 do i = 1, nproc 2244 proc(i) = nproc-i 2245 enddo 2246c call ga_register_proclist(proc,nproc) 2247 end 2248 2249 2250 subroutine util_rfill(n,val,a,ia) 2251 implicit none 2252 real a(*), val 2253 integer n, ia, i 2254c 2255c initialise real array to scalar value 2256c 2257 if (ia.eq.1) then 2258 do 10 i = 1, n 2259 a(i) = val 2260 10 continue 2261 else 2262 do 20 i = 1,(n-1)*ia+1,ia 2263 a(i) = val 2264 20 continue 2265 endif 2266c 2267 end 2268 2269 2270 subroutine util_dfill(n,val,a,ia) 2271 implicit none 2272 double precision a(*), val 2273 integer n, ia, i 2274c 2275c initialise double precision array to scalar value 2276c 2277 if (ia.eq.1) then 2278 do 10 i = 1, n 2279 a(i) = val 2280 10 continue 2281 else 2282 do 20 i = 1,(n-1)*ia+1,ia 2283 a(i) = val 2284 20 continue 2285 endif 2286c 2287 end 2288 2289 subroutine util_ifill(n,val,a,ia) 2290 implicit none 2291 integer n, ia, i, a(*),val 2292c 2293c initialise integer array to scalar value 2294c 2295 if (ia.eq.1) then 2296 do 10 i = 1, n 2297 a(i) = val 2298 10 continue 2299 else 2300 do 20 i = 1,(n-1)*ia+1,ia 2301 a(i) = val 2302 20 continue 2303 endif 2304c 2305 end 2306 2307 subroutine util_qfill(n,val,a,ia) 2308 implicit none 2309 double complex a(*), val 2310 integer n, ia, i 2311c 2312c initialise double complex array to scalar value 2313c 2314 if (ia.eq.1) then 2315 do 10 i = 1, n 2316 a(i) = val 2317 10 continue 2318 else 2319 do 20 i = 1,(n-1)*ia+1,ia 2320 a(i) = val 2321 20 continue 2322 endif 2323c 2324 end 2325 2326 2327 integer function iran(i) 2328 implicit none 2329 double precision util_drand 2330 external util_drand 2331 integer i 2332 iran = int(util_drand(0)*dfloat(i))+1 2333 return 2334 end 2335