1#if HAVE_CONFIG_H 2# include "config.fh" 3#endif 4c $Id: pgtest.F,v 1.9 2005-11-23 10:25:18 manoj Exp $ 5c vector boxes lack arithmetic precision 6#ifdef CRAY_YMP 7# define THRESH 1d-10 8# define THRESHF 1e-5 9#elif defined(FUJITSU) 10# define THRESH 1d-12 11# define THRESHF 1e-5 12#else 13# define THRESH 1d-13 14# define THRESHF 1e-5 15#endif 16 17#define MISMATCH(x,y) abs(x-y)/max(1d0,abs(x)).gt.THRESH 18#define MISMATCHF(x,y) abs(x-y)/max(1.0,abs(x)).gt.THRESHF 19 20#define NEW_API 21c#define MIRROR 22c#define GA3 23c#define NGA_GATSCAT 24c#define USE_RESTRICTED 25 26#ifdef USE_RESTRICTED 27# define NEW_API 28#endif 29 30 program main 31 implicit none 32#include "mafdecls.fh" 33#include "global.fh" 34#include "testutil.fh" 35 integer heap, stack, fudge, ma_heap, me, nproc 36 integer inode,proclist(100),i,j,nprocs 37 integer proc_group(0:100), my_proc_group, grp, num_grp 38 integer midnode, splitnode, color, key 39 logical status 40 parameter (heap=200*200*4, fudge=100, stack=200*200) 41c 42c*** Intitialize a message passing library 43c 44#include "mp3.fh" 45c 46c*** Initialize GA 47c 48c There are 2 choices: ga_initialize or ga_initialize_ltd. 49c In the first case, there is no explicit limit on memory usage. 50c In the second, user can set limit (per processor) in bytes. 51c 52 call ga_initialize() 53 nproc = ga_nnodes() 54 me = ga_nodeid() 55c 56c*** Initialize the MA package 57c MA must be initialized before any global array is allocated 58c 59 ma_heap = heap/nproc + fudge 60 ma_heap = 2*ma_heap 61#ifdef USE_RESTRICTED 62 ma_heap = 2*ma_heap 63#endif 64 status = ma_init(MT_DCPL, stack, ma_heap) 65 if (.not. status) call ga_error('ma_init failed',-1) 66c 67c we can also use GA_set_memory_limit BEFORE first ga_create call 68c 69 call GA_set_memory_limit(util_mdtob(ma_heap)) 70c 71c*** Create process groups on SMP nodes 72c 73#if 1 74 midnode = nproc/2 75 do i = 1, midnode 76 proclist(i) = i-1 77 end do 78 nprocs = midnode 79 proc_group(0) = ga_pgroup_create(proclist, nprocs) 80 do i = midnode+1, nproc 81 proclist(i-midnode) = i-1 82 end do 83 nprocs = nproc - midnode 84 proc_group(1) = ga_pgroup_create(proclist, nprocs) 85c call ga_pgroup_set_default(proc_group(inode)) 86 if (me.lt.midnode) then 87 call ga_pgroup_set_default(proc_group(0)) 88 call runtest 89 endif 90 call ga_pgroup_set_default(ga_pgroup_get_world()) 91 call ga_sync() 92 if (me.ge.midnode) then 93 call ga_pgroup_set_default(proc_group(1)) 94 call runtest 95 endif 96#else 97c split into 2 equal groups 98 if(me.eq.0) then 99 print *,' ************* Testing ga_pgroup_split ************ ' 100 call ffflush(6) 101 endif 102 num_grp = 2 103 grp = ga_pgroup_get_default() 104 my_proc_group = ga_pgroup_split(grp, num_grp) 105 call ga_pgroup_set_default(my_proc_group) 106 call runtest 107c reset to world group 108 call ga_pgroup_set_default(ga_pgroup_get_world()) 109 call ga_sync() 110c split into 2 irregular groups (33:67) 111 if(me.eq.0) then 112 print *,' ********* Testing ga_pgroup_split_irreg *********** ' 113 call ffflush(6) 114 endif 115 num_grp = 2 116 grp = ga_pgroup_get_default() 117 splitnode = nproc/3 118 if(me.lt.splitnode) then 119 color=0 120 else 121 color=1 122 endif 123c same key for all processes means automatic control of rank assignment 124 key = 0 125 my_proc_group = ga_pgroup_split_irreg(grp, color, key) 126 call ga_pgroup_set_default(my_proc_group) 127 if(me.lt.splitnode) then 128 call runtest 129 endif 130 call ffflush(6) 131 call ga_sync() 132 if(me.ge.splitnode) then 133 call runtest 134 endif 135 call ga_sync() 136#endif 137 138 call ga_pgroup_set_default(ga_pgroup_get_world()) 139c 140c*** Check if memory limits are enforced 141c 142c call check_mem(util_mdtob(ma_heap*ga_nnodes())) 143c 144c*** Tidy up the GA package 145c 146 call ga_sync() 147c 148 if(ga_nodeid().eq.0) print *,'All tests successful ' 149c 150 write(6,*) 'Calling ga_terminate' 151 call ga_terminate() 152c 153c*** Tidy up after message-passing library 154c 155 call MP_FINALIZE() 156c 157 stop 158 end 159c 160 subroutine runtest 161 implicit none 162#include "mafdecls.fh" 163#include "global.fh" 164#include "testutil.fh" 165 integer me, nproc 166 logical status 167 168 me = ga_nodeid() 169 nproc = ga_nnodes() 170 171 if(me.eq.0)then 172#ifdef MIRROR 173 print *,' Performing tests on Mirrored Arrays' 174#endif 175 print *,' GA initialized ' 176 call ffflush(6) 177 endif 178c 179c 180c Uncomment the below line to register external memory allocator 181c for dynamic arrays inside GA routines. 182c call register_ext_memory() 183c 184 if(me.eq.(nproc-1))then 185 print *, 'using ', nproc,' process(es) ', ga_cluster_nnodes(), 186 $ ' cluster nodes' 187 print *,'process ', me, ' is on node ',ga_cluster_nodeid(), 188 $ ' with ', ga_cluster_nprocs(-1), ' processes' 189 call ffflush(6) 190 endif 191#ifdef MIRROR 192 if (me.eq.0) then 193 write(6,*) 194 write(6,*) ' TESTING MIRRORED ARRAYS ' 195 write(6,*) 196 call ffflush(6) 197 endif 198#endif 199 call ga_sync() 200c 201c*** Check support for double precision arrays 202c 203 if (me.eq.0) then 204 write(6,*) 205 write(6,*) ' CHECKING DOUBLES ' 206 write(6,*) 207 call ffflush(6) 208 endif 209 210 call check_dbl() 211c 212c*** Check support for double precision complex arrays 213c 214 if (me.eq.0) then 215 write(6,*) 216 write(6,*) ' CHECKING DOUBLE COMPLEX ' 217 write(6,*) 218 call ffflush(6) 219 endif 220 221 call check_complex() 222c 223c*** Check support for integer arrays 224c 225 if (me.eq.0) then 226 write(6,*) 227 write(6,*) ' CHECKING INTEGERS ' 228 write(6,*) 229 call ffflush(6) 230 endif 231 232 call check_int() 233c 234c 235c*** Check support for single precision 236c 237 if (me.eq.0) then 238 write(6,*) 239 write(6,*) ' CHECKING SINGLE PRECISION ' 240 write(6,*) 241 call ffflush(6) 242 endif 243 244 call check_flt() 245c 246 if (me.eq.0) then 247 write(6,*) 248 write(6,*)'CHECKING Wrappers to Message Passing Collective ops' 249 write(6,*) 250 call ffflush(6) 251 endif 252 253 call check_wrappers 254c 255 if(me.eq.0) call ga_print_stats() 256 if(me.eq.0) print *,' ' 257 if(me.eq.0) print *,'All tests succesful ' 258c 259 return 260 end 261 262 263 subroutine check_dbl() 264 implicit none 265#include "mafdecls.fh" 266#include "global.fh" 267#include "testutil.fh" 268c 269 integer n,m 270 parameter (n = 128) 271 parameter (m = 2*n) 272 double precision a(n,n), b(n,n), v(m),w(m) 273#ifdef MIRROR 274 logical lsame 275 integer g_c, p_mirror 276 integer ndim, dims(2), chunk(2) 277 integer alo(2), blo(2), ahi(2), bhi(2) 278#else 279# ifdef NEW_API 280 integer ndim, dims(2), chunk(2) 281# endif 282#endif 283 integer iv(m), jv(m) 284#ifdef NGA_GATSCAT 285 integer ijv(2,m) 286#endif 287 logical status 288 integer g_a, g_b 289 integer iran, i,j, loop,nloop,maxloop, ilo, ihi, jlo, jhi, itmp 290 integer nproc, me, int, ij, inc, ii, jj 291 parameter (maxloop = 100) 292 integer maxproc 293 parameter (maxproc = 128) 294 double precision crap, sum1, sum2, x 295 double precision nwords 296 logical found 297 integer lprocs,inode,iproc,nnodes 298 intrinsic int 299 integer tlo(2),thi(2),tld(2),index 300#ifdef USE_RESTRICTED 301 integer num_rstrctd 302 integer rstrctd_list(maxproc/2) 303#endif 304 iran(i) = int(drand(0)*real(i)) + 1 305c 306 nproc = ga_nnodes() 307 me = ga_nodeid() 308 inode = ga_cluster_nodeid() 309 lprocs = ga_cluster_nprocs(inode) 310 nnodes = ga_cluster_nnodes() 311 iproc = mod(me,lprocs) 312 nloop = Min(maxloop,n) 313#ifdef USE_RESTRICTED 314 num_rstrctd = nproc/2 315 if (num_rstrctd.eq.0) num_rstrctd = 1 316 do i = 1, num_rstrctd 317 rstrctd_list(i) = (num_rstrctd/2) + i-1 318 end do 319#endif 320c 321c a() is a local copy of what the global array should start as 322c 323 do j = 1, n 324 do i = 1, n 325#ifndef MIRROR 326 a(i,j) = i-1 + (j-1)*n 327#else 328 a(i,j) = inode + i-1 + (j-1)*n 329#endif 330 b(i,j) =-1. 331 enddo 332 enddo 333* write(6,*) ' correct ' 334* call output(a, 1, n, 1, n, n, n, 1) 335* call ffflush(6) 336c 337c Create a global array 338c 339c print *,ga_nodeid(), ' creating array' 340* call ffflush(6) 341c call setdbg(1) 342#ifdef NEW_API 343 g_a = ga_create_handle() 344 ndim = 2 345 dims(1) = n 346 dims(2) = n 347 call ga_set_data(g_a,ndim,dims,MT_DBL) 348 call ga_set_array_name(g_a,'a') 349#ifdef USE_RESTRICTED 350 call ga_set_restricted(g_a, rstrctd_list, num_rstrctd) 351#endif 352# ifdef MIRROR 353 p_mirror = ga_pgroup_get_mirror() 354 call ga_set_pgroup(g_a,p_mirror) 355# endif 356 status = ga_allocate(g_a) 357#else 358# ifndef MIRROR 359 status = ga_create(MT_DBL, n, n, 'a', 0, 0, g_a) 360# else 361 ndim = 2 362 dims(1) = n 363 dims(2) = n 364 chunk(1) = 0 365 chunk(2) = 0 366 p_mirror = ga_pgroup_get_mirror() 367 status = nga_create_config(MT_DBL, ndim, dims, 'a', chunk, 368 + p_mirror, g_a) 369# endif 370#endif 371 if (.not. status) then 372 write(6,*) ' ga_create failed' 373 call ffflush(6) 374 call ga_error('... exiting ',0) 375 endif 376c 377c check if handle is valid. be quiet unless error 378c 379 if(.not.ga_valid_handle(g_a)) call ga_error("invalid handle",g_a) 380c 381 call ga_distribution(g_a,me,ilo, ihi, jlo, jhi) 382 call ga_sync() 383c 384c Zero the array 385c 386 if (me .eq. 0) then 387 write(6,21) 388 21 format(/'> Checking zero ... ') 389 call ffflush(6) 390 endif 391 call ga_zero(g_a) 392c 393c Check that it is indeed zero 394c 395 call ga_get(g_a, 1, n, 1, n, b, n) 396 call ga_sync() 397 do i = 1, n 398 do j = 1, n 399 if (b(i,j) .ne. 0.0d0) then 400 write(6,*) me,' zero ', i, j, b(i,j) 401 call ffflush(6) 402c call ga_error('... exiting ',0) 403 endif 404 enddo 405 enddo 406 if (me.eq.0) then 407 write(6,*) 408 write(6,*) ' ga_zero is OK' 409 write(6,*) 410 endif 411 call ga_sync() 412c 413c Each node fills in disjoint sections of the array 414c 415 if (me .eq. 0) then 416 write(6,2) 417 2 format(/'> Checking disjoint put ... ') 418 call ffflush(6) 419 endif 420 call ga_sync() 421c 422 inc = (n-1)/20 + 1 423 ij = 0 424 do j = 1, n, inc 425 do i = 1, n, inc 426#ifndef MIRROR 427 if (mod(ij,nproc) .eq. me) then 428#else 429 if (mod(ij,lprocs) .eq. iproc) then 430#endif 431 ilo = i 432 ihi = min(i+inc, n) 433 jlo = j 434 jhi = min(j+inc, n) 435* write(6,4) me, ilo, ihi, jlo, jhi 436* 4 format(' node ',i2,' checking put ',4i4) 437* call ffflush(6) 438 call ga_put(g_a, ilo, ihi, jlo, jhi, a(ilo, jlo), n) 439 endif 440 ij = ij + 1 441 enddo 442 enddo 443 call ga_sync() 444c 445c All nodes check all of a 446c 447 call util_dfill(n*n, 0.0d0, b, 1) 448* call ga_print(g_a,1) 449 call ga_get(g_a, 1, n, 1, n, b, n) 450* write(6,*) ' after get' 451* call output(b, 1, n, 1, n, n, n, 1) 452c 453 do i = 1, n 454 do j = 1, n 455 if (b(i,j) .ne. a(i,j)) then 456 write(6,*) ' put ', me, i, j, a(i,j),b(i,j) 457 call ffflush(6) 458 call ga_error('... exiting ',0) 459 endif 460 enddo 461 enddo 462 if (me.eq.0) then 463 write(6,*) 464 write(6,*) ' ga_put is OK' 465 write(6,*) 466 endif 467 call ga_sync() 468c 469c Now check nloop random gets from each node 470c 471 if (me .eq. 0) then 472 write(6,5) nloop 473 5 format(/'> Checking random get (',i5,' calls)...') 474 call ffflush(6) 475 endif 476 call ga_sync() 477c 478 nwords = 0 479c 480 crap = drand(ga_nodeid()*51 +1) !Different seed for each proc 481 do loop = 1, nloop 482 ilo = iran(loop) 483 ihi = iran(loop) 484 if (ihi.lt. ilo) then 485 itmp = ihi 486 ihi = ilo 487 ilo = itmp 488 endif 489 jlo = iran(loop) 490 jhi = iran(loop) 491 if (jhi.lt. jlo) then 492 itmp = jhi 493 jhi = jlo 494 jlo = itmp 495 endif 496c 497 nwords = nwords + (ihi-ilo+1)*(jhi-jlo+1) 498c 499 call util_dfill(n*n, 0.0d0, b, 1) 500 call ga_get(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n) 501 if (me .eq. 0 .and. mod(loop-1, max(1,nloop/20)).eq.0) then 502 write(6,1) loop, me, ilo, ihi, jlo, jhi, nwords 503 1 format(' call ',i5, ' node ',i2,' checking get ',4i4, 504 $ ' total ',d9.2) 505 call ffflush(6) 506 endif 507 sum1 = 0.0d0 508 do j = jlo, jhi 509 do i = ilo, ihi 510 sum1 = sum1 + b(i,j) 511 if (b(i,j) .ne. a(i,j)) then 512 write(6,*) i, j, b(i,j), a(i,j) 513 call ffflush(6) 514 call ga_error('... exiting ',0) 515 endif 516 enddo 517 enddo 518c 519 enddo 520 if (me.eq.0) then 521 write(6,*) 522 write(6,*) ' ga_get is OK' 523 write(6,*) 524 call ffflush(6) 525 endif 526 call ga_sync() 527c 528c Each node accumulates into disjoint sections of the array 529c 530 if (me .eq. 0) then 531 write(6,9) 532 9 format(/'> Checking accumulate ... ') 533 call ffflush(6) 534 endif 535 call ga_sync() 536c 537 crap = drand(12345) ! Same seed for each process 538 do j = 1, n 539 do i = 1, n 540c b(i,j) = drand(0) 541 b(i,j) = i+j 542 enddo 543 enddo 544c 545 inc = (n-1)/20 + 1 546 ij = 0 547 do j = 1, n, inc 548 do i = 1, n, inc 549c x = drand(0) 550 x = 10. 551 ilo = i 552 ihi = min(i+inc-1, n) 553 if(ihi.eq.n-1)ihi=n 554c ihi = min(i+inc, n) 555 jlo = j 556 jhi = min(j+inc-1, n) 557 if(jhi.eq.n-1)jhi=n 558c jhi = min(j+inc-1, n) 559* call ffflush(6) 560#ifndef MIRROR 561 if (mod(ij,nproc) .eq. me) then 562#else 563 if (mod(ij,lprocs) .eq. iproc) then 564#endif 565c print *, me, 'checking accumulate ',ilo,ihi,jlo,jhi,x 566* 11 format(' node ',i2,' checking accumulate ',4i4) 567* call ffflush(6) 568 call ga_acc(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n, x) 569 endif 570 ij = ij + 1 571c 572c Each process applies all updates to its local copy 573c 574 do jj = jlo, jhi 575 do ii = ilo, ihi 576 a(ii,jj) = a(ii,jj) + x * b(ii,jj) 577 enddo 578 enddo 579 enddo 580 enddo 581 call ga_sync() 582c 583c All nodes check all of a 584c 585 call ga_get(g_a, 1, n, 1, n, b, n) 586 do j = 1, n 587 do i = 1, n 588 if(MISMATCH(b(i,j),a(i,j)))then 589 write(6,*) ' acc ', me, i, j, a(i,j), b(i,j) 590 call ffflush(6) 591c call ga_error('... exiting ',0) 592 endif 593 enddo 594 enddo 595 if (me.eq.0) then 596 write(6,*) 597 write(6,*) ' disjoint ga_acc is OK' 598 write(6,*) 599 endif 600c 601c overlapping accumulate 602 call ga_sync() 603#ifdef NEW_API 604 g_b = ga_create_handle() 605 call ga_set_data(g_b,ndim,dims,MT_DBL) 606 call ga_set_array_name(g_b,'b') 607# ifdef MIRROR 608 call ga_set_pgroup(g_b,p_mirror) 609# endif 610#ifdef USE_RESTRICTED 611 call ga_set_restricted(g_b, rstrctd_list, num_rstrctd) 612#endif 613 if (.not.ga_allocate(g_b)) then 614#else 615# ifndef MIRROR 616 if (.not. ga_create(MT_DBL, n, n, 'b', 0, 0, g_b)) then 617# else 618 if (.not. nga_create_config(MT_DBL, ndim, dims, 'b', chunk, 619 + p_mirror, g_b)) then 620# endif 621#endif 622 call ga_error('ga_create failed for second array ',0) 623 endif 624c 625 call ga_zero(g_b) 626 call ga_sync() 627 call ga_acc(g_b, n/2, n/2, n/2, n/2, 1d0, 1, 1d0) 628 call ga_sync() 629#ifndef MIRROR 630 if (me.eq.0) then 631 call ga_get(g_b, n/2, n/2, n/2, n/2, b(1,1), 1) 632 x = abs(b(1,1) -1d0*nproc) 633#else 634 if (iproc.eq.0) then 635 call ga_get(g_b, n/2, n/2, n/2, n/2, b(1,1), 1) 636 x = abs(b(1,1) -1d0*lprocs) 637#endif 638 if(x.gt. 1d-10)then 639#ifndef MIRROR 640 write(6,*)'val=',b(1,1),' expected=',nproc, x 641#else 642 write(6,*)'val=',b(1,1),' expected=',lprocs, x 643#endif 644 call ga_error('overlapping accumulate failed',0) 645 endif 646 if (me.eq.0) then 647 write(6,*) 648 write(6,*) ' overlapping ga_acc is OK' 649 write(6,*) 650 endif 651 endif 652c 653c Check the ga_add function 654c 655 if (me .eq. 0) then 656 write(6,91) 657 91 format(/'> Checking add ...') 658 call ffflush(6) 659 endif 660c 661c 662 crap = drand(12345) ! Everyone has same seed 663 do j = 1, n 664 do i = 1, n 665 b(i,j) = drand(0) 666 a(i,j) = 0.1d0*a(i,j) + 0.9d0*b(i,j) 667 enddo 668 enddo 669 670#ifndef MIRROR 671 if (me.eq.0) call ga_put(g_b, 1, n, 1, n, b, n) 672#else 673 if (iproc.eq.0) call ga_put(g_b, 1, n, 1, n, b, n) 674#endif 675 call ga_add(0.1d0, g_a, 0.9d0, g_b, g_b) 676 call ga_get(g_b, 1, n, 1, n, b, n) 677 do j = 1, n 678 do i = 1, n 679 if(MISMATCH(b(i,j), a(i,j)))then 680 write(6,*) ' add ', me, i, j, a(i,j), b(i,j) 681 call ffflush(6) 682 call ga_error('... exiting ',0) 683 endif 684 enddo 685 enddo 686 if (me.eq.0) then 687 write(6,*) 688 write(6,*) ' add is OK ' 689 write(6,*) 690 endif 691 call ga_sync() 692c 693c Check the ddot function 694c 695 if (me .eq. 0) then 696 write(6,19) 697 19 format(/'> Checking ddot ...') 698 call ffflush(6) 699 endif 700 crap = drand(12345) ! Everyone has same seed 701 sum1 = 0.0d0 702 do j = 1, n 703 do i = 1, n 704 b(i,j) = drand(0) 705 sum1 = sum1 + a(i,j)*b(i,j) 706 enddo 707 enddo 708#ifndef MIRROR 709 if (me.eq.0) then 710#else 711 if (iproc.eq.0) then 712#endif 713 call ga_put(g_b, 1, n, 1, n, b, n) 714 call ga_put(g_a, 1, n, 1, n, a, n) 715 endif 716 call ga_sync() 717 sum2 = ga_ddot(g_a,g_b) 718 if(MISMATCH(sum1, sum2))then 719 write(6,*) ' ddot wrong ', sum1, sum2 720 call ffflush(6) 721 call ga_error('... exiting ',0) 722 else if (me.eq.0) then 723 write(6,*) 724 write(6,*) ' ddot is OK ' 725 write(6,*) 726 endif 727c 728c Check the ga_scale function 729c 730 if (me .eq. 0) then 731 write(6,92) 732 92 format(/'> Checking scale ...') 733 call ffflush(6) 734 endif 735 call ga_scale(g_a, 0.123d0) 736 call ga_get(g_a, 1, n, 1, n, b, n) 737 do j = 1, n 738 do i = 1, n 739 a(i,j) = a(i,j)*0.123d0 740 if (MISMATCH(b(i,j), a(i,j)))then 741 write(6,*) ' dscal ', me, i, j, a(i,j), b(i,j) 742 call ffflush(6) 743 call ga_error('... exiting ',0) 744 endif 745 enddo 746 enddo 747 if (me.eq.0) then 748 write(6,*) 749 write(6,*) ' scale is OK ' 750 write(6,*) 751 endif 752c 753c Check the ga_copy function 754c 755 if (me .eq. 0) then 756 write(6,*) 757 write(6,*)'> Checking copy' 758 write(6,*) 759 call ffflush(6) 760 endif 761 if(me.eq.0) call ga_put(g_a, 1, n, 1, n, a, n) 762 call ga_copy(g_a, g_b) 763 call ga_get(g_b, 1, n, 1, n, b, n) 764 do j = 1, n 765 do i = 1, n 766 if (b(i,j) .ne. a(i,j)) then 767 write(6,*) ' copy ', me, i, j, a(i,j), b(i,j) 768 call ffflush(6) 769 call ga_error('... exiting ',0) 770 endif 771 enddo 772 enddo 773 if (me.eq.0) then 774 write(6,*) 775 write(6,*) ' copy is OK ' 776 write(6,*) 777 endif 778c 779 call ga_sync() 780 if (me .eq. 0) then 781 write(6,*) '> Checking scatter/gather (might be slow)... ' 782 call ffflush(6) 783 endif 784 call ga_sync() 785c 786 crap = drand(ga_nodeid()*51 +1) !Different seed for each proc 787 do j = 1, 10 788 call ga_sync() 789#ifndef MIRROR 790 itmp = iran(nproc)-1 791 if(me.eq.itmp) then 792#else 793 itmp = iran(lprocs)-1 794 if(me.eq.itmp) then 795#endif 796#ifndef NGA_GATSCAT 797 do loop = 1,m 798 ilo = iran(n) 799 jlo = iran(n) 800 iv(loop) = ilo 801 jv(loop) = jlo 802 enddo 803 call ga_gather(g_a, v, iv, jv, m) 804 do loop = 1,m 805 ilo= iv(loop) 806 jlo= jv(loop) 807 call ga_get(g_a,ilo,ilo,jlo,jlo,v(loop),1) 808 if(v(loop) .ne. a(ilo,jlo))then 809 write(6,*)me,' gather ', ilo,',',jlo,',', a(ilo,jlo) 810 & ,' ',v(loop) 811 call ffflush(6) 812 call ga_error('... exiting ',0) 813 endif 814 enddo 815#else 816 do loop = 1,m 817 ilo = iran(n) 818 jlo = iran(n) 819 ijv(1,loop) = ilo 820 ijv(2,loop) = jlo 821 enddo 822 call nga_gather(g_a, v, ijv, m) 823 do loop = 1,m 824 ilo= ijv(1,loop) 825 jlo= ijv(2,loop) 826 call ga_get(g_a,ilo,ilo,jlo,jlo,v(loop),1) 827 if(v(loop) .ne. a(ilo,jlo))then 828 write(6,*)me,' gather ', ilo,',',jlo,',', a(ilo,jlo) 829 & ,' ',v(loop) 830 call ffflush(6) 831 call ga_error('... exiting ',0) 832 endif 833 enddo 834#endif 835 endif 836 enddo 837c 838 if (me.eq.0) then 839 write(6,*) 840 write(6,*) ' gather is OK' 841 write(6,*) 842 call ffflush(6) 843 endif 844c 845 846 do j = 1,10 847 call ga_sync() 848#ifndef MIRROR 849 if(me.eq.iran(ga_nnodes())-1) then 850#else 851 if(iproc.eq.iran(lprocs)-1) then 852#endif 853#ifndef NGA_GATSCAT 854 do loop = 1,m 855 ilo = iran(n) 856 jlo = iran(n) 857 iv(loop) = ilo 858 jv(loop) = jlo 859c v(loop) = DSIN(a(ilo,jlo)+b(ilo,jlo)) 860 v(loop) = 1d0 *(ilo+jlo) 861 enddo 862 call ga_scatter(g_a, v, iv, jv, m) 863 do loop = 1,m 864 ilo= iv(loop) 865 jlo= jv(loop) 866 call ga_get(g_a,ilo,ilo,jlo,jlo,w(loop),1) 867c if(v(loop) .ne. w(loop))then 868 if(w(loop) .ne. 1d0 *(ilo+jlo) )then 869 write(6,*)me,' scatter ', ilo,',',jlo,',',w(loop) 870 & ,' ', 1d0 *(ilo+jlo) 871 call ffflush(6) 872 call ga_error('... exiting ',0) 873 endif 874 enddo 875#else 876 do loop = 1,m 877 ilo = iran(n) 878 jlo = iran(n) 879 ijv(1,loop) = ilo 880 ijv(2,loop) = jlo 881c v(loop) = DSIN(a(ilo,jlo)+b(ilo,jlo)) 882 v(loop) = 1d0 *(ilo+jlo) 883 enddo 884 call nga_scatter(g_a, v, ijv, m) 885 do loop = 1,m 886 ilo= ijv(1,loop) 887 jlo= ijv(2,loop) 888 call ga_get(g_a,ilo,ilo,jlo,jlo,w(loop),1) 889c if(v(loop) .ne. w(loop))then 890 if(w(loop) .ne. 1d0 *(ilo+jlo) )then 891 write(6,*)me,' scatter ', ilo,',',jlo,',',w(loop) 892 & ,' ', 1d0 *(ilo+jlo) 893 call ffflush(6) 894 call ga_error('... exiting ',0) 895 endif 896 enddo 897#endif 898 endif 899 call ga_sync() 900 enddo 901c 902 if (me.eq.0) then 903 write(6,*) 904 write(6,*) ' scatter is OK' 905 write(6,*) 906 endif 907c 908 call ga_sync() 909c 910c scatter-acc available in GA ver. 3.0 911#ifdef GA3 912 if (me.eq.0) then 913 write(6,*) 914 write(6,*) ' checking scatter-accumulate' 915 write(6,*) 916 endif 917c 918 crap = drand(1234) 919 call ga_zero(g_a) 920c 921 do j = 1, n 922 do i = 1, n 923 b(i,j) =0. 924 enddo 925 enddo 926c 927 x = .1d0 928 ii =n 929 do jj = 1,1 930 call ga_sync() 931 do loop = 1, ii 932 933c generate unique i,j pairs 93410 continue 935 i = iran(n) 936 j=iran(n) 937 if (found(i,j, iv, jv, loop-1) ) goto 10 938#ifndef NGA_GATSCAT 939 iv(loop) = i 940 jv(loop) = j 941#else 942 ijv(1,loop) = i 943 ijv(2,loop) = j 944#endif 945 v(loop) = 1d0 *(i+j) 946#ifndef MIRROR 947 b(i,j) = b(i,j) + nproc*x*v(loop) ! update local ref. array 948#else 949 b(i,j) = b(i,j) + lprocs*x*v(loop) ! update local ref. array 950#endif 951 enddo 952 953 954#ifndef NGA_GATSCAT 955 call ga_scatter_acc(g_a,v,iv,jv, ii,x) 956#else 957 call nga_scatter_acc(g_a,v,ijv,ii,x) 958#endif 959 960c 961 call ga_sync() 962c 963c check the result 964c 965 call ga_get(g_a, 1, n, 1,n, a, n) 966 967 do loop = 1,ii 968#ifndef NGA_GATSCAT 969 i = iv(loop) 970 j = jv(loop) 971#else 972 i = ijv(1,loop) 973 j = ijv(2,loop) 974#endif 975 if(MISMATCH(a(i,j),b(i,j)))then 976 print *,'Error',i,j,loop,a(i,j),'expected=',b(i,j) 977* if(me.eq.0)then 978* do ii=1,loop 979* print *,'element',ii, iv(ii),jv(ii) 980* enddo 981* endif 982 call ga_error('scatter-acc error in trial ',jj) 983 endif 984 enddo 985 call ga_sync() 986 987 enddo 988 989 call ga_sync() 990 if (me.eq.0) then 991 write(6,*) 992 write(6,*) ' scatter-accumulate is OK' 993 write(6,*) 994 endif 995#endif 996 997#ifdef MIRROR 998c 999c Check the merge function 1000c 1001 do j = 1, n 1002 do i = 1, n 1003 a(i,j) = inode + i-1 + (j-1)*n 1004 enddo 1005 enddo 1006 if (me .eq. 0) then 1007 write(6,23) 1008 23 format(/'> Checking merge ...') 1009 call ffflush(6) 1010 endif 1011 if (iproc.eq.0) then 1012 call ga_put(g_a, 1, n, 1, n, a, n) 1013 endif 1014 call ga_merge_mirrored(g_a) 1015 call ga_get(g_a, 1, n, 1, n, b, n) 1016 lsame = .true. 1017 ilo = 0 1018 ihi = nnodes*(nnodes-1)/2 1019 do i = 1, n 1020 do j = 1, n 1021 if ((dble(nnodes)*(a(j,i)-dble(inode)) 1022 + + dble(ihi)).ne.b(j,i)) then 1023 lsame = .false. 1024 ilo = ilo + 1 1025 endif 1026 end do 1027 end do 1028 if(lsame.and.me.eq.0)then 1029 write(6,*) 1030 write(6,*) ' merge is OK ' 1031 write(6,*) 1032 else if (.not.lsame) then 1033 write(6,*) 1034 write(6,*) ' merge is wrong ',ilo 1035 write(6,*) 1036 call ga_error('... exiting ',0) 1037 endif 1038c 1039c Checking copy between different array configurations 1040c 1041 if (me .eq. 0) then 1042 write(6,20) 1043 20 format(/'> Checking mixed copy ...') 1044 call ffflush(6) 1045 endif 1046#ifdef NEW_API 1047 g_c = ga_create_handle() 1048 call ga_set_data(g_c,ndim,dims,MT_DBL) 1049 call ga_set_array_name(g_c,'c') 1050 if (.not.ga_allocate(g_c)) then 1051#else 1052 if (.not. nga_create(MT_DBL, ndim, dims, 'c', chunk, g_c)) then 1053#endif 1054 call ga_error('ga_create failed for third array ',0) 1055 endif 1056 call ga_zero(g_c) 1057 call ga_copy(g_a,g_c) 1058 call ga_get(g_a, 1, n, 1, n, a, n) 1059 call ga_get(g_c, 1, n, 1, n, b, n) 1060 do i = 1, n 1061 do j = 1, n 1062 if (a(j,i).ne.b(j,i)) then 1063 lsame = .false. 1064 endif 1065 end do 1066 end do 1067 if(lsame.and.me.eq.0)then 1068 write(6,*) 1069 write(6,*) ' Mixed copy (mirrored to normal) is OK ' 1070 write(6,*) 1071 elseif (me.eq.0) then 1072 write(6,*) 1073 write(6,*) ' Mixed copy (mirrored to normal) is wrong ',ilo 1074 write(6,*) 1075 call ga_error('... exiting ',0) 1076 endif 1077 call ga_zero(g_a) 1078 call ga_copy(g_c,g_a) 1079 call ga_get(g_a, 1, n, 1, n, a, n) 1080 do i = 1, n 1081 do j = 1, n 1082 if (a(j,i).ne.b(j,i)) then 1083 lsame = .false. 1084 endif 1085 end do 1086 end do 1087 if(lsame.and.me.eq.0)then 1088 write(6,*) 1089 write(6,*) ' Mixed copy (normal to mirrored) is OK ' 1090 write(6,*) 1091 elseif (me.eq.0) then 1092 write(6,*) 1093 write(6,*) ' Mixed copy (normal to mirrored) is wrong ',ilo 1094 write(6,*) 1095 call ga_error('... exiting ',0) 1096 endif 1097c 1098c Check the merge to distributed patch function 1099c 1100 do j = 1, n 1101 do i = 1, n 1102 a(i,j) = inode + i-1 + (j-1)*n 1103 enddo 1104 enddo 1105 if (me .eq. 0) then 1106 write(6,24) 1107 24 format(/'> Checking merge to distributed patch ...') 1108 call ffflush(6) 1109 endif 1110c 1111c get low and high indices of patch 1112c 1113 ilo = n/4 1114 ihi = ilo + n/2 1115 do i = 1, 2 1116 alo(i) = ilo 1117 blo(i) = ilo + 1 1118 ahi(i) = ihi 1119 bhi(i) = ihi + 1 1120 end do 1121 if (iproc.eq.0) then 1122 call ga_put(g_a, 1, n, 1, n, a, n) 1123 endif 1124 call nga_merge_distr_patch(g_a, alo, ahi, g_c, blo, bhi) 1125 call ga_get(g_c, 1, n, 1, n, b, n) 1126 lsame = .true. 1127 ilo = 0 1128 ihi = nnodes*(nnodes-1)/2 1129 do i = alo(1), ahi(1) 1130 ii = i-alo(1) + blo(1) 1131 do j = alo(2), ahi(2) 1132 jj = j-alo(2) + blo(2) 1133 if ((dble(nnodes)*(a(j,i)-dble(inode)) 1134 + + dble(ihi)).ne.b(jj,ii)) then 1135c if (me.eq.0) then 1136c write(6,*) i,j,ii,jj, nnodes*a(j,i)+ihi, b(jj,ii) 1137c endif 1138 lsame = .false. 1139 ilo = ilo + 1 1140 endif 1141 end do 1142 end do 1143 if(lsame.and.me.eq.0)then 1144 write(6,*) 1145 write(6,*) ' merge to distributed patch is OK ' 1146 write(6,*) 1147 elseif (.not.lsame) then 1148 write(6,*) 1149 write(6,*) ' merge to distributed patch is wrong ',ilo 1150 write(6,*) 1151 call ga_error('... exiting ',0) 1152 endif 1153#endif 1154c 1155c Delete the global arrays 1156c 1157 status = ga_destroy(g_b) 1158 status = ga_destroy(g_a) 1159c 1160 return 1161 end 1162 1163c----------------------------------------------------------------- 1164 subroutine check_complex() 1165 implicit none 1166#include "mafdecls.fh" 1167#include "global.fh" 1168#include "testutil.fh" 1169c 1170 integer n,m 1171 parameter (n = 60) 1172 parameter (m = 2*n) 1173 double complex a(n,n), b(n,n), v(m),w(m) 1174#ifdef MIRROR 1175 integer ndim, dims(2), chunk(2), p_mirror 1176#else 1177# ifdef NEW_API 1178 integer ndim, dims(2), chunk(2), p_mirror 1179# endif 1180#endif 1181 integer iv(m), jv(m) 1182 logical status 1183 integer g_a, g_b 1184 integer iran, i,j, loop,nloop,maxloop, ilo, ihi, jlo, jhi, itmp 1185 integer nproc, me, int, ij, inc, ii, jj, nnodes 1186 parameter (maxloop = 100) 1187 integer maxproc 1188 parameter (maxproc = 128) 1189 double precision crap, real 1190 double precision nwords 1191 double complex x, sum1, sum2, factor 1192 integer lprocs, inode, iproc 1193#ifdef USE_RESTRICTED 1194 integer num_rstrctd 1195 integer rstrctd_list(maxproc/2) 1196#endif 1197 intrinsic int 1198 iran(i) = int(drand(0)*real(i)) + 1 1199c 1200 nproc = ga_nnodes() 1201 me = ga_nodeid() 1202 inode = ga_cluster_nodeid() 1203 lprocs = ga_cluster_nprocs(inode) 1204 nnodes = ga_cluster_nnodes() 1205 iproc = mod(me,lprocs) 1206 nloop = Min(maxloop,n) 1207#ifdef USE_RESTRICTED 1208 num_rstrctd = nproc/2 1209 if (num_rstrctd.eq.0) num_rstrctd = 1 1210 do i = 1, num_rstrctd 1211 rstrctd_list(i) = (num_rstrctd/2) + i-1 1212 end do 1213#endif 1214c 1215c a() is a local copy of what the global array should start as 1216c 1217 do j = 1, n 1218 do i = 1, n 1219#ifndef MIRROR 1220 a(i,j) = cmplx(dble(i-1), dble((j-1)*n)) 1221#else 1222 a(i,j) = cmplx(dble(inode),0.0d00) 1223 + + cmplx(dble(i-1), dble((j-1)*n)) 1224#endif 1225 b(i,j) = cmplx(-1d0,1d0) 1226 enddo 1227 enddo 1228c 1229c Create a global array 1230c 1231c print *,ga_nodeid(), ' creating array' 1232 call ffflush(6) 1233c call setdbg(1) 1234#ifdef NEW_API 1235 ndim = 2 1236 dims(1) = n 1237 dims(2) = n 1238 g_a = ga_create_handle() 1239 call ga_set_data(g_a,ndim,dims,MT_DCPL) 1240 call ga_set_array_name(g_a,'a') 1241#ifdef USE_RESTRICTED 1242 call ga_set_restricted(g_a, rstrctd_list, num_rstrctd) 1243#endif 1244# ifdef MIRROR 1245 p_mirror = ga_pgroup_get_mirror() 1246 call ga_set_pgroup(g_a,p_mirror) 1247# endif 1248 status = ga_allocate(g_a) 1249#else 1250# ifndef MIRROR 1251 status = ga_create(MT_DCPL, n, n, 'a', 0, 0, g_a) 1252# else 1253 ndim = 2 1254 dims(1) = n 1255 dims(2) = n 1256 chunk(1) = 0 1257 chunk(2) = 0 1258 p_mirror = ga_pgroup_get_mirror() 1259 status = nga_create_config(MT_DCPL, ndim, dims, 'a', chunk, 1260 + p_mirror, g_a) 1261# endif 1262#endif 1263 if (.not. status) then 1264 write(6,*) ' ga_create failed' 1265 call ga_error('... exiting ',0) 1266 endif 1267#ifdef NEW_API 1268 g_b = ga_create_handle() 1269 call ga_set_data(g_b,ndim,dims,MT_DCPL) 1270 call ga_set_array_name(g_b,'b') 1271# ifdef MIRROR 1272 call ga_set_pgroup(g_b,p_mirror) 1273# endif 1274 if (.not.ga_allocate(g_b)) then 1275#else 1276# ifndef MIRROR 1277 if (.not. ga_create(MT_DCPL, n, n, 'b', 0, 0, g_b)) then 1278# else 1279 if (.not. nga_create_config(MT_DCPL, ndim, dims, 'b', chunk, 1280 _ p_mirror, g_b)) then 1281# endif 1282#endif 1283 call ga_error('ga_create failed for second array ',0) 1284 endif 1285 1286 call ga_distribution(g_a,me,ilo, ihi, jlo, jhi) 1287 call ga_sync() 1288c 1289c Zero the array 1290c 1291 if (me .eq. 0) then 1292 write(6,21) 1293 21 format(/'> Checking zero ... ') 1294 call ffflush(6) 1295 endif 1296 call ga_zero(g_a) 1297c 1298c Check that it is indeed zero 1299c 1300 call ga_get(g_a, 1, n, 1, n, b, n) 1301 call ga_sync() 1302 do i = 1, n 1303 do j = 1, n 1304 if(b(i,j).ne.(0d0,0d0)) then 1305 write(6,*) me,' zero ', i, j, b(i,j) 1306 call ffflush(6) 1307c call ga_error('... exiting ',0) 1308 endif 1309 enddo 1310 enddo 1311 if (me.eq.0) then 1312 write(6,*) 1313 write(6,*) ' ga_zero is OK' 1314 write(6,*) 1315 endif 1316 call ga_sync() 1317c 1318c Each node fills in disjoint sections of the array 1319c 1320 if (me .eq. 0) then 1321 write(6,2) 1322 2 format(/'> Checking disjoint put ... ') 1323 call ffflush(6) 1324 endif 1325 call ga_sync() 1326c 1327 inc = (n-1)/20 + 1 1328 ij = 0 1329 do j = 1, n, inc 1330 do i = 1, n, inc 1331#ifndef MIRROR 1332 if (mod(ij,nproc) .eq. me) then 1333#else 1334 if (mod(ij,lprocs) .eq. iproc) then 1335#endif 1336 ilo = i 1337 ihi = min(i+inc, n) 1338 jlo = j 1339 jhi = min(j+inc, n) 1340 call ga_put(g_a, ilo, ihi, jlo, jhi, a(ilo, jlo), n) 1341 endif 1342 ij = ij + 1 1343 enddo 1344 enddo 1345 call ga_sync() 1346c 1347c All nodes check all of a 1348c 1349 call util_qfill(n*n, (0d0,0d0), b, 1) 1350 call ga_get(g_a, 1, n, 1, n, b, n) 1351c 1352 do i = 1, n 1353 do j = 1, n 1354 if (b(i,j) .ne. a(i,j)) then 1355 write(6,*) ' put ', me, i, j, a(i,j),b(i,j) 1356 call ffflush(6) 1357 call ga_error('... exiting ',0) 1358 endif 1359 enddo 1360 enddo 1361 if (me.eq.0) then 1362 write(6,*) 1363 write(6,*) ' ga_put is OK' 1364 write(6,*) 1365 endif 1366 call ga_sync() 1367c 1368c Now check nloop random gets from each node 1369c 1370 if (me .eq. 0) then 1371 write(6,5) nloop 1372 5 format(/'> Checking random get (',i5,' calls)...') 1373 call ffflush(6) 1374 endif 1375 call ga_sync() 1376c 1377 nwords = 0 1378c 1379 crap = drand(ga_nodeid()*51 +1) !Different seed for each proc 1380 do loop = 1, nloop 1381 ilo = iran(loop) 1382 ihi = iran(loop) 1383 if (ihi.lt. ilo) then 1384 itmp = ihi 1385 ihi = ilo 1386 ilo = itmp 1387 endif 1388 jlo = iran(loop) 1389 jhi = iran(loop) 1390 if (jhi.lt. jlo) then 1391 itmp = jhi 1392 jhi = jlo 1393 jlo = itmp 1394 endif 1395c 1396 nwords = nwords + (ihi-ilo+1)*(jhi-jlo+1) 1397c 1398 call util_qfill(n*n, (0.0d0,0d0), b, 1) 1399 call ga_get(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n) 1400 if (me .eq. 0 .and. mod(loop-1, max(1,nloop/20)).eq.0) then 1401 write(6,1) loop, me, ilo, ihi, jlo, jhi, nwords 1402 1 format(' call ',i5, ' node ',i2,' checking get ',4i4, 1403 $ ' total ',d9.2) 1404 call ffflush(6) 1405 endif 1406 do j = jlo, jhi 1407 do i = ilo, ihi 1408 if (b(i,j) .ne. a(i,j)) then 1409 write(6,*)'error:', i, j, b(i,j), a(i,j) 1410 call ga_error('... exiting ',0) 1411 endif 1412 enddo 1413 enddo 1414c 1415 enddo 1416 if (me.eq.0) then 1417 write(6,*) 1418 write(6,*) ' ga_get is OK' 1419 write(6,*) 1420 call ffflush(6) 1421 endif 1422 call ga_sync() 1423c 1424c Each node accumulates into disjoint sections of the array 1425c 1426 if (me .eq. 0) then 1427 write(6,9) 1428 9 format(/'> Checking accumulate ... ') 1429 call ffflush(6) 1430 endif 1431 call ga_sync() 1432c 1433 crap = drand(12345) ! Same seed for each process 1434 do j = 1, n 1435 do i = 1, n 1436 b(i,j) = cmplx(drand(0),drand(1)) 1437 b(i,j) = cmplx(dble(i),dble(j)) 1438 enddo 1439 enddo 1440c 1441 inc = (n-1)/20 + 1 1442 ij = 0 1443 do j = 1, n, inc 1444 do i = 1, n, inc 1445c x = cmplx(drand(0),0.333d0) 1446c x = cmplx(0.333d0,0) 1447* x = cmplx(0d0,0d0) 1448 x = 0 1449 ilo = i 1450 ihi = min(i+inc-1, n) 1451 if(ihi.eq.n-1)ihi=n 1452 jlo = j 1453 jhi = min(j+inc-1, n) 1454 if(jhi.eq.n-1)jhi=n 1455#ifndef MIRROR 1456 if (mod(ij,nproc) .eq. me) then 1457#else 1458 if (mod(ij,lprocs) .eq. iproc) then 1459#endif 1460 call ga_acc(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n, x) 1461 endif 1462 ij = ij + 1 1463c 1464c Each process applies all updates to its local copy 1465c 1466 do jj = jlo, jhi 1467 do ii = ilo, ihi 1468 a(ii,jj) = a(ii,jj) + x * b(ii,jj) 1469 enddo 1470 enddo 1471 enddo 1472 enddo 1473 call ga_sync() 1474c 1475c All nodes check all of a 1476c 1477 call ga_get(g_a, 1, n, 1, n, b, n) 1478 do j = 1, n 1479 do i = 1, n 1480 if (MISMATCH(b(i,j), a(i,j)))then 1481 write(6,*) ' acc ', me, i, j, a(i,j), b(i,j) 1482 call ga_error('... exiting ',0) 1483 endif 1484 enddo 1485 enddo 1486 if (me.eq.0) then 1487 write(6,*) 1488 write(6,*) ' disjoint ga_acc is OK' 1489 write(6,*) 1490 endif 1491c 1492c overlapping accumulate 1493c 1494 call ga_zero(g_b) 1495 call ga_acc(g_b, n/2, n/2, n/2, n/2, (1d0,-1d0), 1, (1d0,0d0)) 1496 call ga_sync() 1497#ifndef MIRROR 1498 if (me.eq.0) then 1499 call ga_get(g_b, n/2, n/2, n/2, n/2, x, 1) 1500 if (MISMATCH(x, ((1d0,-1d0)*nproc)))then 1501#else 1502 if (iproc.eq.0) then 1503 call ga_get(g_b, n/2, n/2, n/2, n/2, x, 1) 1504 if (MISMATCH(x, ((1d0,-1d0)*lprocs)))then 1505#endif 1506c if(error.gt. (1d-8))then 1507#ifndef MIRROR 1508 write(6,*)'val=',x,' expected=(',nproc,',',-nproc,')' 1509#else 1510 write(6,*)'val=',x,' expected=(',lprocs,',',-lprocs,')' 1511#endif 1512 call ga_error('overlapping accumulate failed',0) 1513 endif 1514 write(6,*) 1515 write(6,*) ' overlapping ga_acc is OK' 1516 write(6,*) 1517 endif 1518c 1519c Check the ga_copy function 1520c 1521 if (me .eq. 0) then 1522 write(6,*) 1523 write(6,*)'> Checking copy' 1524 write(6,*) 1525 call ffflush(6) 1526 endif 1527 call ga_sync() 1528#ifndef MIRROR 1529 if(me.eq.0) call ga_put(g_a, 1, n, 1, n, a, n) 1530#else 1531 if(iproc.eq.0) call ga_put(g_a, 1, n, 1, n, a, n) 1532#endif 1533 call ga_copy(g_a, g_b) 1534 call ga_get(g_b, 1, n, 1, n, b, n) 1535 do j = 1, n 1536 do i = 1, n 1537 if (b(i,j) .ne. a(i,j)) then 1538 write(6,*) ' copy ', me, i, j, a(i,j), b(i,j) 1539 call ga_error('... exiting ',0) 1540 endif 1541 enddo 1542 enddo 1543 if (me.eq.0) then 1544 write(6,*) 1545 write(6,*) ' copy is OK ' 1546 write(6,*) 1547 endif 1548c 1549c 1550c Check the ga_scale function 1551c 1552 if (me .eq. 0) then 1553 write(6,92) 1554 92 format(/'> Checking scale ...') 1555 call ffflush(6) 1556 endif 1557 factor = (1d0,-1d0) 1558 call ga_scale(g_a, factor) 1559 call ga_get(g_a, 1, n, 1, n, b, n) 1560 do j = 1, n 1561 do i = 1, n 1562 a(i,j) = a(i,j)*factor 1563 if (MISMATCH(b(i,j), a(i,j)))then 1564 write(6,*) ' dscal ', me, i, j, a(i,j), b(i,j) 1565 call ffflush(6) 1566 call ga_error('... exiting ',0) 1567 endif 1568 enddo 1569 enddo 1570 if (me.eq.0) then 1571 write(6,*) 1572 write(6,*) ' scale is OK ' 1573 write(6,*) 1574 endif 1575c 1576c Check scatter&gather 1577c 1578 call ga_sync() 1579 if (me .eq. 0) then 1580 write(6,*) '> Checking scatter/gather (might be slow)... ' 1581 call ffflush(6) 1582 if(me.eq.0) call ga_put(g_a, 1, n, 1, n, a, n) 1583 endif 1584 call ga_sync() 1585c 1586 crap = drand(ga_nodeid()*51 +1) !Different seed for each proc 1587 do j = 1, 10 1588 call ga_sync() 1589#ifndef MIRROR 1590 itmp = iran(nproc)-1 1591 if(me.eq.itmp) then 1592#else 1593 itmp = iran(lprocs)-1 1594 if(iproc.eq.itmp) then 1595#endif 1596 do loop = 1,m 1597 ilo = iran(n) 1598 jlo = iran(n) 1599 iv(loop) = ilo 1600 jv(loop) = jlo 1601 enddo 1602 call ga_gather(g_a, v, iv, jv, m) 1603 do loop = 1,m 1604 ilo= iv(loop) 1605 jlo= jv(loop) 1606 call ga_get(g_a,ilo,ilo,jlo,jlo,v(loop),1) 1607 if(v(loop) .ne. a(ilo,jlo))then 1608 write(6,*)me,' gather ', ilo,',',jlo,',', a(ilo,jlo) 1609 & ,' ',v(loop) 1610 call ffflush(6) 1611 call ga_error('... exiting ',0) 1612 endif 1613 enddo 1614 endif 1615 enddo 1616c 1617 if (me.eq.0) then 1618 write(6,*) 1619 write(6,*) ' gather is OK' 1620 write(6,*) 1621 call ffflush(6) 1622 endif 1623c 1624 do j = 1,10 1625 call ga_sync() 1626#ifndef MIRROR 1627 if(me.eq.iran(ga_nnodes())-1) then 1628#else 1629 if(me.eq.iran(lprocs)-1) then 1630#endif 1631 do loop = 1,m 1632 ilo = iran(n) 1633 jlo = iran(n) 1634 iv(loop) = ilo 1635 jv(loop) = jlo 1636 v(loop) = (1d0,-1d0) *(ilo+jlo) 1637 enddo 1638 call ga_scatter(g_a, v, iv, jv, m) 1639 do loop = 1,m 1640 ilo= iv(loop) 1641 jlo= jv(loop) 1642 call ga_get(g_a,ilo,ilo,jlo,jlo,w(loop),1) 1643 if(w(loop) .ne. (1d0,-1d0) *(ilo+jlo) )then 1644 write(6,*)me,' scatter ', ilo,',',jlo,',',w(loop) 1645 & ,' ', (1d0,-1d0) *(ilo+jlo) 1646 call ffflush(6) 1647 endif 1648 enddo 1649 endif 1650 call ga_sync() 1651 enddo 1652c 1653 if (me.eq.0) then 1654 write(6,*) 1655 write(6,*) ' scatter is OK' 1656 write(6,*) 1657 endif 1658c 1659c Check ga_add 1660c 1661 if (me .eq. 0) then 1662 write(6,91) 1663 91 format(/'> Checking add ...') 1664 call ffflush(6) 1665 endif 1666 call ga_get(g_a, 1, n, 1, n, a, n) 1667 crap = drand(12345) ! Everyone has same seed 1668 do j = 1, n 1669 do i = 1, n 1670 b(i,j) = cmplx(drand(0), drand(1)) 1671 a(i,j) = (0.1d0,-.1d0)*a(i,j) + (.9d0,-.9d0)*b(i,j) 1672 enddo 1673 enddo 1674 1675#ifndef MIRROR 1676 if (me.eq.0) call ga_put(g_b, 1, n, 1, n, b, n) 1677#else 1678 if (iproc.eq.0) call ga_put(g_b, 1, n, 1, n, b, n) 1679#endif 1680 call ga_add((0.1d0,-.1d0), g_a, (0.9d0,-.9d0), g_b, g_b) 1681 call ga_get(g_b, 1, n, 1, n, b, n) 1682 do j = 1, n 1683 do i = 1, n 1684 if (MISMATCH(b(i,j), a(i,j)))then 1685 write(6,*) ' add ', me, i, j, a(i,j), b(i,j) 1686 call ffflush(6) 1687 call ga_error('... exiting ',0) 1688 endif 1689 enddo 1690 enddo 1691 if (me.eq.0) then 1692 write(6,*) 1693 write(6,*) ' add is OK ' 1694 write(6,*) 1695 endif 1696 call ga_sync() 1697c 1698c Check the zdot function 1699c 1700 if (me .eq. 0) then 1701 write(6,19) 1702 19 format(/'> Checking zdot ...') 1703 call ffflush(6) 1704 endif 1705 crap = drand(12345) ! Everyone has same seed 1706 sum1 = (0.0d0,0.d0) 1707 do j = 1, n 1708 do i = 1, n 1709 b(i,j) = cmplx(drand(0), drand(1)) 1710 sum1 = sum1 + a(i,j)*b(i,j) 1711 enddo 1712 enddo 1713#ifndef MIRROR 1714 if (me.eq.0) then 1715#else 1716 if (iproc.eq.0) then 1717#endif 1718 call ga_put(g_b, 1, n, 1, n, b, n) 1719 call ga_put(g_a, 1, n, 1, n, a, n) 1720 endif 1721 call ga_sync() 1722 sum2 = ga_zdot(g_a,g_b) 1723 if (MISMATCH(sum1, sum2))then 1724 write(6,*) ' zdot wrong ', sum1, sum2 1725 call ffflush(6) 1726 call ga_error('... exiting ',0) 1727 else if (me.eq.0) then 1728 write(6,*) 1729 write(6,*) ' zdot is OK ' 1730 write(6,*) 1731 endif 1732c 1733c Delete the global arrays 1734c 1735 123 continue 1736 status = ga_destroy(g_b) 1737 status = ga_destroy(g_a) 1738c 1739 return 1740 end 1741c----------------------------------------------------------------- 1742 1743 1744 1745 1746 subroutine check_int() 1747 implicit none 1748#include "mafdecls.fh" 1749#include "global.fh" 1750#include "testutil.fh" 1751c 1752 integer n 1753 parameter (n = 128) 1754 integer a(n,n), b(n,n) 1755 logical status 1756 integer g_a 1757 integer iran, i, j, loop, nloop, ilo, ihi, jlo, jhi, itmp 1758 integer nproc, me, int, ij, inc, dimi, dimj, ii, jj 1759 integer ndim, dims(2), chunk(2), p_mirror, nnodes 1760 integer lprocs, inode, iproc,lproc, nprocs 1761 double precision nwords 1762 parameter (nloop = 100) 1763 integer maxproc 1764 parameter (maxproc = 128) 1765 integer map(5,maxproc), found, np,k 1766 double precision crap, sum1, real 1767 integer buf 1768#ifdef USE_RESTRICTED 1769 integer num_rstrctd 1770 integer rstrctd_list(maxproc/2) 1771#endif 1772 intrinsic int 1773 iran(i) = int(drand(0)*real(i)) + 1 1774c 1775 nproc = ga_nnodes() 1776 me = ga_nodeid() 1777 inode = ga_cluster_nodeid() 1778 lprocs = ga_cluster_nprocs(inode) 1779 nnodes = ga_cluster_nnodes() 1780 iproc = mod(me,lprocs) 1781#ifdef USE_RESTRICTED 1782 num_rstrctd = nproc/2 1783 if (num_rstrctd.eq.0) num_rstrctd = 1 1784 do i = 1, num_rstrctd 1785 rstrctd_list(i) = (num_rstrctd/2) + i-1 1786 end do 1787#endif 1788c 1789c a() is a local copy of what the global array should start as 1790c 1791 do j = 1, n 1792 do i = 1, n 1793#ifndef MIRROR 1794 a(i,j) = i-1 + (j-1)*1000 1795#else 1796 a(i,j) = inode + i-1 + (j-1)*1000 1797#endif 1798 enddo 1799 enddo 1800c 1801c Create a global array 1802c 1803 ndim = 2 1804 dims(1) = n 1805 dims(2) = n 1806 chunk(1) = 0 1807 chunk(2) = 0 1808#ifdef NEW_API 1809 g_a = ga_create_handle() 1810 call ga_set_data(g_a,ndim,dims,MT_INT) 1811 call ga_set_array_name(g_a,'a') 1812#ifdef USE_RESTRICTED 1813 call ga_set_restricted(g_a, rstrctd_list, num_rstrctd) 1814#endif 1815# ifdef MIRROR 1816 p_mirror = ga_pgroup_get_mirror() 1817 call ga_set_pgroup(g_a,p_mirror) 1818# endif 1819 if (.not.ga_allocate(g_a)) then 1820#else 1821# ifndef MIRROR 1822 if (.not. ga_create(MT_INT, n, n, 'a', 0, 0, g_a)) then 1823# else 1824 p_mirror = ga_pgroup_get_mirror() 1825 if (.not. nga_create_config(MT_INT, ndim, dims, 'a', chunk, 1826 + p_mirror, g_a)) then 1827# endif 1828#endif 1829 write(6,*) ' ga_create failed' 1830 call ffflush(6) 1831 call ga_error('... exiting ',0) 1832 endif 1833c 1834c Zero the array 1835c 1836 if (me .eq. 0) then 1837 write(6,21) 1838 21 format(/'> Checking zero ... ') 1839 call ffflush(6) 1840 endif 1841 call ga_zero(g_a) 1842c 1843c Check that it is indeed zero 1844c 1845 call ga_get(g_a, 1, n, 1, n, b, n) 1846 do i = 1, n 1847 do j = 1, n 1848 if (b(i,j) .ne. 0) then 1849 write(6,*) ' zero ', me, i, j, b(i,j) 1850 call ffflush(6) 1851 call ga_error('... exiting ',0) 1852 endif 1853 enddo 1854 enddo 1855 if (me.eq.0) then 1856 write(6,*) 1857 write(6,*) ' ga_zero is OK' 1858 write(6,*) 1859 endif 1860 call ga_sync() 1861c 1862c Each node fills in disjoint sections of the array 1863c 1864 if (me .eq. 0) then 1865 write(6,2) 1866 2 format(/'> Checking disjoint put ... ') 1867 call ffflush(6) 1868 endif 1869 call ga_sync() 1870c 1871 inc = (n-1)/20 + 1 1872 ij = 0 1873 do j = 1, n, inc 1874 do i = 1, n, inc 1875#ifndef MIRROR 1876 if (mod(ij,nproc) .eq. me) then 1877#else 1878 if (mod(ij,lprocs) .eq. iproc) then 1879#endif 1880 ilo = i 1881 ihi = min(i+inc, n) 1882 jlo = j 1883 jhi = min(j+inc, n) 1884c write(6,4) me, ilo, ihi, jlo, jhi 1885c4 format(' node ',i2,' checking put ',4i4) 1886c call ffflush(6) 1887 call ga_put(g_a, ilo, ihi, jlo, jhi, a(ilo, jlo), n) 1888 endif 1889 ij = ij + 1 1890 enddo 1891 enddo 1892 call ga_sync() 1893c 1894c All nodes check all of a 1895c 1896#ifndef MIRROR 1897 if(me.eq.0)then 1898#else 1899 if(iproc.eq.0)then 1900#endif 1901 call ga_get(g_a, 1, n, 1, n, b, n) 1902 do i = 1, n 1903 do j = 1, n 1904 if (b(i,j) .ne. a(i,j)) then 1905 write(6,*) ' put ', me, i, j, a(i,j),b(i,j) 1906 call ffflush(6) 1907 call ga_error('... exiting ',0) 1908 endif 1909 enddo 1910 enddo 1911 endif 1912 call ga_sync() 1913c 1914 if (me.eq.0) then 1915 write(6,*) 1916 write(6,*) ' ga_put is OK' 1917 write(6,*) 1918 endif 1919c 1920c Now check nloop random gets from each node 1921c 1922 if (me .eq. 0) then 1923 write(6,5) nloop 1924 5 format(/'> Checking random get (',i5,' calls)...') 1925 call ffflush(6) 1926 endif 1927 call ga_sync() 1928c 1929 nwords = 0 1930c 1931 crap = drand(ga_nodeid()*51 +1) !Different seed for each proc 1932 do loop = 1, nloop 1933 ilo = iran(loop) 1934 ihi = iran(loop) 1935 if (ihi.lt. ilo) then 1936 itmp = ihi 1937 ihi = ilo 1938 ilo = itmp 1939 endif 1940 jlo = iran(loop) 1941 jhi = iran(loop) 1942 if (jhi.lt. jlo) then 1943 itmp = jhi 1944 jhi = jlo 1945 jlo = itmp 1946 endif 1947c 1948 nwords = nwords + (ihi-ilo+1)*(jhi-jlo+1) 1949c 1950 call util_ifill(n*n, 0.0d0, b, 1) 1951 call ga_get(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n) 1952c 1953 if (me .eq. 0 .and. mod(loop-1, max(1,nloop/20)).eq.0) then 1954 write(6,1) loop, me, ilo, ihi, jlo, jhi, nwords 1955 1 format(' call ',i5, ' node ',i2,' checking get ',4i4, 1956 $ ' total ',d9.2) 1957 call ffflush(6) 1958 endif 1959c 1960 sum1 = 0.0d0 1961 do j = jlo, jhi 1962 do i = ilo, ihi 1963 sum1 = sum1 + b(i,j) 1964 if (b(i,j) .ne. a(i,j)) then 1965 write(6,*) 'error ', i, j, b(i,j), a(i,j) 1966 call ffflush(6) 1967 call ga_error('... exiting ',0) 1968 endif 1969 enddo 1970 enddo 1971 enddo 1972 if (me.eq.0) then 1973 write(6,*) 1974 write(6,*) ' ga_get is OK' 1975 write(6,*) 1976 endif 1977c 1978 call ga_sync() 1979 if (me .eq. 0 .and. n.gt.7) then 1980 write(6,*) 1981 write(6,*) '> Checking ga_print_patch --- should see ' 1982 write(6,*)' [2002 3002 4002 5002 6002]' 1983 write(6,*)' [2003 3003 4003 5003 6003]' 1984 write(6,*)' [2004 3004 4004 5004 6004]' 1985 write(6,*) 1986 call ffflush(6) 1987 endif 1988 if(n.gt.5) call ga_print_patch(g_a,3,5,3,7,0) 1989c 1990 call ga_sync() 1991c 1992 call ga_sync() 1993 if (me .eq. 0) then 1994 write(6,*) 1995 write(6,*) '> Checking read_inc ... ' 1996 write(6,*) 1997 call ffflush(6) 1998 endif 1999 call ga_sync() 2000c 2001 crap = drand(ga_nodeid()*51 +1) !Different seed for each proc 2002 inc =5 2003c every processor will be operating on somebody elses data 2004c 2005#ifndef MIRROR 2006 lproc = nproc - me - 1 2007#else 2008 lproc = nnodes-iproc-1 2009 lproc = inode*lprocs + lproc 2010#endif 2011c 2012 call ga_distribution(g_a,lproc,ilo,ihi,jlo,jhi) 2013c 2014 dimi = ihi-ilo 2015 dimj = jhi-jlo 2016 write(6,111) me,ilo,ihi,jlo,jhi,dimi,dimj 2017 111 format(i2,'..',4i8,'.',2i8) 2018 call ga_sync() 2019 if(ilo .gt.0 .and. jhi .gt. 0)then 2020 do loop = 1,nloop 2021 ii= IABS(iran(dimi)) 2022 jj= IABS(iran(dimj)) 2023 i=ilo + Mod(ii,dimi) 2024 j=jlo + Mod(jj,dimj) 2025c 2026c write(6,*) me,'..',ilo,ihi,jlo,jhi,'.',dimi,dimj,'..',i,j 2027c call ffflush(6) 2028 buf = ga_read_inc(g_a,i,j,inc) 2029 if(a(i,j).ne. buf)then 2030 write(6,*)me,'READ_inc ', i,',',j,',', a(i,j),' ',buf,me 2031 call ffflush(6) 2032 endif 2033 call ga_get(g_a, i,i,j,j, buf,1) 2034 a(i,j) = a(i,j)+inc 2035 if(a(i,j).ne. buf)then 2036 write(6,*)me,'read_INC ', i,',',j,',', a(i,j),' ',buf,me 2037 call ffflush(6) 2038 endif 2039 enddo 2040 endif 2041 call ga_sync() 2042c 2043 if (me.eq.0) then 2044 write(6,*) 2045 write(6,*) ' read_inc is OK' 2046 write(6,*) 2047 endif 2048c 2049c 2050c 2051#if 000 2052#ifndef MIRROR 2053 if (me.eq.0) then 2054 write(6,*) 2055 write(6,*) '> checking ga_fence and ga_lock' 2056 write(6,*) 2057 call ffflush(6) 2058 endif 2059c 2060 call ga_zero(g_a) 2061c 2062c*** use ga_read_inc and elements g_a(1:2,1) to implement a lock 2063c*** compute g_a(:,n) = sum (1(:)) for P processors 2064c 2065 status = ga_create_mutexes(1) 2066 if (.not. status) then 2067 call ga_error('ga_create_mutexes failed ',0) 2068 endif 2069 2070 if(n.lt.2)call ga_error('insufficient n to test ga_fence',n) 2071 2072 call ga_lock(0) 2073c call my_lock(g_a) 2074 2075c get original values g_a(:,n) 2076 call ga_get(g_a, 1,n, n,n, b,n) 2077c add my contribution 2078 do i =1,n 2079 b(i,1)= b(i,1)+1 2080 enddo 2081c 2082c need to use fence to assure that coms complete before leaving 2083c Critical Section 2084c 2085 call ga_init_fence() 2086 call ga_put(g_a, 1,n, n,n, b,n) 2087 call ga_fence() 2088 call ga_unlock(0) 2089c call my_unlock(g_a) 2090c 2091333 if(.not.ga_destroy_mutexes()) 2092 $ call ga_error('mutex not destroyed',0) 2093 2094 call ga_sync() 2095 if (me.eq.0) then 2096 call ga_get(g_a, 1,n, n,n, b,n) 2097 do i =1,n 2098 if(b(i,1).ne. nproc)then 2099 print *, 'mismatch',b(i,1),nproc 2100 call ga_error('fence failed',i) 2101 endif 2102 enddo 2103 write(6,*) 2104 write(6,*) ' ga_fence and ga_lock are OK' 2105 write(6,*) 2106 endif 2107#endif 2108#endif 2109c 2110c 2111 if (me.eq.0) then 2112 write(6,*) 2113 write(6,*) '> checking ga_locate_region' 2114 write(6,*) 2115 call ffflush(6) 2116 endif 2117 2118 status = ga_locate_region(g_a, 1, n, 1,n, map,np) 2119 found = 0 2120 do j=1,n 2121 do i=1,n 2122 b(i,j)=-1 2123 enddo 2124 enddo 2125#ifndef MIRROR 2126 if(me.eq.0)call ga_put(g_a,1,n,1,n,b,n) 2127#else 2128 if(iproc.eq.0)call ga_put(g_a,1,n,1,n,b,n) 2129#endif 2130 call ga_sync() 2131 do k = 1, np 2132 if(map(5,k).eq.me)then 2133 if(found.eq.1) then 2134 write(6,*)'double entry in map for proc ',me 2135 call ffflush(6) 2136 endif 2137 do j= map(3,k), map(4,k) 2138 do i= map(1,k), map(2,k) 2139#ifndef MIRROR 2140 b(i,j)=1*me 2141#else 2142 b(i,j)=1*iproc 2143#endif 2144 enddo 2145 enddo 2146 call ga_put(g_a, map(1,k),map(2,k),map(3,k),map(4,k), 2147 & b(map(1,k),map(3,k)),n) 2148 found = 1 2149 endif 2150 enddo 2151 call ga_sync() 2152c 2153 do k = 1, np 2154#ifndef MIRROR 2155 if(map(5,k).eq.me)then 2156#else 2157 if(map(5,k).eq.iproc)then 2158#endif 2159 call ga_get(g_a, map(1,k),map(2,k),map(3,k),map(4,k), 2160 & a(map(1,k),map(3,k)),n) 2161 do j= map(3,k), map(4,k) 2162 do i= map(1,k), map(2,k) 2163 if(b(i,j).ne.a(i,j)) then 2164 write(6,*) 2165 & 'proc ',me, 'overlap with ',a(i,j) 2166 call ffflush(6) 2167 endif 2168 enddo 2169 enddo 2170 endif 2171 enddo 2172 call ga_sync() 2173c 2174#ifndef MIRROR 2175 if(me.eq.0)then 2176#else 2177 if(iproc.eq.0)then 2178#endif 2179 2180 call ga_get(g_a,1,n,1,n,a,n) 2181 do j=1,n 2182 do i=1,n 2183 if(a(i,j).eq.-1)then 2184 write(6,*)'i=',i,' j=',j, ' not assigned ' 2185 call ga_error('... exiting ',0) 2186 endif 2187 enddo 2188 enddo 2189 endif 2190 2191 if (me.eq.0) then 2192 write(6,*) 2193 write(6,*) ' ga_locate_region is OK' 2194 write(6,*) 2195 call ffflush(6) 2196 endif 2197c 2198c Delete the global array 2199c 2200 status = ga_destroy(g_a) 2201c 2202 return 2203 end 2204 2205c--------------------------------------------------------------------- 2206 2207 subroutine check_flt() 2208 implicit none 2209#include "mafdecls.fh" 2210#include "global.fh" 2211#include "testutil.fh" 2212 integer n, m 2213 parameter (n =10) 2214 parameter (m=2*n) 2215 real a(n,n), b(n,n), v(m), w(m) 2216 integer iv(m), jv(m) 2217 logical status 2218 integer g_a, g_b 2219 integer iran, i, j, loop, nloop, maxloop, ilo, ihi, jlo, jhi, itmp 2220 integer nproc, me, int, ij, inc, ii, jj, nnodes 2221 double precision nwords 2222 parameter (maxloop = 100) 2223 integer maxproc 2224 integer ndim, dims(2), chunk(2), p_mirror 2225 parameter (maxproc = 128) 2226 double precision crap 2227 real x, sum1, sum2 2228 logical found 2229 integer lprocs, inode, iproc 2230#ifdef USE_RESTRICTED 2231 integer num_rstrctd 2232 integer rstrctd_list(maxproc/2) 2233#endif 2234 intrinsic int 2235 iran(i) = int(drand(0)*real(i)) + 1 2236 2237 nproc = ga_nnodes() 2238 me = ga_nodeid() 2239 inode = ga_cluster_nodeid() 2240 lprocs = ga_cluster_nprocs(inode) 2241 nnodes = ga_cluster_nnodes() 2242 iproc = mod(me,lprocs) 2243 nloop = Min(maxloop,n) 2244#ifdef USE_RESTRICTED 2245 num_rstrctd = nproc/2 2246 if (num_rstrctd.eq.0) num_rstrctd = 1 2247 do i = 1, num_rstrctd 2248 rstrctd_list(i) = (num_rstrctd/2) + i-1 2249 end do 2250#endif 2251c 2252c a() is a local copy of what the global array should start as 2253c 2254 do j = 1, n 2255 do i = 1, n 2256#ifndef MIRROR 2257 a(i,j) = i-1 + (j-1)*n 2258#else 2259 a(i,j) = inode + i-1 + (j-1)*n 2260#endif 2261 b(i,j) = -1. 2262 enddo 2263 enddo 2264 2265c 2266c Create a global array 2267c 2268 ndim = 2 2269 dims(1) = n 2270 dims(2) = n 2271 chunk(1) = 0 2272 chunk(2) = 0 2273#ifdef NEW_API 2274 g_a = ga_create_handle() 2275 call ga_set_data(g_a,ndim,dims,MT_REAL) 2276 call ga_set_array_name(g_a,'a') 2277#ifdef USE_RESTRICTED 2278 call ga_set_restricted(g_a, rstrctd_list, num_rstrctd) 2279#endif 2280# ifdef MIRROR 2281 write(6,*) 'Getting p_mirror' 2282 p_mirror = ga_pgroup_get_mirror() 2283 write(6,*) 'Setting proc config' 2284 call ga_set_pgroup(g_a,p_mirror) 2285# endif 2286 if (.not.ga_allocate(g_a)) then 2287#else 2288# ifndef MIRROR 2289 if (.not. ga_create(MT_REAL, n, n, 'a', 0, 0, g_a)) then 2290# else 2291cBJP 2292 write(6,*) 'Creating g_a' 2293 p_mirror = ga_pgroup_get_mirror() 2294 if (.not. nga_create_config(MT_REAL, ndim, dims, 'a', chunk, 2295 + p_mirror, g_a)) then 2296cBJP 2297 write(6,*) 'Created g_a' 2298# endif 2299#endif 2300 write(6,*) ' ga_create failed' 2301 call ffflush(6) 2302 call ga_error('... exiting ',0) 2303 endif 2304c 2305c check if handle is valid. be quiet unless error 2306C 2307 if(.not.ga_valid_handle(g_a)) call ga_error("invalid handle",g_a) 2308c 2309 call ga_distribution(g_a,me,ilo, ihi, jlo, jhi) 2310 call ga_sync() 2311c 2312c Zero the array 2313c 2314 if (me .eq. 0) then 2315 write(6,21) 2316 21 format(/'> Checking zero ... ') 2317 call ffflush(6) 2318 endif 2319 call ga_zero(g_a) 2320c 2321c Check that it is indeed zero 2322c 2323 call ga_get(g_a, 1, n, 1, n, b, n) 2324 call ga_sync() 2325 do i = 1, n 2326 do j = 1, n 2327 if (b(i,j) .ne. 0.0) then 2328 write(6,*) ' zero ', me, i, j, b(i,j) 2329 call ffflush(6) 2330 call ga_error('... exiting ',0) 2331 endif 2332 enddo 2333 enddo 2334 if (me.eq.0) then 2335 write(6,*) 2336 write(6,*) ' ga_zero is OK' 2337 write(6,*) 2338 endif 2339 call ga_sync() 2340c 2341c Each node fills in disjoint sections of the array 2342c 2343 if (me .eq. 0) then 2344 write(6,2) 2345 2 format(/'> Checking disjoint put ... ') 2346 call ffflush(6) 2347 endif 2348 call ga_sync() 2349c 2350 inc = (n-1)/20 + 1 2351 ij = 0 2352 do j = 1, n, inc 2353 do i = 1, n, inc 2354#ifndef MIRROR 2355 if (mod(ij,nproc) .eq. me) then 2356#else 2357 if (mod(ij,lprocs) .eq. iproc) then 2358#endif 2359 ilo = i 2360 ihi = min(i+inc, n) 2361 jlo = j 2362 jhi = min(j+inc, n) 2363c write(6,4) me, ilo, ihi, jlo, jhi 2364c 4 format(' node ',i2,' checking put ',4i4) 2365c call ffflush(6) 2366 call ga_put(g_a, ilo, ihi, jlo, jhi, a(ilo, jlo), n) 2367 endif 2368 ij = ij + 1 2369 enddo 2370 enddo 2371 call ga_sync() 2372c 2373c All nodes check all of a 2374c 2375 call ga_get(g_a, 1, n, 1, n, b, n) 2376 do i = 1, n 2377 do j = 1, n 2378 if (b(i,j) .ne. a(i,j)) then 2379 write(6,*) ' put ', me, i, j, a(i,j),b(i,j) 2380 call ffflush(6) 2381 call ga_error('... exiting ',0) 2382 endif 2383 enddo 2384 enddo 2385 call ga_sync() 2386c 2387 if (me.eq.0) then 2388 write(6,*) 2389 write(6,*) ' ga_put is OK' 2390 write(6,*) 2391 endif 2392c 2393c Now check nloop random gets from each node 2394c 2395 if (me .eq. 0) then 2396 write(6,5) nloop 2397 5 format(/'> Checking random get (',i5,' calls)...') 2398 call ffflush(6) 2399 endif 2400 call ga_sync() 2401c 2402 nwords = 0 2403c 2404 crap = drand(ga_nodeid()*51 +1) !Different seed for each proc 2405 do loop = 1, nloop 2406 ilo = iran(loop) 2407 ihi = iran(loop) 2408 if (ihi.lt. ilo) then 2409 itmp = ihi 2410 ihi = ilo 2411 ilo = itmp 2412 endif 2413 jlo = iran(loop) 2414 jhi = iran(loop) 2415 if (jhi.lt. jlo) then 2416 itmp = jhi 2417 jhi = jlo 2418 jlo = itmp 2419 endif 2420c 2421 nwords = nwords + (ihi-ilo+1)*(jhi-jlo+1) 2422c 2423 call util_rfill(n*n, 0.0, b, 1) 2424 call ga_get(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n) 2425c 2426 if (me .eq. 0 .and. mod(loop-1, max(1,nloop/20)).eq.0) then 2427 write(6,1) loop, me, ilo, ihi, jlo, jhi, nwords 2428 1 format(' call ',i5, ' node ',i2,' checking get ',4i4, 2429 $ ' total ',d9.2) 2430 call ffflush(6) 2431 endif 2432c 2433 sum1 = 0.0d0 2434 do j = jlo, jhi 2435 do i = ilo, ihi 2436 sum1 = sum1 + b(i,j) 2437 if (b(i,j) .ne. a(i,j)) then 2438 write(6,*) 'error ', i, j, b(i,j), a(i,j) 2439 call ffflush(6) 2440 call ga_error('... exiting ',0) 2441 endif 2442 enddo 2443 enddo 2444 enddo 2445 if (me.eq.0) then 2446 write(6,*) 2447 write(6,*) ' ga_get is OK' 2448 write(6,*) 2449 endif 2450 call ga_sync() 2451c 2452c Each node accumulates into disjoint sections of the array 2453c 2454 if (me .eq. 0) then 2455 write(6,9) 2456 9 format(/'> Checking accumulate ... ') 2457 call ffflush(6) 2458 endif 2459 call ga_sync() 2460 2461c 2462 crap = drand(12345) ! Same seed for each process 2463 do j = 1, n 2464 do i = 1, n 2465c b(i,j) = real(drand(0)) 2466 b(i,j) = i+j 2467 enddo 2468 enddo 2469c 2470 inc = (n-1)/20 + 1 2471 ij = 0 2472 do j = 1, n, inc 2473 do i = 1, n, inc 2474c x = real(drand(0)) 2475 x = 10. 2476 ilo = i 2477 ihi = min(i+inc-1, n) 2478 if(ihi.eq.n-1)ihi=n 2479c ihi = min(i+inc, n) 2480 jlo = j 2481 jhi = min(j+inc-1, n) 2482 if(jhi.eq.n-1)jhi=n 2483c jhi = min(j+inc-1, n) 2484* call ffflush(6) 2485#ifndef MIRROR 2486 if (mod(ij,nproc) .eq. me) then 2487#else 2488 if (mod(ij,lprocs) .eq. iproc) then 2489#endif 2490c print *, me, 'checking accumulate ',ilo,ihi,jlo,jhi,x 2491* 11 format(' node ',i2,' checking accumulate ',4i4) 2492* call ffflush(6) 2493 call ga_acc(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n, x) 2494 endif 2495 ij = ij + 1 2496c 2497c Each process applies all updates to its local copy 2498c 2499 do jj = jlo, jhi 2500 do ii = ilo, ihi 2501 a(ii,jj) = a(ii,jj) + x * b(ii,jj) 2502 enddo 2503 enddo 2504 enddo 2505 enddo 2506 call ga_sync() 2507c 2508c All nodes check all of a 2509 call ga_get(g_a, 1, n, 1, n, b, n) 2510c 2511 do j = 1, n 2512 do i = 1, n 2513 if(MISMATCHF(b(i,j),a(i,j)))then 2514 write(6,*) ' acc ', me, i, j, a(i,j), b(i,j) 2515 call ffflush(6) 2516 call ga_error('... exiting ',0) 2517 endif 2518 enddo 2519 enddo 2520 if (me.eq.0) then 2521 write(6,*) 2522 write(6,*) ' disjoint ga_acc is OK' 2523 write(6,*) 2524 endif 2525c 2526c overlapping accumulate 2527 call ga_sync() 2528#ifndef MIRROR 2529 if (.not. ga_create(MT_REAL, n, n, 'b', 0, 0, g_b)) then 2530#else 2531 if (.not. nga_create_config(MT_REAL, ndim, dims, 'b', chunk, 2532 + p_mirror, g_b)) then 2533#endif 2534 call ga_error('ga_create failed for second array ',0) 2535 endif 2536c 2537 call ga_zero(g_b) 2538 call ga_acc(g_b, n/2, n/2, n/2, n/2, 1.0, 1, 1.0) 2539 call ga_sync() 2540 if (me.eq.0) then 2541 call ga_get(g_b, n/2, n/2, n/2, n/2, b(1,1), 1) 2542#ifndef MIRROR 2543 x = abs(b(1,1) -1*nproc) 2544#else 2545 x = abs(b(1,1) -1*lprocs) 2546#endif 2547 if(x.gt. 1e-10)then 2548#ifndef MIRROR 2549 write(6,*)'val=',b(1,1),' expected=',nproc, x 2550#else 2551 write(6,*)'val=',b(1,1),' expected=',lprocs, x 2552#endif 2553 call ga_error('overlapping accumulate failed',0) 2554 endif 2555 write(6,*) 2556 write(6,*) ' overlapping ga_acc is OK' 2557 write(6,*) 2558 endif 2559c 2560c Check the ga_add function 2561c 2562 if (me .eq. 0) then 2563 write(6,91) 2564 91 format(/'> Checking add ...') 2565 call ffflush(6) 2566 endif 2567c 2568 crap = drand(12345) ! Everyone has same seed 2569 do j = 1, n 2570 do i = 1, n 2571 b(i,j) = real(drand(0)*real(i)) + 1 2572 a(i,j) = 0.1*a(i,j) + 0.9*b(i,j) 2573 enddo 2574 enddo 2575#ifndef MIRROR 2576 if (me.eq.0) call ga_put(g_b, 1, n, 1, n, b, n) 2577#else 2578 if (iproc.eq.0) call ga_put(g_b, 1, n, 1, n, b, n) 2579#endif 2580 call ga_add(0.1, g_a, 0.9, g_b, g_b) 2581 call ga_get(g_b, 1, n, 1, n, b, n) 2582 do j = 1, n 2583 do i = 1, n 2584 if(MISMATCHF(b(i,j), a(i,j)))then 2585 write(6,*) ' add ', me, i, j, a(i,j), b(i,j) 2586 call ffflush(6) 2587CCC call ga_error('... exiting ',0) 2588 endif 2589 enddo 2590 enddo 2591 if (me.eq.0) then 2592 write(6,*) 2593 write(6,*) ' add is OK ' 2594 write(6,*) 2595 endif 2596 call ga_sync() 2597c 2598c Check the sdot function 2599c 2600 if (me .eq. 0) then 2601 write(6,19) 2602 19 format(/'> Checking sdot ...') 2603 call ffflush(6) 2604 endif 2605 crap = drand(12345) ! Everyone has same seed 2606 sum1 = 0.0 2607 do j = 1, n 2608 do i = 1, n 2609 b(i,j) = drand(0) 2610 sum1 = sum1 + a(i,j)*b(i,j) 2611 enddo 2612 enddo 2613#ifndef MIRROR 2614 if (me.eq.0) then 2615#else 2616 if (iproc.eq.0) then 2617#endif 2618 call ga_put(g_b, 1, n, 1, n, b, n) 2619 call ga_put(g_a, 1, n, 1, n, a, n) 2620 endif 2621 call ga_sync() 2622 sum2 = ga_sdot(g_a,g_b) 2623 if(MISMATCHF(sum1, sum2))then 2624 write(6,*) ' fdot wrong ', sum1, sum2 2625 call ffflush(6) 2626 call ga_error('... exiting ',0) 2627 else if (me.eq.0) then 2628 write(6,*) 2629 write(6,*) ' sdot is OK ' 2630 write(6,*) 2631 endif 2632c 2633c Check the ga_scale function 2634c 2635 if (me .eq. 0) then 2636 write(6,92) 2637 92 format(/'> Checking scale ...') 2638 call ffflush(6) 2639 endif 2640 call ga_scale(g_a, 0.123) 2641 call ga_get(g_a, 1, n, 1, n, b, n) 2642 do j = 1, n 2643 do i = 1, n 2644 a(i,j) = a(i,j)*0.123 2645 if (MISMATCHF(b(i,j), a(i,j)))then 2646 write(6,*) ' dscal ', me, i, j, a(i,j), b(i,j) 2647 call ffflush(6) 2648CCC call ga_error('... exiting ',0) 2649 endif 2650 enddo 2651 enddo 2652 if (me.eq.0) then 2653 write(6,*) 2654 write(6,*) ' scale is OK ' 2655 write(6,*) 2656 endif 2657c 2658c Check the ga_copy function 2659c 2660 if (me .eq. 0) then 2661 write(6,*) 2662 write(6,*)'> Checking copy' 2663 write(6,*) 2664 call ffflush(6) 2665 endif 2666 if(me.eq.0) call ga_put(g_a, 1, n, 1, n, a, n) 2667 call ga_copy(g_a, g_b) 2668 call ga_get(g_b, 1, n, 1, n, b, n) 2669 do j = 1, n 2670 do i = 1, n 2671 if (b(i,j) .ne. a(i,j)) then 2672 write(6,*) ' copy ', me, i, j, a(i,j), b(i,j) 2673 call ffflush(6) 2674 call ga_error('... exiting ',0) 2675 endif 2676 enddo 2677 enddo 2678 if (me.eq.0) then 2679 write(6,*) 2680 write(6,*) ' copy is OK ' 2681 write(6,*) 2682 endif 2683c 2684 call ga_sync() 2685 if (me .eq. 0) then 2686 write(6,*) '> Checking scatter/gather (might be slow)... ' 2687 call ffflush(6) 2688 endif 2689 call ga_sync() 2690c 2691 crap = drand(ga_nodeid()*51 +1) !Different seed for each proc 2692 do j = 1, 10 2693 call ga_sync() 2694#ifndef MIRROR 2695 itmp = iran(nproc)-1 2696 if(me.eq.itmp) then 2697#else 2698 itmp = iran(lprocs)-1 2699 if(iproc.eq.itmp) then 2700#endif 2701 do loop = 1,m 2702 ilo = iran(n) 2703 jlo = iran(n) 2704 iv(loop) = ilo 2705 jv(loop) = jlo 2706 enddo 2707 call ga_gather(g_a, v, iv, jv, m) 2708 do loop = 1,m 2709 ilo= iv(loop) 2710 jlo= jv(loop) 2711 call ga_get(g_a,ilo,ilo,jlo,jlo,v(loop),1) 2712 if(v(loop) .ne. a(ilo,jlo))then 2713 write(6,*)me,' gather ', ilo,',',jlo,',', a(ilo,jlo) 2714 & ,' ',v(loop) 2715 call ffflush(6) 2716 call ga_error('... exiting ',0) 2717 endif 2718 enddo 2719 endif 2720 enddo 2721c 2722 if (me.eq.0) then 2723 write(6,*) 2724 write(6,*) ' gather is OK' 2725 write(6,*) 2726 call ffflush(6) 2727 endif 2728c 2729 2730 do j = 1,10 2731 call ga_sync() 2732#ifndef MIRROR 2733 if(me.eq.iran(ga_nnodes())-1) then 2734#else 2735 if(iproc.eq.iran(lprocs)-1) then 2736#endif 2737 do loop = 1,m 2738 ilo = iran(n) 2739 jlo = iran(n) 2740 iv(loop) = ilo 2741 jv(loop) = jlo 2742c v(loop) = DSIN(a(ilo,jlo)+b(ilo,jlo)) 2743 v(loop) = 1.0 *(ilo+jlo) 2744 enddo 2745 call ga_scatter(g_a, v, iv, jv, m) 2746 do loop = 1,m 2747 ilo= iv(loop) 2748 jlo= jv(loop) 2749 call ga_get(g_a,ilo,ilo,jlo,jlo,w(loop),1) 2750c if(v(loop) .ne. w(loop))then 2751 if(w(loop) .ne. 1.0 *(ilo+jlo) )then 2752 write(6,*)me,' scatter ', ilo,',',jlo,',',w(loop) 2753 & ,' ', 1.0 *(ilo+jlo) 2754 call ffflush(6) 2755 call ga_error('... exiting ',0) 2756 endif 2757 enddo 2758 endif 2759 call ga_sync() 2760 enddo 2761c 2762 if (me.eq.0) then 2763 write(6,*) 2764 write(6,*) ' scatter is OK' 2765 write(6,*) 2766 endif 2767c 2768 call ga_sync() 2769c 2770c scatter-acc available in GA ver. 3.0 2771#ifdef GA3 2772 if (me.eq.0) then 2773 write(6,*) 2774 write(6,*) ' checking scatter-accumulate' 2775 write(6,*) 2776 endif 2777c 2778 crap = drand(1234) 2779 call ga_zero(g_a) 2780c 2781 do j = 1, n 2782 do i = 1, n 2783 b(i,j) =0. 2784 enddo 2785 enddo 2786c 2787 x = .1d0 2788 ii =n 2789 do jj = 1,1 2790 call ga_sync() 2791 do loop = 1, ii 2792 2793c generate unique i,j pairs 279410 continue 2795 i = iran(n) 2796 j=iran(n) 2797 if (found(i,j, iv, jv, loop-1) ) goto 10 2798 2799 iv(loop) = i 2800 jv(loop) = j 2801 v(loop) = 1.0 *(i+j) 2802#ifndef MIRROR 2803 b(i,j) = b(i,j) + nproc*x*v(loop) ! update local ref. array 2804#else 2805 b(i,j) = b(i,j) + lprocs*x*v(loop) ! update local ref. array 2806#endif 2807 enddo 2808 2809 call ga_scatter_acc(g_a,v,iv,jv, ii,x) 2810 2811c 2812 call ga_sync() 2813c 2814c check the result 2815c 2816 call ga_get(g_a, 1, n, 1,n, a, n) 2817 2818 do loop = 1,ii 2819 i = iv(loop) 2820 j = jv(loop) 2821 if(MISMATCHF(a(i,j),b(i,j)))then 2822 print *,'Error',i,j,loop,a(i,j),'expected=',b(i,j) 2823* if(me.eq.0)then 2824* do ii=1,loop 2825* print *,'element',ii, iv(ii),jv(ii) 2826* enddo 2827* endif 2828 call ga_error('scatter-acc error in trial ',jj) 2829 endif 2830 enddo 2831 call ga_sync() 2832 2833 enddo 2834 2835 call ga_sync() 2836 if (me.eq.0) then 2837 write(6,*) 2838 write(6,*) ' scatter-accumulate is OK' 2839 write(6,*) 2840 endif 2841#endif 2842c 2843c Delete the global array 2844c 2845 status = ga_destroy(g_a) 2846c 2847 return 2848 end 2849c_____________________________________________________________ 2850 2851 subroutine check_wrappers 2852 implicit none 2853#include "mafdecls.fh" 2854#include "global.fh" 2855#include "testutil.fh" 2856 double precision sum 2857 integer isum, ibuf(2) 2858 integer me, nproc 2859 real fsum 2860c 2861 nproc = ga_nnodes() 2862 me = ga_nodeid() 2863c 2864 call ga_sync() 2865 if (me .eq. 0) then 2866 write(6,*) 2867 write(6,*)'> Checking ga_igop' 2868 write(6,*) 2869 call ffflush(6) 2870 endif 2871 ibuf(1) = 1 2872 ibuf(2) = me 2873 call ga_igop(10000, ibuf, 2, '+') 2874 if(ibuf(1).ne.nproc)then 2875 call ga_error('ga_igop error',isum) 2876 endif 2877 if(ibuf(2).ne.((nproc-1)*nproc/2))then 2878 call ga_error('ga_igop error -2',isum) 2879 endif 2880 call ga_sync() 2881 if (me.eq.0) then 2882 write(6,*) 2883 write(6,*) ' ga_igop is OK' 2884 write(6,*) 2885 endif 2886 call ga_sync() 2887c 2888 call ga_sync() 2889 if (me .eq. 0) then 2890 write(6,*) 2891 write(6,*)'> Checking ga_dgop' 2892 write(6,*) 2893 call ffflush(6) 2894 endif 2895 sum = 1d0 * me 2896 call ga_dgop(10000, sum, 1, '+') 2897 if(Int(sum).ne.((nproc-1)*nproc/2))then 2898 call ga_error('ga_dgop error',Int(sum)) 2899 endif 2900 call ga_sync() 2901 if (me.eq.0) then 2902 write(6,*) 2903 write(6,*) ' ga_dgop is OK' 2904 write(6,*) 2905 endif 2906c 2907 call ga_sync() 2908 if (me .eq. 0) then 2909 write(6,*) 2910 write(6,*)'> Checking ga_sgop' 2911 write(6,*) 2912 call ffflush(6) 2913 endif 2914 fsum = 1.0 * me 2915 call ga_sgop(10000, fsum, 1, '+') 2916 if(Int(sum).ne.((nproc-1)*nproc/2))then 2917 call ga_error('ga_fgop error',Int(sum)) 2918 endif 2919 call ga_sync() 2920 if (me.eq.0) then 2921 write(6,*) 2922 write(6,*) ' ga_sgop is OK' 2923 write(6,*) 2924 endif 2925c 2926 call ga_sync() 2927 if (me .eq. 0) then 2928 write(6,*) 2929 write(6,*)'> Checking ga_brdcst' 2930 write(6,*) 2931 call ffflush(6) 2932 endif 2933 if(me.eq.nproc-1)then 2934 ibuf(1) = me 2935 ibuf(2) = nproc 2936 endif 2937 call ga_brdcst(1000,ibuf,util_mitob(2),nproc-1) 2938 if(ibuf(1).ne.nproc-1)call ga_error('ibuf(1) error',ibuf(1)) 2939 if(ibuf(2).ne.nproc)call ga_error('ibuf(2) error',ibuf(2)) 2940 call ga_sync() 2941 if (me .eq. 0) then 2942 write(6,*) 2943 write(6,*)'> ga_brdcst is OK ' 2944 write(6,*) 2945 call ffflush(6) 2946 endif 2947 call ga_sync() 2948 return 2949 end 2950 2951 2952 subroutine check_mem(mem_size) 2953 implicit none 2954 integer mem_size 2955#include "mafdecls.fh" 2956#include "global.fh" 2957#include "testutil.fh" 2958c 2959 integer n,nmax,left,need, me,procs,g_a, g_b 2960 logical status 2961c 2962 if(.not. ga_memory_limited())return 2963 me = ga_nodeid() 2964 procs = ga_nnodes() 2965 nmax = int(dsqrt(dble(mem_size/util_mitob(1)))) 2966 left = mem_size/procs - ga_inquire_memory() 2967 n = nmax/2 2968 need = util_mdtob(n*n)/procs 2969c 2970 if(me.eq.0)then 2971 write(6,*)' ' 2972 if(ga_uses_ma())then 2973 write(6,*)' CHECKING GA MEMORY RESTRICTIONS (MA used)' 2974 else 2975 write(6,*)' CHECKING GA MEMORY RESTRICTIONS (MA not used)' 2976 endif 2977 write(6,*)' ' 2978 write(6,*)' ' 2979 call print_mem_info(n,left, need, mem_size/procs) 2980 endif 2981c 2982 status = ga_create(MT_DBL, n, n, 'a', 0, 0, g_a) 2983c 2984 if(me.eq.0) then 2985 if(status) then 2986 write(6,*) ' success' 2987 else 2988 write(6,*) ' failure' 2989 endif 2990 call ffflush(6) 2991 endif 2992c 2993 n = nmax 2994 left = mem_size/procs - ga_inquire_memory() 2995 need = util_mdtob(n*n)/procs 2996 if(me.eq.0)then 2997 call print_mem_info(n,left, need, mem_size/procs) 2998 endif 2999c 3000 status = ga_create(MT_DBL, n, n, 'b', 0, 0, g_b) 3001c 3002 if(me.eq.0) then 3003 if(status) then 3004 write(6,*) ' success' 3005 else 3006 write(6,*) ' failure' 3007 endif 3008 write(6,*)' ' 3009 write(6,*)' ' 3010 call ffflush(6) 3011 endif 3012 status = ga_destroy(g_a) 3013 end 3014 3015 3016 3017 subroutine print_mem_info(n,left, need, total) 3018 implicit none 3019 integer n,left, need, total 3020c 3021 write(6,*)' ' 3022 if(left - need .ge. 0) then 3023 write(6,1)n,n 30241 format('> Creating array ',i4,' by ',i4,' -- should succeed') 3025 else 3026 write(6,2)n,n 30272 format('> Creating array ',i4,' by ',i4,' -- SHOULD FAIL') 3028 endif 3029 write(6,3) need, left, total 30303 format(' (need ',i7,' and ',i7,' out of ',i7,' bytes are left)') 3031 write(6,*)' ' 3032 call ffflush(6) 3033c 3034 end 3035 3036 3037 3038 subroutine my_lock(g_b) 3039 implicit none 3040#include "global.fh" 3041 integer g_b, val, flag, i 3042 logical first_time 3043 double precision dummy 3044 common /lock/ val 3045 common /dum/ dummy 3046 data first_time /.true./ 3047c 3048c this awkward initialization is to avoid a weird problem 3049C with block data on SUN 3050 if(first_time)then 3051 first_time = .false. 3052 dummy = .0 3053 endif 3054c 3055 val = ga_read_inc(g_b,1,1, 1) 305610 call ga_get(g_b, 2,2,1,1, flag, 1) 3057 if(flag.eq.val) return 3058c 3059c to reduce memory stress, wait a while before retrying 3060 do i = 1, 100 3061 dummy = dummy + .1 3062 enddo 3063 goto 10 3064 end 3065 3066 3067 subroutine my_unlock(g_b) 3068 implicit none 3069#include "global.fh" 3070 integer g_b, val 3071 common /lock/ val 3072c 3073 call ga_put(g_b, 2,2,1,1, val+1, 1) 3074 end 3075 3076 3077 logical function found(i,j, iv, jv, n) 3078 integer n 3079 integer i,j, iv(n), jv(n) 3080 integer loop 3081 3082 found = .false. 3083 do loop = 1, n 3084 if(i .eq. iv(loop) .and. j .eq.jv(loop))then 3085 found = .true. 3086 goto 99 3087 endif 3088 enddo 308999 continue 3090 return 3091 end 3092 3093 3094 subroutine proc_remap() 3095 implicit none 3096#include "global.fh" 3097 integer proc(100),nproc,i 3098 nproc = ga_nnodes() 3099 if(nproc.gt.100) 3100 $ call ga_error("remap requires<=100 processes",nproc) 3101 do i = 1, nproc 3102 proc(i) = nproc-i 3103 enddo 3104c call ga_register_proclist(proc,nproc) 3105 end 3106 3107 3108 subroutine util_rfill(n,val,a,ia) 3109 implicit none 3110 real a(*), val 3111 integer n, ia, i 3112c 3113c initialise real array to scalar value 3114c 3115 if (ia.eq.1) then 3116 do 10 i = 1, n 3117 a(i) = val 3118 10 continue 3119 else 3120 do 20 i = 1,(n-1)*ia+1,ia 3121 a(i) = val 3122 20 continue 3123 endif 3124c 3125 end 3126 3127 3128 subroutine util_dfill(n,val,a,ia) 3129 implicit none 3130 double precision a(*), val 3131 integer n, ia, i 3132c 3133c initialise double precision array to scalar value 3134c 3135 if (ia.eq.1) then 3136 do 10 i = 1, n 3137 a(i) = val 3138 10 continue 3139 else 3140 do 20 i = 1,(n-1)*ia+1,ia 3141 a(i) = val 3142 20 continue 3143 endif 3144c 3145 end 3146 3147 subroutine util_ifill(n,val,a,ia) 3148 implicit none 3149 integer n, ia, i, a(*),val 3150c 3151c initialise integer array to scalar value 3152c 3153 if (ia.eq.1) then 3154 do 10 i = 1, n 3155 a(i) = val 3156 10 continue 3157 else 3158 do 20 i = 1,(n-1)*ia+1,ia 3159 a(i) = val 3160 20 continue 3161 endif 3162c 3163 end 3164 3165 subroutine util_qfill(n,val,a,ia) 3166 implicit none 3167 double complex a(*), val 3168 integer n, ia, i 3169c 3170c initialise double complex array to scalar value 3171c 3172 if (ia.eq.1) then 3173 do 10 i = 1, n 3174 a(i) = val 3175 10 continue 3176 else 3177 do 20 i = 1,(n-1)*ia+1,ia 3178 a(i) = val 3179 20 continue 3180 endif 3181c 3182 end 3183 3184 3185