1c $Id$
2c vector boxes lack arithmetic precision
3#ifdef CRAY_YMP
4# define THRESH 1d-10
5# define THRESHF 1e-5
6#elif defined(FUJITSU)
7# define THRESH 1d-12
8# define THRESHF 1e-5
9#else
10# define THRESH 1d-13
11# define THRESHF 1e-5
12#endif
13
14#define MISMATCH(x,y) abs(x-y)/max(1d0,abs(x)).gt.THRESH
15#define MISMATCHF(x,y) abs(x-y)/max(1.0,abs(x)).gt.THRESHF
16
17
18      subroutine util_ga_test
19      implicit none
20#include "mafdecls.fh"
21#include "global.fh"
22#include "testutil.fh"
23      integer heap, stack, fudge, ma_heap, me, nproc
24      logical status
25      parameter (heap=100*100*4, fudge=100, stack=100*100)
26c
27c***  Intitialize a message passing library
28c
29#ifdef MPI
30c      integer ierr
31c      call mpi_init(ierr)
32#else
33c      call pbeginf
34#endif
35c
36c***  Initialize GA
37c
38c     There are 2 choices: ga_initialize or ga_initialize_ltd.
39c     In the first case, there is no explicit limit on memory usage.
40c     In the second, user can set limit (per processor) in bytes.
41c
42c     call ga_initialize()
43      nproc = ga_nnodes()
44      me = ga_nodeid()
45c     we can also use GA_set_memory_limit BEFORE first ga_create call
46c
47      ma_heap = heap/nproc + fudge
48c     call GA_set_memory_limit(util_mdtob(ma_heap))
49c
50c     if(ga_nodeid().eq.0)then
51c        print *,' GA initialized '
52c        call ffflush(6)
53c     endif
54c
55c***  Initialize the MA package
56c     MA must be initialized before any global array is allocated
57c
58c     status = ma_init(MT_DCPL, stack, ma_heap)
59c     if (.not. status) call ga_error('ma_init failed',-1)
60c
61c     Uncomment the below line to register external memory allocator
62c     for dynamic arrays inside GA routines.
63c      call register_ext_memory()
64c
65c     if(me.eq.(nproc-1))then
66c       print *, 'using ', nproc,' process(es) ', ga_cluster_nnodes(),
67c    $           ' cluster nodes'
68c       print *,'process ', me, ' is on node ',ga_cluster_nodeid(),
69c    $          ' with ',  ga_cluster_nprocs(-1), ' processes'
70c       call ffflush(6)
71c     endif
72      if (me.eq.0) then
73        write(6,'(A,I3)') ' Number of processes ..............',nproc
74        write(6,'(A,I3)') ' Number of cluster nodes ..........',
75     1                    ga_cluster_nnodes()
76        call ffflush(6)
77      endif
78      call ga_sync()
79c
80c***  Check support for double precision arrays
81c
82      if (me.eq.0) then
83         write(6,*)
84         write(6,'(A)') ' Checking doubles  '
85         write(6,*)
86         call ffflush(6)
87      endif
88
89      call check_dbl()
90c
91c***  Check support for double precision complex arrays
92c
93      if (me.eq.0) then
94         write(6,*)
95         write(6,'(A)') ' Checking double complexes'
96         write(6,*)
97         call ffflush(6)
98      endif
99
100      call check_complex()
101c
102c***  Check support for integer arrays
103c
104      if (me.eq.0) then
105         write(6,*)
106         write(6,'(A)') ' Checking integers  '
107         write(6,*)
108         call ffflush(6)
109      endif
110
111      call check_int()
112c
113c
114c***  Check support for single precision
115c
116      if (me.eq.0) then
117         write(6,*)
118         write(6,'(A)') ' Checking single precisions '
119         write(6,*)
120         call ffflush(6)
121      endif
122
123      call check_flt()
124c
125      if (me.eq.0) then
126         write(6,*)
127         write(6,'(A)')' Checking wrappers to MP collective operations'
128         write(6,*)
129         call ffflush(6)
130      endif
131
132      call check_wrappers
133c
134c***  Check if memory limits are enforced
135c
136c     if(ga_memory_limited())
137c    1  call check_mem
138c
139c     if(me.eq.0) call ga_print_stats()
140c     if(me.eq.0) print *,' '
141c     if(me.eq.0) print *,'All tests succesful '
142c
143c***  Tidy up the GA package
144c
145c     call ga_terminate()
146c
147c***  Tidy up after message-passing library
148c
149#ifdef MPI
150c     call mpi_finalize(ierr)
151#else
152c     call pend()
153#endif
154c
155      end
156
157
158      subroutine check_dbl()
159      implicit none
160#include "mafdecls.fh"
161#include "global.fh"
162#include "testutil.fh"
163c
164      integer n,m
165      parameter (n = 128)
166      parameter (m = 2*n)
167      double precision a(n,n), b(n,n), v(m),w(m)
168      integer iv(m), jv(m)
169      logical status
170      integer g_a, g_b
171      integer i,j, loop,nloop,maxloop, ilo, ihi, jlo, jhi, itmp
172      integer nproc, me, ij, inc, ii, jj
173      parameter (maxloop = 100)
174      integer maxproc
175      parameter (maxproc = 128)
176      double precision crap, sum1, sum2, x
177      double precision nwords
178      integer iran
179      external iran
180c
181      nproc = ga_nnodes()
182      me    = ga_nodeid()
183      nloop = Min(maxloop,n)
184c
185c     a() is a local copy of what the global array should start as
186c
187      do j = 1, n
188         do i = 1, n
189            a(i,j) = i-1 + (j-1)*n
190            b(i,j) =-1.
191         enddo
192      enddo
193*      write(6,*) ' correct '
194*      call output(a, 1, n, 1, n, n, n, 1)
195*      call ffflush(6)
196c
197c     Create a global array
198c
199*     print *,ga_nodeid(), ' creating array'
200*     call ffflush(6)
201c     call setdbg(1)
202      status = ga_create(MT_DBL, n, n, 'a', 0, 0, g_a)
203      if (me.eq.0) then
204        if (status) then
205        write(6,'(A)') ' ga_create ........................ OK'
206        else
207        write(6,'(A)') ' ga_create ........................ Failed'
208        stop
209        endif
210        call ffflush(6)
211      endif
212c
213c     check if handle is valid. be quiet unless error
214C
215      if(.not.ga_valid_handle(g_a)) call ga_error("invalid handle",g_a)
216c
217      call ga_distribution(g_a,me,ilo, ihi, jlo, jhi)
218      call ga_sync()
219c
220c     Zero the array
221c
222      call ga_zero(g_a)
223c
224c     Check that it is indeed zero
225c
226      status = .true.
227      call ga_get(g_a, 1, n, 1, n, b, n)
228      call ga_sync()
229      do i = 1, n
230         do j = 1, n
231            if (b(i,j) .ne. 0.0d0) then
232               status = .false.
233            endif
234         enddo
235      enddo
236      if (me.eq.0) then
237        if (status) then
238        write(6,'(A)') ' ga_zero .......................... OK'
239        else
240        write(6,'(A)') ' ga_zero .......................... Failed'
241        endif
242        call ffflush(6)
243      endif
244      call ga_sync()
245c
246c     Each node fills in disjoint sections of the array
247c
248      call ga_sync()
249c
250      status = .true.
251      inc = (n-1)/20 + 1
252      ij = 0
253      do j = 1, n, inc
254         do i = 1, n, inc
255            if (mod(ij,nproc) .eq. me) then
256               ilo = i
257               ihi = min(i+inc, n)
258               jlo = j
259               jhi = min(j+inc, n)
260*               write(6,4) me, ilo, ihi, jlo, jhi
261* 4             format(' node ',i2,' checking put ',4i4)
262*               call ffflush(6)
263               call ga_put(g_a, ilo, ihi, jlo, jhi, a(ilo, jlo), n)
264            endif
265            ij = ij + 1
266         enddo
267      enddo
268      call ga_sync()
269c
270c     All nodes check all of a
271c
272      call util_dfill(n*n, 0.0d0, b, 1)
273*     call ga_print(g_a,1)
274      call ga_get(g_a, 1, n, 1, n, b, n)
275*      write(6,*) ' after get'
276*      call output(b, 1, n, 1, n, n, n, 1)
277c
278      do i = 1, n
279         do j = 1, n
280            if (b(i,j) .ne. a(i,j)) then
281               status = .false.
282            endif
283         enddo
284      enddo
285      if (me.eq.0) then
286        if (status) then
287        write(6,'(A)') ' ga_put ........................... OK'
288        else
289        write(6,'(A)') ' ga_put ........................... Failed'
290        endif
291        call ffflush(6)
292      endif
293      call ga_sync()
294c
295c     Now check nloop random gets from each node
296c
297      call ga_sync()
298c
299      nwords = 0
300c
301      status = .true.
302      crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process
303      do loop = 1, nloop
304         ilo = iran(loop)
305         ihi = iran(loop)
306         if (ihi.lt. ilo) then
307            itmp = ihi
308            ihi = ilo
309            ilo = itmp
310         endif
311         jlo = iran(loop)
312         jhi = iran(loop)
313         if (jhi.lt. jlo) then
314            itmp = jhi
315            jhi = jlo
316            jlo = itmp
317         endif
318c
319         nwords = nwords + (ihi-ilo+1)*(jhi-jlo+1)
320c
321         call util_dfill(n*n, 0.0d0, b, 1)
322         call ga_get(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n)
323         sum1 = 0.0d0
324         do j = jlo, jhi
325            do i = ilo, ihi
326               sum1 = sum1 + b(i,j)
327               if (b(i,j) .ne. a(i,j)) then
328                  status = .false.
329               endif
330            enddo
331         enddo
332c
333      enddo
334      if (me.eq.0) then
335        if (status) then
336        write(6,'(A)') ' ga_get ........................... OK'
337        else
338        write(6,'(A)') ' ga_get ........................... Failed'
339        endif
340        call ffflush(6)
341      endif
342      call ga_sync()
343c
344c     Each node accumulates into disjoint sections of the array
345c
346      call ga_sync()
347c
348      crap = util_drand(12345)       ! Same seed for each process
349      do j = 1, n
350         do i = 1, n
351c           b(i,j) = util_drand(0)
352            b(i,j) = i+j
353         enddo
354      enddo
355c
356      inc = (n-1)/20 + 1
357      ij = 0
358      do j = 1, n, inc
359         do i = 1, n, inc
360c           x = util_drand(0)
361            x = 10.
362            ilo = i
363            ihi = min(i+inc-1, n)
364            if(ihi.eq.n-1)ihi=n
365c           ihi = min(i+inc, n)
366            jlo = j
367            jhi = min(j+inc-1, n)
368            if(jhi.eq.n-1)jhi=n
369c           jhi = min(j+inc-1, n)
370*               call ffflush(6)
371            if (mod(ij,nproc) .eq. me) then
372c               print *, me, 'checking accumulate ',ilo,ihi,jlo,jhi,x
373* 11            format(' node ',i2,' checking accumulate ',4i4)
374*               call ffflush(6)
375               call ga_acc(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n, x)
376            endif
377            ij = ij + 1
378c
379c           Each process applies all updates to its local copy
380c
381            do jj = jlo, jhi
382               do ii = ilo, ihi
383                  a(ii,jj) = a(ii,jj) + x * b(ii,jj)
384               enddo
385            enddo
386         enddo
387      enddo
388      call ga_sync()
389c
390c     All nodes check all of a
391c
392      status = .true.
393      call ga_get(g_a, 1, n, 1, n, b, n)
394      do j = 1, n
395         do i = 1, n
396            if(MISMATCH(b(i,j),a(i,j)))then
397              status = .false.
398            endif
399         enddo
400      enddo
401      if (me.eq.0) then
402        if (status) then
403        write(6,'(A)') ' ga_acc (disjoint) ................ OK'
404        else
405        write(6,'(A)') ' ga_acc (disjoint) ................ Failed'
406        endif
407        call ffflush(6)
408      endif
409c
410c     overlapping accumulate
411      status = .true.
412      call ga_sync()
413      if (.not. ga_create(MT_DBL, n, n, 'b', 0, 0, g_b)) then
414         status = .false.
415      endif
416c
417      call ga_zero(g_b)
418      call ga_acc(g_b, n/2, n/2, n/2, n/2, 1d0, 1, 1d0)
419      call ga_sync()
420      if (me.eq.0) then
421         call ga_get(g_b, n/2, n/2, n/2, n/2, b(1,1), 1)
422         x = abs(b(1,1) -1d0*nproc)
423         if(x.gt. 1d-10)then
424            status = .false.
425         endif
426      endif
427      if (me.eq.0) then
428        if (status) then
429        write(6,'(A)') ' ga_acc (overlap) ................. OK'
430        else
431        write(6,'(A)') ' ga_acc (overlap) ................. Failed'
432        endif
433        call ffflush(6)
434      endif
435c
436c     Check the ga_add function
437c
438      crap = util_drand(12345)       ! Everyone has same seed
439      do j = 1, n
440         do i = 1, n
441            b(i,j) = util_drand(0)
442            a(i,j) = 0.1d0*a(i,j) + 0.9d0*b(i,j)
443         enddo
444      enddo
445
446      status = .true.
447      if (me.eq.0) call ga_put(g_b, 1, n, 1, n, b, n)
448      call ga_add(0.1d0, g_a, 0.9d0, g_b, g_b)
449      call ga_get(g_b, 1, n, 1, n, b, n)
450      do j = 1, n
451         do i = 1, n
452            if(MISMATCH(b(i,j), a(i,j)))then
453               status = .false.
454            endif
455         enddo
456      enddo
457      if (me.eq.0) then
458        if (status) then
459        write(6,'(A)') ' ga_add ........................... OK'
460        else
461        write(6,'(A)') ' ga_add ........................... Failed'
462        endif
463        call ffflush(6)
464      endif
465      call ga_sync()
466c
467c     Check the ddot function
468c
469      crap = util_drand(12345)       ! Everyone has same seed
470      sum1 = 0.0d0
471      do j = 1, n
472         do i = 1, n
473            b(i,j) = util_drand(0)
474            sum1 = sum1 + a(i,j)*b(i,j)
475         enddo
476      enddo
477      if (me.eq.0) then
478         call ga_put(g_b, 1, n, 1, n, b, n)
479         call ga_put(g_a, 1, n, 1, n, a, n)
480      endif
481      call ga_sync()
482      sum2 = ga_ddot(g_a,g_b)
483      status = .true.
484      if(MISMATCH(sum1, sum2))then
485         status = .false.
486      endif
487      if (me.eq.0) then
488        if (status) then
489        write(6,'(A)') ' ga_ddot .......................... OK'
490        else
491        write(6,'(A)') ' ga_ddot .......................... Failed'
492        endif
493        call ffflush(6)
494      endif
495c
496c     Check the ga_scale function
497c
498      call ga_scale(g_a, 0.123d0)
499      call ga_get(g_a, 1, n, 1, n, b, n)
500      status = .true.
501      do j = 1, n
502         do i = 1, n
503            a(i,j) = a(i,j)*0.123d0
504            if (MISMATCH(b(i,j), a(i,j)))then
505              status = .false.
506            endif
507         enddo
508      enddo
509      if (me.eq.0) then
510        if (status) then
511        write(6,'(A)') ' ga_scale ......................... OK'
512        else
513        write(6,'(A)') ' ga_scale ......................... Failed'
514        endif
515        call ffflush(6)
516      endif
517c
518c     Check the ga_copy function
519c
520      if(me.eq.0) call ga_put(g_a, 1, n, 1, n, a, n)
521      call ga_copy(g_a, g_b)
522      call ga_get(g_b, 1, n, 1, n, b, n)
523      status = .true.
524      do j = 1, n
525         do i = 1, n
526            if (b(i,j) .ne. a(i,j)) then
527               status = .false.
528            endif
529         enddo
530      enddo
531      if (me.eq.0) then
532        if (status) then
533        write(6,'(A)') ' ga_copy .......................... OK'
534        else
535        write(6,'(A)') ' ga_copy .......................... Failed'
536        endif
537        call ffflush(6)
538      endif
539c
540      call ga_sync()
541c
542      crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process
543      status = .true.
544      do j = 1, 10
545       call ga_sync()
546       itmp = iran(nproc)-1
547       if(me.eq.itmp) then
548         do loop = 1,m
549           ilo = iran(n)
550           jlo = iran(n)
551           iv(loop) = ilo
552           jv(loop) = jlo
553         enddo
554         call ga_gather(g_a, v, iv, jv, m)
555         do loop = 1,m
556           ilo= iv(loop)
557           jlo= jv(loop)
558           call ga_get(g_a,ilo,ilo,jlo,jlo,v(loop),1)
559           if(v(loop)  .ne. a(ilo,jlo))then
560             status = .false.
561           endif
562         enddo
563       endif
564      enddo
565c
566      if (me.eq.0) then
567        if (status) then
568        write(6,'(A)') ' ga_gather ........................ OK'
569        else
570        write(6,'(A)') ' ga_gather ........................ Failed'
571        endif
572        call ffflush(6)
573      endif
574c
575      status = .true.
576      do j = 1,10
577       call ga_sync()
578       if(me.eq.iran(ga_nnodes())-1) then
579         do loop = 1,m
580           ilo = iran(n)
581           jlo = iran(n)
582           iv(loop) = ilo
583           jv(loop) = jlo
584c          v(loop) = DSIN(a(ilo,jlo)+b(ilo,jlo))
585           v(loop) = 1d0 *(ilo+jlo)
586         enddo
587         call ga_scatter(g_a, v, iv, jv, m)
588         do loop = 1,m
589           ilo= iv(loop)
590           jlo= jv(loop)
591           call ga_get(g_a,ilo,ilo,jlo,jlo,w(loop),1)
592c          if(v(loop) .ne. w(loop))then
593           if(w(loop) .ne. 1d0 *(ilo+jlo) )then
594             status = .false.
595           endif
596         enddo
597       endif
598       call ga_sync()
599      enddo
600c
601      if (me.eq.0) then
602        if (status) then
603        write(6,'(A)') ' ga_scatter ....................... OK'
604        else
605        write(6,'(A)') ' ga_scatter ....................... Failed'
606        endif
607        call ffflush(6)
608      endif
609c
610      call ga_sync()
611c
612c     scatter-acc available in GA ver. 3.0
613#ifdef GA3
614c
615      crap = util_drand(1234)
616      call ga_zero(g_a)
617c
618      do j = 1, n
619         do i = 1, n
620            b(i,j) =0.
621         enddo
622      enddo
623c
624      status = .true.
625      x = .1d0
626      ii =n
627      do jj = 1,1
628	  call ga_sync()
629        do loop = 1, ii
630
631c          generate unique i,j pairs
63210         continue
633              i = iran(n)
634	      j=iran(n)
635           if (found(i,j, iv, jv, loop-1) ) goto 10
636
637           iv(loop) = i
638           jv(loop) = j
639           v(loop) = 1d0 *(i+j)
640           b(i,j) = b(i,j) + nproc*x*v(loop) ! update local ref. array
641        enddo
642	call ga_scatter_acc(g_a,v,iv,jv, ii,x)
643c
644        call ga_sync()
645c
646c       check the result
647c
648        call ga_get(g_a, 1, n, 1,n, a, n)
649
650        do loop = 1,ii
651          i = iv(loop)
652          j = jv(loop)
653          if(MISMATCH(a(i,j),b(i,j)))then
654             status = .false.
655*            if(me.eq.0)then
656*              do ii=1,loop
657*                   print *,'element',ii, iv(ii),jv(ii)
658*              enddo
659*            endif
660             status = .false.
661          endif
662        enddo
663        call ga_sync()
664      enddo
665
666      call ga_sync()
667      if (me.eq.0) then
668        if (status) then
669        write(6,'(A)') ' ga_scatter_acc ................... OK'
670        else
671        write(6,'(A)') ' ga_scatter_acc ................... Failed'
672        endif
673        call ffflush(6)
674      endif
675#endif
676c
677c     Delete the global arrays
678c
679      status = ga_destroy(g_b)
680      status = status .and. ga_destroy(g_a)
681      if (me.eq.0) then
682        if (status) then
683        write(6,'(A)') ' ga_destroy ....................... OK'
684        else
685        write(6,'(A)') ' ga_destroy ....................... Failed'
686        endif
687        call ffflush(6)
688      endif
689c
690      end
691
692c-----------------------------------------------------------------
693      subroutine check_complex()
694      implicit none
695#include "mafdecls.fh"
696#include "global.fh"
697#include "testutil.fh"
698c
699      integer n,m
700      parameter (n = 60)
701      parameter (m = 2*n)
702      double complex a(n,n), b(n,n), v(m),w(m)
703      integer iv(m), jv(m)
704      logical status
705      integer g_a, g_b
706      integer i,j, loop,nloop,maxloop, ilo, ihi, jlo, jhi, itmp
707      integer nproc, me, ij, inc, ii, jj
708      parameter (maxloop = 100)
709      integer maxproc
710      parameter (maxproc = 128)
711      double precision crap
712      double precision nwords
713      double complex   x, sum1, sum2, factor
714      integer iran
715      external iran
716c
717      nproc = ga_nnodes()
718      me    = ga_nodeid()
719      nloop = Min(maxloop,n)
720c
721c     a() is a local copy of what the global array should start as
722c
723      do j = 1, n
724         do i = 1, n
725            a(i,j) = cmplx(dble(i-1), dble((j-1)*n))
726            b(i,j) = cmplx(-1d0,1d0)
727         enddo
728      enddo
729c
730c     Create a global array
731c
732c     print *,ga_nodeid(), ' creating array'
733      call ffflush(6)
734c     call setdbg(1)
735      status = ga_create(MT_DCPL, n, n, 'a', 0, 0, g_a)
736      status = status .and. ga_create(MT_DCPL, n, n, 'b', 0, 0, g_b)
737      if (me.eq.0) then
738        if (status) then
739        write(6,'(A)') ' ga_create ........................ OK'
740        else
741        write(6,'(A)') ' ga_create ........................ Failed'
742        endif
743        call ffflush(6)
744      endif
745      call ga_distribution(g_a,me,ilo, ihi, jlo, jhi)
746      call ga_sync()
747c
748c     Zero the array
749c
750      call ga_zero(g_a)
751c
752c     Check that it is indeed zero
753c
754      call ga_get(g_a, 1, n, 1, n, b, n)
755      call ga_sync()
756      status = .true.
757      do i = 1, n
758         do j = 1, n
759            if(b(i,j).ne.(0d0,0d0)) then
760              status = .false.
761            endif
762         enddo
763      enddo
764      if (me.eq.0) then
765        if (status) then
766        write(6,'(A)') ' ga_zero .......................... OK'
767        else
768        write(6,'(A)') ' ga_zero .......................... Failed'
769        endif
770        call ffflush(6)
771      endif
772      call ga_sync()
773c
774c     Each node fills in disjoint sections of the array
775c
776      call ga_sync()
777c
778      inc = (n-1)/20 + 1
779      ij = 0
780      do j = 1, n, inc
781         do i = 1, n, inc
782            if (mod(ij,nproc) .eq. me) then
783               ilo = i
784               ihi = min(i+inc, n)
785               jlo = j
786               jhi = min(j+inc, n)
787               call ga_put(g_a, ilo, ihi, jlo, jhi, a(ilo, jlo), n)
788            endif
789            ij = ij + 1
790         enddo
791      enddo
792      call ga_sync()
793c
794c     All nodes check all of a
795c
796      call util_qfill(n*n, (0d0,0d0), b, 1)
797      call ga_get(g_a, 1, n, 1, n, b, n)
798c
799      status = .true.
800      do i = 1, n
801         do j = 1, n
802            if (b(i,j) .ne. a(i,j)) then
803               status = .false.
804            endif
805         enddo
806      enddo
807      if (me.eq.0) then
808        if (status) then
809        write(6,'(A)') ' ga_put ........................... OK'
810        else
811        write(6,'(A)') ' ga_put ........................... Failed'
812        endif
813        call ffflush(6)
814      endif
815      call ga_sync()
816c
817c     Now check nloop random gets from each node
818c
819      call ga_sync()
820c
821      nwords = 0
822c
823      status = .true.
824      crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process
825      do loop = 1, nloop
826         ilo = iran(loop)
827         ihi = iran(loop)
828         if (ihi.lt. ilo) then
829            itmp = ihi
830            ihi = ilo
831            ilo = itmp
832         endif
833         jlo = iran(loop)
834         jhi = iran(loop)
835         if (jhi.lt. jlo) then
836            itmp = jhi
837            jhi = jlo
838            jlo = itmp
839         endif
840c
841         nwords = nwords + (ihi-ilo+1)*(jhi-jlo+1)
842c
843         call util_qfill(n*n, (0.0d0,0d0), b, 1)
844         call ga_get(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n)
845         do j = jlo, jhi
846            do i = ilo, ihi
847               if (b(i,j) .ne. a(i,j)) then
848                 status = .false.
849               endif
850            enddo
851         enddo
852c
853      enddo
854      if (me.eq.0) then
855        if (status) then
856        write(6,'(A)') ' ga_get ........................... OK'
857        else
858        write(6,'(A)') ' ga_get ........................... Failed'
859        endif
860        call ffflush(6)
861      endif
862      call ga_sync()
863c
864c     Each node accumulates into disjoint sections of the array
865c
866      call ga_sync()
867c
868      status = .true.
869      crap = util_drand(12345)       ! Same seed for each process
870      do j = 1, n
871         do i = 1, n
872            b(i,j) = cmplx(util_drand(0),util_drand(1))
873         enddo
874      enddo
875c
876      inc = (n-1)/20 + 1
877      ij = 0
878      do j = 1, n, inc
879         do i = 1, n, inc
880c           x = cmplx(util_drand(0),0.333d0)
881c           x = cmplx(0.333d0,0)
882*           x = cmplx(0d0,0d0)
883            x = 0
884            ilo = i
885            ihi = min(i+inc-1, n)
886            if(ihi.eq.n-1)ihi=n
887            jlo = j
888            jhi = min(j+inc-1, n)
889            if(jhi.eq.n-1)jhi=n
890            if (mod(ij,nproc) .eq. me) then
891               call ga_acc(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n, x)
892            endif
893            ij = ij + 1
894c
895c           Each process applies all updates to its local copy
896c
897            do jj = jlo, jhi
898               do ii = ilo, ihi
899                  a(ii,jj) = a(ii,jj) + x * b(ii,jj)
900               enddo
901            enddo
902         enddo
903      enddo
904      call ga_sync()
905c
906c     All nodes check all of a
907c
908      call ga_get(g_a, 1, n, 1, n, b, n)
909      do j = 1, n
910         do i = 1, n
911            if (MISMATCH(b(i,j), a(i,j)))then
912               status = .false.
913            endif
914         enddo
915      enddo
916      if (me.eq.0) then
917        if (status) then
918        write(6,'(A)') ' ga_acc (disjoint) ................ OK'
919        else
920        write(6,'(A)') ' ga_acc (disjoint) ................ Failed'
921        endif
922        call ffflush(6)
923      endif
924c
925c     overlapping accumulate
926c
927      call ga_zero(g_b)
928      call ga_acc(g_b, n/2, n/2, n/2, n/2, (1d0,-1d0), 1, (1d0,0d0))
929      call ga_sync()
930      status = .true.
931      if (me.eq.0) then
932         call ga_get(g_b, n/2, n/2, n/2, n/2, x, 1)
933c        error = abs(x -(1d0,-1d0)*nproc)
934         if (MISMATCH(x, ((1d0,-1d0)*nproc)))then
935c        if(error.gt. (1d-8))then
936           status = .false.
937         endif
938      endif
939      if (me.eq.0) then
940        if (status) then
941        write(6,'(A)') ' ga_acc (overlap) ................. OK'
942        else
943        write(6,'(A)') ' ga_acc (overlap) ................. Failed'
944        endif
945        call ffflush(6)
946      endif
947c
948c     Check the ga_copy function
949c
950      status = .true.
951      call ga_sync()
952      if(me.eq.0) call ga_put(g_a, 1, n, 1, n, a, n)
953      call ga_copy(g_a, g_b)
954      call ga_get(g_b, 1, n, 1, n, b, n)
955      do j = 1, n
956         do i = 1, n
957            if (b(i,j) .ne. a(i,j)) then
958              status = .false.
959            endif
960         enddo
961      enddo
962      if (me.eq.0) then
963        if (status) then
964        write(6,'(A)') ' ga_copy .......................... OK'
965        else
966        write(6,'(A)') ' ga_copy .......................... Failed'
967        endif
968        call ffflush(6)
969      endif
970c
971c
972c     Check the ga_scale function
973c
974      factor = (1d0,-1d0)
975      call ga_scale(g_a, factor)
976      call ga_get(g_a, 1, n, 1, n, b, n)
977      status = .true.
978      do j = 1, n
979         do i = 1, n
980            a(i,j) = a(i,j)*factor
981            if (MISMATCH(b(i,j), a(i,j)))then
982              status = .false.
983            endif
984         enddo
985      enddo
986      if (me.eq.0) then
987        if (status) then
988        write(6,'(A)') ' ga_scale ......................... OK'
989        else
990        write(6,'(A)') ' ga_scale ......................... Failed'
991        endif
992        call ffflush(6)
993      endif
994c
995c     Check scatter&gather
996c
997      call ga_sync()
998      if(me.eq.0) call ga_put(g_a, 1, n, 1, n, a, n)
999c
1000      status = .true.
1001      crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process
1002      do j = 1, 10
1003       call ga_sync()
1004       itmp = iran(nproc)-1
1005       if(me.eq.itmp) then
1006         do loop = 1,m
1007           ilo = iran(n)
1008           jlo = iran(n)
1009           iv(loop) = ilo
1010           jv(loop) = jlo
1011         enddo
1012         call ga_gather(g_a, v, iv, jv, m)
1013         do loop = 1,m
1014           ilo= iv(loop)
1015           jlo= jv(loop)
1016           call ga_get(g_a,ilo,ilo,jlo,jlo,v(loop),1)
1017           if(v(loop)  .ne. a(ilo,jlo))then
1018             status = .false.
1019           endif
1020         enddo
1021       endif
1022      enddo
1023c
1024      if (me.eq.0) then
1025        if (status) then
1026        write(6,'(A)') ' ga_gather ........................ OK'
1027        else
1028        write(6,'(A)') ' ga_gather ........................ Failed'
1029        endif
1030        call ffflush(6)
1031      endif
1032c
1033      status = .true.
1034      do j = 1,10
1035       call ga_sync()
1036       if(me.eq.iran(ga_nnodes())-1) then
1037         do loop = 1,m
1038           ilo = iran(n)
1039           jlo = iran(n)
1040           iv(loop) = ilo
1041           jv(loop) = jlo
1042           v(loop) = (1d0,-1d0) *(ilo+jlo)
1043         enddo
1044         call ga_scatter(g_a, v, iv, jv, m)
1045         do loop = 1,m
1046           ilo= iv(loop)
1047           jlo= jv(loop)
1048           call ga_get(g_a,ilo,ilo,jlo,jlo,w(loop),1)
1049           if(w(loop)  .ne. (1d0,-1d0) *(ilo+jlo) )then
1050             status = .false.
1051           endif
1052         enddo
1053       endif
1054       call ga_sync()
1055      enddo
1056c
1057      if (me.eq.0) then
1058        if (status) then
1059        write(6,'(A)') ' ga_scatter ....................... OK'
1060        else
1061        write(6,'(A)') ' ga_scatter ....................... Failed'
1062        endif
1063        call ffflush(6)
1064      endif
1065c
1066c     Check ga_add
1067c
1068      call ga_get(g_a, 1, n, 1, n, a, n)
1069      crap = util_drand(12345)       ! Everyone has same seed
1070      do j = 1, n
1071         do i = 1, n
1072            b(i,j) = cmplx(util_drand(0), util_drand(1))
1073            a(i,j) = (0.1d0,-.1d0)*a(i,j) + (.9d0,-.9d0)*b(i,j)
1074         enddo
1075      enddo
1076      status = .true.
1077      if (me.eq.0) call ga_put(g_b, 1, n, 1, n, b, n)
1078      call ga_add((0.1d0,-.1d0), g_a, (0.9d0,-.9d0), g_b, g_b)
1079      call ga_get(g_b, 1, n, 1, n, b, n)
1080      do j = 1, n
1081         do i = 1, n
1082            if (MISMATCH(b(i,j), a(i,j)))then
1083              status = .false.
1084            endif
1085         enddo
1086      enddo
1087      if (me.eq.0) then
1088        if (status) then
1089        write(6,'(A)') ' ga_add ........................... OK'
1090        else
1091        write(6,'(A)') ' ga_add ........................... Failed'
1092        endif
1093        call ffflush(6)
1094      endif
1095      call ga_sync()
1096c
1097c     Check the zdot function
1098c
1099      crap = util_drand(12345)       ! Everyone has same seed
1100      sum1 = (0.0d0,0.d0)
1101      do j = 1, n
1102         do i = 1, n
1103            b(i,j) = cmplx(util_drand(0), util_drand(1))
1104            sum1 = sum1 + a(i,j)*b(i,j)
1105         enddo
1106      enddo
1107      if (me.eq.0) then
1108         call ga_put(g_b, 1, n, 1, n, b, n)
1109         call ga_put(g_a, 1, n, 1, n, a, n)
1110      endif
1111      call ga_sync()
1112      sum2 =  ga_zdot(g_a,g_b)
1113      status = .true.
1114      if (MISMATCH(sum1, sum2))then
1115        status = .false.
1116      endif
1117      if (me.eq.0) then
1118        if (status) then
1119        write(6,'(A)') ' ga_zdot .......................... OK'
1120        else
1121        write(6,'(A)') ' ga_zdot .......................... Failed'
1122        endif
1123        call ffflush(6)
1124      endif
1125c
1126c     Delete the global arrays
1127c
1128      status = ga_destroy(g_b)
1129      status = status .and. ga_destroy(g_a)
1130      if (me.eq.0) then
1131        if (status) then
1132        write(6,'(A)') ' ga_destroy ....................... OK'
1133        else
1134        write(6,'(A)') ' ga_destroy ....................... Failed'
1135        endif
1136        call ffflush(6)
1137      endif
1138c
1139      end
1140c-----------------------------------------------------------------
1141
1142
1143
1144
1145      subroutine check_int()
1146      implicit none
1147#include "mafdecls.fh"
1148#include "global.fh"
1149#include "testutil.fh"
1150c
1151      integer n
1152      parameter (n = 128)
1153      integer a(n,n), b(n,n)
1154      logical status
1155      integer g_a
1156      integer i, j, loop, nloop, ilo, ihi, jlo, jhi, itmp
1157      integer nproc, me, ij, inc, dimi,dimj,iproc, ii, jj
1158      double precision nwords
1159      parameter (nloop = 100)
1160      integer maxproc
1161      parameter (maxproc = 128)
1162      integer map(5,maxproc), found, np,k
1163      double precision crap, sum1
1164      integer buf
1165      integer iran
1166      external iran
1167c
1168      nproc = ga_nnodes()
1169      me    = ga_nodeid()
1170c
1171c     a() is a local copy of what the global array should start as
1172c
1173      do j = 1, n
1174         do i = 1, n
1175            a(i,j) = i-1 + (j-1)*1000
1176         enddo
1177      enddo
1178c
1179c     Create a global array
1180c
1181      status = ga_create(MT_INT, n, n, 'a', 0, 0, g_a)
1182      if (me.eq.0) then
1183        if (status) then
1184        write(6,'(A)') ' ga_create ........................ OK'
1185        else
1186        write(6,'(A)') ' ga_create ........................ Failed'
1187        endif
1188        call ffflush(6)
1189      endif
1190c
1191c     Zero the array
1192c
1193      call ga_zero(g_a)
1194c
1195c     Check that it is indeed zero
1196c
1197      status = .true.
1198      call ga_get(g_a, 1, n, 1, n, b, n)
1199      do i = 1, n
1200         do j = 1, n
1201            if (b(i,j) .ne. 0) then
1202              status = .false.
1203            endif
1204         enddo
1205      enddo
1206      if (me.eq.0) then
1207        if (status) then
1208        write(6,'(A)') ' ga_zero .......................... OK'
1209        else
1210        write(6,'(A)') ' ga_zero .......................... Failed'
1211        endif
1212        call ffflush(6)
1213      endif
1214      call ga_sync()
1215c
1216c     Each node fills in disjoint sections of the array
1217c
1218      call ga_sync()
1219c
1220      inc = (n-1)/20 + 1
1221      ij = 0
1222      do j = 1, n, inc
1223         do i = 1, n, inc
1224            if (mod(ij,nproc) .eq. me) then
1225               ilo = i
1226               ihi = min(i+inc, n)
1227               jlo = j
1228               jhi = min(j+inc, n)
1229c              write(6,4) me, ilo, ihi, jlo, jhi
1230c4             format(' node ',i2,' checking put ',4i4)
1231c              call ffflush(6)
1232               call ga_put(g_a, ilo, ihi, jlo, jhi, a(ilo, jlo), n)
1233            endif
1234            ij = ij + 1
1235         enddo
1236      enddo
1237      call ga_sync()
1238c
1239c     All nodes check all of a
1240c
1241      status = .true.
1242      if(me.eq.0)then
1243      call ga_get(g_a, 1, n, 1, n, b, n)
1244      do i = 1, n
1245         do j = 1, n
1246            if (b(i,j) .ne. a(i,j)) then
1247              status = .false.
1248            endif
1249         enddo
1250      enddo
1251      endif
1252      call ga_sync()
1253c
1254      if (me.eq.0) then
1255        if (status) then
1256        write(6,'(A)') ' ga_put ........................... OK'
1257        else
1258        write(6,'(A)') ' ga_put ........................... Failed'
1259        endif
1260        call ffflush(6)
1261      endif
1262c
1263c     Now check nloop random gets from each node
1264c
1265      call ga_sync()
1266c
1267      nwords = 0
1268c
1269      status = .true.
1270      crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process
1271      do loop = 1, nloop
1272         ilo = iran(loop)
1273         ihi = iran(loop)
1274         if (ihi.lt. ilo) then
1275            itmp = ihi
1276            ihi = ilo
1277            ilo = itmp
1278         endif
1279         jlo = iran(loop)
1280         jhi = iran(loop)
1281         if (jhi.lt. jlo) then
1282            itmp = jhi
1283            jhi = jlo
1284            jlo = itmp
1285         endif
1286c
1287         nwords = nwords + (ihi-ilo+1)*(jhi-jlo+1)
1288c
1289         call util_ifill(n*n, 0, b, 1)
1290         call ga_get(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n)
1291c
1292         sum1 = 0.0d0
1293         do j = jlo, jhi
1294            do i = ilo, ihi
1295               sum1 = sum1 + b(i,j)
1296               if (b(i,j) .ne. a(i,j)) then
1297                 status = .false.
1298               endif
1299            enddo
1300         enddo
1301      enddo
1302      if (me.eq.0) then
1303        if (status) then
1304        write(6,'(A)') ' ga_get ........................... OK'
1305        else
1306        write(6,'(A)') ' ga_get ........................... Failed'
1307        endif
1308        call ffflush(6)
1309      endif
1310c
1311      call ga_sync()
1312c
1313      status = .true.
1314      crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process
1315      inc =5
1316c     every processor will be operating on somebody elses data
1317c
1318      iproc = ga_nnodes()-me-1
1319c
1320      call ga_distribution(g_a,iproc,ilo,ihi,jlo,jhi)
1321c
1322      dimi = ihi-ilo
1323      dimj = jhi-jlo
1324c     write(6,*) me,'..',ilo,ihi,jlo,jhi,'.',dimi,dimj
1325c     call ffflush(6)
1326      call ga_sync()
1327      if(ilo .gt.0 .and. jhi .gt. 0)then
1328       do loop = 1,nloop
1329         ii= IABS(iran(dimi))
1330         jj= IABS(iran(dimj))
1331         i=ilo + Mod(ii,dimi)
1332         j=jlo + Mod(jj,dimj)
1333c
1334c        write(6,*) me,'..',ilo,ihi,jlo,jhi,'.',dimi,dimj,'..',i,j
1335c        call ffflush(6)
1336         buf = ga_read_inc(g_a,i,j,inc)
1337         if(a(i,j).ne. buf)then
1338           status = .false.
1339         endif
1340         call ga_get(g_a, i,i,j,j, buf,1)
1341         a(i,j) = a(i,j)+inc
1342         if(a(i,j).ne.  buf)then
1343           status = .false.
1344         endif
1345       enddo
1346      endif
1347      call ga_sync()
1348c
1349      if (me.eq.0) then
1350        if (status) then
1351        write(6,'(A)') ' ga_read_inc ...................... OK'
1352        else
1353        write(6,'(A)') ' ga_read_inc ...................... Failed'
1354        endif
1355        call ffflush(6)
1356      endif
1357c
1358      call ga_zero(g_a)
1359c
1360c***  use ga_read_inc and elements g_a(1:2,1) to implement a lock
1361c***  compute g_a(:,n) = sum (1(:)) for P processors
1362c
1363      status = ga_create_mutexes(1)
1364      if (me.eq.0) then
1365        if (status) then
1366        write(6,'(A)') ' ga_create_mutexes ................ OK'
1367        else
1368        write(6,'(A)') ' ga_create_mutexes ................ Failed'
1369        endif
1370        call ffflush(6)
1371      endif
1372
1373      if ((n.lt.2).and.(me.eq.0)) then
1374        write(6,'(A)') ' ga_fence ........................ N/A'
1375        write(6,'(A)') ' ga_lock ......................... N/A'
1376        call ffflush(6)
1377      endif
1378
1379      call ga_lock(0)
1380c     call my_lock(g_a)
1381
1382c          get original values  g_a(:,n)
1383           call ga_get(g_a, 1,n, n,n, b,n)
1384c          add my contribution
1385           do i =1,n
1386              b(i,1)= b(i,1)+1
1387           enddo
1388c
1389c          need to use fence to assure that coms complete before leaving
1390c          Critical Section
1391c
1392           call ga_init_fence()
1393                call ga_put(g_a, 1,n, n,n, b,n)
1394           call ga_fence()
1395      call ga_unlock(0)
1396c     call my_unlock(g_a)
1397c
1398333   status = ga_destroy_mutexes()
1399      if (me.eq.0) then
1400        if (status) then
1401        write(6,'(A)') ' ga_destroy_mutexes ............... OK'
1402        else
1403        write(6,'(A)') ' ga_destroy_mutexes ............... Failed'
1404        endif
1405        call ffflush(6)
1406      endif
1407c
1408      status = .true.
1409      call ga_sync()
1410      if (me.eq.0) then
1411         call ga_get(g_a, 1,n, n,n, b,n)
1412         do i =1,n
1413              if(b(i,1).ne. nproc)then
1414                status = .false.
1415              endif
1416         enddo
1417      endif
1418c
1419      status = ga_locate_region(g_a, 1, n, 1,n, map,np)
1420      found = 0
1421      do j=1,n
1422         do i=1,n
1423            b(i,j)=-1
1424         enddo
1425      enddo
1426      if(me.eq.0)call ga_put(g_a,1,n,1,n,b,n)
1427      call ga_sync()
1428      do k = 1, np
1429         if(map(5,k).eq.me)then
1430                if(found.eq.1) then
1431                    write(6,*)'double entry in map for proc ',me
1432                    call ffflush(6)
1433                endif
1434                do j= map(3,k), map(4,k)
1435                        do i= map(1,k), map(2,k)
1436                        b(i,j)=1*me
1437                        enddo
1438                enddo
1439                call ga_put(g_a, map(1,k),map(2,k),map(3,k),map(4,k),
1440     &                 b(map(1,k),map(3,k)),n)
1441                found = 1
1442         endif
1443      enddo
1444      call ga_sync()
1445c
1446      do k = 1, np
1447         if(map(5,k).eq.me)then
1448                call ga_get(g_a, map(1,k),map(2,k),map(3,k),map(4,k),
1449     &                 a(map(1,k),map(3,k)),n)
1450                do j= map(3,k), map(4,k)
1451                      do i= map(1,k), map(2,k)
1452                        if(b(i,j).ne.a(i,j)) then
1453                           write(6,*)
1454     &                     'proc ',me, 'overlap with ',a(i,j)
1455                           call ffflush(6)
1456                        endif
1457                      enddo
1458                enddo
1459         endif
1460      enddo
1461      call ga_sync()
1462c
1463      if(me.eq.0)then
1464        call ga_get(g_a,1,n,1,n,a,n)
1465        do j=1,n
1466         do i=1,n
1467            if(a(i,j).eq.-1)then
1468              status = .false.
1469            endif
1470         enddo
1471        enddo
1472      endif
1473      if (me.eq.0) then
1474        if (status) then
1475        write(6,'(A)') ' ga_locate_region ................. OK'
1476        else
1477        write(6,'(A)') ' ga_locate_region ................. Failed'
1478        endif
1479        call ffflush(6)
1480      endif
1481c
1482c     Delete the global array
1483c
1484      status = ga_destroy(g_a)
1485      if (me.eq.0) then
1486        if (status) then
1487        write(6,'(A)') ' ga_destroy ....................... OK'
1488        else
1489        write(6,'(A)') ' ga_destroy ....................... Failed'
1490        endif
1491        call ffflush(6)
1492      endif
1493c
1494      end
1495
1496c---------------------------------------------------------------------
1497
1498      subroutine check_flt()
1499      implicit none
1500#include "mafdecls.fh"
1501#include "global.fh"
1502#include "testutil.fh"
1503      integer n, m
1504      parameter (n =10)
1505      parameter (m=2*n)
1506      real a(n,n), b(n,n), v(m), w(m)
1507      integer iv(m), jv(m)
1508      logical status
1509      integer g_a, g_b
1510      integer i, j, loop, nloop, maxloop, ilo, ihi, jlo, jhi, itmp
1511      integer nproc, me, ij, inc, ii, jj
1512      double precision nwords
1513      parameter (maxloop = 100)
1514      integer maxproc
1515      parameter (maxproc = 128)
1516      double precision crap
1517      real x, sum1, sum2
1518      integer iran
1519      external iran
1520
1521      nproc = ga_nnodes()
1522      me    = ga_nodeid()
1523      nloop = Min(maxloop,n)
1524c
1525c     a() is a local copy of what the global array should start as
1526c
1527      do j = 1, n
1528         do i = 1, n
1529            a(i,j) = i-1 + (j-1)*n
1530            b(i,j) = -1.
1531         enddo
1532      enddo
1533c
1534c     Create a global array
1535c
1536      status = ga_create(MT_REAL, n, n, 'a', 0, 0, g_a)
1537      if (me.eq.0) then
1538        if (status) then
1539        write(6,'(A)') ' ga_create ........................ OK'
1540        else
1541        write(6,'(A)') ' ga_create ........................ Failed'
1542        endif
1543        call ffflush(6)
1544      endif
1545c
1546c     check if handle is valid. be quiet unless error
1547c
1548      status = .true.
1549      if(.not.ga_valid_handle(g_a)) status = .false.
1550c
1551      call ga_distribution(g_a,me,ilo, ihi, jlo, jhi)
1552      call ga_sync()
1553c
1554c     Zero the array
1555c
1556      call ga_zero(g_a)
1557c
1558c     Check that it is indeed zero
1559c
1560      call ga_get(g_a, 1, n, 1, n, b, n)
1561      call ga_sync()
1562      do i = 1, n
1563         do j = 1, n
1564            if (b(i,j) .ne. 0.0) then
1565              status = .false.
1566            endif
1567         enddo
1568      enddo
1569      if (me.eq.0) then
1570        if (status) then
1571        write(6,'(A)') ' ga_zero .......................... OK'
1572        else
1573        write(6,'(A)') ' ga_zero .......................... Failed'
1574        endif
1575        call ffflush(6)
1576      endif
1577      call ga_sync()
1578c
1579c     Each node fills in disjoint sections of the array
1580c
1581      status = .true.
1582      inc = (n-1)/20 + 1
1583      ij = 0
1584      do j = 1, n, inc
1585         do i = 1, n, inc
1586            if (mod(ij,nproc) .eq. me) then
1587               ilo = i
1588               ihi = min(i+inc, n)
1589               jlo = j
1590               jhi = min(j+inc, n)
1591c               write(6,4) me, ilo, ihi, jlo, jhi
1592c 4             format(' node ',i2,' checking put ',4i4)
1593c               call ffflush(6)
1594               call ga_put(g_a, ilo, ihi, jlo, jhi, a(ilo, jlo), n)
1595            endif
1596            ij = ij + 1
1597         enddo
1598      enddo
1599      call ga_sync()
1600c
1601c     All nodes check all of a
1602c
1603      call ga_get(g_a, 1, n, 1, n, b, n)
1604      do i = 1, n
1605         do j = 1, n
1606            if (b(i,j) .ne. a(i,j)) then
1607              status = .false.
1608            endif
1609         enddo
1610      enddo
1611      call ga_sync()
1612      if (me.eq.0) then
1613        if (status) then
1614        write(6,'(A)') ' ga_put ........................... OK'
1615        else
1616        write(6,'(A)') ' ga_put ........................... Failed'
1617        endif
1618        call ffflush(6)
1619      endif
1620c
1621      call ga_sync()
1622c
1623      nwords = 0
1624c
1625      status = .true.
1626      crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process
1627      do loop = 1, nloop
1628         ilo = iran(loop)
1629         ihi = iran(loop)
1630         if (ihi.lt. ilo) then
1631            itmp = ihi
1632            ihi = ilo
1633            ilo = itmp
1634         endif
1635         jlo = iran(loop)
1636         jhi = iran(loop)
1637         if (jhi.lt. jlo) then
1638            itmp = jhi
1639            jhi = jlo
1640            jlo = itmp
1641         endif
1642c
1643         nwords = nwords + (ihi-ilo+1)*(jhi-jlo+1)
1644c
1645         call util_rfill(n*n, 0.0, b, 1)
1646         call ga_get(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n)
1647c
1648         sum1 = 0.0d0
1649         do j = jlo, jhi
1650            do i = ilo, ihi
1651               sum1 = sum1 + b(i,j)
1652               if (b(i,j) .ne. a(i,j)) then
1653                  status = .false.
1654               endif
1655            enddo
1656         enddo
1657      enddo
1658      if (me.eq.0) then
1659        if (status) then
1660        write(6,'(A)') ' ga_get ........................... OK'
1661        else
1662        write(6,'(A)') ' ga_get ........................... Failed'
1663        endif
1664        call ffflush(6)
1665      endif
1666      call ga_sync()
1667c
1668c     Each node accumulates into disjoint sections of the array
1669c
1670      call ga_sync()
1671c
1672      crap = util_drand(12345)       ! Same seed for each process
1673      do j = 1, n
1674         do i = 1, n
1675c           b(i,j) = real(util_drand(0))
1676            b(i,j) = i+j
1677         enddo
1678      enddo
1679c
1680      status = .true.
1681      inc = (n-1)/20 + 1
1682      ij = 0
1683      do j = 1, n, inc
1684         do i = 1, n, inc
1685c           x = real(util_drand(0))
1686            x = 10.
1687            ilo = i
1688            ihi = min(i+inc-1, n)
1689            if(ihi.eq.n-1)ihi=n
1690c           ihi = min(i+inc, n)
1691            jlo = j
1692            jhi = min(j+inc-1, n)
1693            if(jhi.eq.n-1)jhi=n
1694c           jhi = min(j+inc-1, n)
1695*               call ffflush(6)
1696            if (mod(ij,nproc) .eq. me) then
1697c               print *, me, 'checking accumulate ',ilo,ihi,jlo,jhi,x
1698* 11            format(' node ',i2,' checking accumulate ',4i4)
1699*               call ffflush(6)
1700               call ga_acc(g_a, ilo, ihi, jlo, jhi, b(ilo, jlo), n, x)
1701            endif
1702            ij = ij + 1
1703c
1704c           Each process applies all updates to its local copy
1705c
1706            do jj = jlo, jhi
1707               do ii = ilo, ihi
1708                  a(ii,jj) = a(ii,jj) + x * b(ii,jj)
1709               enddo
1710            enddo
1711         enddo
1712      enddo
1713      call ga_sync()
1714c
1715c     All nodes check all of a
1716      call ga_get(g_a, 1, n, 1, n, b, n)
1717c
1718      do j = 1, n
1719         do i = 1, n
1720            if(MISMATCHF(b(i,j),a(i,j)))then
1721              status = .false.
1722            endif
1723         enddo
1724      enddo
1725      if (me.eq.0) then
1726        if (status) then
1727        write(6,'(A)') ' ga_acc (disjoint) ................ OK'
1728        else
1729        write(6,'(A)') ' ga_acc (disjoint) ................ Failed'
1730        endif
1731        call ffflush(6)
1732      endif
1733c
1734c     overlapping accumulate
1735      call ga_sync()
1736      status = .true.
1737      if (.not. ga_create(MT_REAL, n, n, 'b', 0, 0, g_b)) then
1738         status = .false.
1739      endif
1740c
1741      call ga_zero(g_b)
1742      call ga_acc(g_b, n/2, n/2, n/2, n/2, 1.0, 1, 1.0)
1743      call ga_sync()
1744      if (me.eq.0) then
1745         call ga_get(g_b, n/2, n/2, n/2, n/2, b(1,1), 1)
1746         x = abs(b(1,1) -1*nproc)
1747         if(x.gt. 1e-10)then
1748           status = .false.
1749         endif
1750      endif
1751      if (me.eq.0) then
1752        if (status) then
1753        write(6,'(A)') ' ga_acc (overlap) ................. OK'
1754        else
1755        write(6,'(A)') ' ga_acc (overlap) ................. Failed'
1756        endif
1757        call ffflush(6)
1758      endif
1759c
1760c     Check the ga_add function
1761c
1762      crap = util_drand(12345)       ! Everyone has same seed
1763      status = .true.
1764      do j = 1, n
1765         do i = 1, n
1766            b(i,j) = real(util_drand(0)*real(i)) + 1
1767            a(i,j) = 0.1*a(i,j) + 0.9*b(i,j)
1768         enddo
1769      enddo
1770      if (me.eq.0) call ga_put(g_b, 1, n, 1, n, b, n)
1771      call ga_add(0.1, g_a, 0.9, g_b, g_b)
1772      call ga_get(g_b, 1, n, 1, n, b, n)
1773      do j = 1, n
1774         do i = 1, n
1775            if(MISMATCHF(b(i,j), a(i,j)))then
1776              status = .false.
1777            endif
1778         enddo
1779      enddo
1780      if (me.eq.0) then
1781        if (status) then
1782        write(6,'(A)') ' ga_add ........................... OK'
1783        else
1784        write(6,'(A)') ' ga_add ........................... Failed'
1785        endif
1786        call ffflush(6)
1787      endif
1788      call ga_sync()
1789c
1790      status = .true.
1791      crap = util_drand(12345)       ! Everyone has same seed
1792      sum1 = 0.0
1793      do j = 1, n
1794         do i = 1, n
1795            b(i,j) = util_drand(0)
1796            sum1 = sum1 + a(i,j)*b(i,j)
1797         enddo
1798      enddo
1799      if (me.eq.0) then
1800         call ga_put(g_b, 1, n, 1, n, b, n)
1801         call ga_put(g_a, 1, n, 1, n, a, n)
1802      endif
1803      call ga_sync()
1804      sum2 = ga_sdot(g_a,g_b)
1805      if(MISMATCHF(sum1, sum2))then
1806        status = .false.
1807      endif
1808      if (me.eq.0) then
1809        if (status) then
1810        write(6,'(A)') ' ga_sdot .......................... OK'
1811        else
1812        write(6,'(A)') ' ga_sdot .......................... Failed'
1813        endif
1814        call ffflush(6)
1815      endif
1816c
1817      status = .true.
1818      call ga_scale(g_a, 0.123)
1819      call ga_get(g_a, 1, n, 1, n, b, n)
1820      do j = 1, n
1821         do i = 1, n
1822            a(i,j) = a(i,j)*0.123
1823            if (MISMATCHF(b(i,j), a(i,j)))then
1824              status = .false.
1825            endif
1826         enddo
1827      enddo
1828      if (me.eq.0) then
1829        if (status) then
1830        write(6,'(A)') ' ga_scale ......................... OK'
1831        else
1832        write(6,'(A)') ' ga_scale ......................... Failed'
1833        endif
1834        call ffflush(6)
1835      endif
1836c
1837      status = .true.
1838      if(me.eq.0) call ga_put(g_a, 1, n, 1, n, a, n)
1839      call ga_copy(g_a, g_b)
1840      call ga_get(g_b, 1, n, 1, n, b, n)
1841      do j = 1, n
1842         do i = 1, n
1843            if (b(i,j) .ne. a(i,j)) then
1844               status = .false.
1845            endif
1846         enddo
1847      enddo
1848      if (me.eq.0) then
1849        if (status) then
1850        write(6,'(A)') ' ga_copy .......................... OK'
1851        else
1852        write(6,'(A)') ' ga_copy .......................... Failed'
1853        endif
1854        call ffflush(6)
1855      endif
1856c
1857      call ga_sync()
1858      status = .true.
1859      crap = util_drand(ga_nodeid()*51 + 1) ! Different seed for each process
1860      do j = 1, 10
1861       call ga_sync()
1862       itmp = iran(nproc)-1
1863       if(me.eq.itmp) then
1864         do loop = 1,m
1865           ilo = iran(n)
1866           jlo = iran(n)
1867           iv(loop) = ilo
1868           jv(loop) = jlo
1869         enddo
1870         call ga_gather(g_a, v, iv, jv, m)
1871         do loop = 1,m
1872           ilo= iv(loop)
1873           jlo= jv(loop)
1874           call ga_get(g_a,ilo,ilo,jlo,jlo,v(loop),1)
1875           if(v(loop)  .ne. a(ilo,jlo))then
1876             status = .false.
1877           endif
1878         enddo
1879       endif
1880      enddo
1881c
1882      if (me.eq.0) then
1883        if (status) then
1884        write(6,'(A)') ' ga_gather ........................ OK'
1885        else
1886        write(6,'(A)') ' ga_gather ........................ Failed'
1887        endif
1888        call ffflush(6)
1889      endif
1890c
1891      status = .true.
1892      do j = 1,10
1893       call ga_sync()
1894       if(me.eq.iran(ga_nnodes())-1) then
1895         do loop = 1,m
1896           ilo = iran(n)
1897           jlo = iran(n)
1898           iv(loop) = ilo
1899           jv(loop) = jlo
1900c          v(loop) = DSIN(a(ilo,jlo)+b(ilo,jlo))
1901           v(loop) = 1.0 *(ilo+jlo)
1902         enddo
1903         call ga_scatter(g_a, v, iv, jv, m)
1904         do loop = 1,m
1905           ilo= iv(loop)
1906           jlo= jv(loop)
1907           call ga_get(g_a,ilo,ilo,jlo,jlo,w(loop),1)
1908c          if(v(loop)  .ne. w(loop))then
1909           if(w(loop)  .ne. 1.0 *(ilo+jlo) )then
1910             status = .false.
1911           endif
1912         enddo
1913       endif
1914       call ga_sync()
1915      enddo
1916c
1917      if (me.eq.0) then
1918        if (status) then
1919        write(6,'(A)') ' ga_scatter ....................... OK'
1920        else
1921        write(6,'(A)') ' ga_scatter ....................... Failed'
1922        endif
1923        call ffflush(6)
1924      endif
1925c
1926      call ga_sync()
1927c
1928c     scatter-acc available in GA ver. 3.0
1929#ifdef GA3
1930      status = .true.
1931      crap = util_drand(1234)
1932      call ga_zero(g_a)
1933c
1934      do j = 1, n
1935         do i = 1, n
1936            b(i,j) =0.
1937         enddo
1938      enddo
1939c
1940      x = .1d0
1941      ii =n
1942      do jj = 1,1
1943          call ga_sync()
1944        do loop = 1, ii
1945
1946c          generate unique i,j pairs
194710         continue
1948              i = iran(n)
1949              j=iran(n)
1950           if (found(i,j, iv, jv, loop-1) ) goto 10
1951
1952           iv(loop) = i
1953           jv(loop) = j
1954           v(loop) = 1.0 *(i+j)
1955           b(i,j) = b(i,j) + nproc*x*v(loop) ! update local ref. array
1956        enddo
1957
1958        call ga_scatter_acc(g_a,v,iv,jv, ii,x)
1959        call ga_sync()
1960c
1961c       check the result
1962c
1963        call ga_get(g_a, 1, n, 1,n, a, n)
1964        do loop = 1,ii
1965          i = iv(loop)
1966          j = jv(loop)
1967          if(MISMATCH(a(i,j),b(i,j)))then
1968            status = .false.
1969          endif
1970        enddo
1971        call ga_sync()
1972      enddo
1973      call ga_sync()
1974      if (me.eq.0) then
1975        if (status) then
1976        write(6,'(A)') ' ga_scatter_acc ................... OK'
1977        else
1978        write(6,'(A)') ' ga_scatter_acc ................... Failed'
1979        endif
1980        call ffflush(6)
1981      endif
1982#endif
1983c
1984c     Delete the global array
1985c
1986      status = ga_destroy(g_a)
1987      status = status .and. ga_destroy(g_b)
1988      if (me.eq.0) then
1989        if (status) then
1990        write(6,'(A)') ' ga_destroy ....................... OK'
1991        else
1992        write(6,'(A)') ' ga_destroy ....................... Failed'
1993        endif
1994        call ffflush(6)
1995      endif
1996c
1997      end
1998c_____________________________________________________________
1999
2000      subroutine check_wrappers
2001      implicit none
2002#include "mafdecls.fh"
2003#include "global.fh"
2004#include "testutil.fh"
2005      double precision sum
2006      integer isum, ibuf(2)
2007      integer me, nproc
2008      logical status
2009      real fsum
2010c
2011      nproc = ga_nnodes()
2012      me = ga_nodeid()
2013c
2014      status = .true.
2015      call ga_sync()
2016      ibuf(1) = 1
2017      ibuf(2) =  me
2018      call ga_igop(10000, ibuf, 2, '+')
2019      if(ibuf(1).ne.nproc)then
2020        status = .false.
2021      endif
2022      if(ibuf(2).ne.((nproc-1)*nproc/2))then
2023        status = .false.
2024      endif
2025      call ga_sync()
2026      if (me.eq.0) then
2027        if (status) then
2028        write(6,'(A)') ' ga_igop .......................... OK'
2029        else
2030        write(6,'(A)') ' ga_igop .......................... Failed'
2031        endif
2032        call ffflush(6)
2033      endif
2034      call ga_sync()
2035c
2036      status = .true.
2037      sum = 1d0 * me
2038      call ga_dgop(10000, sum, 1, '+')
2039      if(Int(sum).ne.((nproc-1)*nproc/2))then
2040        status = .false.
2041      endif
2042      call ga_sync()
2043      if (me.eq.0) then
2044        if (status) then
2045        write(6,'(A)') ' ga_dgop .......................... OK'
2046        else
2047        write(6,'(A)') ' ga_dgop .......................... Failed'
2048        endif
2049        call ffflush(6)
2050      endif
2051c
2052      call ga_sync()
2053      status = .true.
2054      fsum = 1.0 * me
2055      call ga_sgop(10000, fsum, 1, '+')
2056      if(Int(sum).ne.((nproc-1)*nproc/2))then
2057        status = .false.
2058      endif
2059      call ga_sync()
2060      if (me.eq.0) then
2061        if (status) then
2062        write(6,'(A)') ' ga_sgop .......................... OK'
2063        else
2064        write(6,'(A)') ' ga_sgop .......................... Failed'
2065        endif
2066        call ffflush(6)
2067      endif
2068c
2069      call ga_sync()
2070      status = .true.
2071      if(me.eq.nproc-1)then
2072        ibuf(1) = me
2073        ibuf(2) = nproc
2074      endif
2075      call ga_brdcst(1000,ibuf,util_mitob(2),nproc-1)
2076      if(ibuf(1).ne.nproc-1) status = .false.
2077      if(ibuf(2).ne.nproc) status = .false.
2078      call ga_sync()
2079      if (me.eq.0) then
2080        if (status) then
2081        write(6,'(A)') ' ga_brdcst ........................ OK'
2082        else
2083        write(6,'(A)') ' ga_brdcst ........................ Failed'
2084        endif
2085        call ffflush(6)
2086      endif
2087      call ga_sync()
2088      end
2089
2090
2091      subroutine check_mem
2092      implicit none
2093      integer mem_size
2094#include "mafdecls.fh"
2095#include "global.fh"
2096#include "testutil.fh"
2097c
2098      integer n,nmax,left,need, me,procs,g_a, g_b
2099      integer stack, heap, global
2100      logical status, overify, ohardfail
2101c
2102      call input_mem_size(stack, heap, global, overify, ohardfail)
2103      mem_size = ma_sizeof(mt_dbl,global,mt_byte)
2104      write(*,*) 'mem_size = ',mem_size
2105      me = ga_nodeid()
2106      procs = ga_nnodes()
2107      nmax = int(dsqrt(dble(mem_size/util_mitob(1))))
2108      left =  mem_size/procs - ga_inquire_memory()
2109      n = nmax/2
2110      need = util_mdtob(n*n)/procs
2111c
2112      if(me.eq.0)then
2113        write(6,*)' '
2114        if(ga_uses_ma())then
2115           write(6,*)' CHECKING GA MEMORY RESTRICTIONS (MA used)'
2116        else
2117           write(6,*)' CHECKING GA MEMORY RESTRICTIONS (MA not used)'
2118        endif
2119        write(6,*)' '
2120        write(6,*)' '
2121        call print_mem_info(n,left, need, mem_size/procs)
2122      endif
2123c
2124      status = ga_create(MT_DBL, n, n, 'a', 0, 0, g_a)
2125c
2126      if(me.eq.0) then
2127        if(status) then
2128              write(6,*) '  success'
2129        else
2130              write(6,*) '  failure'
2131        endif
2132        call ffflush(6)
2133      endif
2134c
2135      n = nmax
2136      left =  mem_size/procs - ga_inquire_memory()
2137      need = util_mdtob(n*n)/procs
2138      if(me.eq.0)then
2139        call print_mem_info(n,left, need, mem_size/procs)
2140      endif
2141c
2142      status = ga_create(MT_DBL, n, n, 'b', 0, 0, g_b)
2143c
2144      if(me.eq.0) then
2145        if(status) then
2146              write(6,*) '  success'
2147        else
2148              write(6,*) '  failure'
2149        endif
2150        write(6,*)' '
2151        write(6,*)' '
2152        call ffflush(6)
2153      endif
2154      status = ga_destroy(g_a)
2155      end
2156
2157
2158
2159      subroutine print_mem_info(n,left, need, total)
2160      implicit none
2161      integer n,left, need, total
2162c
2163      write(6,*)' '
2164      if(left - need .ge. 0) then
2165        write(6,1)n,n
21661       format('> Creating array ',i4,' by ',i4,' -- should succeed')
2167      else
2168        write(6,2)n,n
21692       format('> Creating array ',i4,' by ',i4,' -- SHOULD FAIL')
2170      endif
2171      write(6,3) need, left, total
21723     format('  (need ',i7,' and  ',i7,' out of ',i7,' bytes are left)')
2173      write(6,*)' '
2174      call ffflush(6)
2175c
2176      end
2177
2178
2179
2180      subroutine my_lock(g_b)
2181      implicit none
2182#include "global.fh"
2183      integer g_b, val, flag, i
2184      logical first_time
2185      double precision  dummy
2186      common /lock/ val
2187      common /dum/ dummy
2188      data first_time /.true./
2189c
2190c     this awkward initialization is to avoid a weird problem
2191C     with block data on SUN
2192      if(first_time)then
2193        first_time = .false.
2194        dummy = .0
2195      endif
2196c
2197      val = ga_read_inc(g_b,1,1, 1)
219810    call ga_get(g_b, 2,2,1,1, flag, 1)
2199           if(flag.eq.val) return
2200c
2201c          to reduce memory stress,  wait a while before retrying
2202           do i = 1, 100
2203              dummy = dummy + .1
2204           enddo
2205      goto 10
2206      end
2207
2208
2209      subroutine my_unlock(g_b)
2210      implicit none
2211#include "global.fh"
2212      integer g_b, val
2213      common /lock/ val
2214c
2215      call ga_put(g_b, 2,2,1,1, val+1, 1)
2216      end
2217
2218
2219      logical function found(i,j, iv, jv, n)
2220      integer n
2221      integer i,j, iv(n), jv(n)
2222      integer loop
2223
2224      found = .false.
2225      do loop = 1, n
2226               if(i .eq. iv(loop) .and. j .eq.jv(loop))then
2227                 found = .true.
2228                 goto 99
2229               endif
2230            enddo
223199    continue
2232      return
2233      end
2234
2235
2236      subroutine proc_remap()
2237      implicit none
2238#include "global.fh"
2239      integer proc(100),nproc,i
2240      nproc = ga_nnodes()
2241      if(nproc.gt.100)
2242     $   call ga_error("remap requires<=100 processes",nproc)
2243      do i = 1, nproc
2244         proc(i) = nproc-i
2245      enddo
2246c     call ga_register_proclist(proc,nproc)
2247      end
2248
2249
2250      subroutine util_rfill(n,val,a,ia)
2251      implicit none
2252      real  a(*), val
2253      integer n, ia, i
2254c
2255c     initialise real array to scalar value
2256c
2257      if (ia.eq.1) then
2258         do 10 i = 1, n
2259            a(i) = val
2260 10      continue
2261      else
2262         do 20 i = 1,(n-1)*ia+1,ia
2263            a(i) = val
2264 20      continue
2265      endif
2266c
2267      end
2268
2269
2270      subroutine util_dfill(n,val,a,ia)
2271      implicit none
2272      double precision  a(*), val
2273      integer n, ia, i
2274c
2275c     initialise double precision array to scalar value
2276c
2277      if (ia.eq.1) then
2278         do 10 i = 1, n
2279            a(i) = val
2280 10      continue
2281      else
2282         do 20 i = 1,(n-1)*ia+1,ia
2283            a(i) = val
2284 20      continue
2285      endif
2286c
2287      end
2288
2289      subroutine util_ifill(n,val,a,ia)
2290      implicit none
2291      integer n, ia, i, a(*),val
2292c
2293c     initialise integer array to scalar value
2294c
2295      if (ia.eq.1) then
2296         do 10 i = 1, n
2297            a(i) = val
2298 10      continue
2299      else
2300         do 20 i = 1,(n-1)*ia+1,ia
2301            a(i) = val
2302 20      continue
2303      endif
2304c
2305      end
2306
2307      subroutine util_qfill(n,val,a,ia)
2308      implicit none
2309      double  complex  a(*), val
2310      integer n, ia, i
2311c
2312c     initialise double complex array to scalar value
2313c
2314      if (ia.eq.1) then
2315         do 10 i = 1, n
2316            a(i) = val
2317 10      continue
2318      else
2319         do 20 i = 1,(n-1)*ia+1,ia
2320            a(i) = val
2321 20      continue
2322      endif
2323c
2324      end
2325
2326
2327      integer function iran(i)
2328      implicit none
2329      double precision util_drand
2330      external util_drand
2331      integer i
2332      iran = int(util_drand(0)*dfloat(i))+1
2333      return
2334      end
2335