1C$Id$
2C>
3C> \defgroup subgrp NWChem interface to GA sub-groups
4C>
5C> \ingroup subgrp
6C> @{
7C>
8C> \file util_sgroup.F
9C> \brief The NWChem interface to the GA sub-groups
10C>
11C> This file contains routines that facilitate the use of GA sub-groups
12C> in NWChem [1-3]. The routines take care of issues such as,
13C> redirecting the standard output from different sub-groups,
14C> replicating the runtime database, accessing the movecs file, etc. In
15C> addition it provides mechanisms for creating pools of processors in
16C> a number of different ways.
17C>
18C> To date there is no input block to control this functionality.
19C> Instead set directives that put data directly into the runtime
20C> database are required.
21C>
22C> ### References ###
23C>
24C> [1] T. L. Windus, S. M. Kathmann, L. D. Crosby,
25C>     "High performance computations using dynamical nucleation
26C>     theory", <i>J. Phys.: Conf. Ser.</i> (2008) <b>125</b>,
27C>     012017, DOI:
28C>     <a href="https://doi.org/10.1088/1742-6596/125/1/012017">
29C>     10.1088/1742-6596/125/1/012017</a>.
30C>
31C> [2] B. J. Palmer, S. M. Kathmann, M. Krishnan, V. Tipparaju,
32C>     J. Nieplocha, "The use of processor groups in molecular
33C>     dynamics simulations to sample free-energy states",
34C>     <i>J. Chem. Theory Comput.</i> (2007) <b>3</b>, 583-592,
35C>     DOI:
36C>     <a href="https://doi.org/10.1021/ct600260u">
37C>     10.1021/ct600260u</a>.
38C>
39C> [3] J. Nieplocha, B. Palmer, V. Tipparaju, M. Krishnan, H. Trease,
40C>     E. Apra, "Advances, applications and performance of the Global
41C>     Arrays shared memory programming toolkit",
42C>     <i>Int. J. High Perf. Comput. Appl.</i> (2006) <b>20</b>,
43C>     203-231, DOI:
44C>     <a href="https://doi.org/10.1177/1094342006064503">
45C>     10.1177/1094342006064503</a>.
46C>
47      block data groups
48#include "util_sgroup.fh"
49C     Make sure that the group code works even if groups are not set up yet
50      data mygroup(0) /1/
51      data my_ga_grp(0)/-1/
52      data my_ga_0_grp(0)/-999/ ! Bogus
53      data ngroups(0) /1/
54      data sgprint /.true./
55      data depth /0/
56      end
57
58C###############################################################################
59C>
60C> \brief Initialize the subgroup layer
61C>
62C> \return Returns .TRUE. if successful (does not return otherwise)
63C>
64      logical function util_sgstart(rtdb)
65      implicit none
66#include "mafdecls.fh"
67#include "rtdb.fh"
68      integer rtdb !< [Input] The RTDB handle
69      integer groups_want
70      integer dum(1)
71      integer method
72C added option to allow writting of group rtdb's
73C to permanent_dir(.false.) or scratch_dir(.true.)
74      integer dir
75c
76      dir = 1 ! permanent_dir by default
77c
78c Read number of subgroups from input
79c
80      if (.not.rtdb_get(rtdb, 'subgroups_number', mt_int, 1,
81     &   groups_want))groups_want=1
82
83      method = 1
84      call util_sggo(rtdb,groups_want,method,dum,dir)
85
86      util_sgstart=.true.
87      return
88      end
89C###############################################################################
90C>
91C> \brief Do the actual initialization
92C>
93      subroutine util_sggo(rtdb,groups_want,method,array_cpu,dir)
94      implicit none
95#include "stdio.fh"
96#include "global.fh"
97#include "rtdb.fh"
98#include "util_sgroup.fh"
99#include "mafdecls.fh"
100#include "inp.fh"
101#include "util.fh"
102#include "errquit.fh"
103
104c
105c Input
106c
107      integer rtdb         !< [Input] The RTDB handle
108      integer groups_want  !< [Input] The number of subgroups requested
109      integer method       !< [Input] The method to partition the
110                           !< processor pool
111                           !< * 1 -- use `groups_want` to generate equal
112                           !<   sized groups (`array_cpu` ignored)
113                           !< * 2 -- turn each SMP box into a group
114                           !<   (`array_cpu` and `groups_want` ignored)
115                           !< * 3 -- use `array_cpu(groups_want)` to
116                           !<   define number of nodes per group
117                           !< * 4 -- use `array_cpu(groups_want+nnodes)`
118                           !<   to define which nodes per group
119      integer array_cpu(*) !< [Input] Specification of the subgroups
120c
121c     rtdb is the runtime database file
122c
123c     method is:
124c     1 -- use groups_want to generate equal sized groups (array_cpu ignored)
125c     2 -- turn each SMP box into a group (array_cpu and groups_want ignored)
126c     3 -- use array_cpu(groups_want) to define number of nodes per group
127c     4 -- use array_cpu(groups_want+nnodes) to define which nodes per group
128c
129      integer i
130      integer meinworld, meafter
131      integer X0(1)
132      integer idum
133
134C strings for fname definition
135      character*256 permdir
136      character*256 fprefix
137      integer myproc,nproc
138      integer mypgroup_old
139C
140C     Added option to allow group rtdb's to be written
141C     to permanent_dir (0) or scratch_dir(1)
142      integer dir
143      logical dir2
144#ifndef GANXTVAL
145C  Turn off GA load balancing in groups
146      logical util_statldb
147      external util_statldb
148      logical dumlog
149#endif
150
151C     convert to logical
152      if (dir .gt. 0) then
153        dir2 = .true.
154      else
155        dir2 = .false.
156      endif
157#ifndef GANXTVAL
158C     Turn off dynamic load balancing, since it fails in groups
159C     We never turn this back on
160      if (groups_want .ne. 1) dumlog = util_statldb(.true.,rtdb)
161#endif
162c
163c     dont use disk for gridpts, etc. in DFT
164c     We never turn this back on
165c
166      if(.not.rtdb_put(rtdb,'dft:largenode',mt_log,1,.true.))
167     &   call errquit('util_sggo: rtdb_put nodisk failed',1,RTDB_ERR)
168      if (.not. rtdb_put(rtdb, 'dft:noio', mt_int, 1, 1))
169     &   call errquit('util_sggo: rtdb_put noio failed',1,RTDB_ERR)
170      if(.not.rtdb_put(rtdb,'dft:xcreplicated',mt_log,1,.false.))
171     &  call errquit('util_sggo: rtdb_put xcreplicated fail',1,RTDB_ERR)
172
173c
174
175      mypgroup_old = ga_pgroup_get_default()
176      if (mypgroup_old .eq. ga_pgroup_get_world()) then
177        depth = 0
178        mygroup(depth) = 1
179        ngroups(depth) = 1
180        my_ga_grp(depth) = ga_pgroup_get_world()
181        x0(1) = 0
182        if (my_ga_0_grp(0) .eq. -999) then
183          my_ga_0_grp(depth) = ga_pgroup_create(x0,1)
184        end if
185
186c       preliminary rtdb cloning
187C       Work around the 36 char cutoff for fname
188        if (.not. rtdb_getfname(rtdb, rtdb_fname(depth))) call
189     *     errquit('rtdb_fname call failed',0,0)
190        write(permdir,'(256(a))') (' ', i=1,256)
191        write(fprefix,'(256(a))') (' ', i=1,256)
192        if(.not. rtdb_cget(rtdb,'permanent_dir',1,permdir)) then
193           permdir = '.'
194        endif
195        if(.not. rtdb_cget(rtdb,'file_prefix',1,fprefix))
196     &     call errquit('rtdb get file_prefix failed',0,0)
197        idum = 1
198  747   if(permdir(idum:idum).eq. ' ' .and. idum.lt.inp_strlen(permdir))
199     &                then
200          idum = idum + 1
201          goto 747
202        endif
203        write(rtdb_fname(depth),'(a,a,a,a)')
204     &    permdir(idum:inp_strlen(permdir)),
205     &    '/',
206     &    fprefix(1:inp_strlen(fprefix)),
207     &    '.db'
208      end if
209      if (.not. rtdb_close(rtdb, 'keep')) call errquit(
210     *     ' failed to close rtdb for sg ',0,0)
211
212      depth = depth + 1
213      if (depth .gt. maxdeep)
214     *     call errquit('depth is greater than maxdeep',0,0)
215
216      myproc=ga_nodeid()
217      nproc=ga_nnodes()
218c      sgprint = util_print('sgroup info', print_debug)
219c
220c    Create groups
221c
222      if(myproc.eq.0) write(LuOut,*) 'Creating groups'
223
224c initialize sub groups using a method
225
226      if (method .eq. 1) then
227        call util_sginit(groups_want)
228      else if (method .eq. 2) then
229        call util_sginit_smp()
230      else if (method .eq. 3) then
231        call util_sginit_irreg(array_cpu,groups_want)
232      else if (method .eq. 4) then
233        call util_sginit_power(array_cpu,groups_want+nproc)
234      else
235        call errquit('util_sggo: bad method',0,0)
236      endif
237
238      if (sgprint) then
239        meafter=ga_nodeid()
240        meinworld = ga_pgroup_nodeid(ga_pgroup_get_world())
241        write(LuOut,
242     *   "(' proc ',I4,' was (',
243     *       I4,' ',I4,' ',I4,
244     *       ') now is (',
245     *       I4,' ',I4,' ',I4,
246     *       ')')")
247     *   meinworld,
248     *   myproc,mygroup(depth-1),my_ga_grp(depth-1),
249     *   meafter,mygroup(depth),my_ga_grp(depth)
250         call util_flush(LuOut)
251      endif
252
253      call ga_pgroup_sync(mypgroup_old) ! Make sure all is well
254
255      call util_sgrtdb(rtdb,rtdb_fname(depth-1),rtdb_fname(depth),dir2)
256      if (.not. rtdb_open(rtdb_fname(depth), 'old', rtdb)) call errquit
257     *     (' rtdb_open old failed ',0,0)
258
259      if(myproc.eq.0.and.sgprint) then
260       write(LuOut,*) ga_nodeid(),' of group',mygroup(depth),'rtdbname '
261     *           ,rtdb_fname(depth), ' rtdb=',rtdb
262      endif
263c
264c     Create movecs for each subgroup
265c
266      call util_sgmovecs(rtdb)
267c
268c
269      if(myproc.eq.0.and.sgprint) then
270        write(LuOut,*) 'everyone should have an open file at this point'
271        call util_flush(LuOut)
272      endif
273c     These next lines are since we are now done creating groups
274      call util_flush(LuErr)
275      call util_flush(LuOut)
276      call ga_pgroup_sync(mypgroup_old) ! Make sure all is well
277      return
278      end
279C###############################################################################
280C>
281C> \brief Finalize the subgroup layer
282C>
283      subroutine util_sgend(rtdb)
284      implicit none
285#include "stdio.fh"
286#include "global.fh"
287#include "rtdb.fh"
288#include "util_sgroup.fh"
289      integer rtdb   !< [Input] The RTDB handle
290      integer myproc
291      logical ignore
292      integer mypgroup_old
293
294      mypgroup_old = ga_pgroup_get_default()
295      myproc=ga_nodeid()
296      if (mypgroup_old .eq. ga_pgroup_get_world()) then
297        write(LuErr,*) ' Node ',myproc,' tried to close world group '
298        return
299      endif
300      myproc=ga_nodeid()
301      if(myproc.eq.0.and.sgprint) then
302        write(LuErr,*) ' deleting cloned rtdb '
303      endif
304C     Make sure all nodes are done using RTDB file
305      call ga_pgroup_sync(mypgroup_old)
306      if (.not. rtdb_close(rtdb, 'delete'))call errquit
307     $        (' sgend: rtdb_close and delete failed ',0,0)
308      if(myproc.eq.0) then
309        write(LuOut,*) 'Closing subgroup '
310      endif
311      if (sgprint) call util_flush(LuOut)
312
313      call ga_pgroup_set_default(my_ga_grp(depth-1))
314      if (myproc .eq. 0) then
315      ignore = ga_pgroup_destroy(my_ga_0_grp(depth))
316      endif
317      ignore = ga_pgroup_destroy(mypgroup_old)
318      my_ga_0_grp(depth) = -999
319      my_ga_grp(depth) = -999
320c
321c     reopen higher up rtdb
322c
323      depth = depth - 1
324      if (.not. rtdb_open(rtdb_fname(depth), 'old', rtdb))
325     $     call errquit('sgend: rtdb_open old failed ', 0, 0)
326
327      return
328      end
329
330C###############################################################################
331C>
332C> \brief Copy the RTDB for each subgroup
333C>
334C> Copy the RTDB for each subgroup. This way the subgroups can
335C> independently modify the RTDB without messing each other's work up.
336C> Depending on the kind of calculation one may want to store the
337C> RTDB replicas in the scratch directory or the permanent directory.
338C> This behavior can be controlled by the `dir` argument.
339C>
340      subroutine util_sgrtdb(rtdb,name_in,name_out,dir)
341      implicit none
342      integer rtdb           !< [Input] The RTDB handle
343      character*256 name_in  !< [Input] The filename of the global RTDB
344      character*256 name_out !< [Output] The filename of the subgroup
345                             !< RTDB
346#include "stdio.fh"
347#include "global.fh"
348#include "rtdb.fh"
349#include "inp.fh"
350#include "util_sgroup.fh"
351      integer ii, iend, ibegin
352      logical ortdbstate,ortdbstate2,status
353      character*256 grouprtdb
354      character*256 groupname
355      integer myproc
356
357C     Added option to allow group rtdb's to be written to
358C     Permanent_dir or scratch_dir
359      logical dir !< [Input] Flag determining the location of the
360                  !< replica RTDBs
361                  !< * .FALSE. changes to `permanent_dir`
362                  !< * .TRUE. changes to `scratch_dir`
363
364      myproc=ga_nodeid()
365
366      if(myproc.eq.0.and.sgprint) then
367        write(LuOut,*) 'util_sgrtdb start'
368        call util_flush(LuOut)
369      endif
370c
371c     do a copy of rtdb to permanent_dir or scratch_dir for each group
372c
373      ibegin = 1
374      iend   = ibegin + (6-1)
375      write(groupname(ibegin:iend),'(6A)') 'sg_db.'
376      do ii=1,depth
377        ibegin = iend + 1
378        iend   = ibegin + (4-1)
379C       9999 is largest 4 digit number, and 256 is just plain long (used above)
380C       Of course a long group name will most likely fail long before
381C       it gets to that long - other limitations on file name length exist
382        if (iend .gt. 256 .or. mygroup(ii) .gt. 9999)
383     *    call errquit(' util_sgrtdb: groups too deep',0,0)
384        write(groupname(ibegin:iend),'(i4.4)') mygroup(ii)
385      end do
386
387      call util_file_name(groupname(1:iend),dir,.false.,grouprtdb)
388C
389C     Do not want to have other nodes to be using old RTDB file anymore
390      call ga_pgroup_sync(ga_pgroup_get_default())
391C
392C     Node 0 of the group now copies the original RTDB to a group specific one
393C
394      if (myproc .eq. 0) then
395        if(sgprint) then
396           write(LuOut,*)'world rtdbname is ',
397     &       name_in(1:inp_strlen(name_in))
398           write(LuOut,*)'group rtdbname is ',
399     &       grouprtdb(1:inp_strlen(grouprtdb))
400           call util_flush(LuOut)
401        endif
402        inquire(file=name_in(1:inp_strlen(name_in)),exist=status)
403        if(.not.status) then
404c          most likely cause of error: permdir not available on this node
405           write(LuErr,*) ' !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
406           write(LuErr,*) ' ! please change permanent_dir to a   !'
407           write(LuErr,*) ' ! filesystem available to all the    !'
408           write(LuErr,*) ' ! processors (e.g. NFS, GPFS, Lustre)!'
409           write(LuErr,*) ' !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
410           call errquit(' util_sgrtdb: failed to open rtdb',0,0)
411        endif
412        call util_file_copy(name_in(1:inp_strlen(name_in)),
413     &        grouprtdb(1:inp_strlen(grouprtdb)))
414        inquire(file=grouprtdb(1:inp_strlen(grouprtdb)),
415     &          exist=status)
416        if (.not. status) then
417          write (LuErr,*) 'copy failed'
418          call errquit('util_sgrtdb: problem with system call', 0, 0)
419        else if (sgprint) then
420          write(LuOut,*) ' rtdb copied ', ga_pgroup_get_default()
421          call util_flush(LuOut)
422        endif
423      endif
424      name_out=grouprtdb
425C     Do not want to have other nodes using new RTDB before it exists
426      call ga_pgroup_sync(ga_pgroup_get_default())
427      return
428      end
429C###############################################################################
430C>
431C> \brief Initialize the subgroup layer using the SMP method
432C>
433C> The SMP method targets machines that are clusters of shared memory
434C> nodes. It creates subgroups where each subgroup coincides with
435C> a shared memory node.
436C>
437      subroutine util_sginit_smp()
438      implicit none
439c
440c     creates one subgroup for each smp
441c
442#include "global.fh"
443#include "util_sgroup.fh"
444      integer i,j,proclist(maxcpus),num_proc
445
446c setup
447      if (depth .ne. 1) then
448        call errquit(' util_sginit_smp: bad depth',0,0)
449      endif
450      ngroups(depth) = ga_cluster_nnodes()
451      mygroup(depth) = 1 + ga_cluster_nodeid()
452      my_ga_grp(depth) = -99
453
454c make each smp node into a group
455
456      do i=0,ngroups(depth)-1
457        num_proc = ga_cluster_nprocs(i)
458        if (num_proc .gt. maxcpus) call errquit('increase maxcpus',0,0)
459        do j=0,num_proc - 1
460          proclist(j+1)=ga_cluster_procid(i,j)
461        enddo
462        if (i+1 .eq. mygroup(depth)) then
463          my_ga_grp(depth) = ga_pgroup_create(proclist,num_proc)
464        endif
465      enddo
466
467c create the zero group
468
469      num_proc = ngroups(depth)
470      if(num_proc .gt. maxcpus)call errquit('increase maxcpus',0,0)
471      do i=0, num_proc - 1
472        proclist(i+1)=ga_cluster_procid(i,0)
473      enddo
474      my_ga_0_grp(depth) = ga_pgroup_create(proclist,num_proc)
475
476C Set the group
477
478      call ga_pgroup_set_default(my_ga_grp(depth))
479
480      return
481      end
482C###############################################################################
483C>
484C> \brief Initialize the subgroup layer with irregularly sized groups
485C>
486C> This method generates irregularly sized subgroups in a simple way.
487C> The array `cpusperg_array` (CPUs per Group) specifies the size of
488C> every subgroup. This routine generates subgroups to match that
489C> specification.
490C>
491      subroutine util_sginit_irreg(cpusperg_array,length)
492      implicit none
493c
494c     creates custom subgroups using cpusperg_array()
495c
496#include "global.fh"
497#include "util_sgroup.fh"
498      integer nproc, myproc
499      integer i,j,proclist(maxcpus), proc_counter,group_counter
500      integer length                 !< [Input] The number of subgroups
501      integer cpusperg_array(length) !< [Input] The size of each
502                                     !< subgroup
503
504c setup
505
506      nproc = ga_nnodes()
507      myproc = ga_nodeid()
508      my_ga_grp(depth) = -99
509      mygroup(depth)=-99
510      ngroups(depth)=length
511
512C verify input all at once instead of constantly in loops
513
514      proc_counter = 0
515      do i=1,length
516        if(cpusperg_array(i) .le. 0) then
517          call errquit(' util_sginit_irreg: zero cpusperg_array',0,0)
518        endif
519        if(cpusperg_array(i) .gt. maxcpus) then
520          call errquit(' util_sginit_irreg:increase maxcpus',0,0)
521        endif
522        proc_counter = proc_counter + cpusperg_array(i)
523      enddo
524      if (proc_counter .ne. nproc) then
525        call errquit(' util_sginit_irreg:bad cpusperg_array',0,0)
526      end if
527
528C make groups
529      proc_counter=1
530      group_counter=1
531      do i=0,nproc-1
532        proclist(proc_counter) = i
533        if (i.eq.myproc) then
534          mygroup(depth)=group_counter
535        endif
536        if (cpusperg_array(group_counter).eq.proc_counter) then
537          if(mygroup(depth) .eq. group_counter) then
538            my_ga_grp(depth)=ga_pgroup_create(proclist,proc_counter)
539          endif
540          group_counter=group_counter+1
541          proc_counter=0
542        end if
543        proc_counter=proc_counter+1
544      enddo
545
546c create the zero group
547
548      if(ngroups(depth).gt.maxcpus)call errquit('increase maxcpus',0,0)
549      proc_counter=1
550      group_counter=1
551      do i=0,nproc-1
552        if (proc_counter .eq. 1) then
553          proclist(group_counter) = i
554        end if
555        if (cpusperg_array(group_counter).eq.proc_counter) then
556          group_counter=group_counter+1
557          proc_counter=0
558        end if
559        proc_counter=proc_counter+1
560      enddo
561      my_ga_0_grp(depth) = ga_pgroup_create(proclist,ngroups(depth))
562
563C Set the group
564
565      call ga_pgroup_set_default(my_ga_grp(depth))
566
567      return
568      end
569C###############################################################################
570C>
571C> \brief Initialize the subgroup layer using the "power" method
572C>
573C> The power method creates subgroups based on the specification in
574C> `cpu_array`. The data in this array specifies the size of each
575C> subgroup as well as which processes are part of each subgroup.
576C> The data is stored in a vector of the following form
577C> \f{eqnarray*}{
578C>   \left(G_1,n_1,n_2,n_3,\ldots,n_{G_1},G_2,n_1,n_2,n_3,\ldots,
579C>   n_{G_2},\ldots\right)
580C> \f}
581C> where \f$G_1\f$, \f$G_2\f$, etc. are the sizes of the respective
582C> subgroups, and \f$n_1, \ldots, n_G\f$ are the processor ranks that
583C> are part of that subgroup.
584C>
585      subroutine util_sginit_power(cpu_array,length)
586      implicit none
587c
588c     creates custom highly custom subgroups using cpu_array()
589c     is is of the form G1,n1,n2,n3,...,nn,G2,n1,n2,n3,...,nn,...
590c     Where the G's are the size of the groups, and the n's
591c     are the specific nodes in the groups
592c     length = nnodes + ngroups
593c
594#include "global.fh"
595#include "util_sgroup.fh"
596      integer nproc, myproc
597      integer i,j,proclist(maxcpus), proc_counter,group_counter
598      integer pos_counter, group_size
599      integer length, cpu_array(length)
600
601c setup
602
603      nproc = ga_nnodes()
604      myproc = ga_nodeid()
605      my_ga_grp(depth) = -99
606      mygroup(depth)=-99
607      ngroups(depth)=-99
608
609C make groups
610      group_counter=0
611      pos_counter=0
612      do while (pos_counter .lt. length)
613        group_counter = group_counter + 1
614        pos_counter = pos_counter + 1
615        group_size = cpu_array(pos_counter)
616        proc_counter = 0
617        do i=1,group_size
618           pos_counter = pos_counter + 1
619           proc_counter=proc_counter+1
620           if (proc_counter .gt. maxcpus) then
621             call errquit('increase maxcpus',0,0)
622           end if
623           proclist(proc_counter) = cpu_array(pos_counter)
624           if (proclist(proc_counter) .eq. myproc) then
625             mygroup(depth)=group_counter
626           endif
627        end do
628        if(mygroup(depth) .eq. group_counter) then
629            my_ga_grp(depth)=ga_pgroup_create(proclist,proc_counter)
630        endif
631      end do
632      ngroups(depth)=group_counter
633
634c create the zero group
635
636      if(ngroups(depth).gt.maxcpus)call errquit('increase maxcpus',0,0)
637      group_counter=0
638      pos_counter=1
639      do while (pos_counter .lt. length)
640        group_counter = group_counter + 1
641        group_size = cpu_array(pos_counter)
642        pos_counter = pos_counter + 1
643        proclist(group_counter) = cpu_array(pos_counter)
644        pos_counter = pos_counter + group_size
645      end do
646      my_ga_0_grp(depth) = ga_pgroup_create(proclist,ngroups(depth))
647
648C Set the group
649
650      call ga_pgroup_set_default(my_ga_grp(depth))
651
652      return
653      end
654
655
656C###############################################################################
657C>
658C> \brief Initialize the subgroup layer with regular subgroup sizes
659C>
660      subroutine util_sginit(groups_want)
661      implicit none
662#include "global.fh"
663#include "util_sgroup.fh"
664c
665c     Create subgroups of a constant size
666c
667      integer groups_want !< [Input] The number of subgroups wanted
668      integer nproc,myproc,nchunkq,nremainq,i,j,n
669      integer num_proc,proclist(maxcpus)
670
671C setup
672
673      ngroups(depth) = groups_want
674      nproc = ga_nnodes()
675      myproc = ga_nodeid()
676      mygroup(depth) = -99
677      my_ga_grp(depth) = -99
678
679      if (nproc .lt. ngroups(depth)) then
680         if(myproc .eq. 0) write(*,*) ' Will only create group of',nproc
681         ngroups(depth) = nproc
682      endif
683      nchunkq=nproc/ngroups(depth)
684      nremainq=mod(nproc,ngroups(depth))
685      if ((nchunkq .gt. maxcpus) .or.
686     *     (nchunkq+1 .gt. maxcpus .and. nremainq .ne. 0)) then
687        call errquit('increase maxcpus',0,0)
688      end if
689
690c for irregular distribution, make sure the lower groups have more procs
691c assign each proc to individual group, then create individual group
692
693      n = 0
694      do i=1,ngroups(depth)
695         num_proc = nchunkq
696         if(i .le. nremainq) num_proc = num_proc + 1
697         do j=1,num_proc
698            proclist(j)=n
699            if (n .eq. myproc) mygroup(depth) = i
700            n=n+1
701         enddo
702         if (mygroup(depth) .eq. i) then
703            my_ga_grp(depth) = ga_pgroup_create(proclist, num_proc)
704         endif
705      enddo
706
707c create the zero group
708
709      if(ngroups(depth).gt.maxcpus)call errquit('increase maxcpus',0,0)
710      n = 0
711      do i=1,ngroups(depth)
712         num_proc = nchunkq
713         if(i .le. nremainq) num_proc = num_proc + 1
714         proclist(i) = n
715         n = n + num_proc
716      enddo
717      my_ga_0_grp(depth) = ga_pgroup_create(proclist,ngroups(depth))
718
719c set the group
720
721      call ga_pgroup_set_default(my_ga_grp(depth))
722
723      return
724      end
725C###############################################################################
726C>
727C> \brief Create movecs file names for the current subgroup
728C>
729C> The movecs file holds the SCF vectors of a particular calculation
730C> and is therefore a crucial data object. With subgroups every group
731C> works on a calculation of its own and therefore needs a movecs file
732C> of its own. This routine creates and stores the name of the movecs
733C> file for the current subgroup.
734C>
735      subroutine util_sgmovecs(rtdb)
736      implicit none
737      integer rtdb !< [Input] The RTDB handle
738#include "stdio.fh"
739#include "global.fh"
740#include "rtdb.fh"
741#include "inp.fh"
742#include "mafdecls.fh"
743#include "util_sgroup.fh"
744      character*255 movecs_in,sg_movecs_out,tag,theory
745      logical movecs_out_l
746      integer myproc
747c
748c Tested only for the case when movecs are created
749C
750c Read movecs filename with path
751c
752      myproc=ga_nodeid()
753
754       if (.not.(rtdb_cget(rtdb,'scf:input vectors',1,movecs_in).or.
755     >           rtdb_cget(rtdb,'pspw:input vectors',1,movecs_in).or.
756     $           rtdb_cget(rtdb,'dft:input vectors',1, movecs_in)))
757     $     movecs_in = 'atomic'
758       if (movecs_in.eq.'atomic')
759     $     call util_file_name('movecs',.true.,.false.,movecs_in)
760c
761c Create movecs.mygroup(depth)
762C This just keeps appending to the end
763c
764      call util_pname0(movecs_in,sg_movecs_out,mygroup(depth),
765     *     ngroups(depth))
766
767      if(myproc.eq.0.and.sgprint) then
768        write(LuOut,*) 'mygroup=',mygroup(depth),
769     $             ' movecs_in',movecs_in,
770     $             ' sg_movecs_out=',sg_movecs_out
771      endif
772c
773c Set output movecs
774c
775c      if (.not.(rtdb_cget(rtdb,'task:theory',1,theory)))
776c     *     call errquit('rtdb_get task:theory failed',0,0)
777c      if (theory.eq.'dft') then
778c         tag='dft:output vectors'
779c      else
780c         tag='scf:output vectors'
781c      endif
782c
783      tag='dft:output vectors'
784      if (.not. rtdb_cput(rtdb,tag,1,sg_movecs_out)) then
785       write (LuOut,*) 'error in rtdb_cput'
786      endif
787      tag='scf:output vectors'
788      if (.not. rtdb_cput(rtdb,tag,1,sg_movecs_out)) then
789       write (LuOut,*) 'error in rtdb_cput'
790      endif
791
792      tag='pspw:input vectors'
793      if (.not. rtdb_cput(rtdb,tag,1,sg_movecs_out)) then
794       write (LuOut,*) 'error in rtdb_cput'
795      endif
796      tag='pspw:output vectors'
797      if (.not. rtdb_cput(rtdb,tag,1,sg_movecs_out)) then
798       write (LuOut,*) 'error in rtdb_cput'
799      endif
800
801c
802      return
803      end
804c     Hokey functions to keep others away from my common blocks
805C###############################################################################
806C>
807C> \brief Obtain my subgroup number
808C>
809      integer function util_sgroup_mygroup()
810#include "util_sgroup.fh"
811      util_sgroup_mygroup = mygroup(depth)
812      return
813      end
814C###############################################################################
815C>
816C> \brief Obtain the total number of subgroups
817C>
818      integer function util_sgroup_numgroups()
819#include "util_sgroup.fh"
820      util_sgroup_numgroups = ngroups(depth)
821      return
822      end
823C###############################################################################
824C>
825C> \brief Obtain the subgroup of all rank 0 processes in each subgroup
826C>
827      integer function util_sgroup_zero_group()
828#include "util_sgroup.fh"
829      util_sgroup_zero_group = my_ga_0_grp(depth)
830      return
831      end
832C###############################################################################
833C>
834C> \brief Obtain the current subgroup nesting depth
835C>
836      integer function util_sgroup_depth()
837#include "util_sgroup.fh"
838      util_sgroup_depth = depth
839      return
840      end
841C###############################################################################
842C>
843C> \brief Set the file name for standard output for this subgroup
844C>
845      integer function util_sgroup_set_ioname(filename)
846      implicit none
847      character*(*) filename !< [Input] The base file name
848
849#include "stdio.fh"
850#include "util_sgroup.fh"
851
852      logical       found
853      character*255 full_filename
854      character ch_tmp
855
856#ifdef USE_SUBGROUPS
857      LuOut = mygroup(depth)+200
858
859      call util_file_name_noprefix(filename,.false.,.false.,
860     >                             full_filename)
861
862      inquire(file=full_filename,exist=found)
863*     **** FILE already exists - parse to EOF ****
864      if (found) then
865         open(unit=LuOut,file=full_filename,form='formatted',
866     >             status='old')
867         do while(.true.)
868            read(LuOut,*,ERR=30,END=30) ch_tmp
869         end do
870 30      continue
871#if defined(FUJITSU_SOLARIS) || defined(PSCALE) || defined(__crayx1)
872         backspace LuOut
873#endif
874*     **** FILE does not exist ****
875      else
876         open(unit=LuOut,file=full_filename,form='formatted')
877      end if
878      util_sgroup_set_ioname = LuOut
879#else
880      util_sgroup_set_ioname = -1
881#endif
882      return
883      end
884C###############################################################################
885C>
886C> \brief Switch standard output back to the default Fortran scenario
887C>
888      integer function util_sgroup_unset_io()
889      implicit none
890#include "stdio.fh"
891#include "util_sgroup.fh"
892
893#ifdef USE_SUBGROUPS
894      close(LuOut)
895      LuOut    = 6
896      util_sgroup_unset_io = LuOut
897#else
898      util_sgroup_unset_io = -1
899#endif
900      return
901      end
902C###############################################################################
903C>
904C> @}
905