1#if HAVE_CONFIG_H
2#   include "config.fh"
3#endif
4      subroutine shuffle
5#include "common.fh"
6c
7      integer snode,rnode,i,inx,iny,inz,pnum,idx,idy,idz,ipx,ipy,ipz
8      integer icnt,inode,me
9c
10c   This subroutine shuffles the coordinates on each of the processors
11c   around so that each processor has the coordinates for atoms residing
12c   in the physical domain corresponding to that processor. This routine
13c   assumes that atoms are completely randomized.
14c
15      pnum = ga_nnodes()
16      me = ga_nodeid()
17      call factor(pnum,idx,idy,idz)
18      call i_proc_to_xyz(me,ipx,ipy,ipz,idx,idy,idz)
19c
20c   copy all particles into the buffer
21c
22      icnt = 0
23      do i = 1, antot
24        icnt = icnt + 1
25        ra(i,1,6) = ra(i,1,1)
26        ra(i,2,6) = ra(i,2,1)
27        ra(i,3,6) = ra(i,3,1)
28        xcrd(icnt) = ra(i,1,6)
29        ycrd(icnt) = ra(i,2,6)
30        zcrd(icnt) = ra(i,3,6)
31        xfrc(icnt) = ra(i,1,2)
32        yfrc(icnt) = ra(i,2,2)
33        zfrc(icnt) = ra(i,3,2)
34        mbuf(icnt) = mass(i)
35        bidx(icnt) = aidx(i)
36        bat(icnt)  = at(i)
37      end do
38      btot = antot
39      antot = 0
40c
41c   send buffers to processes controlling neighboring domains,
42c   and select out particles that are within that domain
43c
44c   send along x-axis and gather particles whose x value lies
45c   between xmin and xmax
46c
47      call cull(1)
48      do i = 1, idx-1
49        inx = ipx - 1
50        if (inx.eq.-1) inx = idx - 1
51        call i_xyz_to_proc(rnode,inx,ipy,ipz,idx,idy,idz)
52        call exchange_buf(rnode)
53        call cull(1)
54      end do
55c
56c   copy all particles into the buffer
57c
58      icnt = 0
59      do i = 1, antot
60        icnt = icnt + 1
61        xcrd(icnt) = ra(i,1,6)
62        ycrd(icnt) = ra(i,2,6)
63        zcrd(icnt) = ra(i,3,6)
64        xfrc(icnt) = ra(i,1,2)
65        yfrc(icnt) = ra(i,2,2)
66        zfrc(icnt) = ra(i,3,2)
67        mbuf(icnt) = mass(i)
68        bidx(icnt) = aidx(i)
69        bat(icnt)  = at(i)
70      end do
71      btot = antot
72      antot = 0
73c
74c   send along y-axis and gather particles whose y value lies
75c   between ymin and ymax
76c
77      call cull(2)
78      do i = 1, idy-1
79        iny = ipy - 1
80        if (iny.eq.-1) iny = idy - 1
81        call i_xyz_to_proc(rnode,ipx,iny,ipz,idx,idy,idz)
82        call exchange_buf(rnode)
83        call cull(2)
84      end do
85c
86c   copy all particles into the buffer
87c
88      icnt = 0
89      do i = 1, antot
90        icnt = icnt + 1
91        xcrd(icnt) = ra(i,1,6)
92        ycrd(icnt) = ra(i,2,6)
93        zcrd(icnt) = ra(i,3,6)
94        xfrc(icnt) = ra(i,1,2)
95        yfrc(icnt) = ra(i,2,2)
96        zfrc(icnt) = ra(i,3,2)
97        mbuf(icnt) = mass(i)
98        bidx(icnt) = aidx(i)
99        bat(icnt)  = at(i)
100      end do
101      btot = antot
102      antot = 0
103c
104c   send along z-axis and gather particles whose z value lies
105c   between zmin and zmax
106c
107      call cull(3)
108      do i = 1, idz-1
109        inz = ipz - 1
110        if (inz.eq.-1) inz = idz - 1
111        call i_xyz_to_proc(rnode,ipx,ipy,inz,idx,idy,idz)
112        call exchange_buf(rnode)
113        call cull(3)
114      end do
115c
116c   rearrange data stack so that they are ordered with respect to
117c   atom index
118c
119      call heapsort(0)
120      call fixper
121c
122      return
123      end
124c
125      subroutine exchange_buf(rnode)
126#include "common.fh"
127c
128      double precision buf(3,MAXAT), rbuf(MAXAT)
129      integer i, ibuf(MAXAT)
130      integer rnode, rptr, rtot, me, one, ld
131c
132c   exchange the size of lists
133c
134      me = ga_nodeid()
135      one = 1
136      ld = 3
137c
138      do i = 1, btot
139        buf(1,i) = xcrd(i)
140        buf(2,i) = ycrd(i)
141        buf(3,i) = zcrd(i)
142      end do
143c
144      rptr = gsize_lo(me)
145      call nga_put(g_size,rptr,rptr,btot,one)
146      gcoords_hi(1) = 3
147      gcoords_hi(2) = gcoords_lo(2,me) + btot - 1
148      if (btot.gt.0) call nga_put(g_coords,gcoords_lo(1,me),
149     +                            gcoords_hi,buf,ld)
150      call ga_sync()
151      rptr = gsize_lo(rnode)
152      call nga_get(g_size,rptr,rptr,rtot,one)
153      gcoords_hi(2) = gcoords_lo(2,rnode) + rtot - 1
154      if (rtot.gt.0) call nga_get(g_coords,gcoords_lo(1,rnode),
155     +                            gcoords_hi,buf,ld)
156c
157      do i = 1, rtot
158        xcrd(i) = buf(1,i)
159        ycrd(i) = buf(2,i)
160        zcrd(i) = buf(3,i)
161      end do
162      call ga_sync()
163c
164      do i = 1, btot
165        buf(1,i) = xfrc(i)
166        buf(2,i) = yfrc(i)
167        buf(3,i) = zfrc(i)
168      end do
169c
170      gcoords_hi(1) = 3
171      gcoords_hi(2) = gcoords_lo(2,me) + btot - 1
172      if (btot.gt.0) call nga_put(g_coords,gcoords_lo(1,me),
173     +                            gcoords_hi,buf,ld)
174      call ga_sync()
175      gcoords_hi(2) = gcoords_lo(2,rnode) + rtot - 1
176      if (rtot.gt.0) call nga_get(g_coords,gcoords_lo(1,rnode),
177     +                            gcoords_hi,buf,ld)
178c
179      do i = 1, rtot
180        xfrc(i) = buf(1,i)
181        yfrc(i) = buf(2,i)
182        zfrc(i) = buf(3,i)
183      end do
184      call ga_sync()
185c
186      do i = 1, btot
187        rbuf(i) = mbuf(i)
188      end do
189c
190      grvec_hi = grvec_lo(me) + btot - 1
191      if (btot.gt.0) call nga_put(g_rvec,grvec_lo(me),grvec_hi,rbuf,ld)
192      call ga_sync()
193      grvec_hi = grvec_lo(rnode) + rtot - 1
194      if (rtot.gt.0) call nga_get(g_rvec,grvec_lo(rnode),
195     +                            grvec_hi,rbuf,ld)
196c
197      do i = 1, rtot
198        mbuf(i) = rbuf(i)
199      end do
200      call ga_sync()
201c
202      do i = 1, btot
203        ibuf(i) = bidx(i)
204      end do
205c
206      gindex_hi = gindex_lo(me) + btot - 1
207      if (btot.gt.0) call nga_put(g_index,gindex_lo(me),
208     +                            gindex_hi,ibuf,ld)
209      call ga_sync()
210      gindex_hi = gindex_lo(rnode) + rtot - 1
211      if (rtot.gt.0) call nga_get(g_index,gindex_lo(rnode),
212     +                            gindex_hi,ibuf,ld)
213c
214      do i = 1, rtot
215        bidx(i) = ibuf(i)
216      end do
217      call ga_sync()
218c
219      do i = 1, btot
220        ibuf(i) = bat(i)
221      end do
222c
223      gindex_hi = gindex_lo(me) + btot - 1
224      if (btot.gt.0) call nga_put(g_index,gindex_lo(me),gindex_hi,
225     +                            ibuf,ld)
226      call ga_sync()
227      gindex_hi = gindex_lo(rnode) + rtot - 1
228      if (rtot.gt.0) call nga_get(g_index,gindex_lo(rnode),
229     +                            gindex_hi,ibuf,ld)
230c
231      do i = 1, rtot
232        bat(i) = ibuf(i)
233      end do
234      call ga_sync()
235c
236      btot = rtot
237c
238      return
239      end
240c
241      subroutine cull(iflg)
242#include "common.fh"
243c
244      double precision xmax,ymax,zmax,xmin,ymin,zmin
245      double precision xt,yt,zt
246      integer i,pnum,me,idx,idy,idz,ipx,ipy,ipz
247      integer icnt,iflg
248      logical xflg,yflg,zflg
249c
250c   This subroutine gathers all the particles in the buffer arrays that
251c   have coordinates lying in the domain corresponding to the processor
252c   and puts them in the regular particle arrays
253c
254c   Determine boundaries of the physical domain assigned to the processor
255c
256      pnum = ga_nnodes()
257      me = ga_nodeid()
258      call factor(pnum,idx,idy,idz)
259      call i_proc_to_xyz(me,ipx,ipy,ipz,idx,idy,idz)
260c
261c  set logical flags
262c
263      if (iflg.eq.1) then
264        xflg = .true.
265        yflg = .false.
266        zflg = .false.
267      elseif (iflg.eq.2) then
268        xflg = .false.
269        yflg = .true.
270        zflg = .false.
271      elseif (iflg.eq.3) then
272        xflg = .false.
273        yflg = .false.
274        zflg = .true.
275      else
276        call ga_error("Illegal direction in subroutine cull",iflg )
277      endif
278c
279      xmax = xbox*dble(ipx+1)/dble(idx)
280      ymax = ybox*dble(ipy+1)/dble(idy)
281      zmax = zbox*dble(ipz+1)/dble(idz)
282      xmin = xbox*dble(ipx)/dble(idx)
283      ymin = ybox*dble(ipy)/dble(idy)
284      zmin = zbox*dble(ipz)/dble(idz)
285      xmax = xmax - xbox2
286      ymax = ymax - ybox2
287      zmax = zmax - zbox2
288      xmin = xmin - xbox2
289      ymin = ymin - ybox2
290      zmin = zmin - zbox2
291c
292c   Locate all particles on the processor that should reside
293c   on the processor and move all others to the buffer
294c
295      icnt = 0
296      do i = 1, btot
297        xt = xcrd(i) - xbox * anint(xcrd(i)/xbox)
298        yt = ycrd(i) - ybox * anint(ycrd(i)/ybox)
299        zt = zcrd(i) - zbox * anint(zcrd(i)/zbox)
300        if (xt.eq.xbox2) xt = -xbox2
301        if (yt.eq.ybox2) yt = -ybox2
302        if (zt.eq.zbox2) zt = -zbox2
303        if ((xflg.and.xt.lt.xmax.and.xt.ge.xmin).or.
304     +      (yflg.and.yt.lt.ymax.and.yt.ge.ymin).or.
305     +      (zflg.and.zt.lt.zmax.and.zt.ge.zmin)) then
306          antot = antot + 1
307          ra(antot,1,6) = xcrd(i)
308          ra(antot,2,6) = ycrd(i)
309          ra(antot,3,6) = zcrd(i)
310          ra(antot,1,2) = xfrc(i)
311          ra(antot,2,2) = yfrc(i)
312          ra(antot,3,2) = zfrc(i)
313          mass(antot) = mbuf(i)
314          aidx(antot) = bidx(i)
315          at(antot) = bat(i)
316        else
317          icnt = icnt + 1
318          xcrd(icnt) = xcrd(i)
319          ycrd(icnt) = ycrd(i)
320          zcrd(icnt) = zcrd(i)
321          xfrc(icnt) = xfrc(i)
322          yfrc(icnt) = yfrc(i)
323          zfrc(icnt) = zfrc(i)
324          mbuf(icnt) = mbuf(i)
325          bidx(icnt) = bidx(i)
326          bat(icnt)  = bat(i)
327        endif
328      end do
329      btot = icnt
330c      if (btot.gt.MAXAT) then
331c        call ga_error("btot greater than MAXAT in cull",btot)
332c      endif
333c
334      return
335      end
336