1#if HAVE_CONFIG_H 2# include "config.fh" 3#endif 4 subroutine shuffle 5#include "common.fh" 6c 7 integer snode,rnode,i,inx,iny,inz,pnum,idx,idy,idz,ipx,ipy,ipz 8 integer icnt,inode,me 9c 10c This subroutine shuffles the coordinates on each of the processors 11c around so that each processor has the coordinates for atoms residing 12c in the physical domain corresponding to that processor. This routine 13c assumes that atoms are completely randomized. 14c 15 pnum = ga_nnodes() 16 me = ga_nodeid() 17 call factor(pnum,idx,idy,idz) 18 call i_proc_to_xyz(me,ipx,ipy,ipz,idx,idy,idz) 19c 20c copy all particles into the buffer 21c 22 icnt = 0 23 do i = 1, antot 24 icnt = icnt + 1 25 ra(i,1,6) = ra(i,1,1) 26 ra(i,2,6) = ra(i,2,1) 27 ra(i,3,6) = ra(i,3,1) 28 xcrd(icnt) = ra(i,1,6) 29 ycrd(icnt) = ra(i,2,6) 30 zcrd(icnt) = ra(i,3,6) 31 xfrc(icnt) = ra(i,1,2) 32 yfrc(icnt) = ra(i,2,2) 33 zfrc(icnt) = ra(i,3,2) 34 mbuf(icnt) = mass(i) 35 bidx(icnt) = aidx(i) 36 bat(icnt) = at(i) 37 end do 38 btot = antot 39 antot = 0 40c 41c send buffers to processes controlling neighboring domains, 42c and select out particles that are within that domain 43c 44c send along x-axis and gather particles whose x value lies 45c between xmin and xmax 46c 47 call cull(1) 48 do i = 1, idx-1 49 inx = ipx - 1 50 if (inx.eq.-1) inx = idx - 1 51 call i_xyz_to_proc(rnode,inx,ipy,ipz,idx,idy,idz) 52 call exchange_buf(rnode) 53 call cull(1) 54 end do 55c 56c copy all particles into the buffer 57c 58 icnt = 0 59 do i = 1, antot 60 icnt = icnt + 1 61 xcrd(icnt) = ra(i,1,6) 62 ycrd(icnt) = ra(i,2,6) 63 zcrd(icnt) = ra(i,3,6) 64 xfrc(icnt) = ra(i,1,2) 65 yfrc(icnt) = ra(i,2,2) 66 zfrc(icnt) = ra(i,3,2) 67 mbuf(icnt) = mass(i) 68 bidx(icnt) = aidx(i) 69 bat(icnt) = at(i) 70 end do 71 btot = antot 72 antot = 0 73c 74c send along y-axis and gather particles whose y value lies 75c between ymin and ymax 76c 77 call cull(2) 78 do i = 1, idy-1 79 iny = ipy - 1 80 if (iny.eq.-1) iny = idy - 1 81 call i_xyz_to_proc(rnode,ipx,iny,ipz,idx,idy,idz) 82 call exchange_buf(rnode) 83 call cull(2) 84 end do 85c 86c copy all particles into the buffer 87c 88 icnt = 0 89 do i = 1, antot 90 icnt = icnt + 1 91 xcrd(icnt) = ra(i,1,6) 92 ycrd(icnt) = ra(i,2,6) 93 zcrd(icnt) = ra(i,3,6) 94 xfrc(icnt) = ra(i,1,2) 95 yfrc(icnt) = ra(i,2,2) 96 zfrc(icnt) = ra(i,3,2) 97 mbuf(icnt) = mass(i) 98 bidx(icnt) = aidx(i) 99 bat(icnt) = at(i) 100 end do 101 btot = antot 102 antot = 0 103c 104c send along z-axis and gather particles whose z value lies 105c between zmin and zmax 106c 107 call cull(3) 108 do i = 1, idz-1 109 inz = ipz - 1 110 if (inz.eq.-1) inz = idz - 1 111 call i_xyz_to_proc(rnode,ipx,ipy,inz,idx,idy,idz) 112 call exchange_buf(rnode) 113 call cull(3) 114 end do 115c 116c rearrange data stack so that they are ordered with respect to 117c atom index 118c 119 call heapsort(0) 120 call fixper 121c 122 return 123 end 124c 125 subroutine exchange_buf(rnode) 126#include "common.fh" 127c 128 double precision buf(3,MAXAT), rbuf(MAXAT) 129 integer i, ibuf(MAXAT) 130 integer rnode, rptr, rtot, me, one, ld 131c 132c exchange the size of lists 133c 134 me = ga_nodeid() 135 one = 1 136 ld = 3 137c 138 do i = 1, btot 139 buf(1,i) = xcrd(i) 140 buf(2,i) = ycrd(i) 141 buf(3,i) = zcrd(i) 142 end do 143c 144 rptr = gsize_lo(me) 145 call nga_put(g_size,rptr,rptr,btot,one) 146 gcoords_hi(1) = 3 147 gcoords_hi(2) = gcoords_lo(2,me) + btot - 1 148 if (btot.gt.0) call nga_put(g_coords,gcoords_lo(1,me), 149 + gcoords_hi,buf,ld) 150 call ga_sync() 151 rptr = gsize_lo(rnode) 152 call nga_get(g_size,rptr,rptr,rtot,one) 153 gcoords_hi(2) = gcoords_lo(2,rnode) + rtot - 1 154 if (rtot.gt.0) call nga_get(g_coords,gcoords_lo(1,rnode), 155 + gcoords_hi,buf,ld) 156c 157 do i = 1, rtot 158 xcrd(i) = buf(1,i) 159 ycrd(i) = buf(2,i) 160 zcrd(i) = buf(3,i) 161 end do 162 call ga_sync() 163c 164 do i = 1, btot 165 buf(1,i) = xfrc(i) 166 buf(2,i) = yfrc(i) 167 buf(3,i) = zfrc(i) 168 end do 169c 170 gcoords_hi(1) = 3 171 gcoords_hi(2) = gcoords_lo(2,me) + btot - 1 172 if (btot.gt.0) call nga_put(g_coords,gcoords_lo(1,me), 173 + gcoords_hi,buf,ld) 174 call ga_sync() 175 gcoords_hi(2) = gcoords_lo(2,rnode) + rtot - 1 176 if (rtot.gt.0) call nga_get(g_coords,gcoords_lo(1,rnode), 177 + gcoords_hi,buf,ld) 178c 179 do i = 1, rtot 180 xfrc(i) = buf(1,i) 181 yfrc(i) = buf(2,i) 182 zfrc(i) = buf(3,i) 183 end do 184 call ga_sync() 185c 186 do i = 1, btot 187 rbuf(i) = mbuf(i) 188 end do 189c 190 grvec_hi = grvec_lo(me) + btot - 1 191 if (btot.gt.0) call nga_put(g_rvec,grvec_lo(me),grvec_hi,rbuf,ld) 192 call ga_sync() 193 grvec_hi = grvec_lo(rnode) + rtot - 1 194 if (rtot.gt.0) call nga_get(g_rvec,grvec_lo(rnode), 195 + grvec_hi,rbuf,ld) 196c 197 do i = 1, rtot 198 mbuf(i) = rbuf(i) 199 end do 200 call ga_sync() 201c 202 do i = 1, btot 203 ibuf(i) = bidx(i) 204 end do 205c 206 gindex_hi = gindex_lo(me) + btot - 1 207 if (btot.gt.0) call nga_put(g_index,gindex_lo(me), 208 + gindex_hi,ibuf,ld) 209 call ga_sync() 210 gindex_hi = gindex_lo(rnode) + rtot - 1 211 if (rtot.gt.0) call nga_get(g_index,gindex_lo(rnode), 212 + gindex_hi,ibuf,ld) 213c 214 do i = 1, rtot 215 bidx(i) = ibuf(i) 216 end do 217 call ga_sync() 218c 219 do i = 1, btot 220 ibuf(i) = bat(i) 221 end do 222c 223 gindex_hi = gindex_lo(me) + btot - 1 224 if (btot.gt.0) call nga_put(g_index,gindex_lo(me),gindex_hi, 225 + ibuf,ld) 226 call ga_sync() 227 gindex_hi = gindex_lo(rnode) + rtot - 1 228 if (rtot.gt.0) call nga_get(g_index,gindex_lo(rnode), 229 + gindex_hi,ibuf,ld) 230c 231 do i = 1, rtot 232 bat(i) = ibuf(i) 233 end do 234 call ga_sync() 235c 236 btot = rtot 237c 238 return 239 end 240c 241 subroutine cull(iflg) 242#include "common.fh" 243c 244 double precision xmax,ymax,zmax,xmin,ymin,zmin 245 double precision xt,yt,zt 246 integer i,pnum,me,idx,idy,idz,ipx,ipy,ipz 247 integer icnt,iflg 248 logical xflg,yflg,zflg 249c 250c This subroutine gathers all the particles in the buffer arrays that 251c have coordinates lying in the domain corresponding to the processor 252c and puts them in the regular particle arrays 253c 254c Determine boundaries of the physical domain assigned to the processor 255c 256 pnum = ga_nnodes() 257 me = ga_nodeid() 258 call factor(pnum,idx,idy,idz) 259 call i_proc_to_xyz(me,ipx,ipy,ipz,idx,idy,idz) 260c 261c set logical flags 262c 263 if (iflg.eq.1) then 264 xflg = .true. 265 yflg = .false. 266 zflg = .false. 267 elseif (iflg.eq.2) then 268 xflg = .false. 269 yflg = .true. 270 zflg = .false. 271 elseif (iflg.eq.3) then 272 xflg = .false. 273 yflg = .false. 274 zflg = .true. 275 else 276 call ga_error("Illegal direction in subroutine cull",iflg ) 277 endif 278c 279 xmax = xbox*dble(ipx+1)/dble(idx) 280 ymax = ybox*dble(ipy+1)/dble(idy) 281 zmax = zbox*dble(ipz+1)/dble(idz) 282 xmin = xbox*dble(ipx)/dble(idx) 283 ymin = ybox*dble(ipy)/dble(idy) 284 zmin = zbox*dble(ipz)/dble(idz) 285 xmax = xmax - xbox2 286 ymax = ymax - ybox2 287 zmax = zmax - zbox2 288 xmin = xmin - xbox2 289 ymin = ymin - ybox2 290 zmin = zmin - zbox2 291c 292c Locate all particles on the processor that should reside 293c on the processor and move all others to the buffer 294c 295 icnt = 0 296 do i = 1, btot 297 xt = xcrd(i) - xbox * anint(xcrd(i)/xbox) 298 yt = ycrd(i) - ybox * anint(ycrd(i)/ybox) 299 zt = zcrd(i) - zbox * anint(zcrd(i)/zbox) 300 if (xt.eq.xbox2) xt = -xbox2 301 if (yt.eq.ybox2) yt = -ybox2 302 if (zt.eq.zbox2) zt = -zbox2 303 if ((xflg.and.xt.lt.xmax.and.xt.ge.xmin).or. 304 + (yflg.and.yt.lt.ymax.and.yt.ge.ymin).or. 305 + (zflg.and.zt.lt.zmax.and.zt.ge.zmin)) then 306 antot = antot + 1 307 ra(antot,1,6) = xcrd(i) 308 ra(antot,2,6) = ycrd(i) 309 ra(antot,3,6) = zcrd(i) 310 ra(antot,1,2) = xfrc(i) 311 ra(antot,2,2) = yfrc(i) 312 ra(antot,3,2) = zfrc(i) 313 mass(antot) = mbuf(i) 314 aidx(antot) = bidx(i) 315 at(antot) = bat(i) 316 else 317 icnt = icnt + 1 318 xcrd(icnt) = xcrd(i) 319 ycrd(icnt) = ycrd(i) 320 zcrd(icnt) = zcrd(i) 321 xfrc(icnt) = xfrc(i) 322 yfrc(icnt) = yfrc(i) 323 zfrc(icnt) = zfrc(i) 324 mbuf(icnt) = mbuf(i) 325 bidx(icnt) = bidx(i) 326 bat(icnt) = bat(i) 327 endif 328 end do 329 btot = icnt 330c if (btot.gt.MAXAT) then 331c call ga_error("btot greater than MAXAT in cull",btot) 332c endif 333c 334 return 335 end 336