1 subroutine argos_space_dldbal(stime,syntim,waltim,nod,nlst, 2 + nen,lbbl,dlb,lpsyn,ibindx,iburen,npp) 3c 4 implicit none 5c 6#include "argos_space_common.fh" 7#include "mafdecls.fh" 8#include "msgids.fh" 9#include "bitops.fh" 10c 11 integer argos_space_btop 12 external argos_space_btop 13c 14 integer npp 15 integer nod(np),nlst(npp,2),nen(np,mbbl) 16 integer lbbl(mbbl,mbb2) 17 real*8 stime,syntim,synsum,waltim,dlb(*),factor,facx,facy,facz 18 logical lpsyn 19 logical lbsize 20c 21 integer i,j,k,n,ibusy,least,iproc,icell,jcell,itemp,jtemp 22 integer intsiz,ibindx(np),iburen(np,27,2) 23 logical lnb(27) 24 integer nfr,nto,node 25c 26c no load balancing needed if run on single processor 27c 28 if(np.eq.1) return 29c 30c collect the synchronization times converted into integers 31c 32 do 1 i=1,np 33 nlst(i,1)=0 34 nlst(i,2)=0 35 nod(i)=i-1 36 1 continue 37 nlst(me+1,1)=int(1.0d6*syntim) 38 nlst(npp,1)=int(1.0d6*waltim) 39 nlst(npp,2)=0 40c 41 do 2 i=1,nbbl 42 iproc=lbbl(i,1) 43 if(iproc.ne.me) then 44 n=ibindx(iproc+1) 45 if(n.gt.0) nlst(me+1,2)=ior(nlst(me+1,2),2**(n-1)) 46 endif 47 2 continue 48c 49c get the size of an integer on this machine in bits 50c 51 intsiz=ma_sizeof(mt_int,1,mt_byte) 52c 53c if(intsiz.le.32) then 54 call ga_igop(msp_14,nlst,2*npp,'+') 55c else 56c do 3 i=1,np 57c nlst(i,1)=lshift(nlst(i,1),32)+nlst(i,2) 58c 3 continue 59c call ga_igop(msp_14,nlst,np,'+') 60c do 4 i=1,np 61c nlst(i,2)=iand(nlst(i,1),8589934591) 62c nlst(i,1)=rshift(nlst(i,1),32) 63c 4 continue 64c endif 65c 66 synsum=0.0d0 67 do 5 i=1,np 68 dlb(i)=dble(nlst(i,1))*1.0d-6 69 synsum=synsum+dlb(i) 70 5 continue 71 waltim=(dble(nlst(npp,1))*1.0d-6/dble(np)) 72c 73 if(nldup.lt.0) then 74 tslow=synsum 75 nldup=nldup+1 76 else 77 if(synsum.gt.tslow) then 78 nldup=nldup+1 79 else 80 tslow=synsum 81 nldup=0 82 endif 83 endif 84c 85c if the accumulated synchronization time is less than 0.0001 sec 86c no load balancing is needed 87c 88 if(synsum.lt.1.0d-4) return 89c 90c determine the busiest processor 91c if the busiest processor has no off-processor box-box pairs 92c load balancing will be done by resizing 93c 94 ibusy=1 95 do 6 i=2,np 96 if(abs(dlb(i)).lt.abs(dlb(ibusy))) ibusy=i 97 6 continue 98c 99c if the busiest processor has no off-processor pairs in the 100c cell-cell list loadbalancing will be done by resizing 101c 102 lbsize=nlst(ibusy,2).eq.0 103 if(nldup.ge.lbpair) lbsize=.true. 104c 105c order node list 106c 107 do 7 i=1,np-1 108 do 8 j=i+1,np 109 if(dlb(nod(i)+1).gt.dlb(nod(j)+1)) then 110 n=nod(i) 111 nod(i)=nod(j) 112 nod(j)=n 113 endif 114 8 continue 115 7 continue 116c 117c load balancing based on cell resizing 118c 119c 1 if specified as only method in input 120c 2. if busiest processor has no off-processor cell-cell pairs (lbsize true) 121c 3. if busiest alternates between two processors (lpipo true) 122c 123 if(loadb.eq.1.or.(loadb.eq.3.and.(lbsize.or.lpipo))) then 124 factor=(1.0d0-factld*(synsum/dble(np)-dlb(ibusy))/waltim)**third 125 facx=1.0d0 126 facy=1.0d0 127 facz=1.0d0 128 call argos_space_resize(ibusy-1,int_mb(i_iown), 129 + dbl_mb(i_boxs),factor,facx,facy,facz) 130 if(me.eq.0.and.lpsyn) then 131 write(lfnsyn,3000) 1,stime,synsum,waltim,ibusy-1, 132 + factor,facx,facy,facz,lpipo 133 3000 format('synchronization',/,i5,3f12.6,/,i5,4f12.6,4x,l1) 134 do 9 i=1,np 135 write(lfnsyn,3001) nod(i),dlb(nod(i)+1) 136 3001 format(i5,f12.6) 137 9 continue 138 call argos_space_wrtbxsz(lfnsyn,dbl_mb(i_boxs)) 139 endif 140 ipairf=-3 141 ipairt=-4 142 lpipo=.false. 143 tsyncp=synsum 144 nldup=0 145 tslow=synsum 146 return 147 endif 148c 149c cascade implementation 150c 151 if(me.eq.0) then 152c 153 do 12 i=1,np 154 nen(i,1)=-1 155 12 continue 156c 157c for each processor find least busy neighbor in cell-cell list 158c 159 do 10 i=1,np 160 node=nod(i) 161 n=nlst(node+1,2) 162 do 11 j=1,27 163 lnb(j)=iand(n,1).eq.1 164 n=rshift(n,1) 165 11 continue 166 least=-1 167 do 13 j=1,27 168 iproc=iburen(node+1,j,1) 169 if(nen(iproc+1,1).eq.-1) then 170 if(iproc.ne.node.and.lnb(j)) then 171 if(least.ge.0) then 172 if(dlb(iproc+1).gt.dlb(least+1)) least=iproc 173 else 174 if(dlb(iproc+1).gt.dlb(node+1)) least=iproc 175 endif 176 endif 177 endif 178 13 continue 179c 180c nen(i,1) contains processor id that processor i-1 will receive from 181c 182 if(least.ge.0) then 183 if(nen(least+1,1).lt.0) then 184 nen(least+1,1)=node 185 endif 186 endif 187c 188 10 continue 189c 190 endif 191c 192c 193c broadcast the list 194c 195 call ga_brdcst(msp_16,nen(1,1),np*intsiz,0) 196c 197c determine cell pair transfer list 198c 199c the list contains the box-box pairs to be moved as follows: 200c 201c nen(i,1) = ibox+1 202c nen(1,2) = jbox+1 203c 204c the pair will move from iproc to jproc 205c 206 do 14 i=1,np 207 if(me.eq.nen(i,1)) then 208 do 15 j=1,nbbl 209 if(lbbl(j,1).eq.i-1) then 210 nen(i,1)=lbbl(j,3)+1 211 nen(i,2)=lbbl(j,2)+1 212 goto 14 213 endif 214 15 continue 215 else 216 nen(i,1)=0 217 nen(i,2)=0 218 endif 219 14 continue 220c 221 call ga_igop(msp_17,nen,2*np,'+') 222c 223c cascading 224c 225 call argos_space_cascad(nen,int_mb(i_bb)) 226c 227 if(me.eq.0.and.lpsyn) then 228 write(lfnsyn,3002) 2,stime,synsum,waltim,nldup,tslow 229 3002 format('synchronization',/,i5,3f12.6,i5,f12.6) 230 do 16 i=1,np 231 write(lfnsyn,3003) nod(i),dlb(nod(i)+1) 232 3003 format(i5,f12.6,i7,i5) 233 16 continue 234 endif 235c 236 return 237 end 238c $Id$ 239