!
! Copyright (C) 2001-2013 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
!

MODULE mp_wave_parallel

  IMPLICIT NONE
  SAVE

CONTAINS

  SUBROUTINE mergewfp( npw, pw, pwt, ngwl, ig_l2g, mpime, nproc, root, comm )

    ! ... This subroutine merges the pieces of a wave function (pw) split across
    ! ... processors into a total wave function (pwt) containing all the components
    ! ... in a pre-defined order (the same as if only one processor were used).
    ! ...
    ! ... Communication pattern: a ring exchange.  At step ip each rank posts a
    ! ... non-blocking send of its index map / coefficients to rank mpime+ip
    ! ... (mod nproc) and a blocking receive from rank mpime-ip (mod nproc),
    ! ... then scatters the received coefficients into pwt via the received
    ! ... global-index map.

    USE kinds
    USE parallel_include
    USE io_global, ONLY : stdout

    IMPLICIT NONE

    INTEGER, INTENT(in)      :: npw, ngwl     ! leading dim of pw / number of local components
    COMPLEX(DP), INTENT(in)  :: pw(npw,nproc) ! local pieces, one column per destination rank
    COMPLEX(DP), INTENT(out) :: pwt(:)        ! merged global wave function
    INTEGER, INTENT(in)      :: mpime         ! index of the calling processor (starting from 0)
    INTEGER, INTENT(in)      :: nproc         ! number of processors
    INTEGER, INTENT(in)      :: root          ! root processor (kept for interface compatibility; unused here)
    INTEGER, INTENT(in)      :: comm          ! communicator
    INTEGER, INTENT(in)      :: ig_l2g(:)     ! local-to-global index map for the npw local components

    INTEGER, ALLOCATABLE     :: ig_ip(:)      ! receive buffer: peer's index map
    COMPLEX(DP), ALLOCATABLE :: pw_ip(:)      ! receive buffer: peer's coefficients

    INTEGER :: ierr, i, ip, ngw_ip, ngw_lmax, itmp, igwx, gid, req

#if defined __MPI
    INTEGER :: istatus(MPI_STATUS_SIZE)
#endif

    INTEGER :: iorig, idest

    !
    ! ... Subroutine Body
    !

    igwx = MAXVAL( ig_l2g(1:ngwl) )

#if defined __MPI

    gid = comm

    ! ... Get local and global wavefunction dimensions
    CALL MPI_ALLREDUCE( ngwl, ngw_lmax, 1, MPI_INTEGER, MPI_MAX, gid, ierr )
    CALL MPI_ALLREDUCE( igwx, itmp, 1, MPI_INTEGER, MPI_MAX, gid, ierr )
    igwx = itmp

#endif

    IF( igwx > SIZE( pwt ) ) &
       CALL errore(' mergewfp ',' wrong size for pwt ', SIZE(pwt) )

#if defined __MPI

    ! ... Buffers are sized for the largest local piece on any rank
    ALLOCATE( ig_ip(ngw_lmax) )
    ALLOCATE( pw_ip(ngw_lmax) )

    DO ip = 0, nproc - 1

       IF( ip /= 0 ) THEN

          ! ... In turn each processor sends the wave components and their indexes
          ! ... in the global array; ring partners for this step:
          idest = mpime + ip
          IF( idest > nproc - 1 ) idest = idest - nproc

          iorig = mpime - ip
          IF( iorig < 0 ) iorig = iorig + nproc

          ! ... Non-blocking send avoids deadlock against the matching blocking receive
          CALL MPI_ISEND( ig_l2g, ngwl, MPI_INTEGER, idest, ip, gid, req, ierr )

          CALL MPI_RECV( ig_ip, ngw_lmax, MPI_INTEGER, iorig, ip, gid, istatus, ierr )

          CALL MPI_WAIT( req, istatus, ierr )

          ! ... Column idest+1 of pw holds the piece destined to rank idest
          CALL MPI_ISEND( pw(1,idest+1), ngwl, MPI_DOUBLE_COMPLEX, idest, ip, gid, req, ierr )

          CALL MPI_RECV( pw_ip, ngw_lmax, MPI_DOUBLE_COMPLEX, iorig, ip, gid, istatus, ierr )
          ! ... Actual number of components received (the sender's ngwl, <= ngw_lmax)
          CALL MPI_GET_COUNT( istatus, MPI_DOUBLE_COMPLEX, ngw_ip, ierr )

          CALL MPI_WAIT( req, istatus, ierr )

          ! ... Scatter the received piece into the global array
          DO i = 1, ngw_ip
             pwt( ig_ip(i) ) = pw_ip(i)
          END DO

       ELSE

          ! ... Step 0: copy the locally owned piece directly
          DO i = 1, ngwl
             pwt( ig_l2g(i) ) = pw(i, mpime+1)
          END DO

       END IF

       ! ... Keep the ring steps in lockstep across all ranks
       CALL MPI_BARRIER( gid, ierr )

    END DO

    DEALLOCATE( ig_ip )
    DEALLOCATE( pw_ip )

#else

    ! ... Serial: a plain reordering through the local-to-global map
    DO i = 1, ngwl
       pwt( ig_l2g(i) ) = pw(i,1)
    END DO

#endif

    RETURN
  END SUBROUTINE mergewfp

  SUBROUTINE splitwfp( npw, pw, pwt, ngwl, ig_l2g, mpime, nproc, root, comm )

    ! ... This subroutine splits a total wave function (pwt) containing all the
    ! ... components in a pre-defined order (the same as if only one processor
    ! ... were used) across processors (pw).
    ! ...
    ! ... Inverse of mergewfp, using the same ring-exchange pattern: at step ip
    ! ... each rank sends its index map to the rank it will receive data FROM
    ! ... (iorig), which looks the requested components up in its copy of pwt
    ! ... and sends them back.

    USE kinds
    USE parallel_include
    USE io_global, ONLY : stdout

    IMPLICIT NONE

    INTEGER, INTENT(in)      :: npw, nproc
    COMPLEX(DP), INTENT(out) :: pw(npw,nproc) ! local pieces, one column per source rank
    COMPLEX(DP), INTENT(in)  :: pwt(:)        ! global wave function to split
    INTEGER, INTENT(in)      :: mpime, root   ! root kept for interface compatibility; unused here
    INTEGER, INTENT(in)      :: comm          ! communicator
    INTEGER, INTENT(in)      :: ig_l2g(:)     ! local-to-global index map
    INTEGER, INTENT(in)      :: ngwl          ! number of local components

    INTEGER, ALLOCATABLE     :: ig_ip(:)
    COMPLEX(DP), ALLOCATABLE :: pw_ip(:)

    INTEGER :: ierr, i, ngw_ip, ip, ngw_lmax, gid, igwx, itmp, req

#if defined __MPI
    INTEGER :: istatus(MPI_STATUS_SIZE)
#endif

    INTEGER :: iorig, idest

    !
    ! ... Subroutine Body
    !

    igwx = MAXVAL( ig_l2g(1:ngwl) )

#if defined __MPI

    gid = comm

    ! ... Get local and global wavefunction dimensions
    CALL MPI_ALLREDUCE( ngwl, ngw_lmax, 1, MPI_INTEGER, MPI_MAX, gid, ierr )
    CALL MPI_ALLREDUCE( igwx, itmp,     1, MPI_INTEGER, MPI_MAX, gid, ierr )
    igwx = itmp

#endif

    IF( igwx > SIZE( pwt ) ) &
       CALL errore(' splitwfp ',' wrong size for pwt ', SIZE(pwt) )

#if defined __MPI

    ALLOCATE( ig_ip(ngw_lmax) )
    ALLOCATE( pw_ip(ngw_lmax) )

    DO ip = 0, nproc - 1

       ! ... Ring partners for this step
       idest = mpime + ip
       IF( idest > nproc - 1 ) idest = idest - nproc

       iorig = mpime - ip
       IF( iorig < 0 ) iorig = iorig + nproc

       IF( ip /= 0 ) THEN

          ! ... Tell iorig which global components this rank wants
          CALL MPI_ISEND( ig_l2g, ngwl, MPI_INTEGER, iorig, ip, gid, req, ierr )

          ! ... Receive idest's request and serve it from the local pwt
          CALL MPI_RECV( ig_ip, ngw_lmax, MPI_INTEGER, idest, ip, gid, istatus, ierr )

          CALL MPI_GET_COUNT( istatus, MPI_INTEGER, ngw_ip, ierr )
          DO i = 1, ngw_ip
             pw_ip(i) = pwt( ig_ip(i) )
          END DO

          CALL MPI_WAIT( req, istatus, ierr )

          CALL MPI_ISEND( pw_ip, ngw_ip, MPI_DOUBLE_COMPLEX, idest, ip, gid, req, ierr )
          ! ... Column iorig+1 of pw receives the piece coming from rank iorig
          CALL MPI_RECV( pw(1,iorig+1), ngwl, MPI_DOUBLE_COMPLEX, iorig, ip, gid, istatus, ierr )

          CALL MPI_WAIT( req, istatus, ierr )

       ELSE

          ! ... Step 0: the locally requested components come from the local pwt
          DO i = 1, ngwl
             pw(i, mpime+1) = pwt( ig_l2g(i) )
          END DO

       END IF

       CALL MPI_BARRIER( gid, ierr )

    END DO

    DEALLOCATE( ig_ip )
    DEALLOCATE( pw_ip )

#else

    ! ... Serial: a plain reordering through the local-to-global map
    DO i = 1, ngwl
       pw(i,1) = pwt( ig_l2g(i) )
    END DO

#endif

    RETURN
  END SUBROUTINE splitwfp

END MODULE mp_wave_parallel

SUBROUTINE reorderwfp( nbands, npw1, npw2, pw1, pw2, ngwl1, ngwl2, &
                       ig_l2g1, ig_l2g2, n_g, mpime, nproc, root, comm )

  ! ... Re-distributes nbands wave functions from the G-vector ordering of
  ! ... grid 1 (pw1, map ig_l2g1) to that of grid 2 (pw2, map ig_l2g2),
  ! ... processing the bands in blocks of nproc: each block is merged into a
  ! ... global array (pwt) and split back with the target ordering.

  USE kinds
  USE parallel_include
  USE io_global, ONLY : stdout
  USE mp_wave_parallel

  IMPLICIT NONE

  INTEGER, INTENT(in)      :: npw1, npw2, nbands
  COMPLEX(DP), INTENT(in)  :: pw1(npw1,nbands)
  COMPLEX(DP), INTENT(out) :: pw2(npw2,nbands)
  INTEGER, INTENT(in)      :: mpime, root, nproc
  INTEGER, INTENT(in)      :: comm                ! communicator
  INTEGER, INTENT(in)      :: ig_l2g1(ngwl1), ig_l2g2(ngwl2)
  INTEGER, INTENT(in)      :: ngwl1, ngwl2
  INTEGER, INTENT(in)      :: n_g                 ! global maximum number of G vectors for both grids

  COMPLEX(kind=DP), ALLOCATABLE :: cbuf1(:,:), cbuf2(:,:), pwt(:)
  INTEGER :: ii, ilast

  ALLOCATE( cbuf1(npw1,nproc), cbuf2(npw2,nproc) )
  ALLOCATE( pwt(n_g) )
  ! ... Zero the buffers so the last, possibly partial, block sends
  ! ... well-defined data in its unused columns
  cbuf1(:,:) = (0.d0, 0.d0)
  cbuf2(:,:) = (0.d0, 0.d0)

  ! ... Loop on blocks of nproc states
  DO ii = 1, nbands, nproc
     ilast = MIN( nbands, ii + nproc - 1 )
     cbuf1(1:npw1, 1:(ilast-ii+1)) = pw1(1:npw1, ii:ilast)
     CALL mergewfp( npw1, cbuf1, pwt, ngwl1, ig_l2g1, mpime, nproc, root, comm )
     CALL splitwfp( npw2, cbuf2, pwt, ngwl2, ig_l2g2, mpime, nproc, root, comm )
     pw2(1:npw2, ii:ilast) = cbuf2(1:npw2, 1:(ilast-ii+1))
  END DO

  DEALLOCATE( cbuf1, cbuf2 )
  DEALLOCATE( pwt )
  RETURN
END SUBROUTINE reorderwfp

SUBROUTINE reorderwfp_col( nbands, npw1, npw2, pw1, pw2, ngwl1, ngwl2, &
                           ig_l2g1, ig_l2g2, n_g, mpime, nproc, comm )

  ! ... Same task as reorderwfp, implemented with collective MPI calls:
  ! ... the index maps are replicated on every rank with MPI_ALLGATHER,
  ! ... then each band of a block is gathered onto a distinct root rank,
  ! ... remapped there through a global scratch array, and scattered back.

  USE kinds
  USE parallel_include
  USE io_global, ONLY : stdout
  USE mp_wave_parallel

  IMPLICIT NONE

  INTEGER, INTENT(in)      :: npw1, npw2, nbands
  COMPLEX(kind=DP), INTENT(in)  :: pw1(npw1,nbands)
  COMPLEX(kind=DP), INTENT(out) :: pw2(npw2,nbands)
  INTEGER, INTENT(in)      :: mpime, nproc
  INTEGER, INTENT(in)      :: comm                ! communicator
  INTEGER, INTENT(in)      :: ig_l2g1(ngwl1), ig_l2g2(ngwl2)
  INTEGER, INTENT(in)      :: ngwl1, ngwl2
  INTEGER, INTENT(in)      :: n_g                 ! global maximum number of G vectors for both grids

  INTEGER :: ngwl1_max, ngwl2_max, npw1_max, npw2_max
  INTEGER :: gid, ierr
  INTEGER, ALLOCATABLE :: npw1_loc(:), npw2_loc(:)
  INTEGER, ALLOCATABLE :: ig_l2g1_tot(:,:), ig_l2g2_tot(:,:), itmp(:)

  INTEGER :: ii, ip, ilast, iband
  COMPLEX(kind=DP), ALLOCATABLE :: pw1_tot(:,:), pw2_tot(:,:)
  COMPLEX(kind=DP), ALLOCATABLE :: pw1_tmp(:), pw2_tmp(:), pw_global(:)

  gid = comm

#if defined __MPI
  ALLOCATE( npw1_loc(nproc), npw2_loc(nproc) )

  ! ... All procs gather correspondence arrays and dimensions
  CALL MPI_ALLREDUCE( ngwl1, ngwl1_max, 1, MPI_INTEGER, MPI_MAX, gid, ierr )
  CALL MPI_ALLREDUCE( ngwl2, ngwl2_max, 1, MPI_INTEGER, MPI_MAX, gid, ierr )
  CALL MPI_ALLREDUCE( npw1,  npw1_max,  1, MPI_INTEGER, MPI_MAX, gid, ierr )
  CALL MPI_ALLREDUCE( npw2,  npw2_max,  1, MPI_INTEGER, MPI_MAX, gid, ierr )
  CALL MPI_ALLGATHER( npw1, 1, MPI_INTEGER, npw1_loc, 1, MPI_INTEGER, gid, ierr )
  CALL MPI_ALLGATHER( npw2, 1, MPI_INTEGER, npw2_loc, 1, MPI_INTEGER, gid, ierr )

  ALLOCATE( ig_l2g1_tot(ngwl1_max,nproc), ig_l2g2_tot(ngwl2_max,nproc) )
  ! ... Pad each map to the common maximum length before gathering
  ALLOCATE( itmp(ngwl1_max) )
  itmp(1:ngwl1) = ig_l2g1(1:ngwl1)
  CALL MPI_ALLGATHER( itmp, ngwl1_max, MPI_INTEGER, ig_l2g1_tot, ngwl1_max, MPI_INTEGER, gid, ierr )
  DEALLOCATE( itmp )
  ALLOCATE( itmp(ngwl2_max) )
  itmp(1:ngwl2) = ig_l2g2(1:ngwl2)
  CALL MPI_ALLGATHER( itmp, ngwl2_max, MPI_INTEGER, ig_l2g2_tot, ngwl2_max, MPI_INTEGER, gid, ierr )
  DEALLOCATE( itmp )

  ALLOCATE( pw1_tot(npw1_max,nproc), pw2_tot(npw2_max,nproc) )
  ALLOCATE( pw1_tmp(npw1_max), pw2_tmp(npw2_max) )
  ALLOCATE( pw_global(n_g) )

  DO ii = 1, nbands, nproc
     ilast = MIN( nbands, ii + nproc - 1 )
     DO iband = ii, ilast
        ip = MOD( iband, nproc )   ! root rank for this band, in 0 .. nproc-1
        pw1_tmp(1:npw1) = pw1(1:npw1, iband)
        CALL MPI_GATHER( pw1_tmp, npw1_max, MPI_DOUBLE_COMPLEX, pw1_tot, npw1_max, &
                         MPI_DOUBLE_COMPLEX, ip, gid, ierr )
     END DO
     ! ... On each root: assemble the band in global ordering, then read it
     ! ... back out with the target grid's ordering
     pw_global = (0.d0, 0.d0)
     DO ip = 1, nproc
        pw_global( ig_l2g1_tot(1:npw1_loc(ip), ip) ) = pw1_tot(1:npw1_loc(ip), ip)
     END DO
     DO ip = 1, nproc
        pw2_tot(1:npw2_loc(ip), ip) = pw_global( ig_l2g2_tot(1:npw2_loc(ip), ip) )
     END DO
     DO iband = ii, ilast
        ip = MOD( iband, nproc )
        CALL MPI_SCATTER( pw2_tot, npw2_max, MPI_DOUBLE_COMPLEX, pw2_tmp, npw2_max, &
                          MPI_DOUBLE_COMPLEX, ip, gid, ierr )
        pw2(1:npw2, iband) = pw2_tmp(1:npw2)
     END DO
  END DO

  DEALLOCATE( npw1_loc, npw2_loc )
  DEALLOCATE( ig_l2g1_tot, ig_l2g2_tot )
  DEALLOCATE( pw1_tot, pw2_tot )
  DEALLOCATE( pw1_tmp, pw2_tmp )
  DEALLOCATE( pw_global )
#else

  ! ... Serial fallback (the original left pw2 undefined without MPI):
  ! ... remap each band directly through the global ordering, consistently
  ! ... with the serial branches of mergewfp/splitwfp
  ALLOCATE( pw_global(n_g) )
  DO iband = 1, nbands
     pw_global = (0.d0, 0.d0)
     pw_global( ig_l2g1(1:ngwl1) ) = pw1(1:ngwl1, iband)
     pw2(1:ngwl2, iband) = pw_global( ig_l2g2(1:ngwl2) )
  END DO
  DEALLOCATE( pw_global )

#endif
  RETURN
END SUBROUTINE reorderwfp_col