!
! Copyright (C) 2001-2013 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
!

MODULE mp_wave_parallel

      IMPLICIT NONE
      SAVE

    CONTAINS

      SUBROUTINE mergewfp ( npw, pw, pwt, ngwl, ig_l2g, mpime, nproc, root, comm )

! ... This subroutine merges the pieces of a wave function (pw) split across
! ... processors into a total wave function (pwt) containing all the components
! ... in a pre-defined order (the same as if only one processor were used)
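! ... Here nproc bands are processed at once: column pw(:,ip+1) holds the local
! ... piece of one band, and on exit process mpime owns in pwt the complete
! ... wave function corresponding to column mpime+1 of pw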

      USE kinds
      USE parallel_include
      USE io_global, ONLY : stdout

      IMPLICIT NONE

      INTEGER, INTENT(IN) :: npw, ngwl
      INTEGER, INTENT(IN) :: mpime     ! index of the calling processor ( starting from 0 )
      INTEGER, INTENT(IN) :: nproc     ! number of processors
      INTEGER, INTENT(IN) :: root      ! root processor ( the one that should receive the data )
      INTEGER, INTENT(IN) :: comm      ! communicator
      INTEGER, INTENT(IN) :: ig_l2g(:)
      COMPLEX(DP), INTENT(IN)  :: PW(npw,nproc)
      COMPLEX(DP), INTENT(OUT) :: PWT(:)

      INTEGER, ALLOCATABLE :: ig_ip(:)
      COMPLEX(DP), ALLOCATABLE :: pw_ip(:)

      INTEGER :: ierr, i, ip, ngw_ip, ngw_lmax, itmp, igwx, gid, req

#if defined __MPI
      INTEGER :: istatus(MPI_STATUS_SIZE)
#endif

      INTEGER :: iorig, idest

!
! ... Subroutine Body
!

      igwx = MAXVAL( ig_l2g(1:ngwl) )

#if defined __MPI

      gid = comm

! ... Get local and global wavefunction dimensions
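! ... (ngw_lmax sizes the exchange buffers below; igwx, the largest global
! ... G-vector index over all processors, is checked against SIZE(pwt))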
      CALL MPI_ALLREDUCE( ngwl, ngw_lmax, 1, MPI_INTEGER, MPI_MAX, gid, IERR )
      CALL MPI_ALLREDUCE( igwx, itmp, 1, MPI_INTEGER, MPI_MAX, gid, IERR )
      igwx = itmp

#endif

      IF( igwx > SIZE( pwt ) ) &
        CALL errore(' mergewfp ',' wrong size for pwt ',SIZE(pwt) )

#if defined __MPI
      ALLOCATE(ig_ip(ngw_lmax))
      ALLOCATE(pw_ip(ngw_lmax))

      do ip = 0, nproc-1

         if( ip/=0) then

! ...     In turn each processor sends its wave-function components and their
! ...     indices in the global array to process idest, and receives the
! ...     corresponding data from process iorig
            idest=mpime+ip
            if(idest>nproc-1)idest=idest-nproc

            iorig=mpime-ip
            if(iorig<0)iorig=iorig+nproc
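! ...     Round-robin schedule: at step ip each process sends to mpime+ip and
! ...     receives from mpime-ip (modulo nproc), so all processes are paired
! ...     and the non-blocking sends avoid deadlock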

            CALL MPI_ISEND( ig_l2g, ngwl, MPI_INTEGER, idest, IP, gid, req, IERR )

            CALL MPI_RECV( ig_ip, ngw_lmax, MPI_INTEGER, iorig, IP, gid, istatus, IERR )

            CALL MPI_WAIT( req, istatus, ierr )

            CALL MPI_ISEND( pw(1,idest+1), ngwl, MPI_DOUBLE_COMPLEX, idest, IP, gid, req, IERR )

            CALL MPI_RECV( pw_ip, ngw_lmax, MPI_DOUBLE_COMPLEX, iorig, IP, gid, istatus, IERR )
            CALL MPI_GET_COUNT( istatus, MPI_DOUBLE_COMPLEX, ngw_ip, ierr )

            CALL MPI_WAIT( req, istatus, ierr )

            DO i = 1, ngw_ip
               PWT(ig_ip(i)) = pw_ip(i)
            END DO

         ELSE

            DO i = 1, ngwl
               PWT(ig_l2g(i)) = pw(i,mpime+1)
            END DO

         END IF

         CALL MPI_BARRIER( gid, IERR )

      END DO

      DEALLOCATE(ig_ip)
      DEALLOCATE(pw_ip)

#elif ! defined __MPI

      DO i = 1, ngwl
         PWT( ig_l2g(i) ) = pw(i,1)
      END DO

#else

      CALL errore(' MERGEWF ',' no communication protocol ',0)

#endif

      RETURN
    END SUBROUTINE mergewfp

    SUBROUTINE splitwfp ( npw, pw, pwt, ngwl, ig_l2g, mpime, nproc, root, comm )

! ... This subroutine splits a total wave function (pwt) containing all the components
! ... in a pre-defined order (the same as if only one processor were used) across
! ... processors (pw).
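! ... As in mergewfp, nproc bands are processed at once: on entry process mpime
! ... holds in pwt one complete wave function, and on exit column pw(:,ip+1)
! ... contains the local components of the wave function that was held by
! ... process ip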

      USE kinds
      USE parallel_include
      USE io_global, ONLY : stdout

      IMPLICIT NONE

      INTEGER, INTENT(IN) :: npw, nproc
      INTEGER, INTENT(IN) :: mpime, root
      INTEGER, INTENT(IN) :: comm    ! communicator
      INTEGER, INTENT(IN) :: ig_l2g(:)
      INTEGER, INTENT(IN) :: ngwl
      COMPLEX(DP), INTENT(OUT) :: PW(npw,nproc)
      COMPLEX(DP), INTENT(IN)  :: PWT(:)

      INTEGER, ALLOCATABLE :: ig_ip(:)
      COMPLEX(DP), ALLOCATABLE :: pw_ip(:)

      INTEGER :: ierr, i, ngw_ip, ip, ngw_lmax, gid, igwx, itmp, req

#if defined __MPI
      INTEGER :: istatus(MPI_STATUS_SIZE)
#endif

      INTEGER :: iorig, idest

!
! ... Subroutine Body
!

      igwx = MAXVAL( ig_l2g(1:ngwl) )

#if defined __MPI

      gid = comm

! ... Get local and global wavefunction dimensions
      CALL MPI_ALLREDUCE( ngwl, ngw_lmax, 1, MPI_INTEGER, MPI_MAX, gid, IERR )
      CALL MPI_ALLREDUCE( igwx, itmp, 1, MPI_INTEGER, MPI_MAX, gid, IERR )
      igwx = itmp

#endif

      IF( igwx > SIZE( pwt ) ) &
        CALL errore(' splitwfp ',' wrong size for pwt ',SIZE(pwt) )

#if defined __MPI
      ALLOCATE(ig_ip(ngw_lmax))
      ALLOCATE(pw_ip(ngw_lmax))

      DO ip = 0, nproc-1

         idest=mpime+ip
         if(idest>nproc-1)idest=idest-nproc

         iorig=mpime-ip
         if(iorig<0)iorig=iorig+nproc
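! ...    round-robin pairing as in mergewfp: each process sends its index map
! ...    (ig_l2g) to iorig, which holds the wave function it needs, and serves
! ...    the request arriving from idest out of its own pwt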

         if(ip/=0) then

            CALL MPI_ISEND( ig_l2g, ngwl, MPI_INTEGER, iorig, IP, gid, req, IERR )

            CALL MPI_RECV( ig_ip, ngw_lmax, MPI_INTEGER, idest, IP, gid, istatus, IERR )

            CALL MPI_GET_COUNT( istatus, MPI_INTEGER, ngw_ip, ierr )
            DO i = 1, ngw_ip
               pw_ip(i) = PWT(ig_ip(i))
            END DO

            CALL MPI_WAIT( req, istatus, ierr )

            CALL MPI_ISEND( pw_ip, ngw_ip, MPI_DOUBLE_COMPLEX, idest, IP, gid, req, IERR )
            CALL MPI_RECV( pw(1,iorig+1), ngwl, MPI_DOUBLE_COMPLEX, iorig, IP, gid, istatus, IERR )
            !CALL MPI_GET_COUNT(istatus, MPI_INTEGER, ngw_ip, ierr)

            CALL MPI_WAIT( req, istatus, ierr )
         ELSE

            DO i = 1, ngwl
               pw(i,mpime+1) = PWT(ig_l2g(i))
            END DO

         END IF
         CALL MPI_BARRIER( gid, IERR )

      END DO
      DEALLOCATE(ig_ip)
      DEALLOCATE(pw_ip)

#elif ! defined __MPI

      DO i = 1, ngwl
         pw(i,1) = pwt( ig_l2g(i) )
      END DO

#else

      CALL errore(' SPLITWF ',' no communication protocol ',0)

#endif

      RETURN
    END SUBROUTINE splitwfp

  END MODULE mp_wave_parallel

  SUBROUTINE reorderwfp (nbands, npw1, npw2, pw1, pw2, ngwl1, ngwl2, ig_l2g1, ig_l2g2, n_g, mpime, nproc, root, comm )

      USE kinds
      USE parallel_include
      USE io_global, ONLY : stdout
      USE mp_wave_parallel

      IMPLICIT NONE

      INTEGER, INTENT(IN) :: npw1, npw2, nbands
      INTEGER, INTENT(IN) :: mpime, root, nproc
      INTEGER, INTENT(IN) :: comm    ! communicator
      INTEGER, INTENT(IN) :: ngwl1, ngwl2
      INTEGER, INTENT(IN) :: ig_l2g1(ngwl1), ig_l2g2(ngwl2)
      INTEGER, INTENT(IN) :: n_g     ! global maximum number of G vectors for both grids
      COMPLEX(DP), INTENT(IN)  :: pw1(npw1,nbands)
      COMPLEX(DP), INTENT(OUT) :: pw2(npw2,nbands)

      COMPLEX(kind=DP), ALLOCATABLE :: cbuf1(:,:), cbuf2(:,:), pwt(:)
      INTEGER :: ii, ilast

      allocate(cbuf1(npw1,nproc),cbuf2(npw2,nproc))
      allocate(pwt(n_g))
      cbuf1(:,:)=(0.d0,0.d0)
      cbuf2(:,:)=(0.d0,0.d0)

      ! loop over blocks of nproc states: within each block, band ii+mpime is
      ! assembled on processor mpime and redistributed on the second grid
      do ii=1,nbands,nproc
         ilast=min(nbands,ii+nproc-1)
         cbuf1(1:npw1,1:(ilast-ii+1))=pw1(1:npw1,ii:ilast)
         call mergewfp ( npw1, cbuf1, pwt, ngwl1, ig_l2g1, mpime, nproc, root, comm )
         call splitwfp ( npw2, cbuf2, pwt, ngwl2, ig_l2g2, mpime, nproc, root, comm )
         pw2(1:npw2,ii:ilast)=cbuf2(1:npw2,1:(ilast-ii+1))
      enddo
      deallocate(cbuf1,cbuf2)
      deallocate(pwt)
      return
    END SUBROUTINE reorderwfp
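
! ... Usage sketch (illustrative only, commented out): moving nbnd distributed
! ... bands from the G-vector ordering of grid 1 to that of grid 2. The names
! ... evc1, evc2, ig_l2g_1, ig_l2g_2, ngm_g, me_pool, nproc_pool and
! ... intra_pool_comm are placeholders for whatever the calling code provides.
!
!      COMPLEX(DP), ALLOCATABLE :: evc2(:,:)
!      ALLOCATE( evc2(npw2,nbnd) )
!      CALL reorderwfp( nbnd, npw1, npw2, evc1, evc2, ngwl1, ngwl2, &
!                       ig_l2g_1, ig_l2g_2, ngm_g, me_pool, nproc_pool, &
!                       0, intra_pool_comm )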

    SUBROUTINE reorderwfp_col (nbands, npw1, npw2, pw1, pw2, ngwl1, ngwl2, ig_l2g1, ig_l2g2, n_g, mpime, nproc, comm )
! ... Same reordering as reorderwfp, but implemented with collective MPI calls
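! ... For each block of nproc bands: every band is gathered onto one root
! ... process (rank mod(iband,nproc)) with MPI_GATHER, remapped there through
! ... a global G-vector array, and scattered back with MPI_SCATTER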
      USE kinds
      USE parallel_include
      USE io_global, ONLY : stdout
      USE mp_wave_parallel

      IMPLICIT NONE

      INTEGER, INTENT(IN) :: npw1, npw2, nbands
      INTEGER, INTENT(IN) :: mpime, nproc
      INTEGER, INTENT(IN) :: comm    ! communicator
      INTEGER, INTENT(IN) :: ngwl1, ngwl2
      INTEGER, INTENT(IN) :: ig_l2g1(ngwl1), ig_l2g2(ngwl2)
      INTEGER, INTENT(IN) :: n_g     ! global maximum number of G vectors for both grids
      COMPLEX(kind=DP) :: pw1(npw1,nbands), pw2(npw2,nbands)

      INTEGER :: ngwl1_max, ngwl2_max, npw1_max, npw2_max
      INTEGER :: gid, ierr
      INTEGER, ALLOCATABLE :: npw1_loc(:), npw2_loc(:)
      INTEGER, ALLOCATABLE :: ig_l2g1_tot(:,:), ig_l2g2_tot(:,:), itmp(:)

      INTEGER :: ii, ip, ilast, iband
      COMPLEX(kind=DP), ALLOCATABLE :: pw1_tot(:,:), pw2_tot(:,:)
      COMPLEX(kind=DP), ALLOCATABLE :: pw1_tmp(:), pw2_tmp(:), pw_global(:)

      gid=comm

#if defined __MPI
      allocate(npw1_loc(nproc),npw2_loc(nproc))
! ... all processes gather the local dimensions and the local-to-global
! ... correspondence arrays of both G-vector grids
      CALL MPI_ALLREDUCE( ngwl1, ngwl1_max, 1, MPI_INTEGER, MPI_MAX, gid, IERR )
      CALL MPI_ALLREDUCE( ngwl2, ngwl2_max, 1, MPI_INTEGER, MPI_MAX, gid, IERR )
      CALL MPI_ALLREDUCE( npw1, npw1_max, 1, MPI_INTEGER, MPI_MAX, gid, IERR )
      CALL MPI_ALLREDUCE( npw2, npw2_max, 1, MPI_INTEGER, MPI_MAX, gid, IERR )
      CALL MPI_ALLGATHER( npw1, 1, MPI_INTEGER, npw1_loc, 1, MPI_INTEGER, gid, IERR )
      CALL MPI_ALLGATHER( npw2, 1, MPI_INTEGER, npw2_loc, 1, MPI_INTEGER, gid, IERR )

      allocate(ig_l2g1_tot(ngwl1_max,nproc),ig_l2g2_tot(ngwl2_max,nproc))
      allocate(itmp(ngwl1_max))
      itmp(1:ngwl1)=ig_l2g1(1:ngwl1)
      CALL MPI_ALLGATHER( itmp, ngwl1_max, MPI_INTEGER, ig_l2g1_tot, ngwl1_max, MPI_INTEGER, gid, IERR )
      deallocate(itmp)
      allocate(itmp(ngwl2_max))
      itmp(1:ngwl2)=ig_l2g2(1:ngwl2)
      CALL MPI_ALLGATHER( itmp, ngwl2_max, MPI_INTEGER, ig_l2g2_tot, ngwl2_max, MPI_INTEGER, gid, IERR )
      deallocate(itmp)

      allocate(pw1_tot(npw1_max,nproc),pw2_tot(npw2_max,nproc))
      allocate(pw1_tmp(npw1_max),pw2_tmp(npw2_max))
      allocate(pw_global(n_g))

      do ii=1,nbands,nproc
         ilast=min(nbands,ii+nproc-1)
         do iband=ii,ilast
            ip=mod(iband,nproc)   ! root for this band: 1,...,nproc-1,0 within a full block
            pw1_tmp(1:npw1)=pw1(1:npw1,iband)
            CALL MPI_GATHER( pw1_tmp, npw1_max, MPI_DOUBLE_COMPLEX, pw1_tot, npw1_max, MPI_DOUBLE_COMPLEX, ip, gid, ierr )
         enddo
         pw_global=(0.d0,0.d0)
         do ip=1,nproc
            pw_global(ig_l2g1_tot(1:npw1_loc(ip),ip))=pw1_tot(1:npw1_loc(ip),ip)
         enddo
         do ip=1,nproc
            pw2_tot(1:npw2_loc(ip),ip)=pw_global(ig_l2g2_tot(1:npw2_loc(ip),ip))
         enddo
         do iband=ii,ilast
            ip=mod(iband,nproc)
            CALL MPI_SCATTER( pw2_tot, npw2_max, MPI_DOUBLE_COMPLEX, pw2_tmp, npw2_max, MPI_DOUBLE_COMPLEX, ip, gid, ierr )
            pw2(1:npw2,iband)=pw2_tmp(1:npw2)
         enddo
      enddo

      deallocate(npw1_loc,npw2_loc)
      deallocate(ig_l2g1_tot,ig_l2g2_tot)
      deallocate(pw1_tot,pw2_tot)
      deallocate(pw1_tmp,pw2_tmp)
      deallocate(pw_global)
#endif
      return
    END SUBROUTINE reorderwfp_col