!
! Copyright (C) 2003 A. Smogunov
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
! Generalized to spinor wavefunctions and spin-orbit Oct. 2004 (ADC).
!
!
SUBROUTINE rotproc (fun0, fund0, fun1, fund1, funl0, fundl0, funl1,  &
                   fundl1, intw1, intw2, n2d, norbf, norb, nrzp)
!
! This subroutine implements a matching procedure that constructs the
! local and nonlocal functions on the whole region from those computed
! by the individual CPUs.
! It works with 1, 2, 4, 8, 16, ... CPUs (i.e. any power of two).
! The matching scheme with 8 CPUs looks like this:

! |   1   |   2   |   3   |   4   |   5   |   6   |   7   |   8   |
!         +               +               +               +
! |               |               |               |               |
!                 +                               +
! |                               |                               |
!                                 +
! |                                                               |
!
! So in this case there are 3 matching steps.
!

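!
! At each step two neighbouring regions are glued together: imposing
! continuity of the functions phi and of their derivatives phi' at the
! common boundary leads, for every column, to a linear system of size
! 2*n2d, assembled below from fun1/fund1 on the left CPU and
! fun0/fund0 on the right CPU and solved with LAPACK ZGESV. (This is
! a sketch of the idea; the exact layout of amat and vec is spelled
! out inside the loop.)
!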
  USE kinds,            ONLY : DP
  USE noncollin_module, ONLY : npol
  USE parallel_include
  USE mp_world,         ONLY : nproc
  USE mp_pools,         ONLY : me_pool, intra_pool_comm
  USE mp,               ONLY : mp_sum
  USE cond,             ONLY : lorb, funz0

  IMPLICIT NONE

  INTEGER :: ig, n, lam, lam1, iorb, iorb1, norbf, norb, n2d,  &
             ibound, numb, ninsl, ib, icolor, ikey, new_comm, nrzp, info
  INTEGER, ALLOCATABLE :: ipiv(:)
  COMPLEX(DP), PARAMETER :: one=(1.d0,0.d0), zero=(0.d0,0.d0)
  COMPLEX(DP) :: fun0(n2d, 2*n2d),         & ! phi_n(0)
                 fund0(n2d, 2*n2d),        & ! phi'_n(0)
                 fun1(n2d, 2*n2d),         & ! phi_n(d)
                 fund1(n2d, 2*n2d),        & ! phi'_n(d)
                 funl0(n2d, npol*norbf),   & ! phi_alpha(0)
                 fundl0(n2d, npol*norbf),  & ! phi'_alpha(0)
                 funl1(n2d, npol*norbf),   & ! phi_alpha(d)
                 fundl1(n2d, npol*norbf),  & ! phi'_alpha(d)
                 intw1(norbf*npol, 2*n2d), & ! integrals on phi_n
                 intw2(norbf*npol, norbf*npol) ! integrals on phi_alpha

  COMPLEX(DP), ALLOCATABLE :: x(:), y(:), amat(:,:), vec(:,:),  &
                              amat_aux(:,:), vec_aux(:,:)

#if defined(__MPI)

! nothing to match on a single CPU
  IF (nproc.EQ.1) RETURN

  ALLOCATE( x( n2d ) )
  ALLOCATE( y( n2d ) )
  ALLOCATE( amat( 2*n2d, 2*n2d ) )
  ALLOCATE( amat_aux( 2*n2d, 2*n2d ) )
  ALLOCATE( vec( 2*n2d, 2*n2d+npol*norb ) )
  ALLOCATE( vec_aux( 2*n2d, 2*n2d+npol*norb ) )
  ALLOCATE( ipiv( 2*n2d ) )

  numb=0           ! how many boundaries this CPU has already served
  ibound=nproc/2   ! boundaries matched at the current step
  ninsl=1          ! current width of each region, in CPUs
! me_pool+1 rounded up to the next even number: the CPU just right
! of this CPU's first matching boundary
  ib=2*(me_pool+1)-(me_pool+1)/2*2
  DO WHILE(ibound.GT.0)

!
!   Find the matching coefficients for a group of CPUs
!
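!   Each pair of regions being matched gets its own communicator:
!   icolor labels the pair and ikey orders the CPUs inside it, so
!   the reductions below run only over the CPUs of one group.
!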
    icolor=(ib+ninsl-1)/(2*ninsl)
    ikey=((me_pool+1)+ninsl)-ib
    CALL mpi_barrier (MPI_COMM_WORLD, info)
    CALL mpi_comm_split(MPI_COMM_WORLD, icolor, ikey, new_comm, info)
    amat=(0.d0,0.d0)
    vec=(0.d0,0.d0)
    IF((me_pool+1).EQ.ib) THEN
      DO lam=1, n2d
        DO lam1=1, n2d
          amat(lam, n2d+lam1)=-fun0(lam,n2d+lam1)
          amat(n2d+lam, n2d+lam1)=-fund0(lam,n2d+lam1)
          vec(lam, lam1)=fun0(lam, lam1)
          vec(n2d+lam, lam1)=fund0(lam, lam1)
        ENDDO
        DO iorb=1, npol*norb
          vec(lam, 2*n2d+iorb)=funl0(lam, iorb)
          vec(n2d+lam, 2*n2d+iorb)=fundl0(lam, iorb)
        ENDDO
      ENDDO
      numb=numb+1
    ENDIF
    IF((me_pool+1).EQ.ib-1) THEN
      DO lam=1, n2d
        DO lam1=1, n2d
          amat(lam, lam1)=fun1(lam,lam1)
          amat(n2d+lam, lam1)=fund1(lam,lam1)
          vec(lam, n2d+lam1)=-fun1(lam, n2d+lam1)
          vec(n2d+lam, n2d+lam1)=-fund1(lam, n2d+lam1)
        ENDDO
        DO iorb=1, npol*norb
          vec(lam, 2*n2d+iorb)=-funl1(lam, iorb)
          vec(n2d+lam, 2*n2d+iorb)=-fundl1(lam, iorb)
        ENDDO
      ENDDO
      numb=numb+1
    ENDIF
!   the COMPLEX(DP) arrays are reduced as pairs of doubles, hence the
!   extra factor 2 in the counts
    CALL mpi_allreduce(amat, amat_aux, 2*2*n2d*2*n2d, MPI_DOUBLE_PRECISION, &
                       MPI_SUM, new_comm, info)
    CALL mpi_allreduce(vec, vec_aux, 2*2*n2d*(2*n2d+npol*norb),        &
                       MPI_DOUBLE_PRECISION, MPI_SUM, new_comm, info)
    CALL dcopy(2*2*n2d*2*n2d, amat_aux, 1, amat, 1)
    CALL dcopy(2*2*n2d*(2*n2d+npol*norb), vec_aux, 1, vec, 1)
    CALL ZGESV(2*n2d, 2*n2d+npol*norb, amat, 2*n2d, ipiv,              &
               vec, 2*n2d, info)
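!
!   on exit from ZGESV the columns of vec hold the matching
!   coefficients: rows 1:n2d multiply the functions of the left
!   region, rows n2d+1:2*n2d those of the right region
!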
!
!   recalculate the functions for the CPUs lying to the left of the
!   matching boundary
!
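!   (each new function is a linear combination of the old ones, e.g.
!   fun1_new(:,n) = fun1(:,n) + sum_lam vec(n2d+lam,n)*fun1(:,n2d+lam),
!   and likewise for the derivatives and the orbital functions)
!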
    IF(numb.LE.1.AND.(me_pool+1)/2*2.EQ.(me_pool+1)) THEN
     DO ig=1, n2d
      DO n=1, n2d
       DO lam=1, n2d
        fun1(ig, n)=fun1(ig, n)+  &
                           vec(n2d+lam, n)*fun1(ig, n2d+lam)
        fund1(ig, n)=fund1(ig, n)+  &
                           vec(n2d+lam, n)*fund1(ig, n2d+lam)
       ENDDO
      ENDDO
      DO iorb=1, npol*norb
       DO lam=1, n2d
        funl1(ig, iorb)=funl1(ig, iorb)+  &
                vec(n2d+lam, 2*n2d+iorb)*fun1(ig, n2d+lam)
        fundl1(ig, iorb)=fundl1(ig, iorb)+  &
                vec(n2d+lam, 2*n2d+iorb)*fund1(ig, n2d+lam)
       ENDDO
      ENDDO
     ENDDO
     DO ig=1, n2d
       x=(0.d0,0.d0)
       y=(0.d0,0.d0)
       DO n=1, n2d
         DO lam=1, n2d
           x(n)=x(n)+vec(n2d+lam, n2d+n)*fun1(ig, n2d+lam)
           y(n)=y(n)+vec(n2d+lam, n2d+n)*fund1(ig, n2d+lam)
         ENDDO
       ENDDO
       DO n=1, n2d
         fun1(ig, n2d+n)=x(n)
         fund1(ig, n2d+n)=y(n)
       ENDDO
     ENDDO
    ENDIF
!
!   recalculate the functions for the CPUs lying to the right of the
!   matching boundary
!
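!   (same kind of combination as above, now acting on the left-edge
!   functions fun0/fund0 with the coefficient rows 1:n2d of vec)
!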
    IF(numb.LE.1.AND.(me_pool+1)/2*2.NE.(me_pool+1)) THEN
     DO ig=1, n2d
      DO n=1, n2d
       DO lam=1, n2d
        fun0(ig, n2d+n)=fun0(ig, n2d+n)+  &
                           vec(lam, n2d+n)*fun0(ig, lam)
        fund0(ig, n2d+n)=fund0(ig, n2d+n)+  &
                           vec(lam, n2d+n)*fund0(ig, lam)
       ENDDO
      ENDDO
      DO iorb=1, npol*norb
       DO lam=1, n2d
        funl0(ig, iorb)=funl0(ig, iorb)+  &
                     vec(lam, 2*n2d+iorb)*fun0(ig, lam)
        fundl0(ig, iorb)=fundl0(ig, iorb)+  &
                     vec(lam, 2*n2d+iorb)*fund0(ig, lam)
       ENDDO
      ENDDO
     ENDDO
     DO ig=1, n2d
       x=(0.d0,0.d0)
       y=(0.d0,0.d0)
       DO n=1, n2d
         DO lam=1, n2d
           x(n)=x(n)+vec(lam, n)*fun0(ig, lam)
           y(n)=y(n)+vec(lam, n)*fund0(ig, lam)
         ENDDO
       ENDDO
       DO n=1, n2d
         fun0(ig, n)=x(n)
         fund0(ig, n)=y(n)
       ENDDO
     ENDDO
    ENDIF
!
!  recalculate the integrals for a given group of CPUs
!
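!  (the integrals intw1, over the local functions, and intw2, over the
!  orbital ones, are linear in the functions, so they transform with
!  the same coefficients vec as the functions themselves)
!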
    IF((me_pool+1).GE.ib) THEN
     DO iorb=1, npol*norb
      DO iorb1=1, npol*norb
       DO lam=1, n2d
        intw2(iorb,iorb1)=intw2(iorb,iorb1)+           &
           vec(n2d+lam, 2*n2d+iorb1)*intw1(iorb, n2d+lam)
       ENDDO
      ENDDO
     ENDDO
     DO iorb=1, npol*norb
       x=(0.d0,0.d0)
       DO n=1, n2d
         DO lam=1, n2d
           x(n)=x(n)+vec(n2d+lam, n2d+n)*intw1(iorb, n2d+lam)
           intw1(iorb, n)=intw1(iorb, n)+   &
               vec(n2d+lam, n)*intw1(iorb, n2d+lam)
         ENDDO
       ENDDO
       DO n=1, n2d
         intw1(iorb, n2d+n)=x(n)
       ENDDO
     ENDDO

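!    with lorb the functions stored on the nrzp z-planes (funz0) are
!    recombined in the same way, plane by plane, via three ZGEMM
!    calls with vec_aux as workspace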
     IF (lorb) THEN
        DO n = 1, nrzp
          CALL zgemm('n','n',n2d,n2d,n2d,one,funz0(1,n2d+1,n),n2d,&
                     vec(n2d+1,1),2*n2d,one,funz0(1,1,n),n2d)
          CALL zgemm('n','n',n2d,npol*norb,n2d,one,funz0(1,n2d+1,n),n2d,&
                     vec(n2d+1,2*n2d+1),2*n2d,one,funz0(1,2*n2d+1,n),n2d)
          CALL zgemm('n','n',n2d,n2d,n2d,one,funz0(1,n2d+1,n),n2d,&
                      vec(n2d+1,n2d+1),2*n2d,zero,vec_aux(1,1),2*n2d)
          DO ig = 1, n2d
            DO lam = 1, n2d
              funz0(ig,n2d+lam,n) = vec_aux(ig,lam)
            ENDDO
          ENDDO
        ENDDO
     ENDIF

    ELSE
     DO iorb=1, npol*norb
      DO iorb1=1, npol*norb
       DO lam=1, n2d
        intw2(iorb, iorb1)=intw2(iorb, iorb1)+           &
             vec(lam, 2*n2d+iorb1)*intw1(iorb, lam)
       ENDDO
      ENDDO
     ENDDO
     DO iorb=1, npol*norb
       x=(0.d0,0.d0)
       DO n=1, n2d
         DO lam=1, n2d
           x(n)=x(n)+vec(lam, n)*intw1(iorb, lam)
           intw1(iorb, n2d+n)=intw1(iorb, n2d+n)+   &
               vec(lam, n2d+n)*intw1(iorb, lam)
         ENDDO
       ENDDO
       DO n=1, n2d
         intw1(iorb, n)=x(n)
       ENDDO
     ENDDO

!    same recombination of funz0, now with the coefficient rows 1:n2d
     IF (lorb) THEN
        DO n = 1, nrzp
          CALL zgemm('n','n',n2d,n2d,n2d,one,funz0(1,1,n),n2d,&
                     vec(1,n2d+1),2*n2d,one,funz0(1,n2d+1,n),n2d)
          CALL zgemm('n','n',n2d,npol*norb,n2d,one,funz0(1,1,n),n2d,&
                     vec(1,2*n2d+1),2*n2d,one,funz0(1,2*n2d+1,n),n2d)
          CALL zgemm('n','n',n2d,n2d,n2d,one,funz0(1,1,n),n2d,&
                      vec(1,1),2*n2d,zero,vec_aux(1,1),2*n2d)
          DO ig = 1, n2d
            DO lam = 1, n2d
              funz0(ig,lam,n) = vec_aux(ig,lam)
            ENDDO
          ENDDO
        ENDDO
     ENDIF

    ENDIF

!
!  move to the next matching step
!
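!  (the groups pair up: ninsl, the region width, doubles, the number
!  of remaining boundaries ibound halves, and ib is shifted to the
!  CPU just right of the next matching boundary)
!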
    n=(ib+ninsl-1)/(2*ninsl)
    IF(n/2*2.EQ.n) THEN
      ib=ib-ninsl
    ELSE
      ib=ib+ninsl
    ENDIF
    ninsl=ninsl*2
    ibound=ibound/2

    CALL mpi_comm_free(new_comm, info)
  ENDDO

!
! Broadcast the final functions to all CPUs
!
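! (after the last step CPU 0 holds the functions at the left edge of
! the whole region, fun0/fund0 and funl0/fundl0, while CPU nproc-1
! holds those at the right edge, fun1/fund1 and funl1/fundl1; hence
! the two different broadcast roots)
!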
  CALL mpi_bcast(fun0, 2*n2d*2*n2d, MPI_DOUBLE_PRECISION,  0,          &
                 MPI_COMM_WORLD, info)
  CALL mpi_bcast(fund0, 2*n2d*2*n2d, MPI_DOUBLE_PRECISION, 0,          &
                 MPI_COMM_WORLD, info)
  CALL mpi_bcast(funl0, 2*n2d*npol*norbf, MPI_DOUBLE_PRECISION,  0,    &
                 MPI_COMM_WORLD, info)
  CALL mpi_bcast(fundl0, 2*n2d*npol*norbf, MPI_DOUBLE_PRECISION, 0,    &
                 MPI_COMM_WORLD, info)

  CALL mpi_bcast(fun1, 2*n2d*2*n2d, MPI_DOUBLE_PRECISION,  nproc-1,    &
                 MPI_COMM_WORLD, info)
  CALL mpi_bcast(fund1, 2*n2d*2*n2d, MPI_DOUBLE_PRECISION, nproc-1,    &
                 MPI_COMM_WORLD, info)
  CALL mpi_bcast(funl1, 2*n2d*npol*norbf, MPI_DOUBLE_PRECISION,  nproc-1,   &
                 MPI_COMM_WORLD, info)
  CALL mpi_bcast(fundl1, 2*n2d*npol*norbf, MPI_DOUBLE_PRECISION, nproc-1,   &
                 MPI_COMM_WORLD, info)

!
! Sum the partial integrals from all CPUs
!
  CALL mp_sum( intw1, intra_pool_comm )
  CALL mp_sum( intw2, intra_pool_comm )

  DEALLOCATE(x)
  DEALLOCATE(y)
  DEALLOCATE(amat)
  DEALLOCATE(amat_aux)
  DEALLOCATE(vec)
  DEALLOCATE(vec_aux)
  DEALLOCATE(ipiv)

#endif
  RETURN
END SUBROUTINE rotproc