1! 2! Copyright (C) 2003 A. Smogunov 3! This file is distributed under the terms of the 4! GNU General Public License. See the file `License' 5! in the root directory of the present distribution, 6! or http://www.gnu.org/copyleft/gpl.txt . 7! 8! Generalized to spinor wavefunctions and spin-orbit Oct. 2004 (ADC). 9! 10! 11SUBROUTINE rotproc (fun0, fund0, fun1, fund1, funl0, fundl0, funl1, & 12 fundl1, intw1, intw2, n2d, norbf, norb, nrzp) 13! 14! This subroutine implements a matching procedure to construct 15! local and nonlocal functions on the whole region from those computed 16! by different CPU. 17! It works well with 1, 2, 4, 8, 16... CPU. 18! The matching scheme with 8 CPU looks like: 19 20! | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 21! + + + + 22! | | | | | 23! + + 24! | | | 25! + 26! | | 27! 28! So in this case there are 3 matching steps. 29! 30 31 USE kinds, ONLY : DP 32 USE noncollin_module, ONLY : npol 33 USE parallel_include 34 USE mp_world, ONLY : nproc 35 USE mp_pools, ONLY : me_pool, intra_pool_comm 36 USE mp, ONLY : mp_sum 37 use cond, ONLY : lorb, funz0 38 39 40 41 IMPLICIT NONE 42 43 44 INTEGER :: ig, n, lam, lam1, iorb, iorb1, norbf, norb, n2d, & 45 ibound, numb, ninsl, ib, icolor, ikey, new_comm, nrzp, info 46 INTEGER, ALLOCATABLE :: ipiv(:) 47 COMPLEX(DP), PARAMETER :: one=(1.d0, 0.d0), zero=(0.d0,0.d0) 48 COMPLEX(DP) :: fun0(n2d, 2*n2d), & ! phi_n(0) 49 fund0(n2d, 2*n2d), & ! phi'_n(0) 50 fun1(n2d, 2*n2d), & ! phi_n(d) 51 fund1(n2d, 2*n2d), & ! phi'_n(d) 52 funl0(n2d, npol*norbf), & ! phi_alpha(0) 53 fundl0(n2d, npol*norbf),& ! phi'_alpha(0) 54 funl1(n2d, npol*norbf), & ! phi_alpha(d) 55 fundl1(n2d, npol*norbf), & ! phi'_alpha(d) 56 intw1(norbf*npol, 2*n2d), & ! integrals on phi_n 57 intw2(norbf*npol, norbf*npol) ! integrals on phi_alpha 58 59 COMPLEX(DP), ALLOCATABLE :: x(:), y(:), amat(:,:), vec(:,:), & 60 amat_aux(:,:), vec_aux(:,:) 61 62#if defined(__MPI) 63 64 IF(nproc.EQ.1) RETURN 65 66 ALLOCATE( x( n2d ) ) 67 ALLOCATE( y( n2d ) ) 68 ALLOCATE( amat( 2*n2d, 2*n2d ) ) 69 ALLOCATE( amat_aux( 2*n2d, 2*n2d ) ) 70 ALLOCATE( vec( 2*n2d, 2*n2d+npol*norb ) ) 71 ALLOCATE( vec_aux( 2*n2d, 2*n2d+npol*norb ) ) 72 ALLOCATE( ipiv( 2*n2d ) ) 73 74 numb=0 75 ibound=nproc/2 76 ninsl=1 77 ib=2*(me_pool+1)-(me_pool+1)/2*2 78 79 DO WHILE(ibound.GT.0) 80 81! 82! To find the matching coefficients for a group of CPU 83! 84 icolor=(ib+ninsl-1)/(2*ninsl) 85 ikey=((me_pool+1)+ninsl)-ib 86 CALL mpi_barrier (MPI_COMM_WORLD, info) 87 CALL mpi_comm_split(MPI_COMM_WORLD, icolor, ikey, new_comm, info) 88 amat=(0.d0,0.d0) 89 vec=(0.d0,0.d0) 90 91 IF((me_pool+1).EQ.ib) THEN 92 DO lam=1, n2d 93 DO lam1=1, n2d 94 amat(lam, n2d+lam1)=-fun0(lam,n2d+lam1) 95 amat(n2d+lam, n2d+lam1)=-fund0(lam,n2d+lam1) 96 vec(lam, lam1)=fun0(lam, lam1) 97 vec(n2d+lam, lam1)=fund0(lam, lam1) 98 ENDDO 99 DO iorb=1, npol*norb 100 vec(lam, 2*n2d+iorb)=funl0(lam, iorb) 101 vec(n2d+lam, 2*n2d+iorb)=fundl0(lam, iorb) 102 ENDDO 103 ENDDO 104 numb=numb+1 105 ENDIF 106 IF((me_pool+1).EQ.ib-1) THEN 107 DO lam=1, n2d 108 DO lam1=1, n2d 109 amat(lam, lam1)=fun1(lam,lam1) 110 amat(n2d+lam, lam1)=fund1(lam,lam1) 111 vec(lam, n2d+lam1)=-fun1(lam, n2d+lam1) 112 vec(n2d+lam, n2d+lam1)=-fund1(lam, n2d+lam1) 113 ENDDO 114 DO iorb=1, npol*norb 115 vec(lam, 2*n2d+iorb)=-funl1(lam, iorb) 116 vec(n2d+lam, 2*n2d+iorb)=-fundl1(lam, iorb) 117 ENDDO 118 ENDDO 119 numb=numb+1 120 ENDIF 121 CALL mpi_allreduce(amat, amat_aux, 2*2*n2d*2*n2d, MPI_DOUBLE_PRECISION, & 122 MPI_SUM, new_comm, info) 123 CALL mpi_allreduce(vec, vec_aux, 2*2*n2d*(2*n2d+npol*norb), & 124 MPI_DOUBLE_PRECISION, MPI_SUM, new_comm, info) 125 CALL dcopy(2*2*n2d*2*n2d, amat_aux, 1, amat, 1) 126 CALL dcopy(2*2*n2d*(2*n2d+npol*norb), vec_aux, 1, vec, 1) 127 CALL ZGESV(2*n2d, 2*n2d+npol*norb, amat, 2*n2d, ipiv, & 128 vec, 2*n2d, info) 129! 130! recalculate the functions for CPU which is left to matching 131! boundary 132! 133 IF(numb.LE.1.AND.(me_pool+1)/2*2.EQ.(me_pool+1)) THEN 134 DO ig=1, n2d 135 DO n=1, n2d 136 DO lam=1, n2d 137 fun1(ig, n)=fun1(ig, n)+ & 138 vec(n2d+lam, n)*fun1(ig, n2d+lam) 139 fund1(ig, n)=fund1(ig, n)+ & 140 vec(n2d+lam, n)*fund1(ig, n2d+lam) 141 ENDDO 142 ENDDO 143 DO iorb=1, npol*norb 144 DO lam=1, n2d 145 funl1(ig, iorb)=funl1(ig, iorb)+ & 146 vec(n2d+lam, 2*n2d+iorb)*fun1(ig, n2d+lam) 147 fundl1(ig, iorb)=fundl1(ig, iorb)+ & 148 vec(n2d+lam, 2*n2d+iorb)*fund1(ig, n2d+lam) 149 ENDDO 150 ENDDO 151 ENDDO 152 DO ig=1, n2d 153 x=(0.d0,0.d0) 154 y=(0.d0,0.d0) 155 DO n=1, n2d 156 DO lam=1, n2d 157 x(n)=x(n)+vec(n2d+lam, n2d+n)*fun1(ig, n2d+lam) 158 y(n)=y(n)+vec(n2d+lam, n2d+n)*fund1(ig, n2d+lam) 159 ENDDO 160 ENDDO 161 DO n=1, n2d 162 fun1(ig, n2d+n)=x(n) 163 fund1(ig, n2d+n)=y(n) 164 ENDDO 165 ENDDO 166 ENDIF 167! 168! recalculate the functions for CPU which is right to matching 169! boundary 170! 171 IF(numb.LE.1.AND.(me_pool+1)/2*2.NE.(me_pool+1)) THEN 172 DO ig=1, n2d 173 DO n=1, n2d 174 DO lam=1, n2d 175 fun0(ig, n2d+n)=fun0(ig, n2d+n)+ & 176 vec(lam, n2d+n)*fun0(ig, lam) 177 fund0(ig, n2d+n)=fund0(ig, n2d+n)+ & 178 vec(lam, n2d+n)*fund0(ig, lam) 179 ENDDO 180 ENDDO 181 DO iorb=1, npol*norb 182 DO lam=1, n2d 183 funl0(ig, iorb)=funl0(ig, iorb)+ & 184 vec(lam, 2*n2d+iorb)*fun0(ig, lam) 185 fundl0(ig, iorb)=fundl0(ig, iorb)+ & 186 vec(lam, 2*n2d+iorb)*fund0(ig, lam) 187 ENDDO 188 ENDDO 189 ENDDO 190 DO ig=1, n2d 191 x=(0.d0,0.d0) 192 y=(0.d0,0.d0) 193 DO n=1, n2d 194 DO lam=1, n2d 195 x(n)=x(n)+vec(lam, n)*fun0(ig, lam) 196 y(n)=y(n)+vec(lam, n)*fund0(ig, lam) 197 ENDDO 198 ENDDO 199 DO n=1, n2d 200 fun0(ig, n)=x(n) 201 fund0(ig, n)=y(n) 202 ENDDO 203 ENDDO 204 ENDIF 205! 206! to recalculate the integrals for a given group of CPU 207! 208 IF((me_pool+1).GE.ib) THEN 209 DO iorb=1, npol*norb 210 DO iorb1=1, npol*norb 211 DO lam=1, n2d 212 intw2(iorb,iorb1)=intw2(iorb,iorb1)+ & 213 vec(n2d+lam, 2*n2d+iorb1)*intw1(iorb, n2d+lam) 214 ENDDO 215 ENDDO 216 ENDDO 217 DO iorb=1, npol*norb 218 x=(0.d0,0.d0) 219 DO n=1, n2d 220 DO lam=1, n2d 221 x(n)=x(n)+vec(n2d+lam, n2d+n)*intw1(iorb, n2d+lam) 222 intw1(iorb, n)=intw1(iorb, n)+ & 223 vec(n2d+lam, n)*intw1(iorb, n2d+lam) 224 ENDDO 225 ENDDO 226 DO n=1, n2d 227 intw1(iorb, n2d+n)=x(n) 228 ENDDO 229 ENDDO 230 231 IF (lorb) THEN 232 DO n = 1, nrzp 233 CALL zgemm('n','n',n2d,n2d,n2d,one,funz0(1,n2d+1,n),n2d,& 234 vec(n2d+1,1),2*n2d,one,funz0(1,1,n),n2d) 235 CALL zgemm('n','n',n2d,npol*norb,n2d,one,funz0(1,n2d+1,n),n2d,& 236 vec(n2d+1,2*n2d+1),2*n2d,one,funz0(1,2*n2d+1,n),n2d) 237 CALL zgemm('n','n',n2d,n2d,n2d,one,funz0(1,n2d+1,n),n2d,& 238 vec(n2d+1,n2d+1),2*n2d,zero,vec_aux(1,1),2*n2d) 239 do ig = 1, n2d 240 do lam = 1, n2d 241 funz0(ig,n2d+lam,n) = vec_aux(ig,lam) 242 enddo 243 enddo 244 END DO 245 ENDIF 246 247 ELSE 248 DO iorb=1, npol*norb 249 DO iorb1=1, npol*norb 250 DO lam=1, n2d 251 intw2(iorb, iorb1)=intw2(iorb, iorb1)+ & 252 vec(lam, 2*n2d+iorb1)*intw1(iorb, lam) 253 ENDDO 254 ENDDO 255 ENDDO 256 DO iorb=1, npol*norb 257 x=(0.d0,0.d0) 258 DO n=1, n2d 259 DO lam=1, n2d 260 x(n)=x(n)+vec(lam, n)*intw1(iorb, lam) 261 intw1(iorb, n2d+n)=intw1(iorb, n2d+n)+ & 262 vec(lam, n2d+n)*intw1(iorb, lam) 263 ENDDO 264 ENDDO 265 DO n=1, n2d 266 intw1(iorb, n)=x(n) 267 ENDDO 268 ENDDO 269 270 IF (lorb) THEN 271 DO n = 1, nrzp 272 CALL zgemm('n','n',n2d,n2d,n2d,one,funz0(1,1,n),n2d,& 273 vec(1,n2d+1),2*n2d,one,funz0(1,n2d+1,n),n2d) 274 CALL zgemm('n','n',n2d,npol*norb,n2d,one,funz0(1,1,n),n2d,& 275 vec(1,2*n2d+1),2*n2d,one,funz0(1,2*n2d+1,n),n2d) 276 CALL zgemm('n','n',n2d,n2d,n2d,one,funz0(1,1,n),n2d,& 277 vec(1,1),2*n2d,zero,vec_aux(1,1),2*n2d) 278 do ig = 1, n2d 279 do lam = 1, n2d 280 funz0(ig,lam,n) = vec_aux(ig,lam) 281 enddo 282 enddo 283 END DO 284 ENDIF 285 286 ENDIF 287 288! 289! to next matching step 290! 291 n=(ib+ninsl-1)/(2*ninsl) 292 IF(n/2*2.EQ.n) THEN 293 ib=ib-ninsl 294 ELSE 295 ib=ib+ninsl 296 ENDIF 297 ninsl=ninsl*2 298 ibound=ibound/2 299 300 CALL mpi_comm_free(new_comm, info) 301 ENDDO 302 303! 304! Broadcast of the functions and the integrals to all CPU 305! 306 CALL mpi_bcast(fun0, 2*n2d*2*n2d, MPI_DOUBLE_PRECISION, 0, & 307 MPI_COMM_WORLD, info) 308 CALL mpi_bcast(fund0, 2*n2d*2*n2d, MPI_DOUBLE_PRECISION, 0, & 309 MPI_COMM_WORLD, info) 310 CALL mpi_bcast(funl0, 2*n2d*npol*norbf, MPI_DOUBLE_PRECISION, 0, & 311 MPI_COMM_WORLD, info) 312 CALL mpi_bcast(fundl0, 2*n2d*npol*norbf, MPI_DOUBLE_PRECISION, 0, & 313 MPI_COMM_WORLD, info) 314 315 CALL mpi_bcast(fun1, 2*n2d*2*n2d, MPI_DOUBLE_PRECISION, nproc-1, & 316 MPI_COMM_WORLD, info) 317 CALL mpi_bcast(fund1, 2*n2d*2*n2d, MPI_DOUBLE_PRECISION, nproc-1, & 318 MPI_COMM_WORLD, info) 319 CALL mpi_bcast(funl1, 2*n2d*npol*norbf, MPI_DOUBLE_PRECISION, nproc-1, & 320 MPI_COMM_WORLD, info) 321 CALL mpi_bcast(fundl1, 2*n2d*npol*norbf, MPI_DOUBLE_PRECISION, nproc-1, & 322 MPI_COMM_WORLD, info) 323 324! 325! Gathering of the integrals 326! 327 CALL mp_sum( intw1, intra_pool_comm ) 328 CALL mp_sum( intw2, intra_pool_comm ) 329 330 DEALLOCATE(x) 331 DEALLOCATE(y) 332 DEALLOCATE(amat) 333 DEALLOCATE(amat_aux) 334 DEALLOCATE(vec) 335 DEALLOCATE(vec_aux) 336 DEALLOCATE(ipiv) 337 338#endif 339 RETURN 340END SUBROUTINE rotproc 341