1!--------------------------------------------------------------------------------------------------! 2! CP2K: A general program to perform molecular dynamics simulations ! 3! Copyright (C) 2000 - 2020 CP2K developers group ! 4!--------------------------------------------------------------------------------------------------! 5 6! ************************************************************************************************** 7!> \brief Localization methods such as 2x2 Jacobi rotations 8!> Steepest Decents 9!> Conjugate Gradient 10!> \par History 11!> Initial parallellization of jacobi (JVDV 07.2003) 12!> direct minimization using exponential parametrization (JVDV 09.2003) 13!> crazy rotations go fast (JVDV 10.2003) 14!> \author CJM (04.2003) 15! ************************************************************************************************** 16MODULE qs_localization_methods 17 USE cell_types, ONLY: cell_type 18 USE cp_blacs_env, ONLY: cp_blacs_env_type 19 USE cp_cfm_basic_linalg, ONLY: cp_cfm_column_scale,& 20 cp_cfm_gemm,& 21 cp_cfm_schur_product 22 USE cp_cfm_diag, ONLY: cp_cfm_heevd 23 USE cp_cfm_types, ONLY: cp_cfm_create,& 24 cp_cfm_get_element,& 25 cp_cfm_get_info,& 26 cp_cfm_p_type,& 27 cp_cfm_release,& 28 cp_cfm_set_all,& 29 cp_cfm_to_cfm,& 30 cp_cfm_type 31 USE cp_dbcsr_operations, ONLY: cp_dbcsr_sm_fm_multiply 32 USE cp_external_control, ONLY: external_control 33 USE cp_fm_basic_linalg, ONLY: cp_fm_frobenius_norm,& 34 cp_fm_pdgeqpf,& 35 cp_fm_pdorgqr,& 36 cp_fm_scale,& 37 cp_fm_scale_and_add,& 38 cp_fm_trace,& 39 cp_fm_transpose 40 USE cp_fm_diag, ONLY: cp_fm_syevd 41 USE cp_fm_struct, ONLY: cp_fm_struct_create,& 42 cp_fm_struct_get,& 43 cp_fm_struct_release,& 44 cp_fm_struct_type 45 USE cp_fm_types, ONLY: & 46 cp_fm_create, cp_fm_get_element, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_maxabsrownorm, & 47 cp_fm_maxabsval, cp_fm_p_type, cp_fm_release, cp_fm_set_all, cp_fm_set_submatrix, & 48 cp_fm_to_fm, cp_fm_to_fm_submat, cp_fm_type 49 USE cp_gemm_interface, ONLY: cp_gemm 50 USE cp_log_handling, ONLY: cp_logger_get_default_io_unit,& 51 cp_logger_get_default_unit_nr 52 USE cp_para_types, ONLY: cp_para_env_type 53 USE dbcsr_api, ONLY: dbcsr_p_type 54 USE kinds, ONLY: dp 55 USE machine, ONLY: m_flush,& 56 m_walltime 57 USE mathconstants, ONLY: pi,& 58 twopi 59 USE matrix_exp, ONLY: exp_pade_real,& 60 get_nsquare_norder 61 USE message_passing, ONLY: mp_allgather,& 62 mp_bcast,& 63 mp_max,& 64 mp_sendrecv,& 65 mp_sum,& 66 mp_sync 67#include "./base/base_uses.f90" 68 69 IMPLICIT NONE 70 PUBLIC :: initialize_weights, crazy_rotations, & 71 direct_mini, rotate_orbitals, approx_l1_norm_sd, jacobi_rotations, scdm_qrfact, zij_matrix 72 73 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods' 74 75 PRIVATE 76 77 TYPE set_c_1d_type 78 COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array 79 END TYPE 80 TYPE set_c_2d_type 81 COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array 82 END TYPE 83 84CONTAINS 85! ************************************************************************************************** 86!> \brief ... 87!> \param C ... 88!> \param iterations ... 89!> \param eps ... 90!> \param converged ... 91!> \param sweeps ... 92! ************************************************************************************************** 93 SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps) 94 TYPE(cp_fm_type), POINTER :: C 95 INTEGER, INTENT(IN) :: iterations 96 REAL(KIND=dp), INTENT(IN) :: eps 97 LOGICAL, INTENT(INOUT) :: converged 98 INTEGER, INTENT(INOUT) :: sweeps 99 100 CHARACTER(len=*), PARAMETER :: routineN = 'approx_l1_norm_sd', & 101 routineP = moduleN//':'//routineN 102 INTEGER, PARAMETER :: taylor_order = 100 103 REAL(KIND=dp), PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp 104 105 INTEGER :: handle, i, istep, k, n, ncol_local, & 106 nrow_local, output_unit, p 107 REAL(KIND=dp) :: expfactor, f2, f2old, gnorm, tnorm 108 TYPE(cp_blacs_env_type), POINTER :: context 109 TYPE(cp_fm_struct_type), POINTER :: fm_struct_k_k 110 TYPE(cp_fm_type), POINTER :: CTmp, G, Gp1, Gp2, U 111 TYPE(cp_para_env_type), POINTER :: para_env 112 113 CALL timeset(routineN, handle) 114 115 NULLIFY (CTmp, U, G, Gp1, Gp2, context, para_env, fm_struct_k_k) 116 117 output_unit = cp_logger_get_default_io_unit() 118 119 CALL cp_fm_struct_get(C%matrix_struct, nrow_global=n, ncol_global=k, & 120 nrow_local=nrow_local, ncol_local=ncol_local, & 121 para_env=para_env, context=context) 122 CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, & 123 nrow_global=k, ncol_global=k) 124 CALL cp_fm_create(CTmp, C%matrix_struct) 125 CALL cp_fm_create(U, fm_struct_k_k) 126 CALL cp_fm_create(G, fm_struct_k_k) 127 CALL cp_fm_create(Gp1, fm_struct_k_k) 128 CALL cp_fm_create(Gp2, fm_struct_k_k) 129 ! 130 ! printing 131 IF (output_unit > 0) THEN 132 WRITE (output_unit, '(1X)') 133 WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------' 134 WRITE (output_unit, '(A,I5)') ' Nbr iterations =', iterations 135 WRITE (output_unit, '(A,E10.2)') ' eps convergence =', eps 136 WRITE (output_unit, '(A,I5)') ' Max Taylor order =', taylor_order 137 WRITE (output_unit, '(A,E10.2)') ' f2 eps =', f2_eps 138 WRITE (output_unit, '(A,E10.2)') ' alpha =', alpha 139 WRITE (output_unit, '(A)') ' iteration approx_l1_norm g_norm rel_err' 140 ENDIF 141 ! 142 f2old = 0.0_dp 143 converged = .FALSE. 144 ! 145 ! Start the steepest descent 146 DO istep = 1, iterations 147 ! 148 !------------------------------------------------------------------- 149 ! compute f_2 150 ! f_2(x)=(x^2+eps)^1/2 151 f2 = 0.0_dp 152 DO p = 1, ncol_local ! p 153 DO i = 1, nrow_local ! i 154 f2 = f2 + SQRT(C%local_data(i, p)**2 + f2_eps) 155 ENDDO 156 ENDDO 157 CALL mp_sum(f2, C%matrix_struct%para_env%group) 158 !write(*,*) 'qs_localize: f_2=',f2 159 !------------------------------------------------------------------- 160 ! compute the derivative of f_2 161 ! f_2(x)=(x^2+eps)^1/2 162 DO p = 1, ncol_local ! p 163 DO i = 1, nrow_local ! i 164 CTmp%local_data(i, p) = C%local_data(i, p)/SQRT(C%local_data(i, p)**2 + f2_eps) 165 ENDDO 166 ENDDO 167 CALL cp_gemm('T', 'N', k, k, n, 1.0_dp, CTmp, C, 0.0_dp, G) 168 ! antisymmetrize 169 CALL cp_fm_transpose(G, U) 170 CALL cp_fm_scale_and_add(-0.5_dp, G, 0.5_dp, U) 171 ! 172 !------------------------------------------------------------------- 173 ! 174 CALL cp_fm_frobenius_norm(G, gnorm) 175 !write(*,*) 'qs_localize: norm(G)=',gnorm 176 ! 177 ! rescale for steepest descent 178 CALL cp_fm_scale(-alpha, G) 179 ! 180 ! compute unitary transform 181 ! zeroth order 182 CALL cp_fm_set_all(U, 0.0_dp, 1.0_dp) 183 ! first order 184 expfactor = 1.0_dp 185 CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, G) 186 CALL cp_fm_frobenius_norm(G, tnorm) 187 !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm 188 IF (tnorm .GT. 1.0E-10_dp) THEN 189 ! other orders 190 CALL cp_fm_to_fm(G, Gp1) 191 DO i = 2, taylor_order 192 ! new power of G 193 CALL cp_gemm('N', 'N', k, k, k, 1.0_dp, G, Gp1, 0.0_dp, Gp2) 194 CALL cp_fm_to_fm(Gp2, Gp1) 195 ! add to the taylor expansion so far 196 expfactor = expfactor/REAL(i, KIND=dp) 197 CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, Gp1) 198 CALL cp_fm_frobenius_norm(Gp1, tnorm) 199 !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor 200 IF (tnorm*expfactor .LT. 1.0E-10_dp) EXIT 201 ENDDO 202 ENDIF 203 ! 204 ! incrementaly rotate the MOs 205 CALL cp_gemm('N', 'N', n, k, k, 1.0_dp, C, U, 0.0_dp, CTmp) 206 CALL cp_fm_to_fm(CTmp, C) 207 ! 208 ! printing 209 IF (output_unit .GT. 0) THEN 210 WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, ABS((f2 - f2old)/f2) 211 ENDIF 212 ! 213 ! Are we done? 214 sweeps = istep 215 !IF(gnorm.LE.grad_thresh.AND.ABS((f2-f2old)/f2).LE.f2_thresh.AND.istep.GT.1) THEN 216 IF (ABS((f2 - f2old)/f2) .LE. eps .AND. istep .GT. 1) THEN 217 converged = .TRUE. 218 EXIT 219 ENDIF 220 f2old = f2 221 ENDDO 222 ! 223 ! here we should do one refine step to enforce C'*S*C=1 for any case 224 ! 225 ! Print the final result 226 IF (output_unit .GT. 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2 227 ! 228 ! sparsity 229 !DO i=1,size(thresh,1) 230 ! gnorm = 0.0_dp 231 ! DO o=1,ncol_local 232 ! DO p=1,nrow_local 233 ! IF(ABS(C%local_data(p,o)).GT.thresh(i)) THEN 234 ! gnorm = gnorm + 1.0_dp 235 ! ENDIF 236 ! ENDDO 237 ! ENDDO 238 ! CALL mp_sum(gnorm,C%matrix_struct%para_env%group) 239 ! IF(output_unit.GT.0) THEN 240 ! WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i) 241 ! ENDIF 242 !ENDDO 243 ! 244 ! deallocate 245 CALL cp_fm_struct_release(fm_struct_k_k) 246 CALL cp_fm_release(CTmp) 247 CALL cp_fm_release(U) 248 CALL cp_fm_release(G) 249 CALL cp_fm_release(Gp1) 250 CALL cp_fm_release(Gp2) 251 252 CALL timestop(handle) 253 254 END SUBROUTINE approx_l1_norm_sd 255! ************************************************************************************************** 256!> \brief ... 257!> \param cell ... 258!> \param weights ... 259! ************************************************************************************************** 260 SUBROUTINE initialize_weights(cell, weights) 261 TYPE(cell_type), INTENT(IN) :: cell 262 REAL(KIND=dp), DIMENSION(:) :: weights 263 264 REAL(KIND=dp), DIMENSION(3, 3) :: metric 265 266 metric = 0.0_dp 267 CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat, 3, cell%hmat, 3, 0.0_dp, metric, 3) 268 269 weights(1) = METRIC(1, 1) - METRIC(1, 2) - METRIC(1, 3) 270 weights(2) = METRIC(2, 2) - METRIC(1, 2) - METRIC(2, 3) 271 weights(3) = METRIC(3, 3) - METRIC(1, 3) - METRIC(2, 3) 272 weights(4) = METRIC(1, 2) 273 weights(5) = METRIC(1, 3) 274 weights(6) = METRIC(2, 3) 275 276 END SUBROUTINE initialize_weights 277 278! ************************************************************************************************** 279!> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para 280!> can deal with serial para_envs. 281!> \param weights ... 282!> \param zij ... 283!> \param vectors ... 284!> \param para_env ... 285!> \param max_iter ... 286!> \param eps_localization ... 287!> \param sweeps ... 288!> \param out_each ... 289!> \param target_time ... 290!> \param start_time ... 291!> \par History 292!> \author Joost VandeVondele (02.2010) 293! ************************************************************************************************** 294 SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, eps_localization, & 295 sweeps, out_each, target_time, start_time) 296 297 REAL(KIND=dp), INTENT(IN) :: weights(:) 298 TYPE(cp_fm_p_type), INTENT(INOUT) :: ZIJ(:, :) 299 TYPE(cp_fm_type), POINTER :: vectors 300 TYPE(cp_para_env_type), POINTER :: para_env 301 INTEGER, INTENT(IN) :: max_iter 302 REAL(KIND=dp), INTENT(IN) :: eps_localization 303 INTEGER :: sweeps 304 INTEGER, INTENT(IN) :: out_each 305 REAL(dp) :: target_time, start_time 306 307 IF (para_env%num_pe == 1) THEN 308 CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, out_each) 309 ELSE 310 CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, & 311 sweeps, out_each, target_time, start_time) 312 ENDIF 313 314 END SUBROUTINE jacobi_rotations 315 316! ************************************************************************************************** 317!> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial 318!> while the routine below works in parallel, it is too slow to be useful 319!> \param weights ... 320!> \param zij ... 321!> \param vectors ... 322!> \param max_iter ... 323!> \param eps_localization ... 324!> \param sweeps ... 325!> \param out_each ... 326! ************************************************************************************************** 327 SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, out_each) 328 REAL(KIND=dp), INTENT(IN) :: weights(:) 329 TYPE(cp_fm_p_type), INTENT(INOUT) :: ZIJ(:, :) 330 TYPE(cp_fm_type), POINTER :: vectors 331 INTEGER, INTENT(IN) :: max_iter 332 REAL(KIND=dp), INTENT(IN) :: eps_localization 333 INTEGER :: sweeps 334 INTEGER, INTENT(IN) :: out_each 335 336 CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial', & 337 routineP = moduleN//':'//routineN 338 339 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:) 340 INTEGER :: dim2, handle, idim, istate, jstate, & 341 nstate, unit_nr 342 REAL(KIND=dp) :: ct, st, t1, t2, theta, tolerance 343 TYPE(cp_cfm_p_type), DIMENSION(:), POINTER :: c_zij 344 TYPE(cp_cfm_type), POINTER :: c_rmat 345 TYPE(cp_fm_type), POINTER :: rmat 346 347 CALL timeset(routineN, handle) 348 349 dim2 = SIZE(zij, 2) 350 NULLIFY (rmat, c_rmat, c_zij) 351 ALLOCATE (c_zij(dim2)) 352 NULLIFY (mii, mij, mjj) 353 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2)) 354 355 CALL cp_fm_create(rmat, zij(1, 1)%matrix%matrix_struct) 356 CALL cp_fm_set_all(rmat, 0._dp, 1._dp) 357 358 CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix%matrix_struct) 359 CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp)) 360 DO idim = 1, dim2 361 NULLIFY (c_zij(idim)%matrix) 362 CALL cp_cfm_create(c_zij(idim)%matrix, zij(1, 1)%matrix%matrix_struct) 363 c_zij(idim)%matrix%local_data = CMPLX(zij(1, idim)%matrix%local_data, & 364 zij(2, idim)%matrix%local_data, dp) 365 ENDDO 366 367 CALL cp_fm_get_info(rmat, nrow_global=nstate) 368 tolerance = 1.0e10_dp 369 sweeps = 0 370 unit_nr = -1 371 IF (rmat%matrix_struct%para_env%mepos .EQ. rmat%matrix_struct%para_env%source) THEN 372 unit_nr = cp_logger_get_default_unit_nr() 373 WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation" 374 END IF 375 ! do jacobi sweeps until converged 376 DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter) 377 sweeps = sweeps + 1 378 t1 = m_walltime() 379 DO istate = 1, nstate 380 DO jstate = istate + 1, nstate 381 DO idim = 1, dim2 382 CALL cp_cfm_get_element(c_zij(idim)%matrix, istate, istate, mii(idim)) 383 CALL cp_cfm_get_element(c_zij(idim)%matrix, istate, jstate, mij(idim)) 384 CALL cp_cfm_get_element(c_zij(idim)%matrix, jstate, jstate, mjj(idim)) 385 END DO 386 CALL get_angle(mii, mjj, mij, weights, theta) 387 st = SIN(theta) 388 ct = COS(theta) 389 CALL rotate_zij(istate, jstate, st, ct, c_zij) 390 CALL rotate_rmat(istate, jstate, st, ct, c_rmat) 391 END DO 392 END DO 393 CALL check_tolerance(c_zij, weights, tolerance) 394 t2 = m_walltime() 395 IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN 396 WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') & 397 "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1 398 CALL m_flush(unit_nr) 399 ENDIF 400 END DO 401 402 DO idim = 1, dim2 403 zij(1, idim)%matrix%local_data = REAL(c_zij(idim)%matrix%local_data, dp) 404 zij(2, idim)%matrix%local_data = AIMAG(c_zij(idim)%matrix%local_data) 405 CALL cp_cfm_release(c_zij(idim)%matrix) 406 ENDDO 407 DEALLOCATE (c_zij) 408 DEALLOCATE (mii, mij, mjj) 409 rmat%local_data = REAL(c_rmat%local_data, dp) 410 CALL cp_cfm_release(c_rmat) 411 CALL rotate_orbitals(rmat, vectors) 412 CALL cp_fm_release(rmat) 413 414 CALL timestop(handle) 415 416 END SUBROUTINE jacobi_rotations_serial 417! ************************************************************************************************** 418!> \brief ... 419!> \param istate ... 420!> \param jstate ... 421!> \param st ... 422!> \param ct ... 423!> \param zij ... 424! ************************************************************************************************** 425 SUBROUTINE rotate_zij(istate, jstate, st, ct, zij) 426 IMPLICIT NONE 427 INTEGER, INTENT(IN) :: istate, jstate 428 TYPE(cp_cfm_p_type) :: zij(:) 429 REAL(KIND=dp), INTENT(IN) :: st, ct 430 ! Locals 431 INTEGER :: idim, nstate 432 COMPLEX(KIND=dp) :: st_cmplx 433#if defined(__SCALAPACK) 434 INTEGER, DIMENSION(9) :: desc 435#else 436 INTEGER :: stride 437#endif 438 439 st_cmplx = CMPLX(st, 0.0_dp, dp) 440 CALL cp_cfm_get_info(zij(1)%matrix, nrow_global=nstate) 441 DO idim = 1, SIZE(zij, 1) 442#if defined(__SCALAPACK) 443 desc(:) = zij(idim)%matrix%matrix_struct%descriptor(:) 444 CALL pzrot(nstate, zij(idim)%matrix%local_data(1, 1), 1, istate, desc, 1, & 445 zij(idim)%matrix%local_data(1, 1), 1, jstate, desc, 1, ct, st_cmplx) 446 CALL pzrot(nstate, zij(idim)%matrix%local_data(1, 1), istate, 1, desc, nstate, & 447 zij(idim)%matrix%local_data(1, 1), jstate, 1, desc, nstate, ct, st_cmplx) 448#else 449 CALL zrot(nstate, zij(idim)%matrix%local_data(1, istate), 1, & 450 zij(idim)%matrix%local_data(1, jstate), 1, ct, st_cmplx) 451 stride = SIZE(zij(idim)%matrix%local_data, 1) 452 CALL zrot(nstate, zij(idim)%matrix%local_data(istate, 1), stride, & 453 zij(idim)%matrix%local_data(jstate, 1), stride, ct, st_cmplx) 454#endif 455 END DO 456 END SUBROUTINE rotate_zij 457! ************************************************************************************************** 458!> \brief ... 459!> \param istate ... 460!> \param jstate ... 461!> \param st ... 462!> \param ct ... 463!> \param rmat ... 464! ************************************************************************************************** 465 SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat) 466 IMPLICIT NONE 467 INTEGER, INTENT(IN) :: istate, jstate 468 TYPE(cp_cfm_type), POINTER :: rmat 469 REAL(KIND=dp), INTENT(IN) :: ct, st 470! Locals 471 INTEGER :: nstate 472 COMPLEX(KIND=dp) :: st_cmplx 473#if defined(__SCALAPACK) 474 INTEGER, DIMENSION(9) :: desc 475#endif 476 477 st_cmplx = CMPLX(st, 0.0_dp, dp) 478 CALL cp_cfm_get_info(rmat, nrow_global=nstate) 479#if defined(__SCALAPACK) 480 desc(:) = rmat%matrix_struct%descriptor(:) 481 CALL pzrot(nstate, rmat%local_data(1, 1), 1, istate, desc, 1, & 482 rmat%local_data(1, 1), 1, jstate, desc, 1, ct, st_cmplx) 483#else 484 CALL zrot(nstate, rmat%local_data(1, istate), 1, rmat%local_data(1, jstate), 1, ct, st_cmplx) 485#endif 486 END SUBROUTINE rotate_rmat 487! ************************************************************************************************** 488!> \brief ... 489!> \param mii ... 490!> \param mjj ... 491!> \param mij ... 492!> \param weights ... 493!> \param theta ... 494! ************************************************************************************************** 495 SUBROUTINE get_angle(mii, mjj, mij, weights, theta) 496 COMPLEX(KIND=dp), POINTER :: mii(:), mjj(:), mij(:) 497 REAL(KIND=dp), INTENT(IN) :: weights(:) 498 REAL(KIND=dp), INTENT(OUT) :: theta 499 500 COMPLEX(KIND=dp) :: z11, z12, z22 501 INTEGER :: dim_m, idim 502 REAL(KIND=dp) :: a12, b12, d2, ratio 503 504 a12 = 0.0_dp 505 b12 = 0.0_dp 506 dim_m = SIZE(mii) 507 DO idim = 1, dim_m 508 z11 = mii(idim) 509 z22 = mjj(idim) 510 z12 = mij(idim) 511 a12 = a12 + weights(idim)*REAL(CONJG(z12)*(z11 - z22), KIND=dp) 512 b12 = b12 + weights(idim)*REAL((z12*CONJG(z12) - & 513 0.25_dp*(z11 - z22)*(CONJG(z11) - CONJG(z22))), KIND=dp) 514 END DO 515 IF (ABS(b12) > 1.e-10_dp) THEN 516 ratio = -a12/b12 517 theta = 0.25_dp*ATAN(ratio) 518 ELSEIF (ABS(b12) < 1.e-10_dp) THEN 519 b12 = 0.0_dp 520 theta = 0.0_dp 521 ELSE 522 theta = 0.25_dp*pi 523 ENDIF 524! Check second derivative info 525 d2 = a12*SIN(4._dp*theta) - b12*COS(4._dp*theta) 526 IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum 527 IF (theta > 0.0_dp) THEN ! make theta as small as possible 528 theta = theta - 0.25_dp*pi 529 ELSE 530 theta = theta + 0.25_dp*pi 531 ENDIF 532 ENDIF 533 END SUBROUTINE get_angle 534! ************************************************************************************************** 535!> \brief ... 536!> \param zij ... 537!> \param weights ... 538!> \param tolerance ... 539! ************************************************************************************************** 540 SUBROUTINE check_tolerance(zij, weights, tolerance) 541 TYPE(cp_cfm_p_type) :: zij(:) 542 REAL(KIND=dp), INTENT(IN) :: weights(:) 543 REAL(KIND=dp), INTENT(OUT) :: tolerance 544 545 CHARACTER(len=*), PARAMETER :: routineN = 'check_tolerance', & 546 routineP = moduleN//':'//routineN 547 548 INTEGER :: handle 549 TYPE(cp_fm_type), POINTER :: force 550 551 CALL timeset(routineN, handle) 552 553! compute gradient at t=0 554 555 NULLIFY (force) 556 CALL cp_fm_create(force, zij(1)%matrix%matrix_struct) 557 CALL cp_fm_set_all(force, 0._dp) 558 CALL grad_at_0(zij, weights, force) 559 CALL cp_fm_maxabsval(force, tolerance) 560 CALL cp_fm_release(force) 561 562 CALL timestop(handle) 563 564 END SUBROUTINE check_tolerance 565! ************************************************************************************************** 566!> \brief ... 567!> \param rmat ... 568!> \param vectors ... 569! ************************************************************************************************** 570 SUBROUTINE rotate_orbitals(rmat, vectors) 571 TYPE(cp_fm_type), POINTER :: rmat, vectors 572 573 INTEGER :: k, n 574 TYPE(cp_fm_type), POINTER :: wf 575 576 NULLIFY (wf) 577 CALL cp_fm_create(wf, vectors%matrix_struct) 578 CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k) 579 CALL cp_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf) 580 CALL cp_fm_to_fm(wf, vectors) 581 CALL cp_fm_release(wf) 582 END SUBROUTINE rotate_orbitals 583! ************************************************************************************************** 584!> \brief ... 585!> \param diag ... 586!> \param weights ... 587!> \param matrix ... 588!> \param ndim ... 589! ************************************************************************************************** 590 SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim) 591 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag 592 REAL(KIND=dp), INTENT(IN) :: weights(:) 593 TYPE(cp_fm_type), POINTER :: matrix 594 INTEGER, INTENT(IN) :: ndim 595 596 COMPLEX(KIND=dp) :: zii, zjj 597 INTEGER :: idim, istate, jstate, ncol_local, & 598 nrow_global, nrow_local 599 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices 600 REAL(KIND=dp) :: gradsq_ij 601 602 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, & 603 ncol_local=ncol_local, nrow_global=nrow_global, & 604 row_indices=row_indices, col_indices=col_indices) 605 606 DO istate = 1, nrow_local 607 DO jstate = 1, ncol_local 608! get real and imaginary parts 609 gradsq_ij = 0.0_dp 610 DO idim = 1, ndim 611 zii = diag(row_indices(istate), idim) 612 zjj = diag(col_indices(jstate), idim) 613 gradsq_ij = gradsq_ij + weights(idim)* & 614 4.0_dp*REAL((CONJG(zii)*zii + CONJG(zjj)*zjj), KIND=dp) 615 END DO 616 matrix%local_data(istate, jstate) = gradsq_ij 617 END DO 618 END DO 619 END SUBROUTINE gradsq_at_0 620! ************************************************************************************************** 621!> \brief ... 622!> \param matrix_p ... 623!> \param weights ... 624!> \param matrix ... 625! ************************************************************************************************** 626 SUBROUTINE grad_at_0(matrix_p, weights, matrix) 627 TYPE(cp_cfm_p_type) :: matrix_p(:) 628 REAL(KIND=dp), INTENT(IN) :: weights(:) 629 TYPE(cp_fm_type), POINTER :: matrix 630 631 COMPLEX(KIND=dp) :: zii, zij, zjj 632 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag 633 INTEGER :: dim_m, idim, istate, jstate, ncol_local, & 634 nrow_global, nrow_local 635 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices 636 REAL(KIND=dp) :: grad_ij 637 638 NULLIFY (diag) 639 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, & 640 ncol_local=ncol_local, nrow_global=nrow_global, & 641 row_indices=row_indices, col_indices=col_indices) 642 dim_m = SIZE(matrix_p, 1) 643 ALLOCATE (diag(nrow_global, dim_m)) 644 645 DO idim = 1, dim_m 646 DO istate = 1, nrow_global 647 CALL cp_cfm_get_element(matrix_p(idim)%matrix, istate, istate, diag(istate, idim)) 648 ENDDO 649 ENDDO 650 651 DO istate = 1, nrow_local 652 DO jstate = 1, ncol_local 653! get real and imaginary parts 654 grad_ij = 0.0_dp 655 DO idim = 1, dim_m 656 zii = diag(row_indices(istate), idim) 657 zjj = diag(col_indices(jstate), idim) 658 zij = matrix_p(idim)%matrix%local_data(istate, jstate) 659 grad_ij = grad_ij + weights(idim)* & 660 REAL(4.0_dp*CONJG(zij)*(zjj - zii), dp) 661 END DO 662 matrix%local_data(istate, jstate) = grad_ij 663 END DO 664 END DO 665 DEALLOCATE (diag) 666 END SUBROUTINE grad_at_0 667 668! return energy and maximum gradient in the current point 669! ************************************************************************************************** 670!> \brief ... 671!> \param weights ... 672!> \param zij ... 673!> \param tolerance ... 674!> \param value ... 675! ************************************************************************************************** 676 SUBROUTINE check_tolerance_new(weights, zij, tolerance, value) 677 REAL(KIND=dp), INTENT(IN) :: weights(:) 678 TYPE(cp_fm_p_type), INTENT(INOUT) :: ZIJ(:, :) 679 REAL(KIND=dp) :: tolerance, value 680 681 COMPLEX(KIND=dp) :: kii, kij, kjj 682 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag 683 INTEGER :: idim, istate, jstate, ncol_local, & 684 nrow_global, nrow_local 685 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices 686 REAL(KIND=dp) :: grad_ij, ra, rb 687 688 NULLIFY (diag) 689 CALL cp_fm_get_info(zij(1, 1)%matrix, nrow_local=nrow_local, & 690 ncol_local=ncol_local, nrow_global=nrow_global, & 691 row_indices=row_indices, col_indices=col_indices) 692 ALLOCATE (diag(nrow_global, SIZE(zij, 2))) 693 value = 0.0_dp 694 DO idim = 1, SIZE(zij, 2) 695 DO istate = 1, nrow_global 696 CALL cp_fm_get_element(zij(1, idim)%matrix, istate, istate, ra) 697 CALL cp_fm_get_element(zij(2, idim)%matrix, istate, istate, rb) 698 diag(istate, idim) = CMPLX(ra, rb, dp) 699 value = value + weights(idim) - weights(idim)*ABS(diag(istate, idim))**2 700 ENDDO 701 ENDDO 702 tolerance = 0.0_dp 703 DO istate = 1, nrow_local 704 DO jstate = 1, ncol_local 705 grad_ij = 0.0_dp 706 DO idim = 1, SIZE(zij, 2) 707 kii = diag(row_indices(istate), idim) 708 kjj = diag(col_indices(jstate), idim) 709 ra = zij(1, idim)%matrix%local_data(istate, jstate) 710 rb = zij(2, idim)%matrix%local_data(istate, jstate) 711 kij = CMPLX(ra, rb, dp) 712 grad_ij = grad_ij + weights(idim)* & 713 REAL(4.0_dp*CONJG(kij)*(kjj - kii), dp) 714 END DO 715 tolerance = MAX(ABS(grad_ij), tolerance) 716 ENDDO 717 ENDDO 718 CALL mp_max(tolerance, zij(1, 1)%matrix%matrix_struct%para_env%group) 719 720 DEALLOCATE (diag) 721 722 END SUBROUTINE check_tolerance_new 723 724! ************************************************************************************************** 725!> \brief yet another crazy try, computes the angles needed to rotate the orbitals first 726!> and rotates them all at the same time (hoping for the best of course) 727!> \param weights ... 728!> \param zij ... 729!> \param vectors ... 730!> \param max_iter ... 731!> \param max_crazy_angle ... 732!> \param crazy_scale ... 733!> \param crazy_use_diag ... 734!> \param eps_localization ... 735!> \param iterations ... 736!> \param converged ... 737! ************************************************************************************************** 738 SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, & 739 eps_localization, iterations, converged) 740 REAL(KIND=dp), INTENT(IN) :: weights(:) 741 TYPE(cp_fm_p_type), INTENT(INOUT) :: ZIJ(:, :) 742 TYPE(cp_fm_type), POINTER :: vectors 743 INTEGER, INTENT(IN) :: max_iter 744 REAL(KIND=dp), INTENT(IN) :: max_crazy_angle 745 REAL(KIND=dp) :: crazy_scale 746 LOGICAL :: crazy_use_diag 747 REAL(KIND=dp), INTENT(IN) :: eps_localization 748 INTEGER :: iterations 749 LOGICAL, INTENT(out), OPTIONAL :: converged 750 751 CHARACTER(len=*), PARAMETER :: routineN = 'crazy_rotations', & 752 routineP = moduleN//':'//routineN 753 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), & 754 czero = (0.0_dp, 0.0_dp) 755 756 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp 757 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z 758 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:) 759 INTEGER :: dim2, handle, i, icol, idim, irow, & 760 method, ncol_global, ncol_local, & 761 norder, nrow_global, nrow_local, & 762 nsquare, unit_nr 763 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices 764 LOGICAL :: do_emd 765 REAL(KIND=dp) :: eps_exp, limit_crazy_angle, maxeval, & 766 norm, ra, rb, theta, tolerance, value 767 REAL(KIND=dp), DIMENSION(:), POINTER :: evals 768 TYPE(cp_cfm_type), POINTER :: cmat_A, cmat_R, cmat_t1 769 TYPE(cp_fm_type), POINTER :: mat_R, mat_t, mat_theta, mat_U 770 771 CALL timeset(routineN, handle) 772 NULLIFY (row_indices, col_indices) 773 NULLIFY (mat_U, mat_t, mat_R) 774 NULLIFY (cmat_A, cmat_R, cmat_t1) 775 CALL cp_fm_get_info(zij(1, 1)%matrix, nrow_global=nrow_global, & 776 ncol_global=ncol_global, & 777 row_indices=row_indices, col_indices=col_indices, & 778 nrow_local=nrow_local, ncol_local=ncol_local) 779 780 limit_crazy_angle = max_crazy_angle 781 782 NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj) 783 dim2 = SIZE(zij, 2) 784 ALLOCATE (diag_z(nrow_global, dim2)) 785 ALLOCATE (evals(nrow_global)) 786 ALLOCATE (evals_exp(nrow_global)) 787 788 CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix%matrix_struct) 789 CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix%matrix_struct) 790 CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix%matrix_struct) 791 792 CALL cp_fm_create(mat_U, zij(1, 1)%matrix%matrix_struct) 793 CALL cp_fm_create(mat_t, zij(1, 1)%matrix%matrix_struct) 794 CALL cp_fm_create(mat_R, zij(1, 1)%matrix%matrix_struct) 795 796 NULLIFY (mat_theta) 797 CALL cp_fm_create(mat_theta, zij(1, 1)%matrix%matrix_struct) 798 799 CALL cp_fm_set_all(mat_R, 0.0_dp, 1.0_dp) 800 CALL cp_fm_set_all(mat_t, 0.0_dp) 801 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2)) 802 DO idim = 1, dim2 803 CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim)%matrix) 804 CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim)%matrix) 805 ENDDO 806 CALL cp_fm_syevd(mat_t, mat_U, evals) 807 DO idim = 1, dim2 808 ! rotate z's 809 CALL cp_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim)%matrix, mat_U, 0.0_dp, mat_t) 810 CALL cp_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim)%matrix) 811 CALL cp_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim)%matrix, mat_U, 0.0_dp, mat_t) 812 CALL cp_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim)%matrix) 813 ENDDO 814 ! collect rotation matrix 815 CALL cp_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t) 816 CALL cp_fm_to_fm(mat_t, mat_R) 817 818 unit_nr = -1 819 IF (cmat_A%matrix_struct%para_env%mepos .EQ. cmat_A%matrix_struct%para_env%source) THEN 820 unit_nr = cp_logger_get_default_unit_nr() 821 WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') & 822 "CRAZY| ", "Iter", "value ", "gradient", "Max. eval", "limit" 823 ENDIF 824 825 iterations = 0 826 tolerance = 1.0_dp 827 828 DO 829 iterations = iterations + 1 830 DO idim = 1, dim2 831 DO i = 1, nrow_global 832 CALL cp_fm_get_element(zij(1, idim)%matrix, i, i, ra) 833 CALL cp_fm_get_element(zij(2, idim)%matrix, i, i, rb) 834 diag_z(i, idim) = CMPLX(ra, rb, dp) 835 ENDDO 836 ENDDO 837 DO irow = 1, nrow_local 838 DO icol = 1, ncol_local 839 DO idim = 1, dim2 840 ra = zij(1, idim)%matrix%local_data(irow, icol) 841 rb = zij(2, idim)%matrix%local_data(irow, icol) 842 mij(idim) = CMPLX(ra, rb, dp) 843 mii(idim) = diag_z(row_indices(irow), idim) 844 mjj(idim) = diag_z(col_indices(icol), idim) 845 ENDDO 846 IF (row_indices(irow) .NE. col_indices(icol)) THEN 847 CALL get_angle(mii, mjj, mij, weights, theta) 848 theta = crazy_scale*theta 849 IF (theta .GT. limit_crazy_angle) theta = limit_crazy_angle 850 IF (theta .LT. -limit_crazy_angle) theta = -limit_crazy_angle 851 IF (crazy_use_diag) THEN 852 cmat_A%local_data(irow, icol) = -CMPLX(0.0_dp, theta, dp) 853 ELSE 854 mat_theta%local_data(irow, icol) = -theta 855 ENDIF 856 ELSE 857 IF (crazy_use_diag) THEN 858 cmat_A%local_data(irow, icol) = czero 859 ELSE 860 mat_theta%local_data(irow, icol) = 0.0_dp 861 ENDIF 862 ENDIF 863 ENDDO 864 ENDDO 865 866 ! construct rotation matrix U based on A using diagonalization 867 ! alternatively, exp based on repeated squaring could be faster 868 IF (crazy_use_diag) THEN 869 CALL cp_cfm_heevd(cmat_A, cmat_R, evals) 870 maxeval = MAXVAL(ABS(evals)) 871 evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:)) 872 CALL cp_cfm_to_cfm(cmat_R, cmat_t1) 873 CALL cp_cfm_column_scale(cmat_t1, evals_exp) 874 CALL cp_cfm_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, & 875 cmat_t1, cmat_R, czero, cmat_A) 876 mat_U%local_data = REAL(cmat_A%local_data, KIND=dp) ! U is a real matrix 877 ELSE 878 do_emd = .FALSE. 879 method = 2 880 eps_exp = 1.0_dp*EPSILON(eps_exp) 881 CALL cp_fm_maxabsrownorm(mat_theta, norm) 882 maxeval = norm ! an upper bound 883 CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd) 884 CALL exp_pade_real(mat_U, mat_theta, nsquare, norder) 885 ENDIF 886 887 DO idim = 1, dim2 888 ! rotate z's 889 CALL cp_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim)%matrix, mat_U, 0.0_dp, mat_t) 890 CALL cp_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim)%matrix) 891 CALL cp_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim)%matrix, mat_U, 0.0_dp, mat_t) 892 CALL cp_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim)%matrix) 893 ENDDO 894 ! collect rotation matrix 895 CALL cp_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t) 896 CALL cp_fm_to_fm(mat_t, mat_R) 897 898 CALL check_tolerance_new(weights, zij, tolerance, value) 899 900 IF (unit_nr > 0) THEN 901 WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') & 902 "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle 903 CALL m_flush(unit_nr) 904 ENDIF 905 IF (tolerance .LT. eps_localization .OR. iterations .GE. max_iter) EXIT 906 ENDDO 907 908 IF (PRESENT(converged)) converged = (tolerance .LT. eps_localization) 909 910 CALL cp_cfm_release(cmat_A) 911 CALL cp_cfm_release(cmat_R) 912 CALL cp_cfm_release(cmat_T1) 913 914 CALL cp_fm_release(mat_U) 915 CALL cp_fm_release(mat_T) 916 CALL cp_fm_release(mat_theta) 917 918 CALL rotate_orbitals(mat_R, vectors) 919 920 CALL cp_fm_release(mat_R) 921 DEALLOCATE (evals_exp, evals, diag_z) 922 DEALLOCATE (mii, mij, mjj) 923 924 CALL timestop(handle) 925 926 END SUBROUTINE crazy_rotations 927 928! ************************************************************************************************** 929!> \brief use the exponential parametrization as described in to perform a direct mini 930!> Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000) 931!> none of the input is modified for the time being, just finds the rotations 932!> that minimizes, and throws it away afterwards :-) 933!> apart from being expensive and not cleaned, this works fine 934!> useful to try different spread functionals 935!> \param weights ... 936!> \param zij ... 937!> \param vectors ... 938!> \param max_iter ... 939!> \param eps_localization ... 940!> \param iterations ... 941! ************************************************************************************************** 942 SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations) 943 REAL(KIND=dp), INTENT(IN) :: weights(:) 944 TYPE(cp_fm_p_type), INTENT(INOUT) :: ZIJ(:, :) 945 TYPE(cp_fm_type), POINTER :: vectors 946 INTEGER, INTENT(IN) :: max_iter 947 REAL(KIND=dp), INTENT(IN) :: eps_localization 948 INTEGER :: iterations 949 950 CHARACTER(len=*), PARAMETER :: routineN = 'direct_mini', routineP = moduleN//':'//routineN 951 COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), & 952 czero = (0.0_dp, 0.0_dp) 953 REAL(KIND=dp), PARAMETER :: gold_sec = 0.3819_dp 954 955 COMPLEX(KIND=dp) :: lk, ll, tmp 956 COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp 957 COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z 958 INTEGER :: handle, i, icol, idim, irow, & 959 line_search_count, line_searches, lsl, & 960 lsm, lsr, n, ncol_local, ndim, & 961 nrow_local, output_unit 962 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices 963 LOGICAL :: new_direction 964 REAL(KIND=dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, & 965 fb, fc, nom, normg, normg_cross, & 966 normg_old, npos, omega, tol, val, x0, & 967 x1, xa, xb, xc 968 REAL(KIND=dp), DIMENSION(150) :: energy, grad, pos 969 REAL(KIND=dp), DIMENSION(:), POINTER :: evals, fval, fvald 970 TYPE(cp_cfm_p_type), DIMENSION(:), POINTER :: c_zij 971 TYPE(cp_cfm_type), POINTER :: cmat_A, cmat_B, cmat_M, cmat_R, cmat_t1, & 972 cmat_t2, cmat_U 973 TYPE(cp_fm_type), POINTER :: matrix_A, matrix_G, matrix_G_old, & 974 matrix_G_search, matrix_H, matrix_R, & 975 matrix_T 976 977 NULLIFY (evals, evals_exp, diag_z, fval, fvald, c_zij) 978 NULLIFY (matrix_A, matrix_G, matrix_T, matrix_G_search, matrix_G_old) 979 NULLIFY (cmat_A, cmat_U, cmat_R, cmat_t1, cmat_t2, cmat_B, cmat_M) 980 981 CALL timeset(routineN, handle) 982 output_unit = cp_logger_get_default_io_unit() 983 984 n = zij(1, 1)%matrix%matrix_struct%nrow_global 985 ndim = (SIZE(zij, 2)) 986 987 IF (output_unit > 0) THEN 988 WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; " 989 WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min " 990 END IF 991 992 ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n)) 993 ALLOCATE (c_zij(ndim)) 994 995 ! create the three complex matrices Z 996 DO idim = 1, ndim 997 NULLIFY (c_zij(idim)%matrix) 998 CALL cp_cfm_create(c_zij(idim)%matrix, zij(1, 1)%matrix%matrix_struct) 999 c_zij(idim)%matrix%local_data = CMPLX(zij(1, idim)%matrix%local_data, & 1000 zij(2, idim)%matrix%local_data, dp) 1001 ENDDO 1002 1003 CALL cp_fm_create(matrix_A, zij(1, 1)%matrix%matrix_struct) 1004 CALL cp_fm_create(matrix_G, zij(1, 1)%matrix%matrix_struct) 1005 CALL cp_fm_create(matrix_T, zij(1, 1)%matrix%matrix_struct) 1006 CALL cp_fm_create(matrix_H, zij(1, 1)%matrix%matrix_struct) 1007 CALL cp_fm_create(matrix_G_search, zij(1, 1)%matrix%matrix_struct) 1008 CALL cp_fm_create(matrix_G_old, zij(1, 1)%matrix%matrix_struct) 1009 CALL cp_fm_create(matrix_R, zij(1, 1)%matrix%matrix_struct) 1010 CALL cp_fm_set_all(matrix_R, 0.0_dp, 1.0_dp) 1011 1012 CALL cp_fm_set_all(matrix_A, 0.0_dp) 1013! CALL cp_fm_init_random ( matrix_A ) 1014 1015 CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix%matrix_struct) 1016 CALL cp_cfm_create(cmat_U, zij(1, 1)%matrix%matrix_struct) 1017 CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix%matrix_struct) 1018 CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix%matrix_struct) 1019 CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix%matrix_struct) 1020 CALL cp_cfm_create(cmat_B, zij(1, 1)%matrix%matrix_struct) 1021 CALL cp_cfm_create(cmat_M, zij(1, 1)%matrix%matrix_struct) 1022 1023 CALL cp_cfm_get_info(cmat_B, nrow_local=nrow_local, ncol_local=ncol_local, & 1024 row_indices=row_indices, col_indices=col_indices) 1025 1026 CALL cp_fm_set_all(matrix_G_old, 0.0_dp) 1027 CALL cp_fm_set_all(matrix_G_search, 0.0_dp) 1028 normg_old = 1.0E30_dp 1029 ds_min = 1.0_dp 1030 new_direction = .TRUE. 1031 Iterations = 0 1032 line_searches = 0 1033 line_search_count = 0 1034 DO 1035 iterations = iterations + 1 1036 ! compute U,R,evals given A 1037 cmat_A%local_data = CMPLX(0.0_dp, matrix_A%local_data, dp) ! cmat_A is hermitian, evals are reals 1038 CALL cp_cfm_heevd(cmat_A, cmat_R, evals) 1039 evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:)) 1040 CALL cp_cfm_to_cfm(cmat_R, cmat_t1) 1041 CALL cp_cfm_column_scale(cmat_t1, evals_exp) 1042 CALL cp_cfm_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_U) 1043 cmat_U%local_data = REAL(cmat_U%local_data, KIND=dp) ! enforce numerics, U is a real matrix 1044 1045 IF (new_direction .AND. MOD(line_searches, 20) .EQ. 5) THEN ! reset with A .eq. 0 1046 DO idim = 1, ndim 1047 CALL cp_cfm_gemm('N', 'N', n, n, n, cone, c_zij(idim)%matrix, cmat_U, czero, cmat_t1) 1048 CALL cp_cfm_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, c_zij(idim)%matrix) 1049 ENDDO 1050 ! collect rotation matrix 1051 matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp) 1052 CALL cp_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T) 1053 CALL cp_fm_to_fm(matrix_T, matrix_R) 1054 1055 CALL cp_cfm_set_all(cmat_U, czero, cone) 1056 CALL cp_cfm_set_all(cmat_R, czero, cone) 1057 CALL cp_cfm_set_all(cmat_A, czero) 1058 CALL cp_fm_set_all(matrix_A, 0.0_dp) 1059 evals(:) = 0.0_dp 1060 evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:)) 1061 CALL cp_fm_set_all(matrix_G_old, 0.0_dp) 1062 CALL cp_fm_set_all(matrix_G_search, 0.0_dp) 1063 normg_old = 1.0E30_dp 1064 ENDIF 1065 1066 ! compute Omega and M 1067 CALL cp_cfm_set_all(cmat_M, czero) 1068 omega = 0.0_dp 1069 DO idim = 1, ndim 1070 CALL cp_cfm_gemm('N', 'N', n, n, n, cone, c_zij(idim)%matrix, cmat_U, czero, cmat_t1) ! t1=ZU 1071 CALL cp_cfm_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU 1072 DO i = 1, n 1073 CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim)) 1074 SELECT CASE (2) ! allows for selection of different spread functionals 1075 CASE (1) 1076 fval(i) = -weights(idim)*LOG(ABS(diag_z(i, idim))**2) 1077 fvald(i) = -weights(idim)/(ABS(diag_z(i, idim))**2) 1078 CASE (2) ! corresponds to the jacobi setup 1079 fval(i) = weights(idim) - weights(idim)*ABS(diag_z(i, idim))**2 1080 fvald(i) = -weights(idim) 1081 END SELECT 1082 omega = omega + fval(i) 1083 ENDDO 1084 DO icol = 1, ncol_local 1085 DO irow = 1, nrow_local 1086 tmp = cmat_t1%local_data(irow, icol)*CONJG(diag_z(col_indices(icol), idim)) 1087 cmat_M%local_data(irow, icol) = cmat_M%local_data(irow, icol) & 1088 + 4.0_dp*fvald(col_indices(icol))*REAL(tmp, KIND=dp) 1089 ENDDO 1090 ENDDO 1091 ENDDO 1092 1093 ! compute Hessian diagonal approximation for the preconditioner 1094 IF (.TRUE.) THEN 1095 CALL gradsq_at_0(diag_z, weights, matrix_H, ndim) 1096 ELSE 1097 CALL cp_fm_set_all(matrix_H, 1.0_dp) 1098 ENDIF 1099 1100 ! compute B 1101 DO icol = 1, ncol_local 1102 DO irow = 1, nrow_local 1103 ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow)) 1104 lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol)) 1105 IF (ABS(ll - lk) .LT. 0.5_dp) THEN ! use a series expansion to avoid loss of precision 1106 tmp = 1.0_dp 1107 cmat_B%local_data(irow, icol) = 0.0_dp 1108 DO i = 1, 16 1109 cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol) + tmp 1110 tmp = tmp*(ll - lk)/(i + 1) 1111 ENDDO 1112 cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol)*EXP(lk) 1113 ELSE 1114 cmat_B%local_data(irow, icol) = (EXP(lk) - EXP(ll))/(lk - ll) 1115 ENDIF 1116 ENDDO 1117 ENDDO 1118 ! compute gradient matrix_G 1119 1120 CALL cp_cfm_gemm('C', 'N', n, n, n, cone, cmat_M, cmat_R, czero, cmat_t1) ! t1=(M^T)(R^T) 1121 CALL cp_cfm_gemm('C', 'N', n, n, n, cone, cmat_R, cmat_t1, czero, cmat_t2) ! t2=(R)t1 1122 CALL cp_cfm_schur_product(cmat_t2, cmat_B, cmat_t1) 1123 CALL cp_cfm_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_t2) 1124 CALL cp_cfm_gemm('N', 'N', n, n, n, cone, cmat_R, cmat_t2, czero, cmat_t1) 1125 matrix_G%local_data = REAL(cmat_t1%local_data, KIND=dp) 1126 CALL cp_fm_transpose(matrix_G, matrix_T) 1127 CALL cp_fm_scale_and_add(-1.0_dp, matrix_G, 1.0_dp, matrix_T) 1128 CALL cp_fm_maxabsval(matrix_G, tol) 1129 1130 ! from here on, minimizing technology 1131 IF (new_direction) THEN 1132 ! energy converged up to machine precision ? 1133 line_searches = line_searches + 1 1134 ! DO i=1,line_search_count 1135 ! write(15,*) pos(i),energy(i) 1136 ! ENDDO 1137 ! write(15,*) "" 1138 ! CALL m_flush(15) 1139 !write(16,*) evals(:) 1140 !write(17,*) matrix_A%local_data(:,:) 1141 !write(18,*) matrix_G%local_data(:,:) 1142 IF (output_unit > 0) THEN 1143 WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, Iterations, Omega, tol, ds_min 1144 CALL m_flush(output_unit) 1145 ENDIF 1146 IF (tol < eps_localization .OR. iterations > max_iter) EXIT 1147 1148 IF (.TRUE.) THEN ! do conjugate gradient CG 1149 CALL cp_fm_trace(matrix_G, matrix_G_old, normg_cross) 1150 normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric 1151 ! apply the preconditioner 1152 DO icol = 1, ncol_local 1153 DO irow = 1, nrow_local 1154 matrix_G_old%local_data(irow, icol) = matrix_G%local_data(irow, icol)/matrix_H%local_data(irow, icol) 1155 ENDDO 1156 ENDDO 1157 CALL cp_fm_trace(matrix_G, matrix_G_old, normg) 1158 normg = normg*0.5_dp 1159 beta_pr = (normg - normg_cross)/normg_old 1160 normg_old = normg 1161 beta_pr = MAX(beta_pr, 0.0_dp) 1162 CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old) 1163 CALL cp_fm_trace(matrix_G_search, matrix_G_old, normg_cross) 1164 IF (normg_cross .GE. 0) THEN ! back to SD 1165 IF (matrix_A%matrix_struct%para_env%mepos .EQ. & 1166 matrix_A%matrix_struct%para_env%source) THEN 1167 WRITE (cp_logger_get_default_unit_nr(), *) "!" 1168 ENDIF 1169 beta_pr = 0.0_dp 1170 CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old) 1171 ENDIF 1172 ELSE ! SD 1173 CALL cp_fm_scale_and_add(0.0_dp, matrix_G_search, -1.0_dp, matrix_G) 1174 ENDIF 1175 ! ds_min=1.0E-4_dp 1176 line_search_count = 0 1177 END IF 1178 line_search_count = line_search_count + 1 1179 energy(line_search_count) = Omega 1180 1181 ! line search section 1182 SELECT CASE (3) 1183 CASE (1) ! two point line search 1184 SELECT CASE (line_search_count) 1185 CASE (1) 1186 pos(1) = 0.0_dp 1187 pos(2) = ds_min 1188 CALL cp_fm_trace(matrix_G, matrix_G_search, grad(1)) 1189 grad(1) = grad(1)/2.0_dp 1190 new_direction = .FALSE. 1191 CASE (2) 1192 new_direction = .TRUE. 1193 x0 = pos(1) ! 0.0_dp 1194 c = energy(1) 1195 b = grad(1) 1196 x1 = pos(2) 1197 a = (energy(2) - b*x1 - c)/(x1**2) 1198 IF (a .LE. 0.0_dp) a = 1.0E-15_dp 1199 npos = -b/(2.0_dp*a) 1200 val = a*npos**2 + b*npos + c 1201 IF (val .LT. energy(1) .AND. val .LE. energy(2)) THEN 1202 ! we go to a minimum, but ... 1203 ! we take a guard against too large steps 1204 pos(3) = MIN(npos, MAXVAL(pos(1:2))*4.0_dp) 1205 ELSE ! just take an extended step 1206 pos(3) = MAXVAL(pos(1:2))*2.0_dp 1207 ENDIF 1208 END SELECT 1209 CASE (2) ! 3 point line search 1210 SELECT CASE (line_search_count) 1211 CASE (1) 1212 new_direction = .FALSE. 1213 pos(1) = 0.0_dp 1214 pos(2) = ds_min*0.8_dp 1215 CASE (2) 1216 new_direction = .FALSE. 1217 IF (energy(2) .GT. energy(1)) THEN 1218 pos(3) = ds_min*0.7_dp 1219 ELSE 1220 pos(3) = ds_min*1.4_dp 1221 ENDIF 1222 CASE (3) 1223 new_direction = .TRUE. 1224 xa = pos(1) 1225 xb = pos(2) 1226 xc = pos(3) 1227 fa = energy(1) 1228 fb = energy(2) 1229 fc = energy(3) 1230 nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa) 1231 denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa) 1232 IF (ABS(denom) .LE. 1.0E-18_dp*MAX(ABS(fb - fc), ABS(fb - fa))) THEN 1233 npos = xb 1234 ELSE 1235 npos = xb - 0.5_dp*nom/denom ! position of the stationary point 1236 ENDIF 1237 val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + & 1238 (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + & 1239 (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa)) 1240 IF (val .LT. fa .AND. val .LE. fb .AND. val .LE. fc) THEN ! OK, we go to a minimum 1241 ! we take a guard against too large steps 1242 pos(4) = MAX(MAXVAL(pos(1:3))*0.01_dp, & 1243 MIN(npos, MAXVAL(pos(1:3))*4.0_dp)) 1244 ELSE ! just take an extended step 1245 pos(4) = MAXVAL(pos(1:3))*2.0_dp 1246 ENDIF 1247 END SELECT 1248 CASE (3) ! golden section hunt 1249 new_direction = .FALSE. 1250 IF (line_search_count .EQ. 1) THEN 1251 lsl = 1 1252 lsr = 0 1253 lsm = 1 1254 pos(1) = 0.0_dp 1255 pos(2) = ds_min/gold_sec 1256 ELSE 1257 IF (line_search_count .EQ. 150) CPABORT("Too many") 1258 IF (lsr .EQ. 0) THEN 1259 IF (energy(line_search_count - 1) .LT. energy(line_search_count)) THEN 1260 lsr = line_search_count 1261 pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec 1262 ELSE 1263 lsl = lsm 1264 lsm = line_search_count 1265 pos(line_search_count + 1) = pos(line_search_count)/gold_sec 1266 ENDIF 1267 ELSE 1268 IF (pos(line_search_count) .LT. pos(lsm)) THEN 1269 IF (energy(line_search_count) .LT. energy(lsm)) THEN 1270 lsr = lsm 1271 lsm = line_search_count 1272 ELSE 1273 lsl = line_search_count 1274 ENDIF 1275 ELSE 1276 IF (energy(line_search_count) .LT. energy(lsm)) THEN 1277 lsl = lsm 1278 lsm = line_search_count 1279 ELSE 1280 lsr = line_search_count 1281 ENDIF 1282 ENDIF 1283 IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN 1284 pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm)) 1285 ELSE 1286 pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl)) 1287 ENDIF 1288 IF ((pos(lsr) - pos(lsl)) .LT. 1.0E-3_dp*pos(lsr)) THEN 1289 new_direction = .TRUE. 1290 ENDIF 1291 ENDIF ! lsr .eq. 0 1292 ENDIF ! first step 1293 END SELECT 1294 ! now go to the suggested point 1295 ds_min = pos(line_search_count + 1) 1296 ds = pos(line_search_count + 1) - pos(line_search_count) 1297 CALL cp_fm_scale_and_add(1.0_dp, matrix_A, ds, matrix_G_search) 1298 ENDDO 1299 1300 ! collect rotation matrix 1301 matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp) 1302 CALL cp_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T) 1303 CALL cp_fm_to_fm(matrix_T, matrix_R) 1304 CALL rotate_orbitals(matrix_R, vectors) 1305 CALL cp_fm_release(matrix_R) 1306 1307 CALL cp_fm_release(matrix_A) 1308 CALL cp_fm_release(matrix_G) 1309 CALL cp_fm_release(matrix_H) 1310 CALL cp_fm_release(matrix_T) 1311 CALL cp_fm_release(matrix_G_search) 1312 CALL cp_fm_release(matrix_G_old) 1313 CALL cp_cfm_release(cmat_A) 1314 CALL cp_cfm_release(cmat_U) 1315 CALL cp_cfm_release(cmat_R) 1316 CALL cp_cfm_release(cmat_t1) 1317 CALL cp_cfm_release(cmat_t2) 1318 CALL cp_cfm_release(cmat_B) 1319 CALL cp_cfm_release(cmat_M) 1320 1321 DEALLOCATE (evals, evals_exp, fval, fvald) 1322 1323 DO idim = 1, SIZE(c_zij) 1324 zij(1, idim)%matrix%local_data = REAL(c_zij(idim)%matrix%local_data, dp) 1325 zij(2, idim)%matrix%local_data = AIMAG(c_zij(idim)%matrix%local_data) 1326 CALL cp_cfm_release(c_zij(idim)%matrix) 1327 ENDDO 1328 DEALLOCATE (c_zij) 1329 DEALLOCATE (diag_z) 1330 1331 CALL timestop(handle) 1332 1333 END SUBROUTINE 1334 1335! ************************************************************************************************** 1336!> \brief Parallel algorithm for jacobi rotations 1337!> \param weights ... 1338!> \param zij ... 1339!> \param vectors ... 1340!> \param para_env ... 1341!> \param max_iter ... 1342!> \param eps_localization ... 1343!> \param sweeps ... 1344!> \param out_each ... 1345!> \param target_time ... 1346!> \param start_time ... 1347!> \par History 1348!> use allgather for improved performance 1349!> \author MI (11.2009) 1350! ************************************************************************************************** 1351 SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, & 1352 sweeps, out_each, target_time, start_time) 1353 1354 REAL(KIND=dp), INTENT(IN) :: weights(:) 1355 TYPE(cp_fm_p_type), INTENT(INOUT) :: ZIJ(:, :) 1356 TYPE(cp_fm_type), POINTER :: vectors 1357 TYPE(cp_para_env_type), POINTER :: para_env 1358 INTEGER, INTENT(IN) :: max_iter 1359 REAL(KIND=dp), INTENT(IN) :: eps_localization 1360 INTEGER :: sweeps 1361 INTEGER, INTENT(IN) :: out_each 1362 REAL(dp) :: target_time, start_time 1363 1364 CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rot_para', & 1365 routineP = moduleN//':'//routineN 1366 1367 COMPLEX(KIND=dp) :: zi, zj 1368 COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: c_array_me, c_array_partner 1369 COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:) 1370 INTEGER :: dim2, handle, i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, & 1371 ip, ip_has_i, ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, & 1372 iu1, iu2, iup1, iup2, j, jj, jstate, k, kk, n1, n2, nblock, nblock_max, npair, nperm, & 1373 ns_me, ns_partner, ns_recv_from, ns_recv_partner, nstate, output_unit 1374 INTEGER, ALLOCATABLE, DIMENSION(:) :: rcount, rdispl 1375 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: list_pair, ns_bound 1376 LOGICAL :: should_stop 1377 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send, & 1378 rotmat, z_ij_loc_im, z_ij_loc_re 1379 REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: rmat_recv_all 1380 REAL(KIND=dp) :: ct, func, gmax, grad, ri, rj, st, t1, & 1381 t2, theta, tolerance, xlow, xstate, & 1382 xup, zc, zr 1383 TYPE(cp_fm_type), POINTER :: rmat 1384 TYPE(set_c_1d_type), DIMENSION(:), POINTER :: zdiag_all, zdiag_me 1385 TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc, xyz_mix, xyz_mix_ns 1386 1387 CALL timeset(routineN, handle) 1388 1389 output_unit = cp_logger_get_default_io_unit() 1390 1391 NULLIFY (rmat, cz_ij_loc, zdiag_all, zdiag_me) 1392 NULLIFY (xyz_mix, xyz_mix_ns) 1393 NULLIFY (mii, mij, mjj) 1394 1395 dim2 = SIZE(zij, 2) 1396 ALLOCATE (mii(dim2), mij(dim2), mjj(dim2)) 1397 1398 CALL cp_fm_create(rmat, zij(1, 1)%matrix%matrix_struct) 1399 CALL cp_fm_set_all(rmat, 0._dp, 1._dp) 1400 1401 CALL cp_fm_get_info(rmat, nrow_global=nstate) 1402 1403 ALLOCATE (rcount(para_env%num_pe), STAT=istat) 1404 ALLOCATE (rdispl(para_env%num_pe), STAT=istat) 1405 1406 tolerance = 1.0e10_dp 1407 sweeps = 0 1408 1409 ! number of processor pairs and number of permutations 1410 npair = (para_env%num_pe + 1)/2 1411 nperm = para_env%num_pe - MOD(para_env%num_pe + 1, 2) 1412 ALLOCATE (list_pair(2, npair)) 1413 1414 ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX) 1415 xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp) 1416 nblock_max = 0 1417 ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2)) 1418 Xlow = 0.0D0 1419 Xup = 0.0D0 1420 DO ip = 1, para_env%num_pe 1421 xup = xlow + xstate 1422 ns_bound(ip - 1, 1) = NINT(xlow) + 1 1423 ns_bound(ip - 1, 2) = NINT(xup) 1424 IF (NINT(xup) .GT. nstate) THEN 1425 ns_bound(ip - 1, 2) = nstate 1426 ENDIF 1427 IF (NINT(xlow) .GT. nstate) THEN 1428 ns_bound(ip - 1, 1) = nstate + 1 1429 ENDIF 1430 xlow = xup 1431 ENDDO 1432 DO ip = 0, para_env%num_pe - 1 1433 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1 1434 nblock_max = MAX(nblock_max, nblock) 1435 ENDDO 1436 1437 ! otbtain local part of the matrix (could be made faster, but is likely irrelevant). 1438 ALLOCATE (z_ij_loc_re(nstate, nblock_max)) 1439 ALLOCATE (z_ij_loc_im(nstate, nblock_max)) 1440 ALLOCATE (cz_ij_loc(dim2)) 1441 DO idim = 1, dim2 1442 DO ip = 0, para_env%num_pe - 1 1443 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1 1444 CALL cp_fm_get_submatrix(zij(1, idim)%matrix, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock) 1445 CALL cp_fm_get_submatrix(zij(2, idim)%matrix, z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock) 1446 IF (para_env%mepos == ip) THEN 1447 ns_me = nblock 1448 ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me)) 1449 DO i = 1, ns_me 1450 DO j = 1, nstate 1451 cz_ij_loc(idim)%c_array(j, i) = CMPLX(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp) 1452 END DO 1453 END DO 1454 END IF 1455 END DO ! ip 1456 END DO 1457 DEALLOCATE (z_ij_loc_re) 1458 DEALLOCATE (z_ij_loc_im) 1459 1460 ! initialize rotation matrix 1461 ALLOCATE (rotmat(nstate, 2*nblock_max)) 1462 rotmat = 0.0_dp 1463 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2) 1464 ii = i - ns_bound(para_env%mepos, 1) + 1 1465 rotmat(i, ii) = 1.0_dp 1466 END DO 1467 1468 ALLOCATE (xyz_mix(dim2)) 1469 ALLOCATE (xyz_mix_ns(dim2)) 1470 ALLOCATE (zdiag_me(dim2)) 1471 ALLOCATE (zdiag_all(dim2)) 1472 1473 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1 1474 IF (ns_me /= 0) THEN 1475 ALLOCATE (c_array_me(nstate, ns_me, dim2)) 1476 DO idim = 1, dim2 1477 ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me)) 1478 END DO 1479 ALLOCATE (gmat(nstate, ns_me)) 1480 END IF 1481 1482 DO idim = 1, dim2 1483 ALLOCATE (zdiag_me(idim)%c_array(nblock_max)) 1484 zdiag_me(idim)%c_array = CMPLX(0.0_dp, 0.0_dp, dp) 1485 ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max)) 1486 zdiag_all(idim)%c_array = CMPLX(0.0_dp, 0.0_dp, dp) 1487 END DO 1488 ALLOCATE (rmat_recv(nblock_max*2, nblock_max)) 1489 ALLOCATE (rmat_send(nblock_max*2, nblock_max)) 1490 1491 ! buffer for message passing 1492 ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1)) 1493 1494 IF (output_unit > 0) THEN 1495 WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation" 1496 WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time " 1497 END IF 1498 1499 DO sweeps = 1, max_iter + 1 1500 IF (sweeps == max_iter + 1) THEN 1501 IF (output_unit > 0) THEN 1502 WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.' 1503 WRITE (output_unit, *) ' Present Max. gradient = ', tolerance 1504 END IF 1505 EXIT 1506 ENDIF 1507 t1 = m_walltime() 1508 1509 DO iperm = 1, nperm 1510 1511 ! fix partners for this permutation, and get the number of states 1512 CALL eberlein(iperm, para_env, list_pair) 1513 ip_partner = -1 1514 ns_partner = 0 1515 DO ipair = 1, npair 1516 IF (list_pair(1, ipair) == para_env%mepos) THEN 1517 ip_partner = list_pair(2, ipair) 1518 EXIT 1519 ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN 1520 ip_partner = list_pair(1, ipair) 1521 EXIT 1522 END IF 1523 END DO 1524 IF (ip_partner >= 0) THEN 1525 ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1 1526 ELSE 1527 ns_partner = 0 1528 END IF 1529 1530 ! if there is a non-zero block connecting two partners, jacobi-sweep it. 1531 IF (ns_partner*ns_me /= 0) THEN 1532 1533 ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner)) 1534 rmat_loc = 0.0_dp 1535 DO i = 1, ns_me + ns_partner 1536 rmat_loc(i, i) = 1.0_dp 1537 END DO 1538 1539 ALLOCATE (c_array_partner(nstate, ns_partner, dim2)) 1540 1541 DO idim = 1, dim2 1542 ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner)) 1543 DO i = 1, ns_me 1544 c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i) 1545 END DO 1546 END DO 1547 1548 CALL mp_sendrecv(msgin=c_array_me, dest=ip_partner, & 1549 msgout=c_array_partner, source=ip_partner, comm=para_env%group) 1550 1551 n1 = ns_me 1552 n2 = ns_partner 1553 ilow1 = ns_bound(para_env%mepos, 1) 1554 iup1 = ns_bound(para_env%mepos, 1) + n1 - 1 1555 ilow2 = ns_bound(ip_partner, 1) 1556 iup2 = ns_bound(ip_partner, 1) + n2 - 1 1557 IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN 1558 il1 = 1 1559 iu1 = n1 1560 iu1 = n1 1561 il2 = 1 + n1 1562 iu2 = n1 + n2 1563 ELSE 1564 il1 = 1 + n2 1565 iu1 = n1 + n2 1566 iu1 = n1 + n2 1567 il2 = 1 1568 iu2 = n2 1569 END IF 1570 1571 DO idim = 1, dim2 1572 DO i = 1, n1 1573 xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim) 1574 xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim) 1575 END DO 1576 DO i = 1, n2 1577 xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim) 1578 xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim) 1579 END DO 1580 END DO 1581 1582 DO istate = 1, n1 + n2 1583 DO jstate = istate + 1, n1 + n2 1584 DO idim = 1, dim2 1585 mii(idim) = xyz_mix(idim)%c_array(istate, istate) 1586 mij(idim) = xyz_mix(idim)%c_array(istate, jstate) 1587 mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate) 1588 END DO 1589 CALL get_angle(mii, mjj, mij, weights, theta) 1590 st = SIN(theta) 1591 ct = COS(theta) 1592 DO idim = 1, dim2 1593 DO i = 1, n1 + n2 1594 zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate) 1595 zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate) 1596 xyz_mix(idim)%c_array(i, istate) = zi 1597 xyz_mix(idim)%c_array(i, jstate) = zj 1598 END DO 1599 DO i = 1, n1 + n2 1600 zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i) 1601 zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i) 1602 xyz_mix(idim)%c_array(istate, i) = zi 1603 xyz_mix(idim)%c_array(jstate, i) = zj 1604 END DO 1605 END DO 1606 1607 DO i = 1, n1 + n2 1608 ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate) 1609 rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate) 1610 rmat_loc(i, istate) = ri 1611 rmat_loc(i, jstate) = rj 1612 END DO 1613 END DO 1614 END DO 1615 1616 k = nblock_max + 1 1617 CALL mp_sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, & 1618 rotmat(1:nstate, k:k + n2 - 1), ip_partner, para_env%group) 1619 1620 IF (ilow1 < ilow2) THEN 1621 CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate) 1622 CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(1, 1), n1 + n2, 1.0_dp, gmat, nstate) 1623 ELSE 1624 CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1, n2 + 1), n1 + n2, 0.0_dp, gmat, nstate) 1625 CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, & 1626 rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate) 1627 END IF 1628 1629 CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1) 1630 1631 DO idim = 1, dim2 1632 DO i = 1, n1 1633 xyz_mix_ns(idim)%c_array(1:nstate, i) = CMPLX(0.0_dp, 0.0_dp, dp) 1634 END DO 1635 1636 DO istate = 1, n1 1637 DO jstate = 1, nstate 1638 DO i = 1, n2 1639 xyz_mix_ns(idim)%c_array(jstate, istate) = & 1640 xyz_mix_ns(idim)%c_array(jstate, istate) + & 1641 c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1) 1642 END DO 1643 END DO 1644 END DO 1645 DO istate = 1, n1 1646 DO jstate = 1, nstate 1647 DO i = 1, n1 1648 xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + & 1649 c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1) 1650 END DO 1651 END DO 1652 END DO 1653 END DO ! idim 1654 1655 DEALLOCATE (c_array_partner) 1656 1657 ELSE ! save my data 1658 DO idim = 1, dim2 1659 DO i = 1, ns_me 1660 xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i) 1661 END DO 1662 END DO 1663 END IF 1664 1665 DO idim = 1, dim2 1666 DO i = 1, ns_me 1667 cz_ij_loc(idim)%c_array(1:nstate, i) = CMPLX(0.0_dp, 0.0_dp, dp) 1668 END DO 1669 END DO 1670 1671 IF (ns_partner*ns_me /= 0) THEN 1672 ! transpose rotation matrix rmat_loc 1673 DO i = 1, ns_me + ns_partner 1674 DO j = i + 1, ns_me + ns_partner 1675 ri = rmat_loc(i, j) 1676 rmat_loc(i, j) = rmat_loc(j, i) 1677 rmat_loc(j, i) = ri 1678 END DO 1679 END DO 1680 1681 ! prepare for distribution 1682 DO i = 1, n1 1683 rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1) 1684 END DO 1685 ik = nblock_max 1686 DO i = 1, n2 1687 rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1) 1688 END DO 1689 ELSE 1690 rmat_send = 0.0_dp 1691 END IF 1692 1693 ! collect data from all tasks (this takes some significant time) 1694 CALL mp_allgather(rmat_send, rmat_recv_all, para_env%group) 1695 1696 ! update blocks everywhere 1697 DO ip = 0, para_env%num_pe - 1 1698 1699 ip_recv_from = MOD(para_env%mepos - IP + para_env%num_pe, para_env%num_pe) 1700 rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from) 1701 1702 ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1 1703 1704 IF (ns_me /= 0) THEN 1705 IF (ns_recv_from /= 0) THEN 1706 !look for the partner of ip_recv_from 1707 ip_recv_partner = -1 1708 ns_recv_partner = 0 1709 DO ipair = 1, npair 1710 IF (list_pair(1, ipair) == ip_recv_from) THEN 1711 ip_recv_partner = list_pair(2, ipair) 1712 EXIT 1713 ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN 1714 ip_recv_partner = list_pair(1, ipair) 1715 EXIT 1716 END IF 1717 END DO 1718 1719 IF (ip_recv_partner >= 0) THEN 1720 ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1 1721 END IF 1722 IF (ns_recv_partner > 0) THEN 1723 il1 = ns_bound(para_env%mepos, 1) 1724 il_recv = ns_bound(ip_recv_from, 1) 1725 il_recv_partner = ns_bound(ip_recv_partner, 1) 1726 ik = nblock_max 1727 1728 DO idim = 1, dim2 1729 DO i = 1, ns_recv_from 1730 ii = il_recv + i - 1 1731 DO j = 1, ns_me 1732 jj = j 1733 DO k = 1, ns_recv_from 1734 kk = il_recv + k - 1 1735 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + & 1736 rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j) 1737 END DO 1738 END DO 1739 END DO 1740 DO i = 1, ns_recv_from 1741 ii = il_recv + i - 1 1742 DO j = 1, ns_me 1743 jj = j 1744 DO k = 1, ns_recv_partner 1745 kk = il_recv_partner + k - 1 1746 cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + & 1747 rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j) 1748 END DO 1749 END DO 1750 END DO 1751 END DO ! idim 1752 ELSE 1753 il1 = ns_bound(para_env%mepos, 1) 1754 il_recv = ns_bound(ip_recv_from, 1) 1755 DO idim = 1, dim2 1756 DO j = 1, ns_me 1757 jj = j 1758 DO i = 1, ns_recv_from 1759 ii = il_recv + i - 1 1760 cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j) 1761 END DO 1762 END DO 1763 END DO ! idim 1764 END IF 1765 END IF 1766 END IF ! ns_me 1767 END DO ! ip 1768 1769 IF (ns_partner*ns_me /= 0) THEN 1770 DEALLOCATE (rmat_loc) 1771 DO idim = 1, dim2 1772 DEALLOCATE (xyz_mix(idim)%c_array) 1773 END DO 1774 END IF 1775 1776 END DO ! iperm 1777 1778 ! calculate the max gradient 1779 DO idim = 1, dim2 1780 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2) 1781 ii = i - ns_bound(para_env%mepos, 1) + 1 1782 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii) 1783 zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii) 1784 END DO 1785 rcount(:) = SIZE(zdiag_me(idim)%c_array) 1786 rdispl(1) = 0 1787 DO ip = 2, para_env%num_pe 1788 rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1) 1789 ENDDO 1790 ! collect all the diagonal elements in a replicated 1d array 1791 CALL mp_allgather(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl, para_env%group) 1792 END DO 1793 1794 gmax = 0.0_dp 1795 DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2) 1796 k = j - ns_bound(para_env%mepos, 1) + 1 1797 DO i = 1, j - 1 1798 ! find the location of the diagonal element (i,i) 1799 DO ip = 0, para_env%num_pe - 1 1800 IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN 1801 ip_has_i = ip 1802 EXIT 1803 END IF 1804 END DO 1805 ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1 1806 ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j) 1807 jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1 1808 grad = 0.0_dp 1809 DO idim = 1, dim2 1810 zi = zdiag_all(idim)%c_array(ii) 1811 zj = zdiag_all(idim)%c_array(jj) 1812 grad = grad + weights(idim)*REAL(4.0_dp*CONJG(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp) 1813 END DO 1814 gmax = MAX(gmax, ABS(grad)) 1815 END DO 1816 END DO 1817 1818 CALL mp_max(gmax, para_env%group) 1819 tolerance = gmax 1820 1821 func = 0.0_dp 1822 DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2) 1823 k = i - ns_bound(para_env%mepos, 1) + 1 1824 DO idim = 1, dim2 1825 zr = REAL(cz_ij_loc(idim)%c_array(i, k), dp) 1826 zc = AIMAG(cz_ij_loc(idim)%c_array(i, k)) 1827 func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi 1828 END DO 1829 END DO 1830 CALL mp_sum(func, para_env%group) 1831 t2 = m_walltime() 1832 1833 IF (output_unit > 0 .AND. MODULO(sweeps, out_each) == 0) THEN 1834 WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1 1835 CALL m_flush(output_unit) 1836 END IF 1837 IF (tolerance < eps_localization) EXIT 1838 CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time) 1839 IF (should_stop) EXIT 1840 1841 END DO ! sweeps 1842 1843 ! buffer for message passing 1844 DEALLOCATE (rmat_recv_all) 1845 1846 DEALLOCATE (rmat_recv) 1847 DEALLOCATE (rmat_send) 1848 IF (ns_me > 0) THEN 1849 DEALLOCATE (c_array_me) 1850 ENDIF 1851 DO idim = 1, dim2 1852 DEALLOCATE (zdiag_me(idim)%c_array) 1853 DEALLOCATE (zdiag_all(idim)%c_array) 1854 END DO 1855 DEALLOCATE (zdiag_me) 1856 DEALLOCATE (zdiag_all) 1857 DEALLOCATE (xyz_mix) 1858 DO idim = 1, dim2 1859 IF (ns_me /= 0) THEN 1860 DEALLOCATE (xyz_mix_ns(idim)%c_array) 1861 ENDIF 1862 END DO 1863 DEALLOCATE (xyz_mix_ns) 1864 IF (ns_me /= 0) THEN 1865 DEALLOCATE (gmat) 1866 ENDIF 1867 DEALLOCATE (mii) 1868 DEALLOCATE (mij) 1869 DEALLOCATE (mjj) 1870 1871 ilow1 = ns_bound(para_env%mepos, 1) 1872 ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1 1873 ALLOCATE (z_ij_loc_re(nstate, nblock_max)) 1874 ALLOCATE (z_ij_loc_im(nstate, nblock_max)) 1875 DO idim = 1, dim2 1876 DO ip = 0, para_env%num_pe - 1 1877 z_ij_loc_re = 0.0_dp 1878 z_ij_loc_im = 0.0_dp 1879 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1 1880 IF (ip == para_env%mepos) THEN 1881 ns_me = nblock 1882 DO i = 1, ns_me 1883 ii = ilow1 + i - 1 1884 DO j = 1, nstate 1885 z_ij_loc_re(j, i) = REAL(cz_ij_loc(idim)%c_array(j, i), dp) 1886 z_ij_loc_im(j, i) = AIMAG(cz_ij_loc(idim)%c_array(j, i)) 1887 END DO 1888 END DO 1889 END IF 1890 CALL mp_bcast(z_ij_loc_re, ip, para_env%group) 1891 CALL mp_bcast(z_ij_loc_im, ip, para_env%group) 1892 CALL cp_fm_set_submatrix(zij(1, idim)%matrix, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock) 1893 CALL cp_fm_set_submatrix(zij(2, idim)%matrix, z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock) 1894 END DO ! ip 1895 END DO 1896 1897 DO ip = 0, para_env%num_pe - 1 1898 z_ij_loc_re = 0.0_dp 1899 nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1 1900 IF (ip == para_env%mepos) THEN 1901 ns_me = nblock 1902 DO i = 1, ns_me 1903 ii = ilow1 + i - 1 1904 DO j = 1, nstate 1905 z_ij_loc_re(j, i) = rotmat(j, i) 1906 END DO 1907 END DO 1908 END IF 1909 CALL mp_bcast(z_ij_loc_re, ip, para_env%group) 1910 CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock) 1911 END DO 1912 1913 DEALLOCATE (z_ij_loc_re) 1914 DEALLOCATE (z_ij_loc_im) 1915 DO idim = 1, dim2 1916 DEALLOCATE (cz_ij_loc(idim)%c_array) 1917 END DO 1918 DEALLOCATE (cz_ij_loc) 1919 1920 CALL mp_sync(para_env%group) 1921 CALL rotate_orbitals(rmat, vectors) 1922 CALL cp_fm_release(rmat) 1923 1924 DEALLOCATE (rotmat) 1925 DEALLOCATE (ns_bound, list_pair) 1926 1927 CALL timestop(handle) 1928 1929 END SUBROUTINE jacobi_rot_para 1930 1931! ************************************************************************************************** 1932!> \brief ... 1933!> \param iperm ... 1934!> \param para_env ... 1935!> \param list_pair ... 1936! ************************************************************************************************** 1937 SUBROUTINE eberlein(iperm, para_env, list_pair) 1938 INTEGER, INTENT(IN) :: iperm 1939 TYPE(cp_para_env_type), POINTER :: para_env 1940 INTEGER, DIMENSION(:, :) :: list_pair 1941 1942 CHARACTER(len=*), PARAMETER :: routineN = 'eberlein', routineP = moduleN//':'//routineN 1943 1944 INTEGER :: i, ii, jj, npair 1945 1946 npair = (para_env%num_pe + 1)/2 1947 IF (iperm == 1) THEN 1948!..set up initial ordering 1949 DO I = 0, para_env%num_pe - 1 1950 II = ((i + 1) + 1)/2 1951 JJ = MOD((i + 1) + 1, 2) + 1 1952 list_pair(JJ, II) = i 1953 ENDDO 1954 IF (MOD(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1 1955 ELSEIF (MOD(iperm, 2) == 0) THEN 1956!..a type shift 1957 jj = list_pair(1, npair) 1958 DO I = npair, 3, -1 1959 list_pair(1, I) = list_pair(1, I - 1) 1960 ENDDO 1961 list_pair(1, 2) = list_pair(2, 1) 1962 list_pair(2, 1) = jj 1963 ELSE 1964!..b type shift 1965 jj = list_pair(2, 1) 1966 DO I = 1, npair - 1 1967 list_pair(2, I) = list_pair(2, I + 1) 1968 ENDDO 1969 list_pair(2, npair) = jj 1970 ENDIF 1971 1972 END SUBROUTINE eberlein 1973 1974! ************************************************************************************************** 1975!> \brief ... 1976!> \param vectors ... 1977!> \param op_sm_set ... 1978!> \param zij_fm_set ... 1979! ************************************************************************************************** 1980 SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set) 1981 1982 TYPE(cp_fm_type), POINTER :: vectors 1983 TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: op_sm_set 1984 TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER :: zij_fm_set 1985 1986 CHARACTER(len=*), PARAMETER :: routineN = 'zij_matrix', routineP = moduleN//':'//routineN 1987 1988 INTEGER :: handle, i, j, nao, nmoloc 1989 TYPE(cp_fm_type), POINTER :: opvec 1990 1991 CALL timeset(routineN, handle) 1992 1993 NULLIFY (opvec) 1994 ! get rows and cols of the input 1995 CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc) 1996 ! replicate the input kind of matrix 1997 CALL cp_fm_create(opvec, vectors%matrix_struct) 1998 1999 ! Compute zij here 2000 DO i = 1, SIZE(zij_fm_set, 2) 2001 DO j = 1, SIZE(zij_fm_set, 1) 2002 CALL cp_fm_set_all(zij_fm_set(j, i)%matrix, 0.0_dp) 2003 CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc) 2004 CALL cp_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, & 2005 zij_fm_set(j, i)%matrix) 2006 ENDDO 2007 ENDDO 2008 2009 CALL cp_fm_release(opvec) 2010 CALL timestop(handle) 2011 2012 END SUBROUTINE zij_matrix 2013 2014! ************************************************************************************************** 2015!> \brief ... 2016!> \param vectors ... 2017! ************************************************************************************************** 2018 SUBROUTINE scdm_qrfact(vectors) 2019 2020 TYPE(cp_fm_type), POINTER :: vectors 2021 2022 CHARACTER(len=*), PARAMETER :: routineN = 'scdm_qrfact', routineP = moduleN//':'//routineN 2023 2024 INTEGER :: handle, ncolT, nrowT 2025 REAL(KIND=dp), DIMENSION(:), POINTER :: tau 2026 TYPE(cp_fm_struct_type), POINTER :: cstruct 2027 TYPE(cp_fm_type), POINTER :: CTp, Qf, tmp 2028 2029 CALL timeset(routineN, handle) 2030 2031 ! Create Transpose of Coefficient Matrix vectors 2032 nrowT = vectors%matrix_struct%ncol_global 2033 ncolT = vectors%matrix_struct%nrow_global 2034 2035 CALL cp_fm_struct_create(cstruct, vectors%matrix_struct%para_env, & 2036 vectors%matrix_struct%context, nrowT, ncolT, vectors%matrix_struct%ncol_block, & 2037 vectors%matrix_struct%nrow_block, first_p_pos=vectors%matrix_struct%first_p_pos) 2038 CALL cp_fm_create(CTp, cstruct) 2039 CALL cp_fm_struct_release(cstruct) 2040 2041 ALLOCATE (tau(nrowT)) 2042 2043 CALL cp_fm_transpose(vectors, CTp) 2044 2045 ! Call SCALAPACK routine to get QR decomposition of CTs 2046 CALL cp_fm_pdgeqpf(CTp, tau, nrowT, ncolT, 1, 1) 2047 2048 ! Construction of Q from the scalapack output 2049 CALL cp_fm_struct_create(cstruct, para_env=CTp%matrix_struct%para_env, & 2050 context=CTp%matrix_struct%context, nrow_global=CTp%matrix_struct%nrow_global, & 2051 ncol_global=CTp%matrix_struct%nrow_global) 2052 CALL cp_fm_create(Qf, cstruct) 2053 CALL cp_fm_struct_release(cstruct) 2054 CALL cp_fm_to_fm_submat(CTp, Qf, nrowT, nrowT, 1, 1, 1, 1) 2055 2056 ! Call SCALAPACK routine to get Q 2057 CALL cp_fm_pdorgqr(Qf, tau, nrowT, 1, 1) 2058 2059 ! Transform original coefficient matrix vectors 2060 CALL cp_fm_create(tmp, vectors%matrix_struct) 2061 CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp) 2062 CALL cp_fm_to_fm(vectors, tmp) 2063 CALL cp_gemm('N', 'N', ncolT, nrowT, nrowT, 1.0_dp, tmp, Qf, 0.0_dp, vectors) 2064 2065 ! Cleanup 2066 CALL cp_fm_release(CTp) 2067 CALL cp_fm_release(tmp) 2068 CALL cp_fm_release(Qf) 2069 DEALLOCATE (tau) 2070 2071 CALL timestop(handle) 2072 2073 END SUBROUTINE scdm_qrfact 2074 2075END MODULE qs_localization_methods 2076