1!--------------------------------------------------------------------------------------------------! 2! CP2K: A general program to perform molecular dynamics simulations ! 3! Copyright (C) 2000 - 2019 CP2K developers group ! 4!--------------------------------------------------------------------------------------------------! 5 6! ************************************************************************************************** 7!> \brief module that contains the algorithms to perform an itrative 8!> diagonalization by the block-Lanczos approach 9!> \par History 10!> 05.2009 created [MI] 11!> \author fawzi 12! ************************************************************************************************** 13MODULE qs_scf_lanczos 14 15 USE cp_fm_basic_linalg, ONLY: cp_fm_column_scale,& 16 cp_fm_qr_factorization,& 17 cp_fm_scale_and_add,& 18 cp_fm_transpose,& 19 cp_fm_triangular_multiply 20 USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose 21 USE cp_fm_diag, ONLY: choose_eigv_solver 22 USE cp_fm_struct, ONLY: cp_fm_struct_create,& 23 cp_fm_struct_release,& 24 cp_fm_struct_type 25 USE cp_fm_types, ONLY: & 26 cp_fm_create, cp_fm_get_submatrix, cp_fm_p_type, cp_fm_release, cp_fm_set_all, & 27 cp_fm_set_submatrix, cp_fm_to_fm, cp_fm_type, cp_fm_vectorsnorm 28 USE cp_gemm_interface, ONLY: cp_gemm 29 USE cp_log_handling, ONLY: cp_to_string 30 USE kinds, ONLY: dp 31 USE message_passing, ONLY: mp_sum 32 USE qs_mo_types, ONLY: get_mo_set,& 33 mo_set_p_type 34 USE qs_scf_types, ONLY: krylov_space_type 35 USE scf_control_types, ONLY: scf_control_type 36#include "./base/base_uses.f90" 37 38 IMPLICIT NONE 39 PRIVATE 40 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_scf_lanczos' 41 42 PUBLIC :: krylov_space_allocate, lanczos_refinement, lanczos_refinement_2v 43 44CONTAINS 45 46! ************************************************************************************************** 47 48! ************************************************************************************************** 49!> \brief allocates matrices and vectros used in the construction of 50!> the krylov space and for the lanczos refinement 51!> \param krylov_space ... 52!> \param scf_control ... 53!> \param mos ... 54!> \param 55!> \par History 56!> 05.2009 created [MI] 57! ************************************************************************************************** 58 59 SUBROUTINE krylov_space_allocate(krylov_space, scf_control, mos) 60 61 TYPE(krylov_space_type), POINTER :: krylov_space 62 TYPE(scf_control_type), POINTER :: scf_control 63 TYPE(mo_set_p_type), DIMENSION(:), POINTER :: mos 64 65 CHARACTER(LEN=*), PARAMETER :: routineN = 'krylov_space_allocate', & 66 routineP = moduleN//':'//routineN 67 68 INTEGER :: handle, ik, ispin, max_nmo, nao, nblock, & 69 ndim, nk, nmo, nspin 70 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp 71 TYPE(cp_fm_type), POINTER :: mo_coeff 72 73 CALL timeset(routineN, handle) 74 75 CPASSERT(ASSOCIATED(krylov_space)) 76 77 IF (.NOT. ASSOCIATED(krylov_space%mo_conv)) THEN 78 NULLIFY (fm_struct_tmp, mo_coeff) 79 80 krylov_space%nkrylov = scf_control%diagonalization%nkrylov 81 krylov_space%nblock = scf_control%diagonalization%nblock_krylov 82 83 nk = krylov_space%nkrylov 84 nblock = krylov_space%nblock 85 nspin = SIZE(mos, 1) 86 87 ALLOCATE (krylov_space%mo_conv(nspin)) 88 ALLOCATE (krylov_space%mo_refine(nspin)) 89 ALLOCATE (krylov_space%chc_mat(nspin)) 90 ALLOCATE (krylov_space%c_vec(nspin)) 91 max_nmo = 0 92 DO ispin = 1, nspin 93 CALL get_mo_set(mos(ispin)%mo_set, mo_coeff=mo_coeff, nao=nao, nmo=nmo) 94 CALL cp_fm_create(krylov_space%mo_conv(ispin)%matrix, mo_coeff%matrix_struct) 95 CALL cp_fm_create(krylov_space%mo_refine(ispin)%matrix, mo_coeff%matrix_struct) 96 NULLIFY (fm_struct_tmp) 97 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmo, ncol_global=nmo, & 98 para_env=mo_coeff%matrix_struct%para_env, & 99 context=mo_coeff%matrix_struct%context) 100 CALL cp_fm_create(krylov_space%chc_mat(ispin)%matrix, fm_struct_tmp, "chc") 101 CALL cp_fm_create(krylov_space%c_vec(ispin)%matrix, fm_struct_tmp, "vec") 102 CALL cp_fm_struct_release(fm_struct_tmp) 103 max_nmo = MAX(max_nmo, nmo) 104 END DO 105 106 !the use of max_nmo might not be ok, in this case allocate nspin matrices 107 ALLOCATE (krylov_space%c_eval(max_nmo)) 108 109 ALLOCATE (krylov_space%v_mat(nk)) 110 111 NULLIFY (fm_struct_tmp) 112 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nblock, & 113 para_env=mo_coeff%matrix_struct%para_env, & 114 context=mo_coeff%matrix_struct%context) 115 DO ik = 1, nk 116 CALL cp_fm_create(krylov_space%v_mat(ik)%matrix, matrix_struct=fm_struct_tmp, & 117 name="v_mat_"//TRIM(ADJUSTL(cp_to_string(ik)))) 118 END DO 119 CALL cp_fm_create(krylov_space%tmp_mat, matrix_struct=fm_struct_tmp, & 120 name="tmp_mat") 121 CALL cp_fm_struct_release(fm_struct_tmp) 122 123 ! NOTE: the following matrices are small and could be defined 124! as standard array rather than istributed fm 125 NULLIFY (fm_struct_tmp) 126 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nblock, ncol_global=nblock, & 127 para_env=mo_coeff%matrix_struct%para_env, & 128 context=mo_coeff%matrix_struct%context) 129 CALL cp_fm_create(krylov_space%block1_mat, matrix_struct=fm_struct_tmp, & 130 name="a_mat_"//TRIM(ADJUSTL(cp_to_string(ik)))) 131 CALL cp_fm_create(krylov_space%block2_mat, matrix_struct=fm_struct_tmp, & 132 name="b_mat_"//TRIM(ADJUSTL(cp_to_string(ik)))) 133 CALL cp_fm_create(krylov_space%block3_mat, matrix_struct=fm_struct_tmp, & 134 name="b2_mat_"//TRIM(ADJUSTL(cp_to_string(ik)))) 135 CALL cp_fm_struct_release(fm_struct_tmp) 136 137 ndim = nblock*nk 138 NULLIFY (fm_struct_tmp) 139 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, & 140 para_env=mo_coeff%matrix_struct%para_env, & 141 context=mo_coeff%matrix_struct%context) 142 CALL cp_fm_create(krylov_space%block4_mat, matrix_struct=fm_struct_tmp, & 143 name="t_mat") 144 CALL cp_fm_create(krylov_space%block5_mat, matrix_struct=fm_struct_tmp, & 145 name="t_vec") 146 CALL cp_fm_struct_release(fm_struct_tmp) 147 ALLOCATE (krylov_space%t_eval(ndim)) 148 149 ELSE 150 !Nothing should be done 151 END IF 152 153 CALL timestop(handle) 154 155 END SUBROUTINE krylov_space_allocate 156 157! ************************************************************************************************** 158!> \brief lanczos refinement by blocks of not-converged MOs 159!> \param krylov_space ... 160!> \param ks ... 161!> \param c0 ... 162!> \param c1 ... 163!> \param eval ... 164!> \param nao ... 165!> \param eps_iter ... 166!> \param ispin ... 167!> \param check_moconv_only ... 168!> \param 169!> \par History 170!> 05.2009 created [MI] 171! ************************************************************************************************** 172 173 SUBROUTINE lanczos_refinement(krylov_space, ks, c0, c1, eval, nao, & 174 eps_iter, ispin, check_moconv_only) 175 176 TYPE(krylov_space_type), POINTER :: krylov_space 177 TYPE(cp_fm_type), POINTER :: ks, c0, c1 178 REAL(dp), DIMENSION(:), POINTER :: eval 179 INTEGER, INTENT(IN) :: nao 180 REAL(dp), INTENT(IN) :: eps_iter 181 INTEGER, INTENT(IN) :: ispin 182 LOGICAL, INTENT(IN), OPTIONAL :: check_moconv_only 183 184 CHARACTER(LEN=*), PARAMETER :: routineN = 'lanczos_refinement', & 185 routineP = moduleN//':'//routineN 186 REAL(KIND=dp), PARAMETER :: rmone = -1.0_dp, rone = 1.0_dp, & 187 rzero = 0.0_dp 188 189 INTEGER :: hand1, hand2, hand3, hand4, hand5, handle, ib, ik, imo, imo_low, imo_up, it, jt, & 190 nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, num_blocks 191 INTEGER, ALLOCATABLE, DIMENSION(:) :: itaken 192 LOGICAL :: my_check_moconv_only 193 REAL(dp) :: max_norm, min_norm, vmax 194 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: q_mat, tblock, tvblock 195 REAL(dp), DIMENSION(:), POINTER :: c_res, t_eval 196 TYPE(cp_fm_p_type), DIMENSION(:), POINTER :: v_mat 197 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp 198 TYPE(cp_fm_type), POINTER :: a_mat, b2_mat, b_mat, c2_tmp, c3_tmp, & 199 c_tmp, chc, evec, hc, t_mat, t_vec 200 201 CALL timeset(routineN, handle) 202 203 NULLIFY (fm_struct_tmp) 204 NULLIFY (chc, evec) 205 NULLIFY (c_res, t_eval) 206 NULLIFY (hc, t_mat, t_vec, c2_tmp) 207 NULLIFY (a_mat, b_mat, b2_mat, v_mat) 208 209 nmo = SIZE(eval, 1) 210 my_check_moconv_only = .FALSE. 211 IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only 212 213 chc => krylov_space%chc_mat(ispin)%matrix 214 evec => krylov_space%c_vec(ispin)%matrix 215 c_res => krylov_space%c_eval 216 t_eval => krylov_space%t_eval 217 218 NULLIFY (fm_struct_tmp, c_tmp) 219 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, & 220 para_env=c0%matrix_struct%para_env, & 221 context=c0%matrix_struct%context) 222 CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, & 223 name="c_tmp") 224 CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, & 225 name="hc") 226 CALL cp_fm_struct_release(fm_struct_tmp) 227 228 !Compute (C^t)HC 229 CALL cp_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc) 230 CALL cp_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc) 231 232 !Diagonalize (C^t)HC 233 CALL timeset(routineN//"diag_chc", hand1) 234 CALL choose_eigv_solver(chc, evec, eval) 235 CALL timestop(hand1) 236 237 !Rotate the C vectors 238 CALL cp_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1) 239 240 !Check for converged states 241 CALL cp_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp) 242 CALL cp_fm_to_fm(c1, c0, nmo, 1, 1) 243 CALL cp_fm_column_scale(c1, eval) 244 CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1) 245 CALL cp_fm_vectorsnorm(c_tmp, c_res) 246 247 nmo_converged = 0 248 nmo_nc = 0 249 max_norm = 0.0_dp 250 min_norm = 1.e10_dp 251 CALL cp_fm_set_all(c1, rzero) 252 DO imo = 1, nmo 253 max_norm = MAX(max_norm, c_res(imo)) 254 min_norm = MIN(min_norm, c_res(imo)) 255 END DO 256 DO imo = 1, nmo 257 IF (c_res(imo) <= eps_iter) THEN 258 nmo_converged = nmo_converged + 1 259 ELSE 260 nmo_nc = nmo - nmo_converged 261 EXIT 262 END IF 263 END DO 264 265 nblock = krylov_space%nblock 266 num_blocks = nmo_nc/nblock 267 268 krylov_space%nmo_nc = nmo_nc 269 krylov_space%nmo_conv = nmo_converged 270 krylov_space%max_res_norm = max_norm 271 krylov_space%min_res_norm = min_norm 272 273 IF (my_check_moconv_only) THEN 274 CALL cp_fm_release(c_tmp) 275 CALL cp_fm_release(hc) 276 CALL timestop(handle) 277 RETURN 278 ELSE IF (krylov_space%nmo_nc > 0) THEN 279 280 CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1) 281 282 nblock = krylov_space%nblock 283 IF (MODULO(nmo_nc, nblock) > 0.0_dp) THEN 284 num_blocks = nmo_nc/nblock + 1 285 ELSE 286 num_blocks = nmo_nc/nblock 287 END IF 288 289 DO ib = 1, num_blocks 290 291 imo_low = (ib - 1)*nblock + 1 292 imo_up = MIN(ib*nblock, nmo_nc) 293 nmob = imo_up - imo_low + 1 294 ndim = krylov_space%nkrylov*nmob 295 296 NULLIFY (fm_struct_tmp) 297 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, & 298 para_env=c0%matrix_struct%para_env, & 299 context=c0%matrix_struct%context) 300 CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, & 301 name="c2_tmp") 302 CALL cp_fm_struct_release(fm_struct_tmp) 303 NULLIFY (fm_struct_tmp) 304 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=ndim, & 305 para_env=c0%matrix_struct%para_env, & 306 context=c0%matrix_struct%context) 307 CALL cp_fm_create(c3_tmp, matrix_struct=fm_struct_tmp, & 308 name="c3_tmp") 309 CALL cp_fm_struct_release(fm_struct_tmp) 310 311 ! Create local matrix of right size 312 IF (nmob /= nblock) THEN 313 NULLIFY (a_mat, b_mat, b2_mat, t_mat, t_vec, v_mat) 314 NULLIFY (fm_struct_tmp) 315 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, & 316 para_env=chc%matrix_struct%para_env, & 317 context=chc%matrix_struct%context) 318 CALL cp_fm_create(a_mat, matrix_struct=fm_struct_tmp, & 319 name="a_mat") 320 CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, & 321 name="b_mat") 322 CALL cp_fm_create(b2_mat, matrix_struct=fm_struct_tmp, & 323 name="b2_mat") 324 CALL cp_fm_struct_release(fm_struct_tmp) 325 NULLIFY (fm_struct_tmp) 326 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, & 327 para_env=chc%matrix_struct%para_env, & 328 context=chc%matrix_struct%context) 329 CALL cp_fm_create(t_mat, matrix_struct=fm_struct_tmp, & 330 name="t_mat") 331 CALL cp_fm_create(t_vec, matrix_struct=fm_struct_tmp, & 332 name="t_vec") 333 CALL cp_fm_struct_release(fm_struct_tmp) 334 NULLIFY (fm_struct_tmp) 335 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, & 336 para_env=c0%matrix_struct%para_env, & 337 context=c0%matrix_struct%context) 338 ALLOCATE (v_mat(krylov_space%nkrylov)) 339 DO ik = 1, krylov_space%nkrylov 340 CALL cp_fm_create(v_mat(ik)%matrix, matrix_struct=fm_struct_tmp, & 341 name="v_mat") 342 END DO 343 CALL cp_fm_struct_release(fm_struct_tmp) 344 ELSE 345 a_mat => krylov_space%block1_mat 346 b_mat => krylov_space%block2_mat 347 b2_mat => krylov_space%block3_mat 348 t_mat => krylov_space%block4_mat 349 t_vec => krylov_space%block5_mat 350 v_mat => krylov_space%v_mat 351 END IF 352 353 ALLOCATE (tblock(nmob, nmob)) 354 ALLOCATE (tvblock(nmob, ndim)) 355 356 CALL timeset(routineN//"_kry_loop", hand2) 357 CALL cp_fm_set_all(b_mat, rzero) 358 CALL cp_fm_set_all(t_mat, rzero) 359 CALL cp_fm_to_fm(c1, v_mat(1)%matrix, nmob, imo_low, 1) 360 361 !Compute A =(V^t)HV 362 CALL cp_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1)%matrix, rzero, hc) 363 CALL cp_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(1)%matrix, hc, & 364 rzero, a_mat) 365 366 !Compute the residual matrix R for next 367 !factorisation 368 CALL cp_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(1)%matrix, a_mat, & 369 rzero, c_tmp) 370 CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc) 371 372 ! Build the block tridiagonal matrix 373 CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob) 374 CALL cp_fm_set_submatrix(t_mat, tblock, 1, 1, nmob, nmob) 375 376 DO ik = 2, krylov_space%nkrylov 377 378 ! Call lapack for QR factorization 379 CALL cp_fm_set_all(b_mat, rzero) 380 CALL cp_fm_to_fm(c_tmp, v_mat(ik)%matrix, nmob, 1, 1) 381 CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1) 382 383 CALL cp_fm_triangular_multiply(b_mat, v_mat(ik)%matrix, side="R", invert_tr=.TRUE., & 384 n_rows=nao, n_cols=nmob) 385 386 !Compute A =(V^t)HV 387 CALL cp_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik)%matrix, rzero, hc) 388 CALL cp_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(ik)%matrix, hc, rzero, a_mat) 389 390 !Compute the !residual matrix R !for next !factorisation 391 CALL cp_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(ik)%matrix, a_mat, & 392 rzero, c_tmp) 393 CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc) 394 CALL cp_fm_to_fm(v_mat(ik - 1)%matrix, hc, nmob, 1, 1) 395 CALL cp_fm_triangular_multiply(b_mat, hc, side='R', transpose_tr=.TRUE., & 396 n_rows=nao, n_cols=nmob, alpha=rmone) 397 CALL cp_fm_scale_and_add(rone, c_tmp, rone, hc) 398 399 ! Build the block tridiagonal matrix 400 it = (ik - 2)*nmob + 1 401 jt = (ik - 1)*nmob + 1 402 403 CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob) 404 CALL cp_fm_set_submatrix(t_mat, tblock, jt, jt, nmob, nmob) 405 CALL cp_fm_transpose(b_mat, a_mat) 406 CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob) 407 CALL cp_fm_set_submatrix(t_mat, tblock, it, jt, nmob, nmob) 408 409 END DO ! ik 410 CALL timestop(hand2) 411 412 DEALLOCATE (tblock) 413 414 CALL timeset(routineN//"_diag_tri", hand3) 415 416 CALL choose_eigv_solver(t_mat, t_vec, t_eval) 417 ! Diagonalize the block-tridiagonal matrix 418 CALL timestop(hand3) 419 420 CALL timeset(routineN//"_build_cnew", hand4) 421! !Compute the refined vectors 422 CALL cp_fm_set_all(c2_tmp, rzero) 423 DO ik = 1, krylov_space%nkrylov 424 jt = (ik - 1)*nmob 425 CALL cp_gemm('N', 'N', nao, ndim, nmob, rone, v_mat(ik)%matrix, t_vec, rone, c2_tmp, & 426 b_first_row=(jt + 1)) 427 END DO 428 DEALLOCATE (tvblock) 429 430 CALL cp_fm_set_all(c3_tmp, rzero) 431 CALL cp_gemm('T', 'N', nmob, ndim, nao, rone, v_mat(1)%matrix, c2_tmp, rzero, c3_tmp) 432 433 !Try to avoid linear dependencies 434 ALLOCATE (q_mat(nmob, ndim)) 435 !get max 436 CALL cp_fm_get_submatrix(c3_tmp, q_mat, 1, 1, nmob, ndim) 437 438 ALLOCATE (itaken(ndim)) 439 itaken = 0 440 DO it = 1, nmob 441 vmax = 0.0_dp 442 !select index ik 443 DO jt = 1, ndim 444 IF (itaken(jt) == 0 .AND. ABS(q_mat(it, jt)) > vmax) THEN 445 vmax = ABS(q_mat(it, jt)) 446 ik = jt 447 END IF 448 END DO 449 itaken(ik) = 1 450 451 CALL cp_fm_to_fm(c2_tmp, v_mat(1)%matrix, 1, ik, it) 452 END DO 453 DEALLOCATE (itaken) 454 DEALLOCATE (q_mat) 455 456 !Copy in the converged set to enlarge the converged subspace 457 CALL cp_fm_to_fm(v_mat(1)%matrix, c0, nmob, 1, (nmo_converged + imo_low)) 458 CALL timestop(hand4) 459 460 IF (nmob < nblock) THEN 461 CALL cp_fm_release(a_mat) 462 CALL cp_fm_release(b_mat) 463 CALL cp_fm_release(b2_mat) 464 CALL cp_fm_release(t_mat) 465 CALL cp_fm_release(t_vec) 466 DO ik = 1, krylov_space%nkrylov 467 CALL cp_fm_release(v_mat(ik)%matrix) 468 END DO 469 DEALLOCATE (v_mat) 470 471 END IF 472 CALL cp_fm_release(c2_tmp) 473 CALL cp_fm_release(c3_tmp) 474 END DO ! ib 475 476 CALL timeset(routineN//"_ortho", hand5) 477 CALL cp_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc) 478 479 CALL cp_fm_cholesky_decompose(chc, nmo) 480 CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.TRUE.) 481 CALL timestop(hand5) 482 483 CALL cp_fm_release(c_tmp) 484 CALL cp_fm_release(hc) 485 ELSE 486 CALL cp_fm_release(c_tmp) 487 CALL cp_fm_release(hc) 488 CALL timestop(handle) 489 RETURN 490 END IF 491 492 CALL timestop(handle) 493 494 END SUBROUTINE lanczos_refinement 495 496!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 497 498! ************************************************************************************************** 499!> \brief ... 500!> \param krylov_space ... 501!> \param ks ... 502!> \param c0 ... 503!> \param c1 ... 504!> \param eval ... 505!> \param nao ... 506!> \param eps_iter ... 507!> \param ispin ... 508!> \param check_moconv_only ... 509! ************************************************************************************************** 510 SUBROUTINE lanczos_refinement_2v(krylov_space, ks, c0, c1, eval, nao, & 511 eps_iter, ispin, check_moconv_only) 512 513 TYPE(krylov_space_type), POINTER :: krylov_space 514 TYPE(cp_fm_type), POINTER :: ks, c0, c1 515 REAL(dp), DIMENSION(:), POINTER :: eval 516 INTEGER, INTENT(IN) :: nao 517 REAL(dp), INTENT(IN) :: eps_iter 518 INTEGER, INTENT(IN) :: ispin 519 LOGICAL, INTENT(IN), OPTIONAL :: check_moconv_only 520 521 CHARACTER(LEN=*), PARAMETER :: routineN = 'lanczos_refinement_2v', & 522 routineP = moduleN//':'//routineN 523 REAL(KIND=dp), PARAMETER :: rmone = -1.0_dp, rone = 1.0_dp, & 524 rzero = 0.0_dp 525 526 INTEGER :: hand1, hand2, hand3, hand4, hand5, hand6, handle, i, ia, ib, ik, imo, imo_low, & 527 imo_up, info, it, j, jt, liwork, lwork, nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, & 528 num_blocks 529 INTEGER, ALLOCATABLE, DIMENSION(:) :: itaken 530 INTEGER, DIMENSION(:), POINTER :: iwork 531 LOGICAL :: my_check_moconv_only 532 REAL(dp) :: max_norm, min_norm, vmax 533 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: a_block, b_block, q_mat, t_mat 534 REAL(dp), DIMENSION(:), POINTER :: c_res, t_eval 535 REAL(KIND=dp), DIMENSION(:), POINTER :: work 536 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a_loc, b_loc 537 TYPE(cp_fm_p_type), DIMENSION(:), POINTER :: v_mat 538 TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp 539 TYPE(cp_fm_type), POINTER :: b_mat, c2_tmp, c_tmp, chc, evec, hc, & 540 v_tmp 541 542 CALL timeset(routineN, handle) 543 544 NULLIFY (fm_struct_tmp) 545 NULLIFY (chc, evec) 546 NULLIFY (c_res, t_eval) 547 NULLIFY (hc, c_tmp, c2_tmp) 548 NULLIFY (v_tmp) 549 NULLIFY (b_mat, v_mat) 550 NULLIFY (b_loc, a_loc) 551 552 nmo = SIZE(eval, 1) 553 my_check_moconv_only = .FALSE. 554 IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only 555 556 chc => krylov_space%chc_mat(ispin)%matrix 557 evec => krylov_space%c_vec(ispin)%matrix 558 c_res => krylov_space%c_eval 559 t_eval => krylov_space%t_eval 560 561 NULLIFY (fm_struct_tmp, c_tmp) 562 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, & 563 para_env=c0%matrix_struct%para_env, & 564 context=c0%matrix_struct%context) 565 CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, & 566 name="c_tmp") 567 CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, & 568 name="hc") 569 CALL cp_fm_struct_release(fm_struct_tmp) 570 571 !Compute (C^t)HC 572 CALL cp_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc) 573 CALL cp_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc) 574 575 !Diagonalize (C^t)HC 576 CALL timeset(routineN//"diag_chc", hand1) 577 CALL choose_eigv_solver(chc, evec, eval) 578 579 CALL timestop(hand1) 580 581 CALL timeset(routineN//"check_conv", hand6) 582 !Rotate the C vectors 583 CALL cp_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1) 584 585 !Check for converged states 586 CALL cp_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp) 587 CALL cp_fm_to_fm(c1, c0, nmo, 1, 1) 588 CALL cp_fm_column_scale(c1, eval) 589 CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1) 590 CALL cp_fm_vectorsnorm(c_tmp, c_res) 591 592 nmo_converged = 0 593 nmo_nc = 0 594 max_norm = 0.0_dp 595 min_norm = 1.e10_dp 596 CALL cp_fm_set_all(c1, rzero) 597 DO imo = 1, nmo 598 max_norm = MAX(max_norm, c_res(imo)) 599 min_norm = MIN(min_norm, c_res(imo)) 600 END DO 601 DO imo = 1, nmo 602 IF (c_res(imo) <= eps_iter) THEN 603 nmo_converged = nmo_converged + 1 604 ELSE 605 nmo_nc = nmo - nmo_converged 606 EXIT 607 END IF 608 END DO 609 CALL timestop(hand6) 610 611 CALL cp_fm_release(c_tmp) 612 CALL cp_fm_release(hc) 613 614 krylov_space%nmo_nc = nmo_nc 615 krylov_space%nmo_conv = nmo_converged 616 krylov_space%max_res_norm = max_norm 617 krylov_space%min_res_norm = min_norm 618 619 IF (my_check_moconv_only) THEN 620 ! Do nothing 621 ELSE IF (krylov_space%nmo_nc > 0) THEN 622 623 CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1) 624 625 nblock = krylov_space%nblock 626 IF (MODULO(nmo_nc, nblock) > 0.0_dp) THEN 627 num_blocks = nmo_nc/nblock + 1 628 ELSE 629 num_blocks = nmo_nc/nblock 630 END IF 631 632 DO ib = 1, num_blocks 633 634 imo_low = (ib - 1)*nblock + 1 635 imo_up = MIN(ib*nblock, nmo_nc) 636 nmob = imo_up - imo_low + 1 637 ndim = krylov_space%nkrylov*nmob 638 639 ! Allocation 640 CALL timeset(routineN//"alloc", hand6) 641 ALLOCATE (a_block(nmob, nmob)) 642 ALLOCATE (b_block(nmob, nmob)) 643 ALLOCATE (t_mat(ndim, ndim)) 644 645 NULLIFY (fm_struct_tmp) 646 ! by forcing ncol_block=nmo, the needed part of the matrix is distributed on a subset of processes 647 ! this is due to the use of two-dimensional grids of processes 648 ! nrow_global is distributed over num_pe(1) 649 ! a local_data array is anyway allocated for the processes non included 650 ! this should have a minimum size 651 ! with ncol_local=1. 652 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, & 653 ncol_block=nmob, & 654 para_env=c0%matrix_struct%para_env, & 655 context=c0%matrix_struct%context, & 656 force_block=.TRUE.) 657 CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, & 658 name="c_tmp") 659 CALL cp_fm_set_all(c_tmp, rzero) 660 CALL cp_fm_create(v_tmp, matrix_struct=fm_struct_tmp, & 661 name="v_tmp") 662 CALL cp_fm_set_all(v_tmp, rzero) 663 CALL cp_fm_struct_release(fm_struct_tmp) 664 NULLIFY (fm_struct_tmp) 665 ALLOCATE (v_mat(krylov_space%nkrylov)) 666 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, & 667 ncol_block=nmob, & 668 para_env=c0%matrix_struct%para_env, & 669 context=c0%matrix_struct%context, & 670 force_block=.TRUE.) 671 DO ik = 1, krylov_space%nkrylov 672 CALL cp_fm_create(v_mat(ik)%matrix, matrix_struct=fm_struct_tmp, & 673 name="v_mat") 674 END DO 675 CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, & 676 name="hc") 677 CALL cp_fm_struct_release(fm_struct_tmp) 678 NULLIFY (fm_struct_tmp) 679 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, & 680 ncol_block=ndim, & 681 para_env=c0%matrix_struct%para_env, & 682 context=c0%matrix_struct%context, & 683 force_block=.TRUE.) 684 CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, & 685 name="c2_tmp") 686 CALL cp_fm_struct_release(fm_struct_tmp) 687 688 NULLIFY (fm_struct_tmp) 689 CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, & 690 para_env=c0%matrix_struct%para_env, & 691 context=c0%matrix_struct%context) 692 CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, & 693 name="b_mat") 694 CALL cp_fm_struct_release(fm_struct_tmp) 695 CALL timestop(hand6) 696 !End allocation 697 698 CALL cp_fm_set_all(b_mat, rzero) 699 CALL cp_fm_to_fm(c1, v_mat(1)%matrix, nmob, imo_low, 1) 700 701 ! Here starts the construction of krylov space 702 CALL timeset(routineN//"_kry_loop", hand2) 703 !Compute A =(V^t)HV 704 CALL cp_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1)%matrix, rzero, hc) 705 706 a_block = 0.0_dp 707 a_loc => v_mat(1)%matrix%local_data 708 b_loc => hc%local_data 709 710 IF (SIZE(hc%local_data, 2) == nmob) THEN 711 ! this is a work around to avoid problems due to the two dimensional grid of processes 712 CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), & 713 SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob) 714 END IF 715 CALL mp_sum(a_block, hc%matrix_struct%para_env%group) 716 717 !Compute the residual matrix R for next 718 !factorisation 719 c_tmp%local_data = 0.0_dp 720 IF (SIZE(c_tmp%local_data, 2) == nmob) THEN 721 a_loc => v_mat(1)%matrix%local_data 722 b_loc => c_tmp%local_data 723 CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), & 724 SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, & 725 b_loc(1, 1), SIZE(c_tmp%local_data, 1)) 726 END IF 727 CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc) 728 729 ! Build the block tridiagonal matrix 730 t_mat = 0.0_dp 731 DO i = 1, nmob 732 t_mat(1:nmob, i) = a_block(1:nmob, i) 733 END DO 734 735 DO ik = 2, krylov_space%nkrylov 736 ! Call lapack for QR factorization 737 CALL cp_fm_set_all(b_mat, rzero) 738 CALL cp_fm_to_fm(c_tmp, v_mat(ik)%matrix, nmob, 1, 1) 739 CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1) 740 741 CALL cp_fm_triangular_multiply(b_mat, v_mat(ik)%matrix, side="R", invert_tr=.TRUE., & 742 n_rows=nao, n_cols=nmob) 743 744 CALL cp_fm_get_submatrix(b_mat, b_block, 1, 1, nmob, nmob) 745 746 !Compute A =(V^t)HV 747 CALL cp_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik)%matrix, rzero, hc) 748 749 a_block = 0.0_dp 750 IF (SIZE(hc%local_data, 2) == nmob) THEN 751 a_loc => v_mat(ik)%matrix%local_data 752 b_loc => hc%local_data 753 CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), & 754 SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob) 755 END IF 756 CALL mp_sum(a_block, hc%matrix_struct%para_env%group) 757 758 !Compute the residual matrix R for next 759 !factorisation 760 c_tmp%local_data = 0.0_dp 761 IF (SIZE(c_tmp%local_data, 2) == nmob) THEN 762 a_loc => v_mat(ik)%matrix%local_data 763 b_loc => c_tmp%local_data 764 CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), & 765 SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, & 766 b_loc(1, 1), SIZE(c_tmp%local_data, 1)) 767 END IF 768 CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc) 769 770 IF (SIZE(c_tmp%local_data, 2) == nmob) THEN 771 a_loc => v_mat(ik - 1)%matrix%local_data 772 DO j = 1, nmob 773 DO i = 1, j 774 DO ia = 1, SIZE(c_tmp%local_data, 1) 775 b_loc(ia, i) = b_loc(ia, i) - a_loc(ia, j)*b_block(i, j) 776 END DO 777 END DO 778 END DO 779 END IF 780 781 ! Build the block tridiagonal matrix 782 it = (ik - 2)*nmob 783 jt = (ik - 1)*nmob 784 DO j = 1, nmob 785 t_mat(jt + 1:jt + nmob, jt + j) = a_block(1:nmob, j) 786 DO i = 1, nmob 787 t_mat(it + i, jt + j) = b_block(j, i) 788 t_mat(jt + j, it + i) = b_block(j, i) 789 END DO 790 END DO 791 END DO ! ik 792 CALL timestop(hand2) 793 794 CALL timeset(routineN//"_diag_tri", hand3) 795 lwork = 1 + 6*ndim + 2*ndim**2 + 5000 796 liwork = 5*ndim + 3 797 ALLOCATE (work(lwork)) 798 ALLOCATE (iwork(liwork)) 799 800 ! Diagonalize the block-tridiagonal matrix 801 CALL dsyevd('V', 'U', ndim, t_mat(1, 1), ndim, t_eval(1), & 802 work(1), lwork, iwork(1), liwork, info) 803 DEALLOCATE (work) 804 DEALLOCATE (iwork) 805 CALL timestop(hand3) 806 807 CALL timeset(routineN//"_build_cnew", hand4) 808! !Compute the refined vectors 809 810 c2_tmp%local_data = 0.0_dp 811 ALLOCATE (q_mat(nmob, ndim)) 812 q_mat = 0.0_dp 813 b_loc => c2_tmp%local_data 814 DO ik = 1, krylov_space%nkrylov 815 CALL cp_fm_to_fm(v_mat(ik)%matrix, v_tmp, nmob, 1, 1) 816 IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN 817! a_loc => v_mat(ik)%matrix%local_data 818 a_loc => v_tmp%local_data 819 it = (ik - 1)*nmob 820 CALL dgemm('N', 'N', SIZE(c2_tmp%local_data, 1), ndim, nmob, 1.0_dp, a_loc(1, 1), & 821 SIZE(c2_tmp%local_data, 1), t_mat(it + 1, 1), ndim, 1.0_dp, & 822 b_loc(1, 1), SIZE(c2_tmp%local_data, 1)) 823 END IF 824 END DO !ik 825 826 !Try to avoid linear dependencies 827 CALL cp_fm_to_fm(v_mat(1)%matrix, v_tmp, nmob, 1, 1) 828 IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN 829! a_loc => v_mat(1)%matrix%local_data 830 a_loc => v_tmp%local_data 831 b_loc => c2_tmp%local_data 832 CALL dgemm('T', 'N', nmob, ndim, SIZE(v_tmp%local_data, 1), 1.0_dp, a_loc(1, 1), & 833 SIZE(v_tmp%local_data, 1), b_loc(1, 1), SIZE(v_tmp%local_data, 1), & 834 0.0_dp, q_mat(1, 1), nmob) 835 END IF 836 CALL mp_sum(q_mat, hc%matrix_struct%para_env%group) 837 838 ALLOCATE (itaken(ndim)) 839 itaken = 0 840 DO it = 1, nmob 841 vmax = 0.0_dp 842 !select index ik 843 DO jt = 1, ndim 844 IF (itaken(jt) == 0 .AND. ABS(q_mat(it, jt)) > vmax) THEN 845 vmax = ABS(q_mat(it, jt)) 846 ik = jt 847 END IF 848 END DO 849 itaken(ik) = 1 850 851 CALL cp_fm_to_fm(c2_tmp, v_mat(1)%matrix, 1, ik, it) 852 END DO 853 DEALLOCATE (itaken) 854 DEALLOCATE (q_mat) 855 856 !Copy in the converged set to enlarge the converged subspace 857 CALL cp_fm_to_fm(v_mat(1)%matrix, c0, nmob, 1, (nmo_converged + imo_low)) 858 CALL timestop(hand4) 859 860 CALL cp_fm_release(c2_tmp) 861 CALL cp_fm_release(c_tmp) 862 CALL cp_fm_release(hc) 863 CALL cp_fm_release(v_tmp) 864 CALL cp_fm_release(b_mat) 865 866 DEALLOCATE (t_mat) 867 DEALLOCATE (a_block) 868 DEALLOCATE (b_block) 869 DO ik = 1, krylov_space%nkrylov 870 CALL cp_fm_release(v_mat(ik)%matrix) 871 END DO 872 DEALLOCATE (v_mat) 873 874 END DO ! ib 875 876 CALL timeset(routineN//"_ortho", hand5) 877 CALL cp_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc) 878 879 CALL cp_fm_cholesky_decompose(chc, nmo) 880 CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.TRUE.) 881 CALL timestop(hand5) 882 ELSE 883 ! Do nothing 884 END IF 885 886 CALL timestop(handle) 887 END SUBROUTINE lanczos_refinement_2v 888 889END MODULE qs_scf_lanczos 890