1!--------------------------------------------------------------------------------------------------! 2! CP2K: A general program to perform molecular dynamics simulations ! 3! Copyright (C) 2000 - 2019 CP2K developers group ! 4!--------------------------------------------------------------------------------------------------! 5! ************************************************************************************************** 6!> \brief Routines useful for iterative matrix calculations 7!> \par History 8!> 2010.10 created [Joost VandeVondele] 9!> \author Joost VandeVondele 10! ************************************************************************************************** 11MODULE iterate_matrix 12 USE arnoldi_api, ONLY: arnoldi_data_type,& 13 arnoldi_extremal 14 USE bibliography, ONLY: Richters2018,& 15 cite_reference 16 USE cp_log_handling, ONLY: cp_get_default_logger,& 17 cp_logger_get_default_unit_nr,& 18 cp_logger_type 19 USE dbcsr_api, ONLY: & 20 dbcsr_add, dbcsr_add_on_diag, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, dbcsr_filter, & 21 dbcsr_frobenius_norm, dbcsr_gershgorin_norm, dbcsr_get_diag, dbcsr_get_info, & 22 dbcsr_get_matrix_type, dbcsr_get_occupation, dbcsr_multiply, dbcsr_norm, & 23 dbcsr_norm_maxabsnorm, dbcsr_p_type, dbcsr_release, dbcsr_scale, dbcsr_set, & 24 dbcsr_set_diag, dbcsr_trace, dbcsr_transposed, dbcsr_type, dbcsr_type_no_symmetry 25 USE kinds, ONLY: dp,& 26 int_8 27 USE machine, ONLY: m_flush,& 28 m_walltime 29 USE mathconstants, ONLY: ifac 30 USE mathlib, ONLY: abnormal_value 31#include "./base/base_uses.f90" 32 33 IMPLICIT NONE 34 35 PRIVATE 36 37 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'iterate_matrix' 38 39 INTERFACE purify_mcweeny 40 MODULE PROCEDURE purify_mcweeny_orth, purify_mcweeny_nonorth 41 END INTERFACE 42 43 PUBLIC :: invert_Hotelling, matrix_sign_Newton_Schulz, matrix_sqrt_Newton_Schulz, & 44 matrix_sqrt_proot, matrix_sign_proot, & 45 purify_mcweeny, invert_Taylor, determinant 46 47CONTAINS 48 49! ***************************************************************************** 50!> \brief Computes the determinant of a symmetric positive definite matrix 51!> using the trace of the matrix logarithm via Mercator series: 52!> det(A) = det(S)det(I+X)det(S), where S=diag(sqrt(Aii),..,sqrt(Ann)) 53!> det(I+X) = Exp(Trace(Ln(I+X))) 54!> Ln(I+X) = X - X^2/2 + X^3/3 - X^4/4 + .. 55!> The series converges only if the Frobenius norm of X is less than 1. 56!> If it is more than one we compute (recursevily) the determinant of 57!> the square root of (I+X). 58!> \param matrix ... 59!> \param det - determinant 60!> \param threshold ... 61!> \par History 62!> 2015.04 created [Rustam Z Khaliullin] 63!> \author Rustam Z. Khaliullin 64! ************************************************************************************************** 65 RECURSIVE SUBROUTINE determinant(matrix, det, threshold) 66 67 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 68 REAL(KIND=dp), INTENT(INOUT) :: det 69 REAL(KIND=dp), INTENT(IN) :: threshold 70 71 CHARACTER(LEN=*), PARAMETER :: routineN = 'determinant', routineP = moduleN//':'//routineN 72 73 INTEGER :: handle, i, max_iter_lanczos, nsize, & 74 order_lanczos, sign_iter, unit_nr 75 INTEGER(KIND=int_8) :: flop1 76 INTEGER, SAVE :: recursion_depth = 0 77 REAL(KIND=dp) :: det0, eps_lanczos, frobnorm, maxnorm, & 78 occ_matrix, t1, t2, trace 79 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: diagonal 80 TYPE(cp_logger_type), POINTER :: logger 81 TYPE(dbcsr_type) :: tmp1, tmp2, tmp3 82 83 CALL timeset(routineN, handle) 84 85 ! get a useful output_unit 86 logger => cp_get_default_logger() 87 IF (logger%para_env%ionode) THEN 88 unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.) 89 ELSE 90 unit_nr = -1 91 ENDIF 92 93 ! Note: tmp1 and tmp2 have the same matrix type as the 94 ! initial matrix (tmp3 does not have symmetry constraints) 95 ! this might lead to uninteded results with anti-symmetric 96 ! matrices 97 CALL dbcsr_create(tmp1, template=matrix, & 98 matrix_type=dbcsr_type_no_symmetry) 99 CALL dbcsr_create(tmp2, template=matrix, & 100 matrix_type=dbcsr_type_no_symmetry) 101 CALL dbcsr_create(tmp3, template=matrix, & 102 matrix_type=dbcsr_type_no_symmetry) 103 104 ! compute the product of the diagonal elements 105 CALL dbcsr_get_info(matrix, nfullrows_total=nsize) 106 ALLOCATE (diagonal(nsize)) 107 CALL dbcsr_get_diag(matrix, diagonal) 108 det = PRODUCT(diagonal) 109 110 ! create diagonal SQRTI matrix 111 diagonal(:) = 1.0_dp/(SQRT(diagonal(:))) 112 !ROLL CALL dbcsr_copy(tmp1,matrix) 113 CALL dbcsr_desymmetrize(matrix, tmp1) 114 CALL dbcsr_set(tmp1, 0.0_dp) 115 CALL dbcsr_set_diag(tmp1, diagonal) 116 CALL dbcsr_filter(tmp1, threshold) 117 DEALLOCATE (diagonal) 118 119 ! normalize the main diagonal, off-diagonal elements are scaled to 120 ! make the norm of the matrix less than 1 121 CALL dbcsr_multiply("N", "N", 1.0_dp, & 122 matrix, & 123 tmp1, & 124 0.0_dp, tmp3, & 125 filter_eps=threshold) 126 CALL dbcsr_multiply("N", "N", 1.0_dp, & 127 tmp1, & 128 tmp3, & 129 0.0_dp, tmp2, & 130 filter_eps=threshold) 131 132 ! subtract the main diagonal to create matrix X 133 CALL dbcsr_add_on_diag(tmp2, -1.0_dp) 134 frobnorm = dbcsr_frobenius_norm(tmp2) 135 IF (unit_nr > 0) THEN 136 IF (recursion_depth .EQ. 0) THEN 137 WRITE (unit_nr, '()') 138 ELSE 139 WRITE (unit_nr, '(T6,A28,1X,I15)') & 140 "Recursive iteration:", recursion_depth 141 ENDIF 142 WRITE (unit_nr, '(T6,A28,1X,F15.10)') & 143 "Frobenius norm:", frobnorm 144 CALL m_flush(unit_nr) 145 ENDIF 146 147 IF (frobnorm .GE. 1.0_dp) THEN 148 149 CALL dbcsr_add_on_diag(tmp2, 1.0_dp) 150 ! these controls should be provided as input 151 order_lanczos = 3 152 eps_lanczos = 1.0E-4_dp 153 max_iter_lanczos = 40 154 CALL matrix_sqrt_Newton_Schulz( & 155 tmp3, & ! output sqrt 156 tmp1, & ! output sqrti 157 tmp2, & ! input original 158 threshold=threshold, & 159 order=order_lanczos, & 160 eps_lanczos=eps_lanczos, & 161 max_iter_lanczos=max_iter_lanczos) 162 recursion_depth = recursion_depth + 1 163 CALL determinant(tmp3, det0, threshold) 164 recursion_depth = recursion_depth - 1 165 det = det*det0*det0 166 167 ELSE 168 169 ! create accumulator 170 CALL dbcsr_copy(tmp1, tmp2) 171 ! re-create to make use of symmetry 172 !ROLL CALL dbcsr_create(tmp3,template=matrix) 173 174 IF (unit_nr > 0) WRITE (unit_nr, *) 175 176 ! initialize the sign of the term 177 sign_iter = -1 178 DO i = 1, 100 179 180 t1 = m_walltime() 181 182 ! multiply X^i by X 183 ! note that the first iteration evaluates X^2 184 ! because the trace of X^1 is zero by construction 185 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp2, & 186 0.0_dp, tmp3, & 187 filter_eps=threshold, & 188 flop=flop1) 189 CALL dbcsr_copy(tmp1, tmp3) 190 191 ! get trace 192 CALL dbcsr_trace(tmp1, trace) 193 trace = trace*sign_iter/(1.0_dp*(i + 1)) 194 sign_iter = -sign_iter 195 196 ! update the determinant 197 det = det*EXP(trace) 198 199 occ_matrix = dbcsr_get_occupation(tmp1) 200 CALL dbcsr_norm(tmp1, & 201 dbcsr_norm_maxabsnorm, norm_scalar=maxnorm) 202 203 t2 = m_walltime() 204 205 IF (unit_nr > 0) THEN 206 WRITE (unit_nr, '(T6,A,1X,I3,1X,F7.5,F16.10,F10.3,F11.3)') & 207 "Determinant iter", i, occ_matrix, & 208 det, t2 - t1, & 209 flop1/(1.0E6_dp*MAX(0.001_dp, t2 - t1)) 210 CALL m_flush(unit_nr) 211 ENDIF 212 213 ! exit if the trace is close to zero 214 IF (maxnorm < threshold) EXIT 215 216 ENDDO ! end iterations 217 218 IF (unit_nr > 0) THEN 219 WRITE (unit_nr, '()') 220 CALL m_flush(unit_nr) 221 ENDIF 222 223 ENDIF ! decide to do sqrt or not 224 225 IF (unit_nr > 0) THEN 226 IF (recursion_depth .EQ. 0) THEN 227 WRITE (unit_nr, '(T6,A28,1X,F15.10)') & 228 "Final determinant:", det 229 WRITE (unit_nr, '()') 230 ELSE 231 WRITE (unit_nr, '(T6,A28,1X,F15.10)') & 232 "Recursive determinant:", det 233 ENDIF 234 CALL m_flush(unit_nr) 235 ENDIF 236 237 CALL dbcsr_release(tmp1) 238 CALL dbcsr_release(tmp2) 239 CALL dbcsr_release(tmp3) 240 241 CALL timestop(handle) 242 243 END SUBROUTINE determinant 244 245! ************************************************************************************************** 246!> \brief invert a symmetric positive definite diagonally dominant matrix 247!> \param matrix_inverse ... 248!> \param matrix ... 249!> \param threshold convergence threshold nased on the max abs 250!> \param use_inv_as_guess logical whether input can be used as guess for inverse 251!> \param norm_convergence convergence threshold for the 2-norm, useful for approximate solutions 252!> \param filter_eps filter_eps for matrix multiplications, if not passed nothing is filteres 253!> \param accelerator_order ... 254!> \param max_iter_lanczos ... 255!> \param eps_lanczos ... 256!> \param silent ... 257!> \par History 258!> 2010.10 created [Joost VandeVondele] 259!> 2011.10 guess option added [Rustam Z Khaliullin] 260!> \author Joost VandeVondele 261! ************************************************************************************************** 262 SUBROUTINE invert_Taylor(matrix_inverse, matrix, threshold, use_inv_as_guess, & 263 norm_convergence, filter_eps, accelerator_order, & 264 max_iter_lanczos, eps_lanczos, silent) 265 266 TYPE(dbcsr_type), INTENT(INOUT), TARGET :: matrix_inverse, matrix 267 REAL(KIND=dp), INTENT(IN) :: threshold 268 LOGICAL, INTENT(IN), OPTIONAL :: use_inv_as_guess 269 REAL(KIND=dp), INTENT(IN), OPTIONAL :: norm_convergence, filter_eps 270 INTEGER, INTENT(IN), OPTIONAL :: accelerator_order, max_iter_lanczos 271 REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_lanczos 272 LOGICAL, INTENT(IN), OPTIONAL :: silent 273 274 CHARACTER(LEN=*), PARAMETER :: routineN = 'invert_Taylor', routineP = moduleN//':'//routineN 275 276 INTEGER :: accelerator_type, handle, i, & 277 my_max_iter_lanczos, nrows, unit_nr 278 INTEGER(KIND=int_8) :: flop2 279 LOGICAL :: converged, use_inv_guess 280 REAL(KIND=dp) :: coeff, convergence, maxnorm_matrix, & 281 my_eps_lanczos, occ_matrix, t1, t2 282 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: p_diagonal 283 TYPE(cp_logger_type), POINTER :: logger 284 TYPE(dbcsr_type), TARGET :: tmp1, tmp2, tmp3_sym 285 286 CALL timeset(routineN, handle) 287 288 logger => cp_get_default_logger() 289 IF (logger%para_env%ionode) THEN 290 unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.) 291 ELSE 292 unit_nr = -1 293 ENDIF 294 IF (PRESENT(silent)) THEN 295 IF (silent) unit_nr = -1 296 END IF 297 298 convergence = threshold 299 IF (PRESENT(norm_convergence)) convergence = norm_convergence 300 301 accelerator_type = 0 302 IF (PRESENT(accelerator_order)) accelerator_type = accelerator_order 303 IF (accelerator_type .GT. 1) accelerator_type = 1 304 305 use_inv_guess = .FALSE. 306 IF (PRESENT(use_inv_as_guess)) use_inv_guess = use_inv_as_guess 307 308 my_max_iter_lanczos = 64 309 my_eps_lanczos = 1.0E-3_dp 310 IF (PRESENT(max_iter_lanczos)) my_max_iter_lanczos = max_iter_lanczos 311 IF (PRESENT(eps_lanczos)) my_eps_lanczos = eps_lanczos 312 313 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry) 314 CALL dbcsr_create(tmp2, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry) 315 CALL dbcsr_create(tmp3_sym, template=matrix_inverse) 316 317 CALL dbcsr_get_info(matrix, nfullrows_total=nrows) 318 ALLOCATE (p_diagonal(nrows)) 319 320 ! generate the initial guess 321 IF (.NOT. use_inv_guess) THEN 322 323 SELECT CASE (accelerator_type) 324 CASE (0) 325 ! use tmp1 to hold off-diagonal elements 326 CALL dbcsr_desymmetrize(matrix, tmp1) 327 p_diagonal(:) = 0.0_dp 328 CALL dbcsr_set_diag(tmp1, p_diagonal) 329 !CALL dbcsr_print(tmp1) 330 ! invert the main diagonal 331 CALL dbcsr_get_diag(matrix, p_diagonal) 332 DO i = 1, nrows 333 IF (p_diagonal(i) .NE. 0.0_dp) THEN 334 p_diagonal(i) = 1.0_dp/p_diagonal(i) 335 ENDIF 336 ENDDO 337 CALL dbcsr_set(matrix_inverse, 0.0_dp) 338 CALL dbcsr_add_on_diag(matrix_inverse, 1.0_dp) 339 CALL dbcsr_set_diag(matrix_inverse, p_diagonal) 340 CASE DEFAULT 341 CPABORT("Illegal accelerator order") 342 END SELECT 343 344 ELSE 345 346 CPABORT("Guess is NYI") 347 348 ENDIF 349 350 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, matrix_inverse, & 351 0.0_dp, tmp2, filter_eps=filter_eps) 352 353 IF (unit_nr > 0) WRITE (unit_nr, *) 354 355 ! scale the approximate inverse to be within the convergence radius 356 t1 = m_walltime() 357 358 ! done with the initial guess, start iterations 359 converged = .FALSE. 360 CALL dbcsr_desymmetrize(matrix_inverse, tmp1) 361 coeff = 1.0_dp 362 DO i = 1, 100 363 364 ! coeff = +/- 1 365 coeff = -1.0_dp*coeff 366 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp2, 0.0_dp, & 367 tmp3_sym, & 368 flop=flop2, filter_eps=filter_eps) 369 !flop=flop2) 370 CALL dbcsr_add(matrix_inverse, tmp3_sym, 1.0_dp, coeff) 371 CALL dbcsr_release(tmp1) 372 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry) 373 CALL dbcsr_desymmetrize(tmp3_sym, tmp1) 374 375 ! for the convergence check 376 CALL dbcsr_norm(tmp3_sym, & 377 dbcsr_norm_maxabsnorm, norm_scalar=maxnorm_matrix) 378 379 t2 = m_walltime() 380 occ_matrix = dbcsr_get_occupation(matrix_inverse) 381 382 IF (unit_nr > 0) THEN 383 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "Taylor iter", i, occ_matrix, & 384 maxnorm_matrix, t2 - t1, & 385 flop2/(1.0E6_dp*MAX(0.001_dp, t2 - t1)) 386 CALL m_flush(unit_nr) 387 ENDIF 388 389 IF (maxnorm_matrix < convergence) THEN 390 converged = .TRUE. 391 EXIT 392 ENDIF 393 394 t1 = m_walltime() 395 396 ENDDO 397 398 !last convergence check 399 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix, matrix_inverse, 0.0_dp, tmp1, & 400 filter_eps=filter_eps) 401 CALL dbcsr_add_on_diag(tmp1, -1.0_dp) 402 !frob_matrix = dbcsr_frobenius_norm(tmp1) 403 CALL dbcsr_norm(tmp1, dbcsr_norm_maxabsnorm, norm_scalar=maxnorm_matrix) 404 IF (unit_nr > 0) THEN 405 WRITE (unit_nr, '(T6,A,E12.5)') "Final Taylor error", maxnorm_matrix 406 WRITE (unit_nr, '()') 407 CALL m_flush(unit_nr) 408 ENDIF 409 IF (maxnorm_matrix > convergence) THEN 410 converged = .FALSE. 411 IF (unit_nr > 0) THEN 412 WRITE (unit_nr, *) 'Final convergence check failed' 413 ENDIF 414 ENDIF 415 416 IF (.NOT. converged) THEN 417 CPABORT("Taylor inversion did not converge") 418 ENDIF 419 420 CALL dbcsr_release(tmp1) 421 CALL dbcsr_release(tmp2) 422 CALL dbcsr_release(tmp3_sym) 423 424 DEALLOCATE (p_diagonal) 425 426 CALL timestop(handle) 427 428 END SUBROUTINE invert_Taylor 429 430! ************************************************************************************************** 431!> \brief invert a symmetric positive definite matrix by Hotelling's method 432!> explicit symmetrization makes this code not suitable for other matrix types 433!> Currently a bit messy with the options, to to be cleaned soon 434!> \param matrix_inverse ... 435!> \param matrix ... 436!> \param threshold convergence threshold nased on the max abs 437!> \param use_inv_as_guess logical whether input can be used as guess for inverse 438!> \param norm_convergence convergence threshold for the 2-norm, useful for approximate solutions 439!> \param filter_eps filter_eps for matrix multiplications, if not passed nothing is filteres 440!> \param accelerator_order ... 441!> \param max_iter_lanczos ... 442!> \param eps_lanczos ... 443!> \param silent ... 444!> \par History 445!> 2010.10 created [Joost VandeVondele] 446!> 2011.10 guess option added [Rustam Z Khaliullin] 447!> \author Joost VandeVondele 448! ************************************************************************************************** 449 SUBROUTINE invert_Hotelling(matrix_inverse, matrix, threshold, use_inv_as_guess, & 450 norm_convergence, filter_eps, accelerator_order, & 451 max_iter_lanczos, eps_lanczos, silent) 452 453 TYPE(dbcsr_type), INTENT(INOUT), TARGET :: matrix_inverse, matrix 454 REAL(KIND=dp), INTENT(IN) :: threshold 455 LOGICAL, INTENT(IN), OPTIONAL :: use_inv_as_guess 456 REAL(KIND=dp), INTENT(IN), OPTIONAL :: norm_convergence, filter_eps 457 INTEGER, INTENT(IN), OPTIONAL :: accelerator_order, max_iter_lanczos 458 REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_lanczos 459 LOGICAL, INTENT(IN), OPTIONAL :: silent 460 461 CHARACTER(LEN=*), PARAMETER :: routineN = 'invert_Hotelling', & 462 routineP = moduleN//':'//routineN 463 464 INTEGER :: accelerator_type, handle, i, & 465 my_max_iter_lanczos, unit_nr 466 INTEGER(KIND=int_8) :: flop1, flop2 467 LOGICAL :: arnoldi_converged, converged, & 468 use_inv_guess 469 REAL(KIND=dp) :: convergence, frob_matrix, gershgorin_norm, max_ev, maxnorm_matrix, min_ev, & 470 my_eps_lanczos, my_filter_eps, occ_matrix, scalingf, t1, t2 471 TYPE(cp_logger_type), POINTER :: logger 472 TYPE(dbcsr_type), TARGET :: tmp1, tmp2 473 474 !TYPE(arnoldi_data_type) :: my_arnoldi 475 !TYPE(dbcsr_p_type), DIMENSION(1) :: mymat 476 477 CALL timeset(routineN, handle) 478 479 logger => cp_get_default_logger() 480 IF (logger%para_env%ionode) THEN 481 unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.) 482 ELSE 483 unit_nr = -1 484 ENDIF 485 IF (PRESENT(silent)) THEN 486 IF (silent) unit_nr = -1 487 END IF 488 489 convergence = threshold 490 IF (PRESENT(norm_convergence)) convergence = norm_convergence 491 492 accelerator_type = 1 493 IF (PRESENT(accelerator_order)) accelerator_type = accelerator_order 494 IF (accelerator_type .GT. 1) accelerator_type = 1 495 496 use_inv_guess = .FALSE. 497 IF (PRESENT(use_inv_as_guess)) use_inv_guess = use_inv_as_guess 498 499 my_max_iter_lanczos = 64 500 my_eps_lanczos = 1.0E-3_dp 501 IF (PRESENT(max_iter_lanczos)) my_max_iter_lanczos = max_iter_lanczos 502 IF (PRESENT(eps_lanczos)) my_eps_lanczos = eps_lanczos 503 504 my_filter_eps = threshold 505 IF (PRESENT(filter_eps)) my_filter_eps = filter_eps 506 507 ! generate the initial guess 508 IF (.NOT. use_inv_guess) THEN 509 510 SELECT CASE (accelerator_type) 511 CASE (0) 512 gershgorin_norm = dbcsr_gershgorin_norm(matrix) 513 frob_matrix = dbcsr_frobenius_norm(matrix) 514 CALL dbcsr_set(matrix_inverse, 0.0_dp) 515 CALL dbcsr_add_on_diag(matrix_inverse, 1/MIN(gershgorin_norm, frob_matrix)) 516 CASE (1) 517 ! initialize matrix to unity and use arnoldi (below) to scale it into the convergence range 518 CALL dbcsr_set(matrix_inverse, 0.0_dp) 519 CALL dbcsr_add_on_diag(matrix_inverse, 1.0_dp) 520 CASE DEFAULT 521 CPABORT("Illegal accelerator order") 522 END SELECT 523 524 ! everything commutes, therefore our all products will be symmetric 525 CALL dbcsr_create(tmp1, template=matrix_inverse) 526 527 ELSE 528 529 ! It is unlikely that our guess will commute with the matrix, therefore the first product will 530 ! be non symmetric 531 CALL dbcsr_create(tmp1, template=matrix_inverse, matrix_type=dbcsr_type_no_symmetry) 532 533 ENDIF 534 535 CALL dbcsr_create(tmp2, template=matrix_inverse) 536 537 IF (unit_nr > 0) WRITE (unit_nr, *) 538 539 ! scale the approximate inverse to be within the convergence radius 540 t1 = m_walltime() 541 542 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_inverse, matrix, & 543 0.0_dp, tmp1, flop=flop1, filter_eps=my_filter_eps) 544 545 IF (accelerator_type == 1) THEN 546 547 ! scale the matrix to get into the convergence range 548 CALL arnoldi_extremal(tmp1, max_eV, min_eV, threshold=my_eps_lanczos, & 549 max_iter=my_max_iter_lanczos, converged=arnoldi_converged) 550 !mymat(1)%matrix => tmp1 551 !CALL setup_arnoldi_data(my_arnoldi, mymat, max_iter=30, threshold=1.0E-3_dp, selection_crit=1, & 552 ! nval_request=2, nrestarts=2, generalized_ev=.FALSE., iram=.TRUE.) 553 !CALL arnoldi_ev(mymat, my_arnoldi) 554 !max_eV = REAL(get_selected_ritz_val(my_arnoldi, 2), dp) 555 !min_eV = REAL(get_selected_ritz_val(my_arnoldi, 1), dp) 556 !CALL deallocate_arnoldi_data(my_arnoldi) 557 558 IF (unit_nr > 0) THEN 559 WRITE (unit_nr, *) 560 WRITE (unit_nr, '(T6,A,1X,L1,A,E12.3)') "Lanczos converged: ", arnoldi_converged, " threshold:", my_eps_lanczos 561 WRITE (unit_nr, '(T6,A,1X,E12.3,E12.3)') "Est. extremal eigenvalues:", max_eV, min_eV 562 WRITE (unit_nr, '(T6,A,1X,E12.3)') "Est. condition number :", max_eV/MAX(min_eV, EPSILON(min_eV)) 563 ENDIF 564 565 ! 2.0 would be the correct scaling however, we should make sure here, that we are in the convergence radius 566 scalingf = 1.9_dp/(max_eV + min_eV) 567 CALL dbcsr_scale(tmp1, scalingf) 568 CALL dbcsr_scale(matrix_inverse, scalingf) 569 min_ev = min_ev*scalingf 570 571 ENDIF 572 573 ! done with the initial guess, start iterations 574 converged = .FALSE. 575 DO i = 1, 100 576 577 ! tmp1 = S^-1 S 578 579 ! for the convergence check 580 CALL dbcsr_add_on_diag(tmp1, -1.0_dp) 581 CALL dbcsr_norm(tmp1, & 582 dbcsr_norm_maxabsnorm, norm_scalar=maxnorm_matrix) 583 CALL dbcsr_add_on_diag(tmp1, +1.0_dp) 584 585 ! tmp2 = S^-1 S S^-1 586 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, matrix_inverse, 0.0_dp, tmp2, & 587 flop=flop2, filter_eps=my_filter_eps) 588 ! S^-1_{n+1} = 2 S^-1 - S^-1 S S^-1 589 CALL dbcsr_add(matrix_inverse, tmp2, 2.0_dp, -1.0_dp) 590 591 CALL dbcsr_filter(matrix_inverse, my_filter_eps) 592 t2 = m_walltime() 593 occ_matrix = dbcsr_get_occupation(matrix_inverse) 594 595 ! use the scalar form of the algorithm to trace the EV 596 IF (accelerator_type == 1) THEN 597 min_ev = min_ev*(2.0_dp - min_ev) 598 IF (PRESENT(norm_convergence)) maxnorm_matrix = ABS(min_eV - 1.0_dp) 599 ENDIF 600 601 IF (unit_nr > 0) THEN 602 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "Hotelling iter", i, occ_matrix, & 603 maxnorm_matrix, t2 - t1, & 604 (flop1 + flop2)/(1.0E6_dp*MAX(0.001_dp, t2 - t1)) 605 CALL m_flush(unit_nr) 606 ENDIF 607 608 IF (maxnorm_matrix < convergence) THEN 609 converged = .TRUE. 610 EXIT 611 ENDIF 612 613 ! scale the matrix for improved convergence 614 IF (accelerator_type == 1) THEN 615 min_ev = min_ev*2.0_dp/(min_ev + 1.0_dp) 616 CALL dbcsr_scale(matrix_inverse, 2.0_dp/(min_ev + 1.0_dp)) 617 ENDIF 618 619 t1 = m_walltime() 620 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_inverse, matrix, & 621 0.0_dp, tmp1, flop=flop1, filter_eps=my_filter_eps) 622 623 ENDDO 624 625 IF (.NOT. converged) THEN 626 CPABORT("Hotelling inversion did not converge") 627 ENDIF 628 629 ! try to symmetrize the output matrix 630 IF (dbcsr_get_matrix_type(matrix_inverse) == dbcsr_type_no_symmetry) THEN 631 CALL dbcsr_transposed(tmp2, matrix_inverse) 632 CALL dbcsr_add(matrix_inverse, tmp2, 0.5_dp, 0.5_dp) 633 END IF 634 635 IF (unit_nr > 0) THEN 636! WRITE(unit_nr,'(T6,A,1X,I3,1X,F10.8,E12.3)') "Final Hotelling ",i,occ_matrix,& 637! !frob_matrix/frob_matrix_base 638! maxnorm_matrix 639 WRITE (unit_nr, '()') 640 CALL m_flush(unit_nr) 641 ENDIF 642 643 CALL dbcsr_release(tmp1) 644 CALL dbcsr_release(tmp2) 645 646 CALL timestop(handle) 647 648 END SUBROUTINE invert_Hotelling 649 650! ************************************************************************************************** 651!> \brief compute the sign a matrix using Newton-Schulz iterations 652!> \param matrix_sign ... 653!> \param matrix ... 654!> \param threshold ... 655!> \param sign_order ... 656!> \par History 657!> 2010.10 created [Joost VandeVondele] 658!> 2019.05 extended to order byxond 2 [Robert Schade] 659!> \author Joost VandeVondele, Robert Schade 660! ************************************************************************************************** 661 SUBROUTINE matrix_sign_Newton_Schulz(matrix_sign, matrix, threshold, sign_order) 662 663 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sign, matrix 664 REAL(KIND=dp), INTENT(IN) :: threshold 665 INTEGER, INTENT(IN), OPTIONAL :: sign_order 666 667 CHARACTER(LEN=*), PARAMETER :: routineN = 'matrix_sign_Newton_Schulz', & 668 routineP = moduleN//':'//routineN 669 670 INTEGER :: count, handle, i, order, unit_nr 671 INTEGER(KIND=int_8) :: flops 672 REAL(KIND=dp) :: a0, a1, a2, a3, a4, a5, floptot, & 673 frob_matrix, frob_matrix_base, & 674 gersh_matrix, occ_matrix, prefactor, & 675 t1, t2 676 TYPE(cp_logger_type), POINTER :: logger 677 TYPE(dbcsr_type) :: tmp1, tmp2, tmp3, tmp4 678 679 CALL timeset(routineN, handle) 680 681 logger => cp_get_default_logger() 682 IF (logger%para_env%ionode) THEN 683 unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.) 684 ELSE 685 unit_nr = -1 686 ENDIF 687 688 IF (PRESENT(sign_order)) THEN 689 order = sign_order 690 ELSE 691 order = 2 692 ENDIF 693 694 CALL dbcsr_create(tmp1, template=matrix_sign) 695 696 CALL dbcsr_create(tmp2, template=matrix_sign) 697 IF (order .GE. 4) THEN 698 CALL dbcsr_create(tmp3, template=matrix_sign) 699 ENDIF 700 IF (order .GE. 7) THEN 701 CALL dbcsr_create(tmp4, template=matrix_sign) 702 ENDIF 703 704 CALL dbcsr_copy(matrix_sign, matrix) 705 CALL dbcsr_filter(matrix_sign, threshold) 706 707 ! scale the matrix to get into the convergence range 708 frob_matrix = dbcsr_frobenius_norm(matrix_sign) 709 gersh_matrix = dbcsr_gershgorin_norm(matrix_sign) 710 CALL dbcsr_scale(matrix_sign, 1/MIN(frob_matrix, gersh_matrix)) 711 712 IF (unit_nr > 0) WRITE (unit_nr, *) 713 714 count = 0 715 DO i = 1, 100 716 floptot = 0_dp 717 t1 = m_walltime() 718 ! tmp1 = X * X 719 CALL dbcsr_multiply("N", "N", -1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, & 720 filter_eps=threshold, flop=flops) 721 floptot = floptot + flops 722 723 ! check convergence (frob norm of what should be the identity matrix minus identity matrix) 724 frob_matrix_base = dbcsr_frobenius_norm(tmp1) 725 CALL dbcsr_add_on_diag(tmp1, +1.0_dp) 726 frob_matrix = dbcsr_frobenius_norm(tmp1) 727 728 ! f(y) approx 1/sqrt(1-y) 729 ! f(y)=1+y/2+3/8*y^2+5/16*y^3+35/128*y^4+63/256*y^5+231/1024*y^6 730 ! f2(y)=1+y/2=1/2*(2+y) 731 ! f3(y)=1+y/2+3/8*y^2=3/8*(8/3+4/3*y+y^2) 732 ! f4(y)=1+y/2+3/8*y^2+5/16*y^3=5/16*(16/5+8/5*y+6/5*y^2+y^3) 733 ! f5(y)=1+y/2+3/8*y^2+5/16*y^3+35/128*y^4=35/128*(128/35+128/70*y+48/35*y^2+8/7*y^3+y^4) 734 ! z(y)=(y+a_0)*y+a_1 735 ! f5(y)=35/128*((z(y)+y+a_2)*z(y)+a_3) 736 ! =35/128*((a_1^2+a_1a_2+a_3)+(2*a_0a_1+a_1+a_0a_2)y+(a_0^2+a_0+2a_1+a_2)y^2+(2a_0+1)y^3+y^4) 737 ! a_0=1/14 738 ! a_1=23819/13720 739 ! a_2=1269/980-2a_1=-3734/1715 740 ! a_3=832591127/188238400 741 ! f6(y)=1+y/2+3/8*y^2+5/16*y^3+35/128*y^4+63/256*y^5 742 ! =63/256*(256/63 + (128 y)/63 + (32 y^2)/21 + (80 y^3)/63 + (10 y^4)/9 + y^5) 743 ! f7(y)=1+y/2+3/8*y^2+5/16*y^3+35/128*y^4+63/256*y^5+231/1024*y^6 744 ! =231/1024*(1024/231+512/231*y+128/77*y^2+320/231*y^3+40/33*y^4+12/11*y^5+y^6) 745 ! z(y)=(y+a_0)*y+a_1 746 ! w(y)=(y+a_2)*z(y)+a_3 747 ! f7(y)=(w(y)+z(y)+a_4)*w(y)+a_5 748 ! a_0= 1.3686502058092053653287666647611728507211996691324048468010382350359929055186612505791532871573242422 749 ! a_1= 1.7089671854477436685850554669524985556296280184497503489303331821456795715195510972774979091893741568 750 ! a_2=-1.3231956603546599107833121193066273961757451236778593922555836895814474509732067051246078326118696968 751 ! a_3= 3.9876642330847931291749479958277754186675336169578593000744380254770411483327581042259415937710270453 752 ! a_4=-3.7273299006476825027065704937541279833880400042556351139273912137942678919776364526511485025132991667 753 ! a_5= 4.9369932474103023792021351907971943220607580694533770325967170245194362399287150565595441897740173578 754 ! 755 ! y=1-X*X 756 757 ! tmp1 = I-x*x 758 IF (order .EQ. 2) THEN 759 prefactor = 0.5_dp 760 761 ! update the above to 3*I-X*X 762 CALL dbcsr_add_on_diag(tmp1, +2.0_dp) 763 occ_matrix = dbcsr_get_occupation(matrix_sign) 764 ELSE IF (order .EQ. 3) THEN 765 ! with one multiplication 766 ! tmp1=y 767 CALL dbcsr_copy(tmp2, tmp1) 768 CALL dbcsr_scale(tmp1, 4.0_dp/3.0_dp) 769 CALL dbcsr_add_on_diag(tmp1, 8.0_dp/3.0_dp) 770 771 ! tmp2=y^2 772 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp2, 1.0_dp, tmp1, & 773 filter_eps=threshold, flop=flops) 774 floptot = floptot + flops 775 prefactor = 3.0_dp/8.0_dp 776 777 ELSE IF (order .EQ. 4) THEN 778 ! with two multiplications 779 ! tmp1=y 780 CALL dbcsr_copy(tmp3, tmp1) 781 CALL dbcsr_scale(tmp1, 8.0_dp/5.0_dp) 782 CALL dbcsr_add_on_diag(tmp1, 16.0_dp/5.0_dp) 783 784 ! 785 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, & 786 filter_eps=threshold, flop=flops) 787 floptot = floptot + flops 788 789 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 6.0_dp/5.0_dp) 790 791 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 1.0_dp, tmp1, & 792 filter_eps=threshold, flop=flops) 793 floptot = floptot + flops 794 795 prefactor = 5.0_dp/16.0_dp 796 ELSE IF (order .EQ. -5) THEN 797 ! with three multiplications 798 ! tmp1=y 799 CALL dbcsr_copy(tmp3, tmp1) 800 CALL dbcsr_scale(tmp1, 128.0_dp/70.0_dp) 801 CALL dbcsr_add_on_diag(tmp1, 128.0_dp/35.0_dp) 802 803 ! 804 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, & 805 filter_eps=threshold, flop=flops) 806 floptot = floptot + flops 807 808 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 48.0_dp/35.0_dp) 809 810 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp2, & 811 filter_eps=threshold, flop=flops) 812 floptot = floptot + flops 813 814 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 8.0_dp/7.0_dp) 815 816 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 1.0_dp, tmp1, & 817 filter_eps=threshold, flop=flops) 818 floptot = floptot + flops 819 820 prefactor = 35.0_dp/128.0_dp 821 ELSE IF (order .EQ. 5) THEN 822 ! with two multiplications 823 ! z(y)=(y+a_0)*y+a_1 824 ! f5(y)=35/128*((z(y)+y+a_2)*z(y)+a_3) 825 ! =35/128*((a_1^2+a_1a_2+a_3)+(2*a_0a_1+a_1+a_0a_2)y+(a_0^2+a_0+2a_1+a_2)y^2+(2a_0+1)y^3+y^4) 826 ! a_0=1/14 827 ! a_1=23819/13720 828 ! a_2=1269/980-2a_1=-3734/1715 829 ! a_3=832591127/188238400 830 a0 = 1.0_dp/14.0_dp 831 a1 = 23819.0_dp/13720.0_dp 832 a2 = -3734_dp/1715.0_dp 833 a3 = 832591127_dp/188238400.0_dp 834 835 ! tmp1=y 836 ! tmp3=z 837 CALL dbcsr_copy(tmp3, tmp1) 838 CALL dbcsr_add_on_diag(tmp3, a0) 839 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp1, 0.0_dp, tmp3, & 840 filter_eps=threshold, flop=flops) 841 floptot = floptot + flops 842 CALL dbcsr_add_on_diag(tmp3, a1) 843 844 CALL dbcsr_add_on_diag(tmp1, a2) 845 CALL dbcsr_add(tmp1, tmp3, 1.0_dp, 1.0_dp) 846 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp3, 0.0_dp, tmp1, & 847 filter_eps=threshold, flop=flops) 848 floptot = floptot + flops 849 CALL dbcsr_add_on_diag(tmp1, a3) 850 851 prefactor = 35.0_dp/128.0_dp 852 ELSE IF (order .EQ. 6) THEN 853 ! with four multiplications 854 ! f6(y)=63/256*(256/63 + (128 y)/63 + (32 y^2)/21 + (80 y^3)/63 + (10 y^4)/9 + y^5) 855 ! tmp1=y 856 CALL dbcsr_copy(tmp3, tmp1) 857 CALL dbcsr_scale(tmp1, 128.0_dp/63.0_dp) 858 CALL dbcsr_add_on_diag(tmp1, 256.0_dp/63.0_dp) 859 860 ! 861 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp3, 0.0_dp, tmp2, & 862 filter_eps=threshold, flop=flops) 863 floptot = floptot + flops 864 865 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 32.0_dp/21.0_dp) 866 867 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp2, & 868 filter_eps=threshold, flop=flops) 869 floptot = floptot + flops 870 871 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 80.0_dp/63.0_dp) 872 873 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp2, & 874 filter_eps=threshold, flop=flops) 875 floptot = floptot + flops 876 877 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 10.0_dp/9.0_dp) 878 879 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 1.0_dp, tmp1, & 880 filter_eps=threshold, flop=flops) 881 floptot = floptot + flops 882 883 prefactor = 63.0_dp/256.0_dp 884 ELSE IF (order .EQ. 7) THEN 885 ! with three multiplications 886 887 a0 = 1.3686502058092053653287666647611728507211996691324048468010382350359929055186612505791532871573242422_dp 888 a1 = 1.7089671854477436685850554669524985556296280184497503489303331821456795715195510972774979091893741568_dp 889 a2 = -1.3231956603546599107833121193066273961757451236778593922555836895814474509732067051246078326118696968_dp 890 a3 = 3.9876642330847931291749479958277754186675336169578593000744380254770411483327581042259415937710270453_dp 891 a4 = -3.7273299006476825027065704937541279833880400042556351139273912137942678919776364526511485025132991667_dp 892 a5 = 4.9369932474103023792021351907971943220607580694533770325967170245194362399287150565595441897740173578_dp 893 ! =231/1024*(1024/231+512/231*y+128/77*y^2+320/231*y^3+40/33*y^4+12/11*y^5+y^6) 894 ! z(y)=(y+a_0)*y+a_1 895 ! w(y)=(y+a_2)*z(y)+a_3 896 ! f7(y)=(w(y)+z(y)+a_4)*w(y)+a_5 897 898 ! tmp1=y 899 ! tmp3=z 900 CALL dbcsr_copy(tmp3, tmp1) 901 CALL dbcsr_add_on_diag(tmp3, a0) 902 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp1, 0.0_dp, tmp3, & 903 filter_eps=threshold, flop=flops) 904 floptot = floptot + flops 905 CALL dbcsr_add_on_diag(tmp3, a1) 906 907 ! tmp4=w 908 CALL dbcsr_copy(tmp4, tmp1) 909 CALL dbcsr_add_on_diag(tmp4, a2) 910 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp4, tmp3, 0.0_dp, tmp4, & 911 filter_eps=threshold, flop=flops) 912 floptot = floptot + flops 913 CALL dbcsr_add_on_diag(tmp4, a3) 914 915 CALL dbcsr_add(tmp3, tmp4, 1.0_dp, 1.0_dp) 916 CALL dbcsr_add_on_diag(tmp3, a4) 917 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp3, tmp4, 0.0_dp, tmp1, & 918 filter_eps=threshold, flop=flops) 919 floptot = floptot + flops 920 CALL dbcsr_add_on_diag(tmp1, a5) 921 922 prefactor = 231.0_dp/1024.0_dp 923 ELSE 924 CPABORT("requested order is not implemented.") 925 ENDIF 926 927 ! tmp2 = X * prefactor * 928 CALL dbcsr_multiply("N", "N", prefactor, matrix_sign, tmp1, 0.0_dp, tmp2, & 929 filter_eps=threshold, flop=flops) 930 floptot = floptot + flops 931 932 ! done iterating 933 ! CALL dbcsr_filter(tmp2,threshold) 934 CALL dbcsr_copy(matrix_sign, tmp2) 935 t2 = m_walltime() 936 937 occ_matrix = dbcsr_get_occupation(matrix_sign) 938 939 IF (unit_nr > 0) THEN 940 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "NS sign iter ", i, occ_matrix, & 941 frob_matrix/frob_matrix_base, t2 - t1, & 942 floptot/(1.0E6_dp*MAX(0.001_dp, t2 - t1)) 943 CALL m_flush(unit_nr) 944 ENDIF 945 946 ! frob_matrix/frob_matrix_base < SQRT(threshold) 947 IF (frob_matrix*frob_matrix < (threshold*frob_matrix_base*frob_matrix_base)) EXIT 948 949 ENDDO 950 951 ! this check is not really needed 952 CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, & 953 filter_eps=threshold) 954 frob_matrix_base = dbcsr_frobenius_norm(tmp1) 955 CALL dbcsr_add_on_diag(tmp1, -1.0_dp) 956 frob_matrix = dbcsr_frobenius_norm(tmp1) 957 occ_matrix = dbcsr_get_occupation(matrix_sign) 958 IF (unit_nr > 0) THEN 959 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3)') "Final NS sign iter", i, occ_matrix, & 960 frob_matrix/frob_matrix_base 961 WRITE (unit_nr, '()') 962 CALL m_flush(unit_nr) 963 ENDIF 964 965 CALL dbcsr_release(tmp1) 966 CALL dbcsr_release(tmp2) 967 IF (order .GE. 4) THEN 968 CALL dbcsr_release(tmp3) 969 ENDIF 970 IF (order .GE. 7) THEN 971 CALL dbcsr_release(tmp4) 972 ENDIF 973 974 CALL timestop(handle) 975 976 END SUBROUTINE matrix_sign_Newton_Schulz 977 978 ! ************************************************************************************************** 979!> \brief compute the sign a matrix using the general algorithm for the p-th root of Richters et al. 980!> Commun. Comput. Phys., 25 (2019), pp. 564-585. 981!> \param matrix_sign ... 982!> \param matrix ... 983!> \param threshold ... 984!> \param sign_order ... 985!> \par History 986!> 2019.03 created [Robert Schade] 987!> \author Robert Schade 988! ************************************************************************************************** 989 SUBROUTINE matrix_sign_proot(matrix_sign, matrix, threshold, sign_order) 990 991 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sign, matrix 992 REAL(KIND=dp), INTENT(IN) :: threshold 993 INTEGER, INTENT(IN), OPTIONAL :: sign_order 994 995 CHARACTER(LEN=*), PARAMETER :: routineN = 'matrix_sign_proot', & 996 routineP = moduleN//':'//routineN 997 998 INTEGER :: handle, order, unit_nr 999 INTEGER(KIND=int_8) :: flop0, flop1, flop2 1000 LOGICAL :: converged, symmetrize 1001 REAL(KIND=dp) :: frob_matrix, frob_matrix_base, occ_matrix 1002 TYPE(cp_logger_type), POINTER :: logger 1003 TYPE(dbcsr_type) :: matrix2, matrix_sqrt, matrix_sqrt_inv, & 1004 tmp1, tmp2 1005 1006 CALL timeset(routineN, handle) 1007 1008 logger => cp_get_default_logger() 1009 IF (logger%para_env%ionode) THEN 1010 unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.) 1011 ELSE 1012 unit_nr = -1 1013 ENDIF 1014 1015 IF (PRESENT(sign_order)) THEN 1016 order = sign_order 1017 ELSE 1018 order = 2 1019 ENDIF 1020 1021 CALL dbcsr_create(tmp1, template=matrix_sign) 1022 1023 CALL dbcsr_create(tmp2, template=matrix_sign) 1024 1025 CALL dbcsr_create(matrix2, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1026 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix, matrix, 0.0_dp, matrix2, & 1027 filter_eps=threshold, flop=flop0) 1028 !CALL dbcsr_filter(matrix2, threshold) 1029 1030 !CALL dbcsr_copy(matrix_sign, matrix) 1031 !CALL dbcsr_filter(matrix_sign, threshold) 1032 1033 IF (unit_nr > 0) WRITE (unit_nr, *) 1034 1035 CALL dbcsr_create(matrix_sqrt, template=matrix2) 1036 CALL dbcsr_create(matrix_sqrt_inv, template=matrix2) 1037 IF (unit_nr > 0) WRITE (unit_nr, *) "Threshold=", threshold 1038 1039 symmetrize = .FALSE. 1040 CALL matrix_sqrt_proot(matrix_sqrt, matrix_sqrt_inv, matrix2, threshold, order, & 1041 0.01_dp, 100, symmetrize, converged) 1042! call matrix_sqrt_Newton_Schulz(matrix_sqrt, matrix_sqrt_inv, matrix2, threshold, order, & 1043! 0.01_dp,100, symmetrize,converged) 1044 1045 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix, matrix_sqrt_inv, 0.0_dp, matrix_sign, & 1046 filter_eps=threshold, flop=flop1) 1047 1048 ! this check is not really needed 1049 CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sign, matrix_sign, 0.0_dp, tmp1, & 1050 filter_eps=threshold, flop=flop2) 1051 frob_matrix_base = dbcsr_frobenius_norm(tmp1) 1052 CALL dbcsr_add_on_diag(tmp1, -1.0_dp) 1053 frob_matrix = dbcsr_frobenius_norm(tmp1) 1054 occ_matrix = dbcsr_get_occupation(matrix_sign) 1055 IF (unit_nr > 0) THEN 1056 WRITE (unit_nr, '(T6,A,F10.8,E12.3)') "Final proot sign iter", occ_matrix, & 1057 frob_matrix/frob_matrix_base 1058 WRITE (unit_nr, '()') 1059 CALL m_flush(unit_nr) 1060 ENDIF 1061 1062 CALL dbcsr_release(tmp1) 1063 CALL dbcsr_release(tmp2) 1064 CALL dbcsr_release(matrix2) 1065 CALL dbcsr_release(matrix_sqrt) 1066 CALL dbcsr_release(matrix_sqrt_inv) 1067 1068 CALL timestop(handle) 1069 1070 END SUBROUTINE matrix_sign_proot 1071 1072! ************************************************************************************************** 1073!> \brief compute the sqrt of a matrix via the sign function and the corresponding Newton-Schulz iterations 1074!> the order of the algorithm should be 2..5, 3 or 5 is recommended 1075!> \param matrix_sqrt ... 1076!> \param matrix_sqrt_inv ... 1077!> \param matrix ... 1078!> \param threshold ... 1079!> \param order ... 1080!> \param eps_lanczos ... 1081!> \param max_iter_lanczos ... 1082!> \param symmetrize ... 1083!> \param converged ... 1084!> \par History 1085!> 2010.10 created [Joost VandeVondele] 1086!> \author Joost VandeVondele 1087! ************************************************************************************************** 1088 SUBROUTINE matrix_sqrt_Newton_Schulz(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, & 1089 eps_lanczos, max_iter_lanczos, symmetrize, converged) 1090 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sqrt, matrix_sqrt_inv, matrix 1091 REAL(KIND=dp), INTENT(IN) :: threshold 1092 INTEGER, INTENT(IN) :: order 1093 REAL(KIND=dp), INTENT(IN) :: eps_lanczos 1094 INTEGER, INTENT(IN) :: max_iter_lanczos 1095 LOGICAL, OPTIONAL :: symmetrize, converged 1096 1097 CHARACTER(LEN=*), PARAMETER :: routineN = 'matrix_sqrt_Newton_Schulz', & 1098 routineP = moduleN//':'//routineN 1099 1100 INTEGER :: handle, i, unit_nr 1101 INTEGER(KIND=int_8) :: flop1, flop2, flop3, flop4, flop5 1102 LOGICAL :: arnoldi_converged, tsym 1103 REAL(KIND=dp) :: a, b, c, conv, d, frob_matrix, & 1104 frob_matrix_base, gershgorin_norm, & 1105 max_ev, min_ev, oa, ob, oc, & 1106 occ_matrix, od, scaling, t1, t2 1107 TYPE(cp_logger_type), POINTER :: logger 1108 TYPE(dbcsr_type) :: tmp1, tmp2, tmp3 1109 1110 CALL timeset(routineN, handle) 1111 1112 logger => cp_get_default_logger() 1113 IF (logger%para_env%ionode) THEN 1114 unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.) 1115 ELSE 1116 unit_nr = -1 1117 ENDIF 1118 1119 IF (PRESENT(converged)) converged = .FALSE. 1120 IF (PRESENT(symmetrize)) THEN 1121 tsym = symmetrize 1122 ELSE 1123 tsym = .TRUE. 1124 ENDIF 1125 1126 ! for stability symmetry can not be assumed 1127 CALL dbcsr_create(tmp1, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1128 CALL dbcsr_create(tmp2, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1129 IF (order .GE. 4) THEN 1130 CALL dbcsr_create(tmp3, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1131 ENDIF 1132 1133 CALL dbcsr_set(matrix_sqrt_inv, 0.0_dp) 1134 CALL dbcsr_add_on_diag(matrix_sqrt_inv, 1.0_dp) 1135 CALL dbcsr_filter(matrix_sqrt_inv, threshold) 1136 CALL dbcsr_copy(matrix_sqrt, matrix) 1137 1138 ! scale the matrix to get into the convergence range 1139 IF (order == 0) THEN 1140 1141 gershgorin_norm = dbcsr_gershgorin_norm(matrix_sqrt) 1142 frob_matrix = dbcsr_frobenius_norm(matrix_sqrt) 1143 scaling = 1.0_dp/MIN(frob_matrix, gershgorin_norm) 1144 1145 ELSE 1146 1147 ! scale the matrix to get into the convergence range 1148 CALL arnoldi_extremal(matrix_sqrt, max_ev, min_ev, threshold=eps_lanczos, & 1149 max_iter=max_iter_lanczos, converged=arnoldi_converged) 1150 IF (unit_nr > 0) THEN 1151 WRITE (unit_nr, *) 1152 WRITE (unit_nr, '(T6,A,1X,L1,A,E12.3)') "Lanczos converged: ", arnoldi_converged, " threshold:", eps_lanczos 1153 WRITE (unit_nr, '(T6,A,1X,E12.3,E12.3)') "Est. extremal eigenvalues:", max_ev, min_ev 1154 WRITE (unit_nr, '(T6,A,1X,E12.3)') "Est. condition number :", max_ev/MAX(min_ev, EPSILON(min_ev)) 1155 ENDIF 1156 ! conservatively assume we get a relatively large error (100*threshold_lanczos) in the estimates 1157 ! and adjust the scaling to be on the safe side 1158 scaling = 2.0_dp/(max_ev + min_ev + 100*eps_lanczos) 1159 1160 ENDIF 1161 1162 CALL dbcsr_scale(matrix_sqrt, scaling) 1163 CALL dbcsr_filter(matrix_sqrt, threshold) 1164 IF (unit_nr > 0) THEN 1165 WRITE (unit_nr, *) 1166 WRITE (unit_nr, *) "Order=", order 1167 ENDIF 1168 1169 DO i = 1, 100 1170 1171 t1 = m_walltime() 1172 1173 ! tmp1 = Zk * Yk - I 1174 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, & 1175 filter_eps=threshold, flop=flop1) 1176 frob_matrix_base = dbcsr_frobenius_norm(tmp1) 1177 CALL dbcsr_add_on_diag(tmp1, -1.0_dp) 1178 1179 ! check convergence (frob norm of what should be the identity matrix minus identity matrix) 1180 frob_matrix = dbcsr_frobenius_norm(tmp1) 1181 1182 flop4 = 0; flop5 = 0 1183 SELECT CASE (order) 1184 CASE (0, 2) 1185 ! update the above to 0.5*(3*I-Zk*Yk) 1186 CALL dbcsr_add_on_diag(tmp1, -2.0_dp) 1187 CALL dbcsr_scale(tmp1, -0.5_dp) 1188 CASE (3) 1189 ! tmp2 = tmp1 ** 2 1190 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, & 1191 filter_eps=threshold, flop=flop4) 1192 ! tmp1 = 1/16 * (16*I-8*tmp1+6*tmp1**2-5*tmp1**3) 1193 CALL dbcsr_add(tmp1, tmp2, -4.0_dp, 3.0_dp) 1194 CALL dbcsr_add_on_diag(tmp1, 8.0_dp) 1195 CALL dbcsr_scale(tmp1, 0.125_dp) 1196 CASE (4) ! as expensive as case(5), so little need to use it 1197 ! tmp2 = tmp1 ** 2 1198 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, & 1199 filter_eps=threshold, flop=flop4) 1200 ! tmp3 = tmp2 * tmp1 1201 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp1, 0.0_dp, tmp3, & 1202 filter_eps=threshold, flop=flop5) 1203 CALL dbcsr_scale(tmp1, -8.0_dp) 1204 CALL dbcsr_add_on_diag(tmp1, 16.0_dp) 1205 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 6.0_dp) 1206 CALL dbcsr_add(tmp1, tmp3, 1.0_dp, -5.0_dp) 1207 CALL dbcsr_scale(tmp1, 1/16.0_dp) 1208 CASE (5) 1209 ! Knuth's reformulation to evaluate the polynomial of 4th degree in 2 multiplications 1210 ! p = y4+A*y3+B*y2+C*y+D 1211 ! z := y * (y+a); P := (z+y+b) * (z+c) + d. 1212 ! a=(A-1)/2 ; b=B*(a+1)-C-a*(a+1)*(a+1) 1213 ! c=B-b-a*(a+1) 1214 ! d=D-bc 1215 oa = -40.0_dp/35.0_dp 1216 ob = 48.0_dp/35.0_dp 1217 oc = -64.0_dp/35.0_dp 1218 od = 128.0_dp/35.0_dp 1219 a = (oa - 1)/2 1220 b = ob*(a + 1) - oc - a*(a + 1)**2 1221 c = ob - b - a*(a + 1) 1222 d = od - b*c 1223 ! tmp2 = tmp1 ** 2 + a * tmp1 1224 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, tmp1, 0.0_dp, tmp2, & 1225 filter_eps=threshold, flop=flop4) 1226 CALL dbcsr_add(tmp2, tmp1, 1.0_dp, a) 1227 ! tmp3 = tmp2 + tmp1 + b 1228 CALL dbcsr_copy(tmp3, tmp2) 1229 CALL dbcsr_add(tmp3, tmp1, 1.0_dp, 1.0_dp) 1230 CALL dbcsr_add_on_diag(tmp3, b) 1231 ! tmp2 = tmp2 + c 1232 CALL dbcsr_add_on_diag(tmp2, c) 1233 ! tmp1 = tmp2 * tmp3 1234 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, tmp3, 0.0_dp, tmp1, & 1235 filter_eps=threshold, flop=flop5) 1236 ! tmp1 = tmp1 + d 1237 CALL dbcsr_add_on_diag(tmp1, d) 1238 ! final scale 1239 CALL dbcsr_scale(tmp1, 35.0_dp/128.0_dp) 1240 CASE DEFAULT 1241 CPABORT("Illegal order value") 1242 END SELECT 1243 1244 ! tmp2 = Yk * tmp1 = Y(k+1) 1245 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt, tmp1, 0.0_dp, tmp2, & 1246 filter_eps=threshold, flop=flop2) 1247 ! CALL dbcsr_filter(tmp2,threshold) 1248 CALL dbcsr_copy(matrix_sqrt, tmp2) 1249 1250 ! tmp2 = tmp1 * Zk = Z(k+1) 1251 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp1, matrix_sqrt_inv, 0.0_dp, tmp2, & 1252 filter_eps=threshold, flop=flop3) 1253 ! CALL dbcsr_filter(tmp2,threshold) 1254 CALL dbcsr_copy(matrix_sqrt_inv, tmp2) 1255 1256 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv) 1257 1258 ! done iterating 1259 t2 = m_walltime() 1260 1261 conv = frob_matrix/frob_matrix_base 1262 1263 IF (unit_nr > 0) THEN 1264 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "NS sqrt iter ", i, occ_matrix, & 1265 conv, t2 - t1, & 1266 (flop1 + flop2 + flop3 + flop4 + flop5)/(1.0E6_dp*MAX(0.001_dp, t2 - t1)) 1267 CALL m_flush(unit_nr) 1268 ENDIF 1269 1270 IF (abnormal_value(conv)) & 1271 CPABORT("conv is an abnormal value (NaN/Inf).") 1272 1273 ! conv < SQRT(threshold) 1274 IF ((conv*conv) < threshold) THEN 1275 IF (PRESENT(converged)) converged = .TRUE. 1276 EXIT 1277 ENDIF 1278 1279 ENDDO 1280 1281 ! symmetrize the matrices as this is not guaranteed by the algorithm 1282 IF (tsym) THEN 1283 IF (unit_nr > 0) THEN 1284 WRITE (unit_nr, '(A20)') "SYMMETRIZING RESULTS" 1285 ENDIF 1286 CALL dbcsr_transposed(tmp1, matrix_sqrt_inv) 1287 CALL dbcsr_add(matrix_sqrt_inv, tmp1, 0.5_dp, 0.5_dp) 1288 CALL dbcsr_transposed(tmp1, matrix_sqrt) 1289 CALL dbcsr_add(matrix_sqrt, tmp1, 0.5_dp, 0.5_dp) 1290 ENDIF 1291 1292 ! this check is not really needed 1293 CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, & 1294 filter_eps=threshold) 1295 frob_matrix_base = dbcsr_frobenius_norm(tmp1) 1296 CALL dbcsr_add_on_diag(tmp1, -1.0_dp) 1297 frob_matrix = dbcsr_frobenius_norm(tmp1) 1298 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv) 1299 IF (unit_nr > 0) THEN 1300 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3)') "Final NS sqrt iter ", i, occ_matrix, & 1301 frob_matrix/frob_matrix_base 1302 WRITE (unit_nr, '()') 1303 CALL m_flush(unit_nr) 1304 ENDIF 1305 1306 ! scale to proper end results 1307 CALL dbcsr_scale(matrix_sqrt, 1/SQRT(scaling)) 1308 CALL dbcsr_scale(matrix_sqrt_inv, SQRT(scaling)) 1309 1310 CALL dbcsr_release(tmp1) 1311 CALL dbcsr_release(tmp2) 1312 IF (order .GE. 4) THEN 1313 CALL dbcsr_release(tmp3) 1314 ENDIF 1315 1316 CALL timestop(handle) 1317 1318 END SUBROUTINE matrix_sqrt_Newton_Schulz 1319 1320! ************************************************************************************************** 1321!> \brief compute the sqrt of a matrix via the general algorithm for the p-th root of Richters et al. 1322!> Commun. Comput. Phys., 25 (2019), pp. 564-585. 1323!> \param matrix_sqrt ... 1324!> \param matrix_sqrt_inv ... 1325!> \param matrix ... 1326!> \param threshold ... 1327!> \param order ... 1328!> \param eps_lanczos ... 1329!> \param max_iter_lanczos ... 1330!> \param symmetrize ... 1331!> \param converged ... 1332!> \par History 1333!> 2019.04 created [Robert Schade] 1334!> \author Robert Schade 1335! ************************************************************************************************** 1336 SUBROUTINE matrix_sqrt_proot(matrix_sqrt, matrix_sqrt_inv, matrix, threshold, order, & 1337 eps_lanczos, max_iter_lanczos, symmetrize, converged) 1338 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_sqrt, matrix_sqrt_inv, matrix 1339 REAL(KIND=dp), INTENT(IN) :: threshold 1340 INTEGER, INTENT(IN) :: order 1341 REAL(KIND=dp), INTENT(IN) :: eps_lanczos 1342 INTEGER, INTENT(IN) :: max_iter_lanczos 1343 LOGICAL, OPTIONAL :: symmetrize, converged 1344 1345 CHARACTER(LEN=*), PARAMETER :: routineN = 'matrix_sqrt_proot', & 1346 routineP = moduleN//':'//routineN 1347 1348 INTEGER :: choose, handle, i, ii, j, unit_nr 1349 INTEGER(KIND=int_8) :: f, flop1, flop2, flop3, flop4, flop5 1350 LOGICAL :: arnoldi_converged, test, tsym 1351 REAL(KIND=dp) :: conv, frob_matrix, frob_matrix_base, & 1352 max_ev, min_ev, occ_matrix, scaling, & 1353 t1, t2 1354 TYPE(cp_logger_type), POINTER :: logger 1355 TYPE(dbcsr_type) :: BK2A, matrixS, Rmat, tmp1, tmp2 1356 1357 CALL cite_reference(Richters2018) 1358 1359 test = .FALSE. 1360 1361 CALL timeset(routineN, handle) 1362 1363 logger => cp_get_default_logger() 1364 IF (logger%para_env%ionode) THEN 1365 unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.) 1366 ELSE 1367 unit_nr = -1 1368 ENDIF 1369 1370 IF (PRESENT(converged)) converged = .FALSE. 1371 IF (PRESENT(symmetrize)) THEN 1372 tsym = symmetrize 1373 ELSE 1374 tsym = .TRUE. 1375 ENDIF 1376 1377 ! for stability symmetry can not be assumed 1378 CALL dbcsr_create(tmp1, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1379 CALL dbcsr_create(tmp2, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1380 CALL dbcsr_create(Rmat, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1381 CALL dbcsr_create(matrixS, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1382 1383 CALL dbcsr_copy(matrixS, matrix) 1384 IF (1 .EQ. 1) THEN 1385 ! scale the matrix to get into the convergence range 1386 CALL arnoldi_extremal(matrixS, max_ev, min_ev, threshold=eps_lanczos, & 1387 max_iter=max_iter_lanczos, converged=arnoldi_converged) 1388 IF (unit_nr > 0) THEN 1389 WRITE (unit_nr, *) 1390 WRITE (unit_nr, '(T6,A,1X,L1,A,E12.3)') "Lanczos converged: ", arnoldi_converged, " threshold:", eps_lanczos 1391 WRITE (unit_nr, '(T6,A,1X,E12.3,E12.3)') "Est. extremal eigenvalues:", max_ev, min_ev 1392 WRITE (unit_nr, '(T6,A,1X,E12.3)') "Est. condition number :", max_ev/MAX(min_ev, EPSILON(min_ev)) 1393 ENDIF 1394 ! conservatively assume we get a relatively large error (100*threshold_lanczos) in the estimates 1395 ! and adjust the scaling to be on the safe side 1396 scaling = 2.0_dp/(max_ev + min_ev + 100*eps_lanczos) 1397 CALL dbcsr_scale(matrixS, scaling) 1398 CALL dbcsr_filter(matrixS, threshold) 1399 ELSE 1400 scaling = 1.0_dp 1401 ENDIF 1402 1403 CALL dbcsr_set(matrix_sqrt_inv, 0.0_dp) 1404 CALL dbcsr_add_on_diag(matrix_sqrt_inv, 1.0_dp) 1405 !CALL dbcsr_filter(matrix_sqrt_inv, threshold) 1406 1407 IF (unit_nr > 0) THEN 1408 WRITE (unit_nr, *) 1409 WRITE (unit_nr, *) "Order=", order 1410 ENDIF 1411 1412 DO i = 1, 100 1413 1414 t1 = m_walltime() 1415 IF (1 .EQ. 1) THEN 1416 !build R=1-A B_K^2 1417 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix_sqrt_inv, 0.0_dp, tmp1, & 1418 filter_eps=threshold, flop=flop1) 1419 CALL dbcsr_multiply("N", "N", 1.0_dp, matrixS, tmp1, 0.0_dp, Rmat, & 1420 filter_eps=threshold, flop=flop2) 1421 CALL dbcsr_scale(Rmat, -1.0_dp) 1422 CALL dbcsr_add_on_diag(Rmat, 1.0_dp) 1423 1424 flop4 = 0; flop5 = 0 1425 CALL dbcsr_set(tmp1, 0.0_dp) 1426 CALL dbcsr_add_on_diag(tmp1, 2.0_dp) 1427 1428 flop3 = 0 1429 1430 DO j = 2, order 1431 IF (j .EQ. 2) THEN 1432 CALL dbcsr_copy(tmp2, Rmat) 1433 ELSE 1434 f = 0 1435 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, Rmat, 0.0_dp, tmp2, & 1436 filter_eps=threshold, flop=f) 1437 flop3 = flop3 + f 1438 ENDIF 1439 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, 1.0_dp) 1440 ENDDO 1441 ELSE 1442 CALL dbcsr_create(BK2A, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1443 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrixS, 0.0_dp, BK2A, & 1444 filter_eps=threshold, flop=flop1) 1445 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, BK2A, 0.0_dp, BK2A, & 1446 filter_eps=threshold, flop=flop2) 1447 CALL dbcsr_copy(Rmat, BK2A) 1448 CALL dbcsr_add_on_diag(Rmat, -1.0_dp) 1449 1450 CALL dbcsr_set(tmp1, 0.0_dp) 1451 CALL dbcsr_add_on_diag(tmp1, 1.0_dp) 1452 1453 CALL dbcsr_set(tmp2, 0.0_dp) 1454 CALL dbcsr_add_on_diag(tmp2, 1.0_dp) 1455 1456 flop3 = 0 1457 DO j = 1, order 1458 !choose=factorial(order)/(factorial(j)*factorial(order-j)) 1459 choose = PRODUCT((/(ii, ii=1, order)/))/(PRODUCT((/(ii, ii=1, j)/))*PRODUCT((/(ii, ii=1, order - j)/))) 1460 CALL dbcsr_add(tmp1, tmp2, 1.0_dp, -1.0_dp*(-1)**j*choose) 1461 IF (j .LT. order) THEN 1462 f = 0 1463 CALL dbcsr_multiply("N", "N", 1.0_dp, tmp2, BK2A, 0.0_dp, tmp2, & 1464 filter_eps=threshold, flop=f) 1465 flop3 = flop3 + f 1466 ENDIF 1467 ENDDO 1468 CALL dbcsr_release(BK2A) 1469 ENDIF 1470 1471 CALL dbcsr_multiply("N", "N", 0.5_dp, matrix_sqrt_inv, tmp1, 0.0_dp, matrix_sqrt_inv, & 1472 filter_eps=threshold, flop=flop4) 1473 1474 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv) 1475 1476 ! done iterating 1477 t2 = m_walltime() 1478 1479 conv = dbcsr_frobenius_norm(Rmat) 1480 1481 IF (unit_nr > 0) THEN 1482 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3,F12.3,F13.3)') "PROOT sqrt iter ", i, occ_matrix, & 1483 conv, t2 - t1, & 1484 (flop1 + flop2 + flop3 + flop4 + flop5)/(1.0E6_dp*MAX(0.001_dp, t2 - t1)) 1485 CALL m_flush(unit_nr) 1486 ENDIF 1487 1488 IF (abnormal_value(conv)) & 1489 CPABORT("conv is an abnormal value (NaN/Inf).") 1490 1491 ! conv < SQRT(threshold) 1492 IF ((conv*conv) < threshold) THEN 1493 IF (PRESENT(converged)) converged = .TRUE. 1494 EXIT 1495 ENDIF 1496 1497 ENDDO 1498 1499 ! scale to proper end results 1500 CALL dbcsr_scale(matrix_sqrt_inv, SQRT(scaling)) 1501 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix, 0.0_dp, matrix_sqrt, & 1502 filter_eps=threshold, flop=flop5) 1503 1504 ! symmetrize the matrices as this is not guaranteed by the algorithm 1505 IF (tsym) THEN 1506 IF (unit_nr > 0) THEN 1507 WRITE (unit_nr, '(A20)') "SYMMETRIZING RESULTS" 1508 ENDIF 1509 CALL dbcsr_transposed(tmp1, matrix_sqrt_inv) 1510 CALL dbcsr_add(matrix_sqrt_inv, tmp1, 0.5_dp, 0.5_dp) 1511 CALL dbcsr_transposed(tmp1, matrix_sqrt) 1512 CALL dbcsr_add(matrix_sqrt, tmp1, 0.5_dp, 0.5_dp) 1513 ENDIF 1514 1515 ! this check is not really needed 1516 IF (test) THEN 1517 CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt, 0.0_dp, tmp1, & 1518 filter_eps=threshold) 1519 frob_matrix_base = dbcsr_frobenius_norm(tmp1) 1520 CALL dbcsr_add_on_diag(tmp1, -1.0_dp) 1521 frob_matrix = dbcsr_frobenius_norm(tmp1) 1522 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv) 1523 IF (unit_nr > 0) THEN 1524 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3)') "Final PROOT S^{-1/2} S^{1/2}-Eins error ", i, occ_matrix, & 1525 frob_matrix/frob_matrix_base 1526 WRITE (unit_nr, '()') 1527 CALL m_flush(unit_nr) 1528 ENDIF 1529 1530 ! this check is not really needed 1531 CALL dbcsr_multiply("N", "N", +1.0_dp, matrix_sqrt_inv, matrix_sqrt_inv, 0.0_dp, tmp2, & 1532 filter_eps=threshold) 1533 CALL dbcsr_multiply("N", "N", +1.0_dp, tmp2, matrix, 0.0_dp, tmp1, & 1534 filter_eps=threshold) 1535 frob_matrix_base = dbcsr_frobenius_norm(tmp1) 1536 CALL dbcsr_add_on_diag(tmp1, -1.0_dp) 1537 frob_matrix = dbcsr_frobenius_norm(tmp1) 1538 occ_matrix = dbcsr_get_occupation(matrix_sqrt_inv) 1539 IF (unit_nr > 0) THEN 1540 WRITE (unit_nr, '(T6,A,1X,I3,1X,F10.8,E12.3)') "Final PROOT S^{-1/2} S^{-1/2} S-Eins error ", i, occ_matrix, & 1541 frob_matrix/frob_matrix_base 1542 WRITE (unit_nr, '()') 1543 CALL m_flush(unit_nr) 1544 ENDIF 1545 ENDIF 1546 1547 CALL dbcsr_release(tmp1) 1548 CALL dbcsr_release(tmp2) 1549 CALL dbcsr_release(Rmat) 1550 CALL dbcsr_release(matrixS) 1551 1552 CALL timestop(handle) 1553 END SUBROUTINE matrix_sqrt_proot 1554 1555! ************************************************************************************************** 1556!> \brief ... 1557!> \param matrix_exp ... 1558!> \param matrix ... 1559!> \param omega ... 1560!> \param alpha ... 1561!> \param threshold ... 1562! ************************************************************************************************** 1563 SUBROUTINE matrix_exponential(matrix_exp, matrix, omega, alpha, threshold) 1564 ! compute matrix_exp=omega*exp(alpha*matrix) 1565 TYPE(dbcsr_type), INTENT(INOUT) :: matrix_exp, matrix 1566 REAL(KIND=dp), INTENT(IN) :: omega, alpha, threshold 1567 1568 CHARACTER(LEN=*), PARAMETER :: routineN = 'matrix_exponential', & 1569 routineP = moduleN//':'//routineN 1570 REAL(dp), PARAMETER :: one = 1.0_dp, toll = 1.E-17_dp, & 1571 zero = 0.0_dp 1572 1573 INTEGER :: handle, i, k, unit_nr 1574 REAL(dp) :: factorial, norm_C, norm_D, norm_scalar 1575 TYPE(cp_logger_type), POINTER :: logger 1576 TYPE(dbcsr_type) :: B, B_square, C, D, D_product 1577 1578 CALL timeset(routineN, handle) 1579 1580 logger => cp_get_default_logger() 1581 IF (logger%para_env%ionode) THEN 1582 unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.) 1583 ELSE 1584 unit_nr = -1 1585 ENDIF 1586 1587 ! Calculate the norm of the matrix alpha*matrix, and scale it until it is less than 1.0 1588 norm_scalar = ABS(alpha)*dbcsr_frobenius_norm(matrix) 1589 1590 ! k=scaling parameter 1591 k = 1 1592 DO 1593 IF ((norm_scalar/2.0_dp**k) <= one) EXIT 1594 k = k + 1 1595 END DO 1596 1597 ! copy and scale the input matrix in matrix C and in matrix D 1598 CALL dbcsr_create(C, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1599 CALL dbcsr_copy(C, matrix) 1600 CALL dbcsr_scale(C, alpha_scalar=alpha/2.0_dp**k) 1601 1602 CALL dbcsr_create(D, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1603 CALL dbcsr_copy(D, C) 1604 1605 ! write(*,*) 1606 ! write(*,*) 1607 ! CALL dbcsr_print(D, nodata=.FALSE., matlab_format=.TRUE., variable_name="D", unit_nr=6) 1608 1609 ! set the B matrix as B=Identity+D 1610 CALL dbcsr_create(B, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1611 CALL dbcsr_copy(B, D) 1612 CALL dbcsr_add_on_diag(B, alpha_scalar=one) 1613 1614 ! CALL dbcsr_print(B, nodata=.FALSE., matlab_format=.TRUE., variable_name="B", unit_nr=6) 1615 1616 ! Calculate the norm of C and moltiply by toll to be used as a threshold 1617 norm_C = toll*dbcsr_frobenius_norm(matrix) 1618 1619 ! iteration for the trucated taylor series expansion 1620 CALL dbcsr_create(D_product, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1621 i = 1 1622 DO 1623 i = i + 1 1624 ! compute D_product=D*C 1625 CALL dbcsr_multiply("N", "N", one, D, C, & 1626 zero, D_product, filter_eps=threshold) 1627 1628 ! copy D_product in D 1629 CALL dbcsr_copy(D, D_product) 1630 1631 ! calculate B=B+D_product/fat(i) 1632 factorial = ifac(i) 1633 CALL dbcsr_add(B, D_product, one, factorial) 1634 1635 ! check for convergence using the norm of D (copy of the matrix D_product) and C 1636 norm_D = factorial*dbcsr_frobenius_norm(D) 1637 IF (norm_D < norm_C) EXIT 1638 END DO 1639 1640 ! start the k iteration for the squaring of the matrix 1641 CALL dbcsr_create(B_square, template=matrix, matrix_type=dbcsr_type_no_symmetry) 1642 DO i = 1, k 1643 !compute B_square=B*B 1644 CALL dbcsr_multiply("N", "N", one, B, B, & 1645 zero, B_square, filter_eps=threshold) 1646 ! copy Bsquare in B to iterate 1647 CALL dbcsr_copy(B, B_square) 1648 END DO 1649 1650 ! copy B_square in matrix_exp and 1651 CALL dbcsr_copy(matrix_exp, B_square) 1652 1653 ! scale matrix_exp by omega, matrix_exp=omega*B_square 1654 CALL dbcsr_scale(matrix_exp, alpha_scalar=omega) 1655 ! write(*,*) alpha,omega 1656 1657 CALL timestop(handle) 1658 1659 END SUBROUTINE matrix_exponential 1660 1661! ************************************************************************************************** 1662!> \brief McWeeny purification of a matrix in the orthonormal basis 1663!> \param matrix_p Matrix to purify (needs to be almost idempotent already) 1664!> \param threshold Threshold used as filter_eps and convergence criteria 1665!> \param max_steps Max number of iterations 1666!> \par History 1667!> 2013.01 created [Florian Schiffmann] 1668!> 2014.07 slightly refactored [Ole Schuett] 1669!> \author Florian Schiffmann 1670! ************************************************************************************************** 1671 SUBROUTINE purify_mcweeny_orth(matrix_p, threshold, max_steps) 1672 TYPE(dbcsr_type), DIMENSION(:) :: matrix_p 1673 REAL(KIND=dp) :: threshold 1674 INTEGER :: max_steps 1675 1676 CHARACTER(LEN=*), PARAMETER :: routineN = 'purify_mcweeny_orth', & 1677 routineP = moduleN//':'//routineN 1678 1679 INTEGER :: handle, i, ispin, unit_nr 1680 REAL(KIND=dp) :: frob_norm, trace 1681 TYPE(cp_logger_type), POINTER :: logger 1682 TYPE(dbcsr_type) :: matrix_pp, matrix_tmp 1683 1684 CALL timeset(routineN, handle) 1685 logger => cp_get_default_logger() 1686 IF (logger%para_env%ionode) THEN 1687 unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.) 1688 ELSE 1689 unit_nr = -1 1690 ENDIF 1691 1692 CALL dbcsr_create(matrix_pp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry) 1693 CALL dbcsr_create(matrix_tmp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry) 1694 CALL dbcsr_trace(matrix_p(1), trace) 1695 1696 DO ispin = 1, SIZE(matrix_p) 1697 DO i = 1, max_steps 1698 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_p(ispin), matrix_p(ispin), & 1699 0.0_dp, matrix_pp, filter_eps=threshold) 1700 1701 ! test convergence 1702 CALL dbcsr_copy(matrix_tmp, matrix_pp) 1703 CALL dbcsr_add(matrix_tmp, matrix_p(ispin), 1.0_dp, -1.0_dp) 1704 frob_norm = dbcsr_frobenius_norm(matrix_tmp) ! tmp = PP - P 1705 IF (unit_nr > 0) WRITE (unit_nr, '(t3,a,f16.8)') "McWeeny: Deviation of idempotency", frob_norm 1706 IF (unit_nr > 0) CALL m_flush(unit_nr) 1707 1708 ! construct new P 1709 CALL dbcsr_copy(matrix_tmp, matrix_pp) 1710 CALL dbcsr_multiply("N", "N", -2.0_dp, matrix_pp, matrix_p(ispin), & 1711 3.0_dp, matrix_tmp, filter_eps=threshold) 1712 CALL dbcsr_copy(matrix_p(ispin), matrix_tmp) ! tmp = 3PP - 2PPP 1713 1714 ! frob_norm < SQRT(trace*threshold) 1715 IF (frob_norm*frob_norm < trace*threshold) EXIT 1716 END DO 1717 END DO 1718 1719 CALL dbcsr_release(matrix_pp) 1720 CALL dbcsr_release(matrix_tmp) 1721 CALL timestop(handle) 1722 END SUBROUTINE purify_mcweeny_orth 1723 1724! ************************************************************************************************** 1725!> \brief McWeeny purification of a matrix in the non-orthonormal basis 1726!> \param matrix_p Matrix to purify (needs to be almost idempotent already) 1727!> \param matrix_s Overlap-Matrix 1728!> \param threshold Threshold used as filter_eps and convergence criteria 1729!> \param max_steps Max number of iterations 1730!> \par History 1731!> 2013.01 created [Florian Schiffmann] 1732!> 2014.07 slightly refactored [Ole Schuett] 1733!> \author Florian Schiffmann 1734! ************************************************************************************************** 1735 SUBROUTINE purify_mcweeny_nonorth(matrix_p, matrix_s, threshold, max_steps) 1736 TYPE(dbcsr_type), DIMENSION(:) :: matrix_p 1737 TYPE(dbcsr_type) :: matrix_s 1738 REAL(KIND=dp) :: threshold 1739 INTEGER :: max_steps 1740 1741 CHARACTER(LEN=*), PARAMETER :: routineN = 'purify_mcweeny_nonorth', & 1742 routineP = moduleN//':'//routineN 1743 1744 INTEGER :: handle, i, ispin, unit_nr 1745 REAL(KIND=dp) :: frob_norm, trace 1746 TYPE(cp_logger_type), POINTER :: logger 1747 TYPE(dbcsr_type) :: matrix_ps, matrix_psp, matrix_test 1748 1749 CALL timeset(routineN, handle) 1750 1751 logger => cp_get_default_logger() 1752 IF (logger%para_env%ionode) THEN 1753 unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.) 1754 ELSE 1755 unit_nr = -1 1756 ENDIF 1757 1758 CALL dbcsr_create(matrix_ps, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry) 1759 CALL dbcsr_create(matrix_psp, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry) 1760 CALL dbcsr_create(matrix_test, template=matrix_p(1), matrix_type=dbcsr_type_no_symmetry) 1761 1762 DO ispin = 1, SIZE(matrix_p) 1763 DO i = 1, max_steps 1764 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_p(ispin), matrix_s, & 1765 0.0_dp, matrix_ps, filter_eps=threshold) 1766 CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_ps, matrix_p(ispin), & 1767 0.0_dp, matrix_psp, filter_eps=threshold) 1768 IF (i == 1) CALL dbcsr_trace(matrix_ps, trace) 1769 1770 ! test convergence 1771 CALL dbcsr_copy(matrix_test, matrix_psp) 1772 CALL dbcsr_add(matrix_test, matrix_p(ispin), 1.0_dp, -1.0_dp) 1773 frob_norm = dbcsr_frobenius_norm(matrix_test) ! test = PSP - P 1774 IF (unit_nr > 0) WRITE (unit_nr, '(t3,a,2f16.8)') "McWeeny: Deviation of idempotency", frob_norm 1775 IF (unit_nr > 0) CALL m_flush(unit_nr) 1776 1777 ! construct new P 1778 CALL dbcsr_copy(matrix_p(ispin), matrix_psp) 1779 CALL dbcsr_multiply("N", "N", -2.0_dp, matrix_ps, matrix_psp, & 1780 3.0_dp, matrix_p(ispin), filter_eps=threshold) 1781 1782 ! frob_norm < SQRT(trace*threshold) 1783 IF (frob_norm*frob_norm < trace*threshold) EXIT 1784 END DO 1785 END DO 1786 1787 CALL dbcsr_release(matrix_ps) 1788 CALL dbcsr_release(matrix_psp) 1789 CALL dbcsr_release(matrix_test) 1790 CALL timestop(handle) 1791 END SUBROUTINE purify_mcweeny_nonorth 1792 1793END MODULE iterate_matrix 1794