1!--------------------------------------------------------------------------------------------------! 2! CP2K: A general program to perform molecular dynamics simulations ! 3! Copyright (C) 2000 - 2020 CP2K developers group ! 4!--------------------------------------------------------------------------------------------------! 5 6! ************************************************************************************************** 7!> \brief basic linear algebra operations for full matrices 8!> \par History 9!> 08.2002 split out of qs_blacs [fawzi] 10!> \author Fawzi Mohamed 11! ************************************************************************************************** 12MODULE cp_fm_basic_linalg 13 USE cp_blacs_env, ONLY: cp_blacs_env_type 14 USE cp_fm_struct, ONLY: cp_fm_struct_equivalent 15 USE cp_fm_types, ONLY: & 16 cp_fm_create, cp_fm_get_element, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_p_type, & 17 cp_fm_release, cp_fm_set_all, cp_fm_set_element, cp_fm_set_submatrix, cp_fm_to_fm, & 18 cp_fm_type 19 USE cp_log_handling, ONLY: cp_to_string 20 USE kahan_sum, ONLY: accurate_dot_product,& 21 accurate_sum 22 USE kinds, ONLY: dp,& 23 int_8,& 24 sp 25 USE machine, ONLY: m_memory 26 USE mathlib, ONLY: get_pseudo_inverse_svd,& 27 invert_matrix 28 USE message_passing, ONLY: mp_sum 29#include "../base/base_uses.f90" 30 31 IMPLICIT NONE 32 PRIVATE 33 34 LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE. 35 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg' 36 37 PUBLIC :: cp_fm_scale, & ! scale a matrix 38 cp_fm_scale_and_add, & ! scale and add two matrices 39 cp_fm_column_scale, & ! scale columns of a matrix 40 cp_fm_row_scale, & ! scale rows of a matrix 41 cp_fm_trace, & ! trace of the transpose(A)*B 42 cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)] 43 cp_fm_norm, & ! different norms of A 44 cp_fm_schur_product, & ! schur product 45 cp_fm_transpose, & ! transpose a matrix 46 cp_fm_upper_to_full, & ! symmetrise an upper symmetric matrix 47 cp_fm_syrk, & ! rank k update 48 cp_fm_triangular_multiply, & ! triangular matrix multiply / solve 49 cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix 50 cp_fm_gemm, & ! multiply two matrices 51 cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices 52 cp_fm_invert, & ! computes the inverse and determinant 53 cp_fm_frobenius_norm, & ! frobenius norm 54 cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix 55 cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix 56 cp_fm_solve, & ! solves the equation A*B=C A and C are input 57 cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix 58 cp_fm_pdorgqr ! generates an M-by-N as first N columns of a product of K elementary reflectors 59 60 REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra 61#if defined (__ACCELERATE) 62 REAL(KIND=dp), EXTERNAL :: slange 63 REAL(KIND=sp), EXTERNAL :: pslange, pslatra 64#else 65 REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra 66#endif 67 68 INTERFACE cp_fm_trace 69 MODULE PROCEDURE cp_fm_trace_a0b0t0 70 MODULE PROCEDURE cp_fm_trace_a1b0t1 71 MODULE PROCEDURE cp_fm_trace_a1b1t1 72 END INTERFACE cp_fm_trace 73 74 INTERFACE cp_fm_contracted_trace 75 MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2 76 END INTERFACE cp_fm_contracted_trace 77CONTAINS 78 79! ************************************************************************************************** 80!> \brief calc A <- alpha*A + beta*B 81!> optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just 82!> scale A) 83!> \param alpha ... 84!> \param matrix_a ... 85!> \param beta ... 86!> \param matrix_b ... 87! ************************************************************************************************** 88 SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b) 89 90 REAL(KIND=dp), INTENT(IN) :: alpha 91 TYPE(cp_fm_type), POINTER :: matrix_a 92 REAL(KIND=dp), INTENT(in), OPTIONAL :: beta 93 TYPE(cp_fm_type), OPTIONAL, POINTER :: matrix_b 94 95 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale_and_add', & 96 routineP = moduleN//':'//routineN 97 98 INTEGER :: handle, size_a 99 REAL(KIND=dp) :: my_beta 100 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b 101 REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp 102 103 CALL timeset(routineN, handle) 104 105 IF (PRESENT(matrix_b)) THEN 106 my_beta = 1.0_dp 107 ELSE 108 my_beta = 0.0_dp 109 ENDIF 110 IF (PRESENT(beta)) my_beta = beta 111 NULLIFY (a, b) 112 113 CPASSERT(ASSOCIATED(matrix_a)) 114 CPASSERT(matrix_a%ref_count > 0) 115 116 IF (PRESENT(beta)) THEN 117 CPASSERT(PRESENT(matrix_b)) 118 CPASSERT(ASSOCIATED(matrix_b)) 119 CPASSERT(matrix_b%ref_count > 0) 120 IF (matrix_a%id_nr == matrix_b%id_nr) THEN 121 IF (matrix_a%id_nr == matrix_b%id_nr) & 122 CPWARN("Bad use of routine. Call cp_fm_scale instead") 123 CALL cp_fm_scale(alpha + beta, matrix_a) 124 CALL timestop(handle) 125 RETURN 126 END IF 127 END IF 128 129 a => matrix_a%local_data 130 a_sp => matrix_a%local_data_sp 131 132 IF (matrix_a%use_sp) THEN 133 size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2) 134 ELSE 135 size_a = SIZE(a, 1)*SIZE(a, 2) 136 ENDIF 137 138 IF (alpha /= 1.0_dp) THEN 139 IF (matrix_a%use_sp) THEN 140 CALL sscal(size_a, REAL(alpha, sp), a_sp, 1) 141 ELSE 142 CALL dscal(size_a, alpha, a, 1) 143 ENDIF 144 ENDIF 145 IF (my_beta .NE. 0.0_dp) THEN 146 IF (matrix_a%matrix_struct%context%group /= matrix_b%matrix_struct%context%group) & 147 CPABORT("matrixes must be in the same blacs context") 148 149 IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, & 150 matrix_b%matrix_struct)) THEN 151 152 b => matrix_b%local_data 153 b_sp => matrix_b%local_data_sp 154 155 IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN 156 CALL saxpy(size_a, REAL(my_beta, sp), b_sp, 1, a_sp, 1) 157 ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN 158 CALL saxpy(size_a, REAL(my_beta, sp), REAL(b, sp), 1, a_sp, 1) 159 ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN 160 CALL daxpy(size_a, my_beta, REAL(b_sp, dp), 1, a, 1) 161 ELSE 162 CALL daxpy(size_a, my_beta, b, 1, a, 1) 163 ENDIF 164 165 ELSE 166#ifdef __SCALAPACK 167 CPABORT("to do (pdscal,pdcopy,pdaxpy)") 168#else 169 CPABORT("") 170#endif 171 END IF 172 173 END IF 174 175 CALL timestop(handle) 176 177 END SUBROUTINE cp_fm_scale_and_add 178 179! ************************************************************************************************** 180!> \brief interface to BLACS geadd: 181!> matrix_b = beta*matrix_b + alpha*opt(matrix_a) 182!> where opt(matrix_a) can be either: 183!> 'N': matrix_a 184!> 'T': matrix_a^T 185!> 'C': matrix_a^H (Hermitian conjugate) 186!> note that this is a level three routine, use cp_fm_scale_and_add if that 187!> is sufficient for your needs 188!> \param alpha : complex scalar 189!> \param trans : 'N' normal, 'T' transposed 190!> \param matrix_a : input matrix_a 191!> \param beta : complex scalar 192!> \param matrix_b : input matrix_b, upon out put the updated matrix_b 193!> \author Lianheng Tong 194! ************************************************************************************************** 195 SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b) 196 REAL(KIND=dp), INTENT(IN) :: alpha, beta 197 CHARACTER, INTENT(IN) :: trans 198 TYPE(cp_fm_type), POINTER :: matrix_a, matrix_b 199 200 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geadd', & 201 routineP = moduleN//':'//routineN 202 203 INTEGER :: nrow_global, ncol_global, handle 204 REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa, bb 205#if defined(__SCALAPACK) 206 INTEGER, DIMENSION(9) :: desca, descb 207#else 208 INTEGER :: ii, jj 209#endif 210 211 CALL timeset(routineN, handle) 212 213 CPASSERT(ASSOCIATED(matrix_a)) 214 CPASSERT(ASSOCIATED(matrix_b)) 215 nrow_global = matrix_a%matrix_struct%nrow_global 216 ncol_global = matrix_a%matrix_struct%ncol_global 217 CPASSERT(nrow_global .EQ. matrix_b%matrix_struct%nrow_global) 218 CPASSERT(ncol_global .EQ. matrix_b%matrix_struct%ncol_global) 219 220 aa => matrix_a%local_data 221 bb => matrix_b%local_data 222 223#if defined(__SCALAPACK) 224 desca = matrix_a%matrix_struct%descriptor 225 descb = matrix_b%matrix_struct%descriptor 226 CALL pdgeadd(trans, & 227 nrow_global, & 228 ncol_global, & 229 alpha, & 230 aa, & 231 1, 1, & 232 desca, & 233 beta, & 234 bb, & 235 1, 1, & 236 descb) 237#else 238 ! dgeadd is not a standard BLAS function, although is implemented 239 ! in some libraries like OpenBLAS, so not going to use it here 240 SELECT CASE (trans) 241 CASE ('T') 242 DO jj = 1, ncol_global 243 DO ii = 1, nrow_global 244 bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii) 245 END DO 246 END DO 247 CASE DEFAULT 248 DO jj = 1, ncol_global 249 DO ii = 1, nrow_global 250 bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj) 251 END DO 252 END DO 253 END SELECT 254#endif 255 256 CALL timestop(handle) 257 258 END SUBROUTINE cp_fm_geadd 259 260! ************************************************************************************************** 261!> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix 262!> IMPORTANT : the sign of the determinant is not defined correctly yet .... 263!> \param matrix_a ... 264!> \param almost_determinant ... 265!> \param correct_sign ... 266!> \par History 267!> added correct_sign 02.07 (fschiff) 268!> \author Joost VandeVondele 269!> \note 270!> - matrix_a is overwritten 271!> - the sign of the determinant might be wrong 272!> - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot 273!> - one should be able to find out if ipivot is an even or an odd permutation... 274!> if you need the correct sign, just add correct_sign==.TRUE. (fschiff) 275! ************************************************************************************************** 276 SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign) 277 TYPE(cp_fm_type), POINTER :: matrix_a 278 REAL(KIND=dp), INTENT(OUT) :: almost_determinant 279 LOGICAL, INTENT(IN), OPTIONAL :: correct_sign 280 281 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_decompose', & 282 routineP = moduleN//':'//routineN 283 284 INTEGER :: handle, i, info, n 285 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot 286 REAL(KIND=dp) :: determinant 287 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a 288#if defined(__SCALAPACK) 289 INTEGER, DIMENSION(9) :: desca 290 REAL(KIND=dp), DIMENSION(:), POINTER :: diag 291#else 292 INTEGER :: lda 293#endif 294 295 CALL timeset(routineN, handle) 296 297 a => matrix_a%local_data 298 n = matrix_a%matrix_struct%nrow_global 299 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block)) 300 301#if defined(__SCALAPACK) 302 MARK_USED(correct_sign) 303 desca(:) = matrix_a%matrix_struct%descriptor(:) 304 CALL pdgetrf(n, n, a(1, 1), 1, 1, desca, ipivot, info) 305 306 ALLOCATE (diag(n)) 307 diag(:) = 0.0_dp 308 DO i = 1, n 309 CALL cp_fm_get_element(matrix_a, i, i, diag(i)) ! not completely optimal in speed i would say 310 ENDDO 311 determinant = 1.0_dp 312 DO i = 1, n 313 determinant = determinant*diag(i) 314 ENDDO 315 DEALLOCATE (diag) 316#else 317 lda = SIZE(a, 1) 318 CALL dgetrf(n, n, a(1, 1), lda, ipivot, info) 319 determinant = 1.0_dp 320 IF (correct_sign) THEN 321 DO i = 1, n 322 IF (ipivot(i) .NE. i) THEN 323 determinant = -determinant*a(i, i) 324 ELSE 325 determinant = determinant*a(i, i) 326 END IF 327 END DO 328 ELSE 329 DO i = 1, n 330 determinant = determinant*a(i, i) 331 ENDDO 332 END IF 333#endif 334 ! info is allowed to be zero 335 ! this does just signal a zero diagonal element 336 DEALLOCATE (ipivot) 337 almost_determinant = determinant ! notice that the sign is random 338 CALL timestop(handle) 339 END SUBROUTINE 340 341! ************************************************************************************************** 342!> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb ) 343!> \param transa : 'N' -> normal 'T' -> transpose 344!> alpha,beta :: can be 0.0_dp and 1.0_dp 345!> \param transb ... 346!> \param m ... 347!> \param n ... 348!> \param k ... 349!> \param alpha ... 350!> \param matrix_a : m x k matrix ( ! for transa = 'N') 351!> \param matrix_b : k x n matrix ( ! for transb = 'N') 352!> \param beta ... 353!> \param matrix_c : m x n matrix 354!> \param a_first_col ... 355!> \param a_first_row ... 356!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage) 357!> \param b_first_row ... 358!> \param c_first_col ... 359!> \param c_first_row ... 360!> \author Matthias Krack 361!> \note 362!> matrix_c should have no overlap with matrix_a, matrix_b 363! ************************************************************************************************** 364 SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, & 365 matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, & 366 c_first_col, c_first_row) 367 368 CHARACTER(LEN=1), INTENT(IN) :: transa, transb 369 INTEGER, INTENT(IN) :: m, n, k 370 REAL(KIND=dp), INTENT(IN) :: alpha 371 TYPE(cp_fm_type), POINTER :: matrix_a, matrix_b 372 REAL(KIND=dp), INTENT(IN) :: beta 373 TYPE(cp_fm_type), POINTER :: matrix_c 374 INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, & 375 b_first_col, b_first_row, & 376 c_first_col, c_first_row 377 378 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_gemm', & 379 routineP = moduleN//':'//routineN 380 381 INTEGER :: handle, i_a, i_b, i_c, j_a, & 382 j_b, j_c 383 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c 384 REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, c_sp 385#if defined(__SCALAPACK) 386 INTEGER, DIMENSION(9) :: desca, descb, descc 387#else 388 INTEGER :: lda, ldb, ldc 389#endif 390 391 CALL timeset(routineN, handle) 392 393 !sample peak memory 394 CALL m_memory() 395 396 a => matrix_a%local_data 397 b => matrix_b%local_data 398 c => matrix_c%local_data 399 400 a_sp => matrix_a%local_data_sp 401 b_sp => matrix_b%local_data_sp 402 c_sp => matrix_c%local_data_sp 403 404 IF (PRESENT(a_first_row)) THEN 405 i_a = a_first_row 406 ELSE 407 i_a = 1 408 END IF 409 IF (PRESENT(a_first_col)) THEN 410 j_a = a_first_col 411 ELSE 412 j_a = 1 413 END IF 414 IF (PRESENT(b_first_row)) THEN 415 i_b = b_first_row 416 ELSE 417 i_b = 1 418 END IF 419 IF (PRESENT(b_first_col)) THEN 420 j_b = b_first_col 421 ELSE 422 j_b = 1 423 END IF 424 IF (PRESENT(c_first_row)) THEN 425 i_c = c_first_row 426 ELSE 427 i_c = 1 428 END IF 429 IF (PRESENT(c_first_col)) THEN 430 j_c = c_first_col 431 ELSE 432 j_c = 1 433 END IF 434 435#if defined(__SCALAPACK) 436 437 desca(:) = matrix_a%matrix_struct%descriptor(:) 438 descb(:) = matrix_b%matrix_struct%descriptor(:) 439 descc(:) = matrix_c%matrix_struct%descriptor(:) 440 441 IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN 442 443 CALL psgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, & 444 descb, REAL(beta, sp), c_sp(1, 1), i_c, j_c, descc) 445 446 ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN 447 448 CALL pdgemm(transa, transb, m, n, k, alpha, a(1, 1), i_a, j_a, desca, b(1, 1), i_b, j_b, & 449 descb, beta, c(1, 1), i_c, j_c, descc) 450 451 ELSE 452 CPABORT("Mixed precision gemm NYI") 453 ENDIF 454#else 455 456 IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN 457 458 lda = SIZE(a_sp, 1) 459 ldb = SIZE(b_sp, 1) 460 ldc = SIZE(c_sp, 1) 461 462 CALL sgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, & 463 REAL(beta, sp), c_sp(i_c, j_c), ldc) 464 465 ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN 466 467 lda = SIZE(a, 1) 468 ldb = SIZE(b, 1) 469 ldc = SIZE(c, 1) 470 471 CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc) 472 473 ELSE 474 CPABORT("Mixed precision gemm NYI") 475 ENDIF 476 477#endif 478 CALL timestop(handle) 479 480 END SUBROUTINE cp_fm_gemm 481 482! ************************************************************************************************** 483!> \brief computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b 484!> computes matrix_c = beta * matrix_c + alpha * matrix_b * matrix_a 485!> where matrix_a is symmetric 486!> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right 487!> alpha,beta :: can be 0.0_dp and 1.0_dp 488!> \param uplo ... 489!> \param m ... 490!> \param n ... 491!> \param alpha ... 492!> \param matrix_a : m x m matrix 493!> \param matrix_b : m x n matrix 494!> \param beta ... 495!> \param matrix_c : m x n matrix 496!> \author Matthias Krack 497!> \note 498!> matrix_c should have no overlap with matrix_a, matrix_b 499!> all matrices in QS are upper triangular, so uplo should be 'U' always 500!> matrix_a is always an m x m matrix 501!> it is typically slower to do cp_fm_symm than cp_fm_gemm (especially in parallel easily 50 percent !) 502! ************************************************************************************************** 503 SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c) 504 505 CHARACTER(LEN=1), INTENT(IN) :: side, uplo 506 INTEGER, INTENT(IN) :: m, n 507 REAL(KIND=dp), INTENT(IN) :: alpha 508 TYPE(cp_fm_type), POINTER :: matrix_a, matrix_b 509 REAL(KIND=dp), INTENT(IN) :: beta 510 TYPE(cp_fm_type), POINTER :: matrix_c 511 512 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_symm', & 513 routineP = moduleN//':'//routineN 514 515 INTEGER :: handle 516 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c 517#if defined(__SCALAPACK) 518 INTEGER, DIMENSION(9) :: desca, descb, descc 519#else 520 INTEGER :: lda, ldb, ldc 521#endif 522 523 CALL timeset(routineN, handle) 524 525 a => matrix_a%local_data 526 b => matrix_b%local_data 527 c => matrix_c%local_data 528 529#if defined(__SCALAPACK) 530 531 desca(:) = matrix_a%matrix_struct%descriptor(:) 532 descb(:) = matrix_b%matrix_struct%descriptor(:) 533 descc(:) = matrix_c%matrix_struct%descriptor(:) 534 535 CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc) 536 537#else 538 539 lda = matrix_a%matrix_struct%local_leading_dimension 540 ldb = matrix_b%matrix_struct%local_leading_dimension 541 ldc = matrix_c%matrix_struct%local_leading_dimension 542 543 CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc) 544 545#endif 546 CALL timestop(handle) 547 548 END SUBROUTINE cp_fm_symm 549 550! ************************************************************************************************** 551!> \brief computes the Frobenius norm of matrix_a 552!> \param matrix_a : m x n matrix 553!> \param norm ... 554!> \author VW 555! ************************************************************************************************** 556 SUBROUTINE cp_fm_frobenius_norm(matrix_a, norm) 557 TYPE(cp_fm_type), POINTER :: matrix_a 558 REAL(KIND=dp), INTENT(inout) :: norm 559 560 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_frobenius_norm', & 561 routineP = moduleN//':'//routineN 562 563 INTEGER :: handle, size_a 564 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a 565 REAL(KIND=dp), EXTERNAL :: DDOT 566#if defined(__SCALAPACK) 567 INTEGER :: group 568#endif 569 570 CALL timeset(routineN, handle) 571 572 norm = 0.0_dp 573 a => matrix_a%local_data 574 size_a = SIZE(a, 1)*SIZE(a, 2) 575 norm = DDOT(size_a, a(1, 1), 1, a(1, 1), 1) 576#if defined(__SCALAPACK) 577 group = matrix_a%matrix_struct%para_env%group 578 CALL mp_sum(norm, group) 579#endif 580 norm = SQRT(norm) 581 582 CALL timestop(handle) 583 584 END SUBROUTINE cp_fm_frobenius_norm 585 586! ************************************************************************************************** 587!> \brief performs a rank-k update of a symmetric matrix_c 588!> matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a ) 589!> \param uplo : 'U' ('L') 590!> \param trans : 'N' ('T') 591!> \param k : number of cols to use in matrix_a 592!> ia,ja :: 1,1 (could be used for selecting subblock of a) 593!> \param alpha ... 594!> \param matrix_a ... 595!> \param ia ... 596!> \param ja ... 597!> \param beta ... 598!> \param matrix_c ... 599!> \author Matthias Krack 600!> \note 601!> In QS uplo should 'U' (upper part updated) 602! ************************************************************************************************** 603 SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c) 604 CHARACTER(LEN=1), INTENT(IN) :: uplo, trans 605 INTEGER, INTENT(IN) :: k 606 REAL(KIND=dp), INTENT(IN) :: alpha 607 TYPE(cp_fm_type), POINTER :: matrix_a 608 INTEGER, INTENT(IN) :: ia, ja 609 REAL(KIND=dp), INTENT(IN) :: beta 610 TYPE(cp_fm_type), POINTER :: matrix_c 611 612 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_syrk', & 613 routineP = moduleN//':'//routineN 614 615 INTEGER :: handle, n 616 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, c 617#if defined(__SCALAPACK) 618 INTEGER, DIMENSION(9) :: desca, descc 619#else 620 INTEGER :: lda, ldc 621#endif 622 623 CALL timeset(routineN, handle) 624 625 n = matrix_c%matrix_struct%nrow_global 626 627 a => matrix_a%local_data 628 c => matrix_c%local_data 629 630#if defined(__SCALAPACK) 631 632 desca(:) = matrix_a%matrix_struct%descriptor(:) 633 descc(:) = matrix_c%matrix_struct%descriptor(:) 634 635 CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc) 636 637#else 638 639 lda = SIZE(a, 1) 640 ldc = SIZE(c, 1) 641 642 CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc) 643 644#endif 645 CALL timestop(handle) 646 647 END SUBROUTINE cp_fm_syrk 648 649! ************************************************************************************************** 650!> \brief computes the schur product of two matrices 651!> c_ij = a_ij * b_ij 652!> \param matrix_a ... 653!> \param matrix_b ... 654!> \param matrix_c ... 655!> \author Joost VandeVondele 656! ************************************************************************************************** 657 SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c) 658 659 TYPE(cp_fm_type), POINTER :: matrix_a, matrix_b, matrix_c 660 661 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_schur_product', & 662 routineP = moduleN//':'//routineN 663 664 INTEGER :: handle, icol_local, irow_local, mypcol, & 665 myprow, ncol_local, npcol, nprow, & 666 nrow_local 667 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c 668 TYPE(cp_blacs_env_type), POINTER :: context 669 670 CALL timeset(routineN, handle) 671 672 context => matrix_a%matrix_struct%context 673 myprow = context%mepos(1) 674 mypcol = context%mepos(2) 675 nprow = context%num_pe(1) 676 npcol = context%num_pe(2) 677 678 a => matrix_a%local_data 679 b => matrix_b%local_data 680 c => matrix_c%local_data 681 682 nrow_local = matrix_a%matrix_struct%nrow_locals(myprow) 683 ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol) 684 685 DO icol_local = 1, ncol_local 686 DO irow_local = 1, nrow_local 687 c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local) 688 END DO 689 END DO 690 691 CALL timestop(handle) 692 693 END SUBROUTINE cp_fm_schur_product 694 695! ************************************************************************************************** 696!> \brief returns the trace of matrix_a^T matrix_b, i.e 697!> sum_{i,j}(matrix_a(i,j)*matrix_b(i,j)) 698!> \param matrix_a a matrix 699!> \param matrix_b another matrix 700!> \param trace ... 701!> \par History 702!> 11.06.2001 Creation (Matthias Krack) 703!> 12.2002 added doc [fawzi] 704!> \author Matthias Krack 705!> \note 706!> note the transposition of matrix_a! 707! ************************************************************************************************** 708 SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace) 709 710 TYPE(cp_fm_type), POINTER :: matrix_a, matrix_b 711 REAL(KIND=dp), INTENT(OUT) :: trace 712 713 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0', & 714 routineP = moduleN//':'//routineN 715 716 INTEGER :: group, handle, mypcol, myprow, & 717 ncol_local, npcol, nprow, nrow_local 718 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b 719 REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp 720 TYPE(cp_blacs_env_type), POINTER :: context 721 722 CALL timeset(routineN, handle) 723 724 context => matrix_a%matrix_struct%context 725 myprow = context%mepos(1) 726 mypcol = context%mepos(2) 727 nprow = context%num_pe(1) 728 npcol = context%num_pe(2) 729 730 group = matrix_a%matrix_struct%para_env%group 731 732 a => matrix_a%local_data 733 b => matrix_b%local_data 734 735 a_sp => matrix_a%local_data_sp 736 b_sp => matrix_b%local_data_sp 737 738 nrow_local = MIN(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow)) 739 ncol_local = MIN(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol)) 740 741 ! cries for an accurate_dot_product 742 IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN 743 trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local)* & 744 b_sp(1:nrow_local, 1:ncol_local), dp)) 745 ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN 746 trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local), dp)* & 747 b(1:nrow_local, 1:ncol_local)) 748 ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN 749 trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* & 750 REAL(b_sp(1:nrow_local, 1:ncol_local), dp)) 751 ELSE 752 trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* & 753 b(1:nrow_local, 1:ncol_local)) 754 ENDIF 755 756 CALL mp_sum(trace, group) 757 758 CALL timestop(handle) 759 760 END SUBROUTINE cp_fm_trace_a0b0t0 761 762! ************************************************************************************************** 763!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B. 764!> \param matrix_a list of A matrices 765!> \param matrix_b B matrix 766!> \param trace computed traces 767!> \par History 768!> * 08.2018 forked from cp_fm_trace() [Sergey Chulkov] 769!> \note \parblock 770!> Computing the trace requires collective communication between involved MPI processes 771!> that implies a synchronisation point between them. The aim of this subroutine is to reduce 772!> the amount of time wasted in such synchronisation by performing one large collective 773!> operation which involves all the matrices in question. 774!> 775!> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that 776!> the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable 777!> 'matrix_b' is a single matrix. 778!> \endparblock 779! ************************************************************************************************** 780 SUBROUTINE cp_fm_trace_a1b0t1(matrix_a, matrix_b, trace) 781 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in) :: matrix_a 782 TYPE(cp_fm_type), POINTER :: matrix_b 783 REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace 784 785 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1', & 786 routineP = moduleN//':'//routineN 787 788 INTEGER :: group, handle, imatrix, n_matrices, & 789 ncols_local, nrows_local 790 LOGICAL :: use_sp_a, use_sp_b 791 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b 792 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp 793 794 CALL timeset(routineN, handle) 795 796 n_matrices = SIZE(trace) 797 CPASSERT(SIZE(matrix_a) == n_matrices) 798 799 CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local) 800 use_sp_b = matrix_b%use_sp 801 802 IF (use_sp_b) THEN 803 ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local) 804 ELSE 805 ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local) 806 END IF 807 808!$OMP PARALLEL DO DEFAULT(NONE), & 809!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), & 810!$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), & 811!$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b) 812 813 DO imatrix = 1, n_matrices 814 815 use_sp_a = matrix_a(imatrix)%matrix%use_sp 816 817 ! assume that the matrices A(i) and B have identical shapes and distribution schemes 818 IF (use_sp_a .AND. use_sp_b) THEN 819 ldata_a_sp => matrix_a(imatrix)%matrix%local_data_sp(1:nrows_local, 1:ncols_local) 820 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp) 821 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN 822 ldata_a => matrix_a(imatrix)%matrix%local_data(1:nrows_local, 1:ncols_local) 823 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b) 824 ELSE 825 CPABORT("Matrices A and B are of different types") 826 END IF 827 END DO 828!$OMP END PARALLEL DO 829 830 group = matrix_b%matrix_struct%para_env%group 831 CALL mp_sum(trace, group) 832 833 CALL timestop(handle) 834 END SUBROUTINE cp_fm_trace_a1b0t1 835 836! ************************************************************************************************** 837!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k. 838!> \param matrix_a list of A matrices 839!> \param matrix_b list of B matrices 840!> \param trace computed traces 841!> \param accurate ... 842!> \par History 843!> * 11.2016 forked from cp_fm_trace() [Sergey Chulkov] 844!> \note \parblock 845!> Computing the trace requires collective communication between involved MPI processes 846!> that implies a synchronisation point between them. The aim of this subroutine is to reduce 847!> the amount of time wasted in such synchronisation by performing one large collective 848!> operation which involves all the matrices in question. 849!> 850!> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that 851!> all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays. 852!> \endparblock 853! ************************************************************************************************** 854 SUBROUTINE cp_fm_trace_a1b1t1(matrix_a, matrix_b, trace, accurate) 855 TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in) :: matrix_a, matrix_b 856 REAL(kind=dp), DIMENSION(:), INTENT(out) :: trace 857 LOGICAL, INTENT(IN), OPTIONAL :: accurate 858 859 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1', & 860 routineP = moduleN//':'//routineN 861 862 INTEGER :: group, handle, imatrix, n_matrices, & 863 ncols_local, nrows_local 864 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b 865 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b 866 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp 867 868 CALL timeset(routineN, handle) 869 870 n_matrices = SIZE(trace) 871 CPASSERT(SIZE(matrix_a) == n_matrices) 872 CPASSERT(SIZE(matrix_b) == n_matrices) 873 874 use_accurate_sum = .TRUE. 875 IF (PRESENT(accurate)) use_accurate_sum = accurate 876 877!$OMP PARALLEL DO DEFAULT(NONE), & 878!$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), & 879!$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), & 880!$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum) 881 DO imatrix = 1, n_matrices 882 CALL cp_fm_get_info(matrix_a(imatrix)%matrix, nrow_local=nrows_local, ncol_local=ncols_local) 883 884 use_sp_a = matrix_a(imatrix)%matrix%use_sp 885 use_sp_b = matrix_b(imatrix)%matrix%use_sp 886 887 ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes 888 IF (use_sp_a .AND. use_sp_b) THEN 889 ldata_a_sp => matrix_a(imatrix)%matrix%local_data_sp(1:nrows_local, 1:ncols_local) 890 ldata_b_sp => matrix_b(imatrix)%matrix%local_data_sp(1:nrows_local, 1:ncols_local) 891 IF (use_accurate_sum) THEN 892 trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp) 893 ELSE 894 trace(imatrix) = SUM(ldata_a_sp*ldata_b_sp) 895 END IF 896 ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN 897 ldata_a => matrix_a(imatrix)%matrix%local_data(1:nrows_local, 1:ncols_local) 898 ldata_b => matrix_b(imatrix)%matrix%local_data(1:nrows_local, 1:ncols_local) 899 IF (use_accurate_sum) THEN 900 trace(imatrix) = accurate_dot_product(ldata_a, ldata_b) 901 ELSE 902 trace(imatrix) = SUM(ldata_a*ldata_b) 903 END IF 904 ELSE 905 CPABORT("Matrices A and B are of different types") 906 END IF 907 END DO 908!$OMP END PARALLEL DO 909 910 group = matrix_a(1)%matrix%matrix_struct%para_env%group 911 CALL mp_sum(trace, group) 912 913 CALL timestop(handle) 914 END SUBROUTINE cp_fm_trace_a1b1t1 915 916! ************************************************************************************************** 917!> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)). 918!> \param matrix_a list of A matrices 919!> \param matrix_b list of B matrices 920!> \param trace computed traces 921!> \param accurate ... 922! ************************************************************************************************** 923 SUBROUTINE cp_fm_contracted_trace_a2b2t2(matrix_a, matrix_b, trace, accurate) 924 TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(in) :: matrix_a, matrix_b 925 REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: trace 926 LOGICAL, INTENT(IN), OPTIONAL :: accurate 927 928 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2', & 929 routineP = moduleN//':'//routineN 930 931 INTEGER :: group, handle, ia, ib, iz, na, nb, & 932 ncols_local, nrows_local, nz 933 INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces 934 LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b 935 REAL(kind=dp) :: t 936 REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b 937 REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp 938 939 CALL timeset(routineN, handle) 940 941 nz = SIZE(matrix_a, 1) 942 CPASSERT(SIZE(matrix_b, 1) == nz) 943 944 na = SIZE(matrix_a, 2) 945 nb = SIZE(matrix_b, 2) 946 CPASSERT(SIZE(trace, 1) == na) 947 CPASSERT(SIZE(trace, 2) == nb) 948 949 use_accurate_sum = .TRUE. 950 IF (PRESENT(accurate)) use_accurate_sum = accurate 951 952 ! here we use one running index (itrace) instead of two (ia, ib) in order to 953 ! improve load balance between shared-memory threads 954 ntraces = na*nb 955 na8 = INT(na, kind=int_8) 956 957!$OMP PARALLEL DO DEFAULT(NONE), & 958!$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), & 959!$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), & 960!$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum) 961 DO itrace = 1, ntraces 962 ib8 = (itrace - 1)/na8 963 ia = INT(itrace - ib8*na8) 964 ib = INT(ib8) + 1 965 966 t = 0.0_dp 967 DO iz = 1, nz 968 CALL cp_fm_get_info(matrix_a(iz, ia)%matrix, nrow_local=nrows_local, ncol_local=ncols_local) 969 use_sp_a = matrix_a(iz, ia)%matrix%use_sp 970 use_sp_b = matrix_b(iz, ib)%matrix%use_sp 971 972 ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes 973 IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN 974 ldata_a => matrix_a(iz, ia)%matrix%local_data(1:nrows_local, 1:ncols_local) 975 ldata_b => matrix_b(iz, ib)%matrix%local_data(1:nrows_local, 1:ncols_local) 976 IF (use_accurate_sum) THEN 977 t = t + accurate_dot_product(ldata_a, ldata_b) 978 ELSE 979 t = t + SUM(ldata_a*ldata_b) 980 END IF 981 ELSE IF (use_sp_a .AND. use_sp_b) THEN 982 ldata_a_sp => matrix_a(iz, ia)%matrix%local_data_sp(1:nrows_local, 1:ncols_local) 983 ldata_b_sp => matrix_b(iz, ib)%matrix%local_data_sp(1:nrows_local, 1:ncols_local) 984 IF (use_accurate_sum) THEN 985 t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp) 986 ELSE 987 t = t + SUM(ldata_a_sp*ldata_b_sp) 988 END IF 989 ELSE 990 CPABORT("Matrices A and B are of different types") 991 END IF 992 END DO 993 trace(ia, ib) = t 994 END DO 995!$OMP END PARALLEL DO 996 997 group = matrix_a(1, 1)%matrix%matrix_struct%para_env%group 998 CALL mp_sum(trace, group) 999 1000 CALL timestop(handle) 1001 END SUBROUTINE cp_fm_contracted_trace_a2b2t2 1002 1003! ************************************************************************************************** 1004!> \brief multiplies in place by a triangular matrix: 1005!> matrix_b = alpha op(triangular_matrix) matrix_b 1006!> or (if side='R') 1007!> matrix_b = alpha matrix_b op(triangular_matrix) 1008!> op(triangular_matrix) is: 1009!> triangular_matrix (if transpose_tr=.false. and invert_tr=.false.) 1010!> triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.) 1011!> triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.) 1012!> triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.) 1013!> \param triangular_matrix the triangular matrix that multiplies the other 1014!> \param matrix_b the matrix that gets multiplied and stores the result 1015!> \param side on which side of matrix_b stays op(triangular_matrix) 1016!> (defaults to 'L') 1017!> \param transpose_tr if the triangular matrix should be transposed 1018!> (defaults to false) 1019!> \param invert_tr if the triangular matrix should be inverted 1020!> (defaults to false) 1021!> \param uplo_tr if triangular_matrix is stored in the upper ('U') or 1022!> lower ('L') triangle (defaults to 'U') 1023!> \param unit_diag_tr if the diagonal elements of triangular_matrix should 1024!> be assumed to be 1 (defaults to false) 1025!> \param n_rows the number of rows of the result (defaults to 1026!> size(matrix_b,1)) 1027!> \param n_cols the number of columns of the result (defaults to 1028!> size(matrix_b,2)) 1029!> \param alpha ... 1030!> \par History 1031!> 08.2002 created [fawzi] 1032!> \author Fawzi Mohamed 1033!> \note 1034!> needs an mpi env 1035! ************************************************************************************************** 1036 SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, & 1037 transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, & 1038 alpha) 1039 TYPE(cp_fm_type), POINTER :: triangular_matrix, matrix_b 1040 CHARACTER, INTENT(in), OPTIONAL :: side 1041 LOGICAL, INTENT(in), OPTIONAL :: transpose_tr, invert_tr 1042 CHARACTER, INTENT(in), OPTIONAL :: uplo_tr 1043 LOGICAL, INTENT(in), OPTIONAL :: unit_diag_tr 1044 INTEGER, INTENT(in), OPTIONAL :: n_rows, n_cols 1045 REAL(KIND=dp), INTENT(in), OPTIONAL :: alpha 1046 1047 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_triangular_multiply', & 1048 routineP = moduleN//':'//routineN 1049 1050 CHARACTER :: side_char, transa, unit_diag, uplo 1051 INTEGER :: handle, m, n 1052 LOGICAL :: invert 1053 REAL(KIND=dp) :: al 1054 1055 CALL timeset(routineN, handle) 1056 side_char = 'L' 1057 unit_diag = 'N' 1058 uplo = 'U' 1059 transa = 'N' 1060 invert = .FALSE. 1061 al = 1.0_dp 1062 CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n) 1063 IF (PRESENT(side)) side_char = side 1064 IF (PRESENT(invert_tr)) invert = invert_tr 1065 IF (PRESENT(uplo_tr)) uplo = uplo_tr 1066 IF (PRESENT(unit_diag_tr)) THEN 1067 IF (unit_diag_tr) THEN 1068 unit_diag = 'U' 1069 ELSE 1070 unit_diag = 'N' 1071 END IF 1072 END IF 1073 IF (PRESENT(transpose_tr)) THEN 1074 IF (transpose_tr) THEN 1075 transa = 'T' 1076 ELSE 1077 transa = 'N' 1078 END IF 1079 END IF 1080 IF (PRESENT(alpha)) al = alpha 1081 IF (PRESENT(n_rows)) m = n_rows 1082 IF (PRESENT(n_cols)) n = n_cols 1083 1084 IF (invert) THEN 1085 1086#if defined(__SCALAPACK) 1087 CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, & 1088 triangular_matrix%local_data(1, 1), 1, 1, & 1089 triangular_matrix%matrix_struct%descriptor, & 1090 matrix_b%local_data(1, 1), 1, 1, & 1091 matrix_b%matrix_struct%descriptor(1)) 1092#else 1093 CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, & 1094 triangular_matrix%local_data(1, 1), & 1095 SIZE(triangular_matrix%local_data, 1), & 1096 matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1)) 1097#endif 1098 1099 ELSE 1100 1101#if defined(__SCALAPACK) 1102 CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, & 1103 triangular_matrix%local_data(1, 1), 1, 1, & 1104 triangular_matrix%matrix_struct%descriptor, & 1105 matrix_b%local_data(1, 1), 1, 1, & 1106 matrix_b%matrix_struct%descriptor(1)) 1107#else 1108 CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, & 1109 triangular_matrix%local_data(1, 1), & 1110 SIZE(triangular_matrix%local_data, 1), & 1111 matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1)) 1112#endif 1113 1114 END IF 1115 1116 CALL timestop(handle) 1117 END SUBROUTINE cp_fm_triangular_multiply 1118 1119! ************************************************************************************************** 1120!> \brief scales a matrix 1121!> matrix_a = alpha * matrix_b 1122!> \param alpha ... 1123!> \param matrix_a ... 1124!> \note 1125!> use cp_fm_set_all to zero (avoids problems with nan) 1126! ************************************************************************************************** 1127 SUBROUTINE cp_fm_scale(alpha, matrix_a) 1128 REAL(KIND=dp), INTENT(IN) :: alpha 1129 TYPE(cp_fm_type), POINTER :: matrix_a 1130 1131 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale', routineP = moduleN//':'//routineN 1132 1133 INTEGER :: handle, size_a 1134 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a 1135 1136 CALL timeset(routineN, handle) 1137 1138 NULLIFY (a) 1139 1140 CPASSERT(ASSOCIATED(matrix_a)) 1141 CPASSERT(matrix_a%ref_count > 0) 1142 1143 a => matrix_a%local_data 1144 size_a = SIZE(a, 1)*SIZE(a, 2) 1145 1146 CALL DSCAL(size_a, alpha, a, 1) 1147 1148 CALL timestop(handle) 1149 1150 END SUBROUTINE cp_fm_scale 1151 1152! ************************************************************************************************** 1153!> \brief transposes a matrix 1154!> matrixt = matrix ^ T 1155!> \param matrix ... 1156!> \param matrixt ... 1157!> \note 1158!> all matrix elements are transposed (see cp_fm_upper_to_half to symmetrise a matrix) 1159! ************************************************************************************************** 1160 SUBROUTINE cp_fm_transpose(matrix, matrixt) 1161 TYPE(cp_fm_type), POINTER :: matrix, matrixt 1162 1163 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_transpose', & 1164 routineP = moduleN//':'//routineN 1165 1166 INTEGER :: handle, ncol_global, & 1167 nrow_global, ncol_globalt, nrow_globalt 1168 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, c 1169#if defined(__SCALAPACK) 1170 INTEGER, DIMENSION(9) :: desca, descc 1171#else 1172 INTEGER :: i, j 1173#endif 1174 1175 CPASSERT(ASSOCIATED(matrix)) 1176 CPASSERT(ASSOCIATED(matrixt)) 1177 nrow_global = matrix%matrix_struct%nrow_global 1178 ncol_global = matrix%matrix_struct%ncol_global 1179 nrow_globalt = matrixt%matrix_struct%nrow_global 1180 ncol_globalt = matrixt%matrix_struct%ncol_global 1181 CPASSERT(nrow_global == ncol_globalt) 1182 CPASSERT(nrow_globalt == ncol_global) 1183 1184 CALL timeset(routineN, handle) 1185 1186 a => matrix%local_data 1187 c => matrixt%local_data 1188 1189#if defined(__SCALAPACK) 1190 desca(:) = matrix%matrix_struct%descriptor(:) 1191 descc(:) = matrixt%matrix_struct%descriptor(:) 1192 CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc) 1193#else 1194 DO j = 1, ncol_global 1195 DO i = 1, nrow_global 1196 c(j, i) = a(i, j) 1197 ENDDO 1198 ENDDO 1199#endif 1200 CALL timestop(handle) 1201 1202 END SUBROUTINE cp_fm_transpose 1203 1204! ************************************************************************************************** 1205!> \brief given an upper triangular matrix computes the corresponding full matrix 1206!> \param matrix the upper triangular matrix as input, the full matrix as output 1207!> \param work a matrix of the same size as matrix 1208!> \author Matthias Krack 1209!> \note 1210!> the lower triangular part is irrelevant 1211! ************************************************************************************************** 1212 SUBROUTINE cp_fm_upper_to_full(matrix, work) 1213 1214 TYPE(cp_fm_type), POINTER :: matrix, work 1215 1216 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_upper_to_full', & 1217 routineP = moduleN//':'//routineN 1218 1219 INTEGER :: handle, icol_global, irow_global, & 1220 mypcol, myprow, ncol_global, & 1221 npcol, nprow, nrow_global 1222 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a 1223 REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp 1224 TYPE(cp_blacs_env_type), POINTER :: context 1225 1226#if defined(__SCALAPACK) 1227 INTEGER :: icol_local, irow_local, & 1228 ncol_block, ncol_local, & 1229 nrow_block, nrow_local 1230 INTEGER, DIMENSION(9) :: desca, descc 1231 INTEGER, EXTERNAL :: indxl2g 1232 REAL(KIND=dp), DIMENSION(:, :), POINTER :: c 1233 REAL(KIND=sp), DIMENSION(:, :), POINTER :: c_sp 1234#endif 1235 1236 CPASSERT(ASSOCIATED(matrix)) 1237 CPASSERT(ASSOCIATED(work)) 1238 nrow_global = matrix%matrix_struct%nrow_global 1239 ncol_global = matrix%matrix_struct%ncol_global 1240 CPASSERT(nrow_global == ncol_global) 1241 nrow_global = work%matrix_struct%nrow_global 1242 ncol_global = work%matrix_struct%ncol_global 1243 CPASSERT(nrow_global == ncol_global) 1244 CPASSERT(matrix%use_sp .EQV. work%use_sp) 1245 1246 CALL timeset(routineN, handle) 1247 1248 context => matrix%matrix_struct%context 1249 myprow = context%mepos(1) 1250 mypcol = context%mepos(2) 1251 nprow = context%num_pe(1) 1252 npcol = context%num_pe(2) 1253 1254#if defined(__SCALAPACK) 1255 1256 nrow_block = matrix%matrix_struct%nrow_block 1257 ncol_block = matrix%matrix_struct%ncol_block 1258 1259 nrow_local = matrix%matrix_struct%nrow_locals(myprow) 1260 ncol_local = matrix%matrix_struct%ncol_locals(mypcol) 1261 1262 a => work%local_data 1263 a_sp => work%local_data_sp 1264 desca(:) = work%matrix_struct%descriptor(:) 1265 c => matrix%local_data 1266 c_sp => matrix%local_data_sp 1267 descc(:) = matrix%matrix_struct%descriptor(:) 1268 1269 DO icol_local = 1, ncol_local 1270 icol_global = indxl2g(icol_local, ncol_block, mypcol, & 1271 matrix%matrix_struct%first_p_pos(2), npcol) 1272 DO irow_local = 1, nrow_local 1273 irow_global = indxl2g(irow_local, nrow_block, myprow, & 1274 matrix%matrix_struct%first_p_pos(1), nprow) 1275 IF (irow_global > icol_global) THEN 1276 IF (matrix%use_sp) THEN 1277 c_sp(irow_local, icol_local) = 0.0_sp 1278 ELSE 1279 c(irow_local, icol_local) = 0.0_dp 1280 ENDIF 1281 ELSE IF (irow_global == icol_global) THEN 1282 IF (matrix%use_sp) THEN 1283 c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local) 1284 ELSE 1285 c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local) 1286 ENDIF 1287 END IF 1288 END DO 1289 END DO 1290 1291 DO icol_local = 1, ncol_local 1292 DO irow_local = 1, nrow_local 1293 IF (matrix%use_sp) THEN 1294 a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local) 1295 ELSE 1296 a(irow_local, icol_local) = c(irow_local, icol_local) 1297 ENDIF 1298 ENDDO 1299 ENDDO 1300 1301 IF (matrix%use_sp) THEN 1302 CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc) 1303 ELSE 1304 CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc) 1305 ENDIF 1306 1307#else 1308 1309 a => matrix%local_data 1310 a_sp => matrix%local_data_sp 1311 DO irow_global = 1, nrow_global 1312 DO icol_global = irow_global + 1, ncol_global 1313 IF (matrix%use_sp) THEN 1314 a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global) 1315 ELSE 1316 a(icol_global, irow_global) = a(irow_global, icol_global) 1317 ENDIF 1318 ENDDO 1319 ENDDO 1320 1321#endif 1322 CALL timestop(handle) 1323 1324 END SUBROUTINE cp_fm_upper_to_full 1325 1326! ************************************************************************************************** 1327!> \brief scales column i of matrix a with scaling(i) 1328!> \param matrixa ... 1329!> \param scaling : an array used for scaling the columns, 1330!> SIZE(scaling) determines the number of columns to be scaled 1331!> \author Joost VandeVondele 1332!> \note 1333!> this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i) 1334!> that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t) 1335!> this procedure can be up to 20 times faster than calling cp_fm_syrk n times 1336!> where every vector has a different prefactor 1337! ************************************************************************************************** 1338 SUBROUTINE cp_fm_column_scale(matrixa, scaling) 1339 TYPE(cp_fm_type), POINTER :: matrixa 1340 REAL(KIND=dp), DIMENSION(:), INTENT(in) :: scaling 1341 1342 INTEGER :: k, mypcol, myprow, n, ncol_global, & 1343 npcol, nprow 1344 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a 1345 REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp 1346#if defined(__SCALAPACK) 1347 INTEGER :: icol_global, icol_local, & 1348 ipcol, iprow, irow_local 1349#else 1350 INTEGER :: i 1351#endif 1352 1353 myprow = matrixa%matrix_struct%context%mepos(1) 1354 mypcol = matrixa%matrix_struct%context%mepos(2) 1355 nprow = matrixa%matrix_struct%context%num_pe(1) 1356 npcol = matrixa%matrix_struct%context%num_pe(2) 1357 1358 ncol_global = matrixa%matrix_struct%ncol_global 1359 1360 a => matrixa%local_data 1361 a_sp => matrixa%local_data_sp 1362 IF (matrixa%use_sp) THEN 1363 n = SIZE(a_sp, 1) 1364 ELSE 1365 n = SIZE(a, 1) 1366 ENDIF 1367 k = MIN(SIZE(scaling), ncol_global) 1368 1369#if defined(__SCALAPACK) 1370 1371 DO icol_global = 1, k 1372 CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, & 1373 nprow, npcol, myprow, mypcol, & 1374 irow_local, icol_local, iprow, ipcol) 1375 IF ((ipcol == mypcol)) THEN 1376 IF (matrixa%use_sp) THEN 1377 CALL SSCAL(n, REAL(scaling(icol_global), sp), a_sp(1, icol_local), 1) 1378 ELSE 1379 CALL DSCAL(n, scaling(icol_global), a(1, icol_local), 1) 1380 ENDIF 1381 END IF 1382 ENDDO 1383#else 1384 DO i = 1, k 1385 IF (matrixa%use_sp) THEN 1386 CALL SSCAL(n, REAL(scaling(i), sp), a_sp(1, i), 1) 1387 ELSE 1388 CALL DSCAL(n, scaling(i), a(1, i), 1) 1389 ENDIF 1390 ENDDO 1391#endif 1392 END SUBROUTINE cp_fm_column_scale 1393 1394! ************************************************************************************************** 1395!> \brief scales row i of matrix a with scaling(i) 1396!> \param matrixa ... 1397!> \param scaling : an array used for scaling the columns, 1398!> \author JGH 1399!> \note 1400! ************************************************************************************************** 1401 SUBROUTINE cp_fm_row_scale(matrixa, scaling) 1402 TYPE(cp_fm_type), POINTER :: matrixa 1403 REAL(KIND=dp), DIMENSION(:), INTENT(in) :: scaling 1404 1405 INTEGER :: n, m, nrow_global, nrow_local, ncol_local 1406 INTEGER, DIMENSION(:), POINTER :: row_indices 1407 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a 1408 REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp 1409#if defined(__SCALAPACK) 1410 INTEGER :: irow_global, icol, irow 1411#else 1412 INTEGER :: j 1413#endif 1414 1415 CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, & 1416 nrow_local=nrow_local, ncol_local=ncol_local) 1417 CPASSERT(SIZE(scaling) == nrow_global) 1418 1419 a => matrixa%local_data 1420 a_sp => matrixa%local_data_sp 1421 IF (matrixa%use_sp) THEN 1422 n = SIZE(a_sp, 1) 1423 m = SIZE(a_sp, 2) 1424 ELSE 1425 n = SIZE(a, 1) 1426 m = SIZE(a, 2) 1427 ENDIF 1428 1429#if defined(__SCALAPACK) 1430 DO icol = 1, ncol_local 1431 IF (matrixa%use_sp) THEN 1432 DO irow = 1, nrow_local 1433 irow_global = row_indices(irow) 1434 a(irow, icol) = REAL(scaling(irow_global), dp)*a(irow, icol) 1435 END DO 1436 ELSE 1437 DO irow = 1, nrow_local 1438 irow_global = row_indices(irow) 1439 a(irow, icol) = scaling(irow_global)*a(irow, icol) 1440 END DO 1441 ENDIF 1442 ENDDO 1443#else 1444 IF (matrixa%use_sp) THEN 1445 DO j = 1, m 1446 a_sp(1:n, j) = REAL(scaling(1:n), sp)*a_sp(1:n, j) 1447 ENDDO 1448 ELSE 1449 DO j = 1, m 1450 a(1:n, j) = scaling(1:n)*a(1:n, j) 1451 ENDDO 1452 ENDIF 1453#endif 1454 END SUBROUTINE cp_fm_row_scale 1455! ************************************************************************************************** 1456!> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix 1457!> \param matrix_a the matrix to invert 1458!> \param matrix_inverse the inverse of matrix_a 1459!> \param det_a the determinant of matrix_a 1460!> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd 1461!> are screened 1462!> \param eigval optionally return matrix eigenvalues/singular values 1463!> \par History 1464!> note of Jan Wilhelm (12.2015) 1465!> - computation of determinant corrected 1466!> - determinant only computed if det_a is present 1467!> 12.2016 added option to use SVD instead of LU [Nico Holmberg] 1468!> \author Florian Schiffmann(02.2007) 1469! ************************************************************************************************** 1470 SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval) 1471 1472 TYPE(cp_fm_type), POINTER :: matrix_a, matrix_inverse 1473 REAL(KIND=dp), INTENT(OUT), OPTIONAL :: det_a 1474 REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_svd 1475 REAL(KIND=dp), DIMENSION(:), POINTER, & 1476 INTENT(INOUT), OPTIONAL :: eigval 1477 1478 CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_invert', & 1479 routineP = moduleN//':'//routineN 1480 1481 INTEGER :: n 1482 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot 1483 REAL(KIND=dp) :: determinant, my_eps_svd 1484 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a 1485 TYPE(cp_fm_type), POINTER :: matrix_lu 1486 1487#if defined(__SCALAPACK) 1488 TYPE(cp_fm_type), POINTER :: u, vt, sigma, inv_sigma_ut 1489 INTEGER :: i, info, liwork, lwork, group, exponent_of_minus_one 1490 INTEGER, DIMENSION(9) :: desca 1491 LOGICAL :: quenched 1492 REAL(KIND=dp) :: alpha, beta 1493 REAL(KIND=dp), DIMENSION(:), POINTER :: diag 1494 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work 1495#else 1496 LOGICAL :: sign 1497 REAL(KIND=dp) :: eps1 1498#endif 1499 1500 my_eps_svd = 0.0_dp 1501 IF (PRESENT(eps_svd)) my_eps_svd = eps_svd 1502 1503 CALL cp_fm_create(matrix=matrix_lu, & 1504 matrix_struct=matrix_a%matrix_struct, & 1505 name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX") 1506 CALL cp_fm_to_fm(matrix_a, matrix_lu) 1507 1508 a => matrix_lu%local_data 1509 n = matrix_lu%matrix_struct%nrow_global 1510 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block)) 1511 ipivot(:) = 0 1512#if defined(__SCALAPACK) 1513 IF (my_eps_svd .EQ. 0.0_dp) THEN 1514 ! Use LU decomposition 1515 lwork = 3*n 1516 liwork = 3*n 1517 desca(:) = matrix_lu%matrix_struct%descriptor(:) 1518 CALL pdgetrf(n, n, a(1, 1), 1, 1, desca, ipivot, info) 1519 1520 IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN 1521 1522 ALLOCATE (diag(n)) 1523 diag(:) = 0.0_dp 1524 DO i = 1, n 1525 CALL cp_fm_get_element(matrix_lu, i, i, diag(i)) ! not completely optimal in speed i would say 1526 ENDDO 1527 1528 exponent_of_minus_one = 0 1529 determinant = 1.0_dp 1530 DO i = 1, n 1531 determinant = determinant*diag(i) 1532 IF (ipivot(i) .NE. i) THEN 1533 exponent_of_minus_one = exponent_of_minus_one + 1 1534 END IF 1535 ENDDO 1536 IF (PRESENT(eigval)) THEN 1537 CPASSERT(.NOT. ASSOCIATED(eigval)) 1538 ALLOCATE (eigval(n)) 1539 eigval(:) = diag 1540 END IF 1541 DEALLOCATE (diag) 1542 1543 group = matrix_lu%matrix_struct%para_env%group 1544 CALL mp_sum(exponent_of_minus_one, group) 1545 1546 determinant = determinant*(-1.0_dp)**exponent_of_minus_one 1547 1548 END IF 1549 1550 alpha = 0.0_dp 1551 beta = 1.0_dp 1552 CALL cp_fm_set_all(matrix_inverse, alpha, beta) 1553 CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info) 1554 ELSE 1555 ! Use singular value decomposition 1556 CALL cp_fm_create(matrix=u, & 1557 matrix_struct=matrix_a%matrix_struct, & 1558 name="LEFT_SINGULAR_MATRIX") 1559 CALL cp_fm_set_all(u, alpha=0.0_dp) 1560 CALL cp_fm_create(matrix=vt, & 1561 matrix_struct=matrix_a%matrix_struct, & 1562 name="RIGHT_SINGULAR_MATRIX") 1563 CALL cp_fm_set_all(vt, alpha=0.0_dp) 1564 ALLOCATE (diag(n)) 1565 diag(:) = 0.0_dp 1566 desca(:) = matrix_lu%matrix_struct%descriptor(:) 1567 ALLOCATE (work(1)) 1568 ! Workspace query 1569 lwork = -1 1570 CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, & 1571 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info) 1572 lwork = INT(work(1)) 1573 DEALLOCATE (work) 1574 ALLOCATE (work(lwork)) 1575 ! SVD 1576 CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, & 1577 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info) 1578 ! info == n+1 implies homogeneity error when the number of procs is large 1579 ! this likely isnt a problem, but maybe we should handle it separately 1580 IF (info /= 0 .AND. info /= n + 1) & 1581 CPABORT("Singular value decomposition of matrix failed.") 1582 ! (Pseudo)inverse and (pseudo)determinant 1583 CALL cp_fm_create(matrix=sigma, & 1584 matrix_struct=matrix_a%matrix_struct, & 1585 name="SINGULAR_VALUE_MATRIX") 1586 CALL cp_fm_set_all(sigma, alpha=0.0_dp) 1587 determinant = 1.0_dp 1588 quenched = .FALSE. 1589 IF (PRESENT(eigval)) THEN 1590 CPASSERT(.NOT. ASSOCIATED(eigval)) 1591 ALLOCATE (eigval(n)) 1592 eigval(:) = diag 1593 END IF 1594 DO i = 1, n 1595 IF (diag(i) < my_eps_svd) THEN 1596 diag(i) = 0.0_dp 1597 quenched = .TRUE. 1598 ELSE 1599 determinant = determinant*diag(i) 1600 diag(i) = 1.0_dp/diag(i) 1601 ENDIF 1602 CALL cp_fm_set_element(sigma, i, i, diag(i)) 1603 END DO 1604 DEALLOCATE (diag) 1605 IF (quenched) & 1606 CALL cp_warn(__LOCATION__, & 1607 "Linear dependencies were detected in the SVD inversion of matrix "//TRIM(ADJUSTL(matrix_a%name))// & 1608 ". At least one singular value has been quenched.") 1609 ! Sigma^-1 * U^T 1610 CALL cp_fm_create(matrix=inv_sigma_ut, & 1611 matrix_struct=matrix_a%matrix_struct, & 1612 name="SINGULAR_VALUE_MATRIX") 1613 CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp) 1614 CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, & 1615 u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca) 1616 ! A^-1 = V * (Sigma^-1 * U^T) 1617 CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp) 1618 CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, & 1619 inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca) 1620 ! Clean up 1621 DEALLOCATE (work) 1622 CALL cp_fm_release(u) 1623 CALL cp_fm_release(vt) 1624 CALL cp_fm_release(sigma) 1625 CALL cp_fm_release(inv_sigma_ut) 1626 END IF 1627#else 1628 IF (my_eps_svd .EQ. 0.0_dp) THEN 1629 sign = .TRUE. 1630 CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, & 1631 eval_error=eps1) 1632 CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign) 1633 IF (PRESENT(eigval)) & 1634 CALL cp_abort(__LOCATION__, & 1635 "NYI. Eigenvalues not available for return without SCALAPACK.") 1636 ELSE 1637 CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, & 1638 determinant, eigval) 1639 END IF 1640#endif 1641 CALL cp_fm_release(matrix_lu) 1642 DEALLOCATE (ipivot) 1643 IF (PRESENT(det_a)) det_a = determinant 1644 END SUBROUTINE cp_fm_invert 1645 1646! ************************************************************************************************** 1647!> \brief inverts a triangular matrix 1648!> \param matrix_a ... 1649!> \param uplo_tr ... 1650!> \author MI 1651! ************************************************************************************************** 1652 SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr) 1653 1654 TYPE(cp_fm_type), POINTER :: matrix_a 1655 CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr 1656 1657 CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_triangular_invert', & 1658 routineP = moduleN//':'//routineN 1659 1660 CHARACTER :: unit_diag, uplo 1661 INTEGER :: handle, info, ncol_global 1662 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a 1663#if defined(__SCALAPACK) 1664 INTEGER, DIMENSION(9) :: desca 1665#endif 1666 1667 CALL timeset(routineN, handle) 1668 1669 unit_diag = 'N' 1670 uplo = 'U' 1671 IF (PRESENT(uplo_tr)) uplo = uplo_tr 1672 1673 ncol_global = matrix_a%matrix_struct%ncol_global 1674 1675 a => matrix_a%local_data 1676 1677#if defined(__SCALAPACK) 1678 1679 desca(:) = matrix_a%matrix_struct%descriptor(:) 1680 1681 CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info) 1682 1683#else 1684 CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info) 1685#endif 1686 1687 CALL timestop(handle) 1688 END SUBROUTINE cp_fm_triangular_invert 1689 1690! ************************************************************************************************** 1691!> \brief performs a QR factorization of the input rectangular matrix A or of a submatrix of A 1692!> the computed upper triangular matrix R is in output in the submatrix sub(A) of size NxN 1693!> M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N 1694!> \param matrix_a ... 1695!> \param matrix_r ... 1696!> \param nrow_fact ... 1697!> \param ncol_fact ... 1698!> \param first_row ... 1699!> \param first_col ... 1700!> \author MI 1701! ************************************************************************************************** 1702 SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col) 1703 TYPE(cp_fm_type), POINTER :: matrix_a, matrix_r 1704 INTEGER, INTENT(IN), OPTIONAL :: nrow_fact, ncol_fact, & 1705 first_row, first_col 1706 1707 CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_qr_factorization', & 1708 routineP = moduleN//':'//routineN 1709 1710 INTEGER :: handle, i, icol, info, irow, & 1711 j, lda, lwork, ncol, & 1712 ndim, nrow 1713 REAL(dp), ALLOCATABLE, DIMENSION(:) :: tau, work 1714 REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: r_mat 1715 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a 1716#if defined(__SCALAPACK) 1717 INTEGER, DIMENSION(9) :: desca 1718#endif 1719 1720 CALL timeset(routineN, handle) 1721 1722 ncol = matrix_a%matrix_struct%ncol_global 1723 nrow = matrix_a%matrix_struct%nrow_global 1724 lda = nrow 1725 1726 a => matrix_a%local_data 1727 1728 IF (PRESENT(nrow_fact)) nrow = nrow_fact 1729 IF (PRESENT(ncol_fact)) ncol = ncol_fact 1730 irow = 1 1731 IF (PRESENT(first_row)) irow = first_row 1732 icol = 1 1733 IF (PRESENT(first_col)) icol = first_col 1734 1735 CPASSERT(nrow >= ncol) 1736 ndim = SIZE(a, 2) 1737! ALLOCATE(ipiv(ndim)) 1738 ALLOCATE (tau(ndim)) 1739 1740#if defined(__SCALAPACK) 1741 1742 desca(:) = matrix_a%matrix_struct%descriptor(:) 1743 1744 lwork = -1 1745 ALLOCATE (work(2*ndim)) 1746 CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info) 1747 lwork = INT(work(1)) 1748 DEALLOCATE (work) 1749 ALLOCATE (work(lwork)) 1750 CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info) 1751 1752#else 1753 lwork = -1 1754 ALLOCATE (work(2*ndim)) 1755 CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info) 1756 lwork = INT(work(1)) 1757 DEALLOCATE (work) 1758 ALLOCATE (work(lwork)) 1759 CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info) 1760 1761#endif 1762 1763 ALLOCATE (r_mat(ncol, ncol)) 1764 CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol) 1765 DO i = 1, ncol 1766 DO j = i + 1, ncol 1767 r_mat(j, i) = 0.0_dp 1768 END DO 1769 END DO 1770 CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol) 1771 1772 DEALLOCATE (tau, work, r_mat) 1773 1774 CALL timestop(handle) 1775 1776 END SUBROUTINE cp_fm_qr_factorization 1777 1778! ************************************************************************************************** 1779!> \brief computes the the solution to A*b=A_general using lu decomposition 1780!> pay attention, both matrices are overwritten, a_general contais the result 1781!> \param matrix_a ... 1782!> \param general_a ... 1783!> \author Florian Schiffmann 1784! ************************************************************************************************** 1785 SUBROUTINE cp_fm_solve(matrix_a, general_a) 1786 TYPE(cp_fm_type), POINTER :: matrix_a, general_a 1787 1788 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_solve', & 1789 routineP = moduleN//':'//routineN 1790 1791 INTEGER :: handle, info, n 1792 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot 1793 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, a_general 1794#if defined(__SCALAPACK) 1795 INTEGER, DIMENSION(9) :: desca, descb 1796#else 1797 INTEGER :: lda, ldb 1798#endif 1799 1800 CALL timeset(routineN, handle) 1801 1802 a => matrix_a%local_data 1803 a_general => general_a%local_data 1804 n = matrix_a%matrix_struct%nrow_global 1805 ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block)) 1806 1807#if defined(__SCALAPACK) 1808 desca(:) = matrix_a%matrix_struct%descriptor(:) 1809 descb(:) = general_a%matrix_struct%descriptor(:) 1810 CALL pdgetrf(n, n, a(1, 1), 1, 1, desca, ipivot, info) 1811 CALL pdgetrs("N", n, n, a(1, 1), 1, 1, desca, ipivot, a_general(1, 1), & 1812 1, 1, descb, info) 1813 1814#else 1815 lda = SIZE(a, 1) 1816 ldb = SIZE(a_general, 1) 1817 CALL dgetrf(n, n, a(1, 1), lda, ipivot, info) 1818 CALL dgetrs("N", n, n, a(1, 1), lda, ipivot, a_general, ldb, info) 1819 1820#endif 1821 ! info is allowed to be zero 1822 ! this does just signal a zero diagonal element 1823 DEALLOCATE (ipivot) 1824 CALL timestop(handle) 1825 END SUBROUTINE 1826 1827! ************************************************************************************************** 1828!> \brief Convenience function. Computes the matrix multiplications needed 1829!> for the multiplication of complex matrices. 1830!> C = beta * C + alpha * ( A ** transa ) * ( B ** transb ) 1831!> \param transa : 'N' -> normal 'T' -> transpose 1832!> alpha,beta :: can be 0.0_dp and 1.0_dp 1833!> \param transb ... 1834!> \param m ... 1835!> \param n ... 1836!> \param k ... 1837!> \param alpha ... 1838!> \param A_re m x k matrix ( ! for transa = 'N'), real part 1839!> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part 1840!> \param B_re k x n matrix ( ! for transa = 'N'), real part 1841!> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part 1842!> \param beta ... 1843!> \param C_re m x n matrix, real part 1844!> \param C_im m x n matrix, imaginary part 1845!> \param a_first_col ... 1846!> \param a_first_row ... 1847!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage) 1848!> \param b_first_row ... 1849!> \param c_first_col ... 1850!> \param c_first_row ... 1851!> \author Samuel Andermatt 1852!> \note 1853!> C should have no overlap with A, B 1854! ************************************************************************************************** 1855 SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, & 1856 C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, & 1857 c_first_row) 1858 CHARACTER(LEN=1), INTENT(IN) :: transa, transb 1859 INTEGER, INTENT(IN) :: m, n, k 1860 REAL(KIND=dp), INTENT(IN) :: alpha 1861 TYPE(cp_fm_type), POINTER :: A_re, A_im, B_re, B_im 1862 REAL(KIND=dp), INTENT(IN) :: beta 1863 TYPE(cp_fm_type), POINTER :: C_re, C_im 1864 INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, b_first_col, & 1865 b_first_row, c_first_col, c_first_row 1866 1867 CHARACTER(len=*), PARAMETER :: routineN = 'cp_complex_fm_gemm', & 1868 routineP = moduleN//':'//routineN 1869 1870 INTEGER :: handle 1871 1872 CALL timeset(routineN, handle) 1873 1874 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_re, beta, C_re, & 1875 a_first_col=a_first_col, & 1876 a_first_row=a_first_row, & 1877 b_first_col=b_first_col, & 1878 b_first_row=b_first_row, & 1879 c_first_col=c_first_col, & 1880 c_first_row=c_first_row) 1881 CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, A_im, B_im, 1.0_dp, C_re, & 1882 a_first_col=a_first_col, & 1883 a_first_row=a_first_row, & 1884 b_first_col=b_first_col, & 1885 b_first_row=b_first_row, & 1886 c_first_col=c_first_col, & 1887 c_first_row=c_first_row) 1888 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_im, beta, C_im, & 1889 a_first_col=a_first_col, & 1890 a_first_row=a_first_row, & 1891 b_first_col=b_first_col, & 1892 b_first_row=b_first_row, & 1893 c_first_col=c_first_col, & 1894 c_first_row=c_first_row) 1895 CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_im, B_re, 1.0_dp, C_im, & 1896 a_first_col=a_first_col, & 1897 a_first_row=a_first_row, & 1898 b_first_col=b_first_col, & 1899 b_first_row=b_first_row, & 1900 c_first_col=c_first_col, & 1901 c_first_row=c_first_row) 1902 1903 CALL timestop(handle) 1904 1905 END SUBROUTINE cp_complex_fm_gemm 1906 1907! ************************************************************************************************** 1908!> \brief inverts a matrix using LU decomposition 1909!> the input matrix will be overwritten 1910!> \param matrix : input a general square non-singular matrix, outputs its inverse 1911!> \param info_out : optional, if present outputs the info from (p)zgetri 1912!> \author Lianheng Tong 1913! ************************************************************************************************** 1914 SUBROUTINE cp_fm_lu_invert(matrix, info_out) 1915 TYPE(cp_fm_type), POINTER :: matrix 1916 INTEGER, INTENT(OUT), OPTIONAL :: info_out 1917 1918 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_invert', & 1919 routineP = moduleN//':'//routineN 1920 1921 INTEGER :: nrows_global, handle, info, lwork 1922 INTEGER, DIMENSION(:), ALLOCATABLE :: ipivot 1923 REAL(KIND=dp), DIMENSION(:, :), POINTER :: mat 1924 REAL(KIND=sp), DIMENSION(:, :), POINTER :: mat_sp 1925 REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work 1926 REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp 1927#if defined(__SCALAPACK) 1928 INTEGER :: liwork 1929 INTEGER, DIMENSION(9) :: desca 1930 INTEGER, DIMENSION(:), ALLOCATABLE :: iwork 1931#else 1932 INTEGER :: lda 1933#endif 1934 1935 CALL timeset(routineN, handle) 1936 1937 mat => matrix%local_data 1938 mat_sp => matrix%local_data_sp 1939 nrows_global = matrix%matrix_struct%nrow_global 1940 CPASSERT(nrows_global .EQ. matrix%matrix_struct%ncol_global) 1941 ALLOCATE (ipivot(nrows_global)) 1942 ! do LU decomposition 1943#if defined(__SCALAPACK) 1944 desca = matrix%matrix_struct%descriptor 1945 IF (matrix%use_sp) THEN 1946 CALL psgetrf(nrows_global, nrows_global, & 1947 mat_sp, 1, 1, desca, ipivot, info) 1948 ELSE 1949 CALL pdgetrf(nrows_global, nrows_global, & 1950 mat, 1, 1, desca, ipivot, info) 1951 END IF 1952#else 1953 lda = SIZE(mat, 1) 1954 IF (matrix%use_sp) THEN 1955 CALL sgetrf(nrows_global, nrows_global, & 1956 mat_sp, lda, ipivot, info) 1957 ELSE 1958 CALL dgetrf(nrows_global, nrows_global, & 1959 mat, lda, ipivot, info) 1960 END IF 1961#endif 1962 IF (info /= 0) THEN 1963 CALL cp_abort(__LOCATION__, "LU decomposition has failed") 1964 END IF 1965 ! do inversion 1966 IF (matrix%use_sp) THEN 1967 ALLOCATE (work(1)) 1968 ELSE 1969 ALLOCATE (work_sp(1)) 1970 END IF 1971#if defined(__SCALAPACK) 1972 ALLOCATE (iwork(1)) 1973 IF (matrix%use_sp) THEN 1974 CALL psgetri(nrows_global, mat_sp, 1, 1, desca, & 1975 ipivot, work_sp, -1, iwork, -1, info) 1976 lwork = INT(work_sp(1)) 1977 DEALLOCATE (work_sp) 1978 ALLOCATE (work_sp(lwork)) 1979 ELSE 1980 CALL pdgetri(nrows_global, mat, 1, 1, desca, & 1981 ipivot, work, -1, iwork, -1, info) 1982 lwork = INT(work(1)) 1983 DEALLOCATE (work) 1984 ALLOCATE (work(lwork)) 1985 END IF 1986 liwork = INT(iwork(1)) 1987 DEALLOCATE (iwork) 1988 ALLOCATE (iwork(liwork)) 1989 IF (matrix%use_sp) THEN 1990 CALL psgetri(nrows_global, mat_sp, 1, 1, desca, & 1991 ipivot, work_sp, lwork, iwork, liwork, info) 1992 ELSE 1993 CALL pdgetri(nrows_global, mat, 1, 1, desca, & 1994 ipivot, work, lwork, iwork, liwork, info) 1995 END IF 1996 DEALLOCATE (iwork) 1997#else 1998 IF (matrix%use_sp) THEN 1999 CALL sgetri(nrows_global, mat_sp, lda, & 2000 ipivot, work_sp, -1, info) 2001 lwork = INT(work_sp(1)) 2002 DEALLOCATE (work_sp) 2003 ALLOCATE (work_sp(lwork)) 2004 CALL sgetri(nrows_global, mat_sp, lda, & 2005 ipivot, work_sp, lwork, info) 2006 ELSE 2007 CALL dgetri(nrows_global, mat, lda, & 2008 ipivot, work, -1, info) 2009 lwork = INT(work(1)) 2010 DEALLOCATE (work) 2011 ALLOCATE (work(lwork)) 2012 CALL dgetri(nrows_global, mat, lda, & 2013 ipivot, work, lwork, info) 2014 END IF 2015#endif 2016 IF (matrix%use_sp) THEN 2017 DEALLOCATE (work_sp) 2018 ELSE 2019 DEALLOCATE (work) 2020 END IF 2021 DEALLOCATE (ipivot) 2022 2023 IF (PRESENT(info_out)) THEN 2024 info_out = info 2025 ELSE 2026 IF (info /= 0) & 2027 CALL cp_abort(__LOCATION__, "LU inversion has failed") 2028 END IF 2029 2030 CALL timestop(handle) 2031 2032 END SUBROUTINE cp_fm_lu_invert 2033 2034! ************************************************************************************************** 2035!> \brief norm of matrix using (p)dlange 2036!> \param matrix : input a general matrix 2037!> \param mode : 'M' max abs element value, 2038!> '1' or 'O' one norm, i.e. maximum column sum 2039!> 'I' infinity norm, i.e. maximum row sum 2040!> 'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements 2041!> \return : the norm according to mode 2042!> \author Lianheng Tong 2043! ************************************************************************************************** 2044 FUNCTION cp_fm_norm(matrix, mode) RESULT(res) 2045 TYPE(cp_fm_type), POINTER :: matrix 2046 CHARACTER, INTENT(IN) :: mode 2047 REAL(KIND=dp) :: res 2048 2049 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_norm', & 2050 routineP = moduleN//':'//routineN 2051 2052 INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local 2053 REAL(KIND=sp) :: res_sp 2054 REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa 2055 REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp 2056 REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work 2057 REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp 2058#if defined(__SCALAPACK) 2059 INTEGER, DIMENSION(9) :: desca 2060#else 2061 INTEGER :: lda 2062#endif 2063 2064 CALL timeset(routineN, handle) 2065 2066 CALL cp_fm_get_info(matrix=matrix, & 2067 nrow_global=nrows, & 2068 ncol_global=ncols, & 2069 nrow_local=nrows_local, & 2070 ncol_local=ncols_local) 2071 aa => matrix%local_data 2072 aa_sp => matrix%local_data_sp 2073 2074#if defined(__SCALAPACK) 2075 desca = matrix%matrix_struct%descriptor 2076 SELECT CASE (mode) 2077 CASE ('M', 'm') 2078 lwork = 1 2079 CASE ('1', 'O', 'o') 2080 lwork = ncols_local 2081 CASE ('I', 'i') 2082 lwork = nrows_local 2083 CASE ('F', 'f', 'E', 'e') 2084 lwork = 1 2085 CASE DEFAULT 2086 CPABORT("mode input is not valid") 2087 END SELECT 2088 IF (matrix%use_sp) THEN 2089 ALLOCATE (work_sp(lwork)) 2090 res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp) 2091 DEALLOCATE (work_sp) 2092 res = REAL(res_sp, KIND=dp) 2093 ELSE 2094 ALLOCATE (work(lwork)) 2095 res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work) 2096 DEALLOCATE (work) 2097 END IF 2098#else 2099 SELECT CASE (mode) 2100 CASE ('M', 'm') 2101 lwork = 1 2102 CASE ('1', 'O', 'o') 2103 lwork = 1 2104 CASE ('I', 'i') 2105 lwork = nrows 2106 CASE ('F', 'f', 'E', 'e') 2107 lwork = 1 2108 CASE DEFAULT 2109 CPABORT("mode input is not valid") 2110 END SELECT 2111 IF (matrix%use_sp) THEN 2112 ALLOCATE (work_sp(lwork)) 2113 lda = SIZE(aa_sp, 1) 2114 res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp) 2115 DEALLOCATE (work_sp) 2116 res = REAL(res_sp, KIND=dp) 2117 ELSE 2118 ALLOCATE (work(lwork)) 2119 lda = SIZE(aa, 1) 2120 res = dlange(mode, nrows, ncols, aa, lda, work) 2121 DEALLOCATE (work) 2122 END IF 2123#endif 2124 2125 CALL timestop(handle) 2126 2127 END FUNCTION cp_fm_norm 2128 2129! ************************************************************************************************** 2130!> \brief trace of a matrix using pdlatra 2131!> \param matrix : input a square matrix 2132!> \return : the trace 2133!> \author Lianheng Tong 2134! ************************************************************************************************** 2135 FUNCTION cp_fm_latra(matrix) RESULT(res) 2136 TYPE(cp_fm_type), POINTER :: matrix 2137 REAL(KIND=dp) :: res 2138 2139 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_latra', & 2140 routineP = moduleN//':'//routineN 2141 2142 INTEGER :: nrows, ncols, handle 2143 REAL(KIND=sp) :: res_sp 2144 REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa 2145 REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp 2146#if defined(__SCALAPACK) 2147 INTEGER, DIMENSION(9) :: desca 2148#else 2149 INTEGER :: ii 2150#endif 2151 2152 CALL timeset(routineN, handle) 2153 2154 nrows = matrix%matrix_struct%nrow_global 2155 ncols = matrix%matrix_struct%ncol_global 2156 CPASSERT(nrows .EQ. ncols) 2157 aa => matrix%local_data 2158 aa_sp => matrix%local_data_sp 2159 2160#if defined(__SCALAPACK) 2161 desca = matrix%matrix_struct%descriptor 2162 IF (matrix%use_sp) THEN 2163 res_sp = pslatra(nrows, aa_sp, 1, 1, desca) 2164 res = REAL(res_sp, KIND=dp) 2165 ELSE 2166 res = pdlatra(nrows, aa, 1, 1, desca) 2167 END IF 2168#else 2169 IF (matrix%use_sp) THEN 2170 res_sp = 0.0_sp 2171 DO ii = 1, nrows 2172 res_sp = res_sp + aa_sp(ii, ii) 2173 END DO 2174 res = REAL(res_sp, KIND=dp) 2175 ELSE 2176 res = 0.0_dp 2177 DO ii = 1, nrows 2178 res = res + aa(ii, ii) 2179 END DO 2180 END IF 2181#endif 2182 2183 CALL timestop(handle) 2184 2185 END FUNCTION cp_fm_latra 2186 2187! ************************************************************************************************** 2188!> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix 2189!> sub( A ) = A(IA:IA+M-1,JA:JA+N-1) 2190!> \param matrix : input M-by-N distributed matrix sub( A ) which is to be factored 2191!> \param tau : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A 2192!> \param nrow ... 2193!> \param ncol ... 2194!> \param first_row ... 2195!> \param first_col ... 2196!> \author MI 2197! ************************************************************************************************** 2198 2199 SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col) 2200 2201 TYPE(cp_fm_type), POINTER :: matrix 2202 REAL(KIND=dp), DIMENSION(:), POINTER :: tau 2203 INTEGER, INTENT(IN) :: nrow, ncol, first_row, first_col 2204 2205 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdgeqpf', & 2206 routineP = moduleN//':'//routineN 2207 2208 INTEGER :: handle 2209 INTEGER :: info, lwork 2210 INTEGER, ALLOCATABLE, DIMENSION(:) :: ipiv 2211 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a 2212 REAL(KIND=dp), DIMENSION(:), POINTER :: work 2213#if defined(__SCALAPACK) 2214 INTEGER, DIMENSION(9) :: descc 2215#else 2216 INTEGER :: ii 2217#endif 2218 2219 CALL timeset(routineN, handle) 2220 2221 a => matrix%local_data 2222 lwork = -1 2223 ALLOCATE (work(2*nrow)) 2224 ALLOCATE (ipiv(ncol)) 2225 info = 0 2226 tau = 0 2227 2228#if defined(__SCALAPACK) 2229 descc(:) = matrix%matrix_struct%descriptor(:) 2230 ! Call SCALAPACK routine to get optimal work dimension 2231 CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info) 2232 lwork = INT(work(1)) 2233 DEALLOCATE (work) 2234 ALLOCATE (work(lwork)) 2235 tau = 0.0_dp 2236 ipiv = 0 2237 2238 ! Call SCALAPACK routine to get QR decomposition of CTs 2239 CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info) 2240 CPASSERT(INFO == 0) 2241#else 2242 ! NIY 2243 ii = 1 2244 tau(first_row) = 0 2245 tau(first_col) = 0 2246#endif 2247 2248 DEALLOCATE (work) 2249 DEALLOCATE (ipiv) 2250 2251 CALL timestop(handle) 2252 2253 END SUBROUTINE cp_fm_pdgeqpf 2254 2255! ************************************************************************************************** 2256!> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1) 2257!> with orthonormal columns, which is defined as the first N columns of a product of K 2258!> elementary reflectors of order M 2259!> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector 2260!> as returned from PDGEQRF 2261!> On exit it contains the M-by-N distributed matrix Q 2262!> \param tau : contains the scalar factors TAU of elementary reflectors as returned by PDGEQRF 2263!> \param nrow ... 2264!> \param first_row ... 2265!> \param first_col ... 2266!> \author MI 2267! ************************************************************************************************** 2268 2269 SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col) 2270 2271 TYPE(cp_fm_type), POINTER :: matrix 2272 REAL(KIND=dp), DIMENSION(:), POINTER :: tau 2273 INTEGER, INTENT(IN) :: nrow, first_row, first_col 2274 2275 CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdorgqr', & 2276 routineP = moduleN//':'//routineN 2277 2278 INTEGER :: handle 2279 INTEGER :: info, lwork 2280 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a 2281 REAL(KIND=dp), DIMENSION(:), POINTER :: work 2282#if defined(__SCALAPACK) 2283 INTEGER, DIMENSION(9) :: descc 2284#else 2285 INTEGER :: ii 2286#endif 2287 2288 CALL timeset(routineN, handle) 2289 2290 a => matrix%local_data 2291 lwork = -1 2292 ALLOCATE (work(2*nrow)) 2293 info = 0 2294 2295#if defined(__SCALAPACK) 2296 descc(:) = matrix%matrix_struct%descriptor(:) 2297 2298 CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info) 2299 CPASSERT(info == 0) 2300 lwork = INT(work(1)) 2301 DEALLOCATE (work) 2302 ALLOCATE (work(lwork)) 2303 2304 ! Call SCALAPACK routine to get Q 2305 CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info) 2306 CPASSERT(INFO == 0) 2307#else 2308 ! NIY 2309 ii = 1 2310 tau(first_row) = 0 2311 tau(first_col) = 0 2312#endif 2313 2314 DEALLOCATE (work) 2315 CALL timestop(handle) 2316 2317 END SUBROUTINE cp_fm_pdorgqr 2318 2319END MODULE cp_fm_basic_linalg 2320