1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2020  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief basic linear algebra operations for full matrices
8!> \par History
9!>      08.2002 split out of qs_blacs [fawzi]
10!> \author Fawzi Mohamed
11! **************************************************************************************************
12MODULE cp_fm_basic_linalg
13   USE cp_blacs_env,                    ONLY: cp_blacs_env_type
14   USE cp_fm_struct,                    ONLY: cp_fm_struct_equivalent
15   USE cp_fm_types,                     ONLY: &
16        cp_fm_create, cp_fm_get_element, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_p_type, &
17        cp_fm_release, cp_fm_set_all, cp_fm_set_element, cp_fm_set_submatrix, cp_fm_to_fm, &
18        cp_fm_type
19   USE cp_log_handling,                 ONLY: cp_to_string
20   USE kahan_sum,                       ONLY: accurate_dot_product,&
21                                              accurate_sum
22   USE kinds,                           ONLY: dp,&
23                                              int_8,&
24                                              sp
25   USE machine,                         ONLY: m_memory
26   USE mathlib,                         ONLY: get_pseudo_inverse_svd,&
27                                              invert_matrix
28   USE message_passing,                 ONLY: mp_sum
29#include "../base/base_uses.f90"
30
31   IMPLICIT NONE
32   PRIVATE
33
34   LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
35   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
36
37   PUBLIC :: cp_fm_scale, & ! scale a matrix
38             cp_fm_scale_and_add, & ! scale and add two matrices
39             cp_fm_column_scale, & ! scale columns of a matrix
40             cp_fm_row_scale, & ! scale rows of a matrix
41             cp_fm_trace, & ! trace of the transpose(A)*B
42             cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
43             cp_fm_norm, & ! different norms of A
44             cp_fm_schur_product, & ! schur product
45             cp_fm_transpose, & ! transpose a matrix
46             cp_fm_upper_to_full, & ! symmetrise an upper symmetric matrix
47             cp_fm_syrk, & ! rank k update
48             cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
49             cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
50             cp_fm_gemm, & ! multiply two matrices
51             cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
52             cp_fm_invert, & ! computes the inverse and determinant
53             cp_fm_frobenius_norm, & ! frobenius norm
54             cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
55             cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
56             cp_fm_solve, & ! solves the equation  A*B=C A and C are input
57             cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
58             cp_fm_pdorgqr ! generates an M-by-N as first N columns of a product of K elementary reflectors
59
60   REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
61#if defined (__ACCELERATE)
62   REAL(KIND=dp), EXTERNAL :: slange
63   REAL(KIND=sp), EXTERNAL :: pslange, pslatra
64#else
65   REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
66#endif
67
68   INTERFACE cp_fm_trace
69      MODULE PROCEDURE cp_fm_trace_a0b0t0
70      MODULE PROCEDURE cp_fm_trace_a1b0t1
71      MODULE PROCEDURE cp_fm_trace_a1b1t1
72   END INTERFACE cp_fm_trace
73
74   INTERFACE cp_fm_contracted_trace
75      MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2
76   END INTERFACE cp_fm_contracted_trace
77CONTAINS
78
79! **************************************************************************************************
80!> \brief calc A <- alpha*A + beta*B
81!>      optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
82!>      scale A)
83!> \param alpha ...
84!> \param matrix_a ...
85!> \param beta ...
86!> \param matrix_b ...
87! **************************************************************************************************
88   SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
89
90      REAL(KIND=dp), INTENT(IN)                          :: alpha
91      TYPE(cp_fm_type), POINTER                          :: matrix_a
92      REAL(KIND=dp), INTENT(in), OPTIONAL                :: beta
93      TYPE(cp_fm_type), OPTIONAL, POINTER                :: matrix_b
94
95      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale_and_add', &
96         routineP = moduleN//':'//routineN
97
98      INTEGER                                            :: handle, size_a
99      REAL(KIND=dp)                                      :: my_beta
100      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b
101      REAL(KIND=sp), DIMENSION(:, :), POINTER            :: a_sp, b_sp
102
103      CALL timeset(routineN, handle)
104
105      IF (PRESENT(matrix_b)) THEN
106         my_beta = 1.0_dp
107      ELSE
108         my_beta = 0.0_dp
109      ENDIF
110      IF (PRESENT(beta)) my_beta = beta
111      NULLIFY (a, b)
112
113      CPASSERT(ASSOCIATED(matrix_a))
114      CPASSERT(matrix_a%ref_count > 0)
115
116      IF (PRESENT(beta)) THEN
117         CPASSERT(PRESENT(matrix_b))
118         CPASSERT(ASSOCIATED(matrix_b))
119         CPASSERT(matrix_b%ref_count > 0)
120         IF (matrix_a%id_nr == matrix_b%id_nr) THEN
121            IF (matrix_a%id_nr == matrix_b%id_nr) &
122               CPWARN("Bad use of routine. Call cp_fm_scale instead")
123            CALL cp_fm_scale(alpha + beta, matrix_a)
124            CALL timestop(handle)
125            RETURN
126         END IF
127      END IF
128
129      a => matrix_a%local_data
130      a_sp => matrix_a%local_data_sp
131
132      IF (matrix_a%use_sp) THEN
133         size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
134      ELSE
135         size_a = SIZE(a, 1)*SIZE(a, 2)
136      ENDIF
137
138      IF (alpha /= 1.0_dp) THEN
139         IF (matrix_a%use_sp) THEN
140            CALL sscal(size_a, REAL(alpha, sp), a_sp, 1)
141         ELSE
142            CALL dscal(size_a, alpha, a, 1)
143         ENDIF
144      ENDIF
145      IF (my_beta .NE. 0.0_dp) THEN
146         IF (matrix_a%matrix_struct%context%group /= matrix_b%matrix_struct%context%group) &
147            CPABORT("matrixes must be in the same blacs context")
148
149         IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
150                                     matrix_b%matrix_struct)) THEN
151
152            b => matrix_b%local_data
153            b_sp => matrix_b%local_data_sp
154
155            IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
156               CALL saxpy(size_a, REAL(my_beta, sp), b_sp, 1, a_sp, 1)
157            ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
158               CALL saxpy(size_a, REAL(my_beta, sp), REAL(b, sp), 1, a_sp, 1)
159            ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
160               CALL daxpy(size_a, my_beta, REAL(b_sp, dp), 1, a, 1)
161            ELSE
162               CALL daxpy(size_a, my_beta, b, 1, a, 1)
163            ENDIF
164
165         ELSE
166#ifdef __SCALAPACK
167            CPABORT("to do (pdscal,pdcopy,pdaxpy)")
168#else
169            CPABORT("")
170#endif
171         END IF
172
173      END IF
174
175      CALL timestop(handle)
176
177   END SUBROUTINE cp_fm_scale_and_add
178
179! **************************************************************************************************
180!> \brief interface to BLACS geadd:
181!>                matrix_b = beta*matrix_b + alpha*opt(matrix_a)
182!>        where opt(matrix_a) can be either:
183!>              'N':  matrix_a
184!>              'T':  matrix_a^T
185!>              'C':  matrix_a^H (Hermitian conjugate)
186!>        note that this is a level three routine, use cp_fm_scale_and_add if that
187!>        is sufficient for your needs
188!> \param alpha  : complex scalar
189!> \param trans  : 'N' normal, 'T' transposed
190!> \param matrix_a : input matrix_a
191!> \param beta   : complex scalar
192!> \param matrix_b : input matrix_b, upon out put the updated matrix_b
193!> \author  Lianheng Tong
194! **************************************************************************************************
195   SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
196      REAL(KIND=dp), INTENT(IN) :: alpha, beta
197      CHARACTER, INTENT(IN) :: trans
198      TYPE(cp_fm_type), POINTER :: matrix_a, matrix_b
199
200      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geadd', &
201                                     routineP = moduleN//':'//routineN
202
203      INTEGER :: nrow_global, ncol_global, handle
204      REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa, bb
205#if defined(__SCALAPACK)
206      INTEGER, DIMENSION(9) :: desca, descb
207#else
208      INTEGER :: ii, jj
209#endif
210
211      CALL timeset(routineN, handle)
212
213      CPASSERT(ASSOCIATED(matrix_a))
214      CPASSERT(ASSOCIATED(matrix_b))
215      nrow_global = matrix_a%matrix_struct%nrow_global
216      ncol_global = matrix_a%matrix_struct%ncol_global
217      CPASSERT(nrow_global .EQ. matrix_b%matrix_struct%nrow_global)
218      CPASSERT(ncol_global .EQ. matrix_b%matrix_struct%ncol_global)
219
220      aa => matrix_a%local_data
221      bb => matrix_b%local_data
222
223#if defined(__SCALAPACK)
224      desca = matrix_a%matrix_struct%descriptor
225      descb = matrix_b%matrix_struct%descriptor
226      CALL pdgeadd(trans, &
227                   nrow_global, &
228                   ncol_global, &
229                   alpha, &
230                   aa, &
231                   1, 1, &
232                   desca, &
233                   beta, &
234                   bb, &
235                   1, 1, &
236                   descb)
237#else
238      ! dgeadd is not a standard BLAS function, although is implemented
239      ! in some libraries like OpenBLAS, so not going to use it here
240      SELECT CASE (trans)
241      CASE ('T')
242         DO jj = 1, ncol_global
243            DO ii = 1, nrow_global
244               bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
245            END DO
246         END DO
247      CASE DEFAULT
248         DO jj = 1, ncol_global
249            DO ii = 1, nrow_global
250               bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
251            END DO
252         END DO
253      END SELECT
254#endif
255
256      CALL timestop(handle)
257
258   END SUBROUTINE cp_fm_geadd
259
260! **************************************************************************************************
261!> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
262!>      IMPORTANT : the sign of the determinant is not defined correctly yet ....
263!> \param matrix_a ...
264!> \param almost_determinant ...
265!> \param correct_sign ...
266!> \par History
267!>      added correct_sign 02.07 (fschiff)
268!> \author Joost VandeVondele
269!> \note
270!>      - matrix_a is overwritten
271!>      - the sign of the determinant might be wrong
272!>      - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
273!>      - one should be able to find out if ipivot is an even or an odd permutation...
274!>        if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
275! **************************************************************************************************
276   SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
277      TYPE(cp_fm_type), POINTER                :: matrix_a
278      REAL(KIND=dp), INTENT(OUT)               :: almost_determinant
279      LOGICAL, INTENT(IN), OPTIONAL            :: correct_sign
280
281      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_decompose', &
282                                     routineP = moduleN//':'//routineN
283
284      INTEGER                                  :: handle, i, info, n
285      INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
286      REAL(KIND=dp)                            :: determinant
287      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
288#if defined(__SCALAPACK)
289      INTEGER, DIMENSION(9)                    :: desca
290      REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
291#else
292      INTEGER                                  :: lda
293#endif
294
295      CALL timeset(routineN, handle)
296
297      a => matrix_a%local_data
298      n = matrix_a%matrix_struct%nrow_global
299      ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
300
301#if defined(__SCALAPACK)
302      MARK_USED(correct_sign)
303      desca(:) = matrix_a%matrix_struct%descriptor(:)
304      CALL pdgetrf(n, n, a(1, 1), 1, 1, desca, ipivot, info)
305
306      ALLOCATE (diag(n))
307      diag(:) = 0.0_dp
308      DO i = 1, n
309         CALL cp_fm_get_element(matrix_a, i, i, diag(i)) !  not completely optimal in speed i would say
310      ENDDO
311      determinant = 1.0_dp
312      DO i = 1, n
313         determinant = determinant*diag(i)
314      ENDDO
315      DEALLOCATE (diag)
316#else
317      lda = SIZE(a, 1)
318      CALL dgetrf(n, n, a(1, 1), lda, ipivot, info)
319      determinant = 1.0_dp
320      IF (correct_sign) THEN
321         DO i = 1, n
322            IF (ipivot(i) .NE. i) THEN
323               determinant = -determinant*a(i, i)
324            ELSE
325               determinant = determinant*a(i, i)
326            END IF
327         END DO
328      ELSE
329         DO i = 1, n
330            determinant = determinant*a(i, i)
331         ENDDO
332      END IF
333#endif
334      ! info is allowed to be zero
335      ! this does just signal a zero diagonal element
336      DEALLOCATE (ipivot)
337      almost_determinant = determinant ! notice that the sign is random
338      CALL timestop(handle)
339   END SUBROUTINE
340
341! **************************************************************************************************
342!> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a  ** transa ) * ( matrix_b ** transb )
343!> \param transa : 'N' -> normal   'T' -> transpose
344!>      alpha,beta :: can be 0.0_dp and 1.0_dp
345!> \param transb ...
346!> \param m ...
347!> \param n ...
348!> \param k ...
349!> \param alpha ...
350!> \param matrix_a : m x k matrix ( ! for transa = 'N')
351!> \param matrix_b : k x n matrix ( ! for transb = 'N')
352!> \param beta ...
353!> \param matrix_c : m x n matrix
354!> \param a_first_col ...
355!> \param a_first_row ...
356!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
357!> \param b_first_row ...
358!> \param c_first_col ...
359!> \param c_first_row ...
360!> \author Matthias Krack
361!> \note
362!>      matrix_c should have no overlap with matrix_a, matrix_b
363! **************************************************************************************************
364   SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
365                         matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
366                         c_first_col, c_first_row)
367
368      CHARACTER(LEN=1), INTENT(IN)             :: transa, transb
369      INTEGER, INTENT(IN)                      :: m, n, k
370      REAL(KIND=dp), INTENT(IN)                :: alpha
371      TYPE(cp_fm_type), POINTER                :: matrix_a, matrix_b
372      REAL(KIND=dp), INTENT(IN)                :: beta
373      TYPE(cp_fm_type), POINTER                :: matrix_c
374      INTEGER, INTENT(IN), OPTIONAL            :: a_first_col, a_first_row, &
375                                                  b_first_col, b_first_row, &
376                                                  c_first_col, c_first_row
377
378      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_gemm', &
379                                     routineP = moduleN//':'//routineN
380
381      INTEGER                                  :: handle, i_a, i_b, i_c, j_a, &
382                                                  j_b, j_c
383      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b, c
384      REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp, b_sp, c_sp
385#if defined(__SCALAPACK)
386      INTEGER, DIMENSION(9)                    :: desca, descb, descc
387#else
388      INTEGER                                  :: lda, ldb, ldc
389#endif
390
391      CALL timeset(routineN, handle)
392
393      !sample peak memory
394      CALL m_memory()
395
396      a => matrix_a%local_data
397      b => matrix_b%local_data
398      c => matrix_c%local_data
399
400      a_sp => matrix_a%local_data_sp
401      b_sp => matrix_b%local_data_sp
402      c_sp => matrix_c%local_data_sp
403
404      IF (PRESENT(a_first_row)) THEN
405         i_a = a_first_row
406      ELSE
407         i_a = 1
408      END IF
409      IF (PRESENT(a_first_col)) THEN
410         j_a = a_first_col
411      ELSE
412         j_a = 1
413      END IF
414      IF (PRESENT(b_first_row)) THEN
415         i_b = b_first_row
416      ELSE
417         i_b = 1
418      END IF
419      IF (PRESENT(b_first_col)) THEN
420         j_b = b_first_col
421      ELSE
422         j_b = 1
423      END IF
424      IF (PRESENT(c_first_row)) THEN
425         i_c = c_first_row
426      ELSE
427         i_c = 1
428      END IF
429      IF (PRESENT(c_first_col)) THEN
430         j_c = c_first_col
431      ELSE
432         j_c = 1
433      END IF
434
435#if defined(__SCALAPACK)
436
437      desca(:) = matrix_a%matrix_struct%descriptor(:)
438      descb(:) = matrix_b%matrix_struct%descriptor(:)
439      descc(:) = matrix_c%matrix_struct%descriptor(:)
440
441      IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
442
443         CALL psgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
444                     descb, REAL(beta, sp), c_sp(1, 1), i_c, j_c, descc)
445
446      ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
447
448         CALL pdgemm(transa, transb, m, n, k, alpha, a(1, 1), i_a, j_a, desca, b(1, 1), i_b, j_b, &
449                     descb, beta, c(1, 1), i_c, j_c, descc)
450
451      ELSE
452         CPABORT("Mixed precision gemm NYI")
453      ENDIF
454#else
455
456      IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
457
458         lda = SIZE(a_sp, 1)
459         ldb = SIZE(b_sp, 1)
460         ldc = SIZE(c_sp, 1)
461
462         CALL sgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
463                    REAL(beta, sp), c_sp(i_c, j_c), ldc)
464
465      ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
466
467         lda = SIZE(a, 1)
468         ldb = SIZE(b, 1)
469         ldc = SIZE(c, 1)
470
471         CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
472
473      ELSE
474         CPABORT("Mixed precision gemm NYI")
475      ENDIF
476
477#endif
478      CALL timestop(handle)
479
480   END SUBROUTINE cp_fm_gemm
481
482! **************************************************************************************************
483!> \brief computes matrix_c = beta * matrix_c + alpha *  matrix_a  *  matrix_b
484!>      computes matrix_c = beta * matrix_c + alpha *  matrix_b  *  matrix_a
485!>      where matrix_a is symmetric
486!> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
487!>      alpha,beta :: can be 0.0_dp and 1.0_dp
488!> \param uplo ...
489!> \param m ...
490!> \param n ...
491!> \param alpha ...
492!> \param matrix_a : m x m matrix
493!> \param matrix_b : m x n matrix
494!> \param beta ...
495!> \param matrix_c : m x n matrix
496!> \author Matthias Krack
497!> \note
498!>      matrix_c should have no overlap with matrix_a, matrix_b
499!>      all matrices in QS are upper triangular, so uplo should be 'U' always
500!>      matrix_a is always an m x m matrix
501!>      it is typically slower to do cp_fm_symm than cp_fm_gemm (especially in parallel easily 50 percent !)
502! **************************************************************************************************
503   SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
504
505      CHARACTER(LEN=1), INTENT(IN)             :: side, uplo
506      INTEGER, INTENT(IN)                      :: m, n
507      REAL(KIND=dp), INTENT(IN)                :: alpha
508      TYPE(cp_fm_type), POINTER                :: matrix_a, matrix_b
509      REAL(KIND=dp), INTENT(IN)                :: beta
510      TYPE(cp_fm_type), POINTER                :: matrix_c
511
512      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_symm', &
513                                     routineP = moduleN//':'//routineN
514
515      INTEGER                                  :: handle
516      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b, c
517#if defined(__SCALAPACK)
518      INTEGER, DIMENSION(9)                    :: desca, descb, descc
519#else
520      INTEGER                                  :: lda, ldb, ldc
521#endif
522
523      CALL timeset(routineN, handle)
524
525      a => matrix_a%local_data
526      b => matrix_b%local_data
527      c => matrix_c%local_data
528
529#if defined(__SCALAPACK)
530
531      desca(:) = matrix_a%matrix_struct%descriptor(:)
532      descb(:) = matrix_b%matrix_struct%descriptor(:)
533      descc(:) = matrix_c%matrix_struct%descriptor(:)
534
535      CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
536
537#else
538
539      lda = matrix_a%matrix_struct%local_leading_dimension
540      ldb = matrix_b%matrix_struct%local_leading_dimension
541      ldc = matrix_c%matrix_struct%local_leading_dimension
542
543      CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
544
545#endif
546      CALL timestop(handle)
547
548   END SUBROUTINE cp_fm_symm
549
550! **************************************************************************************************
551!> \brief computes the Frobenius norm of matrix_a
552!> \param matrix_a : m x n matrix
553!> \param norm ...
554!> \author VW
555! **************************************************************************************************
556   SUBROUTINE cp_fm_frobenius_norm(matrix_a, norm)
557      TYPE(cp_fm_type), POINTER                :: matrix_a
558      REAL(KIND=dp), INTENT(inout)             :: norm
559
560      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_frobenius_norm', &
561                                     routineP = moduleN//':'//routineN
562
563      INTEGER                                  :: handle, size_a
564      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
565      REAL(KIND=dp), EXTERNAL                  :: DDOT
566#if defined(__SCALAPACK)
567      INTEGER                                  :: group
568#endif
569
570      CALL timeset(routineN, handle)
571
572      norm = 0.0_dp
573      a => matrix_a%local_data
574      size_a = SIZE(a, 1)*SIZE(a, 2)
575      norm = DDOT(size_a, a(1, 1), 1, a(1, 1), 1)
576#if defined(__SCALAPACK)
577      group = matrix_a%matrix_struct%para_env%group
578      CALL mp_sum(norm, group)
579#endif
580      norm = SQRT(norm)
581
582      CALL timestop(handle)
583
584   END SUBROUTINE cp_fm_frobenius_norm
585
586! **************************************************************************************************
587!> \brief performs a rank-k update of a symmetric matrix_c
588!>         matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
589!> \param uplo : 'U'   ('L')
590!> \param trans : 'N'  ('T')
591!> \param k : number of cols to use in matrix_a
592!>      ia,ja ::  1,1 (could be used for selecting subblock of a)
593!> \param alpha ...
594!> \param matrix_a ...
595!> \param ia ...
596!> \param ja ...
597!> \param beta ...
598!> \param matrix_c ...
599!> \author Matthias Krack
600!> \note
601!>      In QS uplo should 'U' (upper part updated)
602! **************************************************************************************************
603   SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
604      CHARACTER(LEN=1), INTENT(IN)             :: uplo, trans
605      INTEGER, INTENT(IN)                      :: k
606      REAL(KIND=dp), INTENT(IN)                :: alpha
607      TYPE(cp_fm_type), POINTER                :: matrix_a
608      INTEGER, INTENT(IN)                      :: ia, ja
609      REAL(KIND=dp), INTENT(IN)                :: beta
610      TYPE(cp_fm_type), POINTER                :: matrix_c
611
612      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_syrk', &
613                                     routineP = moduleN//':'//routineN
614
615      INTEGER                                  :: handle, n
616      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, c
617#if defined(__SCALAPACK)
618      INTEGER, DIMENSION(9)                    :: desca, descc
619#else
620      INTEGER                                  :: lda, ldc
621#endif
622
623      CALL timeset(routineN, handle)
624
625      n = matrix_c%matrix_struct%nrow_global
626
627      a => matrix_a%local_data
628      c => matrix_c%local_data
629
630#if defined(__SCALAPACK)
631
632      desca(:) = matrix_a%matrix_struct%descriptor(:)
633      descc(:) = matrix_c%matrix_struct%descriptor(:)
634
635      CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
636
637#else
638
639      lda = SIZE(a, 1)
640      ldc = SIZE(c, 1)
641
642      CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
643
644#endif
645      CALL timestop(handle)
646
647   END SUBROUTINE cp_fm_syrk
648
649! **************************************************************************************************
650!> \brief computes the schur product of two matrices
651!>       c_ij = a_ij * b_ij
652!> \param matrix_a ...
653!> \param matrix_b ...
654!> \param matrix_c ...
655!> \author Joost VandeVondele
656! **************************************************************************************************
657   SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
658
659      TYPE(cp_fm_type), POINTER                          :: matrix_a, matrix_b, matrix_c
660
661      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_schur_product', &
662         routineP = moduleN//':'//routineN
663
664      INTEGER                                            :: handle, icol_local, irow_local, mypcol, &
665                                                            myprow, ncol_local, npcol, nprow, &
666                                                            nrow_local
667      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b, c
668      TYPE(cp_blacs_env_type), POINTER                   :: context
669
670      CALL timeset(routineN, handle)
671
672      context => matrix_a%matrix_struct%context
673      myprow = context%mepos(1)
674      mypcol = context%mepos(2)
675      nprow = context%num_pe(1)
676      npcol = context%num_pe(2)
677
678      a => matrix_a%local_data
679      b => matrix_b%local_data
680      c => matrix_c%local_data
681
682      nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
683      ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
684
685      DO icol_local = 1, ncol_local
686         DO irow_local = 1, nrow_local
687            c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
688         END DO
689      END DO
690
691      CALL timestop(handle)
692
693   END SUBROUTINE cp_fm_schur_product
694
695! **************************************************************************************************
696!> \brief returns the trace of matrix_a^T matrix_b, i.e
697!>      sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
698!> \param matrix_a a matrix
699!> \param matrix_b another matrix
700!> \param trace ...
701!> \par History
702!>      11.06.2001 Creation (Matthias Krack)
703!>      12.2002 added doc [fawzi]
704!> \author Matthias Krack
705!> \note
706!>      note the transposition of matrix_a!
707! **************************************************************************************************
708   SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
709
710      TYPE(cp_fm_type), POINTER                          :: matrix_a, matrix_b
711      REAL(KIND=dp), INTENT(OUT)                         :: trace
712
713      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0', &
714         routineP = moduleN//':'//routineN
715
716      INTEGER                                            :: group, handle, mypcol, myprow, &
717                                                            ncol_local, npcol, nprow, nrow_local
718      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b
719      REAL(KIND=sp), DIMENSION(:, :), POINTER            :: a_sp, b_sp
720      TYPE(cp_blacs_env_type), POINTER                   :: context
721
722      CALL timeset(routineN, handle)
723
724      context => matrix_a%matrix_struct%context
725      myprow = context%mepos(1)
726      mypcol = context%mepos(2)
727      nprow = context%num_pe(1)
728      npcol = context%num_pe(2)
729
730      group = matrix_a%matrix_struct%para_env%group
731
732      a => matrix_a%local_data
733      b => matrix_b%local_data
734
735      a_sp => matrix_a%local_data_sp
736      b_sp => matrix_b%local_data_sp
737
738      nrow_local = MIN(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
739      ncol_local = MIN(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
740
741      ! cries for an accurate_dot_product
742      IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
743         trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local)* &
744                                   b_sp(1:nrow_local, 1:ncol_local), dp))
745      ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
746         trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local), dp)* &
747                              b(1:nrow_local, 1:ncol_local))
748      ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
749         trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
750                              REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
751      ELSE
752         trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
753                              b(1:nrow_local, 1:ncol_local))
754      ENDIF
755
756      CALL mp_sum(trace, group)
757
758      CALL timestop(handle)
759
760   END SUBROUTINE cp_fm_trace_a0b0t0
761
762! **************************************************************************************************
763!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
764!> \param matrix_a list of A matrices
765!> \param matrix_b B matrix
766!> \param trace    computed traces
767!> \par History
768!>    * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
769!> \note \parblock
770!>      Computing the trace requires collective communication between involved MPI processes
771!>      that implies a synchronisation point between them. The aim of this subroutine is to reduce
772!>      the amount of time wasted in such synchronisation by performing one large collective
773!>      operation which involves all the matrices in question.
774!>
775!>      The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
776!>      the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
777!>      'matrix_b' is a single matrix.
778!>      \endparblock
779! **************************************************************************************************
780   SUBROUTINE cp_fm_trace_a1b0t1(matrix_a, matrix_b, trace)
781      TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in)       :: matrix_a
782      TYPE(cp_fm_type), POINTER                          :: matrix_b
783      REAL(kind=dp), DIMENSION(:), INTENT(out)           :: trace
784
785      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1', &
786         routineP = moduleN//':'//routineN
787
788      INTEGER                                            :: group, handle, imatrix, n_matrices, &
789                                                            ncols_local, nrows_local
790      LOGICAL                                            :: use_sp_a, use_sp_b
791      REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
792      REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
793
794      CALL timeset(routineN, handle)
795
796      n_matrices = SIZE(trace)
797      CPASSERT(SIZE(matrix_a) == n_matrices)
798
799      CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
800      use_sp_b = matrix_b%use_sp
801
802      IF (use_sp_b) THEN
803         ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
804      ELSE
805         ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
806      END IF
807
808!$OMP PARALLEL DO DEFAULT(NONE), &
809!$OMP             PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
810!$OMP             SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
811!$OMP             SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
812
813      DO imatrix = 1, n_matrices
814
815         use_sp_a = matrix_a(imatrix)%matrix%use_sp
816
817         ! assume that the matrices A(i) and B have identical shapes and distribution schemes
818         IF (use_sp_a .AND. use_sp_b) THEN
819            ldata_a_sp => matrix_a(imatrix)%matrix%local_data_sp(1:nrows_local, 1:ncols_local)
820            trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
821         ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
822            ldata_a => matrix_a(imatrix)%matrix%local_data(1:nrows_local, 1:ncols_local)
823            trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
824         ELSE
825            CPABORT("Matrices A and B are of different types")
826         END IF
827      END DO
828!$OMP END PARALLEL DO
829
830      group = matrix_b%matrix_struct%para_env%group
831      CALL mp_sum(trace, group)
832
833      CALL timestop(handle)
834   END SUBROUTINE cp_fm_trace_a1b0t1
835
836! **************************************************************************************************
837!> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
838!> \param matrix_a list of A matrices
839!> \param matrix_b list of B matrices
840!> \param trace    computed traces
841!> \param accurate ...
842!> \par History
843!>    * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
844!> \note \parblock
845!>      Computing the trace requires collective communication between involved MPI processes
846!>      that implies a synchronisation point between them. The aim of this subroutine is to reduce
847!>      the amount of time wasted in such synchronisation by performing one large collective
848!>      operation which involves all the matrices in question.
849!>
850!>      The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
851!>      all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
852!>      \endparblock
853! **************************************************************************************************
854   SUBROUTINE cp_fm_trace_a1b1t1(matrix_a, matrix_b, trace, accurate)
855      TYPE(cp_fm_p_type), DIMENSION(:), INTENT(in)       :: matrix_a, matrix_b
856      REAL(kind=dp), DIMENSION(:), INTENT(out)           :: trace
857      LOGICAL, INTENT(IN), OPTIONAL                      :: accurate
858
859      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1', &
860         routineP = moduleN//':'//routineN
861
862      INTEGER                                            :: group, handle, imatrix, n_matrices, &
863                                                            ncols_local, nrows_local
864      LOGICAL                                            :: use_accurate_sum, use_sp_a, use_sp_b
865      REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
866      REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
867
868      CALL timeset(routineN, handle)
869
870      n_matrices = SIZE(trace)
871      CPASSERT(SIZE(matrix_a) == n_matrices)
872      CPASSERT(SIZE(matrix_b) == n_matrices)
873
874      use_accurate_sum = .TRUE.
875      IF (PRESENT(accurate)) use_accurate_sum = accurate
876
877!$OMP PARALLEL DO DEFAULT(NONE), &
878!$OMP             PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
879!$OMP             PRIVATE(nrows_local, use_sp_a, use_sp_b), &
880!$OMP             SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
881      DO imatrix = 1, n_matrices
882         CALL cp_fm_get_info(matrix_a(imatrix)%matrix, nrow_local=nrows_local, ncol_local=ncols_local)
883
884         use_sp_a = matrix_a(imatrix)%matrix%use_sp
885         use_sp_b = matrix_b(imatrix)%matrix%use_sp
886
887         ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
888         IF (use_sp_a .AND. use_sp_b) THEN
889            ldata_a_sp => matrix_a(imatrix)%matrix%local_data_sp(1:nrows_local, 1:ncols_local)
890            ldata_b_sp => matrix_b(imatrix)%matrix%local_data_sp(1:nrows_local, 1:ncols_local)
891            IF (use_accurate_sum) THEN
892               trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
893            ELSE
894               trace(imatrix) = SUM(ldata_a_sp*ldata_b_sp)
895            END IF
896         ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
897            ldata_a => matrix_a(imatrix)%matrix%local_data(1:nrows_local, 1:ncols_local)
898            ldata_b => matrix_b(imatrix)%matrix%local_data(1:nrows_local, 1:ncols_local)
899            IF (use_accurate_sum) THEN
900               trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
901            ELSE
902               trace(imatrix) = SUM(ldata_a*ldata_b)
903            END IF
904         ELSE
905            CPABORT("Matrices A and B are of different types")
906         END IF
907      END DO
908!$OMP END PARALLEL DO
909
910      group = matrix_a(1)%matrix%matrix_struct%para_env%group
911      CALL mp_sum(trace, group)
912
913      CALL timestop(handle)
914   END SUBROUTINE cp_fm_trace_a1b1t1
915
916! **************************************************************************************************
917!> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
918!> \param matrix_a list of A matrices
919!> \param matrix_b list of B matrices
920!> \param trace    computed traces
921!> \param accurate ...
922! **************************************************************************************************
923   SUBROUTINE cp_fm_contracted_trace_a2b2t2(matrix_a, matrix_b, trace, accurate)
924      TYPE(cp_fm_p_type), DIMENSION(:, :), INTENT(in)    :: matrix_a, matrix_b
925      REAL(kind=dp), DIMENSION(:, :), INTENT(out)        :: trace
926      LOGICAL, INTENT(IN), OPTIONAL                      :: accurate
927
928      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2', &
929         routineP = moduleN//':'//routineN
930
931      INTEGER                                            :: group, handle, ia, ib, iz, na, nb, &
932                                                            ncols_local, nrows_local, nz
933      INTEGER(kind=int_8)                                :: ib8, itrace, na8, ntraces
934      LOGICAL                                            :: use_accurate_sum, use_sp_a, use_sp_b
935      REAL(kind=dp)                                      :: t
936      REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
937      REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
938
939      CALL timeset(routineN, handle)
940
941      nz = SIZE(matrix_a, 1)
942      CPASSERT(SIZE(matrix_b, 1) == nz)
943
944      na = SIZE(matrix_a, 2)
945      nb = SIZE(matrix_b, 2)
946      CPASSERT(SIZE(trace, 1) == na)
947      CPASSERT(SIZE(trace, 2) == nb)
948
949      use_accurate_sum = .TRUE.
950      IF (PRESENT(accurate)) use_accurate_sum = accurate
951
952      ! here we use one running index (itrace) instead of two (ia, ib) in order to
953      ! improve load balance between shared-memory threads
954      ntraces = na*nb
955      na8 = INT(na, kind=int_8)
956
957!$OMP PARALLEL DO DEFAULT(NONE), &
958!$OMP             PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
959!$OMP             PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
960!$OMP             SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
961      DO itrace = 1, ntraces
962         ib8 = (itrace - 1)/na8
963         ia = INT(itrace - ib8*na8)
964         ib = INT(ib8) + 1
965
966         t = 0.0_dp
967         DO iz = 1, nz
968            CALL cp_fm_get_info(matrix_a(iz, ia)%matrix, nrow_local=nrows_local, ncol_local=ncols_local)
969            use_sp_a = matrix_a(iz, ia)%matrix%use_sp
970            use_sp_b = matrix_b(iz, ib)%matrix%use_sp
971
972            ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
973            IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
974               ldata_a => matrix_a(iz, ia)%matrix%local_data(1:nrows_local, 1:ncols_local)
975               ldata_b => matrix_b(iz, ib)%matrix%local_data(1:nrows_local, 1:ncols_local)
976               IF (use_accurate_sum) THEN
977                  t = t + accurate_dot_product(ldata_a, ldata_b)
978               ELSE
979                  t = t + SUM(ldata_a*ldata_b)
980               END IF
981            ELSE IF (use_sp_a .AND. use_sp_b) THEN
982               ldata_a_sp => matrix_a(iz, ia)%matrix%local_data_sp(1:nrows_local, 1:ncols_local)
983               ldata_b_sp => matrix_b(iz, ib)%matrix%local_data_sp(1:nrows_local, 1:ncols_local)
984               IF (use_accurate_sum) THEN
985                  t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
986               ELSE
987                  t = t + SUM(ldata_a_sp*ldata_b_sp)
988               END IF
989            ELSE
990               CPABORT("Matrices A and B are of different types")
991            END IF
992         END DO
993         trace(ia, ib) = t
994      END DO
995!$OMP END PARALLEL DO
996
997      group = matrix_a(1, 1)%matrix%matrix_struct%para_env%group
998      CALL mp_sum(trace, group)
999
1000      CALL timestop(handle)
1001   END SUBROUTINE cp_fm_contracted_trace_a2b2t2
1002
1003! **************************************************************************************************
1004!> \brief multiplies in place by a triangular matrix:
1005!>       matrix_b = alpha op(triangular_matrix) matrix_b
1006!>      or (if side='R')
1007!>       matrix_b = alpha matrix_b op(triangular_matrix)
1008!>      op(triangular_matrix) is:
1009!>       triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
1010!>       triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
1011!>       triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
1012!>       triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
1013!> \param triangular_matrix the triangular matrix that multiplies the other
1014!> \param matrix_b the matrix that gets multiplied and stores the result
1015!> \param side on which side of matrix_b stays op(triangular_matrix)
1016!>        (defaults to 'L')
1017!> \param transpose_tr if the triangular matrix should be transposed
1018!>        (defaults to false)
1019!> \param invert_tr if the triangular matrix should be inverted
1020!>        (defaults to false)
1021!> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
1022!>        lower ('L') triangle (defaults to 'U')
1023!> \param unit_diag_tr if the diagonal elements of triangular_matrix should
1024!>        be assumed to be 1 (defaults to false)
1025!> \param n_rows the number of rows of the result (defaults to
1026!>        size(matrix_b,1))
1027!> \param n_cols the number of columns of the result (defaults to
1028!>        size(matrix_b,2))
1029!> \param alpha ...
1030!> \par History
1031!>      08.2002 created [fawzi]
1032!> \author Fawzi Mohamed
1033!> \note
1034!>      needs an mpi env
1035! **************************************************************************************************
1036   SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
1037                                        transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
1038                                        alpha)
1039      TYPE(cp_fm_type), POINTER                          :: triangular_matrix, matrix_b
1040      CHARACTER, INTENT(in), OPTIONAL                    :: side
1041      LOGICAL, INTENT(in), OPTIONAL                      :: transpose_tr, invert_tr
1042      CHARACTER, INTENT(in), OPTIONAL                    :: uplo_tr
1043      LOGICAL, INTENT(in), OPTIONAL                      :: unit_diag_tr
1044      INTEGER, INTENT(in), OPTIONAL                      :: n_rows, n_cols
1045      REAL(KIND=dp), INTENT(in), OPTIONAL                :: alpha
1046
1047      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_triangular_multiply', &
1048         routineP = moduleN//':'//routineN
1049
1050      CHARACTER                                          :: side_char, transa, unit_diag, uplo
1051      INTEGER                                            :: handle, m, n
1052      LOGICAL                                            :: invert
1053      REAL(KIND=dp)                                      :: al
1054
1055      CALL timeset(routineN, handle)
1056      side_char = 'L'
1057      unit_diag = 'N'
1058      uplo = 'U'
1059      transa = 'N'
1060      invert = .FALSE.
1061      al = 1.0_dp
1062      CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
1063      IF (PRESENT(side)) side_char = side
1064      IF (PRESENT(invert_tr)) invert = invert_tr
1065      IF (PRESENT(uplo_tr)) uplo = uplo_tr
1066      IF (PRESENT(unit_diag_tr)) THEN
1067         IF (unit_diag_tr) THEN
1068            unit_diag = 'U'
1069         ELSE
1070            unit_diag = 'N'
1071         END IF
1072      END IF
1073      IF (PRESENT(transpose_tr)) THEN
1074         IF (transpose_tr) THEN
1075            transa = 'T'
1076         ELSE
1077            transa = 'N'
1078         END IF
1079      END IF
1080      IF (PRESENT(alpha)) al = alpha
1081      IF (PRESENT(n_rows)) m = n_rows
1082      IF (PRESENT(n_cols)) n = n_cols
1083
1084      IF (invert) THEN
1085
1086#if defined(__SCALAPACK)
1087         CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1088                     triangular_matrix%local_data(1, 1), 1, 1, &
1089                     triangular_matrix%matrix_struct%descriptor, &
1090                     matrix_b%local_data(1, 1), 1, 1, &
1091                     matrix_b%matrix_struct%descriptor(1))
1092#else
1093         CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1094                    triangular_matrix%local_data(1, 1), &
1095                    SIZE(triangular_matrix%local_data, 1), &
1096                    matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1097#endif
1098
1099      ELSE
1100
1101#if defined(__SCALAPACK)
1102         CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1103                     triangular_matrix%local_data(1, 1), 1, 1, &
1104                     triangular_matrix%matrix_struct%descriptor, &
1105                     matrix_b%local_data(1, 1), 1, 1, &
1106                     matrix_b%matrix_struct%descriptor(1))
1107#else
1108         CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1109                    triangular_matrix%local_data(1, 1), &
1110                    SIZE(triangular_matrix%local_data, 1), &
1111                    matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1112#endif
1113
1114      END IF
1115
1116      CALL timestop(handle)
1117   END SUBROUTINE cp_fm_triangular_multiply
1118
1119! **************************************************************************************************
1120!> \brief scales a matrix
1121!>      matrix_a = alpha * matrix_b
1122!> \param alpha ...
1123!> \param matrix_a ...
1124!> \note
1125!>      use cp_fm_set_all to zero (avoids problems with nan)
1126! **************************************************************************************************
1127   SUBROUTINE cp_fm_scale(alpha, matrix_a)
1128      REAL(KIND=dp), INTENT(IN)                          :: alpha
1129      TYPE(cp_fm_type), POINTER                          :: matrix_a
1130
1131      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale', routineP = moduleN//':'//routineN
1132
1133      INTEGER                                            :: handle, size_a
1134      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
1135
1136      CALL timeset(routineN, handle)
1137
1138      NULLIFY (a)
1139
1140      CPASSERT(ASSOCIATED(matrix_a))
1141      CPASSERT(matrix_a%ref_count > 0)
1142
1143      a => matrix_a%local_data
1144      size_a = SIZE(a, 1)*SIZE(a, 2)
1145
1146      CALL DSCAL(size_a, alpha, a, 1)
1147
1148      CALL timestop(handle)
1149
1150   END SUBROUTINE cp_fm_scale
1151
1152! **************************************************************************************************
1153!> \brief transposes a matrix
1154!>      matrixt = matrix ^ T
1155!> \param matrix ...
1156!> \param matrixt ...
1157!> \note
1158!>      all matrix elements are transposed (see cp_fm_upper_to_half to symmetrise a matrix)
1159! **************************************************************************************************
1160   SUBROUTINE cp_fm_transpose(matrix, matrixt)
1161      TYPE(cp_fm_type), POINTER                :: matrix, matrixt
1162
1163      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_transpose', &
1164                                     routineP = moduleN//':'//routineN
1165
1166      INTEGER                                  :: handle, ncol_global, &
1167                                                  nrow_global, ncol_globalt, nrow_globalt
1168      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, c
1169#if defined(__SCALAPACK)
1170      INTEGER, DIMENSION(9)                    :: desca, descc
1171#else
1172      INTEGER                                  :: i, j
1173#endif
1174
1175      CPASSERT(ASSOCIATED(matrix))
1176      CPASSERT(ASSOCIATED(matrixt))
1177      nrow_global = matrix%matrix_struct%nrow_global
1178      ncol_global = matrix%matrix_struct%ncol_global
1179      nrow_globalt = matrixt%matrix_struct%nrow_global
1180      ncol_globalt = matrixt%matrix_struct%ncol_global
1181      CPASSERT(nrow_global == ncol_globalt)
1182      CPASSERT(nrow_globalt == ncol_global)
1183
1184      CALL timeset(routineN, handle)
1185
1186      a => matrix%local_data
1187      c => matrixt%local_data
1188
1189#if defined(__SCALAPACK)
1190      desca(:) = matrix%matrix_struct%descriptor(:)
1191      descc(:) = matrixt%matrix_struct%descriptor(:)
1192      CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
1193#else
1194      DO j = 1, ncol_global
1195         DO i = 1, nrow_global
1196            c(j, i) = a(i, j)
1197         ENDDO
1198      ENDDO
1199#endif
1200      CALL timestop(handle)
1201
1202   END SUBROUTINE cp_fm_transpose
1203
1204! **************************************************************************************************
1205!> \brief given an upper triangular matrix computes the corresponding full matrix
1206!> \param matrix the upper triangular matrix as input, the full matrix as output
1207!> \param work a matrix of the same size as matrix
1208!> \author Matthias Krack
1209!> \note
1210!>       the lower triangular part is irrelevant
1211! **************************************************************************************************
1212   SUBROUTINE cp_fm_upper_to_full(matrix, work)
1213
1214      TYPE(cp_fm_type), POINTER                :: matrix, work
1215
1216      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_upper_to_full', &
1217                                     routineP = moduleN//':'//routineN
1218
1219      INTEGER                                  :: handle, icol_global, irow_global, &
1220                                                  mypcol, myprow, ncol_global, &
1221                                                  npcol, nprow, nrow_global
1222      REAL(KIND=dp), DIMENSION(:, :), POINTER   :: a
1223      REAL(KIND=sp), DIMENSION(:, :), POINTER   :: a_sp
1224      TYPE(cp_blacs_env_type), POINTER         :: context
1225
1226#if defined(__SCALAPACK)
1227      INTEGER                                  :: icol_local, irow_local, &
1228                                                  ncol_block, ncol_local, &
1229                                                  nrow_block, nrow_local
1230      INTEGER, DIMENSION(9)                    :: desca, descc
1231      INTEGER, EXTERNAL                        :: indxl2g
1232      REAL(KIND=dp), DIMENSION(:, :), POINTER   :: c
1233      REAL(KIND=sp), DIMENSION(:, :), POINTER   :: c_sp
1234#endif
1235
1236      CPASSERT(ASSOCIATED(matrix))
1237      CPASSERT(ASSOCIATED(work))
1238      nrow_global = matrix%matrix_struct%nrow_global
1239      ncol_global = matrix%matrix_struct%ncol_global
1240      CPASSERT(nrow_global == ncol_global)
1241      nrow_global = work%matrix_struct%nrow_global
1242      ncol_global = work%matrix_struct%ncol_global
1243      CPASSERT(nrow_global == ncol_global)
1244      CPASSERT(matrix%use_sp .EQV. work%use_sp)
1245
1246      CALL timeset(routineN, handle)
1247
1248      context => matrix%matrix_struct%context
1249      myprow = context%mepos(1)
1250      mypcol = context%mepos(2)
1251      nprow = context%num_pe(1)
1252      npcol = context%num_pe(2)
1253
1254#if defined(__SCALAPACK)
1255
1256      nrow_block = matrix%matrix_struct%nrow_block
1257      ncol_block = matrix%matrix_struct%ncol_block
1258
1259      nrow_local = matrix%matrix_struct%nrow_locals(myprow)
1260      ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
1261
1262      a => work%local_data
1263      a_sp => work%local_data_sp
1264      desca(:) = work%matrix_struct%descriptor(:)
1265      c => matrix%local_data
1266      c_sp => matrix%local_data_sp
1267      descc(:) = matrix%matrix_struct%descriptor(:)
1268
1269      DO icol_local = 1, ncol_local
1270         icol_global = indxl2g(icol_local, ncol_block, mypcol, &
1271                               matrix%matrix_struct%first_p_pos(2), npcol)
1272         DO irow_local = 1, nrow_local
1273            irow_global = indxl2g(irow_local, nrow_block, myprow, &
1274                                  matrix%matrix_struct%first_p_pos(1), nprow)
1275            IF (irow_global > icol_global) THEN
1276               IF (matrix%use_sp) THEN
1277                  c_sp(irow_local, icol_local) = 0.0_sp
1278               ELSE
1279                  c(irow_local, icol_local) = 0.0_dp
1280               ENDIF
1281            ELSE IF (irow_global == icol_global) THEN
1282               IF (matrix%use_sp) THEN
1283                  c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
1284               ELSE
1285                  c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
1286               ENDIF
1287            END IF
1288         END DO
1289      END DO
1290
1291      DO icol_local = 1, ncol_local
1292      DO irow_local = 1, nrow_local
1293         IF (matrix%use_sp) THEN
1294            a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
1295         ELSE
1296            a(irow_local, icol_local) = c(irow_local, icol_local)
1297         ENDIF
1298      ENDDO
1299      ENDDO
1300
1301      IF (matrix%use_sp) THEN
1302         CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
1303      ELSE
1304         CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
1305      ENDIF
1306
1307#else
1308
1309      a => matrix%local_data
1310      a_sp => matrix%local_data_sp
1311      DO irow_global = 1, nrow_global
1312         DO icol_global = irow_global + 1, ncol_global
1313            IF (matrix%use_sp) THEN
1314               a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
1315            ELSE
1316               a(icol_global, irow_global) = a(irow_global, icol_global)
1317            ENDIF
1318         ENDDO
1319      ENDDO
1320
1321#endif
1322      CALL timestop(handle)
1323
1324   END SUBROUTINE cp_fm_upper_to_full
1325
1326! **************************************************************************************************
1327!> \brief scales column i of matrix a with scaling(i)
1328!> \param matrixa ...
1329!> \param scaling : an array used for scaling the columns,
1330!>                  SIZE(scaling) determines the number of columns to be scaled
1331!> \author Joost VandeVondele
1332!> \note
1333!>      this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
1334!>      that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
1335!>      this procedure can be up to 20 times faster than calling cp_fm_syrk n times
1336!>      where every vector has a different prefactor
1337! **************************************************************************************************
1338   SUBROUTINE cp_fm_column_scale(matrixa, scaling)
1339      TYPE(cp_fm_type), POINTER                :: matrixa
1340      REAL(KIND=dp), DIMENSION(:), INTENT(in)  :: scaling
1341
1342      INTEGER                                  :: k, mypcol, myprow, n, ncol_global, &
1343                                                  npcol, nprow
1344      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
1345      REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
1346#if defined(__SCALAPACK)
1347      INTEGER                                  :: icol_global, icol_local, &
1348                                                  ipcol, iprow, irow_local
1349#else
1350      INTEGER                                  :: i
1351#endif
1352
1353      myprow = matrixa%matrix_struct%context%mepos(1)
1354      mypcol = matrixa%matrix_struct%context%mepos(2)
1355      nprow = matrixa%matrix_struct%context%num_pe(1)
1356      npcol = matrixa%matrix_struct%context%num_pe(2)
1357
1358      ncol_global = matrixa%matrix_struct%ncol_global
1359
1360      a => matrixa%local_data
1361      a_sp => matrixa%local_data_sp
1362      IF (matrixa%use_sp) THEN
1363         n = SIZE(a_sp, 1)
1364      ELSE
1365         n = SIZE(a, 1)
1366      ENDIF
1367      k = MIN(SIZE(scaling), ncol_global)
1368
1369#if defined(__SCALAPACK)
1370
1371      DO icol_global = 1, k
1372         CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
1373                      nprow, npcol, myprow, mypcol, &
1374                      irow_local, icol_local, iprow, ipcol)
1375         IF ((ipcol == mypcol)) THEN
1376            IF (matrixa%use_sp) THEN
1377               CALL SSCAL(n, REAL(scaling(icol_global), sp), a_sp(1, icol_local), 1)
1378            ELSE
1379               CALL DSCAL(n, scaling(icol_global), a(1, icol_local), 1)
1380            ENDIF
1381         END IF
1382      ENDDO
1383#else
1384      DO i = 1, k
1385         IF (matrixa%use_sp) THEN
1386            CALL SSCAL(n, REAL(scaling(i), sp), a_sp(1, i), 1)
1387         ELSE
1388            CALL DSCAL(n, scaling(i), a(1, i), 1)
1389         ENDIF
1390      ENDDO
1391#endif
1392   END SUBROUTINE cp_fm_column_scale
1393
1394! **************************************************************************************************
1395!> \brief scales row i of matrix a with scaling(i)
1396!> \param matrixa ...
1397!> \param scaling : an array used for scaling the columns,
1398!> \author JGH
1399!> \note
1400! **************************************************************************************************
1401   SUBROUTINE cp_fm_row_scale(matrixa, scaling)
1402      TYPE(cp_fm_type), POINTER                :: matrixa
1403      REAL(KIND=dp), DIMENSION(:), INTENT(in)  :: scaling
1404
1405      INTEGER                                  :: n, m, nrow_global, nrow_local, ncol_local
1406      INTEGER, DIMENSION(:), POINTER           :: row_indices
1407      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
1408      REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
1409#if defined(__SCALAPACK)
1410      INTEGER                                  :: irow_global, icol, irow
1411#else
1412      INTEGER                                  :: j
1413#endif
1414
1415      CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
1416                          nrow_local=nrow_local, ncol_local=ncol_local)
1417      CPASSERT(SIZE(scaling) == nrow_global)
1418
1419      a => matrixa%local_data
1420      a_sp => matrixa%local_data_sp
1421      IF (matrixa%use_sp) THEN
1422         n = SIZE(a_sp, 1)
1423         m = SIZE(a_sp, 2)
1424      ELSE
1425         n = SIZE(a, 1)
1426         m = SIZE(a, 2)
1427      ENDIF
1428
1429#if defined(__SCALAPACK)
1430      DO icol = 1, ncol_local
1431         IF (matrixa%use_sp) THEN
1432            DO irow = 1, nrow_local
1433               irow_global = row_indices(irow)
1434               a(irow, icol) = REAL(scaling(irow_global), dp)*a(irow, icol)
1435            END DO
1436         ELSE
1437            DO irow = 1, nrow_local
1438               irow_global = row_indices(irow)
1439               a(irow, icol) = scaling(irow_global)*a(irow, icol)
1440            END DO
1441         ENDIF
1442      ENDDO
1443#else
1444      IF (matrixa%use_sp) THEN
1445         DO j = 1, m
1446            a_sp(1:n, j) = REAL(scaling(1:n), sp)*a_sp(1:n, j)
1447         ENDDO
1448      ELSE
1449         DO j = 1, m
1450            a(1:n, j) = scaling(1:n)*a(1:n, j)
1451         ENDDO
1452      ENDIF
1453#endif
1454   END SUBROUTINE cp_fm_row_scale
1455! **************************************************************************************************
1456!> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
1457!> \param matrix_a the matrix to invert
1458!> \param matrix_inverse the inverse of matrix_a
1459!> \param det_a the determinant of matrix_a
1460!> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
1461!>                are screened
1462!> \param eigval optionally return matrix eigenvalues/singular values
1463!> \par History
1464!>      note of Jan Wilhelm (12.2015)
1465!>      - computation of determinant corrected
1466!>      - determinant only computed if det_a is present
1467!>      12.2016 added option to use SVD instead of LU [Nico Holmberg]
1468!> \author Florian Schiffmann(02.2007)
1469! **************************************************************************************************
1470   SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
1471
1472      TYPE(cp_fm_type), POINTER                :: matrix_a, matrix_inverse
1473      REAL(KIND=dp), INTENT(OUT), OPTIONAL     :: det_a
1474      REAL(KIND=dp), INTENT(IN), OPTIONAL      :: eps_svd
1475      REAL(KIND=dp), DIMENSION(:), POINTER, &
1476         INTENT(INOUT), OPTIONAL               :: eigval
1477
1478      CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_invert', &
1479                                     routineP = moduleN//':'//routineN
1480
1481      INTEGER                                  :: n
1482      INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
1483      REAL(KIND=dp)                            :: determinant, my_eps_svd
1484      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
1485      TYPE(cp_fm_type), POINTER                :: matrix_lu
1486
1487#if defined(__SCALAPACK)
1488      TYPE(cp_fm_type), POINTER                :: u, vt, sigma, inv_sigma_ut
1489      INTEGER                                  :: i, info, liwork, lwork, group, exponent_of_minus_one
1490      INTEGER, DIMENSION(9)                    :: desca
1491      LOGICAL                                  :: quenched
1492      REAL(KIND=dp)                            :: alpha, beta
1493      REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
1494      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
1495#else
1496      LOGICAL                                  :: sign
1497      REAL(KIND=dp)                            :: eps1
1498#endif
1499
1500      my_eps_svd = 0.0_dp
1501      IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
1502
1503      CALL cp_fm_create(matrix=matrix_lu, &
1504                        matrix_struct=matrix_a%matrix_struct, &
1505                        name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
1506      CALL cp_fm_to_fm(matrix_a, matrix_lu)
1507
1508      a => matrix_lu%local_data
1509      n = matrix_lu%matrix_struct%nrow_global
1510      ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
1511      ipivot(:) = 0
1512#if defined(__SCALAPACK)
1513      IF (my_eps_svd .EQ. 0.0_dp) THEN
1514         ! Use LU decomposition
1515         lwork = 3*n
1516         liwork = 3*n
1517         desca(:) = matrix_lu%matrix_struct%descriptor(:)
1518         CALL pdgetrf(n, n, a(1, 1), 1, 1, desca, ipivot, info)
1519
1520         IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
1521
1522            ALLOCATE (diag(n))
1523            diag(:) = 0.0_dp
1524            DO i = 1, n
1525               CALL cp_fm_get_element(matrix_lu, i, i, diag(i)) !  not completely optimal in speed i would say
1526            ENDDO
1527
1528            exponent_of_minus_one = 0
1529            determinant = 1.0_dp
1530            DO i = 1, n
1531               determinant = determinant*diag(i)
1532               IF (ipivot(i) .NE. i) THEN
1533                  exponent_of_minus_one = exponent_of_minus_one + 1
1534               END IF
1535            ENDDO
1536            IF (PRESENT(eigval)) THEN
1537               CPASSERT(.NOT. ASSOCIATED(eigval))
1538               ALLOCATE (eigval(n))
1539               eigval(:) = diag
1540            END IF
1541            DEALLOCATE (diag)
1542
1543            group = matrix_lu%matrix_struct%para_env%group
1544            CALL mp_sum(exponent_of_minus_one, group)
1545
1546            determinant = determinant*(-1.0_dp)**exponent_of_minus_one
1547
1548         END IF
1549
1550         alpha = 0.0_dp
1551         beta = 1.0_dp
1552         CALL cp_fm_set_all(matrix_inverse, alpha, beta)
1553         CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
1554      ELSE
1555         ! Use singular value decomposition
1556         CALL cp_fm_create(matrix=u, &
1557                           matrix_struct=matrix_a%matrix_struct, &
1558                           name="LEFT_SINGULAR_MATRIX")
1559         CALL cp_fm_set_all(u, alpha=0.0_dp)
1560         CALL cp_fm_create(matrix=vt, &
1561                           matrix_struct=matrix_a%matrix_struct, &
1562                           name="RIGHT_SINGULAR_MATRIX")
1563         CALL cp_fm_set_all(vt, alpha=0.0_dp)
1564         ALLOCATE (diag(n))
1565         diag(:) = 0.0_dp
1566         desca(:) = matrix_lu%matrix_struct%descriptor(:)
1567         ALLOCATE (work(1))
1568         ! Workspace query
1569         lwork = -1
1570         CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
1571                      1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
1572         lwork = INT(work(1))
1573         DEALLOCATE (work)
1574         ALLOCATE (work(lwork))
1575         ! SVD
1576         CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
1577                      1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
1578         ! info == n+1 implies homogeneity error when the number of procs is large
1579         ! this likely isnt a problem, but maybe we should handle it separately
1580         IF (info /= 0 .AND. info /= n + 1) &
1581            CPABORT("Singular value decomposition of matrix failed.")
1582         ! (Pseudo)inverse and (pseudo)determinant
1583         CALL cp_fm_create(matrix=sigma, &
1584                           matrix_struct=matrix_a%matrix_struct, &
1585                           name="SINGULAR_VALUE_MATRIX")
1586         CALL cp_fm_set_all(sigma, alpha=0.0_dp)
1587         determinant = 1.0_dp
1588         quenched = .FALSE.
1589         IF (PRESENT(eigval)) THEN
1590            CPASSERT(.NOT. ASSOCIATED(eigval))
1591            ALLOCATE (eigval(n))
1592            eigval(:) = diag
1593         END IF
1594         DO i = 1, n
1595            IF (diag(i) < my_eps_svd) THEN
1596               diag(i) = 0.0_dp
1597               quenched = .TRUE.
1598            ELSE
1599               determinant = determinant*diag(i)
1600               diag(i) = 1.0_dp/diag(i)
1601            ENDIF
1602            CALL cp_fm_set_element(sigma, i, i, diag(i))
1603         END DO
1604         DEALLOCATE (diag)
1605         IF (quenched) &
1606            CALL cp_warn(__LOCATION__, &
1607                         "Linear dependencies were detected in the SVD inversion of matrix "//TRIM(ADJUSTL(matrix_a%name))// &
1608                         ". At least one singular value has been quenched.")
1609         ! Sigma^-1 * U^T
1610         CALL cp_fm_create(matrix=inv_sigma_ut, &
1611                           matrix_struct=matrix_a%matrix_struct, &
1612                           name="SINGULAR_VALUE_MATRIX")
1613         CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
1614         CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
1615                     u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
1616         ! A^-1 = V * (Sigma^-1 * U^T)
1617         CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
1618         CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
1619                     inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
1620         ! Clean up
1621         DEALLOCATE (work)
1622         CALL cp_fm_release(u)
1623         CALL cp_fm_release(vt)
1624         CALL cp_fm_release(sigma)
1625         CALL cp_fm_release(inv_sigma_ut)
1626      END IF
1627#else
1628      IF (my_eps_svd .EQ. 0.0_dp) THEN
1629         sign = .TRUE.
1630         CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
1631                            eval_error=eps1)
1632         CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
1633         IF (PRESENT(eigval)) &
1634            CALL cp_abort(__LOCATION__, &
1635                          "NYI. Eigenvalues not available for return without SCALAPACK.")
1636      ELSE
1637         CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
1638                                     determinant, eigval)
1639      END IF
1640#endif
1641      CALL cp_fm_release(matrix_lu)
1642      DEALLOCATE (ipivot)
1643      IF (PRESENT(det_a)) det_a = determinant
1644   END SUBROUTINE cp_fm_invert
1645
1646! **************************************************************************************************
1647!> \brief inverts a triangular matrix
1648!> \param matrix_a ...
1649!> \param uplo_tr ...
1650!> \author MI
1651! **************************************************************************************************
1652   SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
1653
1654      TYPE(cp_fm_type), POINTER                :: matrix_a
1655      CHARACTER, INTENT(IN), OPTIONAL          :: uplo_tr
1656
1657      CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_triangular_invert', &
1658                                     routineP = moduleN//':'//routineN
1659
1660      CHARACTER                                :: unit_diag, uplo
1661      INTEGER                                  :: handle, info, ncol_global
1662      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
1663#if defined(__SCALAPACK)
1664      INTEGER, DIMENSION(9)                    :: desca
1665#endif
1666
1667      CALL timeset(routineN, handle)
1668
1669      unit_diag = 'N'
1670      uplo = 'U'
1671      IF (PRESENT(uplo_tr)) uplo = uplo_tr
1672
1673      ncol_global = matrix_a%matrix_struct%ncol_global
1674
1675      a => matrix_a%local_data
1676
1677#if defined(__SCALAPACK)
1678
1679      desca(:) = matrix_a%matrix_struct%descriptor(:)
1680
1681      CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
1682
1683#else
1684      CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
1685#endif
1686
1687      CALL timestop(handle)
1688   END SUBROUTINE cp_fm_triangular_invert
1689
1690! **************************************************************************************************
1691!> \brief  performs a QR factorization of the input rectangular matrix A or of a submatrix of A
1692!>         the computed upper triangular matrix R is in output in the submatrix sub(A) of size NxN
1693!>         M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
1694!> \param matrix_a ...
1695!> \param matrix_r ...
1696!> \param nrow_fact ...
1697!> \param ncol_fact ...
1698!> \param first_row ...
1699!> \param first_col ...
1700!> \author MI
1701! **************************************************************************************************
1702   SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col)
1703      TYPE(cp_fm_type), POINTER                :: matrix_a, matrix_r
1704      INTEGER, INTENT(IN), OPTIONAL            :: nrow_fact, ncol_fact, &
1705                                                  first_row, first_col
1706
1707      CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_qr_factorization', &
1708                                     routineP = moduleN//':'//routineN
1709
1710      INTEGER                                  :: handle, i, icol, info, irow, &
1711                                                  j, lda, lwork, ncol, &
1712                                                  ndim, nrow
1713      REAL(dp), ALLOCATABLE, DIMENSION(:)      :: tau, work
1714      REAL(dp), ALLOCATABLE, DIMENSION(:, :)   :: r_mat
1715      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
1716#if defined(__SCALAPACK)
1717      INTEGER, DIMENSION(9)                    :: desca
1718#endif
1719
1720      CALL timeset(routineN, handle)
1721
1722      ncol = matrix_a%matrix_struct%ncol_global
1723      nrow = matrix_a%matrix_struct%nrow_global
1724      lda = nrow
1725
1726      a => matrix_a%local_data
1727
1728      IF (PRESENT(nrow_fact)) nrow = nrow_fact
1729      IF (PRESENT(ncol_fact)) ncol = ncol_fact
1730      irow = 1
1731      IF (PRESENT(first_row)) irow = first_row
1732      icol = 1
1733      IF (PRESENT(first_col)) icol = first_col
1734
1735      CPASSERT(nrow >= ncol)
1736      ndim = SIZE(a, 2)
1737!    ALLOCATE(ipiv(ndim))
1738      ALLOCATE (tau(ndim))
1739
1740#if defined(__SCALAPACK)
1741
1742      desca(:) = matrix_a%matrix_struct%descriptor(:)
1743
1744      lwork = -1
1745      ALLOCATE (work(2*ndim))
1746      CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
1747      lwork = INT(work(1))
1748      DEALLOCATE (work)
1749      ALLOCATE (work(lwork))
1750      CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
1751
1752#else
1753      lwork = -1
1754      ALLOCATE (work(2*ndim))
1755      CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
1756      lwork = INT(work(1))
1757      DEALLOCATE (work)
1758      ALLOCATE (work(lwork))
1759      CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
1760
1761#endif
1762
1763      ALLOCATE (r_mat(ncol, ncol))
1764      CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
1765      DO i = 1, ncol
1766         DO j = i + 1, ncol
1767            r_mat(j, i) = 0.0_dp
1768         END DO
1769      END DO
1770      CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
1771
1772      DEALLOCATE (tau, work, r_mat)
1773
1774      CALL timestop(handle)
1775
1776   END SUBROUTINE cp_fm_qr_factorization
1777
1778! **************************************************************************************************
1779!> \brief computes the the solution to A*b=A_general using lu decomposition
1780!>        pay attention, both matrices are overwritten, a_general contais the result
1781!> \param matrix_a ...
1782!> \param general_a ...
1783!> \author Florian Schiffmann
1784! **************************************************************************************************
1785   SUBROUTINE cp_fm_solve(matrix_a, general_a)
1786      TYPE(cp_fm_type), POINTER                :: matrix_a, general_a
1787
1788      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_solve', &
1789                                     routineP = moduleN//':'//routineN
1790
1791      INTEGER                                  :: handle, info, n
1792      INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
1793      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, a_general
1794#if defined(__SCALAPACK)
1795      INTEGER, DIMENSION(9)                    :: desca, descb
1796#else
1797      INTEGER                                  :: lda, ldb
1798#endif
1799
1800      CALL timeset(routineN, handle)
1801
1802      a => matrix_a%local_data
1803      a_general => general_a%local_data
1804      n = matrix_a%matrix_struct%nrow_global
1805      ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
1806
1807#if defined(__SCALAPACK)
1808      desca(:) = matrix_a%matrix_struct%descriptor(:)
1809      descb(:) = general_a%matrix_struct%descriptor(:)
1810      CALL pdgetrf(n, n, a(1, 1), 1, 1, desca, ipivot, info)
1811      CALL pdgetrs("N", n, n, a(1, 1), 1, 1, desca, ipivot, a_general(1, 1), &
1812                   1, 1, descb, info)
1813
1814#else
1815      lda = SIZE(a, 1)
1816      ldb = SIZE(a_general, 1)
1817      CALL dgetrf(n, n, a(1, 1), lda, ipivot, info)
1818      CALL dgetrs("N", n, n, a(1, 1), lda, ipivot, a_general, ldb, info)
1819
1820#endif
1821      ! info is allowed to be zero
1822      ! this does just signal a zero diagonal element
1823      DEALLOCATE (ipivot)
1824      CALL timestop(handle)
1825   END SUBROUTINE
1826
1827! **************************************************************************************************
1828!> \brief Convenience function. Computes the matrix multiplications needed
1829!>        for the multiplication of complex matrices.
1830!>        C = beta * C + alpha * ( A  ** transa ) * ( B ** transb )
1831!> \param transa : 'N' -> normal   'T' -> transpose
1832!>      alpha,beta :: can be 0.0_dp and 1.0_dp
1833!> \param transb ...
1834!> \param m ...
1835!> \param n ...
1836!> \param k ...
1837!> \param alpha ...
1838!> \param A_re m x k matrix ( ! for transa = 'N'), real part
1839!> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
1840!> \param B_re k x n matrix ( ! for transa = 'N'), real part
1841!> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
1842!> \param beta ...
1843!> \param C_re m x n matrix, real part
1844!> \param C_im m x n matrix, imaginary part
1845!> \param a_first_col ...
1846!> \param a_first_row ...
1847!> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
1848!> \param b_first_row ...
1849!> \param c_first_col ...
1850!> \param c_first_row ...
1851!> \author Samuel Andermatt
1852!> \note
1853!>      C should have no overlap with A, B
1854! **************************************************************************************************
1855   SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
1856                                 C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
1857                                 c_first_row)
1858      CHARACTER(LEN=1), INTENT(IN)                       :: transa, transb
1859      INTEGER, INTENT(IN)                                :: m, n, k
1860      REAL(KIND=dp), INTENT(IN)                          :: alpha
1861      TYPE(cp_fm_type), POINTER                          :: A_re, A_im, B_re, B_im
1862      REAL(KIND=dp), INTENT(IN)                          :: beta
1863      TYPE(cp_fm_type), POINTER                          :: C_re, C_im
1864      INTEGER, INTENT(IN), OPTIONAL                      :: a_first_col, a_first_row, b_first_col, &
1865                                                            b_first_row, c_first_col, c_first_row
1866
1867      CHARACTER(len=*), PARAMETER :: routineN = 'cp_complex_fm_gemm', &
1868         routineP = moduleN//':'//routineN
1869
1870      INTEGER                                            :: handle
1871
1872      CALL timeset(routineN, handle)
1873
1874      CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_re, beta, C_re, &
1875                      a_first_col=a_first_col, &
1876                      a_first_row=a_first_row, &
1877                      b_first_col=b_first_col, &
1878                      b_first_row=b_first_row, &
1879                      c_first_col=c_first_col, &
1880                      c_first_row=c_first_row)
1881      CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, A_im, B_im, 1.0_dp, C_re, &
1882                      a_first_col=a_first_col, &
1883                      a_first_row=a_first_row, &
1884                      b_first_col=b_first_col, &
1885                      b_first_row=b_first_row, &
1886                      c_first_col=c_first_col, &
1887                      c_first_row=c_first_row)
1888      CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_im, beta, C_im, &
1889                      a_first_col=a_first_col, &
1890                      a_first_row=a_first_row, &
1891                      b_first_col=b_first_col, &
1892                      b_first_row=b_first_row, &
1893                      c_first_col=c_first_col, &
1894                      c_first_row=c_first_row)
1895      CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_im, B_re, 1.0_dp, C_im, &
1896                      a_first_col=a_first_col, &
1897                      a_first_row=a_first_row, &
1898                      b_first_col=b_first_col, &
1899                      b_first_row=b_first_row, &
1900                      c_first_col=c_first_col, &
1901                      c_first_row=c_first_row)
1902
1903      CALL timestop(handle)
1904
1905   END SUBROUTINE cp_complex_fm_gemm
1906
1907! **************************************************************************************************
1908!> \brief inverts a matrix using LU decomposition
1909!>        the input matrix will be overwritten
1910!> \param matrix   : input a general square non-singular matrix, outputs its inverse
1911!> \param info_out : optional, if present outputs the info from (p)zgetri
1912!> \author Lianheng Tong
1913! **************************************************************************************************
1914   SUBROUTINE cp_fm_lu_invert(matrix, info_out)
1915      TYPE(cp_fm_type), POINTER                :: matrix
1916      INTEGER, INTENT(OUT), OPTIONAL           :: info_out
1917
1918      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_invert', &
1919                                     routineP = moduleN//':'//routineN
1920
1921      INTEGER :: nrows_global, handle, info, lwork
1922      INTEGER, DIMENSION(:), ALLOCATABLE       :: ipivot
1923      REAL(KIND=dp), DIMENSION(:, :), POINTER  :: mat
1924      REAL(KIND=sp), DIMENSION(:, :), POINTER  :: mat_sp
1925      REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
1926      REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
1927#if defined(__SCALAPACK)
1928      INTEGER                                  :: liwork
1929      INTEGER, DIMENSION(9)                    :: desca
1930      INTEGER, DIMENSION(:), ALLOCATABLE       :: iwork
1931#else
1932      INTEGER                                  :: lda
1933#endif
1934
1935      CALL timeset(routineN, handle)
1936
1937      mat => matrix%local_data
1938      mat_sp => matrix%local_data_sp
1939      nrows_global = matrix%matrix_struct%nrow_global
1940      CPASSERT(nrows_global .EQ. matrix%matrix_struct%ncol_global)
1941      ALLOCATE (ipivot(nrows_global))
1942      ! do LU decomposition
1943#if defined(__SCALAPACK)
1944      desca = matrix%matrix_struct%descriptor
1945      IF (matrix%use_sp) THEN
1946         CALL psgetrf(nrows_global, nrows_global, &
1947                      mat_sp, 1, 1, desca, ipivot, info)
1948      ELSE
1949         CALL pdgetrf(nrows_global, nrows_global, &
1950                      mat, 1, 1, desca, ipivot, info)
1951      END IF
1952#else
1953      lda = SIZE(mat, 1)
1954      IF (matrix%use_sp) THEN
1955         CALL sgetrf(nrows_global, nrows_global, &
1956                     mat_sp, lda, ipivot, info)
1957      ELSE
1958         CALL dgetrf(nrows_global, nrows_global, &
1959                     mat, lda, ipivot, info)
1960      END IF
1961#endif
1962      IF (info /= 0) THEN
1963         CALL cp_abort(__LOCATION__, "LU decomposition has failed")
1964      END IF
1965      ! do inversion
1966      IF (matrix%use_sp) THEN
1967         ALLOCATE (work(1))
1968      ELSE
1969         ALLOCATE (work_sp(1))
1970      END IF
1971#if defined(__SCALAPACK)
1972      ALLOCATE (iwork(1))
1973      IF (matrix%use_sp) THEN
1974         CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
1975                      ipivot, work_sp, -1, iwork, -1, info)
1976         lwork = INT(work_sp(1))
1977         DEALLOCATE (work_sp)
1978         ALLOCATE (work_sp(lwork))
1979      ELSE
1980         CALL pdgetri(nrows_global, mat, 1, 1, desca, &
1981                      ipivot, work, -1, iwork, -1, info)
1982         lwork = INT(work(1))
1983         DEALLOCATE (work)
1984         ALLOCATE (work(lwork))
1985      END IF
1986      liwork = INT(iwork(1))
1987      DEALLOCATE (iwork)
1988      ALLOCATE (iwork(liwork))
1989      IF (matrix%use_sp) THEN
1990         CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
1991                      ipivot, work_sp, lwork, iwork, liwork, info)
1992      ELSE
1993         CALL pdgetri(nrows_global, mat, 1, 1, desca, &
1994                      ipivot, work, lwork, iwork, liwork, info)
1995      END IF
1996      DEALLOCATE (iwork)
1997#else
1998      IF (matrix%use_sp) THEN
1999         CALL sgetri(nrows_global, mat_sp, lda, &
2000                     ipivot, work_sp, -1, info)
2001         lwork = INT(work_sp(1))
2002         DEALLOCATE (work_sp)
2003         ALLOCATE (work_sp(lwork))
2004         CALL sgetri(nrows_global, mat_sp, lda, &
2005                     ipivot, work_sp, lwork, info)
2006      ELSE
2007         CALL dgetri(nrows_global, mat, lda, &
2008                     ipivot, work, -1, info)
2009         lwork = INT(work(1))
2010         DEALLOCATE (work)
2011         ALLOCATE (work(lwork))
2012         CALL dgetri(nrows_global, mat, lda, &
2013                     ipivot, work, lwork, info)
2014      END IF
2015#endif
2016      IF (matrix%use_sp) THEN
2017         DEALLOCATE (work_sp)
2018      ELSE
2019         DEALLOCATE (work)
2020      END IF
2021      DEALLOCATE (ipivot)
2022
2023      IF (PRESENT(info_out)) THEN
2024         info_out = info
2025      ELSE
2026         IF (info /= 0) &
2027            CALL cp_abort(__LOCATION__, "LU inversion has failed")
2028      END IF
2029
2030      CALL timestop(handle)
2031
2032   END SUBROUTINE cp_fm_lu_invert
2033
2034! **************************************************************************************************
2035!> \brief norm of matrix using (p)dlange
2036!> \param matrix   : input a general matrix
2037!> \param mode     : 'M' max abs element value,
2038!>                   '1' or 'O' one norm, i.e. maximum column sum
2039!>                   'I' infinity norm, i.e. maximum row sum
2040!>                   'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
2041!> \return : the norm according to mode
2042!> \author Lianheng Tong
2043! **************************************************************************************************
2044   FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
2045      TYPE(cp_fm_type), POINTER :: matrix
2046      CHARACTER, INTENT(IN) :: mode
2047      REAL(KIND=dp) :: res
2048
2049      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_norm', &
2050                                     routineP = moduleN//':'//routineN
2051
2052      INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
2053      REAL(KIND=sp) :: res_sp
2054      REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
2055      REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
2056      REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
2057      REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2058#if defined(__SCALAPACK)
2059      INTEGER, DIMENSION(9) :: desca
2060#else
2061      INTEGER :: lda
2062#endif
2063
2064      CALL timeset(routineN, handle)
2065
2066      CALL cp_fm_get_info(matrix=matrix, &
2067                          nrow_global=nrows, &
2068                          ncol_global=ncols, &
2069                          nrow_local=nrows_local, &
2070                          ncol_local=ncols_local)
2071      aa => matrix%local_data
2072      aa_sp => matrix%local_data_sp
2073
2074#if defined(__SCALAPACK)
2075      desca = matrix%matrix_struct%descriptor
2076      SELECT CASE (mode)
2077      CASE ('M', 'm')
2078         lwork = 1
2079      CASE ('1', 'O', 'o')
2080         lwork = ncols_local
2081      CASE ('I', 'i')
2082         lwork = nrows_local
2083      CASE ('F', 'f', 'E', 'e')
2084         lwork = 1
2085      CASE DEFAULT
2086         CPABORT("mode input is not valid")
2087      END SELECT
2088      IF (matrix%use_sp) THEN
2089         ALLOCATE (work_sp(lwork))
2090         res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
2091         DEALLOCATE (work_sp)
2092         res = REAL(res_sp, KIND=dp)
2093      ELSE
2094         ALLOCATE (work(lwork))
2095         res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
2096         DEALLOCATE (work)
2097      END IF
2098#else
2099      SELECT CASE (mode)
2100      CASE ('M', 'm')
2101         lwork = 1
2102      CASE ('1', 'O', 'o')
2103         lwork = 1
2104      CASE ('I', 'i')
2105         lwork = nrows
2106      CASE ('F', 'f', 'E', 'e')
2107         lwork = 1
2108      CASE DEFAULT
2109         CPABORT("mode input is not valid")
2110      END SELECT
2111      IF (matrix%use_sp) THEN
2112         ALLOCATE (work_sp(lwork))
2113         lda = SIZE(aa_sp, 1)
2114         res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
2115         DEALLOCATE (work_sp)
2116         res = REAL(res_sp, KIND=dp)
2117      ELSE
2118         ALLOCATE (work(lwork))
2119         lda = SIZE(aa, 1)
2120         res = dlange(mode, nrows, ncols, aa, lda, work)
2121         DEALLOCATE (work)
2122      END IF
2123#endif
2124
2125      CALL timestop(handle)
2126
2127   END FUNCTION cp_fm_norm
2128
2129! **************************************************************************************************
2130!> \brief trace of a matrix using pdlatra
2131!> \param matrix   : input a square matrix
2132!> \return : the trace
2133!> \author Lianheng Tong
2134! **************************************************************************************************
2135   FUNCTION cp_fm_latra(matrix) RESULT(res)
2136      TYPE(cp_fm_type), POINTER :: matrix
2137      REAL(KIND=dp) :: res
2138
2139      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_latra', &
2140                                     routineP = moduleN//':'//routineN
2141
2142      INTEGER :: nrows, ncols, handle
2143      REAL(KIND=sp) :: res_sp
2144      REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
2145      REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
2146#if defined(__SCALAPACK)
2147      INTEGER, DIMENSION(9) :: desca
2148#else
2149      INTEGER :: ii
2150#endif
2151
2152      CALL timeset(routineN, handle)
2153
2154      nrows = matrix%matrix_struct%nrow_global
2155      ncols = matrix%matrix_struct%ncol_global
2156      CPASSERT(nrows .EQ. ncols)
2157      aa => matrix%local_data
2158      aa_sp => matrix%local_data_sp
2159
2160#if defined(__SCALAPACK)
2161      desca = matrix%matrix_struct%descriptor
2162      IF (matrix%use_sp) THEN
2163         res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
2164         res = REAL(res_sp, KIND=dp)
2165      ELSE
2166         res = pdlatra(nrows, aa, 1, 1, desca)
2167      END IF
2168#else
2169      IF (matrix%use_sp) THEN
2170         res_sp = 0.0_sp
2171         DO ii = 1, nrows
2172            res_sp = res_sp + aa_sp(ii, ii)
2173         END DO
2174         res = REAL(res_sp, KIND=dp)
2175      ELSE
2176         res = 0.0_dp
2177         DO ii = 1, nrows
2178            res = res + aa(ii, ii)
2179         END DO
2180      END IF
2181#endif
2182
2183      CALL timestop(handle)
2184
2185   END FUNCTION cp_fm_latra
2186
2187! **************************************************************************************************
2188!> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
2189!>        sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
2190!> \param matrix   : input M-by-N distributed matrix sub( A ) which is to be factored
2191!> \param tau      :  scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
2192!> \param nrow ...
2193!> \param ncol ...
2194!> \param first_row ...
2195!> \param first_col ...
2196!> \author MI
2197! **************************************************************************************************
2198
2199   SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
2200
2201      TYPE(cp_fm_type), POINTER                          :: matrix
2202      REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
2203      INTEGER, INTENT(IN)                                :: nrow, ncol, first_row, first_col
2204
2205      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdgeqpf', &
2206                                     routineP = moduleN//':'//routineN
2207
2208      INTEGER                                            :: handle
2209      INTEGER                                            :: info, lwork
2210      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ipiv
2211      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
2212      REAL(KIND=dp), DIMENSION(:), POINTER               :: work
2213#if defined(__SCALAPACK)
2214      INTEGER, DIMENSION(9) :: descc
2215#else
2216      INTEGER :: ii
2217#endif
2218
2219      CALL timeset(routineN, handle)
2220
2221      a => matrix%local_data
2222      lwork = -1
2223      ALLOCATE (work(2*nrow))
2224      ALLOCATE (ipiv(ncol))
2225      info = 0
2226      tau = 0
2227
2228#if defined(__SCALAPACK)
2229      descc(:) = matrix%matrix_struct%descriptor(:)
2230      ! Call SCALAPACK routine to get optimal work dimension
2231      CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2232      lwork = INT(work(1))
2233      DEALLOCATE (work)
2234      ALLOCATE (work(lwork))
2235      tau = 0.0_dp
2236      ipiv = 0
2237
2238      ! Call SCALAPACK routine to get QR decomposition of CTs
2239      CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2240      CPASSERT(INFO == 0)
2241#else
2242      ! NIY
2243      ii = 1
2244      tau(first_row) = 0
2245      tau(first_col) = 0
2246#endif
2247
2248      DEALLOCATE (work)
2249      DEALLOCATE (ipiv)
2250
2251      CALL timestop(handle)
2252
2253   END SUBROUTINE cp_fm_pdgeqpf
2254
2255! **************************************************************************************************
2256!> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
2257!>         with orthonormal columns, which is defined as the first N columns of a product of K
2258!>         elementary reflectors of order M
2259!> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
2260!>                  as returned from PDGEQRF
2261!>                 On exit it contains  the M-by-N distributed matrix Q
2262!> \param tau :   contains the scalar factors TAU of elementary reflectors  as returned by PDGEQRF
2263!> \param nrow ...
2264!> \param first_row ...
2265!> \param first_col ...
2266!> \author MI
2267! **************************************************************************************************
2268
2269   SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
2270
2271      TYPE(cp_fm_type), POINTER                          :: matrix
2272      REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
2273      INTEGER, INTENT(IN)                                :: nrow, first_row, first_col
2274
2275      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdorgqr', &
2276                                     routineP = moduleN//':'//routineN
2277
2278      INTEGER                                            :: handle
2279      INTEGER                                            :: info, lwork
2280      REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
2281      REAL(KIND=dp), DIMENSION(:), POINTER               :: work
2282#if defined(__SCALAPACK)
2283      INTEGER, DIMENSION(9) :: descc
2284#else
2285      INTEGER :: ii
2286#endif
2287
2288      CALL timeset(routineN, handle)
2289
2290      a => matrix%local_data
2291      lwork = -1
2292      ALLOCATE (work(2*nrow))
2293      info = 0
2294
2295#if defined(__SCALAPACK)
2296      descc(:) = matrix%matrix_struct%descriptor(:)
2297
2298      CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2299      CPASSERT(info == 0)
2300      lwork = INT(work(1))
2301      DEALLOCATE (work)
2302      ALLOCATE (work(lwork))
2303
2304      ! Call SCALAPACK routine to get Q
2305      CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2306      CPASSERT(INFO == 0)
2307#else
2308      ! NIY
2309      ii = 1
2310      tau(first_row) = 0
2311      tau(first_col) = 0
2312#endif
2313
2314      DEALLOCATE (work)
2315      CALL timestop(handle)
2316
2317   END SUBROUTINE cp_fm_pdorgqr
2318
2319END MODULE cp_fm_basic_linalg
2320