1 #include "relapack.h"
2
3 static void RELAPACK_clauum_rec(const char *, const blasint *, float *,
4 const blasint *, blasint *);
5
6
7 /** CLAUUM computes the product U * U**H or L**H * L, where the triangular factor U or L is stored in the upper or lower triangular part of the array A.
8 *
9 * This routine is functionally equivalent to LAPACK's clauum.
10 * For details on its interface, see
11 * http://www.netlib.org/lapack/explore-html/d2/d36/clauum_8f.html
12 * */
RELAPACK_clauum(const char * uplo,const blasint * n,float * A,const blasint * ldA,blasint * info)13 void RELAPACK_clauum(
14 const char *uplo, const blasint *n,
15 float *A, const blasint *ldA,
16 blasint *info
17 ) {
18
19 // Check arguments
20 const blasint lower = LAPACK(lsame)(uplo, "L");
21 const blasint upper = LAPACK(lsame)(uplo, "U");
22 *info = 0;
23 if (!lower && !upper)
24 *info = -1;
25 else if (*n < 0)
26 *info = -2;
27 else if (*ldA < MAX(1, *n))
28 *info = -4;
29 if (*info) {
30 const blasint minfo = -*info;
31 LAPACK(xerbla)("CLAUUM", &minfo, strlen("CLAUUM"));
32 return;
33 }
34
35 if (*n == 0) return;
36
37 // Clean char * arguments
38 const char cleanuplo = lower ? 'L' : 'U';
39
40 // Recursive kernel
41 RELAPACK_clauum_rec(&cleanuplo, n, A, ldA, info);
42 }
43
44
45 /** clauum's recursive compute kernel */
RELAPACK_clauum_rec(const char * uplo,const blasint * n,float * A,const blasint * ldA,blasint * info)46 static void RELAPACK_clauum_rec(
47 const char *uplo, const blasint *n,
48 float *A, const blasint *ldA,
49 blasint *info
50 ) {
51
52 if (*n <= MAX(CROSSOVER_CLAUUM, 1)) {
53 // Unblocked
54 LAPACK(clauu2)(uplo, n, A, ldA, info);
55 return;
56 }
57
58 // Constants
59 const float ONE[] = { 1., 0. };
60
61 // Splitting
62 const blasint n1 = CREC_SPLIT(*n);
63 const blasint n2 = *n - n1;
64
65 // A_TL A_TR
66 // A_BL A_BR
67 float *const A_TL = A;
68 float *const A_TR = A + 2 * *ldA * n1;
69 float *const A_BL = A + 2 * n1;
70 float *const A_BR = A + 2 * *ldA * n1 + 2 * n1;
71
72 // recursion(A_TL)
73 RELAPACK_clauum_rec(uplo, &n1, A_TL, ldA, info);
74
75 if (*uplo == 'L') {
76 // A_TL = A_TL + A_BL' * A_BL
77 BLAS(cherk)("L", "C", &n1, &n2, ONE, A_BL, ldA, ONE, A_TL, ldA);
78 // A_BL = A_BR' * A_BL
79 BLAS(ctrmm)("L", "L", "C", "N", &n2, &n1, ONE, A_BR, ldA, A_BL, ldA);
80 } else {
81 // A_TL = A_TL + A_TR * A_TR'
82 BLAS(cherk)("U", "N", &n1, &n2, ONE, A_TR, ldA, ONE, A_TL, ldA);
83 // A_TR = A_TR * A_BR'
84 BLAS(ctrmm)("R", "U", "C", "N", &n1, &n2, ONE, A_BR, ldA, A_TR, ldA);
85 }
86
87 // recursion(A_BR)
88 RELAPACK_clauum_rec(uplo, &n2, A_BR, ldA, info);
89 }
90