1*> \brief \b ZHETRD_HE2HB
2*
3*  @precisions fortran z -> s d c
4*
5*  =========== DOCUMENTATION ===========
6*
7* Online html documentation available at
8*            http://www.netlib.org/lapack/explore-html/
9*
10*> \htmlonly
11*> Download ZHETRD_HE2HB + dependencies
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd_he2hb.f">
13*> [TGZ]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd_he2hb.f">
15*> [ZIP]</a>
16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd_he2hb.f">
17*> [TXT]</a>
18*> \endhtmlonly
19*
20*  Definition:
21*  ===========
22*
23*       SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
24*                              WORK, LWORK, INFO )
25*
26*       IMPLICIT NONE
27*
28*       .. Scalar Arguments ..
29*       CHARACTER          UPLO
30*       INTEGER            INFO, LDA, LDAB, LWORK, N, KD
31*       ..
32*       .. Array Arguments ..
33*       COMPLEX*16         A( LDA, * ), AB( LDAB, * ),
34*                          TAU( * ), WORK( * )
35*       ..
36*
37*
38*> \par Purpose:
39*  =============
40*>
41*> \verbatim
42*>
43*> ZHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian
44*> band-diagonal form AB by a unitary similarity transformation:
45*> Q**H * A * Q = AB.
46*> \endverbatim
47*
48*  Arguments:
49*  ==========
50*
51*> \param[in] UPLO
52*> \verbatim
53*>          UPLO is CHARACTER*1
54*>          = 'U':  Upper triangle of A is stored;
55*>          = 'L':  Lower triangle of A is stored.
56*> \endverbatim
57*>
58*> \param[in] N
59*> \verbatim
60*>          N is INTEGER
61*>          The order of the matrix A.  N >= 0.
62*> \endverbatim
63*>
64*> \param[in] KD
65*> \verbatim
66*>          KD is INTEGER
67*>          The number of superdiagonals of the reduced matrix if UPLO = 'U',
68*>          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
69*>          The reduced matrix is stored in the array AB.
70*> \endverbatim
71*>
72*> \param[in,out] A
73*> \verbatim
74*>          A is COMPLEX*16 array, dimension (LDA,N)
75*>          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
76*>          N-by-N upper triangular part of A contains the upper
77*>          triangular part of the matrix A, and the strictly lower
78*>          triangular part of A is not referenced.  If UPLO = 'L', the
79*>          leading N-by-N lower triangular part of A contains the lower
80*>          triangular part of the matrix A, and the strictly upper
81*>          triangular part of A is not referenced.
82*>          On exit, if UPLO = 'U', the diagonal and first superdiagonal
83*>          of A are overwritten by the corresponding elements of the
84*>          tridiagonal matrix T, and the elements above the first
85*>          superdiagonal, with the array TAU, represent the unitary
86*>          matrix Q as a product of elementary reflectors; if UPLO
87*>          = 'L', the diagonal and first subdiagonal of A are over-
88*>          written by the corresponding elements of the tridiagonal
89*>          matrix T, and the elements below the first subdiagonal, with
90*>          the array TAU, represent the unitary matrix Q as a product
91*>          of elementary reflectors. See Further Details.
92*> \endverbatim
93*>
94*> \param[in] LDA
95*> \verbatim
96*>          LDA is INTEGER
97*>          The leading dimension of the array A.  LDA >= max(1,N).
98*> \endverbatim
99*>
100*> \param[out] AB
101*> \verbatim
102*>          AB is COMPLEX*16 array, dimension (LDAB,N)
103*>          On exit, the upper or lower triangle of the Hermitian band
104*>          matrix A, stored in the first KD+1 rows of the array.  The
105*>          j-th column of A is stored in the j-th column of the array AB
106*>          as follows:
107*>          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
108*>          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
109*> \endverbatim
110*>
111*> \param[in] LDAB
112*> \verbatim
113*>          LDAB is INTEGER
114*>          The leading dimension of the array AB.  LDAB >= KD+1.
115*> \endverbatim
116*>
117*> \param[out] TAU
118*> \verbatim
119*>          TAU is COMPLEX*16 array, dimension (N-KD)
120*>          The scalar factors of the elementary reflectors (see Further
121*>          Details).
122*> \endverbatim
123*>
124*> \param[out] WORK
125*> \verbatim
126*>          WORK is COMPLEX*16 array, dimension (LWORK)
127*>          On exit, if INFO = 0, or if LWORK=-1,
128*>          WORK(1) returns the size of LWORK.
129*> \endverbatim
130*>
131*> \param[in] LWORK
132*> \verbatim
133*>          LWORK is INTEGER
134*>          The dimension of the array WORK which should be calculated
135*>          by a workspace query. LWORK = MAX(1, LWORK_QUERY)
136*>          If LWORK = -1, then a workspace query is assumed; the routine
137*>          only calculates the optimal size of the WORK array, returns
138*>          this value as the first entry of the WORK array, and no error
139*>          message related to LWORK is issued by XERBLA.
140*>          LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
141*>          where FACTOPTNB is the blocking used by the QR or LQ
142*>          algorithm, usually FACTOPTNB=128 is a good choice otherwise
143*>          putting LWORK=-1 will provide the size of WORK.
144*> \endverbatim
145*>
146*> \param[out] INFO
147*> \verbatim
148*>          INFO is INTEGER
149*>          = 0:  successful exit
150*>          < 0:  if INFO = -i, the i-th argument had an illegal value
151*> \endverbatim
152*
153*  Authors:
154*  ========
155*
156*> \author Univ. of Tennessee
157*> \author Univ. of California Berkeley
158*> \author Univ. of Colorado Denver
159*> \author NAG Ltd.
160*
161*> \ingroup complex16HEcomputational
162*
163*> \par Further Details:
164*  =====================
165*>
166*> \verbatim
167*>
168*>  Implemented by Azzam Haidar.
169*>
170*>  All details are available on technical report, SC11, SC13 papers.
171*>
172*>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
173*>  Parallel reduction to condensed forms for symmetric eigenvalue problems
174*>  using aggregated fine-grained and memory-aware kernels. In Proceedings
175*>  of 2011 International Conference for High Performance Computing,
176*>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
177*>  Article 8 , 11 pages.
178*>  http://doi.acm.org/10.1145/2063384.2063394
179*>
180*>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
181*>  An improved parallel singular value algorithm and its implementation
182*>  for multicore hardware, In Proceedings of 2013 International Conference
183*>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
184*>  Denver, Colorado, USA, 2013.
185*>  Article 90, 12 pages.
186*>  http://doi.acm.org/10.1145/2503210.2503292
187*>
188*>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
189*>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
190*>  calculations based on fine-grained memory aware tasks.
191*>  International Journal of High Performance Computing Applications.
192*>  Volume 28 Issue 2, Pages 196-209, May 2014.
193*>  http://hpc.sagepub.com/content/28/2/196
194*>
195*> \endverbatim
196*>
197*> \verbatim
198*>
199*>  If UPLO = 'U', the matrix Q is represented as a product of elementary
200*>  reflectors
201*>
202*>     Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd.
203*>
204*>  Each H(i) has the form
205*>
206*>     H(i) = I - tau * v * v**H
207*>
208*>  where tau is a complex scalar, and v is a complex vector with
209*>  v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in
210*>  A(i,i+kd+1:n), and tau in TAU(i).
211*>
212*>  If UPLO = 'L', the matrix Q is represented as a product of elementary
213*>  reflectors
214*>
215*>     Q = H(1) H(2) . . . H(k), where k = n-kd.
216*>
217*>  Each H(i) has the form
218*>
219*>     H(i) = I - tau * v * v**H
220*>
221*>  where tau is a complex scalar, and v is a complex vector with
222*>  v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in
223*>  A(i+kd+2:n,i), and tau in TAU(i).
224*>
225*>  The contents of A on exit are illustrated by the following examples
226*>  with n = 5:
227*>
228*>  if UPLO = 'U':                       if UPLO = 'L':
229*>
230*>    (  ab  ab/v1  v1      v1     v1    )              (  ab                            )
231*>    (      ab     ab/v2   v2     v2    )              (  ab/v1  ab                     )
232*>    (             ab      ab/v3  v3    )              (  v1     ab/v2  ab              )
233*>    (                     ab     ab/v4 )              (  v1     v2     ab/v3  ab       )
234*>    (                            ab    )              (  v1     v2     v3     ab/v4 ab )
235*>
236*>  where d and e denote diagonal and off-diagonal elements of T, and vi
237*>  denotes an element of the vector defining H(i).
238*> \endverbatim
239*>
240*  =====================================================================
241      SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
242     $                         WORK, LWORK, INFO )
243*
244      IMPLICIT NONE
245*
246*  -- LAPACK computational routine --
247*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
248*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
249*
250*     .. Scalar Arguments ..
251      CHARACTER          UPLO
252      INTEGER            INFO, LDA, LDAB, LWORK, N, KD
253*     ..
254*     .. Array Arguments ..
255      COMPLEX*16         A( LDA, * ), AB( LDAB, * ),
256     $                   TAU( * ), WORK( * )
257*     ..
258*
259*  =====================================================================
260*
261*     .. Parameters ..
262      DOUBLE PRECISION   RONE
263      COMPLEX*16         ZERO, ONE, HALF
264      PARAMETER          ( RONE = 1.0D+0,
265     $                   ZERO = ( 0.0D+0, 0.0D+0 ),
266     $                   ONE = ( 1.0D+0, 0.0D+0 ),
267     $                   HALF = ( 0.5D+0, 0.0D+0 ) )
268*     ..
269*     .. Local Scalars ..
270      LOGICAL            LQUERY, UPPER
271      INTEGER            I, J, IINFO, LWMIN, PN, PK, LK,
272     $                   LDT, LDW, LDS2, LDS1,
273     $                   LS2, LS1, LW, LT,
274     $                   TPOS, WPOS, S2POS, S1POS
275*     ..
276*     .. External Subroutines ..
277      EXTERNAL           XERBLA, ZHER2K, ZHEMM, ZGEMM, ZCOPY,
278     $                   ZLARFT, ZGELQF, ZGEQRF, ZLASET
279*     ..
280*     .. Intrinsic Functions ..
281      INTRINSIC          MIN, MAX
282*     ..
283*     .. External Functions ..
284      LOGICAL            LSAME
285      INTEGER            ILAENV2STAGE
286      EXTERNAL           LSAME, ILAENV2STAGE
287*     ..
288*     .. Executable Statements ..
289*
290*     Determine the minimal workspace size required
291*     and test the input parameters
292*
293      INFO   = 0
294      UPPER  = LSAME( UPLO, 'U' )
295      LQUERY = ( LWORK.EQ.-1 )
296      LWMIN  = ILAENV2STAGE( 4, 'ZHETRD_HE2HB', '', N, KD, -1, -1 )
297
298      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
299         INFO = -1
300      ELSE IF( N.LT.0 ) THEN
301         INFO = -2
302      ELSE IF( KD.LT.0 ) THEN
303         INFO = -3
304      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
305         INFO = -5
306      ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN
307         INFO = -7
308      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
309         INFO = -10
310      END IF
311*
312      IF( INFO.NE.0 ) THEN
313         CALL XERBLA( 'ZHETRD_HE2HB', -INFO )
314         RETURN
315      ELSE IF( LQUERY ) THEN
316         WORK( 1 ) = LWMIN
317         RETURN
318      END IF
319*
320*     Quick return if possible
321*     Copy the upper/lower portion of A into AB
322*
323      IF( N.LE.KD+1 ) THEN
324          IF( UPPER ) THEN
325              DO 100 I = 1, N
326                  LK = MIN( KD+1, I )
327                  CALL ZCOPY( LK, A( I-LK+1, I ), 1,
328     $                            AB( KD+1-LK+1, I ), 1 )
329  100         CONTINUE
330          ELSE
331              DO 110 I = 1, N
332                  LK = MIN( KD+1, N-I+1 )
333                  CALL ZCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 )
334  110         CONTINUE
335          ENDIF
336          WORK( 1 ) = 1
337          RETURN
338      END IF
339*
340*     Determine the pointer position for the workspace
341*
342      LDT    = KD
343      LDS1   = KD
344      LT     = LDT*KD
345      LW     = N*KD
346      LS1    = LDS1*KD
347      LS2    = LWMIN - LT - LW - LS1
348*      LS2 = N*MAX(KD,FACTOPTNB)
349      TPOS   = 1
350      WPOS   = TPOS  + LT
351      S1POS  = WPOS  + LW
352      S2POS  = S1POS + LS1
353      IF( UPPER ) THEN
354          LDW    = KD
355          LDS2   = KD
356      ELSE
357          LDW    = N
358          LDS2   = N
359      ENDIF
360*
361*
362*     Set the workspace of the triangular matrix T to zero once such a
363*     way every time T is generated the upper/lower portion will be always zero
364*
365      CALL ZLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT )
366*
367      IF( UPPER ) THEN
368          DO 10 I = 1, N - KD, KD
369             PN = N-I-KD+1
370             PK = MIN( N-I-KD+1, KD )
371*
372*            Compute the LQ factorization of the current block
373*
374             CALL ZGELQF( KD, PN, A( I, I+KD ), LDA,
375     $                    TAU( I ), WORK( S2POS ), LS2, IINFO )
376*
377*            Copy the upper portion of A into AB
378*
379             DO 20 J = I, I+PK-1
380                LK = MIN( KD, N-J ) + 1
381                CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
382   20        CONTINUE
383*
384             CALL ZLASET( 'Lower', PK, PK, ZERO, ONE,
385     $                    A( I, I+KD ), LDA )
386*
387*            Form the matrix T
388*
389             CALL ZLARFT( 'Forward', 'Rowwise', PN, PK,
390     $                    A( I, I+KD ), LDA, TAU( I ),
391     $                    WORK( TPOS ), LDT )
392*
393*            Compute W:
394*
395             CALL ZGEMM( 'Conjugate', 'No transpose', PK, PN, PK,
396     $                   ONE,  WORK( TPOS ), LDT,
397     $                         A( I, I+KD ), LDA,
398     $                   ZERO, WORK( S2POS ), LDS2 )
399*
400             CALL ZHEMM( 'Right', UPLO, PK, PN,
401     $                   ONE,  A( I+KD, I+KD ), LDA,
402     $                         WORK( S2POS ), LDS2,
403     $                   ZERO, WORK( WPOS ), LDW )
404*
405             CALL ZGEMM( 'No transpose', 'Conjugate', PK, PK, PN,
406     $                   ONE,  WORK( WPOS ), LDW,
407     $                         WORK( S2POS ), LDS2,
408     $                   ZERO, WORK( S1POS ), LDS1 )
409*
410             CALL ZGEMM( 'No transpose', 'No transpose', PK, PN, PK,
411     $                   -HALF, WORK( S1POS ), LDS1,
412     $                          A( I, I+KD ), LDA,
413     $                   ONE,   WORK( WPOS ), LDW )
414*
415*
416*            Update the unreduced submatrix A(i+kd:n,i+kd:n), using
417*            an update of the form:  A := A - V'*W - W'*V
418*
419             CALL ZHER2K( UPLO, 'Conjugate', PN, PK,
420     $                    -ONE, A( I, I+KD ), LDA,
421     $                          WORK( WPOS ), LDW,
422     $                    RONE, A( I+KD, I+KD ), LDA )
423   10     CONTINUE
424*
425*        Copy the upper band to AB which is the band storage matrix
426*
427         DO 30 J = N-KD+1, N
428            LK = MIN(KD, N-J) + 1
429            CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
430   30    CONTINUE
431*
432      ELSE
433*
434*         Reduce the lower triangle of A to lower band matrix
435*
436          DO 40 I = 1, N - KD, KD
437             PN = N-I-KD+1
438             PK = MIN( N-I-KD+1, KD )
439*
440*            Compute the QR factorization of the current block
441*
442             CALL ZGEQRF( PN, KD, A( I+KD, I ), LDA,
443     $                    TAU( I ), WORK( S2POS ), LS2, IINFO )
444*
445*            Copy the upper portion of A into AB
446*
447             DO 50 J = I, I+PK-1
448                LK = MIN( KD, N-J ) + 1
449                CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
450   50        CONTINUE
451*
452             CALL ZLASET( 'Upper', PK, PK, ZERO, ONE,
453     $                    A( I+KD, I ), LDA )
454*
455*            Form the matrix T
456*
457             CALL ZLARFT( 'Forward', 'Columnwise', PN, PK,
458     $                    A( I+KD, I ), LDA, TAU( I ),
459     $                    WORK( TPOS ), LDT )
460*
461*            Compute W:
462*
463             CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK,
464     $                   ONE, A( I+KD, I ), LDA,
465     $                         WORK( TPOS ), LDT,
466     $                   ZERO, WORK( S2POS ), LDS2 )
467*
468             CALL ZHEMM( 'Left', UPLO, PN, PK,
469     $                   ONE, A( I+KD, I+KD ), LDA,
470     $                         WORK( S2POS ), LDS2,
471     $                   ZERO, WORK( WPOS ), LDW )
472*
473             CALL ZGEMM( 'Conjugate', 'No transpose', PK, PK, PN,
474     $                   ONE, WORK( S2POS ), LDS2,
475     $                         WORK( WPOS ), LDW,
476     $                   ZERO, WORK( S1POS ), LDS1 )
477*
478             CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK,
479     $                   -HALF, A( I+KD, I ), LDA,
480     $                         WORK( S1POS ), LDS1,
481     $                   ONE, WORK( WPOS ), LDW )
482*
483*
484*            Update the unreduced submatrix A(i+kd:n,i+kd:n), using
485*            an update of the form:  A := A - V*W' - W*V'
486*
487             CALL ZHER2K( UPLO, 'No transpose', PN, PK,
488     $                    -ONE, A( I+KD, I ), LDA,
489     $                           WORK( WPOS ), LDW,
490     $                    RONE, A( I+KD, I+KD ), LDA )
491*            ==================================================================
492*            RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED
493*             DO 45 J = I, I+PK-1
494*                LK = MIN( KD, N-J ) + 1
495*                CALL ZCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 )
496*   45        CONTINUE
497*            ==================================================================
498   40     CONTINUE
499*
500*        Copy the lower band to AB which is the band storage matrix
501*
502         DO 60 J = N-KD+1, N
503            LK = MIN(KD, N-J) + 1
504            CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
505   60    CONTINUE
506
507      END IF
508*
509      WORK( 1 ) = LWMIN
510      RETURN
511*
512*     End of ZHETRD_HE2HB
513*
514      END
515