1*> \brief \b ZGETSQRHRT
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download ZGETSQRHRT + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgetsqrhrt.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgetsqrhrt.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgetsqrhrt.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE ZGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK,
22*      $                       LWORK, INFO )
23*       IMPLICIT NONE
24*
25*       .. Scalar Arguments ..
26*       INTEGER           INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1
27*       ..
28*       .. Array Arguments ..
29*       COMPLEX*16        A( LDA, * ), T( LDT, * ), WORK( * )
30*       ..
31*
32*
33*> \par Purpose:
34*  =============
35*>
36*> \verbatim
37*>
38*> ZGETSQRHRT computes a NB2-sized column blocked QR-factorization
39*> of a complex M-by-N matrix A with M >= N,
40*>
41*>    A = Q * R.
42*>
43*> The routine uses internally a NB1-sized column blocked and MB1-sized
44*> row blocked TSQR-factorization and perfors the reconstruction
45*> of the Householder vectors from the TSQR output. The routine also
46*> converts the R_tsqr factor from the TSQR-factorization output into
47*> the R factor that corresponds to the Householder QR-factorization,
48*>
49*>    A = Q_tsqr * R_tsqr = Q * R.
50*>
51*> The output Q and R factors are stored in the same format as in ZGEQRT
52*> (Q is in blocked compact WY-representation). See the documentation
53*> of ZGEQRT for more details on the format.
54*> \endverbatim
55*
56*  Arguments:
57*  ==========
58*
59*> \param[in] M
60*> \verbatim
61*>          M is INTEGER
62*>          The number of rows of the matrix A.  M >= 0.
63*> \endverbatim
64*>
65*> \param[in] N
66*> \verbatim
67*>          N is INTEGER
68*>          The number of columns of the matrix A. M >= N >= 0.
69*> \endverbatim
70*>
71*> \param[in] MB1
72*> \verbatim
73*>          MB1 is INTEGER
74*>          The row block size to be used in the blocked TSQR.
75*>          MB1 > N.
76*> \endverbatim
77*>
78*> \param[in] NB1
79*> \verbatim
80*>          NB1 is INTEGER
81*>          The column block size to be used in the blocked TSQR.
82*>          N >= NB1 >= 1.
83*> \endverbatim
84*>
85*> \param[in] NB2
86*> \verbatim
87*>          NB2 is INTEGER
88*>          The block size to be used in the blocked QR that is
89*>          output. NB2 >= 1.
90*> \endverbatim
91*>
92*> \param[in,out] A
93*> \verbatim
94*>          A is COMPLEX*16 array, dimension (LDA,N)
95*>
96*>          On entry: an M-by-N matrix A.
97*>
98*>          On exit:
99*>           a) the elements on and above the diagonal
100*>              of the array contain the N-by-N upper-triangular
101*>              matrix R corresponding to the Householder QR;
102*>           b) the elements below the diagonal represent Q by
103*>              the columns of blocked V (compact WY-representation).
104*> \endverbatim
105*>
106*> \param[in] LDA
107*> \verbatim
108*>          LDA is INTEGER
109*>          The leading dimension of the array A.  LDA >= max(1,M).
110*> \endverbatim
111*>
112*> \param[out] T
113*> \verbatim
114*>          T is COMPLEX*16 array, dimension (LDT,N))
115*>          The upper triangular block reflectors stored in compact form
116*>          as a sequence of upper triangular blocks.
117*> \endverbatim
118*>
119*> \param[in] LDT
120*> \verbatim
121*>          LDT is INTEGER
122*>          The leading dimension of the array T.  LDT >= NB2.
123*> \endverbatim
124*>
125*> \param[out] WORK
126*> \verbatim
127*>          (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK))
128*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
129*> \endverbatim
130*>
131*> \param[in] LWORK
132*> \verbatim
133*>          The dimension of the array WORK.
134*>          LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ),
135*>          where
136*>             NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)),
137*>             NB1LOCAL = MIN(NB1,N).
138*>             LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL,
139*>             LW1 = NB1LOCAL * N,
140*>             LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ),
141*>          If LWORK = -1, then a workspace query is assumed.
142*>          The routine only calculates the optimal size of the WORK
143*>          array, returns this value as the first entry of the WORK
144*>          array, and no error message related to LWORK is issued
145*>          by XERBLA.
146*> \endverbatim
147*>
148*> \param[out] INFO
149*> \verbatim
150*>          INFO is INTEGER
151*>          = 0:  successful exit
152*>          < 0:  if INFO = -i, the i-th argument had an illegal value
153*> \endverbatim
154*
155*  Authors:
156*  ========
157*
158*> \author Univ. of Tennessee
159*> \author Univ. of California Berkeley
160*> \author Univ. of Colorado Denver
161*> \author NAG Ltd.
162*
163*> \ingroup comlpex16OTHERcomputational
164*
165*> \par Contributors:
166*  ==================
167*>
168*> \verbatim
169*>
170*> November 2020, Igor Kozachenko,
171*>                Computer Science Division,
172*>                University of California, Berkeley
173*>
174*> \endverbatim
175*>
176*  =====================================================================
177      SUBROUTINE ZGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK,
178     $                       LWORK, INFO )
179      IMPLICIT NONE
180*
181*  -- LAPACK computational routine --
182*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
183*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
184*
185*     .. Scalar Arguments ..
186      INTEGER           INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1
187*     ..
188*     .. Array Arguments ..
189      COMPLEX*16        A( LDA, * ), T( LDT, * ), WORK( * )
190*     ..
191*
192*  =====================================================================
193*
194*     .. Parameters ..
195      COMPLEX*16         CONE
196      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
197*     ..
198*     .. Local Scalars ..
199      LOGICAL            LQUERY
200      INTEGER            I, IINFO, J, LW1, LW2, LWT, LDWT, LWORKOPT,
201     $                   NB1LOCAL, NB2LOCAL, NUM_ALL_ROW_BLOCKS
202*     ..
203*     .. External Subroutines ..
204      EXTERNAL           ZCOPY, ZLATSQR, ZUNGTSQR_ROW, ZUNHR_COL,
205     $                   XERBLA
206*     ..
207*     .. Intrinsic Functions ..
208      INTRINSIC          CEILING, DBLE, DCMPLX, MAX, MIN
209*     ..
210*     .. Executable Statements ..
211*
212*     Test the input arguments
213*
214      INFO = 0
215      LQUERY  = LWORK.EQ.-1
216      IF( M.LT.0 ) THEN
217         INFO = -1
218      ELSE IF( N.LT.0 .OR. M.LT.N ) THEN
219         INFO = -2
220      ELSE IF( MB1.LE.N ) THEN
221         INFO = -3
222      ELSE IF( NB1.LT.1 ) THEN
223         INFO = -4
224      ELSE IF( NB2.LT.1 ) THEN
225         INFO = -5
226      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
227         INFO = -7
228      ELSE IF( LDT.LT.MAX( 1,  MIN( NB2, N ) ) ) THEN
229         INFO = -9
230      ELSE
231*
232*        Test the input LWORK for the dimension of the array WORK.
233*        This workspace is used to store array:
234*        a) Matrix T and WORK for ZLATSQR;
235*        b) N-by-N upper-triangular factor R_tsqr;
236*        c) Matrix T and array WORK for ZUNGTSQR_ROW;
237*        d) Diagonal D for ZUNHR_COL.
238*
239         IF( LWORK.LT.N*N+1 .AND. .NOT.LQUERY ) THEN
240            INFO = -11
241         ELSE
242*
243*           Set block size for column blocks
244*
245            NB1LOCAL = MIN( NB1, N )
246*
247            NUM_ALL_ROW_BLOCKS = MAX( 1,
248     $                   CEILING( DBLE( M - N ) / DBLE( MB1 - N ) ) )
249*
250*           Length and leading dimension of WORK array to place
251*           T array in TSQR.
252*
253            LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL
254
255            LDWT = NB1LOCAL
256*
257*           Length of TSQR work array
258*
259            LW1 = NB1LOCAL * N
260*
261*           Length of ZUNGTSQR_ROW work array.
262*
263            LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) )
264*
265            LWORKOPT = MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) )
266*
267            IF( ( LWORK.LT.MAX( 1, LWORKOPT ) ).AND.(.NOT.LQUERY) ) THEN
268               INFO = -11
269            END IF
270*
271         END IF
272      END IF
273*
274*     Handle error in the input parameters and return workspace query.
275*
276      IF( INFO.NE.0 ) THEN
277         CALL XERBLA( 'ZGETSQRHRT', -INFO )
278         RETURN
279      ELSE IF ( LQUERY ) THEN
280         WORK( 1 ) = DCMPLX( LWORKOPT )
281         RETURN
282      END IF
283*
284*     Quick return if possible
285*
286      IF( MIN( M, N ).EQ.0 ) THEN
287         WORK( 1 ) = DCMPLX( LWORKOPT )
288         RETURN
289      END IF
290*
291      NB2LOCAL = MIN( NB2, N )
292*
293*
294*     (1) Perform TSQR-factorization of the M-by-N matrix A.
295*
296      CALL ZLATSQR( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT,
297     $              WORK(LWT+1), LW1, IINFO )
298*
299*     (2) Copy the factor R_tsqr stored in the upper-triangular part
300*         of A into the square matrix in the work array
301*         WORK(LWT+1:LWT+N*N) column-by-column.
302*
303      DO J = 1, N
304         CALL ZCOPY( J, A( 1, J ), 1, WORK( LWT + N*(J-1)+1 ), 1 )
305      END DO
306*
307*     (3) Generate a M-by-N matrix Q with orthonormal columns from
308*     the result stored below the diagonal in the array A in place.
309*
310
311      CALL ZUNGTSQR_ROW( M, N, MB1, NB1LOCAL, A, LDA, WORK, LDWT,
312     $                   WORK( LWT+N*N+1 ), LW2, IINFO )
313*
314*     (4) Perform the reconstruction of Householder vectors from
315*     the matrix Q (stored in A) in place.
316*
317      CALL ZUNHR_COL( M, N, NB2LOCAL, A, LDA, T, LDT,
318     $                WORK( LWT+N*N+1 ), IINFO )
319*
320*     (5) Copy the factor R_tsqr stored in the square matrix in the
321*     work array WORK(LWT+1:LWT+N*N) into the upper-triangular
322*     part of A.
323*
324*     (6) Compute from R_tsqr the factor R_hr corresponding to
325*     the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr.
326*     This multiplication by the sign matrix S on the left means
327*     changing the sign of I-th row of the matrix R_tsqr according
328*     to sign of the I-th diagonal element DIAG(I) of the matrix S.
329*     DIAG is stored in WORK( LWT+N*N+1 ) from the ZUNHR_COL output.
330*
331*     (5) and (6) can be combined in a single loop, so the rows in A
332*     are accessed only once.
333*
334      DO I = 1, N
335         IF( WORK( LWT+N*N+I ).EQ.-CONE ) THEN
336            DO J = I, N
337               A( I, J ) = -CONE * WORK( LWT+N*(J-1)+I )
338            END DO
339         ELSE
340            CALL ZCOPY( N-I+1, WORK(LWT+N*(I-1)+I), N, A( I, I ), LDA )
341         END IF
342      END DO
343*
344      WORK( 1 ) = DCMPLX( LWORKOPT )
345      RETURN
346*
347*     End of ZGETSQRHRT
348*
349      END