1*> \brief \b ZUNGTSQR_ROW
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download ZUNGTSQR_ROW + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zunrgtsqr_row.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zunrgtsqr_row.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zunrgtsqr_row.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE ZUNGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK,
22*      $                         LWORK, INFO )
23*       IMPLICIT NONE
24*
25*       .. Scalar Arguments ..
26*       INTEGER           INFO, LDA, LDT, LWORK, M, N, MB, NB
27*       ..
28*       .. Array Arguments ..
29*       COMPLEX*16        A( LDA, * ), T( LDT, * ), WORK( * )
30*       ..
31*
32*> \par Purpose:
33*  =============
34*>
35*> \verbatim
36*>
37*> ZUNGTSQR_ROW generates an M-by-N complex matrix Q_out with
38*> orthonormal columns from the output of ZLATSQR. These N orthonormal
39*> columns are the first N columns of a product of complex unitary
40*> matrices Q(k)_in of order M, which are returned by ZLATSQR in
41*> a special format.
42*>
43*>      Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
44*>
45*> The input matrices Q(k)_in are stored in row and column blocks in A.
46*> See the documentation of ZLATSQR for more details on the format of
47*> Q(k)_in, where each Q(k)_in is represented by block Householder
48*> transformations. This routine calls an auxiliary routine ZLARFB_GETT,
49*> where the computation is performed on each individual block. The
50*> algorithm first sweeps NB-sized column blocks from the right to left
51*> starting in the bottom row block and continues to the top row block
52*> (hence _ROW in the routine name). This sweep is in reverse order of
53*> the order in which ZLATSQR generates the output blocks.
54*> \endverbatim
55*
56*  Arguments:
57*  ==========
58*
59*> \param[in] M
60*> \verbatim
61*>          M is INTEGER
62*>          The number of rows of the matrix A.  M >= 0.
63*> \endverbatim
64*>
65*> \param[in] N
66*> \verbatim
67*>          N is INTEGER
68*>          The number of columns of the matrix A. M >= N >= 0.
69*> \endverbatim
70*>
71*> \param[in] MB
72*> \verbatim
73*>          MB is INTEGER
74*>          The row block size used by ZLATSQR to return
75*>          arrays A and T. MB > N.
76*>          (Note that if MB > M, then M is used instead of MB
77*>          as the row block size).
78*> \endverbatim
79*>
80*> \param[in] NB
81*> \verbatim
82*>          NB is INTEGER
83*>          The column block size used by ZLATSQR to return
84*>          arrays A and T. NB >= 1.
85*>          (Note that if NB > N, then N is used instead of NB
86*>          as the column block size).
87*> \endverbatim
88*>
89*> \param[in,out] A
90*> \verbatim
91*>          A is COMPLEX*16 array, dimension (LDA,N)
92*>
93*>          On entry:
94*>
95*>             The elements on and above the diagonal are not used as
96*>             input. The elements below the diagonal represent the unit
97*>             lower-trapezoidal blocked matrix V computed by ZLATSQR
98*>             that defines the input matrices Q_in(k) (ones on the
99*>             diagonal are not stored). See ZLATSQR for more details.
100*>
101*>          On exit:
102*>
103*>             The array A contains an M-by-N orthonormal matrix Q_out,
104*>             i.e the columns of A are orthogonal unit vectors.
105*> \endverbatim
106*>
107*> \param[in] LDA
108*> \verbatim
109*>          LDA is INTEGER
110*>          The leading dimension of the array A.  LDA >= max(1,M).
111*> \endverbatim
112*>
113*> \param[in] T
114*> \verbatim
115*>          T is COMPLEX*16 array,
116*>          dimension (LDT, N * NIRB)
117*>          where NIRB = Number_of_input_row_blocks
118*>                     = MAX( 1, CEIL((M-N)/(MB-N)) )
119*>          Let NICB = Number_of_input_col_blocks
120*>                   = CEIL(N/NB)
121*>
122*>          The upper-triangular block reflectors used to define the
123*>          input matrices Q_in(k), k=(1:NIRB*NICB). The block
124*>          reflectors are stored in compact form in NIRB block
125*>          reflector sequences. Each of the NIRB block reflector
126*>          sequences is stored in a larger NB-by-N column block of T
127*>          and consists of NICB smaller NB-by-NB upper-triangular
128*>          column blocks. See ZLATSQR for more details on the format
129*>          of T.
130*> \endverbatim
131*>
132*> \param[in] LDT
133*> \verbatim
134*>          LDT is INTEGER
135*>          The leading dimension of the array T.
136*>          LDT >= max(1,min(NB,N)).
137*> \endverbatim
138*>
139*> \param[out] WORK
140*> \verbatim
141*>          (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK))
142*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
143*> \endverbatim
144*>
145*> \param[in] LWORK
146*> \verbatim
147*>          The dimension of the array WORK.
148*>          LWORK >= NBLOCAL * MAX(NBLOCAL,(N-NBLOCAL)),
149*>          where NBLOCAL=MIN(NB,N).
150*>          If LWORK = -1, then a workspace query is assumed.
151*>          The routine only calculates the optimal size of the WORK
152*>          array, returns this value as the first entry of the WORK
153*>          array, and no error message related to LWORK is issued
154*>          by XERBLA.
155*> \endverbatim
156*>
157*> \param[out] INFO
158*> \verbatim
159*>          INFO is INTEGER
160*>          = 0:  successful exit
161*>          < 0:  if INFO = -i, the i-th argument had an illegal value
162*> \endverbatim
163*>
164*  Authors:
165*  ========
166*
167*> \author Univ. of Tennessee
168*> \author Univ. of California Berkeley
169*> \author Univ. of Colorado Denver
170*> \author NAG Ltd.
171*
172*> \ingroup complex16OTHERcomputational
173*
174*> \par Contributors:
175*  ==================
176*>
177*> \verbatim
178*>
179*> November 2020, Igor Kozachenko,
180*>                Computer Science Division,
181*>                University of California, Berkeley
182*>
183*> \endverbatim
184*>
185*  =====================================================================
186      SUBROUTINE ZUNGTSQR_ROW( M, N, MB, NB, A, LDA, T, LDT, WORK,
187     $                         LWORK, INFO )
188      IMPLICIT NONE
189*
190*  -- LAPACK computational routine --
191*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
192*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
193*
194*     .. Scalar Arguments ..
195      INTEGER           INFO, LDA, LDT, LWORK, M, N, MB, NB
196*     ..
197*     .. Array Arguments ..
198      COMPLEX*16        A( LDA, * ), T( LDT, * ), WORK( * )
199*     ..
200*
201*  =====================================================================
202*
203*     .. Parameters ..
204      COMPLEX*16         CONE, CZERO
205      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ),
206     $                     CZERO = ( 0.0D+0, 0.0D+0 ) )
207*     ..
208*     .. Local Scalars ..
209      LOGICAL            LQUERY
210      INTEGER            NBLOCAL, MB2, M_PLUS_ONE, ITMP, IB_BOTTOM,
211     $                   LWORKOPT, NUM_ALL_ROW_BLOCKS, JB_T, IB, IMB,
212     $                   KB, KB_LAST, KNB, MB1
213*     ..
214*     .. Local Arrays ..
215      COMPLEX*16         DUMMY( 1, 1 )
216*     ..
217*     .. External Subroutines ..
218      EXTERNAL           ZLARFB_GETT, ZLASET, XERBLA
219*     ..
220*     .. Intrinsic Functions ..
221      INTRINSIC          DCMPLX, MAX, MIN
222*     ..
223*     .. Executable Statements ..
224*
225*     Test the input parameters
226*
227      INFO = 0
228      LQUERY  = LWORK.EQ.-1
229      IF( M.LT.0 ) THEN
230         INFO = -1
231      ELSE IF( N.LT.0 .OR. M.LT.N ) THEN
232         INFO = -2
233      ELSE IF( MB.LE.N ) THEN
234         INFO = -3
235      ELSE IF( NB.LT.1 ) THEN
236         INFO = -4
237      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
238         INFO = -6
239      ELSE IF( LDT.LT.MAX( 1, MIN( NB, N ) ) ) THEN
240         INFO = -8
241      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
242         INFO = -10
243      END IF
244*
245      NBLOCAL = MIN( NB, N )
246*
247*     Determine the workspace size.
248*
249      IF( INFO.EQ.0 ) THEN
250         LWORKOPT = NBLOCAL * MAX( NBLOCAL, ( N - NBLOCAL ) )
251      END IF
252*
253*     Handle error in the input parameters and handle the workspace query.
254*
255      IF( INFO.NE.0 ) THEN
256         CALL XERBLA( 'ZUNGTSQR_ROW', -INFO )
257         RETURN
258      ELSE IF ( LQUERY ) THEN
259         WORK( 1 ) = DCMPLX( LWORKOPT )
260         RETURN
261      END IF
262*
263*     Quick return if possible
264*
265      IF( MIN( M, N ).EQ.0 ) THEN
266         WORK( 1 ) = DCMPLX( LWORKOPT )
267         RETURN
268      END IF
269*
270*     (0) Set the upper-triangular part of the matrix A to zero and
271*     its diagonal elements to one.
272*
273      CALL ZLASET('U', M, N, CZERO, CONE, A, LDA )
274*
275*     KB_LAST is the column index of the last column block reflector
276*     in the matrices T and V.
277*
278      KB_LAST = ( ( N-1 ) / NBLOCAL ) * NBLOCAL + 1
279*
280*
281*     (1) Bottom-up loop over row blocks of A, except the top row block.
282*     NOTE: If MB>=M, then the loop is never executed.
283*
284      IF ( MB.LT.M ) THEN
285*
286*        MB2 is the row blocking size for the row blocks before the
287*        first top row block in the matrix A. IB is the row index for
288*        the row blocks in the matrix A before the first top row block.
289*        IB_BOTTOM is the row index for the last bottom row block
290*        in the matrix A. JB_T is the column index of the corresponding
291*        column block in the matrix T.
292*
293*        Initialize variables.
294*
295*        NUM_ALL_ROW_BLOCKS is the number of row blocks in the matrix A
296*        including the first row block.
297*
298         MB2 = MB - N
299         M_PLUS_ONE = M + 1
300         ITMP = ( M - MB - 1 ) / MB2
301         IB_BOTTOM = ITMP * MB2 + MB + 1
302         NUM_ALL_ROW_BLOCKS = ITMP + 2
303         JB_T = NUM_ALL_ROW_BLOCKS * N + 1
304*
305         DO IB = IB_BOTTOM, MB+1, -MB2
306*
307*           Determine the block size IMB for the current row block
308*           in the matrix A.
309*
310            IMB = MIN( M_PLUS_ONE - IB, MB2 )
311*
312*           Determine the column index JB_T for the current column block
313*           in the matrix T.
314*
315            JB_T = JB_T - N
316*
317*           Apply column blocks of H in the row block from right to left.
318*
319*           KB is the column index of the current column block reflector
320*           in the matrices T and V.
321*
322            DO KB = KB_LAST, 1, -NBLOCAL
323*
324*              Determine the size of the current column block KNB in
325*              the matrices T and V.
326*
327               KNB = MIN( NBLOCAL, N - KB + 1 )
328*
329               CALL ZLARFB_GETT( 'I', IMB, N-KB+1, KNB,
330     $                     T( 1, JB_T+KB-1 ), LDT, A( KB, KB ), LDA,
331     $                     A( IB, KB ), LDA, WORK, KNB )
332*
333            END DO
334*
335         END DO
336*
337      END IF
338*
339*     (2) Top row block of A.
340*     NOTE: If MB>=M, then we have only one row block of A of size M
341*     and we work on the entire matrix A.
342*
343      MB1 = MIN( MB, M )
344*
345*     Apply column blocks of H in the top row block from right to left.
346*
347*     KB is the column index of the current block reflector in
348*     the matrices T and V.
349*
350      DO KB = KB_LAST, 1, -NBLOCAL
351*
352*        Determine the size of the current column block KNB in
353*        the matrices T and V.
354*
355         KNB = MIN( NBLOCAL, N - KB + 1 )
356*
357         IF( MB1-KB-KNB+1.EQ.0 ) THEN
358*
359*           In SLARFB_GETT parameters, when M=0, then the matrix B
360*           does not exist, hence we need to pass a dummy array
361*           reference DUMMY(1,1) to B with LDDUMMY=1.
362*
363            CALL ZLARFB_GETT( 'N', 0, N-KB+1, KNB,
364     $                        T( 1, KB ), LDT, A( KB, KB ), LDA,
365     $                        DUMMY( 1, 1 ), 1, WORK, KNB )
366         ELSE
367            CALL ZLARFB_GETT( 'N', MB1-KB-KNB+1, N-KB+1, KNB,
368     $                        T( 1, KB ), LDT, A( KB, KB ), LDA,
369     $                        A( KB+KNB, KB), LDA, WORK, KNB )
370
371         END IF
372*
373      END DO
374*
375      WORK( 1 ) = DCMPLX( LWORKOPT )
376      RETURN
377*
378*     End of ZUNGTSQR_ROW
379*
380      END
381