1*> \brief \b SLAMTSQR
2*
3*  Definition:
4*  ===========
5*
6*      SUBROUTINE SLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T,
7*     $                     LDT, C, LDC, WORK, LWORK, INFO )
8*
9*
10*     .. Scalar Arguments ..
11*      CHARACTER         SIDE, TRANS
12*      INTEGER           INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC
13*     ..
14*     .. Array Arguments ..
15*      DOUBLE        A( LDA, * ), WORK( * ), C(LDC, * ),
16*     $                  T( LDT, * )
17*> \par Purpose:
18*  =============
19*>
20*> \verbatim
21*>
22*>      SLAMTSQR overwrites the general real M-by-N matrix C with
23*>
24*>
25*>                 SIDE = 'L'     SIDE = 'R'
26*> TRANS = 'N':      Q * C          C * Q
27*> TRANS = 'T':      Q**T * C       C * Q**T
28*>      where Q is a real orthogonal matrix defined as the product
29*>      of blocked elementary reflectors computed by tall skinny
30*>      QR factorization (DLATSQR)
31*> \endverbatim
32*
33*  Arguments:
34*  ==========
35*
36*> \param[in] SIDE
37*> \verbatim
38*>          SIDE is CHARACTER*1
39*>          = 'L': apply Q or Q**T from the Left;
40*>          = 'R': apply Q or Q**T from the Right.
41*> \endverbatim
42*>
43*> \param[in] TRANS
44*> \verbatim
45*>          TRANS is CHARACTER*1
46*>          = 'N':  No transpose, apply Q;
47*>          = 'T':  Transpose, apply Q**T.
48*> \endverbatim
49*>
50*> \param[in] M
51*> \verbatim
52*>          M is INTEGER
53*>          The number of rows of the matrix A.  M >=0.
54*> \endverbatim
55*>
56*> \param[in] N
57*> \verbatim
58*>          N is INTEGER
59*>          The number of columns of the matrix C. M >= N >= 0.
60*> \endverbatim
61*>
62*> \param[in] K
63*> \verbatim
64*>          K is INTEGER
65*>          The number of elementary reflectors whose product defines
66*>          the matrix Q.
67*>          N >= K >= 0;
68*>
69*> \endverbatim
70*>
71*> \param[in] MB
72*> \verbatim
73*>          MB is INTEGER
74*>          The block size to be used in the blocked QR.
75*>          MB > N. (must be the same as DLATSQR)
76*> \endverbatim
77*>
78*> \param[in] NB
79*> \verbatim
80*>          NB is INTEGER
81*>          The column block size to be used in the blocked QR.
82*>          N >= NB >= 1.
83*> \endverbatim
84*>
85*> \param[in] A
86*> \verbatim
87*>          A is REAL array, dimension (LDA,K)
88*>          The i-th column must contain the vector which defines the
89*>          blockedelementary reflector H(i), for i = 1,2,...,k, as
90*>          returned by DLATSQR in the first k columns of
91*>          its array argument A.
92*> \endverbatim
93*>
94*> \param[in] LDA
95*> \verbatim
96*>          LDA is INTEGER
97*>          The leading dimension of the array A.
98*>          If SIDE = 'L', LDA >= max(1,M);
99*>          if SIDE = 'R', LDA >= max(1,N).
100*> \endverbatim
101*>
102*> \param[in] T
103*> \verbatim
104*>          T is REAL array, dimension
105*>          ( N * Number of blocks(CEIL(M-K/MB-K)),
106*>          The blocked upper triangular block reflectors stored in compact form
107*>          as a sequence of upper triangular blocks.  See below
108*>          for further details.
109*> \endverbatim
110*>
111*> \param[in] LDT
112*> \verbatim
113*>          LDT is INTEGER
114*>          The leading dimension of the array T.  LDT >= NB.
115*> \endverbatim
116*>
117*> \param[in,out] C
118*> \verbatim
119*>          C is REAL array, dimension (LDC,N)
120*>          On entry, the M-by-N matrix C.
121*>          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
122*> \endverbatim
123*>
124*> \param[in] LDC
125*> \verbatim
126*>          LDC is INTEGER
127*>          The leading dimension of the array C. LDC >= max(1,M).
128*> \endverbatim
129*>
130*> \param[out] WORK
131*> \verbatim
132*>         (workspace) REAL array, dimension (MAX(1,LWORK))
133*>
134*> \endverbatim
135*> \param[in] LWORK
136*> \verbatim
137*>          LWORK is INTEGER
138*>          The dimension of the array WORK.
139*>
140*>          If SIDE = 'L', LWORK >= max(1,N)*NB;
141*>          if SIDE = 'R', LWORK >= max(1,MB)*NB.
142*>          If LWORK = -1, then a workspace query is assumed; the routine
143*>          only calculates the optimal size of the WORK array, returns
144*>          this value as the first entry of the WORK array, and no error
145*>          message related to LWORK is issued by XERBLA.
146*>
147*> \endverbatim
148*> \param[out] INFO
149*> \verbatim
150*>          INFO is INTEGER
151*>          = 0:  successful exit
152*>          < 0:  if INFO = -i, the i-th argument had an illegal value
153*> \endverbatim
154*
155*  Authors:
156*  ========
157*
158*> \author Univ. of Tennessee
159*> \author Univ. of California Berkeley
160*> \author Univ. of Colorado Denver
161*> \author NAG Ltd.
162*
163*> \par Further Details:
164*  =====================
165*>
166*> \verbatim
167*> Tall-Skinny QR (TSQR) performs QR by a sequence of orthogonal transformations,
168*> representing Q as a product of other orthogonal matrices
169*>   Q = Q(1) * Q(2) * . . . * Q(k)
170*> where each Q(i) zeros out subdiagonal entries of a block of MB rows of A:
171*>   Q(1) zeros out the subdiagonal entries of rows 1:MB of A
172*>   Q(2) zeros out the bottom MB-N rows of rows [1:N,MB+1:2*MB-N] of A
173*>   Q(3) zeros out the bottom MB-N rows of rows [1:N,2*MB-N+1:3*MB-2*N] of A
174*>   . . .
175*>
176*> Q(1) is computed by GEQRT, which represents Q(1) by Householder vectors
177*> stored under the diagonal of rows 1:MB of A, and by upper triangular
178*> block reflectors, stored in array T(1:LDT,1:N).
179*> For more information see Further Details in GEQRT.
180*>
181*> Q(i) for i>1 is computed by TPQRT, which represents Q(i) by Householder vectors
182*> stored in rows [(i-1)*(MB-N)+N+1:i*(MB-N)+N] of A, and by upper triangular
183*> block reflectors, stored in array T(1:LDT,(i-1)*N+1:i*N).
184*> The last Q(k) may use fewer rows.
185*> For more information see Further Details in TPQRT.
186*>
187*> For more details of the overall algorithm, see the description of
188*> Sequential TSQR in Section 2.2 of [1].
189*>
190*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,”
191*>     J. Demmel, L. Grigori, M. Hoemmen, J. Langou,
192*>     SIAM J. Sci. Comput, vol. 34, no. 1, 2012
193*> \endverbatim
194*>
195*  =====================================================================
196      SUBROUTINE SLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T,
197     $        LDT, C, LDC, WORK, LWORK, INFO )
198*
199*  -- LAPACK computational routine --
200*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
201*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
202*
203*     .. Scalar Arguments ..
204      CHARACTER         SIDE, TRANS
205      INTEGER           INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC
206*     ..
207*     .. Array Arguments ..
208      REAL              A( LDA, * ), WORK( * ), C(LDC, * ),
209     $                T( LDT, * )
210*     ..
211*
212* =====================================================================
213*
214*     ..
215*     .. Local Scalars ..
216      LOGICAL    LEFT, RIGHT, TRAN, NOTRAN, LQUERY
217      INTEGER    I, II, KK, LW, CTR
218*     ..
219*     .. External Functions ..
220      LOGICAL            LSAME
221      EXTERNAL           LSAME
222*     .. External Subroutines ..
223      EXTERNAL           SGEMQRT, STPMQRT, XERBLA
224*     ..
225*     .. Executable Statements ..
226*
227*     Test the input arguments
228*
229      LQUERY  = LWORK.LT.0
230      NOTRAN  = LSAME( TRANS, 'N' )
231      TRAN    = LSAME( TRANS, 'T' )
232      LEFT    = LSAME( SIDE, 'L' )
233      RIGHT   = LSAME( SIDE, 'R' )
234      IF (LEFT) THEN
235        LW = N * NB
236      ELSE
237        LW = MB * NB
238      END IF
239*
240      INFO = 0
241      IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN
242         INFO = -1
243      ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN
244         INFO = -2
245      ELSE IF( M.LT.0 ) THEN
246        INFO = -3
247      ELSE IF( N.LT.0 ) THEN
248        INFO = -4
249      ELSE IF( K.LT.0 ) THEN
250        INFO = -5
251      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
252        INFO = -9
253      ELSE IF( LDT.LT.MAX( 1, NB) ) THEN
254        INFO = -11
255      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
256         INFO = -13
257      ELSE IF(( LWORK.LT.MAX(1,LW)).AND.(.NOT.LQUERY)) THEN
258        INFO = -15
259      END IF
260*
261*     Determine the block size if it is tall skinny or short and wide
262*
263      IF( INFO.EQ.0)  THEN
264          WORK(1) = LW
265      END IF
266*
267      IF( INFO.NE.0 ) THEN
268        CALL XERBLA( 'SLAMTSQR', -INFO )
269        RETURN
270      ELSE IF (LQUERY) THEN
271       RETURN
272      END IF
273*
274*     Quick return if possible
275*
276      IF( MIN(M,N,K).EQ.0 ) THEN
277        RETURN
278      END IF
279*
280      IF((MB.LE.K).OR.(MB.GE.MAX(M,N,K))) THEN
281        CALL SGEMQRT( SIDE, TRANS, M, N, K, NB, A, LDA,
282     $        T, LDT, C, LDC, WORK, INFO)
283        RETURN
284       END IF
285*
286      IF(LEFT.AND.NOTRAN) THEN
287*
288*         Multiply Q to the last block of C
289*
290         KK = MOD((M-K),(MB-K))
291         CTR = (M-K)/(MB-K)
292         IF (KK.GT.0) THEN
293           II=M-KK+1
294           CALL STPMQRT('L','N',KK , N, K, 0, NB, A(II,1), LDA,
295     $       T(1,CTR*K+1),LDT , C(1,1), LDC,
296     $       C(II,1), LDC, WORK, INFO )
297         ELSE
298           II=M+1
299         END IF
300*
301         DO I=II-(MB-K),MB+1,-(MB-K)
302*
303*         Multiply Q to the current block of C (I:I+MB,1:N)
304*
305           CTR = CTR - 1
306           CALL STPMQRT('L','N',MB-K , N, K, 0,NB, A(I,1), LDA,
307     $         T(1, CTR * K + 1), LDT, C(1,1), LDC,
308     $         C(I,1), LDC, WORK, INFO )
309*
310         END DO
311*
312*         Multiply Q to the first block of C (1:MB,1:N)
313*
314         CALL SGEMQRT('L','N',MB , N, K, NB, A(1,1), LDA, T
315     $            ,LDT ,C(1,1), LDC, WORK, INFO )
316*
317      ELSE IF (LEFT.AND.TRAN) THEN
318*
319*         Multiply Q to the first block of C
320*
321         KK = MOD((M-K),(MB-K))
322         II=M-KK+1
323         CTR = 1
324         CALL SGEMQRT('L','T',MB , N, K, NB, A(1,1), LDA, T
325     $            ,LDT ,C(1,1), LDC, WORK, INFO )
326*
327         DO I=MB+1,II-MB+K,(MB-K)
328*
329*         Multiply Q to the current block of C (I:I+MB,1:N)
330*
331          CALL STPMQRT('L','T',MB-K , N, K, 0,NB, A(I,1), LDA,
332     $       T(1,CTR * K + 1),LDT, C(1,1), LDC,
333     $       C(I,1), LDC, WORK, INFO )
334          CTR = CTR + 1
335*
336         END DO
337         IF(II.LE.M) THEN
338*
339*         Multiply Q to the last block of C
340*
341          CALL STPMQRT('L','T',KK , N, K, 0,NB, A(II,1), LDA,
342     $      T(1, CTR * K + 1), LDT, C(1,1), LDC,
343     $      C(II,1), LDC, WORK, INFO )
344*
345         END IF
346*
347      ELSE IF(RIGHT.AND.TRAN) THEN
348*
349*         Multiply Q to the last block of C
350*
351          KK = MOD((N-K),(MB-K))
352          CTR = (N-K)/(MB-K)
353          IF (KK.GT.0) THEN
354            II=N-KK+1
355            CALL STPMQRT('R','T',M , KK, K, 0, NB, A(II,1), LDA,
356     $        T(1, CTR * K + 1), LDT, C(1,1), LDC,
357     $        C(1,II), LDC, WORK, INFO )
358          ELSE
359            II=N+1
360          END IF
361*
362          DO I=II-(MB-K),MB+1,-(MB-K)
363*
364*         Multiply Q to the current block of C (1:M,I:I+MB)
365*
366            CTR = CTR - 1
367            CALL STPMQRT('R','T',M , MB-K, K, 0,NB, A(I,1), LDA,
368     $          T(1, CTR * K + 1), LDT, C(1,1), LDC,
369     $          C(1,I), LDC, WORK, INFO )
370*
371          END DO
372*
373*         Multiply Q to the first block of C (1:M,1:MB)
374*
375          CALL SGEMQRT('R','T',M , MB, K, NB, A(1,1), LDA, T
376     $              ,LDT ,C(1,1), LDC, WORK, INFO )
377*
378      ELSE IF (RIGHT.AND.NOTRAN) THEN
379*
380*         Multiply Q to the first block of C
381*
382         KK = MOD((N-K),(MB-K))
383         II=N-KK+1
384         CTR = 1
385         CALL SGEMQRT('R','N', M, MB , K, NB, A(1,1), LDA, T
386     $              ,LDT ,C(1,1), LDC, WORK, INFO )
387*
388         DO I=MB+1,II-MB+K,(MB-K)
389*
390*         Multiply Q to the current block of C (1:M,I:I+MB)
391*
392          CALL STPMQRT('R','N', M, MB-K, K, 0,NB, A(I,1), LDA,
393     $         T(1, CTR * K + 1),LDT, C(1,1), LDC,
394     $         C(1,I), LDC, WORK, INFO )
395          CTR = CTR + 1
396*
397         END DO
398         IF(II.LE.N) THEN
399*
400*         Multiply Q to the last block of C
401*
402          CALL STPMQRT('R','N', M, KK , K, 0,NB, A(II,1), LDA,
403     $        T(1, CTR * K + 1),LDT, C(1,1), LDC,
404     $        C(1,II), LDC, WORK, INFO )
405*
406         END IF
407*
408      END IF
409*
410      WORK(1) = LW
411      RETURN
412*
413*     End of SLAMTSQR
414*
415      END
416