1      SUBROUTINE PZGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB,
2     $                    DESCB, TAUB, WORK, LWORK, INFO )
3*
4*  -- ScaLAPACK routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     May 1, 1997
8*
9*     .. Scalar Arguments ..
10      INTEGER            IA, IB, INFO, JA, JB, LWORK, M, N, P
11*     ..
12*     .. Array Arguments ..
13      INTEGER            DESCA( * ), DESCB( * )
14      COMPLEX*16         A( * ), B( * ), TAUA( * ), TAUB( * ), WORK( * )
15*     ..
16*
17*  Purpose
18*  =======
19*
20*  PZGGRQF computes a generalized RQ factorization of
21*  an M-by-N matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
22*  and a P-by-N matrix sub( B ) = B(IB:IB+P-1,JB:JB+N-1):
23*
24*              sub( A ) = R*Q,        sub( B ) = Z*T*Q,
25*
26*  where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix,
27*  and R and T assume one of the forms:
28*
29*  if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N,
30*                   N-M  M                           ( R21 ) N
31*                                                       N
32*
33*  where R12 or R21 is upper triangular, and
34*
35*  if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P,
36*                  (  0  ) P-N                         P   N-P
37*                     N
38*
39*  where T11 is upper triangular.
40*
41*  In particular, if sub( B ) is square and nonsingular, the GRQ
42*  factorization of sub( A ) and sub( B ) implicitly gives the RQ
43*  factorization of sub( A )*inv( sub( B ) ):
44*
45*               sub( A )*inv( sub( B ) ) = (R*inv(T))*Z'
46*
47*  where inv( sub( B ) ) denotes the inverse of the matrix sub( B ),
48*  and Z' denotes the conjugate transpose of matrix Z.
49*
50*  Notes
51*  =====
52*
53*  Each global data object is described by an associated description
54*  vector.  This vector stores the information required to establish
55*  the mapping between an object element and its corresponding process
56*  and memory location.
57*
58*  Let A be a generic term for any 2D block cyclicly distributed array.
59*  Such a global array has an associated description vector DESCA.
60*  In the following comments, the character _ should be read as
61*  "of the global array".
62*
63*  NOTATION        STORED IN      EXPLANATION
64*  --------------- -------------- --------------------------------------
65*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
66*                                 DTYPE_A = 1.
67*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
68*                                 the BLACS process grid A is distribu-
69*                                 ted over. The context itself is glo-
70*                                 bal, but the handle (the integer
71*                                 value) may vary.
72*  M_A    (global) DESCA( M_ )    The number of rows in the global
73*                                 array A.
74*  N_A    (global) DESCA( N_ )    The number of columns in the global
75*                                 array A.
76*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
77*                                 the rows of the array.
78*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
79*                                 the columns of the array.
80*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
81*                                 row of the array A is distributed.
82*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
83*                                 first column of the array A is
84*                                 distributed.
85*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
86*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
87*
88*  Let K be the number of rows or columns of a distributed matrix,
89*  and assume that its process grid has dimension p x q.
90*  LOCr( K ) denotes the number of elements of K that a process
91*  would receive if K were distributed over the p processes of its
92*  process column.
93*  Similarly, LOCc( K ) denotes the number of elements of K that a
94*  process would receive if K were distributed over the q processes of
95*  its process row.
96*  The values of LOCr() and LOCc() may be determined via a call to the
97*  ScaLAPACK tool function, NUMROC:
98*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
99*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
100*  An upper bound for these quantities may be computed by:
101*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
102*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
103*
104*  Arguments
105*  =========
106*
107*  M       (global input) INTEGER
108*          The number of rows to be operated on i.e the number of
109*          rows of the distributed submatrix sub( A ).  M >= 0.
110*
111*  P       (global input) INTEGER
112*          The number of rows to be operated on i.e the number of
113*          rows of the distributed submatrix sub( B ).  P >= 0.
114*
115*  N       (global input) INTEGER
116*          The number of columns to be operated on i.e the number of
117*          columns of the distributed submatrices sub( A ) and sub( B ).
118*          N >= 0.
119*
120*  A       (local input/local output) COMPLEX*16 pointer into the
121*          local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
122*          On entry, the local pieces of the M-by-N distributed matrix
123*          sub( A ) which is to be factored. On exit, if M <= N, the
124*          upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the
125*          M by M upper triangular matrix R; if M >= N, the elements on
126*          and above the (M-N)-th subdiagonal contain the M by N upper
127*          trapezoidal matrix R; the remaining elements, with the array
128*          TAUA, represent the unitary matrix Q as a product of
129*          elementary reflectors (see Further Details).
130*
131*  IA      (global input) INTEGER
132*          The row index in the global array A indicating the first
133*          row of sub( A ).
134*
135*  JA      (global input) INTEGER
136*          The column index in the global array A indicating the
137*          first column of sub( A ).
138*
139*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
140*          The array descriptor for the distributed matrix A.
141*
142*  TAUA    (local output) COMPLEX*16, array, dimension LOCr(IA+M-1)
143*          This array contains the scalar factors of the elementary
144*          reflectors which represent the unitary matrix Q. TAUA is
145*          tied to the distributed matrix A (see Further Details).
146*
147*  B       (local input/local output) COMPLEX*16 pointer into the
148*          local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
149*          On entry, the local pieces of the P-by-N distributed matrix
150*          sub( B ) which is to be factored.  On exit, the elements on
151*          and above the diagonal of sub( B ) contain the min(P,N) by N
152*          upper trapezoidal matrix T (T is upper triangular if P >= N);
153*          the elements below the diagonal, with the array TAUB,
154*          represent the unitary matrix Z as a product of elementary
155*          reflectors (see Further Details).
156*
157*  IB      (global input) INTEGER
158*          The row index in the global array B indicating the first
159*          row of sub( B ).
160*
161*  JB      (global input) INTEGER
162*          The column index in the global array B indicating the
163*          first column of sub( B ).
164*
165*  DESCB   (global and local input) INTEGER array of dimension DLEN_.
166*          The array descriptor for the distributed matrix B.
167*
168*  TAUB    (local output) COMPLEX*16, array, dimension
169*          LOCc(JB+MIN(P,N)-1). This array contains the scalar factors
170*          TAUB of the elementary reflectors which represent the unitary
171*          matrix Z. TAUB is tied to the distributed matrix B (see
172*          Further Details).
173*
174*  WORK    (local workspace/local output) COMPLEX*16 array,
175*                                                   dimension (LWORK)
176*          On exit, WORK(1) returns the minimal and optimal LWORK.
177*
178*  LWORK   (local or global input) INTEGER
179*          The dimension of the array WORK.
180*          LWORK is local input and must be at least
181*          LWORK >= MAX( MB_A * ( MpA0 + NqA0 + MB_A ),
182*                        MAX( (MB_A*(MB_A-1))/2, (PpB0 + NqB0)*MB_A ) +
183*                             MB_A * MB_A,
184*                        NB_B * ( PpB0 + NqB0 + NB_B ) ), where
185*
186*          IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
187*          IAROW  = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
188*          IACOL  = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
189*          MpA0   = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
190*          NqA0   = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
191*
192*          IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ),
193*          IBROW  = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ),
194*          IBCOL  = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ),
195*          PpB0   = NUMROC( P+IROFFB, MB_B, MYROW, IBROW, NPROW ),
196*          NqB0   = NUMROC( N+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ),
197*
198*          and NUMROC, INDXG2P are ScaLAPACK tool functions;
199*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
200*          the subroutine BLACS_GRIDINFO.
201*
202*          If LWORK = -1, then LWORK is global input and a workspace
203*          query is assumed; the routine only calculates the minimum
204*          and optimal size for all work arrays. Each of these
205*          values is returned in the first entry of the corresponding
206*          work array, and no error message is issued by PXERBLA.
207*
208*  INFO    (global output) INTEGER
209*          = 0:  successful exit
210*          < 0:  If the i-th argument is an array and the j-entry had
211*                an illegal value, then INFO = -(i*100+j), if the i-th
212*                argument is a scalar and had an illegal value, then
213*                INFO = -i.
214*
215*  Further Details
216*  ===============
217*
218*  The matrix Q is represented as a product of elementary reflectors
219*
220*     Q = H(ia)' H(ia+1)' . . . H(ia+k-1)', where k = min(m,n).
221*
222*  Each H(i) has the form
223*
224*     H(i) = I - taua * v * v'
225*
226*  where taua is a complex scalar, and v is a complex vector with
227*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on
228*  exit in A(ia+m-k+i-1,ja:ja+n-k+i-2), and taua in TAUA(ia+m-k+i-1).
229*  To form Q explicitly, use ScaLAPACK subroutine PZUNGRQ.
230*  To use Q to update another matrix, use ScaLAPACK subroutine PZUNMRQ.
231*
232*  The matrix Z is represented as a product of elementary reflectors
233*
234*     Z = H(jb) H(jb+1) . . . H(jb+k-1), where k = min(p,n).
235*
236*  Each H(i) has the form
237*
238*     H(i) = I - taub * v * v'
239*
240*  where taub is a complex scalar, and v is a complex vector with
241*  v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in
242*  B(ib+i:ib+p-1,jb+i-1), and taub in TAUB(jb+i-1).
243*  To form Z explicitly, use ScaLAPACK subroutine PZUNGQR.
244*  To use Z to update another matrix, use ScaLAPACK subroutine PZUNMQR.
245*
246*  Alignment requirements
247*  ======================
248*
249*  The distributed submatrices sub( A ) and sub( B ) must verify some
250*  alignment properties, namely the following expression should be true:
251*
252*  ( NB_A.EQ.NB_B .AND. ICOFFA.EQ.ICOFFB .AND. IACOL.EQ.IBCOL )
253*
254*  =====================================================================
255*
256*     .. Parameters ..
257      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
258     $                   LLD_, MB_, M_, NB_, N_, RSRC_
259      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
260     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
261     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
262*     .. Local Scalars ..
263      LOGICAL            LQUERY
264      INTEGER            IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB,
265     $                   ICTXT, IROFFA, IROFFB, LWMIN, MPA0, MYCOL,
266     $                   MYROW, NPCOL, NPROW, NQA0, NQB0, PPB0
267*     ..
268*     .. External Subroutines ..
269      EXTERNAL           BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PXERBLA,
270     $                   PZGEQRF, PZGERQF, PZUNMRQ
271*     ..
272*     .. Local Arrays ..
273      INTEGER            IDUM1( 1 ), IDUM2( 1 )
274*     ..
275*     .. External Functions ..
276      INTEGER            INDXG2P, NUMROC
277      EXTERNAL           INDXG2P, NUMROC
278*     ..
279*     .. Intrinsic Functions ..
280      INTRINSIC          DBLE, DCMPLX, INT, MAX, MIN, MOD
281*     ..
282*     .. Executable Statements ..
283*
284*     Get grid parameters
285*
286      ICTXT = DESCA( CTXT_ )
287      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
288*
289*     Test the input parameters
290*
291      INFO = 0
292      IF( NPROW.EQ.-1 ) THEN
293         INFO = -707
294      ELSE
295         CALL CHK1MAT( M, 1, N, 3, IA, JA, DESCA, 7, INFO )
296         CALL CHK1MAT( P, 2, N, 3, IB, JB, DESCB, 12, INFO )
297         IF( INFO.EQ.0 ) THEN
298            IROFFA = MOD( IA-1, DESCA( MB_ ) )
299            ICOFFA = MOD( JA-1, DESCA( NB_ ) )
300            IROFFB = MOD( IB-1, DESCB( MB_ ) )
301            ICOFFB = MOD( JB-1, DESCB( NB_ ) )
302            IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ),
303     $                       NPROW )
304            IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ),
305     $                       NPCOL )
306            IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ),
307     $                       NPROW )
308            IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ),
309     $                       NPCOL )
310            MPA0 = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW )
311            NQA0 = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
312            PPB0 = NUMROC( P+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW )
313            NQB0 = NUMROC( N+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL )
314            LWMIN = MAX( DESCA( MB_ ) * ( MPA0 + NQA0 + DESCA( MB_ ) ),
315     $        MAX( MAX( ( DESCA( MB_ )*( DESCA( MB_ ) - 1 ) ) / 2,
316     $        ( PPB0 + NQB0 ) * DESCA( MB_ ) ) +
317     $          DESCA( MB_ ) * DESCA( MB_ ),
318     $        DESCB( NB_ ) * ( PPB0 + NQB0 + DESCB( NB_ ) ) ) )
319*
320            WORK( 1 ) = DCMPLX( DBLE( LWMIN ) )
321            LQUERY = ( LWORK.EQ.-1 )
322            IF( IACOL.NE.IBCOL .OR. ICOFFA.NE.ICOFFB ) THEN
323               INFO = -11
324            ELSE IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN
325               INFO = -1204
326            ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN
327               INFO = -1207
328            ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
329               INFO = -15
330            END IF
331         END IF
332         IF( LWORK.EQ.-1 ) THEN
333            IDUM1( 1 ) = -1
334         ELSE
335            IDUM1( 1 ) = 1
336         END IF
337         IDUM2( 1 ) = 15
338         CALL PCHK2MAT( M, 1, N, 3, IA, JA, DESCA, 7, P, 2, N, 3, IB,
339     $                  JB, DESCB, 12, 1, IDUM1, IDUM2, INFO )
340      END IF
341*
342      IF( INFO.NE.0 ) THEN
343         CALL PXERBLA( ICTXT, 'PZGGRQF', -INFO )
344         RETURN
345      ELSE IF( LQUERY ) THEN
346         RETURN
347      END IF
348*
349*     RQ factorization of M-by-N matrix sub( A ): sub( A ) = R*Q
350*
351      CALL PZGERQF( M, N, A, IA, JA, DESCA, TAUA, WORK, LWORK, INFO )
352      LWMIN = INT( WORK( 1 ) )
353*
354*     Update sub( B ) := sub( B )*Q'
355*
356      CALL PZUNMRQ( 'Right', 'Conjugate Transpose', P, N, MIN( M, N ),
357     $              A, MAX( IA, IA+M-N ), JA, DESCA, TAUA, B, IB, JB,
358     $              DESCB, WORK, LWORK, INFO )
359      LWMIN = MAX( LWMIN, INT( WORK( 1 ) ) )
360*
361*     QR factorization of P-by-N matrix sub( B ): sub( B ) = Z*T
362*
363      CALL PZGEQRF( P, N, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO )
364      WORK( 1 ) = DCMPLX( DBLE( MAX( LWMIN, INT( WORK( 1 ) ) ) ) )
365*
366      RETURN
367*
368*     End of PZGGRQF
369*
370      END
371