1      SUBROUTINE PCGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK,
2     $                    INFO )
3*
4*  -- ScaLAPACK routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     May 25, 2001
8*
9*     .. Scalar Arguments ..
10      INTEGER            IA, INFO, JA, LWORK, M, N
11*     ..
12*     .. Array Arguments ..
13      INTEGER            DESCA( * )
14      COMPLEX            A( * ), TAU( * ), WORK( * )
15*     ..
16*
17*  Purpose
18*  =======
19*
20*  PCGERQ2 computes a RQ factorization of a complex distributed M-by-N
21*  matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1) = R * Q.
22*
23*  Notes
24*  =====
25*
26*  Each global data object is described by an associated description
27*  vector.  This vector stores the information required to establish
28*  the mapping between an object element and its corresponding process
29*  and memory location.
30*
31*  Let A be a generic term for any 2D block cyclicly distributed array.
32*  Such a global array has an associated description vector DESCA.
33*  In the following comments, the character _ should be read as
34*  "of the global array".
35*
36*  NOTATION        STORED IN      EXPLANATION
37*  --------------- -------------- --------------------------------------
38*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
39*                                 DTYPE_A = 1.
40*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
41*                                 the BLACS process grid A is distribu-
42*                                 ted over. The context itself is glo-
43*                                 bal, but the handle (the integer
44*                                 value) may vary.
45*  M_A    (global) DESCA( M_ )    The number of rows in the global
46*                                 array A.
47*  N_A    (global) DESCA( N_ )    The number of columns in the global
48*                                 array A.
49*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
50*                                 the rows of the array.
51*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
52*                                 the columns of the array.
53*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
54*                                 row of the array A is distributed.
55*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
56*                                 first column of the array A is
57*                                 distributed.
58*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
59*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
60*
61*  Let K be the number of rows or columns of a distributed matrix,
62*  and assume that its process grid has dimension p x q.
63*  LOCr( K ) denotes the number of elements of K that a process
64*  would receive if K were distributed over the p processes of its
65*  process column.
66*  Similarly, LOCc( K ) denotes the number of elements of K that a
67*  process would receive if K were distributed over the q processes of
68*  its process row.
69*  The values of LOCr() and LOCc() may be determined via a call to the
70*  ScaLAPACK tool function, NUMROC:
71*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
72*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
73*  An upper bound for these quantities may be computed by:
74*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
75*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
76*
77*  Arguments
78*  =========
79*
80*  M       (global input) INTEGER
81*          The number of rows to be operated on, i.e. the number of rows
82*          of the distributed submatrix sub( A ). M >= 0.
83*
84*  N       (global input) INTEGER
85*          The number of columns to be operated on, i.e. the number of
86*          columns of the distributed submatrix sub( A ). N >= 0.
87*
88*  A       (local input/local output) COMPLEX pointer into the
89*          local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
90*          On entry, the local pieces of the M-by-N distributed matrix
91*          sub( A ) which is to be factored. On exit, if M <= N, the
92*          upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the
93*          M by M upper triangular matrix R; if M >= N, the elements on
94*          and above the (M-N)-th subdiagonal contain the M by N upper
95*          trapezoidal matrix R; the remaining elements, with the array
96*          TAU, represent the unitary matrix Q as a product of
97*          elementary reflectors (see Further Details).
98*
99*  IA      (global input) INTEGER
100*          The row index in the global array A indicating the first
101*          row of sub( A ).
102*
103*  JA      (global input) INTEGER
104*          The column index in the global array A indicating the
105*          first column of sub( A ).
106*
107*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
108*          The array descriptor for the distributed matrix A.
109*
110*  TAU     (local output) COMPLEX, array, dimension LOCr(IA+M-1)
111*          This array contains the scalar factors of the elementary
112*          reflectors. TAU is tied to the distributed matrix A.
113*
114*  WORK    (local workspace/local output) COMPLEX array,
115*                                                   dimension (LWORK)
116*          On exit, WORK(1) returns the minimal and optimal LWORK.
117*
118*  LWORK   (local or global input) INTEGER
119*          The dimension of the array WORK.
120*          LWORK is local input and must be at least
121*          LWORK >= Nq0 + MAX( 1, Mp0 ), where
122*
123*          IROFF = MOD( IA-1, MB_A ), ICOFF = MOD( JA-1, NB_A ),
124*          IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
125*          IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
126*          Mp0   = NUMROC( M+IROFF, MB_A, MYROW, IAROW, NPROW ),
127*          Nq0   = NUMROC( N+ICOFF, NB_A, MYCOL, IACOL, NPCOL ),
128*
129*          and NUMROC, INDXG2P are ScaLAPACK tool functions;
130*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
131*          the subroutine BLACS_GRIDINFO.
132*
133*          If LWORK = -1, then LWORK is global input and a workspace
134*          query is assumed; the routine only calculates the minimum
135*          and optimal size for all work arrays. Each of these
136*          values is returned in the first entry of the corresponding
137*          work array, and no error message is issued by PXERBLA.
138*
139*  INFO    (local output) INTEGER
140*          = 0:  successful exit
141*          < 0:  If the i-th argument is an array and the j-entry had
142*                an illegal value, then INFO = -(i*100+j), if the i-th
143*                argument is a scalar and had an illegal value, then
144*                INFO = -i.
145*
146*  Further Details
147*  ===============
148*
149*  The matrix Q is represented as a product of elementary reflectors
150*
151*     Q = H(ia)' H(ia+1)' . . . H(ia+k-1)', where k = min(m,n).
152*
153*  Each H(i) has the form
154*
155*     H(i) = I - tau * v * v'
156*
157*  where tau is a complex scalar, and v is a complex vector with
158*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on
159*  exit in A(ia+m-k+i-1,ja:ja+n-k+i-2), and tau in TAU(ia+m-k+i-1).
160*
161*  =====================================================================
162*
163*     .. Parameters ..
164      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
165     $                   LLD_, MB_, M_, NB_, N_, RSRC_
166      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
167     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
168     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
169      COMPLEX            ONE
170      PARAMETER          ( ONE = 1.0E+0 )
171*     ..
172*     .. Local Scalars ..
173      LOGICAL            LQUERY
174      CHARACTER          COLBTOP, ROWBTOP
175      INTEGER            IACOL, IAROW, I, ICTXT, J, K, LWMIN, MP, MYCOL,
176     $                   MYROW, NPCOL, NPROW, NQ
177      COMPLEX            AII
178*     ..
179*     .. External Subroutines ..
180      EXTERNAL           BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT,
181     $                   PCELSET, PCLACGV, PCLARF, PCLARFG,
182     $                   PB_TOPGET, PB_TOPSET, PXERBLA
183*     ..
184*     .. External Functions ..
185      INTEGER            INDXG2P, NUMROC
186      EXTERNAL           INDXG2P, NUMROC
187*     ..
188*     .. Intrinsic Functions ..
189      INTRINSIC          CMPLX, MAX, MIN, MOD, REAL
190*     ..
191*     .. Executable Statements ..
192*
193*     Get grid parameters
194*
195      ICTXT = DESCA( CTXT_ )
196      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
197*
198*     Test the input parameters
199*
200      INFO = 0
201      IF( NPROW.EQ.-1 ) THEN
202         INFO = -(600+CTXT_)
203      ELSE
204         CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO )
205         IF( INFO.EQ.0 ) THEN
206            IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ),
207     $                       NPROW )
208            IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ),
209     $                       NPCOL )
210            MP = NUMROC( M+MOD( IA-1, DESCA( MB_ ) ), DESCA( MB_ ),
211     $                   MYROW, IAROW, NPROW )
212            NQ = NUMROC( N+MOD( JA-1, DESCA( NB_ ) ), DESCA( NB_ ),
213     $                   MYCOL, IACOL, NPCOL )
214            LWMIN = NQ + MAX( 1, MP )
215*
216            WORK( 1 ) = CMPLX( REAL( LWMIN ) )
217            LQUERY = ( LWORK.EQ.-1 )
218            IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY )
219     $         INFO = -9
220         END IF
221      END IF
222*
223      IF( INFO.NE.0 ) THEN
224         CALL PXERBLA( ICTXT, 'PCGERQ2', -INFO )
225         CALL BLACS_ABORT( ICTXT, 1 )
226         RETURN
227      ELSE IF( LQUERY ) THEN
228         RETURN
229      END IF
230*
231*     Quick return if possible
232*
233      IF( M.EQ.0 .OR. N.EQ.0 )
234     $   RETURN
235*
236      CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP )
237      CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP )
238      CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ' ' )
239      CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', 'D-ring' )
240*
241      K = MIN( M, N )
242      DO 10 I = IA+K-1, IA, -1
243         J = JA + I - IA
244*
245*        Generate elementary reflector H(i) to annihilate
246*        A(i+m-k,ja:j+n-k-1)
247*
248         CALL PCLACGV( N-K+J-JA+1, A, I+M-K, JA, DESCA, DESCA( M_ ) )
249         CALL PCLARFG( N-K+J-JA+1, AII, I+M-K, J+N-K, A, I+M-K, JA,
250     $                 DESCA, DESCA( M_ ), TAU )
251*
252*        Apply H(i) to A(ia:i+m-k-1,ja:j+n-k) from the right
253*
254         CALL PCELSET( A, I+M-K, J+N-K, DESCA, ONE )
255         CALL PCLARF( 'Right', M-K+I-IA, N-K+J-JA+1, A, M-K+I, JA,
256     $                DESCA, DESCA( M_ ), TAU, A, IA, JA, DESCA, WORK )
257         CALL PCELSET( A, I+M-K, J+N-K, DESCA, AII )
258         CALL PCLACGV( N-K+J-JA+1, A, I+M-K, JA, DESCA, DESCA( M_ ) )
259*
260   10 CONTINUE
261*
262      CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP )
263      CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP )
264*
265      WORK( 1 ) = CMPLX( REAL( LWMIN ) )
266*
267      RETURN
268*
269*     End of PCGERQ2
270*
271      END
272