1      SUBROUTINE PCLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY,
2     $                    DESCY, WORK )
3*
4*  -- ScaLAPACK auxiliary routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     May 1, 1997
8*
9*     .. Scalar Arguments ..
10      INTEGER             IA, IY, JA, JY, K, N, NB
11*     ..
12*     .. Array Arguments ..
13      INTEGER             DESCA( * ), DESCY( * )
14      COMPLEX             A( * ), T( * ), TAU( * ), WORK( * ), Y( * )
15*     ..
16*
17*  Purpose
18*  =======
19*
20*  PCLAHRD reduces the first NB columns of a complex general
21*  N-by-(N-K+1) distributed matrix A(IA:IA+N-1,JA:JA+N-K) so that
22*  elements below the k-th subdiagonal are zero. The reduction is
23*  performed by an unitary similarity transformation Q' * A * Q. The
24*  routine returns the matrices V and T which determine Q as a block
25*  reflector I - V*T*V', and also the matrix Y = A * V * T.
26*
27*  This is an auxiliary routine called by PCGEHRD. In the following
28*  comments sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1).
29*
30*  Arguments
31*  =========
32*
33*  N       (global input) INTEGER
34*          The number of rows and columns to be operated on, i.e. the
35*          order of the distributed submatrix sub( A ).
36*          N >= 0.
37*
38*  K       (global input) INTEGER
39*          The offset for the reduction. Elements below the k-th
40*          subdiagonal in the first NB columns are reduced to zero.
41*
42*  NB      (global input) INTEGER
43*          The number of columns to be reduced.
44*
45*  A       (local input/local output) COMPLEX pointer into
46*          the local memory to an array of dimension (LLD_A,
47*          LOCc(JA+N-K)). On entry, this array contains the the local
48*          pieces of the N-by-(N-K+1) general distributed matrix
49*          A(IA:IA+N-1,JA:JA+N-K). On exit, the elements on and above
50*          the k-th subdiagonal in the first NB columns are overwritten
51*          with the corresponding elements of the reduced distributed
52*          matrix; the elements below the k-th subdiagonal, with the
53*          array TAU, represent the matrix Q as a product of elementary
54*          reflectors. The other columns of A(IA:IA+N-1,JA:JA+N-K) are
55*          unchanged. See Further Details.
56*
57*  IA      (global input) INTEGER
58*          The row index in the global array A indicating the first
59*          row of sub( A ).
60*
61*  JA      (global input) INTEGER
62*          The column index in the global array A indicating the
63*          first column of sub( A ).
64*
65*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
66*          The array descriptor for the distributed matrix A.
67*
68*  TAU     (local output) COMPLEX array, dimension LOCc(JA+N-2)
69*          The scalar factors of the elementary reflectors (see Further
70*          Details). TAU is tied to the distributed matrix A.
71*
72*  T       (local output) COMPLEX array, dimension (NB_A,NB_A)
73*          The upper triangular matrix T.
74*
75*  Y       (local output) COMPLEX pointer into the local memory
76*          to an array of dimension (LLD_Y,NB_A). On exit, this array
77*          contains the local pieces of the N-by-NB distributed
78*          matrix Y. LLD_Y >= LOCr(IA+N-1).
79*
80*  IY      (global input) INTEGER
81*          The row index in the global array Y indicating the first
82*          row of sub( Y ).
83*
84*  JY      (global input) INTEGER
85*          The column index in the global array Y indicating the
86*          first column of sub( Y ).
87*
88*  DESCY   (global and local input) INTEGER array of dimension DLEN_.
89*          The array descriptor for the distributed matrix Y.
90*
91*  WORK    (local workspace) COMPLEX array, dimension (NB)
92*
93*  Further Details
94*  ===============
95*
96*  The matrix Q is represented as a product of nb elementary reflectors
97*
98*     Q = H(1) H(2) . . . H(nb).
99*
100*  Each H(i) has the form
101*
102*     H(i) = I - tau * v * v'
103*
104*  where tau is a complex scalar, and v is a complex vector with
105*  v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
106*  A(ia+i+k:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
107*
108*  The elements of the vectors v together form the (n-k+1)-by-nb matrix
109*  V which is needed, with T and Y, to apply the transformation to the
110*  unreduced part of the matrix, using an update of the form:
111*  A(ia:ia+n-1,ja:ja+n-k) := (I-V*T*V')*(A(ia:ia+n-1,ja:ja+n-k)-Y*V').
112*
113*  The contents of A(ia:ia+n-1,ja:ja+n-k) on exit are illustrated by the
114*  following example with n = 7, k = 3 and nb = 2:
115*
116*     ( a   h   a   a   a )
117*     ( a   h   a   a   a )
118*     ( a   h   a   a   a )
119*     ( h   h   a   a   a )
120*     ( v1  h   a   a   a )
121*     ( v1  v2  a   a   a )
122*     ( v1  v2  a   a   a )
123*
124*  where a denotes an element of the original matrix
125*  A(ia:ia+n-1,ja:ja+n-k), h denotes a modified element of the upper
126*  Hessenberg matrix H, and vi denotes an element of the vector
127*  defining H(i).
128*
129*  =====================================================================
130*
131*     .. Parameters ..
132      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
133     $                   LLD_, MB_, M_, NB_, N_, RSRC_
134      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
135     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
136     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
137      COMPLEX            ONE, ZERO
138      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
139     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
140*     ..
141*     .. Local Scalars ..
142      LOGICAL            IPROC
143      INTEGER            I, IACOL, IAROW, ICTXT, IOFF, II, J, JJ, JL,
144     $                   JT, JW, L, MYROW, MYCOL, NPCOL, NPROW, NQ
145      COMPLEX            EI
146*     ..
147*     .. Local Arrays ..
148      INTEGER            DESCW( DLEN_ )
149*     ..
150*     .. External Functions ..
151      INTEGER            NUMROC
152      EXTERNAL           NUMROC
153*     ..
154*     .. External Subroutines ..
155      EXTERNAL           BLACS_GRIDINFO, CAXPY, CCOPY, CSCAL,
156     $                   CTRMV, DESCSET, INFOG2L, PCELSET,
157     $                   PCGEMV, PCLACGV, PCLARFG, PCSCAL
158*     ..
159*     .. Intrinsic Functions ..
160      INTRINSIC          MIN, MOD
161*     ..
162*     .. Executable Statements ..
163*
164*     Quick return if possible
165*
166      IF( N.LE.1 )
167     $   RETURN
168*
169      ICTXT = DESCA( CTXT_ )
170      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
171*
172      IOFF = MOD( JA-1, DESCA( NB_ ) )
173      CALL INFOG2L( IA+K, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II,
174     $              JJ, IAROW, IACOL )
175*
176      IPROC = ( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL )
177      NQ = NUMROC( N+JA-1, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
178      IF( MYCOL.EQ.IACOL )
179     $   NQ = NQ - IOFF
180*
181      EI = ZERO
182
183      JW = IOFF + 1
184      CALL DESCSET( DESCW, 1, DESCA( MB_ ), 1, DESCA( MB_ ), IAROW,
185     $              IACOL, ICTXT, 1 )
186*
187      DO 10 L = 1, NB
188         I = IA + K + L - 2
189         J = JA + L - 1
190*
191         IF( L.GT.1 ) THEN
192*
193*           Update A(ia:ia+n-1,j)
194*
195*           Compute i-th column of A - Y * V'
196*
197            CALL PCLACGV( L-1, A, I, JA, DESCA, DESCA( M_ ) )
198            CALL PCGEMV( 'No transpose', N, L-1, -ONE, Y, IY, JY, DESCY,
199     $                   A, I, JA, DESCA, DESCA( M_ ), ONE, A, IA, J,
200     $                   DESCA, 1 )
201            CALL PCLACGV( L-1, A, I, JA, DESCA, DESCA( M_ ) )
202*
203*           Apply I - V * T' * V' to this column (call it b) from the
204*           left, using the last column of T as workspace
205*
206*           Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows)
207*                    ( V2 )             ( b2 )
208*
209*           where V1 is unit lower triangular
210*
211*           w := V1' * b1
212*
213            IF( IPROC ) THEN
214               CALL CCOPY( L-1, A( (JJ+L-2)*DESCA( LLD_ )+II ), 1,
215     $                     WORK( JW ), 1 )
216               CALL CTRMV( 'Lower', 'Conjugate transpose', 'Unit', L-1,
217     $                     A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ),
218     $                     WORK( JW ), 1 )
219            END IF
220*
221*           w := w + V2'*b2
222*
223            CALL PCGEMV( 'Conjugate transpose', N-K-L+1, L-1, ONE, A,
224     $                   I+1, JA, DESCA, A, I+1, J, DESCA, 1, ONE, WORK,
225     $                   1, JW, DESCW, DESCW( M_ ) )
226*
227*           w := T'*w
228*
229            IF( IPROC )
230     $         CALL CTRMV( 'Upper', 'Conjugate transpose', 'Non-unit',
231     $                     L-1, T, DESCA( NB_ ), WORK( JW ), 1 )
232*
233*           b2 := b2 - V2*w
234*
235            CALL PCGEMV( 'No transpose', N-K-L+1, L-1, -ONE, A, I+1, JA,
236     $                   DESCA, WORK, 1, JW, DESCW, DESCW( M_ ), ONE,
237     $                   A, I+1, J, DESCA, 1 )
238*
239*           b1 := b1 - V1*w
240*
241            IF( IPROC ) THEN
242               CALL CTRMV( 'Lower', 'No transpose', 'Unit', L-1,
243     $                     A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ),
244     $                     WORK( JW ), 1 )
245               CALL CAXPY( L-1, -ONE, WORK( JW ), 1,
246     $                     A( ( JJ+L-2 )*DESCA( LLD_ )+II ), 1 )
247            END IF
248            CALL PCELSET( A, I, J-1, DESCA, EI )
249         END IF
250*
251*        Generate the elementary reflector H(i) to annihilate
252*        A(ia+k+i:ia+n-1,j)
253*
254         CALL PCLARFG( N-K-L+1, EI, I+1, J, A, MIN( I+2, N+IA-1 ), J,
255     $                 DESCA, 1, TAU )
256         CALL PCELSET( A, I+1, J, DESCA, ONE )
257*
258*        Compute  Y(iy:y+n-1,jy+l-1)
259*
260         CALL PCGEMV( 'No transpose', N, N-K-L+1, ONE, A, IA, J+1,
261     $                DESCA, A, I+1, J, DESCA, 1, ZERO, Y, IY, JY+L-1,
262     $                DESCY, 1 )
263         CALL PCGEMV( 'Conjugate transpose', N-K-L+1, L-1, ONE, A, I+1,
264     $                JA, DESCA, A, I+1, J, DESCA, 1, ZERO, WORK, 1, JW,
265     $                DESCW, DESCW( M_ ) )
266         CALL PCGEMV( 'No transpose', N, L-1, -ONE, Y, IY, JY, DESCY,
267     $                WORK, 1, JW, DESCW, DESCW( M_ ), ONE, Y, IY,
268     $                JY+L-1, DESCY, 1 )
269         JL = MIN( JJ+L-1, JA+NQ-1 )
270         CALL PCSCAL( N, TAU( JL ), Y, IY, JY+L-1, DESCY, 1 )
271*
272*        Compute T(1:i,i)
273*
274         IF( IPROC ) THEN
275            JT = ( L-1 ) * DESCA( NB_ )
276            CALL CSCAL( L-1, -TAU( JL ), WORK( JW ), 1 )
277            CALL CCOPY( L-1, WORK( JW ), 1, T( JT+1 ), 1 )
278            CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', L-1, T,
279     $                  DESCA( NB_ ), T( JT+1 ), 1 )
280            T( JT+L ) = TAU( JL )
281         END IF
282   10 CONTINUE
283*
284      CALL PCELSET( A, K+NB+IA-1, J, DESCA, EI )
285*
286      RETURN
287*
288*     End of PCLAHRD
289*
290      END
291