1      SUBROUTINE PDLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA,
2     $                    W, Z, U, LDU, BUF, INDX, INDCOL, INDROW,
3     $                    INDXR, INDXC, CTOT, NPCOL, INFO )
4*
5*  -- ScaLAPACK auxiliary routine (version 1.7) --
6*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7*     and University of California, Berkeley.
8*     December 31, 1998
9*
10*     .. Scalar Arguments ..
11      INTEGER            DCOL, DROW, ICTXT, INFO, K, LDU, N, NB, NPCOL
12      DOUBLE PRECISION   RHO
13*     ..
14*     .. Array Arguments ..
15      INTEGER            CTOT( 0: NPCOL-1, 4 ), INDCOL( * ),
16     $                   INDROW( * ), INDX( * ), INDXC( * ), INDXR( * )
17      DOUBLE PRECISION   BUF( * ), D( * ), DLAMDA( * ), U( LDU, * ),
18     $                   W( * ), Z( * )
19*     ..
20*
21*  Purpose
22*  =======
23*
24*  PDLAED3 finds the roots of the secular equation, as defined by the
25*  values in D, W, and RHO, between 1 and K.  It makes the
26*  appropriate calls to SLAED4
27*
28*  This code makes very mild assumptions about floating point
29*  arithmetic. It will work on machines with a guard digit in
30*  add/subtract, or on those binary machines without guard digits
31*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
32*  It could conceivably fail on hexadecimal or decimal machines
33*  without guard digits, but we know of none.
34*
35*  Arguments
36*  =========
37*
38*  ICTXT  (global input) INTEGER
39*         The BLACS context handle, indicating the global context of
40*         the operation on the matrix. The context itself is global.
41*
42*  K      (output) INTEGER
43*         The number of non-deflated eigenvalues, and the order of the
44*         related secular equation. 0 <= K <=N.
45*
46*  N      (input) INTEGER
47*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
48*
49*  NB      (global input) INTEGER
50*          The blocking factor used to distribute the columns of the
51*          matrix. NB >= 1.
52*
53*  D      (input/output) DOUBLE PRECISION array, dimension (N)
54*         On entry, D contains the eigenvalues of the two submatrices to
55*         be combined.
56*         On exit, D contains the trailing (N-K) updated eigenvalues
57*         (those which were deflated) sorted into increasing order.
58*
59*  DROW   (global input) INTEGER
60*          The process row over which the first row of the matrix D is
61*          distributed. 0 <= DROW < NPROW.
62*
63*  DCOL   (global input) INTEGER
64*          The process column over which the first column of the
65*          matrix D is distributed. 0 <= DCOL < NPCOL.
66*
67*  RHO    (global input/output) DOUBLE PRECISION
68*         On entry, the off-diagonal element associated with the rank-1
69*         cut which originally split the two submatrices which are now
70*         being recombined.
71*         On exit, RHO has been modified to the value required by
72*         PDLAED3.
73*
74*  DLAMDA (global output) DOUBLE PRECISION array, dimension (N)
75*         A copy of the first K eigenvalues which will be used by
76*         DLAED4 to form the secular equation.
77*
78*  W      (global output) DOUBLE PRECISION array, dimension (N)
79*         The first k values of the final deflation-altered z-vector
80*         which will be passed to DLAED4.
81*
82*  Z      (global input) DOUBLE PRECISION array, dimension (N)
83*         On entry, Z contains the updating vector (the last
84*         row of the first sub-eigenvector matrix and the first row of
85*         the second sub-eigenvector matrix).
86*         On exit, the contents of Z have been destroyed by the updating
87*         process.
88*
89*  U     (global output) DOUBLE PRECISION array
90*         global dimension (N, N), local dimension (LDU, NQ).
91*         (See PDLAED0 for definition of NQ.)
92*         Q  contains the orthonormal eigenvectors of the symmetric
93*         tridiagonal matrix.
94*
95*  LDU    (input) INTEGER
96*         The leading dimension of the array U.
97*
98*  BUF    (workspace) DOUBLE PRECISION array, dimension 3*N
99*
100*
101*  INDX   (workspace) INTEGER array, dimension (N)
102*         The permutation used to sort the contents of DLAMDA into
103*         ascending order.
104*
105*  INDCOL (workspace) INTEGER array, dimension (N)
106*
107*
108*  INDROW (workspace) INTEGER array, dimension (N)
109*
110*
111*  INDXR (workspace) INTEGER array, dimension (N)
112*
113*
114*  INDXC (workspace) INTEGER array, dimension (N)
115*
116*  CTOT   (workspace) INTEGER array, dimension( NPCOL, 4)
117*
118*  NPCOL   (global input) INTEGER
119*          The total number of columns over which the distributed
120*           submatrix is distributed.
121*
122*  INFO   (output) INTEGER
123*          = 0:  successful exit.
124*          < 0:  if INFO = -i, the i-th argument had an illegal value.
125*          > 0:  The algorithm failed to compute the ith eigenvalue.
126*
127*  =====================================================================
128*
129*     .. Parameters ..
130      DOUBLE PRECISION   ONE
131      PARAMETER          ( ONE = 1.0D+0 )
132*     ..
133*     .. Local Scalars ..
134      INTEGER            COL, GI, I, IINFO, IIU, IPD, IU, J, JJU, JU,
135     $                   KK, KL, KLC, KLR, MYCOL, MYKL, MYKLR, MYROW,
136     $                   NPROW, PDC, PDR, ROW
137      DOUBLE PRECISION   AUX, TEMP
138*     ..
139*     .. External Functions ..
140      INTEGER            INDXG2L
141      DOUBLE PRECISION   DLAMC3, DNRM2
142      EXTERNAL           INDXG2L, DLAMC3, DNRM2
143*     ..
144*     .. External Subroutines ..
145      EXTERNAL           BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D,
146     $                   DGERV2D, DGESD2D, DLAED4
147*     ..
148*     .. Intrinsic Functions ..
149      INTRINSIC          MOD, SIGN, SQRT
150*     ..
151*     .. Executable Statements ..
152*
153*     Test the input parameters.
154*
155      INFO = 0
156*
157*     Quick return if possible
158*
159      IF( K.EQ.0 )
160     $   RETURN
161*
162      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
163*
164      ROW = DROW
165      COL = DCOL
166      DO 20 I = 1, N, NB
167         DO 10 J = 0, NB - 1
168            IF( I+J.LE.N ) THEN
169               INDROW( I+J ) = ROW
170               INDCOL( I+J ) = COL
171            END IF
172   10    CONTINUE
173         ROW = MOD( ROW+1, NPROW )
174         COL = MOD( COL+1, NPCOL )
175   20 CONTINUE
176*
177      MYKL = CTOT( MYCOL, 1 ) + CTOT( MYCOL, 2 ) + CTOT( MYCOL, 3 )
178      KLR = MYKL / NPROW
179      IF( MYROW.EQ.DROW ) THEN
180         MYKLR = KLR + MOD( MYKL, NPROW )
181      ELSE
182         MYKLR = KLR
183      END IF
184      PDC = 1
185      COL = DCOL
186   30 CONTINUE
187      IF( MYCOL.NE.COL ) THEN
188         PDC = PDC + CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 )
189         COL = MOD( COL+1, NPCOL )
190         GO TO 30
191      END IF
192      PDR = PDC
193      KL = KLR + MOD( MYKL, NPROW )
194      ROW = DROW
195   40 CONTINUE
196      IF( MYROW.NE.ROW ) THEN
197         PDR = PDR + KL
198         KL = KLR
199         ROW = MOD( ROW+1, NPROW )
200         GO TO 40
201      END IF
202*
203      DO 50 I = 1, K
204         DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
205         Z( I ) = ONE
206   50 CONTINUE
207      IF( MYKLR.GT.0 ) THEN
208         KK = PDR
209         DO 80 I = 1, MYKLR
210            CALL DLAED4( K, KK, DLAMDA, W, BUF, RHO, BUF( K+I ), IINFO )
211            IF( IINFO.NE.0 ) THEN
212               INFO = KK
213            END IF
214*
215*     ..Compute part of z
216*
217            DO 60 J = 1, KK - 1
218               Z( J ) = Z( J )*( BUF( J ) /
219     $                  ( DLAMDA( J )-DLAMDA( KK ) ) )
220   60       CONTINUE
221            Z( KK ) = Z( KK )*BUF( KK )
222            DO 70 J = KK + 1, K
223               Z( J ) = Z( J )*( BUF( J ) /
224     $                  ( DLAMDA( J )-DLAMDA( KK ) ) )
225   70       CONTINUE
226            KK = KK + 1
227   80    CONTINUE
228*
229         IF( MYROW.NE.DROW ) THEN
230            CALL DCOPY( K, Z, 1, BUF, 1 )
231            CALL DGESD2D( ICTXT, K+MYKLR, 1, BUF, K+MYKLR, DROW, MYCOL )
232         ELSE
233            IPD = 2*K + 1
234            CALL DCOPY( MYKLR, BUF( K+1 ), 1, BUF( IPD ), 1 )
235            IF( KLR.GT.0 ) THEN
236               IPD = MYKLR + IPD
237               ROW = MOD( DROW+1, NPROW )
238               DO 100 I = 1, NPROW - 1
239                  CALL DGERV2D( ICTXT, K+KLR, 1, BUF, K+KLR, ROW,
240     $                          MYCOL )
241                  CALL DCOPY( KLR, BUF( K+1 ), 1, BUF( IPD ), 1 )
242                  DO 90 J = 1, K
243                     Z( J ) = Z( J )*BUF( J )
244   90             CONTINUE
245                  IPD = IPD + KLR
246                  ROW = MOD( ROW+1, NPROW )
247  100          CONTINUE
248            END IF
249         END IF
250      END IF
251*
252      IF( MYROW.EQ.DROW ) THEN
253         IF( MYCOL.NE.DCOL .AND. MYKL.NE.0 ) THEN
254            CALL DCOPY( K, Z, 1, BUF, 1 )
255            CALL DCOPY( MYKL, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 )
256            CALL DGESD2D( ICTXT, K+MYKL, 1, BUF, K+MYKL, MYROW, DCOL )
257         ELSE IF( MYCOL.EQ.DCOL ) THEN
258            IPD = 2*K + 1
259            COL = DCOL
260            KL = MYKL
261            DO 120 I = 1, NPCOL - 1
262               IPD = IPD + KL
263               COL = MOD( COL+1, NPCOL )
264               KL = CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 )
265               IF( KL.NE.0 ) THEN
266                  CALL DGERV2D( ICTXT, K+KL, 1, BUF, K+KL, MYROW, COL )
267                  CALL DCOPY( KL, BUF( K+1 ), 1, BUF( IPD ), 1 )
268                  DO 110 J = 1, K
269                     Z( J ) = Z( J )*BUF( J )
270  110             CONTINUE
271               END IF
272  120       CONTINUE
273            DO 130 I = 1, K
274               Z( I ) = SIGN( SQRT( -Z( I ) ), W( I ) )
275  130       CONTINUE
276*
277         END IF
278      END IF
279*
280*     Diffusion
281*
282      IF( MYROW.EQ.DROW .AND. MYCOL.EQ.DCOL ) THEN
283         CALL DCOPY( K, Z, 1, BUF, 1 )
284         CALL DCOPY( K, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 )
285         CALL DGEBS2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K )
286      ELSE
287         CALL DGEBR2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K, DROW, DCOL )
288         CALL DCOPY( K, BUF, 1, Z, 1 )
289      END IF
290*
291*     Copy of D at the good place
292*
293      KLC = 0
294      KLR = 0
295      DO 140 I = 1, K
296         GI = INDX( I )
297         D( GI ) = BUF( K+I )
298         COL = INDCOL( GI )
299         ROW = INDROW( GI )
300         IF( COL.EQ.MYCOL ) THEN
301            KLC = KLC + 1
302            INDXC( KLC ) = I
303         END IF
304         IF( ROW.EQ.MYROW ) THEN
305            KLR = KLR + 1
306            INDXR( KLR ) = I
307         END IF
308  140 CONTINUE
309*
310*     Compute eigenvectors of the modified rank-1 modification.
311*
312      IF( MYKL.NE.0 ) THEN
313         DO 180 J = 1, MYKL
314            KK = INDXC( J )
315            JU = INDX( KK )
316            JJU = INDXG2L( JU, NB, J, J, NPCOL )
317            CALL DLAED4( K, KK, DLAMDA, W, BUF, RHO, AUX, IINFO )
318            IF( IINFO.NE.0 ) THEN
319               INFO = KK
320            END IF
321            IF( K.EQ.1 .OR. K.EQ.2 ) THEN
322               DO 150 I = 1, KLR
323                  KK = INDXR( I )
324                  IU = INDX( KK )
325                  IIU = INDXG2L( IU, NB, J, J, NPROW )
326                  U( IIU, JJU ) = BUF( KK )
327  150          CONTINUE
328               GO TO 180
329            END IF
330*
331            DO 160 I = 1, K
332               BUF( I ) = Z( I ) / BUF( I )
333  160       CONTINUE
334            TEMP = DNRM2( K, BUF, 1 )
335            DO 170 I = 1, KLR
336               KK = INDXR( I )
337               IU = INDX( KK )
338               IIU = INDXG2L( IU, NB, J, J, NPROW )
339               U( IIU, JJU ) = BUF( KK ) / TEMP
340  170       CONTINUE
341*
342  180    CONTINUE
343      END IF
344*
345  190 CONTINUE
346*
347      RETURN
348*
349*     End of PDLAED3
350*
351      END
352