1      SUBROUTINE PSLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA,
2     $                    W, Z, U, LDU, BUF, INDX, INDCOL, INDROW,
3     $                    INDXR, INDXC, CTOT, NPCOL, INFO )
4*
5*  -- ScaLAPACK auxiliary routine (version 1.7) --
6*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7*     and University of California, Berkeley.
8*     December 31, 1998
9*
10*     .. Scalar Arguments ..
11      INTEGER            DCOL, DROW, ICTXT, INFO, K, LDU, N, NB, NPCOL
12      REAL               RHO
13*     ..
14*     .. Array Arguments ..
15      INTEGER            CTOT( 0: NPCOL-1, 4 ), INDCOL( * ),
16     $                   INDROW( * ), INDX( * ), INDXC( * ), INDXR( * )
17      REAL               BUF( * ), D( * ), DLAMDA( * ), U( LDU, * ),
18     $                   W( * ), Z( * )
19*     ..
20*
21*  Purpose
22*  =======
23*
24*  PSLAED3 finds the roots of the secular equation, as defined by the
25*  values in D, W, and RHO, between 1 and K.  It makes the
26*  appropriate calls to SLAED4
27*
28*  This code makes very mild assumptions about floating point
29*  arithmetic. It will work on machines with a guard digit in
30*  add/subtract, or on those binary machines without guard digits
31*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
32*  It could conceivably fail on hexadecimal or decimal machines
33*  without guard digits, but we know of none.
34*
35*  Arguments
36*  =========
37*
38*  ICTXT  (global input) INTEGER
39*         The BLACS context handle, indicating the global context of
40*         the operation on the matrix. The context itself is global.
41*
42*  K      (output) INTEGER
43*         The number of non-deflated eigenvalues, and the order of the
44*         related secular equation. 0 <= K <=N.
45*
46*  N      (input) INTEGER
47*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
48*
49*  NB      (global input) INTEGER
50*          The blocking factor used to distribute the columns of the
51*          matrix. NB >= 1.
52*
53*  D      (input/output) REAL array, dimension (N)
54*         On entry, D contains the eigenvalues of the two submatrices to
55*         be combined.
56*         On exit, D contains the trailing (N-K) updated eigenvalues
57*         (those which were deflated) sorted into increasing order.
58*
59*  DROW   (global input) INTEGER
60*          The process row over which the first row of the matrix D is
61*          distributed. 0 <= DROW < NPROW.
62*
63*  DCOL   (global input) INTEGER
64*          The process column over which the first column of the
65*          matrix D is distributed. 0 <= DCOL < NPCOL.
66*
67*  RHO    (global input/output) REAL
68*         On entry, the off-diagonal element associated with the rank-1
69*         cut which originally split the two submatrices which are now
70*         being recombined.
71*         On exit, RHO has been modified to the value required by
72*         PSLAED3.
73*
74*  DLAMDA (global output) REAL array, dimension (N)
75*         A copy of the first K eigenvalues which will be used by
76*         SLAED4 to form the secular equation.
77*
78*  W      (global output) REAL array, dimension (N)
79*         The first k values of the final deflation-altered z-vector
80*         which will be passed to SLAED4.
81*
82*  Z      (global input) REAL array, dimension (N)
83*         On entry, Z contains the updating vector (the last
84*         row of the first sub-eigenvector matrix and the first row of
85*         the second sub-eigenvector matrix).
86*         On exit, the contents of Z have been destroyed by the updating
87*         process.
88*
89*  U     (global output) REAL array
90*         global dimension (N, N), local dimension (LDU, NQ).
91*         (See PSLAED0 for definition of NQ.)
92*         Q  contains the orthonormal eigenvectors of the symmetric
93*         tridiagonal matrix.
94*
95*  LDU    (input) INTEGER
96*         The leading dimension of the array U.
97*
98*  BUF    (workspace) REAL array, dimension 3*N
99*
100*
101*  INDX   (workspace) INTEGER array, dimension (N)
102*         The permutation used to sort the contents of DLAMDA into
103*         ascending order.
104*
105*  INDCOL (workspace) INTEGER array, dimension (N)
106*
107*
108*  INDROW (workspace) INTEGER array, dimension (N)
109*
110*
111*  INDXR (workspace) INTEGER array, dimension (N)
112*
113*
114*  INDXC (workspace) INTEGER array, dimension (N)
115*
116*  CTOT   (workspace) INTEGER array, dimension( NPCOL, 4)
117*
118*  NPCOL   (global input) INTEGER
119*          The total number of columns over which the distributed
120*           submatrix is distributed.
121*
122*  INFO   (output) INTEGER
123*          = 0:  successful exit.
124*          < 0:  if INFO = -i, the i-th argument had an illegal value.
125*          > 0:  The algorithm failed to compute the ith eigenvalue.
126*
127*  =====================================================================
128*
129*     .. Parameters ..
130      REAL               ONE
131      PARAMETER          ( ONE = 1.0E0 )
132*     ..
133*     .. Local Scalars ..
134      INTEGER            COL, GI, I, IINFO, IIU, IPD, IU, J, JJU, JU,
135     $                   KK, KL, KLC, KLR, MYCOL, MYKL, MYKLR, MYROW,
136     $                   NPROW, PDC, PDR, ROW
137      REAL               AUX, TEMP
138*     ..
139*     .. External Functions ..
140      INTEGER            INDXG2L
141      REAL               SLAMC3, SNRM2
142      EXTERNAL           INDXG2L, SLAMC3, SNRM2
143*     ..
144*     .. External Subroutines ..
145      EXTERNAL           BLACS_GRIDINFO, SCOPY, SGEBR2D, SGEBS2D,
146     $                   SGERV2D, SGESD2D, SLAED4
147*     ..
148*     .. Intrinsic Functions ..
149      INTRINSIC          MOD, SIGN, SQRT
150*     ..
151*     .. Executable Statements ..
152*
153*     Test the input parameters.
154*
155      IINFO = 0
156*
157*     Quick return if possible
158*
159      IF( K.EQ.0 )
160     $   RETURN
161*
162      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
163*
164      ROW = DROW
165      COL = DCOL
166      DO 20 I = 1, N, NB
167         DO 10 J = 0, NB - 1
168            INDROW( I+J ) = ROW
169            INDCOL( I+J ) = COL
170   10    CONTINUE
171         ROW = MOD( ROW+1, NPROW )
172         COL = MOD( COL+1, NPCOL )
173   20 CONTINUE
174*
175      MYKL = CTOT( MYCOL, 1 ) + CTOT( MYCOL, 2 ) + CTOT( MYCOL, 3 )
176      KLR = MYKL / NPROW
177      IF( MYROW.EQ.DROW ) THEN
178         MYKLR = KLR + MOD( MYKL, NPROW )
179      ELSE
180         MYKLR = KLR
181      END IF
182      PDC = 1
183      COL = DCOL
184   30 CONTINUE
185      IF( MYCOL.NE.COL ) THEN
186         PDC = PDC + CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 )
187         COL = MOD( COL+1, NPCOL )
188         GO TO 30
189      END IF
190      PDR = PDC
191      KL = KLR + MOD( MYKL, NPROW )
192      ROW = DROW
193   40 CONTINUE
194      IF( MYROW.NE.ROW ) THEN
195         PDR = PDR + KL
196         KL = KLR
197         ROW = MOD( ROW+1, NPROW )
198         GO TO 40
199      END IF
200*
201      DO 50 I = 1, K
202         DLAMDA( I ) = SLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
203         Z( I ) = ONE
204   50 CONTINUE
205      IF( MYKLR.GT.0 ) THEN
206         KK = PDR
207         DO 80 I = 1, MYKLR
208            CALL SLAED4( K, KK, DLAMDA, W, BUF, RHO, BUF( K+I ), IINFO )
209            IF( IINFO.NE.0 ) THEN
210               INFO = KK
211            END IF
212*
213*     ..Compute part of z
214*
215            DO 60 J = 1, KK - 1
216               Z( J ) = Z( J )*( BUF( J ) /
217     $                  ( DLAMDA( J )-DLAMDA( KK ) ) )
218   60       CONTINUE
219            Z( KK ) = Z( KK )*BUF( KK )
220            DO 70 J = KK + 1, K
221               Z( J ) = Z( J )*( BUF( J ) /
222     $                  ( DLAMDA( J )-DLAMDA( KK ) ) )
223   70       CONTINUE
224            KK = KK + 1
225   80    CONTINUE
226*
227         IF( MYROW.NE.DROW ) THEN
228            CALL SCOPY( K, Z, 1, BUF, 1 )
229            CALL SGESD2D( ICTXT, K+MYKLR, 1, BUF, K+MYKLR, DROW, MYCOL )
230         ELSE
231            IPD = 2*K + 1
232            CALL SCOPY( MYKLR, BUF( K+1 ), 1, BUF( IPD ), 1 )
233            IF( KLR.GT.0 ) THEN
234               IPD = MYKLR + IPD
235               ROW = MOD( DROW+1, NPROW )
236               DO 100 I = 1, NPROW - 1
237                  CALL SGERV2D( ICTXT, K+KLR, 1, BUF, K+KLR, ROW,
238     $                          MYCOL )
239                  CALL SCOPY( KLR, BUF( K+1 ), 1, BUF( IPD ), 1 )
240                  DO 90 J = 1, K
241                     Z( J ) = Z( J )*BUF( J )
242   90             CONTINUE
243                  IPD = IPD + KLR
244                  ROW = MOD( ROW+1, NPROW )
245  100          CONTINUE
246            END IF
247         END IF
248      END IF
249*
250      IF( MYROW.EQ.DROW ) THEN
251         IF( MYCOL.NE.DCOL .AND. MYKL.NE.0 ) THEN
252            CALL SCOPY( K, Z, 1, BUF, 1 )
253            CALL SCOPY( MYKL, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 )
254            CALL SGESD2D( ICTXT, K+MYKL, 1, BUF, K+MYKL, MYROW, DCOL )
255         ELSE IF( MYCOL.EQ.DCOL ) THEN
256            IPD = 2*K + 1
257            COL = DCOL
258            KL = MYKL
259            DO 120 I = 1, NPCOL - 1
260               IPD = IPD + KL
261               COL = MOD( COL+1, NPCOL )
262               KL = CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 )
263               IF( KL.NE.0 ) THEN
264                  CALL SGERV2D( ICTXT, K+KL, 1, BUF, K+KL, MYROW, COL )
265                  CALL SCOPY( KL, BUF( K+1 ), 1, BUF( IPD ), 1 )
266                  DO 110 J = 1, K
267                     Z( J ) = Z( J )*BUF( J )
268  110             CONTINUE
269               END IF
270  120       CONTINUE
271            DO 130 I = 1, K
272               Z( I ) = SIGN( SQRT( -Z( I ) ), W( I ) )
273  130       CONTINUE
274*
275         END IF
276      END IF
277*
278*     Diffusion
279*
280      IF( MYROW.EQ.DROW .AND. MYCOL.EQ.DCOL ) THEN
281         CALL SCOPY( K, Z, 1, BUF, 1 )
282         CALL SCOPY( K, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 )
283         CALL SGEBS2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K )
284      ELSE
285         CALL SGEBR2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K, DROW, DCOL )
286         CALL SCOPY( K, BUF, 1, Z, 1 )
287      END IF
288*
289*     Copy of D at the good place
290*
291      KLC = 0
292      KLR = 0
293      DO 140 I = 1, K
294         GI = INDX( I )
295         D( GI ) = BUF( K+I )
296         COL = INDCOL( GI )
297         ROW = INDROW( GI )
298         IF( COL.EQ.MYCOL ) THEN
299            KLC = KLC + 1
300            INDXC( KLC ) = I
301         END IF
302         IF( ROW.EQ.MYROW ) THEN
303            KLR = KLR + 1
304            INDXR( KLR ) = I
305         END IF
306  140 CONTINUE
307*
308*     Compute eigenvectors of the modified rank-1 modification.
309*
310      IF( MYKL.NE.0 ) THEN
311         DO 180 J = 1, MYKL
312            KK = INDXC( J )
313            JU = INDX( KK )
314            JJU = INDXG2L( JU, NB, J, J, NPCOL )
315            CALL SLAED4( K, KK, DLAMDA, W, BUF, RHO, AUX, IINFO )
316            IF( IINFO.NE.0 ) THEN
317               INFO = KK
318            END IF
319            IF( K.EQ.1 .OR. K.EQ.2 ) THEN
320               DO 150 I = 1, KLR
321                  KK = INDXR( I )
322                  IU = INDX( KK )
323                  IIU = INDXG2L( IU, NB, J, J, NPROW )
324                  U( IIU, JJU ) = BUF( KK )
325  150          CONTINUE
326               GO TO 180
327            END IF
328*
329            DO 160 I = 1, K
330               BUF( I ) = Z( I ) / BUF( I )
331  160       CONTINUE
332            TEMP = SNRM2( K, BUF, 1 )
333            DO 170 I = 1, KLR
334               KK = INDXR( I )
335               IU = INDX( KK )
336               IIU = INDXG2L( IU, NB, J, J, NPROW )
337               U( IIU, JJU ) = BUF( KK ) / TEMP
338  170       CONTINUE
339  180    CONTINUE
340      END IF
341*
342  190 CONTINUE
343      RETURN
344*
345*     End of PSLAED3
346*
347      END
348