1 SUBROUTINE PDLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA, 2 $ W, Z, U, LDU, BUF, INDX, INDCOL, INDROW, 3 $ INDXR, INDXC, CTOT, NPCOL, INFO ) 4* 5* -- ScaLAPACK auxiliary routine (version 1.7) -- 6* University of Tennessee, Knoxville, Oak Ridge National Laboratory, 7* and University of California, Berkeley. 8* December 31, 1998 9* 10* .. Scalar Arguments .. 11 INTEGER DCOL, DROW, ICTXT, INFO, K, LDU, N, NB, NPCOL 12 DOUBLE PRECISION RHO 13* .. 14* .. Array Arguments .. 15 INTEGER CTOT( 0: NPCOL-1, 4 ), INDCOL( * ), 16 $ INDROW( * ), INDX( * ), INDXC( * ), INDXR( * ) 17 DOUBLE PRECISION BUF( * ), D( * ), DLAMDA( * ), U( LDU, * ), 18 $ W( * ), Z( * ) 19* .. 20* 21* Purpose 22* ======= 23* 24* PDLAED3 finds the roots of the secular equation, as defined by the 25* values in D, W, and RHO, between 1 and K. It makes the 26* appropriate calls to SLAED4 27* 28* This code makes very mild assumptions about floating point 29* arithmetic. It will work on machines with a guard digit in 30* add/subtract, or on those binary machines without guard digits 31* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. 32* It could conceivably fail on hexadecimal or decimal machines 33* without guard digits, but we know of none. 34* 35* Arguments 36* ========= 37* 38* ICTXT (global input) INTEGER 39* The BLACS context handle, indicating the global context of 40* the operation on the matrix. The context itself is global. 41* 42* K (output) INTEGER 43* The number of non-deflated eigenvalues, and the order of the 44* related secular equation. 0 <= K <=N. 45* 46* N (input) INTEGER 47* The dimension of the symmetric tridiagonal matrix. N >= 0. 48* 49* NB (global input) INTEGER 50* The blocking factor used to distribute the columns of the 51* matrix. NB >= 1. 52* 53* D (input/output) DOUBLE PRECISION array, dimension (N) 54* On entry, D contains the eigenvalues of the two submatrices to 55* be combined. 56* On exit, D contains the trailing (N-K) updated eigenvalues 57* (those which were deflated) sorted into increasing order. 58* 59* DROW (global input) INTEGER 60* The process row over which the first row of the matrix D is 61* distributed. 0 <= DROW < NPROW. 62* 63* DCOL (global input) INTEGER 64* The process column over which the first column of the 65* matrix D is distributed. 0 <= DCOL < NPCOL. 66* 67* RHO (global input/output) DOUBLE PRECISION 68* On entry, the off-diagonal element associated with the rank-1 69* cut which originally split the two submatrices which are now 70* being recombined. 71* On exit, RHO has been modified to the value required by 72* PDLAED3. 73* 74* DLAMDA (global output) DOUBLE PRECISION array, dimension (N) 75* A copy of the first K eigenvalues which will be used by 76* DLAED4 to form the secular equation. 77* 78* W (global output) DOUBLE PRECISION array, dimension (N) 79* The first k values of the final deflation-altered z-vector 80* which will be passed to DLAED4. 81* 82* Z (global input) DOUBLE PRECISION array, dimension (N) 83* On entry, Z contains the updating vector (the last 84* row of the first sub-eigenvector matrix and the first row of 85* the second sub-eigenvector matrix). 86* On exit, the contents of Z have been destroyed by the updating 87* process. 88* 89* U (global output) DOUBLE PRECISION array 90* global dimension (N, N), local dimension (LDU, NQ). 91* (See PDLAED0 for definition of NQ.) 92* Q contains the orthonormal eigenvectors of the symmetric 93* tridiagonal matrix. 94* 95* LDU (input) INTEGER 96* The leading dimension of the array U. 97* 98* BUF (workspace) DOUBLE PRECISION array, dimension 3*N 99* 100* 101* INDX (workspace) INTEGER array, dimension (N) 102* The permutation used to sort the contents of DLAMDA into 103* ascending order. 104* 105* INDCOL (workspace) INTEGER array, dimension (N) 106* 107* 108* INDROW (workspace) INTEGER array, dimension (N) 109* 110* 111* INDXR (workspace) INTEGER array, dimension (N) 112* 113* 114* INDXC (workspace) INTEGER array, dimension (N) 115* 116* CTOT (workspace) INTEGER array, dimension( NPCOL, 4) 117* 118* NPCOL (global input) INTEGER 119* The total number of columns over which the distributed 120* submatrix is distributed. 121* 122* INFO (output) INTEGER 123* = 0: successful exit. 124* < 0: if INFO = -i, the i-th argument had an illegal value. 125* > 0: The algorithm failed to compute the ith eigenvalue. 126* 127* ===================================================================== 128* 129* .. Parameters .. 130 DOUBLE PRECISION ONE 131 PARAMETER ( ONE = 1.0D+0 ) 132* .. 133* .. Local Scalars .. 134 INTEGER COL, GI, I, IINFO, IIU, IPD, IU, J, JJU, JU, 135 $ KK, KL, KLC, KLR, MYCOL, MYKL, MYKLR, MYROW, 136 $ NPROW, PDC, PDR, ROW 137 DOUBLE PRECISION AUX, TEMP 138* .. 139* .. External Functions .. 140 INTEGER INDXG2L 141 DOUBLE PRECISION DLAMC3, DNRM2 142 EXTERNAL INDXG2L, DLAMC3, DNRM2 143* .. 144* .. External Subroutines .. 145 EXTERNAL BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D, 146 $ DGERV2D, DGESD2D, DLAED4 147* .. 148* .. Intrinsic Functions .. 149 INTRINSIC MOD, SIGN, SQRT 150* .. 151* .. Executable Statements .. 152* 153* Test the input parameters. 154* 155 INFO = 0 156* 157* Quick return if possible 158* 159 IF( K.EQ.0 ) 160 $ RETURN 161* 162 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) 163* 164 ROW = DROW 165 COL = DCOL 166 DO 20 I = 1, N, NB 167 DO 10 J = 0, NB - 1 168 IF( I+J.LE.N ) THEN 169 INDROW( I+J ) = ROW 170 INDCOL( I+J ) = COL 171 END IF 172 10 CONTINUE 173 ROW = MOD( ROW+1, NPROW ) 174 COL = MOD( COL+1, NPCOL ) 175 20 CONTINUE 176* 177 MYKL = CTOT( MYCOL, 1 ) + CTOT( MYCOL, 2 ) + CTOT( MYCOL, 3 ) 178 KLR = MYKL / NPROW 179 IF( MYROW.EQ.DROW ) THEN 180 MYKLR = KLR + MOD( MYKL, NPROW ) 181 ELSE 182 MYKLR = KLR 183 END IF 184 PDC = 1 185 COL = DCOL 186 30 CONTINUE 187 IF( MYCOL.NE.COL ) THEN 188 PDC = PDC + CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 ) 189 COL = MOD( COL+1, NPCOL ) 190 GO TO 30 191 END IF 192 PDR = PDC 193 KL = KLR + MOD( MYKL, NPROW ) 194 ROW = DROW 195 40 CONTINUE 196 IF( MYROW.NE.ROW ) THEN 197 PDR = PDR + KL 198 KL = KLR 199 ROW = MOD( ROW+1, NPROW ) 200 GO TO 40 201 END IF 202* 203 DO 50 I = 1, K 204 DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) 205 Z( I ) = ONE 206 50 CONTINUE 207 IF( MYKLR.GT.0 ) THEN 208 KK = PDR 209 DO 80 I = 1, MYKLR 210 CALL DLAED4( K, KK, DLAMDA, W, BUF, RHO, BUF( K+I ), IINFO ) 211 IF( IINFO.NE.0 ) THEN 212 INFO = KK 213 END IF 214* 215* ..Compute part of z 216* 217 DO 60 J = 1, KK - 1 218 Z( J ) = Z( J )*( BUF( J ) / 219 $ ( DLAMDA( J )-DLAMDA( KK ) ) ) 220 60 CONTINUE 221 Z( KK ) = Z( KK )*BUF( KK ) 222 DO 70 J = KK + 1, K 223 Z( J ) = Z( J )*( BUF( J ) / 224 $ ( DLAMDA( J )-DLAMDA( KK ) ) ) 225 70 CONTINUE 226 KK = KK + 1 227 80 CONTINUE 228* 229 IF( MYROW.NE.DROW ) THEN 230 CALL DCOPY( K, Z, 1, BUF, 1 ) 231 CALL DGESD2D( ICTXT, K+MYKLR, 1, BUF, K+MYKLR, DROW, MYCOL ) 232 ELSE 233 IPD = 2*K + 1 234 CALL DCOPY( MYKLR, BUF( K+1 ), 1, BUF( IPD ), 1 ) 235 IF( KLR.GT.0 ) THEN 236 IPD = MYKLR + IPD 237 ROW = MOD( DROW+1, NPROW ) 238 DO 100 I = 1, NPROW - 1 239 CALL DGERV2D( ICTXT, K+KLR, 1, BUF, K+KLR, ROW, 240 $ MYCOL ) 241 CALL DCOPY( KLR, BUF( K+1 ), 1, BUF( IPD ), 1 ) 242 DO 90 J = 1, K 243 Z( J ) = Z( J )*BUF( J ) 244 90 CONTINUE 245 IPD = IPD + KLR 246 ROW = MOD( ROW+1, NPROW ) 247 100 CONTINUE 248 END IF 249 END IF 250 END IF 251* 252 IF( MYROW.EQ.DROW ) THEN 253 IF( MYCOL.NE.DCOL .AND. MYKL.NE.0 ) THEN 254 CALL DCOPY( K, Z, 1, BUF, 1 ) 255 CALL DCOPY( MYKL, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 ) 256 CALL DGESD2D( ICTXT, K+MYKL, 1, BUF, K+MYKL, MYROW, DCOL ) 257 ELSE IF( MYCOL.EQ.DCOL ) THEN 258 IPD = 2*K + 1 259 COL = DCOL 260 KL = MYKL 261 DO 120 I = 1, NPCOL - 1 262 IPD = IPD + KL 263 COL = MOD( COL+1, NPCOL ) 264 KL = CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 ) 265 IF( KL.NE.0 ) THEN 266 CALL DGERV2D( ICTXT, K+KL, 1, BUF, K+KL, MYROW, COL ) 267 CALL DCOPY( KL, BUF( K+1 ), 1, BUF( IPD ), 1 ) 268 DO 110 J = 1, K 269 Z( J ) = Z( J )*BUF( J ) 270 110 CONTINUE 271 END IF 272 120 CONTINUE 273 DO 130 I = 1, K 274 Z( I ) = SIGN( SQRT( -Z( I ) ), W( I ) ) 275 130 CONTINUE 276* 277 END IF 278 END IF 279* 280* Diffusion 281* 282 IF( MYROW.EQ.DROW .AND. MYCOL.EQ.DCOL ) THEN 283 CALL DCOPY( K, Z, 1, BUF, 1 ) 284 CALL DCOPY( K, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 ) 285 CALL DGEBS2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K ) 286 ELSE 287 CALL DGEBR2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K, DROW, DCOL ) 288 CALL DCOPY( K, BUF, 1, Z, 1 ) 289 END IF 290* 291* Copy of D at the good place 292* 293 KLC = 0 294 KLR = 0 295 DO 140 I = 1, K 296 GI = INDX( I ) 297 D( GI ) = BUF( K+I ) 298 COL = INDCOL( GI ) 299 ROW = INDROW( GI ) 300 IF( COL.EQ.MYCOL ) THEN 301 KLC = KLC + 1 302 INDXC( KLC ) = I 303 END IF 304 IF( ROW.EQ.MYROW ) THEN 305 KLR = KLR + 1 306 INDXR( KLR ) = I 307 END IF 308 140 CONTINUE 309* 310* Compute eigenvectors of the modified rank-1 modification. 311* 312 IF( MYKL.NE.0 ) THEN 313 DO 180 J = 1, MYKL 314 KK = INDXC( J ) 315 JU = INDX( KK ) 316 JJU = INDXG2L( JU, NB, J, J, NPCOL ) 317 CALL DLAED4( K, KK, DLAMDA, W, BUF, RHO, AUX, IINFO ) 318 IF( IINFO.NE.0 ) THEN 319 INFO = KK 320 END IF 321 IF( K.EQ.1 .OR. K.EQ.2 ) THEN 322 DO 150 I = 1, KLR 323 KK = INDXR( I ) 324 IU = INDX( KK ) 325 IIU = INDXG2L( IU, NB, J, J, NPROW ) 326 U( IIU, JJU ) = BUF( KK ) 327 150 CONTINUE 328 GO TO 180 329 END IF 330* 331 DO 160 I = 1, K 332 BUF( I ) = Z( I ) / BUF( I ) 333 160 CONTINUE 334 TEMP = DNRM2( K, BUF, 1 ) 335 DO 170 I = 1, KLR 336 KK = INDXR( I ) 337 IU = INDX( KK ) 338 IIU = INDXG2L( IU, NB, J, J, NPROW ) 339 U( IIU, JJU ) = BUF( KK ) / TEMP 340 170 CONTINUE 341* 342 180 CONTINUE 343 END IF 344* 345 190 CONTINUE 346* 347 RETURN 348* 349* End of PDLAED3 350* 351 END 352