1 SUBROUTINE PSLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA, 2 $ W, Z, U, LDU, BUF, INDX, INDCOL, INDROW, 3 $ INDXR, INDXC, CTOT, NPCOL, INFO ) 4* 5* -- ScaLAPACK auxiliary routine (version 1.7) -- 6* University of Tennessee, Knoxville, Oak Ridge National Laboratory, 7* and University of California, Berkeley. 8* December 31, 1998 9* 10* .. Scalar Arguments .. 11 INTEGER DCOL, DROW, ICTXT, INFO, K, LDU, N, NB, NPCOL 12 REAL RHO 13* .. 14* .. Array Arguments .. 15 INTEGER CTOT( 0: NPCOL-1, 4 ), INDCOL( * ), 16 $ INDROW( * ), INDX( * ), INDXC( * ), INDXR( * ) 17 REAL BUF( * ), D( * ), DLAMDA( * ), U( LDU, * ), 18 $ W( * ), Z( * ) 19* .. 20* 21* Purpose 22* ======= 23* 24* PSLAED3 finds the roots of the secular equation, as defined by the 25* values in D, W, and RHO, between 1 and K. It makes the 26* appropriate calls to SLAED4 27* 28* This code makes very mild assumptions about floating point 29* arithmetic. It will work on machines with a guard digit in 30* add/subtract, or on those binary machines without guard digits 31* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. 32* It could conceivably fail on hexadecimal or decimal machines 33* without guard digits, but we know of none. 34* 35* Arguments 36* ========= 37* 38* ICTXT (global input) INTEGER 39* The BLACS context handle, indicating the global context of 40* the operation on the matrix. The context itself is global. 41* 42* K (output) INTEGER 43* The number of non-deflated eigenvalues, and the order of the 44* related secular equation. 0 <= K <=N. 45* 46* N (input) INTEGER 47* The dimension of the symmetric tridiagonal matrix. N >= 0. 48* 49* NB (global input) INTEGER 50* The blocking factor used to distribute the columns of the 51* matrix. NB >= 1. 52* 53* D (input/output) REAL array, dimension (N) 54* On entry, D contains the eigenvalues of the two submatrices to 55* be combined. 56* On exit, D contains the trailing (N-K) updated eigenvalues 57* (those which were deflated) sorted into increasing order. 58* 59* DROW (global input) INTEGER 60* The process row over which the first row of the matrix D is 61* distributed. 0 <= DROW < NPROW. 62* 63* DCOL (global input) INTEGER 64* The process column over which the first column of the 65* matrix D is distributed. 0 <= DCOL < NPCOL. 66* 67* RHO (global input/output) REAL 68* On entry, the off-diagonal element associated with the rank-1 69* cut which originally split the two submatrices which are now 70* being recombined. 71* On exit, RHO has been modified to the value required by 72* PSLAED3. 73* 74* DLAMDA (global output) REAL array, dimension (N) 75* A copy of the first K eigenvalues which will be used by 76* SLAED4 to form the secular equation. 77* 78* W (global output) REAL array, dimension (N) 79* The first k values of the final deflation-altered z-vector 80* which will be passed to SLAED4. 81* 82* Z (global input) REAL array, dimension (N) 83* On entry, Z contains the updating vector (the last 84* row of the first sub-eigenvector matrix and the first row of 85* the second sub-eigenvector matrix). 86* On exit, the contents of Z have been destroyed by the updating 87* process. 88* 89* U (global output) REAL array 90* global dimension (N, N), local dimension (LDU, NQ). 91* (See PSLAED0 for definition of NQ.) 92* Q contains the orthonormal eigenvectors of the symmetric 93* tridiagonal matrix. 94* 95* LDU (input) INTEGER 96* The leading dimension of the array U. 97* 98* BUF (workspace) REAL array, dimension 3*N 99* 100* 101* INDX (workspace) INTEGER array, dimension (N) 102* The permutation used to sort the contents of DLAMDA into 103* ascending order. 104* 105* INDCOL (workspace) INTEGER array, dimension (N) 106* 107* 108* INDROW (workspace) INTEGER array, dimension (N) 109* 110* 111* INDXR (workspace) INTEGER array, dimension (N) 112* 113* 114* INDXC (workspace) INTEGER array, dimension (N) 115* 116* CTOT (workspace) INTEGER array, dimension( NPCOL, 4) 117* 118* NPCOL (global input) INTEGER 119* The total number of columns over which the distributed 120* submatrix is distributed. 121* 122* INFO (output) INTEGER 123* = 0: successful exit. 124* < 0: if INFO = -i, the i-th argument had an illegal value. 125* > 0: The algorithm failed to compute the ith eigenvalue. 126* 127* ===================================================================== 128* 129* .. Parameters .. 130 REAL ONE 131 PARAMETER ( ONE = 1.0E0 ) 132* .. 133* .. Local Scalars .. 134 INTEGER COL, GI, I, IINFO, IIU, IPD, IU, J, JJU, JU, 135 $ KK, KL, KLC, KLR, MYCOL, MYKL, MYKLR, MYROW, 136 $ NPROW, PDC, PDR, ROW 137 REAL AUX, TEMP 138* .. 139* .. External Functions .. 140 INTEGER INDXG2L 141 REAL SLAMC3, SNRM2 142 EXTERNAL INDXG2L, SLAMC3, SNRM2 143* .. 144* .. External Subroutines .. 145 EXTERNAL BLACS_GRIDINFO, SCOPY, SGEBR2D, SGEBS2D, 146 $ SGERV2D, SGESD2D, SLAED4 147* .. 148* .. Intrinsic Functions .. 149 INTRINSIC MOD, SIGN, SQRT 150* .. 151* .. Executable Statements .. 152* 153* Test the input parameters. 154* 155 IINFO = 0 156* 157* Quick return if possible 158* 159 IF( K.EQ.0 ) 160 $ RETURN 161* 162 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) 163* 164 ROW = DROW 165 COL = DCOL 166 DO 20 I = 1, N, NB 167 DO 10 J = 0, NB - 1 168 INDROW( I+J ) = ROW 169 INDCOL( I+J ) = COL 170 10 CONTINUE 171 ROW = MOD( ROW+1, NPROW ) 172 COL = MOD( COL+1, NPCOL ) 173 20 CONTINUE 174* 175 MYKL = CTOT( MYCOL, 1 ) + CTOT( MYCOL, 2 ) + CTOT( MYCOL, 3 ) 176 KLR = MYKL / NPROW 177 IF( MYROW.EQ.DROW ) THEN 178 MYKLR = KLR + MOD( MYKL, NPROW ) 179 ELSE 180 MYKLR = KLR 181 END IF 182 PDC = 1 183 COL = DCOL 184 30 CONTINUE 185 IF( MYCOL.NE.COL ) THEN 186 PDC = PDC + CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 ) 187 COL = MOD( COL+1, NPCOL ) 188 GO TO 30 189 END IF 190 PDR = PDC 191 KL = KLR + MOD( MYKL, NPROW ) 192 ROW = DROW 193 40 CONTINUE 194 IF( MYROW.NE.ROW ) THEN 195 PDR = PDR + KL 196 KL = KLR 197 ROW = MOD( ROW+1, NPROW ) 198 GO TO 40 199 END IF 200* 201 DO 50 I = 1, K 202 DLAMDA( I ) = SLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) 203 Z( I ) = ONE 204 50 CONTINUE 205 IF( MYKLR.GT.0 ) THEN 206 KK = PDR 207 DO 80 I = 1, MYKLR 208 CALL SLAED4( K, KK, DLAMDA, W, BUF, RHO, BUF( K+I ), IINFO ) 209 IF( IINFO.NE.0 ) THEN 210 INFO = KK 211 END IF 212* 213* ..Compute part of z 214* 215 DO 60 J = 1, KK - 1 216 Z( J ) = Z( J )*( BUF( J ) / 217 $ ( DLAMDA( J )-DLAMDA( KK ) ) ) 218 60 CONTINUE 219 Z( KK ) = Z( KK )*BUF( KK ) 220 DO 70 J = KK + 1, K 221 Z( J ) = Z( J )*( BUF( J ) / 222 $ ( DLAMDA( J )-DLAMDA( KK ) ) ) 223 70 CONTINUE 224 KK = KK + 1 225 80 CONTINUE 226* 227 IF( MYROW.NE.DROW ) THEN 228 CALL SCOPY( K, Z, 1, BUF, 1 ) 229 CALL SGESD2D( ICTXT, K+MYKLR, 1, BUF, K+MYKLR, DROW, MYCOL ) 230 ELSE 231 IPD = 2*K + 1 232 CALL SCOPY( MYKLR, BUF( K+1 ), 1, BUF( IPD ), 1 ) 233 IF( KLR.GT.0 ) THEN 234 IPD = MYKLR + IPD 235 ROW = MOD( DROW+1, NPROW ) 236 DO 100 I = 1, NPROW - 1 237 CALL SGERV2D( ICTXT, K+KLR, 1, BUF, K+KLR, ROW, 238 $ MYCOL ) 239 CALL SCOPY( KLR, BUF( K+1 ), 1, BUF( IPD ), 1 ) 240 DO 90 J = 1, K 241 Z( J ) = Z( J )*BUF( J ) 242 90 CONTINUE 243 IPD = IPD + KLR 244 ROW = MOD( ROW+1, NPROW ) 245 100 CONTINUE 246 END IF 247 END IF 248 END IF 249* 250 IF( MYROW.EQ.DROW ) THEN 251 IF( MYCOL.NE.DCOL .AND. MYKL.NE.0 ) THEN 252 CALL SCOPY( K, Z, 1, BUF, 1 ) 253 CALL SCOPY( MYKL, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 ) 254 CALL SGESD2D( ICTXT, K+MYKL, 1, BUF, K+MYKL, MYROW, DCOL ) 255 ELSE IF( MYCOL.EQ.DCOL ) THEN 256 IPD = 2*K + 1 257 COL = DCOL 258 KL = MYKL 259 DO 120 I = 1, NPCOL - 1 260 IPD = IPD + KL 261 COL = MOD( COL+1, NPCOL ) 262 KL = CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 ) 263 IF( KL.NE.0 ) THEN 264 CALL SGERV2D( ICTXT, K+KL, 1, BUF, K+KL, MYROW, COL ) 265 CALL SCOPY( KL, BUF( K+1 ), 1, BUF( IPD ), 1 ) 266 DO 110 J = 1, K 267 Z( J ) = Z( J )*BUF( J ) 268 110 CONTINUE 269 END IF 270 120 CONTINUE 271 DO 130 I = 1, K 272 Z( I ) = SIGN( SQRT( -Z( I ) ), W( I ) ) 273 130 CONTINUE 274* 275 END IF 276 END IF 277* 278* Diffusion 279* 280 IF( MYROW.EQ.DROW .AND. MYCOL.EQ.DCOL ) THEN 281 CALL SCOPY( K, Z, 1, BUF, 1 ) 282 CALL SCOPY( K, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 ) 283 CALL SGEBS2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K ) 284 ELSE 285 CALL SGEBR2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K, DROW, DCOL ) 286 CALL SCOPY( K, BUF, 1, Z, 1 ) 287 END IF 288* 289* Copy of D at the good place 290* 291 KLC = 0 292 KLR = 0 293 DO 140 I = 1, K 294 GI = INDX( I ) 295 D( GI ) = BUF( K+I ) 296 COL = INDCOL( GI ) 297 ROW = INDROW( GI ) 298 IF( COL.EQ.MYCOL ) THEN 299 KLC = KLC + 1 300 INDXC( KLC ) = I 301 END IF 302 IF( ROW.EQ.MYROW ) THEN 303 KLR = KLR + 1 304 INDXR( KLR ) = I 305 END IF 306 140 CONTINUE 307* 308* Compute eigenvectors of the modified rank-1 modification. 309* 310 IF( MYKL.NE.0 ) THEN 311 DO 180 J = 1, MYKL 312 KK = INDXC( J ) 313 JU = INDX( KK ) 314 JJU = INDXG2L( JU, NB, J, J, NPCOL ) 315 CALL SLAED4( K, KK, DLAMDA, W, BUF, RHO, AUX, IINFO ) 316 IF( IINFO.NE.0 ) THEN 317 INFO = KK 318 END IF 319 IF( K.EQ.1 .OR. K.EQ.2 ) THEN 320 DO 150 I = 1, KLR 321 KK = INDXR( I ) 322 IU = INDX( KK ) 323 IIU = INDXG2L( IU, NB, J, J, NPROW ) 324 U( IIU, JJU ) = BUF( KK ) 325 150 CONTINUE 326 GO TO 180 327 END IF 328* 329 DO 160 I = 1, K 330 BUF( I ) = Z( I ) / BUF( I ) 331 160 CONTINUE 332 TEMP = SNRM2( K, BUF, 1 ) 333 DO 170 I = 1, KLR 334 KK = INDXR( I ) 335 IU = INDX( KK ) 336 IIU = INDXG2L( IU, NB, J, J, NPROW ) 337 U( IIU, JJU ) = BUF( KK ) / TEMP 338 170 CONTINUE 339 180 CONTINUE 340 END IF 341* 342 190 CONTINUE 343 RETURN 344* 345* End of PSLAED3 346* 347 END 348