1      SUBROUTINE SLAR1VA(N, B1, BN, LAMBDA, D, L, LD, LLD,
2     $           PIVMIN, GAPTOL, Z, WANTNC, NEGCNT, ZTZ, MINGMA,
3     $           R, ISUPPZ, NRMINV, RESID, RQCORR, WORK )
4*
5      IMPLICIT NONE
6*
7*  -- ScaLAPACK computational routine (version 2.0) --
8*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
9*     July 4, 2010
10*
11*     .. Scalar Arguments ..
12      LOGICAL            WANTNC
13      INTEGER   B1, BN, N, NEGCNT, R
14      REAL               GAPTOL, LAMBDA, MINGMA, NRMINV, PIVMIN, RESID,
15     $                   RQCORR, ZTZ
16*     ..
17*     .. Array Arguments ..
18      INTEGER            ISUPPZ( * )
19      REAL               D( * ), L( * ), LD( * ), LLD( * ),
20     $                  WORK( * )
21      REAL             Z( * )
22*
23*  Purpose
24*  =======
25*
26*  SLAR1VA computes the (scaled) r-th column of the inverse of
27*  the sumbmatrix in rows B1 through BN of the tridiagonal matrix
28*  L D L^T - sigma I. When sigma is close to an eigenvalue, the
29*  computed vector is an accurate eigenvector. Usually, r corresponds
30*  to the index where the eigenvector is largest in magnitude.
31*  The following steps accomplish this computation :
32*  (a) Stationary qd transform,  L D L^T - sigma I = L(+) D(+) L(+)^T,
33*  (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T,
34*  (c) Computation of the diagonal elements of the inverse of
35*      L D L^T - sigma I by combining the above transforms, and choosing
36*      r as the index where the diagonal of the inverse is (one of the)
37*      largest in magnitude.
38*  (d) Computation of the (scaled) r-th column of the inverse using the
39*      twisted factorization obtained by combining the top part of the
40*      the stationary and the bottom part of the progressive transform.
41*
42*  Arguments
43*  =========
44*
45*  N        (input) INTEGER
46*           The order of the matrix L D L^T.
47*
48*  B1       (input) INTEGER
49*           First index of the submatrix of L D L^T.
50*
51*  BN       (input) INTEGER
52*           Last index of the submatrix of L D L^T.
53*
54*  LAMBDA    (input) REAL
55*           The shift. In order to compute an accurate eigenvector,
56*           LAMBDA should be a good approximation to an eigenvalue
57*           of L D L^T.
58*
59*  L        (input) REAL             array, dimension (N-1)
60*           The (n-1) subdiagonal elements of the unit bidiagonal matrix
61*           L, in elements 1 to N-1.
62*
63*  D        (input) REAL             array, dimension (N)
64*           The n diagonal elements of the diagonal matrix D.
65*
66*  LD       (input) REAL             array, dimension (N-1)
67*           The n-1 elements L(i)*D(i).
68*
69*  LLD      (input) REAL             array, dimension (N-1)
70*           The n-1 elements L(i)*L(i)*D(i).
71*
72*  PIVMIN   (input) REAL
73*           The minimum pivot in the Sturm sequence.
74*
75*  GAPTOL   (input) REAL
76*           Tolerance that indicates when eigenvector entries are negligible
77*           w.r.t. their contribution to the residual.
78*
79*  Z        (input/output) REAL             array, dimension (N)
80*           On input, all entries of Z must be set to 0.
81*           On output, Z contains the (scaled) r-th column of the
82*           inverse. The scaling is such that Z(R) equals 1.
83*
84*  WANTNC   (input) LOGICAL
85*           Specifies whether NEGCNT has to be computed.
86*
87*  NEGCNT   (output) INTEGER
88*           If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin
89*           in the  matrix factorization L D L^T, and NEGCNT = -1 otherwise.
90*
91*  ZTZ      (output) REAL
92*           The square of the 2-norm of Z.
93*
94*  MINGMA   (output) REAL
95*           The reciprocal of the largest (in magnitude) diagonal
96*           element of the inverse of L D L^T - sigma I.
97*
98*  R        (input/output) INTEGER
99*           The twist index for the twisted factorization used to
100*           compute Z.
101*           On input, 0 <= R <= N. If R is input as 0, R is set to
102*           the index where (L D L^T - sigma I)^{-1} is largest
103*           in magnitude. If 1 <= R <= N, R is unchanged.
104*           On output, R contains the twist index used to compute Z.
105*           Ideally, R designates the position of the maximum entry in the
106*           eigenvector.
107*
108*  ISUPPZ   (output) INTEGER array, dimension (2)
109*           The support of the vector in Z, i.e., the vector Z is
110*           nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ).
111*
112*  NRMINV   (output) REAL
113*           NRMINV = 1/SQRT( ZTZ )
114*
115*  RESID    (output) REAL
116*           The residual of the FP vector.
117*           RESID = ABS( MINGMA )/SQRT( ZTZ )
118*
119*  RQCORR   (output) REAL
120*           The Rayleigh Quotient correction to LAMBDA.
121*           RQCORR = MINGMA*TMP
122*
123*  WORK     (workspace) REAL             array, dimension (4*N)
124*
125*  Further Details
126*  ===============
127*
128*  Based on contributions by
129*     Beresford Parlett, University of California, Berkeley, USA
130*     Jim Demmel, University of California, Berkeley, USA
131*     Inderjit Dhillon, University of Texas, Austin, USA
132*     Osni Marques, LBNL/NERSC, USA
133*     Christof Voemel, University of California, Berkeley, USA
134*
135*  =====================================================================
136*
137*     .. Parameters ..
138      INTEGER            BLKLEN
139      PARAMETER          ( BLKLEN = 16 )
140       REAL               ZERO, ONE
141      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
142
143*     ..
144*     .. Local Scalars ..
145      LOGICAL            SAWNAN1, SAWNAN2
146      INTEGER            BI, I, INDLPL, INDP, INDS, INDUMN, NB, NEG1,
147     $                   NEG2, NX, R1, R2, TO
148      REAL                        ABSZCUR, ABSZPREV, DMINUS, DPLUS, EPS,
149     $                            S, TMP, ZPREV
150*     ..
151*     .. External Functions ..
152      LOGICAL SISNAN
153      REAL               SLAMCH
154      EXTERNAL           SISNAN, SLAMCH
155*     ..
156*     .. Intrinsic Functions ..
157      INTRINSIC          ABS, MAX, MIN, REAL
158*     ..
159*     .. Executable Statements ..
160*
161      EPS = SLAMCH( 'Precision' )
162
163
164      IF( R.EQ.0 ) THEN
165         R1 = B1
166         R2 = BN
167      ELSE
168         R1 = R
169         R2 = R
170      END IF
171
172*     Storage for LPLUS
173      INDLPL = 0
174*     Storage for UMINUS
175      INDUMN = N
176      INDS = 2*N + 1
177      INDP = 3*N + 1
178
179      IF( B1.EQ.1 ) THEN
180         WORK( INDS ) = ZERO
181      ELSE
182         WORK( INDS+B1-1 ) = LLD( B1-1 )
183      END IF
184
185*
186*     Compute the stationary transform (using the differential form)
187*     until the index R2.
188*
189      SAWNAN1 = .FALSE.
190      NEG1 = 0
191      S = WORK( INDS+B1-1 ) - LAMBDA
192      DO 50 I = B1, R1 - 1
193         DPLUS = D( I ) + S
194         WORK( INDLPL+I ) = LD( I ) / DPLUS
195         IF(DPLUS.LT.ZERO) NEG1 = NEG1 + 1
196         WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I )
197         S = WORK( INDS+I ) - LAMBDA
198 50   CONTINUE
199      SAWNAN1 = SISNAN( S )
200      IF( SAWNAN1 ) GOTO 60
201      DO 51 I = R1, R2 - 1
202         DPLUS = D( I ) + S
203         WORK( INDLPL+I ) = LD( I ) / DPLUS
204         WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I )
205         S = WORK( INDS+I ) - LAMBDA
206 51   CONTINUE
207      SAWNAN1 = SISNAN( S )
208*
209 60   CONTINUE
210      IF( SAWNAN1 ) THEN
211*        Runs a slower version of the above loop if a NaN is detected
212         NEG1 = 0
213         S = WORK( INDS+B1-1 ) - LAMBDA
214         DO 70 I = B1, R1 - 1
215            DPLUS = D( I ) + S
216            IF(ABS(DPLUS).LT.PIVMIN) DPLUS = -PIVMIN
217            WORK( INDLPL+I ) = LD( I ) / DPLUS
218            IF(DPLUS.LT.ZERO) NEG1 = NEG1 + 1
219            WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I )
220            IF( WORK( INDLPL+I ).EQ.ZERO )
221     $                      WORK( INDS+I ) = LLD( I )
222            S = WORK( INDS+I ) - LAMBDA
223 70      CONTINUE
224         DO 71 I = R1, R2 - 1
225            DPLUS = D( I ) + S
226            IF(ABS(DPLUS).LT.PIVMIN) DPLUS = -PIVMIN
227            WORK( INDLPL+I ) = LD( I ) / DPLUS
228            WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I )
229            IF( WORK( INDLPL+I ).EQ.ZERO )
230     $                      WORK( INDS+I ) = LLD( I )
231            S = WORK( INDS+I ) - LAMBDA
232 71      CONTINUE
233      END IF
234*
235*     Compute the progressive transform (using the differential form)
236*     until the index R1
237*
238      SAWNAN2 = .FALSE.
239      NEG2 = 0
240      WORK( INDP+BN-1 ) = D( BN ) - LAMBDA
241      DO 80 I = BN - 1, R1, -1
242         DMINUS = LLD( I ) + WORK( INDP+I )
243         TMP = D( I ) / DMINUS
244         IF(DMINUS.LT.ZERO) NEG2 = NEG2 + 1
245         WORK( INDUMN+I ) = L( I )*TMP
246         WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - LAMBDA
247 80   CONTINUE
248      TMP = WORK( INDP+R1-1 )
249      SAWNAN2 = SISNAN( TMP )
250      IF( SAWNAN2 ) THEN
251*        Runs a slower version of the above loop if a NaN is detected
252         NEG2 = 0
253         DO 100 I = BN-1, R1, -1
254            DMINUS = LLD( I ) + WORK( INDP+I )
255            IF(ABS(DMINUS).LT.PIVMIN) DMINUS = -PIVMIN
256            TMP = D( I ) / DMINUS
257            IF(DMINUS.LT.ZERO) NEG2 = NEG2 + 1
258            WORK( INDUMN+I ) = L( I )*TMP
259            WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - LAMBDA
260            IF( TMP.EQ.ZERO )
261     $          WORK( INDP+I-1 ) = D( I ) - LAMBDA
262 100     CONTINUE
263      END IF
264*
265*     Find the index (from R1 to R2) of the largest (in magnitude)
266*     diagonal element of the inverse
267*
268      MINGMA = WORK( INDS+R1-1 ) + WORK( INDP+R1-1 )
269      IF( MINGMA.LT.ZERO ) NEG1 = NEG1 + 1
270      IF( WANTNC ) THEN
271         NEGCNT = NEG1 + NEG2
272      ELSE
273         NEGCNT = -1
274      ENDIF
275      IF( ABS(MINGMA).EQ.ZERO )
276     $   MINGMA = EPS*WORK( INDS+R1-1 )
277      R = R1
278      DO 110 I = R1, R2 - 1
279         TMP = WORK( INDS+I ) + WORK( INDP+I )
280         IF( TMP.EQ.ZERO )
281     $      TMP = EPS*WORK( INDS+I )
282         IF( ABS( TMP ).LE.ABS( MINGMA ) ) THEN
283            MINGMA = TMP
284            R = I + 1
285         END IF
286 110  CONTINUE
287*
288*     Compute the FP vector: solve N^T v = e_r
289*
290      ISUPPZ( 1 ) = B1
291      ISUPPZ( 2 ) = BN
292      Z( R ) = ONE
293      ZTZ = ONE
294*
295*     Compute the FP vector upwards from R
296*
297      NB = INT((R-B1)/BLKLEN)
298      NX = R-NB*BLKLEN
299      IF( .NOT.SAWNAN1 ) THEN
300         DO 210 BI = R-1, NX, -BLKLEN
301            TO = BI-BLKLEN+1
302            DO 205 I = BI, TO, -1
303               Z( I ) = -( WORK(INDLPL+I)*Z(I+1) )
304               ZTZ = ZTZ + Z( I )*Z( I )
305 205        CONTINUE
306            IF( ABS(Z(TO)).LT.EPS .AND.
307     $        ABS(Z(TO+1)).LT.EPS ) THEN
308               ISUPPZ(1) = TO
309               GOTO 220
310	    ENDIF
311 210     CONTINUE
312         DO 215 I = NX-1, B1, -1
313            Z( I ) = -( WORK(INDLPL+I)*Z(I+1) )
314            ZTZ = ZTZ + Z( I )*Z( I )
315 215     CONTINUE
316 220     CONTINUE
317      ELSE
318*        Run slower loop if NaN occurred.
319         DO 230 BI = R-1, NX, -BLKLEN
320            TO = BI-BLKLEN+1
321            DO 225 I = BI, TO, -1
322               IF( Z( I+1 ).EQ.ZERO ) THEN
323                  Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 )
324               ELSE
325                  Z( I ) = -( WORK( INDLPL+I )*Z( I+1 ) )
326               END IF
327               ZTZ = ZTZ + Z( I )*Z( I )
328 225        CONTINUE
329            IF( ABS(Z(TO)).LT.EPS .AND.
330     $        ABS(Z(TO+1)).LT.EPS ) THEN
331               ISUPPZ(1) = TO
332               GOTO 240
333	    ENDIF
334 230     CONTINUE
335         DO 235 I = NX-1, B1, -1
336            IF( Z( I+1 ).EQ.ZERO ) THEN
337               Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 )
338            ELSE
339               Z( I ) = -( WORK( INDLPL+I )*Z( I+1 ) )
340            END IF
341            ZTZ = ZTZ + Z( I )*Z( I )
342 235     CONTINUE
343 240     CONTINUE
344      ENDIF
345      DO 245 I= B1, (ISUPPZ(1)-1)
346         Z(I) = ZERO
347 245  CONTINUE
348
349*     Compute the FP vector downwards from R in blocks of size BLKLEN
350      IF( .NOT.SAWNAN2 ) THEN
351         DO 260 BI = R+1, BN, BLKLEN
352            TO = BI+BLKLEN-1
353            IF ( TO.LE.BN ) THEN
354               DO 250 I = BI, TO
355                  Z(I) = -(WORK(INDUMN+I-1)*Z(I-1))
356                  ZTZ = ZTZ + Z( I )*Z( I )
357 250           CONTINUE
358               IF( ABS(Z(TO)).LE.EPS .AND.
359     $             ABS(Z(TO-1)).LE.EPS ) THEN
360                  ISUPPZ(2) = TO
361                  GOTO 265
362	       ENDIF
363            ELSE
364               DO 255 I = BI, BN
365                  Z(I) = -(WORK(INDUMN+I-1)*Z(I-1))
366                  ZTZ = ZTZ + Z( I )*Z( I )
367 255           CONTINUE
368            ENDIF
369 260     CONTINUE
370 265     CONTINUE
371      ELSE
372*        Run slower loop if NaN occurred.
373         DO 280 BI = R+1, BN, BLKLEN
374            TO = BI+BLKLEN-1
375            IF ( TO.LE.BN ) THEN
376               DO 270 I = BI, TO
377                  ZPREV = Z(I-1)
378                  ABSZPREV = ABS(ZPREV)
379                  IF( ZPREV.NE.ZERO ) THEN
380                     Z(I)= -(WORK(INDUMN+I-1)*ZPREV)
381                  ELSE
382                     Z(I)= -(LD(I-2)/LD(I-1))*Z(I-2)
383                  END IF
384                  ABSZCUR = ABS(Z(I))
385                  ZTZ = ZTZ + ABSZCUR**2
386 270           CONTINUE
387               IF( ABSZCUR.LT.EPS .AND.
388     $             ABSZPREV.LT.EPS ) THEN
389                  ISUPPZ(2) = I
390                  GOTO 285
391	       ENDIF
392            ELSE
393               DO 275 I = BI, BN
394                  ZPREV = Z(I-1)
395                  ABSZPREV = ABS(ZPREV)
396                  IF( ZPREV.NE.ZERO ) THEN
397                     Z(I)= -(WORK(INDUMN+I-1)*ZPREV)
398                  ELSE
399                     Z(I)= -(LD(I-2)/LD(I-1))*Z(I-2)
400                  END IF
401                  ABSZCUR = ABS(Z(I))
402                  ZTZ = ZTZ + ABSZCUR**2
403 275           CONTINUE
404            ENDIF
405 280     CONTINUE
406 285     CONTINUE
407      END IF
408      DO 290 I= ISUPPZ(2)+1,BN
409         Z(I) = ZERO
410 290  CONTINUE
411*
412*     Compute quantities for convergence test
413*
414      TMP = ONE / ZTZ
415      NRMINV = SQRT( TMP )
416      RESID = ABS( MINGMA )*NRMINV
417      RQCORR = MINGMA*TMP
418*
419      RETURN
420*
421*     End of SLAR1VA
422*
423      END
424