1      SUBROUTINE DSTEGR2B( JOBZ, N, D, E,
2     $                   M, W, Z, LDZ, NZC, ISUPPZ, WORK, LWORK, IWORK,
3     $                   LIWORK, DOL, DOU, NEEDIL, NEEDIU,
4     $                   INDWLC, PIVMIN, SCALE, WL, WU,
5     $                   VSTART, FINISH, MAXCLS,
6     $                   NDEPTH, PARITY, ZOFFSET, INFO )
7*
8*  -- ScaLAPACK auxiliary routine (version 2.0) --
9*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10*     July 4, 2010
11*
12      IMPLICIT NONE
13*
14*     .. Scalar Arguments ..
15      CHARACTER          JOBZ
16      INTEGER            DOL, DOU, INDWLC, INFO, LDZ, LIWORK, LWORK, M,
17     $                   MAXCLS, N, NDEPTH, NEEDIL, NEEDIU, NZC, PARITY,
18     $                   ZOFFSET
19
20      DOUBLE PRECISION PIVMIN, SCALE, WL, WU
21      LOGICAL VSTART, FINISH
22
23*     ..
24*     .. Array Arguments ..
25      INTEGER            ISUPPZ( * ), IWORK( * )
26      DOUBLE PRECISION   D( * ), E( * ), W( * ), WORK( * )
27      DOUBLE PRECISION   Z( LDZ, * )
28*     ..
29*
30*  Purpose
31*  =======
32*
33*  DSTEGR2B should only be called after a call to DSTEGR2A.
34*  From eigenvalues and initial representations computed by DSTEGR2A,
35*  DSTEGR2B computes the selected eigenvalues and eigenvectors
36*  of the real symmetric tridiagonal matrix in parallel
37*  on multiple processors. It is potentially invoked multiple times
38*  on a given processor because the locally relevant representation tree
39*  might depend on spectral information that is "owned" by other processors
40*  and might need to be communicated.
41*
42*  Please note:
43*  1. The calling sequence has two additional INTEGER parameters,
44*     DOL and DOU, that should satisfy M>=DOU>=DOL>=1.
45*     These parameters are only relevant for the case JOBZ = 'V'.
46*     DSTEGR2B  ONLY computes the eigenVECTORS
47*     corresponding to eigenvalues DOL through DOU in W. (That is,
48*     instead of computing the eigenvectors belonging to W(1)
49*     through W(M), only the eigenvectors belonging to eigenvalues
50*     W(DOL) through W(DOU) are computed. In this case, only the
51*     eigenvalues DOL:DOU are guaranteed to be accurately refined
52*     to all figures by Rayleigh-Quotient iteration.
53*
54*  2. The additional arguments VSTART, FINISH, NDEPTH, PARITY, ZOFFSET
55*     are included as a thread-safe implementation equivalent to SAVE variables.
56*     These variables store details about the local representation tree which is
57*     computed layerwise. For scalability reasons, eigenvalues belonging to the
58*     locally relevant representation tree might be computed on other processors.
59*     These need to be communicated before the inspection of the RRRs can proceed
60*     on any given layer.
61*     Note that only when the variable FINISH is true, the computation has ended
62*     All eigenpairs between DOL and DOU have been computed. M is set = DOU - DOL + 1.
63*
64*  3. DSTEGR2B needs more workspace in Z than the sequential DSTEGR.
65*     It is used to store the conformal embedding of the local representation tree.
66*
67*  Arguments
68*  =========
69*
70*  JOBZ    (input) CHARACTER*1
71*          = 'N':  Compute eigenvalues only;
72*          = 'V':  Compute eigenvalues and eigenvectors.
73*
74*  N       (input) INTEGER
75*          The order of the matrix.  N >= 0.
76*
77*  D       (input/output) DOUBLE PRECISION array, dimension (N)
78*          On entry, the N diagonal elements of the tridiagonal matrix
79*          T. On exit, D is overwritten.
80*
81*  E       (input/output) DOUBLE PRECISION array, dimension (N)
82*          On entry, the (N-1) subdiagonal elements of the tridiagonal
83*          matrix T in elements 1 to N-1 of E. E(N) need not be set on
84*          input, but is used internally as workspace.
85*          On exit, E is overwritten.
86*
87*  M       (input) INTEGER
88*          The total number of eigenvalues found
89*          in DSTEGR2A.  0 <= M <= N.
90*
91*  W       (input) DOUBLE PRECISION array, dimension (N)
92*          The first M elements contain approximations to the selected
93*          eigenvalues in ascending order. Note that only the eigenvalues from
94*          the locally relevant part of the representation tree, that is
95*          all the clusters that include eigenvalues from DOL:DOU, are reliable
96*          on this processor. (It does not need to know about any others anyway.)
97*
98*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) )
99*          If JOBZ = 'V', and if INFO = 0, then
100*          a subset of the first M columns of Z
101*          contain the orthonormal eigenvectors of the matrix T
102*          corresponding to the selected eigenvalues, with the i-th
103*          column of Z holding the eigenvector associated with W(i).
104*          See DOL, DOU for more information.
105*
106*  LDZ     (input) INTEGER
107*          The leading dimension of the array Z.  LDZ >= 1, and if
108*          JOBZ = 'V', then LDZ >= max(1,N).
109*
110*  NZC     (input) INTEGER
111*          The number of eigenvectors to be held in the array Z.
112*
113*  ISUPPZ  (output) INTEGER ARRAY, dimension ( 2*max(1,M) )
114*          The support of the eigenvectors in Z, i.e., the indices
115*          indicating the nonzero elements in Z. The i-th computed eigenvector
116*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
117*          ISUPPZ( 2*i ). This is relevant in the case when the matrix
118*          is split. ISUPPZ is only set if N>2.
119*
120*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
121*          On exit, if INFO = 0, WORK(1) returns the optimal
122*          (and minimal) LWORK.
123*
124*  LWORK   (input) INTEGER
125*          The dimension of the array WORK. LWORK >= max(1,18*N)
126*          if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'.
127*          If LWORK = -1, then a workspace query is assumed; the routine
128*          only calculates the optimal size of the WORK array, returns
129*          this value as the first entry of the WORK array, and no error
130*          message related to LWORK is issued.
131*
132*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
133*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
134*
135*  LIWORK  (input) INTEGER
136*          The dimension of the array IWORK.  LIWORK >= max(1,10*N)
137*          if the eigenvectors are desired, and LIWORK >= max(1,8*N)
138*          if only the eigenvalues are to be computed.
139*          If LIWORK = -1, then a workspace query is assumed; the
140*          routine only calculates the optimal size of the IWORK array,
141*          returns this value as the first entry of the IWORK array, and
142*          no error message related to LIWORK is issued.
143*
144*  DOL     (input) INTEGER
145*  DOU     (input) INTEGER
146*          From the eigenvalues W(1:M), only eigenvectors
147*          Z(:,DOL) to Z(:,DOU) are computed.
148*          If DOL > 1, then Z(:,DOL-1-ZOFFSET) is used and overwritten.
149*          If DOU < M, then Z(:,DOU+1-ZOFFSET) is used and overwritten.
150*
151*  NEEDIL  (input/output) INTEGER
152*  NEEDIU  (input/output) INTEGER
153*          Describes which are the left and right outermost eigenvalues
154*          still to be computed. Initially computed by DLARRE2A,
155*          modified in the course of the algorithm.
156*
157*  INDWLC  (output) DOUBLE PRECISION
158*          Pointer into the workspace, location where the local
159*          eigenvalue representations are stored. ("Local eigenvalues"
160*          are those relative to the individual shifts of the RRRs.)
161*
162*  PIVMIN  (input) DOUBLE PRECISION
163*          The minimum pivot in the sturm sequence for T.
164*
165*  SCALE   (input) DOUBLE PRECISION
166*          The scaling factor for T. Used for unscaling the eigenvalues
167*          at the very end of the algorithm.
168*
169*  WL      (input) DOUBLE PRECISION
170*  WU      (input) DOUBLE PRECISION
171*          The interval (WL, WU] contains all the wanted eigenvalues.
172*
173*  VSTART  (input/output) LOGICAL
174*          .TRUE. on initialization, set to .FALSE. afterwards.
175*
176*  FINISH  (input/output) LOGICAL
177*          indicates whether all eigenpairs have been computed
178*
179*  MAXCLS  (input/output) INTEGER
180*          The largest cluster worked on by this processor in the
181*          representation tree.
182*
183*  NDEPTH  (input/output) INTEGER
184*          The current depth of the representation tree. Set to
185*          zero on initial pass, changed when the deeper levels of
186*          the representation tree are generated.
187*
188*  PARITY  (input/output) INTEGER
189*          An internal parameter needed for the storage of the
190*          clusters on the current level of the representation tree.
191*
192*  ZOFFSET (input) INTEGER
193*          Offset for storing the eigenpairs when Z is distributed
194*          in 1D-cyclic fashion
195*
196*  INFO    (output) INTEGER
197*          On exit, INFO
198*          = 0:  successful exit
199*          other:if INFO = -i, the i-th argument had an illegal value
200*                if INFO = 20X, internal error in DLARRV2.
201*                Here, the digit X = ABS( IINFO ) < 10, where IINFO is
202*                the nonzero error code returned by DLARRV2.
203*
204*     .. Parameters ..
205      DOUBLE PRECISION   ONE, FOUR, MINRGP
206      PARAMETER          ( ONE = 1.0D0,
207     $                     FOUR = 4.0D0,
208     $                     MINRGP = 1.0D-3 )
209*     ..
210*     .. Local Scalars ..
211      LOGICAL            LQUERY, WANTZ, ZQUERY
212      INTEGER            IINDBL, IINDW, IINDWK, IINFO, IINSPL, INDERR,
213     $                   INDGP, INDGRS, INDSDM, INDWRK, ITMP, J, LIWMIN,
214     $                   LWMIN
215      DOUBLE PRECISION   EPS, RTOL1, RTOL2
216*     ..
217*     .. External Functions ..
218      LOGICAL            LSAME
219      DOUBLE PRECISION   DLAMCH, DLANST
220      EXTERNAL           LSAME, DLAMCH, DLANST
221*     ..
222*     .. External Subroutines ..
223      EXTERNAL           DLARRV2, DSCAL
224*     ..
225*     .. Intrinsic Functions ..
226      INTRINSIC          DBLE, MAX, MIN, SQRT
227*     ..
228*     .. Executable Statements ..
229*
230*     Test the input parameters.
231*
232      WANTZ = LSAME( JOBZ, 'V' )
233*
234      LQUERY = ( ( LWORK.EQ.-1 ).OR.( LIWORK.EQ.-1 ) )
235      ZQUERY = ( NZC.EQ.-1 )
236
237*     DSTEGR2B needs WORK of size 6*N, IWORK of size 3*N.
238*     In addition, DLARRE2A needed WORK of size 6*N, IWORK of size 5*N.
239*     Workspace is kept consistent even though DLARRE2A is not called here.
240*     Furthermore, DLARRV2 needs WORK of size 12*N, IWORK of size 7*N.
241      IF( WANTZ ) THEN
242         LWMIN = 18*N
243         LIWMIN = 10*N
244      ELSE
245*        need less workspace if only the eigenvalues are wanted
246         LWMIN = 12*N
247         LIWMIN = 8*N
248      ENDIF
249*
250      INFO = 0
251*
252*     Get machine constants.
253*
254      EPS = DLAMCH( 'Precision' )
255*
256      IF( (N.EQ.0).OR.(N.EQ.1) ) THEN
257         FINISH = .TRUE.
258         RETURN
259      ENDIF
260
261      IF(ZQUERY.OR.LQUERY)
262     $   RETURN
263*
264      INDGRS = 1
265      INDERR = 2*N + 1
266      INDGP = 3*N + 1
267      INDSDM = 4*N + 1
268      INDWRK = 6*N + 1
269      INDWLC = INDWRK
270*
271      IINSPL = 1
272      IINDBL = N + 1
273      IINDW = 2*N + 1
274      IINDWK = 3*N + 1
275
276*     Set the tolerance parameters for bisection
277      RTOL1 = FOUR*SQRT(EPS)
278      RTOL2 = MAX( SQRT(EPS)*5.0D-3, FOUR * EPS )
279
280
281      IF( WANTZ ) THEN
282*
283*        Compute the desired eigenvectors corresponding to the computed
284*        eigenvalues
285*
286         CALL DLARRV2( N, WL, WU, D, E,
287     $                PIVMIN, IWORK( IINSPL ), M,
288     $                DOL, DOU, NEEDIL, NEEDIU, MINRGP, RTOL1, RTOL2,
289     $                W, WORK( INDERR ), WORK( INDGP ), IWORK( IINDBL ),
290     $                IWORK( IINDW ), WORK( INDGRS ),
291     $                WORK( INDSDM ), Z, LDZ,
292     $                ISUPPZ, WORK( INDWRK ), IWORK( IINDWK ),
293     $                VSTART, FINISH,
294     $                MAXCLS, NDEPTH, PARITY, ZOFFSET, IINFO )
295         IF( IINFO.NE.0 ) THEN
296            INFO = 200 + ABS( IINFO )
297            RETURN
298         END IF
299*
300      ELSE
301*        DLARRE2A computed eigenvalues of the (shifted) root representation
302*        DLARRV2 returns the eigenvalues of the unshifted matrix.
303*        However, if the eigenvectors are not desired by the user, we need
304*        to apply the corresponding shifts from DLARRE2A to obtain the
305*        eigenvalues of the original matrix.
306         DO 30 J = 1, M
307            ITMP = IWORK( IINDBL+J-1 )
308            W( J ) = W( J ) + E( IWORK( IINSPL+ITMP-1 ) )
309 30      CONTINUE
310*
311         FINISH = .TRUE.
312*
313      END IF
314*
315
316      IF(FINISH) THEN
317*        All eigenpairs have been computed
318
319*
320*        If matrix was scaled, then rescale eigenvalues appropriately.
321*
322         IF( SCALE.NE.ONE ) THEN
323            CALL DSCAL( M, ONE / SCALE, W, 1 )
324         END IF
325*
326*        Correct M if needed
327*
328         IF ( WANTZ ) THEN
329            IF( DOL.NE.1 .OR. DOU.NE.M ) THEN
330               M = DOU - DOL +1
331            ENDIF
332         ENDIF
333*
334*        No sorting of eigenpairs is done here, done later in the
335*        calling subroutine
336*
337         WORK( 1 ) = LWMIN
338         IWORK( 1 ) = LIWMIN
339      ENDIF
340
341      RETURN
342*
343*     End of DSTEGR2B
344*
345      END
346