1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31   * code. The source files in the template_lapack directory are modified
32   * versions of files originally distributed as CLAPACK, see the
33   * Copyright/license notice in the file template_lapack/COPYING.
34   */
35 
36 
37 #ifndef TEMPLATE_LAPACK_LARRB_HEADER
38 #define TEMPLATE_LAPACK_LARRB_HEADER
39 
40 
41 #include "template_lapack_laneg.h"
42 
43 
44 template<class Treal>
template_lapack_larrb(integer * n,Treal * d__,Treal * lld,integer * ifirst,integer * ilast,Treal * rtol1,Treal * rtol2,integer * offset,Treal * w,Treal * wgap,Treal * werr,Treal * work,integer * iwork,Treal * pivmin,Treal * spdiam,integer * twist,integer * info)45 int template_lapack_larrb(integer *n, Treal *d__, Treal *lld,
46 	integer *ifirst, integer *ilast, Treal *rtol1, Treal *rtol2,
47 	 integer *offset, Treal *w, Treal *wgap, Treal *werr,
48 	Treal *work, integer *iwork, Treal *pivmin, Treal *
49 	spdiam, integer *twist, integer *info)
50 {
51     /* System generated locals */
52     integer i__1;
53     Treal d__1, d__2;
54 
55     /* Local variables */
56     integer i__, k, r__, i1, ii, ip;
57     Treal gap, mid, tmp, back, lgap, rgap, left;
58     integer iter, nint, prev, next;
59     Treal cvrgd, right, width;
60     integer negcnt;
61     Treal mnwdth;
62     integer olnint, maxitr;
63 
64 
65 /*  -- LAPACK auxiliary routine (version 3.2) -- */
66 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
67 /*     November 2006 */
68 
69 /*     .. Scalar Arguments .. */
70 /*     .. */
71 /*     .. Array Arguments .. */
72 /*     .. */
73 
74 /*  Purpose */
75 /*  ======= */
76 
77 /*  Given the relatively robust representation(RRR) L D L^T, DLARRB */
78 /*  does "limited" bisection to refine the eigenvalues of L D L^T, */
79 /*  W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */
80 /*  guesses for these eigenvalues are input in W, the corresponding estimate */
81 /*  of the error in these guesses and their gaps are input in WERR */
82 /*  and WGAP, respectively. During bisection, intervals */
83 /*  [left, right] are maintained by storing their mid-points and */
84 /*  semi-widths in the arrays W and WERR respectively. */
85 
86 /*  Arguments */
87 /*  ========= */
88 
89 /*  N       (input) INTEGER */
90 /*          The order of the matrix. */
91 
92 /*  D       (input) DOUBLE PRECISION array, dimension (N) */
93 /*          The N diagonal elements of the diagonal matrix D. */
94 
95 /*  LLD     (input) DOUBLE PRECISION array, dimension (N-1) */
96 /*          The (N-1) elements L(i)*L(i)*D(i). */
97 
98 /*  IFIRST  (input) INTEGER */
99 /*          The index of the first eigenvalue to be computed. */
100 
101 /*  ILAST   (input) INTEGER */
102 /*          The index of the last eigenvalue to be computed. */
103 
104 /*  RTOL1   (input) DOUBLE PRECISION */
105 /*  RTOL2   (input) DOUBLE PRECISION */
106 /*          Tolerance for the convergence of the bisection intervals. */
107 /*          An interval [LEFT,RIGHT] has converged if */
108 /*          RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
109 /*          where GAP is the (estimated) distance to the nearest */
110 /*          eigenvalue. */
111 
112 /*  OFFSET  (input) INTEGER */
113 /*          Offset for the arrays W, WGAP and WERR, i.e., the IFIRST-OFFSET */
114 /*          through ILAST-OFFSET elements of these arrays are to be used. */
115 
116 /*  W       (input/output) DOUBLE PRECISION array, dimension (N) */
117 /*          On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */
118 /*          estimates of the eigenvalues of L D L^T indexed IFIRST throug */
119 /*          ILAST. */
120 /*          On output, these estimates are refined. */
121 
122 /*  WGAP    (input/output) DOUBLE PRECISION array, dimension (N-1) */
123 /*          On input, the (estimated) gaps between consecutive */
124 /*          eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between */
125 /*          eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST */
126 /*          then WGAP(IFIRST-OFFSET) must be set to ZERO. */
127 /*          On output, these gaps are refined. */
128 
129 /*  WERR    (input/output) DOUBLE PRECISION array, dimension (N) */
130 /*          On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */
131 /*          the errors in the estimates of the corresponding elements in W. */
132 /*          On output, these errors are refined. */
133 
134 /*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N) */
135 /*          Workspace. */
136 
137 /*  IWORK   (workspace) INTEGER array, dimension (2*N) */
138 /*          Workspace. */
139 
140 /*  PIVMIN  (input) DOUBLE PRECISION */
141 /*          The minimum pivot in the Sturm sequence. */
142 
143 /*  SPDIAM  (input) DOUBLE PRECISION */
144 /*          The spectral diameter of the matrix. */
145 
146 /*  TWIST   (input) INTEGER */
147 /*          The twist index for the twisted factorization that is used */
148 /*          for the negcount. */
149 /*          TWIST = N: Compute negcount from L D L^T - LAMBDA I = L+ D+ L+^T */
150 /*          TWIST = 1: Compute negcount from L D L^T - LAMBDA I = U- D- U-^T */
151 /*          TWIST = R: Compute negcount from L D L^T - LAMBDA I = N(r) D(r) N(r) */
152 
153 /*  INFO    (output) INTEGER */
154 /*          Error flag. */
155 
156 /*  Further Details */
157 /*  =============== */
158 
159 /*  Based on contributions by */
160 /*     Beresford Parlett, University of California, Berkeley, USA */
161 /*     Jim Demmel, University of California, Berkeley, USA */
162 /*     Inderjit Dhillon, University of Texas, Austin, USA */
163 /*     Osni Marques, LBNL/NERSC, USA */
164 /*     Christof Voemel, University of California, Berkeley, USA */
165 
166 /*  ===================================================================== */
167 
168 /*     .. Parameters .. */
169 /*     .. */
170 /*     .. Local Scalars .. */
171 /*     .. */
172 /*     .. External Functions .. */
173 
174 /*     .. */
175 /*     .. Intrinsic Functions .. */
176 /*     .. */
177 /*     .. Executable Statements .. */
178 
179     /* Parameter adjustments */
180     --iwork;
181     --work;
182     --werr;
183     --wgap;
184     --w;
185     --lld;
186     --d__;
187 
188     /* Function Body */
189     *info = 0;
190 
191     maxitr = (integer) ((template_blas_log(*spdiam + *pivmin) - template_blas_log(*pivmin)) / template_blas_log(2.)) +
192 	    2;
193     mnwdth = *pivmin * 2.;
194 
195     r__ = *twist;
196     if (r__ < 1 || r__ > *n) {
197 	r__ = *n;
198     }
199 
200 /*     Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */
201 /*     The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */
202 /*     Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) */
203 /*     for an unconverged interval is set to the index of the next unconverged */
204 /*     interval, and is -1 or 0 for a converged interval. Thus a linked */
205 /*     list of unconverged intervals is set up. */
206 
207     i1 = *ifirst;
208 /*     The number of unconverged intervals */
209     nint = 0;
210 /*     The last unconverged interval found */
211     prev = 0;
212     rgap = wgap[i1 - *offset];
213     i__1 = *ilast;
214     for (i__ = i1; i__ <= i__1; ++i__) {
215 	k = i__ << 1;
216 	ii = i__ - *offset;
217 	left = w[ii] - werr[ii];
218 	right = w[ii] + werr[ii];
219 	lgap = rgap;
220 	rgap = wgap[ii];
221 	gap = minMACRO(lgap,rgap);
222 /*        Make sure that [LEFT,RIGHT] contains the desired eigenvalue */
223 /*        Compute negcount from dstqds facto L+D+L+^T = L D L^T - LEFT */
224 
225 /*        Do while( NEGCNT(LEFT).GT.I-1 ) */
226 
227 	back = werr[ii];
228 L20:
229 	negcnt = template_lapack_laneg(n, &d__[1], &lld[1], &left, pivmin, &r__);
230 	if (negcnt > i__ - 1) {
231 	    left -= back;
232 	    back *= 2.;
233 	    goto L20;
234 	}
235 
236 /*        Do while( NEGCNT(RIGHT).LT.I ) */
237 /*        Compute negcount from dstqds facto L+D+L+^T = L D L^T - RIGHT */
238 
239 	back = werr[ii];
240 L50:
241 	negcnt = template_lapack_laneg(n, &d__[1], &lld[1], &right, pivmin, &r__);
242 	if (negcnt < i__) {
243 	    right += back;
244 	    back *= 2.;
245 	    goto L50;
246 	}
247 	width = (d__1 = left - right, absMACRO(d__1)) * .5;
248 /* Computing MAX */
249 	d__1 = absMACRO(left), d__2 = absMACRO(right);
250 	tmp = maxMACRO(d__1,d__2);
251 /* Computing MAX */
252 	d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp;
253 	cvrgd = maxMACRO(d__1,d__2);
254 	if (width <= cvrgd || width <= mnwdth) {
255 /*           This interval has already converged and does not need refinement. */
256 /*           (Note that the gaps might change through refining the */
257 /*            eigenvalues, however, they can only get bigger.) */
258 /*           Remove it from the list. */
259 	    iwork[k - 1] = -1;
260 /*           Make sure that I1 always points to the first unconverged interval */
261 	    if (i__ == i1 && i__ < *ilast) {
262 		i1 = i__ + 1;
263 	    }
264 	    if (prev >= i1 && i__ <= *ilast) {
265 		iwork[(prev << 1) - 1] = i__ + 1;
266 	    }
267 	} else {
268 /*           unconverged interval found */
269 	    prev = i__;
270 	    ++nint;
271 	    iwork[k - 1] = i__ + 1;
272 	    iwork[k] = negcnt;
273 	}
274 	work[k - 1] = left;
275 	work[k] = right;
276 /* L75: */
277     }
278 
279 /*     Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */
280 /*     and while (ITER.LT.MAXITR) */
281 
282     iter = 0;
283 L80:
284     prev = i1 - 1;
285     i__ = i1;
286     olnint = nint;
287     i__1 = olnint;
288     for (ip = 1; ip <= i__1; ++ip) {
289 	k = i__ << 1;
290 	ii = i__ - *offset;
291 	rgap = wgap[ii];
292 	lgap = rgap;
293 	if (ii > 1) {
294 	    lgap = wgap[ii - 1];
295 	}
296 	gap = minMACRO(lgap,rgap);
297 	next = iwork[k - 1];
298 	left = work[k - 1];
299 	right = work[k];
300 	mid = (left + right) * .5;
301 /*        semiwidth of interval */
302 	width = right - mid;
303 /* Computing MAX */
304 	d__1 = absMACRO(left), d__2 = absMACRO(right);
305 	tmp = maxMACRO(d__1,d__2);
306 /* Computing MAX */
307 	d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp;
308 	cvrgd = maxMACRO(d__1,d__2);
309 	if (width <= cvrgd || width <= mnwdth || iter == maxitr) {
310 /*           reduce number of unconverged intervals */
311 	    --nint;
312 /*           Mark interval as converged. */
313 	    iwork[k - 1] = 0;
314 	    if (i1 == i__) {
315 		i1 = next;
316 	    } else {
317 /*              Prev holds the last unconverged interval previously examined */
318 		if (prev >= i1) {
319 		    iwork[(prev << 1) - 1] = next;
320 		}
321 	    }
322 	    i__ = next;
323 	    goto L100;
324 	}
325 	prev = i__;
326 
327 /*        Perform one bisection step */
328 
329 	negcnt = template_lapack_laneg(n, &d__[1], &lld[1], &mid, pivmin, &r__);
330 	if (negcnt <= i__ - 1) {
331 	    work[k - 1] = mid;
332 	} else {
333 	    work[k] = mid;
334 	}
335 	i__ = next;
336 L100:
337 	;
338     }
339     ++iter;
340 /*     do another loop if there are still unconverged intervals */
341 /*     However, in the last iteration, all intervals are accepted */
342 /*     since this is the best we can do. */
343     if (nint > 0 && iter <= maxitr) {
344 	goto L80;
345     }
346 
347 
348 /*     At this point, all the intervals have converged */
349     i__1 = *ilast;
350     for (i__ = *ifirst; i__ <= i__1; ++i__) {
351 	k = i__ << 1;
352 	ii = i__ - *offset;
353 /*        All intervals marked by '0' have been refined. */
354 	if (iwork[k - 1] == 0) {
355 	    w[ii] = (work[k - 1] + work[k]) * .5;
356 	    werr[ii] = work[k] - w[ii];
357 	}
358 /* L110: */
359     }
360 
361     i__1 = *ilast;
362     for (i__ = *ifirst + 1; i__ <= i__1; ++i__) {
363 	k = i__ << 1;
364 	ii = i__ - *offset;
365 /* Computing MAX */
366 	d__1 = 0., d__2 = w[ii] - werr[ii] - w[ii - 1] - werr[ii - 1];
367 	wgap[ii - 1] = maxMACRO(d__1,d__2);
368 /* L111: */
369     }
370     return 0;
371 
372 /*     End of DLARRB */
373 
374 } /* dlarrb_ */
375 
376 #endif
377