1 /* This software was developed by Bruce Hendrickson and Robert Leland *
2 * at Sandia National Laboratories under US Department of Energy *
3 * contract DE-AC04-76DP00789 and is copyrighted by Sandia Corporation. */
4
5 #include <stdio.h>
6 #include "structs.h"
7 #include "defs.h"
8
9 /* Lanczos iteration with FULL orthogonalization.
10 Works in standard (version 1) or inverse operator (version 2) mode. */
11 /* Finds the lowest (zero) eigenvalue, but does not print it
12 or compute corresponding eigenvector. */
13 /* Does NOT find distinct eigenvectors corresponding to multiple
14 eigenvectors and hence will not lead to good partitioners
15 for symmetric graphs. Will be useful for graphs known
16 (believed) not to have multiple eigenvectors, e.g. graphs
17 in which a random set of edges have been added or perturbed. */
18 /* May fail on small graphs with high multiplicities (e.g. k5) if Ritz
19 pairs converge before there have been as many iterations as the number
20 of eigenvalues sought. This is rare and a different random number
21 seed will generally alleviate the problem. */
22 /* Convergence check uses Paige bji estimate over the whole
23 spectrum of T. This is a lot of work, but we are trying to be
24 extra safe. Since we are orthogonalizing fully, we assume the
25 bji esitmates are very good and don't provide a contingency for
26 when they don't match the residuals. */
27 /* A lot of the time in this routine (say half) is spent in ql finding
28 the evals of T on each iteration. This could be reduced by only using
29 ql say every 10 steps. This might require that the orthogonalization
30 be done with Householder (rather than Gram-Scmidt as currently)
31 to avoid a problem with zero beta values causing subsequent breakdown.
32 But this routine is really intended to be used for smallish problems
33 where the Lanczos runs will be short, e.g. when we are using the inverse
34 operator method. In the inverse operator case, it's really important to
35 stop quickly to avoid additional back solves. If the ql work is really
36 a problem then we should be using a selective othogonalization algorithm.
37 This routine provides a convenient reference point for how well those
38 routines are functioning since it has the same basic structure but just
39 does more orthogonalizing. */
40 /* The algorithm orthogonalizes the starting vector and the residual vectors
41 against the vector of all ones since we know that is the null space of
42 the Laplacian. This generally saves net time because Lanczos tends to
43 converge faster. */
44 /* Replaced call to ql() with call to get_ritzvals(). This starts with ql
45 bisection, whichever is predicted to be faster based on a simple complexity
46 model. If that fails it switches to the other. This routine should be
47 safer and faster than straight ql (which does occasionally fail). */
48 /* NOTE: This routine indexes beta (and workj) from 1 to maxj+1, whereas
49 selective orthogonalization indexes them from 0 to maxj. */
50
51 /* Comments for Lanczos with inverted operator: */
52 /* Used Symmlq for the back solve since already maintaining that code for
53 the RQI/Symmlq multilevel method. Straight CG would only be marginally
54 faster. */
55 /* The orthogonalization against the vector of all ones in Symmlq is not
56 as efficient as possible - could in principle use orthog1 method, but
57 don't want to disrupt the RQI/Symmlq code. Also, probably would only
58 need to orthogonalize out a given mode periodically. But we want to be
59 extra robust numericlly. */
60
lanczos_FO(A,n,d,y,lambda,bound,eigtol,vwsqrt,maxdeg,version)61 void lanczos_FO(A, n, d, y, lambda, bound, eigtol, vwsqrt, maxdeg, version)
62 struct vtx_data **A; /* graph data structure */
63 int n; /* number of rows/colums in matrix */
64 int d; /* problem dimension = # evecs to find */
65 double **y; /* columns of y are eigenvectors of A */
66 double *lambda; /* ritz approximation to eigenvals of A */
67 double *bound; /* on ritz pair approximations to eig pairs of A */
68 double eigtol; /* tolerance on eigenvectors */
69 double *vwsqrt; /* square root of vertex weights */
70 double maxdeg; /* maximum degree of graph */
71 int version; /* 1 = standard mode, 2 = inverse operator mode */
72
73 {
74 extern FILE *Output_File; /* output file or NULL */
75 extern int DEBUG_EVECS; /* print debugging output? */
76 extern int DEBUG_TRACE; /* trace main execution path */
77 extern int WARNING_EVECS; /* print warning messages? */
78 extern int LANCZOS_MAXITNS; /* maximum Lanczos iterations allowed */
79 extern double BISECTION_SAFETY; /* safety factor for bisection algorithm */
80 extern double SRESTOL; /* resid tol for T evec comp */
81 extern double DOUBLE_MAX; /* Warning on inaccurate computation of evec of T */
82 extern double splarax_time; /* time matvecs */
83 extern double orthog_time; /* time orthogonalization work */
84 extern double tevec_time; /* time tridiagonal eigvec work */
85 extern double evec_time; /* time to generate eigenvectors */
86 extern double ql_time; /* time tridiagonal eigval work */
87 extern double blas_time; /* time for blas (not assembly coded) */
88 extern double init_time; /* time for allocating memory, etc. */
89 extern double scan_time; /* time for scanning bounds list */
90 extern double debug_time; /* time for debug computations and output */
91 int i, j; /* indicies */
92 int maxj; /* maximum number of Lanczos iterations */
93 double *u, *r; /* Lanczos vectors */
94 double *Aq; /* sparse matrix-vector product vector */
95 double *alpha, *beta; /* the Lanczos scalars from each step */
96 double *ritz; /* copy of alpha for tqli */
97 double *workj; /* work vector (eg. for tqli) */
98 double *workn; /* work vector (eg. for checkeig) */
99 double *s; /* eigenvector of T */
100 double **q; /* columns of q = Lanczos basis vectors */
101 double *bj; /* beta(j)*(last element of evecs of T) */
102 double bis_safety; /* real safety factor for bisection algorithm */
103 double Sres; /* how well Tevec calculated eigvecs */
104 double Sres_max; /* Maximum value of Sres */
105 int inc_bis_safety; /* need to increase bisection safety */
106 double *Ares; /* how well Lanczos calculated each eigpair */
107 double *inv_lambda; /* eigenvalues of inverse operator */
108 int *index; /* the Ritz index of an eigenpair */
109 struct orthlink *orthlist; /* vectors to orthogonalize against in Lanczos */
110 struct orthlink *orthlist2; /* vectors to orthogonalize against in Symmlq */
111 struct orthlink *temp; /* for expanding orthogonalization list */
112 double *ritzvec; /* ritz vector for current iteration */
113 double *zeros; /* vector of all zeros */
114 double *ones; /* vector of all ones */
115 struct scanlink *scanlist; /* list of fields for min ritz vals */
116 struct scanlink *curlnk; /* for traversing the scanlist */
117 double bji_tol; /* tol on bji estimate of A e-residual */
118 int converged; /* has the iteration converged? */
119 double time; /* current clock time */
120 double shift, rtol; /* symmlq input */
121 long precon, goodb, nout; /* symmlq input */
122 long checka, intlim; /* symmlq input */
123 double anorm, acond; /* symmlq output */
124 double rnorm, ynorm; /* symmlq output */
125 long istop, itn; /* symmlq output */
126 double macheps; /* machine precision calculated by symmlq */
127 double normxlim; /* a stopping criteria for symmlq */
128 long itnmin; /* enforce minimum number of iterations */
129 int symmlqitns; /* # symmlq itns */
130 double *wv1, *wv2, *wv3; /* Symmlq work space */
131 double *wv4, *wv5, *wv6; /* Symmlq work space */
132 long long_n; /* long int copy of n for symmlq */
133 int ritzval_flag = 0; /* status flag for ql() */
134 double Anorm; /* Norm estimate of the Laplacian matrix */
135 int left, right; /* ranges on the search for ritzvals */
136 int memory_ok; /* TRUE as long as don't run out of memory */
137
138 double *mkvec(); /* allocates space for a vector */
139 double *mkvec_ret(); /* mkvec() which returns error code */
140 double *smalloc(); /* safe version of malloc */
141 double dot(); /* standard dot product routine */
142 struct orthlink *makeorthlnk(); /* make space for entry in orthog. set */
143 double norm(); /* vector norm */
144 double Tevec(); /* calc evec of T by linear recurrence */
145 struct scanlink *mkscanlist(); /* make scan list for min ritz vecs */
146 double lanc_seconds(); /* current clock timer */
147 int sfree(), symmlq_(), get_ritzvals();
148 void setvec(), vecscale(), update(), vecran(), strout();
149 void splarax(), scanmin(), scanmax(), frvec(), orthogonalize();
150 void orthog1(), orthogvec(), bail(), warnings(), mkeigvecs();
151
152 if (DEBUG_TRACE > 0) {
153 printf("<Entering lanczos_FO>\n");
154 }
155
156 if (DEBUG_EVECS > 0) {
157 if (version == 1) {
158 printf("Full orthogonalization Lanczos, matrix size = %d\n", n);
159 }
160 else {
161 printf("Full orthogonalization Lanczos, inverted operator, matrix size = %d\n", n);
162 }
163 }
164
165 /* Initialize time. */
166 time = lanc_seconds();
167
168 if (n < d + 1) {
169 bail("ERROR: System too small for number of eigenvalues requested.",1);
170 /* d+1 since don't use zero eigenvalue pair */
171 }
172
173 /* Allocate Lanczos space. */
174 maxj = LANCZOS_MAXITNS;
175 u = mkvec(1, n);
176 r = mkvec(1, n);
177 Aq = mkvec(1, n);
178 ritzvec = mkvec(1, n);
179 zeros = mkvec(1, n);
180 setvec(zeros, 1, n, 0.0);
181 workn = mkvec(1, n);
182 Ares = mkvec(1, d);
183 inv_lambda = mkvec(1, d);
184 index = (int *) smalloc((unsigned) (d + 1) * sizeof(int));
185 alpha = mkvec(1, maxj);
186 beta = mkvec(1, maxj + 1);
187 ritz = mkvec(1, maxj);
188 s = mkvec(1, maxj);
189 bj = mkvec(1, maxj);
190 workj = mkvec(1, maxj + 1);
191 q = (double **) smalloc((unsigned) (maxj + 1) * sizeof(double *));
192 scanlist = mkscanlist(d);
193
194 if (version == 2) {
195 /* Allocate Symmlq space all in one chunk. */
196 wv1 = (double *) smalloc((unsigned) 6 * (n + 1) * sizeof(double));
197 wv2 = &wv1[(n + 1)];
198 wv3 = &wv1[2 * (n + 1)];
199 wv4 = &wv1[3 * (n + 1)];
200 wv5 = &wv1[4 * (n + 1)];
201 wv6 = &wv1[5 * (n + 1)];
202
203 /* Set invariant symmlq parameters */
204 precon = FALSE; /* FALSE until we figure out a good way */
205 goodb = FALSE; /* should be FALSE for this application */
206 checka = FALSE; /* if don't know by now, too bad */
207 intlim = n; /* set to enforce a maximum number of Symmlq itns */
208 itnmin = 0; /* set to enforce a minimum number of Symmlq itns */
209 shift = 0.0; /* since just solving rather than doing RQI */
210 symmlqitns = 0; /* total number of Symmlq iterations */
211 nout = 0; /* Effectively disabled - see notes in symmlq.f */
212 rtol = 1.0e-5; /* requested residual tolerance */
213 normxlim = DOUBLE_MAX; /* Effectively disables ||x|| termination criterion */
214 long_n = n; /* copy to long for linting */
215 }
216
217 /* Initialize. */
218 vecran(r, 1, n);
219 if (vwsqrt == NULL) {
220 /* whack one's direction from initial vector */
221 orthog1(r, 1, n);
222
223 /* list the ones direction for later use in Symmlq */
224 if (version == 2) {
225 orthlist2 = makeorthlnk();
226 ones = mkvec(1, n);
227 setvec(ones, 1, n, 1.0);
228 orthlist2->vec = ones;
229 orthlist2->pntr = NULL;
230 }
231 }
232 else {
233 /* whack vwsqrt direction from initial vector */
234 orthogvec(r, 1, n, vwsqrt);
235
236 if (version == 2) {
237 /* list the vwsqrt direction for later use in Symmlq */
238 orthlist2 = makeorthlnk();
239 orthlist2->vec = vwsqrt;
240 orthlist2->pntr = NULL;
241 }
242 }
243 beta[1] = norm(r, 1, n);
244 q[0] = zeros;
245 bji_tol = eigtol;
246 orthlist = NULL;
247 Sres_max = 0.0;
248 Anorm = 2 * maxdeg; /* Gershgorin estimate for ||A|| */
249 bis_safety = BISECTION_SAFETY;
250 inc_bis_safety = FALSE;
251 init_time += lanc_seconds() - time;
252
253 /* Main Lanczos loop. */
254 j = 1;
255 converged = FALSE;
256 memory_ok = TRUE;
257 while ((j <= maxj) && (converged == FALSE) && memory_ok) {
258 time = lanc_seconds();
259
260 /* Allocate next Lanczos vector. If fail, back up one step and compute approx. eigvec. */
261 q[j] = mkvec_ret(1, n);
262 if (q[j] == NULL) {
263 memory_ok = FALSE;
264 if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
265 strout("WARNING: Lanczos out of memory; computing best approximation available.\n");
266 }
267 if (j <= 2) {
268 bail("ERROR: Sorry, can't salvage Lanczos.",1);
269 /* ... save yourselves, men. */
270 }
271 j--;
272 }
273
274 vecscale(q[j], 1, n, 1.0 / beta[j], r);
275 blas_time += lanc_seconds() - time;
276 time = lanc_seconds();
277 if (version == 1) {
278 splarax(Aq, A, n, q[j], vwsqrt, workn);
279 }
280 else {
281 symmlq_(&long_n, &(q[j][1]), &wv1[1], &wv2[1], &wv3[1], &wv4[1], &Aq[1], &wv5[1],
282 &wv6[1], &checka, &goodb, &precon, &shift, &nout,
283 &intlim, &rtol, &istop, &itn, &anorm, &acond,
284 &rnorm, &ynorm, (double *) A, vwsqrt, (double *) orthlist2,
285 &macheps, &normxlim, &itnmin);
286 symmlqitns += itn;
287 if (DEBUG_EVECS > 2) {
288 printf("Symmlq report: rtol %g\n", rtol);
289 printf(" system norm %g, solution norm %g\n", anorm, ynorm);
290 printf(" system condition %g, residual %g\n", acond, rnorm);
291 printf(" termination condition %2ld, iterations %3ld\n", istop, itn);
292 }
293 }
294 splarax_time += lanc_seconds() - time;
295 time = lanc_seconds();
296 update(u, 1, n, Aq, -beta[j], q[j - 1]);
297 alpha[j] = dot(u, 1, n, q[j]);
298 update(r, 1, n, u, -alpha[j], q[j]);
299 blas_time += lanc_seconds() - time;
300 time = lanc_seconds();
301 if (vwsqrt == NULL) {
302 orthog1(r, 1, n);
303 }
304 else {
305 orthogvec(r, 1, n, vwsqrt);
306 }
307 orthogonalize(r, n, orthlist);
308 temp = orthlist;
309 orthlist = makeorthlnk();
310 orthlist->vec = q[j];
311 orthlist->pntr = temp;
312 beta[j + 1] = norm(r, 1, n);
313 orthog_time += lanc_seconds() - time;
314
315 time = lanc_seconds();
316 left = j/2;
317 right = j - left + 1;
318 if (inc_bis_safety) {
319 bis_safety *= 10;
320 inc_bis_safety = FALSE;
321 }
322 ritzval_flag = get_ritzvals(alpha, beta+1, j, Anorm, workj+1,
323 ritz, d, left, right, eigtol, bis_safety);
324 /* ... have to off-set beta and workj since full orthogonalization
325 indexes these from 1 to maxj+1 whereas selective orthog.
326 indexes them from 0 to maxj */
327
328 if (ritzval_flag != 0) {
329 bail("ERROR: Both Sturm bisection and QL failed.",1);
330 /* ... give up. */
331 }
332 ql_time += lanc_seconds() - time;
333
334 /* Convergence check using Paige bji estimates. */
335 time = lanc_seconds();
336 for (i = 1; i <= j; i++) {
337 Sres = Tevec(alpha, beta, j, ritz[i], s);
338 if (Sres > Sres_max) {
339 Sres_max = Sres;
340 }
341 if (Sres > SRESTOL) {
342 inc_bis_safety = TRUE;
343 }
344 bj[i] = s[j] * beta[j + 1];
345 }
346 tevec_time += lanc_seconds() - time;
347
348
349 time = lanc_seconds();
350 if (version == 1) {
351 scanmin(ritz, 1, j, &scanlist);
352 }
353 else {
354 scanmax(ritz, 1, j, &scanlist);
355 }
356 converged = TRUE;
357 if (j < d)
358 converged = FALSE;
359 else {
360 curlnk = scanlist;
361 while (curlnk != NULL) {
362 if (bj[curlnk->indx] > bji_tol) {
363 converged = FALSE;
364 }
365 curlnk = curlnk->pntr;
366 }
367 }
368 scan_time += lanc_seconds() - time;
369 j++;
370 }
371 j--;
372
373 /* Collect eigenvalue and bound information. */
374 time = lanc_seconds();
375 mkeigvecs(scanlist,lambda,bound,index,bj,d,&Sres_max,alpha,beta+1,j,s,y,n,q);
376 evec_time += lanc_seconds() - time;
377
378 /* Analyze computation for and report additional problems */
379 time = lanc_seconds();
380 if (DEBUG_EVECS>0 && version == 2) {
381 printf("\nTotal Symmlq iterations %3d\n", symmlqitns);
382 }
383 if (version == 2) {
384 for (i = 1; i <= d; i++) {
385 lambda[i] = 1.0/lambda[i];
386 }
387 }
388 warnings(workn, A, y, n, lambda, vwsqrt, Ares, bound, index,
389 d, j, maxj, Sres_max, eigtol, u, Anorm, Output_File);
390 debug_time += lanc_seconds() - time;
391
392 /* Free any memory allocated in this routine. */
393 time = lanc_seconds();
394 frvec(u, 1);
395 frvec(r, 1);
396 frvec(Aq, 1);
397 frvec(ritzvec, 1);
398 frvec(zeros, 1);
399 if (vwsqrt == NULL && version == 2) {
400 frvec(ones, 1);
401 }
402 frvec(workn, 1);
403 frvec(Ares, 1);
404 frvec(inv_lambda, 1);
405 sfree((char *) index);
406 frvec(alpha, 1);
407 frvec(beta, 1);
408 frvec(ritz, 1);
409 frvec(s, 1);
410 frvec(bj, 1);
411 frvec(workj, 1);
412 if (version == 2) {
413 frvec(wv1, 0);
414 }
415 while (scanlist != NULL) {
416 curlnk = scanlist->pntr;
417 sfree((char *) scanlist);
418 scanlist = curlnk;
419 }
420 for (i = 1; i <= j; i++) {
421 frvec(q[i], 1);
422 }
423 while (orthlist != NULL) {
424 temp = orthlist->pntr;
425 sfree((char *) orthlist);
426 orthlist = temp;
427 }
428 while (orthlist2 != NULL && version == 2) {
429 temp = orthlist2->pntr;
430 sfree((char *) orthlist2);
431 orthlist2 = temp;
432 }
433 sfree((char *) q);
434 init_time += lanc_seconds() - time;
435 }
436