1 /* This software was developed by Bruce Hendrickson and Robert Leland   *
2  * at Sandia National Laboratories under US Department of Energy        *
3  * contract DE-AC04-76DP00789 and is copyrighted by Sandia Corporation. */
4 
5 #include <stdio.h>
6 #include "structs.h"
7 #include "defs.h"
8 
9 /* Lanczos iteration with FULL orthogonalization.
10    Works in standard (version 1) or inverse operator (version 2) mode. */
11 /* Finds the lowest (zero) eigenvalue, but does not print it
12    or compute corresponding eigenvector. */
13 /* Does NOT find distinct eigenvectors corresponding to multiple
14    eigenvectors and hence will not lead to good partitioners
15    for symmetric graphs. Will be useful for graphs known
16    (believed) not to have multiple eigenvectors, e.g. graphs
17    in which a random set of edges have been added or perturbed. */
18 /* May fail on small graphs with high multiplicities (e.g. k5) if Ritz
19    pairs converge before there have been as many iterations as the number
20    of eigenvalues sought. This is rare and a different random number
21    seed will generally alleviate the problem. */
22 /* Convergence check uses Paige bji estimate over the whole
23    spectrum of T. This is a lot of work, but we are trying to be
24    extra safe. Since we are orthogonalizing fully, we assume the
25    bji esitmates are very good and don't provide a contingency for
26    when they don't match the residuals. */
27 /* A lot of the time in this routine (say half) is spent in ql finding
28    the evals of T on each iteration. This could be reduced by only using
29    ql say every 10 steps. This might require that the orthogonalization
30    be done with Householder (rather than Gram-Scmidt as currently)
31    to avoid a problem with zero beta values causing subsequent breakdown.
32    But this routine is really intended to be used for smallish problems
33    where the Lanczos runs will be short, e.g. when we are using the inverse
34    operator method. In the inverse operator case, it's really important to
35    stop quickly to avoid additional back solves. If the ql work is really
36    a problem then we should be using a selective othogonalization algorithm.
37    This routine provides a convenient reference point for how well those
38    routines are functioning since it has the same basic structure but just
39    does more orthogonalizing. */
40 /* The algorithm orthogonalizes the starting vector and the residual vectors
41    against the vector of all ones since we know that is the null space of
42    the Laplacian. This generally saves net time because Lanczos tends to
43    converge faster. */
44 /* Replaced call to ql() with call to get_ritzvals(). This starts with ql
45    bisection, whichever is predicted to be faster based on a simple complexity
46    model. If that fails it switches to the other. This routine should be
47    safer and faster than straight ql (which does occasionally fail). */
48 /* NOTE: This routine indexes beta (and workj) from 1 to maxj+1, whereas
49    selective orthogonalization indexes them from 0 to maxj. */
50 
51 /* Comments for Lanczos with inverted operator: */
52 /* Used Symmlq for the back solve since already maintaining that code for
53    the RQI/Symmlq multilevel method. Straight CG would only be marginally
54    faster. */
55 /* The orthogonalization against the vector of all ones in Symmlq is not
56    as efficient as possible - could in principle use orthog1 method, but
57    don't want to disrupt the RQI/Symmlq code. Also, probably would only
58    need to orthogonalize out a given mode periodically. But we want to be
59    extra robust numericlly. */
60 
lanczos_FO(A,n,d,y,lambda,bound,eigtol,vwsqrt,maxdeg,version)61 void      lanczos_FO(A, n, d, y, lambda, bound, eigtol, vwsqrt, maxdeg, version)
62 struct vtx_data **A;		/* graph data structure */
63 int       n;			/* number of rows/colums in matrix */
64 int       d;			/* problem dimension = # evecs to find */
65 double  **y;			/* columns of y are eigenvectors of A  */
66 double   *lambda;		/* ritz approximation to eigenvals of A */
67 double   *bound;		/* on ritz pair approximations to eig pairs of A */
68 double    eigtol;		/* tolerance on eigenvectors */
69 double   *vwsqrt;		/* square root of vertex weights */
70 double    maxdeg;               /* maximum degree of graph */
71 int       version;		/* 1 = standard mode, 2 = inverse operator mode */
72 
73 {
74     extern FILE *Output_File;	/* output file or NULL */
75     extern int DEBUG_EVECS;	/* print debugging output? */
76     extern int DEBUG_TRACE;	/* trace main execution path */
77     extern int WARNING_EVECS;	/* print warning messages? */
78     extern int LANCZOS_MAXITNS;         /* maximum Lanczos iterations allowed */
79     extern double BISECTION_SAFETY;	/* safety factor for bisection algorithm */
80     extern double SRESTOL;		/* resid tol for T evec comp */
81     extern double DOUBLE_MAX;	/* Warning on inaccurate computation of evec of T */
82     extern double splarax_time;	/* time matvecs */
83     extern double orthog_time;	/* time orthogonalization work */
84     extern double tevec_time;	/* time tridiagonal eigvec work */
85     extern double evec_time;	/* time to generate eigenvectors */
86     extern double ql_time;      /* time tridiagonal eigval work */
87     extern double blas_time;	/* time for blas (not assembly coded) */
88     extern double init_time;	/* time for allocating memory, etc. */
89     extern double scan_time;	/* time for scanning bounds list */
90     extern double debug_time;	/* time for debug computations and output */
91     int       i, j;		/* indicies */
92     int       maxj;		/* maximum number of Lanczos iterations */
93     double   *u, *r;		/* Lanczos vectors */
94     double   *Aq;		/* sparse matrix-vector product vector */
95     double   *alpha, *beta;	/* the Lanczos scalars from each step */
96     double   *ritz;		/* copy of alpha for tqli */
97     double   *workj;		/* work vector (eg. for tqli) */
98     double   *workn;		/* work vector (eg. for checkeig) */
99     double   *s;		/* eigenvector of T */
100     double  **q;		/* columns of q = Lanczos basis vectors */
101     double   *bj;		/* beta(j)*(last element of evecs of T) */
102     double    bis_safety;	/* real safety factor for bisection algorithm */
103     double    Sres;		/* how well Tevec calculated eigvecs */
104     double    Sres_max;		/* Maximum value of Sres */
105     int       inc_bis_safety;	/* need to increase bisection safety */
106     double   *Ares;		/* how well Lanczos calculated each eigpair */
107     double   *inv_lambda;	/* eigenvalues of inverse operator */
108     int      *index;		/* the Ritz index of an eigenpair */
109     struct orthlink *orthlist;	/* vectors to orthogonalize against in Lanczos */
110     struct orthlink *orthlist2;	/* vectors to orthogonalize against in Symmlq */
111     struct orthlink *temp;	/* for expanding orthogonalization list */
112     double   *ritzvec;		/* ritz vector for current iteration */
113     double   *zeros;		/* vector of all zeros */
114     double   *ones;		/* vector of all ones */
115     struct scanlink *scanlist;	/* list of fields for min ritz vals */
116     struct scanlink *curlnk;	/* for traversing the scanlist */
117     double    bji_tol;		/* tol on bji estimate of A e-residual */
118     int       converged;	/* has the iteration converged? */
119     double    time;		/* current clock time */
120     double    shift, rtol;		/* symmlq input */
121     long      precon, goodb, nout;	/* symmlq input */
122     long      checka, intlim;	/* symmlq input */
123     double    anorm, acond;	/* symmlq output */
124     double    rnorm, ynorm;	/* symmlq output */
125     long      istop, itn;	/* symmlq output */
126     double    macheps;		/* machine precision calculated by symmlq */
127     double    normxlim;		/* a stopping criteria for symmlq */
128     long      itnmin;		/* enforce minimum number of iterations */
129     int       symmlqitns;	/* # symmlq itns */
130     double   *wv1, *wv2, *wv3;	/* Symmlq work space */
131     double   *wv4, *wv5, *wv6;	/* Symmlq work space */
132     long      long_n;		/* long int copy of n for symmlq */
133     int       ritzval_flag = 0;	/* status flag for ql() */
134     double    Anorm;            /* Norm estimate of the Laplacian matrix */
135     int       left, right;      /* ranges on the search for ritzvals */
136     int       memory_ok;        /* TRUE as long as don't run out of memory */
137 
138     double   *mkvec();		/* allocates space for a vector */
139     double   *mkvec_ret();      /* mkvec() which returns error code */
140     double   *smalloc();	/* safe version of malloc */
141     double    dot();		/* standard dot product routine */
142     struct orthlink *makeorthlnk();	/* make space for entry in orthog. set */
143     double    norm();		/* vector norm */
144     double    Tevec();		/* calc evec of T by linear recurrence */
145     struct scanlink *mkscanlist();	/* make scan list for min ritz vecs */
146     double    lanc_seconds();	/* current clock timer */
147     int       sfree(), symmlq_(), get_ritzvals();
148     void      setvec(), vecscale(), update(), vecran(), strout();
149     void      splarax(), scanmin(), scanmax(), frvec(), orthogonalize();
150     void      orthog1(), orthogvec(), bail(), warnings(), mkeigvecs();
151 
152     if (DEBUG_TRACE > 0) {
153         printf("<Entering lanczos_FO>\n");
154     }
155 
156     if (DEBUG_EVECS > 0) {
157 	if (version == 1) {
158     	    printf("Full orthogonalization Lanczos, matrix size = %d\n", n);
159 	}
160 	else {
161     	    printf("Full orthogonalization Lanczos, inverted operator, matrix size = %d\n", n);
162 	}
163     }
164 
165     /* Initialize time. */
166     time = lanc_seconds();
167 
168     if (n < d + 1) {
169 	bail("ERROR: System too small for number of eigenvalues requested.",1);
170 	/* d+1 since don't use zero eigenvalue pair */
171     }
172 
173     /* Allocate Lanczos space. */
174     maxj = LANCZOS_MAXITNS;
175     u = mkvec(1, n);
176     r = mkvec(1, n);
177     Aq = mkvec(1, n);
178     ritzvec = mkvec(1, n);
179     zeros = mkvec(1, n);
180     setvec(zeros, 1, n, 0.0);
181     workn = mkvec(1, n);
182     Ares = mkvec(1, d);
183     inv_lambda = mkvec(1, d);
184     index = (int *) smalloc((unsigned) (d + 1) * sizeof(int));
185     alpha = mkvec(1, maxj);
186     beta = mkvec(1, maxj + 1);
187     ritz = mkvec(1, maxj);
188     s = mkvec(1, maxj);
189     bj = mkvec(1, maxj);
190     workj = mkvec(1, maxj + 1);
191     q = (double **) smalloc((unsigned) (maxj + 1) * sizeof(double *));
192     scanlist = mkscanlist(d);
193 
194     if (version == 2) {
195         /* Allocate Symmlq space all in one chunk. */
196         wv1 = (double *) smalloc((unsigned) 6 * (n + 1) * sizeof(double));
197         wv2 = &wv1[(n + 1)];
198         wv3 = &wv1[2 * (n + 1)];
199         wv4 = &wv1[3 * (n + 1)];
200         wv5 = &wv1[4 * (n + 1)];
201         wv6 = &wv1[5 * (n + 1)];
202 
203         /* Set invariant symmlq parameters */
204         precon = FALSE;		/* FALSE until we figure out a good way */
205         goodb = FALSE;		/* should be FALSE for this application */
206         checka = FALSE;		/* if don't know by now, too bad */
207         intlim = n;			/* set to enforce a maximum number of Symmlq itns */
208         itnmin = 0;			/* set to enforce a minimum number of Symmlq itns */
209         shift = 0.0;		/* since just solving rather than doing RQI */
210         symmlqitns = 0;		/* total number of Symmlq iterations */
211         nout = 0;			/* Effectively disabled - see notes in symmlq.f */
212         rtol = 1.0e-5;		/* requested residual tolerance */
213         normxlim = DOUBLE_MAX;	/* Effectively disables ||x|| termination criterion */
214         long_n = n;			/* copy to long for linting */
215     }
216 
217     /* Initialize. */
218     vecran(r, 1, n);
219     if (vwsqrt == NULL) {
220 	/* whack one's direction from initial vector */
221 	orthog1(r, 1, n);
222 
223 	/* list the ones direction for later use in Symmlq */
224 	if (version == 2) {
225 	    orthlist2 = makeorthlnk();
226 	    ones = mkvec(1, n);
227 	    setvec(ones, 1, n, 1.0);
228 	    orthlist2->vec = ones;
229 	    orthlist2->pntr = NULL;
230 	}
231     }
232     else {
233 	/* whack vwsqrt direction from initial vector */
234 	orthogvec(r, 1, n, vwsqrt);
235 
236 	if (version == 2) {
237 	    /* list the vwsqrt direction for later use in Symmlq */
238 	    orthlist2 = makeorthlnk();
239 	    orthlist2->vec = vwsqrt;
240 	    orthlist2->pntr = NULL;
241 	}
242     }
243     beta[1] = norm(r, 1, n);
244     q[0] = zeros;
245     bji_tol = eigtol;
246     orthlist = NULL;
247     Sres_max = 0.0;
248     Anorm = 2 * maxdeg;                         /* Gershgorin estimate for ||A|| */
249     bis_safety = BISECTION_SAFETY;
250     inc_bis_safety = FALSE;
251     init_time += lanc_seconds() - time;
252 
253     /* Main Lanczos loop. */
254     j = 1;
255     converged = FALSE;
256     memory_ok = TRUE;
257     while ((j <= maxj) && (converged == FALSE) && memory_ok) {
258 	time = lanc_seconds();
259 
260 	/* Allocate next Lanczos vector. If fail, back up one step and compute approx. eigvec. */
261 	q[j] = mkvec_ret(1, n);
262         if (q[j] == NULL) {
263 	    memory_ok = FALSE;
264   	    if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
265                 strout("WARNING: Lanczos out of memory; computing best approximation available.\n");
266             }
267 	    if (j <= 2) {
268 	        bail("ERROR: Sorry, can't salvage Lanczos.",1);
269   	        /* ... save yourselves, men.  */
270 	    }
271             j--;
272 	}
273 
274 	vecscale(q[j], 1, n, 1.0 / beta[j], r);
275 	blas_time += lanc_seconds() - time;
276 	time = lanc_seconds();
277 	if (version == 1) {
278             splarax(Aq, A, n, q[j], vwsqrt, workn);
279 	}
280 	else {
281 	    symmlq_(&long_n, &(q[j][1]), &wv1[1], &wv2[1], &wv3[1], &wv4[1], &Aq[1], &wv5[1],
282 		&wv6[1], &checka, &goodb, &precon, &shift, &nout,
283 		&intlim, &rtol, &istop, &itn, &anorm, &acond,
284 		&rnorm, &ynorm, (double *) A, vwsqrt, (double *) orthlist2,
285 		&macheps, &normxlim, &itnmin);
286 	    symmlqitns += itn;
287 	    if (DEBUG_EVECS > 2) {
288 	        printf("Symmlq report:      rtol %g\n", rtol);
289 	        printf("  system norm %g, solution norm %g\n", anorm, ynorm);
290 	        printf("  system condition %g, residual %g\n", acond, rnorm);
291 	        printf("  termination condition %2ld, iterations %3ld\n", istop, itn);
292 	    }
293 	}
294 	splarax_time += lanc_seconds() - time;
295 	time = lanc_seconds();
296 	update(u, 1, n, Aq, -beta[j], q[j - 1]);
297 	alpha[j] = dot(u, 1, n, q[j]);
298 	update(r, 1, n, u, -alpha[j], q[j]);
299 	blas_time += lanc_seconds() - time;
300 	time = lanc_seconds();
301 	if (vwsqrt == NULL) {
302 	    orthog1(r, 1, n);
303 	}
304 	else {
305 	    orthogvec(r, 1, n, vwsqrt);
306 	}
307 	orthogonalize(r, n, orthlist);
308 	temp = orthlist;
309 	orthlist = makeorthlnk();
310 	orthlist->vec = q[j];
311 	orthlist->pntr = temp;
312 	beta[j + 1] = norm(r, 1, n);
313 	orthog_time += lanc_seconds() - time;
314 
315 	time = lanc_seconds();
316 	left = j/2;
317 	right = j - left + 1;
318 	if (inc_bis_safety) {
319 	    bis_safety *= 10;
320 	    inc_bis_safety = FALSE;
321 	}
322 	ritzval_flag = get_ritzvals(alpha, beta+1, j, Anorm, workj+1,
323                                     ritz, d, left, right, eigtol, bis_safety);
324         /* ... have to off-set beta and workj since full orthogonalization
325                indexes these from 1 to maxj+1 whereas selective orthog.
326                indexes them from 0 to maxj */
327 
328 	if (ritzval_flag != 0) {
329             bail("ERROR: Both Sturm bisection and QL failed.",1);
330 	    /* ... give up. */
331  	}
332         ql_time += lanc_seconds() - time;
333 
334 	/* Convergence check using Paige bji estimates. */
335 	time = lanc_seconds();
336 	for (i = 1; i <= j; i++) {
337 	    Sres = Tevec(alpha, beta, j, ritz[i], s);
338 	    if (Sres > Sres_max) {
339 		Sres_max = Sres;
340 	    }
341 	    if (Sres > SRESTOL) {
342 		inc_bis_safety = TRUE;
343 	    }
344 	    bj[i] = s[j] * beta[j + 1];
345 	}
346 	tevec_time += lanc_seconds() - time;
347 
348 
349 	time = lanc_seconds();
350 	if (version == 1) {
351 	    scanmin(ritz, 1, j, &scanlist);
352 	}
353 	else {
354 	    scanmax(ritz, 1, j, &scanlist);
355 	}
356 	converged = TRUE;
357 	if (j < d)
358 	    converged = FALSE;
359 	else {
360 	    curlnk = scanlist;
361 	    while (curlnk != NULL) {
362 		if (bj[curlnk->indx] > bji_tol) {
363 		    converged = FALSE;
364 		}
365 		curlnk = curlnk->pntr;
366 	    }
367 	}
368 	scan_time += lanc_seconds() - time;
369 	j++;
370     }
371     j--;
372 
373     /* Collect eigenvalue and bound information. */
374     time = lanc_seconds();
375     mkeigvecs(scanlist,lambda,bound,index,bj,d,&Sres_max,alpha,beta+1,j,s,y,n,q);
376     evec_time += lanc_seconds() - time;
377 
378     /* Analyze computation for and report additional problems */
379     time = lanc_seconds();
380     if (DEBUG_EVECS>0 && version == 2) {
381 	printf("\nTotal Symmlq iterations %3d\n", symmlqitns);
382     }
383     if (version == 2) {
384         for (i = 1; i <= d; i++) {
385 	    lambda[i] = 1.0/lambda[i];
386 	}
387     }
388     warnings(workn, A, y, n, lambda, vwsqrt, Ares, bound, index,
389              d, j, maxj, Sres_max, eigtol, u, Anorm, Output_File);
390     debug_time += lanc_seconds() - time;
391 
392     /* Free any memory allocated in this routine. */
393     time = lanc_seconds();
394     frvec(u, 1);
395     frvec(r, 1);
396     frvec(Aq, 1);
397     frvec(ritzvec, 1);
398     frvec(zeros, 1);
399     if (vwsqrt == NULL && version == 2) {
400 	frvec(ones, 1);
401     }
402     frvec(workn, 1);
403     frvec(Ares, 1);
404     frvec(inv_lambda, 1);
405     sfree((char *) index);
406     frvec(alpha, 1);
407     frvec(beta, 1);
408     frvec(ritz, 1);
409     frvec(s, 1);
410     frvec(bj, 1);
411     frvec(workj, 1);
412     if (version == 2) {
413 	frvec(wv1, 0);
414     }
415     while (scanlist != NULL) {
416 	curlnk = scanlist->pntr;
417 	sfree((char *) scanlist);
418 	scanlist = curlnk;
419     }
420     for (i = 1; i <= j; i++) {
421 	frvec(q[i], 1);
422     }
423     while (orthlist != NULL) {
424 	temp = orthlist->pntr;
425 	sfree((char *) orthlist);
426 	orthlist = temp;
427     }
428     while (orthlist2 != NULL && version == 2) {
429 	temp = orthlist2->pntr;
430 	sfree((char *) orthlist2);
431 	orthlist2 = temp;
432     }
433     sfree((char *) q);
434     init_time += lanc_seconds() - time;
435 }
436