1 /*
2 $Id$
3 *======================================================================
4 *
5 * DISCLAIMER
6 *
7 * This material was prepared as an account of work sponsored by an
8 * agency of the United States Government. Neither the United States
9 * Government nor the United States Department of Energy, nor Battelle,
10 * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11 * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12 * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13 * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14 * INFRINGE PRIVATELY OWNED RIGHTS.
15 *
16 * ACKNOWLEDGMENT
17 *
18 * This software and its documentation were produced with Government
19 * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20 * States Department of Energy. The Government retains a paid-up
21 * non-exclusive, irrevocable worldwide license to reproduce, prepare
22 * derivative works, perform publicly and display publicly by or for the
23 * Government, including the right to distribute to other Government
24 * contractors.
25 *
26 *======================================================================
27 *
28 * -- PEIGS routine (version 2.1) --
29 * Pacific Northwest Laboratory
30 * July 28, 1995
31 *
32 *======================================================================
33 */
34 /*
35 *********************************************
36
37 PeIGS internal routine for eigen-value
38 cluster analysis
39
40 This version ASSUMES eigenvalues
41 are accurate relative to norm(T), where T is the full
42 tridiagonal matrix, but not necessarily relative to norm(Ti)
43 if norm(Ti) < norm(T), where Ti is one of the submatrices into
44 which T breaks (as per isplit, nsplit). If eigenvalues in block
45 Ti are always accurate relative to norm(Ti), then ortol should be
46 based on norm(Ti) rather than norm(T).
47
48 This assumption means that the orthogonalization criterion for
49 all blocks is based on norm(T), not norm(Ti) for each block. This
50 means we do orthogonalization more often than we would if the
51 orthogonalization criterion for each block was based on norm(Ti).
52
53 This assumption about accuracy is consistent with taking
54 abstol <= 0.0 in dstebz, pdspev, etc., i.e., then dstebz/pstebz
55 computes eigenvalues with an error less than eps * norm(T).
56
57 In principle the user could specify 0.0 < abstol < eps * norm(T)
58 in dstebz/pstebz
59 in which case the assumption of this routines may be violated.
60 Violation of this assumption will not reduce the accuracy of the
61 results, it will only cause the code to run slower since it
62 will cause more eigenvectors to be orthogonalized than is strictly
63 necessary.
64
65 If we do not make this assumption, and compute the orthogonalization
66 criterion based on norm(Ti), but the eigenvalues are only accurate
67 relative to norm(T) but not norm(Ti), then the computed eigenvectors
68 are likely to have poor (possible no) orthogonality.
69
70
71 Integer clustrf_ (n, d, e, m, w, mapZ, vecZ, iblock, nsplit, isplit,ptbeval, num_clustr, clustr_info, *imin,
72 , proclist, iscratch )
73
74 Integer *n, *m, *mapZ, *iblock, *nsplit, *isplit, *num_clustr, *clustr_info, *imin, *proclist, *iscratch;
75 DoublePrecision *d, *e, *w, **vecZ, *ptbeval;
76
77
78 not intended for separate call
79 not protected from user input errors
80
81 */
82
83 #include <stdio.h>
84 #include <stdlib.h>
85 #include <math.h>
86
87 #include "globalp.c.h"
88
89 #define MAX(a,b) ((a) > (b) ? (a) : (b))
90 #define MIN(a,b) ((a) < (b) ? (a) : (b))
91
92 #define R_ZERO (DoublePrecision) 0.0e0
93 #define R_ONE (DoublePrecision) 1.0e0
94 #define R_TEN (DoublePrecision) 1.0e1
95 #define ODM3 (DoublePrecision) 1.0e-3
96 #define ODM1 (DoublePrecision) 1.0e-1
97 #define EXTRA 1
98 #define KK 2
99 #define MAXITS 5
100 #define DOUBLE 200
101 #define INTEGER 20
102 #define TWO 2
103
104 #define I_ZERO 0
105
106
clustr_check(c1,cn,imin,imax)107 static Integer clustr_check(c1, cn, imin, imax)
108 Integer c1, cn, imin, imax;
109 {
110 /*
111 routine to determine if
112 the cluster is
113 actually in the desired region of the clustr finder
114 */
115
116 if ( cn < imin )
117 return(-1);
118
119 if ( imax < c1 )
120 return(-1);
121
122 return(1);
123 }
124
125
clustrf5_(n,d,e,m,w,mapZ,vecZ,iblock,nsplit,isplit,ptbeval,num_clustr,clustr_info,imin,proclist,nacluster,icsplit,iscratch)126 Integer clustrf5_ (n, d, e, m, w, mapZ, vecZ, iblock, nsplit, isplit, ptbeval, num_clustr,
127 clustr_info, imin, proclist , nacluster, icsplit, iscratch)
128 Integer *n, *m, *mapZ, *iblock, *nsplit, *isplit, *num_clustr,
129 *clustr_info, *imin, *proclist, *nacluster, *icsplit, *iscratch;
130 DoublePrecision *d, *e, *w, **vecZ, *ptbeval;
131 /*
132 this routine finds the cluster information for symmetric tridiagonal matrices
133
134 on input:
135
136 n = dimension of the matrix
137 d = array of n DoublePrecision; diagonal of the tridiagonal matrices
138 e = array of n DoublePrecision; e[0] is junk -- left over from eispack--should remove
139 super-diagonal of the tridiagonal matrices;
140 m = number of the number eigenvalues; from dstebz
141 w = array of m DoublePrecision precision; eigenvalues from dstebz
142 iblock = array of n integers ( at most ); from dstebz
143 nsplit = number of blocks; output of dstebz
144 isplit = split points of blocks; output of dstebz
145 ptb_eval = perturbed eigenvalues
146
147 output:
148
149 function returns the maximum cluster size ;
150
151 num_one_blk = number of size 1 blocks
152 one_block = integer array ; location of size 1 blocks
153 clustr_info = integer array ; max size 4 * n; cluster information : beg block, end of block
154 beg of cluster, end of cluster
155 nsplit = number of clusters
156
157 nacluster = total number of clusters in the list w of m
158 eigenvalues, including clusters of size 1 in
159 blocks of size 1.
160
161 icsplit ....... Length m.
162 icsplit[i] is
163 the index (0 to m-1) of the last eigenvalue in the i-th
164 cluster in w, i = 0 to nacluster-1.
165 The i-th cluster, i = 0 to nacluster - 1 is
166 the set of eigenvalues
167
168 0 to icsplit[0], i = 0
169 icsplit[i-1]+1 to icsplit[i], i = 1 to nacluster-1.
170
171 */
172 {
173 Integer jblk, nblk, msize;
174 Integer i, j, num_cls, num_all_cls;
175 Integer b1, num_eig;
176 Integer max_clustr_size;
177 Integer beg_of_block, end_of_block;
178 Integer bn;
179 Integer clustrptr=0, blksiz;
180 Integer me;
181 Integer *c_ptr;
182 Integer iflag, k, imax;
183 Integer ii, nn_proc;
184
185 DoublePrecision tmp, *eval, sep, eps;
186 DoublePrecision onenrm, pertol;
187 DoublePrecision xjm, eps1;
188 DoublePrecision ortol, xj, sepfine;
189
190 Integer clustr_check();
191 extern Integer count_list();
192 extern Integer reduce_list2();
193 extern Integer menode_ ();
194 extern Integer idamax_(), mxmynd_ (), mypid();
195 extern void fil_int_lst();
196 extern void fil_dbl_lst();
197
198 /*
199 intrinsic DoublePrecision precision
200 */
201
202 /*
203 blas calls
204 */
205
206 extern void dcopy_(), dscal_(), daxpy_();
207 extern DoublePrecision ddot_(), dasum_(), dnrm2_();
208
209 /*
210 lapack calls
211 */
212
213 extern void xerbla_(), dlagts_(), dlagtf_(), dlarnv_();
214
215 me = mxmynd_ ();
216 num_eig = *m;
217
218 *num_clustr = 0;
219 num_cls = 0;
220 msize = *n;
221 num_all_cls = I_ZERO;
222 *nacluster = I_ZERO;
223 max_clustr_size = I_ZERO;
224
225 /*
226 Compute reorthogonalization criterion.
227
228 ASSUMES eigenvalues
229 are accurate relative to norm(T), where T is the full
230 tridiagonal matrix, but not necessarily relative to norm(Ti)
231 if norm(Ti) < norm(T), where Ti is one of the submatrices into
232 which T breaks (as per isplit, nsplit). If eigenvalues in block
233 Ti are always accurate relative to norm(Ti), then ortol should be
234 based on norm(Ti) rather than norm(T) (the commented out computation
235 of ortol latter in the code was used to do this before even though
236 the eigenvalues weren't really accurate relative to norm(Ti). ).
237 */
238
239 onenrm = fabs( d[0] ) + fabs( e[1] );
240 tmp = fabs(d[msize-1]) + fabs(e[msize-1]);
241 onenrm = MAX(onenrm, tmp);
242 for (i = 1; i < msize-1; ++i) {
243 tmp = fabs(d[i]) + fabs(e[i]) + fabs(e[i + 1]);
244 onenrm = MAX(onenrm, tmp);
245 }
246
247 ortol = onenrm * (DoublePrecision ) 1.e-3 ;
248 sepfine = MAX(1.e2, (DoublePrecision) msize) * sqrt(DLAMCHE);
249 sepfine = MIN(sepfine, MIN(1.e-3, 1.0e0/((DoublePrecision) msize)));
250
251 /*
252 sepfine = sepfine*MAX(ortol, 1.e0);
253 */
254
255 c_ptr = iscratch;
256 nn_proc = reduce_list2( num_eig, mapZ, c_ptr);
257
258 iflag = -1;
259 for ( j = 0; j < nn_proc; j++ ){
260 if ( c_ptr[j] == me ) {
261 iflag = j;
262 break;
263 }
264 }
265
266 k = 0;
267 imax = -1;
268 *imin = -1;
269 for ( j = 0; j < iflag; j++ ) {
270 imax = count_list(c_ptr[j], mapZ, &num_eig);
271 k += imax;
272 }
273
274
275 /*
276 this processor finds the eigenvectors between imin <= and <= imax
277 */
278
279 if ( iflag == -1 ){
280 *imin = -1;
281 imax = -1;
282 }
283 else {
284 *imin = k;
285 imax = k + count_list( me, mapZ, &num_eig) - 1;
286 }
287
288 c_ptr = iscratch;
289 nn_proc = reduce_list2(num_eig, mapZ, c_ptr );
290
291 c_ptr += nn_proc;
292 for ( j = 0; j < num_eig; j++ )
293 *(c_ptr++ ) = mapZ[j];
294
295 c_ptr = iscratch + nn_proc;
296
297 k = 0;
298 for ( j = 0; j < nn_proc; j++ ) {
299 ii = iscratch[j];
300 end_of_block = count_list(ii, c_ptr, &num_eig);
301 fil_int_lst (end_of_block, &mapZ[k], ii);
302 k += end_of_block;
303 }
304
305 eval = ptbeval;
306 c_ptr = clustr_info;
307
308 /*
309 cluster information
310 */
311
312 for ( j = 0; j < 4 * msize; j++ )
313 *(c_ptr++) = -1;
314
315 for ( j = 0; j < num_eig; j++ )
316 icsplit[j] = -5;
317
318 if ( iflag == -1 )
319 return(-50);
320
321 c_ptr = clustr_info;
322
323 /*
324 should have the same error messages as lapack
325 */
326
327 /*
328 get machine precision
329 */
330
331 eps = DLAMCHE;
332
333 /*
334
335 should spread this across blocks;
336 assume, of course, that nblk is set correctly
337
338 */
339
340 beg_of_block = -1;
341 end_of_block = -1;
342
343 while ( end_of_block < (num_eig-1 )) {
344 ++end_of_block;
345 beg_of_block = end_of_block;
346
347 /*
348 find the end of the block
349 */
350
351
352
353 while (( iblock[end_of_block] == iblock[beg_of_block] ) && ( end_of_block < num_eig )) {
354 end_of_block++;
355 }
356
357 --end_of_block;
358
359 /*
360 */
361
362 nblk = iblock[beg_of_block] - 1;
363
364 if (nblk == 0 )
365 b1 = 0 ;
366 else
367 b1 = isplit[nblk-1];
368
369
370
371 bn = isplit[nblk]-1;
372 blksiz = bn - b1 + 1;
373
374 /*
375 Skip all the work if the block size is one.
376 */
377
378 #ifdef DEBUG1
379 fprintf(stderr, "me = %d b1 = %d bn = %d blksiz1 = %d \n", me, b1, bn, blksiz );
380 #endif
381
382 if (blksiz == 1) {
383 if ( clustr_check(beg_of_block, end_of_block, *imin, imax) == 1 ) {
384 ii = beg_of_block - *imin;
385 fil_dbl_lst ( msize, vecZ[ii], 0.0e0);
386 vecZ[ii][b1] = 1.0e0;
387 }
388
389 icsplit[ num_all_cls ] = end_of_block;
390 num_all_cls++;
391 *nacluster = num_all_cls;
392
393 }
394
395 #ifdef DEBUG1
396 fprintf(stderr, " got here 1 me = %d \n", me );
397 #endif
398
399 if ( blksiz > 1 ) {
400 /*
401 Compute reorthogonalization criterion and stopping criterion
402 Gershgorin radius
403
404 should use pointers or loop unrooling here to speed things up; this is a O(n) operation
405 so things aren't that expensive
406
407 */
408
409 /*
410 * This is how one would compute ortol if the eigenvalues of block i
411 * of T, Ti, were accurate relative to norm(Ti).
412 *
413 * onenrm = fabs( d[b1] ) + fabs( e[b1+1] );
414 * tmp = fabs(d[bn]) + fabs(e[bn]);
415 * onenrm = max(onenrm, tmp);
416 * for (i = b1 + 1; i < bn; ++i) {
417 * tmp = fabs(d[i]) + fabs(e[i]) + fabs(e[i + 1]);
418 * onenrm = max(onenrm, tmp);
419 * }
420 *
421 * ortol = onenrm * (DoublePrecision ) 1.e-3 ;
422 */
423
424 #ifdef DEBUG1
425 fprintf(stderr, " got here 2 me = %d \n", me );
426 #endif
427
428
429 /*
430 Loop through eigenvalues of block nblk.
431 */
432
433 jblk = 0;
434 for (j = beg_of_block; j < ( end_of_block + 1) ; ++j) {
435 ++jblk;
436 xj = w[j];
437
438 /*
439 If eigenvalues j and j-1 are too close, add a relatively small
440 perturbation. If the eigenvalues are not in increasing
441 order in the block, exit.
442 */
443
444 if (jblk > 1) {
445 eps1 = fabs(eps * xj);
446 pertol = eps1 * R_TEN;
447 if ((xj - w[j-1]) < -eps1) {
448 printf(" Error in ordering eigenvalues: -5 error clustrf me = %d \n",
449 (int) me );
450 }
451 sep = xj - xjm;
452 /*
453 if (sep < pertol*MAX(fabs(xj), fabs(xjm)))
454 */
455 if ( sep < pertol)
456 xj = xjm + pertol;
457 }
458 eval[j] = xj;
459
460 #ifdef DEBUG1
461 fprintf(stderr, " got here 3 me = %d \n", me );
462 #endif
463
464 if (jblk == 1)
465 clustrptr = j;
466
467 /*
468 tight cluster
469 */
470
471 if ( jblk > 1 ) { /* jblk > 1 */
472 sep = fabs(xj - xjm);
473 /*
474 printf(" got here 4 me = %d sep xj %20.16g xjm %20.16g sep %20.16g \n", me, xj, xjm, sep-sepfine*MAX(fabs(xj), fabs(xjm)));
475 */
476
477 if (fabs(sep) > fabs(sepfine*MAX(fabs(xj),fabs(xjm)))) {
478 if ( clustr_check(clustrptr, j-1, *imin, imax) == 1 ) {
479 *(c_ptr++) = clustrptr;
480 *(c_ptr++) = j-1;
481 *(c_ptr++) = b1;
482 *(c_ptr++) = bn;
483 num_cls++;
484 max_clustr_size = MAX( j - clustrptr, max_clustr_size);
485 }
486 clustrptr = j;
487 jblk = 1;
488
489 icsplit[ num_all_cls ] = j-1;
490 num_all_cls++;
491 *nacluster = num_all_cls;
492
493
494 #ifdef DEBUG1
495 fprintf(stderr, " out of 4 me = %d \n", me );
496 #endif
497
498 }
499 }
500
501 /*
502 assume that xj - xjm < ortol but we're at the end of the blk
503 */
504 if ( j == end_of_block ) {
505 if ( clustr_check(clustrptr, end_of_block, *imin, imax) == 1 ) {
506
507 #ifdef DEBUG1
508 fprintf(stderr, " got here 5 me = %d \n", me );
509 #endif
510
511 *(c_ptr++) = clustrptr;
512 *(c_ptr++) = end_of_block;
513 *(c_ptr++) = b1;
514 *(c_ptr++) = bn;
515 num_cls++;
516 max_clustr_size = MAX( (end_of_block -clustrptr + 1), max_clustr_size);
517 #ifdef DEBUG1
518 fprintf(stderr, " out of 5 me = %d cbeg %d cend %d b1 %d bn %d \n",me,
519 clustrptr, end_of_block, b1, bn );
520 #endif
521 }
522
523 icsplit[ num_all_cls ] = end_of_block;
524 num_all_cls++;
525 *nacluster = num_all_cls;
526
527 }
528
529 xjm = xj;
530 }
531 }
532 }
533
534 /*
535 ime = -1;
536 ii = -1;
537 for ( ii = 0; ii < msize; ii++ ){
538 if ( mapZ[ii] == me ){
539 ime = ii;
540 break;
541 }
542 }
543
544 if ( ime % 2 == 1 ) {
545 if ( num_all_cls > 1 ) {
546 iscratch[0] = clustr_info[0];
547 iscratch[1] = clustr_info[1];
548 iscratch[2] = clustr_info[2];
549 iscratch[3] = clustr_info[3];
550 c_ptr = &clustr_info[ 4 * ( num_all_cls - 1)];
551 clustr_info[0] = c_ptr[0];
552 clustr_info[1] = c_ptr[1];
553 clustr_info[2] = c_ptr[2];
554 clustr_info[3] = c_ptr[3];
555 c_ptr = &clustr_info[0];
556 c_ptr[0] = iscratch[0];
557 c_ptr[1] = iscratch[1];
558 c_ptr[2] = iscratch[2];
559 c_ptr[3] = iscratch[3];
560 }
561 }
562 */
563
564 c_ptr = clustr_info;
565 *num_clustr = num_cls;
566
567
568 /*
569 for ( ii = 0; ii < 4* *num_clustr; ii++ )
570 printf("me = %d clustr_info[%d] = %d \n", me, ii, *(c_ptr++));
571 */
572
573 /*
574 for ( ii = 0; ii < num_eig ; ii++ )
575 printf("me = %d mapZ[%d] = %d \n", me, ii, mapZ[ii]);
576
577 printf("me = %d exiting clustrf2 imin = %d \n", me, *imin);
578 */
579
580
581 /*
582 for ( ii = 0; ii < 4* *num_clustr; ii++ )
583 printf("me = %d clustrf5 close clustr_info[%d] = %d \n", me, ii, *(c_ptr++));
584
585 */
586
587 return(max_clustr_size);
588 }
589
590