1 /*
2  $Id$
3  *======================================================================
4  *
5  * DISCLAIMER
6  *
7  * This material was prepared as an account of work sponsored by an
8  * agency of the United States Government.  Neither the United States
9  * Government nor the United States Department of Energy, nor Battelle,
10  * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11  * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12  * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13  * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14  * INFRINGE PRIVATELY OWNED RIGHTS.
15  *
16  * ACKNOWLEDGMENT
17  *
18  * This software and its documentation were produced with Government
19  * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20  * States Department of Energy.  The Government retains a paid-up
21  * non-exclusive, irrevocable worldwide license to reproduce, prepare
22  * derivative works, perform publicly and display publicly by or for the
23  * Government, including the right to distribute to other Government
24  * contractors.
25  *
26  *======================================================================
27  *
28  *  -- PEIGS  routine (version 2.1) --
29  *     Pacific Northwest Laboratory
30  *     July 28, 1995
31  *
32  *======================================================================
33  */
34 /*
35  *********************************************
36 
37  PeIGS internal routine for eigen-value
38  cluster analysis
39 
40       This version ASSUMES eigenvalues
41       are accurate relative to norm(T), where T is the full
42       tridiagonal matrix, but not necessarily relative to norm(Ti)
43       if norm(Ti) < norm(T), where Ti is one of the submatrices into
44       which T breaks (as per isplit, nsplit).  If eigenvalues in block
45       Ti are always accurate relative to norm(Ti), then ortol should be
46       based on norm(Ti) rather than norm(T).
47 
48       This assumption means that the orthogonalization criterion for
49       all blocks is based on norm(T), not norm(Ti) for each block.  This
50       means we do orthogonalization more often than we would if the
51       orthogonalization criterion for each block was based on norm(Ti).
52 
53       This assumption about accuracy is consistent with taking
54       abstol <= 0.0 in dstebz, pdspev, etc., i.e., then dstebz/pstebz
55       computes eigenvalues with an error less than eps * norm(T).
56 
57       In principle the user could specify 0.0 < abstol < eps * norm(T)
58       in dstebz/pstebz
59       in which case the assumption of this routines may be violated.
60       Violation of this assumption will not reduce the accuracy of the
61       results, it will only cause the code to run slower since it
62       will cause more eigenvectors to be orthogonalized than is strictly
63       necessary.
64 
65       If we do not make this assumption, and compute the orthogonalization
66       criterion based on norm(Ti), but the eigenvalues are only accurate
67       relative to norm(T) but not norm(Ti), then the computed eigenvectors
68       are likely to have poor (possible no) orthogonality.
69 
70 
71       Integer clustrf_ (n, d, e, m, w, mapZ, vecZ, iblock, nsplit, isplit,ptbeval, num_clustr, clustr_info, *imin,
72  , proclist, iscratch )
73 
74  Integer *n, *m, *mapZ, *iblock, *nsplit, *isplit, *num_clustr, *clustr_info, *imin, *proclist, *iscratch;
75  DoublePrecision *d, *e, *w, **vecZ, *ptbeval;
76 
77 
78  not intended for separate call
79  not protected from user input errors
80 
81  */
82 
83 #include <stdio.h>
84 #include <stdlib.h>
85 #include <math.h>
86 
87 #include "globalp.c.h"
88 
89 #define MAX(a,b) ((a) > (b) ? (a) : (b))
90 #define MIN(a,b) ((a) < (b) ? (a) : (b))
91 
92 #define R_ZERO (DoublePrecision) 0.0e0
93 #define R_ONE  (DoublePrecision) 1.0e0
94 #define R_TEN  (DoublePrecision) 1.0e1
95 #define ODM3 (DoublePrecision) 1.0e-3
96 #define ODM1 (DoublePrecision) 1.0e-1
97 #define EXTRA 1
98 #define KK    2
99 #define MAXITS 5
100 #define DOUBLE 200
101 #define INTEGER 20
102 #define TWO  2
103 
104 #define I_ZERO 0
105 
106 
clustr_check(c1,cn,imin,imax)107   static Integer clustr_check(c1, cn, imin, imax)
108      Integer c1, cn, imin, imax;
109 {
110   /*
111     routine to determine if
112     the cluster is
113     actually in the desired region of the clustr finder
114     */
115 
116   if ( cn < imin )
117     return(-1);
118 
119   if ( imax  < c1 )
120     return(-1);
121 
122   return(1);
123 }
124 
125 
clustrf5_(n,d,e,m,w,mapZ,vecZ,iblock,nsplit,isplit,ptbeval,num_clustr,clustr_info,imin,proclist,nacluster,icsplit,iscratch)126 Integer clustrf5_ (n, d, e, m, w, mapZ, vecZ, iblock, nsplit, isplit, ptbeval, num_clustr,
127 		   clustr_info,  imin, proclist , nacluster, icsplit, iscratch)
128      Integer *n, *m, *mapZ, *iblock, *nsplit, *isplit, *num_clustr,
129   *clustr_info, *imin, *proclist, *nacluster, *icsplit, *iscratch;
130      DoublePrecision *d, *e, *w, **vecZ, *ptbeval;
131      /*
132        this routine finds the cluster information for symmetric tridiagonal matrices
133 
134        on input:
135 
136        n = dimension of the matrix
137        d = array of n DoublePrecision; diagonal of the tridiagonal matrices
138        e = array of n DoublePrecision; e[0] is junk -- left over from eispack--should remove
139        super-diagonal of the tridiagonal matrices;
140        m = number of the number eigenvalues; from dstebz
141        w = array of m DoublePrecision precision; eigenvalues from dstebz
142        iblock = array of n integers ( at most ); from dstebz
143        nsplit = number of blocks; output of dstebz
144        isplit = split points of blocks; output of dstebz
145        ptb_eval = perturbed eigenvalues
146 
147        output:
148 
149        function returns the maximum cluster size ;
150 
151        num_one_blk = number of size 1 blocks
152        one_block   = integer array ; location of size 1 blocks
153        clustr_info = integer array ; max size 4 * n; cluster information : beg block, end of block
154        beg of cluster, end of cluster
155        nsplit      = number of clusters
156 
157        nacluster = total number of clusters in the list w of m
158                      eigenvalues,  including clusters of size 1 in
159                      blocks of size 1.
160 
161        icsplit ....... Length m.
162                      icsplit[i] is
163                      the index (0 to m-1) of the last eigenvalue in the i-th
164                      cluster in w, i = 0 to nacluster-1.
165                      The i-th cluster, i = 0 to nacluster - 1 is
166                      the set of eigenvalues
167 
168                      0 to icsplit[0],              i = 0
169                      icsplit[i-1]+1 to icsplit[i], i = 1 to nacluster-1.
170 
171        */
172 {
173   Integer jblk, nblk, msize;
174   Integer i, j, num_cls, num_all_cls;
175   Integer b1, num_eig;
176   Integer max_clustr_size;
177   Integer beg_of_block, end_of_block;
178   Integer bn;
179   Integer clustrptr=0, blksiz;
180   Integer me;
181   Integer *c_ptr;
182   Integer iflag, k, imax;
183   Integer ii, nn_proc;
184 
185   DoublePrecision tmp, *eval, sep, eps;
186   DoublePrecision onenrm, pertol;
187   DoublePrecision xjm, eps1;
188   DoublePrecision ortol, xj, sepfine;
189 
190   Integer clustr_check();
191   extern Integer count_list();
192   extern Integer reduce_list2();
193   extern Integer menode_ ();
194   extern Integer idamax_(), mxmynd_ (), mypid();
195   extern void fil_int_lst();
196   extern void fil_dbl_lst();
197 
198   /*
199     intrinsic DoublePrecision precision
200     */
201 
202   /*
203     blas calls
204     */
205 
206   extern void dcopy_(), dscal_(), daxpy_();
207   extern DoublePrecision ddot_(), dasum_(), dnrm2_();
208 
209   /*
210     lapack calls
211     */
212 
213   extern void xerbla_(), dlagts_(), dlagtf_(), dlarnv_();
214 
215   me = mxmynd_ ();
216   num_eig = *m;
217 
218   *num_clustr = 0;
219   num_cls = 0;
220   msize = *n;
221   num_all_cls = I_ZERO;
222   *nacluster = I_ZERO;
223   max_clustr_size = I_ZERO;
224 
225   /*
226       Compute reorthogonalization criterion.
227 
228       ASSUMES eigenvalues
229       are accurate relative to norm(T), where T is the full
230       tridiagonal matrix, but not necessarily relative to norm(Ti)
231       if norm(Ti) < norm(T), where Ti is one of the submatrices into
232       which T breaks (as per isplit, nsplit).  If eigenvalues in block
233       Ti are always accurate relative to norm(Ti), then ortol should be
234       based on norm(Ti) rather than norm(T) (the commented out computation
235       of ortol latter in the code was used to do this before even though
236       the eigenvalues weren't really accurate relative to norm(Ti). ).
237    */
238 
239   onenrm = fabs( d[0] ) + fabs( e[1] );
240   tmp = fabs(d[msize-1]) + fabs(e[msize-1]);
241   onenrm = MAX(onenrm, tmp);
242   for (i = 1; i < msize-1; ++i) {
243     tmp = fabs(d[i]) + fabs(e[i]) + fabs(e[i + 1]);
244     onenrm = MAX(onenrm, tmp);
245   }
246 
247   ortol = onenrm * (DoublePrecision ) 1.e-3 ;
248   sepfine = MAX(1.e2, (DoublePrecision) msize) * sqrt(DLAMCHE);
249   sepfine = MIN(sepfine, MIN(1.e-3, 1.0e0/((DoublePrecision) msize)));
250 
251   /*
252     sepfine = sepfine*MAX(ortol, 1.e0);
253     */
254 
255   c_ptr = iscratch;
256   nn_proc = reduce_list2( num_eig, mapZ, c_ptr);
257 
258   iflag = -1;
259   for ( j = 0; j < nn_proc; j++ ){
260     if ( c_ptr[j] == me ) {
261       iflag = j;
262       break;
263     }
264   }
265 
266   k = 0;
267   imax = -1;
268   *imin = -1;
269   for ( j = 0; j < iflag; j++ ) {
270     imax = count_list(c_ptr[j], mapZ, &num_eig);
271     k += imax;
272   }
273 
274 
275   /*
276     this processor finds the eigenvectors between imin <= and <= imax
277     */
278 
279   if ( iflag == -1 ){
280     *imin = -1;
281     imax = -1;
282   }
283   else {
284     *imin = k;
285     imax = k + count_list( me, mapZ, &num_eig) - 1;
286   }
287 
288   c_ptr = iscratch;
289   nn_proc = reduce_list2(num_eig, mapZ, c_ptr );
290 
291   c_ptr += nn_proc;
292   for ( j = 0; j < num_eig; j++ )
293     *(c_ptr++ ) = mapZ[j];
294 
295   c_ptr = iscratch + nn_proc;
296 
297   k = 0;
298   for ( j = 0; j < nn_proc; j++ ) {
299     ii = iscratch[j];
300     end_of_block = count_list(ii, c_ptr, &num_eig);
301     fil_int_lst (end_of_block, &mapZ[k], ii);
302     k += end_of_block;
303   }
304 
305   eval = ptbeval;
306   c_ptr = clustr_info;
307 
308   /*
309     cluster information
310     */
311 
312   for ( j = 0; j <  4 * msize; j++ )
313     *(c_ptr++) = -1;
314 
315   for ( j = 0; j < num_eig; j++ )
316     icsplit[j] = -5;
317 
318   if ( iflag == -1 )
319     return(-50);
320 
321   c_ptr = clustr_info;
322 
323   /*
324     should have the same error messages as lapack
325     */
326 
327   /*
328     get machine precision
329     */
330 
331   eps = DLAMCHE;
332 
333   /*
334 
335     should spread this across blocks;
336     assume, of course, that nblk is set correctly
337 
338     */
339 
340   beg_of_block = -1;
341   end_of_block = -1;
342 
343   while ( end_of_block < (num_eig-1 )) {
344     ++end_of_block;
345     beg_of_block = end_of_block;
346 
347     /*
348       find the end of the block
349       */
350 
351 
352 
353     while (( iblock[end_of_block] == iblock[beg_of_block] ) && ( end_of_block < num_eig )) {
354       end_of_block++;
355     }
356 
357     --end_of_block;
358 
359     /*
360      */
361 
362     nblk = iblock[beg_of_block] - 1;
363 
364     if (nblk == 0 )
365       b1 = 0 ;
366     else
367       b1 = isplit[nblk-1];
368 
369 
370 
371     bn = isplit[nblk]-1;
372     blksiz = bn - b1 + 1;
373 
374     /*
375       Skip all the work if the block size is one.
376       */
377 
378 #ifdef DEBUG1
379     fprintf(stderr, "me = %d b1 = %d  bn = %d blksiz1 = %d \n", me, b1, bn, blksiz );
380 #endif
381 
382     if (blksiz == 1) {
383       if ( clustr_check(beg_of_block, end_of_block, *imin, imax) == 1 ) {
384 	ii = beg_of_block - *imin;
385 	fil_dbl_lst ( msize, vecZ[ii], 0.0e0);
386 	vecZ[ii][b1] = 1.0e0;
387       }
388 
389       icsplit[ num_all_cls ] = end_of_block;
390       num_all_cls++;
391       *nacluster = num_all_cls;
392 
393     }
394 
395 #ifdef DEBUG1
396     fprintf(stderr, " got here 1 me = %d \n", me );
397 #endif
398 
399     if ( blksiz > 1 ) {
400       /*
401 	Compute reorthogonalization criterion and stopping criterion
402 	Gershgorin radius
403 
404 	should use pointers or loop unrooling here to speed things up; this is a O(n) operation
405 	so things aren't that expensive
406 
407 	*/
408 
409 /*
410  *  This is how one would compute ortol if the eigenvalues of block i
411  *  of T, Ti, were accurate relative to norm(Ti).
412  *
413  *     onenrm = fabs( d[b1] ) + fabs( e[b1+1] );
414  *     tmp = fabs(d[bn]) + fabs(e[bn]);
415  *     onenrm = max(onenrm, tmp);
416  *     for (i = b1 + 1; i < bn; ++i) {
417 *	tmp = fabs(d[i]) + fabs(e[i]) + fabs(e[i + 1]);
418 *	onenrm = max(onenrm, tmp);
419 *      }
420  *
421  *     ortol = onenrm * (DoublePrecision ) 1.e-3 ;
422  */
423 
424 #ifdef DEBUG1
425     fprintf(stderr, " got here 2 me = %d \n", me );
426 #endif
427 
428 
429       /*
430 	Loop through eigenvalues of block nblk.
431 	*/
432 
433       jblk = 0;
434       for (j = beg_of_block; j < ( end_of_block + 1) ; ++j) {
435 	++jblk;
436 	xj = w[j];
437 
438 	/*
439 	  If eigenvalues j and j-1 are too close, add a relatively small
440 	  perturbation.  If the eigenvalues are not in increasing
441 	  order in the block, exit.
442 	*/
443 
444 	if (jblk > 1) {
445 	  eps1 = fabs(eps * xj);
446 	  pertol = eps1 * R_TEN;
447 	  if ((xj - w[j-1]) < -eps1) {
448 	    printf(" Error in ordering eigenvalues: -5 error clustrf me = %d \n",
449                    (int) me );
450 	  }
451 	  sep = xj - xjm;
452 /*
453 	  if (sep < pertol*MAX(fabs(xj), fabs(xjm)))
454 */
455 	if ( sep < pertol)
456 	 xj = xjm + pertol;
457 	}
458 	eval[j] = xj;
459 
460 #ifdef DEBUG1
461 	fprintf(stderr, " got here 3 me = %d \n", me );
462 #endif
463 
464 	if (jblk == 1)
465 	  clustrptr = j;
466 
467 	/*
468 	  tight cluster
469 	  */
470 
471 	if ( jblk > 1 )  {            /* jblk > 1 */
472 	  sep = fabs(xj - xjm);
473 	  /*
474 	    printf(" got here 4 me = %d sep xj %20.16g xjm %20.16g  sep %20.16g \n", me, xj, xjm, sep-sepfine*MAX(fabs(xj), fabs(xjm)));
475 	  */
476 
477 	  if (fabs(sep) > fabs(sepfine*MAX(fabs(xj),fabs(xjm)))) {
478 	    if ( clustr_check(clustrptr, j-1, *imin, imax) == 1 ) {
479 	      *(c_ptr++) = clustrptr;
480 	      *(c_ptr++) = j-1;
481 	      *(c_ptr++) = b1;
482 	      *(c_ptr++) = bn;
483 	      num_cls++;
484 	      max_clustr_size = MAX( j - clustrptr, max_clustr_size);
485 	    }
486 	    clustrptr = j;
487 	    jblk = 1;
488 
489             icsplit[ num_all_cls ] = j-1;
490             num_all_cls++;
491             *nacluster = num_all_cls;
492 
493 
494 #ifdef DEBUG1
495 	    fprintf(stderr, " out of 4 me = %d \n", me );
496 #endif
497 
498 	  }
499 	}
500 
501 	/*
502 	  assume that xj - xjm < ortol but we're at the end of the blk
503 	  */
504 	if ( j == end_of_block ) {
505 	  if ( clustr_check(clustrptr, end_of_block, *imin, imax) == 1 ) {
506 
507 #ifdef DEBUG1
508 	    fprintf(stderr, " got here 5 me = %d \n", me );
509 #endif
510 
511 	    *(c_ptr++) = clustrptr;
512 	    *(c_ptr++) = end_of_block;
513 	    *(c_ptr++) = b1;
514 	    *(c_ptr++) = bn;
515 	    num_cls++;
516 	    max_clustr_size = MAX( (end_of_block -clustrptr + 1), max_clustr_size);
517 #ifdef DEBUG1
518 	    fprintf(stderr, " out of 5 me = %d cbeg %d cend %d b1 %d bn %d \n",me,
519 		    clustrptr, end_of_block, b1, bn );
520 #endif
521 	  }
522 
523           icsplit[ num_all_cls ] = end_of_block;
524           num_all_cls++;
525           *nacluster = num_all_cls;
526 
527 	}
528 
529 	xjm = xj;
530       }
531     }
532   }
533 
534   /*
535   ime = -1;
536   ii = -1;
537   for ( ii = 0; ii < msize; ii++ ){
538     if ( mapZ[ii] == me ){
539       ime = ii;
540       break;
541     }
542   }
543 
544   if ( ime % 2 == 1 ) {
545     if ( num_all_cls > 1 ) {
546       iscratch[0] = clustr_info[0];
547       iscratch[1] = clustr_info[1];
548       iscratch[2] = clustr_info[2];
549       iscratch[3] = clustr_info[3];
550       c_ptr = &clustr_info[ 4 * ( num_all_cls - 1)];
551       clustr_info[0] = c_ptr[0];
552       clustr_info[1] = c_ptr[1];
553       clustr_info[2] = c_ptr[2];
554       clustr_info[3] = c_ptr[3];
555       c_ptr = &clustr_info[0];
556       c_ptr[0] = iscratch[0];
557       c_ptr[1] = iscratch[1];
558       c_ptr[2] = iscratch[2];
559       c_ptr[3] = iscratch[3];
560     }
561   }
562   */
563 
564   c_ptr = clustr_info;
565   *num_clustr = num_cls;
566 
567 
568 /*
569   for ( ii = 0; ii < 4* *num_clustr; ii++ )
570     printf("me = %d clustr_info[%d] =  %d \n", me, ii, *(c_ptr++));
571 */
572 
573   /*
574     for ( ii = 0; ii < num_eig ; ii++ )
575     printf("me = %d mapZ[%d] =  %d \n", me, ii, mapZ[ii]);
576 
577     printf("me = %d exiting clustrf2 imin =  %d \n", me, *imin);
578     */
579 
580 
581 /*
582     for ( ii = 0; ii < 4* *num_clustr; ii++ )
583     printf("me = %d clustrf5 close clustr_info[%d] =  %d \n", me, ii, *(c_ptr++));
584 
585 */
586 
587     return(max_clustr_size);
588 }
589 
590