/* $Id$ *====================================================================== * * DISCLAIMER * * This material was prepared as an account of work sponsored by an * agency of the United States Government. Neither the United States * Government nor the United States Department of Energy, nor Battelle, * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY, * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT * INFRINGE PRIVATELY OWNED RIGHTS. * * ACKNOWLEDGMENT * * This software and its documentation were produced with Government * support under Contract Number DE-AC06-76RLO-1830 awarded by the United * States Department of Energy. The Government retains a paid-up * non-exclusive, irrevocable worldwide license to reproduce, prepare * derivative works, perform publicly and display publicly by or for the * Government, including the right to distribute to other Government * contractors. * *====================================================================== * * -- PEIGS routine (version 2.1) -- * Pacific Northwest Laboratory * July 28, 1995 * *====================================================================== */ /* ********************************************* PeIGS internal routine for eigen-value cluster analysis This version ASSUMES eigenvalues are accurate relative to norm(T), where T is the full tridiagonal matrix, but not necessarily relative to norm(Ti) if norm(Ti) < norm(T), where Ti is one of the submatrices into which T breaks (as per isplit, nsplit). If eigenvalues in block Ti are always accurate relative to norm(Ti), then ortol should be based on norm(Ti) rather than norm(T). This assumption means that the orthogonalization criterion for all blocks is based on norm(T), not norm(Ti) for each block. This means we do orthogonalization more often than we would if the orthogonalization criterion for each block was based on norm(Ti). This assumption about accuracy is consistent with taking abstol <= 0.0 in dstebz, pdspev, etc., i.e., then dstebz/pstebz computes eigenvalues with an error less than eps * norm(T). In principle the user could specify 0.0 < abstol < eps * norm(T) in dstebz/pstebz in which case the assumption of this routines may be violated. Violation of this assumption will not reduce the accuracy of the results, it will only cause the code to run slower since it will cause more eigenvectors to be orthogonalized than is strictly necessary. If we do not make this assumption, and compute the orthogonalization criterion based on norm(Ti), but the eigenvalues are only accurate relative to norm(T) but not norm(Ti), then the computed eigenvectors are likely to have poor (possible no) orthogonality. Integer clustrf_ (n, d, e, m, w, mapZ, vecZ, iblock, nsplit, isplit,ptbeval, num_clustr, clustr_info, *imin, , proclist, iscratch ) Integer *n, *m, *mapZ, *iblock, *nsplit, *isplit, *num_clustr, *clustr_info, *imin, *proclist, *iscratch; DoublePrecision *d, *e, *w, **vecZ, *ptbeval; not intended for separate call not protected from user input errors */ #include #include #include #include "globalp.c.h" #define MAX(a,b) ((a) > (b) ? (a) : (b)) #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define R_ZERO (DoublePrecision) 0.0e0 #define R_ONE (DoublePrecision) 1.0e0 #define R_TEN (DoublePrecision) 1.0e1 #define ODM3 (DoublePrecision) 1.0e-3 #define ODM1 (DoublePrecision) 1.0e-1 #define EXTRA 1 #define KK 2 #define MAXITS 5 #define DOUBLE 200 #define INTEGER 20 #define TWO 2 #define I_ZERO 0 static Integer clustr_check(c1, cn, imin, imax) Integer c1, cn, imin, imax; { /* routine to determine if the cluster is actually in the desired region of the clustr finder */ if ( cn < imin ) return(-1); if ( imax < c1 ) return(-1); return(1); } Integer clustrf5_ (n, d, e, m, w, mapZ, vecZ, iblock, nsplit, isplit, ptbeval, num_clustr, clustr_info, imin, proclist , nacluster, icsplit, iscratch) Integer *n, *m, *mapZ, *iblock, *nsplit, *isplit, *num_clustr, *clustr_info, *imin, *proclist, *nacluster, *icsplit, *iscratch; DoublePrecision *d, *e, *w, **vecZ, *ptbeval; /* this routine finds the cluster information for symmetric tridiagonal matrices on input: n = dimension of the matrix d = array of n DoublePrecision; diagonal of the tridiagonal matrices e = array of n DoublePrecision; e[0] is junk -- left over from eispack--should remove super-diagonal of the tridiagonal matrices; m = number of the number eigenvalues; from dstebz w = array of m DoublePrecision precision; eigenvalues from dstebz iblock = array of n integers ( at most ); from dstebz nsplit = number of blocks; output of dstebz isplit = split points of blocks; output of dstebz ptb_eval = perturbed eigenvalues output: function returns the maximum cluster size ; num_one_blk = number of size 1 blocks one_block = integer array ; location of size 1 blocks clustr_info = integer array ; max size 4 * n; cluster information : beg block, end of block beg of cluster, end of cluster nsplit = number of clusters nacluster = total number of clusters in the list w of m eigenvalues, including clusters of size 1 in blocks of size 1. icsplit ....... Length m. icsplit[i] is the index (0 to m-1) of the last eigenvalue in the i-th cluster in w, i = 0 to nacluster-1. The i-th cluster, i = 0 to nacluster - 1 is the set of eigenvalues 0 to icsplit[0], i = 0 icsplit[i-1]+1 to icsplit[i], i = 1 to nacluster-1. */ { Integer jblk, nblk, msize; Integer i, j, num_cls, num_all_cls; Integer b1, num_eig; Integer max_clustr_size; Integer beg_of_block, end_of_block; Integer bn; Integer clustrptr=0, blksiz; Integer me; Integer *c_ptr; Integer iflag, k, imax; Integer ii, nn_proc; DoublePrecision tmp, *eval, sep, eps; DoublePrecision onenrm, pertol; DoublePrecision xjm, eps1; DoublePrecision ortol, xj, sepfine; Integer clustr_check(); extern Integer count_list(); extern Integer reduce_list2(); extern Integer menode_ (); extern Integer idamax_(), mxmynd_ (), mypid(); extern void fil_int_lst(); extern void fil_dbl_lst(); /* intrinsic DoublePrecision precision */ /* blas calls */ extern void dcopy_(), dscal_(), daxpy_(); extern DoublePrecision ddot_(), dasum_(), dnrm2_(); /* lapack calls */ extern void xerbla_(), dlagts_(), dlagtf_(), dlarnv_(); me = mxmynd_ (); num_eig = *m; *num_clustr = 0; num_cls = 0; msize = *n; num_all_cls = I_ZERO; *nacluster = I_ZERO; max_clustr_size = I_ZERO; /* Compute reorthogonalization criterion. ASSUMES eigenvalues are accurate relative to norm(T), where T is the full tridiagonal matrix, but not necessarily relative to norm(Ti) if norm(Ti) < norm(T), where Ti is one of the submatrices into which T breaks (as per isplit, nsplit). If eigenvalues in block Ti are always accurate relative to norm(Ti), then ortol should be based on norm(Ti) rather than norm(T) (the commented out computation of ortol latter in the code was used to do this before even though the eigenvalues weren't really accurate relative to norm(Ti). ). */ onenrm = fabs( d[0] ) + fabs( e[1] ); tmp = fabs(d[msize-1]) + fabs(e[msize-1]); onenrm = MAX(onenrm, tmp); for (i = 1; i < msize-1; ++i) { tmp = fabs(d[i]) + fabs(e[i]) + fabs(e[i + 1]); onenrm = MAX(onenrm, tmp); } ortol = onenrm * (DoublePrecision ) 1.e-3 ; sepfine = MAX(1.e2, (DoublePrecision) msize) * sqrt(DLAMCHE); sepfine = MIN(sepfine, MIN(1.e-3, 1.0e0/((DoublePrecision) msize))); /* sepfine = sepfine*MAX(ortol, 1.e0); */ c_ptr = iscratch; nn_proc = reduce_list2( num_eig, mapZ, c_ptr); iflag = -1; for ( j = 0; j < nn_proc; j++ ){ if ( c_ptr[j] == me ) { iflag = j; break; } } k = 0; imax = -1; *imin = -1; for ( j = 0; j < iflag; j++ ) { imax = count_list(c_ptr[j], mapZ, &num_eig); k += imax; } /* this processor finds the eigenvectors between imin <= and <= imax */ if ( iflag == -1 ){ *imin = -1; imax = -1; } else { *imin = k; imax = k + count_list( me, mapZ, &num_eig) - 1; } c_ptr = iscratch; nn_proc = reduce_list2(num_eig, mapZ, c_ptr ); c_ptr += nn_proc; for ( j = 0; j < num_eig; j++ ) *(c_ptr++ ) = mapZ[j]; c_ptr = iscratch + nn_proc; k = 0; for ( j = 0; j < nn_proc; j++ ) { ii = iscratch[j]; end_of_block = count_list(ii, c_ptr, &num_eig); fil_int_lst (end_of_block, &mapZ[k], ii); k += end_of_block; } eval = ptbeval; c_ptr = clustr_info; /* cluster information */ for ( j = 0; j < 4 * msize; j++ ) *(c_ptr++) = -1; for ( j = 0; j < num_eig; j++ ) icsplit[j] = -5; if ( iflag == -1 ) return(-50); c_ptr = clustr_info; /* should have the same error messages as lapack */ /* get machine precision */ eps = DLAMCHE; /* should spread this across blocks; assume, of course, that nblk is set correctly */ beg_of_block = -1; end_of_block = -1; while ( end_of_block < (num_eig-1 )) { ++end_of_block; beg_of_block = end_of_block; /* find the end of the block */ while (( iblock[end_of_block] == iblock[beg_of_block] ) && ( end_of_block < num_eig )) { end_of_block++; } --end_of_block; /* */ nblk = iblock[beg_of_block] - 1; if (nblk == 0 ) b1 = 0 ; else b1 = isplit[nblk-1]; bn = isplit[nblk]-1; blksiz = bn - b1 + 1; /* Skip all the work if the block size is one. */ #ifdef DEBUG1 fprintf(stderr, "me = %d b1 = %d bn = %d blksiz1 = %d \n", me, b1, bn, blksiz ); #endif if (blksiz == 1) { if ( clustr_check(beg_of_block, end_of_block, *imin, imax) == 1 ) { ii = beg_of_block - *imin; fil_dbl_lst ( msize, vecZ[ii], 0.0e0); vecZ[ii][b1] = 1.0e0; } icsplit[ num_all_cls ] = end_of_block; num_all_cls++; *nacluster = num_all_cls; } #ifdef DEBUG1 fprintf(stderr, " got here 1 me = %d \n", me ); #endif if ( blksiz > 1 ) { /* Compute reorthogonalization criterion and stopping criterion Gershgorin radius should use pointers or loop unrooling here to speed things up; this is a O(n) operation so things aren't that expensive */ /* * This is how one would compute ortol if the eigenvalues of block i * of T, Ti, were accurate relative to norm(Ti). * * onenrm = fabs( d[b1] ) + fabs( e[b1+1] ); * tmp = fabs(d[bn]) + fabs(e[bn]); * onenrm = max(onenrm, tmp); * for (i = b1 + 1; i < bn; ++i) { * tmp = fabs(d[i]) + fabs(e[i]) + fabs(e[i + 1]); * onenrm = max(onenrm, tmp); * } * * ortol = onenrm * (DoublePrecision ) 1.e-3 ; */ #ifdef DEBUG1 fprintf(stderr, " got here 2 me = %d \n", me ); #endif /* Loop through eigenvalues of block nblk. */ jblk = 0; for (j = beg_of_block; j < ( end_of_block + 1) ; ++j) { ++jblk; xj = w[j]; /* If eigenvalues j and j-1 are too close, add a relatively small perturbation. If the eigenvalues are not in increasing order in the block, exit. */ if (jblk > 1) { eps1 = fabs(eps * xj); pertol = eps1 * R_TEN; if ((xj - w[j-1]) < -eps1) { printf(" Error in ordering eigenvalues: -5 error clustrf me = %d \n", (int) me ); } sep = xj - xjm; /* if (sep < pertol*MAX(fabs(xj), fabs(xjm))) */ if ( sep < pertol) xj = xjm + pertol; } eval[j] = xj; #ifdef DEBUG1 fprintf(stderr, " got here 3 me = %d \n", me ); #endif if (jblk == 1) clustrptr = j; /* tight cluster */ if ( jblk > 1 ) { /* jblk > 1 */ sep = fabs(xj - xjm); /* printf(" got here 4 me = %d sep xj %20.16g xjm %20.16g sep %20.16g \n", me, xj, xjm, sep-sepfine*MAX(fabs(xj), fabs(xjm))); */ if (fabs(sep) > fabs(sepfine*MAX(fabs(xj),fabs(xjm)))) { if ( clustr_check(clustrptr, j-1, *imin, imax) == 1 ) { *(c_ptr++) = clustrptr; *(c_ptr++) = j-1; *(c_ptr++) = b1; *(c_ptr++) = bn; num_cls++; max_clustr_size = MAX( j - clustrptr, max_clustr_size); } clustrptr = j; jblk = 1; icsplit[ num_all_cls ] = j-1; num_all_cls++; *nacluster = num_all_cls; #ifdef DEBUG1 fprintf(stderr, " out of 4 me = %d \n", me ); #endif } } /* assume that xj - xjm < ortol but we're at the end of the blk */ if ( j == end_of_block ) { if ( clustr_check(clustrptr, end_of_block, *imin, imax) == 1 ) { #ifdef DEBUG1 fprintf(stderr, " got here 5 me = %d \n", me ); #endif *(c_ptr++) = clustrptr; *(c_ptr++) = end_of_block; *(c_ptr++) = b1; *(c_ptr++) = bn; num_cls++; max_clustr_size = MAX( (end_of_block -clustrptr + 1), max_clustr_size); #ifdef DEBUG1 fprintf(stderr, " out of 5 me = %d cbeg %d cend %d b1 %d bn %d \n",me, clustrptr, end_of_block, b1, bn ); #endif } icsplit[ num_all_cls ] = end_of_block; num_all_cls++; *nacluster = num_all_cls; } xjm = xj; } } } /* ime = -1; ii = -1; for ( ii = 0; ii < msize; ii++ ){ if ( mapZ[ii] == me ){ ime = ii; break; } } if ( ime % 2 == 1 ) { if ( num_all_cls > 1 ) { iscratch[0] = clustr_info[0]; iscratch[1] = clustr_info[1]; iscratch[2] = clustr_info[2]; iscratch[3] = clustr_info[3]; c_ptr = &clustr_info[ 4 * ( num_all_cls - 1)]; clustr_info[0] = c_ptr[0]; clustr_info[1] = c_ptr[1]; clustr_info[2] = c_ptr[2]; clustr_info[3] = c_ptr[3]; c_ptr = &clustr_info[0]; c_ptr[0] = iscratch[0]; c_ptr[1] = iscratch[1]; c_ptr[2] = iscratch[2]; c_ptr[3] = iscratch[3]; } } */ c_ptr = clustr_info; *num_clustr = num_cls; /* for ( ii = 0; ii < 4* *num_clustr; ii++ ) printf("me = %d clustr_info[%d] = %d \n", me, ii, *(c_ptr++)); */ /* for ( ii = 0; ii < num_eig ; ii++ ) printf("me = %d mapZ[%d] = %d \n", me, ii, mapZ[ii]); printf("me = %d exiting clustrf2 imin = %d \n", me, *imin); */ /* for ( ii = 0; ii < 4* *num_clustr; ii++ ) printf("me = %d clustrf5 close clustr_info[%d] = %d \n", me, ii, *(c_ptr++)); */ return(max_clustr_size); }