1 /*
2 $Id$
3 *======================================================================
4 *
5 * DISCLAIMER
6 *
7 * This material was prepared as an account of work sponsored by an
8 * agency of the United States Government. Neither the United States
9 * Government nor the United States Department of Energy, nor Battelle,
10 * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11 * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12 * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13 * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14 * INFRINGE PRIVATELY OWNED RIGHTS.
15 *
16 * ACKNOWLEDGMENT
17 *
18 * This software and its documentation were produced with Government
19 * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20 * States Department of Energy. The Government retains a paid-up
21 * non-exclusive, irrevocable worldwide license to reproduce, prepare
22 * derivative works, perform publicly and display publicly by or for the
23 * Government, including the right to distribute to other Government
24 * contractors.
25 *
26 *======================================================================
27 *
28 * -- PEIGS routine (version 2.1) --
29 * Pacific Northwest Laboratory
30 * July 28, 1995
31 *
32 *======================================================================
33 */
34 /*
35 PeIGS internal routine:
36
37 not intended for external use; see the routine pstein
38
Integer clustrinv4_(n, d, e, dplus, lplus, eval, schedule, num_clustr, mapZ, mapvecZ, vecZ, imin, nacluster, icsplit, iscratch, scratch)

Integer *n, *schedule, *num_clustr, *mapZ, *mapvecZ, *imin, *nacluster,
        *icsplit, *iscratch;

DoublePrecision *d, *e, *eval, *dplus, *lplus, **vecZ, *scratch;
45
46
47 for computing eigenvectors by inverse iteration with cluster modified Gram Schmidt
48 with "de-tangling" of clusters.
49
50 returns 0 if everything seems ok.
51 k > 0 if k-th eigenvector (owned by processor mapZ[k-1], k = 1 to neigval,
52 failed to converge in inverse iteration. In this case eigenvectors
53 1 to k-1 converged ok, eigenvector k failed to converge, and it is
54 possible that some eigenvectors numbered greater than k failed to
55 converge. In any case, the first k-1 eigenvectors are good,
56 while eigenvectors k is bad, and eigenvectors k+1 to neigval are
57 probably bad too. Note that the meaning of bad is not always clear,
58 in general, however, it means that the eigenvector is probably
59 not accurate to full precision.
60
61
62 This code "de-tangles" clusters as follows. Assume the eigenvalues in a cluster
63 are owned by more than one processor (as determined by mapZ). Then, the processor
owning the first eigenvector in the cluster does all required iterations of
65 inverse iteration and mgs to compute the eigenvectors in this cluster. It then
66 sends these finished eigenvectors to the second processor in mapZ which owns part
67 of this cluster. At this point the clusters are de-tangled and all processors can
68 start working on their eigenvectors.
69
This version of the code assumes that during de-tangling each processor owns the
first eigenvector in at most one multi-processor cluster and receives the finished
eigenvectors from at most one other processor. These assumptions are satisfied by
the current algorithm in clustrf and are checked by this routine.
74
75 */
76
77 #include <stdio.h>
78 #include <math.h>
79
80 #include "globalp.c.h"
81 #include "clustr_inv.h"
82
83 /*
84 #define DEBUG1
85 #define DEBUG5
86 */
87 #define CLUSTRLEN 4
88 #define LOOP 3
89 #define INV_TIME 2
90 #define ITIME1 1
91
clustrinv4_(n,d,e,dplus,lplus,eval,schedule,num_clustr,mapZ,mapvecZ,vecZ,imin,nacluster,icsplit,iscratch,scratch)92 Integer clustrinv4_(n, d, e, dplus, lplus, eval, schedule, num_clustr, mapZ, mapvecZ, vecZ, imin, nacluster, icsplit, iscratch, scratch)
93 Integer *n, *schedule, *num_clustr, *mapZ, *mapvecZ, *imin,
94 *nacluster, *icsplit, *iscratch;
95 DoublePrecision *d, *e, *eval, **vecZ, *scratch, *dplus, *lplus;
96 {
97 /*
98 n = dimension of the tridiagonal matrix
99 d = diagonal of the tridiagonal matrix
100 e[2:n] = upper diagonal of the tridiagonal matrix
101 schedule = a integer array of the scheduling information
102 mapZ = location of the eigenvectors
103 mapvecZ = in
104 vecZ
105 iscratch = integer scratch space
106 scratch = DoublePrecision precision scratch space
107
108 returns the number of eigenvector that this processor holds
109 */
110
111 static Integer three = 3, IONE = 1;
112 Integer indx, i, j, iseed[4], bb1, bn;
113 Integer blksiz, clustr_ptr, cn;
114 Integer me, naproc, Zvec;
115 Integer *cl_ptr;
116 Integer c1, csiz, xc1, xcsiz, xblksiz;
117 Integer cl_num;
118 Integer itime;
119 Integer send_num, send_cl, send_to,
120 recv_num, recv_cl, recv_from,
121 myindx, itype, nvecs, isize, ival, first, ibad, itmp;
122
123 DoublePrecision stpcrt, onenrm, eps;
124 DoublePrecision tmp, *dscrat, *first_buf;
125
126 extern void xerbla_();
127 extern Integer idamax_(), mclock_(), succ_(), mxmynd_(), mxnprc_();
128 extern DoublePrecision dasum_(), dlamch_(), dnrm2_(), ddot_(), dlarnd_();
129 extern void dlagtf_(), dlarnv_();
130 extern void printff_(), mgs_3();
131 extern void mgspnl_(), dscal_(), dlagts_();
132 extern void dcopy_(), daxpy_();
133 extern Integer count_list ();
134 extern Integer inv_it();
135 extern void mgs ();
136 extern DoublePrecision dlamch_();
137 extern void fil_dbl_lst ();
138
139 /*
140 Function Body
141 */
142
143 me = mxmynd_();
144 naproc = mxnprc_();
145
146 ibad = 0;
147
148 dscrat = scratch;
149
150 #ifdef DEBUG5
151
152 if( me == mapZ[0] ){
153 fprintf(stderr, " nacluster = %d \n", *nacluster );
154 cn = -1;
155 for( j = 0; j < *nacluster; j++ ) {
156 c1 = cn + 1;
157 cn = icsplit[j];
158 if( cn > c1 )
159 fprintf( stderr, " cluster[%d] = %d to %d owned by %d to %d \n",
160 j,c1,cn, mapZ[c1], mapZ[cn] );
161 }
162 for( j = 0; j < *n; j++ )
163 fprintf( stderr, " eval[%d] = %g \n", j, eval[j]);
164 for( j = 0; j < *n; j++ )
165 fprintf( stderr, " d[%d] = %g e[%d] = %g \n", j, d[j], j, e[j] );
166 }
167 mxsync_();
168 exit(-1);
169 #endif
170 #ifdef DEBUG1
171 fprintf(stderr, " in clustrxx me = %d \n", me );
172
173 if( me == mapZ[0] ){
174 cn = -1;
175 for( j = 0; j < *nacluster; j++ ) {
176 c1 = cn + 1;
177 cn = icsplit[j];
178 if( cn > c1 )
179 fprintf( stderr, " cluster[%d] = %d to %d owned by %d to %d \n",
180 j,c1,cn, mapZ[c1], mapZ[cn] );
181 }
182 }
183 #endif
184
185 /*
186 Get machine constants. should set this up somewhere to call it only once
187 */
188
189 /*
190 eps = dlamch_("E");
191 */
192
193 eps = DLAMCHE;
194
195 /*
196 Initialize seed for random number generator DLARND.
197 how about randomizing the randomness?
198 */
199
200 blksiz = 1;
201
202 for (i = 0; i < 4; ++i)
203 iseed[i] = 1;
204
205 iseed[3] = 2*me + 1;
206
207 /*
208 Compute eigenvectors of matrix blocks.
209 */
210
211 /*
212 this is the number of clusters that this processor will process
213 */
214
215
216 cl_num = *num_clustr;
217 Zvec = 0;
218
219 if ( cl_num == 0 )
220 return(ibad);
221
222 /*
223 * If I own the start of my last cluster, but not all of this cluster
224 * then do my part of the last cluster and send it to the processor
225 * which owns the first vector in the cluster which is not mine.
226 */
227
228 cl_ptr = schedule;
229
230 send_num = 0;
231 recv_num = 0;
232 if( naproc > 1 ) {
233 for (clustr_ptr= 0; clustr_ptr < cl_num ; clustr_ptr++) {
234
235 c1 = *(cl_ptr++);
236 cn = *(cl_ptr++);
237 bb1 = *(cl_ptr++);
238 bn = *(cl_ptr++);
239
240 if( cn > c1 ) {
241 if( mapZ[c1] == me ) {
242 for (j = c1+1; j <= cn; j++ ){
243 if ( mapZ[j] != me ) {
244 send_num++;
245 send_cl = clustr_ptr;
246 send_to = mapZ[j];
247 break;
248 }
249 }
250 }
251 else {
252
253 for (j = c1+1; j <= cn; j++ ){
254 if ( mapZ[j] != mapZ[c1] ) {
255 if ( mapZ[j] == me ) {
256 recv_num++;
257 recv_cl = clustr_ptr;
258 recv_from = mapZ[c1];
259 }
260 break;
261 }
262 }
263 }
264 }
265 }
266
267 /*
268 *
269 * The following tests verify that the "de-tangling" assumptions about
270 * sending and/or receiving at most one finished set of eigenvectors listed
271 * in the subroutine header are satisfied.
272 *
273 * The exit(-1) conditions should only occur if the
274 * cluster scheduling algorithm is changed from its current form without
275 * modifying the de-tangeling code appropriately. Thus, the following exits
276 * should never be executed in the tested, distribution version of the code.
277 * Thus, we do not try to do a global exit here.
278 *
279 */
280
281 if( send_num > 1 || recv_num > 1 ) {
282 fprintf( stderr, " me = %d Internal Error in PEIGS clustrinv. \n", me );
283 fprintf( stderr, " me = %d recv_num and/or send_num > 1. \n", me);
284 exit( -1 );
285 }
286 if( ( send_num > 0 ) && ( send_cl != cl_num - 1) ) {
287 fprintf( stderr, " me = %d Internal Error in PEIGS clustrinv. \n", me );
288 fprintf( stderr, " me = %d send_cl != cl_num - 1. \n", me);
289 exit( -1 );
290 }
291 if( ( recv_num > 0 ) && ( recv_cl != 0 ) ) {
292 fprintf( stderr, " me = %d Internal Error in PEIGS clustrinv. \n", me );
293 fprintf( stderr, " me = %d recv_cl != 0. \n", me);
294 exit( -1 );
295 }
296 }
297
298 if( recv_num > 0 || send_num > 0 ) {
299
300 /* Set iscratch to a linked-list such that
301 * iscratch[j] = k means processor
302 * j receives from processor k when sending
303 * the first block of a cluster to the next processor in that cluster.
304 * k = -1 means processor j receives from no one.
305 * Only really care about the part of the linked-list
306 * including 'me'.
307 */
308
309 for (j= 0; j < naproc; j++)
310 iscratch[j] = -1;
311
312 cn = -1;
313 for( i = 0; i < *nacluster; i++ ) {
314 c1 = cn + 1;
315 cn = icsplit[i];
316 if( cn > c1 ) {
317 for (j = c1+1; j <= cn; j++ )
318 if ( mapZ[j] != mapZ[c1] ) {
319 iscratch[ mapZ[j] ] = mapZ[c1];
320 break;
321 }
322 }
323 }
324
325 myindx = 0;
326 j = me;
327 for (i= 0; i < naproc; i++) {
328 if( iscratch[j] == -1 )
329 break;
330
331 myindx++;
332 j = iscratch[j];
333 }
334
335 if( iscratch[j] != -1 ) {
336 fprintf( stderr, " me = %d Internal Error in PEIGS clustrinv. \n", me );
337 fprintf( stderr, " me = %d Swapping of initial cluster data \n", me);
338 fprintf( stderr, " me = %d does not have a well defined start. \n", me);
339 fprintf( stderr, " me = %d clustrinv,mgs cannot handle this. \n", me);
340 exit( -1 );
341 }
342
343 if( send_num == 0 ) {
344 itype = 999999;
345
346 c1 = schedule[4*recv_cl];
347 csiz = schedule[4*recv_cl+1] - c1 + 1;
348
349 blksiz = schedule[4*recv_cl+3] - schedule[4*recv_cl+2] + 1;
350
351 nvecs = count_list( recv_from, &mapZ[c1], &csiz);
352
353 isize = sizeof( DoublePrecision ) * blksiz * nvecs;
354
355 first_buf = dscrat;
356 dscrat += nvecs * blksiz;
357
358 #ifdef DEBUG1
359 fprintf(stderr, " me = %d Just before mxread isize = %d nvecs = %d \n", me, isize, nvecs );
360 #endif
361
362 ival = mxread_( first_buf, &isize, &recv_from, &itype );
363
364 #ifdef DEBUG1
365 fprintf(stderr, " me = %d Just after mxread \n", me );
366 for( j = 0; j < nvecs*blksiz; j++)
367 fprintf(stderr, " me = %d first_buf[%d] = %g \n",
368 me, j, first_buf[j]);
369 #endif
370
371 }
372 }
373
374 cl_ptr = schedule;
375 for (clustr_ptr= 0; clustr_ptr < cl_num ; clustr_ptr++) {
376
377 if( clustr_ptr == 0 && send_num > 0 ) {
378
379 c1 = schedule[ 4*send_cl ];
380 cn = schedule[ 4*send_cl + 1 ];
381 bb1 = schedule[ 4*send_cl + 2 ];
382 bn = schedule[ 4*send_cl + 3 ];
383
384 if( clustr_ptr == send_cl )
385 cl_ptr += 4;
386 }
387 else {
388
389 c1 = *(cl_ptr++);
390 cn = *(cl_ptr++);
391 bb1 = *(cl_ptr++);
392 bn = *(cl_ptr++);
393
394 if( clustr_ptr == send_cl && send_num > 0 )
395 cl_ptr += 4;
396
397 }
398
399
400 if ( c1 < *imin ) {
401 Zvec = 0;
402 }
403 else {
404 Zvec = c1 - *imin;
405 }
406
407
408 blksiz = bn - bb1 + 1;
409 csiz = cn - c1 + 1;
410
411 onenrm = fabs(d[bb1]) + fabs(e[bb1 + 1]);
412 tmp = fabs(d[bn])+ fabs(e[bn]);
413 onenrm = max(onenrm,tmp);
414
415 for (i = bb1 + 1; i < bn; ++i){
416 tmp = fabs(d[i])+fabs(e[i])+fabs(e[i + 1]);
417 onenrm = max(onenrm, tmp );
418 }
419
420 stpcrt = sqrt((DoublePrecision ) 1.0e-1 / (DoublePrecision ) blksiz);
421
422 for (i = 0; i < 4; ++i)
423 iseed[i] = 1;
424 iseed[3] = 1;
425
426 indx = 0;
427 for (j = c1; j <= cn; j++ ){
428 if ( mapZ[j] == me ) {
429 i = indx + Zvec;
430
431 /*
432 mapvecZ[ i ] = j;
433 */
434
435 /*
436 Initialize vector to zero.
437 */
438
439 fil_dbl_lst (*n, vecZ[i], 0.0e0);
440 dlarnv_(&three, &iseed[0], &blksiz, &vecZ[i][bb1]);
441 indx++;
442 }
443 }
444
445 #ifdef DEBUG1
446 fprintf(stderr, " just before inv_it and mgs of clustrxx me = %d c1 = %d cn = %d \n", me, c1, cn );
447 #endif
448
449 first = 0;
450 if( clustr_ptr == 0 && send_num > 0 )
451 first = 1;
452
453 itime = 1;
454 for ( j = 0; j < INV_TIME; j++ ) {
455 itmp = inv_it4( n, &c1, &cn, &bb1, &bn, &Zvec, &mapZ[c1], mapvecZ, vecZ,
456 dplus, lplus, eval, &eps, &stpcrt, &onenrm, iscratch, dscrat);
457
458 if( itmp > 0 )
459 if( ibad == 0 || itmp < ibad )
460 ibad = itmp;
461
462 if ( c1 != cn ) {
463 for ( i = 0; i < itime ; i++ ) {
464 mgs_3( &csiz, vecZ, &mapZ[c1], &bb1, &bn, &Zvec, &first, first_buf, iscratch, dscrat);
465 }
466 itime = 1;
467 }
468 }
469
470 #ifdef DEBUG1
471 fprintf(stderr, " clustrxx3 me = %d before send/rec \n", me );
472 #endif
473
474 /*
475 * Swap beginning portions of clusters which are distributed
476 * across more than one processor.
477 */
478
479 if( clustr_ptr == 0 && send_num > 0 ) {
480
481 itype = 999999;
482
483 if( recv_num > 0 && (( myindx % 2 ) == 0 ) ) {
484 xc1 = schedule[4*recv_cl];
485 xcsiz = schedule[4*recv_cl+1] - xc1 + 1;
486
487 xblksiz = schedule[4*recv_cl+3] - schedule[4*recv_cl+2] + 1;
488
489 nvecs = count_list( recv_from, &mapZ[xc1], &xcsiz);
490 isize = sizeof( DoublePrecision ) * xblksiz * nvecs;
491
492 first_buf = dscrat;
493 dscrat += nvecs * xblksiz;
494
495 #ifdef DEBUG1
496 fprintf(stderr, " me = %d Just before mxread 2 isize = %d nvecs = %d \n", me, isize, nvecs );
497 #endif
498
499 ival = mxread_( first_buf, &isize, &recv_from, &itype );
500 }
501
502 nvecs = 0;
503 for (j = c1; j <= cn; j++ ){
504 if ( mapZ[j] == me ) {
505 dcopy_(&blksiz, &vecZ[nvecs+Zvec][bb1], &IONE,
506 dscrat+nvecs*blksiz, &IONE );
507 nvecs++;
508 }
509 }
510
511 isize = sizeof( DoublePrecision ) * blksiz * nvecs;
512 #ifdef DEBUG1
513 fprintf(stderr, " me = %d Just before mxwrit isize = %d nvecs = %d \n", me, isize, nvecs );
514 for( j = 0; j < nvecs*blksiz; j++)
515 fprintf(stderr, " me = %d sending dscrat[%d] = %g \n",
516 me, j, dscrat[j]);
517 #endif
518
519 ival = mxwrit_( dscrat, &isize, &send_to, &itype );
520
521 if( recv_num > 0 && (( myindx % 2 ) != 0 ) ) {
522 xc1 = schedule[4*recv_cl];
523 xcsiz = schedule[4*recv_cl+1] - xc1 + 1;
524
525 xblksiz = schedule[4*recv_cl+3] - schedule[4*recv_cl+2] + 1;
526
527 nvecs = count_list( recv_from, &mapZ[xc1], &xcsiz);
528 isize = sizeof( DoublePrecision ) * xblksiz * nvecs;
529
530 first_buf = dscrat;
531 dscrat += nvecs * xblksiz;
532
533 #ifdef DEBUG1
534 fprintf(stderr, " me = %d Just before mxread 3 isize = %d nvecs = %d \n", me, isize, nvecs );
535 #endif
536
537 ival = mxread_( first_buf, &isize, &recv_from, &itype );
538 }
539 }
540 #ifdef DEBUG1
541 fprintf(stderr, " clustrxx3 me = %d after send/rec \n", me );
542 #endif
543
544 }
545
546 #ifdef DEBUG1
547 fprintf(stderr, " me = %d Exiting clustrinv_ \n", me );
548 #endif
549
550 return(ibad);
551 }
552