1 /*
2 $Id$
3 *======================================================================
4 *
5 * DISCLAIMER
6 *
7 * This material was prepared as an account of work sponsored by an
8 * agency of the United States Government. Neither the United States
9 * Government nor the United States Department of Energy, nor Battelle,
10 * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11 * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12 * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13 * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14 * INFRINGE PRIVATELY OWNED RIGHTS.
15 *
16 * ACKNOWLEDGMENT
17 *
18 * This software and its documentation were produced with Government
19 * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20 * States Department of Energy. The Government retains a paid-up
21 * non-exclusive, irrevocable worldwide license to reproduce, prepare
22 * derivative works, perform publicly and display publicly by or for the
23 * Government, including the right to distribute to other Government
24 * contractors.
25 *
26 *======================================================================
27 *
28 * -- PEIGS routine (version 2.1) --
29 * Pacific Northwest Laboratory
30 * July 28, 1995
31 *
32 *======================================================================
33 */
34 /************************************************
35 *
36 * C routine for reduction of a symmetric
37 * matrix to symmetric tridiagonal form
38 *
39 * the tridiagonal matrix is returned as d[0:n-1]
40 * and e[1:n-1]
41 *
42 * Integer tred2 (n, vecA, mapA, Q, mapQ, diag, upperdiag, iwork, work )
43 * DoublePrecision **vecA, matrix to be reduced
44 * **Q, eigenvector matrix
45 * *diag, diagonal elements of tri-diagonal matrix
46 * *upperdiag, upper diagonal elements of tri-diagonal matrix
47 * *work; Householder vector (temp space of size n doubles)
48 *
49 * Integer *n, problem size
50 * *mapA,
51 * *mapQ,
52 * *iwork; integer scratch work space
53 *
54 *
55 *
56 *
57 */
58
59 /* ================================================================= */
60
61 #include <stdio.h>
62 #include <math.h>
63 #include <stdlib.h>
64
65 #include "globalp.c.h"
66
67 extern DoublePrecision ddot_(), dnrm2_(), dasum_();
68
69 #define MSG_START 25000
70
71 /*
72 ----- interfaces to Littlefield's portable mx... comm routines --------
73 */
74
75 /*
76 The following #define's are to avoid recursive calls if the mx...
77 routines are in turn implemented using PICL.
78 */
79
80 #define sync0 prs1sync0
81 #define clock0 prs1clock0
82 #define who0 prs1who0
83
84 #define MAXPROCS 1024
85 extern DoublePrecision clock0();
86
bbcast00(buf,len,type,root,snumprocs,plist)87 void bbcast00(buf, len, type, root, snumprocs, plist)
88 char *buf;
89 Integer len;
90 Integer type;
91 Integer root, snumprocs;
92 Integer *plist;
93 {
94 /*
95 mkplist (); assume that plist has the list of all the processors
96 */
97
98 extern Integer mxbrod_();
99
100 mxbrod_(buf,&root,&len,&snumprocs,plist,&type);
101 }
102
ibcast00(buf,len,type,root,snumprocs,plist)103 void ibcast00(buf, len, type, root, snumprocs, plist)
104 Integer *buf;
105 Integer len;
106 Integer type;
107 Integer root, snumprocs;
108 Integer *plist;
109 {
110 /*
111 mkplist (); assume that plist has the list of all the processors
112 */
113
114 extern Integer mxbrod_();
115
116 mxbrod_(buf,&root,&len,&snumprocs,plist,&type);
117 }
118
peigs_gmax00(buf,items,datatype,msgtype,root,snumprocs,plist,work)119 void peigs_gmax00(buf, items, datatype, msgtype, root, snumprocs, plist, work)
120 char *buf;
121 Integer items;
122 Integer datatype;
123 Integer msgtype;
124 Integer root;
125 Integer *plist, snumprocs;
126 DoublePrecision *work; /* workspace containing at least bufsiz bytes (see cmbbrf.h) */
127 {
128 Integer eight = sizeof(DoublePrecision);
129 extern Integer maxd_();
130 extern Integer mxcombv1_();
131
132 mxcombv1_( buf, maxd_, &eight, &items, &snumprocs, plist, &msgtype, (char *)work);
133 }
134
135
136
137
gsum00(buf,items,datatype,msgtype,root,snumprocs,plist,work)138 void gsum00(buf, items, datatype, msgtype, root, snumprocs, plist, work)
139 /*
140 Note that this implementation of gsum00 differs from Chinchalkar's
141 in that it leaves the result in all nodes participating nodes
142 (0..snumprocs-1) instead of just the root. This allows deleting
143 the explicit bbcast00 calls following each of the gsum00's.
144 */
145 char *buf;
146 Integer items;
147 Integer datatype;
148 Integer msgtype;
149 Integer root;
150 Integer *plist, snumprocs;
151 DoublePrecision *work; /* workspace containing at least bufsiz bytes (see cmbbrf.h) */
152 {
153 Integer eight = sizeof(DoublePrecision);
154 extern Integer sumdv_();
155 extern Integer mxcombv1_();
156
157 mxcombv1_( buf, sumdv_, &eight, &items, &snumprocs, plist, &msgtype, (char *)work);
158 }
159
gsum01(buf,items,datatype,msgtype,root,snumprocs,plist,work)160 void gsum01(buf, items, datatype, msgtype, root, snumprocs, plist, work)
161 /*
162 vectorized integer global sum
163 */
164 char *buf;
165 Integer items;
166 Integer datatype;
167 Integer msgtype;
168 Integer root;
169 Integer *plist, snumprocs;
170 DoublePrecision *work; /* workspace containing at least bufsiz bytes (see cmbbrf.h) */
171 {
172 Integer data_len = sizeof(Integer);
173 extern Integer sumiv_();
174 extern Integer mxcombv1_();
175
176 mxcombv1_( buf, sumiv_, &data_len, &items, &snumprocs, plist, &msgtype, (char *)work);
177 }
178
179 /* ----- FORTRAN interface ---------- */
180
181
Integer tred2(n, vecA, mapA, Q, mapQ, diag, upperdiag, iwork, work )
     Integer *n,             /* problem size */
     *mapA,                  /* mapA[i] = processor owning row i of A */
     *mapQ,                  /* processor map for Q; verified below to match mapA */
     *iwork;                 /* integer scratch work space */

     DoublePrecision **vecA, /* matrix to be reduced */
     **Q,                    /* eigenvector matrix */
     *diag,                  /* diagonal elements of tri-diagonal matrix */
     *upperdiag,             /* upper diagonal elements of tri-diagonal matrix */
     *work;                  /* Householder vector plus gsum00 workspace.
                                (temp space of size n +1 doubles) PLUS
                                at least bufsiz bytes (see cmbbrf.h) */
{

  /*
   * TRED2 : reduction of a real symmetric matrix to tri-diagonal form
   * Uses Householder reductions.
   *
   * INPUT:
   *
   * A[n][n] -> input matrix to be reduced.
   *            only the upper triangular portion need be filled
   *            distributed by rows(or columns) in a wrap fashion. (not compact)
   * n       -> problem size.
   * numprocs -> number of processors to use for solution
   *
   * OUTPUT:
   *
   * diag[n] -> diagonal elements of the tri-diagonal matrix
   *            obtained from A
   * upperdiag[n] -> upper diagonal elements of the tri-diagonal matrix
   *                 obtained from A. upperdiag[0] is set to 0.0 and
   *                 upperdiag[1] to upperdiag[n-1] contains the desired
   *                 result
   * Q(n,n)  -> eigenvector matrix
   *            distributed by rows(or columns) in a wrap fashion (compact)
   *            P_0.P_1.P_2...
   *
   * tred2() returns an error code, which, if 0 indicates success. NYI
   *
   * March 31, 1993
   * modified to work with mapping list wrapping
   * mapQ is assumed to be the same as mapA
   */

  static Integer IONE = 1;                    /* BLAS unit stride, passed by reference */
  DoublePrecision DZERO = (DoublePrecision) 0.0e0, DONE = (DoublePrecision) 1.0e0;

  Integer i, j, k;                            /* counters */
  Integer row_indx, linfo;                    /* local row index into vecA; error code */
  Integer msize;                              /* problem size, dereferenced from *n */
  Integer n_procs, *mapvecA, *mapvecQ;
  Integer *iscrat, *proclist;

  DoublePrecision norm2,                      /* 2 norm of a vector */
    new_norm,                                 /* 2 norm of a vector */
    first,                                    /* first component of a vector */
    onenorm,                                  /* one norm of a vector */
    p_t_v;                                    /* p'v */

  DoublePrecision w, temp_factor;             /* temp vars */
  DoublePrecision *HH_vec, *workMX;           /* Householder vector; combine workspace */

  char msg[45];

  Integer me;                                 /* my node number */


  char *strcpy();

  extern Integer mxmynd_();
  extern Integer mxmync_();
  extern Integer reduce_list2();
  extern Integer count_list();
  extern Integer indxL();
  extern Integer mxnprc_();
  extern Integer fil_mapvec_();

  extern void xerbla_();
  extern void mapdif1_();

  extern void g_exit_();
  extern void pxerbla2_();

  extern void dscal_();
  extern void dcopy_();
  extern void daxpy_();
  extern DoublePrecision dasum_();


  Integer size, nrowsA, nrowsQ;

  Integer iii;

  /* ------------------------------------------------------------------- */

  me = mxmynd_();
  strcpy ( msg, "Error in TRED2 \n");

  linfo = 0;

  /*
   * Argument sanity checks.  Each failure reports via xerbla_ with the
   * (negated) position of the offending argument; the first four abort
   * immediately, the rest fall through to the collective g_exit_ below.
   */

  if ( n == NULL ) {
    linfo = -1;
    xerbla_( "TRED2 \n", &linfo);
    return(1);
  }

  if ( *n < 1) {
    linfo = -1;
    xerbla_( "TRED2 \n", &linfo);
    return(1);
  }

  if ( vecA == NULL ) {
    linfo = -2;
    xerbla_( "TRED2 \n", &linfo);
    return(1);
  }

  if ( mapA == NULL ) {
    linfo = -3;
    xerbla_( "TRED2 \n", &linfo);
    return(1);
  }


  /* every entry of mapA must name a valid processor: 0 <= mapA[i] <= mxnprc_() */
  iii = mxnprc_();
  iscrat = mapA;
  for ( i = 0; i < *n; i++ ) {
    j = *(iscrat++);
    if ( j < 0 ) {
      linfo = -3;
      xerbla_( "TRED2 \n", &linfo);
    }
    if ( j > iii ){
      linfo = -3;
      xerbla_( "TRED2 \n", &linfo);
    }
  }

  /* every locally-owned row of A must have storage attached */
  nrowsA = count_list( me, mapA, n);
  for ( i = 0 ; i < nrowsA; i++ )
    if ( vecA[i] == NULL ){
      linfo = -2;
      /* NOTE(review): %d with an Integer `me` — mismatch if Integer is long; confirm */
      fprintf(stderr, "node = %d NULL vector assignment in vecA \n", me);
      xerbla_( "TRED2 \n", &linfo);
    }

  if ( iwork == NULL ) {
    linfo = -8;
    xerbla_( "TRED2 \n", &linfo);
  }

  if ( work == NULL ){
    linfo = -9;
    xerbla_( "TRED2 \n", &linfo);
  }

  if ( Q == NULL )
    linfo = -4;

  if ( mapQ == NULL )
    linfo = -5;

  if ( diag == NULL )
    linfo = -6;

  if ( upperdiag == NULL )
    linfo = -7;

  /* built even when linfo == 0; only used by g_exit_ on error */
  sprintf(msg, "TRED22:Error in argument %d \n", linfo);

  /*
    printf(" in tred22.c me = %d \n", me );
  */


  /* collective exit: aborts all processors if any one of them saw linfo != 0 */
  g_exit_( &linfo, msg, mapA, n, iwork, work);

  /* checking the other arguments to see if further error check is possible */

  /* pack (n, mapA[0..n-1]) into iwork and verify every processor agrees on it */
  *iwork = *n;
  iscrat = iwork + 1;
  for ( k = 0; k < *n; k++ )
    *(iscrat++) = mapA[k];

  linfo = 0;
  k = (*n + 1)*sizeof(Integer);
  iscrat += *n + 1;
  pxerbla2_( &k, (char *)iwork, mapA, n, iscrat, &linfo );
  if( linfo != 0 )
    linfo = -1;

  g_exit_( &linfo, "TRED22: Mapping inconsistancies. mapA\n", mapA, n, iwork, work);

  /* verify mapQ is globally consistent and identical (as a set) to mapA */
  iscrat = iwork;
  linfo = 0;
  k = *n * sizeof(Integer);
  pxerbla2_( &k, (char *) mapQ, mapA, n, iscrat, &linfo );

  mapdif1_( n, mapQ, n, mapA, iscrat, &j );
  if( linfo != 0 || j != 0 )
    j = -1;

  g_exit_( &j, "TRED22: Mapping set differ:mapQ and mapA\n", mapA, n, iwork, work);



  /*
    setting the location of the Q matrix
  */



  /*

    set the matrix Q to be distributed as matrix A ; replicate this information
    One should probably do a more general Q matrix distribution; this is just to enforce
    that mapQ = mapA

  */

  /*
    DZERO out upperdiag[0]
  */

  upperdiag[0] = DZERO;

  /*
    set Q to the identity matrix
  */

  if ( n == NULL )
    exit(-1);


  if ( *n < 0 )
    exit(-1);

  msize = *n;

  /* carve the caller's work buffer: n+1 doubles for the Householder
     vector, the remainder (>= bufsiz bytes) for the gsum00 combines */
  HH_vec = work;

  workMX = work + *n + 1;

  /* carve iwork: local row lists for Q and A, then the distinct-processor list */
  iscrat = iwork;
  mapvecQ = iscrat;
  nrowsQ = fil_mapvec_( &me, &msize, mapQ, mapvecQ );
  iscrat += nrowsQ;
  mapvecA = iscrat;
  nrowsA = fil_mapvec_( &me, &msize, mapA, mapvecA );
  iscrat += nrowsA;
  proclist = iscrat;
  n_procs = reduce_list2( msize, mapA, proclist);
  iscrat += n_procs;

  /*
    initialize Q matrix to the identity matrix
    (only the locally-owned rows; k indexes local storage)
  */

  k = 0;
  for (i=0; i< msize; i++) {
    if ( mapQ[i] == me ) {
      for (j=0; j< msize; j++) {
        Q[k][j] = DZERO;
      }
      Q[k][i] = DONE;
      k++;
    }
  }


  /* apply the n-2 HH transformations */

  row_indx = -1;
  for (i=0; i<(msize-2); i++) {
    if ( mapA[i] == me ) {
      row_indx++;
      /*
        construct the HH vector based on ROW i
      */

      /*
        scale the vector if necessary
      */

      size = msize - i - 1;
      onenorm = dasum_(&size, &vecA[row_indx][1], &IONE);

      /* now we know the diagonal entry */

      diag[i] = vecA[row_indx][0];

      if (onenorm == DZERO) {
        /* then skip this transformation */

        /* vecA[row_indx][0] doubles as the skip flag broadcast below:
           DZERO = skipped, DONE = apply */
        vecA[row_indx][0] = DZERO;
        upperdiag[i+1] = DZERO;
      }
      else {
        vecA[row_indx][0] = DONE;

        /* scale vector by its 1-norm before forming the 2-norm, for
           overflow/underflow safety */

        size = msize - i - 1;
        norm2 = dnrm2_(&size, &vecA[row_indx][1], &IONE)/onenorm;
        vecA[row_indx][1] /= onenorm;

        /* choose the sign of the reflector that avoids cancellation in
           the first component; upperdiag gets -sign(a1)*||a||_2 */
        if (vecA[row_indx][1] < DZERO) {
          first = vecA[row_indx][1] - norm2;
          upperdiag[i+1] = norm2*onenorm;
        }
        else {
          first = vecA[row_indx][1] + norm2;
          upperdiag[i+1] = -norm2*onenorm;
        }

        /* change A[i][i+1:n-1] to v and normalize it too */

        new_norm = norm2*norm2 - vecA[row_indx][1] * vecA[row_indx][1] + first*first;
        new_norm = sqrt(new_norm);

        vecA[row_indx][1] = first/new_norm;
        temp_factor = DONE/onenorm/new_norm;
        size = msize - i - 2;
        dscal_(&size, &temp_factor, &vecA[row_indx][2], &IONE);
      }

      /* send v to all other nodes (message type = i; flag in element 0) */

      bbcast00( (char * ) &vecA[row_indx][0], (msize-i)*sizeof(DoublePrecision), i, me, n_procs, proclist);

      /* copy the new HH vector into HH_vec */

      size = msize - i;
      dcopy_(&size, &vecA[row_indx][0], &IONE, &HH_vec[i], &IONE);

    }
    else {
      /*
        ireceive v (HH_vec[i] = skip flag, HH_vec[i+1:] = v)
      */
      bbcast00( (char *) &HH_vec[i], (msize-i)*sizeof(DoublePrecision), i, mapA[i], n_procs, proclist);
      /* non-owners contribute zeros; true values are summed in at the end */
      diag[i] = DZERO;
      upperdiag[i+1] = DZERO;
    }

    if (HH_vec[i] != DZERO ) {
      /* then this transformation has not been skipped */

      /*
       * Now HH_vec[i+1:n-1] = v and v'v = 1
       * update A(i+1:n-1, i+1:n-1)
       * NOTE: diag[i+1:n-1] is unused. Store p in it.
       */

      /* DZERO out vector; should dscal it */

      for (k=i+1; k<msize; k++) {
        diag[k] = DZERO;
      }

      /* accumulate the local contribution to p = A*v using only the
         locally-owned rows (lower-triangular storage: each owned row k
         supplies both its above- and below-diagonal products) */
      j = row_indx;
      for ( k = i + 1; k < msize; k++) {
        if ( mapA[k] == me ) {
          size = msize - k - 1;
          j++;

          /* above the diagonal */

          diag[k] += ddot_( &size, &vecA[j][1], &IONE, &HH_vec[k+1], &IONE);

          /* below the diagonal */

          daxpy_(&size, &HH_vec[k], &vecA[j][1], &IONE, &diag[k+1], &IONE);

          /* diagonal entry */

          diag[k] += vecA[j][0] * HH_vec[k];
        }
      }


      /* p is now distributed across processors. collect it
         (the `5` datatype argument is ignored by gsum00) */

      gsum00( (char *) &diag[i+1], msize-i-1, 5, MSG_START, mapA[0], n_procs, proclist, workMX);

      /* scale by beta = -2 (reflector is I - 2 v v') */
      size = msize - i - 1;
      temp_factor = ((DoublePrecision) -2.0e0);
      dscal_(&size, &temp_factor, &diag[i+1], &IONE );

      /* over-write diag[i+1:n-1] with w */

      p_t_v = -ddot_(&size, &diag[i+1], &IONE, &HH_vec[i+1], &IONE );
      daxpy_(&size, &p_t_v, &HH_vec[i+1], &IONE, &diag[i+1], &IONE);

      /* and finally update A[i+1:n-1][i+1:n-1]
         (symmetric rank-2 update A += v w' + w v', owned rows only) */

      j = row_indx;
      for (k=i+1; k<msize; k++) {
        if ( mapA[k] == me) {
          j++;
          size = msize - k;
          daxpy_(&size, &HH_vec[k], &diag[k], &IONE , &vecA[j][0], &IONE );
          daxpy_(&size, &diag[k], &HH_vec[k], &IONE , &vecA[j][0], &IONE );
        }
      }

      /* FORWARD ACCUMULATION */
      /* compute w. No need to store it. beta is -2 */

      for (j=0; j < nrowsQ; j++) {
        size = msize - i - 1;
        w =((DoublePrecision) -2.0e0) * ddot_(&size, Q[j] + i + 1, &IONE , &HH_vec[i+1], &IONE );
        daxpy_(&size, &w, &HH_vec[i+1], &IONE, Q[j] + i + 1, &IONE);
      }
    }
  }

  /*
    now whatever is left over: the trailing 1x1 or 2x2 block is already
    tridiagonal; copy its entries out (non-owners contribute zeros,
    summed below)
  */

  if (msize == 1) {
    if (mapA[0] == me ) {
      diag[0] = vecA[0][0];
    }
  }
  else {
    if ( mapA[msize-2] == me ){
      j = indxL( msize-2, nrowsA, mapvecA);
      upperdiag[msize-1] = vecA[j][1];
      diag[msize-2] = vecA[j][0];
    }
    else {
      upperdiag[msize-1] = DZERO;
      diag[msize-2] = DZERO;
    }

    if ( mapA[msize-1] == me ){
      j = indxL( msize-1, nrowsA, mapvecA);
      diag[msize-1] = vecA[j][0];
    }
    else {
      diag[msize-1] = DZERO;
    }
  }

  /*
    collect diag[] and upperdiag[]: each entry is nonzero on exactly one
    processor, so a global sum replicates the full tridiagonal everywhere
  */

  gsum00((char * ) diag, msize, 5, MSG_START+2, mapA[0], n_procs, proclist, workMX);
  gsum00((char * ) &upperdiag[1], msize-1, 5, MSG_START+4, mapA[0], n_procs, proclist, workMX);


  /*
    printf(" out tred22.c me = %d \n", me );
  */

  /*
    for (k=0; k<msize; k++)
    printf(" k = %d tridiag d = %f e = %f \n", k, diag[k], upperdiag[k]);
  */

  return 0;
}
651