1 /*
2  $Id$
3  *======================================================================
4  *
5  * DISCLAIMER
6  *
7  * This material was prepared as an account of work sponsored by an
8  * agency of the United States Government.  Neither the United States
9  * Government nor the United States Department of Energy, nor Battelle,
10  * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11  * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12  * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13  * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14  * INFRINGE PRIVATELY OWNED RIGHTS.
15  *
16  * ACKNOWLEDGMENT
17  *
18  * This software and its documentation were produced with Government
19  * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20  * States Department of Energy.  The Government retains a paid-up
21  * non-exclusive, irrevocable worldwide license to reproduce, prepare
22  * derivative works, perform publicly and display publicly by or for the
23  * Government, including the right to distribute to other Government
24  * contractors.
25  *
26  *======================================================================
27  *
28  *  -- PEIGS  routine (version 2.1) --
29  *     Pacific Northwest Laboratory
30  *     July 28, 1995
31  *
32  *======================================================================
33  */
34 /************************************************
35  *
36  *     C routine for reduction of a symmetric
37  *     matrix to symmetric tridiagonal form
38  *
39  *  the tridiagonal matrix is returned as d[0:n-1]
40  *  and e[1:n-1]
41  *
42  * Integer tred2 (n, vecA, mapA, Q, mapQ, diag, upperdiag, iwork, work )
43  *     DoublePrecision **vecA,               matrix to be reduced
44  *       **Q,                       eigenvector matrix
45  *       *diag,              diagonal elements of tri-diagonal matrix
46  *       *upperdiag,         upper diagonal elements of tri-diagonal matrix
47  *       *work;              Householder vector (temp space of size n doubles)
48  *
49  *       Integer   *n,                  problem size
50  *       *mapA,
51  *       *mapQ,
52  *       *iwork;          integer scratch work space
53  *
54  *
55  *
56  *
57  */
58 
59 /* ================================================================= */
60 
61 #include <stdio.h>
62 #include <math.h>
63 #include <stdlib.h>
64 
65 #include "globalp.c.h"
66 
67 extern DoublePrecision ddot_(), dnrm2_(), dasum_();
68 
69 #define MSG_START 25000
70 
71 /*
72   ----- interfaces to Littlefield's portable mx... comm routines --------
73   */
74 
75 /*
76   The following #define's are to avoid recursive calls if the mx...
77   routines are in turn implemented using PICL.
78   */
79 
80 #define sync0  prs1sync0
81 #define clock0 prs1clock0
82 #define who0   prs1who0
83 
84 #define MAXPROCS 1024
85 extern DoublePrecision clock0();
86 
bbcast00(buf,len,type,root,snumprocs,plist)87 void bbcast00(buf, len, type, root, snumprocs, plist)
88      char *buf;
89      Integer len;
90      Integer type;
91      Integer root, snumprocs;
92      Integer *plist;
93 {
94   /*
95     mkplist ();  assume that plist has the list of all the processors
96     */
97 
98   extern Integer mxbrod_();
99 
100   mxbrod_(buf,&root,&len,&snumprocs,plist,&type);
101 }
102 
ibcast00(buf,len,type,root,snumprocs,plist)103 void ibcast00(buf, len, type, root, snumprocs, plist)
104      Integer *buf;
105      Integer len;
106      Integer type;
107      Integer root, snumprocs;
108      Integer *plist;
109 {
110   /*
111     mkplist ();  assume that plist has the list of all the processors
112     */
113 
114   extern Integer mxbrod_();
115 
116   mxbrod_(buf,&root,&len,&snumprocs,plist,&type);
117 }
118 
peigs_gmax00(buf,items,datatype,msgtype,root,snumprocs,plist,work)119 void peigs_gmax00(buf, items, datatype, msgtype, root, snumprocs, plist, work)
120      char *buf;
121      Integer items;
122      Integer datatype;
123      Integer msgtype;
124      Integer root;
125      Integer *plist, snumprocs;
126      DoublePrecision *work;  /* workspace containing at least bufsiz bytes (see cmbbrf.h) */
127 {
128   Integer eight = sizeof(DoublePrecision);
129   extern Integer maxd_();
130   extern Integer mxcombv1_();
131 
132   mxcombv1_( buf, maxd_, &eight, &items, &snumprocs, plist, &msgtype, (char *)work);
133 }
134 
135 
136 
137 
gsum00(buf,items,datatype,msgtype,root,snumprocs,plist,work)138 void gsum00(buf, items, datatype, msgtype, root, snumprocs, plist, work)
139      /*
140 	Note that this implementation of gsum00 differs from Chinchalkar's
141 	in that it leaves the result in all nodes participating nodes
142 	(0..snumprocs-1) instead of just the root.  This allows deleting
143 	the explicit bbcast00 calls following each of the gsum00's.
144 	*/
145      char *buf;
146      Integer items;
147      Integer datatype;
148      Integer msgtype;
149      Integer root;
150      Integer *plist, snumprocs;
151      DoublePrecision *work;  /* workspace containing at least bufsiz bytes (see cmbbrf.h) */
152 {
153   Integer eight = sizeof(DoublePrecision);
154   extern Integer sumdv_();
155   extern Integer mxcombv1_();
156 
157   mxcombv1_( buf, sumdv_, &eight, &items, &snumprocs, plist, &msgtype, (char *)work);
158 }
159 
gsum01(buf,items,datatype,msgtype,root,snumprocs,plist,work)160 void gsum01(buf, items, datatype, msgtype, root, snumprocs, plist, work)
161      /*
162        vectorized integer global sum
163        */
164      char *buf;
165      Integer items;
166      Integer datatype;
167      Integer msgtype;
168      Integer root;
169      Integer *plist, snumprocs;
170      DoublePrecision *work;  /* workspace containing at least bufsiz bytes (see cmbbrf.h) */
171 {
172   Integer data_len = sizeof(Integer);
173   extern Integer sumiv_();
174   extern Integer mxcombv1_();
175 
176   mxcombv1_( buf, sumiv_, &data_len, &items, &snumprocs, plist, &msgtype, (char *)work);
177 }
178 
179 /* ----- FORTRAN interface ---------- */
180 
181 
tred2(n,vecA,mapA,Q,mapQ,diag,upperdiag,iwork,work)182 Integer tred2(n, vecA, mapA, Q, mapQ, diag, upperdiag, iwork, work )
183      Integer   *n,                  /* problem size */
184      *mapA,
185      *mapQ,
186      *iwork;          /* integer scratch work space */
187 
188      DoublePrecision **vecA,               /* matrix to be reduced */
189      **Q,                /* eigenvector matrix */
190      *diag,             /* diagonal elements of tri-diagonal matrix */
191      *upperdiag,        /* upper diagonal elements of tri-diagonal matrix */
192      *work;           /* Householder vector plus gsum00 workspace.
193                          (temp space of size n +1 doubles) PLUS
194                          at least bufsiz bytes (see cmbbrf.h) */
195 {
196 
197   /*
198    * TRED2 : reduction of a real symmetric matrix to tri-diagonal form
199    *         Uses Householder reductions.
200    *
201    * INPUT:
202    *
203    * A[n][n]         ->  input matrix to be reduced.
204    *                     only the upper triangular portion need be filled
205    *                     distributed by rows(or columns) in a wrap fashion. (not compact)
206    * n               ->  problem size.
207    * numprocs        ->  number of processors to use for solution
208    *
209    * OUTPUT:
210    *
211    * diag[n]         ->  diagonal elements of the tri-diagonal matrix
212    *                     obtained from A
213    * upperdiag[n]    ->  upper diagonal elements of the tri-diagonal matrix
214    *                     obtained from A. upperdiag[0] is set to 0.0 and
215    *                     upperdiag[1] to upperdiag[n-1] contains the desired
216    *                     result
217    * Q(n,n)          ->  eigenvector matrix
218    *                     distributed by rows(or columns) in a wrap fashion (compact)
219    *                      P_0.P_1.P_2...
220    *
221    * tred2() returns an error code, which, if 0 indicates success. NYI
222 
223    March 31, 1993
224    modified to work with mapping list wrapping
225    mapQ is assume to be the same as mapA
226 
227    */
228 
229   static Integer IONE = 1;
230   DoublePrecision DZERO = (DoublePrecision) 0.0e0, DONE = (DoublePrecision) 1.0e0;
231 
232   Integer i, j, k;               /* counters */
233   Integer row_indx, linfo;
234   Integer msize;
235   Integer n_procs, *mapvecA, *mapvecQ;
236   Integer *iscrat, *proclist;
237 
238   DoublePrecision norm2,              /* 2 norm of a vector */
239   new_norm,           /* 2 norm of a vector */
240   first,              /* first component of a vector */
241   onenorm,            /* one norm of a vector */
242   p_t_v;              /* p'v */
243 
244   DoublePrecision w, temp_factor;     /* temp vars */
245   DoublePrecision *HH_vec, *workMX;
246 
247   char msg[45];
248 
249   Integer me;                    /* my node number */
250 
251 
252   char *strcpy();
253 
254   extern Integer mxmynd_();
255   extern Integer mxmync_();
256   extern Integer reduce_list2();
257   extern Integer count_list();
258   extern Integer indxL();
259   extern Integer mxnprc_();
260   extern Integer fil_mapvec_();
261 
262   extern void xerbla_();
263   extern void mapdif1_();
264 
265   extern void g_exit_();
266   extern void pxerbla2_();
267 
268   extern void dscal_();
269   extern void dcopy_();
270   extern void daxpy_();
271   extern DoublePrecision dasum_();
272 
273 
274   Integer size, nrowsA, nrowsQ;
275 
276   Integer iii;
277 
278   /* ------------------------------------------------------------------- */
279 
280   me = mxmynd_();
281   strcpy ( msg, "Error in TRED2 \n");
282 
283   linfo = 0;
284 
285   if ( n == NULL ) {
286     linfo = -1;
287     xerbla_( "TRED2 \n", &linfo);
288     return(1);
289   }
290 
291   if ( *n < 1) {
292     linfo = -1;
293     xerbla_( "TRED2 \n", &linfo);
294     return(1);
295   }
296 
297   if ( vecA == NULL ) {
298     linfo = -2;
299     xerbla_( "TRED2 \n", &linfo);
300     return(1);
301   }
302 
303   if ( mapA == NULL ) {
304     linfo = -3;
305     xerbla_( "TRED2 \n", &linfo);
306     return(1);
307   }
308 
309 
310   iii = mxnprc_();
311   iscrat = mapA;
312   for ( i = 0; i < *n; i++ ) {
313     j = *(iscrat++);
314     if ( j < 0 ) {
315       linfo = -3;
316       xerbla_( "TRED2 \n", &linfo);
317     }
318     if ( j > iii  ){
319       linfo = -3;
320       xerbla_( "TRED2 \n", &linfo);
321     }
322   }
323 
324   nrowsA = count_list( me, mapA, n);
325   for ( i = 0 ; i < nrowsA; i++ )
326     if ( vecA[i] == NULL ){
327       linfo = -2;
328       fprintf(stderr, "node = %d NULL vector assignment in vecA \n", me);
329       xerbla_( "TRED2 \n", &linfo);
330     }
331 
332   if ( iwork == NULL ) {
333     linfo = -8;
334     xerbla_( "TRED2 \n", &linfo);
335   }
336 
337   if ( work == NULL ){
338     linfo = -9;
339     xerbla_( "TRED2 \n", &linfo);
340   }
341 
342   if ( Q == NULL )
343     linfo = -4;
344 
345   if ( mapQ == NULL )
346     linfo = -5;
347 
348   if ( diag == NULL )
349     linfo = -6;
350 
351   if ( upperdiag == NULL )
352     linfo = -7;
353 
354   sprintf(msg, "TRED22:Error in argument %d \n", linfo);
355 
356   /*
357   printf(" in tred22.c me = %d \n", me );
358   */
359 
360 
361   g_exit_( &linfo, msg, mapA, n, iwork, work);
362 
363   /* checking the other arguments to see if further error check is possible */
364 
365   *iwork = *n;
366   iscrat = iwork + 1;
367   for ( k = 0; k < *n; k++ )
368     *(iscrat++) = mapA[k];
369 
370   linfo = 0;
371   k = (*n + 1)*sizeof(Integer);
372   iscrat += *n + 1;
373   pxerbla2_( &k, (char *)iwork, mapA, n, iscrat, &linfo );
374   if( linfo != 0 )
375     linfo = -1;
376 
377   g_exit_( &linfo, "TRED22: Mapping inconsistancies. mapA\n", mapA, n, iwork, work);
378 
379   iscrat = iwork;
380   linfo = 0;
381   k = *n * sizeof(Integer);
382   pxerbla2_( &k, (char *) mapQ, mapA, n, iscrat, &linfo );
383 
384   mapdif1_( n, mapQ, n, mapA, iscrat, &j );
385   if( linfo != 0  || j != 0 )
386     j = -1;
387 
388   g_exit_( &j, "TRED22: Mapping set differ:mapQ and mapA\n", mapA, n, iwork, work);
389 
390 
391 
392   /*
393     setting the location of the Q matrix
394     */
395 
396 
397 
398   /*
399 
400     set the matrix Q to be distributed as matrix A  ; replicate this information
401     One should probably do a more general Q matrix distribution; this is just to enforce
402     that mapQ = mapA
403 
404     */
405 
406   /*
407     DZERO out upperdiag[0]
408     */
409 
410   upperdiag[0] = DZERO;
411 
412   /*
413     set Q to the identity matrix
414     */
415 
416   if ( n == NULL )
417     exit(-1);
418 
419 
420   if ( *n < 0 )
421     exit(-1);
422 
423   msize = *n;
424 
425   HH_vec = work;
426 
427   workMX = work + *n + 1;
428 
429   iscrat = iwork;
430   mapvecQ = iscrat;
431   nrowsQ = fil_mapvec_( &me, &msize, mapQ, mapvecQ );
432   iscrat += nrowsQ;
433   mapvecA = iscrat;
434   nrowsA = fil_mapvec_( &me, &msize, mapA, mapvecA );
435   iscrat += nrowsA;
436   proclist = iscrat;
437   n_procs = reduce_list2( msize, mapA, proclist);
438   iscrat += n_procs;
439 
440   /*
441     initialize Q matrix to the identity matrix
442     */
443 
444   k = 0;
445   for (i=0; i< msize; i++) {
446     if ( mapQ[i] == me ) {
447       for (j=0; j< msize; j++) {
448 	Q[k][j] = DZERO;
449       }
450       Q[k][i] = DONE;
451       k++;
452     }
453   }
454 
455 
456   /* apply the n-2 HH transformations */
457 
458   row_indx = -1;
459   for (i=0; i<(msize-2); i++) {
460     if ( mapA[i] == me ) {
461       row_indx++;
462       /*
463 	construct the HH vector based on ROW i
464 	*/
465 
466       /*
467 	scale the vector if necessary
468 	*/
469 
470       size = msize - i - 1;
471       onenorm = dasum_(&size, &vecA[row_indx][1], &IONE);
472 
473       /* now we know the diagonal entry */
474 
475       diag[i] = vecA[row_indx][0];
476 
477       if (onenorm == DZERO) {
478 	/* then skip this transformation */
479 
480 	vecA[row_indx][0] = DZERO;
481 	upperdiag[i+1] = DZERO;
482       }
483       else {
484 	vecA[row_indx][0] = DONE;
485 
486 	/* scale vector */
487 
488 	size = msize - i - 1;
489 	norm2 = dnrm2_(&size, &vecA[row_indx][1], &IONE)/onenorm;
490 	vecA[row_indx][1] /= onenorm;
491 
492 	if (vecA[row_indx][1] < DZERO)  {
493 	  first = vecA[row_indx][1] - norm2;
494 	  upperdiag[i+1] = norm2*onenorm;
495 	}
496 	else {
497 	  first = vecA[row_indx][1] + norm2;
498 	  upperdiag[i+1] = -norm2*onenorm;
499 	}
500 
501 	/* change A[i][i+1:n-1] to v and normalize it too */
502 
503 	new_norm = norm2*norm2 - vecA[row_indx][1] * vecA[row_indx][1] + first*first;
504 	new_norm = sqrt(new_norm);
505 
506 	vecA[row_indx][1] = first/new_norm;
507 	temp_factor = DONE/onenorm/new_norm;
508 	size = msize - i - 2;
509 	dscal_(&size, &temp_factor, &vecA[row_indx][2], &IONE);
510       }
511 
512       /* send v to all other nodes */
513 
514       bbcast00( (char * ) &vecA[row_indx][0], (msize-i)*sizeof(DoublePrecision), i, me, n_procs, proclist);
515 
516       /* copy the new HH vector into HH_vec */
517 
518       size = msize - i;
519       dcopy_(&size, &vecA[row_indx][0], &IONE, &HH_vec[i], &IONE);
520 
521     }
522     else  {
523       /*
524 	ireceive v
525 	*/
526       bbcast00( (char *) &HH_vec[i], (msize-i)*sizeof(DoublePrecision), i, mapA[i], n_procs, proclist);
527       diag[i] = DZERO;
528       upperdiag[i+1] = DZERO;
529     }
530 
531     if (HH_vec[i] != DZERO ) {
532       /* then this transformation has not been skipped */
533 
534       /*
535        * Now HH_vec[i+1:n-1] = v and v'v = 1
536        * update A(i+1:n-1, i+1:n-1)
537        * NOTE: diag[i+1:n-1] is unused. Store p in it.
538        */
539 
540       /* DZERO out vector; should dscal it */
541 
542       for (k=i+1; k<msize; k++) {
543 	diag[k] = DZERO;
544       }
545 
546       j = row_indx;
547       for ( k = i + 1; k < msize; k++) {
548 	if ( mapA[k] == me ) {
549 	  size = msize - k - 1;
550 	  j++;
551 
552 	  /* above the diagonal */
553 
554 	  diag[k] += ddot_( &size, &vecA[j][1], &IONE, &HH_vec[k+1], &IONE);
555 
556 	  /* below the diagonal */
557 
558 	  daxpy_(&size, &HH_vec[k], &vecA[j][1], &IONE, &diag[k+1], &IONE);
559 
560 	  /* diagonal entry */
561 
562 	  diag[k] += vecA[j][0] * HH_vec[k];
563 	}
564       }
565 
566 
567       /* p is now distributed across processors. collect it */
568 
569       gsum00( (char *) &diag[i+1], msize-i-1, 5, MSG_START, mapA[0], n_procs, proclist, workMX);
570 
571       size = msize - i - 1;
572       temp_factor = ((DoublePrecision) -2.0e0);
573       dscal_(&size, &temp_factor, &diag[i+1], &IONE );
574 
575       /* over-write diag[i+1:n-1] with w */
576 
577       p_t_v = -ddot_(&size, &diag[i+1], &IONE, &HH_vec[i+1], &IONE );
578       daxpy_(&size, &p_t_v, &HH_vec[i+1], &IONE, &diag[i+1], &IONE);
579 
580       /* and finally update A[i+1:n-1][i+1:n-1] */
581 
582       j = row_indx;
583       for (k=i+1; k<msize; k++) {
584 	if ( mapA[k] == me) {
585 	  j++;
586 	  size = msize - k;
587 	  daxpy_(&size, &HH_vec[k], &diag[k], &IONE , &vecA[j][0], &IONE );
588 	  daxpy_(&size, &diag[k], &HH_vec[k], &IONE , &vecA[j][0], &IONE );
589 	}
590       }
591 
592       /* FORWARD ACCUMULATION */
593       /* compute w. No need to store it. beta is -2 */
594 
595       for (j=0; j < nrowsQ; j++) {
596 	size = msize - i - 1;
597 	w =((DoublePrecision) -2.0e0) * ddot_(&size, Q[j] + i + 1, &IONE , &HH_vec[i+1], &IONE );
598 	daxpy_(&size, &w, &HH_vec[i+1], &IONE, Q[j] + i + 1, &IONE);
599       }
600     }
601   }
602 
603   /*
604     now whatever is left over
605     */
606 
607   if (msize == 1) {
608     if (mapA[0] == me ) {
609       diag[0] = vecA[0][0];
610     }
611   }
612   else {
613     if ( mapA[msize-2] == me  ){
614       j = indxL( msize-2, nrowsA, mapvecA);
615       upperdiag[msize-1] = vecA[j][1];
616       diag[msize-2] = vecA[j][0];
617     }
618     else {
619       upperdiag[msize-1] = DZERO;
620       diag[msize-2] = DZERO;
621     }
622 
623     if ( mapA[msize-1] == me ){
624       j = indxL( msize-1, nrowsA, mapvecA);
625       diag[msize-1] = vecA[j][0];
626     }
627     else {
628       diag[msize-1] = DZERO;
629     }
630   }
631 
632   /*
633     collect diag[] and upperdiag[]
634     */
635 
636   gsum00((char * ) diag, msize, 5, MSG_START+2, mapA[0], n_procs, proclist, workMX);
637   gsum00((char * ) &upperdiag[1], msize-1, 5, MSG_START+4, mapA[0], n_procs, proclist, workMX);
638 
639 
640   /*
641     printf(" out tred22.c me = %d \n", me );
642     */
643 
644   /*
645   for (k=0; k<msize; k++)
646     printf(" k = %d tridiag d = %f e = %f \n", k, diag[k], upperdiag[k]);
647     */
648 
649   return 0;
650 }
651