1 /*
2  $Id$
3  *======================================================================
4  *
5  * DISCLAIMER
6  *
7  * This material was prepared as an account of work sponsored by an
8  * agency of the United States Government.  Neither the United States
9  * Government nor the United States Department of Energy, nor Battelle,
10  * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11  * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12  * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13  * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14  * INFRINGE PRIVATELY OWNED RIGHTS.
15  *
16  * ACKNOWLEDGMENT
17  *
18  * This software and its documentation were produced with Government
19  * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20  * States Department of Energy.  The Government retains a paid-up
21  * non-exclusive, irrevocable worldwide license to reproduce, prepare
22  * derivative works, perform publicly and display publicly by or for the
23  * Government, including the right to distribute to other Government
24  * contractors.
25  *
26  *======================================================================
27  *
28  *  -- PEIGS  routine (version 2.1) --
29  *     Pacific Northwest Laboratory
30  *     July 28, 1995
31  *
32  *======================================================================
33  */
34 #include <stdio.h>
35 
36 #include "globalp.c.h"
37 
38 #define ZERO ((DoublePrecision) 0.0e0)
39 
40 #define max(a,b) ((a) > (b) ? (a) : (b))
41 
resid(n,colA,mapA,m,colZ,mapZ,eval,ibuffptr,iwork,work,res,info)42 void resid( n, colA, mapA, m, colZ, mapZ, eval, ibuffptr, iwork, work, res, info)
43      Integer *n, *mapA, *m, *mapZ, *iwork, *info;
44      DoublePrecision **colA, **colZ, *eval, **ibuffptr, *work, *res;
45 
46 /*
47    this subroutine computes the residual
48 
49    res = max_{i} | A z_{i} - \lambda_{i} z_{i} |/( | A | * ulp )
50 
51    where
52    A is an n-by-n symmetric matrix, in packed storage format,
53    column (or equivalently row) distributed
54 
55    (lambda_i, z_i) is a standard eigen-pair of A and
56    Z is an n-by-m matrix of eigenvectors, in packed storage format,
57    column distributed
58 
59    ULP = (machine precision) * (machine base)
60 
61    |A z_{i} ... | is the infinity-norm,
62    |A| is the 1-norm of A,
63    res is reasonable if it is of order about 50 or less.
64 
65    Arguments
66    ---------
67    In the following:
68 
69      INTEGER          = "pointer to Integer"
70      DOUBLE PRECISION = "pointer to DoublePrecision"
71 
72      me     = this processor's id (= mxmynd_())
73      nprocs = number of allocated processors ( = mxnprc_())
74      nvecsA = number of entries in mapA equal to me
75                   (= count_list( me, mapA, n ))
76      nvecsZ = number of entries in mapZ equal to me
77                   (= count_list( me, mapZ, m ))
78      sDP    = sizeof( DoublePrecision )
79 
80 
81    n....... (input) INTEGER
82             dimension of the matrix A
83 
84    colA ... (input) array of pointers to DoublePrecision,
85                     length (nvecsA)
86             The part of matrix A owned by this processer stored
87             in packed format, i.e., colA[i] points to the diagonal
88             element of the i-th column (or equivalently row) of A
89             owned by this processor, i = 0 to nvecsA-1.
90 
91    mapA ... (input) INTEGER array, length (n)
92             The i-th column (or equivalently row) of A is
93             owned by processor mapA[i], i = 0 to n-1.
94 
95    m....... (input) INTEGER
96             number of columns of Z, i.e. # of eigenvalues/eigenvectors
97 
98    colZ ... (input) array of pointers to DoublePrecision,
99                     length (nvecsZ)
100             The part of matrix Z owned by this processer, stored
101             in packed format, i.e., colZ[i] points to the start
102             of the i-th column of matrix Z owned by this
103             processor, i = 0 to nvecsZ-1.
104 
105    mapZ ... (input) INTEGER array, length (m)
106             The i-th column of matrix Z is owned by processor
107             mapZ[i], i = 0 to m-1.
108 
109    eval.... (input) DOUBLE PRECISION array, length (m)
110             the eigenvalues
111 
112    ibuffptr (workspace) array of pointers to DoublePrecision,
113                         length (2 * nvecsZ + 1)
114 
115    iwork... (workspace) INTEGER array,
116                         length( m + maximum( nprocs, n+nvecsA, i_mxm88 ))
117                         where i_mxm88 = 2*(n+m)+nvecsA+nvecsZ
118 
119    work.... (workspace) DOUBLE PRECISION array,
120                         length( nvecsZ * n + maximum( d_mxm88,
121                                                       n + 1 + mxlbuf_() / sDP + 1 )
122                         where d_mxm88 = maximum ( (nvecsZ+1)*n, mxlbuf_()/sDP + 1 )
123 
124    res..... (output) DOUBLE PRECISION
125             the residual described above.
126 
127    info.... (output) INTEGER
128             = 0, not currently used
129 
130  */
131 {
132   static Integer IONE = 1;
133 
134   Integer           ll, i, nvecsA, nvecsZ, me, nprocs, k;
135   Integer          *iscrat, *proclist;
136   DoublePrecision   t, derror, normA, ulp;
137   DoublePrecision  *ptr, *scrat;
138   DoublePrecision **dbuffptr, **vecZ1;
139 
140   extern void             dcopy_(), daxpy_();
141   extern DoublePrecision  damax_();
142   extern Integer          mxmynd_();
143   extern Integer          count_list(), reduce_list2();
144   extern void             gmax00();
145   extern void             mxm88(), sonenrm ();
146   extern void             mdiff1_(), bbcast00();
147 
148   /*
149     usual story about error handling
150     */
151 
152   ll = *n;
153 
154   *res = 0.e0;
155   *info = 0;
156 
157   if ( ll < 1 )
158     return;  /* error */
159 
160   /*
161     should perform global sync to check for errors
162     */
163 
164   me = mxmynd_();
165 
166   iscrat = iwork;
167 
168   nvecsA = count_list( me, mapA, &ll );
169   nvecsZ = count_list( me, mapZ, m   );
170 
171   if (( nvecsA == 0 ) && ( nvecsZ == 0 ))
172     return;
173 
174   proclist = iscrat;
175   nprocs = reduce_list2( *m, mapZ, proclist );
176   iscrat += nprocs;
177 
178   scrat = work;
179 
180   dbuffptr = ibuffptr;
181 
182   vecZ1 = dbuffptr;
183   dbuffptr += nvecsZ + 1;
184 
185   /*
186     copy over the matrix to a buffer area
187     */
188 
189   ptr = scrat;
190   for ( i = 0; i < nvecsZ; i++ ) {
191     vecZ1[i] = ptr;
192     dcopy_( &ll, colZ[i], &IONE, ptr, &IONE );
193     ptr += ll;
194   }
195 
196   scrat = ptr;
197 
198   /*
199     compute vecZ1 <- A . Z
200     */
201 
202   mxm88( &ll, colA, mapA, m, vecZ1, mapZ, iscrat, scrat, dbuffptr);
203 
204   k = 0;
205   for ( i = 0; i < *m; i++ ) {
206     if ( mapZ[i] == me ) {
207       t = -eval[i];
208       daxpy_( &ll, &t, colZ[k], &IONE, vecZ1[k], &IONE);
209       k++;
210     }
211   }
212 
213   /*
214    * vecZ1[i] is now A z_{i} - lambda_{i} z_{i}
215    */
216 
217   derror =  0.0e0;
218   for ( i = 0; i < nvecsZ; i++ ) {
219     derror = max( damax_( &ll, vecZ1[i], &IONE), derror);
220   }
221 
222   gmax00( (char *) &derror, 1, 5, 16, proclist[0], nprocs, proclist, scrat);
223   sonenrm( &ll, colA, mapA, &normA, iscrat, scrat, info);
224   if( normA == 0.0e0 ) normA = 1.0e0;
225 
226   /*
227    * Make sure all processors in mapZ know normA.
228    */
229 
230   mdiff1_( m, mapZ, n, mapA, iscrat, &nprocs );
231 
232   if( nprocs > 0 ){
233     iscrat[nprocs] = mapA[0];
234     nprocs++;
235     bbcast00( (char *) &normA, sizeof(DoublePrecision), 2, mapA[0], nprocs, iscrat );
236   }
237 
238   if( nvecsZ > 0 ){
239     ulp = DLAMCHE * DLAMCHB ;
240     *res = derror / normA / ulp;
241   }
242 
243   /*
244    * Make sure all processors in mapA and mapZ know res.
245    */
246 
247   mdiff1_( n, mapA, m, mapZ, iscrat, &nprocs );
248 
249   if( nprocs > 0 ){
250     iscrat[nprocs] = mapZ[0];
251     nprocs++;
252     bbcast00( (char *) res, sizeof(DoublePrecision), 3, mapZ[0], nprocs, iscrat );
253   }
254   return;
255 }
256 
257