1 /*
2 $Id$
3 *======================================================================
4 *
5 * DISCLAIMER
6 *
7 * This material was prepared as an account of work sponsored by an
8 * agency of the United States Government. Neither the United States
9 * Government nor the United States Department of Energy, nor Battelle,
10 * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11 * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12 * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13 * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14 * INFRINGE PRIVATELY OWNED RIGHTS.
15 *
16 * ACKNOWLEDGMENT
17 *
18 * This software and its documentation were produced with Government
19 * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20 * States Department of Energy. The Government retains a paid-up
21 * non-exclusive, irrevocable worldwide license to reproduce, prepare
22 * derivative works, perform publicly and display publicly by or for the
23 * Government, including the right to distribute to other Government
24 * contractors.
25 *
26 *======================================================================
27 *
28 * -- PEIGS routine (version 2.1) --
29 * Pacific Northwest Laboratory
30 * July 28, 1995
31 *
32 *======================================================================
33 */
34 #include <stdio.h>
35
36 #include "globalp.c.h"
37
38 #define ZERO ((DoublePrecision) 0.0e0)
39
40 #define max(a,b) ((a) > (b) ? (a) : (b))
41
resid(n,colA,mapA,m,colZ,mapZ,eval,ibuffptr,iwork,work,res,info)42 void resid( n, colA, mapA, m, colZ, mapZ, eval, ibuffptr, iwork, work, res, info)
43 Integer *n, *mapA, *m, *mapZ, *iwork, *info;
44 DoublePrecision **colA, **colZ, *eval, **ibuffptr, *work, *res;
45
46 /*
47 this subroutine computes the residual
48
49 res = max_{i} | A z_{i} - \lambda_{i} z_{i} |/( | A | * ulp )
50
51 where
52 A is an n-by-n symmetric matrix, in packed storage format,
53 column (or equivalently row) distributed
54
55 (lambda_i, z_i) is a standard eigen-pair of A and
56 Z is an n-by-m matrix of eigenvectors, in packed storage format,
57 column distributed
58
59 ULP = (machine precision) * (machine base)
60
61 |A z_{i} ... | is the infinity-norm,
62 |A| is the 1-norm of A,
63 res is reasonable if it is of order about 50 or less.
64
65 Arguments
66 ---------
67 In the following:
68
69 INTEGER = "pointer to Integer"
70 DOUBLE PRECISION = "pointer to DoublePrecision"
71
72 me = this processor's id (= mxmynd_())
73 nprocs = number of allocated processors ( = mxnprc_())
74 nvecsA = number of entries in mapA equal to me
75 (= count_list( me, mapA, n ))
76 nvecsZ = number of entries in mapZ equal to me
77 (= count_list( me, mapZ, m ))
78 sDP = sizeof( DoublePrecision )
79
80
81 n....... (input) INTEGER
82 dimension of the matrix A
83
84 colA ... (input) array of pointers to DoublePrecision,
85 length (nvecsA)
86 The part of matrix A owned by this processer stored
87 in packed format, i.e., colA[i] points to the diagonal
88 element of the i-th column (or equivalently row) of A
89 owned by this processor, i = 0 to nvecsA-1.
90
91 mapA ... (input) INTEGER array, length (n)
92 The i-th column (or equivalently row) of A is
93 owned by processor mapA[i], i = 0 to n-1.
94
95 m....... (input) INTEGER
96 number of columns of Z, i.e. # of eigenvalues/eigenvectors
97
98 colZ ... (input) array of pointers to DoublePrecision,
99 length (nvecsZ)
100 The part of matrix Z owned by this processer, stored
101 in packed format, i.e., colZ[i] points to the start
102 of the i-th column of matrix Z owned by this
103 processor, i = 0 to nvecsZ-1.
104
105 mapZ ... (input) INTEGER array, length (m)
106 The i-th column of matrix Z is owned by processor
107 mapZ[i], i = 0 to m-1.
108
109 eval.... (input) DOUBLE PRECISION array, length (m)
110 the eigenvalues
111
112 ibuffptr (workspace) array of pointers to DoublePrecision,
113 length (2 * nvecsZ + 1)
114
115 iwork... (workspace) INTEGER array,
116 length( m + maximum( nprocs, n+nvecsA, i_mxm88 ))
117 where i_mxm88 = 2*(n+m)+nvecsA+nvecsZ
118
119 work.... (workspace) DOUBLE PRECISION array,
120 length( nvecsZ * n + maximum( d_mxm88,
121 n + 1 + mxlbuf_() / sDP + 1 )
122 where d_mxm88 = maximum ( (nvecsZ+1)*n, mxlbuf_()/sDP + 1 )
123
124 res..... (output) DOUBLE PRECISION
125 the residual described above.
126
127 info.... (output) INTEGER
128 = 0, not currently used
129
130 */
131 {
132 static Integer IONE = 1;
133
134 Integer ll, i, nvecsA, nvecsZ, me, nprocs, k;
135 Integer *iscrat, *proclist;
136 DoublePrecision t, derror, normA, ulp;
137 DoublePrecision *ptr, *scrat;
138 DoublePrecision **dbuffptr, **vecZ1;
139
140 extern void dcopy_(), daxpy_();
141 extern DoublePrecision damax_();
142 extern Integer mxmynd_();
143 extern Integer count_list(), reduce_list2();
144 extern void gmax00();
145 extern void mxm88(), sonenrm ();
146 extern void mdiff1_(), bbcast00();
147
148 /*
149 usual story about error handling
150 */
151
152 ll = *n;
153
154 *res = 0.e0;
155 *info = 0;
156
157 if ( ll < 1 )
158 return; /* error */
159
160 /*
161 should perform global sync to check for errors
162 */
163
164 me = mxmynd_();
165
166 iscrat = iwork;
167
168 nvecsA = count_list( me, mapA, &ll );
169 nvecsZ = count_list( me, mapZ, m );
170
171 if (( nvecsA == 0 ) && ( nvecsZ == 0 ))
172 return;
173
174 proclist = iscrat;
175 nprocs = reduce_list2( *m, mapZ, proclist );
176 iscrat += nprocs;
177
178 scrat = work;
179
180 dbuffptr = ibuffptr;
181
182 vecZ1 = dbuffptr;
183 dbuffptr += nvecsZ + 1;
184
185 /*
186 copy over the matrix to a buffer area
187 */
188
189 ptr = scrat;
190 for ( i = 0; i < nvecsZ; i++ ) {
191 vecZ1[i] = ptr;
192 dcopy_( &ll, colZ[i], &IONE, ptr, &IONE );
193 ptr += ll;
194 }
195
196 scrat = ptr;
197
198 /*
199 compute vecZ1 <- A . Z
200 */
201
202 mxm88( &ll, colA, mapA, m, vecZ1, mapZ, iscrat, scrat, dbuffptr);
203
204 k = 0;
205 for ( i = 0; i < *m; i++ ) {
206 if ( mapZ[i] == me ) {
207 t = -eval[i];
208 daxpy_( &ll, &t, colZ[k], &IONE, vecZ1[k], &IONE);
209 k++;
210 }
211 }
212
213 /*
214 * vecZ1[i] is now A z_{i} - lambda_{i} z_{i}
215 */
216
217 derror = 0.0e0;
218 for ( i = 0; i < nvecsZ; i++ ) {
219 derror = max( damax_( &ll, vecZ1[i], &IONE), derror);
220 }
221
222 gmax00( (char *) &derror, 1, 5, 16, proclist[0], nprocs, proclist, scrat);
223 sonenrm( &ll, colA, mapA, &normA, iscrat, scrat, info);
224 if( normA == 0.0e0 ) normA = 1.0e0;
225
226 /*
227 * Make sure all processors in mapZ know normA.
228 */
229
230 mdiff1_( m, mapZ, n, mapA, iscrat, &nprocs );
231
232 if( nprocs > 0 ){
233 iscrat[nprocs] = mapA[0];
234 nprocs++;
235 bbcast00( (char *) &normA, sizeof(DoublePrecision), 2, mapA[0], nprocs, iscrat );
236 }
237
238 if( nvecsZ > 0 ){
239 ulp = DLAMCHE * DLAMCHB ;
240 *res = derror / normA / ulp;
241 }
242
243 /*
244 * Make sure all processors in mapA and mapZ know res.
245 */
246
247 mdiff1_( n, mapA, m, mapZ, iscrat, &nprocs );
248
249 if( nprocs > 0 ){
250 iscrat[nprocs] = mapZ[0];
251 nprocs++;
252 bbcast00( (char *) res, sizeof(DoublePrecision), 3, mapZ[0], nprocs, iscrat );
253 }
254 return;
255 }
256
257