1 #include "BSprivate.h"
2
3 /*+ BSpar_symmlq - SYMMLQ; reverse communication
4
5 Input Parameters:
6 . num_cols - the length of the vector on this processor
7 . rhs - the rhs on this processor
8 . x - the solution vector on this processor
9 . resid - the residual vector on this processor
10 . z - the work vector on this processor
11 . v - the work vector on this processor
12 . w - the work vector on this processor
13 . r2 - the work vector on this processor
14 . beta thru conv - symmlq values that aren't of much interest
15 . cur_step - the current cg step
16 . cur_phase - the current cg phase
17 . procinfo - the usual processor stuff
18
19 Output Parameters:
20 . many of the input parameters are changed, it is just the usual
21 SYMMLQ algorithm
22
23 Returns:
24 The action of the calling program depends on the return value
25 SYMM_MATVECXR: multiply A*x and put the results in resid
26 SYMM_MATVECVZ: multiply A*v and put the results in z
27 SYMM_MSOLVE1: multiply A(-1)*resid and put the results in z
28 SYMM_MSOLVE2: multiply A(-1)*r2 and put the results in z
29 SYMM_MSOLVEB: multiply A(-1)*rhs and put the results in z
30 SYMM_ERROR: error during SYMMLQ
31 SYMM_DONE: SYMMLQ is finished, convergence has occurred
32
33 Notes:
34 The first time that BSpar_bcg is called, cur_step and
35 cur_phase should be 0 and conv should be FALSE.
36
37 +*/
BSpar_symmlq(int num_cols,FLOAT * rhs,FLOAT * x,FLOAT * resid,FLOAT * z,FLOAT * v,FLOAT * w,FLOAT * r2,FLOAT * cg_beta,FLOAT * cg_alpha,FLOAT * obeta,FLOAT * gbar,FLOAT * dbar,FLOAT * rhs1,FLOAT * rhs2,FLOAT * bstep,FLOAT * snprod,FLOAT * beta1,FLOAT * tnorm,FLOAT * ynorm2,FLOAT * bestnm,int * cgpt,int conv,int * cur_step,int * cur_phase,BSprocinfo * procinfo)38 int BSpar_symmlq(int num_cols, FLOAT *rhs, FLOAT *x, FLOAT *resid, FLOAT *z,
39 FLOAT *v, FLOAT *w, FLOAT *r2, FLOAT *cg_beta, FLOAT *cg_alpha, FLOAT *obeta,
40 FLOAT *gbar, FLOAT *dbar, FLOAT *rhs1, FLOAT *rhs2, FLOAT *bstep,
41 FLOAT *snprod, FLOAT *beta1, FLOAT *tnorm, FLOAT *ynorm2, FLOAT *bestnm,
42 int *cgpt, int conv, int *cur_step, int *cur_phase, BSprocinfo *procinfo)
43 {
44 int i;
45 FLOAT t1, t2;
46 FLOAT s;
47 FLOAT gamma, cs, sn, delta, epsln;
48 FLOAT ynorm, anorm, epsa, gpert, diag;
49 FLOAT elqnrm, qrnorm, cgnorm;
50 FLOAT zbar;
51 FLOAT eps;
52 char EPS = 'E';
53
54 eps = DLAMCH(&EPS);
55 switch((*cur_phase)) {
56 case 0: {
57 /* compute A*x=r */
58 (*cur_phase) = 1;
59 (*cur_step) = 0;
60 return(SYMM_MATVECXR);
61 }
62 case 1: {
63 /* compute r and then z = M(-1)*r */
64 for (i=0;i<num_cols;i++) {
65 resid[i] = rhs[i] - resid[i];
66 }
67 (*cur_phase) = 2;
68 return(SYMM_MSOLVE1);
69 }
70 case 2: {
71 /* compute beta1 and then z = A*v */
72 (*cg_beta) = BSpar_ip(num_cols,z,resid,procinfo); CHKERRN(0);
73 if ((*cg_beta) < 0.0) return(SYMM_ERROR);
74 (*cg_beta) = sqrt((*cg_beta));
75 (*beta1) = (*cg_beta);
76 s = 1.0/(*cg_beta);
77 for (i=0;i<num_cols;i++) {
78 v[i] = s*z[i];
79 }
80 (*cur_phase) = 3;
81 return(SYMM_MATVECVZ);
82 }
83 case 3: {
84 /* compute cg_alpha, z, r2, and then z = M(-1)*r2 */
85 (*cg_alpha) = BSpar_ip(num_cols,v,z,procinfo); CHKERRN(0);
86 s = -(*cg_alpha)/(*cg_beta);
87 for (i=0;i<num_cols;i++) {
88 z[i] += s*resid[i];
89 }
90 t1 = BSpar_ip(num_cols,v,z,procinfo); CHKERRN(0);
91 t2 = BSpar_ip(num_cols,v,v,procinfo); CHKERRN(0);
92 s = -t1/t2;
93 for (i=0;i<num_cols;i++) {
94 z[i] += s*v[i];
95 }
96 for (i=0;i<num_cols;i++) {
97 r2[i] = z[i];
98 }
99 (*cur_phase) = 4;
100 return(SYMM_MSOLVE2);
101 }
102 case 4: {
103 /* compute beta */
104 (*obeta) = (*cg_beta);
105 (*cg_beta) = BSpar_ip(num_cols,z,r2,procinfo); CHKERRN(0);
106 if ((*cg_beta) < 0.0) return(SYMM_ERROR);
107 (*cg_beta) = sqrt((*cg_beta));
108
109 /* initialize accumulators */
110 (*gbar) = (*cg_alpha);
111 (*dbar) = (*cg_beta);
112 (*bstep) = 0.0;
113 (*snprod) = 1.0;
114 (*rhs1) = (*beta1);
115 (*rhs2) = 0.0;
116 (*tnorm) = 0.0;
117 (*ynorm2) = 0.0;
118 for (i=0;i<num_cols;i++) {
119 w[i] = 0.0;
120 }
121
122 /* estimate norms */
123 (*tnorm) += (*cg_alpha)*(*cg_alpha) + 2.0*(*cg_beta)*(*cg_beta);
124 anorm = sqrt(*tnorm);
125 ynorm = sqrt(*ynorm2);
126 epsa = anorm*eps;
127 gpert = (*gbar) + epsa;
128 if ((*gbar) < 0.0) gpert = (*gbar) - epsa;
129 diag = fabs(gpert);
130 elqnrm = sqrt((*rhs1)*(*rhs1)+(*rhs2)*(*rhs2));
131 qrnorm = (*snprod)*(*beta1);
132 cgnorm = qrnorm * (*cg_beta)/diag;
133 if (cgnorm < elqnrm) {
134 (*cgpt) = TRUE;
135 (*bestnm) = cgnorm;
136 } else {
137 (*cgpt) = FALSE;
138 (*bestnm) = elqnrm;
139 }
140
141 /* compute v and then z = A*v */
142 s = 1.0/(*cg_beta);
143 for (i=0;i<num_cols;i++) {
144 v[i] = s*z[i];
145 }
146 (*cur_phase) = 6;
147 return(SYMM_MATVECVZ);
148 }
149 case 5: {
150 /* compute beta */
151 (*obeta) = (*cg_beta);
152 (*cg_beta) = BSpar_ip(num_cols,z,r2,procinfo); CHKERRN(0);
153 if ((*cg_beta) < 0.0) return(SYMM_ERROR);
154 (*cg_beta) = sqrt((*cg_beta));
155
156 /* get x */
157 gamma = sqrt((*gbar)*(*gbar)+(*obeta)*(*obeta));
158 cs = (*gbar)/gamma;
159 sn = (*obeta)/gamma;
160 delta = cs*(*dbar) + sn*(*cg_alpha);
161 (*gbar) = sn*(*dbar) - cs*(*cg_alpha);
162 epsln = sn*(*cg_beta);
163 (*dbar) = -cs*(*cg_beta);
164 s = (*rhs1)/gamma;
165 t1 = s*cs;
166 t2 = s*sn;
167 for (i=0;i<num_cols;i++) {
168 x[i] = (w[i]*t1 + v[i]*t2) + x[i];
169 w[i] = w[i]*sn - v[i]*cs;
170 }
171 (*rhs1) = (*rhs2) - delta*s;
172 (*rhs2) = -epsln*s;
173 (*bstep) = (*snprod)*cs*s + (*bstep);
174 (*snprod) = (*snprod)*sn;
175 (*ynorm2) += s*s;
176
177 /* estimate norms */
178 (*tnorm) += (*cg_alpha)*(*cg_alpha) + 2.0*(*cg_beta)*(*cg_beta);
179 anorm = sqrt(*tnorm);
180 ynorm = sqrt(*ynorm2);
181 epsa = anorm*eps;
182 gpert = (*gbar) + epsa;
183 if ((*gbar) < 0.0) gpert = (*gbar) - epsa;
184 diag = fabs(gpert);
185 elqnrm = sqrt((*rhs1)*(*rhs1)+(*rhs2)*(*rhs2));
186 qrnorm = (*snprod)*(*beta1);
187 cgnorm = qrnorm * (*cg_beta)/diag;
188 if (cgnorm < elqnrm) {
189 (*cgpt) = TRUE;
190 (*bestnm) = cgnorm;
191 } else {
192 (*cgpt) = FALSE;
193 (*bestnm) = elqnrm;
194 }
195
196 /* compute v and then z = A*v */
197 s = 1.0/(*cg_beta);
198 for (i=0;i<num_cols;i++) {
199 v[i] = s*z[i];
200 }
201 (*cur_phase) = 6;
202 (*cur_step)++;
203 return(SYMM_MATVECVZ);
204 }
205 case 6: {
206 if (conv) {
207 /* do next to last finishing step and then z = A*rhs */
208 if (cgpt) {
209 zbar = (*rhs1)/(*gbar);
210 (*bstep) = (*snprod)*zbar + (*bstep);
211 ynorm = sqrt((*ynorm2) + zbar*zbar);
212 for (i=0;i<num_cols;i++) {
213 x[i] += zbar*w[i];
214 }
215 }
216 (*bstep) = (*bstep) / (*beta1);
217 for (i=0;i<num_cols;i++) {
218 z[i] = rhs[i];
219 }
220 (*cur_phase) = 7;
221 return(SYMM_MSOLVEB);
222 } else {
223 /* orthogonalize z and then z = A*r2 */
224 s = -(*cg_beta)/(*obeta);
225 for (i=0;i<num_cols;i++) {
226 z[i] += s*resid[i];
227 }
228 (*cg_alpha) = BSpar_ip(num_cols,v,z,procinfo); CHKERRN(0);
229 s = -(*cg_alpha)/(*cg_beta);
230 for (i=0;i<num_cols;i++) {
231 z[i] += s*r2[i];
232 }
233 for (i=0;i<num_cols;i++) {
234 resid[i] = r2[i];
235 }
236 for (i=0;i<num_cols;i++) {
237 r2[i] = z[i];
238 }
239 (*cur_phase) = 5;
240 return(SYMM_MSOLVE2);
241 }
242 }
243 case 7: {
244 for (i=0;i<num_cols;i++) {
245 x[i] += (*bstep)*z[i];
246 }
247 return(SYMM_DONE);
248 }
249 default: {
250 return(SYMM_ERROR);
251 }
252 }
253 }
254