1 #include "BSprivate.h"
2 
3 /*+ BSpar_symmlq - SYMMLQ; reverse communication
4 
5     Input Parameters:
6 .   num_cols - the length of the vector on this processor
7 .   rhs - the rhs on this processor
8 .   x - the solution vector on this processor
9 .   resid - the residual vector on this processor
10 .   z - the work vector on this processor
11 .   v - the work vector on this processor
12 .   w - the work vector on this processor
13 .   r2 - the work vector on this processor
14 .   beta thru conv - symmlq values that aren't of much interest
15 .   cur_step - the current cg step
16 .   cur_phase - the current cg phase
17 .   procinfo - the usual processor stuff
18 
19     Output Parameters:
20 .   many of the input parameters are changed, it is just the usual
21     SYMMLQ algorithm
22 
23     Returns:
24     The action of the calling program depends on the return value
25     SYMM_MATVECXR: multiply A*x and put the results in resid
26     SYMM_MATVECVZ: multiply A*v and put the results in z
27     SYMM_MSOLVE1: multiply A(-1)*resid and put the results in z
28     SYMM_MSOLVE2: multiply A(-1)*r2 and put the results in z
29     SYMM_MSOLVEB: multiply A(-1)*rhs and put the results in z
30     SYMM_ERROR: error during SYMMLQ
31     SYMM_DONE: SYMMLQ is finished, convergence has occurred
32 
33     Notes:
34     The first time that BSpar_bcg is called, cur_step and
35     cur_phase should be 0 and conv should be FALSE.
36 
37  +*/
BSpar_symmlq(int num_cols,FLOAT * rhs,FLOAT * x,FLOAT * resid,FLOAT * z,FLOAT * v,FLOAT * w,FLOAT * r2,FLOAT * cg_beta,FLOAT * cg_alpha,FLOAT * obeta,FLOAT * gbar,FLOAT * dbar,FLOAT * rhs1,FLOAT * rhs2,FLOAT * bstep,FLOAT * snprod,FLOAT * beta1,FLOAT * tnorm,FLOAT * ynorm2,FLOAT * bestnm,int * cgpt,int conv,int * cur_step,int * cur_phase,BSprocinfo * procinfo)38 int BSpar_symmlq(int num_cols, FLOAT *rhs, FLOAT *x, FLOAT *resid, FLOAT *z,
39 	FLOAT *v, FLOAT *w, FLOAT *r2, FLOAT *cg_beta, FLOAT *cg_alpha, FLOAT *obeta,
40 	FLOAT *gbar, FLOAT *dbar, FLOAT *rhs1, FLOAT *rhs2, FLOAT *bstep,
41 	FLOAT *snprod, FLOAT *beta1, FLOAT *tnorm, FLOAT *ynorm2, FLOAT *bestnm,
42 	int *cgpt, int conv, int *cur_step, int *cur_phase, BSprocinfo *procinfo)
43 {
44 	int	i;
45 	FLOAT	t1, t2;
46 	FLOAT	s;
47 	FLOAT	gamma, cs, sn, delta, epsln;
48 	FLOAT	ynorm, anorm, epsa, gpert, diag;
49 	FLOAT	elqnrm, qrnorm, cgnorm;
50 	FLOAT	zbar;
51 	FLOAT	eps;
52 	char EPS = 'E';
53 
54 	eps = DLAMCH(&EPS);
55 	switch((*cur_phase)) {
56 		case 0: {
57 			/* compute A*x=r */
58 			(*cur_phase) = 1;
59 			(*cur_step) = 0;
60 			return(SYMM_MATVECXR);
61 		}
62 		case 1: {
63 			/* compute r and then z = M(-1)*r */
64 			for (i=0;i<num_cols;i++) {
65 				resid[i] = rhs[i] - resid[i];
66 			}
67 			(*cur_phase) = 2;
68 			return(SYMM_MSOLVE1);
69 		}
70 		case 2: {
71 			/* compute beta1 and then z = A*v */
72 			(*cg_beta) = BSpar_ip(num_cols,z,resid,procinfo); CHKERRN(0);
73 			if ((*cg_beta) < 0.0) return(SYMM_ERROR);
74 			(*cg_beta) = sqrt((*cg_beta));
75 			(*beta1) = (*cg_beta);
76 			s = 1.0/(*cg_beta);
77 			for (i=0;i<num_cols;i++) {
78 				v[i] = s*z[i];
79 			}
80 			(*cur_phase) = 3;
81 			return(SYMM_MATVECVZ);
82 		}
83 		case 3: {
84 			/* compute cg_alpha, z, r2, and then z = M(-1)*r2 */
85 			(*cg_alpha) = BSpar_ip(num_cols,v,z,procinfo); CHKERRN(0);
86 			s = -(*cg_alpha)/(*cg_beta);
87 			for (i=0;i<num_cols;i++) {
88 				z[i] += s*resid[i];
89 			}
90 			t1 = BSpar_ip(num_cols,v,z,procinfo); CHKERRN(0);
91 			t2 = BSpar_ip(num_cols,v,v,procinfo); CHKERRN(0);
92 			s = -t1/t2;
93 			for (i=0;i<num_cols;i++) {
94 				z[i] += s*v[i];
95 			}
96 			for (i=0;i<num_cols;i++) {
97 				r2[i] = z[i];
98 			}
99 			(*cur_phase) = 4;
100 			return(SYMM_MSOLVE2);
101 		}
102 		case 4: {
103 			/* compute beta */
104 			(*obeta) = (*cg_beta);
105 			(*cg_beta) = BSpar_ip(num_cols,z,r2,procinfo); CHKERRN(0);
106 			if ((*cg_beta) < 0.0) return(SYMM_ERROR);
107 			(*cg_beta) = sqrt((*cg_beta));
108 
109 			/* initialize accumulators */
110 			(*gbar) = (*cg_alpha);
111 			(*dbar) = (*cg_beta);
112 			(*bstep) = 0.0;
113 			(*snprod) = 1.0;
114 			(*rhs1) = (*beta1);
115 			(*rhs2) = 0.0;
116 			(*tnorm) = 0.0;
117 			(*ynorm2) = 0.0;
118 			for (i=0;i<num_cols;i++) {
119 				w[i] = 0.0;
120 			}
121 
122 			/* estimate norms */
123 			(*tnorm) += (*cg_alpha)*(*cg_alpha) + 2.0*(*cg_beta)*(*cg_beta);
124 			anorm = sqrt(*tnorm);
125 			ynorm = sqrt(*ynorm2);
126 			epsa = anorm*eps;
127 			gpert = (*gbar) + epsa;
128 			if ((*gbar) < 0.0) gpert = (*gbar) - epsa;
129 			diag = fabs(gpert);
130 			elqnrm = sqrt((*rhs1)*(*rhs1)+(*rhs2)*(*rhs2));
131 			qrnorm = (*snprod)*(*beta1);
132 			cgnorm = qrnorm * (*cg_beta)/diag;
133 			if (cgnorm < elqnrm) {
134 				(*cgpt) = TRUE;
135 				(*bestnm) = cgnorm;
136 			} else {
137 				(*cgpt) = FALSE;
138 				(*bestnm) = elqnrm;
139 			}
140 
141 			/* compute v and then z = A*v */
142 			s = 1.0/(*cg_beta);
143 			for (i=0;i<num_cols;i++) {
144 				v[i] = s*z[i];
145 			}
146 			(*cur_phase) = 6;
147 			return(SYMM_MATVECVZ);
148 		}
149 		case 5: {
150 			/* compute beta */
151 			(*obeta) = (*cg_beta);
152 			(*cg_beta) = BSpar_ip(num_cols,z,r2,procinfo); CHKERRN(0);
153 			if ((*cg_beta) < 0.0) return(SYMM_ERROR);
154 			(*cg_beta) = sqrt((*cg_beta));
155 
156 			/* get x */
157 			gamma = sqrt((*gbar)*(*gbar)+(*obeta)*(*obeta));
158 			cs = (*gbar)/gamma;
159 			sn = (*obeta)/gamma;
160 			delta = cs*(*dbar) + sn*(*cg_alpha);
161 			(*gbar) = sn*(*dbar) - cs*(*cg_alpha);
162 			epsln = sn*(*cg_beta);
163 			(*dbar) = -cs*(*cg_beta);
164 			s = (*rhs1)/gamma;
165 			t1 = s*cs;
166 			t2 = s*sn;
167 			for (i=0;i<num_cols;i++) {
168 				x[i] = (w[i]*t1 + v[i]*t2) + x[i];
169 				w[i] = w[i]*sn - v[i]*cs;
170 			}
171 			(*rhs1) = (*rhs2) - delta*s;
172 			(*rhs2) = -epsln*s;
173 			(*bstep) = (*snprod)*cs*s + (*bstep);
174 			(*snprod) = (*snprod)*sn;
175 			(*ynorm2) += s*s;
176 
177 			/* estimate norms */
178 			(*tnorm) += (*cg_alpha)*(*cg_alpha) + 2.0*(*cg_beta)*(*cg_beta);
179 			anorm = sqrt(*tnorm);
180 			ynorm = sqrt(*ynorm2);
181 			epsa = anorm*eps;
182 			gpert = (*gbar) + epsa;
183 			if ((*gbar) < 0.0) gpert = (*gbar) - epsa;
184 			diag = fabs(gpert);
185 			elqnrm = sqrt((*rhs1)*(*rhs1)+(*rhs2)*(*rhs2));
186 			qrnorm = (*snprod)*(*beta1);
187 			cgnorm = qrnorm * (*cg_beta)/diag;
188 			if (cgnorm < elqnrm) {
189 				(*cgpt) = TRUE;
190 				(*bestnm) = cgnorm;
191 			} else {
192 				(*cgpt) = FALSE;
193 				(*bestnm) = elqnrm;
194 			}
195 
196 			/* compute v and then z = A*v */
197 			s = 1.0/(*cg_beta);
198 			for (i=0;i<num_cols;i++) {
199 				v[i] = s*z[i];
200 			}
201 			(*cur_phase) = 6;
202 			(*cur_step)++;
203 			return(SYMM_MATVECVZ);
204 		}
205 		case 6: {
206 			if (conv) {
207 				/* do next to last finishing step and then z = A*rhs */
208 				if (cgpt) {
209 					zbar = (*rhs1)/(*gbar);
210 					(*bstep) = (*snprod)*zbar + (*bstep);
211 					ynorm = sqrt((*ynorm2) + zbar*zbar);
212 					for (i=0;i<num_cols;i++) {
213 						x[i] += zbar*w[i];
214 					}
215 				}
216 				(*bstep) = (*bstep) / (*beta1);
217 				for (i=0;i<num_cols;i++) {
218 					z[i] = rhs[i];
219 				}
220 				(*cur_phase) = 7;
221 				return(SYMM_MSOLVEB);
222 			} else {
223 				/* orthogonalize z and then z = A*r2 */
224 				s = -(*cg_beta)/(*obeta);
225 				for (i=0;i<num_cols;i++) {
226 					z[i] += s*resid[i];
227 				}
228 				(*cg_alpha) = BSpar_ip(num_cols,v,z,procinfo); CHKERRN(0);
229 				s = -(*cg_alpha)/(*cg_beta);
230 				for (i=0;i<num_cols;i++) {
231 					z[i] += s*r2[i];
232 				}
233 				for (i=0;i<num_cols;i++) {
234 					resid[i] = r2[i];
235 				}
236 				for (i=0;i<num_cols;i++) {
237 					r2[i] = z[i];
238 				}
239 				(*cur_phase) = 5;
240 				return(SYMM_MSOLVE2);
241 			}
242 		}
243 		case 7: {
244 				for (i=0;i<num_cols;i++) {
245 					x[i] += (*bstep)*z[i];
246 				}
247 				return(SYMM_DONE);
248 		}
249 		default: {
250 			return(SYMM_ERROR);
251 		}
252 	}
253 }
254