1 #include "BSprivate.h"
2 
3 /*+ BSpar_bcg - conjugate gradient on a block of vectors; reverse communication
4 
5     Input Parameters:
6 .   num_cols - the length of the vector on this processor
7 .   rhs - the contiguous block of rhs vectors on this processor
8           length = num_cols * nBS
9 .   x - the contiguous block of solution vectors on this processor
10           length = num_cols * nBS
11 .   resid - the contiguous block of residual vectors on this processor
12           length = num_cols * nBS
13 .   z - the contiguous block of work vectors on this processor
14           length = num_cols * nBS
15 .   p - the contiguous block of work vectors on this processor
16           length = num_cols * nBS
17 .   cg_beta - the vector of cg coefficients
18           length = nBS
19 .   cg_alpha - the vector of cg coefficients
20           length = nBS
21 .   cur_step - the current cg step
22 .   cur_phase - the current cg phase
23 .   nBS - the number of columns in the block
24 .   procinfo - the usual processor stuff
25 
26     Output Parameters:
27 .   many of the input parameters are changed, it is just the usual
28     CG algorithm
29 
30     Returns:
31     The action of the calling program depends on the return value
32     CG_MATVECR: multiply A*x and put the results in resid
33     CG_MATVECZ: multiply A*p and put the results in z
34     CG_MSOLVE: multiply A(-1)*resid and put the results in z
35     CG_ERROR: Error during CG, indefinite matrix encountered
36 
37     Notes:
38     This is NOT the block conjugate gradient algorithm.  It is
39     a separate conjugate gradient run on each vector, but the
40     operations are blocked for efficiency.
41 
42     The first time that BSpar_bcg is called, cur_step and
43     cur_phase should be 0.
44 
45  +*/
BSpar_bcg(int num_cols,FLOAT * rhs,FLOAT * x,FLOAT * resid,FLOAT * z,FLOAT * p,FLOAT * cg_beta,FLOAT * cg_alpha,int * cur_step,int * cur_phase,int nBS,BSprocinfo * procinfo)46 int BSpar_bcg(int num_cols, FLOAT *rhs, FLOAT *x, FLOAT *resid, FLOAT *z,
47 	FLOAT *p, FLOAT *cg_beta, FLOAT *cg_alpha, int *cur_step, int *cur_phase,
48 	int nBS, BSprocinfo *procinfo)
49 {
50 	int	i, j;
51 	FLOAT	bk, *bknum;
52 	FLOAT	ak;
53 	FLOAT	*t_z, *t_p, *t_resid, *t_x;
54 
55 	switch((*cur_phase)) {
56 		case 0: {
57 			(*cur_phase) = 1;
58 			return(CG_MATVECR);
59 		}
60 		case 1: {
61 			for (i=0;i<num_cols*nBS;i++) {
62 				resid[i] = rhs[i] - resid[i];
63 			}
64 			(*cur_phase) = 2;
65 			(*cur_step) = 1;
66 			return(CG_MSOLVE);
67 		}
68 		case 2: {
69 			MY_MALLOCN(bknum,(FLOAT *),sizeof(FLOAT)*nBS,1);
70 			BSpar_bip(num_cols,z,resid,nBS,bknum,procinfo); CHKERRN(0);
71 			for (j=0;j<nBS;j++) {
72 				t_z = &(z[j*num_cols]);
73 				t_p = &(p[j*num_cols]);
74 				if (bknum[j] < 0.0) return(CG_ERROR);
75 				if ((*cur_step) == 1) {
76 					for (i=0;i<num_cols;i++) {
77 						t_p[i] = t_z[i];
78 					}
79 				} else {
80 					bk = bknum[j]/cg_beta[j];
81 					for (i=0;i<num_cols;i++) {
82 						t_p[i] = t_z[i] + bk*t_p[i];
83 					}
84 				}
85 				cg_beta[j] = bknum[j];
86 			}
87 			MY_FREE(bknum);
88 			(*cur_phase) = 3;
89 			return(CG_MATVECZ);
90 		}
91 		case 3: {
92 			BSpar_bip(num_cols,p,z,nBS,cg_alpha,procinfo); CHKERRN(0);
93 			for (j=0;j<nBS;j++) {
94 				t_z = &(z[j*num_cols]);
95 				t_resid = &(resid[j*num_cols]);
96 				t_p = &(p[j*num_cols]);
97 				t_x = &(x[j*num_cols]);
98 				if (cg_alpha[j] < 0.0) return(CG_ERROR);
99 				ak = cg_beta[j]/cg_alpha[j];
100 				for (i=0;i<num_cols;i++) {
101 					t_x[i] += ak*t_p[i];
102 					t_resid[i] -= ak*t_z[i];
103 				}
104 			}
105 			(*cur_phase) = 2;
106 			(*cur_step)++;
107 			return(CG_MSOLVE);
108 		}
109 		default: {
110 			return(CG_ERROR);
111 		}
112 	}
113 }
114