1 #include "BSprivate.h"
2
3 /*+ BSpar_bcg - conjugate gradient on a block of vectors; reverse communication
4
5 Input Parameters:
6 . num_cols - the length of the vector on this processor
7 . rhs - the contiguous block of rhs vectors on this processor
8 length = num_cols * nBS
9 . x - the contiguous block of solution vectors on this processor
10 length = num_cols * nBS
11 . resid - the contiguous block of residual vectors on this processor
12 length = num_cols * nBS
13 . z - the contiguous block of work vectors on this processor
14 length = num_cols * nBS
15 . p - the contiguous block of work vectors on this processor
16 length = num_cols * nBS
17 . cg_beta - the vector of cg coefficients
18 length = nBS
19 . cg_alpha - the vector of cg coefficients
20 length = nBS
21 . cur_step - the current cg step
22 . cur_phase - the current cg phase
23 . nBS - the number of columns in the block
24 . procinfo - the usual processor stuff
25
26 Output Parameters:
27 . many of the input parameters are changed, it is just the usual
28 CG algorithm
29
30 Returns:
31 The action of the calling program depends on the return value
32 CG_MATVECR: multiply A*x and put the results in resid
33 CG_MATVECZ: multiply A*p and put the results in z
34 CG_MSOLVE: multiply A(-1)*resid and put the results in z
35 CG_ERROR: Error during CG, indefinite matrix encountered
36
37 Notes:
38 This is NOT the block conjugate gradient algorithm. It is
39 a separate conjugate gradient run on each vector, but the
40 operations are blocked for efficiency.
41
42 The first time that BSpar_bcg is called, cur_step and
43 cur_phase should be 0.
44
45 +*/
BSpar_bcg(int num_cols,FLOAT * rhs,FLOAT * x,FLOAT * resid,FLOAT * z,FLOAT * p,FLOAT * cg_beta,FLOAT * cg_alpha,int * cur_step,int * cur_phase,int nBS,BSprocinfo * procinfo)46 int BSpar_bcg(int num_cols, FLOAT *rhs, FLOAT *x, FLOAT *resid, FLOAT *z,
47 FLOAT *p, FLOAT *cg_beta, FLOAT *cg_alpha, int *cur_step, int *cur_phase,
48 int nBS, BSprocinfo *procinfo)
49 {
50 int i, j;
51 FLOAT bk, *bknum;
52 FLOAT ak;
53 FLOAT *t_z, *t_p, *t_resid, *t_x;
54
55 switch((*cur_phase)) {
56 case 0: {
57 (*cur_phase) = 1;
58 return(CG_MATVECR);
59 }
60 case 1: {
61 for (i=0;i<num_cols*nBS;i++) {
62 resid[i] = rhs[i] - resid[i];
63 }
64 (*cur_phase) = 2;
65 (*cur_step) = 1;
66 return(CG_MSOLVE);
67 }
68 case 2: {
69 MY_MALLOCN(bknum,(FLOAT *),sizeof(FLOAT)*nBS,1);
70 BSpar_bip(num_cols,z,resid,nBS,bknum,procinfo); CHKERRN(0);
71 for (j=0;j<nBS;j++) {
72 t_z = &(z[j*num_cols]);
73 t_p = &(p[j*num_cols]);
74 if (bknum[j] < 0.0) return(CG_ERROR);
75 if ((*cur_step) == 1) {
76 for (i=0;i<num_cols;i++) {
77 t_p[i] = t_z[i];
78 }
79 } else {
80 bk = bknum[j]/cg_beta[j];
81 for (i=0;i<num_cols;i++) {
82 t_p[i] = t_z[i] + bk*t_p[i];
83 }
84 }
85 cg_beta[j] = bknum[j];
86 }
87 MY_FREE(bknum);
88 (*cur_phase) = 3;
89 return(CG_MATVECZ);
90 }
91 case 3: {
92 BSpar_bip(num_cols,p,z,nBS,cg_alpha,procinfo); CHKERRN(0);
93 for (j=0;j<nBS;j++) {
94 t_z = &(z[j*num_cols]);
95 t_resid = &(resid[j*num_cols]);
96 t_p = &(p[j*num_cols]);
97 t_x = &(x[j*num_cols]);
98 if (cg_alpha[j] < 0.0) return(CG_ERROR);
99 ak = cg_beta[j]/cg_alpha[j];
100 for (i=0;i<num_cols;i++) {
101 t_x[i] += ak*t_p[i];
102 t_resid[i] -= ak*t_z[i];
103 }
104 }
105 (*cur_phase) = 2;
106 (*cur_step)++;
107 return(CG_MSOLVE);
108 }
109 default: {
110 return(CG_ERROR);
111 }
112 }
113 }
114