1 #include "BSprivate.h"
2 
3 /*@ BStri_mult - Multiply the matrix (A - shift*B) by a block of vectors
4 
5     Input Parameters:
6 .   A - a sparse matrix
7 .   comm_A - the communication structure for A
8 .   B - a sparse matrix
9 .   comm_B - the communication structure for B
10 .   v1 - the block of vectors to multiply by
11 .   t1 - a block of work vectors
12 .   t2 - a block of work vectors
13 .   shift - the shift value in (A-shift*B)
14 .   BS - the number of vectors in v1
15 .   procinfo - the usual processor stuff
16 
17     Output Parameters:
18 .   v2 - the resulting block of vectors
19 
20     Returns:
21     void
22 
23     Notes:
24     Different code is used to multiply a single vector than is used
25     to multiply a block of vectors (this improves efficiency).  Also
26     different code is used if shift=0.0.  If B is NULL, then we
27     assume that it is the identity matrix.
28 
29  @*/
BStri_mult(BSpar_mat * A,BScomm * comm_A,BSpar_mat * B,BScomm * comm_B,FLOAT * v1,FLOAT * v2,FLOAT * t1,FLOAT * t2,FLOAT shift,int BS,BSprocinfo * procinfo)30 void BStri_mult(BSpar_mat *A, BScomm *comm_A, BSpar_mat *B, BScomm *comm_B,
31 	FLOAT *v1, FLOAT *v2, FLOAT *t1, FLOAT *t2, FLOAT shift, int BS,
32 	BSprocinfo *procinfo)
33 {
34 	int	i, j, n;
35 	FLOAT	*t_t1, *t_t2, *t_v1, *t_v2;
36 
37 	if ((B == NULL) || (shift == 0.0)) {
38 		if (BS == 1) {
39 			MLOG_ELM(procinfo->procset);
40 			if (procinfo->single) {
41 				BSforward1(A,v1,v2,comm_A,procinfo); CHKERR(0);
42 			} else {
43 				BSforward(A,v1,v2,comm_A,procinfo); CHKERR(0);
44 			}
45 			MLOG_ACC(MM_FORWARD);
46 			MLOG_ELM(procinfo->procset);
47 			if (procinfo->single) {
48 				BSbackward1(A,v1,v2,comm_A,procinfo); CHKERR(0);
49 			} else {
50 				BSbackward(A,v1,v2,comm_A,procinfo); CHKERR(0);
51 			}
52 			MLOG_ACC(MM_BACKWARD);
53 			if (shift != 0.0) {
54 				n = A->num_rows;
55 				for (i=0;i<n;i++) {
56 					v2[i] -= shift*(v1[i]/(fabs(A->scale_diag[i])));
57 				}
58 			}
59 		} else {
60 			MLOG_ELM(procinfo->procset);
61 			BSb_forward(A,v1,v2,comm_A,BS,procinfo); CHKERR(0);
62 			MLOG_ACC(BMM_FORWARD);
63 			MLOG_ELM(procinfo->procset);
64 			BSb_backward(A,v1,v2,comm_A,BS,procinfo); CHKERR(0);
65 			MLOG_ACC(BMM_BACKWARD);
66 			if (shift != 0.0) {
67 				n = A->num_rows;
68 				for (i=0;i<BS;i++) {
69 					t_v2 = &(v2[i*n]);
70 					/* is tv_1 this correct? */
71 					t_v1 = &(v1[i*n]);
72 					for (j=0;j<n;j++) {
73 						t_v2[j] -= shift*(t_v1[j]/A->scale_diag[j]);
74 					}
75 				}
76 			}
77 		}
78 	} else {
79 		if (BS == 1) {
80 			n = A->num_rows;
81 			MLOG_ELM(procinfo->procset);
82 			if (procinfo->single) {
83 				BSforward1(A,v1,v2,comm_A,procinfo); CHKERR(0);
84 			} else {
85 				BSforward(A,v1,v2,comm_A,procinfo); CHKERR(0);
86 			}
87 			MLOG_ACC(MM_FORWARD);
88 			MLOG_ELM(procinfo->procset);
89 			if (procinfo->single) {
90 				BSbackward1(A,v1,v2,comm_A,procinfo); CHKERR(0);
91 			} else {
92 				BSbackward(A,v1,v2,comm_A,procinfo); CHKERR(0);
93 			}
94 			MLOG_ACC(MM_BACKWARD);
95 			if(A->icc_storage) {
96 				for (i=0;i<n;i++) {
97 					t1[i] = v1[i]/sqrt(A->scale_diag[i]);
98 				}
99 			} else {
100 				for (i=0;i<n;i++) {
101 					t1[i] = v1[i]/sqrt(fabs(A->scale_diag[i]));
102 				}
103 			}
104 			BSiperm_dvec(t1,t2,A->perm); CHKERR(0);
105 			BSperm_dvec(t2,t1,B->perm); CHKERR(0);
106 			MLOG_ELM(procinfo->procset);
107 			if (procinfo->single) {
108 				BSforward1(B,t1,t2,comm_B,procinfo); CHKERR(0);
109 			} else {
110 				BSforward(B,t1,t2,comm_B,procinfo); CHKERR(0);
111 			}
112 			MLOG_ACC(OMM_FORWARD);
113 			MLOG_ELM(procinfo->procset);
114 			if (procinfo->single) {
115 				BSbackward1(B,t1,t2,comm_B,procinfo); CHKERR(0);
116 			} else {
117 				BSbackward(B,t1,t2,comm_B,procinfo); CHKERR(0);
118 			}
119 			MLOG_ACC(OMM_BACKWARD);
120 			BSiperm_dvec(t2,t1,B->perm); CHKERR(0);
121 			BSperm_dvec(t1,t2,A->perm); CHKERR(0);
122 			if(A->icc_storage) {
123 				for (i=0;i<n;i++) {
124 					v2[i] -= shift*(t2[i]/sqrt(A->scale_diag[i]));
125 				}
126 			} else {
127 				for (i=0;i<n;i++) {
128 					v2[i] -= shift*(t2[i]/sqrt(fabs(A->scale_diag[i])));
129 				}
130 			}
131 		} else {
132 			n = A->num_rows;
133 			MLOG_ELM(procinfo->procset);
134 			BSb_forward(A,v1,v2,comm_A,BS,procinfo); CHKERR(0);
135 			MLOG_ACC(BMM_FORWARD);
136 			MLOG_ELM(procinfo->procset);
137 			BSb_backward(A,v1,v2,comm_A,BS,procinfo); CHKERR(0);
138 			MLOG_ACC(BMM_BACKWARD);
139 			if(A->icc_storage) {
140 				for (i=0;i<BS;i++) {
141 					t_t1 = &(t1[i*n]);
142 					t_v1 = &(v1[i*n]);
143 					for (j=0;j<n;j++) {
144 						t_t1[j] = t_v1[j]/sqrt(A->scale_diag[j]);
145 					}
146 				}
147 			} else {
148 				for (i=0;i<BS;i++) {
149 					t_t1 = &(t1[i*n]);
150 					t_v1 = &(v1[i*n]);
151 					for (j=0;j<n;j++) {
152 						t_t1[j] = t_v1[j]/sqrt(fabs(A->scale_diag[j]));
153 					}
154 				}
155 			}
156 			for (i=0;i<BS;i++) {
157 				t_t1 = &(t1[i*n]);
158 				t_t2 = &(t2[i*n]);
159 				BSiperm_dvec(t_t1,t_t2,A->perm); CHKERR(0);
160 				BSperm_dvec(t_t2,t_t1,B->perm); CHKERR(0);
161 			}
162 			MLOG_ELM(procinfo->procset);
163 			BSb_forward(B,t1,t2,comm_B,BS,procinfo); CHKERR(0);
164 			MLOG_ACC(OBMM_FORWARD);
165 			MLOG_ELM(procinfo->procset);
166 			BSb_backward(B,t1,t2,comm_B,BS,procinfo); CHKERR(0);
167 			MLOG_ACC(OBMM_BACKWARD);
168 			for (i=0;i<BS;i++) {
169 				t_t1 = &(t1[i*n]);
170 				t_t2 = &(t2[i*n]);
171 				BSiperm_dvec(t_t2,t_t1,B->perm); CHKERR(0);
172 				BSperm_dvec(t_t1,t_t2,A->perm); CHKERR(0);
173 			}
174 			if(A->icc_storage) {
175 				for (i=0;i<BS;i++) {
176 					t_t2 = &(t2[i*n]);
177 					t_v2 = &(v2[i*n]);
178 					for (j=0;j<n;j++) {
179 						t_v2[j] -= shift*(t_t2[j]/sqrt(A->scale_diag[j]));
180 					}
181 				}
182 			} else {
183 				for (i=0;i<BS;i++) {
184 					t_t2 = &(t2[i*n]);
185 					t_v2 = &(v2[i*n]);
186 					for (j=0;j<n;j++) {
187 						t_v2[j] -= shift*(t_t2[j]/sqrt(fabs(A->scale_diag[j])));
188 					}
189 				}
190 			}
191 		}
192 	}
193 }
194