1 #include "BSprivate.h"
2
3 /*@ BStri_mult - Multiply the matrix (A - shift*B) by a block of vectors
4
5 Input Parameters:
6 . A - a sparse matrix
7 . comm_A - the communication structure for A
8 . B - a sparse matrix
9 . comm_B - the communication structure for B
10 . v1 - the block of vectors to multiply by
11 . t1 - a block of work vectors
12 . t2 - a block of work vectors
13 . shift - the shift value in (A-shift*B)
14 . BS - the number of vectors in v1
15 . procinfo - the usual processor stuff
16
17 Output Parameters:
18 . v2 - the resulting block of vectors
19
20 Returns:
21 void
22
23 Notes:
24 Different code is used to multiply a single vector than is used
25 to multiply a block of vectors (this improves efficiency). Also
26 different code is used if shift=0.0. If B is NULL, then we
27 assume that it is the identity matrix.
28
29 @*/
BStri_mult(BSpar_mat * A,BScomm * comm_A,BSpar_mat * B,BScomm * comm_B,FLOAT * v1,FLOAT * v2,FLOAT * t1,FLOAT * t2,FLOAT shift,int BS,BSprocinfo * procinfo)30 void BStri_mult(BSpar_mat *A, BScomm *comm_A, BSpar_mat *B, BScomm *comm_B,
31 FLOAT *v1, FLOAT *v2, FLOAT *t1, FLOAT *t2, FLOAT shift, int BS,
32 BSprocinfo *procinfo)
33 {
34 int i, j, n;
35 FLOAT *t_t1, *t_t2, *t_v1, *t_v2;
36
37 if ((B == NULL) || (shift == 0.0)) {
38 if (BS == 1) {
39 MLOG_ELM(procinfo->procset);
40 if (procinfo->single) {
41 BSforward1(A,v1,v2,comm_A,procinfo); CHKERR(0);
42 } else {
43 BSforward(A,v1,v2,comm_A,procinfo); CHKERR(0);
44 }
45 MLOG_ACC(MM_FORWARD);
46 MLOG_ELM(procinfo->procset);
47 if (procinfo->single) {
48 BSbackward1(A,v1,v2,comm_A,procinfo); CHKERR(0);
49 } else {
50 BSbackward(A,v1,v2,comm_A,procinfo); CHKERR(0);
51 }
52 MLOG_ACC(MM_BACKWARD);
53 if (shift != 0.0) {
54 n = A->num_rows;
55 for (i=0;i<n;i++) {
56 v2[i] -= shift*(v1[i]/(fabs(A->scale_diag[i])));
57 }
58 }
59 } else {
60 MLOG_ELM(procinfo->procset);
61 BSb_forward(A,v1,v2,comm_A,BS,procinfo); CHKERR(0);
62 MLOG_ACC(BMM_FORWARD);
63 MLOG_ELM(procinfo->procset);
64 BSb_backward(A,v1,v2,comm_A,BS,procinfo); CHKERR(0);
65 MLOG_ACC(BMM_BACKWARD);
66 if (shift != 0.0) {
67 n = A->num_rows;
68 for (i=0;i<BS;i++) {
69 t_v2 = &(v2[i*n]);
70 /* is tv_1 this correct? */
71 t_v1 = &(v1[i*n]);
72 for (j=0;j<n;j++) {
73 t_v2[j] -= shift*(t_v1[j]/A->scale_diag[j]);
74 }
75 }
76 }
77 }
78 } else {
79 if (BS == 1) {
80 n = A->num_rows;
81 MLOG_ELM(procinfo->procset);
82 if (procinfo->single) {
83 BSforward1(A,v1,v2,comm_A,procinfo); CHKERR(0);
84 } else {
85 BSforward(A,v1,v2,comm_A,procinfo); CHKERR(0);
86 }
87 MLOG_ACC(MM_FORWARD);
88 MLOG_ELM(procinfo->procset);
89 if (procinfo->single) {
90 BSbackward1(A,v1,v2,comm_A,procinfo); CHKERR(0);
91 } else {
92 BSbackward(A,v1,v2,comm_A,procinfo); CHKERR(0);
93 }
94 MLOG_ACC(MM_BACKWARD);
95 if(A->icc_storage) {
96 for (i=0;i<n;i++) {
97 t1[i] = v1[i]/sqrt(A->scale_diag[i]);
98 }
99 } else {
100 for (i=0;i<n;i++) {
101 t1[i] = v1[i]/sqrt(fabs(A->scale_diag[i]));
102 }
103 }
104 BSiperm_dvec(t1,t2,A->perm); CHKERR(0);
105 BSperm_dvec(t2,t1,B->perm); CHKERR(0);
106 MLOG_ELM(procinfo->procset);
107 if (procinfo->single) {
108 BSforward1(B,t1,t2,comm_B,procinfo); CHKERR(0);
109 } else {
110 BSforward(B,t1,t2,comm_B,procinfo); CHKERR(0);
111 }
112 MLOG_ACC(OMM_FORWARD);
113 MLOG_ELM(procinfo->procset);
114 if (procinfo->single) {
115 BSbackward1(B,t1,t2,comm_B,procinfo); CHKERR(0);
116 } else {
117 BSbackward(B,t1,t2,comm_B,procinfo); CHKERR(0);
118 }
119 MLOG_ACC(OMM_BACKWARD);
120 BSiperm_dvec(t2,t1,B->perm); CHKERR(0);
121 BSperm_dvec(t1,t2,A->perm); CHKERR(0);
122 if(A->icc_storage) {
123 for (i=0;i<n;i++) {
124 v2[i] -= shift*(t2[i]/sqrt(A->scale_diag[i]));
125 }
126 } else {
127 for (i=0;i<n;i++) {
128 v2[i] -= shift*(t2[i]/sqrt(fabs(A->scale_diag[i])));
129 }
130 }
131 } else {
132 n = A->num_rows;
133 MLOG_ELM(procinfo->procset);
134 BSb_forward(A,v1,v2,comm_A,BS,procinfo); CHKERR(0);
135 MLOG_ACC(BMM_FORWARD);
136 MLOG_ELM(procinfo->procset);
137 BSb_backward(A,v1,v2,comm_A,BS,procinfo); CHKERR(0);
138 MLOG_ACC(BMM_BACKWARD);
139 if(A->icc_storage) {
140 for (i=0;i<BS;i++) {
141 t_t1 = &(t1[i*n]);
142 t_v1 = &(v1[i*n]);
143 for (j=0;j<n;j++) {
144 t_t1[j] = t_v1[j]/sqrt(A->scale_diag[j]);
145 }
146 }
147 } else {
148 for (i=0;i<BS;i++) {
149 t_t1 = &(t1[i*n]);
150 t_v1 = &(v1[i*n]);
151 for (j=0;j<n;j++) {
152 t_t1[j] = t_v1[j]/sqrt(fabs(A->scale_diag[j]));
153 }
154 }
155 }
156 for (i=0;i<BS;i++) {
157 t_t1 = &(t1[i*n]);
158 t_t2 = &(t2[i*n]);
159 BSiperm_dvec(t_t1,t_t2,A->perm); CHKERR(0);
160 BSperm_dvec(t_t2,t_t1,B->perm); CHKERR(0);
161 }
162 MLOG_ELM(procinfo->procset);
163 BSb_forward(B,t1,t2,comm_B,BS,procinfo); CHKERR(0);
164 MLOG_ACC(OBMM_FORWARD);
165 MLOG_ELM(procinfo->procset);
166 BSb_backward(B,t1,t2,comm_B,BS,procinfo); CHKERR(0);
167 MLOG_ACC(OBMM_BACKWARD);
168 for (i=0;i<BS;i++) {
169 t_t1 = &(t1[i*n]);
170 t_t2 = &(t2[i*n]);
171 BSiperm_dvec(t_t2,t_t1,B->perm); CHKERR(0);
172 BSperm_dvec(t_t1,t_t2,A->perm); CHKERR(0);
173 }
174 if(A->icc_storage) {
175 for (i=0;i<BS;i++) {
176 t_t2 = &(t2[i*n]);
177 t_v2 = &(v2[i*n]);
178 for (j=0;j<n;j++) {
179 t_v2[j] -= shift*(t_t2[j]/sqrt(A->scale_diag[j]));
180 }
181 }
182 } else {
183 for (i=0;i<BS;i++) {
184 t_t2 = &(t2[i*n]);
185 t_v2 = &(v2[i*n]);
186 for (j=0;j<n;j++) {
187 t_v2[j] -= shift*(t_t2[j]/sqrt(fabs(A->scale_diag[j])));
188 }
189 }
190 }
191 }
192 }
193 }
194