1 #include "BSprivate.h"
2
3 /*@ BSforward1 - Forward triangular matrix multiplication on a single vector
4
5 Input Parameters:
6 . A - The sparse matrix
7 . x - The rhs
8 . comm - The communication structure for A
9 . procinfo - the usual processor information
10
11 Output Parameters:
12 . b - on exit contains A*x
13
14 Returns:
15 void
16
17 Notes:
18 We assume that A has no i-nodes or cliques
19
20 @*/
BSforward1(BSpar_mat * A,FLOAT * x,FLOAT * b,BScomm * comm,BSprocinfo * procinfo)21 void BSforward1(BSpar_mat *A, FLOAT *x, FLOAT *b,
22 BScomm *comm, BSprocinfo *procinfo)
23 {
24 BMphase *to_phase, *from_phase;
25 BMmsg *msg;
26 int i, j, k;
27 int cl_ind, in_ind, symmetric;
28 int count, size, ind;
29 int *row;
30 FLOAT *nz;
31 BScl_2_inode *clique2inode;
32 BSnumbering *color2clique;
33 BSinode *inodes;
34 int *data_ptr, msg_len;
35 FLOAT *msg_buf;
36 FLOAT t;
37 int *gnum, *iperm;
38
39 /* Is the symmetric data structure used? */
40 symmetric = A->icc_storage;
41
42 color2clique = A->color2clique;
43 clique2inode = A->clique2inode;
44 inodes = A->inodes->list;
45 gnum = A->global_row_num->numbers;
46 iperm = A->inv_perm->perm;
47
48 /* post for all messages */
49 BMinit_comp_msg(comm->from_msg,procinfo); CHKERR(0);
50
51 if(symmetric) {
52 if (A->save_diag == NULL) {
53 /* because we know the diagonal is ones, initialize b to x */
54 for (i=0;i<A->num_rows;i++) b[i] = x[i];
55 } else {
56 for (i=0;i<A->num_rows;i++) b[i] = A->save_diag[i]*x[i];
57 }
58 }
59
60 /* first send my messages */
61 for (i=0;i<color2clique->length-1;i++) {
62
63 if(!symmetric) {
64 /* first multiply with the scaled diag, if any (ILU) */
65 for (cl_ind=color2clique->numbers[i];
66 cl_ind<color2clique->numbers[i+1]; cl_ind++) {
67 if (procinfo->my_id == clique2inode->proc[cl_ind]) {
68 ind = clique2inode->d_mats[cl_ind].local_ind;
69 if (A->scale_diag == NULL)
70 b[ind] = A->diag[ind]*x[ind];
71 else
72 if (A->diag[ind] > 0.0)
73 b[ind] = x[ind];
74 else if (A->diag[ind] < 0.0)
75 b[ind] = -x[ind];
76 else
77 b[ind] = 0.0;
78 }
79 }
80 }
81
82 /* now send my messages */
83 to_phase = BMget_phase(comm->to_msg,i); CHKERR(0);
84 msg = NULL;
85 while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
86 CHKERR(0);
87 msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERR(0);
88 data_ptr = BMget_user(msg,&msg_len); CHKERR(0);
89 for (j=0;j<msg_len;j++) {
90 msg_buf[j] = x[data_ptr[j]];
91 }
92 BMsendf_msg(msg,procinfo); CHKERR(0);
93 }
94 CHKERR(0);
95 }
96
97 /* do some local work */
98 for (i=0;i<color2clique->length-1;i++) {
99 for (cl_ind=color2clique->numbers[i];
100 cl_ind<color2clique->numbers[i+1];cl_ind++) {
101 if (procinfo->my_id == clique2inode->proc[cl_ind]) {
102 /* only do the strictly lower triangular part */
103 /* we ASSUME the diagonal is all 1's */
104 /* and it is taken care of above */
105 /* now, multiply the inodes */
106 in_ind=clique2inode->inode_index[cl_ind];
107 size = inodes[in_ind].length;
108 if (size > 0) {
109 ind = clique2inode->d_mats[cl_ind].local_ind;
110 row = inodes[in_ind].row_num;
111 nz = inodes[in_ind].nz;
112 t = x[ind];
113 for (k=0;k<size;k++) b[row[k]] += nz[k]*t;
114 }
115 }
116 }
117 }
118
119 /* receive my messages and do non-local work */
120 for (i=0;i<color2clique->length-1;i++) {
121 from_phase = BMget_phase(comm->from_msg,i); CHKERR(0);
122 while ((msg = BMrecv_msg(from_phase)) != NULL) {
123 CHKERR(0);
124 msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERR(0);
125 data_ptr = BMget_user(msg,&msg_len); CHKERR(0);
126 count = 0;
127 for (cl_ind=data_ptr[0];cl_ind<=data_ptr[1];cl_ind++) {
128 in_ind=clique2inode->inode_index[cl_ind];
129 size = inodes[in_ind].length;
130 if (size > 0) {
131 row = inodes[in_ind].row_num;
132 nz = inodes[in_ind].nz;
133 t = msg_buf[count];
134 for (k=0;k<size;k++) b[row[k]] += nz[k]*t;
135 }
136 count++;
137 }
138 BMfree_msg(msg); CHKERR(0);
139 }
140 CHKERR(0);
141 }
142 /* wait for all of the sent messages to finish */
143 BMfinish_comp_msg(comm->to_msg,procinfo); CHKERR(0);
144 if(symmetric) {
145 MLOG_flop((2*A->local_nnz));
146 } else {
147 MLOG_flop((4*A->local_nnz-2*A->num_rows));
148 }
149
150 }
151