1 #include "BSprivate.h"
2 
3 /*@ BSforward1 - Forward triangular matrix multiplication on a single vector
4 
5     Input Parameters:
6 .   A - The sparse matrix
7 .   x - The rhs
8 .   comm - The communication structure for A
9 .   procinfo - the usual processor information
10 
11     Output Parameters:
12 .   b - on exit contains A*x
13 
14     Returns:
15     void
16 
17 	Notes:
18     We assume that A has no i-nodes or cliques
19 
20  @*/
BSforward1(BSpar_mat * A,FLOAT * x,FLOAT * b,BScomm * comm,BSprocinfo * procinfo)21 void BSforward1(BSpar_mat *A, FLOAT *x, FLOAT *b,
22 		BScomm *comm, BSprocinfo *procinfo)
23 {
24 	BMphase *to_phase, *from_phase;
25 	BMmsg *msg;
26 	int	i, j, k;
27 	int	cl_ind, in_ind, symmetric;
28 	int	count, size, ind;
29 	int *row;
30 	FLOAT *nz;
31 	BScl_2_inode *clique2inode;
32 	BSnumbering *color2clique;
33 	BSinode *inodes;
34 	int	*data_ptr, msg_len;
35 	FLOAT *msg_buf;
36 	FLOAT	t;
37 	int *gnum, *iperm;
38 
39     /* Is the symmetric data structure used? */
40 	symmetric = A->icc_storage;
41 
42 	color2clique = A->color2clique;
43 	clique2inode = A->clique2inode;
44 	inodes = A->inodes->list;
45 	gnum = A->global_row_num->numbers;
46 	iperm = A->inv_perm->perm;
47 
48 	/* post for all messages */
49 	BMinit_comp_msg(comm->from_msg,procinfo); CHKERR(0);
50 
51 	if(symmetric) {
52 		if (A->save_diag == NULL) {
53 			/* because we know the diagonal is ones, initialize b to x */
54 			for (i=0;i<A->num_rows;i++) b[i] = x[i];
55 		} else {
56 			for (i=0;i<A->num_rows;i++) b[i] = A->save_diag[i]*x[i];
57 		}
58 	}
59 
60 	/* first send my messages */
61 	for (i=0;i<color2clique->length-1;i++) {
62 
63 		if(!symmetric) {
64 			/* first multiply with the scaled diag, if any (ILU) */
65 			for (cl_ind=color2clique->numbers[i];
66 				cl_ind<color2clique->numbers[i+1]; cl_ind++) {
67 				if (procinfo->my_id == clique2inode->proc[cl_ind]) {
68 					ind = clique2inode->d_mats[cl_ind].local_ind;
69 					if (A->scale_diag == NULL)
70 						b[ind] = A->diag[ind]*x[ind];
71 					else
72 						if (A->diag[ind] > 0.0)
73 							b[ind] = x[ind];
74 						else if (A->diag[ind] < 0.0)
75 							b[ind] = -x[ind];
76 						else
77 							b[ind] = 0.0;
78 				}
79 			}
80 		}
81 
82 		/* now send my messages */
83 		to_phase = BMget_phase(comm->to_msg,i); CHKERR(0);
84 		msg = NULL;
85 		while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
86 			CHKERR(0);
87 			msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERR(0);
88 			data_ptr = BMget_user(msg,&msg_len); CHKERR(0);
89 			for (j=0;j<msg_len;j++) {
90 				msg_buf[j] = x[data_ptr[j]];
91 			}
92 			BMsendf_msg(msg,procinfo); CHKERR(0);
93 		}
94 		CHKERR(0);
95 	}
96 
97 	/* do some local work */
98 	for (i=0;i<color2clique->length-1;i++) {
99 		for (cl_ind=color2clique->numbers[i];
100 			cl_ind<color2clique->numbers[i+1];cl_ind++) {
101 			if (procinfo->my_id == clique2inode->proc[cl_ind]) {
102 				/* only do the strictly lower triangular part */
103 				/* we ASSUME the diagonal is all 1's */
104 				/* and it is taken care of above */
105 				/* now, multiply the inodes */
106 				in_ind=clique2inode->inode_index[cl_ind];
107 				size = inodes[in_ind].length;
108 				if (size > 0) {
109 					ind = clique2inode->d_mats[cl_ind].local_ind;
110 					row = inodes[in_ind].row_num;
111 					nz = inodes[in_ind].nz;
112 					t = x[ind];
113 					for (k=0;k<size;k++) b[row[k]] += nz[k]*t;
114 				}
115 			}
116 		}
117 	}
118 
119 	/* receive my messages and do non-local work */
120 	for (i=0;i<color2clique->length-1;i++) {
121 		from_phase = BMget_phase(comm->from_msg,i); CHKERR(0);
122 		while ((msg = BMrecv_msg(from_phase)) != NULL) {
123 			CHKERR(0);
124 			msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERR(0);
125 			data_ptr = BMget_user(msg,&msg_len); CHKERR(0);
126 			count = 0;
127 			for (cl_ind=data_ptr[0];cl_ind<=data_ptr[1];cl_ind++) {
128 				in_ind=clique2inode->inode_index[cl_ind];
129 				size = inodes[in_ind].length;
130 				if (size > 0) {
131 					row = inodes[in_ind].row_num;
132 					nz = inodes[in_ind].nz;
133 					t = msg_buf[count];
134 					for (k=0;k<size;k++) b[row[k]] += nz[k]*t;
135 				}
136 				count++;
137 			}
138 			BMfree_msg(msg); CHKERR(0);
139 		}
140 		CHKERR(0);
141 	}
142 	/* wait for all of the sent messages to finish */
143 	BMfinish_comp_msg(comm->to_msg,procinfo); CHKERR(0);
144 	if(symmetric) {
145 		MLOG_flop((2*A->local_nnz));
146 	} else {
147 		MLOG_flop((4*A->local_nnz-2*A->num_rows));
148 	}
149 
150 }
151