1 #include "BSprivate.h"
2 
3 /*@ BSbackward1 - Backward triangular matrix multiplication on a
4                   single vector
5 
6     Input Parameters:
7 .   A - The sparse matrix
8 .   x - The input vector
9 .   comm - The communication structure for A
10 .   procinfo - the usual processor information
11 
12     Output Parameters:
13 .   b - on exit contains A*x
14 
15     Returns:
16     void
17 
18 	Notes:
19     We assume that A has no i-nodes or cliques
20 
21  @*/
BSbackward1(BSpar_mat * A,FLOAT * x,FLOAT * b,BScomm * comm,BSprocinfo * procinfo)22 void BSbackward1(BSpar_mat *A, FLOAT *x, FLOAT *b,
23 		BScomm *comm, BSprocinfo *procinfo)
24 {
25 	BMcomp_msg *from_msg, *to_msg;
26 	BMphase *to_phase, *from_phase;
27 	BMmsg *msg;
28 	int	i, j, k;
29 	int	cl_ind, in_ind;
30 	int	count, size, ind;
31 	int *row;
32 	FLOAT *nz;
33 	BScl_2_inode *clique2inode;
34 	BSnumbering *color2clique;
35 	BSinode *inodes;
36 	int	*data_ptr, msg_len;
37 	FLOAT *msg_buf;
38 	FLOAT	t;
39 
40 	if(!A->icc_storage) {
41 		/* nonsymmetric version done in BSforward() */
42 		return;
43 	}
44 
45 	color2clique = A->color2clique;
46 	clique2inode = A->clique2inode;
47 	inodes = A->inodes->list;
48 
49 	/* REMEMBER, the to and from phase are switched here */
50 	from_msg = comm->to_msg;
51 	to_msg = comm->from_msg;
52 
53 	/* post for all messages */
54 	BMinit_comp_msg(from_msg,procinfo); CHKERR(0);
55 
56 	/* REMEMBER, the diagonal has already been taken care */
57 
58 	/* now do this phase by phase */
59 	for (i=color2clique->length-2;i>=0;i--) {
60 		/* first send my messages */
61 		/* this will involve computing partial sums */
62 		to_phase = BMget_phase(to_msg,i); CHKERR(0);
63 		msg = NULL;
64 		while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
65 			CHKERR(0);
66 			msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERR(0);
67 			data_ptr = BMget_user(msg,&msg_len); CHKERR(0);
68 			count = 0;
69 			for (cl_ind=data_ptr[0];cl_ind<=data_ptr[1];cl_ind++) {
70 				in_ind=clique2inode->inode_index[cl_ind];
71 				size = inodes[in_ind].length;
72 				if (size > 0) {
73 					row = inodes[in_ind].row_num;
74 					nz = inodes[in_ind].nz;
75 					t = 0.0;
76 					for (k=0;k<size;k++) t += x[row[k]]*nz[k];
77 					msg_buf[count] = t;
78 				}
79 				count++;
80 			}
81 			BMsendf_msg(msg,procinfo); CHKERR(0);
82 		}
83 		CHKERR(0);
84 	}
85 
86 	/* do some local work */
87 	for (i=color2clique->length-2;i>=0;i--) {
88 		for (cl_ind=color2clique->numbers[i];
89 			cl_ind<color2clique->numbers[i+1];cl_ind++) {
90 			if (procinfo->my_id == clique2inode->proc[cl_ind]) {
91 				/* only do the strictly upper triangular part */
92 				/* we ASSUME the diagonal is all 1's */
93 				/* which is taken care prior to this */
94 				/* now, multiply the inodes */
95 				in_ind=clique2inode->inode_index[cl_ind];
96 				size = inodes[in_ind].length;
97 				if (size > 0) {
98 					ind = clique2inode->d_mats[cl_ind].local_ind;
99 					row = inodes[in_ind].row_num;
100 					nz = inodes[in_ind].nz;
101 					t = 0.0;
102 					for (k=0;k<size;k++) t += x[row[k]]*nz[k];
103 					b[ind] += t;
104 				}
105 			}
106 		}
107 	}
108 
109 	/* receive my messages and update my rhs */
110 	for (i=color2clique->length-2;i>=0;i--) {
111 		from_phase = BMget_phase(from_msg,i); CHKERR(0);
112 		while ((msg = BMrecv_msg(from_phase)) != NULL) {
113 			CHKERR(0);
114 			msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERR(0);
115 			data_ptr = BMget_user(msg,&msg_len); CHKERR(0);
116 			msg_len = BMget_msg_size(msg); CHKERR(0);
117 			count = 0;
118 			for (j=0;j<msg_len;j++) b[data_ptr[j]] += msg_buf[j];
119 			BMfree_msg(msg); CHKERR(0);
120 		}
121 		CHKERR(0);
122 	}
123 
124 	/* wait for all of the sent messages to finish */
125 	BMfinish_comp_msg(to_msg,procinfo); CHKERR(0);
126 	MLOG_flop((2*A->local_nnz));
127 
128 }
129