1 #include "BSprivate.h"
2 
3 /*@ BSfor_solve1 - Forward triangular matrix solution on a
4                   single vector
5 
6     Input Parameters:
7 .   A - The sparse matrix
8 .   x - The rhs
9 .   comm - The communication structure for A
10 .   procinfo - the usual processor information
11 
12     Output Parameters:
13 .   x - on exit contains the solution vector
14 
15     Returns:
16     void
17 
18 	Notes:
19     We assume that A has no i-nodes or cliques
20 
21  @*/
BSfor_solve1(BSpar_mat * A,FLOAT * x,BScomm * comm,BSprocinfo * procinfo)22 void BSfor_solve1(BSpar_mat *A, FLOAT *x, BScomm *comm, BSprocinfo *procinfo)
23 {
24 	BMphase *to_phase, *from_phase;
25 	BMmsg *msg;
26 	int	i, j, k;
27 	int	cl_ind, in_ind, symmetric;
28 	int	count, size, ind;
29 	int *row;
30 	FLOAT *nz;
31 	BScl_2_inode *clique2inode;
32 	BSnumbering *color2clique;
33 	BSinode *inodes;
34 	int	*data_ptr, msg_len;
35 	FLOAT *msg_buf;
36 	FLOAT	t;
37 	int *gnum, *iperm;
38 
39 	/* Is the symmetric data structure used? */
40 	symmetric = A->icc_storage;
41 
42 	color2clique = A->color2clique;
43 	clique2inode = A->clique2inode;
44 	inodes = A->inodes->list;
45 	gnum = A->global_row_num->numbers;
46 	iperm = A->inv_perm->perm;
47 
48 	/* post for all messages */
49 	BMinit_comp_msg(comm->from_msg,procinfo); CHKERR(0);
50 
51 	/* now do this phase by phase */
52 	for (i=0;i<color2clique->length-1;i++) {
53 		if(symmetric) {
54 			/* find my portion of the solution using the cliques on the diagonal */
55 			for (cl_ind=color2clique->numbers[i];
56 				cl_ind<color2clique->numbers[i+1];cl_ind++) {
57 				if (procinfo->my_id == clique2inode->proc[cl_ind]) {
58 					/* first, multiply the clique */
59 					/* the clique is stored, inverted, in the upper triangle */
60 					ind = clique2inode->d_mats[cl_ind].local_ind;
61 					x[ind] *= *(clique2inode->d_mats[cl_ind].matrix);
62 				}
63 			}
64 		}
65 
66 		/* now send my messages */
67 		to_phase = BMget_phase(comm->to_msg,i); CHKERR(0);
68 		msg = NULL;
69 		while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
70 			CHKERR(0);
71 			msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERR(0);
72 			data_ptr = BMget_user(msg,&msg_len); CHKERR(0);
73 			for (j=0;j<msg_len;j++) {
74 				msg_buf[j] = x[data_ptr[j]];
75 			}
76 			BMsendf_msg(msg,procinfo); CHKERR(0);
77 		}
78 		CHKERR(0);
79 
80 		/* do some local work */
81 		for (cl_ind=color2clique->numbers[i];
82 			cl_ind<color2clique->numbers[i+1];cl_ind++) {
83 			if (procinfo->my_id == clique2inode->proc[cl_ind]) {
84 				/* multiply the inodes */
85 				in_ind=clique2inode->inode_index[cl_ind];
86 				size = inodes[in_ind].length;
87 				if (size > 0) {
88 					ind = clique2inode->d_mats[cl_ind].local_ind;
89 					row = inodes[in_ind].row_num;
90 					nz = inodes[in_ind].nz;
91 					t = x[ind];
92 					if(symmetric) {
93 						for (k=0;k<size;k++) x[row[k]] -= t*nz[k];
94 					} else {
95 						for (k=0;k<size;k++) {
96 							if (gnum[iperm[row[k]]] > inodes[in_ind].gcol_num)
97 								x[row[k]] -= nz[k]*t;
98 						}
99 					}
100 				}
101 			}
102 		}
103 
104 		/* receive my messages and do non-local work */
105 		from_phase = BMget_phase(comm->from_msg,i); CHKERR(0);
106 		while ((msg = BMrecv_msg(from_phase)) != NULL) {
107 			CHKERR(0);
108 			msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERR(0);
109 			data_ptr = BMget_user(msg,&msg_len); CHKERR(0);
110 			count = 0;
111 			for (cl_ind=data_ptr[0];cl_ind<=data_ptr[1];cl_ind++) {
112 				in_ind=clique2inode->inode_index[cl_ind];
113 				size = inodes[in_ind].length;
114 				if (size > 0) {
115 					row = inodes[in_ind].row_num;
116 					nz = inodes[in_ind].nz;
117 					t = msg_buf[count];
118 					if(symmetric) {
119 						for (k=0;k<size;k++) x[row[k]] -= t*nz[k];
120 					} else {
121 						for (k=0;k<size;k++) {
122 							if (gnum[iperm[row[k]]] > inodes[in_ind].gcol_num)
123 								x[row[k]] -= nz[k]*t;
124 						}
125 					}
126 				}
127 				count++;
128 			}
129 			BMfree_msg(msg); CHKERR(0);
130 		}
131 		CHKERR(0);
132 	}
133 	/* wait for all of the sent messages to finish */
134 	BMfinish_comp_msg(comm->to_msg,procinfo); CHKERR(0);
135 	MLOG_flop((2*A->local_nnz));
136 }
137