1 #include "BSprivate.h"
2 
3 /* a data structure used only in factoring */
4 /* nodes in a list of i-nodes that need to be received/sent during */
5 /* factorization */
6 typedef struct __list_type {
7 	int	length;					/* length of the columns in the i-node */
8 	int	cur_len;				/* don't know anymore */
9 	FLOAT	*d_buffer;			/* don't know anymore */
10 	int	*i_buffer;				/* don't know anymore */
11 	int	buf_size;				/* don't know anymore */
12 	int	cl_ind; 				/* index of the clique, if local */
13 	int	in_ind; 				/* index of the i-node if local */
14 } list_type;
15 
16 #include "BStree.h"
17 
18 extern void BSupdt_matrix1(BStree ,int ,int *, FLOAT *,int ,int *,FLOAT *,
19             int ,int ,BScl_2_inode *,BSinode *, BSprocinfo *);
20 
21 /*+ BSfactor1 - Compute the incomplete factor of a matrix (no inode
22 version)
23 
24     Input Parameters:
25 .   A - The sparse matrix to be factored
26 .   comm - the communication structure of the factoring
27 .   procinfo - the usual processor info
28 
29     Output Parameters:
30 .   A - The factored sparse matrix
31 
32     Returns:
33     0 if successful, otherwise a negative number whose absolute
34     value is the row number of the color (less one) where the
35     failure occured.
36 
37  +*/
BSfactor1(BSpar_mat * A,BScomm * comm,BSprocinfo * procinfo)38 int BSfactor1(BSpar_mat *A, BScomm *comm, BSprocinfo *procinfo)
39 {
40 	BMphase *to_phase, *from_phase;
41 	BMmsg *msg;
42 	int	i, j, k;
43 	int	cl_ind, in_ind;
44 	int	count, size, ind;
45 	int	length, gnum;
46 	int	found;
47 	int	dummy = 0;
48 	int	*intptr, *intptr2, *i_buffer;
49 	FLOAT *d_buffer;
50 	BScl_2_inode *clique2inode;
51 	BSnumbering *color2clique;
52 	BSinode *inodes;
53 	int	*data_ptr, data_len, msg_len;
54 	FLOAT *msg_buf;
55 	FLOAT *nzptr;
56 	FLOAT t;
57 	int	info, fact_error;
58 	BStree	recv_tree;
59 	BStree_ptr tree_node_ptr;
60 	BStree	inode_tree;
61 	list_type	*list_data_ptr;
62 	int	*g_row_num, *iperm;
63 
64 	BMinit_comp_msg(comm->from_msg,procinfo); CHKERRN(0);
65 	BMinit_comp_msg(comm->to_msg,procinfo); CHKERRN(0);
66 	color2clique = A->color2clique;
67 	clique2inode = A->clique2inode;
68 	inodes = A->inodes->list;
69 	g_row_num = A->global_row_num->numbers;
70 	iperm = A->inv_perm->perm;
71 
72 	MY_INIT_TREE(inode_tree,(sizeof(int)*2));
73 	for (i=0;i<color2clique->length-1;i++) {
74 		for (cl_ind=color2clique->numbers[i];
75 			cl_ind<color2clique->numbers[i+1];cl_ind++) {
76 			for (in_ind=clique2inode->inode_index[cl_ind];
77 				in_ind<clique2inode->inode_index[cl_ind+1];in_ind++) {
78 				MY_INSERT_TREE_NODE(inode_tree,(inodes[in_ind].gcol_num),
79 					found,tree_node_ptr,dummy);
80 				data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr);
81 				data_ptr[0] = in_ind;
82 				data_ptr[1] = cl_ind;
83 			}
84 		}
85 	}
86 
87 	fact_error = FALSE;
88 	/* now do this phase by phase */
89 	for (i=0;i<color2clique->length-1;i++) {
90 		to_phase = BMget_phase(comm->to_msg,i); CHKERRN(0);
91 		from_phase = BMget_phase(comm->from_msg,i); CHKERRN(0);
92 
93 		/* send my part of each column in this color */
94 		/* use the "from" part here */
95 		msg = NULL;
96 		while ((msg = BMnext_msg(from_phase,msg)) != NULL) {
97 			CHKERRN(0);
98 			/* allocate space for the message */
99 			data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
100 			in_ind = data_ptr[1];
101 			size = inodes[in_ind].length*sizeof(FLOAT)
102 				+ inodes[in_ind].length*sizeof(int);
103 			MY_MALLOCN(msg_buf,(FLOAT *),size,1);
104 			BMset_msg_ptr(msg,msg_buf); CHKERRN(0);
105 			BMset_msg_size(msg,size); CHKERRN(0);
106 			count = 0;
107 			nzptr = inodes[in_ind].nz;
108 			for (k=0;k<inodes[in_ind].length;k++) {
109 				msg_buf[count] = nzptr[k];
110 				count++;
111 			}
112 			intptr = (int *) &(msg_buf[count]);
113 			intptr2 = (int *) inodes[in_ind].row_num;
114 			for (j=0;j<inodes[in_ind].length;j++) {
115 				intptr[j] = g_row_num[iperm[intptr2[j]]];
116 			}
117 			BMsend_block_msg(from_phase,msg,procinfo); CHKERRN(0);
118 			MY_FREE(msg_buf);
119 			BMset_msg_ptr(msg,NULL); CHKERRN(0);
120 		}
121 		CHKERRN(0);
122 		BMsend_block_msg(from_phase,NULL,procinfo); CHKERRN(0);
123 
124 
125 		/* factor the cliques in this color */
126 		for (cl_ind=color2clique->numbers[i];
127 			cl_ind<color2clique->numbers[i+1];cl_ind++) {
128 			if (procinfo->my_id == clique2inode->proc[cl_ind]) {
129 				t = *(clique2inode->d_mats[cl_ind].matrix);
130 				if (t <= 0.0) {
131 					fact_error = TRUE;
132 				} else {
133 				*(clique2inode->d_mats[cl_ind].matrix) = 1/sqrt(t);
134 				}
135 				MLOG_flop(1);
136 			}
137 		}
138 
139 		/* figure out what I am going to receive and make room for it */
140 		/* use the "to" part here */
141 		MY_INIT_TREE(recv_tree,sizeof(list_type));
142 		msg = NULL;
143 		while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
144 			CHKERRN(0);
145 			/* search list for this number */
146 			data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
147 			gnum = data_ptr[0];
148 			length = data_ptr[1];
149 			MY_INSERT_TREE_NODE(recv_tree,gnum,found,tree_node_ptr,dummy);
150 			list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
151 			if (found) {
152 				list_data_ptr->length += length;
153 			} else {
154 				list_data_ptr->length = length;
155 				list_data_ptr->cur_len = 0;
156 				list_data_ptr->d_buffer = NULL;
157 				list_data_ptr->i_buffer = NULL;
158 				list_data_ptr->cl_ind = -1;
159 				list_data_ptr->in_ind = -1;
160 			}
161 		}
162 		CHKERRN(0);
163 
164 		/* add any of my parts to the list, if necessary */
165 		for (cl_ind=color2clique->numbers[i];
166 			cl_ind<color2clique->numbers[i+1];cl_ind++) {
167 			/* only work on columns that I own */
168 			if (clique2inode->proc[cl_ind] != procinfo->my_id) continue;
169 			for (in_ind=clique2inode->inode_index[cl_ind];
170 				in_ind<clique2inode->inode_index[cl_ind+1];in_ind++) {
171 				MY_INSERT_TREE_NODE(recv_tree,(inodes[in_ind].gcol_num),
172 					found,tree_node_ptr,dummy);
173 				list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
174 				if (found) {
175 					list_data_ptr->cl_ind = cl_ind;
176 					list_data_ptr->in_ind = in_ind;
177 					list_data_ptr->length += inodes[in_ind].length;
178 				} else {
179 					list_data_ptr->length = inodes[in_ind].length;
180 					list_data_ptr->cur_len = 0;
181 					list_data_ptr->d_buffer = NULL;
182 					list_data_ptr->i_buffer = NULL;
183 					list_data_ptr->cl_ind = cl_ind;
184 					list_data_ptr->in_ind = in_ind;
185 				}
186 			}
187 		}
188 		/* make sure that no one has been received out of order */
189 		if (procinfo->error_check) {
190 			MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
191 			while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
192 				list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
193 				MY_NEXT_IN_TREE(tree_node_ptr);
194 				if (list_data_ptr->cl_ind == -1) {
195 					MY_SETERRCN(FACTOR_ERROR,"Received column out of order\n");
196 				}
197 			}
198 		}
199 
200 		/* allocate space for each whole column */
201 		MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
202 		while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
203 			list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
204 			MY_NEXT_IN_TREE(tree_node_ptr);
205 			list_data_ptr->buf_size =
206 				(list_data_ptr->length*sizeof(FLOAT)) +
207 				(list_data_ptr->length*sizeof(int));
208 			MY_MALLOCN(list_data_ptr->d_buffer,(FLOAT *),
209 				list_data_ptr->buf_size,2);
210 			list_data_ptr->i_buffer = (int *)
211 				&(list_data_ptr->d_buffer[list_data_ptr->length]);
212 		}
213 
214 		/* put my part of the column into the collected column */
215 		MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
216 		while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
217 			list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
218 			MY_NEXT_IN_TREE(tree_node_ptr);
219 			in_ind = list_data_ptr->in_ind;
220 			length = inodes[in_ind].length;
221 			nzptr = inodes[in_ind].nz;
222 			d_buffer = list_data_ptr->d_buffer;
223 			for (k=0;k<length;k++) {
224 				d_buffer[k] = nzptr[k];
225 			}
226 			d_buffer += list_data_ptr->length;
227 			i_buffer = list_data_ptr->i_buffer;
228 			intptr = inodes[in_ind].row_num;
229 			/* while copying, convert i_buffer from new local to new global */
230 			for (j=0;j<length;j++) {
231 				i_buffer[j] = g_row_num[iperm[intptr[j]]];
232 			}
233 			list_data_ptr->cur_len += length;
234 		}
235 
236 		/* collect all the parts of "my" columns */
237 		while ((msg = BMrecv_block_msg(to_phase,procinfo)) != NULL) {
238 			CHKERRN(0);
239 			BMcheck_on_async_block(from_phase); CHKERRN(0);
240 			msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0);
241 			BMset_msg_ptr(msg,NULL); CHKERRN(0);
242 			data_ptr = BMget_user(msg,&msg_len); CHKERRN(0);
243 			gnum = data_ptr[0];
244 			length = data_ptr[1];
245 			MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr);
246 			if (found) {
247 				list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
248 				d_buffer = &(list_data_ptr->d_buffer[list_data_ptr->cur_len]);
249 				for (k=0;k<length;k++) {
250 					d_buffer[k] = msg_buf[k];
251 				}
252 				d_buffer += list_data_ptr->length;
253 				i_buffer = &(list_data_ptr->i_buffer[list_data_ptr->cur_len]);
254 				intptr = (int *) &(msg_buf[length]);
255 				for (j=0;j<length;j++) {
256 					i_buffer[j] = intptr[j];
257 				}
258 				list_data_ptr->cur_len += length;
259 			} else {
260 				MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n");
261 			}
262 		}
263 		CHKERRN(0);
264 		BMfree_block_msg(to_phase); CHKERRN(0);
265 		BMfinish_on_async_block(from_phase); CHKERRN(0);
266 
267 		/* see if the factorizations went okay, if not, then return */
268 		/* we have to wait until here to make sure all messages are received */
269 		/* make anything that needs free'ing is free'ed */
270 		/* everyone agrees on success/error */
271 		GISUM(&fact_error,1,&info,procinfo->procset);
272 		if (fact_error != 0) {
273 			/* free up space and return with the negative phase number */
274 			MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
275 			while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
276 				list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
277 				MY_NEXT_IN_TREE(tree_node_ptr);
278 				MY_FREE(list_data_ptr->d_buffer);
279 			}
280 			MY_FREE_TREE(recv_tree);
281 			return(-(i+1));
282 		}
283 
284 		/* factor these columns */
285 		MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
286 		while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
287 			list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
288 			MY_NEXT_IN_TREE(tree_node_ptr);
289 			if (list_data_ptr->length <= 0) continue;
290 			cl_ind = list_data_ptr->cl_ind;
291 			t = *(clique2inode->d_mats[cl_ind].matrix);
292 			for (j=0;j<list_data_ptr->length;j++) {
293 				(list_data_ptr->d_buffer[j]) *= t;
294 			}
295 			MLOG_flop(list_data_ptr->length);
296 		}
297 
298 		/* send these columns out */
299 		msg = NULL;
300 		while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
301 			CHKERRN(0);
302 			data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
303 			gnum = data_ptr[0];
304 			MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr);
305 			if (found) {
306 				list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
307 			} else {
308 				MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n");
309 			}
310 			BMset_msg_ptr(msg,list_data_ptr->d_buffer); CHKERRN(0);
311 			BMset_msg_size(msg,list_data_ptr->buf_size); CHKERRN(0);
312 			BMsend_block_msg(to_phase,msg,procinfo); CHKERRN(0);
313 			BMset_msg_ptr(msg,NULL); CHKERRN(0);
314 		}
315 		CHKERRN(0);
316 		BMsend_block_msg(to_phase,NULL,procinfo); CHKERRN(0);
317 
318 		/* retrieve my part of the column and put it in the data structure */
319 		MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
320 		while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
321 			list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
322 			MY_NEXT_IN_TREE(tree_node_ptr);
323 			if (list_data_ptr->length <= 0) continue;
324 			in_ind = list_data_ptr->in_ind;
325 			length = inodes[in_ind].length;
326 			nzptr = inodes[in_ind].nz;
327 			d_buffer = list_data_ptr->d_buffer;
328 			for (k=0;k<length;k++) {
329 				nzptr[k] = d_buffer[k];
330 			}
331 			d_buffer += list_data_ptr->length;
332 		}
333 
334 		/* now update the remaining matrix using the local columns */
335 		MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
336 		while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
337 			list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
338 			MY_NEXT_IN_TREE(tree_node_ptr);
339 			if (list_data_ptr->length <= 0) continue;
340 			in_ind = list_data_ptr->in_ind;
341 			BSupdt_matrix1(inode_tree,list_data_ptr->length,
342 				list_data_ptr->i_buffer,list_data_ptr->d_buffer,
343 				inodes[in_ind].length,
344 				inodes[in_ind].row_num,inodes[in_ind].nz,
345 				color2clique->numbers[i+1],
346 				color2clique->numbers[color2clique->length-1],clique2inode,
347 				inodes,procinfo); CHKERRN(0);
348 		}
349 
350 		/* receive the incoming columns and update the remaining matrix */
351 		/* strip out the "local" part and put it in the matrix */
352 		while ((msg = BMrecv_block_msg(from_phase,procinfo)) != NULL) {
353 			CHKERRN(0);
354 			BMcheck_on_async_block(to_phase); CHKERRN(0);
355 			msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0);
356 			BMset_msg_ptr(msg,NULL); CHKERRN(0);
357 			data_ptr = BMget_user(msg,&msg_len); CHKERRN(0);
358 			cl_ind = data_ptr[0];
359 			in_ind = data_ptr[1];
360 			/* let gnum be first global row location of my part */
361 			gnum = g_row_num[iperm[inodes[in_ind].row_num[0]]];
362 			length = inodes[in_ind].length;
363 			msg_len = BMget_msg_size(msg); CHKERRN(0);
364 			msg_len /= ((sizeof(FLOAT))+sizeof(int));
365 
366 			/* find my local part */
367 			i_buffer = (int *) &(msg_buf[msg_len]);
368 			for (ind=0;ind<msg_len;ind++) {
369 				if (gnum == i_buffer[ind]) break;
370 			}
371 			nzptr = inodes[in_ind].nz;
372 			d_buffer = &(msg_buf[ind]);
373 			for (k=0;k<length;k++) {
374 				nzptr[k] = d_buffer[k];
375 			}
376 			d_buffer += msg_len;
377 
378 			/* update the remaining matrix */
379 			BSupdt_matrix1(inode_tree,msg_len,i_buffer,msg_buf,length,
380 				inodes[in_ind].row_num,nzptr,color2clique->numbers[i+1],
381 				color2clique->numbers[color2clique->length-1],clique2inode,
382 				inodes,procinfo); CHKERRN(0);
383 		}
384 		CHKERRN(0);
385 		BMfree_block_msg(from_phase); CHKERRN(0);
386 		BMfinish_on_async_block(to_phase); CHKERRN(0);
387 
388 		/* now free up the list */
389 		MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
390 		while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
391 			list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
392 			MY_NEXT_IN_TREE(tree_node_ptr);
393 			MY_FREE(list_data_ptr->d_buffer);
394 		}
395 		MY_FREE_TREE(recv_tree);
396 	}
397 
398 	MY_FREE_TREE(inode_tree);
399 	BMfinish_comp_msg(comm->from_msg,procinfo); CHKERRN(0);
400 	BMfinish_comp_msg(comm->to_msg,procinfo); CHKERRN(0);
401 	return(0);
402 }
403 
404 /*+ BSupdt_matrix1 - Update the matrix using a bunch of i-nodes
405 
406     Input Parameters:
407     I don't really know what these things do now.
408 
409     Output Parameters:
410     I don't really know what these things do now.
411 
412     Returns:
413     void
414 
415     Notes:
416     This code is very complicated and could be done much better.
417 
418 +*/
BSupdt_matrix1(BStree inode_tree,int len,int * updt_index,FLOAT * updt_inode,int len2,int * my_inode_ind,FLOAT * my_inode,int start,int end,BScl_2_inode * clique2inode,BSinode * inodes,BSprocinfo * procinfo)419 void BSupdt_matrix1(BStree inode_tree,int len,int *updt_index,
420 FLOAT *updt_inode,int len2,int *my_inode_ind,FLOAT *my_inode,
421 int start,int end,BScl_2_inode *clique2inode,BSinode *inodes,
422 BSprocinfo *procinfo)
423 {
424 	int	i, j;
425 	int	updt_count, offset, toffset;
426 	int	cl_ind, in_ind;
427 	BSpermutation *iperm;
428 	FLOAT	*work, *matrix;
429 	FLOAT	work2;
430 	int	*intptr;
431 	FLOAT	*nzptr;
432 	int	tcount, count2, gnum, size;
433 	int	found;
434 	BStree_ptr	tree_node_ptr;
435 	int	*tree_data_ptr;
436 
437 	/* sort the updt_inode */
438 	iperm = BSalloc_permutation(len); CHKERR(0);
439 	for (i=0;i<len;i++) iperm->perm[i] = i;
440 	BSheap_sort1(len,updt_index,iperm->perm); CHKERR(0);
441 	MY_MALLOC(work,(FLOAT *),sizeof(FLOAT)*len,1);
442 	BSiperm_dvec(updt_inode,work,iperm); CHKERR(0);
443 	for (j=0;j<len;j++) {
444 		updt_inode[j] = work[j];
445 	}
446 	MY_FREE(work);
447 	BSfree_permutation(iperm); CHKERR(0);
448 
449 	for (updt_count=0;updt_count<len;updt_count++) {
450 		MY_SEARCH_TREE(inode_tree,updt_index[updt_count],found,tree_node_ptr);
451 		if (found) {
452 			tree_data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr);
453 			in_ind = tree_data_ptr[0];
454 			cl_ind = tree_data_ptr[1];
455 			/********** update the inode */
456 			tcount = 0;
457 			count2 = 0;
458 			intptr = inodes[in_ind].row_num;
459 			while ((tcount<len2) && (count2<inodes[in_ind].length)) {
460 				if (my_inode_ind[tcount] == intptr[count2]) {
461 					work2 = my_inode[tcount];
462 					nzptr = &(inodes[in_ind].nz[count2]);
463 					(*nzptr) -= work2*updt_inode[updt_count];
464 					MLOG_flop(2);
465 					tcount++;
466 					count2++;
467 				} else if (my_inode_ind[tcount] < intptr[count2]) {
468 					tcount++;
469 				} else {
470 					count2++;
471 				}
472 			}
473 			/* update a protion of the clique if it is local */
474 			if (clique2inode->proc[cl_ind] == procinfo->my_id) {
475 				/***** update the clique (stored in UPPER triangle) */
476 				gnum = clique2inode->g_offset[cl_ind];
477 				offset = updt_index[updt_count]-gnum;
478 				matrix = clique2inode->d_mats[cl_ind].matrix;
479 				size = clique2inode->d_mats[cl_ind].size;
480 				tcount = updt_count;
481 				work2 = updt_inode[tcount];
482 				while (tcount < len) {
483 					toffset = updt_index[tcount]-gnum;
484 					if (toffset < size) {
485 						matrix[(toffset*size)+offset] -= work2*
486 							updt_inode[tcount];
487 						MLOG_flop(2);
488 					} else {
489 						break;
490 					}
491 					tcount++;
492 				}
493 			}
494 			/* end of clique update */
495 		}
496 	}
497 }
498