1 #include "BSprivate.h"
2
3 /* a data structure used only in factoring */
4 /* nodes in a list of i-nodes that need to be received/sent during */
5 /* factorization */
6 typedef struct __list_type {
7 int length; /* length of the columns in the i-node */
8 int cur_len; /* don't know anymore */
9 FLOAT *d_buffer; /* don't know anymore */
10 int *i_buffer; /* don't know anymore */
11 int buf_size; /* don't know anymore */
12 int cl_ind; /* index of the clique, if local */
13 int in_ind; /* index of the i-node if local */
14 } list_type;
15
16 #include "BStree.h"
17
18 extern void BSupdt_matrix1(BStree ,int ,int *, FLOAT *,int ,int *,FLOAT *,
19 int ,int ,BScl_2_inode *,BSinode *, BSprocinfo *);
20
21 /*+ BSfactor1 - Compute the incomplete factor of a matrix (no inode
22 version)
23
24 Input Parameters:
25 . A - The sparse matrix to be factored
26 . comm - the communication structure of the factoring
27 . procinfo - the usual processor info
28
29 Output Parameters:
30 . A - The factored sparse matrix
31
32 Returns:
33 0 if successful, otherwise a negative number whose absolute
34 value is the row number of the color (less one) where the
35 failure occured.
36
37 +*/
BSfactor1(BSpar_mat * A,BScomm * comm,BSprocinfo * procinfo)38 int BSfactor1(BSpar_mat *A, BScomm *comm, BSprocinfo *procinfo)
39 {
40 BMphase *to_phase, *from_phase;
41 BMmsg *msg;
42 int i, j, k;
43 int cl_ind, in_ind;
44 int count, size, ind;
45 int length, gnum;
46 int found;
47 int dummy = 0;
48 int *intptr, *intptr2, *i_buffer;
49 FLOAT *d_buffer;
50 BScl_2_inode *clique2inode;
51 BSnumbering *color2clique;
52 BSinode *inodes;
53 int *data_ptr, data_len, msg_len;
54 FLOAT *msg_buf;
55 FLOAT *nzptr;
56 FLOAT t;
57 int info, fact_error;
58 BStree recv_tree;
59 BStree_ptr tree_node_ptr;
60 BStree inode_tree;
61 list_type *list_data_ptr;
62 int *g_row_num, *iperm;
63
64 BMinit_comp_msg(comm->from_msg,procinfo); CHKERRN(0);
65 BMinit_comp_msg(comm->to_msg,procinfo); CHKERRN(0);
66 color2clique = A->color2clique;
67 clique2inode = A->clique2inode;
68 inodes = A->inodes->list;
69 g_row_num = A->global_row_num->numbers;
70 iperm = A->inv_perm->perm;
71
72 MY_INIT_TREE(inode_tree,(sizeof(int)*2));
73 for (i=0;i<color2clique->length-1;i++) {
74 for (cl_ind=color2clique->numbers[i];
75 cl_ind<color2clique->numbers[i+1];cl_ind++) {
76 for (in_ind=clique2inode->inode_index[cl_ind];
77 in_ind<clique2inode->inode_index[cl_ind+1];in_ind++) {
78 MY_INSERT_TREE_NODE(inode_tree,(inodes[in_ind].gcol_num),
79 found,tree_node_ptr,dummy);
80 data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr);
81 data_ptr[0] = in_ind;
82 data_ptr[1] = cl_ind;
83 }
84 }
85 }
86
87 fact_error = FALSE;
88 /* now do this phase by phase */
89 for (i=0;i<color2clique->length-1;i++) {
90 to_phase = BMget_phase(comm->to_msg,i); CHKERRN(0);
91 from_phase = BMget_phase(comm->from_msg,i); CHKERRN(0);
92
93 /* send my part of each column in this color */
94 /* use the "from" part here */
95 msg = NULL;
96 while ((msg = BMnext_msg(from_phase,msg)) != NULL) {
97 CHKERRN(0);
98 /* allocate space for the message */
99 data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
100 in_ind = data_ptr[1];
101 size = inodes[in_ind].length*sizeof(FLOAT)
102 + inodes[in_ind].length*sizeof(int);
103 MY_MALLOCN(msg_buf,(FLOAT *),size,1);
104 BMset_msg_ptr(msg,msg_buf); CHKERRN(0);
105 BMset_msg_size(msg,size); CHKERRN(0);
106 count = 0;
107 nzptr = inodes[in_ind].nz;
108 for (k=0;k<inodes[in_ind].length;k++) {
109 msg_buf[count] = nzptr[k];
110 count++;
111 }
112 intptr = (int *) &(msg_buf[count]);
113 intptr2 = (int *) inodes[in_ind].row_num;
114 for (j=0;j<inodes[in_ind].length;j++) {
115 intptr[j] = g_row_num[iperm[intptr2[j]]];
116 }
117 BMsend_block_msg(from_phase,msg,procinfo); CHKERRN(0);
118 MY_FREE(msg_buf);
119 BMset_msg_ptr(msg,NULL); CHKERRN(0);
120 }
121 CHKERRN(0);
122 BMsend_block_msg(from_phase,NULL,procinfo); CHKERRN(0);
123
124
125 /* factor the cliques in this color */
126 for (cl_ind=color2clique->numbers[i];
127 cl_ind<color2clique->numbers[i+1];cl_ind++) {
128 if (procinfo->my_id == clique2inode->proc[cl_ind]) {
129 t = *(clique2inode->d_mats[cl_ind].matrix);
130 if (t <= 0.0) {
131 fact_error = TRUE;
132 } else {
133 *(clique2inode->d_mats[cl_ind].matrix) = 1/sqrt(t);
134 }
135 MLOG_flop(1);
136 }
137 }
138
139 /* figure out what I am going to receive and make room for it */
140 /* use the "to" part here */
141 MY_INIT_TREE(recv_tree,sizeof(list_type));
142 msg = NULL;
143 while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
144 CHKERRN(0);
145 /* search list for this number */
146 data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
147 gnum = data_ptr[0];
148 length = data_ptr[1];
149 MY_INSERT_TREE_NODE(recv_tree,gnum,found,tree_node_ptr,dummy);
150 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
151 if (found) {
152 list_data_ptr->length += length;
153 } else {
154 list_data_ptr->length = length;
155 list_data_ptr->cur_len = 0;
156 list_data_ptr->d_buffer = NULL;
157 list_data_ptr->i_buffer = NULL;
158 list_data_ptr->cl_ind = -1;
159 list_data_ptr->in_ind = -1;
160 }
161 }
162 CHKERRN(0);
163
164 /* add any of my parts to the list, if necessary */
165 for (cl_ind=color2clique->numbers[i];
166 cl_ind<color2clique->numbers[i+1];cl_ind++) {
167 /* only work on columns that I own */
168 if (clique2inode->proc[cl_ind] != procinfo->my_id) continue;
169 for (in_ind=clique2inode->inode_index[cl_ind];
170 in_ind<clique2inode->inode_index[cl_ind+1];in_ind++) {
171 MY_INSERT_TREE_NODE(recv_tree,(inodes[in_ind].gcol_num),
172 found,tree_node_ptr,dummy);
173 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
174 if (found) {
175 list_data_ptr->cl_ind = cl_ind;
176 list_data_ptr->in_ind = in_ind;
177 list_data_ptr->length += inodes[in_ind].length;
178 } else {
179 list_data_ptr->length = inodes[in_ind].length;
180 list_data_ptr->cur_len = 0;
181 list_data_ptr->d_buffer = NULL;
182 list_data_ptr->i_buffer = NULL;
183 list_data_ptr->cl_ind = cl_ind;
184 list_data_ptr->in_ind = in_ind;
185 }
186 }
187 }
188 /* make sure that no one has been received out of order */
189 if (procinfo->error_check) {
190 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
191 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
192 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
193 MY_NEXT_IN_TREE(tree_node_ptr);
194 if (list_data_ptr->cl_ind == -1) {
195 MY_SETERRCN(FACTOR_ERROR,"Received column out of order\n");
196 }
197 }
198 }
199
200 /* allocate space for each whole column */
201 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
202 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
203 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
204 MY_NEXT_IN_TREE(tree_node_ptr);
205 list_data_ptr->buf_size =
206 (list_data_ptr->length*sizeof(FLOAT)) +
207 (list_data_ptr->length*sizeof(int));
208 MY_MALLOCN(list_data_ptr->d_buffer,(FLOAT *),
209 list_data_ptr->buf_size,2);
210 list_data_ptr->i_buffer = (int *)
211 &(list_data_ptr->d_buffer[list_data_ptr->length]);
212 }
213
214 /* put my part of the column into the collected column */
215 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
216 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
217 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
218 MY_NEXT_IN_TREE(tree_node_ptr);
219 in_ind = list_data_ptr->in_ind;
220 length = inodes[in_ind].length;
221 nzptr = inodes[in_ind].nz;
222 d_buffer = list_data_ptr->d_buffer;
223 for (k=0;k<length;k++) {
224 d_buffer[k] = nzptr[k];
225 }
226 d_buffer += list_data_ptr->length;
227 i_buffer = list_data_ptr->i_buffer;
228 intptr = inodes[in_ind].row_num;
229 /* while copying, convert i_buffer from new local to new global */
230 for (j=0;j<length;j++) {
231 i_buffer[j] = g_row_num[iperm[intptr[j]]];
232 }
233 list_data_ptr->cur_len += length;
234 }
235
236 /* collect all the parts of "my" columns */
237 while ((msg = BMrecv_block_msg(to_phase,procinfo)) != NULL) {
238 CHKERRN(0);
239 BMcheck_on_async_block(from_phase); CHKERRN(0);
240 msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0);
241 BMset_msg_ptr(msg,NULL); CHKERRN(0);
242 data_ptr = BMget_user(msg,&msg_len); CHKERRN(0);
243 gnum = data_ptr[0];
244 length = data_ptr[1];
245 MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr);
246 if (found) {
247 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
248 d_buffer = &(list_data_ptr->d_buffer[list_data_ptr->cur_len]);
249 for (k=0;k<length;k++) {
250 d_buffer[k] = msg_buf[k];
251 }
252 d_buffer += list_data_ptr->length;
253 i_buffer = &(list_data_ptr->i_buffer[list_data_ptr->cur_len]);
254 intptr = (int *) &(msg_buf[length]);
255 for (j=0;j<length;j++) {
256 i_buffer[j] = intptr[j];
257 }
258 list_data_ptr->cur_len += length;
259 } else {
260 MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n");
261 }
262 }
263 CHKERRN(0);
264 BMfree_block_msg(to_phase); CHKERRN(0);
265 BMfinish_on_async_block(from_phase); CHKERRN(0);
266
267 /* see if the factorizations went okay, if not, then return */
268 /* we have to wait until here to make sure all messages are received */
269 /* make anything that needs free'ing is free'ed */
270 /* everyone agrees on success/error */
271 GISUM(&fact_error,1,&info,procinfo->procset);
272 if (fact_error != 0) {
273 /* free up space and return with the negative phase number */
274 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
275 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
276 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
277 MY_NEXT_IN_TREE(tree_node_ptr);
278 MY_FREE(list_data_ptr->d_buffer);
279 }
280 MY_FREE_TREE(recv_tree);
281 return(-(i+1));
282 }
283
284 /* factor these columns */
285 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
286 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
287 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
288 MY_NEXT_IN_TREE(tree_node_ptr);
289 if (list_data_ptr->length <= 0) continue;
290 cl_ind = list_data_ptr->cl_ind;
291 t = *(clique2inode->d_mats[cl_ind].matrix);
292 for (j=0;j<list_data_ptr->length;j++) {
293 (list_data_ptr->d_buffer[j]) *= t;
294 }
295 MLOG_flop(list_data_ptr->length);
296 }
297
298 /* send these columns out */
299 msg = NULL;
300 while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
301 CHKERRN(0);
302 data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
303 gnum = data_ptr[0];
304 MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr);
305 if (found) {
306 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
307 } else {
308 MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n");
309 }
310 BMset_msg_ptr(msg,list_data_ptr->d_buffer); CHKERRN(0);
311 BMset_msg_size(msg,list_data_ptr->buf_size); CHKERRN(0);
312 BMsend_block_msg(to_phase,msg,procinfo); CHKERRN(0);
313 BMset_msg_ptr(msg,NULL); CHKERRN(0);
314 }
315 CHKERRN(0);
316 BMsend_block_msg(to_phase,NULL,procinfo); CHKERRN(0);
317
318 /* retrieve my part of the column and put it in the data structure */
319 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
320 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
321 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
322 MY_NEXT_IN_TREE(tree_node_ptr);
323 if (list_data_ptr->length <= 0) continue;
324 in_ind = list_data_ptr->in_ind;
325 length = inodes[in_ind].length;
326 nzptr = inodes[in_ind].nz;
327 d_buffer = list_data_ptr->d_buffer;
328 for (k=0;k<length;k++) {
329 nzptr[k] = d_buffer[k];
330 }
331 d_buffer += list_data_ptr->length;
332 }
333
334 /* now update the remaining matrix using the local columns */
335 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
336 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
337 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
338 MY_NEXT_IN_TREE(tree_node_ptr);
339 if (list_data_ptr->length <= 0) continue;
340 in_ind = list_data_ptr->in_ind;
341 BSupdt_matrix1(inode_tree,list_data_ptr->length,
342 list_data_ptr->i_buffer,list_data_ptr->d_buffer,
343 inodes[in_ind].length,
344 inodes[in_ind].row_num,inodes[in_ind].nz,
345 color2clique->numbers[i+1],
346 color2clique->numbers[color2clique->length-1],clique2inode,
347 inodes,procinfo); CHKERRN(0);
348 }
349
350 /* receive the incoming columns and update the remaining matrix */
351 /* strip out the "local" part and put it in the matrix */
352 while ((msg = BMrecv_block_msg(from_phase,procinfo)) != NULL) {
353 CHKERRN(0);
354 BMcheck_on_async_block(to_phase); CHKERRN(0);
355 msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0);
356 BMset_msg_ptr(msg,NULL); CHKERRN(0);
357 data_ptr = BMget_user(msg,&msg_len); CHKERRN(0);
358 cl_ind = data_ptr[0];
359 in_ind = data_ptr[1];
360 /* let gnum be first global row location of my part */
361 gnum = g_row_num[iperm[inodes[in_ind].row_num[0]]];
362 length = inodes[in_ind].length;
363 msg_len = BMget_msg_size(msg); CHKERRN(0);
364 msg_len /= ((sizeof(FLOAT))+sizeof(int));
365
366 /* find my local part */
367 i_buffer = (int *) &(msg_buf[msg_len]);
368 for (ind=0;ind<msg_len;ind++) {
369 if (gnum == i_buffer[ind]) break;
370 }
371 nzptr = inodes[in_ind].nz;
372 d_buffer = &(msg_buf[ind]);
373 for (k=0;k<length;k++) {
374 nzptr[k] = d_buffer[k];
375 }
376 d_buffer += msg_len;
377
378 /* update the remaining matrix */
379 BSupdt_matrix1(inode_tree,msg_len,i_buffer,msg_buf,length,
380 inodes[in_ind].row_num,nzptr,color2clique->numbers[i+1],
381 color2clique->numbers[color2clique->length-1],clique2inode,
382 inodes,procinfo); CHKERRN(0);
383 }
384 CHKERRN(0);
385 BMfree_block_msg(from_phase); CHKERRN(0);
386 BMfinish_on_async_block(to_phase); CHKERRN(0);
387
388 /* now free up the list */
389 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
390 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
391 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
392 MY_NEXT_IN_TREE(tree_node_ptr);
393 MY_FREE(list_data_ptr->d_buffer);
394 }
395 MY_FREE_TREE(recv_tree);
396 }
397
398 MY_FREE_TREE(inode_tree);
399 BMfinish_comp_msg(comm->from_msg,procinfo); CHKERRN(0);
400 BMfinish_comp_msg(comm->to_msg,procinfo); CHKERRN(0);
401 return(0);
402 }
403
404 /*+ BSupdt_matrix1 - Update the matrix using a bunch of i-nodes
405
406 Input Parameters:
407 I don't really know what these things do now.
408
409 Output Parameters:
410 I don't really know what these things do now.
411
412 Returns:
413 void
414
415 Notes:
416 This code is very complicated and could be done much better.
417
418 +*/
BSupdt_matrix1(BStree inode_tree,int len,int * updt_index,FLOAT * updt_inode,int len2,int * my_inode_ind,FLOAT * my_inode,int start,int end,BScl_2_inode * clique2inode,BSinode * inodes,BSprocinfo * procinfo)419 void BSupdt_matrix1(BStree inode_tree,int len,int *updt_index,
420 FLOAT *updt_inode,int len2,int *my_inode_ind,FLOAT *my_inode,
421 int start,int end,BScl_2_inode *clique2inode,BSinode *inodes,
422 BSprocinfo *procinfo)
423 {
424 int i, j;
425 int updt_count, offset, toffset;
426 int cl_ind, in_ind;
427 BSpermutation *iperm;
428 FLOAT *work, *matrix;
429 FLOAT work2;
430 int *intptr;
431 FLOAT *nzptr;
432 int tcount, count2, gnum, size;
433 int found;
434 BStree_ptr tree_node_ptr;
435 int *tree_data_ptr;
436
437 /* sort the updt_inode */
438 iperm = BSalloc_permutation(len); CHKERR(0);
439 for (i=0;i<len;i++) iperm->perm[i] = i;
440 BSheap_sort1(len,updt_index,iperm->perm); CHKERR(0);
441 MY_MALLOC(work,(FLOAT *),sizeof(FLOAT)*len,1);
442 BSiperm_dvec(updt_inode,work,iperm); CHKERR(0);
443 for (j=0;j<len;j++) {
444 updt_inode[j] = work[j];
445 }
446 MY_FREE(work);
447 BSfree_permutation(iperm); CHKERR(0);
448
449 for (updt_count=0;updt_count<len;updt_count++) {
450 MY_SEARCH_TREE(inode_tree,updt_index[updt_count],found,tree_node_ptr);
451 if (found) {
452 tree_data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr);
453 in_ind = tree_data_ptr[0];
454 cl_ind = tree_data_ptr[1];
455 /********** update the inode */
456 tcount = 0;
457 count2 = 0;
458 intptr = inodes[in_ind].row_num;
459 while ((tcount<len2) && (count2<inodes[in_ind].length)) {
460 if (my_inode_ind[tcount] == intptr[count2]) {
461 work2 = my_inode[tcount];
462 nzptr = &(inodes[in_ind].nz[count2]);
463 (*nzptr) -= work2*updt_inode[updt_count];
464 MLOG_flop(2);
465 tcount++;
466 count2++;
467 } else if (my_inode_ind[tcount] < intptr[count2]) {
468 tcount++;
469 } else {
470 count2++;
471 }
472 }
473 /* update a protion of the clique if it is local */
474 if (clique2inode->proc[cl_ind] == procinfo->my_id) {
475 /***** update the clique (stored in UPPER triangle) */
476 gnum = clique2inode->g_offset[cl_ind];
477 offset = updt_index[updt_count]-gnum;
478 matrix = clique2inode->d_mats[cl_ind].matrix;
479 size = clique2inode->d_mats[cl_ind].size;
480 tcount = updt_count;
481 work2 = updt_inode[tcount];
482 while (tcount < len) {
483 toffset = updt_index[tcount]-gnum;
484 if (toffset < size) {
485 matrix[(toffset*size)+offset] -= work2*
486 updt_inode[tcount];
487 MLOG_flop(2);
488 } else {
489 break;
490 }
491 tcount++;
492 }
493 }
494 /* end of clique update */
495 }
496 }
497 }
498