1 #include "BSprivate.h"
2
3 /* a data structure used only in factoring */
4 /* nodes in a list of i-nodes that need to be received/sent during */
5 /* factorization */
6 typedef struct __list_type {
7 int length; /* length of the columns in the i-node */
8 int num_cols; /* # of columns in the i-node */
9 int cur_len; /* don't know anymore */
10 FLOAT *d_buffer; /* don't know anymore */
11 int *i_buffer; /* don't know anymore */
12 int buf_size; /* don't know anymore */
13 int cl_ind; /* index of the clique, if local */
14 int in_ind; /* index of the i-node if local */
15 } list_type;
16
17 #include "BStree.h"
18
19 extern void BSilu_updt_matrix(BSpar_mat *, int, BStree, BStree, int, int, int *,
20 FLOAT *, int , int *, FLOAT *, int , int , BScl_2_inode *, BSinode *,
21 BSprocinfo *);
22
23 /*+ BSilu_factorn - Compute the incomplete LU factor of a matrix
24
25 Input Parameters:
26 . A - The sparse matrix to be factored
27 . comm - the communication structure of the factoring
28 . procinfo - the usual processor info
29
30 Output Parameters:
31 . A - The factored sparse matrix
32
33 Returns:
34 0 if successful, otherwise a negative number whose absolute
35 value is the row number of the color (less one) where the
36 failure occured.
37
38 +*/
39
40 typedef struct __col_list {
41 int count;
42 int *col;
43 FLOAT **nz;
44 } col_list;
45
BSilu_factorn(BSpar_mat * A,BScomm * comm,BSprocinfo * procinfo)46 int BSilu_factorn(BSpar_mat *A, BScomm *comm, BSprocinfo *procinfo)
47 {
48 BMphase *to_phase, *from_phase;
49 BMmsg *msg;
50 int i, j, k;
51 int cl_ind, in_ind;
52 int count, size, ind;
53 int length, num_cols, gnum, offset;
54 int found;
55 int *intptr, *intptr2, *i_buffer;
56 FLOAT *d_buffer;
57 BScl_2_inode *clique2inode;
58 BSnumbering *color2clique;
59 BSinode *inodes;
60 int *data_ptr, data_len, msg_len;
61 FLOAT *msg_buf, *matrix;
62 FLOAT *nzptr;
63 char TR = 'N';
64 FLOAT one = 1.0;
65 int info, fact_error;
66 list_type *list_data_ptr;
67 BStree_ptr tree_node_ptr;
68 BStree recv_tree;
69 BStree inode_tree;
70 int dummy = 0;
71 int *g_row_num, *iperm;
72
73 int l, max_size, dd_size, *ipiv, *ii_buffer;
74 FLOAT zero=0.0, *work, *dd_buffer, *temp;
75 BStree rowtree;
76 BStree_ptr node_ptr;
77 col_list *col_list_ptr;
78
79
80 BMinit_comp_msg(comm->from_msg,procinfo); CHKERRN(0);
81 BMinit_comp_msg(comm->to_msg,procinfo); CHKERRN(0);
82 color2clique = A->color2clique;
83 clique2inode = A->clique2inode;
84 inodes = A->inodes->list;
85 g_row_num = A->global_row_num->numbers;
86 iperm = A->inv_perm->perm;
87
88 /* max_size is added in the following part (ILU) */
89 max_size = -1;
90 MY_INIT_TREE(inode_tree,(sizeof(int)*2));
91 for (i=0;i<color2clique->length-1;i++) {
92 for (cl_ind=color2clique->numbers[i];
93 cl_ind<color2clique->numbers[i+1];cl_ind++) {
94 if (clique2inode->d_mats[cl_ind].size > max_size)
95 max_size = clique2inode->d_mats[cl_ind].size;
96 for (in_ind=clique2inode->inode_index[cl_ind];
97 in_ind<clique2inode->inode_index[cl_ind+1];in_ind++) {
98 MY_INSERT_TREE_NODE(inode_tree,(inodes[in_ind].gcol_num),
99 found,tree_node_ptr,dummy);
100 data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr);
101 data_ptr[0] = in_ind;
102 data_ptr[1] = cl_ind;
103 }
104 }
105 }
106 MY_MALLOCN(work,(FLOAT *),max_size*sizeof(FLOAT),1);
107
108 /* Create rowtree just like factor1 case. But take care of */
109 /* the inode thing (ILU) */
110 MY_INIT_TREE(rowtree,sizeof(col_list));
111 for (j=0; j<A->inodes->length; j++) {
112 for (k=0; k<inodes[j].num_cols; k++) {
113 for (i=0; i<inodes[j].length; i++) {
114 MY_INSERT_TREE_NODE(rowtree,g_row_num[iperm[inodes[j].row_num[i]]],
115 found,node_ptr,dummy);
116 col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
117 if (found) {
118 col_list_ptr->count++;
119 }
120 else {
121 col_list_ptr->count = 1;
122 /* Recall that A->max_local_row_length is used to store max_row_len */
123 MY_MALLOCN(col_list_ptr->col,(int *),sizeof(int)*
124 A->max_local_row_length,1);
125 MY_MALLOCN(col_list_ptr->nz,(FLOAT **),sizeof(FLOAT *)*
126 A->max_local_row_length,1);
127 }
128 col_list_ptr->col[col_list_ptr->count-1] = inodes[j].gcol_num+k;
129 col_list_ptr->nz[col_list_ptr->count-1] =
130 &(inodes[j].nz[k*inodes[j].length+i]);
131 }
132 }
133 }
134
135 /*
136 MY_FIRST_IN_TREE(rowtree,node_ptr);
137 while (! IS_TREE_PTR_NULL(node_ptr)) {
138 col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
139 MY_NEXT_IN_TREE(node_ptr);
140 printf("[%d]:rowtree list: ",procinfo->my_id);
141 for (i=0; i<col_list_ptr->count; i++)
142 printf(" %2d",col_list_ptr->col[i]);
143 printf("\n");
144 }
145 */
146
147 fact_error = FALSE;
148 /* now do this phase by phase */
149 for (i=0;i<color2clique->length-1;i++) {
150 to_phase = BMget_phase(comm->to_msg,i); CHKERRN(0);
151 from_phase = BMget_phase(comm->from_msg,i); CHKERRN(0);
152
153 /* send my part of each column in this color */
154 /* use the "from" part here */
155 msg = NULL;
156 while ((msg = BMnext_msg(from_phase,msg)) != NULL) {
157 CHKERRN(0);
158 /* allocate space for the message */
159 data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
160 in_ind = data_ptr[1];
161 size = inodes[in_ind].length*inodes[in_ind].num_cols*sizeof(FLOAT)
162 + inodes[in_ind].length*sizeof(int);
163 MY_MALLOCN(msg_buf,(FLOAT *),size,1);
164 BMset_msg_ptr(msg,msg_buf); CHKERRN(0);
165 BMset_msg_size(msg,size); CHKERRN(0);
166 count = 0;
167 for (j=0;j<inodes[in_ind].num_cols;j++) {
168 nzptr = &(inodes[in_ind].nz[j*inodes[in_ind].length]);
169 for (k=0;k<inodes[in_ind].length;k++) {
170 msg_buf[count] = nzptr[k];
171 count++;
172 }
173 }
174 intptr = (int *) &(msg_buf[count]);
175 intptr2 = (int *) inodes[in_ind].row_num;
176 for (j=0;j<inodes[in_ind].length;j++) {
177 intptr[j] = g_row_num[iperm[intptr2[j]]];
178 }
179 BMsend_block_msg(from_phase,msg,procinfo); CHKERRN(0);
180 MY_FREE(msg_buf);
181 BMset_msg_ptr(msg,NULL); CHKERRN(0);
182 }
183 CHKERRN(0);
184 BMsend_block_msg(from_phase,NULL,procinfo); CHKERRN(0);
185
186 /* factor the cliques in this color */
187 /* the following is modified (ILU) */
188 for (cl_ind=color2clique->numbers[i];
189 cl_ind<color2clique->numbers[i+1];cl_ind++) {
190 if (procinfo->my_id == clique2inode->proc[cl_ind]) {
191 /* factor this clique */
192 /* stored in the upper diagonal */
193 size = clique2inode->d_mats[cl_ind].size;
194 matrix = clique2inode->d_mats[cl_ind].matrix;
195 ipiv = (int *) &(matrix[size*size]);
196 DGETRF(&size,&size,matrix,&size,ipiv,&info);
197 MLOG_flop((size*size*size)/6);
198 if (info != 0) fact_error = TRUE;
199 /* now invert the clique */
200 DGETRI(&size,matrix,&size,ipiv,work,&size,&info);
201 MLOG_flop((size*size*size)/6);
202 if (info != 0) fact_error = TRUE;
203 }
204 }
205
206 /* figure out what I am going to receive and make room for it */
207 /* use the "to" part here */
208 MY_INIT_TREE(recv_tree,sizeof(list_type));
209 msg = NULL;
210 while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
211 CHKERRN(0);
212 /* search list for this number */
213 data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
214 gnum = data_ptr[0];
215 length = data_ptr[1];
216 num_cols = data_ptr[2];
217 MY_INSERT_TREE_NODE(recv_tree,gnum,found,tree_node_ptr,dummy);
218 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
219 if (found) {
220 list_data_ptr->length += length;
221 } else {
222 list_data_ptr->length = length;
223 list_data_ptr->num_cols = num_cols;
224 list_data_ptr->cur_len = 0;
225 list_data_ptr->d_buffer = NULL;
226 list_data_ptr->i_buffer = NULL;
227 list_data_ptr->cl_ind = -1;
228 list_data_ptr->in_ind = -1;
229 }
230 }
231 CHKERRN(0);
232 /* add any of my parts to the list, if necessary */
233 for (cl_ind=color2clique->numbers[i];
234 cl_ind<color2clique->numbers[i+1];cl_ind++) {
235 /* only work on columns that I own */
236 if (clique2inode->proc[cl_ind] != procinfo->my_id) continue;
237 for (in_ind=clique2inode->inode_index[cl_ind];
238 in_ind<clique2inode->inode_index[cl_ind+1];in_ind++) {
239 MY_INSERT_TREE_NODE(recv_tree,(inodes[in_ind].gcol_num),
240 found,tree_node_ptr,dummy);
241 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
242 if (found) {
243 list_data_ptr->cl_ind = cl_ind;
244 list_data_ptr->in_ind = in_ind;
245 list_data_ptr->length += inodes[in_ind].length;
246 } else {
247 list_data_ptr->length = inodes[in_ind].length;
248 list_data_ptr->num_cols = inodes[in_ind].num_cols;
249 list_data_ptr->cur_len = 0;
250 list_data_ptr->d_buffer = NULL;
251 list_data_ptr->i_buffer = NULL;
252 list_data_ptr->cl_ind = cl_ind;
253 list_data_ptr->in_ind = in_ind;
254 }
255 }
256 }
257 /* make sure that no one has been received out of order */
258 if (procinfo->error_check) {
259 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
260 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
261 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
262 MY_NEXT_IN_TREE(tree_node_ptr);
263 if (list_data_ptr->cl_ind == -1) {
264 MY_SETERRCN(FACTOR_ERROR,"Received column out of order\n");
265 }
266 }
267 }
268
269 /* allocate space for each whole column */
270 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
271 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
272 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
273 MY_NEXT_IN_TREE(tree_node_ptr);
274 list_data_ptr->buf_size =
275 (list_data_ptr->num_cols*list_data_ptr->length*sizeof(FLOAT)) +
276 (list_data_ptr->length*sizeof(int));
277 MY_MALLOCN(list_data_ptr->d_buffer,(FLOAT *),
278 list_data_ptr->buf_size,2);
279 list_data_ptr->i_buffer = (int *)
280 &(list_data_ptr->d_buffer[list_data_ptr->num_cols*
281 list_data_ptr->length]);
282 }
283
284 /* put my part of the column into the collected column */
285 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
286 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
287 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
288 MY_NEXT_IN_TREE(tree_node_ptr);
289 in_ind = list_data_ptr->in_ind;
290 length = inodes[in_ind].length;
291 num_cols = inodes[in_ind].num_cols;
292 nzptr = inodes[in_ind].nz;
293 d_buffer = list_data_ptr->d_buffer;
294 for (j=0;j<num_cols;j++) {
295 for (k=0;k<length;k++) {
296 d_buffer[k] = nzptr[(j*length)+k];
297 }
298 d_buffer += list_data_ptr->length;
299 }
300 i_buffer = list_data_ptr->i_buffer;
301 intptr = inodes[in_ind].row_num;
302 /* while copying, convert i_buffer from new local to new global */
303 for (j=0;j<length;j++) {
304 i_buffer[j] = g_row_num[iperm[intptr[j]]];
305 }
306 list_data_ptr->cur_len += length;
307 }
308
309 /* collect all the parts of "my" columns */
310 while ((msg = BMrecv_block_msg(to_phase,procinfo)) != NULL) {
311 CHKERRN(0);
312 BMcheck_on_async_block(from_phase); CHKERRN(0);
313 msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0);
314 BMset_msg_ptr(msg,NULL); CHKERRN(0);
315 data_ptr = BMget_user(msg,&msg_len); CHKERRN(0);
316 gnum = data_ptr[0];
317 length = data_ptr[1];
318 num_cols = data_ptr[2];
319 MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr);
320 if (found) {
321 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
322 d_buffer = &(list_data_ptr->d_buffer[list_data_ptr->cur_len]);
323 count = 0;
324 for (j=0;j<num_cols;j++) {
325 for (k=0;k<length;k++) {
326 d_buffer[k] = msg_buf[count];
327 count++;
328 }
329 d_buffer += list_data_ptr->length;
330 }
331 i_buffer = &(list_data_ptr->i_buffer[list_data_ptr->cur_len]);
332 intptr = (int *) &(msg_buf[num_cols*length]);
333 for (j=0;j<length;j++) {
334 i_buffer[j] = intptr[j];
335 }
336 list_data_ptr->cur_len += length;
337 } else {
338 MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n");
339 }
340 }
341 CHKERRN(0);
342 BMfree_block_msg(to_phase); CHKERRN(0);
343 BMfinish_on_async_block(from_phase); CHKERRN(0);
344
345 /* see if the factorizations went okay, if not, then return */
346 /* we have to wait until here to make sure all messages are received */
347 /* make anything that needs free'ing is free'ed */
348 /* everyone agrees on success/error */
349 GISUM(&fact_error,1,&info,procinfo->procset);
350 if (fact_error != 0) {
351 /* free up space and return with the negative phase number */
352 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
353 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
354 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
355 MY_NEXT_IN_TREE(tree_node_ptr);
356 MY_FREE(list_data_ptr->d_buffer);
357 }
358 MY_FREE_TREE(recv_tree);
359 return(-(i+1));
360 }
361
362 /* factor these columns */
363 /* the following part is modified (ILU) */
364 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
365 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
366 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
367 MY_NEXT_IN_TREE(tree_node_ptr);
368 if (list_data_ptr->length <= 0) continue;
369 cl_ind = list_data_ptr->cl_ind;
370 in_ind = list_data_ptr->in_ind;
371 j = inodes[in_ind].num_cols;
372 offset = inodes[in_ind].gcol_num - clique2inode->g_offset[cl_ind];
373 size = clique2inode->d_mats[cl_ind].size;
374 matrix = clique2inode->d_mats[cl_ind].matrix;
375 matrix += ((offset*size)+offset);
376 length = list_data_ptr->length;
377
378 MY_MALLOCN(temp,(FLOAT *),length*j*sizeof(FLOAT),1);
379 for (k=0; k<j*length; k++) temp[k] = list_data_ptr->d_buffer[k];
380
381 DGEMM(&TR,&TR,&length,&j,&j,&one,temp,&length,
382 matrix,&size,&zero,list_data_ptr->d_buffer,&length);
383
384 MY_FREE(temp);
385 MLOG_flop(2*((j*(j+1))/2)*list_data_ptr->length*j);
386 }
387
388 /* send these columns out */
389 /* the following part is modified (ILU) */
390 msg = NULL;
391 while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
392 CHKERRN(0);
393 data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
394 gnum = data_ptr[0];
395 MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr);
396 if (found) {
397 /* insert pivot row to the message (ILU) */
398 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
399 length = list_data_ptr->length;
400 num_cols = list_data_ptr->num_cols;
401
402 dd_size = length*(2*sizeof(FLOAT)*num_cols+sizeof(int));
403 MY_MALLOCN(dd_buffer,(FLOAT *),dd_size,1);
404 for (j=0; j<num_cols*length; j++)
405 dd_buffer[j] = list_data_ptr->d_buffer[j];
406 k = j;
407
408 for (l=0; l<num_cols; l++) {
409 MY_SEARCH_TREE(rowtree,gnum+l,found,node_ptr);
410 if (found) {
411 col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
412 for (j=0; j<col_list_ptr->count; j++)
413 dd_buffer[k+j] = col_list_ptr->nz[j][0];
414 k += col_list_ptr->count;
415 }
416 else {
417 printf("Error: Something wrong with rowtree.\n");
418 }
419 }
420 ii_buffer = (int *) &(dd_buffer[k]);
421 for (j=0; j<length; j++)
422 ii_buffer[j] = list_data_ptr->i_buffer[j];
423
424 BMset_msg_ptr(msg,dd_buffer); CHKERRN(0);
425 BMset_msg_size(msg,dd_size); CHKERRN(0);
426 BMsend_block_msg(to_phase,msg,procinfo); CHKERRN(0);
427 BMset_msg_ptr(msg,NULL); CHKERRN(0);
428
429 MY_FREE(dd_buffer);
430 } else {
431 MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n");
432 }
433 }
434 CHKERRN(0);
435 BMsend_block_msg(to_phase,NULL,procinfo); CHKERRN(0);
436
437 /* retrieve my part of the column and put it in the data structure */
438 /* the following part is modified (ILU) */
439 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
440 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
441 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
442 MY_NEXT_IN_TREE(tree_node_ptr);
443 if (list_data_ptr->length <= 0) continue;
444 in_ind = list_data_ptr->in_ind;
445 cl_ind = list_data_ptr->cl_ind;
446 length = inodes[in_ind].length;
447 num_cols = inodes[in_ind].num_cols;
448 nzptr = inodes[in_ind].nz;
449 d_buffer = list_data_ptr->d_buffer;
450 i_buffer = list_data_ptr->i_buffer;
451 for (j=0;j<num_cols;j++) {
452 for (k=0;k<length;k++) {
453 if (i_buffer[k] > clique2inode->g_offset[cl_ind])
454 nzptr[(j*length)+k] = d_buffer[k];
455 }
456 d_buffer += list_data_ptr->length;
457 }
458 }
459
460 /* now update the remaining matrix using the local columns */
461 /* the following part is modified (ILU) */
462 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
463 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
464 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
465 MY_NEXT_IN_TREE(tree_node_ptr);
466 if (list_data_ptr->length <= 0) continue;
467 in_ind = list_data_ptr->in_ind;
468 gnum = inodes[in_ind].gcol_num;
469
470 MY_MALLOCN(dd_buffer,(FLOAT *),list_data_ptr->length*
471 list_data_ptr->num_cols*sizeof(FLOAT),1);
472 k = 0;
473 for (l=0; l<list_data_ptr->num_cols; l++) {
474 MY_SEARCH_TREE(rowtree,gnum+l,found,node_ptr);
475 if (found) {
476 col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
477 for (j=0; j<col_list_ptr->count; j++)
478 dd_buffer[k+j] = col_list_ptr->nz[j][0];
479 k += col_list_ptr->count;
480 }
481 else {
482 printf("Error: Something wrong with rowtree.\n");
483 }
484 }
485
486 BSilu_updt_matrix(A,gnum,rowtree,
487 inode_tree,list_data_ptr->length,
488 list_data_ptr->num_cols,
489 list_data_ptr->i_buffer,list_data_ptr->d_buffer,
490 col_list_ptr->count,col_list_ptr->col,dd_buffer,
491 color2clique->numbers[i+1],
492 color2clique->numbers[color2clique->length-1],clique2inode,
493 inodes,procinfo); CHKERRN(0);
494
495 MY_FREE(dd_buffer);
496 }
497
498 /* receive the incoming columns and update the remaining matrix */
499 /* strip out the "local" part and put it in the matrix */
500 /* the following part is modified (ILU) */
501 while ((msg = BMrecv_block_msg(from_phase,procinfo)) != NULL) {
502 CHKERRN(0);
503 BMcheck_on_async_block(to_phase); CHKERRN(0);
504 msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0);
505 BMset_msg_ptr(msg,NULL); CHKERRN(0);
506 data_ptr = BMget_user(msg,&msg_len); CHKERRN(0);
507 cl_ind = data_ptr[0];
508 in_ind = data_ptr[1];
509 /* let gnum be first global row location of my part */
510 gnum = g_row_num[iperm[inodes[in_ind].row_num[0]]];
511 num_cols = inodes[in_ind].num_cols;
512 length = inodes[in_ind].length;
513 msg_len = BMget_msg_size(msg); CHKERRN(0);
514 msg_len /= 2*sizeof(FLOAT)*num_cols+sizeof(int);
515
516 /* find my local part */
517 i_buffer = (int *) &(msg_buf[2*num_cols*msg_len]);
518 for (ind=0;ind<msg_len;ind++) {
519 if (gnum == i_buffer[ind]) break;
520 }
521 nzptr = inodes[in_ind].nz;
522 d_buffer = &(msg_buf[ind]);
523 for (j=0;j<num_cols;j++) {
524 for (k=0;k<length;k++) {
525 if (i_buffer[ind+k] > clique2inode->g_offset[cl_ind])
526 nzptr[(j*length)+k] = d_buffer[k];
527 }
528 d_buffer += msg_len;
529 }
530
531 /* update the remaining matrix */
532 BSilu_updt_matrix(A,inodes[in_ind].gcol_num,rowtree,
533 inode_tree,msg_len,num_cols,i_buffer,msg_buf,msg_len,
534 i_buffer,&(msg_buf[msg_len*num_cols]),color2clique->numbers[i+1],
535 color2clique->numbers[color2clique->length-1],clique2inode,
536 inodes,procinfo); CHKERRN(0);
537 }
538 CHKERRN(0);
539 BMfree_block_msg(from_phase); CHKERRN(0);
540 BMfinish_on_async_block(to_phase); CHKERRN(0);
541
542 /* now free up the list */
543 MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
544 while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
545 list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
546 MY_NEXT_IN_TREE(tree_node_ptr);
547 MY_FREE(list_data_ptr->d_buffer);
548 }
549 MY_FREE_TREE(recv_tree);
550 }
551
552 /* now free the rowtree and other stuff (ILU) */
553 MY_FIRST_IN_TREE(rowtree,node_ptr);
554 while (! IS_TREE_PTR_NULL(node_ptr)) {
555 col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
556 MY_NEXT_IN_TREE(node_ptr);
557 MY_FREE(col_list_ptr->col);
558 MY_FREE(col_list_ptr->nz);
559 }
560 MY_FREE_TREE(rowtree);
561 MY_FREE_TREE(inode_tree);
562 MY_FREE(work);
563
564 BMfinish_comp_msg(comm->from_msg,procinfo); CHKERRN(0);
565 BMfinish_comp_msg(comm->to_msg,procinfo); CHKERRN(0);
566 return(0);
567 }
568
569 /*+ BSilu_updt_matrix - Update the matrix using a bunch of i-nodes
570
571 Input Parameters:
572 I don't really know what these things do now.
573
574 Output Parameters:
575 I don't really know what these things do now.
576
577 Returns:
578 void
579
580 Notes:
581 This code is very complicated and could be done much better.
582
583 +*/
BSilu_updt_matrix(BSpar_mat * A,int colnum,BStree rowtree,BStree inode_tree,int len,int num_cols,int * updt_index,FLOAT * updt_inode,int len2,int * my_inode_ind,FLOAT * my_inode,int start,int end,BScl_2_inode * clique2inode,BSinode * inodes,BSprocinfo * procinfo)584 void BSilu_updt_matrix(BSpar_mat *A, int colnum, BStree rowtree,
585 BStree inode_tree, int len, int num_cols, int *updt_index,
586 FLOAT *updt_inode, int len2, int *my_inode_ind, FLOAT *my_inode,
587 int start, int end, BScl_2_inode *clique2inode, BSinode *inodes,
588 BSprocinfo *procinfo)
589 {
590 int i, j;
591 int updt_count, offset, toffset;
592 int cl_ind, in_ind;
593 BSpermutation *iperm;
594 FLOAT *work, *matrix;
595 int *intptr;
596 FLOAT *nzptr;
597 int tcount, count2, gnum, size;
598 int found;
599 BStree_ptr tree_node_ptr;
600 int *tree_data_ptr;
601
602 int k, l, *rowptr, *row_gnum, *g_iperm;
603 BStree_ptr node_ptr;
604 col_list *col_list_ptr;
605 FLOAT one = 1.0, none = -1.0;
606 char TR = 'T', NTR = 'N';
607
608 /* sort the updt_inode */
609 iperm = BSalloc_permutation(len); CHKERR(0);
610 for (i=0;i<len;i++) iperm->perm[i] = i;
611 BSheap_sort1(len,updt_index,iperm->perm); CHKERR(0);
612 MY_MALLOC(work,(FLOAT *),sizeof(FLOAT)*len,1);
613 for (i=0;i<num_cols;i++) {
614 BSiperm_dvec(&(updt_inode[i*len]),work,iperm); CHKERR(0);
615 for (j=0;j<len;j++) updt_inode[(i*len)+j] = work[j];
616 }
617 MY_FREE(work);
618 BSfree_permutation(iperm); CHKERR(0);
619
620 row_gnum = A->global_row_num->numbers;
621 g_iperm = A->inv_perm->perm;
622
623 updt_count = 0;
624 while (updt_count < len) {
625 MY_SEARCH_TREE(inode_tree,updt_index[updt_count],found,tree_node_ptr);
626 if (found) {
627 tree_data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr);
628 in_ind = tree_data_ptr[0];
629 cl_ind = tree_data_ptr[1];
630
631 /* make sure the nz are below pivot (ILU) */
632 if (clique2inode->g_offset[cl_ind] > colnum) {
633 /* update the inode */
634 tcount = 0;
635 count2 = 0;
636 MY_SEARCH_TREE(rowtree,updt_index[updt_count],found,node_ptr);
637 if (found) {
638 col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
639 intptr = col_list_ptr->col;
640 while ((tcount<len2) && (count2<col_list_ptr->count)) {
641 if ((my_inode_ind[tcount] == intptr[count2]) &
642 (intptr[count2] > colnum)) {
643 MY_SEARCH_TREE(inode_tree,intptr[count2],found,tree_node_ptr);
644 tree_data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr);
645 i = tree_data_ptr[0];
646 nzptr = inodes[i].nz;
647 rowptr = inodes[i].row_num;
648 /* search the row that we are going to modify (ILU) */
649 for (j=0; j<inodes[i].length; j++)
650 if (row_gnum[g_iperm[rowptr[j]]] == updt_index[updt_count])
651 break;
652
653 /* usual Gaussian elimination step (ILU) */
654 if (j<inodes[i].length)
655 DGEMM(&NTR,&TR,&(inodes[in_ind].num_cols),&(inodes[i].num_cols),
656 &num_cols,&none,&(updt_inode[updt_count]),&len,
657 &(my_inode[tcount]),&len2,&one,&(nzptr[j]),
658 &(inodes[i].length));
659
660 MLOG_flop(2*num_cols*inodes[in_ind].num_cols);
661 tcount += inodes[i].num_cols;
662 count2 += inodes[i].num_cols;
663 } else if (my_inode_ind[tcount] < intptr[count2]) {
664 tcount++;
665 } else {
666 count2++;
667 }
668 }
669 /* update a portion of the clique if it is local */
670 gnum = clique2inode->g_offset[cl_ind];
671 offset = updt_index[updt_count]-gnum;
672 matrix = clique2inode->d_mats[cl_ind].matrix;
673 size = clique2inode->d_mats[cl_ind].size;
674 for (j=0; j<len2; j++)
675 if (my_inode_ind[j] == updt_index[updt_count]) break;
676 if (j==len2) {
677 printf("Error: The matrix is not structurally symmetric!\n");
678 }
679 for (k=0; k<inodes[in_ind].num_cols; k++) {
680 tcount = updt_count;
681 toffset = updt_index[updt_count+k]-gnum;
682 if (toffset < size) {
683 while ((tcount < len) & (updt_index[tcount]-gnum < size)) {
684 for (l=0; l<num_cols; l++) {
685 matrix[toffset*size+offset+tcount-updt_count] -=
686 my_inode[l*len2+j+k]*updt_inode[l*len+tcount];
687 }
688 MLOG_flop(2*num_cols);
689 tcount++;
690 }
691 }
692 }
693 /* end of update clique */
694 }
695 }
696 updt_count += inodes[in_ind].num_cols;
697 } else {
698 updt_count++;
699 }
700 }
701 }
702