1 #include "BSprivate.h"
2 
3 /* a data structure used only in factoring */
4 /* nodes in a list of i-nodes that need to be received/sent during */
5 /* factorization */
6 typedef struct __list_type {
7   int	length;		/* length of the columns in the i-node */
8   int	num_cols;	/* # of columns in the i-node */
9   int	cur_len;	/* don't know anymore */
10   FLOAT	*d_buffer;	/* don't know anymore */
11   int	*i_buffer;	/* don't know anymore */
12   int	buf_size;	/* don't know anymore */
13   int	cl_ind; 	/* index of the clique, if local */
14   int	in_ind; 	/* index of the i-node if local */
15 } list_type;
16 
17 #include "BStree.h"
18 
19 extern void BSilu_updt_matrix(BSpar_mat *, int, BStree, BStree, int, int, int *,
20      FLOAT *, int , int *, FLOAT *, int , int , BScl_2_inode *, BSinode *,
21      BSprocinfo *);
22 
23 /*+ BSilu_factorn - Compute the incomplete LU factor of a matrix
24 
25     Input Parameters:
26 .   A - The sparse matrix to be factored
27 .   comm - the communication structure of the factoring
28 .   procinfo - the usual processor info
29 
30     Output Parameters:
31 .   A - The factored sparse matrix
32 
33     Returns:
34     0 if successful, otherwise a negative number whose absolute
35     value is the row number of the color (less one) where the
36     failure occured.
37 
38  +*/
39 
40 typedef struct __col_list {
41   int   count;
42   int   *col;
43   FLOAT **nz;
44 } col_list;
45 
BSilu_factorn(BSpar_mat * A,BScomm * comm,BSprocinfo * procinfo)46 int BSilu_factorn(BSpar_mat *A, BScomm *comm, BSprocinfo *procinfo)
47 {
48   BMphase *to_phase, *from_phase;
49   BMmsg *msg;
50   int  i, j, k;
51   int  cl_ind, in_ind;
52   int  count, size, ind;
53   int  length, num_cols, gnum, offset;
54   int  found;
55   int  *intptr, *intptr2, *i_buffer;
56   FLOAT *d_buffer;
57   BScl_2_inode *clique2inode;
58   BSnumbering *color2clique;
59   BSinode *inodes;
60   int  *data_ptr, data_len, msg_len;
61   FLOAT *msg_buf, *matrix;
62   FLOAT *nzptr;
63   char TR = 'N';
64   FLOAT one = 1.0;
65   int  info, fact_error;
66   list_type *list_data_ptr;
67   BStree_ptr  tree_node_ptr;
68   BStree  recv_tree;
69   BStree  inode_tree;
70   int  dummy = 0;
71   int  *g_row_num, *iperm;
72 
73   int        l, max_size, dd_size, *ipiv, *ii_buffer;
74   FLOAT      zero=0.0, *work, *dd_buffer, *temp;
75   BStree     rowtree;
76   BStree_ptr node_ptr;
77   col_list   *col_list_ptr;
78 
79 
80   BMinit_comp_msg(comm->from_msg,procinfo); CHKERRN(0);
81   BMinit_comp_msg(comm->to_msg,procinfo); CHKERRN(0);
82   color2clique = A->color2clique;
83   clique2inode = A->clique2inode;
84   inodes = A->inodes->list;
85   g_row_num = A->global_row_num->numbers;
86   iperm = A->inv_perm->perm;
87 
88   /* max_size is added in the following part (ILU) */
89   max_size = -1;
90   MY_INIT_TREE(inode_tree,(sizeof(int)*2));
91   for (i=0;i<color2clique->length-1;i++) {
92     for (cl_ind=color2clique->numbers[i];
93       cl_ind<color2clique->numbers[i+1];cl_ind++) {
94       if (clique2inode->d_mats[cl_ind].size > max_size)
95 	max_size = clique2inode->d_mats[cl_ind].size;
96       for (in_ind=clique2inode->inode_index[cl_ind];
97         in_ind<clique2inode->inode_index[cl_ind+1];in_ind++) {
98         MY_INSERT_TREE_NODE(inode_tree,(inodes[in_ind].gcol_num),
99           found,tree_node_ptr,dummy);
100         data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr);
101         data_ptr[0] = in_ind;
102         data_ptr[1] = cl_ind;
103       }
104     }
105   }
106   MY_MALLOCN(work,(FLOAT *),max_size*sizeof(FLOAT),1);
107 
108   /* Create rowtree just like factor1 case. But take care of */
109   /* the inode thing (ILU)                                   */
110   MY_INIT_TREE(rowtree,sizeof(col_list));
111   for (j=0; j<A->inodes->length; j++) {
112     for (k=0; k<inodes[j].num_cols; k++) {
113       for (i=0; i<inodes[j].length; i++) {
114         MY_INSERT_TREE_NODE(rowtree,g_row_num[iperm[inodes[j].row_num[i]]],
115 			    found,node_ptr,dummy);
116         col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
117         if (found) {
118 	  col_list_ptr->count++;
119         }
120         else {
121 	  col_list_ptr->count = 1;
122 	  /* Recall that A->max_local_row_length is used to store max_row_len */
123 	  MY_MALLOCN(col_list_ptr->col,(int *),sizeof(int)*
124 			A->max_local_row_length,1);
125 	  MY_MALLOCN(col_list_ptr->nz,(FLOAT **),sizeof(FLOAT *)*
126 		    A->max_local_row_length,1);
127         }
128         col_list_ptr->col[col_list_ptr->count-1] = inodes[j].gcol_num+k;
129         col_list_ptr->nz[col_list_ptr->count-1] =
130 	  &(inodes[j].nz[k*inodes[j].length+i]);
131       }
132     }
133   }
134 
135 /*
136 MY_FIRST_IN_TREE(rowtree,node_ptr);
137 while (! IS_TREE_PTR_NULL(node_ptr)) {
138   col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
139   MY_NEXT_IN_TREE(node_ptr);
140   printf("[%d]:rowtree list: ",procinfo->my_id);
141   for (i=0; i<col_list_ptr->count; i++)
142     printf(" %2d",col_list_ptr->col[i]);
143   printf("\n");
144 }
145 */
146 
147   fact_error = FALSE;
148   /* now do this phase by phase */
149   for (i=0;i<color2clique->length-1;i++) {
150     to_phase = BMget_phase(comm->to_msg,i); CHKERRN(0);
151     from_phase = BMget_phase(comm->from_msg,i); CHKERRN(0);
152 
153     /* send my part of each column in this color */
154     /* use the "from" part here */
155     msg = NULL;
156     while ((msg = BMnext_msg(from_phase,msg)) != NULL) {
157       CHKERRN(0);
158       /* allocate space for the message */
159       data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
160       in_ind = data_ptr[1];
161       size = inodes[in_ind].length*inodes[in_ind].num_cols*sizeof(FLOAT)
162         + inodes[in_ind].length*sizeof(int);
163       MY_MALLOCN(msg_buf,(FLOAT *),size,1);
164       BMset_msg_ptr(msg,msg_buf); CHKERRN(0);
165       BMset_msg_size(msg,size); CHKERRN(0);
166       count = 0;
167       for (j=0;j<inodes[in_ind].num_cols;j++) {
168         nzptr = &(inodes[in_ind].nz[j*inodes[in_ind].length]);
169         for (k=0;k<inodes[in_ind].length;k++) {
170           msg_buf[count] = nzptr[k];
171           count++;
172         }
173       }
174       intptr = (int *) &(msg_buf[count]);
175       intptr2 = (int *) inodes[in_ind].row_num;
176       for (j=0;j<inodes[in_ind].length;j++) {
177         intptr[j] = g_row_num[iperm[intptr2[j]]];
178       }
179       BMsend_block_msg(from_phase,msg,procinfo); CHKERRN(0);
180       MY_FREE(msg_buf);
181       BMset_msg_ptr(msg,NULL); CHKERRN(0);
182     }
183     CHKERRN(0);
184     BMsend_block_msg(from_phase,NULL,procinfo); CHKERRN(0);
185 
186     /* factor the cliques in this color */
187     /* the following is modified (ILU)  */
188     for (cl_ind=color2clique->numbers[i];
189       cl_ind<color2clique->numbers[i+1];cl_ind++) {
190       if (procinfo->my_id == clique2inode->proc[cl_ind]) {
191         /* factor this clique */
192         /* stored in the upper diagonal */
193         size = clique2inode->d_mats[cl_ind].size;
194         matrix = clique2inode->d_mats[cl_ind].matrix;
195 	ipiv = (int *) &(matrix[size*size]);
196 	DGETRF(&size,&size,matrix,&size,ipiv,&info);
197         MLOG_flop((size*size*size)/6);
198         if (info != 0) fact_error = TRUE;
199         /* now invert the clique */
200         DGETRI(&size,matrix,&size,ipiv,work,&size,&info);
201         MLOG_flop((size*size*size)/6);
202         if (info != 0) fact_error = TRUE;
203       }
204     }
205 
206     /* figure out what I am going to receive and make room for it */
207     /* use the "to" part here */
208     MY_INIT_TREE(recv_tree,sizeof(list_type));
209     msg = NULL;
210     while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
211       CHKERRN(0);
212       /* search list for this number */
213       data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
214       gnum = data_ptr[0];
215       length = data_ptr[1];
216       num_cols = data_ptr[2];
217       MY_INSERT_TREE_NODE(recv_tree,gnum,found,tree_node_ptr,dummy);
218       list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
219       if (found) {
220         list_data_ptr->length += length;
221       } else {
222         list_data_ptr->length = length;
223         list_data_ptr->num_cols = num_cols;
224         list_data_ptr->cur_len = 0;
225         list_data_ptr->d_buffer = NULL;
226         list_data_ptr->i_buffer = NULL;
227         list_data_ptr->cl_ind = -1;
228         list_data_ptr->in_ind = -1;
229       }
230     }
231     CHKERRN(0);
232     /* add any of my parts to the list, if necessary */
233     for (cl_ind=color2clique->numbers[i];
234       cl_ind<color2clique->numbers[i+1];cl_ind++) {
235       /* only work on columns that I own */
236       if (clique2inode->proc[cl_ind] != procinfo->my_id) continue;
237       for (in_ind=clique2inode->inode_index[cl_ind];
238         in_ind<clique2inode->inode_index[cl_ind+1];in_ind++) {
239         MY_INSERT_TREE_NODE(recv_tree,(inodes[in_ind].gcol_num),
240           found,tree_node_ptr,dummy);
241         list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
242         if (found) {
243           list_data_ptr->cl_ind = cl_ind;
244           list_data_ptr->in_ind = in_ind;
245           list_data_ptr->length += inodes[in_ind].length;
246         } else {
247           list_data_ptr->length = inodes[in_ind].length;
248           list_data_ptr->num_cols = inodes[in_ind].num_cols;
249           list_data_ptr->cur_len = 0;
250           list_data_ptr->d_buffer = NULL;
251           list_data_ptr->i_buffer = NULL;
252           list_data_ptr->cl_ind = cl_ind;
253           list_data_ptr->in_ind = in_ind;
254         }
255       }
256     }
257     /* make sure that no one has been received out of order */
258     if (procinfo->error_check) {
259       MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
260       while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
261         list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
262         MY_NEXT_IN_TREE(tree_node_ptr);
263         if (list_data_ptr->cl_ind == -1) {
264           MY_SETERRCN(FACTOR_ERROR,"Received column out of order\n");
265         }
266       }
267     }
268 
269     /* allocate space for each whole column */
270     MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
271     while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
272       list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
273       MY_NEXT_IN_TREE(tree_node_ptr);
274       list_data_ptr->buf_size =
275         (list_data_ptr->num_cols*list_data_ptr->length*sizeof(FLOAT)) +
276         (list_data_ptr->length*sizeof(int));
277       MY_MALLOCN(list_data_ptr->d_buffer,(FLOAT *),
278         list_data_ptr->buf_size,2);
279       list_data_ptr->i_buffer = (int *)
280         &(list_data_ptr->d_buffer[list_data_ptr->num_cols*
281         list_data_ptr->length]);
282     }
283 
284     /* put my part of the column into the collected column */
285     MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
286     while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
287       list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
288       MY_NEXT_IN_TREE(tree_node_ptr);
289       in_ind = list_data_ptr->in_ind;
290       length = inodes[in_ind].length;
291       num_cols = inodes[in_ind].num_cols;
292       nzptr = inodes[in_ind].nz;
293       d_buffer = list_data_ptr->d_buffer;
294       for (j=0;j<num_cols;j++) {
295         for (k=0;k<length;k++) {
296           d_buffer[k] = nzptr[(j*length)+k];
297         }
298         d_buffer += list_data_ptr->length;
299       }
300       i_buffer = list_data_ptr->i_buffer;
301       intptr = inodes[in_ind].row_num;
302       /* while copying, convert i_buffer from new local to new global */
303       for (j=0;j<length;j++) {
304         i_buffer[j] = g_row_num[iperm[intptr[j]]];
305       }
306       list_data_ptr->cur_len += length;
307     }
308 
309     /* collect all the parts of "my" columns */
310     while ((msg = BMrecv_block_msg(to_phase,procinfo)) != NULL) {
311       CHKERRN(0);
312       BMcheck_on_async_block(from_phase); CHKERRN(0);
313       msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0);
314       BMset_msg_ptr(msg,NULL); CHKERRN(0);
315       data_ptr = BMget_user(msg,&msg_len); CHKERRN(0);
316       gnum = data_ptr[0];
317       length = data_ptr[1];
318       num_cols = data_ptr[2];
319       MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr);
320       if (found) {
321         list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
322         d_buffer = &(list_data_ptr->d_buffer[list_data_ptr->cur_len]);
323         count = 0;
324         for (j=0;j<num_cols;j++) {
325           for (k=0;k<length;k++) {
326             d_buffer[k] = msg_buf[count];
327             count++;
328           }
329           d_buffer += list_data_ptr->length;
330         }
331         i_buffer = &(list_data_ptr->i_buffer[list_data_ptr->cur_len]);
332         intptr = (int *) &(msg_buf[num_cols*length]);
333         for (j=0;j<length;j++) {
334           i_buffer[j] = intptr[j];
335         }
336         list_data_ptr->cur_len += length;
337       } else {
338         MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n");
339       }
340     }
341     CHKERRN(0);
342     BMfree_block_msg(to_phase); CHKERRN(0);
343     BMfinish_on_async_block(from_phase); CHKERRN(0);
344 
345     /* see if the factorizations went okay, if not, then return */
346     /* we have to wait until here to make sure all messages are received */
347     /* make anything that needs free'ing is free'ed */
348     /* everyone agrees on success/error */
349     GISUM(&fact_error,1,&info,procinfo->procset);
350     if (fact_error != 0) {
351       /* free up space and return with the negative phase number */
352       MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
353       while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
354         list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
355         MY_NEXT_IN_TREE(tree_node_ptr);
356         MY_FREE(list_data_ptr->d_buffer);
357       }
358       MY_FREE_TREE(recv_tree);
359       return(-(i+1));
360     }
361 
362     /* factor these columns */
363     /* the following part is modified (ILU) */
364     MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
365     while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
366       list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
367       MY_NEXT_IN_TREE(tree_node_ptr);
368       if (list_data_ptr->length <= 0) continue;
369       cl_ind = list_data_ptr->cl_ind;
370       in_ind = list_data_ptr->in_ind;
371       j = inodes[in_ind].num_cols;
372       offset = inodes[in_ind].gcol_num - clique2inode->g_offset[cl_ind];
373       size = clique2inode->d_mats[cl_ind].size;
374       matrix = clique2inode->d_mats[cl_ind].matrix;
375       matrix += ((offset*size)+offset);
376       length = list_data_ptr->length;
377 
378       MY_MALLOCN(temp,(FLOAT *),length*j*sizeof(FLOAT),1);
379       for (k=0; k<j*length; k++) temp[k] = list_data_ptr->d_buffer[k];
380 
381       DGEMM(&TR,&TR,&length,&j,&j,&one,temp,&length,
382 	     matrix,&size,&zero,list_data_ptr->d_buffer,&length);
383 
384       MY_FREE(temp);
385       MLOG_flop(2*((j*(j+1))/2)*list_data_ptr->length*j);
386     }
387 
388     /* send these columns out */
389     /* the following part is modified (ILU) */
390     msg = NULL;
391     while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
392       CHKERRN(0);
393       data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
394       gnum = data_ptr[0];
395       MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr);
396       if (found) {
397 	/* insert pivot row to the message (ILU) */
398         list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
399 	length = list_data_ptr->length;
400         num_cols = list_data_ptr->num_cols;
401 
402 	dd_size = length*(2*sizeof(FLOAT)*num_cols+sizeof(int));
403 	MY_MALLOCN(dd_buffer,(FLOAT *),dd_size,1);
404 	for (j=0; j<num_cols*length; j++)
405 	  dd_buffer[j] = list_data_ptr->d_buffer[j];
406         k = j;
407 
408 	for (l=0; l<num_cols; l++) {
409 	  MY_SEARCH_TREE(rowtree,gnum+l,found,node_ptr);
410 	  if (found) {
411 	    col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
412 	    for (j=0; j<col_list_ptr->count; j++)
413 	      dd_buffer[k+j] = col_list_ptr->nz[j][0];
414             k += col_list_ptr->count;
415           }
416 	  else {
417 	    printf("Error: Something wrong with rowtree.\n");
418           }
419         }
420         ii_buffer = (int *) &(dd_buffer[k]);
421 	for (j=0; j<length; j++)
422 	  ii_buffer[j] = list_data_ptr->i_buffer[j];
423 
424         BMset_msg_ptr(msg,dd_buffer); CHKERRN(0);
425         BMset_msg_size(msg,dd_size); CHKERRN(0);
426         BMsend_block_msg(to_phase,msg,procinfo); CHKERRN(0);
427         BMset_msg_ptr(msg,NULL); CHKERRN(0);
428 
429         MY_FREE(dd_buffer);
430       } else {
431         MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n");
432       }
433     }
434     CHKERRN(0);
435     BMsend_block_msg(to_phase,NULL,procinfo); CHKERRN(0);
436 
437     /* retrieve my part of the column and put it in the data structure */
438     /* the following part is modified (ILU) */
439     MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
440     while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
441       list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
442       MY_NEXT_IN_TREE(tree_node_ptr);
443       if (list_data_ptr->length <= 0) continue;
444       in_ind = list_data_ptr->in_ind;
445       cl_ind = list_data_ptr->cl_ind;
446       length = inodes[in_ind].length;
447       num_cols = inodes[in_ind].num_cols;
448       nzptr = inodes[in_ind].nz;
449       d_buffer = list_data_ptr->d_buffer;
450       i_buffer = list_data_ptr->i_buffer;
451       for (j=0;j<num_cols;j++) {
452         for (k=0;k<length;k++) {
453 	  if (i_buffer[k] > clique2inode->g_offset[cl_ind])
454             nzptr[(j*length)+k] = d_buffer[k];
455         }
456         d_buffer += list_data_ptr->length;
457       }
458     }
459 
460     /* now update the remaining matrix using the local columns */
461     /* the following part is modified (ILU) */
462     MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
463     while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
464       list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
465       MY_NEXT_IN_TREE(tree_node_ptr);
466       if (list_data_ptr->length <= 0) continue;
467       in_ind = list_data_ptr->in_ind;
468       gnum = inodes[in_ind].gcol_num;
469 
470       MY_MALLOCN(dd_buffer,(FLOAT *),list_data_ptr->length*
471 		list_data_ptr->num_cols*sizeof(FLOAT),1);
472       k = 0;
473       for (l=0; l<list_data_ptr->num_cols; l++) {
474         MY_SEARCH_TREE(rowtree,gnum+l,found,node_ptr);
475         if (found) {
476          col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
477          for (j=0; j<col_list_ptr->count; j++)
478            dd_buffer[k+j] = col_list_ptr->nz[j][0];
479          k += col_list_ptr->count;
480         }
481 	else {
482 	  printf("Error: Something wrong with rowtree.\n");
483         }
484       }
485 
486       BSilu_updt_matrix(A,gnum,rowtree,
487 	inode_tree,list_data_ptr->length,
488         list_data_ptr->num_cols,
489         list_data_ptr->i_buffer,list_data_ptr->d_buffer,
490         col_list_ptr->count,col_list_ptr->col,dd_buffer,
491         color2clique->numbers[i+1],
492         color2clique->numbers[color2clique->length-1],clique2inode,
493         inodes,procinfo); CHKERRN(0);
494 
495       MY_FREE(dd_buffer);
496     }
497 
498     /* receive the incoming columns and update the remaining matrix */
499     /* strip out the "local" part and put it in the matrix */
500     /* the following part is modified (ILU) */
501     while ((msg = BMrecv_block_msg(from_phase,procinfo)) != NULL) {
502       CHKERRN(0);
503       BMcheck_on_async_block(to_phase); CHKERRN(0);
504       msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0);
505       BMset_msg_ptr(msg,NULL); CHKERRN(0);
506       data_ptr = BMget_user(msg,&msg_len); CHKERRN(0);
507       cl_ind = data_ptr[0];
508       in_ind = data_ptr[1];
509       /* let gnum be first global row location of my part */
510       gnum = g_row_num[iperm[inodes[in_ind].row_num[0]]];
511       num_cols = inodes[in_ind].num_cols;
512       length = inodes[in_ind].length;
513       msg_len = BMget_msg_size(msg); CHKERRN(0);
514       msg_len /= 2*sizeof(FLOAT)*num_cols+sizeof(int);
515 
516       /* find my local part */
517       i_buffer = (int *) &(msg_buf[2*num_cols*msg_len]);
518       for (ind=0;ind<msg_len;ind++) {
519         if (gnum == i_buffer[ind]) break;
520       }
521       nzptr = inodes[in_ind].nz;
522       d_buffer = &(msg_buf[ind]);
523       for (j=0;j<num_cols;j++) {
524         for (k=0;k<length;k++) {
525 	  if (i_buffer[ind+k] > clique2inode->g_offset[cl_ind])
526             nzptr[(j*length)+k] = d_buffer[k];
527         }
528         d_buffer += msg_len;
529       }
530 
531       /* update the remaining matrix */
532       BSilu_updt_matrix(A,inodes[in_ind].gcol_num,rowtree,
533 	inode_tree,msg_len,num_cols,i_buffer,msg_buf,msg_len,
534         i_buffer,&(msg_buf[msg_len*num_cols]),color2clique->numbers[i+1],
535         color2clique->numbers[color2clique->length-1],clique2inode,
536         inodes,procinfo); CHKERRN(0);
537     }
538     CHKERRN(0);
539     BMfree_block_msg(from_phase); CHKERRN(0);
540     BMfinish_on_async_block(to_phase); CHKERRN(0);
541 
542     /* now free up the list */
543     MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
544     while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
545       list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
546       MY_NEXT_IN_TREE(tree_node_ptr);
547       MY_FREE(list_data_ptr->d_buffer);
548     }
549     MY_FREE_TREE(recv_tree);
550   }
551 
552   /* now free the rowtree and other stuff (ILU) */
553   MY_FIRST_IN_TREE(rowtree,node_ptr);
554   while (! IS_TREE_PTR_NULL(node_ptr)) {
555     col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
556     MY_NEXT_IN_TREE(node_ptr);
557     MY_FREE(col_list_ptr->col);
558     MY_FREE(col_list_ptr->nz);
559   }
560   MY_FREE_TREE(rowtree);
561   MY_FREE_TREE(inode_tree);
562   MY_FREE(work);
563 
564   BMfinish_comp_msg(comm->from_msg,procinfo); CHKERRN(0);
565   BMfinish_comp_msg(comm->to_msg,procinfo); CHKERRN(0);
566   return(0);
567 }
568 
569 /*+ BSilu_updt_matrix - Update the matrix using a bunch of i-nodes
570 
571     Input Parameters:
572     I don't really know what these things do now.
573 
574     Output Parameters:
575     I don't really know what these things do now.
576 
577     Returns:
578     void
579 
580     Notes:
581     This code is very complicated and could be done much better.
582 
583 +*/
BSilu_updt_matrix(BSpar_mat * A,int colnum,BStree rowtree,BStree inode_tree,int len,int num_cols,int * updt_index,FLOAT * updt_inode,int len2,int * my_inode_ind,FLOAT * my_inode,int start,int end,BScl_2_inode * clique2inode,BSinode * inodes,BSprocinfo * procinfo)584 void BSilu_updt_matrix(BSpar_mat *A, int colnum, BStree rowtree,
585 BStree inode_tree, int len, int num_cols, int *updt_index,
586 FLOAT *updt_inode, int len2, int *my_inode_ind, FLOAT *my_inode,
587 int start, int end, BScl_2_inode *clique2inode, BSinode *inodes,
588 BSprocinfo *procinfo)
589 {
590   int  i, j;
591   int  updt_count, offset, toffset;
592   int  cl_ind, in_ind;
593   BSpermutation *iperm;
594   FLOAT  *work, *matrix;
595   int  *intptr;
596   FLOAT  *nzptr;
597   int  tcount, count2, gnum, size;
598   int  found;
599   BStree_ptr  tree_node_ptr;
600   int  *tree_data_ptr;
601 
602   int        k, l, *rowptr, *row_gnum, *g_iperm;
603   BStree_ptr node_ptr;
604   col_list   *col_list_ptr;
605   FLOAT      one = 1.0, none = -1.0;
606   char       TR = 'T', NTR = 'N';
607 
608   /* sort the updt_inode */
609   iperm = BSalloc_permutation(len); CHKERR(0);
610   for (i=0;i<len;i++) iperm->perm[i] = i;
611   BSheap_sort1(len,updt_index,iperm->perm); CHKERR(0);
612   MY_MALLOC(work,(FLOAT *),sizeof(FLOAT)*len,1);
613   for (i=0;i<num_cols;i++) {
614     BSiperm_dvec(&(updt_inode[i*len]),work,iperm); CHKERR(0);
615     for (j=0;j<len;j++) updt_inode[(i*len)+j] = work[j];
616   }
617   MY_FREE(work);
618   BSfree_permutation(iperm); CHKERR(0);
619 
620   row_gnum = A->global_row_num->numbers;
621   g_iperm = A->inv_perm->perm;
622 
623   updt_count = 0;
624   while (updt_count < len) {
625     MY_SEARCH_TREE(inode_tree,updt_index[updt_count],found,tree_node_ptr);
626     if (found) {
627       tree_data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr);
628       in_ind = tree_data_ptr[0];
629       cl_ind = tree_data_ptr[1];
630 
631       /* make sure the nz are below pivot (ILU) */
632       if (clique2inode->g_offset[cl_ind] > colnum) {
633         /* update the inode */
634         tcount = 0;
635         count2 = 0;
636         MY_SEARCH_TREE(rowtree,updt_index[updt_count],found,node_ptr);
637 	if (found) {
638 	  col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
639 	  intptr = col_list_ptr->col;
640           while ((tcount<len2) && (count2<col_list_ptr->count)) {
641             if ((my_inode_ind[tcount] == intptr[count2]) &
642 		(intptr[count2] > colnum)) {
643               MY_SEARCH_TREE(inode_tree,intptr[count2],found,tree_node_ptr);
644 	      tree_data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr);
645               i = tree_data_ptr[0];
646 	      nzptr  = inodes[i].nz;
647 	      rowptr = inodes[i].row_num;
648 	      /* search the row that we are going to modify (ILU) */
649 	      for (j=0; j<inodes[i].length; j++)
650 		if (row_gnum[g_iperm[rowptr[j]]] == updt_index[updt_count])
651 		  break;
652 
653               /* usual Gaussian elimination step (ILU) */
654 	      if (j<inodes[i].length)
655 		DGEMM(&NTR,&TR,&(inodes[in_ind].num_cols),&(inodes[i].num_cols),
656 		     &num_cols,&none,&(updt_inode[updt_count]),&len,
657 		     &(my_inode[tcount]),&len2,&one,&(nzptr[j]),
658 		     &(inodes[i].length));
659 
660               MLOG_flop(2*num_cols*inodes[in_ind].num_cols);
661               tcount += inodes[i].num_cols;
662               count2 += inodes[i].num_cols;
663             } else if (my_inode_ind[tcount] < intptr[count2]) {
664               tcount++;
665             } else {
666               count2++;
667             }
668           }
669           /* update a portion of the clique if it is local */
670           gnum = clique2inode->g_offset[cl_ind];
671           offset = updt_index[updt_count]-gnum;
672           matrix = clique2inode->d_mats[cl_ind].matrix;
673           size = clique2inode->d_mats[cl_ind].size;
674 	  for (j=0; j<len2; j++)
675 	    if (my_inode_ind[j] == updt_index[updt_count]) break;
676           if (j==len2) {
677 	    printf("Error: The matrix is not structurally symmetric!\n");
678           }
679           for (k=0; k<inodes[in_ind].num_cols; k++) {
680 	    tcount = updt_count;
681 	    toffset = updt_index[updt_count+k]-gnum;
682 	    if (toffset < size) {
683 	      while ((tcount < len) & (updt_index[tcount]-gnum < size)) {
684 		for (l=0; l<num_cols; l++) {
685 		  matrix[toffset*size+offset+tcount-updt_count] -=
686 		    my_inode[l*len2+j+k]*updt_inode[l*len+tcount];
687                 }
688                 MLOG_flop(2*num_cols);
689 	        tcount++;
690               }
691             }
692           }
693 	  /* end of update clique */
694         }
695       }
696       updt_count += inodes[in_ind].num_cols;
697     } else {
698       updt_count++;
699     }
700   }
701 }
702