1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 /******************************************************************************
9 *
10 * HYPRE_DDILUT interface
11 *
12 *****************************************************************************/
13
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <math.h>
17
18 #include "utilities/_hypre_utilities.h"
19 #include "HYPRE.h"
20 #include "IJ_mv/HYPRE_IJ_mv.h"
21 #include "parcsr_mv/HYPRE_parcsr_mv.h"
22 #include "parcsr_mv/_hypre_parcsr_mv.h"
23 #include "parcsr_ls/HYPRE_parcsr_ls.h"
24 #include "HYPRE_MHMatrix.h"
25
26 #ifdef HAVE_ML
27
28 #include "ml_struct.h"
29 #include "ml_aggregate.h"
30
31 #endif
32
33 #include "HYPRE_MHMatrix.h"
34 #include "HYPRE_FEI.h"
35
36 extern int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix,MH_Matrix *,
37 MPI_Comm, int *, MH_Context *);
38 extern int HYPRE_LSI_DDIlutGetRowLengths(MH_Matrix *,int *, int **,MPI_Comm);
39 extern int HYPRE_LSI_DDIlutGetOffProcRows(MH_Matrix *Amat, int leng, int *,
40 int Noffset, int *map, int *map2, int **int_buf,
41 double **dble_buf, MPI_Comm mpi_comm);
42 extern int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
43 int total_recv_leng, int *recv_lengths, int *ext_ja,
44 double *ext_aa, int *map, int *map2, int Noffset);
45 extern int HYPRE_LSI_DDIlutDecompose2(HYPRE_LSI_DDIlut *ilut_ptr,
46 MH_Matrix *Amat,int total_recv_leng, int *recv_lengths,
47 int *ext_ja, double *ext_aa, int *map, int *map2, int Noffset);
48 extern void HYPRE_LSI_qsort1a(int *, int *, int, int);
49 extern void hypre_qsort0(int *, int, int);
50 extern int HYPRE_LSI_SplitDSort(double*,int,int*,int);
51 extern int MH_ExchBdry(double *, void *);
52 extern int MH_ExchBdryBack(double *, void *, int *, double **, int **);
53 extern int MH_GetRow(void *, int, int *, int, int *, double *, int *);
54 extern int HYPRE_LSI_Cuthill(int, int *, int *, double *, int *, int *);
55 extern int HYPRE_LSI_Search(int *, int, int);
56
57 #define habs(x) ((x) > 0 ? (x) : -(x))
58
59 /*--------------------------------------------------------------------------
60 * HYPRE_LSI_DDIlutCreate - Return a DDIlut preconditioner object "solver".
61 *--------------------------------------------------------------------------*/
62
HYPRE_LSI_DDIlutCreate(MPI_Comm comm,HYPRE_Solver * solver)63 int HYPRE_LSI_DDIlutCreate( MPI_Comm comm, HYPRE_Solver *solver )
64 {
65 HYPRE_LSI_DDIlut *ilut_ptr;
66
67 ilut_ptr = hypre_TAlloc(HYPRE_LSI_DDIlut, 1, HYPRE_MEMORY_HOST);
68
69 if (ilut_ptr == NULL) return 1;
70
71 ilut_ptr->comm = comm;
72 ilut_ptr->mh_mat = NULL;
73 ilut_ptr->fillin = 0.0;
74 ilut_ptr->thresh = 0.0; /* defaults */
75 ilut_ptr->mat_ia = NULL;
76 ilut_ptr->mat_ja = NULL;
77 ilut_ptr->mat_aa = NULL;
78 ilut_ptr->outputLevel = 0;
79 ilut_ptr->overlap = 0;
80 ilut_ptr->order_array = NULL;
81 ilut_ptr->reorder_array = NULL;
82 ilut_ptr->reorder = 0;
83
84 *solver = (HYPRE_Solver) ilut_ptr;
85
86 return 0;
87 }
88
89 /*--------------------------------------------------------------------------
90 * HYPRE_LSI_DDIlutDestroy - Destroy a DDIlut object.
91 *--------------------------------------------------------------------------*/
92
HYPRE_LSI_DDIlutDestroy(HYPRE_Solver solver)93 int HYPRE_LSI_DDIlutDestroy( HYPRE_Solver solver )
94 {
95 int i;
96 HYPRE_LSI_DDIlut *ilut_ptr;
97
98 ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
99 hypre_TFree(ilut_ptr->mat_ia, HYPRE_MEMORY_HOST);
100 hypre_TFree(ilut_ptr->mat_ja, HYPRE_MEMORY_HOST);
101 hypre_TFree(ilut_ptr->mat_aa, HYPRE_MEMORY_HOST);
102 if ( ilut_ptr->mh_mat != NULL )
103 {
104 hypre_TFree(ilut_ptr->mh_mat->sendProc, HYPRE_MEMORY_HOST);
105 hypre_TFree(ilut_ptr->mh_mat->sendLeng, HYPRE_MEMORY_HOST);
106 hypre_TFree(ilut_ptr->mh_mat->recvProc, HYPRE_MEMORY_HOST);
107 hypre_TFree(ilut_ptr->mh_mat->recvLeng, HYPRE_MEMORY_HOST);
108 for ( i = 0; i < ilut_ptr->mh_mat->sendProcCnt; i++ )
109 hypre_TFree(ilut_ptr->mh_mat->sendList[i], HYPRE_MEMORY_HOST);
110 hypre_TFree(ilut_ptr->mh_mat->sendList, HYPRE_MEMORY_HOST);
111 hypre_TFree(ilut_ptr->mh_mat, HYPRE_MEMORY_HOST);
112 }
113 ilut_ptr->mh_mat = NULL;
114 hypre_TFree(ilut_ptr->order_array, HYPRE_MEMORY_HOST);
115 hypre_TFree(ilut_ptr->reorder_array, HYPRE_MEMORY_HOST);
116 hypre_TFree(ilut_ptr, HYPRE_MEMORY_HOST);
117
118 return 0;
119 }
120
121 /*--------------------------------------------------------------------------
122 * HYPRE_LSI_DDIlutSetFillin - Set the fill-in parameter.
123 *--------------------------------------------------------------------------*/
124
HYPRE_LSI_DDIlutSetFillin(HYPRE_Solver solver,double fillin)125 int HYPRE_LSI_DDIlutSetFillin(HYPRE_Solver solver, double fillin)
126 {
127 HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
128
129 ilut_ptr->fillin = fillin;
130
131 return 0;
132 }
133
134 /*--------------------------------------------------------------------------
135 * HYPRE_LSI_DDIlutSetDropTolerance - Set the threshold for dropping
136 *--------------------------------------------------------------------------*/
137
HYPRE_LSI_DDIlutSetDropTolerance(HYPRE_Solver solver,double thresh)138 int HYPRE_LSI_DDIlutSetDropTolerance(HYPRE_Solver solver, double thresh)
139 {
140 HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
141
142 ilut_ptr->thresh = thresh;
143
144 return 0;
145 }
146
147 /*--------------------------------------------------------------------------
148 * HYPRE_LSI_DDIlutSetOverlap - turn on overlap
149 *--------------------------------------------------------------------------*/
150
HYPRE_LSI_DDIlutSetOverlap(HYPRE_Solver solver)151 int HYPRE_LSI_DDIlutSetOverlap(HYPRE_Solver solver)
152 {
153 HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
154
155 ilut_ptr->overlap = 1;
156
157 return 0;
158 }
159
160 /*--------------------------------------------------------------------------
161 * HYPRE_LSI_DDIlutSetReorder - turn on reordering
162 *--------------------------------------------------------------------------*/
163
HYPRE_LSI_DDIlutSetReorder(HYPRE_Solver solver)164 int HYPRE_LSI_DDIlutSetReorder(HYPRE_Solver solver)
165 {
166 HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
167
168 ilut_ptr->reorder = 1;
169
170 return 0;
171 }
172
173 /*--------------------------------------------------------------------------
174 * HYPRE_LSI_DDIlutSetOutputLevel - Set debug level
175 *--------------------------------------------------------------------------*/
176
HYPRE_LSI_DDIlutSetOutputLevel(HYPRE_Solver solver,int level)177 int HYPRE_LSI_DDIlutSetOutputLevel(HYPRE_Solver solver, int level)
178 {
179 HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
180
181 ilut_ptr->outputLevel = level;
182
183 return 0;
184 }
185
186 /*--------------------------------------------------------------------------
187 * HYPRE_LSI_DDIlutSolve - Solve function for DDILUT.
188 *--------------------------------------------------------------------------*/
189
HYPRE_LSI_DDIlutSolve(HYPRE_Solver solver,HYPRE_ParCSRMatrix A,HYPRE_ParVector b,HYPRE_ParVector x)190 int HYPRE_LSI_DDIlutSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A,
191 HYPRE_ParVector b, HYPRE_ParVector x )
192 {
193 int i, j, *idiag, Nrows, extNrows, *mat_ia, *mat_ja;
194 int column, *order_list, *reorder_list, order_flag;
195 double *rhs, *soln, *dbuffer, ddata, *mat_aa;
196 HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
197 MH_Context *context;
198 MPI_Comm mpi_comm;
199
200 rhs = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) b));
201 soln = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) x));
202
203 Nrows = ilut_ptr->Nrows;
204 extNrows = ilut_ptr->extNrows;
205 mat_ia = ilut_ptr->mat_ia;
206 mat_ja = ilut_ptr->mat_ja;
207 mat_aa = ilut_ptr->mat_aa;
208 order_list = ilut_ptr->order_array;
209 reorder_list = ilut_ptr->reorder_array;
210 order_flag = ilut_ptr->reorder;
211
212 dbuffer = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
213 idiag = hypre_TAlloc(int, extNrows , HYPRE_MEMORY_HOST);
214 for ( i = 0; i < Nrows; i++ ) dbuffer[i] = rhs[i];
215
216 HYPRE_ParCSRMatrixGetComm(A, &mpi_comm);
217 context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
218 context->Amat = ilut_ptr->mh_mat;
219 context->comm = mpi_comm;
220
221 if ( extNrows > Nrows ) MH_ExchBdry(dbuffer, context);
222 if ( order_flag )
223 for ( i = 0; i < Nrows; i++ ) dbuffer[i] = rhs[order_list[i]];
224 else
225 for ( i = 0; i < Nrows; i++ ) dbuffer[i] = rhs[i];
226
227 for ( i = 0; i < extNrows; i++ )
228 {
229 ddata = 0.0;
230 for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
231 {
232 column = mat_ja[j];
233 if ( column == i ) { idiag[i] = j; break;}
234 ddata += mat_aa[j] * dbuffer[column];
235 }
236 dbuffer[i] -= ddata;
237 }
238 for ( i = extNrows-1; i >= 0; i-- )
239 {
240 ddata = 0.0;
241 for ( j = idiag[i]+1; j < mat_ia[i+1]; j++ )
242 {
243 column = mat_ja[j];
244 ddata += mat_aa[j] * dbuffer[column];
245 }
246 dbuffer[i] -= ddata;
247 dbuffer[i] /= mat_aa[idiag[i]];
248 }
249 if ( order_flag )
250 for ( i = 0; i < Nrows; i++ ) soln[i] = dbuffer[reorder_list[i]];
251 else
252 for ( i = 0; i < Nrows; i++ ) soln[i] = dbuffer[i];
253 hypre_TFree(dbuffer, HYPRE_MEMORY_HOST);
254 hypre_TFree(idiag, HYPRE_MEMORY_HOST);
255 hypre_TFree(context, HYPRE_MEMORY_HOST);
256
257 return 0;
258 }
259
260 /*--------------------------------------------------------------------------
261 * HYPRE_LSI_DDIlutSetup - Set up function for LSI_DDIlut.
262 *--------------------------------------------------------------------------*/
263
HYPRE_LSI_DDIlutSetup(HYPRE_Solver solver,HYPRE_ParCSRMatrix A_csr,HYPRE_ParVector b,HYPRE_ParVector x)264 int HYPRE_LSI_DDIlutSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr,
265 HYPRE_ParVector b, HYPRE_ParVector x )
266 {
267 int i, j, offset, total_recv_leng, *recv_lengths=NULL;
268 int *int_buf=NULL, mypid, nprocs, *parray;
269 int *map=NULL, *map2=NULL, *row_partition=NULL,*parray2;
270 double *dble_buf=NULL;
271 HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
272 MH_Context *context=NULL;
273 MH_Matrix *mh_mat=NULL;
274 MPI_Comm mpi_comm;
275
276 /* ---------------------------------------------------------------- */
277 /* get the row information in my processors */
278 /* ---------------------------------------------------------------- */
279
280 HYPRE_ParCSRMatrixGetComm(A_csr, &mpi_comm);
281 MPI_Comm_rank(mpi_comm, &mypid);
282 MPI_Comm_size(mpi_comm, &nprocs);
283 HYPRE_ParCSRMatrixGetRowPartitioning(A_csr, &row_partition);
284
285 /* ---------------------------------------------------------------- */
286 /* convert the incoming CSR matrix into a MH matrix */
287 /* ---------------------------------------------------------------- */
288
289 context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
290 context->comm = mpi_comm;
291 context->globalEqns = row_partition[nprocs];
292 context->partition = hypre_TAlloc(int, (nprocs+1), HYPRE_MEMORY_HOST);
293 for (i=0; i<=nprocs; i++) context->partition[i] = row_partition[i];
294 hypre_TFree( row_partition , HYPRE_MEMORY_HOST);
295 mh_mat = hypre_TAlloc( MH_Matrix, 1, HYPRE_MEMORY_HOST);
296 context->Amat = mh_mat;
297 HYPRE_LSI_MLConstructMHMatrix(A_csr,mh_mat,mpi_comm,
298 context->partition,context);
299
300 /* ---------------------------------------------------------------- */
301 /* compose the enlarged overlapped local matrix */
302 /* ---------------------------------------------------------------- */
303
304 if ( ilut_ptr->overlap != 0 )
305 {
306 HYPRE_LSI_DDIlutComposeOverlappedMatrix(mh_mat, &total_recv_leng,
307 &recv_lengths, &int_buf, &dble_buf, &map, &map2,&offset,
308 mpi_comm);
309 }
310 else
311 {
312 total_recv_leng = 0;
313 recv_lengths = NULL;
314 int_buf = NULL;
315 dble_buf = NULL;
316 map = NULL;
317 map2 = NULL;
318 parray = hypre_TAlloc(int, nprocs , HYPRE_MEMORY_HOST);
319 parray2 = hypre_TAlloc(int, nprocs , HYPRE_MEMORY_HOST);
320 for ( i = 0; i < nprocs; i++ ) parray2[i] = 0;
321 parray2[mypid] = mh_mat->Nrows;
322 MPI_Allreduce(parray2,parray,nprocs,MPI_INT,MPI_SUM,mpi_comm);
323 offset = 0;
324 for (i = 0; i < mypid; i++) offset += parray[i];
325 hypre_TFree(parray, HYPRE_MEMORY_HOST);
326 hypre_TFree(parray2, HYPRE_MEMORY_HOST);
327 }
328
329 /* ---------------------------------------------------------------- */
330 /* perform ILUT decomposition on local matrix */
331 /* ---------------------------------------------------------------- */
332
333 if ( ilut_ptr->mat_ia == NULL )
334 HYPRE_LSI_DDIlutDecompose(ilut_ptr,mh_mat,total_recv_leng,recv_lengths,
335 int_buf, dble_buf, map,map2, offset);
336 else
337 {
338 HYPRE_LSI_DDIlutDecompose2(ilut_ptr,mh_mat,total_recv_leng,recv_lengths,
339 int_buf, dble_buf, map,map2, offset);
340 if ( mypid == 0 && ilut_ptr->outputLevel >= 1 )
341 printf("DDILUT : preconditioner pattern reused.\n");
342 }
343 if ( mypid == 0 && ilut_ptr->outputLevel > 2 )
344 {
345 for ( i = 0; i < ilut_ptr->extNrows; i++ )
346 for ( j = ilut_ptr->mat_ia[i]; j < ilut_ptr->mat_ia[i+1]; j++ )
347 printf("LA(%d,%d) = %e;\n", i+1, ilut_ptr->mat_ja[j]+1,
348 ilut_ptr->mat_aa[j]);
349 }
350
351 ilut_ptr->mh_mat = mh_mat;
352 hypre_TFree(mh_mat->rowptr, HYPRE_MEMORY_HOST);
353 hypre_TFree(mh_mat->colnum, HYPRE_MEMORY_HOST);
354 hypre_TFree(mh_mat->values, HYPRE_MEMORY_HOST);
355 hypre_TFree(map, HYPRE_MEMORY_HOST);
356 hypre_TFree(map2, HYPRE_MEMORY_HOST);
357 hypre_TFree(int_buf, HYPRE_MEMORY_HOST);
358 hypre_TFree(dble_buf , HYPRE_MEMORY_HOST);
359 hypre_TFree(recv_lengths, HYPRE_MEMORY_HOST);
360 hypre_TFree(context->partition, HYPRE_MEMORY_HOST);
361 hypre_TFree(context, HYPRE_MEMORY_HOST);
362 return 0;
363 }
364
365 /*****************************************************************************/
366 /* subroutines used for constructing overlapped matrix */
367 /*****************************************************************************/
368
HYPRE_LSI_DDIlutGetRowLengths(MH_Matrix * Amat,int * leng,int ** recv_leng,MPI_Comm mpi_comm)369 int HYPRE_LSI_DDIlutGetRowLengths(MH_Matrix *Amat, int *leng, int **recv_leng,
370 MPI_Comm mpi_comm)
371 {
372 int i, j, m, mypid, index, *temp_list, allocated_space, length;
373 int nRecv, *recvProc, *recvLeng, *cols, total_recv, mtype, msgtype;
374 int nSend, *sendProc, *sendLeng, **sendList, proc_id, offset;
375 double *vals;
376 MPI_Request *Request;
377 MPI_Status status;
378 MH_Context *context;
379
380 /* ---------------------------------------------------------------- */
381 /* fetch communication information */
382 /* ---------------------------------------------------------------- */
383
384 MPI_Comm_rank(mpi_comm, &mypid);
385 nRecv = Amat->recvProcCnt;
386 nSend = Amat->sendProcCnt;
387 recvProc = Amat->recvProc;
388 recvLeng = Amat->recvLeng;
389 sendProc = Amat->sendProc;
390 sendLeng = Amat->sendLeng;
391 sendList = Amat->sendList;
392 total_recv = 0;
393 for ( i = 0; i < nRecv; i++ ) total_recv += recvLeng[i];
394
395 (*leng) = total_recv;
396 if ( nRecv <= 0 ) (*recv_leng) = NULL;
397
398 MPI_Barrier(mpi_comm);
399
400 /* ---------------------------------------------------------------- */
401 /* post receives for all messages */
402 /* ---------------------------------------------------------------- */
403
404 (*recv_leng) = hypre_TAlloc(int, total_recv , HYPRE_MEMORY_HOST);
405 if (nRecv > 0) Request = hypre_TAlloc(MPI_Request, nRecv, HYPRE_MEMORY_HOST);
406 offset = 0;
407 mtype = 2001;
408 for (i = 0; i < nRecv; i++)
409 {
410 proc_id = recvProc[i];
411 msgtype = mtype;
412 length = recvLeng[i];
413 MPI_Irecv((void *) &((*recv_leng)[offset]), length, MPI_INT, proc_id,
414 msgtype, mpi_comm, &Request[i]);
415 offset += length;
416 }
417
418 /* ---------------------------------------------------------------- */
419 /* write out all messages */
420 /* ---------------------------------------------------------------- */
421
422 context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
423 context->Amat = Amat;
424 allocated_space = 100;
425 cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
426 vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
427 for (i = 0; i < nSend; i++)
428 {
429 proc_id = sendProc[i];
430 length = sendLeng[i];
431 temp_list = hypre_TAlloc(int, sendLeng[i] , HYPRE_MEMORY_HOST);
432 for (j = 0; j < length; j++)
433 {
434 index = sendList[i][j];
435 while (MH_GetRow(context,1,&index,allocated_space,cols,vals,&m)==0)
436 {
437 hypre_TFree(cols, HYPRE_MEMORY_HOST);
438 hypre_TFree(vals, HYPRE_MEMORY_HOST);
439 allocated_space += 200 + 1;
440 cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
441 vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
442 }
443 temp_list[j] = m;
444 }
445 msgtype = mtype;
446 MPI_Send((void*)temp_list,length,MPI_INT,proc_id,msgtype,mpi_comm);
447 hypre_TFree(temp_list, HYPRE_MEMORY_HOST);
448 }
449 hypre_TFree(cols, HYPRE_MEMORY_HOST);
450 hypre_TFree(vals, HYPRE_MEMORY_HOST);
451 hypre_TFree(context, HYPRE_MEMORY_HOST);
452
453 /* ---------------------------------------------------------------- */
454 /* wait for messages */
455 /* ---------------------------------------------------------------- */
456
457 for ( i = 0; i < nRecv; i++ )
458 {
459 MPI_Wait( &Request[i], &status );
460 }
461
462 if (nRecv > 0)
463 hypre_TFree(Request, HYPRE_MEMORY_HOST);
464 return 0;
465 }
466
467 /*****************************************************************************/
468 /* needed for overlapped smoothers */
469 /*****************************************************************************/
470
HYPRE_LSI_DDIlutGetOffProcRows(MH_Matrix * Amat,int leng,int * recv_leng,int Noffset,int * map,int * map2,int ** int_buf,double ** dble_buf,MPI_Comm mpi_comm)471 int HYPRE_LSI_DDIlutGetOffProcRows(MH_Matrix *Amat, int leng, int *recv_leng,
472 int Noffset, int *map, int *map2, int **int_buf,
473 double **dble_buf, MPI_Comm mpi_comm)
474 {
475 int i, j, k, m, length, offset, allocated_space, proc_id;
476 int nRecv, nSend, *recvProc, *sendProc, total_recv, mtype, msgtype;
477 int *sendLeng, *recvLeng, **sendList, *cols, *isend_buf, Nrows;
478 int nnz, nnz_offset, index, mypid;
479 double *vals, *send_buf;
480 MPI_Request *request;
481 MPI_Status status;
482 MH_Context *context;
483
484 /* ---------------------------------------------------------------- */
485 /* fetch communication information */
486 /* ---------------------------------------------------------------- */
487
488 MPI_Comm_rank(mpi_comm, &mypid);
489 Nrows = Amat->Nrows;
490 nRecv = Amat->recvProcCnt;
491 nSend = Amat->sendProcCnt;
492 recvProc = Amat->recvProc;
493 recvLeng = Amat->recvLeng;
494 sendProc = Amat->sendProc;
495 sendLeng = Amat->sendLeng;
496 sendList = Amat->sendList;
497 if ( nRecv <= 0 ) { (*int_buf) = NULL; (*dble_buf) = NULL;}
498 total_recv = 0;
499 for ( i = 0; i < leng; i++ ) total_recv += recv_leng[i];
500
501 /* ---------------------------------------------------------------- */
502 /* allocate buffer space */
503 /* ---------------------------------------------------------------- */
504
505 if ( nRecv > 0 )
506 request = hypre_TAlloc(MPI_Request , nRecv, HYPRE_MEMORY_HOST);
507 else request = NULL;
508
509 if ( total_recv > 0 )
510 {
511 (*int_buf) = hypre_TAlloc(int, total_recv , HYPRE_MEMORY_HOST);
512 (*dble_buf) = hypre_TAlloc(double, total_recv , HYPRE_MEMORY_HOST);
513 }
514
515 /* ---------------------------------------------------------------- */
516 /* post receives for all messages */
517 /* ---------------------------------------------------------------- */
518
519 offset = 0;
520 mtype = 2002;
521 nnz_offset = 0;
522 for (i = 0; i < nRecv; i++)
523 {
524 proc_id = recvProc[i];
525 msgtype = mtype;
526 length = recvLeng[i];
527 nnz = 0;
528 for (j = 0; j < length; j++) nnz += recv_leng[offset+j];
529
530 MPI_Irecv((void *) &((*dble_buf)[nnz_offset]), nnz, MPI_DOUBLE,
531 proc_id, msgtype, mpi_comm, request+i);
532 offset += length;
533 nnz_offset += nnz;
534 }
535
536 /* ---------------------------------------------------------------- */
537 /* send rows to other processors */
538 /* ---------------------------------------------------------------- */
539
540 context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
541 context->Amat = Amat;
542 mtype = 2002;
543 allocated_space = 100;
544 cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
545 vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
546 for (i = 0; i < nSend; i++)
547 {
548 proc_id = sendProc[i];
549 length = sendLeng[i];
550 nnz = 0;
551 for (j = 0; j < length; j++)
552 {
553 index = sendList[i][j];
554 while (MH_GetRow(context,1,&index,allocated_space,cols,vals,&m)==0)
555 {
556 hypre_TFree(cols, HYPRE_MEMORY_HOST);
557 hypre_TFree(vals, HYPRE_MEMORY_HOST);
558 allocated_space += 200 + 1;
559 cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
560 vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
561 }
562 nnz += m;
563 }
564 if ( nnz > 0 ) send_buf = hypre_TAlloc(double, nnz , HYPRE_MEMORY_HOST);
565 offset = 0;
566 for (j = 0; j < length; j++)
567 {
568 index = sendList[i][j];
569 MH_GetRow(context,1,&index,allocated_space,cols,vals,&m);
570 for (k = 0; k < m; k++) send_buf[offset+k] = vals[k];
571 offset += m;
572 }
573 msgtype = mtype;
574 MPI_Send((void*) send_buf, nnz, MPI_DOUBLE, proc_id, msgtype,
575 mpi_comm);
576 if ( nnz > 0 )
577 hypre_TFree(send_buf, HYPRE_MEMORY_HOST);
578 }
579
580 hypre_TFree(cols, HYPRE_MEMORY_HOST);
581 hypre_TFree(vals, HYPRE_MEMORY_HOST);
582
583 /* ---------------------------------------------------------------- */
584 /* wait for all messages */
585 /* ---------------------------------------------------------------- */
586
587 for (i = 0; i < nRecv; i++)
588 {
589 MPI_Wait(request+i, &status);
590 }
591
592 /* ----------------------------------------------------------- */
593 /* post receives for all messages */
594 /* ----------------------------------------------------------- */
595
596 mtype = 2003;
597 offset = 0;
598 nnz_offset = 0;
599 for (i = 0; i < nRecv; i++)
600 {
601 proc_id = recvProc[i];
602 msgtype = mtype;
603 length = recvLeng[i];
604 nnz = 0;
605 for (j = 0; j < length; j++) nnz += recv_leng[offset+j];
606 MPI_Irecv((void *) &((*int_buf)[nnz_offset]), nnz, MPI_INT,
607 proc_id, msgtype, mpi_comm, request+i);
608 offset += length;
609 nnz_offset += nnz;
610 }
611
612 /* ---------------------------------------------------------------- */
613 /* send rows to other processors */
614 /* ---------------------------------------------------------------- */
615
616 mtype = 2003;
617 cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
618 vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
619 for (i = 0; i < nSend; i++)
620 {
621 proc_id = sendProc[i];
622 length = sendLeng[i];
623 nnz = 0;
624 for (j = 0; j < length; j++)
625 {
626 index = sendList[i][j];
627 MH_GetRow(context,1,&index,allocated_space,cols,vals,&m);
628 nnz += m;
629 }
630 if ( nnz > 0 ) isend_buf = hypre_TAlloc(int, nnz , HYPRE_MEMORY_HOST);
631 offset = 0;
632 for (j = 0; j < length; j++)
633 {
634 index = sendList[i][j];
635 MH_GetRow(context,1,&index,allocated_space,cols,vals,&m);
636 for (k = 0; k < m; k++)
637 {
638 if ( cols[k] < Nrows ) isend_buf[offset+k] = cols[k] + Noffset;
639 else isend_buf[offset+k] = map[cols[k]-Nrows];
640 }
641 offset += m;
642 }
643 msgtype = mtype;
644 MPI_Send((void*) isend_buf, nnz, MPI_INT, proc_id, msgtype,
645 mpi_comm);
646 if ( nnz > 0 )
647 hypre_TFree(isend_buf, HYPRE_MEMORY_HOST);
648 }
649 hypre_TFree(cols, HYPRE_MEMORY_HOST);
650 hypre_TFree(vals, HYPRE_MEMORY_HOST);
651
652 /* ----------------------------------------------------------- */
653 /* post receives for all messages */
654 /* ----------------------------------------------------------- */
655
656 for (i = 0; i < nRecv; i++)
657 {
658 MPI_Wait(request+i, &status);
659 }
660
661 hypre_TFree(request, HYPRE_MEMORY_HOST);
662 hypre_TFree(context, HYPRE_MEMORY_HOST);
663 return 0;
664 }
665
666 /*****************************************************************************/
667 /* construct an enlarged overlapped local matrix */
668 /*****************************************************************************/
669
HYPRE_LSI_DDIlutComposeOverlappedMatrix(MH_Matrix * mh_mat,int * total_recv_leng,int ** recv_lengths,int ** int_buf,double ** dble_buf,int ** sindex_array,int ** sindex_array2,int * offset,MPI_Comm mpi_comm)670 int HYPRE_LSI_DDIlutComposeOverlappedMatrix(MH_Matrix *mh_mat,
671 int *total_recv_leng, int **recv_lengths, int **int_buf,
672 double **dble_buf, int **sindex_array, int **sindex_array2,
673 int *offset, MPI_Comm mpi_comm)
674 {
675 int i, nprocs, mypid, Nrows, *proc_array, *proc_array2;
676 int extNrows, NrowsOffset, *index_array, *index_array2;
677 int nRecv, *recvLeng;
678 double *dble_array;
679 MH_Context *context;
680
681 /* ---------------------------------------------------------------- */
682 /* fetch communication information */
683 /* ---------------------------------------------------------------- */
684
685 MPI_Comm_rank(mpi_comm, &mypid);
686 MPI_Comm_size(mpi_comm, &nprocs);
687
688 /* ---------------------------------------------------------------- */
689 /* fetch matrix information */
690 /* ---------------------------------------------------------------- */
691
692 nRecv = mh_mat->recvProcCnt;
693 recvLeng = mh_mat->recvLeng;
694 Nrows = mh_mat->Nrows;
695
696 /* ---------------------------------------------------------------- */
697 /* compute the enlarged matrix size */
698 /* ---------------------------------------------------------------- */
699
700 (*total_recv_leng) = 0;
701 for ( i = 0; i < nRecv; i++ ) (*total_recv_leng) += recvLeng[i];
702 extNrows = Nrows + (*total_recv_leng);
703
704 /* ---------------------------------------------------------------- */
705 /* compose NrowsOffset and processor offsets proc_array */
706 /* ---------------------------------------------------------------- */
707
708 proc_array = hypre_TAlloc(int, nprocs , HYPRE_MEMORY_HOST);
709 proc_array2 = hypre_TAlloc(int, nprocs , HYPRE_MEMORY_HOST);
710 for ( i = 0; i < nprocs; i++ ) proc_array2[i] = 0;
711 proc_array2[mypid] = Nrows;
712 MPI_Allreduce(proc_array2,proc_array,nprocs,MPI_INT,MPI_SUM,mpi_comm);
713 NrowsOffset = 0;
714 for (i = 0; i < mypid; i++) NrowsOffset += proc_array[i];
715 for (i = 1; i < nprocs; i++) proc_array[i] += proc_array[i-1];
716 hypre_TFree(proc_array2, HYPRE_MEMORY_HOST);
717
718 /* ---------------------------------------------------------------- */
719 /* compose the column index map (index_array,index_array2) */
720 /* ---------------------------------------------------------------- */
721
722 context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
723 context->comm = mpi_comm;
724 context->Amat = mh_mat;
725 dble_array = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
726 for (i = Nrows; i < extNrows; i++) dble_array[i] = 0.0;
727 for (i = 0; i < Nrows; i++) dble_array[i] = 1.0 * ( i + NrowsOffset );
728 MH_ExchBdry(dble_array, context);
729 if ( extNrows-Nrows > 0 )
730 index_array = hypre_TAlloc(int, (extNrows-Nrows) , HYPRE_MEMORY_HOST);
731 else
732 index_array = NULL;
733 for (i = Nrows; i < extNrows; i++) index_array[i-Nrows] = dble_array[i];
734 if ( extNrows-Nrows > 0 )
735 index_array2 = hypre_TAlloc(int, (extNrows-Nrows) , HYPRE_MEMORY_HOST);
736 else
737 index_array2 = NULL;
738 for (i = 0; i < extNrows-Nrows; i++) index_array2[i] = i;
739 hypre_TFree(dble_array, HYPRE_MEMORY_HOST);
740 hypre_TFree(context, HYPRE_MEMORY_HOST);
741
742 /* ---------------------------------------------------------------- */
743 /* send the lengths of each row to remote processor */
744 /* at the end, additional row information should be given */
745 /* in total_recv_leng, recv_lengths, int_buf, dble_buf */
746 /* ---------------------------------------------------------------- */
747
748 HYPRE_LSI_DDIlutGetRowLengths(mh_mat,total_recv_leng,recv_lengths,mpi_comm);
749 HYPRE_LSI_DDIlutGetOffProcRows(mh_mat, *total_recv_leng, *recv_lengths,
750 NrowsOffset,index_array,index_array2,int_buf, dble_buf,mpi_comm);
751
752 hypre_TFree(proc_array, HYPRE_MEMORY_HOST);
753 HYPRE_LSI_qsort1a(index_array, index_array2, 0, extNrows-Nrows-1);
754 (*sindex_array) = index_array;
755 (*sindex_array2) = index_array2;
756 (*offset) = NrowsOffset;
757 return 0;
758 }
759
760 /*****************************************************************************/
761 /* function for doing ILUT decomposition */
762 /* ( based on ILU(0) + ILUT based on magnitude) */
763 /*****************************************************************************/
764
HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut * ilut_ptr,MH_Matrix * Amat,int total_recv_leng,int * recv_lengths,int * ext_ja,double * ext_aa,int * map,int * map2,int Noffset)765 int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
766 int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa,
767 int *map, int *map2, int Noffset)
768 {
769 int *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid;
770 int index, first, Lcount, Ucount, j, k, total_nnz;
771 int sortcnt, colIndex, offset, nnz_count, Nrows, extNrows;
772 int *track_array, track_leng, num_small_pivot, printstep, nnz_row;
773 int *sortcols, *Amat_ia, *Amat_ja, *order_list, *reorder_list;
774 int max_nnz_row, touch_cnt=0, order_flag;
775 double *vals, ddata, *mat_aa, *diagonal, *rowNorms, *Norm2;
776 double *dble_buf, fillin, tau, rel_tau, *sortvals, *Amat_aa;
777 MH_Context *context;
778
779 /* ---------------------------------------------------------------- */
780 /* fetch ILUT parameters */
781 /* ---------------------------------------------------------------- */
782
783 MPI_Comm_rank(ilut_ptr->comm, &mypid);
784 fillin = ilut_ptr->fillin;
785 tau = ilut_ptr->thresh;
786 Nrows = Amat->Nrows;
787 extNrows = Nrows + total_recv_leng;
788 ilut_ptr->Nrows = Nrows;
789 ilut_ptr->extNrows = extNrows;
790 order_flag = ilut_ptr->reorder;
791
792 /* ---------------------------------------------------------------- */
793 /* allocate temporary storage space */
794 /* ---------------------------------------------------------------- */
795
796 allocated_space = extNrows;
797 cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
798 vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
799 sortcols = hypre_TAlloc(int, extNrows , HYPRE_MEMORY_HOST);
800 sortvals = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
801 dble_buf = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
802 diagonal = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
803 rowNorms = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
804
805 /* ---------------------------------------------------------------- */
806 /* compute the storage requirement for the ILU matrix */
807 /* ---------------------------------------------------------------- */
808
809 context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
810 context->Amat = Amat;
811 total_nnz = 0;
812 for ( i = 0; i < Nrows; i++ )
813 {
814 rowNorms[i] = 0.0;
815 while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0)
816 {
817 hypre_TFree(vals, HYPRE_MEMORY_HOST);
818 hypre_TFree(cols, HYPRE_MEMORY_HOST);
819 allocated_space += 200 + 1;
820 cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
821 vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
822 }
823 total_nnz += m;
824 for ( j = 0; j < m; j++ ) rowNorms[i] += habs(vals[j]);
825 rowNorms[i] /= extNrows;
826 }
827 hypre_TFree(vals, HYPRE_MEMORY_HOST);
828 hypre_TFree(cols, HYPRE_MEMORY_HOST);
829
830 /* ---------------------------------------------------------------- */
831 /* permute the matrix */
832 /* ---------------------------------------------------------------- */
833
834 Amat_ia = hypre_TAlloc(int, (Nrows+1) , HYPRE_MEMORY_HOST);
835 Amat_ja = hypre_TAlloc(int, total_nnz , HYPRE_MEMORY_HOST);
836 Amat_aa = hypre_TAlloc(double, total_nnz , HYPRE_MEMORY_HOST);
837 total_nnz = 0;
838 Amat_ia[0] = total_nnz;
839 for ( i = 0; i < Nrows; i++ )
840 {
841 MH_GetRow(context,1,&i,allocated_space,&Amat_ja[total_nnz],
842 &Amat_aa[total_nnz],&m);
843 total_nnz += m;
844 Amat_ia[i+1] = total_nnz;
845 }
846
847 if ( order_flag )
848 {
849 order_list = hypre_TAlloc(int, Nrows , HYPRE_MEMORY_HOST);
850 reorder_list = hypre_TAlloc(int, Nrows , HYPRE_MEMORY_HOST);
851 for ( i = 0; i < Nrows; i++ ) order_list[i] = reorder_list[i] = i;
852 HYPRE_LSI_Cuthill(Nrows,Amat_ia,Amat_ja,Amat_aa,order_list,reorder_list);
853 ilut_ptr->order_array = order_list;
854 ilut_ptr->reorder_array = reorder_list;
855 Norm2 = hypre_TAlloc(double, Nrows , HYPRE_MEMORY_HOST);
856 for ( i = 0; i < Nrows; i++ ) Norm2[i] = rowNorms[order_list[i]];
857 hypre_TFree(rowNorms, HYPRE_MEMORY_HOST);
858 rowNorms = Norm2;
859 }
860 /*
861 for ( i = 0; i < Nrows; i++ )
862 for ( j = Amat_ia[i]; j < Amat_ia[i+1]; j++ )
863 printf("%10d %10d %25.16e\n", i+1, Amat_ja[j]+1, Amat_aa[j]);
864 */
865
866 /* ---------------------------------------------------------------- */
867 /* allocate space */
868 /* ---------------------------------------------------------------- */
869
870 for ( i = 0; i < total_recv_leng; i++ ) total_nnz += recv_lengths[i];
871 total_nnz = (int) ((double) total_nnz * (fillin + 1.0));
872 ilut_ptr->mat_ia = hypre_TAlloc(int, (extNrows + 1 ) , HYPRE_MEMORY_HOST);
873 ilut_ptr->mat_ja = hypre_TAlloc(int, total_nnz , HYPRE_MEMORY_HOST);
874 ilut_ptr->mat_aa = hypre_TAlloc(double, total_nnz , HYPRE_MEMORY_HOST);
875 mat_ia = ilut_ptr->mat_ia;
876 mat_ja = ilut_ptr->mat_ja;
877 mat_aa = ilut_ptr->mat_aa;
878
879 offset = 0;
880 max_nnz_row = 0;
881 for ( i = 0; i < total_recv_leng; i++ )
882 {
883 rowNorms[i+Nrows] = 0.0;
884 nnz_row = 0;
885 for ( j = offset; j < offset+recv_lengths[i]; j++ )
886 {
887 index = ext_ja[j];
888 if ( index >= Noffset && index < Noffset+Nrows )
889 ext_ja[j] = index - Noffset;
890 else
891 {
892 m = HYPRE_LSI_Search(map, index, extNrows-Nrows);
893 if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows;
894 else ext_ja[j] = -1;
895 }
896 if ( ext_ja[j] != -1 )
897 {
898 rowNorms[i+Nrows] += habs(ext_aa[j]);
899 nnz_row++;
900 }
901 }
902 if ( nnz_row > max_nnz_row ) max_nnz_row = nnz_row;
903 rowNorms[i+Nrows] /= extNrows;
904 offset += recv_lengths[i];
905 }
906
907 /* ---------------------------------------------------------------- */
908 /* process the first Nrows */
909 /* ---------------------------------------------------------------- */
910
911 num_small_pivot = 0;
912 nnz_count = 0;
913 mat_ia[0] = 0;
914 track_array = hypre_TAlloc(int, extNrows , HYPRE_MEMORY_HOST);
915 for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
916
917 printstep = extNrows / 10;
918
919 for ( i = 0; i < Nrows; i++ )
920 {
921 if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 )
922 printf("%4d : 0DDILUT Processing row %d(%d)\n",mypid,i,extNrows);
923
924 track_leng = 0;
925 cols = &(Amat_ja[Amat_ia[i]]);
926 vals = &(Amat_aa[Amat_ia[i]]);
927 m = Amat_ia[i+1] - Amat_ia[i];
928
929 for ( j = 0; j < m; j++ )
930 {
931 if ( cols[j] < extNrows )
932 {
933 dble_buf[cols[j]] = vals[j];
934 track_array[track_leng++] = cols[j];
935 }
936 }
937 Lcount = Ucount = first = 0;
938 first = extNrows;
939 for ( j = 0; j < track_leng; j++ )
940 {
941 index = track_array[j];
942 if ( dble_buf[index] != 0 )
943 {
944 if ( index < i ) Lcount++;
945 else if ( index > i ) Ucount++;
946 else if ( index == i ) diagonal[i] = dble_buf[index];
947 if ( index < first ) first = index;
948 }
949 }
950 Lcount = Lcount * fillin;
951 Ucount = Ucount * fillin;
952 rel_tau = tau * rowNorms[i];
953 for ( j = first; j < i; j++ )
954 {
955 if ( habs(dble_buf[j]) > rel_tau )
956 {
957 ddata = dble_buf[j] / diagonal[j];
958 touch_cnt++;
959 for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
960 {
961 colIndex = mat_ja[k];
962 if ( colIndex > j )
963 {
964 if ( dble_buf[colIndex] != 0.0 )
965 dble_buf[colIndex] -= (ddata * mat_aa[k]);
966 else
967 {
968 dble_buf[colIndex] = - (ddata * mat_aa[k]);
969 if ( dble_buf[colIndex] != 0.0 )
970 track_array[track_leng++] = colIndex;
971 }
972 }
973 }
974 dble_buf[j] = ddata;
975 }
976 else dble_buf[j] = 0.0;
977 }
978 for ( j = 0; j < m; j++ )
979 {
980 if ( cols[j] < extNrows )
981 {
982 vals[j] = dble_buf[cols[j]];
983 if ( cols[j] != i ) dble_buf[cols[j]] = 0.0;
984 }
985 }
986 sortcnt = 0;
987 for ( j = 0; j < track_leng; j++ )
988 {
989 index = track_array[j];
990 if ( index < i )
991 {
992 if ( dble_buf[index] < -rel_tau )
993 {
994 sortcols[sortcnt] = index;
995 sortvals[sortcnt++] = - dble_buf[index] * rowNorms[index];
996 }
997 else if ( dble_buf[index] > rel_tau )
998 {
999 sortcols[sortcnt] = index;
1000 sortvals[sortcnt++] = dble_buf[index] * rowNorms[index];
1001 }
1002 else dble_buf[index] = 0.0;
1003 }
1004 }
1005 if ( sortcnt > Lcount )
1006 {
1007 HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount);
1008 for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
1009 }
1010 for ( j = 0; j < m; j++ )
1011 {
1012 if ( cols[j] < i && vals[j] != 0.0 )
1013 {
1014 mat_aa[nnz_count] = vals[j];
1015 mat_ja[nnz_count++] = cols[j];
1016 }
1017 }
1018 for ( j = 0; j < track_leng; j++ )
1019 {
1020 index = track_array[j];
1021 if ( index < i && dble_buf[index] != 0.0 )
1022 {
1023 mat_aa[nnz_count] = dble_buf[index];
1024 mat_ja[nnz_count++] = index;
1025 dble_buf[index] = 0.0;
1026 }
1027 }
1028 diagonal[i] = dble_buf[i];
1029 if ( habs(diagonal[i]) < 1.0e-16 )
1030 {
1031 diagonal[i] = 1.0E-6;
1032 num_small_pivot++;
1033 }
1034 mat_aa[nnz_count] = diagonal[i];
1035 mat_ja[nnz_count++] = i;
1036 sortcnt = 0;
1037 for ( j = 0; j < track_leng; j++ )
1038 {
1039 index = track_array[j];
1040 if ( index > i )
1041 {
1042 if ( dble_buf[index] < -rel_tau )
1043 {
1044 sortcols[sortcnt] = index;
1045 sortvals[sortcnt++] = - dble_buf[index] * rowNorms[index];
1046 }
1047 else if ( dble_buf[index] > rel_tau )
1048 {
1049 sortcols[sortcnt] = index;
1050 sortvals[sortcnt++] = dble_buf[index] * rowNorms[index];
1051 }
1052 else dble_buf[index] = 0.0;
1053 }
1054 }
1055 if ( sortcnt > Ucount )
1056 {
1057 HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Ucount);
1058 for ( j = Ucount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
1059 }
1060 for ( j = 0; j < m; j++ )
1061 {
1062 if ( cols[j] > i && vals[j] != 0.0 )
1063 {
1064 mat_aa[nnz_count] = vals[j];
1065 mat_ja[nnz_count++] = cols[j];
1066 }
1067 }
1068 for ( j = 0; j < track_leng; j++ )
1069 {
1070 index = track_array[j];
1071 if ( index > i && dble_buf[index] != 0.0 )
1072 {
1073 mat_aa[nnz_count] = dble_buf[index];
1074 mat_ja[nnz_count++] = index;
1075 dble_buf[index] = 0.0;
1076 }
1077 }
1078 dble_buf[i] = 0.0;
1079 mat_ia[i+1] = nnz_count;
1080 }
1081 hypre_TFree(Amat_ia, HYPRE_MEMORY_HOST);
1082 hypre_TFree(Amat_ja, HYPRE_MEMORY_HOST);
1083 hypre_TFree(Amat_aa, HYPRE_MEMORY_HOST);
1084 printf("touch_cnt = %d\n", touch_cnt);
1085
1086 /* ---------------------------------------------------------------- */
1087 /* process the off-processor rows */
1088 /* ---------------------------------------------------------------- */
1089
1090 offset = 0;
1091 cols = hypre_TAlloc(int, max_nnz_row , HYPRE_MEMORY_HOST);
1092 vals = hypre_TAlloc(double, max_nnz_row , HYPRE_MEMORY_HOST);
1093 for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
1094 for ( i = 0; i < total_recv_leng; i++ )
1095 {
1096 if ( (i+Nrows) % printstep == 0 && ilut_ptr->outputLevel > 0 )
1097 printf("%4d : *DDILUT Processing row %d(%d)\n",mypid,i+Nrows,extNrows);
1098
1099 track_leng = m = 0;
1100 for ( j = offset; j < offset+recv_lengths[i]; j++ )
1101 {
1102 if ( ext_ja[j] != -1 )
1103 {
1104 if (order_flag && ext_ja[j] < Nrows) index = reorder_list[ext_ja[j]];
1105 else index = ext_ja[j];
1106 dble_buf[index] = ext_aa[j];
1107 track_array[track_leng++] = index;
1108 cols[m] = index;
1109 vals[m++] = ext_aa[j];
1110 }
1111 }
1112 Lcount = Ucount = 0;
1113 first = extNrows;
1114 for ( j = 0; j < track_leng; j++ )
1115 {
1116 index = track_array[j];
1117 if ( dble_buf[index] != 0 )
1118 {
1119 if ( index < i+Nrows ) Lcount++;
1120 else if ( index > i+Nrows ) Ucount++;
1121 else if ( i+Nrows == index ) diagonal[i+Nrows] = dble_buf[index];
1122 if ( index < first ) first = index;
1123 }
1124 }
1125 Lcount = Lcount * fillin;
1126 Ucount = Ucount * fillin;
1127 rel_tau = tau * rowNorms[i+Nrows];
1128 for ( j = first; j < i+Nrows; j++ )
1129 {
1130 if ( habs(dble_buf[j]) > rel_tau )
1131 {
1132 ddata = dble_buf[j] / diagonal[j];
1133 for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
1134 {
1135 colIndex = mat_ja[k];
1136 if ( colIndex > j )
1137 {
1138 if ( dble_buf[colIndex] != 0.0 )
1139 dble_buf[colIndex] -= (ddata * mat_aa[k]);
1140 else
1141 {
1142 dble_buf[colIndex] = - (ddata * mat_aa[k]);
1143 if ( dble_buf[colIndex] != 0.0 )
1144 track_array[track_leng++] = colIndex;
1145 }
1146 }
1147 }
1148 dble_buf[j] = ddata;
1149 }
1150 else dble_buf[j] = 0.0;
1151 }
1152 for ( j = 0; j < m; j++ )
1153 {
1154 if ( cols[j] < extNrows )
1155 {
1156 vals[j] = dble_buf[cols[j]];
1157 if ( cols[j] != i+Nrows ) dble_buf[cols[j]] = 0.0;
1158 }
1159 }
1160 sortcnt = 0;
1161 for ( j = 0; j < track_leng; j++ )
1162 {
1163 index = track_array[j];
1164 if ( index < i+Nrows )
1165 {
1166 if ( dble_buf[index] < -rel_tau )
1167 {
1168 sortcols[sortcnt] = index;
1169 sortvals[sortcnt++] = - dble_buf[index]*rowNorms[index];
1170 }
1171 else if ( dble_buf[index] > rel_tau )
1172 {
1173 sortcols[sortcnt] = index;
1174 sortvals[sortcnt++] = dble_buf[index] * rowNorms[index];
1175 }
1176 else dble_buf[index] = 0.0;
1177 }
1178 }
1179 if ( sortcnt > Lcount )
1180 {
1181 HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount);
1182 for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
1183 }
1184 for ( j = 0; j < m; j++ )
1185 {
1186 if ( cols[j] < i+Nrows && vals[j] != 0.0 )
1187 {
1188 mat_aa[nnz_count] = vals[j];
1189 mat_ja[nnz_count++] = cols[j];
1190 }
1191 }
1192 for ( j = 0; j < track_leng; j++ )
1193 {
1194 index = track_array[j];
1195 if ( index < i+Nrows && dble_buf[index] != 0.0 )
1196 {
1197 mat_aa[nnz_count] = dble_buf[index];
1198 mat_ja[nnz_count++] = index;
1199 dble_buf[index] = 0.0;
1200 }
1201 }
1202 diagonal[i+Nrows] = dble_buf[i+Nrows];
1203 if ( habs(diagonal[i+Nrows]) < 1.0e-16 )
1204 {
1205 diagonal[i+Nrows] = 1.0E-6;
1206 num_small_pivot++;
1207 }
1208 mat_aa[nnz_count] = diagonal[i+Nrows];
1209 mat_ja[nnz_count++] = i+Nrows;
1210 dble_buf[i+Nrows] = 0.0;
1211 sortcnt = 0;
1212 for ( j = 0; j < track_leng; j++ )
1213 {
1214 index = track_array[j];
1215 if ( index > i+Nrows )
1216 {
1217 if ( dble_buf[index] < -rel_tau )
1218 {
1219 sortcols[sortcnt] = index;
1220 sortvals[sortcnt++] = - dble_buf[index] * rowNorms[index];
1221 }
1222 else if ( dble_buf[index] > rel_tau )
1223 {
1224 sortcols[sortcnt] = index;
1225 sortvals[sortcnt++] = dble_buf[index] * rowNorms[index];
1226 }
1227 else dble_buf[index] = 0.0;
1228 }
1229 }
1230 if ( sortcnt > Ucount )
1231 {
1232 HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Ucount);
1233 for ( j = Ucount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
1234 }
1235 for ( j = 0; j < m; j++ )
1236 {
1237 if ( cols[j] > i+Nrows && cols[j] < extNrows && vals[j] != 0.0 )
1238 {
1239 mat_aa[nnz_count] = vals[j];
1240 mat_ja[nnz_count++] = cols[j];
1241 }
1242 }
1243 for ( j = 0; j < track_leng; j++ )
1244 {
1245 index = track_array[j];
1246 if ( index > i+Nrows && dble_buf[index] != 0.0 )
1247 {
1248 mat_aa[nnz_count] = dble_buf[index];
1249 mat_ja[nnz_count++] = index;
1250 dble_buf[index] = 0.0;
1251 }
1252 }
1253 mat_ia[i+Nrows+1] = nnz_count;
1254 offset += recv_lengths[i];
1255 }
1256 if ( nnz_count > total_nnz )
1257 printf("WARNING in ILUTDecomp : memory bound passed.\n");
1258 if ( ilut_ptr->outputLevel > 0 )
1259 {
1260 printf("%4d : DDILUT number of nonzeros = %d\n",mypid,nnz_count);
1261 printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
1262 }
1263
1264 /* ---------------------------------------------------------- */
1265 /* deallocate temporary storage space */
1266 /* ---------------------------------------------------------- */
1267
1268 hypre_TFree(cols, HYPRE_MEMORY_HOST);
1269 hypre_TFree(vals, HYPRE_MEMORY_HOST);
1270 hypre_TFree(sortcols, HYPRE_MEMORY_HOST);
1271 hypre_TFree(sortvals, HYPRE_MEMORY_HOST);
1272 hypre_TFree(dble_buf, HYPRE_MEMORY_HOST);
1273 hypre_TFree(diagonal, HYPRE_MEMORY_HOST);
1274 hypre_TFree(rowNorms, HYPRE_MEMORY_HOST);
1275 hypre_TFree(context, HYPRE_MEMORY_HOST);
1276 hypre_TFree(track_array, HYPRE_MEMORY_HOST);
1277
1278 return 0;
1279 }
1280
1281 /*****************************************************************************/
1282 /* function for doing ILUT decomposition */
1283 /* (attempted for pattern reuse, but not done yet) */
1284 /*****************************************************************************/
1285
HYPRE_LSI_DDIlutDecompose2(HYPRE_LSI_DDIlut * ilut_ptr,MH_Matrix * Amat,int total_recv_leng,int * recv_lengths,int * ext_ja,double * ext_aa,int * map,int * map2,int Noffset)1286 int HYPRE_LSI_DDIlutDecompose2(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
1287 int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa,
1288 int *map, int *map2, int Noffset)
1289 {
1290 int *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid;
1291 int index, first, Lcount, Ucount, ncnt, j, k, total_nnz;
1292 int sortcnt, colIndex, offset, nnz_count, Nrows, extNrows;
1293 int *track_array, track_leng, num_small_pivot, printstep, ndisc;
1294 int *sortcols;
1295 double *vals, ddata, *mat_aa, *diagonal, *rowNorms;
1296 double *dble_buf, fillin, tau, rel_tau, *sortvals, absval;
1297 MH_Context *context;
1298
1299 /* ---------------------------------------------------------------- */
1300 /* fetch ILUT parameters */
1301 /* ---------------------------------------------------------------- */
1302
1303 MPI_Comm_rank(ilut_ptr->comm, &mypid);
1304 fillin = ilut_ptr->fillin;
1305 tau = ilut_ptr->thresh;
1306 Nrows = Amat->Nrows;
1307 extNrows = Nrows + total_recv_leng;
1308 ilut_ptr->Nrows = Nrows;
1309 ilut_ptr->extNrows = extNrows;
1310
1311 /* ---------------------------------------------------------------- */
1312 /* allocate temporary storage space */
1313 /* ---------------------------------------------------------------- */
1314
1315 allocated_space = extNrows;
1316 cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
1317 vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
1318 sortcols = hypre_TAlloc(int, extNrows , HYPRE_MEMORY_HOST);
1319 sortvals = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1320 dble_buf = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1321 diagonal = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1322 rowNorms = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1323
1324 /* ---------------------------------------------------------------- */
1325 /* compute the storage requirement for the ILU matrix */
1326 /* ---------------------------------------------------------------- */
1327
1328 context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
1329 context->Amat = Amat;
1330 for ( i = 0; i < Nrows; i++ )
1331 {
1332 rowNorms[i] = 0.0;
1333 while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0)
1334 {
1335 hypre_TFree(vals, HYPRE_MEMORY_HOST);
1336 hypre_TFree(cols, HYPRE_MEMORY_HOST);
1337 allocated_space += 200 + 1;
1338 cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
1339 vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
1340 }
1341 for ( j = 0; j < m; j++ ) rowNorms[i] += habs(vals[j]);
1342 rowNorms[i] /= extNrows;
1343 }
1344 total_nnz = 0;
1345 for ( i = 0; i < total_recv_leng; i++ ) total_nnz += recv_lengths[i];
1346 total_nnz = (int) ((double) total_nnz*(fillin+1.0)) + ilut_ptr->mat_ia[Nrows];
1347 mat_ia = ilut_ptr->mat_ia;
1348 mat_ja = ilut_ptr->mat_ja;
1349 mat_aa = ilut_ptr->mat_aa;
1350 ilut_ptr->mat_ia = hypre_TAlloc(int, (extNrows + 1 ) , HYPRE_MEMORY_HOST);
1351 ilut_ptr->mat_ja = hypre_TAlloc(int, total_nnz , HYPRE_MEMORY_HOST);
1352 ilut_ptr->mat_aa = hypre_TAlloc(double, total_nnz , HYPRE_MEMORY_HOST);
1353
1354 ncnt = 0;
1355 ilut_ptr->mat_ia[0] = 0;
1356 for ( i = 0; i < Nrows; i++ )
1357 {
1358 for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
1359 if ( mat_ja[j] >= 0 && mat_ja[j] < extNrows )
1360 ilut_ptr->mat_ja[ncnt++] = mat_ja[j];
1361 ilut_ptr->mat_ia[i+1] = ncnt;
1362 }
1363 hypre_TFree(mat_ia, HYPRE_MEMORY_HOST);
1364 hypre_TFree(mat_ja, HYPRE_MEMORY_HOST);
1365 hypre_TFree(mat_aa, HYPRE_MEMORY_HOST);
1366 mat_ia = ilut_ptr->mat_ia;
1367 mat_ja = ilut_ptr->mat_ja;
1368 mat_aa = ilut_ptr->mat_aa;
1369
1370 /* ---------------------------------------------------------------- */
1371 /* process the first Nrows */
1372 /* ---------------------------------------------------------------- */
1373
1374 num_small_pivot = 0;
1375 mat_ia[0] = 0;
1376 track_array = hypre_TAlloc(int, extNrows , HYPRE_MEMORY_HOST);
1377 for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
1378
1379 printstep = extNrows / 10;
1380
1381 ndisc = 0;
1382 for ( i = 0; i < Nrows; i++ )
1383 {
1384 if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 )
1385 printf("%4d : 1DDILUT Processing row %d(%d,%d)\n",mypid,i,extNrows,Nrows);
1386
1387 MH_GetRow(context,1,&i,allocated_space,cols,vals,&m);
1388
1389 /* ------------------------------------------------------------- */
1390 /* load the row into buffer */
1391 /* ------------------------------------------------------------- */
1392
1393 track_leng = 0;
1394 first = extNrows;
1395 for ( j = 0; j < m; j++ )
1396 {
1397 index = cols[j];
1398 if ( index < extNrows )
1399 {
1400 dble_buf[index] = vals[j];
1401 track_array[track_leng++] = index;
1402 }
1403 if ( index < first ) first = index;
1404 }
1405 for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
1406 {
1407 index = mat_ja[j];
1408 if ( dble_buf[index] == 0.0 ) track_array[track_leng++] = index;
1409 if ( index < first ) first = index;
1410 }
1411 if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++;
1412
1413 /* ------------------------------------------------------------- */
1414 /* perform factorization */
1415 /* ------------------------------------------------------------- */
1416
1417 rel_tau = tau * rowNorms[i];
1418 for ( j = first; j < i; j++ )
1419 {
1420 if ( habs(dble_buf[j]) > rel_tau )
1421 {
1422 ddata = dble_buf[j] / diagonal[j];
1423 for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
1424 {
1425 colIndex = mat_ja[k];
1426 if ( colIndex > j )
1427 {
1428 if ( dble_buf[colIndex] != 0.0 )
1429 dble_buf[colIndex] -= (ddata * mat_aa[k]);
1430 else
1431 {
1432 dble_buf[colIndex] = - (ddata * mat_aa[k]);
1433 if ( dble_buf[colIndex] != 0.0 )
1434 track_array[track_leng++] = colIndex;
1435 }
1436 }
1437 }
1438 dble_buf[j] = ddata;
1439 }
1440 else dble_buf[j] = 0.0;
1441 }
1442
1443 diagonal[i] = dble_buf[i];
1444 if ( habs(diagonal[i]) < 1.0e-16 )
1445 {
1446 diagonal[i] = dble_buf[i] = 1.0E-6;
1447 num_small_pivot++;
1448 }
1449 for (j = mat_ia[i]; j < mat_ia[i+1]; j++) mat_aa[j] = dble_buf[mat_ja[j]];
1450 for ( j = 0; j < track_leng; j++ ) dble_buf[track_array[j]] = 0.0;
1451 }
1452 nnz_count = mat_ia[Nrows];
1453 ncnt = 0;
1454 k = 0;
1455 for ( i = 0; i < Nrows; i++ )
1456 {
1457 for ( j = k; j < mat_ia[i+1]; j++ )
1458 {
1459 if ( mat_aa[j] != 0.0 )
1460 {
1461 mat_ja[ncnt] = mat_ja[j];
1462 mat_aa[ncnt++] = mat_aa[j];
1463 }
1464 }
1465 k = mat_ia[i+1];
1466 mat_ia[i+1] = ncnt;
1467 }
1468 if ( ilut_ptr->outputLevel > 0 )
1469 {
1470 printf("%4d : DDILUT after Nrows - nnz = %d %d\n", mypid, nnz_count, ncnt);
1471 printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
1472 printf("%4d : DDILUT number of pattern mismatch = %d\n",mypid,ndisc);
1473 }
1474 nnz_count = ncnt;
1475
1476 /* ---------------------------------------------------------------- */
1477 /* preparation for processing the off-processor rows */
1478 /* ---------------------------------------------------------------- */
1479
1480 offset = 0;
1481 for ( i = 0; i < total_recv_leng; i++ )
1482 {
1483 rowNorms[i+Nrows] = 0.0;
1484 for ( j = offset; j < offset+recv_lengths[i]; j++ )
1485 {
1486 index = ext_ja[j];
1487 if ( index >= Noffset && index < Noffset+Nrows )
1488 ext_ja[j] = index - Noffset;
1489 else
1490 {
1491 m = HYPRE_LSI_Search(map, index, extNrows-Nrows);
1492 if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows;
1493 else ext_ja[j] = -1;
1494 if ( ext_ja[j] >= extNrows ) ext_ja[j] = -1;
1495 }
1496 if ( ext_ja[j] != -1 ) rowNorms[i+Nrows] += habs(ext_aa[j]);
1497 }
1498 rowNorms[i+Nrows] /= extNrows;
1499 offset += recv_lengths[i];
1500 }
1501
1502 /* ---------------------------------------------------------------- */
1503 /* process the off-processor rows */
1504 /* ---------------------------------------------------------------- */
1505
1506 offset = 0;
1507 for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
1508 for ( i = 0; i < total_recv_leng; i++ )
1509 {
1510 if ( (i+Nrows) % printstep == 0 && ilut_ptr->outputLevel > 0 )
1511 printf("%4d : *DDILUT Processing row %d(%d)\n",mypid,i+Nrows,extNrows);
1512
1513 track_leng = m = 0;
1514 for ( j = offset; j < offset+recv_lengths[i]; j++ )
1515 {
1516 index = ext_ja[j];
1517 if ( index != -1 )
1518 {
1519 cols[m] = index;
1520 vals[m++] = ext_aa[j];
1521 track_array[track_leng++] = index;
1522 dble_buf[index] = ext_aa[j];
1523 }
1524 }
1525 Lcount = Ucount = 0;
1526 first = extNrows;
1527 for ( j = 0; j < track_leng; j++ )
1528 {
1529 index = track_array[j];
1530 if ( dble_buf[index] != 0 )
1531 {
1532 if ( index < i+Nrows ) Lcount++;
1533 else if ( index > i+Nrows ) Ucount++;
1534 else if ( i+Nrows == index ) diagonal[i+Nrows] = dble_buf[index];
1535 if ( index < first ) first = index;
1536 }
1537 }
1538 Lcount = Lcount * fillin;
1539 Ucount = Ucount * fillin;
1540 rel_tau = tau * rowNorms[i+Nrows];
1541 for ( j = first; j < i+Nrows; j++ )
1542 {
1543 if ( habs(dble_buf[j]) > rel_tau )
1544 {
1545 ddata = dble_buf[j] / diagonal[j];
1546 for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
1547 {
1548 colIndex = mat_ja[k];
1549 if ( colIndex > j )
1550 {
1551 if ( dble_buf[colIndex] != 0.0 )
1552 dble_buf[colIndex] -= (ddata * mat_aa[k]);
1553 else
1554 {
1555 dble_buf[colIndex] = - (ddata * mat_aa[k]);
1556 if ( dble_buf[colIndex] != 0.0 )
1557 track_array[track_leng++] = colIndex;
1558 }
1559 }
1560 }
1561 dble_buf[j] = ddata;
1562 }
1563 else dble_buf[j] = 0.0;
1564 }
1565 for ( j = 0; j < m; j++ )
1566 {
1567 index = cols[j];
1568 vals[j] = dble_buf[index];
1569 if ( index != i+Nrows ) dble_buf[index] = 0.0;
1570 }
1571 sortcnt = 0;
1572 for ( j = 0; j < track_leng; j++ )
1573 {
1574 index = track_array[j];
1575 if ( index < i+Nrows )
1576 {
1577 absval = habs( dble_buf[index] );
1578 if ( absval > rel_tau )
1579 {
1580 sortcols[sortcnt] = index;
1581 sortvals[sortcnt++] = absval * rowNorms[index];
1582 }
1583 else dble_buf[index] = 0.0;
1584 }
1585 }
1586 if ( sortcnt > Lcount )
1587 {
1588 HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount);
1589 for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
1590 }
1591 for ( j = 0; j < m; j++ )
1592 {
1593 if ( cols[j] < i+Nrows && vals[j] != 0.0 )
1594 {
1595 mat_aa[nnz_count] = vals[j];
1596 mat_ja[nnz_count++] = cols[j];
1597 }
1598 }
1599 for ( j = 0; j < track_leng; j++ )
1600 {
1601 index = track_array[j];
1602 if ( index < i+Nrows && dble_buf[index] != 0.0 )
1603 {
1604 mat_aa[nnz_count] = dble_buf[index];
1605 mat_ja[nnz_count++] = index;
1606 dble_buf[index] = 0.0;
1607 }
1608 }
1609 diagonal[i+Nrows] = dble_buf[i+Nrows];
1610 if ( habs(diagonal[i+Nrows]) < 1.0e-16 )
1611 {
1612 diagonal[i+Nrows] = 1.0E-6;
1613 num_small_pivot++;
1614 }
1615 mat_aa[nnz_count] = diagonal[i+Nrows];
1616 mat_ja[nnz_count++] = i+Nrows;
1617 dble_buf[i+Nrows] = 0.0;
1618 sortcnt = 0;
1619 for ( j = 0; j < track_leng; j++ )
1620 {
1621 index = track_array[j];
1622 if ( index > i+Nrows )
1623 {
1624 absval = habs( dble_buf[index] );
1625 if ( absval > rel_tau )
1626 {
1627 sortcols[sortcnt] = index;
1628 sortvals[sortcnt++] = absval * rowNorms[index];
1629 }
1630 else dble_buf[index] = 0.0;
1631 }
1632 }
1633 if ( sortcnt > Ucount )
1634 {
1635 HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Ucount);
1636 for ( j = Ucount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
1637 }
1638 for ( j = 0; j < m; j++ )
1639 {
1640 if ( cols[j] > i+Nrows && vals[j] != 0.0 )
1641 {
1642 mat_aa[nnz_count] = vals[j];
1643 mat_ja[nnz_count++] = cols[j];
1644 }
1645 }
1646 for ( j = 0; j < track_leng; j++ )
1647 {
1648 index = track_array[j];
1649 if ( index > i+Nrows && dble_buf[index] != 0.0 )
1650 {
1651 mat_aa[nnz_count] = dble_buf[index];
1652 mat_ja[nnz_count++] = index;
1653 dble_buf[index] = 0.0;
1654 }
1655 }
1656 mat_ia[i+Nrows+1] = nnz_count;
1657 offset += recv_lengths[i];
1658 }
1659
1660 if ( nnz_count > total_nnz )
1661 printf("WARNING in ILUTDecomp : memory bound passed.\n");
1662 if ( ilut_ptr->outputLevel > 0 )
1663 {
1664 printf("%4d : DDILUT number of nonzeros = %d\n",mypid,nnz_count);
1665 printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
1666 }
1667
1668 /* ---------------------------------------------------------- */
1669 /* deallocate temporary storage space */
1670 /* ---------------------------------------------------------- */
1671
1672 hypre_TFree(cols, HYPRE_MEMORY_HOST);
1673 hypre_TFree(vals, HYPRE_MEMORY_HOST);
1674 hypre_TFree(sortcols, HYPRE_MEMORY_HOST);
1675 hypre_TFree(sortvals, HYPRE_MEMORY_HOST);
1676 hypre_TFree(dble_buf, HYPRE_MEMORY_HOST);
1677 hypre_TFree(diagonal, HYPRE_MEMORY_HOST);
1678 hypre_TFree(rowNorms, HYPRE_MEMORY_HOST);
1679 hypre_TFree(context, HYPRE_MEMORY_HOST);
1680 hypre_TFree(track_array, HYPRE_MEMORY_HOST);
1681
1682 return 0;
1683 }
1684
1685 /*****************************************************************************/
1686 /* function for doing ILUT decomposition */
1687 /* (purely based on magnitude) */
1688 /*****************************************************************************/
1689
HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut * ilut_ptr,MH_Matrix * Amat,int total_recv_leng,int * recv_lengths,int * ext_ja,double * ext_aa,int * map,int * map2,int Noffset)1690 int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
1691 int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa,
1692 int *map, int *map2, int Noffset)
1693 {
1694 int *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid;
1695 int index, first, Lcount, Ucount, j, k, total_nnz;
1696 int sortcnt, colIndex, offset, nnz_count, Nrows, extNrows;
1697 int *track_array, track_leng, num_small_pivot;
1698 double *vals, ddata, *mat_aa, *diagonal, *rowNorms;
1699 double *dble_buf, fillin, tau, rel_tau;
1700 MH_Context *context;
1701
1702 /* ---------------------------------------------------------------- */
1703 /* fetch ILUT parameters */
1704 /* ---------------------------------------------------------------- */
1705
1706 MPI_Comm_rank(ilut_ptr->comm, &mypid);
1707 fillin = ilut_ptr->fillin;
1708 tau = ilut_ptr->thresh;
1709 Nrows = Amat->Nrows;
1710 extNrows = Nrows + total_recv_leng;
1711 ilut_ptr->Nrows = Nrows;
1712 ilut_ptr->extNrows = extNrows;
1713
1714 /* ---------------------------------------------------------------- */
1715 /* allocate temporary storage space */
1716 /* ---------------------------------------------------------------- */
1717
1718 allocated_space = extNrows;
1719 cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
1720 vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
1721 dble_buf = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1722 diagonal = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1723 rowNorms = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1724
1725 /* ---------------------------------------------------------------- */
1726 /* compute the storage requirement for the ILU matrix */
1727 /* ---------------------------------------------------------------- */
1728
1729 context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
1730 context->Amat = Amat;
1731 total_nnz = 0;
1732 for ( i = 0; i < Nrows; i++ )
1733 {
1734 rowNorms[i] = 0.0;
1735 while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0)
1736 {
1737 hypre_TFree(vals, HYPRE_MEMORY_HOST);
1738 hypre_TFree(cols, HYPRE_MEMORY_HOST);
1739 allocated_space += 200 + 1;
1740 cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
1741 vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
1742 }
1743 total_nnz += m;
1744 for ( j = 0; j < m; j++ ) rowNorms[i] += habs(vals[j]);
1745 rowNorms[i] /= extNrows;
1746 }
1747 for ( i = 0; i < total_recv_leng; i++ ) total_nnz += recv_lengths[i];
1748 total_nnz = (int) ((double) total_nnz * (fillin + 1.0));
1749 ilut_ptr->mat_ia = hypre_TAlloc(int, (extNrows + 1 ) , HYPRE_MEMORY_HOST);
1750 ilut_ptr->mat_ja = hypre_TAlloc(int, total_nnz , HYPRE_MEMORY_HOST);
1751 ilut_ptr->mat_aa = hypre_TAlloc(double, total_nnz , HYPRE_MEMORY_HOST);
1752 mat_ia = ilut_ptr->mat_ia;
1753 mat_ja = ilut_ptr->mat_ja;
1754 mat_aa = ilut_ptr->mat_aa;
1755
1756 offset = 0;
1757 for ( i = 0; i < total_recv_leng; i++ )
1758 {
1759 rowNorms[i+Nrows] = 0.0;
1760 for ( j = offset; j < offset+recv_lengths[i]; j++ )
1761 {
1762 index = ext_ja[j];
1763 if ( index >= Noffset && index < Noffset+Nrows )
1764 ext_ja[j] = index - Noffset;
1765 else
1766 {
1767 m = HYPRE_LSI_Search(map, index, extNrows-Nrows);
1768 if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows;
1769 else ext_ja[j] = -1;
1770 }
1771 if ( ext_ja[j] != -1 ) rowNorms[i+Nrows] += habs(ext_aa[j]);
1772 }
1773 rowNorms[i+Nrows] /= extNrows;
1774 offset += recv_lengths[i];
1775 }
1776
1777 /* ---------------------------------------------------------------- */
1778 /* process the first Nrows */
1779 /* ---------------------------------------------------------------- */
1780
1781 num_small_pivot = 0;
1782 nnz_count = 0;
1783 mat_ia[0] = 0;
1784 track_array = hypre_TAlloc(int, extNrows , HYPRE_MEMORY_HOST);
1785 for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
1786
1787 for ( i = 0; i < Nrows; i++ )
1788 {
1789 if ( i % 1000 == 0 && ilut_ptr->outputLevel > 0 )
1790 printf("%4d : 2DDILUT Processing row %d(%d)\n",mypid,i,extNrows);
1791
1792 track_leng = 0;
1793 MH_GetRow(context,1,&i,allocated_space,cols,vals,&m);
1794 if ( m < 0 )
1795 printf("IlutDecompose WARNING(1): row nnz = %d\n",m);
1796
1797 for ( j = 0; j < m; j++ )
1798 {
1799 if ( cols[j] < extNrows )
1800 {
1801 dble_buf[cols[j]] = vals[j];
1802 track_array[track_leng++] = cols[j];
1803 }
1804 }
1805 Lcount = Ucount = first = 0;
1806 first = extNrows;
1807 for ( j = 0; j < track_leng; j++ )
1808 {
1809 index = track_array[j];
1810 if ( dble_buf[index] != 0 )
1811 {
1812 if ( index < i ) Lcount++;
1813 else if ( index > i ) Ucount++;
1814 else if ( index == i ) diagonal[i] = dble_buf[index];
1815 if ( index < first ) first = index;
1816 }
1817 }
1818 Lcount = Lcount * (fillin + 1);
1819 Ucount = Ucount * (fillin + 1);
1820 rel_tau = tau * rowNorms[i];
1821 for ( j = first; j < i; j++ )
1822 {
1823 if ( habs(dble_buf[j]) > rel_tau )
1824 {
1825 ddata = dble_buf[j] / diagonal[j];
1826 for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
1827 {
1828 colIndex = mat_ja[k];
1829 if ( colIndex > j )
1830 {
1831 if ( dble_buf[colIndex] != 0.0 )
1832 dble_buf[colIndex] -= (ddata * mat_aa[k]);
1833 else
1834 {
1835 dble_buf[colIndex] = - (ddata * mat_aa[k]);
1836 track_array[track_leng++] = colIndex;
1837 }
1838 }
1839 }
1840 dble_buf[j] = ddata;
1841 }
1842 else dble_buf[j] = 0.0;
1843 }
1844
1845 sortcnt = 0;
1846 for ( j = 0; j < track_leng; j++ )
1847 {
1848 index = track_array[j];
1849 if ( index < i )
1850 {
1851 if ( dble_buf[index] < -rel_tau )
1852 {
1853 cols[sortcnt] = index;
1854 vals[sortcnt++] = - dble_buf[index] * rowNorms[index];
1855 }
1856 else if ( dble_buf[index] > rel_tau )
1857 {
1858 cols[sortcnt] = index;
1859 vals[sortcnt++] = dble_buf[index] * rowNorms[index];
1860 }
1861 else dble_buf[index] = 0.0;
1862 }
1863 }
1864
1865 if ( sortcnt > Lcount ) HYPRE_LSI_SplitDSort(vals,sortcnt,cols,Lcount);
1866 if ( sortcnt > Lcount )
1867 {
1868 for ( j = Lcount; j < sortcnt; j++ ) dble_buf[cols[j]] = 0.0;
1869 }
1870 for ( j = 0; j < track_leng; j++ )
1871 {
1872 index = track_array[j];
1873 if ( index < i && dble_buf[index] != 0.0 )
1874 {
1875 mat_aa[nnz_count] = dble_buf[index];
1876 mat_ja[nnz_count++] = index;
1877 dble_buf[index] = 0.0;
1878 }
1879 }
1880 diagonal[i] = dble_buf[i];
1881 if ( habs(diagonal[i]) < 1.0e-16 )
1882 {
1883 diagonal[i] = 1.0E-6;
1884 num_small_pivot++;
1885 }
1886 mat_aa[nnz_count] = diagonal[i];
1887 mat_ja[nnz_count++] = i;
1888 sortcnt = 0;
1889 for ( j = 0; j < track_leng; j++ )
1890 {
1891 index = track_array[j];
1892 if ( index > i )
1893 {
1894 if ( dble_buf[index] < -rel_tau )
1895 {
1896 cols[sortcnt] = index;
1897 vals[sortcnt++] = - dble_buf[index] * rowNorms[index];
1898 }
1899 else if ( dble_buf[index] > rel_tau )
1900 {
1901 cols[sortcnt] = index;
1902 vals[sortcnt++] = dble_buf[index] * rowNorms[index];
1903 }
1904 else dble_buf[index] = 0.0;
1905 }
1906 }
1907 if ( sortcnt > Ucount ) HYPRE_LSI_SplitDSort(vals,sortcnt,cols,Ucount);
1908 if ( sortcnt > Ucount )
1909 {
1910 for ( j = Ucount; j < sortcnt; j++ ) dble_buf[cols[j]] = 0.0;
1911 }
1912 for ( j = 0; j < track_leng; j++ )
1913 {
1914 index = track_array[j];
1915 if ( index > i && dble_buf[index] != 0.0 )
1916 {
1917 mat_aa[nnz_count] = dble_buf[index];
1918 mat_ja[nnz_count++] = index;
1919 dble_buf[index] = 0.0;
1920 }
1921 }
1922 dble_buf[i] = 0.0;
1923 mat_ia[i+1] = nnz_count;
1924 }
1925
1926 /* ---------------------------------------------------------------- */
1927 /* process the off-processor rows */
1928 /* ---------------------------------------------------------------- */
1929
1930 offset = 0;
1931 for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
1932 for ( i = 0; i < total_recv_leng; i++ )
1933 {
1934 if ( (i+Nrows) % 1000 == 0 && ilut_ptr->outputLevel > 0 )
1935 printf("%4d : *DDILUT Processing row %d(%d)\n",mypid,i+Nrows,extNrows);
1936
1937 track_leng = 0;
1938 for ( j = offset; j < offset+recv_lengths[i]; j++ )
1939 {
1940 if ( ext_ja[j] != -1 )
1941 {
1942 dble_buf[ext_ja[j]] = ext_aa[j];
1943 track_array[track_leng++] = ext_ja[j];
1944 }
1945 }
1946 Lcount = Ucount = 0;
1947 first = extNrows;
1948 for ( j = 0; j < track_leng; j++ )
1949 {
1950 index = track_array[j];
1951 if ( dble_buf[index] != 0 )
1952 {
1953 if ( index < i+Nrows ) Lcount++;
1954 else if ( index > i+Nrows ) Ucount++;
1955 else if ( i+Nrows == index ) diagonal[i+Nrows] = dble_buf[index];
1956 if ( index < first ) first = index;
1957 }
1958 }
1959 Lcount = Lcount * (fillin + 1);
1960 Ucount = Ucount * (fillin + 1);
1961 rel_tau = tau * rowNorms[i+Nrows];
1962 for ( j = first; j < i+Nrows; j++ )
1963 {
1964 if ( habs(dble_buf[j]) > rel_tau )
1965 {
1966 ddata = dble_buf[j] / diagonal[j];
1967 for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
1968 {
1969 colIndex = mat_ja[k];
1970 if ( colIndex > j )
1971 {
1972 if ( dble_buf[colIndex] != 0.0 )
1973 dble_buf[colIndex] -= (ddata * mat_aa[k]);
1974 else
1975 {
1976 dble_buf[colIndex] = - (ddata * mat_aa[k]);
1977 track_array[track_leng++] = colIndex;
1978 }
1979 }
1980 }
1981 dble_buf[j] = ddata;
1982 }
1983 else dble_buf[j] = 0.0;
1984 }
1985 sortcnt = 0;
1986 for ( j = 0; j < track_leng; j++ )
1987 {
1988 index = track_array[j];
1989 if ( index < i+Nrows )
1990 {
1991 if ( dble_buf[index] < -rel_tau )
1992 {
1993 cols[sortcnt] = index;
1994 vals[sortcnt++] = - dble_buf[index]*rowNorms[index];
1995 }
1996 else if ( dble_buf[index] > rel_tau )
1997 {
1998 cols[sortcnt] = index;
1999 vals[sortcnt++] = dble_buf[index] * rowNorms[index];
2000 }
2001 else dble_buf[index] = 0.0;
2002 }
2003 }
2004 if ( sortcnt > Lcount ) HYPRE_LSI_SplitDSort(vals,sortcnt,cols,Lcount);
2005 if ( sortcnt > Lcount )
2006 {
2007 for ( j = Lcount; j < sortcnt; j++ ) dble_buf[cols[j]] = 0.0;
2008 }
2009 for ( j = 0; j < track_leng; j++ )
2010 {
2011 index = track_array[j];
2012 if ( index < i+Nrows && dble_buf[index] != 0.0 )
2013 {
2014 mat_aa[nnz_count] = dble_buf[index];
2015 mat_ja[nnz_count++] = index;
2016 dble_buf[index] = 0.0;
2017 }
2018 }
2019 diagonal[i+Nrows] = dble_buf[i+Nrows];
2020 if ( habs(diagonal[i+Nrows]) < 1.0e-16 )
2021 {
2022 diagonal[i+Nrows] = 1.0E-6;
2023 num_small_pivot++;
2024 }
2025 mat_aa[nnz_count] = diagonal[i+Nrows];
2026 mat_ja[nnz_count++] = i+Nrows;
2027 dble_buf[i+Nrows] = 0.0;
2028 sortcnt = 0;
2029 for ( j = 0; j < track_leng; j++ )
2030 {
2031 index = track_array[j];
2032 if ( index > i+Nrows )
2033 {
2034 if ( dble_buf[index] < -rel_tau )
2035 {
2036 cols[sortcnt] = index;
2037 vals[sortcnt++] = - dble_buf[index] * rowNorms[index];
2038 }
2039 else if ( dble_buf[index] > rel_tau )
2040 {
2041 cols[sortcnt] = index;
2042 vals[sortcnt++] = dble_buf[index] * rowNorms[index];
2043 }
2044 else dble_buf[index] = 0.0;
2045 }
2046 }
2047 if ( sortcnt > Ucount ) HYPRE_LSI_SplitDSort(vals,sortcnt,cols,Ucount);
2048 if ( sortcnt > Ucount )
2049 {
2050 for ( j = Ucount; j < sortcnt; j++ ) dble_buf[cols[j]] = 0.0;
2051 }
2052 for ( j = 0; j < track_leng; j++ )
2053 {
2054 index = track_array[j];
2055 if ( index > i+Nrows && dble_buf[index] != 0.0 )
2056 {
2057 mat_aa[nnz_count] = dble_buf[index];
2058 mat_ja[nnz_count++] = index;
2059 dble_buf[index] = 0.0;
2060 }
2061 }
2062 mat_ia[i+Nrows+1] = nnz_count;
2063 offset += recv_lengths[i];
2064 }
2065 if ( nnz_count > total_nnz )
2066 printf("WARNING in ILUTDecomp : memory bound passed.\n");
2067 if ( ilut_ptr->outputLevel > 0 )
2068 {
2069 printf("%4d : DDILUT number of nonzeros = %d\n",mypid,nnz_count);
2070 printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
2071 }
2072
2073 /* ---------------------------------------------------------- */
2074 /* deallocate temporary storage space */
2075 /* ---------------------------------------------------------- */
2076
2077 hypre_TFree(cols, HYPRE_MEMORY_HOST);
2078 hypre_TFree(vals, HYPRE_MEMORY_HOST);
2079 hypre_TFree(dble_buf, HYPRE_MEMORY_HOST);
2080 hypre_TFree(diagonal, HYPRE_MEMORY_HOST);
2081 hypre_TFree(rowNorms, HYPRE_MEMORY_HOST);
2082 hypre_TFree(context, HYPRE_MEMORY_HOST);
2083 hypre_TFree(track_array, HYPRE_MEMORY_HOST);
2084
2085 return 0;
2086 }
2087
2088 /*****************************************************************************/
2089 /* function for doing ILUT decomposition */
2090 /* (This version is based on ILU(1). It converges less well as the original */
2091 /* ILUT based on ILU(0) and magnitude. Its setup time is not faster either)*/
2092 /*****************************************************************************/
2093
HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut * ilut_ptr,MH_Matrix * Amat,int total_recv_leng,int * recv_lengths,int * ext_ja,double * ext_aa,int * map,int * map2,int Noffset)2094 int HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
2095 int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa,
2096 int *map, int *map2, int Noffset)
2097 {
2098 int *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid;
2099 int index, first, ncnt, j, k, total_nnz;
2100 int colIndex, offset, nnz_count, Nrows, extNrows;
2101 int *track_array, track_leng, num_small_pivot, printstep;
2102 int *mat_ia2, *mat_ja2, *iarray;
2103 double *vals, ddata, *mat_aa, *diagonal, *rowNorms;
2104 double *dble_buf, tau, rel_tau, *mat_aa2;
2105 MH_Context *context;
2106
2107 /* ---------------------------------------------------------------- */
2108 /* fetch ILUT parameters */
2109 /* ---------------------------------------------------------------- */
2110
2111 MPI_Comm_rank(ilut_ptr->comm, &mypid);
2112 tau = ilut_ptr->thresh;
2113 Nrows = Amat->Nrows;
2114 extNrows = Nrows + total_recv_leng;
2115 ilut_ptr->Nrows = Nrows;
2116 ilut_ptr->extNrows = extNrows;
2117
2118 /* ---------------------------------------------------------------- */
2119 /* allocate temporary storage space */
2120 /* ---------------------------------------------------------------- */
2121
2122 allocated_space = extNrows;
2123 cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
2124 vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
2125 dble_buf = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
2126 diagonal = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
2127 rowNorms = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
2128
2129 /* ---------------------------------------------------------------- */
2130 /* compute the storage requirement for the ILU matrix */
2131 /* ---------------------------------------------------------------- */
2132
2133 context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
2134 context->Amat = Amat;
2135 total_nnz = 0;
2136 for ( i = 0; i < Nrows; i++ )
2137 {
2138 rowNorms[i] = 0.0;
2139 while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0)
2140 {
2141 hypre_TFree(vals, HYPRE_MEMORY_HOST);
2142 hypre_TFree(cols, HYPRE_MEMORY_HOST);
2143 allocated_space += 200 + 1;
2144 cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
2145 vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
2146 }
2147 total_nnz += m;
2148 for ( j = 0; j < m; j++ ) rowNorms[i] += habs(vals[j]);
2149 rowNorms[i] /= extNrows;
2150 }
2151 for ( i = 0; i < total_recv_leng; i++ ) total_nnz += recv_lengths[i];
2152 mat_ia = hypre_TAlloc(int, (extNrows + 1 ) , HYPRE_MEMORY_HOST);
2153 mat_ja = hypre_TAlloc(int, total_nnz , HYPRE_MEMORY_HOST);
2154 mat_aa = hypre_TAlloc(double, total_nnz , HYPRE_MEMORY_HOST);
2155 total_nnz = total_nnz * 7;
2156 mat_ia2 = hypre_TAlloc(int, (extNrows + 1 ) , HYPRE_MEMORY_HOST);
2157 mat_ja2 = hypre_TAlloc(int, total_nnz , HYPRE_MEMORY_HOST);
2158 ncnt = 0;
2159 mat_ia[0] = 0;
2160 for ( i = 0; i < Nrows; i++ )
2161 {
2162 MH_GetRow(context,1,&i,allocated_space,cols,vals,&m);
2163 for ( j = 0; j < m; j++ )
2164 {
2165 if ( vals[j] != 0.0 )
2166 {
2167 mat_ja[ncnt] = cols[j];
2168 mat_aa[ncnt++] = vals[j];
2169 }
2170 }
2171 mat_ia[i+1] = ncnt;
2172 }
2173 offset = 0;
2174 for ( i = 0; i < total_recv_leng; i++ )
2175 {
2176 rowNorms[i+Nrows] = 0.0;
2177 for ( j = offset; j < offset+recv_lengths[i]; j++ )
2178 {
2179 index = ext_ja[j];
2180 if ( index >= Noffset && index < Noffset+Nrows )
2181 ext_ja[j] = index - Noffset;
2182 else
2183 {
2184 m = HYPRE_LSI_Search(map, index, extNrows-Nrows);
2185 if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows;
2186 else ext_ja[j] = -1;
2187 }
2188 if ( ext_ja[j] != -1 && ext_aa[j] != 0.0 )
2189 {
2190 rowNorms[i+Nrows] += habs(ext_aa[j]);
2191 mat_ja[ncnt] = ext_ja[j];
2192 mat_aa[ncnt++] = ext_aa[j];
2193 }
2194 }
2195 rowNorms[i+Nrows] /= extNrows;
2196 offset += recv_lengths[i];
2197 mat_ia[Nrows+i+1] = ncnt;
2198 }
2199
2200 /* ---------------------------------------------------------------- */
2201 /* process the pattern */
2202 /* ---------------------------------------------------------------- */
2203
2204 ncnt = 0;
2205 mat_ia2[0] = 0;
2206 printstep = extNrows / 10;
2207 for ( i = 0; i < extNrows; i++ )
2208 {
2209 if ( ( i % printstep == 0 ) && ilut_ptr->outputLevel > 0 )
2210 printf("%4d : DDILUT Processing pattern row = %d (%d)\n",mypid,i,extNrows);
2211 k = mat_ia[i+1] - mat_ia[i];
2212 for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
2213 {
2214 index = mat_ja[j];
2215 k += ( mat_ia[index+1] - mat_ia[index] );
2216 }
2217 if ( (k+ncnt) > total_nnz )
2218 {
2219 iarray = mat_ja2;
2220 total_nnz += (extNrows - i ) * k;
2221 mat_ja2 = hypre_TAlloc(int, total_nnz , HYPRE_MEMORY_HOST);
2222 for ( j = 0; j < ncnt; j++ ) mat_ja2[j] = iarray[j];
2223 hypre_TFree(iarray, HYPRE_MEMORY_HOST);
2224 }
2225 for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
2226 {
2227 index = mat_ja[j];
2228 mat_ja2[ncnt++] = index;
2229 for (k = mat_ia[index]; k < mat_ia[index+1]; k++)
2230 mat_ja2[ncnt++] = mat_ja[k];
2231 }
2232 hypre_qsort0(mat_ja2, mat_ia2[i], ncnt-1);
2233 k = mat_ia2[i] + 1;
2234 for ( j = mat_ia2[i]+1; j < ncnt; j++ )
2235 {
2236 if ( mat_ja2[j] != mat_ja2[k-1] ) mat_ja2[k++] = mat_ja2[j];
2237 }
2238 mat_ia2[i+1] = k;
2239 ncnt = k;
2240 }
2241 for ( i = 0; i < ncnt; i++ )
2242 if ( mat_ja2[i] < 0 || mat_ja2[i] >= extNrows )
2243 printf("%4d : DDILUT ERROR ja %d = %d \n",mypid,i,mat_ja2[i]);
2244
2245 mat_aa2 = hypre_TAlloc(double, ncnt , HYPRE_MEMORY_HOST);
2246
2247 /* ---------------------------------------------------------------- */
2248 /* process the rows */
2249 /* ---------------------------------------------------------------- */
2250
2251 num_small_pivot = 0;
2252 track_array = hypre_TAlloc(int, extNrows , HYPRE_MEMORY_HOST);
2253 for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
2254
2255 for ( i = 0; i < extNrows; i++ )
2256 {
2257 if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 )
2258 printf("%4d : $DDILUT Processing row %d(%d,%d)\n",mypid,i,extNrows,Nrows);
2259
2260 /* ------------------------------------------------------------- */
2261 /* load the row into buffer */
2262 /* ------------------------------------------------------------- */
2263
2264 track_leng = 0;
2265 first = extNrows;
2266 for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
2267 {
2268 index = mat_ja[j];
2269 dble_buf[index] = mat_aa[j];
2270 track_array[track_leng++] = index;
2271 if ( index < first ) first = index;
2272 }
2273
2274 /* ------------------------------------------------------------- */
2275 /* perform factorization */
2276 /* ------------------------------------------------------------- */
2277
2278 rel_tau = tau * rowNorms[i];
2279 for ( j = first; j < i; j++ )
2280 {
2281 if ( habs(dble_buf[j]) > rel_tau )
2282 {
2283 ddata = dble_buf[j] / diagonal[j];
2284 for ( k = mat_ia2[j]; k < mat_ia2[j+1]; k++ )
2285 {
2286 colIndex = mat_ja2[k];
2287 if ( colIndex > j && mat_aa2[k] != 0.0 )
2288 {
2289 if ( dble_buf[colIndex] != 0.0 )
2290 dble_buf[colIndex] -= (ddata * mat_aa2[k]);
2291 else
2292 {
2293 dble_buf[colIndex] = - (ddata * mat_aa2[k]);
2294 if ( dble_buf[colIndex] != 0.0 )
2295 track_array[track_leng++] = colIndex;
2296 }
2297 }
2298 }
2299 dble_buf[j] = ddata;
2300 }
2301 else dble_buf[j] = 0.0;
2302 }
2303 diagonal[i] = dble_buf[i];
2304 if ( habs(diagonal[i]) < 1.0e-16 )
2305 {
2306 diagonal[i] = dble_buf[i] = 1.0E-6;
2307 num_small_pivot++;
2308 }
2309 for (j = mat_ia2[i]; j < mat_ia2[i+1]; j++)
2310 mat_aa2[j] = dble_buf[mat_ja2[j]];
2311 for ( j = 0; j < track_leng; j++ ) dble_buf[track_array[j]] = 0.0;
2312 }
2313 nnz_count = mat_ia2[extNrows];
2314
2315 if ( ilut_ptr->outputLevel > 0 )
2316 {
2317 printf("%4d : DDILUT number of nonzeros = %d\n",mypid,nnz_count);
2318 printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
2319 }
2320
2321 /* ---------------------------------------------------------- */
2322 /* deallocate temporary storage space */
2323 /* ---------------------------------------------------------- */
2324
2325 ilut_ptr->mat_ia = mat_ia2;
2326 ilut_ptr->mat_ja = mat_ja2;
2327 ilut_ptr->mat_aa = mat_aa2;
2328 hypre_TFree(mat_ia, HYPRE_MEMORY_HOST);
2329 hypre_TFree(mat_ja, HYPRE_MEMORY_HOST);
2330 hypre_TFree(mat_aa, HYPRE_MEMORY_HOST);
2331 hypre_TFree(cols, HYPRE_MEMORY_HOST);
2332 hypre_TFree(vals, HYPRE_MEMORY_HOST);
2333 hypre_TFree(dble_buf, HYPRE_MEMORY_HOST);
2334 hypre_TFree(diagonal, HYPRE_MEMORY_HOST);
2335 hypre_TFree(rowNorms, HYPRE_MEMORY_HOST);
2336 hypre_TFree(context, HYPRE_MEMORY_HOST);
2337 hypre_TFree(track_array, HYPRE_MEMORY_HOST);
2338
2339 return 0;
2340 }
2341
2342
2343