1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 /******************************************************************************
9  *
10  * HYPRE_DDILUT interface
11  *
12  *****************************************************************************/
13 
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <math.h>
17 
18 #include "utilities/_hypre_utilities.h"
19 #include "HYPRE.h"
20 #include "IJ_mv/HYPRE_IJ_mv.h"
21 #include "parcsr_mv/HYPRE_parcsr_mv.h"
22 #include "parcsr_mv/_hypre_parcsr_mv.h"
23 #include "parcsr_ls/HYPRE_parcsr_ls.h"
24 #include "HYPRE_MHMatrix.h"
25 
26 #ifdef HAVE_ML
27 
28 #include "ml_struct.h"
29 #include "ml_aggregate.h"
30 
31 #endif
32 
33 #include "HYPRE_MHMatrix.h"
34 #include "HYPRE_FEI.h"
35 
36 extern int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix,MH_Matrix *,
37                                      MPI_Comm, int *, MH_Context *);
38 extern int HYPRE_LSI_DDIlutGetRowLengths(MH_Matrix *,int *, int **,MPI_Comm);
39 extern int HYPRE_LSI_DDIlutGetOffProcRows(MH_Matrix *Amat, int leng, int *,
40                  int Noffset, int *map, int *map2, int **int_buf,
41                  double **dble_buf, MPI_Comm mpi_comm);
42 extern int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
43                  int total_recv_leng, int *recv_lengths, int *ext_ja,
44                  double *ext_aa, int *map, int *map2, int Noffset);
45 extern int HYPRE_LSI_DDIlutDecompose2(HYPRE_LSI_DDIlut *ilut_ptr,
46                  MH_Matrix *Amat,int total_recv_leng, int *recv_lengths,
47                  int *ext_ja, double *ext_aa, int *map, int *map2, int Noffset);
48 extern void HYPRE_LSI_qsort1a(int *, int *, int, int);
49 extern void hypre_qsort0(int *, int, int);
50 extern int  HYPRE_LSI_SplitDSort(double*,int,int*,int);
51 extern int  MH_ExchBdry(double *, void *);
52 extern int  MH_ExchBdryBack(double *, void *, int *, double **, int **);
53 extern int  MH_GetRow(void *, int, int *, int, int *, double *, int *);
54 extern int  HYPRE_LSI_Cuthill(int, int *, int *, double *, int *, int *);
55 extern int  HYPRE_LSI_Search(int *, int, int);
56 
57 #define habs(x) ((x) > 0 ? (x) : -(x))
58 
59 /*--------------------------------------------------------------------------
60  * HYPRE_LSI_DDIlutCreate - Return a DDIlut preconditioner object "solver".
61  *--------------------------------------------------------------------------*/
62 
HYPRE_LSI_DDIlutCreate(MPI_Comm comm,HYPRE_Solver * solver)63 int HYPRE_LSI_DDIlutCreate( MPI_Comm comm, HYPRE_Solver *solver )
64 {
65    HYPRE_LSI_DDIlut *ilut_ptr;
66 
67    ilut_ptr = hypre_TAlloc(HYPRE_LSI_DDIlut, 1, HYPRE_MEMORY_HOST);
68 
69    if (ilut_ptr == NULL) return 1;
70 
71    ilut_ptr->comm          = comm;
72    ilut_ptr->mh_mat        = NULL;
73    ilut_ptr->fillin        = 0.0;
74    ilut_ptr->thresh        = 0.0; /* defaults */
75    ilut_ptr->mat_ia        = NULL;
76    ilut_ptr->mat_ja        = NULL;
77    ilut_ptr->mat_aa        = NULL;
78    ilut_ptr->outputLevel   = 0;
79    ilut_ptr->overlap       = 0;
80    ilut_ptr->order_array   = NULL;
81    ilut_ptr->reorder_array = NULL;
82    ilut_ptr->reorder       = 0;
83 
84    *solver = (HYPRE_Solver) ilut_ptr;
85 
86    return 0;
87 }
88 
89 /*--------------------------------------------------------------------------
90  * HYPRE_LSI_DDIlutDestroy - Destroy a DDIlut object.
91  *--------------------------------------------------------------------------*/
92 
HYPRE_LSI_DDIlutDestroy(HYPRE_Solver solver)93 int HYPRE_LSI_DDIlutDestroy( HYPRE_Solver solver )
94 {
95    int              i;
96    HYPRE_LSI_DDIlut *ilut_ptr;
97 
98    ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
99    hypre_TFree(ilut_ptr->mat_ia, HYPRE_MEMORY_HOST);
100    hypre_TFree(ilut_ptr->mat_ja, HYPRE_MEMORY_HOST);
101    hypre_TFree(ilut_ptr->mat_aa, HYPRE_MEMORY_HOST);
102    if ( ilut_ptr->mh_mat != NULL )
103    {
104       hypre_TFree(ilut_ptr->mh_mat->sendProc, HYPRE_MEMORY_HOST);
105       hypre_TFree(ilut_ptr->mh_mat->sendLeng, HYPRE_MEMORY_HOST);
106       hypre_TFree(ilut_ptr->mh_mat->recvProc, HYPRE_MEMORY_HOST);
107       hypre_TFree(ilut_ptr->mh_mat->recvLeng, HYPRE_MEMORY_HOST);
108       for ( i = 0; i < ilut_ptr->mh_mat->sendProcCnt; i++ )
109          hypre_TFree(ilut_ptr->mh_mat->sendList[i], HYPRE_MEMORY_HOST);
110       hypre_TFree(ilut_ptr->mh_mat->sendList, HYPRE_MEMORY_HOST);
111       hypre_TFree(ilut_ptr->mh_mat, HYPRE_MEMORY_HOST);
112    }
113    ilut_ptr->mh_mat = NULL;
114    hypre_TFree(ilut_ptr->order_array, HYPRE_MEMORY_HOST);
115    hypre_TFree(ilut_ptr->reorder_array, HYPRE_MEMORY_HOST);
116    hypre_TFree(ilut_ptr, HYPRE_MEMORY_HOST);
117 
118    return 0;
119 }
120 
121 /*--------------------------------------------------------------------------
122  * HYPRE_LSI_DDIlutSetFillin - Set the fill-in parameter.
123  *--------------------------------------------------------------------------*/
124 
HYPRE_LSI_DDIlutSetFillin(HYPRE_Solver solver,double fillin)125 int HYPRE_LSI_DDIlutSetFillin(HYPRE_Solver solver, double fillin)
126 {
127    HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
128 
129    ilut_ptr->fillin = fillin;
130 
131    return 0;
132 }
133 
134 /*--------------------------------------------------------------------------
135  * HYPRE_LSI_DDIlutSetDropTolerance - Set the threshold for dropping
136  *--------------------------------------------------------------------------*/
137 
HYPRE_LSI_DDIlutSetDropTolerance(HYPRE_Solver solver,double thresh)138 int HYPRE_LSI_DDIlutSetDropTolerance(HYPRE_Solver solver, double thresh)
139 {
140    HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
141 
142    ilut_ptr->thresh = thresh;
143 
144    return 0;
145 }
146 
147 /*--------------------------------------------------------------------------
148  * HYPRE_LSI_DDIlutSetOverlap - turn on overlap
149  *--------------------------------------------------------------------------*/
150 
HYPRE_LSI_DDIlutSetOverlap(HYPRE_Solver solver)151 int HYPRE_LSI_DDIlutSetOverlap(HYPRE_Solver solver)
152 {
153    HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
154 
155    ilut_ptr->overlap = 1;
156 
157    return 0;
158 }
159 
160 /*--------------------------------------------------------------------------
161  * HYPRE_LSI_DDIlutSetReorder - turn on reordering
162  *--------------------------------------------------------------------------*/
163 
HYPRE_LSI_DDIlutSetReorder(HYPRE_Solver solver)164 int HYPRE_LSI_DDIlutSetReorder(HYPRE_Solver solver)
165 {
166    HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
167 
168    ilut_ptr->reorder = 1;
169 
170    return 0;
171 }
172 
173 /*--------------------------------------------------------------------------
174  * HYPRE_LSI_DDIlutSetOutputLevel - Set debug level
175  *--------------------------------------------------------------------------*/
176 
HYPRE_LSI_DDIlutSetOutputLevel(HYPRE_Solver solver,int level)177 int HYPRE_LSI_DDIlutSetOutputLevel(HYPRE_Solver solver, int level)
178 {
179    HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
180 
181    ilut_ptr->outputLevel = level;
182 
183    return 0;
184 }
185 
186 /*--------------------------------------------------------------------------
187  * HYPRE_LSI_DDIlutSolve - Solve function for DDILUT.
188  *--------------------------------------------------------------------------*/
189 
HYPRE_LSI_DDIlutSolve(HYPRE_Solver solver,HYPRE_ParCSRMatrix A,HYPRE_ParVector b,HYPRE_ParVector x)190 int HYPRE_LSI_DDIlutSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A,
191                        HYPRE_ParVector b,   HYPRE_ParVector x )
192 {
193    int              i, j, *idiag, Nrows, extNrows, *mat_ia, *mat_ja;
194    int              column, *order_list, *reorder_list, order_flag;
195    double           *rhs, *soln, *dbuffer, ddata, *mat_aa;
196    HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
197    MH_Context       *context;
198    MPI_Comm         mpi_comm;
199 
200    rhs  = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) b));
201    soln = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) x));
202 
203    Nrows        = ilut_ptr->Nrows;
204    extNrows     = ilut_ptr->extNrows;
205    mat_ia       = ilut_ptr->mat_ia;
206    mat_ja       = ilut_ptr->mat_ja;
207    mat_aa       = ilut_ptr->mat_aa;
208    order_list   = ilut_ptr->order_array;
209    reorder_list = ilut_ptr->reorder_array;
210    order_flag   = ilut_ptr->reorder;
211 
212    dbuffer = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
213    idiag   = hypre_TAlloc(int, extNrows , HYPRE_MEMORY_HOST);
214    for ( i = 0; i < Nrows; i++ ) dbuffer[i] = rhs[i];
215 
216    HYPRE_ParCSRMatrixGetComm(A, &mpi_comm);
217    context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
218    context->Amat = ilut_ptr->mh_mat;
219    context->comm = mpi_comm;
220 
221    if ( extNrows > Nrows ) MH_ExchBdry(dbuffer, context);
222    if ( order_flag )
223       for ( i = 0; i < Nrows; i++ ) dbuffer[i] = rhs[order_list[i]];
224    else
225       for ( i = 0; i < Nrows; i++ ) dbuffer[i] = rhs[i];
226 
227    for ( i = 0; i < extNrows; i++ )
228    {
229       ddata = 0.0;
230       for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
231       {
232          column = mat_ja[j];
233          if ( column == i ) { idiag[i] = j; break;}
234          ddata += mat_aa[j] * dbuffer[column];
235       }
236       dbuffer[i] -= ddata;
237    }
238    for ( i = extNrows-1; i >= 0; i-- )
239    {
240       ddata = 0.0;
241       for ( j = idiag[i]+1; j < mat_ia[i+1]; j++ )
242       {
243          column = mat_ja[j];
244          ddata += mat_aa[j] * dbuffer[column];
245       }
246       dbuffer[i] -= ddata;
247       dbuffer[i] /= mat_aa[idiag[i]];
248    }
249    if ( order_flag )
250       for ( i = 0; i < Nrows; i++ ) soln[i] = dbuffer[reorder_list[i]];
251    else
252       for ( i = 0; i < Nrows; i++ ) soln[i] = dbuffer[i];
253    hypre_TFree(dbuffer, HYPRE_MEMORY_HOST);
254    hypre_TFree(idiag, HYPRE_MEMORY_HOST);
255    hypre_TFree(context, HYPRE_MEMORY_HOST);
256 
257    return 0;
258 }
259 
260 /*--------------------------------------------------------------------------
261  * HYPRE_LSI_DDIlutSetup - Set up function for LSI_DDIlut.
262  *--------------------------------------------------------------------------*/
263 
HYPRE_LSI_DDIlutSetup(HYPRE_Solver solver,HYPRE_ParCSRMatrix A_csr,HYPRE_ParVector b,HYPRE_ParVector x)264 int HYPRE_LSI_DDIlutSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr,
265                           HYPRE_ParVector b,   HYPRE_ParVector x )
266 {
267    int              i, j, offset, total_recv_leng, *recv_lengths=NULL;
268    int              *int_buf=NULL, mypid, nprocs, *parray;
269    int              *map=NULL, *map2=NULL, *row_partition=NULL,*parray2;
270    double           *dble_buf=NULL;
271    HYPRE_LSI_DDIlut *ilut_ptr = (HYPRE_LSI_DDIlut *) solver;
272    MH_Context       *context=NULL;
273    MH_Matrix        *mh_mat=NULL;
274    MPI_Comm         mpi_comm;
275 
276    /* ---------------------------------------------------------------- */
277    /* get the row information in my processors                         */
278    /* ---------------------------------------------------------------- */
279 
280    HYPRE_ParCSRMatrixGetComm(A_csr, &mpi_comm);
281    MPI_Comm_rank(mpi_comm, &mypid);
282    MPI_Comm_size(mpi_comm, &nprocs);
283    HYPRE_ParCSRMatrixGetRowPartitioning(A_csr, &row_partition);
284 
285    /* ---------------------------------------------------------------- */
286    /* convert the incoming CSR matrix into a MH matrix                 */
287    /* ---------------------------------------------------------------- */
288 
289    context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
290    context->comm = mpi_comm;
291    context->globalEqns = row_partition[nprocs];
292    context->partition = hypre_TAlloc(int, (nprocs+1), HYPRE_MEMORY_HOST);
293    for (i=0; i<=nprocs; i++) context->partition[i] = row_partition[i];
294    hypre_TFree( row_partition , HYPRE_MEMORY_HOST);
295    mh_mat = hypre_TAlloc( MH_Matrix, 1, HYPRE_MEMORY_HOST);
296    context->Amat = mh_mat;
297    HYPRE_LSI_MLConstructMHMatrix(A_csr,mh_mat,mpi_comm,
298                                  context->partition,context);
299 
300    /* ---------------------------------------------------------------- */
301    /* compose the enlarged overlapped local matrix                     */
302    /* ---------------------------------------------------------------- */
303 
304    if ( ilut_ptr->overlap != 0 )
305    {
306       HYPRE_LSI_DDIlutComposeOverlappedMatrix(mh_mat, &total_recv_leng,
307                  &recv_lengths, &int_buf, &dble_buf, &map, &map2,&offset,
308                  mpi_comm);
309    }
310    else
311    {
312       total_recv_leng = 0;
313       recv_lengths = NULL;
314       int_buf = NULL;
315       dble_buf = NULL;
316       map = NULL;
317       map2 = NULL;
318       parray  = hypre_TAlloc(int, nprocs , HYPRE_MEMORY_HOST);
319       parray2 = hypre_TAlloc(int, nprocs , HYPRE_MEMORY_HOST);
320       for ( i = 0; i < nprocs; i++ ) parray2[i] = 0;
321       parray2[mypid] = mh_mat->Nrows;
322       MPI_Allreduce(parray2,parray,nprocs,MPI_INT,MPI_SUM,mpi_comm);
323       offset = 0;
324       for (i = 0; i < mypid; i++) offset += parray[i];
325       hypre_TFree(parray, HYPRE_MEMORY_HOST);
326       hypre_TFree(parray2, HYPRE_MEMORY_HOST);
327    }
328 
329    /* ---------------------------------------------------------------- */
330    /* perform ILUT decomposition on local matrix                       */
331    /* ---------------------------------------------------------------- */
332 
333    if ( ilut_ptr->mat_ia == NULL )
334       HYPRE_LSI_DDIlutDecompose(ilut_ptr,mh_mat,total_recv_leng,recv_lengths,
335                                 int_buf, dble_buf, map,map2, offset);
336    else
337    {
338       HYPRE_LSI_DDIlutDecompose2(ilut_ptr,mh_mat,total_recv_leng,recv_lengths,
339                                  int_buf, dble_buf, map,map2, offset);
340       if ( mypid == 0 && ilut_ptr->outputLevel >= 1 )
341          printf("DDILUT : preconditioner pattern reused.\n");
342    }
343    if ( mypid == 0 && ilut_ptr->outputLevel > 2 )
344    {
345       for ( i = 0; i < ilut_ptr->extNrows; i++ )
346          for ( j = ilut_ptr->mat_ia[i]; j < ilut_ptr->mat_ia[i+1]; j++ )
347             printf("LA(%d,%d) = %e;\n", i+1, ilut_ptr->mat_ja[j]+1,
348                    ilut_ptr->mat_aa[j]);
349    }
350 
351    ilut_ptr->mh_mat = mh_mat;
352    hypre_TFree(mh_mat->rowptr, HYPRE_MEMORY_HOST);
353    hypre_TFree(mh_mat->colnum, HYPRE_MEMORY_HOST);
354    hypre_TFree(mh_mat->values, HYPRE_MEMORY_HOST);
355    hypre_TFree(map, HYPRE_MEMORY_HOST);
356    hypre_TFree(map2, HYPRE_MEMORY_HOST);
357    hypre_TFree(int_buf, HYPRE_MEMORY_HOST);
358    hypre_TFree(dble_buf , HYPRE_MEMORY_HOST);
359    hypre_TFree(recv_lengths, HYPRE_MEMORY_HOST);
360    hypre_TFree(context->partition, HYPRE_MEMORY_HOST);
361    hypre_TFree(context, HYPRE_MEMORY_HOST);
362    return 0;
363 }
364 
365 /*****************************************************************************/
366 /* subroutines used for constructing overlapped matrix                       */
367 /*****************************************************************************/
368 
HYPRE_LSI_DDIlutGetRowLengths(MH_Matrix * Amat,int * leng,int ** recv_leng,MPI_Comm mpi_comm)369 int HYPRE_LSI_DDIlutGetRowLengths(MH_Matrix *Amat, int *leng, int **recv_leng,
370                                   MPI_Comm mpi_comm)
371 {
372    int         i, j, m, mypid, index, *temp_list, allocated_space, length;
373    int         nRecv, *recvProc, *recvLeng, *cols, total_recv, mtype, msgtype;
374    int         nSend, *sendProc, *sendLeng, **sendList, proc_id, offset;
375    double      *vals;
376    MPI_Request *Request;
377    MPI_Status  status;
378    MH_Context  *context;
379 
380    /* ---------------------------------------------------------------- */
381    /* fetch communication information                                  */
382    /* ---------------------------------------------------------------- */
383 
384    MPI_Comm_rank(mpi_comm, &mypid);
385    nRecv    = Amat->recvProcCnt;
386    nSend    = Amat->sendProcCnt;
387    recvProc = Amat->recvProc;
388    recvLeng = Amat->recvLeng;
389    sendProc = Amat->sendProc;
390    sendLeng = Amat->sendLeng;
391    sendList = Amat->sendList;
392    total_recv = 0;
393    for ( i = 0; i < nRecv; i++ ) total_recv += recvLeng[i];
394 
395    (*leng) = total_recv;
396    if ( nRecv <= 0 ) (*recv_leng) = NULL;
397 
398    MPI_Barrier(mpi_comm);
399 
400    /* ---------------------------------------------------------------- */
401    /* post receives for all messages                                   */
402    /* ---------------------------------------------------------------- */
403 
404    (*recv_leng) = hypre_TAlloc(int, total_recv , HYPRE_MEMORY_HOST);
405    if (nRecv > 0) Request = hypre_TAlloc(MPI_Request, nRecv, HYPRE_MEMORY_HOST);
406    offset = 0;
407    mtype = 2001;
408    for (i = 0; i < nRecv; i++)
409    {
410       proc_id = recvProc[i];
411       msgtype = mtype;
412       length  = recvLeng[i];
413       MPI_Irecv((void *) &((*recv_leng)[offset]), length, MPI_INT, proc_id,
414                msgtype, mpi_comm, &Request[i]);
415       offset += length;
416    }
417 
418    /* ---------------------------------------------------------------- */
419    /* write out all messages                                           */
420    /* ---------------------------------------------------------------- */
421 
422    context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
423    context->Amat = Amat;
424    allocated_space = 100;
425    cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
426    vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
427    for (i = 0; i < nSend; i++)
428    {
429       proc_id   = sendProc[i];
430       length    = sendLeng[i];
431       temp_list = hypre_TAlloc(int, sendLeng[i] , HYPRE_MEMORY_HOST);
432       for (j = 0; j < length; j++)
433       {
434          index = sendList[i][j];
435          while (MH_GetRow(context,1,&index,allocated_space,cols,vals,&m)==0)
436          {
437             hypre_TFree(cols, HYPRE_MEMORY_HOST);
438             hypre_TFree(vals, HYPRE_MEMORY_HOST);
439             allocated_space += 200 + 1;
440             cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
441             vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
442          }
443          temp_list[j] = m;
444       }
445       msgtype = mtype;
446       MPI_Send((void*)temp_list,length,MPI_INT,proc_id,msgtype,mpi_comm);
447       hypre_TFree(temp_list, HYPRE_MEMORY_HOST);
448    }
449    hypre_TFree(cols, HYPRE_MEMORY_HOST);
450    hypre_TFree(vals, HYPRE_MEMORY_HOST);
451    hypre_TFree(context, HYPRE_MEMORY_HOST);
452 
453    /* ---------------------------------------------------------------- */
454    /* wait for messages                                                */
455    /* ---------------------------------------------------------------- */
456 
457    for ( i = 0; i < nRecv; i++ )
458    {
459       MPI_Wait( &Request[i], &status );
460    }
461 
462    if (nRecv > 0)
463       hypre_TFree(Request, HYPRE_MEMORY_HOST);
464    return 0;
465 }
466 
467 /*****************************************************************************/
468 /* needed for overlapped smoothers                                           */
469 /*****************************************************************************/
470 
HYPRE_LSI_DDIlutGetOffProcRows(MH_Matrix * Amat,int leng,int * recv_leng,int Noffset,int * map,int * map2,int ** int_buf,double ** dble_buf,MPI_Comm mpi_comm)471 int HYPRE_LSI_DDIlutGetOffProcRows(MH_Matrix *Amat, int leng, int *recv_leng,
472                            int Noffset, int *map, int *map2, int **int_buf,
473                            double **dble_buf, MPI_Comm mpi_comm)
474 {
475    int         i, j, k, m, length, offset, allocated_space, proc_id;
476    int         nRecv, nSend, *recvProc, *sendProc, total_recv, mtype, msgtype;
477    int         *sendLeng, *recvLeng, **sendList, *cols, *isend_buf, Nrows;
478    int         nnz, nnz_offset, index, mypid;
479    double      *vals, *send_buf;
480    MPI_Request *request;
481    MPI_Status  status;
482    MH_Context  *context;
483 
484    /* ---------------------------------------------------------------- */
485    /* fetch communication information                                  */
486    /* ---------------------------------------------------------------- */
487 
488    MPI_Comm_rank(mpi_comm, &mypid);
489    Nrows    = Amat->Nrows;
490    nRecv    = Amat->recvProcCnt;
491    nSend    = Amat->sendProcCnt;
492    recvProc = Amat->recvProc;
493    recvLeng = Amat->recvLeng;
494    sendProc = Amat->sendProc;
495    sendLeng = Amat->sendLeng;
496    sendList = Amat->sendList;
497    if ( nRecv <= 0 ) { (*int_buf) = NULL; (*dble_buf) = NULL;}
498    total_recv = 0;
499    for ( i = 0; i < leng; i++ ) total_recv += recv_leng[i];
500 
501    /* ---------------------------------------------------------------- */
502    /* allocate buffer space                                            */
503    /* ---------------------------------------------------------------- */
504 
505    if ( nRecv > 0 )
506         request     = hypre_TAlloc(MPI_Request , nRecv, HYPRE_MEMORY_HOST);
507    else request = NULL;
508 
509    if ( total_recv > 0 )
510    {
511       (*int_buf)  = hypre_TAlloc(int, total_recv , HYPRE_MEMORY_HOST);
512       (*dble_buf) = hypre_TAlloc(double, total_recv , HYPRE_MEMORY_HOST);
513    }
514 
515    /* ---------------------------------------------------------------- */
516    /* post receives for all messages                                   */
517    /* ---------------------------------------------------------------- */
518 
519    offset     = 0;
520    mtype      = 2002;
521    nnz_offset = 0;
522    for (i = 0; i < nRecv; i++)
523    {
524       proc_id = recvProc[i];
525       msgtype = mtype;
526       length  = recvLeng[i];
527       nnz = 0;
528       for (j = 0; j < length; j++)  nnz += recv_leng[offset+j];
529 
530       MPI_Irecv((void *) &((*dble_buf)[nnz_offset]), nnz, MPI_DOUBLE,
531                proc_id, msgtype, mpi_comm, request+i);
532       offset += length;
533       nnz_offset += nnz;
534    }
535 
536    /* ---------------------------------------------------------------- */
537    /* send rows to other processors                                    */
538    /* ---------------------------------------------------------------- */
539 
540    context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
541    context->Amat = Amat;
542    mtype = 2002;
543    allocated_space = 100;
544    cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
545    vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
546    for (i = 0; i < nSend; i++)
547    {
548       proc_id   = sendProc[i];
549       length    = sendLeng[i];
550       nnz       = 0;
551       for (j = 0; j < length; j++)
552       {
553          index = sendList[i][j];
554          while (MH_GetRow(context,1,&index,allocated_space,cols,vals,&m)==0)
555          {
556             hypre_TFree(cols, HYPRE_MEMORY_HOST);
557             hypre_TFree(vals, HYPRE_MEMORY_HOST);
558             allocated_space += 200 + 1;
559             cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
560             vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
561          }
562          nnz += m;
563       }
564       if ( nnz > 0 ) send_buf = hypre_TAlloc(double,  nnz , HYPRE_MEMORY_HOST);
565       offset = 0;
566       for (j = 0; j < length; j++)
567       {
568          index = sendList[i][j];
569          MH_GetRow(context,1,&index,allocated_space,cols,vals,&m);
570          for (k = 0; k < m; k++) send_buf[offset+k] = vals[k];
571          offset += m;
572       }
573       msgtype = mtype;
574       MPI_Send((void*) send_buf, nnz, MPI_DOUBLE, proc_id, msgtype,
575                        mpi_comm);
576       if ( nnz > 0 )
577          hypre_TFree(send_buf, HYPRE_MEMORY_HOST);
578    }
579 
580    hypre_TFree(cols, HYPRE_MEMORY_HOST);
581    hypre_TFree(vals, HYPRE_MEMORY_HOST);
582 
583    /* ---------------------------------------------------------------- */
584    /* wait for all messages                                            */
585    /* ---------------------------------------------------------------- */
586 
587    for (i = 0; i < nRecv; i++)
588    {
589       MPI_Wait(request+i, &status);
590    }
591 
592    /* ----------------------------------------------------------- */
593    /* post receives for all messages                              */
594    /* ----------------------------------------------------------- */
595 
596    mtype  = 2003;
597    offset = 0;
598    nnz_offset = 0;
599    for (i = 0; i < nRecv; i++)
600    {
601       proc_id = recvProc[i];
602       msgtype = mtype;
603       length  = recvLeng[i];
604       nnz = 0;
605       for (j = 0; j < length; j++)  nnz += recv_leng[offset+j];
606       MPI_Irecv((void *) &((*int_buf)[nnz_offset]), nnz, MPI_INT,
607                    proc_id, msgtype, mpi_comm, request+i);
608       offset += length;
609       nnz_offset += nnz;
610    }
611 
612    /* ---------------------------------------------------------------- */
613    /* send rows to other processors                                    */
614    /* ---------------------------------------------------------------- */
615 
616    mtype = 2003;
617    cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
618    vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
619    for (i = 0; i < nSend; i++)
620    {
621       proc_id   = sendProc[i];
622       length    = sendLeng[i];
623       nnz       = 0;
624       for (j = 0; j < length; j++)
625       {
626          index = sendList[i][j];
627          MH_GetRow(context,1,&index,allocated_space,cols,vals,&m);
628          nnz += m;
629       }
630       if ( nnz > 0 ) isend_buf = hypre_TAlloc(int,  nnz , HYPRE_MEMORY_HOST);
631       offset = 0;
632       for (j = 0; j < length; j++)
633       {
634          index = sendList[i][j];
635          MH_GetRow(context,1,&index,allocated_space,cols,vals,&m);
636          for (k = 0; k < m; k++)
637          {
638             if ( cols[k] < Nrows ) isend_buf[offset+k] = cols[k] + Noffset;
639             else                   isend_buf[offset+k] = map[cols[k]-Nrows];
640          }
641          offset += m;
642       }
643       msgtype = mtype;
644       MPI_Send((void*) isend_buf, nnz, MPI_INT, proc_id, msgtype,
645                        mpi_comm);
646       if ( nnz > 0 )
647          hypre_TFree(isend_buf, HYPRE_MEMORY_HOST);
648    }
649    hypre_TFree(cols, HYPRE_MEMORY_HOST);
650    hypre_TFree(vals, HYPRE_MEMORY_HOST);
651 
652    /* ----------------------------------------------------------- */
653    /* post receives for all messages                              */
654    /* ----------------------------------------------------------- */
655 
656    for (i = 0; i < nRecv; i++)
657    {
658       MPI_Wait(request+i, &status);
659    }
660 
661    hypre_TFree(request, HYPRE_MEMORY_HOST);
662    hypre_TFree(context, HYPRE_MEMORY_HOST);
663    return 0;
664 }
665 
666 /*****************************************************************************/
667 /* construct an enlarged overlapped local matrix                             */
668 /*****************************************************************************/
669 
HYPRE_LSI_DDIlutComposeOverlappedMatrix(MH_Matrix * mh_mat,int * total_recv_leng,int ** recv_lengths,int ** int_buf,double ** dble_buf,int ** sindex_array,int ** sindex_array2,int * offset,MPI_Comm mpi_comm)670 int HYPRE_LSI_DDIlutComposeOverlappedMatrix(MH_Matrix *mh_mat,
671               int *total_recv_leng, int **recv_lengths, int **int_buf,
672               double **dble_buf, int **sindex_array, int **sindex_array2,
673               int *offset, MPI_Comm mpi_comm)
674 {
675    int        i, nprocs, mypid, Nrows, *proc_array, *proc_array2;
676    int        extNrows, NrowsOffset, *index_array, *index_array2;
677    int        nRecv, *recvLeng;
678    double     *dble_array;
679    MH_Context *context;
680 
681    /* ---------------------------------------------------------------- */
682    /* fetch communication information                                  */
683    /* ---------------------------------------------------------------- */
684 
685    MPI_Comm_rank(mpi_comm, &mypid);
686    MPI_Comm_size(mpi_comm, &nprocs);
687 
688    /* ---------------------------------------------------------------- */
689    /* fetch matrix information                                         */
690    /* ---------------------------------------------------------------- */
691 
692    nRecv    = mh_mat->recvProcCnt;
693    recvLeng = mh_mat->recvLeng;
694    Nrows    = mh_mat->Nrows;
695 
696    /* ---------------------------------------------------------------- */
697    /* compute the enlarged matrix size                                 */
698    /* ---------------------------------------------------------------- */
699 
700    (*total_recv_leng) = 0;
701    for ( i = 0; i < nRecv; i++ ) (*total_recv_leng) += recvLeng[i];
702    extNrows = Nrows + (*total_recv_leng);
703 
704    /* ---------------------------------------------------------------- */
705    /* compose NrowsOffset and processor offsets proc_array             */
706    /* ---------------------------------------------------------------- */
707 
708    proc_array  = hypre_TAlloc(int, nprocs , HYPRE_MEMORY_HOST);
709    proc_array2 = hypre_TAlloc(int, nprocs , HYPRE_MEMORY_HOST);
710    for ( i = 0; i < nprocs; i++ ) proc_array2[i] = 0;
711    proc_array2[mypid] = Nrows;
712    MPI_Allreduce(proc_array2,proc_array,nprocs,MPI_INT,MPI_SUM,mpi_comm);
713    NrowsOffset = 0;
714    for (i = 0; i < mypid; i++) NrowsOffset += proc_array[i];
715    for (i = 1; i < nprocs; i++) proc_array[i] += proc_array[i-1];
716    hypre_TFree(proc_array2, HYPRE_MEMORY_HOST);
717 
718    /* ---------------------------------------------------------------- */
719    /* compose the column index map (index_array,index_array2)          */
720    /* ---------------------------------------------------------------- */
721 
722    context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
723    context->comm = mpi_comm;
724    context->Amat = mh_mat;
725    dble_array  = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
726    for (i = Nrows; i < extNrows; i++) dble_array[i] = 0.0;
727    for (i = 0; i < Nrows; i++) dble_array[i] = 1.0 * ( i + NrowsOffset );
728    MH_ExchBdry(dble_array, context);
729    if ( extNrows-Nrows > 0 )
730       index_array = hypre_TAlloc(int, (extNrows-Nrows) , HYPRE_MEMORY_HOST);
731    else
732       index_array = NULL;
733    for (i = Nrows; i < extNrows; i++) index_array[i-Nrows] = dble_array[i];
734    if ( extNrows-Nrows > 0 )
735       index_array2  = hypre_TAlloc(int, (extNrows-Nrows) , HYPRE_MEMORY_HOST);
736    else
737       index_array2 = NULL;
738    for (i = 0; i < extNrows-Nrows; i++) index_array2[i] = i;
739    hypre_TFree(dble_array, HYPRE_MEMORY_HOST);
740    hypre_TFree(context, HYPRE_MEMORY_HOST);
741 
742    /* ---------------------------------------------------------------- */
743    /* send the lengths of each row to remote processor                 */
744    /* at the end, additional row information should be given           */
745    /* in total_recv_leng, recv_lengths, int_buf, dble_buf              */
746    /* ---------------------------------------------------------------- */
747 
748    HYPRE_LSI_DDIlutGetRowLengths(mh_mat,total_recv_leng,recv_lengths,mpi_comm);
749    HYPRE_LSI_DDIlutGetOffProcRows(mh_mat, *total_recv_leng, *recv_lengths,
750               NrowsOffset,index_array,index_array2,int_buf, dble_buf,mpi_comm);
751 
752    hypre_TFree(proc_array, HYPRE_MEMORY_HOST);
753    HYPRE_LSI_qsort1a(index_array, index_array2, 0, extNrows-Nrows-1);
754    (*sindex_array) = index_array;
755    (*sindex_array2) = index_array2;
756    (*offset) = NrowsOffset;
757    return 0;
758 }
759 
760 /*****************************************************************************/
761 /* function for doing ILUT decomposition                                     */
762 /* ( based on ILU(0) + ILUT based on magnitude)                              */
763 /*****************************************************************************/
764 
HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut * ilut_ptr,MH_Matrix * Amat,int total_recv_leng,int * recv_lengths,int * ext_ja,double * ext_aa,int * map,int * map2,int Noffset)765 int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
766            int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa,
767            int *map, int *map2, int Noffset)
768 {
769    int          *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid;
770    int          index, first, Lcount, Ucount, j, k, total_nnz;
771    int          sortcnt, colIndex, offset, nnz_count, Nrows, extNrows;
772    int          *track_array, track_leng, num_small_pivot, printstep, nnz_row;
773    int          *sortcols, *Amat_ia, *Amat_ja, *order_list, *reorder_list;
774    int          max_nnz_row, touch_cnt=0, order_flag;
775    double       *vals, ddata, *mat_aa, *diagonal, *rowNorms, *Norm2;
776    double       *dble_buf, fillin, tau, rel_tau, *sortvals, *Amat_aa;
777    MH_Context   *context;
778 
779    /* ---------------------------------------------------------------- */
780    /* fetch ILUT parameters                                            */
781    /* ---------------------------------------------------------------- */
782 
783    MPI_Comm_rank(ilut_ptr->comm, &mypid);
784    fillin   = ilut_ptr->fillin;
785    tau      = ilut_ptr->thresh;
786    Nrows    = Amat->Nrows;
787    extNrows = Nrows + total_recv_leng;
788    ilut_ptr->Nrows = Nrows;
789    ilut_ptr->extNrows = extNrows;
790    order_flag = ilut_ptr->reorder;
791 
792    /* ---------------------------------------------------------------- */
793    /* allocate temporary storage space                                 */
794    /* ---------------------------------------------------------------- */
795 
796    allocated_space = extNrows;
797    cols     = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
798    vals     = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
799    sortcols = hypre_TAlloc(int, extNrows , HYPRE_MEMORY_HOST);
800    sortvals = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
801    dble_buf = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
802    diagonal = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
803    rowNorms = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
804 
805    /* ---------------------------------------------------------------- */
806    /* compute the storage requirement for the ILU matrix               */
807    /* ---------------------------------------------------------------- */
808 
809    context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
810    context->Amat = Amat;
811    total_nnz     = 0;
812    for ( i = 0; i < Nrows; i++ )
813    {
814       rowNorms[i] = 0.0;
815       while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0)
816       {
817          hypre_TFree(vals, HYPRE_MEMORY_HOST);
818          hypre_TFree(cols, HYPRE_MEMORY_HOST);
819          allocated_space += 200 + 1;
820          cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
821          vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
822       }
823       total_nnz += m;
824       for ( j = 0; j < m; j++ ) rowNorms[i] += habs(vals[j]);
825       rowNorms[i] /= extNrows;
826    }
827    hypre_TFree(vals, HYPRE_MEMORY_HOST);
828    hypre_TFree(cols, HYPRE_MEMORY_HOST);
829 
830    /* ---------------------------------------------------------------- */
831    /* permute the matrix                                               */
832    /* ---------------------------------------------------------------- */
833 
834    Amat_ia      = hypre_TAlloc(int,  (Nrows+1) , HYPRE_MEMORY_HOST);
835    Amat_ja      = hypre_TAlloc(int,  total_nnz , HYPRE_MEMORY_HOST);
836    Amat_aa      = hypre_TAlloc(double,  total_nnz , HYPRE_MEMORY_HOST);
837    total_nnz    = 0;
838    Amat_ia[0]   = total_nnz;
839    for ( i = 0; i < Nrows; i++ )
840    {
841       MH_GetRow(context,1,&i,allocated_space,&Amat_ja[total_nnz],
842                 &Amat_aa[total_nnz],&m);
843       total_nnz    += m;
844       Amat_ia[i+1] = total_nnz;
845    }
846 
847    if ( order_flag )
848    {
849       order_list   = hypre_TAlloc(int,  Nrows , HYPRE_MEMORY_HOST);
850       reorder_list = hypre_TAlloc(int,  Nrows , HYPRE_MEMORY_HOST);
851       for ( i = 0; i < Nrows; i++ ) order_list[i] = reorder_list[i] = i;
852       HYPRE_LSI_Cuthill(Nrows,Amat_ia,Amat_ja,Amat_aa,order_list,reorder_list);
853       ilut_ptr->order_array = order_list;
854       ilut_ptr->reorder_array = reorder_list;
855       Norm2 = hypre_TAlloc(double, Nrows , HYPRE_MEMORY_HOST);
856       for ( i = 0; i < Nrows; i++ ) Norm2[i] = rowNorms[order_list[i]];
857       hypre_TFree(rowNorms, HYPRE_MEMORY_HOST);
858       rowNorms = Norm2;
859    }
860    /*
861    for ( i = 0; i < Nrows; i++ )
862       for ( j = Amat_ia[i]; j < Amat_ia[i+1]; j++ )
863          printf("%10d %10d %25.16e\n", i+1, Amat_ja[j]+1, Amat_aa[j]);
864    */
865 
866    /* ---------------------------------------------------------------- */
867    /* allocate space                                                   */
868    /* ---------------------------------------------------------------- */
869 
870    for ( i = 0; i < total_recv_leng; i++ ) total_nnz += recv_lengths[i];
871    total_nnz = (int) ((double) total_nnz * (fillin + 1.0));
872    ilut_ptr->mat_ia = hypre_TAlloc(int,  (extNrows + 1 ) , HYPRE_MEMORY_HOST);
873    ilut_ptr->mat_ja = hypre_TAlloc(int,  total_nnz , HYPRE_MEMORY_HOST);
874    ilut_ptr->mat_aa = hypre_TAlloc(double,  total_nnz , HYPRE_MEMORY_HOST);
875    mat_ia = ilut_ptr->mat_ia;
876    mat_ja = ilut_ptr->mat_ja;
877    mat_aa = ilut_ptr->mat_aa;
878 
879    offset = 0;
880    max_nnz_row = 0;
881    for ( i = 0; i < total_recv_leng; i++ )
882    {
883       rowNorms[i+Nrows] = 0.0;
884       nnz_row = 0;
885       for ( j = offset; j < offset+recv_lengths[i]; j++ )
886       {
887          index = ext_ja[j];
888          if ( index >= Noffset && index < Noffset+Nrows )
889             ext_ja[j] = index - Noffset;
890          else
891          {
892             m = HYPRE_LSI_Search(map, index, extNrows-Nrows);
893             if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows;
894             else          ext_ja[j] = -1;
895          }
896          if ( ext_ja[j] != -1 )
897          {
898             rowNorms[i+Nrows] += habs(ext_aa[j]);
899             nnz_row++;
900          }
901       }
902       if ( nnz_row > max_nnz_row ) max_nnz_row = nnz_row;
903       rowNorms[i+Nrows] /= extNrows;
904       offset += recv_lengths[i];
905    }
906 
907    /* ---------------------------------------------------------------- */
908    /* process the first Nrows                                          */
909    /* ---------------------------------------------------------------- */
910 
911    num_small_pivot = 0;
912    nnz_count = 0;
913    mat_ia[0] = 0;
914    track_array = hypre_TAlloc(int,  extNrows , HYPRE_MEMORY_HOST);
915    for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
916 
917    printstep = extNrows /  10;
918 
919    for ( i = 0; i < Nrows; i++ )
920    {
921       if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 )
922          printf("%4d : 0DDILUT Processing row %d(%d)\n",mypid,i,extNrows);
923 
924       track_leng = 0;
925       cols = &(Amat_ja[Amat_ia[i]]);
926       vals = &(Amat_aa[Amat_ia[i]]);
927       m    = Amat_ia[i+1] - Amat_ia[i];
928 
929       for ( j = 0; j < m; j++ )
930       {
931          if ( cols[j] < extNrows )
932          {
933             dble_buf[cols[j]] = vals[j];
934             track_array[track_leng++] = cols[j];
935          }
936       }
937       Lcount = Ucount = first = 0;
938       first  = extNrows;
939       for ( j = 0; j < track_leng; j++ )
940       {
941          index = track_array[j];
942          if ( dble_buf[index] != 0 )
943          {
944             if ( index < i ) Lcount++;
945             else if ( index > i ) Ucount++;
946             else if ( index == i ) diagonal[i] = dble_buf[index];
947             if ( index < first ) first = index;
948          }
949       }
950       Lcount  = Lcount * fillin;
951       Ucount  = Ucount * fillin;
952       rel_tau = tau * rowNorms[i];
953       for ( j = first; j < i; j++ )
954       {
955          if ( habs(dble_buf[j]) > rel_tau )
956          {
957             ddata = dble_buf[j] / diagonal[j];
958 touch_cnt++;
959             for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
960             {
961                colIndex = mat_ja[k];
962                if ( colIndex > j )
963                {
964                   if ( dble_buf[colIndex] != 0.0 )
965                      dble_buf[colIndex] -= (ddata * mat_aa[k]);
966                   else
967                   {
968                      dble_buf[colIndex] = - (ddata * mat_aa[k]);
969                      if ( dble_buf[colIndex] != 0.0 )
970                         track_array[track_leng++] = colIndex;
971                   }
972                }
973             }
974             dble_buf[j] = ddata;
975          }
976          else dble_buf[j] = 0.0;
977       }
978       for ( j = 0; j < m; j++ )
979       {
980          if ( cols[j] < extNrows )
981          {
982             vals[j] = dble_buf[cols[j]];
983             if ( cols[j] != i ) dble_buf[cols[j]] = 0.0;
984          }
985       }
986       sortcnt = 0;
987       for ( j = 0; j < track_leng; j++ )
988       {
989          index = track_array[j];
990          if ( index < i )
991          {
992             if ( dble_buf[index] < -rel_tau )
993             {
994                sortcols[sortcnt] = index;
995                sortvals[sortcnt++] = - dble_buf[index] * rowNorms[index];
996             }
997             else if ( dble_buf[index] > rel_tau )
998             {
999                sortcols[sortcnt] = index;
1000                sortvals[sortcnt++] = dble_buf[index] * rowNorms[index];
1001             }
1002             else dble_buf[index] = 0.0;
1003          }
1004       }
1005       if ( sortcnt > Lcount )
1006       {
1007          HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount);
1008          for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
1009       }
1010       for ( j = 0; j < m; j++ )
1011       {
1012          if ( cols[j] < i && vals[j] != 0.0 )
1013          {
1014             mat_aa[nnz_count] = vals[j];
1015             mat_ja[nnz_count++] = cols[j];
1016          }
1017       }
1018       for ( j = 0; j < track_leng; j++ )
1019       {
1020          index = track_array[j];
1021          if ( index < i && dble_buf[index] != 0.0 )
1022          {
1023             mat_aa[nnz_count] = dble_buf[index];
1024             mat_ja[nnz_count++] = index;
1025             dble_buf[index] = 0.0;
1026          }
1027       }
1028       diagonal[i] = dble_buf[i];
1029       if ( habs(diagonal[i]) < 1.0e-16 )
1030       {
1031          diagonal[i] = 1.0E-6;
1032          num_small_pivot++;
1033       }
1034       mat_aa[nnz_count] = diagonal[i];
1035       mat_ja[nnz_count++] = i;
1036       sortcnt = 0;
1037       for ( j = 0; j < track_leng; j++ )
1038       {
1039          index = track_array[j];
1040          if ( index > i )
1041          {
1042             if ( dble_buf[index] < -rel_tau )
1043             {
1044                sortcols[sortcnt] = index;
1045                sortvals[sortcnt++] = - dble_buf[index] * rowNorms[index];
1046             }
1047             else if ( dble_buf[index] > rel_tau )
1048             {
1049                sortcols[sortcnt] = index;
1050                sortvals[sortcnt++] = dble_buf[index] * rowNorms[index];
1051             }
1052             else dble_buf[index] = 0.0;
1053          }
1054       }
1055       if ( sortcnt > Ucount )
1056       {
1057          HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Ucount);
1058          for ( j = Ucount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
1059       }
1060       for ( j = 0; j < m; j++ )
1061       {
1062          if ( cols[j] > i && vals[j] != 0.0 )
1063          {
1064             mat_aa[nnz_count] = vals[j];
1065             mat_ja[nnz_count++] = cols[j];
1066          }
1067       }
1068       for ( j = 0; j < track_leng; j++ )
1069       {
1070          index = track_array[j];
1071          if ( index > i && dble_buf[index] != 0.0 )
1072          {
1073             mat_aa[nnz_count] = dble_buf[index];
1074             mat_ja[nnz_count++] = index;
1075             dble_buf[index] = 0.0;
1076          }
1077       }
1078       dble_buf[i] = 0.0;
1079       mat_ia[i+1] = nnz_count;
1080    }
1081    hypre_TFree(Amat_ia, HYPRE_MEMORY_HOST);
1082    hypre_TFree(Amat_ja, HYPRE_MEMORY_HOST);
1083    hypre_TFree(Amat_aa, HYPRE_MEMORY_HOST);
1084    printf("touch_cnt = %d\n", touch_cnt);
1085 
1086    /* ---------------------------------------------------------------- */
1087    /* process the off-processor rows                                   */
1088    /* ---------------------------------------------------------------- */
1089 
1090    offset = 0;
1091    cols = hypre_TAlloc(int,  max_nnz_row , HYPRE_MEMORY_HOST);
1092    vals = hypre_TAlloc(double,  max_nnz_row , HYPRE_MEMORY_HOST);
1093    for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
1094    for ( i = 0; i < total_recv_leng; i++ )
1095    {
1096       if ( (i+Nrows) % printstep == 0 && ilut_ptr->outputLevel > 0 )
1097          printf("%4d : *DDILUT Processing row %d(%d)\n",mypid,i+Nrows,extNrows);
1098 
1099       track_leng = m = 0;
1100       for ( j = offset; j < offset+recv_lengths[i]; j++ )
1101       {
1102          if ( ext_ja[j] != -1 )
1103          {
1104             if (order_flag && ext_ja[j] < Nrows) index = reorder_list[ext_ja[j]];
1105             else                                 index = ext_ja[j];
1106             dble_buf[index] = ext_aa[j];
1107             track_array[track_leng++] = index;
1108             cols[m] = index;
1109             vals[m++] = ext_aa[j];
1110          }
1111       }
1112       Lcount = Ucount = 0;
1113       first  = extNrows;
1114       for ( j = 0; j < track_leng; j++ )
1115       {
1116          index = track_array[j];
1117          if ( dble_buf[index] != 0 )
1118          {
1119             if ( index < i+Nrows ) Lcount++;
1120             else if ( index > i+Nrows ) Ucount++;
1121             else if ( i+Nrows == index ) diagonal[i+Nrows] = dble_buf[index];
1122             if ( index < first ) first = index;
1123          }
1124       }
1125       Lcount  = Lcount * fillin;
1126       Ucount  = Ucount * fillin;
1127       rel_tau = tau * rowNorms[i+Nrows];
1128       for ( j = first; j < i+Nrows; j++ )
1129       {
1130          if ( habs(dble_buf[j]) > rel_tau )
1131          {
1132             ddata = dble_buf[j] / diagonal[j];
1133             for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
1134             {
1135                colIndex = mat_ja[k];
1136                if ( colIndex > j )
1137                {
1138                   if ( dble_buf[colIndex] != 0.0 )
1139                      dble_buf[colIndex] -= (ddata * mat_aa[k]);
1140                   else
1141                   {
1142                      dble_buf[colIndex] = - (ddata * mat_aa[k]);
1143                      if ( dble_buf[colIndex] != 0.0 )
1144                         track_array[track_leng++] = colIndex;
1145                   }
1146                }
1147             }
1148             dble_buf[j] = ddata;
1149          }
1150          else dble_buf[j] = 0.0;
1151       }
1152       for ( j = 0; j < m; j++ )
1153       {
1154          if ( cols[j] < extNrows )
1155          {
1156             vals[j] = dble_buf[cols[j]];
1157             if ( cols[j] != i+Nrows ) dble_buf[cols[j]] = 0.0;
1158          }
1159       }
1160       sortcnt = 0;
1161       for ( j = 0; j < track_leng; j++ )
1162       {
1163          index = track_array[j];
1164          if ( index < i+Nrows )
1165          {
1166             if ( dble_buf[index] < -rel_tau )
1167             {
1168                sortcols[sortcnt] = index;
1169                sortvals[sortcnt++] = - dble_buf[index]*rowNorms[index];
1170             }
1171             else if ( dble_buf[index] > rel_tau )
1172             {
1173                sortcols[sortcnt] = index;
1174                sortvals[sortcnt++] = dble_buf[index] * rowNorms[index];
1175             }
1176             else dble_buf[index] = 0.0;
1177          }
1178       }
1179       if ( sortcnt > Lcount )
1180       {
1181          HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount);
1182          for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
1183       }
1184       for ( j = 0; j < m; j++ )
1185       {
1186          if ( cols[j] < i+Nrows && vals[j] != 0.0 )
1187          {
1188             mat_aa[nnz_count] = vals[j];
1189             mat_ja[nnz_count++] = cols[j];
1190          }
1191       }
1192       for ( j = 0; j < track_leng; j++ )
1193       {
1194          index = track_array[j];
1195          if ( index < i+Nrows && dble_buf[index] != 0.0 )
1196          {
1197             mat_aa[nnz_count] = dble_buf[index];
1198             mat_ja[nnz_count++] = index;
1199             dble_buf[index] = 0.0;
1200          }
1201       }
1202       diagonal[i+Nrows] = dble_buf[i+Nrows];
1203       if ( habs(diagonal[i+Nrows]) < 1.0e-16 )
1204       {
1205          diagonal[i+Nrows] = 1.0E-6;
1206          num_small_pivot++;
1207       }
1208       mat_aa[nnz_count] = diagonal[i+Nrows];
1209       mat_ja[nnz_count++] = i+Nrows;
1210       dble_buf[i+Nrows] = 0.0;
1211       sortcnt = 0;
1212       for ( j = 0; j < track_leng; j++ )
1213       {
1214          index = track_array[j];
1215          if ( index > i+Nrows )
1216          {
1217             if ( dble_buf[index] < -rel_tau )
1218             {
1219                sortcols[sortcnt] = index;
1220                sortvals[sortcnt++] = - dble_buf[index] * rowNorms[index];
1221             }
1222             else if ( dble_buf[index] > rel_tau )
1223             {
1224                sortcols[sortcnt] = index;
1225                sortvals[sortcnt++] = dble_buf[index] * rowNorms[index];
1226             }
1227             else dble_buf[index] = 0.0;
1228          }
1229       }
1230       if ( sortcnt > Ucount )
1231       {
1232          HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Ucount);
1233          for ( j = Ucount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
1234       }
1235       for ( j = 0; j < m; j++ )
1236       {
1237          if ( cols[j] > i+Nrows && cols[j] < extNrows && vals[j] != 0.0 )
1238          {
1239             mat_aa[nnz_count] = vals[j];
1240             mat_ja[nnz_count++] = cols[j];
1241          }
1242       }
1243       for ( j = 0; j < track_leng; j++ )
1244       {
1245          index = track_array[j];
1246          if ( index > i+Nrows && dble_buf[index] != 0.0 )
1247          {
1248             mat_aa[nnz_count] = dble_buf[index];
1249             mat_ja[nnz_count++] = index;
1250             dble_buf[index] = 0.0;
1251          }
1252       }
1253       mat_ia[i+Nrows+1] = nnz_count;
1254       offset += recv_lengths[i];
1255    }
1256    if ( nnz_count > total_nnz )
1257       printf("WARNING in ILUTDecomp : memory bound passed.\n");
1258    if ( ilut_ptr->outputLevel > 0 )
1259    {
1260       printf("%4d :  DDILUT number of nonzeros     = %d\n",mypid,nnz_count);
1261       printf("%4d :  DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
1262    }
1263 
1264    /* ---------------------------------------------------------- */
1265    /* deallocate temporary storage space                         */
1266    /* ---------------------------------------------------------- */
1267 
1268    hypre_TFree(cols, HYPRE_MEMORY_HOST);
1269    hypre_TFree(vals, HYPRE_MEMORY_HOST);
1270    hypre_TFree(sortcols, HYPRE_MEMORY_HOST);
1271    hypre_TFree(sortvals, HYPRE_MEMORY_HOST);
1272    hypre_TFree(dble_buf, HYPRE_MEMORY_HOST);
1273    hypre_TFree(diagonal, HYPRE_MEMORY_HOST);
1274    hypre_TFree(rowNorms, HYPRE_MEMORY_HOST);
1275    hypre_TFree(context, HYPRE_MEMORY_HOST);
1276    hypre_TFree(track_array, HYPRE_MEMORY_HOST);
1277 
1278    return 0;
1279 }
1280 
1281 /*****************************************************************************/
1282 /* function for doing ILUT decomposition                                     */
1283 /* (attempted for pattern reuse, but not done yet)                           */
1284 /*****************************************************************************/
1285 
HYPRE_LSI_DDIlutDecompose2(HYPRE_LSI_DDIlut * ilut_ptr,MH_Matrix * Amat,int total_recv_leng,int * recv_lengths,int * ext_ja,double * ext_aa,int * map,int * map2,int Noffset)1286 int HYPRE_LSI_DDIlutDecompose2(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
1287            int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa,
1288            int *map, int *map2, int Noffset)
1289 {
1290    int          *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid;
1291    int          index, first, Lcount, Ucount, ncnt, j, k, total_nnz;
1292    int          sortcnt, colIndex, offset, nnz_count, Nrows, extNrows;
1293    int          *track_array, track_leng, num_small_pivot, printstep, ndisc;
1294    int          *sortcols;
1295    double       *vals, ddata, *mat_aa, *diagonal, *rowNorms;
1296    double       *dble_buf, fillin, tau, rel_tau, *sortvals, absval;
1297    MH_Context   *context;
1298 
1299    /* ---------------------------------------------------------------- */
1300    /* fetch ILUT parameters                                            */
1301    /* ---------------------------------------------------------------- */
1302 
1303    MPI_Comm_rank(ilut_ptr->comm, &mypid);
1304    fillin   = ilut_ptr->fillin;
1305    tau      = ilut_ptr->thresh;
1306    Nrows    = Amat->Nrows;
1307    extNrows = Nrows + total_recv_leng;
1308    ilut_ptr->Nrows = Nrows;
1309    ilut_ptr->extNrows = extNrows;
1310 
1311    /* ---------------------------------------------------------------- */
1312    /* allocate temporary storage space                                 */
1313    /* ---------------------------------------------------------------- */
1314 
1315    allocated_space = extNrows;
1316    cols     = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
1317    vals     = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
1318    sortcols = hypre_TAlloc(int, extNrows , HYPRE_MEMORY_HOST);
1319    sortvals = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1320    dble_buf = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1321    diagonal = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1322    rowNorms = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1323 
1324    /* ---------------------------------------------------------------- */
1325    /* compute the storage requirement for the ILU matrix               */
1326    /* ---------------------------------------------------------------- */
1327 
1328    context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
1329    context->Amat = Amat;
1330    for ( i = 0; i < Nrows; i++ )
1331    {
1332       rowNorms[i] = 0.0;
1333       while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0)
1334       {
1335          hypre_TFree(vals, HYPRE_MEMORY_HOST);
1336          hypre_TFree(cols, HYPRE_MEMORY_HOST);
1337          allocated_space += 200 + 1;
1338          cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
1339          vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
1340       }
1341       for ( j = 0; j < m; j++ ) rowNorms[i] += habs(vals[j]);
1342       rowNorms[i] /= extNrows;
1343    }
1344    total_nnz = 0;
1345    for ( i = 0; i < total_recv_leng; i++ ) total_nnz += recv_lengths[i];
1346    total_nnz = (int) ((double) total_nnz*(fillin+1.0)) + ilut_ptr->mat_ia[Nrows];
1347    mat_ia = ilut_ptr->mat_ia;
1348    mat_ja = ilut_ptr->mat_ja;
1349    mat_aa = ilut_ptr->mat_aa;
1350    ilut_ptr->mat_ia = hypre_TAlloc(int,  (extNrows + 1 ) , HYPRE_MEMORY_HOST);
1351    ilut_ptr->mat_ja = hypre_TAlloc(int,  total_nnz , HYPRE_MEMORY_HOST);
1352    ilut_ptr->mat_aa = hypre_TAlloc(double,  total_nnz , HYPRE_MEMORY_HOST);
1353 
1354    ncnt = 0;
1355    ilut_ptr->mat_ia[0] = 0;
1356    for ( i = 0; i < Nrows; i++ )
1357    {
1358       for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
1359          if ( mat_ja[j] >= 0 && mat_ja[j] < extNrows )
1360             ilut_ptr->mat_ja[ncnt++] = mat_ja[j];
1361       ilut_ptr->mat_ia[i+1] = ncnt;
1362    }
1363    hypre_TFree(mat_ia, HYPRE_MEMORY_HOST);
1364    hypre_TFree(mat_ja, HYPRE_MEMORY_HOST);
1365    hypre_TFree(mat_aa, HYPRE_MEMORY_HOST);
1366    mat_ia = ilut_ptr->mat_ia;
1367    mat_ja = ilut_ptr->mat_ja;
1368    mat_aa = ilut_ptr->mat_aa;
1369 
1370    /* ---------------------------------------------------------------- */
1371    /* process the first Nrows                                          */
1372    /* ---------------------------------------------------------------- */
1373 
1374    num_small_pivot = 0;
1375    mat_ia[0] = 0;
1376    track_array = hypre_TAlloc(int,  extNrows , HYPRE_MEMORY_HOST);
1377    for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
1378 
1379    printstep = extNrows /  10;
1380 
1381    ndisc = 0;
1382    for ( i = 0; i < Nrows; i++ )
1383    {
1384       if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 )
1385          printf("%4d : 1DDILUT Processing row %d(%d,%d)\n",mypid,i,extNrows,Nrows);
1386 
1387       MH_GetRow(context,1,&i,allocated_space,cols,vals,&m);
1388 
1389       /* ------------------------------------------------------------- */
1390       /* load the row into buffer                                      */
1391       /* ------------------------------------------------------------- */
1392 
1393       track_leng = 0;
1394       first      = extNrows;
1395       for ( j = 0; j < m; j++ )
1396       {
1397          index = cols[j];
1398          if ( index < extNrows )
1399          {
1400             dble_buf[index] = vals[j];
1401             track_array[track_leng++] = index;
1402          }
1403          if ( index < first ) first = index;
1404       }
1405       for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
1406       {
1407          index = mat_ja[j];
1408          if ( dble_buf[index] == 0.0 ) track_array[track_leng++] = index;
1409          if ( index < first ) first = index;
1410       }
1411 if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++;
1412 
1413       /* ------------------------------------------------------------- */
1414       /* perform factorization                                         */
1415       /* ------------------------------------------------------------- */
1416 
1417       rel_tau = tau * rowNorms[i];
1418       for ( j = first; j < i; j++ )
1419       {
1420          if ( habs(dble_buf[j]) > rel_tau )
1421          {
1422             ddata = dble_buf[j] / diagonal[j];
1423             for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
1424             {
1425                colIndex = mat_ja[k];
1426                if ( colIndex > j )
1427                {
1428                   if ( dble_buf[colIndex] != 0.0 )
1429                      dble_buf[colIndex] -= (ddata * mat_aa[k]);
1430                   else
1431                   {
1432                      dble_buf[colIndex] = - (ddata * mat_aa[k]);
1433                      if ( dble_buf[colIndex] != 0.0 )
1434                         track_array[track_leng++] = colIndex;
1435                   }
1436                }
1437             }
1438             dble_buf[j] = ddata;
1439          }
1440          else dble_buf[j] = 0.0;
1441       }
1442 
1443       diagonal[i] = dble_buf[i];
1444       if ( habs(diagonal[i]) < 1.0e-16 )
1445       {
1446          diagonal[i] = dble_buf[i] = 1.0E-6;
1447          num_small_pivot++;
1448       }
1449       for (j = mat_ia[i]; j < mat_ia[i+1]; j++) mat_aa[j] = dble_buf[mat_ja[j]];
1450       for ( j = 0; j < track_leng; j++ ) dble_buf[track_array[j]] = 0.0;
1451    }
1452    nnz_count = mat_ia[Nrows];
1453    ncnt = 0;
1454    k = 0;
1455    for ( i = 0; i < Nrows; i++ )
1456    {
1457       for ( j = k; j < mat_ia[i+1]; j++ )
1458       {
1459          if ( mat_aa[j] != 0.0 )
1460          {
1461             mat_ja[ncnt] = mat_ja[j];
1462             mat_aa[ncnt++] = mat_aa[j];
1463          }
1464       }
1465       k = mat_ia[i+1];
1466       mat_ia[i+1] = ncnt;
1467    }
1468    if ( ilut_ptr->outputLevel > 0 )
1469    {
1470       printf("%4d :  DDILUT after Nrows - nnz = %d %d\n", mypid, nnz_count, ncnt);
1471       printf("%4d :  DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
1472       printf("%4d :  DDILUT number of pattern mismatch = %d\n",mypid,ndisc);
1473    }
1474    nnz_count = ncnt;
1475 
1476    /* ---------------------------------------------------------------- */
1477    /* preparation for processing the off-processor rows                */
1478    /* ---------------------------------------------------------------- */
1479 
1480    offset = 0;
1481    for ( i = 0; i < total_recv_leng; i++ )
1482    {
1483       rowNorms[i+Nrows] = 0.0;
1484       for ( j = offset; j < offset+recv_lengths[i]; j++ )
1485       {
1486          index = ext_ja[j];
1487          if ( index >= Noffset && index < Noffset+Nrows )
1488             ext_ja[j] = index - Noffset;
1489          else
1490          {
1491             m = HYPRE_LSI_Search(map, index, extNrows-Nrows);
1492             if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows;
1493             else          ext_ja[j] = -1;
1494             if ( ext_ja[j] >= extNrows ) ext_ja[j] = -1;
1495          }
1496          if ( ext_ja[j] != -1 ) rowNorms[i+Nrows] += habs(ext_aa[j]);
1497       }
1498       rowNorms[i+Nrows] /= extNrows;
1499       offset += recv_lengths[i];
1500    }
1501 
1502    /* ---------------------------------------------------------------- */
1503    /* process the off-processor rows                                   */
1504    /* ---------------------------------------------------------------- */
1505 
1506    offset = 0;
1507    for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
1508    for ( i = 0; i < total_recv_leng; i++ )
1509    {
1510       if ( (i+Nrows) % printstep == 0 && ilut_ptr->outputLevel > 0 )
1511          printf("%4d : *DDILUT Processing row %d(%d)\n",mypid,i+Nrows,extNrows);
1512 
1513       track_leng = m = 0;
1514       for ( j = offset; j < offset+recv_lengths[i]; j++ )
1515       {
1516          index = ext_ja[j];
1517          if ( index != -1 )
1518          {
1519             cols[m] = index;
1520             vals[m++] = ext_aa[j];
1521             track_array[track_leng++] = index;
1522             dble_buf[index] = ext_aa[j];
1523          }
1524       }
1525       Lcount = Ucount = 0;
1526       first  = extNrows;
1527       for ( j = 0; j < track_leng; j++ )
1528       {
1529          index = track_array[j];
1530          if ( dble_buf[index] != 0 )
1531          {
1532             if ( index < i+Nrows ) Lcount++;
1533             else if ( index > i+Nrows ) Ucount++;
1534             else if ( i+Nrows == index ) diagonal[i+Nrows] = dble_buf[index];
1535             if ( index < first ) first = index;
1536          }
1537       }
1538       Lcount  = Lcount * fillin;
1539       Ucount  = Ucount * fillin;
1540       rel_tau = tau * rowNorms[i+Nrows];
1541       for ( j = first; j < i+Nrows; j++ )
1542       {
1543          if ( habs(dble_buf[j]) > rel_tau )
1544          {
1545             ddata = dble_buf[j] / diagonal[j];
1546             for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
1547             {
1548                colIndex = mat_ja[k];
1549                if ( colIndex > j )
1550                {
1551                   if ( dble_buf[colIndex] != 0.0 )
1552                      dble_buf[colIndex] -= (ddata * mat_aa[k]);
1553                   else
1554                   {
1555                      dble_buf[colIndex] = - (ddata * mat_aa[k]);
1556                      if ( dble_buf[colIndex] != 0.0 )
1557                         track_array[track_leng++] = colIndex;
1558                   }
1559                }
1560             }
1561             dble_buf[j] = ddata;
1562          }
1563          else dble_buf[j] = 0.0;
1564       }
1565       for ( j = 0; j < m; j++ )
1566       {
1567          index = cols[j];
1568          vals[j] = dble_buf[index];
1569          if ( index != i+Nrows ) dble_buf[index] = 0.0;
1570       }
1571       sortcnt = 0;
1572       for ( j = 0; j < track_leng; j++ )
1573       {
1574          index = track_array[j];
1575          if ( index < i+Nrows )
1576          {
1577             absval = habs( dble_buf[index] );
1578             if ( absval > rel_tau )
1579             {
1580                sortcols[sortcnt] = index;
1581                sortvals[sortcnt++] = absval * rowNorms[index];
1582             }
1583             else dble_buf[index] = 0.0;
1584          }
1585       }
1586       if ( sortcnt > Lcount )
1587       {
1588          HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount);
1589          for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
1590       }
1591       for ( j = 0; j < m; j++ )
1592       {
1593          if ( cols[j] < i+Nrows && vals[j] != 0.0 )
1594          {
1595             mat_aa[nnz_count] = vals[j];
1596             mat_ja[nnz_count++] = cols[j];
1597          }
1598       }
1599       for ( j = 0; j < track_leng; j++ )
1600       {
1601          index = track_array[j];
1602          if ( index < i+Nrows && dble_buf[index] != 0.0 )
1603          {
1604             mat_aa[nnz_count] = dble_buf[index];
1605             mat_ja[nnz_count++] = index;
1606             dble_buf[index] = 0.0;
1607          }
1608       }
1609       diagonal[i+Nrows] = dble_buf[i+Nrows];
1610       if ( habs(diagonal[i+Nrows]) < 1.0e-16 )
1611       {
1612          diagonal[i+Nrows] = 1.0E-6;
1613          num_small_pivot++;
1614       }
1615       mat_aa[nnz_count] = diagonal[i+Nrows];
1616       mat_ja[nnz_count++] = i+Nrows;
1617       dble_buf[i+Nrows] = 0.0;
1618       sortcnt = 0;
1619       for ( j = 0; j < track_leng; j++ )
1620       {
1621          index = track_array[j];
1622          if ( index > i+Nrows )
1623          {
1624             absval = habs( dble_buf[index] );
1625             if ( absval > rel_tau )
1626             {
1627                sortcols[sortcnt] = index;
1628                sortvals[sortcnt++] = absval * rowNorms[index];
1629             }
1630             else dble_buf[index] = 0.0;
1631          }
1632       }
1633       if ( sortcnt > Ucount )
1634       {
1635          HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Ucount);
1636          for ( j = Ucount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0;
1637       }
1638       for ( j = 0; j < m; j++ )
1639       {
1640          if ( cols[j] > i+Nrows && vals[j] != 0.0 )
1641          {
1642             mat_aa[nnz_count] = vals[j];
1643             mat_ja[nnz_count++] = cols[j];
1644          }
1645       }
1646       for ( j = 0; j < track_leng; j++ )
1647       {
1648          index = track_array[j];
1649          if ( index > i+Nrows && dble_buf[index] != 0.0 )
1650          {
1651             mat_aa[nnz_count] = dble_buf[index];
1652             mat_ja[nnz_count++] = index;
1653             dble_buf[index] = 0.0;
1654          }
1655       }
1656       mat_ia[i+Nrows+1] = nnz_count;
1657       offset += recv_lengths[i];
1658    }
1659 
1660    if ( nnz_count > total_nnz )
1661       printf("WARNING in ILUTDecomp : memory bound passed.\n");
1662    if ( ilut_ptr->outputLevel > 0 )
1663    {
1664       printf("%4d :  DDILUT number of nonzeros     = %d\n",mypid,nnz_count);
1665       printf("%4d :  DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
1666    }
1667 
1668    /* ---------------------------------------------------------- */
1669    /* deallocate temporary storage space                         */
1670    /* ---------------------------------------------------------- */
1671 
1672    hypre_TFree(cols, HYPRE_MEMORY_HOST);
1673    hypre_TFree(vals, HYPRE_MEMORY_HOST);
1674    hypre_TFree(sortcols, HYPRE_MEMORY_HOST);
1675    hypre_TFree(sortvals, HYPRE_MEMORY_HOST);
1676    hypre_TFree(dble_buf, HYPRE_MEMORY_HOST);
1677    hypre_TFree(diagonal, HYPRE_MEMORY_HOST);
1678    hypre_TFree(rowNorms, HYPRE_MEMORY_HOST);
1679    hypre_TFree(context, HYPRE_MEMORY_HOST);
1680    hypre_TFree(track_array, HYPRE_MEMORY_HOST);
1681 
1682    return 0;
1683 }
1684 
1685 /*****************************************************************************/
1686 /* function for doing ILUT decomposition                                     */
1687 /* (purely based on magnitude)                                               */
1688 /*****************************************************************************/
1689 
HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut * ilut_ptr,MH_Matrix * Amat,int total_recv_leng,int * recv_lengths,int * ext_ja,double * ext_aa,int * map,int * map2,int Noffset)1690 int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
1691            int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa,
1692            int *map, int *map2, int Noffset)
1693 {
1694    int          *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid;
1695    int          index, first, Lcount, Ucount, j, k, total_nnz;
1696    int          sortcnt, colIndex, offset, nnz_count, Nrows, extNrows;
1697    int          *track_array, track_leng, num_small_pivot;
1698    double       *vals, ddata, *mat_aa, *diagonal, *rowNorms;
1699    double       *dble_buf, fillin, tau, rel_tau;
1700    MH_Context   *context;
1701 
1702    /* ---------------------------------------------------------------- */
1703    /* fetch ILUT parameters                                            */
1704    /* ---------------------------------------------------------------- */
1705 
1706    MPI_Comm_rank(ilut_ptr->comm, &mypid);
1707    fillin   = ilut_ptr->fillin;
1708    tau      = ilut_ptr->thresh;
1709    Nrows    = Amat->Nrows;
1710    extNrows = Nrows + total_recv_leng;
1711    ilut_ptr->Nrows = Nrows;
1712    ilut_ptr->extNrows = extNrows;
1713 
1714    /* ---------------------------------------------------------------- */
1715    /* allocate temporary storage space                                 */
1716    /* ---------------------------------------------------------------- */
1717 
1718    allocated_space = extNrows;
1719    cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
1720    vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
1721    dble_buf = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1722    diagonal = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1723    rowNorms = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
1724 
1725    /* ---------------------------------------------------------------- */
1726    /* compute the storage requirement for the ILU matrix               */
1727    /* ---------------------------------------------------------------- */
1728 
1729    context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
1730    context->Amat = Amat;
1731    total_nnz     = 0;
1732    for ( i = 0; i < Nrows; i++ )
1733    {
1734       rowNorms[i] = 0.0;
1735       while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0)
1736       {
1737          hypre_TFree(vals, HYPRE_MEMORY_HOST);
1738          hypre_TFree(cols, HYPRE_MEMORY_HOST);
1739          allocated_space += 200 + 1;
1740          cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
1741          vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
1742       }
1743       total_nnz += m;
1744       for ( j = 0; j < m; j++ ) rowNorms[i] += habs(vals[j]);
1745       rowNorms[i] /= extNrows;
1746    }
1747    for ( i = 0; i < total_recv_leng; i++ ) total_nnz += recv_lengths[i];
1748    total_nnz = (int) ((double) total_nnz * (fillin + 1.0));
1749    ilut_ptr->mat_ia = hypre_TAlloc(int,  (extNrows + 1 ) , HYPRE_MEMORY_HOST);
1750    ilut_ptr->mat_ja = hypre_TAlloc(int,  total_nnz , HYPRE_MEMORY_HOST);
1751    ilut_ptr->mat_aa = hypre_TAlloc(double,  total_nnz , HYPRE_MEMORY_HOST);
1752    mat_ia = ilut_ptr->mat_ia;
1753    mat_ja = ilut_ptr->mat_ja;
1754    mat_aa = ilut_ptr->mat_aa;
1755 
1756    offset = 0;
1757    for ( i = 0; i < total_recv_leng; i++ )
1758    {
1759       rowNorms[i+Nrows] = 0.0;
1760       for ( j = offset; j < offset+recv_lengths[i]; j++ )
1761       {
1762          index = ext_ja[j];
1763          if ( index >= Noffset && index < Noffset+Nrows )
1764             ext_ja[j] = index - Noffset;
1765          else
1766          {
1767             m = HYPRE_LSI_Search(map, index, extNrows-Nrows);
1768             if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows;
1769             else          ext_ja[j] = -1;
1770          }
1771          if ( ext_ja[j] != -1 ) rowNorms[i+Nrows] += habs(ext_aa[j]);
1772       }
1773       rowNorms[i+Nrows] /= extNrows;
1774       offset += recv_lengths[i];
1775    }
1776 
1777    /* ---------------------------------------------------------------- */
1778    /* process the first Nrows                                          */
1779    /* ---------------------------------------------------------------- */
1780 
1781    num_small_pivot = 0;
1782    nnz_count = 0;
1783    mat_ia[0] = 0;
1784    track_array = hypre_TAlloc(int,  extNrows , HYPRE_MEMORY_HOST);
1785    for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
1786 
1787    for ( i = 0; i < Nrows; i++ )
1788    {
1789       if ( i % 1000 == 0 && ilut_ptr->outputLevel > 0 )
1790          printf("%4d : 2DDILUT Processing row %d(%d)\n",mypid,i,extNrows);
1791 
1792       track_leng = 0;
1793       MH_GetRow(context,1,&i,allocated_space,cols,vals,&m);
1794       if ( m < 0 )
1795          printf("IlutDecompose WARNING(1): row nnz = %d\n",m);
1796 
1797       for ( j = 0; j < m; j++ )
1798       {
1799          if ( cols[j] < extNrows )
1800          {
1801             dble_buf[cols[j]] = vals[j];
1802             track_array[track_leng++] = cols[j];
1803          }
1804       }
1805       Lcount = Ucount = first = 0;
1806       first  = extNrows;
1807       for ( j = 0; j < track_leng; j++ )
1808       {
1809          index = track_array[j];
1810          if ( dble_buf[index] != 0 )
1811          {
1812             if ( index < i ) Lcount++;
1813             else if ( index > i ) Ucount++;
1814             else if ( index == i ) diagonal[i] = dble_buf[index];
1815             if ( index < first ) first = index;
1816          }
1817       }
1818       Lcount  = Lcount * (fillin + 1);
1819       Ucount  = Ucount * (fillin + 1);
1820       rel_tau = tau * rowNorms[i];
1821       for ( j = first; j < i; j++ )
1822       {
1823          if ( habs(dble_buf[j]) > rel_tau )
1824          {
1825             ddata = dble_buf[j] / diagonal[j];
1826             for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
1827             {
1828                colIndex = mat_ja[k];
1829                if ( colIndex > j )
1830                {
1831                   if ( dble_buf[colIndex] != 0.0 )
1832                      dble_buf[colIndex] -= (ddata * mat_aa[k]);
1833                   else
1834                   {
1835                      dble_buf[colIndex] = - (ddata * mat_aa[k]);
1836                      track_array[track_leng++] = colIndex;
1837                   }
1838                }
1839             }
1840             dble_buf[j] = ddata;
1841          }
1842          else dble_buf[j] = 0.0;
1843       }
1844 
1845       sortcnt = 0;
1846       for ( j = 0; j < track_leng; j++ )
1847       {
1848          index = track_array[j];
1849          if ( index < i )
1850          {
1851             if ( dble_buf[index] < -rel_tau )
1852             {
1853                cols[sortcnt] = index;
1854                vals[sortcnt++] = - dble_buf[index] * rowNorms[index];
1855             }
1856             else if ( dble_buf[index] > rel_tau )
1857             {
1858                cols[sortcnt] = index;
1859                vals[sortcnt++] = dble_buf[index] * rowNorms[index];
1860             }
1861             else dble_buf[index] = 0.0;
1862          }
1863       }
1864 
1865       if ( sortcnt > Lcount ) HYPRE_LSI_SplitDSort(vals,sortcnt,cols,Lcount);
1866       if ( sortcnt > Lcount )
1867       {
1868          for ( j = Lcount; j < sortcnt; j++ ) dble_buf[cols[j]] = 0.0;
1869       }
1870       for ( j = 0; j < track_leng; j++ )
1871       {
1872          index = track_array[j];
1873          if ( index < i && dble_buf[index] != 0.0 )
1874          {
1875             mat_aa[nnz_count] = dble_buf[index];
1876             mat_ja[nnz_count++] = index;
1877             dble_buf[index] = 0.0;
1878          }
1879       }
1880       diagonal[i] = dble_buf[i];
1881       if ( habs(diagonal[i]) < 1.0e-16 )
1882       {
1883          diagonal[i] = 1.0E-6;
1884          num_small_pivot++;
1885       }
1886       mat_aa[nnz_count] = diagonal[i];
1887       mat_ja[nnz_count++] = i;
1888       sortcnt = 0;
1889       for ( j = 0; j < track_leng; j++ )
1890       {
1891          index = track_array[j];
1892          if ( index > i )
1893          {
1894             if ( dble_buf[index] < -rel_tau )
1895             {
1896                cols[sortcnt] = index;
1897                vals[sortcnt++] = - dble_buf[index] * rowNorms[index];
1898             }
1899             else if ( dble_buf[index] > rel_tau )
1900             {
1901                cols[sortcnt] = index;
1902                vals[sortcnt++] = dble_buf[index] * rowNorms[index];
1903             }
1904             else dble_buf[index] = 0.0;
1905          }
1906       }
1907       if ( sortcnt > Ucount ) HYPRE_LSI_SplitDSort(vals,sortcnt,cols,Ucount);
1908       if ( sortcnt > Ucount )
1909       {
1910          for ( j = Ucount; j < sortcnt; j++ ) dble_buf[cols[j]] = 0.0;
1911       }
1912       for ( j = 0; j < track_leng; j++ )
1913       {
1914          index = track_array[j];
1915          if ( index > i && dble_buf[index] != 0.0 )
1916          {
1917             mat_aa[nnz_count] = dble_buf[index];
1918             mat_ja[nnz_count++] = index;
1919             dble_buf[index] = 0.0;
1920          }
1921       }
1922       dble_buf[i] = 0.0;
1923       mat_ia[i+1] = nnz_count;
1924    }
1925 
1926    /* ---------------------------------------------------------------- */
1927    /* process the off-processor rows                                   */
1928    /* ---------------------------------------------------------------- */
1929 
1930    offset = 0;
1931    for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
1932    for ( i = 0; i < total_recv_leng; i++ )
1933    {
1934       if ( (i+Nrows) % 1000 == 0 && ilut_ptr->outputLevel > 0 )
1935          printf("%4d : *DDILUT Processing row %d(%d)\n",mypid,i+Nrows,extNrows);
1936 
1937       track_leng = 0;
1938       for ( j = offset; j < offset+recv_lengths[i]; j++ )
1939       {
1940          if ( ext_ja[j] != -1 )
1941          {
1942             dble_buf[ext_ja[j]] = ext_aa[j];
1943             track_array[track_leng++] = ext_ja[j];
1944          }
1945       }
1946       Lcount = Ucount = 0;
1947       first  = extNrows;
1948       for ( j = 0; j < track_leng; j++ )
1949       {
1950          index = track_array[j];
1951          if ( dble_buf[index] != 0 )
1952          {
1953             if ( index < i+Nrows ) Lcount++;
1954             else if ( index > i+Nrows ) Ucount++;
1955             else if ( i+Nrows == index ) diagonal[i+Nrows] = dble_buf[index];
1956             if ( index < first ) first = index;
1957          }
1958       }
1959       Lcount  = Lcount * (fillin + 1);
1960       Ucount  = Ucount * (fillin + 1);
1961       rel_tau = tau * rowNorms[i+Nrows];
1962       for ( j = first; j < i+Nrows; j++ )
1963       {
1964          if ( habs(dble_buf[j]) > rel_tau )
1965          {
1966             ddata = dble_buf[j] / diagonal[j];
1967             for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ )
1968             {
1969                colIndex = mat_ja[k];
1970                if ( colIndex > j )
1971                {
1972                   if ( dble_buf[colIndex] != 0.0 )
1973                      dble_buf[colIndex] -= (ddata * mat_aa[k]);
1974                   else
1975                   {
1976                      dble_buf[colIndex] = - (ddata * mat_aa[k]);
1977                      track_array[track_leng++] = colIndex;
1978                   }
1979                }
1980             }
1981             dble_buf[j] = ddata;
1982          }
1983          else dble_buf[j] = 0.0;
1984       }
1985       sortcnt = 0;
1986       for ( j = 0; j < track_leng; j++ )
1987       {
1988          index = track_array[j];
1989          if ( index < i+Nrows )
1990          {
1991             if ( dble_buf[index] < -rel_tau )
1992             {
1993                cols[sortcnt] = index;
1994                vals[sortcnt++] = - dble_buf[index]*rowNorms[index];
1995             }
1996             else if ( dble_buf[index] > rel_tau )
1997             {
1998                cols[sortcnt] = index;
1999                vals[sortcnt++] = dble_buf[index] * rowNorms[index];
2000             }
2001             else dble_buf[index] = 0.0;
2002          }
2003       }
2004       if ( sortcnt > Lcount ) HYPRE_LSI_SplitDSort(vals,sortcnt,cols,Lcount);
2005       if ( sortcnt > Lcount )
2006       {
2007          for ( j = Lcount; j < sortcnt; j++ ) dble_buf[cols[j]] = 0.0;
2008       }
2009       for ( j = 0; j < track_leng; j++ )
2010       {
2011          index = track_array[j];
2012          if ( index < i+Nrows && dble_buf[index] != 0.0 )
2013          {
2014             mat_aa[nnz_count] = dble_buf[index];
2015             mat_ja[nnz_count++] = index;
2016             dble_buf[index] = 0.0;
2017          }
2018       }
2019       diagonal[i+Nrows] = dble_buf[i+Nrows];
2020       if ( habs(diagonal[i+Nrows]) < 1.0e-16 )
2021       {
2022          diagonal[i+Nrows] = 1.0E-6;
2023          num_small_pivot++;
2024       }
2025       mat_aa[nnz_count] = diagonal[i+Nrows];
2026       mat_ja[nnz_count++] = i+Nrows;
2027       dble_buf[i+Nrows] = 0.0;
2028       sortcnt = 0;
2029       for ( j = 0; j < track_leng; j++ )
2030       {
2031          index = track_array[j];
2032          if ( index > i+Nrows )
2033          {
2034             if ( dble_buf[index] < -rel_tau )
2035             {
2036                cols[sortcnt] = index;
2037                vals[sortcnt++] = - dble_buf[index] * rowNorms[index];
2038             }
2039             else if ( dble_buf[index] > rel_tau )
2040             {
2041                cols[sortcnt] = index;
2042                vals[sortcnt++] = dble_buf[index] * rowNorms[index];
2043             }
2044             else dble_buf[index] = 0.0;
2045          }
2046       }
2047       if ( sortcnt > Ucount ) HYPRE_LSI_SplitDSort(vals,sortcnt,cols,Ucount);
2048       if ( sortcnt > Ucount )
2049       {
2050          for ( j = Ucount; j < sortcnt; j++ ) dble_buf[cols[j]] = 0.0;
2051       }
2052       for ( j = 0; j < track_leng; j++ )
2053       {
2054          index = track_array[j];
2055          if ( index > i+Nrows && dble_buf[index] != 0.0 )
2056          {
2057             mat_aa[nnz_count] = dble_buf[index];
2058             mat_ja[nnz_count++] = index;
2059             dble_buf[index] = 0.0;
2060          }
2061       }
2062       mat_ia[i+Nrows+1] = nnz_count;
2063       offset += recv_lengths[i];
2064    }
2065    if ( nnz_count > total_nnz )
2066       printf("WARNING in ILUTDecomp : memory bound passed.\n");
2067    if ( ilut_ptr->outputLevel > 0 )
2068    {
2069       printf("%4d :  DDILUT number of nonzeros     = %d\n",mypid,nnz_count);
2070       printf("%4d :  DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
2071    }
2072 
2073    /* ---------------------------------------------------------- */
2074    /* deallocate temporary storage space                         */
2075    /* ---------------------------------------------------------- */
2076 
2077    hypre_TFree(cols, HYPRE_MEMORY_HOST);
2078    hypre_TFree(vals, HYPRE_MEMORY_HOST);
2079    hypre_TFree(dble_buf, HYPRE_MEMORY_HOST);
2080    hypre_TFree(diagonal, HYPRE_MEMORY_HOST);
2081    hypre_TFree(rowNorms, HYPRE_MEMORY_HOST);
2082    hypre_TFree(context, HYPRE_MEMORY_HOST);
2083    hypre_TFree(track_array, HYPRE_MEMORY_HOST);
2084 
2085    return 0;
2086 }
2087 
2088 /*****************************************************************************/
2089 /* function for doing ILUT decomposition                                     */
2090 /* (This version is based on ILU(1).  It converges less well as the original */
2091 /*  ILUT based on ILU(0) and magnitude.  Its setup time is not faster either)*/
2092 /*****************************************************************************/
2093 
HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut * ilut_ptr,MH_Matrix * Amat,int total_recv_leng,int * recv_lengths,int * ext_ja,double * ext_aa,int * map,int * map2,int Noffset)2094 int HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat,
2095            int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa,
2096            int *map, int *map2, int Noffset)
2097 {
2098    int          *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid;
2099    int          index, first, ncnt, j, k, total_nnz;
2100    int          colIndex, offset, nnz_count, Nrows, extNrows;
2101    int          *track_array, track_leng, num_small_pivot, printstep;
2102    int          *mat_ia2, *mat_ja2, *iarray;
2103    double       *vals, ddata, *mat_aa, *diagonal, *rowNorms;
2104    double       *dble_buf, tau, rel_tau, *mat_aa2;
2105    MH_Context   *context;
2106 
2107    /* ---------------------------------------------------------------- */
2108    /* fetch ILUT parameters                                            */
2109    /* ---------------------------------------------------------------- */
2110 
2111    MPI_Comm_rank(ilut_ptr->comm, &mypid);
2112    tau      = ilut_ptr->thresh;
2113    Nrows    = Amat->Nrows;
2114    extNrows = Nrows + total_recv_leng;
2115    ilut_ptr->Nrows = Nrows;
2116    ilut_ptr->extNrows = extNrows;
2117 
2118    /* ---------------------------------------------------------------- */
2119    /* allocate temporary storage space                                 */
2120    /* ---------------------------------------------------------------- */
2121 
2122    allocated_space = extNrows;
2123    cols     = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
2124    vals     = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
2125    dble_buf = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
2126    diagonal = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
2127    rowNorms = hypre_TAlloc(double, extNrows , HYPRE_MEMORY_HOST);
2128 
2129    /* ---------------------------------------------------------------- */
2130    /* compute the storage requirement for the ILU matrix               */
2131    /* ---------------------------------------------------------------- */
2132 
2133    context = hypre_TAlloc(MH_Context, 1, HYPRE_MEMORY_HOST);
2134    context->Amat = Amat;
2135    total_nnz     = 0;
2136    for ( i = 0; i < Nrows; i++ )
2137    {
2138       rowNorms[i] = 0.0;
2139       while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0)
2140       {
2141          hypre_TFree(vals, HYPRE_MEMORY_HOST);
2142          hypre_TFree(cols, HYPRE_MEMORY_HOST);
2143          allocated_space += 200 + 1;
2144          cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST);
2145          vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST);
2146       }
2147       total_nnz += m;
2148       for ( j = 0; j < m; j++ ) rowNorms[i] += habs(vals[j]);
2149       rowNorms[i] /= extNrows;
2150    }
2151    for ( i = 0; i < total_recv_leng; i++ ) total_nnz += recv_lengths[i];
2152    mat_ia = hypre_TAlloc(int,  (extNrows + 1 ) , HYPRE_MEMORY_HOST);
2153    mat_ja = hypre_TAlloc(int,  total_nnz , HYPRE_MEMORY_HOST);
2154    mat_aa = hypre_TAlloc(double,  total_nnz , HYPRE_MEMORY_HOST);
2155    total_nnz = total_nnz * 7;
2156    mat_ia2 = hypre_TAlloc(int,  (extNrows + 1 ) , HYPRE_MEMORY_HOST);
2157    mat_ja2 = hypre_TAlloc(int,  total_nnz , HYPRE_MEMORY_HOST);
2158    ncnt = 0;
2159    mat_ia[0] = 0;
2160    for ( i = 0; i < Nrows; i++ )
2161    {
2162       MH_GetRow(context,1,&i,allocated_space,cols,vals,&m);
2163       for ( j = 0; j < m; j++ )
2164       {
2165          if ( vals[j] != 0.0 )
2166          {
2167             mat_ja[ncnt] = cols[j];
2168             mat_aa[ncnt++] = vals[j];
2169          }
2170       }
2171       mat_ia[i+1] = ncnt;
2172    }
2173    offset = 0;
2174    for ( i = 0; i < total_recv_leng; i++ )
2175    {
2176       rowNorms[i+Nrows] = 0.0;
2177       for ( j = offset; j < offset+recv_lengths[i]; j++ )
2178       {
2179          index = ext_ja[j];
2180          if ( index >= Noffset && index < Noffset+Nrows )
2181             ext_ja[j] = index - Noffset;
2182          else
2183          {
2184             m = HYPRE_LSI_Search(map, index, extNrows-Nrows);
2185             if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows;
2186             else          ext_ja[j] = -1;
2187          }
2188          if ( ext_ja[j] != -1 && ext_aa[j] != 0.0 )
2189          {
2190             rowNorms[i+Nrows] += habs(ext_aa[j]);
2191             mat_ja[ncnt] = ext_ja[j];
2192             mat_aa[ncnt++] = ext_aa[j];
2193          }
2194       }
2195       rowNorms[i+Nrows] /= extNrows;
2196       offset += recv_lengths[i];
2197       mat_ia[Nrows+i+1] = ncnt;
2198    }
2199 
2200    /* ---------------------------------------------------------------- */
2201    /* process the pattern                                              */
2202    /* ---------------------------------------------------------------- */
2203 
2204    ncnt = 0;
2205    mat_ia2[0] = 0;
2206    printstep = extNrows /  10;
2207    for ( i = 0; i < extNrows; i++ )
2208    {
2209       if ( ( i % printstep == 0 ) && ilut_ptr->outputLevel > 0 )
2210          printf("%4d :  DDILUT Processing pattern row = %d (%d)\n",mypid,i,extNrows);
2211       k = mat_ia[i+1] - mat_ia[i];
2212       for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
2213       {
2214          index = mat_ja[j];
2215          k += ( mat_ia[index+1] - mat_ia[index] );
2216       }
2217       if ( (k+ncnt) > total_nnz )
2218       {
2219          iarray = mat_ja2;
2220          total_nnz += (extNrows - i ) * k;
2221          mat_ja2 = hypre_TAlloc(int,  total_nnz , HYPRE_MEMORY_HOST);
2222          for ( j = 0; j < ncnt; j++ ) mat_ja2[j] = iarray[j];
2223          hypre_TFree(iarray, HYPRE_MEMORY_HOST);
2224       }
2225       for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
2226       {
2227          index = mat_ja[j];
2228          mat_ja2[ncnt++] = index;
2229          for (k = mat_ia[index]; k < mat_ia[index+1]; k++)
2230             mat_ja2[ncnt++] = mat_ja[k];
2231       }
2232       hypre_qsort0(mat_ja2, mat_ia2[i], ncnt-1);
2233       k = mat_ia2[i] + 1;
2234       for ( j = mat_ia2[i]+1; j < ncnt; j++ )
2235       {
2236          if ( mat_ja2[j] != mat_ja2[k-1] ) mat_ja2[k++] = mat_ja2[j];
2237       }
2238       mat_ia2[i+1] = k;
2239       ncnt = k;
2240    }
2241    for ( i = 0; i < ncnt; i++ )
2242       if ( mat_ja2[i] < 0 || mat_ja2[i] >= extNrows )
2243          printf("%4d :  DDILUT ERROR  ja %d = %d \n",mypid,i,mat_ja2[i]);
2244 
2245    mat_aa2 = hypre_TAlloc(double,  ncnt , HYPRE_MEMORY_HOST);
2246 
2247    /* ---------------------------------------------------------------- */
2248    /* process the rows                                                 */
2249    /* ---------------------------------------------------------------- */
2250 
2251    num_small_pivot = 0;
2252    track_array = hypre_TAlloc(int,  extNrows , HYPRE_MEMORY_HOST);
2253    for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0;
2254 
2255    for ( i = 0; i < extNrows; i++ )
2256    {
2257       if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 )
2258          printf("%4d : $DDILUT Processing row %d(%d,%d)\n",mypid,i,extNrows,Nrows);
2259 
2260       /* ------------------------------------------------------------- */
2261       /* load the row into buffer                                      */
2262       /* ------------------------------------------------------------- */
2263 
2264       track_leng = 0;
2265       first      = extNrows;
2266       for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ )
2267       {
2268          index = mat_ja[j];
2269          dble_buf[index] = mat_aa[j];
2270          track_array[track_leng++] = index;
2271          if ( index < first ) first = index;
2272       }
2273 
2274       /* ------------------------------------------------------------- */
2275       /* perform factorization                                         */
2276       /* ------------------------------------------------------------- */
2277 
2278       rel_tau = tau * rowNorms[i];
2279       for ( j = first; j < i; j++ )
2280       {
2281          if ( habs(dble_buf[j]) > rel_tau )
2282          {
2283             ddata = dble_buf[j] / diagonal[j];
2284             for ( k = mat_ia2[j]; k < mat_ia2[j+1]; k++ )
2285             {
2286                colIndex = mat_ja2[k];
2287                if ( colIndex > j && mat_aa2[k] != 0.0 )
2288                {
2289                   if ( dble_buf[colIndex] != 0.0 )
2290                      dble_buf[colIndex] -= (ddata * mat_aa2[k]);
2291                   else
2292                   {
2293                      dble_buf[colIndex] = - (ddata * mat_aa2[k]);
2294                      if ( dble_buf[colIndex] != 0.0 )
2295                         track_array[track_leng++] = colIndex;
2296                   }
2297                }
2298             }
2299             dble_buf[j] = ddata;
2300          }
2301          else dble_buf[j] = 0.0;
2302       }
2303       diagonal[i] = dble_buf[i];
2304       if ( habs(diagonal[i]) < 1.0e-16 )
2305       {
2306          diagonal[i] = dble_buf[i] = 1.0E-6;
2307          num_small_pivot++;
2308       }
2309       for (j = mat_ia2[i]; j < mat_ia2[i+1]; j++)
2310          mat_aa2[j] = dble_buf[mat_ja2[j]];
2311       for ( j = 0; j < track_leng; j++ ) dble_buf[track_array[j]] = 0.0;
2312    }
2313    nnz_count = mat_ia2[extNrows];
2314 
2315    if ( ilut_ptr->outputLevel > 0 )
2316    {
2317       printf("%4d :  DDILUT number of nonzeros     = %d\n",mypid,nnz_count);
2318       printf("%4d :  DDILUT number of small pivots = %d\n",mypid,num_small_pivot);
2319    }
2320 
2321    /* ---------------------------------------------------------- */
2322    /* deallocate temporary storage space                         */
2323    /* ---------------------------------------------------------- */
2324 
2325    ilut_ptr->mat_ia = mat_ia2;
2326    ilut_ptr->mat_ja = mat_ja2;
2327    ilut_ptr->mat_aa = mat_aa2;
2328    hypre_TFree(mat_ia, HYPRE_MEMORY_HOST);
2329    hypre_TFree(mat_ja, HYPRE_MEMORY_HOST);
2330    hypre_TFree(mat_aa, HYPRE_MEMORY_HOST);
2331    hypre_TFree(cols, HYPRE_MEMORY_HOST);
2332    hypre_TFree(vals, HYPRE_MEMORY_HOST);
2333    hypre_TFree(dble_buf, HYPRE_MEMORY_HOST);
2334    hypre_TFree(diagonal, HYPRE_MEMORY_HOST);
2335    hypre_TFree(rowNorms, HYPRE_MEMORY_HOST);
2336    hypre_TFree(context, HYPRE_MEMORY_HOST);
2337    hypre_TFree(track_array, HYPRE_MEMORY_HOST);
2338 
2339    return 0;
2340 }
2341 
2342 
2343