1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 #define PDEGREE 1
9 #define MU      0.5
10 #define pruneFactor 0.1
11 
12 #if 0 /* RDF: Not sure this is really needed */
13 #ifdef WIN32
14 #define strcmp _stricmp
15 #endif
16 #endif
17 
18 /* #define MLI_USE_HYPRE_MATMATMULT */
19 
20 #include <string.h>
21 #include "HYPRE.h"
22 #include "_hypre_parcsr_ls.h"
23 #include "mli_utils.h"
24 #include "mli_matrix.h"
25 #include "mli_matrix_misc.h"
26 #include "mli_vector.h"
27 #include "mli_solver.h"
28 #include "mli_method_amgrs.h"
29 
30 #define habs(x)  ((x) > 0 ? x : -(x))
31 
32 /* ********************************************************************* *
33  * constructor
34  * --------------------------------------------------------------------- */
35 
MLI_Method_AMGRS(MPI_Comm comm)36 MLI_Method_AMGRS::MLI_Method_AMGRS( MPI_Comm comm ) : MLI_Method( comm )
37 {
38    char name[100];
39 
40    strcpy(name, "AMGRS");
41    setName( name );
42    setID( MLI_METHOD_AMGRS_ID );
43    outputLevel_      = 0;
44    maxLevels_        = 25;
45    numLevels_        = 25;
46    currLevel_        = 0;
47    coarsenScheme_    = MLI_METHOD_AMGRS_FALGOUT;
48    measureType_      = 0;              /* default : local measure */
49    threshold_        = 0.5;
50    nodeDOF_          = 1;
51    minCoarseSize_    = 200;
52    maxRowSum_        = 0.9;
53    symmetric_        = 1;
54    useInjectionForR_ = 0;
55    truncFactor_      = 0.0;
56    mxelmtsP_         = 0;
57    strcpy(smoother_, "Jacobi");
58    smootherNSweeps_ = 2;
59    smootherWeights_  = new double[2];
60    smootherWeights_[0] = smootherWeights_[1] = 0.667;
61    smootherPrintRNorm_ = 0;
62    smootherFindOmega_  = 0;
63    strcpy(coarseSolver_, "SGS");
64    coarseSolverNSweeps_ = 20;
65    coarseSolverWeights_ = new double[20];
66    for ( int j = 0; j < 20; j++ ) coarseSolverWeights_[j] = 1.0;
67    RAPTime_            = 0.0;
68    totalTime_          = 0.0;
69 }
70 
71 /* ********************************************************************* *
72  * destructor
73  * --------------------------------------------------------------------- */
74 
~MLI_Method_AMGRS()75 MLI_Method_AMGRS::~MLI_Method_AMGRS()
76 {
77    if ( smootherWeights_     != NULL ) delete [] smootherWeights_;
78    if ( coarseSolverWeights_ != NULL ) delete [] coarseSolverWeights_;
79 }
80 
81 /* ********************************************************************* *
82  * set parameters
83  * --------------------------------------------------------------------- */
84 
setParams(char * in_name,int argc,char * argv[])85 int MLI_Method_AMGRS::setParams(char *in_name, int argc, char *argv[])
86 {
87    int        level, size, nSweeps=1;
88    double     thresh, *weights=NULL;
89    char       param1[256], param2[256];
90 
91    sscanf(in_name, "%s", param1);
92    if ( !strcmp(param1, "setOutputLevel" ))
93    {
94       sscanf(in_name,"%s %d", param1, &level);
95       return ( setOutputLevel( level ) );
96    }
97    else if ( !strcmp(param1, "setNumLevels" ))
98    {
99       sscanf(in_name,"%s %d", param1, &level);
100       return ( setNumLevels( level ) );
101    }
102    else if ( !strcmp(param1, "setCoarsenScheme" ))
103    {
104       sscanf(in_name,"%s %s", param1, param2);
105       if ( !strcmp(param2, "cljp" ) )
106          return ( setCoarsenScheme( MLI_METHOD_AMGRS_CLJP ) );
107       else if ( !strcmp(param2, "ruge" ) )
108          return ( setCoarsenScheme( MLI_METHOD_AMGRS_RUGE ) );
109       else if ( !strcmp(param2, "falgout" ) )
110          return ( setCoarsenScheme( MLI_METHOD_AMGRS_FALGOUT ) );
111       else
112       {
113          printf("MLI_Method_AMGRS::setParams ERROR : setCoarsenScheme not");
114          printf(" valid.  Valid options are : cljp, ruge, and falgout \n");
115          return 1;
116       }
117    }
118    else if ( !strcmp(param1, "setMeasureType" ))
119    {
120       sscanf(in_name,"%s %s", param1, param2);
121       if ( !strcmp(param2, "local" ) )
122          return ( setMeasureType( 0 ) );
123       else if ( !strcmp(param2, "global" ) )
124          return ( setMeasureType( 1 ) );
125       else
126       {
127          printf("MLI_Method_AMGRS::setParams ERROR : setMeasureType not");
128          printf(" valid.  Valid options are : local or global\n");
129          return 1;
130       }
131    }
132    else if ( !strcmp(param1, "setStrengthThreshold" ))
133    {
134       sscanf(in_name,"%s %lg", param1, &thresh);
135       return ( setStrengthThreshold( thresh ) );
136    }
137    else if ( !strcmp(param1, "setTruncationFactor" ))
138    {
139       sscanf(in_name,"%s %lg", param1, &truncFactor_);
140       return 0;
141    }
142    else if ( !strcmp(param1, "setPMaxElmts" ))
143    {
144       sscanf(in_name,"%s %d", param1, &mxelmtsP_);
145       return 0;
146    }
147    else if ( !strcmp(param1, "setNodeDOF" ))
148    {
149       sscanf(in_name,"%s %d", param1, &size);
150       return ( setNodeDOF( size ) );
151    }
152    else if ( !strcmp(param1, "setNullSpace" ))
153    {
154       size = *(int *) argv[0];
155       return ( setNodeDOF( size ) );
156    }
157    else if ( !strcmp(param1, "setMinCoarseSize" ))
158    {
159       sscanf(in_name,"%s %d", param1, &size);
160       return ( setMinCoarseSize( size ) );
161    }
162    else if ( !strcmp(param1, "nonsymmetric" ))
163    {
164       symmetric_ = 0;
165       return 0;
166    }
167    else if ( !strcmp(param1, "useInjectionForR" ))
168    {
169       useInjectionForR_ = 1;
170       return 0;
171    }
172    else if ( !strcmp(param1, "setSmoother" ) ||
173              !strcmp(param1, "setPreSmoother" ))
174    {
175       sscanf(in_name,"%s %s", param1, param2);
176       if ( argc != 2 )
177       {
178          printf("MLI_Method_AMGRS::setParams ERROR - setSmoother needs");
179          printf(" 2 arguments.\n");
180          printf("     argument[0] : number of relaxation sweeps \n");
181          printf("     argument[1] : relaxation weights\n");
182          return 1;
183       }
184       nSweeps = *(int *)   argv[0];
185       weights = (double *) argv[1];
186       return ( setSmoother(param2, nSweeps, weights) );
187    }
188    else if ( !strcmp(param1, "setSmootherPrintRNorm" ))
189    {
190       smootherPrintRNorm_ = 1;
191       return 0;
192    }
193    else if ( !strcmp(param1, "setSmootherFindOmega" ))
194    {
195       smootherFindOmega_ = 1;
196       return 0;
197    }
198    else if ( !strcmp(param1, "setCoarseSolver" ))
199    {
200       sscanf(in_name,"%s %s", param1, param2);
201       if ( strcmp(param2, "SuperLU") && argc != 2 )
202       {
203          printf("MLI_Method_AMGRS::setParams ERROR - setCoarseSolver needs");
204          printf(" 2 arguments.\n");
205          printf("     argument[0] : number of relaxation sweeps \n");
206          printf("     argument[1] : relaxation weights\n");
207          return 1;
208       }
209       else if ( strcmp(param2, "SuperLU") )
210       {
211          nSweeps   = *(int *)   argv[0];
212          weights   = (double *) argv[1];
213       }
214       else if ( !strcmp(param2, "SuperLU") )
215       {
216          nSweeps = 1;
217          weights = NULL;
218       }
219       return ( setCoarseSolver(param2, nSweeps, weights) );
220    }
221    else if ( !strcmp(param1, "print" ))
222    {
223       return ( print() );
224    }
225    return 1;
226 }
227 
228 /***********************************************************************
229  * generate multilevel structure
230  * --------------------------------------------------------------------- */
231 
setup(MLI * mli)232 int MLI_Method_AMGRS::setup( MLI *mli )
233 {
234    int             k, level, irow, localNRows, mypid, nprocs, startRow;
235    int             numNodes, one=1, globalNRows, *coarsePartition;
236    int             *CFMarkers=NULL, coarseNRows, *dofArray, *cdofArray=NULL;
237    int             *reduceArray1, *reduceArray2, *rowLengs, ierr, zeroNRows;
238    int             startCol, localNCols, colInd, rowNum;
239    int             globalCoarseNRows, numTrials;
240    int             *mapStoA;
241    double          startTime, elapsedTime, colVal=1.0;
242    char            paramString[100], *targv[10];
243    MLI_Matrix      *mli_Pmat, *mli_Rmat, *mli_APmat, *mli_Amat, *mli_cAmat;
244    MLI_Matrix      *mli_ATmat, *mli_Affmat, *mli_Afcmat;
245    MLI_Solver      *smootherPtr, *csolverPtr;
246    MPI_Comm        comm;
247    MLI_Function    *funcPtr;
248    HYPRE_IJMatrix  IJRmat;
249    hypre_ParCSRMatrix *hypreA, *hypreS, *hypreAT, *hypreST, *hypreP, *hypreR;
250    hypre_ParCSRMatrix *hypreRT, *hypreS2=NULL;
251 #ifdef MLI_USE_HYPRE_MATMATMULT
252    hypre_ParCSRMatrix *hypreAP, *hypreCA;
253 #endif
254 
255 #ifdef MLI_DEBUG_DETAILED
256    printf("MLI_Method_AMGRS::setup begins...\n");
257 #endif
258 
259    /* --------------------------------------------------------------- */
260    /* fetch machine parameters                                        */
261    /* --------------------------------------------------------------- */
262 
263    RAPTime_ = 0.0;
264    comm     = getComm();
265    MPI_Comm_rank( comm, &mypid );
266    MPI_Comm_size( comm, &nprocs );
267    totalTime_ = MLI_Utils_WTime();
268 
269    /* --------------------------------------------------------------- */
270    /* traverse all levels                                             */
271    /* --------------------------------------------------------------- */
272 
273    for ( level = 0; level < numLevels_; level++ )
274    {
275       if ( mypid == 0 && outputLevel_ > 0 )
276       {
277          printf("\t*****************************************************\n");
278          printf("\t*** Ruge Stuben AMG : level = %d\n", level);
279          printf("\t-----------------------------------------------------\n");
280       }
281       currLevel_ = level;
282       if ( level == numLevels_-1 ) break;
283 
284       /* ------fetch fine grid matrix----------------------------------- */
285 
286       mli_Amat = mli->getSystemMatrix(level);
287       hypre_assert ( mli_Amat != NULL );
288       hypreA = (hypre_ParCSRMatrix *) mli_Amat->getMatrix();
289       startRow    = hypre_ParCSRMatrixFirstRowIndex(hypreA);
290       localNRows  = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(hypreA));
291       globalNRows = hypre_ParCSRMatrixGlobalNumRows(hypreA);
292 
293       /* ------create strength matrix----------------------------------- */
294 
295       numNodes = localNRows / nodeDOF_;
296       if ( level == 0 && (numNodes * nodeDOF_) != localNRows )
297       {
298          printf("\tMLI_Method_AMGRS::setup - nrows not divisible by dof.\n");
299          printf("\tMLI_Method_AMGRS::setup - revert nodeDOF to 1.\n");
300          nodeDOF_ = 1;
301          numNodes = localNRows / nodeDOF_;
302       }
303       if ( level == 0 )
304       {
305          if ( localNRows > 0 ) dofArray = new int[localNRows];
306          else                  dofArray = NULL;
307          for ( irow = 0; irow < localNRows; irow+=nodeDOF_ )
308             for ( k = 0; k < nodeDOF_; k++ ) dofArray[irow+k] = k;
309       }
310       else
311       {
312          if ( level > 0 && dofArray != NULL ) delete [] dofArray;
313          dofArray = cdofArray;
314       }
315       hypre_BoomerAMGCreateS(hypreA, threshold_, maxRowSum_, nodeDOF_,
316                              dofArray, &hypreS);
317       if ( threshold_ > 0 )
318       {
319          hypre_BoomerAMGCreateSCommPkg(hypreA, hypreS, &mapStoA);
320       }
321       else mapStoA = NULL;
322 
323       if ( coarsenScheme_ == MLI_METHOD_AMGRS_CR )
324       {
325          hypre_BoomerAMGCreateS(hypreA, 1.0e-16, maxRowSum_, nodeDOF_,
326                                 dofArray, &hypreS2);
327       }
328       else hypreS2 = NULL;
329 
330       /* ------perform coarsening--------------------------------------- */
331 
332       switch ( coarsenScheme_ )
333       {
334          case MLI_METHOD_AMGRS_CLJP :
335               hypre_BoomerAMGCoarsen(hypreS, hypreA, 0, outputLevel_,
336                             	     &CFMarkers);
337               break;
338          case MLI_METHOD_AMGRS_RUGE :
339               hypre_BoomerAMGCoarsenRuge(hypreS, hypreA, measureType_,
340                             	 coarsenScheme_, outputLevel_, &CFMarkers);
341               break;
342          case MLI_METHOD_AMGRS_FALGOUT :
343               hypre_BoomerAMGCoarsenFalgout(hypreS, hypreA, measureType_,
344                                             outputLevel_, &CFMarkers);
345               break;
346          case MLI_METHOD_AMGRS_CR :
347               hypre_BoomerAMGCoarsen(hypreS, hypreA, 0, outputLevel_,
348                             	     &CFMarkers);
349               k = 0;
350               for (irow = 0; irow < localNRows; irow++)
351               {
352                  if (CFMarkers[irow] > 0) {CFMarkers[irow] = 1; k++;}
353                  else if (CFMarkers[irow] < 0) CFMarkers[irow] = 0;
354               }
355               printf("\tAMGRS_CR(1) nCoarse = %d\n", k);
356               numTrials = 100;
357               mli_Affmat = performCR(mli_Amat,CFMarkers,&mli_Afcmat,numTrials,
358                                      hypreS2);
359               k = 0;
360               for (irow = 0; irow < localNRows; irow++)
361               {
362                  if (CFMarkers[irow] > 0) {CFMarkers[irow] = 1; k++;}
363                  else if (CFMarkers[irow] <= 0) CFMarkers[irow] = -1;
364               }
365               printf("\tAMGRS_CR(2) nCoarse = %d\n", k);
366               break;
367       }
368       coarseNRows = 0;
369       for ( irow = 0; irow < localNRows; irow++ )
370          if ( CFMarkers[irow] == 1 ) coarseNRows++;
371 
372       /* ------if nonsymmetric, compute S for R------------------------- */
373 
374       if ( symmetric_ == 0 )
375       {
376          MLI_Matrix_Transpose( mli_Amat, &mli_ATmat );
377          hypreAT = (hypre_ParCSRMatrix *) mli_ATmat->getMatrix();
378          hypre_BoomerAMGCreateS(hypreAT, threshold_, maxRowSum_, nodeDOF_,
379                                 dofArray, &hypreST);
380          hypre_BoomerAMGCoarsen(hypreST, hypreAT, 1, outputLevel_,
381                             	&CFMarkers);
382          coarseNRows = 0;
383          for ( irow = 0; irow < localNRows; irow++ )
384             if ( CFMarkers[irow] == 1 ) coarseNRows++;
385       }
386 
387       /* ------construct processor maps for the coarse grid------------- */
388 
389       coarsePartition = (int *) hypre_CTAlloc(int, nprocs+1, HYPRE_MEMORY_HOST);
390       coarsePartition[0] = 0;
391       MPI_Allgather(&coarseNRows, 1, MPI_INT, &(coarsePartition[1]),
392 		    1, MPI_INT, comm);
393       for ( irow = 2; irow < nprocs+1; irow++ )
394          coarsePartition[irow] += coarsePartition[irow-1];
395       globalCoarseNRows = coarsePartition[nprocs];
396 
397       if ( outputLevel_ > 1 && mypid == 0 )
398          printf("\tMLI_Method_AMGRS::setup - # C dof = %d(%d)\n",
399                 globalCoarseNRows, globalNRows);
400 
401       /* ------if nonsymmetric, need to make sure localNRows > 0 ------ */
402       /* ------ or the matrixTranspose function will give problems ----- */
403 
404       zeroNRows = 0;
405       if ( symmetric_ == 0 )
406       {
407          for ( irow = 0; irow < nprocs; irow++ )
408          {
409             if ( (coarsePartition[irow+1]-coarsePartition[irow]) <= 0 )
410             {
411                zeroNRows = 1;
412                break;
413             }
414          }
415       }
416 
417       /* ------ wrap up creating the multigrid hierarchy --------------- */
418 
419       if ( coarsePartition[nprocs] < minCoarseSize_ ||
420            coarsePartition[nprocs] == globalNRows || zeroNRows == 1 )
421       {
422          if ( symmetric_ == 0 )
423          {
424             delete mli_ATmat;
425             hypre_ParCSRMatrixDestroy(hypreST);
426          }
427          hypre_TFree( coarsePartition , HYPRE_MEMORY_HOST);
428          if ( CFMarkers != NULL ) hypre_TFree( CFMarkers , HYPRE_MEMORY_HOST);
429          if ( hypreS  != NULL ) hypre_ParCSRMatrixDestroy(hypreS);
430          if ( hypreS2 != NULL ) hypre_ParCSRMatrixDestroy(hypreS2);
431          if ( coarsenScheme_ == MLI_METHOD_AMGRS_CR )
432          {
433             delete mli_Affmat;
434             delete mli_Afcmat;
435          }
436          break;
437       }
438       k = (int) (globalNRows * 0.75);
439       //if ( coarsenScheme_ > 0 && coarsePartition[nprocs] >= k )
440       //   coarsenScheme_ = 0;
441 
442       /* ------create new dof array for coarse grid--------------------- */
443 
444       if ( coarseNRows > 0 ) cdofArray = new int[coarseNRows];
445       else                   cdofArray = NULL;
446       coarseNRows = 0;
447       for ( irow = 0; irow < localNRows; irow++ )
448       {
449          if ( CFMarkers[irow] == 1 )
450             cdofArray[coarseNRows++] = dofArray[irow];
451       }
452 
453       /* ------build and set the interpolation operator----------------- */
454 
455 #if 0
456       //===============================================
457       // This part is for future research on better Pmat
458       //===============================================
459       if ( coarsenScheme_ == MLI_METHOD_AMGRS_CR )
460       {
461          mli_Pmat = createPmat(CFMarkers, mli_Amat, mli_Affmat, mli_Afcmat);
462          delete mli_Affmat;
463          delete mli_Afcmat;
464          mli->setProlongation(level+1, mli_Pmat);
465       }
466       else
467       //===============================================
468 #endif
469       {
470          hypre_BoomerAMGBuildInterp(hypreA, CFMarkers, hypreS,
471                      coarsePartition, nodeDOF_, dofArray, outputLevel_,
472                      truncFactor_, mxelmtsP_, mapStoA, &hypreP);
473          funcPtr = new MLI_Function();
474          MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
475          sprintf(paramString, "HYPRE_ParCSR" );
476          mli_Pmat = new MLI_Matrix( (void *) hypreP, paramString, funcPtr );
477          mli->setProlongation(level+1, mli_Pmat);
478          delete funcPtr;
479       }
480       if ( hypreS  != NULL ) hypre_ParCSRMatrixDestroy(hypreS);
481       if ( hypreS2 != NULL ) hypre_ParCSRMatrixDestroy(hypreS2);
482 
483       /* ------build and set the restriction operator, if needed-------- */
484 
485       if ( useInjectionForR_ == 1 )
486       {
487          reduceArray1 = new int[nprocs+1];
488          reduceArray2 = new int[nprocs+1];
489          for ( k = 0; k < nprocs; k++ ) reduceArray1[k] = 0;
490          reduceArray1[mypid] = coarseNRows;
491          MPI_Allreduce(reduceArray1,reduceArray2,nprocs,MPI_INT,MPI_SUM,comm);
492          for (k = nprocs-1; k >= 0; k--) reduceArray2[k+1] = reduceArray2[k];
493          reduceArray2[0] = 0;
494          for ( k = 2; k <= nprocs; k++ ) reduceArray2[k] += reduceArray2[k-1];
495          startCol   = reduceArray2[mypid];
496          localNCols = reduceArray2[mypid+1] - startCol;
497          globalCoarseNRows = reduceArray2[nprocs];
498          ierr = HYPRE_IJMatrixCreate(comm, startCol, startCol+localNCols-1,
499                         startRow,startRow+localNRows-1,&IJRmat);
500          ierr = HYPRE_IJMatrixSetObjectType(IJRmat, HYPRE_PARCSR);
501          hypre_assert(!ierr);
502          rowLengs = new int[localNCols];
503          for ( k = 0; k < localNCols; k++ ) rowLengs[k] = 1;
504          ierr = HYPRE_IJMatrixSetRowSizes(IJRmat, rowLengs);
505          ierr = HYPRE_IJMatrixInitialize(IJRmat);
506          hypre_assert(!ierr);
507          delete [] rowLengs;
508          delete [] reduceArray1;
509          delete [] reduceArray2;
510          k = 0;
511          for ( irow = 0; irow < localNCols; irow++ )
512          {
513             while ( CFMarkers[k] != 1 ) k++;
514             rowNum = startCol + irow;
515             colInd = k + startRow;
516             HYPRE_IJMatrixSetValues(IJRmat, 1, &one, (const int *) &rowNum,
517                     (const int *) &colInd, (const double *) &colVal);
518             k++;
519          }
520          ierr = HYPRE_IJMatrixAssemble(IJRmat);
521          hypre_assert( !ierr );
522          HYPRE_IJMatrixGetObject(IJRmat, (void **) &hypreR);
523          hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hypreR);
524          funcPtr = new MLI_Function();
525          MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
526          sprintf(paramString, "HYPRE_ParCSR" );
527          mli_Rmat = new MLI_Matrix( (void *) hypreR, paramString, funcPtr );
528          mli->setRestriction(level, mli_Rmat);
529          delete funcPtr;
530          delete mli_ATmat;
531          hypre_ParCSRMatrixDestroy(hypreST);
532       }
533       else if ( symmetric_ == 0 )
534       {
535          hypre_BoomerAMGBuildInterp(hypreAT, CFMarkers, hypreST,
536                      coarsePartition, nodeDOF_, dofArray, outputLevel_,
537                      truncFactor_, mxelmtsP_, mapStoA, &hypreRT);
538          hypre_ParCSRMatrixTranspose( hypreRT, &hypreR, one );
539          funcPtr = new MLI_Function();
540          MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
541          sprintf(paramString, "HYPRE_ParCSR" );
542          mli_Rmat = new MLI_Matrix( (void *) hypreR, paramString, funcPtr );
543          mli->setRestriction(level, mli_Rmat);
544          delete funcPtr;
545          delete mli_ATmat;
546          hypre_ParCSRMatrixDestroy(hypreST);
547          hypre_ParCSRMatrixDestroy(hypreRT);
548       }
549       else
550       {
551          sprintf(paramString, "HYPRE_ParCSRT");
552          mli_Rmat = new MLI_Matrix(mli_Pmat->getMatrix(), paramString, NULL);
553          mli->setRestriction(level, mli_Rmat);
554       }
555       if ( CFMarkers != NULL ) hypre_TFree( CFMarkers , HYPRE_MEMORY_HOST);
556       //if ( coarsePartition != NULL ) hypre_TFree( coarsePartition , HYPRE_MEMORY_HOST);
557 
558       startTime = MLI_Utils_WTime();
559 
560       /* ------construct and set the coarse grid matrix----------------- */
561 
562       if ( mypid == 0 && outputLevel_ > 0 ) printf("\tComputing RAP\n");
563       if ( symmetric_ == 1 )
564       {
565          //hypreP = (hypre_ParCSRMatrix *) mli_Pmat->getMatrix();
566          //hypre_ParCSRMatrixTranspose(hypreP, &hypreR, 1);
567          //hypreAP = hypre_ParMatmul(hypreA, hypreP);
568          //hypreCA = hypre_ParMatmul(hypreR, hypreAP);
569          //funcPtr = new MLI_Function();
570          //MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
571          //sprintf(paramString, "HYPRE_ParCSR" );
572          //mli_cAmat = new MLI_Matrix((void *) hypreCA, paramString, funcPtr);
573          //delete funcPtr;
574          //hypre_ParCSRMatrixDestroy( hypreR );
575          //hypre_ParCSRMatrixDestroy( hypreAP );
576          MLI_Matrix_ComputePtAP(mli_Pmat, mli_Amat, &mli_cAmat);
577       }
578       else
579       {
580 #ifdef MLI_USE_HYPRE_MATMATMULT
581          hypreP  = (hypre_ParCSRMatrix *) mli_Pmat->getMatrix();
582          hypreR  = (hypre_ParCSRMatrix *) mli_Rmat->getMatrix();
583          hypreAP = hypre_ParMatmul( hypreA, hypreP );
584          hypreCA = hypre_ParMatmul( hypreR, hypreAP );
585          hypre_ParCSRMatrixDestroy( hypreAP );
586          funcPtr = new MLI_Function();
587          MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
588          sprintf(paramString, "HYPRE_ParCSR" );
589          mli_cAmat = new MLI_Matrix((void *) hypreCA, paramString, funcPtr);
590          delete funcPtr;
591 #else
592            MLI_Matrix_MatMatMult(mli_Amat, mli_Pmat, &mli_APmat);
593            MLI_Matrix_MatMatMult(mli_Rmat, mli_APmat, &mli_cAmat);
594            delete mli_APmat;
595 #endif
596       }
597       mli->setSystemMatrix(level+1, mli_cAmat);
598       elapsedTime = (MLI_Utils_WTime() - startTime);
599       RAPTime_ += elapsedTime;
600       if ( mypid == 0 && outputLevel_ > 0 )
601          printf("\tRAP computed, time = %e seconds.\n", elapsedTime);
602 
603       /* ------set the smoothers---------------------------------------- */
604 
605       smootherPtr = MLI_Solver_CreateFromName( smoother_ );
606       targv[0] = (char *) &smootherNSweeps_;
607       targv[1] = (char *) smootherWeights_;
608       sprintf( paramString, "relaxWeight" );
609       smootherPtr->setParams(paramString, 2, targv);
610       if ( smootherPrintRNorm_ == 1 )
611       {
612          sprintf( paramString, "printRNorm" );
613          smootherPtr->setParams(paramString, 0, NULL);
614       }
615       if ( smootherFindOmega_ == 1 )
616       {
617          sprintf( paramString, "findOmega" );
618          smootherPtr->setParams(paramString, 0, NULL);
619       }
620       sprintf( paramString, "setModifiedDiag" );
621       smootherPtr->setParams(paramString, 0, NULL);
622       smootherPtr->setup(mli_Amat);
623       mli->setSmoother( level, MLI_SMOOTHER_BOTH, smootherPtr );
624    }
625    if ( dofArray != NULL ) delete [] dofArray;
626 
627    /* ------set the coarse grid solver---------------------------------- */
628 
629    if (mypid == 0 && outputLevel_ > 0) printf("\tCoarse level = %d\n",level);
630    csolverPtr = MLI_Solver_CreateFromName( coarseSolver_ );
631    if ( strcmp(coarseSolver_, "SuperLU") )
632    {
633       targv[0] = (char *) &coarseSolverNSweeps_;
634       targv[1] = (char *) coarseSolverWeights_ ;
635       sprintf( paramString, "relaxWeight" );
636       csolverPtr->setParams(paramString, 2, targv);
637    }
638    mli_Amat = mli->getSystemMatrix(level);
639    csolverPtr->setup(mli_Amat);
640    mli->setCoarseSolve(csolverPtr);
641    totalTime_ = MLI_Utils_WTime() - totalTime_;
642 
643    /* --------------------------------------------------------------- */
644    /* return the coarsest grid level number                           */
645    /* --------------------------------------------------------------- */
646 
647    if ( outputLevel_ >= 2 ) printStatistics(mli);
648 
649 #ifdef MLI_DEBUG_DETAILED
650    printf("MLI_Method_AMGRS::setup ends.");
651 #endif
652    return (level+1);
653 }
654 
655 /* ********************************************************************* *
656  * set diagnostics output level
657  * --------------------------------------------------------------------- */
658 
setOutputLevel(int level)659 int MLI_Method_AMGRS::setOutputLevel( int level )
660 {
661    outputLevel_ = level;
662    return 0;
663 }
664 
665 /* ********************************************************************* *
666  * set number of levels
667  * --------------------------------------------------------------------- */
668 
setNumLevels(int nlevels)669 int MLI_Method_AMGRS::setNumLevels( int nlevels )
670 {
671    if ( nlevels < maxLevels_ && nlevels > 0 ) numLevels_ = nlevels;
672    return 0;
673 }
674 
675 /* ********************************************************************* *
676  * set smoother
677  * --------------------------------------------------------------------- */
678 
setSmoother(char * stype,int num,double * wgt)679 int MLI_Method_AMGRS::setSmoother(char *stype, int num, double *wgt)
680 {
681    int i;
682 
683 #ifdef MLI_DEBUG_DETAILED
684    printf("MLI_Method_AMGRS::setSmoother - type = %s.\n", stype);
685 #endif
686 
687    strcpy( smoother_, stype );
688    if ( num > 0 ) smootherNSweeps_ = num; else smootherNSweeps_ = 1;
689    delete [] smootherWeights_;
690    smootherWeights_ = new double[smootherNSweeps_];
691    if ( wgt == NULL )
692       for (i = 0; i < smootherNSweeps_; i++) smootherWeights_[i] = 0.;
693    else
694       for (i = 0; i < smootherNSweeps_; i++) smootherWeights_[i] = wgt[i];
695    return 0;
696 }
697 
698 /* ********************************************************************* *
699  * set coarse solver
700  * --------------------------------------------------------------------- */
701 
setCoarseSolver(char * stype,int num,double * wgt)702 int MLI_Method_AMGRS::setCoarseSolver( char *stype, int num, double *wgt )
703 {
704    int i;
705 
706 #ifdef MLI_DEBUG_DETAILED
707    printf("MLI_Method_AMGRS::setCoarseSolver - type = %s.\n", stype);
708 #endif
709 
710    strcpy( coarseSolver_, stype );
711    if ( num > 0 ) coarseSolverNSweeps_ = num; else coarseSolverNSweeps_ = 1;
712    delete [] coarseSolverWeights_ ;
713    if ( wgt != NULL && strcmp(coarseSolver_, "SuperLU") )
714    {
715       coarseSolverWeights_ = new double[coarseSolverNSweeps_];
716       for (i = 0; i < coarseSolverNSweeps_; i++)
717          coarseSolverWeights_ [i] = wgt[i];
718    }
719    else coarseSolverWeights_  = NULL;
720    return 0;
721 }
722 
723 /* ********************************************************************* *
724  * set measure type
725  * --------------------------------------------------------------------- */
726 
setMeasureType(int mtype)727 int MLI_Method_AMGRS::setMeasureType( int mtype )
728 {
729    measureType_ = mtype;
730    return 0;
731 }
732 
733 /* ********************************************************************* *
734  * set node degree of freedom
735  * --------------------------------------------------------------------- */
736 
setNodeDOF(int dof)737 int MLI_Method_AMGRS::setNodeDOF( int dof )
738 {
739    if ( dof > 0 && dof < 20 ) nodeDOF_ = dof;
740    return 0;
741 }
742 
743 /* ********************************************************************* *
744  * set coarsening scheme
745  * --------------------------------------------------------------------- */
746 
setCoarsenScheme(int scheme)747 int MLI_Method_AMGRS::setCoarsenScheme( int scheme )
748 {
749    if ( scheme == MLI_METHOD_AMGRS_CLJP )
750    {
751       coarsenScheme_ = MLI_METHOD_AMGRS_CLJP;
752       return 0;
753    }
754    else if ( scheme == MLI_METHOD_AMGRS_RUGE )
755    {
756       coarsenScheme_ = MLI_METHOD_AMGRS_RUGE;
757       return 0;
758    }
759    else if ( scheme == MLI_METHOD_AMGRS_FALGOUT )
760    {
761       coarsenScheme_ = MLI_METHOD_AMGRS_FALGOUT;
762       return 0;
763    }
764    else
765    {
766       printf("MLI_Method_AMGRS::setCoarsenScheme - invalid scheme.\n");
767       return 1;
768    }
769 }
770 
771 /* ********************************************************************* *
772  * set minimum coarse size
773  * --------------------------------------------------------------------- */
774 
setMinCoarseSize(int coarse_size)775 int MLI_Method_AMGRS::setMinCoarseSize( int coarse_size )
776 {
777    if ( coarse_size > 0 ) minCoarseSize_ = coarse_size;
778    return 0;
779 }
780 
781 /* ********************************************************************* *
782  * set coarsening threshold
783  * --------------------------------------------------------------------- */
784 
setStrengthThreshold(double thresh)785 int MLI_Method_AMGRS::setStrengthThreshold( double thresh )
786 {
787    if ( thresh > 0.0 ) threshold_ = thresh;
788    else                threshold_ = 0.0;
789    return 0;
790 }
791 
792 /* ********************************************************************* *
793  * print AMGRS information
794  * --------------------------------------------------------------------- */
795 
print()796 int MLI_Method_AMGRS::print()
797 {
798    int      mypid;
799    MPI_Comm comm = getComm();
800 
801    MPI_Comm_rank( comm, &mypid);
802    if ( mypid == 0 )
803    {
804       printf("\t********************************************************\n");
805       printf("\t*** method name             = %s\n", getName());
806       printf("\t*** number of levels        = %d\n", numLevels_);
807       printf("\t*** coarsen type            = %d\n", coarsenScheme_);
808       printf("\t*** measure type            = %d\n", measureType_);
809       printf("\t*** strength threshold      = %e\n", threshold_);
810       printf("\t*** truncation factor       = %e\n", truncFactor_);
811       printf("\t*** P max elments           = %d\n", mxelmtsP_);
812       printf("\t*** nodal degree of freedom = %d\n", nodeDOF_);
813       printf("\t*** symmetric flag          = %d\n", symmetric_);
814       printf("\t*** R injection flag        = %d\n", useInjectionForR_);
815       printf("\t*** minimum coarse size     = %d\n", minCoarseSize_);
816       printf("\t*** smoother type           = %s\n", smoother_);
817       printf("\t*** smoother nsweeps        = %d\n", smootherNSweeps_);
818       printf("\t*** coarse solver type      = %s\n", coarseSolver_);
819       printf("\t*** coarse solver nsweeps   = %d\n", coarseSolverNSweeps_);
820       printf("\t********************************************************\n");
821    }
822    return 0;
823 }
824 
825 /* ********************************************************************* *
826  * print AMGRS statistics information
827  * --------------------------------------------------------------------- */
828 
printStatistics(MLI * mli)829 int MLI_Method_AMGRS::printStatistics(MLI *mli)
830 {
831    int          mypid, level, globalNRows, totNRows, fineNRows;
832    int          maxNnz, minNnz, fineNnz, totNnz, thisNnz, itemp;
833    double       maxVal, minVal, dtemp;
834    char         paramString[100];
835    MLI_Matrix   *mli_Amat, *mli_Pmat;
836    MPI_Comm     comm = getComm();
837 
838    /* --------------------------------------------------------------- */
839    /* output header                                                   */
840    /* --------------------------------------------------------------- */
841 
842    MPI_Comm_rank( comm, &mypid);
843    if ( mypid == 0 )
844       printf("\t****************** AMGRS Statistics ********************\n");
845 
846    /* --------------------------------------------------------------- */
847    /* output processing time                                          */
848    /* --------------------------------------------------------------- */
849 
850    if ( mypid == 0 )
851    {
852       printf("\t*** number of levels = %d\n", currLevel_+1);
853       printf("\t*** total RAP   time = %e seconds\n", RAPTime_);
854       printf("\t*** total GenML time = %e seconds\n", totalTime_);
855       printf("\t******************** Amatrix ***************************\n");
856       printf("\t*level   Nrows MaxNnz MinNnz TotalNnz  maxValue  minValue*\n");
857    }
858 
859    /* --------------------------------------------------------------- */
860    /* fine and coarse matrix complexity information                   */
861    /* --------------------------------------------------------------- */
862 
863    totNnz = totNRows = 0;
864    for ( level = 0; level <= currLevel_; level++ )
865    {
866       mli_Amat = mli->getSystemMatrix( level );
867       sprintf(paramString, "nrows");
868       mli_Amat->getMatrixInfo(paramString, globalNRows, dtemp);
869       sprintf(paramString, "maxnnz");
870       mli_Amat->getMatrixInfo(paramString, maxNnz, dtemp);
871       sprintf(paramString, "minnnz");
872       mli_Amat->getMatrixInfo(paramString, minNnz, dtemp);
873       sprintf(paramString, "totnnz");
874       mli_Amat->getMatrixInfo(paramString, thisNnz, dtemp);
875       sprintf(paramString, "maxval");
876       mli_Amat->getMatrixInfo(paramString, itemp, maxVal);
877       sprintf(paramString, "minval");
878       mli_Amat->getMatrixInfo(paramString, itemp, minVal);
879       if ( mypid == 0 )
880       {
881          printf("\t*%3d %9d %5d  %5d %10d %8.3e %8.3e *\n",level,
882                 globalNRows, maxNnz, minNnz, thisNnz, maxVal, minVal);
883       }
884       if ( level == 0 ) fineNnz = thisNnz;
885       totNnz += thisNnz;
886       if ( level == 0 ) fineNRows = globalNRows;
887       totNRows += globalNRows;
888    }
889 
890    /* --------------------------------------------------------------- */
891    /* prolongation operator complexity information                    */
892    /* --------------------------------------------------------------- */
893 
894    if ( mypid == 0 )
895    {
896       printf("\t******************** Pmatrix ***************************\n");
897       printf("\t*level   Nrows MaxNnz MinNnz TotalNnz  maxValue  minValue*\n");
898       fflush(stdout);
899    }
900    for ( level = 1; level <= currLevel_; level++ )
901    {
902       mli_Pmat = mli->getProlongation( level );
903       sprintf(paramString, "nrows");
904       mli_Pmat->getMatrixInfo(paramString, globalNRows, dtemp);
905       sprintf(paramString, "maxnnz");
906       mli_Pmat->getMatrixInfo(paramString, maxNnz, dtemp);
907       sprintf(paramString, "minnnz");
908       mli_Pmat->getMatrixInfo(paramString, minNnz, dtemp);
909       sprintf(paramString, "totnnz");
910       mli_Pmat->getMatrixInfo(paramString, thisNnz, dtemp);
911       sprintf(paramString, "maxval");
912       mli_Pmat->getMatrixInfo(paramString, itemp, maxVal);
913       sprintf(paramString, "minval");
914       mli_Pmat->getMatrixInfo(paramString, itemp, minVal);
915       if ( mypid == 0 )
916       {
917          printf("\t*%3d %9d %5d  %5d %10d %8.3e %8.3e *\n",level,
918                 globalNRows, maxNnz, minNnz, thisNnz, maxVal, minVal);
919       }
920    }
921 
922    /* --------------------------------------------------------------- */
923    /* other complexity information                                    */
924    /* --------------------------------------------------------------- */
925 
926    if ( mypid == 0 )
927    {
928       printf("\t********************************************************\n");
929       dtemp = (double) totNnz / (double) fineNnz;
930       printf("\t*** Amat complexity  = %e\n", dtemp);
931       dtemp = (double) totNRows / (double) fineNRows;
932       printf("\t*** grid complexity  = %e\n", dtemp);
933       printf("\t********************************************************\n");
934       fflush(stdout);
935    }
936    return 0;
937 }
938 
939 /* ********************************************************************* *
940  * perform compatible relaxation
941  * --------------------------------------------------------------------- */
942 
performCR(MLI_Matrix * mli_Amat,int * indepSet,MLI_Matrix ** AfcMat,int numTrials,hypre_ParCSRMatrix * hypreS)943 MLI_Matrix *MLI_Method_AMGRS::performCR(MLI_Matrix *mli_Amat, int *indepSet,
944                                         MLI_Matrix **AfcMat, int numTrials,
945                                         hypre_ParCSRMatrix *hypreS)
946 {
947    int    nprocs, mypid, localNRows, iT, numFpts, irow, *reduceArray1;
948    int    *reduceArray2, iP, FStartRow, FNRows, ierr, *rowLengs;
949    int    startRow, rowIndex, colIndex, targc, numSweeps=4, rowCount;
950    int    one=1, iC, *ADiagI, *ADiagJ, *fList, colInd;
951    int    iV, ranSeed, jcol, rowCount2, CStartRow, CNRows;
952    int    kcol;
953    int    numVectors = 1, *SDiagI, *SDiagJ, count;
954    int    numNew1, numNew2, numNew3, *sortIndices, stopRefine=1;
955    double maxEigen, relaxWts[5], colValue, rnorm0, rnorm1, dOne=1.0;
956    double *XaccData, *XData, *ADiagD;
957    double aratio, ratio1, *ADiagA, targetMu=MU;
958    char   paramString[200], *targv[2];
959    HYPRE_IJMatrix     IJPFF, IJPFC;
960    hypre_ParCSRMatrix *hypreA, *hypreAff, *hyprePFC;
961    hypre_ParCSRMatrix *hypreAfc, *hyprePFF, *hyprePFFT, *hypreAPFC;
962    hypre_CSRMatrix    *ADiag, *SDiag;
963    HYPRE_IJVector     IJB, IJX, IJXacc;
964    hypre_ParVector    *hypreB, *hypreX, *hypreXacc;
965    MLI_Matrix *mli_PFFMat, *mli_AffMat, *mli_AfcMat;
966 #ifdef HAVE_TRANS
967    MLI_Matrix *mli_AffTMat;
968 #endif
969    MLI_Vector *mli_Xvec, *mli_Bvec;
970    MLI_Solver *smootherPtr;
971    MPI_Comm   comm;
972    MLI_Function *funcPtr;
973 
974    /* ------------------------------------------------------ */
975    /* get matrix and machine information                     */
976    /* ------------------------------------------------------ */
977 
978    comm = getComm();
979    MPI_Comm_size(comm, &nprocs);
980    MPI_Comm_rank(comm, &mypid);
981    hypreA = (hypre_ParCSRMatrix *) mli_Amat->getMatrix();
982    ADiag = hypre_ParCSRMatrixDiag(hypreA);
983    ADiagI = hypre_CSRMatrixI(ADiag);
984    ADiagJ = hypre_CSRMatrixJ(ADiag);
985    ADiagA = hypre_CSRMatrixData(ADiag);
986    SDiag = hypre_ParCSRMatrixDiag(hypreS);
987    SDiagI = hypre_CSRMatrixI(SDiag);
988    SDiagJ = hypre_CSRMatrixJ(SDiag);
989    localNRows = hypre_CSRMatrixNumRows(ADiag);
990    startRow = hypre_ParCSRMatrixFirstRowIndex(hypreA);
991 
992    /* ------------------------------------------------------ */
993    /* select initial set of fine points                      */
994    /* ------------------------------------------------------ */
995 
996 #if 0
997    if (numTrials != 1)
998    {
999       for (irow = 0; irow < localNRows; irow++) indepSet[irow] = 1;
1000       for (irow = 0; irow < localNRows; irow++)
1001       {
1002          if (indepSet[irow] == 1)  /* if I am a C-point */
1003          {
1004             indepSet[irow] = 0;  /* set myself to be a F-pt */
1005             for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++)
1006             {
1007                colInd = ADiagJ[jcol]; /* for each of my neighbors */
1008                if (indepSet[colInd] == 1) /* if it is a C-point */
1009                {
1010                   /* if I depend strongly on it, leave it as C-pt */
1011                   for (kcol = SDiagI[irow]; kcol < SDiagI[irow+1]; kcol++)
1012                   {
1013                      if (SDiagJ[kcol] == colInd)
1014                      {
1015                         indepSet[colInd] = -1;
1016                         break;
1017                      }
1018                   }
1019                   /* if I don't depend strongly on it, see if it depends on me*/
1020                   if (kcol == SDiagI[irow+1])
1021                   {
1022                      for (kcol=SDiagI[colInd]; kcol < SDiagI[colInd+1]; kcol++)
1023                      {
1024                         if (SDiagJ[kcol] == irow)
1025                         {
1026                            indepSet[colInd] = -1;
1027                            break;
1028                         }
1029                      }
1030                   }
1031                }
1032             }
1033          }
1034       }
1035       for (irow = 0; irow < localNRows; irow++)
1036          if (indepSet[irow] < 0) indepSet[irow] = 1;
1037       count = 0;
1038       for (irow = 0; irow < localNRows; irow++)
1039          if (indepSet[irow] == 1) count++;
1040 
1041       /* ------------------------------------------------------ */
1042       /* select second set of fine points                       */
1043       /* ------------------------------------------------------ */
1044 
1045       for (irow = 0; irow < localNRows; irow++)
1046       {
1047          if (indepSet[irow] == 1)  /* if I am a C-point */
1048          {
1049             count = 0;
1050             for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++)
1051             {
1052                colInd = ADiagJ[jcol]; /* for each of my neighbors */
1053                if (indepSet[colInd] == 0) /* if it is a F-point */
1054                {
1055                   /* if I depend strongly on it, increment counter */
1056                   for (kcol = SDiagI[irow]; kcol < SDiagI[irow+1]; kcol++)
1057                   {
1058                      if (SDiagJ[kcol] == colInd)
1059                      {
1060                         count++;
1061                         break;
1062                      }
1063                   }
1064                   /* if I don't depend strongly on it, see if it depends on me*/
1065                   if (kcol == SDiagI[irow+1])
1066                   {
1067                      for (kcol=SDiagI[colInd]; kcol < SDiagI[colInd+1]; kcol++)
1068                      {
1069                         if (SDiagJ[kcol] == irow)
1070                         {
1071                            count++;
1072                            break;
1073                         }
1074                      }
1075                   }
1076                }
1077             }
1078             if (count <= 1) indepSet[irow] = 0;
1079          }
1080       }
1081       count = 0;
1082       for (irow = 0; irow < localNRows; irow++)
1083          if (indepSet[irow] == 1) count++;
1084    }
1085 #endif
1086 
1087    /* ------------------------------------------------------ */
1088    /* loop over number of trials                             */
1089    /* ------------------------------------------------------ */
1090 
1091    printf("\tPerform compatible relaxation\n");
1092    ADiagD = new double[localNRows];
1093    for (irow = 0; irow < localNRows; irow++)
1094       for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++)
1095          if (ADiagJ[jcol] == irow) {ADiagD[irow] = ADiagA[jcol]; break;}
1096    fList = new int[localNRows];
1097    aratio = 0.0;
1098    numNew1 = numNew2 = numNew3 = 0;
1099    for (iT = 0; iT < numTrials; iT++)
1100    {
1101       /* --------------------------------------------------- */
1102       /* get Aff and Afc matrices (get dimension)            */
1103       /* --------------------------------------------------- */
1104 
1105       numFpts = 0;
1106       for (irow = 0; irow < localNRows; irow++)
1107          if (indepSet[irow] != 1) fList[numFpts++] = irow;
1108       printf("\tTrial %3d (%3d) : number of F-points = %d\n", iT,
1109              numTrials, numFpts);
1110       reduceArray1 = new int[nprocs+1];
1111       reduceArray2 = new int[nprocs+1];
1112       for (iP = 0; iP < nprocs; iP++) reduceArray1[iP] = 0;
1113       for (iP = 0; iP < nprocs; iP++) reduceArray2[iP] = 0;
1114       reduceArray1[mypid] = numFpts;
1115       MPI_Allreduce(reduceArray1,reduceArray2,nprocs,MPI_INT,MPI_SUM,comm);
1116       for (iP = nprocs-1; iP >= 0; iP--) reduceArray2[iP+1] = reduceArray2[iP];
1117       reduceArray2[0] = 0;
1118       for (iP = 2; iP <= nprocs; iP++) reduceArray2[iP] += reduceArray2[iP-1];
1119       FStartRow = reduceArray2[mypid];
1120       FNRows = reduceArray2[mypid+1] - FStartRow;
1121       delete [] reduceArray1;
1122       delete [] reduceArray2;
1123       CStartRow = startRow - FStartRow;
1124       CNRows = localNRows - FNRows;
1125 
1126       /* --------------------------------------------------- */
1127       /* get Aff and Afc matrices (create permute matrices)  */
1128       /* --------------------------------------------------- */
1129 
1130       ierr = HYPRE_IJMatrixCreate(comm,startRow,startRow+localNRows-1,
1131                            FStartRow,FStartRow+FNRows-1,&IJPFF);
1132       ierr = HYPRE_IJMatrixSetObjectType(IJPFF, HYPRE_PARCSR);
1133       hypre_assert(!ierr);
1134       rowLengs = new int[localNRows];
1135       for (irow = 0; irow < localNRows; irow++) rowLengs[irow] = 1;
1136       ierr = HYPRE_IJMatrixSetRowSizes(IJPFF, rowLengs);
1137       ierr = HYPRE_IJMatrixInitialize(IJPFF);
1138       hypre_assert(!ierr);
1139 
1140       ierr = HYPRE_IJMatrixCreate(comm,startRow,startRow+localNRows-1,
1141                    CStartRow,CStartRow+CNRows-1, &IJPFC);
1142       ierr = HYPRE_IJMatrixSetObjectType(IJPFC, HYPRE_PARCSR);
1143       hypre_assert(!ierr);
1144       ierr = HYPRE_IJMatrixSetRowSizes(IJPFC, rowLengs);
1145       ierr = HYPRE_IJMatrixInitialize(IJPFC);
1146       hypre_assert(!ierr);
1147       delete [] rowLengs;
1148 
1149       /* --------------------------------------------------- */
1150       /* get Aff and Afc matrices (load permute matrices)    */
1151       /* --------------------------------------------------- */
1152 
1153       colValue = 1.0;
1154       rowCount = rowCount2 = 0;
1155       for (irow = 0; irow < localNRows; irow++)
1156       {
1157          rowIndex = startRow + irow;
1158          if (indepSet[irow] == 0)
1159          {
1160             colIndex = FStartRow + rowCount;
1161             HYPRE_IJMatrixSetValues(IJPFF,1,&one,(const int *) &rowIndex,
1162                     (const int *) &colIndex, (const double *) &colValue);
1163             rowCount++;
1164          }
1165          else
1166          {
1167             colIndex = CStartRow + rowCount2;
1168             HYPRE_IJMatrixSetValues(IJPFC,1,&one,(const int *) &rowIndex,
1169                     (const int *) &colIndex, (const double *) &colValue);
1170             rowCount2++;
1171          }
1172       }
1173       ierr = HYPRE_IJMatrixAssemble(IJPFF);
1174       hypre_assert( !ierr );
1175       HYPRE_IJMatrixGetObject(IJPFF, (void **) &hyprePFF);
1176       //hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hyprePFF);
1177       sprintf(paramString, "HYPRE_ParCSR" );
1178       mli_PFFMat = new MLI_Matrix((void *)hyprePFF,paramString,NULL);
1179 
1180       ierr = HYPRE_IJMatrixAssemble(IJPFC);
1181       hypre_assert( !ierr );
1182       HYPRE_IJMatrixGetObject(IJPFC, (void **) &hyprePFC);
1183       //hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hyprePFC);
1184       hypreAPFC = hypre_ParMatmul(hypreA, hyprePFC);
1185       hypre_ParCSRMatrixTranspose(hyprePFF, &hyprePFFT, 1);
1186       hypreAfc = hypre_ParMatmul(hyprePFFT, hypreAPFC);
1187 
1188       funcPtr = new MLI_Function();
1189       MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
1190       sprintf(paramString, "HYPRE_ParCSR" );
1191       mli_AfcMat = new MLI_Matrix((void *)hypreAfc,paramString,funcPtr);
1192       delete funcPtr;
1193 
1194       MLI_Matrix_ComputePtAP(mli_PFFMat, mli_Amat, &mli_AffMat);
1195       hypreAff  = (hypre_ParCSRMatrix *) mli_AffMat->getMatrix();
1196 
1197       //if (aratio > targetMu || iT == numTrials-1) break;
1198       //if (((double)FNRows/(double)localNRows) > 0.75) break;
1199 
1200       HYPRE_IJVectorCreate(comm,FStartRow, FStartRow+FNRows-1,&IJX);
1201       HYPRE_IJVectorSetObjectType(IJX, HYPRE_PARCSR);
1202       HYPRE_IJVectorInitialize(IJX);
1203       HYPRE_IJVectorAssemble(IJX);
1204       HYPRE_IJVectorGetObject(IJX, (void **) &hypreX);
1205       sprintf(paramString, "HYPRE_ParVector" );
1206       mli_Xvec = new MLI_Vector((void *)hypreX,paramString,NULL);
1207 
1208       HYPRE_IJVectorCreate(comm,FStartRow, FStartRow+FNRows-1,&IJXacc);
1209       HYPRE_IJVectorSetObjectType(IJXacc, HYPRE_PARCSR);
1210       HYPRE_IJVectorInitialize(IJXacc);
1211       HYPRE_IJVectorAssemble(IJXacc);
1212       HYPRE_IJVectorGetObject(IJXacc, (void **) &hypreXacc);
1213       hypre_ParVectorSetConstantValues(hypreXacc, 0.0);
1214 
1215       HYPRE_IJVectorCreate(comm,FStartRow, FStartRow+FNRows-1,&IJB);
1216       HYPRE_IJVectorSetObjectType(IJB, HYPRE_PARCSR);
1217       HYPRE_IJVectorInitialize(IJB);
1218       HYPRE_IJVectorAssemble(IJB);
1219       HYPRE_IJVectorGetObject(IJB, (void **) &hypreB);
1220       hypre_ParVectorSetConstantValues(hypreB, 0.0);
1221       sprintf(paramString, "HYPRE_ParVector" );
1222       mli_Bvec = new MLI_Vector((void *)hypreB,paramString,NULL);
1223 
1224       /* --------------------------------------------------- */
1225       /* set up Jacobi smoother with 4 sweeps and weight=1   */
1226       /* --------------------------------------------------- */
1227 
1228       strcpy(paramString, "Jacobi");
1229       smootherPtr = MLI_Solver_CreateFromName(paramString);
1230       strcpy(paramString, "setModifiedDiag");
1231       smootherPtr->setParams(paramString, 0, NULL);
1232       targc = 2;
1233       numSweeps = 1;
1234       targv[0] = (char *) &numSweeps;
1235       for (iC = 0; iC < 5; iC++) relaxWts[iC] = 3.0/3.0;
1236       targv[1] = (char *) relaxWts;
1237       strcpy(paramString, "relaxWeight");
1238       smootherPtr->setParams(paramString, targc, targv);
1239       maxEigen = 1.0;
1240       targc = 1;
1241       targv[0] = (char *) &maxEigen;
1242       strcpy(paramString, "setMaxEigen");
1243       smootherPtr->setParams(paramString, targc, targv);
1244       smootherPtr->setup(mli_AffMat);
1245 
1246       /* --------------------------------------------------- */
1247       /* relaxation                                          */
1248       /* --------------------------------------------------- */
1249 
1250       targc = 2;
1251       targv[0] = (char *) &numSweeps;
1252       targv[1] = (char *) relaxWts;
1253       strcpy(paramString, "relaxWeight");
1254       aratio = 0.0;
1255       XData = (double *) hypre_VectorData(hypre_ParVectorLocalVector(hypreX));
1256       XaccData = (double *)
1257                hypre_VectorData(hypre_ParVectorLocalVector(hypreXacc));
1258       for (iV = 0; iV < numVectors; iV++)
1259       {
1260          ranSeed = 9001 * 7901 * iV *iV * iV * iV + iV * iV * iV + 101;
1261          HYPRE_ParVectorSetRandomValues((HYPRE_ParVector) hypreX,ranSeed);
1262          for (irow = 0; irow < FNRows; irow++)
1263             XData[irow] = 0.5 * XData[irow] + 0.5;
1264          hypre_ParVectorAxpy(dOne, hypreX, hypreXacc);
1265          hypre_ParVectorSetConstantValues(hypreB, 0.0);
1266          hypre_ParCSRMatrixMatvec(-1.0, hypreAff, hypreX, 1.0, hypreB);
1267          rnorm0 = sqrt(hypre_ParVectorInnerProd(hypreB, hypreB));
1268          hypre_ParVectorSetConstantValues(hypreB, 0.0);
1269 
1270          numSweeps = 5;
1271          smootherPtr->setParams(paramString, targc, targv);
1272          smootherPtr->solve(mli_Bvec, mli_Xvec);
1273          hypre_ParCSRMatrixMatvec(-1.0, hypreAff, hypreX, 1.0, hypreB);
1274          rnorm1 = sqrt(hypre_ParVectorInnerProd(hypreB, hypreB));
1275 
1276          rnorm0 = rnorm1;
1277          hypre_ParVectorSetConstantValues(hypreB, 0.0);
1278          numSweeps = PDEGREE+1;
1279          smootherPtr->setParams(paramString, targc, targv);
1280          smootherPtr->solve(mli_Bvec, mli_Xvec);
1281          hypre_ParCSRMatrixMatvec(-1.0, hypreAff, hypreX, 1.0, hypreB);
1282          rnorm1 = sqrt(hypre_ParVectorInnerProd(hypreB, hypreB));
1283 
1284          aratio = 0;
1285          for (irow = 0; irow < FNRows; irow++)
1286          {
1287             ratio1 = habs(XData[irow]);
1288             XaccData[irow] = ratio1;
1289             if (ratio1 > aratio) aratio = ratio1;
1290          }
1291          printf("\tTrial %3d : Jacobi norms = %16.8e %16.8e %16.8e\n",iT,
1292                 rnorm0,rnorm1,aratio);
1293          if (rnorm0 > 1.0e-10) aratio = rnorm1 / rnorm0;
1294          else aratio = 0.0;
1295       }
1296       delete smootherPtr;
1297 
1298       /* --------------------------------------------------- */
1299       /* select fine points                                  */
1300       /* --------------------------------------------------- */
1301 
1302       if (iV == numVectors) aratio /= (double) numVectors;
1303       if (aratio < targetMu)
1304       {
1305          if (stopRefine == 0)
1306          {
1307             for (irow = 0; irow < localNRows; irow++)
1308             {
1309                if (indepSet[irow] == 1)  /* if I am a C-point */
1310                {
1311                   count = 0;
1312                   for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++)
1313                   {
1314                      colInd = ADiagJ[jcol]; /* for each of my neighbors */
1315                      if (indepSet[colInd] == 0) /* if it is a F-point */
1316                      {
1317                         /* if I depend strongly on it, increment counter */
1318                         for (kcol= SDiagI[irow]; kcol < SDiagI[irow+1]; kcol++)
1319                         {
1320                            if (SDiagJ[kcol] == colInd)
1321                            {
1322                               count++;
1323                               break;
1324                            }
1325                         }
1326                         if (kcol == SDiagI[irow+1])
1327                         {
1328                           for (kcol=SDiagI[colInd];kcol<SDiagI[colInd+1];kcol++)
1329                           {
1330                              if (SDiagJ[kcol] == irow)
1331                              {
1332                                 count++;
1333                                 break;
1334                              }
1335                           }
1336                         }
1337                      }
1338                   }
1339                   if (count <= 3*(iT+1)) indepSet[irow] = 0;
1340                }
1341             }
1342          }
1343       }
1344       else if (aratio > targetMu)
1345       {
1346          sortIndices = new int[localNRows];
1347          for (irow = 0; irow < localNRows; irow++) sortIndices[irow] = -1;
1348          iC = 0;
1349          for (irow = 0; irow < localNRows; irow++)
1350             if (indepSet[irow] != 1) sortIndices[irow] = iC++;
1351 
1352          for (irow = 0; irow < localNRows; irow++)
1353          {
1354             iC = sortIndices[irow];
1355             if (indepSet[irow] == 0 && (habs(XaccData[iC]) > 0.1))
1356             {
1357                aratio = targetMu;
1358                //stopRefine = 1;
1359                indepSet[irow] = 1; /* set it to a coarse point */
1360                for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++)
1361                {
1362                   colInd = ADiagJ[jcol];
1363                   if (indepSet[colInd] == 0) /* if it is a F-point */
1364                   {
1365                      for (kcol = SDiagI[irow]; kcol < SDiagI[irow+1]; kcol++)
1366                      {
1367                         if (SDiagJ[kcol] == colInd)
1368                         {
1369                            indepSet[colInd] = -1;
1370                            break;
1371                         }
1372                      }
1373                      if (kcol == SDiagI[irow+1])
1374                      {
1375                         for (kcol=SDiagI[colInd];kcol<SDiagI[colInd+1];kcol++)
1376                         {
1377                            if (SDiagJ[kcol] == irow)
1378                            {
1379                               indepSet[colInd] = -1;
1380                               break;
1381                            }
1382                         }
1383                      }
1384                   }
1385                }
1386             }
1387          }
1388          for (irow = 0; irow < localNRows; irow++)
1389             if (indepSet[irow] == -1) indepSet[irow] = 0;
1390          delete [] sortIndices;
1391       }
1392 
1393       /* --------------------------------------------------- */
1394       /* clean up                                            */
1395       /* --------------------------------------------------- */
1396 
1397       HYPRE_IJMatrixDestroy(IJPFF);
1398       hypre_ParCSRMatrixDestroy(hyprePFFT);
1399       hypre_ParCSRMatrixDestroy(hypreAPFC);
1400       delete mli_PFFMat;
1401 #ifdef HAVE_TRANS
1402       delete mli_AffTMat;
1403 #endif
1404       HYPRE_IJMatrixDestroy(IJPFC);
1405       HYPRE_IJVectorDestroy(IJX);
1406       HYPRE_IJVectorDestroy(IJB);
1407       HYPRE_IJVectorDestroy(IJXacc);
1408       delete mli_Bvec;
1409       delete mli_Xvec;
1410       //if (localNRows != FNRows && aratio < targetMu) break;
1411       //if (aratio < targetMu) break;
1412       if (numTrials == 1) break;
1413       numNew1 = numNew2;
1414       numNew2 = numNew3;
1415       numNew3 = 0;
1416       for (irow=0; irow < localNRows; irow++) if (indepSet[irow]==0) numNew3++;
1417       if (numNew3 == numNew1) break;
1418       delete mli_AffMat;
1419       delete mli_AfcMat;
1420       //hypre_ParCSRMatrixDestroy(hypreAfc);
1421    }
1422 
1423    /* ------------------------------------------------------ */
1424    /* final clean up                                         */
1425    /* ------------------------------------------------------ */
1426 
1427    delete [] ADiagD;
1428    delete [] fList;
1429    (*AfcMat) = mli_AfcMat;
1430    return mli_AffMat;
1431 }
1432 
1433 /* ********************************************************************* *
1434  * create the prolongation matrix
1435  * --------------------------------------------------------------------- */
1436 
createPmat(int * indepSet,MLI_Matrix * mli_Amat,MLI_Matrix * mli_Affmat,MLI_Matrix * mli_Afcmat)1437 MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat,
1438                                MLI_Matrix *mli_Affmat, MLI_Matrix *mli_Afcmat)
1439 {
1440    int    *ADiagI, *ADiagJ, localNRows, AffNRows, AffStartRow, irow;
1441    int    *rowLengs, ierr, startRow, rowCount, rowIndex, colIndex;
1442    int    *colInd, rowSize, jcol, one=1, maxRowLeng, nnz, PDegree=PDEGREE;
1443    int    *tPDiagI, *tPDiagJ, cCount, fCount, ncount, *ADDiagI, *ADDiagJ;
1444    int    *AD2DiagI, *AD2DiagJ, *newColInd, newRowSize;
1445    int    nprocs, AccStartRow, AccNRows;
1446    double *ADiagA, *colVal, colValue, *newColVal, *DDiagA;
1447    double *tPDiagA, *ADDiagA, *AD2DiagA, omega=2.0/3.0, dtemp;
1448    char   paramString[100];
1449    HYPRE_IJMatrix     IJInvD, IJP;
1450    hypre_ParCSRMatrix *hypreA, *hypreAff, *hypreInvD, *hypreP=NULL, *hypreAD;
1451    hypre_ParCSRMatrix *hypreAD2, *hypreAfc, *hypreTmp;
1452    hypre_CSRMatrix    *ADiag, *DDiag, *tPDiag, *ADDiag, *AD2Diag;
1453    MLI_Function       *funcPtr;
1454    MLI_Matrix         *mli_Pmat;
1455    MPI_Comm           comm;
1456 
1457    /* ------------------------------------------------------ */
1458    /* get matrix information                                 */
1459    /* ------------------------------------------------------ */
1460 
1461    comm = getComm();
1462    MPI_Comm_size(comm, &nprocs);
1463    hypreA = (hypre_ParCSRMatrix *) mli_Amat->getMatrix();
1464    startRow = hypre_ParCSRMatrixFirstRowIndex(hypreA);
1465    localNRows = hypre_ParCSRMatrixNumRows(hypreA);
1466 
1467    hypreAff = (hypre_ParCSRMatrix *) mli_Affmat->getMatrix();
1468    AffStartRow = hypre_ParCSRMatrixFirstRowIndex(hypreAff);
1469    AffNRows = hypre_ParCSRMatrixNumRows(hypreAff);
1470 
1471    /* ------------------------------------------------------ */
1472    /* create the diagonal matrix of A                        */
1473    /* ------------------------------------------------------ */
1474 
1475    ierr = HYPRE_IJMatrixCreate(comm,AffStartRow,AffStartRow+AffNRows-1,
1476                            AffStartRow,AffStartRow+AffNRows-1,&IJInvD);
1477    ierr = HYPRE_IJMatrixSetObjectType(IJInvD, HYPRE_PARCSR);
1478    hypre_assert(!ierr);
1479    rowLengs = new int[AffNRows];
1480    for (irow = 0; irow < AffNRows; irow++) rowLengs[irow] = 1;
1481    ierr = HYPRE_IJMatrixSetRowSizes(IJInvD, rowLengs);
1482    ierr = HYPRE_IJMatrixInitialize(IJInvD);
1483    hypre_assert(!ierr);
1484    delete [] rowLengs;
1485 
1486    /* ------------------------------------------------------ */
1487    /* load the diagonal matrix of A                          */
1488    /* ------------------------------------------------------ */
1489 
1490    rowCount = 0;
1491    for (irow = 0; irow < localNRows; irow++)
1492    {
1493       rowIndex = startRow + irow;
1494       if (indepSet[irow] == 0)
1495       {
1496          HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreA, rowIndex,
1497                                   &rowSize, &colInd, &colVal);
1498          colValue = 1.0;
1499          for (jcol = 0; jcol < rowSize; jcol++)
1500          {
1501             if (colInd[jcol] == rowIndex)
1502             {
1503                colValue = colVal[jcol];
1504                break;
1505             }
1506          }
1507          if (colValue >= 0.0)
1508          {
1509             for (jcol = 0; jcol < rowSize; jcol++)
1510                if (colInd[jcol] != rowIndex &&
1511                    (indepSet[colInd[jcol]-startRow] == 0) &&
1512                    colVal[jcol] > 0.0)
1513                   colValue += colVal[jcol];
1514          }
1515          else
1516          {
1517             for (jcol = 0; jcol < rowSize; jcol++)
1518                if (colInd[jcol] != rowIndex &&
1519                    (indepSet[colInd[jcol]-startRow] == 0) &&
1520                    colVal[jcol] < 0.0)
1521                   colValue += colVal[jcol];
1522          }
1523          colValue = 1.0 / colValue;
1524          colIndex = AffStartRow + rowCount;
1525          HYPRE_IJMatrixSetValues(IJInvD,1,&one,(const int *) &colIndex,
1526                     (const int *) &colIndex, (const double *) &colValue);
1527          rowCount++;
1528          HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreA, rowIndex,
1529                                       &rowSize, &colInd, &colVal);
1530       }
1531    }
1532 
1533    /* ------------------------------------------------------ */
1534    /* finally assemble the diagonal matrix of A              */
1535    /* ------------------------------------------------------ */
1536 
1537    ierr = HYPRE_IJMatrixAssemble(IJInvD);
1538    hypre_assert( !ierr );
1539    HYPRE_IJMatrixGetObject(IJInvD, (void **) &hypreInvD);
1540    ierr += HYPRE_IJMatrixSetObjectType(IJInvD, -1);
1541    ierr += HYPRE_IJMatrixDestroy(IJInvD);
1542    hypre_assert( !ierr );
1543 
1544    /* ------------------------------------------------------ */
1545    /* generate polynomial of Aff and invD                    */
1546    /* ------------------------------------------------------ */
1547 
1548    if (PDegree == 0)
1549    {
1550       hypreP = hypreInvD;
1551       hypreInvD = NULL;
1552       ADiag  = hypre_ParCSRMatrixDiag(hypreP);
1553       ADiagI = hypre_CSRMatrixI(ADiag);
1554       ADiagJ = hypre_CSRMatrixJ(ADiag);
1555       ADiagA = hypre_CSRMatrixData(ADiag);
1556       for (irow = 0; irow < AffNRows; irow++)
1557          for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++)
1558             ADiagA[jcol] = - ADiagA[jcol];
1559    }
1560    else if (PDegree == 1)
1561    {
1562 #if 1
1563       hypreP = hypre_ParMatmul(hypreAff, hypreInvD);
1564       DDiag  = hypre_ParCSRMatrixDiag(hypreInvD);
1565       DDiagA = hypre_CSRMatrixData(DDiag);
1566       ADiag  = hypre_ParCSRMatrixDiag(hypreP);
1567       ADiagI = hypre_CSRMatrixI(ADiag);
1568       ADiagJ = hypre_CSRMatrixJ(ADiag);
1569       ADiagA = hypre_CSRMatrixData(ADiag);
1570       for (irow = 0; irow < AffNRows; irow++)
1571       {
1572          for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++)
1573          {
1574             if (ADiagJ[jcol] == irow)
1575                  ADiagA[jcol] = - omega*DDiagA[irow]*(2.0-omega*ADiagA[jcol]);
1576             else ADiagA[jcol] = omega * omega * DDiagA[irow] * ADiagA[jcol];
1577          }
1578       }
1579 #else
1580       ierr = HYPRE_IJMatrixCreate(comm,AffStartRow,AffStartRow+AffNRows-1,
1581                            AffStartRow,AffStartRow+AffNRows-1,&IJP);
1582       ierr = HYPRE_IJMatrixSetObjectType(IJP, HYPRE_PARCSR);
1583       hypre_assert(!ierr);
1584       rowLengs = new int[AffNRows];
1585       maxRowLeng = 0;
1586       ADiag   = hypre_ParCSRMatrixDiag(hypreAff);
1587       ADiagI  = hypre_CSRMatrixI(ADiag);
1588       ADiagJ  = hypre_CSRMatrixJ(ADiag);
1589       ADiagA  = hypre_CSRMatrixData(ADiag);
1590       for (irow = 0; irow < AffNRows; irow++)
1591       {
1592          newRowSize = 1;
1593          for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++)
1594             if (ADiagJ[jcol] == irow) {index = jcol; break;}
1595          for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++)
1596             if (ADiagJ[jcol] != irow && ADiagA[jcol]*ADiagA[index] < 0.0)
1597                newRowSize++;
1598          rowLengs[irow] = newRowSize;
1599          if (newRowSize > maxRowLeng) maxRowLeng = newRowSize;
1600       }
1601       ierr = HYPRE_IJMatrixSetRowSizes(IJP, rowLengs);
1602       ierr = HYPRE_IJMatrixInitialize(IJP);
1603       hypre_assert(!ierr);
1604       delete [] rowLengs;
1605       newColInd = new int[maxRowLeng];
1606       newColVal = new double[maxRowLeng];
1607       for (irow = 0; irow < AffNRows; irow++)
1608       {
1609          newRowSize = 0;
1610          index = -1;
1611          for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++)
1612             if (ADiagJ[jcol] == irow) {index = jcol; break;}
1613          if (index == -1) printf("WARNING : zero diagonal.\n");
1614          newColInd[0] = AffStartRow + irow;
1615          newColVal[0] = ADiagA[index];
1616          newRowSize++;
1617          for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++)
1618          {
1619             if (ADiagJ[jcol] != irow && ADiagA[jcol]*ADiagA[index] < 0.0)
1620             {
1621                newColInd[newRowSize] = AffStartRow + ADiagJ[jcol];
1622                newColVal[newRowSize++] = ADiagA[jcol];
1623             }
1624             else
1625             {
1626                newColVal[0] += ADiagA[jcol];
1627             }
1628          }
1629          for (jcol = 1; jcol < newRowSize; jcol++)
1630             newColVal[jcol] /= (-newColVal[0]);
1631          newColVal[0] = 2.0 - newColVal[0];
1632          rowIndex = AffStartRow + irow;
1633          ierr = HYPRE_IJMatrixSetValues(IJP, 1, &newRowSize,
1634                    (const int *) &rowIndex, (const int *) newColInd,
1635                    (const double *) newColVal);
1636          hypre_assert(!ierr);
1637       }
1638       delete [] newColInd;
1639       delete [] newColVal;
1640       ierr = HYPRE_IJMatrixAssemble(IJP);
1641       hypre_assert( !ierr );
1642       HYPRE_IJMatrixGetObject(IJP, (void **) &hypreAD);
1643       hypreP = hypre_ParMatmul(hypreAD, hypreInvD);
1644       ierr += HYPRE_IJMatrixDestroy(IJP);
1645 #endif
1646    }
1647    else if (PDegree == 2)
1648    {
1649       hypreAD  = hypre_ParMatmul(hypreAff, hypreInvD);
1650       hypreAD2 = hypre_ParMatmul(hypreAD, hypreAD);
1651       ADDiag   = hypre_ParCSRMatrixDiag(hypreAD);
1652       AD2Diag  = hypre_ParCSRMatrixDiag(hypreAD2);
1653       ADDiagI  = hypre_CSRMatrixI(ADDiag);
1654       ADDiagJ  = hypre_CSRMatrixJ(ADDiag);
1655       ADDiagA  = hypre_CSRMatrixData(ADDiag);
1656       AD2DiagI = hypre_CSRMatrixI(AD2Diag);
1657       AD2DiagJ = hypre_CSRMatrixJ(AD2Diag);
1658       AD2DiagA = hypre_CSRMatrixData(AD2Diag);
1659       DDiag    = hypre_ParCSRMatrixDiag(hypreInvD);
1660       DDiagA   = hypre_CSRMatrixData(DDiag);
1661       newColInd = new int[2*AffNRows];
1662       newColVal = new double[2*AffNRows];
1663       ierr = HYPRE_IJMatrixCreate(comm,AffStartRow,AffStartRow+AffNRows-1,
1664                            AffStartRow,AffStartRow+AffNRows-1,&IJP);
1665       ierr = HYPRE_IJMatrixSetObjectType(IJP, HYPRE_PARCSR);
1666       hypre_assert(!ierr);
1667       rowLengs = new int[AffNRows];
1668       maxRowLeng = 0;
1669       for (irow = 0; irow < AffNRows; irow++)
1670       {
1671          newRowSize = 0;
1672          for (jcol = ADDiagI[irow]; jcol < ADDiagI[irow+1]; jcol++)
1673             newColInd[newRowSize] = ADDiagJ[jcol];
1674          for (jcol = AD2DiagI[irow]; jcol < AD2DiagI[irow+1]; jcol++)
1675             newColInd[newRowSize] = AD2DiagJ[jcol];
1676          if (newRowSize > maxRowLeng) maxRowLeng = newRowSize;
1677          hypre_qsort0(newColInd, 0, newRowSize-1);
1678          ncount = 0;
1679          for ( jcol = 0; jcol < newRowSize; jcol++ )
1680          {
1681             if ( newColInd[jcol] != newColInd[ncount] )
1682             {
1683                ncount++;
1684                newColInd[ncount] = newColInd[jcol];
1685             }
1686          }
1687          newRowSize = ncount + 1;
1688          rowLengs[irow] = newRowSize;
1689       }
1690       ierr = HYPRE_IJMatrixSetRowSizes(IJP, rowLengs);
1691       ierr = HYPRE_IJMatrixInitialize(IJP);
1692       hypre_assert(!ierr);
1693       delete [] rowLengs;
1694       nnz = 0;
1695       for (irow = 0; irow < AffNRows; irow++)
1696       {
1697          rowIndex = AffStartRow + irow;
1698          newRowSize = 0;
1699          for (jcol = ADDiagI[irow]; jcol < ADDiagI[irow+1]; jcol++)
1700          {
1701             newColInd[newRowSize] = ADDiagJ[jcol];
1702             if (ADDiagJ[jcol] == irow)
1703                newColVal[newRowSize++] = 3.0 * (1.0 - ADDiagA[jcol]);
1704             else
1705                newColVal[newRowSize++] = - 3.0 * ADDiagA[jcol];
1706          }
1707          for (jcol = AD2DiagI[irow]; jcol < AD2DiagI[irow+1]; jcol++)
1708          {
1709             newColInd[newRowSize] = AD2DiagJ[jcol];
1710             newColVal[newRowSize++] = AD2DiagA[jcol];
1711          }
1712          hypre_qsort1(newColInd, newColVal, 0, newRowSize-1);
1713          ncount = 0;
1714          for ( jcol = 0; jcol < newRowSize; jcol++ )
1715          {
1716             if ( jcol != ncount && newColInd[jcol] == newColInd[ncount] )
1717                newColVal[ncount] += newColVal[jcol];
1718             else if ( newColInd[jcol] != newColInd[ncount] )
1719             {
1720                ncount++;
1721                newColVal[ncount] = newColVal[jcol];
1722                newColInd[ncount] = newColInd[jcol];
1723             }
1724          }
1725          newRowSize = ncount + 1;
1726          for ( jcol = 0; jcol < newRowSize; jcol++ )
1727             newColVal[jcol] = - (DDiagA[irow] * newColVal[jcol]);
1728 
1729          ierr = HYPRE_IJMatrixSetValues(IJP, 1, &newRowSize,
1730                    (const int *) &rowIndex, (const int *) newColInd,
1731                    (const double *) newColVal);
1732          nnz += newRowSize;
1733          hypre_assert(!ierr);
1734       }
1735       delete [] newColInd;
1736       delete [] newColVal;
1737       ierr = HYPRE_IJMatrixAssemble(IJP);
1738       hypre_assert( !ierr );
1739       HYPRE_IJMatrixGetObject(IJP, (void **) &hypreP);
1740       ierr += HYPRE_IJMatrixSetObjectType(IJP, -1);
1741       ierr += HYPRE_IJMatrixDestroy(IJP);
1742       hypre_assert(!ierr);
1743       hypre_ParCSRMatrixDestroy(hypreAD);
1744       hypre_ParCSRMatrixDestroy(hypreAD2);
1745    }
1746    if (hypreInvD != NULL) hypre_ParCSRMatrixDestroy(hypreInvD);
1747 
1748    /* ------------------------------------------------------ */
1749    /* create the final P matrix (from hypreP)                */
1750    /* ------------------------------------------------------ */
1751 
1752    hypreAfc = (hypre_ParCSRMatrix *) mli_Afcmat->getMatrix();
1753    hypreTmp = hypre_ParMatmul(hypreP, hypreAfc);
1754    hypre_ParCSRMatrixDestroy(hypreP);
1755    hypreP = hypreTmp;
1756    tPDiag   = hypre_ParCSRMatrixDiag(hypreP);
1757    tPDiagI  = hypre_CSRMatrixI(tPDiag);
1758    tPDiagJ  = hypre_CSRMatrixJ(tPDiag);
1759    tPDiagA  = hypre_CSRMatrixData(tPDiag);
1760    AccStartRow = startRow - AffStartRow;
1761    AccNRows = localNRows - AffNRows;
1762    ierr = HYPRE_IJMatrixCreate(comm,startRow,startRow+localNRows-1,
1763                         AccStartRow,AccStartRow+AccNRows-1,&IJP);
1764    ierr = HYPRE_IJMatrixSetObjectType(IJP, HYPRE_PARCSR);
1765    hypre_assert(!ierr);
1766    rowLengs = new int[localNRows];
1767    maxRowLeng = 0;
1768    ncount = 0;
1769    for (irow = 0; irow < localNRows; irow++)
1770    {
1771       if (indepSet[irow] == 1) rowLengs[irow] = 1;
1772       else
1773       {
1774          rowLengs[irow] = tPDiagI[ncount+1] - tPDiagI[ncount];
1775          ncount++;
1776       }
1777       if (rowLengs[irow] > maxRowLeng) maxRowLeng = rowLengs[irow];
1778    }
1779    ierr = HYPRE_IJMatrixSetRowSizes(IJP, rowLengs);
1780    ierr = HYPRE_IJMatrixInitialize(IJP);
1781    hypre_assert(!ierr);
1782    delete [] rowLengs;
1783    fCount = 0;
1784    cCount = 0;
1785    newColInd = new int[maxRowLeng];
1786    newColVal = new double[maxRowLeng];
1787    for (irow = 0; irow < localNRows; irow++)
1788    {
1789       rowIndex = startRow + irow;
1790       if (indepSet[irow] == 1)
1791       {
1792          newRowSize = 1;
1793          newColInd[0] = AccStartRow + cCount;
1794          newColVal[0] = 1.0;
1795          cCount++;
1796       }
1797       else
1798       {
1799          newRowSize = 0;
1800          for (jcol = tPDiagI[fCount]; jcol < tPDiagI[fCount+1]; jcol++)
1801          {
1802             newColInd[newRowSize] = tPDiagJ[jcol] + AccStartRow;
1803             newColVal[newRowSize++] = tPDiagA[jcol];
1804          }
1805          fCount++;
1806       }
1807 
1808       /* pruning */
1809       if (irow == 0) printf("pruning and scaling\n");
1810       dtemp = 0.0;
1811       for (jcol = 0; jcol < newRowSize; jcol++)
1812          if (habs(newColVal[jcol]) > dtemp) dtemp = habs(newColVal[jcol]);
1813       dtemp *= pruneFactor;
1814       ncount = 0;
1815       for (jcol = 0; jcol < newRowSize; jcol++)
1816       {
1817          if (habs(newColVal[jcol]) > dtemp)
1818          {
1819             newColInd[ncount] = newColInd[jcol];
1820             newColVal[ncount++] = newColVal[jcol];
1821          }
1822       }
1823       newRowSize = ncount;
1824 
1825       /* scaling */
1826       dtemp = 0.0;
1827       for (jcol = 0; jcol < newRowSize; jcol++)
1828          dtemp += habs(newColVal[jcol]);
1829       dtemp = 1.0 / dtemp;
1830       for (jcol = 0; jcol < newRowSize; jcol++)
1831          newColVal[jcol] *= dtemp;
1832       ierr = HYPRE_IJMatrixSetValues(IJP, 1, &newRowSize,
1833                    (const int *) &rowIndex, (const int *) newColInd,
1834                    (const double *) newColVal);
1835       hypre_assert(!ierr);
1836    }
1837    delete [] newColInd;
1838    delete [] newColVal;
1839    ierr = HYPRE_IJMatrixAssemble(IJP);
1840    hypre_assert( !ierr );
1841    hypre_ParCSRMatrixDestroy(hypreP);
1842    HYPRE_IJMatrixGetObject(IJP, (void **) &hypreP);
1843    ierr += HYPRE_IJMatrixSetObjectType(IJP, -1);
1844    ierr += HYPRE_IJMatrixDestroy(IJP);
1845    hypre_assert(!ierr);
1846 
1847    /* ------------------------------------------------------ */
1848    /* package the P matrix                                   */
1849    /* ------------------------------------------------------ */
1850 
1851    sprintf(paramString, "HYPRE_ParCSR");
1852    funcPtr = new MLI_Function();
1853    MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
1854    mli_Pmat = new MLI_Matrix((void*) hypreP, paramString, funcPtr);
1855    delete funcPtr;
1856    return mli_Pmat;
1857 }
1858