/******************************************************************************
 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
 *
 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
 ******************************************************************************/

// *********************************************************************
// This file is customized to use HYPRE matrix format
// *********************************************************************

// *********************************************************************
// system includes
// ---------------------------------------------------------------------

#include <string.h>

#define MABS(x) ((x) > 0 ? (x) : (-(x)))

// *********************************************************************
// HYPRE includes external to MLI
// ---------------------------------------------------------------------

#include "HYPRE.h"
#include "_hypre_utilities.h"
#include "HYPRE_IJ_mv.h"
#include "seq_mv.h"
#include "_hypre_parcsr_mv.h"

#define mabs(x) ((x) > 0 ? x : -(x))

// *********************************************************************
// local defines
// ---------------------------------------------------------------------

#define MLI_METHOD_AMGSA_READY       -1
#define MLI_METHOD_AMGSA_SELECTED    -2
#define MLI_METHOD_AMGSA_PENDING     -3
#define MLI_METHOD_AMGSA_NOTSELECTED -4
#define MLI_METHOD_AMGSA_SELECTED2   -5

// *********************************************************************
// local MLI includes
// ---------------------------------------------------------------------

#include "mli_method_amgsa.h"
#include "mli_utils.h"
#include "mli_sfei.h"
#include "mli_fedata_utils.h"
#include "mli_matrix.h"
#include "mli_matrix_misc.h"
#include "mli_solver.h"

// *********************************************************************
// functions external to MLI
// ---------------------------------------------------------------------

extern "C"
{
   /* ARPACK function to compute eigenvalues/eigenvectors */

   void dnstev_(int *n, int *nev, char *which, double *sigmar,
                double *sigmai, int *colptr, int *rowind, double *nzvals,
                double *dr, double *di, double *z, int *ldz, int *info,
                double *tol);
}
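
/* Assumed dnstev_ argument layout (inferred from the call sites in   */
/* this file): n = matrix order; nev = number of eigenpairs wanted;   */
/* which = mode string ("Shift" here; a commented-out "destroy" call  */
/* appears further below); sigmar/sigmai = eigenvalue shift;          */
/* colptr/rowind/nzvals = compressed sparse matrix in 1-based Fortran */
/* indexing (see the base-0 to base-1 conversion before the call);    */
/* dr/di = real/imaginary parts of the computed eigenvalues; z =      */
/* eigenvectors, ldz = their leading dimension.                       */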

/***********************************************************************
 * COMPUTE SUBDOMAIN-BASED NULL SPACES USING EIGENDECOMPOSITION
 ***********************************************************************
 * compute initial null spaces (for the subdomain only) using FEData
 * --------------------------------------------------------------------- */

int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli)
{
   int          k, iN, iD, iR, level, mypid, nElems, elemNNodes;
   int          iE, iN2, **elemNodeLists, *elemNodeList1D, totalNNodes;
   int          *partition, localStartRow, localNRows, *newElemNodeList;
   int          eMatDim, newNNodes, *elemNodeList, count, *orderArray;
   int          csrNrows, *csrIA, *csrJA, offset, rowSize, startCol;
   int          rowInd, colInd, colOffset, rowLeng, start, nSubdomains;
   double       **elemMatrices, *elemMat, *csrAA, dAccum;
   double       *eigenR, *eigenI, *eigenV;
   char         which[20], filename[100];
   FILE         *fp;
   MPI_Comm     comm;
   MLI_SFEI     *sfei;
   MLI_Matrix   *mliAmat;
   hypre_ParCSRMatrix *hypreA;
#ifdef MLI_ARPACK
   int          info;
   double       sigmaR, sigmaI;
#endif

#ifdef MLI_DEBUG_DETAILED
   printf("MLI_Method_AMGSA::setupSFEIBasedNullSpaces begins.\n");
#endif

   /* --------------------------------------------------------------- */
   /* error checking                                                  */
   /* --------------------------------------------------------------- */

   if (mli == NULL)
   {
      printf("MLI_Method_AMGSA::setupSFEIBasedNullSpaces ERROR");
      printf(" - no mli.\n");
      exit(1);
   }
   level = 0;
   sfei = mli->getSFEI(level);
   if (sfei == NULL)
   {
      printf("MLI_Method_AMGSA::setupSFEIBasedNullSpaces ERROR");
      printf(" - no sfei.\n");
      exit(1);
   }
   nSubdomains = sfei->getNumElemBlocks();
   if (nSubdomains <= 0) return 0;

   /* --------------------------------------------------------------- */
   /* fetch communicator and matrix information                       */
   /* --------------------------------------------------------------- */

   comm = getComm();
   MPI_Comm_rank(comm, &mypid);
   mliAmat = mli->getSystemMatrix(level);
   hypreA  = (hypre_ParCSRMatrix *) mliAmat->getMatrix();
   HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA,
                                        &partition);
   localStartRow = partition[mypid];
   localNRows    = partition[mypid+1] - localStartRow;
   free(partition);

   /* --------------------------------------------------------------- */
   /* if null space vectors already exist, find the first column      */
   /* that has not yet been filled in (startCol)                      */
   /* --------------------------------------------------------------- */

   startCol = 0;
   if (nullspaceVec_ != NULL)
   {
      for (k = 0; k < nullspaceDim_; k++)
      {
         dAccum = 0.0;
         for (iR = 0; iR < nullspaceLen_; iR++)
            dAccum += mabs(nullspaceVec_[iR+k*nullspaceLen_]);
         if (dAccum == 0.0) {startCol = k; break;}
      }
      if (k == nullspaceDim_) startCol = nullspaceDim_;
      if ( startCol == nullspaceDim_ ) return 0;
   }

   /* --------------------------------------------------------------- */
   /* initialize null space vector and aggregation label              */
   /* --------------------------------------------------------------- */

   //if ( nullspaceVec_ != NULL ) delete [] nullspaceVec_;
   if (nullspaceVec_ != NULL) hypre_assert( nullspaceLen_ == localNRows );
   if (nullspaceVec_ == NULL)
   {
      nullspaceLen_ = localNRows;
      nullspaceVec_ = new double[localNRows*nullspaceDim_];
   }
   if (saLabels_ == NULL)
   {
      saLabels_ = new int*[maxLevels_];
      for (k = 0; k < maxLevels_; k++) saLabels_[k] = NULL;
   }
   if (saLabels_[0] != NULL) delete [] saLabels_[0];
   saLabels_[0] = new int[localNRows];
   for (k = 0; k < localNRows; k++) saLabels_[0][k] = -1;

   /* --------------------------------------------------------------- */
   /* fetch SFEI information (nElems,elemIDs,elemNNodes,elemNodeLists) */
   /* --------------------------------------------------------------- */

   if ((printToFile_ & 8) != 0 && nSubdomains == 1)
   {
      sprintf(filename,"elemNodeList.%d", mypid);
      fp = fopen(filename,"w");
   }
   else fp = NULL;
   for (iD = 0; iD < nSubdomains; iD++)
   {
      nElems = sfei->getBlockNumElems(iD);
      if (fp != NULL) fprintf(fp, "%d\n", nElems);
      elemNNodes  = sfei->getBlockElemNEqns(iD);
      elemNodeLists = sfei->getBlockElemEqnLists(iD);
      elemMatrices  = sfei->getBlockElemStiffness(iD);
      totalNNodes = nElems * elemNNodes;
      elemNodeList1D = new int[totalNNodes];
      count = 0;
      for (iE = 0; iE < nElems; iE++)
         for (iN = 0; iN < elemNNodes; iN++)
            elemNodeList1D[count++] = elemNodeLists[iE][iN];

      /* ------------------------------------------------------ */
      /* find the number of nodes in local subdomain (including */
      /* external nodes)                                        */
      /* ------------------------------------------------------ */
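      /* Illustration: if elemNodeList1D = {7,3,7,9}, the sort  */
      /* gives newElemNodeList = {3,7,7,9} with orderArray =    */
      /* {1,0,2,3}; the loop below then relabels elemNodeList1D */
      /* in place to the compact local indices {1,0,1,2} with   */
      /* newNNodes = 3.                                         */
      /* ------------------------------------------------------ */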

      orderArray = new int[totalNNodes];
      newElemNodeList = new int[totalNNodes];
      for ( iN = 0; iN < totalNNodes; iN++ )
      {
         orderArray[iN] = iN;
         newElemNodeList[iN] = elemNodeList1D[iN];
      }
      MLI_Utils_IntQSort2(newElemNodeList,orderArray,0,totalNNodes-1);
      elemNodeList1D[orderArray[0]] = 0;
      newNNodes = 0;
      for (iN = 1; iN < totalNNodes; iN++)
      {
         if (newElemNodeList[iN] == newElemNodeList[newNNodes])
            elemNodeList1D[orderArray[iN]] = newNNodes;
         else
         {
            newNNodes++;
            elemNodeList1D[orderArray[iN]] = newNNodes;
            newElemNodeList[newNNodes] = newElemNodeList[iN];
         }
      }
      if (totalNNodes > 0) newNNodes++;
      delete [] orderArray;
      delete [] newElemNodeList;

      /* -------------------------------------------------------- */
      /* allocate and initialize subdomain matrix                 */
      /* -------------------------------------------------------- */

      eMatDim  = elemNNodes;
      rowSize  = elemNNodes * 16;
      csrNrows = newNNodes;
      csrIA    = new int[csrNrows+1];
      csrJA    = new int[csrNrows*rowSize];
      hypre_assert( csrJA != NULL );
      csrAA    = new double[csrNrows*rowSize];
      hypre_assert(csrAA != NULL);
      for (iR = 0; iR < csrNrows; iR++) csrIA[iR] = iR * rowSize;

      /* -------------------------------------------------------- */
      /* construct CSR matrix (with holes)                        */
      /* -------------------------------------------------------- */
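      /* Each row iR owns a fixed slab of rowSize entries         */
      /* starting at csrJA/csrAA[iR*rowSize]; csrIA[iR] serves as */
      /* a running write pointer into that slab, so after this    */
      /* loop csrIA[iR] - iR*rowSize is the (unsorted, possibly   */
      /* duplicated) length of row iR.                            */
      /* -------------------------------------------------------- */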

      for (iE = 0; iE < nElems; iE++)
      {
         elemMat = elemMatrices[iE];
         elemNodeList = elemNodeLists[iE];
         for (iN = 0; iN < elemNNodes; iN++)
         {
            colInd = elemNodeList1D[iN+iE*elemNNodes];
            colOffset = eMatDim * iN;
            for (iN2 = 0; iN2 < elemNNodes; iN2++)
            {
               if (elemMat[colOffset+iN2] != 0.0)
               {
                  rowInd = elemNodeList1D[iN2+iE*elemNNodes];
                  offset = csrIA[rowInd]++;
                  csrJA[offset] = colInd;
                  csrAA[offset] = elemMat[iN2+colOffset];
               }
            }
         }
      }

      /* -------------------------------------------------------- */
      /* compress the CSR matrix                                  */
      /* -------------------------------------------------------- */

      offset = 0;
      for (iR = 0; iR < csrNrows; iR++)
      {
         if (csrIA[iR] > rowSize * (iR+1))
         {
            printf("MLI_Method_AMGSA::setupSFEIBasedNullSpaces ");
            printf("ERROR : rowSize too small (increase it).\n");
            printf("   => allowed = %d, actual = %d\n",rowSize,
                   csrIA[iR]-rowSize*iR);
            exit(1);
         }
         rowLeng = csrIA[iR] - iR * rowSize;
         csrIA[iR] = offset;
         start = iR * rowSize;

         MLI_Utils_IntQSort2a(&(csrJA[start]),&(csrAA[start]),0,rowLeng-1);
         count = start;
         for (k = start+1; k < start+rowLeng; k++)
         {
            if (csrJA[k] == csrJA[count]) csrAA[count] += csrAA[k];
            else
            {
               count++;
               csrJA[count] = csrJA[k];
               csrAA[count] = csrAA[k];
            }
         }
         if (rowLeng > 0) count = count - start + 1;
         else             count = 0;
         for (k = 0; k < count; k++)
         {
            csrJA[offset+k] = csrJA[start+k];
            csrAA[offset+k] = csrAA[start+k];
         }
         offset += count;
      }
      csrIA[csrNrows] = offset;

      /* -------------------------------------------------------- */
      /* change from base-0 to base-1 indexing for Fortran call   */
      /* -------------------------------------------------------- */

      for (iR = 0; iR < csrIA[csrNrows]; iR++) csrJA[iR]++;
      for (iR = 0; iR <= csrNrows; iR++) csrIA[iR]++;

      /* -------------------------------------------------------- */
      /* compute near null spaces                                 */
      /* -------------------------------------------------------- */
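      /* dnstev_ computes nullspaceDim_ eigenpairs of the         */
      /* subdomain matrix near the (near-zero) shift sigmaR +     */
      /* i*sigmaI; the corresponding eigenvectors returned in     */
      /* eigenV are loaded as the subdomain near-null space below.*/
      /* -------------------------------------------------------- */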

      strcpy(which, "Shift");
      eigenR = new double[nullspaceDim_+1];
      eigenI = new double[nullspaceDim_+1];
      eigenV = new double[csrNrows*(nullspaceDim_+1)];
      hypre_assert((long) eigenV);

#ifdef MLI_ARPACK
      sigmaR = 1.0e-6;
      sigmaI = 0.0e0;
      dnstev_(&csrNrows, &nullspaceDim_, which, &sigmaR, &sigmaI,
           csrIA, csrJA, csrAA, eigenR, eigenI, eigenV, &csrNrows, &info,
           &arpackTol_);
      if (outputLevel_ > 2)
      {
         printf("%5d : Subdomain %3d (%3d) (size=%d) : \n",mypid,iD,
                nSubdomains,csrNrows);
         for (k = 0; k < nullspaceDim_; k++)
         printf("\t%5d : ARPACK eigenvalues %2d = %16.8e %16.8e\n", mypid,
                k, eigenR[k], eigenI[k]);
      }
#else
      printf("MLI_Method_AMGSA::FATAL ERROR : ARPACK not installed.\n");
      exit(1);
#endif

//    strcpy( which, "destroy" );
#ifdef MLI_ARPACK
//    dnstev_(&csrNrows, &nullspaceDim_, which, &sigmaR, &sigmaI,
//            csrIA, csrJA, csrAA, eigenR, eigenI, eigenV, &csrNrows, &info,
//            &arpackTol_);
#else
      printf("MLI_Method_AMGSA::FATAL ERROR : ARPACK not installed.\n");
      exit(1);
#endif

      delete [] eigenR;
      delete [] eigenI;
      delete [] csrIA;
      delete [] csrJA;
      delete [] csrAA;

      /* -------------------------------------------------------- */
      /* load the null space vectors                              */
      /* -------------------------------------------------------- */

      if (nullspaceLen_ == 0) nullspaceLen_ = localNRows;
      if (nullspaceVec_ == NULL)
         nullspaceVec_ = new double[nullspaceLen_ * nullspaceDim_];
      for (iE = 0; iE < nElems; iE++)
      {
         elemNodeList = elemNodeLists[iE];
         for (iN = 0; iN < elemNNodes; iN++)
         {
            rowInd = elemNodeList[iN] - localStartRow;
            if (fp != NULL) fprintf(fp,"%7d ", rowInd+1);
            if (rowInd >= 0 && rowInd < localNRows)
            {
               saLabels_[0][rowInd] = iD;
               colInd = elemNodeList1D[iE*elemNNodes+iN];
               for (k = startCol; k < nullspaceDim_; k++)
                  nullspaceVec_[rowInd+k*nullspaceLen_] =
                        eigenV[colInd+k*csrNrows];
            }
         }
         if (fp != NULL) fprintf(fp,"\n");
      }
      delete [] elemNodeList1D;

      /* -------------------------------------------------------- */
      /* clean up                                                 */
      /* -------------------------------------------------------- */

      delete [] eigenV;
   }
   if (fp != NULL) fclose(fp);

   if ((printToFile_ & 4) != 0)
   {
      sprintf(filename, "eVec.%d", mypid);
      fp = fopen(filename, "w");
      fprintf(fp," %d %d\n", nullspaceLen_, nullspaceDim_);
      for ( iN = 0; iN < nullspaceLen_; iN++ )
      {
         for ( k = 0; k < nullspaceDim_; k++ )
            fprintf(fp,"%17.9e ",nullspaceVec_[nullspaceLen_*k+iN]);
         fprintf(fp,"\n");
      }
      fclose(fp);
   }

#ifdef MLI_DEBUG_DETAILED
   printf("MLI_Method_AMGSA::setupSFEIBasedNullSpaces ends.\n");
#endif

   return 0;
}

/***********************************************************************
 * SET UP AGGREGATES BASED ON FEI SUBDOMAIN INFORMATION
 ***********************************************************************
 * set up the domain decomposition method by assigning all equations in
 * each subdomain to a single aggregate (one aggregate per subdomain)
 * --------------------------------------------------------------------- */

int MLI_Method_AMGSA::setupSFEIBasedAggregates(MLI *mli)
{
   int                iR, iD, level, mypid, *partition, localNRows, *aggrMap;
   int                nSubdomains, nElems, elemNNodes, **elemNodeLists;
   int                iE, iN, localStartRow, nprocs, index, count, *aggrMap2;
   MPI_Comm           comm;
   MLI_Matrix         *mliAmat;
   hypre_ParCSRMatrix *hypreA;
   MLI_SFEI           *sfei;

#ifdef MLI_DEBUG_DETAILED
   printf("MLI_Method_AMGSA::setupSFEIBasedAggregates begins...\n");
#endif

   /* --------------------------------------------------------------- */
   /* error checking                                                  */
   /* --------------------------------------------------------------- */

   if (mli == NULL)
   {
      printf("MLI_Method_AMGSA::setupSFEIBasedAggregates ERROR");
      printf(" - no mli.\n");
      exit(1);
   }
   level = 0;
   sfei = mli->getSFEI(level);
   if (sfei == NULL)
   {
      printf("MLI_Method_AMGSA::setupSFEIBasedAggregates ERROR");
      printf(" - no sfei.\n");
      exit(1);
   }
   sfei->freeStiffnessMatrices();
   nSubdomains = sfei->getNumElemBlocks();
   if (nSubdomains <= 0) return 0;

   /* --------------------------------------------------------------- */
   /* fetch communicator and matrix information                       */
   /* --------------------------------------------------------------- */

   comm = getComm();
   MPI_Comm_rank(comm, &mypid);
   MPI_Comm_size(comm, &nprocs);
   mliAmat = mli->getSystemMatrix(level);
   hypreA  = (hypre_ParCSRMatrix *) mliAmat->getMatrix();
   HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA,
                                        &partition);
   localStartRow = partition[mypid];
   localNRows = partition[mypid+1] - localStartRow;
   free(partition);

   /* --------------------------------------------------------------- */
   /* create and fill the aggrMap array                               */
   /* --------------------------------------------------------------- */
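   /* Layout of saDataAux_ after the loop below:                      */
   /*   saDataAux_[0][0]    = number of subdomains                    */
   /*   saDataAux_[0][iD+1] = number of local equations touched by    */
   /*                         subdomain iD (overlap included)         */
   /*   saDataAux_[iD+1]    = list of those equations                 */
   /* aggrMap records the first subdomain claiming each equation      */
   /* (non-overlapping); aggrMap2 is rebuilt per subdomain to capture */
   /* the overlapped membership.                                      */
   /* --------------------------------------------------------------- */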

   aggrMap  = new int[localNRows];
   aggrMap2 = new int[localNRows];
   for (iR = 0; iR < localNRows; iR++) aggrMap[iR] = -1;
   if (saDataAux_ != NULL)
   {
      index = saDataAux_[0][0] + 1;
      for (iD = 0; iD < index; iD++) delete [] saDataAux_[iD];
      delete [] saDataAux_;
   }
   saDataAux_ = new int*[nSubdomains+1];
   saDataAux_[0] = new int[nSubdomains+1];
   for (iD = 1; iD < nSubdomains+1; iD++) saDataAux_[iD] = NULL;
   saDataAux_[0][0] = nSubdomains;

   for (iD = 0; iD < nSubdomains; iD++)
   {
      for ( iR = 0; iR < localNRows; iR++ ) aggrMap2[iR] = -1;
      nElems = sfei->getBlockNumElems(iD);
      elemNNodes  = sfei->getBlockElemNEqns(iD);
      elemNodeLists = sfei->getBlockElemEqnLists(iD);
      for (iE = 0; iE < nElems; iE++)
      {
         for (iN = 0; iN < elemNNodes; iN++)
         {
            index = elemNodeLists[iE][iN] - localStartRow;
            if (index >= 0 && index < localNRows && aggrMap[index] < 0)
               aggrMap[index] = iD;
            if (index >= 0 && index < localNRows)
               aggrMap2[index] = iD;
         }
      }
      count = 0;
      for (iR = 0; iR < localNRows; iR++) if (aggrMap2[iR] >= 0) count++;
      saDataAux_[0][iD+1] = count;
      saDataAux_[iD+1] = new int[count];
      count = 0;
      for (iR = 0; iR < localNRows; iR++)
         if (aggrMap2[iR] >= 0) saDataAux_[iD+1][count++] = iR;
   }
#if 0
   /* force non-overlapped aggregates */
   for ( iD = 0; iD < nSubdomains; iD++ )
   {
      count = 0;
      for (iR = 0; iR < localNRows; iR++) if (aggrMap[iR] == iD) count++;
      saDataAux_[0][iD+1] = count;
      if (saDataAux_[iD+1] != NULL) delete [] saDataAux_[iD+1];
      saDataAux_[iD+1] = new int[count];
      count = 0;
      for (iR = 0; iR < localNRows; iR++)
         if (aggrMap[iR] == iD) saDataAux_[iD+1][count++] = iR;
   }
#endif
   delete [] aggrMap2;
   saData_[0]     = aggrMap;
   saCounts_[0]   = nSubdomains;
   numLevels_     = 2;
   minCoarseSize_ = nprocs;

#ifdef MLI_DEBUG_DETAILED
   printf("MLI_Method_AMGSA::setupSFEIBasedAggregates ends.\n");
#endif

   return 0;
}

/***********************************************************************
 * set up domain decomposition method by extending the local problem
 * (based on Bank-Lu-Tong-Vassilevski method but with aggregation)
 * --------------------------------------------------------------------- */

int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli)
{
   MLI_Function *funcPtr;

   /* --------------------------------------------------------------- */
   /* error checking                                                  */
   /* --------------------------------------------------------------- */

   if (mli == NULL)
   {
      printf("MLI_Method_AMGSA::setupExtendedDomainDecomp ERROR");
      printf(" - no mli.\n");
      exit(1);
   }

   /* *************************************************************** */
   /* creating the local expanded matrix                              */
   /* --------------------------------------------------------------- */

   /* --------------------------------------------------------------- */
   /* fetch communicator and fine matrix information                  */
   /* --------------------------------------------------------------- */

   MPI_Comm           comm;
   int                mypid, nprocs, level, *partition, ANRows;
   MLI_Matrix         *mli_Amat;
   hypre_ParCSRMatrix *hypreA;

   comm = getComm();
   MPI_Comm_rank( comm, &mypid );
   MPI_Comm_size( comm, &nprocs );

#ifdef MLI_DEBUG_DETAILED
   printf("%d : MLI_Method_AMGSA::setupExtendedDomainDecomp begins...\n",mypid);
#endif

   level = 0;
   mli_Amat = mli->getSystemMatrix( level );
   hypreA  = (hypre_ParCSRMatrix *) mli_Amat->getMatrix();
   HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA,
                                        &partition);
   ANRows = partition[mypid+1] - partition[mypid];
   free( partition );

   /* --------------------------------------------------------------- */
   /* first save the nodeDofs and null space information              */
   /* (since genP replaces the null vectors with new ones)            */
   /* --------------------------------------------------------------- */

   int    nodeDofs, iD, iD2;
   double *nullVecs=NULL;

   nodeDofs = currNodeDofs_;
   nullVecs = new double[nullspaceDim_*ANRows];
   if (nullspaceVec_ != NULL)
   {
      for (iD = 0; iD < nullspaceDim_*ANRows; iD++)
         nullVecs[iD] = nullspaceVec_[iD];
   }
   else
   {
      for (iD = 0; iD < nullspaceDim_; iD++)
         for (iD2 = 0; iD2 < ANRows; iD2++)
            if (MABS((iD - iD2)) % nullspaceDim_ == 0)
                 nullVecs[iD*ANRows+iD2] = 1.0;
            else nullVecs[iD*ANRows+iD2] = 0.0;
   }
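   /* (e.g. with nullspaceDim_ = 3 and ANRows = 6 the default null    */
   /* vectors above form the interleaved canonical basis:             */
   /*    vector 0 : 1 0 0 1 0 0                                       */
   /*    vector 1 : 0 1 0 0 1 0                                       */
   /*    vector 2 : 0 0 1 0 0 1 )                                     */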

   /* --------------------------------------------------------------- */
   /* create the first coarse grid matrix (the off processor block    */
   /* will be the 2,2 block of my expanded matrix)                    */
   /* Note: genP_DD coarsens less on processor boundaries.            */
   /* --------------------------------------------------------------- */

   int        *ACPartition, ACNRows, ACStart, *aggrInfo, *bdryData;
   MLI_Matrix *mli_Pmat, *mli_cAmat;
   hypre_ParCSRMatrix  *hypreAc, *hypreP;
   hypre_ParCSRCommPkg *commPkg;

   genP_DD(mli_Amat, &mli_Pmat, &aggrInfo, &bdryData);
   delete [] aggrInfo;
   hypreP = (hypre_ParCSRMatrix *) mli_Pmat->getMatrix();
   commPkg = hypre_ParCSRMatrixCommPkg(hypreP);
   if (commPkg == NULL) hypre_MatvecCommPkgCreate(hypreP);

   MLI_Matrix_ComputePtAP(mli_Pmat, mli_Amat, &mli_cAmat);
   hypreAc = (hypre_ParCSRMatrix *) mli_cAmat->getMatrix();

   HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreAc,
                                        &ACPartition);
   ACStart = ACPartition[mypid];
   ACNRows = ACPartition[mypid+1] - ACStart;

   /* --------------------------------------------------------------- */
   /* fetch communication information for the coarse matrix           */
   /* --------------------------------------------------------------- */

   int   nRecvs, *recvProcs, nSends, *sendProcs;

   commPkg = hypre_ParCSRMatrixCommPkg(hypreAc);
   if ( commPkg == NULL )
   {
      hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hypreAc);
      commPkg = hypre_ParCSRMatrixCommPkg(hypreAc);
   }
   nRecvs    = hypre_ParCSRCommPkgNumRecvs(commPkg);
   recvProcs = hypre_ParCSRCommPkgRecvProcs(commPkg);
   nSends    = hypre_ParCSRCommPkgNumSends(commPkg);
   sendProcs = hypre_ParCSRCommPkgSendProcs(commPkg);

   /* --------------------------------------------------------------- */
   /* calculate the size of my local expanded off matrix (this is     */
   /* needed to create the permutation matrix) - the local size of    */
   /* the expanded off matrix should be the total number of rows of   */
   /* my recv neighbors                                               */
   /* --------------------------------------------------------------- */

   int  iP, PENCols, proc, *PEPartition, PECStart, *recvLengs=NULL;

   PENCols = 0;
   if (nRecvs > 0) recvLengs = new int[nRecvs];
   for (iP = 0; iP < nRecvs; iP++)
   {
      proc = recvProcs[iP];
      PENCols += (ACPartition[proc+1] - ACPartition[proc]);
      recvLengs[iP] = ACPartition[proc+1] - ACPartition[proc];
   }
   PEPartition = new int[nprocs+1];
   MPI_Allgather(&PENCols,1,MPI_INT,&(PEPartition[1]),1,MPI_INT,comm);
   PEPartition[0] = 0;
   for (iP = 2; iP <= nprocs; iP++)
      PEPartition[iP] += PEPartition[iP-1];
   PECStart = PEPartition[mypid];

   /* --------------------------------------------------------------- */
   /* since PE(i,j) = 1 means putting external row i into local row   */
   /* j, and since I may not own row i of PE, I need to tell my       */
   /* neighbor processor who owns row i the value of j.               */
   /* ==> procOffsets                                                 */
   /* --------------------------------------------------------------- */
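   /* Handshake below: I send each recv neighbor the offset in my     */
   /* expanded numbering at which its coarse rows will be placed, and */
   /* I receive from each send neighbor the offset my coarse rows     */
   /* occupy in its expanded numbering (procOffsets).                 */
   /* --------------------------------------------------------------- */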

   int         *procOffsets, offset;
   MPI_Request *mpiRequests;
   MPI_Status  mpiStatus;

   if (nSends > 0)
   {
      mpiRequests = new MPI_Request[nSends];
      procOffsets = new int[nSends];
   }
   for (iP = 0; iP < nSends; iP++)
      MPI_Irecv(&procOffsets[iP],1,MPI_INT,sendProcs[iP],13582,comm,
                &(mpiRequests[iP]));
   offset = 0;
   for (iP = 0; iP < nRecvs; iP++)
   {
      MPI_Send(&offset, 1, MPI_INT, recvProcs[iP], 13582, comm);
      offset += (ACPartition[recvProcs[iP]+1] - ACPartition[recvProcs[iP]]);
   }
   for (iP = 0; iP < nSends; iP++)
      MPI_Wait(&(mpiRequests[iP]), &mpiStatus);
   if (nSends > 0) delete [] mpiRequests;

   /* --------------------------------------------------------------- */
   /* create the permutation matrix to gather expanded coarse matrix  */
   /* PE has same number of rows as Ac but it has many more columns   */
   /* --------------------------------------------------------------- */
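   /* Each of my coarse rows gets one entry per send neighbor: a 1 in */
   /* the column of that neighbor's expanded space where the row is   */
   /* placed (procOffsets[iP] + PEPartition[sendProcs[iP]] + local    */
   /* row number).  PE^T thus gathers the neighbor coarse blocks when */
   /* PE^T * Ac * PE is formed further below.                         */
   /* --------------------------------------------------------------- */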

   int            ierr, *rowSizes, *colInds, rowIndex;
   double         *colVals;
   char           paramString[50];
   MLI_Matrix     *mli_PE;
   HYPRE_IJMatrix IJ_PE;
   hypre_ParCSRMatrix *hyprePE;

   ierr  = HYPRE_IJMatrixCreate(comm,ACStart,ACStart+ACNRows-1,
                                PECStart,PECStart+PENCols-1,&IJ_PE);
   ierr += HYPRE_IJMatrixSetObjectType(IJ_PE, HYPRE_PARCSR);
   hypre_assert(!ierr);
   if (ACNRows > 0) rowSizes = new int[ACNRows];
   for (iD = 0; iD < ACNRows; iD++) rowSizes[iD] = nSends;
   ierr  = HYPRE_IJMatrixSetRowSizes(IJ_PE, rowSizes);
   ierr += HYPRE_IJMatrixInitialize(IJ_PE);
   hypre_assert(!ierr);
   if (ACNRows > 0) delete [] rowSizes;
   if (nSends > 0)
   {
      colInds = new int[nSends];
      colVals = new double[nSends];
      for (iP = 0; iP < nSends; iP++) colVals[iP] = 1.0;
   }
   for (iD = 0; iD < ACNRows; iD++)
   {
      rowIndex = iD + ACStart;
      for (iP = 0; iP < nSends; iP++)
         colInds[iP] = procOffsets[iP] + PEPartition[sendProcs[iP]] + iD;
      HYPRE_IJMatrixSetValues(IJ_PE, 1, &nSends, (const int *) &rowIndex,
                (const int *) colInds, (const double *) colVals);
   }
   if (nSends > 0)
   {
      delete [] colInds;
      delete [] colVals;
      delete [] procOffsets;
   }
   HYPRE_IJMatrixAssemble(IJ_PE);
   HYPRE_IJMatrixGetObject(IJ_PE, (void **) &hyprePE);
   funcPtr = new MLI_Function();
   MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
   sprintf(paramString, "HYPRE_ParCSR");
   mli_PE = new MLI_Matrix( (void *) hyprePE, paramString, funcPtr);
   delete funcPtr;
   commPkg = hypre_ParCSRMatrixCommPkg(hyprePE);
   if (commPkg == NULL)
      hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hyprePE);

   /* --------------------------------------------------------------- */
   /* form the expanded coarse matrix (that is, the (2,2) block of my */
   /* final matrix). Also, form A * P (in order to form the (1,2)     */
   /* and (2,1) blocks)                                               */
   /* --------------------------------------------------------------- */

   MLI_Matrix         *mli_AExt;
   hypre_ParCSRMatrix *hypreAExt, *hypreAP;

   MLI_Matrix_ComputePtAP(mli_PE, mli_cAmat, &mli_AExt);
   hypreAExt = (hypre_ParCSRMatrix *) mli_AExt->getMatrix();
   hypreP    = (hypre_ParCSRMatrix *) mli_Pmat->getMatrix();
   hypreAP   = hypre_ParMatmul(hypreA, hypreP);

   /* --------------------------------------------------------------- */
   /* coarsen one more time for non-neighbor fine aggregates          */
   /* then create [A_c   Aco Poo Po2                                  */
   /*              trans Po2^T Aoo Po2]                               */
   /* --------------------------------------------------------------- */

#if 0
   // generate Po2
   MLI_Matrix *mli_Pmat2;
   // set up one aggregate per processor
//### need to set bdryData for my immediate neighbors to 0
   genP_Selective(mli_AExt, &mli_Pmat2, PENCols, bdryData);
   delete [] bdryData;

   // compute Aco Poo Po2
   hypre_ParCSRMatrix *hypreP2, *hypreP3, *hypreAP2;
   hypreP2  = (hypre_ParCSRMatrix *) mli_Pmat2->getMatrix();
   hypreAP2 = hypre_ParMatmul(hypreAP, hypreP2);
   hypreP3  = hypre_ParMatmul(hypreP, hypreP2);

   // compute Po2^T Aoo Po2
   MLI_Matrix *mli_AExt2;
   MLI_Matrix_ComputePtAP(mli_Pmat2, mli_AExt, &mli_AExt2);

   // adjust pointers
   hypre_ParCSRMatrixDestroy(hypreAP);
   hypreAP = hypreAP2;
   delete mli_Pmat;
   funcPtr = new MLI_Function();
   MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
   sprintf(paramString, "HYPRE_ParCSR");
   mli_Pmat = new MLI_Matrix(hypreP3, paramString, funcPtr);
   delete funcPtr;
   delete mli_Pmat2;
   delete mli_AExt;
   mli_AExt = mli_AExt2;
   hypreAExt = (hypre_ParCSRMatrix *) mli_AExt->getMatrix();
   hypreP    = (hypre_ParCSRMatrix *) mli_Pmat->getMatrix();
#endif

   /* --------------------------------------------------------------- */
   /* concatenate the local and the (1,2) and (2,2) block             */
   /* --------------------------------------------------------------- */
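   /* The expanded local matrix assembled below has the 2x2 block     */
   /* form                                                            */
   /*    [ A_diag          (A*P)_offd ]   rows 0 .. ANRows-1          */
   /*    [ (A*P)_offd^T    AExt_diag  ]   rows ANRows .. +PENCols-1   */
   /* with the (A*P)_offd column indices remapped from global coarse  */
   /* numbering to the local expanded numbering.                      */
   /* --------------------------------------------------------------- */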

   int    *AdiagI, *AdiagJ, *APoffdI, *APoffdJ, *AEdiagI, *AEdiagJ, index;
   int    *APcmap, newNrows, *newIA, *newJA, newNnz, *auxIA, iR, iC;
   double *AdiagA, *APoffdA, *AEdiagA, *newAA;
   hypre_CSRMatrix *Adiag, *APoffd, *AEdiag;

   Adiag   = hypre_ParCSRMatrixDiag(hypreA);
   AdiagI  = hypre_CSRMatrixI(Adiag);
   AdiagJ  = hypre_CSRMatrixJ(Adiag);
   AdiagA  = hypre_CSRMatrixData(Adiag);
   APoffd  = hypre_ParCSRMatrixOffd(hypreAP);
   APoffdI = hypre_CSRMatrixI(APoffd);
   APoffdJ = hypre_CSRMatrixJ(APoffd);
   APoffdA = hypre_CSRMatrixData(APoffd);
   APcmap  = hypre_ParCSRMatrixColMapOffd(hypreAP);
   AEdiag  = hypre_ParCSRMatrixDiag(hypreAExt);
   AEdiagI = hypre_CSRMatrixI(AEdiag);
   AEdiagJ = hypre_CSRMatrixJ(AEdiag);
   AEdiagA = hypre_CSRMatrixData(AEdiag);
   newNrows = ANRows + PENCols;
   newIA    = new int[newNrows+1];
   newNnz   = AdiagI[ANRows] + AEdiagI[PENCols] + 2 * APoffdI[ANRows];
   newJA    = new int[newNnz];
   newAA    = new double[newNnz];
   auxIA    = new int[PENCols];
   newNnz   = 0;
   newIA[0] = newNnz;
   for ( iR = 0; iR < PENCols; iR++ ) auxIA[iR] = 0;

   // (1,1) and (1,2) blocks
   for ( iR = 0; iR < ANRows; iR++ )
   {
      for ( iC = AdiagI[iR]; iC < AdiagI[iR+1]; iC++ )
      {
         newJA[newNnz] = AdiagJ[iC];
         newAA[newNnz++] = AdiagA[iC];
      }
      for ( iC = APoffdI[iR]; iC < APoffdI[iR+1]; iC++ )
      {
         index = APcmap[APoffdJ[iC]];
         offset = ANRows;
         for ( iP = 0; iP < nRecvs; iP++ )
         {
            if ( index < ACPartition[recvProcs[iP]+1] )
            {
               index = index - ACPartition[recvProcs[iP]] + offset;
               break;
            }
            offset += (ACPartition[recvProcs[iP]+1] -
                       ACPartition[recvProcs[iP]]);
         }
         newJA[newNnz] = index;
         newAA[newNnz++] = APoffdA[iC];
         APoffdJ[iC] = index;
         auxIA[index-ANRows]++;
      }
      newIA[iR+1] = newNnz;
   }

   // (2,2) block
   for ( iR = ANRows; iR < ANRows+PENCols; iR++ )
   {
      newNnz += auxIA[iR-ANRows];
      for ( iC = AEdiagI[iR-ANRows]; iC < AEdiagI[iR-ANRows+1]; iC++ )
      {
         newJA[newNnz] = AEdiagJ[iC] + ANRows;
         newAA[newNnz++] = AEdiagA[iC];
      }
      newIA[iR+1] = newNnz;
   }

   // (2,1) block
   for ( iR = 0; iR < PENCols; iR++ ) auxIA[iR] = 0;
   for ( iR = 0; iR < ANRows; iR++ )
   {
      for ( iC = APoffdI[iR]; iC < APoffdI[iR+1]; iC++ )
      {
         index = APoffdJ[iC];
         offset = newIA[index] + auxIA[index-ANRows];
         newJA[offset] = iR;
         newAA[offset] = APoffdA[iC];
         auxIA[index-ANRows]++;
      }
   }

   /* --------------------------------------------------------------- */

   int                iZero=0, *newRowSizes;
   MPI_Comm           newMPIComm;
   HYPRE_IJMatrix     IJnewA;
   hypre_ParCSRMatrix *hypreNewA;
   MLI_Matrix         *mli_newA;

   MPI_Comm_split(comm, mypid, iZero, &newMPIComm);
   ierr  = HYPRE_IJMatrixCreate(newMPIComm,iZero,newNrows-1,iZero,
                                newNrows-1,&IJnewA);
   ierr += HYPRE_IJMatrixSetObjectType(IJnewA, HYPRE_PARCSR);
   hypre_assert(!ierr);
   if ( newNrows > 0 ) newRowSizes = new int[newNrows];
   for ( iD = 0; iD < newNrows; iD++ )
      newRowSizes[iD] = newIA[iD+1] - newIA[iD];
   ierr  = HYPRE_IJMatrixSetRowSizes(IJnewA, newRowSizes);
   ierr += HYPRE_IJMatrixInitialize(IJnewA);
   hypre_assert(!ierr);
   for ( iD = 0; iD < newNrows; iD++ )
   {
      offset = newIA[iD];
      HYPRE_IJMatrixSetValues(IJnewA, 1, &newRowSizes[iD], (const int *) &iD,
               (const int *) &newJA[offset], (const double *) &newAA[offset]);
   }
   if ( newNrows > 0 ) delete [] newRowSizes;
   delete [] newIA;
   delete [] newJA;
   delete [] newAA;
   HYPRE_IJMatrixAssemble(IJnewA);
   HYPRE_IJMatrixGetObject(IJnewA, (void **) &hypreNewA);
   sprintf(paramString, "HYPRE_ParCSR" );
   funcPtr = new MLI_Function();
   MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
   mli_newA = new MLI_Matrix( (void *) hypreNewA, paramString, funcPtr);

   /* --------------------------------------------------------------- */
   /* communicate null space vectors                                  */
   /* --------------------------------------------------------------- */

   int    rLength, sLength;
   double *tmpNullVecs, *newNullVecs;

   if ( PENCols > 0 ) tmpNullVecs = new double[PENCols*nullspaceDim_];
   if ( nRecvs > 0 ) mpiRequests = new MPI_Request[nRecvs];

   offset = 0;
   for ( iP = 0; iP < nRecvs; iP++ )
   {
      rLength = ACPartition[recvProcs[iP]+1] - ACPartition[recvProcs[iP]];
      rLength *= nullspaceDim_;
      MPI_Irecv(&tmpNullVecs[offset],rLength,MPI_DOUBLE,recvProcs[iP],14581,
                comm,&(mpiRequests[iP]));
      offset += rLength;
   }
   for ( iP = 0; iP < nSends; iP++ )
   {
      sLength = ACNRows * nullspaceDim_;
      MPI_Send(nullVecs, sLength, MPI_DOUBLE, sendProcs[iP], 14581, comm);
   }
   for ( iP = 0; iP < nRecvs; iP++ )
      MPI_Wait( &(mpiRequests[iP]), &mpiStatus );
   if ( nRecvs > 0 ) delete [] mpiRequests;

   newNullVecs = new double[newNrows*nullspaceDim_];
   for ( iD = 0; iD < nullspaceDim_; iD++ )
      for ( iD2 = 0; iD2 < ANRows; iD2++ )
         newNullVecs[iD*newNrows+iD2] = nullVecs[iD*ANRows+iD2];
   // null vectors for the expanded (external) rows start at row ANRows
   int rowOffset = ANRows;
   offset = 0;
   for ( iP = 0; iP < nRecvs; iP++ )
   {
      rLength = ACPartition[recvProcs[iP]+1] - ACPartition[recvProcs[iP]];
      for ( iD = 0; iD < nullspaceDim_; iD++ )
         for ( iD2 = 0; iD2 < rLength; iD2++ )
            newNullVecs[iD*newNrows+rowOffset+iD2] =
               tmpNullVecs[offset+iD*rLength+iD2];
      rowOffset += rLength;
      offset += rLength * nullspaceDim_;
   }
   if ( PENCols > 0 ) delete [] tmpNullVecs;

   /* --------------------------------------------------------------- */
   /* create new overlapped DD smoother using sequential SuperLU      */
   /* --------------------------------------------------------------- */

   int        *sendLengs;
   char       *targv[7];
   MLI_Solver *smootherPtr;
   MLI_Matrix *mli_PSmat;

   //sprintf( paramString, "SeqSuperLU" );
   if (!strcmp(preSmoother_, "CGMLI")) sprintf(paramString, "CGMLI");
   else                                sprintf(paramString, "CGAMG");
   smootherPtr = MLI_Solver_CreateFromName(paramString);
   sprintf( paramString, "numSweeps 10000" );
   smootherPtr->setParams(paramString, 0, NULL);
   sprintf( paramString, "tolerance 1.0e-6" );
   smootherPtr->setParams(paramString, 0, NULL);

   // restate these lines if aggregation is used for subdomain solve
   // delete [] newNullVecs;
   //sprintf( paramString, "setNullSpace" );
   //targv[0] = (char *) &nullspaceDim_;
   //targv[1] = (char *) newNullVecs;
   //smootherPtr->setParams(paramString, 2, targv);

   // send PSmat and communication information to smoother
   if ( nSends > 0 ) sendLengs = new int[nSends];
   for ( iP = 0; iP < nSends; iP++ ) sendLengs[iP] = ACNRows;
   sprintf( paramString, "setPmat" );
   mli_PSmat = mli_Pmat;
   targv[0] = (char *) mli_PSmat;
   smootherPtr->setParams(paramString, 1, targv);
   sprintf( paramString, "setCommData" );
   targv[0] = (char *) &nRecvs;
   targv[1] = (char *) recvProcs;
   targv[2] = (char *) recvLengs;
   targv[3] = (char *) &nSends;
   targv[4] = (char *) sendProcs;
   targv[5] = (char *) sendLengs;
   targv[6] = (char *) &comm;
   smootherPtr->setParams(paramString, 7, targv);
   if ( nSends > 0 ) delete [] sendLengs;

   smootherPtr->setup(mli_newA);
   mli->setSmoother( level, MLI_SMOOTHER_PRE, smootherPtr );

   /* --------------------------------------------------------------- */
   /* create prolongation and coarse grid operators                   */
   /* --------------------------------------------------------------- */

   MLI_Solver *csolvePtr;
   MLI_Matrix *mli_Rmat;

   delete mli_cAmat;

   // set up one aggregate per processor
   saCounts_[0] = 1;
   if (saData_[0] != NULL) delete [] saData_[0];
   saData_[0] = new int[ANRows];
   for ( iD = 0; iD < ANRows; iD++ ) saData_[0][iD] = 0;

   // restore nullspace changed by the last genP
   currNodeDofs_ = nodeDofs;
   if ( nullspaceVec_ != NULL ) delete [] nullspaceVec_;
   nullspaceVec_ = new double[nullspaceDim_*ANRows];
   for ( iD = 0; iD < nullspaceDim_*ANRows; iD++)
      nullspaceVec_[iD] = nullVecs[iD];

   // create prolongation and coarse grid operators
   genP(mli_Amat, &mli_Pmat, saCounts_[0], saData_[0]);
   MLI_Matrix_ComputePtAP(mli_Pmat, mli_Amat, &mli_cAmat);
   mli->setSystemMatrix(level+1, mli_cAmat);
   mli->setProlongation(level+1, mli_Pmat);
   sprintf(paramString, "HYPRE_ParCSRT");
   mli_Rmat = new MLI_Matrix(mli_Pmat->getMatrix(), paramString, NULL);
   mli->setRestriction(level, mli_Rmat);

   /* --------------------------------------------------------------- */
   /* setup coarse solver                                             */
   /* --------------------------------------------------------------- */

   strcpy( paramString, "SuperLU" );
   csolvePtr = MLI_Solver_CreateFromName( paramString );
   csolvePtr->setup(mli_cAmat);
   mli->setCoarseSolve(csolvePtr);

   /* --------------------------------------------------------------- */
   /* clean up                                                        */
   /* --------------------------------------------------------------- */

   free( ACPartition );
   delete [] PEPartition;
   delete [] auxIA;
   HYPRE_IJMatrixDestroy(IJ_PE);
   delete mli_AExt;
   delete [] nullVecs;
   delete mli_PE;
   delete [] newNullVecs;
   hypre_ParCSRMatrixDestroy(hypreAP);
   // recvLengs is freed here on the assumption that the smoother copies
   // the comm data passed via setParams (sendLengs is freed right after
   // that call above, which suggests it does)
   if ( nRecvs > 0 ) delete [] recvLengs;

#ifdef MLI_DEBUG_DETAILED
   printf("%d : MLI_Method_AMGSA::setupExtendedDomainDecomp ends.\n",mypid);
#endif

   level = 2;
   return (level);
}

// ************************************************************************
// Purpose : Given Amat, perform preferential coarsening (small aggregates
//           near processor boundaries) and create the corresponding Pmat
// (called by setupExtendedDomainDecomp)
// ------------------------------------------------------------------------

double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat, MLI_Matrix **PmatOut,
                                 int **eqn2aggrOut, int **bdryDataOut)
{
   int    mypid, nprocs, *partition, AStartRow, AEndRow, ALocalNRows;
   int    blkSize, naggr, *node2aggr, ierr, PLocalNCols, PStartCol;
   int    PLocalNRows, PStartRow, *eqn2aggr, irow, jcol, ig, *bdryData;
   int    *PCols, maxAggSize, *aggCntArray, index, **aggIndArray;
   int    aggSize, nzcnt, *rowLengths, rowNum, *colInd;
   double **PVecs, *newNull, *qArray, *rArray, *colVal;
   char   paramString[200];
   HYPRE_IJMatrix      IJPmat;
   hypre_ParCSRMatrix  *Amat, *A2mat, *Pmat;
   MLI_Matrix          *mli_A2mat=NULL, *mli_Pmat;
   MPI_Comm            comm;
   hypre_ParCSRCommPkg *commPkg;
   MLI_Function        *funcPtr;

   /*-----------------------------------------------------------------
    * fetch matrix and machine information
    *-----------------------------------------------------------------*/

   Amat = (hypre_ParCSRMatrix *) mli_Amat->getMatrix();
   comm = hypre_ParCSRMatrixComm(Amat);
   MPI_Comm_rank(comm,&mypid);
   MPI_Comm_size(comm,&nprocs);

   /*-----------------------------------------------------------------
    * fetch other matrix information
    *-----------------------------------------------------------------*/

   HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) Amat,&partition);
   AStartRow = partition[mypid];
   AEndRow   = partition[mypid+1] - 1;
   ALocalNRows = AEndRow - AStartRow + 1;
   free( partition );

   /*-----------------------------------------------------------------
    * reduce Amat based on the block size information (if nodeDofs_ > 1)
    *-----------------------------------------------------------------*/

   blkSize = currNodeDofs_;
   if (blkSize > 1) MLI_Matrix_Compress(mli_Amat, blkSize, &mli_A2mat);
   else             mli_A2mat = mli_Amat;
   A2mat = (hypre_ParCSRMatrix *) mli_A2mat->getMatrix();

   /*-----------------------------------------------------------------
    * modify minimum aggregate size, if needed
    *-----------------------------------------------------------------*/

   minAggrSize_ = nullspaceDim_ / currNodeDofs_;
   if ( minAggrSize_ <= 1 ) minAggrSize_ = 2;

   /*-----------------------------------------------------------------
    * perform coarsening (small aggregates on processor boundaries)
    * 10/2005 : add bdryData for secondary aggregation
    *-----------------------------------------------------------------*/

   coarsenGraded(A2mat, &naggr, &node2aggr, &bdryData);
   if (blkSize > 1 && mli_A2mat != NULL) delete mli_A2mat;
   if (blkSize > 1)
   {
      (*bdryDataOut) = new int[ALocalNRows];
      for (irow = 0; irow < ALocalNRows; irow++)
         (*bdryDataOut)[irow] = bdryData[irow/blkSize];
      delete [] bdryData;
   }
   else (*bdryDataOut) = bdryData;

   /*-----------------------------------------------------------------
    * fetch the coarse grid information and instantiate P
    *-----------------------------------------------------------------*/

   PLocalNCols = naggr * nullspaceDim_;
   MLI_Utils_GenPartition(comm, PLocalNCols, &partition);
   PStartCol = partition[mypid];
   free( partition );
   PLocalNRows = ALocalNRows;
   PStartRow   = AStartRow;
   ierr = HYPRE_IJMatrixCreate(comm,PStartRow,PStartRow+PLocalNRows-1,
                          PStartCol,PStartCol+PLocalNCols-1,&IJPmat);
   ierr = HYPRE_IJMatrixSetObjectType(IJPmat, HYPRE_PARCSR);
   hypre_assert(!ierr);

   /*-----------------------------------------------------------------
    * expand the aggregation information if block size > 1 ==> eqn2aggr
    *-----------------------------------------------------------------*/

   if ( blkSize > 1 )
   {
      eqn2aggr = new int[ALocalNRows];
      for ( irow = 0; irow < ALocalNRows; irow++ )
         eqn2aggr[irow] = node2aggr[irow/blkSize];
      delete [] node2aggr;
   }
   else eqn2aggr = node2aggr;

   /*-----------------------------------------------------------------
    * create a compact form for the null space vectors
    * (get ready to perform QR on them)
    *-----------------------------------------------------------------*/

   PVecs = new double*[nullspaceDim_];
   PCols = new int[PLocalNRows];
   for (irow = 0; irow < nullspaceDim_; irow++)
      PVecs[irow] = new double[PLocalNRows];
   for ( irow = 0; irow < PLocalNRows; irow++ )
   {
      if ( eqn2aggr[irow] >= 0 )
           PCols[irow] = PStartCol + eqn2aggr[irow] * nullspaceDim_;
      else PCols[irow] = PStartCol + (-eqn2aggr[irow]-1) * nullspaceDim_;
      if ( nullspaceVec_ != NULL )
      {
         for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
            PVecs[jcol][irow] = nullspaceVec_[jcol*PLocalNRows+irow];
      }
      else
      {
         for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
         {
            if (irow % nullspaceDim_ == jcol) PVecs[jcol][irow] = 1.0;
            else                              PVecs[jcol][irow] = 0.0;
         }
      }
   }

   /*-----------------------------------------------------------------
    * perform QR for null space
    *-----------------------------------------------------------------*/
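   /* NOTE: the QR factorization itself is commented out below; the   */
   /* code as it stands copies qArray back into PVecs unchanged and   */
   /* fills newNull with identity blocks, i.e. the tentative P simply */
   /* injects the fine-level null vectors into each aggregate.        */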
1227 
1228    newNull = NULL;
1229    if ( PLocalNRows > 0 )
1230    {
1231       /* ------ count the size of each aggregate ------ */
1232 
1233       aggCntArray = new int[naggr];
1234       for ( ig = 0; ig < naggr; ig++ ) aggCntArray[ig] = 0;
1235       for ( irow = 0; irow < PLocalNRows; irow++ )
1236          if ( eqn2aggr[irow] >= 0 ) aggCntArray[eqn2aggr[irow]]++;
1237          else                       aggCntArray[(-eqn2aggr[irow]-1)]++;
1238       maxAggSize = 0;
1239       for ( ig = 0; ig < naggr; ig++ )
1240          if (aggCntArray[ig] > maxAggSize) maxAggSize = aggCntArray[ig];
1241 
1242       /* ------ register which equation is in which aggregate ------ */
1243 
1244       aggIndArray = new int*[naggr];
1245       for ( ig = 0; ig < naggr; ig++ )
1246       {
1247          aggIndArray[ig] = new int[aggCntArray[ig]];
1248          aggCntArray[ig] = 0;
1249       }
1250       for ( irow = 0; irow < PLocalNRows; irow++ )
1251       {
1252          index = eqn2aggr[irow];
1253          if ( index >= 0 )
1254             aggIndArray[index][aggCntArray[index]++] = irow;
1255          else
1256             aggIndArray[-index-1][aggCntArray[-index-1]++] = irow;
1257       }
1258 
1259       /* ------ allocate storage for QR factorization ------ */
1260 
1261       qArray  = new double[maxAggSize * nullspaceDim_];
1262       rArray  = new double[nullspaceDim_ * nullspaceDim_];
1263       newNull = new double[naggr*nullspaceDim_*nullspaceDim_];
1264 
1265       /* ------ perform QR on each aggregate ------ */
1266 
1267       for ( ig = 0; ig < naggr; ig++ )
1268       {
1269          aggSize = aggCntArray[ig];
1270 
1271          if ( aggSize < nullspaceDim_ )
1272          {
1273             printf("Aggregation ERROR : underdetermined system in QR.\n");
1274             printf("            error on Proc %d\n", mypid);
1275             printf("            error on aggr %d (%d)\n", ig, naggr);
1276             printf("            aggr size is %d\n", aggSize);
1277             exit(1);
1278          }
1279 
1280          /* ------ put data into the temporary array ------ */
1281 
1282          for ( jcol = 0; jcol < aggSize; jcol++ )
1283          {
1284             for ( irow = 0; irow < nullspaceDim_; irow++ )
1285                qArray[aggSize*irow+jcol] = PVecs[irow][aggIndArray[ig][jcol]];
1286          }
1287 
1288          /* ------ call QR function ------ */
1289 
1290 /*
1291          if ( currLevel_ < (numLevels_-1) )
1292          {
1293             info = MLI_Utils_QR(qArray, rArray, aggSize, nullspaceDim_);
1294             if (info != 0)
1295             {
1296                printf("%4d : Aggregation WARNING : QR returns non-zero for\n",
1297                       mypid);
1298                printf("  aggregate %d, size = %d, info = %d\n",ig,aggSize,info);
1299             }
1300          }
1301          else
1302          {
1303             for ( irow = 0; irow < nullspaceDim_; irow++ )
1304             {
1305                dtemp = 0.0;
1306                for ( jcol = 0; jcol < aggSize; jcol++ )
1307                   dtemp += qArray[aggSize*irow+jcol]*qArray[aggSize*irow+jcol];
1308                dtemp = 1.0 / sqrt(dtemp);
1309                for ( jcol = 0; jcol < aggSize; jcol++ )
1310                   qArray[aggSize*irow+jcol] *= dtemp;
1311             }
1312          }
1313 */
1314 
1315          /* ------ after QR, put the R into the next null space ------ */
1316 
1317 /*
1318          for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
1319             for ( irow = 0; irow < nullspaceDim_; irow++ )
1320                newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_] =
1321                          rArray[jcol+nullspaceDim_*irow];
1322 */
1323          for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
1324             for ( irow = 0; irow < nullspaceDim_; irow++ )
1325                if ( irow == jcol )
1326                   newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_] = 1.0;
1327                else
1328                   newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_] = 0.0;
1329 
1330          /* ------ put the P to PVecs ------ */
1331 
1332          for ( jcol = 0; jcol < aggSize; jcol++ )
1333          {
1334             for ( irow = 0; irow < nullspaceDim_; irow++ )
1335             {
1336                index = aggIndArray[ig][jcol];
1337                PVecs[irow][index] = qArray[ irow*aggSize + jcol ];
1338             }
1339          }
1340       }
1341       for ( ig = 0; ig < naggr; ig++ ) delete [] aggIndArray[ig];
1342       delete [] aggIndArray;
1343       delete [] aggCntArray;
1344       delete [] qArray;
1345       delete [] rArray;
1346    }
1347    if ( nullspaceVec_ != NULL ) delete [] nullspaceVec_;
1348    nullspaceVec_ = newNull;
1349 
1350    /*-----------------------------------------------------------------
1351     * initialize Pmat
1352     *-----------------------------------------------------------------*/
1353 
1354    rowLengths = new int[PLocalNRows];
1355    for ( irow = 0; irow < PLocalNRows; irow++ )
1356       rowLengths[irow] = nullspaceDim_;
1357    ierr = HYPRE_IJMatrixSetRowSizes(IJPmat, rowLengths);
1358    ierr = HYPRE_IJMatrixInitialize(IJPmat);
1359    hypre_assert(!ierr);
1360    delete [] rowLengths;
1361 
1362    /*--------------------------------------------------------------------
1363     * load and assemble Pmat
1364     *--------------------------------------------------------------------*/
1365 
1366    colInd = new int[nullspaceDim_];
1367    colVal = new double[nullspaceDim_];
1368    for ( irow = 0; irow < PLocalNRows; irow++ )
1369    {
1370       if ( PCols[irow] >= 0 )
1371       {
1372          nzcnt = 0;
1373          for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
1374          {
1375             if ( PVecs[jcol][irow] != 0.0 )
1376             {
1377                colInd[nzcnt] = PCols[irow] + jcol;
1378                colVal[nzcnt++] = PVecs[jcol][irow];
1379             }
1380          }
1381          rowNum = PStartRow + irow;
1382          HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt,
1383                              (const int *) &rowNum, (const int *) colInd,
1384                              (const double *) colVal);
1385       }
1386    }
1387    ierr = HYPRE_IJMatrixAssemble(IJPmat);
1388    hypre_assert( !ierr );
1389    HYPRE_IJMatrixGetObject(IJPmat, (void **) &Pmat);
1390    hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) Pmat);
1391    commPkg = hypre_ParCSRMatrixCommPkg(Amat);
1392    if (!commPkg) hypre_MatvecCommPkgCreate(Amat);
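   /* note: object type -1 makes HYPRE_IJMatrixDestroy below free only
    * the IJ wrapper, leaving the ParCSR object for mli_Pmat */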
1393    HYPRE_IJMatrixSetObjectType(IJPmat, -1);
1394    HYPRE_IJMatrixDestroy( IJPmat );
1395    delete [] colInd;
1396    delete [] colVal;
1397 
1398    /*-----------------------------------------------------------------
1399     * clean up
1400     *-----------------------------------------------------------------*/
1401 
1402    if ( PCols != NULL ) delete [] PCols;
1403    if ( PVecs != NULL )
1404    {
1405       for (irow = 0; irow < nullspaceDim_; irow++)
1406          if ( PVecs[irow] != NULL ) delete [] PVecs[irow];
1407       delete [] PVecs;
1408    }
1409    (*eqn2aggrOut) = eqn2aggr;
1410 
1411    /*-----------------------------------------------------------------
1412     * set up and return Pmat
1413     *-----------------------------------------------------------------*/
1414 
1415    funcPtr = new MLI_Function();
1416    MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
1417    sprintf(paramString, "HYPRE_ParCSR" );
1418    mli_Pmat = new MLI_Matrix( Pmat, paramString, funcPtr );
1419    (*PmatOut) = mli_Pmat;
1420    delete funcPtr;
1421    return 0.0;
1422 }
1423 
1424 // *********************************************************************
1425 // graded coarsening scheme (Given a graph, aggregate on the local subgraph
// but give smaller aggregates near processor boundaries)
1427 // (called by setupExtendedDomainDecomp/genP_DD)
1428 // ---------------------------------------------------------------------
1429 
int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG,
1431                          int *mliAggrLeng, int **mliAggrArray, int **bdryData)
1432 {
1433    MPI_Comm  comm;
1434    int       mypid, nprocs, *partition, startRow, endRow, maxInd;
1435    int       localNRows, naggr=0, *node2aggr, *aggrSizes, nUndone;
1436    int       irow, jcol, colNum, rowLeng, *cols, globalNRows;
1437    int       *nodeStat, selectFlag, nSelected=0, nNotSelected=0, count;
1438    int       ibuf[2], itmp[2], *bdrySet, localMinSize, index, maxCount;
1439    int       *GDiagI, *GDiagJ, *GOffdI;
1440    double    maxVal, *vals, *GDiagA;
1441    hypre_CSRMatrix *GDiag, *GOffd;
1442 #ifdef MLI_DEBUG_DETAILED
1443    int       rowNum;
1444 #endif
1445 
1446    /*-----------------------------------------------------------------
1447     * fetch machine and matrix parameters
1448     *-----------------------------------------------------------------*/
1449 
1450    comm = hypre_ParCSRMatrixComm(hypreG);
1451    MPI_Comm_rank(comm,&mypid);
1452    MPI_Comm_size(comm,&nprocs);
1453    HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreG,
1454                                         &partition);
1455    startRow = partition[mypid];
1456    endRow   = partition[mypid+1] - 1;
1457    free( partition );
1458    localNRows = endRow - startRow + 1;
1459    MPI_Allreduce(&localNRows, &globalNRows, 1, MPI_INT, MPI_SUM, comm);
1460    if ( mypid == 0 && outputLevel_ > 1 )
1461    {
1462       printf("\t*** Aggregation(G) : total nodes to aggregate = %d\n",
1463              globalNRows);
1464    }
1465    GDiag  = hypre_ParCSRMatrixDiag(hypreG);
1466    GDiagI = hypre_CSRMatrixI(GDiag);
1467    GDiagJ = hypre_CSRMatrixJ(GDiag);
1468    GDiagA = hypre_CSRMatrixData(GDiag);
1469    GOffd  = hypre_ParCSRMatrixOffd(hypreG);
1470    GOffdI = hypre_CSRMatrixI(GOffd);
1471 
1472    /*-----------------------------------------------------------------
1473     * allocate status arrays
1474     *-----------------------------------------------------------------*/
1475 
1476    if (localNRows > 0)
1477    {
1478       node2aggr = new int[localNRows];
1479       aggrSizes = new int[localNRows];
1480       nodeStat  = new int[localNRows];
1481       bdrySet   = new int[localNRows];
1482       for ( irow = 0; irow < localNRows; irow++ )
1483       {
1484          aggrSizes[irow] = 0;
1485          node2aggr[irow] = -1;
1486          nodeStat[irow]  = MLI_METHOD_AMGSA_READY;
1487          bdrySet[irow]   = 0;
1488       }
1489    }
1490    else node2aggr = aggrSizes = nodeStat = bdrySet = NULL;
1491 
1492    /*-----------------------------------------------------------------
1493     * search for zero rows and rows near the processor boundaries
1494     *-----------------------------------------------------------------*/
1495 
1496    for ( irow = 0; irow < localNRows; irow++ )
1497    {
1498       rowLeng = GDiagI[irow+1] - GDiagI[irow];
1499       if (rowLeng <= 0)
1500       {
1501          nodeStat[irow] = MLI_METHOD_AMGSA_NOTSELECTED;
1502          nNotSelected++;
1503       }
1504       if (GOffdI != NULL && (GOffdI[irow+1] - GOffdI[irow]) > 0)
1505          bdrySet[irow] = 1;
1506    }
1507 
1508    /*-----------------------------------------------------------------
    * Phase 0 : form aggregates near the boundary (aggregates should be
    *           as small as possible but must satisfy the minimum size;
    *           the algorithm gives preference to nodes on boundaries)
1512     *-----------------------------------------------------------------*/
1513 
1514    localMinSize = nullspaceDim_ / currNodeDofs_ * 2;
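   /* minimum nodes per boundary aggregate: with currNodeDofs_ equations
    * per node, this gives each aggregate roughly twice as many
    * equations as null-space vectors */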
1515    for ( irow = 0; irow < localNRows; irow++ )
1516    {
1517       if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY && bdrySet[irow] == 1 )
1518       {
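         /* the seed is stored with the negative code (-naggr-1) to mark
          * a boundary aggregate; it is decoded later as (-eqn2aggr-1)
          * when P is formed */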
1519          nSelected++;
1520          node2aggr[irow]  = - naggr - 1;
1521          aggrSizes[naggr] = 1;
1522          nodeStat[irow]  = MLI_METHOD_AMGSA_SELECTED2;
1523          if (localMinSize > 1)
1524          {
1525             rowLeng = GDiagI[irow+1] - GDiagI[irow];
1526             cols = &(GDiagJ[GDiagI[irow]]);
1527             jcol = 0;
1528             while ( aggrSizes[naggr] < localMinSize && jcol < rowLeng )
1529             {
1530                index = cols[jcol];
               if ( index != irow && (bdrySet[index] != 1) &&
                    nodeStat[index] == MLI_METHOD_AMGSA_READY )
1533                {
1534                   node2aggr[index] = naggr;
1535                   aggrSizes[naggr]++;
1536                   nodeStat[index]  = MLI_METHOD_AMGSA_SELECTED2;
1537                }
1538                jcol++;
1539             }
1540          }
1541          naggr++;
1542       }
1543    }
1544    itmp[0] = naggr;
1545    itmp[1] = nSelected;
1546    if (outputLevel_ > 1) MPI_Allreduce(itmp, ibuf, 2, MPI_INT, MPI_SUM, comm);
1547    if ( mypid == 0 && outputLevel_ > 1 )
1548    {
1549       printf("\t*** Aggregation(G) P0 : no. of aggregates     = %d\n",ibuf[0]);
1550       printf("\t*** Aggregation(G) P0 : no. nodes aggregated  = %d\n",ibuf[1]);
1551    }
1552 
   // search for the seed node with the largest number of neighbors
   // (computed here but not used by the phases below)
1554 
1555    maxInd   = -1;
1556    maxCount = -1;
1557    for ( irow = 0; irow < localNRows; irow++ )
1558    {
1559       if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY )
1560       {
1561          count = 0;
1562          rowLeng = GDiagI[irow+1] - GDiagI[irow];
1563          cols = &(GDiagJ[GDiagI[irow]]);
1564          for ( jcol = 0; jcol < rowLeng; jcol++ )
1565          {
1566             index = cols[jcol];
1567             if ( nodeStat[index] == MLI_METHOD_AMGSA_READY ) count++;
1568          }
1569          if ( count > maxCount )
1570          {
1571             maxCount = count;
1572             maxInd = irow;
1573          }
1574       }
1575    }
1576 
1577    /*-----------------------------------------------------------------
1578     * Phase 1 : form aggregates
1579     *-----------------------------------------------------------------*/
1580 
1581    for ( irow = 0; irow < localNRows; irow++ )
1582    {
1583       if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY )
1584       {
1585          rowLeng = GDiagI[irow+1] - GDiagI[irow];
1586          cols = &(GDiagJ[GDiagI[irow]]);
1587          selectFlag = 1;
1588          count      = 1;
1589          for ( jcol = 0; jcol < rowLeng; jcol++ )
1590          {
1591             colNum = cols[jcol];
1592             if ( nodeStat[colNum] != MLI_METHOD_AMGSA_READY )
1593             {
1594                selectFlag = 0;
1595                break;
1596             }
1597             else count++;
1598          }
1599          if ( selectFlag == 1 && count >= minAggrSize_ )
1600          {
1601             aggrSizes[naggr] = 0;
1602             for ( jcol = 0; jcol < rowLeng; jcol++ )
1603             {
1604                colNum = cols[jcol];
1605                node2aggr[colNum] = naggr;
1606                nodeStat[colNum] = MLI_METHOD_AMGSA_SELECTED;
1607                aggrSizes[naggr]++;
1608                nSelected++;
1609             }
1610             naggr++;
1611          }
1612       }
1613    }
1614    itmp[0] = naggr;
1615    itmp[1] = nSelected;
1616    if (outputLevel_ > 1) MPI_Allreduce(itmp, ibuf, 2, MPI_INT, MPI_SUM, comm);
1617    if ( mypid == 0 && outputLevel_ > 1 )
1618    {
1619       printf("\t*** Aggregation(G) P1 : no. of aggregates     = %d\n",ibuf[0]);
1620       printf("\t*** Aggregation(G) P1 : no. nodes aggregated  = %d\n",ibuf[1]);
1621    }
1622 
1623    /*-----------------------------------------------------------------
1624     * Phase 2 : put the rest into one of the existing aggregates
1625     *-----------------------------------------------------------------*/
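   /* a remaining node joins the selected neighbor with the largest
    * connection weight; it is first marked PENDING so that it cannot
    * attract further nodes within this same sweep */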
1626 
1627    if ( (nSelected+nNotSelected) < localNRows )
1628    {
1629       for ( irow = 0; irow < localNRows; irow++ )
1630       {
1631          if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY )
1632          {
1633             rowLeng = GDiagI[irow+1] - GDiagI[irow];
1634             cols    = &(GDiagJ[GDiagI[irow]]);
1635             vals    = &(GDiagA[GDiagI[irow]]);
1636             maxInd  = -1;
1637             maxVal  = 0.0;
1638             for ( jcol = 0; jcol < rowLeng; jcol++ )
1639             {
1640                colNum = cols[jcol];
1641                if ( nodeStat[colNum] == MLI_METHOD_AMGSA_SELECTED )
1642                {
1643                   if (vals[jcol] > maxVal)
1644                   {
1645                      maxInd = colNum;
1646                      maxVal = vals[jcol];
1647                   }
1648                }
1649             }
1650             if ( maxInd != -1 )
1651             {
1652                node2aggr[irow] = node2aggr[maxInd];
1653                nodeStat[irow] = MLI_METHOD_AMGSA_PENDING;
1654                aggrSizes[node2aggr[maxInd]]++;
1655             }
1656          }
1657       }
1658       for ( irow = 0; irow < localNRows; irow++ )
1659       {
1660          if ( nodeStat[irow] == MLI_METHOD_AMGSA_PENDING )
1661          {
1662             nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
1663             nSelected++;
1664          }
1665       }
1666    }
1667    itmp[0] = naggr;
1668    itmp[1] = nSelected;
1669    if (outputLevel_ > 1) MPI_Allreduce(itmp,ibuf,2,MPI_INT,MPI_SUM,comm);
1670    if ( mypid == 0 && outputLevel_ > 1 )
1671    {
1672       printf("\t*** Aggregation(G) P2 : no. of aggregates     = %d\n",ibuf[0]);
1673       printf("\t*** Aggregation(G) P2 : no. nodes aggregated  = %d\n",ibuf[1]);
1674    }
1675 
1676    /*-----------------------------------------------------------------
1677     * Phase 3 : form aggregates for all other rows
1678     *-----------------------------------------------------------------*/
1679 
1680    if ( (nSelected+nNotSelected) < localNRows )
1681    {
1682       for ( irow = 0; irow < localNRows; irow++ )
1683       {
1684          if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY )
1685          {
1686             rowLeng = GDiagI[irow+1] - GDiagI[irow];
1687             cols    = &(GDiagJ[GDiagI[irow]]);
1688             count = 1;
1689             for ( jcol = 0; jcol < rowLeng; jcol++ )
1690             {
1691                colNum = cols[jcol];
1692                if ( nodeStat[colNum] == MLI_METHOD_AMGSA_READY ) count++;
1693             }
1694             if ( count > 1 && count >= minAggrSize_ )
1695             {
1696                aggrSizes[naggr] = 0;
1697                for ( jcol = 0; jcol < rowLeng; jcol++ )
1698                {
1699                   colNum = cols[jcol];
1700                   if ( nodeStat[colNum] == MLI_METHOD_AMGSA_READY )
1701                   {
1702                      nodeStat[colNum] = MLI_METHOD_AMGSA_SELECTED;
1703                      node2aggr[colNum] = naggr;
1704                      aggrSizes[naggr]++;
1705                      nSelected++;
1706                   }
1707                }
1708                naggr++;
1709             }
1710          }
1711       }
1712    }
1713    itmp[0] = naggr;
1714    itmp[1] = nSelected;
1715    if (outputLevel_ > 1) MPI_Allreduce(itmp,ibuf,2,MPI_INT,MPI_SUM,comm);
1716    if ( mypid == 0 && outputLevel_ > 1 )
1717    {
1718       printf("\t*** Aggregation(G) P3 : no. of aggregates     = %d\n",ibuf[0]);
1719       printf("\t*** Aggregation(G) P3 : no. nodes aggregated  = %d\n",ibuf[1]);
1720    }
1721 
1722    /*-----------------------------------------------------------------
1723     * Phase 4 : finally put all lone rows into some neighbor aggregate
1724     *-----------------------------------------------------------------*/
1725 
1726    if ( (nSelected+nNotSelected) < localNRows )
1727    {
1728       for ( irow = 0; irow < localNRows; irow++ )
1729       {
1730          if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY )
1731          {
1732             rowLeng = GDiagI[irow+1] - GDiagI[irow];
1733             cols    = &(GDiagJ[GDiagI[irow]]);
1734             for ( jcol = 0; jcol < rowLeng; jcol++ )
1735             {
1736                colNum = cols[jcol];
1737                if ( nodeStat[colNum] == MLI_METHOD_AMGSA_SELECTED )
1738                {
1739                   node2aggr[irow] = node2aggr[colNum];
1740                   nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
1741                   aggrSizes[node2aggr[colNum]]++;
1742                   nSelected++;
1743                   break;
1744                }
1745             }
1746          }
1747       }
1748    }
1749    itmp[0] = naggr;
1750    itmp[1] = nSelected;
1751    if ( outputLevel_ > 1 ) MPI_Allreduce(itmp,ibuf,2,MPI_INT,MPI_SUM,comm);
1752    if ( mypid == 0 && outputLevel_ > 1 )
1753    {
1754       printf("\t*** Aggregation(G) P4 : no. of aggregates     = %d\n",ibuf[0]);
1755       printf("\t*** Aggregation(G) P4 : no. nodes aggregated  = %d\n",ibuf[1]);
1756    }
1757    nUndone = localNRows - nSelected - nNotSelected;
   // (the original condition "nUndone > 0" is disabled; the test below
   // can never hold, so this leftover-packing phase is inactive)
//if ( nUndone > 0 )
   if ( nUndone > localNRows )
1760    {
1761       count = nUndone / minAggrSize_;
1762       if ( count == 0 ) count = 1;
1763       count += naggr;
1764       irow = jcol = 0;
1765       while ( nUndone > 0 )
1766       {
1767          if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY )
1768          {
1769             node2aggr[irow] = naggr;
1770             nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
1771             nUndone--;
1772             nSelected++;
1773             jcol++;
1774             if ( jcol >= minAggrSize_ && naggr < count-1 )
1775             {
1776                jcol = 0;
1777                naggr++;
1778             }
1779          }
1780          irow++;
1781       }
1782       naggr = count;
1783    }
1784    itmp[0] = naggr;
1785    itmp[1] = nSelected;
1786    if ( outputLevel_ > 1 ) MPI_Allreduce(itmp,ibuf,2,MPI_INT,MPI_SUM,comm);
1787    if ( mypid == 0 && outputLevel_ > 1 )
1788    {
1789       printf("\t*** Aggregation(G) P5 : no. of aggregates     = %d\n",ibuf[0]);
1790       printf("\t*** Aggregation(G) P5 : no. nodes aggregated  = %d\n",ibuf[1]);
1791    }
1792 
1793    /*-----------------------------------------------------------------
1794     * diagnostics
1795     *-----------------------------------------------------------------*/
1796 
1797    if ( (nSelected+nNotSelected) < localNRows )
1798    {
1799 #ifdef MLI_DEBUG_DETAILED
1800       for ( irow = 0; irow < localNRows; irow++ )
1801       {
1802          if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY )
1803          {
1804             rowNum = startRow + irow;
1805             printf("%5d : unaggregated node = %8d\n", mypid, rowNum);
1806             hypre_ParCSRMatrixGetRow(hypreG,rowNum,&rowLeng,&cols,NULL);
1807             for ( jcol = 0; jcol < rowLeng; jcol++ )
1808             {
1809                colNum = cols[jcol];
1810                printf("ERROR : neighbor of unselected node %9d = %9d\n",
1811                      rowNum, colNum);
1812             }
1813             hypre_ParCSRMatrixRestoreRow(hypreG,rowNum,&rowLeng,&cols,NULL);
1814          }
1815       }
1816 #else
1817       printf("%5d : ERROR - not all nodes aggregated.\n", mypid);
1818       exit(1);
1819 #endif
1820    }
1821 
1822    /*-----------------------------------------------------------------
1823     * clean up and initialize the output arrays
1824     *-----------------------------------------------------------------*/
1825 
1826    if (localNRows > 0) delete [] aggrSizes;
1827    if (localNRows > 0) delete [] nodeStat;
1828    if (localNRows == 1 && naggr == 0)
1829    {
1830       node2aggr[0] = 0;
1831       naggr = 1;
1832    }
1833    (*bdryData)     = bdrySet;
1834    (*mliAggrArray) = node2aggr;
1835    (*mliAggrLeng)  = naggr;
1836    return 0;
1837 }
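// Note on the returned aggregate array: interior nodes store their
// aggregate number directly, while boundary-aggregate seeds store the
// negative code -(aggregate+1); e.g. with naggr = 3, an array
// {-1, 0, 2, 2, -2, 1} marks nodes 0 and 4 as the seeds of boundary
// aggregates 0 and 1.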
1838 
1839 // ************************************************************************
1840 // Purpose : Given Amat, perform preferential coarsening (no coarsening
//           when the bdry flag = 1)
1842 // (called by setupExtendedDomainDecomp)
1843 // ------------------------------------------------------------------------
1844 
double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat,
1846                   MLI_Matrix **PmatOut, int ALen, int *bdryData)
1847 {
1848    int    mypid, nprocs, *partition, AStartRow, AEndRow, ALocalNRows;
1849    int    blkSize, naggr, *node2aggr, ierr, PLocalNCols, PStartCol;
1850    int    PLocalNRows, PStartRow, *eqn2aggr, irow, jcol, ig;
1851    int    *PCols, maxAggSize, *aggCntArray, index, **aggIndArray;
1852    int    aggSize, nzcnt, *rowLengths, rowNum, *colInd, *compressBdryData;
1853    double **PVecs, *newNull, *qArray, *rArray, *colVal;
1854    char   paramString[200];
1855    HYPRE_IJMatrix      IJPmat;
1856    hypre_ParCSRMatrix  *Amat, *A2mat, *Pmat;
1857    MLI_Matrix          *mli_A2mat=NULL, *mli_Pmat;
1858    MPI_Comm            comm;
1859    hypre_ParCSRCommPkg *commPkg;
1860    MLI_Function        *funcPtr;
1861 
1862    /*-----------------------------------------------------------------
1863     * fetch matrix information
1864     *-----------------------------------------------------------------*/
1865 
1866    Amat = (hypre_ParCSRMatrix *) mli_Amat->getMatrix();
1867    comm = hypre_ParCSRMatrixComm(Amat);
1868    MPI_Comm_rank(comm,&mypid);
1869    MPI_Comm_size(comm,&nprocs);
1870    HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) Amat,&partition);
1871    AStartRow = partition[mypid];
1872    AEndRow   = partition[mypid+1] - 1;
1873    ALocalNRows = AEndRow - AStartRow + 1;
1874    free(partition);
1875 
1876    /*-----------------------------------------------------------------
    * reduce Amat based on the block size information (if currNodeDofs_ > 1)
1878     *-----------------------------------------------------------------*/
1879 
1880    blkSize = currNodeDofs_;
1881    if (blkSize > 1) MLI_Matrix_Compress(mli_Amat, blkSize, &mli_A2mat);
1882    else             mli_A2mat = mli_Amat;
1883    A2mat = (hypre_ParCSRMatrix *) mli_A2mat->getMatrix();
1884 
1885    /*-----------------------------------------------------------------
1886     * modify minimum aggregate size, if needed
1887     *-----------------------------------------------------------------*/
1888 
1889    minAggrSize_ = nullspaceDim_ / currNodeDofs_;
1890    if (minAggrSize_ <= 1) minAggrSize_ = 2;
1891 
1892    /*-----------------------------------------------------------------
1893     * perform coarsening (no aggregation on processor boundaries)
1894     *-----------------------------------------------------------------*/
1895 
1896    if (blkSize > 1)
1897    {
1898       compressBdryData = new int[ALocalNRows/blkSize];
1899       for (irow = 0; irow < ALocalNRows; irow+=blkSize)
1900          compressBdryData[irow/blkSize] = bdryData[irow];
1901    }
1902    else compressBdryData = bdryData;
1903 
   coarsenSelective(A2mat, &naggr, &node2aggr, compressBdryData);
1905    if (blkSize > 1 && mli_A2mat != NULL) delete mli_A2mat;
1906    if (blkSize > 1) delete [] compressBdryData;
1907 
1908    /*-----------------------------------------------------------------
1909     * fetch the coarse grid information and instantiate P
1910     *-----------------------------------------------------------------*/
1911 
1912    PLocalNCols = naggr * nullspaceDim_;
1913    MLI_Utils_GenPartition(comm, PLocalNCols, &partition);
1914    PStartCol = partition[mypid];
1915    free( partition );
1916    PLocalNRows = ALocalNRows;
1917    PStartRow   = AStartRow;
1918    ierr = HYPRE_IJMatrixCreate(comm,PStartRow,PStartRow+PLocalNRows-1,
1919                           PStartCol,PStartCol+PLocalNCols-1,&IJPmat);
1920    ierr = HYPRE_IJMatrixSetObjectType(IJPmat, HYPRE_PARCSR);
1921    hypre_assert(!ierr);
1922 
1923    /*-----------------------------------------------------------------
1924     * expand the aggregation information if block size > 1 ==> eqn2aggr
1925     *-----------------------------------------------------------------*/
1926 
1927    if ( blkSize > 1 )
1928    {
1929       eqn2aggr = new int[ALocalNRows];
1930       for ( irow = 0; irow < ALocalNRows; irow++ )
1931          eqn2aggr[irow] = node2aggr[irow/blkSize];
1932       delete [] node2aggr;
1933    }
1934    else eqn2aggr = node2aggr;
1935 
1936    /*-----------------------------------------------------------------
1937     * create a compact form for the null space vectors
1938     * (get ready to perform QR on them)
1939     *-----------------------------------------------------------------*/
1940 
1941    PVecs = new double*[nullspaceDim_];
1942    PCols = new int[PLocalNRows];
1943    for (irow = 0; irow < nullspaceDim_; irow++)
1944       PVecs[irow] = new double[PLocalNRows];
1945    for ( irow = 0; irow < PLocalNRows; irow++ )
1946    {
1947       if ( eqn2aggr[irow] >= 0 )
1948            PCols[irow] = PStartCol + eqn2aggr[irow] * nullspaceDim_;
1949       else PCols[irow] = PStartCol + (-eqn2aggr[irow]-1) * nullspaceDim_;
1950       if ( nullspaceVec_ != NULL )
1951       {
1952          for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
            PVecs[jcol][irow] = nullspaceVec_[jcol*PLocalNRows+irow];
1954       }
1955       else
1956       {
1957          for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
1958          {
1959             if (irow % nullspaceDim_ == jcol) PVecs[jcol][irow] = 1.0;
1960             else                              PVecs[jcol][irow] = 0.0;
1961          }
1962       }
1963    }
1964 
1965    /*-----------------------------------------------------------------
    * set up the coarse null space for P (the QR step is bypassed here)
1967     *-----------------------------------------------------------------*/
1968 
1969    newNull = NULL;
1970    if ( PLocalNRows > 0 )
1971    {
1972       /* ------ count the size of each aggregate ------ */
1973 
1974       aggCntArray = new int[naggr];
1975       for ( ig = 0; ig < naggr; ig++ ) aggCntArray[ig] = 0;
1976       for ( irow = 0; irow < PLocalNRows; irow++ )
1977          if ( eqn2aggr[irow] >= 0 ) aggCntArray[eqn2aggr[irow]]++;
1978          else                       aggCntArray[(-eqn2aggr[irow]-1)]++;
1979       maxAggSize = 0;
1980       for ( ig = 0; ig < naggr; ig++ )
1981          if (aggCntArray[ig] > maxAggSize) maxAggSize = aggCntArray[ig];
1982 
1983       /* ------ register which equation is in which aggregate ------ */
1984 
1985       aggIndArray = new int*[naggr];
1986       for ( ig = 0; ig < naggr; ig++ )
1987       {
1988          aggIndArray[ig] = new int[aggCntArray[ig]];
1989          aggCntArray[ig] = 0;
1990       }
1991       for ( irow = 0; irow < PLocalNRows; irow++ )
1992       {
1993          index = eqn2aggr[irow];
1994          if ( index >= 0 )
1995             aggIndArray[index][aggCntArray[index]++] = irow;
1996          else
1997             aggIndArray[-index-1][aggCntArray[-index-1]++] = irow;
1998       }
1999 
      /* ------ allocate storage for the (currently bypassed) QR ------ */
2001 
2002       qArray  = new double[maxAggSize * nullspaceDim_];
2003       rArray  = new double[nullspaceDim_ * nullspaceDim_];
2004       newNull = new double[naggr*nullspaceDim_*nullspaceDim_];
2005 
2006       /* ------ perform QR on each aggregate ------ */
2007 
2008       for ( ig = 0; ig < naggr; ig++ )
2009       {
2010          aggSize = aggCntArray[ig];
2011 
2012          if ( aggSize < nullspaceDim_ )
2013          {
2014             printf("Aggregation ERROR : underdetermined system in QR.\n");
2015             printf("            error on Proc %d\n", mypid);
2016             printf("            error on aggr %d (%d)\n", ig, naggr);
2017             printf("            aggr size is %d\n", aggSize);
2018             exit(1);
2019          }
2020 
2021          /* ------ put data into the temporary array ------ */
2022 
2023          for ( jcol = 0; jcol < aggSize; jcol++ )
2024          {
2025             for ( irow = 0; irow < nullspaceDim_; irow++ )
2026                qArray[aggSize*irow+jcol] = PVecs[irow][aggIndArray[ig][jcol]];
2027          }
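         /* qArray now holds the aggregate's null-space block: vector
          * irow occupies qArray[irow*aggSize .. irow*aggSize+aggSize-1] */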
2028 
         /* ------ no QR is performed here, so the next null space ------ */
         /* ------ is set to the identity rather than the R factor ------ */
2030 
2031          for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
2032             for ( irow = 0; irow < nullspaceDim_; irow++ )
2033                if ( irow == jcol )
2034                   newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_]=1.0;
2035                else
2036                   newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_]=0.0;
2037 
         /* ------ copy qArray back into PVecs ------ */
2039 
2040          for ( jcol = 0; jcol < aggSize; jcol++ )
2041          {
2042             for ( irow = 0; irow < nullspaceDim_; irow++ )
2043             {
2044                index = aggIndArray[ig][jcol];
2045                PVecs[irow][index] = qArray[ irow*aggSize + jcol ];
2046             }
2047          }
2048       }
2049       for ( ig = 0; ig < naggr; ig++ ) delete [] aggIndArray[ig];
2050       delete [] aggIndArray;
2051       delete [] aggCntArray;
2052       delete [] qArray;
2053       delete [] rArray;
2054    }
2055    if ( nullspaceVec_ != NULL ) delete [] nullspaceVec_;
2056    nullspaceVec_ = newNull;
2057 
2058    /*-----------------------------------------------------------------
2059     * initialize Pmat
2060     *-----------------------------------------------------------------*/
2061 
2062    rowLengths = new int[PLocalNRows];
2063    for ( irow = 0; irow < PLocalNRows; irow++ )
2064       rowLengths[irow] = nullspaceDim_;
2065    ierr = HYPRE_IJMatrixSetRowSizes(IJPmat, rowLengths);
2066    ierr = HYPRE_IJMatrixInitialize(IJPmat);
2067    hypre_assert(!ierr);
2068    delete [] rowLengths;
2069 
2070    /*--------------------------------------------------------------------
2071     * load and assemble Pmat
2072     *--------------------------------------------------------------------*/
2073 
2074    colInd = new int[nullspaceDim_];
2075    colVal = new double[nullspaceDim_];
2076    for ( irow = 0; irow < PLocalNRows; irow++ )
2077    {
2078       if ( PCols[irow] >= 0 )
2079       {
2080          nzcnt = 0;
2081          for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
2082          {
2083             if ( PVecs[jcol][irow] != 0.0 )
2084             {
2085                colInd[nzcnt] = PCols[irow] + jcol;
2086                colVal[nzcnt++] = PVecs[jcol][irow];
2087             }
2088          }
2089          rowNum = PStartRow + irow;
2090          HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt,
2091                              (const int *) &rowNum, (const int *) colInd,
2092                              (const double *) colVal);
2093       }
2094    }
2095    ierr = HYPRE_IJMatrixAssemble(IJPmat);
2096    hypre_assert( !ierr );
2097    HYPRE_IJMatrixGetObject(IJPmat, (void **) &Pmat);
2098    hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) Pmat);
2099    commPkg = hypre_ParCSRMatrixCommPkg(Amat);
2100    if (!commPkg) hypre_MatvecCommPkgCreate(Amat);
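   /* as above: object type -1 keeps HYPRE_IJMatrixDestroy from freeing
    * the ParCSR matrix that mli_Pmat will own */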
2101    HYPRE_IJMatrixSetObjectType(IJPmat, -1);
2102    HYPRE_IJMatrixDestroy( IJPmat );
2103    delete [] colInd;
2104    delete [] colVal;
2105 
2106    /*-----------------------------------------------------------------
2107     * clean up
2108     *-----------------------------------------------------------------*/
2109 
2110    if (PCols != NULL) delete [] PCols;
2111    if (PVecs != NULL)
2112    {
2113       for (irow = 0; irow < nullspaceDim_; irow++)
2114          if (PVecs[irow] != NULL) delete [] PVecs[irow];
2115       delete [] PVecs;
2116    }
2117    delete [] eqn2aggr;
2118 
2119    /*-----------------------------------------------------------------
2120     * set up and return Pmat
2121     *-----------------------------------------------------------------*/
2122 
2123    funcPtr = new MLI_Function();
2124    MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
2125    sprintf(paramString, "HYPRE_ParCSR" );
2126    mli_Pmat = new MLI_Matrix( Pmat, paramString, funcPtr );
2127    (*PmatOut) = mli_Pmat;
2128    delete funcPtr;
2129    return 0.0;
2130 }
2131 
2132 // *********************************************************************
2133 // selective coarsening scheme (Given a graph, aggregate on the local
2134 // subgraph but no aggregation near processor boundaries)
2135 // (called by setupExtendedDomainDecomp/genP_Selective)
2136 // ---------------------------------------------------------------------
2137 
int MLI_Method_AMGSA::coarsenSelective(hypre_ParCSRMatrix *hypreG,
2139                          int *naggrOut, int **aggrInfoOut, int *bdryData)
2140 {
2141    MPI_Comm  comm;
2142    int       mypid, nprocs, *partition, startRow, endRow, maxInd;
2143    int       localNRows, naggr=0, *node2aggr, *aggrSizes, nUndone;
2144    int       irow, jcol, colNum, rowLeng, *cols, globalNRows;
2145    int       *nodeStat, selectFlag, nSelected=0, nNotSelected=0, count;
2146    int       *GDiagI, *GDiagJ;
2147    double    maxVal, *vals, *GDiagA;
2148    hypre_CSRMatrix *GDiag;
2149 #ifdef MLI_DEBUG_DETAILED
2150    int       rowNum;
2151 #endif
2152 
2153    /*-----------------------------------------------------------------
2154     * fetch machine and matrix parameters
2155     *-----------------------------------------------------------------*/
2156 
2157    comm = hypre_ParCSRMatrixComm(hypreG);
2158    MPI_Comm_rank(comm,&mypid);
2159    MPI_Comm_size(comm,&nprocs);
2160    HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreG,
2161                                         &partition);
2162    startRow = partition[mypid];
2163    endRow   = partition[mypid+1] - 1;
2164    free( partition );
2165    localNRows = endRow - startRow + 1;
2166    MPI_Allreduce(&localNRows, &globalNRows, 1, MPI_INT, MPI_SUM, comm);
2167    if ( mypid == 0 && outputLevel_ > 1 )
2168    {
2169       printf("\t*** Aggregation(G) : total nodes to aggregate = %d\n",
2170              globalNRows);
2171    }
2172    GDiag  = hypre_ParCSRMatrixDiag(hypreG);
2173    GDiagI = hypre_CSRMatrixI(GDiag);
2174    GDiagJ = hypre_CSRMatrixJ(GDiag);
2175    GDiagA = hypre_CSRMatrixData(GDiag);
2176 
2177    /*-----------------------------------------------------------------
2178     * allocate status arrays
2179     *-----------------------------------------------------------------*/
2180 
2181    if (localNRows > 0)
2182    {
2183       node2aggr = new int[localNRows];
2184       aggrSizes = new int[localNRows];
2185       nodeStat  = new int[localNRows];
2186       for ( irow = 0; irow < localNRows; irow++ )
2187       {
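         /* every boundary node becomes its own singleton aggregate up
          * front, so no aggregate ever spans a processor boundary */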
2188          if (bdryData[irow] == 1)
2189          {
2190             nodeStat[irow]  = MLI_METHOD_AMGSA_SELECTED;
            node2aggr[irow] = naggr;
            aggrSizes[naggr++] = 1;
2193             nSelected++;
2194          }
2195          else
2196          {
2197             nodeStat[irow]  = MLI_METHOD_AMGSA_READY;
2198             node2aggr[irow] = -1;
2199             aggrSizes[irow] = 0;
2200          }
2201       }
2202    }
2203    else node2aggr = aggrSizes = nodeStat = NULL;
2204 
2205    /*-----------------------------------------------------------------
2206     * search for zero rows and rows near the processor boundaries
2207     *-----------------------------------------------------------------*/
2208 
2209    for ( irow = 0; irow < localNRows; irow++ )
2210    {
2211       rowLeng = GDiagI[irow+1] - GDiagI[irow];
2212       if (rowLeng <= 0)
2213       {
2214          nodeStat[irow] = MLI_METHOD_AMGSA_NOTSELECTED;
2215          nNotSelected++;
2216       }
2217    }
2218 
2219    /*-----------------------------------------------------------------
2220     * Phase 1 : form aggregates
2221     *-----------------------------------------------------------------*/
2222 
2223    for (irow = 0; irow < localNRows; irow++)
2224    {
2225       if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
2226       {
2227          rowLeng = GDiagI[irow+1] - GDiagI[irow];
2228          cols = &(GDiagJ[GDiagI[irow]]);
2229          selectFlag = 1;
2230          count      = 1;
2231          for ( jcol = 0; jcol < rowLeng; jcol++ )
2232          {
2233             colNum = cols[jcol];
2234             if (nodeStat[colNum] != MLI_METHOD_AMGSA_READY)
2235             {
2236                selectFlag = 0;
2237                break;
2238             }
2239             else count++;
2240          }
2241          if (selectFlag == 1 && count >= minAggrSize_)
2242          {
2243             aggrSizes[naggr] = 0;
2244             for ( jcol = 0; jcol < rowLeng; jcol++ )
2245             {
2246                colNum = cols[jcol];
2247                node2aggr[colNum] = naggr;
2248                nodeStat[colNum] = MLI_METHOD_AMGSA_SELECTED;
2249                aggrSizes[naggr]++;
2250                nSelected++;
2251             }
2252             naggr++;
2253          }
2254       }
2255    }
2256 
2257    /*-----------------------------------------------------------------
2258     * Phase 2 : put the rest into one of the existing aggregates
2259     *-----------------------------------------------------------------*/
2260 
2261    if ((nSelected+nNotSelected) < localNRows)
2262    {
2263       for (irow = 0; irow < localNRows; irow++)
2264       {
2265          if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
2266          {
2267             rowLeng = GDiagI[irow+1] - GDiagI[irow];
2268             cols    = &(GDiagJ[GDiagI[irow]]);
2269             vals    = &(GDiagA[GDiagI[irow]]);
2270             maxInd  = -1;
2271             maxVal  = 0.0;
2272             for (jcol = 0; jcol < rowLeng; jcol++)
2273             {
2274                colNum = cols[jcol];
2275                if (nodeStat[colNum] == MLI_METHOD_AMGSA_SELECTED)
2276                {
2277                   if (vals[jcol] > maxVal)
2278                   {
2279                      maxInd = colNum;
2280                      maxVal = vals[jcol];
2281                   }
2282                }
2283             }
2284             if (maxInd != -1)
2285             {
2286                node2aggr[irow] = node2aggr[maxInd];
2287                nodeStat[irow] = MLI_METHOD_AMGSA_PENDING;
2288                aggrSizes[node2aggr[maxInd]]++;
2289             }
2290          }
2291       }
2292       for (irow = 0; irow < localNRows; irow++)
2293       {
2294          if (nodeStat[irow] == MLI_METHOD_AMGSA_PENDING)
2295          {
2296             nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
2297             nSelected++;
2298          }
2299       }
2300    }
2301 
2302    /*-----------------------------------------------------------------
2303     * Phase 3 : form aggregates for all other rows
2304     *-----------------------------------------------------------------*/
2305 
2306    if ((nSelected+nNotSelected) < localNRows)
2307    {
2308       for (irow = 0; irow < localNRows; irow++)
2309       {
2310          if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
2311          {
2312             rowLeng = GDiagI[irow+1] - GDiagI[irow];
2313             cols    = &(GDiagJ[GDiagI[irow]]);
2314             count = 1;
2315             for (jcol = 0; jcol < rowLeng; jcol++)
2316             {
2317                colNum = cols[jcol];
2318                if (nodeStat[colNum] == MLI_METHOD_AMGSA_READY) count++;
2319             }
2320             if (count > 1 && count >= minAggrSize_)
2321             {
2322                aggrSizes[naggr] = 0;
2323                for (jcol = 0; jcol < rowLeng; jcol++)
2324                {
2325                   colNum = cols[jcol];
2326                   if (nodeStat[colNum] == MLI_METHOD_AMGSA_READY)
2327                   {
2328                      nodeStat[colNum] = MLI_METHOD_AMGSA_SELECTED;
2329                      node2aggr[colNum] = naggr;
2330                      aggrSizes[naggr]++;
2331                      nSelected++;
2332                   }
2333                }
2334                naggr++;
2335             }
2336          }
2337       }
2338    }
2339 
2340    /*-----------------------------------------------------------------
2341     * Phase 4 : finally put all lone rows into some neighbor aggregate
2342     *-----------------------------------------------------------------*/
2343 
2344    if ((nSelected+nNotSelected) < localNRows)
2345    {
2346       for (irow = 0; irow < localNRows; irow++)
2347       {
2348          if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
2349          {
2350             rowLeng = GDiagI[irow+1] - GDiagI[irow];
2351             cols    = &(GDiagJ[GDiagI[irow]]);
2352             for (jcol = 0; jcol < rowLeng; jcol++)
2353             {
2354                colNum = cols[jcol];
2355                if (nodeStat[colNum] == MLI_METHOD_AMGSA_SELECTED)
2356                {
2357                   node2aggr[irow] = node2aggr[colNum];
2358                   nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
2359                   aggrSizes[node2aggr[colNum]]++;
2360                   nSelected++;
2361                   break;
2362                }
2363             }
2364          }
2365       }
2366    }
2367    nUndone = localNRows - nSelected - nNotSelected;
   // (the original condition "nUndone > 0" is disabled; the test below
   // can never hold, so this leftover-packing phase is inactive)
//if ( nUndone > 0 )
   if ( nUndone > localNRows )
2370    {
2371       count = nUndone / minAggrSize_;
2372       if ( count == 0 ) count = 1;
2373       count += naggr;
2374       irow = jcol = 0;
2375       while ( nUndone > 0 )
2376       {
2377          if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY )
2378          {
2379             node2aggr[irow] = naggr;
2380             nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
2381             nUndone--;
2382             nSelected++;
2383             jcol++;
2384             if ( jcol >= minAggrSize_ && naggr < count-1 )
2385             {
2386                jcol = 0;
2387                naggr++;
2388             }
2389          }
2390          irow++;
2391       }
2392       naggr = count;
2393    }
2394 
2395    /*-----------------------------------------------------------------
2396     * diagnostics
2397     *-----------------------------------------------------------------*/
2398 
2399    if ((nSelected+nNotSelected) < localNRows)
2400    {
2401 #ifdef MLI_DEBUG_DETAILED
2402       for (irow = 0; irow < localNRows; irow++)
2403       {
2404          if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
2405          {
2406             rowNum = startRow + irow;
2407             printf("%5d : unaggregated node = %8d\n", mypid, rowNum);
2408             hypre_ParCSRMatrixGetRow(hypreG,rowNum,&rowLeng,&cols,NULL);
2409             for ( jcol = 0; jcol < rowLeng; jcol++ )
2410             {
2411                colNum = cols[jcol];
2412                printf("ERROR : neighbor of unselected node %9d = %9d\n",
2413                      rowNum, colNum);
2414             }
2415             hypre_ParCSRMatrixRestoreRow(hypreG,rowNum,&rowLeng,&cols,NULL);
2416          }
2417       }
2418 #else
2419       printf("%5d : ERROR - not all nodes aggregated.\n", mypid);
2420       exit(1);
2421 #endif
2422    }
2423 
2424    /*-----------------------------------------------------------------
2425     * clean up and initialize the output arrays
2426     *-----------------------------------------------------------------*/
2427 
2428    if (localNRows > 0) delete [] aggrSizes;
2429    if (localNRows > 0) delete [] nodeStat;
2430    if (localNRows == 1 && naggr == 0)
2431    {
2432       node2aggr[0] = 0;
2433       naggr = 1;
2434    }
2435    (*aggrInfoOut) = node2aggr;
2436    (*naggrOut)  = naggr;
2437    return 0;
2438 }
2439 
2440 //**********************************************************************
2441 // set up domain decomposition method by extending the local problem
// (A simplified version of setupExtendedDomainDecomp using an
// inefficient method - intended for testing only)
2444 // ---------------------------------------------------------------------
2445 
int MLI_Method_AMGSA::setupExtendedDomainDecomp2(MLI *mli)
2447 {
2448    MLI_Function *funcPtr;
2449    int     nRecvs, *recvProcs, nSends, *sendProcs, ierr, *rowSizes;
2450    int     *colInds, rowIndex, iP;
   int     *recvLengs=NULL, mypid, nprocs, level, *Apartition, ANRows;
2452    int     nodeDofs, iD, iD2, AStart, AExtNRows;
2453    double  *nullVecs=NULL, *colVals;
2454    char    paramString[50];
2455    MPI_Comm            comm;
2456    MLI_Matrix          *mli_Amat;
2457    hypre_ParCSRMatrix  *hypreA;
2458    hypre_ParCSRCommPkg *commPkg;
2459 
2460    /* --------------------------------------------------------------- */
2461    /* error checking                                                  */
2462    /* --------------------------------------------------------------- */
2463 
2464    if (mli == NULL)
2465    {
2466       printf("MLI_Method_AMGSA::setupExtendedDomainDecomp2 ERROR");
2467       printf(" - no mli.\n");
2468       exit(1);
2469    }
2470 
2471    /* --------------------------------------------------------------- */
2472    /* fetch communicator and fine matrix information                  */
2473    /* --------------------------------------------------------------- */
2474 
2475    comm = getComm();
2476    MPI_Comm_rank(comm, &mypid);
2477    MPI_Comm_size(comm, &nprocs);
2478 
2479 #ifdef MLI_DEBUG_DETAILED
2480    printf("%d : AMGSA::setupExtendedDomainDecomp2 begins...\n",mypid);
2481 #endif
2482 
2483    level = 0;
2484    mli_Amat = mli->getSystemMatrix(level);
2485    hypreA  = (hypre_ParCSRMatrix *) mli_Amat->getMatrix();
2486    HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA,
2487                                         &Apartition);
2488    AStart = Apartition[mypid];
2489    ANRows = Apartition[mypid+1] - AStart;
2490 
2491    /* --------------------------------------------------------------- */
2492    /* first save the nodeDofs and null space information since it     */
2493    /* will be destroyed soon and needs to be recovered later          */
2494    /* --------------------------------------------------------------- */
2495 
2496    nodeDofs = currNodeDofs_;
2497    nullVecs = new double[nullspaceDim_*ANRows];
2498    if (nullspaceVec_ != NULL)
2499    {
2500       for (iD = 0; iD < nullspaceDim_*ANRows; iD++)
2501          nullVecs[iD] = nullspaceVec_[iD];
2502    }
2503    else
2504    {
2505       for (iD = 0; iD < nullspaceDim_; iD++)
2506          for (iD2 = 0; iD2 < ANRows; iD2++)
2507             if (MABS((iD - iD2)) % nullspaceDim_ == 0)
2508                  nullVecs[iD*ANRows+iD2] = 1.0;
2509             else nullVecs[iD*ANRows+iD2] = 0.0;
2510    }
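   /* default null space: vector iD has ones at the local dofs congruent
    * to iD (mod nullspaceDim_); for nullspaceDim_ = 3 the vectors
    * interleave as (1,0,0,1,0,0,...), (0,1,0,0,1,0,...), (0,0,1,...) */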
2511 
2512    /* --------------------------------------------------------------- */
2513    /* get my neighbor processor information                           */
2514    /* --------------------------------------------------------------- */
2515 
2516    commPkg = hypre_ParCSRMatrixCommPkg(hypreA);
2517    if (commPkg == NULL)
2518    {
2519       hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hypreA);
2520       commPkg = hypre_ParCSRMatrixCommPkg(hypreA);
2521    }
2522    nRecvs    = hypre_ParCSRCommPkgNumRecvs(commPkg);
2523    recvProcs = hypre_ParCSRCommPkgRecvProcs(commPkg);
2524    nSends    = hypre_ParCSRCommPkgNumSends(commPkg);
2525    sendProcs = hypre_ParCSRCommPkgSendProcs(commPkg);
2526    if (nRecvs > 0) recvLengs = new int[nRecvs];
2527 
2528    /* --------------------------------------------------------------- */
2529    /* calculate the size of my expanded matrix AExt which is the sum  */
2530    /* of the rows of my neighbors (and mine also)                     */
2531    /* --------------------------------------------------------------- */
2532 
   AExtNRows = ANRows;  /* my own rows form the first block of my AExt */
2534    for (iP = 0; iP < nRecvs; iP++)
2535    {
2536       recvLengs[iP] = Apartition[recvProcs[iP]+1] - Apartition[recvProcs[iP]];
2537       AExtNRows += recvLengs[iP];
2538    }
2539 
2540    /* --------------------------------------------------------------- */
2541    /* communicate processor offsets for AExt (needed to create        */
2542    /* partition)                                                      */
2543    /* --------------------------------------------------------------- */
2544 
2545    int *AExtpartition = new int[nprocs+1];
2546    MPI_Allgather(&AExtNRows, 1, MPI_INT, &AExtpartition[1], 1, MPI_INT, comm);
2547    AExtpartition[0] = 0;
2548    for (iP = 1; iP < nprocs; iP++)
2549       AExtpartition[iP+1] = AExtpartition[iP+1] + AExtpartition[iP];
2550 
2551    /* --------------------------------------------------------------- */
   /* create QExt (row-replication operator used to form local AExt)  */
2553    /* --------------------------------------------------------------- */
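   /* illustration (assuming the own-rows-first layout used here): with
    * 2 processors, 2 rows each and each the other's neighbor,
    * AExtNRows = 4 on both and AExtpartition = {0,4,8}; the local QExt
    * block on processor 0 is 2 x 8 with ones at (0,0), (0,6), (1,1),
    * (1,7), i.e. each local row maps to its own extended column and to
    * its copy inside the neighbor's extended block */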
2554 
2555    int QExtCStart = AExtpartition[mypid];
2556    int QExtNCols = AExtpartition[mypid+1] - QExtCStart;
2557    int QExtNRows = ANRows;
2558    int QExtRStart = AStart;
2559    HYPRE_IJMatrix IJ_QExt;
2560 
2561    ierr  = HYPRE_IJMatrixCreate(comm,QExtRStart,QExtRStart+QExtNRows-1,
2562                                 QExtCStart,QExtCStart+QExtNCols-1,&IJ_QExt);
2563    ierr += HYPRE_IJMatrixSetObjectType(IJ_QExt, HYPRE_PARCSR);
2564    hypre_assert(!ierr);
2565    rowSizes = new int[QExtNRows];
   for (iD = 0; iD < ANRows; iD++) rowSizes[iD] = nSends + 1;
2567    ierr  = HYPRE_IJMatrixSetRowSizes(IJ_QExt, rowSizes);
2568    ierr += HYPRE_IJMatrixInitialize(IJ_QExt);
2569    hypre_assert(!ierr);
2570    delete [] rowSizes;
2571 
2572    /* --------------------------------------------------------------- */
2573    /* when creating QExt, need to set up the QExtOffsets ==>          */
2574    /* since QExt(i,j) = 1 means putting local col i into external col */
2575    /* j, and since I may not own row i of PE, I need to tell my       */
2576    /* neighbor processor who owns row i the value of j.               */
2577    /* --------------------------------------------------------------- */
2578 
2579    int         *QExtOffsets, offset, pindex;
2580    MPI_Request *mpiRequests;
2581    MPI_Status  mpiStatus;
2582 
2583    if (nSends > 0)
2584    {
2585       mpiRequests = new MPI_Request[nSends];
2586       QExtOffsets = new int[nSends];
2587    }
2588    for (iP = 0; iP < nSends; iP++)
2589       MPI_Irecv(&QExtOffsets[iP],1,MPI_INT,sendProcs[iP],434243,comm,
2590                 &(mpiRequests[iP]));
   offset = ANRows;  /* my own rows occupy columns 0..ANRows-1 of my block */
   // tell each neighbor at what offset within my extended column block
   // its rows will be placed (add my block's global start to get columns)
2594    for (iP = 0; iP < nRecvs; iP++)
2595    {
2596       MPI_Send(&offset, 1, MPI_INT, recvProcs[iP], 434243, comm);
2597       offset += (Apartition[recvProcs[iP]+1] - Apartition[recvProcs[iP]]);
2598    }
2599    for ( iP = 0; iP < nSends; iP++ )
2600       MPI_Wait( &(mpiRequests[iP]), &mpiStatus );
2601    if (nSends > 0) delete [] mpiRequests;
2602 
2603    /* --------------------------------------------------------------- */
2604    /* create QExt                                                     */
2605    /* --------------------------------------------------------------- */
2606 
2607    hypre_ParCSRMatrix *hypreQExt;
2608    MLI_Matrix         *mli_QExt;
2609 
   colInds = new int[nSends+1];
   colVals = new double[nSends+1];
   for (iP = 0; iP <= nSends; iP++) colVals[iP] = 1.0;
   int QExtRowLeng = nSends + 1;
   for (iD = 0; iD < QExtNRows; iD++)
   {
      rowIndex = QExtRStart + iD;
      /* my own equation maps into my block of the extended space */
      colInds[0] = QExtCStart + iD;
      /* copies of my equation map into each neighbor's block */
      for (iP = 0; iP < nSends; iP++)
      {
         pindex = sendProcs[iP];
         colInds[iP+1] = AExtpartition[pindex] + QExtOffsets[iP] + iD;
      }
      HYPRE_IJMatrixSetValues(IJ_QExt, 1, &QExtRowLeng,
                (const int *) &rowIndex, (const int *) colInds,
                (const double *) colVals);
   }
   delete [] colInds;
   delete [] colVals;
   if (nSends > 0) delete [] QExtOffsets;
2634    HYPRE_IJMatrixAssemble(IJ_QExt);
2635    HYPRE_IJMatrixGetObject(IJ_QExt, (void **) &hypreQExt);
2636    sprintf(paramString, "HYPRE_ParCSR");
2637    mli_QExt = new MLI_Matrix( (void *) hypreQExt, paramString, NULL);
2638    commPkg = hypre_ParCSRMatrixCommPkg(hypreQExt);
2639    if (commPkg == NULL)
2640       hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hypreQExt);
2641 
2642    /* --------------------------------------------------------------- */
2643    /* form the expanded fine matrix                                   */
2644    /* --------------------------------------------------------------- */
2645 
2646    MLI_Matrix         *mli_AExt;
2647    //hypre_ParCSRMatrix *hypreAExt;
2648 
2649    MLI_Matrix_ComputePtAP(mli_QExt, mli_Amat, &mli_AExt);
2650    //hypreAExt = (hypre_ParCSRMatrix *) mli_AExt->getMatrix();
2651    delete mli_QExt;
2652    HYPRE_IJMatrixDestroy(IJ_QExt);
2653 
2654    /* --------------------------------------------------------------- */
2655    /* communicate null space information for graded coarsening        */
2656    /* --------------------------------------------------------------- */
2657 
   int    rLength, sLength;
   double *tmpNullVecs=NULL;
2660 
2661    if (QExtNCols > 0) tmpNullVecs = new double[QExtNCols*nullspaceDim_];
2662    if (nRecvs > 0) mpiRequests = new MPI_Request[nRecvs];
2663 
   offset = 0;
   for (iP = 0; iP < nRecvs; iP++)
   {
      /* each neighbor sends its own null-space vectors, which is    */
      /* recvLengs[iP]*nullspaceDim_ values from my point of view    */
      rLength = recvLengs[iP] * nullspaceDim_;
      MPI_Irecv(&tmpNullVecs[offset],rLength,MPI_DOUBLE,recvProcs[iP],14581,
                comm,&(mpiRequests[iP]));
      offset += rLength;
   }
   for (iP = 0; iP < nSends; iP++)
   {
      sLength = ANRows * nullspaceDim_;
      MPI_Send(nullVecs, sLength, MPI_DOUBLE, sendProcs[iP], 14581, comm);
   }
   for (iP = 0; iP < nRecvs; iP++)
      MPI_Wait(&(mpiRequests[iP]), &mpiStatus);
   if (nRecvs > 0) delete [] mpiRequests;

   if (nullspaceVec_ != NULL) delete [] nullspaceVec_;
   nullspaceVec_ = new double[QExtNCols*nullspaceDim_];
   for (iD = 0; iD < nullspaceDim_; iD++)
      for (iD2 = 0; iD2 < ANRows; iD2++)
         nullspaceVec_[iD*QExtNCols+iD2] = nullVecs[iD*ANRows+iD2];
   offset = ANRows;          /* column offset of the received blocks */
   int srcOffset = 0;        /* read offset into tmpNullVecs         */
   for ( iP = 0; iP < nRecvs; iP++ )
   {
      rLength = recvLengs[iP];
      for (iD = 0; iD < nullspaceDim_; iD++)
         for (iD2 = 0; iD2 < rLength; iD2++)
            nullspaceVec_[iD*QExtNCols+iD2+offset] =
               tmpNullVecs[srcOffset+iD*rLength+iD2];
      offset += rLength;
      srcOffset += rLength * nullspaceDim_;
   }
2698    if (QExtNCols > 0) delete [] tmpNullVecs;
2699    delete [] AExtpartition;
2700 
2701    /* --------------------------------------------------------------- */
2702    /* graded coarsening on the expanded fine matrix                   */
2703    /* (this P will be needed later to collect neighbor unknowns       */
2704    /*  before smoothing - its transpose)                              */
2705    /* --------------------------------------------------------------- */
2706 
2707    MLI_Matrix *mli_PExt;
2708    genP_AExt(mli_AExt, &mli_PExt, ANRows);
2709 
2710    /* --------------------------------------------------------------- */
2711    /* create the local domain decomposition matrix                    */
2712    /* reduced so that the local subdomain matrix is smaller - will be */
2713    /* used as a smoother                                              */
2714    /* --------------------------------------------------------------- */
2715 
2716    MLI_Matrix *mli_ACExt;
2717    MLI_Matrix_ComputePtAP(mli_PExt, mli_AExt, &mli_ACExt);
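   /* ACExt = PExt^T * AExt * PExt is the reduced overlapped-subdomain
    * operator that the DD smoother below solves */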
2718 
2719    /* --------------------------------------------------------------- */
   /* convert ACExt into a serial matrix on a one-process communicator */
   /* (the overlapped subdomain solve below is purely local)           */
2722    /* --------------------------------------------------------------- */
2723 
2724    int      iZero=0, ACExtNRows, *newRowSizes, *ACExtI, *ACExtJ;
2725    double   *ACExtD;
2726    MPI_Comm newMPIComm;
2727    hypre_ParCSRMatrix *hypreACExt, *hypreNewA;
2728    hypre_CSRMatrix    *csrACExt;
2729    HYPRE_IJMatrix     IJnewA;
2730 
   hypreACExt = (hypre_ParCSRMatrix *) mli_ACExt->getMatrix();
2732    ACExtNRows = hypre_ParCSRMatrixNumRows(hypreACExt);
2733 
2734    MPI_Comm_split(comm, mypid, iZero, &newMPIComm);
2735    ierr  = HYPRE_IJMatrixCreate(newMPIComm,iZero,ACExtNRows-1,iZero,
2736                                 ACExtNRows-1, &IJnewA);
2737    ierr += HYPRE_IJMatrixSetObjectType(IJnewA, HYPRE_PARCSR);
2738    hypre_assert(!ierr);
2739    if (ACExtNRows > 0) newRowSizes = new int[ACExtNRows];
2740    csrACExt = hypre_ParCSRMatrixDiag(hypreACExt);
2741    ACExtI = hypre_CSRMatrixI(csrACExt);
2742    ACExtJ = hypre_CSRMatrixJ(csrACExt);
2743    ACExtD = hypre_CSRMatrixData(csrACExt);
2744    for (iD = 0; iD < ACExtNRows; iD++)
2745       newRowSizes[iD] = ACExtI[iD+1] - ACExtI[iD];
2746    ierr  = HYPRE_IJMatrixSetRowSizes(IJnewA, newRowSizes);
2747    ierr += HYPRE_IJMatrixInitialize(IJnewA);
2748    hypre_assert(!ierr);
2749    for (iD = 0; iD < ACExtNRows; iD++)
2750    {
2751       offset = ACExtI[iD];
2752       HYPRE_IJMatrixSetValues(IJnewA, 1, &newRowSizes[iD], (const int *) &iD,
2753                (const int *) &ACExtJ[offset], (const double *) &ACExtD[offset]);
2754    }
2755    if (ACExtNRows > 0) delete [] newRowSizes;
2756    HYPRE_IJMatrixAssemble(IJnewA);
2757    HYPRE_IJMatrixGetObject(IJnewA, (void **) &hypreNewA);
2758    sprintf(paramString, "HYPRE_ParCSR");
2759    funcPtr = new MLI_Function();
2760    MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
2761    delete mli_ACExt;
2762    mli_ACExt = new MLI_Matrix( (void *) hypreNewA, paramString, funcPtr);
2763    delete mli_AExt;
2764    HYPRE_IJMatrixSetObjectType(IJnewA, -1);
2765    HYPRE_IJMatrixDestroy(IJnewA);
2766 
2767    /* --------------------------------------------------------------- */
2768    /* create new overlapped DD smoother using sequential SuperLU      */
2769    /* --------------------------------------------------------------- */
2770 
   int        *sendLengs=NULL;
2772    char       *targv[7];
2773    MLI_Solver *smootherPtr;
2774    MLI_Matrix *mli_PSmat;
2775 
2776    if (!strcmp(preSmoother_, "CGMLI")) sprintf(paramString, "CGMLI");
2777    else                                sprintf(paramString, "CGAMG");
2778    smootherPtr = MLI_Solver_CreateFromName(paramString);
2779    sprintf(paramString, "numSweeps 10000");
2780    smootherPtr->setParams(paramString, 0, NULL);
2781    sprintf(paramString, "tolerance 1.0e-6");
2782    smootherPtr->setParams(paramString, 0, NULL);

   // send PSmat and communication information to smoother
   if (nSends > 0) sendLengs = new int[nSends];
   for (iP = 0; iP < nSends; iP++) sendLengs[iP] = ANRows;
   sprintf(paramString, "setPmat");
   mli_PSmat = mli_PExt;
   targv[0] = (char *) mli_PSmat;
   smootherPtr->setParams(paramString, 1, targv);
   sprintf(paramString, "setCommData");
   targv[0] = (char *) &nRecvs;
   targv[1] = (char *) recvProcs;
   targv[2] = (char *) recvLengs;
   targv[3] = (char *) &nSends;
   targv[4] = (char *) sendProcs;
   targv[5] = (char *) sendLengs;
   targv[6] = (char *) &comm;
   smootherPtr->setParams(paramString, 7, targv);
   if (nSends > 0) delete [] sendLengs;
   if (nRecvs > 0) delete [] recvLengs;
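
   // (note) by convention here, MLI_Solver::setParams marshals its
   // arguments as an array of opaque char* pointers; each entry above
   // is expected to be cast back to its real type (int*, int arrays,
   // MPI_Comm*) inside the solver, so order and count (7) must match.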

   smootherPtr->setup(mli_ACExt);
   mli->setSmoother(level, MLI_SMOOTHER_PRE, smootherPtr);

   /* --------------------------------------------------------------- */
   /* create prolongation and coarse grid operators                   */
   /* --------------------------------------------------------------- */

   MLI_Solver *csolvePtr;
   MLI_Matrix *mli_Pmat, *mli_Rmat, *mli_cAmat;

   // set up one aggregate per processor
   saCounts_[0] = 1;
   if (saData_[0] != NULL) delete [] saData_[0];
   saData_[0] = new int[ANRows];
   for (iD = 0; iD < ANRows; iD++) saData_[0][iD] = 0;

   // restore nullspace changed by the last genP
   currNodeDofs_ = nodeDofs;
   if (nullspaceVec_ != NULL) delete [] nullspaceVec_;
   nullspaceVec_ = new double[nullspaceDim_*ANRows];
   for (iD = 0; iD < nullspaceDim_*ANRows; iD++)
      nullspaceVec_[iD] = nullVecs[iD];
   delete [] nullVecs;

   // create prolongation and coarse grid operators
   genP(mli_Amat, &mli_Pmat, saCounts_[0], saData_[0]);
   MLI_Matrix_ComputePtAP(mli_Pmat, mli_Amat, &mli_cAmat);
   mli->setSystemMatrix(level+1, mli_cAmat);
   mli->setProlongation(level+1, mli_Pmat);
   sprintf(paramString, "HYPRE_ParCSRT");
   mli_Rmat = new MLI_Matrix(mli_Pmat->getMatrix(), paramString, NULL);
   mli->setRestriction(level, mli_Rmat);
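
   // (note) the "HYPRE_ParCSRT" name marks the stored matrix as one to
   // be applied transposed, so restriction uses R = P^T via transpose
   // matvecs without explicitly assembling a restriction matrix (the
   // wrapper above reuses mli_Pmat's underlying hypre matrix).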

   /* --------------------------------------------------------------- */
   /* setup coarse solver                                             */
   /* --------------------------------------------------------------- */

   strcpy( paramString, "SuperLU" );
   csolvePtr = MLI_Solver_CreateFromName( paramString );
   csolvePtr->setup(mli_cAmat);
   mli->setCoarseSolve(csolvePtr);

   /* --------------------------------------------------------------- */
   /* clean up                                                        */
   /* --------------------------------------------------------------- */

   free(Apartition);

#ifdef MLI_DEBUG_DETAILED
   printf("%d : MLI_Method_AMGSA::setupExtendedDomainDecomp2 ends.\n",mypid);
#endif

   // this scheme always builds exactly two levels (fine + coarse)
   level = 2;
   return (level);
}

// ************************************************************************
// Purpose : Given Amat, perform preferential coarsening
// (called by setupExtendedDomainDecomp2)
// ------------------------------------------------------------------------

double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat, MLI_Matrix **PmatOut,
                                   int inANRows)
{
   int    mypid, nprocs, *partition, AStartRow, AEndRow, ALocalNRows;
   int    blkSize, naggr, *node2aggr, ierr, PLocalNCols, PStartCol;
   int    PLocalNRows, PStartRow, *eqn2aggr, irow, jcol, ig;
   int    *PCols, maxAggSize, *aggCntArray, index, **aggIndArray;
   int    aggSize, nzcnt, *rowLengths, rowNum, *colInd;
   double **PVecs, *newNull, *qArray, *rArray, *colVal;
   char   paramString[200];
   HYPRE_IJMatrix      IJPmat;
   hypre_ParCSRMatrix  *Amat, *A2mat, *Pmat;
   MLI_Matrix          *mli_A2mat=NULL, *mli_Pmat;
   MPI_Comm            comm;
   hypre_ParCSRCommPkg *commPkg;
   MLI_Function        *funcPtr;

   /*-----------------------------------------------------------------
    * fetch matrix and machine information
    *-----------------------------------------------------------------*/

   Amat = (hypre_ParCSRMatrix *) mli_Amat->getMatrix();
   comm = hypre_ParCSRMatrixComm(Amat);
   MPI_Comm_rank(comm,&mypid);
   MPI_Comm_size(comm,&nprocs);

   /*-----------------------------------------------------------------
    * fetch other matrix information
    *-----------------------------------------------------------------*/

   HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) Amat,&partition);
   AStartRow = partition[mypid];
   AEndRow   = partition[mypid+1] - 1;
   ALocalNRows = AEndRow - AStartRow + 1;
   free(partition);

   /*-----------------------------------------------------------------
    * reduce Amat based on the block size information (if nodeDofs_ > 1)
    *-----------------------------------------------------------------*/

   blkSize = currNodeDofs_;
   if (blkSize > 1) MLI_Matrix_Compress(mli_Amat, blkSize, &mli_A2mat);
   else             mli_A2mat = mli_Amat;
   A2mat = (hypre_ParCSRMatrix *) mli_A2mat->getMatrix();

   /*-----------------------------------------------------------------
    * modify minimum aggregate size, if needed
    *-----------------------------------------------------------------*/

   minAggrSize_ = nullspaceDim_ / currNodeDofs_;
   if (minAggrSize_ <= 1) minAggrSize_ = 2;
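
   /* e.g. 3D elasticity : nullspaceDim_ = 6 rigid body modes and     */
   /* currNodeDofs_ = 3 give minAggrSize_ = 2; the guard above keeps  */
   /* aggregates of at least two nodes even when the ratio is <= 1.   */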

   /*-----------------------------------------------------------------
    * perform coarsening
    *-----------------------------------------------------------------*/

   coarsenAExt(A2mat, &naggr, &node2aggr, inANRows);
   if (blkSize > 1 && mli_A2mat != NULL) delete mli_A2mat;

   /*-----------------------------------------------------------------
    * fetch the coarse grid information and instantiate P
    *-----------------------------------------------------------------*/

   PLocalNCols = naggr * nullspaceDim_;
   MLI_Utils_GenPartition(comm, PLocalNCols, &partition);
   PStartCol = partition[mypid];
   free(partition);
   PLocalNRows = ALocalNRows;
   PStartRow   = AStartRow;
   ierr  = HYPRE_IJMatrixCreate(comm,PStartRow,PStartRow+PLocalNRows-1,
                          PStartCol,PStartCol+PLocalNCols-1,&IJPmat);
   ierr += HYPRE_IJMatrixSetObjectType(IJPmat, HYPRE_PARCSR);
   hypre_assert(!ierr);

   /*-----------------------------------------------------------------
    * expand the aggregation information if block size > 1 ==> eqn2aggr
    *-----------------------------------------------------------------*/

   if (blkSize > 1)
   {
      eqn2aggr = new int[ALocalNRows];
      for (irow = 0; irow < ALocalNRows; irow++)
         eqn2aggr[irow] = node2aggr[irow/blkSize];
      delete [] node2aggr;
   }
   else eqn2aggr = node2aggr;
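
   /* e.g. blkSize = 3 and node2aggr = {0,0,1} expands to             */
   /* eqn2aggr = {0,0,0, 0,0,0, 1,1,1} : all dofs of a node go to     */
   /* that node's aggregate.                                          */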

   /*-----------------------------------------------------------------
    * create a compact form for the null space vectors
    * (get ready to perform QR on them)
    *-----------------------------------------------------------------*/

   PVecs = new double*[nullspaceDim_];
   PCols = new int[PLocalNRows];
   for (irow = 0; irow < nullspaceDim_; irow++)
      PVecs[irow] = new double[PLocalNRows];
   for (irow = 0; irow < PLocalNRows; irow++)
   {
      if (eqn2aggr[irow] >= 0)
           PCols[irow] = PStartCol + eqn2aggr[irow] * nullspaceDim_;
      else PCols[irow] = PStartCol + (-eqn2aggr[irow]-1) * nullspaceDim_;
      if (nullspaceVec_ != NULL)
      {
         for (jcol = 0; jcol < nullspaceDim_; jcol++)
            PVecs[jcol][irow] = nullspaceVec_[jcol*PLocalNRows+irow];
      }
      else
      {
         for (jcol = 0; jcol < nullspaceDim_; jcol++)
         {
            if (irow % nullspaceDim_ == jcol) PVecs[jcol][irow] = 1.0;
            else                              PVecs[jcol][irow] = 0.0;
         }
      }
   }
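
   /* (note) PCols[irow] is the first of the nullspaceDim_ coarse     */
   /* columns owned by irow's aggregate; when no null space vectors   */
   /* are supplied, the canonical basis is used, e.g. for dim = 3     */
   /* successive rows get the patterns (1,0,0), (0,1,0), (0,0,1).     */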

   /*-----------------------------------------------------------------
    * perform QR for null space
    *-----------------------------------------------------------------*/

   newNull = NULL;
   if (PLocalNRows > 0)
   {
      /* ------ count the size of each aggregate ------ */

      aggCntArray = new int[naggr];
      for (ig = 0; ig < naggr; ig++) aggCntArray[ig] = 0;
      for (irow = 0; irow < PLocalNRows; irow++)
         if (eqn2aggr[irow] >= 0) aggCntArray[eqn2aggr[irow]]++;
         else                     aggCntArray[(-eqn2aggr[irow]-1)]++;
      maxAggSize = 0;
      for (ig = 0; ig < naggr; ig++)
         if (aggCntArray[ig] > maxAggSize) maxAggSize = aggCntArray[ig];

      /* ------ register which equation is in which aggregate ------ */

      aggIndArray = new int*[naggr];
      for (ig = 0; ig < naggr; ig++)
      {
         aggIndArray[ig] = new int[aggCntArray[ig]];
         aggCntArray[ig] = 0;
      }
      for (irow = 0; irow < PLocalNRows; irow++)
      {
         index = eqn2aggr[irow];
         if (index >= 0)
            aggIndArray[index][aggCntArray[index]++] = irow;
         else
            aggIndArray[-index-1][aggCntArray[-index-1]++] = irow;
      }

      /* ------ allocate storage for QR factorization ------ */

      qArray  = new double[maxAggSize * nullspaceDim_];
      rArray  = new double[nullspaceDim_ * nullspaceDim_];
      newNull = new double[naggr*nullspaceDim_*nullspaceDim_];

      /* ------ perform QR on each aggregate ------ */

      for (ig = 0; ig < naggr; ig++)
      {
         aggSize = aggCntArray[ig];

         if (aggSize < nullspaceDim_)
         {
            printf("Aggregation ERROR : underdetermined system in QR.\n");
            printf("            error on Proc %d\n", mypid);
            printf("            error on aggr %d (%d)\n", ig, naggr);
            printf("            aggr size is %d\n", aggSize);
            exit(1);
         }

         /* ------ put data into the temporary array ------ */

         for (jcol = 0; jcol < aggSize; jcol++)
         {
            for (irow = 0; irow < nullspaceDim_; irow++)
               qArray[aggSize*irow+jcol] = PVecs[irow][aggIndArray[ig][jcol]];
         }

         /* ------ no QR factorization is actually performed here ------ */
         /* ------ (rArray is unused) : R is taken to be the       ------ */
         /* ------ identity, so the next null space is a stack of  ------ */
         /* ------ identity blocks, one per aggregate              ------ */

         for (jcol = 0; jcol < nullspaceDim_; jcol++)
            for (irow = 0; irow < nullspaceDim_; irow++)
               if (irow == jcol)
                  newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_] = 1.0;
               else
                  newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_] = 0.0;

         /* ------ copy the (unfactored) aggregate block back to P ------ */

         for (jcol = 0; jcol < aggSize; jcol++)
         {
            for (irow = 0; irow < nullspaceDim_; irow++)
            {
               index = aggIndArray[ig][jcol];
               PVecs[irow][index] = qArray[ irow*aggSize + jcol ];
            }
         }
      }
      for (ig = 0; ig < naggr; ig++) delete [] aggIndArray[ig];
      delete [] aggIndArray;
      delete [] aggCntArray;
      delete [] qArray;
      delete [] rArray;
   }
   if (nullspaceVec_ != NULL) delete [] nullspaceVec_;
   nullspaceVec_ = newNull;

   /*-----------------------------------------------------------------
    * initialize Pmat
    *-----------------------------------------------------------------*/

   rowLengths = new int[PLocalNRows];
   for (irow = 0; irow < PLocalNRows; irow++)
      rowLengths[irow] = nullspaceDim_;
   ierr  = HYPRE_IJMatrixSetRowSizes(IJPmat, rowLengths);
   ierr += HYPRE_IJMatrixInitialize(IJPmat);
   hypre_assert(!ierr);
   delete [] rowLengths;

   /*--------------------------------------------------------------------
    * load and assemble Pmat
    *--------------------------------------------------------------------*/

   colInd = new int[nullspaceDim_];
   colVal = new double[nullspaceDim_];
   for (irow = 0; irow < PLocalNRows; irow++)
   {
      if (PCols[irow] >= 0)
      {
         nzcnt = 0;
         for (jcol = 0; jcol < nullspaceDim_; jcol++)
         {
            if (PVecs[jcol][irow] != 0.0)
            {
               colInd[nzcnt] = PCols[irow] + jcol;
               colVal[nzcnt++] = PVecs[jcol][irow];
            }
         }
         rowNum = PStartRow + irow;
         HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt,
                             (const int *) &rowNum, (const int *) colInd,
                             (const double *) colVal);
      }
   }
   ierr = HYPRE_IJMatrixAssemble(IJPmat);
   hypre_assert(!ierr);
   HYPRE_IJMatrixGetObject(IJPmat, (void **) &Pmat);
   hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) Pmat);
   commPkg = hypre_ParCSRMatrixCommPkg(Amat);
   if (!commPkg) hypre_MatvecCommPkgCreate(Amat);
   HYPRE_IJMatrixSetObjectType(IJPmat, -1);
   HYPRE_IJMatrixDestroy(IJPmat);
   delete [] colInd;
   delete [] colVal;

   /*-----------------------------------------------------------------
    * clean up
    *-----------------------------------------------------------------*/

   if (PCols != NULL) delete [] PCols;
   if (PVecs != NULL)
   {
      for (irow = 0; irow < nullspaceDim_; irow++)
         if (PVecs[irow] != NULL) delete [] PVecs[irow];
      delete [] PVecs;
   }
   if (eqn2aggr != NULL) delete [] eqn2aggr;

   /*-----------------------------------------------------------------
    * set up and return Pmat
    *-----------------------------------------------------------------*/

   funcPtr = new MLI_Function();
   MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
   sprintf(paramString, "HYPRE_ParCSR" );
   mli_Pmat = new MLI_Matrix( Pmat, paramString, funcPtr );
   (*PmatOut) = mli_Pmat;
   delete funcPtr;
   return 0.0;
}

// *********************************************************************
// graded coarsening scheme (given a graph, aggregate on the local
// subgraph, but form smaller aggregates near processor boundaries)
// (called by setupExtendedDomainDecomp2/genP_AExt)
// ---------------------------------------------------------------------

int MLI_Method_AMGSA::coarsenAExt(hypre_ParCSRMatrix *hypreG,
               int *mliAggrLeng, int **mliAggrArray, int inANRows)
{
   MPI_Comm  comm;
   int       mypid, nprocs, *partition, startRow, endRow, maxInd;
   int       localNRows, naggr=0, *node2aggr, *aggrSizes;
   int       irow, jcol, rowLeng, globalNRows, index;
   int       *nodeStat, selectFlag, nSelected=0, nNotSelected=0, count;
   int       *GDiagI, *GDiagJ;
   double    maxVal, *GDiagA;
   hypre_CSRMatrix *GDiag;

   /*-----------------------------------------------------------------
    * fetch machine and matrix parameters
    *-----------------------------------------------------------------*/

   comm = hypre_ParCSRMatrixComm(hypreG);
   MPI_Comm_rank(comm,&mypid);
   MPI_Comm_size(comm,&nprocs);
   HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreG,
                                        &partition);
   startRow = partition[mypid];
   endRow   = partition[mypid+1] - 1;
   free(partition);
   localNRows = endRow - startRow + 1;
   MPI_Allreduce(&localNRows, &globalNRows, 1, MPI_INT, MPI_SUM, comm);
   if (mypid == 0 && outputLevel_ > 1)
   {
      printf("\t*** Aggregation(E) : total nodes to aggregate = %d\n",
             globalNRows);
   }
   GDiag  = hypre_ParCSRMatrixDiag(hypreG);
   GDiagI = hypre_CSRMatrixI(GDiag);
   GDiagJ = hypre_CSRMatrixJ(GDiag);
   GDiagA = hypre_CSRMatrixData(GDiag);
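
   /* (note) standard CSR layout : the entries of row irow are        */
   /*     GDiagJ[k], GDiagA[k]  for  k = GDiagI[irow] ...             */
   /*                                    GDiagI[irow+1]-1             */
   /* e.g. GDiagI = {0,2,3} describes 2 rows with 2 and 1 nonzeros.   */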

   /*-----------------------------------------------------------------
    * allocate status arrays
    *-----------------------------------------------------------------*/
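
   /* (note) rows 0 .. inANRows-1 are the original local rows; they   */
   /* are pre-marked SELECTED (and never re-aggregated), so only the  */
   /* received, extended rows inANRows .. localNRows-1 take part in   */
   /* the aggregation phases below.                                   */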

   if (localNRows > 0)
   {
      node2aggr = new int[localNRows];
      aggrSizes = new int[localNRows];
      nodeStat  = new int[localNRows];
      for (irow = 0; irow < inANRows; irow++)
      {
         aggrSizes[irow] = 1;
         node2aggr[irow] = -1;
         nodeStat[irow]  = MLI_METHOD_AMGSA_SELECTED;
      }
      for (irow = inANRows; irow < localNRows; irow++)
      {
         aggrSizes[irow] = 0;
         node2aggr[irow] = -1;
         nodeStat[irow]  = MLI_METHOD_AMGSA_READY;
      }
      nSelected = inANRows;
   }
   else node2aggr = aggrSizes = nodeStat = NULL;

   /*-----------------------------------------------------------------
    * search for zero rows and rows near the processor boundaries
    *-----------------------------------------------------------------*/

   for (irow = inANRows; irow < localNRows; irow++)
   {
      rowLeng = GDiagI[irow+1] - GDiagI[irow];
      if (rowLeng <= 0)
      {
         nodeStat[irow] = MLI_METHOD_AMGSA_NOTSELECTED;
         nNotSelected++;
      }
   }

   /*-----------------------------------------------------------------
    * Phase 0 : 1 node per aggregate for the immediate neighbors
    *-----------------------------------------------------------------*/
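
   /* (note) every extended row adjacent to an original row becomes a */
   /* singleton aggregate here, which grades the aggregate sizes from */
   /* small (near the subdomain boundary) to normal (in the interior).*/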

   for (irow = 0; irow < inANRows; irow++)
   {
      for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++)
      {
         index = GDiagJ[jcol];
         if (index >= inANRows && nodeStat[index]==MLI_METHOD_AMGSA_READY)
         {
            node2aggr[index]  = naggr;
            nodeStat[index] = MLI_METHOD_AMGSA_SELECTED;
            aggrSizes[naggr] = 1;
            naggr++;
            nSelected++;
         }
      }
   }

   /*-----------------------------------------------------------------
    * Phase 1 : small aggregates for the next level
    *-----------------------------------------------------------------*/

   for (irow = inANRows; irow < localNRows; irow++)
   {
      if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
      {
         selectFlag = 0;
         for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++)
         {
            index = GDiagJ[jcol];
            if (nodeStat[index] == MLI_METHOD_AMGSA_SELECTED)
            {
               selectFlag = 1;
               break;
            }
         }
         if (selectFlag == 1)
         {
            nSelected++;
            node2aggr[irow]  = naggr;
            aggrSizes[naggr] = 1;
            nodeStat[irow]  = MLI_METHOD_AMGSA_SELECTED2;
            for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++)
            {
               index = GDiagJ[jcol];
               if (nodeStat[index] == MLI_METHOD_AMGSA_READY)
               {
                  nSelected++;
                  node2aggr[index] = naggr;
                  aggrSizes[naggr]++;
                  nodeStat[index]  = MLI_METHOD_AMGSA_SELECTED2;
               }
            }
            naggr++;
         }
      }
   }
   for (irow = inANRows; irow < localNRows; irow++)
      if (nodeStat[irow] == MLI_METHOD_AMGSA_SELECTED2)
         nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;

   /*-----------------------------------------------------------------
    * Phase 2 : form aggregates from the remaining interior nodes
    *-----------------------------------------------------------------*/

   for (irow = inANRows; irow < localNRows; irow++)
   {
      if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
      {
         selectFlag = 1;
         for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++)
         {
            index = GDiagJ[jcol];
            if (nodeStat[index] != MLI_METHOD_AMGSA_READY)
            {
               selectFlag = 0;
               break;
            }
         }
         if (selectFlag == 1)
         {
            aggrSizes[naggr] = 1;
            node2aggr[irow] = naggr;
            nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
            nSelected++;
            for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++)
            {
               index = GDiagJ[jcol];
               if (index == irow) continue;   /* diagonal already counted */
               node2aggr[index] = naggr;
               nodeStat[index] = MLI_METHOD_AMGSA_SELECTED;
               aggrSizes[naggr]++;
               nSelected++;
            }
            naggr++;
         }
      }
   }

   /*-----------------------------------------------------------------
    * Phase 3 : put the rest into one of the existing aggregates
    *-----------------------------------------------------------------*/

   if ((nSelected+nNotSelected) < localNRows)
   {
      for (irow = inANRows; irow < localNRows; irow++)
      {
         if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
         {
            maxInd  = -1;
            maxVal  = 0.0;
            for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++)
            {
               index = GDiagJ[jcol];
               if (nodeStat[index] == MLI_METHOD_AMGSA_SELECTED)
               {
                  if (GDiagA[jcol] > maxVal)
                  {
                     maxInd = index;   /* strongest selected neighbor */
                     maxVal = GDiagA[jcol];
                  }
               }
            }
            if (maxInd != -1)
            {
               node2aggr[irow] = node2aggr[maxInd];
               nodeStat[irow] = MLI_METHOD_AMGSA_PENDING;
               aggrSizes[node2aggr[maxInd]]++;
            }
         }
      }
      for (irow = inANRows; irow < localNRows; irow++)
      {
         if (nodeStat[irow] == MLI_METHOD_AMGSA_PENDING)
         {
            nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
            nSelected++;
         }
      }
   }

   /*-----------------------------------------------------------------
    * Phase 4 : finally sweep the remaining lone rows into new
    * aggregates of roughly minAggrSize_ rows each
    *-----------------------------------------------------------------*/
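
   /* e.g. nUndone = 5, minAggrSize_ = 2 : count = 2 new aggregates   */
   /* are created and the sweep below deals the lone rows into them   */
   /* (sizes 2 and 3, since the last aggregate absorbs the remainder).*/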

   int nUndone = localNRows - nSelected - nNotSelected;
   if (nUndone > 0)
   {
      count = nUndone / minAggrSize_;
      if (count == 0) count = 1;
      count += naggr;
      irow = jcol = 0;
      while (nUndone > 0)
      {
         if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
         {
            node2aggr[irow] = naggr;
            nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
            nUndone--;
            nSelected++;
            jcol++;
            if (jcol >= minAggrSize_ && naggr < count-1)
            {
               jcol = 0;
               naggr++;
            }
         }
         irow++;
      }
      naggr = count;
   }

   /*-----------------------------------------------------------------
    * clean up and initialize the output arrays
    *-----------------------------------------------------------------*/

   if (localNRows > 0) delete [] aggrSizes;
   if (localNRows > 0) delete [] nodeStat;
   if (localNRows == 1 && naggr == 0)
   {
      node2aggr[0] = 0;
      naggr = 1;
   }
   (*mliAggrArray) = node2aggr;
   (*mliAggrLeng)  = naggr;
   return 0;
}