/******************************************************************************
 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
 *
 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
 ******************************************************************************/

// *********************************************************************
// This file is customized to use HYPRE matrix format
// *********************************************************************

// *********************************************************************
// system includes
// ---------------------------------------------------------------------

#include <string.h>

#define MABS(x) ((x) > 0 ? (x) : (-(x)))

// *********************************************************************
// HYPRE includes external to MLI
// ---------------------------------------------------------------------

#include "HYPRE.h"
#include "_hypre_utilities.h"
#include "HYPRE_IJ_mv.h"
#include "seq_mv.h"
#include "_hypre_parcsr_mv.h"

#define mabs(x) ((x) > 0 ? (x) : (-(x)))

// *********************************************************************
// local defines
// ---------------------------------------------------------------------

#define MLI_METHOD_AMGSA_READY       -1
#define MLI_METHOD_AMGSA_SELECTED    -2
#define MLI_METHOD_AMGSA_PENDING     -3
#define MLI_METHOD_AMGSA_NOTSELECTED -4
#define MLI_METHOD_AMGSA_SELECTED2   -5

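// Node status codes used by the aggregation routines below: a node is
// READY until visited, SELECTED once absorbed into an interior aggregate,
// SELECTED2 for the small boundary aggregates formed in Phase 0 of
// coarsenGraded, PENDING while tentatively attached to a neighboring
// aggregate, and NOTSELECTED for empty (zero) rows.
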
// *********************************************************************
// local MLI includes
// ---------------------------------------------------------------------

#include "mli_method_amgsa.h"
#include "mli_utils.h"
#include "mli_sfei.h"
#include "mli_fedata_utils.h"
#include "mli_matrix.h"
#include "mli_matrix_misc.h"
#include "mli_solver.h"

// *********************************************************************
// functions external to MLI
// ---------------------------------------------------------------------

extern "C"
{
   /* ARPACK function to compute eigenvalues/eigenvectors */

   void dnstev_(int *n, int *nev, char *which, double *sigmar,
                double *sigmai, int *colptr, int *rowind, double *nzvals,
                double *dr, double *di, double *z, int *ldz, int *info,
                double *tol);
}
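
// As used below, dnstev_ is assumed to take a serial matrix in base-1
// compressed sparse format (colptr/rowind/nzvals), a shift (sigmar,
// sigmai), and a convergence tolerance, and to return nev eigenvalues
// (dr, di) with eigenvectors packed column by column in z (leading
// dimension ldz).  This describes the calling convention implied by the
// call sites in this file, not a documented ARPACK interface.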

/***********************************************************************
 * COMPUTE SUBDOMAIN-BASED NULL SPACES USING EIGENDECOMPOSITION
 ***********************************************************************
 * compute initial null spaces (for the subdomain only) using FEData
 * --------------------------------------------------------------------- */

int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli)
{
   int    k, iN, iD, iR, level, mypid, nElems, elemNNodes;
   int    iE, iN2, **elemNodeLists, *elemNodeList1D, totalNNodes;
   int    *partition, localStartRow, localNRows, *newElemNodeList;
   int    eMatDim, newNNodes, *elemNodeList, count, *orderArray;
   int    csrNrows, *csrIA, *csrJA, offset, rowSize, startCol;
   int    rowInd, colInd, colOffset, rowLeng, start, nSubdomains;
   double **elemMatrices, *elemMat, *csrAA, dAccum;
   double *eigenR, *eigenI, *eigenV;
   char   which[20], filename[100];
   FILE   *fp;
   MPI_Comm comm;
   MLI_SFEI *sfei;
   MLI_Matrix *mliAmat;
   hypre_ParCSRMatrix *hypreA;
#ifdef MLI_ARPACK
   int    info;
   double sigmaR, sigmaI;
#endif

#ifdef MLI_DEBUG_DETAILED
   printf("MLI_Method_AMGSA::setupSFEIBasedNullSpaces begins.\n");
#endif

   /* --------------------------------------------------------------- */
   /* error checking                                                   */
   /* --------------------------------------------------------------- */

   if (mli == NULL)
   {
      printf("MLI_Method_AMGSA::setupSFEIBasedNullSpaces ERROR");
      printf(" - no mli.\n");
      exit(1);
   }
   level = 0;
   sfei = mli->getSFEI(level);
   if (sfei == NULL)
   {
      printf("MLI_Method_AMGSA::setupSFEIBasedNullSpaces ERROR");
      printf(" - no sfei.\n");
      exit(1);
   }
   nSubdomains = sfei->getNumElemBlocks();
   if (nSubdomains <= 0) return 0;

   /* --------------------------------------------------------------- */
   /* fetch communicator and matrix information                        */
   /* --------------------------------------------------------------- */

   comm = getComm();
   MPI_Comm_rank(comm, &mypid);
   mliAmat = mli->getSystemMatrix(level);
   hypreA = (hypre_ParCSRMatrix *) mliAmat->getMatrix();
   HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA,
                                        &partition);
   localStartRow = partition[mypid];
   localNRows = partition[mypid+1] - localStartRow;
   free(partition);

   /* --------------------------------------------------------------- */
   /* see if null space vectors have already been computed (startCol   */
   /* is the first null space column that is still empty)              */
   /* --------------------------------------------------------------- */

   startCol = 0;
   if (nullspaceVec_ != NULL)
   {
      for (k = 0; k < nullspaceDim_; k++)
      {
         dAccum = 0.0;
         for (iR = 0; iR < nullspaceLen_; iR++)
            dAccum += mabs(nullspaceVec_[iR+k*nullspaceLen_]);
         if (dAccum == 0.0) {startCol = k; break;}
      }
      if (k == nullspaceDim_) startCol = nullspaceDim_;
      if (startCol == nullspaceDim_) return 0;
   }

   /* --------------------------------------------------------------- */
   /* initialize null space vector and aggregation label               */
   /* --------------------------------------------------------------- */

   //if ( nullspaceVec_ != NULL ) delete [] nullspaceVec_;
   if (nullspaceVec_ != NULL) hypre_assert(nullspaceLen_ == localNRows);
   if (nullspaceVec_ == NULL)
   {
      nullspaceLen_ = localNRows;
      nullspaceVec_ = new double[localNRows*nullspaceDim_];
   }
   if (saLabels_ == NULL)
   {
      saLabels_ = new int*[maxLevels_];
      for (k = 0; k < maxLevels_; k++) saLabels_[k] = NULL;
   }
   if (saLabels_[0] != NULL) delete [] saLabels_[0];
   saLabels_[0] = new int[localNRows];
   for (k = 0; k < localNRows; k++) saLabels_[0][k] = -1;

   /* --------------------------------------------------------------- */
   /* fetch SFEI information (nElems,elemIDs,elemNNodes,elemNodeLists) */
   /* --------------------------------------------------------------- */

   if ((printToFile_ & 8) != 0 && nSubdomains == 1)
   {
      sprintf(filename, "elemNodeList.%d", mypid);
      fp = fopen(filename, "w");
   }
   else fp = NULL;
   for (iD = 0; iD < nSubdomains; iD++)
   {
      nElems = sfei->getBlockNumElems(iD);
      if (fp != NULL) fprintf(fp, "%d\n", nElems);
      elemNNodes = sfei->getBlockElemNEqns(iD);
      elemNodeLists = sfei->getBlockElemEqnLists(iD);
      elemMatrices = sfei->getBlockElemStiffness(iD);
      totalNNodes = nElems * elemNNodes;
      elemNodeList1D = new int[totalNNodes];
      count = 0;
      for (iE = 0; iE < nElems; iE++)
         for (iN = 0; iN < elemNNodes; iN++)
            elemNodeList1D[count++] = elemNodeLists[iE][iN];

      /* ------------------------------------------------------ */
      /* find the number of nodes in local subdomain (including  */
      /* external nodes)                                         */
      /* ------------------------------------------------------ */

      orderArray = new int[totalNNodes];
      newElemNodeList = new int[totalNNodes];
      for (iN = 0; iN < totalNNodes; iN++)
      {
         orderArray[iN] = iN;
         newElemNodeList[iN] = elemNodeList1D[iN];
      }
      MLI_Utils_IntQSort2(newElemNodeList, orderArray, 0, totalNNodes-1);
      elemNodeList1D[orderArray[0]] = 0;
      newNNodes = 0;
      for (iN = 1; iN < totalNNodes; iN++)
      {
         if (newElemNodeList[iN] == newElemNodeList[newNNodes])
            elemNodeList1D[orderArray[iN]] = newNNodes;
         else
         {
            newNNodes++;
            elemNodeList1D[orderArray[iN]] = newNNodes;
            newElemNodeList[newNNodes] = newElemNodeList[iN];
         }
      }
      if (totalNNodes > 0) newNNodes++;
      delete [] orderArray;
      delete [] newElemNodeList;
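
      // The sort-based pass above renumbers the (possibly repeated)
      // global equation numbers in elemNodeList1D into compact local
      // indices 0..newNNodes-1.  For example, element lists referencing
      // global equations {17, 5, 17, 42} come out as {1, 0, 1, 2} with
      // newNNodes = 3.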

      /* -------------------------------------------------------- */
      /* allocate and initialize subdomain matrix                  */
      /* -------------------------------------------------------- */

      eMatDim  = elemNNodes;
      rowSize  = elemNNodes * 16;
      csrNrows = newNNodes;
      csrIA    = new int[csrNrows+1];
      csrJA    = new int[csrNrows*rowSize];
      hypre_assert(csrJA != NULL);
      csrAA    = new double[csrNrows*rowSize];
      hypre_assert(csrAA != NULL);
      for (iR = 0; iR < csrNrows; iR++) csrIA[iR] = iR * rowSize;
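
      // Each row iR is given a fixed budget of rowSize slots starting at
      // iR * rowSize, and csrIA[iR] is used as a running write pointer
      // within that budget while the element matrices are scattered in.
      // The factor of 16 above is a heuristic bound on how many elements
      // can contribute to one row; the compression pass below aborts if
      // a row overflows it.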

      /* -------------------------------------------------------- */
      /* construct CSR matrix (with holes)                         */
      /* -------------------------------------------------------- */

      for (iE = 0; iE < nElems; iE++)
      {
         elemMat = elemMatrices[iE];
         elemNodeList = elemNodeLists[iE];
         for (iN = 0; iN < elemNNodes; iN++)
         {
            colInd = elemNodeList1D[iN+iE*elemNNodes];
            colOffset = eMatDim * iN;
            for (iN2 = 0; iN2 < elemNNodes; iN2++)
            {
               if (elemMat[colOffset+iN2] != 0.0)
               {
                  rowInd = elemNodeList1D[iN2+iE*elemNNodes];
                  offset = csrIA[rowInd]++;
                  csrJA[offset] = colInd;
                  csrAA[offset] = elemMat[iN2+colOffset];
               }
            }
         }
      }

      /* -------------------------------------------------------- */
      /* compress the CSR matrix                                   */
      /* -------------------------------------------------------- */

      offset = 0;
      for (iR = 0; iR < csrNrows; iR++)
      {
         if (csrIA[iR] > rowSize * (iR+1))
         {
            printf("MLI_Method_AMGSA::setupSFEIBasedNullSpaces ");
            printf("ERROR : rowSize too small (increase it).\n");
            printf("   => allowed = %d, actual = %d\n", rowSize,
                   csrIA[iR]-rowSize*iR);
            exit(1);
         }
         rowLeng = csrIA[iR] - iR * rowSize;
         csrIA[iR] = offset;
         start = iR * rowSize;

         MLI_Utils_IntQSort2a(&(csrJA[start]),&(csrAA[start]),0,rowLeng-1);
         count = start;
         for (k = start+1; k < start+rowLeng; k++)
         {
            if (csrJA[k] == csrJA[count]) csrAA[count] += csrAA[k];
            else
            {
               count++;
               csrJA[count] = csrJA[k];
               csrAA[count] = csrAA[k];
            }
         }
         if (rowLeng > 0) count = count - start + 1;
         else             count = 0;
         for (k = 0; k < count; k++)
         {
            csrJA[offset+k] = csrJA[start+k];
            csrAA[offset+k] = csrAA[start+k];
         }
         offset += count;
      }
      csrIA[csrNrows] = offset;
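
      // After compression each row holds sorted, unique column indices,
      // with duplicate contributions from different elements summed.
      // This is the usual finite element assembly, done row by row.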

      /* -------------------------------------------------------- */
      /* change from base-0 to base-1 indexing for Fortran call    */
      /* -------------------------------------------------------- */

      for (iR = 0; iR < csrIA[csrNrows]; iR++) csrJA[iR]++;
      for (iR = 0; iR <= csrNrows; iR++) csrIA[iR]++;

      /* -------------------------------------------------------- */
      /* compute near null spaces                                  */
      /* -------------------------------------------------------- */

      strcpy(which, "Shift");
      eigenR = new double[nullspaceDim_+1];
      eigenI = new double[nullspaceDim_+1];
      eigenV = new double[csrNrows*(nullspaceDim_+1)];
      hypre_assert((long) eigenV);

#ifdef MLI_ARPACK
      sigmaR = 1.0e-6;
      sigmaI = 0.0e0;
      dnstev_(&csrNrows, &nullspaceDim_, which, &sigmaR, &sigmaI,
              csrIA, csrJA, csrAA, eigenR, eigenI, eigenV, &csrNrows, &info,
              &arpackTol_);
      if (outputLevel_ > 2)
      {
         printf("%5d : Subdomain %3d (%3d) (size=%d) : \n", mypid, iD,
                nSubdomains, csrNrows);
         for (k = 0; k < nullspaceDim_; k++)
            printf("\t%5d : ARPACK eigenvalues %2d = %16.8e %16.8e\n", mypid,
                   k, eigenR[k], eigenI[k]);
      }
#else
      printf("MLI_Method_AMGSA::FATAL ERROR : ARPACK not installed.\n");
      exit(1);
#endif
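
      // With the small shift sigmaR, the shift-and-invert run returns
      // the eigenvectors of the subdomain stiffness matrix belonging to
      // its smallest eigenvalues.  These are the modes the matrix barely
      // damps, which is what smoothed aggregation wants as its near
      // null space.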

      // strcpy( which, "destroy" );
#ifdef MLI_ARPACK
      // dnstev_(&csrNrows, &nullspaceDim_, which, &sigmaR, &sigmaI,
      //     csrIA, csrJA, csrAA, eigenR, eigenI, eigenV, &csrNrows, &info,
      //     &arpackTol_);
#else
      printf("MLI_Method_AMGSA::FATAL ERROR : ARPACK not installed.\n");
      exit(1);
#endif

      delete [] eigenR;
      delete [] eigenI;
      delete [] csrIA;
      delete [] csrJA;
      delete [] csrAA;

      /* -------------------------------------------------------- */
      /* load the null space vectors                               */
      /* -------------------------------------------------------- */

      if (nullspaceLen_ == 0) nullspaceLen_ = localNRows;
      if (nullspaceVec_ == NULL)
         nullspaceVec_ = new double[nullspaceLen_ * nullspaceDim_];
      for (iE = 0; iE < nElems; iE++)
      {
         elemNodeList = elemNodeLists[iE];
         for (iN = 0; iN < elemNNodes; iN++)
         {
            rowInd = elemNodeList[iN] - localStartRow;
            if (fp != NULL) fprintf(fp, "%7d ", rowInd+1);
            if (rowInd >= 0 && rowInd < localNRows)
            {
               saLabels_[0][rowInd] = iD;
               colInd = elemNodeList1D[iE*elemNNodes+iN];
               for (k = startCol; k < nullspaceDim_; k++)
                  nullspaceVec_[rowInd+k*nullspaceLen_] =
                        eigenV[colInd+k*csrNrows];
            }
         }
         if (fp != NULL) fprintf(fp, "\n");
      }
      delete [] elemNodeList1D;

      /* -------------------------------------------------------- */
      /* clean up                                                  */
      /* -------------------------------------------------------- */

      delete [] eigenV;
   }
   if (fp != NULL) fclose(fp);

   if ((printToFile_ & 4) != 0)
   {
      sprintf(filename, "eVec.%d", mypid);
      fp = fopen(filename, "w");
      fprintf(fp, " %d %d\n", nullspaceLen_, nullspaceDim_);
      for (iN = 0; iN < nullspaceLen_; iN++)
      {
         for (k = 0; k < nullspaceDim_; k++)
            fprintf(fp, "%17.9e ", nullspaceVec_[nullspaceLen_*k+iN]);
         fprintf(fp, "\n");
      }
      fclose(fp);
   }

#ifdef MLI_DEBUG_DETAILED
   printf("MLI_Method_AMGSA::setupSFEIBasedNullSpaces ends.\n");
#endif

   return 0;
}

/***********************************************************************
 * SET UP AGGREGATES BASED ON FEI SUBDOMAIN INFORMATION
 ***********************************************************************
 * set up a domain decomposition method by giving all equations in a
 * subdomain the same aggregate number
 * --------------------------------------------------------------------- */

int MLI_Method_AMGSA::setupSFEIBasedAggregates(MLI *mli)
{
   int      iR, iD, level, mypid, *partition, localNRows, *aggrMap;
   int      nSubdomains, nElems, elemNNodes, **elemNodeLists;
   int      iE, iN, localStartRow, nprocs, index, count, *aggrMap2;
   MPI_Comm comm;
   MLI_Matrix *mliAmat;
   hypre_ParCSRMatrix *hypreA;
   MLI_SFEI *sfei;

#ifdef MLI_DEBUG_DETAILED
   printf("MLI_Method_AMGSA::setupSFEIBasedAggregates begins...\n");
#endif

   /* --------------------------------------------------------------- */
   /* error checking                                                   */
   /* --------------------------------------------------------------- */

   if (mli == NULL)
   {
      printf("MLI_Method_AMGSA::setupSFEIBasedAggregates ERROR");
      printf(" - no mli.\n");
      exit(1);
   }
   level = 0;
   sfei = mli->getSFEI(level);
   if (sfei == NULL)
   {
      printf("MLI_Method_AMGSA::setupSFEIBasedAggregates ERROR");
      printf(" - no sfei.\n");
      exit(1);
   }
   sfei->freeStiffnessMatrices();
   nSubdomains = sfei->getNumElemBlocks();
   if (nSubdomains <= 0) return 0;

   /* --------------------------------------------------------------- */
   /* fetch communicator and matrix information                        */
   /* --------------------------------------------------------------- */

   comm = getComm();
   MPI_Comm_rank(comm, &mypid);
   MPI_Comm_size(comm, &nprocs);
   mliAmat = mli->getSystemMatrix(level);
   hypreA = (hypre_ParCSRMatrix *) mliAmat->getMatrix();
   HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA,
                                        &partition);
   localStartRow = partition[mypid];
   localNRows = partition[mypid+1] - localStartRow;
   free(partition);

   /* --------------------------------------------------------------- */
   /* create and fill the aggrMap array                                */
   /* --------------------------------------------------------------- */

   aggrMap  = new int[localNRows];
   aggrMap2 = new int[localNRows];
   for (iR = 0; iR < localNRows; iR++) aggrMap[iR] = -1;
   if (saDataAux_ != NULL)
   {
      index = saDataAux_[0][0] + 1;
      for (iD = 0; iD < index; iD++) delete [] saDataAux_[iD];
      delete [] saDataAux_;
   }
   saDataAux_ = new int*[nSubdomains+1];
   saDataAux_[0] = new int[nSubdomains+1];
   for (iD = 1; iD < nSubdomains+1; iD++) saDataAux_[iD] = NULL;
   saDataAux_[0][0] = nSubdomains;

   for (iD = 0; iD < nSubdomains; iD++)
   {
      for (iR = 0; iR < localNRows; iR++) aggrMap2[iR] = -1;
      nElems = sfei->getBlockNumElems(iD);
      elemNNodes = sfei->getBlockElemNEqns(iD);
      elemNodeLists = sfei->getBlockElemEqnLists(iD);
      for (iE = 0; iE < nElems; iE++)
      {
         for (iN = 0; iN < elemNNodes; iN++)
         {
            index = elemNodeLists[iE][iN] - localStartRow;
            if (index >= 0 && index < localNRows && aggrMap[index] < 0)
               aggrMap[index] = iD;
            if (index >= 0 && index < localNRows)
               aggrMap2[index] = iD;
         }
      }
      count = 0;
      for (iR = 0; iR < localNRows; iR++) if (aggrMap2[iR] >= 0) count++;
      saDataAux_[0][iD+1] = count;
      saDataAux_[iD+1] = new int[count];
      count = 0;
      for (iR = 0; iR < localNRows; iR++)
         if (aggrMap2[iR] >= 0) saDataAux_[iD+1][count++] = iR;
   }
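
   // saDataAux_ stores the (possibly overlapping) subdomain-to-equation
   // map in a ragged array: saDataAux_[0][0] is the number of subdomains,
   // saDataAux_[0][iD+1] the number of equations touched by subdomain iD,
   // and saDataAux_[iD+1] the list of those local equation numbers.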
#if 0
   /* force non-overlapped aggregates */
   for (iD = 0; iD < nSubdomains; iD++)
   {
      count = 0;
      for (iR = 0; iR < localNRows; iR++) if (aggrMap[iR] == iD) count++;
      saDataAux_[0][iD+1] = count;
      if (saDataAux_[iD+1] != NULL) delete [] saDataAux_[iD+1];
      saDataAux_[iD+1] = new int[count];
      count = 0;
      for (iR = 0; iR < localNRows; iR++)
         if (aggrMap[iR] == iD) saDataAux_[iD+1][count++] = iR;
   }
#endif
   delete [] aggrMap2;
   saData_[0]   = aggrMap;
   saCounts_[0] = nSubdomains;
   numLevels_   = 2;
   minCoarseSize_ = nprocs;

#ifdef MLI_DEBUG_DETAILED
   printf("MLI_Method_AMGSA::setupSFEIBasedAggregates ends.\n");
#endif

   return 0;
}

/***********************************************************************
 * set up domain decomposition method by extending the local problem
 * (based on Bank-Lu-Tong-Vassilevski method but with aggregation)
 * --------------------------------------------------------------------- */

int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli)
{
   MLI_Function *funcPtr;

   /* --------------------------------------------------------------- */
   /* error checking                                                   */
   /* --------------------------------------------------------------- */

   if (mli == NULL)
   {
      printf("MLI_Method_AMGSA::setupExtendedDomainDecomp ERROR");
      printf(" - no mli.\n");
      exit(1);
   }

   /* *************************************************************** */
   /* creating the local expanded matrix                               */
   /* --------------------------------------------------------------- */

   /* --------------------------------------------------------------- */
   /* fetch communicator and fine matrix information                   */
   /* --------------------------------------------------------------- */

   MPI_Comm comm;
   int      mypid, nprocs, level, *partition, ANRows;
   MLI_Matrix *mli_Amat;
   hypre_ParCSRMatrix *hypreA;

   comm = getComm();
   MPI_Comm_rank(comm, &mypid);
   MPI_Comm_size(comm, &nprocs);

#ifdef MLI_DEBUG_DETAILED
   printf("%d : MLI_Method_AMGSA::setupExtendedDomainDecomp begins...\n",
          mypid);
#endif

   level = 0;
   mli_Amat = mli->getSystemMatrix(level);
   hypreA = (hypre_ParCSRMatrix *) mli_Amat->getMatrix();
   HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA,
                                        &partition);
   ANRows = partition[mypid+1] - partition[mypid];
   free(partition);

   /* --------------------------------------------------------------- */
   /* first save the nodeDofs and null space information               */
   /* (since genP replaces the null vectors with new ones)             */
   /* --------------------------------------------------------------- */

   int    nodeDofs, iD, iD2;
   double *nullVecs=NULL;

   nodeDofs = currNodeDofs_;
   nullVecs = new double[nullspaceDim_*ANRows];
   if (nullspaceVec_ != NULL)
   {
      for (iD = 0; iD < nullspaceDim_*ANRows; iD++)
         nullVecs[iD] = nullspaceVec_[iD];
   }
   else
   {
      for (iD = 0; iD < nullspaceDim_; iD++)
         for (iD2 = 0; iD2 < ANRows; iD2++)
            if (MABS((iD - iD2)) % nullspaceDim_ == 0)
                 nullVecs[iD*ANRows+iD2] = 1.0;
            else nullVecs[iD*ANRows+iD2] = 0.0;
   }
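
   // When no null space has been supplied, the default above fills in
   // the interleaved standard basis: vector iD is 1 on every equation
   // whose index is congruent to iD modulo nullspaceDim_.  For nodal
   // problems with interleaved degrees of freedom this reproduces the
   // constant (translational) modes, one per dof.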

   /* --------------------------------------------------------------- */
   /* create the first coarse grid matrix (the off processor block     */
   /* will be the 2,2 block of my expanded matrix)                     */
   /* Note: genP_DD coarsens less on processor boundaries.             */
   /* --------------------------------------------------------------- */

   int *ACPartition, ACNRows, ACStart, *aggrInfo, *bdryData;
   MLI_Matrix *mli_Pmat, *mli_cAmat;
   hypre_ParCSRMatrix  *hypreAc, *hypreP;
   hypre_ParCSRCommPkg *commPkg;

   genP_DD(mli_Amat, &mli_Pmat, &aggrInfo, &bdryData);
   delete [] aggrInfo;
   hypreP = (hypre_ParCSRMatrix *) mli_Pmat->getMatrix();
   commPkg = hypre_ParCSRMatrixCommPkg(hypreP);
   if (commPkg == NULL) hypre_MatvecCommPkgCreate(hypreP);

   MLI_Matrix_ComputePtAP(mli_Pmat, mli_Amat, &mli_cAmat);
   hypreAc = (hypre_ParCSRMatrix *) mli_cAmat->getMatrix();

   HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreAc,
                                        &ACPartition);
   ACStart = ACPartition[mypid];
   ACNRows = ACPartition[mypid+1] - ACStart;

   /* --------------------------------------------------------------- */
   /* fetch communication information for the coarse matrix            */
   /* --------------------------------------------------------------- */

   int nRecvs, *recvProcs, nSends, *sendProcs;

   commPkg = hypre_ParCSRMatrixCommPkg(hypreAc);
   if (commPkg == NULL)
   {
      hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hypreAc);
      commPkg = hypre_ParCSRMatrixCommPkg(hypreAc);
   }
   nRecvs    = hypre_ParCSRCommPkgNumRecvs(commPkg);
   recvProcs = hypre_ParCSRCommPkgRecvProcs(commPkg);
   nSends    = hypre_ParCSRCommPkgNumSends(commPkg);
   sendProcs = hypre_ParCSRCommPkgSendProcs(commPkg);

   /* --------------------------------------------------------------- */
   /* calculate the size of my local expanded off matrix (this is      */
   /* needed to create the permutation matrix) - the local size of     */
   /* the expanded off matrix should be the total number of rows of    */
   /* my recv neighbors                                                */
   /* --------------------------------------------------------------- */

   int iP, PENCols, proc, *PEPartition, PECStart, *recvLengs;

   PENCols = 0;
   if (nRecvs > 0) recvLengs = new int[nRecvs];
   for (iP = 0; iP < nRecvs; iP++)
   {
      proc = recvProcs[iP];
      PENCols += (ACPartition[proc+1] - ACPartition[proc]);
      recvLengs[iP] = ACPartition[proc+1] - ACPartition[proc];
   }
   PEPartition = new int[nprocs+1];
   MPI_Allgather(&PENCols,1,MPI_INT,&(PEPartition[1]),1,MPI_INT,comm);
   PEPartition[0] = 0;
   for (iP = 2; iP <= nprocs; iP++)
      PEPartition[iP] += PEPartition[iP-1];
   PECStart = PEPartition[mypid];

   /* --------------------------------------------------------------- */
   /* since PE(i,j) = 1 means putting external row i into local row    */
   /* j, and since I may not own row i of PE, I need to tell my        */
   /* neighbor processor who owns row i the value of j.                */
   /*                                             ==> procOffsets      */
   /* --------------------------------------------------------------- */

   int *procOffsets, offset;
   MPI_Request *mpiRequests;
   MPI_Status  mpiStatus;

   if (nSends > 0)
   {
      mpiRequests = new MPI_Request[nSends];
      procOffsets = new int[nSends];
   }
   for (iP = 0; iP < nSends; iP++)
      MPI_Irecv(&procOffsets[iP],1,MPI_INT,sendProcs[iP],13582,comm,
                &(mpiRequests[iP]));
   offset = 0;
   for (iP = 0; iP < nRecvs; iP++)
   {
      MPI_Send(&offset, 1, MPI_INT, recvProcs[iP], 13582, comm);
      offset += (ACPartition[recvProcs[iP]+1] - ACPartition[recvProcs[iP]]);
   }
   for (iP = 0; iP < nSends; iP++)
      MPI_Wait(&(mpiRequests[iP]), &mpiStatus);
   if (nSends > 0) delete [] mpiRequests;
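
   // The handshake above mirrors the matvec communication pattern: each
   // receive neighbor is told at which offset its block of external rows
   // will start in my expanded numbering, so that the neighbor (who owns
   // those rows of PE) can place its ones in the right columns below.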

   /* --------------------------------------------------------------- */
   /* create the permutation matrix to gather expanded coarse matrix   */
   /* PE has same number of rows as Ac but it has many more columns    */
   /* --------------------------------------------------------------- */

   int    ierr, *rowSizes, *colInds, rowIndex;
   double *colVals;
   char   paramString[50];
   MLI_Matrix     *mli_PE;
   HYPRE_IJMatrix IJ_PE;
   hypre_ParCSRMatrix *hyprePE;

   ierr  = HYPRE_IJMatrixCreate(comm,ACStart,ACStart+ACNRows-1,
                                PECStart,PECStart+PENCols-1,&IJ_PE);
   ierr += HYPRE_IJMatrixSetObjectType(IJ_PE, HYPRE_PARCSR);
   hypre_assert(!ierr);
   if (ACNRows > 0) rowSizes = new int[ACNRows];
   for (iD = 0; iD < ACNRows; iD++) rowSizes[iD] = nSends;
   ierr  = HYPRE_IJMatrixSetRowSizes(IJ_PE, rowSizes);
   ierr += HYPRE_IJMatrixInitialize(IJ_PE);
   hypre_assert(!ierr);
   if (ACNRows > 0) delete [] rowSizes;
   if (nSends > 0)
   {
      colInds = new int[nSends];
      colVals = new double[nSends];
      for (iP = 0; iP < nSends; iP++) colVals[iP] = 1.0;
   }
   for (iD = 0; iD < ACNRows; iD++)
   {
      rowIndex = iD + ACStart;
      for (iP = 0; iP < nSends; iP++)
         colInds[iP] = procOffsets[iP] + PEPartition[sendProcs[iP]] + iD;
      HYPRE_IJMatrixSetValues(IJ_PE, 1, &nSends, (const int *) &rowIndex,
                   (const int *) colInds, (const double *) colVals);
   }
   if (nSends > 0)
   {
      delete [] colInds;
      delete [] colVals;
      delete [] procOffsets;
   }
   HYPRE_IJMatrixAssemble(IJ_PE);
   HYPRE_IJMatrixGetObject(IJ_PE, (void **) &hyprePE);
   funcPtr = new MLI_Function();
   MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
   sprintf(paramString, "HYPRE_ParCSR");
   mli_PE = new MLI_Matrix((void *) hyprePE, paramString, funcPtr);
   delete funcPtr;
   commPkg = hypre_ParCSRMatrixCommPkg(hyprePE);
   if (commPkg == NULL)
      hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hyprePE);
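
   // Each row iD+ACStart of PE carries a 1 into one column per send
   // neighbor, so PE^T Ac PE (formed next) replicates, on each processor,
   // the coarse rows of all its receive neighbors.  PE thus acts as a
   // gather operator rather than a true permutation: every local coarse
   // row is copied into the expanded numbering of every neighbor that
   // references it.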

   /* --------------------------------------------------------------- */
   /* form the expanded coarse matrix (that is, the (2,2) block of my  */
   /* final matrix). Also, form A * P (in order to form the (1,2) and  */
   /* (2,1) blocks)                                                    */
   /* --------------------------------------------------------------- */

   MLI_Matrix *mli_AExt;
   hypre_ParCSRMatrix *hypreAExt, *hypreAP;

   MLI_Matrix_ComputePtAP(mli_PE, mli_cAmat, &mli_AExt);
   hypreAExt = (hypre_ParCSRMatrix *) mli_AExt->getMatrix();
   hypreP    = (hypre_ParCSRMatrix *) mli_Pmat->getMatrix();
   hypreAP   = hypre_ParMatmul(hypreA, hypreP);

   /* --------------------------------------------------------------- */
   /* coarsen one more time for non-neighbor fine aggregates,          */
   /* then create the block matrix                                     */
   /*    [ A_c          Aco Poo Po2   ]                                */
   /*    [ (transpose)  Po2^T Aoo Po2 ]                                */
   /* --------------------------------------------------------------- */

#if 0
   // generate Po2
   MLI_Matrix *mli_Pmat2;
   // set up one aggregate per processor
   //### need to set bdryData for my immediate neighbors to 0
   genP_Selective(mli_AExt, &mli_Pmat2, PENCols, bdryData);
   delete [] bdryData;

   // compute Aco Poo Po2
   hypre_ParCSRMatrix *hypreP2, *hypreP3, *hypreAP2;
   hypreP2  = (hypre_ParCSRMatrix *) mli_Pmat2->getMatrix();
   hypreAP2 = hypre_ParMatmul(hypreAP, hypreP2);
   hypreP3  = hypre_ParMatmul(hypreP, hypreP2);

   // compute Po2^T Aoo Po2
   MLI_Matrix *mli_AExt2;
   MLI_Matrix_ComputePtAP(mli_Pmat2, mli_AExt, &mli_AExt2);

   // adjust pointers
   hypre_ParCSRMatrixDestroy(hypreAP);
   hypreAP = hypreAP2;
   delete mli_Pmat;
   funcPtr = new MLI_Function();
   MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
   sprintf(paramString, "HYPRE_ParCSR");
   mli_Pmat = new MLI_Matrix(hypreP3, paramString, funcPtr);
   delete funcPtr;
   delete mli_Pmat2;
   delete mli_AExt;
   mli_AExt = mli_AExt2;
   hypreAExt = (hypre_ParCSRMatrix *) mli_AExt->getMatrix();
   hypreP    = (hypre_ParCSRMatrix *) mli_Pmat->getMatrix();
#endif

   /* --------------------------------------------------------------- */
   /* concatenate the local A and the (1,2), (2,1), and (2,2) blocks   */
   /* --------------------------------------------------------------- */

   int    *AdiagI, *AdiagJ, *APoffdI, *APoffdJ, *AEdiagI, *AEdiagJ, index;
   int    *APcmap, newNrows, *newIA, *newJA, newNnz, *auxIA, iR, iC;
   double *AdiagA, *APoffdA, *AEdiagA, *newAA;
   hypre_CSRMatrix *Adiag, *APoffd, *AEdiag;

   Adiag   = hypre_ParCSRMatrixDiag(hypreA);
   AdiagI  = hypre_CSRMatrixI(Adiag);
   AdiagJ  = hypre_CSRMatrixJ(Adiag);
   AdiagA  = hypre_CSRMatrixData(Adiag);
   APoffd  = hypre_ParCSRMatrixOffd(hypreAP);
   APoffdI = hypre_CSRMatrixI(APoffd);
   APoffdJ = hypre_CSRMatrixJ(APoffd);
   APoffdA = hypre_CSRMatrixData(APoffd);
   APcmap  = hypre_ParCSRMatrixColMapOffd(hypreAP);
   AEdiag  = hypre_ParCSRMatrixDiag(hypreAExt);
   AEdiagI = hypre_CSRMatrixI(AEdiag);
   AEdiagJ = hypre_CSRMatrixJ(AEdiag);
   AEdiagA = hypre_CSRMatrixData(AEdiag);
   newNrows = ANRows + PENCols;
   newIA    = new int[newNrows+1];
   newNnz   = AdiagI[ANRows] + AEdiagI[PENCols] + 2 * APoffdI[ANRows];
   newJA    = new int[newNnz];
   newAA    = new double[newNnz];
   auxIA    = new int[PENCols];
   newNnz   = 0;
   newIA[0] = newNnz;
   for (iR = 0; iR < PENCols; iR++) auxIA[iR] = 0;

   // (1,1) and (1,2) blocks
   for (iR = 0; iR < ANRows; iR++)
   {
      for (iC = AdiagI[iR]; iC < AdiagI[iR+1]; iC++)
      {
         newJA[newNnz] = AdiagJ[iC];
         newAA[newNnz++] = AdiagA[iC];
      }
      for (iC = APoffdI[iR]; iC < APoffdI[iR+1]; iC++)
      {
         index = APcmap[APoffdJ[iC]];
         offset = ANRows;
         for (iP = 0; iP < nRecvs; iP++)
         {
            if (index < ACPartition[recvProcs[iP]+1])
            {
               index = index - ACPartition[recvProcs[iP]] + offset;
               break;
            }
            offset += (ACPartition[recvProcs[iP]+1] -
                       ACPartition[recvProcs[iP]]);
         }
         newJA[newNnz] = index;
         newAA[newNnz++] = APoffdA[iC];
         APoffdJ[iC] = index;
         auxIA[index-ANRows]++;
      }
      newIA[iR+1] = newNnz;
   }

   // (2,2) block (leave room at the front of each row for the (2,1) block)
   for (iR = ANRows; iR < ANRows+PENCols; iR++)
   {
      newNnz += auxIA[iR-ANRows];
      for (iC = AEdiagI[iR-ANRows]; iC < AEdiagI[iR-ANRows+1]; iC++)
      {
         newJA[newNnz] = AEdiagJ[iC] + ANRows;
         newAA[newNnz++] = AEdiagA[iC];
      }
      newIA[iR+1] = newNnz;
   }

   // (2,1) block (transpose of the (1,2) block, filled into the holes)
   for (iR = 0; iR < PENCols; iR++) auxIA[iR] = 0;
   for (iR = 0; iR < ANRows; iR++)
   {
      for (iC = APoffdI[iR]; iC < APoffdI[iR+1]; iC++)
      {
         index = APoffdJ[iC];
         offset = newIA[index] + auxIA[index-ANRows];
         newJA[offset] = iR;
         newAA[offset] = APoffdA[iC];
         auxIA[index-ANRows]++;
      }
   }
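
   // The assembled system is the 2x2 block matrix
   //    [ A            (AP)_offd ]   (rows 0 .. ANRows-1)
   //    [ (AP)_offd^T  AExt      ]   (rows ANRows .. ANRows+PENCols-1)
   // where AExt holds the gathered coarse rows of the receive neighbors.
   // auxIA counts, per external row, the entries of the (2,1) block so
   // that exactly that much space is reserved in front of the AExt
   // entries and then filled by the transpose pass above.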

   /* --------------------------------------------------------------- */
   /* assemble the expanded matrix on a one-processor communicator     */
   /* (MPI_Comm_split with color mypid gives each rank its own comm)   */
   /* --------------------------------------------------------------- */

   int      iZero=0, *newRowSizes;
   MPI_Comm newMPIComm;
   HYPRE_IJMatrix     IJnewA;
   hypre_ParCSRMatrix *hypreNewA;
   MLI_Matrix         *mli_newA;

   MPI_Comm_split(comm, mypid, iZero, &newMPIComm);
   ierr  = HYPRE_IJMatrixCreate(newMPIComm,iZero,newNrows-1,iZero,
                                newNrows-1,&IJnewA);
   ierr += HYPRE_IJMatrixSetObjectType(IJnewA, HYPRE_PARCSR);
   hypre_assert(!ierr);
   if (newNrows > 0) newRowSizes = new int[newNrows];
   for (iD = 0; iD < newNrows; iD++)
      newRowSizes[iD] = newIA[iD+1] - newIA[iD];
   ierr  = HYPRE_IJMatrixSetRowSizes(IJnewA, newRowSizes);
   ierr += HYPRE_IJMatrixInitialize(IJnewA);
   hypre_assert(!ierr);
   for (iD = 0; iD < newNrows; iD++)
   {
      offset = newIA[iD];
      HYPRE_IJMatrixSetValues(IJnewA, 1, &newRowSizes[iD], (const int *) &iD,
                 (const int *) &newJA[offset], (const double *) &newAA[offset]);
   }
   if (newNrows > 0) delete [] newRowSizes;
   delete [] newIA;
   delete [] newJA;
   delete [] newAA;
   HYPRE_IJMatrixAssemble(IJnewA);
   HYPRE_IJMatrixGetObject(IJnewA, (void **) &hypreNewA);
   sprintf(paramString, "HYPRE_ParCSR");
   funcPtr = new MLI_Function();
   MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
   mli_newA = new MLI_Matrix((void *) hypreNewA, paramString, funcPtr);

   /* --------------------------------------------------------------- */
   /* communicate null space vectors                                   */
   /* --------------------------------------------------------------- */

   int    rLength, sLength;
   double *tmpNullVecs, *newNullVecs;

   if (PENCols > 0) tmpNullVecs = new double[PENCols*nullspaceDim_];
   if (nRecvs > 0)  mpiRequests = new MPI_Request[nRecvs];

   offset = 0;
   for (iP = 0; iP < nRecvs; iP++)
   {
      rLength = ACPartition[recvProcs[iP]+1] - ACPartition[recvProcs[iP]];
      rLength *= nullspaceDim_;
      MPI_Irecv(&tmpNullVecs[offset],rLength,MPI_DOUBLE,recvProcs[iP],14581,
                comm,&(mpiRequests[iP]));
      offset += rLength;
   }
   for (iP = 0; iP < nSends; iP++)
   {
      sLength = ACNRows * nullspaceDim_;
      MPI_Send(nullVecs, sLength, MPI_DOUBLE, sendProcs[iP], 14581, comm);
   }
   for (iP = 0; iP < nRecvs; iP++)
      MPI_Wait(&(mpiRequests[iP]), &mpiStatus);
   if (nRecvs > 0) delete [] mpiRequests;

   newNullVecs = new double[newNrows*nullspaceDim_];
   for (iD = 0; iD < nullspaceDim_; iD++)
      for (iD2 = 0; iD2 < ANRows; iD2++)
         newNullVecs[iD*newNrows+iD2] = nullVecs[iD*ANRows+iD2];
   // offset counts external rows here; the received null vectors for
   // neighbor iP start at tmpNullVecs[offset*nullspaceDim_] and belong
   // in rows ANRows+offset .. ANRows+offset+rLength-1 of the expanded
   // system (the row offset must skip the ANRows local rows)
   offset = 0;
   for (iP = 0; iP < nRecvs; iP++)
   {
      rLength = ACPartition[recvProcs[iP]+1] - ACPartition[recvProcs[iP]];
      for (iD = 0; iD < nullspaceDim_; iD++)
         for (iD2 = 0; iD2 < rLength; iD2++)
            newNullVecs[iD*newNrows+ANRows+offset+iD2] =
                  tmpNullVecs[offset*nullspaceDim_+iD*rLength+iD2];
      offset += rLength;
   }
   if (PENCols > 0) delete [] tmpNullVecs;

   /* --------------------------------------------------------------- */
   /* create new overlapped DD smoother using sequential SuperLU       */
   /* --------------------------------------------------------------- */

   int        *sendLengs;
   char       *targv[7];
   MLI_Solver *smootherPtr;
   MLI_Matrix *mli_PSmat;

   //sprintf(paramString, "SeqSuperLU");
   if (!strcmp(preSmoother_, "CGMLI")) sprintf(paramString, "CGMLI");
   else                                sprintf(paramString, "CGAMG");
   smootherPtr = MLI_Solver_CreateFromName(paramString);
   sprintf(paramString, "numSweeps 10000");
   smootherPtr->setParams(paramString, 0, NULL);
   sprintf(paramString, "tolerance 1.0e-6");
   smootherPtr->setParams(paramString, 0, NULL);

   // reinstate these lines if aggregation is used for the subdomain solve
   // delete [] newNullVecs;
   //sprintf(paramString, "setNullSpace");
   //targv[0] = (char *) &nullspaceDim_;
   //targv[1] = (char *) newNullVecs;
   //smootherPtr->setParams(paramString, 2, targv);

   // send PSmat and communication information to smoother
   if (nSends > 0) sendLengs = new int[nSends];
   for (iP = 0; iP < nSends; iP++) sendLengs[iP] = ACNRows;
   sprintf(paramString, "setPmat");
   mli_PSmat = mli_Pmat;
   targv[0] = (char *) mli_PSmat;
   smootherPtr->setParams(paramString, 1, targv);
   sprintf(paramString, "setCommData");
   targv[0] = (char *) &nRecvs;
   targv[1] = (char *) recvProcs;
   targv[2] = (char *) recvLengs;
   targv[3] = (char *) &nSends;
   targv[4] = (char *) sendProcs;
   targv[5] = (char *) sendLengs;
   targv[6] = (char *) &comm;
   smootherPtr->setParams(paramString, 7, targv);
   if (nSends > 0) delete [] sendLengs;

   smootherPtr->setup(mli_newA);
   mli->setSmoother(level, MLI_SMOOTHER_PRE, smootherPtr);
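
   // The pre-smoother is thus an overlapped subdomain solve on the
   // expanded system [A, (AP)_offd; (AP)_offd^T, AExt].  Going by the
   // solver names, CGMLI/CGAMG presumably run CG preconditioned by an
   // MLI V-cycle or by AMG; the setPmat/setCommData parameters give the
   // smoother what it needs to map residuals between the global fine
   // grid and the local expanded system.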

   /* --------------------------------------------------------------- */
   /* create prolongation and coarse grid operators                    */
   /* --------------------------------------------------------------- */

   MLI_Solver *csolvePtr;
   MLI_Matrix *mli_Rmat;

   delete mli_cAmat;

   // set up one aggregate per processor
   saCounts_[0] = 1;
   if (saData_[0] != NULL) delete [] saData_[0];
   saData_[0] = new int[ANRows];
   for (iD = 0; iD < ANRows; iD++) saData_[0][iD] = 0;

   // restore nullspace changed by the last genP
   currNodeDofs_ = nodeDofs;
   if (nullspaceVec_ != NULL) delete [] nullspaceVec_;
   nullspaceVec_ = new double[nullspaceDim_*ANRows];
   for (iD = 0; iD < nullspaceDim_*ANRows; iD++)
      nullspaceVec_[iD] = nullVecs[iD];

   // create prolongation and coarse grid operators
   genP(mli_Amat, &mli_Pmat, saCounts_[0], saData_[0]);
   MLI_Matrix_ComputePtAP(mli_Pmat, mli_Amat, &mli_cAmat);
   mli->setSystemMatrix(level+1, mli_cAmat);
   mli->setProlongation(level+1, mli_Pmat);
   sprintf(paramString, "HYPRE_ParCSRT");
   mli_Rmat = new MLI_Matrix(mli_Pmat->getMatrix(), paramString, NULL);
   mli->setRestriction(level, mli_Rmat);
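
   // The "HYPRE_ParCSRT" type marks the matrix as one to be applied in
   // transposed form, so the restriction R = P^T reuses the prolongation
   // data without storing an explicit transpose.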

   /* --------------------------------------------------------------- */
   /* setup coarse solver                                              */
   /* --------------------------------------------------------------- */

   strcpy(paramString, "SuperLU");
   csolvePtr = MLI_Solver_CreateFromName(paramString);
   csolvePtr->setup(mli_cAmat);
   mli->setCoarseSolve(csolvePtr);

   /* --------------------------------------------------------------- */
   /* clean up                                                         */
   /* --------------------------------------------------------------- */

   free(ACPartition);
   delete [] PEPartition;
   delete [] auxIA;
   HYPRE_IJMatrixDestroy(IJ_PE);
   delete mli_AExt;
   delete [] nullVecs;
   delete mli_PE;

#ifdef MLI_DEBUG_DETAILED
   printf("%d : MLI_Method_AMGSA::setupExtendedDomainDecomp ends.\n",mypid);
#endif

   level = 2;
   return (level);
}

// ************************************************************************
// Purpose : Given Amat, perform preferential coarsening (small aggregates
//           near processor boundaries) and create the corresponding Pmat
//           (called by setupExtendedDomainDecomp)
// ------------------------------------------------------------------------

double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat, MLI_Matrix **PmatOut,
                                 int **eqn2aggrOut, int **bdryDataOut)
{
   int    mypid, nprocs, *partition, AStartRow, AEndRow, ALocalNRows;
   int    blkSize, naggr, *node2aggr, ierr, PLocalNCols, PStartCol;
   int    PLocalNRows, PStartRow, *eqn2aggr, irow, jcol, ig, *bdryData;
   int    *PCols, maxAggSize, *aggCntArray, index, **aggIndArray;
   int    aggSize, nzcnt, *rowLengths, rowNum, *colInd;
   double **PVecs, *newNull, *qArray, *rArray, *colVal;
   char   paramString[200];
   HYPRE_IJMatrix     IJPmat;
   hypre_ParCSRMatrix *Amat, *A2mat, *Pmat;
   MLI_Matrix         *mli_A2mat=NULL, *mli_Pmat;
   MPI_Comm           comm;
   hypre_ParCSRCommPkg *commPkg;
   MLI_Function        *funcPtr;

   /*-----------------------------------------------------------------
    * fetch matrix and machine information
    *-----------------------------------------------------------------*/

   Amat = (hypre_ParCSRMatrix *) mli_Amat->getMatrix();
   comm = hypre_ParCSRMatrixComm(Amat);
   MPI_Comm_rank(comm,&mypid);
   MPI_Comm_size(comm,&nprocs);

   /*-----------------------------------------------------------------
    * fetch other matrix information
    *-----------------------------------------------------------------*/

   HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) Amat,&partition);
   AStartRow = partition[mypid];
   AEndRow   = partition[mypid+1] - 1;
   ALocalNRows = AEndRow - AStartRow + 1;
   free(partition);

   /*-----------------------------------------------------------------
    * reduce Amat based on the block size information (if nodeDofs_ > 1)
    *-----------------------------------------------------------------*/

   blkSize = currNodeDofs_;
   if (blkSize > 1) MLI_Matrix_Compress(mli_Amat, blkSize, &mli_A2mat);
   else             mli_A2mat = mli_Amat;
   A2mat = (hypre_ParCSRMatrix *) mli_A2mat->getMatrix();

   /*-----------------------------------------------------------------
    * modify minimum aggregate size, if needed
    *-----------------------------------------------------------------*/

   minAggrSize_ = nullspaceDim_ / currNodeDofs_;
   if (minAggrSize_ <= 1) minAggrSize_ = 2;

   /*-----------------------------------------------------------------
    * perform coarsening (small aggregates on processor boundaries)
    * 10/2005 : add bdryData for secondary aggregation
    *-----------------------------------------------------------------*/

   coarsenGraded(A2mat, &naggr, &node2aggr, &bdryData);
   if (blkSize > 1 && mli_A2mat != NULL) delete mli_A2mat;
   if (blkSize > 1)
   {
      (*bdryDataOut) = new int[ALocalNRows];
      for (irow = 0; irow < ALocalNRows; irow++)
         (*bdryDataOut)[irow] = bdryData[irow/blkSize];
      delete [] bdryData;
   }
   else (*bdryDataOut) = bdryData;

   /*-----------------------------------------------------------------
    * fetch the coarse grid information and instantiate P
    *-----------------------------------------------------------------*/

   PLocalNCols = naggr * nullspaceDim_;
   MLI_Utils_GenPartition(comm, PLocalNCols, &partition);
   PStartCol = partition[mypid];
   free(partition);
   PLocalNRows = ALocalNRows;
   PStartRow   = AStartRow;
   ierr = HYPRE_IJMatrixCreate(comm,PStartRow,PStartRow+PLocalNRows-1,
                               PStartCol,PStartCol+PLocalNCols-1,&IJPmat);
   ierr = HYPRE_IJMatrixSetObjectType(IJPmat, HYPRE_PARCSR);
   hypre_assert(!ierr);

   /*-----------------------------------------------------------------
    * expand the aggregation information if block size > 1 ==> eqn2aggr
    *-----------------------------------------------------------------*/

   if (blkSize > 1)
   {
      eqn2aggr = new int[ALocalNRows];
      for (irow = 0; irow < ALocalNRows; irow++)
         eqn2aggr[irow] = node2aggr[irow/blkSize];
      delete [] node2aggr;
   }
   else eqn2aggr = node2aggr;

   /*-----------------------------------------------------------------
    * create a compact form for the null space vectors
    * (get ready to perform QR on them)
    *-----------------------------------------------------------------*/

   PVecs = new double*[nullspaceDim_];
   PCols = new int[PLocalNRows];
   for (irow = 0; irow < nullspaceDim_; irow++)
      PVecs[irow] = new double[PLocalNRows];
   for (irow = 0; irow < PLocalNRows; irow++)
   {
      if (eqn2aggr[irow] >= 0)
           PCols[irow] = PStartCol + eqn2aggr[irow] * nullspaceDim_;
      else PCols[irow] = PStartCol + (-eqn2aggr[irow]-1) * nullspaceDim_;
      if (nullspaceVec_ != NULL)
      {
         for (jcol = 0; jcol < nullspaceDim_; jcol++)
            PVecs[jcol][irow] = nullspaceVec_[jcol*PLocalNRows+irow];
      }
      else
      {
         for (jcol = 0; jcol < nullspaceDim_; jcol++)
         {
            if (irow % nullspaceDim_ == jcol) PVecs[jcol][irow] = 1.0;
            else                              PVecs[jcol][irow] = 0.0;
         }
      }
   }
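
   // PCols[irow] is the first coarse column of the aggregate owning
   // equation irow (each aggregate owns a block of nullspaceDim_
   // consecutive columns), and PVecs holds the null space restricted to
   // the local rows; negative eqn2aggr entries encode the Phase 0
   // boundary aggregates (aggregate k stored as -k-1).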

   /*-----------------------------------------------------------------
    * perform QR for null space
    *-----------------------------------------------------------------*/

   newNull = NULL;
   if (PLocalNRows > 0)
   {
      /* ------ count the size of each aggregate ------ */

      aggCntArray = new int[naggr];
      for (ig = 0; ig < naggr; ig++) aggCntArray[ig] = 0;
      for (irow = 0; irow < PLocalNRows; irow++)
         if (eqn2aggr[irow] >= 0) aggCntArray[eqn2aggr[irow]]++;
         else                     aggCntArray[(-eqn2aggr[irow]-1)]++;
      maxAggSize = 0;
      for (ig = 0; ig < naggr; ig++)
         if (aggCntArray[ig] > maxAggSize) maxAggSize = aggCntArray[ig];

      /* ------ register which equation is in which aggregate ------ */

      aggIndArray = new int*[naggr];
      for (ig = 0; ig < naggr; ig++)
      {
         aggIndArray[ig] = new int[aggCntArray[ig]];
         aggCntArray[ig] = 0;
      }
      for (irow = 0; irow < PLocalNRows; irow++)
      {
         index = eqn2aggr[irow];
         if (index >= 0)
              aggIndArray[index][aggCntArray[index]++] = irow;
         else aggIndArray[-index-1][aggCntArray[-index-1]++] = irow;
      }

      /* ------ allocate storage for QR factorization ------ */

      qArray  = new double[maxAggSize * nullspaceDim_];
      rArray  = new double[nullspaceDim_ * nullspaceDim_];
      newNull = new double[naggr*nullspaceDim_*nullspaceDim_];

      /* ------ perform QR on each aggregate ------ */

      for (ig = 0; ig < naggr; ig++)
      {
         aggSize = aggCntArray[ig];

         if (aggSize < nullspaceDim_)
         {
            printf("Aggregation ERROR : underdetermined system in QR.\n");
            printf("            error on Proc %d\n", mypid);
            printf("            error on aggr %d (%d)\n", ig, naggr);
            printf("            aggr size is %d\n", aggSize);
            exit(1);
         }

         /* ------ put data into the temporary array ------ */

         for (jcol = 0; jcol < aggSize; jcol++)
         {
            for (irow = 0; irow < nullspaceDim_; irow++)
               qArray[aggSize*irow+jcol] = PVecs[irow][aggIndArray[ig][jcol]];
         }

         /* ------ call QR function ------ */

         /*
         if ( currLevel_ < (numLevels_-1) )
         {
            info = MLI_Utils_QR(qArray, rArray, aggSize, nullspaceDim_);
            if (info != 0)
            {
               printf("%4d : Aggregation WARNING : QR returns non-zero for\n",
                      mypid);
               printf("  aggregate %d, size = %d, info = %d\n",ig,aggSize,info);
            }
         }
         else
         {
            for ( irow = 0; irow < nullspaceDim_; irow++ )
            {
               dtemp = 0.0;
               for ( jcol = 0; jcol < aggSize; jcol++ )
                  dtemp += qArray[aggSize*irow+jcol]*qArray[aggSize*irow+jcol];
               dtemp = 1.0 / sqrt(dtemp);
               for ( jcol = 0; jcol < aggSize; jcol++ )
                  qArray[aggSize*irow+jcol] *= dtemp;
            }
         }
         */

         /* ------ after QR, put the R into the next null space ------ */

         /*
         for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
            for ( irow = 0; irow < nullspaceDim_; irow++ )
               newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_] =
                      rArray[jcol+nullspaceDim_*irow];
         */
         for (jcol = 0; jcol < nullspaceDim_; jcol++)
            for (irow = 0; irow < nullspaceDim_; irow++)
               if (irow == jcol)
                    newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_] = 1.0;
               else newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_] = 0.0;

         /* ------ put the P to PVecs ------ */

         for (jcol = 0; jcol < aggSize; jcol++)
         {
            for (irow = 0; irow < nullspaceDim_; irow++)
            {
               index = aggIndArray[ig][jcol];
               PVecs[irow][index] = qArray[irow*aggSize + jcol];
            }
         }
      }
      for (ig = 0; ig < naggr; ig++) delete [] aggIndArray[ig];
      delete [] aggIndArray;
      delete [] aggCntArray;
      delete [] qArray;
      delete [] rArray;
   }
   if (nullspaceVec_ != NULL) delete [] nullspaceVec_;
   nullspaceVec_ = newNull;
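
   // Note that the QR call itself is commented out above: the aggregate
   // blocks of P are loaded with the (unorthonormalized) null space
   // values, and the coarse null space is simply set to the identity on
   // each aggregate, i.e. R = I instead of the R factor from QR.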

   /*-----------------------------------------------------------------
    * initialize Pmat
    *-----------------------------------------------------------------*/

   rowLengths = new int[PLocalNRows];
   for (irow = 0; irow < PLocalNRows; irow++)
      rowLengths[irow] = nullspaceDim_;
   ierr = HYPRE_IJMatrixSetRowSizes(IJPmat, rowLengths);
   ierr = HYPRE_IJMatrixInitialize(IJPmat);
   hypre_assert(!ierr);
   delete [] rowLengths;

   /*--------------------------------------------------------------------
    * load and assemble Pmat
    *--------------------------------------------------------------------*/

   colInd = new int[nullspaceDim_];
   colVal = new double[nullspaceDim_];
   for (irow = 0; irow < PLocalNRows; irow++)
   {
      if (PCols[irow] >= 0)
      {
         nzcnt = 0;
         for (jcol = 0; jcol < nullspaceDim_; jcol++)
         {
            if (PVecs[jcol][irow] != 0.0)
            {
               colInd[nzcnt] = PCols[irow] + jcol;
               colVal[nzcnt++] = PVecs[jcol][irow];
            }
         }
         rowNum = PStartRow + irow;
         HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt,
                          (const int *) &rowNum, (const int *) colInd,
                          (const double *) colVal);
      }
   }
   ierr = HYPRE_IJMatrixAssemble(IJPmat);
   hypre_assert(!ierr);
   HYPRE_IJMatrixGetObject(IJPmat, (void **) &Pmat);
   hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) Pmat);
   commPkg = hypre_ParCSRMatrixCommPkg(Amat);
   if (!commPkg) hypre_MatvecCommPkgCreate(Amat);
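   // detach the ParCSR object from the IJ wrapper before destroying the
   // wrapper: resetting the object type appears to make the destroy call
   // release only the IJ shell, so Pmat survives and can be handed to
   // the MLI_Matrix below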
   HYPRE_IJMatrixSetObjectType(IJPmat, -1);
   HYPRE_IJMatrixDestroy(IJPmat);
   delete [] colInd;
   delete [] colVal;

   /*-----------------------------------------------------------------
    * clean up
    *-----------------------------------------------------------------*/

   if (PCols != NULL) delete [] PCols;
   if (PVecs != NULL)
   {
      for (irow = 0; irow < nullspaceDim_; irow++)
         if (PVecs[irow] != NULL) delete [] PVecs[irow];
      delete [] PVecs;
   }
   (*eqn2aggrOut) = eqn2aggr;

   /*-----------------------------------------------------------------
    * set up and return Pmat
    *-----------------------------------------------------------------*/

   funcPtr = new MLI_Function();
   MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
   sprintf(paramString, "HYPRE_ParCSR");
   mli_Pmat = new MLI_Matrix(Pmat, paramString, funcPtr);
   (*PmatOut) = mli_Pmat;
   delete funcPtr;
   return 0.0;
}

// *********************************************************************
// graded coarsening scheme (given a graph, aggregate on the local
// subgraph, but form smaller aggregates near processor boundaries)
// (called by setupExtendedDomainDecomp/genP_DD)
// ---------------------------------------------------------------------

int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG,
                     int *mliAggrLeng, int **mliAggrArray, int **bdryData)
{
   MPI_Comm comm;
   int      mypid, nprocs, *partition, startRow, endRow, maxInd;
   int      localNRows, naggr=0, *node2aggr, *aggrSizes, nUndone;
   int      irow, jcol, colNum, rowLeng, *cols, globalNRows;
   int      *nodeStat, selectFlag, nSelected=0, nNotSelected=0, count;
   int      ibuf[2], itmp[2], *bdrySet, localMinSize, index, maxCount;
   int      *GDiagI, *GDiagJ, *GOffdI;
   double   maxVal, *vals, *GDiagA;
   hypre_CSRMatrix *GDiag, *GOffd;
#ifdef MLI_DEBUG_DETAILED
   int      rowNum;
#endif

   /*-----------------------------------------------------------------
    * fetch machine and matrix parameters
    *-----------------------------------------------------------------*/

   comm = hypre_ParCSRMatrixComm(hypreG);
   MPI_Comm_rank(comm,&mypid);
   MPI_Comm_size(comm,&nprocs);
   HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreG,
                                        &partition);
   startRow = partition[mypid];
   endRow   = partition[mypid+1] - 1;
   free(partition);
   localNRows = endRow - startRow + 1;
   MPI_Allreduce(&localNRows, &globalNRows, 1, MPI_INT, MPI_SUM, comm);
   if (mypid == 0 && outputLevel_ > 1)
   {
      printf("\t*** Aggregation(G) : total nodes to aggregate = %d\n",
             globalNRows);
   }
   GDiag  = hypre_ParCSRMatrixDiag(hypreG);
   GDiagI = hypre_CSRMatrixI(GDiag);
   GDiagJ = hypre_CSRMatrixJ(GDiag);
   GDiagA = hypre_CSRMatrixData(GDiag);
   GOffd  = hypre_ParCSRMatrixOffd(hypreG);
   GOffdI = hypre_CSRMatrixI(GOffd);

   /*-----------------------------------------------------------------
    * allocate status arrays
    *-----------------------------------------------------------------*/

   if (localNRows > 0)
   {
      node2aggr = new int[localNRows];
      aggrSizes = new int[localNRows];
      nodeStat  = new int[localNRows];
      bdrySet   = new int[localNRows];
      for (irow = 0; irow < localNRows; irow++)
      {
         aggrSizes[irow] = 0;
         node2aggr[irow] = -1;
         nodeStat[irow]  = MLI_METHOD_AMGSA_READY;
         bdrySet[irow]   = 0;
      }
   }
   else node2aggr = aggrSizes = nodeStat = bdrySet = NULL;

   /*-----------------------------------------------------------------
    * search for zero rows and rows near the processor boundaries
    *-----------------------------------------------------------------*/

   for (irow = 0; irow < localNRows; irow++)
   {
      rowLeng = GDiagI[irow+1] - GDiagI[irow];
      if (rowLeng <= 0)
      {
         nodeStat[irow] = MLI_METHOD_AMGSA_NOTSELECTED;
         nNotSelected++;
      }
      if (GOffdI != NULL && (GOffdI[irow+1] - GOffdI[irow]) > 0)
         bdrySet[irow] = 1;
   }
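
   // A node is flagged as a boundary node exactly when its row has
   // entries in the off-diagonal (offd) part of the ParCSR matrix,
   // i.e. when it is coupled to at least one row owned by another
   // processor.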

   /*-----------------------------------------------------------------
    * Phase 0 : form aggregates near the boundary (aggregates should be
    *           as small as possible, but have to satisfy the minimum
    *           size.  The algorithm gives preference to nodes on
    *           boundaries.)
    *-----------------------------------------------------------------*/

   localMinSize = nullspaceDim_ / currNodeDofs_ * 2;
   for (irow = 0; irow < localNRows; irow++)
   {
      if (nodeStat[irow] == MLI_METHOD_AMGSA_READY && bdrySet[irow] == 1)
      {
         nSelected++;
         node2aggr[irow] = - naggr - 1;
         aggrSizes[naggr] = 1;
         nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED2;
         if (localMinSize > 1)
         {
            rowLeng = GDiagI[irow+1] - GDiagI[irow];
            cols = &(GDiagJ[GDiagI[irow]]);
            jcol = 0;
            while (aggrSizes[naggr] < localMinSize && jcol < rowLeng)
            {
               index = cols[jcol];
               if (index != irow && (bdrySet[index] != 1) &&
                   nodeStat[index] == MLI_METHOD_AMGSA_READY)
               {
                  node2aggr[index] = naggr;
                  aggrSizes[naggr]++;
                  nodeStat[index] = MLI_METHOD_AMGSA_SELECTED2;
               }
               jcol++;
            }
         }
         naggr++;
      }
   }
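
   // Phase 0 seeds one aggregate per boundary node and grows it only up
   // to localMinSize using interior (non-boundary) READY neighbors.  The
   // negative node2aggr value (-naggr-1) marks the seed as belonging to
   // a small boundary aggregate; genP_DD decodes this sign convention
   // when it assigns coarse columns.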
   itmp[0] = naggr;
   itmp[1] = nSelected;
   if (outputLevel_ > 1) MPI_Allreduce(itmp, ibuf, 2, MPI_INT, MPI_SUM, comm);
   if (mypid == 0 && outputLevel_ > 1)
   {
      printf("\t*** Aggregation(G) P0 : no. of aggregates = %d\n",ibuf[0]);
      printf("\t*** Aggregation(G) P0 : no. nodes aggregated = %d\n",ibuf[1]);
   }

   // search for seed node with largest number of neighbors

   maxInd   = -1;
   maxCount = -1;
   for (irow = 0; irow < localNRows; irow++)
   {
      if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
      {
         count = 0;
         rowLeng = GDiagI[irow+1] - GDiagI[irow];
         cols = &(GDiagJ[GDiagI[irow]]);
         for (jcol = 0; jcol < rowLeng; jcol++)
         {
            index = cols[jcol];
            if (nodeStat[index] == MLI_METHOD_AMGSA_READY) count++;
         }
         if (count > maxCount)
         {
            maxCount = count;
            maxInd = irow;
         }
      }
   }

   /*-----------------------------------------------------------------
    * Phase 1 : form aggregates
    *-----------------------------------------------------------------*/

   for (irow = 0; irow < localNRows; irow++)
   {
      if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
      {
         rowLeng = GDiagI[irow+1] - GDiagI[irow];
         cols = &(GDiagJ[GDiagI[irow]]);
         selectFlag = 1;
         count = 1;
         for (jcol = 0; jcol < rowLeng; jcol++)
         {
            colNum = cols[jcol];
            if (nodeStat[colNum] != MLI_METHOD_AMGSA_READY)
            {
               selectFlag = 0;
               break;
            }
            else count++;
         }
         if (selectFlag == 1 && count >= minAggrSize_)
         {
            aggrSizes[naggr] = 0;
            for (jcol = 0; jcol < rowLeng; jcol++)
            {
               colNum = cols[jcol];
               node2aggr[colNum] = naggr;
               nodeStat[colNum] = MLI_METHOD_AMGSA_SELECTED;
               aggrSizes[naggr]++;
               nSelected++;
            }
            naggr++;
         }
      }
   }
   itmp[0] = naggr;
   itmp[1] = nSelected;
   if (outputLevel_ > 1) MPI_Allreduce(itmp, ibuf, 2, MPI_INT, MPI_SUM, comm);
   if (mypid == 0 && outputLevel_ > 1)
   {
      printf("\t*** Aggregation(G) P1 : no. of aggregates = %d\n",ibuf[0]);
      printf("\t*** Aggregation(G) P1 : no. nodes aggregated = %d\n",ibuf[1]);
   }

   /*-----------------------------------------------------------------
    * Phase 2 : put the rest into one of the existing aggregates
    *-----------------------------------------------------------------*/

   if ((nSelected+nNotSelected) < localNRows)
   {
      for (irow = 0; irow < localNRows; irow++)
      {
         if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
         {
            rowLeng = GDiagI[irow+1] - GDiagI[irow];
            cols = &(GDiagJ[GDiagI[irow]]);
            vals = &(GDiagA[GDiagI[irow]]);
            maxInd = -1;
            maxVal = 0.0;
            for (jcol = 0; jcol < rowLeng; jcol++)
            {
               colNum = cols[jcol];
               if (nodeStat[colNum] == MLI_METHOD_AMGSA_SELECTED)
               {
                  if (vals[jcol] > maxVal)
                  {
                     maxInd = colNum;
                     maxVal = vals[jcol];
                  }
               }
            }
            if (maxInd != -1)
            {
               node2aggr[irow] = node2aggr[maxInd];
               nodeStat[irow] = MLI_METHOD_AMGSA_PENDING;
               aggrSizes[node2aggr[maxInd]]++;
            }
         }
      }
      for (irow = 0; irow < localNRows; irow++)
      {
         if (nodeStat[irow] == MLI_METHOD_AMGSA_PENDING)
         {
            nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
            nSelected++;
         }
      }
   }
   itmp[0] = naggr;
   itmp[1] = nSelected;
   if (outputLevel_ > 1) MPI_Allreduce(itmp,ibuf,2,MPI_INT,MPI_SUM,comm);
   if (mypid == 0 && outputLevel_ > 1)
   {
      printf("\t*** Aggregation(G) P2 : no. of aggregates = %d\n",ibuf[0]);
      printf("\t*** Aggregation(G) P2 : no. nodes aggregated = %d\n",ibuf[1]);
   }

   /*-----------------------------------------------------------------
    * Phase 3 : form aggregates for all other rows
    *-----------------------------------------------------------------*/

   if ((nSelected+nNotSelected) < localNRows)
   {
      for (irow = 0; irow < localNRows; irow++)
      {
         if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
         {
            rowLeng = GDiagI[irow+1] - GDiagI[irow];
            cols = &(GDiagJ[GDiagI[irow]]);
            count = 1;
            for (jcol = 0; jcol < rowLeng; jcol++)
            {
               colNum = cols[jcol];
               if (nodeStat[colNum] == MLI_METHOD_AMGSA_READY) count++;
            }
            if (count > 1 && count >= minAggrSize_)
            {
               aggrSizes[naggr] = 0;
               for (jcol = 0; jcol < rowLeng; jcol++)
               {
                  colNum = cols[jcol];
                  if (nodeStat[colNum] == MLI_METHOD_AMGSA_READY)
                  {
                     nodeStat[colNum] = MLI_METHOD_AMGSA_SELECTED;
                     node2aggr[colNum] = naggr;
                     aggrSizes[naggr]++;
                     nSelected++;
                  }
               }
               naggr++;
            }
         }
      }
   }
   itmp[0] = naggr;
   itmp[1] = nSelected;
   if (outputLevel_ > 1) MPI_Allreduce(itmp,ibuf,2,MPI_INT,MPI_SUM,comm);
   if (mypid == 0 && outputLevel_ > 1)
   {
      printf("\t*** Aggregation(G) P3 : no. of aggregates = %d\n",ibuf[0]);
      printf("\t*** Aggregation(G) P3 : no. nodes aggregated = %d\n",ibuf[1]);
   }

   /*-----------------------------------------------------------------
    * Phase 4 : finally put all lone rows into some neighbor aggregate
    *-----------------------------------------------------------------*/

   if ((nSelected+nNotSelected) < localNRows)
   {
      for (irow = 0; irow < localNRows; irow++)
      {
         if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
         {
            rowLeng = GDiagI[irow+1] - GDiagI[irow];
            cols = &(GDiagJ[GDiagI[irow]]);
            for (jcol = 0; jcol < rowLeng; jcol++)
            {
               colNum = cols[jcol];
               if (nodeStat[colNum] == MLI_METHOD_AMGSA_SELECTED)
               {
                  node2aggr[irow] = node2aggr[colNum];
                  nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
                  aggrSizes[node2aggr[colNum]]++;
                  nSelected++;
                  break;
               }
            }
         }
      }
   }
   itmp[0] = naggr;
   itmp[1] = nSelected;
   if (outputLevel_ > 1) MPI_Allreduce(itmp,ibuf,2,MPI_INT,MPI_SUM,comm);
1752 if ( mypid == 0 && outputLevel_ > 1 )
1753 {
1754 printf("\t*** Aggregation(G) P4 : no. of aggregates = %d\n",ibuf[0]);
1755 printf("\t*** Aggregation(G) P4 : no. nodes aggregated = %d\n",ibuf[1]);
1756 }
1757 nUndone = localNRows - nSelected - nNotSelected;
1758    //if ( nUndone > 0 )
1759    if ( nUndone > localNRows ) // never true : this cleanup phase is disabled
1760 {
1761 count = nUndone / minAggrSize_;
1762 if ( count == 0 ) count = 1;
1763 count += naggr;
1764 irow = jcol = 0;
1765 while ( nUndone > 0 )
1766 {
1767 if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY )
1768 {
1769 node2aggr[irow] = naggr;
1770 nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
1771 nUndone--;
1772 nSelected++;
1773 jcol++;
1774 if ( jcol >= minAggrSize_ && naggr < count-1 )
1775 {
1776 jcol = 0;
1777 naggr++;
1778 }
1779 }
1780 irow++;
1781 }
1782 naggr = count;
1783 }
1784 itmp[0] = naggr;
1785 itmp[1] = nSelected;
1786 if ( outputLevel_ > 1 ) MPI_Allreduce(itmp,ibuf,2,MPI_INT,MPI_SUM,comm);
1787 if ( mypid == 0 && outputLevel_ > 1 )
1788 {
1789 printf("\t*** Aggregation(G) P5 : no. of aggregates = %d\n",ibuf[0]);
1790 printf("\t*** Aggregation(G) P5 : no. nodes aggregated = %d\n",ibuf[1]);
1791 }
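
   // (when enabled, the phase above would pack the nUndone leftover rows
   // into roughly nUndone/minAggrSize_ new aggregates ; e.g. nUndone = 7
   // with minAggrSize_ = 3 yields two new aggregates of sizes 3 and 4)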
1792
1793 /*-----------------------------------------------------------------
1794 * diagnostics
1795 *-----------------------------------------------------------------*/
1796
1797 if ( (nSelected+nNotSelected) < localNRows )
1798 {
1799 #ifdef MLI_DEBUG_DETAILED
1800 for ( irow = 0; irow < localNRows; irow++ )
1801 {
1802 if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY )
1803 {
1804 rowNum = startRow + irow;
1805 printf("%5d : unaggregated node = %8d\n", mypid, rowNum);
1806 hypre_ParCSRMatrixGetRow(hypreG,rowNum,&rowLeng,&cols,NULL);
1807 for ( jcol = 0; jcol < rowLeng; jcol++ )
1808 {
1809 colNum = cols[jcol];
1810 printf("ERROR : neighbor of unselected node %9d = %9d\n",
1811 rowNum, colNum);
1812 }
1813 hypre_ParCSRMatrixRestoreRow(hypreG,rowNum,&rowLeng,&cols,NULL);
1814 }
1815 }
1816 #else
1817 printf("%5d : ERROR - not all nodes aggregated.\n", mypid);
1818 exit(1);
1819 #endif
1820 }
1821
1822 /*-----------------------------------------------------------------
1823 * clean up and initialize the output arrays
1824 *-----------------------------------------------------------------*/
1825
1826 if (localNRows > 0) delete [] aggrSizes;
1827 if (localNRows > 0) delete [] nodeStat;
1828 if (localNRows == 1 && naggr == 0)
1829 {
1830 node2aggr[0] = 0;
1831 naggr = 1;
1832 }
1833 (*bdryData) = bdrySet;
1834 (*mliAggrArray) = node2aggr;
1835 (*mliAggrLeng) = naggr;
1836 return 0;
1837 }
1838
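// ---------------------------------------------------------------------
// For reference : a stripped-down sketch of the Phase 1 rule used in the
// aggregation routines of this file, written over plain CSR arrays with
// hypothetical names (status 0 = ready, 1 = taken).  It assumes the
// diagonal entry is stored in each row, as the routines above do.
// ---------------------------------------------------------------------

static int sketchPhase1Aggregation(int nrows, int *ia, int *ja, int *status,
                                   int *aggr, int minSize)
{
   int naggr = 0;
   for (int irow = 0; irow < nrows; irow++)
   {
      if (status[irow] != 0) continue;
      int selectable = 1, count = 0;
      for (int k = ia[irow]; k < ia[irow+1]; k++)
      {
         if (status[ja[k]] != 0) { selectable = 0; break; }
         count++;
      }
      if (selectable && count >= minSize)
      {
         // the seed and all of its neighbors form one new aggregate
         for (int k = ia[irow]; k < ia[irow+1]; k++)
         {
            aggr[ja[k]]   = naggr;
            status[ja[k]] = 1;
         }
         naggr++;
      }
   }
   return naggr;
}
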
1839 // ************************************************************************
1840 // Purpose : Given Amat, perform preferential coarsening (no coarsening
1841 //           when the bdry flag = 1)
1842 //           (called by setupExtendedDomainDecomp)
1843 // ------------------------------------------------------------------------
1844
1845 double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat,
1846 MLI_Matrix **PmatOut, int ALen, int *bdryData)
1847 {
1848 int mypid, nprocs, *partition, AStartRow, AEndRow, ALocalNRows;
1849 int blkSize, naggr, *node2aggr, ierr, PLocalNCols, PStartCol;
1850 int PLocalNRows, PStartRow, *eqn2aggr, irow, jcol, ig;
1851 int *PCols, maxAggSize, *aggCntArray, index, **aggIndArray;
1852 int aggSize, nzcnt, *rowLengths, rowNum, *colInd, *compressBdryData;
1853 double **PVecs, *newNull, *qArray, *rArray, *colVal;
1854 char paramString[200];
1855 HYPRE_IJMatrix IJPmat;
1856 hypre_ParCSRMatrix *Amat, *A2mat, *Pmat;
1857 MLI_Matrix *mli_A2mat=NULL, *mli_Pmat;
1858 MPI_Comm comm;
1859 hypre_ParCSRCommPkg *commPkg;
1860 MLI_Function *funcPtr;
1861
1862 /*-----------------------------------------------------------------
1863 * fetch matrix information
1864 *-----------------------------------------------------------------*/
1865
1866 Amat = (hypre_ParCSRMatrix *) mli_Amat->getMatrix();
1867 comm = hypre_ParCSRMatrixComm(Amat);
1868 MPI_Comm_rank(comm,&mypid);
1869 MPI_Comm_size(comm,&nprocs);
1870 HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) Amat,&partition);
1871 AStartRow = partition[mypid];
1872 AEndRow = partition[mypid+1] - 1;
1873 ALocalNRows = AEndRow - AStartRow + 1;
1874 free(partition);
1875
1876 /*-----------------------------------------------------------------
1877 * reduce Amat based on the block size information (if nodeDofs_ > 1)
1878 *-----------------------------------------------------------------*/
1879
1880 blkSize = currNodeDofs_;
1881 if (blkSize > 1) MLI_Matrix_Compress(mli_Amat, blkSize, &mli_A2mat);
1882 else mli_A2mat = mli_Amat;
1883 A2mat = (hypre_ParCSRMatrix *) mli_A2mat->getMatrix();
1884
1885 /*-----------------------------------------------------------------
1886 * modify minimum aggregate size, if needed
1887 *-----------------------------------------------------------------*/
1888
1889 minAggrSize_ = nullspaceDim_ / currNodeDofs_;
1890 if (minAggrSize_ <= 1) minAggrSize_ = 2;
1891
1892 /*-----------------------------------------------------------------
1893 * perform coarsening (no aggregation on processor boundaries)
1894 *-----------------------------------------------------------------*/
1895
1896 if (blkSize > 1)
1897 {
1898 compressBdryData = new int[ALocalNRows/blkSize];
1899 for (irow = 0; irow < ALocalNRows; irow+=blkSize)
1900 compressBdryData[irow/blkSize] = bdryData[irow];
1901 }
1902 else compressBdryData = bdryData;
1903
1904    coarsenSelective(A2mat, &naggr, &node2aggr, compressBdryData);
1905 if (blkSize > 1 && mli_A2mat != NULL) delete mli_A2mat;
1906 if (blkSize > 1) delete [] compressBdryData;
1907
1908 /*-----------------------------------------------------------------
1909 * fetch the coarse grid information and instantiate P
1910 *-----------------------------------------------------------------*/
1911
1912 PLocalNCols = naggr * nullspaceDim_;
1913 MLI_Utils_GenPartition(comm, PLocalNCols, &partition);
1914 PStartCol = partition[mypid];
1915 free( partition );
1916 PLocalNRows = ALocalNRows;
1917 PStartRow = AStartRow;
1918 ierr = HYPRE_IJMatrixCreate(comm,PStartRow,PStartRow+PLocalNRows-1,
1919 PStartCol,PStartCol+PLocalNCols-1,&IJPmat);
1920 ierr = HYPRE_IJMatrixSetObjectType(IJPmat, HYPRE_PARCSR);
1921 hypre_assert(!ierr);
1922
1923 /*-----------------------------------------------------------------
1924 * expand the aggregation information if block size > 1 ==> eqn2aggr
1925 *-----------------------------------------------------------------*/
1926
1927 if ( blkSize > 1 )
1928 {
1929 eqn2aggr = new int[ALocalNRows];
1930 for ( irow = 0; irow < ALocalNRows; irow++ )
1931 eqn2aggr[irow] = node2aggr[irow/blkSize];
1932 delete [] node2aggr;
1933 }
1934 else eqn2aggr = node2aggr;
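
   // (e.g. with blkSize = 3, equations 0,1,2 inherit node 0's aggregate,
   //  equations 3,4,5 inherit node 1's, and so on)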
1935
1936 /*-----------------------------------------------------------------
1937 * create a compact form for the null space vectors
1938 * (get ready to perform QR on them)
1939 *-----------------------------------------------------------------*/
1940
1941 PVecs = new double*[nullspaceDim_];
1942 PCols = new int[PLocalNRows];
1943 for (irow = 0; irow < nullspaceDim_; irow++)
1944 PVecs[irow] = new double[PLocalNRows];
1945 for ( irow = 0; irow < PLocalNRows; irow++ )
1946 {
1947 if ( eqn2aggr[irow] >= 0 )
1948 PCols[irow] = PStartCol + eqn2aggr[irow] * nullspaceDim_;
1949 else PCols[irow] = PStartCol + (-eqn2aggr[irow]-1) * nullspaceDim_;
1950 if ( nullspaceVec_ != NULL )
1951 {
1952 for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
1953 PVecs[jcol][irow] = nullspaceVec_[jcol*PLocalNRows+irow];
1954 }
1955 else
1956 {
1957 for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
1958 {
1959 if (irow % nullspaceDim_ == jcol) PVecs[jcol][irow] = 1.0;
1960 else PVecs[jcol][irow] = 0.0;
1961 }
1962 }
1963 }
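
   // when no null space is given, the default above is one piecewise-
   // constant vector per degree of freedom, e.g. for nullspaceDim_ = 2 :
   // PVecs[0] = (1,0,1,0,...) and PVecs[1] = (0,1,0,1,...)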
1964
1965 /*-----------------------------------------------------------------
1966 * perform QR for null space
1967 *-----------------------------------------------------------------*/
1968
1969 newNull = NULL;
1970 if ( PLocalNRows > 0 )
1971 {
1972 /* ------ count the size of each aggregate ------ */
1973
1974 aggCntArray = new int[naggr];
1975 for ( ig = 0; ig < naggr; ig++ ) aggCntArray[ig] = 0;
1976 for ( irow = 0; irow < PLocalNRows; irow++ )
1977 if ( eqn2aggr[irow] >= 0 ) aggCntArray[eqn2aggr[irow]]++;
1978 else aggCntArray[(-eqn2aggr[irow]-1)]++;
1979 maxAggSize = 0;
1980 for ( ig = 0; ig < naggr; ig++ )
1981 if (aggCntArray[ig] > maxAggSize) maxAggSize = aggCntArray[ig];
1982
1983 /* ------ register which equation is in which aggregate ------ */
1984
1985 aggIndArray = new int*[naggr];
1986 for ( ig = 0; ig < naggr; ig++ )
1987 {
1988 aggIndArray[ig] = new int[aggCntArray[ig]];
1989 aggCntArray[ig] = 0;
1990 }
1991 for ( irow = 0; irow < PLocalNRows; irow++ )
1992 {
1993 index = eqn2aggr[irow];
1994 if ( index >= 0 )
1995 aggIndArray[index][aggCntArray[index]++] = irow;
1996 else
1997 aggIndArray[-index-1][aggCntArray[-index-1]++] = irow;
1998 }
1999
2000 /* ------ allocate storage for QR factorization ------ */
2001
2002 qArray = new double[maxAggSize * nullspaceDim_];
2003 rArray = new double[nullspaceDim_ * nullspaceDim_];
2004 newNull = new double[naggr*nullspaceDim_*nullspaceDim_];
2005
2006 /* ------ perform QR on each aggregate ------ */
2007
2008 for ( ig = 0; ig < naggr; ig++ )
2009 {
2010 aggSize = aggCntArray[ig];
2011
2012 if ( aggSize < nullspaceDim_ )
2013 {
2014 printf("Aggregation ERROR : underdetermined system in QR.\n");
2015 printf(" error on Proc %d\n", mypid);
2016 printf(" error on aggr %d (%d)\n", ig, naggr);
2017 printf(" aggr size is %d\n", aggSize);
2018 exit(1);
2019 }
2020
2021 /* ------ put data into the temporary array ------ */
2022
2023 for ( jcol = 0; jcol < aggSize; jcol++ )
2024 {
2025 for ( irow = 0; irow < nullspaceDim_; irow++ )
2026 qArray[aggSize*irow+jcol] = PVecs[irow][aggIndArray[ig][jcol]];
2027 }
2028
2029          /* ------ no QR is actually performed here : the null space
                        block is kept as is and the next null space (R)
                        is simply set to identity ------ */
2030
2031 for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
2032 for ( irow = 0; irow < nullspaceDim_; irow++ )
2033 if ( irow == jcol )
2034 newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_]=1.0;
2035 else
2036 newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_]=0.0;
2037
2038 /* ------ put the P to PVecs ------ */
2039
2040 for ( jcol = 0; jcol < aggSize; jcol++ )
2041 {
2042 for ( irow = 0; irow < nullspaceDim_; irow++ )
2043 {
2044 index = aggIndArray[ig][jcol];
2045 PVecs[irow][index] = qArray[ irow*aggSize + jcol ];
2046 }
2047 }
2048 }
2049 for ( ig = 0; ig < naggr; ig++ ) delete [] aggIndArray[ig];
2050 delete [] aggIndArray;
2051 delete [] aggCntArray;
2052 delete [] qArray;
2053 delete [] rArray;
2054 }
2055 if ( nullspaceVec_ != NULL ) delete [] nullspaceVec_;
2056 nullspaceVec_ = newNull;
2057
2058 /*-----------------------------------------------------------------
2059 * initialize Pmat
2060 *-----------------------------------------------------------------*/
2061
2062 rowLengths = new int[PLocalNRows];
2063 for ( irow = 0; irow < PLocalNRows; irow++ )
2064 rowLengths[irow] = nullspaceDim_;
2065 ierr = HYPRE_IJMatrixSetRowSizes(IJPmat, rowLengths);
2066 ierr = HYPRE_IJMatrixInitialize(IJPmat);
2067 hypre_assert(!ierr);
2068 delete [] rowLengths;
2069
2070 /*--------------------------------------------------------------------
2071 * load and assemble Pmat
2072 *--------------------------------------------------------------------*/
2073
2074 colInd = new int[nullspaceDim_];
2075 colVal = new double[nullspaceDim_];
2076 for ( irow = 0; irow < PLocalNRows; irow++ )
2077 {
2078 if ( PCols[irow] >= 0 )
2079 {
2080 nzcnt = 0;
2081 for ( jcol = 0; jcol < nullspaceDim_; jcol++ )
2082 {
2083 if ( PVecs[jcol][irow] != 0.0 )
2084 {
2085 colInd[nzcnt] = PCols[irow] + jcol;
2086 colVal[nzcnt++] = PVecs[jcol][irow];
2087 }
2088 }
2089 rowNum = PStartRow + irow;
2090 HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt,
2091 (const int *) &rowNum, (const int *) colInd,
2092 (const double *) colVal);
2093 }
2094 }
2095 ierr = HYPRE_IJMatrixAssemble(IJPmat);
2096 hypre_assert( !ierr );
2097 HYPRE_IJMatrixGetObject(IJPmat, (void **) &Pmat);
2098 hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) Pmat);
2099 commPkg = hypre_ParCSRMatrixCommPkg(Amat);
2100 if (!commPkg) hypre_MatvecCommPkgCreate(Amat);
2101    HYPRE_IJMatrixSetObjectType(IJPmat, -1); // detach Pmat before destroy
2102 HYPRE_IJMatrixDestroy( IJPmat );
2103 delete [] colInd;
2104 delete [] colVal;
2105
2106 /*-----------------------------------------------------------------
2107 * clean up
2108 *-----------------------------------------------------------------*/
2109
2110 if (PCols != NULL) delete [] PCols;
2111 if (PVecs != NULL)
2112 {
2113 for (irow = 0; irow < nullspaceDim_; irow++)
2114 if (PVecs[irow] != NULL) delete [] PVecs[irow];
2115 delete [] PVecs;
2116 }
2117 delete [] eqn2aggr;
2118
2119 /*-----------------------------------------------------------------
2120 * set up and return Pmat
2121 *-----------------------------------------------------------------*/
2122
2123 funcPtr = new MLI_Function();
2124 MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
2125 sprintf(paramString, "HYPRE_ParCSR" );
2126 mli_Pmat = new MLI_Matrix( Pmat, paramString, funcPtr );
2127 (*PmatOut) = mli_Pmat;
2128 delete funcPtr;
2129 return 0.0;
2130 }
2131
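// ---------------------------------------------------------------------
// For reference : genP_Selective above keeps each aggregate's null-space
// block as is and stores an identity R.  If the blocks were to be
// orthonormalized instead, a textbook modified Gram-Schmidt matching the
// qArray layout (m x n, column-major, leading dimension m) would look
// like the sketch below ; the function name is hypothetical.
// ---------------------------------------------------------------------

#include <math.h>

static void sketchModifiedGramSchmidt(double *q, double *r, int m, int n)
{
   // on exit the columns of q are orthonormal and q_in = q * r,
   // with r upper triangular (n x n, column-major)
   int    i, j, k;
   double dtemp;
   for (i = 0; i < n*n; i++) r[i] = 0.0;
   for (j = 0; j < n; j++)
   {
      dtemp = 0.0;
      for (k = 0; k < m; k++) dtemp += q[j*m+k] * q[j*m+k];
      r[j*n+j] = sqrt(dtemp);
      if (r[j*n+j] != 0.0)
         for (k = 0; k < m; k++) q[j*m+k] /= r[j*n+j];
      for (i = j+1; i < n; i++)
      {
         dtemp = 0.0;
         for (k = 0; k < m; k++) dtemp += q[j*m+k] * q[i*m+k];
         r[i*n+j] = dtemp;
         for (k = 0; k < m; k++) q[i*m+k] -= dtemp * q[j*m+k];
      }
   }
}
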
2132 // *********************************************************************
2133 // selective coarsening scheme (Given a graph, aggregate on the local
2134 // subgraph but no aggregation near processor boundaries)
2135 // (called by setupExtendedDomainDecomp/genP_Selective)
2136 // ---------------------------------------------------------------------
2137
2138 int MLI_Method_AMGSA::coarsenSelective(hypre_ParCSRMatrix *hypreG,
2139 int *naggrOut, int **aggrInfoOut, int *bdryData)
2140 {
2141 MPI_Comm comm;
2142 int mypid, nprocs, *partition, startRow, endRow, maxInd;
2143 int localNRows, naggr=0, *node2aggr, *aggrSizes, nUndone;
2144 int irow, jcol, colNum, rowLeng, *cols, globalNRows;
2145 int *nodeStat, selectFlag, nSelected=0, nNotSelected=0, count;
2146 int *GDiagI, *GDiagJ;
2147 double maxVal, *vals, *GDiagA;
2148 hypre_CSRMatrix *GDiag;
2149 #ifdef MLI_DEBUG_DETAILED
2150 int rowNum;
2151 #endif
2152
2153 /*-----------------------------------------------------------------
2154 * fetch machine and matrix parameters
2155 *-----------------------------------------------------------------*/
2156
2157 comm = hypre_ParCSRMatrixComm(hypreG);
2158 MPI_Comm_rank(comm,&mypid);
2159 MPI_Comm_size(comm,&nprocs);
2160 HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreG,
2161 &partition);
2162 startRow = partition[mypid];
2163 endRow = partition[mypid+1] - 1;
2164 free( partition );
2165 localNRows = endRow - startRow + 1;
2166 MPI_Allreduce(&localNRows, &globalNRows, 1, MPI_INT, MPI_SUM, comm);
2167 if ( mypid == 0 && outputLevel_ > 1 )
2168 {
2169 printf("\t*** Aggregation(G) : total nodes to aggregate = %d\n",
2170 globalNRows);
2171 }
2172 GDiag = hypre_ParCSRMatrixDiag(hypreG);
2173 GDiagI = hypre_CSRMatrixI(GDiag);
2174 GDiagJ = hypre_CSRMatrixJ(GDiag);
2175 GDiagA = hypre_CSRMatrixData(GDiag);
2176
2177 /*-----------------------------------------------------------------
2178 * allocate status arrays
2179 *-----------------------------------------------------------------*/
2180
2181 if (localNRows > 0)
2182 {
2183 node2aggr = new int[localNRows];
2184 aggrSizes = new int[localNRows];
2185 nodeStat = new int[localNRows];
2186 for ( irow = 0; irow < localNRows; irow++ )
2187 {
2188 if (bdryData[irow] == 1)
2189 {
2190             nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
2191             node2aggr[irow] = naggr;
2192             aggrSizes[naggr++] = 1; // sizes are indexed by aggregate number
2193             nSelected++;
2194 }
2195 else
2196 {
2197 nodeStat[irow] = MLI_METHOD_AMGSA_READY;
2198 node2aggr[irow] = -1;
2199 aggrSizes[irow] = 0;
2200 }
2201 }
2202 }
2203 else node2aggr = aggrSizes = nodeStat = NULL;
2204
2205 /*-----------------------------------------------------------------
2206  * search for zero rows (boundary rows were already handled above)
2207 *-----------------------------------------------------------------*/
2208
2209 for ( irow = 0; irow < localNRows; irow++ )
2210 {
2211 rowLeng = GDiagI[irow+1] - GDiagI[irow];
2212 if (rowLeng <= 0)
2213 {
2214 nodeStat[irow] = MLI_METHOD_AMGSA_NOTSELECTED;
2215 nNotSelected++;
2216 }
2217 }
2218
2219 /*-----------------------------------------------------------------
2220 * Phase 1 : form aggregates
2221 *-----------------------------------------------------------------*/
2222
2223 for (irow = 0; irow < localNRows; irow++)
2224 {
2225 if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
2226 {
2227 rowLeng = GDiagI[irow+1] - GDiagI[irow];
2228 cols = &(GDiagJ[GDiagI[irow]]);
2229 selectFlag = 1;
2230 count = 1;
2231 for ( jcol = 0; jcol < rowLeng; jcol++ )
2232 {
2233 colNum = cols[jcol];
2234 if (nodeStat[colNum] != MLI_METHOD_AMGSA_READY)
2235 {
2236 selectFlag = 0;
2237 break;
2238 }
2239 else count++;
2240 }
2241 if (selectFlag == 1 && count >= minAggrSize_)
2242 {
2243 aggrSizes[naggr] = 0;
2244 for ( jcol = 0; jcol < rowLeng; jcol++ )
2245 {
2246 colNum = cols[jcol];
2247 node2aggr[colNum] = naggr;
2248 nodeStat[colNum] = MLI_METHOD_AMGSA_SELECTED;
2249 aggrSizes[naggr]++;
2250 nSelected++;
2251 }
2252 naggr++;
2253 }
2254 }
2255 }
2256
2257 /*-----------------------------------------------------------------
2258 * Phase 2 : put the rest into one of the existing aggregates
2259 *-----------------------------------------------------------------*/
2260
2261 if ((nSelected+nNotSelected) < localNRows)
2262 {
2263 for (irow = 0; irow < localNRows; irow++)
2264 {
2265 if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
2266 {
2267 rowLeng = GDiagI[irow+1] - GDiagI[irow];
2268 cols = &(GDiagJ[GDiagI[irow]]);
2269 vals = &(GDiagA[GDiagI[irow]]);
2270 maxInd = -1;
2271 maxVal = 0.0;
2272 for (jcol = 0; jcol < rowLeng; jcol++)
2273 {
2274 colNum = cols[jcol];
2275 if (nodeStat[colNum] == MLI_METHOD_AMGSA_SELECTED)
2276 {
2277 if (vals[jcol] > maxVal)
2278 {
2279 maxInd = colNum;
2280 maxVal = vals[jcol];
2281 }
2282 }
2283 }
2284 if (maxInd != -1)
2285 {
2286 node2aggr[irow] = node2aggr[maxInd];
2287 nodeStat[irow] = MLI_METHOD_AMGSA_PENDING;
2288 aggrSizes[node2aggr[maxInd]]++;
2289 }
2290 }
2291 }
2292 for (irow = 0; irow < localNRows; irow++)
2293 {
2294 if (nodeStat[irow] == MLI_METHOD_AMGSA_PENDING)
2295 {
2296 nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
2297 nSelected++;
2298 }
2299 }
2300 }
2301
2302 /*-----------------------------------------------------------------
2303 * Phase 3 : form aggregates for all other rows
2304 *-----------------------------------------------------------------*/
2305
2306 if ((nSelected+nNotSelected) < localNRows)
2307 {
2308 for (irow = 0; irow < localNRows; irow++)
2309 {
2310 if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
2311 {
2312 rowLeng = GDiagI[irow+1] - GDiagI[irow];
2313 cols = &(GDiagJ[GDiagI[irow]]);
2314 count = 1;
2315 for (jcol = 0; jcol < rowLeng; jcol++)
2316 {
2317 colNum = cols[jcol];
2318 if (nodeStat[colNum] == MLI_METHOD_AMGSA_READY) count++;
2319 }
2320 if (count > 1 && count >= minAggrSize_)
2321 {
2322 aggrSizes[naggr] = 0;
2323 for (jcol = 0; jcol < rowLeng; jcol++)
2324 {
2325 colNum = cols[jcol];
2326 if (nodeStat[colNum] == MLI_METHOD_AMGSA_READY)
2327 {
2328 nodeStat[colNum] = MLI_METHOD_AMGSA_SELECTED;
2329 node2aggr[colNum] = naggr;
2330 aggrSizes[naggr]++;
2331 nSelected++;
2332 }
2333 }
2334 naggr++;
2335 }
2336 }
2337 }
2338 }
2339
2340 /*-----------------------------------------------------------------
2341 * Phase 4 : finally put all lone rows into some neighbor aggregate
2342 *-----------------------------------------------------------------*/
2343
2344 if ((nSelected+nNotSelected) < localNRows)
2345 {
2346 for (irow = 0; irow < localNRows; irow++)
2347 {
2348 if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
2349 {
2350 rowLeng = GDiagI[irow+1] - GDiagI[irow];
2351 cols = &(GDiagJ[GDiagI[irow]]);
2352 for (jcol = 0; jcol < rowLeng; jcol++)
2353 {
2354 colNum = cols[jcol];
2355 if (nodeStat[colNum] == MLI_METHOD_AMGSA_SELECTED)
2356 {
2357 node2aggr[irow] = node2aggr[colNum];
2358 nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
2359 aggrSizes[node2aggr[colNum]]++;
2360 nSelected++;
2361 break;
2362 }
2363 }
2364 }
2365 }
2366 }
2367 nUndone = localNRows - nSelected - nNotSelected;
2368    //if ( nUndone > 0 )
2369    if ( nUndone > localNRows ) // never true : this cleanup phase is disabled
2370 {
2371 count = nUndone / minAggrSize_;
2372 if ( count == 0 ) count = 1;
2373 count += naggr;
2374 irow = jcol = 0;
2375 while ( nUndone > 0 )
2376 {
2377 if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY )
2378 {
2379 node2aggr[irow] = naggr;
2380 nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
2381 nUndone--;
2382 nSelected++;
2383 jcol++;
2384 if ( jcol >= minAggrSize_ && naggr < count-1 )
2385 {
2386 jcol = 0;
2387 naggr++;
2388 }
2389 }
2390 irow++;
2391 }
2392 naggr = count;
2393 }
2394
2395 /*-----------------------------------------------------------------
2396 * diagnostics
2397 *-----------------------------------------------------------------*/
2398
2399 if ((nSelected+nNotSelected) < localNRows)
2400 {
2401 #ifdef MLI_DEBUG_DETAILED
2402 for (irow = 0; irow < localNRows; irow++)
2403 {
2404 if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
2405 {
2406 rowNum = startRow + irow;
2407 printf("%5d : unaggregated node = %8d\n", mypid, rowNum);
2408 hypre_ParCSRMatrixGetRow(hypreG,rowNum,&rowLeng,&cols,NULL);
2409 for ( jcol = 0; jcol < rowLeng; jcol++ )
2410 {
2411 colNum = cols[jcol];
2412 printf("ERROR : neighbor of unselected node %9d = %9d\n",
2413 rowNum, colNum);
2414 }
2415 hypre_ParCSRMatrixRestoreRow(hypreG,rowNum,&rowLeng,&cols,NULL);
2416 }
2417 }
2418 #else
2419 printf("%5d : ERROR - not all nodes aggregated.\n", mypid);
2420 exit(1);
2421 #endif
2422 }
2423
2424 /*-----------------------------------------------------------------
2425 * clean up and initialize the output arrays
2426 *-----------------------------------------------------------------*/
2427
2428 if (localNRows > 0) delete [] aggrSizes;
2429 if (localNRows > 0) delete [] nodeStat;
2430 if (localNRows == 1 && naggr == 0)
2431 {
2432 node2aggr[0] = 0;
2433 naggr = 1;
2434 }
2435 (*aggrInfoOut) = node2aggr;
2436 (*naggrOut) = naggr;
2437 return 0;
2438 }
2439
2440 //**********************************************************************
2441 // set up domain decomposition method by extending the local problem
2442 // (A simplified version of setupExtendedDomainDecomp using an
2443 //  inefficient method - for testing only)
2444 // ---------------------------------------------------------------------
2445
2446 int MLI_Method_AMGSA::setupExtendedDomainDecomp2(MLI *mli)
2447 {
2448 MLI_Function *funcPtr;
2449 int nRecvs, *recvProcs, nSends, *sendProcs, ierr, *rowSizes;
2450 int *colInds, rowIndex, iP;
2451 int *recvLengs, mypid, nprocs, level, *Apartition, ANRows;
2452 int nodeDofs, iD, iD2, AStart, AExtNRows;
2453 double *nullVecs=NULL, *colVals;
2454 char paramString[50];
2455 MPI_Comm comm;
2456 MLI_Matrix *mli_Amat;
2457 hypre_ParCSRMatrix *hypreA;
2458 hypre_ParCSRCommPkg *commPkg;
2459
2460 /* --------------------------------------------------------------- */
2461 /* error checking */
2462 /* --------------------------------------------------------------- */
2463
2464 if (mli == NULL)
2465 {
2466 printf("MLI_Method_AMGSA::setupExtendedDomainDecomp2 ERROR");
2467 printf(" - no mli.\n");
2468 exit(1);
2469 }
2470
2471 /* --------------------------------------------------------------- */
2472 /* fetch communicator and fine matrix information */
2473 /* --------------------------------------------------------------- */
2474
2475 comm = getComm();
2476 MPI_Comm_rank(comm, &mypid);
2477 MPI_Comm_size(comm, &nprocs);
2478
2479 #ifdef MLI_DEBUG_DETAILED
2480 printf("%d : AMGSA::setupExtendedDomainDecomp2 begins...\n",mypid);
2481 #endif
2482
2483 level = 0;
2484 mli_Amat = mli->getSystemMatrix(level);
2485 hypreA = (hypre_ParCSRMatrix *) mli_Amat->getMatrix();
2486 HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA,
2487 &Apartition);
2488 AStart = Apartition[mypid];
2489 ANRows = Apartition[mypid+1] - AStart;
2490
2491 /* --------------------------------------------------------------- */
2492 /* first save the nodeDofs and null space information since it */
2493 /* will be destroyed soon and needs to be recovered later */
2494 /* --------------------------------------------------------------- */
2495
2496 nodeDofs = currNodeDofs_;
2497 nullVecs = new double[nullspaceDim_*ANRows];
2498 if (nullspaceVec_ != NULL)
2499 {
2500 for (iD = 0; iD < nullspaceDim_*ANRows; iD++)
2501 nullVecs[iD] = nullspaceVec_[iD];
2502 }
2503 else
2504 {
2505 for (iD = 0; iD < nullspaceDim_; iD++)
2506 for (iD2 = 0; iD2 < ANRows; iD2++)
2507 if (MABS((iD - iD2)) % nullspaceDim_ == 0)
2508 nullVecs[iD*ANRows+iD2] = 1.0;
2509 else nullVecs[iD*ANRows+iD2] = 0.0;
2510 }
2511
2512 /* --------------------------------------------------------------- */
2513 /* get my neighbor processor information */
2514 /* --------------------------------------------------------------- */
2515
2516 commPkg = hypre_ParCSRMatrixCommPkg(hypreA);
2517 if (commPkg == NULL)
2518 {
2519 hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hypreA);
2520 commPkg = hypre_ParCSRMatrixCommPkg(hypreA);
2521 }
2522 nRecvs = hypre_ParCSRCommPkgNumRecvs(commPkg);
2523 recvProcs = hypre_ParCSRCommPkgRecvProcs(commPkg);
2524 nSends = hypre_ParCSRCommPkgNumSends(commPkg);
2525 sendProcs = hypre_ParCSRCommPkgSendProcs(commPkg);
2526 if (nRecvs > 0) recvLengs = new int[nRecvs];
2527
2528 /* --------------------------------------------------------------- */
2529 /* calculate the size of my expanded matrix AExt which is the sum */
2530 /* of the rows of my neighbors (and mine also) */
2531 /* --------------------------------------------------------------- */
2532
2533    AExtNRows = ANRows; // my own rows come first in the extended block
2534    for (iP = 0; iP < nRecvs; iP++)
2535    {
2536       recvLengs[iP] = Apartition[recvProcs[iP]+1] - Apartition[recvProcs[iP]];
2537       AExtNRows += recvLengs[iP];
2538    }
2539
2540 /* --------------------------------------------------------------- */
2541 /* communicate processor offsets for AExt (needed to create */
2542 /* partition) */
2543 /* --------------------------------------------------------------- */
2544
2545 int *AExtpartition = new int[nprocs+1];
2546 MPI_Allgather(&AExtNRows, 1, MPI_INT, &AExtpartition[1], 1, MPI_INT, comm);
2547 AExtpartition[0] = 0;
2548 for (iP = 1; iP < nprocs; iP++)
2549 AExtpartition[iP+1] = AExtpartition[iP+1] + AExtpartition[iP];
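
   // (e.g. local extended sizes {3,2,4} on three processors turn into the
   //  partition {0,3,5,9} after this prefix sum)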
2550
2551 /* --------------------------------------------------------------- */
2552 /* create QExt (permutation matrix for getting local AExt) */
2553 /* --------------------------------------------------------------- */
2554
2555 int QExtCStart = AExtpartition[mypid];
2556 int QExtNCols = AExtpartition[mypid+1] - QExtCStart;
2557 int QExtNRows = ANRows;
2558 int QExtRStart = AStart;
2559 HYPRE_IJMatrix IJ_QExt;
2560
2561 ierr = HYPRE_IJMatrixCreate(comm,QExtRStart,QExtRStart+QExtNRows-1,
2562 QExtCStart,QExtCStart+QExtNCols-1,&IJ_QExt);
2563 ierr += HYPRE_IJMatrixSetObjectType(IJ_QExt, HYPRE_PARCSR);
2564 hypre_assert(!ierr);
2565 rowSizes = new int[QExtNRows];
2566    for (iD = 0; iD < ANRows; iD++) rowSizes[iD] = nSends + 1;
2567 ierr = HYPRE_IJMatrixSetRowSizes(IJ_QExt, rowSizes);
2568 ierr += HYPRE_IJMatrixInitialize(IJ_QExt);
2569 hypre_assert(!ierr);
2570 delete [] rowSizes;
2571
2572 /* --------------------------------------------------------------- */
2573 /* when creating QExt, need to set up the QExtOffsets ==> */
2574 /* since QExt(i,j) = 1 means putting local col i into external col */
2575 /* j, and since I may not own row i of PE, I need to tell my */
2576 /* neighbor processor who owns row i the value of j. */
2577 /* --------------------------------------------------------------- */
2578
2579    int *QExtOffsets, offset, pindex, nColsQ;
2580 MPI_Request *mpiRequests;
2581 MPI_Status mpiStatus;
2582
2583 if (nSends > 0)
2584 {
2585 mpiRequests = new MPI_Request[nSends];
2586 QExtOffsets = new int[nSends];
2587 }
2588 for (iP = 0; iP < nSends; iP++)
2589 MPI_Irecv(&QExtOffsets[iP],1,MPI_INT,sendProcs[iP],434243,comm,
2590 &(mpiRequests[iP]));
2591    offset = ANRows; // received rows are placed after my own rows
2592    // tell my receive neighbors at which offset (within my extended
2593    // block) their columns will be placed
2594 for (iP = 0; iP < nRecvs; iP++)
2595 {
2596 MPI_Send(&offset, 1, MPI_INT, recvProcs[iP], 434243, comm);
2597 offset += (Apartition[recvProcs[iP]+1] - Apartition[recvProcs[iP]]);
2598 }
2599 for ( iP = 0; iP < nSends; iP++ )
2600 MPI_Wait( &(mpiRequests[iP]), &mpiStatus );
2601 if (nSends > 0) delete [] mpiRequests;
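
   // at this point QExtOffsets[iP] holds the offset of my rows inside
   // sendProcs[iP]'s extended block, mirroring the offsets just sent to
   // my receive neighbors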
2602
2603 /* --------------------------------------------------------------- */
2604 /* create QExt */
2605 /* --------------------------------------------------------------- */
2606
2607 hypre_ParCSRMatrix *hypreQExt;
2608 MLI_Matrix *mli_QExt;
2609
2610    colInds = new int[nSends+1];
2611    colVals = new double[nSends+1];
2612    for (iP = 0; iP <= nSends; iP++) colVals[iP] = 1.0;
2613    nColsQ = nSends + 1; // one diagonal entry plus one entry per send
2614    for (iD = 0; iD < QExtNRows; iD++)
2615    {
2616       rowIndex = QExtRStart + iD;
2617       colInds[0] = QExtCStart + iD; // my own copy, in the extended space
2618       for (iP = 0; iP < nSends; iP++)
2619       {
2620          pindex = sendProcs[iP];
2621          colInds[iP+1] = AExtpartition[pindex] + QExtOffsets[iP] + iD;
2622       }
2623       HYPRE_IJMatrixSetValues(IJ_QExt, 1, &nColsQ, (const int *) &rowIndex,
2624                   (const int *) colInds, (const double *) colVals);
2625    }
2626    delete [] colInds;
2627    delete [] colVals;
2628    if (nSends > 0) delete [] QExtOffsets;
2634 HYPRE_IJMatrixAssemble(IJ_QExt);
2635 HYPRE_IJMatrixGetObject(IJ_QExt, (void **) &hypreQExt);
2636 sprintf(paramString, "HYPRE_ParCSR");
2637 mli_QExt = new MLI_Matrix( (void *) hypreQExt, paramString, NULL);
2638 commPkg = hypre_ParCSRMatrixCommPkg(hypreQExt);
2639 if (commPkg == NULL)
2640 hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hypreQExt);
2641
2642 /* --------------------------------------------------------------- */
2643 /* form the expanded fine matrix */
2644 /* --------------------------------------------------------------- */
2645
2646 MLI_Matrix *mli_AExt;
2647 //hypre_ParCSRMatrix *hypreAExt;
2648
2649 MLI_Matrix_ComputePtAP(mli_QExt, mli_Amat, &mli_AExt);
2650 //hypreAExt = (hypre_ParCSRMatrix *) mli_AExt->getMatrix();
2651 delete mli_QExt;
2652 HYPRE_IJMatrixDestroy(IJ_QExt);
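
   // QExt is a 0/1 replication operator, so the triple product above in
   // effect hands every processor its own rows of A plus copies of the
   // neighboring rows - the extended local problem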
2653
2654 /* --------------------------------------------------------------- */
2655 /* communicate null space information for graded coarsening */
2656 /* --------------------------------------------------------------- */
2657
2658 int rLength, sLength;
2659 double *tmpNullVecs;
2660
2661 if (QExtNCols > 0) tmpNullVecs = new double[QExtNCols*nullspaceDim_];
2662 if (nRecvs > 0) mpiRequests = new MPI_Request[nRecvs];
2663
2664 offset = 0;
2665 for (iP = 0; iP < nRecvs; iP++)
2666 {
2667       rLength = recvLengs[iP]; // each neighbor sends its own rows only
2668 rLength *= nullspaceDim_;
2669 MPI_Irecv(&tmpNullVecs[offset],rLength,MPI_DOUBLE,recvProcs[iP],14581,
2670 comm,&(mpiRequests[iP]));
2671 offset += rLength;
2672 }
2673 for (iP = 0; iP < nSends; iP++)
2674 {
2675 sLength = ANRows * nullspaceDim_;
2676 MPI_Send(nullVecs, sLength, MPI_DOUBLE, sendProcs[iP], 14581, comm);
2677 }
2678 for (iP = 0; iP < nRecvs; iP++)
2679 MPI_Wait(&(mpiRequests[iP]), &mpiStatus);
2680 if (nRecvs > 0) delete [] mpiRequests;
2681
2682 if (nullspaceVec_ != NULL) delete [] nullspaceVec_;
2683 nullspaceVec_ = new double[QExtNCols*nullspaceDim_];
2684 for (iD = 0; iD < nullspaceDim_; iD++)
2685 for (iD2 = 0; iD2 < ANRows; iD2++)
2686 nullspaceVec_[iD*QExtNCols+iD2] = nullVecs[iD*ANRows+iD2];
2687    offset = ANRows;   // destination offset within my extended block
2688    int srcOffset = 0; // source offset within tmpNullVecs
2689    for ( iP = 0; iP < nRecvs; iP++ )
2690    {
2691       rLength = recvLengs[iP];
2692       for (iD = 0; iD < nullspaceDim_; iD++)
2693          for (iD2 = 0; iD2 < rLength; iD2++)
2694             nullspaceVec_[iD*QExtNCols+iD2+offset] =
2695                tmpNullVecs[srcOffset+iD*rLength+iD2];
2696       offset += rLength; srcOffset += rLength * nullspaceDim_;
2697    }
2698 if (QExtNCols > 0) delete [] tmpNullVecs;
2699 delete [] AExtpartition;
2700
2701 /* --------------------------------------------------------------- */
2702 /* graded coarsening on the expanded fine matrix */
2703 /* (this P will be needed later to collect neighbor unknowns */
2704 /* before smoothing - its transpose) */
2705 /* --------------------------------------------------------------- */
2706
2707 MLI_Matrix *mli_PExt;
2708 genP_AExt(mli_AExt, &mli_PExt, ANRows);
2709
2710 /* --------------------------------------------------------------- */
2711 /* create the local domain decomposition matrix */
2712 /* reduced so that the local subdomain matrix is smaller - will be */
2713 /* used as a smoother */
2714 /* --------------------------------------------------------------- */
2715
2716 MLI_Matrix *mli_ACExt;
2717 MLI_Matrix_ComputePtAP(mli_PExt, mli_AExt, &mli_ACExt);
2718
2719 /* --------------------------------------------------------------- */
2720 /* need to convert the ACExt into local matrix (since the matrix */
2721 /* is local) */
2722 /* --------------------------------------------------------------- */
2723
2724 int iZero=0, ACExtNRows, *newRowSizes, *ACExtI, *ACExtJ;
2725 double *ACExtD;
2726 MPI_Comm newMPIComm;
2727 hypre_ParCSRMatrix *hypreACExt, *hypreNewA;
2728 hypre_CSRMatrix *csrACExt;
2729 HYPRE_IJMatrix IJnewA;
2730
2731    hypreACExt = (hypre_ParCSRMatrix *) mli_ACExt->getMatrix();
2732 ACExtNRows = hypre_ParCSRMatrixNumRows(hypreACExt);
2733
2734 MPI_Comm_split(comm, mypid, iZero, &newMPIComm);
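   // (color = mypid puts each rank in its own single-process communicator,
   //  so IJnewA below is assembled as a purely local, serial matrix)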
2735 ierr = HYPRE_IJMatrixCreate(newMPIComm,iZero,ACExtNRows-1,iZero,
2736 ACExtNRows-1, &IJnewA);
2737 ierr += HYPRE_IJMatrixSetObjectType(IJnewA, HYPRE_PARCSR);
2738 hypre_assert(!ierr);
2739 if (ACExtNRows > 0) newRowSizes = new int[ACExtNRows];
2740 csrACExt = hypre_ParCSRMatrixDiag(hypreACExt);
2741 ACExtI = hypre_CSRMatrixI(csrACExt);
2742 ACExtJ = hypre_CSRMatrixJ(csrACExt);
2743 ACExtD = hypre_CSRMatrixData(csrACExt);
2744 for (iD = 0; iD < ACExtNRows; iD++)
2745 newRowSizes[iD] = ACExtI[iD+1] - ACExtI[iD];
2746 ierr = HYPRE_IJMatrixSetRowSizes(IJnewA, newRowSizes);
2747 ierr += HYPRE_IJMatrixInitialize(IJnewA);
2748 hypre_assert(!ierr);
2749 for (iD = 0; iD < ACExtNRows; iD++)
2750 {
2751 offset = ACExtI[iD];
2752 HYPRE_IJMatrixSetValues(IJnewA, 1, &newRowSizes[iD], (const int *) &iD,
2753 (const int *) &ACExtJ[offset], (const double *) &ACExtD[offset]);
2754 }
2755 if (ACExtNRows > 0) delete [] newRowSizes;
2756 HYPRE_IJMatrixAssemble(IJnewA);
2757 HYPRE_IJMatrixGetObject(IJnewA, (void **) &hypreNewA);
2758 sprintf(paramString, "HYPRE_ParCSR");
2759 funcPtr = new MLI_Function();
2760 MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
2761 delete mli_ACExt;
2762 mli_ACExt = new MLI_Matrix( (void *) hypreNewA, paramString, funcPtr);
2763 delete mli_AExt;
2764 HYPRE_IJMatrixSetObjectType(IJnewA, -1);
2765 HYPRE_IJMatrixDestroy(IJnewA);
2766
2767 /* --------------------------------------------------------------- */
2768 /* create new overlapped DD smoother using sequential SuperLU */
2769 /* --------------------------------------------------------------- */
2770
2771 int *sendLengs;
2772 char *targv[7];
2773 MLI_Solver *smootherPtr;
2774 MLI_Matrix *mli_PSmat;
2775
2776 if (!strcmp(preSmoother_, "CGMLI")) sprintf(paramString, "CGMLI");
2777 else sprintf(paramString, "CGAMG");
2778 smootherPtr = MLI_Solver_CreateFromName(paramString);
2779 sprintf(paramString, "numSweeps 10000");
2780 smootherPtr->setParams(paramString, 0, NULL);
2781 sprintf(paramString, "tolerance 1.0e-6");
2782 smootherPtr->setParams(paramString, 0, NULL);
2783
2784 // send PSmat and communication information to smoother
2785 if (nSends > 0) sendLengs = new int[nSends];
2786 for (iP = 0; iP < nSends; iP++) sendLengs[iP] = ANRows;
2787 sprintf(paramString, "setPmat");
2788 mli_PSmat = mli_PExt;
2789 targv[0] = (char *) mli_PSmat;
2790 smootherPtr->setParams(paramString, 1, targv);
2791 sprintf(paramString, "setCommData");
2792 targv[0] = (char *) &nRecvs;
2793 targv[1] = (char *) recvProcs;
2794 targv[2] = (char *) recvLengs;
2795 targv[3] = (char *) &nSends;
2796 targv[4] = (char *) sendProcs;
2797 targv[5] = (char *) sendLengs;
2798 targv[6] = (char *) &comm;
2799 smootherPtr->setParams(paramString, 7, targv);
2800 if (nSends > 0) delete [] sendLengs;
2801 if (nRecvs > 0) delete [] recvLengs;
2802
2803 smootherPtr->setup(mli_ACExt);
2804 mli->setSmoother(level, MLI_SMOOTHER_PRE, smootherPtr);
2805
2806 /* --------------------------------------------------------------- */
2807 /* create prolongation and coarse grid operators */
2808 /* --------------------------------------------------------------- */
2809
2810 MLI_Solver *csolvePtr;
2811 MLI_Matrix *mli_Pmat, *mli_Rmat, *mli_cAmat;
2812
2813 // set up one aggregate per processor
2814 saCounts_[0] = 1;
2815 if (saData_[0] != NULL) delete [] saData_[0];
2816 saData_[0] = new int[ANRows];
2817 for (iD = 0; iD < ANRows; iD++) saData_[0][iD] = 0;
2818
2819 // restore nullspace changed by the last genP
2820 currNodeDofs_ = nodeDofs;
2821 if (nullspaceVec_ != NULL) delete [] nullspaceVec_;
2822 nullspaceVec_ = new double[nullspaceDim_*ANRows];
2823 for (iD = 0; iD < nullspaceDim_*ANRows; iD++)
2824 nullspaceVec_[iD] = nullVecs[iD];
2825 delete [] nullVecs;
2826
2827 // create prolongation and coarse grid operators
2828 genP(mli_Amat, &mli_Pmat, saCounts_[0], saData_[0]);
2829 MLI_Matrix_ComputePtAP(mli_Pmat, mli_Amat, &mli_cAmat);
2830 mli->setSystemMatrix(level+1, mli_cAmat);
2831 mli->setProlongation(level+1, mli_Pmat);
2832 sprintf(paramString, "HYPRE_ParCSRT");
2833 mli_Rmat = new MLI_Matrix(mli_Pmat->getMatrix(), paramString, NULL);
2834 mli->setRestriction(level, mli_Rmat);
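   // (the "HYPRE_ParCSRT" wrapper applies the transpose of Pmat, so the
   //  restriction R = P^T is realized without forming an explicit matrix)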
2835
2836 /* --------------------------------------------------------------- */
2837 /* setup coarse solver */
2838 /* --------------------------------------------------------------- */
2839
2840 strcpy( paramString, "SuperLU" );
2841 csolvePtr = MLI_Solver_CreateFromName( paramString );
2842 csolvePtr->setup(mli_cAmat);
2843 mli->setCoarseSolve(csolvePtr);
2844
2845 /* --------------------------------------------------------------- */
2846 /* clean up */
2847 /* --------------------------------------------------------------- */
2848
2849 free(Apartition);
2850
2851 #ifdef MLI_DEBUG_DETAILED
2852 printf("%d : MLI_Method_AMGSA::setupExtendedDomainDecomp2 ends.\n",mypid);
2853 #endif
2854
2855 level = 2;
2856 return (level);
2857 }
2858
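// ---------------------------------------------------------------------
// For reference : the HYPRE_IJMatrix life cycle used repeatedly in this
// file (Create -> SetObjectType -> SetRowSizes -> Initialize ->
// SetValues -> Assemble -> GetObject), shown for a one-nonzero-per-row
// identity block.  Names other than the HYPRE calls are hypothetical.
// ---------------------------------------------------------------------

static hypre_ParCSRMatrix *sketchBuildIdentity(MPI_Comm comm, int startRow,
                                               int localNRows)
{
   HYPRE_IJMatrix     IJmat;
   hypre_ParCSRMatrix *parcsr;
   int                endRow = startRow + localNRows - 1;

   HYPRE_IJMatrixCreate(comm, startRow, endRow, startRow, endRow, &IJmat);
   HYPRE_IJMatrixSetObjectType(IJmat, HYPRE_PARCSR);
   int *rowSizes = new int[localNRows];
   for (int i = 0; i < localNRows; i++) rowSizes[i] = 1;
   HYPRE_IJMatrixSetRowSizes(IJmat, rowSizes);
   HYPRE_IJMatrixInitialize(IJmat);
   delete [] rowSizes;
   double one  = 1.0;
   int    nOne = 1;
   for (int i = 0; i < localNRows; i++)
   {
      int row = startRow + i;
      HYPRE_IJMatrixSetValues(IJmat, 1, &nOne, (const int *) &row,
                              (const int *) &row, (const double *) &one);
   }
   HYPRE_IJMatrixAssemble(IJmat);
   HYPRE_IJMatrixGetObject(IJmat, (void **) &parcsr);
   // as in the code above, detach the ParCSR object before destroying
   // the IJ wrapper so the caller keeps ownership of parcsr
   HYPRE_IJMatrixSetObjectType(IJmat, -1);
   HYPRE_IJMatrixDestroy(IJmat);
   return parcsr;
}
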
2859 // ************************************************************************
2860 // Purpose : Given Amat, perform preferential coarsening
2861 //           (called by setupExtendedDomainDecomp2)
2862 // ------------------------------------------------------------------------
2863
2864 double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut,
2865 int inANRows)
2866 {
2867 int mypid, nprocs, *partition, AStartRow, AEndRow, ALocalNRows;
2868 int blkSize, naggr, *node2aggr, ierr, PLocalNCols, PStartCol;
2869 int PLocalNRows, PStartRow, *eqn2aggr, irow, jcol, ig;
2870 int *PCols, maxAggSize, *aggCntArray, index, **aggIndArray;
2871 int aggSize, nzcnt, *rowLengths, rowNum, *colInd;
2872 double **PVecs, *newNull, *qArray, *rArray, *colVal;
2873 char paramString[200];
2874 HYPRE_IJMatrix IJPmat;
2875 hypre_ParCSRMatrix *Amat, *A2mat, *Pmat;
2876 MLI_Matrix *mli_A2mat=NULL, *mli_Pmat;
2877 MPI_Comm comm;
2878 hypre_ParCSRCommPkg *commPkg;
2879 MLI_Function *funcPtr;
2880
2881 /*-----------------------------------------------------------------
2882 * fetch matrix and machine information
2883 *-----------------------------------------------------------------*/
2884
2885 Amat = (hypre_ParCSRMatrix *) mli_Amat->getMatrix();
2886 comm = hypre_ParCSRMatrixComm(Amat);
2887 MPI_Comm_rank(comm,&mypid);
2888 MPI_Comm_size(comm,&nprocs);
2889
2890 /*-----------------------------------------------------------------
2891 * fetch other matrix information
2892 *-----------------------------------------------------------------*/
2893
2894 HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) Amat,&partition);
2895 AStartRow = partition[mypid];
2896 AEndRow = partition[mypid+1] - 1;
2897 ALocalNRows = AEndRow - AStartRow + 1;
2898 free(partition);
2899
2900 /*-----------------------------------------------------------------
2901 * reduce Amat based on the block size information (if nodeDofs_ > 1)
2902 *-----------------------------------------------------------------*/
2903
2904 blkSize = currNodeDofs_;
2905 if (blkSize > 1) MLI_Matrix_Compress(mli_Amat, blkSize, &mli_A2mat);
2906 else mli_A2mat = mli_Amat;
2907 A2mat = (hypre_ParCSRMatrix *) mli_A2mat->getMatrix();
2908
2909 /*-----------------------------------------------------------------
2910 * modify minimum aggregate size, if needed
2911 *-----------------------------------------------------------------*/
2912
2913 minAggrSize_ = nullspaceDim_ / currNodeDofs_;
2914 if (minAggrSize_ <= 1) minAggrSize_ = 2;
2915
2916 /*-----------------------------------------------------------------
2917 * perform coarsening
2918 *-----------------------------------------------------------------*/
2919
2920 coarsenAExt(A2mat, &naggr, &node2aggr, inANRows);
2921 if (blkSize > 1 && mli_A2mat != NULL) delete mli_A2mat;
2922
2923 /*-----------------------------------------------------------------
2924 * fetch the coarse grid information and instantiate P
2925 *-----------------------------------------------------------------*/
2926
2927 PLocalNCols = naggr * nullspaceDim_;
2928 MLI_Utils_GenPartition(comm, PLocalNCols, &partition);
2929 PStartCol = partition[mypid];
2930 free(partition);
2931 PLocalNRows = ALocalNRows;
2932 PStartRow = AStartRow;
2933 ierr = HYPRE_IJMatrixCreate(comm,PStartRow,PStartRow+PLocalNRows-1,
2934 PStartCol,PStartCol+PLocalNCols-1,&IJPmat);
2935 ierr = HYPRE_IJMatrixSetObjectType(IJPmat, HYPRE_PARCSR);
2936 hypre_assert(!ierr);
2937
2938 /*-----------------------------------------------------------------
2939 * expand the aggregation information if block size > 1 ==> eqn2aggr
2940 *-----------------------------------------------------------------*/
2941
2942 if (blkSize > 1)
2943 {
2944 eqn2aggr = new int[ALocalNRows];
2945 for (irow = 0; irow < ALocalNRows; irow++)
2946 eqn2aggr[irow] = node2aggr[irow/blkSize];
2947 delete [] node2aggr;
2948 }
2949 else eqn2aggr = node2aggr;
2950
2951 /*-----------------------------------------------------------------
2952 * create a compact form for the null space vectors
2953 * (get ready to perform QR on them)
2954 *-----------------------------------------------------------------*/
2955
2956 PVecs = new double*[nullspaceDim_];
2957 PCols = new int[PLocalNRows];
2958 for (irow = 0; irow < nullspaceDim_; irow++)
2959 PVecs[irow] = new double[PLocalNRows];
2960 for (irow = 0; irow < PLocalNRows; irow++)
2961 {
2962 if (eqn2aggr[irow] >= 0)
2963 PCols[irow] = PStartCol + eqn2aggr[irow] * nullspaceDim_;
2964 else PCols[irow] = PStartCol + (-eqn2aggr[irow]-1) * nullspaceDim_;
2965 if (nullspaceVec_ != NULL)
2966 {
2967 for (jcol = 0; jcol < nullspaceDim_; jcol++)
2968 PVecs[jcol][irow] = nullspaceVec_[jcol*PLocalNRows+irow];
2969 }
2970 else
2971 {
2972 for (jcol = 0; jcol < nullspaceDim_; jcol++)
2973 {
2974 if (irow % nullspaceDim_ == jcol) PVecs[jcol][irow] = 1.0;
2975 else PVecs[jcol][irow] = 0.0;
2976 }
2977 }
2978 }
2979
2980 /*-----------------------------------------------------------------
2981 * perform QR for null space
2982 *-----------------------------------------------------------------*/
2983
2984 newNull = NULL;
2985 if (PLocalNRows > 0)
2986 {
2987 /* ------ count the size of each aggregate ------ */
2988
2989 aggCntArray = new int[naggr];
2990 for (ig = 0; ig < naggr; ig++) aggCntArray[ig] = 0;
2991 for (irow = 0; irow < PLocalNRows; irow++)
2992 if (eqn2aggr[irow] >= 0) aggCntArray[eqn2aggr[irow]]++;
2993 else aggCntArray[(-eqn2aggr[irow]-1)]++;
2994 maxAggSize = 0;
2995 for (ig = 0; ig < naggr; ig++)
2996 if (aggCntArray[ig] > maxAggSize) maxAggSize = aggCntArray[ig];
2997
2998 /* ------ register which equation is in which aggregate ------ */
2999
3000 aggIndArray = new int*[naggr];
3001 for (ig = 0; ig < naggr; ig++)
3002 {
3003 aggIndArray[ig] = new int[aggCntArray[ig]];
3004 aggCntArray[ig] = 0;
3005 }
3006 for (irow = 0; irow < PLocalNRows; irow++)
3007 {
3008 index = eqn2aggr[irow];
3009 if (index >= 0)
3010 aggIndArray[index][aggCntArray[index]++] = irow;
3011 else
3012 aggIndArray[-index-1][aggCntArray[-index-1]++] = irow;
3013 }
3014
3015 /* ------ allocate storage for QR factorization ------ */
3016
3017 qArray = new double[maxAggSize * nullspaceDim_];
3018 rArray = new double[nullspaceDim_ * nullspaceDim_];
3019 newNull = new double[naggr*nullspaceDim_*nullspaceDim_];
3020
3021 /* ------ perform QR on each aggregate ------ */
3022
3023 for (ig = 0; ig < naggr; ig++)
3024 {
3025 aggSize = aggCntArray[ig];
3026
3027 if (aggSize < nullspaceDim_)
3028 {
3029 printf("Aggregation ERROR : underdetermined system in QR.\n");
3030 printf(" error on Proc %d\n", mypid);
3031 printf(" error on aggr %d (%d)\n", ig, naggr);
3032 printf(" aggr size is %d\n", aggSize);
3033 exit(1);
3034 }
3035
3036 /* ------ put data into the temporary array ------ */
3037
3038 for (jcol = 0; jcol < aggSize; jcol++)
3039 {
3040 for (irow = 0; irow < nullspaceDim_; irow++)
3041 qArray[aggSize*irow+jcol] = PVecs[irow][aggIndArray[ig][jcol]];
3042 }
3043
3044          /* ------ no QR is actually performed here : the next null
                        space (R) is simply set to identity ------ */
3045
3046 for (jcol = 0; jcol < nullspaceDim_; jcol++)
3047 for (irow = 0; irow < nullspaceDim_; irow++)
3048 if (irow == jcol)
3049 newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_] = 1.0;
3050 else
3051 newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_] = 0.0;
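
            // (layout : coarse null vector irow is stored contiguously
            //  with stride naggr*nullspaceDim_, and aggregate ig owns the
            //  coarse dofs ig*nullspaceDim_ .. ig*nullspaceDim_+dim-1)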
3052
3053 /* ------ put the P to PVecs ------ */
3054
3055 for (jcol = 0; jcol < aggSize; jcol++)
3056 {
3057 for (irow = 0; irow < nullspaceDim_; irow++)
3058 {
3059 index = aggIndArray[ig][jcol];
3060 PVecs[irow][index] = qArray[ irow*aggSize + jcol ];
3061 }
3062 }
3063 }
3064 for (ig = 0; ig < naggr; ig++) delete [] aggIndArray[ig];
3065 delete [] aggIndArray;
3066 delete [] aggCntArray;
3067 delete [] qArray;
3068 delete [] rArray;
3069 }
3070 if (nullspaceVec_ != NULL) delete [] nullspaceVec_;
3071 nullspaceVec_ = newNull;
3072
3073 /*-----------------------------------------------------------------
3074 * initialize Pmat
3075 *-----------------------------------------------------------------*/
3076
3077 rowLengths = new int[PLocalNRows];
3078 for (irow = 0; irow < PLocalNRows; irow++)
3079 rowLengths[irow] = nullspaceDim_;
3080 ierr = HYPRE_IJMatrixSetRowSizes(IJPmat, rowLengths);
3081 ierr = HYPRE_IJMatrixInitialize(IJPmat);
3082 hypre_assert(!ierr);
3083 delete [] rowLengths;
3084
3085 /*--------------------------------------------------------------------
3086 * load and assemble Pmat
3087 *--------------------------------------------------------------------*/
3088
3089 colInd = new int[nullspaceDim_];
3090 colVal = new double[nullspaceDim_];
3091 for (irow = 0; irow < PLocalNRows; irow++)
3092 {
3093 if (PCols[irow] >= 0)
3094 {
3095 nzcnt = 0;
3096 for (jcol = 0; jcol < nullspaceDim_; jcol++)
3097 {
3098 if (PVecs[jcol][irow] != 0.0)
3099 {
3100 colInd[nzcnt] = PCols[irow] + jcol;
3101 colVal[nzcnt++] = PVecs[jcol][irow];
3102 }
3103 }
3104 rowNum = PStartRow + irow;
3105 HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt,
3106 (const int *) &rowNum, (const int *) colInd,
3107 (const double *) colVal);
3108 }
3109 }
3110 ierr = HYPRE_IJMatrixAssemble(IJPmat);
3111 hypre_assert(!ierr);
3112 HYPRE_IJMatrixGetObject(IJPmat, (void **) &Pmat);
3113 hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) Pmat);
3114 commPkg = hypre_ParCSRMatrixCommPkg(Amat);
3115 if (!commPkg) hypre_MatvecCommPkgCreate(Amat);
3116 HYPRE_IJMatrixSetObjectType(IJPmat, -1);
3117 HYPRE_IJMatrixDestroy(IJPmat);
3118 delete [] colInd;
3119 delete [] colVal;
3120
3121 /*-----------------------------------------------------------------
3122 * clean up
3123 *-----------------------------------------------------------------*/
3124
3125 if (PCols != NULL) delete [] PCols;
3126 if (PVecs != NULL)
3127 {
3128 for (irow = 0; irow < nullspaceDim_; irow++)
3129 if (PVecs[irow] != NULL) delete [] PVecs[irow];
3130          delete [] PVecs;
3131    }
      delete [] eqn2aggr;
3132
3133 /*-----------------------------------------------------------------
3134 * set up and return Pmat
3135 *-----------------------------------------------------------------*/
3136
3137 funcPtr = new MLI_Function();
3138 MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr);
3139 sprintf(paramString, "HYPRE_ParCSR" );
3140 mli_Pmat = new MLI_Matrix( Pmat, paramString, funcPtr );
3141 (*PmatOut) = mli_Pmat;
3142 delete funcPtr;
3143 return 0.0;
3144 }
3145
3146 // *********************************************************************
3147 // graded coarsening scheme (Given a graph, aggregate on the local subgraph
3148 // but give smaller aggregate near processor boundaries)
3149 // (called by setupExtendedDomainDecomp2/genP_AExt)
3150 // ---------------------------------------------------------------------
3151
3152 int MLI_Method_AMGSA::coarsenAExt(hypre_ParCSRMatrix *hypreG,
3153 int *mliAggrLeng, int **mliAggrArray, int inANRows)
3154 {
3155 MPI_Comm comm;
3156 int mypid, nprocs, *partition, startRow, endRow, maxInd;
3157 int localNRows, naggr=0, *node2aggr, *aggrSizes;
3158 int irow, jcol, rowLeng, globalNRows, index;
3159 int *nodeStat, selectFlag, nSelected=0, nNotSelected=0, count;
3160 int *GDiagI, *GDiagJ;
3161 double maxVal, *GDiagA;
3162 hypre_CSRMatrix *GDiag;
3163
3164 /*-----------------------------------------------------------------
3165 * fetch machine and matrix parameters
3166 *-----------------------------------------------------------------*/
3167
3168 comm = hypre_ParCSRMatrixComm(hypreG);
3169 MPI_Comm_rank(comm,&mypid);
3170 MPI_Comm_size(comm,&nprocs);
3171 HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreG,
3172 &partition);
3173 startRow = partition[mypid];
3174 endRow = partition[mypid+1] - 1;
3175 free(partition);
3176 localNRows = endRow - startRow + 1;
3177 MPI_Allreduce(&localNRows, &globalNRows, 1, MPI_INT, MPI_SUM, comm);
3178 if (mypid == 0 && outputLevel_ > 1)
3179 {
3180 printf("\t*** Aggregation(E) : total nodes to aggregate = %d\n",
3181 globalNRows);
3182 }
3183 GDiag = hypre_ParCSRMatrixDiag(hypreG);
3184 GDiagI = hypre_CSRMatrixI(GDiag);
3185 GDiagJ = hypre_CSRMatrixJ(GDiag);
3186 GDiagA = hypre_CSRMatrixData(GDiag);
3187
3188 /*-----------------------------------------------------------------
3189 * allocate status arrays
3190 *-----------------------------------------------------------------*/
3191
3192 if (localNRows > 0)
3193 {
3194 node2aggr = new int[localNRows];
3195 aggrSizes = new int[localNRows];
3196 nodeStat = new int[localNRows];
3197 for (irow = 0; irow < inANRows; irow++)
3198 {
3199 aggrSizes[irow] = 1;
3200 node2aggr[irow] = -1;
3201 nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
3202 }
3203 for (irow = inANRows; irow < localNRows; irow++)
3204 {
3205 aggrSizes[irow] = 0;
3206 node2aggr[irow] = -1;
3207 nodeStat[irow] = MLI_METHOD_AMGSA_READY;
3208 }
3209 nSelected = inANRows;
3210 }
3211 else node2aggr = aggrSizes = nodeStat = NULL;
3212
3213 /*-----------------------------------------------------------------
3214  * search for zero rows among the extended (non-local) rows
3215 *-----------------------------------------------------------------*/
3216
3217 for (irow = inANRows; irow < localNRows; irow++)
3218 {
3219 rowLeng = GDiagI[irow+1] - GDiagI[irow];
3220 if (rowLeng <= 0)
3221 {
3222 nodeStat[irow] = MLI_METHOD_AMGSA_NOTSELECTED;
3223 nNotSelected++;
3224 }
3225 }
3226
3227 /*-----------------------------------------------------------------
3228 * Phase 0 : 1 node per aggregate for the immediate neighbors
3229 *-----------------------------------------------------------------*/
3230
3231 for (irow = 0; irow < inANRows; irow++)
3232 {
3233 for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++)
3234 {
3235 index = GDiagJ[jcol];
3236 if (index >= inANRows && nodeStat[index]==MLI_METHOD_AMGSA_READY)
3237 {
3238 node2aggr[index] = naggr;
3239 nodeStat[index] = MLI_METHOD_AMGSA_SELECTED;
3240 aggrSizes[naggr] = 1;
3241 naggr++;
3242 nSelected++;
3243 }
3244 }
3245 }
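
   /* (the grading : nodes adjacent to the original subdomain rows become
      singleton aggregates here, the next layer forms small aggregates in
      Phase 1 below, and the remaining interior is aggregated normally) */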

   /*-----------------------------------------------------------------
    * Phase 1 : small aggregates for the next level
    *-----------------------------------------------------------------*/

   for (irow = inANRows; irow < localNRows; irow++)
   {
      if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
      {
         /* a READY node adjacent to a SELECTED node seeds a new
          * aggregate and pulls in its still-READY neighbors */
         selectFlag = 0;
         for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++)
         {
            index = GDiagJ[jcol];
            if (nodeStat[index] == MLI_METHOD_AMGSA_SELECTED)
            {
               selectFlag = 1;
               break;
            }
         }
         if (selectFlag == 1)
         {
            nSelected++;
            node2aggr[irow]  = naggr;
            aggrSizes[naggr] = 1;
            nodeStat[irow]   = MLI_METHOD_AMGSA_SELECTED2;
            for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++)
            {
               index = GDiagJ[jcol];
               if (nodeStat[index] == MLI_METHOD_AMGSA_READY)
               {
                  nSelected++;
                  node2aggr[index] = naggr;
                  aggrSizes[naggr]++;
                  nodeStat[index]  = MLI_METHOD_AMGSA_SELECTED2;
               }
            }
            naggr++;
         }
      }
   }
   for (irow = inANRows; irow < localNRows; irow++)
      if (nodeStat[irow] == MLI_METHOD_AMGSA_SELECTED2)
         nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
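
   /* The SELECTED2 tag keeps aggregates formed in this sweep from
    * absorbing second-generation neighbors within the same sweep; the
    * loop above then promotes them to SELECTED. A small trace, for
    * illustration: take inANRows = 1, nodes {0,1,2}, node 0
    * pre-selected, and edges 0-1 and 1-2 with no self entries in G.
    * Phase 0 turns node 1 (the only READY neighbor of node 0) into
    * singleton aggregate 0; Phase 1 sees that node 2 touches the
    * SELECTED node 1, so node 2 seeds aggregate 1, and any READY
    * neighbors of node 2 (none here) would have joined it. */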

   /*-----------------------------------------------------------------
    * Phase 2 : form aggregates from the remaining nodes
    *-----------------------------------------------------------------*/

   for (irow = inANRows; irow < localNRows; irow++)
   {
      if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
      {
         selectFlag = 1;
         for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++)
         {
            index = GDiagJ[jcol];
            if (nodeStat[index] != MLI_METHOD_AMGSA_READY)
            {
               selectFlag = 0;
               break;
            }
         }
         if (selectFlag == 1)
         {
            aggrSizes[naggr] = 1;
            node2aggr[irow]  = naggr;
            nodeStat[irow]   = MLI_METHOD_AMGSA_SELECTED;
            nSelected++;
            for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++)
            {
               index = GDiagJ[jcol];
               node2aggr[index] = naggr;
               nodeStat[index]  = MLI_METHOD_AMGSA_SELECTED;
               aggrSizes[naggr]++;
               nSelected++;
            }
            naggr++;
         }
      }
   }
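
   /* Phase 2 seeds a new aggregate at each node whose entire
    * neighborhood is still READY and absorbs that whole neighborhood.
    * Note that the inner sweep does not exclude index == irow, so G is
    * presumably assembled without diagonal self entries; a self entry
    * would count the root twice in aggrSizes and nSelected. */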

   /*-----------------------------------------------------------------
    * Phase 3 : put the rest into one of the existing aggregates
    *-----------------------------------------------------------------*/

   if ((nSelected+nNotSelected) < localNRows)
   {
      for (irow = inANRows; irow < localNRows; irow++)
      {
         if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
         {
            maxInd = -1;
            maxVal = 0.0;
            for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++)
            {
               index = GDiagJ[jcol];
               if (nodeStat[index] == MLI_METHOD_AMGSA_SELECTED)
               {
                  if (GDiagA[jcol] > maxVal)
                  {
                     maxInd = index;
                     maxVal = GDiagA[jcol];
                  }
               }
            }
            if (maxInd != -1)
            {
               node2aggr[irow] = node2aggr[maxInd];
               nodeStat[irow]  = MLI_METHOD_AMGSA_PENDING;
               aggrSizes[node2aggr[maxInd]]++;
            }
         }
      }
      for (irow = inANRows; irow < localNRows; irow++)
      {
         if (nodeStat[irow] == MLI_METHOD_AMGSA_PENDING)
         {
            nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED;
            nSelected++;
         }
      }
   }
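
   /* Phase 3 attaches each leftover node to the aggregate of its most
    * strongly coupled SELECTED neighbor (largest GDiagA value in its
    * row). The PENDING tag defers the status flip to the second loop
    * so that a newly attached node cannot serve as an attachment point
    * for another READY node within the same pass. */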

   /*-----------------------------------------------------------------
    * Phase 4 : finally group all remaining lone rows into new
    *           aggregates of roughly minAggrSize_ nodes each
    *-----------------------------------------------------------------*/

   int nUndone = localNRows - nSelected - nNotSelected;
   if (nUndone > 0)
   {
      count = nUndone / minAggrSize_;
      if (count == 0) count = 1;
      count += naggr;
      irow = jcol = 0;
      while (nUndone > 0)
      {
         if (nodeStat[irow] == MLI_METHOD_AMGSA_READY)
         {
            node2aggr[irow] = naggr;
            nodeStat[irow]  = MLI_METHOD_AMGSA_SELECTED;
            nUndone--;
            nSelected++;
            jcol++;
            if (jcol >= minAggrSize_ && naggr < count-1)
            {
               jcol = 0;
               naggr++;
            }
         }
         irow++;
      }
      naggr = count;
   }
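
   /* Phase 4 deals the remaining lone rows (those with no SELECTED
    * neighbor at all) out in index order, minAggrSize_ at a time, into
    * count - naggr new aggregates; naggr stops advancing at count-1,
    * so the last new aggregate absorbs any remainder. */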

   /*-----------------------------------------------------------------
    * clean up and initialize the output arrays
    *-----------------------------------------------------------------*/

   if (localNRows > 0)
   {
      delete [] aggrSizes;
      delete [] nodeStat;
   }
   if (localNRows == 1 && naggr == 0)
   {
      node2aggr[0] = 0;
      naggr = 1;
   }
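
   /* Hand the node-to-aggregate map back to the caller, who presumably
    * takes ownership of the new[]-allocated node2aggr array, along
    * with the final aggregate count. */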
   (*mliAggrArray) = node2aggr;
   (*mliAggrLeng)  = naggr;
   return 0;
}
