1 // -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 // vi: set et ts=4 sw=2 sts=2:
3 #ifndef DUNE_AMG_MATRIXHIERARCHY_HH
4 #define DUNE_AMG_MATRIXHIERARCHY_HH
5 
6 #include <algorithm>
7 #include <tuple>
8 #include "aggregates.hh"
9 #include "graph.hh"
10 #include "galerkin.hh"
11 #include "renumberer.hh"
12 #include "graphcreator.hh"
13 #include "hierarchy.hh"
14 #include <dune/istl/bvector.hh>
15 #include <dune/common/parallel/indexset.hh>
16 #include <dune/istl/matrixutils.hh>
17 #include <dune/istl/matrixredistribute.hh>
18 #include <dune/istl/paamg/dependency.hh>
19 #include <dune/istl/paamg/graph.hh>
20 #include <dune/istl/paamg/indicescoarsener.hh>
21 #include <dune/istl/paamg/globalaggregates.hh>
22 #include <dune/istl/paamg/construction.hh>
23 #include <dune/istl/paamg/smoother.hh>
24 #include <dune/istl/paamg/transfer.hh>
25 
26 namespace Dune
27 {
28   namespace Amg
29   {
30     /**
31      * @addtogroup ISTL_PAAMG
32      *
33      * @{
34      */
35 
36     /** @file
37      * @author Markus Blatt
38      * @brief Provides a classes representing the hierarchies in AMG.
39      */
40     enum {
41       /**
42        * @brief Hard limit for the number of processes allowed.
43        *
44        * This is needed to prevent overflows when calculating
45        * the coarsening rate. Currently set 72,000 which is
46        * enough for JUGENE.
47        */
48       MAX_PROCESSES = 72000
49     };
50 
51     /**
52      * @brief The hierarchies build by the coarsening process.
53      *
54      * Namely a hierarchy of matrices, index sets, remote indices,
55      * interfaces and communicators.
56      */
57     template<class M, class PI, class A=std::allocator<M> >
58     class MatrixHierarchy
59     {
60     public:
61       /** @brief The type of the matrix operator. */
62       typedef M MatrixOperator;
63 
64       /** @brief The type of the matrix. */
65       typedef typename MatrixOperator::matrix_type Matrix;
66 
67       /** @brief The type of the index set. */
68       typedef PI ParallelInformation;
69 
70       /** @brief The allocator to use. */
71       typedef A Allocator;
72 
73       /** @brief The type of the aggregates map we use. */
74       typedef Dune::Amg::AggregatesMap<typename MatrixGraph<Matrix>::VertexDescriptor> AggregatesMap;
75 
76       /** @brief The type of the parallel matrix hierarchy. */
77       typedef Dune::Amg::Hierarchy<MatrixOperator,Allocator> ParallelMatrixHierarchy;
78 
79       /** @brief The type of the parallel informarion hierarchy. */
80       typedef Dune::Amg::Hierarchy<ParallelInformation,Allocator> ParallelInformationHierarchy;
81 
82       /** @brief Allocator for pointers. */
83       using AAllocator = typename std::allocator_traits<Allocator>::template rebind_alloc<AggregatesMap*>;
84 
85       /** @brief The type of the aggregates maps list. */
86       typedef std::list<AggregatesMap*,AAllocator> AggregatesMapList;
87 
88       /** @brief The type of the redistribute information. */
89       typedef RedistributeInformation<ParallelInformation> RedistributeInfoType;
90 
91       /** @brief Allocator for RedistributeInfoType. */
92       using RILAllocator = typename std::allocator_traits<Allocator>::template rebind_alloc<RedistributeInfoType>;
93 
94       /** @brief The type of the list of redistribute information. */
95       typedef std::list<RedistributeInfoType,RILAllocator> RedistributeInfoList;
96 
97       /**
98        * @brief Constructor
99        * @param fineMatrix The matrix to coarsen.
100        * @param pinfo The information about the parallel data decomposition at the first level.
101        */
102       MatrixHierarchy(std::shared_ptr<MatrixOperator> fineMatrix,
103         std::shared_ptr<ParallelInformation> pinfo = std::make_shared<ParallelInformation>());
104 
105       ~MatrixHierarchy();
106 
107       /**
108        * @brief Build the matrix hierarchy using aggregation.
109        *
110        * @brief criterion The criterion describing the aggregation process.
111        */
112       template<typename O, typename T>
113       void build(const T& criterion);
114 
115       /**
116        * @brief Recalculate the galerkin products.
117        *
118        * If the data of the fine matrix changes but not its sparsity pattern
119        * this will recalculate all coarser levels without starting the expensive
120        * aggregation process all over again.
121        */
122       template<class F>
123       void recalculateGalerkin(const F& copyFlags);
124 
125       /**
126        * @brief Coarsen the vector hierarchy according to the matrix hierarchy.
127        * @param hierarchy The vector hierarchy to coarsen.
128        */
129       template<class V, class BA, class TA>
130       void coarsenVector(Hierarchy<BlockVector<V,BA>, TA>& hierarchy) const;
131 
132       /**
133        * @brief Coarsen the smoother hierarchy according to the matrix hierarchy.
134        * @param smoothers The smoother hierarchy to coarsen.
135        * @param args The arguments for the construction of the coarse level smoothers.
136        */
137       template<class S, class TA>
138       void coarsenSmoother(Hierarchy<S,TA>& smoothers,
139                            const typename SmootherTraits<S>::Arguments& args) const;
140 
141       /**
142        * @brief Get the number of levels in the hierarchy.
143        * @return The number of levels.
144        */
145       std::size_t levels() const;
146 
147       /**
148        * @brief Get the max number of levels in the hierarchy of processors.
149        * @return The maximum number of levels.
150        */
151       std::size_t maxlevels() const;
152 
153       bool hasCoarsest() const;
154 
155       /**
156        * @brief Whether the hierarchy was built.
157        * @return true if the MatrixHierarchy::build method was called.
158        */
159       bool isBuilt() const;
160 
161       /**
162        * @brief Get the matrix hierarchy.
163        * @return The matrix hierarchy.
164        */
165       const ParallelMatrixHierarchy& matrices() const;
166 
167       /**
168        * @brief Get the hierarchy of the parallel data distribution information.
169        * @return The hierarchy of the parallel data distribution information.
170        */
171       const ParallelInformationHierarchy& parallelInformation() const;
172 
173       /**
174        * @brief Get the hierarchy of the mappings of the nodes onto aggregates.
175        * @return The hierarchy of the mappings of the nodes onto aggregates.
176        */
177       const AggregatesMapList& aggregatesMaps() const;
178 
179       /**
180        * @brief Get the hierarchy of the information about redistributions,
181        * @return The hierarchy of the information about redistributions of the
182        * data to fewer processes.
183        */
184       const RedistributeInfoList& redistributeInformation() const;
185 
getProlongationDampingFactor() const186       double getProlongationDampingFactor() const
187       {
188         return prolongDamp_;
189       }
190 
191       /**
192        * @brief Get the mapping of fine level unknowns to coarse level
193        * aggregates.
194        *
195        * For each fine level unknown i the correcponding data[i] is the
196        * aggregate it belongs to on the coarsest level.
197        *
198        * @param[out] data The mapping of fine level unknowns to coarse level
199        * aggregates.
200        */
201       void getCoarsestAggregatesOnFinest(std::vector<std::size_t>& data) const;
202 
203     private:
204       typedef typename ConstructionTraits<MatrixOperator>::Arguments MatrixArgs;
205       typedef typename ConstructionTraits<ParallelInformation>::Arguments CommunicationArgs;
206       /** @brief The list of aggregates maps. */
207       AggregatesMapList aggregatesMaps_;
208       /** @brief The list of redistributes. */
209       RedistributeInfoList redistributes_;
210       /** @brief The hierarchy of parallel matrices. */
211       ParallelMatrixHierarchy matrices_;
212       /** @brief The hierarchy of the parallel information. */
213       ParallelInformationHierarchy parallelInformation_;
214 
215       /** @brief Whether the hierarchy was built. */
216       bool built_;
217 
218       /** @brief The maximum number of level across all processors.*/
219       int maxlevels_;
220 
221       double prolongDamp_;
222 
223       /**
224        * @brief functor to print matrix statistics.
225        */
226       template<class Matrix, bool print>
227       struct MatrixStats
228       {
229 
230         /**
231          * @brief Print matrix statistics.
232          */
statsDune::Amg::MatrixHierarchy::MatrixStats233         static void stats([[maybe_unused]] const Matrix& matrix)
234         {}
235       };
236 
237       template<class Matrix>
238       struct MatrixStats<Matrix,true>
239       {
240         struct calc
241         {
242           typedef typename Matrix::size_type size_type;
243           typedef typename Matrix::row_type matrix_row;
244 
calcDune::Amg::MatrixHierarchy::MatrixStats::calc245           calc()
246           {
247             min=std::numeric_limits<size_type>::max();
248             max=0;
249             sum=0;
250           }
251 
operator ()Dune::Amg::MatrixHierarchy::MatrixStats::calc252           void operator()(const matrix_row& row)
253           {
254             min=std::min(min, row.size());
255             max=std::max(max, row.size());
256             sum += row.size();
257           }
258 
259           size_type min;
260           size_type max;
261           size_type sum;
262         };
263         /**
264          * @brief Print matrix statistics.
265          */
statsDune::Amg::MatrixHierarchy::MatrixStats266         static void stats(const Matrix& matrix)
267         {
268           calc c= for_each(matrix.begin(), matrix.end(), calc());
269           dinfo<<"Matrix row: min="<<c.min<<" max="<<c.max
270                <<" average="<<static_cast<double>(c.sum)/matrix.N()
271                <<std::endl;
272         }
273       };
274     };
275 
276     /**
277      * @brief The criterion describing the stop criteria for the coarsening process.
278      */
279     template<class T>
280     class CoarsenCriterion : public T
281     {
282     public:
283       /**
284        * @brief The criterion for tagging connections as strong and nodes as isolated.
285        * This might be e.g. SymmetricCriterion or UnSymmetricCriterion.
286        */
287       typedef T AggregationCriterion;
288 
289       /**
290        * @brief Constructor
291        * @param maxLevel The maximum number of levels allowed in the matrix hierarchy (default: 100).
292        * @param coarsenTarget If the number of nodes in the matrix is below this threshold the
293        * coarsening will stop (default: 1000).
294        * @param minCoarsenRate If the coarsening rate falls below this threshold the
295        * coarsening will stop (default: 1.2)
296        * @param prolongDamp The damping factor to apply to the prolongated update (default: 1.6)
297        * @param accumulate Whether to accumulate the data onto fewer processors on coarser levels.
298        */
CoarsenCriterion(int maxLevel=100,int coarsenTarget=1000,double minCoarsenRate=1.2,double prolongDamp=1.6,AccumulationMode accumulate=successiveAccu)299       CoarsenCriterion(int maxLevel=100, int coarsenTarget=1000, double minCoarsenRate=1.2,
300                        double prolongDamp=1.6, AccumulationMode accumulate=successiveAccu)
301         : AggregationCriterion(Dune::Amg::Parameters(maxLevel, coarsenTarget, minCoarsenRate, prolongDamp, accumulate))
302       {}
303 
CoarsenCriterion(const Dune::Amg::Parameters & parms)304       CoarsenCriterion(const Dune::Amg::Parameters& parms)
305         : AggregationCriterion(parms)
306       {}
307 
308     };
309 
310     template<typename M, typename C1>
repartitionAndDistributeMatrix(const M & origMatrix,std::shared_ptr<M> newMatrix,SequentialInformation & origComm,std::shared_ptr<SequentialInformation> & newComm,RedistributeInformation<SequentialInformation> & ri,int nparts,C1 & criterion)311     bool repartitionAndDistributeMatrix([[maybe_unused]] const M& origMatrix,
312                                         [[maybe_unused]] std::shared_ptr<M> newMatrix,
313                                         [[maybe_unused]] SequentialInformation& origComm,
314                                         [[maybe_unused]] std::shared_ptr<SequentialInformation>& newComm,
315                                         [[maybe_unused]] RedistributeInformation<SequentialInformation>& ri,
316                                         [[maybe_unused]] int nparts,
317                                         [[maybe_unused]] C1& criterion)
318     {
319       DUNE_THROW(NotImplemented, "Redistribution does not make sense in sequential code!");
320     }
321 
322 
323     template<typename M, typename C, typename C1>
repartitionAndDistributeMatrix(const M & origMatrix,std::shared_ptr<M> newMatrix,C & origComm,std::shared_ptr<C> & newComm,RedistributeInformation<C> & ri,int nparts,C1 & criterion)324     bool repartitionAndDistributeMatrix(const M& origMatrix,
325                                         std::shared_ptr<M> newMatrix,
326                                         C& origComm,
327                                         std::shared_ptr<C>& newComm,
328                                         RedistributeInformation<C>& ri,
329                                         int nparts, C1& criterion)
330     {
331       Timer time;
332 #ifdef AMG_REPART_ON_COMM_GRAPH
333       // Done not repartition the matrix graph, but a graph of the communication scheme.
334       bool existentOnRedist=Dune::commGraphRepartition(origMatrix, origComm, nparts, newComm,
335                                                        ri.getInterface(),
336                                                        criterion.debugLevel()>1);
337 
338 #else
339       typedef Dune::Amg::MatrixGraph<const M> MatrixGraph;
340       typedef Dune::Amg::PropertiesGraph<MatrixGraph,
341           VertexProperties,
342           EdgeProperties,
343           IdentityMap,
344           IdentityMap> PropertiesGraph;
345       MatrixGraph graph(origMatrix);
346       PropertiesGraph pgraph(graph);
347       buildDependency(pgraph, origMatrix, criterion, false);
348 
349 #ifdef DEBUG_REPART
350       if(origComm.communicator().rank()==0)
351         std::cout<<"Original matrix"<<std::endl;
352       origComm.communicator().barrier();
353       printGlobalSparseMatrix(origMatrix, origComm, std::cout);
354 #endif
355       bool existentOnRedist=Dune::graphRepartition(pgraph, origComm, nparts,
356                                                    newComm, ri.getInterface(),
357                                                    criterion.debugLevel()>1);
358 #endif // if else AMG_REPART
359 
360       if(origComm.communicator().rank()==0  && criterion.debugLevel()>1)
361         std::cout<<"Repartitioning took "<<time.elapsed()<<" seconds."<<std::endl;
362 
363       ri.setSetup();
364 
365 #ifdef DEBUG_REPART
366       ri.checkInterface(origComm.indexSet(), newComm->indexSet(), origComm.communicator());
367 #endif
368 
369       redistributeMatrix(const_cast<M&>(origMatrix), *newMatrix, origComm, *newComm, ri);
370 
371 #ifdef DEBUG_REPART
372       if(origComm.communicator().rank()==0)
373         std::cout<<"Original matrix"<<std::endl;
374       origComm.communicator().barrier();
375       if(newComm->communicator().size()>0)
376         printGlobalSparseMatrix(*newMatrix, *newComm, std::cout);
377       origComm.communicator().barrier();
378 #endif
379 
380       if(origComm.communicator().rank()==0  && criterion.debugLevel()>1)
381         std::cout<<"Redistributing matrix took "<<time.elapsed()<<" seconds."<<std::endl;
382       return existentOnRedist;
383 
384     }
385 
386     template<class M, class IS, class A>
MatrixHierarchy(std::shared_ptr<MatrixOperator> fineMatrix,std::shared_ptr<ParallelInformation> pinfo)387     MatrixHierarchy<M,IS,A>::MatrixHierarchy(std::shared_ptr<MatrixOperator> fineMatrix,
388                                              std::shared_ptr<ParallelInformation> pinfo)
389       : matrices_(fineMatrix),
390         parallelInformation_(pinfo)
391     {
392       if (SolverCategory::category(*fineMatrix) != SolverCategory::category(*pinfo))
393         DUNE_THROW(ISTLError, "MatrixOperator and ParallelInformation must belong to the same category!");
394     }
395 
396     template<class M, class IS, class A>
397     template<typename O, typename T>
build(const T & criterion)398     void MatrixHierarchy<M,IS,A>::build(const T& criterion)
399     {
400       prolongDamp_ = criterion.getProlongationDampingFactor();
401       typedef O OverlapFlags;
402       typedef typename ParallelMatrixHierarchy::Iterator MatIterator;
403       typedef typename ParallelInformationHierarchy::Iterator PInfoIterator;
404 
405       static const int noints=(Dune::Amg::MAX_PROCESSES/4096>0) ? (Dune::Amg::MAX_PROCESSES/4096) : 1;
406 
407       typedef bigunsignedint<sizeof(int)*8*noints> BIGINT;
408       GalerkinProduct<ParallelInformation> productBuilder;
409       MatIterator mlevel = matrices_.finest();
410       MatrixStats<typename M::matrix_type,MINIMAL_DEBUG_LEVEL<=INFO_DEBUG_LEVEL>::stats(mlevel->getmat());
411 
412       PInfoIterator infoLevel = parallelInformation_.finest();
413       BIGINT finenonzeros=countNonZeros(mlevel->getmat());
414       finenonzeros = infoLevel->communicator().sum(finenonzeros);
415       BIGINT allnonzeros = finenonzeros;
416 
417 
418       int level = 0;
419       int rank = 0;
420 
421       BIGINT unknowns = mlevel->getmat().N();
422 
423       unknowns = infoLevel->communicator().sum(unknowns);
424       double dunknowns=unknowns.todouble();
425       infoLevel->buildGlobalLookup(mlevel->getmat().N());
426       redistributes_.push_back(RedistributeInfoType());
427 
428       for(; level < criterion.maxLevel(); ++level, ++mlevel) {
429         assert(matrices_.levels()==redistributes_.size());
430         rank = infoLevel->communicator().rank();
431         if(rank==0 && criterion.debugLevel()>1)
432           std::cout<<"Level "<<level<<" has "<<dunknowns<<" unknowns, "<<dunknowns/infoLevel->communicator().size()
433                    <<" unknowns per proc (procs="<<infoLevel->communicator().size()<<")"<<std::endl;
434 
435         MatrixOperator* matrix=&(*mlevel);
436         ParallelInformation* info =&(*infoLevel);
437 
438         if((
439 #if HAVE_PARMETIS
440              criterion.accumulate()==successiveAccu
441 #else
442              false
443 #endif
444              || (criterion.accumulate()==atOnceAccu
445                  && dunknowns < 30*infoLevel->communicator().size()))
446            && infoLevel->communicator().size()>1 &&
447            dunknowns/infoLevel->communicator().size() <= criterion.coarsenTarget())
448         {
449           // accumulate to fewer processors
450           std::shared_ptr<Matrix> redistMat = std::make_shared<Matrix>();
451           std::shared_ptr<ParallelInformation> redistComm;
452           std::size_t nodomains = (std::size_t)std::ceil(dunknowns/(criterion.minAggregateSize()
453                                                                     *criterion.coarsenTarget()));
454           if( nodomains<=criterion.minAggregateSize()/2 ||
455               dunknowns <= criterion.coarsenTarget() )
456             nodomains=1;
457 
458           bool existentOnNextLevel =
459             repartitionAndDistributeMatrix(mlevel->getmat(), redistMat, *infoLevel,
460                                            redistComm, redistributes_.back(), nodomains,
461                                            criterion);
462           BIGINT unknownsRedist = redistMat->N();
463           unknownsRedist = infoLevel->communicator().sum(unknownsRedist);
464           dunknowns= unknownsRedist.todouble();
465           if(redistComm->communicator().rank()==0 && criterion.debugLevel()>1)
466             std::cout<<"Level "<<level<<" (redistributed) has "<<dunknowns<<" unknowns, "<<dunknowns/redistComm->communicator().size()
467                      <<" unknowns per proc (procs="<<redistComm->communicator().size()<<")"<<std::endl;
468           MatrixArgs args(redistMat, *redistComm);
469           mlevel.addRedistributed(ConstructionTraits<MatrixOperator>::construct(args));
470           assert(mlevel.isRedistributed());
471           infoLevel.addRedistributed(redistComm);
472           infoLevel->freeGlobalLookup();
473 
474           if(!existentOnNextLevel)
475             // We do not hold any data on the redistributed partitioning
476             break;
477 
478           // Work on the redistributed Matrix from now on
479           matrix = &(mlevel.getRedistributed());
480           info = &(infoLevel.getRedistributed());
481           info->buildGlobalLookup(matrix->getmat().N());
482         }
483 
484         rank = info->communicator().rank();
485         if(dunknowns <= criterion.coarsenTarget())
486           // No further coarsening needed
487           break;
488 
489         typedef PropertiesGraphCreator<MatrixOperator,ParallelInformation> GraphCreator;
490         typedef typename GraphCreator::PropertiesGraph PropertiesGraph;
491         typedef typename GraphCreator::GraphTuple GraphTuple;
492 
493         typedef typename PropertiesGraph::VertexDescriptor Vertex;
494 
495         std::vector<bool> excluded(matrix->getmat().N(), false);
496 
497         GraphTuple graphs = GraphCreator::create(*matrix, excluded, *info, OverlapFlags());
498 
499         AggregatesMap* aggregatesMap=new AggregatesMap(std::get<1>(graphs)->maxVertex()+1);
500 
501         aggregatesMaps_.push_back(aggregatesMap);
502 
503         Timer watch;
504         watch.reset();
505         int noAggregates, isoAggregates, oneAggregates, skippedAggregates;
506 
507         std::tie(noAggregates, isoAggregates, oneAggregates, skippedAggregates) =
508           aggregatesMap->buildAggregates(matrix->getmat(), *(std::get<1>(graphs)), criterion, level==0);
509 
510         if(rank==0 && criterion.debugLevel()>2)
511           std::cout<<" Have built "<<noAggregates<<" aggregates totally ("<<isoAggregates<<" isolated aggregates, "<<
512           oneAggregates<<" aggregates of one vertex,  and skipped "<<
513           skippedAggregates<<" aggregates)."<<std::endl;
514 #ifdef TEST_AGGLO
515         {
516           // calculate size of local matrix in the distributed direction
517           int start, end, overlapStart, overlapEnd;
518           int procs=info->communicator().rank();
519           int n = UNKNOWNS/procs; // number of unknowns per process
520           int bigger = UNKNOWNS%procs; // number of process with n+1 unknows
521 
522           // Compute owner region
523           if(rank<bigger) {
524             start = rank*(n+1);
525             end   = (rank+1)*(n+1);
526           }else{
527             start = bigger + rank * n;
528             end   = bigger + (rank + 1) * n;
529           }
530 
531           // Compute overlap region
532           if(start>0)
533             overlapStart = start - 1;
534           else
535             overlapStart = start;
536 
537           if(end<UNKNOWNS)
538             overlapEnd = end + 1;
539           else
540             overlapEnd = end;
541 
542           assert((UNKNOWNS)*(overlapEnd-overlapStart)==aggregatesMap->noVertices());
543           for(int j=0; j< UNKNOWNS; ++j)
544             for(int i=0; i < UNKNOWNS; ++i)
545             {
546               if(i>=overlapStart && i<overlapEnd)
547               {
548                 int no = (j/2)*((UNKNOWNS)/2)+i/2;
549                 (*aggregatesMap)[j*(overlapEnd-overlapStart)+i-overlapStart]=no;
550               }
551             }
552         }
553 #endif
554         if(criterion.debugLevel()>1 && info->communicator().rank()==0)
555           std::cout<<"aggregating finished."<<std::endl;
556 
557         BIGINT gnoAggregates=noAggregates;
558         gnoAggregates = info->communicator().sum(gnoAggregates);
559         double dgnoAggregates = gnoAggregates.todouble();
560 #ifdef TEST_AGGLO
561         BIGINT gnoAggregates=((UNKNOWNS)/2)*((UNKNOWNS)/2);
562 #endif
563 
564         if(criterion.debugLevel()>2 && rank==0)
565           std::cout << "Building "<<dgnoAggregates<<" aggregates took "<<watch.elapsed()<<" seconds."<<std::endl;
566 
567         if(dgnoAggregates==0 || dunknowns/dgnoAggregates<criterion.minCoarsenRate())
568         {
569           if(rank==0)
570           {
571             if(dgnoAggregates>0)
572               std::cerr << "Stopped coarsening because of rate breakdown "<<dunknowns<<"/"<<dgnoAggregates
573                         <<"="<<dunknowns/dgnoAggregates<<"<"
574                         <<criterion.minCoarsenRate()<<std::endl;
575             else
576               std::cerr<< "Could not build any aggregates. Probably no connected nodes."<<std::endl;
577           }
578           aggregatesMap->free();
579           delete aggregatesMap;
580           aggregatesMaps_.pop_back();
581 
582           if(criterion.accumulate() && mlevel.isRedistributed() && info->communicator().size()>1) {
583             // coarse level matrix was already redistributed, but to more than 1 process
584             // Therefore need to delete the redistribution. Further down it will
585             // then be redistributed to 1 process
586             delete &(mlevel.getRedistributed().getmat());
587             mlevel.deleteRedistributed();
588             delete &(infoLevel.getRedistributed());
589             infoLevel.deleteRedistributed();
590             redistributes_.back().resetSetup();
591           }
592 
593           break;
594         }
595         unknowns =  noAggregates;
596         dunknowns = dgnoAggregates;
597 
598         CommunicationArgs commargs(info->communicator(),info->category());
599         parallelInformation_.addCoarser(commargs);
600 
601         ++infoLevel; // parallel information on coarse level
602 
603         typename PropertyMapTypeSelector<VertexVisitedTag,PropertiesGraph>::Type visitedMap =
604           get(VertexVisitedTag(), *(std::get<1>(graphs)));
605 
606         watch.reset();
607         int aggregates = IndicesCoarsener<ParallelInformation,OverlapFlags>
608                          ::coarsen(*info,
609                                    *(std::get<1>(graphs)),
610                                    visitedMap,
611                                    *aggregatesMap,
612                                    *infoLevel,
613                                    noAggregates);
614         GraphCreator::free(graphs);
615 
616         if(criterion.debugLevel()>2) {
617           if(rank==0)
618             std::cout<<"Coarsening of index sets took "<<watch.elapsed()<<" seconds."<<std::endl;
619         }
620 
621         watch.reset();
622 
623         infoLevel->buildGlobalLookup(aggregates);
624         AggregatesPublisher<Vertex,OverlapFlags,ParallelInformation>::publish(*aggregatesMap,
625                                                                               *info,
626                                                                               infoLevel->globalLookup());
627 
628 
629         if(criterion.debugLevel()>2) {
630           if(rank==0)
631             std::cout<<"Communicating global aggregate numbers took "<<watch.elapsed()<<" seconds."<<std::endl;
632         }
633 
634         watch.reset();
635         std::vector<bool>& visited=excluded;
636 
637         typedef std::vector<bool>::iterator Iterator;
638         typedef IteratorPropertyMap<Iterator, IdentityMap> VisitedMap2;
639         Iterator end = visited.end();
640         for(Iterator iter= visited.begin(); iter != end; ++iter)
641           *iter=false;
642 
643         VisitedMap2 visitedMap2(visited.begin(), Dune::IdentityMap());
644 
645         std::shared_ptr<typename MatrixOperator::matrix_type>
646           coarseMatrix(productBuilder.build(*(std::get<0>(graphs)), visitedMap2,
647                                             *info,
648                                             *aggregatesMap,
649                                             aggregates,
650                                             OverlapFlags()));
651         dverb<<"Building of sparsity pattern took "<<watch.elapsed()<<std::endl;
652         watch.reset();
653         info->freeGlobalLookup();
654 
655         delete std::get<0>(graphs);
656         productBuilder.calculate(matrix->getmat(), *aggregatesMap, *coarseMatrix, *infoLevel, OverlapFlags());
657 
658         if(criterion.debugLevel()>2) {
659           if(rank==0)
660             std::cout<<"Calculation entries of Galerkin product took "<<watch.elapsed()<<" seconds."<<std::endl;
661         }
662 
663         BIGINT nonzeros = countNonZeros(*coarseMatrix);
664         allnonzeros = allnonzeros + infoLevel->communicator().sum(nonzeros);
665         MatrixArgs args(coarseMatrix, *infoLevel);
666 
667         matrices_.addCoarser(args);
668         redistributes_.push_back(RedistributeInfoType());
669       } // end level loop
670 
671 
672       infoLevel->freeGlobalLookup();
673 
674       built_=true;
675       AggregatesMap* aggregatesMap=new AggregatesMap(0);
676       aggregatesMaps_.push_back(aggregatesMap);
677 
678       if(criterion.debugLevel()>0) {
679         if(level==criterion.maxLevel()) {
680           BIGINT unknownsLevel = mlevel->getmat().N();
681           unknownsLevel = infoLevel->communicator().sum(unknownsLevel);
682           double dunknownsLevel = unknownsLevel.todouble();
683           if(rank==0 && criterion.debugLevel()>1) {
684             std::cout<<"Level "<<level<<" has "<<dunknownsLevel<<" unknowns, "<<dunknownsLevel/infoLevel->communicator().size()
685                      <<" unknowns per proc (procs="<<infoLevel->communicator().size()<<")"<<std::endl;
686           }
687         }
688       }
689 
690       if(criterion.accumulate() && !redistributes_.back().isSetup() &&
691          infoLevel->communicator().size()>1) {
692 #if HAVE_MPI && !HAVE_PARMETIS
693         if(criterion.accumulate()==successiveAccu &&
694            infoLevel->communicator().rank()==0)
695           std::cerr<<"Successive accumulation of data on coarse levels only works with ParMETIS installed."
696                    <<"  Fell back to accumulation to one domain on coarsest level"<<std::endl;
697 #endif
698 
699         // accumulate to fewer processors
700         std::shared_ptr<Matrix> redistMat = std::make_shared<Matrix>();
701         std::shared_ptr<ParallelInformation> redistComm;
702         int nodomains = 1;
703 
704         repartitionAndDistributeMatrix(mlevel->getmat(), redistMat, *infoLevel,
705                                        redistComm, redistributes_.back(), nodomains,criterion);
706         MatrixArgs args(redistMat, *redistComm);
707         BIGINT unknownsRedist = redistMat->N();
708         unknownsRedist = infoLevel->communicator().sum(unknownsRedist);
709 
710         if(redistComm->communicator().rank()==0 && criterion.debugLevel()>1) {
711           double dunknownsRedist = unknownsRedist.todouble();
712           std::cout<<"Level "<<level<<" redistributed has "<<dunknownsRedist<<" unknowns, "<<dunknownsRedist/redistComm->communicator().size()
713                    <<" unknowns per proc (procs="<<redistComm->communicator().size()<<")"<<std::endl;
714         }
715         mlevel.addRedistributed(ConstructionTraits<MatrixOperator>::construct(args));
716         infoLevel.addRedistributed(redistComm);
717         infoLevel->freeGlobalLookup();
718       }
719 
720       int levels = matrices_.levels();
721       maxlevels_ = parallelInformation_.finest()->communicator().max(levels);
722       assert(matrices_.levels()==redistributes_.size());
723       if(hasCoarsest() && rank==0 && criterion.debugLevel()>1)
724         std::cout<<"operator complexity: "<<allnonzeros.todouble()/finenonzeros.todouble()<<std::endl;
725 
726     }
727 
728     template<class M, class IS, class A>
729     const typename MatrixHierarchy<M,IS,A>::ParallelMatrixHierarchy&
matrices() const730     MatrixHierarchy<M,IS,A>::matrices() const
731     {
732       return matrices_;
733     }
734 
735     template<class M, class IS, class A>
736     const typename MatrixHierarchy<M,IS,A>::ParallelInformationHierarchy&
parallelInformation() const737     MatrixHierarchy<M,IS,A>::parallelInformation() const
738     {
739       return parallelInformation_;
740     }
741 
742     template<class M, class IS, class A>
getCoarsestAggregatesOnFinest(std::vector<std::size_t> & data) const743     void MatrixHierarchy<M,IS,A>::getCoarsestAggregatesOnFinest(std::vector<std::size_t>& data) const
744     {
745       int levels=aggregatesMaps().size();
746       int maxlevels=parallelInformation_.finest()->communicator().max(levels);
747       std::size_t size=(*(aggregatesMaps().begin()))->noVertices();
748       // We need an auxiliary vector for the consecutive prolongation.
749       std::vector<std::size_t> tmp;
750       std::vector<std::size_t> *coarse, *fine;
751 
752       // make sure the allocated space suffices.
753       tmp.reserve(size);
754       data.reserve(size);
755 
756       // Correctly assign coarse and fine for the first prolongation such that
757       // we end up in data in the end.
758       if(levels%2==0) {
759         coarse=&tmp;
760         fine=&data;
761       }else{
762         coarse=&data;
763         fine=&tmp;
764       }
765 
766       // Number the unknowns on the coarsest level consecutively for each process.
767       if(levels==maxlevels) {
768         const AggregatesMap& map = *(*(++aggregatesMaps().rbegin()));
769         std::size_t m=0;
770 
771         for(typename AggregatesMap::const_iterator iter = map.begin(); iter != map.end(); ++iter)
772           if(*iter< AggregatesMap::ISOLATED)
773             m=std::max(*iter,m);
774 
775         coarse->resize(m+1);
776         std::size_t i=0;
777         srand((unsigned)std::clock());
778         std::set<size_t> used;
779         for(typename std::vector<std::size_t>::iterator iter=coarse->begin(); iter != coarse->end();
780             ++iter, ++i)
781         {
782           std::pair<std::set<std::size_t>::iterator,bool> ibpair
783             = used.insert(static_cast<std::size_t>((((double)rand())/(RAND_MAX+1.0)))*coarse->size());
784 
785           while(!ibpair.second)
786             ibpair = used.insert(static_cast<std::size_t>((((double)rand())/(RAND_MAX+1.0))*coarse->size()));
787           *iter=*(ibpair.first);
788         }
789       }
790 
791       typename ParallelInformationHierarchy::Iterator pinfo = parallelInformation().coarsest();
792       --pinfo;
793 
794       // Now consecutively project the numbers to the finest level.
795       for(typename AggregatesMapList::const_reverse_iterator aggregates=++aggregatesMaps().rbegin();
796           aggregates != aggregatesMaps().rend(); ++aggregates,--levels) {
797 
798         fine->resize((*aggregates)->noVertices());
799         fine->assign(fine->size(), 0);
800         Transfer<typename AggregatesMap::AggregateDescriptor, std::vector<std::size_t>, ParallelInformation>
801         ::prolongateVector(*(*aggregates), *coarse, *fine, static_cast<std::size_t>(1), *pinfo);
802         --pinfo;
803         std::swap(coarse, fine);
804       }
805 
806       // Assertion to check that we really projected to data on the last step.
807       assert(coarse==&data);
808     }
809 
810     template<class M, class IS, class A>
811     const typename MatrixHierarchy<M,IS,A>::AggregatesMapList&
aggregatesMaps() const812     MatrixHierarchy<M,IS,A>::aggregatesMaps() const
813     {
814       return aggregatesMaps_;
815     }
816     template<class M, class IS, class A>
817     const typename MatrixHierarchy<M,IS,A>::RedistributeInfoList&
redistributeInformation() const818     MatrixHierarchy<M,IS,A>::redistributeInformation() const
819     {
820       return redistributes_;
821     }
822 
823     template<class M, class IS, class A>
~MatrixHierarchy()824     MatrixHierarchy<M,IS,A>::~MatrixHierarchy()
825     {
826       typedef typename AggregatesMapList::reverse_iterator AggregatesMapIterator;
827       typedef typename ParallelMatrixHierarchy::Iterator Iterator;
828       typedef typename ParallelInformationHierarchy::Iterator InfoIterator;
829 
830       AggregatesMapIterator amap = aggregatesMaps_.rbegin();
831       InfoIterator info = parallelInformation_.coarsest();
832       for(Iterator level=matrices_.coarsest(), finest=matrices_.finest(); level != finest;  --level, --info, ++amap) {
833         (*amap)->free();
834         delete *amap;
835       }
836       delete *amap;
837     }
838 
839     template<class M, class IS, class A>
840     template<class V, class BA, class TA>
coarsenVector(Hierarchy<BlockVector<V,BA>,TA> & hierarchy) const841     void MatrixHierarchy<M,IS,A>::coarsenVector(Hierarchy<BlockVector<V,BA>, TA>& hierarchy) const
842     {
843       assert(hierarchy.levels()==1);
844       typedef typename ParallelMatrixHierarchy::ConstIterator Iterator;
845       typedef typename RedistributeInfoList::const_iterator RIter;
846       RIter redist = redistributes_.begin();
847 
848       Iterator matrix = matrices_.finest(), coarsest = matrices_.coarsest();
849       int level=0;
850       if(redist->isSetup())
851         hierarchy.addRedistributedOnCoarsest(matrix.getRedistributed().getmat().N());
852       Dune::dvverb<<"Level "<<level<<" has "<<matrices_.finest()->getmat().N()<<" unknowns!"<<std::endl;
853 
854       while(matrix != coarsest) {
855         ++matrix; ++level; ++redist;
856         Dune::dvverb<<"Level "<<level<<" has "<<matrix->getmat().N()<<" unknowns!"<<std::endl;
857 
858         hierarchy.addCoarser(matrix->getmat().N());
859         if(redist->isSetup())
860           hierarchy.addRedistributedOnCoarsest(matrix.getRedistributed().getmat().N());
861 
862       }
863 
864     }
865 
866     template<class M, class IS, class A>
867     template<class S, class TA>
coarsenSmoother(Hierarchy<S,TA> & smoothers,const typename SmootherTraits<S>::Arguments & sargs) const868     void MatrixHierarchy<M,IS,A>::coarsenSmoother(Hierarchy<S,TA>& smoothers,
869                                                   const typename SmootherTraits<S>::Arguments& sargs) const
870     {
871       assert(smoothers.levels()==0);
872       typedef typename ParallelMatrixHierarchy::ConstIterator MatrixIterator;
873       typedef typename ParallelInformationHierarchy::ConstIterator PinfoIterator;
874       typedef typename AggregatesMapList::const_iterator AggregatesIterator;
875 
876       typename ConstructionTraits<S>::Arguments cargs;
877       cargs.setArgs(sargs);
878       PinfoIterator pinfo = parallelInformation_.finest();
879       AggregatesIterator aggregates = aggregatesMaps_.begin();
880       int level=0;
881       for(MatrixIterator matrix = matrices_.finest(), coarsest = matrices_.coarsest();
882           matrix != coarsest; ++matrix, ++pinfo, ++aggregates, ++level) {
883         cargs.setMatrix(matrix->getmat(), **aggregates);
884         cargs.setComm(*pinfo);
885         smoothers.addCoarser(cargs);
886       }
887       if(maxlevels()>levels()) {
888         // This is not the globally coarsest level and therefore smoothing is needed
889         cargs.setMatrix(matrices_.coarsest()->getmat(), **aggregates);
890         cargs.setComm(*pinfo);
891         smoothers.addCoarser(cargs);
892         ++level;
893       }
894     }
895 
896     template<class M, class IS, class A>
897     template<class F>
recalculateGalerkin(const F & copyFlags)898     void MatrixHierarchy<M,IS,A>::recalculateGalerkin(const F& copyFlags)
899     {
900       typedef typename AggregatesMapList::iterator AggregatesMapIterator;
901       typedef typename ParallelMatrixHierarchy::Iterator Iterator;
902       typedef typename ParallelInformationHierarchy::Iterator InfoIterator;
903 
904       AggregatesMapIterator amap = aggregatesMaps_.begin();
905       BaseGalerkinProduct productBuilder;
906       InfoIterator info = parallelInformation_.finest();
907       typename RedistributeInfoList::iterator riIter = redistributes_.begin();
908       Iterator level = matrices_.finest(), coarsest=matrices_.coarsest();
909       if(level.isRedistributed()) {
910         info->buildGlobalLookup(level->getmat().N());
911         redistributeMatrixEntries(const_cast<Matrix&>(level->getmat()),
912                                   const_cast<Matrix&>(level.getRedistributed().getmat()),
913                                   *info,info.getRedistributed(), *riIter);
914         info->freeGlobalLookup();
915       }
916 
917       for(; level!=coarsest; ++amap) {
918         const Matrix& fine = (level.isRedistributed() ? level.getRedistributed() : *level).getmat();
919         ++level;
920         ++info;
921         ++riIter;
922         productBuilder.calculate(fine, *(*amap), const_cast<Matrix&>(level->getmat()), *info, copyFlags);
923         if(level.isRedistributed()) {
924           info->buildGlobalLookup(level->getmat().N());
925           redistributeMatrixEntries(const_cast<Matrix&>(level->getmat()),
926                                     const_cast<Matrix&>(level.getRedistributed().getmat()), *info,
927                                     info.getRedistributed(), *riIter);
928           info->freeGlobalLookup();
929         }
930       }
931     }
932 
933     template<class M, class IS, class A>
levels() const934     std::size_t MatrixHierarchy<M,IS,A>::levels() const
935     {
936       return matrices_.levels();
937     }
938 
939     template<class M, class IS, class A>
maxlevels() const940     std::size_t MatrixHierarchy<M,IS,A>::maxlevels() const
941     {
942       return maxlevels_;
943     }
944 
945     template<class M, class IS, class A>
hasCoarsest() const946     bool MatrixHierarchy<M,IS,A>::hasCoarsest() const
947     {
948       return levels()==maxlevels() &&
949              (!matrices_.coarsest().isRedistributed() ||matrices_.coarsest()->getmat().N()>0);
950     }
951 
952     template<class M, class IS, class A>
isBuilt() const953     bool MatrixHierarchy<M,IS,A>::isBuilt() const
954     {
955       return built_;
956     }
957 
958     /** @} */
959   } // namespace Amg
960 } // namespace Dune
961 
962 #endif // end DUNE_AMG_MATRIXHIERARCHY_HH
963