1 /* ---------------------------------------------------------------------
2  *
3  * Copyright (C) 2019 - 2020 by the deal.II authors
4  *
5  * This file is part of the deal.II library.
6  *
7  * The deal.II library is free software; you can use it, redistribute
8  * it, and/or modify it under the terms of the GNU Lesser General
9  * Public License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  * The full text of the license can be found in the file LICENSE.md at
12  * the top level directory of deal.II.
13  *
14  * ---------------------------------------------------------------------
15 
16  *
17  * Author: Thomas C. Clevenger, Clemson University
18  *         Timo Heister, Clemson University
19  *         Guido Kanschat, Heidelberg University
20  *         Martin Kronbichler, Technical University of Munich
21  */
22 
23 
24 // @sect3{Include files}
25 
26 // The include files are a combination of step-40, step-16, and step-37:
27 
28 #include <deal.II/base/conditional_ostream.h>
29 #include <deal.II/base/data_out_base.h>
30 #include <deal.II/base/index_set.h>
31 #include <deal.II/base/logstream.h>
32 #include <deal.II/base/quadrature_lib.h>
33 #include <deal.II/base/timer.h>
34 #include <deal.II/base/parameter_handler.h>
35 #include <deal.II/distributed/grid_refinement.h>
36 #include <deal.II/distributed/tria.h>
37 #include <deal.II/dofs/dof_accessor.h>
38 #include <deal.II/dofs/dof_tools.h>
39 #include <deal.II/fe/fe_q.h>
40 #include <deal.II/fe/fe_values.h>
41 #include <deal.II/grid/grid_generator.h>
42 #include <deal.II/grid/grid_refinement.h>
43 #include <deal.II/grid/tria.h>
44 #include <deal.II/grid/tria_accessor.h>
45 #include <deal.II/grid/tria_iterator.h>
46 #include <deal.II/lac/affine_constraints.h>
47 #include <deal.II/lac/dynamic_sparsity_pattern.h>
48 #include <deal.II/lac/solver_cg.h>
49 
50 // We use the same strategy as in step-40 to switch between PETSc and
51 // Trilinos:
52 
53 #include <deal.II/lac/generic_linear_algebra.h>
54 
55 // Comment out the following preprocessor definition if you have both
56 // PETSc and Trilinos installed and you prefer to use PETSc in this
57 // example:
58 #define FORCE_USE_OF_TRILINOS
59 
60 namespace LA
61 {
62 #if defined(DEAL_II_WITH_PETSC) && !defined(DEAL_II_PETSC_WITH_COMPLEX) && \
63   !(defined(DEAL_II_WITH_TRILINOS) && defined(FORCE_USE_OF_TRILINOS))
64   using namespace dealii::LinearAlgebraPETSc;
65 #  define USE_PETSC_LA
66 #elif defined(DEAL_II_WITH_TRILINOS)
67   using namespace dealii::LinearAlgebraTrilinos;
68 #else
69 #  error DEAL_II_WITH_PETSC or DEAL_II_WITH_TRILINOS required
70 #endif
71 } // namespace LA
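
// With this alias in place, names such as LA::MPI::Vector,
// LA::MPI::SparseMatrix, LA::MPI::PreconditionAMG, and
// LA::MPI::PreconditionJacobi used throughout the program resolve to either
// the PETSc or the Trilinos wrapper classes, depending on which branch of
// the preprocessor block above was taken.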
72 
73 #include <deal.II/matrix_free/matrix_free.h>
74 #include <deal.II/matrix_free/operators.h>
75 #include <deal.II/matrix_free/fe_evaluation.h>
76 #include <deal.II/multigrid/mg_coarse.h>
77 #include <deal.II/multigrid/mg_constrained_dofs.h>
78 #include <deal.II/multigrid/mg_matrix.h>
79 #include <deal.II/multigrid/mg_smoother.h>
80 #include <deal.II/multigrid/mg_tools.h>
81 #include <deal.II/multigrid/mg_transfer.h>
82 #include <deal.II/multigrid/multigrid.h>
83 #include <deal.II/multigrid/mg_transfer_matrix_free.h>
84 #include <deal.II/numerics/data_out.h>
85 #include <deal.II/numerics/vector_tools.h>
86 
87 // The following files are used to assemble the error estimator like in step-12:
88 #include <deal.II/fe/fe_interface_values.h>
89 #include <deal.II/meshworker/mesh_loop.h>
90 
91 using namespace dealii;
92 
93 
94 // @sect3{Coefficients and helper classes}
95 
96 // MatrixFree operators must use the
97 // dealii::LinearAlgebra::distributed::Vector vector type. Here we define
98 // operations which copy to and from Trilinos vectors for compatibility with
99 // the matrix-based code. Note that this functionality does not currently
100 // exist for PETSc vector types, so Trilinos must be installed to use the
101 // MatrixFree solver in this tutorial.
102 namespace ChangeVectorTypes
103 {
104   template <typename number>
105   void copy(LA::MPI::Vector &                                         out,
106             const dealii::LinearAlgebra::distributed::Vector<number> &in)
107   {
108     dealii::LinearAlgebra::ReadWriteVector<double> rwv(
109       out.locally_owned_elements());
110     rwv.import(in, VectorOperation::insert);
111 #ifdef USE_PETSC_LA
112     AssertThrow(false,
113                 ExcMessage("ChangeVectorTypes::copy() not implemented for "
114                            "PETSc vector types."));
115 #else
116     out.import(rwv, VectorOperation::insert);
117 #endif
118   }
119 
120 
121 
122   template <typename number>
123   void copy(dealii::LinearAlgebra::distributed::Vector<number> &out,
124             const LA::MPI::Vector &                             in)
125   {
126     dealii::LinearAlgebra::ReadWriteVector<double> rwv;
127 #ifdef USE_PETSC_LA
128     (void)in;
129     AssertThrow(false,
130                 ExcMessage("ChangeVectorTypes::copy() not implemented for "
131                            "PETSc vector types."));
132 #else
133     rwv.reinit(in);
134 #endif
135     out.import(rwv, VectorOperation::insert);
136   }
137 } // namespace ChangeVectorTypes
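
// As a minimal usage sketch (the same pattern appears in assemble_rhs() and
// solve() below; the vector names here are hypothetical):
//
// @code
//   LinearAlgebra::distributed::Vector<double> mf_vector; // matrix-free type
//   LA::MPI::Vector                            la_vector; // PETSc/Trilinos type
//   // ... reinit() and fill mf_vector appropriately ...
//   ChangeVectorTypes::copy(la_vector, mf_vector);
// @endcode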
138 
139 
140 // Let's move on to the description of the problem we want to solve.
141 // We set the right-hand side function to 1.0. The @p value function returning a
142 // VectorizedArray is used by the matrix-free code path.
143 template <int dim>
144 class RightHandSide : public Function<dim>
145 {
146 public:
147   virtual double value(const Point<dim> & /*p*/,
148                        const unsigned int /*component*/ = 0) const override
149   {
150     return 1.0;
151   }
152 
153 
154   template <typename number>
155   VectorizedArray<number>
156   value(const Point<dim, VectorizedArray<number>> & /*p*/,
157         const unsigned int /*component*/ = 0) const
158   {
159     return VectorizedArray<number>(1.0);
160   }
161 };
162 
163 
164 // This next class represents the diffusion coefficient. We use a variable
165 // coefficient which is 100.0 at any point where at least one coordinate is
166 // less than -0.5, and 1.0 at all other points. As above, a separate value()
167 // returning a VectorizedArray is used for the matrix-free code. An
168 // @p average_value() function computes the arithmetic average over a set of points.
169 template <int dim>
170 class Coefficient : public Function<dim>
171 {
172 public:
173   virtual double value(const Point<dim> &p,
174                        const unsigned int /*component*/ = 0) const override;
175 
176   template <typename number>
177   VectorizedArray<number> value(const Point<dim, VectorizedArray<number>> &p,
178                                 const unsigned int /*component*/ = 0) const;
179 
180   template <typename number>
181   number average_value(const std::vector<Point<dim, number>> &points) const;
182 
183   // When using a coefficient in the MatrixFree framework, we also
184   // need a function that creates a Table of coefficient values for the
185   // set of cells provided by the MatrixFree object passed as an argument.
186   template <typename number>
187   std::shared_ptr<Table<2, VectorizedArray<number>>> make_coefficient_table(
188     const MatrixFree<dim, number, VectorizedArray<number>> &mf_storage) const;
189 };
190 
191 
192 
193 template <int dim>
194 double Coefficient<dim>::value(const Point<dim> &p, const unsigned int) const
195 {
196   for (int d = 0; d < dim; ++d)
197     {
198       if (p[d] < -0.5)
199         return 100.0;
200     }
201   return 1.0;
202 }
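
// For example, in 2d the point $(-0.75, 0.25)$ has a coordinate below $-0.5$
// and therefore gets the coefficient value 100.0, whereas $(0.25, 0.25)$
// gets 1.0.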
203 
204 
205 
206 template <int dim>
207 template <typename number>
208 VectorizedArray<number>
209 Coefficient<dim>::value(const Point<dim, VectorizedArray<number>> &p,
210                         const unsigned int) const
211 {
212   VectorizedArray<number> return_value = VectorizedArray<number>(1.0);
213   for (unsigned int i = 0; i < VectorizedArray<number>::size(); ++i)
214     {
215       for (int d = 0; d < dim; ++d)
216         if (p[d][i] < -0.5)
217           {
218             return_value[i] = 100.0;
219             break;
220           }
221     }
222 
223   return return_value;
224 }
225 
226 
227 
228 template <int dim>
229 template <typename number>
230 number Coefficient<dim>::average_value(
231   const std::vector<Point<dim, number>> &points) const
232 {
233   number average(0);
234   for (unsigned int i = 0; i < points.size(); ++i)
235     average += value(points[i]);
236   average /= points.size();
237 
238   return average;
239 }
240 
241 
242 
243 template <int dim>
244 template <typename number>
245 std::shared_ptr<Table<2, VectorizedArray<number>>>
246 Coefficient<dim>::make_coefficient_table(
247   const MatrixFree<dim, number, VectorizedArray<number>> &mf_storage) const
248 {
249   auto coefficient_table =
250     std::make_shared<Table<2, VectorizedArray<number>>>();
251 
252   FEEvaluation<dim, -1, 0, 1, number> fe_eval(mf_storage);
253 
254   const unsigned int n_cells    = mf_storage.n_macro_cells();
255   const unsigned int n_q_points = fe_eval.n_q_points;
256 
257   coefficient_table->reinit(n_cells, 1);
258 
259   for (unsigned int cell = 0; cell < n_cells; ++cell)
260     {
261       fe_eval.reinit(cell);
262 
263       VectorizedArray<number> average_value = 0.;
264       for (unsigned int q = 0; q < n_q_points; ++q)
265         average_value += value(fe_eval.quadrature_point(q));
266       average_value /= n_q_points;
267 
268       (*coefficient_table)(cell, 0) = average_value;
269     }
270 
271   return coefficient_table;
272 }
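
// Note that the resulting table has a single column: we store one averaged
// coefficient value per batch of cells, accessed as
// `(*coefficient_table)(cell, 0)`, rather than one value per quadrature
// point. This matches how the coefficient is used in assemble_rhs() below,
// where `coefficient(cell, 0)` multiplies the gradient.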
273 
274 
275 
276 // @sect3{Run time parameters}
277 
278 // We will use ParameterHandler to pass in parameters at runtime.  The
279 // structure @p Settings parses and stores these parameters to be queried
280 // throughout the program.
281 struct Settings
282 {
283   bool try_parse(const std::string &prm_filename);
284 
285   enum SolverType
286   {
287     gmg_mb,
288     gmg_mf,
289     amg
290   };
291 
292   SolverType solver;
293 
294   int          dimension;
295   double       smoother_dampen;
296   unsigned int smoother_steps;
297   unsigned int n_steps;
298   bool         output;
299 };
300 
301 
302 
303 bool Settings::try_parse(const std::string &prm_filename)
304 {
305   ParameterHandler prm;
306   prm.declare_entry("dim", "2", Patterns::Integer(), "The problem dimension.");
307   prm.declare_entry("n_steps",
308                     "10",
309                     Patterns::Integer(0),
310                     "Number of adaptive refinement steps.");
311   prm.declare_entry("smoother dampen",
312                     "1.0",
313                     Patterns::Double(0.0),
314                     "Dampen factor for the smoother.");
315   prm.declare_entry("smoother steps",
316                     "1",
317                     Patterns::Integer(1),
318                     "Number of smoother steps.");
319   prm.declare_entry("solver",
320                     "MF",
321                     Patterns::Selection("MF|MB|AMG"),
322                     "Switch between matrix-free GMG, "
323                     "matrix-based GMG, and AMG.");
324   prm.declare_entry("output",
325                     "false",
326                     Patterns::Bool(),
327                     "Output graphical results.");
328 
329   if (prm_filename.size() == 0)
330     {
331       std::cout << "****  Error: No input file provided!\n"
332                 << "****  Error: Call this program as './step-50 input.prm'\n"
333                 << "\n"
334                 << "****  You may want to use one of the input files in this\n"
335                 << "****  directory, or use the following default values\n"
336                 << "****  to create an input file:\n";
337       if (Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0)
338         prm.print_parameters(std::cout, ParameterHandler::Text);
339       return false;
340     }
341 
342   try
343     {
344       prm.parse_input(prm_filename);
345     }
346   catch (std::exception &e)
347     {
348       if (Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0)
349         std::cerr << e.what() << std::endl;
350       return false;
351     }
352 
353   if (prm.get("solver") == "MF")
354     this->solver = gmg_mf;
355   else if (prm.get("solver") == "MB")
356     this->solver = gmg_mb;
357   else if (prm.get("solver") == "AMG")
358     this->solver = amg;
359   else
360     AssertThrow(false, ExcNotImplemented());
361 
362   this->dimension       = prm.get_integer("dim");
363   this->n_steps         = prm.get_integer("n_steps");
364   this->smoother_dampen = prm.get_double("smoother dampen");
365   this->smoother_steps  = prm.get_integer("smoother steps");
366   this->output          = prm.get_bool("output");
367 
368   return true;
369 }
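
// As an illustration, a parameter file for this program could look like the
// following (the values shown are just the defaults declared above and can,
// of course, be changed):
//
// @code
//   set dim             = 2
//   set n_steps         = 10
//   set smoother dampen = 1.0
//   set smoother steps  = 1
//   set solver          = MF
//   set output          = false
// @endcode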
370 
371 
372 
373 // @sect3{LaplaceProblem class}
374 
375 // This is the main class of the program. It looks very similar to
376 // step-16, step-37, and step-40. For the MatrixFree setup, we use the
377 // MatrixFreeOperators::LaplaceOperator class which defines `local_apply()`,
378 // `compute_diagonal()`, and `set_coefficient()` functions internally. Note that
379 // the polynomial degree is a template parameter of this class. This is
380 // necessary for the matrix-free code.
381 template <int dim, int degree>
382 class LaplaceProblem
383 {
384 public:
385   LaplaceProblem(const Settings &settings);
386   void run();
387 
388 private:
389   // We will use the following types throughout the program. First the
390   // matrix-based types, after that the matrix-free classes. For the
391   // matrix-free implementation, we use @p float for the level operators.
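  // (Storing the level operators in single precision roughly halves their
  // memory footprint and the data transferred in each V-cycle, while the
  // outer CG iteration and the operator on the active mesh remain in double
  // precision; this is the same strategy as in step-37.)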
392   using MatrixType         = LA::MPI::SparseMatrix;
393   using VectorType         = LA::MPI::Vector;
394   using PreconditionAMG    = LA::MPI::PreconditionAMG;
395   using PreconditionJacobi = LA::MPI::PreconditionJacobi;
396 
397   using MatrixFreeLevelMatrix = MatrixFreeOperators::LaplaceOperator<
398     dim,
399     degree,
400     degree + 1,
401     1,
402     LinearAlgebra::distributed::Vector<float>>;
403   using MatrixFreeActiveMatrix = MatrixFreeOperators::LaplaceOperator<
404     dim,
405     degree,
406     degree + 1,
407     1,
408     LinearAlgebra::distributed::Vector<double>>;
409 
410   using MatrixFreeLevelVector  = LinearAlgebra::distributed::Vector<float>;
411   using MatrixFreeActiveVector = LinearAlgebra::distributed::Vector<double>;
412 
413   void setup_system();
414   void setup_multigrid();
415   void assemble_system();
416   void assemble_multigrid();
417   void assemble_rhs();
418   void solve();
419   void estimate();
420   void refine_grid();
421   void output_results(const unsigned int cycle);
422 
423   Settings settings;
424 
425   MPI_Comm           mpi_communicator;
426   ConditionalOStream pcout;
427 
428   parallel::distributed::Triangulation<dim> triangulation;
429   const MappingQ1<dim>                      mapping;
430   FE_Q<dim>                                 fe;
431 
432   DoFHandler<dim> dof_handler;
433 
434   IndexSet                  locally_owned_dofs;
435   IndexSet                  locally_relevant_dofs;
436   AffineConstraints<double> constraints;
437 
438   MatrixType             system_matrix;
439   MatrixFreeActiveMatrix mf_system_matrix;
440   VectorType             solution;
441   VectorType             right_hand_side;
442   Vector<double>         estimated_error_square_per_cell;
443 
444   MGLevelObject<MatrixType> mg_matrix;
445   MGLevelObject<MatrixType> mg_interface_in;
446   MGConstrainedDoFs         mg_constrained_dofs;
447 
448   MGLevelObject<MatrixFreeLevelMatrix> mf_mg_matrix;
449 
450   TimerOutput computing_timer;
451 };
452 
453 
454 // The only interesting part about the constructor is that we construct the
455 // multigrid hierarchy unless we use AMG. For that, we need to parse the
456 // run time parameters before this constructor completes.
457 template <int dim, int degree>
458 LaplaceProblem<dim, degree>::LaplaceProblem(const Settings &settings)
459   : settings(settings)
460   , mpi_communicator(MPI_COMM_WORLD)
461   , pcout(std::cout, (Utilities::MPI::this_mpi_process(mpi_communicator) == 0))
462   , triangulation(mpi_communicator,
463                   Triangulation<dim>::limit_level_difference_at_vertices,
464                   (settings.solver == Settings::amg) ?
465                     parallel::distributed::Triangulation<dim>::default_setting :
466                     parallel::distributed::Triangulation<
467                       dim>::construct_multigrid_hierarchy)
468   , mapping()
469   , fe(degree)
470   , dof_handler(triangulation)
471   , computing_timer(pcout, TimerOutput::never, TimerOutput::wall_times)
472 {
473   GridGenerator::hyper_L(triangulation, -1., 1., /*colorize*/ false);
474   triangulation.refine_global(1);
475 }
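
// (GridGenerator::hyper_L() with the bounds -1 and 1 creates the classical
// L-shaped domain, i.e., the cube $[-1,1]^d$ with the corner $[0,1]^d$
// removed, which is then refined globally once before the first cycle.)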
476 
477 
478 
479 // @sect4{LaplaceProblem::setup_system()}
480 
481 // Unlike step-16 and step-37, we split the setup into two parts,
482 // setup_system() and setup_multigrid(). Here is the typical setup_system()
483 // function for the active mesh found in most tutorials. For matrix-free, the
484 // active mesh set up is similar to step-37; for matrix-based (GMG and AMG
485 // solvers), the setup is similar to step-40.
486 template <int dim, int degree>
487 void LaplaceProblem<dim, degree>::setup_system()
488 {
489   TimerOutput::Scope timing(computing_timer, "Setup");
490 
491   dof_handler.distribute_dofs(fe);
492 
493   DoFTools::extract_locally_relevant_dofs(dof_handler, locally_relevant_dofs);
494   locally_owned_dofs = dof_handler.locally_owned_dofs();
495 
496   solution.reinit(locally_owned_dofs, mpi_communicator);
497   right_hand_side.reinit(locally_owned_dofs, mpi_communicator);
498   constraints.reinit(locally_relevant_dofs);
499   DoFTools::make_hanging_node_constraints(dof_handler, constraints);
500 
501   VectorTools::interpolate_boundary_values(
502     mapping, dof_handler, 0, Functions::ZeroFunction<dim>(), constraints);
503   constraints.close();
504 
505   switch (settings.solver)
506     {
507       case Settings::gmg_mf:
508         {
509           typename MatrixFree<dim, double>::AdditionalData additional_data;
510           additional_data.tasks_parallel_scheme =
511             MatrixFree<dim, double>::AdditionalData::none;
512           additional_data.mapping_update_flags =
513             (update_gradients | update_JxW_values | update_quadrature_points);
514           std::shared_ptr<MatrixFree<dim, double>> mf_storage =
515             std::make_shared<MatrixFree<dim, double>>();
516           mf_storage->reinit(dof_handler,
517                              constraints,
518                              QGauss<1>(degree + 1),
519                              additional_data);
520 
521           mf_system_matrix.initialize(mf_storage);
522 
523           const Coefficient<dim> coefficient;
524           mf_system_matrix.set_coefficient(
525             coefficient.make_coefficient_table(*mf_storage));
526 
527           break;
528         }
529 
530       case Settings::gmg_mb:
531       case Settings::amg:
532         {
533 #ifdef USE_PETSC_LA
534           DynamicSparsityPattern dsp(locally_relevant_dofs);
535           DoFTools::make_sparsity_pattern(dof_handler, dsp, constraints);
536 
537           SparsityTools::distribute_sparsity_pattern(dsp,
538                                                      locally_owned_dofs,
539                                                      mpi_communicator,
540                                                      locally_relevant_dofs);
541 
542           system_matrix.reinit(locally_owned_dofs,
543                                locally_owned_dofs,
544                                dsp,
545                                mpi_communicator);
546 #else
547           TrilinosWrappers::SparsityPattern dsp(locally_owned_dofs,
548                                                 locally_owned_dofs,
549                                                 locally_relevant_dofs,
550                                                 mpi_communicator);
551           DoFTools::make_sparsity_pattern(dof_handler, dsp, constraints);
552           dsp.compress();
553           system_matrix.reinit(dsp);
554 #endif
555 
556           break;
557         }
558 
559       default:
560         Assert(false, ExcNotImplemented());
561     }
562 }
563 
564 // @sect4{LaplaceProblem::setup_multigrid()}
565 
566 // This function does the multilevel setup for both matrix-free and
567 // matrix-based GMG. The matrix-free setup is similar to that of step-37, and
568 // the matrix-based is similar to step-16, except we must use appropriate
569 // distributed sparsity patterns.
570 //
571 // The function is not called for the AMG approach, but to err on the
572 // safe side, the main `switch` statement of this function
573 // nevertheless makes sure that the function only operates on known
574 // multigrid settings by throwing an assertion if the function were
575 // called for anything other than the two geometric multigrid methods.
576 template <int dim, int degree>
577 void LaplaceProblem<dim, degree>::setup_multigrid()
578 {
579   TimerOutput::Scope timing(computing_timer, "Setup multigrid");
580 
581   dof_handler.distribute_mg_dofs();
582 
583   mg_constrained_dofs.clear();
584   mg_constrained_dofs.initialize(dof_handler);
585 
586   const std::set<types::boundary_id> boundary_ids = {types::boundary_id(0)};
587   mg_constrained_dofs.make_zero_boundary_constraints(dof_handler, boundary_ids);
588 
589   const unsigned int n_levels = triangulation.n_global_levels();
590 
591   switch (settings.solver)
592     {
593       case Settings::gmg_mf:
594         {
595           mf_mg_matrix.resize(0, n_levels - 1);
596 
597           for (unsigned int level = 0; level < n_levels; ++level)
598             {
599               IndexSet relevant_dofs;
600               DoFTools::extract_locally_relevant_level_dofs(dof_handler,
601                                                             level,
602                                                             relevant_dofs);
603               AffineConstraints<double> level_constraints;
604               level_constraints.reinit(relevant_dofs);
605               level_constraints.add_lines(
606                 mg_constrained_dofs.get_boundary_indices(level));
607               level_constraints.close();
608 
609               typename MatrixFree<dim, float>::AdditionalData additional_data;
610               additional_data.tasks_parallel_scheme =
611                 MatrixFree<dim, float>::AdditionalData::none;
612               additional_data.mapping_update_flags =
613                 (update_gradients | update_JxW_values |
614                  update_quadrature_points);
615               additional_data.mg_level = level;
616               std::shared_ptr<MatrixFree<dim, float>> mf_storage_level(
617                 new MatrixFree<dim, float>());
618               mf_storage_level->reinit(dof_handler,
619                                        level_constraints,
620                                        QGauss<1>(degree + 1),
621                                        additional_data);
622 
623               mf_mg_matrix[level].initialize(mf_storage_level,
624                                              mg_constrained_dofs,
625                                              level);
626 
627               const Coefficient<dim> coefficient;
628               mf_mg_matrix[level].set_coefficient(
629                 coefficient.make_coefficient_table(*mf_storage_level));
630 
631               mf_mg_matrix[level].compute_diagonal();
632             }
633 
634           break;
635         }
636 
637       case Settings::gmg_mb:
638         {
639           mg_matrix.resize(0, n_levels - 1);
640           mg_matrix.clear_elements();
641           mg_interface_in.resize(0, n_levels - 1);
642           mg_interface_in.clear_elements();
643 
644           for (unsigned int level = 0; level < n_levels; ++level)
645             {
646               IndexSet dof_set;
647               DoFTools::extract_locally_relevant_level_dofs(dof_handler,
648                                                             level,
649                                                             dof_set);
650 
651               {
652 #ifdef USE_PETSC_LA
653                 DynamicSparsityPattern dsp(dof_set);
654                 MGTools::make_sparsity_pattern(dof_handler, dsp, level);
655                 dsp.compress();
656                 SparsityTools::distribute_sparsity_pattern(
657                   dsp,
658                   dof_handler.locally_owned_mg_dofs(level),
659                   mpi_communicator,
660                   dof_set);
661 
662                 mg_matrix[level].reinit(
663                   dof_handler.locally_owned_mg_dofs(level),
664                   dof_handler.locally_owned_mg_dofs(level),
665                   dsp,
666                   mpi_communicator);
667 #else
668                 TrilinosWrappers::SparsityPattern dsp(
669                   dof_handler.locally_owned_mg_dofs(level),
670                   dof_handler.locally_owned_mg_dofs(level),
671                   dof_set,
672                   mpi_communicator);
673                 MGTools::make_sparsity_pattern(dof_handler, dsp, level);
674 
675                 dsp.compress();
676                 mg_matrix[level].reinit(dsp);
677 #endif
678               }
679 
680               {
681 #ifdef USE_PETSC_LA
682                 DynamicSparsityPattern dsp(dof_set);
683                 MGTools::make_interface_sparsity_pattern(dof_handler,
684                                                          mg_constrained_dofs,
685                                                          dsp,
686                                                          level);
687                 dsp.compress();
688                 SparsityTools::distribute_sparsity_pattern(
689                   dsp,
690                   dof_handler.locally_owned_mg_dofs(level),
691                   mpi_communicator,
692                   dof_set);
693 
694                 mg_interface_in[level].reinit(
695                   dof_handler.locally_owned_mg_dofs(level),
696                   dof_handler.locally_owned_mg_dofs(level),
697                   dsp,
698                   mpi_communicator);
699 #else
700                 TrilinosWrappers::SparsityPattern dsp(
701                   dof_handler.locally_owned_mg_dofs(level),
702                   dof_handler.locally_owned_mg_dofs(level),
703                   dof_set,
704                   mpi_communicator);
705 
706                 MGTools::make_interface_sparsity_pattern(dof_handler,
707                                                          mg_constrained_dofs,
708                                                          dsp,
709                                                          level);
710                 dsp.compress();
711                 mg_interface_in[level].reinit(dsp);
712 #endif
713               }
714             }
715           break;
716         }
717 
718       default:
719         Assert(false, ExcNotImplemented());
720     }
721 }
722 
723 
724 // @sect4{LaplaceProblem::assemble_system()}
725 
726 // The assembly is split into three parts: `assemble_system()`,
727 // `assemble_multigrid()`, and `assemble_rhs()`. The
728 // `assemble_system()` function here assembles and stores the (global)
729 // system matrix and the right-hand side for the matrix-based
730 // methods. It is similar to the assembly in step-40.
731 //
732 // Note that the matrix-free method does not execute this function as it does
733 // not need to assemble a matrix, and it will instead assemble the right-hand
734 // side in assemble_rhs().
735 template <int dim, int degree>
736 void LaplaceProblem<dim, degree>::assemble_system()
737 {
738   TimerOutput::Scope timing(computing_timer, "Assemble");
739 
740   const QGauss<dim> quadrature_formula(degree + 1);
741 
742   FEValues<dim> fe_values(fe,
743                           quadrature_formula,
744                           update_values | update_gradients |
745                             update_quadrature_points | update_JxW_values);
746 
747   const unsigned int dofs_per_cell = fe.n_dofs_per_cell();
748   const unsigned int n_q_points    = quadrature_formula.size();
749 
750   FullMatrix<double> cell_matrix(dofs_per_cell, dofs_per_cell);
751   Vector<double>     cell_rhs(dofs_per_cell);
752 
753   std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell);
754 
755   const Coefficient<dim> coefficient;
756   RightHandSide<dim>     rhs;
757   std::vector<double>    rhs_values(n_q_points);
758 
759   for (const auto &cell : dof_handler.active_cell_iterators())
760     if (cell->is_locally_owned())
761       {
762         cell_matrix = 0;
763         cell_rhs    = 0;
764 
765         fe_values.reinit(cell);
766 
767         const double coefficient_value =
768           coefficient.average_value(fe_values.get_quadrature_points());
769         rhs.value_list(fe_values.get_quadrature_points(), rhs_values);
770 
771         for (unsigned int q_point = 0; q_point < n_q_points; ++q_point)
772           for (unsigned int i = 0; i < dofs_per_cell; ++i)
773             {
774               for (unsigned int j = 0; j < dofs_per_cell; ++j)
775                 cell_matrix(i, j) +=
776                   coefficient_value *                // epsilon(x)
777                   fe_values.shape_grad(i, q_point) * // * grad phi_i(x)
778                   fe_values.shape_grad(j, q_point) * // * grad phi_j(x)
779                   fe_values.JxW(q_point);            // * dx
780 
781               cell_rhs(i) +=
782                 fe_values.shape_value(i, q_point) * // phi_i(x)
783                 rhs_values[q_point] *               // * f(x)
784                 fe_values.JxW(q_point);             // * dx
785             }
786 
787         cell->get_dof_indices(local_dof_indices);
788         constraints.distribute_local_to_global(cell_matrix,
789                                                cell_rhs,
790                                                local_dof_indices,
791                                                system_matrix,
792                                                right_hand_side);
793       }
794 
795   system_matrix.compress(VectorOperation::add);
796   right_hand_side.compress(VectorOperation::add);
797 }
798 
799 
800 // @sect4{LaplaceProblem::assemble_multigrid()}
801 
802 // The following function assembles and stores the multilevel matrices for the
803 // matrix-based GMG method. This function is similar to the one found in
804 // step-16, only here it works for distributed meshes. This difference amounts
805 // to adding a condition that we only assemble on locally owned level cells and
806 // a call to compress() for each matrix that is built.
807 template <int dim, int degree>
808 void LaplaceProblem<dim, degree>::assemble_multigrid()
809 {
810   TimerOutput::Scope timing(computing_timer, "Assemble multigrid");
811 
812   QGauss<dim> quadrature_formula(degree + 1);
813 
814   FEValues<dim> fe_values(fe,
815                           quadrature_formula,
816                           update_values | update_gradients |
817                             update_quadrature_points | update_JxW_values);
818 
819   const unsigned int dofs_per_cell = fe.n_dofs_per_cell();
820   const unsigned int n_q_points    = quadrature_formula.size();
821 
822   FullMatrix<double> cell_matrix(dofs_per_cell, dofs_per_cell);
823 
824   std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell);
825 
826   const Coefficient<dim> coefficient;
827 
828   std::vector<AffineConstraints<double>> boundary_constraints(
829     triangulation.n_global_levels());
830   for (unsigned int level = 0; level < triangulation.n_global_levels(); ++level)
831     {
832       IndexSet dof_set;
833       DoFTools::extract_locally_relevant_level_dofs(dof_handler,
834                                                     level,
835                                                     dof_set);
836       boundary_constraints[level].reinit(dof_set);
837       boundary_constraints[level].add_lines(
838         mg_constrained_dofs.get_refinement_edge_indices(level));
839       boundary_constraints[level].add_lines(
840         mg_constrained_dofs.get_boundary_indices(level));
841 
842       boundary_constraints[level].close();
843     }
844 
845   for (const auto &cell : dof_handler.cell_iterators())
846     if (cell->level_subdomain_id() == triangulation.locally_owned_subdomain())
847       {
848         cell_matrix = 0;
849         fe_values.reinit(cell);
850 
851         const double coefficient_value =
852           coefficient.average_value(fe_values.get_quadrature_points());
853 
854         for (unsigned int q_point = 0; q_point < n_q_points; ++q_point)
855           for (unsigned int i = 0; i < dofs_per_cell; ++i)
856             for (unsigned int j = 0; j < dofs_per_cell; ++j)
857               cell_matrix(i, j) +=
858                 coefficient_value * fe_values.shape_grad(i, q_point) *
859                 fe_values.shape_grad(j, q_point) * fe_values.JxW(q_point);
860 
861         cell->get_mg_dof_indices(local_dof_indices);
862 
863         boundary_constraints[cell->level()].distribute_local_to_global(
864           cell_matrix, local_dof_indices, mg_matrix[cell->level()]);
865 
866         for (unsigned int i = 0; i < dofs_per_cell; ++i)
867           for (unsigned int j = 0; j < dofs_per_cell; ++j)
868             if (mg_constrained_dofs.is_interface_matrix_entry(
869                   cell->level(), local_dof_indices[i], local_dof_indices[j]))
870               mg_interface_in[cell->level()].add(local_dof_indices[i],
871                                                  local_dof_indices[j],
872                                                  cell_matrix(i, j));
873       }
874 
875   for (unsigned int i = 0; i < triangulation.n_global_levels(); ++i)
876     {
877       mg_matrix[i].compress(VectorOperation::add);
878       mg_interface_in[i].compress(VectorOperation::add);
879     }
880 }
881 
882 
883 
884 // @sect4{LaplaceProblem::assemble_rhs()}
885 
886 // The final function in this triptych assembles the right-hand side
887 // vector for the matrix-free method -- because in the matrix-free
888 // framework, we don't have to assemble the matrix and can get away
889 // with only assembling the right hand side. We could do this by extracting the
890 // code from the `assemble_system()` function above that deals with the right
891 // hand side, but we decide instead to go all in on the matrix-free approach
892 // and do the assembly that way as well.
893 //
894 // The result is a function that is similar
895 // to the one found in the "Use FEEvaluation::read_dof_values_plain()
896 // to avoid resolving constraints" subsection in the "Possibilities
897 // for extensions" section of step-37.
898 //
899 // The reason for this function is that the MatrixFree operators do not take
900 // into account non-homogeneous Dirichlet constraints, instead treating all
901 // Dirichlet constraints as homogeneous. To account for this, the right-hand
902 // side here is assembled as the residual $r_0 = f-Au_0$, where $u_0$ is a
903 // zero vector except in the Dirichlet values. Then when solving, we have that
904 // the solution is $u = u_0 + A^{-1}r_0$. This can be seen as a Newton
905 // iteration on a linear system with initial guess $u_0$. The CG solve in the
906 // `solve()` function below computes $A^{-1}r_0$ and the call to
907 // `constraints.distribute()` (which directly follows) adds the $u_0$.
908 //
909 // Obviously, since we are considering a problem with zero Dirichlet boundary,
910 // we could have taken a similar approach to step-37 `assemble_rhs()`, but this
911 // additional work allows us to change the problem declaration if we so
912 // choose.
913 //
914 // This function has two parts in the integration loop: applying the negative
915 // of matrix $A$ to $u_0$ by submitting the negative of the gradient, and adding
916 // the right-hand side contribution by submitting the value $f$. We must be sure
917 // to use `read_dof_values_plain()` for evaluating $u_0$ as `read_dof_values()`
918 // would set all Dirichlet values to zero.
919 //
920 // Finally, the right_hand_side vector is of type LA::MPI::Vector, but the
921 // MatrixFree class only works with
922 // dealii::LinearAlgebra::distributed::Vector.  Therefore we must
923 // compute the right-hand side using MatrixFree functionality and then
924 // use the functions in the `ChangeVectorTypes` namespace to copy it to
925 // the correct type.
926 template <int dim, int degree>
927 void LaplaceProblem<dim, degree>::assemble_rhs()
928 {
929   TimerOutput::Scope timing(computing_timer, "Assemble right-hand side");
930 
931   MatrixFreeActiveVector solution_copy;
932   MatrixFreeActiveVector right_hand_side_copy;
933   mf_system_matrix.initialize_dof_vector(solution_copy);
934   mf_system_matrix.initialize_dof_vector(right_hand_side_copy);
935 
936   solution_copy = 0.;
937   constraints.distribute(solution_copy);
938   solution_copy.update_ghost_values();
939   right_hand_side_copy = 0;
940   const Table<2, VectorizedArray<double>> &coefficient =
941     *(mf_system_matrix.get_coefficient());
942 
943   RightHandSide<dim> right_hand_side_function;
944 
945   FEEvaluation<dim, degree, degree + 1, 1, double> phi(
946     *mf_system_matrix.get_matrix_free());
947 
948   for (unsigned int cell = 0;
949        cell < mf_system_matrix.get_matrix_free()->n_macro_cells();
950        ++cell)
951     {
952       phi.reinit(cell);
953       phi.read_dof_values_plain(solution_copy);
954       phi.evaluate(EvaluationFlags::gradients);
955 
956       for (unsigned int q = 0; q < phi.n_q_points; ++q)
957         {
958           phi.submit_gradient(-1.0 *
959                                 (coefficient(cell, 0) * phi.get_gradient(q)),
960                               q);
961           phi.submit_value(
962             right_hand_side_function.value(phi.quadrature_point(q)), q);
963         }
964 
965       phi.integrate_scatter(EvaluationFlags::values |
966                               EvaluationFlags::gradients,
967                             right_hand_side_copy);
968     }
969 
970   right_hand_side_copy.compress(VectorOperation::add);
971 
972   ChangeVectorTypes::copy(right_hand_side, right_hand_side_copy);
973 }
974 
975 
976 
977 // @sect4{LaplaceProblem::solve()}
978 
979 // Here we set up the multigrid preconditioner, test the timing of a single
980 // V-cycle, and solve the linear system. Unsurprisingly, this is one of the
981 // places where the three methods differ the most.
982 template <int dim, int degree>
983 void LaplaceProblem<dim, degree>::solve()
984 {
985   TimerOutput::Scope timing(computing_timer, "Solve");
986 
987   SolverControl solver_control(1000, 1.e-10 * right_hand_side.l2_norm());
988   solver_control.enable_history_data();
989 
990   solution = 0.;
991 
992   // The solver for the matrix-free GMG method is similar to step-37, apart
993   // from adding some interface matrices in complete analogy to step-16.
994   switch (settings.solver)
995     {
996       case Settings::gmg_mf:
997         {
998           computing_timer.enter_subsection("Solve: Preconditioner setup");
999 
1000           MGTransferMatrixFree<dim, float> mg_transfer(mg_constrained_dofs);
1001           mg_transfer.build(dof_handler);
1002 
1003           SolverControl coarse_solver_control(1000, 1e-12, false, false);
1004           SolverCG<MatrixFreeLevelVector> coarse_solver(coarse_solver_control);
1005           PreconditionIdentity            identity;
1006           MGCoarseGridIterativeSolver<MatrixFreeLevelVector,
1007                                       SolverCG<MatrixFreeLevelVector>,
1008                                       MatrixFreeLevelMatrix,
1009                                       PreconditionIdentity>
1010             coarse_grid_solver(coarse_solver, mf_mg_matrix[0], identity);
1011 
1012           using Smoother = dealii::PreconditionJacobi<MatrixFreeLevelMatrix>;
1013           MGSmootherPrecondition<MatrixFreeLevelMatrix,
1014                                  Smoother,
1015                                  MatrixFreeLevelVector>
1016             smoother;
1017           smoother.initialize(mf_mg_matrix,
1018                               typename Smoother::AdditionalData(
1019                                 settings.smoother_dampen));
1020           smoother.set_steps(settings.smoother_steps);
1021 
1022           mg::Matrix<MatrixFreeLevelVector> mg_m(mf_mg_matrix);
1023 
1024           MGLevelObject<
1025             MatrixFreeOperators::MGInterfaceOperator<MatrixFreeLevelMatrix>>
1026             mg_interface_matrices;
1027           mg_interface_matrices.resize(0, triangulation.n_global_levels() - 1);
1028           for (unsigned int level = 0; level < triangulation.n_global_levels();
1029                ++level)
1030             mg_interface_matrices[level].initialize(mf_mg_matrix[level]);
1031           mg::Matrix<MatrixFreeLevelVector> mg_interface(mg_interface_matrices);
1032 
1033           Multigrid<MatrixFreeLevelVector> mg(
1034             mg_m, coarse_grid_solver, mg_transfer, smoother, smoother);
1035           mg.set_edge_matrices(mg_interface, mg_interface);
1036 
1037           PreconditionMG<dim,
1038                          MatrixFreeLevelVector,
1039                          MGTransferMatrixFree<dim, float>>
1040             preconditioner(dof_handler, mg, mg_transfer);
1041 
1042           // Copy the solution vector and right-hand side from LA::MPI::Vector
1043           // to dealii::LinearAlgebra::distributed::Vector so that we can solve.
1044           MatrixFreeActiveVector solution_copy;
1045           MatrixFreeActiveVector right_hand_side_copy;
1046           mf_system_matrix.initialize_dof_vector(solution_copy);
1047           mf_system_matrix.initialize_dof_vector(right_hand_side_copy);
1048 
1049           ChangeVectorTypes::copy(solution_copy, solution);
1050           ChangeVectorTypes::copy(right_hand_side_copy, right_hand_side);
1051           computing_timer.leave_subsection("Solve: Preconditioner setup");
1052 
1053           // Timing for 1 V-cycle.
1054           {
1055             TimerOutput::Scope timing(computing_timer,
1056                                       "Solve: 1 multigrid V-cycle");
1057             preconditioner.vmult(solution_copy, right_hand_side_copy);
1058           }
1059           solution_copy = 0.;
1060 
1061           // Solve the linear system, update the ghost values of the solution,
1062           // copy back to LA::MPI::Vector and distribute constraints.
1063           {
1064             SolverCG<MatrixFreeActiveVector> solver(solver_control);
1065 
1066             TimerOutput::Scope timing(computing_timer, "Solve: CG");
1067             solver.solve(mf_system_matrix,
1068                          solution_copy,
1069                          right_hand_side_copy,
1070                          preconditioner);
1071           }
1072 
1073           solution_copy.update_ghost_values();
1074           ChangeVectorTypes::copy(solution, solution_copy);
1075           constraints.distribute(solution);
1076 
1077           break;
1078         }
1079 
1080         // Solver for the matrix-based GMG method, similar to step-16, only
1081         // using a Jacobi smoother instead of a SOR smoother (which is not
1082         // implemented in parallel).
1083       case Settings::gmg_mb:
1084         {
1085           computing_timer.enter_subsection("Solve: Preconditioner setup");
1086 
1087           MGTransferPrebuilt<VectorType> mg_transfer(mg_constrained_dofs);
1088           mg_transfer.build(dof_handler);
1089 
1090           SolverControl        coarse_solver_control(1000, 1e-12, false, false);
1091           SolverCG<VectorType> coarse_solver(coarse_solver_control);
1092           PreconditionIdentity identity;
1093           MGCoarseGridIterativeSolver<VectorType,
1094                                       SolverCG<VectorType>,
1095                                       MatrixType,
1096                                       PreconditionIdentity>
1097             coarse_grid_solver(coarse_solver, mg_matrix[0], identity);
1098 
1099           using Smoother = LA::MPI::PreconditionJacobi;
1100           MGSmootherPrecondition<MatrixType, Smoother, VectorType> smoother;
1101 
1102 #ifdef USE_PETSC_LA
1103           smoother.initialize(mg_matrix);
1104           Assert(
1105             settings.smoother_dampen == 1.0,
1106             ExcNotImplemented(
1107               "PETSc's PreconditionJacobi has no support for a damping parameter."));
1108 #else
1109           smoother.initialize(mg_matrix, settings.smoother_dampen);
1110 #endif
1111 
1112           smoother.set_steps(settings.smoother_steps);
1113 
1114           mg::Matrix<VectorType> mg_m(mg_matrix);
1115           mg::Matrix<VectorType> mg_in(mg_interface_in);
1116           mg::Matrix<VectorType> mg_out(mg_interface_in);
1117 
1118           Multigrid<VectorType> mg(
1119             mg_m, coarse_grid_solver, mg_transfer, smoother, smoother);
1120           mg.set_edge_matrices(mg_out, mg_in);
1121 
1122 
1123           PreconditionMG<dim, VectorType, MGTransferPrebuilt<VectorType>>
1124             preconditioner(dof_handler, mg, mg_transfer);
1125 
1126           computing_timer.leave_subsection("Solve: Preconditioner setup");
1127 
1128           // Timing for 1 V-cycle.
1129           {
1130             TimerOutput::Scope timing(computing_timer,
1131                                       "Solve: 1 multigrid V-cycle");
1132             preconditioner.vmult(solution, right_hand_side);
1133           }
1134           solution = 0.;
1135 
1136           // Solve the linear system and distribute constraints.
1137           {
1138             SolverCG<VectorType> solver(solver_control);
1139 
1140             TimerOutput::Scope timing(computing_timer, "Solve: CG");
1141             solver.solve(system_matrix,
1142                          solution,
1143                          right_hand_side,
1144                          preconditioner);
1145           }
1146 
1147           constraints.distribute(solution);
1148 
1149           break;
1150         }
1151 
1152       // Solver for the AMG method, similar to step-40.
1153       case Settings::amg:
1154         {
1155           computing_timer.enter_subsection("Solve: Preconditioner setup");
1156 
1157           PreconditionAMG                 preconditioner;
1158           PreconditionAMG::AdditionalData Amg_data;
1159 
1160 #ifdef USE_PETSC_LA
1161           Amg_data.symmetric_operator = true;
1162 #else
1163           Amg_data.elliptic              = true;
1164           Amg_data.smoother_type         = "Jacobi";
1165           Amg_data.higher_order_elements = true;
1166           Amg_data.smoother_sweeps       = settings.smoother_steps;
1167           Amg_data.aggregation_threshold = 0.02;
1168 #endif
1169 
1170           Amg_data.output_details = false;
1171 
1172           preconditioner.initialize(system_matrix, Amg_data);
1173           computing_timer.leave_subsection("Solve: Preconditioner setup");
1174 
1175           // Timing for 1 V-cycle.
1176           {
1177             TimerOutput::Scope timing(computing_timer,
1178                                       "Solve: 1 multigrid V-cycle");
1179             preconditioner.vmult(solution, right_hand_side);
1180           }
1181           solution = 0.;
1182 
1183           // Solve the linear system and distribute constraints.
1184           {
1185             SolverCG<VectorType> solver(solver_control);
1186 
1187             TimerOutput::Scope timing(computing_timer, "Solve: CG");
1188             solver.solve(system_matrix,
1189                          solution,
1190                          right_hand_side,
1191                          preconditioner);
1192           }
1193           constraints.distribute(solution);
1194 
1195           break;
1196         }
1197 
1198       default:
1199         Assert(false, ExcInternalError());
1200     }
1201 
1202   pcout << "   Number of CG iterations:      " << solver_control.last_step()
1203         << std::endl;
1204 }
1205 
1206 
1207 // @sect3{The error estimator}
1208 
1209 // We use the FEInterfaceValues class to assemble an error estimator to decide
1210 // which cells to refine. See the exact definition of the cell and face
1211 // integrals in the introduction. To use the method, we define Scratch and
1212 // Copy objects for the MeshWorker::mesh_loop(); much of the following code
1213 // is, in essence, what was already set up in step-12 (or at least similar
1214 // in spirit).
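//
// Written out, the quantity assembled below for each cell $K$ is
// $\eta_K^2 = h^2 \| f + \epsilon \triangle u \|_K^2
//           + \sum_F \frac{1}{2} h_F \| \jump{\epsilon \nabla u \cdot n} \|_F^2$,
// where the sum runs over the faces of $K$ and the factor $\frac{1}{2}$
// reflects that each interior face contribution is split evenly between the
// two adjacent cells (see the cell and face workers in estimate() below).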
1215 template <int dim>
1216 struct ScratchData
1217 {
1218   ScratchData(const Mapping<dim> &      mapping,
1219               const FiniteElement<dim> &fe,
1220               const unsigned int        quadrature_degree,
1221               const UpdateFlags         update_flags,
1222               const UpdateFlags         interface_update_flags)
1223     : fe_values(mapping, fe, QGauss<dim>(quadrature_degree), update_flags)
1224     , fe_interface_values(mapping,
1225                           fe,
1226                           QGauss<dim - 1>(quadrature_degree),
1227                           interface_update_flags)
1228   {}
1229 
1230 
1231   ScratchData(const ScratchData<dim> &scratch_data)
1232     : fe_values(scratch_data.fe_values.get_mapping(),
1233                 scratch_data.fe_values.get_fe(),
1234                 scratch_data.fe_values.get_quadrature(),
1235                 scratch_data.fe_values.get_update_flags())
1236     , fe_interface_values(scratch_data.fe_values.get_mapping(),
1237                           scratch_data.fe_values.get_fe(),
1238                           scratch_data.fe_interface_values.get_quadrature(),
1239                           scratch_data.fe_interface_values.get_update_flags())
1240   {}
1241 
1242   FEValues<dim>          fe_values;
1243   FEInterfaceValues<dim> fe_interface_values;
1244 };
1245 
1246 
1247 
1248 struct CopyData
1249 {
1250   CopyData()
1251     : cell_index(numbers::invalid_unsigned_int)
1252     , value(0.)
1253   {}
1254 
1255   CopyData(const CopyData &) = default;
1256 
1257   struct FaceData
1258   {
1259     unsigned int cell_indices[2];
1260     double       values[2];
1261   };
1262 
1263   unsigned int          cell_index;
1264   double                value;
1265   std::vector<FaceData> face_data;
1266 };
1267 
1268 
1269 template <int dim, int degree>
1270 void LaplaceProblem<dim, degree>::estimate()
1271 {
1272   TimerOutput::Scope timing(computing_timer, "Estimate");
1273 
1274   VectorType temp_solution;
1275   temp_solution.reinit(locally_owned_dofs,
1276                        locally_relevant_dofs,
1277                        mpi_communicator);
1278   temp_solution = solution;
1279 
1280   const Coefficient<dim> coefficient;
1281 
1282   estimated_error_square_per_cell.reinit(triangulation.n_active_cells());
1283 
1284   using Iterator = typename DoFHandler<dim>::active_cell_iterator;
1285 
1286   // Assembler for cell residual $h^2 \| f + \epsilon \triangle u \|_K^2$
1287   auto cell_worker = [&](const Iterator &  cell,
1288                          ScratchData<dim> &scratch_data,
1289                          CopyData &        copy_data) {
1290     FEValues<dim> &fe_values = scratch_data.fe_values;
1291     fe_values.reinit(cell);
1292 
1293     RightHandSide<dim> rhs;
1294     const double       rhs_value = rhs.value(cell->center());
1295 
1296     const double nu = coefficient.value(cell->center());
1297 
1298     std::vector<Tensor<2, dim>> hessians(fe_values.n_quadrature_points);
1299     fe_values.get_function_hessians(temp_solution, hessians);
1300 
1301     copy_data.cell_index = cell->active_cell_index();
1302 
1303     double residual_norm_square = 0.;
1304     for (unsigned k = 0; k < fe_values.n_quadrature_points; ++k)
1305       {
1306         const double residual = (rhs_value + nu * trace(hessians[k]));
1307         residual_norm_square += residual * residual * fe_values.JxW(k);
1308       }
1309 
1310     copy_data.value =
1311       cell->diameter() * cell->diameter() * residual_norm_square;
1312   };
1313 
1314   // Assembler for face term $\sum_F h_F \| \jump{\epsilon \nabla u \cdot n}
1315   // \|_F^2$
1316   auto face_worker = [&](const Iterator &    cell,
1317                          const unsigned int &f,
1318                          const unsigned int &sf,
1319                          const Iterator &    ncell,
1320                          const unsigned int &nf,
1321                          const unsigned int &nsf,
1322                          ScratchData<dim> &  scratch_data,
1323                          CopyData &          copy_data) {
1324     FEInterfaceValues<dim> &fe_interface_values =
1325       scratch_data.fe_interface_values;
1326     fe_interface_values.reinit(cell, f, sf, ncell, nf, nsf);
1327 
1328     copy_data.face_data.emplace_back();
1329     CopyData::FaceData &copy_data_face = copy_data.face_data.back();
1330 
1331     copy_data_face.cell_indices[0] = cell->active_cell_index();
1332     copy_data_face.cell_indices[1] = ncell->active_cell_index();
1333 
1334     const double coeff1 = coefficient.value(cell->center());
1335     const double coeff2 = coefficient.value(ncell->center());
1336 
1337     std::vector<Tensor<1, dim>> grad_u[2];
1338 
1339     for (unsigned int i = 0; i < 2; ++i)
1340       {
1341         grad_u[i].resize(fe_interface_values.n_quadrature_points);
1342         fe_interface_values.get_fe_face_values(i).get_function_gradients(
1343           temp_solution, grad_u[i]);
1344       }
1345 
1346     double jump_norm_square = 0.;
1347 
1348     for (unsigned int qpoint = 0;
1349          qpoint < fe_interface_values.n_quadrature_points;
1350          ++qpoint)
1351       {
1352         const double jump =
1353           coeff1 * grad_u[0][qpoint] * fe_interface_values.normal(qpoint) -
1354           coeff2 * grad_u[1][qpoint] * fe_interface_values.normal(qpoint);
1355 
1356         jump_norm_square += jump * jump * fe_interface_values.JxW(qpoint);
1357       }
1358 
1359     const double h           = cell->face(f)->measure();
1360     copy_data_face.values[0] = 0.5 * h * jump_norm_square;
1361     copy_data_face.values[1] = copy_data_face.values[0];
1362   };
1363 
1364   auto copier = [&](const CopyData &copy_data) {
1365     if (copy_data.cell_index != numbers::invalid_unsigned_int)
1366       estimated_error_square_per_cell[copy_data.cell_index] += copy_data.value;
1367 
1368     for (auto &cdf : copy_data.face_data)
1369       for (unsigned int j = 0; j < 2; ++j)
1370         estimated_error_square_per_cell[cdf.cell_indices[j]] += cdf.values[j];
1371   };
1372 
1373   const unsigned int n_gauss_points = degree + 1;
1374   ScratchData<dim>   scratch_data(mapping,
1375                                 fe,
1376                                 n_gauss_points,
1377                                 update_hessians | update_quadrature_points |
1378                                   update_JxW_values,
1379                                 update_values | update_gradients |
1380                                   update_JxW_values | update_normal_vectors);
1381   CopyData           copy_data;
1382 
1383 // We need to assemble each interior face once, but we must make sure that
1384 // both processes assemble the face term between a locally owned cell and a ghost
1385   // cell. This is achieved by setting the
1386   // MeshWorker::assemble_ghost_faces_both flag. We need to do this, because
1387   // we do not communicate the error estimator contributions here.
1388   MeshWorker::mesh_loop(dof_handler.begin_active(),
1389                         dof_handler.end(),
1390                         cell_worker,
1391                         copier,
1392                         scratch_data,
1393                         copy_data,
1394                         MeshWorker::assemble_own_cells |
1395                           MeshWorker::assemble_ghost_faces_both |
1396                           MeshWorker::assemble_own_interior_faces_once,
1397                         /*boundary_worker=*/nullptr,
1398                         face_worker);
1399 
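  // Each entry of @p estimated_error_square_per_cell already stores a
  // squared quantity, so the $\ell_1$ norm of this vector is the sum of the
  // local squared indicators; accumulating these partial sums over all MPI
  // ranks and taking the square root yields the global error estimate.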
  const double global_error_estimate =
    std::sqrt(Utilities::MPI::sum(estimated_error_square_per_cell.l1_norm(),
                                  mpi_communicator));
  pcout << "   Global error estimate:        " << global_error_estimate
        << std::endl;
}


// @sect4{LaplaceProblem::refine_grid()}

// We use the cell-wise estimator stored in the vector
// @p estimated_error_square_per_cell and refine a fixed fraction of the
// cells with the largest estimated error (chosen here so that the number of
// DoFs roughly doubles in each step).
template <int dim, int degree>
void LaplaceProblem<dim, degree>::refine_grid()
{
  TimerOutput::Scope timing(computing_timer, "Refine grid");

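  // Refining a cell replaces it by $2^{dim}$ children, i.e., a net gain of
  // $2^{dim}-1$ cells per refined cell. Marking the fraction
  // $1/(2^{dim}-1)$ of the cells with the largest indicators for refinement
  // therefore roughly doubles the number of active cells, and with it the
  // number of DoFs, in each cycle.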
  const double refinement_fraction = 1. / (std::pow(2.0, dim) - 1.);
  parallel::distributed::GridRefinement::refine_and_coarsen_fixed_number(
    triangulation, estimated_error_square_per_cell, refinement_fraction, 0.0);

  triangulation.execute_coarsening_and_refinement();
}


// @sect4{LaplaceProblem::output_results()}

// The output_results() function is similar to the ones found in many of the
// tutorials (see step-40 for example).
template <int dim, int degree>
void LaplaceProblem<dim, degree>::output_results(const unsigned int cycle)
{
  TimerOutput::Scope timing(computing_timer, "Output results");

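  // DataOut needs read access to ghosted vector entries when it builds
  // patches, so we first copy the solution into a vector that also stores
  // the locally relevant DoFs.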
  VectorType temp_solution;
  temp_solution.reinit(locally_owned_dofs,
                       locally_relevant_dofs,
                       mpi_communicator);
  temp_solution = solution;

  DataOut<dim> data_out;
  data_out.attach_dof_handler(dof_handler);
  data_out.add_data_vector(temp_solution, "solution");

  Vector<float> subdomain(triangulation.n_active_cells());
  for (unsigned int i = 0; i < subdomain.size(); ++i)
    subdomain(i) = triangulation.locally_owned_subdomain();
  data_out.add_data_vector(subdomain, "subdomain");

  Vector<float> level(triangulation.n_active_cells());
  for (const auto &cell : triangulation.active_cell_iterators())
    level(cell->active_cell_index()) = cell->level();
  data_out.add_data_vector(level, "level");

  if (estimated_error_square_per_cell.size() > 0)
    data_out.add_data_vector(estimated_error_square_per_cell,
                             "estimated_error_square_per_cell");

  data_out.build_patches();

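  // write_vtu_with_pvtu_record() creates one .vtu file per group of
  // processes plus a .pvtu record referencing them; with n_groups equal to
  // 1 all processes write into a single shared file, and n_digits controls
  // the number of digits used for the cycle counter in the file names.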
  const std::string pvtu_filename = data_out.write_vtu_with_pvtu_record(
    "", "solution", cycle, mpi_communicator, 2 /*n_digits*/, 1 /*n_groups*/);

  pcout << "   Wrote " << pvtu_filename << std::endl;
}


// @sect4{LaplaceProblem::run()}

// As in most tutorials, this function calls the various functions defined
// above to set up, assemble, solve, and output the results.
template <int dim, int degree>
void LaplaceProblem<dim, degree>::run()
{
  for (unsigned int cycle = 0; cycle < settings.n_steps; ++cycle)
    {
      pcout << "Cycle " << cycle << ':' << std::endl;
      if (cycle > 0)
        refine_grid();

      pcout << "   Number of active cells:       "
            << triangulation.n_global_active_cells();

      // We only print information about the level hierarchy for the GMG
      // methods (and likewise for the per-level DoF counts below). Note that
      // the partition efficiency is irrelevant for AMG since the level
      // hierarchy is not distributed or used during the computation.
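      // MGTools::workload_imbalance() measures, roughly speaking, the ratio
      // between the largest per-process share of level cells and the
      // average share, accumulated over all levels of the hierarchy; its
      // inverse is therefore 1 for a perfectly balanced hierarchy and
      // smaller otherwise.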
      if (settings.solver == Settings::gmg_mf ||
          settings.solver == Settings::gmg_mb)
        pcout << " (" << triangulation.n_global_levels() << " global levels)"
              << std::endl
              << "   Partition efficiency:         "
              << 1.0 / MGTools::workload_imbalance(triangulation);
      pcout << std::endl;

      setup_system();

      // Only set up the multilevel hierarchy for GMG.
      if (settings.solver == Settings::gmg_mf ||
          settings.solver == Settings::gmg_mb)
        setup_multigrid();

      pcout << "   Number of degrees of freedom: " << dof_handler.n_dofs();
      if (settings.solver == Settings::gmg_mf ||
          settings.solver == Settings::gmg_mb)
        {
          pcout << " (by level: ";
          for (unsigned int level = 0; level < triangulation.n_global_levels();
               ++level)
            pcout << dof_handler.n_dofs(level)
                  << (level == triangulation.n_global_levels() - 1 ? ")" :
                                                                     ", ");
        }
      pcout << std::endl;

      // For the matrix-free method, we only assemble the right-hand side.
      // For both matrix-based methods, we assemble both the active matrix
      // and the right-hand side, and additionally assemble the multigrid
      // matrices for matrix-based GMG.
      if (settings.solver == Settings::gmg_mf)
        assemble_rhs();
      else /*gmg_mb or amg*/
        {
          assemble_system();
          if (settings.solver == Settings::gmg_mb)
            assemble_multigrid();
        }

      solve();
      estimate();

      if (settings.output)
        output_results(cycle);

      computing_timer.print_summary();
      computing_timer.reset();
    }
}


// @sect3{The main() function}

// This main() function is similar to the one in step-40, with the exception
// that we require the user to pass a .prm file as the sole command-line
// argument (see step-29 and the documentation of the ParameterHandler class
// for a complete discussion of parameter files).
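//
// For example, assuming the executable is named @p step-50 and a parameter
// file such as @p gmg_mf_2d.prm (with entries matching the declarations in
// the Settings class above) is present in the working directory, the program
// could be launched on four MPI processes with
// <code>mpirun -np 4 ./step-50 gmg_mf_2d.prm</code>.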
int main(int argc, char *argv[])
{
  using namespace dealii;
  Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1);

  Settings settings;
  if (!settings.try_parse((argc > 1) ? (argv[1]) : ""))
    return 0;

  try
    {
      constexpr unsigned int fe_degree = 2;

      switch (settings.dimension)
        {
          case 2:
            {
              LaplaceProblem<2, fe_degree> test(settings);
              test.run();

              break;
            }

          case 3:
            {
              LaplaceProblem<3, fe_degree> test(settings);
              test.run();

              break;
            }

          default:
            Assert(false, ExcMessage("This program only works in 2d and 3d."));
        }
    }
  catch (std::exception &exc)
    {
      std::cerr << std::endl
                << std::endl
                << "----------------------------------------------------"
                << std::endl;
      std::cerr << "Exception on processing: " << std::endl
                << exc.what() << std::endl
                << "Aborting!" << std::endl
                << "----------------------------------------------------"
                << std::endl;
      MPI_Abort(MPI_COMM_WORLD, 1);
      return 1;
    }
  catch (...)
    {
      std::cerr << std::endl
                << std::endl
                << "----------------------------------------------------"
                << std::endl;
      std::cerr << "Unknown exception!" << std::endl
                << "Aborting!" << std::endl
                << "----------------------------------------------------"
                << std::endl;
      MPI_Abort(MPI_COMM_WORLD, 2);
      return 1;
    }

  return 0;
}