1 /* ---------------------------------------------------------------------
2  *
3  * Copyright (C) 2008 - 2020 by the deal.II authors
4  *
5  * This file is part of the deal.II library.
6  *
7  * The deal.II library is free software; you can use it, redistribute
8  * it, and/or modify it under the terms of the GNU Lesser General
9  * Public License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  * The full text of the license can be found in the file LICENSE.md at
12  * the top level directory of deal.II.
13  *
14  * ---------------------------------------------------------------------
15 
16  *
17  * Authors: Martin Kronbichler, Uppsala University,
18  *          Wolfgang Bangerth, Texas A&M University,
19  *          Timo Heister, University of Goettingen, 2008-2011
20  */
21 
22 
23 // @sect3{Include files}
24 
25 // The first task as usual is to include the functionality of these well-known
26 // deal.II library files and some C++ header files.
27 #include <deal.II/base/quadrature_lib.h>
28 #include <deal.II/base/logstream.h>
29 #include <deal.II/base/function.h>
30 #include <deal.II/base/utilities.h>
31 #include <deal.II/base/conditional_ostream.h>
32 #include <deal.II/base/work_stream.h>
33 #include <deal.II/base/timer.h>
34 #include <deal.II/base/parameter_handler.h>
35 
36 #include <deal.II/lac/full_matrix.h>
37 #include <deal.II/lac/solver_bicgstab.h>
38 #include <deal.II/lac/solver_cg.h>
39 #include <deal.II/lac/solver_gmres.h>
40 #include <deal.II/lac/affine_constraints.h>
41 #include <deal.II/lac/block_sparsity_pattern.h>
42 #include <deal.II/lac/trilinos_parallel_block_vector.h>
43 #include <deal.II/lac/trilinos_sparse_matrix.h>
44 #include <deal.II/lac/trilinos_block_sparse_matrix.h>
45 #include <deal.II/lac/trilinos_precondition.h>
46 #include <deal.II/lac/trilinos_solver.h>
47 
48 #include <deal.II/grid/tria.h>
49 #include <deal.II/grid/grid_generator.h>
50 #include <deal.II/grid/tria_accessor.h>
51 #include <deal.II/grid/tria_iterator.h>
52 #include <deal.II/grid/filtered_iterator.h>
53 #include <deal.II/grid/manifold_lib.h>
54 #include <deal.II/grid/grid_tools.h>
55 #include <deal.II/grid/grid_refinement.h>
56 
57 #include <deal.II/dofs/dof_handler.h>
58 #include <deal.II/dofs/dof_renumbering.h>
59 #include <deal.II/dofs/dof_accessor.h>
60 #include <deal.II/dofs/dof_tools.h>
61 
62 #include <deal.II/fe/fe_q.h>
63 #include <deal.II/fe/fe_dgq.h>
64 #include <deal.II/fe/fe_dgp.h>
65 #include <deal.II/fe/fe_system.h>
66 #include <deal.II/fe/fe_values.h>
67 #include <deal.II/fe/mapping_q.h>
68 
69 #include <deal.II/numerics/vector_tools.h>
70 #include <deal.II/numerics/matrix_tools.h>
71 #include <deal.II/numerics/data_out.h>
72 #include <deal.II/numerics/error_estimator.h>
73 #include <deal.II/numerics/solution_transfer.h>
74 
75 #include <fstream>
76 #include <iostream>
77 #include <limits>
78 #include <locale>
79 #include <string>
80 
81 // This is the only include file that is new: It introduces the
82 // parallel::distributed::SolutionTransfer equivalent of the
83 // dealii::SolutionTransfer class to take a solution from one mesh to the next
84 // one upon mesh refinement, but in the case of parallel distributed
85 // triangulations:
86 #include <deal.II/distributed/solution_transfer.h>
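
// To give a rough idea of how this class will be used further down in this
// program (only a sketch; the variable names here are placeholders rather than
// actual members of the classes below), the typical pattern upon mesh
// refinement looks like this:
// @code
//   parallel::distributed::SolutionTransfer<dim, VectorType> transfer(dof_handler);
//   transfer.prepare_for_coarsening_and_refinement(old_ghosted_solution);
//   triangulation.execute_coarsening_and_refinement();
//   dof_handler.distribute_dofs(fe);
//   // ...reinitialize a vector with the new index sets, then:
//   transfer.interpolate(new_distributed_solution);
// @endcode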
87 
88 // The following classes are used in parallel distributed computations and
89 // have all already been introduced in step-40:
90 #include <deal.II/base/index_set.h>
91 #include <deal.II/distributed/tria.h>
92 #include <deal.II/distributed/grid_refinement.h>
93 
94 
95 // The next step is like in all previous tutorial programs: We put everything
96 // into a namespace of its own and then import the deal.II classes and
97 // functions into it:
98 namespace Step32
99 {
100   using namespace dealii;
101 
102   // @sect3{Equation data}
103 
104   // In the following namespace, we define the various pieces of equation data
105   // that describe the problem. This corresponds to the various aspects of
106   // making the problem at least slightly realistic, all of which were
107   // exhaustively discussed in the description of the testcase in the introduction.
108   //
109   // We start with a few coefficients that have constant values (the comment
110   // after the value indicates its physical units):
111   namespace EquationData
112   {
113     constexpr double eta                   = 1e21;    /* Pa s       */
114     constexpr double kappa                 = 1e-6;    /* m^2 / s    */
115     constexpr double reference_density     = 3300;    /* kg / m^3   */
116     constexpr double reference_temperature = 293;     /* K          */
117     constexpr double expansion_coefficient = 2e-5;    /* 1/K        */
118     constexpr double specific_heat         = 1250;    /* J / K / kg */
119     constexpr double radiogenic_heating    = 7.4e-12; /* W / kg     */
120 
121 
122     constexpr double R0 = 6371000. - 2890000.; /* m          */
123     constexpr double R1 = 6371000. - 35000.;   /* m          */
124 
125     constexpr double T0 = 4000 + 273; /* K          */
126     constexpr double T1 = 700 + 273;  /* K          */
127 
128 
129     // The next set of definitions are for functions that encode the density
130     // as a function of temperature, the gravity vector, and the initial
131     // values for the temperature. Again, all of these (along with the values
132     // they compute) are discussed in the introduction:
133     double density(const double temperature)
134     {
135       return (
136         reference_density *
137         (1 - expansion_coefficient * (temperature - reference_temperature)));
138     }
139 
140 
141     template <int dim>
142     Tensor<1, dim> gravity_vector(const Point<dim> &p)
143     {
144       const double r = p.norm();
145       return -(1.245e-6 * r + 7.714e13 / r / r) * p / r;
146     }
147 
148 
149 
150     template <int dim>
151     class TemperatureInitialValues : public Function<dim>
152     {
153     public:
154       TemperatureInitialValues()
155         : Function<dim>(1)
156       {}
157 
158       virtual double value(const Point<dim> & p,
159                            const unsigned int component = 0) const override;
160 
161       virtual void vector_value(const Point<dim> &p,
162                                 Vector<double> &  value) const override;
163     };
164 
165 
166 
167     template <int dim>
168     double TemperatureInitialValues<dim>::value(const Point<dim> &p,
169                                                 const unsigned int) const
170     {
171       const double r = p.norm();
172       const double h = R1 - R0;
173 
174       const double s = (r - R0) / h;
175       const double q =
176         (dim == 3) ? std::max(0.0, std::cos(numbers::PI * std::abs(p(2) / R1))) : 1.0;
177       const double phi = std::atan2(p(0), p(1));
178       const double tau = s + 0.2 * s * (1 - s) * std::sin(6 * phi) * q;
179 
180       return T0 * (1.0 - tau) + T1 * tau;
181     }
182 
183 
184     template <int dim>
185     void
186     TemperatureInitialValues<dim>::vector_value(const Point<dim> &p,
187                                                 Vector<double> &  values) const
188     {
189       for (unsigned int c = 0; c < this->n_components; ++c)
190         values(c) = TemperatureInitialValues<dim>::value(p, c);
191     }
192 
193 
194     // As mentioned in the introduction we need to rescale the pressure to
195     // avoid the relative ill-conditioning of the momentum and mass
196     // conservation equations. The scaling factor is $\frac{\eta}{L}$ where
197   // $L$ is a typical length scale. Experimentation shows that a
198     // good length scale is the diameter of plumes, which is around 10 km:
199     constexpr double pressure_scaling = eta / 10000;
200 
201     // The final number in this namespace is a constant that denotes the
202     // number of seconds per (average, tropical) year. We use this only when
203     // generating screen output: internally, all computations of this program
204   // happen in SI units (kilograms, meters, seconds) but writing geological
205     // times in seconds yields numbers that one can't relate to reality, and
206     // so we convert to years using the factor defined here:
207     const double year_in_seconds = 60 * 60 * 24 * 365.2425;
208 
209   } // namespace EquationData
210 
211 
212 
213   // @sect3{Preconditioning the Stokes system}
214 
215   // This namespace implements the preconditioner. As discussed in the
216   // introduction, this preconditioner differs in a number of key portions
217   // from the one used in step-31. Specifically, it is a right preconditioner,
218   // implementing the matrix
219   // @f{align*}
220   //   \left(\begin{array}{cc}A^{-1} & B^T
221   //                        \\0 & S^{-1}
222   // \end{array}\right)
223   // @f}
224   // where the two inverse matrix operations
225   // are approximated by linear solvers or, if the right flag is given to the
226   // constructor of this class, by a single AMG V-cycle for the velocity
227   // block. The three code blocks of the <code>vmult</code> function implement
228   // the multiplications with the three blocks of this preconditioner matrix
229   // and should be self explanatory if you have read through step-31 or the
230   // discussion of composing solvers in step-20.
231   namespace LinearSolvers
232   {
233     template <class PreconditionerTypeA, class PreconditionerTypeMp>
234     class BlockSchurPreconditioner : public Subscriptor
235     {
236     public:
237       BlockSchurPreconditioner(const TrilinosWrappers::BlockSparseMatrix &S,
238                                const TrilinosWrappers::BlockSparseMatrix &Spre,
239                                const PreconditionerTypeMp &Mppreconditioner,
240                                const PreconditionerTypeA & Apreconditioner,
241                                const bool                  do_solve_A)
242         : stokes_matrix(&S)
243         , stokes_preconditioner_matrix(&Spre)
244         , mp_preconditioner(Mppreconditioner)
245         , a_preconditioner(Apreconditioner)
246         , do_solve_A(do_solve_A)
247       {}
248 
249       void vmult(TrilinosWrappers::MPI::BlockVector &      dst,
250                  const TrilinosWrappers::MPI::BlockVector &src) const
251       {
252         TrilinosWrappers::MPI::Vector utmp(src.block(0));
253 
254         {
255           SolverControl solver_control(5000, 1e-6 * src.block(1).l2_norm());
256 
257           SolverCG<TrilinosWrappers::MPI::Vector> solver(solver_control);
258 
259           solver.solve(stokes_preconditioner_matrix->block(1, 1),
260                        dst.block(1),
261                        src.block(1),
262                        mp_preconditioner);
263 
264           dst.block(1) *= -1.0;
265         }
266 
267         {
268           stokes_matrix->block(0, 1).vmult(utmp, dst.block(1));
269           utmp *= -1.0;
270           utmp.add(src.block(0));
271         }
272 
273         if (do_solve_A == true)
274           {
275             SolverControl solver_control(5000, utmp.l2_norm() * 1e-2);
276             TrilinosWrappers::SolverCG solver(solver_control);
277             solver.solve(stokes_matrix->block(0, 0),
278                          dst.block(0),
279                          utmp,
280                          a_preconditioner);
281           }
282         else
283           a_preconditioner.vmult(dst.block(0), utmp);
284       }
285 
286     private:
287       const SmartPointer<const TrilinosWrappers::BlockSparseMatrix>
288         stokes_matrix;
289       const SmartPointer<const TrilinosWrappers::BlockSparseMatrix>
290                                   stokes_preconditioner_matrix;
291       const PreconditionerTypeMp &mp_preconditioner;
292       const PreconditionerTypeA & a_preconditioner;
293       const bool                  do_solve_A;
294     };
295   } // namespace LinearSolvers
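
  // To illustrate how this preconditioner class will eventually be handed to a
  // Krylov solver (only a sketch with made-up variable names; the actual solver
  // call appears much further down in this program), one would write something
  // along the lines of
  // @code
  //   const LinearSolvers::BlockSchurPreconditioner<
  //     TrilinosWrappers::PreconditionAMG,
  //     TrilinosWrappers::PreconditionJacobi>
  //     preconditioner(stokes_matrix,
  //                    stokes_preconditioner_matrix,
  //                    *Mp_preconditioner,
  //                    *Amg_preconditioner,
  //                    /* do_solve_A = */ false);
  //
  //   SolverFGMRES<TrilinosWrappers::MPI::BlockVector> solver(solver_control);
  //   solver.solve(stokes_matrix, distributed_solution, stokes_rhs, preconditioner);
  // @endcode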
296 
297 
298 
299   // @sect3{Definition of assembly data structures}
300   //
301   // As described in the introduction, we will use the WorkStream mechanism
302   // discussed in the @ref threads module to parallelize operations among the
303   // processors of a single machine. The WorkStream class requires that data
304   // is passed around in two kinds of data structures, one for scratch data
305   // and one to pass data from the assembly function to the function that
306   // copies local contributions into global objects.
307   //
308   // The following namespace (and the two sub-namespaces) contains a
309   // collection of data structures that serve this purpose, one pair for each
310   // of the four operations discussed in the introduction that we will want to
311   // parallelize. Each assembly routine gets two sets of data: a Scratch array
312   // that collects all the classes and arrays that are used for the
313   // calculation of the cell contribution, and a CopyData array that keeps
314   // local matrices and vectors which will be written into the global
315   // matrix. Whereas CopyData is a container for the final data that is
316   // written into the global matrices and vector (and, thus, absolutely
317   // necessary), the Scratch arrays are merely there for performance reasons
318   // &mdash; it would be much more expensive to set up an FEValues object on
319   // each cell than to create it only once and update some derivative data.
320   //
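  // To make the role of these two kinds of objects concrete before we define
  // them, here is a minimal sketch (not actual code of this program; the
  // variable names are placeholders) of how a Scratch/CopyData pair will later
  // be handed to the WorkStream class:
  // @code
  //   WorkStream::run(dof_handler.begin_active(),
  //                   dof_handler.end(),
  //                   cell_worker, // fills one Scratch and one CopyData object
  //                   copier,      // writes a CopyData object into the global matrix
  //                   Assembly::Scratch::StokesPreconditioner<dim>(fe,
  //                                                               quadrature,
  //                                                               mapping,
  //                                                               update_flags),
  //                   Assembly::CopyData::StokesPreconditioner<dim>(fe));
  // @endcode
  //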
321   // Step-31 had four assembly routines: One for the preconditioner matrix of
322   // the Stokes system, one for the Stokes matrix and right hand side, one for
323   // the temperature matrices and one for the right hand side of the
324   // temperature equation. We here organize the scratch arrays and CopyData
325   // objects for each of those four assembly components using a
326   // <code>struct</code> environment (since we consider these as temporary
327   // objects we pass around, rather than classes that implement functionality
328   // of their own, though this is a more subjective point of view to
329   // distinguish between <code>struct</code>s and <code>class</code>es).
330   //
331   // Regarding the Scratch objects, each struct is equipped with a constructor
332   // that creates an @ref FEValues object using the @ref FiniteElement,
333   // Quadrature, @ref Mapping (which describes the interpolation of curved
334   // boundaries), and @ref UpdateFlags instances. Moreover, we manually
335   // implement a copy constructor (since the FEValues class is not copyable by
336   // itself), and provide some additional vector fields that are used to hold
337   // intermediate data during the computation of local contributions.
338   //
339   // Let us start with the scratch arrays and, specifically, the one used for
340   // assembly of the Stokes preconditioner:
341   namespace Assembly
342   {
343     namespace Scratch
344     {
345       template <int dim>
346       struct StokesPreconditioner
347       {
348         StokesPreconditioner(const FiniteElement<dim> &stokes_fe,
349                              const Quadrature<dim> &   stokes_quadrature,
350                              const Mapping<dim> &      mapping,
351                              const UpdateFlags         update_flags);
352 
353         StokesPreconditioner(const StokesPreconditioner &data);
354 
355 
356         FEValues<dim> stokes_fe_values;
357 
358         std::vector<Tensor<2, dim>> grad_phi_u;
359         std::vector<double>         phi_p;
360       };
361 
362       template <int dim>
363       StokesPreconditioner<dim>::StokesPreconditioner(
364         const FiniteElement<dim> &stokes_fe,
365         const Quadrature<dim> &   stokes_quadrature,
366         const Mapping<dim> &      mapping,
367         const UpdateFlags         update_flags)
368         : stokes_fe_values(mapping, stokes_fe, stokes_quadrature, update_flags)
369         , grad_phi_u(stokes_fe.n_dofs_per_cell())
370         , phi_p(stokes_fe.n_dofs_per_cell())
371       {}
372 
373 
374 
375       template <int dim>
376       StokesPreconditioner<dim>::StokesPreconditioner(
377         const StokesPreconditioner &scratch)
378         : stokes_fe_values(scratch.stokes_fe_values.get_mapping(),
379                            scratch.stokes_fe_values.get_fe(),
380                            scratch.stokes_fe_values.get_quadrature(),
381                            scratch.stokes_fe_values.get_update_flags())
382         , grad_phi_u(scratch.grad_phi_u)
383         , phi_p(scratch.phi_p)
384       {}
385 
386 
387 
388       // The next one is the scratch object used for the assembly of the full
389       // Stokes system. Observe that we derive the StokesSystem scratch class
390       // from the StokesPreconditioner class above. We do this because all the
391       // objects that are necessary for the assembly of the preconditioner are
392       // also needed for the actual matrix system and right hand side, plus
393       // some extra data. This makes the program more compact. Note also that
394       // the assembly of the Stokes system and the temperature right hand side
395       // further down requires data from temperature and velocity,
396       // respectively, so we actually need two FEValues objects for those two
397       // cases.
398       template <int dim>
399       struct StokesSystem : public StokesPreconditioner<dim>
400       {
401         StokesSystem(const FiniteElement<dim> &stokes_fe,
402                      const Mapping<dim> &      mapping,
403                      const Quadrature<dim> &   stokes_quadrature,
404                      const UpdateFlags         stokes_update_flags,
405                      const FiniteElement<dim> &temperature_fe,
406                      const UpdateFlags         temperature_update_flags);
407 
408         StokesSystem(const StokesSystem<dim> &data);
409 
410 
411         FEValues<dim> temperature_fe_values;
412 
413         std::vector<Tensor<1, dim>>          phi_u;
414         std::vector<SymmetricTensor<2, dim>> grads_phi_u;
415         std::vector<double>                  div_phi_u;
416 
417         std::vector<double> old_temperature_values;
418       };
419 
420 
421       template <int dim>
422       StokesSystem<dim>::StokesSystem(
423         const FiniteElement<dim> &stokes_fe,
424         const Mapping<dim> &      mapping,
425         const Quadrature<dim> &   stokes_quadrature,
426         const UpdateFlags         stokes_update_flags,
427         const FiniteElement<dim> &temperature_fe,
428         const UpdateFlags         temperature_update_flags)
429         : StokesPreconditioner<dim>(stokes_fe,
430                                     stokes_quadrature,
431                                     mapping,
432                                     stokes_update_flags)
433         , temperature_fe_values(mapping,
434                                 temperature_fe,
435                                 stokes_quadrature,
436                                 temperature_update_flags)
437         , phi_u(stokes_fe.n_dofs_per_cell())
438         , grads_phi_u(stokes_fe.n_dofs_per_cell())
439         , div_phi_u(stokes_fe.n_dofs_per_cell())
440         , old_temperature_values(stokes_quadrature.size())
441       {}
442 
443 
444       template <int dim>
445       StokesSystem<dim>::StokesSystem(const StokesSystem<dim> &scratch)
446         : StokesPreconditioner<dim>(scratch)
447         , temperature_fe_values(
448             scratch.temperature_fe_values.get_mapping(),
449             scratch.temperature_fe_values.get_fe(),
450             scratch.temperature_fe_values.get_quadrature(),
451             scratch.temperature_fe_values.get_update_flags())
452         , phi_u(scratch.phi_u)
453         , grads_phi_u(scratch.grads_phi_u)
454         , div_phi_u(scratch.div_phi_u)
455         , old_temperature_values(scratch.old_temperature_values)
456       {}
457 
458 
459       // After defining the objects used in the assembly of the Stokes system,
460       // we do the same for the assembly of the matrices necessary for the
461       // temperature system. The general structure is very similar:
462       template <int dim>
463       struct TemperatureMatrix
464       {
465         TemperatureMatrix(const FiniteElement<dim> &temperature_fe,
466                           const Mapping<dim> &      mapping,
467                           const Quadrature<dim> &   temperature_quadrature);
468 
469         TemperatureMatrix(const TemperatureMatrix &data);
470 
471 
472         FEValues<dim> temperature_fe_values;
473 
474         std::vector<double>         phi_T;
475         std::vector<Tensor<1, dim>> grad_phi_T;
476       };
477 
478 
479       template <int dim>
480       TemperatureMatrix<dim>::TemperatureMatrix(
481         const FiniteElement<dim> &temperature_fe,
482         const Mapping<dim> &      mapping,
483         const Quadrature<dim> &   temperature_quadrature)
484         : temperature_fe_values(mapping,
485                                 temperature_fe,
486                                 temperature_quadrature,
487                                 update_values | update_gradients |
488                                   update_JxW_values)
489         , phi_T(temperature_fe.n_dofs_per_cell())
490         , grad_phi_T(temperature_fe.n_dofs_per_cell())
491       {}
492 
493 
494       template <int dim>
495       TemperatureMatrix<dim>::TemperatureMatrix(
496         const TemperatureMatrix &scratch)
497         : temperature_fe_values(
498             scratch.temperature_fe_values.get_mapping(),
499             scratch.temperature_fe_values.get_fe(),
500             scratch.temperature_fe_values.get_quadrature(),
501             scratch.temperature_fe_values.get_update_flags())
502         , phi_T(scratch.phi_T)
503         , grad_phi_T(scratch.grad_phi_T)
504       {}
505 
506 
507       // The final scratch object is used in the assembly of the right hand
508       // side of the temperature system. This object is significantly larger
509       // than the ones above because a lot more quantities enter the
510       // computation of the right hand side of the temperature equation. In
511       // particular, the temperature values and gradients of the previous two
512       // time steps need to be evaluated at the quadrature points, as well as
513       // the velocities and the strain rates (i.e. the symmetric gradients of
514       // the velocity) that enter the right hand side as friction heating
515       // terms. Despite the number of terms, the following should be rather
516       // self explanatory:
517       template <int dim>
518       struct TemperatureRHS
519       {
520         TemperatureRHS(const FiniteElement<dim> &temperature_fe,
521                        const FiniteElement<dim> &stokes_fe,
522                        const Mapping<dim> &      mapping,
523                        const Quadrature<dim> &   quadrature);
524 
525         TemperatureRHS(const TemperatureRHS &data);
526 
527 
528         FEValues<dim> temperature_fe_values;
529         FEValues<dim> stokes_fe_values;
530 
531         std::vector<double>         phi_T;
532         std::vector<Tensor<1, dim>> grad_phi_T;
533 
534         std::vector<Tensor<1, dim>> old_velocity_values;
535         std::vector<Tensor<1, dim>> old_old_velocity_values;
536 
537         std::vector<SymmetricTensor<2, dim>> old_strain_rates;
538         std::vector<SymmetricTensor<2, dim>> old_old_strain_rates;
539 
540         std::vector<double>         old_temperature_values;
541         std::vector<double>         old_old_temperature_values;
542         std::vector<Tensor<1, dim>> old_temperature_grads;
543         std::vector<Tensor<1, dim>> old_old_temperature_grads;
544         std::vector<double>         old_temperature_laplacians;
545         std::vector<double>         old_old_temperature_laplacians;
546       };
547 
548 
549       template <int dim>
550       TemperatureRHS<dim>::TemperatureRHS(
551         const FiniteElement<dim> &temperature_fe,
552         const FiniteElement<dim> &stokes_fe,
553         const Mapping<dim> &      mapping,
554         const Quadrature<dim> &   quadrature)
555         : temperature_fe_values(mapping,
556                                 temperature_fe,
557                                 quadrature,
558                                 update_values | update_gradients |
559                                   update_hessians | update_quadrature_points |
560                                   update_JxW_values)
561         , stokes_fe_values(mapping,
562                            stokes_fe,
563                            quadrature,
564                            update_values | update_gradients)
565         , phi_T(temperature_fe.n_dofs_per_cell())
566         , grad_phi_T(temperature_fe.n_dofs_per_cell())
567         ,
568 
569         old_velocity_values(quadrature.size())
570         , old_old_velocity_values(quadrature.size())
571         , old_strain_rates(quadrature.size())
572         , old_old_strain_rates(quadrature.size())
573         ,
574 
575         old_temperature_values(quadrature.size())
576         , old_old_temperature_values(quadrature.size())
577         , old_temperature_grads(quadrature.size())
578         , old_old_temperature_grads(quadrature.size())
579         , old_temperature_laplacians(quadrature.size())
580         , old_old_temperature_laplacians(quadrature.size())
581       {}
582 
583 
584       template <int dim>
585       TemperatureRHS<dim>::TemperatureRHS(const TemperatureRHS &scratch)
586         : temperature_fe_values(
587             scratch.temperature_fe_values.get_mapping(),
588             scratch.temperature_fe_values.get_fe(),
589             scratch.temperature_fe_values.get_quadrature(),
590             scratch.temperature_fe_values.get_update_flags())
591         , stokes_fe_values(scratch.stokes_fe_values.get_mapping(),
592                            scratch.stokes_fe_values.get_fe(),
593                            scratch.stokes_fe_values.get_quadrature(),
594                            scratch.stokes_fe_values.get_update_flags())
595         , phi_T(scratch.phi_T)
596         , grad_phi_T(scratch.grad_phi_T)
597         ,
598 
599         old_velocity_values(scratch.old_velocity_values)
600         , old_old_velocity_values(scratch.old_old_velocity_values)
601         , old_strain_rates(scratch.old_strain_rates)
602         , old_old_strain_rates(scratch.old_old_strain_rates)
603         ,
604 
605         old_temperature_values(scratch.old_temperature_values)
606         , old_old_temperature_values(scratch.old_old_temperature_values)
607         , old_temperature_grads(scratch.old_temperature_grads)
608         , old_old_temperature_grads(scratch.old_old_temperature_grads)
609         , old_temperature_laplacians(scratch.old_temperature_laplacians)
610         , old_old_temperature_laplacians(scratch.old_old_temperature_laplacians)
611       {}
612     } // namespace Scratch
613 
614 
615     // The CopyData objects are even simpler than the Scratch objects as all
616     // they have to do is to store the results of local computations until
617     // they can be copied into the global matrix or vector objects. These
618     // structures therefore only need to provide a constructor, a copy
619     // operation, and some arrays for local matrix, local vectors and the
620     // relation between local and global degrees of freedom (a.k.a.
621     // <code>local_dof_indices</code>). Again, we have one such structure for
622     // each of the four operations we will parallelize using the WorkStream
623     // class:
624     namespace CopyData
625     {
626       template <int dim>
627       struct StokesPreconditioner
628       {
629         StokesPreconditioner(const FiniteElement<dim> &stokes_fe);
630         StokesPreconditioner(const StokesPreconditioner &data);
631         StokesPreconditioner &operator=(const StokesPreconditioner &) = default;
632 
633         FullMatrix<double>                   local_matrix;
634         std::vector<types::global_dof_index> local_dof_indices;
635       };
636 
637       template <int dim>
638       StokesPreconditioner<dim>::StokesPreconditioner(
639         const FiniteElement<dim> &stokes_fe)
640         : local_matrix(stokes_fe.n_dofs_per_cell(), stokes_fe.n_dofs_per_cell())
641         , local_dof_indices(stokes_fe.n_dofs_per_cell())
642       {}
643 
644       template <int dim>
645       StokesPreconditioner<dim>::StokesPreconditioner(
646         const StokesPreconditioner &data)
647         : local_matrix(data.local_matrix)
648         , local_dof_indices(data.local_dof_indices)
649       {}
650 
651 
652 
653       template <int dim>
654       struct StokesSystem : public StokesPreconditioner<dim>
655       {
656         StokesSystem(const FiniteElement<dim> &stokes_fe);
657 
658         Vector<double> local_rhs;
659       };
660 
661       template <int dim>
662       StokesSystem<dim>::StokesSystem(const FiniteElement<dim> &stokes_fe)
663         : StokesPreconditioner<dim>(stokes_fe)
664         , local_rhs(stokes_fe.n_dofs_per_cell())
665       {}
666 
667 
668 
669       template <int dim>
670       struct TemperatureMatrix
671       {
672         TemperatureMatrix(const FiniteElement<dim> &temperature_fe);
673 
674         FullMatrix<double>                   local_mass_matrix;
675         FullMatrix<double>                   local_stiffness_matrix;
676         std::vector<types::global_dof_index> local_dof_indices;
677       };
678 
679       template <int dim>
680       TemperatureMatrix<dim>::TemperatureMatrix(
681         const FiniteElement<dim> &temperature_fe)
682         : local_mass_matrix(temperature_fe.n_dofs_per_cell(),
683                             temperature_fe.n_dofs_per_cell())
684         , local_stiffness_matrix(temperature_fe.n_dofs_per_cell(),
685                                  temperature_fe.n_dofs_per_cell())
686         , local_dof_indices(temperature_fe.n_dofs_per_cell())
687       {}
688 
689 
690 
691       template <int dim>
692       struct TemperatureRHS
693       {
694         TemperatureRHS(const FiniteElement<dim> &temperature_fe);
695 
696         Vector<double>                       local_rhs;
697         std::vector<types::global_dof_index> local_dof_indices;
698         FullMatrix<double>                   matrix_for_bc;
699       };
700 
701       template <int dim>
702       TemperatureRHS<dim>::TemperatureRHS(
703         const FiniteElement<dim> &temperature_fe)
704         : local_rhs(temperature_fe.n_dofs_per_cell())
705         , local_dof_indices(temperature_fe.n_dofs_per_cell())
706         , matrix_for_bc(temperature_fe.n_dofs_per_cell(),
707                         temperature_fe.n_dofs_per_cell())
708       {}
709     } // namespace CopyData
710   }   // namespace Assembly
711 
712 
713 
714   // @sect3{The <code>BoussinesqFlowProblem</code> class template}
715   //
716   // This is the declaration of the main class. It is very similar to step-31
717   // but there are a number of differences we will comment on below.
718   //
719   // The top of the class is essentially the same as in step-31, listing the
720   // public methods and a set of private functions that do the heavy
721   // lifting. Compared to step-31 there are only two additions to this
722   // section: the function <code>get_cfl_number()</code> that computes the
723   // maximum CFL number over all cells, from which we then compute the global
724   // time step, and the function <code>get_entropy_variation()</code> that is
725   // used in the computation of the entropy stabilization. It is akin to the
726   // <code>get_extrapolated_temperature_range()</code> we have used in step-31
727   // for this purpose, but works on the entropy instead of the
728   // temperature.
729   template <int dim>
730   class BoussinesqFlowProblem
731   {
732   public:
733     struct Parameters;
734     BoussinesqFlowProblem(Parameters &parameters);
735     void run();
736 
737   private:
738     void   setup_dofs();
739     void   assemble_stokes_preconditioner();
740     void   build_stokes_preconditioner();
741     void   assemble_stokes_system();
742     void   assemble_temperature_matrix();
743     void   assemble_temperature_system(const double maximal_velocity);
744     double get_maximal_velocity() const;
745     double get_cfl_number() const;
746     double get_entropy_variation(const double average_temperature) const;
747     std::pair<double, double> get_extrapolated_temperature_range() const;
748     void                      solve();
749     void                      output_results();
750     void                      refine_mesh(const unsigned int max_grid_level);
751 
752     double compute_viscosity(
753       const std::vector<double> &        old_temperature,
754       const std::vector<double> &        old_old_temperature,
755       const std::vector<Tensor<1, dim>> &old_temperature_grads,
756       const std::vector<Tensor<1, dim>> &old_old_temperature_grads,
757       const std::vector<double> &        old_temperature_laplacians,
758       const std::vector<double> &        old_old_temperature_laplacians,
759       const std::vector<Tensor<1, dim>> &old_velocity_values,
760       const std::vector<Tensor<1, dim>> &old_old_velocity_values,
761       const std::vector<SymmetricTensor<2, dim>> &old_strain_rates,
762       const std::vector<SymmetricTensor<2, dim>> &old_old_strain_rates,
763       const double                                global_u_infty,
764       const double                                global_T_variation,
765       const double                                average_temperature,
766       const double                                global_entropy_variation,
767       const double                                cell_diameter) const;
768 
769   public:
770     // The first significant new component is the definition of a struct for
771     // the parameters according to the discussion in the introduction. This
772     // structure is initialized by reading from a parameter file during
773     // construction of this object.
774     struct Parameters
775     {
776       Parameters(const std::string &parameter_filename);
777 
778       static void declare_parameters(ParameterHandler &prm);
779       void        parse_parameters(ParameterHandler &prm);
780 
781       double end_time;
782 
783       unsigned int initial_global_refinement;
784       unsigned int initial_adaptive_refinement;
785 
786       bool         generate_graphical_output;
787       unsigned int graphical_output_interval;
788 
789       unsigned int adaptive_refinement_interval;
790 
791       double stabilization_alpha;
792       double stabilization_c_R;
793       double stabilization_beta;
794 
795       unsigned int stokes_velocity_degree;
796       bool         use_locally_conservative_discretization;
797 
798       unsigned int temperature_degree;
799     };
800 
801   private:
802     Parameters &parameters;
803 
804     // The <code>pcout</code> (for <i>%parallel <code>std::cout</code></i>)
805     // object is used to simplify writing output: each MPI process can use
806     // this to generate output as usual, but since each of these processes
807     // will (hopefully) produce the same output it will just be replicated
808     // many times over; with the ConditionalOStream class, only the output
809     // generated by one MPI process will actually be printed to screen,
810     // whereas the output by all the other processes will simply be forgotten.
811     ConditionalOStream pcout;
812 
813     // The following member variables will then again be similar to those in
814     // step-31 (and to other tutorial programs). As mentioned in the
815     // introduction, we fully distribute computations, so we will have to use
816     // the parallel::distributed::Triangulation class (see step-40) but the
817     // remainder of these variables is rather standard with two exceptions:
818     //
819     // - The <code>mapping</code> variable is used to denote a higher-order
820     // polynomial mapping. As mentioned in the introduction, we use this
821     // mapping when forming integrals through quadrature for all cells that
822     // are adjacent to either the inner or outer boundaries of our domain
823     // where the boundary is curved.
824     //
825     // - In a bit of naming confusion, you will notice below that some of the
826     // variables from namespace TrilinosWrappers are taken from namespace
827     // TrilinosWrappers::MPI (such as the right hand side vectors) whereas
828     // others are not (such as the various matrices). This is due to legacy
829     // reasons. We will frequently have to query velocities
830     // and temperatures at arbitrary quadrature points; consequently, rather
831     // than importing ghost information of a vector whenever we need access
832     // to degrees of freedom that are relevant locally but owned by another
833     // processor, we solve linear systems in %parallel but then immediately
834     // initialize a vector including ghost entries of the solution for further
835     // processing. The various <code>*_solution</code> vectors are therefore
836     // filled immediately after solving their respective linear system in
837     // %parallel and will always contain values for all
838     // @ref GlossLocallyRelevantDof "locally relevant degrees of freedom";
839     // the fully distributed vectors that we obtain from the solution process
840     // and that only ever contain the
841     // @ref GlossLocallyOwnedDof "locally owned degrees of freedom" are
842     // destroyed immediately after the solution process and after we have
843     // copied the relevant values into the member variable vectors.
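    //
    // As a small sketch of what this means in code (with placeholder names, not
    // the actual variables used below), a ghosted vector of this kind is
    // typically obtained as follows:
    // @code
    //   TrilinosWrappers::MPI::Vector distributed_solution(locally_owned_dofs,
    //                                                      MPI_COMM_WORLD);
    //   // ...solve the linear system into distributed_solution, then:
    //   TrilinosWrappers::MPI::Vector ghosted_solution(locally_relevant_dofs,
    //                                                  MPI_COMM_WORLD);
    //   ghosted_solution = distributed_solution; // also imports the ghost entries
    // @endcode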
844     parallel::distributed::Triangulation<dim> triangulation;
845     double                                    global_Omega_diameter;
846 
847     const MappingQ<dim> mapping;
848 
849     const FESystem<dim>       stokes_fe;
850     DoFHandler<dim>           stokes_dof_handler;
851     AffineConstraints<double> stokes_constraints;
852 
853     TrilinosWrappers::BlockSparseMatrix stokes_matrix;
854     TrilinosWrappers::BlockSparseMatrix stokes_preconditioner_matrix;
855 
856     TrilinosWrappers::MPI::BlockVector stokes_solution;
857     TrilinosWrappers::MPI::BlockVector old_stokes_solution;
858     TrilinosWrappers::MPI::BlockVector stokes_rhs;
859 
860 
861     FE_Q<dim>                 temperature_fe;
862     DoFHandler<dim>           temperature_dof_handler;
863     AffineConstraints<double> temperature_constraints;
864 
865     TrilinosWrappers::SparseMatrix temperature_mass_matrix;
866     TrilinosWrappers::SparseMatrix temperature_stiffness_matrix;
867     TrilinosWrappers::SparseMatrix temperature_matrix;
868 
869     TrilinosWrappers::MPI::Vector temperature_solution;
870     TrilinosWrappers::MPI::Vector old_temperature_solution;
871     TrilinosWrappers::MPI::Vector old_old_temperature_solution;
872     TrilinosWrappers::MPI::Vector temperature_rhs;
873 
874 
875     double       time_step;
876     double       old_time_step;
877     unsigned int timestep_number;
878 
879     std::shared_ptr<TrilinosWrappers::PreconditionAMG>    Amg_preconditioner;
880     std::shared_ptr<TrilinosWrappers::PreconditionJacobi> Mp_preconditioner;
881     std::shared_ptr<TrilinosWrappers::PreconditionJacobi> T_preconditioner;
882 
883     bool rebuild_stokes_matrix;
884     bool rebuild_stokes_preconditioner;
885     bool rebuild_temperature_matrices;
886     bool rebuild_temperature_preconditioner;
887 
888     // The next member variable, <code>computing_timer</code> is used to
889     // conveniently account for compute time spent in certain "sections" of
890     // the code that are repeatedly entered. For example, we will enter (and
891     // leave) sections for Stokes matrix assembly and would like to accumulate
892     // the run time spent in this section over all time steps. Every so many
893     // time steps as well as at the end of the program (through the destructor
894     // of the TimerOutput class) we will then produce a nice summary of the
895     // times spent in the different sections into which we categorize the
896     // run-time of this program.
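    //
    // Entering and leaving such a section is most conveniently done through the
    // TimerOutput::Scope helper class, whose constructor enters a section and
    // whose destructor leaves it again. As a brief sketch of how it will be
    // used in the functions below:
    // @code
    //   TimerOutput::Scope timing_section(computing_timer, "Assemble Stokes system");
    //   // ...do the work; the section is left when the object goes out of scope
    // @endcode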
897     TimerOutput computing_timer;
898 
899     // After these member variables we have a number of auxiliary functions
900     // that have been broken out of the ones listed above. Specifically, there
901     // are, first, three functions that we call from <code>setup_dofs</code> and
902     // then the ones that do the assembling of linear systems:
903     void setup_stokes_matrix(
904       const std::vector<IndexSet> &stokes_partitioning,
905       const std::vector<IndexSet> &stokes_relevant_partitioning);
906     void setup_stokes_preconditioner(
907       const std::vector<IndexSet> &stokes_partitioning,
908       const std::vector<IndexSet> &stokes_relevant_partitioning);
909     void setup_temperature_matrices(
910       const IndexSet &temperature_partitioning,
911       const IndexSet &temperature_relevant_partitioning);
912 
913 
914     // Following the @ref MTWorkStream "task-based parallelization" paradigm,
915     // we split all the assembly routines into two parts: a first part that
916     // can do all the calculations on a certain cell without taking care of
917     // other threads, and a second part (which is writing the local data into
918     // the global matrices and vectors) which can be entered by only one
919     // thread at a time. In order to implement that, we provide functions for
920     // each of those two steps for all the four assembly routines that we use
921     // in this program. The following eight functions do exactly this:
922     void local_assemble_stokes_preconditioner(
923       const typename DoFHandler<dim>::active_cell_iterator &cell,
924       Assembly::Scratch::StokesPreconditioner<dim> &        scratch,
925       Assembly::CopyData::StokesPreconditioner<dim> &       data);
926 
927     void copy_local_to_global_stokes_preconditioner(
928       const Assembly::CopyData::StokesPreconditioner<dim> &data);
929 
930 
931     void local_assemble_stokes_system(
932       const typename DoFHandler<dim>::active_cell_iterator &cell,
933       Assembly::Scratch::StokesSystem<dim> &                scratch,
934       Assembly::CopyData::StokesSystem<dim> &               data);
935 
936     void copy_local_to_global_stokes_system(
937       const Assembly::CopyData::StokesSystem<dim> &data);
938 
939 
940     void local_assemble_temperature_matrix(
941       const typename DoFHandler<dim>::active_cell_iterator &cell,
942       Assembly::Scratch::TemperatureMatrix<dim> &           scratch,
943       Assembly::CopyData::TemperatureMatrix<dim> &          data);
944 
945     void copy_local_to_global_temperature_matrix(
946       const Assembly::CopyData::TemperatureMatrix<dim> &data);
947 
948 
949 
950     void local_assemble_temperature_rhs(
951       const std::pair<double, double> global_T_range,
952       const double                    global_max_velocity,
953       const double                    global_entropy_variation,
954       const typename DoFHandler<dim>::active_cell_iterator &cell,
955       Assembly::Scratch::TemperatureRHS<dim> &              scratch,
956       Assembly::CopyData::TemperatureRHS<dim> &             data);
957 
958     void copy_local_to_global_temperature_rhs(
959       const Assembly::CopyData::TemperatureRHS<dim> &data);
960 
961     // Finally, we forward declare a member class that we will define later on
962     // and that will be used to compute a number of quantities from our
963     // solution vectors that we'd like to put into the output files for
964     // visualization.
965     class Postprocessor;
966   };
967 
968 
969   // @sect3{BoussinesqFlowProblem class implementation}
970 
971   // @sect4{BoussinesqFlowProblem::Parameters}
972   //
973   // Here comes the definition of the parameters for the Stokes problem. We
974   // allow the user to set the end time of the simulation, the levels of refinement
975   // (both global and adaptive, which taken together specify the maximum level
976   // the cells are allowed to have), and the interval between mesh refinements in
977   // the time stepping.
978   //
979   // Then, we let the user specify constants for the stabilization parameters
980   // (as discussed in the introduction), the polynomial degree for the Stokes
981   // velocity space, whether to use the locally conservative discretization
982   // based on FE_DGP elements for the pressure or not (FE_Q elements for
983   // pressure), and the polynomial degree for the temperature interpolation.
984   //
985   // The constructor checks for a valid input file (if there is none, a file
986   // with default parameters for the quantities is written), and then parses
987   // the parameters.
988   template <int dim>
989   BoussinesqFlowProblem<dim>::Parameters::Parameters(
990     const std::string &parameter_filename)
991     : end_time(1e8)
992     , initial_global_refinement(2)
993     , initial_adaptive_refinement(2)
994     , adaptive_refinement_interval(10)
995     , stabilization_alpha(2)
996     , stabilization_c_R(0.11)
997     , stabilization_beta(0.078)
998     , stokes_velocity_degree(2)
999     , use_locally_conservative_discretization(true)
1000     , temperature_degree(2)
1001   {
1002     ParameterHandler prm;
1003     BoussinesqFlowProblem<dim>::Parameters::declare_parameters(prm);
1004 
1005     std::ifstream parameter_file(parameter_filename);
1006 
1007     if (!parameter_file)
1008       {
1009         parameter_file.close();
1010 
1011         std::ofstream parameter_out(parameter_filename);
1012         prm.print_parameters(parameter_out, ParameterHandler::Text);
1013 
1014         AssertThrow(
1015           false,
1016           ExcMessage(
1017             "Input parameter file <" + parameter_filename +
1018             "> not found. Creating a template file of the same name."));
1019       }
1020 
1021     prm.parse_input(parameter_file);
1022     parse_parameters(prm);
1023   }
1024 
1025 
1026 
1027   // Next we have a function that declares the parameters that we expect in
1028   // the input file, together with their data types, default values and a
1029   // description:
1030   template <int dim>
1031   void BoussinesqFlowProblem<dim>::Parameters::declare_parameters(
1032     ParameterHandler &prm)
1033   {
1034     prm.declare_entry("End time",
1035                       "1e8",
1036                       Patterns::Double(0),
1037                       "The end time of the simulation in years.");
1038     prm.declare_entry("Initial global refinement",
1039                       "2",
1040                       Patterns::Integer(0),
1041                       "The number of global refinement steps performed on "
1042                       "the initial coarse mesh, before the problem is first "
1043                       "solved there.");
1044     prm.declare_entry("Initial adaptive refinement",
1045                       "2",
1046                       Patterns::Integer(0),
1047                       "The number of adaptive refinement steps performed after "
1048                       "initial global refinement.");
1049     prm.declare_entry("Time steps between mesh refinement",
1050                       "10",
1051                       Patterns::Integer(1),
1052                       "The number of time steps after which the mesh is to be "
1053                       "adapted based on computed error indicators.");
1054     prm.declare_entry("Generate graphical output",
1055                       "false",
1056                       Patterns::Bool(),
1057                       "Whether graphical output is to be generated or not. "
1058                       "You may not want to get graphical output if the number "
1059                       "of processors is large.");
1060     prm.declare_entry("Time steps between graphical output",
1061                       "50",
1062                       Patterns::Integer(1),
1063                       "The number of time steps between each generation of "
1064                       "graphical output files.");
1065 
1066     prm.enter_subsection("Stabilization parameters");
1067     {
1068       prm.declare_entry("alpha",
1069                         "2",
1070                         Patterns::Double(1, 2),
1071                         "The exponent in the entropy viscosity stabilization.");
1072       prm.declare_entry("c_R",
1073                         "0.11",
1074                         Patterns::Double(0),
1075                         "The c_R factor in the entropy viscosity "
1076                         "stabilization.");
1077       prm.declare_entry("beta",
1078                         "0.078",
1079                         Patterns::Double(0),
1080                         "The beta factor in the artificial viscosity "
1081                         "stabilization. An appropriate value for 2d is 0.052 "
1082                         "and 0.078 for 3d.");
1083     }
1084     prm.leave_subsection();
1085 
1086     prm.enter_subsection("Discretization");
1087     {
1088       prm.declare_entry(
1089         "Stokes velocity polynomial degree",
1090         "2",
1091         Patterns::Integer(1),
1092         "The polynomial degree to use for the velocity variables "
1093         "in the Stokes system.");
1094       prm.declare_entry(
1095         "Temperature polynomial degree",
1096         "2",
1097         Patterns::Integer(1),
1098         "The polynomial degree to use for the temperature variable.");
1099       prm.declare_entry(
1100         "Use locally conservative discretization",
1101         "true",
1102         Patterns::Bool(),
1103         "Whether to use a Stokes discretization that is locally "
1104         "conservative at the expense of a larger number of degrees "
1105         "of freedom, or to go with a cheaper discretization "
1106         "that does not locally conserve mass (although it is "
1107         "globally conservative).");
1108     }
1109     prm.leave_subsection();
1110   }
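
  // A parameter file corresponding to these declarations then looks roughly
  // like the following (an abridged sketch showing only some of the entries,
  // with their default values):
  // @code
  //   set End time                  = 1e8
  //   set Initial global refinement = 2
  //
  //   subsection Stabilization parameters
  //     set alpha = 2
  //     set beta  = 0.078
  //     set c_R   = 0.11
  //   end
  // @endcode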
1111 
1112 
1113 
1114   // And then we need a function that reads the contents of the
1115   // ParameterHandler object we get by reading the input file and puts the
1116   // results into variables that store the values of the parameters we have
1117   // previously declared:
1118   template <int dim>
1119   void BoussinesqFlowProblem<dim>::Parameters::parse_parameters(
1120     ParameterHandler &prm)
1121   {
1122     end_time                  = prm.get_double("End time");
1123     initial_global_refinement = prm.get_integer("Initial global refinement");
1124     initial_adaptive_refinement =
1125       prm.get_integer("Initial adaptive refinement");
1126 
1127     adaptive_refinement_interval =
1128       prm.get_integer("Time steps between mesh refinement");
1129 
1130     generate_graphical_output = prm.get_bool("Generate graphical output");
1131     graphical_output_interval =
1132       prm.get_integer("Time steps between graphical output");
1133 
1134     prm.enter_subsection("Stabilization parameters");
1135     {
1136       stabilization_alpha = prm.get_double("alpha");
1137       stabilization_c_R   = prm.get_double("c_R");
1138       stabilization_beta  = prm.get_double("beta");
1139     }
1140     prm.leave_subsection();
1141 
1142     prm.enter_subsection("Discretization");
1143     {
1144       stokes_velocity_degree =
1145         prm.get_integer("Stokes velocity polynomial degree");
1146       temperature_degree = prm.get_integer("Temperature polynomial degree");
1147       use_locally_conservative_discretization =
1148         prm.get_bool("Use locally conservative discretization");
1149     }
1150     prm.leave_subsection();
1151   }
1152 
1153 
1154 
1155   // @sect4{BoussinesqFlowProblem::BoussinesqFlowProblem}
1156   //
1157   // The constructor of the problem is very similar to the constructor in
1158   // step-31. What is different is the %parallel communication: Trilinos uses
1159   // a message passing interface (MPI) for data distribution. When entering
1160   // the BoussinesqFlowProblem class, we have to decide how the parallelization
1161   // is to be done. We choose a rather simple strategy and let all processors
1162   // that are running the program work together, specified by the communicator
1163   // <code>MPI_COMM_WORLD</code>. Next, we create the output stream (as we
1164   // already did in step-18) that only generates output on the first MPI
1165   // process and is completely forgetful on all others. The implementation of
1166   // this idea is to hand <code>pcout</code> a <code>true</code> argument only
1167   // on the first process, in which case it uses the <code>std::cout</code>
1168   // stream for output. If we are on processor five, for instance, then we will
1169   // give a <code>false</code> argument to <code>pcout</code>, which means that
1170   // the output of that processor will not be printed. With the exception of the
1171   // mapping object (for which we use polynomials of degree 4) all but the
1172   // final member variable are exactly the same as in step-31.
1173   //
1174   // This final object, the TimerOutput object, is then told to restrict
1175   // output to the <code>pcout</code> stream (processor 0), and then we
1176   // specify that we want to get a summary table at the end of the program
1177   // which shows us wallclock times (as opposed to CPU times). We will
1178   // manually also request intermediate summaries every so many time steps in
1179   // the <code>run()</code> function below.
1180   template <int dim>
1181   BoussinesqFlowProblem<dim>::BoussinesqFlowProblem(Parameters &parameters_)
1182     : parameters(parameters_)
1183     , pcout(std::cout, (Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0))
1184     ,
1185 
1186     triangulation(MPI_COMM_WORLD,
1187                   typename Triangulation<dim>::MeshSmoothing(
1188                     Triangulation<dim>::smoothing_on_refinement |
1189                     Triangulation<dim>::smoothing_on_coarsening))
1190     ,
1191 
1192     global_Omega_diameter(0.)
1193     ,
1194 
1195     mapping(4)
1196     ,
1197 
1198     stokes_fe(FE_Q<dim>(parameters.stokes_velocity_degree),
1199               dim,
1200               (parameters.use_locally_conservative_discretization ?
1201                  static_cast<const FiniteElement<dim> &>(
1202                    FE_DGP<dim>(parameters.stokes_velocity_degree - 1)) :
1203                  static_cast<const FiniteElement<dim> &>(
1204                    FE_Q<dim>(parameters.stokes_velocity_degree - 1))),
1205               1)
1206     ,
1207 
1208     stokes_dof_handler(triangulation)
1209     ,
1210 
1211     temperature_fe(parameters.temperature_degree)
1212     , temperature_dof_handler(triangulation)
1213     ,
1214 
1215     time_step(0)
1216     , old_time_step(0)
1217     , timestep_number(0)
1218     , rebuild_stokes_matrix(true)
1219     , rebuild_stokes_preconditioner(true)
1220     , rebuild_temperature_matrices(true)
1221     , rebuild_temperature_preconditioner(true)
1222     ,
1223 
1224     computing_timer(MPI_COMM_WORLD,
1225                     pcout,
1226                     TimerOutput::summary,
1227                     TimerOutput::wall_times)
1228   {}
1229 
1230 
1231 
1232   // @sect4{The BoussinesqFlowProblem helper functions}
1233   // @sect5{BoussinesqFlowProblem::get_maximal_velocity}
1234 
1235   // Except for two small details, the function to compute the global maximum
1236   // of the velocity is the same as in step-31. The first detail is actually
1237   // common to all functions that implement loops over all cells in the
1238   // triangulation: When operating in %parallel, each processor can only work
1239   // on a chunk of cells since each processor only has a certain part of the
1240   // entire triangulation. This chunk of cells that we want to work on is
1241   // identified via a so-called <code>subdomain_id</code>, as we also did in
1242   // step-18. All we need to change is hence to perform the cell-related
1243   // operations only on cells that are owned by the current process (as
1244   // opposed to ghost or artificial cells), i.e. for which the subdomain id
1245   // equals the ID of the current process. Since this is a commonly used
1246   // operation, there is a shortcut for this operation: we can ask whether the
1247   // cell is owned by the current processor using
1248   // <code>cell-@>is_locally_owned()</code>.
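  // This is simply a shorthand for the explicit comparison one would
  // otherwise have to write (a sketch, not code that appears in this
  // program):
  // @code
  //   cell->subdomain_id() == Utilities::MPI::this_mpi_process(MPI_COMM_WORLD)
  // @endcode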
1249   //
1250   // The second difference is the way we calculate the maximum value. Before,
1251   // we could simply have a <code>double</code> variable that we checked
1252   // against on each quadrature point for each cell. Now, we have to be a bit
1253   // more careful since each processor only operates on a subset of
1254   // cells. What we do is to first let each processor calculate the maximum
1255   // among its cells, and then do a global communication operation
1256   // <code>Utilities::MPI::max</code> that computes the maximum value among
1257   // all the maximum values of the individual processors. MPI provides such a
1258   // call, but it's even simpler to use the respective function in namespace
1259   // Utilities::MPI using the MPI communicator object since that will do the
1260   // right thing even if we work without MPI and on a single machine only. The
1261   // call to <code>Utilities::MPI::max</code> needs two arguments, namely the
1262   // local maximum (input) and the MPI communicator, which is MPI_COMM_WORLD
1263   // in this example.
1264   template <int dim>
1265   double BoussinesqFlowProblem<dim>::get_maximal_velocity() const
1266   {
1267     const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
1268                                             parameters.stokes_velocity_degree);
1269     const unsigned int   n_q_points = quadrature_formula.size();
1270 
1271     FEValues<dim>               fe_values(mapping,
1272                             stokes_fe,
1273                             quadrature_formula,
1274                             update_values);
1275     std::vector<Tensor<1, dim>> velocity_values(n_q_points);
1276 
1277     const FEValuesExtractors::Vector velocities(0);
1278 
1279     double max_local_velocity = 0;
1280 
1281     for (const auto &cell : stokes_dof_handler.active_cell_iterators())
1282       if (cell->is_locally_owned())
1283         {
1284           fe_values.reinit(cell);
1285           fe_values[velocities].get_function_values(stokes_solution,
1286                                                     velocity_values);
1287 
1288           for (unsigned int q = 0; q < n_q_points; ++q)
1289             max_local_velocity =
1290               std::max(max_local_velocity, velocity_values[q].norm());
1291         }
1292 
1293     return Utilities::MPI::max(max_local_velocity, MPI_COMM_WORLD);
1294   }
1295 
1296 
1297   // @sect5{BoussinesqFlowProblem::get_cfl_number}
1298 
1299   // The next function does something similar, but we now compute the CFL
1300   // number, i.e., maximal velocity on a cell divided by the cell
1301   // diameter. This number is necessary to determine the time step size, as we
1302   // use a semi-explicit time stepping scheme for the temperature equation
1303   // (see step-31 for a discussion). We compute it in the same way as above:
1304   // Compute the local maximum over all locally owned cells, then exchange it
1305   // via MPI to find the global maximum.
1306   template <int dim>
1307   double BoussinesqFlowProblem<dim>::get_cfl_number() const
1308   {
1309     const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
1310                                             parameters.stokes_velocity_degree);
1311     const unsigned int   n_q_points = quadrature_formula.size();
1312 
1313     FEValues<dim>               fe_values(mapping,
1314                             stokes_fe,
1315                             quadrature_formula,
1316                             update_values);
1317     std::vector<Tensor<1, dim>> velocity_values(n_q_points);
1318 
1319     const FEValuesExtractors::Vector velocities(0);
1320 
1321     double max_local_cfl = 0;
1322 
1323     for (const auto &cell : stokes_dof_handler.active_cell_iterators())
1324       if (cell->is_locally_owned())
1325         {
1326           fe_values.reinit(cell);
1327           fe_values[velocities].get_function_values(stokes_solution,
1328                                                     velocity_values);
1329 
1330           double max_local_velocity = 1e-10;
1331           for (unsigned int q = 0; q < n_q_points; ++q)
1332             max_local_velocity =
1333               std::max(max_local_velocity, velocity_values[q].norm());
1334           max_local_cfl =
1335             std::max(max_local_cfl, max_local_velocity / cell->diameter());
1336         }
1337 
1338     return Utilities::MPI::max(max_local_cfl, MPI_COMM_WORLD);
1339   }
1340 
1341 
1342   // @sect5{BoussinesqFlowProblem::get_entropy_variation}
1343 
1344   // Next comes the computation of the global entropy variation
1345   // $\|E(T)-\bar{E}(T)\|_\infty$ where the entropy $E$ is defined as
1346   // discussed in the introduction.  This is needed for the evaluation of the
1347   // stabilization in the temperature equation as explained in the
1348   // introduction. The entropy variation is actually only needed if we use
1349   // $\alpha=2$ as a power in the residual computation. The infinity norm is
1350   // computed by the maxima over quadrature points, as usual in discrete
1351   // computations.
1352   //
1353   // In order to compute this quantity, we first have to find the
1354   // space-average $\bar{E}(T)$ and then evaluate the maximum. However, that
1355   // means that we would need to perform two loops. We can avoid the overhead
1356   // by noting that $\|E(T)-\bar{E}(T)\|_\infty =
1357   // \max\big(E_{\textrm{max}}(T)-\bar{E}(T),
1358   // \bar{E}(T)-E_{\textrm{min}}(T)\big)$, i.e., the maximum out of the
1359   // deviation from the average entropy in positive and negative
1360   // directions. The four quantities we need for the latter formula (maximum
1361   // entropy, minimum entropy, average entropy, area) can all be evaluated in
1362   // the same loop over all cells, so we choose this simpler variant.
1363   template <int dim>
1364   double BoussinesqFlowProblem<dim>::get_entropy_variation(
1365     const double average_temperature) const
1366   {
1367     if (parameters.stabilization_alpha != 2)
1368       return 1.;
1369 
1370     const QGauss<dim>  quadrature_formula(parameters.temperature_degree + 1);
1371     const unsigned int n_q_points = quadrature_formula.size();
1372 
1373     FEValues<dim>       fe_values(temperature_fe,
1374                             quadrature_formula,
1375                             update_values | update_JxW_values);
1376     std::vector<double> old_temperature_values(n_q_points);
1377     std::vector<double> old_old_temperature_values(n_q_points);
1378 
1379     // In the two functions above we computed the maximum of numbers that were
1380     // all non-negative, so we knew that zero was certainly a lower bound. On
1381     // the other hand, here we need to find the maximum deviation from the
1382     // average value, i.e., we will need to know the maximal and minimal
1383     // values of the entropy for which we don't a priori know the sign.
1384     //
1385     // To compute it, we can therefore start with the largest and smallest
1386     // possible values we can store in a double precision number: The minimum
1387     // is initialized with a bigger and the maximum with a smaller number than
1388     // any one that is going to appear. We are then guaranteed that these
1389     // numbers will be overwritten in the loop on the first cell or, if this
1390     // processor does not own any cells, in the communication step at the
1391     // latest. The following loop then computes the minimum and maximum local
1392     // entropy as well as keeps track of the area/volume of the part of the
1393     // domain we locally own and the integral over the entropy on it:
1394     double min_entropy = std::numeric_limits<double>::max(),
1395            max_entropy = -std::numeric_limits<double>::max(), area = 0,
1396            entropy_integrated = 0;
1397 
1398     for (const auto &cell : temperature_dof_handler.active_cell_iterators())
1399       if (cell->is_locally_owned())
1400         {
1401           fe_values.reinit(cell);
1402           fe_values.get_function_values(old_temperature_solution,
1403                                         old_temperature_values);
1404           fe_values.get_function_values(old_old_temperature_solution,
1405                                         old_old_temperature_values);
1406           for (unsigned int q = 0; q < n_q_points; ++q)
1407             {
1408               const double T =
1409                 (old_temperature_values[q] + old_old_temperature_values[q]) / 2;
1410               const double entropy =
1411                 ((T - average_temperature) * (T - average_temperature));
1412 
1413               min_entropy = std::min(min_entropy, entropy);
1414               max_entropy = std::max(max_entropy, entropy);
1415               area += fe_values.JxW(q);
1416               entropy_integrated += fe_values.JxW(q) * entropy;
1417             }
1418         }
1419 
1420     // Now we only need to exchange data between processors: we need to sum
1421     // the two integrals (<code>area</code>, <code>entropy_integrated</code>),
1422     // and get the extrema for maximum and minimum. We could do this through
1423   // four different data exchanges, but we can do it with two:
1424     // Utilities::MPI::sum also exists in a variant that takes an array of
1425     // values that are all to be summed up. And we can also utilize the
1426     // Utilities::MPI::max function by realizing that forming the minimum over
1427     // the minimal entropies equals forming the negative of the maximum over
1428     // the negative of the minimal entropies; this maximum can then be
1429     // combined with forming the maximum over the maximal entropies.
1430     const double local_sums[2]   = {entropy_integrated, area},
1431                  local_maxima[2] = {-min_entropy, max_entropy};
1432     double global_sums[2], global_maxima[2];
1433 
1434     Utilities::MPI::sum(local_sums, MPI_COMM_WORLD, global_sums);
1435     Utilities::MPI::max(local_maxima, MPI_COMM_WORLD, global_maxima);
1436 
1437     // Having computed everything this way, we can then compute the average
1438     // entropy and find the $L^\infty$ norm by taking the larger of the
1439     // deviation of the maximum or minimum from the average:
1440     const double average_entropy = global_sums[0] / global_sums[1];
1441     const double entropy_diff    = std::max(global_maxima[1] - average_entropy,
1442                                          average_entropy - (-global_maxima[0]));
1443     return entropy_diff;
1444   }
1445 
1446 
1447 
1448   // @sect5{BoussinesqFlowProblem::get_extrapolated_temperature_range}
1449 
1450   // The next function computes the minimal and maximal value of the
1451   // extrapolated temperature over the entire domain. Again, this is only a
1452   // slightly modified version of the respective function in step-31. As in
1453   // the function above, we collect local minima and maxima and then compute
1454   // the global extrema using the same trick as above.
1455   //
1456   // As already discussed in step-31, the function needs to distinguish
1457   // between the first and all following time steps because it uses a higher
1458   // order temperature extrapolation scheme when at least two previous time
1459   // steps are available.
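  //
  // Concretely, and only as a restatement of what the loop below computes,
  // the extrapolated value at each quadrature point is
  // @f{align*}{
  //   T^{\text{ext}} = \left(1+\frac{k_n}{k_{n-1}}\right) T^{n-1}
  //                    - \frac{k_n}{k_{n-1}}\, T^{n-2},
  // @f}
  // where $k_n$ and $k_{n-1}$ are the current and previous time step sizes
  // (<code>time_step</code> and <code>old_time_step</code>); in the very
  // first time step we simply use $T^{n-1}$ itself.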
1460   template <int dim>
1461   std::pair<double, double>
1462   BoussinesqFlowProblem<dim>::get_extrapolated_temperature_range() const
1463   {
1464     const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
1465                                             parameters.temperature_degree);
1466     const unsigned int   n_q_points = quadrature_formula.size();
1467 
1468     FEValues<dim>       fe_values(mapping,
1469                             temperature_fe,
1470                             quadrature_formula,
1471                             update_values);
1472     std::vector<double> old_temperature_values(n_q_points);
1473     std::vector<double> old_old_temperature_values(n_q_points);
1474 
1475     double min_local_temperature = std::numeric_limits<double>::max(),
1476            max_local_temperature = -std::numeric_limits<double>::max();
1477 
1478     if (timestep_number != 0)
1479       {
1480         for (const auto &cell : temperature_dof_handler.active_cell_iterators())
1481           if (cell->is_locally_owned())
1482             {
1483               fe_values.reinit(cell);
1484               fe_values.get_function_values(old_temperature_solution,
1485                                             old_temperature_values);
1486               fe_values.get_function_values(old_old_temperature_solution,
1487                                             old_old_temperature_values);
1488 
1489               for (unsigned int q = 0; q < n_q_points; ++q)
1490                 {
1491                   const double temperature =
1492                     (1. + time_step / old_time_step) *
1493                       old_temperature_values[q] -
1494                     time_step / old_time_step * old_old_temperature_values[q];
1495 
1496                   min_local_temperature =
1497                     std::min(min_local_temperature, temperature);
1498                   max_local_temperature =
1499                     std::max(max_local_temperature, temperature);
1500                 }
1501             }
1502       }
1503     else
1504       {
1505         for (const auto &cell : temperature_dof_handler.active_cell_iterators())
1506           if (cell->is_locally_owned())
1507             {
1508               fe_values.reinit(cell);
1509               fe_values.get_function_values(old_temperature_solution,
1510                                             old_temperature_values);
1511 
1512               for (unsigned int q = 0; q < n_q_points; ++q)
1513                 {
1514                   const double temperature = old_temperature_values[q];
1515 
1516                   min_local_temperature =
1517                     std::min(min_local_temperature, temperature);
1518                   max_local_temperature =
1519                     std::max(max_local_temperature, temperature);
1520                 }
1521             }
1522       }
1523 
1524     double local_extrema[2] = {-min_local_temperature, max_local_temperature};
1525     double global_extrema[2];
1526     Utilities::MPI::max(local_extrema, MPI_COMM_WORLD, global_extrema);
1527 
1528     return std::make_pair(-global_extrema[0], global_extrema[1]);
1529   }
1530 
1531 
1532   // @sect5{BoussinesqFlowProblem::compute_viscosity}
1533 
1534   // The function that calculates the viscosity is purely local and so needs
1535   // no communication at all. It is mostly the same as in step-31 but with an
1536   // updated formulation of the viscosity if $\alpha=2$ is chosen.
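  //
  // As a restatement of what the code below computes (for time steps after
  // the first, and with $\alpha=2$), the artificial viscosity on a cell $K$
  // is, schematically,
  // @f{align*}{
  //   \nu_h|_K = \min\Big(\beta\, \|\mathbf u\|_{\infty,K}\, h_K,\;
  //     c_R\, h_K^2\,
  //     \frac{\|r_E\|_{\infty,K}}{\|E(T)-\bar{E}(T)\|_\infty}\Big),
  // @f}
  // where $h_K$ is the cell diameter and $r_E$ the entropy residual
  // evaluated at the cell's quadrature points: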
1537   template <int dim>
1538   double BoussinesqFlowProblem<dim>::compute_viscosity(
1539     const std::vector<double> &                 old_temperature,
1540     const std::vector<double> &                 old_old_temperature,
1541     const std::vector<Tensor<1, dim>> &         old_temperature_grads,
1542     const std::vector<Tensor<1, dim>> &         old_old_temperature_grads,
1543     const std::vector<double> &                 old_temperature_laplacians,
1544     const std::vector<double> &                 old_old_temperature_laplacians,
1545     const std::vector<Tensor<1, dim>> &         old_velocity_values,
1546     const std::vector<Tensor<1, dim>> &         old_old_velocity_values,
1547     const std::vector<SymmetricTensor<2, dim>> &old_strain_rates,
1548     const std::vector<SymmetricTensor<2, dim>> &old_old_strain_rates,
1549     const double                                global_u_infty,
1550     const double                                global_T_variation,
1551     const double                                average_temperature,
1552     const double                                global_entropy_variation,
1553     const double                                cell_diameter) const
1554   {
1555     if (global_u_infty == 0)
1556       return 5e-3 * cell_diameter;
1557 
1558     const unsigned int n_q_points = old_temperature.size();
1559 
1560     double max_residual = 0;
1561     double max_velocity = 0;
1562 
1563     for (unsigned int q = 0; q < n_q_points; ++q)
1564       {
1565         const Tensor<1, dim> u =
1566           (old_velocity_values[q] + old_old_velocity_values[q]) / 2;
1567 
1568         const SymmetricTensor<2, dim> strain_rate =
1569           (old_strain_rates[q] + old_old_strain_rates[q]) / 2;
1570 
1571         const double T = (old_temperature[q] + old_old_temperature[q]) / 2;
1572         const double dT_dt =
1573           (old_temperature[q] - old_old_temperature[q]) / old_time_step;
1574         const double u_grad_T =
1575           u * (old_temperature_grads[q] + old_old_temperature_grads[q]) / 2;
1576 
1577         const double kappa_Delta_T =
1578           EquationData::kappa *
1579           (old_temperature_laplacians[q] + old_old_temperature_laplacians[q]) /
1580           2;
1581         const double gamma =
1582           ((EquationData::radiogenic_heating * EquationData::density(T) +
1583             2 * EquationData::eta * strain_rate * strain_rate) /
1584            (EquationData::density(T) * EquationData::specific_heat));
1585 
1586         double residual = std::abs(dT_dt + u_grad_T - kappa_Delta_T - gamma);
1587         if (parameters.stabilization_alpha == 2)
1588           residual *= std::abs(T - average_temperature);
1589 
1590         max_residual = std::max(residual, max_residual);
1591         max_velocity = std::max(std::sqrt(u * u), max_velocity);
1592       }
1593 
1594     const double max_viscosity =
1595       (parameters.stabilization_beta * max_velocity * cell_diameter);
1596     if (timestep_number == 0)
1597       return max_viscosity;
1598     else
1599       {
1600         Assert(old_time_step > 0, ExcInternalError());
1601 
1602         double entropy_viscosity;
1603         if (parameters.stabilization_alpha == 2)
1604           entropy_viscosity =
1605             (parameters.stabilization_c_R * cell_diameter * cell_diameter *
1606              max_residual / global_entropy_variation);
1607         else
1608           entropy_viscosity =
1609             (parameters.stabilization_c_R * cell_diameter *
1610              global_Omega_diameter * max_velocity * max_residual /
1611              (global_u_infty * global_T_variation));
1612 
1613         return std::min(max_viscosity, entropy_viscosity);
1614       }
1615   }
1616 
1617 
1618 
1619   // @sect4{The BoussinesqFlowProblem setup functions}
1620 
1621   // The following three functions set up the Stokes matrix, the matrix used
1622   // for the Stokes preconditioner, and the temperature matrix. The code is
1623   // mostly the same as in step-31, but it has been broken out into three
1624   // functions of their own for simplicity.
1625   //
1626   // The main functional difference between the code here and that in step-31
1627   // is that the matrices we want to set up are distributed across multiple
1628   // processors. Since we still want to build up the sparsity pattern first
1629   // for efficiency reasons, we could continue to build the <i>entire</i>
1630   // sparsity pattern as a BlockDynamicSparsityPattern, as we did in
1631   // step-31. However, that would be inefficient: every processor would build
1632   // the same sparsity pattern, but only initialize a small part of the matrix
1633   // using it. It also violates the principle that every processor should only
1634   // work on those cells it owns (and, if necessary the layer of ghost cells
1635   // around it).
1636   //
1637   // Rather, we use an object of type TrilinosWrappers::BlockSparsityPattern,
1638   // which is (obviously) a wrapper around a sparsity pattern object provided
1639   // by Trilinos. The advantage is that the Trilinos sparsity pattern class
1640   // can communicate across multiple processors: if this processor fills in
1641   // all the nonzero entries that result from the cells it owns, and every
1642   // other processor does so as well, then at the end after some MPI
1643   // communication initiated by the <code>compress()</code> call, we will have
1644   // the globally assembled sparsity pattern available with which the global
1645   // matrix can be initialized.
1646   //
1647   // There is one important aspect when initializing Trilinos sparsity
1648   // patterns in parallel: In addition to specifying the locally owned rows
1649   // and columns of the matrices via the @p stokes_partitioning index set, we
1650   // also supply information about all the rows we are possibly going to write
1651   // into when assembling on a certain processor. The set of locally relevant
1652   // rows contains all such rows (possibly also a few unnecessary ones, but it
1653   // is difficult to find the exact row indices before actually getting
1654   // indices on all cells and resolving constraints). This additional
1655   // information allows us to exactly determine the structure for the
1656   // off-processor data found during assembly. While Trilinos matrices are
1657   // able to collect this information on the fly as well (when initializing
1658   // them from some other reinit method), it is less efficient and leads to
1659   // problems when assembling matrices with multiple threads. In this program,
1660   // we pessimistically assume that only one processor at a time can write
1661   // into the matrix during assembly (whereas the computation is parallel),
1662   // which is fine for Trilinos matrices. In practice, one can do better by
1663   // hinting WorkStream at cells that do not share vertices, allowing for
1664   // parallelism among those cells (see the graph coloring algorithms and
1665   // WorkStream with colored iterators argument). However, that only works
1666   // when only one MPI processor is present because Trilinos' internal data
1667   // structures for accumulating off-processor data on the fly are not thread
1668   // safe. With the initialization presented here, there is no such problem
1669   // and one could safely introduce graph coloring for this algorithm.
1670   //
1671   // The only other change we need to make is to tell the
1672   // DoFTools::make_sparsity_pattern() function that it is only supposed to
1673   // work on a subset of cells, namely the ones whose
1674   // <code>subdomain_id</code> equals the number of the current processor, and
1675   // to ignore all other cells.
1676   //
1677   // This strategy is replicated across all three of the following functions.
1678   //
1679   // Note that Trilinos matrices store the information contained in the
1680   // sparsity patterns, so we can safely release the <code>sp</code> variable
1681   // once the matrix has been given the sparsity structure.
1682   template <int dim>
1683   void BoussinesqFlowProblem<dim>::setup_stokes_matrix(
1684     const std::vector<IndexSet> &stokes_partitioning,
1685     const std::vector<IndexSet> &stokes_relevant_partitioning)
1686   {
1687     stokes_matrix.clear();
1688 
1689     TrilinosWrappers::BlockSparsityPattern sp(stokes_partitioning,
1690                                               stokes_partitioning,
1691                                               stokes_relevant_partitioning,
1692                                               MPI_COMM_WORLD);
1693 
1694     Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1);
1695     for (unsigned int c = 0; c < dim + 1; ++c)
1696       for (unsigned int d = 0; d < dim + 1; ++d)
1697         if (!((c == dim) && (d == dim)))
1698           coupling[c][d] = DoFTools::always;
1699         else
1700           coupling[c][d] = DoFTools::none;
1701 
1702     DoFTools::make_sparsity_pattern(stokes_dof_handler,
1703                                     coupling,
1704                                     sp,
1705                                     stokes_constraints,
1706                                     false,
1707                                     Utilities::MPI::this_mpi_process(
1708                                       MPI_COMM_WORLD));
1709     sp.compress();
1710 
1711     stokes_matrix.reinit(sp);
1712   }
1713 
1714 
1715 
1716   template <int dim>
1717   void BoussinesqFlowProblem<dim>::setup_stokes_preconditioner(
1718     const std::vector<IndexSet> &stokes_partitioning,
1719     const std::vector<IndexSet> &stokes_relevant_partitioning)
1720   {
1721     Amg_preconditioner.reset();
1722     Mp_preconditioner.reset();
1723 
1724     stokes_preconditioner_matrix.clear();
1725 
1726     TrilinosWrappers::BlockSparsityPattern sp(stokes_partitioning,
1727                                               stokes_partitioning,
1728                                               stokes_relevant_partitioning,
1729                                               MPI_COMM_WORLD);
1730 
1731     Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1);
1732     for (unsigned int c = 0; c < dim + 1; ++c)
1733       for (unsigned int d = 0; d < dim + 1; ++d)
1734         if (c == d)
1735           coupling[c][d] = DoFTools::always;
1736         else
1737           coupling[c][d] = DoFTools::none;
1738 
1739     DoFTools::make_sparsity_pattern(stokes_dof_handler,
1740                                     coupling,
1741                                     sp,
1742                                     stokes_constraints,
1743                                     false,
1744                                     Utilities::MPI::this_mpi_process(
1745                                       MPI_COMM_WORLD));
1746     sp.compress();
1747 
1748     stokes_preconditioner_matrix.reinit(sp);
1749   }
1750 
1751 
1752   template <int dim>
1753   void BoussinesqFlowProblem<dim>::setup_temperature_matrices(
1754     const IndexSet &temperature_partitioner,
1755     const IndexSet &temperature_relevant_partitioner)
1756   {
1757     T_preconditioner.reset();
1758     temperature_mass_matrix.clear();
1759     temperature_stiffness_matrix.clear();
1760     temperature_matrix.clear();
1761 
1762     TrilinosWrappers::SparsityPattern sp(temperature_partitioner,
1763                                          temperature_partitioner,
1764                                          temperature_relevant_partitioner,
1765                                          MPI_COMM_WORLD);
1766     DoFTools::make_sparsity_pattern(temperature_dof_handler,
1767                                     sp,
1768                                     temperature_constraints,
1769                                     false,
1770                                     Utilities::MPI::this_mpi_process(
1771                                       MPI_COMM_WORLD));
1772     sp.compress();
1773 
1774     temperature_matrix.reinit(sp);
1775     temperature_mass_matrix.reinit(sp);
1776     temperature_stiffness_matrix.reinit(sp);
1777   }
1778 
1779 
1780 
1781   // The remainder of the setup function (after splitting out the three
1782   // functions above) mostly has to deal with the things we need to do for
1783   // parallelization across processors. Because setting all of this up is a
1784   // significant compute time expense of the program, we put everything we do
1785   // here into a timer group so that we can get summary information about the
1786   // fraction of time spent in this part of the program at its end.
1787   //
1788   // At the top as usual we enumerate degrees of freedom and sort them by
1789   // component/block, followed by writing their numbers to the screen from
1790   // processor zero. The DoFHandler::distribute_dofs() function, when applied
1791   // to a parallel::distributed::Triangulation object, sorts degrees of
1792   // freedom in such a way that all degrees of freedom associated with
1793   // subdomain zero come before all those associated with subdomain one,
1794   // etc. For the Stokes part, this entails, however, that velocities and
1795   // pressures become intermixed, but this is trivially solved by sorting
1796   // again by blocks; it is worth noting that this latter operation leaves the
1797   // relative ordering of all velocities and pressures alone, i.e. within the
1798   // velocity block we will still have all those associated with subdomain
1799   // zero before all velocities associated with subdomain one, etc. This is
1800   // important since we store each of the blocks of this matrix distributed
1801   // across all processors and want this to be done in such a way that each
1802   // processor stores that part of the matrix that is roughly equal to the
1803   // degrees of freedom located on those cells that it will actually work on.
1804   //
1805   // When printing the numbers of degrees of freedom, note that these numbers
1806   // are going to be large if we use many processors. Consequently, we let the
1807   // stream put a comma separator in between every three digits. The state of
1808   // the stream, i.e., its locale, is saved beforehand and restored after this
1809   // operation. While slightly opaque, the code works because the default
1810   // locale (which we get using the constructor call
1811   // <code>std::locale("")</code>) implies printing numbers with a comma
1812   // separator for every third digit (i.e., thousands, millions, billions).
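  // A stripped-down version of that pattern looks like this (a sketch using
  // <code>std::cout</code> directly rather than the <code>pcout</code>
  // object used below):
  // @code
  //   const std::locale previous = std::cout.getloc();
  //   std::cout.imbue(std::locale(""));  // e.g. prints "1,234,567" on many systems
  //   std::cout << 1234567 << std::endl;
  //   std::cout.imbue(previous);         // restore the old stream state
  // @endcode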
1813   //
1814   // In this function as well as many below, we measure how much time
1815   // we spend here and collect that in a section called "Setup dof
1816   // systems" across function invocations. This is done using an
1817   // TimerOutput::Scope object that gets a timer going in the section
1818   // with above name of the `computing_timer` object upon construction
1819   // of the local variable; the timer is stopped again when the
1820   // destructor of the `timing_section` variable is called.  This, of
1821   // course, happens either at the end of the function, or if we leave
1822   // the function through a `return` statement or when an exception is
1823   // thrown somewhere -- in other words, whenever we leave this
1824   // function in any way. The use of such "scope" objects therefore
1825   // makes sure that we do not have to manually add code that tells
1826   // the timer to stop at every location where this function may be
1827   // left.
1828   template <int dim>
1829   void BoussinesqFlowProblem<dim>::setup_dofs()
1830   {
1831     TimerOutput::Scope timing_section(computing_timer, "Setup dof systems");
1832 
1833     stokes_dof_handler.distribute_dofs(stokes_fe);
1834 
1835     std::vector<unsigned int> stokes_sub_blocks(dim + 1, 0);
1836     stokes_sub_blocks[dim] = 1;
1837     DoFRenumbering::component_wise(stokes_dof_handler, stokes_sub_blocks);
1838 
1839     temperature_dof_handler.distribute_dofs(temperature_fe);
1840 
1841     const std::vector<types::global_dof_index> stokes_dofs_per_block =
1842       DoFTools::count_dofs_per_fe_block(stokes_dof_handler, stokes_sub_blocks);
1843 
1844     const unsigned int n_u = stokes_dofs_per_block[0],
1845                        n_p = stokes_dofs_per_block[1],
1846                        n_T = temperature_dof_handler.n_dofs();
1847 
1848     std::locale s = pcout.get_stream().getloc();
1849     pcout.get_stream().imbue(std::locale(""));
1850     pcout << "Number of active cells: " << triangulation.n_global_active_cells()
1851           << " (on " << triangulation.n_levels() << " levels)" << std::endl
1852           << "Number of degrees of freedom: " << n_u + n_p + n_T << " (" << n_u
1853           << '+' << n_p << '+' << n_T << ')' << std::endl
1854           << std::endl;
1855     pcout.get_stream().imbue(s);
1856 
1857 
1858     // After this, we have to set up the various partitioners (of type
1859     // <code>IndexSet</code>, see the introduction) that describe which parts
1860     // of each matrix or vector will be stored where, then call the functions
1861     // that actually set up the matrices, and at the end also resize the
1862     // various vectors we keep around in this program.
1863     std::vector<IndexSet> stokes_partitioning, stokes_relevant_partitioning;
1864     IndexSet              temperature_partitioning(n_T),
1865       temperature_relevant_partitioning(n_T);
1866     IndexSet stokes_relevant_set;
1867     {
1868       IndexSet stokes_index_set = stokes_dof_handler.locally_owned_dofs();
1869       stokes_partitioning.push_back(stokes_index_set.get_view(0, n_u));
1870       stokes_partitioning.push_back(stokes_index_set.get_view(n_u, n_u + n_p));
1871 
1872       DoFTools::extract_locally_relevant_dofs(stokes_dof_handler,
1873                                               stokes_relevant_set);
1874       stokes_relevant_partitioning.push_back(
1875         stokes_relevant_set.get_view(0, n_u));
1876       stokes_relevant_partitioning.push_back(
1877         stokes_relevant_set.get_view(n_u, n_u + n_p));
1878 
1879       temperature_partitioning = temperature_dof_handler.locally_owned_dofs();
1880       DoFTools::extract_locally_relevant_dofs(
1881         temperature_dof_handler, temperature_relevant_partitioning);
1882     }
1883 
1884     // Following this, we can compute constraints for the solution vectors,
1885     // including hanging node constraints and homogeneous and inhomogeneous
1886     // boundary values for the Stokes and temperature fields. Note that as for
1887     // everything else, the constraint objects can not hold <i>all</i>
1888     // constraints on every processor. Rather, each processor needs to store
1889     // only those that are actually necessary for correctness given that it
1890     // only assembles linear systems on cells it owns. As discussed in the
1891     // @ref distributed_paper "this paper", the set of constraints we need to
1892     // know about is exactly the set of constraints on all locally relevant
1893     // degrees of freedom, so this is what we use to initialize the constraint
1894     // objects.
1895     {
1896       stokes_constraints.clear();
1897       stokes_constraints.reinit(stokes_relevant_set);
1898 
1899       DoFTools::make_hanging_node_constraints(stokes_dof_handler,
1900                                               stokes_constraints);
1901 
1902       FEValuesExtractors::Vector velocity_components(0);
1903       VectorTools::interpolate_boundary_values(
1904         stokes_dof_handler,
1905         0,
1906         Functions::ZeroFunction<dim>(dim + 1),
1907         stokes_constraints,
1908         stokes_fe.component_mask(velocity_components));
1909 
1910       std::set<types::boundary_id> no_normal_flux_boundaries;
1911       no_normal_flux_boundaries.insert(1);
1912       VectorTools::compute_no_normal_flux_constraints(stokes_dof_handler,
1913                                                       0,
1914                                                       no_normal_flux_boundaries,
1915                                                       stokes_constraints,
1916                                                       mapping);
1917       stokes_constraints.close();
1918     }
1919     {
1920       temperature_constraints.clear();
1921       temperature_constraints.reinit(temperature_relevant_partitioning);
1922 
1923       DoFTools::make_hanging_node_constraints(temperature_dof_handler,
1924                                               temperature_constraints);
1925       VectorTools::interpolate_boundary_values(
1926         temperature_dof_handler,
1927         0,
1928         EquationData::TemperatureInitialValues<dim>(),
1929         temperature_constraints);
1930       VectorTools::interpolate_boundary_values(
1931         temperature_dof_handler,
1932         1,
1933         EquationData::TemperatureInitialValues<dim>(),
1934         temperature_constraints);
1935       temperature_constraints.close();
1936     }
1937 
1938     // All this done, we can then initialize the various matrix and vector
1939     // objects to their proper sizes. At the end, we also record that all
1940     // matrices and preconditioners have to be re-computed at the beginning of
1941     // the next time step. Note how we initialize the vectors for the Stokes
1942     // and temperature right hand sides: These are writable vectors (last
1943     // boolean argument set to @p true) that have the correct one-to-one
1944     // partitioning of locally owned elements but are still given the relevant
1945     // partitioning for means of figuring out the vector entries that are
1946     // going to be set right away. As for matrices, this allows for writing
1947     // local contributions into the vector with multiple threads (always
1948     // assuming that the same vector entry is not accessed by multiple threads
1949     // at the same time). The other vectors only allow for read access of
1950     // individual elements, including ghosts, but are not suitable for
1951     // solvers.
1952     setup_stokes_matrix(stokes_partitioning, stokes_relevant_partitioning);
1953     setup_stokes_preconditioner(stokes_partitioning,
1954                                 stokes_relevant_partitioning);
1955     setup_temperature_matrices(temperature_partitioning,
1956                                temperature_relevant_partitioning);
1957 
1958     stokes_rhs.reinit(stokes_partitioning,
1959                       stokes_relevant_partitioning,
1960                       MPI_COMM_WORLD,
1961                       true);
1962     stokes_solution.reinit(stokes_relevant_partitioning, MPI_COMM_WORLD);
1963     old_stokes_solution.reinit(stokes_solution);
1964 
1965     temperature_rhs.reinit(temperature_partitioning,
1966                            temperature_relevant_partitioning,
1967                            MPI_COMM_WORLD,
1968                            true);
1969     temperature_solution.reinit(temperature_relevant_partitioning,
1970                                 MPI_COMM_WORLD);
1971     old_temperature_solution.reinit(temperature_solution);
1972     old_old_temperature_solution.reinit(temperature_solution);
1973 
1974     rebuild_stokes_matrix              = true;
1975     rebuild_stokes_preconditioner      = true;
1976     rebuild_temperature_matrices       = true;
1977     rebuild_temperature_preconditioner = true;
1978   }
1979 
1980 
1981 
1982   // @sect4{The BoussinesqFlowProblem assembly functions}
1983   //
1984   // Following the discussion in the introduction and in the @ref threads
1985   // module, we split the assembly functions into different parts:
1986   //
1987   // <ul> <li> The local calculations of matrices and right hand sides, given
1988   // a certain cell as input (these functions are named
1989   // <code>local_assemble_*</code> below). The resulting function is, in other
1990   // words, essentially the body of the loop over all cells in step-31. Note,
1991   // however, that these functions store the result from the local
1992   // calculations in variables of classes from the CopyData namespace.
1993   //
1994   // <li>These objects are then given to the second step which writes the
1995   // local data into the global data structures (these functions are named
1996   // <code>copy_local_to_global_*</code> below). These functions are pretty
1997   // trivial.
1998   //
1999   // <li>These two subfunctions are then used in the respective assembly
2000   // routine (called <code>assemble_*</code> below), where a WorkStream object
2001   // is set up and runs over all the cells that belong to the processor's
2002   // subdomain.  </ul>
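  //
  // Schematically, and with illustrative placeholder names (the actual
  // worker, copier, scratch, and copy data objects are defined further
  // down), these three pieces are tied together as follows:
  // @code
  //   WorkStream::run(begin_of_locally_owned_cell_range,
  //                   end_of_locally_owned_cell_range,
  //                   [this](const auto &cell, auto &scratch, auto &data) {
  //                     this->local_assemble_something(cell, scratch, data);
  //                   },
  //                   [this](const auto &data) {
  //                     this->copy_local_to_global_something(data);
  //                   },
  //                   sample_scratch_data,
  //                   sample_copy_data);
  // @endcode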
2003 
2004   // @sect5{Stokes preconditioner assembly}
2005   //
2006   // Let us start with the functions that build the Stokes
2007   // preconditioner. The first two of these are pretty trivial, given the
2008   // discussion above. Note in particular that the main point in using the
2009   // scratch data object is that we want to avoid allocating any objects on
2010   // the heap each time we visit a new cell. As a consequence, the
2011   // assembly function below only has automatic local variables, and
2012   // everything else is accessed through the scratch data object, which is
2013   // allocated only once before we start the loop over all cells:
2014   template <int dim>
2015   void BoussinesqFlowProblem<dim>::local_assemble_stokes_preconditioner(
2016     const typename DoFHandler<dim>::active_cell_iterator &cell,
2017     Assembly::Scratch::StokesPreconditioner<dim> &        scratch,
2018     Assembly::CopyData::StokesPreconditioner<dim> &       data)
2019   {
2020     const unsigned int dofs_per_cell = stokes_fe.n_dofs_per_cell();
2021     const unsigned int n_q_points =
2022       scratch.stokes_fe_values.n_quadrature_points;
2023 
2024     const FEValuesExtractors::Vector velocities(0);
2025     const FEValuesExtractors::Scalar pressure(dim);
2026 
2027     scratch.stokes_fe_values.reinit(cell);
2028     cell->get_dof_indices(data.local_dof_indices);
2029 
2030     data.local_matrix = 0;
2031 
2032     for (unsigned int q = 0; q < n_q_points; ++q)
2033       {
2034         for (unsigned int k = 0; k < dofs_per_cell; ++k)
2035           {
2036             scratch.grad_phi_u[k] =
2037               scratch.stokes_fe_values[velocities].gradient(k, q);
2038             scratch.phi_p[k] = scratch.stokes_fe_values[pressure].value(k, q);
2039           }
2040 
2041         for (unsigned int i = 0; i < dofs_per_cell; ++i)
2042           for (unsigned int j = 0; j < dofs_per_cell; ++j)
2043             data.local_matrix(i, j) +=
2044               (EquationData::eta *
2045                  scalar_product(scratch.grad_phi_u[i], scratch.grad_phi_u[j]) +
2046                (1. / EquationData::eta) * EquationData::pressure_scaling *
2047                  EquationData::pressure_scaling *
2048                  (scratch.phi_p[i] * scratch.phi_p[j])) *
2049               scratch.stokes_fe_values.JxW(q);
2050       }
2051   }
2052 
2053 
2054 
2055   template <int dim>
2056   void BoussinesqFlowProblem<dim>::copy_local_to_global_stokes_preconditioner(
2057     const Assembly::CopyData::StokesPreconditioner<dim> &data)
2058   {
2059     stokes_constraints.distribute_local_to_global(data.local_matrix,
2060                                                   data.local_dof_indices,
2061                                                   stokes_preconditioner_matrix);
2062   }
2063 
2064 
2065   // Now for the function that actually puts things together, using the
2066   // WorkStream functions.  WorkStream::run needs a start and end iterator to
2067   // enumerate the cells it is supposed to work on. Typically, one would use
2068   // DoFHandler::begin_active() and DoFHandler::end() for that but here we
2069   // only want the subset of cells that are owned by the
2070   // current processor. This is where the FilteredIterator class comes into
2071   // play: you give it a range of cells and it provides an iterator that only
2072   // iterates over that subset of cells that satisfy a certain predicate (a
2073   // predicate is a function of one argument that either returns true or
2074   // false). The predicate we use here is IteratorFilters::LocallyOwnedCell,
2075   // i.e., it returns true exactly if the cell is owned by the current
2076   // processor. The resulting iterator range is then exactly what we need.
2077   //
2078   // With this obstacle out of the way, we call the WorkStream::run
2079   // function with this set of cells, scratch and copy objects, and
2080   // with pointers to two functions: the local assembly and
2081   // copy-local-to-global function. These functions need to have very
2082   // specific signatures: three arguments in the first and one
2083   // argument in the latter case (see the documentation of the
2084   // WorkStream::run function for the meaning of these arguments).
2085   // Note how we use lambda functions to
2086   // create function objects that satisfy this requirement. They use
2087   // function arguments for the local assembly function that specify
2088   // cell, scratch data, and copy data, as well as a function argument
2089   // for the copy function that expects the
2090   // data to be written into the global matrix (also see the discussion in
2091   // step-13's <code>assemble_linear_system()</code> function). On the other
2092   // hand, the implicit zeroth argument of member functions (namely
2093   // the <code>this</code> pointer of the object on which that member
2094   // function is to operate) is <i>bound</i> to the
2095   // <code>this</code> pointer of the current function and is captured. The
2096   // WorkStream::run function, as a consequence, does not need to know
2097   // anything about the object these functions work on.
2098   //
2099   // When the WorkStream is executed, it will create several local assembly
2100   // routines of the first kind for several cells and let some available
2101   // processors work on them. The function that needs to be synchronized,
2102   // i.e., the write operation into the global matrix, however, is executed by
2103   // only one thread at a time in the prescribed order. Of course, this only
2104   // holds for the parallelization on a single MPI process. Different MPI
2105   // processes will have their own WorkStream objects and do that work
2106   // completely independently (and in different memory spaces). In a
2107   // distributed calculation, some data will accumulate at degrees of freedom
2108   // that are not owned by the respective processor. It would be inefficient
2109   // to send data around every time we encounter such a dof. What happens
2110   // instead is that the Trilinos sparse matrix will keep that data and send
2111   // it to the owner at the end of assembly, by calling the
2112   // <code>compress()</code> command.
2113   template <int dim>
2114   void BoussinesqFlowProblem<dim>::assemble_stokes_preconditioner()
2115   {
2116     stokes_preconditioner_matrix = 0;
2117 
2118     const QGauss<dim> quadrature_formula(parameters.stokes_velocity_degree + 1);
2119 
2120     using CellFilter =
2121       FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
2122 
2123     auto worker =
2124       [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
2125              Assembly::Scratch::StokesPreconditioner<dim> &        scratch,
2126              Assembly::CopyData::StokesPreconditioner<dim> &       data) {
2127         this->local_assemble_stokes_preconditioner(cell, scratch, data);
2128       };
2129 
2130     auto copier =
2131       [this](const Assembly::CopyData::StokesPreconditioner<dim> &data) {
2132         this->copy_local_to_global_stokes_preconditioner(data);
2133       };
2134 
2135     WorkStream::run(CellFilter(IteratorFilters::LocallyOwnedCell(),
2136                                stokes_dof_handler.begin_active()),
2137                     CellFilter(IteratorFilters::LocallyOwnedCell(),
2138                                stokes_dof_handler.end()),
2139                     worker,
2140                     copier,
2141                     Assembly::Scratch::StokesPreconditioner<dim>(
2142                       stokes_fe,
2143                       quadrature_formula,
2144                       mapping,
2145                       update_JxW_values | update_values | update_gradients),
2146                     Assembly::CopyData::StokesPreconditioner<dim>(stokes_fe));
2147 
2148     stokes_preconditioner_matrix.compress(VectorOperation::add);
2149   }
2150 
2151 
2152 
2153   // The final function in this block initiates assembly of the Stokes
2154   // preconditioner matrix and then in fact builds the Stokes
2155   // preconditioner. It is mostly the same as in the serial case. The only
2156   // difference to step-31 is that we use a Jacobi preconditioner for the
2157   // pressure mass matrix instead of IC, as discussed in the introduction.
2158   template <int dim>
2159   void BoussinesqFlowProblem<dim>::build_stokes_preconditioner()
2160   {
2161     if (rebuild_stokes_preconditioner == false)
2162       return;
2163 
2164     TimerOutput::Scope timer_section(computing_timer,
2165                                      "   Build Stokes preconditioner");
2166     pcout << "   Rebuilding Stokes preconditioner..." << std::flush;
2167 
2168     assemble_stokes_preconditioner();
2169 
2170     std::vector<std::vector<bool>> constant_modes;
2171     FEValuesExtractors::Vector     velocity_components(0);
2172     DoFTools::extract_constant_modes(stokes_dof_handler,
2173                                      stokes_fe.component_mask(
2174                                        velocity_components),
2175                                      constant_modes);
2176 
2177     Mp_preconditioner =
2178       std::make_shared<TrilinosWrappers::PreconditionJacobi>();
2179     Amg_preconditioner = std::make_shared<TrilinosWrappers::PreconditionAMG>();
2180 
2181     TrilinosWrappers::PreconditionAMG::AdditionalData Amg_data;
2182     Amg_data.constant_modes        = constant_modes;
2183     Amg_data.elliptic              = true;
2184     Amg_data.higher_order_elements = true;
2185     Amg_data.smoother_sweeps       = 2;
2186     Amg_data.aggregation_threshold = 0.02;
2187 
2188     Mp_preconditioner->initialize(stokes_preconditioner_matrix.block(1, 1));
2189     Amg_preconditioner->initialize(stokes_preconditioner_matrix.block(0, 0),
2190                                    Amg_data);
2191 
2192     rebuild_stokes_preconditioner = false;
2193 
2194     pcout << std::endl;
2195   }
2196 
2197 
2198   // @sect5{Stokes system assembly}
2199 
2200   // The next three functions implement the assembly of the Stokes system,
2201   // again split up into a part performing local calculations, one for writing
2202   // the local data into the global matrix and vector, and one for actually
2203   // running the loop over all cells with the help of the WorkStream
2204   // class. Note that the assembly of the Stokes matrix needs only to be done
2205   // in case we have changed the mesh. Otherwise, just the
2206   // (temperature-dependent) right hand side needs to be calculated
2207   // here. Since we are working with distributed matrices and vectors, we have
2208   // to call the respective <code>compress()</code> functions in the end of
2209   // the assembly in order to send non-local data to the owner process.
2210   template <int dim>
2211   void BoussinesqFlowProblem<dim>::local_assemble_stokes_system(
2212     const typename DoFHandler<dim>::active_cell_iterator &cell,
2213     Assembly::Scratch::StokesSystem<dim> &                scratch,
2214     Assembly::CopyData::StokesSystem<dim> &               data)
2215   {
2216     const unsigned int dofs_per_cell =
2217       scratch.stokes_fe_values.get_fe().n_dofs_per_cell();
2218     const unsigned int n_q_points =
2219       scratch.stokes_fe_values.n_quadrature_points;
2220 
2221     const FEValuesExtractors::Vector velocities(0);
2222     const FEValuesExtractors::Scalar pressure(dim);
2223 
2224     scratch.stokes_fe_values.reinit(cell);
2225 
2226     typename DoFHandler<dim>::active_cell_iterator temperature_cell(
2227       &triangulation, cell->level(), cell->index(), &temperature_dof_handler);
2228     scratch.temperature_fe_values.reinit(temperature_cell);
2229 
2230     if (rebuild_stokes_matrix)
2231       data.local_matrix = 0;
2232     data.local_rhs = 0;
2233 
2234     scratch.temperature_fe_values.get_function_values(
2235       old_temperature_solution, scratch.old_temperature_values);
2236 
2237     for (unsigned int q = 0; q < n_q_points; ++q)
2238       {
2239         const double old_temperature = scratch.old_temperature_values[q];
2240 
2241         for (unsigned int k = 0; k < dofs_per_cell; ++k)
2242           {
2243             scratch.phi_u[k] = scratch.stokes_fe_values[velocities].value(k, q);
2244             if (rebuild_stokes_matrix)
2245               {
2246                 scratch.grads_phi_u[k] =
2247                   scratch.stokes_fe_values[velocities].symmetric_gradient(k, q);
2248                 scratch.div_phi_u[k] =
2249                   scratch.stokes_fe_values[velocities].divergence(k, q);
2250                 scratch.phi_p[k] =
2251                   scratch.stokes_fe_values[pressure].value(k, q);
2252               }
2253           }
2254 
2255         if (rebuild_stokes_matrix == true)
2256           for (unsigned int i = 0; i < dofs_per_cell; ++i)
2257             for (unsigned int j = 0; j < dofs_per_cell; ++j)
2258               data.local_matrix(i, j) +=
2259                 (EquationData::eta * 2 *
2260                    (scratch.grads_phi_u[i] * scratch.grads_phi_u[j]) -
2261                  (EquationData::pressure_scaling * scratch.div_phi_u[i] *
2262                   scratch.phi_p[j]) -
2263                  (EquationData::pressure_scaling * scratch.phi_p[i] *
2264                   scratch.div_phi_u[j])) *
2265                 scratch.stokes_fe_values.JxW(q);
2266 
2267         const Tensor<1, dim> gravity = EquationData::gravity_vector(
2268           scratch.stokes_fe_values.quadrature_point(q));
2269 
2270         for (unsigned int i = 0; i < dofs_per_cell; ++i)
2271           data.local_rhs(i) += (EquationData::density(old_temperature) *
2272                                 gravity * scratch.phi_u[i]) *
2273                                scratch.stokes_fe_values.JxW(q);
2274       }
2275 
2276     cell->get_dof_indices(data.local_dof_indices);
2277   }
2278 
2279 
2280 
2281   template <int dim>
2282   void BoussinesqFlowProblem<dim>::copy_local_to_global_stokes_system(
2283     const Assembly::CopyData::StokesSystem<dim> &data)
2284   {
2285     if (rebuild_stokes_matrix == true)
2286       stokes_constraints.distribute_local_to_global(data.local_matrix,
2287                                                     data.local_rhs,
2288                                                     data.local_dof_indices,
2289                                                     stokes_matrix,
2290                                                     stokes_rhs);
2291     else
2292       stokes_constraints.distribute_local_to_global(data.local_rhs,
2293                                                     data.local_dof_indices,
2294                                                     stokes_rhs);
2295   }
2296 
2297 
2298 
2299   template <int dim>
2300   void BoussinesqFlowProblem<dim>::assemble_stokes_system()
2301   {
2302     TimerOutput::Scope timer_section(computing_timer,
2303                                      "   Assemble Stokes system");
2304 
2305     if (rebuild_stokes_matrix == true)
2306       stokes_matrix = 0;
2307 
2308     stokes_rhs = 0;
2309 
2310     const QGauss<dim> quadrature_formula(parameters.stokes_velocity_degree + 1);
2311 
2312     using CellFilter =
2313       FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
2314 
2315     WorkStream::run(
2316       CellFilter(IteratorFilters::LocallyOwnedCell(),
2317                  stokes_dof_handler.begin_active()),
2318       CellFilter(IteratorFilters::LocallyOwnedCell(), stokes_dof_handler.end()),
2319       [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
2320              Assembly::Scratch::StokesSystem<dim> &                scratch,
2321              Assembly::CopyData::StokesSystem<dim> &               data) {
2322         this->local_assemble_stokes_system(cell, scratch, data);
2323       },
2324       [this](const Assembly::CopyData::StokesSystem<dim> &data) {
2325         this->copy_local_to_global_stokes_system(data);
2326       },
2327       Assembly::Scratch::StokesSystem<dim>(
2328         stokes_fe,
2329         mapping,
2330         quadrature_formula,
2331         (update_values | update_quadrature_points | update_JxW_values |
2332          (rebuild_stokes_matrix == true ? update_gradients : UpdateFlags(0))),
2333         temperature_fe,
2334         update_values),
2335       Assembly::CopyData::StokesSystem<dim>(stokes_fe));
2336 
2337     if (rebuild_stokes_matrix == true)
2338       stokes_matrix.compress(VectorOperation::add);
2339     stokes_rhs.compress(VectorOperation::add);
2340 
2341     rebuild_stokes_matrix = false;
2342 
2343     pcout << std::endl;
2344   }
2345 
2346 
2347   // @sect5{Temperature matrix assembly}
2348 
2349   // The task to be performed by the next three functions is to calculate a
2350   // mass matrix and a Laplace matrix on the temperature system. These will be
2351   // combined in order to yield the semi-implicit time stepping matrix that
2352   // consists of the mass matrix plus a time step-dependent weight factor
2353   // times the Laplace matrix. The first of these functions is again
2354   // essentially the body of the loop over all cells from step-31.
2355   //
2356   // The two functions that follow perform services similar to the ones above.
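  //
  // Written out as formulas, the two local matrices accumulated below
  // (<code>local_mass_matrix</code> and <code>local_stiffness_matrix</code>)
  // are $M_{ij} = \int_K \varphi_i\,\varphi_j\,dx$ and
  // $S_{ij} = \int_K \kappa\,\nabla\varphi_i\cdot\nabla\varphi_j\,dx$, i.e.,
  // the usual mass and stiffness matrices for the temperature variable.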
2357   template <int dim>
2358   void BoussinesqFlowProblem<dim>::local_assemble_temperature_matrix(
2359     const typename DoFHandler<dim>::active_cell_iterator &cell,
2360     Assembly::Scratch::TemperatureMatrix<dim> &           scratch,
2361     Assembly::CopyData::TemperatureMatrix<dim> &          data)
2362   {
2363     const unsigned int dofs_per_cell =
2364       scratch.temperature_fe_values.get_fe().n_dofs_per_cell();
2365     const unsigned int n_q_points =
2366       scratch.temperature_fe_values.n_quadrature_points;
2367 
2368     scratch.temperature_fe_values.reinit(cell);
2369     cell->get_dof_indices(data.local_dof_indices);
2370 
2371     data.local_mass_matrix      = 0;
2372     data.local_stiffness_matrix = 0;
2373 
2374     for (unsigned int q = 0; q < n_q_points; ++q)
2375       {
2376         for (unsigned int k = 0; k < dofs_per_cell; ++k)
2377           {
2378             scratch.grad_phi_T[k] =
2379               scratch.temperature_fe_values.shape_grad(k, q);
2380             scratch.phi_T[k] = scratch.temperature_fe_values.shape_value(k, q);
2381           }
2382 
2383         for (unsigned int i = 0; i < dofs_per_cell; ++i)
2384           for (unsigned int j = 0; j < dofs_per_cell; ++j)
2385             {
2386               data.local_mass_matrix(i, j) +=
2387                 (scratch.phi_T[i] * scratch.phi_T[j] *
2388                  scratch.temperature_fe_values.JxW(q));
2389               data.local_stiffness_matrix(i, j) +=
2390                 (EquationData::kappa * scratch.grad_phi_T[i] *
2391                  scratch.grad_phi_T[j] * scratch.temperature_fe_values.JxW(q));
2392             }
2393       }
2394   }
2395 
2396 
2397 
2398   template <int dim>
2399   void BoussinesqFlowProblem<dim>::copy_local_to_global_temperature_matrix(
2400     const Assembly::CopyData::TemperatureMatrix<dim> &data)
2401   {
2402     temperature_constraints.distribute_local_to_global(data.local_mass_matrix,
2403                                                        data.local_dof_indices,
2404                                                        temperature_mass_matrix);
2405     temperature_constraints.distribute_local_to_global(
2406       data.local_stiffness_matrix,
2407       data.local_dof_indices,
2408       temperature_stiffness_matrix);
2409   }
2410 
2411 
2412   template <int dim>
2413   void BoussinesqFlowProblem<dim>::assemble_temperature_matrix()
2414   {
2415     if (rebuild_temperature_matrices == false)
2416       return;
2417 
2418     TimerOutput::Scope timer_section(computing_timer,
2419                                      "   Assemble temperature matrices");
2420     temperature_mass_matrix      = 0;
2421     temperature_stiffness_matrix = 0;
2422 
2423     const QGauss<dim> quadrature_formula(parameters.temperature_degree + 2);
2424 
2425     using CellFilter =
2426       FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
2427 
2428     WorkStream::run(
2429       CellFilter(IteratorFilters::LocallyOwnedCell(),
2430                  temperature_dof_handler.begin_active()),
2431       CellFilter(IteratorFilters::LocallyOwnedCell(),
2432                  temperature_dof_handler.end()),
2433       [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
2434              Assembly::Scratch::TemperatureMatrix<dim> &           scratch,
2435              Assembly::CopyData::TemperatureMatrix<dim> &          data) {
2436         this->local_assemble_temperature_matrix(cell, scratch, data);
2437       },
2438       [this](const Assembly::CopyData::TemperatureMatrix<dim> &data) {
2439         this->copy_local_to_global_temperature_matrix(data);
2440       },
2441       Assembly::Scratch::TemperatureMatrix<dim>(temperature_fe,
2442                                                 mapping,
2443                                                 quadrature_formula),
2444       Assembly::CopyData::TemperatureMatrix<dim>(temperature_fe));
2445 
2446     temperature_mass_matrix.compress(VectorOperation::add);
2447     temperature_stiffness_matrix.compress(VectorOperation::add);
2448 
2449     rebuild_temperature_matrices       = false;
2450     rebuild_temperature_preconditioner = true;
2451   }
2452 
2453 
2454   // @sect5{Temperature right hand side assembly}
2455 
2456   // This is the last assembly function. It calculates the right hand side of
2457   // the temperature system, which includes the convection and the
2458   // stabilization terms. It includes a lot of evaluations of old solutions at
2459   // the quadrature points (which are necessary for calculating the artificial
2460   // viscosity of stabilization), but is otherwise similar to the other
2461   // assembly functions. Notice, once again, how we resolve the dilemma of
2462   // having inhomogeneous boundary conditions, by just making a right hand
2463   // side at this point (compare the comments for the <code>project()</code>
2464   // function above): We create some matrix columns with exactly the values
2465   // that would be entered for the temperature stiffness matrix, in case we
2466   // have inhomogeneously constrained dofs. That will account for the correct
2467   // balance between the right hand side vector and the matrix system of the
2468   // temperature equation.
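  //
  // As a reading aid for the quadrature loop below: with $k_n$ the current
  // and $k_{n-1}$ the previous time step size, the old solutions are combined
  // into the BDF-2 right hand side term
  // $\left(1+\frac{k_n}{k_{n-1}}\right)T^{n-1} - \frac{k_n^2}{k_{n-1}(k_n+k_{n-1})}T^{n-2}$,
  // while the temperature, its gradient, the velocity, and the strain rate are
  // extrapolated to the current time as
  // $\left(1+\frac{k_n}{k_{n-1}}\right)X^{n-1} - \frac{k_n}{k_{n-1}}X^{n-2}$.
  // In the very first time step, where no $X^{n-2}$ exists yet, the code
  // simply falls back to the old values themselves.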
2469   template <int dim>
2470   void BoussinesqFlowProblem<dim>::local_assemble_temperature_rhs(
2471     const std::pair<double, double> global_T_range,
2472     const double                    global_max_velocity,
2473     const double                    global_entropy_variation,
2474     const typename DoFHandler<dim>::active_cell_iterator &cell,
2475     Assembly::Scratch::TemperatureRHS<dim> &              scratch,
2476     Assembly::CopyData::TemperatureRHS<dim> &             data)
2477   {
2478     const bool use_bdf2_scheme = (timestep_number != 0);
2479 
2480     const unsigned int dofs_per_cell =
2481       scratch.temperature_fe_values.get_fe().n_dofs_per_cell();
2482     const unsigned int n_q_points =
2483       scratch.temperature_fe_values.n_quadrature_points;
2484 
2485     const FEValuesExtractors::Vector velocities(0);
2486 
2487     data.local_rhs     = 0;
2488     data.matrix_for_bc = 0;
2489     cell->get_dof_indices(data.local_dof_indices);
2490 
2491     scratch.temperature_fe_values.reinit(cell);
2492 
2493     typename DoFHandler<dim>::active_cell_iterator stokes_cell(
2494       &triangulation, cell->level(), cell->index(), &stokes_dof_handler);
2495     scratch.stokes_fe_values.reinit(stokes_cell);
2496 
2497     scratch.temperature_fe_values.get_function_values(
2498       old_temperature_solution, scratch.old_temperature_values);
2499     scratch.temperature_fe_values.get_function_values(
2500       old_old_temperature_solution, scratch.old_old_temperature_values);
2501 
2502     scratch.temperature_fe_values.get_function_gradients(
2503       old_temperature_solution, scratch.old_temperature_grads);
2504     scratch.temperature_fe_values.get_function_gradients(
2505       old_old_temperature_solution, scratch.old_old_temperature_grads);
2506 
2507     scratch.temperature_fe_values.get_function_laplacians(
2508       old_temperature_solution, scratch.old_temperature_laplacians);
2509     scratch.temperature_fe_values.get_function_laplacians(
2510       old_old_temperature_solution, scratch.old_old_temperature_laplacians);
2511 
2512     scratch.stokes_fe_values[velocities].get_function_values(
2513       stokes_solution, scratch.old_velocity_values);
2514     scratch.stokes_fe_values[velocities].get_function_values(
2515       old_stokes_solution, scratch.old_old_velocity_values);
2516     scratch.stokes_fe_values[velocities].get_function_symmetric_gradients(
2517       stokes_solution, scratch.old_strain_rates);
2518     scratch.stokes_fe_values[velocities].get_function_symmetric_gradients(
2519       old_stokes_solution, scratch.old_old_strain_rates);
2520 
2521     const double nu =
2522       compute_viscosity(scratch.old_temperature_values,
2523                         scratch.old_old_temperature_values,
2524                         scratch.old_temperature_grads,
2525                         scratch.old_old_temperature_grads,
2526                         scratch.old_temperature_laplacians,
2527                         scratch.old_old_temperature_laplacians,
2528                         scratch.old_velocity_values,
2529                         scratch.old_old_velocity_values,
2530                         scratch.old_strain_rates,
2531                         scratch.old_old_strain_rates,
2532                         global_max_velocity,
2533                         global_T_range.second - global_T_range.first,
2534                         0.5 * (global_T_range.second + global_T_range.first),
2535                         global_entropy_variation,
2536                         cell->diameter());
2537 
2538     for (unsigned int q = 0; q < n_q_points; ++q)
2539       {
2540         for (unsigned int k = 0; k < dofs_per_cell; ++k)
2541           {
2542             scratch.phi_T[k] = scratch.temperature_fe_values.shape_value(k, q);
2543             scratch.grad_phi_T[k] =
2544               scratch.temperature_fe_values.shape_grad(k, q);
2545           }
2546 
2547 
2548         const double T_term_for_rhs =
2549           (use_bdf2_scheme ?
2550              (scratch.old_temperature_values[q] *
2551                 (1 + time_step / old_time_step) -
2552               scratch.old_old_temperature_values[q] * (time_step * time_step) /
2553                 (old_time_step * (time_step + old_time_step))) :
2554              scratch.old_temperature_values[q]);
2555 
2556         const double ext_T =
2557           (use_bdf2_scheme ? (scratch.old_temperature_values[q] *
2558                                 (1 + time_step / old_time_step) -
2559                               scratch.old_old_temperature_values[q] *
2560                                 time_step / old_time_step) :
2561                              scratch.old_temperature_values[q]);
2562 
2563         const Tensor<1, dim> ext_grad_T =
2564           (use_bdf2_scheme ? (scratch.old_temperature_grads[q] *
2565                                 (1 + time_step / old_time_step) -
2566                               scratch.old_old_temperature_grads[q] * time_step /
2567                                 old_time_step) :
2568                              scratch.old_temperature_grads[q]);
2569 
2570         const Tensor<1, dim> extrapolated_u =
2571           (use_bdf2_scheme ?
2572              (scratch.old_velocity_values[q] * (1 + time_step / old_time_step) -
2573               scratch.old_old_velocity_values[q] * time_step / old_time_step) :
2574              scratch.old_velocity_values[q]);
2575 
2576         const SymmetricTensor<2, dim> extrapolated_strain_rate =
2577           (use_bdf2_scheme ?
2578              (scratch.old_strain_rates[q] * (1 + time_step / old_time_step) -
2579               scratch.old_old_strain_rates[q] * time_step / old_time_step) :
2580              scratch.old_strain_rates[q]);
2581 
2582         const double gamma =
2583           ((EquationData::radiogenic_heating * EquationData::density(ext_T) +
2584             2 * EquationData::eta * extrapolated_strain_rate *
2585               extrapolated_strain_rate) /
2586            (EquationData::density(ext_T) * EquationData::specific_heat));
2587 
2588         for (unsigned int i = 0; i < dofs_per_cell; ++i)
2589           {
2590             data.local_rhs(i) +=
2591               (T_term_for_rhs * scratch.phi_T[i] -
2592                time_step * extrapolated_u * ext_grad_T * scratch.phi_T[i] -
2593                time_step * nu * ext_grad_T * scratch.grad_phi_T[i] +
2594                time_step * gamma * scratch.phi_T[i]) *
2595               scratch.temperature_fe_values.JxW(q);
2596 
2597             if (temperature_constraints.is_inhomogeneously_constrained(
2598                   data.local_dof_indices[i]))
2599               {
2600                 for (unsigned int j = 0; j < dofs_per_cell; ++j)
2601                   data.matrix_for_bc(j, i) +=
2602                     (scratch.phi_T[i] * scratch.phi_T[j] *
2603                        (use_bdf2_scheme ? ((2 * time_step + old_time_step) /
2604                                            (time_step + old_time_step)) :
2605                                           1.) +
2606                      scratch.grad_phi_T[i] * scratch.grad_phi_T[j] *
2607                        EquationData::kappa * time_step) *
2608                     scratch.temperature_fe_values.JxW(q);
2609               }
2610           }
2611       }
2612   }
2613 
2614 
2615   template <int dim>
2616   void BoussinesqFlowProblem<dim>::copy_local_to_global_temperature_rhs(
2617     const Assembly::CopyData::TemperatureRHS<dim> &data)
2618   {
2619     temperature_constraints.distribute_local_to_global(data.local_rhs,
2620                                                        data.local_dof_indices,
2621                                                        temperature_rhs,
2622                                                        data.matrix_for_bc);
2623   }
2624 
2625 
2626 
2627   // In the function that runs the WorkStream for actually calculating the
2628   // right hand side, we also generate the final matrix. As mentioned above,
2629   // it is a sum of the mass matrix and the Laplace matrix, times some time
2630   // step-dependent weight. This weight is specified by the BDF-2 time
2631   // integration scheme, see the introduction in step-31. What is new in this
2632   // tutorial program (in addition to the use of MPI parallelization and the
2633   // WorkStream class), is that we now precompute the temperature
2634   // preconditioner as well. The reason is that the setup of the Jacobi
2635   // preconditioner takes a noticeable time compared to the solver because we
2636   // usually only need between 10 and 20 iterations for solving the
2637   // temperature system (this might sound strange, as Jacobi really only
2638   // consists of a diagonal, but in Trilinos it is derived from a more general
2639   // framework for point relaxation preconditioners, which is a bit
2640   // inefficient). Hence, it is more efficient to precompute the
2641   // preconditioner, even though the matrix entries may slightly change
2642   // because the time step might change. This is not too big a problem because
2643   // we remesh every few time steps (and regenerate the preconditioner then).
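  //
  // Written out, the matrix formed at the top of this function is
  // $\frac{2k_n+k_{n-1}}{k_n+k_{n-1}}\,M + k_n\,S$ once the BDF-2 scheme is
  // active (i.e., from the second time step onwards), and the backward Euler
  // variant $M + k_n\,S$ in the very first time step. Here $M$ and $S$ are
  // the mass and stiffness matrices assembled above, and $k_n$, $k_{n-1}$
  // denote the current and previous time step sizes.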
2644   template <int dim>
2645   void BoussinesqFlowProblem<dim>::assemble_temperature_system(
2646     const double maximal_velocity)
2647   {
2648     const bool use_bdf2_scheme = (timestep_number != 0);
2649 
2650     if (use_bdf2_scheme == true)
2651       {
2652         temperature_matrix.copy_from(temperature_mass_matrix);
2653         temperature_matrix *=
2654           (2 * time_step + old_time_step) / (time_step + old_time_step);
2655         temperature_matrix.add(time_step, temperature_stiffness_matrix);
2656       }
2657     else
2658       {
2659         temperature_matrix.copy_from(temperature_mass_matrix);
2660         temperature_matrix.add(time_step, temperature_stiffness_matrix);
2661       }
2662 
2663     if (rebuild_temperature_preconditioner == true)
2664       {
2665         T_preconditioner =
2666           std::make_shared<TrilinosWrappers::PreconditionJacobi>();
2667         T_preconditioner->initialize(temperature_matrix);
2668         rebuild_temperature_preconditioner = false;
2669       }
2670 
2671     // The next part is computing the right hand side vectors.  To do so, we
2672     // first compute the average temperature $T_m$ that we use for evaluating
2673     // the artificial viscosity stabilization through the residual $E(T) =
2674     // (T-T_m)^2$. We do this by defining the midpoint between maximum and
2675     // minimum temperature as average temperature in the definition of the
2676     // entropy viscosity. An alternative would be to use the integral average,
2677     // but the results are not very sensitive to this choice. The rest then
2678     // only requires calling WorkStream::run again, binding the arguments to
2679     // the <code>local_assemble_temperature_rhs</code> function that are the
2680     // same in every call to the correct values:
2681     temperature_rhs = 0;
2682 
2683     const QGauss<dim> quadrature_formula(parameters.temperature_degree + 2);
2684     const std::pair<double, double> global_T_range =
2685       get_extrapolated_temperature_range();
2686 
2687     const double average_temperature =
2688       0.5 * (global_T_range.first + global_T_range.second);
2689     const double global_entropy_variation =
2690       get_entropy_variation(average_temperature);
2691 
2692     using CellFilter =
2693       FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
2694 
2695     auto worker =
2696       [this, global_T_range, maximal_velocity, global_entropy_variation](
2697         const typename DoFHandler<dim>::active_cell_iterator &cell,
2698         Assembly::Scratch::TemperatureRHS<dim> &              scratch,
2699         Assembly::CopyData::TemperatureRHS<dim> &             data) {
2700         this->local_assemble_temperature_rhs(global_T_range,
2701                                              maximal_velocity,
2702                                              global_entropy_variation,
2703                                              cell,
2704                                              scratch,
2705                                              data);
2706       };
2707 
2708     auto copier = [this](const Assembly::CopyData::TemperatureRHS<dim> &data) {
2709       this->copy_local_to_global_temperature_rhs(data);
2710     };
2711 
2712     WorkStream::run(CellFilter(IteratorFilters::LocallyOwnedCell(),
2713                                temperature_dof_handler.begin_active()),
2714                     CellFilter(IteratorFilters::LocallyOwnedCell(),
2715                                temperature_dof_handler.end()),
2716                     worker,
2717                     copier,
2718                     Assembly::Scratch::TemperatureRHS<dim>(
2719                       temperature_fe, stokes_fe, mapping, quadrature_formula),
2720                     Assembly::CopyData::TemperatureRHS<dim>(temperature_fe));
2721 
2722     temperature_rhs.compress(VectorOperation::add);
2723   }
2724 
2725 
2726 
2727   // @sect4{BoussinesqFlowProblem::solve}
2728 
2729   // This function solves the linear systems in each time step of the
2730   // Boussinesq problem. First, we work on the Stokes system and then on the
2731   // temperature system. In essence, it does the same things as the respective
2732   // function in step-31. However, there are a few changes here.
2733   //
2734   // The first change is related to the way we store our solution: we keep the
2735   // vectors with locally owned degrees of freedom plus ghost nodes on each
2736   // MPI node. When we enter a solver which is supposed to perform
2737   // matrix-vector products with a distributed matrix, this is not the
2738   // appropriate form, though. There, we will want to have the solution vector
2739   // to be distributed in the same way as the matrix, i.e. without any
2740   // ghosts. So what we do first is to generate a distributed vector called
2741   // <code>distributed_stokes_solution</code> and put only the locally owned
2742   // dofs into that, which is neatly done by the <code>operator=</code> of the
2743   // Trilinos vector.
2744   //
2745   // Next, we scale the pressure solution (or rather, the initial guess) for
2746   // the solver so that it matches with the length scales in the matrices, as
2747   // discussed in the introduction. We also immediately scale the pressure
2748   // solution back to the correct units after the solution is completed.  We
2749   // also need to set the pressure values at hanging nodes to zero. This we
2750   // also did in step-31 in order not to disturb the Schur complement by some
2751   // vector entries that actually are irrelevant during the solve stage. As a
2752   // difference to step-31, here we do it only for the locally owned pressure
2753   // dofs. After solving for the Stokes solution, each processor copies the
2754   // distributed solution back into the solution vector that also includes
2755   // ghost elements.
2756   //
2757   // The third and most obvious change is that we have two variants for the
2758   // Stokes solver: A fast solver that sometimes breaks down, and a robust
2759   // solver that is slower. This is what we already discussed in the
2760   // introduction. Here is how we realize it: First, we perform 30 iterations
2761   // with the fast solver, using the simple preconditioner based on a single AMG
2762   // V-cycle instead of an approximate inner solve (this is indicated by the
2763   // <code>false</code> argument to the
2764   // <code>LinearSolvers::BlockSchurPreconditioner</code> object). If we
2765   // converge, everything is fine. If we do not converge, the solver control
2766   // object will throw a SolverControl::NoConvergence exception. Usually,
2767   // this would abort the program because we don't catch it in our usual
2768   // <code>solve()</code> functions. This is certainly not what we want to
2769   // happen here. Rather, we want to switch to the strong solver and continue
2770   // the solution process with whatever vector we got so far. Hence, we catch
2771   // the exception with the C++ try/catch mechanism. We then simply go through
2772   // the same solver sequence again in the <code>catch</code> clause, this
2773   // time passing the @p true flag to the preconditioner for the stronger
2774   // solver, signaling an approximate inner CG solve for the velocity block.
2775   template <int dim>
2776   void BoussinesqFlowProblem<dim>::solve()
2777   {
2778     {
2779       TimerOutput::Scope timer_section(computing_timer,
2780                                        "   Solve Stokes system");
2781 
2782       pcout << "   Solving Stokes system... " << std::flush;
2783 
2784       TrilinosWrappers::MPI::BlockVector distributed_stokes_solution(
2785         stokes_rhs);
2786       distributed_stokes_solution = stokes_solution;
2787 
2788       distributed_stokes_solution.block(1) /= EquationData::pressure_scaling;
2789 
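      // The velocity degrees of freedom come first in the global numbering of
      // the block vector, so the locally owned pressure range in global
      // indices is the local range of block(1) shifted by block(0).size().
      // Within this range we zero out all constrained pressure entries (those
      // at hanging nodes):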
2790       const unsigned int
2791         start = (distributed_stokes_solution.block(0).size() +
2792                  distributed_stokes_solution.block(1).local_range().first),
2793         end   = (distributed_stokes_solution.block(0).size() +
2794                distributed_stokes_solution.block(1).local_range().second);
2795       for (unsigned int i = start; i < end; ++i)
2796         if (stokes_constraints.is_constrained(i))
2797           distributed_stokes_solution(i) = 0;
2798 
2799 
2800       PrimitiveVectorMemory<TrilinosWrappers::MPI::BlockVector> mem;
2801 
2802       unsigned int  n_iterations     = 0;
2803       const double  solver_tolerance = 1e-8 * stokes_rhs.l2_norm();
2804       SolverControl solver_control(30, solver_tolerance);
2805 
2806       try
2807         {
2808           const LinearSolvers::BlockSchurPreconditioner<
2809             TrilinosWrappers::PreconditionAMG,
2810             TrilinosWrappers::PreconditionJacobi>
2811             preconditioner(stokes_matrix,
2812                            stokes_preconditioner_matrix,
2813                            *Mp_preconditioner,
2814                            *Amg_preconditioner,
2815                            false);
2816 
2817           SolverFGMRES<TrilinosWrappers::MPI::BlockVector> solver(
2818             solver_control,
2819             mem,
2820             SolverFGMRES<TrilinosWrappers::MPI::BlockVector>::AdditionalData(
2821               30));
2822           solver.solve(stokes_matrix,
2823                        distributed_stokes_solution,
2824                        stokes_rhs,
2825                        preconditioner);
2826 
2827           n_iterations = solver_control.last_step();
2828         }
2829 
2830       catch (SolverControl::NoConvergence &)
2831         {
2832           const LinearSolvers::BlockSchurPreconditioner<
2833             TrilinosWrappers::PreconditionAMG,
2834             TrilinosWrappers::PreconditionJacobi>
2835             preconditioner(stokes_matrix,
2836                            stokes_preconditioner_matrix,
2837                            *Mp_preconditioner,
2838                            *Amg_preconditioner,
2839                            true);
2840 
2841           SolverControl solver_control_refined(stokes_matrix.m(),
2842                                                solver_tolerance);
2843           SolverFGMRES<TrilinosWrappers::MPI::BlockVector> solver(
2844             solver_control_refined,
2845             mem,
2846             SolverFGMRES<TrilinosWrappers::MPI::BlockVector>::AdditionalData(
2847               50));
2848           solver.solve(stokes_matrix,
2849                        distributed_stokes_solution,
2850                        stokes_rhs,
2851                        preconditioner);
2852 
2853           n_iterations =
2854             (solver_control.last_step() + solver_control_refined.last_step());
2855         }
2856 
2857 
2858       stokes_constraints.distribute(distributed_stokes_solution);
2859 
2860       distributed_stokes_solution.block(1) *= EquationData::pressure_scaling;
2861 
2862       stokes_solution = distributed_stokes_solution;
2863       pcout << n_iterations << " iterations." << std::endl;
2864     }
2865 
2866 
2867     // Now let's turn to the temperature part: First, we compute the time step
2868     // size. We found that we need smaller time steps for 3D than for 2D for
2869     // the shell geometry. This is because the cells are more distorted in
2870     // that case (it is the smallest edge length that determines the CFL
2871     // number). Instead of computing the time step from maximum velocity and
2872     // minimal mesh size as in step-31, we compute local CFL numbers, i.e., on
2873     // each cell we compute the maximum velocity divided by the mesh size, and
2874     // take the maximum of these numbers. Hence, we need to choose the factor in
2875     // front of the time step slightly smaller.
2876     //
2877     // After temperature right hand side assembly, we solve the linear system
2878     // for temperature (with fully distributed vectors without any ghosts),
2879     // apply constraints and copy the vector back to one with ghosts.
2880     //
2881     // In the end, we extract the temperature range similarly to step-31 to
2882     // produce some output (for example in order to help us choose the
2883     // stabilization constants, as discussed in the introduction). The only
2884     // difference is that we need to exchange maxima over all processors.
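    // Written as a formula, and assuming that <code>get_cfl_number()</code>
    // returns the largest ratio of velocity magnitude to cell size over all
    // cells, the statement below chooses
    // $k_n = \frac{c}{2.1\,d\,\sqrt{d}\;p_T\;\max_K(\|\mathbf u\|_{\infty,K}/h_K)}$,
    // where the prefactor $c$ is $1/4$ in 3d and $1$ in 2d, $d$ is the space
    // dimension, and $p_T$ is the polynomial degree of the temperature
    // element.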
2885     {
2886       TimerOutput::Scope timer_section(computing_timer,
2887                                        "   Assemble temperature rhs");
2888 
2889       old_time_step = time_step;
2890 
2891       const double scaling = (dim == 3 ? 0.25 : 1.0);
2892       time_step            = (scaling / (2.1 * dim * std::sqrt(1. * dim)) /
2893                    (parameters.temperature_degree * get_cfl_number()));
2894 
2895       const double maximal_velocity = get_maximal_velocity();
2896       pcout << "   Maximal velocity: "
2897             << maximal_velocity * EquationData::year_in_seconds * 100
2898             << " cm/year" << std::endl;
2899       pcout << "   "
2900             << "Time step: " << time_step / EquationData::year_in_seconds
2901             << " years" << std::endl;
2902 
2903       temperature_solution = old_temperature_solution;
2904       assemble_temperature_system(maximal_velocity);
2905     }
2906 
2907     {
2908       TimerOutput::Scope timer_section(computing_timer,
2909                                        "   Solve temperature system");
2910 
2911       SolverControl solver_control(temperature_matrix.m(),
2912                                    1e-12 * temperature_rhs.l2_norm());
2913       SolverCG<TrilinosWrappers::MPI::Vector> cg(solver_control);
2914 
2915       TrilinosWrappers::MPI::Vector distributed_temperature_solution(
2916         temperature_rhs);
2917       distributed_temperature_solution = temperature_solution;
2918 
2919       cg.solve(temperature_matrix,
2920                distributed_temperature_solution,
2921                temperature_rhs,
2922                *T_preconditioner);
2923 
2924       temperature_constraints.distribute(distributed_temperature_solution);
2925       temperature_solution = distributed_temperature_solution;
2926 
2927       pcout << "   " << solver_control.last_step()
2928             << " CG iterations for temperature" << std::endl;
2929 
2930       double temperature[2] = {std::numeric_limits<double>::max(),
2931                                -std::numeric_limits<double>::max()};
2932       double global_temperature[2];
2933 
2934       for (unsigned int i =
2935              distributed_temperature_solution.local_range().first;
2936            i < distributed_temperature_solution.local_range().second;
2937            ++i)
2938         {
2939           temperature[0] =
2940             std::min<double>(temperature[0],
2941                              distributed_temperature_solution(i));
2942           temperature[1] =
2943             std::max<double>(temperature[1],
2944                              distributed_temperature_solution(i));
2945         }
2946 
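      // To obtain both the global minimum and maximum with a single
      // reduction, we flip the sign of the local minimum, take the
      // element-wise maximum of the two-entry array across all processes, and
      // flip the sign of the result back: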
2947       temperature[0] *= -1.0;
2948       Utilities::MPI::max(temperature, MPI_COMM_WORLD, global_temperature);
2949       global_temperature[0] *= -1.0;
2950 
2951       pcout << "   Temperature range: " << global_temperature[0] << ' '
2952             << global_temperature[1] << std::endl;
2953     }
2954   }
2955 
2956 
2957   // @sect4{BoussinesqFlowProblem::output_results}
2958 
2959   // Next comes the function that generates the output. The quantities to
2960   // output could be introduced manually like we did in step-31. An
2961   // alternative is to hand this task over to a class Postprocessor that
2962   // inherits from the class DataPostprocessor, which can be attached to
2963   // DataOut. This allows us to output derived quantities from the solution,
2964   // like the friction heating included in this example. It overloads the
2965   // virtual function DataPostprocessor::evaluate_vector_field(),
2966   // which is then internally called from DataOut::build_patches(). We have to
2967   // give it values of the numerical solution, its derivatives, normals to the
2968   // cell, the actual evaluation points and any additional quantities. This
2969   // follows the same procedure as discussed in step-29 and other programs.
2970   template <int dim>
2971   class BoussinesqFlowProblem<dim>::Postprocessor
2972     : public DataPostprocessor<dim>
2973   {
2974   public:
2975     Postprocessor(const unsigned int partition, const double minimal_pressure);
2976 
2977     virtual void evaluate_vector_field(
2978       const DataPostprocessorInputs::Vector<dim> &inputs,
2979       std::vector<Vector<double>> &computed_quantities) const override;
2980 
2981     virtual std::vector<std::string> get_names() const override;
2982 
2983     virtual std::vector<
2984       DataComponentInterpretation::DataComponentInterpretation>
2985     get_data_component_interpretation() const override;
2986 
2987     virtual UpdateFlags get_needed_update_flags() const override;
2988 
2989   private:
2990     const unsigned int partition;
2991     const double       minimal_pressure;
2992   };
2993 
2994 
2995   template <int dim>
2996   BoussinesqFlowProblem<dim>::Postprocessor::Postprocessor(
2997     const unsigned int partition,
2998     const double       minimal_pressure)
2999     : partition(partition)
3000     , minimal_pressure(minimal_pressure)
3001   {}
3002 
3003 
3004   // Here we define the names for the variables we want to output. These are
3005   // the actual solution values for velocity, pressure, and temperature, as
3006   // well as the friction heating and to each cell the number of the processor
3007   // well as the friction heating and, for each cell, the number of the processor
3008   // among the processors. Except for the velocity, which is vector-valued,
3009   // all other quantities are scalar.
3010   template <int dim>
3011   std::vector<std::string>
3012   BoussinesqFlowProblem<dim>::Postprocessor::get_names() const
3013   {
3014     std::vector<std::string> solution_names(dim, "velocity");
3015     solution_names.emplace_back("p");
3016     solution_names.emplace_back("T");
3017     solution_names.emplace_back("friction_heating");
3018     solution_names.emplace_back("partition");
3019 
3020     return solution_names;
3021   }
3022 
3023 
3024   template <int dim>
3025   std::vector<DataComponentInterpretation::DataComponentInterpretation>
3026   BoussinesqFlowProblem<dim>::Postprocessor::get_data_component_interpretation()
3027     const
3028   {
3029     std::vector<DataComponentInterpretation::DataComponentInterpretation>
3030       interpretation(dim,
3031                      DataComponentInterpretation::component_is_part_of_vector);
3032 
3033     interpretation.push_back(DataComponentInterpretation::component_is_scalar);
3034     interpretation.push_back(DataComponentInterpretation::component_is_scalar);
3035     interpretation.push_back(DataComponentInterpretation::component_is_scalar);
3036     interpretation.push_back(DataComponentInterpretation::component_is_scalar);
3037 
3038     return interpretation;
3039   }
3040 
3041 
3042   template <int dim>
3043   UpdateFlags
3044   BoussinesqFlowProblem<dim>::Postprocessor::get_needed_update_flags() const
3045   {
3046     return update_values | update_gradients | update_quadrature_points;
3047   }
3048 
3049 
3050   // Now we implement the function that computes the derived quantities. As we
3051   // also did for the output, we rescale the velocity from its SI units to
3052   // something more readable, namely cm/year. Next, the pressure is scaled to
3053   // be between 0 and the maximum pressure. This makes it more easily
3054   // comparable -- in essence making all pressure variables positive or
3055   // zero. Temperature is taken as is, and the friction heating is computed as
3056   // $2 \eta \varepsilon(\mathbf{u}) \cdot \varepsilon(\mathbf{u})$.
3057   //
3058   // The quantities we output here are more for illustration than for
3059   // actual scientific value. We come back to this briefly in the results
3060   // section of this program and explain what one may in fact be interested in.
3061   template <int dim>
3062   void BoussinesqFlowProblem<dim>::Postprocessor::evaluate_vector_field(
3063     const DataPostprocessorInputs::Vector<dim> &inputs,
3064     std::vector<Vector<double>> &               computed_quantities) const
3065   {
3066     const unsigned int n_quadrature_points = inputs.solution_values.size();
3067     Assert(inputs.solution_gradients.size() == n_quadrature_points,
3068            ExcInternalError());
3069     Assert(computed_quantities.size() == n_quadrature_points,
3070            ExcInternalError());
3071     Assert(inputs.solution_values[0].size() == dim + 2, ExcInternalError());
3072 
3073     for (unsigned int q = 0; q < n_quadrature_points; ++q)
3074       {
3075         for (unsigned int d = 0; d < dim; ++d)
3076           computed_quantities[q](d) = (inputs.solution_values[q](d) *
3077                                        EquationData::year_in_seconds * 100);
3078 
3079         const double pressure =
3080           (inputs.solution_values[q](dim) - minimal_pressure);
3081         computed_quantities[q](dim) = pressure;
3082 
3083         const double temperature        = inputs.solution_values[q](dim + 1);
3084         computed_quantities[q](dim + 1) = temperature;
3085 
3086         Tensor<2, dim> grad_u;
3087         for (unsigned int d = 0; d < dim; ++d)
3088           grad_u[d] = inputs.solution_gradients[q][d];
3089         const SymmetricTensor<2, dim> strain_rate = symmetrize(grad_u);
3090         computed_quantities[q](dim + 2) =
3091           2 * EquationData::eta * strain_rate * strain_rate;
3092 
3093         computed_quantities[q](dim + 3) = partition;
3094       }
3095   }
3096 
3097 
3098   // The <code>output_results()</code> function has a similar task to the one
3099   // in step-31. However, here we are going to demonstrate a different
3100   // technique on how to merge output from different DoFHandler objects. The
3101   // way we're going to achieve this recombination is to create a joint
3102   // DoFHandler that collects both components, the Stokes solution and the
3103   // temperature solution. This can be nicely done by combining the finite
3104   // elements from the two systems to form one FESystem, and letting this
3105   // collective system define a new DoFHandler object. To be sure that
3106   // everything was done correctly, we perform a sanity check that ensures
3107   // that we got all the dofs from both Stokes and temperature even in the
3108   // combined system. We then combine the data vectors. Unfortunately, there
3109   // is no straightforward relation that tells us how to sort the Stokes and
3110   // temperature vectors into the joint vector. The way we can get around this
3111   // trouble is to rely on the information collected in the FESystem. For each
3112   // dof on a cell, the joint finite element knows to which equation component
3113   // (velocity component, pressure, or temperature) it belongs – that's the
3114   // information we need! So we step through all cells (with iterators into
3115   // all three DoFHandlers moving in sync), and for each joint cell dof, we
3116   // read out that component using the FiniteElement::system_to_base_index
3117   // function (see there for a description of what the various parts of its
3118   // return value contain). We also need to keep track of whether we're on a
3119   // Stokes dof or a temperature dof, which is contained in
3120   // joint_fe.system_to_base_index(i).first.first. Eventually, the dof_indices
3121   // data structures on each of the three systems tell us what the relation
3122   // between global vector and local dofs looks like on the present cell,
3123   // which concludes this tedious work. We make sure that each processor only
3124   // works on the subdomain it owns locally (and not on ghost or artificial
3125   // cells) when building the joint solution vector. The same will then have
3126   // to be done in DataOut::build_patches(), but that function does so
3127   // automatically.
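  //
  // As a concrete illustration of the interface used in the loop below: for
  // the joint element built as <code>FESystem<dim>(stokes_fe, 1,
  // temperature_fe, 1)</code>, the call
  // <code>joint_fe.system_to_base_index(i)</code> returns a pair of pairs
  // whose member <code>first.first</code> is the number of the base element
  // (0 for the Stokes part, 1 for the temperature part), and whose member
  // <code>second</code> is the index of the shape function within that base
  // element -- which is exactly the index we need to look up the
  // corresponding entry of <code>local_stokes_dof_indices</code> or
  // <code>local_temperature_dof_indices</code>.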
3128   //
3129   // What we end up with is a set of patches that we can write using the
3130   // functions in DataOutBase in a variety of output formats. Here, we then
3131   // have to pay attention that what each processor writes is really only its
3132   // own part of the domain, i.e. we will want to write each processor's
3133   // contribution into a separate file. This we do by adding an additional
3134   // number to the filename when we write the solution. This is not really
3135   // new; we did it similarly in step-40. Note that we write in the compressed
3136   // format @p .vtu instead of plain vtk files, which saves quite some
3137   // storage.
3138   //
3139   // All the rest of the work is done in the Postprocessor class.
3140   template <int dim>
3141   void BoussinesqFlowProblem<dim>::output_results()
3142   {
3143     TimerOutput::Scope timer_section(computing_timer, "Postprocessing");
3144 
3145     const FESystem<dim> joint_fe(stokes_fe, 1, temperature_fe, 1);
3146 
3147     DoFHandler<dim> joint_dof_handler(triangulation);
3148     joint_dof_handler.distribute_dofs(joint_fe);
3149     Assert(joint_dof_handler.n_dofs() ==
3150              stokes_dof_handler.n_dofs() + temperature_dof_handler.n_dofs(),
3151            ExcInternalError());
3152 
3153     TrilinosWrappers::MPI::Vector joint_solution;
3154     joint_solution.reinit(joint_dof_handler.locally_owned_dofs(),
3155                           MPI_COMM_WORLD);
3156 
3157     {
3158       std::vector<types::global_dof_index> local_joint_dof_indices(
3159         joint_fe.n_dofs_per_cell());
3160       std::vector<types::global_dof_index> local_stokes_dof_indices(
3161         stokes_fe.n_dofs_per_cell());
3162       std::vector<types::global_dof_index> local_temperature_dof_indices(
3163         temperature_fe.n_dofs_per_cell());
3164 
3165       typename DoFHandler<dim>::active_cell_iterator
3166         joint_cell       = joint_dof_handler.begin_active(),
3167         joint_endc       = joint_dof_handler.end(),
3168         stokes_cell      = stokes_dof_handler.begin_active(),
3169         temperature_cell = temperature_dof_handler.begin_active();
3170       for (; joint_cell != joint_endc;
3171            ++joint_cell, ++stokes_cell, ++temperature_cell)
3172         if (joint_cell->is_locally_owned())
3173           {
3174             joint_cell->get_dof_indices(local_joint_dof_indices);
3175             stokes_cell->get_dof_indices(local_stokes_dof_indices);
3176             temperature_cell->get_dof_indices(local_temperature_dof_indices);
3177 
3178             for (unsigned int i = 0; i < joint_fe.n_dofs_per_cell(); ++i)
3179               if (joint_fe.system_to_base_index(i).first.first == 0)
3180                 {
3181                   Assert(joint_fe.system_to_base_index(i).second <
3182                            local_stokes_dof_indices.size(),
3183                          ExcInternalError());
3184 
3185                   joint_solution(local_joint_dof_indices[i]) = stokes_solution(
3186                     local_stokes_dof_indices[joint_fe.system_to_base_index(i)
3187                                                .second]);
3188                 }
3189               else
3190                 {
3191                   Assert(joint_fe.system_to_base_index(i).first.first == 1,
3192                          ExcInternalError());
3193                   Assert(joint_fe.system_to_base_index(i).second <
3194                            local_temperature_dof_indices.size(),
3195                          ExcInternalError());
3196                   joint_solution(local_joint_dof_indices[i]) =
3197                     temperature_solution(
3198                       local_temperature_dof_indices
3199                         [joint_fe.system_to_base_index(i).second]);
3200                 }
3201           }
3202     }
3203 
3204     joint_solution.compress(VectorOperation::insert);
3205 
3206     IndexSet locally_relevant_joint_dofs(joint_dof_handler.n_dofs());
3207     DoFTools::extract_locally_relevant_dofs(joint_dof_handler,
3208                                             locally_relevant_joint_dofs);
3209     TrilinosWrappers::MPI::Vector locally_relevant_joint_solution;
3210     locally_relevant_joint_solution.reinit(locally_relevant_joint_dofs,
3211                                            MPI_COMM_WORLD);
3212     locally_relevant_joint_solution = joint_solution;
3213 
3214     Postprocessor postprocessor(Utilities::MPI::this_mpi_process(
3215                                   MPI_COMM_WORLD),
3216                                 stokes_solution.block(1).min());
3217 
3218     DataOut<dim> data_out;
3219     data_out.attach_dof_handler(joint_dof_handler);
3220     data_out.add_data_vector(locally_relevant_joint_solution, postprocessor);
3221     data_out.build_patches();
3222 
3223     static int out_index = 0;
3224     data_out.write_vtu_with_pvtu_record(
3225       "./", "solution", out_index, MPI_COMM_WORLD, 5);
3226 
3227     out_index++;
3228   }
3229 
3230 
3231 
3232   // @sect4{BoussinesqFlowProblem::refine_mesh}
3233 
3234   // This function isn't really new either. Since the <code>setup_dofs</code>
3235   // function that we call in the middle has its own timer section, we split
3236   // timing this function into two sections. It will also allow us to easily
3237   // identify which of the two is more expensive.
3238   //
3239   // One thing of note, however, is that we only want to compute error
3240   // indicators on the locally owned subdomain. In order to achieve this, we
3241   // pass one additional argument to the KellyErrorEstimator::estimate
3242   // function. Note that the vector for error estimates is resized to the
3243   // number of active cells present on the current process, which is less than
3244   // the total number of active cells on all processors (but more than the
3245   // number of locally owned active cells); each processor only has a few
3246   // coarse cells around the locally owned ones, as also explained in step-40.
3247   //
3248   // The local error estimates are then handed to a %parallel version of
3249   // GridRefinement (in namespace parallel::distributed::GridRefinement, see
3250   // also step-40) which looks at the errors and finds the cells that need
3251   // refinement by comparing the error values across processors. As in
3252   // step-31, we want to limit the maximum grid level. So in case some cells
3253   // have been marked that are already at the finest level, we simply clear
3254   // the refine flags.
3255   template <int dim>
3256   void
3257   BoussinesqFlowProblem<dim>::refine_mesh(const unsigned int max_grid_level)
3258   {
3259     parallel::distributed::SolutionTransfer<dim, TrilinosWrappers::MPI::Vector>
3260       temperature_trans(temperature_dof_handler);
3261     parallel::distributed::SolutionTransfer<dim,
3262                                             TrilinosWrappers::MPI::BlockVector>
3263       stokes_trans(stokes_dof_handler);
3264 
3265     {
3266       TimerOutput::Scope timer_section(computing_timer,
3267                                        "Refine mesh structure, part 1");
3268 
3269       Vector<float> estimated_error_per_cell(triangulation.n_active_cells());
3270 
3271       KellyErrorEstimator<dim>::estimate(
3272         temperature_dof_handler,
3273         QGauss<dim - 1>(parameters.temperature_degree + 1),
3274         std::map<types::boundary_id, const Function<dim> *>(),
3275         temperature_solution,
3276         estimated_error_per_cell,
3277         ComponentMask(),
3278         nullptr,
3279         0,
3280         triangulation.locally_owned_subdomain());
3281 
3282       parallel::distributed::GridRefinement::refine_and_coarsen_fixed_fraction(
3283         triangulation, estimated_error_per_cell, 0.3, 0.1);
3284 
3285       if (triangulation.n_levels() > max_grid_level)
3286         for (typename Triangulation<dim>::active_cell_iterator cell =
3287                triangulation.begin_active(max_grid_level);
3288              cell != triangulation.end();
3289              ++cell)
3290           cell->clear_refine_flag();
3291 
3292       // With all flags marked as necessary, we can then tell the
3293       // parallel::distributed::SolutionTransfer objects to get ready to
3294       // transfer data from one mesh to the next, which they will do when
3295       // notified by
3296       // Triangulation as part of the @p execute_coarsening_and_refinement() call.
3297       // The syntax is similar to the non-%parallel solution transfer (with the
3298       // exception that here a pointer to the vector entries is enough). The
3299       // remainder of the function further down below is then concerned with
3300       // setting up the data structures again after mesh refinement and
3301       // restoring the solution vectors on the new mesh.
3302       std::vector<const TrilinosWrappers::MPI::Vector *> x_temperature(2);
3303       x_temperature[0] = &temperature_solution;
3304       x_temperature[1] = &old_temperature_solution;
3305       std::vector<const TrilinosWrappers::MPI::BlockVector *> x_stokes(2);
3306       x_stokes[0] = &stokes_solution;
3307       x_stokes[1] = &old_stokes_solution;
3308 
3309       triangulation.prepare_coarsening_and_refinement();
3310 
3311       temperature_trans.prepare_for_coarsening_and_refinement(x_temperature);
3312       stokes_trans.prepare_for_coarsening_and_refinement(x_stokes);
3313 
3314       triangulation.execute_coarsening_and_refinement();
3315     }
3316 
3317     setup_dofs();
3318 
3319     {
3320       TimerOutput::Scope timer_section(computing_timer,
3321                                        "Refine mesh structure, part 2");
3322 
3323       {
3324         TrilinosWrappers::MPI::Vector distributed_temp1(temperature_rhs);
3325         TrilinosWrappers::MPI::Vector distributed_temp2(temperature_rhs);
3326 
3327         std::vector<TrilinosWrappers::MPI::Vector *> tmp(2);
3328         tmp[0] = &(distributed_temp1);
3329         tmp[1] = &(distributed_temp2);
3330         temperature_trans.interpolate(tmp);
3331 
3332         // enforce constraints to make the interpolated solution conforming on
3333         // the new mesh:
3334         temperature_constraints.distribute(distributed_temp1);
3335         temperature_constraints.distribute(distributed_temp2);
3336 
3337         temperature_solution     = distributed_temp1;
3338         old_temperature_solution = distributed_temp2;
3339       }
3340 
3341       {
3342         TrilinosWrappers::MPI::BlockVector distributed_stokes(stokes_rhs);
3343         TrilinosWrappers::MPI::BlockVector old_distributed_stokes(stokes_rhs);
3344 
3345         std::vector<TrilinosWrappers::MPI::BlockVector *> stokes_tmp(2);
3346         stokes_tmp[0] = &(distributed_stokes);
3347         stokes_tmp[1] = &(old_distributed_stokes);
3348 
3349         stokes_trans.interpolate(stokes_tmp);
3350 
3351         // enforce constraints to make the interpolated solution conforming on
3352         // the new mesh:
3353         stokes_constraints.distribute(distributed_stokes);
3354         stokes_constraints.distribute(old_distributed_stokes);
3355 
3356         stokes_solution     = distributed_stokes;
3357         old_stokes_solution = old_distributed_stokes;
3358       }
3359     }
3360   }
3361 
3362 
3363 
3364   // @sect4{BoussinesqFlowProblem::run}
3365 
3366   // This is the final and controlling function in this class. It, in fact,
3367   // runs the entire rest of the program and is, once more, very similar to
3368   // step-31. The only substantial difference is that we use a different mesh
3369   // now (a GridGenerator::hyper_shell instead of a simple cube geometry).
3370   template <int dim>
3371   void BoussinesqFlowProblem<dim>::run()
3372   {
3373     GridGenerator::hyper_shell(triangulation,
3374                                Point<dim>(),
3375                                EquationData::R0,
3376                                EquationData::R1,
3377                                (dim == 3) ? 96 : 12,
3378                                true);
3379 
3380     global_Omega_diameter = GridTools::diameter(triangulation);
3381 
3382     triangulation.refine_global(parameters.initial_global_refinement);
3383 
3384     setup_dofs();
3385 
3386     unsigned int pre_refinement_step = 0;
3387 
3388   start_time_iteration:
3389 
3390     {
3391       TrilinosWrappers::MPI::Vector solution(
3392         temperature_dof_handler.locally_owned_dofs());
3393       // VectorTools::project supports parallel vector classes with most
3394       // standard finite elements via deal.II's own native MatrixFree framework:
3395       // since we use standard Lagrange elements of moderate order this function
3396       // works well here.
3397       VectorTools::project(temperature_dof_handler,
3398                            temperature_constraints,
3399                            QGauss<dim>(parameters.temperature_degree + 2),
3400                            EquationData::TemperatureInitialValues<dim>(),
3401                            solution);
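      // (As an aside: if we ever switched to an element for which
      // VectorTools::project cannot handle distributed vectors, a simple --
      // though less accurate -- alternative would be to interpolate the
      // initial values instead. A minimal sketch of that variant, using the
      // same objects as above, could look like
      // @code
      //   VectorTools::interpolate(temperature_dof_handler,
      //                            EquationData::TemperatureInitialValues<dim>(),
      //                            solution);
      //   temperature_constraints.distribute(solution);
      // @endcode
      // With the Lagrange elements used here, however, project() is the
      // better choice.)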
3402       // Having so computed the current temperature field, let us set the member
3403       // variable that holds the temperature nodes. Strictly speaking, we really
3404       // only need to set <code>old_temperature_solution</code> since the first
3405       // thing we will do is to compute the Stokes solution that only requires
3406       // the previous time step's temperature field. That said, nothing good can
3407       // come from not initializing the other vectors as well (especially since
3408       // it's a relatively cheap operation and we only have to do it once at the
3409       // beginning of the program) if we ever want to extend our numerical
3410       // method or physical model, and so we initialize
3411       // <code>temperature_solution</code> and
3412       // <code>old_old_temperature_solution</code> as well. The assignment makes
3413       // sure that the vectors on the left hand side (which were initialized to
3414       // contain ghost elements as well) also get the correct ghost elements. In
3415       // other words, the assignment here requires communication between
3416       // processors:
3417       temperature_solution         = solution;
3418       old_temperature_solution     = solution;
3419       old_old_temperature_solution = solution;
3420     }
3421 
3422     timestep_number = 0;
3423     time_step = old_time_step = 0;
3424 
3425     double time = 0;
3426 
3427     do
3428       {
3429         pcout << "Timestep " << timestep_number
3430               << ":  t=" << time / EquationData::year_in_seconds << " years"
3431               << std::endl;
3432 
3433         assemble_stokes_system();
3434         build_stokes_preconditioner();
3435         assemble_temperature_matrix();
3436 
3437         solve();
3438 
3439         pcout << std::endl;
3440 
3441         if ((timestep_number == 0) &&
3442             (pre_refinement_step < parameters.initial_adaptive_refinement))
3443           {
3444             refine_mesh(parameters.initial_global_refinement +
3445                         parameters.initial_adaptive_refinement);
3446             ++pre_refinement_step;
3447             goto start_time_iteration;
3448           }
3449         else if ((timestep_number > 0) &&
3450                  (timestep_number % parameters.adaptive_refinement_interval ==
3451                   0))
3452           refine_mesh(parameters.initial_global_refinement +
3453                       parameters.initial_adaptive_refinement);
3454 
3455         if ((parameters.generate_graphical_output == true) &&
3456             (timestep_number % parameters.graphical_output_interval == 0))
3457           output_results();
3458 
3459         // In order to speed up linear solvers, we extrapolate the solutions
3460         // from the old time levels to the new one. This gives a very good
3461         // initial guess, cutting the number of iterations needed in solvers
3462         // by more than one half. We do not need to extrapolate in the last
3463         // iteration, so if we reached the final time, we stop here.
3464         //
3465         // As the last thing during a time step (before actually bumping up
3466         // the number of the time step), we check whether the current time
3467         // step number is divisible by 100, and if so we let the computing
3468         // timer print a summary of CPU times spent so far.
3469         if (time > parameters.end_time * EquationData::year_in_seconds)
3470           break;
3471 
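        // Concretely, we first shift the solution vectors by one time level
        // and then form the linear extrapolant in time,
        //   u_guess = (1 + k_n/k_{n-1}) * u^n - (k_n/k_{n-1}) * u^{n-1},
        // where k_n = time_step and k_{n-1} = old_time_step; this is exactly
        // what the sadd() calls below compute for both the Stokes and the
        // temperature solution.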
3472         TrilinosWrappers::MPI::BlockVector old_old_stokes_solution;
3473         old_old_stokes_solution      = old_stokes_solution;
3474         old_stokes_solution          = stokes_solution;
3475         old_old_temperature_solution = old_temperature_solution;
3476         old_temperature_solution     = temperature_solution;
3477         if (old_time_step > 0)
3478           {
3479             // Trilinos' sadd() cannot operate on vectors with ghost entries,
3480             // not even as input, so we copy into fully distributed vectors first:
3481             {
3482               TrilinosWrappers::MPI::BlockVector distr_solution(stokes_rhs);
3483               distr_solution = stokes_solution;
3484               TrilinosWrappers::MPI::BlockVector distr_old_solution(stokes_rhs);
3485               distr_old_solution = old_old_stokes_solution;
3486               distr_solution.sadd(1. + time_step / old_time_step,
3487                                   -time_step / old_time_step,
3488                                   distr_old_solution);
3489               stokes_solution = distr_solution;
3490             }
3491             {
3492               TrilinosWrappers::MPI::Vector distr_solution(temperature_rhs);
3493               distr_solution = temperature_solution;
3494               TrilinosWrappers::MPI::Vector distr_old_solution(temperature_rhs);
3495               distr_old_solution = old_old_temperature_solution;
3496               distr_solution.sadd(1. + time_step / old_time_step,
3497                                   -time_step / old_time_step,
3498                                   distr_old_solution);
3499               temperature_solution = distr_solution;
3500             }
3501           }
3502 
3503         if ((timestep_number > 0) && (timestep_number % 100 == 0))
3504           computing_timer.print_summary();
3505 
3506         time += time_step;
3507         ++timestep_number;
3508       }
3509     while (true);
3510 
3511     // If we are generating graphical output, do so also for the last time
3512     // step unless we had just done so before leaving the do-while loop.
3513     if ((parameters.generate_graphical_output == true) &&
3514         !((timestep_number - 1) % parameters.graphical_output_interval == 0))
3515       output_results();
3516   }
3517 } // namespace Step32
3518 
3519 
3520 
3521 // @sect3{The <code>main</code> function}
3522 
3523 // The main function is short as usual and very similar to the one in
3524 // step-31. Since we use a parameter file which is specified as an argument in
3525 // the command line, we have to read it in here and pass it on to the
3526 // Parameters class for parsing. If no filename is given in the command line,
3527 // we simply use the <code>step-32.prm</code> file which is distributed
3528 // together with the program.
3529 //
3530 // Because 3d computations are simply very slow unless you throw a lot of
3531 // processors at them, the program defaults to 2d. You can get the 3d version
3532 // by changing the constant dimension below to 3.
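// A typical invocation of the compiled program might then look like
// @code
//   mpirun -np 16 ./step-32 step-32.prm
// @endcode
// where the number of MPI processes and the name of the parameter file of
// course depend on your own setup.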
3533 int main(int argc, char *argv[])
3534 {
3535   try
3536     {
3537       using namespace Step32;
3538       using namespace dealii;
3539 
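      // The last argument of MPI_InitFinalize below determines how many
      // threads each MPI process may use for shared-memory parallelism;
      // passing numbers::invalid_unsigned_int lets deal.II choose the number
      // of threads automatically based on the cores available to each
      // process.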
3540       Utilities::MPI::MPI_InitFinalize mpi_initialization(
3541         argc, argv, numbers::invalid_unsigned_int);
3542 
3543       std::string parameter_filename;
3544       if (argc >= 2)
3545         parameter_filename = argv[1];
3546       else
3547         parameter_filename = "step-32.prm";
3548 
3549       const int                              dim = 2;
3550       BoussinesqFlowProblem<dim>::Parameters parameters(parameter_filename);
3551       BoussinesqFlowProblem<dim>             flow_problem(parameters);
3552       flow_problem.run();
3553     }
3554   catch (std::exception &exc)
3555     {
3556       std::cerr << std::endl
3557                 << std::endl
3558                 << "----------------------------------------------------"
3559                 << std::endl;
3560       std::cerr << "Exception on processing: " << std::endl
3561                 << exc.what() << std::endl
3562                 << "Aborting!" << std::endl
3563                 << "----------------------------------------------------"
3564                 << std::endl;
3565 
3566       return 1;
3567     }
3568   catch (...)
3569     {
3570       std::cerr << std::endl
3571                 << std::endl
3572                 << "----------------------------------------------------"
3573                 << std::endl;
3574       std::cerr << "Unknown exception!" << std::endl
3575                 << "Aborting!" << std::endl
3576                 << "----------------------------------------------------"
3577                 << std::endl;
3578       return 1;
3579     }
3580 
3581   return 0;
3582 }
3583