1 /* ---------------------------------------------------------------------
2 *
3 * Copyright (C) 2008 - 2020 by the deal.II authors
4 *
5 * This file is part of the deal.II library.
6 *
7 * The deal.II library is free software; you can use it, redistribute
8 * it, and/or modify it under the terms of the GNU Lesser General
9 * Public License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 * The full text of the license can be found in the file LICENSE.md at
12 * the top level directory of deal.II.
13 *
14 * ---------------------------------------------------------------------
15
16 *
17 * Authors: Martin Kronbichler, Uppsala University,
18 * Wolfgang Bangerth, Texas A&M University,
19 * Timo Heister, University of Goettingen, 2008-2011
20 */
21
22
23 // @sect3{Include files}
24
// The first task, as usual, is to include the functionality of a number of
// well-known deal.II library files, along with some C++ header files.
27 #include <deal.II/base/quadrature_lib.h>
28 #include <deal.II/base/logstream.h>
29 #include <deal.II/base/function.h>
30 #include <deal.II/base/utilities.h>
31 #include <deal.II/base/conditional_ostream.h>
32 #include <deal.II/base/work_stream.h>
33 #include <deal.II/base/timer.h>
34 #include <deal.II/base/parameter_handler.h>
35
36 #include <deal.II/lac/full_matrix.h>
37 #include <deal.II/lac/solver_bicgstab.h>
38 #include <deal.II/lac/solver_cg.h>
39 #include <deal.II/lac/solver_gmres.h>
40 #include <deal.II/lac/affine_constraints.h>
41 #include <deal.II/lac/block_sparsity_pattern.h>
42 #include <deal.II/lac/trilinos_parallel_block_vector.h>
43 #include <deal.II/lac/trilinos_sparse_matrix.h>
44 #include <deal.II/lac/trilinos_block_sparse_matrix.h>
45 #include <deal.II/lac/trilinos_precondition.h>
46 #include <deal.II/lac/trilinos_solver.h>
47
48 #include <deal.II/grid/tria.h>
49 #include <deal.II/grid/grid_generator.h>
50 #include <deal.II/grid/tria_accessor.h>
51 #include <deal.II/grid/tria_iterator.h>
52 #include <deal.II/grid/filtered_iterator.h>
53 #include <deal.II/grid/manifold_lib.h>
54 #include <deal.II/grid/grid_tools.h>
55 #include <deal.II/grid/grid_refinement.h>
56
57 #include <deal.II/dofs/dof_handler.h>
58 #include <deal.II/dofs/dof_renumbering.h>
59 #include <deal.II/dofs/dof_accessor.h>
60 #include <deal.II/dofs/dof_tools.h>
61
62 #include <deal.II/fe/fe_q.h>
63 #include <deal.II/fe/fe_dgq.h>
64 #include <deal.II/fe/fe_dgp.h>
65 #include <deal.II/fe/fe_system.h>
66 #include <deal.II/fe/fe_values.h>
67 #include <deal.II/fe/mapping_q.h>
68
69 #include <deal.II/numerics/vector_tools.h>
70 #include <deal.II/numerics/matrix_tools.h>
71 #include <deal.II/numerics/data_out.h>
72 #include <deal.II/numerics/error_estimator.h>
73 #include <deal.II/numerics/solution_transfer.h>
74
75 #include <fstream>
76 #include <iostream>
77 #include <limits>
78 #include <locale>
79 #include <string>
80
81 // This is the only include file that is new: It introduces the
82 // parallel::distributed::SolutionTransfer equivalent of the
// dealii::SolutionTransfer class to take a solution from one mesh to the next
84 // one upon mesh refinement, but in the case of parallel distributed
85 // triangulations:
86 #include <deal.II/distributed/solution_transfer.h>
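// As a rough sketch of how this class is typically used (the vector and
// DoFHandler names here are only illustrative, not part of this program), one
// prepares the transfer before refining the mesh and interpolates onto the
// new mesh afterwards:
// @code
//   parallel::distributed::SolutionTransfer<dim, TrilinosWrappers::MPI::Vector>
//     transfer(dof_handler);
//   transfer.prepare_for_coarsening_and_refinement(old_solution);
//   triangulation.execute_coarsening_and_refinement();
//   // ... redistribute degrees of freedom and reinitialize vectors ...
//   transfer.interpolate(new_solution);
// @endcode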
87
88 // The following classes are used in parallel distributed computations and
89 // have all already been introduced in step-40:
90 #include <deal.II/base/index_set.h>
91 #include <deal.II/distributed/tria.h>
92 #include <deal.II/distributed/grid_refinement.h>
93
94
95 // The next step is like in all previous tutorial programs: We put everything
96 // into a namespace of its own and then import the deal.II classes and
97 // functions into it:
98 namespace Step32
99 {
100 using namespace dealii;
101
102 // @sect3{Equation data}
103
104 // In the following namespace, we define the various pieces of equation data
105 // that describe the problem. This corresponds to the various aspects of
106 // making the problem at least slightly realistic and that were exhaustively
107 // discussed in the description of the testcase in the introduction.
108 //
109 // We start with a few coefficients that have constant values (the comment
110 // after the value indicates its physical units):
111 namespace EquationData
112 {
113 constexpr double eta = 1e21; /* Pa s */
114 constexpr double kappa = 1e-6; /* m^2 / s */
115 constexpr double reference_density = 3300; /* kg / m^3 */
116 constexpr double reference_temperature = 293; /* K */
117 constexpr double expansion_coefficient = 2e-5; /* 1/K */
118 constexpr double specific_heat = 1250; /* J / K / kg */
119 constexpr double radiogenic_heating = 7.4e-12; /* W / kg */
120
121
122 constexpr double R0 = 6371000. - 2890000.; /* m */
123 constexpr double R1 = 6371000. - 35000.; /* m */
124
125 constexpr double T0 = 4000 + 273; /* K */
126 constexpr double T1 = 700 + 273; /* K */
127
128
    // The next set of definitions is for functions that encode the density
130 // as a function of temperature, the gravity vector, and the initial
131 // values for the temperature. Again, all of these (along with the values
132 // they compute) are discussed in the introduction:
    double density(const double temperature)
134 {
135 return (
136 reference_density *
137 (1 - expansion_coefficient * (temperature - reference_temperature)));
138 }
139
140
141 template <int dim>
    Tensor<1, dim> gravity_vector(const Point<dim> &p)
143 {
144 const double r = p.norm();
145 return -(1.245e-6 * r + 7.714e13 / r / r) * p / r;
146 }
147
148
149
150 template <int dim>
151 class TemperatureInitialValues : public Function<dim>
152 {
153 public:
      TemperatureInitialValues()
155 : Function<dim>(1)
156 {}
157
158 virtual double value(const Point<dim> & p,
159 const unsigned int component = 0) const override;
160
161 virtual void vector_value(const Point<dim> &p,
162 Vector<double> & value) const override;
163 };
164
165
166
167 template <int dim>
    double TemperatureInitialValues<dim>::value(const Point<dim> &p,
169 const unsigned int) const
170 {
171 const double r = p.norm();
172 const double h = R1 - R0;
173
174 const double s = (r - R0) / h;
      const double q =
        (dim == 3) ?
          std::max(0.0, std::cos(numbers::PI * std::abs(p(2) / R1))) :
          1.0;
177 const double phi = std::atan2(p(0), p(1));
178 const double tau = s + 0.2 * s * (1 - s) * std::sin(6 * phi) * q;
179
180 return T0 * (1.0 - tau) + T1 * tau;
181 }
182
183
184 template <int dim>
185 void
    TemperatureInitialValues<dim>::vector_value(const Point<dim> &p,
187 Vector<double> & values) const
188 {
189 for (unsigned int c = 0; c < this->n_components; ++c)
190 values(c) = TemperatureInitialValues<dim>::value(p, c);
191 }
192
193
194 // As mentioned in the introduction we need to rescale the pressure to
195 // avoid the relative ill-conditioning of the momentum and mass
196 // conservation equations. The scaling factor is $\frac{\eta}{L}$ where
    // $L$ is a typical length scale. Experimentation shows that a good
    // length scale is the diameter of plumes, which is around 10 km:
199 constexpr double pressure_scaling = eta / 10000;
200
201 // The final number in this namespace is a constant that denotes the
202 // number of seconds per (average, tropical) year. We use this only when
203 // generating screen output: internally, all computations of this program
204 // happen in SI units (kilogram, meter, seconds) but writing geological
205 // times in seconds yields numbers that one can't relate to reality, and
206 // so we convert to years using the factor defined here:
207 const double year_in_seconds = 60 * 60 * 24 * 365.2425;
208
209 } // namespace EquationData
210
211
212
213 // @sect3{Preconditioning the Stokes system}
214
215 // This namespace implements the preconditioner. As discussed in the
  // introduction, this preconditioner differs in a number of key respects
217 // from the one used in step-31. Specifically, it is a right preconditioner,
218 // implementing the matrix
219 // @f{align*}
220 // \left(\begin{array}{cc}A^{-1} & B^T
221 // \\0 & S^{-1}
222 // \end{array}\right)
223 // @f}
224 // where the two inverse matrix operations
225 // are approximated by linear solvers or, if the right flag is given to the
226 // constructor of this class, by a single AMG V-cycle for the velocity
227 // block. The three code blocks of the <code>vmult</code> function implement
228 // the multiplications with the three blocks of this preconditioner matrix
  // and should be self-explanatory if you have read through step-31 or the
230 // discussion of composing solvers in step-20.
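  //
  // Because the application of this preconditioner involves inner iterative
  // solves, the operator it represents changes slightly from one outer
  // iteration to the next. A sketch of how one might therefore combine it
  // with a flexible Krylov method (the matrix and vector names below are
  // only illustrative):
  // @code
  //   SolverFGMRES<TrilinosWrappers::MPI::BlockVector> solver(solver_control);
  //   solver.solve(stokes_matrix, solution, rhs, preconditioner);
  // @endcode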
231 namespace LinearSolvers
232 {
233 template <class PreconditionerTypeA, class PreconditionerTypeMp>
234 class BlockSchurPreconditioner : public Subscriptor
235 {
236 public:
      BlockSchurPreconditioner(const TrilinosWrappers::BlockSparseMatrix &S,
238 const TrilinosWrappers::BlockSparseMatrix &Spre,
239 const PreconditionerTypeMp &Mppreconditioner,
240 const PreconditionerTypeA & Apreconditioner,
241 const bool do_solve_A)
242 : stokes_matrix(&S)
243 , stokes_preconditioner_matrix(&Spre)
244 , mp_preconditioner(Mppreconditioner)
245 , a_preconditioner(Apreconditioner)
246 , do_solve_A(do_solve_A)
247 {}
248
      void vmult(TrilinosWrappers::MPI::BlockVector &      dst,
250 const TrilinosWrappers::MPI::BlockVector &src) const
251 {
252 TrilinosWrappers::MPI::Vector utmp(src.block(0));
253
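        // First block of the preconditioner: approximately invert the Schur
        // complement by an inner CG solve with the pressure mass matrix
        // stored in the (1,1) block of the preconditioner matrix, then flip
        // the sign of the result to account for the sign convention of the
        // Schur complement.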
254 {
255 SolverControl solver_control(5000, 1e-6 * src.block(1).l2_norm());
256
257 SolverCG<TrilinosWrappers::MPI::Vector> solver(solver_control);
258
259 solver.solve(stokes_preconditioner_matrix->block(1, 1),
260 dst.block(1),
261 src.block(1),
262 mp_preconditioner);
263
264 dst.block(1) *= -1.0;
265 }
266
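        // Second block: form the velocity-space residual
        // utmp = src.block(0) - B^T * dst.block(1), using the (0,1) block of
        // the Stokes matrix for the multiplication with B^T.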
267 {
268 stokes_matrix->block(0, 1).vmult(utmp, dst.block(1));
269 utmp *= -1.0;
270 utmp.add(src.block(0));
271 }
272
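        // Third block: apply an approximate inverse of the velocity block to
        // utmp; depending on the do_solve_A flag this is either an inner CG
        // solve with a rather loose tolerance or a single application of the
        // AMG preconditioner.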
273 if (do_solve_A == true)
274 {
275 SolverControl solver_control(5000, utmp.l2_norm() * 1e-2);
276 TrilinosWrappers::SolverCG solver(solver_control);
277 solver.solve(stokes_matrix->block(0, 0),
278 dst.block(0),
279 utmp,
280 a_preconditioner);
281 }
282 else
283 a_preconditioner.vmult(dst.block(0), utmp);
284 }
285
286 private:
287 const SmartPointer<const TrilinosWrappers::BlockSparseMatrix>
288 stokes_matrix;
289 const SmartPointer<const TrilinosWrappers::BlockSparseMatrix>
290 stokes_preconditioner_matrix;
291 const PreconditionerTypeMp &mp_preconditioner;
292 const PreconditionerTypeA & a_preconditioner;
293 const bool do_solve_A;
294 };
295 } // namespace LinearSolvers
296
297
298
299 // @sect3{Definition of assembly data structures}
300 //
301 // As described in the introduction, we will use the WorkStream mechanism
  // discussed in the @ref threads module to parallelize operations among the
  // processor cores of a single machine. The WorkStream class requires that
  // data is passed around in two kinds of data structures, one for scratch data
305 // and one to pass data from the assembly function to the function that
306 // copies local contributions into global objects.
307 //
308 // The following namespace (and the two sub-namespaces) contains a
309 // collection of data structures that serve this purpose, one pair for each
310 // of the four operations discussed in the introduction that we will want to
311 // parallelize. Each assembly routine gets two sets of data: a Scratch array
312 // that collects all the classes and arrays that are used for the
313 // calculation of the cell contribution, and a CopyData array that keeps
314 // local matrices and vectors which will be written into the global
315 // matrix. Whereas CopyData is a container for the final data that is
316 // written into the global matrices and vector (and, thus, absolutely
317 // necessary), the Scratch arrays are merely there for performance reasons
318 // — it would be much more expensive to set up a FEValues object on
  // each cell than to create it only once and update some derivative data.
320 //
321 // Step-31 had four assembly routines: One for the preconditioner matrix of
322 // the Stokes system, one for the Stokes matrix and right hand side, one for
323 // the temperature matrices and one for the right hand side of the
324 // temperature equation. We here organize the scratch arrays and CopyData
325 // objects for each of those four assembly components using a
326 // <code>struct</code> environment (since we consider these as temporary
327 // objects we pass around, rather than classes that implement functionality
328 // of their own, though this is a more subjective point of view to
329 // distinguish between <code>struct</code>s and <code>class</code>es).
330 //
331 // Regarding the Scratch objects, each struct is equipped with a constructor
332 // that creates an @ref FEValues object using the @ref FiniteElement,
333 // Quadrature, @ref Mapping (which describes the interpolation of curved
334 // boundaries), and @ref UpdateFlags instances. Moreover, we manually
335 // implement a copy constructor (since the FEValues class is not copyable by
336 // itself), and provide some additional vector fields that are used to hold
337 // intermediate data during the computation of local contributions.
338 //
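  // To give an idea of how these pieces fit together, a generic WorkStream
  // invocation looks roughly as follows (the worker and copier names are
  // placeholders for the per-cell and copy-to-global functions defined
  // further down):
  // @code
  //   WorkStream::run(dof_handler.begin_active(),
  //                   dof_handler.end(),
  //                   worker_function,
  //                   copier_function,
  //                   scratch_object,
  //                   copy_data_object);
  // @endcode
  //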
339 // Let us start with the scratch arrays and, specifically, the one used for
340 // assembly of the Stokes preconditioner:
341 namespace Assembly
342 {
343 namespace Scratch
344 {
345 template <int dim>
346 struct StokesPreconditioner
347 {
348 StokesPreconditioner(const FiniteElement<dim> &stokes_fe,
349 const Quadrature<dim> & stokes_quadrature,
350 const Mapping<dim> & mapping,
351 const UpdateFlags update_flags);
352
353 StokesPreconditioner(const StokesPreconditioner &data);
354
355
356 FEValues<dim> stokes_fe_values;
357
358 std::vector<Tensor<2, dim>> grad_phi_u;
359 std::vector<double> phi_p;
360 };
361
362 template <int dim>
      StokesPreconditioner<dim>::StokesPreconditioner(
364 const FiniteElement<dim> &stokes_fe,
365 const Quadrature<dim> & stokes_quadrature,
366 const Mapping<dim> & mapping,
367 const UpdateFlags update_flags)
368 : stokes_fe_values(mapping, stokes_fe, stokes_quadrature, update_flags)
369 , grad_phi_u(stokes_fe.n_dofs_per_cell())
370 , phi_p(stokes_fe.n_dofs_per_cell())
371 {}
372
373
374
375 template <int dim>
      StokesPreconditioner<dim>::StokesPreconditioner(
377 const StokesPreconditioner &scratch)
378 : stokes_fe_values(scratch.stokes_fe_values.get_mapping(),
379 scratch.stokes_fe_values.get_fe(),
380 scratch.stokes_fe_values.get_quadrature(),
381 scratch.stokes_fe_values.get_update_flags())
382 , grad_phi_u(scratch.grad_phi_u)
383 , phi_p(scratch.phi_p)
384 {}
385
386
387
388 // The next one is the scratch object used for the assembly of the full
389 // Stokes system. Observe that we derive the StokesSystem scratch class
390 // from the StokesPreconditioner class above. We do this because all the
391 // objects that are necessary for the assembly of the preconditioner are
392 // also needed for the actual matrix system and right hand side, plus
393 // some extra data. This makes the program more compact. Note also that
394 // the assembly of the Stokes system and the temperature right hand side
395 // further down requires data from temperature and velocity,
396 // respectively, so we actually need two FEValues objects for those two
397 // cases.
398 template <int dim>
399 struct StokesSystem : public StokesPreconditioner<dim>
400 {
401 StokesSystem(const FiniteElement<dim> &stokes_fe,
402 const Mapping<dim> & mapping,
403 const Quadrature<dim> & stokes_quadrature,
404 const UpdateFlags stokes_update_flags,
405 const FiniteElement<dim> &temperature_fe,
406 const UpdateFlags temperature_update_flags);
407
408 StokesSystem(const StokesSystem<dim> &data);
409
410
411 FEValues<dim> temperature_fe_values;
412
413 std::vector<Tensor<1, dim>> phi_u;
414 std::vector<SymmetricTensor<2, dim>> grads_phi_u;
415 std::vector<double> div_phi_u;
416
417 std::vector<double> old_temperature_values;
418 };
419
420
421 template <int dim>
      StokesSystem<dim>::StokesSystem(
423 const FiniteElement<dim> &stokes_fe,
424 const Mapping<dim> & mapping,
425 const Quadrature<dim> & stokes_quadrature,
426 const UpdateFlags stokes_update_flags,
427 const FiniteElement<dim> &temperature_fe,
428 const UpdateFlags temperature_update_flags)
429 : StokesPreconditioner<dim>(stokes_fe,
430 stokes_quadrature,
431 mapping,
432 stokes_update_flags)
433 , temperature_fe_values(mapping,
434 temperature_fe,
435 stokes_quadrature,
436 temperature_update_flags)
437 , phi_u(stokes_fe.n_dofs_per_cell())
438 , grads_phi_u(stokes_fe.n_dofs_per_cell())
439 , div_phi_u(stokes_fe.n_dofs_per_cell())
440 , old_temperature_values(stokes_quadrature.size())
441 {}
442
443
444 template <int dim>
      StokesSystem<dim>::StokesSystem(const StokesSystem<dim> &scratch)
446 : StokesPreconditioner<dim>(scratch)
447 , temperature_fe_values(
448 scratch.temperature_fe_values.get_mapping(),
449 scratch.temperature_fe_values.get_fe(),
450 scratch.temperature_fe_values.get_quadrature(),
451 scratch.temperature_fe_values.get_update_flags())
452 , phi_u(scratch.phi_u)
453 , grads_phi_u(scratch.grads_phi_u)
454 , div_phi_u(scratch.div_phi_u)
455 , old_temperature_values(scratch.old_temperature_values)
456 {}
457
458
459 // After defining the objects used in the assembly of the Stokes system,
460 // we do the same for the assembly of the matrices necessary for the
461 // temperature system. The general structure is very similar:
462 template <int dim>
463 struct TemperatureMatrix
464 {
465 TemperatureMatrix(const FiniteElement<dim> &temperature_fe,
466 const Mapping<dim> & mapping,
467 const Quadrature<dim> & temperature_quadrature);
468
469 TemperatureMatrix(const TemperatureMatrix &data);
470
471
472 FEValues<dim> temperature_fe_values;
473
474 std::vector<double> phi_T;
475 std::vector<Tensor<1, dim>> grad_phi_T;
476 };
477
478
479 template <int dim>
      TemperatureMatrix<dim>::TemperatureMatrix(
481 const FiniteElement<dim> &temperature_fe,
482 const Mapping<dim> & mapping,
483 const Quadrature<dim> & temperature_quadrature)
484 : temperature_fe_values(mapping,
485 temperature_fe,
486 temperature_quadrature,
487 update_values | update_gradients |
488 update_JxW_values)
489 , phi_T(temperature_fe.n_dofs_per_cell())
490 , grad_phi_T(temperature_fe.n_dofs_per_cell())
491 {}
492
493
494 template <int dim>
      TemperatureMatrix<dim>::TemperatureMatrix(
496 const TemperatureMatrix &scratch)
497 : temperature_fe_values(
498 scratch.temperature_fe_values.get_mapping(),
499 scratch.temperature_fe_values.get_fe(),
500 scratch.temperature_fe_values.get_quadrature(),
501 scratch.temperature_fe_values.get_update_flags())
502 , phi_T(scratch.phi_T)
503 , grad_phi_T(scratch.grad_phi_T)
504 {}
505
506
507 // The final scratch object is used in the assembly of the right hand
508 // side of the temperature system. This object is significantly larger
509 // than the ones above because a lot more quantities enter the
510 // computation of the right hand side of the temperature equation. In
511 // particular, the temperature values and gradients of the previous two
512 // time steps need to be evaluated at the quadrature points, as well as
513 // the velocities and the strain rates (i.e. the symmetric gradients of
514 // the velocity) that enter the right hand side as friction heating
515 // terms. Despite the number of terms, the following should be rather
516 // self explanatory:
517 template <int dim>
518 struct TemperatureRHS
519 {
520 TemperatureRHS(const FiniteElement<dim> &temperature_fe,
521 const FiniteElement<dim> &stokes_fe,
522 const Mapping<dim> & mapping,
523 const Quadrature<dim> & quadrature);
524
525 TemperatureRHS(const TemperatureRHS &data);
526
527
528 FEValues<dim> temperature_fe_values;
529 FEValues<dim> stokes_fe_values;
530
531 std::vector<double> phi_T;
532 std::vector<Tensor<1, dim>> grad_phi_T;
533
534 std::vector<Tensor<1, dim>> old_velocity_values;
535 std::vector<Tensor<1, dim>> old_old_velocity_values;
536
537 std::vector<SymmetricTensor<2, dim>> old_strain_rates;
538 std::vector<SymmetricTensor<2, dim>> old_old_strain_rates;
539
540 std::vector<double> old_temperature_values;
541 std::vector<double> old_old_temperature_values;
542 std::vector<Tensor<1, dim>> old_temperature_grads;
543 std::vector<Tensor<1, dim>> old_old_temperature_grads;
544 std::vector<double> old_temperature_laplacians;
545 std::vector<double> old_old_temperature_laplacians;
546 };
547
548
549 template <int dim>
      TemperatureRHS<dim>::TemperatureRHS(
551 const FiniteElement<dim> &temperature_fe,
552 const FiniteElement<dim> &stokes_fe,
553 const Mapping<dim> & mapping,
554 const Quadrature<dim> & quadrature)
555 : temperature_fe_values(mapping,
556 temperature_fe,
557 quadrature,
558 update_values | update_gradients |
559 update_hessians | update_quadrature_points |
560 update_JxW_values)
561 , stokes_fe_values(mapping,
562 stokes_fe,
563 quadrature,
564 update_values | update_gradients)
565 , phi_T(temperature_fe.n_dofs_per_cell())
566 , grad_phi_T(temperature_fe.n_dofs_per_cell())
567 ,
568
569 old_velocity_values(quadrature.size())
570 , old_old_velocity_values(quadrature.size())
571 , old_strain_rates(quadrature.size())
572 , old_old_strain_rates(quadrature.size())
573 ,
574
575 old_temperature_values(quadrature.size())
576 , old_old_temperature_values(quadrature.size())
577 , old_temperature_grads(quadrature.size())
578 , old_old_temperature_grads(quadrature.size())
579 , old_temperature_laplacians(quadrature.size())
580 , old_old_temperature_laplacians(quadrature.size())
581 {}
582
583
584 template <int dim>
      TemperatureRHS<dim>::TemperatureRHS(const TemperatureRHS &scratch)
586 : temperature_fe_values(
587 scratch.temperature_fe_values.get_mapping(),
588 scratch.temperature_fe_values.get_fe(),
589 scratch.temperature_fe_values.get_quadrature(),
590 scratch.temperature_fe_values.get_update_flags())
591 , stokes_fe_values(scratch.stokes_fe_values.get_mapping(),
592 scratch.stokes_fe_values.get_fe(),
593 scratch.stokes_fe_values.get_quadrature(),
594 scratch.stokes_fe_values.get_update_flags())
595 , phi_T(scratch.phi_T)
596 , grad_phi_T(scratch.grad_phi_T)
597 ,
598
599 old_velocity_values(scratch.old_velocity_values)
600 , old_old_velocity_values(scratch.old_old_velocity_values)
601 , old_strain_rates(scratch.old_strain_rates)
602 , old_old_strain_rates(scratch.old_old_strain_rates)
603 ,
604
605 old_temperature_values(scratch.old_temperature_values)
606 , old_old_temperature_values(scratch.old_old_temperature_values)
607 , old_temperature_grads(scratch.old_temperature_grads)
608 , old_old_temperature_grads(scratch.old_old_temperature_grads)
609 , old_temperature_laplacians(scratch.old_temperature_laplacians)
610 , old_old_temperature_laplacians(scratch.old_old_temperature_laplacians)
611 {}
612 } // namespace Scratch
613
614
615 // The CopyData objects are even simpler than the Scratch objects as all
616 // they have to do is to store the results of local computations until
617 // they can be copied into the global matrix or vector objects. These
618 // structures therefore only need to provide a constructor, a copy
619 // operation, and some arrays for local matrix, local vectors and the
620 // relation between local and global degrees of freedom (a.k.a.
621 // <code>local_dof_indices</code>). Again, we have one such structure for
622 // each of the four operations we will parallelize using the WorkStream
623 // class:
624 namespace CopyData
625 {
626 template <int dim>
627 struct StokesPreconditioner
628 {
629 StokesPreconditioner(const FiniteElement<dim> &stokes_fe);
630 StokesPreconditioner(const StokesPreconditioner &data);
631 StokesPreconditioner &operator=(const StokesPreconditioner &) = default;
632
633 FullMatrix<double> local_matrix;
634 std::vector<types::global_dof_index> local_dof_indices;
635 };
636
637 template <int dim>
      StokesPreconditioner<dim>::StokesPreconditioner(
639 const FiniteElement<dim> &stokes_fe)
640 : local_matrix(stokes_fe.n_dofs_per_cell(), stokes_fe.n_dofs_per_cell())
641 , local_dof_indices(stokes_fe.n_dofs_per_cell())
642 {}
643
644 template <int dim>
      StokesPreconditioner<dim>::StokesPreconditioner(
646 const StokesPreconditioner &data)
647 : local_matrix(data.local_matrix)
648 , local_dof_indices(data.local_dof_indices)
649 {}
650
651
652
653 template <int dim>
654 struct StokesSystem : public StokesPreconditioner<dim>
655 {
656 StokesSystem(const FiniteElement<dim> &stokes_fe);
657
658 Vector<double> local_rhs;
659 };
660
661 template <int dim>
      StokesSystem<dim>::StokesSystem(const FiniteElement<dim> &stokes_fe)
663 : StokesPreconditioner<dim>(stokes_fe)
664 , local_rhs(stokes_fe.n_dofs_per_cell())
665 {}
666
667
668
669 template <int dim>
670 struct TemperatureMatrix
671 {
672 TemperatureMatrix(const FiniteElement<dim> &temperature_fe);
673
674 FullMatrix<double> local_mass_matrix;
675 FullMatrix<double> local_stiffness_matrix;
676 std::vector<types::global_dof_index> local_dof_indices;
677 };
678
679 template <int dim>
      TemperatureMatrix<dim>::TemperatureMatrix(
681 const FiniteElement<dim> &temperature_fe)
682 : local_mass_matrix(temperature_fe.n_dofs_per_cell(),
683 temperature_fe.n_dofs_per_cell())
684 , local_stiffness_matrix(temperature_fe.n_dofs_per_cell(),
685 temperature_fe.n_dofs_per_cell())
686 , local_dof_indices(temperature_fe.n_dofs_per_cell())
687 {}
688
689
690
691 template <int dim>
692 struct TemperatureRHS
693 {
694 TemperatureRHS(const FiniteElement<dim> &temperature_fe);
695
696 Vector<double> local_rhs;
697 std::vector<types::global_dof_index> local_dof_indices;
698 FullMatrix<double> matrix_for_bc;
699 };
700
701 template <int dim>
      TemperatureRHS<dim>::TemperatureRHS(
703 const FiniteElement<dim> &temperature_fe)
704 : local_rhs(temperature_fe.n_dofs_per_cell())
705 , local_dof_indices(temperature_fe.n_dofs_per_cell())
706 , matrix_for_bc(temperature_fe.n_dofs_per_cell(),
707 temperature_fe.n_dofs_per_cell())
708 {}
709 } // namespace CopyData
710 } // namespace Assembly
711
712
713
714 // @sect3{The <code>BoussinesqFlowProblem</code> class template}
715 //
716 // This is the declaration of the main class. It is very similar to step-31
  // but there are a number of differences we will comment on below.
718 //
719 // The top of the class is essentially the same as in step-31, listing the
720 // public methods and a set of private functions that do the heavy
721 // lifting. Compared to step-31 there are only two additions to this
722 // section: the function <code>get_cfl_number()</code> that computes the
  // maximum CFL number over all cells, from which we then compute the global
  // time step, and the function <code>get_entropy_variation()</code> that is
  // used in the computation of the entropy stabilization. It is akin to the
  // <code>get_extrapolated_temperature_range()</code> we have used in step-31
  // for this purpose, but works on the entropy instead of the temperature.
729 template <int dim>
730 class BoussinesqFlowProblem
731 {
732 public:
733 struct Parameters;
    BoussinesqFlowProblem(Parameters &parameters);
735 void run();
736
737 private:
738 void setup_dofs();
739 void assemble_stokes_preconditioner();
740 void build_stokes_preconditioner();
741 void assemble_stokes_system();
742 void assemble_temperature_matrix();
743 void assemble_temperature_system(const double maximal_velocity);
744 double get_maximal_velocity() const;
745 double get_cfl_number() const;
746 double get_entropy_variation(const double average_temperature) const;
747 std::pair<double, double> get_extrapolated_temperature_range() const;
748 void solve();
749 void output_results();
750 void refine_mesh(const unsigned int max_grid_level);
751
752 double compute_viscosity(
753 const std::vector<double> & old_temperature,
754 const std::vector<double> & old_old_temperature,
755 const std::vector<Tensor<1, dim>> &old_temperature_grads,
756 const std::vector<Tensor<1, dim>> &old_old_temperature_grads,
757 const std::vector<double> & old_temperature_laplacians,
758 const std::vector<double> & old_old_temperature_laplacians,
759 const std::vector<Tensor<1, dim>> &old_velocity_values,
760 const std::vector<Tensor<1, dim>> &old_old_velocity_values,
761 const std::vector<SymmetricTensor<2, dim>> &old_strain_rates,
762 const std::vector<SymmetricTensor<2, dim>> &old_old_strain_rates,
763 const double global_u_infty,
764 const double global_T_variation,
765 const double average_temperature,
766 const double global_entropy_variation,
767 const double cell_diameter) const;
768
769 public:
770 // The first significant new component is the definition of a struct for
771 // the parameters according to the discussion in the introduction. This
772 // structure is initialized by reading from a parameter file during
773 // construction of this object.
774 struct Parameters
775 {
      Parameters(const std::string &parameter_filename);
777
778 static void declare_parameters(ParameterHandler &prm);
779 void parse_parameters(ParameterHandler &prm);
780
781 double end_time;
782
783 unsigned int initial_global_refinement;
784 unsigned int initial_adaptive_refinement;
785
786 bool generate_graphical_output;
787 unsigned int graphical_output_interval;
788
789 unsigned int adaptive_refinement_interval;
790
791 double stabilization_alpha;
792 double stabilization_c_R;
793 double stabilization_beta;
794
795 unsigned int stokes_velocity_degree;
796 bool use_locally_conservative_discretization;
797
798 unsigned int temperature_degree;
799 };
800
801 private:
    Parameters &parameters;
803
804 // The <code>pcout</code> (for <i>%parallel <code>std::cout</code></i>)
805 // object is used to simplify writing output: each MPI process can use
806 // this to generate output as usual, but since each of these processes
807 // will (hopefully) produce the same output it will just be replicated
808 // many times over; with the ConditionalOStream class, only the output
809 // generated by one MPI process will actually be printed to screen,
    // whereas the output by all the other processes will simply be forgotten.
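    //
    // A minimal sketch of how such an object is created and used (the
    // quantity being printed is just an example):
    // @code
    //   ConditionalOStream pcout(
    //     std::cout, Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0);
    //   pcout << "Number of active cells: "
    //         << triangulation.n_global_active_cells() << std::endl;
    // @endcode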
811 ConditionalOStream pcout;
812
813 // The following member variables will then again be similar to those in
814 // step-31 (and to other tutorial programs). As mentioned in the
815 // introduction, we fully distribute computations, so we will have to use
816 // the parallel::distributed::Triangulation class (see step-40) but the
817 // remainder of these variables is rather standard with two exceptions:
818 //
819 // - The <code>mapping</code> variable is used to denote a higher-order
820 // polynomial mapping. As mentioned in the introduction, we use this
821 // mapping when forming integrals through quadrature for all cells that
822 // are adjacent to either the inner or outer boundaries of our domain
823 // where the boundary is curved.
824 //
825 // - In a bit of naming confusion, you will notice below that some of the
826 // variables from namespace TrilinosWrappers are taken from namespace
827 // TrilinosWrappers::MPI (such as the right hand side vectors) whereas
828 // others are not (such as the various matrices). This is due to legacy
829 // reasons. We will frequently have to query velocities
830 // and temperatures at arbitrary quadrature points; consequently, rather
831 // than importing ghost information of a vector whenever we need access
832 // to degrees of freedom that are relevant locally but owned by another
833 // processor, we solve linear systems in %parallel but then immediately
834 // initialize a vector including ghost entries of the solution for further
835 // processing. The various <code>*_solution</code> vectors are therefore
836 // filled immediately after solving their respective linear system in
837 // %parallel and will always contain values for all
838 // @ref GlossLocallyRelevantDof "locally relevant degrees of freedom";
839 // the fully distributed vectors that we obtain from the solution process
840 // and that only ever contain the
841 // @ref GlossLocallyOwnedDof "locally owned degrees of freedom" are
842 // destroyed immediately after the solution process and after we have
843 // copied the relevant values into the member variable vectors.
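    //
    // Schematically, such a ghosted vector might be set up and filled as
    // follows (the index set and vector names here are only illustrative):
    // @code
    //   TrilinosWrappers::MPI::Vector ghosted_solution;
    //   ghosted_solution.reinit(locally_owned_dofs,
    //                           locally_relevant_dofs,
    //                           MPI_COMM_WORLD);
    //   ghosted_solution = completely_distributed_solution;
    // @endcode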
844 parallel::distributed::Triangulation<dim> triangulation;
845 double global_Omega_diameter;
846
847 const MappingQ<dim> mapping;
848
849 const FESystem<dim> stokes_fe;
850 DoFHandler<dim> stokes_dof_handler;
851 AffineConstraints<double> stokes_constraints;
852
853 TrilinosWrappers::BlockSparseMatrix stokes_matrix;
854 TrilinosWrappers::BlockSparseMatrix stokes_preconditioner_matrix;
855
856 TrilinosWrappers::MPI::BlockVector stokes_solution;
857 TrilinosWrappers::MPI::BlockVector old_stokes_solution;
858 TrilinosWrappers::MPI::BlockVector stokes_rhs;
859
860
861 FE_Q<dim> temperature_fe;
862 DoFHandler<dim> temperature_dof_handler;
863 AffineConstraints<double> temperature_constraints;
864
865 TrilinosWrappers::SparseMatrix temperature_mass_matrix;
866 TrilinosWrappers::SparseMatrix temperature_stiffness_matrix;
867 TrilinosWrappers::SparseMatrix temperature_matrix;
868
869 TrilinosWrappers::MPI::Vector temperature_solution;
870 TrilinosWrappers::MPI::Vector old_temperature_solution;
871 TrilinosWrappers::MPI::Vector old_old_temperature_solution;
872 TrilinosWrappers::MPI::Vector temperature_rhs;
873
874
875 double time_step;
876 double old_time_step;
877 unsigned int timestep_number;
878
879 std::shared_ptr<TrilinosWrappers::PreconditionAMG> Amg_preconditioner;
880 std::shared_ptr<TrilinosWrappers::PreconditionJacobi> Mp_preconditioner;
881 std::shared_ptr<TrilinosWrappers::PreconditionJacobi> T_preconditioner;
882
883 bool rebuild_stokes_matrix;
884 bool rebuild_stokes_preconditioner;
885 bool rebuild_temperature_matrices;
886 bool rebuild_temperature_preconditioner;
887
888 // The next member variable, <code>computing_timer</code> is used to
889 // conveniently account for compute time spent in certain "sections" of
890 // the code that are repeatedly entered. For example, we will enter (and
891 // leave) sections for Stokes matrix assembly and would like to accumulate
892 // the run time spent in this section over all time steps. Every so many
893 // time steps as well as at the end of the program (through the destructor
894 // of the TimerOutput class) we will then produce a nice summary of the
895 // times spent in the different sections into which we categorize the
896 // run-time of this program.
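    //
    // Sections are most conveniently entered and left via the RAII helper
    // class TimerOutput::Scope, roughly like this (the section name is just
    // an example):
    // @code
    //   TimerOutput::Scope timer_section(computing_timer,
    //                                    "Assemble Stokes system");
    // @endcode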
897 TimerOutput computing_timer;
898
899 // After these member variables we have a number of auxiliary functions
900 // that have been broken out of the ones listed above. Specifically, there
901 // are first three functions that we call from <code>setup_dofs</code> and
902 // then the ones that do the assembling of linear systems:
903 void setup_stokes_matrix(
904 const std::vector<IndexSet> &stokes_partitioning,
905 const std::vector<IndexSet> &stokes_relevant_partitioning);
906 void setup_stokes_preconditioner(
907 const std::vector<IndexSet> &stokes_partitioning,
908 const std::vector<IndexSet> &stokes_relevant_partitioning);
909 void setup_temperature_matrices(
910 const IndexSet &temperature_partitioning,
911 const IndexSet &temperature_relevant_partitioning);
912
913
914 // Following the @ref MTWorkStream "task-based parallelization" paradigm,
915 // we split all the assembly routines into two parts: a first part that
916 // can do all the calculations on a certain cell without taking care of
917 // other threads, and a second part (which is writing the local data into
918 // the global matrices and vectors) which can be entered by only one
919 // thread at a time. In order to implement that, we provide functions for
920 // each of those two steps for all the four assembly routines that we use
921 // in this program. The following eight functions do exactly this:
922 void local_assemble_stokes_preconditioner(
923 const typename DoFHandler<dim>::active_cell_iterator &cell,
924 Assembly::Scratch::StokesPreconditioner<dim> & scratch,
925 Assembly::CopyData::StokesPreconditioner<dim> & data);
926
927 void copy_local_to_global_stokes_preconditioner(
928 const Assembly::CopyData::StokesPreconditioner<dim> &data);
929
930
931 void local_assemble_stokes_system(
932 const typename DoFHandler<dim>::active_cell_iterator &cell,
933 Assembly::Scratch::StokesSystem<dim> & scratch,
934 Assembly::CopyData::StokesSystem<dim> & data);
935
936 void copy_local_to_global_stokes_system(
937 const Assembly::CopyData::StokesSystem<dim> &data);
938
939
940 void local_assemble_temperature_matrix(
941 const typename DoFHandler<dim>::active_cell_iterator &cell,
942 Assembly::Scratch::TemperatureMatrix<dim> & scratch,
943 Assembly::CopyData::TemperatureMatrix<dim> & data);
944
945 void copy_local_to_global_temperature_matrix(
946 const Assembly::CopyData::TemperatureMatrix<dim> &data);
947
948
949
950 void local_assemble_temperature_rhs(
951 const std::pair<double, double> global_T_range,
952 const double global_max_velocity,
953 const double global_entropy_variation,
954 const typename DoFHandler<dim>::active_cell_iterator &cell,
955 Assembly::Scratch::TemperatureRHS<dim> & scratch,
956 Assembly::CopyData::TemperatureRHS<dim> & data);
957
958 void copy_local_to_global_temperature_rhs(
959 const Assembly::CopyData::TemperatureRHS<dim> &data);
960
961 // Finally, we forward declare a member class that we will define later on
962 // and that will be used to compute a number of quantities from our
963 // solution vectors that we'd like to put into the output files for
964 // visualization.
965 class Postprocessor;
966 };
967
968
969 // @sect3{BoussinesqFlowProblem class implementation}
970
971 // @sect4{BoussinesqFlowProblem::Parameters}
972 //
973 // Here comes the definition of the parameters for the Stokes problem. We
  // allow the user to set the end time for the simulation, the levels of
  // refinement (both global and adaptive, which in sum specify the maximum
  // level the cells are allowed to have), and the interval between mesh
  // refinements in the time stepping.
978 //
979 // Then, we let the user specify constants for the stabilization parameters
980 // (as discussed in the introduction), the polynomial degree for the Stokes
981 // velocity space, whether to use the locally conservative discretization
982 // based on FE_DGP elements for the pressure or not (FE_Q elements for
983 // pressure), and the polynomial degree for the temperature interpolation.
984 //
  // The constructor checks for a valid input file (and if there is none, it
  // writes a template file with default values for all parameters under the
  // given name and aborts with an error message) and then parses the
  // parameters.
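  //
  // For reference, an input file for this class might contain entries of the
  // following form (this is only an illustration of the ParameterHandler text
  // format, not a complete or authoritative file):
  // @code
  //   set End time                  = 1e8
  //   set Generate graphical output = true
  //   subsection Stabilization parameters
  //     set alpha = 2
  //   end
  // @endcode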
988 template <int dim>
  BoussinesqFlowProblem<dim>::Parameters::Parameters(
    const std::string &parameter_filename)
991 : end_time(1e8)
992 , initial_global_refinement(2)
993 , initial_adaptive_refinement(2)
994 , adaptive_refinement_interval(10)
995 , stabilization_alpha(2)
996 , stabilization_c_R(0.11)
997 , stabilization_beta(0.078)
998 , stokes_velocity_degree(2)
999 , use_locally_conservative_discretization(true)
1000 , temperature_degree(2)
1001 {
1002 ParameterHandler prm;
1003 BoussinesqFlowProblem<dim>::Parameters::declare_parameters(prm);
1004
1005 std::ifstream parameter_file(parameter_filename);
1006
1007 if (!parameter_file)
1008 {
1009 parameter_file.close();
1010
1011 std::ofstream parameter_out(parameter_filename);
1012 prm.print_parameters(parameter_out, ParameterHandler::Text);
1013
1014 AssertThrow(
1015 false,
1016 ExcMessage(
1017 "Input parameter file <" + parameter_filename +
1018 "> not found. Creating a template file of the same name."));
1019 }
1020
1021 prm.parse_input(parameter_file);
1022 parse_parameters(prm);
1023 }
1024
1025
1026
1027 // Next we have a function that declares the parameters that we expect in
1028 // the input file, together with their data types, default values and a
1029 // description:
1030 template <int dim>
  void BoussinesqFlowProblem<dim>::Parameters::declare_parameters(
1032 ParameterHandler &prm)
1033 {
1034 prm.declare_entry("End time",
1035 "1e8",
1036 Patterns::Double(0),
1037 "The end time of the simulation in years.");
1038 prm.declare_entry("Initial global refinement",
1039 "2",
1040 Patterns::Integer(0),
1041 "The number of global refinement steps performed on "
1042 "the initial coarse mesh, before the problem is first "
1043 "solved there.");
1044 prm.declare_entry("Initial adaptive refinement",
1045 "2",
1046 Patterns::Integer(0),
1047 "The number of adaptive refinement steps performed after "
1048 "initial global refinement.");
1049 prm.declare_entry("Time steps between mesh refinement",
1050 "10",
1051 Patterns::Integer(1),
1052 "The number of time steps after which the mesh is to be "
1053 "adapted based on computed error indicators.");
1054 prm.declare_entry("Generate graphical output",
1055 "false",
1056 Patterns::Bool(),
1057 "Whether graphical output is to be generated or not. "
1058 "You may not want to get graphical output if the number "
1059 "of processors is large.");
1060 prm.declare_entry("Time steps between graphical output",
1061 "50",
1062 Patterns::Integer(1),
1063 "The number of time steps between each generation of "
1064 "graphical output files.");
1065
1066 prm.enter_subsection("Stabilization parameters");
1067 {
1068 prm.declare_entry("alpha",
1069 "2",
1070 Patterns::Double(1, 2),
1071 "The exponent in the entropy viscosity stabilization.");
1072 prm.declare_entry("c_R",
1073 "0.11",
1074 Patterns::Double(0),
1075 "The c_R factor in the entropy viscosity "
1076 "stabilization.");
1077 prm.declare_entry("beta",
1078 "0.078",
1079 Patterns::Double(0),
1080 "The beta factor in the artificial viscosity "
1081 "stabilization. An appropriate value for 2d is 0.052 "
1082 "and 0.078 for 3d.");
1083 }
1084 prm.leave_subsection();
1085
1086 prm.enter_subsection("Discretization");
1087 {
1088 prm.declare_entry(
1089 "Stokes velocity polynomial degree",
1090 "2",
1091 Patterns::Integer(1),
1092 "The polynomial degree to use for the velocity variables "
1093 "in the Stokes system.");
1094 prm.declare_entry(
1095 "Temperature polynomial degree",
1096 "2",
1097 Patterns::Integer(1),
1098 "The polynomial degree to use for the temperature variable.");
1099 prm.declare_entry(
1100 "Use locally conservative discretization",
1101 "true",
1102 Patterns::Bool(),
1103 "Whether to use a Stokes discretization that is locally "
1104 "conservative at the expense of a larger number of degrees "
1105 "of freedom, or to go with a cheaper discretization "
1106 "that does not locally conserve mass (although it is "
1107 "globally conservative.");
1108 }
1109 prm.leave_subsection();
1110 }
1111
1112
1113
1114 // And then we need a function that reads the contents of the
1115 // ParameterHandler object we get by reading the input file and puts the
1116 // results into variables that store the values of the parameters we have
1117 // previously declared:
1118 template <int dim>
  void BoussinesqFlowProblem<dim>::Parameters::parse_parameters(
1120 ParameterHandler &prm)
1121 {
1122 end_time = prm.get_double("End time");
1123 initial_global_refinement = prm.get_integer("Initial global refinement");
1124 initial_adaptive_refinement =
1125 prm.get_integer("Initial adaptive refinement");
1126
1127 adaptive_refinement_interval =
1128 prm.get_integer("Time steps between mesh refinement");
1129
1130 generate_graphical_output = prm.get_bool("Generate graphical output");
1131 graphical_output_interval =
1132 prm.get_integer("Time steps between graphical output");
1133
1134 prm.enter_subsection("Stabilization parameters");
1135 {
1136 stabilization_alpha = prm.get_double("alpha");
1137 stabilization_c_R = prm.get_double("c_R");
1138 stabilization_beta = prm.get_double("beta");
1139 }
1140 prm.leave_subsection();
1141
1142 prm.enter_subsection("Discretization");
1143 {
1144 stokes_velocity_degree =
1145 prm.get_integer("Stokes velocity polynomial degree");
1146 temperature_degree = prm.get_integer("Temperature polynomial degree");
1147 use_locally_conservative_discretization =
1148 prm.get_bool("Use locally conservative discretization");
1149 }
1150 prm.leave_subsection();
1151 }
1152
1153
1154
1155 // @sect4{BoussinesqFlowProblem::BoussinesqFlowProblem}
1156 //
1157 // The constructor of the problem is very similar to the constructor in
1158 // step-31. What is different is the %parallel communication: Trilinos uses
1159 // a message passing interface (MPI) for data distribution. When entering
1160 // the BoussinesqFlowProblem class, we have to decide how the parallelization
1161 // is to be done. We choose a rather simple strategy and let all processors
1162 // that are running the program work together, specified by the communicator
1163 // <code>MPI_COMM_WORLD</code>. Next, we create the output stream (as we
1164 // already did in step-18) that only generates output on the first MPI
1165 // process and is completely forgetful on all others. The implementation of
1166 // this idea is to check the process number when <code>pcout</code> gets a
1167 // true argument, and it uses the <code>std::cout</code> stream for
  // output. If we are on processor five, for instance, then we will give a
1169 // <code>false</code> argument to <code>pcout</code>, which means that the
1170 // output of that processor will not be printed. With the exception of the
1171 // mapping object (for which we use polynomials of degree 4) all but the
1172 // final member variable are exactly the same as in step-31.
1173 //
1174 // This final object, the TimerOutput object, is then told to restrict
1175 // output to the <code>pcout</code> stream (processor 0), and then we
1176 // specify that we want to get a summary table at the end of the program
1177 // which shows us wallclock times (as opposed to CPU times). We will
1178 // manually also request intermediate summaries every so many time steps in
1179 // the <code>run()</code> function below.
1180 template <int dim>
  BoussinesqFlowProblem<dim>::BoussinesqFlowProblem(Parameters &parameters_)
1182 : parameters(parameters_)
1183 , pcout(std::cout, (Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0))
1184 ,
1185
1186 triangulation(MPI_COMM_WORLD,
1187 typename Triangulation<dim>::MeshSmoothing(
1188 Triangulation<dim>::smoothing_on_refinement |
1189 Triangulation<dim>::smoothing_on_coarsening))
1190 ,
1191
1192 global_Omega_diameter(0.)
1193 ,
1194
1195 mapping(4)
1196 ,
1197
1198 stokes_fe(FE_Q<dim>(parameters.stokes_velocity_degree),
1199 dim,
1200 (parameters.use_locally_conservative_discretization ?
1201 static_cast<const FiniteElement<dim> &>(
1202 FE_DGP<dim>(parameters.stokes_velocity_degree - 1)) :
1203 static_cast<const FiniteElement<dim> &>(
1204 FE_Q<dim>(parameters.stokes_velocity_degree - 1))),
1205 1)
1206 ,
1207
1208 stokes_dof_handler(triangulation)
1209 ,
1210
1211 temperature_fe(parameters.temperature_degree)
1212 , temperature_dof_handler(triangulation)
1213 ,
1214
1215 time_step(0)
1216 , old_time_step(0)
1217 , timestep_number(0)
1218 , rebuild_stokes_matrix(true)
1219 , rebuild_stokes_preconditioner(true)
1220 , rebuild_temperature_matrices(true)
1221 , rebuild_temperature_preconditioner(true)
1222 ,
1223
1224 computing_timer(MPI_COMM_WORLD,
1225 pcout,
1226 TimerOutput::summary,
1227 TimerOutput::wall_times)
1228 {}
1229
1230
1231
1232 // @sect4{The BoussinesqFlowProblem helper functions}
1233 // @sect5{BoussinesqFlowProblem::get_maximal_velocity}
1234
1235 // Except for two small details, the function to compute the global maximum
1236 // of the velocity is the same as in step-31. The first detail is actually
1237 // common to all functions that implement loops over all cells in the
1238 // triangulation: When operating in %parallel, each processor can only work
1239 // on a chunk of cells since each processor only has a certain part of the
1240 // entire triangulation. This chunk of cells that we want to work on is
1241 // identified via a so-called <code>subdomain_id</code>, as we also did in
  // step-18. Hence, all we need to change is to perform the cell-related
  // operations only on cells that are owned by the current process (as
  // opposed to ghost or artificial cells), i.e., those for which the
  // subdomain id equals the rank of the current process. Since this is a
  // commonly used operation, there is a shortcut: we can ask whether the
1247 // cell is owned by the current processor using
1248 // <code>cell-@>is_locally_owned()</code>.
1249 //
1250 // The second difference is the way we calculate the maximum value. Before,
1251 // we could simply have a <code>double</code> variable that we checked
1252 // against on each quadrature point for each cell. Now, we have to be a bit
1253 // more careful since each processor only operates on a subset of
1254 // cells. What we do is to first let each processor calculate the maximum
1255 // among its cells, and then do a global communication operation
1256 // <code>Utilities::MPI::max</code> that computes the maximum value among
1257 // all the maximum values of the individual processors. MPI provides such a
1258 // call, but it's even simpler to use the respective function in namespace
1259 // Utilities::MPI using the MPI communicator object since that will do the
1260 // right thing even if we work without MPI and on a single machine only. The
1261 // call to <code>Utilities::MPI::max</code> needs two arguments, namely the
1262 // local maximum (input) and the MPI communicator, which is MPI_COMM_WORLD
1263 // in this example.
1264 template <int dim>
  double BoussinesqFlowProblem<dim>::get_maximal_velocity() const
1266 {
1267 const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
1268 parameters.stokes_velocity_degree);
1269 const unsigned int n_q_points = quadrature_formula.size();
1270
1271 FEValues<dim> fe_values(mapping,
1272 stokes_fe,
1273 quadrature_formula,
1274 update_values);
1275 std::vector<Tensor<1, dim>> velocity_values(n_q_points);
1276
1277 const FEValuesExtractors::Vector velocities(0);
1278
1279 double max_local_velocity = 0;
1280
1281 for (const auto &cell : stokes_dof_handler.active_cell_iterators())
1282 if (cell->is_locally_owned())
1283 {
1284 fe_values.reinit(cell);
1285 fe_values[velocities].get_function_values(stokes_solution,
1286 velocity_values);
1287
1288 for (unsigned int q = 0; q < n_q_points; ++q)
1289 max_local_velocity =
1290 std::max(max_local_velocity, velocity_values[q].norm());
1291 }
1292
1293 return Utilities::MPI::max(max_local_velocity, MPI_COMM_WORLD);
1294 }
1295
1296
1297 // @sect5{BoussinesqFlowProblem::get_cfl_number}
1298
1299 // The next function does something similar, but we now compute the CFL
1300 // number, i.e., maximal velocity on a cell divided by the cell
1301 // diameter. This number is necessary to determine the time step size, as we
1302 // use a semi-explicit time stepping scheme for the temperature equation
1303 // (see step-31 for a discussion). We compute it in the same way as above:
1304 // Compute the local maximum over all locally owned cells, then exchange it
1305 // via MPI to find the global maximum.
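  //
  // Schematically, if $C$ denotes a (scheme-dependent) constant, the time
  // step is then chosen inversely proportional to the returned quantity,
  // @f{align*}
  //   \Delta t \;\le\; C\,\Big(\max_K \frac{\|\mathbf u\|_{\infty,K}}{h_K}\Big)^{-1},
  // @f}
  // which is how this number enters the time step computation later on.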
1306 template <int dim>
  double BoussinesqFlowProblem<dim>::get_cfl_number() const
1308 {
1309 const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
1310 parameters.stokes_velocity_degree);
1311 const unsigned int n_q_points = quadrature_formula.size();
1312
1313 FEValues<dim> fe_values(mapping,
1314 stokes_fe,
1315 quadrature_formula,
1316 update_values);
1317 std::vector<Tensor<1, dim>> velocity_values(n_q_points);
1318
1319 const FEValuesExtractors::Vector velocities(0);
1320
1321 double max_local_cfl = 0;
1322
1323 for (const auto &cell : stokes_dof_handler.active_cell_iterators())
1324 if (cell->is_locally_owned())
1325 {
1326 fe_values.reinit(cell);
1327 fe_values[velocities].get_function_values(stokes_solution,
1328 velocity_values);
1329
1330 double max_local_velocity = 1e-10;
1331 for (unsigned int q = 0; q < n_q_points; ++q)
1332 max_local_velocity =
1333 std::max(max_local_velocity, velocity_values[q].norm());
1334 max_local_cfl =
1335 std::max(max_local_cfl, max_local_velocity / cell->diameter());
1336 }
1337
1338 return Utilities::MPI::max(max_local_cfl, MPI_COMM_WORLD);
1339 }
1340
1341
1342 // @sect5{BoussinesqFlowProblem::get_entropy_variation}
1343
1344 // Next comes the computation of the global entropy variation
1345 // $\|E(T)-\bar{E}(T)\|_\infty$ where the entropy $E$ is defined as
1346 // discussed in the introduction. This is needed for the evaluation of the
1347 // stabilization in the temperature equation as explained in the
1348 // introduction. The entropy variation is actually only needed if we use
1349 // $\alpha=2$ as a power in the residual computation. The infinity norm is
1350 // computed by the maxima over quadrature points, as usual in discrete
1351 // computations.
1352 //
1353 // In order to compute this quantity, we first have to find the
1354 // space-average $\bar{E}(T)$ and then evaluate the maximum. However, that
1355 // means that we would need to perform two loops. We can avoid the overhead
1356 // by noting that $\|E(T)-\bar{E}(T)\|_\infty =
1357 // \max\big(E_{\textrm{max}}(T)-\bar{E}(T),
1358 // \bar{E}(T)-E_{\textrm{min}}(T)\big)$, i.e., the maximum out of the
1359 // deviation from the average entropy in positive and negative
1360 // directions. The four quantities we need for the latter formula (maximum
1361 // entropy, minimum entropy, average entropy, area) can all be evaluated in
1362 // the same loop over all cells, so we choose this simpler variant.
1363 template <int dim>
  double BoussinesqFlowProblem<dim>::get_entropy_variation(
1365 const double average_temperature) const
1366 {
1367 if (parameters.stabilization_alpha != 2)
1368 return 1.;
1369
1370 const QGauss<dim> quadrature_formula(parameters.temperature_degree + 1);
1371 const unsigned int n_q_points = quadrature_formula.size();
1372
1373 FEValues<dim> fe_values(temperature_fe,
1374 quadrature_formula,
1375 update_values | update_JxW_values);
1376 std::vector<double> old_temperature_values(n_q_points);
1377 std::vector<double> old_old_temperature_values(n_q_points);
1378
1379 // In the two functions above we computed the maximum of numbers that were
1380 // all non-negative, so we knew that zero was certainly a lower bound. On
1381 // the other hand, here we need to find the maximum deviation from the
1382 // average value, i.e., we will need to know the maximal and minimal
1383 // values of the entropy for which we don't a priori know the sign.
1384 //
1385 // To compute it, we can therefore start with the largest and smallest
1386 // possible values we can store in a double precision number: The minimum
1387 // is initialized with a bigger and the maximum with a smaller number than
1388 // any one that is going to appear. We are then guaranteed that these
1389 // numbers will be overwritten in the loop on the first cell or, if this
1390 // processor does not own any cells, in the communication step at the
1391 // latest. The following loop then computes the minimum and maximum local
1392 // entropy as well as keeps track of the area/volume of the part of the
1393 // domain we locally own and the integral over the entropy on it:
1394 double min_entropy = std::numeric_limits<double>::max(),
1395 max_entropy = -std::numeric_limits<double>::max(), area = 0,
1396 entropy_integrated = 0;
1397
1398 for (const auto &cell : temperature_dof_handler.active_cell_iterators())
1399 if (cell->is_locally_owned())
1400 {
1401 fe_values.reinit(cell);
1402 fe_values.get_function_values(old_temperature_solution,
1403 old_temperature_values);
1404 fe_values.get_function_values(old_old_temperature_solution,
1405 old_old_temperature_values);
1406 for (unsigned int q = 0; q < n_q_points; ++q)
1407 {
1408 const double T =
1409 (old_temperature_values[q] + old_old_temperature_values[q]) / 2;
1410 const double entropy =
1411 ((T - average_temperature) * (T - average_temperature));
1412
1413 min_entropy = std::min(min_entropy, entropy);
1414 max_entropy = std::max(max_entropy, entropy);
1415 area += fe_values.JxW(q);
1416 entropy_integrated += fe_values.JxW(q) * entropy;
1417 }
1418 }
1419
1420 // Now we only need to exchange data between processors: we need to sum
1421 // the two integrals (<code>area</code>, <code>entropy_integrated</code>),
1422 // and get the extrema for maximum and minimum. We could do this through
1423 // four different data exchanges, but we can do it with two:
1424 // Utilities::MPI::sum also exists in a variant that takes an array of
1425 // values that are all to be summed up. And we can also utilize the
1426 // Utilities::MPI::max function by realizing that forming the minimum over
1427 // the minimal entropies equals forming the negative of the maximum over
1428 // the negative of the minimal entropies; this maximum can then be
1429 // combined with forming the maximum over the maximal entropies.
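// (In formulas: $\min_p E_{\textrm{min},p} = -\max_p\,(-E_{\textrm{min},p})$,
// where $p$ runs over the participating MPI processes.)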
1430 const double local_sums[2] = {entropy_integrated, area},
1431 local_maxima[2] = {-min_entropy, max_entropy};
1432 double global_sums[2], global_maxima[2];
1433
1434 Utilities::MPI::sum(local_sums, MPI_COMM_WORLD, global_sums);
1435 Utilities::MPI::max(local_maxima, MPI_COMM_WORLD, global_maxima);
1436
1437 // Having computed everything this way, we can then compute the average
1438 // entropy and find the $L^\infty$ norm by taking the larger of the
1439 // deviation of the maximum or minimum from the average:
1440 const double average_entropy = global_sums[0] / global_sums[1];
1441 const double entropy_diff = std::max(global_maxima[1] - average_entropy,
1442 average_entropy - (-global_maxima[0]));
1443 return entropy_diff;
1444 }
1445
1446
1447
1448 // @sect5{BoussinesqFlowProblem::get_extrapolated_temperature_range}
1449
1450 // The next function computes the minimal and maximal value of the
1451 // extrapolated temperature over the entire domain. Again, this is only a
1452 // slightly modified version of the respective function in step-31. As in
1453 // the function above, we collect local minima and maxima and then compute
1454 // the global extrema using the same trick as above.
1455 //
1456 // As already discussed in step-31, the function needs to distinguish
1457 // between the first and all following time steps because it uses a higher
1458 // order temperature extrapolation scheme when at least two previous time
1459 // steps are available.
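//
// Written out, if $k_n$ and $k_{n-1}$ denote the current and previous time
// step lengths, the extrapolated temperature evaluated in the loop below is
// $\left(1+\frac{k_n}{k_{n-1}}\right) T^{n-1} - \frac{k_n}{k_{n-1}} T^{n-2}$,
// whereas in the very first time step we simply use $T^{n-1}$ itself.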
1460 template <int dim>
1461 std::pair<double, double>
1462 BoussinesqFlowProblem<dim>::get_extrapolated_temperature_range() const
1463 {
1464 const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
1465 parameters.temperature_degree);
1466 const unsigned int n_q_points = quadrature_formula.size();
1467
1468 FEValues<dim> fe_values(mapping,
1469 temperature_fe,
1470 quadrature_formula,
1471 update_values);
1472 std::vector<double> old_temperature_values(n_q_points);
1473 std::vector<double> old_old_temperature_values(n_q_points);
1474
1475 double min_local_temperature = std::numeric_limits<double>::max(),
1476 max_local_temperature = -std::numeric_limits<double>::max();
1477
1478 if (timestep_number != 0)
1479 {
1480 for (const auto &cell : temperature_dof_handler.active_cell_iterators())
1481 if (cell->is_locally_owned())
1482 {
1483 fe_values.reinit(cell);
1484 fe_values.get_function_values(old_temperature_solution,
1485 old_temperature_values);
1486 fe_values.get_function_values(old_old_temperature_solution,
1487 old_old_temperature_values);
1488
1489 for (unsigned int q = 0; q < n_q_points; ++q)
1490 {
1491 const double temperature =
1492 (1. + time_step / old_time_step) *
1493 old_temperature_values[q] -
1494 time_step / old_time_step * old_old_temperature_values[q];
1495
1496 min_local_temperature =
1497 std::min(min_local_temperature, temperature);
1498 max_local_temperature =
1499 std::max(max_local_temperature, temperature);
1500 }
1501 }
1502 }
1503 else
1504 {
1505 for (const auto &cell : temperature_dof_handler.active_cell_iterators())
1506 if (cell->is_locally_owned())
1507 {
1508 fe_values.reinit(cell);
1509 fe_values.get_function_values(old_temperature_solution,
1510 old_temperature_values);
1511
1512 for (unsigned int q = 0; q < n_q_points; ++q)
1513 {
1514 const double temperature = old_temperature_values[q];
1515
1516 min_local_temperature =
1517 std::min(min_local_temperature, temperature);
1518 max_local_temperature =
1519 std::max(max_local_temperature, temperature);
1520 }
1521 }
1522 }
1523
1524 double local_extrema[2] = {-min_local_temperature, max_local_temperature};
1525 double global_extrema[2];
1526 Utilities::MPI::max(local_extrema, MPI_COMM_WORLD, global_extrema);
1527
1528 return std::make_pair(-global_extrema[0], global_extrema[1]);
1529 }
1530
1531
1532 // @sect5{BoussinesqFlowProblem::compute_viscosity}
1533
1534 // The function that calculates the viscosity is purely local and so needs
1535 // no communication at all. It is mostly the same as in step-31 but with an
1536 // updated formulation of the viscosity if $\alpha=2$ is chosen:
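//
// To summarize what the code below does: on a cell $K$ with diameter $h_K$,
// it computes the first-order upper bound
// $\nu_{\textrm{max}}|_K = \beta\, \|\mathbf u\|_{\infty,K}\, h_K$ and, if
// $\alpha=2$, the entropy viscosity
// $\nu_E|_K = c_R\, h_K^2\,
// \frac{\|R_\alpha(T)\|_{\infty,K}}{\|E(T)-\bar{E}(T)\|_\infty}$,
// returning the smaller of the two. (In the very first time step only
// $\nu_{\textrm{max}}$ is used, and for $\alpha=1$ the entropy viscosity is
// scaled with the global quantities passed in instead; see the code.)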
1537 template <int dim>
1538 double BoussinesqFlowProblem<dim>::compute_viscosity(
1539 const std::vector<double> & old_temperature,
1540 const std::vector<double> & old_old_temperature,
1541 const std::vector<Tensor<1, dim>> & old_temperature_grads,
1542 const std::vector<Tensor<1, dim>> & old_old_temperature_grads,
1543 const std::vector<double> & old_temperature_laplacians,
1544 const std::vector<double> & old_old_temperature_laplacians,
1545 const std::vector<Tensor<1, dim>> & old_velocity_values,
1546 const std::vector<Tensor<1, dim>> & old_old_velocity_values,
1547 const std::vector<SymmetricTensor<2, dim>> &old_strain_rates,
1548 const std::vector<SymmetricTensor<2, dim>> &old_old_strain_rates,
1549 const double global_u_infty,
1550 const double global_T_variation,
1551 const double average_temperature,
1552 const double global_entropy_variation,
1553 const double cell_diameter) const
1554 {
1555 if (global_u_infty == 0)
1556 return 5e-3 * cell_diameter;
1557
1558 const unsigned int n_q_points = old_temperature.size();
1559
1560 double max_residual = 0;
1561 double max_velocity = 0;
1562
1563 for (unsigned int q = 0; q < n_q_points; ++q)
1564 {
1565 const Tensor<1, dim> u =
1566 (old_velocity_values[q] + old_old_velocity_values[q]) / 2;
1567
1568 const SymmetricTensor<2, dim> strain_rate =
1569 (old_strain_rates[q] + old_old_strain_rates[q]) / 2;
1570
1571 const double T = (old_temperature[q] + old_old_temperature[q]) / 2;
1572 const double dT_dt =
1573 (old_temperature[q] - old_old_temperature[q]) / old_time_step;
1574 const double u_grad_T =
1575 u * (old_temperature_grads[q] + old_old_temperature_grads[q]) / 2;
1576
1577 const double kappa_Delta_T =
1578 EquationData::kappa *
1579 (old_temperature_laplacians[q] + old_old_temperature_laplacians[q]) /
1580 2;
1581 const double gamma =
1582 ((EquationData::radiogenic_heating * EquationData::density(T) +
1583 2 * EquationData::eta * strain_rate * strain_rate) /
1584 (EquationData::density(T) * EquationData::specific_heat));
1585
1586 double residual = std::abs(dT_dt + u_grad_T - kappa_Delta_T - gamma);
1587 if (parameters.stabilization_alpha == 2)
1588 residual *= std::abs(T - average_temperature);
1589
1590 max_residual = std::max(residual, max_residual);
1591 max_velocity = std::max(std::sqrt(u * u), max_velocity);
1592 }
1593
1594 const double max_viscosity =
1595 (parameters.stabilization_beta * max_velocity * cell_diameter);
1596 if (timestep_number == 0)
1597 return max_viscosity;
1598 else
1599 {
1600 Assert(old_time_step > 0, ExcInternalError());
1601
1602 double entropy_viscosity;
1603 if (parameters.stabilization_alpha == 2)
1604 entropy_viscosity =
1605 (parameters.stabilization_c_R * cell_diameter * cell_diameter *
1606 max_residual / global_entropy_variation);
1607 else
1608 entropy_viscosity =
1609 (parameters.stabilization_c_R * cell_diameter *
1610 global_Omega_diameter * max_velocity * max_residual /
1611 (global_u_infty * global_T_variation));
1612
1613 return std::min(max_viscosity, entropy_viscosity);
1614 }
1615 }
1616
1617
1618
1619 // @sect4{The BoussinesqFlowProblem setup functions}
1620
1621 // The following three functions set up the Stokes matrix, the matrix used
1622 // for the Stokes preconditioner, and the temperature matrix. The code is
1623 // mostly the same as in step-31, but it has been broken out into three
1624 // functions of their own for simplicity.
1625 //
1626 // The main functional difference between the code here and that in step-31
1627 // is that the matrices we want to set up are distributed across multiple
1628 // processors. Since we still want to build up the sparsity pattern first
1629 // for efficiency reasons, we could continue to build the <i>entire</i>
1630 // sparsity pattern as a BlockDynamicSparsityPattern, as we did in
1631 // step-31. However, that would be inefficient: every processor would build
1632 // the same sparsity pattern, but only initialize a small part of the matrix
1633 // using it. It also violates the principle that every processor should only
1634 // work on those cells it owns (and, if necessary, the layer of ghost cells
1635 // around it).
1636 //
1637 // Rather, we use an object of type TrilinosWrappers::BlockSparsityPattern,
1638 // which is (obviously) a wrapper around a sparsity pattern object provided
1639 // by Trilinos. The advantage is that the Trilinos sparsity pattern class
1640 // can communicate across multiple processors: if this processor fills in
1641 // all the nonzero entries that result from the cells it owns, and every
1642 // other processor does so as well, then at the end after some MPI
1643 // communication initiated by the <code>compress()</code> call, we will have
1644 // the globally assembled sparsity pattern available with which the global
1645 // matrix can be initialized.
1646 //
1647 // There is one important aspect when initializing Trilinos sparsity
1648 // patterns in parallel: In addition to specifying the locally owned rows
1649 // and columns of the matrices via the @p stokes_partitioning index set, we
1650 // also supply information about all the rows we are possibly going to write
1651 // into when assembling on a certain processor. The set of locally relevant
1652 // rows contains all such rows (possibly also a few unnecessary ones, but it
1653 // is difficult to find the exact row indices before actually getting
1654 // indices on all cells and resolving constraints). This additional
1655 // information allows us to determine exactly the structure of the
1656 // off-processor data found during assembly. While Trilinos matrices are
1657 // able to collect this information on the fly as well (when initializing
1658 // them from some other reinit method), it is less efficient and leads to
1659 // problems when assembling matrices with multiple threads. In this program,
1660 // we pessimistically assume that only one processor at a time can write
1661 // into the matrix during assembly (whereas the computation itself is parallel),
1662 // which is fine for Trilinos matrices. In practice, one can do better by
1663 // hinting WorkStream at cells that do not share vertices, allowing for
1664 // parallelism among those cells (see the graph coloring algorithms and
1665 // WorkStream with colored iterators argument). However, that only works
1666 // when only one MPI processor is present because Trilinos' internal data
1667 // structures for accumulating off-processor data on the fly are not thread
1668 // safe. With the initialization presented here, there is no such problem
1669 // and one could safely introduce graph coloring for this algorithm.
1670 //
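// Purely as an illustration (this is not what the present program does),
// such a colored loop could be set up roughly along the following lines,
// where <code>worker</code>, <code>copier</code>, and the sample scratch and
// copy data objects stand for the same kinds of objects we hand to
// WorkStream::run further down, and where in a parallel computation one
// would of course restrict the range to locally owned cells first:
// @code
//   using Iterator = typename DoFHandler<dim>::active_cell_iterator;
//
//   // Two cells conflict if they share degrees of freedom, so we report
//   // each cell's DoF indices as its conflict indices:
//   const auto get_conflict_indices = [](const Iterator &cell) {
//     std::vector<types::global_dof_index> indices(
//       cell->get_fe().n_dofs_per_cell());
//     cell->get_dof_indices(indices);
//     return indices;
//   };
//
//   const std::vector<std::vector<Iterator>> colored_iterators =
//     GraphColoring::make_graph_coloring(stokes_dof_handler.begin_active(),
//                                        stokes_dof_handler.end(),
//                                        get_conflict_indices);
//
//   WorkStream::run(colored_iterators,
//                   worker,
//                   copier,
//                   sample_scratch_data,
//                   sample_copy_data);
// @endcode
//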
1671 // The only other change we need to make is to tell the
1672 // DoFTools::make_sparsity_pattern() function that it is only supposed to
1673 // work on a subset of cells, namely the ones whose
1674 // <code>subdomain_id</code> equals the number of the current processor, and
1675 // to ignore all other cells.
1676 //
1677 // This strategy is replicated across all three of the following functions.
1678 //
1679 // Note that Trilinos matrices store the information contained in the
1680 // sparsity patterns, so we can safely release the <code>sp</code> variable
1681 // once the matrix has been given the sparsity structure.
1682 template <int dim>
1683 void BoussinesqFlowProblem<dim>::setup_stokes_matrix(
1684 const std::vector<IndexSet> &stokes_partitioning,
1685 const std::vector<IndexSet> &stokes_relevant_partitioning)
1686 {
1687 stokes_matrix.clear();
1688
1689 TrilinosWrappers::BlockSparsityPattern sp(stokes_partitioning,
1690 stokes_partitioning,
1691 stokes_relevant_partitioning,
1692 MPI_COMM_WORLD);
1693
1694 Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1);
1695 for (unsigned int c = 0; c < dim + 1; ++c)
1696 for (unsigned int d = 0; d < dim + 1; ++d)
1697 if (!((c == dim) && (d == dim)))
1698 coupling[c][d] = DoFTools::always;
1699 else
1700 coupling[c][d] = DoFTools::none;
1701
1702 DoFTools::make_sparsity_pattern(stokes_dof_handler,
1703 coupling,
1704 sp,
1705 stokes_constraints,
1706 false,
1707 Utilities::MPI::this_mpi_process(
1708 MPI_COMM_WORLD));
1709 sp.compress();
1710
1711 stokes_matrix.reinit(sp);
1712 }
1713
1714
1715
1716 template <int dim>
1717 void BoussinesqFlowProblem<dim>::setup_stokes_preconditioner(
1718 const std::vector<IndexSet> &stokes_partitioning,
1719 const std::vector<IndexSet> &stokes_relevant_partitioning)
1720 {
1721 Amg_preconditioner.reset();
1722 Mp_preconditioner.reset();
1723
1724 stokes_preconditioner_matrix.clear();
1725
1726 TrilinosWrappers::BlockSparsityPattern sp(stokes_partitioning,
1727 stokes_partitioning,
1728 stokes_relevant_partitioning,
1729 MPI_COMM_WORLD);
1730
1731 Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1);
1732 for (unsigned int c = 0; c < dim + 1; ++c)
1733 for (unsigned int d = 0; d < dim + 1; ++d)
1734 if (c == d)
1735 coupling[c][d] = DoFTools::always;
1736 else
1737 coupling[c][d] = DoFTools::none;
1738
1739 DoFTools::make_sparsity_pattern(stokes_dof_handler,
1740 coupling,
1741 sp,
1742 stokes_constraints,
1743 false,
1744 Utilities::MPI::this_mpi_process(
1745 MPI_COMM_WORLD));
1746 sp.compress();
1747
1748 stokes_preconditioner_matrix.reinit(sp);
1749 }
1750
1751
1752 template <int dim>
1753 void BoussinesqFlowProblem<dim>::setup_temperature_matrices(
1754 const IndexSet &temperature_partitioner,
1755 const IndexSet &temperature_relevant_partitioner)
1756 {
1757 T_preconditioner.reset();
1758 temperature_mass_matrix.clear();
1759 temperature_stiffness_matrix.clear();
1760 temperature_matrix.clear();
1761
1762 TrilinosWrappers::SparsityPattern sp(temperature_partitioner,
1763 temperature_partitioner,
1764 temperature_relevant_partitioner,
1765 MPI_COMM_WORLD);
1766 DoFTools::make_sparsity_pattern(temperature_dof_handler,
1767 sp,
1768 temperature_constraints,
1769 false,
1770 Utilities::MPI::this_mpi_process(
1771 MPI_COMM_WORLD));
1772 sp.compress();
1773
1774 temperature_matrix.reinit(sp);
1775 temperature_mass_matrix.reinit(sp);
1776 temperature_stiffness_matrix.reinit(sp);
1777 }
1778
1779
1780
1781 // The remainder of the setup function (after splitting out the three
1782 // functions above) mostly has to deal with the things we need to do for
1783 // parallelization across processors. Because setting all of this up is a
1784 // significant compute time expense of the program, we put everything we do
1785 // here into a timer group so that we can get summary information about the
1786 // fraction of time spent in this part of the program at its end.
1787 //
1788 // At the top as usual we enumerate degrees of freedom and sort them by
1789 // component/block, followed by writing their numbers to the screen from
1790 // processor zero. The DoFHandler::distribute_dofs() function, when applied
1791 // to a parallel::distributed::Triangulation object, sorts degrees of
1792 // freedom in such a way that all degrees of freedom associated with
1793 // subdomain zero come before all those associated with subdomain one,
1794 // etc. For the Stokes part, this entails, however, that velocities and
1795 // pressures become intermixed, but this is trivially solved by sorting
1796 // again by blocks; it is worth noting that this latter operation leaves the
1797 // relative ordering of all velocities and pressures alone, i.e. within the
1798 // velocity block we will still have all those associated with subdomain
1799 // zero before all velocities associated with subdomain one, etc. This is
1800 // important since we store each of the blocks of this matrix distributed
1801 // across all processors and want this to be done in such a way that each
1802 // processor stores that part of the matrix that is roughly equal to the
1803 // degrees of freedom located on those cells that it will actually work on.
1804 //
1805 // When printing the numbers of degrees of freedom, note that these numbers
1806 // are going to be large if we use many processors. Consequently, we let the
1807 // stream put a comma separator in between every three digits. The state of
1808 // the stream, using the locale, is saved from before to after this
1809 // operation. While slightly opaque, the code works because the default
1810 // locale (which we get using the constructor call
1811 // <code>std::locale("")</code>) implies printing numbers with a comma
1812 // separator for every third digit (i.e., thousands, millions, billions).
1813 //
1814 // In this function as well as many below, we measure how much time
1815 // we spend here and collect that in a section called "Setup dof
1816 // systems" across function invocations. This is done using an
1817 // TimerOutput::Scope object that gets a timer going in the section
1818 // with above name of the `computing_timer` object upon construction
1819 // of the local variable; the timer is stopped again when the
1820 // destructor of the `timing_section` variable is called. This, of
1821 // course, happens either at the end of the function, or if we leave
1822 // the function through a `return` statement or when an exception is
1823 // thrown somewhere -- in other words, whenever we leave this
1824 // function in any way. The use of such "scope" objects therefore
1825 // makes sure that we do not have to manually add code that tells
1826 // the timer to stop at every location where this function may be
1827 // left.
1828 template <int dim>
1829 void BoussinesqFlowProblem<dim>::setup_dofs()
1830 {
1831 TimerOutput::Scope timing_section(computing_timer, "Setup dof systems");
1832
1833 stokes_dof_handler.distribute_dofs(stokes_fe);
1834
1835 std::vector<unsigned int> stokes_sub_blocks(dim + 1, 0);
1836 stokes_sub_blocks[dim] = 1;
1837 DoFRenumbering::component_wise(stokes_dof_handler, stokes_sub_blocks);
1838
1839 temperature_dof_handler.distribute_dofs(temperature_fe);
1840
1841 const std::vector<types::global_dof_index> stokes_dofs_per_block =
1842 DoFTools::count_dofs_per_fe_block(stokes_dof_handler, stokes_sub_blocks);
1843
1844 const unsigned int n_u = stokes_dofs_per_block[0],
1845 n_p = stokes_dofs_per_block[1],
1846 n_T = temperature_dof_handler.n_dofs();
1847
1848 std::locale s = pcout.get_stream().getloc();
1849 pcout.get_stream().imbue(std::locale(""));
1850 pcout << "Number of active cells: " << triangulation.n_global_active_cells()
1851 << " (on " << triangulation.n_levels() << " levels)" << std::endl
1852 << "Number of degrees of freedom: " << n_u + n_p + n_T << " (" << n_u
1853 << '+' << n_p << '+' << n_T << ')' << std::endl
1854 << std::endl;
1855 pcout.get_stream().imbue(s);
1856
1857
1858 // After this, we have to set up the various partitioners (of type
1859 // <code>IndexSet</code>, see the introduction) that describe which parts
1860 // of each matrix or vector will be stored where, then call the functions
1861 // that actually set up the matrices, and at the end also resize the
1862 // various vectors we keep around in this program.
1863 std::vector<IndexSet> stokes_partitioning, stokes_relevant_partitioning;
1864 IndexSet temperature_partitioning(n_T),
1865 temperature_relevant_partitioning(n_T);
1866 IndexSet stokes_relevant_set;
1867 {
1868 IndexSet stokes_index_set = stokes_dof_handler.locally_owned_dofs();
1869 stokes_partitioning.push_back(stokes_index_set.get_view(0, n_u));
1870 stokes_partitioning.push_back(stokes_index_set.get_view(n_u, n_u + n_p));
1871
1872 DoFTools::extract_locally_relevant_dofs(stokes_dof_handler,
1873 stokes_relevant_set);
1874 stokes_relevant_partitioning.push_back(
1875 stokes_relevant_set.get_view(0, n_u));
1876 stokes_relevant_partitioning.push_back(
1877 stokes_relevant_set.get_view(n_u, n_u + n_p));
1878
1879 temperature_partitioning = temperature_dof_handler.locally_owned_dofs();
1880 DoFTools::extract_locally_relevant_dofs(
1881 temperature_dof_handler, temperature_relevant_partitioning);
1882 }
1883
1884 // Following this, we can compute constraints for the solution vectors,
1885 // including hanging node constraints and homogeneous and inhomogeneous
1886 // boundary values for the Stokes and temperature fields. Note that as for
1887 // everything else, the constraint objects can not hold <i>all</i>
1888 // constraints on every processor. Rather, each processor needs to store
1889 // only those that are actually necessary for correctness given that it
1890 // only assembles linear systems on cells it owns. As discussed in the
1891 // @ref distributed_paper "this paper", the set of constraints we need to
1892 // know about is exactly the set of constraints on all locally relevant
1893 // degrees of freedom, so this is what we use to initialize the constraint
1894 // objects.
1895 {
1896 stokes_constraints.clear();
1897 stokes_constraints.reinit(stokes_relevant_set);
1898
1899 DoFTools::make_hanging_node_constraints(stokes_dof_handler,
1900 stokes_constraints);
1901
1902 FEValuesExtractors::Vector velocity_components(0);
1903 VectorTools::interpolate_boundary_values(
1904 stokes_dof_handler,
1905 0,
1906 Functions::ZeroFunction<dim>(dim + 1),
1907 stokes_constraints,
1908 stokes_fe.component_mask(velocity_components));
1909
1910 std::set<types::boundary_id> no_normal_flux_boundaries;
1911 no_normal_flux_boundaries.insert(1);
1912 VectorTools::compute_no_normal_flux_constraints(stokes_dof_handler,
1913 0,
1914 no_normal_flux_boundaries,
1915 stokes_constraints,
1916 mapping);
1917 stokes_constraints.close();
1918 }
1919 {
1920 temperature_constraints.clear();
1921 temperature_constraints.reinit(temperature_relevant_partitioning);
1922
1923 DoFTools::make_hanging_node_constraints(temperature_dof_handler,
1924 temperature_constraints);
1925 VectorTools::interpolate_boundary_values(
1926 temperature_dof_handler,
1927 0,
1928 EquationData::TemperatureInitialValues<dim>(),
1929 temperature_constraints);
1930 VectorTools::interpolate_boundary_values(
1931 temperature_dof_handler,
1932 1,
1933 EquationData::TemperatureInitialValues<dim>(),
1934 temperature_constraints);
1935 temperature_constraints.close();
1936 }
1937
1938 // All this done, we can then initialize the various matrix and vector
1939 // objects to their proper sizes. At the end, we also record that all
1940 // matrices and preconditioners have to be re-computed at the beginning of
1941 // the next time step. Note how we initialize the vectors for the Stokes
1942 // and temperature right hand sides: These are writable vectors (last
1943 // boolean argument set to @p true) that have the correct one-to-one
1944 // partitioning of locally owned elements but are still given the relevant
1945 // partitioning as a means of figuring out the vector entries that are
1946 // going to be set right away. As for matrices, this allows for writing
1947 // local contributions into the vector with multiple threads (always
1948 // assuming that the same vector entry is not accessed by multiple threads
1949 // at the same time). The other vectors only allow for read access of
1950 // individual elements, including ghosts, but are not suitable for
1951 // solvers.
1952 setup_stokes_matrix(stokes_partitioning, stokes_relevant_partitioning);
1953 setup_stokes_preconditioner(stokes_partitioning,
1954 stokes_relevant_partitioning);
1955 setup_temperature_matrices(temperature_partitioning,
1956 temperature_relevant_partitioning);
1957
1958 stokes_rhs.reinit(stokes_partitioning,
1959 stokes_relevant_partitioning,
1960 MPI_COMM_WORLD,
1961 true);
1962 stokes_solution.reinit(stokes_relevant_partitioning, MPI_COMM_WORLD);
1963 old_stokes_solution.reinit(stokes_solution);
1964
1965 temperature_rhs.reinit(temperature_partitioning,
1966 temperature_relevant_partitioning,
1967 MPI_COMM_WORLD,
1968 true);
1969 temperature_solution.reinit(temperature_relevant_partitioning,
1970 MPI_COMM_WORLD);
1971 old_temperature_solution.reinit(temperature_solution);
1972 old_old_temperature_solution.reinit(temperature_solution);
1973
1974 rebuild_stokes_matrix = true;
1975 rebuild_stokes_preconditioner = true;
1976 rebuild_temperature_matrices = true;
1977 rebuild_temperature_preconditioner = true;
1978 }
1979
1980
1981
1982 // @sect4{The BoussinesqFlowProblem assembly functions}
1983 //
1984 // Following the discussion in the introduction and in the @ref threads
1985 // module, we split the assembly functions into different parts:
1986 //
1987 // <ul> <li> The local calculations of matrices and right hand sides, given
1988 // a certain cell as input (these functions are named
1989 // <code>local_assemble_*</code> below). The resulting function is, in other
1990 // words, essentially the body of the loop over all cells in step-31. Note,
1991 // however, that these functions store the result from the local
1992 // calculations in variables of classes from the CopyData namespace.
1993 //
1994 // <li>These objects are then given to the second step which writes the
1995 // local data into the global data structures (these functions are named
1996 // <code>copy_local_to_global_*</code> below). These functions are pretty
1997 // trivial.
1998 //
1999 // <li>These two subfunctions are then used in the respective assembly
2000 // routine (called <code>assemble_*</code> below), where a WorkStream object
2001 // is set up and runs over all the cells that belong to the processor's
2002 // subdomain. </ul>
2003
2004 // @sect5{Stokes preconditioner assembly}
2005 //
2006 // Let us start with the functions that build the Stokes
2007 // preconditioner. The first two of these are pretty trivial, given the
2008 // discussion above. Note in particular that the main point in using the
2009 // scratch data object is that we want to avoid allocating any objects on
2010 // the heap each time we visit a new cell. As a consequence, the
2011 // assembly function below only has automatic local variables, and
2012 // everything else is accessed through the scratch data object, which is
2013 // allocated only once before we start the loop over all cells:
2014 template <int dim>
2015 void BoussinesqFlowProblem<dim>::local_assemble_stokes_preconditioner(
2016 const typename DoFHandler<dim>::active_cell_iterator &cell,
2017 Assembly::Scratch::StokesPreconditioner<dim> & scratch,
2018 Assembly::CopyData::StokesPreconditioner<dim> & data)
2019 {
2020 const unsigned int dofs_per_cell = stokes_fe.n_dofs_per_cell();
2021 const unsigned int n_q_points =
2022 scratch.stokes_fe_values.n_quadrature_points;
2023
2024 const FEValuesExtractors::Vector velocities(0);
2025 const FEValuesExtractors::Scalar pressure(dim);
2026
2027 scratch.stokes_fe_values.reinit(cell);
2028 cell->get_dof_indices(data.local_dof_indices);
2029
2030 data.local_matrix = 0;
2031
2032 for (unsigned int q = 0; q < n_q_points; ++q)
2033 {
2034 for (unsigned int k = 0; k < dofs_per_cell; ++k)
2035 {
2036 scratch.grad_phi_u[k] =
2037 scratch.stokes_fe_values[velocities].gradient(k, q);
2038 scratch.phi_p[k] = scratch.stokes_fe_values[pressure].value(k, q);
2039 }
2040
2041 for (unsigned int i = 0; i < dofs_per_cell; ++i)
2042 for (unsigned int j = 0; j < dofs_per_cell; ++j)
2043 data.local_matrix(i, j) +=
2044 (EquationData::eta *
2045 scalar_product(scratch.grad_phi_u[i], scratch.grad_phi_u[j]) +
2046 (1. / EquationData::eta) * EquationData::pressure_scaling *
2047 EquationData::pressure_scaling *
2048 (scratch.phi_p[i] * scratch.phi_p[j])) *
2049 scratch.stokes_fe_values.JxW(q);
2050 }
2051 }
2052
2053
2054
2055 template <int dim>
2056 void BoussinesqFlowProblem<dim>::copy_local_to_global_stokes_preconditioner(
2057 const Assembly::CopyData::StokesPreconditioner<dim> &data)
2058 {
2059 stokes_constraints.distribute_local_to_global(data.local_matrix,
2060 data.local_dof_indices,
2061 stokes_preconditioner_matrix);
2062 }
2063
2064
2065 // Now for the function that actually puts things together, using the
2066 // WorkStream functions. WorkStream::run needs a start and end iterator to
2067 // enumerate the cells it is supposed to work on. Typically, one would use
2068 // DoFHandler::begin_active() and DoFHandler::end() for that but here we
2069 // actually only want the subset of cells that in fact are owned by the
2070 // current processor. This is where the FilteredIterator class comes into
2071 // play: you give it a range of cells and it provides an iterator that only
2072 // iterates over that subset of cells that satisfy a certain predicate (a
2073 // predicate is a function of one argument that either returns true or
2074 // false). The predicate we use here is IteratorFilters::LocallyOwnedCell,
2075 // i.e., it returns true exactly if the cell is owned by the current
2076 // processor. The resulting iterator range is then exactly what we need.
2077 //
2078 // With this obstacle out of the way, we call the WorkStream::run
2079 // function with this set of cells, scratch and copy objects, and
2080 // with pointers to two functions: the local assembly and
2081 // copy-local-to-global function. These functions need to have very
2082 // specific signatures: three arguments in the first and one
2083 // argument in the latter case (see the documentation of the
2084 // WorkStream::run function for the meaning of these arguments).
2085 // Note how we use lambda functions to
2086 // create function objects that satisfy this requirement. They use
2087 // function arguments for the local assembly function that specify
2088 // cell, scratch data, and copy data, as well as a function argument
2089 // for the copy function that expects the
2090 // data to be written into the global matrix (also see the discussion in
2091 // step-13's <code>assemble_linear_system()</code> function). On the other
2092 // hand, the implicit zeroth argument of member functions (namely
2093 // the <code>this</code> pointer of the object on which that member
2094 // function is to operate on) is <i>bound</i> to the
2095 // <code>this</code> pointer of the current function and is captured. The
2096 // WorkStream::run function, as a consequence, does not need to know
2097 // anything about the object these functions work on.
2098 //
2099 // When the WorkStream is executed, it will create several local assembly
2100 // routines of the first kind for several cells and let some available
2101 // processor cores work on them. The function that needs to be synchronized,
2102 // i.e., the write operation into the global matrix, however, is executed by
2103 // only one thread at a time in the prescribed order. Of course, this only
2104 // holds for the parallelization on a single MPI process. Different MPI
2105 // processes will have their own WorkStream objects and do that work
2106 // completely independently (and in different memory spaces). In a
2107 // distributed calculation, some data will accumulate at degrees of freedom
2108 // that are not owned by the respective processor. It would be inefficient
2109 // to send data around every time we encounter such a dof. What happens
2110 // instead is that the Trilinos sparse matrix will keep that data and send
2111 // it to the owner at the end of assembly, by calling the
2112 // <code>compress()</code> command.
2113 template <int dim>
2114 void BoussinesqFlowProblem<dim>::assemble_stokes_preconditioner()
2115 {
2116 stokes_preconditioner_matrix = 0;
2117
2118 const QGauss<dim> quadrature_formula(parameters.stokes_velocity_degree + 1);
2119
2120 using CellFilter =
2121 FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
2122
2123 auto worker =
2124 [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
2125 Assembly::Scratch::StokesPreconditioner<dim> & scratch,
2126 Assembly::CopyData::StokesPreconditioner<dim> & data) {
2127 this->local_assemble_stokes_preconditioner(cell, scratch, data);
2128 };
2129
2130 auto copier =
2131 [this](const Assembly::CopyData::StokesPreconditioner<dim> &data) {
2132 this->copy_local_to_global_stokes_preconditioner(data);
2133 };
2134
2135 WorkStream::run(CellFilter(IteratorFilters::LocallyOwnedCell(),
2136 stokes_dof_handler.begin_active()),
2137 CellFilter(IteratorFilters::LocallyOwnedCell(),
2138 stokes_dof_handler.end()),
2139 worker,
2140 copier,
2141 Assembly::Scratch::StokesPreconditioner<dim>(
2142 stokes_fe,
2143 quadrature_formula,
2144 mapping,
2145 update_JxW_values | update_values | update_gradients),
2146 Assembly::CopyData::StokesPreconditioner<dim>(stokes_fe));
2147
2148 stokes_preconditioner_matrix.compress(VectorOperation::add);
2149 }
2150
2151
2152
2153 // The final function in this block initiates assembly of the Stokes
2154 // preconditioner matrix and then in fact builds the Stokes
2155 // preconditioner. It is mostly the same as in the serial case. The only
2156 // difference to step-31 is that we use a Jacobi preconditioner for the
2157 // pressure mass matrix instead of IC, as discussed in the introduction.
2158 template <int dim>
2159 void BoussinesqFlowProblem<dim>::build_stokes_preconditioner()
2160 {
2161 if (rebuild_stokes_preconditioner == false)
2162 return;
2163
2164 TimerOutput::Scope timer_section(computing_timer,
2165 " Build Stokes preconditioner");
2166 pcout << " Rebuilding Stokes preconditioner..." << std::flush;
2167
2168 assemble_stokes_preconditioner();
2169
2170 std::vector<std::vector<bool>> constant_modes;
2171 FEValuesExtractors::Vector velocity_components(0);
2172 DoFTools::extract_constant_modes(stokes_dof_handler,
2173 stokes_fe.component_mask(
2174 velocity_components),
2175 constant_modes);
2176
2177 Mp_preconditioner =
2178 std::make_shared<TrilinosWrappers::PreconditionJacobi>();
2179 Amg_preconditioner = std::make_shared<TrilinosWrappers::PreconditionAMG>();
2180
2181 TrilinosWrappers::PreconditionAMG::AdditionalData Amg_data;
2182 Amg_data.constant_modes = constant_modes;
2183 Amg_data.elliptic = true;
2184 Amg_data.higher_order_elements = true;
2185 Amg_data.smoother_sweeps = 2;
2186 Amg_data.aggregation_threshold = 0.02;
2187
2188 Mp_preconditioner->initialize(stokes_preconditioner_matrix.block(1, 1));
2189 Amg_preconditioner->initialize(stokes_preconditioner_matrix.block(0, 0),
2190 Amg_data);
2191
2192 rebuild_stokes_preconditioner = false;
2193
2194 pcout << std::endl;
2195 }
2196
2197
2198 // @sect5{Stokes system assembly}
2199
2200 // The next three functions implement the assembly of the Stokes system,
2201 // again split up into a part performing local calculations, one for writing
2202 // the local data into the global matrix and vector, and one for actually
2203 // running the loop over all cells with the help of the WorkStream
2204 // class. Note that the assembly of the Stokes matrix needs only to be done
2205 // in case we have changed the mesh. Otherwise, just the
2206 // (temperature-dependent) right hand side needs to be calculated
2207 // here. Since we are working with distributed matrices and vectors, we have
2208 // to call the respective <code>compress()</code> functions at the end of
2209 // the assembly in order to send non-local data to the owner process.
2210 template <int dim>
2211 void BoussinesqFlowProblem<dim>::local_assemble_stokes_system(
2212 const typename DoFHandler<dim>::active_cell_iterator &cell,
2213 Assembly::Scratch::StokesSystem<dim> & scratch,
2214 Assembly::CopyData::StokesSystem<dim> & data)
2215 {
2216 const unsigned int dofs_per_cell =
2217 scratch.stokes_fe_values.get_fe().n_dofs_per_cell();
2218 const unsigned int n_q_points =
2219 scratch.stokes_fe_values.n_quadrature_points;
2220
2221 const FEValuesExtractors::Vector velocities(0);
2222 const FEValuesExtractors::Scalar pressure(dim);
2223
2224 scratch.stokes_fe_values.reinit(cell);
2225
2226 typename DoFHandler<dim>::active_cell_iterator temperature_cell(
2227 &triangulation, cell->level(), cell->index(), &temperature_dof_handler);
2228 scratch.temperature_fe_values.reinit(temperature_cell);
2229
2230 if (rebuild_stokes_matrix)
2231 data.local_matrix = 0;
2232 data.local_rhs = 0;
2233
2234 scratch.temperature_fe_values.get_function_values(
2235 old_temperature_solution, scratch.old_temperature_values);
2236
2237 for (unsigned int q = 0; q < n_q_points; ++q)
2238 {
2239 const double old_temperature = scratch.old_temperature_values[q];
2240
2241 for (unsigned int k = 0; k < dofs_per_cell; ++k)
2242 {
2243 scratch.phi_u[k] = scratch.stokes_fe_values[velocities].value(k, q);
2244 if (rebuild_stokes_matrix)
2245 {
2246 scratch.grads_phi_u[k] =
2247 scratch.stokes_fe_values[velocities].symmetric_gradient(k, q);
2248 scratch.div_phi_u[k] =
2249 scratch.stokes_fe_values[velocities].divergence(k, q);
2250 scratch.phi_p[k] =
2251 scratch.stokes_fe_values[pressure].value(k, q);
2252 }
2253 }
2254
2255 if (rebuild_stokes_matrix == true)
2256 for (unsigned int i = 0; i < dofs_per_cell; ++i)
2257 for (unsigned int j = 0; j < dofs_per_cell; ++j)
2258 data.local_matrix(i, j) +=
2259 (EquationData::eta * 2 *
2260 (scratch.grads_phi_u[i] * scratch.grads_phi_u[j]) -
2261 (EquationData::pressure_scaling * scratch.div_phi_u[i] *
2262 scratch.phi_p[j]) -
2263 (EquationData::pressure_scaling * scratch.phi_p[i] *
2264 scratch.div_phi_u[j])) *
2265 scratch.stokes_fe_values.JxW(q);
2266
2267 const Tensor<1, dim> gravity = EquationData::gravity_vector(
2268 scratch.stokes_fe_values.quadrature_point(q));
2269
2270 for (unsigned int i = 0; i < dofs_per_cell; ++i)
2271 data.local_rhs(i) += (EquationData::density(old_temperature) *
2272 gravity * scratch.phi_u[i]) *
2273 scratch.stokes_fe_values.JxW(q);
2274 }
2275
2276 cell->get_dof_indices(data.local_dof_indices);
2277 }
2278
2279
2280
2281 template <int dim>
2282 void BoussinesqFlowProblem<dim>::copy_local_to_global_stokes_system(
2283 const Assembly::CopyData::StokesSystem<dim> &data)
2284 {
2285 if (rebuild_stokes_matrix == true)
2286 stokes_constraints.distribute_local_to_global(data.local_matrix,
2287 data.local_rhs,
2288 data.local_dof_indices,
2289 stokes_matrix,
2290 stokes_rhs);
2291 else
2292 stokes_constraints.distribute_local_to_global(data.local_rhs,
2293 data.local_dof_indices,
2294 stokes_rhs);
2295 }
2296
2297
2298
2299 template <int dim>
2300 void BoussinesqFlowProblem<dim>::assemble_stokes_system()
2301 {
2302 TimerOutput::Scope timer_section(computing_timer,
2303 " Assemble Stokes system");
2304
2305 if (rebuild_stokes_matrix == true)
2306 stokes_matrix = 0;
2307
2308 stokes_rhs = 0;
2309
2310 const QGauss<dim> quadrature_formula(parameters.stokes_velocity_degree + 1);
2311
2312 using CellFilter =
2313 FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
2314
2315 WorkStream::run(
2316 CellFilter(IteratorFilters::LocallyOwnedCell(),
2317 stokes_dof_handler.begin_active()),
2318 CellFilter(IteratorFilters::LocallyOwnedCell(), stokes_dof_handler.end()),
2319 [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
2320 Assembly::Scratch::StokesSystem<dim> & scratch,
2321 Assembly::CopyData::StokesSystem<dim> & data) {
2322 this->local_assemble_stokes_system(cell, scratch, data);
2323 },
2324 [this](const Assembly::CopyData::StokesSystem<dim> &data) {
2325 this->copy_local_to_global_stokes_system(data);
2326 },
2327 Assembly::Scratch::StokesSystem<dim>(
2328 stokes_fe,
2329 mapping,
2330 quadrature_formula,
2331 (update_values | update_quadrature_points | update_JxW_values |
2332 (rebuild_stokes_matrix == true ? update_gradients : UpdateFlags(0))),
2333 temperature_fe,
2334 update_values),
2335 Assembly::CopyData::StokesSystem<dim>(stokes_fe));
2336
2337 if (rebuild_stokes_matrix == true)
2338 stokes_matrix.compress(VectorOperation::add);
2339 stokes_rhs.compress(VectorOperation::add);
2340
2341 rebuild_stokes_matrix = false;
2342
2343 pcout << std::endl;
2344 }
2345
2346
2347 // @sect5{Temperature matrix assembly}
2348
2349 // The task to be performed by the next three functions is to calculate a
2350 // mass matrix and a Laplace matrix on the temperature system. These will be
2351 // combined in order to yield the semi-implicit time stepping matrix that
2352 // consists of the mass matrix plus a time step-dependent weight factor
2353 // times the Laplace matrix. This function is again essentially the body of
2354 // the loop over all cells from step-31.
2355 //
2356 // The two following functions perform similar services as the ones above.
2357 template <int dim>
2358 void BoussinesqFlowProblem<dim>::local_assemble_temperature_matrix(
2359 const typename DoFHandler<dim>::active_cell_iterator &cell,
2360 Assembly::Scratch::TemperatureMatrix<dim> & scratch,
2361 Assembly::CopyData::TemperatureMatrix<dim> & data)
2362 {
2363 const unsigned int dofs_per_cell =
2364 scratch.temperature_fe_values.get_fe().n_dofs_per_cell();
2365 const unsigned int n_q_points =
2366 scratch.temperature_fe_values.n_quadrature_points;
2367
2368 scratch.temperature_fe_values.reinit(cell);
2369 cell->get_dof_indices(data.local_dof_indices);
2370
2371 data.local_mass_matrix = 0;
2372 data.local_stiffness_matrix = 0;
2373
2374 for (unsigned int q = 0; q < n_q_points; ++q)
2375 {
2376 for (unsigned int k = 0; k < dofs_per_cell; ++k)
2377 {
2378 scratch.grad_phi_T[k] =
2379 scratch.temperature_fe_values.shape_grad(k, q);
2380 scratch.phi_T[k] = scratch.temperature_fe_values.shape_value(k, q);
2381 }
2382
2383 for (unsigned int i = 0; i < dofs_per_cell; ++i)
2384 for (unsigned int j = 0; j < dofs_per_cell; ++j)
2385 {
2386 data.local_mass_matrix(i, j) +=
2387 (scratch.phi_T[i] * scratch.phi_T[j] *
2388 scratch.temperature_fe_values.JxW(q));
2389 data.local_stiffness_matrix(i, j) +=
2390 (EquationData::kappa * scratch.grad_phi_T[i] *
2391 scratch.grad_phi_T[j] * scratch.temperature_fe_values.JxW(q));
2392 }
2393 }
2394 }
2395
2396
2397
2398 template <int dim>
2399 void BoussinesqFlowProblem<dim>::copy_local_to_global_temperature_matrix(
2400 const Assembly::CopyData::TemperatureMatrix<dim> &data)
2401 {
2402 temperature_constraints.distribute_local_to_global(data.local_mass_matrix,
2403 data.local_dof_indices,
2404 temperature_mass_matrix);
2405 temperature_constraints.distribute_local_to_global(
2406 data.local_stiffness_matrix,
2407 data.local_dof_indices,
2408 temperature_stiffness_matrix);
2409 }
2410
2411
2412 template <int dim>
2413 void BoussinesqFlowProblem<dim>::assemble_temperature_matrix()
2414 {
2415 if (rebuild_temperature_matrices == false)
2416 return;
2417
2418 TimerOutput::Scope timer_section(computing_timer,
2419 " Assemble temperature matrices");
2420 temperature_mass_matrix = 0;
2421 temperature_stiffness_matrix = 0;
2422
2423 const QGauss<dim> quadrature_formula(parameters.temperature_degree + 2);
2424
2425 using CellFilter =
2426 FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
2427
2428 WorkStream::run(
2429 CellFilter(IteratorFilters::LocallyOwnedCell(),
2430 temperature_dof_handler.begin_active()),
2431 CellFilter(IteratorFilters::LocallyOwnedCell(),
2432 temperature_dof_handler.end()),
2433 [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
2434 Assembly::Scratch::TemperatureMatrix<dim> & scratch,
2435 Assembly::CopyData::TemperatureMatrix<dim> & data) {
2436 this->local_assemble_temperature_matrix(cell, scratch, data);
2437 },
2438 [this](const Assembly::CopyData::TemperatureMatrix<dim> &data) {
2439 this->copy_local_to_global_temperature_matrix(data);
2440 },
2441 Assembly::Scratch::TemperatureMatrix<dim>(temperature_fe,
2442 mapping,
2443 quadrature_formula),
2444 Assembly::CopyData::TemperatureMatrix<dim>(temperature_fe));
2445
2446 temperature_mass_matrix.compress(VectorOperation::add);
2447 temperature_stiffness_matrix.compress(VectorOperation::add);
2448
2449 rebuild_temperature_matrices = false;
2450 rebuild_temperature_preconditioner = true;
2451 }
2452
2453
2454 // @sect5{Temperature right hand side assembly}
2455
2456 // This is the last assembly function. It calculates the right hand side of
2457 // the temperature system, which includes the convection and the
2458 // stabilization terms. It includes a lot of evaluations of old solutions at
2459 // the quadrature points (which are necessary for calculating the artificial
2460 // viscosity of stabilization), but is otherwise similar to the other
2461 // assembly functions. Notice, once again, how we resolve the dilemma of
2462 // having inhomogeneous boundary conditions, by just making a right hand
2463 // side at this point (compare the comments for the <code>project()</code>
2464 // function above): We create some matrix columns with exactly the values
2465 // that would be entered for the temperature stiffness matrix, in case we
2466 // have inhomogeneously constrained dofs. That will account for the correct
2467 // balance of the right hand side vector with the matrix system of
2468 // temperature.
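//
// (To recall the mechanics: if degree of freedom $j$ is constrained to an
// inhomogeneous value $g_j$, then eliminating it from a linear system
// $AU=F$ amounts to replacing the right hand side entries by
// $F_i - A_{ij} g_j$. The AffineConstraints::distribute_local_to_global
// call in the copy function below uses the columns collected in
// <code>matrix_for_bc</code> for exactly this purpose.)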
2469 template <int dim>
2470 void BoussinesqFlowProblem<dim>::local_assemble_temperature_rhs(
2471 const std::pair<double, double> global_T_range,
2472 const double global_max_velocity,
2473 const double global_entropy_variation,
2474 const typename DoFHandler<dim>::active_cell_iterator &cell,
2475 Assembly::Scratch::TemperatureRHS<dim> & scratch,
2476 Assembly::CopyData::TemperatureRHS<dim> & data)
2477 {
2478 const bool use_bdf2_scheme = (timestep_number != 0);
2479
2480 const unsigned int dofs_per_cell =
2481 scratch.temperature_fe_values.get_fe().n_dofs_per_cell();
2482 const unsigned int n_q_points =
2483 scratch.temperature_fe_values.n_quadrature_points;
2484
2485 const FEValuesExtractors::Vector velocities(0);
2486
2487 data.local_rhs = 0;
2488 data.matrix_for_bc = 0;
2489 cell->get_dof_indices(data.local_dof_indices);
2490
2491 scratch.temperature_fe_values.reinit(cell);
2492
2493 typename DoFHandler<dim>::active_cell_iterator stokes_cell(
2494 &triangulation, cell->level(), cell->index(), &stokes_dof_handler);
2495 scratch.stokes_fe_values.reinit(stokes_cell);
2496
2497 scratch.temperature_fe_values.get_function_values(
2498 old_temperature_solution, scratch.old_temperature_values);
2499 scratch.temperature_fe_values.get_function_values(
2500 old_old_temperature_solution, scratch.old_old_temperature_values);
2501
2502 scratch.temperature_fe_values.get_function_gradients(
2503 old_temperature_solution, scratch.old_temperature_grads);
2504 scratch.temperature_fe_values.get_function_gradients(
2505 old_old_temperature_solution, scratch.old_old_temperature_grads);
2506
2507 scratch.temperature_fe_values.get_function_laplacians(
2508 old_temperature_solution, scratch.old_temperature_laplacians);
2509 scratch.temperature_fe_values.get_function_laplacians(
2510 old_old_temperature_solution, scratch.old_old_temperature_laplacians);
2511
2512 scratch.stokes_fe_values[velocities].get_function_values(
2513 stokes_solution, scratch.old_velocity_values);
2514 scratch.stokes_fe_values[velocities].get_function_values(
2515 old_stokes_solution, scratch.old_old_velocity_values);
2516 scratch.stokes_fe_values[velocities].get_function_symmetric_gradients(
2517 stokes_solution, scratch.old_strain_rates);
2518 scratch.stokes_fe_values[velocities].get_function_symmetric_gradients(
2519 old_stokes_solution, scratch.old_old_strain_rates);
2520
2521 const double nu =
2522 compute_viscosity(scratch.old_temperature_values,
2523 scratch.old_old_temperature_values,
2524 scratch.old_temperature_grads,
2525 scratch.old_old_temperature_grads,
2526 scratch.old_temperature_laplacians,
2527 scratch.old_old_temperature_laplacians,
2528 scratch.old_velocity_values,
2529 scratch.old_old_velocity_values,
2530 scratch.old_strain_rates,
2531 scratch.old_old_strain_rates,
2532 global_max_velocity,
2533 global_T_range.second - global_T_range.first,
2534 0.5 * (global_T_range.second + global_T_range.first),
2535 global_entropy_variation,
2536 cell->diameter());
2537
2538 for (unsigned int q = 0; q < n_q_points; ++q)
2539 {
2540 for (unsigned int k = 0; k < dofs_per_cell; ++k)
2541 {
2542 scratch.phi_T[k] = scratch.temperature_fe_values.shape_value(k, q);
2543 scratch.grad_phi_T[k] =
2544 scratch.temperature_fe_values.shape_grad(k, q);
2545 }
2546
2547
2548 const double T_term_for_rhs =
2549 (use_bdf2_scheme ?
2550 (scratch.old_temperature_values[q] *
2551 (1 + time_step / old_time_step) -
2552 scratch.old_old_temperature_values[q] * (time_step * time_step) /
2553 (old_time_step * (time_step + old_time_step))) :
2554 scratch.old_temperature_values[q]);
2555
2556 const double ext_T =
2557 (use_bdf2_scheme ? (scratch.old_temperature_values[q] *
2558 (1 + time_step / old_time_step) -
2559 scratch.old_old_temperature_values[q] *
2560 time_step / old_time_step) :
2561 scratch.old_temperature_values[q]);
2562
2563 const Tensor<1, dim> ext_grad_T =
2564 (use_bdf2_scheme ? (scratch.old_temperature_grads[q] *
2565 (1 + time_step / old_time_step) -
2566 scratch.old_old_temperature_grads[q] * time_step /
2567 old_time_step) :
2568 scratch.old_temperature_grads[q]);
2569
2570 const Tensor<1, dim> extrapolated_u =
2571 (use_bdf2_scheme ?
2572 (scratch.old_velocity_values[q] * (1 + time_step / old_time_step) -
2573 scratch.old_old_velocity_values[q] * time_step / old_time_step) :
2574 scratch.old_velocity_values[q]);
2575
2576 const SymmetricTensor<2, dim> extrapolated_strain_rate =
2577 (use_bdf2_scheme ?
2578 (scratch.old_strain_rates[q] * (1 + time_step / old_time_step) -
2579 scratch.old_old_strain_rates[q] * time_step / old_time_step) :
2580 scratch.old_strain_rates[q]);
2581
2582 const double gamma =
2583 ((EquationData::radiogenic_heating * EquationData::density(ext_T) +
2584 2 * EquationData::eta * extrapolated_strain_rate *
2585 extrapolated_strain_rate) /
2586 (EquationData::density(ext_T) * EquationData::specific_heat));
2587
2588 for (unsigned int i = 0; i < dofs_per_cell; ++i)
2589 {
2590 data.local_rhs(i) +=
2591 (T_term_for_rhs * scratch.phi_T[i] -
2592 time_step * extrapolated_u * ext_grad_T * scratch.phi_T[i] -
2593 time_step * nu * ext_grad_T * scratch.grad_phi_T[i] +
2594 time_step * gamma * scratch.phi_T[i]) *
2595 scratch.temperature_fe_values.JxW(q);
2596
2597 if (temperature_constraints.is_inhomogeneously_constrained(
2598 data.local_dof_indices[i]))
2599 {
2600 for (unsigned int j = 0; j < dofs_per_cell; ++j)
2601 data.matrix_for_bc(j, i) +=
2602 (scratch.phi_T[i] * scratch.phi_T[j] *
2603 (use_bdf2_scheme ? ((2 * time_step + old_time_step) /
2604 (time_step + old_time_step)) :
2605 1.) +
2606 scratch.grad_phi_T[i] * scratch.grad_phi_T[j] *
2607 EquationData::kappa * time_step) *
2608 scratch.temperature_fe_values.JxW(q);
2609 }
2610 }
2611 }
2612 }
2613
2614
2615 template <int dim>
2616 void BoussinesqFlowProblem<dim>::copy_local_to_global_temperature_rhs(
2617 const Assembly::CopyData::TemperatureRHS<dim> &data)
2618 {
2619 temperature_constraints.distribute_local_to_global(data.local_rhs,
2620 data.local_dof_indices,
2621 temperature_rhs,
2622 data.matrix_for_bc);
2623 }
2624
2625
2626
2627 // In the function that runs the WorkStream for actually calculating the
2628 // right hand side, we also generate the final matrix. As mentioned above,
2629 // it is a sum of the mass matrix and the Laplace matrix, times some time
2630 // step-dependent weight. This weight is specified by the BDF-2 time
2631 // integration scheme, see the introduction in step-31. What is new in this
2632 // tutorial program (in addition to the use of MPI parallelization and the
2633 // WorkStream class), is that we now precompute the temperature
2634 // preconditioner as well. The reason is that the setup of the Jacobi
2635 // preconditioner takes a noticeable time compared to the solver because we
2636 // usually only need between 10 and 20 iterations for solving the
2637 // temperature system (this might sound strange, as Jacobi really only
2638 // consists of a diagonal, but in Trilinos it is derived from a more general
2639 // framework for point relaxation preconditioners, which is a bit
2640 // inefficient). Hence, it is more efficient to precompute the
2641 // preconditioner, even though the matrix entries may slightly change
2642 // because the time step might change. This is not too big a problem because
2643 // we remesh every few time steps (and regenerate the preconditioner then).
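//
// Concretely, denoting the temperature mass and stiffness matrices assembled
// earlier by $M$ and $A$, and the current and previous time step lengths by
// $k_n$ and $k_{n-1}$, the code below forms the system matrix
// $\frac{2k_n+k_{n-1}}{k_n+k_{n-1}}\, M + k_n A$ when the BDF-2 scheme is in
// use, and simply $M + k_n A$ (backward Euler) in the first time step.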
2644 template <int dim>
2645 void BoussinesqFlowProblem<dim>::assemble_temperature_system(
2646 const double maximal_velocity)
2647 {
2648 const bool use_bdf2_scheme = (timestep_number != 0);
2649
2650 if (use_bdf2_scheme == true)
2651 {
2652 temperature_matrix.copy_from(temperature_mass_matrix);
2653 temperature_matrix *=
2654 (2 * time_step + old_time_step) / (time_step + old_time_step);
2655 temperature_matrix.add(time_step, temperature_stiffness_matrix);
2656 }
2657 else
2658 {
2659 temperature_matrix.copy_from(temperature_mass_matrix);
2660 temperature_matrix.add(time_step, temperature_stiffness_matrix);
2661 }
2662
2663 if (rebuild_temperature_preconditioner == true)
2664 {
2665 T_preconditioner =
2666 std::make_shared<TrilinosWrappers::PreconditionJacobi>();
2667 T_preconditioner->initialize(temperature_matrix);
2668 rebuild_temperature_preconditioner = false;
2669 }
2670
2671 // The next part is computing the right hand side vectors. To do so, we
2672 // first compute the average temperature $T_m$ that we use for evaluating
2673 // the artificial viscosity stabilization through the residual $E(T) =
2674 // (T-T_m)^2$. We do this by defining the midpoint between maximum and
2675 // minimum temperature as average temperature in the definition of the
2676 // entropy viscosity. An alternative would be to use the integral average,
2677 // but the results are not very sensitive to this choice. The rest then
2678 // only requires calling WorkStream::run again, binding the arguments to
2679 // the <code>local_assemble_temperature_rhs</code> function that are the
2680 // same in every call to the correct values:
2681 temperature_rhs = 0;
2682
2683 const QGauss<dim> quadrature_formula(parameters.temperature_degree + 2);
2684 const std::pair<double, double> global_T_range =
2685 get_extrapolated_temperature_range();
2686
2687 const double average_temperature =
2688 0.5 * (global_T_range.first + global_T_range.second);
2689 const double global_entropy_variation =
2690 get_entropy_variation(average_temperature);
2691
2692 using CellFilter =
2693 FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
2694
2695 auto worker =
2696 [this, global_T_range, maximal_velocity, global_entropy_variation](
2697 const typename DoFHandler<dim>::active_cell_iterator &cell,
2698 Assembly::Scratch::TemperatureRHS<dim> & scratch,
2699 Assembly::CopyData::TemperatureRHS<dim> & data) {
2700 this->local_assemble_temperature_rhs(global_T_range,
2701 maximal_velocity,
2702 global_entropy_variation,
2703 cell,
2704 scratch,
2705 data);
2706 };
2707
2708 auto copier = [this](const Assembly::CopyData::TemperatureRHS<dim> &data) {
2709 this->copy_local_to_global_temperature_rhs(data);
2710 };
2711
2712 WorkStream::run(CellFilter(IteratorFilters::LocallyOwnedCell(),
2713 temperature_dof_handler.begin_active()),
2714 CellFilter(IteratorFilters::LocallyOwnedCell(),
2715 temperature_dof_handler.end()),
2716 worker,
2717 copier,
2718 Assembly::Scratch::TemperatureRHS<dim>(
2719 temperature_fe, stokes_fe, mapping, quadrature_formula),
2720 Assembly::CopyData::TemperatureRHS<dim>(temperature_fe));
2721
2722 temperature_rhs.compress(VectorOperation::add);
2723 }
2724
2725
2726
2727 // @sect4{BoussinesqFlowProblem::solve}
2728
2729 // This function solves the linear systems in each time step of the
2730 // Boussinesq problem. First, we work on the Stokes system and then on the
2731 // temperature system. In essence, it does the same things as the respective
2732 // function in step-31. However, there are a few changes here.
2733 //
2734 // The first change is related to the way we store our solution: we keep the
2735 // vectors with locally owned degrees of freedom plus ghost nodes on each
2736 // MPI node. When we enter a solver which is supposed to perform
2737 // matrix-vector products with a distributed matrix, this is not the
2738 // appropriate form, though. There, we will want to have the solution vector
2739 // to be distributed in the same way as the matrix, i.e. without any
2740 // ghosts. So what we do first is to generate a distributed vector called
2741 // <code>distributed_stokes_solution</code> and put only the locally owned
2742 // dofs into that, which is neatly done by the <code>operator=</code> of the
2743 // Trilinos vector.
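//
// As a minimal sketch of this idiom, using the names that appear in the
// function below, the copy into and out of the fully distributed layout
// looks like this:
// @code
//   TrilinosWrappers::MPI::BlockVector distributed_stokes_solution(stokes_rhs);
//   distributed_stokes_solution = stokes_solution; // keep only locally owned values
//   // ... solve into distributed_stokes_solution ...
//   stokes_solution = distributed_stokes_solution; // re-import ghost entries
// @endcode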
2744 //
2745 // Next, we scale the pressure solution (or rather, the initial guess) for
2746 // the solver so that it matches with the length scales in the matrices, as
2747 // discussed in the introduction. We also immediately scale the pressure
2748 // solution back to the correct units after the solution is completed. We
2749 // also need to set the pressure values at hanging nodes to zero. This we
2750 // also did in step-31 in order not to disturb the Schur complement by some
2751 // vector entries that actually are irrelevant during the solve stage. As a
2752 // difference to step-31, here we do it only for the locally owned pressure
2753 // dofs. After solving for the Stokes solution, each processor copies the
2754 // distributed solution back into the solution vector that also includes
2755 // ghost elements.
2756 //
2757 // The third and most obvious change is that we have two variants for the
2758 // Stokes solver: A fast solver that sometimes breaks down, and a robust
2759 // solver that is slower. This is what we already discussed in the
2760 // introduction. Here is how we realize it: First, we perform 30 iterations
2761 // with the fast solver, using the simple preconditioner that applies an AMG
2762 // V-cycle instead of an approximate solve (this is indicated by the
2763 // <code>false</code> argument to the
2764 // <code>LinearSolvers::BlockSchurPreconditioner</code> object). If we
2765 // converge, everything is fine. If we do not converge, the solver control
2766 // object will throw an exception SolverControl::NoConvergence. Usually,
2767 // this would abort the program because we don't catch them in our usual
2768 // <code>solve()</code> functions. This is certainly not what we want to
2769 // happen here. Rather, we want to switch to the strong solver and continue
2770 // the solution process with whatever vector we got so far. Hence, we catch
2771 // the exception with the C++ try/catch mechanism. We then simply go through
2772 // the same solver sequence again in the <code>catch</code> clause, this
2773 // time passing the @p true flag to the preconditioner for the strong
2774 // solver, signaling an approximate CG solve.
2775 template <int dim>
2776 void BoussinesqFlowProblem<dim>::solve()
2777 {
2778 {
2779 TimerOutput::Scope timer_section(computing_timer,
2780 " Solve Stokes system");
2781
2782 pcout << " Solving Stokes system... " << std::flush;
2783
2784 TrilinosWrappers::MPI::BlockVector distributed_stokes_solution(
2785 stokes_rhs);
2786 distributed_stokes_solution = stokes_solution;
2787
2788 distributed_stokes_solution.block(1) /= EquationData::pressure_scaling;
2789
2790 const unsigned int
2791 start = (distributed_stokes_solution.block(0).size() +
2792 distributed_stokes_solution.block(1).local_range().first),
2793 end = (distributed_stokes_solution.block(0).size() +
2794 distributed_stokes_solution.block(1).local_range().second);
2795 for (unsigned int i = start; i < end; ++i)
2796 if (stokes_constraints.is_constrained(i))
2797 distributed_stokes_solution(i) = 0;
2798
2799
2800 PrimitiveVectorMemory<TrilinosWrappers::MPI::BlockVector> mem;
2801
2802 unsigned int n_iterations = 0;
2803 const double solver_tolerance = 1e-8 * stokes_rhs.l2_norm();
2804 SolverControl solver_control(30, solver_tolerance);
2805
2806 try
2807 {
2808 const LinearSolvers::BlockSchurPreconditioner<
2809 TrilinosWrappers::PreconditionAMG,
2810 TrilinosWrappers::PreconditionJacobi>
2811 preconditioner(stokes_matrix,
2812 stokes_preconditioner_matrix,
2813 *Mp_preconditioner,
2814 *Amg_preconditioner,
2815 false);
2816
2817 SolverFGMRES<TrilinosWrappers::MPI::BlockVector> solver(
2818 solver_control,
2819 mem,
2820 SolverFGMRES<TrilinosWrappers::MPI::BlockVector>::AdditionalData(
2821 30));
2822 solver.solve(stokes_matrix,
2823 distributed_stokes_solution,
2824 stokes_rhs,
2825 preconditioner);
2826
2827 n_iterations = solver_control.last_step();
2828 }
2829
2830 catch (SolverControl::NoConvergence &)
2831 {
2832 const LinearSolvers::BlockSchurPreconditioner<
2833 TrilinosWrappers::PreconditionAMG,
2834 TrilinosWrappers::PreconditionJacobi>
2835 preconditioner(stokes_matrix,
2836 stokes_preconditioner_matrix,
2837 *Mp_preconditioner,
2838 *Amg_preconditioner,
2839 true);
2840
2841 SolverControl solver_control_refined(stokes_matrix.m(),
2842 solver_tolerance);
2843 SolverFGMRES<TrilinosWrappers::MPI::BlockVector> solver(
2844 solver_control_refined,
2845 mem,
2846 SolverFGMRES<TrilinosWrappers::MPI::BlockVector>::AdditionalData(
2847 50));
2848 solver.solve(stokes_matrix,
2849 distributed_stokes_solution,
2850 stokes_rhs,
2851 preconditioner);
2852
2853 n_iterations =
2854 (solver_control.last_step() + solver_control_refined.last_step());
2855 }
2856
2857
2858 stokes_constraints.distribute(distributed_stokes_solution);
2859
2860 distributed_stokes_solution.block(1) *= EquationData::pressure_scaling;
2861
2862 stokes_solution = distributed_stokes_solution;
2863 pcout << n_iterations << " iterations." << std::endl;
2864 }
2865
2866
2867 // Now let's turn to the temperature part: First, we compute the time step
2868 // size. We found that we need smaller time steps for 3D than for 2D for
2869 // the shell geometry. This is because the cells are more distorted in
2870 // that case (it is the smallest edge length that determines the CFL
2871 // number). Instead of computing the time step from maximum velocity and
2872 // minimal mesh size as in step-31, we compute local CFL numbers, i.e., on
2873 // each cell we compute the maximum velocity divided by the mesh size, and
2874 // take the maximum over all cells. Hence, we need to choose the factor in
2875 // front of the time step slightly smaller.
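//
// Written out as a formula, the step size chosen below is (a sketch,
// assuming that <code>get_cfl_number()</code> returns the maximum over
// all cells of the local velocity divided by the local mesh size, as
// described above)
// $k_n = \frac{c}{2.1\, d\,\sqrt{d}\; p_T\, \max_K \|\mathbf u\|_{\infty,K}/h_K}$,
// where $c$ equals $1$ in 2d and $\frac 14$ in 3d, $d$ is the space
// dimension, and $p_T$ is the polynomial degree of the temperature
// element.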
2876 //
2877 // After temperature right hand side assembly, we solve the linear system
2878 // for temperature (with fully distributed vectors without any ghosts),
2879 // apply constraints and copy the vector back to one with ghosts.
2880 //
2881 // In the end, we extract the temperature range similarly to step-31 to
2882 // produce some output (for example in order to help us choose the
2883 // stabilization constants, as discussed in the introduction). The only
2884 // difference is that we need to exchange maxima over all processors.
2885 {
2886 TimerOutput::Scope timer_section(computing_timer,
2887 " Assemble temperature rhs");
2888
2889 old_time_step = time_step;
2890
2891 const double scaling = (dim == 3 ? 0.25 : 1.0);
2892 time_step = (scaling / (2.1 * dim * std::sqrt(1. * dim)) /
2893 (parameters.temperature_degree * get_cfl_number()));
2894
2895 const double maximal_velocity = get_maximal_velocity();
2896 pcout << " Maximal velocity: "
2897 << maximal_velocity * EquationData::year_in_seconds * 100
2898 << " cm/year" << std::endl;
2899 pcout << " "
2900 << "Time step: " << time_step / EquationData::year_in_seconds
2901 << " years" << std::endl;
2902
2903 temperature_solution = old_temperature_solution;
2904 assemble_temperature_system(maximal_velocity);
2905 }
2906
2907 {
2908 TimerOutput::Scope timer_section(computing_timer,
2909 " Solve temperature system");
2910
2911 SolverControl solver_control(temperature_matrix.m(),
2912 1e-12 * temperature_rhs.l2_norm());
2913 SolverCG<TrilinosWrappers::MPI::Vector> cg(solver_control);
2914
2915 TrilinosWrappers::MPI::Vector distributed_temperature_solution(
2916 temperature_rhs);
2917 distributed_temperature_solution = temperature_solution;
2918
2919 cg.solve(temperature_matrix,
2920 distributed_temperature_solution,
2921 temperature_rhs,
2922 *T_preconditioner);
2923
2924 temperature_constraints.distribute(distributed_temperature_solution);
2925 temperature_solution = distributed_temperature_solution;
2926
2927 pcout << " " << solver_control.last_step()
2928 << " CG iterations for temperature" << std::endl;
2929
2930 double temperature[2] = {std::numeric_limits<double>::max(),
2931 -std::numeric_limits<double>::max()};
2932 double global_temperature[2];
2933
2934 for (unsigned int i =
2935 distributed_temperature_solution.local_range().first;
2936 i < distributed_temperature_solution.local_range().second;
2937 ++i)
2938 {
2939 temperature[0] =
2940 std::min<double>(temperature[0],
2941 distributed_temperature_solution(i));
2942 temperature[1] =
2943 std::max<double>(temperature[1],
2944 distributed_temperature_solution(i));
2945 }
2946
2947 temperature[0] *= -1.0;
2948 Utilities::MPI::max(temperature, MPI_COMM_WORLD, global_temperature);
2949 global_temperature[0] *= -1.0;
2950
2951 pcout << " Temperature range: " << global_temperature[0] << ' '
2952 << global_temperature[1] << std::endl;
2953 }
2954 }
2955
2956
2957 // @sect4{BoussinesqFlowProblem::output_results}
2958
2959 // Next comes the function that generates the output. The quantities to
2960 // output could be introduced manually like we did in step-31. An
2961 // alternative is to hand this task over to a class PostProcessor that
2962 // inherits from the class DataPostprocessor, which can be attached to
2963 // DataOut. This allows us to output derived quantities from the solution,
2964 // like the friction heating included in this example. It overloads the
2965 // virtual function DataPostprocessor::evaluate_vector_field(),
2966 // which is then internally called from DataOut::build_patches(). We have to
2967 // give it values of the numerical solution, its derivatives, normals to the
2968 // cell, the actual evaluation points and any additional quantities. This
2969 // follows the same procedure as discussed in step-29 and other programs.
2970 template <int dim>
2971 class BoussinesqFlowProblem<dim>::Postprocessor
2972 : public DataPostprocessor<dim>
2973 {
2974 public:
2975 Postprocessor(const unsigned int partition, const double minimal_pressure);
2976
2977 virtual void evaluate_vector_field(
2978 const DataPostprocessorInputs::Vector<dim> &inputs,
2979 std::vector<Vector<double>> &computed_quantities) const override;
2980
2981 virtual std::vector<std::string> get_names() const override;
2982
2983 virtual std::vector<
2984 DataComponentInterpretation::DataComponentInterpretation>
2985 get_data_component_interpretation() const override;
2986
2987 virtual UpdateFlags get_needed_update_flags() const override;
2988
2989 private:
2990 const unsigned int partition;
2991 const double minimal_pressure;
2992 };
2993
2994
2995 template <int dim>
2996 BoussinesqFlowProblem<dim>::Postprocessor::Postprocessor(
2997 const unsigned int partition,
2998 const double minimal_pressure)
2999 : partition(partition)
3000 , minimal_pressure(minimal_pressure)
3001 {}
3002
3003
3004 // Here we define the names for the variables we want to output. These are
3005 // the actual solution values for velocity, pressure, and temperature, as
3006 // well as the friction heating and, for each cell, the number of the processor
3007 // that owns it. This allows us to visualize the partitioning of the domain
3008 // among the processors. Except for the velocity, which is vector-valued,
3009 // all other quantities are scalar.
3010 template <int dim>
3011 std::vector<std::string>
3012 BoussinesqFlowProblem<dim>::Postprocessor::get_names() const
3013 {
3014 std::vector<std::string> solution_names(dim, "velocity");
3015 solution_names.emplace_back("p");
3016 solution_names.emplace_back("T");
3017 solution_names.emplace_back("friction_heating");
3018 solution_names.emplace_back("partition");
3019
3020 return solution_names;
3021 }
3022
3023
3024 template <int dim>
3025 std::vector<DataComponentInterpretation::DataComponentInterpretation>
3026 BoussinesqFlowProblem<dim>::Postprocessor::get_data_component_interpretation()
3027 const
3028 {
3029 std::vector<DataComponentInterpretation::DataComponentInterpretation>
3030 interpretation(dim,
3031 DataComponentInterpretation::component_is_part_of_vector);
3032
3033 interpretation.push_back(DataComponentInterpretation::component_is_scalar);
3034 interpretation.push_back(DataComponentInterpretation::component_is_scalar);
3035 interpretation.push_back(DataComponentInterpretation::component_is_scalar);
3036 interpretation.push_back(DataComponentInterpretation::component_is_scalar);
3037
3038 return interpretation;
3039 }
3040
3041
3042 template <int dim>
3043 UpdateFlags
3044 BoussinesqFlowProblem<dim>::Postprocessor::get_needed_update_flags() const
3045 {
3046 return update_values | update_gradients | update_quadrature_points;
3047 }
3048
3049
3050 // Now we implement the function that computes the derived quantities. As we
3051 // also did for the output, we rescale the velocity from its SI units to
3052 // something more readable, namely cm/year. Next, the pressure is shifted by
3053 // its minimum value so that it ranges between 0 and the maximum pressure
3054 // difference; in essence this makes all pressure values positive or
3055 // zero. Temperature is taken as is, and the friction heating is computed as
3056 // $2 \eta \varepsilon(\mathbf{u}) \cdot \varepsilon(\mathbf{u})$.
3057 //
3058 // The quantities we output here are more for illustration, rather than for
3059 // actual scientific value. We come back to this briefly in the results
3060 // section of this program and explain what one may in fact be interested in.
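//
// (A side note on the implementation below: for two objects of type
// SymmetricTensor<2,dim>, <code>operator*</code> denotes the double
// contraction, so the expression
// <code>2 * EquationData::eta * strain_rate * strain_rate</code> really
// does evaluate the scalar $2\eta\varepsilon(\mathbf u):\varepsilon(\mathbf u)$.)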
3061 template <int dim>
3062 void BoussinesqFlowProblem<dim>::Postprocessor::evaluate_vector_field(
3063 const DataPostprocessorInputs::Vector<dim> &inputs,
3064 std::vector<Vector<double>> & computed_quantities) const
3065 {
3066 const unsigned int n_quadrature_points = inputs.solution_values.size();
3067 Assert(inputs.solution_gradients.size() == n_quadrature_points,
3068 ExcInternalError());
3069 Assert(computed_quantities.size() == n_quadrature_points,
3070 ExcInternalError());
3071 Assert(inputs.solution_values[0].size() == dim + 2, ExcInternalError());
3072
3073 for (unsigned int q = 0; q < n_quadrature_points; ++q)
3074 {
3075 for (unsigned int d = 0; d < dim; ++d)
3076 computed_quantities[q](d) = (inputs.solution_values[q](d) *
3077 EquationData::year_in_seconds * 100);
3078
3079 const double pressure =
3080 (inputs.solution_values[q](dim) - minimal_pressure);
3081 computed_quantities[q](dim) = pressure;
3082
3083 const double temperature = inputs.solution_values[q](dim + 1);
3084 computed_quantities[q](dim + 1) = temperature;
3085
3086 Tensor<2, dim> grad_u;
3087 for (unsigned int d = 0; d < dim; ++d)
3088 grad_u[d] = inputs.solution_gradients[q][d];
3089 const SymmetricTensor<2, dim> strain_rate = symmetrize(grad_u);
3090 computed_quantities[q](dim + 2) =
3091 2 * EquationData::eta * strain_rate * strain_rate;
3092
3093 computed_quantities[q](dim + 3) = partition;
3094 }
3095 }
3096
3097
3098 // The <code>output_results()</code> function has a similar task to the one
3099 // in step-31. However, here we are going to demonstrate a different
3100 // technique on how to merge output from different DoFHandler objects. The
3101 // way we're going to achieve this recombination is to create a joint
3102 // DoFHandler that collects both components, the Stokes solution and the
3103 // temperature solution. This can be nicely done by combining the finite
3104 // elements from the two systems to form one FESystem, and let this
3105 // collective system define a new DoFHandler object. To be sure that
3106 // everything was done correctly, we perform a sanity check that ensures
3107 // that we got all the dofs from both Stokes and temperature even in the
3108 // combined system. We then combine the data vectors. Unfortunately, there
3109 // is no straightforward relation that tells us how to sort the Stokes and
3110 // temperature vectors into the joint vector. The way we can get around this
3111 // trouble is to rely on the information collected in the FESystem. For each
3112 // dof on a cell, the joint finite element knows to which equation component
3113 // (velocity component, pressure, or temperature) it belongs – that's the
3114 // information we need! So we step through all cells (with iterators into
3115 // all three DoFHandlers moving in sync), and for each joint cell dof, we
3116 // read out that component using the FiniteElement::system_to_base_index
3117 // function (see there for a description of what the various parts of its
3118 // return value contain). We also need to keep track of whether we're on a
3119 // Stokes dof or a temperature dof, which is contained in
3120 // joint_fe.system_to_base_index(i).first.first. Eventually, the dof_indices
3121 // data structures on each of the three systems tell us what the relation
3122 // between global vector and local dofs looks like on the present cell,
3123 // which concludes this tedious work. We make sure that each processor only
3124 // works on the subdomain it owns locally (and not on ghost or artificial
3125 // cells) when building the joint solution vector. The same will then have
3126 // to be done in DataOut::build_patches(), but that function does so
3127 // automatically.
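//
// In code, the pieces of the return value that matter here can be
// sketched as follows (the loop below uses them directly, without the
// named intermediate variables introduced here for illustration):
// @code
//   const auto base_index = joint_fe.system_to_base_index(i);
//   const unsigned int base_element  = base_index.first.first; // 0: Stokes, 1: temperature
//   const unsigned int index_in_base = base_index.second;      // dof index within that base element
//   // base_element decides whether to look into local_stokes_dof_indices
//   // or local_temperature_dof_indices, at position index_in_base.
// @endcode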
3128 //
3129 // What we end up with is a set of patches that we can write using the
3130 // functions in DataOutBase in a variety of output formats. Here, we then
3131 // have to pay attention that what each processor writes is really only its
3132 // own part of the domain, i.e. we will want to write each processor's
3133 // contribution into a separate file. This we do by adding an additional
3134 // number to the filename when we write the solution. This is not really
3135 // new; we did it similarly in step-40. Note that we write in the compressed
3136 // format @p .vtu instead of plain vtk files, which saves quite some
3137 // storage.
3138 //
3139 // All the rest of the work is done in the PostProcessor class.
3140 template <int dim>
3141 void BoussinesqFlowProblem<dim>::output_results()
3142 {
3143 TimerOutput::Scope timer_section(computing_timer, "Postprocessing");
3144
3145 const FESystem<dim> joint_fe(stokes_fe, 1, temperature_fe, 1);
3146
3147 DoFHandler<dim> joint_dof_handler(triangulation);
3148 joint_dof_handler.distribute_dofs(joint_fe);
3149 Assert(joint_dof_handler.n_dofs() ==
3150 stokes_dof_handler.n_dofs() + temperature_dof_handler.n_dofs(),
3151 ExcInternalError());
3152
3153 TrilinosWrappers::MPI::Vector joint_solution;
3154 joint_solution.reinit(joint_dof_handler.locally_owned_dofs(),
3155 MPI_COMM_WORLD);
3156
3157 {
3158 std::vector<types::global_dof_index> local_joint_dof_indices(
3159 joint_fe.n_dofs_per_cell());
3160 std::vector<types::global_dof_index> local_stokes_dof_indices(
3161 stokes_fe.n_dofs_per_cell());
3162 std::vector<types::global_dof_index> local_temperature_dof_indices(
3163 temperature_fe.n_dofs_per_cell());
3164
3165 typename DoFHandler<dim>::active_cell_iterator
3166 joint_cell = joint_dof_handler.begin_active(),
3167 joint_endc = joint_dof_handler.end(),
3168 stokes_cell = stokes_dof_handler.begin_active(),
3169 temperature_cell = temperature_dof_handler.begin_active();
3170 for (; joint_cell != joint_endc;
3171 ++joint_cell, ++stokes_cell, ++temperature_cell)
3172 if (joint_cell->is_locally_owned())
3173 {
3174 joint_cell->get_dof_indices(local_joint_dof_indices);
3175 stokes_cell->get_dof_indices(local_stokes_dof_indices);
3176 temperature_cell->get_dof_indices(local_temperature_dof_indices);
3177
3178 for (unsigned int i = 0; i < joint_fe.n_dofs_per_cell(); ++i)
3179 if (joint_fe.system_to_base_index(i).first.first == 0)
3180 {
3181 Assert(joint_fe.system_to_base_index(i).second <
3182 local_stokes_dof_indices.size(),
3183 ExcInternalError());
3184
3185 joint_solution(local_joint_dof_indices[i]) = stokes_solution(
3186 local_stokes_dof_indices[joint_fe.system_to_base_index(i)
3187 .second]);
3188 }
3189 else
3190 {
3191 Assert(joint_fe.system_to_base_index(i).first.first == 1,
3192 ExcInternalError());
3193 Assert(joint_fe.system_to_base_index(i).second <
3194 local_temperature_dof_indices.size(),
3195 ExcInternalError());
3196 joint_solution(local_joint_dof_indices[i]) =
3197 temperature_solution(
3198 local_temperature_dof_indices
3199 [joint_fe.system_to_base_index(i).second]);
3200 }
3201 }
3202 }
3203
3204 joint_solution.compress(VectorOperation::insert);
3205
3206 IndexSet locally_relevant_joint_dofs(joint_dof_handler.n_dofs());
3207 DoFTools::extract_locally_relevant_dofs(joint_dof_handler,
3208 locally_relevant_joint_dofs);
3209 TrilinosWrappers::MPI::Vector locally_relevant_joint_solution;
3210 locally_relevant_joint_solution.reinit(locally_relevant_joint_dofs,
3211 MPI_COMM_WORLD);
3212 locally_relevant_joint_solution = joint_solution;
3213
3214 Postprocessor postprocessor(Utilities::MPI::this_mpi_process(
3215 MPI_COMM_WORLD),
3216 stokes_solution.block(1).min());
3217
3218 DataOut<dim> data_out;
3219 data_out.attach_dof_handler(joint_dof_handler);
3220 data_out.add_data_vector(locally_relevant_joint_solution, postprocessor);
3221 data_out.build_patches();
3222
3223 static int out_index = 0;
3224 data_out.write_vtu_with_pvtu_record(
3225 "./", "solution", out_index, MPI_COMM_WORLD, 5);
3226
3227 out_index++;
3228 }
3229
3230
3231
3232 // @sect4{BoussinesqFlowProblem::refine_mesh}
3233
3234 // This function isn't really new either. Since the <code>setup_dofs</code>
3235 // function that we call in the middle has its own timer section, we split
3236 // timing this function into two sections. It will also allow us to easily
3237 // identify which of the two is more expensive.
3238 //
3239 // One thing of note, however, is that we only want to compute error
3240 // indicators on the locally owned subdomain. In order to achieve this, we
3241 // pass one additional argument to the KellyErrorEstimator::estimate
3242 // function. Note that the vector for error estimates is resized to the
3243 // number of active cells present on the current process, which is less than
3244 // the total number of active cells on all processors (but more than the
3245 // number of locally owned active cells); each processor only has a few
3246 // coarse cells around the locally owned ones, as also explained in step-40.
3247 //
3248 // The local error estimates are then handed to a %parallel version of
3249 // GridRefinement (in namespace parallel::distributed::GridRefinement, see
3250 // also step-40) which looks at the errors and finds the cells that need
3251 // refinement by comparing the error values across processors. As in
3252 // step-31, we want to limit the maximum grid level. So in case some cells
3253 // have been marked that are already at the finest level, we simply clear
3254 // the refine flags.
3255 template <int dim>
3256 void
3257 BoussinesqFlowProblem<dim>::refine_mesh(const unsigned int max_grid_level)
3258 {
3259 parallel::distributed::SolutionTransfer<dim, TrilinosWrappers::MPI::Vector>
3260 temperature_trans(temperature_dof_handler);
3261 parallel::distributed::SolutionTransfer<dim,
3262 TrilinosWrappers::MPI::BlockVector>
3263 stokes_trans(stokes_dof_handler);
3264
3265 {
3266 TimerOutput::Scope timer_section(computing_timer,
3267 "Refine mesh structure, part 1");
3268
3269 Vector<float> estimated_error_per_cell(triangulation.n_active_cells());
3270
3271 KellyErrorEstimator<dim>::estimate(
3272 temperature_dof_handler,
3273 QGauss<dim - 1>(parameters.temperature_degree + 1),
3274 std::map<types::boundary_id, const Function<dim> *>(),
3275 temperature_solution,
3276 estimated_error_per_cell,
3277 ComponentMask(),
3278 nullptr,
3279 0,
3280 triangulation.locally_owned_subdomain());
3281
3282 parallel::distributed::GridRefinement::refine_and_coarsen_fixed_fraction(
3283 triangulation, estimated_error_per_cell, 0.3, 0.1);
3284
3285 if (triangulation.n_levels() > max_grid_level)
3286 for (typename Triangulation<dim>::active_cell_iterator cell =
3287 triangulation.begin_active(max_grid_level);
3288 cell != triangulation.end();
3289 ++cell)
3290 cell->clear_refine_flag();
3291
3292 // With all flags marked as necessary, we can then tell the
3293 // parallel::distributed::SolutionTransfer objects to get ready to
3294 // transfer data from one mesh to the next, which they will do when
3295 // notified by
3296 // Triangulation as part of the @p execute_coarsening_and_refinement() call.
3297 // The syntax is similar to the non-%parallel solution transfer (with the
3298 // exception that here a pointer to the vector entries is enough). The
3299 // remainder of the function further down below is then concerned with
3300 // setting up the data structures again after mesh refinement and
3301 // restoring the solution vectors on the new mesh.
3302 std::vector<const TrilinosWrappers::MPI::Vector *> x_temperature(2);
3303 x_temperature[0] = &temperature_solution;
3304 x_temperature[1] = &old_temperature_solution;
3305 std::vector<const TrilinosWrappers::MPI::BlockVector *> x_stokes(2);
3306 x_stokes[0] = &stokes_solution;
3307 x_stokes[1] = &old_stokes_solution;
3308
3309 triangulation.prepare_coarsening_and_refinement();
3310
3311 temperature_trans.prepare_for_coarsening_and_refinement(x_temperature);
3312 stokes_trans.prepare_for_coarsening_and_refinement(x_stokes);
3313
3314 triangulation.execute_coarsening_and_refinement();
3315 }
3316
3317 setup_dofs();
3318
3319 {
3320 TimerOutput::Scope timer_section(computing_timer,
3321 "Refine mesh structure, part 2");
3322
3323 {
3324 TrilinosWrappers::MPI::Vector distributed_temp1(temperature_rhs);
3325 TrilinosWrappers::MPI::Vector distributed_temp2(temperature_rhs);
3326
3327 std::vector<TrilinosWrappers::MPI::Vector *> tmp(2);
3328 tmp[0] = &(distributed_temp1);
3329 tmp[1] = &(distributed_temp2);
3330 temperature_trans.interpolate(tmp);
3331
3332 // enforce constraints to make the interpolated solution conforming on
3333 // the new mesh:
3334 temperature_constraints.distribute(distributed_temp1);
3335 temperature_constraints.distribute(distributed_temp2);
3336
3337 temperature_solution = distributed_temp1;
3338 old_temperature_solution = distributed_temp2;
3339 }
3340
3341 {
3342 TrilinosWrappers::MPI::BlockVector distributed_stokes(stokes_rhs);
3343 TrilinosWrappers::MPI::BlockVector old_distributed_stokes(stokes_rhs);
3344
3345 std::vector<TrilinosWrappers::MPI::BlockVector *> stokes_tmp(2);
3346 stokes_tmp[0] = &(distributed_stokes);
3347 stokes_tmp[1] = &(old_distributed_stokes);
3348
3349 stokes_trans.interpolate(stokes_tmp);
3350
3351 // enforce constraints to make the interpolated solution conforming on
3352 // the new mesh:
3353 stokes_constraints.distribute(distributed_stokes);
3354 stokes_constraints.distribute(old_distributed_stokes);
3355
3356 stokes_solution = distributed_stokes;
3357 old_stokes_solution = old_distributed_stokes;
3358 }
3359 }
3360 }
3361
3362
3363
3364 // @sect4{BoussinesqFlowProblem::run}
3365
3366 // This is the final and controlling function in this class. It, in fact,
3367 // runs the entire rest of the program and is, once more, very similar to
3368 // step-31. The only substantial difference is that we use a different mesh
3369 // now (a GridGenerator::hyper_shell instead of a simple cube geometry).
3370 template <int dim>
3371 void BoussinesqFlowProblem<dim>::run()
3372 {
3373 GridGenerator::hyper_shell(triangulation,
3374 Point<dim>(),
3375 EquationData::R0,
3376 EquationData::R1,
3377 (dim == 3) ? 96 : 12,
3378 true);
3379
3380 global_Omega_diameter = GridTools::diameter(triangulation);
3381
3382 triangulation.refine_global(parameters.initial_global_refinement);
3383
3384 setup_dofs();
3385
3386 unsigned int pre_refinement_step = 0;
3387
3388 start_time_iteration:
3389
3390 {
3391 TrilinosWrappers::MPI::Vector solution(
3392 temperature_dof_handler.locally_owned_dofs());
3393 // VectorTools::project supports parallel vector classes with most
3394 // standard finite elements via deal.II's own native MatrixFree framework:
3395 // since we use standard Lagrange elements of moderate order this function
3396 // works well here.
3397 VectorTools::project(temperature_dof_handler,
3398 temperature_constraints,
3399 QGauss<dim>(parameters.temperature_degree + 2),
3400 EquationData::TemperatureInitialValues<dim>(),
3401 solution);
3402 // Having so computed the current temperature field, let us set the member
3403 // variable that holds the temperature nodes. Strictly speaking, we really
3404 // only need to set <code>old_temperature_solution</code> since the first
3405 // thing we will do is to compute the Stokes solution that only requires
3406 // the previous time step's temperature field. That said, nothing good can
3407 // come from not initializing the other vectors as well (especially since
3408 // it's a relatively cheap operation and we only have to do it once at the
3409 // beginning of the program) if we ever want to extend our numerical
3410 // method or physical model, and so we initialize
3411 // <code>old_temperature_solution</code> and
3412 // <code>old_old_temperature_solution</code> as well. The assignment makes
3413 // sure that the vectors on the left hand side (which were initialized to
3414 // contain ghost elements as well) also get the correct ghost elements. In
3415 // other words, the assignment here requires communication between
3416 // processors:
3417 temperature_solution = solution;
3418 old_temperature_solution = solution;
3419 old_old_temperature_solution = solution;
3420 }
3421
3422 timestep_number = 0;
3423 time_step = old_time_step = 0;
3424
3425 double time = 0;
3426
3427 do
3428 {
3429 pcout << "Timestep " << timestep_number
3430 << ": t=" << time / EquationData::year_in_seconds << " years"
3431 << std::endl;
3432
3433 assemble_stokes_system();
3434 build_stokes_preconditioner();
3435 assemble_temperature_matrix();
3436
3437 solve();
3438
3439 pcout << std::endl;
3440
3441 if ((timestep_number == 0) &&
3442 (pre_refinement_step < parameters.initial_adaptive_refinement))
3443 {
3444 refine_mesh(parameters.initial_global_refinement +
3445 parameters.initial_adaptive_refinement);
3446 ++pre_refinement_step;
3447 goto start_time_iteration;
3448 }
3449 else if ((timestep_number > 0) &&
3450 (timestep_number % parameters.adaptive_refinement_interval ==
3451 0))
3452 refine_mesh(parameters.initial_global_refinement +
3453 parameters.initial_adaptive_refinement);
3454
3455 if ((parameters.generate_graphical_output == true) &&
3456 (timestep_number % parameters.graphical_output_interval == 0))
3457 output_results();
3458
3459 // In order to speed up linear solvers, we extrapolate the solutions
3460 // from the old time levels to the new one. This gives a very good
3461 // initial guess, cutting the number of iterations needed in solvers
3462 // by more than one half. We do not need to extrapolate in the last
3463 // iteration, so if we reached the final time, we stop here.
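//
// Concretely, with $k_n$ = <code>time_step</code> and $k_{n-1}$ =
// <code>old_time_step</code>, the extrapolation performed below reads
// $\mathbf x^{\text{guess}} = \left(1+\frac{k_n}{k_{n-1}}\right)\mathbf x^n
// - \frac{k_n}{k_{n-1}}\,\mathbf x^{n-1}$,
// applied to both the Stokes and the temperature solution vectors.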
3464 //
3465 // As the last thing during a time step (before actually bumping up
3466 // the number of the time step), we check whether the current time
3467 // step number is divisible by 100, and if so we let the computing
3468 // timer print a summary of CPU times spent so far.
3469 if (time > parameters.end_time * EquationData::year_in_seconds)
3470 break;
3471
3472 TrilinosWrappers::MPI::BlockVector old_old_stokes_solution;
3473 old_old_stokes_solution = old_stokes_solution;
3474 old_stokes_solution = stokes_solution;
3475 old_old_temperature_solution = old_temperature_solution;
3476 old_temperature_solution = temperature_solution;
3477 if (old_time_step > 0)
3478 {
3479 // Trilinos sadd does not like ghost vectors even as input. Copy
3480 // into distributed vectors for now:
3481 {
3482 TrilinosWrappers::MPI::BlockVector distr_solution(stokes_rhs);
3483 distr_solution = stokes_solution;
3484 TrilinosWrappers::MPI::BlockVector distr_old_solution(stokes_rhs);
3485 distr_old_solution = old_old_stokes_solution;
3486 distr_solution.sadd(1. + time_step / old_time_step,
3487 -time_step / old_time_step,
3488 distr_old_solution);
3489 stokes_solution = distr_solution;
3490 }
3491 {
3492 TrilinosWrappers::MPI::Vector distr_solution(temperature_rhs);
3493 distr_solution = temperature_solution;
3494 TrilinosWrappers::MPI::Vector distr_old_solution(temperature_rhs);
3495 distr_old_solution = old_old_temperature_solution;
3496 distr_solution.sadd(1. + time_step / old_time_step,
3497 -time_step / old_time_step,
3498 distr_old_solution);
3499 temperature_solution = distr_solution;
3500 }
3501 }
3502
3503 if ((timestep_number > 0) && (timestep_number % 100 == 0))
3504 computing_timer.print_summary();
3505
3506 time += time_step;
3507 ++timestep_number;
3508 }
3509 while (true);
3510
3511 // If we are generating graphical output, do so also for the last time
3512 // step unless we had just done so before we left the do-while loop
3513 if ((parameters.generate_graphical_output == true) &&
3514 !((timestep_number - 1) % parameters.graphical_output_interval == 0))
3515 output_results();
3516 }
3517 } // namespace Step32
3518
3519
3520
3521 // @sect3{The <code>main</code> function}
3522
3523 // The main function is short as usual and very similar to the one in
3524 // step-31. Since we use a parameter file which is specified as an argument in
3525 // the command line, we have to read it in here and pass it on to the
3526 // Parameters class for parsing. If no filename is given in the command line,
3527 // we simply use the <code>step-32.prm</code> file which is distributed
3528 // together with the program.
3529 //
3530 // Because 3d computations are simply very slow unless you throw a lot of
3531 // processors at them, the program defaults to 2d. You can get the 3d version
3532 // by changing the constant dimension below to 3.
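//
// For instance, a 2d run on four MPI processes with the default parameter
// file might be started like this (a hypothetical command line; the exact
// names of the MPI launcher and of the executable depend on how you built
// and installed the program):
// @code
//   mpirun -np 4 ./step-32 step-32.prm
// @endcode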
3533 int main(int argc, char *argv[])
3534 {
3535 try
3536 {
3537 using namespace Step32;
3538 using namespace dealii;
3539
3540 Utilities::MPI::MPI_InitFinalize mpi_initialization(
3541 argc, argv, numbers::invalid_unsigned_int);
3542
3543 std::string parameter_filename;
3544 if (argc >= 2)
3545 parameter_filename = argv[1];
3546 else
3547 parameter_filename = "step-32.prm";
3548
3549 const int dim = 2;
3550 BoussinesqFlowProblem<dim>::Parameters parameters(parameter_filename);
3551 BoussinesqFlowProblem<dim> flow_problem(parameters);
3552 flow_problem.run();
3553 }
3554 catch (std::exception &exc)
3555 {
3556 std::cerr << std::endl
3557 << std::endl
3558 << "----------------------------------------------------"
3559 << std::endl;
3560 std::cerr << "Exception on processing: " << std::endl
3561 << exc.what() << std::endl
3562 << "Aborting!" << std::endl
3563 << "----------------------------------------------------"
3564 << std::endl;
3565
3566 return 1;
3567 }
3568 catch (...)
3569 {
3570 std::cerr << std::endl
3571 << std::endl
3572 << "----------------------------------------------------"
3573 << std::endl;
3574 std::cerr << "Unknown exception!" << std::endl
3575 << "Aborting!" << std::endl
3576 << "----------------------------------------------------"
3577 << std::endl;
3578 return 1;
3579 }
3580
3581 return 0;
3582 }
3583