/* ---------------------------------------------------------------------
 *
 * Copyright (C) 2013 - 2020 by the deal.II authors
 *
 * This file is part of the deal.II library.
 *
 * The deal.II library is free software; you can use it, redistribute
 * it, and/or modify it under the terms of the GNU Lesser General
 * Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * The full text of the license can be found in the file LICENSE.md at
 * the top level directory of deal.II.
 *
 * ---------------------------------------------------------------------
 *
 * Author: Martin Kronbichler, Technische Universität München,
 *         Scott T. Miller, The Pennsylvania State University, 2013
 */

// @sect3{Include files}
//
// Most of the deal.II include files have already been covered in previous
// examples and are not commented on.
#include <deal.II/base/quadrature_lib.h>
#include <deal.II/base/function.h>
#include <deal.II/base/tensor_function.h>
#include <deal.II/base/exceptions.h>
#include <deal.II/base/logstream.h>
#include <deal.II/base/work_stream.h>
#include <deal.II/base/convergence_table.h>
#include <deal.II/lac/vector.h>
#include <deal.II/lac/affine_constraints.h>
#include <deal.II/lac/full_matrix.h>
#include <deal.II/lac/dynamic_sparsity_pattern.h>
#include <deal.II/lac/solver_bicgstab.h>
#include <deal.II/lac/precondition.h>
#include <deal.II/grid/tria.h>
#include <deal.II/grid/tria_accessor.h>
#include <deal.II/grid/grid_generator.h>
#include <deal.II/grid/grid_refinement.h>
#include <deal.II/grid/tria_iterator.h>
#include <deal.II/dofs/dof_handler.h>
#include <deal.II/dofs/dof_accessor.h>
#include <deal.II/dofs/dof_renumbering.h>
#include <deal.II/dofs/dof_tools.h>
#include <deal.II/fe/fe_dgq.h>
#include <deal.II/fe/fe_system.h>
#include <deal.II/fe/fe_values.h>
#include <deal.II/numerics/vector_tools.h>
#include <deal.II/numerics/error_estimator.h>
#include <deal.II/numerics/matrix_tools.h>
#include <deal.II/numerics/data_out.h>

// However, we do have a few new includes for the example.
// The first one defines finite element spaces on the faces
// of the triangulation, which we refer to as the 'skeleton'.
// These finite elements do not have any support on the element
// interior, and they represent polynomials that have a single
// value on each codimension-1 surface, but admit discontinuities
// on codimension-2 surfaces.
#include <deal.II/fe/fe_face.h>
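
// As a small sketch of what this element provides (the concrete numbers
// below are illustrative, assuming a scalar FE_FaceQ of degree 1 in 2d):
// @code
//   FE_FaceQ<2> fe_skeleton(1);
//   fe_skeleton.n_dofs_per_face(); // 2 = (degree+1)^{dim-1} unknowns per face
//   fe_skeleton.n_dofs_per_cell(); // 8: all unknowns sit on the four faces,
//                                  //    none in the cell interior
// @endcode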

// The second new file we include defines a new type of sparse matrix.  The
// regular <code>SparseMatrix</code> type stores indices to all non-zero
// entries.  The <code>ChunkSparseMatrix</code> takes advantage of the coupled
// nature of DG solutions.  It stores an index to a matrix sub-block of a
// specified size.  In the HDG context, this sub-block-size is actually the
// number of degrees of freedom per face defined by the skeleton solution
// field. This reduces the memory consumption of the matrix by up to one third
// and results in similar speedups when using the matrix in solvers.
#include <deal.II/lac/chunk_sparse_matrix.h>
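
// The typical workflow, which we will see again in <code>setup_system()</code>
// below, differs from the usual SparsityPattern usage only in the extra
// chunk-size argument to ChunkSparsityPattern::copy_from():
// @code
//   DynamicSparsityPattern dsp(dof_handler.n_dofs());
//   DoFTools::make_sparsity_pattern(dof_handler, dsp, constraints, false);
//   ChunkSparsityPattern      chunk_pattern;
//   ChunkSparseMatrix<double> matrix;
//   chunk_pattern.copy_from(dsp, fe.n_dofs_per_face()); // chunk = face block
//   matrix.reinit(chunk_pattern);
// @endcode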
73 
74 // The final new include for this example deals with data output.  Since
75 // we have a finite element field defined on the skeleton of the mesh,
76 // we would like to visualize what that solution actually is.
77 // DataOutFaces does exactly this; the interface is the almost the same
78 // as the familiar DataOut, but the output only has codimension-1 data for
79 // the simulation.
80 #include <deal.II/numerics/data_out_faces.h>
81 
82 #include <iostream>
83 
84 
85 
86 // We start by putting all of our classes into their own namespace.
87 namespace Step51
88 {
89   using namespace dealii;
90 
91   // @sect3{Equation data}
92   //
93   // The structure of the analytic solution is the same as in step-7. There are
94   // two exceptions. Firstly, we also create a solution for the 3d case, and
95   // secondly, we scale the solution so its norm is of order unity for all
96   // values of the solution width.
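  //
  // Concretely, with the source centers $\mathbf x_i$ and the width $w$
  // defined below, the manufactured solution is
  // @f[
  //   u(\mathbf x) = \frac{1}{(2\pi w^2)^{d/2}}
  //     \sum_{i=1}^{3} e^{-\|\mathbf x - \mathbf x_i\|^2 / w^2}.
  // @f]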
  template <int dim>
  class SolutionBase
  {
  protected:
    static const unsigned int n_source_centers = 3;
    static const Point<dim>   source_centers[n_source_centers];
    static const double       width;
  };


  template <>
  const Point<1>
    SolutionBase<1>::source_centers[SolutionBase<1>::n_source_centers] =
      {Point<1>(-1.0 / 3.0), Point<1>(0.0), Point<1>(+1.0 / 3.0)};


  template <>
  const Point<2>
    SolutionBase<2>::source_centers[SolutionBase<2>::n_source_centers] =
      {Point<2>(-0.5, +0.5), Point<2>(-0.5, -0.5), Point<2>(+0.5, -0.5)};

  template <>
  const Point<3>
    SolutionBase<3>::source_centers[SolutionBase<3>::n_source_centers] = {
      Point<3>(-0.5, +0.5, 0.25),
      Point<3>(-0.6, -0.5, -0.125),
      Point<3>(+0.5, -0.5, 0.5)};

  template <int dim>
  const double SolutionBase<dim>::width = 1. / 5.;


  template <int dim>
  class Solution : public Function<dim>, protected SolutionBase<dim>
  {
  public:
    virtual double value(const Point<dim> &p,
                         const unsigned int /*component*/ = 0) const override
    {
      double sum = 0;
      for (unsigned int i = 0; i < this->n_source_centers; ++i)
        {
          const Tensor<1, dim> x_minus_xi = p - this->source_centers[i];
          sum +=
            std::exp(-x_minus_xi.norm_square() / (this->width * this->width));
        }

      return sum /
             std::pow(2. * numbers::PI * this->width * this->width, dim / 2.);
    }

    virtual Tensor<1, dim>
    gradient(const Point<dim> &p,
             const unsigned int /*component*/ = 0) const override
    {
      Tensor<1, dim> sum;
      for (unsigned int i = 0; i < this->n_source_centers; ++i)
        {
          const Tensor<1, dim> x_minus_xi = p - this->source_centers[i];

          sum +=
            (-2 / (this->width * this->width) *
             std::exp(-x_minus_xi.norm_square() / (this->width * this->width)) *
             x_minus_xi);
        }

      return sum /
             std::pow(2. * numbers::PI * this->width * this->width, dim / 2.);
    }
  };



  // This class implements a function in which the scalar solution and its
  // negative gradient are collected together. This function is used when
  // computing the error of the HDG approximation, and its implementation
  // simply calls the value and gradient functions of the Solution class.
  template <int dim>
  class SolutionAndGradient : public Function<dim>, protected SolutionBase<dim>
  {
  public:
    SolutionAndGradient()
      : Function<dim>(dim + 1)
    {}

    virtual void vector_value(const Point<dim> &p,
                              Vector<double> &  v) const override
    {
      AssertDimension(v.size(), dim + 1);
      Solution<dim>  solution;
      Tensor<1, dim> grad = solution.gradient(p);
      for (unsigned int d = 0; d < dim; ++d)
        v[d] = -grad[d];
      v[dim] = solution.value(p);
    }
  };



  // Next comes the implementation of the convection velocity. As described in
  // the introduction, we choose a velocity field that is $(y, -x)$ in 2D and
  // $(y, -x, 1)$ in 3D. This gives a divergence-free velocity field.
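  // Indeed, in 2D we have $\nabla\cdot\mathbf{c} = \partial_x y +
  // \partial_y(-x) = 0$, and the constant third component in 3D adds
  // nothing to the divergence.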
  template <int dim>
  class ConvectionVelocity : public TensorFunction<1, dim>
  {
  public:
    ConvectionVelocity()
      : TensorFunction<1, dim>()
    {}

    virtual Tensor<1, dim> value(const Point<dim> &p) const override
    {
      Tensor<1, dim> convection;
      switch (dim)
        {
          case 1:
            convection[0] = 1;
            break;
          case 2:
            convection[0] = p[1];
            convection[1] = -p[0];
            break;
          case 3:
            convection[0] = p[1];
            convection[1] = -p[0];
            convection[2] = 1;
            break;
          default:
            Assert(false, ExcNotImplemented());
        }
      return convection;
    }
  };



  // The last function we implement is the right hand side for the
  // manufactured solution. It is very similar to step-7, with the exception
  // that we now have a convection term instead of the reaction term. Since
  // the velocity field is incompressible, i.e., $\nabla \cdot \mathbf{c} =
  // 0$, the advection term simply reads $\mathbf{c} \cdot \nabla u$.
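  //
  // Applying $-\Delta + \mathbf{c}\cdot\nabla$ to each Gaussian of the
  // exact solution gives the closed form implemented below:
  // @f[
  //   f(\mathbf x) = \frac{1}{(2\pi w^2)^{d/2}} \sum_{i=1}^{3}
  //     \frac{2d - 2\,\mathbf{c}\cdot(\mathbf x - \mathbf x_i)
  //           - 4\,\|\mathbf x - \mathbf x_i\|^2/w^2}{w^2}
  //     \; e^{-\|\mathbf x - \mathbf x_i\|^2/w^2}.
  // @f]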
  template <int dim>
  class RightHandSide : public Function<dim>, protected SolutionBase<dim>
  {
  public:
    virtual double value(const Point<dim> &p,
                         const unsigned int /*component*/ = 0) const override
    {
      ConvectionVelocity<dim> convection_velocity;
      Tensor<1, dim>          convection = convection_velocity.value(p);
      double                  sum        = 0;
      for (unsigned int i = 0; i < this->n_source_centers; ++i)
        {
          const Tensor<1, dim> x_minus_xi = p - this->source_centers[i];

          sum +=
            ((2 * dim - 2 * convection * x_minus_xi -
              4 * x_minus_xi.norm_square() / (this->width * this->width)) /
             (this->width * this->width) *
             std::exp(-x_minus_xi.norm_square() / (this->width * this->width)));
        }

      return sum /
             std::pow(2. * numbers::PI * this->width * this->width, dim / 2.);
    }
  };



  // @sect3{The HDG solver class}

  // The HDG solution procedure follows closely that of step-7. The major
  // difference is the use of three different sets of DoFHandler and FE
  // objects, along with the ChunkSparseMatrix and the corresponding solution
  // vectors. We also use WorkStream to enable a multithreaded local solution
  // process which exploits the embarrassingly parallel nature of the local
  // solver. For WorkStream, we define the local operations on a cell and a
  // copy function into the global matrix and vector. We do this both for the
  // assembly (which is run twice, once when we generate the system matrix and
  // once when we compute the element-interior solutions from the skeleton
  // values) and for the postprocessing where we extract a solution that
  // converges at higher order.
  template <int dim>
  class HDG
  {
  public:
    enum RefinementMode
    {
      global_refinement,
      adaptive_refinement
    };

    HDG(const unsigned int degree, const RefinementMode refinement_mode);
    void run();

  private:
    void setup_system();
    void assemble_system(const bool reconstruct_trace = false);
    void solve();
    void postprocess();
    void refine_grid(const unsigned int cycle);
    void output_results(const unsigned int cycle);

    // Data for the assembly and solution of the primal variables.
    struct PerTaskData;
    struct ScratchData;

    // Post-processing the solution to obtain $u^*$ is an element-by-element
    // procedure; as such, we do not need to assemble any global data and do
    // not declare any 'task data' for WorkStream to use.
    struct PostProcessScratchData;

    // The following three functions are used by WorkStream to do the actual
    // work of the program.
    void assemble_system_one_cell(
      const typename DoFHandler<dim>::active_cell_iterator &cell,
      ScratchData &                                         scratch,
      PerTaskData &                                         task_data);

    void copy_local_to_global(const PerTaskData &data);

    void postprocess_one_cell(
      const typename DoFHandler<dim>::active_cell_iterator &cell,
      PostProcessScratchData &                              scratch,
      unsigned int &                                        empty_data);


    Triangulation<dim> triangulation;

    // The 'local' solutions are interior to each element.  These
    // represent the primal solution field $u$ as well as the auxiliary
    // field $\mathbf{q}$.
    FESystem<dim>   fe_local;
    DoFHandler<dim> dof_handler_local;
    Vector<double>  solution_local;

    // The new finite element type and corresponding <code>DoFHandler</code> are
    // used for the global skeleton solution that couples the element-level
    // local solutions.
    FE_FaceQ<dim>   fe;
    DoFHandler<dim> dof_handler;
    Vector<double>  solution;
    Vector<double>  system_rhs;

    // As stated in the introduction, HDG solutions can be post-processed to
    // attain superconvergence rates of $\mathcal{O}(h^{p+2})$.  The
    // post-processed solution is a discontinuous finite element solution
    // representing the primal variable on the interior of each cell.  We define
    // a FE type of degree $p+1$ to represent this post-processed solution,
    // which we only use for output after constructing it.
    FE_DGQ<dim>     fe_u_post;
    DoFHandler<dim> dof_handler_u_post;
    Vector<double>  solution_u_post;

    // The degrees of freedom corresponding to the skeleton strongly enforce
    // Dirichlet boundary conditions, just as in a continuous Galerkin finite
    // element method. We can enforce the boundary conditions in an analogous
    // manner via an AffineConstraints object. In addition, hanging nodes are
    // handled in the same way as for continuous finite elements: For the face
    // elements which only define degrees of freedom on the face, this process
    // sets the solution on the refined side to coincide with the
    // representation on the coarse side.
    //
    // Note that for HDG, the elimination of hanging nodes is not the only
    // possibility &mdash; in terms of the HDG theory, one could also use the
    // unknowns from the refined side and express the local solution on the
    // coarse side through the trace values on the refined side. However, such
    // a setup is not as easily implemented in terms of deal.II loops and not
    // further analyzed.
    AffineConstraints<double> constraints;

    // The usage of the ChunkSparseMatrix class is similar to the usual sparse
    // matrices: You need a sparsity pattern of type ChunkSparsityPattern and
    // the actual matrix object. When creating the sparsity pattern, we just
    // have to additionally pass the size of local blocks.
    ChunkSparsityPattern      sparsity_pattern;
    ChunkSparseMatrix<double> system_matrix;

    // Same as step-7:
    const RefinementMode refinement_mode;
    ConvergenceTable     convergence_table;
  };

  // @sect3{The HDG class implementation}

  // @sect4{Constructor}
  // The constructor is similar to those in other examples, with the exception
  // of handling multiple DoFHandler and FiniteElement objects. Note that we
  // create a system of finite elements for the local DG part, including the
  // gradient/flux part and the scalar part.
  template <int dim>
  HDG<dim>::HDG(const unsigned int degree, const RefinementMode refinement_mode)
    : fe_local(FE_DGQ<dim>(degree), dim, FE_DGQ<dim>(degree), 1)
    , dof_handler_local(triangulation)
    , fe(degree)
    , dof_handler(triangulation)
    , fe_u_post(degree + 1)
    , dof_handler_u_post(triangulation)
    , refinement_mode(refinement_mode)
  {}



  // @sect4{HDG::setup_system}
  // The system for an HDG solution is set up in an analogous manner to most
  // of the other tutorial programs.  We are careful to distribute dofs with
  // all of our DoFHandler objects.  The @p solution and @p system_matrix
  // objects go with the global skeleton solution.
  template <int dim>
  void HDG<dim>::setup_system()
  {
    dof_handler_local.distribute_dofs(fe_local);
    dof_handler.distribute_dofs(fe);
    dof_handler_u_post.distribute_dofs(fe_u_post);

    std::cout << "   Number of degrees of freedom: " << dof_handler.n_dofs()
              << std::endl;

    solution.reinit(dof_handler.n_dofs());
    system_rhs.reinit(dof_handler.n_dofs());

    solution_local.reinit(dof_handler_local.n_dofs());
    solution_u_post.reinit(dof_handler_u_post.n_dofs());

    constraints.clear();
    DoFTools::make_hanging_node_constraints(dof_handler, constraints);
    std::map<types::boundary_id, const Function<dim> *> boundary_functions;
    Solution<dim>                                       solution_function;
    boundary_functions[0] = &solution_function;
    VectorTools::project_boundary_values(dof_handler,
                                         boundary_functions,
                                         QGauss<dim - 1>(fe.degree + 1),
                                         constraints);
    constraints.close();

    // When creating the chunk sparsity pattern, we first create the usual
    // dynamic sparsity pattern and then set the chunk size, which is equal
    // to the number of dofs on a face, when copying this into the final
    // sparsity pattern.
    {
      DynamicSparsityPattern dsp(dof_handler.n_dofs());
      DoFTools::make_sparsity_pattern(dof_handler, dsp, constraints, false);
      sparsity_pattern.copy_from(dsp, fe.n_dofs_per_face());
    }
    system_matrix.reinit(sparsity_pattern);
  }



  // @sect4{HDG::PerTaskData}
  // Next comes the definition of the local data structures for the parallel
  // assembly. The first structure @p PerTaskData contains the local vector
  // and matrix that are written into the global matrix, whereas the
  // ScratchData contains all data that we need for the local assembly. There
  // is one variable worth noting here, namely the boolean variable @p
  // trace_reconstruct. As mentioned in the introduction, we solve the HDG
  // system in two steps. First, we create a linear system for the skeleton
  // system where we condense the local part into it via the Schur complement
  // $D-CA^{-1}B$. Then, we solve for the local part using the skeleton
  // solution. For these two steps, we need the same matrices on the elements
  // twice, which we want to compute by two assembly steps. Since most of the
  // code is similar, we do this with the same function but only switch
  // between the two based on a flag that we set when starting the
  // assembly. Since we need to pass this information on to the local worker
  // routines, we store it once in the task data.
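  //
  // For reference, the two steps can be written in block form: with $U$
  // collecting the element-interior unknowns and $\Lambda$ the skeleton
  // unknowns, the coupled system reads
  // @f[
  //   \begin{pmatrix} A & B \\ C & D \end{pmatrix}
  //   \begin{pmatrix} U \\ \Lambda \end{pmatrix}
  //   =
  //   \begin{pmatrix} F \\ G \end{pmatrix}.
  // @f]
  // The first assembly pass builds the condensed system
  // $(D - C A^{-1} B)\,\Lambda = G - C A^{-1} F$, and the second pass
  // recovers $U = A^{-1}(F - B\Lambda)$ cell by cell.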
  template <int dim>
  struct HDG<dim>::PerTaskData
  {
    FullMatrix<double>                   cell_matrix;
    Vector<double>                       cell_vector;
    std::vector<types::global_dof_index> dof_indices;

    bool trace_reconstruct;

    PerTaskData(const unsigned int n_dofs, const bool trace_reconstruct)
      : cell_matrix(n_dofs, n_dofs)
      , cell_vector(n_dofs)
      , dof_indices(n_dofs)
      , trace_reconstruct(trace_reconstruct)
    {}
  };



  // @sect4{HDG::ScratchData}
  // @p ScratchData contains persistent data for each
  // thread within WorkStream.  The FEValues, matrix,
  // and vector objects should be familiar by now.  There are two objects that
  // need to be discussed: `std::vector<std::vector<unsigned int> >
  // fe_local_support_on_face` and `std::vector<std::vector<unsigned int> >
  // fe_support_on_face`.  These are used to indicate whether or not the finite
  // elements chosen have support (non-zero values) on a given face of the
  // reference cell for the local part associated to @p fe_local and the
  // skeleton part @p fe. We extract this information in the
  // constructor and store it once for all cells that we work on.  Had we not
  // stored this information, we would be forced to assemble a large number of
  // zero terms on each cell, which would significantly slow the program.
  template <int dim>
  struct HDG<dim>::ScratchData
  {
    FEValues<dim>     fe_values_local;
    FEFaceValues<dim> fe_face_values_local;
    FEFaceValues<dim> fe_face_values;

    FullMatrix<double> ll_matrix;
    FullMatrix<double> lf_matrix;
    FullMatrix<double> fl_matrix;
    FullMatrix<double> tmp_matrix;
    Vector<double>     l_rhs;
    Vector<double>     tmp_rhs;

    std::vector<Tensor<1, dim>> q_phi;
    std::vector<double>         q_phi_div;
    std::vector<double>         u_phi;
    std::vector<Tensor<1, dim>> u_phi_grad;
    std::vector<double>         tr_phi;
    std::vector<double>         trace_values;

    std::vector<std::vector<unsigned int>> fe_local_support_on_face;
    std::vector<std::vector<unsigned int>> fe_support_on_face;

    ConvectionVelocity<dim> convection_velocity;
    RightHandSide<dim>      right_hand_side;
    const Solution<dim>     exact_solution;

    ScratchData(const FiniteElement<dim> &fe,
                const FiniteElement<dim> &fe_local,
                const QGauss<dim> &       quadrature_formula,
                const QGauss<dim - 1> &   face_quadrature_formula,
                const UpdateFlags         local_flags,
                const UpdateFlags         local_face_flags,
                const UpdateFlags         flags)
      : fe_values_local(fe_local, quadrature_formula, local_flags)
      , fe_face_values_local(fe_local,
                             face_quadrature_formula,
                             local_face_flags)
      , fe_face_values(fe, face_quadrature_formula, flags)
      , ll_matrix(fe_local.n_dofs_per_cell(), fe_local.n_dofs_per_cell())
      , lf_matrix(fe_local.n_dofs_per_cell(), fe.n_dofs_per_cell())
      , fl_matrix(fe.n_dofs_per_cell(), fe_local.n_dofs_per_cell())
      , tmp_matrix(fe.n_dofs_per_cell(), fe_local.n_dofs_per_cell())
      , l_rhs(fe_local.n_dofs_per_cell())
      , tmp_rhs(fe_local.n_dofs_per_cell())
      , q_phi(fe_local.n_dofs_per_cell())
      , q_phi_div(fe_local.n_dofs_per_cell())
      , u_phi(fe_local.n_dofs_per_cell())
      , u_phi_grad(fe_local.n_dofs_per_cell())
      , tr_phi(fe.n_dofs_per_cell())
      , trace_values(face_quadrature_formula.size())
      , fe_local_support_on_face(GeometryInfo<dim>::faces_per_cell)
      , fe_support_on_face(GeometryInfo<dim>::faces_per_cell)
      , exact_solution()
    {
      for (unsigned int face_no : GeometryInfo<dim>::face_indices())
        for (unsigned int i = 0; i < fe_local.n_dofs_per_cell(); ++i)
          {
            if (fe_local.has_support_on_face(i, face_no))
              fe_local_support_on_face[face_no].push_back(i);
          }

      for (unsigned int face_no : GeometryInfo<dim>::face_indices())
        for (unsigned int i = 0; i < fe.n_dofs_per_cell(); ++i)
          {
            if (fe.has_support_on_face(i, face_no))
              fe_support_on_face[face_no].push_back(i);
          }
    }

    ScratchData(const ScratchData &sd)
      : fe_values_local(sd.fe_values_local.get_fe(),
                        sd.fe_values_local.get_quadrature(),
                        sd.fe_values_local.get_update_flags())
      , fe_face_values_local(sd.fe_face_values_local.get_fe(),
                             sd.fe_face_values_local.get_quadrature(),
                             sd.fe_face_values_local.get_update_flags())
      , fe_face_values(sd.fe_face_values.get_fe(),
                       sd.fe_face_values.get_quadrature(),
                       sd.fe_face_values.get_update_flags())
      , ll_matrix(sd.ll_matrix)
      , lf_matrix(sd.lf_matrix)
      , fl_matrix(sd.fl_matrix)
      , tmp_matrix(sd.tmp_matrix)
      , l_rhs(sd.l_rhs)
      , tmp_rhs(sd.tmp_rhs)
      , q_phi(sd.q_phi)
      , q_phi_div(sd.q_phi_div)
      , u_phi(sd.u_phi)
      , u_phi_grad(sd.u_phi_grad)
      , tr_phi(sd.tr_phi)
      , trace_values(sd.trace_values)
      , fe_local_support_on_face(sd.fe_local_support_on_face)
      , fe_support_on_face(sd.fe_support_on_face)
      , exact_solution()
    {}
  };



  // @sect4{HDG::PostProcessScratchData}
  // @p PostProcessScratchData contains the data used by WorkStream
  // when post-processing the local solution $u^*$.  It is similar to,
  // but much simpler than, @p ScratchData.
  template <int dim>
  struct HDG<dim>::PostProcessScratchData
  {
    FEValues<dim> fe_values_local;
    FEValues<dim> fe_values;

    std::vector<double>         u_values;
    std::vector<Tensor<1, dim>> u_gradients;
    FullMatrix<double>          cell_matrix;

    Vector<double> cell_rhs;
    Vector<double> cell_sol;

    PostProcessScratchData(const FiniteElement<dim> &fe,
                           const FiniteElement<dim> &fe_local,
                           const QGauss<dim> &       quadrature_formula,
                           const UpdateFlags         local_flags,
                           const UpdateFlags         flags)
      : fe_values_local(fe_local, quadrature_formula, local_flags)
      , fe_values(fe, quadrature_formula, flags)
      , u_values(quadrature_formula.size())
      , u_gradients(quadrature_formula.size())
      , cell_matrix(fe.n_dofs_per_cell(), fe.n_dofs_per_cell())
      , cell_rhs(fe.n_dofs_per_cell())
      , cell_sol(fe.n_dofs_per_cell())
    {}

    PostProcessScratchData(const PostProcessScratchData &sd)
      : fe_values_local(sd.fe_values_local.get_fe(),
                        sd.fe_values_local.get_quadrature(),
                        sd.fe_values_local.get_update_flags())
      , fe_values(sd.fe_values.get_fe(),
                  sd.fe_values.get_quadrature(),
                  sd.fe_values.get_update_flags())
      , u_values(sd.u_values)
      , u_gradients(sd.u_gradients)
      , cell_matrix(sd.cell_matrix)
      , cell_rhs(sd.cell_rhs)
      , cell_sol(sd.cell_sol)
    {}
  };



  // @sect4{HDG::assemble_system}
  // The @p assemble_system function is similar to the one in step-32, where
  // the quadrature formula and the update flags are set up, and then
  // <code>WorkStream</code> is used to do the work in a multi-threaded
  // manner.  The @p trace_reconstruct input parameter is used to decide
  // whether we are solving for the global skeleton solution (false) or the
  // local solution (true).
  //
  // One thing worth noting for the multi-threaded execution of assembly is
  // the fact that the local computations in `assemble_system_one_cell()` call
  // into BLAS and LAPACK functions if those are available in deal.II. Thus,
  // the underlying BLAS/LAPACK library must support calls from multiple
  // threads at the same time. Most implementations do support this, but some
  // libraries need to be built in a specific way to avoid problems. For
  // example, OpenBLAS compiled without multithreading inside the BLAS/LAPACK
  // calls needs to be built with a flag called `USE_LOCKING` set to true.
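  //
  // Also recall how WorkStream divides the work: the per-cell worker
  // functions may run concurrently on several threads, but the copy
  // function that writes into the global matrix and vector is executed
  // sequentially, so no explicit locking is needed in
  // @p copy_local_to_global.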
  template <int dim>
  void HDG<dim>::assemble_system(const bool trace_reconstruct)
  {
    const QGauss<dim>     quadrature_formula(fe.degree + 1);
    const QGauss<dim - 1> face_quadrature_formula(fe.degree + 1);

    const UpdateFlags local_flags(update_values | update_gradients |
                                  update_JxW_values | update_quadrature_points);

    const UpdateFlags local_face_flags(update_values);

    const UpdateFlags flags(update_values | update_normal_vectors |
                            update_quadrature_points | update_JxW_values);

    PerTaskData task_data(fe.n_dofs_per_cell(), trace_reconstruct);
    ScratchData scratch(fe,
                        fe_local,
                        quadrature_formula,
                        face_quadrature_formula,
                        local_flags,
                        local_face_flags,
                        flags);

    WorkStream::run(dof_handler.begin_active(),
                    dof_handler.end(),
                    *this,
                    &HDG<dim>::assemble_system_one_cell,
                    &HDG<dim>::copy_local_to_global,
                    scratch,
                    task_data);
  }



  // @sect4{HDG::assemble_system_one_cell}
  // The real work of the HDG program is done by @p assemble_system_one_cell.
  // Assembling the local matrices $A, B, C$ is done here, along with the
  // local contributions of the global matrix $D$.
  template <int dim>
  void HDG<dim>::assemble_system_one_cell(
    const typename DoFHandler<dim>::active_cell_iterator &cell,
    ScratchData &                                         scratch,
    PerTaskData &                                         task_data)
  {
    // Construct iterator for dof_handler_local for FEValues reinit function.
    typename DoFHandler<dim>::active_cell_iterator loc_cell(&triangulation,
                                                            cell->level(),
                                                            cell->index(),
                                                            &dof_handler_local);

    const unsigned int n_q_points =
      scratch.fe_values_local.get_quadrature().size();
    const unsigned int n_face_q_points =
      scratch.fe_face_values_local.get_quadrature().size();

    const unsigned int loc_dofs_per_cell =
      scratch.fe_values_local.get_fe().n_dofs_per_cell();

    const FEValuesExtractors::Vector fluxes(0);
    const FEValuesExtractors::Scalar scalar(dim);

    scratch.ll_matrix = 0;
    scratch.l_rhs     = 0;
    if (!task_data.trace_reconstruct)
      {
        scratch.lf_matrix     = 0;
        scratch.fl_matrix     = 0;
        task_data.cell_matrix = 0;
        task_data.cell_vector = 0;
      }
    scratch.fe_values_local.reinit(loc_cell);

    // We first compute the cell-interior contribution to the @p ll_matrix
    // matrix (referred to as matrix $A$ in the introduction) corresponding to
    // local-local coupling, as well as the local right-hand-side vector.  We
    // store the values at each quadrature point for the basis functions, the
    // right-hand-side value, and the convection velocity, in order to have
    // quick access to these fields.
    for (unsigned int q = 0; q < n_q_points; ++q)
      {
        const double rhs_value = scratch.right_hand_side.value(
          scratch.fe_values_local.quadrature_point(q));
        const Tensor<1, dim> convection = scratch.convection_velocity.value(
          scratch.fe_values_local.quadrature_point(q));
        const double JxW = scratch.fe_values_local.JxW(q);
        for (unsigned int k = 0; k < loc_dofs_per_cell; ++k)
          {
            scratch.q_phi[k] = scratch.fe_values_local[fluxes].value(k, q);
            scratch.q_phi_div[k] =
              scratch.fe_values_local[fluxes].divergence(k, q);
            scratch.u_phi[k] = scratch.fe_values_local[scalar].value(k, q);
            scratch.u_phi_grad[k] =
              scratch.fe_values_local[scalar].gradient(k, q);
          }
        for (unsigned int i = 0; i < loc_dofs_per_cell; ++i)
          {
            for (unsigned int j = 0; j < loc_dofs_per_cell; ++j)
              scratch.ll_matrix(i, j) +=
                (scratch.q_phi[i] * scratch.q_phi[j] -
                 scratch.q_phi_div[i] * scratch.u_phi[j] +
                 scratch.u_phi[i] * scratch.q_phi_div[j] -
                 (scratch.u_phi_grad[i] * convection) * scratch.u_phi[j]) *
                JxW;
            scratch.l_rhs(i) += scratch.u_phi[i] * rhs_value * JxW;
          }
      }

    // Face terms are assembled on all faces of all elements. This is in
    // contrast to more traditional DG methods, where each face is only visited
    // once in the assembly procedure.
    for (const auto face_no : cell->face_indices())
      {
        scratch.fe_face_values_local.reinit(loc_cell, face_no);
        scratch.fe_face_values.reinit(cell, face_no);

        // The already obtained $\hat{u}$ values are needed when solving for the
        // local variables.
        if (task_data.trace_reconstruct)
          scratch.fe_face_values.get_function_values(solution,
                                                     scratch.trace_values);

        for (unsigned int q = 0; q < n_face_q_points; ++q)
          {
            const double     JxW = scratch.fe_face_values.JxW(q);
            const Point<dim> quadrature_point =
              scratch.fe_face_values.quadrature_point(q);
            const Tensor<1, dim> normal =
              scratch.fe_face_values.normal_vector(q);
            const Tensor<1, dim> convection =
              scratch.convection_velocity.value(quadrature_point);

            // Here we compute the stabilization parameter discussed in the
            // introduction: since the diffusion is one and the diffusion
            // length scale is set to 1/5, it simply results in a contribution
            // of 5 for the diffusion part and the magnitude of convection
            // through the element boundary in a centered scheme for the
            // convection part.
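            // In formulas, $\tau = \frac{\nu}{\ell} + |\mathbf{c} \cdot
            // \mathbf{n}|$ with diffusion $\nu = 1$ and length scale
            // $\ell = \frac{1}{5}$, i.e., $\tau = 5 + |\mathbf{c} \cdot
            // \mathbf{n}|$.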
            const double tau_stab = (5. + std::abs(convection * normal));

            // We store the non-zero flux and scalar values, making use of the
            // support_on_face information we created in @p ScratchData.
            for (unsigned int k = 0;
                 k < scratch.fe_local_support_on_face[face_no].size();
                 ++k)
              {
                const unsigned int kk =
                  scratch.fe_local_support_on_face[face_no][k];
                scratch.q_phi[k] =
                  scratch.fe_face_values_local[fluxes].value(kk, q);
                scratch.u_phi[k] =
                  scratch.fe_face_values_local[scalar].value(kk, q);
              }

            // When @p trace_reconstruct=false, we are preparing to assemble the
            // system for the skeleton variable $\hat{u}$. If this is the case,
            // we must assemble all local matrices associated with the problem:
            // local-local, local-face, face-local, and face-face.  The
            // face-face matrix is stored as @p PerTaskData::cell_matrix, so
            // that it can be assembled into the global system by @p
            // copy_local_to_global.
            if (!task_data.trace_reconstruct)
              {
                for (unsigned int k = 0;
                     k < scratch.fe_support_on_face[face_no].size();
                     ++k)
                  scratch.tr_phi[k] = scratch.fe_face_values.shape_value(
                    scratch.fe_support_on_face[face_no][k], q);
                for (unsigned int i = 0;
                     i < scratch.fe_local_support_on_face[face_no].size();
                     ++i)
                  for (unsigned int j = 0;
                       j < scratch.fe_support_on_face[face_no].size();
                       ++j)
                    {
                      const unsigned int ii =
                        scratch.fe_local_support_on_face[face_no][i];
                      const unsigned int jj =
                        scratch.fe_support_on_face[face_no][j];
                      scratch.lf_matrix(ii, jj) +=
                        ((scratch.q_phi[i] * normal +
                          (convection * normal - tau_stab) * scratch.u_phi[i]) *
                         scratch.tr_phi[j]) *
                        JxW;

                      // Note the sign of the face_no-local matrix.  We negate
                      // the sign during assembly here so that we can use the
                      // FullMatrix::mmult with addition when computing the
                      // Schur complement.
                      scratch.fl_matrix(jj, ii) -=
                        ((scratch.q_phi[i] * normal +
                          tau_stab * scratch.u_phi[i]) *
                         scratch.tr_phi[j]) *
                        JxW;
                    }

                for (unsigned int i = 0;
                     i < scratch.fe_support_on_face[face_no].size();
                     ++i)
                  for (unsigned int j = 0;
                       j < scratch.fe_support_on_face[face_no].size();
                       ++j)
                    {
                      const unsigned int ii =
                        scratch.fe_support_on_face[face_no][i];
                      const unsigned int jj =
                        scratch.fe_support_on_face[face_no][j];
                      task_data.cell_matrix(ii, jj) +=
                        ((convection * normal - tau_stab) * scratch.tr_phi[i] *
                         scratch.tr_phi[j]) *
                        JxW;
                    }

                if (cell->face(face_no)->at_boundary() &&
                    (cell->face(face_no)->boundary_id() == 1))
                  {
                    const double neumann_value =
                      -scratch.exact_solution.gradient(quadrature_point) *
                        normal +
                      convection * normal *
                        scratch.exact_solution.value(quadrature_point);
                    for (unsigned int i = 0;
                         i < scratch.fe_support_on_face[face_no].size();
                         ++i)
                      {
                        const unsigned int ii =
                          scratch.fe_support_on_face[face_no][i];
                        task_data.cell_vector(ii) +=
                          scratch.tr_phi[i] * neumann_value * JxW;
                      }
                  }
              }

            // This last term adds the contribution of the term $\left<w,\tau
            // u_h\right>_{\partial \mathcal T}$ to the local matrix. As opposed
            // to the face matrices above, we need it in both assembly stages.
            for (unsigned int i = 0;
                 i < scratch.fe_local_support_on_face[face_no].size();
                 ++i)
              for (unsigned int j = 0;
                   j < scratch.fe_local_support_on_face[face_no].size();
                   ++j)
                {
                  const unsigned int ii =
                    scratch.fe_local_support_on_face[face_no][i];
                  const unsigned int jj =
                    scratch.fe_local_support_on_face[face_no][j];
                  scratch.ll_matrix(ii, jj) +=
                    tau_stab * scratch.u_phi[i] * scratch.u_phi[j] * JxW;
                }

            // When @p trace_reconstruct=true, we are solving for the local
            // solutions on an element by element basis.  The local
            // right-hand-side is calculated by replacing the basis functions @p
            // tr_phi in the @p lf_matrix computation by the computed values @p
            // trace_values.  Of course, the sign of the matrix is now minus
            // since we have moved everything to the other side of the equation.
            if (task_data.trace_reconstruct)
              for (unsigned int i = 0;
                   i < scratch.fe_local_support_on_face[face_no].size();
                   ++i)
                {
                  const unsigned int ii =
                    scratch.fe_local_support_on_face[face_no][i];
                  scratch.l_rhs(ii) -=
                    (scratch.q_phi[i] * normal +
                     scratch.u_phi[i] * (convection * normal - tau_stab)) *
                    scratch.trace_values[q] * JxW;
                }
          }
      }

    // Once assembly of all of the local contributions is complete, we must
    // either: (1) assemble the global system, or (2) compute the local solution
    // values and save them. In either case, the first step is to invert the
    // local-local matrix.
    scratch.ll_matrix.gauss_jordan();

    // For (1), we compute the Schur complement and add it to the @p
    // cell_matrix, matrix $D$ in the introduction.
    if (task_data.trace_reconstruct == false)
      {
        scratch.fl_matrix.mmult(scratch.tmp_matrix, scratch.ll_matrix);
        scratch.tmp_matrix.vmult_add(task_data.cell_vector, scratch.l_rhs);
        scratch.tmp_matrix.mmult(task_data.cell_matrix,
                                 scratch.lf_matrix,
                                 true);
        cell->get_dof_indices(task_data.dof_indices);
      }
    // For (2), we are simply solving (ll_matrix).(solution_local) = (l_rhs).
    // Hence, we multiply @p l_rhs by our already inverted local-local matrix
    // and store the result using the <code>set_dof_values</code> function.
    else
      {
        scratch.ll_matrix.vmult(scratch.tmp_rhs, scratch.l_rhs);
        loc_cell->set_dof_values(scratch.tmp_rhs, solution_local);
      }
  }



  // @sect4{HDG::copy_local_to_global}
  // If we are in the first step of the solution, i.e. @p trace_reconstruct=false,
  // then we assemble the local matrices into the global system.
  template <int dim>
  void HDG<dim>::copy_local_to_global(const PerTaskData &data)
  {
    if (data.trace_reconstruct == false)
      constraints.distribute_local_to_global(data.cell_matrix,
                                             data.cell_vector,
                                             data.dof_indices,
                                             system_matrix,
                                             system_rhs);
  }



  // @sect4{HDG::solve}
  // The skeleton solution is solved for by using a BiCGStab solver with
  // identity preconditioner.
  template <int dim>
  void HDG<dim>::solve()
  {
    SolverControl                  solver_control(system_matrix.m() * 10,
                                 1e-11 * system_rhs.l2_norm());
    SolverBicgstab<Vector<double>> solver(solver_control);
    solver.solve(system_matrix, solution, system_rhs, PreconditionIdentity());

    std::cout << "   Number of BiCGStab iterations: "
              << solver_control.last_step() << std::endl;

    system_matrix.clear();
    sparsity_pattern.reinit(0, 0, 0, 1);

    constraints.distribute(solution);

    // Once we have solved for the skeleton solution,
    // we can solve for the local solutions in an element-by-element
    // fashion.  We do this by re-using the same @p assemble_system function
    // but switching @p trace_reconstruct to true.
    assemble_system(true);
  }



  // @sect4{HDG::postprocess}

  // The postprocess method serves two purposes. First, we want to construct a
  // post-processed scalar variable in the element space of degree $p+1$ that
  // we hope will converge at order $p+2$. This is again an element-by-element
  // process and only involves the scalar solution as well as the gradient on
  // the local cell. To do this, we introduce the already defined scratch data
  // together with some update flags and run the work stream to do this in
  // parallel.
  //
  // Secondly, we want to compute discretization errors just as we did in
  // step-7. The overall procedure is similar with calls to
  // VectorTools::integrate_difference. The difference is in how we compute
  // the errors for the scalar variable and the gradient variable. In step-7,
  // we did this by computing @p L2_norm or @p H1_seminorm
  // contributions. Here, we have a DoFHandler with these two contributions
  // computed and sorted by their vector component, <code>[0, dim)</code> for
  // the gradient and @p dim for the scalar. To compute their value, we hence
  // use a ComponentSelectFunction with either of them, together with the @p
  // SolutionAndGradient class introduced above that contains the analytic
  // parts of either of them. Eventually, we also compute the L2-error of the
  // post-processed solution and add the results into the convergence table.
  template <int dim>
  void HDG<dim>::postprocess()
  {
    {
      const QGauss<dim> quadrature_formula(fe_u_post.degree + 1);
      const UpdateFlags local_flags(update_values);
      const UpdateFlags flags(update_values | update_gradients |
                              update_JxW_values);

      PostProcessScratchData scratch(
        fe_u_post, fe_local, quadrature_formula, local_flags, flags);

      WorkStream::run(
        dof_handler_u_post.begin_active(),
        dof_handler_u_post.end(),
        [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
               PostProcessScratchData &                              scratch,
               unsigned int &                                        data) {
          this->postprocess_one_cell(cell, scratch, data);
        },
        std::function<void(const unsigned int &)>(),
        scratch,
        0U);
    }
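
    // Note the empty <code>std::function</code> passed as the copier above:
    // each cell writes into a disjoint set of entries of the (discontinuous)
    // post-processed solution vector directly from the worker, so there is
    // nothing left to do in a copy stage and WorkStream simply skips it.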

    Vector<float> difference_per_cell(triangulation.n_active_cells());

    ComponentSelectFunction<dim> value_select(dim, dim + 1);
    VectorTools::integrate_difference(dof_handler_local,
                                      solution_local,
                                      SolutionAndGradient<dim>(),
                                      difference_per_cell,
                                      QGauss<dim>(fe.degree + 2),
                                      VectorTools::L2_norm,
                                      &value_select);
    const double L2_error =
      VectorTools::compute_global_error(triangulation,
                                        difference_per_cell,
                                        VectorTools::L2_norm);

    ComponentSelectFunction<dim> gradient_select(
      std::pair<unsigned int, unsigned int>(0, dim), dim + 1);
    VectorTools::integrate_difference(dof_handler_local,
                                      solution_local,
                                      SolutionAndGradient<dim>(),
                                      difference_per_cell,
                                      QGauss<dim>(fe.degree + 2),
                                      VectorTools::L2_norm,
                                      &gradient_select);
    const double grad_error =
      VectorTools::compute_global_error(triangulation,
                                        difference_per_cell,
                                        VectorTools::L2_norm);

    VectorTools::integrate_difference(dof_handler_u_post,
                                      solution_u_post,
                                      Solution<dim>(),
                                      difference_per_cell,
                                      QGauss<dim>(fe.degree + 3),
                                      VectorTools::L2_norm);
    const double post_error =
      VectorTools::compute_global_error(triangulation,
                                        difference_per_cell,
                                        VectorTools::L2_norm);

    convergence_table.add_value("cells", triangulation.n_active_cells());
    convergence_table.add_value("dofs", dof_handler.n_dofs());

    convergence_table.add_value("val L2", L2_error);
    convergence_table.set_scientific("val L2", true);
    convergence_table.set_precision("val L2", 3);

    convergence_table.add_value("grad L2", grad_error);
    convergence_table.set_scientific("grad L2", true);
    convergence_table.set_precision("grad L2", 3);

    convergence_table.add_value("val L2-post", post_error);
    convergence_table.set_scientific("val L2-post", true);
    convergence_table.set_precision("val L2-post", 3);
  }



  // @sect4{HDG::postprocess_one_cell}
  //
  // This is the actual work done for the postprocessing. According to the
  // discussion in the introduction, we need to set up a system that projects
  // the gradient part of the DG solution onto the gradient of the
  // post-processed variable. Moreover, we need to set the average of the new
  // post-processed variable to equal the average of the scalar DG solution
  // on the cell.
  //
  // More technically speaking, the projection of the gradient is a system
  // that would potentially fill our @p dofs_per_cell times @p dofs_per_cell
  // matrix but is singular (the sum of all rows would be zero because the
  // constant function has zero gradient). Therefore, we take one row away and
  // use it for imposing the average of the scalar value. We pick the first
  // row for the scalar part, even though we could pick any row for $\mathcal
  // Q_{-p}$ elements. However, had we used FE_DGP elements instead, the first
  // row would correspond to the constant part already and deleting e.g. the
  // last row would give us a singular system. This way, our program can also
  // be used for those elements.
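  //
  // In formulas, on each cell $K$ we seek $u_h^* \in \mathcal Q_{p+1}$ such
  // that
  // @f{align*}{
  //   \left(\nabla u_h^*, \nabla w\right)_K &=
  //     -\left(\mathbf{q}_h, \nabla w\right)_K
  //     \quad \text{for all } w \in \mathcal Q_{p+1},\\
  //   \left(u_h^*, 1\right)_K &= \left(u_h, 1\right)_K,
  // @f}
  // where the second equation takes the place of the (singular) first row
  // of the projection matrix assembled below.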
1128   template <int dim>
postprocess_one_cell(const typename DoFHandler<dim>::active_cell_iterator & cell,PostProcessScratchData & scratch,unsigned int &)1129   void HDG<dim>::postprocess_one_cell(
1130     const typename DoFHandler<dim>::active_cell_iterator &cell,
1131     PostProcessScratchData &                              scratch,
1132     unsigned int &)
1133   {
1134     typename DoFHandler<dim>::active_cell_iterator loc_cell(&triangulation,
1135                                                             cell->level(),
1136                                                             cell->index(),
1137                                                             &dof_handler_local);
1138 
1139     scratch.fe_values_local.reinit(loc_cell);
1140     scratch.fe_values.reinit(cell);
1141 
1142     FEValuesExtractors::Vector fluxes(0);
1143     FEValuesExtractors::Scalar scalar(dim);
1144 
1145     const unsigned int n_q_points = scratch.fe_values.get_quadrature().size();
1146     const unsigned int dofs_per_cell = scratch.fe_values.dofs_per_cell;
1147 
1148     scratch.fe_values_local[scalar].get_function_values(solution_local,
1149                                                         scratch.u_values);
1150     scratch.fe_values_local[fluxes].get_function_values(solution_local,
1151                                                         scratch.u_gradients);
1152 
1153     double sum = 0;
1154     for (unsigned int i = 1; i < dofs_per_cell; ++i)
1155       {
1156         for (unsigned int j = 0; j < dofs_per_cell; ++j)
1157           {
1158             sum = 0;
1159             for (unsigned int q = 0; q < n_q_points; ++q)
1160               sum += (scratch.fe_values.shape_grad(i, q) *
1161                       scratch.fe_values.shape_grad(j, q)) *
1162                      scratch.fe_values.JxW(q);
1163             scratch.cell_matrix(i, j) = sum;
1164           }
1165 
1166         sum = 0;
1167         for (unsigned int q = 0; q < n_q_points; ++q)
1168           sum -= (scratch.fe_values.shape_grad(i, q) * scratch.u_gradients[q]) *
1169                  scratch.fe_values.JxW(q);
1170         scratch.cell_rhs(i) = sum;
1171       }
1172     for (unsigned int j = 0; j < dofs_per_cell; ++j)
1173       {
1174         sum = 0;
1175         for (unsigned int q = 0; q < n_q_points; ++q)
1176           sum += scratch.fe_values.shape_value(j, q) * scratch.fe_values.JxW(q);
1177         scratch.cell_matrix(0, j) = sum;
1178       }
1179     {
1180       sum = 0;
1181       for (unsigned int q = 0; q < n_q_points; ++q)
1182         sum += scratch.u_values[q] * scratch.fe_values.JxW(q);
1183       scratch.cell_rhs(0) = sum;
1184     }
1185 
1186     // Having assembled all terms, we can again go on and solve the linear
1187     // system. We invert the matrix and then multiply the inverse by the
1188     // right hand side. An alternative (and more numerically stable) method
1189     // would have been to only factorize the matrix and apply the factorization.
1190     scratch.cell_matrix.gauss_jordan();
1191     scratch.cell_matrix.vmult(scratch.cell_sol, scratch.cell_rhs);
1192     cell->distribute_local_to_global(scratch.cell_sol, solution_u_post);
1193   }
1194 
1195 
1196 
1197   // @sect4{HDG::output_results}
1198   // We have 3 sets of results that we would like to output:  the local
1199   // solution, the post-processed local solution, and the skeleton solution. The
1200   // former 2 both 'live' on element volumes, whereas the latter lives on
1201   // codimension-1 surfaces
1202   // of the triangulation.  Our @p output_results function writes all local solutions
1203   // to the same vtk file, even though they correspond to different
1204   // DoFHandler objects.  The graphical output for the skeleton
1205   // variable is done through use of the DataOutFaces class.
  template <int dim>
  void HDG<dim>::output_results(const unsigned int cycle)
  {
    std::string filename;
    switch (refinement_mode)
      {
        case global_refinement:
          filename = "solution-global";
          break;
        case adaptive_refinement:
          filename = "solution-adaptive";
          break;
        default:
          Assert(false, ExcNotImplemented());
      }

    std::string face_out(filename);
    face_out += "-face";

    filename += "-q" + Utilities::int_to_string(fe.degree, 1);
    filename += "-" + Utilities::int_to_string(cycle, 2);
    filename += ".vtk";
    std::ofstream output(filename);

    DataOut<dim> data_out;

    // We first define the names and types of the local solution,
    // and add the data to @p data_out.
    std::vector<std::string> names(dim, "gradient");
    names.emplace_back("solution");
    std::vector<DataComponentInterpretation::DataComponentInterpretation>
      component_interpretation(
        dim + 1, DataComponentInterpretation::component_is_part_of_vector);
    component_interpretation[dim] =
      DataComponentInterpretation::component_is_scalar;
    data_out.add_data_vector(dof_handler_local,
                             solution_local,
                             names,
                             component_interpretation);

    // The second data item we add is the post-processed solution.
    // In this case, it is a single scalar variable belonging to
    // a different DoFHandler.
    std::vector<std::string> post_name(1, "u_post");
    std::vector<DataComponentInterpretation::DataComponentInterpretation>
      post_comp_type(1, DataComponentInterpretation::component_is_scalar);
    data_out.add_data_vector(dof_handler_u_post,
                             solution_u_post,
                             post_name,
                             post_comp_type);

    data_out.build_patches(fe.degree);
    data_out.write_vtk(output);

    face_out += "-q" + Utilities::int_to_string(fe.degree, 1);
    face_out += "-" + Utilities::int_to_string(cycle, 2);
    face_out += ".vtk";
    std::ofstream face_output(face_out);

    // The <code>DataOutFaces</code> class works analogously to the
    // <code>DataOut</code> class when we have a <code>DoFHandler</code> that
    // defines the solution on the skeleton of the triangulation.  We treat it
    // as such here, and the code is similar to that above.
    DataOutFaces<dim>        data_out_face(false);
    std::vector<std::string> face_name(1, "u_hat");
    std::vector<DataComponentInterpretation::DataComponentInterpretation>
      face_component_type(1, DataComponentInterpretation::component_is_scalar);

    data_out_face.add_data_vector(dof_handler,
                                  solution,
                                  face_name,
                                  face_component_type);

    data_out_face.build_patches(fe.degree);
    data_out_face.write_vtk(face_output);
  }
  // @sect4{HDG::refine_grid}

  // We implement two different refinement cases for HDG, just as in
  // <code>Step-7</code>: adaptive_refinement and global_refinement.  The
  // global_refinement option recreates the entire triangulation every
  // time. This is because we want to use a finer sequence of meshes than the
  // factor-of-two sequence that a single refinement step per cycle would
  // give us, namely 2, 3, 4, 6, 8, 12, 16, ... elements per direction.
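  // To see where this sequence comes from, note that the function below
  // alternates the base mesh between 2 and 3 subdivisions per direction
  // while increasing the number of global refinements every other cycle. A
  // minimal standalone sketch (not part of the program) that reproduces the
  // element counts per direction for dim = 2:
  // @code
  // const unsigned int dim = 2;
  // for (unsigned int cycle = 1; cycle < 10; ++cycle)
  //   {
  //     const unsigned int n_subdivisions = 2 + (cycle % 2);
  //     const unsigned int n_refinements  = 3 - dim + cycle / 2;
  //     // prints 6, 8, 12, 16, 24, 32, 48, 64, 96 (cycle 0 yields 4)
  //     std::cout << n_subdivisions * (1u << n_refinements) << ' ';
  //   }
  // @endcode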

  // The adaptive_refinement mode uses the <code>KellyErrorEstimator</code> to
  // give a decent indication of the non-regular regions in the scalar local
  // solutions.
  template <int dim>
  void HDG<dim>::refine_grid(const unsigned int cycle)
  {
    if (cycle == 0)
      {
        GridGenerator::subdivided_hyper_cube(triangulation, 2, -1, 1);
        triangulation.refine_global(3 - dim);
      }
    else
      switch (refinement_mode)
        {
          case global_refinement:
            {
              triangulation.clear();
              GridGenerator::subdivided_hyper_cube(triangulation,
                                                   2 + (cycle % 2),
                                                   -1,
                                                   1);
              triangulation.refine_global(3 - dim + cycle / 2);
              break;
            }

          case adaptive_refinement:
            {
              Vector<float> estimated_error_per_cell(
                triangulation.n_active_cells());

              FEValuesExtractors::Scalar scalar(dim);
              std::map<types::boundary_id, const Function<dim> *>
                neumann_boundary;
              KellyErrorEstimator<dim>::estimate(dof_handler_local,
                                                 QGauss<dim - 1>(fe.degree + 1),
                                                 neumann_boundary,
                                                 solution_local,
                                                 estimated_error_per_cell,
                                                 fe_local.component_mask(
                                                   scalar));

              GridRefinement::refine_and_coarsen_fixed_number(
                triangulation, estimated_error_per_cell, 0.3, 0.);

              triangulation.execute_coarsening_and_refinement();

              break;
            }

          default:
            {
              Assert(false, ExcNotImplemented());
            }
        }

    // Just as in step-7, we set the boundary indicator of two of the faces to 1
    // where we want to specify Neumann boundary conditions instead of Dirichlet
    // conditions. Since we re-create the triangulation every time for global
    // refinement, the flags are set in every refinement step, not just at the
    // beginning.
    for (const auto &cell : triangulation.cell_iterators())
      for (const auto &face : cell->face_iterators())
        if (face->at_boundary())
          if ((std::fabs(face->center()(0) - (-1)) < 1e-12) ||
              (std::fabs(face->center()(1) - (-1)) < 1e-12))
            face->set_boundary_id(1);
  }

  // @sect4{HDG::run}
  // The functionality here is basically the same as <code>Step-7</code>.
  // We loop over 10 cycles, refining the grid on each one.  At the end,
  // convergence tables are created.
  template <int dim>
  void HDG<dim>::run()
  {
    for (unsigned int cycle = 0; cycle < 10; ++cycle)
      {
        std::cout << "Cycle " << cycle << ':' << std::endl;

        refine_grid(cycle);
        setup_system();
        assemble_system(false);
        solve();
        postprocess();
        output_results(cycle);
      }

    // There is one minor change for the convergence table compared to step-7:
    // Since we did not refine our mesh by a factor two in each cycle (but
    // rather used the sequence 2, 3, 4, 6, 8, 12, ...), we need to tell the
    // convergence rate evaluation about this. We do this by setting the
    // number of cells as a reference column and additionally specifying the
    // dimension of the problem, which gives the necessary information for the
    // relation between number of cells and mesh size.
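    // Concretely, if $e_k$ denotes the error and $N_k$ the number of cells
    // in cycle $k$, the reported rate is the exponent $p$ in
    // $e \propto h^p \propto N^{-p/d}$ with $d$ the space dimension,
    // estimated from consecutive cycles as (up to the exact convention
    // ConvergenceTable uses internally)
    // @f[
    //   p \approx \frac{\log(e_{k-1}/e_k)}{\log\bigl((N_k/N_{k-1})^{1/d}\bigr)}.
    // @f]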
    if (refinement_mode == global_refinement)
      {
        convergence_table.evaluate_convergence_rates(
          "val L2", "cells", ConvergenceTable::reduction_rate_log2, dim);
        convergence_table.evaluate_convergence_rates(
          "grad L2", "cells", ConvergenceTable::reduction_rate_log2, dim);
        convergence_table.evaluate_convergence_rates(
          "val L2-post", "cells", ConvergenceTable::reduction_rate_log2, dim);
      }
    convergence_table.write_text(std::cout);
  }

} // end of namespace Step51



int main()
{
  const unsigned int dim = 2;

  try
    {
      // Now for the three calls to the main class in complete analogy to
      // step-7.
      {
        std::cout << "Solving with Q1 elements, adaptive refinement"
                  << std::endl
                  << "============================================="
                  << std::endl
                  << std::endl;

        Step51::HDG<dim> hdg_problem(1, Step51::HDG<dim>::adaptive_refinement);
        hdg_problem.run();

        std::cout << std::endl;
      }

      {
        std::cout << "Solving with Q1 elements, global refinement" << std::endl
                  << "===========================================" << std::endl
                  << std::endl;

        Step51::HDG<dim> hdg_problem(1, Step51::HDG<dim>::global_refinement);
        hdg_problem.run();

        std::cout << std::endl;
      }

      {
        std::cout << "Solving with Q3 elements, global refinement" << std::endl
                  << "===========================================" << std::endl
                  << std::endl;

        Step51::HDG<dim> hdg_problem(3, Step51::HDG<dim>::global_refinement);
        hdg_problem.run();

        std::cout << std::endl;
      }
    }
  catch (std::exception &exc)
    {
      std::cerr << std::endl
                << std::endl
                << "----------------------------------------------------"
                << std::endl;
      std::cerr << "Exception on processing: " << std::endl
                << exc.what() << std::endl
                << "Aborting!" << std::endl
                << "----------------------------------------------------"
                << std::endl;
      return 1;
    }
  catch (...)
    {
      std::cerr << std::endl
                << std::endl
                << "----------------------------------------------------"
                << std::endl;
      std::cerr << "Unknown exception!" << std::endl
                << "Aborting!" << std::endl
                << "----------------------------------------------------"
                << std::endl;
      return 1;
    }

  return 0;
}