/* --------------------------------------------------------------------- * * Copyright (C) 2013 - 2020 by the deal.II authors * * This file is part of the deal.II library. * * The deal.II library is free software; you can use it, redistribute * it, and/or modify it under the terms of the GNU Lesser General * Public License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * The full text of the license can be found in the file LICENSE.md at * the top level directory of deal.II. * * --------------------------------------------------------------------- * * Author: Martin Kronbichler, Technische Universität München, * Scott T. Miller, The Pennsylvania State University, 2013 */ // @sect3{Include files} // // Most of the deal.II include files have already been covered in previous // examples and are not commented on. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // However, we do have a few new includes for the example. // The first one defines finite element spaces on the faces // of the triangulation, which we refer to as the 'skeleton'. // These finite elements do not have any support on the element // interior, and they represent polynomials that have a single // value on each codimension-1 surface, but admit discontinuities // on codimension-2 surfaces. #include // The second new file we include defines a new type of sparse matrix. The // regular SparseMatrix type stores indices to all non-zero // entries. The ChunkSparseMatrix takes advantage of the coupled // nature of DG solutions. It stores an index to a matrix sub-block of a // specified size. In the HDG context, this sub-block-size is actually the // number of degrees of freedom per face defined by the skeleton solution // field. This reduces the memory consumption of the matrix by up to one third // and results in similar speedups when using the matrix in solvers. #include // The final new include for this example deals with data output. Since // we have a finite element field defined on the skeleton of the mesh, // we would like to visualize what that solution actually is. // DataOutFaces does exactly this; the interface is the almost the same // as the familiar DataOut, but the output only has codimension-1 data for // the simulation. #include #include // We start by putting all of our classes into their own namespace. namespace Step51 { using namespace dealii; // @sect3{Equation data} // // The structure of the analytic solution is the same as in step-7. There are // two exceptions. Firstly, we also create a solution for the 3d case, and // secondly, we scale the solution so its norm is of order unity for all // values of the solution width. template class SolutionBase { protected: static const unsigned int n_source_centers = 3; static const Point source_centers[n_source_centers]; static const double width; }; template <> const Point<1> SolutionBase<1>::source_centers[SolutionBase<1>::n_source_centers] = {Point<1>(-1.0 / 3.0), Point<1>(0.0), Point<1>(+1.0 / 3.0)}; template <> const Point<2> SolutionBase<2>::source_centers[SolutionBase<2>::n_source_centers] = {Point<2>(-0.5, +0.5), Point<2>(-0.5, -0.5), Point<2>(+0.5, -0.5)}; template <> const Point<3> SolutionBase<3>::source_centers[SolutionBase<3>::n_source_centers] = { Point<3>(-0.5, +0.5, 0.25), Point<3>(-0.6, -0.5, -0.125), Point<3>(+0.5, -0.5, 0.5)}; template const double SolutionBase::width = 1. / 5.; template class Solution : public Function, protected SolutionBase { public: virtual double value(const Point &p, const unsigned int /*component*/ = 0) const override { double sum = 0; for (unsigned int i = 0; i < this->n_source_centers; ++i) { const Tensor<1, dim> x_minus_xi = p - this->source_centers[i]; sum += std::exp(-x_minus_xi.norm_square() / (this->width * this->width)); } return sum / std::pow(2. * numbers::PI * this->width * this->width, dim / 2.); } virtual Tensor<1, dim> gradient(const Point &p, const unsigned int /*component*/ = 0) const override { Tensor<1, dim> sum; for (unsigned int i = 0; i < this->n_source_centers; ++i) { const Tensor<1, dim> x_minus_xi = p - this->source_centers[i]; sum += (-2 / (this->width * this->width) * std::exp(-x_minus_xi.norm_square() / (this->width * this->width)) * x_minus_xi); } return sum / std::pow(2. * numbers::PI * this->width * this->width, dim / 2.); } }; // This class implements a function where the scalar solution and its negative // gradient are collected together. This function is used when computing the // error of the HDG approximation and its implementation is to simply call // value and gradient function of the Solution class. template class SolutionAndGradient : public Function, protected SolutionBase { public: SolutionAndGradient() : Function(dim + 1) {} virtual void vector_value(const Point &p, Vector & v) const override { AssertDimension(v.size(), dim + 1); Solution solution; Tensor<1, dim> grad = solution.gradient(p); for (unsigned int d = 0; d < dim; ++d) v[d] = -grad[d]; v[dim] = solution.value(p); } }; // Next comes the implementation of the convection velocity. As described in // the introduction, we choose a velocity field that is $(y, -x)$ in 2D and // $(y, -x, 1)$ in 3D. This gives a divergence-free velocity field. template class ConvectionVelocity : public TensorFunction<1, dim> { public: ConvectionVelocity() : TensorFunction<1, dim>() {} virtual Tensor<1, dim> value(const Point &p) const override { Tensor<1, dim> convection; switch (dim) { case 1: convection[0] = 1; break; case 2: convection[0] = p[1]; convection[1] = -p[0]; break; case 3: convection[0] = p[1]; convection[1] = -p[0]; convection[2] = 1; break; default: Assert(false, ExcNotImplemented()); } return convection; } }; // The last function we implement is the right hand side for the // manufactured solution. It is very similar to step-7, with the exception // that we now have a convection term instead of the reaction term. Since // the velocity field is incompressible, i.e., $\nabla \cdot \mathbf{c} = // 0$, the advection term simply reads $\mathbf{c} \nabla u$. template class RightHandSide : public Function, protected SolutionBase { public: virtual double value(const Point &p, const unsigned int /*component*/ = 0) const override { ConvectionVelocity convection_velocity; Tensor<1, dim> convection = convection_velocity.value(p); double sum = 0; for (unsigned int i = 0; i < this->n_source_centers; ++i) { const Tensor<1, dim> x_minus_xi = p - this->source_centers[i]; sum += ((2 * dim - 2 * convection * x_minus_xi - 4 * x_minus_xi.norm_square() / (this->width * this->width)) / (this->width * this->width) * std::exp(-x_minus_xi.norm_square() / (this->width * this->width))); } return sum / std::pow(2. * numbers::PI * this->width * this->width, dim / 2.); } }; // @sect3{The HDG solver class} // The HDG solution procedure follows closely that of step-7. The major // difference is the use of three different sets of DoFHandler and FE // objects, along with the ChunkSparseMatrix and the corresponding solutions // vectors. We also use WorkStream to enable a multithreaded local solution // process which exploits the embarrassingly parallel nature of the local // solver. For WorkStream, we define the local operations on a cell and a // copy function into the global matrix and vector. We do this both for the // assembly (which is run twice, once when we generate the system matrix and // once when we compute the element-interior solutions from the skeleton // values) and for the postprocessing where we extract a solution that // converges at higher order. template class HDG { public: enum RefinementMode { global_refinement, adaptive_refinement }; HDG(const unsigned int degree, const RefinementMode refinement_mode); void run(); private: void setup_system(); void assemble_system(const bool reconstruct_trace = false); void solve(); void postprocess(); void refine_grid(const unsigned int cycle); void output_results(const unsigned int cycle); // Data for the assembly and solution of the primal variables. struct PerTaskData; struct ScratchData; // Post-processing the solution to obtain $u^*$ is an element-by-element // procedure; as such, we do not need to assemble any global data and do // not declare any 'task data' for WorkStream to use. struct PostProcessScratchData; // The following three functions are used by WorkStream to do the actual // work of the program. void assemble_system_one_cell( const typename DoFHandler::active_cell_iterator &cell, ScratchData & scratch, PerTaskData & task_data); void copy_local_to_global(const PerTaskData &data); void postprocess_one_cell( const typename DoFHandler::active_cell_iterator &cell, PostProcessScratchData & scratch, unsigned int & empty_data); Triangulation triangulation; // The 'local' solutions are interior to each element. These // represent the primal solution field $u$ as well as the auxiliary // field $\mathbf{q}$. FESystem fe_local; DoFHandler dof_handler_local; Vector solution_local; // The new finite element type and corresponding DoFHandler are // used for the global skeleton solution that couples the element-level // local solutions. FE_FaceQ fe; DoFHandler dof_handler; Vector solution; Vector system_rhs; // As stated in the introduction, HDG solutions can be post-processed to // attain superconvergence rates of $\mathcal{O}(h^{p+2})$. The // post-processed solution is a discontinuous finite element solution // representing the primal variable on the interior of each cell. We define // a FE type of degree $p+1$ to represent this post-processed solution, // which we only use for output after constructing it. FE_DGQ fe_u_post; DoFHandler dof_handler_u_post; Vector solution_u_post; // The degrees of freedom corresponding to the skeleton strongly enforce // Dirichlet boundary conditions, just as in a continuous Galerkin finite // element method. We can enforce the boundary conditions in an analogous // manner via an AffineConstraints object. In addition, hanging nodes are // handled in the same way as for continuous finite elements: For the face // elements which only define degrees of freedom on the face, this process // sets the solution on the refined side to coincide with the // representation on the coarse side. // // Note that for HDG, the elimination of hanging nodes is not the only // possibility — in terms of the HDG theory, one could also use the // unknowns from the refined side and express the local solution on the // coarse side through the trace values on the refined side. However, such // a setup is not as easily implemented in terms of deal.II loops and not // further analyzed. AffineConstraints constraints; // The usage of the ChunkSparseMatrix class is similar to the usual sparse // matrices: You need a sparsity pattern of type ChunkSparsityPattern and // the actual matrix object. When creating the sparsity pattern, we just // have to additionally pass the size of local blocks. ChunkSparsityPattern sparsity_pattern; ChunkSparseMatrix system_matrix; // Same as step-7: const RefinementMode refinement_mode; ConvergenceTable convergence_table; }; // @sect3{The HDG class implementation} // @sect4{Constructor} // The constructor is similar to those in other examples, with the exception // of handling multiple DoFHandler and FiniteElement objects. Note that we // create a system of finite elements for the local DG part, including the // gradient/flux part and the scalar part. template HDG::HDG(const unsigned int degree, const RefinementMode refinement_mode) : fe_local(FE_DGQ(degree), dim, FE_DGQ(degree), 1) , dof_handler_local(triangulation) , fe(degree) , dof_handler(triangulation) , fe_u_post(degree + 1) , dof_handler_u_post(triangulation) , refinement_mode(refinement_mode) {} // @sect4{HDG::setup_system} // The system for an HDG solution is setup in an analogous manner to most // of the other tutorial programs. We are careful to distribute dofs with // all of our DoFHandler objects. The @p solution and @p system_matrix // objects go with the global skeleton solution. template void HDG::setup_system() { dof_handler_local.distribute_dofs(fe_local); dof_handler.distribute_dofs(fe); dof_handler_u_post.distribute_dofs(fe_u_post); std::cout << " Number of degrees of freedom: " << dof_handler.n_dofs() << std::endl; solution.reinit(dof_handler.n_dofs()); system_rhs.reinit(dof_handler.n_dofs()); solution_local.reinit(dof_handler_local.n_dofs()); solution_u_post.reinit(dof_handler_u_post.n_dofs()); constraints.clear(); DoFTools::make_hanging_node_constraints(dof_handler, constraints); std::map *> boundary_functions; Solution solution_function; boundary_functions[0] = &solution_function; VectorTools::project_boundary_values(dof_handler, boundary_functions, QGauss(fe.degree + 1), constraints); constraints.close(); // When creating the chunk sparsity pattern, we first create the usual // dynamic sparsity pattern and then set the chunk size, which is equal // to the number of dofs on a face, when copying this into the final // sparsity pattern. { DynamicSparsityPattern dsp(dof_handler.n_dofs()); DoFTools::make_sparsity_pattern(dof_handler, dsp, constraints, false); sparsity_pattern.copy_from(dsp, fe.n_dofs_per_face()); } system_matrix.reinit(sparsity_pattern); } // @sect4{HDG::PerTaskData} // Next comes the definition of the local data structures for the parallel // assembly. The first structure @p PerTaskData contains the local vector // and matrix that are written into the global matrix, whereas the // ScratchData contains all data that we need for the local assembly. There // is one variable worth noting here, namely the boolean variable @p // trace_reconstruct. As mentioned in the introduction, we solve the HDG // system in two steps. First, we create a linear system for the skeleton // system where we condense the local part into it via the Schur complement // $D-CA^{-1}B$. Then, we solve for the local part using the skeleton // solution. For these two steps, we need the same matrices on the elements // twice, which we want to compute by two assembly steps. Since most of the // code is similar, we do this with the same function but only switch // between the two based on a flag that we set when starting the // assembly. Since we need to pass this information on to the local worker // routines, we store it once in the task data. template struct HDG::PerTaskData { FullMatrix cell_matrix; Vector cell_vector; std::vector dof_indices; bool trace_reconstruct; PerTaskData(const unsigned int n_dofs, const bool trace_reconstruct) : cell_matrix(n_dofs, n_dofs) , cell_vector(n_dofs) , dof_indices(n_dofs) , trace_reconstruct(trace_reconstruct) {} }; // @sect4{HDG::ScratchData} // @p ScratchData contains persistent data for each // thread within WorkStream. The FEValues, matrix, // and vector objects should be familiar by now. There are two objects that // need to be discussed: `std::vector > // fe_local_support_on_face` and `std::vector > // fe_support_on_face`. These are used to indicate whether or not the finite // elements chosen have support (non-zero values) on a given face of the // reference cell for the local part associated to @p fe_local and the // skeleton part @p fe. We extract this information in the // constructor and store it once for all cells that we work on. Had we not // stored this information, we would be forced to assemble a large number of // zero terms on each cell, which would significantly slow the program. template struct HDG::ScratchData { FEValues fe_values_local; FEFaceValues fe_face_values_local; FEFaceValues fe_face_values; FullMatrix ll_matrix; FullMatrix lf_matrix; FullMatrix fl_matrix; FullMatrix tmp_matrix; Vector l_rhs; Vector tmp_rhs; std::vector> q_phi; std::vector q_phi_div; std::vector u_phi; std::vector> u_phi_grad; std::vector tr_phi; std::vector trace_values; std::vector> fe_local_support_on_face; std::vector> fe_support_on_face; ConvectionVelocity convection_velocity; RightHandSide right_hand_side; const Solution exact_solution; ScratchData(const FiniteElement &fe, const FiniteElement &fe_local, const QGauss & quadrature_formula, const QGauss & face_quadrature_formula, const UpdateFlags local_flags, const UpdateFlags local_face_flags, const UpdateFlags flags) : fe_values_local(fe_local, quadrature_formula, local_flags) , fe_face_values_local(fe_local, face_quadrature_formula, local_face_flags) , fe_face_values(fe, face_quadrature_formula, flags) , ll_matrix(fe_local.n_dofs_per_cell(), fe_local.n_dofs_per_cell()) , lf_matrix(fe_local.n_dofs_per_cell(), fe.n_dofs_per_cell()) , fl_matrix(fe.n_dofs_per_cell(), fe_local.n_dofs_per_cell()) , tmp_matrix(fe.n_dofs_per_cell(), fe_local.n_dofs_per_cell()) , l_rhs(fe_local.n_dofs_per_cell()) , tmp_rhs(fe_local.n_dofs_per_cell()) , q_phi(fe_local.n_dofs_per_cell()) , q_phi_div(fe_local.n_dofs_per_cell()) , u_phi(fe_local.n_dofs_per_cell()) , u_phi_grad(fe_local.n_dofs_per_cell()) , tr_phi(fe.n_dofs_per_cell()) , trace_values(face_quadrature_formula.size()) , fe_local_support_on_face(GeometryInfo::faces_per_cell) , fe_support_on_face(GeometryInfo::faces_per_cell) , exact_solution() { for (unsigned int face_no : GeometryInfo::face_indices()) for (unsigned int i = 0; i < fe_local.n_dofs_per_cell(); ++i) { if (fe_local.has_support_on_face(i, face_no)) fe_local_support_on_face[face_no].push_back(i); } for (unsigned int face_no : GeometryInfo::face_indices()) for (unsigned int i = 0; i < fe.n_dofs_per_cell(); ++i) { if (fe.has_support_on_face(i, face_no)) fe_support_on_face[face_no].push_back(i); } } ScratchData(const ScratchData &sd) : fe_values_local(sd.fe_values_local.get_fe(), sd.fe_values_local.get_quadrature(), sd.fe_values_local.get_update_flags()) , fe_face_values_local(sd.fe_face_values_local.get_fe(), sd.fe_face_values_local.get_quadrature(), sd.fe_face_values_local.get_update_flags()) , fe_face_values(sd.fe_face_values.get_fe(), sd.fe_face_values.get_quadrature(), sd.fe_face_values.get_update_flags()) , ll_matrix(sd.ll_matrix) , lf_matrix(sd.lf_matrix) , fl_matrix(sd.fl_matrix) , tmp_matrix(sd.tmp_matrix) , l_rhs(sd.l_rhs) , tmp_rhs(sd.tmp_rhs) , q_phi(sd.q_phi) , q_phi_div(sd.q_phi_div) , u_phi(sd.u_phi) , u_phi_grad(sd.u_phi_grad) , tr_phi(sd.tr_phi) , trace_values(sd.trace_values) , fe_local_support_on_face(sd.fe_local_support_on_face) , fe_support_on_face(sd.fe_support_on_face) , exact_solution() {} }; // @sect4{HDG::PostProcessScratchData} // @p PostProcessScratchData contains the data used by WorkStream // when post-processing the local solution $u^*$. It is similar, but much // simpler, than @p ScratchData. template struct HDG::PostProcessScratchData { FEValues fe_values_local; FEValues fe_values; std::vector u_values; std::vector> u_gradients; FullMatrix cell_matrix; Vector cell_rhs; Vector cell_sol; PostProcessScratchData(const FiniteElement &fe, const FiniteElement &fe_local, const QGauss & quadrature_formula, const UpdateFlags local_flags, const UpdateFlags flags) : fe_values_local(fe_local, quadrature_formula, local_flags) , fe_values(fe, quadrature_formula, flags) , u_values(quadrature_formula.size()) , u_gradients(quadrature_formula.size()) , cell_matrix(fe.n_dofs_per_cell(), fe.n_dofs_per_cell()) , cell_rhs(fe.n_dofs_per_cell()) , cell_sol(fe.n_dofs_per_cell()) {} PostProcessScratchData(const PostProcessScratchData &sd) : fe_values_local(sd.fe_values_local.get_fe(), sd.fe_values_local.get_quadrature(), sd.fe_values_local.get_update_flags()) , fe_values(sd.fe_values.get_fe(), sd.fe_values.get_quadrature(), sd.fe_values.get_update_flags()) , u_values(sd.u_values) , u_gradients(sd.u_gradients) , cell_matrix(sd.cell_matrix) , cell_rhs(sd.cell_rhs) , cell_sol(sd.cell_sol) {} }; // @sect4{HDG::assemble_system} // The @p assemble_system function is similar to the one on Step-32, where // the quadrature formula and the update flags are set up, and then // WorkStream is used to do the work in a multi-threaded // manner. The @p trace_reconstruct input parameter is used to decide // whether we are solving for the global skeleton solution (false) or the // local solution (true). // // One thing worth noting for the multi-threaded execution of assembly is // the fact that the local computations in `assemble_system_one_cell()` call // into BLAS and LAPACK functions if those are available in deal.II. Thus, // the underlying BLAS/LAPACK library must support calls from multiple // threads at the same time. Most implementations do support this, but some // libraries need to be built in a specific way to avoid problems. For // example, OpenBLAS compiled without multithreading inside the BLAS/LAPACK // calls needs to built with a flag called `USE_LOCKING` set to true. template void HDG::assemble_system(const bool trace_reconstruct) { const QGauss quadrature_formula(fe.degree + 1); const QGauss face_quadrature_formula(fe.degree + 1); const UpdateFlags local_flags(update_values | update_gradients | update_JxW_values | update_quadrature_points); const UpdateFlags local_face_flags(update_values); const UpdateFlags flags(update_values | update_normal_vectors | update_quadrature_points | update_JxW_values); PerTaskData task_data(fe.n_dofs_per_cell(), trace_reconstruct); ScratchData scratch(fe, fe_local, quadrature_formula, face_quadrature_formula, local_flags, local_face_flags, flags); WorkStream::run(dof_handler.begin_active(), dof_handler.end(), *this, &HDG::assemble_system_one_cell, &HDG::copy_local_to_global, scratch, task_data); } // @sect4{HDG::assemble_system_one_cell} // The real work of the HDG program is done by @p assemble_system_one_cell. // Assembling the local matrices $A, B, C$ is done here, along with the // local contributions of the global matrix $D$. template void HDG::assemble_system_one_cell( const typename DoFHandler::active_cell_iterator &cell, ScratchData & scratch, PerTaskData & task_data) { // Construct iterator for dof_handler_local for FEValues reinit function. typename DoFHandler::active_cell_iterator loc_cell(&triangulation, cell->level(), cell->index(), &dof_handler_local); const unsigned int n_q_points = scratch.fe_values_local.get_quadrature().size(); const unsigned int n_face_q_points = scratch.fe_face_values_local.get_quadrature().size(); const unsigned int loc_dofs_per_cell = scratch.fe_values_local.get_fe().n_dofs_per_cell(); const FEValuesExtractors::Vector fluxes(0); const FEValuesExtractors::Scalar scalar(dim); scratch.ll_matrix = 0; scratch.l_rhs = 0; if (!task_data.trace_reconstruct) { scratch.lf_matrix = 0; scratch.fl_matrix = 0; task_data.cell_matrix = 0; task_data.cell_vector = 0; } scratch.fe_values_local.reinit(loc_cell); // We first compute the cell-interior contribution to @p ll_matrix matrix // (referred to as matrix $A$ in the introduction) corresponding to // local-local coupling, as well as the local right-hand-side vector. We // store the values at each quadrature point for the basis functions, the // right-hand-side value, and the convection velocity, in order to have // quick access to these fields. for (unsigned int q = 0; q < n_q_points; ++q) { const double rhs_value = scratch.right_hand_side.value( scratch.fe_values_local.quadrature_point(q)); const Tensor<1, dim> convection = scratch.convection_velocity.value( scratch.fe_values_local.quadrature_point(q)); const double JxW = scratch.fe_values_local.JxW(q); for (unsigned int k = 0; k < loc_dofs_per_cell; ++k) { scratch.q_phi[k] = scratch.fe_values_local[fluxes].value(k, q); scratch.q_phi_div[k] = scratch.fe_values_local[fluxes].divergence(k, q); scratch.u_phi[k] = scratch.fe_values_local[scalar].value(k, q); scratch.u_phi_grad[k] = scratch.fe_values_local[scalar].gradient(k, q); } for (unsigned int i = 0; i < loc_dofs_per_cell; ++i) { for (unsigned int j = 0; j < loc_dofs_per_cell; ++j) scratch.ll_matrix(i, j) += (scratch.q_phi[i] * scratch.q_phi[j] - scratch.q_phi_div[i] * scratch.u_phi[j] + scratch.u_phi[i] * scratch.q_phi_div[j] - (scratch.u_phi_grad[i] * convection) * scratch.u_phi[j]) * JxW; scratch.l_rhs(i) += scratch.u_phi[i] * rhs_value * JxW; } } // Face terms are assembled on all faces of all elements. This is in // contrast to more traditional DG methods, where each face is only visited // once in the assembly procedure. for (const auto face_no : cell->face_indices()) { scratch.fe_face_values_local.reinit(loc_cell, face_no); scratch.fe_face_values.reinit(cell, face_no); // The already obtained $\hat{u}$ values are needed when solving for the // local variables. if (task_data.trace_reconstruct) scratch.fe_face_values.get_function_values(solution, scratch.trace_values); for (unsigned int q = 0; q < n_face_q_points; ++q) { const double JxW = scratch.fe_face_values.JxW(q); const Point quadrature_point = scratch.fe_face_values.quadrature_point(q); const Tensor<1, dim> normal = scratch.fe_face_values.normal_vector(q); const Tensor<1, dim> convection = scratch.convection_velocity.value(quadrature_point); // Here we compute the stabilization parameter discussed in the // introduction: since the diffusion is one and the diffusion // length scale is set to 1/5, it simply results in a contribution // of 5 for the diffusion part and the magnitude of convection // through the element boundary in a centered scheme for the // convection part. const double tau_stab = (5. + std::abs(convection * normal)); // We store the non-zero flux and scalar values, making use of the // support_on_face information we created in @p ScratchData. for (unsigned int k = 0; k < scratch.fe_local_support_on_face[face_no].size(); ++k) { const unsigned int kk = scratch.fe_local_support_on_face[face_no][k]; scratch.q_phi[k] = scratch.fe_face_values_local[fluxes].value(kk, q); scratch.u_phi[k] = scratch.fe_face_values_local[scalar].value(kk, q); } // When @p trace_reconstruct=false, we are preparing to assemble the // system for the skeleton variable $\hat{u}$. If this is the case, // we must assemble all local matrices associated with the problem: // local-local, local-face, face-local, and face-face. The // face-face matrix is stored as @p TaskData::cell_matrix, so that // it can be assembled into the global system by @p // copy_local_to_global. if (!task_data.trace_reconstruct) { for (unsigned int k = 0; k < scratch.fe_support_on_face[face_no].size(); ++k) scratch.tr_phi[k] = scratch.fe_face_values.shape_value( scratch.fe_support_on_face[face_no][k], q); for (unsigned int i = 0; i < scratch.fe_local_support_on_face[face_no].size(); ++i) for (unsigned int j = 0; j < scratch.fe_support_on_face[face_no].size(); ++j) { const unsigned int ii = scratch.fe_local_support_on_face[face_no][i]; const unsigned int jj = scratch.fe_support_on_face[face_no][j]; scratch.lf_matrix(ii, jj) += ((scratch.q_phi[i] * normal + (convection * normal - tau_stab) * scratch.u_phi[i]) * scratch.tr_phi[j]) * JxW; // Note the sign of the face_no-local matrix. We negate // the sign during assembly here so that we can use the // FullMatrix::mmult with addition when computing the // Schur complement. scratch.fl_matrix(jj, ii) -= ((scratch.q_phi[i] * normal + tau_stab * scratch.u_phi[i]) * scratch.tr_phi[j]) * JxW; } for (unsigned int i = 0; i < scratch.fe_support_on_face[face_no].size(); ++i) for (unsigned int j = 0; j < scratch.fe_support_on_face[face_no].size(); ++j) { const unsigned int ii = scratch.fe_support_on_face[face_no][i]; const unsigned int jj = scratch.fe_support_on_face[face_no][j]; task_data.cell_matrix(ii, jj) += ((convection * normal - tau_stab) * scratch.tr_phi[i] * scratch.tr_phi[j]) * JxW; } if (cell->face(face_no)->at_boundary() && (cell->face(face_no)->boundary_id() == 1)) { const double neumann_value = -scratch.exact_solution.gradient(quadrature_point) * normal + convection * normal * scratch.exact_solution.value(quadrature_point); for (unsigned int i = 0; i < scratch.fe_support_on_face[face_no].size(); ++i) { const unsigned int ii = scratch.fe_support_on_face[face_no][i]; task_data.cell_vector(ii) += scratch.tr_phi[i] * neumann_value * JxW; } } } // This last term adds the contribution of the term $\left_{\partial \mathcal T}$ to the local matrix. As opposed // to the face matrices above, we need it in both assembly stages. for (unsigned int i = 0; i < scratch.fe_local_support_on_face[face_no].size(); ++i) for (unsigned int j = 0; j < scratch.fe_local_support_on_face[face_no].size(); ++j) { const unsigned int ii = scratch.fe_local_support_on_face[face_no][i]; const unsigned int jj = scratch.fe_local_support_on_face[face_no][j]; scratch.ll_matrix(ii, jj) += tau_stab * scratch.u_phi[i] * scratch.u_phi[j] * JxW; } // When @p trace_reconstruct=true, we are solving for the local // solutions on an element by element basis. The local // right-hand-side is calculated by replacing the basis functions @p // tr_phi in the @p lf_matrix computation by the computed values @p // trace_values. Of course, the sign of the matrix is now minus // since we have moved everything to the other side of the equation. if (task_data.trace_reconstruct) for (unsigned int i = 0; i < scratch.fe_local_support_on_face[face_no].size(); ++i) { const unsigned int ii = scratch.fe_local_support_on_face[face_no][i]; scratch.l_rhs(ii) -= (scratch.q_phi[i] * normal + scratch.u_phi[i] * (convection * normal - tau_stab)) * scratch.trace_values[q] * JxW; } } } // Once assembly of all of the local contributions is complete, we must // either: (1) assemble the global system, or (2) compute the local solution // values and save them. In either case, the first step is to invert the // local-local matrix. scratch.ll_matrix.gauss_jordan(); // For (1), we compute the Schur complement and add it to the @p // cell_matrix, matrix $D$ in the introduction. if (task_data.trace_reconstruct == false) { scratch.fl_matrix.mmult(scratch.tmp_matrix, scratch.ll_matrix); scratch.tmp_matrix.vmult_add(task_data.cell_vector, scratch.l_rhs); scratch.tmp_matrix.mmult(task_data.cell_matrix, scratch.lf_matrix, true); cell->get_dof_indices(task_data.dof_indices); } // For (2), we are simply solving (ll_matrix).(solution_local) = (l_rhs). // Hence, we multiply @p l_rhs by our already inverted local-local matrix // and store the result using the set_dof_values function. else { scratch.ll_matrix.vmult(scratch.tmp_rhs, scratch.l_rhs); loc_cell->set_dof_values(scratch.tmp_rhs, solution_local); } } // @sect4{HDG::copy_local_to_global} // If we are in the first step of the solution, i.e. @p trace_reconstruct=false, // then we assemble the local matrices into the global system. template void HDG::copy_local_to_global(const PerTaskData &data) { if (data.trace_reconstruct == false) constraints.distribute_local_to_global(data.cell_matrix, data.cell_vector, data.dof_indices, system_matrix, system_rhs); } // @sect4{HDG::solve} // The skeleton solution is solved for by using a BiCGStab solver with // identity preconditioner. template void HDG::solve() { SolverControl solver_control(system_matrix.m() * 10, 1e-11 * system_rhs.l2_norm()); SolverBicgstab> solver(solver_control); solver.solve(system_matrix, solution, system_rhs, PreconditionIdentity()); std::cout << " Number of BiCGStab iterations: " << solver_control.last_step() << std::endl; system_matrix.clear(); sparsity_pattern.reinit(0, 0, 0, 1); constraints.distribute(solution); // Once we have solved for the skeleton solution, // we can solve for the local solutions in an element-by-element // fashion. We do this by re-using the same @p assemble_system function // but switching @p trace_reconstruct to true. assemble_system(true); } // @sect4{HDG::postprocess} // The postprocess method serves two purposes. First, we want to construct a // post-processed scalar variables in the element space of degree $p+1$ that // we hope will converge at order $p+2$. This is again an element-by-element // process and only involves the scalar solution as well as the gradient on // the local cell. To do this, we introduce the already defined scratch data // together with some update flags and run the work stream to do this in // parallel. // // Secondly, we want to compute discretization errors just as we did in // step-7. The overall procedure is similar with calls to // VectorTools::integrate_difference. The difference is in how we compute // the errors for the scalar variable and the gradient variable. In step-7, // we did this by computing @p L2_norm or @p H1_seminorm // contributions. Here, we have a DoFHandler with these two contributions // computed and sorted by their vector component, [0, dim) for // the // gradient and @p dim for the scalar. To compute their value, we hence use // a ComponentSelectFunction with either of them, together with the @p // SolutionAndGradient class introduced above that contains the analytic // parts of either of them. Eventually, we also compute the L2-error of the // post-processed solution and add the results into the convergence table. template void HDG::postprocess() { { const QGauss quadrature_formula(fe_u_post.degree + 1); const UpdateFlags local_flags(update_values); const UpdateFlags flags(update_values | update_gradients | update_JxW_values); PostProcessScratchData scratch( fe_u_post, fe_local, quadrature_formula, local_flags, flags); WorkStream::run( dof_handler_u_post.begin_active(), dof_handler_u_post.end(), [this](const typename DoFHandler::active_cell_iterator &cell, PostProcessScratchData & scratch, unsigned int & data) { this->postprocess_one_cell(cell, scratch, data); }, std::function(), scratch, 0U); } Vector difference_per_cell(triangulation.n_active_cells()); ComponentSelectFunction value_select(dim, dim + 1); VectorTools::integrate_difference(dof_handler_local, solution_local, SolutionAndGradient(), difference_per_cell, QGauss(fe.degree + 2), VectorTools::L2_norm, &value_select); const double L2_error = VectorTools::compute_global_error(triangulation, difference_per_cell, VectorTools::L2_norm); ComponentSelectFunction gradient_select( std::pair(0, dim), dim + 1); VectorTools::integrate_difference(dof_handler_local, solution_local, SolutionAndGradient(), difference_per_cell, QGauss(fe.degree + 2), VectorTools::L2_norm, &gradient_select); const double grad_error = VectorTools::compute_global_error(triangulation, difference_per_cell, VectorTools::L2_norm); VectorTools::integrate_difference(dof_handler_u_post, solution_u_post, Solution(), difference_per_cell, QGauss(fe.degree + 3), VectorTools::L2_norm); const double post_error = VectorTools::compute_global_error(triangulation, difference_per_cell, VectorTools::L2_norm); convergence_table.add_value("cells", triangulation.n_active_cells()); convergence_table.add_value("dofs", dof_handler.n_dofs()); convergence_table.add_value("val L2", L2_error); convergence_table.set_scientific("val L2", true); convergence_table.set_precision("val L2", 3); convergence_table.add_value("grad L2", grad_error); convergence_table.set_scientific("grad L2", true); convergence_table.set_precision("grad L2", 3); convergence_table.add_value("val L2-post", post_error); convergence_table.set_scientific("val L2-post", true); convergence_table.set_precision("val L2-post", 3); } // @sect4{HDG::postprocess_one_cell} // // This is the actual work done for the postprocessing. According to the // discussion in the introduction, we need to set up a system that projects // the gradient part of the DG solution onto the gradient of the // post-processed variable. Moreover, we need to set the average of the new // post-processed variable to equal the average of the scalar DG solution // on the cell. // // More technically speaking, the projection of the gradient is a system // that would potentially fills our @p dofs_per_cell times @p dofs_per_cell // matrix but is singular (the sum of all rows would be zero because the // constant function has zero gradient). Therefore, we take one row away and // use it for imposing the average of the scalar value. We pick the first // row for the scalar part, even though we could pick any row for $\mathcal // Q_{-p}$ elements. However, had we used FE_DGP elements instead, the first // row would correspond to the constant part already and deleting e.g. the // last row would give us a singular system. This way, our program can also // be used for those elements. template void HDG::postprocess_one_cell( const typename DoFHandler::active_cell_iterator &cell, PostProcessScratchData & scratch, unsigned int &) { typename DoFHandler::active_cell_iterator loc_cell(&triangulation, cell->level(), cell->index(), &dof_handler_local); scratch.fe_values_local.reinit(loc_cell); scratch.fe_values.reinit(cell); FEValuesExtractors::Vector fluxes(0); FEValuesExtractors::Scalar scalar(dim); const unsigned int n_q_points = scratch.fe_values.get_quadrature().size(); const unsigned int dofs_per_cell = scratch.fe_values.dofs_per_cell; scratch.fe_values_local[scalar].get_function_values(solution_local, scratch.u_values); scratch.fe_values_local[fluxes].get_function_values(solution_local, scratch.u_gradients); double sum = 0; for (unsigned int i = 1; i < dofs_per_cell; ++i) { for (unsigned int j = 0; j < dofs_per_cell; ++j) { sum = 0; for (unsigned int q = 0; q < n_q_points; ++q) sum += (scratch.fe_values.shape_grad(i, q) * scratch.fe_values.shape_grad(j, q)) * scratch.fe_values.JxW(q); scratch.cell_matrix(i, j) = sum; } sum = 0; for (unsigned int q = 0; q < n_q_points; ++q) sum -= (scratch.fe_values.shape_grad(i, q) * scratch.u_gradients[q]) * scratch.fe_values.JxW(q); scratch.cell_rhs(i) = sum; } for (unsigned int j = 0; j < dofs_per_cell; ++j) { sum = 0; for (unsigned int q = 0; q < n_q_points; ++q) sum += scratch.fe_values.shape_value(j, q) * scratch.fe_values.JxW(q); scratch.cell_matrix(0, j) = sum; } { sum = 0; for (unsigned int q = 0; q < n_q_points; ++q) sum += scratch.u_values[q] * scratch.fe_values.JxW(q); scratch.cell_rhs(0) = sum; } // Having assembled all terms, we can again go on and solve the linear // system. We invert the matrix and then multiply the inverse by the // right hand side. An alternative (and more numerically stable) method // would have been to only factorize the matrix and apply the factorization. scratch.cell_matrix.gauss_jordan(); scratch.cell_matrix.vmult(scratch.cell_sol, scratch.cell_rhs); cell->distribute_local_to_global(scratch.cell_sol, solution_u_post); } // @sect4{HDG::output_results} // We have 3 sets of results that we would like to output: the local // solution, the post-processed local solution, and the skeleton solution. The // former 2 both 'live' on element volumes, whereas the latter lives on // codimension-1 surfaces // of the triangulation. Our @p output_results function writes all local solutions // to the same vtk file, even though they correspond to different // DoFHandler objects. The graphical output for the skeleton // variable is done through use of the DataOutFaces class. template void HDG::output_results(const unsigned int cycle) { std::string filename; switch (refinement_mode) { case global_refinement: filename = "solution-global"; break; case adaptive_refinement: filename = "solution-adaptive"; break; default: Assert(false, ExcNotImplemented()); } std::string face_out(filename); face_out += "-face"; filename += "-q" + Utilities::int_to_string(fe.degree, 1); filename += "-" + Utilities::int_to_string(cycle, 2); filename += ".vtk"; std::ofstream output(filename); DataOut data_out; // We first define the names and types of the local solution, // and add the data to @p data_out. std::vector names(dim, "gradient"); names.emplace_back("solution"); std::vector component_interpretation( dim + 1, DataComponentInterpretation::component_is_part_of_vector); component_interpretation[dim] = DataComponentInterpretation::component_is_scalar; data_out.add_data_vector(dof_handler_local, solution_local, names, component_interpretation); // The second data item we add is the post-processed solution. // In this case, it is a single scalar variable belonging to // a different DoFHandler. std::vector post_name(1, "u_post"); std::vector post_comp_type(1, DataComponentInterpretation::component_is_scalar); data_out.add_data_vector(dof_handler_u_post, solution_u_post, post_name, post_comp_type); data_out.build_patches(fe.degree); data_out.write_vtk(output); face_out += "-q" + Utilities::int_to_string(fe.degree, 1); face_out += "-" + Utilities::int_to_string(cycle, 2); face_out += ".vtk"; std::ofstream face_output(face_out); // The DataOutFaces class works analogously to the // DataOut class when we have a DoFHandler that // defines the solution on the skeleton of the triangulation. We treat it // as such here, and the code is similar to that above. DataOutFaces data_out_face(false); std::vector face_name(1, "u_hat"); std::vector face_component_type(1, DataComponentInterpretation::component_is_scalar); data_out_face.add_data_vector(dof_handler, solution, face_name, face_component_type); data_out_face.build_patches(fe.degree); data_out_face.write_vtk(face_output); } // @sect4{HDG::refine_grid} // We implement two different refinement cases for HDG, just as in // Step-7: adaptive_refinement and global_refinement. The // global_refinement option recreates the entire triangulation every // time. This is because we want to use a finer sequence of meshes than what // we would get with one refinement step, namely 2, 3, 4, 6, 8, 12, 16, ... // elements per direction. // The adaptive_refinement mode uses the KellyErrorEstimator to // give a decent indication of the non-regular regions in the scalar local // solutions. template void HDG::refine_grid(const unsigned int cycle) { if (cycle == 0) { GridGenerator::subdivided_hyper_cube(triangulation, 2, -1, 1); triangulation.refine_global(3 - dim); } else switch (refinement_mode) { case global_refinement: { triangulation.clear(); GridGenerator::subdivided_hyper_cube(triangulation, 2 + (cycle % 2), -1, 1); triangulation.refine_global(3 - dim + cycle / 2); break; } case adaptive_refinement: { Vector estimated_error_per_cell( triangulation.n_active_cells()); FEValuesExtractors::Scalar scalar(dim); std::map *> neumann_boundary; KellyErrorEstimator::estimate(dof_handler_local, QGauss(fe.degree + 1), neumann_boundary, solution_local, estimated_error_per_cell, fe_local.component_mask( scalar)); GridRefinement::refine_and_coarsen_fixed_number( triangulation, estimated_error_per_cell, 0.3, 0.); triangulation.execute_coarsening_and_refinement(); break; } default: { Assert(false, ExcNotImplemented()); } } // Just as in step-7, we set the boundary indicator of two of the faces to 1 // where we want to specify Neumann boundary conditions instead of Dirichlet // conditions. Since we re-create the triangulation every time for global // refinement, the flags are set in every refinement step, not just at the // beginning. for (const auto &cell : triangulation.cell_iterators()) for (const auto &face : cell->face_iterators()) if (face->at_boundary()) if ((std::fabs(face->center()(0) - (-1)) < 1e-12) || (std::fabs(face->center()(1) - (-1)) < 1e-12)) face->set_boundary_id(1); } // @sect4{HDG::run} // The functionality here is basically the same as Step-7. // We loop over 10 cycles, refining the grid on each one. At the end, // convergence tables are created. template void HDG::run() { for (unsigned int cycle = 0; cycle < 10; ++cycle) { std::cout << "Cycle " << cycle << ':' << std::endl; refine_grid(cycle); setup_system(); assemble_system(false); solve(); postprocess(); output_results(cycle); } // There is one minor change for the convergence table compared to step-7: // Since we did not refine our mesh by a factor two in each cycle (but // rather used the sequence 2, 3, 4, 6, 8, 12, ...), we need to tell the // convergence rate evaluation about this. We do this by setting the // number of cells as a reference column and additionally specifying the // dimension of the problem, which gives the necessary information for the // relation between number of cells and mesh size. if (refinement_mode == global_refinement) { convergence_table.evaluate_convergence_rates( "val L2", "cells", ConvergenceTable::reduction_rate_log2, dim); convergence_table.evaluate_convergence_rates( "grad L2", "cells", ConvergenceTable::reduction_rate_log2, dim); convergence_table.evaluate_convergence_rates( "val L2-post", "cells", ConvergenceTable::reduction_rate_log2, dim); } convergence_table.write_text(std::cout); } } // end of namespace Step51 int main() { const unsigned int dim = 2; try { // Now for the three calls to the main class in complete analogy to // step-7. { std::cout << "Solving with Q1 elements, adaptive refinement" << std::endl << "=============================================" << std::endl << std::endl; Step51::HDG hdg_problem(1, Step51::HDG::adaptive_refinement); hdg_problem.run(); std::cout << std::endl; } { std::cout << "Solving with Q1 elements, global refinement" << std::endl << "===========================================" << std::endl << std::endl; Step51::HDG hdg_problem(1, Step51::HDG::global_refinement); hdg_problem.run(); std::cout << std::endl; } { std::cout << "Solving with Q3 elements, global refinement" << std::endl << "===========================================" << std::endl << std::endl; Step51::HDG hdg_problem(3, Step51::HDG::global_refinement); hdg_problem.run(); std::cout << std::endl; } } catch (std::exception &exc) { std::cerr << std::endl << std::endl << "----------------------------------------------------" << std::endl; std::cerr << "Exception on processing: " << std::endl << exc.what() << std::endl << "Aborting!" << std::endl << "----------------------------------------------------" << std::endl; return 1; } catch (...) { std::cerr << std::endl << std::endl << "----------------------------------------------------" << std::endl; std::cerr << "Unknown exception!" << std::endl << "Aborting!" << std::endl << "----------------------------------------------------" << std::endl; return 1; } return 0; }