1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2017 - 2020 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 
17 #ifndef dealii_matrix_free_evaluation_kernels_h
18 #define dealii_matrix_free_evaluation_kernels_h
19 
20 #include <deal.II/base/config.h>
21 
22 #include <deal.II/base/utilities.h>
23 #include <deal.II/base/vectorization.h>
24 
25 #include <deal.II/matrix_free/dof_info.h>
26 #include <deal.II/matrix_free/evaluation_flags.h>
27 #include <deal.II/matrix_free/shape_info.h>
28 #include <deal.II/matrix_free/tensor_product_kernels.h>
29 #include <deal.II/matrix_free/type_traits.h>
30 
31 
32 DEAL_II_NAMESPACE_OPEN
33 
34 
35 // forward declaration
36 template <int, typename, bool, typename>
37 class FEEvaluationBaseData;
38 
39 
40 
41 namespace internal
42 {
43   // Select evaluator type from element shape function type
44   template <MatrixFreeFunctions::ElementType element, bool is_long>
45   struct EvaluatorSelector
46   {};
47 
48   template <bool is_long>
49   struct EvaluatorSelector<MatrixFreeFunctions::tensor_general, is_long>
50   {
51     static const EvaluatorVariant variant = evaluate_general;
52   };
53 
54   template <>
55   struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric, false>
56   {
57     static const EvaluatorVariant variant = evaluate_symmetric;
58   };
59 
60   template <>
61   struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric, true>
62   {
63     static const EvaluatorVariant variant = evaluate_evenodd;
64   };
65 
66   template <bool is_long>
67   struct EvaluatorSelector<MatrixFreeFunctions::truncated_tensor, is_long>
68   {
69     static const EvaluatorVariant variant = evaluate_general;
70   };
71 
72   template <>
73   struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric_plus_dg0,
74                            false>
75   {
76     static const EvaluatorVariant variant = evaluate_general;
77   };
78 
79   template <>
80   struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric_plus_dg0, true>
81   {
82     static const EvaluatorVariant variant = evaluate_evenodd;
83   };
84 
85   template <bool is_long>
86   struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric_collocation,
87                            is_long>
88   {
89     static const EvaluatorVariant variant = evaluate_evenodd;
90   };
91 
92 
93 
94   /**
95    * This struct performs the evaluation of function values, gradients and
96    * Hessians for tensor-product finite elements. The operation is used for
97    * both the symmetric and non-symmetric case, which use different apply
98    * functions 'values', 'gradients' in the individual coordinate
99    * directions. The apply functions for values are provided through one of
100    * the template classes EvaluatorTensorProduct which in turn are selected
101    * from the MatrixFreeFunctions::ElementType template argument.
102    *
103    * There are two specialized implementation classes
104    * FEEvaluationImplCollocation (for Gauss-Lobatto elements where the nodal
105    * points and the quadrature points coincide and the 'values' operation is
106    * identity) and FEEvaluationImplTransformToCollocation (which can be
107    * transformed to a collocation space and can then use the identity in these
108    * spaces), which both allow for shorter code.
109    */
110   template <MatrixFreeFunctions::ElementType type,
111             int                              dim,
112             int                              fe_degree,
113             int                              n_q_points_1d,
114             typename Number>
115   struct FEEvaluationImpl
116   {
117     static void
118     evaluate(const unsigned int                            n_components,
119              const EvaluationFlags::EvaluationFlags        evaluation_flag,
120              const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
121              const Number *                                values_dofs_actual,
122              Number *                                      values_quad,
123              Number *                                      gradients_quad,
124              Number *                                      hessians_quad,
125              Number *                                      scratch_data);
126 
127     static void
128     integrate(const unsigned int                            n_components,
129               const EvaluationFlags::EvaluationFlags        integration_flag,
130               const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
131               Number *                                      values_dofs_actual,
132               Number *                                      values_quad,
133               Number *                                      gradients_quad,
134               Number *                                      scratch_data,
135               const bool add_into_values_array);
136   };
137 
138 
139 
140   template <MatrixFreeFunctions::ElementType type,
141             int                              dim,
142             int                              fe_degree,
143             int                              n_q_points_1d,
144             typename Number>
145   inline void
146   FEEvaluationImpl<type, dim, fe_degree, n_q_points_1d, Number>::evaluate(
147     const unsigned int                            n_components,
148     const EvaluationFlags::EvaluationFlags        evaluation_flag,
149     const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
150     const Number *                                values_dofs_actual,
151     Number *                                      values_quad,
152     Number *                                      gradients_quad,
153     Number *                                      hessians_quad,
154     Number *                                      scratch_data)
155   {
156     if (evaluation_flag == EvaluationFlags::nothing)
157       return;
158 
159     const EvaluatorVariant variant =
160       EvaluatorSelector<type, (fe_degree + n_q_points_1d > 4)>::variant;
161     using Eval = EvaluatorTensorProduct<variant,
162                                         dim,
163                                         fe_degree + 1,
164                                         n_q_points_1d,
165                                         Number>;
166     Eval eval(variant == evaluate_evenodd ?
167                 shape_info.data.front().shape_values_eo :
168                 shape_info.data.front().shape_values,
169               variant == evaluate_evenodd ?
170                 shape_info.data.front().shape_gradients_eo :
171                 shape_info.data.front().shape_gradients,
172               variant == evaluate_evenodd ?
173                 shape_info.data.front().shape_hessians_eo :
174                 shape_info.data.front().shape_hessians,
175               shape_info.data.front().fe_degree + 1,
176               shape_info.data.front().n_q_points_1d);
177 
178     const unsigned int temp_size =
179       Eval::n_rows_of_product == numbers::invalid_unsigned_int ?
180         0 :
181         (Eval::n_rows_of_product > Eval::n_columns_of_product ?
182            Eval::n_rows_of_product :
183            Eval::n_columns_of_product);
184     Number *temp1 = scratch_data;
185     Number *temp2;
186     if (temp_size == 0)
187       {
188         temp2 = temp1 + std::max(Utilities::fixed_power<dim>(
189                                    shape_info.data.front().fe_degree + 1),
190                                  Utilities::fixed_power<dim>(
191                                    shape_info.data.front().n_q_points_1d));
192       }
193     else
194       {
195         temp2 = temp1 + temp_size;
196       }
197 
198     const unsigned int n_q_points =
199       temp_size == 0 ? shape_info.n_q_points : Eval::n_columns_of_product;
200     const unsigned int dofs_per_comp =
201       (type == MatrixFreeFunctions::truncated_tensor) ?
202         Utilities::fixed_power<dim>(shape_info.data.front().fe_degree + 1) :
203         shape_info.dofs_per_component_on_cell;
204     const Number *values_dofs = values_dofs_actual;
205     if (type == MatrixFreeFunctions::truncated_tensor)
206       {
207         Number *values_dofs_tmp =
208           scratch_data + 2 * (std::max(shape_info.dofs_per_component_on_cell,
209                                        shape_info.n_q_points));
210         const int degree =
211           fe_degree != -1 ? fe_degree : shape_info.data.front().fe_degree;
212         for (unsigned int c = 0; c < n_components; ++c)
213           for (int i = 0, count_p = 0, count_q = 0;
214                i < (dim > 2 ? degree + 1 : 1);
215                ++i)
216             {
217               for (int j = 0; j < (dim > 1 ? degree + 1 - i : 1); ++j)
218                 {
219                   for (int k = 0; k < degree + 1 - j - i;
220                        ++k, ++count_p, ++count_q)
221                     values_dofs_tmp[c * dofs_per_comp + count_q] =
222                       values_dofs_actual
223                         [c * shape_info.dofs_per_component_on_cell + count_p];
224                   for (int k = degree + 1 - j - i; k < degree + 1;
225                        ++k, ++count_q)
226                     values_dofs_tmp[c * dofs_per_comp + count_q] = Number();
227                 }
228               for (int j = degree + 1 - i; j < degree + 1; ++j)
229                 for (int k = 0; k < degree + 1; ++k, ++count_q)
230                   values_dofs_tmp[c * dofs_per_comp + count_q] = Number();
231             }
232         values_dofs = values_dofs_tmp;
233       }
234 
235     switch (dim)
236       {
237         case 1:
238           for (unsigned int c = 0; c < n_components; c++)
239             {
240               if (evaluation_flag & EvaluationFlags::values)
241                 eval.template values<0, true, false>(values_dofs, values_quad);
242               if (evaluation_flag & EvaluationFlags::gradients)
243                 eval.template gradients<0, true, false>(values_dofs,
244                                                         gradients_quad);
245               if (evaluation_flag & EvaluationFlags::hessians)
246                 eval.template hessians<0, true, false>(values_dofs,
247                                                        hessians_quad);
248 
249               // advance the next component in 1D array
250               values_dofs += dofs_per_comp;
251               values_quad += n_q_points;
252               gradients_quad += n_q_points;
253               hessians_quad += n_q_points;
254             }
255           break;
256 
257         case 2:
258           for (unsigned int c = 0; c < n_components; c++)
259             {
260               // grad x
261               if (evaluation_flag & EvaluationFlags::gradients)
262                 {
263                   eval.template gradients<0, true, false>(values_dofs, temp1);
264                   eval.template values<1, true, false>(temp1, gradients_quad);
265                 }
266               if (evaluation_flag & EvaluationFlags::hessians)
267                 {
268                   // grad xy
269                   if (!(evaluation_flag & EvaluationFlags::gradients))
270                     eval.template gradients<0, true, false>(values_dofs, temp1);
271                   eval.template gradients<1, true, false>(temp1,
272                                                           hessians_quad +
273                                                             2 * n_q_points);
274 
275                   // grad xx
276                   eval.template hessians<0, true, false>(values_dofs, temp1);
277                   eval.template values<1, true, false>(temp1, hessians_quad);
278                 }
279 
280               // grad y
281               eval.template values<0, true, false>(values_dofs, temp1);
282               if (evaluation_flag & EvaluationFlags::gradients)
283                 eval.template gradients<1, true, false>(temp1,
284                                                         gradients_quad +
285                                                           n_q_points);
286 
287               // grad yy
288               if (evaluation_flag & EvaluationFlags::hessians)
289                 eval.template hessians<1, true, false>(temp1,
290                                                        hessians_quad +
291                                                          n_q_points);
292 
293               // val: can use values applied in x
294               if (evaluation_flag & EvaluationFlags::values)
295                 eval.template values<1, true, false>(temp1, values_quad);
296 
297               // advance to the next component in 1D array
298               values_dofs += dofs_per_comp;
299               values_quad += n_q_points;
300               gradients_quad += 2 * n_q_points;
301               hessians_quad += 3 * n_q_points;
302             }
303           break;
304 
305         case 3:
306           for (unsigned int c = 0; c < n_components; c++)
307             {
308               if (evaluation_flag & EvaluationFlags::gradients)
309                 {
310                   // grad x
311                   eval.template gradients<0, true, false>(values_dofs, temp1);
312                   eval.template values<1, true, false>(temp1, temp2);
313                   eval.template values<2, true, false>(temp2, gradients_quad);
314                 }
315 
316               if (evaluation_flag & EvaluationFlags::hessians)
317                 {
318                   // grad xz
319                   if (!(evaluation_flag & EvaluationFlags::gradients))
320                     {
321                       eval.template gradients<0, true, false>(values_dofs,
322                                                               temp1);
323                       eval.template values<1, true, false>(temp1, temp2);
324                     }
325                   eval.template gradients<2, true, false>(temp2,
326                                                           hessians_quad +
327                                                             4 * n_q_points);
328 
329                   // grad xy
330                   eval.template gradients<1, true, false>(temp1, temp2);
331                   eval.template values<2, true, false>(temp2,
332                                                        hessians_quad +
333                                                          3 * n_q_points);
334 
335                   // grad xx
336                   eval.template hessians<0, true, false>(values_dofs, temp1);
337                   eval.template values<1, true, false>(temp1, temp2);
338                   eval.template values<2, true, false>(temp2, hessians_quad);
339                 }
340 
341               // grad y
342               eval.template values<0, true, false>(values_dofs, temp1);
343               if (evaluation_flag & EvaluationFlags::gradients)
344                 {
345                   eval.template gradients<1, true, false>(temp1, temp2);
346                   eval.template values<2, true, false>(temp2,
347                                                        gradients_quad +
348                                                          n_q_points);
349                 }
350 
351               if (evaluation_flag & EvaluationFlags::hessians)
352                 {
353                   // grad yz
354                   if (!(evaluation_flag & EvaluationFlags::gradients))
355                     eval.template gradients<1, true, false>(temp1, temp2);
356                   eval.template gradients<2, true, false>(temp2,
357                                                           hessians_quad +
358                                                             5 * n_q_points);
359 
360                   // grad yy
361                   eval.template hessians<1, true, false>(temp1, temp2);
362                   eval.template values<2, true, false>(temp2,
363                                                        hessians_quad +
364                                                          n_q_points);
365                 }
366 
367               // grad z: can use the values applied in x direction stored in
368               // temp1
369               eval.template values<1, true, false>(temp1, temp2);
370               if (evaluation_flag & EvaluationFlags::gradients)
371                 eval.template gradients<2, true, false>(temp2,
372                                                         gradients_quad +
373                                                           2 * n_q_points);
374 
375               // grad zz: can use the values applied in x and y direction stored
376               // in temp2
377               if (evaluation_flag & EvaluationFlags::hessians)
378                 eval.template hessians<2, true, false>(temp2,
379                                                        hessians_quad +
380                                                          2 * n_q_points);
381 
382               // val: can use the values applied in x & y direction stored in
383               // temp2
384               if (evaluation_flag & EvaluationFlags::values)
385                 eval.template values<2, true, false>(temp2, values_quad);
386 
387               // advance to the next component in 1D array
388               values_dofs += dofs_per_comp;
389               values_quad += n_q_points;
390               gradients_quad += 3 * n_q_points;
391               hessians_quad += 6 * n_q_points;
392             }
393           break;
394 
395         default:
396           AssertThrow(false, ExcNotImplemented());
397       }
398 
399     // case additional dof for FE_Q_DG0: add values; gradients and second
400     // derivatives evaluate to zero
401     if (type == MatrixFreeFunctions::tensor_symmetric_plus_dg0 &&
402         (evaluation_flag & EvaluationFlags::values))
403       {
404         values_quad -= n_components * n_q_points;
405         values_dofs -= n_components * dofs_per_comp;
406         for (unsigned int c = 0; c < n_components; ++c)
407           for (unsigned int q = 0; q < shape_info.n_q_points; ++q)
408             values_quad[c * shape_info.n_q_points + q] +=
409               values_dofs[(c + 1) * shape_info.dofs_per_component_on_cell - 1];
410       }
411   }
412 
413 
414 
415   template <MatrixFreeFunctions::ElementType type,
416             int                              dim,
417             int                              fe_degree,
418             int                              n_q_points_1d,
419             typename Number>
420   inline void
421   FEEvaluationImpl<type, dim, fe_degree, n_q_points_1d, Number>::integrate(
422     const unsigned int                            n_components,
423     const EvaluationFlags::EvaluationFlags        integration_flag,
424     const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
425     Number *                                      values_dofs_actual,
426     Number *                                      values_quad,
427     Number *                                      gradients_quad,
428     Number *                                      scratch_data,
429     const bool                                    add_into_values_array)
430   {
431     const EvaluatorVariant variant =
432       EvaluatorSelector<type, (fe_degree + n_q_points_1d > 4)>::variant;
433     using Eval = EvaluatorTensorProduct<variant,
434                                         dim,
435                                         fe_degree + 1,
436                                         n_q_points_1d,
437                                         Number>;
438     Eval eval(variant == evaluate_evenodd ?
439                 shape_info.data.front().shape_values_eo :
440                 shape_info.data.front().shape_values,
441               variant == evaluate_evenodd ?
442                 shape_info.data.front().shape_gradients_eo :
443                 shape_info.data.front().shape_gradients,
444               variant == evaluate_evenodd ?
445                 shape_info.data.front().shape_hessians_eo :
446                 shape_info.data.front().shape_hessians,
447               shape_info.data.front().fe_degree + 1,
448               shape_info.data.front().n_q_points_1d);
449 
450     const unsigned int temp_size =
451       Eval::n_rows_of_product == numbers::invalid_unsigned_int ?
452         0 :
453         (Eval::n_rows_of_product > Eval::n_columns_of_product ?
454            Eval::n_rows_of_product :
455            Eval::n_columns_of_product);
456     Number *temp1 = scratch_data;
457     Number *temp2;
458     if (temp_size == 0)
459       {
460         temp2 = temp1 + std::max(Utilities::fixed_power<dim>(
461                                    shape_info.data.front().fe_degree + 1),
462                                  Utilities::fixed_power<dim>(
463                                    shape_info.data.front().n_q_points_1d));
464       }
465     else
466       {
467         temp2 = temp1 + temp_size;
468       }
469 
470     const unsigned int n_q_points =
471       temp_size == 0 ? shape_info.n_q_points : Eval::n_columns_of_product;
472     const unsigned int dofs_per_comp =
473       (type == MatrixFreeFunctions::truncated_tensor) ?
474         Utilities::fixed_power<dim>(shape_info.data.front().fe_degree + 1) :
475         shape_info.dofs_per_component_on_cell;
476     // expand dof_values to tensor product for truncated tensor products
477     Number *values_dofs =
478       (type == MatrixFreeFunctions::truncated_tensor) ?
479         scratch_data + 2 * (std::max(shape_info.dofs_per_component_on_cell,
480                                      shape_info.n_q_points)) :
481         values_dofs_actual;
482 
483     switch (dim)
484       {
485         case 1:
486           for (unsigned int c = 0; c < n_components; c++)
487             {
488               if (integration_flag & EvaluationFlags::values)
489                 {
490                   if (add_into_values_array == false)
491                     eval.template values<0, false, false>(values_quad,
492                                                           values_dofs);
493                   else
494                     eval.template values<0, false, true>(values_quad,
495                                                          values_dofs);
496                 }
497               if (integration_flag & EvaluationFlags::gradients)
498                 {
499                   if (integration_flag & EvaluationFlags::values ||
500                       add_into_values_array == true)
501                     eval.template gradients<0, false, true>(gradients_quad,
502                                                             values_dofs);
503                   else
504                     eval.template gradients<0, false, false>(gradients_quad,
505                                                              values_dofs);
506                 }
507 
508               // advance to the next component in 1D array
509               values_dofs += dofs_per_comp;
510               values_quad += n_q_points;
511               gradients_quad += n_q_points;
512             }
513           break;
514 
515         case 2:
516           for (unsigned int c = 0; c < n_components; c++)
517             {
518               if ((integration_flag & EvaluationFlags::values) &&
519                   !(integration_flag & EvaluationFlags::gradients))
520                 {
521                   eval.template values<1, false, false>(values_quad, temp1);
522                   if (add_into_values_array == false)
523                     eval.template values<0, false, false>(temp1, values_dofs);
524                   else
525                     eval.template values<0, false, true>(temp1, values_dofs);
526                 }
527               if (integration_flag & EvaluationFlags::gradients)
528                 {
529                   eval.template gradients<1, false, false>(gradients_quad +
530                                                              n_q_points,
531                                                            temp1);
532                   if (integration_flag & EvaluationFlags::values)
533                     eval.template values<1, false, true>(values_quad, temp1);
534                   if (add_into_values_array == false)
535                     eval.template values<0, false, false>(temp1, values_dofs);
536                   else
537                     eval.template values<0, false, true>(temp1, values_dofs);
538                   eval.template values<1, false, false>(gradients_quad, temp1);
539                   eval.template gradients<0, false, true>(temp1, values_dofs);
540                 }
541 
542               // advance to the next component in 1D array
543               values_dofs += dofs_per_comp;
544               values_quad += n_q_points;
545               gradients_quad += 2 * n_q_points;
546             }
547           break;
548 
549         case 3:
550           for (unsigned int c = 0; c < n_components; c++)
551             {
552               if ((integration_flag & EvaluationFlags::values) &&
553                   !(integration_flag & EvaluationFlags::gradients))
554                 {
555                   eval.template values<2, false, false>(values_quad, temp1);
556                   eval.template values<1, false, false>(temp1, temp2);
557                   if (add_into_values_array == false)
558                     eval.template values<0, false, false>(temp2, values_dofs);
559                   else
560                     eval.template values<0, false, true>(temp2, values_dofs);
561                 }
562               if (integration_flag & EvaluationFlags::gradients)
563                 {
564                   eval.template gradients<2, false, false>(gradients_quad +
565                                                              2 * n_q_points,
566                                                            temp1);
567                   if (integration_flag & EvaluationFlags::values)
568                     eval.template values<2, false, true>(values_quad, temp1);
569                   eval.template values<1, false, false>(temp1, temp2);
570                   eval.template values<2, false, false>(gradients_quad +
571                                                           n_q_points,
572                                                         temp1);
573                   eval.template gradients<1, false, true>(temp1, temp2);
574                   if (add_into_values_array == false)
575                     eval.template values<0, false, false>(temp2, values_dofs);
576                   else
577                     eval.template values<0, false, true>(temp2, values_dofs);
578                   eval.template values<2, false, false>(gradients_quad, temp1);
579                   eval.template values<1, false, false>(temp1, temp2);
580                   eval.template gradients<0, false, true>(temp2, values_dofs);
581                 }
582 
583               // advance to the next component in 1D array
584               values_dofs += dofs_per_comp;
585               values_quad += n_q_points;
586               gradients_quad += 3 * n_q_points;
587             }
588           break;
589 
590         default:
591           AssertThrow(false, ExcNotImplemented());
592       }
593 
594     // case FE_Q_DG0: add values, gradients and second derivatives are zero
595     if (type == MatrixFreeFunctions::tensor_symmetric_plus_dg0)
596       {
597         values_dofs -= n_components * dofs_per_comp -
598                        shape_info.dofs_per_component_on_cell + 1;
599         values_quad -= n_components * n_q_points;
600         if (integration_flag & EvaluationFlags::values)
601           for (unsigned int c = 0; c < n_components; ++c)
602             {
603               values_dofs[0] = values_quad[0];
604               for (unsigned int q = 1; q < shape_info.n_q_points; ++q)
605                 values_dofs[0] += values_quad[q];
606               values_dofs += dofs_per_comp;
607               values_quad += n_q_points;
608             }
609         else
610           {
611             for (unsigned int c = 0; c < n_components; ++c)
612               values_dofs[c * shape_info.dofs_per_component_on_cell] = Number();
613             values_dofs += n_components * shape_info.dofs_per_component_on_cell;
614           }
615       }
616 
617     if (type == MatrixFreeFunctions::truncated_tensor)
618       {
619         values_dofs -= dofs_per_comp * n_components;
620         const int degree =
621           fe_degree != -1 ? fe_degree : shape_info.data.front().fe_degree;
622         for (unsigned int c = 0; c < n_components; ++c)
623           for (int i = 0, count_p = 0, count_q = 0;
624                i < (dim > 2 ? degree + 1 : 1);
625                ++i)
626             {
627               for (int j = 0; j < (dim > 1 ? degree + 1 - i : 1); ++j)
628                 {
629                   for (int k = 0; k < degree + 1 - j - i;
630                        ++k, ++count_p, ++count_q)
631                     values_dofs_actual[c *
632                                          shape_info.dofs_per_component_on_cell +
633                                        count_p] =
634                       values_dofs[c * dofs_per_comp + count_q];
635                   count_q += j + i;
636                 }
637               count_q += i * (degree + 1);
638             }
639       }
640   }
641 
642 
643 
644   /**
645    * This struct implements the change between two different bases. This is an
646    * ingredient in the FEEvaluationImplTransformToCollocation class where we
647    * first transform to the appropriate basis where we can compute the
648    * derivative through collocation techniques.
649    *
650    * This class allows for dimension-independent application of the operation,
651    * implemented by template recursion. It has been tested up to 6D.
652    */
653   template <EvaluatorVariant  variant,
654             EvaluatorQuantity quantity,
655             int               dim,
656             int               basis_size_1,
657             int               basis_size_2,
658             typename Number,
659             typename Number2>
660   struct FEEvaluationImplBasisChange
661   {
662     static_assert(basis_size_1 == 0 || basis_size_1 <= basis_size_2,
663                   "The second dimension must not be smaller than the first");
664 
665     /**
666      * This applies the transformation that contracts over the rows of the
667      * coefficient array, generating values along the columns of the
668      * coefficient array.
669      *
670      * @param transformation_matrix The coefficient matrix handed in as a
671      *                     vector, using @p basis_size_1 rows and @p basis_size_2
672      *                     columns if interpreted as a matrix.
673      * @param values_in    The array of the input of size basis_size_1^dim. It
674      *                     may alias with values_out
675      * @param values_out   The array of size basis_size_2^dim where the results
676      *                     of the transformation are stored. It may alias with
677      *                     the values_in array.
678      * @param basis_size_1_variable In case the template argument basis_size_1
679      * is zero, the size of the first basis can alternatively be passed in as a
680      * run time argument. The template argument takes precedence in case it is
681      * nonzero for efficiency reasons.
682      * @param basis_size_2_variable In case the template argument basis_size_1
683      * is zero, the size of the second basis can alternatively be passed in as a
684      * run time argument.
685      */
686 #ifndef DEBUG
687     DEAL_II_ALWAYS_INLINE
688 #endif
689     static void
690     do_forward(
691       const unsigned int            n_components,
692       const AlignedVector<Number2> &transformation_matrix,
693       const Number *                values_in,
694       Number *                      values_out,
695       const unsigned int basis_size_1_variable = numbers::invalid_unsigned_int,
696       const unsigned int basis_size_2_variable = numbers::invalid_unsigned_int)
697     {
698       Assert(
699         basis_size_1 != 0 || basis_size_1_variable <= basis_size_2_variable,
700         ExcMessage("The second dimension must not be smaller than the first"));
701 
702       Assert(quantity == EvaluatorQuantity::value, ExcInternalError());
703 
704       // we do recursion until dim==1 or dim==2 and we have
705       // basis_size_1==basis_size_2. The latter optimization increases
706       // optimization possibilities for the compiler but does only work for
707       // aliased pointers if the sizes are equal.
708       constexpr int next_dim =
709         (dim > 2 ||
710          ((basis_size_1 == 0 || basis_size_2 > basis_size_1) && dim > 1)) ?
711           dim - 1 :
712           dim;
713 
714       EvaluatorTensorProduct<variant,
715                              dim,
716                              basis_size_1,
717                              (basis_size_1 == 0 ? 0 : basis_size_2),
718                              Number,
719                              Number2>
720                          eval_val(transformation_matrix,
721                  AlignedVector<Number2>(),
722                  AlignedVector<Number2>(),
723                  basis_size_1_variable,
724                  basis_size_2_variable);
725       const unsigned int np_1 =
726         basis_size_1 > 0 ? basis_size_1 : basis_size_1_variable;
727       const unsigned int np_2 =
728         basis_size_1 > 0 ? basis_size_2 : basis_size_2_variable;
729       Assert(np_1 > 0 && np_1 != numbers::invalid_unsigned_int,
730              ExcMessage("Cannot transform with 0-point basis"));
731       Assert(np_2 > 0 && np_2 != numbers::invalid_unsigned_int,
732              ExcMessage("Cannot transform with 0-point basis"));
733 
734       // run loop backwards to ensure correctness if values_in aliases with
735       // values_out in case with basis_size_1 < basis_size_2
736       values_in = values_in + n_components * Utilities::fixed_power<dim>(np_1);
737       values_out =
738         values_out + n_components * Utilities::fixed_power<dim>(np_2);
739       for (unsigned int c = n_components; c != 0; --c)
740         {
741           values_in -= Utilities::fixed_power<dim>(np_1);
742           values_out -= Utilities::fixed_power<dim>(np_2);
743           if (next_dim < dim)
744             for (unsigned int q = np_1; q != 0; --q)
745               FEEvaluationImplBasisChange<
746                 variant,
747                 quantity,
748                 next_dim,
749                 basis_size_1,
750                 basis_size_2,
751                 Number,
752                 Number2>::do_forward(1,
753                                      transformation_matrix,
754                                      values_in +
755                                        (q - 1) *
756                                          Utilities::fixed_power<next_dim>(np_1),
757                                      values_out +
758                                        (q - 1) *
759                                          Utilities::fixed_power<next_dim>(np_2),
760                                      basis_size_1_variable,
761                                      basis_size_2_variable);
762 
763           // the recursion stops if dim==1 or if dim==2 and
764           // basis_size_1==basis_size_2 (the latter is used because the
765           // compiler generates nicer code)
766           if (basis_size_1 > 0 && basis_size_2 == basis_size_1 && dim == 2)
767             {
768               eval_val.template values<0, true, false>(values_in, values_out);
769               eval_val.template values<1, true, false>(values_out, values_out);
770             }
771           else if (dim == 1)
772             eval_val.template values<dim - 1, true, false>(values_in,
773                                                            values_out);
774           else
775             eval_val.template values<dim - 1, true, false>(values_out,
776                                                            values_out);
777         }
778     }
779 
780     /**
781      * This applies the transformation that contracts over the columns of the
782      * coefficient array, generating values along the rows of the coefficient
783      * array.
784      *
785      * @param transformation_matrix The coefficient matrix handed in as a
786      *                     vector, using @p basis_size_1 rows and @p basis_size_2
787      *                     columns if interpreted as a matrix.
788      * @param add_into_result Define whether the result should be added into the
789      *                     array @p values_out (if true) or overwrite the
790      *                     previous content. The result is undefined in case
791      *                     values_in and values_out point to the same array and
792      *                     @p add_into_result is true, in which case an
793      *                     exception is thrown.
794      * @param values_in    The array of the input of size basis_size_2^dim. It
795      *                     may alias with values_out. Note that the previous
796      *                     content of @p values_in is overwritten within the
797      *                     function.
798      * @param values_out   The array of size basis_size_1^dim where the results
799      *                     of the transformation are stored. It may alias with
800      *                     the @p values_in array.
801      * @param basis_size_1_variable In case the template argument basis_size_1
802      * is zero, the size of the first basis can alternatively be passed in as a
803      * run time argument. The template argument takes precedence in case it is
804      * nonzero for efficiency reasons.
805      * @param basis_size_2_variable In case the template argument basis_size_1
806      * is zero, the size of the second basis can alternatively be passed in as a
807      * run time argument.
808      */
809 #ifndef DEBUG
810     DEAL_II_ALWAYS_INLINE
811 #endif
812     static void
813     do_backward(
814       const unsigned int            n_components,
815       const AlignedVector<Number2> &transformation_matrix,
816       const bool                    add_into_result,
817       Number *                      values_in,
818       Number *                      values_out,
819       const unsigned int basis_size_1_variable = numbers::invalid_unsigned_int,
820       const unsigned int basis_size_2_variable = numbers::invalid_unsigned_int)
821     {
822       Assert(
823         basis_size_1 != 0 || basis_size_1_variable <= basis_size_2_variable,
824         ExcMessage("The second dimension must not be smaller than the first"));
825       Assert(add_into_result == false || values_in != values_out,
826              ExcMessage(
827                "Input and output cannot alias with each other when "
828                "adding the result of the basis change to existing data"));
829 
830       Assert(quantity == EvaluatorQuantity::value ||
831                quantity == EvaluatorQuantity::hessian,
832              ExcInternalError());
833 
834       constexpr int next_dim =
835         (dim > 2 ||
836          ((basis_size_1 == 0 || basis_size_2 > basis_size_1) && dim > 1)) ?
837           dim - 1 :
838           dim;
839       EvaluatorTensorProduct<variant,
840                              dim,
841                              basis_size_1,
842                              (basis_size_1 == 0 ? 0 : basis_size_2),
843                              Number,
844                              Number2>
845                          eval_val(transformation_matrix,
846                  transformation_matrix,
847                  transformation_matrix,
848                  basis_size_1_variable,
849                  basis_size_2_variable);
850       const unsigned int np_1 =
851         basis_size_1 > 0 ? basis_size_1 : basis_size_1_variable;
852       const unsigned int np_2 =
853         basis_size_1 > 0 ? basis_size_2 : basis_size_2_variable;
854       Assert(np_1 > 0 && np_1 != numbers::invalid_unsigned_int,
855              ExcMessage("Cannot transform with 0-point basis"));
856       Assert(np_2 > 0 && np_2 != numbers::invalid_unsigned_int,
857              ExcMessage("Cannot transform with 0-point basis"));
858 
859       for (unsigned int c = 0; c < n_components; ++c)
860         {
861           if (basis_size_1 > 0 && basis_size_2 == basis_size_1 && dim == 2)
862             {
863               if (quantity == EvaluatorQuantity::value)
864                 eval_val.template values<1, false, false>(values_in, values_in);
865               else
866                 eval_val.template hessians<1, false, false>(values_in,
867                                                             values_in);
868 
869               if (add_into_result)
870                 {
871                   if (quantity == EvaluatorQuantity::value)
872                     eval_val.template values<0, false, true>(values_in,
873                                                              values_out);
874                   else
875                     eval_val.template hessians<0, false, true>(values_in,
876                                                                values_out);
877                 }
878               else
879                 {
880                   if (quantity == EvaluatorQuantity::value)
881                     eval_val.template values<0, false, false>(values_in,
882                                                               values_out);
883                   else
884                     eval_val.template hessians<0, false, false>(values_in,
885                                                                 values_out);
886                 }
887             }
888           else
889             {
890               if (dim == 1 && add_into_result)
891                 {
892                   if (quantity == EvaluatorQuantity::value)
893                     eval_val.template values<0, false, true>(values_in,
894                                                              values_out);
895                   else
896                     eval_val.template hessians<0, false, true>(values_in,
897                                                                values_out);
898                 }
899               else if (dim == 1)
900                 {
901                   if (quantity == EvaluatorQuantity::value)
902                     eval_val.template values<0, false, false>(values_in,
903                                                               values_out);
904                   else
905                     eval_val.template hessians<0, false, false>(values_in,
906                                                                 values_out);
907                 }
908               else
909                 {
910                   if (quantity == EvaluatorQuantity::value)
911                     eval_val.template values<dim - 1, false, false>(values_in,
912                                                                     values_in);
913                   else
914                     eval_val.template hessians<dim - 1, false, false>(
915                       values_in, values_in);
916                 }
917             }
918           if (next_dim < dim)
919             for (unsigned int q = 0; q < np_1; ++q)
920               FEEvaluationImplBasisChange<variant,
921                                           quantity,
922                                           next_dim,
923                                           basis_size_1,
924                                           basis_size_2,
925                                           Number,
926                                           Number2>::
927                 do_backward(1,
928                             transformation_matrix,
929                             add_into_result,
930                             values_in +
931                               q * Utilities::fixed_power<next_dim>(np_2),
932                             values_out +
933                               q * Utilities::fixed_power<next_dim>(np_1),
934                             basis_size_1_variable,
935                             basis_size_2_variable);
936 
937           values_in += Utilities::fixed_power<dim>(np_2);
938           values_out += Utilities::fixed_power<dim>(np_1);
939         }
940     }
941 
942     /**
943      * This operation applies a mass-matrix-like operation, consisting of a
944      * do_forward() operation, multiplication by the coefficients in the
945      * quadrature points, and the do_backward() operation.
946      *
947      * @param transformation_matrix The coefficient matrix handed in as a
948      *                     vector, using @p basis_size_1 rows and @p basis_size_2
949      *                     columns if interpreted as a matrix.
950      * @param coefficients The array of coefficients by which the result is
951      *                     multiplied. Its length must be either
952      *                     basis_size_2^dim or n_components*basis_size_2^dim
953      * @param values_in    The array of the input of size basis_size_2^dim. It
954      *                     may alias with values_out
955      * @param scratch_data Array to hold temporary data during the operation.
956      *                     Must be of length basis_size_2^dim
957      * @param values_out   The array of size basis_size_1^dim where the results
958      *                     of the transformation are stored. It may alias with
959      *                     the values_in array.
960      */
961     static void
962     do_mass(const unsigned int            n_components,
963             const AlignedVector<Number2> &transformation_matrix,
964             const AlignedVector<Number> & coefficients,
965             const Number *                values_in,
966             Number *                      scratch_data,
967             Number *                      values_out)
968     {
969       constexpr int next_dim = dim > 1 ? dim - 1 : dim;
970       Number *      my_scratch =
971         basis_size_1 != basis_size_2 ? scratch_data : values_out;
972 
973       const unsigned int size_per_component = Utilities::pow(basis_size_2, dim);
974       Assert(coefficients.size() == size_per_component ||
975                coefficients.size() == n_components * size_per_component,
976              ExcDimensionMismatch(coefficients.size(), size_per_component));
977       const unsigned int stride =
978         coefficients.size() == size_per_component ? 0 : 1;
979 
980       for (unsigned int q = basis_size_1; q != 0; --q)
981         FEEvaluationImplBasisChange<
982           variant,
983           EvaluatorQuantity::value,
984           next_dim,
985           basis_size_1,
986           basis_size_2,
987           Number,
988           Number2>::do_forward(n_components,
989                                transformation_matrix,
990                                values_in +
991                                  (q - 1) *
992                                    Utilities::pow(basis_size_1, dim - 1),
993                                my_scratch +
994                                  (q - 1) *
995                                    Utilities::pow(basis_size_2, dim - 1));
996       EvaluatorTensorProduct<variant,
997                              dim,
998                              basis_size_1,
999                              basis_size_2,
1000                              Number,
1001                              Number2>
1002                          eval_val(transformation_matrix);
1003       const unsigned int n_inner_blocks =
1004         (dim > 1 && basis_size_2 < 10) ? basis_size_2 : 1;
1005       const unsigned int n_blocks = Utilities::pow(basis_size_2, dim - 1);
1006       for (unsigned int ii = 0; ii < n_blocks; ii += n_inner_blocks)
1007         for (unsigned int c = 0; c < n_components; ++c)
1008           {
1009             for (unsigned int i = ii; i < ii + n_inner_blocks; ++i)
1010               eval_val.template values_one_line<dim - 1, true, false>(
1011                 my_scratch + i, my_scratch + i);
1012             for (unsigned int q = 0; q < basis_size_2; ++q)
1013               for (unsigned int i = ii; i < ii + n_inner_blocks; ++i)
1014                 my_scratch[i + q * n_blocks + c * size_per_component] *=
1015                   coefficients[i + q * n_blocks +
1016                                c * stride * size_per_component];
1017             for (unsigned int i = ii; i < ii + n_inner_blocks; ++i)
1018               eval_val.template values_one_line<dim - 1, false, false>(
1019                 my_scratch + i, my_scratch + i);
1020           }
1021       for (unsigned int q = 0; q < basis_size_1; ++q)
1022         FEEvaluationImplBasisChange<
1023           variant,
1024           EvaluatorQuantity::value,
1025           next_dim,
1026           basis_size_1,
1027           basis_size_2,
1028           Number,
1029           Number2>::do_backward(n_components,
1030                                 transformation_matrix,
1031                                 false,
1032                                 my_scratch +
1033                                   q * Utilities::pow(basis_size_2, dim - 1),
1034                                 values_out +
1035                                   q * Utilities::pow(basis_size_1, dim - 1));
1036     }
1037   };
1038 
1039 
1040 
1041   /**
1042    * This struct performs the evaluation of function values, gradients and
1043    * Hessians for tensor-product finite elements. This a specialization for
1044    * elements where the nodal points coincide with the quadrature points like
1045    * FE_Q shape functions on Gauss-Lobatto elements integrated with
1046    * Gauss-Lobatto quadrature. The assumption of this class is that the shape
1047    * 'values' operation is identity, which allows us to write shorter code.
1048    *
1049    * In literature, this form of evaluation is often called spectral
1050    * evaluation, spectral collocation or simply collocation, meaning the same
1051    * location for shape functions and evaluation space (quadrature points).
1052    */
1053   template <int dim, int fe_degree, typename Number>
1054   struct FEEvaluationImplCollocation
1055   {
1056     static void
1057     evaluate(const unsigned int                            n_components,
1058              const EvaluationFlags::EvaluationFlags        evaluation_flag,
1059              const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
1060              const Number *                                values_dofs,
1061              Number *                                      values_quad,
1062              Number *                                      gradients_quad,
1063              Number *                                      hessians_quad,
1064              Number *                                      scratch_data);
1065 
1066     static void
1067     integrate(const unsigned int                            n_components,
1068               const EvaluationFlags::EvaluationFlags        integration_flag,
1069               const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
1070               Number *                                      values_dofs,
1071               Number *                                      values_quad,
1072               Number *                                      gradients_quad,
1073               Number *                                      scratch_data,
1074               const bool add_into_values_array);
1075   };
1076 
1077 
1078 
1079   template <int dim, int fe_degree, typename Number>
1080   inline void
1081   FEEvaluationImplCollocation<dim, fe_degree, Number>::evaluate(
1082     const unsigned int                            n_components,
1083     const EvaluationFlags::EvaluationFlags        evaluation_flag,
1084     const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
1085     const Number *                                values_dofs,
1086     Number *                                      values_quad,
1087     Number *                                      gradients_quad,
1088     Number *                                      hessians_quad,
1089     Number *)
1090   {
1091     AssertDimension(
1092       shape_info.data.front().shape_gradients_collocation_eo.size(),
1093       (fe_degree + 2) / 2 * (fe_degree + 1));
1094 
1095     EvaluatorTensorProduct<evaluate_evenodd,
1096                            dim,
1097                            fe_degree + 1,
1098                            fe_degree + 1,
1099                            Number>
1100                            eval(AlignedVector<Number>(),
1101            shape_info.data.front().shape_gradients_collocation_eo,
1102            shape_info.data.front().shape_hessians_collocation_eo);
1103     constexpr unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim);
1104 
1105     for (unsigned int c = 0; c < n_components; c++)
1106       {
1107         if (evaluation_flag & EvaluationFlags::values)
1108           for (unsigned int i = 0; i < n_q_points; ++i)
1109             values_quad[i] = values_dofs[i];
1110         if (evaluation_flag &
1111             (EvaluationFlags::gradients | EvaluationFlags::hessians))
1112           {
1113             eval.template gradients<0, true, false>(values_dofs,
1114                                                     gradients_quad);
1115             if (dim > 1)
1116               eval.template gradients<1, true, false>(values_dofs,
1117                                                       gradients_quad +
1118                                                         n_q_points);
1119             if (dim > 2)
1120               eval.template gradients<2, true, false>(values_dofs,
1121                                                       gradients_quad +
1122                                                         2 * n_q_points);
1123           }
1124         if (evaluation_flag & EvaluationFlags::hessians)
1125           {
1126             eval.template hessians<0, true, false>(values_dofs, hessians_quad);
1127             if (dim > 1)
1128               {
1129                 eval.template gradients<1, true, false>(gradients_quad,
1130                                                         hessians_quad +
1131                                                           dim * n_q_points);
1132                 eval.template hessians<1, true, false>(values_dofs,
1133                                                        hessians_quad +
1134                                                          n_q_points);
1135               }
1136             if (dim > 2)
1137               {
1138                 eval.template gradients<2, true, false>(gradients_quad,
1139                                                         hessians_quad +
1140                                                           4 * n_q_points);
1141                 eval.template gradients<2, true, false>(
1142                   gradients_quad + n_q_points, hessians_quad + 5 * n_q_points);
1143                 eval.template hessians<2, true, false>(values_dofs,
1144                                                        hessians_quad +
1145                                                          2 * n_q_points);
1146               }
1147             hessians_quad += (dim * (dim + 1)) / 2 * n_q_points;
1148           }
1149         gradients_quad += dim * n_q_points;
1150         values_quad += n_q_points;
1151         values_dofs += n_q_points;
1152       }
1153   }
1154 
1155 
1156 
1157   template <int dim, int fe_degree, typename Number>
1158   inline void
1159   FEEvaluationImplCollocation<dim, fe_degree, Number>::integrate(
1160     const unsigned int                            n_components,
1161     const EvaluationFlags::EvaluationFlags        integration_flag,
1162     const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
1163     Number *                                      values_dofs,
1164     Number *                                      values_quad,
1165     Number *                                      gradients_quad,
1166     Number *,
1167     const bool add_into_values_array)
1168   {
1169     AssertDimension(
1170       shape_info.data.front().shape_gradients_collocation_eo.size(),
1171       (fe_degree + 2) / 2 * (fe_degree + 1));
1172 
1173     EvaluatorTensorProduct<evaluate_evenodd,
1174                            dim,
1175                            fe_degree + 1,
1176                            fe_degree + 1,
1177                            Number>
1178                            eval(AlignedVector<Number>(),
1179            shape_info.data.front().shape_gradients_collocation_eo,
1180            shape_info.data.front().shape_hessians_collocation_eo);
1181     constexpr unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim);
1182 
1183     for (unsigned int c = 0; c < n_components; c++)
1184       {
1185         if (integration_flag & EvaluationFlags::values)
1186           {
1187             if (add_into_values_array == false)
1188               for (unsigned int i = 0; i < n_q_points; ++i)
1189                 values_dofs[i] = values_quad[i];
1190             else
1191               for (unsigned int i = 0; i < n_q_points; ++i)
1192                 values_dofs[i] += values_quad[i];
1193           }
1194         if (integration_flag & EvaluationFlags::gradients)
1195           {
1196             if (integration_flag & EvaluationFlags::values ||
1197                 add_into_values_array == true)
1198               eval.template gradients<0, false, true>(gradients_quad,
1199                                                       values_dofs);
1200             else
1201               eval.template gradients<0, false, false>(gradients_quad,
1202                                                        values_dofs);
1203             if (dim > 1)
1204               eval.template gradients<1, false, true>(gradients_quad +
1205                                                         n_q_points,
1206                                                       values_dofs);
1207             if (dim > 2)
1208               eval.template gradients<2, false, true>(gradients_quad +
1209                                                         2 * n_q_points,
1210                                                       values_dofs);
1211           }
1212         gradients_quad += dim * n_q_points;
1213         values_quad += n_q_points;
1214         values_dofs += n_q_points;
1215       }
1216   }
1217 
1218 
1219 
1220   /**
1221    * This struct performs the evaluation of function values, gradients and
1222    * Hessians for tensor-product finite elements. This a specialization for
1223    * symmetric basis functions about the mid point 0.5 of the unit interval
1224    * with the same number of quadrature points as degrees of freedom. In that
1225    * case, we can first transform the basis to one that has the nodal points
1226    * in the quadrature points (i.e., the collocation space) and then perform
1227    * the evaluation of the first and second derivatives in this transformed
1228    * space, using the identity operation for the shape values.
1229    */
1230   template <int dim, int fe_degree, int n_q_points_1d, typename Number>
1231   struct FEEvaluationImplTransformToCollocation
1232   {
1233     static void
1234     evaluate(const unsigned int                            n_components,
1235              const EvaluationFlags::EvaluationFlags        evaluation_flag,
1236              const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
1237              const Number *                                values_dofs,
1238              Number *                                      values_quad,
1239              Number *                                      gradients_quad,
1240              Number *                                      hessians_quad,
1241              Number *                                      scratch_data);
1242 
1243     static void
1244     integrate(const unsigned int                            n_components,
1245               const EvaluationFlags::EvaluationFlags        evaluation_flag,
1246               const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
1247               Number *                                      values_dofs,
1248               Number *                                      values_quad,
1249               Number *                                      gradients_quad,
1250               Number *                                      scratch_data,
1251               const bool add_into_values_array);
1252   };
1253 
1254 
1255 
1256   template <int dim, int fe_degree, int n_q_points_1d, typename Number>
1257   inline void
1258   FEEvaluationImplTransformToCollocation<
1259     dim,
1260     fe_degree,
1261     n_q_points_1d,
1262     Number>::evaluate(const unsigned int                     n_components,
1263                       const EvaluationFlags::EvaluationFlags evaluation_flag,
1264                       const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
1265                       const Number *                                values_dofs,
1266                       Number *                                      values_quad,
1267                       Number *gradients_quad,
1268                       Number *hessians_quad,
1269                       Number *)
1270   {
1271     Assert(n_q_points_1d > fe_degree,
1272            ExcMessage("You lose information when going to a collocation space "
1273                       "of lower degree, so the evaluation results would be "
1274                       "wrong. Thus, this class does not permit the desired "
1275                       "operation."));
1276     constexpr unsigned int n_q_points = Utilities::pow(n_q_points_1d, dim);
1277 
1278     for (unsigned int c = 0; c < n_components; c++)
1279       {
1280         FEEvaluationImplBasisChange<
1281           evaluate_evenodd,
1282           EvaluatorQuantity::value,
1283           dim,
1284           (fe_degree >= n_q_points_1d ? n_q_points_1d : fe_degree + 1),
1285           n_q_points_1d,
1286           Number,
1287           Number>::do_forward(1,
1288                               shape_info.data.front().shape_values_eo,
1289                               values_dofs,
1290                               values_quad);
1291 
1292         // apply derivatives in the collocation space
1293         if (evaluation_flag &
1294             (EvaluationFlags::gradients | EvaluationFlags::hessians))
1295           FEEvaluationImplCollocation<dim, n_q_points_1d - 1, Number>::evaluate(
1296             1,
1297             evaluation_flag &
1298               (EvaluationFlags::gradients | EvaluationFlags::hessians),
1299             shape_info,
1300             values_quad,
1301             nullptr,
1302             gradients_quad,
1303             hessians_quad,
1304             nullptr);
1305 
1306         values_dofs += shape_info.dofs_per_component_on_cell;
1307         values_quad += n_q_points;
1308         gradients_quad += dim * n_q_points;
1309         hessians_quad += (dim * (dim + 1)) / 2 * n_q_points;
1310       }
1311   }
1312 
1313 
1314 
1315   template <int dim, int fe_degree, int n_q_points_1d, typename Number>
1316   inline void
1317   FEEvaluationImplTransformToCollocation<
1318     dim,
1319     fe_degree,
1320     n_q_points_1d,
1321     Number>::integrate(const unsigned int                     n_components,
1322                        const EvaluationFlags::EvaluationFlags integration_flag,
1323                        const MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
1324                        Number *values_dofs,
1325                        Number *values_quad,
1326                        Number *gradients_quad,
1327                        Number *,
1328                        const bool add_into_values_array)
1329   {
1330     Assert(n_q_points_1d > fe_degree,
1331            ExcMessage("You lose information when going to a collocation space "
1332                       "of lower degree, so the evaluation results would be "
1333                       "wrong. Thus, this class does not permit the desired "
1334                       "operation."));
1335     AssertDimension(
1336       shape_info.data.front().shape_gradients_collocation_eo.size(),
1337       (n_q_points_1d + 1) / 2 * n_q_points_1d);
1338     constexpr unsigned int n_q_points = Utilities::pow(n_q_points_1d, dim);
1339 
1340     for (unsigned int c = 0; c < n_components; c++)
1341       {
1342         // apply derivatives in collocation space
1343         if (integration_flag & EvaluationFlags::gradients)
1344           FEEvaluationImplCollocation<dim, n_q_points_1d - 1, Number>::
1345             integrate(1,
1346                       integration_flag & EvaluationFlags::gradients,
1347                       shape_info,
1348                       values_quad,
1349                       nullptr,
1350                       gradients_quad,
1351                       nullptr,
1352                       /*add_into_values_array=*/integration_flag &
1353                         EvaluationFlags::values);
1354 
1355         // transform back to the original space
1356         FEEvaluationImplBasisChange<
1357           evaluate_evenodd,
1358           EvaluatorQuantity::value,
1359           dim,
1360           (fe_degree >= n_q_points_1d ? n_q_points_1d : fe_degree + 1),
1361           n_q_points_1d,
1362           Number,
1363           Number>::do_backward(1,
1364                                shape_info.data.front().shape_values_eo,
1365                                add_into_values_array,
1366                                values_quad,
1367                                values_dofs);
1368         gradients_quad += dim * n_q_points;
1369         values_quad += n_q_points;
1370         values_dofs += shape_info.dofs_per_component_on_cell;
1371       }
1372   }
1373 
1374 
1375 
1376   /**
1377    * This class chooses an appropriate evaluation strategy based on the
1378    * template parameters and the shape_info variable which contains runtime
1379    * parameters for the strategy underlying FEEvaluation::evaluate(), i.e.
1380    * this calls internal::FEEvaluationImpl::evaluate(),
1381    * internal::FEEvaluationImplCollocation::evaluate() or
1382    * internal::FEEvaluationImplTransformToCollocation::evaluate() with
1383    * appropriate template parameters. In case the template parameters
1384    * fe_degree and n_q_points_1d contain valid information (i.e. fe_degree>-1
1385    * and n_q_points_1d>0), we simply pass these values to the respective
1386    * template specializations.  Otherwise, we perform a runtime matching of
1387    * the runtime parameters to find the correct specialization. This matching
1388    * currently supports $0\leq fe\_degree \leq 9$ and $degree+1\leq
1389    * n\_q\_points\_1d\leq fe\_degree+2$.
1390    */
1391   template <int dim, typename Number>
1392   struct FEEvaluationImplEvaluateSelector
1393   {
1394     template <int fe_degree, int n_q_points_1d>
1395     static bool
1396     run(const unsigned int                                      n_components,
1397         const EvaluationFlags::EvaluationFlags                  evaluation_flag,
1398         const internal::MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
1399         Number *values_dofs_actual,
1400         Number *values_quad,
1401         Number *gradients_quad,
1402         Number *hessians_quad,
1403         Number *scratch_data)
1404     {
1405       // We enable a transformation to collocation for derivatives if it gives
1406       // correct results (first condition), if it is the most efficient choice
1407       // in terms of operation counts (second condition) and if we were able to
1408       // initialize the fields in shape_info.templates.h from the polynomials
1409       // (third condition).
1410       static constexpr bool use_collocation =
1411         n_q_points_1d > fe_degree && n_q_points_1d <= 3 * fe_degree / 2 + 1 &&
1412         n_q_points_1d < 200;
1413 
1414       if (fe_degree >= 0 && fe_degree + 1 == n_q_points_1d &&
1415           shape_info.element_type ==
1416             internal::MatrixFreeFunctions::tensor_symmetric_collocation)
1417         {
1418           internal::FEEvaluationImplCollocation<dim, fe_degree, Number>::
1419             evaluate(n_components,
1420                      evaluation_flag,
1421                      shape_info,
1422                      values_dofs_actual,
1423                      values_quad,
1424                      gradients_quad,
1425                      hessians_quad,
1426                      scratch_data);
1427         }
1428       // '<=' on type means tensor_symmetric or tensor_symmetric_hermite, see
1429       // shape_info.h for more details
1430       else if (fe_degree >= 0 && use_collocation &&
1431                shape_info.element_type <=
1432                  internal::MatrixFreeFunctions::tensor_symmetric)
1433         {
1434           internal::FEEvaluationImplTransformToCollocation<
1435             dim,
1436             fe_degree,
1437             n_q_points_1d,
1438             Number>::evaluate(n_components,
1439                               evaluation_flag,
1440                               shape_info,
1441                               values_dofs_actual,
1442                               values_quad,
1443                               gradients_quad,
1444                               hessians_quad,
1445                               scratch_data);
1446         }
1447       else if (fe_degree >= 0 &&
1448                shape_info.element_type <=
1449                  internal::MatrixFreeFunctions::tensor_symmetric)
1450         {
1451           internal::FEEvaluationImpl<
1452             internal::MatrixFreeFunctions::tensor_symmetric,
1453             dim,
1454             fe_degree,
1455             n_q_points_1d,
1456             Number>::evaluate(n_components,
1457                               evaluation_flag,
1458                               shape_info,
1459                               values_dofs_actual,
1460                               values_quad,
1461                               gradients_quad,
1462                               hessians_quad,
1463                               scratch_data);
1464         }
1465       else if (shape_info.element_type ==
1466                internal::MatrixFreeFunctions::tensor_symmetric_plus_dg0)
1467         {
1468           internal::FEEvaluationImpl<
1469             internal::MatrixFreeFunctions::tensor_symmetric_plus_dg0,
1470             dim,
1471             fe_degree,
1472             n_q_points_1d,
1473             Number>::evaluate(n_components,
1474                               evaluation_flag,
1475                               shape_info,
1476                               values_dofs_actual,
1477                               values_quad,
1478                               gradients_quad,
1479                               hessians_quad,
1480                               scratch_data);
1481         }
1482       else if (shape_info.element_type ==
1483                internal::MatrixFreeFunctions::truncated_tensor)
1484         {
1485           internal::FEEvaluationImpl<
1486             internal::MatrixFreeFunctions::truncated_tensor,
1487             dim,
1488             fe_degree,
1489             n_q_points_1d,
1490             Number>::evaluate(n_components,
1491                               evaluation_flag,
1492                               shape_info,
1493                               values_dofs_actual,
1494                               values_quad,
1495                               gradients_quad,
1496                               hessians_quad,
1497                               scratch_data);
1498         }
1499       else
1500         {
1501           internal::FEEvaluationImpl<
1502             internal::MatrixFreeFunctions::tensor_general,
1503             dim,
1504             fe_degree,
1505             n_q_points_1d,
1506             Number>::evaluate(n_components,
1507                               evaluation_flag,
1508                               shape_info,
1509                               values_dofs_actual,
1510                               values_quad,
1511                               gradients_quad,
1512                               hessians_quad,
1513                               scratch_data);
1514         }
1515 
1516       return false;
1517     }
1518   };
1519 
1520 
1521 
1522   /**
1523    * This class chooses an appropriate evaluation strategy based on the
1524    * template parameters and the shape_info variable which contains runtime
1525    * parameters for the strategy underlying FEEvaluation::integrate(), i.e.
1526    * this calls internal::FEEvaluationImpl::integrate(),
1527    * internal::FEEvaluationImplCollocation::integrate() or
1528    * internal::FEEvaluationImplTransformToCollocation::integrate() with
1529    * appropriate template parameters. In case the template parameters
1530    * fe_degree and n_q_points_1d contain valid information (i.e. fe_degree>-1
1531    * and n_q_points_1d>0), we simply pass these values to the respective
1532    * template specializations.  Otherwise, we perform a runtime matching of
1533    * the runtime parameters to find the correct specialization. This matching
1534    * currently supports $0\leq fe\_degree \leq 9$ and $degree+1\leq
1535    * n\_q\_points\_1d\leq fe\_degree+2$.
1536    */
1537   template <int dim, typename Number>
1538   struct FEEvaluationImplIntegrateSelector
1539   {
1540     template <int fe_degree, int n_q_points_1d>
1541     static bool
1542     run(const unsigned int                     n_components,
1543         const EvaluationFlags::EvaluationFlags integration_flag,
1544         const internal::MatrixFreeFunctions::ShapeInfo<Number> &shape_info,
1545         Number *   values_dofs_actual,
1546         Number *   values_quad,
1547         Number *   gradients_quad,
1548         Number *   scratch_data,
1549         const bool sum_into_values_array)
1550     {
1551       // We enable a transformation to collocation for derivatives if it gives
1552       // correct results (first condition), if it is the most efficient choice
1553       // in terms of operation counts (second condition) and if we were able to
1554       // initialize the fields in shape_info.templates.h from the polynomials
1555       // (third condition).
1556       constexpr bool use_collocation = n_q_points_1d > fe_degree &&
1557                                        n_q_points_1d <= 3 * fe_degree / 2 + 1 &&
1558                                        n_q_points_1d < 200;
1559 
1560       if (fe_degree >= 0 && fe_degree + 1 == n_q_points_1d &&
1561           shape_info.element_type ==
1562             internal::MatrixFreeFunctions::tensor_symmetric_collocation)
1563         {
1564           internal::FEEvaluationImplCollocation<dim, fe_degree, Number>::
1565             integrate(n_components,
1566                       integration_flag,
1567                       shape_info,
1568                       values_dofs_actual,
1569                       values_quad,
1570                       gradients_quad,
1571                       scratch_data,
1572                       sum_into_values_array);
1573         }
1574       // '<=' on type means tensor_symmetric or tensor_symmetric_hermite, see
1575       // shape_info.h for more details
1576       else if (fe_degree >= 0 && use_collocation &&
1577                shape_info.element_type <=
1578                  internal::MatrixFreeFunctions::tensor_symmetric)
1579         {
1580           internal::FEEvaluationImplTransformToCollocation<
1581             dim,
1582             fe_degree,
1583             n_q_points_1d,
1584             Number>::integrate(n_components,
1585                                integration_flag,
1586                                shape_info,
1587                                values_dofs_actual,
1588                                values_quad,
1589                                gradients_quad,
1590                                scratch_data,
1591                                sum_into_values_array);
1592         }
1593       else if (fe_degree >= 0 &&
1594                shape_info.element_type <=
1595                  internal::MatrixFreeFunctions::tensor_symmetric)
1596         {
1597           internal::FEEvaluationImpl<
1598             internal::MatrixFreeFunctions::tensor_symmetric,
1599             dim,
1600             fe_degree,
1601             n_q_points_1d,
1602             Number>::integrate(n_components,
1603                                integration_flag,
1604                                shape_info,
1605                                values_dofs_actual,
1606                                values_quad,
1607                                gradients_quad,
1608                                scratch_data,
1609                                sum_into_values_array);
1610         }
1611       else if (shape_info.element_type ==
1612                internal::MatrixFreeFunctions::tensor_symmetric_plus_dg0)
1613         {
1614           internal::FEEvaluationImpl<
1615             internal::MatrixFreeFunctions::tensor_symmetric_plus_dg0,
1616             dim,
1617             fe_degree,
1618             n_q_points_1d,
1619             Number>::integrate(n_components,
1620                                integration_flag,
1621                                shape_info,
1622                                values_dofs_actual,
1623                                values_quad,
1624                                gradients_quad,
1625                                scratch_data,
1626                                sum_into_values_array);
1627         }
1628       else if (shape_info.element_type ==
1629                internal::MatrixFreeFunctions::truncated_tensor)
1630         {
1631           internal::FEEvaluationImpl<
1632             internal::MatrixFreeFunctions::truncated_tensor,
1633             dim,
1634             fe_degree,
1635             n_q_points_1d,
1636             Number>::integrate(n_components,
1637                                integration_flag,
1638                                shape_info,
1639                                values_dofs_actual,
1640                                values_quad,
1641                                gradients_quad,
1642                                scratch_data,
1643                                sum_into_values_array);
1644         }
1645       else
1646         {
1647           internal::FEEvaluationImpl<
1648             internal::MatrixFreeFunctions::tensor_general,
1649             dim,
1650             fe_degree,
1651             n_q_points_1d,
1652             Number>::integrate(n_components,
1653                                integration_flag,
1654                                shape_info,
1655                                values_dofs_actual,
1656                                values_quad,
1657                                gradients_quad,
1658                                scratch_data,
1659                                sum_into_values_array);
1660         }
1661 
1662       return false;
1663     }
1664   };
1665 
1666 
1667 
1668   template <bool symmetric_evaluate,
1669             int  dim,
1670             int  fe_degree,
1671             int  n_q_points_1d,
1672             typename Number>
1673   struct FEFaceEvaluationImpl
1674   {
1675     // We enable a transformation to collocation for derivatives if it gives
1676     // correct results (first two conditions), if it is the most efficient
1677     // choice in terms of operation counts (third condition) and if we were
1678     // able to initialize the fields in shape_info.templates.h from the
1679     // polynomials (fourth condition).
1680     static constexpr bool use_collocation =
1681       symmetric_evaluate &&
1682       n_q_points_1d > fe_degree &&n_q_points_1d <= 3 * fe_degree / 2 + 1 &&
1683       n_q_points_1d < 200;
1684 
1685     static void
1686     evaluate_in_face(const unsigned int                            n_components,
1687                      const MatrixFreeFunctions::ShapeInfo<Number> &data,
1688                      Number *                                      values_dofs,
1689                      Number *                                      values_quad,
1690                      Number *           gradients_quad,
1691                      Number *           scratch_data,
1692                      const bool         evaluate_val,
1693                      const bool         evaluate_grad,
1694                      const unsigned int subface_index)
1695     {
1696       const AlignedVector<Number> &val1 =
1697         symmetric_evaluate ?
1698           data.data.front().shape_values_eo :
1699           (subface_index >= GeometryInfo<dim>::max_children_per_cell ?
1700              data.data.front().shape_values :
1701              data.data.front().values_within_subface[subface_index % 2]);
1702       const AlignedVector<Number> &val2 =
1703         symmetric_evaluate ?
1704           data.data.front().shape_values_eo :
1705           (subface_index >= GeometryInfo<dim>::max_children_per_cell ?
1706              data.data.front().shape_values :
1707              data.data.front().values_within_subface[subface_index / 2]);
1708 
1709       const AlignedVector<Number> &grad1 =
1710         symmetric_evaluate ?
1711           data.data.front().shape_gradients_eo :
1712           (subface_index >= GeometryInfo<dim>::max_children_per_cell ?
1713              data.data.front().shape_gradients :
1714              data.data.front().gradients_within_subface[subface_index % 2]);
1715       const AlignedVector<Number> &grad2 =
1716         symmetric_evaluate ?
1717           data.data.front().shape_gradients_eo :
1718           (subface_index >= GeometryInfo<dim>::max_children_per_cell ?
1719              data.data.front().shape_gradients :
1720              data.data.front().gradients_within_subface[subface_index / 2]);
1721 
1722       using Eval =
1723         internal::EvaluatorTensorProduct<symmetric_evaluate ?
1724                                            internal::evaluate_evenodd :
1725                                            internal::evaluate_general,
1726                                          dim - 1,
1727                                          fe_degree + 1,
1728                                          n_q_points_1d,
1729                                          Number>;
1730       Eval eval1(val1,
1731                  grad1,
1732                  AlignedVector<Number>(),
1733                  data.data.front().fe_degree + 1,
1734                  data.data.front().n_q_points_1d);
1735       Eval eval2(val2,
1736                  grad2,
1737                  AlignedVector<Number>(),
1738                  data.data.front().fe_degree + 1,
1739                  data.data.front().n_q_points_1d);
1740 
1741       const unsigned int size_deg =
1742         fe_degree > -1 ?
1743           Utilities::pow(fe_degree + 1, dim - 1) :
1744           (dim > 1 ?
1745              Utilities::fixed_power<dim - 1>(data.data.front().fe_degree + 1) :
1746              1);
1747 
1748       const unsigned int n_q_points = fe_degree > -1 ?
1749                                         Utilities::pow(n_q_points_1d, dim - 1) :
1750                                         data.n_q_points_face;
1751 
1752       if (evaluate_grad == false)
1753         for (unsigned int c = 0; c < n_components; ++c)
1754           {
1755             switch (dim)
1756               {
1757                 case 3:
1758                   eval1.template values<0, true, false>(values_dofs,
1759                                                         values_quad);
1760                   eval2.template values<1, true, false>(values_quad,
1761                                                         values_quad);
1762                   break;
1763                 case 2:
1764                   eval1.template values<0, true, false>(values_dofs,
1765                                                         values_quad);
1766                   break;
1767                 case 1:
1768                   values_quad[0] = values_dofs[0];
1769                   break;
1770                 default:
1771                   Assert(false, ExcNotImplemented());
1772               }
1773             values_dofs += 2 * size_deg;
1774             values_quad += n_q_points;
1775           }
1776       else
1777         for (unsigned int c = 0; c < n_components; ++c)
1778           {
1779             switch (dim)
1780               {
1781                 case 3:
1782                   if (use_collocation)
1783                     {
1784                       eval1.template values<0, true, false>(values_dofs,
1785                                                             values_quad);
1786                       eval1.template values<1, true, false>(values_quad,
1787                                                             values_quad);
1788                       internal::EvaluatorTensorProduct<
1789                         internal::evaluate_evenodd,
1790                         dim - 1,
1791                         n_q_points_1d,
1792                         n_q_points_1d,
1793                         Number>
1794                         eval_grad(
1795                           AlignedVector<Number>(),
1796                           data.data.front().shape_gradients_collocation_eo,
1797                           AlignedVector<Number>());
1798                       eval_grad.template gradients<0, true, false>(
1799                         values_quad, gradients_quad);
1800                       eval_grad.template gradients<1, true, false>(
1801                         values_quad, gradients_quad + n_q_points);
1802                     }
1803                   else
1804                     {
1805                       eval1.template gradients<0, true, false>(values_dofs,
1806                                                                scratch_data);
1807                       eval2.template values<1, true, false>(scratch_data,
1808                                                             gradients_quad);
1809 
1810                       eval1.template values<0, true, false>(values_dofs,
1811                                                             scratch_data);
1812                       eval2.template gradients<1, true, false>(scratch_data,
1813                                                                gradients_quad +
1814                                                                  n_q_points);
1815 
1816                       if (evaluate_val == true)
1817                         eval2.template values<1, true, false>(scratch_data,
1818                                                               values_quad);
1819                     }
1820                   eval1.template values<0, true, false>(values_dofs + size_deg,
1821                                                         scratch_data);
1822                   eval2.template values<1, true, false>(
1823                     scratch_data, gradients_quad + (dim - 1) * n_q_points);
1824 
1825                   break;
1826                 case 2:
1827                   eval1.template values<0, true, false>(values_dofs + size_deg,
1828                                                         gradients_quad +
1829                                                           (dim - 1) *
1830                                                             n_q_points);
1831                   eval1.template gradients<0, true, false>(values_dofs,
1832                                                            gradients_quad);
1833                   if (evaluate_val == true)
1834                     eval1.template values<0, true, false>(values_dofs,
1835                                                           values_quad);
1836                   break;
1837                 case 1:
1838                   values_quad[0]    = values_dofs[0];
1839                   gradients_quad[0] = values_dofs[1];
1840                   break;
1841                 default:
1842                   AssertThrow(false, ExcNotImplemented());
1843               }
1844             values_dofs += 2 * size_deg;
1845             values_quad += n_q_points;
1846             gradients_quad += dim * n_q_points;
1847           }
1848     }
1849 
1850     static void
1851     integrate_in_face(const unsigned int n_components,
1852                       const MatrixFreeFunctions::ShapeInfo<Number> &data,
1853                       Number *                                      values_dofs,
1854                       Number *                                      values_quad,
1855                       Number *           gradients_quad,
1856                       Number *           scratch_data,
1857                       const bool         integrate_val,
1858                       const bool         integrate_grad,
1859                       const unsigned int subface_index)
1860     {
1861       const AlignedVector<Number> &val1 =
1862         symmetric_evaluate ?
1863           data.data.front().shape_values_eo :
1864           (subface_index >= GeometryInfo<dim>::max_children_per_cell ?
1865              data.data.front().shape_values :
1866              data.data.front().values_within_subface[subface_index % 2]);
1867       const AlignedVector<Number> &val2 =
1868         symmetric_evaluate ?
1869           data.data.front().shape_values_eo :
1870           (subface_index >= GeometryInfo<dim>::max_children_per_cell ?
1871              data.data.front().shape_values :
1872              data.data.front().values_within_subface[subface_index / 2]);
1873 
1874       const AlignedVector<Number> &grad1 =
1875         symmetric_evaluate ?
1876           data.data.front().shape_gradients_eo :
1877           (subface_index >= GeometryInfo<dim>::max_children_per_cell ?
1878              data.data.front().shape_gradients :
1879              data.data.front().gradients_within_subface[subface_index % 2]);
1880       const AlignedVector<Number> &grad2 =
1881         symmetric_evaluate ?
1882           data.data.front().shape_gradients_eo :
1883           (subface_index >= GeometryInfo<dim>::max_children_per_cell ?
1884              data.data.front().shape_gradients :
1885              data.data.front().gradients_within_subface[subface_index / 2]);
1886 
1887       using Eval =
1888         internal::EvaluatorTensorProduct<symmetric_evaluate ?
1889                                            internal::evaluate_evenodd :
1890                                            internal::evaluate_general,
1891                                          dim - 1,
1892                                          fe_degree + 1,
1893                                          n_q_points_1d,
1894                                          Number>;
1895       Eval eval1(val1,
1896                  grad1,
1897                  val1,
1898                  data.data.front().fe_degree + 1,
1899                  data.data.front().n_q_points_1d);
1900       Eval eval2(val2,
1901                  grad2,
1902                  val1,
1903                  data.data.front().fe_degree + 1,
1904                  data.data.front().n_q_points_1d);
1905 
1906       const unsigned int size_deg =
1907         fe_degree > -1 ?
1908           Utilities::pow(fe_degree + 1, dim - 1) :
1909           (dim > 1 ?
1910              Utilities::fixed_power<dim - 1>(data.data.front().fe_degree + 1) :
1911              1);
1912 
1913       const unsigned int n_q_points = fe_degree > -1 ?
1914                                         Utilities::pow(n_q_points_1d, dim - 1) :
1915                                         data.n_q_points_face;
1916 
1917       if (integrate_grad == false)
1918         for (unsigned int c = 0; c < n_components; ++c)
1919           {
1920             switch (dim)
1921               {
1922                 case 3:
1923                   eval2.template values<1, false, false>(values_quad,
1924                                                          values_quad);
1925                   eval1.template values<0, false, false>(values_quad,
1926                                                          values_dofs);
1927                   break;
1928                 case 2:
1929                   eval1.template values<0, false, false>(values_quad,
1930                                                          values_dofs);
1931                   break;
1932                 case 1:
1933                   values_dofs[0] = values_quad[0];
1934                   break;
1935                 default:
1936                   Assert(false, ExcNotImplemented());
1937               }
1938             values_dofs += 2 * size_deg;
1939             values_quad += n_q_points;
1940           }
1941       else
1942         for (unsigned int c = 0; c < n_components; ++c)
1943           {
1944             switch (dim)
1945               {
1946                 case 3:
1947                   eval2.template values<1, false, false>(gradients_quad +
1948                                                            2 * n_q_points,
1949                                                          gradients_quad +
1950                                                            2 * n_q_points);
1951                   eval1.template values<0, false, false>(
1952                     gradients_quad + 2 * n_q_points, values_dofs + size_deg);
1953                   if (use_collocation)
1954                     {
1955                       internal::EvaluatorTensorProduct<
1956                         internal::evaluate_evenodd,
1957                         dim - 1,
1958                         n_q_points_1d,
1959                         n_q_points_1d,
1960                         Number>
1961                         eval_grad(
1962                           AlignedVector<Number>(),
1963                           data.data.front().shape_gradients_collocation_eo,
1964                           AlignedVector<Number>());
1965                       if (integrate_val)
1966                         eval_grad.template gradients<1, false, true>(
1967                           gradients_quad + n_q_points, values_quad);
1968                       else
1969                         eval_grad.template gradients<1, false, false>(
1970                           gradients_quad + n_q_points, values_quad);
1971                       eval_grad.template gradients<0, false, true>(
1972                         gradients_quad, values_quad);
1973                       eval1.template values<1, false, false>(values_quad,
1974                                                              values_quad);
1975                       eval1.template values<0, false, false>(values_quad,
1976                                                              values_dofs);
1977                     }
1978                   else
1979                     {
1980                       if (integrate_val)
1981                         {
1982                           eval2.template values<1, false, false>(values_quad,
1983                                                                  scratch_data);
1984                           eval2.template gradients<1, false, true>(
1985                             gradients_quad + n_q_points, scratch_data);
1986                         }
1987                       else
1988                         eval2.template gradients<1, false, false>(
1989                           gradients_quad + n_q_points, scratch_data);
1990 
1991                       eval1.template values<0, false, false>(scratch_data,
1992                                                              values_dofs);
1993                       eval2.template values<1, false, false>(gradients_quad,
1994                                                              scratch_data);
1995                       eval1.template gradients<0, false, true>(scratch_data,
1996                                                                values_dofs);
1997                     }
1998                   break;
1999                 case 2:
2000                   eval1.template values<0, false, false>(
2001                     gradients_quad + n_q_points, values_dofs + size_deg);
2002                   eval1.template gradients<0, false, false>(gradients_quad,
2003                                                             values_dofs);
2004                   if (integrate_val == true)
2005                     eval1.template values<0, false, true>(values_quad,
2006                                                           values_dofs);
2007                   break;
2008                 case 1:
2009                   values_dofs[0] = values_quad[0];
2010                   values_dofs[1] = gradients_quad[0];
2011                   break;
2012                 default:
2013                   AssertThrow(false, ExcNotImplemented());
2014               }
2015             values_dofs += 2 * size_deg;
2016             values_quad += n_q_points;
2017             gradients_quad += dim * n_q_points;
2018           }
2019     }
2020   };
2021 
2022 
2023 
2024   template <int dim, int fe_degree, typename Number, bool lex_faces = false>
2025   struct FEFaceNormalEvaluationImpl
2026   {
2027     template <bool do_evaluate, bool add_into_output>
2028     static void
2029     interpolate(const unsigned int                            n_components,
2030                 const MatrixFreeFunctions::ShapeInfo<Number> &data,
2031                 const Number *                                input,
2032                 Number *                                      output,
2033                 const bool                                    do_gradients,
2034                 const unsigned int                            face_no)
2035     {
2036       Assert(static_cast<unsigned int>(fe_degree) ==
2037                  data.data.front().fe_degree ||
2038                fe_degree == -1,
2039              ExcInternalError());
2040 
2041       interpolate_generic<do_evaluate, add_into_output>(
2042         n_components,
2043         input,
2044         output,
2045         do_gradients,
2046         face_no,
2047         data.data.front().fe_degree + 1,
2048         data.data.front().shape_data_on_face,
2049         data.dofs_per_component_on_cell,
2050         2 * data.dofs_per_component_on_face);
2051     }
2052 
2053     /**
2054      * Interpolate the values on the cell quadrature points onto a face.
2055      */
2056     template <bool do_evaluate, bool add_into_output>
2057     static void
2058     interpolate_quadrature(const unsigned int n_components,
2059                            const MatrixFreeFunctions::ShapeInfo<Number> &data,
2060                            const Number *                                input,
2061                            Number *                                      output,
2062                            const bool         do_gradients,
2063                            const unsigned int face_no)
2064     {
2065       Assert(static_cast<unsigned int>(fe_degree + 1) ==
2066                  data.data.front().quadrature.size() ||
2067                fe_degree == -1,
2068              ExcInternalError());
2069 
2070       interpolate_generic<do_evaluate, add_into_output>(
2071         n_components,
2072         input,
2073         output,
2074         do_gradients,
2075         face_no,
2076         data.data.front().quadrature.size(),
2077         data.data.front().quadrature_data_on_face,
2078         data.n_q_points,
2079         data.n_q_points_face);
2080     }
2081 
2082   private:
2083     template <bool do_evaluate, bool add_into_output, int face_direction = 0>
2084     static void
2085     interpolate_generic(const unsigned int n_components,
2086                         const Number *     input,
2087                         Number *           output,
2088                         const bool         do_gradients,
2089                         const unsigned int face_no,
2090                         const unsigned int n_points_1d,
2091                         const std::array<AlignedVector<Number>, 2> &shape_data,
2092                         const unsigned int dofs_per_component_on_cell,
2093                         const unsigned int dofs_per_component_on_face)
2094     {
2095       if (face_direction == face_no / 2)
2096         {
2097           internal::EvaluatorTensorProduct<internal::evaluate_general,
2098                                            dim,
2099                                            fe_degree + 1,
2100                                            0,
2101                                            Number>
2102             evalf(shape_data[face_no % 2],
2103                   AlignedVector<Number>(),
2104                   AlignedVector<Number>(),
2105                   n_points_1d,
2106                   0);
2107 
2108           const unsigned int in_stride = do_evaluate ?
2109                                            dofs_per_component_on_cell :
2110                                            dofs_per_component_on_face;
2111           const unsigned int out_stride = do_evaluate ?
2112                                             dofs_per_component_on_face :
2113                                             dofs_per_component_on_cell;
2114 
2115           for (unsigned int c = 0; c < n_components; c++)
2116             {
2117               if (do_gradients)
2118                 evalf.template apply_face<face_direction,
2119                                           do_evaluate,
2120                                           add_into_output,
2121                                           1,
2122                                           lex_faces>(input, output);
2123               else
2124                 evalf.template apply_face<face_direction,
2125                                           do_evaluate,
2126                                           add_into_output,
2127                                           0,
2128                                           lex_faces>(input, output);
2129               input += in_stride;
2130               output += out_stride;
2131             }
2132         }
2133       else if (face_direction < dim)
2134         {
2135           interpolate_generic<do_evaluate,
2136                               add_into_output,
2137                               std::min(face_direction + 1, dim - 1)>(
2138             n_components,
2139             input,
2140             output,
2141             do_gradients,
2142             face_no,
2143             n_points_1d,
2144             shape_data,
2145             dofs_per_component_on_cell,
2146             dofs_per_component_on_face);
2147         }
2148     }
2149   };
2150 
2151 
2152 
2153   // internal helper function for reading data; base version of different types
2154   template <typename VectorizedArrayType, typename Number2>
2155   void
2156   do_vectorized_read(const Number2 *src_ptr, VectorizedArrayType &dst)
2157   {
2158     for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
2159       dst[v] = src_ptr[v];
2160   }
2161 
2162 
2163 
2164   // internal helper function for reading data; specialized version where we
2165   // can use a dedicated load function
2166   template <typename Number, unsigned int width>
2167   void
2168   do_vectorized_read(const Number *src_ptr, VectorizedArray<Number, width> &dst)
2169   {
2170     dst.load(src_ptr);
2171   }
2172 
2173 
2174 
2175   // internal helper function for reading data; base version of different types
2176   template <typename VectorizedArrayType, typename Number2>
2177   void
2178   do_vectorized_gather(const Number2 *      src_ptr,
2179                        const unsigned int * indices,
2180                        VectorizedArrayType &dst)
2181   {
2182     for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
2183       dst[v] = src_ptr[indices[v]];
2184   }
2185 
2186 
2187 
2188   // internal helper function for reading data; specialized version where we
2189   // can use a dedicated gather function
2190   template <typename Number, unsigned int width>
2191   void
2192   do_vectorized_gather(const Number *                  src_ptr,
2193                        const unsigned int *            indices,
2194                        VectorizedArray<Number, width> &dst)
2195   {
2196     dst.gather(src_ptr, indices);
2197   }
2198 
2199 
2200 
2201   // internal helper function for reading data; base version of different types
2202   template <typename VectorizedArrayType, typename Number2>
2203   void
2204   do_vectorized_add(const VectorizedArrayType src, Number2 *dst_ptr)
2205   {
2206     for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
2207       dst_ptr[v] += src[v];
2208   }
2209 
2210 
2211 
2212   // internal helper function for reading data; specialized version where we
2213   // can use a dedicated load function
2214   template <typename Number, unsigned int width>
2215   void
2216   do_vectorized_add(const VectorizedArray<Number, width> src, Number *dst_ptr)
2217   {
2218     VectorizedArray<Number, width> tmp;
2219     tmp.load(dst_ptr);
2220     (tmp + src).store(dst_ptr);
2221   }
2222 
2223 
2224 
2225   // internal helper function for reading data; base version of different types
2226   template <typename VectorizedArrayType, typename Number2>
2227   void
2228   do_vectorized_scatter_add(const VectorizedArrayType src,
2229                             const unsigned int *      indices,
2230                             Number2 *                 dst_ptr)
2231   {
2232     for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
2233       dst_ptr[indices[v]] += src[v];
2234   }
2235 
2236 
2237 
2238   // internal helper function for reading data; specialized version where we
2239   // can use a dedicated gather function
2240   template <typename Number, unsigned int width>
2241   void
2242   do_vectorized_scatter_add(const VectorizedArray<Number, width> src,
2243                             const unsigned int *                 indices,
2244                             Number *                             dst_ptr)
2245   {
2246 #if DEAL_II_VECTORIZATION_WIDTH_IN_BITS < 512
2247     for (unsigned int v = 0; v < width; ++v)
2248       dst_ptr[indices[v]] += src[v];
2249 #else
2250     VectorizedArray<Number, width> tmp;
2251     tmp.gather(dst_ptr, indices);
2252     (tmp + src).scatter(indices, dst_ptr);
2253 #endif
2254   }
2255 
2256 
2257 
2258   template <typename Number>
2259   void
2260   adjust_for_face_orientation(const unsigned int            dim,
2261                               const unsigned int            n_components,
2262                               const unsigned int            face_orientation,
2263                               const Table<2, unsigned int> &orientation_map,
2264                               const bool                    integrate,
2265                               const bool                    values,
2266                               const bool                    gradients,
2267                               const unsigned int            n_q_points,
2268                               Number *                      tmp_values,
2269                               Number *                      values_quad,
2270                               Number *                      gradients_quad)
2271   {
2272     Assert(face_orientation, ExcInternalError());
2273     const unsigned int *orientation = &orientation_map[face_orientation][0];
2274     for (unsigned int c = 0; c < n_components; ++c)
2275       {
2276         if (values == true)
2277           {
2278             if (integrate)
2279               for (unsigned int q = 0; q < n_q_points; ++q)
2280                 tmp_values[q] = values_quad[c * n_q_points + orientation[q]];
2281             else
2282               for (unsigned int q = 0; q < n_q_points; ++q)
2283                 tmp_values[orientation[q]] = values_quad[c * n_q_points + q];
2284             for (unsigned int q = 0; q < n_q_points; ++q)
2285               values_quad[c * n_q_points + q] = tmp_values[q];
2286           }
2287         if (gradients == true)
2288           for (unsigned int d = 0; d < dim; ++d)
2289             {
2290               if (integrate)
2291                 for (unsigned int q = 0; q < n_q_points; ++q)
2292                   tmp_values[q] =
2293                     gradients_quad[(c * dim + d) * n_q_points + orientation[q]];
2294               else
2295                 for (unsigned int q = 0; q < n_q_points; ++q)
2296                   tmp_values[orientation[q]] =
2297                     gradients_quad[(c * dim + d) * n_q_points + q];
2298               for (unsigned int q = 0; q < n_q_points; ++q)
2299                 gradients_quad[(c * dim + d) * n_q_points + q] = tmp_values[q];
2300             }
2301       }
2302   }
2303 
2304 
2305 
2306   template <int dim, typename VectorizedArrayType>
2307   struct FEFaceEvaluationImplEvaluateSelector
2308   {
2309     template <int fe_degree, int n_q_points_1d>
2310     static bool
2311     run(const unsigned int                                         n_components,
2312         const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data,
2313         const VectorizedArrayType *                                values_array,
2314         VectorizedArrayType *                                      values_quad,
2315         VectorizedArrayType *         gradients_quad,
2316         VectorizedArrayType *         scratch_data,
2317         const bool                    evaluate_values,
2318         const bool                    evaluate_gradients,
2319         const unsigned int            face_no,
2320         const unsigned int            subface_index,
2321         const unsigned int            face_orientation,
2322         const Table<2, unsigned int> &orientation_map)
2323     {
2324       constexpr unsigned int static_dofs_per_face =
2325         fe_degree > -1 ? Utilities::pow(fe_degree + 1, dim - 1) :
2326                          numbers::invalid_unsigned_int;
2327       const unsigned int dofs_per_face =
2328         fe_degree > -1 ?
2329           static_dofs_per_face :
2330           Utilities::pow(data.data.front().fe_degree + 1, dim - 1);
2331 
2332       VectorizedArrayType *temp1 = scratch_data;
2333 
2334       FEFaceNormalEvaluationImpl<dim, fe_degree, VectorizedArrayType>::
2335         template interpolate<true, false>(
2336           n_components, data, values_array, temp1, evaluate_gradients, face_no);
2337 
2338       const unsigned int n_q_points_1d_actual =
2339         fe_degree > -1 ? n_q_points_1d : 0;
2340       if (fe_degree > -1 &&
2341           subface_index >= GeometryInfo<dim>::max_children_per_cell &&
2342           data.element_type <= MatrixFreeFunctions::tensor_symmetric)
2343         FEFaceEvaluationImpl<
2344           true,
2345           dim,
2346           fe_degree,
2347           n_q_points_1d_actual,
2348           VectorizedArrayType>::evaluate_in_face(n_components,
2349                                                  data,
2350                                                  temp1,
2351                                                  values_quad,
2352                                                  gradients_quad,
2353                                                  scratch_data + 2 *
2354                                                                   n_components *
2355                                                                   dofs_per_face,
2356                                                  evaluate_values,
2357                                                  evaluate_gradients,
2358                                                  subface_index);
2359       else
2360         FEFaceEvaluationImpl<
2361           false,
2362           dim,
2363           fe_degree,
2364           n_q_points_1d_actual,
2365           VectorizedArrayType>::evaluate_in_face(n_components,
2366                                                  data,
2367                                                  temp1,
2368                                                  values_quad,
2369                                                  gradients_quad,
2370                                                  scratch_data + 2 *
2371                                                                   n_components *
2372                                                                   dofs_per_face,
2373                                                  evaluate_values,
2374                                                  evaluate_gradients,
2375                                                  subface_index);
2376 
2377       if (face_orientation)
2378         adjust_for_face_orientation(dim,
2379                                     n_components,
2380                                     face_orientation,
2381                                     orientation_map,
2382                                     false,
2383                                     evaluate_values,
2384                                     evaluate_gradients,
2385                                     data.n_q_points_face,
2386                                     scratch_data,
2387                                     values_quad,
2388                                     gradients_quad);
2389 
2390       return false;
2391     }
2392   };
2393 
2394 
2395 
2396   template <int dim, typename VectorizedArrayType>
2397   struct FEFaceEvaluationImplIntegrateSelector
2398   {
2399     template <int fe_degree, int n_q_points_1d>
2400     static bool
2401     run(const unsigned int                                         n_components,
2402         const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data,
2403         VectorizedArrayType *                                      values_array,
2404         VectorizedArrayType *                                      values_quad,
2405         VectorizedArrayType *         gradients_quad,
2406         VectorizedArrayType *         scratch_data,
2407         const bool                    integrate_values,
2408         const bool                    integrate_gradients,
2409         const unsigned int            face_no,
2410         const unsigned int            subface_index,
2411         const unsigned int            face_orientation,
2412         const Table<2, unsigned int> &orientation_map)
2413     {
2414       if (face_orientation)
2415         adjust_for_face_orientation(dim,
2416                                     n_components,
2417                                     face_orientation,
2418                                     orientation_map,
2419                                     true,
2420                                     integrate_values,
2421                                     integrate_gradients,
2422                                     data.n_q_points_face,
2423                                     scratch_data,
2424                                     values_quad,
2425                                     gradients_quad);
2426 
2427       constexpr unsigned int static_dofs_per_face =
2428         fe_degree > -1 ? Utilities::pow(fe_degree + 1, dim - 1) :
2429                          numbers::invalid_unsigned_int;
2430       const unsigned int dofs_per_face =
2431         fe_degree > -1 ?
2432           static_dofs_per_face :
2433           Utilities::pow(data.data.front().fe_degree + 1, dim - 1);
2434 
2435       VectorizedArrayType *temp1 = scratch_data;
2436 
2437       const unsigned int n_q_points_1d_actual =
2438         fe_degree > -1 ? n_q_points_1d : 0;
2439       if (fe_degree > -1 &&
2440           subface_index >= GeometryInfo<dim - 1>::max_children_per_cell &&
2441           data.element_type <= MatrixFreeFunctions::tensor_symmetric)
2442         FEFaceEvaluationImpl<
2443           true,
2444           dim,
2445           fe_degree,
2446           n_q_points_1d_actual,
2447           VectorizedArrayType>::integrate_in_face(n_components,
2448                                                   data,
2449                                                   temp1,
2450                                                   values_quad,
2451                                                   gradients_quad,
2452                                                   scratch_data +
2453                                                     2 * n_components *
2454                                                       dofs_per_face,
2455                                                   integrate_values,
2456                                                   integrate_gradients,
2457                                                   subface_index);
2458       else
2459         FEFaceEvaluationImpl<
2460           false,
2461           dim,
2462           fe_degree,
2463           n_q_points_1d_actual,
2464           VectorizedArrayType>::integrate_in_face(n_components,
2465                                                   data,
2466                                                   temp1,
2467                                                   values_quad,
2468                                                   gradients_quad,
2469                                                   scratch_data +
2470                                                     2 * n_components *
2471                                                       dofs_per_face,
2472                                                   integrate_values,
2473                                                   integrate_gradients,
2474                                                   subface_index);
2475 
2476       FEFaceNormalEvaluationImpl<dim, fe_degree, VectorizedArrayType>::
2477         template interpolate<false, false>(n_components,
2478                                            data,
2479                                            temp1,
2480                                            values_array,
2481                                            integrate_gradients,
2482                                            face_no);
2483       return false;
2484     }
2485   };
2486 
2487 
2488 
2489   template <int n_face_orientations, typename Processor>
2490   static bool
2491   fe_face_evaluation_process_and_io(Processor &proc)
2492   {
2493     auto  n_components             = proc.n_components;
2494     auto  integrate                = proc.integrate;
2495     auto  global_vector_ptr        = proc.global_vector_ptr;
2496     auto &data                     = proc.data;
2497     auto &dof_info                 = proc.dof_info;
2498     auto  values_quad              = proc.values_quad;
2499     auto  gradients_quad           = proc.gradients_quad;
2500     auto  scratch_data             = proc.scratch_data;
2501     auto  do_values                = proc.do_values;
2502     auto  do_gradients             = proc.do_gradients;
2503     auto  active_fe_index          = proc.active_fe_index;
2504     auto  first_selected_component = proc.first_selected_component;
2505     auto  cells                    = proc.cells;
2506     auto  face_nos                 = proc.face_nos;
2507     auto  subface_index            = proc.subface_index;
2508     auto  dof_access_index         = proc.dof_access_index;
2509     auto  face_orientations        = proc.face_orientations;
2510     auto &orientation_map          = proc.orientation_map;
2511 
2512     static const int dim       = Processor::dim_;
2513     static const int fe_degree = Processor::fe_degree_;
2514     using VectorizedArrayType  = typename Processor::VectorizedArrayType_;
2515 
2516     using Number2_ = typename Processor::Number2_;
2517 
2518     const unsigned int cell = cells[0];
2519 
2520     // In the case of integration, we do not need to reshuffle the
2521     // data at the quadrature points to adjust for the face
2522     // orientation if the shape functions are nodal at the cell
2523     // boundaries (and we only requested the integration of the
2524     // values) or Hermite shape functions are used. These cases are
2525     // handled later when the values are written back into the
2526     // glrobal vector.
2527     if (integrate &&
2528         (face_orientations[0] > 0 &&
2529          (subface_index < GeometryInfo<dim>::max_children_per_cell ||
2530           !(((do_gradients == false &&
2531               data.data.front().nodal_at_cell_boundaries == true &&
2532               fe_degree > 0) ||
2533              (data.element_type ==
2534                 MatrixFreeFunctions::tensor_symmetric_hermite &&
2535               fe_degree > 1)) &&
2536             (dof_info.index_storage_variants[dof_access_index][cell] ==
2537                MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
2538                  interleaved_contiguous ||
2539              dof_info.index_storage_variants[dof_access_index][cell] ==
2540                MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
2541                  interleaved_contiguous_strided ||
2542              dof_info.index_storage_variants[dof_access_index][cell] ==
2543                MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
2544                  interleaved_contiguous_mixed_strides ||
2545              dof_info.index_storage_variants[dof_access_index][cell] ==
2546                MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
2547                  contiguous)))))
2548       {
2549         AssertDimension(face_orientations.size(), 1);
2550         adjust_for_face_orientation(dim,
2551                                     n_components,
2552                                     face_orientations[0],
2553                                     orientation_map,
2554                                     true,
2555                                     do_values,
2556                                     do_gradients,
2557                                     data.n_q_points_face,
2558                                     scratch_data,
2559                                     values_quad,
2560                                     gradients_quad);
2561       }
2562 
2563     // we know that the gradient weights for the Hermite case on the
2564     // right (side==1) are the negative from the value at the left
2565     // (side==0), so we only read out one of them.
2566     VectorizedArrayType grad_weight =
2567       (data.data.front().nodal_at_cell_boundaries == true && fe_degree > 1 &&
2568        data.element_type == MatrixFreeFunctions::tensor_symmetric_hermite) ?
2569         data.data.front()
2570           .shape_data_on_face[0][fe_degree + (integrate ?
2571                                                 (2 - (face_nos[0] % 2)) :
2572                                                 (1 + (face_nos[0] % 2)))] :
2573         VectorizedArrayType(0.0 /*dummy*/);
2574 
2575     constexpr unsigned int static_dofs_per_component =
2576       fe_degree > -1 ? Utilities::pow(fe_degree + 1, dim) :
2577                        numbers::invalid_unsigned_int;
2578     constexpr unsigned int static_dofs_per_face =
2579       fe_degree > -1 ? Utilities::pow(fe_degree + 1, dim - 1) :
2580                        numbers::invalid_unsigned_int;
2581     const unsigned int dofs_per_face =
2582       fe_degree > -1 ? static_dofs_per_face :
2583                        Utilities::pow(data.data.front().fe_degree + 1, dim - 1);
2584 
2585     VectorizedArrayType *temp1 = scratch_data;
2586 
2587     const unsigned int dummy = 0;
2588 
2589     // re-orientation
2590     std::array<const unsigned int *, n_face_orientations> orientation = {};
2591 
2592     if (n_face_orientations == 1)
2593       orientation[0] = (data.data.front().nodal_at_cell_boundaries == true) ?
2594                          &data.face_orientations[face_orientations[0]][0] :
2595                          &dummy;
2596     else
2597       {
2598         for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
2599           {
2600             // the loop breaks once an invalid_unsigned_int is hit for
2601             // all cases except the exterior faces in the ECL loop (where
2602             // some faces might be at the boundaries but others not)
2603             if (cells[v] == numbers::invalid_unsigned_int)
2604               continue;
2605 
2606             orientation[v] =
2607               (data.data.front().nodal_at_cell_boundaries == true) ?
2608                 &data.face_orientations[face_orientations[v]][0] :
2609                 &dummy;
2610           }
2611       }
2612 
2613     // face_to_cell_index_hermite
2614     std::array<const unsigned int *, n_face_orientations> index_array_hermite =
2615       {};
2616 
2617     if (n_face_orientations == 1)
2618       index_array_hermite[0] =
2619         (data.data.front().nodal_at_cell_boundaries == true && fe_degree > 1 &&
2620          data.element_type == MatrixFreeFunctions::tensor_symmetric_hermite) ?
2621           &data.face_to_cell_index_hermite(face_nos[0], 0) :
2622           &dummy;
2623 
2624     if (n_face_orientations > 1 &&
2625         data.data.front().nodal_at_cell_boundaries == true && fe_degree > 1 &&
2626         data.element_type == MatrixFreeFunctions::tensor_symmetric_hermite)
2627       {
2628         for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
2629           {
2630             if (cells[v] == numbers::invalid_unsigned_int)
2631               continue;
2632 
2633             grad_weight[v] =
2634               data.data.front().shape_data_on_face
2635                 [0][fe_degree + (integrate ? (2 - (face_nos[v] % 2)) :
2636                                              (1 + (face_nos[v] % 2)))][v];
2637 
2638             index_array_hermite[v] =
2639               &data.face_to_cell_index_hermite(face_nos[v], 0);
2640           }
2641       }
2642 
2643     // face_to_cell_index_nodal
2644     std::array<const unsigned int *, n_face_orientations> index_array_nodal =
2645       {};
2646 
2647     if (n_face_orientations == 1)
2648       index_array_nodal[0] =
2649         (data.data.front().nodal_at_cell_boundaries == true) ?
2650           &data.face_to_cell_index_nodal(face_nos[0], 0) :
2651           &dummy;
2652 
2653     if (n_face_orientations > 1 &&
2654         (data.data.front().nodal_at_cell_boundaries == true))
2655       {
2656         for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
2657           {
2658             if (cells[v] == numbers::invalid_unsigned_int)
2659               continue;
2660 
2661             index_array_nodal[v] =
2662               &data.face_to_cell_index_nodal(face_nos[v], 0);
2663           }
2664       }
2665 
2666     const auto reorientate = [&](const unsigned int v, const unsigned int i) {
2667       return (dim < 3 ||
2668               face_orientations[n_face_orientations == 1 ? 0 : v] == 0 ||
2669               subface_index < GeometryInfo<dim>::max_children_per_cell) ?
2670                i :
2671                orientation[v][i];
2672     };
2673 
2674     // this variable keeps track of whether we are able to directly write
2675     // the results into the result (function returns true) or not, requiring
2676     // an additional call to another function
2677     bool accesses_global_vector = true;
2678 
2679     for (unsigned int comp = 0; comp < n_components; ++comp)
2680       {
2681         if (integrate)
2682           proc.function_0(temp1, comp);
2683 
2684         // we can only use the fast functions if we know the polynomial degree
2685         // as a template parameter (fe_degree != -1), and it only makes sense
2686         // to use the functions for at least linear functions for values on
2687         // the faces and quadratic functions for gradients on the faces, so
2688         // include the switch here
2689         if ((do_gradients == false &&
2690              data.data.front().nodal_at_cell_boundaries == true &&
2691              fe_degree > 0) ||
2692             (data.element_type ==
2693                MatrixFreeFunctions::tensor_symmetric_hermite &&
2694              fe_degree > 1))
2695           {
2696             // case 1: contiguous and interleaved indices
2697             if (n_face_orientations == 1 &&
2698                 dof_info.index_storage_variants[dof_access_index][cell] ==
2699                   MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
2700                     interleaved_contiguous)
2701               {
2702                 AssertDimension(n_face_orientations, 1);
2703 
2704                 AssertDimension(
2705                   dof_info.n_vectorization_lanes_filled[dof_access_index][cell],
2706                   VectorizedArrayType::size());
2707                 Number2_ *vector_ptr =
2708                   global_vector_ptr +
2709                   dof_info.dof_indices_contiguous[dof_access_index]
2710                                                  [cell *
2711                                                   VectorizedArrayType::size()] +
2712                   (dof_info
2713                      .component_dof_indices_offset[active_fe_index]
2714                                                   [first_selected_component] +
2715                    comp * static_dofs_per_component) *
2716                     VectorizedArrayType::size();
2717 
2718                 if (fe_degree > 1 && do_gradients == true)
2719                   {
2720                     for (unsigned int i = 0; i < dofs_per_face; ++i)
2721                       {
2722                         if (n_face_orientations == 1)
2723                           {
2724                             const unsigned int ind1 =
2725                               index_array_hermite[0][2 * i];
2726                             const unsigned int ind2 =
2727                               index_array_hermite[0][2 * i + 1];
2728                             AssertIndexRange(ind1,
2729                                              data.dofs_per_component_on_cell);
2730                             AssertIndexRange(ind2,
2731                                              data.dofs_per_component_on_cell);
2732                             const unsigned int i_ = reorientate(0, i);
2733                             proc.function_1a(
2734                               temp1[i_],
2735                               temp1[i_ + dofs_per_face],
2736                               vector_ptr + ind1 * VectorizedArrayType::size(),
2737                               vector_ptr + ind2 * VectorizedArrayType::size(),
2738                               grad_weight);
2739                           }
2740                         else
2741                           {
2742                             Assert(false, ExcNotImplemented());
2743                           }
2744                       }
2745                   }
2746                 else
2747                   {
2748                     for (unsigned int i = 0; i < dofs_per_face; ++i)
2749                       {
2750                         if (n_face_orientations == 1)
2751                           {
2752                             const unsigned int i_  = reorientate(0, i);
2753                             const unsigned int ind = index_array_nodal[0][i];
2754                             proc.function_1b(temp1[i_],
2755                                              vector_ptr +
2756                                                ind *
2757                                                  VectorizedArrayType::size());
2758                           }
2759                         else
2760                           {
2761                             Assert(false, ExcNotImplemented());
2762                           }
2763                       }
2764                   }
2765               }
2766 
2767             // case 2: contiguous and interleaved indices with fixed stride
2768             else if (n_face_orientations == 1 &&
2769                      dof_info.index_storage_variants[dof_access_index][cell] ==
2770                        MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
2771                          interleaved_contiguous_strided)
2772               {
2773                 AssertDimension(n_face_orientations, 1);
2774 
2775                 AssertDimension(
2776                   dof_info.n_vectorization_lanes_filled[dof_access_index][cell],
2777                   VectorizedArrayType::size());
2778                 const unsigned int *indices =
2779                   &dof_info.dof_indices_contiguous[dof_access_index]
2780                                                   [cell *
2781                                                    VectorizedArrayType::size()];
2782                 Number2_ *vector_ptr =
2783                   global_vector_ptr +
2784                   (comp * static_dofs_per_component +
2785                    dof_info
2786                      .component_dof_indices_offset[active_fe_index]
2787                                                   [first_selected_component]) *
2788                     VectorizedArrayType::size();
2789                 if (fe_degree > 1 && do_gradients == true)
2790                   {
2791                     for (unsigned int i = 0; i < dofs_per_face; ++i)
2792                       {
2793                         if (n_face_orientations == 1)
2794                           {
2795                             const unsigned int i_ = reorientate(0, i);
2796                             const unsigned int ind1 =
2797                               index_array_hermite[0][2 * i] *
2798                               VectorizedArrayType::size();
2799                             const unsigned int ind2 =
2800                               index_array_hermite[0][2 * i + 1] *
2801                               VectorizedArrayType::size();
2802                             proc.function_2a(temp1[i_],
2803                                              temp1[i_ + dofs_per_face],
2804                                              vector_ptr + ind1,
2805                                              vector_ptr + ind2,
2806                                              grad_weight,
2807                                              indices,
2808                                              indices);
2809                           }
2810                         else
2811                           {
2812                             Assert(false, ExcNotImplemented());
2813                           }
2814                       }
2815                   }
2816                 else
2817                   {
2818                     for (unsigned int i = 0; i < dofs_per_face; ++i)
2819                       {
2820                         if (n_face_orientations == 1)
2821                           {
2822                             const unsigned int i_ = reorientate(0, i);
2823                             const unsigned int ind =
2824                               index_array_nodal[0][i] *
2825                               VectorizedArrayType::size();
2826                             proc.function_2b(temp1[i_],
2827                                              vector_ptr + ind,
2828                                              indices);
2829                           }
2830                         else
2831                           {
2832                             Assert(false, ExcNotImplemented());
2833                           }
2834                       }
2835                   }
2836               }
2837 
2838             // case 3: contiguous and interleaved indices with mixed stride
2839             else if (n_face_orientations == 1 &&
2840                      dof_info.index_storage_variants[dof_access_index][cell] ==
2841                        MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
2842                          interleaved_contiguous_mixed_strides)
2843               {
2844                 AssertDimension(n_face_orientations, 1);
2845 
2846                 const unsigned int *strides =
2847                   &dof_info.dof_indices_interleave_strides
2848                      [dof_access_index][cell * VectorizedArrayType::size()];
2849                 unsigned int indices[VectorizedArrayType::size()];
2850                 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
2851                   indices[v] =
2852                     dof_info.dof_indices_contiguous
2853                       [dof_access_index]
2854                       [cell * VectorizedArrayType::size() + v] +
2855                     (dof_info
2856                        .component_dof_indices_offset[active_fe_index]
2857                                                     [first_selected_component] +
2858                      comp * static_dofs_per_component) *
2859                       strides[v];
2860                 const unsigned int n_filled_lanes =
2861                   dof_info.n_vectorization_lanes_filled[dof_access_index][cell];
2862 
2863                 if (fe_degree > 1 && do_gradients == true)
2864                   {
2865                     if (n_filled_lanes == VectorizedArrayType::size())
2866                       for (unsigned int i = 0; i < dofs_per_face; ++i)
2867                         {
2868                           if (n_face_orientations == 1)
2869                             {
2870                               const unsigned int i_ = reorientate(0, i);
2871                               unsigned int ind1[VectorizedArrayType::size()];
2872                               DEAL_II_OPENMP_SIMD_PRAGMA
2873                               for (unsigned int v = 0;
2874                                    v < VectorizedArrayType::size();
2875                                    ++v)
2876                                 ind1[v] =
2877                                   indices[v] +
2878                                   index_array_hermite[0 /*TODO*/][2 * i] *
2879                                     strides[v];
2880                               unsigned int ind2[VectorizedArrayType::size()];
2881                               DEAL_II_OPENMP_SIMD_PRAGMA
2882                               for (unsigned int v = 0;
2883                                    v < VectorizedArrayType::size();
2884                                    ++v)
2885                                 ind2[v] =
2886                                   indices[v] +
2887                                   index_array_hermite[0 /*TODO*/][2 * i + 1] *
2888                                     strides[v];
2889                               proc.function_2a(temp1[i_],
2890                                                temp1[i_ + dofs_per_face],
2891                                                global_vector_ptr,
2892                                                global_vector_ptr,
2893                                                grad_weight,
2894                                                ind1,
2895                                                ind2);
2896                             }
2897                           else
2898                             {
2899                               Assert(false, ExcNotImplemented());
2900                             }
2901                         }
2902                     else
2903                       {
2904                         if (integrate == false)
2905                           for (unsigned int i = 0; i < 2 * dofs_per_face; ++i)
2906                             temp1[i] = VectorizedArrayType();
2907 
2908                         for (unsigned int v = 0; v < n_filled_lanes; ++v)
2909                           for (unsigned int i = 0; i < dofs_per_face; ++i)
2910                             {
2911                               const unsigned int i_ =
2912                                 reorientate(n_face_orientations == 1 ? 0 : v,
2913                                             i);
2914                               proc.function_3a(
2915                                 temp1[i_][v],
2916                                 temp1[i_ + dofs_per_face][v],
2917                                 global_vector_ptr
2918                                   [indices[v] +
2919                                    index_array_hermite
2920                                        [n_face_orientations == 1 ? 0 : v]
2921                                        [2 * i] *
2922                                      strides[v]],
2923                                 global_vector_ptr
2924                                   [indices[v] +
2925                                    index_array_hermite
2926                                        [n_face_orientations == 1 ? 0 : v]
2927                                        [2 * i + 1] *
2928                                      strides[v]],
2929                                 grad_weight[n_face_orientations == 1 ? 0 : v]);
2930                             }
2931                       }
2932                   }
2933                 else
2934                   {
2935                     if (n_filled_lanes == VectorizedArrayType::size())
2936                       for (unsigned int i = 0; i < dofs_per_face; ++i)
2937                         {
2938                           if (n_face_orientations == 1)
2939                             {
2940                               unsigned int ind[VectorizedArrayType::size()];
2941                               DEAL_II_OPENMP_SIMD_PRAGMA
2942                               for (unsigned int v = 0;
2943                                    v < VectorizedArrayType::size();
2944                                    ++v)
2945                                 ind[v] = indices[v] +
2946                                          index_array_nodal[0][i] * strides[v];
2947                               const unsigned int i_ = reorientate(0, i);
2948                               proc.function_2b(temp1[i_],
2949                                                global_vector_ptr,
2950                                                ind);
2951                             }
2952                           else
2953                             {
2954                               Assert(false, ExcNotImplemented());
2955                             }
2956                         }
2957                     else
2958                       {
2959                         if (integrate == false)
2960                           for (unsigned int i = 0; i < dofs_per_face; ++i)
2961                             temp1[i] = VectorizedArrayType();
2962 
2963                         for (unsigned int v = 0; v < n_filled_lanes; ++v)
2964                           for (unsigned int i = 0; i < dofs_per_face; ++i)
2965                             proc.function_3b(
2966                               temp1[reorientate(
2967                                 n_face_orientations == 1 ? 0 : v, i)][v],
2968                               global_vector_ptr
2969                                 [indices[v] +
2970                                  index_array_nodal
2971                                      [n_face_orientations == 1 ? 0 : v][i] *
2972                                    strides[v]]);
2973                       }
2974                   }
2975               }
2976 
2977             // case 4: contiguous indices without interleaving
2978             else if (n_face_orientations > 1 ||
2979                      dof_info.index_storage_variants[dof_access_index][cell] ==
2980                        MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
2981                          contiguous)
2982               {
2983                 const unsigned int *indices =
2984                   &dof_info.dof_indices_contiguous[dof_access_index]
2985                                                   [cell *
2986                                                    VectorizedArrayType::size()];
2987                 Number2_ *vector_ptr =
2988                   global_vector_ptr + comp * static_dofs_per_component +
2989                   dof_info
2990                     .component_dof_indices_offset[active_fe_index]
2991                                                  [first_selected_component];
2992 
2993                 if (do_gradients == true &&
2994                     data.element_type ==
2995                       MatrixFreeFunctions::tensor_symmetric_hermite)
2996                   {
2997                     if (n_face_orientations == 1 &&
2998                         dof_info.n_vectorization_lanes_filled[dof_access_index]
2999                                                              [cell] ==
3000                           VectorizedArrayType::size())
3001                       for (unsigned int i = 0; i < dofs_per_face; ++i)
3002                         {
3003                           const unsigned int ind1 =
3004                             index_array_hermite[0][2 * i];
3005                           const unsigned int ind2 =
3006                             index_array_hermite[0][2 * i + 1];
3007                           const unsigned int i_ = reorientate(0, i);
3008 
3009                           proc.function_2a(temp1[i_],
3010                                            temp1[i_ + dofs_per_face],
3011                                            vector_ptr + ind1,
3012                                            vector_ptr + ind2,
3013                                            grad_weight,
3014                                            indices,
3015                                            indices);
3016                         }
3017                     else if (n_face_orientations == 1)
3018                       for (unsigned int i = 0; i < dofs_per_face; ++i)
3019                         {
3020                           const unsigned int ind1 =
3021                             index_array_hermite[0][2 * i];
3022                           const unsigned int ind2 =
3023                             index_array_hermite[0][2 * i + 1];
3024                           const unsigned int i_ = reorientate(0, i);
3025 
3026                           const unsigned int n_filled_lanes =
3027                             dof_info
3028                               .n_vectorization_lanes_filled[dof_access_index]
3029                                                            [cell];
3030 
3031                           for (unsigned int v = 0; v < n_filled_lanes; ++v)
3032                             proc.function_3a(temp1[i_][v],
3033                                              temp1[i_ + dofs_per_face][v],
3034                                              vector_ptr[ind1 + indices[v]],
3035                                              vector_ptr[ind2 + indices[v]],
3036                                              grad_weight[v]);
3037 
3038                           if (integrate == false)
3039                             for (unsigned int v = n_filled_lanes;
3040                                  v < VectorizedArrayType::size();
3041                                  ++v)
3042                               {
3043                                 temp1[i_][v]                 = 0.0;
3044                                 temp1[i_ + dofs_per_face][v] = 0.0;
3045                               }
3046                         }
3047                     else
3048                       {
3049                         Assert(false, ExcNotImplemented());
3050 
3051                         const unsigned int n_filled_lanes =
3052                           dof_info
3053                             .n_vectorization_lanes_filled[dof_access_index]
3054                                                          [cell];
3055 
3056                         for (unsigned int v = 0; v < n_filled_lanes; ++v)
3057                           for (unsigned int i = 0; i < dofs_per_face; ++i)
3058                             proc.function_3a(
3059                               temp1[reorientate(v, i)][v],
3060                               temp1[reorientate(v, i) + dofs_per_face][v],
3061                               vector_ptr[index_array_hermite[v][2 * i] +
3062                                          indices[v]],
3063                               vector_ptr[index_array_hermite[v][2 * i + 1] +
3064                                          indices[v]],
3065                               grad_weight[v]);
3066                       }
3067                   }
3068                 else
3069                   {
3070                     if (n_face_orientations == 1 &&
3071                         dof_info.n_vectorization_lanes_filled[dof_access_index]
3072                                                              [cell] ==
3073                           VectorizedArrayType::size())
3074                       for (unsigned int i = 0; i < dofs_per_face; ++i)
3075                         {
3076                           const unsigned int ind = index_array_nodal[0][i];
3077                           const unsigned int i_  = reorientate(0, i);
3078 
3079                           proc.function_2b(temp1[i_],
3080                                            vector_ptr + ind,
3081                                            indices);
3082                         }
3083                     else if (n_face_orientations == 1)
3084                       for (unsigned int i = 0; i < dofs_per_face; ++i)
3085                         {
3086                           const unsigned int ind = index_array_nodal[0][i];
3087                           const unsigned int i_  = reorientate(0, i);
3088 
3089                           const unsigned int n_filled_lanes =
3090                             dof_info
3091                               .n_vectorization_lanes_filled[dof_access_index]
3092                                                            [cell];
3093 
3094                           for (unsigned int v = 0; v < n_filled_lanes; ++v)
3095                             proc.function_3b(temp1[i_][v],
3096                                              vector_ptr[ind + indices[v]]);
3097 
3098                           if (integrate == false)
3099                             for (unsigned int v = n_filled_lanes;
3100                                  v < VectorizedArrayType::size();
3101                                  ++v)
3102                               temp1[i_][v] = 0.0;
3103                         }
3104                     else
3105                       for (unsigned int i = 0; i < dofs_per_face; ++i)
3106                         {
3107                           for (unsigned int v = 0;
3108                                v < VectorizedArrayType::size();
3109                                ++v)
3110                             if (cells[v] != numbers::invalid_unsigned_int)
3111                               proc.function_3b(
3112                                 temp1[reorientate(v, i)][v],
3113                                 vector_ptr[index_array_nodal[v][i] +
3114                                            dof_info.dof_indices_contiguous
3115                                              [dof_access_index][cells[v]]]);
3116                         }
3117                   }
3118               }
3119             else
3120               {
3121                 // case 5: default vector access
3122                 AssertDimension(n_face_orientations, 1);
3123 
3124                 // for the integrate_scatter path (integrate == true), we
3125                 // need to only prepare the data in this function for all
3126                 // components to later call distribute_local_to_global();
3127                 // for the gather_evaluate path (integrate == false), we
3128                 // instead want to leave early because we need to get the
3129                 // vector data from somewhere else
3130                 proc.function_5(temp1, comp);
3131                 if (integrate)
3132                   accesses_global_vector = false;
3133                 else
3134                   return false;
3135               }
3136           }
3137         else
3138           {
3139             // case 5: default vector access
3140             AssertDimension(n_face_orientations, 1);
3141 
3142             proc.function_5(temp1, comp);
3143             if (integrate)
3144               accesses_global_vector = false;
3145             else
3146               return false;
3147           }
3148 
3149         if (!integrate)
3150           proc.function_0(temp1, comp);
3151       }
3152 
3153     if (!integrate &&
3154         (face_orientations[0] > 0 &&
3155          subface_index < GeometryInfo<dim>::max_children_per_cell))
3156       {
3157         AssertDimension(n_face_orientations, 1);
3158         adjust_for_face_orientation(dim,
3159                                     n_components,
3160                                     face_orientations[0],
3161                                     orientation_map,
3162                                     false,
3163                                     do_values,
3164                                     do_gradients,
3165                                     data.n_q_points_face,
3166                                     scratch_data,
3167                                     values_quad,
3168                                     gradients_quad);
3169       }
3170 
3171     return accesses_global_vector;
3172   }
3173 
3174 
3175 
3176   template <int dim,
3177             typename Number,
3178             typename VectorizedArrayType,
3179             typename Number2 = Number>
3180   struct FEFaceEvaluationImplGatherEvaluateSelector
3181   {
3182     template <int fe_degree, int n_q_points_1d>
3183     static bool
3184     run(const unsigned int n_components,
3185         const unsigned int n_face_orientations,
3186         const Number2 *    src_ptr,
3187         const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data,
3188         const MatrixFreeFunctions::DoFInfo &                       dof_info,
3189         VectorizedArrayType *                                      values_quad,
3190         VectorizedArrayType *gradients_quad,
3191         VectorizedArrayType *scratch_data,
3192         const bool           evaluate_values,
3193         const bool           evaluate_gradients,
3194         const unsigned int   active_fe_index,
3195         const unsigned int   first_selected_component,
3196         const std::array<unsigned int, VectorizedArrayType::size()> cells,
3197         const std::array<unsigned int, VectorizedArrayType::size()> face_nos,
3198         const unsigned int                                 subface_index,
3199         const MatrixFreeFunctions::DoFInfo::DoFAccessIndex dof_access_index,
3200         const std::array<unsigned int, VectorizedArrayType::size()>
3201                                       face_orientations,
3202         const Table<2, unsigned int> &orientation_map)
3203     {
3204       if (src_ptr == nullptr)
3205         {
3206           return false;
3207         }
3208 
3209       Processor<fe_degree, n_q_points_1d> p(n_components,
3210                                             false,
3211                                             src_ptr,
3212                                             data,
3213                                             dof_info,
3214                                             values_quad,
3215                                             gradients_quad,
3216                                             scratch_data,
3217                                             evaluate_values,
3218                                             evaluate_gradients,
3219                                             active_fe_index,
3220                                             first_selected_component,
3221                                             cells,
3222                                             face_nos,
3223                                             subface_index,
3224                                             dof_access_index,
3225                                             face_orientations,
3226                                             orientation_map);
3227 
3228       if (n_face_orientations == VectorizedArrayType::size())
3229         return fe_face_evaluation_process_and_io<VectorizedArrayType::size()>(
3230           p);
3231       else
3232         return fe_face_evaluation_process_and_io<1>(p);
3233     }
3234 
3235   private:
3236     template <int fe_degree, int n_q_points_1d>
3237     struct Processor
3238     {
3239       static const int dim_           = dim;
3240       static const int fe_degree_     = fe_degree;
3241       static const int n_q_points_1d_ = n_q_points_1d;
3242       using VectorizedArrayType_      = VectorizedArrayType;
3243       using Number2_                  = const Number2;
3244 
3245       Processor(
3246         const unsigned int n_components,
3247         const bool         integrate,
3248         const Number2 *    global_vector_ptr,
3249         const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data,
3250         const MatrixFreeFunctions::DoFInfo &                       dof_info,
3251         VectorizedArrayType *                                      values_quad,
3252         VectorizedArrayType *gradients_quad,
3253         VectorizedArrayType *scratch_data,
3254         const bool           do_values,
3255         const bool           do_gradients,
3256         const unsigned int   active_fe_index,
3257         const unsigned int   first_selected_component,
3258         const std::array<unsigned int, VectorizedArrayType::size()> cells,
3259         const std::array<unsigned int, VectorizedArrayType::size()> face_nos,
3260         const unsigned int                                 subface_index,
3261         const MatrixFreeFunctions::DoFInfo::DoFAccessIndex dof_access_index,
3262         const std::array<unsigned int, VectorizedArrayType::size()>
3263                                       face_orientations,
3264         const Table<2, unsigned int> &orientation_map)
3265         : n_components(n_components)
3266         , integrate(integrate)
3267         , global_vector_ptr(global_vector_ptr)
3268         , data(data)
3269         , dof_info(dof_info)
3270         , values_quad(values_quad)
3271         , gradients_quad(gradients_quad)
3272         , scratch_data(scratch_data)
3273         , do_values(do_values)
3274         , do_gradients(do_gradients)
3275         , active_fe_index(active_fe_index)
3276         , first_selected_component(first_selected_component)
3277         , cells(cells)
3278         , face_nos(face_nos)
3279         , subface_index(subface_index)
3280         , dof_access_index(dof_access_index)
3281         , face_orientations(face_orientations)
3282         , orientation_map(orientation_map)
3283       {}
3284 
3285       template <typename T0, typename T1, typename T2>
3286       void
3287       function_1a(T0 &      temp_1,
3288                   T0 &      temp_2,
3289                   const T1  src_ptr_1,
3290                   const T1  src_ptr_2,
3291                   const T2 &grad_weight)
3292       {
3293         // case 1a)
3294         do_vectorized_read(src_ptr_1, temp_1);
3295         do_vectorized_read(src_ptr_2, temp_2);
3296         temp_2 = grad_weight * (temp_1 - temp_2);
3297       }
3298 
3299       template <typename T1, typename T2>
3300       void
3301       function_1b(T1 &temp, const T2 src_ptr)
3302       {
3303         // case 1b)
3304         do_vectorized_read(src_ptr, temp);
3305       }
3306 
3307       template <typename T0, typename T1, typename T2, typename T3>
3308       void
3309       function_2a(T0 &      temp_1,
3310                   T0 &      temp_2,
3311                   const T1  src_ptr_1,
3312                   const T1  src_ptr_2,
3313                   const T2 &grad_weight,
3314                   const T3 &indices_1,
3315                   const T3 &indices_2)
3316       {
3317         // case 2a)
3318         do_vectorized_gather(src_ptr_1, indices_1, temp_1);
3319         do_vectorized_gather(src_ptr_2, indices_2, temp_2);
3320         temp_2 = grad_weight * (temp_1 - temp_2);
3321       }
3322 
3323       template <typename T0, typename T1, typename T2>
3324       void
3325       function_2b(T0 &temp, const T1 src_ptr, const T2 &indices)
3326       {
3327         // case 2b)
3328         do_vectorized_gather(src_ptr, indices, temp);
3329       }
3330 
3331       template <typename T0, typename T1, typename T2>
3332       void
3333       function_3a(T0 &      temp_1,
3334                   T0 &      temp_2,
3335                   const T1 &src_ptr_1,
3336                   const T2 &src_ptr_2,
3337                   const T2 &grad_weight)
3338       {
3339         // case 3a)
3340         temp_1 = src_ptr_1;
3341         temp_2 = grad_weight * (temp_1 - src_ptr_2);
3342       }
3343 
3344       template <typename T1, typename T2>
3345       void
3346       function_3b(T1 &temp, const T2 &src_ptr)
3347       {
3348         // case 3b)
3349         temp = src_ptr;
3350       }
3351 
3352       template <typename T1>
3353       void
3354       function_5(const T1 &, const unsigned int)
3355       {
3356         // case 5)
3357       }
3358 
3359       template <typename T1>
3360       void
3361       function_0(T1 &temp1, const unsigned int comp)
3362       {
3363         const unsigned int dofs_per_face =
3364           fe_degree > -1 ?
3365             Utilities::pow(fe_degree + 1, dim - 1) :
3366             Utilities::pow(data.data.front().fe_degree + 1, dim - 1);
3367         const unsigned int n_q_points =
3368           fe_degree > -1 ? Utilities::pow(n_q_points_1d, dim - 1) :
3369                            data.n_q_points_face;
3370         if (fe_degree > -1 &&
3371             subface_index >= GeometryInfo<dim>::max_children_per_cell &&
3372             data.element_type <= MatrixFreeFunctions::tensor_symmetric)
3373           FEFaceEvaluationImpl<true,
3374                                dim,
3375                                fe_degree,
3376                                n_q_points_1d,
3377                                VectorizedArrayType>::
3378             evaluate_in_face(/* n_components */ 1,
3379                              data,
3380                              temp1,
3381                              values_quad + comp * n_q_points,
3382                              gradients_quad + comp * dim * n_q_points,
3383                              scratch_data + 2 * dofs_per_face,
3384                              do_values,
3385                              do_gradients,
3386                              subface_index);
3387         else
3388           FEFaceEvaluationImpl<false,
3389                                dim,
3390                                fe_degree,
3391                                n_q_points_1d,
3392                                VectorizedArrayType>::
3393             evaluate_in_face(/* n_components */ 1,
3394                              data,
3395                              temp1,
3396                              values_quad + comp * n_q_points,
3397                              gradients_quad + comp * dim * n_q_points,
3398                              scratch_data + 2 * dofs_per_face,
3399                              do_values,
3400                              do_gradients,
3401                              subface_index);
3402       }
3403 
3404       const unsigned int n_components;
3405       const bool         integrate;
3406       const Number2 *    global_vector_ptr;
3407       const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data;
3408       const MatrixFreeFunctions::DoFInfo &                       dof_info;
3409       VectorizedArrayType *                                      values_quad;
3410       VectorizedArrayType *                                      gradients_quad;
3411       VectorizedArrayType *                                      scratch_data;
3412       const bool                                                 do_values;
3413       const bool                                                 do_gradients;
3414       const unsigned int active_fe_index;
3415       const unsigned int first_selected_component;
3416       const std::array<unsigned int, VectorizedArrayType::size()> cells;
3417       const std::array<unsigned int, VectorizedArrayType::size()> face_nos;
3418       const unsigned int                                          subface_index;
3419       const MatrixFreeFunctions::DoFInfo::DoFAccessIndex dof_access_index;
3420       const std::array<unsigned int, VectorizedArrayType::size()>
3421                                     face_orientations;
3422       const Table<2, unsigned int> &orientation_map;
3423     };
3424   };
3425 
3426   template <int dim,
3427             typename Number,
3428             typename VectorizedArrayType,
3429             typename Number2 = Number>
3430   struct FEFaceEvaluationImplIntegrateScatterSelector
3431   {
3432     template <int fe_degree, int n_q_points_1d>
3433     static bool
3434     run(const unsigned int n_components,
3435         const unsigned int n_face_orientations,
3436         Number2 *          dst_ptr,
3437         const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data,
3438         const MatrixFreeFunctions::DoFInfo &                       dof_info,
3439         VectorizedArrayType *                                      values_array,
3440         VectorizedArrayType *                                      values_quad,
3441         VectorizedArrayType *gradients_quad,
3442         VectorizedArrayType *scratch_data,
3443         const bool           integrate_values,
3444         const bool           integrate_gradients,
3445         const unsigned int   active_fe_index,
3446         const unsigned int   first_selected_component,
3447         const std::array<unsigned int, VectorizedArrayType::size()> cells,
3448         const std::array<unsigned int, VectorizedArrayType::size()> face_nos,
3449         const unsigned int                                 subface_index,
3450         const MatrixFreeFunctions::DoFInfo::DoFAccessIndex dof_access_index,
3451         const std::array<unsigned int, VectorizedArrayType::size()>
3452                                       face_orientations,
3453         const Table<2, unsigned int> &orientation_map)
3454     {
3455       if (dst_ptr == nullptr)
3456         {
3457           AssertDimension(n_face_orientations, 1);
3458           AssertDimension(n_face_orientations, 1);
3459 
3460           // for block vectors simply integrate
3461           FEFaceEvaluationImplIntegrateSelector<dim, VectorizedArrayType>::
3462             template run<fe_degree, n_q_points_1d>(n_components,
3463                                                    data,
3464                                                    values_array,
3465                                                    values_quad,
3466                                                    gradients_quad,
3467                                                    scratch_data,
3468                                                    integrate_values,
3469                                                    integrate_gradients,
3470                                                    face_nos[0],
3471                                                    subface_index,
3472                                                    face_orientations[0],
3473                                                    orientation_map);
3474 
3475           // default vector access
3476           return false;
3477         }
3478 
3479 
3480       Processor<fe_degree, n_q_points_1d> p(values_array,
3481                                             n_components,
3482                                             true,
3483                                             dst_ptr,
3484                                             data,
3485                                             dof_info,
3486                                             values_quad,
3487                                             gradients_quad,
3488                                             scratch_data,
3489                                             integrate_values,
3490                                             integrate_gradients,
3491                                             active_fe_index,
3492                                             first_selected_component,
3493                                             cells,
3494                                             face_nos,
3495                                             subface_index,
3496                                             dof_access_index,
3497                                             face_orientations,
3498                                             orientation_map);
3499 
3500       if (n_face_orientations == VectorizedArrayType::size())
3501         return fe_face_evaluation_process_and_io<VectorizedArrayType::size()>(
3502           p);
3503       else
3504         return fe_face_evaluation_process_and_io<1>(p);
3505     }
3506 
3507   private:
3508     template <int fe_degree, int n_q_points_1d>
3509     struct Processor
3510     {
3511       static const int dim_           = dim;
3512       static const int fe_degree_     = fe_degree;
3513       static const int n_q_points_1d_ = n_q_points_1d;
3514       using VectorizedArrayType_      = VectorizedArrayType;
3515       using Number2_                  = Number2;
3516 
3517 
3518       Processor(
3519         VectorizedArrayType *values_array,
3520         const unsigned int   n_components,
3521         const bool           integrate,
3522         Number2 *            global_vector_ptr,
3523         const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data,
3524         const MatrixFreeFunctions::DoFInfo &                       dof_info,
3525         VectorizedArrayType *                                      values_quad,
3526         VectorizedArrayType *gradients_quad,
3527         VectorizedArrayType *scratch_data,
3528         const bool           do_values,
3529         const bool           do_gradients,
3530         const unsigned int   active_fe_index,
3531         const unsigned int   first_selected_component,
3532         const std::array<unsigned int, VectorizedArrayType::size()> cells,
3533         const std::array<unsigned int, VectorizedArrayType::size()> face_nos,
3534         const unsigned int                                 subface_index,
3535         const MatrixFreeFunctions::DoFInfo::DoFAccessIndex dof_access_index,
3536         const std::array<unsigned int, VectorizedArrayType::size()>
3537                                       face_orientations,
3538         const Table<2, unsigned int> &orientation_map)
3539         : values_array(values_array)
3540         , n_components(n_components)
3541         , integrate(integrate)
3542         , global_vector_ptr(global_vector_ptr)
3543         , data(data)
3544         , dof_info(dof_info)
3545         , values_quad(values_quad)
3546         , gradients_quad(gradients_quad)
3547         , scratch_data(scratch_data)
3548         , do_values(do_values)
3549         , do_gradients(do_gradients)
3550         , active_fe_index(active_fe_index)
3551         , first_selected_component(first_selected_component)
3552         , cells(cells)
3553         , face_nos(face_nos)
3554         , subface_index(subface_index)
3555         , dof_access_index(dof_access_index)
3556         , face_orientations(face_orientations)
3557         , orientation_map(orientation_map)
3558       {}
3559 
3560       template <typename T0, typename T1, typename T2, typename T3, typename T4>
3561       void
3562       function_1a(const T0 &temp_1,
3563                   const T1 &temp_2,
3564                   T2        dst_ptr_1,
3565                   T3        dst_ptr_2,
3566                   const T4 &grad_weight)
3567       {
3568         // case 1a)
3569         const VectorizedArrayType val  = temp_1 - grad_weight * temp_2;
3570         const VectorizedArrayType grad = grad_weight * temp_2;
3571         do_vectorized_add(val, dst_ptr_1);
3572         do_vectorized_add(grad, dst_ptr_2);
3573       }
3574 
3575       template <typename T0, typename T1>
3576       void
3577       function_1b(const T0 &temp, T1 dst_ptr)
3578       {
3579         // case 1b)
3580         do_vectorized_add(temp, dst_ptr);
3581       }
3582 
3583       template <typename T0, typename T1, typename T2, typename T3>
3584       void
3585       function_2a(const T0 &temp_1,
3586                   const T0 &temp_2,
3587                   T1        dst_ptr_1,
3588                   T1        dst_ptr_2,
3589                   const T2 &grad_weight,
3590                   const T3 &indices_1,
3591                   const T3 &indices_2)
3592       {
3593         // case 2a)
3594         const VectorizedArrayType val  = temp_1 - grad_weight * temp_2;
3595         const VectorizedArrayType grad = grad_weight * temp_2;
3596         do_vectorized_scatter_add(val, indices_1, dst_ptr_1);
3597         do_vectorized_scatter_add(grad, indices_2, dst_ptr_2);
3598       }
3599 
3600       template <typename T0, typename T1, typename T2>
3601       void
3602       function_2b(const T0 &temp, T1 dst_ptr, const T2 &indices)
3603       {
3604         // case 2b)
3605         do_vectorized_scatter_add(temp, indices, dst_ptr);
3606       }
3607 
3608       template <typename T0, typename T1, typename T2>
3609       void
3610       function_3a(const T0 &temp_1,
3611                   const T0 &temp_2,
3612                   T1 &      dst_ptr_1,
3613                   T1 &      dst_ptr_2,
3614                   const T2 &grad_weight)
3615       {
3616         // case 3a)
3617         const Number val  = temp_1 - grad_weight * temp_2;
3618         const Number grad = grad_weight * temp_2;
3619         dst_ptr_1 += val;
3620         dst_ptr_2 += grad;
3621       }
3622 
3623       template <typename T0, typename T1>
3624       void
3625       function_3b(const T0 &temp, T1 &dst_ptr)
3626       {
3627         // case 3b)
3628         dst_ptr += temp;
3629       }
3630 
3631       template <typename T0>
3632       void
3633       function_5(const T0 &temp1, const unsigned int comp)
3634       {
3635         // case 5: default vector access, must be handled separately, just do
3636         // the face-normal interpolation
3637 
3638         FEFaceNormalEvaluationImpl<dim, fe_degree, VectorizedArrayType>::
3639           template interpolate<false, false>(
3640             /* n_components */ 1,
3641             data,
3642             temp1,
3643             values_array + comp * data.dofs_per_component_on_cell,
3644             do_gradients,
3645             face_nos[0]);
3646       }
3647 
3648       template <typename T0>
3649       void
3650       function_0(T0 &temp1, const unsigned int comp)
3651       {
3652         const unsigned int dofs_per_face =
3653           fe_degree > -1 ?
3654             Utilities::pow(fe_degree + 1, dim - 1) :
3655             Utilities::pow(data.data.front().fe_degree + 1, dim - 1);
3656         const unsigned int n_q_points =
3657           fe_degree > -1 ? Utilities::pow(n_q_points_1d, dim - 1) :
3658                            data.n_q_points_face;
3659         if (fe_degree > -1 &&
3660             subface_index >= GeometryInfo<dim>::max_children_per_cell &&
3661             data.element_type <=
3662               internal::MatrixFreeFunctions::tensor_symmetric)
3663           internal::FEFaceEvaluationImpl<true,
3664                                          dim,
3665                                          fe_degree,
3666                                          n_q_points_1d,
3667                                          VectorizedArrayType>::
3668             integrate_in_face(/* n_components */ 1,
3669                               data,
3670                               temp1,
3671                               values_quad + comp * n_q_points,
3672                               gradients_quad + dim * comp * n_q_points,
3673                               scratch_data + 2 * dofs_per_face,
3674                               do_values,
3675                               do_gradients,
3676                               subface_index);
3677         else
3678           internal::FEFaceEvaluationImpl<false,
3679                                          dim,
3680                                          fe_degree,
3681                                          n_q_points_1d,
3682                                          VectorizedArrayType>::
3683             integrate_in_face(/* n_components */ 1,
3684                               data,
3685                               temp1,
3686                               values_quad + comp * n_q_points,
3687                               gradients_quad + dim * comp * n_q_points,
3688                               scratch_data + 2 * dofs_per_face,
3689                               do_values,
3690                               do_gradients,
3691                               subface_index);
3692       }
3693 
3694       VectorizedArrayType *values_array;
3695 
3696 
3697       const unsigned int n_components;
3698       const bool         integrate;
3699       Number2 *          global_vector_ptr;
3700       const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data;
3701       const MatrixFreeFunctions::DoFInfo &                       dof_info;
3702       VectorizedArrayType *                                      values_quad;
3703       VectorizedArrayType *                                      gradients_quad;
3704       VectorizedArrayType *                                      scratch_data;
3705       const bool                                                 do_values;
3706       const bool                                                 do_gradients;
3707       const unsigned int active_fe_index;
3708       const unsigned int first_selected_component;
3709       const std::array<unsigned int, VectorizedArrayType::size()> cells;
3710       const std::array<unsigned int, VectorizedArrayType::size()> face_nos;
3711       const unsigned int                                          subface_index;
3712       const MatrixFreeFunctions::DoFInfo::DoFAccessIndex dof_access_index;
3713       const std::array<unsigned int, VectorizedArrayType::size()>
3714                                     face_orientations;
3715       const Table<2, unsigned int> &orientation_map;
3716     };
3717   };
3718 
3719 
3720 
3721   /**
3722    * This struct implements the action of the inverse mass matrix operation
3723    * using an FEEvaluationBaseData argument.
3724    */
3725   template <int dim, typename Number>
3726   struct CellwiseInverseMassMatrixImplBasic
3727   {
3728     template <int fe_degree, int = 0>
3729     static bool
3730     run(const unsigned int                  n_components,
3731         const FEEvaluationBaseData<dim,
3732                                    typename Number::value_type,
3733                                    false,
3734                                    Number> &fe_eval,
3735         const Number *                      in_array,
3736         Number *                            out_array,
3737         typename std::enable_if<fe_degree != -1>::type * = nullptr)
3738     {
3739       constexpr unsigned int dofs_per_component =
3740         Utilities::pow(fe_degree + 1, dim);
3741 
3742       Assert(dim >= 1 || dim <= 3, ExcNotImplemented());
3743       Assert(fe_eval.get_shape_info().element_type <=
3744                MatrixFreeFunctions::tensor_symmetric,
3745              ExcNotImplemented());
3746 
3747       internal::EvaluatorTensorProduct<internal::evaluate_evenodd,
3748                                        dim,
3749                                        fe_degree + 1,
3750                                        fe_degree + 1,
3751                                        Number>
3752         evaluator(
3753           AlignedVector<Number>(),
3754           AlignedVector<Number>(),
3755           fe_eval.get_shape_info().data.front().inverse_shape_values_eo);
3756 
3757       for (unsigned int d = 0; d < n_components; ++d)
3758         {
3759           const Number *in  = in_array + d * dofs_per_component;
3760           Number *      out = out_array + d * dofs_per_component;
3761           // Need to select 'apply' method with hessian slot because values
3762           // assume symmetries that do not exist in the inverse shapes
3763           evaluator.template hessians<0, true, false>(in, out);
3764           if (dim > 1)
3765             evaluator.template hessians<1, true, false>(out, out);
3766           if (dim > 2)
3767             evaluator.template hessians<2, true, false>(out, out);
3768         }
3769       for (unsigned int q = 0; q < dofs_per_component; ++q)
3770         {
3771           const Number inverse_JxW_q = Number(1.) / fe_eval.JxW(q);
3772           for (unsigned int d = 0; d < n_components; ++d)
3773             out_array[q + d * dofs_per_component] *= inverse_JxW_q;
3774         }
3775       for (unsigned int d = 0; d < n_components; ++d)
3776         {
3777           Number *out = out_array + d * dofs_per_component;
3778           if (dim > 2)
3779             evaluator.template hessians<2, false, false>(out, out);
3780           if (dim > 1)
3781             evaluator.template hessians<1, false, false>(out, out);
3782           evaluator.template hessians<0, false, false>(out, out);
3783         }
3784       return false;
3785     }
3786 
3787     template <int fe_degree, int = 0>
3788     static bool
3789     run(const unsigned int                  n_components,
3790         const FEEvaluationBaseData<dim,
3791                                    typename Number::value_type,
3792                                    false,
3793                                    Number> &fe_eval,
3794         const Number *                      in_array,
3795         Number *                            out_array,
3796         typename std::enable_if<fe_degree == -1>::type * = nullptr)
3797     {
3798       static_assert(fe_degree == -1, "Only usable for degree -1");
3799       const unsigned int dofs_per_component =
3800         fe_eval.get_shape_info().dofs_per_component_on_cell;
3801 
3802       Assert(dim >= 1 || dim <= 3, ExcNotImplemented());
3803 
3804       internal::
3805         EvaluatorTensorProduct<internal::evaluate_general, dim, 0, 0, Number>
3806           evaluator(fe_eval.get_shape_info().data.front().inverse_shape_values,
3807                     AlignedVector<Number>(),
3808                     AlignedVector<Number>(),
3809                     fe_eval.get_shape_info().data.front().fe_degree + 1,
3810                     fe_eval.get_shape_info().data.front().fe_degree + 1);
3811 
3812       for (unsigned int d = 0; d < n_components; ++d)
3813         {
3814           const Number *in  = in_array + d * dofs_per_component;
3815           Number *      out = out_array + d * dofs_per_component;
3816           // Need to select 'apply' method with hessian slot because values
3817           // assume symmetries that do not exist in the inverse shapes
3818           evaluator.template values<0, true, false>(in, out);
3819           if (dim > 1)
3820             evaluator.template values<1, true, false>(out, out);
3821           if (dim > 2)
3822             evaluator.template values<2, true, false>(out, out);
3823         }
3824       for (unsigned int q = 0; q < dofs_per_component; ++q)
3825         {
3826           const Number inverse_JxW_q = Number(1.) / fe_eval.JxW(q);
3827           for (unsigned int d = 0; d < n_components; ++d)
3828             out_array[q + d * dofs_per_component] *= inverse_JxW_q;
3829         }
3830       for (unsigned int d = 0; d < n_components; ++d)
3831         {
3832           Number *out = out_array + d * dofs_per_component;
3833           if (dim > 2)
3834             evaluator.template values<2, false, false>(out, out);
3835           if (dim > 1)
3836             evaluator.template values<1, false, false>(out, out);
3837           evaluator.template values<0, false, false>(out, out);
3838         }
3839       return false;
3840     }
3841   };
3842 
3843 
3844 
3845   /**
3846    * This struct implements the action of the inverse mass matrix operation
3847    * using an FEEvaluationBaseData argument.
3848    */
3849   template <int dim, typename Number>
3850   struct CellwiseInverseMassMatrixImplFlexible
3851   {
3852     template <int fe_degree, int = 0>
3853     static bool
3854     run(const unsigned int           n_desired_components,
3855         const AlignedVector<Number> &inverse_shape,
3856         const AlignedVector<Number> &inverse_coefficients,
3857         const Number *               in_array,
3858         Number *                     out_array,
3859         typename std::enable_if<fe_degree != -1>::type * = nullptr)
3860     {
3861       constexpr unsigned int dofs_per_component =
3862         Utilities::pow(fe_degree + 1, dim);
3863       Assert(inverse_coefficients.size() > 0 &&
3864                inverse_coefficients.size() % dofs_per_component == 0,
3865              ExcMessage(
3866                "Expected diagonal to be a multiple of scalar dof per cells"));
3867       if (inverse_coefficients.size() != dofs_per_component)
3868         AssertDimension(n_desired_components * dofs_per_component,
3869                         inverse_coefficients.size());
3870 
3871       Assert(dim >= 1 || dim <= 3, ExcNotImplemented());
3872 
3873       internal::EvaluatorTensorProduct<internal::evaluate_evenodd,
3874                                        dim,
3875                                        fe_degree + 1,
3876                                        fe_degree + 1,
3877                                        Number>
3878         evaluator(AlignedVector<Number>(),
3879                   AlignedVector<Number>(),
3880                   inverse_shape);
3881 
3882       const unsigned int shift_coefficient =
3883         inverse_coefficients.size() > dofs_per_component ? dofs_per_component :
3884                                                            0;
3885       const Number *inv_coefficient = inverse_coefficients.data();
3886       for (unsigned int d = 0; d < n_desired_components; ++d)
3887         {
3888           const Number *in  = in_array + d * dofs_per_component;
3889           Number *      out = out_array + d * dofs_per_component;
3890           // Need to select 'apply' method with hessian slot because values
3891           // assume symmetries that do not exist in the inverse shapes
3892           evaluator.template hessians<0, true, false>(in, out);
3893           if (dim > 1)
3894             evaluator.template hessians<1, true, false>(out, out);
3895           if (dim > 2)
3896             evaluator.template hessians<2, true, false>(out, out);
3897 
3898           for (unsigned int q = 0; q < dofs_per_component; ++q)
3899             out[q] *= inv_coefficient[q];
3900 
3901           if (dim > 2)
3902             evaluator.template hessians<2, false, false>(out, out);
3903           if (dim > 1)
3904             evaluator.template hessians<1, false, false>(out, out);
3905           evaluator.template hessians<0, false, false>(out, out);
3906 
3907           inv_coefficient += shift_coefficient;
3908         }
3909       return false;
3910     }
3911 
3912     /**
3913      * Version for degree = -1
3914      */
3915     template <int fe_degree, int = 0>
3916     static bool
3917     run(const unsigned int,
3918         const AlignedVector<Number> &,
3919         const AlignedVector<Number> &,
3920         const Number *,
3921         Number *,
3922         typename std::enable_if<fe_degree == -1>::type * = nullptr)
3923     {
3924       static_assert(fe_degree == -1, "Only usable for degree -1");
3925       Assert(false, ExcNotImplemented());
3926       return false;
3927     }
3928   };
3929 
3930 
3931 
3932   /**
3933    * This struct implements the action of the inverse mass matrix operation
3934    * using an FEEvaluationBaseData argument.
3935    */
3936   template <int dim, typename Number>
3937   struct CellwiseInverseMassMatrixImplTransformFromQPoints
3938   {
3939     template <int fe_degree, int = 0>
3940     static bool
3941     run(const unsigned int           n_desired_components,
3942         const AlignedVector<Number> &inverse_shape,
3943         const Number *               in_array,
3944         Number *                     out_array,
3945         typename std::enable_if<fe_degree != -1>::type * = nullptr)
3946     {
3947       constexpr unsigned int dofs_per_cell = Utilities::pow(fe_degree + 1, dim);
3948       internal::EvaluatorTensorProduct<internal::evaluate_evenodd,
3949                                        dim,
3950                                        fe_degree + 1,
3951                                        fe_degree + 1,
3952                                        Number>
3953         evaluator(AlignedVector<Number>(),
3954                   AlignedVector<Number>(),
3955                   inverse_shape);
3956 
3957       for (unsigned int d = 0; d < n_desired_components; ++d)
3958         {
3959           const Number *in  = in_array + d * dofs_per_cell;
3960           Number *      out = out_array + d * dofs_per_cell;
3961 
3962           if (dim == 3)
3963             {
3964               evaluator.template hessians<2, false, false>(in, out);
3965               evaluator.template hessians<1, false, false>(out, out);
3966               evaluator.template hessians<0, false, false>(out, out);
3967             }
3968           if (dim == 2)
3969             {
3970               evaluator.template hessians<1, false, false>(in, out);
3971               evaluator.template hessians<0, false, false>(out, out);
3972             }
3973           if (dim == 1)
3974             evaluator.template hessians<0, false, false>(in, out);
3975         }
3976       return false;
3977     }
3978 
3979     template <int fe_degree, int = 0>
3980     static bool
3981     run(const unsigned int,
3982         const AlignedVector<Number> &,
3983         const Number *,
3984         Number *,
3985         typename std::enable_if<fe_degree == -1>::type * = nullptr)
3986     {
3987       static_assert(fe_degree == -1, "Only usable for degree -1");
3988       Assert(false, ExcNotImplemented());
3989       return false;
3990     }
3991   };
3992 
3993 } // end of namespace internal
3994 
3995 
3996 DEAL_II_NAMESPACE_CLOSE
3997 
3998 #endif
3999