1 // --------------------------------------------------------------------- 2 // 3 // Copyright (C) 2017 - 2020 by the deal.II authors 4 // 5 // This file is part of the deal.II library. 6 // 7 // The deal.II library is free software; you can use it, redistribute 8 // it, and/or modify it under the terms of the GNU Lesser General 9 // Public License as published by the Free Software Foundation; either 10 // version 2.1 of the License, or (at your option) any later version. 11 // The full text of the license can be found in the file LICENSE.md at 12 // the top level directory of deal.II. 13 // 14 // --------------------------------------------------------------------- 15 16 17 #ifndef dealii_matrix_free_evaluation_kernels_h 18 #define dealii_matrix_free_evaluation_kernels_h 19 20 #include <deal.II/base/config.h> 21 22 #include <deal.II/base/utilities.h> 23 #include <deal.II/base/vectorization.h> 24 25 #include <deal.II/matrix_free/dof_info.h> 26 #include <deal.II/matrix_free/evaluation_flags.h> 27 #include <deal.II/matrix_free/shape_info.h> 28 #include <deal.II/matrix_free/tensor_product_kernels.h> 29 #include <deal.II/matrix_free/type_traits.h> 30 31 32 DEAL_II_NAMESPACE_OPEN 33 34 35 // forward declaration 36 template <int, typename, bool, typename> 37 class FEEvaluationBaseData; 38 39 40 41 namespace internal 42 { 43 // Select evaluator type from element shape function type 44 template <MatrixFreeFunctions::ElementType element, bool is_long> 45 struct EvaluatorSelector 46 {}; 47 48 template <bool is_long> 49 struct EvaluatorSelector<MatrixFreeFunctions::tensor_general, is_long> 50 { 51 static const EvaluatorVariant variant = evaluate_general; 52 }; 53 54 template <> 55 struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric, false> 56 { 57 static const EvaluatorVariant variant = evaluate_symmetric; 58 }; 59 60 template <> 61 struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric, true> 62 { 63 static const EvaluatorVariant variant = evaluate_evenodd; 64 }; 65 66 template <bool is_long> 67 struct EvaluatorSelector<MatrixFreeFunctions::truncated_tensor, is_long> 68 { 69 static const EvaluatorVariant variant = evaluate_general; 70 }; 71 72 template <> 73 struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric_plus_dg0, 74 false> 75 { 76 static const EvaluatorVariant variant = evaluate_general; 77 }; 78 79 template <> 80 struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric_plus_dg0, true> 81 { 82 static const EvaluatorVariant variant = evaluate_evenodd; 83 }; 84 85 template <bool is_long> 86 struct EvaluatorSelector<MatrixFreeFunctions::tensor_symmetric_collocation, 87 is_long> 88 { 89 static const EvaluatorVariant variant = evaluate_evenodd; 90 }; 91 92 93 94 /** 95 * This struct performs the evaluation of function values, gradients and 96 * Hessians for tensor-product finite elements. The operation is used for 97 * both the symmetric and non-symmetric case, which use different apply 98 * functions 'values', 'gradients' in the individual coordinate 99 * directions. The apply functions for values are provided through one of 100 * the template classes EvaluatorTensorProduct which in turn are selected 101 * from the MatrixFreeFunctions::ElementType template argument. 102 * 103 * There are two specialized implementation classes 104 * FEEvaluationImplCollocation (for Gauss-Lobatto elements where the nodal 105 * points and the quadrature points coincide and the 'values' operation is 106 * identity) and FEEvaluationImplTransformToCollocation (which can be 107 * transformed to a collocation space and can then use the identity in these 108 * spaces), which both allow for shorter code. 109 */ 110 template <MatrixFreeFunctions::ElementType type, 111 int dim, 112 int fe_degree, 113 int n_q_points_1d, 114 typename Number> 115 struct FEEvaluationImpl 116 { 117 static void 118 evaluate(const unsigned int n_components, 119 const EvaluationFlags::EvaluationFlags evaluation_flag, 120 const MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 121 const Number * values_dofs_actual, 122 Number * values_quad, 123 Number * gradients_quad, 124 Number * hessians_quad, 125 Number * scratch_data); 126 127 static void 128 integrate(const unsigned int n_components, 129 const EvaluationFlags::EvaluationFlags integration_flag, 130 const MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 131 Number * values_dofs_actual, 132 Number * values_quad, 133 Number * gradients_quad, 134 Number * scratch_data, 135 const bool add_into_values_array); 136 }; 137 138 139 140 template <MatrixFreeFunctions::ElementType type, 141 int dim, 142 int fe_degree, 143 int n_q_points_1d, 144 typename Number> 145 inline void 146 FEEvaluationImpl<type, dim, fe_degree, n_q_points_1d, Number>::evaluate( 147 const unsigned int n_components, 148 const EvaluationFlags::EvaluationFlags evaluation_flag, 149 const MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 150 const Number * values_dofs_actual, 151 Number * values_quad, 152 Number * gradients_quad, 153 Number * hessians_quad, 154 Number * scratch_data) 155 { 156 if (evaluation_flag == EvaluationFlags::nothing) 157 return; 158 159 const EvaluatorVariant variant = 160 EvaluatorSelector<type, (fe_degree + n_q_points_1d > 4)>::variant; 161 using Eval = EvaluatorTensorProduct<variant, 162 dim, 163 fe_degree + 1, 164 n_q_points_1d, 165 Number>; 166 Eval eval(variant == evaluate_evenodd ? 167 shape_info.data.front().shape_values_eo : 168 shape_info.data.front().shape_values, 169 variant == evaluate_evenodd ? 170 shape_info.data.front().shape_gradients_eo : 171 shape_info.data.front().shape_gradients, 172 variant == evaluate_evenodd ? 173 shape_info.data.front().shape_hessians_eo : 174 shape_info.data.front().shape_hessians, 175 shape_info.data.front().fe_degree + 1, 176 shape_info.data.front().n_q_points_1d); 177 178 const unsigned int temp_size = 179 Eval::n_rows_of_product == numbers::invalid_unsigned_int ? 180 0 : 181 (Eval::n_rows_of_product > Eval::n_columns_of_product ? 182 Eval::n_rows_of_product : 183 Eval::n_columns_of_product); 184 Number *temp1 = scratch_data; 185 Number *temp2; 186 if (temp_size == 0) 187 { 188 temp2 = temp1 + std::max(Utilities::fixed_power<dim>( 189 shape_info.data.front().fe_degree + 1), 190 Utilities::fixed_power<dim>( 191 shape_info.data.front().n_q_points_1d)); 192 } 193 else 194 { 195 temp2 = temp1 + temp_size; 196 } 197 198 const unsigned int n_q_points = 199 temp_size == 0 ? shape_info.n_q_points : Eval::n_columns_of_product; 200 const unsigned int dofs_per_comp = 201 (type == MatrixFreeFunctions::truncated_tensor) ? 202 Utilities::fixed_power<dim>(shape_info.data.front().fe_degree + 1) : 203 shape_info.dofs_per_component_on_cell; 204 const Number *values_dofs = values_dofs_actual; 205 if (type == MatrixFreeFunctions::truncated_tensor) 206 { 207 Number *values_dofs_tmp = 208 scratch_data + 2 * (std::max(shape_info.dofs_per_component_on_cell, 209 shape_info.n_q_points)); 210 const int degree = 211 fe_degree != -1 ? fe_degree : shape_info.data.front().fe_degree; 212 for (unsigned int c = 0; c < n_components; ++c) 213 for (int i = 0, count_p = 0, count_q = 0; 214 i < (dim > 2 ? degree + 1 : 1); 215 ++i) 216 { 217 for (int j = 0; j < (dim > 1 ? degree + 1 - i : 1); ++j) 218 { 219 for (int k = 0; k < degree + 1 - j - i; 220 ++k, ++count_p, ++count_q) 221 values_dofs_tmp[c * dofs_per_comp + count_q] = 222 values_dofs_actual 223 [c * shape_info.dofs_per_component_on_cell + count_p]; 224 for (int k = degree + 1 - j - i; k < degree + 1; 225 ++k, ++count_q) 226 values_dofs_tmp[c * dofs_per_comp + count_q] = Number(); 227 } 228 for (int j = degree + 1 - i; j < degree + 1; ++j) 229 for (int k = 0; k < degree + 1; ++k, ++count_q) 230 values_dofs_tmp[c * dofs_per_comp + count_q] = Number(); 231 } 232 values_dofs = values_dofs_tmp; 233 } 234 235 switch (dim) 236 { 237 case 1: 238 for (unsigned int c = 0; c < n_components; c++) 239 { 240 if (evaluation_flag & EvaluationFlags::values) 241 eval.template values<0, true, false>(values_dofs, values_quad); 242 if (evaluation_flag & EvaluationFlags::gradients) 243 eval.template gradients<0, true, false>(values_dofs, 244 gradients_quad); 245 if (evaluation_flag & EvaluationFlags::hessians) 246 eval.template hessians<0, true, false>(values_dofs, 247 hessians_quad); 248 249 // advance the next component in 1D array 250 values_dofs += dofs_per_comp; 251 values_quad += n_q_points; 252 gradients_quad += n_q_points; 253 hessians_quad += n_q_points; 254 } 255 break; 256 257 case 2: 258 for (unsigned int c = 0; c < n_components; c++) 259 { 260 // grad x 261 if (evaluation_flag & EvaluationFlags::gradients) 262 { 263 eval.template gradients<0, true, false>(values_dofs, temp1); 264 eval.template values<1, true, false>(temp1, gradients_quad); 265 } 266 if (evaluation_flag & EvaluationFlags::hessians) 267 { 268 // grad xy 269 if (!(evaluation_flag & EvaluationFlags::gradients)) 270 eval.template gradients<0, true, false>(values_dofs, temp1); 271 eval.template gradients<1, true, false>(temp1, 272 hessians_quad + 273 2 * n_q_points); 274 275 // grad xx 276 eval.template hessians<0, true, false>(values_dofs, temp1); 277 eval.template values<1, true, false>(temp1, hessians_quad); 278 } 279 280 // grad y 281 eval.template values<0, true, false>(values_dofs, temp1); 282 if (evaluation_flag & EvaluationFlags::gradients) 283 eval.template gradients<1, true, false>(temp1, 284 gradients_quad + 285 n_q_points); 286 287 // grad yy 288 if (evaluation_flag & EvaluationFlags::hessians) 289 eval.template hessians<1, true, false>(temp1, 290 hessians_quad + 291 n_q_points); 292 293 // val: can use values applied in x 294 if (evaluation_flag & EvaluationFlags::values) 295 eval.template values<1, true, false>(temp1, values_quad); 296 297 // advance to the next component in 1D array 298 values_dofs += dofs_per_comp; 299 values_quad += n_q_points; 300 gradients_quad += 2 * n_q_points; 301 hessians_quad += 3 * n_q_points; 302 } 303 break; 304 305 case 3: 306 for (unsigned int c = 0; c < n_components; c++) 307 { 308 if (evaluation_flag & EvaluationFlags::gradients) 309 { 310 // grad x 311 eval.template gradients<0, true, false>(values_dofs, temp1); 312 eval.template values<1, true, false>(temp1, temp2); 313 eval.template values<2, true, false>(temp2, gradients_quad); 314 } 315 316 if (evaluation_flag & EvaluationFlags::hessians) 317 { 318 // grad xz 319 if (!(evaluation_flag & EvaluationFlags::gradients)) 320 { 321 eval.template gradients<0, true, false>(values_dofs, 322 temp1); 323 eval.template values<1, true, false>(temp1, temp2); 324 } 325 eval.template gradients<2, true, false>(temp2, 326 hessians_quad + 327 4 * n_q_points); 328 329 // grad xy 330 eval.template gradients<1, true, false>(temp1, temp2); 331 eval.template values<2, true, false>(temp2, 332 hessians_quad + 333 3 * n_q_points); 334 335 // grad xx 336 eval.template hessians<0, true, false>(values_dofs, temp1); 337 eval.template values<1, true, false>(temp1, temp2); 338 eval.template values<2, true, false>(temp2, hessians_quad); 339 } 340 341 // grad y 342 eval.template values<0, true, false>(values_dofs, temp1); 343 if (evaluation_flag & EvaluationFlags::gradients) 344 { 345 eval.template gradients<1, true, false>(temp1, temp2); 346 eval.template values<2, true, false>(temp2, 347 gradients_quad + 348 n_q_points); 349 } 350 351 if (evaluation_flag & EvaluationFlags::hessians) 352 { 353 // grad yz 354 if (!(evaluation_flag & EvaluationFlags::gradients)) 355 eval.template gradients<1, true, false>(temp1, temp2); 356 eval.template gradients<2, true, false>(temp2, 357 hessians_quad + 358 5 * n_q_points); 359 360 // grad yy 361 eval.template hessians<1, true, false>(temp1, temp2); 362 eval.template values<2, true, false>(temp2, 363 hessians_quad + 364 n_q_points); 365 } 366 367 // grad z: can use the values applied in x direction stored in 368 // temp1 369 eval.template values<1, true, false>(temp1, temp2); 370 if (evaluation_flag & EvaluationFlags::gradients) 371 eval.template gradients<2, true, false>(temp2, 372 gradients_quad + 373 2 * n_q_points); 374 375 // grad zz: can use the values applied in x and y direction stored 376 // in temp2 377 if (evaluation_flag & EvaluationFlags::hessians) 378 eval.template hessians<2, true, false>(temp2, 379 hessians_quad + 380 2 * n_q_points); 381 382 // val: can use the values applied in x & y direction stored in 383 // temp2 384 if (evaluation_flag & EvaluationFlags::values) 385 eval.template values<2, true, false>(temp2, values_quad); 386 387 // advance to the next component in 1D array 388 values_dofs += dofs_per_comp; 389 values_quad += n_q_points; 390 gradients_quad += 3 * n_q_points; 391 hessians_quad += 6 * n_q_points; 392 } 393 break; 394 395 default: 396 AssertThrow(false, ExcNotImplemented()); 397 } 398 399 // case additional dof for FE_Q_DG0: add values; gradients and second 400 // derivatives evaluate to zero 401 if (type == MatrixFreeFunctions::tensor_symmetric_plus_dg0 && 402 (evaluation_flag & EvaluationFlags::values)) 403 { 404 values_quad -= n_components * n_q_points; 405 values_dofs -= n_components * dofs_per_comp; 406 for (unsigned int c = 0; c < n_components; ++c) 407 for (unsigned int q = 0; q < shape_info.n_q_points; ++q) 408 values_quad[c * shape_info.n_q_points + q] += 409 values_dofs[(c + 1) * shape_info.dofs_per_component_on_cell - 1]; 410 } 411 } 412 413 414 415 template <MatrixFreeFunctions::ElementType type, 416 int dim, 417 int fe_degree, 418 int n_q_points_1d, 419 typename Number> 420 inline void 421 FEEvaluationImpl<type, dim, fe_degree, n_q_points_1d, Number>::integrate( 422 const unsigned int n_components, 423 const EvaluationFlags::EvaluationFlags integration_flag, 424 const MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 425 Number * values_dofs_actual, 426 Number * values_quad, 427 Number * gradients_quad, 428 Number * scratch_data, 429 const bool add_into_values_array) 430 { 431 const EvaluatorVariant variant = 432 EvaluatorSelector<type, (fe_degree + n_q_points_1d > 4)>::variant; 433 using Eval = EvaluatorTensorProduct<variant, 434 dim, 435 fe_degree + 1, 436 n_q_points_1d, 437 Number>; 438 Eval eval(variant == evaluate_evenodd ? 439 shape_info.data.front().shape_values_eo : 440 shape_info.data.front().shape_values, 441 variant == evaluate_evenodd ? 442 shape_info.data.front().shape_gradients_eo : 443 shape_info.data.front().shape_gradients, 444 variant == evaluate_evenodd ? 445 shape_info.data.front().shape_hessians_eo : 446 shape_info.data.front().shape_hessians, 447 shape_info.data.front().fe_degree + 1, 448 shape_info.data.front().n_q_points_1d); 449 450 const unsigned int temp_size = 451 Eval::n_rows_of_product == numbers::invalid_unsigned_int ? 452 0 : 453 (Eval::n_rows_of_product > Eval::n_columns_of_product ? 454 Eval::n_rows_of_product : 455 Eval::n_columns_of_product); 456 Number *temp1 = scratch_data; 457 Number *temp2; 458 if (temp_size == 0) 459 { 460 temp2 = temp1 + std::max(Utilities::fixed_power<dim>( 461 shape_info.data.front().fe_degree + 1), 462 Utilities::fixed_power<dim>( 463 shape_info.data.front().n_q_points_1d)); 464 } 465 else 466 { 467 temp2 = temp1 + temp_size; 468 } 469 470 const unsigned int n_q_points = 471 temp_size == 0 ? shape_info.n_q_points : Eval::n_columns_of_product; 472 const unsigned int dofs_per_comp = 473 (type == MatrixFreeFunctions::truncated_tensor) ? 474 Utilities::fixed_power<dim>(shape_info.data.front().fe_degree + 1) : 475 shape_info.dofs_per_component_on_cell; 476 // expand dof_values to tensor product for truncated tensor products 477 Number *values_dofs = 478 (type == MatrixFreeFunctions::truncated_tensor) ? 479 scratch_data + 2 * (std::max(shape_info.dofs_per_component_on_cell, 480 shape_info.n_q_points)) : 481 values_dofs_actual; 482 483 switch (dim) 484 { 485 case 1: 486 for (unsigned int c = 0; c < n_components; c++) 487 { 488 if (integration_flag & EvaluationFlags::values) 489 { 490 if (add_into_values_array == false) 491 eval.template values<0, false, false>(values_quad, 492 values_dofs); 493 else 494 eval.template values<0, false, true>(values_quad, 495 values_dofs); 496 } 497 if (integration_flag & EvaluationFlags::gradients) 498 { 499 if (integration_flag & EvaluationFlags::values || 500 add_into_values_array == true) 501 eval.template gradients<0, false, true>(gradients_quad, 502 values_dofs); 503 else 504 eval.template gradients<0, false, false>(gradients_quad, 505 values_dofs); 506 } 507 508 // advance to the next component in 1D array 509 values_dofs += dofs_per_comp; 510 values_quad += n_q_points; 511 gradients_quad += n_q_points; 512 } 513 break; 514 515 case 2: 516 for (unsigned int c = 0; c < n_components; c++) 517 { 518 if ((integration_flag & EvaluationFlags::values) && 519 !(integration_flag & EvaluationFlags::gradients)) 520 { 521 eval.template values<1, false, false>(values_quad, temp1); 522 if (add_into_values_array == false) 523 eval.template values<0, false, false>(temp1, values_dofs); 524 else 525 eval.template values<0, false, true>(temp1, values_dofs); 526 } 527 if (integration_flag & EvaluationFlags::gradients) 528 { 529 eval.template gradients<1, false, false>(gradients_quad + 530 n_q_points, 531 temp1); 532 if (integration_flag & EvaluationFlags::values) 533 eval.template values<1, false, true>(values_quad, temp1); 534 if (add_into_values_array == false) 535 eval.template values<0, false, false>(temp1, values_dofs); 536 else 537 eval.template values<0, false, true>(temp1, values_dofs); 538 eval.template values<1, false, false>(gradients_quad, temp1); 539 eval.template gradients<0, false, true>(temp1, values_dofs); 540 } 541 542 // advance to the next component in 1D array 543 values_dofs += dofs_per_comp; 544 values_quad += n_q_points; 545 gradients_quad += 2 * n_q_points; 546 } 547 break; 548 549 case 3: 550 for (unsigned int c = 0; c < n_components; c++) 551 { 552 if ((integration_flag & EvaluationFlags::values) && 553 !(integration_flag & EvaluationFlags::gradients)) 554 { 555 eval.template values<2, false, false>(values_quad, temp1); 556 eval.template values<1, false, false>(temp1, temp2); 557 if (add_into_values_array == false) 558 eval.template values<0, false, false>(temp2, values_dofs); 559 else 560 eval.template values<0, false, true>(temp2, values_dofs); 561 } 562 if (integration_flag & EvaluationFlags::gradients) 563 { 564 eval.template gradients<2, false, false>(gradients_quad + 565 2 * n_q_points, 566 temp1); 567 if (integration_flag & EvaluationFlags::values) 568 eval.template values<2, false, true>(values_quad, temp1); 569 eval.template values<1, false, false>(temp1, temp2); 570 eval.template values<2, false, false>(gradients_quad + 571 n_q_points, 572 temp1); 573 eval.template gradients<1, false, true>(temp1, temp2); 574 if (add_into_values_array == false) 575 eval.template values<0, false, false>(temp2, values_dofs); 576 else 577 eval.template values<0, false, true>(temp2, values_dofs); 578 eval.template values<2, false, false>(gradients_quad, temp1); 579 eval.template values<1, false, false>(temp1, temp2); 580 eval.template gradients<0, false, true>(temp2, values_dofs); 581 } 582 583 // advance to the next component in 1D array 584 values_dofs += dofs_per_comp; 585 values_quad += n_q_points; 586 gradients_quad += 3 * n_q_points; 587 } 588 break; 589 590 default: 591 AssertThrow(false, ExcNotImplemented()); 592 } 593 594 // case FE_Q_DG0: add values, gradients and second derivatives are zero 595 if (type == MatrixFreeFunctions::tensor_symmetric_plus_dg0) 596 { 597 values_dofs -= n_components * dofs_per_comp - 598 shape_info.dofs_per_component_on_cell + 1; 599 values_quad -= n_components * n_q_points; 600 if (integration_flag & EvaluationFlags::values) 601 for (unsigned int c = 0; c < n_components; ++c) 602 { 603 values_dofs[0] = values_quad[0]; 604 for (unsigned int q = 1; q < shape_info.n_q_points; ++q) 605 values_dofs[0] += values_quad[q]; 606 values_dofs += dofs_per_comp; 607 values_quad += n_q_points; 608 } 609 else 610 { 611 for (unsigned int c = 0; c < n_components; ++c) 612 values_dofs[c * shape_info.dofs_per_component_on_cell] = Number(); 613 values_dofs += n_components * shape_info.dofs_per_component_on_cell; 614 } 615 } 616 617 if (type == MatrixFreeFunctions::truncated_tensor) 618 { 619 values_dofs -= dofs_per_comp * n_components; 620 const int degree = 621 fe_degree != -1 ? fe_degree : shape_info.data.front().fe_degree; 622 for (unsigned int c = 0; c < n_components; ++c) 623 for (int i = 0, count_p = 0, count_q = 0; 624 i < (dim > 2 ? degree + 1 : 1); 625 ++i) 626 { 627 for (int j = 0; j < (dim > 1 ? degree + 1 - i : 1); ++j) 628 { 629 for (int k = 0; k < degree + 1 - j - i; 630 ++k, ++count_p, ++count_q) 631 values_dofs_actual[c * 632 shape_info.dofs_per_component_on_cell + 633 count_p] = 634 values_dofs[c * dofs_per_comp + count_q]; 635 count_q += j + i; 636 } 637 count_q += i * (degree + 1); 638 } 639 } 640 } 641 642 643 644 /** 645 * This struct implements the change between two different bases. This is an 646 * ingredient in the FEEvaluationImplTransformToCollocation class where we 647 * first transform to the appropriate basis where we can compute the 648 * derivative through collocation techniques. 649 * 650 * This class allows for dimension-independent application of the operation, 651 * implemented by template recursion. It has been tested up to 6D. 652 */ 653 template <EvaluatorVariant variant, 654 EvaluatorQuantity quantity, 655 int dim, 656 int basis_size_1, 657 int basis_size_2, 658 typename Number, 659 typename Number2> 660 struct FEEvaluationImplBasisChange 661 { 662 static_assert(basis_size_1 == 0 || basis_size_1 <= basis_size_2, 663 "The second dimension must not be smaller than the first"); 664 665 /** 666 * This applies the transformation that contracts over the rows of the 667 * coefficient array, generating values along the columns of the 668 * coefficient array. 669 * 670 * @param transformation_matrix The coefficient matrix handed in as a 671 * vector, using @p basis_size_1 rows and @p basis_size_2 672 * columns if interpreted as a matrix. 673 * @param values_in The array of the input of size basis_size_1^dim. It 674 * may alias with values_out 675 * @param values_out The array of size basis_size_2^dim where the results 676 * of the transformation are stored. It may alias with 677 * the values_in array. 678 * @param basis_size_1_variable In case the template argument basis_size_1 679 * is zero, the size of the first basis can alternatively be passed in as a 680 * run time argument. The template argument takes precedence in case it is 681 * nonzero for efficiency reasons. 682 * @param basis_size_2_variable In case the template argument basis_size_1 683 * is zero, the size of the second basis can alternatively be passed in as a 684 * run time argument. 685 */ 686 #ifndef DEBUG 687 DEAL_II_ALWAYS_INLINE 688 #endif 689 static void 690 do_forward( 691 const unsigned int n_components, 692 const AlignedVector<Number2> &transformation_matrix, 693 const Number * values_in, 694 Number * values_out, 695 const unsigned int basis_size_1_variable = numbers::invalid_unsigned_int, 696 const unsigned int basis_size_2_variable = numbers::invalid_unsigned_int) 697 { 698 Assert( 699 basis_size_1 != 0 || basis_size_1_variable <= basis_size_2_variable, 700 ExcMessage("The second dimension must not be smaller than the first")); 701 702 Assert(quantity == EvaluatorQuantity::value, ExcInternalError()); 703 704 // we do recursion until dim==1 or dim==2 and we have 705 // basis_size_1==basis_size_2. The latter optimization increases 706 // optimization possibilities for the compiler but does only work for 707 // aliased pointers if the sizes are equal. 708 constexpr int next_dim = 709 (dim > 2 || 710 ((basis_size_1 == 0 || basis_size_2 > basis_size_1) && dim > 1)) ? 711 dim - 1 : 712 dim; 713 714 EvaluatorTensorProduct<variant, 715 dim, 716 basis_size_1, 717 (basis_size_1 == 0 ? 0 : basis_size_2), 718 Number, 719 Number2> 720 eval_val(transformation_matrix, 721 AlignedVector<Number2>(), 722 AlignedVector<Number2>(), 723 basis_size_1_variable, 724 basis_size_2_variable); 725 const unsigned int np_1 = 726 basis_size_1 > 0 ? basis_size_1 : basis_size_1_variable; 727 const unsigned int np_2 = 728 basis_size_1 > 0 ? basis_size_2 : basis_size_2_variable; 729 Assert(np_1 > 0 && np_1 != numbers::invalid_unsigned_int, 730 ExcMessage("Cannot transform with 0-point basis")); 731 Assert(np_2 > 0 && np_2 != numbers::invalid_unsigned_int, 732 ExcMessage("Cannot transform with 0-point basis")); 733 734 // run loop backwards to ensure correctness if values_in aliases with 735 // values_out in case with basis_size_1 < basis_size_2 736 values_in = values_in + n_components * Utilities::fixed_power<dim>(np_1); 737 values_out = 738 values_out + n_components * Utilities::fixed_power<dim>(np_2); 739 for (unsigned int c = n_components; c != 0; --c) 740 { 741 values_in -= Utilities::fixed_power<dim>(np_1); 742 values_out -= Utilities::fixed_power<dim>(np_2); 743 if (next_dim < dim) 744 for (unsigned int q = np_1; q != 0; --q) 745 FEEvaluationImplBasisChange< 746 variant, 747 quantity, 748 next_dim, 749 basis_size_1, 750 basis_size_2, 751 Number, 752 Number2>::do_forward(1, 753 transformation_matrix, 754 values_in + 755 (q - 1) * 756 Utilities::fixed_power<next_dim>(np_1), 757 values_out + 758 (q - 1) * 759 Utilities::fixed_power<next_dim>(np_2), 760 basis_size_1_variable, 761 basis_size_2_variable); 762 763 // the recursion stops if dim==1 or if dim==2 and 764 // basis_size_1==basis_size_2 (the latter is used because the 765 // compiler generates nicer code) 766 if (basis_size_1 > 0 && basis_size_2 == basis_size_1 && dim == 2) 767 { 768 eval_val.template values<0, true, false>(values_in, values_out); 769 eval_val.template values<1, true, false>(values_out, values_out); 770 } 771 else if (dim == 1) 772 eval_val.template values<dim - 1, true, false>(values_in, 773 values_out); 774 else 775 eval_val.template values<dim - 1, true, false>(values_out, 776 values_out); 777 } 778 } 779 780 /** 781 * This applies the transformation that contracts over the columns of the 782 * coefficient array, generating values along the rows of the coefficient 783 * array. 784 * 785 * @param transformation_matrix The coefficient matrix handed in as a 786 * vector, using @p basis_size_1 rows and @p basis_size_2 787 * columns if interpreted as a matrix. 788 * @param add_into_result Define whether the result should be added into the 789 * array @p values_out (if true) or overwrite the 790 * previous content. The result is undefined in case 791 * values_in and values_out point to the same array and 792 * @p add_into_result is true, in which case an 793 * exception is thrown. 794 * @param values_in The array of the input of size basis_size_2^dim. It 795 * may alias with values_out. Note that the previous 796 * content of @p values_in is overwritten within the 797 * function. 798 * @param values_out The array of size basis_size_1^dim where the results 799 * of the transformation are stored. It may alias with 800 * the @p values_in array. 801 * @param basis_size_1_variable In case the template argument basis_size_1 802 * is zero, the size of the first basis can alternatively be passed in as a 803 * run time argument. The template argument takes precedence in case it is 804 * nonzero for efficiency reasons. 805 * @param basis_size_2_variable In case the template argument basis_size_1 806 * is zero, the size of the second basis can alternatively be passed in as a 807 * run time argument. 808 */ 809 #ifndef DEBUG 810 DEAL_II_ALWAYS_INLINE 811 #endif 812 static void 813 do_backward( 814 const unsigned int n_components, 815 const AlignedVector<Number2> &transformation_matrix, 816 const bool add_into_result, 817 Number * values_in, 818 Number * values_out, 819 const unsigned int basis_size_1_variable = numbers::invalid_unsigned_int, 820 const unsigned int basis_size_2_variable = numbers::invalid_unsigned_int) 821 { 822 Assert( 823 basis_size_1 != 0 || basis_size_1_variable <= basis_size_2_variable, 824 ExcMessage("The second dimension must not be smaller than the first")); 825 Assert(add_into_result == false || values_in != values_out, 826 ExcMessage( 827 "Input and output cannot alias with each other when " 828 "adding the result of the basis change to existing data")); 829 830 Assert(quantity == EvaluatorQuantity::value || 831 quantity == EvaluatorQuantity::hessian, 832 ExcInternalError()); 833 834 constexpr int next_dim = 835 (dim > 2 || 836 ((basis_size_1 == 0 || basis_size_2 > basis_size_1) && dim > 1)) ? 837 dim - 1 : 838 dim; 839 EvaluatorTensorProduct<variant, 840 dim, 841 basis_size_1, 842 (basis_size_1 == 0 ? 0 : basis_size_2), 843 Number, 844 Number2> 845 eval_val(transformation_matrix, 846 transformation_matrix, 847 transformation_matrix, 848 basis_size_1_variable, 849 basis_size_2_variable); 850 const unsigned int np_1 = 851 basis_size_1 > 0 ? basis_size_1 : basis_size_1_variable; 852 const unsigned int np_2 = 853 basis_size_1 > 0 ? basis_size_2 : basis_size_2_variable; 854 Assert(np_1 > 0 && np_1 != numbers::invalid_unsigned_int, 855 ExcMessage("Cannot transform with 0-point basis")); 856 Assert(np_2 > 0 && np_2 != numbers::invalid_unsigned_int, 857 ExcMessage("Cannot transform with 0-point basis")); 858 859 for (unsigned int c = 0; c < n_components; ++c) 860 { 861 if (basis_size_1 > 0 && basis_size_2 == basis_size_1 && dim == 2) 862 { 863 if (quantity == EvaluatorQuantity::value) 864 eval_val.template values<1, false, false>(values_in, values_in); 865 else 866 eval_val.template hessians<1, false, false>(values_in, 867 values_in); 868 869 if (add_into_result) 870 { 871 if (quantity == EvaluatorQuantity::value) 872 eval_val.template values<0, false, true>(values_in, 873 values_out); 874 else 875 eval_val.template hessians<0, false, true>(values_in, 876 values_out); 877 } 878 else 879 { 880 if (quantity == EvaluatorQuantity::value) 881 eval_val.template values<0, false, false>(values_in, 882 values_out); 883 else 884 eval_val.template hessians<0, false, false>(values_in, 885 values_out); 886 } 887 } 888 else 889 { 890 if (dim == 1 && add_into_result) 891 { 892 if (quantity == EvaluatorQuantity::value) 893 eval_val.template values<0, false, true>(values_in, 894 values_out); 895 else 896 eval_val.template hessians<0, false, true>(values_in, 897 values_out); 898 } 899 else if (dim == 1) 900 { 901 if (quantity == EvaluatorQuantity::value) 902 eval_val.template values<0, false, false>(values_in, 903 values_out); 904 else 905 eval_val.template hessians<0, false, false>(values_in, 906 values_out); 907 } 908 else 909 { 910 if (quantity == EvaluatorQuantity::value) 911 eval_val.template values<dim - 1, false, false>(values_in, 912 values_in); 913 else 914 eval_val.template hessians<dim - 1, false, false>( 915 values_in, values_in); 916 } 917 } 918 if (next_dim < dim) 919 for (unsigned int q = 0; q < np_1; ++q) 920 FEEvaluationImplBasisChange<variant, 921 quantity, 922 next_dim, 923 basis_size_1, 924 basis_size_2, 925 Number, 926 Number2>:: 927 do_backward(1, 928 transformation_matrix, 929 add_into_result, 930 values_in + 931 q * Utilities::fixed_power<next_dim>(np_2), 932 values_out + 933 q * Utilities::fixed_power<next_dim>(np_1), 934 basis_size_1_variable, 935 basis_size_2_variable); 936 937 values_in += Utilities::fixed_power<dim>(np_2); 938 values_out += Utilities::fixed_power<dim>(np_1); 939 } 940 } 941 942 /** 943 * This operation applies a mass-matrix-like operation, consisting of a 944 * do_forward() operation, multiplication by the coefficients in the 945 * quadrature points, and the do_backward() operation. 946 * 947 * @param transformation_matrix The coefficient matrix handed in as a 948 * vector, using @p basis_size_1 rows and @p basis_size_2 949 * columns if interpreted as a matrix. 950 * @param coefficients The array of coefficients by which the result is 951 * multiplied. Its length must be either 952 * basis_size_2^dim or n_components*basis_size_2^dim 953 * @param values_in The array of the input of size basis_size_2^dim. It 954 * may alias with values_out 955 * @param scratch_data Array to hold temporary data during the operation. 956 * Must be of length basis_size_2^dim 957 * @param values_out The array of size basis_size_1^dim where the results 958 * of the transformation are stored. It may alias with 959 * the values_in array. 960 */ 961 static void 962 do_mass(const unsigned int n_components, 963 const AlignedVector<Number2> &transformation_matrix, 964 const AlignedVector<Number> & coefficients, 965 const Number * values_in, 966 Number * scratch_data, 967 Number * values_out) 968 { 969 constexpr int next_dim = dim > 1 ? dim - 1 : dim; 970 Number * my_scratch = 971 basis_size_1 != basis_size_2 ? scratch_data : values_out; 972 973 const unsigned int size_per_component = Utilities::pow(basis_size_2, dim); 974 Assert(coefficients.size() == size_per_component || 975 coefficients.size() == n_components * size_per_component, 976 ExcDimensionMismatch(coefficients.size(), size_per_component)); 977 const unsigned int stride = 978 coefficients.size() == size_per_component ? 0 : 1; 979 980 for (unsigned int q = basis_size_1; q != 0; --q) 981 FEEvaluationImplBasisChange< 982 variant, 983 EvaluatorQuantity::value, 984 next_dim, 985 basis_size_1, 986 basis_size_2, 987 Number, 988 Number2>::do_forward(n_components, 989 transformation_matrix, 990 values_in + 991 (q - 1) * 992 Utilities::pow(basis_size_1, dim - 1), 993 my_scratch + 994 (q - 1) * 995 Utilities::pow(basis_size_2, dim - 1)); 996 EvaluatorTensorProduct<variant, 997 dim, 998 basis_size_1, 999 basis_size_2, 1000 Number, 1001 Number2> 1002 eval_val(transformation_matrix); 1003 const unsigned int n_inner_blocks = 1004 (dim > 1 && basis_size_2 < 10) ? basis_size_2 : 1; 1005 const unsigned int n_blocks = Utilities::pow(basis_size_2, dim - 1); 1006 for (unsigned int ii = 0; ii < n_blocks; ii += n_inner_blocks) 1007 for (unsigned int c = 0; c < n_components; ++c) 1008 { 1009 for (unsigned int i = ii; i < ii + n_inner_blocks; ++i) 1010 eval_val.template values_one_line<dim - 1, true, false>( 1011 my_scratch + i, my_scratch + i); 1012 for (unsigned int q = 0; q < basis_size_2; ++q) 1013 for (unsigned int i = ii; i < ii + n_inner_blocks; ++i) 1014 my_scratch[i + q * n_blocks + c * size_per_component] *= 1015 coefficients[i + q * n_blocks + 1016 c * stride * size_per_component]; 1017 for (unsigned int i = ii; i < ii + n_inner_blocks; ++i) 1018 eval_val.template values_one_line<dim - 1, false, false>( 1019 my_scratch + i, my_scratch + i); 1020 } 1021 for (unsigned int q = 0; q < basis_size_1; ++q) 1022 FEEvaluationImplBasisChange< 1023 variant, 1024 EvaluatorQuantity::value, 1025 next_dim, 1026 basis_size_1, 1027 basis_size_2, 1028 Number, 1029 Number2>::do_backward(n_components, 1030 transformation_matrix, 1031 false, 1032 my_scratch + 1033 q * Utilities::pow(basis_size_2, dim - 1), 1034 values_out + 1035 q * Utilities::pow(basis_size_1, dim - 1)); 1036 } 1037 }; 1038 1039 1040 1041 /** 1042 * This struct performs the evaluation of function values, gradients and 1043 * Hessians for tensor-product finite elements. This a specialization for 1044 * elements where the nodal points coincide with the quadrature points like 1045 * FE_Q shape functions on Gauss-Lobatto elements integrated with 1046 * Gauss-Lobatto quadrature. The assumption of this class is that the shape 1047 * 'values' operation is identity, which allows us to write shorter code. 1048 * 1049 * In literature, this form of evaluation is often called spectral 1050 * evaluation, spectral collocation or simply collocation, meaning the same 1051 * location for shape functions and evaluation space (quadrature points). 1052 */ 1053 template <int dim, int fe_degree, typename Number> 1054 struct FEEvaluationImplCollocation 1055 { 1056 static void 1057 evaluate(const unsigned int n_components, 1058 const EvaluationFlags::EvaluationFlags evaluation_flag, 1059 const MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 1060 const Number * values_dofs, 1061 Number * values_quad, 1062 Number * gradients_quad, 1063 Number * hessians_quad, 1064 Number * scratch_data); 1065 1066 static void 1067 integrate(const unsigned int n_components, 1068 const EvaluationFlags::EvaluationFlags integration_flag, 1069 const MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 1070 Number * values_dofs, 1071 Number * values_quad, 1072 Number * gradients_quad, 1073 Number * scratch_data, 1074 const bool add_into_values_array); 1075 }; 1076 1077 1078 1079 template <int dim, int fe_degree, typename Number> 1080 inline void 1081 FEEvaluationImplCollocation<dim, fe_degree, Number>::evaluate( 1082 const unsigned int n_components, 1083 const EvaluationFlags::EvaluationFlags evaluation_flag, 1084 const MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 1085 const Number * values_dofs, 1086 Number * values_quad, 1087 Number * gradients_quad, 1088 Number * hessians_quad, 1089 Number *) 1090 { 1091 AssertDimension( 1092 shape_info.data.front().shape_gradients_collocation_eo.size(), 1093 (fe_degree + 2) / 2 * (fe_degree + 1)); 1094 1095 EvaluatorTensorProduct<evaluate_evenodd, 1096 dim, 1097 fe_degree + 1, 1098 fe_degree + 1, 1099 Number> 1100 eval(AlignedVector<Number>(), 1101 shape_info.data.front().shape_gradients_collocation_eo, 1102 shape_info.data.front().shape_hessians_collocation_eo); 1103 constexpr unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim); 1104 1105 for (unsigned int c = 0; c < n_components; c++) 1106 { 1107 if (evaluation_flag & EvaluationFlags::values) 1108 for (unsigned int i = 0; i < n_q_points; ++i) 1109 values_quad[i] = values_dofs[i]; 1110 if (evaluation_flag & 1111 (EvaluationFlags::gradients | EvaluationFlags::hessians)) 1112 { 1113 eval.template gradients<0, true, false>(values_dofs, 1114 gradients_quad); 1115 if (dim > 1) 1116 eval.template gradients<1, true, false>(values_dofs, 1117 gradients_quad + 1118 n_q_points); 1119 if (dim > 2) 1120 eval.template gradients<2, true, false>(values_dofs, 1121 gradients_quad + 1122 2 * n_q_points); 1123 } 1124 if (evaluation_flag & EvaluationFlags::hessians) 1125 { 1126 eval.template hessians<0, true, false>(values_dofs, hessians_quad); 1127 if (dim > 1) 1128 { 1129 eval.template gradients<1, true, false>(gradients_quad, 1130 hessians_quad + 1131 dim * n_q_points); 1132 eval.template hessians<1, true, false>(values_dofs, 1133 hessians_quad + 1134 n_q_points); 1135 } 1136 if (dim > 2) 1137 { 1138 eval.template gradients<2, true, false>(gradients_quad, 1139 hessians_quad + 1140 4 * n_q_points); 1141 eval.template gradients<2, true, false>( 1142 gradients_quad + n_q_points, hessians_quad + 5 * n_q_points); 1143 eval.template hessians<2, true, false>(values_dofs, 1144 hessians_quad + 1145 2 * n_q_points); 1146 } 1147 hessians_quad += (dim * (dim + 1)) / 2 * n_q_points; 1148 } 1149 gradients_quad += dim * n_q_points; 1150 values_quad += n_q_points; 1151 values_dofs += n_q_points; 1152 } 1153 } 1154 1155 1156 1157 template <int dim, int fe_degree, typename Number> 1158 inline void 1159 FEEvaluationImplCollocation<dim, fe_degree, Number>::integrate( 1160 const unsigned int n_components, 1161 const EvaluationFlags::EvaluationFlags integration_flag, 1162 const MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 1163 Number * values_dofs, 1164 Number * values_quad, 1165 Number * gradients_quad, 1166 Number *, 1167 const bool add_into_values_array) 1168 { 1169 AssertDimension( 1170 shape_info.data.front().shape_gradients_collocation_eo.size(), 1171 (fe_degree + 2) / 2 * (fe_degree + 1)); 1172 1173 EvaluatorTensorProduct<evaluate_evenodd, 1174 dim, 1175 fe_degree + 1, 1176 fe_degree + 1, 1177 Number> 1178 eval(AlignedVector<Number>(), 1179 shape_info.data.front().shape_gradients_collocation_eo, 1180 shape_info.data.front().shape_hessians_collocation_eo); 1181 constexpr unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim); 1182 1183 for (unsigned int c = 0; c < n_components; c++) 1184 { 1185 if (integration_flag & EvaluationFlags::values) 1186 { 1187 if (add_into_values_array == false) 1188 for (unsigned int i = 0; i < n_q_points; ++i) 1189 values_dofs[i] = values_quad[i]; 1190 else 1191 for (unsigned int i = 0; i < n_q_points; ++i) 1192 values_dofs[i] += values_quad[i]; 1193 } 1194 if (integration_flag & EvaluationFlags::gradients) 1195 { 1196 if (integration_flag & EvaluationFlags::values || 1197 add_into_values_array == true) 1198 eval.template gradients<0, false, true>(gradients_quad, 1199 values_dofs); 1200 else 1201 eval.template gradients<0, false, false>(gradients_quad, 1202 values_dofs); 1203 if (dim > 1) 1204 eval.template gradients<1, false, true>(gradients_quad + 1205 n_q_points, 1206 values_dofs); 1207 if (dim > 2) 1208 eval.template gradients<2, false, true>(gradients_quad + 1209 2 * n_q_points, 1210 values_dofs); 1211 } 1212 gradients_quad += dim * n_q_points; 1213 values_quad += n_q_points; 1214 values_dofs += n_q_points; 1215 } 1216 } 1217 1218 1219 1220 /** 1221 * This struct performs the evaluation of function values, gradients and 1222 * Hessians for tensor-product finite elements. This a specialization for 1223 * symmetric basis functions about the mid point 0.5 of the unit interval 1224 * with the same number of quadrature points as degrees of freedom. In that 1225 * case, we can first transform the basis to one that has the nodal points 1226 * in the quadrature points (i.e., the collocation space) and then perform 1227 * the evaluation of the first and second derivatives in this transformed 1228 * space, using the identity operation for the shape values. 1229 */ 1230 template <int dim, int fe_degree, int n_q_points_1d, typename Number> 1231 struct FEEvaluationImplTransformToCollocation 1232 { 1233 static void 1234 evaluate(const unsigned int n_components, 1235 const EvaluationFlags::EvaluationFlags evaluation_flag, 1236 const MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 1237 const Number * values_dofs, 1238 Number * values_quad, 1239 Number * gradients_quad, 1240 Number * hessians_quad, 1241 Number * scratch_data); 1242 1243 static void 1244 integrate(const unsigned int n_components, 1245 const EvaluationFlags::EvaluationFlags evaluation_flag, 1246 const MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 1247 Number * values_dofs, 1248 Number * values_quad, 1249 Number * gradients_quad, 1250 Number * scratch_data, 1251 const bool add_into_values_array); 1252 }; 1253 1254 1255 1256 template <int dim, int fe_degree, int n_q_points_1d, typename Number> 1257 inline void 1258 FEEvaluationImplTransformToCollocation< 1259 dim, 1260 fe_degree, 1261 n_q_points_1d, 1262 Number>::evaluate(const unsigned int n_components, 1263 const EvaluationFlags::EvaluationFlags evaluation_flag, 1264 const MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 1265 const Number * values_dofs, 1266 Number * values_quad, 1267 Number *gradients_quad, 1268 Number *hessians_quad, 1269 Number *) 1270 { 1271 Assert(n_q_points_1d > fe_degree, 1272 ExcMessage("You lose information when going to a collocation space " 1273 "of lower degree, so the evaluation results would be " 1274 "wrong. Thus, this class does not permit the desired " 1275 "operation.")); 1276 constexpr unsigned int n_q_points = Utilities::pow(n_q_points_1d, dim); 1277 1278 for (unsigned int c = 0; c < n_components; c++) 1279 { 1280 FEEvaluationImplBasisChange< 1281 evaluate_evenodd, 1282 EvaluatorQuantity::value, 1283 dim, 1284 (fe_degree >= n_q_points_1d ? n_q_points_1d : fe_degree + 1), 1285 n_q_points_1d, 1286 Number, 1287 Number>::do_forward(1, 1288 shape_info.data.front().shape_values_eo, 1289 values_dofs, 1290 values_quad); 1291 1292 // apply derivatives in the collocation space 1293 if (evaluation_flag & 1294 (EvaluationFlags::gradients | EvaluationFlags::hessians)) 1295 FEEvaluationImplCollocation<dim, n_q_points_1d - 1, Number>::evaluate( 1296 1, 1297 evaluation_flag & 1298 (EvaluationFlags::gradients | EvaluationFlags::hessians), 1299 shape_info, 1300 values_quad, 1301 nullptr, 1302 gradients_quad, 1303 hessians_quad, 1304 nullptr); 1305 1306 values_dofs += shape_info.dofs_per_component_on_cell; 1307 values_quad += n_q_points; 1308 gradients_quad += dim * n_q_points; 1309 hessians_quad += (dim * (dim + 1)) / 2 * n_q_points; 1310 } 1311 } 1312 1313 1314 1315 template <int dim, int fe_degree, int n_q_points_1d, typename Number> 1316 inline void 1317 FEEvaluationImplTransformToCollocation< 1318 dim, 1319 fe_degree, 1320 n_q_points_1d, 1321 Number>::integrate(const unsigned int n_components, 1322 const EvaluationFlags::EvaluationFlags integration_flag, 1323 const MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 1324 Number *values_dofs, 1325 Number *values_quad, 1326 Number *gradients_quad, 1327 Number *, 1328 const bool add_into_values_array) 1329 { 1330 Assert(n_q_points_1d > fe_degree, 1331 ExcMessage("You lose information when going to a collocation space " 1332 "of lower degree, so the evaluation results would be " 1333 "wrong. Thus, this class does not permit the desired " 1334 "operation.")); 1335 AssertDimension( 1336 shape_info.data.front().shape_gradients_collocation_eo.size(), 1337 (n_q_points_1d + 1) / 2 * n_q_points_1d); 1338 constexpr unsigned int n_q_points = Utilities::pow(n_q_points_1d, dim); 1339 1340 for (unsigned int c = 0; c < n_components; c++) 1341 { 1342 // apply derivatives in collocation space 1343 if (integration_flag & EvaluationFlags::gradients) 1344 FEEvaluationImplCollocation<dim, n_q_points_1d - 1, Number>:: 1345 integrate(1, 1346 integration_flag & EvaluationFlags::gradients, 1347 shape_info, 1348 values_quad, 1349 nullptr, 1350 gradients_quad, 1351 nullptr, 1352 /*add_into_values_array=*/integration_flag & 1353 EvaluationFlags::values); 1354 1355 // transform back to the original space 1356 FEEvaluationImplBasisChange< 1357 evaluate_evenodd, 1358 EvaluatorQuantity::value, 1359 dim, 1360 (fe_degree >= n_q_points_1d ? n_q_points_1d : fe_degree + 1), 1361 n_q_points_1d, 1362 Number, 1363 Number>::do_backward(1, 1364 shape_info.data.front().shape_values_eo, 1365 add_into_values_array, 1366 values_quad, 1367 values_dofs); 1368 gradients_quad += dim * n_q_points; 1369 values_quad += n_q_points; 1370 values_dofs += shape_info.dofs_per_component_on_cell; 1371 } 1372 } 1373 1374 1375 1376 /** 1377 * This class chooses an appropriate evaluation strategy based on the 1378 * template parameters and the shape_info variable which contains runtime 1379 * parameters for the strategy underlying FEEvaluation::evaluate(), i.e. 1380 * this calls internal::FEEvaluationImpl::evaluate(), 1381 * internal::FEEvaluationImplCollocation::evaluate() or 1382 * internal::FEEvaluationImplTransformToCollocation::evaluate() with 1383 * appropriate template parameters. In case the template parameters 1384 * fe_degree and n_q_points_1d contain valid information (i.e. fe_degree>-1 1385 * and n_q_points_1d>0), we simply pass these values to the respective 1386 * template specializations. Otherwise, we perform a runtime matching of 1387 * the runtime parameters to find the correct specialization. This matching 1388 * currently supports $0\leq fe\_degree \leq 9$ and $degree+1\leq 1389 * n\_q\_points\_1d\leq fe\_degree+2$. 1390 */ 1391 template <int dim, typename Number> 1392 struct FEEvaluationImplEvaluateSelector 1393 { 1394 template <int fe_degree, int n_q_points_1d> 1395 static bool 1396 run(const unsigned int n_components, 1397 const EvaluationFlags::EvaluationFlags evaluation_flag, 1398 const internal::MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 1399 Number *values_dofs_actual, 1400 Number *values_quad, 1401 Number *gradients_quad, 1402 Number *hessians_quad, 1403 Number *scratch_data) 1404 { 1405 // We enable a transformation to collocation for derivatives if it gives 1406 // correct results (first condition), if it is the most efficient choice 1407 // in terms of operation counts (second condition) and if we were able to 1408 // initialize the fields in shape_info.templates.h from the polynomials 1409 // (third condition). 1410 static constexpr bool use_collocation = 1411 n_q_points_1d > fe_degree && n_q_points_1d <= 3 * fe_degree / 2 + 1 && 1412 n_q_points_1d < 200; 1413 1414 if (fe_degree >= 0 && fe_degree + 1 == n_q_points_1d && 1415 shape_info.element_type == 1416 internal::MatrixFreeFunctions::tensor_symmetric_collocation) 1417 { 1418 internal::FEEvaluationImplCollocation<dim, fe_degree, Number>:: 1419 evaluate(n_components, 1420 evaluation_flag, 1421 shape_info, 1422 values_dofs_actual, 1423 values_quad, 1424 gradients_quad, 1425 hessians_quad, 1426 scratch_data); 1427 } 1428 // '<=' on type means tensor_symmetric or tensor_symmetric_hermite, see 1429 // shape_info.h for more details 1430 else if (fe_degree >= 0 && use_collocation && 1431 shape_info.element_type <= 1432 internal::MatrixFreeFunctions::tensor_symmetric) 1433 { 1434 internal::FEEvaluationImplTransformToCollocation< 1435 dim, 1436 fe_degree, 1437 n_q_points_1d, 1438 Number>::evaluate(n_components, 1439 evaluation_flag, 1440 shape_info, 1441 values_dofs_actual, 1442 values_quad, 1443 gradients_quad, 1444 hessians_quad, 1445 scratch_data); 1446 } 1447 else if (fe_degree >= 0 && 1448 shape_info.element_type <= 1449 internal::MatrixFreeFunctions::tensor_symmetric) 1450 { 1451 internal::FEEvaluationImpl< 1452 internal::MatrixFreeFunctions::tensor_symmetric, 1453 dim, 1454 fe_degree, 1455 n_q_points_1d, 1456 Number>::evaluate(n_components, 1457 evaluation_flag, 1458 shape_info, 1459 values_dofs_actual, 1460 values_quad, 1461 gradients_quad, 1462 hessians_quad, 1463 scratch_data); 1464 } 1465 else if (shape_info.element_type == 1466 internal::MatrixFreeFunctions::tensor_symmetric_plus_dg0) 1467 { 1468 internal::FEEvaluationImpl< 1469 internal::MatrixFreeFunctions::tensor_symmetric_plus_dg0, 1470 dim, 1471 fe_degree, 1472 n_q_points_1d, 1473 Number>::evaluate(n_components, 1474 evaluation_flag, 1475 shape_info, 1476 values_dofs_actual, 1477 values_quad, 1478 gradients_quad, 1479 hessians_quad, 1480 scratch_data); 1481 } 1482 else if (shape_info.element_type == 1483 internal::MatrixFreeFunctions::truncated_tensor) 1484 { 1485 internal::FEEvaluationImpl< 1486 internal::MatrixFreeFunctions::truncated_tensor, 1487 dim, 1488 fe_degree, 1489 n_q_points_1d, 1490 Number>::evaluate(n_components, 1491 evaluation_flag, 1492 shape_info, 1493 values_dofs_actual, 1494 values_quad, 1495 gradients_quad, 1496 hessians_quad, 1497 scratch_data); 1498 } 1499 else 1500 { 1501 internal::FEEvaluationImpl< 1502 internal::MatrixFreeFunctions::tensor_general, 1503 dim, 1504 fe_degree, 1505 n_q_points_1d, 1506 Number>::evaluate(n_components, 1507 evaluation_flag, 1508 shape_info, 1509 values_dofs_actual, 1510 values_quad, 1511 gradients_quad, 1512 hessians_quad, 1513 scratch_data); 1514 } 1515 1516 return false; 1517 } 1518 }; 1519 1520 1521 1522 /** 1523 * This class chooses an appropriate evaluation strategy based on the 1524 * template parameters and the shape_info variable which contains runtime 1525 * parameters for the strategy underlying FEEvaluation::integrate(), i.e. 1526 * this calls internal::FEEvaluationImpl::integrate(), 1527 * internal::FEEvaluationImplCollocation::integrate() or 1528 * internal::FEEvaluationImplTransformToCollocation::integrate() with 1529 * appropriate template parameters. In case the template parameters 1530 * fe_degree and n_q_points_1d contain valid information (i.e. fe_degree>-1 1531 * and n_q_points_1d>0), we simply pass these values to the respective 1532 * template specializations. Otherwise, we perform a runtime matching of 1533 * the runtime parameters to find the correct specialization. This matching 1534 * currently supports $0\leq fe\_degree \leq 9$ and $degree+1\leq 1535 * n\_q\_points\_1d\leq fe\_degree+2$. 1536 */ 1537 template <int dim, typename Number> 1538 struct FEEvaluationImplIntegrateSelector 1539 { 1540 template <int fe_degree, int n_q_points_1d> 1541 static bool 1542 run(const unsigned int n_components, 1543 const EvaluationFlags::EvaluationFlags integration_flag, 1544 const internal::MatrixFreeFunctions::ShapeInfo<Number> &shape_info, 1545 Number * values_dofs_actual, 1546 Number * values_quad, 1547 Number * gradients_quad, 1548 Number * scratch_data, 1549 const bool sum_into_values_array) 1550 { 1551 // We enable a transformation to collocation for derivatives if it gives 1552 // correct results (first condition), if it is the most efficient choice 1553 // in terms of operation counts (second condition) and if we were able to 1554 // initialize the fields in shape_info.templates.h from the polynomials 1555 // (third condition). 1556 constexpr bool use_collocation = n_q_points_1d > fe_degree && 1557 n_q_points_1d <= 3 * fe_degree / 2 + 1 && 1558 n_q_points_1d < 200; 1559 1560 if (fe_degree >= 0 && fe_degree + 1 == n_q_points_1d && 1561 shape_info.element_type == 1562 internal::MatrixFreeFunctions::tensor_symmetric_collocation) 1563 { 1564 internal::FEEvaluationImplCollocation<dim, fe_degree, Number>:: 1565 integrate(n_components, 1566 integration_flag, 1567 shape_info, 1568 values_dofs_actual, 1569 values_quad, 1570 gradients_quad, 1571 scratch_data, 1572 sum_into_values_array); 1573 } 1574 // '<=' on type means tensor_symmetric or tensor_symmetric_hermite, see 1575 // shape_info.h for more details 1576 else if (fe_degree >= 0 && use_collocation && 1577 shape_info.element_type <= 1578 internal::MatrixFreeFunctions::tensor_symmetric) 1579 { 1580 internal::FEEvaluationImplTransformToCollocation< 1581 dim, 1582 fe_degree, 1583 n_q_points_1d, 1584 Number>::integrate(n_components, 1585 integration_flag, 1586 shape_info, 1587 values_dofs_actual, 1588 values_quad, 1589 gradients_quad, 1590 scratch_data, 1591 sum_into_values_array); 1592 } 1593 else if (fe_degree >= 0 && 1594 shape_info.element_type <= 1595 internal::MatrixFreeFunctions::tensor_symmetric) 1596 { 1597 internal::FEEvaluationImpl< 1598 internal::MatrixFreeFunctions::tensor_symmetric, 1599 dim, 1600 fe_degree, 1601 n_q_points_1d, 1602 Number>::integrate(n_components, 1603 integration_flag, 1604 shape_info, 1605 values_dofs_actual, 1606 values_quad, 1607 gradients_quad, 1608 scratch_data, 1609 sum_into_values_array); 1610 } 1611 else if (shape_info.element_type == 1612 internal::MatrixFreeFunctions::tensor_symmetric_plus_dg0) 1613 { 1614 internal::FEEvaluationImpl< 1615 internal::MatrixFreeFunctions::tensor_symmetric_plus_dg0, 1616 dim, 1617 fe_degree, 1618 n_q_points_1d, 1619 Number>::integrate(n_components, 1620 integration_flag, 1621 shape_info, 1622 values_dofs_actual, 1623 values_quad, 1624 gradients_quad, 1625 scratch_data, 1626 sum_into_values_array); 1627 } 1628 else if (shape_info.element_type == 1629 internal::MatrixFreeFunctions::truncated_tensor) 1630 { 1631 internal::FEEvaluationImpl< 1632 internal::MatrixFreeFunctions::truncated_tensor, 1633 dim, 1634 fe_degree, 1635 n_q_points_1d, 1636 Number>::integrate(n_components, 1637 integration_flag, 1638 shape_info, 1639 values_dofs_actual, 1640 values_quad, 1641 gradients_quad, 1642 scratch_data, 1643 sum_into_values_array); 1644 } 1645 else 1646 { 1647 internal::FEEvaluationImpl< 1648 internal::MatrixFreeFunctions::tensor_general, 1649 dim, 1650 fe_degree, 1651 n_q_points_1d, 1652 Number>::integrate(n_components, 1653 integration_flag, 1654 shape_info, 1655 values_dofs_actual, 1656 values_quad, 1657 gradients_quad, 1658 scratch_data, 1659 sum_into_values_array); 1660 } 1661 1662 return false; 1663 } 1664 }; 1665 1666 1667 1668 template <bool symmetric_evaluate, 1669 int dim, 1670 int fe_degree, 1671 int n_q_points_1d, 1672 typename Number> 1673 struct FEFaceEvaluationImpl 1674 { 1675 // We enable a transformation to collocation for derivatives if it gives 1676 // correct results (first two conditions), if it is the most efficient 1677 // choice in terms of operation counts (third condition) and if we were 1678 // able to initialize the fields in shape_info.templates.h from the 1679 // polynomials (fourth condition). 1680 static constexpr bool use_collocation = 1681 symmetric_evaluate && 1682 n_q_points_1d > fe_degree &&n_q_points_1d <= 3 * fe_degree / 2 + 1 && 1683 n_q_points_1d < 200; 1684 1685 static void 1686 evaluate_in_face(const unsigned int n_components, 1687 const MatrixFreeFunctions::ShapeInfo<Number> &data, 1688 Number * values_dofs, 1689 Number * values_quad, 1690 Number * gradients_quad, 1691 Number * scratch_data, 1692 const bool evaluate_val, 1693 const bool evaluate_grad, 1694 const unsigned int subface_index) 1695 { 1696 const AlignedVector<Number> &val1 = 1697 symmetric_evaluate ? 1698 data.data.front().shape_values_eo : 1699 (subface_index >= GeometryInfo<dim>::max_children_per_cell ? 1700 data.data.front().shape_values : 1701 data.data.front().values_within_subface[subface_index % 2]); 1702 const AlignedVector<Number> &val2 = 1703 symmetric_evaluate ? 1704 data.data.front().shape_values_eo : 1705 (subface_index >= GeometryInfo<dim>::max_children_per_cell ? 1706 data.data.front().shape_values : 1707 data.data.front().values_within_subface[subface_index / 2]); 1708 1709 const AlignedVector<Number> &grad1 = 1710 symmetric_evaluate ? 1711 data.data.front().shape_gradients_eo : 1712 (subface_index >= GeometryInfo<dim>::max_children_per_cell ? 1713 data.data.front().shape_gradients : 1714 data.data.front().gradients_within_subface[subface_index % 2]); 1715 const AlignedVector<Number> &grad2 = 1716 symmetric_evaluate ? 1717 data.data.front().shape_gradients_eo : 1718 (subface_index >= GeometryInfo<dim>::max_children_per_cell ? 1719 data.data.front().shape_gradients : 1720 data.data.front().gradients_within_subface[subface_index / 2]); 1721 1722 using Eval = 1723 internal::EvaluatorTensorProduct<symmetric_evaluate ? 1724 internal::evaluate_evenodd : 1725 internal::evaluate_general, 1726 dim - 1, 1727 fe_degree + 1, 1728 n_q_points_1d, 1729 Number>; 1730 Eval eval1(val1, 1731 grad1, 1732 AlignedVector<Number>(), 1733 data.data.front().fe_degree + 1, 1734 data.data.front().n_q_points_1d); 1735 Eval eval2(val2, 1736 grad2, 1737 AlignedVector<Number>(), 1738 data.data.front().fe_degree + 1, 1739 data.data.front().n_q_points_1d); 1740 1741 const unsigned int size_deg = 1742 fe_degree > -1 ? 1743 Utilities::pow(fe_degree + 1, dim - 1) : 1744 (dim > 1 ? 1745 Utilities::fixed_power<dim - 1>(data.data.front().fe_degree + 1) : 1746 1); 1747 1748 const unsigned int n_q_points = fe_degree > -1 ? 1749 Utilities::pow(n_q_points_1d, dim - 1) : 1750 data.n_q_points_face; 1751 1752 if (evaluate_grad == false) 1753 for (unsigned int c = 0; c < n_components; ++c) 1754 { 1755 switch (dim) 1756 { 1757 case 3: 1758 eval1.template values<0, true, false>(values_dofs, 1759 values_quad); 1760 eval2.template values<1, true, false>(values_quad, 1761 values_quad); 1762 break; 1763 case 2: 1764 eval1.template values<0, true, false>(values_dofs, 1765 values_quad); 1766 break; 1767 case 1: 1768 values_quad[0] = values_dofs[0]; 1769 break; 1770 default: 1771 Assert(false, ExcNotImplemented()); 1772 } 1773 values_dofs += 2 * size_deg; 1774 values_quad += n_q_points; 1775 } 1776 else 1777 for (unsigned int c = 0; c < n_components; ++c) 1778 { 1779 switch (dim) 1780 { 1781 case 3: 1782 if (use_collocation) 1783 { 1784 eval1.template values<0, true, false>(values_dofs, 1785 values_quad); 1786 eval1.template values<1, true, false>(values_quad, 1787 values_quad); 1788 internal::EvaluatorTensorProduct< 1789 internal::evaluate_evenodd, 1790 dim - 1, 1791 n_q_points_1d, 1792 n_q_points_1d, 1793 Number> 1794 eval_grad( 1795 AlignedVector<Number>(), 1796 data.data.front().shape_gradients_collocation_eo, 1797 AlignedVector<Number>()); 1798 eval_grad.template gradients<0, true, false>( 1799 values_quad, gradients_quad); 1800 eval_grad.template gradients<1, true, false>( 1801 values_quad, gradients_quad + n_q_points); 1802 } 1803 else 1804 { 1805 eval1.template gradients<0, true, false>(values_dofs, 1806 scratch_data); 1807 eval2.template values<1, true, false>(scratch_data, 1808 gradients_quad); 1809 1810 eval1.template values<0, true, false>(values_dofs, 1811 scratch_data); 1812 eval2.template gradients<1, true, false>(scratch_data, 1813 gradients_quad + 1814 n_q_points); 1815 1816 if (evaluate_val == true) 1817 eval2.template values<1, true, false>(scratch_data, 1818 values_quad); 1819 } 1820 eval1.template values<0, true, false>(values_dofs + size_deg, 1821 scratch_data); 1822 eval2.template values<1, true, false>( 1823 scratch_data, gradients_quad + (dim - 1) * n_q_points); 1824 1825 break; 1826 case 2: 1827 eval1.template values<0, true, false>(values_dofs + size_deg, 1828 gradients_quad + 1829 (dim - 1) * 1830 n_q_points); 1831 eval1.template gradients<0, true, false>(values_dofs, 1832 gradients_quad); 1833 if (evaluate_val == true) 1834 eval1.template values<0, true, false>(values_dofs, 1835 values_quad); 1836 break; 1837 case 1: 1838 values_quad[0] = values_dofs[0]; 1839 gradients_quad[0] = values_dofs[1]; 1840 break; 1841 default: 1842 AssertThrow(false, ExcNotImplemented()); 1843 } 1844 values_dofs += 2 * size_deg; 1845 values_quad += n_q_points; 1846 gradients_quad += dim * n_q_points; 1847 } 1848 } 1849 1850 static void 1851 integrate_in_face(const unsigned int n_components, 1852 const MatrixFreeFunctions::ShapeInfo<Number> &data, 1853 Number * values_dofs, 1854 Number * values_quad, 1855 Number * gradients_quad, 1856 Number * scratch_data, 1857 const bool integrate_val, 1858 const bool integrate_grad, 1859 const unsigned int subface_index) 1860 { 1861 const AlignedVector<Number> &val1 = 1862 symmetric_evaluate ? 1863 data.data.front().shape_values_eo : 1864 (subface_index >= GeometryInfo<dim>::max_children_per_cell ? 1865 data.data.front().shape_values : 1866 data.data.front().values_within_subface[subface_index % 2]); 1867 const AlignedVector<Number> &val2 = 1868 symmetric_evaluate ? 1869 data.data.front().shape_values_eo : 1870 (subface_index >= GeometryInfo<dim>::max_children_per_cell ? 1871 data.data.front().shape_values : 1872 data.data.front().values_within_subface[subface_index / 2]); 1873 1874 const AlignedVector<Number> &grad1 = 1875 symmetric_evaluate ? 1876 data.data.front().shape_gradients_eo : 1877 (subface_index >= GeometryInfo<dim>::max_children_per_cell ? 1878 data.data.front().shape_gradients : 1879 data.data.front().gradients_within_subface[subface_index % 2]); 1880 const AlignedVector<Number> &grad2 = 1881 symmetric_evaluate ? 1882 data.data.front().shape_gradients_eo : 1883 (subface_index >= GeometryInfo<dim>::max_children_per_cell ? 1884 data.data.front().shape_gradients : 1885 data.data.front().gradients_within_subface[subface_index / 2]); 1886 1887 using Eval = 1888 internal::EvaluatorTensorProduct<symmetric_evaluate ? 1889 internal::evaluate_evenodd : 1890 internal::evaluate_general, 1891 dim - 1, 1892 fe_degree + 1, 1893 n_q_points_1d, 1894 Number>; 1895 Eval eval1(val1, 1896 grad1, 1897 val1, 1898 data.data.front().fe_degree + 1, 1899 data.data.front().n_q_points_1d); 1900 Eval eval2(val2, 1901 grad2, 1902 val1, 1903 data.data.front().fe_degree + 1, 1904 data.data.front().n_q_points_1d); 1905 1906 const unsigned int size_deg = 1907 fe_degree > -1 ? 1908 Utilities::pow(fe_degree + 1, dim - 1) : 1909 (dim > 1 ? 1910 Utilities::fixed_power<dim - 1>(data.data.front().fe_degree + 1) : 1911 1); 1912 1913 const unsigned int n_q_points = fe_degree > -1 ? 1914 Utilities::pow(n_q_points_1d, dim - 1) : 1915 data.n_q_points_face; 1916 1917 if (integrate_grad == false) 1918 for (unsigned int c = 0; c < n_components; ++c) 1919 { 1920 switch (dim) 1921 { 1922 case 3: 1923 eval2.template values<1, false, false>(values_quad, 1924 values_quad); 1925 eval1.template values<0, false, false>(values_quad, 1926 values_dofs); 1927 break; 1928 case 2: 1929 eval1.template values<0, false, false>(values_quad, 1930 values_dofs); 1931 break; 1932 case 1: 1933 values_dofs[0] = values_quad[0]; 1934 break; 1935 default: 1936 Assert(false, ExcNotImplemented()); 1937 } 1938 values_dofs += 2 * size_deg; 1939 values_quad += n_q_points; 1940 } 1941 else 1942 for (unsigned int c = 0; c < n_components; ++c) 1943 { 1944 switch (dim) 1945 { 1946 case 3: 1947 eval2.template values<1, false, false>(gradients_quad + 1948 2 * n_q_points, 1949 gradients_quad + 1950 2 * n_q_points); 1951 eval1.template values<0, false, false>( 1952 gradients_quad + 2 * n_q_points, values_dofs + size_deg); 1953 if (use_collocation) 1954 { 1955 internal::EvaluatorTensorProduct< 1956 internal::evaluate_evenodd, 1957 dim - 1, 1958 n_q_points_1d, 1959 n_q_points_1d, 1960 Number> 1961 eval_grad( 1962 AlignedVector<Number>(), 1963 data.data.front().shape_gradients_collocation_eo, 1964 AlignedVector<Number>()); 1965 if (integrate_val) 1966 eval_grad.template gradients<1, false, true>( 1967 gradients_quad + n_q_points, values_quad); 1968 else 1969 eval_grad.template gradients<1, false, false>( 1970 gradients_quad + n_q_points, values_quad); 1971 eval_grad.template gradients<0, false, true>( 1972 gradients_quad, values_quad); 1973 eval1.template values<1, false, false>(values_quad, 1974 values_quad); 1975 eval1.template values<0, false, false>(values_quad, 1976 values_dofs); 1977 } 1978 else 1979 { 1980 if (integrate_val) 1981 { 1982 eval2.template values<1, false, false>(values_quad, 1983 scratch_data); 1984 eval2.template gradients<1, false, true>( 1985 gradients_quad + n_q_points, scratch_data); 1986 } 1987 else 1988 eval2.template gradients<1, false, false>( 1989 gradients_quad + n_q_points, scratch_data); 1990 1991 eval1.template values<0, false, false>(scratch_data, 1992 values_dofs); 1993 eval2.template values<1, false, false>(gradients_quad, 1994 scratch_data); 1995 eval1.template gradients<0, false, true>(scratch_data, 1996 values_dofs); 1997 } 1998 break; 1999 case 2: 2000 eval1.template values<0, false, false>( 2001 gradients_quad + n_q_points, values_dofs + size_deg); 2002 eval1.template gradients<0, false, false>(gradients_quad, 2003 values_dofs); 2004 if (integrate_val == true) 2005 eval1.template values<0, false, true>(values_quad, 2006 values_dofs); 2007 break; 2008 case 1: 2009 values_dofs[0] = values_quad[0]; 2010 values_dofs[1] = gradients_quad[0]; 2011 break; 2012 default: 2013 AssertThrow(false, ExcNotImplemented()); 2014 } 2015 values_dofs += 2 * size_deg; 2016 values_quad += n_q_points; 2017 gradients_quad += dim * n_q_points; 2018 } 2019 } 2020 }; 2021 2022 2023 2024 template <int dim, int fe_degree, typename Number, bool lex_faces = false> 2025 struct FEFaceNormalEvaluationImpl 2026 { 2027 template <bool do_evaluate, bool add_into_output> 2028 static void 2029 interpolate(const unsigned int n_components, 2030 const MatrixFreeFunctions::ShapeInfo<Number> &data, 2031 const Number * input, 2032 Number * output, 2033 const bool do_gradients, 2034 const unsigned int face_no) 2035 { 2036 Assert(static_cast<unsigned int>(fe_degree) == 2037 data.data.front().fe_degree || 2038 fe_degree == -1, 2039 ExcInternalError()); 2040 2041 interpolate_generic<do_evaluate, add_into_output>( 2042 n_components, 2043 input, 2044 output, 2045 do_gradients, 2046 face_no, 2047 data.data.front().fe_degree + 1, 2048 data.data.front().shape_data_on_face, 2049 data.dofs_per_component_on_cell, 2050 2 * data.dofs_per_component_on_face); 2051 } 2052 2053 /** 2054 * Interpolate the values on the cell quadrature points onto a face. 2055 */ 2056 template <bool do_evaluate, bool add_into_output> 2057 static void 2058 interpolate_quadrature(const unsigned int n_components, 2059 const MatrixFreeFunctions::ShapeInfo<Number> &data, 2060 const Number * input, 2061 Number * output, 2062 const bool do_gradients, 2063 const unsigned int face_no) 2064 { 2065 Assert(static_cast<unsigned int>(fe_degree + 1) == 2066 data.data.front().quadrature.size() || 2067 fe_degree == -1, 2068 ExcInternalError()); 2069 2070 interpolate_generic<do_evaluate, add_into_output>( 2071 n_components, 2072 input, 2073 output, 2074 do_gradients, 2075 face_no, 2076 data.data.front().quadrature.size(), 2077 data.data.front().quadrature_data_on_face, 2078 data.n_q_points, 2079 data.n_q_points_face); 2080 } 2081 2082 private: 2083 template <bool do_evaluate, bool add_into_output, int face_direction = 0> 2084 static void 2085 interpolate_generic(const unsigned int n_components, 2086 const Number * input, 2087 Number * output, 2088 const bool do_gradients, 2089 const unsigned int face_no, 2090 const unsigned int n_points_1d, 2091 const std::array<AlignedVector<Number>, 2> &shape_data, 2092 const unsigned int dofs_per_component_on_cell, 2093 const unsigned int dofs_per_component_on_face) 2094 { 2095 if (face_direction == face_no / 2) 2096 { 2097 internal::EvaluatorTensorProduct<internal::evaluate_general, 2098 dim, 2099 fe_degree + 1, 2100 0, 2101 Number> 2102 evalf(shape_data[face_no % 2], 2103 AlignedVector<Number>(), 2104 AlignedVector<Number>(), 2105 n_points_1d, 2106 0); 2107 2108 const unsigned int in_stride = do_evaluate ? 2109 dofs_per_component_on_cell : 2110 dofs_per_component_on_face; 2111 const unsigned int out_stride = do_evaluate ? 2112 dofs_per_component_on_face : 2113 dofs_per_component_on_cell; 2114 2115 for (unsigned int c = 0; c < n_components; c++) 2116 { 2117 if (do_gradients) 2118 evalf.template apply_face<face_direction, 2119 do_evaluate, 2120 add_into_output, 2121 1, 2122 lex_faces>(input, output); 2123 else 2124 evalf.template apply_face<face_direction, 2125 do_evaluate, 2126 add_into_output, 2127 0, 2128 lex_faces>(input, output); 2129 input += in_stride; 2130 output += out_stride; 2131 } 2132 } 2133 else if (face_direction < dim) 2134 { 2135 interpolate_generic<do_evaluate, 2136 add_into_output, 2137 std::min(face_direction + 1, dim - 1)>( 2138 n_components, 2139 input, 2140 output, 2141 do_gradients, 2142 face_no, 2143 n_points_1d, 2144 shape_data, 2145 dofs_per_component_on_cell, 2146 dofs_per_component_on_face); 2147 } 2148 } 2149 }; 2150 2151 2152 2153 // internal helper function for reading data; base version of different types 2154 template <typename VectorizedArrayType, typename Number2> 2155 void 2156 do_vectorized_read(const Number2 *src_ptr, VectorizedArrayType &dst) 2157 { 2158 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 2159 dst[v] = src_ptr[v]; 2160 } 2161 2162 2163 2164 // internal helper function for reading data; specialized version where we 2165 // can use a dedicated load function 2166 template <typename Number, unsigned int width> 2167 void 2168 do_vectorized_read(const Number *src_ptr, VectorizedArray<Number, width> &dst) 2169 { 2170 dst.load(src_ptr); 2171 } 2172 2173 2174 2175 // internal helper function for reading data; base version of different types 2176 template <typename VectorizedArrayType, typename Number2> 2177 void 2178 do_vectorized_gather(const Number2 * src_ptr, 2179 const unsigned int * indices, 2180 VectorizedArrayType &dst) 2181 { 2182 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 2183 dst[v] = src_ptr[indices[v]]; 2184 } 2185 2186 2187 2188 // internal helper function for reading data; specialized version where we 2189 // can use a dedicated gather function 2190 template <typename Number, unsigned int width> 2191 void 2192 do_vectorized_gather(const Number * src_ptr, 2193 const unsigned int * indices, 2194 VectorizedArray<Number, width> &dst) 2195 { 2196 dst.gather(src_ptr, indices); 2197 } 2198 2199 2200 2201 // internal helper function for reading data; base version of different types 2202 template <typename VectorizedArrayType, typename Number2> 2203 void 2204 do_vectorized_add(const VectorizedArrayType src, Number2 *dst_ptr) 2205 { 2206 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 2207 dst_ptr[v] += src[v]; 2208 } 2209 2210 2211 2212 // internal helper function for reading data; specialized version where we 2213 // can use a dedicated load function 2214 template <typename Number, unsigned int width> 2215 void 2216 do_vectorized_add(const VectorizedArray<Number, width> src, Number *dst_ptr) 2217 { 2218 VectorizedArray<Number, width> tmp; 2219 tmp.load(dst_ptr); 2220 (tmp + src).store(dst_ptr); 2221 } 2222 2223 2224 2225 // internal helper function for reading data; base version of different types 2226 template <typename VectorizedArrayType, typename Number2> 2227 void 2228 do_vectorized_scatter_add(const VectorizedArrayType src, 2229 const unsigned int * indices, 2230 Number2 * dst_ptr) 2231 { 2232 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 2233 dst_ptr[indices[v]] += src[v]; 2234 } 2235 2236 2237 2238 // internal helper function for reading data; specialized version where we 2239 // can use a dedicated gather function 2240 template <typename Number, unsigned int width> 2241 void 2242 do_vectorized_scatter_add(const VectorizedArray<Number, width> src, 2243 const unsigned int * indices, 2244 Number * dst_ptr) 2245 { 2246 #if DEAL_II_VECTORIZATION_WIDTH_IN_BITS < 512 2247 for (unsigned int v = 0; v < width; ++v) 2248 dst_ptr[indices[v]] += src[v]; 2249 #else 2250 VectorizedArray<Number, width> tmp; 2251 tmp.gather(dst_ptr, indices); 2252 (tmp + src).scatter(indices, dst_ptr); 2253 #endif 2254 } 2255 2256 2257 2258 template <typename Number> 2259 void 2260 adjust_for_face_orientation(const unsigned int dim, 2261 const unsigned int n_components, 2262 const unsigned int face_orientation, 2263 const Table<2, unsigned int> &orientation_map, 2264 const bool integrate, 2265 const bool values, 2266 const bool gradients, 2267 const unsigned int n_q_points, 2268 Number * tmp_values, 2269 Number * values_quad, 2270 Number * gradients_quad) 2271 { 2272 Assert(face_orientation, ExcInternalError()); 2273 const unsigned int *orientation = &orientation_map[face_orientation][0]; 2274 for (unsigned int c = 0; c < n_components; ++c) 2275 { 2276 if (values == true) 2277 { 2278 if (integrate) 2279 for (unsigned int q = 0; q < n_q_points; ++q) 2280 tmp_values[q] = values_quad[c * n_q_points + orientation[q]]; 2281 else 2282 for (unsigned int q = 0; q < n_q_points; ++q) 2283 tmp_values[orientation[q]] = values_quad[c * n_q_points + q]; 2284 for (unsigned int q = 0; q < n_q_points; ++q) 2285 values_quad[c * n_q_points + q] = tmp_values[q]; 2286 } 2287 if (gradients == true) 2288 for (unsigned int d = 0; d < dim; ++d) 2289 { 2290 if (integrate) 2291 for (unsigned int q = 0; q < n_q_points; ++q) 2292 tmp_values[q] = 2293 gradients_quad[(c * dim + d) * n_q_points + orientation[q]]; 2294 else 2295 for (unsigned int q = 0; q < n_q_points; ++q) 2296 tmp_values[orientation[q]] = 2297 gradients_quad[(c * dim + d) * n_q_points + q]; 2298 for (unsigned int q = 0; q < n_q_points; ++q) 2299 gradients_quad[(c * dim + d) * n_q_points + q] = tmp_values[q]; 2300 } 2301 } 2302 } 2303 2304 2305 2306 template <int dim, typename VectorizedArrayType> 2307 struct FEFaceEvaluationImplEvaluateSelector 2308 { 2309 template <int fe_degree, int n_q_points_1d> 2310 static bool 2311 run(const unsigned int n_components, 2312 const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data, 2313 const VectorizedArrayType * values_array, 2314 VectorizedArrayType * values_quad, 2315 VectorizedArrayType * gradients_quad, 2316 VectorizedArrayType * scratch_data, 2317 const bool evaluate_values, 2318 const bool evaluate_gradients, 2319 const unsigned int face_no, 2320 const unsigned int subface_index, 2321 const unsigned int face_orientation, 2322 const Table<2, unsigned int> &orientation_map) 2323 { 2324 constexpr unsigned int static_dofs_per_face = 2325 fe_degree > -1 ? Utilities::pow(fe_degree + 1, dim - 1) : 2326 numbers::invalid_unsigned_int; 2327 const unsigned int dofs_per_face = 2328 fe_degree > -1 ? 2329 static_dofs_per_face : 2330 Utilities::pow(data.data.front().fe_degree + 1, dim - 1); 2331 2332 VectorizedArrayType *temp1 = scratch_data; 2333 2334 FEFaceNormalEvaluationImpl<dim, fe_degree, VectorizedArrayType>:: 2335 template interpolate<true, false>( 2336 n_components, data, values_array, temp1, evaluate_gradients, face_no); 2337 2338 const unsigned int n_q_points_1d_actual = 2339 fe_degree > -1 ? n_q_points_1d : 0; 2340 if (fe_degree > -1 && 2341 subface_index >= GeometryInfo<dim>::max_children_per_cell && 2342 data.element_type <= MatrixFreeFunctions::tensor_symmetric) 2343 FEFaceEvaluationImpl< 2344 true, 2345 dim, 2346 fe_degree, 2347 n_q_points_1d_actual, 2348 VectorizedArrayType>::evaluate_in_face(n_components, 2349 data, 2350 temp1, 2351 values_quad, 2352 gradients_quad, 2353 scratch_data + 2 * 2354 n_components * 2355 dofs_per_face, 2356 evaluate_values, 2357 evaluate_gradients, 2358 subface_index); 2359 else 2360 FEFaceEvaluationImpl< 2361 false, 2362 dim, 2363 fe_degree, 2364 n_q_points_1d_actual, 2365 VectorizedArrayType>::evaluate_in_face(n_components, 2366 data, 2367 temp1, 2368 values_quad, 2369 gradients_quad, 2370 scratch_data + 2 * 2371 n_components * 2372 dofs_per_face, 2373 evaluate_values, 2374 evaluate_gradients, 2375 subface_index); 2376 2377 if (face_orientation) 2378 adjust_for_face_orientation(dim, 2379 n_components, 2380 face_orientation, 2381 orientation_map, 2382 false, 2383 evaluate_values, 2384 evaluate_gradients, 2385 data.n_q_points_face, 2386 scratch_data, 2387 values_quad, 2388 gradients_quad); 2389 2390 return false; 2391 } 2392 }; 2393 2394 2395 2396 template <int dim, typename VectorizedArrayType> 2397 struct FEFaceEvaluationImplIntegrateSelector 2398 { 2399 template <int fe_degree, int n_q_points_1d> 2400 static bool 2401 run(const unsigned int n_components, 2402 const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data, 2403 VectorizedArrayType * values_array, 2404 VectorizedArrayType * values_quad, 2405 VectorizedArrayType * gradients_quad, 2406 VectorizedArrayType * scratch_data, 2407 const bool integrate_values, 2408 const bool integrate_gradients, 2409 const unsigned int face_no, 2410 const unsigned int subface_index, 2411 const unsigned int face_orientation, 2412 const Table<2, unsigned int> &orientation_map) 2413 { 2414 if (face_orientation) 2415 adjust_for_face_orientation(dim, 2416 n_components, 2417 face_orientation, 2418 orientation_map, 2419 true, 2420 integrate_values, 2421 integrate_gradients, 2422 data.n_q_points_face, 2423 scratch_data, 2424 values_quad, 2425 gradients_quad); 2426 2427 constexpr unsigned int static_dofs_per_face = 2428 fe_degree > -1 ? Utilities::pow(fe_degree + 1, dim - 1) : 2429 numbers::invalid_unsigned_int; 2430 const unsigned int dofs_per_face = 2431 fe_degree > -1 ? 2432 static_dofs_per_face : 2433 Utilities::pow(data.data.front().fe_degree + 1, dim - 1); 2434 2435 VectorizedArrayType *temp1 = scratch_data; 2436 2437 const unsigned int n_q_points_1d_actual = 2438 fe_degree > -1 ? n_q_points_1d : 0; 2439 if (fe_degree > -1 && 2440 subface_index >= GeometryInfo<dim - 1>::max_children_per_cell && 2441 data.element_type <= MatrixFreeFunctions::tensor_symmetric) 2442 FEFaceEvaluationImpl< 2443 true, 2444 dim, 2445 fe_degree, 2446 n_q_points_1d_actual, 2447 VectorizedArrayType>::integrate_in_face(n_components, 2448 data, 2449 temp1, 2450 values_quad, 2451 gradients_quad, 2452 scratch_data + 2453 2 * n_components * 2454 dofs_per_face, 2455 integrate_values, 2456 integrate_gradients, 2457 subface_index); 2458 else 2459 FEFaceEvaluationImpl< 2460 false, 2461 dim, 2462 fe_degree, 2463 n_q_points_1d_actual, 2464 VectorizedArrayType>::integrate_in_face(n_components, 2465 data, 2466 temp1, 2467 values_quad, 2468 gradients_quad, 2469 scratch_data + 2470 2 * n_components * 2471 dofs_per_face, 2472 integrate_values, 2473 integrate_gradients, 2474 subface_index); 2475 2476 FEFaceNormalEvaluationImpl<dim, fe_degree, VectorizedArrayType>:: 2477 template interpolate<false, false>(n_components, 2478 data, 2479 temp1, 2480 values_array, 2481 integrate_gradients, 2482 face_no); 2483 return false; 2484 } 2485 }; 2486 2487 2488 2489 template <int n_face_orientations, typename Processor> 2490 static bool 2491 fe_face_evaluation_process_and_io(Processor &proc) 2492 { 2493 auto n_components = proc.n_components; 2494 auto integrate = proc.integrate; 2495 auto global_vector_ptr = proc.global_vector_ptr; 2496 auto &data = proc.data; 2497 auto &dof_info = proc.dof_info; 2498 auto values_quad = proc.values_quad; 2499 auto gradients_quad = proc.gradients_quad; 2500 auto scratch_data = proc.scratch_data; 2501 auto do_values = proc.do_values; 2502 auto do_gradients = proc.do_gradients; 2503 auto active_fe_index = proc.active_fe_index; 2504 auto first_selected_component = proc.first_selected_component; 2505 auto cells = proc.cells; 2506 auto face_nos = proc.face_nos; 2507 auto subface_index = proc.subface_index; 2508 auto dof_access_index = proc.dof_access_index; 2509 auto face_orientations = proc.face_orientations; 2510 auto &orientation_map = proc.orientation_map; 2511 2512 static const int dim = Processor::dim_; 2513 static const int fe_degree = Processor::fe_degree_; 2514 using VectorizedArrayType = typename Processor::VectorizedArrayType_; 2515 2516 using Number2_ = typename Processor::Number2_; 2517 2518 const unsigned int cell = cells[0]; 2519 2520 // In the case of integration, we do not need to reshuffle the 2521 // data at the quadrature points to adjust for the face 2522 // orientation if the shape functions are nodal at the cell 2523 // boundaries (and we only requested the integration of the 2524 // values) or Hermite shape functions are used. These cases are 2525 // handled later when the values are written back into the 2526 // glrobal vector. 2527 if (integrate && 2528 (face_orientations[0] > 0 && 2529 (subface_index < GeometryInfo<dim>::max_children_per_cell || 2530 !(((do_gradients == false && 2531 data.data.front().nodal_at_cell_boundaries == true && 2532 fe_degree > 0) || 2533 (data.element_type == 2534 MatrixFreeFunctions::tensor_symmetric_hermite && 2535 fe_degree > 1)) && 2536 (dof_info.index_storage_variants[dof_access_index][cell] == 2537 MatrixFreeFunctions::DoFInfo::IndexStorageVariants:: 2538 interleaved_contiguous || 2539 dof_info.index_storage_variants[dof_access_index][cell] == 2540 MatrixFreeFunctions::DoFInfo::IndexStorageVariants:: 2541 interleaved_contiguous_strided || 2542 dof_info.index_storage_variants[dof_access_index][cell] == 2543 MatrixFreeFunctions::DoFInfo::IndexStorageVariants:: 2544 interleaved_contiguous_mixed_strides || 2545 dof_info.index_storage_variants[dof_access_index][cell] == 2546 MatrixFreeFunctions::DoFInfo::IndexStorageVariants:: 2547 contiguous))))) 2548 { 2549 AssertDimension(face_orientations.size(), 1); 2550 adjust_for_face_orientation(dim, 2551 n_components, 2552 face_orientations[0], 2553 orientation_map, 2554 true, 2555 do_values, 2556 do_gradients, 2557 data.n_q_points_face, 2558 scratch_data, 2559 values_quad, 2560 gradients_quad); 2561 } 2562 2563 // we know that the gradient weights for the Hermite case on the 2564 // right (side==1) are the negative from the value at the left 2565 // (side==0), so we only read out one of them. 2566 VectorizedArrayType grad_weight = 2567 (data.data.front().nodal_at_cell_boundaries == true && fe_degree > 1 && 2568 data.element_type == MatrixFreeFunctions::tensor_symmetric_hermite) ? 2569 data.data.front() 2570 .shape_data_on_face[0][fe_degree + (integrate ? 2571 (2 - (face_nos[0] % 2)) : 2572 (1 + (face_nos[0] % 2)))] : 2573 VectorizedArrayType(0.0 /*dummy*/); 2574 2575 constexpr unsigned int static_dofs_per_component = 2576 fe_degree > -1 ? Utilities::pow(fe_degree + 1, dim) : 2577 numbers::invalid_unsigned_int; 2578 constexpr unsigned int static_dofs_per_face = 2579 fe_degree > -1 ? Utilities::pow(fe_degree + 1, dim - 1) : 2580 numbers::invalid_unsigned_int; 2581 const unsigned int dofs_per_face = 2582 fe_degree > -1 ? static_dofs_per_face : 2583 Utilities::pow(data.data.front().fe_degree + 1, dim - 1); 2584 2585 VectorizedArrayType *temp1 = scratch_data; 2586 2587 const unsigned int dummy = 0; 2588 2589 // re-orientation 2590 std::array<const unsigned int *, n_face_orientations> orientation = {}; 2591 2592 if (n_face_orientations == 1) 2593 orientation[0] = (data.data.front().nodal_at_cell_boundaries == true) ? 2594 &data.face_orientations[face_orientations[0]][0] : 2595 &dummy; 2596 else 2597 { 2598 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 2599 { 2600 // the loop breaks once an invalid_unsigned_int is hit for 2601 // all cases except the exterior faces in the ECL loop (where 2602 // some faces might be at the boundaries but others not) 2603 if (cells[v] == numbers::invalid_unsigned_int) 2604 continue; 2605 2606 orientation[v] = 2607 (data.data.front().nodal_at_cell_boundaries == true) ? 2608 &data.face_orientations[face_orientations[v]][0] : 2609 &dummy; 2610 } 2611 } 2612 2613 // face_to_cell_index_hermite 2614 std::array<const unsigned int *, n_face_orientations> index_array_hermite = 2615 {}; 2616 2617 if (n_face_orientations == 1) 2618 index_array_hermite[0] = 2619 (data.data.front().nodal_at_cell_boundaries == true && fe_degree > 1 && 2620 data.element_type == MatrixFreeFunctions::tensor_symmetric_hermite) ? 2621 &data.face_to_cell_index_hermite(face_nos[0], 0) : 2622 &dummy; 2623 2624 if (n_face_orientations > 1 && 2625 data.data.front().nodal_at_cell_boundaries == true && fe_degree > 1 && 2626 data.element_type == MatrixFreeFunctions::tensor_symmetric_hermite) 2627 { 2628 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 2629 { 2630 if (cells[v] == numbers::invalid_unsigned_int) 2631 continue; 2632 2633 grad_weight[v] = 2634 data.data.front().shape_data_on_face 2635 [0][fe_degree + (integrate ? (2 - (face_nos[v] % 2)) : 2636 (1 + (face_nos[v] % 2)))][v]; 2637 2638 index_array_hermite[v] = 2639 &data.face_to_cell_index_hermite(face_nos[v], 0); 2640 } 2641 } 2642 2643 // face_to_cell_index_nodal 2644 std::array<const unsigned int *, n_face_orientations> index_array_nodal = 2645 {}; 2646 2647 if (n_face_orientations == 1) 2648 index_array_nodal[0] = 2649 (data.data.front().nodal_at_cell_boundaries == true) ? 2650 &data.face_to_cell_index_nodal(face_nos[0], 0) : 2651 &dummy; 2652 2653 if (n_face_orientations > 1 && 2654 (data.data.front().nodal_at_cell_boundaries == true)) 2655 { 2656 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 2657 { 2658 if (cells[v] == numbers::invalid_unsigned_int) 2659 continue; 2660 2661 index_array_nodal[v] = 2662 &data.face_to_cell_index_nodal(face_nos[v], 0); 2663 } 2664 } 2665 2666 const auto reorientate = [&](const unsigned int v, const unsigned int i) { 2667 return (dim < 3 || 2668 face_orientations[n_face_orientations == 1 ? 0 : v] == 0 || 2669 subface_index < GeometryInfo<dim>::max_children_per_cell) ? 2670 i : 2671 orientation[v][i]; 2672 }; 2673 2674 // this variable keeps track of whether we are able to directly write 2675 // the results into the result (function returns true) or not, requiring 2676 // an additional call to another function 2677 bool accesses_global_vector = true; 2678 2679 for (unsigned int comp = 0; comp < n_components; ++comp) 2680 { 2681 if (integrate) 2682 proc.function_0(temp1, comp); 2683 2684 // we can only use the fast functions if we know the polynomial degree 2685 // as a template parameter (fe_degree != -1), and it only makes sense 2686 // to use the functions for at least linear functions for values on 2687 // the faces and quadratic functions for gradients on the faces, so 2688 // include the switch here 2689 if ((do_gradients == false && 2690 data.data.front().nodal_at_cell_boundaries == true && 2691 fe_degree > 0) || 2692 (data.element_type == 2693 MatrixFreeFunctions::tensor_symmetric_hermite && 2694 fe_degree > 1)) 2695 { 2696 // case 1: contiguous and interleaved indices 2697 if (n_face_orientations == 1 && 2698 dof_info.index_storage_variants[dof_access_index][cell] == 2699 MatrixFreeFunctions::DoFInfo::IndexStorageVariants:: 2700 interleaved_contiguous) 2701 { 2702 AssertDimension(n_face_orientations, 1); 2703 2704 AssertDimension( 2705 dof_info.n_vectorization_lanes_filled[dof_access_index][cell], 2706 VectorizedArrayType::size()); 2707 Number2_ *vector_ptr = 2708 global_vector_ptr + 2709 dof_info.dof_indices_contiguous[dof_access_index] 2710 [cell * 2711 VectorizedArrayType::size()] + 2712 (dof_info 2713 .component_dof_indices_offset[active_fe_index] 2714 [first_selected_component] + 2715 comp * static_dofs_per_component) * 2716 VectorizedArrayType::size(); 2717 2718 if (fe_degree > 1 && do_gradients == true) 2719 { 2720 for (unsigned int i = 0; i < dofs_per_face; ++i) 2721 { 2722 if (n_face_orientations == 1) 2723 { 2724 const unsigned int ind1 = 2725 index_array_hermite[0][2 * i]; 2726 const unsigned int ind2 = 2727 index_array_hermite[0][2 * i + 1]; 2728 AssertIndexRange(ind1, 2729 data.dofs_per_component_on_cell); 2730 AssertIndexRange(ind2, 2731 data.dofs_per_component_on_cell); 2732 const unsigned int i_ = reorientate(0, i); 2733 proc.function_1a( 2734 temp1[i_], 2735 temp1[i_ + dofs_per_face], 2736 vector_ptr + ind1 * VectorizedArrayType::size(), 2737 vector_ptr + ind2 * VectorizedArrayType::size(), 2738 grad_weight); 2739 } 2740 else 2741 { 2742 Assert(false, ExcNotImplemented()); 2743 } 2744 } 2745 } 2746 else 2747 { 2748 for (unsigned int i = 0; i < dofs_per_face; ++i) 2749 { 2750 if (n_face_orientations == 1) 2751 { 2752 const unsigned int i_ = reorientate(0, i); 2753 const unsigned int ind = index_array_nodal[0][i]; 2754 proc.function_1b(temp1[i_], 2755 vector_ptr + 2756 ind * 2757 VectorizedArrayType::size()); 2758 } 2759 else 2760 { 2761 Assert(false, ExcNotImplemented()); 2762 } 2763 } 2764 } 2765 } 2766 2767 // case 2: contiguous and interleaved indices with fixed stride 2768 else if (n_face_orientations == 1 && 2769 dof_info.index_storage_variants[dof_access_index][cell] == 2770 MatrixFreeFunctions::DoFInfo::IndexStorageVariants:: 2771 interleaved_contiguous_strided) 2772 { 2773 AssertDimension(n_face_orientations, 1); 2774 2775 AssertDimension( 2776 dof_info.n_vectorization_lanes_filled[dof_access_index][cell], 2777 VectorizedArrayType::size()); 2778 const unsigned int *indices = 2779 &dof_info.dof_indices_contiguous[dof_access_index] 2780 [cell * 2781 VectorizedArrayType::size()]; 2782 Number2_ *vector_ptr = 2783 global_vector_ptr + 2784 (comp * static_dofs_per_component + 2785 dof_info 2786 .component_dof_indices_offset[active_fe_index] 2787 [first_selected_component]) * 2788 VectorizedArrayType::size(); 2789 if (fe_degree > 1 && do_gradients == true) 2790 { 2791 for (unsigned int i = 0; i < dofs_per_face; ++i) 2792 { 2793 if (n_face_orientations == 1) 2794 { 2795 const unsigned int i_ = reorientate(0, i); 2796 const unsigned int ind1 = 2797 index_array_hermite[0][2 * i] * 2798 VectorizedArrayType::size(); 2799 const unsigned int ind2 = 2800 index_array_hermite[0][2 * i + 1] * 2801 VectorizedArrayType::size(); 2802 proc.function_2a(temp1[i_], 2803 temp1[i_ + dofs_per_face], 2804 vector_ptr + ind1, 2805 vector_ptr + ind2, 2806 grad_weight, 2807 indices, 2808 indices); 2809 } 2810 else 2811 { 2812 Assert(false, ExcNotImplemented()); 2813 } 2814 } 2815 } 2816 else 2817 { 2818 for (unsigned int i = 0; i < dofs_per_face; ++i) 2819 { 2820 if (n_face_orientations == 1) 2821 { 2822 const unsigned int i_ = reorientate(0, i); 2823 const unsigned int ind = 2824 index_array_nodal[0][i] * 2825 VectorizedArrayType::size(); 2826 proc.function_2b(temp1[i_], 2827 vector_ptr + ind, 2828 indices); 2829 } 2830 else 2831 { 2832 Assert(false, ExcNotImplemented()); 2833 } 2834 } 2835 } 2836 } 2837 2838 // case 3: contiguous and interleaved indices with mixed stride 2839 else if (n_face_orientations == 1 && 2840 dof_info.index_storage_variants[dof_access_index][cell] == 2841 MatrixFreeFunctions::DoFInfo::IndexStorageVariants:: 2842 interleaved_contiguous_mixed_strides) 2843 { 2844 AssertDimension(n_face_orientations, 1); 2845 2846 const unsigned int *strides = 2847 &dof_info.dof_indices_interleave_strides 2848 [dof_access_index][cell * VectorizedArrayType::size()]; 2849 unsigned int indices[VectorizedArrayType::size()]; 2850 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 2851 indices[v] = 2852 dof_info.dof_indices_contiguous 2853 [dof_access_index] 2854 [cell * VectorizedArrayType::size() + v] + 2855 (dof_info 2856 .component_dof_indices_offset[active_fe_index] 2857 [first_selected_component] + 2858 comp * static_dofs_per_component) * 2859 strides[v]; 2860 const unsigned int n_filled_lanes = 2861 dof_info.n_vectorization_lanes_filled[dof_access_index][cell]; 2862 2863 if (fe_degree > 1 && do_gradients == true) 2864 { 2865 if (n_filled_lanes == VectorizedArrayType::size()) 2866 for (unsigned int i = 0; i < dofs_per_face; ++i) 2867 { 2868 if (n_face_orientations == 1) 2869 { 2870 const unsigned int i_ = reorientate(0, i); 2871 unsigned int ind1[VectorizedArrayType::size()]; 2872 DEAL_II_OPENMP_SIMD_PRAGMA 2873 for (unsigned int v = 0; 2874 v < VectorizedArrayType::size(); 2875 ++v) 2876 ind1[v] = 2877 indices[v] + 2878 index_array_hermite[0 /*TODO*/][2 * i] * 2879 strides[v]; 2880 unsigned int ind2[VectorizedArrayType::size()]; 2881 DEAL_II_OPENMP_SIMD_PRAGMA 2882 for (unsigned int v = 0; 2883 v < VectorizedArrayType::size(); 2884 ++v) 2885 ind2[v] = 2886 indices[v] + 2887 index_array_hermite[0 /*TODO*/][2 * i + 1] * 2888 strides[v]; 2889 proc.function_2a(temp1[i_], 2890 temp1[i_ + dofs_per_face], 2891 global_vector_ptr, 2892 global_vector_ptr, 2893 grad_weight, 2894 ind1, 2895 ind2); 2896 } 2897 else 2898 { 2899 Assert(false, ExcNotImplemented()); 2900 } 2901 } 2902 else 2903 { 2904 if (integrate == false) 2905 for (unsigned int i = 0; i < 2 * dofs_per_face; ++i) 2906 temp1[i] = VectorizedArrayType(); 2907 2908 for (unsigned int v = 0; v < n_filled_lanes; ++v) 2909 for (unsigned int i = 0; i < dofs_per_face; ++i) 2910 { 2911 const unsigned int i_ = 2912 reorientate(n_face_orientations == 1 ? 0 : v, 2913 i); 2914 proc.function_3a( 2915 temp1[i_][v], 2916 temp1[i_ + dofs_per_face][v], 2917 global_vector_ptr 2918 [indices[v] + 2919 index_array_hermite 2920 [n_face_orientations == 1 ? 0 : v] 2921 [2 * i] * 2922 strides[v]], 2923 global_vector_ptr 2924 [indices[v] + 2925 index_array_hermite 2926 [n_face_orientations == 1 ? 0 : v] 2927 [2 * i + 1] * 2928 strides[v]], 2929 grad_weight[n_face_orientations == 1 ? 0 : v]); 2930 } 2931 } 2932 } 2933 else 2934 { 2935 if (n_filled_lanes == VectorizedArrayType::size()) 2936 for (unsigned int i = 0; i < dofs_per_face; ++i) 2937 { 2938 if (n_face_orientations == 1) 2939 { 2940 unsigned int ind[VectorizedArrayType::size()]; 2941 DEAL_II_OPENMP_SIMD_PRAGMA 2942 for (unsigned int v = 0; 2943 v < VectorizedArrayType::size(); 2944 ++v) 2945 ind[v] = indices[v] + 2946 index_array_nodal[0][i] * strides[v]; 2947 const unsigned int i_ = reorientate(0, i); 2948 proc.function_2b(temp1[i_], 2949 global_vector_ptr, 2950 ind); 2951 } 2952 else 2953 { 2954 Assert(false, ExcNotImplemented()); 2955 } 2956 } 2957 else 2958 { 2959 if (integrate == false) 2960 for (unsigned int i = 0; i < dofs_per_face; ++i) 2961 temp1[i] = VectorizedArrayType(); 2962 2963 for (unsigned int v = 0; v < n_filled_lanes; ++v) 2964 for (unsigned int i = 0; i < dofs_per_face; ++i) 2965 proc.function_3b( 2966 temp1[reorientate( 2967 n_face_orientations == 1 ? 0 : v, i)][v], 2968 global_vector_ptr 2969 [indices[v] + 2970 index_array_nodal 2971 [n_face_orientations == 1 ? 0 : v][i] * 2972 strides[v]]); 2973 } 2974 } 2975 } 2976 2977 // case 4: contiguous indices without interleaving 2978 else if (n_face_orientations > 1 || 2979 dof_info.index_storage_variants[dof_access_index][cell] == 2980 MatrixFreeFunctions::DoFInfo::IndexStorageVariants:: 2981 contiguous) 2982 { 2983 const unsigned int *indices = 2984 &dof_info.dof_indices_contiguous[dof_access_index] 2985 [cell * 2986 VectorizedArrayType::size()]; 2987 Number2_ *vector_ptr = 2988 global_vector_ptr + comp * static_dofs_per_component + 2989 dof_info 2990 .component_dof_indices_offset[active_fe_index] 2991 [first_selected_component]; 2992 2993 if (do_gradients == true && 2994 data.element_type == 2995 MatrixFreeFunctions::tensor_symmetric_hermite) 2996 { 2997 if (n_face_orientations == 1 && 2998 dof_info.n_vectorization_lanes_filled[dof_access_index] 2999 [cell] == 3000 VectorizedArrayType::size()) 3001 for (unsigned int i = 0; i < dofs_per_face; ++i) 3002 { 3003 const unsigned int ind1 = 3004 index_array_hermite[0][2 * i]; 3005 const unsigned int ind2 = 3006 index_array_hermite[0][2 * i + 1]; 3007 const unsigned int i_ = reorientate(0, i); 3008 3009 proc.function_2a(temp1[i_], 3010 temp1[i_ + dofs_per_face], 3011 vector_ptr + ind1, 3012 vector_ptr + ind2, 3013 grad_weight, 3014 indices, 3015 indices); 3016 } 3017 else if (n_face_orientations == 1) 3018 for (unsigned int i = 0; i < dofs_per_face; ++i) 3019 { 3020 const unsigned int ind1 = 3021 index_array_hermite[0][2 * i]; 3022 const unsigned int ind2 = 3023 index_array_hermite[0][2 * i + 1]; 3024 const unsigned int i_ = reorientate(0, i); 3025 3026 const unsigned int n_filled_lanes = 3027 dof_info 3028 .n_vectorization_lanes_filled[dof_access_index] 3029 [cell]; 3030 3031 for (unsigned int v = 0; v < n_filled_lanes; ++v) 3032 proc.function_3a(temp1[i_][v], 3033 temp1[i_ + dofs_per_face][v], 3034 vector_ptr[ind1 + indices[v]], 3035 vector_ptr[ind2 + indices[v]], 3036 grad_weight[v]); 3037 3038 if (integrate == false) 3039 for (unsigned int v = n_filled_lanes; 3040 v < VectorizedArrayType::size(); 3041 ++v) 3042 { 3043 temp1[i_][v] = 0.0; 3044 temp1[i_ + dofs_per_face][v] = 0.0; 3045 } 3046 } 3047 else 3048 { 3049 Assert(false, ExcNotImplemented()); 3050 3051 const unsigned int n_filled_lanes = 3052 dof_info 3053 .n_vectorization_lanes_filled[dof_access_index] 3054 [cell]; 3055 3056 for (unsigned int v = 0; v < n_filled_lanes; ++v) 3057 for (unsigned int i = 0; i < dofs_per_face; ++i) 3058 proc.function_3a( 3059 temp1[reorientate(v, i)][v], 3060 temp1[reorientate(v, i) + dofs_per_face][v], 3061 vector_ptr[index_array_hermite[v][2 * i] + 3062 indices[v]], 3063 vector_ptr[index_array_hermite[v][2 * i + 1] + 3064 indices[v]], 3065 grad_weight[v]); 3066 } 3067 } 3068 else 3069 { 3070 if (n_face_orientations == 1 && 3071 dof_info.n_vectorization_lanes_filled[dof_access_index] 3072 [cell] == 3073 VectorizedArrayType::size()) 3074 for (unsigned int i = 0; i < dofs_per_face; ++i) 3075 { 3076 const unsigned int ind = index_array_nodal[0][i]; 3077 const unsigned int i_ = reorientate(0, i); 3078 3079 proc.function_2b(temp1[i_], 3080 vector_ptr + ind, 3081 indices); 3082 } 3083 else if (n_face_orientations == 1) 3084 for (unsigned int i = 0; i < dofs_per_face; ++i) 3085 { 3086 const unsigned int ind = index_array_nodal[0][i]; 3087 const unsigned int i_ = reorientate(0, i); 3088 3089 const unsigned int n_filled_lanes = 3090 dof_info 3091 .n_vectorization_lanes_filled[dof_access_index] 3092 [cell]; 3093 3094 for (unsigned int v = 0; v < n_filled_lanes; ++v) 3095 proc.function_3b(temp1[i_][v], 3096 vector_ptr[ind + indices[v]]); 3097 3098 if (integrate == false) 3099 for (unsigned int v = n_filled_lanes; 3100 v < VectorizedArrayType::size(); 3101 ++v) 3102 temp1[i_][v] = 0.0; 3103 } 3104 else 3105 for (unsigned int i = 0; i < dofs_per_face; ++i) 3106 { 3107 for (unsigned int v = 0; 3108 v < VectorizedArrayType::size(); 3109 ++v) 3110 if (cells[v] != numbers::invalid_unsigned_int) 3111 proc.function_3b( 3112 temp1[reorientate(v, i)][v], 3113 vector_ptr[index_array_nodal[v][i] + 3114 dof_info.dof_indices_contiguous 3115 [dof_access_index][cells[v]]]); 3116 } 3117 } 3118 } 3119 else 3120 { 3121 // case 5: default vector access 3122 AssertDimension(n_face_orientations, 1); 3123 3124 // for the integrate_scatter path (integrate == true), we 3125 // need to only prepare the data in this function for all 3126 // components to later call distribute_local_to_global(); 3127 // for the gather_evaluate path (integrate == false), we 3128 // instead want to leave early because we need to get the 3129 // vector data from somewhere else 3130 proc.function_5(temp1, comp); 3131 if (integrate) 3132 accesses_global_vector = false; 3133 else 3134 return false; 3135 } 3136 } 3137 else 3138 { 3139 // case 5: default vector access 3140 AssertDimension(n_face_orientations, 1); 3141 3142 proc.function_5(temp1, comp); 3143 if (integrate) 3144 accesses_global_vector = false; 3145 else 3146 return false; 3147 } 3148 3149 if (!integrate) 3150 proc.function_0(temp1, comp); 3151 } 3152 3153 if (!integrate && 3154 (face_orientations[0] > 0 && 3155 subface_index < GeometryInfo<dim>::max_children_per_cell)) 3156 { 3157 AssertDimension(n_face_orientations, 1); 3158 adjust_for_face_orientation(dim, 3159 n_components, 3160 face_orientations[0], 3161 orientation_map, 3162 false, 3163 do_values, 3164 do_gradients, 3165 data.n_q_points_face, 3166 scratch_data, 3167 values_quad, 3168 gradients_quad); 3169 } 3170 3171 return accesses_global_vector; 3172 } 3173 3174 3175 3176 template <int dim, 3177 typename Number, 3178 typename VectorizedArrayType, 3179 typename Number2 = Number> 3180 struct FEFaceEvaluationImplGatherEvaluateSelector 3181 { 3182 template <int fe_degree, int n_q_points_1d> 3183 static bool 3184 run(const unsigned int n_components, 3185 const unsigned int n_face_orientations, 3186 const Number2 * src_ptr, 3187 const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data, 3188 const MatrixFreeFunctions::DoFInfo & dof_info, 3189 VectorizedArrayType * values_quad, 3190 VectorizedArrayType *gradients_quad, 3191 VectorizedArrayType *scratch_data, 3192 const bool evaluate_values, 3193 const bool evaluate_gradients, 3194 const unsigned int active_fe_index, 3195 const unsigned int first_selected_component, 3196 const std::array<unsigned int, VectorizedArrayType::size()> cells, 3197 const std::array<unsigned int, VectorizedArrayType::size()> face_nos, 3198 const unsigned int subface_index, 3199 const MatrixFreeFunctions::DoFInfo::DoFAccessIndex dof_access_index, 3200 const std::array<unsigned int, VectorizedArrayType::size()> 3201 face_orientations, 3202 const Table<2, unsigned int> &orientation_map) 3203 { 3204 if (src_ptr == nullptr) 3205 { 3206 return false; 3207 } 3208 3209 Processor<fe_degree, n_q_points_1d> p(n_components, 3210 false, 3211 src_ptr, 3212 data, 3213 dof_info, 3214 values_quad, 3215 gradients_quad, 3216 scratch_data, 3217 evaluate_values, 3218 evaluate_gradients, 3219 active_fe_index, 3220 first_selected_component, 3221 cells, 3222 face_nos, 3223 subface_index, 3224 dof_access_index, 3225 face_orientations, 3226 orientation_map); 3227 3228 if (n_face_orientations == VectorizedArrayType::size()) 3229 return fe_face_evaluation_process_and_io<VectorizedArrayType::size()>( 3230 p); 3231 else 3232 return fe_face_evaluation_process_and_io<1>(p); 3233 } 3234 3235 private: 3236 template <int fe_degree, int n_q_points_1d> 3237 struct Processor 3238 { 3239 static const int dim_ = dim; 3240 static const int fe_degree_ = fe_degree; 3241 static const int n_q_points_1d_ = n_q_points_1d; 3242 using VectorizedArrayType_ = VectorizedArrayType; 3243 using Number2_ = const Number2; 3244 3245 Processor( 3246 const unsigned int n_components, 3247 const bool integrate, 3248 const Number2 * global_vector_ptr, 3249 const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data, 3250 const MatrixFreeFunctions::DoFInfo & dof_info, 3251 VectorizedArrayType * values_quad, 3252 VectorizedArrayType *gradients_quad, 3253 VectorizedArrayType *scratch_data, 3254 const bool do_values, 3255 const bool do_gradients, 3256 const unsigned int active_fe_index, 3257 const unsigned int first_selected_component, 3258 const std::array<unsigned int, VectorizedArrayType::size()> cells, 3259 const std::array<unsigned int, VectorizedArrayType::size()> face_nos, 3260 const unsigned int subface_index, 3261 const MatrixFreeFunctions::DoFInfo::DoFAccessIndex dof_access_index, 3262 const std::array<unsigned int, VectorizedArrayType::size()> 3263 face_orientations, 3264 const Table<2, unsigned int> &orientation_map) 3265 : n_components(n_components) 3266 , integrate(integrate) 3267 , global_vector_ptr(global_vector_ptr) 3268 , data(data) 3269 , dof_info(dof_info) 3270 , values_quad(values_quad) 3271 , gradients_quad(gradients_quad) 3272 , scratch_data(scratch_data) 3273 , do_values(do_values) 3274 , do_gradients(do_gradients) 3275 , active_fe_index(active_fe_index) 3276 , first_selected_component(first_selected_component) 3277 , cells(cells) 3278 , face_nos(face_nos) 3279 , subface_index(subface_index) 3280 , dof_access_index(dof_access_index) 3281 , face_orientations(face_orientations) 3282 , orientation_map(orientation_map) 3283 {} 3284 3285 template <typename T0, typename T1, typename T2> 3286 void 3287 function_1a(T0 & temp_1, 3288 T0 & temp_2, 3289 const T1 src_ptr_1, 3290 const T1 src_ptr_2, 3291 const T2 &grad_weight) 3292 { 3293 // case 1a) 3294 do_vectorized_read(src_ptr_1, temp_1); 3295 do_vectorized_read(src_ptr_2, temp_2); 3296 temp_2 = grad_weight * (temp_1 - temp_2); 3297 } 3298 3299 template <typename T1, typename T2> 3300 void 3301 function_1b(T1 &temp, const T2 src_ptr) 3302 { 3303 // case 1b) 3304 do_vectorized_read(src_ptr, temp); 3305 } 3306 3307 template <typename T0, typename T1, typename T2, typename T3> 3308 void 3309 function_2a(T0 & temp_1, 3310 T0 & temp_2, 3311 const T1 src_ptr_1, 3312 const T1 src_ptr_2, 3313 const T2 &grad_weight, 3314 const T3 &indices_1, 3315 const T3 &indices_2) 3316 { 3317 // case 2a) 3318 do_vectorized_gather(src_ptr_1, indices_1, temp_1); 3319 do_vectorized_gather(src_ptr_2, indices_2, temp_2); 3320 temp_2 = grad_weight * (temp_1 - temp_2); 3321 } 3322 3323 template <typename T0, typename T1, typename T2> 3324 void 3325 function_2b(T0 &temp, const T1 src_ptr, const T2 &indices) 3326 { 3327 // case 2b) 3328 do_vectorized_gather(src_ptr, indices, temp); 3329 } 3330 3331 template <typename T0, typename T1, typename T2> 3332 void 3333 function_3a(T0 & temp_1, 3334 T0 & temp_2, 3335 const T1 &src_ptr_1, 3336 const T2 &src_ptr_2, 3337 const T2 &grad_weight) 3338 { 3339 // case 3a) 3340 temp_1 = src_ptr_1; 3341 temp_2 = grad_weight * (temp_1 - src_ptr_2); 3342 } 3343 3344 template <typename T1, typename T2> 3345 void 3346 function_3b(T1 &temp, const T2 &src_ptr) 3347 { 3348 // case 3b) 3349 temp = src_ptr; 3350 } 3351 3352 template <typename T1> 3353 void 3354 function_5(const T1 &, const unsigned int) 3355 { 3356 // case 5) 3357 } 3358 3359 template <typename T1> 3360 void 3361 function_0(T1 &temp1, const unsigned int comp) 3362 { 3363 const unsigned int dofs_per_face = 3364 fe_degree > -1 ? 3365 Utilities::pow(fe_degree + 1, dim - 1) : 3366 Utilities::pow(data.data.front().fe_degree + 1, dim - 1); 3367 const unsigned int n_q_points = 3368 fe_degree > -1 ? Utilities::pow(n_q_points_1d, dim - 1) : 3369 data.n_q_points_face; 3370 if (fe_degree > -1 && 3371 subface_index >= GeometryInfo<dim>::max_children_per_cell && 3372 data.element_type <= MatrixFreeFunctions::tensor_symmetric) 3373 FEFaceEvaluationImpl<true, 3374 dim, 3375 fe_degree, 3376 n_q_points_1d, 3377 VectorizedArrayType>:: 3378 evaluate_in_face(/* n_components */ 1, 3379 data, 3380 temp1, 3381 values_quad + comp * n_q_points, 3382 gradients_quad + comp * dim * n_q_points, 3383 scratch_data + 2 * dofs_per_face, 3384 do_values, 3385 do_gradients, 3386 subface_index); 3387 else 3388 FEFaceEvaluationImpl<false, 3389 dim, 3390 fe_degree, 3391 n_q_points_1d, 3392 VectorizedArrayType>:: 3393 evaluate_in_face(/* n_components */ 1, 3394 data, 3395 temp1, 3396 values_quad + comp * n_q_points, 3397 gradients_quad + comp * dim * n_q_points, 3398 scratch_data + 2 * dofs_per_face, 3399 do_values, 3400 do_gradients, 3401 subface_index); 3402 } 3403 3404 const unsigned int n_components; 3405 const bool integrate; 3406 const Number2 * global_vector_ptr; 3407 const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data; 3408 const MatrixFreeFunctions::DoFInfo & dof_info; 3409 VectorizedArrayType * values_quad; 3410 VectorizedArrayType * gradients_quad; 3411 VectorizedArrayType * scratch_data; 3412 const bool do_values; 3413 const bool do_gradients; 3414 const unsigned int active_fe_index; 3415 const unsigned int first_selected_component; 3416 const std::array<unsigned int, VectorizedArrayType::size()> cells; 3417 const std::array<unsigned int, VectorizedArrayType::size()> face_nos; 3418 const unsigned int subface_index; 3419 const MatrixFreeFunctions::DoFInfo::DoFAccessIndex dof_access_index; 3420 const std::array<unsigned int, VectorizedArrayType::size()> 3421 face_orientations; 3422 const Table<2, unsigned int> &orientation_map; 3423 }; 3424 }; 3425 3426 template <int dim, 3427 typename Number, 3428 typename VectorizedArrayType, 3429 typename Number2 = Number> 3430 struct FEFaceEvaluationImplIntegrateScatterSelector 3431 { 3432 template <int fe_degree, int n_q_points_1d> 3433 static bool 3434 run(const unsigned int n_components, 3435 const unsigned int n_face_orientations, 3436 Number2 * dst_ptr, 3437 const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data, 3438 const MatrixFreeFunctions::DoFInfo & dof_info, 3439 VectorizedArrayType * values_array, 3440 VectorizedArrayType * values_quad, 3441 VectorizedArrayType *gradients_quad, 3442 VectorizedArrayType *scratch_data, 3443 const bool integrate_values, 3444 const bool integrate_gradients, 3445 const unsigned int active_fe_index, 3446 const unsigned int first_selected_component, 3447 const std::array<unsigned int, VectorizedArrayType::size()> cells, 3448 const std::array<unsigned int, VectorizedArrayType::size()> face_nos, 3449 const unsigned int subface_index, 3450 const MatrixFreeFunctions::DoFInfo::DoFAccessIndex dof_access_index, 3451 const std::array<unsigned int, VectorizedArrayType::size()> 3452 face_orientations, 3453 const Table<2, unsigned int> &orientation_map) 3454 { 3455 if (dst_ptr == nullptr) 3456 { 3457 AssertDimension(n_face_orientations, 1); 3458 AssertDimension(n_face_orientations, 1); 3459 3460 // for block vectors simply integrate 3461 FEFaceEvaluationImplIntegrateSelector<dim, VectorizedArrayType>:: 3462 template run<fe_degree, n_q_points_1d>(n_components, 3463 data, 3464 values_array, 3465 values_quad, 3466 gradients_quad, 3467 scratch_data, 3468 integrate_values, 3469 integrate_gradients, 3470 face_nos[0], 3471 subface_index, 3472 face_orientations[0], 3473 orientation_map); 3474 3475 // default vector access 3476 return false; 3477 } 3478 3479 3480 Processor<fe_degree, n_q_points_1d> p(values_array, 3481 n_components, 3482 true, 3483 dst_ptr, 3484 data, 3485 dof_info, 3486 values_quad, 3487 gradients_quad, 3488 scratch_data, 3489 integrate_values, 3490 integrate_gradients, 3491 active_fe_index, 3492 first_selected_component, 3493 cells, 3494 face_nos, 3495 subface_index, 3496 dof_access_index, 3497 face_orientations, 3498 orientation_map); 3499 3500 if (n_face_orientations == VectorizedArrayType::size()) 3501 return fe_face_evaluation_process_and_io<VectorizedArrayType::size()>( 3502 p); 3503 else 3504 return fe_face_evaluation_process_and_io<1>(p); 3505 } 3506 3507 private: 3508 template <int fe_degree, int n_q_points_1d> 3509 struct Processor 3510 { 3511 static const int dim_ = dim; 3512 static const int fe_degree_ = fe_degree; 3513 static const int n_q_points_1d_ = n_q_points_1d; 3514 using VectorizedArrayType_ = VectorizedArrayType; 3515 using Number2_ = Number2; 3516 3517 3518 Processor( 3519 VectorizedArrayType *values_array, 3520 const unsigned int n_components, 3521 const bool integrate, 3522 Number2 * global_vector_ptr, 3523 const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data, 3524 const MatrixFreeFunctions::DoFInfo & dof_info, 3525 VectorizedArrayType * values_quad, 3526 VectorizedArrayType *gradients_quad, 3527 VectorizedArrayType *scratch_data, 3528 const bool do_values, 3529 const bool do_gradients, 3530 const unsigned int active_fe_index, 3531 const unsigned int first_selected_component, 3532 const std::array<unsigned int, VectorizedArrayType::size()> cells, 3533 const std::array<unsigned int, VectorizedArrayType::size()> face_nos, 3534 const unsigned int subface_index, 3535 const MatrixFreeFunctions::DoFInfo::DoFAccessIndex dof_access_index, 3536 const std::array<unsigned int, VectorizedArrayType::size()> 3537 face_orientations, 3538 const Table<2, unsigned int> &orientation_map) 3539 : values_array(values_array) 3540 , n_components(n_components) 3541 , integrate(integrate) 3542 , global_vector_ptr(global_vector_ptr) 3543 , data(data) 3544 , dof_info(dof_info) 3545 , values_quad(values_quad) 3546 , gradients_quad(gradients_quad) 3547 , scratch_data(scratch_data) 3548 , do_values(do_values) 3549 , do_gradients(do_gradients) 3550 , active_fe_index(active_fe_index) 3551 , first_selected_component(first_selected_component) 3552 , cells(cells) 3553 , face_nos(face_nos) 3554 , subface_index(subface_index) 3555 , dof_access_index(dof_access_index) 3556 , face_orientations(face_orientations) 3557 , orientation_map(orientation_map) 3558 {} 3559 3560 template <typename T0, typename T1, typename T2, typename T3, typename T4> 3561 void 3562 function_1a(const T0 &temp_1, 3563 const T1 &temp_2, 3564 T2 dst_ptr_1, 3565 T3 dst_ptr_2, 3566 const T4 &grad_weight) 3567 { 3568 // case 1a) 3569 const VectorizedArrayType val = temp_1 - grad_weight * temp_2; 3570 const VectorizedArrayType grad = grad_weight * temp_2; 3571 do_vectorized_add(val, dst_ptr_1); 3572 do_vectorized_add(grad, dst_ptr_2); 3573 } 3574 3575 template <typename T0, typename T1> 3576 void 3577 function_1b(const T0 &temp, T1 dst_ptr) 3578 { 3579 // case 1b) 3580 do_vectorized_add(temp, dst_ptr); 3581 } 3582 3583 template <typename T0, typename T1, typename T2, typename T3> 3584 void 3585 function_2a(const T0 &temp_1, 3586 const T0 &temp_2, 3587 T1 dst_ptr_1, 3588 T1 dst_ptr_2, 3589 const T2 &grad_weight, 3590 const T3 &indices_1, 3591 const T3 &indices_2) 3592 { 3593 // case 2a) 3594 const VectorizedArrayType val = temp_1 - grad_weight * temp_2; 3595 const VectorizedArrayType grad = grad_weight * temp_2; 3596 do_vectorized_scatter_add(val, indices_1, dst_ptr_1); 3597 do_vectorized_scatter_add(grad, indices_2, dst_ptr_2); 3598 } 3599 3600 template <typename T0, typename T1, typename T2> 3601 void 3602 function_2b(const T0 &temp, T1 dst_ptr, const T2 &indices) 3603 { 3604 // case 2b) 3605 do_vectorized_scatter_add(temp, indices, dst_ptr); 3606 } 3607 3608 template <typename T0, typename T1, typename T2> 3609 void 3610 function_3a(const T0 &temp_1, 3611 const T0 &temp_2, 3612 T1 & dst_ptr_1, 3613 T1 & dst_ptr_2, 3614 const T2 &grad_weight) 3615 { 3616 // case 3a) 3617 const Number val = temp_1 - grad_weight * temp_2; 3618 const Number grad = grad_weight * temp_2; 3619 dst_ptr_1 += val; 3620 dst_ptr_2 += grad; 3621 } 3622 3623 template <typename T0, typename T1> 3624 void 3625 function_3b(const T0 &temp, T1 &dst_ptr) 3626 { 3627 // case 3b) 3628 dst_ptr += temp; 3629 } 3630 3631 template <typename T0> 3632 void 3633 function_5(const T0 &temp1, const unsigned int comp) 3634 { 3635 // case 5: default vector access, must be handled separately, just do 3636 // the face-normal interpolation 3637 3638 FEFaceNormalEvaluationImpl<dim, fe_degree, VectorizedArrayType>:: 3639 template interpolate<false, false>( 3640 /* n_components */ 1, 3641 data, 3642 temp1, 3643 values_array + comp * data.dofs_per_component_on_cell, 3644 do_gradients, 3645 face_nos[0]); 3646 } 3647 3648 template <typename T0> 3649 void 3650 function_0(T0 &temp1, const unsigned int comp) 3651 { 3652 const unsigned int dofs_per_face = 3653 fe_degree > -1 ? 3654 Utilities::pow(fe_degree + 1, dim - 1) : 3655 Utilities::pow(data.data.front().fe_degree + 1, dim - 1); 3656 const unsigned int n_q_points = 3657 fe_degree > -1 ? Utilities::pow(n_q_points_1d, dim - 1) : 3658 data.n_q_points_face; 3659 if (fe_degree > -1 && 3660 subface_index >= GeometryInfo<dim>::max_children_per_cell && 3661 data.element_type <= 3662 internal::MatrixFreeFunctions::tensor_symmetric) 3663 internal::FEFaceEvaluationImpl<true, 3664 dim, 3665 fe_degree, 3666 n_q_points_1d, 3667 VectorizedArrayType>:: 3668 integrate_in_face(/* n_components */ 1, 3669 data, 3670 temp1, 3671 values_quad + comp * n_q_points, 3672 gradients_quad + dim * comp * n_q_points, 3673 scratch_data + 2 * dofs_per_face, 3674 do_values, 3675 do_gradients, 3676 subface_index); 3677 else 3678 internal::FEFaceEvaluationImpl<false, 3679 dim, 3680 fe_degree, 3681 n_q_points_1d, 3682 VectorizedArrayType>:: 3683 integrate_in_face(/* n_components */ 1, 3684 data, 3685 temp1, 3686 values_quad + comp * n_q_points, 3687 gradients_quad + dim * comp * n_q_points, 3688 scratch_data + 2 * dofs_per_face, 3689 do_values, 3690 do_gradients, 3691 subface_index); 3692 } 3693 3694 VectorizedArrayType *values_array; 3695 3696 3697 const unsigned int n_components; 3698 const bool integrate; 3699 Number2 * global_vector_ptr; 3700 const MatrixFreeFunctions::ShapeInfo<VectorizedArrayType> &data; 3701 const MatrixFreeFunctions::DoFInfo & dof_info; 3702 VectorizedArrayType * values_quad; 3703 VectorizedArrayType * gradients_quad; 3704 VectorizedArrayType * scratch_data; 3705 const bool do_values; 3706 const bool do_gradients; 3707 const unsigned int active_fe_index; 3708 const unsigned int first_selected_component; 3709 const std::array<unsigned int, VectorizedArrayType::size()> cells; 3710 const std::array<unsigned int, VectorizedArrayType::size()> face_nos; 3711 const unsigned int subface_index; 3712 const MatrixFreeFunctions::DoFInfo::DoFAccessIndex dof_access_index; 3713 const std::array<unsigned int, VectorizedArrayType::size()> 3714 face_orientations; 3715 const Table<2, unsigned int> &orientation_map; 3716 }; 3717 }; 3718 3719 3720 3721 /** 3722 * This struct implements the action of the inverse mass matrix operation 3723 * using an FEEvaluationBaseData argument. 3724 */ 3725 template <int dim, typename Number> 3726 struct CellwiseInverseMassMatrixImplBasic 3727 { 3728 template <int fe_degree, int = 0> 3729 static bool 3730 run(const unsigned int n_components, 3731 const FEEvaluationBaseData<dim, 3732 typename Number::value_type, 3733 false, 3734 Number> &fe_eval, 3735 const Number * in_array, 3736 Number * out_array, 3737 typename std::enable_if<fe_degree != -1>::type * = nullptr) 3738 { 3739 constexpr unsigned int dofs_per_component = 3740 Utilities::pow(fe_degree + 1, dim); 3741 3742 Assert(dim >= 1 || dim <= 3, ExcNotImplemented()); 3743 Assert(fe_eval.get_shape_info().element_type <= 3744 MatrixFreeFunctions::tensor_symmetric, 3745 ExcNotImplemented()); 3746 3747 internal::EvaluatorTensorProduct<internal::evaluate_evenodd, 3748 dim, 3749 fe_degree + 1, 3750 fe_degree + 1, 3751 Number> 3752 evaluator( 3753 AlignedVector<Number>(), 3754 AlignedVector<Number>(), 3755 fe_eval.get_shape_info().data.front().inverse_shape_values_eo); 3756 3757 for (unsigned int d = 0; d < n_components; ++d) 3758 { 3759 const Number *in = in_array + d * dofs_per_component; 3760 Number * out = out_array + d * dofs_per_component; 3761 // Need to select 'apply' method with hessian slot because values 3762 // assume symmetries that do not exist in the inverse shapes 3763 evaluator.template hessians<0, true, false>(in, out); 3764 if (dim > 1) 3765 evaluator.template hessians<1, true, false>(out, out); 3766 if (dim > 2) 3767 evaluator.template hessians<2, true, false>(out, out); 3768 } 3769 for (unsigned int q = 0; q < dofs_per_component; ++q) 3770 { 3771 const Number inverse_JxW_q = Number(1.) / fe_eval.JxW(q); 3772 for (unsigned int d = 0; d < n_components; ++d) 3773 out_array[q + d * dofs_per_component] *= inverse_JxW_q; 3774 } 3775 for (unsigned int d = 0; d < n_components; ++d) 3776 { 3777 Number *out = out_array + d * dofs_per_component; 3778 if (dim > 2) 3779 evaluator.template hessians<2, false, false>(out, out); 3780 if (dim > 1) 3781 evaluator.template hessians<1, false, false>(out, out); 3782 evaluator.template hessians<0, false, false>(out, out); 3783 } 3784 return false; 3785 } 3786 3787 template <int fe_degree, int = 0> 3788 static bool 3789 run(const unsigned int n_components, 3790 const FEEvaluationBaseData<dim, 3791 typename Number::value_type, 3792 false, 3793 Number> &fe_eval, 3794 const Number * in_array, 3795 Number * out_array, 3796 typename std::enable_if<fe_degree == -1>::type * = nullptr) 3797 { 3798 static_assert(fe_degree == -1, "Only usable for degree -1"); 3799 const unsigned int dofs_per_component = 3800 fe_eval.get_shape_info().dofs_per_component_on_cell; 3801 3802 Assert(dim >= 1 || dim <= 3, ExcNotImplemented()); 3803 3804 internal:: 3805 EvaluatorTensorProduct<internal::evaluate_general, dim, 0, 0, Number> 3806 evaluator(fe_eval.get_shape_info().data.front().inverse_shape_values, 3807 AlignedVector<Number>(), 3808 AlignedVector<Number>(), 3809 fe_eval.get_shape_info().data.front().fe_degree + 1, 3810 fe_eval.get_shape_info().data.front().fe_degree + 1); 3811 3812 for (unsigned int d = 0; d < n_components; ++d) 3813 { 3814 const Number *in = in_array + d * dofs_per_component; 3815 Number * out = out_array + d * dofs_per_component; 3816 // Need to select 'apply' method with hessian slot because values 3817 // assume symmetries that do not exist in the inverse shapes 3818 evaluator.template values<0, true, false>(in, out); 3819 if (dim > 1) 3820 evaluator.template values<1, true, false>(out, out); 3821 if (dim > 2) 3822 evaluator.template values<2, true, false>(out, out); 3823 } 3824 for (unsigned int q = 0; q < dofs_per_component; ++q) 3825 { 3826 const Number inverse_JxW_q = Number(1.) / fe_eval.JxW(q); 3827 for (unsigned int d = 0; d < n_components; ++d) 3828 out_array[q + d * dofs_per_component] *= inverse_JxW_q; 3829 } 3830 for (unsigned int d = 0; d < n_components; ++d) 3831 { 3832 Number *out = out_array + d * dofs_per_component; 3833 if (dim > 2) 3834 evaluator.template values<2, false, false>(out, out); 3835 if (dim > 1) 3836 evaluator.template values<1, false, false>(out, out); 3837 evaluator.template values<0, false, false>(out, out); 3838 } 3839 return false; 3840 } 3841 }; 3842 3843 3844 3845 /** 3846 * This struct implements the action of the inverse mass matrix operation 3847 * using an FEEvaluationBaseData argument. 3848 */ 3849 template <int dim, typename Number> 3850 struct CellwiseInverseMassMatrixImplFlexible 3851 { 3852 template <int fe_degree, int = 0> 3853 static bool 3854 run(const unsigned int n_desired_components, 3855 const AlignedVector<Number> &inverse_shape, 3856 const AlignedVector<Number> &inverse_coefficients, 3857 const Number * in_array, 3858 Number * out_array, 3859 typename std::enable_if<fe_degree != -1>::type * = nullptr) 3860 { 3861 constexpr unsigned int dofs_per_component = 3862 Utilities::pow(fe_degree + 1, dim); 3863 Assert(inverse_coefficients.size() > 0 && 3864 inverse_coefficients.size() % dofs_per_component == 0, 3865 ExcMessage( 3866 "Expected diagonal to be a multiple of scalar dof per cells")); 3867 if (inverse_coefficients.size() != dofs_per_component) 3868 AssertDimension(n_desired_components * dofs_per_component, 3869 inverse_coefficients.size()); 3870 3871 Assert(dim >= 1 || dim <= 3, ExcNotImplemented()); 3872 3873 internal::EvaluatorTensorProduct<internal::evaluate_evenodd, 3874 dim, 3875 fe_degree + 1, 3876 fe_degree + 1, 3877 Number> 3878 evaluator(AlignedVector<Number>(), 3879 AlignedVector<Number>(), 3880 inverse_shape); 3881 3882 const unsigned int shift_coefficient = 3883 inverse_coefficients.size() > dofs_per_component ? dofs_per_component : 3884 0; 3885 const Number *inv_coefficient = inverse_coefficients.data(); 3886 for (unsigned int d = 0; d < n_desired_components; ++d) 3887 { 3888 const Number *in = in_array + d * dofs_per_component; 3889 Number * out = out_array + d * dofs_per_component; 3890 // Need to select 'apply' method with hessian slot because values 3891 // assume symmetries that do not exist in the inverse shapes 3892 evaluator.template hessians<0, true, false>(in, out); 3893 if (dim > 1) 3894 evaluator.template hessians<1, true, false>(out, out); 3895 if (dim > 2) 3896 evaluator.template hessians<2, true, false>(out, out); 3897 3898 for (unsigned int q = 0; q < dofs_per_component; ++q) 3899 out[q] *= inv_coefficient[q]; 3900 3901 if (dim > 2) 3902 evaluator.template hessians<2, false, false>(out, out); 3903 if (dim > 1) 3904 evaluator.template hessians<1, false, false>(out, out); 3905 evaluator.template hessians<0, false, false>(out, out); 3906 3907 inv_coefficient += shift_coefficient; 3908 } 3909 return false; 3910 } 3911 3912 /** 3913 * Version for degree = -1 3914 */ 3915 template <int fe_degree, int = 0> 3916 static bool 3917 run(const unsigned int, 3918 const AlignedVector<Number> &, 3919 const AlignedVector<Number> &, 3920 const Number *, 3921 Number *, 3922 typename std::enable_if<fe_degree == -1>::type * = nullptr) 3923 { 3924 static_assert(fe_degree == -1, "Only usable for degree -1"); 3925 Assert(false, ExcNotImplemented()); 3926 return false; 3927 } 3928 }; 3929 3930 3931 3932 /** 3933 * This struct implements the action of the inverse mass matrix operation 3934 * using an FEEvaluationBaseData argument. 3935 */ 3936 template <int dim, typename Number> 3937 struct CellwiseInverseMassMatrixImplTransformFromQPoints 3938 { 3939 template <int fe_degree, int = 0> 3940 static bool 3941 run(const unsigned int n_desired_components, 3942 const AlignedVector<Number> &inverse_shape, 3943 const Number * in_array, 3944 Number * out_array, 3945 typename std::enable_if<fe_degree != -1>::type * = nullptr) 3946 { 3947 constexpr unsigned int dofs_per_cell = Utilities::pow(fe_degree + 1, dim); 3948 internal::EvaluatorTensorProduct<internal::evaluate_evenodd, 3949 dim, 3950 fe_degree + 1, 3951 fe_degree + 1, 3952 Number> 3953 evaluator(AlignedVector<Number>(), 3954 AlignedVector<Number>(), 3955 inverse_shape); 3956 3957 for (unsigned int d = 0; d < n_desired_components; ++d) 3958 { 3959 const Number *in = in_array + d * dofs_per_cell; 3960 Number * out = out_array + d * dofs_per_cell; 3961 3962 if (dim == 3) 3963 { 3964 evaluator.template hessians<2, false, false>(in, out); 3965 evaluator.template hessians<1, false, false>(out, out); 3966 evaluator.template hessians<0, false, false>(out, out); 3967 } 3968 if (dim == 2) 3969 { 3970 evaluator.template hessians<1, false, false>(in, out); 3971 evaluator.template hessians<0, false, false>(out, out); 3972 } 3973 if (dim == 1) 3974 evaluator.template hessians<0, false, false>(in, out); 3975 } 3976 return false; 3977 } 3978 3979 template <int fe_degree, int = 0> 3980 static bool 3981 run(const unsigned int, 3982 const AlignedVector<Number> &, 3983 const Number *, 3984 Number *, 3985 typename std::enable_if<fe_degree == -1>::type * = nullptr) 3986 { 3987 static_assert(fe_degree == -1, "Only usable for degree -1"); 3988 Assert(false, ExcNotImplemented()); 3989 return false; 3990 } 3991 }; 3992 3993 } // end of namespace internal 3994 3995 3996 DEAL_II_NAMESPACE_CLOSE 3997 3998 #endif 3999