1 // --------------------------------------------------------------------- 2 // 3 // Copyright (C) 2019 - 2020 by the deal.II authors 4 // 5 // This file is part of the deal.II library. 6 // 7 // The deal.II library is free software; you can use it, redistribute 8 // it, and/or modify it under the terms of the GNU Lesser General 9 // Public License as published by the Free Software Foundation; either 10 // version 2.1 of the License, or (at your option) any later version. 11 // The full text of the license can be found in the file LICENSE.md at 12 // the top level directory of deal.II. 13 // 14 // --------------------------------------------------------------------- 15 16 17 #ifndef dealii_matrix_free_vector_access_internal_h 18 #define dealii_matrix_free_vector_access_internal_h 19 20 #include <deal.II/base/config.h> 21 22 #include <deal.II/base/vectorization.h> 23 24 #include <deal.II/matrix_free/dof_info.h> 25 #include <deal.II/matrix_free/type_traits.h> 26 27 28 DEAL_II_NAMESPACE_OPEN 29 30 31 namespace internal 32 { 33 // below we use type-traits from matrix-free/type_traits.h 34 35 // access to generic const vectors that have operator (). 36 // FIXME: this is wrong for Trilinos/Petsc MPI vectors 37 // where we should first do Partitioner::local_to_global() 38 template <typename VectorType, 39 typename std::enable_if<!has_local_element<VectorType>::value, 40 VectorType>::type * = nullptr> 41 inline typename VectorType::value_type vector_access(const VectorType & vec,const unsigned int entry)42 vector_access(const VectorType &vec, const unsigned int entry) 43 { 44 return vec(entry); 45 } 46 47 48 49 // access to generic non-const vectors that have operator (). 50 // FIXME: this is wrong for Trilinos/Petsc MPI vectors 51 // where we should first do Partitioner::local_to_global() 52 template <typename VectorType, 53 typename std::enable_if<!has_local_element<VectorType>::value, 54 VectorType>::type * = nullptr> 55 inline typename VectorType::value_type & vector_access(VectorType & vec,const unsigned int entry)56 vector_access(VectorType &vec, const unsigned int entry) 57 { 58 return vec(entry); 59 } 60 61 62 63 // access to distributed MPI vectors that have a local_element(uint) 64 // method to access data in local index space, which is what we use in 65 // DoFInfo and hence in read_dof_values etc. 66 template <typename VectorType, 67 typename std::enable_if<has_local_element<VectorType>::value, 68 VectorType>::type * = nullptr> 69 inline typename VectorType::value_type & vector_access(VectorType & vec,const unsigned int entry)70 vector_access(VectorType &vec, const unsigned int entry) 71 { 72 return vec.local_element(entry); 73 } 74 75 76 77 // same for const access 78 template <typename VectorType, 79 typename std::enable_if<has_local_element<VectorType>::value, 80 VectorType>::type * = nullptr> 81 inline typename VectorType::value_type vector_access(const VectorType & vec,const unsigned int entry)82 vector_access(const VectorType &vec, const unsigned int entry) 83 { 84 return vec.local_element(entry); 85 } 86 87 88 89 template <typename VectorType, 90 typename std::enable_if<has_add_local_element<VectorType>::value, 91 VectorType>::type * = nullptr> 92 inline void vector_access_add(VectorType & vec,const unsigned int entry,const typename VectorType::value_type & val)93 vector_access_add(VectorType & vec, 94 const unsigned int entry, 95 const typename VectorType::value_type &val) 96 { 97 vec.add_local_element(entry, val); 98 } 99 100 101 102 template <typename VectorType, 103 typename std::enable_if<!has_add_local_element<VectorType>::value, 104 VectorType>::type * = nullptr> 105 inline void vector_access_add(VectorType & vec,const unsigned int entry,const typename VectorType::value_type & val)106 vector_access_add(VectorType & vec, 107 const unsigned int entry, 108 const typename VectorType::value_type &val) 109 { 110 vector_access(vec, entry) += val; 111 } 112 113 114 115 template <typename VectorType, 116 typename std::enable_if<has_add_local_element<VectorType>::value, 117 VectorType>::type * = nullptr> 118 inline void vector_access_add_global(VectorType & vec,const types::global_dof_index entry,const typename VectorType::value_type & val)119 vector_access_add_global(VectorType & vec, 120 const types::global_dof_index entry, 121 const typename VectorType::value_type &val) 122 { 123 vec.add(entry, val); 124 } 125 126 127 128 template <typename VectorType, 129 typename std::enable_if<!has_add_local_element<VectorType>::value, 130 VectorType>::type * = nullptr> 131 inline void vector_access_add_global(VectorType & vec,const types::global_dof_index entry,const typename VectorType::value_type & val)132 vector_access_add_global(VectorType & vec, 133 const types::global_dof_index entry, 134 const typename VectorType::value_type &val) 135 { 136 vec(entry) += val; 137 } 138 139 140 141 template <typename VectorType, 142 typename std::enable_if<has_set_local_element<VectorType>::value, 143 VectorType>::type * = nullptr> 144 inline void vector_access_set(VectorType & vec,const unsigned int entry,const typename VectorType::value_type & val)145 vector_access_set(VectorType & vec, 146 const unsigned int entry, 147 const typename VectorType::value_type &val) 148 { 149 vec.set_local_element(entry, val); 150 } 151 152 153 154 template <typename VectorType, 155 typename std::enable_if<!has_set_local_element<VectorType>::value, 156 VectorType>::type * = nullptr> 157 inline void vector_access_set(VectorType & vec,const unsigned int entry,const typename VectorType::value_type & val)158 vector_access_set(VectorType & vec, 159 const unsigned int entry, 160 const typename VectorType::value_type &val) 161 { 162 vector_access(vec, entry) = val; 163 } 164 165 166 167 // this is to make sure that the parallel partitioning in VectorType 168 // is really the same as stored in MatrixFree. 169 // version below is when has_partitioners_are_compatible == false 170 // FIXME: this is incorrect for PETSc/Trilinos MPI vectors 171 template < 172 typename VectorType, 173 typename std::enable_if<!has_partitioners_are_compatible<VectorType>::value, 174 VectorType>::type * = nullptr> 175 inline void check_vector_compatibility(const VectorType & vec,const internal::MatrixFreeFunctions::DoFInfo & dof_info)176 check_vector_compatibility( 177 const VectorType & vec, 178 const internal::MatrixFreeFunctions::DoFInfo &dof_info) 179 { 180 (void)vec; 181 (void)dof_info; 182 183 AssertDimension(vec.size(), dof_info.vector_partitioner->size()); 184 } 185 186 187 188 // same as above for has_partitioners_are_compatible == true 189 template < 190 typename VectorType, 191 typename std::enable_if<has_partitioners_are_compatible<VectorType>::value, 192 VectorType>::type * = nullptr> 193 inline void check_vector_compatibility(const VectorType & vec,const internal::MatrixFreeFunctions::DoFInfo & dof_info)194 check_vector_compatibility( 195 const VectorType & vec, 196 const internal::MatrixFreeFunctions::DoFInfo &dof_info) 197 { 198 (void)vec; 199 (void)dof_info; 200 Assert(vec.partitioners_are_compatible(*dof_info.vector_partitioner), 201 ExcMessage( 202 "The parallel layout of the given vector is not " 203 "compatible with the parallel partitioning in MatrixFree. " 204 "Use MatrixFree::initialize_dof_vector to get a " 205 "compatible vector.")); 206 } 207 208 209 210 // Below, three classes (VectorReader, VectorSetter, 211 // VectorDistributorLocalToGlobal) implement the same interface and can be 212 // used to to read from vector, set elements of a vector and add to elements 213 // of the vector. 214 215 // 1. A class to read data from vector 216 template <typename Number, typename VectorizedArrayType> 217 struct VectorReader 218 { 219 template <typename VectorType> 220 void process_dofVectorReader221 process_dof(const unsigned int index, 222 const VectorType & vec, 223 Number & res) const 224 { 225 res = vector_access(vec, index); 226 } 227 228 229 230 template <typename VectorType> 231 void process_dofs_vectorizedVectorReader232 process_dofs_vectorized(const unsigned int dofs_per_cell, 233 const unsigned int dof_index, 234 VectorType & vec, 235 VectorizedArrayType *dof_values, 236 std::integral_constant<bool, true>) const 237 { 238 const Number *vec_ptr = vec.begin() + dof_index; 239 for (unsigned int i = 0; i < dofs_per_cell; 240 ++i, vec_ptr += VectorizedArrayType::size()) 241 dof_values[i].load(vec_ptr); 242 } 243 244 245 246 template <typename VectorType> 247 void process_dofs_vectorizedVectorReader248 process_dofs_vectorized(const unsigned int dofs_per_cell, 249 const unsigned int dof_index, 250 const VectorType & vec, 251 VectorizedArrayType *dof_values, 252 std::integral_constant<bool, false>) const 253 { 254 for (unsigned int i = 0; i < dofs_per_cell; ++i) 255 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 256 dof_values[i][v] = 257 vector_access(vec, dof_index + v + i * VectorizedArrayType::size()); 258 } 259 260 261 262 template <typename VectorType> 263 void process_dofs_vectorized_transposeVectorReader264 process_dofs_vectorized_transpose(const unsigned int dofs_per_cell, 265 const unsigned int * dof_indices, 266 VectorType & vec, 267 VectorizedArrayType *dof_values, 268 std::integral_constant<bool, true>) const 269 { 270 dealii::vectorized_load_and_transpose(dofs_per_cell, 271 vec.begin(), 272 dof_indices, 273 dof_values); 274 } 275 276 277 278 template <typename VectorType> 279 void process_dofs_vectorized_transposeVectorReader280 process_dofs_vectorized_transpose(const unsigned int dofs_per_cell, 281 const unsigned int * dof_indices, 282 const VectorType & vec, 283 VectorizedArrayType *dof_values, 284 std::integral_constant<bool, false>) const 285 { 286 for (unsigned int d = 0; d < dofs_per_cell; ++d) 287 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 288 dof_values[d][v] = vector_access(vec, dof_indices[v] + d); 289 } 290 291 292 293 // variant where VectorType::value_type is the same as Number -> can call 294 // gather 295 template <typename VectorType> 296 void process_dof_gatherVectorReader297 process_dof_gather(const unsigned int * indices, 298 VectorType & vec, 299 const unsigned int constant_offset, 300 VectorizedArrayType &res, 301 std::integral_constant<bool, true>) const 302 { 303 res.gather(vec.begin() + constant_offset, indices); 304 } 305 306 307 308 // variant where VectorType::value_type is not the same as Number -> must 309 // manually load the data 310 template <typename VectorType> 311 void process_dof_gatherVectorReader312 process_dof_gather(const unsigned int * indices, 313 const VectorType & vec, 314 const unsigned int constant_offset, 315 VectorizedArrayType &res, 316 std::integral_constant<bool, false>) const 317 { 318 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 319 res[v] = vector_access(vec, indices[v] + constant_offset); 320 } 321 322 323 324 template <typename VectorType> 325 void process_dof_globalVectorReader326 process_dof_global(const types::global_dof_index index, 327 const VectorType & vec, 328 Number & res) const 329 { 330 res = vec(index); 331 } 332 333 334 335 void pre_constraintsVectorReader336 pre_constraints(const Number &, Number &res) const 337 { 338 res = Number(); 339 } 340 341 342 343 template <typename VectorType> 344 void process_constraintVectorReader345 process_constraint(const unsigned int index, 346 const Number weight, 347 const VectorType & vec, 348 Number & res) const 349 { 350 res += weight * vector_access(vec, index); 351 } 352 353 354 355 void post_constraintsVectorReader356 post_constraints(const Number &sum, Number &write_pos) const 357 { 358 write_pos = sum; 359 } 360 361 362 363 void process_emptyVectorReader364 process_empty(VectorizedArrayType &res) const 365 { 366 res = VectorizedArrayType(); 367 } 368 }; 369 370 371 372 // 2. A class to add values to the vector during 373 // FEEvaluation::distribute_local_to_global() call 374 template <typename Number, typename VectorizedArrayType> 375 struct VectorDistributorLocalToGlobal 376 { 377 template <typename VectorType> 378 void process_dofVectorDistributorLocalToGlobal379 process_dof(const unsigned int index, VectorType &vec, Number &res) const 380 { 381 vector_access_add(vec, index, res); 382 } 383 384 385 386 template <typename VectorType> 387 void process_dofs_vectorizedVectorDistributorLocalToGlobal388 process_dofs_vectorized(const unsigned int dofs_per_cell, 389 const unsigned int dof_index, 390 VectorType & vec, 391 VectorizedArrayType *dof_values, 392 std::integral_constant<bool, true>) const 393 { 394 Number *vec_ptr = vec.begin() + dof_index; 395 for (unsigned int i = 0; i < dofs_per_cell; 396 ++i, vec_ptr += VectorizedArrayType::size()) 397 { 398 VectorizedArrayType tmp; 399 tmp.load(vec_ptr); 400 tmp += dof_values[i]; 401 tmp.store(vec_ptr); 402 } 403 } 404 405 406 407 template <typename VectorType> 408 void process_dofs_vectorizedVectorDistributorLocalToGlobal409 process_dofs_vectorized(const unsigned int dofs_per_cell, 410 const unsigned int dof_index, 411 VectorType & vec, 412 VectorizedArrayType *dof_values, 413 std::integral_constant<bool, false>) const 414 { 415 for (unsigned int i = 0; i < dofs_per_cell; ++i) 416 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 417 vector_access_add(vec, 418 dof_index + v + i * VectorizedArrayType::size(), 419 dof_values[i][v]); 420 } 421 422 423 424 template <typename VectorType> 425 void process_dofs_vectorized_transposeVectorDistributorLocalToGlobal426 process_dofs_vectorized_transpose(const unsigned int dofs_per_cell, 427 const unsigned int * dof_indices, 428 VectorType & vec, 429 VectorizedArrayType *dof_values, 430 std::integral_constant<bool, true>) const 431 { 432 vectorized_transpose_and_store( 433 true, dofs_per_cell, dof_values, dof_indices, vec.begin()); 434 } 435 436 437 438 template <typename VectorType> 439 void process_dofs_vectorized_transposeVectorDistributorLocalToGlobal440 process_dofs_vectorized_transpose(const unsigned int dofs_per_cell, 441 const unsigned int * dof_indices, 442 VectorType & vec, 443 VectorizedArrayType *dof_values, 444 std::integral_constant<bool, false>) const 445 { 446 for (unsigned int d = 0; d < dofs_per_cell; ++d) 447 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 448 vector_access_add(vec, dof_indices[v] + d, dof_values[d][v]); 449 } 450 451 452 453 // variant where VectorType::value_type is the same as Number -> can call 454 // scatter 455 template <typename VectorType> 456 void process_dof_gatherVectorDistributorLocalToGlobal457 process_dof_gather(const unsigned int * indices, 458 VectorType & vec, 459 const unsigned int constant_offset, 460 VectorizedArrayType &res, 461 std::integral_constant<bool, true>) const 462 { 463 #if DEAL_II_VECTORIZATION_WIDTH_IN_BITS < 512 464 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 465 vector_access(vec, indices[v] + constant_offset) += res[v]; 466 #else 467 // only use gather in case there is also scatter. 468 VectorizedArrayType tmp; 469 tmp.gather(vec.begin() + constant_offset, indices); 470 tmp += res; 471 tmp.scatter(indices, vec.begin() + constant_offset); 472 #endif 473 } 474 475 476 477 // variant where VectorType::value_type is not the same as Number -> must 478 // manually append all data 479 template <typename VectorType> 480 void process_dof_gatherVectorDistributorLocalToGlobal481 process_dof_gather(const unsigned int * indices, 482 VectorType & vec, 483 const unsigned int constant_offset, 484 VectorizedArrayType &res, 485 std::integral_constant<bool, false>) const 486 { 487 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 488 vector_access_add(vec, indices[v] + constant_offset, res[v]); 489 } 490 491 492 493 template <typename VectorType> 494 void process_dof_globalVectorDistributorLocalToGlobal495 process_dof_global(const types::global_dof_index index, 496 VectorType & vec, 497 Number & res) const 498 { 499 vector_access_add_global(vec, index, res); 500 } 501 502 503 504 void pre_constraintsVectorDistributorLocalToGlobal505 pre_constraints(const Number &input, Number &res) const 506 { 507 res = input; 508 } 509 510 511 512 template <typename VectorType> 513 void process_constraintVectorDistributorLocalToGlobal514 process_constraint(const unsigned int index, 515 const Number weight, 516 VectorType & vec, 517 Number & res) const 518 { 519 vector_access_add(vec, index, weight * res); 520 } 521 522 523 524 void post_constraintsVectorDistributorLocalToGlobal525 post_constraints(const Number &, Number &) const 526 {} 527 528 529 530 void process_emptyVectorDistributorLocalToGlobal531 process_empty(VectorizedArrayType &) const 532 {} 533 }; 534 535 536 537 // 3. A class to set elements of the vector 538 template <typename Number, typename VectorizedArrayType> 539 struct VectorSetter 540 { 541 template <typename VectorType> 542 void process_dofVectorSetter543 process_dof(const unsigned int index, VectorType &vec, Number &res) const 544 { 545 vector_access(vec, index) = res; 546 } 547 548 549 550 template <typename VectorType> 551 void process_dofs_vectorizedVectorSetter552 process_dofs_vectorized(const unsigned int dofs_per_cell, 553 const unsigned int dof_index, 554 VectorType & vec, 555 VectorizedArrayType *dof_values, 556 std::integral_constant<bool, true>) const 557 { 558 Number *vec_ptr = vec.begin() + dof_index; 559 for (unsigned int i = 0; i < dofs_per_cell; 560 ++i, vec_ptr += VectorizedArrayType::size()) 561 dof_values[i].store(vec_ptr); 562 } 563 564 565 566 template <typename VectorType> 567 void process_dofs_vectorizedVectorSetter568 process_dofs_vectorized(const unsigned int dofs_per_cell, 569 const unsigned int dof_index, 570 VectorType & vec, 571 VectorizedArrayType *dof_values, 572 std::integral_constant<bool, false>) const 573 { 574 for (unsigned int i = 0; i < dofs_per_cell; ++i) 575 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 576 vector_access(vec, dof_index + v + i * VectorizedArrayType::size()) = 577 dof_values[i][v]; 578 } 579 580 581 582 template <typename VectorType> 583 void process_dofs_vectorized_transposeVectorSetter584 process_dofs_vectorized_transpose(const unsigned int dofs_per_cell, 585 const unsigned int * dof_indices, 586 VectorType & vec, 587 VectorizedArrayType *dof_values, 588 std::integral_constant<bool, true>) const 589 { 590 vectorized_transpose_and_store( 591 false, dofs_per_cell, dof_values, dof_indices, vec.begin()); 592 } 593 594 595 596 template <typename VectorType, bool booltype> 597 void process_dofs_vectorized_transposeVectorSetter598 process_dofs_vectorized_transpose(const unsigned int dofs_per_cell, 599 const unsigned int * dof_indices, 600 VectorType & vec, 601 VectorizedArrayType *dof_values, 602 std::integral_constant<bool, false>) const 603 { 604 for (unsigned int i = 0; i < dofs_per_cell; ++i) 605 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 606 vector_access(vec, dof_indices[v] + i) = dof_values[i][v]; 607 } 608 609 610 611 template <typename VectorType> 612 void process_dof_gatherVectorSetter613 process_dof_gather(const unsigned int * indices, 614 VectorType & vec, 615 const unsigned int constant_offset, 616 VectorizedArrayType &res, 617 std::integral_constant<bool, true>) const 618 { 619 res.scatter(indices, vec.begin() + constant_offset); 620 } 621 622 623 624 template <typename VectorType> 625 void process_dof_gatherVectorSetter626 process_dof_gather(const unsigned int * indices, 627 VectorType & vec, 628 const unsigned int constant_offset, 629 VectorizedArrayType &res, 630 std::integral_constant<bool, false>) const 631 { 632 for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) 633 vector_access(vec, indices[v] + constant_offset) = res[v]; 634 } 635 636 637 638 template <typename VectorType> 639 void process_dof_globalVectorSetter640 process_dof_global(const types::global_dof_index index, 641 VectorType & vec, 642 Number & res) const 643 { 644 vec(index) = res; 645 } 646 647 648 649 void pre_constraintsVectorSetter650 pre_constraints(const Number &, Number &) const 651 {} 652 653 654 655 template <typename VectorType> 656 void process_constraintVectorSetter657 process_constraint(const unsigned int, 658 const Number, 659 VectorType &, 660 Number &) const 661 {} 662 663 664 665 void post_constraintsVectorSetter666 post_constraints(const Number &, Number &) const 667 {} 668 669 670 671 void process_emptyVectorSetter672 process_empty(VectorizedArrayType &) const 673 {} 674 }; 675 } // namespace internal 676 677 678 DEAL_II_NAMESPACE_CLOSE 679 680 #endif 681