// ---------------------------------------------------------------------
//
// Copyright (C) 2011 - 2020 by the deal.II authors
//
// This file is part of the deal.II library.
//
// The deal.II library is free software; you can use it, redistribute
// it, and/or modify it under the terms of the GNU Lesser General
// Public License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The full text of the license can be found in the file LICENSE.md at
// the top level directory of deal.II.
//
// ---------------------------------------------------------------------

#ifndef dealii_la_parallel_vector_templates_h
#define dealii_la_parallel_vector_templates_h


#include <deal.II/base/config.h>

#include <deal.II/base/cuda.h>
#include <deal.II/base/cuda_size.h>

#include <deal.II/lac/exceptions.h>
#include <deal.II/lac/la_parallel_vector.h>
#include <deal.II/lac/petsc_vector.h>
#include <deal.II/lac/read_write_vector.h>
#include <deal.II/lac/trilinos_vector.h>
#include <deal.II/lac/vector_operations_internal.h>

#include <memory>


DEAL_II_NAMESPACE_OPEN


namespace LinearAlgebra
{
  namespace distributed
  {
    namespace internal
    {
      // In import_from_ghosted_array_finish() we might need to calculate the
      // maximal and minimal value for the given number type, which is not
      // straightforward for complex numbers. Therefore, comparison of
      // complex numbers is prohibited and throws an exception.
      template <typename Number>
      Number
      get_min(const Number a, const Number b)
      {
        return std::min(a, b);
      }

      template <typename Number>
      std::complex<Number>
      get_min(const std::complex<Number> a, const std::complex<Number>)
      {
        AssertThrow(false,
                    ExcMessage("VectorOperation::min not "
                               "implemented for complex numbers"));
        return a;
      }

      template <typename Number>
      Number
      get_max(const Number a, const Number b)
      {
        return std::max(a, b);
      }

      template <typename Number>
      std::complex<Number>
      get_max(const std::complex<Number> a, const std::complex<Number>)
      {
        AssertThrow(false,
                    ExcMessage("VectorOperation::max not "
                               "implemented for complex numbers"));
        return a;
      }



      // Resize the underlying array on the host or on the device
      template <typename Number, typename MemorySpaceType>
      struct la_parallel_vector_templates_functions
      {
        static_assert(
          std::is_same<MemorySpaceType, MemorySpace::Host>::value ||
            std::is_same<MemorySpaceType, MemorySpace::CUDA>::value,
          "MemorySpace should be Host or CUDA");

        static void
        resize_val(
          const types::global_dof_index /*new_alloc_size*/,
          types::global_dof_index & /*allocated_size*/,
          ::dealii::MemorySpace::MemorySpaceData<Number, MemorySpaceType>
            & /*data*/)
        {}

        static void
        import(
          const ::dealii::LinearAlgebra::ReadWriteVector<Number> & /*V*/,
          ::dealii::VectorOperation::values /*operation*/,
          const std::shared_ptr<const ::dealii::Utilities::MPI::Partitioner> &
          /*communication_pattern*/,
          const IndexSet & /*locally_owned_elem*/,
          ::dealii::MemorySpace::MemorySpaceData<Number, MemorySpaceType>
            & /*data*/)
        {}

        template <typename RealType>
        static void
        linfty_norm_local(
          const ::dealii::MemorySpace::MemorySpaceData<Number, MemorySpaceType>
            & /*data*/,
          const unsigned int /*size*/,
          RealType & /*max*/)
        {}
      };

      template <typename Number>
      struct la_parallel_vector_templates_functions<Number,
                                                    ::dealii::MemorySpace::Host>
      {
        using size_type = types::global_dof_index;

        static void
        resize_val(const types::global_dof_index new_alloc_size,
                   types::global_dof_index &     allocated_size,
                   ::dealii::MemorySpace::
                     MemorySpaceData<Number, ::dealii::MemorySpace::Host> &data)
        {
          if (new_alloc_size > allocated_size)
            {
              Assert(((allocated_size > 0 && data.values != nullptr) ||
                      data.values == nullptr),
                     ExcInternalError());

              Number *new_val;
              Utilities::System::posix_memalign(
                reinterpret_cast<void **>(&new_val),
                64,
                sizeof(Number) * new_alloc_size);
              data.values = {new_val, [](Number *data) { std::free(data); }};

              allocated_size = new_alloc_size;
            }
          else if (new_alloc_size == 0)
            {
              data.values.reset();
              allocated_size = 0;
            }
        }

        static void
        import(
          const ::dealii::LinearAlgebra::ReadWriteVector<Number> &V,
          ::dealii::VectorOperation::values                       operation,
          const std::shared_ptr<const ::dealii::Utilities::MPI::Partitioner>
            &             communication_pattern,
          const IndexSet &locally_owned_elem,
          ::dealii::MemorySpace::MemorySpaceData<Number,
                                                 ::dealii::MemorySpace::Host>
            &data)
        {
          Assert(
            (operation == ::dealii::VectorOperation::add) ||
              (operation == ::dealii::VectorOperation::insert),
            ExcMessage(
              "Only VectorOperation::add and VectorOperation::insert are allowed"));

          ::dealii::LinearAlgebra::distributed::
            Vector<Number, ::dealii::MemorySpace::Host>
              tmp_vector(communication_pattern);

          // Fill entries from the ReadWriteVector into the distributed
          // vector, including ghost entries.
          // This is not really efficient right now because indices are
          // translated twice, once by nth_index_in_set(i) and once by
          // operator() of tmp_vector.
          const IndexSet &v_stored = V.get_stored_elements();
          for (size_type i = 0; i < v_stored.n_elements(); ++i)
            tmp_vector(v_stored.nth_index_in_set(i)) = V.local_element(i);

          tmp_vector.compress(operation);

          // Copy the local elements of tmp_vector to the right place in val
          IndexSet tmp_index_set = tmp_vector.locally_owned_elements();
          if (operation == VectorOperation::add)
            {
              for (size_type i = 0; i < tmp_index_set.n_elements(); ++i)
                {
                  data.values[locally_owned_elem.index_within_set(
                    tmp_index_set.nth_index_in_set(i))] +=
                    tmp_vector.local_element(i);
                }
            }
          else
            {
              for (size_type i = 0; i < tmp_index_set.n_elements(); ++i)
                {
                  data.values[locally_owned_elem.index_within_set(
                    tmp_index_set.nth_index_in_set(i))] =
                    tmp_vector.local_element(i);
                }
            }
        }

        template <typename RealType>
        static void
        linfty_norm_local(const ::dealii::MemorySpace::MemorySpaceData<
                            Number,
                            ::dealii::MemorySpace::Host> &data,
                          const unsigned int              size,
                          RealType &                      max)
        {
          for (size_type i = 0; i < size; ++i)
            max =
              std::max(numbers::NumberTraits<Number>::abs(data.values[i]),
                       max);
        }
      };

#ifdef DEAL_II_COMPILER_CUDA_AWARE
      template <typename Number>
      struct la_parallel_vector_templates_functions<Number,
                                                    ::dealii::MemorySpace::CUDA>
      {
        using size_type = types::global_dof_index;

        static void
        resize_val(const types::global_dof_index new_alloc_size,
                   types::global_dof_index &     allocated_size,
                   ::dealii::MemorySpace::
                     MemorySpaceData<Number, ::dealii::MemorySpace::CUDA> &data)
        {
          static_assert(
            std::is_same<Number, float>::value ||
              std::is_same<Number, double>::value,
            "Number should be float or double for CUDA memory space");

          if (new_alloc_size > allocated_size)
            {
              Assert(((allocated_size > 0 && data.values_dev != nullptr) ||
                      data.values_dev == nullptr),
                     ExcInternalError());

              Number *new_val_dev;
              Utilities::CUDA::malloc(new_val_dev, new_alloc_size);
              data.values_dev.reset(new_val_dev);

              allocated_size = new_alloc_size;
            }
          else if (new_alloc_size == 0)
            {
              data.values_dev.reset();
              allocated_size = 0;
            }
        }

        static void
        import(const ReadWriteVector<Number> &V,
               VectorOperation::values        operation,
               std::shared_ptr<const Utilities::MPI::Partitioner>
                               communication_pattern,
               const IndexSet &locally_owned_elem,
               ::dealii::MemorySpace::
                 MemorySpaceData<Number, ::dealii::MemorySpace::CUDA> &data)
        {
          Assert(
            (operation == ::dealii::VectorOperation::add) ||
              (operation == ::dealii::VectorOperation::insert),
            ExcMessage(
              "Only VectorOperation::add and VectorOperation::insert are allowed"));

          ::dealii::LinearAlgebra::distributed::
            Vector<Number, ::dealii::MemorySpace::CUDA>
              tmp_vector(communication_pattern);

          // Fill entries from the ReadWriteVector into the distributed
          // vector, including ghost entries.
          // This is not really efficient right now because indices are
          // translated twice, once by nth_index_in_set(i) and once by
          // operator() of tmp_vector.
          const IndexSet &       v_stored   = V.get_stored_elements();
          const size_type        n_elements = v_stored.n_elements();
          std::vector<size_type> indices(n_elements);
          for (size_type i = 0; i < n_elements; ++i)
            indices[i] = communication_pattern->global_to_local(
              v_stored.nth_index_in_set(i));
          // Move the indices to the device
          size_type *indices_dev;
          ::dealii::Utilities::CUDA::malloc(indices_dev, n_elements);
          ::dealii::Utilities::CUDA::copy_to_dev(indices, indices_dev);
          // Move the data to the device
          Number *V_dev;
          ::dealii::Utilities::CUDA::malloc(V_dev, n_elements);
          cudaError_t cuda_error_code = cudaMemcpy(V_dev,
                                                   V.begin(),
                                                   n_elements * sizeof(Number),
                                                   cudaMemcpyHostToDevice);
          AssertCuda(cuda_error_code);

          // Set the values in tmp_vector
          const int n_blocks =
            1 + n_elements / (::dealii::CUDAWrappers::chunk_size *
                              ::dealii::CUDAWrappers::block_size);
          ::dealii::LinearAlgebra::CUDAWrappers::kernel::set_permutated<Number>
            <<<n_blocks, ::dealii::CUDAWrappers::block_size>>>(
              indices_dev, tmp_vector.begin(), V_dev, n_elements);

          tmp_vector.compress(operation);

          // Copy the local elements of tmp_vector to the right place in val
          IndexSet        tmp_index_set  = tmp_vector.locally_owned_elements();
          const size_type tmp_n_elements = tmp_index_set.n_elements();
          indices.resize(tmp_n_elements);
          for (size_type i = 0; i < tmp_n_elements; ++i)
            indices[i] = locally_owned_elem.index_within_set(
              tmp_index_set.nth_index_in_set(i));
          ::dealii::Utilities::CUDA::free(indices_dev);
          ::dealii::Utilities::CUDA::malloc(indices_dev, tmp_n_elements);
          ::dealii::Utilities::CUDA::copy_to_dev(indices, indices_dev);

          if (operation == VectorOperation::add)
            ::dealii::LinearAlgebra::CUDAWrappers::kernel::add_permutated<
              Number><<<n_blocks, ::dealii::CUDAWrappers::block_size>>>(
              indices_dev,
              data.values_dev.get(),
              tmp_vector.begin(),
              tmp_n_elements);
          else
            ::dealii::LinearAlgebra::CUDAWrappers::kernel::set_permutated<
              Number><<<n_blocks, ::dealii::CUDAWrappers::block_size>>>(
              indices_dev,
              data.values_dev.get(),
              tmp_vector.begin(),
              tmp_n_elements);

          ::dealii::Utilities::CUDA::free(indices_dev);
          ::dealii::Utilities::CUDA::free(V_dev);
        }

        template <typename RealType>
        static void
        linfty_norm_local(const ::dealii::MemorySpace::MemorySpaceData<
                            Number,
                            ::dealii::MemorySpace::CUDA> &data,
                          const unsigned int              size,
                          RealType &                      result)
        {
          static_assert(std::is_same<Number, RealType>::value,
                        "RealType should be the same type as Number");

          Number *    result_device;
          cudaError_t error_code = cudaMalloc(&result_device, sizeof(Number));
          AssertCuda(error_code);
          error_code = cudaMemset(result_device, 0, sizeof(Number));
          AssertCuda(error_code);

          const int n_blocks = 1 + size / (::dealii::CUDAWrappers::chunk_size *
                                           ::dealii::CUDAWrappers::block_size);
          ::dealii::LinearAlgebra::CUDAWrappers::kernel::reduction<
            Number,
            ::dealii::LinearAlgebra::CUDAWrappers::kernel::LInfty<Number>>
            <<<dim3(n_blocks, 1), dim3(::dealii::CUDAWrappers::block_size)>>>(
              result_device, data.values_dev.get(), size);

          // Copy the result back to the host
          error_code = cudaMemcpy(&result,
                                  result_device,
                                  sizeof(Number),
                                  cudaMemcpyDeviceToHost);
          AssertCuda(error_code);
          // Free the memory on the device
          error_code = cudaFree(result_device);
          AssertCuda(error_code);
        }
      };
#endif
    } // namespace internal


    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::clear_mpi_requests()
    {
#ifdef DEAL_II_WITH_MPI
      for (auto &compress_request : compress_requests)
        {
          const int ierr = MPI_Request_free(&compress_request);
          AssertThrowMPI(ierr);
        }
      compress_requests.clear();
      for (auto &update_ghost_values_request : update_ghost_values_requests)
        {
          const int ierr = MPI_Request_free(&update_ghost_values_request);
          AssertThrowMPI(ierr);
        }
      update_ghost_values_requests.clear();
#endif
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::resize_val(const size_type new_alloc_size)
    {
      internal::la_parallel_vector_templates_functions<
        Number,
        MemorySpaceType>::resize_val(new_alloc_size, allocated_size, data);

      thread_loop_partitioner =
        std::make_shared<::dealii::parallel::internal::TBBPartitioner>();
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::reinit(const size_type size,
                                            const bool omit_zeroing_entries)
    {
      clear_mpi_requests();

      // check whether we need to reallocate
      resize_val(size);

      // delete previous content in import data
      import_data.values.reset();
      import_data.values_dev.reset();

      // set partitioner to serial version
      partitioner = std::make_shared<Utilities::MPI::Partitioner>(size);

      // set entries to zero if so requested
      if (omit_zeroing_entries == false)
        this->operator=(Number());
      else
        zero_out_ghosts();
    }



    template <typename Number, typename MemorySpaceType>
    template <typename Number2>
    void
    Vector<Number, MemorySpaceType>::reinit(
      const Vector<Number2, MemorySpaceType> &v,
      const bool                              omit_zeroing_entries)
    {
      clear_mpi_requests();
      Assert(v.partitioner.get() != nullptr, ExcNotInitialized());

      // check whether the partitioners are different (check only whether
      // they are allocated differently, not whether the actual data is
      // different)
      if (partitioner.get() != v.partitioner.get())
        {
          partitioner = v.partitioner;
          const size_type new_allocated_size =
            partitioner->local_size() + partitioner->n_ghost_indices();
          resize_val(new_allocated_size);
        }

      if (omit_zeroing_entries == false)
        this->operator=(Number());
      else
        zero_out_ghosts();

      // do not reallocate import_data directly, but only upon request. It
      // is only used as temporary storage for compress() and
      // update_ghost_values(), and we might have vectors where we never
      // call these methods and hence do not need to have the storage.
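      //
      // A minimal usage sketch (with hypothetical index sets) of the
      // pattern that eventually triggers the lazy allocation of
      // import_data inside compress_start():
      //
      //   LinearAlgebra::distributed::Vector<double> vec(owned_set,
      //                                                  ghost_set,
      //                                                  MPI_COMM_WORLD);
      //   vec(some_ghost_index) += 1.;        // write to a ghost entry
      //   vec.compress(VectorOperation::add); // import buffer is
      //                                       // allocated on first use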
      import_data.values.reset();
      import_data.values_dev.reset();

      thread_loop_partitioner = v.thread_loop_partitioner;
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::reinit(
      const IndexSet &locally_owned_indices,
      const IndexSet &ghost_indices,
      const MPI_Comm  communicator)
    {
      // set up parallel partitioner with index sets and communicator
      reinit(std::make_shared<Utilities::MPI::Partitioner>(
        locally_owned_indices, ghost_indices, communicator));
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::reinit(
      const IndexSet &locally_owned_indices,
      const MPI_Comm  communicator)
    {
      // set up parallel partitioner with index sets and communicator
      reinit(
        std::make_shared<Utilities::MPI::Partitioner>(locally_owned_indices,
                                                      communicator));
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::reinit(
      const std::shared_ptr<const Utilities::MPI::Partitioner> &partitioner_in)
    {
      clear_mpi_requests();
      partitioner = partitioner_in;

      // set vector size and allocate memory
      const size_type new_allocated_size =
        partitioner->local_size() + partitioner->n_ghost_indices();
      resize_val(new_allocated_size);

      // initialize to zero
      *this = Number();


      // do not reallocate import_data directly, but only upon request. It
      // is only used as temporary storage for compress() and
      // update_ghost_values(), and we might have vectors where we never
      // call these methods and hence do not need to have the storage.
      import_data.values.reset();
      import_data.values_dev.reset();

      vector_is_ghosted = false;
    }



    template <typename Number, typename MemorySpaceType>
    Vector<Number, MemorySpaceType>::Vector()
      : partitioner(std::make_shared<Utilities::MPI::Partitioner>())
      , allocated_size(0)
    {
      reinit(0);
    }



    template <typename Number, typename MemorySpaceType>
    Vector<Number, MemorySpaceType>::Vector(
      const Vector<Number, MemorySpaceType> &v)
      : Subscriptor()
      , allocated_size(0)
      , vector_is_ghosted(false)
    {
      reinit(v, true);

      thread_loop_partitioner = v.thread_loop_partitioner;

      const size_type this_size = local_size();
      if (this_size > 0)
        {
          dealii::internal::VectorOperations::
            functions<Number, Number, MemorySpaceType>::copy(
              thread_loop_partitioner,
              partitioner->local_size(),
              v.data,
              data);
        }
    }



    template <typename Number, typename MemorySpaceType>
    Vector<Number, MemorySpaceType>::Vector(const IndexSet &local_range,
                                            const IndexSet &ghost_indices,
                                            const MPI_Comm  communicator)
      : allocated_size(0)
      , vector_is_ghosted(false)
    {
      reinit(local_range, ghost_indices, communicator);
    }



    template <typename Number, typename MemorySpaceType>
    Vector<Number, MemorySpaceType>::Vector(const IndexSet &local_range,
                                            const MPI_Comm  communicator)
      : allocated_size(0)
      , vector_is_ghosted(false)
    {
      reinit(local_range, communicator);
    }



    template <typename Number, typename MemorySpaceType>
    Vector<Number, MemorySpaceType>::Vector(const size_type size)
      : allocated_size(0)
      , vector_is_ghosted(false)
    {
      reinit(size, false);
    }



    template <typename Number, typename MemorySpaceType>
    Vector<Number, MemorySpaceType>::Vector(
      const std::shared_ptr<const Utilities::MPI::Partitioner> &partitioner)
      : allocated_size(0)
      , vector_is_ghosted(false)
    {
      reinit(partitioner);
    }



    template <typename Number, typename MemorySpaceType>
    inline Vector<Number, MemorySpaceType>::~Vector()
    {
      try
        {
          clear_mpi_requests();
        }
      catch (...)
        {}
    }



    template <typename Number, typename MemorySpaceType>
    inline Vector<Number, MemorySpaceType> &
    Vector<Number, MemorySpaceType>::
    operator=(const Vector<Number, MemorySpaceType> &c)
    {
#ifdef _MSC_VER
      return this->operator=<Number>(c);
#else
      return this->template operator=<Number>(c);
#endif
    }



    template <typename Number, typename MemorySpaceType>
    template <typename Number2>
    inline Vector<Number, MemorySpaceType> &
    Vector<Number, MemorySpaceType>::
    operator=(const Vector<Number2, MemorySpaceType> &c)
    {
      Assert(c.partitioner.get() != nullptr, ExcNotInitialized());

      // we update ghost values whenever one of the input or output vectors
      // already held ghost values or when we import data from a vector with
      // the same local range but a different ghost layout
      bool must_update_ghost_values = c.vector_is_ghosted;

      // check whether the two vectors use the same parallel partitioner. if
      // not, check if all local ranges are the same (that way, we can
      // exchange data between different parallel layouts). One variant which
      // is included here and necessary for compatibility with the other
      // distributed vector classes (Trilinos, PETSc) is the case when vector
      // c does not have any ghosts (constructed without ghost elements
      // given) but the current vector does: In that case, we need to
      // exchange data also when neither of the two vectors has updated its
      // ghost values before.
      if (partitioner.get() == nullptr)
        reinit(c, true);
      else if (partitioner.get() != c.partitioner.get())
        {
          // local ranges are also the same if both partitioners are empty
          // (even if they happen to define the empty range as [0,0) or
          // [c,c) for some c!=0 in a different way).
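          //
          // The Utilities::MPI::min() reduction below turns this into a
          // global decision: only if *every* process finds its local range
          // identical can the data be copied process-locally; otherwise all
          // processes consistently fall back to reinit(c, true).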
          int local_ranges_are_identical =
            (partitioner->local_range() == c.partitioner->local_range() ||
             (partitioner->local_range().second ==
                partitioner->local_range().first &&
              c.partitioner->local_range().second ==
                c.partitioner->local_range().first));
          if ((c.partitioner->n_mpi_processes() > 1 &&
               Utilities::MPI::min(local_ranges_are_identical,
                                   c.partitioner->get_mpi_communicator()) ==
                 0) ||
              !local_ranges_are_identical)
            reinit(c, true);
          else
            must_update_ghost_values |= vector_is_ghosted;

          must_update_ghost_values |=
            (c.partitioner->ghost_indices_initialized() == false &&
             partitioner->ghost_indices_initialized() == true);
        }
      else
        must_update_ghost_values |= vector_is_ghosted;

      thread_loop_partitioner = c.thread_loop_partitioner;

      const size_type this_size = partitioner->local_size();
      if (this_size > 0)
        {
          dealii::internal::VectorOperations::
            functions<Number, Number2, MemorySpaceType>::copy(
              thread_loop_partitioner, this_size, c.data, data);
        }

      if (must_update_ghost_values)
        update_ghost_values();
      else
        zero_out_ghosts();
      return *this;
    }



    template <typename Number, typename MemorySpaceType>
    template <typename Number2>
    void
    Vector<Number, MemorySpaceType>::copy_locally_owned_data_from(
      const Vector<Number2, MemorySpaceType> &src)
    {
      AssertDimension(partitioner->local_size(),
                      src.partitioner->local_size());
      if (partitioner->local_size() > 0)
        {
          dealii::internal::VectorOperations::
            functions<Number, Number2, MemorySpaceType>::copy(
              thread_loop_partitioner,
              partitioner->local_size(),
              src.data,
              data);
        }
    }



    template <typename Number, typename MemorySpaceType>
    template <typename MemorySpaceType2>
    void
    Vector<Number, MemorySpaceType>::import(
      const Vector<Number, MemorySpaceType2> &src,
      VectorOperation::values                 operation)
    {
      Assert(src.partitioner.get() != nullptr, ExcNotInitialized());
      Assert(partitioner->locally_owned_range() ==
               src.partitioner->locally_owned_range(),
             ExcMessage("Locally owned indices should be identical."));
      Assert(partitioner->ghost_indices() == src.partitioner->ghost_indices(),
             ExcMessage("Ghost indices should be identical."));
      ::dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::import(
          thread_loop_partitioner, allocated_size, operation, src.data, data);
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::compress(
      ::dealii::VectorOperation::values operation)
    {
      compress_start(0, operation);
      compress_finish(operation);
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::update_ghost_values() const
    {
      update_ghost_values_start();
      update_ghost_values_finish();
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::zero_out_ghosts() const
    {
      if (data.values != nullptr)
        std::fill_n(data.values.get() + partitioner->local_size(),
                    partitioner->n_ghost_indices(),
                    Number());
#ifdef DEAL_II_COMPILER_CUDA_AWARE
      if (data.values_dev != nullptr)
        {
          const cudaError_t cuda_error_code =
            cudaMemset(data.values_dev.get() + partitioner->local_size(),
                       0,
                       partitioner->n_ghost_indices() * sizeof(Number));
          AssertCuda(cuda_error_code);
        }
#endif

      vector_is_ghosted = false;
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::compress_start(
      const unsigned int                communication_channel,
      ::dealii::VectorOperation::values operation)
    {
      AssertIndexRange(communication_channel, 200);
      Assert(vector_is_ghosted == false,
             ExcMessage("Cannot call compress() on a ghosted vector"));

#ifdef DEAL_II_WITH_MPI
      // make this function thread safe
      std::lock_guard<std::mutex> lock(mutex);

      // allocate import_data in case it is not set up yet
      if (partitioner->n_import_indices() > 0)
        {
#  if defined(DEAL_II_COMPILER_CUDA_AWARE) && \
    defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
          if (std::is_same<MemorySpaceType, dealii::MemorySpace::CUDA>::value)
            {
              if (import_data.values_dev == nullptr)
                import_data.values_dev.reset(
                  Utilities::CUDA::allocate_device_data<Number>(
                    partitioner->n_import_indices()));
            }
          else
#  endif
            {
#  if !defined(DEAL_II_COMPILER_CUDA_AWARE) && \
    defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
              static_assert(
                std::is_same<MemorySpaceType, dealii::MemorySpace::Host>::value,
                "This code path should only be compiled for CUDA-aware-MPI for MemorySpace::Host!");
#  endif
              if (import_data.values == nullptr)
                {
                  Number *new_val;
                  Utilities::System::posix_memalign(
                    reinterpret_cast<void **>(&new_val),
                    64,
                    sizeof(Number) * partitioner->n_import_indices());
                  import_data.values.reset(new_val);
                }
            }
        }

#  if defined DEAL_II_COMPILER_CUDA_AWARE && \
    !defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
      if (std::is_same<MemorySpaceType, dealii::MemorySpace::CUDA>::value)
        {
          // Move the data to the host and then move it back to the
          // device. We use data.values to store the elements because the
          // function uses a view of the array and thus we need the data on
          // the host to outlive the scope of the function.
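          //
          // This is the fallback path for an MPI library that is not
          // CUDA-aware: the complete array (owned plus ghost range) is
          // staged through a host buffer so that MPI only ever touches
          // host memory.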
          Number *new_val;
          Utilities::System::posix_memalign(
            reinterpret_cast<void **>(&new_val),
            64,
            sizeof(Number) * allocated_size);

          data.values = {new_val, [](Number *data) { std::free(data); }};

          cudaError_t cuda_error_code =
            cudaMemcpy(data.values.get(),
                       data.values_dev.get(),
                       allocated_size * sizeof(Number),
                       cudaMemcpyDeviceToHost);
          AssertCuda(cuda_error_code);
        }
#  endif

#  if defined(DEAL_II_COMPILER_CUDA_AWARE) && \
    defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
      if (std::is_same<MemorySpaceType, dealii::MemorySpace::CUDA>::value)
        {
          partitioner->import_from_ghosted_array_start(
            operation,
            communication_channel,
            ArrayView<Number, MemorySpace::CUDA>(
              data.values_dev.get() + partitioner->local_size(),
              partitioner->n_ghost_indices()),
            ArrayView<Number, MemorySpace::CUDA>(
              import_data.values_dev.get(), partitioner->n_import_indices()),
            compress_requests);
        }
      else
#  endif
        {
          partitioner->import_from_ghosted_array_start(
            operation,
            communication_channel,
            ArrayView<Number, MemorySpace::Host>(
              data.values.get() + partitioner->local_size(),
              partitioner->n_ghost_indices()),
            ArrayView<Number, MemorySpace::Host>(
              import_data.values.get(), partitioner->n_import_indices()),
            compress_requests);
        }
#else
      (void)communication_channel;
      (void)operation;
#endif
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::compress_finish(
      ::dealii::VectorOperation::values operation)
    {
#ifdef DEAL_II_WITH_MPI
      vector_is_ghosted = false;

      // in order to zero the ghost part of the vector, we need to call
      // import_from_ghosted_array_finish() regardless of
      // compress_requests.size() == 0

      // make this function thread safe
      std::lock_guard<std::mutex> lock(mutex);
#  if defined(DEAL_II_COMPILER_CUDA_AWARE) && \
    defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
      if (std::is_same<MemorySpaceType, MemorySpace::CUDA>::value)
        {
          Assert(partitioner->n_import_indices() == 0 ||
                   import_data.values_dev != nullptr,
                 ExcNotInitialized());
          partitioner
            ->import_from_ghosted_array_finish<Number, MemorySpace::CUDA>(
              operation,
              ArrayView<const Number, MemorySpace::CUDA>(
                import_data.values_dev.get(),
                partitioner->n_import_indices()),
              ArrayView<Number, MemorySpace::CUDA>(data.values_dev.get(),
                                                   partitioner->local_size()),
              ArrayView<Number, MemorySpace::CUDA>(
                data.values_dev.get() + partitioner->local_size(),
                partitioner->n_ghost_indices()),
              compress_requests);
        }
      else
#  endif
        {
          Assert(partitioner->n_import_indices() == 0 ||
                   import_data.values != nullptr,
                 ExcNotInitialized());
          partitioner
            ->import_from_ghosted_array_finish<Number, MemorySpace::Host>(
              operation,
              ArrayView<const Number, MemorySpace::Host>(
                import_data.values.get(), partitioner->n_import_indices()),
              ArrayView<Number, MemorySpace::Host>(data.values.get(),
                                                   partitioner->local_size()),
              ArrayView<Number, MemorySpace::Host>(
                data.values.get() + partitioner->local_size(),
                partitioner->n_ghost_indices()),
              compress_requests);
        }

#  if defined DEAL_II_COMPILER_CUDA_AWARE && \
    !defined DEAL_II_MPI_WITH_CUDA_SUPPORT
      // The communication is done on the host, so we need to
      // move the data back to the device.
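      // Since compress() may have changed any locally owned entry, the
      // complete array is copied back; the host staging buffer is then
      // released because it is only needed while communication is in
      // flight.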
      if (std::is_same<MemorySpaceType, MemorySpace::CUDA>::value)
        {
          cudaError_t cuda_error_code =
            cudaMemcpy(data.values_dev.get(),
                       data.values.get(),
                       allocated_size * sizeof(Number),
                       cudaMemcpyHostToDevice);
          AssertCuda(cuda_error_code);

          data.values.reset();
        }
#  endif
#else
      (void)operation;
#endif
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::update_ghost_values_start(
      const unsigned int communication_channel) const
    {
      AssertIndexRange(communication_channel, 200);
#ifdef DEAL_II_WITH_MPI
      // nothing to do when we neither have import nor ghost indices.
      if (partitioner->n_ghost_indices() == 0 &&
          partitioner->n_import_indices() == 0)
        return;

      // make this function thread safe
      std::lock_guard<std::mutex> lock(mutex);

      // allocate import_data in case it is not set up yet
      if (partitioner->n_import_indices() > 0)
        {
#  if defined(DEAL_II_COMPILER_CUDA_AWARE) && \
    defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
          Assert(
            (std::is_same<MemorySpaceType, dealii::MemorySpace::CUDA>::value),
            ExcMessage(
              "Using MemorySpace::CUDA only allowed if the code is compiled with a CUDA compiler!"));
          if (import_data.values_dev == nullptr)
            import_data.values_dev.reset(
              Utilities::CUDA::allocate_device_data<Number>(
                partitioner->n_import_indices()));
#  else
#    ifdef DEAL_II_MPI_WITH_CUDA_SUPPORT
          static_assert(
            std::is_same<MemorySpaceType, dealii::MemorySpace::Host>::value,
            "This code path should only be compiled for CUDA-aware-MPI for MemorySpace::Host!");
#    endif
          if (import_data.values == nullptr)
            {
              Number *new_val;
              Utilities::System::posix_memalign(
                reinterpret_cast<void **>(&new_val),
                64,
                sizeof(Number) * partitioner->n_import_indices());
              import_data.values.reset(new_val);
            }
#  endif
        }

#  if defined DEAL_II_COMPILER_CUDA_AWARE && \
    !defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
      // Move the data to the host and then move it back to the
      // device. We use data.values to store the elements because the
      // function uses a view of the array and thus we need the data on the
      // host to outlive the scope of the function.
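      //
      // In contrast to compress_start(), the locally owned range is the
      // payload being sent here, so the host copy spans the whole array:
      // the owned range is read for sending, while the ghost range is
      // filled on receive.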
      Number *new_val;
      Utilities::System::posix_memalign(reinterpret_cast<void **>(&new_val),
                                        64,
                                        sizeof(Number) * allocated_size);

      data.values = {new_val, [](Number *data) { std::free(data); }};

      cudaError_t cuda_error_code = cudaMemcpy(data.values.get(),
                                               data.values_dev.get(),
                                               allocated_size * sizeof(Number),
                                               cudaMemcpyDeviceToHost);
      AssertCuda(cuda_error_code);
#  endif

#  if !(defined(DEAL_II_COMPILER_CUDA_AWARE) && \
        defined(DEAL_II_MPI_WITH_CUDA_SUPPORT))
      partitioner->export_to_ghosted_array_start<Number, MemorySpace::Host>(
        communication_channel,
        ArrayView<const Number, MemorySpace::Host>(data.values.get(),
                                                   partitioner->local_size()),
        ArrayView<Number, MemorySpace::Host>(import_data.values.get(),
                                             partitioner->n_import_indices()),
        ArrayView<Number, MemorySpace::Host>(data.values.get() +
                                               partitioner->local_size(),
                                             partitioner->n_ghost_indices()),
        update_ghost_values_requests);
#  else
      partitioner->export_to_ghosted_array_start<Number, MemorySpace::CUDA>(
        communication_channel,
        ArrayView<const Number, MemorySpace::CUDA>(data.values_dev.get(),
                                                   partitioner->local_size()),
        ArrayView<Number, MemorySpace::CUDA>(import_data.values_dev.get(),
                                             partitioner->n_import_indices()),
        ArrayView<Number, MemorySpace::CUDA>(data.values_dev.get() +
                                               partitioner->local_size(),
                                             partitioner->n_ghost_indices()),
        update_ghost_values_requests);
#  endif

#else
      (void)communication_channel;
#endif
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::update_ghost_values_finish() const
    {
#ifdef DEAL_II_WITH_MPI
      // wait for both sends and receives to complete, even though only
      // receives are really necessary. this gives (much) better performance
      AssertDimension(partitioner->ghost_targets().size() +
                        partitioner->import_targets().size(),
                      update_ghost_values_requests.size());
      if (update_ghost_values_requests.size() > 0)
        {
          // make this function thread safe
          std::lock_guard<std::mutex> lock(mutex);

#  if !(defined(DEAL_II_COMPILER_CUDA_AWARE) && \
        defined(DEAL_II_MPI_WITH_CUDA_SUPPORT))
          partitioner->export_to_ghosted_array_finish(
            ArrayView<Number, MemorySpace::Host>(
              data.values.get() + partitioner->local_size(),
              partitioner->n_ghost_indices()),
            update_ghost_values_requests);
#  else
          partitioner->export_to_ghosted_array_finish(
            ArrayView<Number, MemorySpace::CUDA>(
              data.values_dev.get() + partitioner->local_size(),
              partitioner->n_ghost_indices()),
            update_ghost_values_requests);
#  endif
        }

#  if defined DEAL_II_COMPILER_CUDA_AWARE && \
    !defined DEAL_II_MPI_WITH_CUDA_SUPPORT
      // The communication is done on the host, so we need to
      // move the data back to the device.
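      // Unlike in compress_finish(), only the ghost range needs to travel
      // back to the device: the locally owned values were merely read for
      // sending and have not changed.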
      if (std::is_same<MemorySpaceType, MemorySpace::CUDA>::value)
        {
          cudaError_t cuda_error_code =
            cudaMemcpy(data.values_dev.get() + partitioner->local_size(),
                       data.values.get() + partitioner->local_size(),
                       partitioner->n_ghost_indices() * sizeof(Number),
                       cudaMemcpyHostToDevice);
          AssertCuda(cuda_error_code);

          data.values.reset();
        }
#  endif

#endif
      vector_is_ghosted = true;
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::import(
      const ReadWriteVector<Number> &                 V,
      VectorOperation::values                         operation,
      std::shared_ptr<const CommunicationPatternBase> communication_pattern)
    {
      // If no communication pattern is given, create one. Otherwise, use
      // the given one.
      std::shared_ptr<const Utilities::MPI::Partitioner> comm_pattern;
      if (communication_pattern.get() == nullptr)
        {
          // Split the IndexSet of V into locally owned elements and ghost
          // indices, then create the communication pattern
          IndexSet locally_owned_elem = locally_owned_elements();
          IndexSet ghost_indices      = V.get_stored_elements();
          ghost_indices.subtract_set(locally_owned_elem);
          comm_pattern = std::make_shared<Utilities::MPI::Partitioner>(
            locally_owned_elem, ghost_indices, get_mpi_communicator());
        }
      else
        {
          comm_pattern =
            std::dynamic_pointer_cast<const Utilities::MPI::Partitioner>(
              communication_pattern);
          AssertThrow(comm_pattern != nullptr,
                      ExcMessage("The communication pattern is not of type "
                                 "Utilities::MPI::Partitioner."));
        }
      Vector<Number, ::dealii::MemorySpace::Host> tmp_vector(comm_pattern);

      data.copy_to(tmp_vector.begin(), local_size());

      // Fill entries from the ReadWriteVector into the distributed vector,
      // including ghost entries.
      // This is not really efficient right now because indices are
      // translated twice, once by nth_index_in_set(i) and once by
      // operator() of tmp_vector.
      const IndexSet &v_stored     = V.get_stored_elements();
      const size_type v_n_elements = v_stored.n_elements();
      switch (operation)
        {
          case VectorOperation::insert:
            {
              for (size_type i = 0; i < v_n_elements; ++i)
                tmp_vector(v_stored.nth_index_in_set(i)) = V.local_element(i);

              break;
            }
          case VectorOperation::add:
            {
              for (size_type i = 0; i < v_n_elements; ++i)
                tmp_vector(v_stored.nth_index_in_set(i)) += V.local_element(i);

              break;
            }
          case VectorOperation::min:
            {
              for (size_type i = 0; i < v_n_elements; ++i)
                tmp_vector(v_stored.nth_index_in_set(i)) =
                  internal::get_min(tmp_vector(v_stored.nth_index_in_set(i)),
                                    V.local_element(i));

              break;
            }
          case VectorOperation::max:
            {
              for (size_type i = 0; i < v_n_elements; ++i)
                tmp_vector(v_stored.nth_index_in_set(i)) =
                  internal::get_max(tmp_vector(v_stored.nth_index_in_set(i)),
                                    V.local_element(i));

              break;
            }
          default:
            {
              Assert(false, ExcMessage("This operation is not supported."));
            }
        }
      tmp_vector.compress(operation);

      data.copy_from(tmp_vector.begin(), local_size());
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::swap(Vector<Number, MemorySpaceType> &v)
    {
#ifdef DEAL_II_WITH_MPI

#  ifdef DEBUG
      if (Utilities::MPI::job_supports_mpi())
        {
          // make sure that there are no outstanding requests from updating
          // ghost values or compress
          int flag = 1;
          if (update_ghost_values_requests.size() > 0)
            {
              const int ierr =
                MPI_Testall(update_ghost_values_requests.size(),
                            update_ghost_values_requests.data(),
                            &flag,
                            MPI_STATUSES_IGNORE);
              AssertThrowMPI(ierr);
              Assert(flag == 1,
                     ExcMessage(
                       "MPI found unfinished update_ghost_values() requests "
                       "when calling swap, which is not allowed."));
            }
          if (compress_requests.size() > 0)
            {
              const int ierr = MPI_Testall(compress_requests.size(),
                                           compress_requests.data(),
                                           &flag,
                                           MPI_STATUSES_IGNORE);
              AssertThrowMPI(ierr);
              Assert(flag == 1,
                     ExcMessage("MPI found unfinished compress() requests "
                                "when calling swap, which is not allowed."));
            }
        }
#  endif

      std::swap(compress_requests, v.compress_requests);
      std::swap(update_ghost_values_requests, v.update_ghost_values_requests);
#endif

      std::swap(partitioner, v.partitioner);
      std::swap(thread_loop_partitioner, v.thread_loop_partitioner);
      std::swap(allocated_size, v.allocated_size);
      std::swap(data, v.data);
      std::swap(import_data, v.import_data);
      std::swap(vector_is_ghosted, v.vector_is_ghosted);
    }



    template <typename Number, typename MemorySpaceType>
    Vector<Number, MemorySpaceType> &
    Vector<Number, MemorySpaceType>::operator=(const Number s)
    {
      const size_type this_size = local_size();
      if (this_size > 0)
        {
          dealii::internal::VectorOperations::
            functions<Number, Number, MemorySpaceType>::set(
              thread_loop_partitioner, this_size, s, data);
        }

      // if we call Vector::operator=0, we want to zero out all the entries
      // plus ghosts.
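      //
      // A minimal sketch of the distinction (hypothetical vector v):
      //
      //   v = 0.;   // owned entries *and* ghost entries are zeroed
      //   v = 1.;   // only the locally owned entries are set to 1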
      if (s == Number())
        zero_out_ghosts();

      return *this;
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::reinit(const VectorSpaceVector<Number> &V,
                                            const bool omit_zeroing_entries)
    {
      // Downcast. Throws an exception if invalid.
      using VectorType = Vector<Number, MemorySpaceType>;
      Assert(dynamic_cast<const VectorType *>(&V) != nullptr,
             ExcVectorTypeNotCompatible());
      const VectorType &down_V = dynamic_cast<const VectorType &>(V);

      reinit(down_V, omit_zeroing_entries);
    }



    template <typename Number, typename MemorySpaceType>
    Vector<Number, MemorySpaceType> &
    Vector<Number, MemorySpaceType>::
    operator+=(const VectorSpaceVector<Number> &vv)
    {
      // Downcast. Throws an exception if invalid.
      using VectorType = Vector<Number, MemorySpaceType>;
      Assert(dynamic_cast<const VectorType *>(&vv) != nullptr,
             ExcVectorTypeNotCompatible());
      const VectorType &v = dynamic_cast<const VectorType &>(vv);

      AssertDimension(local_size(), v.local_size());

      dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::add_vector(
          thread_loop_partitioner, partitioner->local_size(), v.data, data);

      if (vector_is_ghosted)
        update_ghost_values();

      return *this;
    }



    template <typename Number, typename MemorySpaceType>
    Vector<Number, MemorySpaceType> &
    Vector<Number, MemorySpaceType>::
    operator-=(const VectorSpaceVector<Number> &vv)
    {
      // Downcast. Throws an exception if invalid.
      using VectorType = Vector<Number, MemorySpaceType>;
      Assert(dynamic_cast<const VectorType *>(&vv) != nullptr,
             ExcVectorTypeNotCompatible());
      const VectorType &v = dynamic_cast<const VectorType &>(vv);

      AssertDimension(local_size(), v.local_size());

      dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::subtract_vector(
          thread_loop_partitioner, partitioner->local_size(), v.data, data);

      if (vector_is_ghosted)
        update_ghost_values();

      return *this;
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::add(const Number a)
    {
      AssertIsFinite(a);

      dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::add_factor(
          thread_loop_partitioner, partitioner->local_size(), a, data);

      if (vector_is_ghosted)
        update_ghost_values();
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::add_local(
      const Number                     a,
      const VectorSpaceVector<Number> &vv)
    {
      // Downcast. Throws an exception if invalid.
      using VectorType = Vector<Number, MemorySpaceType>;
      Assert(dynamic_cast<const VectorType *>(&vv) != nullptr,
             ExcVectorTypeNotCompatible());
      const VectorType &v = dynamic_cast<const VectorType &>(vv);

      AssertIsFinite(a);
      AssertDimension(local_size(), v.local_size());

      // nothing to do if a is zero
      if (a == Number(0.))
        return;

      dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::add_av(
          thread_loop_partitioner, partitioner->local_size(), a, v.data, data);
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::add(const Number                     a,
                                         const VectorSpaceVector<Number> &vv)
    {
      add_local(a, vv);

      if (vector_is_ghosted)
        update_ghost_values();
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::add(const Number                     a,
                                         const VectorSpaceVector<Number> &vv,
                                         const Number                     b,
                                         const VectorSpaceVector<Number> &ww)
    {
      // Downcast. Throws an exception if invalid.
      using VectorType = Vector<Number, MemorySpaceType>;
      Assert(dynamic_cast<const VectorType *>(&vv) != nullptr,
             ExcVectorTypeNotCompatible());
      const VectorType &v = dynamic_cast<const VectorType &>(vv);
      Assert(dynamic_cast<const VectorType *>(&ww) != nullptr,
             ExcVectorTypeNotCompatible());
      const VectorType &w = dynamic_cast<const VectorType &>(ww);

      AssertIsFinite(a);
      AssertIsFinite(b);

      AssertDimension(local_size(), v.local_size());
      AssertDimension(local_size(), w.local_size());

      dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::add_avpbw(
          thread_loop_partitioner,
          partitioner->local_size(),
          a,
          b,
          v.data,
          w.data,
          data);

      if (vector_is_ghosted)
        update_ghost_values();
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::add(const std::vector<size_type> &indices,
                                         const std::vector<Number> &   values)
    {
      for (std::size_t i = 0; i < indices.size(); ++i)
        {
          this->operator()(indices[i]) += values[i];
        }
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::sadd(
      const Number                           x,
      const Vector<Number, MemorySpaceType> &v)
    {
      AssertIsFinite(x);
      AssertDimension(local_size(), v.local_size());

      dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::sadd_xv(
          thread_loop_partitioner, partitioner->local_size(), x, v.data, data);

      if (vector_is_ghosted)
        update_ghost_values();
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::sadd_local(
      const Number                     x,
      const Number                     a,
      const VectorSpaceVector<Number> &vv)
    {
      // Downcast. Throws an exception if invalid.
      using VectorType = Vector<Number, MemorySpaceType>;
      Assert((dynamic_cast<const VectorType *>(&vv) != nullptr),
             ExcVectorTypeNotCompatible());
      const VectorType &v = dynamic_cast<const VectorType &>(vv);

      AssertIsFinite(x);
      AssertIsFinite(a);
      AssertDimension(local_size(), v.local_size());

      dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::sadd_xav(
          thread_loop_partitioner,
          partitioner->local_size(),
          x,
          a,
          v.data,
          data);
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::sadd(const Number                     x,
                                          const Number                     a,
                                          const VectorSpaceVector<Number> &vv)
    {
      sadd_local(x, a, vv);

      if (vector_is_ghosted)
        update_ghost_values();
    }



    template <typename Number, typename MemorySpaceType>
    Vector<Number, MemorySpaceType> &
    Vector<Number, MemorySpaceType>::operator*=(const Number factor)
    {
      AssertIsFinite(factor);

      dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::multiply_factor(
          thread_loop_partitioner, partitioner->local_size(), factor, data);

      if (vector_is_ghosted)
        update_ghost_values();

      return *this;
    }



    template <typename Number, typename MemorySpaceType>
    Vector<Number, MemorySpaceType> &
    Vector<Number, MemorySpaceType>::operator/=(const Number factor)
    {
      operator*=(static_cast<Number>(1.) / factor);
      return *this;
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::scale(const VectorSpaceVector<Number> &vv)
    {
      // Downcast. Throws an exception if invalid.
      using VectorType = Vector<Number, MemorySpaceType>;
      Assert(dynamic_cast<const VectorType *>(&vv) != nullptr,
             ExcVectorTypeNotCompatible());
      const VectorType &v = dynamic_cast<const VectorType &>(vv);

      AssertDimension(local_size(), v.local_size());

      dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::scale(
          thread_loop_partitioner, local_size(), v.data, data);

      if (vector_is_ghosted)
        update_ghost_values();
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::equ(const Number                     a,
                                         const VectorSpaceVector<Number> &vv)
    {
      // Downcast. Throws an exception if invalid.
      using VectorType = Vector<Number, MemorySpaceType>;
      Assert(dynamic_cast<const VectorType *>(&vv) != nullptr,
             ExcVectorTypeNotCompatible());
      const VectorType &v = dynamic_cast<const VectorType &>(vv);

      AssertIsFinite(a);
      AssertDimension(local_size(), v.local_size());

      dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::equ_au(
          thread_loop_partitioner, partitioner->local_size(), a, v.data, data);


      if (vector_is_ghosted)
        update_ghost_values();
    }



    template <typename Number, typename MemorySpaceType>
    bool
    Vector<Number, MemorySpaceType>::all_zero() const
    {
      return (linfty_norm() == 0);
    }



    template <typename Number, typename MemorySpaceType>
    template <typename Number2>
    Number
    Vector<Number, MemorySpaceType>::inner_product_local(
      const Vector<Number2, MemorySpaceType> &v) const
    {
      if (PointerComparison::equal(this, &v))
        return norm_sqr_local();

      AssertDimension(partitioner->local_size(), v.partitioner->local_size());

      return dealii::internal::VectorOperations::
        functions<Number, Number2, MemorySpaceType>::dot(
          thread_loop_partitioner, partitioner->local_size(), v.data, data);
    }



    template <typename Number, typename MemorySpaceType>
    Number Vector<Number, MemorySpaceType>::
    operator*(const VectorSpaceVector<Number> &vv) const
    {
      // Downcast. Throws an exception if invalid.
      using VectorType = Vector<Number, MemorySpaceType>;
      Assert((dynamic_cast<const VectorType *>(&vv) != nullptr),
             ExcVectorTypeNotCompatible());
      const VectorType &v = dynamic_cast<const VectorType &>(vv);

      Number local_result = inner_product_local(v);
      if (partitioner->n_mpi_processes() > 1)
        return Utilities::MPI::sum(local_result,
                                   partitioner->get_mpi_communicator());
      else
        return local_result;
    }



    template <typename Number, typename MemorySpaceType>
    typename Vector<Number, MemorySpaceType>::real_type
    Vector<Number, MemorySpaceType>::norm_sqr_local() const
    {
      real_type sum;


      dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::norm_2(
          thread_loop_partitioner, partitioner->local_size(), sum, data);

      AssertIsFinite(sum);

      return sum;
    }



    template <typename Number, typename MemorySpaceType>
    Number
    Vector<Number, MemorySpaceType>::mean_value_local() const
    {
      Assert(size() != 0, ExcEmptyObject());

      if (partitioner->local_size() == 0)
        return Number();

      Number sum = ::dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::mean_value(
          thread_loop_partitioner, partitioner->local_size(), data);

      return sum / real_type(partitioner->local_size());
    }



    template <typename Number, typename MemorySpaceType>
    Number
    Vector<Number, MemorySpaceType>::mean_value() const
    {
      Number local_result = mean_value_local();
      if (partitioner->n_mpi_processes() > 1)
        return Utilities::MPI::sum(local_result *
                                     static_cast<real_type>(
                                       partitioner->local_size()),
                                   partitioner->get_mpi_communicator()) /
               static_cast<real_type>(partitioner->size());
      else
        return local_result;
    }



    template <typename Number, typename MemorySpaceType>
    typename Vector<Number, MemorySpaceType>::real_type
    Vector<Number, MemorySpaceType>::l1_norm_local() const
    {
      real_type sum;

      dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::norm_1(
          thread_loop_partitioner, partitioner->local_size(), sum, data);

      return sum;
    }



    template <typename Number, typename MemorySpaceType>
    typename Vector<Number, MemorySpaceType>::real_type
    Vector<Number, MemorySpaceType>::l1_norm() const
    {
      real_type local_result = l1_norm_local();
      if (partitioner->n_mpi_processes() > 1)
        return Utilities::MPI::sum(local_result,
                                   partitioner->get_mpi_communicator());
      else
        return local_result;
    }



    template <typename Number, typename MemorySpaceType>
    typename Vector<Number, MemorySpaceType>::real_type
    Vector<Number, MemorySpaceType>::norm_sqr() const
    {
      real_type local_result = norm_sqr_local();
      if (partitioner->n_mpi_processes() > 1)
        return Utilities::MPI::sum(local_result,
                                   partitioner->get_mpi_communicator());
      else
        return local_result;
    }



    template <typename Number, typename MemorySpaceType>
    typename Vector<Number, MemorySpaceType>::real_type
    Vector<Number, MemorySpaceType>::l2_norm() const
    {
      return std::sqrt(norm_sqr());
    }



    template <typename Number, typename MemorySpaceType>
    typename Vector<Number, MemorySpaceType>::real_type
    Vector<Number, MemorySpaceType>::lp_norm_local(const real_type p) const
    {
      real_type sum = 0.;

      dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::norm_p(
          thread_loop_partitioner, partitioner->local_size(), sum, p, data);

      return std::pow(sum, 1. / p);
    }



    template <typename Number, typename MemorySpaceType>
    typename Vector<Number, MemorySpaceType>::real_type
    Vector<Number, MemorySpaceType>::lp_norm(const real_type p) const
    {
      const real_type local_result = lp_norm_local(p);
      if (partitioner->n_mpi_processes() > 1)
        return std::pow(
          Utilities::MPI::sum(std::pow(local_result, p),
                              partitioner->get_mpi_communicator()),
          static_cast<real_type>(1.0 / p));
      else
        return local_result;
    }



    template <typename Number, typename MemorySpaceType>
    typename Vector<Number, MemorySpaceType>::real_type
    Vector<Number, MemorySpaceType>::linfty_norm_local() const
    {
      real_type max = 0.;

      const size_type local_size = partitioner->local_size();
      internal::la_parallel_vector_templates_functions<
        Number,
        MemorySpaceType>::linfty_norm_local(data, local_size, max);

      return max;
    }



    template <typename Number, typename MemorySpaceType>
    inline typename Vector<Number, MemorySpaceType>::real_type
    Vector<Number, MemorySpaceType>::linfty_norm() const
    {
      const real_type local_result = linfty_norm_local();
      if (partitioner->n_mpi_processes() > 1)
        return Utilities::MPI::max(local_result,
                                   partitioner->get_mpi_communicator());
      else
        return local_result;
    }



    template <typename Number, typename MemorySpaceType>
    Number
    Vector<Number, MemorySpaceType>::add_and_dot_local(
      const Number                           a,
      const Vector<Number, MemorySpaceType> &v,
      const Vector<Number, MemorySpaceType> &w)
    {
      const size_type vec_size = partitioner->local_size();
      AssertDimension(vec_size, v.local_size());
      AssertDimension(vec_size, w.local_size());

      Number sum = dealii::internal::VectorOperations::
        functions<Number, Number, MemorySpaceType>::add_and_dot(
          thread_loop_partitioner, vec_size, a, v.data, w.data, data);

      AssertIsFinite(sum);

      return sum;
    }



    template <typename Number, typename MemorySpaceType>
    Number
    Vector<Number, MemorySpaceType>::add_and_dot(
      const Number                     a,
      const VectorSpaceVector<Number> &vv,
      const VectorSpaceVector<Number> &ww)
    {
      // Downcast. Throws an exception if invalid.
      using VectorType = Vector<Number, MemorySpaceType>;
      Assert((dynamic_cast<const VectorType *>(&vv) != nullptr),
             ExcVectorTypeNotCompatible());
      const VectorType &v = dynamic_cast<const VectorType &>(vv);
      Assert((dynamic_cast<const VectorType *>(&ww) != nullptr),
             ExcVectorTypeNotCompatible());
      const VectorType &w = dynamic_cast<const VectorType &>(ww);

      Number local_result = add_and_dot_local(a, v, w);
      if (partitioner->n_mpi_processes() > 1)
        return Utilities::MPI::sum(local_result,
                                   partitioner->get_mpi_communicator());
      else
        return local_result;
    }



    template <typename Number, typename MemorySpaceType>
    inline bool
    Vector<Number, MemorySpaceType>::partitioners_are_compatible(
      const Utilities::MPI::Partitioner &part) const
    {
      return partitioner->is_compatible(part);
    }



    template <typename Number, typename MemorySpaceType>
    inline bool
    Vector<Number, MemorySpaceType>::partitioners_are_globally_compatible(
      const Utilities::MPI::Partitioner &part) const
    {
      return partitioner->is_globally_compatible(part);
    }



    template <typename Number, typename MemorySpaceType>
    std::size_t
    Vector<Number, MemorySpaceType>::memory_consumption() const
    {
      std::size_t memory = sizeof(*this);
      memory += sizeof(Number) * static_cast<std::size_t>(allocated_size);

      // if the partitioner is shared between several processors, just count
      // a fraction of that memory, since we're not actually using more
      // memory for it.
      if (partitioner.use_count() > 0)
        memory +=
          partitioner->memory_consumption() / partitioner.use_count() + 1;
      if (import_data.values != nullptr || import_data.values_dev != nullptr)
        memory += (static_cast<std::size_t>(partitioner->n_import_indices()) *
                   sizeof(Number));
      return memory;
    }



    template <typename Number, typename MemorySpaceType>
    void
    Vector<Number, MemorySpaceType>::print(std::ostream &     out,
                                           const unsigned int precision,
                                           const bool         scientific,
                                           const bool         across) const
    {
      Assert(partitioner.get() != nullptr, ExcInternalError());
      AssertThrow(out, ExcIO());
      std::ios::fmtflags old_flags     = out.flags();
      unsigned int       old_precision = out.precision(precision);

      out.precision(precision);
      if (scientific)
        out.setf(std::ios::scientific, std::ios::floatfield);
      else
        out.setf(std::ios::fixed, std::ios::floatfield);

      // to make the vector write out all the information in order, use as
      // many barriers as there are processors and start writing only when
      // it's our turn
#ifdef DEAL_II_WITH_MPI
      if (partitioner->n_mpi_processes() > 1)
        for (unsigned int i = 0; i < partitioner->this_mpi_process(); i++)
          {
            const int ierr = MPI_Barrier(partitioner->get_mpi_communicator());
            AssertThrowMPI(ierr);
          }
#endif

      std::vector<Number> stored_elements(allocated_size);
      data.copy_to(stored_elements.data(), allocated_size);

      out << "Process #" << partitioner->this_mpi_process() << std::endl
          << "Local range: [" << partitioner->local_range().first << ", "
          << partitioner->local_range().second
          << "), global size: " << partitioner->size() << std::endl
          << "Vector data:" << std::endl;
      if (across)
        for (size_type i = 0; i < partitioner->local_size(); ++i)
          out << stored_elements[i] << ' ';
      else
        for (size_type i = 0; i < partitioner->local_size(); ++i)
          out << stored_elements[i] << std::endl;
      out << std::endl;

      if (vector_is_ghosted)
        {
          out << "Ghost entries (global index / value):" << std::endl;
          if (across)
            for (size_type i = 0; i < partitioner->n_ghost_indices(); ++i)
              out << '(' << partitioner->ghost_indices().nth_index_in_set(i)
                  << '/' << stored_elements[partitioner->local_size() + i]
                  << ") ";
          else
            for (size_type i = 0; i < partitioner->n_ghost_indices(); ++i)
              out << '(' << partitioner->ghost_indices().nth_index_in_set(i)
                  << '/' << stored_elements[partitioner->local_size() + i]
                  << ")" << std::endl;
          out << std::endl;
        }
      out << std::flush;

#ifdef DEAL_II_WITH_MPI
      if (partitioner->n_mpi_processes() > 1)
        {
          int ierr = MPI_Barrier(partitioner->get_mpi_communicator());
          AssertThrowMPI(ierr);

          for (unsigned int i = partitioner->this_mpi_process() + 1;
               i < partitioner->n_mpi_processes();
               i++)
            {
              ierr = MPI_Barrier(partitioner->get_mpi_communicator());
              AssertThrowMPI(ierr);
            }
        }
#endif

      AssertThrow(out, ExcIO());
      // reset the output format
      out.flags(old_flags);
      out.precision(old_precision);
    }

  } // end of namespace distributed
} // end of namespace LinearAlgebra


DEAL_II_NAMESPACE_CLOSE

#endif