!! Copyright (C) 2013 M. Oliveira
!!
!! This program is free software; you can redistribute it and/or modify
!! it under the terms of the GNU General Public License as published by
!! the Free Software Foundation; either version 2, or (at your option)
!! any later version.
!!
!! This program is distributed in the hope that it will be useful,
!! but WITHOUT ANY WARRANTY; without even the implied warranty of
!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
!! GNU General Public License for more details.
!!
!! You should have received a copy of the GNU General Public License
!! along with this program; if not, write to the Free Software
!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
!! 02110-1301, USA.
!!

#include "global.h"

module partition_oct_m
  use global_oct_m
  use io_oct_m
  use io_binary_oct_m
  use messages_oct_m
  use mpi_oct_m
  use mpi_debug_oct_m
  use profiling_oct_m

  implicit none

  private

  public ::                         &
    partition_t,                    &
    partition_init,                 &
    partition_end,                  &
    partition_set,                  &
    partition_dump,                 &
    partition_load,                 &
    partition_get_local_size,       &
    partition_get_global,           &
    partition_get_partition_number, &
    partition_get_np_local,         &
    partition_get_npart,            &
    partition_get_part,             &
    partition_get_local


  !> The partition is an array that contains the mapping between some global index
  !! and a process, such that point ip will be stored in process partition%part(ip).
  !!
  !! In this module this array is distributed among the processes, such that each process
  !! only stores a portion of the full array. Because each process needs to know in a
  !! straightforward way (i.e. without having to perform any kind of communication or
  !! lengthy operations) which process stores the partition corresponding to any given
  !! point, the distribution of the points is done in the following way:
  !!
  !!  i) the first mod(partition%np_global, partition%npart) processes store partition%nppp+1
  !!     points, with partition%nppp = partition%np_global/partition%npart
  !! ii) the remaining processes store partition%nppp points
  !!
  !! Note 1: this module can be a bit confusing, as there are in fact two partitions. One is the
  !! partition of some array (in the case of Octopus, this is typically the mesh functions),
  !! which is the main information stored in the partition_t object, and then there is the
  !! partition of the partition itself, as this is also distributed.
  !!
  !! Note 2: in principle, the MPI group used by the processes for the partition distribution
  !! does not need to be the same as the MPI group of the processes the partition refers to.
  type partition_t
    private

    !> The following components are the same for all processes:
    type(mpi_grp_t) :: mpi_grp  !< The MPI group used for distributing the partition data.
    integer :: np_global        !< The total number of points in the partition.
    integer :: npart            !< The number of partitions.
    integer :: remainder        !< The remainder of the division of np_global by npart.
    integer :: nppp             !< Number of points per process. The first partition%remainder processes
                                !! have nppp+1 points, while the other ones have nppp points.

    !> The following components are process-dependent:
    integer :: np_local         !< The number of points of the partition stored in this process.
    integer :: istart           !< The position of the first point stored in this process.
    integer, pointer :: part(:) !< The local portion of the partition.

  end type partition_t
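
  !> Worked example of the distribution rule above (illustrative only, not
  !! code from this module): with np_global = 10 and npart = 3 we get
  !! nppp = 10/3 = 3 and remainder = mod(10, 3) = 1, so the partition data
  !! is distributed as
  !!
  !!   process 1: points 1 -  4  (nppp+1 points, istart = 1)
  !!   process 2: points 5 -  7  (nppp   points, istart = 5)
  !!   process 3: points 8 - 10  (nppp   points, istart = 8)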

contains

  !---------------------------------------------------------
  subroutine partition_init(partition, np_global, mpi_grp)
    type(partition_t), intent(out) :: partition
    integer,           intent(in)  :: np_global
    type(mpi_grp_t),   intent(in)  :: mpi_grp

    PUSH_SUB(partition_init)

    !Global variables
    partition%mpi_grp = mpi_grp
    partition%np_global = np_global
    partition%npart = mpi_grp%size
    partition%remainder = mod(partition%np_global, partition%npart)
    partition%nppp = partition%np_global/partition%npart

    !Processor dependent
    if (mpi_grp%rank + 1 <= partition%remainder) then
      partition%np_local = partition%nppp + 1
      partition%istart = (partition%nppp + 1)*mpi_grp%rank + 1
    else
      partition%np_local = partition%nppp
      partition%istart = partition%nppp*mpi_grp%rank + partition%remainder + 1
    end if

    !Allocate memory for the partition
    nullify(partition%part)
    SAFE_ALLOCATE(partition%part(1:partition%np_local))

    POP_SUB(partition_init)
  end subroutine partition_init

  ! ---------------------------------------------------------
  subroutine partition_end(partition)
    type(partition_t), intent(inout) :: partition

    PUSH_SUB(partition_end)

    SAFE_DEALLOCATE_P(partition%part)

    POP_SUB(partition_end)
  end subroutine partition_end

  ! ---------------------------------------------------------
  subroutine partition_set(partition, part)
    type(partition_t), intent(inout) :: partition
    integer,           intent(in)    :: part(:) !< The local portion of the partition.

    PUSH_SUB(partition_set)

    partition%part(1:partition%np_local) = part(1:partition%np_local)

    POP_SUB(partition_set)
  end subroutine partition_set
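
  ! ---------------------------------------------------------
  !> A minimal usage sketch of the routines above (illustrative only; the
  !! names grp, part_local, dir, and ierr are hypothetical and not part of
  !! this module):
  !!
  !!   type(partition_t) :: partition
  !!   call partition_init(partition, np_global, grp)         ! allocate the local portion
  !!   call partition_set(partition, part_local)              ! fill it with the mapping
  !!   call partition_dump(partition, dir, "partition", ierr) ! optionally write restart data
  !!   ! ... use the partition ...
  !!   call partition_end(partition)                          ! free the local portion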

  ! ---------------------------------------------------------
  !> The partition is written in parallel if MPI2 is available.
  !! Otherwise, it is gathered on the root process and then written.
  subroutine partition_dump(partition, dir, filename, ierr)
    type(partition_t), intent(in)  :: partition
    character(len=*),  intent(in)  :: dir
    character(len=*),  intent(in)  :: filename
    integer,           intent(out) :: ierr

    integer :: err
#ifdef HAVE_MPI2
    integer :: ipart
    integer, allocatable :: scounts(:), sdispls(:)
#else
    integer, allocatable :: part_global(:)
#endif
    character(len=MAX_PATH_LEN) :: full_filename

    PUSH_SUB(partition_dump)

    ierr = 0
    full_filename = trim(dir)//'/'//trim(filename)

#ifdef HAVE_MPI2
    ! Calculate displacements for writing
    SAFE_ALLOCATE(scounts(1:partition%npart))
    SAFE_ALLOCATE(sdispls(1:partition%npart))

    scounts(1:partition%remainder) = partition%nppp + 1
    scounts(partition%remainder + 1:partition%npart) = partition%nppp
    sdispls(1) = 0
    do ipart = 2, partition%npart
      sdispls(ipart) = sdispls(ipart-1) + scounts(ipart-1)
    end do

    ! Write the header (root only) and wait
    if (mpi_grp_is_root(partition%mpi_grp)) then
      call iwrite_header(full_filename, partition%np_global, err)
      if (err /= 0) ierr = ierr + 1
    end if
    call MPI_Bcast(ierr, 1, MPI_INTEGER, 0, partition%mpi_grp%comm, mpi_err)
    call MPI_Barrier(partition%mpi_grp%comm, mpi_err)

    ASSERT(all(partition%part(:) > 0))

    ! Each process writes a portion of the partition
    if (ierr == 0) then
      call mpi_debug_in(partition%mpi_grp%comm, C_MPI_FILE_WRITE)
      ! Only one rank per partition group should write the partition restart
      ! information. Otherwise, the same data would be written more than once.
      if (mod(mpi_world%rank, mpi_world%size/partition%mpi_grp%size) == 0) then
        call io_binary_write_parallel(full_filename, partition%mpi_grp%comm, sdispls(partition%mpi_grp%rank+1)+1, &
          partition%np_local, partition%part, err)
        call mpi_debug_out(partition%mpi_grp%comm, C_MPI_FILE_WRITE)
        if (err /= 0) ierr = ierr + 2
      end if
    end if

    SAFE_DEALLOCATE_A(scounts)
    SAFE_DEALLOCATE_A(sdispls)
#else
    !Get the global partition in the root node
    if (partition%mpi_grp%rank == 0) then
      SAFE_ALLOCATE(part_global(1:partition%np_global))
    else
      SAFE_ALLOCATE(part_global(1:1))
    end if
    call partition_get_global(partition, part_global, 0)

    !Only the global root process writes. Otherwise, at least two
    !processes might be writing the same data to the same file
    if (mpi_world%rank == 0) then
      call io_binary_write(full_filename, partition%np_global, part_global, err)
      if (err /= 0) ierr = ierr + 4
    end if
#ifdef HAVE_MPI
    call MPI_Bcast(ierr, 1, MPI_INTEGER, 0, partition%mpi_grp%comm, mpi_err)
    call MPI_Barrier(partition%mpi_grp%comm, mpi_err)
#endif

    SAFE_DEALLOCATE_A(part_global)
#endif

    POP_SUB(partition_dump)
  end subroutine partition_dump
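
  ! ---------------------------------------------------------
  !> Note on the file layout (as assumed by the size check in
  !! partition_load below): the restart file consists of a 64-byte binary
  !! header followed by the partition data, so a valid file has exactly
  !! 64 + np_global*FC_INTEGER_SIZE bytes.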

  ! ---------------------------------------------------------
  !> The partition is read in parallel if MPI2 is available.
  !! Otherwise, it is read by the root process and then scattered.
  subroutine partition_load(partition, dir, filename, ierr)
    type(partition_t), intent(inout) :: partition
    character(len=*),  intent(in)    :: dir
    character(len=*),  intent(in)    :: filename
    integer,           intent(out)   :: ierr

    integer :: ipart, err, np, file_size
    integer, allocatable :: scounts(:), sdispls(:)
#ifndef HAVE_MPI2
    integer, allocatable :: part_global(:)
#endif
    character(len=MAX_PATH_LEN) :: full_filename

    PUSH_SUB(partition_load)

    ierr = 0
    full_filename = trim(dir)//'/'//trim(filename)

    ! This write statement is here to avoid an optimization of gfortran with -O3
    write(message(1),'(a,i8)') "Info: number of points in the partition (in root process) =", size(partition%part)
    call messages_info(1)

    ! Check if the file exists and has the proper size (only world root)
    if (mpi_world%rank == 0) then
      call io_binary_get_info(full_filename, np, file_size, err)
      ASSERT(np == partition%np_global)
    end if

#ifdef HAVE_MPI
    ! All nodes need to know the result
    call MPI_Bcast(err, 1, MPI_INTEGER, 0, mpi_world%comm, mpi_err)
    call MPI_Bcast(file_size, 1, MPI_INTEGER, 0, mpi_world%comm, mpi_err)
#endif

    if (err /= 0) then
      ierr = ierr + 1
      POP_SUB(partition_load)
      return
    end if
    ! The size of the file does not match np_global
    if (file_size - 64 /= partition%np_global * FC_INTEGER_SIZE) then
      ierr = ierr + 2
      POP_SUB(partition_load)
      return
    end if

    ! Calculate displacements for reading
    SAFE_ALLOCATE(scounts(1:partition%npart))
    SAFE_ALLOCATE(sdispls(1:partition%npart))

    scounts(1:partition%remainder) = partition%nppp + 1
    scounts(partition%remainder + 1:partition%npart) = partition%nppp
    sdispls(1) = 0
    do ipart = 2, partition%npart
      sdispls(ipart) = sdispls(ipart-1) + scounts(ipart-1)
    end do

    ASSERT(sum(scounts(:)) == partition%np_global)

#ifdef HAVE_MPI2
    call mpi_debug_in(partition%mpi_grp%comm, C_MPI_FILE_READ)
    call io_binary_read_parallel(full_filename, partition%mpi_grp%comm, sdispls(partition%mpi_grp%rank+1)+1, &
      partition%np_local, partition%part, err)
    call mpi_debug_out(partition%mpi_grp%comm, C_MPI_FILE_READ)
    if (err /= 0) ierr = ierr + 4
#else
    ! The global partition is only read by the root node
    if (partition%mpi_grp%rank == 0) then
      SAFE_ALLOCATE(part_global(1:partition%np_global))
      call io_binary_read(full_filename, partition%np_global, part_global, err)
      if (err /= 0) ierr = ierr + 8
    else
      ! Create a dummy variable for the rest of the processes
      SAFE_ALLOCATE(part_global(1:1))
      ! Either they are not reading the partition, or there is no
      ! partition. So partition 1 has all the points
      part_global = 1
    end if

#ifdef HAVE_MPI
    ! All nodes need to know the result
    call MPI_Bcast(ierr, 1, MPI_INTEGER, 0, partition%mpi_grp%comm, mpi_err)
#endif

    ASSERT(all(part_global(:) > 0))

    ! If reading was successful, then scatter the result
    if (ierr == 0) then
#ifdef HAVE_MPI
      call mpi_debug_in(partition%mpi_grp%comm, C_MPI_SCATTERV)
      call MPI_Scatterv(part_global(1), scounts(1), sdispls(1), MPI_INTEGER, &
        partition%part(1), partition%np_local, MPI_INTEGER, &
        0, partition%mpi_grp%comm, mpi_err)
      call mpi_debug_out(partition%mpi_grp%comm, C_MPI_SCATTERV)
#endif
    end if

    SAFE_DEALLOCATE_A(part_global)
#endif /* HAVE_MPI2 */

    if (any(partition%part(:) <= 0)) then
      write(message(1),'(a)') 'Internal error: some elements of partition are <= 0.'
      write(message(2),*) 'filename = ', full_filename
      write(message(3),*) 'scounts = ', scounts(:)
      write(message(4),*) 'sdispls = ', sdispls(:)
      write(message(5),*) 'partition%remainder = ', partition%remainder
      write(message(6),*) 'partition%np_local = ', partition%np_local
      call messages_fatal(6)
    end if

    SAFE_DEALLOCATE_A(scounts)
    SAFE_DEALLOCATE_A(sdispls)

    POP_SUB(partition_load)
  end subroutine partition_load

  ! ---------------------------------------------------------
  subroutine partition_get_local_size(partition, istart, np_local)
    type(partition_t), intent(in)  :: partition
    integer,           intent(out) :: istart   !< The position of the first point stored in this process.
    integer,           intent(out) :: np_local !< The number of points of the partition stored in this process.

    PUSH_SUB(partition_get_local_size)

    istart = partition%istart
    np_local = partition%np_local

    POP_SUB(partition_get_local_size)
  end subroutine partition_get_local_size
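
  ! ---------------------------------------------------------
  !> A sketch of how the local range returned above maps to global indices
  !! (illustrative only): the local portion stored in this process
  !! corresponds to the global points istart, ..., istart + np_local - 1,
  !! so partition%part(ip_local) holds the partition of global point
  !! istart + ip_local - 1.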

  ! ---------------------------------------------------------
  !> Returns the global partition. If root is present, the partition is
  !! gathered only on that node. Otherwise it is gathered on all nodes.
  subroutine partition_get_global(partition, part_global, root)
    type(partition_t), intent(in)  :: partition
    integer,           intent(out) :: part_global(:)
    integer, optional, intent(in)  :: root

    integer :: ipart
    integer, allocatable :: rdispls(:), rcounts(:)

    PUSH_SUB(partition_get_global)

    SAFE_ALLOCATE(rdispls(1:partition%npart))
    SAFE_ALLOCATE(rcounts(1:partition%npart))

    rcounts(1:partition%remainder) = partition%nppp + 1
    rcounts(partition%remainder + 1:partition%npart) = partition%nppp
    rdispls(1) = 0
    do ipart = 2, partition%npart
      rdispls(ipart) = rdispls(ipart-1) + rcounts(ipart-1)
    end do

    ASSERT(all(partition%part(1:partition%np_local) > 0))

    if (present(root)) then
#ifdef HAVE_MPI
      call mpi_debug_in(partition%mpi_grp%comm, C_MPI_GATHERV)
      call MPI_Gatherv(partition%part(1), partition%np_local, MPI_INTEGER, &
        part_global(1), rcounts(1), rdispls(1), MPI_INTEGER, &
        root, partition%mpi_grp%comm, mpi_err)
      call mpi_debug_out(partition%mpi_grp%comm, C_MPI_GATHERV)
#endif
    else
#ifdef HAVE_MPI
      call mpi_debug_in(partition%mpi_grp%comm, C_MPI_ALLGATHERV)
      call MPI_Allgatherv(partition%part(1), partition%np_local, MPI_INTEGER, &
        part_global(1), rcounts(1), rdispls(1), MPI_INTEGER, &
        partition%mpi_grp%comm, mpi_err)
      call mpi_debug_out(partition%mpi_grp%comm, C_MPI_ALLGATHERV)
#endif
    end if

    if (.not. present(root) .or. partition%mpi_grp%rank == 0) then
      ASSERT(all(part_global(:) > 0))
    end if

    SAFE_DEALLOCATE_A(rdispls)
    SAFE_DEALLOCATE_A(rcounts)

    POP_SUB(partition_get_global)
  end subroutine partition_get_global
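
  ! ---------------------------------------------------------
  !> How the owner of a piece of the partition data is found (a worked
  !! example of the formulas used in partition_get_partition_number below):
  !! the process that stores the partition data of global point ip is
  !!
  !!   if ip <= remainder*(nppp + 1):
  !!     owner = (ip - 1)/(nppp + 1) + 1
  !!   else:
  !!     owner = (ip - 1 - remainder*(nppp + 1))/nppp + remainder + 1
  !!
  !! E.g., with np_global = 10 and npart = 3 (nppp = 3, remainder = 1),
  !! point ip = 6 gives owner = (6 - 1 - 4)/3 + 1 + 1 = 2, consistent with
  !! process 2 storing points 5 to 7 in the example near the top of this file.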

  ! ---------------------------------------------------------
  !> Given a list of _global_ indices, return the partition number
  !! where those points are stored.
  !! Note that this routine will accept global indices equal to 0. In that
  !! case it will return 0 as a partition number.
  subroutine partition_get_partition_number(partition, np, points, partno)
    type(partition_t), intent(in)  :: partition
    integer,           intent(in)  :: np
    integer,           intent(in)  :: points(:)
    integer,           intent(out) :: partno(:)

    integer :: ip, nproc, rnp, zero_part
    integer, allocatable :: sbuffer(:), rbuffer(:)
    integer, allocatable :: scounts(:), rcounts(:)
    integer, allocatable :: sdispls(:), rdispls(:)
    integer, allocatable :: ipos(:), order(:)

    PUSH_SUB(partition_get_partition_number)

    SAFE_ALLOCATE(scounts(1:partition%npart))
    SAFE_ALLOCATE(rcounts(1:partition%npart))
    SAFE_ALLOCATE(sdispls(1:partition%npart))
    SAFE_ALLOCATE(rdispls(1:partition%npart))

    ! How many points will we have to send/receive from each process?
    scounts = 0
    zero_part = 1
    do ip = 1, np
      !Who knows where points(ip) is stored?
      if (points(ip) == 0) then
        nproc = zero_part
        zero_part = mod(zero_part, partition%npart) + 1
      else if (points(ip) <= partition%remainder*(partition%nppp + 1)) then
        nproc = (points(ip)-1)/(partition%nppp + 1) + 1
      else
        nproc = (points(ip) - 1 - (partition%nppp + 1)*partition%remainder)/partition%nppp + partition%remainder + 1
      end if

      !We increase the respective counter
      scounts(nproc) = scounts(nproc) + 1
    end do

    !Tell each process how many points we will need from it
#ifdef HAVE_MPI
    call mpi_debug_in(partition%mpi_grp%comm, C_MPI_ALLTOALL)
    call MPI_Alltoall(scounts(1), 1, MPI_INTEGER, &
      rcounts(1), 1, MPI_INTEGER, &
      partition%mpi_grp%comm, mpi_err)
    call mpi_debug_out(partition%mpi_grp%comm, C_MPI_ALLTOALL)
#endif

    !Build displacement arrays
    sdispls(1) = 0
    rdispls(1) = 0
    do ip = 2, partition%npart
      sdispls(ip) = sdispls(ip-1) + scounts(ip-1)
      rdispls(ip) = rdispls(ip-1) + rcounts(ip-1)
    end do

    rnp = sum(rcounts)
    SAFE_ALLOCATE(sbuffer(1:np))
    SAFE_ALLOCATE(rbuffer(1:rnp))

    !Put points in the correct order for sending
    SAFE_ALLOCATE(ipos(1:partition%npart))
    SAFE_ALLOCATE(order(1:np))
    ipos = 0
    zero_part = 1
    do ip = 1, np
      !Who knows where points(ip) is stored?
      if (points(ip) == 0) then
        nproc = zero_part
        zero_part = mod(zero_part, partition%npart) + 1
      else if (points(ip) <= partition%remainder*(partition%nppp + 1)) then
        nproc = (points(ip)-1)/(partition%nppp + 1) + 1
      else
        nproc = (points(ip) - 1 - (partition%nppp + 1)*partition%remainder)/partition%nppp + partition%remainder + 1
      end if

      !We increase the respective counter
      ipos(nproc) = ipos(nproc) + 1

      !Put the point in the correct place
      order(ip) = sdispls(nproc) + ipos(nproc)
      sbuffer(sdispls(nproc) + ipos(nproc)) = points(ip) ! global index of the point
    end do
    SAFE_DEALLOCATE_A(ipos)

    !Send the global indices of the points to the process that knows the corresponding partition
#ifdef HAVE_MPI
    call mpi_debug_in(partition%mpi_grp%comm, C_MPI_ALLTOALLV)
    call MPI_Alltoallv(sbuffer, scounts(1), sdispls(1), MPI_INTEGER, &
      rbuffer, rcounts(1), rdispls(1), MPI_INTEGER, &
      partition%mpi_grp%comm, mpi_err)
    call mpi_debug_out(partition%mpi_grp%comm, C_MPI_ALLTOALLV)
#endif

    !We get the partition number from the global index. This will be sent back.
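    ! (Each received global index g lies in this process's own slice of the
    ! partition data, so its partition number is stored locally at position
    ! g - istart + 1; zeros are passed through untouched.)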
    do ip = 1, rnp
      if (rbuffer(ip) == 0) cycle
      rbuffer(ip) = partition%part(rbuffer(ip) - partition%istart + 1)
    end do
#ifdef HAVE_MPI
    !Barrier to ensure that a deadlock is not possible
    call MPI_Barrier(partition%mpi_grp%comm, mpi_err)

    !Now we send the information backwards
    call mpi_debug_in(partition%mpi_grp%comm, C_MPI_ALLTOALLV)
    call MPI_Alltoallv(rbuffer, rcounts(1), rdispls(1), MPI_INTEGER, &
      sbuffer, scounts(1), sdispls(1), MPI_INTEGER, &
      partition%mpi_grp%comm, mpi_err)
    call mpi_debug_out(partition%mpi_grp%comm, C_MPI_ALLTOALLV)
#endif

    !Reorder the points
    do ip = 1, np
      partno(ip) = sbuffer(order(ip))
    end do

    !Deallocate memory
    SAFE_DEALLOCATE_A(order)
    SAFE_DEALLOCATE_A(sbuffer)
    SAFE_DEALLOCATE_A(scounts)
    SAFE_DEALLOCATE_A(sdispls)
    SAFE_DEALLOCATE_A(rbuffer)
    SAFE_DEALLOCATE_A(rcounts)
    SAFE_DEALLOCATE_A(rdispls)

    POP_SUB(partition_get_partition_number)
  end subroutine partition_get_partition_number

  !---------------------------------------------------------
  !> Given the partition, returns the corresponding number of local
  !! points that each partition has.
  subroutine partition_get_np_local(partition, np_local_vec)
    type(partition_t), intent(in)    :: partition       !< Current partition
    integer, pointer,  intent(inout) :: np_local_vec(:) !< Vector of local points (np_local)

    integer, pointer :: np_local_vec_tmp(:)
    integer :: ip

    PUSH_SUB(partition_get_np_local)

    ASSERT(ubound(np_local_vec, 1) >= partition%npart)
    ASSERT(partition%npart > 0)
    ASSERT(all(partition%part(:) > 0))
    SAFE_ALLOCATE(np_local_vec_tmp(1:partition%npart))
    np_local_vec_tmp = 0

    ! Calculate locally the local points of each partition
    do ip = 1, partition%np_local
      np_local_vec_tmp(partition%part(ip)) = np_local_vec_tmp(partition%part(ip)) + 1
    end do

    ! Collect all the local points
#ifdef HAVE_MPI
    call MPI_Allreduce(np_local_vec_tmp, np_local_vec, partition%npart, &
      MPI_INTEGER, MPI_SUM, partition%mpi_grp%comm, mpi_err)
#endif
    SAFE_DEALLOCATE_P(np_local_vec_tmp)

    POP_SUB(partition_get_np_local)

  end subroutine partition_get_np_local

  !---------------------------------------------------------
  !> Returns the total number of partitions.
  pure integer function partition_get_npart(partition) result(npart)
    type(partition_t), intent(in) :: partition

    npart = partition%npart
  end function partition_get_npart

  !---------------------------------------------------------
  !> Returns the partition of the local point.
  pure integer function partition_get_part(partition, local_point) result(part)
    type(partition_t), intent(in) :: partition
    integer,           intent(in) :: local_point

    part = partition%part(local_point)
  end function partition_get_part
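
  ! ---------------------------------------------------------
  !> (Note that partition_get_part takes a *local* index, in the range
  !! 1..np_local; the partition of a global point ip stored in this process
  !! can be obtained as partition_get_part(partition, ip - istart + 1).)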

  !---------------------------------------------------------
  !> Calculates the local vector of all partitions in parallel.
  !! The local vector stores the global point indices that each partition
  !! has.
  subroutine partition_get_local(partition, rbuffer, np_local)
    type(partition_t), intent(in)    :: partition
    integer,           intent(inout) :: rbuffer(:) !< The actual result, the local vector from 1 to np_local
    integer,           intent(out)   :: np_local   !< Number of elements, might be less than partition%np_local

    integer :: ip, ipart, istart
    integer, allocatable :: sdispls(:), scounts(:), rcounts(:), rdispls(:), sbuffer(:)

    PUSH_SUB(partition_get_local)

    SAFE_ALLOCATE(sdispls(1:partition%npart))
    SAFE_ALLOCATE(scounts(1:partition%npart))
    SAFE_ALLOCATE(rcounts(1:partition%npart))

    ! Calculate the starting point of the running process
    scounts(1:partition%remainder) = partition%nppp + 1
    scounts(partition%remainder + 1:partition%npart) = partition%nppp
    sdispls(1) = 0
    do ipart = 2, partition%npart
      sdispls(ipart) = sdispls(ipart-1) + scounts(ipart-1)
    end do
    istart = sdispls(partition%mpi_grp%rank + 1)

    scounts = 0
    ! Count and store the local points of each partition
    do ip = 1, partition%np_local
      ipart = partition%part(ip)
      scounts(ipart) = scounts(ipart) + 1
    end do

    ! Create displacements
    sdispls(1) = 0
    do ipart = 2, partition%npart
      sdispls(ipart) = sdispls(ipart-1) + scounts(ipart-1)
    end do

    ! Allocate and fill the send buffer
    np_local = sum(scounts)
    scounts = 0
    SAFE_ALLOCATE(sbuffer(1:np_local))
    do ip = 1, np_local
      ipart = partition%part(ip)
      scounts(ipart) = scounts(ipart) + 1
      sbuffer(sdispls(ipart) + scounts(ipart)) = ip + istart
    end do

    ! Tell each process how many points we will need from it
#ifdef HAVE_MPI
    call mpi_debug_in(partition%mpi_grp%comm, C_MPI_ALLTOALL)
    call MPI_Alltoall(scounts(1), 1, MPI_INTEGER, &
      rcounts(1), 1, MPI_INTEGER, &
      partition%mpi_grp%comm, mpi_err)
    call mpi_debug_out(partition%mpi_grp%comm, C_MPI_ALLTOALL)
#endif

    ! Create the displacement vector from the counts vector
    np_local = sum(rcounts)
    SAFE_ALLOCATE(rdispls(1:partition%npart))
    ASSERT(ubound(rbuffer, 1) >= np_local)

    rdispls(1) = 0
    do ipart = 2, partition%npart
      rdispls(ipart) = rcounts(ipart-1) + rdispls(ipart-1)
    end do

    ! Collect the corresponding points to each process
#ifdef HAVE_MPI
    call mpi_debug_in(partition%mpi_grp%comm, C_MPI_ALLTOALLV)
    call MPI_Alltoallv(sbuffer, scounts(1), sdispls(1), MPI_INTEGER, &
      rbuffer, rcounts(1), rdispls(1), MPI_INTEGER, &
      partition%mpi_grp%comm, mpi_err)
    call mpi_debug_out(partition%mpi_grp%comm, C_MPI_ALLTOALLV)
#endif

    SAFE_DEALLOCATE_A(sdispls)
    SAFE_DEALLOCATE_A(scounts)
    SAFE_DEALLOCATE_A(sbuffer)
    SAFE_DEALLOCATE_A(rcounts)
    SAFE_DEALLOCATE_A(rdispls)

    POP_SUB(partition_get_local)
  end subroutine partition_get_local

end module partition_oct_m

!! Local Variables:
!! mode: f90
!! coding: utf-8
!! End: