1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 #include "mpiimpl.h"
7 #include "datatype.h"
8
/* Size, in bytes, of the temporary buffer that MPIR_Localcopy allocates to
 * stage data when neither datatype is contiguous. It is also the upper bound
 * on the number of bytes packed per iteration of the staging loop. */
9 #define COPY_BUFFER_SZ 16384
10
/*
 * MPIR_Localcopy - copy typed data from one local buffer into another.
 *
 * Parameters:
 *   sendbuf, sendcount, sendtype - source buffer, element count and datatype
 *   recvbuf, recvcount, recvtype - destination buffer, element count and
 *                                  datatype
 *
 * The number of bytes on each side is the datatype size times the count.
 * If either side describes zero bytes, nothing is copied and MPI_SUCCESS is
 * returned.
 *
 * When HAVE_ERROR_CHECKING is defined and the source describes more bytes
 * than the destination, an MPI_ERR_TRUNCATE error is recorded and only as
 * many bytes as the destination describes are copied.
 *
 * Three strategies are used:
 *   - contiguous source:      unpack straight from sendbuf into recvbuf;
 *   - contiguous destination: pack straight from sendbuf into recvbuf;
 *   - neither contiguous:     pack into a COPY_BUFFER_SZ-byte staging buffer
 *                             and unpack from it, chunk by chunk.
 *
 * Returns MPI_SUCCESS, or an MPI error code: MPI_ERR_TRUNCATE as described
 * above, MPI_ERR_TYPE ("**dtypemismatch") when the number of bytes packed or
 * unpacked differs from what was expected, or MPI_ERR_OTHER ("**nomem") when
 * the staging buffer could not be obtained.
 *
 * NOTE(review): the return values of MPIR_Typerep_pack / MPIR_Typerep_unpack
 * are not examined here; confirm whether they can report errors that should
 * be propagated.
 */
int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
                   void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype)
{
    int mpi_errno = MPI_SUCCESS;
    int sendtype_iscontig, recvtype_iscontig;
    MPI_Aint sendsize, recvsize, sdata_sz, rdata_sz, copy_sz;
    MPI_Aint true_extent, sendtype_true_lb, recvtype_true_lb;
    char *buf = NULL;
    MPL_pointer_attr_t send_attr, recv_attr;
    MPIR_CHKLMEM_DECL(1);
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_LOCALCOPY);

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_LOCALCOPY);

    MPIR_Datatype_get_size_macro(sendtype, sendsize);
    MPIR_Datatype_get_size_macro(recvtype, recvsize);

    sdata_sz = sendsize * sendcount;
    rdata_sz = recvsize * recvcount;

    /* Give the pointer attributes a defined value before any jump: the
     * cleanup code at fn_exit reads them, including on paths that never
     * query the buffers. */
    send_attr.type = recv_attr.type = MPL_GPU_POINTER_UNREGISTERED_HOST;

    /* if there is no data to copy, bail out */
    if (!sdata_sz || !rdata_sz)
        goto fn_exit;

#if defined(HAVE_ERROR_CHECKING)
    if (sdata_sz > rdata_sz) {
        /* Record the truncation but still copy what fits. */
        MPIR_ERR_SET2(mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", sdata_sz,
                      rdata_sz);
        copy_sz = rdata_sz;
    } else
#endif /* HAVE_ERROR_CHECKING */
        copy_sz = sdata_sz;

    /* Builtin types are the common case; optimize for them */
    MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
    MPIR_Datatype_iscontig(recvtype, &recvtype_iscontig);

    /* Only the true lower bounds are used below; true_extent is a scratch
     * output that the second call overwrites. */
    MPIR_Type_get_true_extent_impl(sendtype, &sendtype_true_lb, &true_extent);
    MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &true_extent);

    if (sendtype_iscontig) {
        /* The source is a flat byte stream starting at its true lower
         * bound: unpack it directly into the destination layout. */
        MPI_Aint actual_unpack_bytes;
        MPIR_Typerep_unpack((char *) sendbuf + sendtype_true_lb, copy_sz, recvbuf, recvcount,
                            recvtype, 0, &actual_unpack_bytes);
        MPIR_ERR_CHKANDJUMP(actual_unpack_bytes != copy_sz, mpi_errno, MPI_ERR_TYPE,
                            "**dtypemismatch");
    } else if (recvtype_iscontig) {
        /* The destination is a flat byte stream starting at its true lower
         * bound: pack the source layout directly into it. */
        MPI_Aint actual_pack_bytes;
        MPIR_Typerep_pack(sendbuf, sendcount, sendtype, 0, (char *) recvbuf + recvtype_true_lb,
                          copy_sz, &actual_pack_bytes);
        MPIR_ERR_CHKANDJUMP(actual_pack_bytes != copy_sz, mpi_errno, MPI_ERR_TYPE,
                            "**dtypemismatch");
    } else {
        /* Neither side is contiguous: stage the data through a temporary
         * buffer, COPY_BUFFER_SZ bytes at a time. */
        intptr_t sfirst;
        intptr_t rfirst;

        MPIR_GPU_query_pointer_attr(sendbuf, &send_attr);
        MPIR_GPU_query_pointer_attr(recvbuf, &recv_attr);

        /* Choose the allocator for the staging buffer from where the user
         * buffers live. */
        if (send_attr.type == MPL_GPU_POINTER_DEV && recv_attr.type == MPL_GPU_POINTER_DEV) {
            MPL_gpu_malloc((void **) &buf, COPY_BUFFER_SZ, recv_attr.device);
        } else if (send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) {
            MPL_gpu_malloc_host((void **) &buf, COPY_BUFFER_SZ);
        } else {
            MPIR_CHKLMEM_MALLOC(buf, char *, COPY_BUFFER_SZ, mpi_errno, "buf", MPL_MEM_BUFFER);
        }

        /* Never hand a NULL staging buffer to the pack/unpack routines. */
        MPIR_ERR_CHKANDJUMP(buf == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem");

        sfirst = 0;
        rfirst = 0;

        while (1) {
            MPI_Aint max_pack_bytes;
            if (copy_sz - sfirst > COPY_BUFFER_SZ) {
                max_pack_bytes = COPY_BUFFER_SZ;
            } else {
                max_pack_bytes = copy_sz - sfirst;
            }

            MPI_Aint actual_pack_bytes;
            MPIR_Typerep_pack(sendbuf, sendcount, sendtype, sfirst, buf,
                              max_pack_bytes, &actual_pack_bytes);
            MPIR_Assert(actual_pack_bytes > 0);

            sfirst += actual_pack_bytes;

            MPI_Aint actual_unpack_bytes;
            MPIR_Typerep_unpack(buf, actual_pack_bytes, recvbuf, recvcount, recvtype,
                                rfirst, &actual_unpack_bytes);
            MPIR_Assert(actual_unpack_bytes > 0);

            rfirst += actual_unpack_bytes;

            /* everything that was packed from the source type must be
             * unpacked; otherwise we will lose the remaining data in
             * buf in the next iteration. */
            MPIR_ERR_CHKANDJUMP(actual_pack_bytes != actual_unpack_bytes, mpi_errno,
                                MPI_ERR_TYPE, "**dtypemismatch");

            if (rfirst == copy_sz) {
                /* successful completion */
                break;
            }
        }
    }

  fn_exit:
    /* Single release point for the staging buffer, shared by the success and
     * failure paths. A buffer obtained from the GPU allocators is released
     * with the matching MPL call; one obtained through MPIR_CHKLMEM_MALLOC
     * is left to MPIR_CHKLMEM_FREEALL below. buf stays NULL on every path
     * that did not allocate. */
    if (buf) {
        if (send_attr.type == MPL_GPU_POINTER_DEV && recv_attr.type == MPL_GPU_POINTER_DEV) {
            MPL_gpu_free(buf);
        } else if (send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) {
            MPL_gpu_free_host(buf);
        }
    }
    MPIR_CHKLMEM_FREEALL();
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_LOCALCOPY);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
138