1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #include "mpiimpl.h"
7 #include "datatype.h"
8 
9 #define COPY_BUFFER_SZ 16384
10 
/*
 * MPIR_Localcopy - copy typed data between two buffers of the calling process.
 *
 * Parameters:
 *   sendbuf, sendcount, sendtype - source buffer, element count and datatype
 *   recvbuf, recvcount, recvtype - destination buffer, element count and datatype
 *
 * The number of bytes moved is size(sendtype) * sendcount.  If either side
 * describes zero bytes nothing is copied.  When HAVE_ERROR_CHECKING is
 * defined and the source describes more bytes than the destination, only
 * the destination's byte count is copied and an MPI_ERR_TRUNCATE error is
 * recorded in the returned code.
 *
 * Copy strategy:
 *   - sendtype contiguous: unpack straight from sendbuf into the recv layout;
 *   - recvtype contiguous: pack straight from the send layout into recvbuf;
 *   - neither contiguous:  stage the data through a temporary buffer of
 *     COPY_BUFFER_SZ bytes, one pack/unpack round at a time.
 *
 * Returns MPI_SUCCESS, or an MPI error code (truncation, datatype mismatch,
 * or failure to obtain the staging buffer).
 */
int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
                   void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype)
{
    int mpi_errno = MPI_SUCCESS;
    int sendtype_iscontig, recvtype_iscontig;
    MPI_Aint sendsize, recvsize, sdata_sz, rdata_sz, copy_sz;
    MPI_Aint true_extent, sendtype_true_lb, recvtype_true_lb;
    char *buf = NULL;
    MPL_pointer_attr_t send_attr, recv_attr;
    MPIR_CHKLMEM_DECL(1);
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_LOCALCOPY);

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_LOCALCOPY);

    MPIR_Datatype_get_size_macro(sendtype, sendsize);
    MPIR_Datatype_get_size_macro(recvtype, recvsize);

    sdata_sz = sendsize * sendcount;
    rdata_sz = recvsize * recvcount;

    /* Give both attributes a defined (host) value up front: the cleanup
     * code at fn_exit inspects them on every path, including the ones that
     * never query the real pointer attributes. */
    send_attr.type = recv_attr.type = MPL_GPU_POINTER_UNREGISTERED_HOST;

    /* if there is no data to copy, bail out */
    if (!sdata_sz || !rdata_sz)
        goto fn_exit;

#if defined(HAVE_ERROR_CHECKING)
    if (sdata_sz > rdata_sz) {
        /* Record the truncation but keep going: as much data as fits in
         * the destination is still copied. */
        MPIR_ERR_SET2(mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", sdata_sz,
                      rdata_sz);
        copy_sz = rdata_sz;
    } else
#endif /* HAVE_ERROR_CHECKING */
        copy_sz = sdata_sz;

    /* Builtin types is the common case; optimize for it */
    MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
    MPIR_Datatype_iscontig(recvtype, &recvtype_iscontig);

    /* Only the true lower bounds are used below; true_extent is just a
     * required output argument and its value is never read. */
    MPIR_Type_get_true_extent_impl(sendtype, &sendtype_true_lb, &true_extent);
    MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &true_extent);

    if (sendtype_iscontig) {
        /* The source bytes are already laid out as a packed stream. */
        MPI_Aint actual_unpack_bytes;
        MPIR_Typerep_unpack((char *) sendbuf + sendtype_true_lb, copy_sz, recvbuf, recvcount,
                            recvtype, 0, &actual_unpack_bytes);
        MPIR_ERR_CHKANDJUMP(actual_unpack_bytes != copy_sz, mpi_errno, MPI_ERR_TYPE,
                            "**dtypemismatch");
    } else if (recvtype_iscontig) {
        /* The destination can receive the packed stream directly. */
        MPI_Aint actual_pack_bytes;
        MPIR_Typerep_pack(sendbuf, sendcount, sendtype, 0, (char *) recvbuf + recvtype_true_lb,
                          copy_sz, &actual_pack_bytes);
        MPIR_ERR_CHKANDJUMP(actual_pack_bytes != copy_sz, mpi_errno, MPI_ERR_TYPE,
                            "**dtypemismatch");
    } else {
        /* Neither side is contiguous: stage through a temporary buffer
         * whose allocator is chosen from where the user buffers live. */
        intptr_t sfirst = 0;
        intptr_t rfirst = 0;

        MPIR_GPU_query_pointer_attr(sendbuf, &send_attr);
        MPIR_GPU_query_pointer_attr(recvbuf, &recv_attr);

        if (send_attr.type == MPL_GPU_POINTER_DEV && recv_attr.type == MPL_GPU_POINTER_DEV) {
            MPL_gpu_malloc((void **) &buf, COPY_BUFFER_SZ, recv_attr.device);
            /* NOTE(review): the allocator's return value is still ignored;
             * this check relies on buf staying NULL when allocation fails
             * -- confirm against the MPL GPU backend. */
            MPIR_ERR_CHKANDJUMP(!buf, mpi_errno, MPI_ERR_OTHER, "**nomem");
        } else if (send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) {
            MPL_gpu_malloc_host((void **) &buf, COPY_BUFFER_SZ);
            /* NOTE(review): same assumption as above for the host allocator. */
            MPIR_ERR_CHKANDJUMP(!buf, mpi_errno, MPI_ERR_OTHER, "**nomem");
        } else {
            MPIR_CHKLMEM_MALLOC(buf, char *, COPY_BUFFER_SZ, mpi_errno, "buf", MPL_MEM_BUFFER);
        }

        while (1) {
            /* Never ask for more than one staging buffer's worth. */
            MPI_Aint max_pack_bytes;
            if (copy_sz - sfirst > COPY_BUFFER_SZ) {
                max_pack_bytes = COPY_BUFFER_SZ;
            } else {
                max_pack_bytes = copy_sz - sfirst;
            }

            MPI_Aint actual_pack_bytes;
            MPIR_Typerep_pack(sendbuf, sendcount, sendtype, sfirst, buf,
                              max_pack_bytes, &actual_pack_bytes);
            MPIR_Assert(actual_pack_bytes > 0);

            sfirst += actual_pack_bytes;

            MPI_Aint actual_unpack_bytes;
            MPIR_Typerep_unpack(buf, actual_pack_bytes, recvbuf, recvcount, recvtype,
                                rfirst, &actual_unpack_bytes);
            MPIR_Assert(actual_unpack_bytes > 0);

            rfirst += actual_unpack_bytes;

            /* everything that was packed from the source type must be
             * unpacked; otherwise we will lose the remaining data in
             * buf in the next iteration. */
            MPIR_ERR_CHKANDJUMP(actual_pack_bytes != actual_unpack_bytes, mpi_errno,
                                MPI_ERR_TYPE, "**dtypemismatch");

            if (rfirst == copy_sz) {
                /* successful completion */
                break;
            }
        }
    }

  fn_exit:
    /* Single release point for a GPU-allocated staging buffer, shared by
     * the success and failure paths.  A buffer obtained through
     * MPIR_CHKLMEM_MALLOC is released by MPIR_CHKLMEM_FREEALL instead; in
     * that case neither attribute is MPL_GPU_POINTER_DEV, so nothing
     * happens here. */
    if (buf) {
        if (send_attr.type == MPL_GPU_POINTER_DEV && recv_attr.type == MPL_GPU_POINTER_DEV) {
            MPL_gpu_free(buf);
        } else if (send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) {
            MPL_gpu_free_host(buf);
        }
    }
    MPIR_CHKLMEM_FREEALL();
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_LOCALCOPY);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
138