1 /*
2 * Copyright (C) by Argonne National Laboratory
3 * See COPYRIGHT in top-level directory
4 */
5
6 #include "mpidimpl.h"
7 #include "ch4r_init.h"
8
/*
 * Set up the MPIDIG receive-side state of a communicator.
 *
 * A communicator whose receive context id has the DYNAMIC_PROC field set is
 * left untouched.  Otherwise the communicator is recorded in
 * MPIDI_global.comm_req_lists, its posted and unexpected lists are emptied,
 * every request found on the list returned by MPIDIG_context_id_to_uelist()
 * is moved onto the communicator's unexpected list, and window_instance is
 * reset to 0.
 *
 * Returns mpi_errno, which is never changed from MPI_SUCCESS here.
 */
int MPIDIG_init_comm(MPIR_Comm * comm)
{
    int mpi_errno = MPI_SUCCESS;
    int comm_idx;
    int subcomm_type;
    int is_localcomm;
    MPIDIG_rreq_t **pending;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_INIT_COMM);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_INIT_COMM);

    MPIR_Assert(MPIDI_global.is_ch4u_initialized);

    /* nothing to set up for dynamic-process context ids */
    if (MPIR_CONTEXT_READ_FIELD(DYNAMIC_PROC, comm->recvcontext_id))
        goto fn_exit;

    /* decode the three coordinates of this communicator's slot */
    is_localcomm = MPIR_CONTEXT_READ_FIELD(IS_LOCALCOMM, comm->recvcontext_id);
    subcomm_type = MPIR_CONTEXT_READ_FIELD(SUBCOMM, comm->recvcontext_id);
    comm_idx = MPIDIG_get_context_index(comm->recvcontext_id);

    MPIR_Assert(subcomm_type <= 3);
    MPIR_Assert(is_localcomm <= 1);

    /* This code is likely run by a user/main thread, while an MPIDIG callback
     * handler may run concurrently on a progress thread when async progress
     * is enabled.  The lock makes registering the communicator and moving
     * the unexpected messages from the global queue onto it one atomic
     * step. */
    MPID_THREAD_CS_ENTER(VCI, MPIDIU_THREAD_MPIDIG_GLOBAL_MUTEX);

    MPIDI_global.comm_req_lists[comm_idx].comm[is_localcomm][subcomm_type] = comm;
    MPIDIG_COMM(comm, posted_list) = NULL;
    MPIDIG_COMM(comm, unexp_list) = NULL;

    /* NOTE(review): the lookup below uses comm->context_id whereas the slot
     * above is derived from comm->recvcontext_id -- confirm this is intended. */
    pending = MPIDIG_context_id_to_uelist(comm->context_id);
    if (*pending) {
        MPIDIG_rreq_t *req, *next_req;

        DL_FOREACH_SAFE(*pending, req, next_req) {
            DL_DELETE(*pending, req);
            /* the communicator holds one reference per unexpected entry */
            MPIR_Comm_add_ref(comm);
            DL_APPEND(MPIDIG_COMM(comm, unexp_list), req);
        }
        *pending = NULL;
    }

    MPID_THREAD_CS_EXIT(VCI, MPIDIU_THREAD_MPIDIG_GLOBAL_MUTEX);

    MPIDIG_COMM(comm, window_instance) = 0;

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_INIT_COMM);
    return mpi_errno;
}
56
/*
 * Remove a communicator from MPIDI_global.comm_req_lists.
 *
 * A communicator whose receive context id has the DYNAMIC_PROC field set is
 * skipped.  Otherwise the slot selected by the receive context id is
 * asserted to be occupied, the communicator stored there is asserted to have
 * empty posted and unexpected lists, and the slot is cleared.
 *
 * Returns mpi_errno, which is never changed from MPI_SUCCESS here.
 */
int MPIDIG_destroy_comm(MPIR_Comm * comm)
{
    int mpi_errno = MPI_SUCCESS;
    int comm_idx;
    int subcomm_type;
    int is_localcomm;
    MPIR_Comm *registered;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_DESTROY_COMM);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_DESTROY_COMM);

    if (MPIR_CONTEXT_READ_FIELD(DYNAMIC_PROC, comm->recvcontext_id))
        goto fn_exit;

    /* decode the three coordinates of this communicator's slot */
    is_localcomm = MPIR_CONTEXT_READ_FIELD(IS_LOCALCOMM, comm->recvcontext_id);
    subcomm_type = MPIR_CONTEXT_READ_FIELD(SUBCOMM, comm->recvcontext_id);
    comm_idx = MPIDIG_get_context_index(comm->recvcontext_id);

    MPIR_Assert(subcomm_type <= 3);
    MPIR_Assert(is_localcomm <= 1);

    MPID_THREAD_CS_ENTER(VCI, MPIDIU_THREAD_MPIDIG_GLOBAL_MUTEX);

    registered = MPIDI_global.comm_req_lists[comm_idx].comm[is_localcomm][subcomm_type];
    MPIR_Assert(registered != NULL);

    /* the explicit test keeps the list checks from dereferencing an empty
     * slot should the assertion above not stop execution */
    if (registered) {
        MPIR_Assert(MPIDIG_COMM(registered, posted_list) == NULL);
        MPIR_Assert(MPIDIG_COMM(registered, unexp_list) == NULL);
    }

    MPIDI_global.comm_req_lists[comm_idx].comm[is_localcomm][subcomm_type] = NULL;

    MPID_THREAD_CS_EXIT(VCI, MPIDIU_THREAD_MPIDIG_GLOBAL_MUTEX);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_DESTROY_COMM);
    return mpi_errno;
}
90
/* Linked list internally used to keep track of
 * allocated memory for which memory binding is
 * requested by the user. Each node records one
 * mapping created by MPIDIG_mpi_alloc_mem so that
 * MPIDIG_mpi_free_mem can unmap it with the right length. */
typedef struct mem_node {
    void *ptr;                  /* address returned by MPL_mmap for this allocation */
    size_t size;                /* length in bytes passed to MPL_mmap / MPL_munmap */
    struct mem_node *next;      /* link to the following node in the list */
} mem_node_t;

/* Head and tail of the tracking list; both start out NULL (empty list).
 * NOTE(review): no lock is taken around accesses to these in this file --
 * confirm that callers serialize alloc/free. */
static mem_node_t *mem_list_head = NULL;
static mem_node_t *mem_list_tail = NULL;
102
/*
 * Allocate `size` bytes, optionally bound to a memory device.
 *
 * size     - number of bytes requested.
 * info_ptr - optional info object (may be NULL).  Only the "bind_memory"
 *            key is read: "hbm" requests high-bandwidth memory, while "ddr"
 *            and any other value request DDR.
 *
 * Three cases, selected by the hint and by MPIR_hwtopo_get_obj_by_type():
 *   - a device of the requested type is found: the memory is mmap'ed, bound
 *     to that device, and recorded in the tracking list so that
 *     MPIDIG_mpi_free_mem can unmap it;
 *   - no device is found and the requested type is not DDR: NULL is
 *     returned, there is no fallback to the default allocator;
 *   - otherwise: plain MPL_malloc, placement is left to the OS.
 *
 * Returns the allocated address, or NULL when the allocation fails or the
 * requested memory type cannot be honoured.
 */
void *MPIDIG_mpi_alloc_mem(size_t size, MPIR_Info * info_ptr)
{
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_ALLOC_MEM);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_ALLOC_MEM);
    void *p = NULL;
    MPIR_hwtopo_type_e mem_type = MPIR_HWTOPO_TYPE__DDR;
    MPIR_hwtopo_gid_t mem_gid = MPIR_HWTOPO_GID_ROOT;
    int flag = 0;
    char hint_str[MPI_MAX_INFO_VAL + 1];

    /* retrieve requested memory type for allocation */
    if (info_ptr) {
        MPIR_Info_get_impl(info_ptr, "bind_memory", MPI_MAX_INFO_VAL, hint_str, &flag);
    }

    if (flag) {
        /* mem_type already defaults to DDR, so only "hbm" needs handling;
         * "ddr" and unrecognized values keep the default */
        if (!strcmp(hint_str, "hbm")) {
            mem_type = MPIR_HWTOPO_TYPE__HBM;
        }
        mem_gid = MPIR_hwtopo_get_obj_by_type(mem_type);
    }

    if (mem_gid != MPIR_HWTOPO_GID_ROOT) {
        /* requested memory type is available in the system and process is bound
         * to the corresponding device; allocate memory and bind it to device. */

        /* Get the tracking node first: if it cannot be allocated there is
         * no mapping to undo. */
        mem_node_t *el = MPL_malloc(sizeof(*el), MPL_MEM_OTHER);
        if (el == NULL) {
            goto fn_exit;
        }

        p = MPL_mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0,
                     MPL_MEM_USER);
        if (p == MAP_FAILED) {
            /* mmap reports failure with MAP_FAILED, not NULL; translate it
             * so the caller sees an ordinary allocation failure */
            MPL_free(el);
            p = NULL;
            goto fn_exit;
        }
        MPIR_hwtopo_mem_bind(p, size, mem_gid);

        /* keep track of bound memory for freeing it later */
        el->ptr = p;
        el->size = size;
        LL_APPEND(mem_list_head, mem_list_tail, el);
    } else if (mem_type != MPIR_HWTOPO_TYPE__DDR) {
        /* if mem_gid = MPIR_HWTOPO_GID_ROOT and mem_type is non-default (DDR)
         * it can mean either that the requested memory type is not available
         * in the system or the requested memory type is available but there
         * are many devices of such type and the process requesting memory is
         * not bound to any of them. Regardless of the reason we do not fall
         * back to the default allocation and return a NULL pointer to the
         * upper layer instead. */
        p = NULL;
    } else {
        /* if mem_gid = MPIR_HWTOPO_GID_ROOT and mem_type is default (DDR) it
         * means that we cannot bind memory to a single device explicitly. In
         * this case we still allocate memory and leave the binding to the OS
         * (first touch policy in Linux). */
        p = MPL_malloc(size, MPL_MEM_USER);
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_ALLOC_MEM);
    return p;
}
160
MPIDIG_mpi_free_mem(void * ptr)161 int MPIDIG_mpi_free_mem(void *ptr)
162 {
163 int mpi_errno = MPI_SUCCESS;
164 MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDIG_MPI_FREE_MEM);
165 MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDIG_MPI_FREE_MEM);
166 mem_node_t *el = NULL;
167
168 /* scan memory list for allocations */
169 LL_FOREACH(mem_list_head, el) {
170 if (el->ptr == ptr) {
171 LL_DELETE(mem_list_head, mem_list_tail, el);
172 break;
173 }
174 }
175
176 if (el) {
177 MPL_munmap(el->ptr, el->size, MPL_MEM_USER);
178 MPL_free(el);
179 } else {
180 MPL_free(ptr);
181 }
182 MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDIG_MPI_FREE_MEM);
183 return mpi_errno;
184 }
185