/*
 * Copyright (C) by Argonne National Laboratory
 *     See COPYRIGHT in top-level directory
 */
5
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <assert.h>
9 #include <strings.h>
10
11 #include <mpi.h>
12 #include "mcs_mutex.h"
13
/* TODO: Make these mutex operations no-ops for sequential runs */
15
16 /** Create an MCS mutex. Collective on comm.
17 *
18 * @param[out] comm communicator containing all processes that will use the
19 * mutex
20 * @param[out] tail_rank rank of the process in comm that holds the tail
21 * pointer
22 * @param[out] hdl handle to the mutex
23 * @return MPI status
24 */
MCS_Mutex_create(int tail_rank,MPI_Comm comm,MCS_Mutex * hdl_out)25 int MCS_Mutex_create(int tail_rank, MPI_Comm comm, MCS_Mutex * hdl_out)
26 {
27 int rank, nproc;
28 MCS_Mutex hdl;
29
30 hdl = malloc(sizeof(struct mcs_mutex_s));
31 assert(hdl != NULL);
32
33 MPI_Comm_dup(comm, &hdl->comm);
34
35 MPI_Comm_rank(hdl->comm, &rank);
36 MPI_Comm_size(hdl->comm, &nproc);
37
38 hdl->tail_rank = tail_rank;
39
40 #ifdef USE_WIN_SHARED
41 MPI_Win_allocate_shared(2 * sizeof(int), sizeof(int), MPI_INFO_NULL,
42 hdl->comm, &hdl->base, &hdl->window);
43 #else
44 #ifdef USE_WIN_ALLOC_SHM
45 MPI_Info_create(&hdl->win_info);
46 MPI_Info_set(hdl->win_info, "alloc_shm", "true");
47 #else
48 MPI_Info_create(&hdl->win_info);
49 MPI_Info_set(hdl->win_info, "alloc_shm", "false");
50 #endif
51 MPI_Win_allocate(2 * sizeof(int), sizeof(int), hdl->win_info, hdl->comm,
52 &hdl->base, &hdl->window);
53 #endif
54
55 MPI_Win_lock_all(0, hdl->window);
56
57 hdl->base[0] = MPI_PROC_NULL;
58 hdl->base[1] = MPI_PROC_NULL;
59
60 MPI_Win_sync(hdl->window);
61 MPI_Barrier(hdl->comm);
62
63 *hdl_out = hdl;
64 return MPI_SUCCESS;
65 }
66
67
68 /** Free an MCS mutex. Collective on ranks in the communicator used at the
69 * time of creation.
70 *
71 * @param[in] hdl handle to the group that will be freed
72 * @return MPI status
73 */
MCS_Mutex_free(MCS_Mutex * hdl_ptr)74 int MCS_Mutex_free(MCS_Mutex * hdl_ptr)
75 {
76 MCS_Mutex hdl = *hdl_ptr;
77
78 MPI_Win_unlock_all(hdl->window);
79
80 MPI_Win_free(&hdl->window);
81 MPI_Comm_free(&hdl->comm);
82 #ifndef USE_WIN_SHARED
83 MPI_Info_free(&hdl->win_info);
84 #endif
85
86 free(hdl);
87 hdl_ptr = NULL;
88
89 return MPI_SUCCESS;
90 }
91
92
93 /** Lock a mutex.
94 *
95 * @param[in] hdl Handle to the mutex
96 * @return MPI status
97 */
MCS_Mutex_lock(MCS_Mutex hdl)98 int MCS_Mutex_lock(MCS_Mutex hdl)
99 {
100 int rank, nproc;
101 int prev;
102
103 MPI_Comm_rank(hdl->comm, &rank);
104 MPI_Comm_size(hdl->comm, &nproc);
105
106 /* This store is safe, since it cannot happen concurrently with a remote
107 * write */
108 hdl->base[MCS_MTX_ELEM_DISP] = MPI_PROC_NULL;
109 MPI_Win_sync(hdl->window);
110
111 MPI_Fetch_and_op(&rank, &prev, MPI_INT, hdl->tail_rank, MCS_MTX_TAIL_DISP,
112 MPI_REPLACE, hdl->window);
113 MPI_Win_flush(hdl->tail_rank, hdl->window);
114
115 /* If there was a previous tail, update their next pointer and wait for
116 * notification. Otherwise, the mutex was successfully acquired. */
117 if (prev != MPI_PROC_NULL) {
118 /* Wait for notification */
119 MPI_Status status;
120
121 MPI_Accumulate(&rank, 1, MPI_INT, prev, MCS_MTX_ELEM_DISP, 1, MPI_INT, MPI_REPLACE,
122 hdl->window);
123 MPI_Win_flush(prev, hdl->window);
124
125 debug_print("%2d: LOCK - waiting for notification from %d\n", rank, prev);
126 MPI_Recv(NULL, 0, MPI_BYTE, prev, MCS_MUTEX_TAG, hdl->comm, &status);
127 }
128
129 debug_print("%2d: LOCK - lock acquired\n", rank);
130
131 return MPI_SUCCESS;
132 }
133
134
135 /** Attempt to acquire a mutex.
136 *
137 * @param[in] hdl Handle to the mutex
138 * @param[out] success Indicates whether the mutex was acquired
139 * @return MPI status
140 */
MCS_Mutex_trylock(MCS_Mutex hdl,int * success)141 int MCS_Mutex_trylock(MCS_Mutex hdl, int *success)
142 {
143 int rank, nproc;
144 int tail, nil = MPI_PROC_NULL;
145
146 MPI_Comm_rank(hdl->comm, &rank);
147 MPI_Comm_size(hdl->comm, &nproc);
148
149 /* This store is safe, since it cannot happen concurrently with a remote
150 * write */
151 hdl->base[MCS_MTX_ELEM_DISP] = MPI_PROC_NULL;
152 MPI_Win_sync(hdl->window);
153
154 /* Check if the lock is available and claim it if it is. */
155 MPI_Compare_and_swap(&rank, &nil, &tail, MPI_INT, hdl->tail_rank,
156 MCS_MTX_TAIL_DISP, hdl->window);
157 MPI_Win_flush(hdl->tail_rank, hdl->window);
158
159 /* If the old tail was MPI_PROC_NULL, we have claimed the mutex */
160 *success = (tail == nil);
161
162 debug_print("%2d: TRYLOCK - %s\n", rank, (*success) ? "Success" : "Non-success");
163
164 return MPI_SUCCESS;
165 }
166
167
168 /** Unlock a mutex.
169 *
170 * @param[in] hdl Handle to the mutex
171 * @return MPI status
172 */
MCS_Mutex_unlock(MCS_Mutex hdl)173 int MCS_Mutex_unlock(MCS_Mutex hdl)
174 {
175 int rank, nproc, next;
176
177 MPI_Comm_rank(hdl->comm, &rank);
178 MPI_Comm_size(hdl->comm, &nproc);
179
180 MPI_Win_sync(hdl->window);
181
182 /* Read my next pointer. FOP is used since another process may write to
183 * this location concurrent with this read. */
184 MPI_Fetch_and_op(NULL, &next, MPI_INT, rank, MCS_MTX_ELEM_DISP, MPI_NO_OP, hdl->window);
185 MPI_Win_flush(rank, hdl->window);
186
187 if (next == MPI_PROC_NULL) {
188 int tail;
189 int nil = MPI_PROC_NULL;
190
191 /* Check if we are the at the tail of the lock queue. If so, we're
192 * done. If not, we need to send notification. */
193 MPI_Compare_and_swap(&nil, &rank, &tail, MPI_INT, hdl->tail_rank,
194 MCS_MTX_TAIL_DISP, hdl->window);
195 MPI_Win_flush(hdl->tail_rank, hdl->window);
196
197 if (tail != rank) {
198 debug_print("%2d: UNLOCK - waiting for next pointer (tail = %d)\n", rank, tail);
199 assert(tail >= 0 && tail < nproc);
200
201 for (;;) {
202 int flag;
203
204 MPI_Fetch_and_op(NULL, &next, MPI_INT, rank, MCS_MTX_ELEM_DISP,
205 MPI_NO_OP, hdl->window);
206
207 MPI_Win_flush(rank, hdl->window);
208 if (next != MPI_PROC_NULL)
209 break;
210
211 MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE);
212 }
213 }
214 }
215
216 /* Notify the next waiting process */
217 if (next != MPI_PROC_NULL) {
218 debug_print("%2d: UNLOCK - notifying %d\n", rank, next);
219 MPI_Send(NULL, 0, MPI_BYTE, next, MCS_MUTEX_TAG, hdl->comm);
220 }
221
222 debug_print("%2d: UNLOCK - lock released\n", rank);
223
224 return MPI_SUCCESS;
225 }
226