1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4  *                         University Research and Technology
5  *                         Corporation.  All rights reserved.
6  * Copyright (c) 2004-2016 The University of Tennessee and The University
7  *                         of Tennessee Research Foundation.  All rights
8  *                         reserved.
9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10  *                         University of Stuttgart.  All rights reserved.
11  * Copyright (c) 2004-2005 The Regents of the University of California.
12  *                         All rights reserved.
13  * Copyright (c) 2006-2017 Cisco Systems, Inc.  All rights reserved
14  * Copyright (c) 2009-2012 Oracle and/or its affiliates.  All rights reserved.
15  * Copyright (c) 2012      Oak Ridge National Labs.  All rights reserved.
16  * Copyright (c) 2015-2016 Los Alamos National Security, LLC.  All rights
17  *                         reserved.
18  * $COPYRIGHT$
19  *
20  * Additional copyrights may follow
21  *
22  * $HEADER$
23  */
24 /**
25  * @file
26  *
27  * Top-level description of requests
28  */
29 
30 #ifndef OMPI_REQUEST_H
31 #define OMPI_REQUEST_H
32 
33 #include "ompi_config.h"
34 #include "mpi.h"
35 #include "opal/class/opal_free_list.h"
36 #include "opal/class/opal_pointer_array.h"
37 #include "opal/threads/condition.h"
38 #include "opal/threads/wait_sync.h"
39 #include "ompi/constants.h"
40 
41 BEGIN_C_DECLS
42 
43 /**
44  * Request class
45  */
46 OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_request_t);
47 
48 /*
49  * The following include pulls in shared typedefs with debugger plugins.
50  * For more information on why we do this see the Notice to developers
51  * comment at the top of the ompi_msgq_dll.c file.
52  */
53 
54 #include "request_dbg.h"
55 
/* Forward declaration: the callback types below only need pointers to requests. */
struct ompi_request_t;

/**
 * Callback that initiates one or more persistent requests.
 *
 * It is called by MPI_START (count is 1) and by MPI_STARTALL. In the
 * MPI_STARTALL case, the requests passed in one call all have the same
 * req_start value, which may help scheduling optimization of multiple
 * communications.
 *
 * @param count (IN)        Number of requests
 * @param requests (IN/OUT) Array of persistent requests
 * @return                  OMPI_SUCCESS or failure status.
 */
typedef int (*ompi_request_start_fn_t)(size_t count, struct ompi_request_t **requests);
77 
/**
 * Required callback: frees the request and any associated resources.
 *
 * @param rptr  Address of the request handle to free.
 * @return      Integer status code.
 */
typedef int (*ompi_request_free_fn_t)(struct ompi_request_t **rptr);
82 
/**
 * Optional callback: cancels a pending request.
 *
 * @param request  Request to cancel.
 * @param flag     Integer flag forwarded to the callback
 *                 (ompi_request_cancel() passes true).
 * @return         Integer status code.
 */
typedef int (*ompi_request_cancel_fn_t)(struct ompi_request_t *request, int flag);
87 
/**
 * Optional callback invoked when the request is completed from the MPI
 * library perspective.
 *
 * The callback is allowed to release the request if the request will not
 * be used with ompi_request_wait* or ompi_request_test.
 *
 * @param request  The request that completed.
 * @return         1 if the callback reposted the request (using start) or
 *                 called ompi_request_free() on it -- this is *MANDATORY*
 *                 in those cases; 0 otherwise.
 */
typedef int (*ompi_request_complete_fn_t)(struct ompi_request_t *request);
96 
/**
 * Forward declarations of the MPI object types that ompi_mpi_object_t
 * can point to (communicator, window, file).
 */
struct ompi_communicator_t;
struct ompi_win_t;
struct ompi_file_t;
111 
/**
 * Union for holding several different MPI pointer types on the request.
 * All members share the same storage, so only one of them is meaningful
 * at any given time.
 */
union ompi_mpi_object_t {
    struct ompi_communicator_t *comm;   /**< the object is a communicator */
    struct ompi_file_t *file;           /**< the object is a file */
    struct ompi_win_t *win;             /**< the object is a window */
};

typedef union ompi_mpi_object_t ompi_mpi_object_t;
120 
121 /**
122  * Main top-level request struct definition
123  */
124 struct ompi_request_t {
125     opal_free_list_item_t super;                /**< Base type */
126     ompi_request_type_t req_type;               /**< Enum indicating the type of the request */
127     ompi_status_public_t req_status;            /**< Completion status */
128     volatile void *req_complete;                /**< Flag indicating wether request has completed */
129     volatile ompi_request_state_t req_state;    /**< enum indicate state of the request */
130     bool req_persistent;                        /**< flag indicating if the this is a persistent request */
131     int req_f_to_c_index;                       /**< Index in Fortran <-> C translation array */
132     ompi_request_start_fn_t req_start;          /**< Called by MPI_START and MPI_STARTALL */
133     ompi_request_free_fn_t req_free;            /**< Called by free */
134     ompi_request_cancel_fn_t req_cancel;        /**< Optional function to cancel the request */
135     ompi_request_complete_fn_t req_complete_cb; /**< Called when the request is MPI completed */
136     void *req_complete_cb_data;
137     ompi_mpi_object_t req_mpi_object;           /**< Pointer to MPI object that created this request */
138 };
139 
140 /**
141  * Convenience typedef
142  */
143 typedef struct ompi_request_t ompi_request_t;
144 
145 
146 /**
147  * Padded struct to maintain back compatibiltiy.
148  * See ompi/communicator/communicator.h comments with struct ompi_communicator_t
149  * for full explanation why we chose the following padding construct for predefines.
150  */
151 #define PREDEFINED_REQUEST_PAD 256
152 
153 struct ompi_predefined_request_t {
154     struct ompi_request_t request;
155     char padding[PREDEFINED_REQUEST_PAD - sizeof(ompi_request_t)];
156 };
157 
158 typedef struct ompi_predefined_request_t ompi_predefined_request_t;
159 
/**
 * Initialize a request.  This is a macro to avoid function call
 * overhead, since this is typically invoked in the critical
 * performance path (since requests may be re-used, it is possible
 * that we will have to initialize a request multiple times).
 *
 * A persistent request starts out marked REQUEST_COMPLETED, any other
 * request starts out REQUEST_PENDING.  The state is set to
 * OMPI_REQUEST_INACTIVE and the completion callback and its data are
 * cleared.
 *
 * The expansion is a single do { ... } while (0) statement with NO
 * trailing semicolon: the semicolon written by the caller terminates it,
 * which keeps the macro usable as the body of an unbraced if/else.
 *
 * Both arguments are evaluated more than once; do not pass expressions
 * with side effects.
 *
 * @param request    Pointer to the request to initialize
 * @param persistent Non-zero if the request is persistent
 */
#define OMPI_REQUEST_INIT(request, persistent)                  \
    do {                                                        \
        (request)->req_complete =                               \
            (persistent) ? REQUEST_COMPLETED : REQUEST_PENDING; \
        (request)->req_state = OMPI_REQUEST_INACTIVE;           \
        (request)->req_persistent = (persistent);               \
        (request)->req_complete_cb  = NULL;                     \
        (request)->req_complete_cb_data = NULL;                 \
    } while (0)
175 
176 
/** Evaluates to true when the request's req_complete marker equals REQUEST_COMPLETED. */
#define REQUEST_COMPLETE(req)        ((req)->req_complete == REQUEST_COMPLETED)
/**
 * Finalize a request.  This is a macro to avoid function call
 * overhead, since this is typically invoked in the critical
 * performance path (since requests may be re-used, it is possible
 * that we will have to finalize a request multiple times).
 *
 * The request state is set to OMPI_REQUEST_INVALID.  When finalizing a
 * request, if MPI_Request_f2c() was previously invoked on that request,
 * then this request was added to the f2c table and has to be removed
 * from it: if req_f_to_c_index is not MPI_UNDEFINED, the matching entry
 * of ompi_request_f_to_c_table is cleared and the index is reset to
 * MPI_UNDEFINED.
 *
 * This macro should be called only from the MPI layer. It should
 * never be called from the PML. It takes care of the upper level
 * clean-up.  When the user calls MPI_Request_free we should release all
 * MPI level resources, so this macro has to be called then too.
 *
 * The expansion is a single do { ... } while (0) statement with NO
 * trailing semicolon: the semicolon written by the caller terminates it,
 * which keeps the macro usable as the body of an unbraced if/else.
 *
 * The argument is evaluated more than once; do not pass an expression
 * with side effects.
 *
 * @param request Pointer to the request to finalize
 */
#define OMPI_REQUEST_FINI(request)                                      \
do {                                                                    \
    (request)->req_state = OMPI_REQUEST_INVALID;                        \
    if (MPI_UNDEFINED != (request)->req_f_to_c_index) {                 \
        opal_pointer_array_set_item(&ompi_request_f_to_c_table,         \
                                    (request)->req_f_to_c_index, NULL); \
        (request)->req_f_to_c_index = MPI_UNDEFINED;                    \
    }                                                                   \
} while (0)
202 
203 /**
204  * Non-blocking test for request completion.
205  *
206  * @param request (IN)   Array of requests
207  * @param complete (OUT) Flag indicating if index is valid (a request completed).
208  * @param status (OUT)   Status of completed request.
209  * @return               OMPI_SUCCESS or failure status.
210  *
211  * Note that upon completion, the request is freed, and the
212  * request handle at index set to NULL.
213  */
214 typedef int (*ompi_request_test_fn_t)(ompi_request_t ** rptr,
215                                       int *completed,
216                                       ompi_status_public_t * status );
217 /**
218  * Non-blocking test for request completion.
219  *
220  * @param count (IN)     Number of requests
221  * @param request (IN)   Array of requests
222  * @param index (OUT)    Index of first completed request.
223  * @param complete (OUT) Flag indicating if index is valid (a request completed).
224  * @param status (OUT)   Status of completed request.
225  * @return               OMPI_SUCCESS or failure status.
226  *
227  * Note that upon completion, the request is freed, and the
228  * request handle at index set to NULL.
229  */
230 typedef int (*ompi_request_test_any_fn_t)(size_t count,
231                                           ompi_request_t ** requests,
232                                           int *index,
233                                           int *completed,
234                                           ompi_status_public_t * status);
235 /**
236  * Non-blocking test for request completion.
237  *
238  * @param count (IN)      Number of requests
239  * @param requests (IN)   Array of requests
240  * @param completed (OUT) Flag indicating wether all requests completed.
241  * @param statuses (OUT)  Array of completion statuses.
242  * @return                OMPI_SUCCESS or failure status.
243  *
244  * This routine returns completed==true if all requests have completed.
245  * The statuses parameter is only updated if all requests completed. Likewise,
246  * the requests array is not modified (no requests freed), unless all requests
247  * have completed.
248  */
249 typedef int (*ompi_request_test_all_fn_t)(size_t count,
250                                           ompi_request_t ** requests,
251                                           int *completed,
252                                           ompi_status_public_t * statuses);
253 /**
254  * Non-blocking test for some of N requests to complete.
255  *
256  * @param count (IN)        Number of requests
257  * @param requests (INOUT)  Array of requests
258  * @param outcount (OUT)    Number of finished requests
259  * @param indices (OUT)     Indices of the finished requests
260  * @param statuses (OUT)    Array of completion statuses.
261  * @return                  OMPI_SUCCESS, OMPI_ERR_IN_STATUS or failure status.
262  *
263  */
264 typedef int (*ompi_request_test_some_fn_t)(size_t count,
265                                            ompi_request_t ** requests,
266                                            int * outcount,
267                                            int * indices,
268                                            ompi_status_public_t * statuses);
269 /**
270  * Wait (blocking-mode) for one requests to complete.
271  *
272  * @param request (IN)    Pointer to request.
273  * @param status (OUT)    Status of completed request.
274  * @return                OMPI_SUCCESS or failure status.
275  *
276  */
277 typedef int (*ompi_request_wait_fn_t)(ompi_request_t ** req_ptr,
278                                       ompi_status_public_t * status);
279 /**
280  * Wait (blocking-mode) for one of N requests to complete.
281  *
282  * @param count (IN)      Number of requests
283  * @param requests (IN)   Array of requests
284  * @param index (OUT)     Index into request array of completed request.
285  * @param status (OUT)    Status of completed request.
286  * @return                OMPI_SUCCESS or failure status.
287  *
288  */
289 typedef int (*ompi_request_wait_any_fn_t)(size_t count,
290                                           ompi_request_t ** requests,
291                                           int *index,
292                                           ompi_status_public_t * status);
293 /**
294  * Wait (blocking-mode) for all of N requests to complete.
295  *
296  * @param count (IN)      Number of requests
297  * @param requests (IN)   Array of requests
298  * @param statuses (OUT)  Array of completion statuses.
299  * @return                OMPI_SUCCESS or failure status.
300  *
301  */
302 typedef int (*ompi_request_wait_all_fn_t)(size_t count,
303                                           ompi_request_t ** requests,
304                                           ompi_status_public_t * statuses);
305 /**
306  * Wait (blocking-mode) for some of N requests to complete.
307  *
308  * @param count (IN)        Number of requests
309  * @param requests (INOUT)  Array of requests
310  * @param outcount (OUT)    Number of finished requests
311  * @param indices (OUT)     Indices of the finished requests
312  * @param statuses (OUT)    Array of completion statuses.
313  * @return                  OMPI_SUCCESS, OMPI_ERR_IN_STATUS or failure status.
314  *
315  */
316 typedef int (*ompi_request_wait_some_fn_t)(size_t count,
317                                            ompi_request_t ** requests,
318                                            int * outcount,
319                                            int * indices,
320                                            ompi_status_public_t * statuses);
321 
322 /**
323  * Replaceable request functions
324  */
325 typedef struct ompi_request_fns_t {
326     ompi_request_test_fn_t      req_test;
327     ompi_request_test_any_fn_t  req_test_any;
328     ompi_request_test_all_fn_t  req_test_all;
329     ompi_request_test_some_fn_t req_test_some;
330     ompi_request_wait_fn_t      req_wait;
331     ompi_request_wait_any_fn_t  req_wait_any;
332     ompi_request_wait_all_fn_t  req_wait_all;
333     ompi_request_wait_some_fn_t req_wait_some;
334 } ompi_request_fns_t;
335 
336 /**
337  * Globals used for tracking requests and request completion.
338  */
339 OMPI_DECLSPEC extern opal_pointer_array_t   ompi_request_f_to_c_table;
340 OMPI_DECLSPEC extern ompi_predefined_request_t        ompi_request_null;
341 OMPI_DECLSPEC extern ompi_predefined_request_t        *ompi_request_null_addr;
342 OMPI_DECLSPEC extern ompi_request_t         ompi_request_empty;
343 OMPI_DECLSPEC extern ompi_status_public_t   ompi_status_empty;
344 OMPI_DECLSPEC extern ompi_request_fns_t     ompi_request_functions;
345 
/**
 * Initialize the MPI_Request subsystem; invoked during MPI_INIT.
 *
 * @return Integer status code.
 */
int ompi_request_init(void);
350 
351 /**
352  * Free a persistent request to a MPI_PROC_NULL peer (there's no
353  * freelist to put it back to, so we have to actually OBJ_RELEASE it).
354  */
355 OMPI_DECLSPEC int ompi_request_persistent_proc_null_free(ompi_request_t **request);
356 
/**
 * Shut down the MPI_Request subsystem; invoked during MPI_FINALIZE.
 *
 * @return Integer status code.
 */
int ompi_request_finalize(void);
361 
362 /**
363  * Cancel a pending request.
364  */
ompi_request_cancel(ompi_request_t * request)365 static inline int ompi_request_cancel(ompi_request_t* request)
366 {
367     if (request->req_cancel != NULL) {
368         return request->req_cancel(request, true);
369     }
370     return OMPI_SUCCESS;
371 }
372 
373 /**
374  * Free a request.
375  *
376  * @param request (INOUT)   Pointer to request.
377  */
ompi_request_free(ompi_request_t ** request)378 static inline int ompi_request_free(ompi_request_t** request)
379 {
380     return (*request)->req_free(request);
381 }
382 
/*
 * The test/wait entry points are macros that resolve to the matching
 * member of ompi_request_functions, so replacing a member of that
 * struct redirects every user of the corresponding name.
 */
#define ompi_request_test      (ompi_request_functions.req_test)
#define ompi_request_test_any  (ompi_request_functions.req_test_any)
#define ompi_request_test_all  (ompi_request_functions.req_test_all)
#define ompi_request_test_some (ompi_request_functions.req_test_some)
#define ompi_request_wait      (ompi_request_functions.req_wait)
#define ompi_request_wait_any  (ompi_request_functions.req_wait_any)
#define ompi_request_wait_all  (ompi_request_functions.req_wait_all)
#define ompi_request_wait_some (ompi_request_functions.req_wait_some)
391 
392 /**
393  * Wait a particular request for completion
394  */
395 
ompi_request_wait_completion(ompi_request_t * req)396 static inline void ompi_request_wait_completion(ompi_request_t *req)
397 {
398     if (opal_using_threads () && !REQUEST_COMPLETE(req)) {
399         ompi_wait_sync_t sync;
400         WAIT_SYNC_INIT(&sync, 1);
401 
402         if (OPAL_ATOMIC_CMPSET_PTR(&req->req_complete, REQUEST_PENDING, &sync)) {
403             SYNC_WAIT(&sync);
404         } else {
405             /* completed before we had a chance to swap in the sync object */
406             WAIT_SYNC_SIGNALLED(&sync);
407         }
408 
409         assert(REQUEST_COMPLETE(req));
410         WAIT_SYNC_RELEASE(&sync);
411     } else {
412         while(!REQUEST_COMPLETE(req)) {
413             opal_progress();
414         }
415     }
416 }
417 
418 /**
419  *  Signal or mark a request as complete. If with_signal is true this will
420  *  wake any thread pending on the request. If with_signal is false, the
421  *  opposite will be true, the request will simply be marked as completed
422  *  and no effort will be made to correctly (atomically) handle the associated
423  *  synchronization primitive. This is a special case when the function
424  *  is called from the critical path for small messages, where we know
425  *  the current execution flow created the request, and no synchronized wait
426  *  has been set.
427  *  BEWARE: The error code should be set on the request prior to calling
428  *  this function, or the synchronization primitive might not be correctly
429  *  triggered.
430  */
ompi_request_complete(ompi_request_t * request,bool with_signal)431 static inline int ompi_request_complete(ompi_request_t* request, bool with_signal)
432 {
433     int rc = 0;
434 
435     if( NULL != request->req_complete_cb) {
436         rc = request->req_complete_cb( request );
437         request->req_complete_cb = NULL;
438     }
439 
440     if (0 == rc) {
441         if( OPAL_LIKELY(with_signal) ) {
442             if(!OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, REQUEST_COMPLETED)) {
443                 ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete,
444                                                                                        REQUEST_COMPLETED);
445                 /* In the case where another thread concurrently changed the request to REQUEST_PENDING */
446                 if( REQUEST_PENDING != tmp_sync )
447                     wait_sync_update(tmp_sync, 1, request->req_status.MPI_ERROR);
448             }
449         } else
450             request->req_complete = REQUEST_COMPLETED;
451     }
452 
453     return OMPI_SUCCESS;
454 }
455 
456 END_C_DECLS
457 
458 #endif
459