1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4  *                         University Research and Technology
5  *                         Corporation.  All rights reserved.
6  * Copyright (c) 2004-2020 The University of Tennessee and The University
7  *                         of Tennessee Research Foundation.  All rights
8  *                         reserved.
9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10  *                         University of Stuttgart.  All rights reserved.
11  * Copyright (c) 2004-2005 The Regents of the University of California.
12  *                         All rights reserved.
13  * Copyright (c) 2006-2017 Cisco Systems, Inc.  All rights reserved
14  * Copyright (c) 2009-2012 Oracle and/or its affiliates.  All rights reserved.
15  * Copyright (c) 2012      Oak Ridge National Labs.  All rights reserved.
16  * Copyright (c) 2015-2017 Los Alamos National Security, LLC.  All rights
17  *                         reserved.
18  * Copyright (c) 2018      FUJITSU LIMITED.  All rights reserved.
19  * $COPYRIGHT$
20  *
21  * Additional copyrights may follow
22  *
23  * $HEADER$
24  */
25 /**
26  * @file
27  *
28  * Top-level description of requests
29  */
30 
31 #ifndef OMPI_REQUEST_H
32 #define OMPI_REQUEST_H
33 
34 #include "ompi_config.h"
35 #include "mpi.h"
36 #include "opal/class/opal_free_list.h"
37 #include "opal/class/opal_pointer_array.h"
38 #include "opal/threads/condition.h"
39 #include "opal/threads/wait_sync.h"
40 #include "ompi/constants.h"
41 
42 BEGIN_C_DECLS
43 
44 /**
45  * Request class
46  */
47 OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_request_t);
48 
49 /*
50  * The following include pulls in shared typedefs with debugger plugins.
51  * For more information on why we do this see the Notice to developers
52  * comment at the top of the ompi_msgq_dll.c file.
53  */
54 
55 #include "request_dbg.h"
56 
57 struct ompi_request_t;
58 
/**
 * Start callback for persistent requests.
 *
 * Invoked on behalf of MPI_START and MPI_STARTALL. MPI_START always passes
 * count == 1. MPI_STARTALL passes, in a single call, several requests that
 * share the same req_start value, which may help the implementation
 * schedule multiple communications together.
 *
 * @param count (IN)        Number of entries in the requests array
 * @param requests (IN/OUT) Array of persistent requests to initiate
 * @return                  OMPI_SUCCESS or failure status.
 */
typedef int (*ompi_request_start_fn_t)(size_t count, struct ompi_request_t **requests);
78 
/**
 * Required callback: releases the request and any resources associated
 * with it.
 *
 * @param rptr (INOUT)  Address of the request handle to release
 */
typedef int (*ompi_request_free_fn_t)(struct ompi_request_t **rptr);
83 
/**
 * Optional callback: cancels a pending request.
 *
 * @param request (IN)  Request to cancel
 * @param flag (IN)     Meaning is defined by the implementation and is not
 *                      documented in this header; ompi_request_cancel()
 *                      always passes true.
 */
typedef int (*ompi_request_cancel_fn_t)(struct ompi_request_t *request, int flag);
88 
/**
 * Optional callback: invoked once the request is complete from the MPI
 * library's point of view.
 *
 * The callback may release the request, provided the request will not be
 * used with ompi_request_wait* or ompi_request_test afterwards.
 *
 * @param request (IN)  The request that just completed
 * @return              *MUST* be 1 if the callback reposted the request
 *                      (using start) or called ompi_request_free() on it;
 *                      0 otherwise.
 */
typedef int (*ompi_request_complete_fn_t)(struct ompi_request_t *request);
97 
/**
 * Forward declarations of the MPI object types referenced through
 * ompi_mpi_object_t. Only pointers to these types are used in this header,
 * so the full definitions are not needed here.
 */
struct ompi_communicator_t;
struct ompi_win_t;
struct ompi_file_t;
112 
/**
 * Union holding a pointer to one of several MPI object types. A request
 * stores it in req_mpi_object to point at the MPI object that created it.
 */
typedef union ompi_mpi_object_t {
    /** Set when the object is a communicator */
    struct ompi_communicator_t *comm;
    /** Set when the object is a file */
    struct ompi_file_t *file;
    /** Set when the object is a window */
    struct ompi_win_t *win;
} ompi_mpi_object_t;
121 
122 /**
123  * Main top-level request struct definition
124  */
125 struct ompi_request_t {
126     opal_free_list_item_t super;                /**< Base type */
127     ompi_request_type_t req_type;               /**< Enum indicating the type of the request */
128     ompi_status_public_t req_status;            /**< Completion status */
129     volatile void *req_complete;                /**< Flag indicating wether request has completed */
130     volatile ompi_request_state_t req_state;    /**< enum indicate state of the request */
131     bool req_persistent;                        /**< flag indicating if the this is a persistent request */
132     int req_f_to_c_index;                       /**< Index in Fortran <-> C translation array */
133     ompi_request_start_fn_t req_start;          /**< Called by MPI_START and MPI_STARTALL */
134     ompi_request_free_fn_t req_free;            /**< Called by free */
135     ompi_request_cancel_fn_t req_cancel;        /**< Optional function to cancel the request */
136     ompi_request_complete_fn_t req_complete_cb; /**< Called when the request is MPI completed */
137     void *req_complete_cb_data;
138     ompi_mpi_object_t req_mpi_object;           /**< Pointer to MPI object that created this request */
139 };
140 
141 /**
142  * Convenience typedef
143  */
144 typedef struct ompi_request_t ompi_request_t;
145 
146 
147 /**
148  * Padded struct to maintain back compatibiltiy.
149  * See ompi/communicator/communicator.h comments with struct ompi_communicator_t
150  * for full explanation why we chose the following padding construct for predefines.
151  */
152 #define PREDEFINED_REQUEST_PAD 256
153 
154 struct ompi_predefined_request_t {
155     struct ompi_request_t request;
156     char padding[PREDEFINED_REQUEST_PAD - sizeof(ompi_request_t)];
157 };
158 
159 typedef struct ompi_predefined_request_t ompi_predefined_request_t;
160 
/**
 * Initialize a request.  This is a macro to avoid function call
 * overhead, since this is typically invoked in the critical
 * performance path (since requests may be re-used, it is possible
 * that we will have to initialize a request multiple times).
 *
 * The request is put in the OMPI_REQUEST_INACTIVE state with no completion
 * callback. Its completion marker starts as REQUEST_COMPLETED for a
 * persistent request and as REQUEST_PENDING otherwise.
 *
 * The macro expands to a single statement WITHOUT a trailing semicolon, so
 * it can be used as the unbraced body of an if/else; the caller writes the
 * semicolon. `persistent` is evaluated exactly once; `request` is evaluated
 * several times and must therefore be free of side effects.
 *
 * @param request     Pointer to the request to (re)initialize
 * @param persistent  Non-zero/true if the request is persistent
 */
#define OMPI_REQUEST_INIT(request, persistent)                              \
    do {                                                                    \
        const bool ompi_request_init_persistent_ = (persistent);            \
        (request)->req_complete = ompi_request_init_persistent_             \
                                      ? REQUEST_COMPLETED                   \
                                      : REQUEST_PENDING;                    \
        (request)->req_state = OMPI_REQUEST_INACTIVE;                       \
        (request)->req_persistent = ompi_request_init_persistent_;          \
        (request)->req_complete_cb = NULL;                                  \
        (request)->req_complete_cb_data = NULL;                             \
    } while (0)
176 
177 
/**
 * True when the completion marker of the request pointed to by req equals
 * REQUEST_COMPLETED.
 */
#define REQUEST_COMPLETE(req)        ((req)->req_complete == REQUEST_COMPLETED)
/**
 * Finalize a request.  This is a macro to avoid function call
 * overhead, since this is typically invoked in the critical
 * performance path (since requests may be re-used, it is possible
 * that we will have to finalize a request multiple times).
 *
 * The request state becomes OMPI_REQUEST_INVALID. If MPI_Request_f2c()
 * was previously invoked on the request, it was added to the f2c table;
 * in that case its slot in ompi_request_f_to_c_table is cleared and
 * req_f_to_c_index is reset to MPI_UNDEFINED.
 *
 * This macro should be called only from the MPI layer and never from the
 * PML: it takes care of the upper-level clean-up. When the user calls
 * MPI_Request_free all MPI-level resources must be released, so this macro
 * has to be invoked on that path too.
 *
 * The macro expands to a single statement WITHOUT a trailing semicolon, so
 * it can be used as the unbraced body of an if/else; the caller writes the
 * semicolon. `request` is evaluated several times and must be free of side
 * effects.
 *
 * @param request  Pointer to the request to finalize
 */
#define OMPI_REQUEST_FINI(request)                                      \
    do {                                                                \
        (request)->req_state = OMPI_REQUEST_INVALID;                    \
        if (MPI_UNDEFINED != (request)->req_f_to_c_index) {             \
            opal_pointer_array_set_item(&ompi_request_f_to_c_table,     \
                                        (request)->req_f_to_c_index,    \
                                        NULL);                          \
            (request)->req_f_to_c_index = MPI_UNDEFINED;                \
        }                                                               \
    } while (0)
203 
204 /**
205  * Non-blocking test for request completion.
206  *
207  * @param request (IN)   Array of requests
208  * @param complete (OUT) Flag indicating if index is valid (a request completed).
209  * @param status (OUT)   Status of completed request.
210  * @return               OMPI_SUCCESS or failure status.
211  *
212  * Note that upon completion, the request is freed, and the
213  * request handle at index set to NULL.
214  */
215 typedef int (*ompi_request_test_fn_t)(ompi_request_t ** rptr,
216                                       int *completed,
217                                       ompi_status_public_t * status );
218 /**
219  * Non-blocking test for request completion.
220  *
221  * @param count (IN)     Number of requests
222  * @param request (IN)   Array of requests
223  * @param index (OUT)    Index of first completed request.
224  * @param complete (OUT) Flag indicating if index is valid (a request completed).
225  * @param status (OUT)   Status of completed request.
226  * @return               OMPI_SUCCESS or failure status.
227  *
228  * Note that upon completion, the request is freed, and the
229  * request handle at index set to NULL.
230  */
231 typedef int (*ompi_request_test_any_fn_t)(size_t count,
232                                           ompi_request_t ** requests,
233                                           int *index,
234                                           int *completed,
235                                           ompi_status_public_t * status);
236 /**
237  * Non-blocking test for request completion.
238  *
239  * @param count (IN)      Number of requests
240  * @param requests (IN)   Array of requests
241  * @param completed (OUT) Flag indicating wether all requests completed.
242  * @param statuses (OUT)  Array of completion statuses.
243  * @return                OMPI_SUCCESS or failure status.
244  *
245  * This routine returns completed==true if all requests have completed.
246  * The statuses parameter is only updated if all requests completed. Likewise,
247  * the requests array is not modified (no requests freed), unless all requests
248  * have completed.
249  */
250 typedef int (*ompi_request_test_all_fn_t)(size_t count,
251                                           ompi_request_t ** requests,
252                                           int *completed,
253                                           ompi_status_public_t * statuses);
254 /**
255  * Non-blocking test for some of N requests to complete.
256  *
257  * @param count (IN)        Number of requests
258  * @param requests (INOUT)  Array of requests
259  * @param outcount (OUT)    Number of finished requests
260  * @param indices (OUT)     Indices of the finished requests
261  * @param statuses (OUT)    Array of completion statuses.
262  * @return                  OMPI_SUCCESS, OMPI_ERR_IN_STATUS or failure status.
263  *
264  */
265 typedef int (*ompi_request_test_some_fn_t)(size_t count,
266                                            ompi_request_t ** requests,
267                                            int * outcount,
268                                            int * indices,
269                                            ompi_status_public_t * statuses);
270 /**
271  * Wait (blocking-mode) for one requests to complete.
272  *
273  * @param request (IN)    Pointer to request.
274  * @param status (OUT)    Status of completed request.
275  * @return                OMPI_SUCCESS or failure status.
276  *
277  */
278 typedef int (*ompi_request_wait_fn_t)(ompi_request_t ** req_ptr,
279                                       ompi_status_public_t * status);
280 /**
281  * Wait (blocking-mode) for one of N requests to complete.
282  *
283  * @param count (IN)      Number of requests
284  * @param requests (IN)   Array of requests
285  * @param index (OUT)     Index into request array of completed request.
286  * @param status (OUT)    Status of completed request.
287  * @return                OMPI_SUCCESS or failure status.
288  *
289  */
290 typedef int (*ompi_request_wait_any_fn_t)(size_t count,
291                                           ompi_request_t ** requests,
292                                           int *index,
293                                           ompi_status_public_t * status);
294 /**
295  * Wait (blocking-mode) for all of N requests to complete.
296  *
297  * @param count (IN)      Number of requests
298  * @param requests (IN)   Array of requests
299  * @param statuses (OUT)  Array of completion statuses.
300  * @return                OMPI_SUCCESS or failure status.
301  *
302  */
303 typedef int (*ompi_request_wait_all_fn_t)(size_t count,
304                                           ompi_request_t ** requests,
305                                           ompi_status_public_t * statuses);
306 /**
307  * Wait (blocking-mode) for some of N requests to complete.
308  *
309  * @param count (IN)        Number of requests
310  * @param requests (INOUT)  Array of requests
311  * @param outcount (OUT)    Number of finished requests
312  * @param indices (OUT)     Indices of the finished requests
313  * @param statuses (OUT)    Array of completion statuses.
314  * @return                  OMPI_SUCCESS, OMPI_ERR_IN_STATUS or failure status.
315  *
316  */
317 typedef int (*ompi_request_wait_some_fn_t)(size_t count,
318                                            ompi_request_t ** requests,
319                                            int * outcount,
320                                            int * indices,
321                                            ompi_status_public_t * statuses);
322 
323 /**
324  * Replaceable request functions
325  */
326 typedef struct ompi_request_fns_t {
327     ompi_request_test_fn_t      req_test;
328     ompi_request_test_any_fn_t  req_test_any;
329     ompi_request_test_all_fn_t  req_test_all;
330     ompi_request_test_some_fn_t req_test_some;
331     ompi_request_wait_fn_t      req_wait;
332     ompi_request_wait_any_fn_t  req_wait_any;
333     ompi_request_wait_all_fn_t  req_wait_all;
334     ompi_request_wait_some_fn_t req_wait_some;
335 } ompi_request_fns_t;
336 
337 /**
338  * Globals used for tracking requests and request completion.
339  */
340 OMPI_DECLSPEC extern opal_pointer_array_t   ompi_request_f_to_c_table;
341 OMPI_DECLSPEC extern ompi_predefined_request_t        ompi_request_null;
342 OMPI_DECLSPEC extern ompi_predefined_request_t        *ompi_request_null_addr;
343 OMPI_DECLSPEC extern ompi_request_t         ompi_request_empty;
344 OMPI_DECLSPEC extern ompi_status_public_t   ompi_status_empty;
345 OMPI_DECLSPEC extern ompi_request_fns_t     ompi_request_functions;
346 
347 /**
348  * Initialize the MPI_Request subsystem; invoked during MPI_INIT.
349  */
350 int ompi_request_init(void);
351 
352 /**
353  * Shut down the MPI_Request subsystem; invoked during MPI_FINALIZE.
354  */
355 int ompi_request_finalize(void);
356 
357 /**
358  * Create a persistent request that does nothing (e.g., to MPI_PROC_NULL).
359  */
360 int ompi_request_persistent_noop_create(ompi_request_t **request);
361 
362 /**
363  * Cancel a pending request.
364  */
ompi_request_cancel(ompi_request_t * request)365 static inline int ompi_request_cancel(ompi_request_t* request)
366 {
367     if (request->req_cancel != NULL) {
368         return request->req_cancel(request, true);
369     }
370     return OMPI_SUCCESS;
371 }
372 
373 /**
374  * Free a request.
375  *
376  * @param request (INOUT)   Pointer to request.
377  */
ompi_request_free(ompi_request_t ** request)378 static inline int ompi_request_free(ompi_request_t** request)
379 {
380     return (*request)->req_free(request);
381 }
382 
/*
 * Test/wait entry points. Each name expands to the matching function
 * pointer stored in the global ompi_request_functions table, so replacing
 * an entry in that table changes what these names call.
 */
#define ompi_request_test      (ompi_request_functions.req_test)
#define ompi_request_test_any  (ompi_request_functions.req_test_any)
#define ompi_request_test_all  (ompi_request_functions.req_test_all)
#define ompi_request_test_some (ompi_request_functions.req_test_some)
#define ompi_request_wait      (ompi_request_functions.req_wait)
#define ompi_request_wait_any  (ompi_request_functions.req_wait_any)
#define ompi_request_wait_all  (ompi_request_functions.req_wait_all)
#define ompi_request_wait_some (ompi_request_functions.req_wait_some)
391 
392 /**
393  * Wait a particular request for completion
394  */
395 
ompi_request_wait_completion(ompi_request_t * req)396 static inline void ompi_request_wait_completion(ompi_request_t *req)
397 {
398     if (opal_using_threads () && !REQUEST_COMPLETE(req)) {
399         void *_tmp_ptr = REQUEST_PENDING;
400         ompi_wait_sync_t sync;
401 
402         WAIT_SYNC_INIT(&sync, 1);
403 
404         if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) {
405             SYNC_WAIT(&sync);
406         } else {
407             /* completed before we had a chance to swap in the sync object */
408             WAIT_SYNC_SIGNALLED(&sync);
409         }
410 
411         assert(REQUEST_COMPLETE(req));
412         WAIT_SYNC_RELEASE(&sync);
413     } else {
414         while(!REQUEST_COMPLETE(req)) {
415             opal_progress();
416         }
417     }
418 }
419 
420 /**
421  *  Signal or mark a request as complete. If with_signal is true this will
422  *  wake any thread pending on the request. If with_signal is false, the
423  *  opposite will be true, the request will simply be marked as completed
424  *  and no effort will be made to correctly (atomically) handle the associated
425  *  synchronization primitive. This is a special case when the function
426  *  is called from the critical path for small messages, where we know
427  *  the current execution flow created the request, and no synchronized wait
428  *  has been set.
429  *  BEWARE: The error code should be set on the request prior to calling
430  *  this function, or the synchronization primitive might not be correctly
431  *  triggered.
432  */
ompi_request_complete(ompi_request_t * request,bool with_signal)433 static inline int ompi_request_complete(ompi_request_t* request, bool with_signal)
434 {
435     int rc = 0;
436 
437     if(NULL != request->req_complete_cb) {
438         /* Set the request cb to NULL to allow resetting in the callback */
439         ompi_request_complete_fn_t fct = request->req_complete_cb;
440         request->req_complete_cb = NULL;
441         rc = fct( request );
442     }
443 
444     if (0 == rc) {
445         if( OPAL_LIKELY(with_signal) ) {
446             void *_tmp_ptr = REQUEST_PENDING;
447 
448             if(!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_COMPLETED)) {
449                 ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete,
450                                                                                        REQUEST_COMPLETED);
451                 /* In the case where another thread concurrently changed the request to REQUEST_PENDING */
452                 if( REQUEST_PENDING != tmp_sync )
453                     wait_sync_update(tmp_sync, 1, request->req_status.MPI_ERROR);
454             }
455         } else
456             request->req_complete = REQUEST_COMPLETED;
457     }
458 
459     return OMPI_SUCCESS;
460 }
461 
/**
 * Register a completion callback, and its data, on a request.
 *
 * If the request is already marked REQUEST_COMPLETED when a non-NULL
 * callback is registered, the callback is detached again and invoked
 * immediately.
 *
 * @param request (IN)  Request to attach the callback to
 * @param cb (IN)       Completion callback (may be NULL)
 * @param cb_data (IN)  Opaque pointer stored in req_complete_cb_data
 * @return              OMPI_SUCCESS when the callback is not invoked here;
 *                      otherwise the value returned by the callback.
 */
static inline int ompi_request_set_callback(ompi_request_t* request,
                                            ompi_request_complete_fn_t cb,
                                            void* cb_data)
{
    ompi_request_complete_fn_t pending_cb;

    request->req_complete_cb_data = cb_data;
    request->req_complete_cb = cb;

    if ((NULL == request->req_complete_cb) || (REQUEST_COMPLETED != request->req_complete)) {
        /* nothing to run now */
        return OMPI_SUCCESS;
    }

    /* The request is completed and the callback has not been called:
     * call it now, detaching it first */
    pending_cb = request->req_complete_cb;
    request->req_complete_cb = NULL;
    return pending_cb( request );
}
476 
477 END_C_DECLS
478 
479 #endif
480