1 /* 2 * Copyright (C) by Argonne National Laboratory 3 * See COPYRIGHT in top-level directory 4 */ 5 6 /* 7 * WARNING: Functions and macros in this file are for internal use only. 8 * As such, they are only visible to the device and 9 * channel. Do not include them in the MPID macros. 10 */ 11 12 #ifndef MPIDIMPL_H_INCLUDED 13 #define MPIDIMPL_H_INCLUDED 14 15 #include "mpichconf.h" 16 17 #if defined(HAVE_ASSERT_H) 18 #include <assert.h> 19 #endif 20 21 #include "mpiimpl.h" 22 23 /* Add the ch3 packet definitions */ 24 #include "mpidpkt.h" 25 26 #if !defined(MPIDI_IOV_DENSITY_MIN) 27 # define MPIDI_IOV_DENSITY_MIN (16 * 1024) 28 #endif 29 30 #if defined(HAVE_GETHOSTNAME) && defined(NEEDS_GETHOSTNAME_DECL) && \ 31 !defined(gethostname) 32 int gethostname(char *name, size_t len); 33 # endif 34 35 /* Default PMI version to use */ 36 #define MPIDI_CH3I_DEFAULT_PMI_VERSION 1 37 #define MPIDI_CH3I_DEFAULT_PMI_SUBVERSION 1 38 39 /* group of processes detected to have failed. This is a subset of 40 comm_world group. */ 41 extern MPIR_Group *MPIDI_Failed_procs_group; 42 extern int MPIDI_last_known_failed; 43 extern char *MPIDI_failed_procs_string; 44 45 #if defined(MPL_USE_DBG_LOGGING) 46 extern MPL_dbg_class MPIDI_CH3_DBG_CONNECT; 47 extern MPL_dbg_class MPIDI_CH3_DBG_DISCONNECT; 48 extern MPL_dbg_class MPIDI_CH3_DBG_PROGRESS; 49 extern MPL_dbg_class MPIDI_CH3_DBG_CHANNEL; 50 extern MPL_dbg_class MPIDI_CH3_DBG_OTHER; 51 extern MPL_dbg_class MPIDI_CH3_DBG_MSG; 52 extern MPL_dbg_class MPIDI_CH3_DBG_VC; 53 extern MPL_dbg_class MPIDI_CH3_DBG_REFCOUNT; 54 #endif /* MPL_USE_DBG_LOGGING */ 55 56 #define MPIDI_CHANGE_VC_STATE(vc, new_state) do { \ 57 MPL_DBG_VCSTATECHANGE(vc, VC_STATE_##new_state); \ 58 (vc)->state = MPIDI_VC_STATE_##new_state; \ 59 } while (0) 60 61 /*S 62 MPIDI_PG_t - Process group description 63 64 Notes: 65 Every 'MPI_COMM_WORLD' known to this process has an associated process 66 group. 67 S*/ 68 typedef struct MPIDI_PG 69 { 70 /* MPIU_Object field. 
MPIDI_PG_t objects are not allocated using the 71 MPIU_Object system, but we do use the associated reference counting 72 routines. Therefore, handle must be present, but is not used 73 except by debugging routines */ 74 MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */ 75 76 /* Next pointer used to maintain a list of all process groups known to 77 this process */ 78 struct MPIDI_PG * next; 79 80 /* Number of processes in the process group */ 81 int size; 82 83 /* VC table. At present this is a pointer to an array of VC structures. 84 Someday we may want make this a pointer to an array 85 of VC references. Thus, it is important to use MPIDI_PG_Get_vc() 86 instead of directly referencing this field. */ 87 struct MPIDI_VC * vct; 88 89 /* Pointer to the process group ID. The actual ID is defined and 90 allocated by the process group. The pointer is kept in the 91 device space because it is necessary for the device to be able to 92 find a particular process group. */ 93 void * id; 94 95 /* Flag to mark a procress group which is finalizing. This means thay 96 the VCs for this process group are closing, (normally becuase 97 MPI_Finalize was called). This is required to avoid a reconnection 98 of the VCs when the PG is closed due to unused elements in the event 99 queue */ 100 int finalize; 101 102 /* Replacement abstraction for connection information */ 103 /* Connection information needed to access processes in this process 104 group and to share the data with other processes. 
The items are 105 connData - pointer for data used to implement these functions 106 (e.g., a pointer to an array of process group info) 107 getConnInfo( rank, buf, bufsize, self ) - function to store into 108 buf the connection information for rank in this process 109 group 110 connInfoToString( buf_p, size, self ) - return in buf_p a string 111 that can be sent to another process to recreate the 112 connection information (the info needed to support 113 getConnInfo) 114 connInfoFromString( buf, self ) - setup the information needed 115 to implement getConnInfo 116 freeConnInfo( self ) - free any storage or resources associated 117 with the connection information. 118 119 See ch3/src/mpidi_pg.c 120 */ 121 void *connData; 122 int (*getConnInfo)( int, char *, int, struct MPIDI_PG * ); 123 int (*connInfoToString)( char **, int *, struct MPIDI_PG * ); 124 int (*connInfoFromString)( const char *, struct MPIDI_PG * ); 125 int (*freeConnInfo)( struct MPIDI_PG * ); 126 127 /* Rather than have each channel define its own fields for the 128 channel-specific data, we provide a fixed-sized scratchpad. Currently, 129 this has a very generous size, though this may shrink later (a channel 130 can always allocate storage and hang it off of the end). This 131 is necessary to allow dynamic loading of channels at MPI_Init time. */ 132 #define MPIDI_CH3_PG_SIZE 48 133 int32_t channel_private[MPIDI_CH3_PG_SIZE]; 134 #if defined(MPIDI_CH3_PG_DECL) 135 MPIDI_CH3_PG_DECL 136 #endif 137 } 138 MPIDI_PG_t; 139 140 141 /*S 142 MPIDI_Process_t - The information required about this process by the CH3 143 device. 
/*
 * MPIDI_Datatype_get_info - collect the layout facts for count_ elements
 * of datatype_.  Every output argument is assigned through, so each must
 * be an lvalue:
 *
 *   dt_ptr_        - NULL for a builtin handle, otherwise whatever
 *                    MPIR_Datatype_get_ptr() stores
 *   dt_contig_out_ - TRUE for a builtin handle, otherwise whatever
 *                    MPIR_Datatype_is_contig() stores
 *   data_sz_out_   - count_ (converted to intptr_t) times the element size
 *   dt_true_lb_    - 0 for a builtin handle, otherwise dt_ptr_->true_lb
 *
 * The expansion is a bare brace block and datatype_ is evaluated several
 * times, so pass an expression without side effects.
 */
/* FIXME: We want to avoid even storing information about the builtins
   if we can */
#define MPIDI_Datatype_get_info(count_, datatype_, dt_contig_out_, data_sz_out_, dt_ptr_, dt_true_lb_) \
{                                                                               \
    if (!HANDLE_IS_BUILTIN(datatype_)) {                                        \
        MPIR_Datatype_get_ptr((datatype_), (dt_ptr_));                          \
        MPIR_Datatype_is_contig((datatype_), &(dt_contig_out_));                \
        (data_sz_out_) = (intptr_t) (count_) * (dt_ptr_)->size;                 \
        (dt_true_lb_) = (dt_ptr_)->true_lb;                                     \
        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER, TERSE,                             \
                        (MPL_DBG_FDEST,                                         \
                         "user defined datatype: dt_contig=%d, dt_sz=" MPI_AINT_FMT_DEC_SPEC ", data_sz=%" PRIdPTR, \
                         (dt_contig_out_), (dt_ptr_)->size, (data_sz_out_)));   \
    } else {                                                                    \
        (dt_ptr_) = NULL;                                                       \
        (dt_contig_out_) = TRUE;                                                \
        (dt_true_lb_) = 0;                                                      \
        (data_sz_out_) = (intptr_t) (count_) * MPIR_Datatype_get_basic_size(datatype_); \
        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER, TERSE,                             \
                        (MPL_DBG_FDEST,                                         \
                         "basic datatype: dt_contig=%d, dt_sz=%d, data_sz=%" PRIdPTR, \
                         (dt_contig_out_), MPIR_Datatype_get_basic_size(datatype_), \
                         (data_sz_out_)));                                      \
    }                                                                           \
}
They are 199 * Send, Receive, RMA, User, Persistent 200 * In addition, send and RMA requests may be "incomplete"; this means that 201 * they have not sent their initial packet, and they may store additional 202 * data about the operation that will be used when the initial packet 203 * can be sent. 204 * 205 * Also, requests that are used internally within blocking MPI routines 206 * (only Send and Receive requests) do not require references to 207 * (or increments of the reference counts) communicators or datatypes. 208 * Thus, freeing these requests also does not require testing or 209 * decrementing these fields. 210 * 211 * Finally, we want to avoid multiple tests for a failure to allocate 212 * a request. Thus, the request allocation macros will jump to fn_fail 213 * if there is an error. This is akin to using a "throw" in C++. 214 * 215 * For example, a posted (unmatched) receive queue entry needs only: 216 * match info 217 * buffer info (address, count, datatype) 218 * if nonblocking, communicator (used for finding error handler) 219 * if nonblocking, cancelled state 220 * Once matched, a receive queue entry also needs 221 * actual match info 222 * message type (eager, rndv, eager-sync) 223 * completion state (is all data available) 224 * If destination datatype is non-contiguous, it also needs 225 * current unpack state. 226 * An unexpected message (in the unexpected receive queue) needs only: 227 * match info 228 * message type (eager, rndv, eager-sync) 229 * if (eager, eager-sync), data 230 * completion state (is all data available?) 231 * A send request requires only 232 * message type (eager, rndv, eager-sync) 233 * completion state (has all data been sent?) 
/* FIXME: Why does a send request need the match information?
   Is that for debugging information?  In case the initial envelope
   cannot be sent?  Ditto for the dev.user_buf, count, and datatype
   fields when the data is sent eagerly.

   The following fields still need to be set by the caller:
      datatype_ptr
      status.MPI_ERROR

   MPIDI_Request_create_sreq - allocate a send request and fill in the
   fields shown below.  The macro reads rank, tag, context_offset, comm,
   buf, datatype, and count from the enclosing scope under exactly those
   names (they are not macro arguments).

   If MPIR_Request_create() returns NULL, mpi_errno_ is set and FAIL_ is
   executed before the request is touched.  FAIL_ must transfer control
   out of the macro (e.g. "goto fn_fail"); otherwise the remaining
   statements dereference the NULL request.
*/
#define MPIDI_Request_create_sreq(sreq_, mpi_errno_, FAIL_)                     \
{                                                                               \
    (sreq_) = MPIR_Request_create(MPIR_REQUEST_KIND__SEND);                     \
    MPIR_ERR_CHKANDSTMT((sreq_) == NULL, (mpi_errno_), MPI_ERR_OTHER, FAIL_,    \
                        "**nomemreq");                                          \
    MPIR_Object_set_ref((sreq_), 2);                                            \
    (sreq_)->comm = comm;                                                       \
    (sreq_)->dev.partner_request = NULL;                                        \
    MPIR_Comm_add_ref(comm);                                                    \
    (sreq_)->dev.match.parts.rank = rank;                                       \
    (sreq_)->dev.match.parts.tag = tag;                                         \
    (sreq_)->dev.match.parts.context_id = comm->context_id + context_offset;    \
    (sreq_)->dev.user_buf = (void *) buf;                                       \
    (sreq_)->dev.user_count = count;                                            \
    (sreq_)->dev.datatype = datatype;                                           \
    (sreq_)->dev.iov_count = 0;                                                 \
}

/* This is the receive request version of MPIDI_Request_create_sreq.  It
   only sets the reference count and clears dev.partner_request; allocation
   failure is reported the same way (mpi_errno_ is set, then FAIL_ runs). */
#define MPIDI_Request_create_rreq(rreq_, mpi_errno_, FAIL_)                     \
{                                                                               \
    (rreq_) = MPIR_Request_create(MPIR_REQUEST_KIND__RECV);                     \
    MPIR_ERR_CHKANDSTMT((rreq_) == NULL, (mpi_errno_), MPI_ERR_OTHER, FAIL_,    \
                        "**nomemreq");                                          \
    MPIR_Object_set_ref((rreq_), 2);                                            \
    (rreq_)->dev.partner_request = NULL;                                        \
}
/* Extract the request-type field (one of the MPIDI_REQUEST_TYPE_* values)
   from the dev.state word of req_. */
#define MPIDI_Request_get_type(req_) \
    (((req_)->dev.state & MPIDI_REQUEST_TYPE_MASK) >> MPIDI_REQUEST_TYPE_SHIFT)

/* Overwrite the request-type field of req_ with type_.  Bits of type_ that
   do not fit in the field are discarded; every other bit of dev.state is
   kept.  req_ is evaluated twice and the expansion is a bare brace block. */
#define MPIDI_Request_set_type(req_, type_)                                     \
{                                                                               \
    (req_)->dev.state = ((req_)->dev.state & ~MPIDI_REQUEST_TYPE_MASK) |        \
        (((type_) << MPIDI_REQUEST_TYPE_SHIFT) & MPIDI_REQUEST_TYPE_MASK);      \
}
/* Report through flag_ whether a cancel was already pending on req_, then
   mark one as pending.  This is a plain read followed by a plain write. */
#define MPIDI_Request_cancel_pending(req_, flag_)       \
{                                                       \
    *(flag_) = (req_)->dev.cancel_pending;              \
    (req_)->dev.cancel_pending = TRUE;                  \
}

/* The following two macros were formerly a single confusing macro with
   side effects named MPIDI_Request_recv_pending(). */

/* Copy the current receive-pending count of req_ into *recv_pending_. */
#define MPIDI_Request_check_pending(req_, recv_pending_)                \
    do {                                                                \
        *(recv_pending_) = (req_)->dev.recv_pending_count;              \
    } while (0)

/* Lower the receive-pending count of req_ by one. */
#define MPIDI_Request_decr_pending(req_)                \
    do {                                                \
        (req_)->dev.recv_pending_count -= 1;            \
    } while (0)

/* MPIDI_Request_fetch_and_clear_rts_sreq() - hand the current partner RTS
   sreq back through rts_sreq_ and reset dev.partner_request to NULL.
   NOTE(review): the expansion is two ordinary statements, so any atomicity
   has to come from the caller's critical section - confirm at call sites. */
#define MPIDI_Request_fetch_and_clear_rts_sreq(sreq_, rts_sreq_)        \
{                                                                       \
    *(rts_sreq_) = (sreq_)->dev.partner_request;                        \
    (sreq_)->dev.partner_request = NULL;                                \
}
/* Store in *vcp_ the VC that comm_'s VC reference table holds for rank_.
   The expansion is a single assignment expression without a trailing
   semicolon. */
#define MPIDI_Comm_get_vc(comm_, rank_, vcp_) *(vcp_) = (comm_)->dev.vcrt->vcr_table[(rank_)]

/* VC debug printing: real functions when USE_MPIDI_DBG_PRINT_VC is
   defined, otherwise macros that expand to nothing. */
#if defined(USE_MPIDI_DBG_PRINT_VC)
void MPIDI_DBG_PrintVC(MPIDI_VC_t *vc);
void MPIDI_DBG_PrintVCState2(MPIDI_VC_t *vc, MPIDI_VC_State_t new_state);
void MPIDI_DBG_PrintVCState(MPIDI_VC_t *vc);
#else
#define MPIDI_DBG_PrintVC(vc)
#define MPIDI_DBG_PrintVCState2(vc, new_state)
#define MPIDI_DBG_PrintVCState(vc)
#endif

/* Same lookup as MPIDI_Comm_get_vc; in addition a VC found in
   MPIDI_VC_STATE_INACTIVE is moved to the ACTIVE state.  VCs in any other
   state are left untouched. */
#define MPIDI_Comm_get_vc_set_active(comm_, rank_, vcp_)                \
    do {                                                                \
        MPIDI_Comm_get_vc(comm_, rank_, vcp_);                          \
        if ((*(vcp_))->state == MPIDI_VC_STATE_INACTIVE) {              \
            MPIDI_DBG_PrintVCState2(*(vcp_), MPIDI_VC_STATE_ACTIVE);    \
            MPIDI_CHANGE_VC_STATE((*(vcp_)), ACTIVE);                   \
        }                                                               \
    } while (0)
sizeof(MPIDI_CH3_Pkt_t)); \ 466 (pkt_)->type = (type_); \ 467 } 468 #endif 469 470 /*------------------ 471 END PACKET SECTION 472 ------------------*/ 473 474 475 /*--------------------------- 476 BEGIN PROCESS GROUP SECTION 477 ---------------------------*/ 478 /* FIXME: Determine which of these functions should be exported to all of 479 the MPICH routines and which are internal to the device implementation */ 480 typedef int (*MPIDI_PG_Compare_ids_fn_t)(void * id1, void * id2); 481 typedef int (*MPIDI_PG_Destroy_fn_t)(MPIDI_PG_t * pg); 482 483 int MPIDI_VCRT_Create(int size, struct MPIDI_VCRT **vcrt_ptr); 484 int MPIDI_VCRT_Add_ref(struct MPIDI_VCRT *vcrt); 485 int MPIDI_VCRT_Release(struct MPIDI_VCRT *vcrt, int isDisconnect); 486 int MPIDI_VCR_Dup(MPIDI_VCR orig_vcr, MPIDI_VCR * new_vcr); 487 488 int MPIDI_PG_Init(MPIDI_PG_Compare_ids_fn_t, MPIDI_PG_Destroy_fn_t); 489 int MPIDI_PG_Finalize(void); 490 int MPIDI_PG_Create(int vct_sz, void * pg_id, MPIDI_PG_t ** ppg); 491 int MPIDI_PG_Destroy(MPIDI_PG_t * pg); 492 int MPIDI_PG_Find(void * id, MPIDI_PG_t ** pgp); 493 int MPIDI_PG_Id_compare(void *id1, void *id2); 494 void MPIDI_PG_set_verbose(int level); 495 496 /* Always use the MPIDI_PG_iterator type, never its expansion. Otherwise it 497 will be difficult to make any changes later. 
/* Reference counting on a process group; both forward to the generic
   object reference-count macros. */
#define MPIDI_PG_add_ref(pg_)                           \
    do {                                                \
        MPIR_Object_add_ref(pg_);                       \
    } while (0)
#define MPIDI_PG_release_ref(pg_, inuse_)               \
    do {                                                \
        MPIR_Object_release_ref(pg_, inuse_);           \
    } while (0)

/* Store in *vcp_ the address of entry rank_ of pg_'s VC table.  The
   expansion is a single assignment expression without a trailing
   semicolon. */
#define MPIDI_PG_Get_vc(pg_, rank_, vcp_) *(vcp_) = &(pg_)->vct[rank_]

/* Same lookup as MPIDI_PG_Get_vc; in addition a VC found in
   MPIDI_VC_STATE_INACTIVE is moved to the ACTIVE state. */
#define MPIDI_PG_Get_vc_set_active(pg_, rank_, vcp_)                    \
    do {                                                                \
        MPIDI_PG_Get_vc(pg_, rank_, vcp_);                              \
        if ((*(vcp_))->state == MPIDI_VC_STATE_INACTIVE) {              \
            MPIDI_DBG_PrintVCState2(*(vcp_), MPIDI_VC_STATE_ACTIVE);    \
            MPIDI_CHANGE_VC_STATE((*(vcp_)), ACTIVE);                   \
        }                                                               \
    } while (0)

/* Number of processes in the process group pg_ */
#define MPIDI_PG_Get_size(pg_) ((pg_)->size)
/*E
  MPIDI_VC_State - States for a virtual connection.

  Notes:
  A closed connection is placed into 'STATE_INACTIVE'.  (is this true?)
  The numbering starts at 1, so a zero-filled state field matches none of
  the enumerators.
E*/
typedef enum MPIDI_VC_State {
    /* Comm either hasn't started or has completed. */
    MPIDI_VC_STATE_INACTIVE = 1,
    /* Comm has started and hasn't completed */
    MPIDI_VC_STATE_ACTIVE = 2,
    /* Local side has initiated close protocol */
    MPIDI_VC_STATE_LOCAL_CLOSE = 3,
    /* Remote side has initiated close protocol */
    MPIDI_VC_STATE_REMOTE_CLOSE = 4,
    /* Both have initiated close, we have acknowledged remote side */
    MPIDI_VC_STATE_CLOSE_ACKED = 5,
    /* Both have initiated close, both have acked */
    MPIDI_VC_STATE_CLOSED = 6,
    /* INACTIVE VCs are moved to this state in Finalize */
    MPIDI_VC_STATE_INACTIVE_CLOSED = 7,
    /* Abnormally terminated, there may be unsent/unreceived msgs */
    MPIDI_VC_STATE_MORIBUND = 8
} MPIDI_VC_State_t;
MPIR_Comm *comm, int context_offset, 594 struct MPIR_Request **request ); 595 int (*issend)(struct MPIDI_VC *vc, const void *buf, MPI_Aint count, MPI_Datatype datatype, 596 int dest, int tag, MPIR_Comm *comm, int context_offset, 597 struct MPIR_Request **request ); 598 599 int (*send_init)(struct MPIDI_VC *vc, const void *buf, MPI_Aint count, MPI_Datatype datatype, 600 int dest, int tag, MPIR_Comm *comm, int context_offset, 601 struct MPIR_Request **request ); 602 int (*bsend_init)(struct MPIDI_VC *vc, const void *buf, int count, MPI_Datatype datatype, 603 int dest, int tag, MPIR_Comm *comm, int context_offset, 604 struct MPIR_Request **request); 605 int (*rsend_init)(struct MPIDI_VC *vc, const void *buf, MPI_Aint count, MPI_Datatype datatype, 606 int dest, int tag, MPIR_Comm *comm, int context_offset, 607 struct MPIR_Request **request ); 608 int (*ssend_init)(struct MPIDI_VC *vc, const void *buf, MPI_Aint count, MPI_Datatype datatype, 609 int dest, int tag, MPIR_Comm *comm, int context_offset, 610 struct MPIR_Request **request ); 611 int (*startall)(struct MPIDI_VC *vc, int count, struct MPIR_Request *requests[]); 612 613 int (*cancel_send)(struct MPIDI_VC *vc, struct MPIR_Request *sreq); 614 int (*cancel_recv)(struct MPIDI_VC *vc, struct MPIR_Request *rreq); 615 616 int (*probe)(struct MPIDI_VC *vc, int source, int tag, MPIR_Comm *comm, int context_offset, 617 MPI_Status *status); 618 int (*iprobe)(struct MPIDI_VC *vc, int source, int tag, MPIR_Comm *comm, int context_offset, 619 int *flag, MPI_Status *status); 620 int (*improbe)(struct MPIDI_VC *vc, int source, int tag, MPIR_Comm *comm, int context_offset, 621 int *flag, MPIR_Request **message, MPI_Status *status); 622 int (*imrecv)(struct MPIDI_VC *vc, struct MPIR_Request *req); 623 } MPIDI_Comm_ops_t; 624 625 extern int (*MPIDI_Anysource_iprobe_fn)(int tag, MPIR_Comm * comm, int context_offset, int *flag, 626 MPI_Status * status); 627 extern int (*MPIDI_Anysource_improbe_fn)(int tag, MPIR_Comm * comm, int 
context_offset, 628 int *flag, MPIR_Request **message, 629 MPI_Status * status); 630 #endif 631 632 typedef struct MPIDI_VC 633 { 634 /* XXX - need better comment */ 635 /* MPIU_Object fields. MPIDI_VC_t objects are not allocated using the 636 MPIU_Object system, but we do use the associated 637 reference counting routines. The handle value is required 638 when debugging objects (the handle kind is used in reporting 639 on changes to the object). 640 */ 641 MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */ 642 643 /* state of the VC */ 644 MPIDI_VC_State_t state; 645 646 /* Process group to which this VC belongs */ 647 struct MPIDI_PG * pg; 648 649 /* Rank of the process in that process group associated with this VC */ 650 int pg_rank; 651 652 /* Local process ID */ 653 int lpid; 654 655 /* The node id of this process, used for topologically aware collectives. */ 656 int node_id; 657 658 /* port name tag */ 659 int port_name_tag; /* added to handle dynamic process mgmt */ 660 661 #ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS 662 void *connreq_obj; /* pointer to dynamic connection mgmt object */ 663 #endif 664 665 #if defined(MPID_USE_SEQUENCE_NUMBERS) 666 /* Sequence number of the next packet to be sent */ 667 MPID_Seqnum_t seqnum_send; 668 #endif 669 670 #if defined(MPIDI_CH3_MSGS_UNORDERED) 671 /* Sequence number of the next packet we expect to receive */ 672 MPID_Seqnum_t seqnum_recv; 673 674 /* Queue for holding packets received out of order. NOTE: the CH3 device 675 only orders packets. Handling of out-of-order data 676 is the responsibility of the channel. */ 677 MPIDI_CH3_Pkt_send_container_t * msg_reorder_queue; 678 #endif 679 680 /* rendezvous function pointers. 
Called to send a rendevous 681 message or when one is matched */ 682 int (* rndvSend_fn)( struct MPIR_Request **sreq_p, const void * buf, MPI_Aint count, 683 MPI_Datatype datatype, int dt_contig, intptr_t data_sz, 684 MPI_Aint dt_true_lb, int rank, int tag, 685 struct MPIR_Comm * comm, int context_offset ); 686 int (* rndvRecv_fn)( struct MPIDI_VC * vc, struct MPIR_Request *rreq ); 687 688 /* eager message threshold */ 689 int eager_max_msg_sz; 690 /* eager message threshold for ready sends. -1 means there's no limit */ 691 int ready_eager_max_msg_sz; 692 693 /* noncontiguous send function pointer. Called to send a 694 noncontiguous message. Caller must initialize 695 sreq->dev.segment, _first and _size. Contiguous messages are 696 called directly from CH3 and cannot be overridden. 697 The optional hdr_iov and n_hdr_iov input parameters are used for 698 variable-length extended header, specify NULL and zero if unused. 699 n_hdr_iov should not exceed MPL_IOV_LIMIT - 2 (one for header and one 700 for packed data).*/ 701 int (* sendNoncontig_fn)( struct MPIDI_VC *vc, struct MPIR_Request *sreq, 702 void *header, intptr_t hdr_sz, struct iovec *hdr_iov, int n_hdr_iov); 703 704 #ifdef ENABLE_COMM_OVERRIDES 705 MPIDI_Comm_ops_t *comm_ops; 706 #endif 707 708 # if defined(MPIDI_CH3_VC_DECL) 709 MPIDI_CH3_VC_DECL 710 # endif 711 } 712 MPIDI_VC_t; 713 714 typedef enum MPIDI_VC_Event 715 { 716 MPIDI_VC_EVENT_TERMINATED 717 } 718 MPIDI_VC_Event_t; 719 720 /*S 721 * MPIDI_VCRT - virtual connection reference table 722 * 723 * handle - this element is not used, but exists so that we may use the 724 * MPIU_Object routines for reference counting 725 * 726 * ref_count - number of references to this table 727 * 728 * vcr_table - array of virtual connection references 729 S*/ 730 typedef struct MPIDI_VCRT 731 { 732 MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */ 733 int size; 734 MPIDI_VC_t * vcr_table[1]; 735 } 736 MPIDI_VCRT_t; 737 738 /* number of VCs that are in MORIBUND 
/* MPIDI_VC_Init_seqnum_recv - reset the receive-side ordering state of a
   VC (next expected sequence number and the reorder queue).  Expands to
   nothing unless MPIDI_CH3_MSGS_UNORDERED is defined.

   The macro name is not followed by a ';' in the definition: a ';' there
   becomes the first token of the expansion, which turns
   "if (c) MPIDI_VC_Init_seqnum_recv(vc); else ..." into a syntax error and
   detaches the brace block from a guarding "if". */
#if defined(MPIDI_CH3_MSGS_UNORDERED)
#   define MPIDI_VC_Init_seqnum_recv(vc_)               \
    {                                                   \
        (vc_)->seqnum_recv = 0;                         \
        (vc_)->msg_reorder_queue = NULL;                \
    }
#else
#   define MPIDI_VC_Init_seqnum_recv(vc_)
#endif


/* Reference counting on a VC; both forward to the generic object
   reference-count macros. */
#define MPIDI_VC_add_ref( _vc )                                 \
    do { MPIR_Object_add_ref( _vc ); } while (0)

#define MPIDI_VC_release_ref( _vc, _inuse )                     \
    do { MPIR_Object_release_ref( _vc, _inuse ); } while (0)

/*------------------------------
  END VIRTUAL CONNECTION SECTION
  ------------------------------*/


/*---------------------------------
  BEGIN SEND/RECEIVE BUFFER SECTION
  ---------------------------------*/
/* Byte offset of field_ inside struct_, as an MPI_Aint */
#if !defined(MPIDI_CH3U_Offsetof)
#    define MPIDI_CH3U_Offsetof(struct_, field_) ((MPI_Aint) &((struct_*)0)->field_)
#endif

/* Size in bytes of one send/receive staging buffer */
#if !defined(MPIDI_CH3U_SRBuf_size)
#    define MPIDI_CH3U_SRBuf_size (256 * 1024)
#endif

/* One staging buffer plus the link used while it sits on the free list */
typedef struct __MPIDI_CH3U_SRBuf_element {
    /* Keep the buffer at the top to help keep the memory alignment */
    char   buf[MPIDI_CH3U_SRBuf_size];
    struct __MPIDI_CH3U_SRBuf_element * next;
} MPIDI_CH3U_SRBuf_element_t;

/* Head of the free list of staging buffers */
extern MPIDI_CH3U_SRBuf_element_t * MPIDI_CH3U_SRBuf_pool;

/* MPIDI_CH3U_SRBuf_get - take one element off the free list, allocating a
   fresh one with MPL_malloc when the list is empty, and store the address
   of its buffer in (req_)->dev.tmpbuf.

   If the allocation fails, (req_)->dev.tmpbuf is set to NULL and the pool
   is left empty; MPIDI_CH3U_SRBuf_alloc tests for exactly that value. */
#if !defined (MPIDI_CH3U_SRBuf_get)
#   define MPIDI_CH3U_SRBuf_get(req_)                                   \
    {                                                                   \
        MPIDI_CH3U_SRBuf_element_t * srbuf_elem_;                       \
        if (!MPIDI_CH3U_SRBuf_pool) {                                   \
            MPIDI_CH3U_SRBuf_pool =                                     \
                MPL_malloc(sizeof(MPIDI_CH3U_SRBuf_element_t), MPL_MEM_BUFFER); \
            if (MPIDI_CH3U_SRBuf_pool) {                                \
                MPIDI_CH3U_SRBuf_pool->next = NULL;                     \
            }                                                           \
        }                                                               \
        srbuf_elem_ = MPIDI_CH3U_SRBuf_pool;                            \
        if (srbuf_elem_) {                                              \
            MPIDI_CH3U_SRBuf_pool = srbuf_elem_->next;                  \
            srbuf_elem_->next = NULL;                                   \
            (req_)->dev.tmpbuf = srbuf_elem_->buf;                      \
        } else {                                                        \
            (req_)->dev.tmpbuf = NULL;                                  \
        }                                                               \
    }
#endif
/* MPIDI_CH3U_SRBuf_alloc - obtain a staging buffer for req_ through
   MPIDI_CH3U_SRBuf_get and record the outcome in the request:

     buffer obtained  - dev.tmpbuf_sz is set to MPIDI_CH3U_SRBuf_size and
                        the srbuf flag is set to TRUE
     dev.tmpbuf NULL  - dev.tmpbuf_sz is set to 0, the flag is not touched

   size_ is accepted but not used by this default implementation. */
#if !defined(MPIDI_CH3U_SRBuf_alloc)
#   define MPIDI_CH3U_SRBuf_alloc(req_, size_)                          \
    {                                                                   \
        MPIDI_CH3U_SRBuf_get(req_);                                     \
        if ((req_)->dev.tmpbuf == NULL) {                               \
            (req_)->dev.tmpbuf_sz = 0;                                  \
        } else {                                                        \
            (req_)->dev.tmpbuf_sz = MPIDI_CH3U_SRBuf_size;              \
            MPIDI_Request_set_srbuf_flag((req_), TRUE);                 \
        }                                                               \
    }
#endif
*/ 861 const char * MPIDI_Conn_GetStateString(int state); 862 #endif 863 864 /* These macros simplify and unify the debugging of changes in the 865 connection state 866 867 MPL_DBG_VCSTATECHANGE(vc,newstate) - use when changing the state 868 of a VC 869 870 MPL_DBG_VCCHSTATECHANGE(vc,newstate) - use when changing the state 871 of the channel-specific part of the vc (e.g., vc->ch.state) 872 873 MPL_DBG_CONNSTATECHANGE(vc,conn,newstate ) - use when changing the 874 state of a conn. vc may be null 875 876 MPL_DBG_CONNSTATECHANGEMSG(vc,conn,newstate,msg ) - use when changing the 877 state of a conn. vc may be null. Like CONNSTATECHANGE, but allows 878 an additional message 879 880 MPL_DBG_PKT(conn,pkt,msg) - print out a short description of an 881 packet being sent/received on the designated connection, prefixed with 882 msg. 883 884 */ 885 #define MPL_DBG_VCSTATECHANGE(_vc,_newstate) do { \ 886 MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT,TYPICAL,(MPL_DBG_FDEST, \ 887 "vc=%p: Setting state (vc) from %s to %s, vcchstate is %s", \ 888 _vc, MPIDI_VC_GetStateString((_vc)->state), \ 889 #_newstate, MPIDI_CH3_VC_GetStateString( (_vc) ))); \ 890 } while (0) 891 892 #define MPL_DBG_VCCHSTATECHANGE(_vc,_newstate) \ 893 MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT,TYPICAL,(MPL_DBG_FDEST, \ 894 "vc=%p: Setting state (ch) from %s to %s, vc state is %s", \ 895 _vc, MPIDI_CH3_VC_GetStateString((_vc)), \ 896 #_newstate, MPIDI_VC_GetStateString( (_vc)->state )) ) 897 898 #define MPL_DBG_CONNSTATECHANGE(_vc,_conn,_newstate) \ 899 MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT,TYPICAL,(MPL_DBG_FDEST, \ 900 "vc=%p,conn=%p: Setting state (conn) from %s to %s, vcstate = %s", \ 901 _vc, _conn, \ 902 MPIDI_Conn_GetStateString((_conn)->state), #_newstate, \ 903 _vc ? 
MPIDI_VC_GetStateString((_vc)->state) : "<no vc>" )) 904 905 #define MPL_DBG_CONNSTATECHANGE_MSG(_vc,_conn,_newstate,_msg) \ 906 MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT,TYPICAL,(MPL_DBG_FDEST, \ 907 "vc=%p,conn=%p: Setting conn state from %s to %s, vcstate = %s %s", \ 908 _vc, _conn, \ 909 MPIDI_Conn_GetStateString((_conn)->state), #_newstate, \ 910 _vc ? MPIDI_VC_GetStateString((_vc)->state) : "<no vc>", _msg )) 911 #define MPL_DBG_VCUSE(_vc,_msg) \ 912 MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT,TYPICAL,(MPL_DBG_FDEST,\ 913 "vc=%p: Using vc for %s", _vc, _msg )) 914 #define MPL_DBG_PKT(_conn,_pkt,_msg) \ 915 MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,TYPICAL,(MPL_DBG_FDEST,\ 916 "conn=%p: %s %s", _conn, _msg, MPIDI_Pkt_GetDescString( _pkt ) )) 917 918 const char *MPIDI_Pkt_GetDescString( MPIDI_CH3_Pkt_t *pkt ); 919 920 /* These macros help trace communication headers */ 921 #define MPL_DBG_MSGPKT(_vc,_tag,_contextid,_dest,_size,_kind) \ 922 MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_MSG,TYPICAL,(MPL_DBG_FDEST,\ 923 "%s: vc=%p, tag=%d, context=%d, dest=%d, datasz=%" PRIdPTR,\ 924 _kind,_vc,_tag,_contextid,_dest,_size) ) 925 926 /* FIXME: Switch this to use the common debug code */ 927 void MPIDI_err_printf(char *, char *, ...); 928 929 /* FIXME: This does not belong here */ 930 #ifdef USE_MPIDI_DBG_PRINT_VC 931 extern char *MPIDI_DBG_parent_str; 932 #endif 933 934 #define MPIDI_ERR_PRINTF(e) MPIDI_err_printf e 935 936 #if defined(HAVE_MACRO_VA_ARGS) 937 # define MPIDI_err_printf(func, fmt, ...) 
\ 938 { \ 939 MPL_error_printf("[%d] ERROR - %s(): " fmt "\n", MPIR_Process.comm_world->rank, func, __VA_ARGS__); \ 940 fflush(stdout); \ 941 } 942 #endif 943 944 #ifdef MPICH_DBG_OUTPUT 945 void MPIDI_DBG_Print_packet(MPIDI_CH3_Pkt_t *pkt); 946 #else 947 # define MPIDI_DBG_Print_packet(a) 948 #endif 949 950 /* Given a state, return the string for this state (VC's and connections) */ 951 const char * MPIDI_VC_GetStateString(int); 952 /*-------------------------- 953 END DEBUGGING TOOL SECTION 954 --------------------------*/ 955 956 957 /* Prototypes for internal device routines */ 958 int MPIDI_Isend_self(const void *, MPI_Aint, MPI_Datatype, int, int, MPIR_Comm *, 959 int, int, MPIR_Request **); 960 961 /*-------------------------- 962 BEGIN MPI PORT SECTION 963 --------------------------*/ 964 /* These are the default functions */ 965 int MPIDI_Comm_connect(const char *, MPIR_Info *, int, MPIR_Comm *, MPIR_Comm **); 966 int MPIDI_Comm_accept(const char *, MPIR_Info *, int, MPIR_Comm *, MPIR_Comm **); 967 968 int MPIDI_Comm_spawn_multiple(int, char **, char ***, const int *, MPIR_Info **, 969 int, MPIR_Comm *, MPIR_Comm **, int *); 970 971 972 /* This structure defines a module that handles the routines that 973 work with MPI port names */ 974 typedef struct MPIDI_Port_Ops { 975 int (*OpenPort)( MPIR_Info *, char * ); 976 int (*ClosePort)( const char * ); 977 int (*CommAccept)( const char *, MPIR_Info *, int, MPIR_Comm *, 978 MPIR_Comm ** ); 979 int (*CommConnect)( const char *, MPIR_Info *, int, MPIR_Comm *, 980 MPIR_Comm ** ); 981 } MPIDI_PortFns; 982 #define MPIDI_PORTFNS_VERSION 1 983 int MPIDI_CH3_PortFnsInit( MPIDI_PortFns * ); 984 985 #ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS 986 /* Utility routines provided in src/ch3u_port.c for working with connection 987 queues */ 988 int MPIDI_CH3I_Acceptq_enqueue(MPIDI_VC_t * vc, int port_name_tag); 989 int MPIDI_Port_finalize(void); 990 991 int MPIDI_CH3I_Port_init(int port_name_tag); 992 int 
MPIDI_CH3I_Port_destroy(int port_name_tag); 993 #else 994 /* Need empty symbols to avoid failure at compile time if defined 995 * MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS. */ 996 #define MPIDI_CH3I_Acceptq_enqueue(vc, port_name_tag) (MPI_SUCCESS) 997 #define MPIDI_Port_finalize() (MPI_SUCCESS) 998 999 #define MPIDI_CH3I_Port_init(port_name_tag) (MPI_SUCCESS) 1000 #define MPIDI_CH3I_Port_destroy(port_name_tag) (MPI_SUCCESS) 1001 #endif 1002 /*-------------------------- 1003 END MPI PORT SECTION 1004 --------------------------*/ 1005 1006 #define MPIDI_MAX_KVS_VALUE_LEN 4096 1007 1008 /* ------------------------------------------------------------------------- */ 1009 /* mpirma.h (in src/mpi/rma?) */ 1010 /* ------------------------------------------------------------------------- */ 1011 1012 int MPIDI_RMA_init(void); 1013 void MPIDI_RMA_finalize(void); 1014 1015 /* The Win_fns table contains pointers to the channel's implementation of the 1016 * RMA window creation routines. The channel must provide the init function, 1017 * which can optionally override any defaults already set by CH3. 
1018 */ 1019 1020 typedef struct { 1021 int (*create)(void *, MPI_Aint, int, MPIR_Info *, MPIR_Comm *, MPIR_Win **); 1022 int (*allocate)(MPI_Aint, int, MPIR_Info *, MPIR_Comm *, void *, MPIR_Win **); 1023 int (*allocate_shared)(MPI_Aint, int, MPIR_Info *, MPIR_Comm *, void *, MPIR_Win **); 1024 int (*allocate_shm)(MPI_Aint, int, MPIR_Info *, MPIR_Comm *, void *, MPIR_Win **); 1025 int (*create_dynamic)(MPIR_Info *, MPIR_Comm *, MPIR_Win **); 1026 int (*detect_shm)(MPIR_Win **); 1027 int (*gather_info)(void *, MPI_Aint, int, MPIR_Info *, MPIR_Comm *, MPIR_Win **); 1028 int (*shared_query)(MPIR_Win *, int, MPI_Aint *, int *, void *); 1029 } MPIDI_CH3U_Win_fns_t; 1030 1031 extern MPIDI_CH3U_Win_fns_t MPIDI_CH3U_Win_fns; 1032 1033 typedef struct { 1034 int (*win_init)(MPI_Aint, int, int, int, MPIR_Info *, MPIR_Comm *, MPIR_Win **); 1035 int (*win_free)(MPIR_Win **); 1036 } MPIDI_CH3U_Win_hooks_t; 1037 1038 extern MPIDI_CH3U_Win_hooks_t MPIDI_CH3U_Win_hooks; 1039 1040 typedef struct MPIDI_CH3U_Win_pkt_ordering { 1041 1042 /* Ordered AM flush. 1043 * It means whether AM flush is guaranteed to be finished after all previous 1044 * RMA operations. It initialized by Nemesis and used by CH3. 
1045 * Note that we use single global flag for all targets including both 1046 * intra-node and inter-node processes.*/ 1047 int am_flush_ordered; 1048 } MPIDI_CH3U_Win_pkt_ordering_t; 1049 1050 extern MPIDI_CH3U_Win_pkt_ordering_t MPIDI_CH3U_Win_pkt_orderings; 1051 1052 /* CH3 and Channel window functions initializers */ 1053 int MPIDI_Win_fns_init(MPIDI_CH3U_Win_fns_t *win_fns); 1054 int MPIDI_CH3_Win_fns_init(MPIDI_CH3U_Win_fns_t *win_fns); 1055 1056 /* Channel window hooks initializer */ 1057 int MPIDI_CH3_Win_hooks_init(MPIDI_CH3U_Win_hooks_t *win_hooks); 1058 1059 int MPIDI_CH3_Win_pkt_orderings_init(MPIDI_CH3U_Win_pkt_ordering_t * win_pkt_orderings); 1060 1061 /* Default window creation functions provided by CH3 */ 1062 int MPIDI_CH3U_Win_create(void *, MPI_Aint, int, MPIR_Info *, MPIR_Comm *, 1063 MPIR_Win **); 1064 int MPIDI_CH3U_Win_allocate(MPI_Aint size, int disp_unit, MPIR_Info *info, 1065 MPIR_Comm *comm, void *baseptr, MPIR_Win **win); 1066 int MPIDI_CH3U_Win_allocate_no_shm(MPI_Aint size, int disp_unit, MPIR_Info *info, 1067 MPIR_Comm *comm_ptr, void *baseptr, MPIR_Win **win_ptr); 1068 int MPIDI_CH3U_Win_create_dynamic(MPIR_Info *info, MPIR_Comm *comm, MPIR_Win **win); 1069 int MPIDI_CH3U_Win_shared_query(MPIR_Win * win_ptr, int target_rank, MPI_Aint * size, 1070 int *disp_unit, void *baseptr); 1071 1072 /* MPI RMA Utility functions */ 1073 1074 int MPIDI_CH3U_Win_gather_info(void *, MPI_Aint, int, MPIR_Info *, MPIR_Comm *, 1075 MPIR_Win **); 1076 1077 1078 #ifdef MPIDI_CH3I_HAS_ALLOC_MEM 1079 void* MPIDI_CH3I_Alloc_mem(size_t size, MPIR_Info *info_ptr); 1080 /* fallback to MPL_malloc if channel does not have its own RMA memory allocator */ 1081 #else 1082 #define MPIDI_CH3I_Alloc_mem(size, info_ptr) MPL_malloc(size, MPL_MEM_USER) 1083 #endif 1084 1085 #ifdef MPIDI_CH3I_HAS_FREE_MEM 1086 int MPIDI_CH3I_Free_mem(void *ptr); 1087 #else 1088 #define MPIDI_CH3I_Free_mem(ptr) MPL_free(ptr); 1089 #endif 1090 1091 /* Pvars */ 1092 void 
MPIDI_CH3_RMA_Init_sync_pvars(void); 1093 void MPIDI_CH3_RMA_Init_pkthandler_pvars(void); 1094 1095 /* internal */ 1096 int MPIDI_CH3I_Release_lock(MPIR_Win * win_ptr); 1097 int MPIDI_CH3I_Try_acquire_win_lock(MPIR_Win * win_ptr, int requested_lock); 1098 1099 int MPIDI_CH3I_Progress_finalize(void); 1100 1101 1102 /* Internal RMA operation routines. 1103 * Called by normal RMA operations and request-based RMA operations . */ 1104 int MPIDI_CH3I_Put(const void *origin_addr, int origin_count, MPI_Datatype 1105 origin_datatype, int target_rank, MPI_Aint target_disp, 1106 int target_count, MPI_Datatype target_datatype, MPIR_Win * win_ptr, 1107 MPIR_Request * ureq); 1108 int MPIDI_CH3I_Get(void *origin_addr, int origin_count, MPI_Datatype 1109 origin_datatype, int target_rank, MPI_Aint target_disp, 1110 int target_count, MPI_Datatype target_datatype, MPIR_Win * win_ptr, 1111 MPIR_Request * ureq); 1112 int MPIDI_CH3I_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype 1113 origin_datatype, int target_rank, MPI_Aint target_disp, 1114 int target_count, MPI_Datatype target_datatype, MPI_Op op, 1115 MPIR_Win * win_ptr, MPIR_Request * ureq); 1116 int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count, 1117 MPI_Datatype origin_datatype, void *result_addr, int result_count, 1118 MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, 1119 int target_count, MPI_Datatype target_datatype, MPI_Op op, 1120 MPIR_Win * win_ptr, MPIR_Request * ureq); 1121 1122 /*@ 1123 MPIDI_CH3_Progress_signal_completion - Inform the progress engine that a 1124 pending request has completed. 1125 1126 IMPLEMENTORS: 1127 In a single-threaded environment, this routine can be implemented by 1128 incrementing a request completion counter. In a 1129 multi-threaded environment, the request completion counter must be atomically 1130 incremented, and any threaded blocking in the 1131 progress engine must be woken up when a request is completed. 
1132 1133 Notes on the implementation: 1134 1135 This code is designed to support one particular model of thread-safety. 1136 It is common to many of the channels and was moved into this file because 1137 the MPIDI_CH3_Progress_signal_completion reference is used by the 1138 function the implements MPID_Request_complete. 1139 @*/ 1140 1141 /* 1142 * MPIDI_CH3_Progress_signal_completion() is used to notify the progress 1143 * engine that a completion has occurred. The multi-threaded version will need 1144 * to wake up any (and all) threads blocking in MPIDI_CH3_Progress(). 1145 */ 1146 1147 /* This allows the channel to define an alternate to the 1148 completion counter. */ 1149 #ifndef MPIDI_CH3I_INCR_PROGRESS_COMPLETION_COUNT 1150 #define MPIDI_CH3I_INCR_PROGRESS_COMPLETION_COUNT \ 1151 do { \ 1152 MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMPLETION_MUTEX); \ 1153 ++MPIDI_CH3I_progress_completion_count; \ 1154 MPL_DBG_MSG_D(MPIDI_CH3_DBG_PROGRESS,VERBOSE, \ 1155 "just incremented MPIDI_CH3I_progress_completion_count=%d", \ 1156 MPIDI_CH3I_progress_completion_count); \ 1157 MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMPLETION_MUTEX); \ 1158 } while (0) 1159 #endif 1160 1161 1162 /* The following is part of an implementation of a control of a 1163 resource shared among threads - it needs to be managed more 1164 explicitly as such as shared resource */ 1165 #ifndef MPICH_IS_THREADED 1166 # define MPIDI_CH3_Progress_signal_completion() \ 1167 { \ 1168 MPIDI_CH3I_INCR_PROGRESS_COMPLETION_COUNT; \ 1169 } 1170 #else 1171 /* TODO these decls should probably move into each channel as appropriate */ 1172 extern volatile int MPIDI_CH3I_progress_blocked; 1173 extern volatile int MPIDI_CH3I_progress_wakeup_signalled; 1174 1175 /* This allows the channel to hook the MPIDI_CH3_Progress_signal_completion 1176 * macro when it is necessary to wake up some part of the progress engine from a 1177 * blocking operation. Currently ch3:sock uses it, ch3:nemesis does not. 
*/ 1178 /* MT alternative implementations of this macro are responsible for providing any 1179 * synchronization (acquiring MPIDCOMM, etc) */ 1180 #ifndef MPIDI_CH3I_PROGRESS_WAKEUP 1181 # define MPIDI_CH3I_PROGRESS_WAKEUP do {/*do nothing*/} while(0) 1182 #endif 1183 1184 void MPIDI_CH3I_Progress_wakeup(void); 1185 /* MT TODO profiling is needed here. We currently protect the completion 1186 * counter with the COMPLETION critical section, which could be a source of 1187 * contention. It should be possible to peform these updates atomically via 1188 * OPA instead, but the additional complexity should be justified by 1189 * profiling evidence. [goodell@ 2010-06-29] */ 1190 # define MPIDI_CH3_Progress_signal_completion() \ 1191 do { \ 1192 MPIDI_CH3I_INCR_PROGRESS_COMPLETION_COUNT; \ 1193 MPIDI_CH3I_PROGRESS_WAKEUP; \ 1194 } while (0) 1195 #endif 1196 1197 /* Function that may be used to provide business card info */ 1198 int MPIDI_CH3I_BCInit( char **bc_val_p, int *val_max_sz_p); 1199 /* Function to free the storage allocated by MPIDI_CH3I_BCInit */ 1200 int MPIDI_CH3I_BCFree( char *publish_bc ); 1201 1202 /* Inform the process group of our connection information string (business 1203 card) */ 1204 int MPIDI_PG_SetConnInfo( int rank, const char *connString ); 1205 1206 /* Fill in the node_id information for each VC in the given PG. */ 1207 int MPIDI_Populate_vc_node_ids(MPIDI_PG_t *pg, int our_pg_rank); 1208 1209 /* NOTE: Channel function prototypes are in mpidi_ch3_post.h since some of the 1210 macros require their declarations. */ 1211 1212 /* FIXME: These should be defined only when these particular utility 1213 packages are used. 
Best would be to keep these prototypes in the 1214 related util/xxx directories, and either copy them into an include 1215 directory used only for builds or add (yet another) include path */ 1216 /* from util/sock */ 1217 int MPIDI_VC_InitSock( MPIDI_VC_t *); 1218 int MPIDI_CH3I_Connect_to_root_sock(const char *, MPIDI_VC_t **); 1219 1220 1221 int MPIDI_CH3I_VC_post_sockconnect(MPIDI_VC_t * ); 1222 1223 /* FIXME: Where should this go? */ 1224 1225 /* Used internally to broadcast process groups belonging to peercomm to 1226 all processes in comm*/ 1227 int MPID_PG_BCast( MPIR_Comm *peercomm_p, MPIR_Comm *comm_p, int root ); 1228 1229 /* Channel defintitions */ 1230 /*@ 1231 MPIDI_CH3_iStartMsg - A non-blocking request to send a CH3 packet. A r 1232 equest object is allocated only if the send could not be completed 1233 immediately. 1234 1235 Input Parameters: 1236 + vc - virtual connection to send the message over 1237 . pkt - pointer to a MPIDI_CH3_Pkt_t structure containing the substructure to 1238 be sent 1239 - pkt_sz - size of the packet substucture 1240 1241 Output Parameters: 1242 . sreq_ptr - send request or NULL if the send completed immediately 1243 1244 Return value: 1245 An mpi error code. 1246 1247 NOTE: 1248 The packet structure may be allocated on the stack. 1249 1250 IMPLEMETORS: 1251 If the send can not be completed immediately, the CH3 packet structure must 1252 be stored internally until the request is complete. 1253 1254 If the send completes immediately, the channel implementation should return 1255 NULL. 1256 @*/ 1257 int MPIDI_CH3_iStartMsg(MPIDI_VC_t * vc, void * pkt, intptr_t pkt_sz, 1258 MPIR_Request **sreq_ptr); 1259 1260 1261 /*@ 1262 MPIDI_CH3_iStartMsgv - A non-blocking request to send a CH3 packet and 1263 associated data. A request object is allocated only if 1264 the send could not be completed immediately. 1265 1266 Input Parameters: 1267 + vc - virtual connection to send the message over 1268 . 
iov - a vector of a structure contains a buffer pointer and length 1269 - iov_n - number of elements in the vector 1270 1271 Output Parameters: 1272 . sreq_ptr - send request or NULL if the send completed immediately 1273 1274 Return value: 1275 An mpi error code. 1276 1277 NOTE: 1278 The first element in the vector must point to the packet structure. The 1279 packet structure and the vector may be allocated on 1280 the stack. 1281 1282 IMPLEMENTORS: 1283 If the send can not be completed immediately, the CH3 packet structure and 1284 the vector must be stored internally until the 1285 request is complete. 1286 1287 If the send completes immediately, the channel implementation should return 1288 NULL. 1289 @*/ 1290 int MPIDI_CH3_iStartMsgv(MPIDI_VC_t * vc, struct iovec * iov, int iov_n, 1291 MPIR_Request **sreq_ptr); 1292 1293 1294 /*@ 1295 MPIDI_CH3_iSend - A non-blocking request to send a CH3 packet using an 1296 existing request object. When the send is complete 1297 the channel implementation will call the OnDataAvail routine in the 1298 request, if any (if not, the channel implementation will mark the 1299 request as complete). 1300 1301 Input Parameters: 1302 + vc - virtual connection over which to send the CH3 packet 1303 . sreq - pointer to the send request object 1304 . pkt - pointer to a MPIDI_CH3_Pkt_t structure containing the substructure to 1305 be sent 1306 - pkt_sz - size of the packet substucture 1307 1308 Return value: 1309 An mpi error code. 1310 1311 NOTE: 1312 The packet structure may be allocated on the stack. 1313 1314 IMPLEMETORS: 1315 If the send can not be completed immediately, the packet structure must be 1316 stored internally until the request is complete. 1317 1318 If the send completes immediately, the channel implementation still must 1319 invoke the OnDataAvail routine in the request, if any; otherwise, is 1320 must set the request as complete. 
1321 @*/ 1322 int MPIDI_CH3_iSend(MPIDI_VC_t * vc, MPIR_Request * sreq, void * pkt, 1323 intptr_t pkt_sz); 1324 1325 1326 /*@ 1327 MPIDI_CH3_iSendv - A non-blocking request to send a CH3 packet and 1328 associated data using an existing request object. When 1329 the send is complete the channel implementation will call the 1330 OnDataAvail routine in the request, if any. 1331 1332 Input Parameters: 1333 + vc - virtual connection over which to send the CH3 packet and data 1334 . sreq - pointer to the send request object 1335 . iov - a vector of a structure contains a buffer pointer and length 1336 - iov_n - number of elements in the vector 1337 1338 Return value: 1339 An mpi error code. 1340 1341 NOTE: 1342 The first element in the vector must point to the packet structure. The 1343 packet structure and the vector may be allocated on 1344 the stack. 1345 1346 IMPLEMENTORS: 1347 If the send can not be completed immediately, the packet structure and the 1348 vector must be stored internally until the request is 1349 complete. 1350 1351 If the send completes immediately, the channel implementation still must 1352 call the OnDataAvail routine in the request, if any. 1353 @*/ 1354 int MPIDI_CH3_iSendv(MPIDI_VC_t * vc, MPIR_Request * sreq, struct iovec * iov, 1355 int iov_n); 1356 1357 /*@ 1358 MPIDI_CH3_Connection_terminate - terminate the underlying connection 1359 associated with the specified VC 1360 1361 Input Parameters: 1362 . 
vc - virtual connection 1363 1364 Return value: 1365 An MPI error code 1366 @*/ 1367 int MPIDI_CH3_Connection_terminate(MPIDI_VC_t * vc); 1368 1369 /* MPIDI_CH3_Connect_to_root (really connect to peer) - channel routine 1370 for connecting to a process through a port, used in implementing 1371 MPID_Comm_connect and accept */ 1372 int MPIDI_CH3_Connect_to_root(const char *, MPIDI_VC_t **); 1373 1374 /* 1375 * Channel utility prototypes 1376 */ 1377 int MPIDI_CH3U_Recvq_init(void); 1378 int MPIDI_CH3U_Recvq_FU(int, int, int, MPI_Status * ); 1379 MPIR_Request * MPIDI_CH3U_Recvq_FDU(MPI_Request, MPIDI_Message_match *); 1380 MPIR_Request * MPIDI_CH3U_Recvq_FDU_matchonly(int source, int tag, int context_id, MPIR_Comm *comm, 1381 int *foundp); 1382 MPIR_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag, 1383 int context_id, MPIR_Comm *comm, void *user_buf, 1384 MPI_Aint user_count, MPI_Datatype datatype, int * foundp); 1385 int MPIDI_CH3U_Recvq_DP(MPIR_Request * rreq); 1386 MPIR_Request * MPIDI_CH3U_Recvq_FDP_or_AEU(MPIDI_Message_match * match, 1387 int * found); 1388 int MPIDI_CH3U_Recvq_count_unexp(void); 1389 int MPIDI_CH3U_Complete_posted_with_error(MPIDI_VC_t *vc); 1390 int MPIDI_CH3U_Clean_recvq(MPIR_Comm *comm_ptr); 1391 1392 1393 int MPIDI_CH3U_Request_load_send_iov(MPIR_Request * const sreq, 1394 struct iovec * const iov, int * const iov_n); 1395 int MPIDI_CH3U_Request_load_recv_iov(MPIR_Request * const rreq); 1396 int MPIDI_CH3U_Request_unpack_uebuf(MPIR_Request * rreq); 1397 int MPIDI_CH3U_Request_unpack_srbuf(MPIR_Request * rreq); 1398 1399 void MPIDI_CH3U_Buffer_copy(const void * const sbuf, MPI_Aint scount, 1400 MPI_Datatype sdt, int * smpi_errno, 1401 void * const rbuf, MPI_Aint rcount, MPI_Datatype rdt, 1402 intptr_t * rdata_sz, int * rmpi_errno); 1403 int MPIDI_CH3U_Post_data_receive(int found, MPIR_Request ** rreqp); 1404 int MPIDI_CH3U_Post_data_receive_found(MPIR_Request * rreqp); 1405 int MPIDI_CH3U_Post_data_receive_unexpected(MPIR_Request * 
rreqp); 1406 int MPIDI_CH3U_Receive_data_found(MPIR_Request *rreq, void *buf, intptr_t *buflen, int *complete); 1407 int MPIDI_CH3U_Receive_data_unexpected(MPIR_Request * rreq, void *buf, intptr_t *buflen, int *complete); 1408 1409 /* Initialization routine for ch3u_comm.c */ 1410 int MPIDI_CH3I_Comm_init(void); 1411 1412 int MPIDI_CH3I_Comm_handle_failed_procs(MPIR_Group *new_failed_procs); 1413 void MPIDI_CH3I_Comm_find(MPIR_Context_id_t context_id, MPIR_Comm **comm); 1414 1415 /* The functions below allow channels to register functions to be 1416 called immediately after a communicator has been created, and 1417 immediately before a communicator is to be destroyed. 1418 */ 1419 int MPIDI_CH3U_Comm_register_create_hook(int (*hook_fn)(struct MPIR_Comm *, void *), void *param); 1420 int MPIDI_CH3U_Comm_register_destroy_hook(int (*hook_fn)(struct MPIR_Comm *, void *), void *param); 1421 1422 /* FIXME: This is a macro! */ 1423 #ifndef MPIDI_CH3_Request_add_ref 1424 /*@ 1425 MPIDI_CH3_Request_add_ref - Increment the reference count associated with a 1426 request object 1427 1428 Input Parameters: 1429 . req - pointer to the request object 1430 @*/ 1431 void MPIDI_CH3_Request_add_ref(MPIR_Request * req); 1432 #endif 1433 1434 /*@ 1435 MPIDI_CH3_GetParentPort - obtain the port name associated with the parent 1436 1437 Output Parameters: 1438 . parent_port_name - the port name associated with the parent communicator 1439 1440 Return value: 1441 A MPI error code. 1442 1443 NOTE: 1444 'MPIDI_CH3_GetParentPort' should only be called if the initialization 1445 (in the current implementation, done with the static function 1446 'InitPGFromPMI' in 'mpid_init.c') has determined that this process 1447 in fact has a parent. 1448 @*/ 1449 int MPIDI_CH3_GetParentPort(char ** parent_port_name); 1450 1451 /*@ 1452 MPIDI_CH3_FreeParentPort - This routine frees the storage associated with 1453 a parent port (allocted with MPIDH_CH3_GetParentPort). 
1454 1455 @*/ 1456 void MPIDI_CH3_FreeParentPort( void ); 1457 1458 /*E 1459 MPIDI_CH3_Abort - Abort this process. 1460 1461 Input Parameters: 1462 + exit_code - exit code to be returned by the process 1463 - error_msg - error message to print 1464 1465 Return value: 1466 This function should not return. 1467 1468 Notes: 1469 This routine is used only if the channel defines 1470 'MPIDI_CH3_IMPLEMENTS_ABORT'. This allows the channel to handle 1471 aborting processes, particularly when the channel does not use the standard 1472 PMI interface. 1473 E*/ 1474 int MPIDI_CH3_Abort(int exit_code, char * error_msg); 1475 1476 /* FIXME: Move these prototypes into header files in the appropriate 1477 util directories */ 1478 /* added by brad. upcalls for MPIDI_CH3_Init that contain code which could be 1479 executed by two or more channels */ 1480 int MPIDI_CH3U_Init_sock(int has_parent, MPIDI_PG_t * pg_p, int pg_rank, 1481 char **bc_val_p, int *val_max_sz_p); 1482 1483 /* added by brad. business card related global and functions */ 1484 /* FIXME: Make these part of the channel support headers */ 1485 #define MAX_HOST_DESCRIPTION_LEN 256 1486 int MPIDI_CH3U_Get_business_card_sock(int myRank, 1487 char **bc_val_p, int *val_max_sz_p); 1488 1489 int MPIDI_CH3_Get_business_card(int myRank, char *value, int length); 1490 1491 /* 1492 * Channel upcall prototypes 1493 */ 1494 1495 /*E 1496 MPIDI_CH3U_Handle_recv_pkt- Handle a freshly received CH3 packet. 1497 1498 Input Parameters: 1499 + vc - virtual connection over which the packet was received 1500 - pkt - pointer to the CH3 packet header 1501 - data - pointer to the start address of data 1502 1503 Output Parameter: 1504 . rreqp - receive request defining data to be received; may be NULL 1505 1506 NOTE: 1507 Multiple threads may not simultaneously call this routine with the same 1508 virtual connection. This constraint eliminates the 1509 need to lock the VC and thus improves performance. 
If simultaneous upcalls 1510 for a single VC are a possible, then the calling 1511 routine must serialize the calls (perhaps by locking the VC). Special 1512 consideration may need to be given to packet ordering 1513 if the channel has made guarantees about ordering. 1514 E*/ 1515 int MPIDI_CH3U_Handle_recv_pkt(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt, void *data, 1516 intptr_t *buflen, MPIR_Request ** rreqp); 1517 1518 /*@ 1519 MPIDI_CH3U_Handle_recv_req - Process a receive request for which all of the 1520 data has been received (and copied) into the 1521 buffers described by the request's IOV. 1522 1523 Input Parameters: 1524 + vc - virtual connection over which the data was received 1525 - rreq - pointer to the receive request object 1526 1527 Output Parameter: 1528 . complete - data transfer for the request has completed 1529 @*/ 1530 int MPIDI_CH3U_Handle_recv_req(MPIDI_VC_t * vc, MPIR_Request * rreq, 1531 int * complete); 1532 1533 /* Handle_send_req invokes the action (method/function) when data 1534 becomes available. It is an obsolete routine; the completion 1535 function should be invoked directly. 
*/ 1536 int MPIDI_CH3U_Handle_send_req(MPIDI_VC_t * vc, MPIR_Request * sreq, 1537 int *complete); 1538 1539 int MPIDI_CH3U_Handle_connection(MPIDI_VC_t * vc, MPIDI_VC_Event_t event); 1540 1541 int MPIDI_CH3U_VC_SendClose( MPIDI_VC_t *vc, int rank ); 1542 int MPIDI_CH3U_VC_WaitForClose( void ); 1543 #ifdef MPIDI_CH3_HAS_CHANNEL_CLOSE 1544 int MPIDI_CH3_Channel_close( void ); 1545 #else 1546 #define MPIDI_CH3_Channel_close( ) MPI_SUCCESS 1547 #endif 1548 1549 /* MPIDI_CH3U_Get_failed_group() generates a group of failed processes based 1550 * on the last list generated during MPIDI_CH3U_Check_for_failed_procs */ 1551 int MPIDI_CH3U_Get_failed_group(int last_rank, MPIR_Group **failed_group); 1552 /* MPIDI_CH3U_Check_for_failed_procs() reads PMI_dead_processes key 1553 and marks VCs to those processes as failed */ 1554 int MPIDI_CH3U_Check_for_failed_procs(void); 1555 1556 /*@ 1557 MPIDI_CH3_Pre_init - Allows the channel to initialize before PMI_init is 1558 called, and allows the 1559 channel to optionally set the rank, size, and whether this process has a 1560 parent. 1561 1562 Output Parameters: 1563 + setvals - boolean value that is true if this function set has_parent, rank, 1564 and size 1565 . has_parent - boolean value that is true if this MPI job was spawned by 1566 another set of MPI processes 1567 . rank - rank of this process in the process group 1568 - size - number of processes in the process group 1569 1570 Return value: 1571 A MPI error code. 1572 1573 Notes: 1574 This function is optional, and is used only when HAVE_CH3_PRE_INIT is 1575 defined. It is called by CH3 before PMI_Init. If the function sets setvals 1576 to TRUE, CH3 will not use PMI to get the rank, size, etc. 1577 @*/ 1578 int MPIDI_CH3_Pre_init (int *setvals, int *has_parent, int *rank, int *size); 1579 1580 /*@ 1581 MPIDI_CH3_Init - Initialize the channel implementation. 
1582 1583 Input Parameters: 1584 + has_parent - boolean value that is true if this MPI job was spawned by 1585 another set of MPI processes 1586 . pg_ptr - the new process group representing MPI_COMM_WORLD 1587 - pg_rank - my rank in the process group 1588 1589 Return value: 1590 A MPI error code. 1591 1592 Notes: 1593 MPID_Init has called 'PMI_Init' and created the process group structure 1594 before this routine is called. 1595 @*/ 1596 int MPIDI_CH3_Init(int has_parent, MPIDI_PG_t *pg_ptr, int pg_rank ); 1597 1598 /*@ 1599 MPIDI_CH3_Finalize - Shutdown the channel implementation. 1600 1601 Return value: 1602 A MPI error class. 1603 @*/ 1604 int MPIDI_CH3_Finalize(void); 1605 1606 /*@ 1607 MPIDI_CH3_VC_Init - Perform channel-specific initialization of a VC 1608 1609 Input Parameter: 1610 . vc - Virtual connection to initialize 1611 @*/ 1612 int MPIDI_CH3_VC_Init( struct MPIDI_VC *vc ); 1613 1614 /*@ 1615 MPIDI_CH3_PG_Destroy - Perform any channel-specific actions when freeing 1616 a process group 1617 1618 Input Parameter: 1619 . pg - Process group on which to act 1620 @*/ 1621 int MPIDI_CH3_PG_Destroy( struct MPIDI_PG *pg ); 1622 1623 /*@ MPIDI_CH3_VC_Destroy - Perform and channel-specific actions when freeing a 1624 virtual connection. 1625 1626 Input Parameter: 1627 . 
vc - Virtual connection on which to act 1628 @*/ 1629 int MPIDI_CH3_VC_Destroy( struct MPIDI_VC *vc ); 1630 1631 /*@ MPIDI_CH3_InitCompleted - Perform any channel-specific initialization 1632 actions after MPID_Init but before MPI_Init (or MPI_Initthread) returns 1633 @*/ 1634 int MPIDI_CH3_InitCompleted( void ); 1635 1636 #ifdef MPIDI_CH3_HASIMPL_HEADER 1637 #include "mpidi_ch3_mpid.h" 1638 #endif 1639 /* Routines in support of ch3 */ 1640 1641 #ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS 1642 /* Routine to return the tag associated with a port */ 1643 int MPIDI_GetTagFromPort( const char *, int * ); 1644 #else 1645 /* Need empty symbol to avoid failure at compile time if defined 1646 * MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS. */ 1647 #define MPIDI_GetTagFromPort(port_name, port_name_tag) (MPI_SUCCESS) 1648 #endif 1649 1650 /* Here are the packet handlers */ 1651 int MPIDI_CH3_PktHandler_EagerSend( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1652 intptr_t *, MPIR_Request ** ); 1653 #ifdef USE_EAGER_SHORT 1654 int MPIDI_CH3_PktHandler_EagerShortSend( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1655 intptr_t *, MPIR_Request ** ); 1656 #endif 1657 int MPIDI_CH3_PktHandler_ReadySend( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1658 intptr_t *, MPIR_Request ** ); 1659 int MPIDI_CH3_PktHandler_EagerSyncSend( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1660 intptr_t *, MPIR_Request ** ); 1661 int MPIDI_CH3_PktHandler_EagerSyncAck( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1662 intptr_t *, MPIR_Request ** ); 1663 int MPIDI_CH3_PktHandler_RndvReqToSend( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1664 intptr_t *, MPIR_Request ** ); 1665 int MPIDI_CH3_PktHandler_RndvClrToSend( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1666 intptr_t *, MPIR_Request ** ); 1667 int MPIDI_CH3_PktHandler_RndvSend( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1668 intptr_t *, MPIR_Request ** ); 1669 int MPIDI_CH3_PktHandler_CancelSendReq( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1670 intptr_t *, MPIR_Request ** ); 1671 int 
MPIDI_CH3_PktHandler_CancelSendResp( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1672 intptr_t *, MPIR_Request ** ); 1673 int MPIDI_CH3_PktHandler_Put( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1674 intptr_t *, MPIR_Request ** ); 1675 int MPIDI_CH3_PktHandler_Accumulate( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1676 intptr_t *, MPIR_Request ** ); 1677 int MPIDI_CH3_PktHandler_GetAccumulate( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1678 intptr_t *, MPIR_Request ** ); 1679 int MPIDI_CH3_PktHandler_CAS( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1680 intptr_t *, MPIR_Request ** ); 1681 int MPIDI_CH3_PktHandler_CASResp( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1682 intptr_t *, MPIR_Request ** ); 1683 int MPIDI_CH3_PktHandler_FOP( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1684 intptr_t *, MPIR_Request ** ); 1685 int MPIDI_CH3_PktHandler_FOPResp( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1686 intptr_t *, MPIR_Request ** ); 1687 int MPIDI_CH3_PktHandler_Get_AccumResp( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1688 intptr_t *, MPIR_Request ** ); 1689 int MPIDI_CH3_PktHandler_Get( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1690 intptr_t *, MPIR_Request ** ); 1691 int MPIDI_CH3_PktHandler_GetResp( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1692 intptr_t *, MPIR_Request ** ); 1693 int MPIDI_CH3_PktHandler_Lock( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1694 intptr_t *, MPIR_Request ** ); 1695 int MPIDI_CH3_PktHandler_LockAck( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1696 intptr_t *, MPIR_Request ** ); 1697 int MPIDI_CH3_PktHandler_LockOpAck( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1698 intptr_t *, MPIR_Request ** ); 1699 int MPIDI_CH3_PktHandler_Unlock( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1700 intptr_t *, MPIR_Request ** ); 1701 int MPIDI_CH3_PktHandler_Flush( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1702 intptr_t *, MPIR_Request ** ); 1703 int MPIDI_CH3_PktHandler_Ack( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1704 intptr_t *, MPIR_Request ** ); 1705 int 
MPIDI_CH3_PktHandler_DecrAtCnt( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1706 intptr_t *, MPIR_Request ** ); 1707 int MPIDI_CH3_PktHandler_FlowCntlUpdate( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, void *, 1708 intptr_t *, MPIR_Request ** ); 1709 int MPIDI_CH3_PktHandler_Close( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1710 intptr_t *, MPIR_Request ** ); 1711 1712 #ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS 1713 /* packet handlers used in dynamic process connection. */ 1714 int MPIDI_CH3_PktHandler_ConnAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt, void * data, 1715 intptr_t * buflen, MPIR_Request ** rreqp); 1716 int MPIDI_CH3_PktHandler_AcceptAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt, void * data, 1717 intptr_t * buflen, MPIR_Request ** rreqp); 1718 #endif /* end of MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS */ 1719 1720 int MPIDI_CH3_PktHandler_EndCH3( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, void *, 1721 intptr_t *, MPIR_Request ** ); 1722 int MPIDI_CH3_PktHandler_Revoke(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, void * data, 1723 intptr_t *buflen, MPIR_Request **rreqp); 1724 int MPIDI_CH3_PktHandler_Init( MPIDI_CH3_PktHandler_Fcn *[], int ); 1725 1726 int MPIDI_CH3I_RMA_Make_progress_global(int *made_progress); 1727 1728 #ifdef MPICH_DBG_OUTPUT 1729 int MPIDI_CH3_PktPrint_CancelSendReq( FILE *, MPIDI_CH3_Pkt_t * ); 1730 int MPIDI_CH3_PktPrint_CancelSendResp( FILE *, MPIDI_CH3_Pkt_t * ); 1731 int MPIDI_CH3_PktPrint_EagerSend( FILE *, MPIDI_CH3_Pkt_t * ); 1732 int MPIDI_CH3_PktPrint_ReadySend( FILE *, MPIDI_CH3_Pkt_t * ); 1733 int MPIDI_CH3_PktPrint_RndvReqToSend( FILE *, MPIDI_CH3_Pkt_t * ); 1734 int MPIDI_CH3_PktPrint_RndvClrToSend( FILE *, MPIDI_CH3_Pkt_t * ); 1735 int MPIDI_CH3_PktPrint_RndvSend( FILE *, MPIDI_CH3_Pkt_t * ); 1736 int MPIDI_CH3_PktPrint_EagerSyncSend( FILE *fp, MPIDI_CH3_Pkt_t *pkt ); 1737 int MPIDI_CH3_PktPrint_EagerSyncAck( FILE *fp, MPIDI_CH3_Pkt_t *pkt ); 1738 #endif 1739 1740 /* Routines to create packets (used in implementing MPI communications */ 1741 int 
MPIDI_CH3_EagerNoncontigSend( MPIR_Request **, MPIDI_CH3_Pkt_type_t, 1742 const void *, MPI_Aint, 1743 MPI_Datatype, int, int, MPIR_Comm *, 1744 int ); 1745 int MPIDI_CH3_EagerContigSend( MPIR_Request **, MPIDI_CH3_Pkt_type_t, 1746 const void *, intptr_t, int, 1747 int, MPIR_Comm *, int ); 1748 int MPIDI_CH3_EagerContigShortSend( MPIR_Request **, MPIDI_CH3_Pkt_type_t, 1749 const void *, intptr_t, 1750 int, int, MPIR_Comm *, int ); 1751 int MPIDI_CH3_EagerContigIsend( MPIR_Request **, MPIDI_CH3_Pkt_type_t, 1752 const void *, intptr_t, int, 1753 int, MPIR_Comm *, int ); 1754 1755 1756 int MPIDI_CH3_RndvSend( MPIR_Request **, const void *, MPI_Aint, MPI_Datatype, 1757 int, intptr_t, MPI_Aint, int, int, MPIR_Comm *, int ); 1758 1759 int MPIDI_CH3_EagerSyncNoncontigSend( MPIR_Request **, const void *, int, 1760 MPI_Datatype, intptr_t, int, MPI_Aint, 1761 int, int, MPIR_Comm *, int ); 1762 int MPIDI_CH3_EagerSyncZero(MPIR_Request **, int, int, MPIR_Comm *, int ); 1763 1764 int MPIDI_CH3_SendNoncontig_iov( struct MPIDI_VC *vc, struct MPIR_Request *sreq, 1765 void *header, intptr_t hdr_sz, 1766 struct iovec *hdr_iov, int n_hdr_iov); 1767 1768 /* Routines to ack packets, called in the receive routines when a 1769 message is matched */ 1770 int MPIDI_CH3_EagerSyncAck( MPIDI_VC_t *, MPIR_Request * ); 1771 int MPIDI_CH3_RecvFromSelf( MPIR_Request *, void *, MPI_Aint, MPI_Datatype ); 1772 int MPIDI_CH3_RecvRndv( MPIDI_VC_t *, MPIR_Request * ); 1773 1774 /* Handler routines to continuing after an IOV is processed (assigned to the 1775 OnDataAvail field in the device part of a request) */ 1776 int MPIDI_CH3_ReqHandler_RecvComplete( MPIDI_VC_t *, MPIR_Request *, int * ); 1777 int MPIDI_CH3_ReqHandler_UnpackUEBufComplete( MPIDI_VC_t *, MPIR_Request *, 1778 int * ); 1779 int MPIDI_CH3_ReqHandler_ReloadIOV( MPIDI_VC_t *, MPIR_Request *, int * ); 1780 1781 int MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV( MPIDI_VC_t *, MPIR_Request *, 1782 int * ); 1783 int 
MPIDI_CH3_ReqHandler_UnpackSRBufComplete( MPIDI_VC_t *, MPIR_Request *, 1784 int * ); 1785 int MPIDI_CH3_ReqHandler_PutDerivedDTRecvComplete( MPIDI_VC_t *, 1786 MPIR_Request *, int * ); 1787 int MPIDI_CH3_ReqHandler_PutRecvComplete( MPIDI_VC_t *, MPIR_Request *, 1788 int * ); 1789 int MPIDI_CH3_ReqHandler_AccumRecvComplete( MPIDI_VC_t *, MPIR_Request *, 1790 int * ); 1791 int MPIDI_CH3_ReqHandler_GaccumRecvComplete( MPIDI_VC_t *, MPIR_Request *, 1792 int * ); 1793 int MPIDI_CH3_ReqHandler_FOPRecvComplete( MPIDI_VC_t *, MPIR_Request *, 1794 int * ); 1795 int MPIDI_CH3_ReqHandler_AccumMetadataRecvComplete( MPIDI_VC_t *, 1796 MPIR_Request *, 1797 int * ); 1798 int MPIDI_CH3_ReqHandler_GaccumMetadataRecvComplete( MPIDI_VC_t *, 1799 MPIR_Request *, 1800 int * ); 1801 int MPIDI_CH3_ReqHandler_GetDerivedDTRecvComplete( MPIDI_VC_t *, 1802 MPIR_Request *, int * ); 1803 int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete( MPIDI_VC_t *, 1804 MPIR_Request *, int * ); 1805 /* Send Handlers */ 1806 int MPIDI_CH3_ReqHandler_SendReloadIOV( MPIDI_VC_t *vc, MPIR_Request *sreq, 1807 int *complete ); 1808 int MPIDI_CH3_ReqHandler_GetSendComplete( MPIDI_VC_t *, MPIR_Request *, 1809 int * ); 1810 int MPIDI_CH3_ReqHandler_GaccumSendComplete( MPIDI_VC_t *, MPIR_Request *, 1811 int * ); 1812 int MPIDI_CH3_ReqHandler_CASSendComplete( MPIDI_VC_t *, MPIR_Request *, 1813 int * ); 1814 int MPIDI_CH3_ReqHandler_FOPSendComplete( MPIDI_VC_t *, MPIR_Request *, 1815 int * ); 1816 /* RMA operation request handler */ 1817 int MPIDI_CH3_Req_handler_rma_op_complete(MPIR_Request *); 1818 1819 #define MPIDI_CH3_GET_EAGER_THRESHOLD(eager_threshold_p, comm, vc) \ 1820 do { \ 1821 if ((comm)->hints[MPIR_COMM_HINT_EAGER_THRESH] != -1) \ 1822 *(eager_threshold_p) = (comm)->hints[MPIR_COMM_HINT_EAGER_THRESH]; \ 1823 else \ 1824 *(eager_threshold_p) = (vc)->eager_max_msg_sz; \ 1825 } while (0) 1826 1827 1828 #endif /* MPIDIMPL_H_INCLUDED */ 1829