1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3  *  (C) 2001 by Argonne National Laboratory.
4  *      See COPYRIGHT in top-level directory.
5  */
6 
7 #include "mpidimpl.h"
8 
9 /* FIXME: This should call a channel-provided routine to deliver the
10    cancel message, once the code decides that the request can still
11    be cancelled */
12 
13 #undef FUNCNAME
14 #define FUNCNAME MPID_Cancel_send
15 #undef FCNAME
16 #define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPID_Cancel_send - attempt to cancel a send request.
 *
 * Input/output parameter:
 *   sreq - the send request to cancel; its kind must be MPID_REQUEST_SEND.
 *          The outcome is recorded in sreq->status.cancelled, either here
 *          (send-to-self) or later, when the cancel response packet is
 *          processed (remote destination).
 *
 * Return value:
 *   MPI_SUCCESS, or an MPI error code if the cancel request packet could
 *   not be started.
 *
 * Outline:
 *   1. Nothing is done if MPIDI_Request_cancel_pending() sets its flag, or
 *      if the request has no communicator.
 *   2. A message sent to self is cancelled locally by looking it up with
 *      MPIDI_CH3U_Recvq_FDU().
 *   3. Otherwise a MPIDI_CH3_PKT_CANCEL_SEND_REQ packet is sent to the
 *      destination rank.
 */
int MPID_Cancel_send(MPID_Request * sreq)
{
    MPIDI_VC_t * vc;
    int proto;
    int flag;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_CANCEL_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_CANCEL_SEND);

    MPIU_Assert(sreq->kind == MPID_REQUEST_SEND);

    /* Presumably flag reports that a cancel is already in progress for this
       request (confirm against the macro definition); in that case there
       is nothing more to do. */
    MPIDI_Request_cancel_pending(sreq, &flag);
    if (flag)
    {
        goto fn_exit;
    }

    /*
     * FIXME: user requests returned by MPI_Ibsend() have a NULL comm pointer
     * and no pointer to the underlying communication
     * request.  For now, we simply fail to cancel the request.  In the future,
     * we should add a new request kind to indicate that
     * the request is a BSEND.  Then we can properly cancel the request, much
     * in the way we do persistent requests.
     */
    if (sreq->comm == NULL)
    {
        goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(sreq->comm, sreq->dev.match.parts.rank, &vc);

    proto = MPIDI_Request_get_msg_type(sreq);

    if (proto == MPIDI_REQUEST_SELF_MSG)
    {
        MPID_Request * rreq;

        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,
                     "attempting to cancel message sent to self");

        MPIU_THREAD_CS_ENTER(MSGQUEUE,);
        rreq = MPIDI_CH3U_Recvq_FDU(sreq->handle, &sreq->dev.match);
        MPIU_THREAD_CS_EXIT(MSGQUEUE,);
        if (rreq)
        {
            MPIU_Assert(rreq->partner_request == sreq);

            MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
             "send-to-self cancellation successful, sreq=0x%08x, rreq=0x%08x",
                                                sreq->handle, rreq->handle));

            /* Drop the receive-side request that was found for this send. */
            MPIU_Object_set_ref(rreq, 0);
            MPIDI_CH3_Request_destroy(rreq);

            sreq->status.cancelled = TRUE;
            /* no other thread should be waiting on sreq, so it is safe to
               reset ref_count and cc */
            MPID_cc_set(&sreq->cc, 0);
            /* FIXME should be a decr and assert, not a set */
            MPIU_Object_set_ref(sreq, 1);
        }
        else
        {
            sreq->status.cancelled = FALSE;
            /* rreq is NULL on this path, so only the send request handle
               may be reported here. */
            MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
               "send-to-self cancellation failed, sreq=0x%08x",
                                                sreq->handle));
        }

        goto fn_exit;
    }

    /* FIXME: Check to see if the send is still in the send queue.  If so,
       remove it, mark the request as cancelled and complete, and release the
       device's reference to the request object (i.e. set
       sreq->status.cancelled to TRUE, set the completion counter to 0 and
       the reference count to 1, then exit).  No such local cancellation is
       implemented, so every non-self send falls through to the remote
       cancellation request below. */
    if (proto == MPIDI_REQUEST_RNDV_MSG)
    {
        MPID_Request * rts_sreq;
        /* The cancellation of the RTS request needs to be atomic through
           the destruction of the RTS request to avoid
           conflict with release of the RTS request if the CTS is received
           (see handling of a rendezvous CTS packet in
           MPIDI_CH3U_Handle_recv_pkt()).
           MPIDI_Request_fetch_and_clear_rts_sreq() is used to guarantee
           that atomicity. */
        MPIDI_Request_fetch_and_clear_rts_sreq(sreq, &rts_sreq);
        if (rts_sreq != NULL)
        {
            /* since we attempted to cancel a RTS request, then we are
               responsible for releasing that request */
            MPID_Request_release(rts_sreq);
        }
    }

    /* Part or all of the message has already been sent, so we need to send a
       cancellation request to the receiver in an attempt
       to catch the message before it is matched. */
    {
        int was_incomplete;
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_cancel_send_req_t * const csr_pkt = &upkt.cancel_send_req;
        MPID_Request * csr_sreq;

        MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
              "sending cancel request to %d for 0x%08x",
              sreq->dev.match.parts.rank, sreq->handle));

        /* The completion counter and reference count are incremented to keep
           the request around long enough to receive a
           response regardless of what the user does (free the request before
           waiting, etc.). */
        MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
        if (!was_incomplete)
        {
            /* The reference count is incremented only if the request was
               complete before the increment. */
            MPIR_Request_add_ref( sreq );
        }

        /* The packet identifies the message by the sender's own rank, the
           tag and context id of the send, and the send request handle. */
        MPIDI_Pkt_init(csr_pkt, MPIDI_CH3_PKT_CANCEL_SEND_REQ);
        csr_pkt->match.parts.rank = sreq->comm->rank;
        csr_pkt->match.parts.tag = sreq->dev.match.parts.tag;
        csr_pkt->match.parts.context_id = sreq->dev.match.parts.context_id;
        csr_pkt->sender_req_id = sreq->handle;

        MPIU_THREAD_CS_ENTER(CH3COMM,vc);
        mpi_errno = MPIDI_CH3_iStartMsg(vc, csr_pkt, sizeof(*csr_pkt), &csr_sreq);
        MPIU_THREAD_CS_EXIT(CH3COMM,vc);
        /* NOTE(review): on failure the completion counter increment (and any
           reference added above) is not undone here -- confirm whether that
           is intended. */
        if (mpi_errno != MPI_SUCCESS) {
            MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|cancelreq");
        }
        if (csr_sreq != NULL)
        {
            MPID_Request_release(csr_sreq);
        }
    }

    /* FIXME: if send cancellation packets are allowed to arrive out-of-order
       with respect to send packets, then we need to
       timestamp send and cancel packets to ensure that a cancellation request
       does not bypass the send packet to be cancelled
       and erroneously cancel a previously sent message with the same request
       handle. */
    /* FIXME: A timestamp is more than is necessary; a message sequence number
       should be adequate. */
 fn_fail:
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_CANCEL_SEND);
    return mpi_errno;
}
210 
211 /*
212  * Handler routines called when cancel send packets arrive
213  */
214 
/*
 * MPIDI_CH3_PktHandler_CancelSendReq - handle an incoming cancel-send
 * request packet.
 *
 * Parameters:
 *   vc     - virtual connection on which the packet arrived; the response
 *            is sent back on it
 *   pkt    - the received packet (read as a cancel_send_req packet)
 *   buflen - set to sizeof(MPIDI_CH3_Pkt_t)
 *   rreqp  - always set to NULL, including when an error is returned
 *
 * Return value:
 *   MPI_SUCCESS, or an MPI error code if the response packet could not be
 *   started.
 *
 * The message named by the packet is looked up with MPIDI_CH3U_Recvq_FDU().
 * If a request is returned it is released and the response carries
 * ack = TRUE; otherwise the response carries ack = FALSE.
 */
int MPIDI_CH3_PktHandler_CancelSendReq( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                        MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    MPIDI_CH3_Pkt_cancel_send_req_t * req_pkt = &pkt->cancel_send_req;
    MPID_Request * rreq;
    int ack;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_cancel_send_resp_t * resp_pkt = &upkt.cancel_send_resp;
    MPID_Request * resp_sreq;
    int mpi_errno = MPI_SUCCESS;

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
      "received cancel send req pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d",
                      req_pkt->sender_req_id, req_pkt->match.parts.rank,
                      req_pkt->match.parts.tag, req_pkt->match.parts.context_id));

    /* Define both output parameters up front so that they hold valid values
       on every exit path, including the error path through fn_fail. */
    *buflen = sizeof(MPIDI_CH3_Pkt_t);
    *rreqp = NULL;

    /* FIXME: Note that this routine is called from within the packet handler.
       If the message queue mutex is different from the progress mutex, this
       must be protected within a message-queue mutex */
    rreq = MPIDI_CH3U_Recvq_FDU(req_pkt->sender_req_id, &req_pkt->match);
    if (rreq != NULL)
    {
        MPIU_DBG_MSG(CH3_OTHER,TYPICAL,"message cancelled");
        /* An eager message with a non-zero data size has a temporary buffer
           attached to the request (presumably holding the data received so
           far -- confirm); free it before the request is released. */
        if (MPIDI_Request_get_msg_type(rreq) == MPIDI_REQUEST_EAGER_MSG && rreq->dev.recv_data_sz > 0)
        {
            MPIU_Free(rreq->dev.tmpbuf);
        }
        MPID_Request_release(rreq);
        ack = TRUE;
    }
    else
    {
        MPIU_DBG_MSG(CH3_OTHER,TYPICAL,"unable to cancel message");
        ack = FALSE;
    }

    /* Report the outcome to the sender, echoing its request handle so that
       it can locate the send request the response belongs to. */
    MPIDI_Pkt_init(resp_pkt, MPIDI_CH3_PKT_CANCEL_SEND_RESP);
    resp_pkt->sender_req_id = req_pkt->sender_req_id;
    resp_pkt->ack = ack;
    /* FIXME: This is called within the packet handler */
    /* MPIU_THREAD_CS_ENTER(CH3COMM,vc); */
    mpi_errno = MPIDI_CH3_iStartMsg(vc, resp_pkt, sizeof(*resp_pkt), &resp_sreq);
    /* MPIU_THREAD_CS_EXIT(CH3COMM,vc); */
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
                            "**ch3|cancelresp");
    }
    if (resp_sreq != NULL)
    {
        MPID_Request_release(resp_sreq);
    }

 fn_fail:
    return mpi_errno;
}
273 
/*
 * MPIDI_CH3_PktHandler_CancelSendResp - handle the response to a previously
 * sent cancel-send request.
 *
 * Parameters:
 *   vc     - unused
 *   pkt    - the received packet (read as a cancel_send_resp packet)
 *   buflen - set to sizeof(MPIDI_CH3_Pkt_t)
 *   rreqp  - set to NULL
 *
 * Always returns MPI_SUCCESS.
 *
 * The send request is recovered from the handle carried in the packet, its
 * status.cancelled field is set from the packet's ack field, and
 * MPIDI_CH3U_Request_complete() is called on it.
 */
int MPIDI_CH3_PktHandler_CancelSendResp( MPIDI_VC_t *vc ATTRIBUTE((unused)),
                                         MPIDI_CH3_Pkt_t *pkt,
                                         MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    MPIDI_CH3_Pkt_cancel_send_resp_t * ack_pkt = &pkt->cancel_send_resp;
    MPID_Request * sreq;

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                        "received cancel send resp pkt, sreq=0x%08x, ack=%d",
                        ack_pkt->sender_req_id, ack_pkt->ack));

    *buflen = sizeof(MPIDI_CH3_Pkt_t);

    /* Translate the echoed request handle back into a request pointer. */
    MPID_Request_get_ptr(ack_pkt->sender_req_id, sreq);

    if (!ack_pkt->ack)
    {
        /* The receiver could not cancel the message. */
        sreq->status.cancelled = FALSE;
        MPIU_DBG_MSG(CH3_OTHER,TYPICAL,"unable to cancel message");
    }
    else
    {
        sreq->status.cancelled = TRUE;

        if (MPIDI_Request_get_msg_type(sreq) == MPIDI_REQUEST_RNDV_MSG ||
            MPIDI_Request_get_type(sreq) == MPIDI_REQUEST_TYPE_SSEND)
        {
            int incomplete;

            /* A rendezvous or synchronous send is still waiting for a CTS or
               sync ack that will now never arrive, so the completion counter
               is decremented one additional time on its behalf. */
            MPIDI_CH3U_Request_decrement_cc(sreq, &incomplete);
        }

        MPIU_DBG_MSG(CH3_OTHER,TYPICAL,"message cancelled");
    }

    MPIDI_CH3U_Request_complete(sreq);

    *rreqp = NULL;

    return MPI_SUCCESS;
}
317 
318 /*
319  * Define the routines that can print out the cancel packets if
320  * debugging is enabled.
321  */
322 #ifdef MPICH_DBG_OUTPUT
/*
 * MPIDI_CH3_PktPrint_CancelSendReq - print the fields of a cancel-send
 * request packet through MPIU_DBG_PRINTF.
 *
 *   fp  - not used by this routine
 *   pkt - packet to print (read as a cancel_send_req packet)
 *
 * Always returns MPI_SUCCESS.
 */
int MPIDI_CH3_PktPrint_CancelSendReq( FILE *fp, MPIDI_CH3_Pkt_t *pkt )
{
    (void) fp;

    MPIU_DBG_PRINTF((" type ......... CANCEL_SEND\n"));
    MPIU_DBG_PRINTF((" sender_reqid . 0x%08X\n",
                     pkt->cancel_send_req.sender_req_id));
    MPIU_DBG_PRINTF((" context_id ... %d\n",
                     pkt->cancel_send_req.match.parts.context_id));
    MPIU_DBG_PRINTF((" tag .......... %d\n",
                     pkt->cancel_send_req.match.parts.tag));
    MPIU_DBG_PRINTF((" rank ......... %d\n",
                     pkt->cancel_send_req.match.parts.rank));

    return MPI_SUCCESS;
}
333 
/*
 * MPIDI_CH3_PktPrint_CancelSendResp - print the fields of a cancel-send
 * response packet through MPIU_DBG_PRINTF.
 *
 *   fp  - not used by this routine
 *   pkt - packet to print (read as a cancel_send_resp packet)
 *
 * Always returns MPI_SUCCESS.
 */
int MPIDI_CH3_PktPrint_CancelSendResp( FILE *fp, MPIDI_CH3_Pkt_t *pkt )
{
    (void) fp;

    MPIU_DBG_PRINTF((" type ......... CANCEL_SEND_RESP\n"));
    MPIU_DBG_PRINTF((" sender_reqid . 0x%08X\n",
                     pkt->cancel_send_resp.sender_req_id));
    MPIU_DBG_PRINTF((" ack .......... %d\n",
                     pkt->cancel_send_resp.ack));

    return MPI_SUCCESS;
}
342 #endif
343