xref: /dragonfly/sys/kern/kern_dmsg.c (revision 666e46d7)
1 /*-
2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/kernel.h>
37 #include <sys/conf.h>
38 #include <sys/systm.h>
39 #include <sys/queue.h>
40 #include <sys/tree.h>
41 #include <sys/malloc.h>
42 #include <sys/mount.h>
43 #include <sys/socket.h>
44 #include <sys/vnode.h>
45 #include <sys/file.h>
46 #include <sys/proc.h>
47 #include <sys/priv.h>
48 #include <sys/thread.h>
49 #include <sys/globaldata.h>
50 #include <sys/limits.h>
51 
52 #include <sys/dmsg.h>
53 
/*
 * Emit the red-black tree support functions for kdmsg_state_tree, whose
 * nodes are kdmsg_state structures linked through their 'rbnode' field
 * and compared with kdmsg_state_cmp().
 */
RB_GENERATE(kdmsg_state_tree, kdmsg_state, rbnode, kdmsg_state_cmp);

/*
 * Entry points of the per-connection receive and transmit threads,
 * started by kdmsg_iocom_reconnect().
 */
static void kdmsg_iocom_thread_rd(void *arg);
static void kdmsg_iocom_thread_wr(void *arg);
58 
59 /*
60  * Initialize the roll-up communications structure for a network
61  * messaging session.  This function does not install the socket.
62  */
63 void
64 kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle,
65 		 struct malloc_type *mmsg,
66 		 int (*lnk_rcvmsg)(kdmsg_msg_t *msg),
67 		 int (*dbg_rcvmsg)(kdmsg_msg_t *msg),
68 		 int (*misc_rcvmsg)(kdmsg_msg_t *msg))
69 {
70 	bzero(iocom, sizeof(*iocom));
71 	iocom->handle = handle;
72 	iocom->mmsg = mmsg;
73 	iocom->lnk_rcvmsg = lnk_rcvmsg;
74 	iocom->dbg_rcvmsg = dbg_rcvmsg;
75 	iocom->misc_rcvmsg = misc_rcvmsg;
76 	iocom->router.iocom = iocom;
77 	lockinit(&iocom->msglk, "h2msg", 0, 0);
78 	TAILQ_INIT(&iocom->msgq);
79 	RB_INIT(&iocom->staterd_tree);
80 	RB_INIT(&iocom->statewr_tree);
81 }
82 
83 /*
84  * [Re]connect using the passed file pointer.  The caller must ref the
85  * fp for us.  We own that ref now.
86  */
87 void
88 kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp,
89 		      const char *subsysname)
90 {
91 	/*
92 	 * Destroy the current connection
93 	 */
94 	atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
95 	while (iocom->msgrd_td || iocom->msgwr_td) {
96 		wakeup(&iocom->msg_ctl);
97 		tsleep(iocom, 0, "clstrkl", hz);
98 	}
99 
100 	/*
101 	 * Drop communications descriptor
102 	 */
103 	if (iocom->msg_fp) {
104 		fdrop(iocom->msg_fp);
105 		iocom->msg_fp = NULL;
106 	}
107 	kprintf("RESTART CONNECTION\n");
108 
109 	/*
110 	 * Setup new communications descriptor
111 	 */
112 	iocom->msg_ctl = 0;
113 	iocom->msg_fp = fp;
114 	iocom->msg_seq = 0;
115 
116 	lwkt_create(kdmsg_iocom_thread_rd, iocom, &iocom->msgrd_td,
117 		    NULL, 0, -1, "%s-msgrd", subsysname);
118 	lwkt_create(kdmsg_iocom_thread_wr, iocom, &iocom->msgwr_td,
119 		    NULL, 0, -1, "%s-msgwr", subsysname);
120 }
121 
122 /*
123  * Disconnect and clean up
124  */
125 void
126 kdmsg_iocom_uninit(kdmsg_iocom_t *iocom)
127 {
128 	/*
129 	 * Ask the cluster controller to go away
130 	 */
131 	atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
132 
133 	while (iocom->msgrd_td || iocom->msgwr_td) {
134 		wakeup(&iocom->msg_ctl);
135 		tsleep(iocom, 0, "clstrkl", hz);
136 	}
137 
138 	/*
139 	 * Drop communications descriptor
140 	 */
141 	if (iocom->msg_fp) {
142 		fdrop(iocom->msg_fp);
143 		iocom->msg_fp = NULL;
144 	}
145 }
146 
147 /*
148  * Cluster controller thread.  Perform messaging functions.  We have one
149  * thread for the reader and one for the writer.  The writer handles
150  * shutdown requests (which should break the reader thread).
151  */
152 static
153 void
154 kdmsg_iocom_thread_rd(void *arg)
155 {
156 	kdmsg_iocom_t *iocom = arg;
157 	dmsg_hdr_t hdr;
158 	kdmsg_msg_t *msg;
159 	kdmsg_state_t *state;
160 	size_t hbytes;
161 	int error = 0;
162 
163 	while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0) {
164 		/*
165 		 * Retrieve the message from the pipe or socket.
166 		 */
167 		error = fp_read(iocom->msg_fp, &hdr, sizeof(hdr),
168 				NULL, 1, UIO_SYSSPACE);
169 		if (error)
170 			break;
171 		if (hdr.magic != DMSG_HDR_MAGIC) {
172 			kprintf("kdmsg: bad magic: %04x\n", hdr.magic);
173 			error = EINVAL;
174 			break;
175 		}
176 		hbytes = (hdr.cmd & DMSGF_SIZE) * DMSG_ALIGN;
177 		if (hbytes < sizeof(hdr) || hbytes > DMSG_AUX_MAX) {
178 			kprintf("kdmsg: bad header size %zd\n", hbytes);
179 			error = EINVAL;
180 			break;
181 		}
182 		/* XXX messy: mask cmd to avoid allocating state */
183 		msg = kdmsg_msg_alloc(&iocom->router,
184 					hdr.cmd & DMSGF_BASECMDMASK,
185 					NULL, NULL);
186 		msg->any.head = hdr;
187 		msg->hdr_size = hbytes;
188 		if (hbytes > sizeof(hdr)) {
189 			error = fp_read(iocom->msg_fp, &msg->any.head + 1,
190 					hbytes - sizeof(hdr),
191 					NULL, 1, UIO_SYSSPACE);
192 			if (error) {
193 				kprintf("kdmsg: short msg received\n");
194 				error = EINVAL;
195 				break;
196 			}
197 		}
198 		msg->aux_size = hdr.aux_bytes * DMSG_ALIGN;
199 		if (msg->aux_size > DMSG_AUX_MAX) {
200 			kprintf("kdmsg: illegal msg payload size %zd\n",
201 				msg->aux_size);
202 			error = EINVAL;
203 			break;
204 		}
205 		if (msg->aux_size) {
206 			msg->aux_data = kmalloc(msg->aux_size, iocom->mmsg,
207 						M_WAITOK | M_ZERO);
208 			error = fp_read(iocom->msg_fp, msg->aux_data,
209 					msg->aux_size,
210 					NULL, 1, UIO_SYSSPACE);
211 			if (error) {
212 				kprintf("kdmsg: short msg payload received\n");
213 				break;
214 			}
215 		}
216 
217 		/*
218 		 * State machine tracking, state assignment for msg,
219 		 * returns error and discard status.  Errors are fatal
220 		 * to the connection except for EALREADY which forces
221 		 * a discard without execution.
222 		 */
223 		error = kdmsg_state_msgrx(msg);
224 		if (error) {
225 			/*
226 			 * Raw protocol or connection error
227 			 */
228 			kdmsg_msg_free(msg);
229 			if (error == EALREADY)
230 				error = 0;
231 		} else if (msg->state && msg->state->func) {
232 			/*
233 			 * Message related to state which already has a
234 			 * handling function installed for it.
235 			 */
236 			error = msg->state->func(msg->state, msg);
237 			kdmsg_state_cleanuprx(msg);
238 		} else if ((msg->any.head.cmd & DMSGF_PROTOS) ==
239 			   DMSG_PROTO_LNK) {
240 			/*
241 			 * Message related to the LNK protocol set
242 			 */
243 			error = iocom->lnk_rcvmsg(msg);
244 			kdmsg_state_cleanuprx(msg);
245 		} else if ((msg->any.head.cmd & DMSGF_PROTOS) ==
246 			   DMSG_PROTO_DBG) {
247 			/*
248 			 * Message related to the DBG protocol set
249 			 */
250 			error = iocom->dbg_rcvmsg(msg);
251 			kdmsg_state_cleanuprx(msg);
252 		} else {
253 			/*
254 			 * Other higher-level messages (e.g. vnops)
255 			 */
256 			error = iocom->misc_rcvmsg(msg);
257 			kdmsg_state_cleanuprx(msg);
258 		}
259 		msg = NULL;
260 	}
261 
262 	if (error)
263 		kprintf("kdmsg: read failed error %d\n", error);
264 
265 	lockmgr(&iocom->msglk, LK_EXCLUSIVE);
266 	if (msg) {
267 		if (msg->state && msg->state->msg == msg)
268 			msg->state->msg = NULL;
269 		kdmsg_msg_free(msg);
270 	}
271 
272 	if ((state = iocom->freerd_state) != NULL) {
273 		iocom->freerd_state = NULL;
274 		kdmsg_state_free(state);
275 	}
276 
277 	/*
278 	 * Shutdown the socket before waiting for the transmit side.
279 	 *
280 	 * If we are dying due to e.g. a socket disconnect verses being
281 	 * killed explicity we have to set KILL in order to kick the tx
282 	 * side when it might not have any other work to do.  KILL might
283 	 * already be set if we are in an unmount or reconnect.
284 	 */
285 	fp_shutdown(iocom->msg_fp, SHUT_RDWR);
286 
287 	atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
288 	wakeup(&iocom->msg_ctl);
289 
290 	/*
291 	 * Wait for the transmit side to drain remaining messages
292 	 * before cleaning up the rx state.  The transmit side will
293 	 * set KILLTX and wait for the rx side to completely finish
294 	 * (set msgrd_td to NULL) before cleaning up any remaining
295 	 * tx states.
296 	 */
297 	lockmgr(&iocom->msglk, LK_RELEASE);
298 	atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX);
299 	wakeup(&iocom->msg_ctl);
300 	while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLTX) == 0) {
301 		wakeup(&iocom->msg_ctl);
302 		tsleep(iocom, 0, "clstrkw", hz);
303 	}
304 
305 	iocom->msgrd_td = NULL;
306 
307 	/*
308 	 * iocom can be ripped out from under us at this point but
309 	 * wakeup() is safe.
310 	 */
311 	wakeup(iocom);
312 	lwkt_exit();
313 }
314 
315 static
316 void
317 kdmsg_iocom_thread_wr(void *arg)
318 {
319 	kdmsg_iocom_t *iocom = arg;
320 	kdmsg_msg_t *msg;
321 	kdmsg_state_t *state;
322 	ssize_t res;
323 	int error = 0;
324 	int retries = 20;
325 
326 	/*
327 	 * Transmit loop
328 	 */
329 	msg = NULL;
330 	lockmgr(&iocom->msglk, LK_EXCLUSIVE);
331 
332 	while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0 && error == 0) {
333 		/*
334 		 * Sleep if no messages pending.  Interlock with flag while
335 		 * holding msglk.
336 		 */
337 		if (TAILQ_EMPTY(&iocom->msgq)) {
338 			atomic_set_int(&iocom->msg_ctl,
339 				       KDMSG_CLUSTERCTL_SLEEPING);
340 			lksleep(&iocom->msg_ctl, &iocom->msglk, 0, "msgwr", hz);
341 			atomic_clear_int(&iocom->msg_ctl,
342 					 KDMSG_CLUSTERCTL_SLEEPING);
343 		}
344 
345 		while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
346 			/*
347 			 * Remove msg from the transmit queue and do
348 			 * persist and half-closed state handling.
349 			 */
350 			TAILQ_REMOVE(&iocom->msgq, msg, qentry);
351 			lockmgr(&iocom->msglk, LK_RELEASE);
352 
353 			error = kdmsg_state_msgtx(msg);
354 			if (error == EALREADY) {
355 				error = 0;
356 				kdmsg_msg_free(msg);
357 				lockmgr(&iocom->msglk, LK_EXCLUSIVE);
358 				continue;
359 			}
360 			if (error) {
361 				kdmsg_msg_free(msg);
362 				lockmgr(&iocom->msglk, LK_EXCLUSIVE);
363 				break;
364 			}
365 
366 			/*
367 			 * Dump the message to the pipe or socket.
368 			 */
369 			error = fp_write(iocom->msg_fp, &msg->any,
370 					 msg->hdr_size, &res, UIO_SYSSPACE);
371 			if (error || res != msg->hdr_size) {
372 				if (error == 0)
373 					error = EINVAL;
374 				lockmgr(&iocom->msglk, LK_EXCLUSIVE);
375 				break;
376 			}
377 			if (msg->aux_size) {
378 				error = fp_write(iocom->msg_fp,
379 						 msg->aux_data, msg->aux_size,
380 						 &res, UIO_SYSSPACE);
381 				if (error || res != msg->aux_size) {
382 					if (error == 0)
383 						error = EINVAL;
384 					lockmgr(&iocom->msglk, LK_EXCLUSIVE);
385 					break;
386 				}
387 			}
388 			kdmsg_state_cleanuptx(msg);
389 			lockmgr(&iocom->msglk, LK_EXCLUSIVE);
390 		}
391 	}
392 
393 	/*
394 	 * Cleanup messages pending transmission and release msgq lock.
395 	 */
396 	if (error)
397 		kprintf("kdmsg: write failed error %d\n", error);
398 
399 	if (msg) {
400 		if (msg->state && msg->state->msg == msg)
401 			msg->state->msg = NULL;
402 		kdmsg_msg_free(msg);
403 	}
404 
405 	/*
406 	 * Shutdown the socket.  This will cause the rx thread to get an
407 	 * EOF and ensure that both threads get to a termination state.
408 	 */
409 	fp_shutdown(iocom->msg_fp, SHUT_RDWR);
410 
411 	/*
412 	 * Set KILLTX (which the rx side waits for), then wait for the RX
413 	 * side to completely finish before we clean out any remaining
414 	 * command states.
415 	 */
416 	lockmgr(&iocom->msglk, LK_RELEASE);
417 	atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLTX);
418 	wakeup(&iocom->msg_ctl);
419 	while (iocom->msgrd_td) {
420 		wakeup(&iocom->msg_ctl);
421 		tsleep(iocom, 0, "clstrkw", hz);
422 	}
423 	lockmgr(&iocom->msglk, LK_EXCLUSIVE);
424 
425 	/*
426 	 * Simulate received MSGF_DELETE's for any remaining states.
427 	 */
428 cleanuprd:
429 	RB_FOREACH(state, kdmsg_state_tree, &iocom->staterd_tree) {
430 		if (state->func &&
431 		    (state->rxcmd & DMSGF_DELETE) == 0) {
432 			lockmgr(&iocom->msglk, LK_RELEASE);
433 			msg = kdmsg_msg_alloc(&iocom->router, DMSG_LNK_ERROR,
434 					      NULL, NULL);
435 			if ((state->rxcmd & DMSGF_CREATE) == 0)
436 				msg->any.head.cmd |= DMSGF_CREATE;
437 			msg->any.head.cmd |= DMSGF_DELETE;
438 			msg->state = state;
439 			state->rxcmd = msg->any.head.cmd &
440 				       ~DMSGF_DELETE;
441 			msg->state->func(state, msg);
442 			kdmsg_state_cleanuprx(msg);
443 			lockmgr(&iocom->msglk, LK_EXCLUSIVE);
444 			goto cleanuprd;
445 		}
446 		if (state->func == NULL) {
447 			state->flags &= ~KDMSG_STATE_INSERTED;
448 			RB_REMOVE(kdmsg_state_tree,
449 				  &iocom->staterd_tree, state);
450 			kdmsg_state_free(state);
451 			goto cleanuprd;
452 		}
453 	}
454 
455 	/*
456 	 * NOTE: We have to drain the msgq to handle situations
457 	 *	 where received states have built up output
458 	 *	 messages, to avoid creating messages with
459 	 *	 duplicate CREATE/DELETE flags.
460 	 */
461 cleanupwr:
462 	kdmsg_drain_msgq(iocom);
463 	RB_FOREACH(state, kdmsg_state_tree, &iocom->statewr_tree) {
464 		if (state->func &&
465 		    (state->rxcmd & DMSGF_DELETE) == 0) {
466 			lockmgr(&iocom->msglk, LK_RELEASE);
467 			msg = kdmsg_msg_alloc(&iocom->router, DMSG_LNK_ERROR,
468 					      NULL, NULL);
469 			if ((state->rxcmd & DMSGF_CREATE) == 0)
470 				msg->any.head.cmd |= DMSGF_CREATE;
471 			msg->any.head.cmd |= DMSGF_DELETE |
472 					     DMSGF_REPLY;
473 			msg->state = state;
474 			state->rxcmd = msg->any.head.cmd &
475 				       ~DMSGF_DELETE;
476 			msg->state->func(state, msg);
477 			kdmsg_state_cleanuprx(msg);
478 			lockmgr(&iocom->msglk, LK_EXCLUSIVE);
479 			goto cleanupwr;
480 		}
481 		if (state->func == NULL) {
482 			state->flags &= ~KDMSG_STATE_INSERTED;
483 			RB_REMOVE(kdmsg_state_tree,
484 				  &iocom->statewr_tree, state);
485 			kdmsg_state_free(state);
486 			goto cleanupwr;
487 		}
488 	}
489 
490 	kdmsg_drain_msgq(iocom);
491 	if (--retries == 0)
492 		panic("kdmsg: comm thread shutdown couldn't drain");
493 	if (RB_ROOT(&iocom->statewr_tree))
494 		goto cleanupwr;
495 
496 	if ((state = iocom->freewr_state) != NULL) {
497 		iocom->freewr_state = NULL;
498 		kdmsg_state_free(state);
499 	}
500 
501 	lockmgr(&iocom->msglk, LK_RELEASE);
502 
503 	/*
504 	 * The state trees had better be empty now
505 	 */
506 	KKASSERT(RB_EMPTY(&iocom->staterd_tree));
507 	KKASSERT(RB_EMPTY(&iocom->statewr_tree));
508 	KKASSERT(iocom->conn_state == NULL);
509 
510 	if (iocom->exit_func) {
511 		/*
512 		 * iocom is invalid after we call the exit function.
513 		 */
514 		iocom->msgwr_td = NULL;
515 		iocom->exit_func(iocom);
516 	} else {
517 		/*
518 		 * iocom can be ripped out from under us once msgwr_td is
519 		 * set to NULL.  The wakeup is safe.
520 		 */
521 		iocom->msgwr_td = NULL;
522 		wakeup(iocom);
523 	}
524 	lwkt_exit();
525 }
526 
527 /*
528  * This cleans out the pending transmit message queue, adjusting any
529  * persistent states properly in the process.
530  *
531  * Caller must hold pmp->iocom.msglk
532  */
533 void
534 kdmsg_drain_msgq(kdmsg_iocom_t *iocom)
535 {
536 	kdmsg_msg_t *msg;
537 
538 	/*
539 	 * Clean out our pending transmit queue, executing the
540 	 * appropriate state adjustments.  If this tries to open
541 	 * any new outgoing transactions we have to loop up and
542 	 * clean them out.
543 	 */
544 	while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
545 		TAILQ_REMOVE(&iocom->msgq, msg, qentry);
546 		lockmgr(&iocom->msglk, LK_RELEASE);
547 		if (msg->state && msg->state->msg == msg)
548 			msg->state->msg = NULL;
549 		if (kdmsg_state_msgtx(msg))
550 			kdmsg_msg_free(msg);
551 		else
552 			kdmsg_state_cleanuptx(msg);
553 		lockmgr(&iocom->msglk, LK_EXCLUSIVE);
554 	}
555 }
556 
557 /*
558  * Process state tracking for a message after reception, prior to
559  * execution.
560  *
 * Called without msglk held; the lock is acquired and released
 * internally.
 *
 * On return msg->state refers to the persistent state matching the
 * message, or is NULL if there is none (e.g. for one-off messages).
 *
 * Returns 0 if the message should be executed.  EALREADY requests that
 * the caller discard the message without executing it; any other error
 * indicates a protocol error.
568  *
569  * --
570  *
571  * These routines handle persistent and command/reply message state via the
572  * CREATE and DELETE flags.  The first message in a command or reply sequence
573  * sets CREATE, the last message in a command or reply sequence sets DELETE.
574  *
575  * There can be any number of intermediate messages belonging to the same
576  * sequence sent inbetween the CREATE message and the DELETE message,
577  * which set neither flag.  This represents a streaming command or reply.
578  *
579  * Any command message received with CREATE set expects a reply sequence to
580  * be returned.  Reply sequences work the same as command sequences except the
581  * REPLY bit is also sent.  Both the command side and reply side can
582  * degenerate into a single message with both CREATE and DELETE set.  Note
583  * that one side can be streaming and the other side not, or neither, or both.
584  *
585  * The msgid is unique for the initiator.  That is, two sides sending a new
586  * message can use the same msgid without colliding.
587  *
588  * --
589  *
590  * ABORT sequences work by setting the ABORT flag along with normal message
591  * state.  However, ABORTs can also be sent on half-closed messages, that is
592  * even if the command or reply side has already sent a DELETE, as long as
593  * the message has not been fully closed it can still send an ABORT+DELETE
594  * to terminate the half-closed message state.
595  *
596  * Since ABORT+DELETEs can race we silently discard ABORT's for message
597  * state which has already been fully closed.  REPLY+ABORT+DELETEs can
598  * also race, and in this situation the other side might have already
599  * initiated a new unrelated command with the same message id.  Since
600  * the abort has not set the CREATE flag the situation can be detected
601  * and the message will also be discarded.
602  *
603  * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE].
604  * The ABORT request is essentially integrated into the command instead
605  * of being sent later on.  In this situation the command implementation
606  * detects that CREATE and ABORT are both set (vs ABORT alone) and can
607  * special-case non-blocking operation for the command.
608  *
609  * NOTE!  Messages with ABORT set without CREATE or DELETE are considered
610  *	  to be mid-stream aborts for command/reply sequences.  ABORTs on
611  *	  one-way messages are not supported.
612  *
613  * NOTE!  If a command sequence does not support aborts the ABORT flag is
614  *	  simply ignored.
615  *
616  * --
617  *
618  * One-off messages (no reply expected) are sent with neither CREATE or DELETE
619  * set.  One-off messages cannot be aborted and typically aren't processed
620  * by these routines.  The REPLY bit can be used to distinguish whether a
621  * one-off message is a command or reply.  For example, one-off replies
622  * will typically just contain status updates.
623  */
int
kdmsg_state_msgrx(kdmsg_msg_t *msg)
{
	kdmsg_iocom_t *iocom;
	kdmsg_state_t *state;
	int error;

	iocom = msg->router->iocom;

	/*
	 * XXX resolve msg->any.head.source and msg->any.head.target
	 *     into LNK_SPAN references.
	 *
	 * XXX replace msg->router
	 */

	/*
	 * Make sure a state structure is ready to go in case we need a new
	 * one.  The spare is kept in freerd_state; it doubles as the
	 * lookup key below and is only consumed when a new transaction
	 * is created.
	 */
	if ((state = iocom->freerd_state) == NULL) {
		state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
		state->flags = KDMSG_STATE_DYNAMIC;
		iocom->freerd_state = state;
	}

	/*
	 * Lock RB tree and locate existing persistent state, if any.
	 *
	 * If received msg is a command state is on staterd_tree.
	 * If received msg is a reply state is on statewr_tree.
	 */
	lockmgr(&iocom->msglk, LK_EXCLUSIVE);

	/* Fill in the spare state so it can serve as the RB_FIND key */
	state->msgid = msg->any.head.msgid;
	state->router = &iocom->router;
	/* Unconditional trace of every received message */
	kprintf("received msg %08x msgid %jx source=%jx target=%jx\n",
		msg->any.head.cmd,
		(intmax_t)msg->any.head.msgid,
		(intmax_t)msg->any.head.source,
		(intmax_t)msg->any.head.target);
	if (msg->any.head.cmd & DMSGF_REPLY)
		state = RB_FIND(kdmsg_state_tree, &iocom->statewr_tree, state);
	else
		state = RB_FIND(kdmsg_state_tree, &iocom->staterd_tree, state);
	/* From here on 'state' is the existing state found, or NULL */
	msg->state = state;

	/*
	 * Short-cut one-off or mid-stream messages (state may be NULL).
	 */
	if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
				  DMSGF_ABORT)) == 0) {
		lockmgr(&iocom->msglk, LK_RELEASE);
		return(0);
	}

	/*
	 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
	 * inside the case statements.
	 */
	switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY)) {
	case DMSGF_CREATE:
	case DMSGF_CREATE | DMSGF_DELETE:
		/*
		 * New persistent command received.
		 */
		if (state) {
			kprintf("kdmsg_state_msgrx: duplicate transaction\n");
			error = EINVAL;
			break;
		}
		/*
		 * Consume the spare state and insert it on staterd_tree.
		 * DELETE is not recorded in rxcmd here; it is added by
		 * kdmsg_state_cleanuprx() once the message has been
		 * processed.
		 */
		state = iocom->freerd_state;
		iocom->freerd_state = NULL;
		msg->state = state;
		state->router = msg->router;
		state->msg = msg;
		state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
		state->txcmd = DMSGF_REPLY;
		RB_INSERT(kdmsg_state_tree, &iocom->staterd_tree, state);
		state->flags |= KDMSG_STATE_INSERTED;
		error = 0;
		break;
	case DMSGF_DELETE:
		/*
		 * Persistent state is expected but might not exist if an
		 * ABORT+DELETE races the close.
		 */
		if (state == NULL) {
			if (msg->any.head.cmd & DMSGF_ABORT) {
				error = EALREADY;
			} else {
				kprintf("kdmsg_state_msgrx: no state "
					"for DELETE\n");
				error = EINVAL;
			}
			break;
		}

		/*
		 * Handle another ABORT+DELETE case if the msgid has already
		 * been reused.
		 */
		if ((state->rxcmd & DMSGF_CREATE) == 0) {
			if (msg->any.head.cmd & DMSGF_ABORT) {
				error = EALREADY;
			} else {
				kprintf("kdmsg_state_msgrx: state reused "
					"for DELETE\n");
				error = EINVAL;
			}
			break;
		}
		error = 0;
		break;
	default:
		/*
		 * Check for mid-stream ABORT command received, otherwise
		 * allow.
		 *
		 * This case is reached when none of CREATE, DELETE or
		 * REPLY is set; since the short-cut above already
		 * returned for messages without ABORT, ABORT is set here.
		 */
		if (msg->any.head.cmd & DMSGF_ABORT) {
			if (state == NULL ||
			    (state->rxcmd & DMSGF_CREATE) == 0) {
				error = EALREADY;
				break;
			}
		}
		error = 0;
		break;
	case DMSGF_REPLY | DMSGF_CREATE:
	case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
		/*
		 * When receiving a reply with CREATE set the original
		 * persistent state message should already exist.
		 */
		if (state == NULL) {
			kprintf("kdmsg_state_msgrx: no state match for "
				"REPLY cmd=%08x msgid=%016jx\n",
				msg->any.head.cmd,
				(intmax_t)msg->any.head.msgid);
			error = EINVAL;
			break;
		}
		state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
		error = 0;
		break;
	case DMSGF_REPLY | DMSGF_DELETE:
		/*
		 * Received REPLY+ABORT+DELETE in case where msgid has
		 * already been fully closed, ignore the message.
		 */
		if (state == NULL) {
			if (msg->any.head.cmd & DMSGF_ABORT) {
				error = EALREADY;
			} else {
				kprintf("kdmsg_state_msgrx: no state match "
					"for REPLY|DELETE\n");
				error = EINVAL;
			}
			break;
		}

		/*
		 * Received REPLY+ABORT+DELETE in case where msgid has
		 * already been reused for an unrelated message,
		 * ignore the message.
		 */
		if ((state->rxcmd & DMSGF_CREATE) == 0) {
			if (msg->any.head.cmd & DMSGF_ABORT) {
				error = EALREADY;
			} else {
				kprintf("kdmsg_state_msgrx: state reused "
					"for REPLY|DELETE\n");
				error = EINVAL;
			}
			break;
		}
		error = 0;
		break;
	case DMSGF_REPLY:
		/*
		 * Check for mid-stream ABORT reply received to sent command.
		 *
		 * As with the default case, a message can only get here
		 * with ABORT set because of the short-cut above.
		 */
		if (msg->any.head.cmd & DMSGF_ABORT) {
			if (state == NULL ||
			    (state->rxcmd & DMSGF_CREATE) == 0) {
				error = EALREADY;
				break;
			}
		}
		error = 0;
		break;
	}
	lockmgr(&iocom->msglk, LK_RELEASE);
	return (error);
}
820 
821 void
822 kdmsg_state_cleanuprx(kdmsg_msg_t *msg)
823 {
824 	kdmsg_iocom_t *iocom;
825 	kdmsg_state_t *state;
826 
827 	iocom = msg->router->iocom;
828 
829 	if ((state = msg->state) == NULL) {
830 		kdmsg_msg_free(msg);
831 	} else if (msg->any.head.cmd & DMSGF_DELETE) {
832 		lockmgr(&iocom->msglk, LK_EXCLUSIVE);
833 		state->rxcmd |= DMSGF_DELETE;
834 		if (state->txcmd & DMSGF_DELETE) {
835 			if (state->msg == msg)
836 				state->msg = NULL;
837 			KKASSERT(state->flags & KDMSG_STATE_INSERTED);
838 			if (state->rxcmd & DMSGF_REPLY) {
839 				KKASSERT(msg->any.head.cmd &
840 					 DMSGF_REPLY);
841 				RB_REMOVE(kdmsg_state_tree,
842 					  &iocom->statewr_tree, state);
843 			} else {
844 				KKASSERT((msg->any.head.cmd &
845 					  DMSGF_REPLY) == 0);
846 				RB_REMOVE(kdmsg_state_tree,
847 					  &iocom->staterd_tree, state);
848 			}
849 			state->flags &= ~KDMSG_STATE_INSERTED;
850 			lockmgr(&iocom->msglk, LK_RELEASE);
851 			kdmsg_state_free(state);
852 		} else {
853 			lockmgr(&iocom->msglk, LK_RELEASE);
854 		}
855 		kdmsg_msg_free(msg);
856 	} else if (state->msg != msg) {
857 		kdmsg_msg_free(msg);
858 	}
859 }
860 
861 /*
862  * Process state tracking for a message prior to transmission.
863  *
 * Called with the msg dequeued and without msglk held; the lock is
 * acquired and released internally.
 *
 * One-off messages usually carry no persistent state and msg->state may
 * be NULL in this situation.
 *
 * New transactions (when CREATE is set) must already have their state
 * assigned to msg->state; this routine only records txcmd/rxcmd in it.
 *
 * Returns 0 if the message should be transmitted.  EALREADY requests
 * that the caller discard the message; any other error indicates a
 * protocol error.
873  */
int
kdmsg_state_msgtx(kdmsg_msg_t *msg)
{
	kdmsg_iocom_t *iocom;
	kdmsg_state_t *state;
	int error;

	iocom = msg->router->iocom;

	/*
	 * Make sure a state structure is ready to go in case we need a new
	 * one.  The spare is kept in freewr_state.
	 *
	 * NOTE(review): nothing below in this routine consumes the spare;
	 * 'state' is overwritten with msg->state right after the lock is
	 * taken.
	 */
	if ((state = iocom->freewr_state) == NULL) {
		state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
		state->flags = KDMSG_STATE_DYNAMIC;
		state->router = &iocom->router;
		iocom->freewr_state = state;
	}

	/*
	 * Lock RB tree.  If persistent state is present it will have already
	 * been assigned to msg.
	 */
	lockmgr(&iocom->msglk, LK_EXCLUSIVE);
	state = msg->state;

	/*
	 * Short-cut one-off or mid-stream messages (state may be NULL).
	 */
	if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
				  DMSGF_ABORT)) == 0) {
		lockmgr(&iocom->msglk, LK_RELEASE);
		return(0);
	}


	/*
	 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
	 * inside the case statements.
	 */
	switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
				    DMSGF_REPLY)) {
	case DMSGF_CREATE:
	case DMSGF_CREATE | DMSGF_DELETE:
		/*
		 * Insert the new persistent message state and mark
		 * half-closed if DELETE is set.  Since this is a new
		 * message it isn't possible to transition into the fully
		 * closed state here.
		 *
		 * XXX state must be assigned and inserted by
		 *     kdmsg_msg_write().  txcmd is assigned by us
		 *     on-transmit.
		 */
		KKASSERT(state != NULL);
		/*
		 * DELETE is not recorded in txcmd here; it is added by
		 * kdmsg_state_cleanuptx() after the message is sent.
		 */
		state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
		state->rxcmd = DMSGF_REPLY;
		error = 0;
		break;
	case DMSGF_DELETE:
		/*
		 * Sent ABORT+DELETE in case where msgid has already
		 * been fully closed, ignore the message.
		 */
		if (state == NULL) {
			if (msg->any.head.cmd & DMSGF_ABORT) {
				error = EALREADY;
			} else {
				kprintf("kdmsg_state_msgtx: no state match "
					"for DELETE cmd=%08x msgid=%016jx\n",
					msg->any.head.cmd,
					(intmax_t)msg->any.head.msgid);
				error = EINVAL;
			}
			break;
		}

		/*
		 * Sent ABORT+DELETE in case where msgid has
		 * already been reused for an unrelated message,
		 * ignore the message.
		 */
		if ((state->txcmd & DMSGF_CREATE) == 0) {
			if (msg->any.head.cmd & DMSGF_ABORT) {
				error = EALREADY;
			} else {
				kprintf("kdmsg_state_msgtx: state reused "
					"for DELETE\n");
				error = EINVAL;
			}
			break;
		}
		error = 0;
		break;
	default:
		/*
		 * Check for mid-stream ABORT command sent
		 *
		 * This case is reached when none of CREATE, DELETE or
		 * REPLY is set; since the short-cut above already
		 * returned for messages without ABORT, ABORT is set here.
		 */
		if (msg->any.head.cmd & DMSGF_ABORT) {
			if (state == NULL ||
			    (state->txcmd & DMSGF_CREATE) == 0) {
				error = EALREADY;
				break;
			}
		}
		error = 0;
		break;
	case DMSGF_REPLY | DMSGF_CREATE:
	case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
		/*
		 * When transmitting a reply with CREATE set the original
		 * persistent state message should already exist.
		 */
		if (state == NULL) {
			kprintf("kdmsg_state_msgtx: no state match "
				"for REPLY | CREATE\n");
			error = EINVAL;
			break;
		}
		state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE;
		error = 0;
		break;
	case DMSGF_REPLY | DMSGF_DELETE:
		/*
		 * When transmitting a reply with DELETE set the original
		 * persistent state message should already exist.
		 *
		 * This is very similar to the REPLY|CREATE|* case except
		 * txcmd is already stored, so we just add the DELETE flag.
		 *
		 * Sent REPLY+ABORT+DELETE in case where msgid has
		 * already been fully closed, ignore the message.
		 */
		if (state == NULL) {
			if (msg->any.head.cmd & DMSGF_ABORT) {
				error = EALREADY;
			} else {
				kprintf("kdmsg_state_msgtx: no state match "
					"for REPLY | DELETE\n");
				error = EINVAL;
			}
			break;
		}

		/*
		 * Sent REPLY+ABORT+DELETE in case where msgid has already
		 * been reused for an unrelated message, ignore the message.
		 */
		if ((state->txcmd & DMSGF_CREATE) == 0) {
			if (msg->any.head.cmd & DMSGF_ABORT) {
				error = EALREADY;
			} else {
				kprintf("kdmsg_state_msgtx: state reused "
					"for REPLY | DELETE\n");
				error = EINVAL;
			}
			break;
		}
		error = 0;
		break;
	case DMSGF_REPLY:
		/*
		 * Check for mid-stream ABORT reply sent.
		 *
		 * One-off REPLY messages are allowed for e.g. status updates.
		 * (Those without ABORT never get this far; they return
		 * through the short-cut above.)
		 */
		if (msg->any.head.cmd & DMSGF_ABORT) {
			if (state == NULL ||
			    (state->txcmd & DMSGF_CREATE) == 0) {
				error = EALREADY;
				break;
			}
		}
		error = 0;
		break;
	}
	lockmgr(&iocom->msglk, LK_RELEASE);
	return (error);
}
1055 
1056 void
1057 kdmsg_state_cleanuptx(kdmsg_msg_t *msg)
1058 {
1059 	kdmsg_iocom_t *iocom;
1060 	kdmsg_state_t *state;
1061 
1062 	iocom = msg->router->iocom;
1063 
1064 	if ((state = msg->state) == NULL) {
1065 		kdmsg_msg_free(msg);
1066 	} else if (msg->any.head.cmd & DMSGF_DELETE) {
1067 		lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1068 		state->txcmd |= DMSGF_DELETE;
1069 		if (state->rxcmd & DMSGF_DELETE) {
1070 			if (state->msg == msg)
1071 				state->msg = NULL;
1072 			KKASSERT(state->flags & KDMSG_STATE_INSERTED);
1073 			if (state->txcmd & DMSGF_REPLY) {
1074 				KKASSERT(msg->any.head.cmd &
1075 					 DMSGF_REPLY);
1076 				RB_REMOVE(kdmsg_state_tree,
1077 					  &iocom->staterd_tree, state);
1078 			} else {
1079 				KKASSERT((msg->any.head.cmd &
1080 					  DMSGF_REPLY) == 0);
1081 				RB_REMOVE(kdmsg_state_tree,
1082 					  &iocom->statewr_tree, state);
1083 			}
1084 			state->flags &= ~KDMSG_STATE_INSERTED;
1085 			lockmgr(&iocom->msglk, LK_RELEASE);
1086 			kdmsg_state_free(state);
1087 		} else {
1088 			lockmgr(&iocom->msglk, LK_RELEASE);
1089 		}
1090 		kdmsg_msg_free(msg);
1091 	} else if (state->msg != msg) {
1092 		kdmsg_msg_free(msg);
1093 	}
1094 }
1095 
1096 void
1097 kdmsg_state_free(kdmsg_state_t *state)
1098 {
1099 	kdmsg_iocom_t *iocom;
1100 	kdmsg_msg_t *msg;
1101 
1102 	iocom = state->router->iocom;
1103 
1104 	KKASSERT((state->flags & KDMSG_STATE_INSERTED) == 0);
1105 	msg = state->msg;
1106 	state->msg = NULL;
1107 	kfree(state, iocom->mmsg);
1108 	if (msg)
1109 		kdmsg_msg_free(msg);
1110 }
1111 
1112 kdmsg_msg_t *
1113 kdmsg_msg_alloc(kdmsg_router_t *router, uint32_t cmd,
1114 		int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data)
1115 {
1116 	kdmsg_iocom_t *iocom;
1117 	kdmsg_msg_t *msg;
1118 	kdmsg_state_t *state;
1119 	size_t hbytes;
1120 
1121 	iocom = router->iocom;
1122 	hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN;
1123 	msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes,
1124 		      iocom->mmsg, M_WAITOK | M_ZERO);
1125 	msg->hdr_size = hbytes;
1126 	msg->router = router;
1127 	KKASSERT(router != NULL);
1128 	msg->any.head.magic = DMSG_HDR_MAGIC;
1129 	msg->any.head.source = 0;
1130 	msg->any.head.target = router->target;
1131 	msg->any.head.cmd = cmd;
1132 
1133 	if (cmd & DMSGF_CREATE) {
1134 		/*
1135 		 * New transaction, requires tracking state and a unique
1136 		 * msgid to be allocated.
1137 		 */
1138 		KKASSERT(msg->state == NULL);
1139 		state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
1140 		state->flags = KDMSG_STATE_DYNAMIC;
1141 		state->func = func;
1142 		state->any.any = data;
1143 		state->msg = msg;
1144 		state->msgid = (uint64_t)(uintptr_t)state;
1145 		state->router = msg->router;
1146 		msg->state = state;
1147 		msg->any.head.source = 0;
1148 		msg->any.head.target = state->router->target;
1149 		msg->any.head.msgid = state->msgid;
1150 
1151 		lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1152 		if (RB_INSERT(kdmsg_state_tree, &iocom->statewr_tree, state))
1153 			panic("duplicate msgid allocated");
1154 		state->flags |= KDMSG_STATE_INSERTED;
1155 		msg->any.head.msgid = state->msgid;
1156 		lockmgr(&iocom->msglk, LK_RELEASE);
1157 	}
1158 
1159 	return (msg);
1160 }
1161 
1162 void
1163 kdmsg_msg_free(kdmsg_msg_t *msg)
1164 {
1165 	kdmsg_iocom_t *iocom;
1166 
1167 	iocom = msg->router->iocom;
1168 
1169 	if (msg->aux_data && msg->aux_size) {
1170 		kfree(msg->aux_data, iocom->mmsg);
1171 		msg->aux_data = NULL;
1172 		msg->aux_size = 0;
1173 		msg->router = NULL;
1174 	}
1175 	kfree(msg, iocom->mmsg);
1176 }
1177 
/*
 * Indexed messages are stored in a red-black tree keyed first by their
 * router and then by their msgid.  Only persistent messages are indexed.
 */
1182 int
1183 kdmsg_state_cmp(kdmsg_state_t *state1, kdmsg_state_t *state2)
1184 {
1185 	if (state1->router < state2->router)
1186 		return(-1);
1187 	if (state1->router > state2->router)
1188 		return(1);
1189 	if (state1->msgid < state2->msgid)
1190 		return(-1);
1191 	if (state1->msgid > state2->msgid)
1192 		return(1);
1193 	return(0);
1194 }
1195 
/*
 * Write a message.  All requisite command flags have been set.
 *
 * If msg->state is non-NULL the message is written to the existing
 * transaction.  msgid will be set accordingly.
 *
 * If msg->state is NULL and CREATE is set new state is allocated and
 * (func, data) is installed.  A msgid is assigned.
 *
 * If msg->state is NULL and CREATE is not set the message is assumed
 * to be a one-way message.  The originator must assign the msgid
 * (or leave it 0, which is typical).  Note that this function
 * currently forces the msgid to 0 whenever msg->state is NULL.
 *
 * This function merely queues the message to the management thread, it
 * does not write to the message socket/pipe.
 */
1212 void
1213 kdmsg_msg_write(kdmsg_msg_t *msg)
1214 {
1215 	kdmsg_iocom_t *iocom;
1216 	kdmsg_state_t *state;
1217 
1218 	iocom = msg->router->iocom;
1219 
1220 	if (msg->state) {
1221 		/*
1222 		 * Continuance or termination of existing transaction.
1223 		 * The transaction could have been initiated by either end.
1224 		 *
1225 		 * (Function callback and aux data for the receive side can
1226 		 * be replaced or left alone).
1227 		 */
1228 		state = msg->state;
1229 		msg->any.head.msgid = state->msgid;
1230 		msg->any.head.source = 0;
1231 		msg->any.head.target = state->router->target;
1232 		lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1233 	} else {
1234 		/*
1235 		 * One-off message (always uses msgid 0 to distinguish
1236 		 * between a possibly lost in-transaction message due to
1237 		 * competing aborts and a real one-off message?)
1238 		 */
1239 		msg->any.head.msgid = 0;
1240 		msg->any.head.source = 0;
1241 		msg->any.head.target = msg->router->target;
1242 		lockmgr(&iocom->msglk, LK_EXCLUSIVE);
1243 	}
1244 
1245 	/*
1246 	 * Finish up the msg fields
1247 	 */
1248 	msg->any.head.salt = /* (random << 8) | */ (iocom->msg_seq & 255);
1249 	++iocom->msg_seq;
1250 
1251 	msg->any.head.hdr_crc = 0;
1252 	msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size);
1253 
1254 	TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry);
1255 
1256 	if (iocom->msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) {
1257 		atomic_clear_int(&iocom->msg_ctl,
1258 				 KDMSG_CLUSTERCTL_SLEEPING);
1259 		wakeup(&iocom->msg_ctl);
1260 	}
1261 
1262 	lockmgr(&iocom->msglk, LK_RELEASE);
1263 }
1264 
/*
 * Reply to a message and terminate our side of the transaction.
 *
 * If msg->state is NULL we are replying to a one-way message.
 */
1270 void
1271 kdmsg_msg_reply(kdmsg_msg_t *msg, uint32_t error)
1272 {
1273 	kdmsg_state_t *state = msg->state;
1274 	kdmsg_msg_t *nmsg;
1275 	uint32_t cmd;
1276 
1277 	/*
1278 	 * Reply with a simple error code and terminate the transaction.
1279 	 */
1280 	cmd = DMSG_LNK_ERROR;
1281 
1282 	/*
1283 	 * Check if our direction has even been initiated yet, set CREATE.
1284 	 *
1285 	 * Check what direction this is (command or reply direction).  Note
1286 	 * that txcmd might not have been initiated yet.
1287 	 *
1288 	 * If our direction has already been closed we just return without
1289 	 * doing anything.
1290 	 */
1291 	if (state) {
1292 		if (state->txcmd & DMSGF_DELETE)
1293 			return;
1294 		if ((state->txcmd & DMSGF_CREATE) == 0)
1295 			cmd |= DMSGF_CREATE;
1296 		if (state->txcmd & DMSGF_REPLY)
1297 			cmd |= DMSGF_REPLY;
1298 		cmd |= DMSGF_DELETE;
1299 	} else {
1300 		if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
1301 			cmd |= DMSGF_REPLY;
1302 	}
1303 	kprintf("MSG_REPLY state=%p msg %08x\n", state, cmd);
1304 
1305 	/* XXX messy mask cmd to avoid allocating state */
1306 	nmsg = kdmsg_msg_alloc(msg->router, cmd & DMSGF_BASECMDMASK,
1307 			       NULL, NULL);
1308 	nmsg->any.head.cmd = cmd;
1309 	nmsg->any.head.error = error;
1310 	nmsg->state = state;
1311 	kdmsg_msg_write(nmsg);
1312 }
1313 
/*
 * Reply to a message and continue our side of the transaction.
 *
 * If msg->state is NULL we are replying to a one-way message and this
 * function degenerates into the same as kdmsg_msg_reply().
 */
1320 void
1321 kdmsg_msg_result(kdmsg_msg_t *msg, uint32_t error)
1322 {
1323 	kdmsg_state_t *state = msg->state;
1324 	kdmsg_msg_t *nmsg;
1325 	uint32_t cmd;
1326 
1327 	/*
1328 	 * Return a simple result code, do NOT terminate the transaction.
1329 	 */
1330 	cmd = DMSG_LNK_ERROR;
1331 
1332 	/*
1333 	 * Check if our direction has even been initiated yet, set CREATE.
1334 	 *
1335 	 * Check what direction this is (command or reply direction).  Note
1336 	 * that txcmd might not have been initiated yet.
1337 	 *
1338 	 * If our direction has already been closed we just return without
1339 	 * doing anything.
1340 	 */
1341 	if (state) {
1342 		if (state->txcmd & DMSGF_DELETE)
1343 			return;
1344 		if ((state->txcmd & DMSGF_CREATE) == 0)
1345 			cmd |= DMSGF_CREATE;
1346 		if (state->txcmd & DMSGF_REPLY)
1347 			cmd |= DMSGF_REPLY;
1348 		/* continuing transaction, do not set MSGF_DELETE */
1349 	} else {
1350 		if ((msg->any.head.cmd & DMSGF_REPLY) == 0)
1351 			cmd |= DMSGF_REPLY;
1352 	}
1353 
1354 	/* XXX messy mask cmd to avoid allocating state */
1355 	nmsg = kdmsg_msg_alloc(msg->router, cmd & DMSGF_BASECMDMASK,
1356 			       NULL, NULL);
1357 	nmsg->any.head.cmd = cmd;
1358 	nmsg->any.head.error = error;
1359 	nmsg->state = state;
1360 	kdmsg_msg_write(nmsg);
1361 }
1362