1 /*- 2 * Copyright (c) 2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * TODO: txcmd CREATE state is deferred by tx msgq, need to calculate 36 * a streaming response. See subr_diskiocom()'s diskiodone(). 37 */ 38 #include <sys/param.h> 39 #include <sys/types.h> 40 #include <sys/kernel.h> 41 #include <sys/conf.h> 42 #include <sys/systm.h> 43 #include <sys/queue.h> 44 #include <sys/tree.h> 45 #include <sys/malloc.h> 46 #include <sys/mount.h> 47 #include <sys/socket.h> 48 #include <sys/vnode.h> 49 #include <sys/sysctl.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/priv.h> 53 #include <sys/thread.h> 54 #include <sys/globaldata.h> 55 #include <sys/limits.h> 56 57 #include <sys/dmsg.h> 58 59 RB_GENERATE(kdmsg_state_tree, kdmsg_state, rbnode, kdmsg_state_cmp); 60 61 SYSCTL_NODE(, OID_AUTO, kdmsg, CTLFLAG_RW, 0, "kdmsg"); 62 static int kdmsg_debug = 1; 63 SYSCTL_INT(_kdmsg, OID_AUTO, debug, CTLFLAG_RW, &kdmsg_debug, 0, 64 "Set debug level for kernel dmsg layer"); 65 66 #define kd_printf(level, ctl, ...) \ 67 if (kdmsg_debug >= (level)) kprintf("kdmsg: " ctl, __VA_ARGS__) 68 69 #define kdio_printf(iocom, level, ctl, ...) \ 70 if (kdmsg_debug >= (level)) kprintf("kdmsg: " ctl, __VA_ARGS__) 71 72 static int kdmsg_msg_receive_handling(kdmsg_msg_t *msg); 73 static int kdmsg_state_msgrx(kdmsg_msg_t *msg); 74 static int kdmsg_state_msgtx(kdmsg_msg_t *msg); 75 static void kdmsg_msg_write_locked(kdmsg_iocom_t *iocom, kdmsg_msg_t *msg); 76 static void kdmsg_state_cleanuprx(kdmsg_msg_t *msg); 77 static void kdmsg_state_cleanuptx(kdmsg_msg_t *msg); 78 static void kdmsg_subq_delete(kdmsg_state_t *state); 79 static void kdmsg_simulate_failure(kdmsg_state_t *state, int meto, int error); 80 static void kdmsg_state_abort(kdmsg_state_t *state); 81 static void kdmsg_state_dying(kdmsg_state_t *state); 82 static void kdmsg_state_free(kdmsg_state_t *state); 83 static void kdmsg_drain_msg(kdmsg_msg_t *msg); 84 85 #ifdef KDMSG_DEBUG 86 #define KDMSG_DEBUG_ARGS , const char *file, int line 87 #define kdmsg_state_hold(state) _kdmsg_state_hold(state, __FILE__, __LINE__) 88 #define kdmsg_state_drop(state) _kdmsg_state_drop(state, __FILE__, __LINE__) 89 #else 90 #define KDMSG_DEBUG 0 91 #define KDMSG_DEBUG_ARGS 92 #define kdmsg_state_hold(state) _kdmsg_state_hold(state) 93 #define kdmsg_state_drop(state) _kdmsg_state_drop(state) 94 #endif 95 static void _kdmsg_state_hold(kdmsg_state_t *state KDMSG_DEBUG_ARGS); 96 static void _kdmsg_state_drop(kdmsg_state_t *state KDMSG_DEBUG_ARGS); 97 98 static void kdmsg_iocom_thread_rd(void *arg); 99 static void kdmsg_iocom_thread_wr(void *arg); 100 static int kdmsg_autorxmsg(kdmsg_msg_t *msg); 101 102 /*static struct lwkt_token kdmsg_token = LWKT_TOKEN_INITIALIZER(kdmsg_token);*/ 103 104 /* 105 * Initialize the roll-up communications structure for a network 106 * messaging session. This function does not install the socket. 107 */ 108 void 109 kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle, uint32_t flags, 110 struct malloc_type *mmsg, 111 int (*rcvmsg)(kdmsg_msg_t *msg)) 112 { 113 bzero(iocom, sizeof(*iocom)); 114 iocom->handle = handle; 115 iocom->mmsg = mmsg; 116 iocom->rcvmsg = rcvmsg; 117 iocom->flags = flags; 118 lockinit(&iocom->msglk, "h2msg", 0, 0); 119 TAILQ_INIT(&iocom->msgq); 120 RB_INIT(&iocom->staterd_tree); 121 RB_INIT(&iocom->statewr_tree); 122 123 iocom->state0.iocom = iocom; 124 iocom->state0.parent = &iocom->state0; 125 TAILQ_INIT(&iocom->state0.subq); 126 } 127 128 /* 129 * [Re]connect using the passed file pointer. The caller must ref the 130 * fp for us. We own that ref now. 131 */ 132 void 133 kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp, 134 const char *subsysname) 135 { 136 /* 137 * Destroy the current connection 138 */ 139 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 140 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX); 141 while (iocom->msgrd_td || iocom->msgwr_td) { 142 wakeup(&iocom->msg_ctl); 143 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz); 144 } 145 146 /* 147 * Drop communications descriptor 148 */ 149 if (iocom->msg_fp) { 150 fdrop(iocom->msg_fp); 151 iocom->msg_fp = NULL; 152 } 153 154 /* 155 * Setup new communications descriptor 156 */ 157 iocom->msg_ctl = 0; 158 iocom->msg_fp = fp; 159 iocom->msg_seq = 0; 160 iocom->flags &= ~KDMSG_IOCOMF_EXITNOACC; 161 162 lwkt_create(kdmsg_iocom_thread_rd, iocom, &iocom->msgrd_td, 163 NULL, 0, -1, "%s-msgrd", subsysname); 164 lwkt_create(kdmsg_iocom_thread_wr, iocom, &iocom->msgwr_td, 165 NULL, 0, -1, "%s-msgwr", subsysname); 166 lockmgr(&iocom->msglk, LK_RELEASE); 167 } 168 169 /* 170 * Caller sets up iocom->auto_lnk_conn and iocom->auto_lnk_span, then calls 171 * this function to handle the state machine for LNK_CONN and LNK_SPAN. 172 */ 173 static int kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 174 static int kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 175 176 void 177 kdmsg_iocom_autoinitiate(kdmsg_iocom_t *iocom, 178 void (*auto_callback)(kdmsg_msg_t *msg)) 179 { 180 kdmsg_msg_t *msg; 181 182 iocom->auto_callback = auto_callback; 183 184 msg = kdmsg_msg_alloc(&iocom->state0, 185 DMSG_LNK_CONN | DMSGF_CREATE, 186 kdmsg_lnk_conn_reply, NULL); 187 iocom->auto_lnk_conn.head = msg->any.head; 188 msg->any.lnk_conn = iocom->auto_lnk_conn; 189 iocom->conn_state = msg->state; 190 kdmsg_state_hold(msg->state); /* iocom->conn_state */ 191 kdmsg_msg_write(msg); 192 } 193 194 static 195 int 196 kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 197 { 198 kdmsg_iocom_t *iocom = state->iocom; 199 kdmsg_msg_t *rmsg; 200 201 /* 202 * Upon receipt of the LNK_CONN acknowledgement initiate an 203 * automatic SPAN if we were asked to. Used by e.g. xdisk, but 204 * not used by HAMMER2 which must manage more than one transmitted 205 * SPAN. 206 */ 207 if ((msg->any.head.cmd & DMSGF_CREATE) && 208 (iocom->flags & KDMSG_IOCOMF_AUTOTXSPAN)) { 209 rmsg = kdmsg_msg_alloc(&iocom->state0, 210 DMSG_LNK_SPAN | DMSGF_CREATE, 211 kdmsg_lnk_span_reply, NULL); 212 iocom->auto_lnk_span.head = rmsg->any.head; 213 rmsg->any.lnk_span = iocom->auto_lnk_span; 214 kdmsg_msg_write(rmsg); 215 } 216 217 /* 218 * Process shim after the CONN is acknowledged and before the CONN 219 * transaction is deleted. For deletions this gives device drivers 220 * the ability to interlock new operations on the circuit before 221 * it becomes illegal and panics. 222 */ 223 if (iocom->auto_callback) 224 iocom->auto_callback(msg); 225 226 if ((state->txcmd & DMSGF_DELETE) == 0 && 227 (msg->any.head.cmd & DMSGF_DELETE)) { 228 /* 229 * iocom->conn_state has a state ref, drop it when clearing. 230 */ 231 if (iocom->conn_state) 232 kdmsg_state_drop(iocom->conn_state); 233 iocom->conn_state = NULL; 234 kdmsg_msg_reply(msg, 0); 235 } 236 237 return (0); 238 } 239 240 static 241 int 242 kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 243 { 244 /* 245 * Be sure to process shim before terminating the SPAN 246 * transaction. Gives device drivers the ability to 247 * interlock new operations on the circuit before it 248 * becomes illegal and panics. 249 */ 250 if (state->iocom->auto_callback) 251 state->iocom->auto_callback(msg); 252 253 if ((state->txcmd & DMSGF_DELETE) == 0 && 254 (msg->any.head.cmd & DMSGF_DELETE)) { 255 kdmsg_msg_reply(msg, 0); 256 } 257 return (0); 258 } 259 260 /* 261 * Disconnect and clean up 262 */ 263 void 264 kdmsg_iocom_uninit(kdmsg_iocom_t *iocom) 265 { 266 kdmsg_state_t *state; 267 kdmsg_msg_t *msg; 268 int retries; 269 270 /* 271 * Ask the cluster controller to go away by setting 272 * KILLRX. Send a PING to get a response to unstick reading 273 * from the pipe. 274 * 275 * After 10 seconds shitcan the pipe and do an unclean shutdown. 276 */ 277 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 278 279 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX); 280 msg = kdmsg_msg_alloc(&iocom->state0, DMSG_LNK_PING, NULL, NULL); 281 kdmsg_msg_write_locked(iocom, msg); 282 283 retries = 10; 284 while (iocom->msgrd_td || iocom->msgwr_td) { 285 wakeup(&iocom->msg_ctl); 286 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz); 287 if (--retries == 0 && iocom->msg_fp) { 288 kdio_printf(iocom, 0, "%s\n", 289 "iocom_uninit: " 290 "shitcanning unresponsive pipe"); 291 fp_shutdown(iocom->msg_fp, SHUT_RDWR); 292 /* retries allowed to go negative, keep looping */ 293 } 294 } 295 296 /* 297 * Cleanup caches 298 */ 299 if ((state = iocom->freerd_state) != NULL) { 300 iocom->freerd_state = NULL; 301 kdmsg_state_drop(state); 302 } 303 304 if ((state = iocom->freewr_state) != NULL) { 305 iocom->freewr_state = NULL; 306 kdmsg_state_drop(state); 307 } 308 309 /* 310 * Drop communications descriptor 311 */ 312 if (iocom->msg_fp) { 313 fdrop(iocom->msg_fp); 314 iocom->msg_fp = NULL; 315 } 316 lockmgr(&iocom->msglk, LK_RELEASE); 317 } 318 319 /* 320 * Cluster controller thread. Perform messaging functions. We have one 321 * thread for the reader and one for the writer. The writer handles 322 * shutdown requests (which should break the reader thread). 323 */ 324 static 325 void 326 kdmsg_iocom_thread_rd(void *arg) 327 { 328 kdmsg_iocom_t *iocom = arg; 329 dmsg_hdr_t hdr; 330 kdmsg_msg_t *msg = NULL; 331 size_t hbytes; 332 size_t abytes; 333 int error = 0; 334 335 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLRX) == 0) { 336 /* 337 * Retrieve the message from the pipe or socket. 338 */ 339 error = fp_read(iocom->msg_fp, &hdr, sizeof(hdr), 340 NULL, 1, UIO_SYSSPACE); 341 if (error) 342 break; 343 if (hdr.magic != DMSG_HDR_MAGIC) { 344 kdio_printf(iocom, 1, "bad magic: %04x\n", hdr.magic); 345 error = EINVAL; 346 break; 347 } 348 hbytes = (hdr.cmd & DMSGF_SIZE) * DMSG_ALIGN; 349 if (hbytes < sizeof(hdr) || hbytes > DMSG_HDR_MAX) { 350 kdio_printf(iocom, 1, "bad header size %zd\n", hbytes); 351 error = EINVAL; 352 break; 353 } 354 355 /* XXX messy: mask cmd to avoid allocating state */ 356 msg = kdmsg_msg_alloc(&iocom->state0, 357 hdr.cmd & DMSGF_BASECMDMASK, 358 NULL, NULL); 359 msg->any.head = hdr; 360 msg->hdr_size = hbytes; 361 if (hbytes > sizeof(hdr)) { 362 error = fp_read(iocom->msg_fp, &msg->any.head + 1, 363 hbytes - sizeof(hdr), 364 NULL, 1, UIO_SYSSPACE); 365 if (error) { 366 kdio_printf(iocom, 1, "%s\n", 367 "short msg received"); 368 error = EINVAL; 369 break; 370 } 371 } 372 msg->aux_size = hdr.aux_bytes; 373 if (msg->aux_size > DMSG_AUX_MAX) { 374 kdio_printf(iocom, 1, 375 "illegal msg payload size %zd\n", 376 msg->aux_size); 377 error = EINVAL; 378 break; 379 } 380 if (msg->aux_size) { 381 abytes = DMSG_DOALIGN(msg->aux_size); 382 msg->aux_data = kmalloc(abytes, iocom->mmsg, M_WAITOK); 383 msg->flags |= KDMSG_FLAG_AUXALLOC; 384 error = fp_read(iocom->msg_fp, msg->aux_data, 385 abytes, NULL, 1, UIO_SYSSPACE); 386 if (error) { 387 kdio_printf(iocom, 1, "%s\n", 388 "short msg payload received"); 389 break; 390 } 391 } 392 393 error = kdmsg_msg_receive_handling(msg); 394 msg = NULL; 395 } 396 397 #if 0 398 kdio_printf(iocom, 1, "read thread terminating error=%d\n", error); 399 #endif 400 401 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 402 if (msg) 403 kdmsg_msg_free(msg); 404 405 /* 406 * Shutdown the socket and set KILLRX for consistency in case the 407 * shutdown was not commanded. Signal the transmit side to shutdown 408 * by setting KILLTX and waking it up. 409 */ 410 fp_shutdown(iocom->msg_fp, SHUT_RDWR); 411 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX | 412 KDMSG_CLUSTERCTL_KILLTX); 413 iocom->msgrd_td = NULL; 414 lockmgr(&iocom->msglk, LK_RELEASE); 415 wakeup(&iocom->msg_ctl); 416 417 /* 418 * iocom can be ripped out at any time once the lock is 419 * released with msgrd_td set to NULL. The wakeup()s are safe but 420 * that is all. 421 */ 422 wakeup(iocom); 423 lwkt_exit(); 424 } 425 426 static 427 void 428 kdmsg_iocom_thread_wr(void *arg) 429 { 430 kdmsg_iocom_t *iocom = arg; 431 kdmsg_msg_t *msg; 432 ssize_t res; 433 size_t abytes; 434 int error = 0; 435 int save_ticks; 436 int didwarn; 437 438 /* 439 * Transmit loop 440 */ 441 msg = NULL; 442 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 443 444 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLTX) == 0 && error == 0) { 445 /* 446 * Sleep if no messages pending. Interlock with flag while 447 * holding msglk. 448 */ 449 if (TAILQ_EMPTY(&iocom->msgq)) { 450 atomic_set_int(&iocom->msg_ctl, 451 KDMSG_CLUSTERCTL_SLEEPING); 452 lksleep(&iocom->msg_ctl, &iocom->msglk, 0, "msgwr", hz); 453 atomic_clear_int(&iocom->msg_ctl, 454 KDMSG_CLUSTERCTL_SLEEPING); 455 } 456 457 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) { 458 /* 459 * Remove msg from the transmit queue and do 460 * persist and half-closed state handling. 461 */ 462 TAILQ_REMOVE(&iocom->msgq, msg, qentry); 463 464 error = kdmsg_state_msgtx(msg); 465 if (error == EALREADY) { 466 error = 0; 467 kdmsg_msg_free(msg); 468 continue; 469 } 470 if (error) { 471 kdmsg_msg_free(msg); 472 break; 473 } 474 475 /* 476 * Dump the message to the pipe or socket. 477 * 478 * We have to clean up the message as if the transmit 479 * succeeded even if it failed. 480 */ 481 lockmgr(&iocom->msglk, LK_RELEASE); 482 error = fp_write(iocom->msg_fp, &msg->any, 483 msg->hdr_size, &res, UIO_SYSSPACE); 484 if (error || res != msg->hdr_size) { 485 if (error == 0) 486 error = EINVAL; 487 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 488 kdmsg_state_cleanuptx(msg); 489 break; 490 } 491 if (msg->aux_size) { 492 abytes = DMSG_DOALIGN(msg->aux_size); 493 error = fp_write(iocom->msg_fp, 494 msg->aux_data, abytes, 495 &res, UIO_SYSSPACE); 496 if (error || res != abytes) { 497 if (error == 0) 498 error = EINVAL; 499 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 500 kdmsg_state_cleanuptx(msg); 501 break; 502 } 503 } 504 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 505 kdmsg_state_cleanuptx(msg); 506 } 507 } 508 509 #if 0 510 kdio_printf(iocom, 1, "write thread terminating error=%d\n", error); 511 #endif 512 513 /* 514 * Shutdown the socket and set KILLTX for consistency in case the 515 * shutdown was not commanded. Signal the receive side to shutdown 516 * by setting KILLRX and waking it up. 517 */ 518 fp_shutdown(iocom->msg_fp, SHUT_RDWR); 519 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX | 520 KDMSG_CLUSTERCTL_KILLTX); 521 wakeup(&iocom->msg_ctl); 522 523 /* 524 * The transmit thread is responsible for final cleanups, wait 525 * for the receive side to terminate to prevent new received 526 * states from interfering with our cleanup. 527 * 528 * Do not set msgwr_td to NULL until we actually exit. 529 */ 530 while (iocom->msgrd_td) { 531 wakeup(&iocom->msg_ctl); 532 lksleep(iocom, &iocom->msglk, 0, "clstrkt", hz); 533 } 534 535 /* 536 * We can no longer receive new messages. We must drain the transmit 537 * message queue and simulate received messages to close anay remaining 538 * states. 539 * 540 * Loop until all the states are gone and there are no messages 541 * pending transmit. 542 */ 543 save_ticks = ticks; 544 didwarn = 0; 545 iocom->flags |= KDMSG_IOCOMF_EXITNOACC; 546 547 while (TAILQ_FIRST(&iocom->msgq) || 548 RB_ROOT(&iocom->staterd_tree) || 549 RB_ROOT(&iocom->statewr_tree) || 550 iocom->conn_state) { 551 /* 552 * Simulate failure for all sub-states of state0. 553 */ 554 kdmsg_drain_msgq(iocom); 555 kdmsg_simulate_failure(&iocom->state0, 0, DMSG_ERR_LOSTLINK); 556 557 lksleep(iocom, &iocom->msglk, 0, "clstrtk", hz / 2); 558 559 if ((int)(ticks - save_ticks) > hz*2 && didwarn == 0) { 560 didwarn = 1; 561 kdio_printf(iocom, 0, 562 "Warning, write thread on %p " 563 "still terminating\n", 564 iocom); 565 } 566 if ((int)(ticks - save_ticks) > hz*15 && didwarn == 1) { 567 didwarn = 2; 568 kdio_printf(iocom, 0, 569 "Warning, write thread on %p " 570 "still terminating\n", 571 iocom); 572 } 573 if ((int)(ticks - save_ticks) > hz*60) { 574 kdio_printf(iocom, 0, 575 "Can't terminate: msgq %p " 576 "rd_tree %p wr_tree %p\n", 577 TAILQ_FIRST(&iocom->msgq), 578 RB_ROOT(&iocom->staterd_tree), 579 RB_ROOT(&iocom->statewr_tree)); 580 lksleep(iocom, &iocom->msglk, 0, "clstrtk", hz * 10); 581 } 582 } 583 584 /* 585 * Exit handling is done by the write thread. 586 */ 587 lockmgr(&iocom->msglk, LK_RELEASE); 588 589 /* 590 * The state trees had better be empty now 591 */ 592 KKASSERT(RB_EMPTY(&iocom->staterd_tree)); 593 KKASSERT(RB_EMPTY(&iocom->statewr_tree)); 594 KKASSERT(iocom->conn_state == NULL); 595 596 if (iocom->exit_func) { 597 /* 598 * iocom is invalid after we call the exit function. 599 */ 600 iocom->msgwr_td = NULL; 601 iocom->exit_func(iocom); 602 } else { 603 /* 604 * iocom can be ripped out from under us once msgwr_td is 605 * set to NULL. The wakeup is safe. 606 */ 607 iocom->msgwr_td = NULL; 608 wakeup(iocom); 609 } 610 lwkt_exit(); 611 } 612 613 /* 614 * This cleans out the pending transmit message queue, adjusting any 615 * persistent states properly in the process. 616 * 617 * Called with iocom locked. 618 */ 619 void 620 kdmsg_drain_msgq(kdmsg_iocom_t *iocom) 621 { 622 kdmsg_msg_t *msg; 623 624 /* 625 * Clean out our pending transmit queue, executing the 626 * appropriate state adjustments as if the messages were 627 * sent. 628 */ 629 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) { 630 TAILQ_REMOVE(&iocom->msgq, msg, qentry); 631 kdmsg_drain_msg(msg); 632 } 633 } 634 635 /* 636 * Drain one message by simulating transmission and also simulating a 637 * receive failure. 638 */ 639 static void 640 kdmsg_drain_msg(kdmsg_msg_t *msg) 641 { 642 if (kdmsg_state_msgtx(msg)) { 643 kdmsg_msg_free(msg); 644 } else { 645 if (msg->state) { 646 kdmsg_simulate_failure(msg->state, 647 0, DMSG_ERR_LOSTLINK); 648 } 649 kdmsg_state_cleanuptx(msg); 650 } 651 } 652 653 /* 654 * Do all processing required to handle a freshly received message 655 * after its low level header has been validated. 656 * 657 * iocom is not locked. 658 */ 659 static 660 int 661 kdmsg_msg_receive_handling(kdmsg_msg_t *msg) 662 { 663 kdmsg_iocom_t *iocom = msg->state->iocom; 664 int error; 665 666 /* 667 * State machine tracking, state assignment for msg, 668 * returns error and discard status. Errors are fatal 669 * to the connection except for EALREADY which forces 670 * a discard without execution. 671 */ 672 error = kdmsg_state_msgrx(msg); 673 if (msg->state->flags & KDMSG_STATE_ABORTING) { 674 kdio_printf(iocom, 5, 675 "kdmsg_state_abort(b): state %p rxcmd=%08x " 676 "txcmd=%08x msgrx error %d\n", 677 msg->state, msg->state->rxcmd, 678 msg->state->txcmd, error); 679 } 680 if (error) { 681 /* 682 * Raw protocol or connection error 683 */ 684 if (msg->state->flags & KDMSG_STATE_ABORTING) 685 kdio_printf(iocom, 5, 686 "X1 state %p error %d\n", 687 msg->state, error); 688 kdmsg_msg_free(msg); 689 if (error == EALREADY) 690 error = 0; 691 } else if (msg->state && msg->state->func) { 692 /* 693 * Message related to state which already has a 694 * handling function installed for it. 695 */ 696 if (msg->state->flags & KDMSG_STATE_ABORTING) 697 kdio_printf(iocom, 5, 698 "X2 state %p func %p\n", 699 msg->state, msg->state->func); 700 error = msg->state->func(msg->state, msg); 701 kdmsg_state_cleanuprx(msg); 702 } else if (iocom->flags & KDMSG_IOCOMF_AUTOANY) { 703 if (msg->state->flags & KDMSG_STATE_ABORTING) 704 kdio_printf(iocom, 5, 705 "X3 state %p\n", msg->state); 706 error = kdmsg_autorxmsg(msg); 707 kdmsg_state_cleanuprx(msg); 708 } else { 709 if (msg->state->flags & KDMSG_STATE_ABORTING) 710 kdio_printf(iocom, 5, 711 "X4 state %p\n", msg->state); 712 error = iocom->rcvmsg(msg); 713 kdmsg_state_cleanuprx(msg); 714 } 715 return error; 716 } 717 718 /* 719 * Process state tracking for a message after reception and dequeueing, 720 * prior to execution of the state callback. The state is updated and 721 * will be removed from the RBTREE if completely closed, but the state->parent 722 * and subq linkage is not cleaned up until after the callback (see 723 * cleanuprx()). 724 * 725 * msglk is not held. 726 * 727 * NOTE: A message transaction can consist of several messages in either 728 * direction. 729 * 730 * NOTE: The msgid is unique to the initiator, not necessarily unique for 731 * us or for any relay or for the return direction for that matter. 732 * That is, two sides sending a new message can use the same msgid 733 * without colliding. 734 * 735 * -- 736 * 737 * ABORT sequences work by setting the ABORT flag along with normal message 738 * state. However, ABORTs can also be sent on half-closed messages, that is 739 * even if the command or reply side has already sent a DELETE, as long as 740 * the message has not been fully closed it can still send an ABORT+DELETE 741 * to terminate the half-closed message state. 742 * 743 * Since ABORT+DELETEs can race we silently discard ABORT's for message 744 * state which has already been fully closed. REPLY+ABORT+DELETEs can 745 * also race, and in this situation the other side might have already 746 * initiated a new unrelated command with the same message id. Since 747 * the abort has not set the CREATE flag the situation can be detected 748 * and the message will also be discarded. 749 * 750 * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE]. 751 * The ABORT request is essentially integrated into the command instead 752 * of being sent later on. In this situation the command implementation 753 * detects that CREATE and ABORT are both set (vs ABORT alone) and can 754 * special-case non-blocking operation for the command. 755 * 756 * NOTE! Messages with ABORT set without CREATE or DELETE are considered 757 * to be mid-stream aborts for command/reply sequences. ABORTs on 758 * one-way messages are not supported. 759 * 760 * NOTE! If a command sequence does not support aborts the ABORT flag is 761 * simply ignored. 762 * 763 * -- 764 * 765 * One-off messages (no reply expected) are sent with neither CREATE or DELETE 766 * set. One-off messages cannot be aborted and typically aren't processed 767 * by these routines. The REPLY bit can be used to distinguish whether a 768 * one-off message is a command or reply. For example, one-off replies 769 * will typically just contain status updates. 770 */ 771 static 772 int 773 kdmsg_state_msgrx(kdmsg_msg_t *msg) 774 { 775 kdmsg_iocom_t *iocom = msg->state->iocom; 776 kdmsg_state_t *state; 777 kdmsg_state_t *pstate; 778 kdmsg_state_t sdummy; 779 int error; 780 781 bzero(&sdummy, sizeof(sdummy)); /* avoid gcc warnings */ 782 783 /* 784 * Make sure a state structure is ready to go in case we need a new 785 * one. This is the only routine which uses freerd_state so no 786 * races are possible. 787 */ 788 if ((state = iocom->freerd_state) == NULL) { 789 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 790 state->flags = KDMSG_STATE_DYNAMIC; 791 state->iocom = iocom; 792 state->refs = 1; 793 TAILQ_INIT(&state->subq); 794 iocom->freerd_state = state; 795 } 796 state = NULL; /* safety */ 797 798 /* 799 * Lock RB tree and locate existing persistent state, if any. 800 * 801 * If received msg is a command state is on staterd_tree. 802 * If received msg is a reply state is on statewr_tree. 803 */ 804 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 805 806 again: 807 if (msg->state == &iocom->state0) { 808 sdummy.msgid = msg->any.head.msgid; 809 sdummy.iocom = iocom; 810 if (msg->any.head.cmd & DMSGF_REVTRANS) { 811 state = RB_FIND(kdmsg_state_tree, &iocom->statewr_tree, 812 &sdummy); 813 } else { 814 state = RB_FIND(kdmsg_state_tree, &iocom->staterd_tree, 815 &sdummy); 816 } 817 818 /* 819 * Set message state unconditionally. If this is a CREATE 820 * message this state will become the parent state and new 821 * state will be allocated for the message state. 822 */ 823 if (state == NULL) 824 state = &iocom->state0; 825 if (state->flags & KDMSG_STATE_INTERLOCK) { 826 state->flags |= KDMSG_STATE_SIGNAL; 827 lksleep(state, &iocom->msglk, 0, "dmrace", hz); 828 goto again; 829 } 830 kdmsg_state_hold(state); 831 kdmsg_state_drop(msg->state); /* iocom->state0 */ 832 msg->state = state; 833 } else { 834 state = msg->state; 835 } 836 837 /* 838 * Short-cut one-off or mid-stream messages. 839 */ 840 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 841 DMSGF_ABORT)) == 0) { 842 error = 0; 843 goto done; 844 } 845 846 /* 847 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from 848 * inside the case statements. 849 */ 850 switch(msg->any.head.cmd & (DMSGF_CREATE|DMSGF_DELETE|DMSGF_REPLY)) { 851 case DMSGF_CREATE: 852 case DMSGF_CREATE | DMSGF_DELETE: 853 /* 854 * New persistant command received. 855 */ 856 if (state != &iocom->state0) { 857 kdio_printf(iocom, 1, "%s\n", 858 "duplicate transaction"); 859 error = EINVAL; 860 break; 861 } 862 863 /* 864 * Lookup the circuit. The circuit is an open transaction. 865 * the REVCIRC bit in the message tells us which side 866 * initiated the transaction representing the circuit. 867 */ 868 if (msg->any.head.circuit) { 869 sdummy.msgid = msg->any.head.circuit; 870 871 if (msg->any.head.cmd & DMSGF_REVCIRC) { 872 pstate = RB_FIND(kdmsg_state_tree, 873 &iocom->statewr_tree, 874 &sdummy); 875 } else { 876 pstate = RB_FIND(kdmsg_state_tree, 877 &iocom->staterd_tree, 878 &sdummy); 879 } 880 if (pstate == NULL) { 881 kdio_printf(iocom, 1, "%s\n", 882 "missing parent in " 883 "stacked trans"); 884 error = EINVAL; 885 break; 886 } 887 } else { 888 pstate = &iocom->state0; 889 } 890 891 /* 892 * Allocate new state. 893 * 894 * msg->state becomes the owner of the ref we inherit from 895 * freerd_stae. 896 */ 897 kdmsg_state_drop(state); 898 state = iocom->freerd_state; 899 iocom->freerd_state = NULL; 900 901 msg->state = state; /* inherits freerd ref */ 902 state->parent = pstate; 903 KKASSERT(state->iocom == iocom); 904 state->flags |= KDMSG_STATE_RBINSERTED | 905 KDMSG_STATE_SUBINSERTED | 906 KDMSG_STATE_OPPOSITE; 907 if (TAILQ_EMPTY(&pstate->subq)) 908 kdmsg_state_hold(pstate);/* states on pstate->subq */ 909 kdmsg_state_hold(state); /* state on pstate->subq */ 910 kdmsg_state_hold(state); /* state on rbtree */ 911 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK; 912 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE; 913 state->txcmd = DMSGF_REPLY; 914 state->msgid = msg->any.head.msgid; 915 state->flags &= ~KDMSG_STATE_NEW; 916 RB_INSERT(kdmsg_state_tree, &iocom->staterd_tree, state); 917 TAILQ_INSERT_TAIL(&pstate->subq, state, entry); 918 error = 0; 919 break; 920 case DMSGF_DELETE: 921 /* 922 * Persistent state is expected but might not exist if an 923 * ABORT+DELETE races the close. 924 */ 925 if (state == &iocom->state0) { 926 if (msg->any.head.cmd & DMSGF_ABORT) { 927 kdio_printf(iocom, 1, "%s\n", 928 "msgrx: " 929 "state already A"); 930 error = EALREADY; 931 } else { 932 kdio_printf(iocom, 1, "%s\n", 933 "msgrx: no state for DELETE"); 934 error = EINVAL; 935 } 936 break; 937 } 938 939 /* 940 * Handle another ABORT+DELETE case if the msgid has already 941 * been reused. 942 */ 943 if ((state->rxcmd & DMSGF_CREATE) == 0) { 944 if (msg->any.head.cmd & DMSGF_ABORT) { 945 kdio_printf(iocom, 1, "%s\n", 946 "msgrx: state already B"); 947 error = EALREADY; 948 } else { 949 kdio_printf(iocom, 1, "%s\n", 950 "msgrx: state reused for DELETE"); 951 error = EINVAL; 952 } 953 break; 954 } 955 error = 0; 956 break; 957 default: 958 /* 959 * Check for mid-stream ABORT command received, otherwise 960 * allow. 961 */ 962 if (msg->any.head.cmd & DMSGF_ABORT) { 963 if (state == &iocom->state0 || 964 (state->rxcmd & DMSGF_CREATE) == 0) { 965 error = EALREADY; 966 break; 967 } 968 } 969 error = 0; 970 break; 971 case DMSGF_REPLY | DMSGF_CREATE: 972 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE: 973 /* 974 * When receiving a reply with CREATE set the original 975 * persistent state message should already exist. 976 */ 977 if (state == &iocom->state0) { 978 kdio_printf(iocom, 1, 979 "msgrx: no state match for " 980 "REPLY cmd=%08x msgid=%016jx\n", 981 msg->any.head.cmd, 982 (intmax_t)msg->any.head.msgid); 983 error = EINVAL; 984 break; 985 } 986 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE; 987 error = 0; 988 break; 989 case DMSGF_REPLY | DMSGF_DELETE: 990 /* 991 * Received REPLY+ABORT+DELETE in case where msgid has 992 * already been fully closed, ignore the message. 993 */ 994 if (state == &iocom->state0) { 995 if (msg->any.head.cmd & DMSGF_ABORT) { 996 error = EALREADY; 997 } else { 998 kdio_printf(iocom, 1, "%s\n", 999 "msgrx: no state match " 1000 "for REPLY|DELETE"); 1001 error = EINVAL; 1002 } 1003 break; 1004 } 1005 1006 /* 1007 * Received REPLY+ABORT+DELETE in case where msgid has 1008 * already been reused for an unrelated message, 1009 * ignore the message. 1010 */ 1011 if ((state->rxcmd & DMSGF_CREATE) == 0) { 1012 if (msg->any.head.cmd & DMSGF_ABORT) { 1013 error = EALREADY; 1014 } else { 1015 kdio_printf(iocom, 1, "%s\n", 1016 "msgrx: state reused " 1017 "for REPLY|DELETE"); 1018 error = EINVAL; 1019 } 1020 break; 1021 } 1022 error = 0; 1023 break; 1024 case DMSGF_REPLY: 1025 /* 1026 * Check for mid-stream ABORT reply received to sent command. 1027 */ 1028 if (msg->any.head.cmd & DMSGF_ABORT) { 1029 if (state == &iocom->state0 || 1030 (state->rxcmd & DMSGF_CREATE) == 0) { 1031 error = EALREADY; 1032 break; 1033 } 1034 } 1035 error = 0; 1036 break; 1037 } 1038 1039 /* 1040 * Calculate the easy-switch() transactional command. Represents 1041 * the outer-transaction command for any transaction-create or 1042 * transaction-delete, and the inner message command for any 1043 * non-transaction or inside-transaction command. tcmd will be 1044 * set to 0 if the message state is illegal. 1045 * 1046 * The two can be told apart because outer-transaction commands 1047 * always have a DMSGF_CREATE and/or DMSGF_DELETE flag. 1048 */ 1049 done: 1050 if (msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE)) { 1051 if (state != &iocom->state0) { 1052 msg->tcmd = (msg->state->icmd & DMSGF_BASECMDMASK) | 1053 (msg->any.head.cmd & (DMSGF_CREATE | 1054 DMSGF_DELETE | 1055 DMSGF_REPLY)); 1056 } else { 1057 msg->tcmd = 0; 1058 } 1059 } else { 1060 msg->tcmd = msg->any.head.cmd & DMSGF_CMDSWMASK; 1061 } 1062 1063 /* 1064 * Adjust the state for DELETE handling now, before making the 1065 * callback so we are atomic with other state updates. 1066 * 1067 * Subq/parent linkages are cleaned up after the callback. 1068 * If an error occurred the message is ignored and state is not 1069 * updated. 1070 */ 1071 if ((state = msg->state) == NULL || error != 0) { 1072 kdio_printf(iocom, 1, 1073 "msgrx: state=%p error %d\n", 1074 state, error); 1075 } else if (msg->any.head.cmd & DMSGF_DELETE) { 1076 KKASSERT((state->rxcmd & DMSGF_DELETE) == 0); 1077 state->rxcmd |= DMSGF_DELETE; 1078 if (state->txcmd & DMSGF_DELETE) { 1079 KKASSERT(state->flags & KDMSG_STATE_RBINSERTED); 1080 if (state->rxcmd & DMSGF_REPLY) { 1081 KKASSERT(msg->any.head.cmd & 1082 DMSGF_REPLY); 1083 RB_REMOVE(kdmsg_state_tree, 1084 &iocom->statewr_tree, state); 1085 } else { 1086 KKASSERT((msg->any.head.cmd & 1087 DMSGF_REPLY) == 0); 1088 RB_REMOVE(kdmsg_state_tree, 1089 &iocom->staterd_tree, state); 1090 } 1091 state->flags &= ~KDMSG_STATE_RBINSERTED; 1092 kdmsg_state_drop(state); /* state on rbtree */ 1093 } 1094 } 1095 lockmgr(&iocom->msglk, LK_RELEASE); 1096 1097 return (error); 1098 } 1099 1100 /* 1101 * Called instead of iocom->rcvmsg() if any of the AUTO flags are set. 1102 * This routine must call iocom->rcvmsg() for anything not automatically 1103 * handled. 1104 */ 1105 static int 1106 kdmsg_autorxmsg(kdmsg_msg_t *msg) 1107 { 1108 kdmsg_iocom_t *iocom = msg->state->iocom; 1109 kdmsg_msg_t *rep; 1110 int error = 0; 1111 uint32_t cmd; 1112 1113 /* 1114 * Main switch processes transaction create/delete sequences only. 1115 * Use icmd (DELETEs use DMSG_LNK_ERROR 1116 * 1117 * NOTE: If processing in-transaction messages you generally want 1118 * an inner switch on msg->any.head.cmd. 1119 */ 1120 if (msg->state) { 1121 cmd = (msg->state->icmd & DMSGF_BASECMDMASK) | 1122 (msg->any.head.cmd & (DMSGF_CREATE | 1123 DMSGF_DELETE | 1124 DMSGF_REPLY)); 1125 } else { 1126 cmd = 0; 1127 } 1128 1129 switch(cmd) { 1130 case DMSG_LNK_PING: 1131 /* 1132 * Received ping, send reply 1133 */ 1134 rep = kdmsg_msg_alloc(msg->state, DMSG_LNK_PING | DMSGF_REPLY, 1135 NULL, NULL); 1136 kdmsg_msg_write(rep); 1137 break; 1138 case DMSG_LNK_PING | DMSGF_REPLY: 1139 /* ignore replies */ 1140 break; 1141 case DMSG_LNK_CONN | DMSGF_CREATE: 1142 case DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_DELETE: 1143 /* 1144 * Received LNK_CONN transaction. Transmit response and 1145 * leave transaction open, which allows the other end to 1146 * start to the SPAN protocol. 1147 * 1148 * Handle shim after acknowledging the CONN. 1149 */ 1150 if ((msg->any.head.cmd & DMSGF_DELETE) == 0) { 1151 if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) { 1152 kdmsg_msg_result(msg, 0); 1153 if (iocom->auto_callback) 1154 iocom->auto_callback(msg); 1155 } else { 1156 error = iocom->rcvmsg(msg); 1157 } 1158 break; 1159 } 1160 /* fall through */ 1161 case DMSG_LNK_CONN | DMSGF_DELETE: 1162 /* 1163 * This message is usually simulated after a link is lost 1164 * to clean up the transaction. 1165 */ 1166 if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) { 1167 if (iocom->auto_callback) 1168 iocom->auto_callback(msg); 1169 kdmsg_msg_reply(msg, 0); 1170 } else { 1171 error = iocom->rcvmsg(msg); 1172 } 1173 break; 1174 case DMSG_LNK_SPAN | DMSGF_CREATE: 1175 case DMSG_LNK_SPAN | DMSGF_CREATE | DMSGF_DELETE: 1176 /* 1177 * Received LNK_SPAN transaction. We do not have to respond 1178 * (except on termination), but we must leave the transaction 1179 * open. 1180 * 1181 * Handle shim after acknowledging the SPAN. 1182 */ 1183 if (iocom->flags & KDMSG_IOCOMF_AUTORXSPAN) { 1184 if ((msg->any.head.cmd & DMSGF_DELETE) == 0) { 1185 if (iocom->auto_callback) 1186 iocom->auto_callback(msg); 1187 break; 1188 } 1189 /* fall through */ 1190 } else { 1191 error = iocom->rcvmsg(msg); 1192 break; 1193 } 1194 /* fall through */ 1195 case DMSG_LNK_SPAN | DMSGF_DELETE: 1196 /* 1197 * Process shims (auto_callback) before cleaning up the 1198 * circuit structure and closing the transactions. Device 1199 * driver should ensure that the circuit is not used after 1200 * the auto_callback() returns. 1201 * 1202 * Handle shim before closing the SPAN transaction. 1203 */ 1204 if (iocom->flags & KDMSG_IOCOMF_AUTORXSPAN) { 1205 if (iocom->auto_callback) 1206 iocom->auto_callback(msg); 1207 kdmsg_msg_reply(msg, 0); 1208 } else { 1209 error = iocom->rcvmsg(msg); 1210 } 1211 break; 1212 default: 1213 /* 1214 * Anything unhandled goes into rcvmsg. 1215 * 1216 * NOTE: Replies to link-level messages initiated by our side 1217 * are handled by the state callback, they are NOT 1218 * handled here. 1219 */ 1220 error = iocom->rcvmsg(msg); 1221 break; 1222 } 1223 return (error); 1224 } 1225 1226 /* 1227 * Post-receive-handling message and state cleanup. This routine is called 1228 * after the state function handling/callback to properly dispose of the 1229 * message and unlink the state's parent/subq linkage if the state is 1230 * completely closed. 1231 * 1232 * msglk is not held. 1233 */ 1234 static 1235 void 1236 kdmsg_state_cleanuprx(kdmsg_msg_t *msg) 1237 { 1238 kdmsg_state_t *state = msg->state; 1239 kdmsg_iocom_t *iocom = state->iocom; 1240 1241 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1242 if (state != &iocom->state0) { 1243 /* 1244 * When terminating a transaction (in either direction), all 1245 * sub-states are aborted. 1246 */ 1247 if ((msg->any.head.cmd & DMSGF_DELETE) && 1248 TAILQ_FIRST(&msg->state->subq)) { 1249 kdio_printf(iocom, 2, 1250 "simulate failure for substates of " 1251 "state %p cmd %08x/%08x\n", 1252 msg->state, 1253 msg->state->rxcmd, 1254 msg->state->txcmd); 1255 kdmsg_simulate_failure(msg->state, 1256 0, DMSG_ERR_LOSTLINK); 1257 } 1258 1259 /* 1260 * Once the state is fully closed we can (try to) remove it 1261 * from the subq topology. 1262 */ 1263 if ((state->flags & KDMSG_STATE_SUBINSERTED) && 1264 (state->rxcmd & DMSGF_DELETE) && 1265 (state->txcmd & DMSGF_DELETE)) { 1266 /* 1267 * Remove parent linkage if state is completely closed. 1268 */ 1269 kdmsg_subq_delete(state); 1270 } 1271 } 1272 kdmsg_msg_free(msg); 1273 1274 lockmgr(&iocom->msglk, LK_RELEASE); 1275 } 1276 1277 /* 1278 * Remove state from its parent's subq. This can wind up recursively 1279 * dropping the parent upward. 1280 * 1281 * NOTE: Once we drop the parent, our pstate pointer may become invalid. 1282 */ 1283 static 1284 void 1285 kdmsg_subq_delete(kdmsg_state_t *state) 1286 { 1287 kdmsg_state_t *pstate; 1288 1289 if (state->flags & KDMSG_STATE_SUBINSERTED) { 1290 pstate = state->parent; 1291 KKASSERT(pstate); 1292 if (pstate->scan == state) 1293 pstate->scan = NULL; 1294 TAILQ_REMOVE(&pstate->subq, state, entry); 1295 state->flags &= ~KDMSG_STATE_SUBINSERTED; 1296 state->parent = NULL; 1297 if (TAILQ_EMPTY(&pstate->subq)) { 1298 kdmsg_state_drop(pstate);/* pstate->subq */ 1299 } 1300 pstate = NULL; /* safety */ 1301 kdmsg_state_drop(state); /* pstate->subq */ 1302 } else { 1303 KKASSERT(state->parent == NULL); 1304 } 1305 } 1306 1307 /* 1308 * Simulate receiving a message which terminates an active transaction 1309 * state. Our simulated received message must set DELETE and may also 1310 * have to set CREATE. It must also ensure that all fields are set such 1311 * that the receive handling code can find the state (kdmsg_state_msgrx()) 1312 * or an endless loop will ensue. 1313 * 1314 * This is used when the other end of the link is dead so the device driver 1315 * gets a completed transaction for all pending states. 1316 * 1317 * Called with iocom locked. 1318 */ 1319 static 1320 void 1321 kdmsg_simulate_failure(kdmsg_state_t *state, int meto, int error) 1322 { 1323 kdmsg_state_t *substate; 1324 1325 kdmsg_state_hold(state); /* aborting */ 1326 1327 /* 1328 * Abort parent state first. Parent will not actually disappear 1329 * until children are gone. Device drivers must handle the situation. 1330 * The advantage of this is that device drivers can flag the situation 1331 * as an interlock against new operations on dying states. And since 1332 * device operations are often asynchronous anyway, this sequence of 1333 * events works out better. 1334 */ 1335 if (meto) 1336 kdmsg_state_abort(state); 1337 1338 /* 1339 * Recurse through any children. 1340 */ 1341 again: 1342 TAILQ_FOREACH(substate, &state->subq, entry) { 1343 if (substate->flags & KDMSG_STATE_ABORTING) 1344 continue; 1345 state->scan = substate; 1346 kdmsg_simulate_failure(substate, 1, error); 1347 if (state->scan != substate) 1348 goto again; 1349 } 1350 kdmsg_state_drop(state); /* aborting */ 1351 } 1352 1353 static 1354 void 1355 kdmsg_state_abort(kdmsg_state_t *state) 1356 { 1357 kdmsg_msg_t *msg; 1358 1359 /* 1360 * Set ABORTING and DYING, return if already set. If the state was 1361 * just allocated we defer the abort operation until the related 1362 * message is processed. 1363 */ 1364 KKASSERT((state->flags & KDMSG_STATE_ABORTING) == 0); 1365 if (state->flags & KDMSG_STATE_ABORTING) 1366 return; 1367 state->flags |= KDMSG_STATE_ABORTING; 1368 kdmsg_state_dying(state); 1369 if (state->flags & KDMSG_STATE_NEW) { 1370 kdio_printf(iocom, 5, 1371 "kdmsg_state_abort(0): state %p rxcmd %08x " 1372 "txcmd %08x flags %08x - in NEW state\n", 1373 state, state->rxcmd, 1374 state->txcmd, state->flags); 1375 return; 1376 } 1377 1378 /* 1379 * NOTE: The DELETE flag might already be set due to an early 1380 * termination. 1381 * 1382 * NOTE: Args to kdmsg_msg_alloc() to avoid dynamic state allocation. 1383 * 1384 * NOTE: We are simulating a received message using our state 1385 * (vs a message generated by the other side using its state), 1386 * so we must invert DMSGF_REVTRANS and DMSGF_REVCIRC. 1387 */ 1388 kdio_printf(iocom, 5, 1389 "kdmsg_state_abort(1): state %p rxcmd %08x txcmd %08x\n", 1390 state, state->rxcmd, state->txcmd); 1391 if ((state->rxcmd & DMSGF_DELETE) == 0) { 1392 msg = kdmsg_msg_alloc(state, DMSG_LNK_ERROR, NULL, NULL); 1393 if ((state->rxcmd & DMSGF_CREATE) == 0) 1394 msg->any.head.cmd |= DMSGF_CREATE; 1395 msg->any.head.cmd |= DMSGF_DELETE | 1396 (state->rxcmd & DMSGF_REPLY); 1397 msg->any.head.cmd ^= (DMSGF_REVTRANS | DMSGF_REVCIRC); 1398 msg->any.head.error = DMSG_ERR_LOSTLINK; 1399 kdio_printf(iocom, 5, 1400 "kdmsg_state_abort(a): state %p msgcmd %08x\n", 1401 state, msg->any.head.cmd); 1402 /* circuit not initialized */ 1403 lockmgr(&state->iocom->msglk, LK_RELEASE); 1404 kdmsg_msg_receive_handling(msg); 1405 lockmgr(&state->iocom->msglk, LK_EXCLUSIVE); 1406 msg = NULL; 1407 } 1408 kdio_printf(iocom, 5, 1409 "kdmsg_state_abort(2): state %p rxcmd %08x txcmd %08x\n", 1410 state, state->rxcmd, state->txcmd); 1411 } 1412 1413 /* 1414 * Recursively sets KDMSG_STATE_DYING on state and all sub-states, preventing 1415 * the transmission of any new messages on these states. This is done 1416 * atomically when parent state is terminating, whereas setting ABORTING is 1417 * not atomic and can leak races. 1418 */ 1419 static 1420 void 1421 kdmsg_state_dying(kdmsg_state_t *state) 1422 { 1423 kdmsg_state_t *scan; 1424 1425 if ((state->flags & KDMSG_STATE_DYING) == 0) { 1426 state->flags |= KDMSG_STATE_DYING; 1427 TAILQ_FOREACH(scan, &state->subq, entry) 1428 kdmsg_state_dying(scan); 1429 } 1430 } 1431 1432 /* 1433 * Process state tracking for a message prior to transmission. 1434 * 1435 * Called with msglk held and the msg dequeued. Returns non-zero if 1436 * the message is bad and should be deleted by the caller. 1437 * 1438 * One-off messages are usually with dummy state and msg->state may be NULL 1439 * in this situation. 1440 * 1441 * New transactions (when CREATE is set) will insert the state. 1442 * 1443 * May request that caller discard the message by setting *discardp to 1. 1444 * A NULL state may be returned in this case. 1445 */ 1446 static 1447 int 1448 kdmsg_state_msgtx(kdmsg_msg_t *msg) 1449 { 1450 kdmsg_iocom_t *iocom = msg->state->iocom; 1451 kdmsg_state_t *state; 1452 int error; 1453 1454 /* 1455 * Make sure a state structure is ready to go in case we need a new 1456 * one. This is the only routine which uses freewr_state so no 1457 * races are possible. 1458 */ 1459 if ((state = iocom->freewr_state) == NULL) { 1460 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 1461 state->flags = KDMSG_STATE_DYNAMIC; 1462 state->iocom = iocom; 1463 state->refs = 1; 1464 TAILQ_INIT(&state->subq); 1465 iocom->freewr_state = state; 1466 } 1467 1468 /* 1469 * Lock RB tree. If persistent state is present it will have already 1470 * been assigned to msg. 1471 */ 1472 state = msg->state; 1473 1474 /* 1475 * Short-cut one-off or mid-stream messages (state may be NULL). 1476 */ 1477 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 1478 DMSGF_ABORT)) == 0) { 1479 return(0); 1480 } 1481 1482 1483 /* 1484 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from 1485 * inside the case statements. 1486 */ 1487 switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 1488 DMSGF_REPLY)) { 1489 case DMSGF_CREATE: 1490 case DMSGF_CREATE | DMSGF_DELETE: 1491 /* 1492 * Insert the new persistent message state and mark 1493 * half-closed if DELETE is set. Since this is a new 1494 * message it isn't possible to transition into the fully 1495 * closed state here. 1496 * 1497 * XXX state must be assigned and inserted by 1498 * kdmsg_msg_write(). txcmd is assigned by us 1499 * on-transmit. 1500 */ 1501 KKASSERT(state != NULL); 1502 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK; 1503 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE; 1504 state->rxcmd = DMSGF_REPLY; 1505 state->flags &= ~KDMSG_STATE_NEW; 1506 error = 0; 1507 break; 1508 case DMSGF_DELETE: 1509 /* 1510 * Sent ABORT+DELETE in case where msgid has already 1511 * been fully closed, ignore the message. 1512 */ 1513 if (state == &iocom->state0) { 1514 if (msg->any.head.cmd & DMSGF_ABORT) { 1515 error = EALREADY; 1516 } else { 1517 kdio_printf(iocom, 1, 1518 "msgtx: no state match " 1519 "for DELETE cmd=%08x msgid=%016jx\n", 1520 msg->any.head.cmd, 1521 (intmax_t)msg->any.head.msgid); 1522 error = EINVAL; 1523 } 1524 break; 1525 } 1526 1527 /* 1528 * Sent ABORT+DELETE in case where msgid has 1529 * already been reused for an unrelated message, 1530 * ignore the message. 1531 */ 1532 if ((state->txcmd & DMSGF_CREATE) == 0) { 1533 if (msg->any.head.cmd & DMSGF_ABORT) { 1534 error = EALREADY; 1535 } else { 1536 kdio_printf(iocom, 1, "%s\n", 1537 "msgtx: state reused " 1538 "for DELETE"); 1539 error = EINVAL; 1540 } 1541 break; 1542 } 1543 error = 0; 1544 break; 1545 default: 1546 /* 1547 * Check for mid-stream ABORT command sent 1548 */ 1549 if (msg->any.head.cmd & DMSGF_ABORT) { 1550 if (state == &state->iocom->state0 || 1551 (state->txcmd & DMSGF_CREATE) == 0) { 1552 error = EALREADY; 1553 break; 1554 } 1555 } 1556 error = 0; 1557 break; 1558 case DMSGF_REPLY | DMSGF_CREATE: 1559 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE: 1560 /* 1561 * When transmitting a reply with CREATE set the original 1562 * persistent state message should already exist. 1563 */ 1564 if (state == &state->iocom->state0) { 1565 kdio_printf(iocom, 1, "%s\n", 1566 "msgtx: no state match " 1567 "for REPLY | CREATE"); 1568 error = EINVAL; 1569 break; 1570 } 1571 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE; 1572 error = 0; 1573 break; 1574 case DMSGF_REPLY | DMSGF_DELETE: 1575 /* 1576 * When transmitting a reply with DELETE set the original 1577 * persistent state message should already exist. 1578 * 1579 * This is very similar to the REPLY|CREATE|* case except 1580 * txcmd is already stored, so we just add the DELETE flag. 1581 * 1582 * Sent REPLY+ABORT+DELETE in case where msgid has 1583 * already been fully closed, ignore the message. 1584 */ 1585 if (state == &state->iocom->state0) { 1586 if (msg->any.head.cmd & DMSGF_ABORT) { 1587 error = EALREADY; 1588 } else { 1589 kdio_printf(iocom, 1, "%s\n", 1590 "msgtx: no state match " 1591 "for REPLY | DELETE"); 1592 error = EINVAL; 1593 } 1594 break; 1595 } 1596 1597 /* 1598 * Sent REPLY+ABORT+DELETE in case where msgid has already 1599 * been reused for an unrelated message, ignore the message. 1600 */ 1601 if ((state->txcmd & DMSGF_CREATE) == 0) { 1602 if (msg->any.head.cmd & DMSGF_ABORT) { 1603 error = EALREADY; 1604 } else { 1605 kdio_printf(iocom, 1, "%s\n", 1606 "msgtx: state reused " 1607 "for REPLY | DELETE"); 1608 error = EINVAL; 1609 } 1610 break; 1611 } 1612 error = 0; 1613 break; 1614 case DMSGF_REPLY: 1615 /* 1616 * Check for mid-stream ABORT reply sent. 1617 * 1618 * One-off REPLY messages are allowed for e.g. status updates. 1619 */ 1620 if (msg->any.head.cmd & DMSGF_ABORT) { 1621 if (state == &state->iocom->state0 || 1622 (state->txcmd & DMSGF_CREATE) == 0) { 1623 error = EALREADY; 1624 break; 1625 } 1626 } 1627 error = 0; 1628 break; 1629 } 1630 1631 /* 1632 * Set interlock (XXX hack) in case the send side blocks and a 1633 * response is returned before kdmsg_state_cleanuptx() can be 1634 * run. 1635 */ 1636 if (state && error == 0) 1637 state->flags |= KDMSG_STATE_INTERLOCK; 1638 1639 return (error); 1640 } 1641 1642 /* 1643 * Called with iocom locked. 1644 */ 1645 static 1646 void 1647 kdmsg_state_cleanuptx(kdmsg_msg_t *msg) 1648 { 1649 kdmsg_iocom_t *iocom = msg->state->iocom; 1650 kdmsg_state_t *state; 1651 1652 if ((state = msg->state) == NULL) { 1653 kdmsg_msg_free(msg); 1654 return; 1655 } 1656 1657 /* 1658 * Clear interlock (XXX hack) in case the send side blocks and a 1659 * response is returned in the other thread before 1660 * kdmsg_state_cleanuptx() can be run. We maintain our hold on 1661 * iocom->msglk so we can do this before completing our task. 1662 */ 1663 if (state->flags & KDMSG_STATE_SIGNAL) { 1664 kdio_printf(iocom, 1, "state %p interlock!\n", state); 1665 wakeup(state); 1666 } 1667 state->flags &= ~(KDMSG_STATE_INTERLOCK | KDMSG_STATE_SIGNAL); 1668 kdmsg_state_hold(state); 1669 1670 if (msg->any.head.cmd & DMSGF_DELETE) { 1671 KKASSERT((state->txcmd & DMSGF_DELETE) == 0); 1672 state->txcmd |= DMSGF_DELETE; 1673 if (state->rxcmd & DMSGF_DELETE) { 1674 KKASSERT(state->flags & KDMSG_STATE_RBINSERTED); 1675 if (state->txcmd & DMSGF_REPLY) { 1676 KKASSERT(msg->any.head.cmd & 1677 DMSGF_REPLY); 1678 RB_REMOVE(kdmsg_state_tree, 1679 &iocom->staterd_tree, state); 1680 } else { 1681 KKASSERT((msg->any.head.cmd & 1682 DMSGF_REPLY) == 0); 1683 RB_REMOVE(kdmsg_state_tree, 1684 &iocom->statewr_tree, state); 1685 } 1686 state->flags &= ~KDMSG_STATE_RBINSERTED; 1687 1688 /* 1689 * The subq recursion is used for parent linking and 1690 * scanning the topology for aborts, we can only 1691 * remove leafs. The circuit is effectively dead now, 1692 * but topology won't be torn down until all of its 1693 * children have finished/aborted. 1694 * 1695 * This is particularly important for end-point 1696 * devices which might need to access private data 1697 * in parent states. Out of order disconnects can 1698 * occur if an end-point device is processing a 1699 * message transaction asynchronously because abort 1700 * requests are basically synchronous and it probably 1701 * isn't convenient (or possible) for the end-point 1702 * to abort an asynchronous operation. 1703 */ 1704 if (TAILQ_EMPTY(&state->subq)) 1705 kdmsg_subq_delete(state); 1706 kdmsg_msg_free(msg); 1707 kdmsg_state_drop(state); /* state on rbtree */ 1708 } else { 1709 kdmsg_msg_free(msg); 1710 } 1711 } else { 1712 kdmsg_msg_free(msg); 1713 } 1714 1715 /* 1716 * Deferred abort after transmission. 1717 */ 1718 if ((state->flags & (KDMSG_STATE_ABORTING | KDMSG_STATE_DYING)) && 1719 (state->rxcmd & DMSGF_DELETE) == 0) { 1720 kdio_printf(iocom, 5, 1721 "kdmsg_state_cleanuptx: state=%p " 1722 "executing deferred abort\n", 1723 state); 1724 state->flags &= ~KDMSG_STATE_ABORTING; 1725 kdmsg_state_abort(state); 1726 } 1727 kdmsg_state_drop(state); 1728 } 1729 1730 static 1731 void 1732 _kdmsg_state_hold(kdmsg_state_t *state KDMSG_DEBUG_ARGS) 1733 { 1734 atomic_add_int(&state->refs, 1); 1735 #if KDMSG_DEBUG 1736 kd_printf(4, "state %p +%d\t%s:%d\n", state, state->refs, file, line); 1737 #endif 1738 } 1739 1740 static 1741 void 1742 _kdmsg_state_drop(kdmsg_state_t *state KDMSG_DEBUG_ARGS) 1743 { 1744 KKASSERT(state->refs > 0); 1745 #if KDMSG_DEBUG 1746 kd_printf(4, "state %p -%d\t%s:%d\n", state, state->refs, file, line); 1747 #endif 1748 if (atomic_fetchadd_int(&state->refs, -1) == 1) 1749 kdmsg_state_free(state); 1750 } 1751 1752 static 1753 void 1754 kdmsg_state_free(kdmsg_state_t *state) 1755 { 1756 kdmsg_iocom_t *iocom = state->iocom; 1757 1758 KKASSERT((state->flags & KDMSG_STATE_RBINSERTED) == 0); 1759 KKASSERT((state->flags & KDMSG_STATE_SUBINSERTED) == 0); 1760 KKASSERT(TAILQ_EMPTY(&state->subq)); 1761 1762 if (state != &state->iocom->state0) 1763 kfree(state, iocom->mmsg); 1764 } 1765 1766 kdmsg_msg_t * 1767 kdmsg_msg_alloc(kdmsg_state_t *state, uint32_t cmd, 1768 int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data) 1769 { 1770 kdmsg_iocom_t *iocom = state->iocom; 1771 kdmsg_state_t *pstate; 1772 kdmsg_msg_t *msg; 1773 size_t hbytes; 1774 1775 KKASSERT(iocom != NULL); 1776 hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN; 1777 msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes, 1778 iocom->mmsg, M_WAITOK | M_ZERO); 1779 msg->hdr_size = hbytes; 1780 1781 if ((cmd & (DMSGF_CREATE | DMSGF_REPLY)) == DMSGF_CREATE) { 1782 /* 1783 * New transaction, requires tracking state and a unique 1784 * msgid to be allocated. 1785 * 1786 * It is possible to race a circuit failure, inherit the 1787 * parent's STATE_DYING flag to trigger an abort sequence 1788 * in the transmit path. By not inheriting ABORTING the 1789 * abort sequence can recurse. 1790 * 1791 * NOTE: The transactions has not yet been initiated so we 1792 * cannot set DMSGF_CREATE/DELETE bits in txcmd or rxcmd. 1793 * We have to properly setup DMSGF_REPLY, however. 1794 */ 1795 pstate = state; 1796 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 1797 TAILQ_INIT(&state->subq); 1798 state->iocom = iocom; 1799 state->parent = pstate; 1800 state->flags = KDMSG_STATE_DYNAMIC | 1801 KDMSG_STATE_NEW; 1802 state->func = func; 1803 state->any.any = data; 1804 state->msgid = (uint64_t)(uintptr_t)state; 1805 /*msg->any.head.msgid = state->msgid;XXX*/ 1806 1807 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1808 if (RB_INSERT(kdmsg_state_tree, &iocom->statewr_tree, state)) 1809 panic("duplicate msgid allocated"); 1810 if (TAILQ_EMPTY(&pstate->subq)) 1811 kdmsg_state_hold(pstate);/* pstate->subq */ 1812 TAILQ_INSERT_TAIL(&pstate->subq, state, entry); 1813 state->flags |= KDMSG_STATE_RBINSERTED | 1814 KDMSG_STATE_SUBINSERTED; 1815 state->flags |= pstate->flags & KDMSG_STATE_DYING; 1816 kdmsg_state_hold(state); /* pstate->subq */ 1817 kdmsg_state_hold(state); /* state on rbtree */ 1818 kdmsg_state_hold(state); /* msg->state */ 1819 lockmgr(&iocom->msglk, LK_RELEASE); 1820 } else { 1821 pstate = state->parent; 1822 KKASSERT(pstate != NULL); 1823 kdmsg_state_hold(state); /* msg->state */ 1824 } 1825 1826 if (state->flags & KDMSG_STATE_OPPOSITE) 1827 cmd |= DMSGF_REVTRANS; 1828 if (pstate->flags & KDMSG_STATE_OPPOSITE) 1829 cmd |= DMSGF_REVCIRC; 1830 1831 msg->any.head.magic = DMSG_HDR_MAGIC; 1832 msg->any.head.cmd = cmd; 1833 msg->any.head.msgid = state->msgid; 1834 msg->any.head.circuit = pstate->msgid; 1835 msg->state = state; 1836 1837 return (msg); 1838 } 1839 1840 void 1841 kdmsg_msg_free(kdmsg_msg_t *msg) 1842 { 1843 kdmsg_iocom_t *iocom = msg->state->iocom; 1844 kdmsg_state_t *state; 1845 1846 if ((msg->flags & KDMSG_FLAG_AUXALLOC) && 1847 msg->aux_data && msg->aux_size) { 1848 kfree(msg->aux_data, iocom->mmsg); 1849 msg->aux_data = NULL; 1850 msg->flags &= ~KDMSG_FLAG_AUXALLOC; 1851 } 1852 if ((state = msg->state) != NULL) { 1853 msg->state = NULL; 1854 kdmsg_state_drop(state); /* msg->state */ 1855 } 1856 msg->aux_data = NULL; 1857 msg->aux_size = 0; 1858 1859 kfree(msg, iocom->mmsg); 1860 } 1861 1862 void 1863 kdmsg_detach_aux_data(kdmsg_msg_t *msg, kdmsg_data_t *data) 1864 { 1865 if (msg->flags & KDMSG_FLAG_AUXALLOC) { 1866 data->aux_data = msg->aux_data; 1867 data->aux_size = msg->aux_size; 1868 data->iocom = msg->state->iocom; 1869 msg->flags &= ~KDMSG_FLAG_AUXALLOC; 1870 } else { 1871 data->aux_data = NULL; 1872 data->aux_size = 0; 1873 data->iocom = msg->state->iocom; 1874 } 1875 } 1876 1877 void 1878 kdmsg_free_aux_data(kdmsg_data_t *data) 1879 { 1880 if (data->aux_data) { 1881 kfree(data->aux_data, data->iocom->mmsg); 1882 data->aux_data = NULL; 1883 } 1884 } 1885 1886 /* 1887 * Indexed messages are stored in a red-black tree indexed by their 1888 * msgid. Only persistent messages are indexed. 1889 */ 1890 int 1891 kdmsg_state_cmp(kdmsg_state_t *state1, kdmsg_state_t *state2) 1892 { 1893 if (state1->iocom < state2->iocom) 1894 return(-1); 1895 if (state1->iocom > state2->iocom) 1896 return(1); 1897 if (state1->msgid < state2->msgid) 1898 return(-1); 1899 if (state1->msgid > state2->msgid) 1900 return(1); 1901 return(0); 1902 } 1903 1904 /* 1905 * Write a message. All requisit command flags have been set. 1906 * 1907 * If msg->state is non-NULL the message is written to the existing 1908 * transaction. msgid will be set accordingly. 1909 * 1910 * If msg->state is NULL and CREATE is set new state is allocated and 1911 * (func, data) is installed. A msgid is assigned. 1912 * 1913 * If msg->state is NULL and CREATE is not set the message is assumed 1914 * to be a one-way message. The originator must assign the msgid 1915 * (or leave it 0, which is typical. 1916 * 1917 * This function merely queues the message to the management thread, it 1918 * does not write to the message socket/pipe. 1919 */ 1920 void 1921 kdmsg_msg_write(kdmsg_msg_t *msg) 1922 { 1923 kdmsg_iocom_t *iocom = msg->state->iocom; 1924 1925 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1926 kdmsg_msg_write_locked(iocom, msg); 1927 lockmgr(&iocom->msglk, LK_RELEASE); 1928 } 1929 1930 static void 1931 kdmsg_msg_write_locked(kdmsg_iocom_t *iocom, kdmsg_msg_t *msg) 1932 { 1933 kdmsg_state_t *state; 1934 1935 if (msg->state) { 1936 /* 1937 * Continuance or termination of existing transaction. 1938 * The transaction could have been initiated by either end. 1939 * 1940 * (Function callback and aux data for the receive side can 1941 * be replaced or left alone). 1942 */ 1943 state = msg->state; 1944 msg->any.head.msgid = state->msgid; 1945 } else { 1946 /* 1947 * One-off message (always uses msgid 0 to distinguish 1948 * between a possibly lost in-transaction message due to 1949 * competing aborts and a real one-off message?) 1950 */ 1951 state = NULL; 1952 msg->any.head.msgid = 0; 1953 } 1954 1955 /* 1956 * For stateful messages, if the circuit is dead or dying we have 1957 * to abort the potentially newly-created state and discard the 1958 * message. 1959 * 1960 * - We must discard the message because the other end will not 1961 * be expecting any more messages over the dead or dying circuit 1962 * and might not be able to receive them. 1963 * 1964 * - We abort the state by simulating a failure to generate a fake 1965 * incoming DELETE. This will trigger the state callback and allow 1966 * the device to clean things up and reply, closing the outgoing 1967 * direction and allowing the state to be freed. 1968 * 1969 * This situation occurs quite often, particularly as SPANs stabilize. 1970 * End-points must do the right thing. 1971 */ 1972 if (state) { 1973 KKASSERT((state->txcmd & DMSGF_DELETE) == 0); 1974 if (state->flags & KDMSG_STATE_DYING) { 1975 #if 0 1976 if ((state->flags & KDMSG_STATE_DYING) || 1977 (state->parent->txcmd & DMSGF_DELETE) || 1978 (state->parent->flags & KDMSG_STATE_DYING)) { 1979 #endif 1980 kdio_printf(iocom, 4, 1981 "kdmsg_msg_write: Write to dying circuit " 1982 "state=%p " 1983 "ptxcmd=%08x prxcmd=%08x flags=%08x\n", 1984 state, 1985 state->parent->rxcmd, 1986 state->parent->txcmd, 1987 state->parent->flags); 1988 kdmsg_state_hold(state); 1989 kdmsg_state_msgtx(msg); 1990 kdmsg_state_cleanuptx(msg); 1991 kdmsg_state_drop(state); 1992 return; 1993 } 1994 } 1995 1996 /* 1997 * Finish up the msg fields. Note that msg->aux_size and the 1998 * aux_bytes stored in the message header represent the unaligned 1999 * (actual) bytes of data, but the buffer is sized to an aligned 2000 * size and the CRC is generated over the aligned length. 2001 */ 2002 msg->any.head.salt = /* (random << 8) | */ (iocom->msg_seq & 255); 2003 ++iocom->msg_seq; 2004 2005 if (msg->aux_data && msg->aux_size) { 2006 uint32_t abytes = DMSG_DOALIGN(msg->aux_size); 2007 2008 msg->any.head.aux_bytes = msg->aux_size; 2009 msg->any.head.aux_crc = iscsi_crc32(msg->aux_data, abytes); 2010 } 2011 msg->any.head.hdr_crc = 0; 2012 msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size); 2013 2014 /* 2015 * If termination races new message senders we must drain the 2016 * message immediately instead of queue it. 2017 */ 2018 if (iocom->flags & KDMSG_IOCOMF_EXITNOACC) 2019 kdmsg_drain_msg(msg); 2020 else 2021 TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry); 2022 2023 if (iocom->msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) { 2024 atomic_clear_int(&iocom->msg_ctl, 2025 KDMSG_CLUSTERCTL_SLEEPING); 2026 wakeup(&iocom->msg_ctl); 2027 } 2028 } 2029 2030 /* 2031 * Reply to a message and terminate our side of the transaction. 2032 * 2033 * If msg->state is non-NULL we are replying to a one-way message. 2034 */ 2035 void 2036 kdmsg_msg_reply(kdmsg_msg_t *msg, uint32_t error) 2037 { 2038 kdmsg_state_t *state = msg->state; 2039 kdmsg_msg_t *nmsg; 2040 uint32_t cmd; 2041 2042 /* 2043 * Reply with a simple error code and terminate the transaction. 2044 */ 2045 cmd = DMSG_LNK_ERROR; 2046 2047 /* 2048 * Check if our direction has even been initiated yet, set CREATE. 2049 * 2050 * Check what direction this is (command or reply direction). Note 2051 * that txcmd might not have been initiated yet. 2052 * 2053 * If our direction has already been closed we just return without 2054 * doing anything. 2055 */ 2056 if (state != &state->iocom->state0) { 2057 if (state->txcmd & DMSGF_DELETE) 2058 return; 2059 if ((state->txcmd & DMSGF_CREATE) == 0) 2060 cmd |= DMSGF_CREATE; 2061 if (state->txcmd & DMSGF_REPLY) 2062 cmd |= DMSGF_REPLY; 2063 cmd |= DMSGF_DELETE; 2064 } else { 2065 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) 2066 cmd |= DMSGF_REPLY; 2067 } 2068 2069 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 2070 nmsg->any.head.error = error; 2071 kdmsg_msg_write(nmsg); 2072 } 2073 2074 /* 2075 * Reply to a message and continue our side of the transaction. 2076 * 2077 * If msg->state is non-NULL we are replying to a one-way message and this 2078 * function degenerates into the same as kdmsg_msg_reply(). 2079 */ 2080 void 2081 kdmsg_msg_result(kdmsg_msg_t *msg, uint32_t error) 2082 { 2083 kdmsg_state_t *state = msg->state; 2084 kdmsg_msg_t *nmsg; 2085 uint32_t cmd; 2086 2087 /* 2088 * Return a simple result code, do NOT terminate the transaction. 2089 */ 2090 cmd = DMSG_LNK_ERROR; 2091 2092 /* 2093 * Check if our direction has even been initiated yet, set CREATE. 2094 * 2095 * Check what direction this is (command or reply direction). Note 2096 * that txcmd might not have been initiated yet. 2097 * 2098 * If our direction has already been closed we just return without 2099 * doing anything. 2100 */ 2101 if (state != &state->iocom->state0) { 2102 if (state->txcmd & DMSGF_DELETE) 2103 return; 2104 if ((state->txcmd & DMSGF_CREATE) == 0) 2105 cmd |= DMSGF_CREATE; 2106 if (state->txcmd & DMSGF_REPLY) 2107 cmd |= DMSGF_REPLY; 2108 /* continuing transaction, do not set MSGF_DELETE */ 2109 } else { 2110 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) 2111 cmd |= DMSGF_REPLY; 2112 } 2113 2114 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 2115 nmsg->any.head.error = error; 2116 kdmsg_msg_write(nmsg); 2117 } 2118 2119 /* 2120 * Reply to a message and terminate our side of the transaction. 2121 * 2122 * If msg->state is non-NULL we are replying to a one-way message. 2123 */ 2124 void 2125 kdmsg_state_reply(kdmsg_state_t *state, uint32_t error) 2126 { 2127 kdmsg_msg_t *nmsg; 2128 uint32_t cmd; 2129 2130 /* 2131 * Reply with a simple error code and terminate the transaction. 2132 */ 2133 cmd = DMSG_LNK_ERROR; 2134 2135 /* 2136 * Check if our direction has even been initiated yet, set CREATE. 2137 * 2138 * Check what direction this is (command or reply direction). Note 2139 * that txcmd might not have been initiated yet. 2140 * 2141 * If our direction has already been closed we just return without 2142 * doing anything. 2143 */ 2144 KKASSERT(state); 2145 if (state->txcmd & DMSGF_DELETE) 2146 return; 2147 if ((state->txcmd & DMSGF_CREATE) == 0) 2148 cmd |= DMSGF_CREATE; 2149 if (state->txcmd & DMSGF_REPLY) 2150 cmd |= DMSGF_REPLY; 2151 cmd |= DMSGF_DELETE; 2152 2153 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 2154 nmsg->any.head.error = error; 2155 kdmsg_msg_write(nmsg); 2156 } 2157 2158 /* 2159 * Reply to a message and continue our side of the transaction. 2160 * 2161 * If msg->state is non-NULL we are replying to a one-way message and this 2162 * function degenerates into the same as kdmsg_msg_reply(). 2163 */ 2164 void 2165 kdmsg_state_result(kdmsg_state_t *state, uint32_t error) 2166 { 2167 kdmsg_msg_t *nmsg; 2168 uint32_t cmd; 2169 2170 /* 2171 * Return a simple result code, do NOT terminate the transaction. 2172 */ 2173 cmd = DMSG_LNK_ERROR; 2174 2175 /* 2176 * Check if our direction has even been initiated yet, set CREATE. 2177 * 2178 * Check what direction this is (command or reply direction). Note 2179 * that txcmd might not have been initiated yet. 2180 * 2181 * If our direction has already been closed we just return without 2182 * doing anything. 2183 */ 2184 KKASSERT(state); 2185 if (state->txcmd & DMSGF_DELETE) 2186 return; 2187 if ((state->txcmd & DMSGF_CREATE) == 0) 2188 cmd |= DMSGF_CREATE; 2189 if (state->txcmd & DMSGF_REPLY) 2190 cmd |= DMSGF_REPLY; 2191 /* continuing transaction, do not set MSGF_DELETE */ 2192 2193 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 2194 nmsg->any.head.error = error; 2195 kdmsg_msg_write(nmsg); 2196 } 2197