1 /*- 2 * Copyright (c) 2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * TODO: txcmd CREATE state is deferred by tx msgq, need to calculate 36 * a streaming response. See subr_diskiocom()'s diskiodone(). 37 */ 38 #include <sys/param.h> 39 #include <sys/types.h> 40 #include <sys/kernel.h> 41 #include <sys/conf.h> 42 #include <sys/systm.h> 43 #include <sys/queue.h> 44 #include <sys/tree.h> 45 #include <sys/malloc.h> 46 #include <sys/mount.h> 47 #include <sys/socket.h> 48 #include <sys/vnode.h> 49 #include <sys/sysctl.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/priv.h> 53 #include <sys/thread.h> 54 #include <sys/globaldata.h> 55 #include <sys/limits.h> 56 57 #include <sys/dmsg.h> 58 59 RB_GENERATE(kdmsg_state_tree, kdmsg_state, rbnode, kdmsg_state_cmp); 60 61 SYSCTL_NODE(, OID_AUTO, kdmsg, CTLFLAG_RW, 0, "kdmsg"); 62 static int kdmsg_debug = 1; 63 SYSCTL_INT(_kdmsg, OID_AUTO, debug, CTLFLAG_RW, &kdmsg_debug, 0, 64 "Set debug level for kernel dmsg layer"); 65 66 #define kd_printf(level, ctl, ...) \ 67 if (kdmsg_debug >= (level)) kprintf("kdmsg: " ctl, __VA_ARGS__) 68 69 #define kdio_printf(iocom, level, ctl, ...) \ 70 if (kdmsg_debug >= (level)) kprintf("kdmsg: " ctl, __VA_ARGS__) 71 72 static int kdmsg_msg_receive_handling(kdmsg_msg_t *msg); 73 static int kdmsg_state_msgrx(kdmsg_msg_t *msg); 74 static int kdmsg_state_msgtx(kdmsg_msg_t *msg); 75 static void kdmsg_msg_write_locked(kdmsg_iocom_t *iocom, kdmsg_msg_t *msg); 76 static void kdmsg_state_cleanuprx(kdmsg_msg_t *msg); 77 static void kdmsg_state_cleanuptx(kdmsg_msg_t *msg); 78 static void kdmsg_subq_delete(kdmsg_state_t *state); 79 static void kdmsg_simulate_failure(kdmsg_state_t *state, int meto, int error); 80 static void kdmsg_state_abort(kdmsg_state_t *state); 81 static void kdmsg_state_dying(kdmsg_state_t *state); 82 static void kdmsg_state_free(kdmsg_state_t *state); 83 84 #ifdef KDMSG_DEBUG 85 #define KDMSG_DEBUG_ARGS , const char *file, int line 86 #define kdmsg_state_hold(state) _kdmsg_state_hold(state, __FILE__, __LINE__) 87 #define kdmsg_state_drop(state) _kdmsg_state_drop(state, __FILE__, __LINE__) 88 #else 89 #define KDMSG_DEBUG_ARGS 90 #define kdmsg_state_hold(state) _kdmsg_state_hold(state) 91 #define kdmsg_state_drop(state) _kdmsg_state_drop(state) 92 #endif 93 static void _kdmsg_state_hold(kdmsg_state_t *state KDMSG_DEBUG_ARGS); 94 static void _kdmsg_state_drop(kdmsg_state_t *state KDMSG_DEBUG_ARGS); 95 96 static void kdmsg_iocom_thread_rd(void *arg); 97 static void kdmsg_iocom_thread_wr(void *arg); 98 static int kdmsg_autorxmsg(kdmsg_msg_t *msg); 99 100 /*static struct lwkt_token kdmsg_token = LWKT_TOKEN_INITIALIZER(kdmsg_token);*/ 101 102 /* 103 * Initialize the roll-up communications structure for a network 104 * messaging session. This function does not install the socket. 105 */ 106 void 107 kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle, uint32_t flags, 108 struct malloc_type *mmsg, 109 int (*rcvmsg)(kdmsg_msg_t *msg)) 110 { 111 bzero(iocom, sizeof(*iocom)); 112 iocom->handle = handle; 113 iocom->mmsg = mmsg; 114 iocom->rcvmsg = rcvmsg; 115 iocom->flags = flags; 116 lockinit(&iocom->msglk, "h2msg", 0, 0); 117 TAILQ_INIT(&iocom->msgq); 118 RB_INIT(&iocom->staterd_tree); 119 RB_INIT(&iocom->statewr_tree); 120 121 iocom->state0.iocom = iocom; 122 iocom->state0.parent = &iocom->state0; 123 TAILQ_INIT(&iocom->state0.subq); 124 } 125 126 /* 127 * [Re]connect using the passed file pointer. The caller must ref the 128 * fp for us. We own that ref now. 129 */ 130 void 131 kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp, 132 const char *subsysname) 133 { 134 /* 135 * Destroy the current connection 136 */ 137 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 138 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX); 139 while (iocom->msgrd_td || iocom->msgwr_td) { 140 wakeup(&iocom->msg_ctl); 141 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz); 142 } 143 144 /* 145 * Drop communications descriptor 146 */ 147 if (iocom->msg_fp) { 148 fdrop(iocom->msg_fp); 149 iocom->msg_fp = NULL; 150 } 151 152 /* 153 * Setup new communications descriptor 154 */ 155 iocom->msg_ctl = 0; 156 iocom->msg_fp = fp; 157 iocom->msg_seq = 0; 158 iocom->flags &= ~KDMSG_IOCOMF_EXITNOACC; 159 160 lwkt_create(kdmsg_iocom_thread_rd, iocom, &iocom->msgrd_td, 161 NULL, 0, -1, "%s-msgrd", subsysname); 162 lwkt_create(kdmsg_iocom_thread_wr, iocom, &iocom->msgwr_td, 163 NULL, 0, -1, "%s-msgwr", subsysname); 164 lockmgr(&iocom->msglk, LK_RELEASE); 165 } 166 167 /* 168 * Caller sets up iocom->auto_lnk_conn and iocom->auto_lnk_span, then calls 169 * this function to handle the state machine for LNK_CONN and LNK_SPAN. 170 */ 171 static int kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 172 static int kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 173 174 void 175 kdmsg_iocom_autoinitiate(kdmsg_iocom_t *iocom, 176 void (*auto_callback)(kdmsg_msg_t *msg)) 177 { 178 kdmsg_msg_t *msg; 179 180 iocom->auto_callback = auto_callback; 181 182 msg = kdmsg_msg_alloc(&iocom->state0, 183 DMSG_LNK_CONN | DMSGF_CREATE, 184 kdmsg_lnk_conn_reply, NULL); 185 iocom->auto_lnk_conn.head = msg->any.head; 186 msg->any.lnk_conn = iocom->auto_lnk_conn; 187 iocom->conn_state = msg->state; 188 kdmsg_state_hold(msg->state); /* iocom->conn_state */ 189 kdmsg_msg_write(msg); 190 } 191 192 static 193 int 194 kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 195 { 196 kdmsg_iocom_t *iocom = state->iocom; 197 kdmsg_msg_t *rmsg; 198 199 /* 200 * Upon receipt of the LNK_CONN acknowledgement initiate an 201 * automatic SPAN if we were asked to. Used by e.g. xdisk, but 202 * not used by HAMMER2 which must manage more than one transmitted 203 * SPAN. 204 */ 205 if ((msg->any.head.cmd & DMSGF_CREATE) && 206 (iocom->flags & KDMSG_IOCOMF_AUTOTXSPAN)) { 207 rmsg = kdmsg_msg_alloc(&iocom->state0, 208 DMSG_LNK_SPAN | DMSGF_CREATE, 209 kdmsg_lnk_span_reply, NULL); 210 iocom->auto_lnk_span.head = rmsg->any.head; 211 rmsg->any.lnk_span = iocom->auto_lnk_span; 212 kdmsg_msg_write(rmsg); 213 } 214 215 /* 216 * Process shim after the CONN is acknowledged and before the CONN 217 * transaction is deleted. For deletions this gives device drivers 218 * the ability to interlock new operations on the circuit before 219 * it becomes illegal and panics. 220 */ 221 if (iocom->auto_callback) 222 iocom->auto_callback(msg); 223 224 if ((state->txcmd & DMSGF_DELETE) == 0 && 225 (msg->any.head.cmd & DMSGF_DELETE)) { 226 /* 227 * iocom->conn_state has a state ref, drop it when clearing. 228 */ 229 if (iocom->conn_state) 230 kdmsg_state_drop(iocom->conn_state); 231 iocom->conn_state = NULL; 232 kdmsg_msg_reply(msg, 0); 233 } 234 235 return (0); 236 } 237 238 static 239 int 240 kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 241 { 242 /* 243 * Be sure to process shim before terminating the SPAN 244 * transaction. Gives device drivers the ability to 245 * interlock new operations on the circuit before it 246 * becomes illegal and panics. 247 */ 248 if (state->iocom->auto_callback) 249 state->iocom->auto_callback(msg); 250 251 if ((state->txcmd & DMSGF_DELETE) == 0 && 252 (msg->any.head.cmd & DMSGF_DELETE)) { 253 kdmsg_msg_reply(msg, 0); 254 } 255 return (0); 256 } 257 258 /* 259 * Disconnect and clean up 260 */ 261 void 262 kdmsg_iocom_uninit(kdmsg_iocom_t *iocom) 263 { 264 kdmsg_state_t *state; 265 kdmsg_msg_t *msg; 266 int retries; 267 268 /* 269 * Ask the cluster controller to go away by setting 270 * KILLRX. Send a PING to get a response to unstick reading 271 * from the pipe. 272 * 273 * After 10 seconds shitcan the pipe and do an unclean shutdown. 274 */ 275 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 276 277 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX); 278 msg = kdmsg_msg_alloc(&iocom->state0, DMSG_LNK_PING, NULL, NULL); 279 kdmsg_msg_write_locked(iocom, msg); 280 281 retries = 10; 282 while (iocom->msgrd_td || iocom->msgwr_td) { 283 wakeup(&iocom->msg_ctl); 284 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz); 285 if (--retries == 0 && iocom->msg_fp) { 286 kdio_printf(iocom, 0, "%s\n", 287 "iocom_uninit: " 288 "shitcanning unresponsive pipe"); 289 fp_shutdown(iocom->msg_fp, SHUT_RDWR); 290 /* retries allowed to go negative, keep looping */ 291 } 292 } 293 294 /* 295 * Cleanup caches 296 */ 297 if ((state = iocom->freerd_state) != NULL) { 298 iocom->freerd_state = NULL; 299 kdmsg_state_drop(state); 300 } 301 302 if ((state = iocom->freewr_state) != NULL) { 303 iocom->freewr_state = NULL; 304 kdmsg_state_drop(state); 305 } 306 307 /* 308 * Drop communications descriptor 309 */ 310 if (iocom->msg_fp) { 311 fdrop(iocom->msg_fp); 312 iocom->msg_fp = NULL; 313 } 314 lockmgr(&iocom->msglk, LK_RELEASE); 315 } 316 317 /* 318 * Cluster controller thread. Perform messaging functions. We have one 319 * thread for the reader and one for the writer. The writer handles 320 * shutdown requests (which should break the reader thread). 321 */ 322 static 323 void 324 kdmsg_iocom_thread_rd(void *arg) 325 { 326 kdmsg_iocom_t *iocom = arg; 327 dmsg_hdr_t hdr; 328 kdmsg_msg_t *msg = NULL; 329 size_t hbytes; 330 size_t abytes; 331 int error = 0; 332 333 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLRX) == 0) { 334 /* 335 * Retrieve the message from the pipe or socket. 336 */ 337 error = fp_read(iocom->msg_fp, &hdr, sizeof(hdr), 338 NULL, 1, UIO_SYSSPACE); 339 if (error) 340 break; 341 if (hdr.magic != DMSG_HDR_MAGIC) { 342 kdio_printf(iocom, 1, "bad magic: %04x\n", hdr.magic); 343 error = EINVAL; 344 break; 345 } 346 hbytes = (hdr.cmd & DMSGF_SIZE) * DMSG_ALIGN; 347 if (hbytes < sizeof(hdr) || hbytes > DMSG_HDR_MAX) { 348 kdio_printf(iocom, 1, "bad header size %zd\n", hbytes); 349 error = EINVAL; 350 break; 351 } 352 353 /* XXX messy: mask cmd to avoid allocating state */ 354 msg = kdmsg_msg_alloc(&iocom->state0, 355 hdr.cmd & DMSGF_BASECMDMASK, 356 NULL, NULL); 357 msg->any.head = hdr; 358 msg->hdr_size = hbytes; 359 if (hbytes > sizeof(hdr)) { 360 error = fp_read(iocom->msg_fp, &msg->any.head + 1, 361 hbytes - sizeof(hdr), 362 NULL, 1, UIO_SYSSPACE); 363 if (error) { 364 kdio_printf(iocom, 1, "%s\n", 365 "short msg received"); 366 error = EINVAL; 367 break; 368 } 369 } 370 msg->aux_size = hdr.aux_bytes; 371 if (msg->aux_size > DMSG_AUX_MAX) { 372 kdio_printf(iocom, 1, 373 "illegal msg payload size %zd\n", 374 msg->aux_size); 375 error = EINVAL; 376 break; 377 } 378 if (msg->aux_size) { 379 abytes = DMSG_DOALIGN(msg->aux_size); 380 msg->aux_data = kmalloc(abytes, iocom->mmsg, M_WAITOK); 381 msg->flags |= KDMSG_FLAG_AUXALLOC; 382 error = fp_read(iocom->msg_fp, msg->aux_data, 383 abytes, NULL, 1, UIO_SYSSPACE); 384 if (error) { 385 kdio_printf(iocom, 1, "%s\n", 386 "short msg payload received"); 387 break; 388 } 389 } 390 391 error = kdmsg_msg_receive_handling(msg); 392 msg = NULL; 393 } 394 395 kdio_printf(iocom, 1, "read thread terminating error=%d\n", error); 396 397 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 398 if (msg) 399 kdmsg_msg_free(msg); 400 401 /* 402 * Shutdown the socket and set KILLRX for consistency in case the 403 * shutdown was not commanded. Signal the transmit side to shutdown 404 * by setting KILLTX and waking it up. 405 */ 406 fp_shutdown(iocom->msg_fp, SHUT_RDWR); 407 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX | 408 KDMSG_CLUSTERCTL_KILLTX); 409 iocom->msgrd_td = NULL; 410 lockmgr(&iocom->msglk, LK_RELEASE); 411 wakeup(&iocom->msg_ctl); 412 413 /* 414 * iocom can be ripped out at any time once the lock is 415 * released with msgrd_td set to NULL. The wakeup()s are safe but 416 * that is all. 417 */ 418 wakeup(iocom); 419 lwkt_exit(); 420 } 421 422 static 423 void 424 kdmsg_iocom_thread_wr(void *arg) 425 { 426 kdmsg_iocom_t *iocom = arg; 427 kdmsg_msg_t *msg; 428 ssize_t res; 429 size_t abytes; 430 int error = 0; 431 int save_ticks; 432 int didwarn; 433 434 /* 435 * Transmit loop 436 */ 437 msg = NULL; 438 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 439 440 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLTX) == 0 && error == 0) { 441 /* 442 * Sleep if no messages pending. Interlock with flag while 443 * holding msglk. 444 */ 445 if (TAILQ_EMPTY(&iocom->msgq)) { 446 atomic_set_int(&iocom->msg_ctl, 447 KDMSG_CLUSTERCTL_SLEEPING); 448 lksleep(&iocom->msg_ctl, &iocom->msglk, 0, "msgwr", hz); 449 atomic_clear_int(&iocom->msg_ctl, 450 KDMSG_CLUSTERCTL_SLEEPING); 451 } 452 453 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) { 454 /* 455 * Remove msg from the transmit queue and do 456 * persist and half-closed state handling. 457 */ 458 TAILQ_REMOVE(&iocom->msgq, msg, qentry); 459 460 error = kdmsg_state_msgtx(msg); 461 if (error == EALREADY) { 462 error = 0; 463 kdmsg_msg_free(msg); 464 continue; 465 } 466 if (error) { 467 kdmsg_msg_free(msg); 468 break; 469 } 470 471 /* 472 * Dump the message to the pipe or socket. 473 * 474 * We have to clean up the message as if the transmit 475 * succeeded even if it failed. 476 */ 477 lockmgr(&iocom->msglk, LK_RELEASE); 478 error = fp_write(iocom->msg_fp, &msg->any, 479 msg->hdr_size, &res, UIO_SYSSPACE); 480 if (error || res != msg->hdr_size) { 481 if (error == 0) 482 error = EINVAL; 483 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 484 kdmsg_state_cleanuptx(msg); 485 break; 486 } 487 if (msg->aux_size) { 488 abytes = DMSG_DOALIGN(msg->aux_size); 489 error = fp_write(iocom->msg_fp, 490 msg->aux_data, abytes, 491 &res, UIO_SYSSPACE); 492 if (error || res != abytes) { 493 if (error == 0) 494 error = EINVAL; 495 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 496 kdmsg_state_cleanuptx(msg); 497 break; 498 } 499 } 500 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 501 kdmsg_state_cleanuptx(msg); 502 } 503 } 504 505 kdio_printf(iocom, 1, "write thread terminating error=%d\n", error); 506 507 /* 508 * Shutdown the socket and set KILLTX for consistency in case the 509 * shutdown was not commanded. Signal the receive side to shutdown 510 * by setting KILLRX and waking it up. 511 */ 512 fp_shutdown(iocom->msg_fp, SHUT_RDWR); 513 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX | 514 KDMSG_CLUSTERCTL_KILLTX); 515 wakeup(&iocom->msg_ctl); 516 517 /* 518 * The transmit thread is responsible for final cleanups, wait 519 * for the receive side to terminate to prevent new received 520 * states from interfering with our cleanup. 521 * 522 * Do not set msgwr_td to NULL until we actually exit. 523 */ 524 while (iocom->msgrd_td) { 525 wakeup(&iocom->msg_ctl); 526 lksleep(iocom, &iocom->msglk, 0, "clstrkt", hz); 527 } 528 529 /* 530 * We can no longer receive new messages. We must drain the transmit 531 * message queue and simulate received messages to close anay remaining 532 * states. 533 * 534 * Loop until all the states are gone and there are no messages 535 * pending transmit. 536 */ 537 save_ticks = ticks; 538 didwarn = 0; 539 540 while (TAILQ_FIRST(&iocom->msgq) || 541 RB_ROOT(&iocom->staterd_tree) || 542 RB_ROOT(&iocom->statewr_tree)) { 543 /* 544 * Simulate failure for all sub-states of state0. 545 */ 546 kdmsg_drain_msgq(iocom); 547 kdio_printf(iocom, 2, "%s\n", 548 "simulate failure for all substates of state0"); 549 kdmsg_simulate_failure(&iocom->state0, 0, DMSG_ERR_LOSTLINK); 550 551 lksleep(iocom, &iocom->msglk, 0, "clstrtk", hz / 2); 552 553 if ((int)(ticks - save_ticks) > hz*2 && didwarn == 0) { 554 didwarn = 1; 555 kdio_printf(iocom, 0, 556 "Warning, write thread on %p " 557 "still terminating\n", 558 iocom); 559 } 560 if ((int)(ticks - save_ticks) > hz*15 && didwarn == 1) { 561 didwarn = 2; 562 kdio_printf(iocom, 0, 563 "Warning, write thread on %p " 564 "still terminating\n", 565 iocom); 566 } 567 if ((int)(ticks - save_ticks) > hz*60) { 568 kdio_printf(iocom, 0, 569 "Can't terminate: msgq %p " 570 "rd_tree %p wr_tree %p\n", 571 TAILQ_FIRST(&iocom->msgq), 572 RB_ROOT(&iocom->staterd_tree), 573 RB_ROOT(&iocom->statewr_tree)); 574 lksleep(iocom, &iocom->msglk, 0, "clstrtk", hz * 10); 575 } 576 } 577 578 /* 579 * Exit handling is done by the write thread. 580 */ 581 iocom->flags |= KDMSG_IOCOMF_EXITNOACC; 582 lockmgr(&iocom->msglk, LK_RELEASE); 583 584 /* 585 * The state trees had better be empty now 586 */ 587 KKASSERT(RB_EMPTY(&iocom->staterd_tree)); 588 KKASSERT(RB_EMPTY(&iocom->statewr_tree)); 589 KKASSERT(iocom->conn_state == NULL); 590 591 if (iocom->exit_func) { 592 /* 593 * iocom is invalid after we call the exit function. 594 */ 595 iocom->msgwr_td = NULL; 596 iocom->exit_func(iocom); 597 } else { 598 /* 599 * iocom can be ripped out from under us once msgwr_td is 600 * set to NULL. The wakeup is safe. 601 */ 602 iocom->msgwr_td = NULL; 603 wakeup(iocom); 604 } 605 lwkt_exit(); 606 } 607 608 /* 609 * This cleans out the pending transmit message queue, adjusting any 610 * persistent states properly in the process. 611 * 612 * Called with iocom locked. 613 */ 614 void 615 kdmsg_drain_msgq(kdmsg_iocom_t *iocom) 616 { 617 kdmsg_msg_t *msg; 618 619 /* 620 * Clean out our pending transmit queue, executing the 621 * appropriate state adjustments. If this tries to open 622 * any new outgoing transactions we have to loop up and 623 * clean them out. 624 */ 625 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) { 626 TAILQ_REMOVE(&iocom->msgq, msg, qentry); 627 if (kdmsg_state_msgtx(msg)) 628 kdmsg_msg_free(msg); 629 else 630 kdmsg_state_cleanuptx(msg); 631 } 632 } 633 634 /* 635 * Do all processing required to handle a freshly received message 636 * after its low level header has been validated. 637 * 638 * iocom is not locked. 639 */ 640 static 641 int 642 kdmsg_msg_receive_handling(kdmsg_msg_t *msg) 643 { 644 kdmsg_iocom_t *iocom = msg->state->iocom; 645 int error; 646 647 /* 648 * State machine tracking, state assignment for msg, 649 * returns error and discard status. Errors are fatal 650 * to the connection except for EALREADY which forces 651 * a discard without execution. 652 */ 653 error = kdmsg_state_msgrx(msg); 654 if (msg->state->flags & KDMSG_STATE_ABORTING) { 655 kdio_printf(iocom, 5, 656 "kdmsg_state_abort(b): state %p rxcmd=%08x " 657 "txcmd=%08x msgrx error %d\n", 658 msg->state, msg->state->rxcmd, 659 msg->state->txcmd, error); 660 } 661 if (error) { 662 /* 663 * Raw protocol or connection error 664 */ 665 if (msg->state->flags & KDMSG_STATE_ABORTING) 666 kdio_printf(iocom, 5, 667 "X1 state %p error %d\n", 668 msg->state, error); 669 kdmsg_msg_free(msg); 670 if (error == EALREADY) 671 error = 0; 672 } else if (msg->state && msg->state->func) { 673 /* 674 * Message related to state which already has a 675 * handling function installed for it. 676 */ 677 if (msg->state->flags & KDMSG_STATE_ABORTING) 678 kdio_printf(iocom, 5, 679 "X2 state %p func %p\n", 680 msg->state, msg->state->func); 681 error = msg->state->func(msg->state, msg); 682 kdmsg_state_cleanuprx(msg); 683 } else if (iocom->flags & KDMSG_IOCOMF_AUTOANY) { 684 if (msg->state->flags & KDMSG_STATE_ABORTING) 685 kdio_printf(iocom, 5, 686 "X3 state %p\n", msg->state); 687 error = kdmsg_autorxmsg(msg); 688 kdmsg_state_cleanuprx(msg); 689 } else { 690 if (msg->state->flags & KDMSG_STATE_ABORTING) 691 kdio_printf(iocom, 5, 692 "X4 state %p\n", msg->state); 693 error = iocom->rcvmsg(msg); 694 kdmsg_state_cleanuprx(msg); 695 } 696 return error; 697 } 698 699 /* 700 * Process state tracking for a message after reception and dequeueing, 701 * prior to execution of the state callback. The state is updated and 702 * will be removed from the RBTREE if completely closed, but the state->parent 703 * and subq linkage is not cleaned up until after the callback (see 704 * cleanuprx()). 705 * 706 * msglk is not held. 707 * 708 * NOTE: A message transaction can consist of several messages in either 709 * direction. 710 * 711 * NOTE: The msgid is unique to the initiator, not necessarily unique for 712 * us or for any relay or for the return direction for that matter. 713 * That is, two sides sending a new message can use the same msgid 714 * without colliding. 715 * 716 * -- 717 * 718 * ABORT sequences work by setting the ABORT flag along with normal message 719 * state. However, ABORTs can also be sent on half-closed messages, that is 720 * even if the command or reply side has already sent a DELETE, as long as 721 * the message has not been fully closed it can still send an ABORT+DELETE 722 * to terminate the half-closed message state. 723 * 724 * Since ABORT+DELETEs can race we silently discard ABORT's for message 725 * state which has already been fully closed. REPLY+ABORT+DELETEs can 726 * also race, and in this situation the other side might have already 727 * initiated a new unrelated command with the same message id. Since 728 * the abort has not set the CREATE flag the situation can be detected 729 * and the message will also be discarded. 730 * 731 * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE]. 732 * The ABORT request is essentially integrated into the command instead 733 * of being sent later on. In this situation the command implementation 734 * detects that CREATE and ABORT are both set (vs ABORT alone) and can 735 * special-case non-blocking operation for the command. 736 * 737 * NOTE! Messages with ABORT set without CREATE or DELETE are considered 738 * to be mid-stream aborts for command/reply sequences. ABORTs on 739 * one-way messages are not supported. 740 * 741 * NOTE! If a command sequence does not support aborts the ABORT flag is 742 * simply ignored. 743 * 744 * -- 745 * 746 * One-off messages (no reply expected) are sent with neither CREATE or DELETE 747 * set. One-off messages cannot be aborted and typically aren't processed 748 * by these routines. The REPLY bit can be used to distinguish whether a 749 * one-off message is a command or reply. For example, one-off replies 750 * will typically just contain status updates. 751 */ 752 static 753 int 754 kdmsg_state_msgrx(kdmsg_msg_t *msg) 755 { 756 kdmsg_iocom_t *iocom = msg->state->iocom; 757 kdmsg_state_t *state; 758 kdmsg_state_t *pstate; 759 kdmsg_state_t sdummy; 760 int error; 761 762 bzero(&sdummy, sizeof(sdummy)); /* avoid gcc warnings */ 763 764 /* 765 * Make sure a state structure is ready to go in case we need a new 766 * one. This is the only routine which uses freerd_state so no 767 * races are possible. 768 */ 769 if ((state = iocom->freerd_state) == NULL) { 770 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 771 state->flags = KDMSG_STATE_DYNAMIC; 772 state->iocom = iocom; 773 state->refs = 1; 774 TAILQ_INIT(&state->subq); 775 iocom->freerd_state = state; 776 } 777 state = NULL; /* safety */ 778 779 /* 780 * Lock RB tree and locate existing persistent state, if any. 781 * 782 * If received msg is a command state is on staterd_tree. 783 * If received msg is a reply state is on statewr_tree. 784 */ 785 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 786 787 again: 788 if (msg->state == &iocom->state0) { 789 sdummy.msgid = msg->any.head.msgid; 790 sdummy.iocom = iocom; 791 if (msg->any.head.cmd & DMSGF_REVTRANS) { 792 state = RB_FIND(kdmsg_state_tree, &iocom->statewr_tree, 793 &sdummy); 794 } else { 795 state = RB_FIND(kdmsg_state_tree, &iocom->staterd_tree, 796 &sdummy); 797 } 798 799 /* 800 * Set message state unconditionally. If this is a CREATE 801 * message this state will become the parent state and new 802 * state will be allocated for the message state. 803 */ 804 if (state == NULL) 805 state = &iocom->state0; 806 if (state->flags & KDMSG_STATE_INTERLOCK) { 807 state->flags |= KDMSG_STATE_SIGNAL; 808 lksleep(state, &iocom->msglk, 0, "dmrace", hz); 809 goto again; 810 } 811 kdmsg_state_hold(state); 812 kdmsg_state_drop(msg->state); /* iocom->state0 */ 813 msg->state = state; 814 } else { 815 state = msg->state; 816 } 817 818 /* 819 * Short-cut one-off or mid-stream messages. 820 */ 821 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 822 DMSGF_ABORT)) == 0) { 823 error = 0; 824 goto done; 825 } 826 827 /* 828 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from 829 * inside the case statements. 830 */ 831 switch(msg->any.head.cmd & (DMSGF_CREATE|DMSGF_DELETE|DMSGF_REPLY)) { 832 case DMSGF_CREATE: 833 case DMSGF_CREATE | DMSGF_DELETE: 834 /* 835 * New persistant command received. 836 */ 837 if (state != &iocom->state0) { 838 kdio_printf(iocom, 1, "%s\n", 839 "duplicate transaction"); 840 error = EINVAL; 841 break; 842 } 843 844 /* 845 * Lookup the circuit. The circuit is an open transaction. 846 * the REVCIRC bit in the message tells us which side 847 * initiated the transaction representing the circuit. 848 */ 849 if (msg->any.head.circuit) { 850 sdummy.msgid = msg->any.head.circuit; 851 852 if (msg->any.head.cmd & DMSGF_REVCIRC) { 853 pstate = RB_FIND(kdmsg_state_tree, 854 &iocom->statewr_tree, 855 &sdummy); 856 } else { 857 pstate = RB_FIND(kdmsg_state_tree, 858 &iocom->staterd_tree, 859 &sdummy); 860 } 861 if (pstate == NULL) { 862 kdio_printf(iocom, 1, "%s\n", 863 "missing parent in " 864 "stacked trans"); 865 error = EINVAL; 866 break; 867 } 868 } else { 869 pstate = &iocom->state0; 870 } 871 872 /* 873 * Allocate new state. 874 * 875 * msg->state becomes the owner of the ref we inherit from 876 * freerd_stae. 877 */ 878 kdmsg_state_drop(state); 879 state = iocom->freerd_state; 880 iocom->freerd_state = NULL; 881 882 msg->state = state; /* inherits freerd ref */ 883 state->parent = pstate; 884 KKASSERT(state->iocom == iocom); 885 state->flags |= KDMSG_STATE_RBINSERTED | 886 KDMSG_STATE_SUBINSERTED | 887 KDMSG_STATE_OPPOSITE; 888 if (TAILQ_EMPTY(&pstate->subq)) 889 kdmsg_state_hold(pstate);/* states on pstate->subq */ 890 kdmsg_state_hold(state); /* state on pstate->subq */ 891 kdmsg_state_hold(state); /* state on rbtree */ 892 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK; 893 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE; 894 state->txcmd = DMSGF_REPLY; 895 state->msgid = msg->any.head.msgid; 896 state->flags &= ~KDMSG_STATE_NEW; 897 RB_INSERT(kdmsg_state_tree, &iocom->staterd_tree, state); 898 TAILQ_INSERT_TAIL(&pstate->subq, state, entry); 899 error = 0; 900 break; 901 case DMSGF_DELETE: 902 /* 903 * Persistent state is expected but might not exist if an 904 * ABORT+DELETE races the close. 905 */ 906 if (state == &iocom->state0) { 907 if (msg->any.head.cmd & DMSGF_ABORT) { 908 kdio_printf(iocom, 1, "%s\n", 909 "msgrx: " 910 "state already A"); 911 error = EALREADY; 912 } else { 913 kdio_printf(iocom, 1, "%s\n", 914 "msgrx: no state for DELETE"); 915 error = EINVAL; 916 } 917 break; 918 } 919 920 /* 921 * Handle another ABORT+DELETE case if the msgid has already 922 * been reused. 923 */ 924 if ((state->rxcmd & DMSGF_CREATE) == 0) { 925 if (msg->any.head.cmd & DMSGF_ABORT) { 926 kdio_printf(iocom, 1, "%s\n", 927 "msgrx: state already B"); 928 error = EALREADY; 929 } else { 930 kdio_printf(iocom, 1, "%s\n", 931 "msgrx: state reused for DELETE"); 932 error = EINVAL; 933 } 934 break; 935 } 936 error = 0; 937 break; 938 default: 939 /* 940 * Check for mid-stream ABORT command received, otherwise 941 * allow. 942 */ 943 if (msg->any.head.cmd & DMSGF_ABORT) { 944 if (state == &iocom->state0 || 945 (state->rxcmd & DMSGF_CREATE) == 0) { 946 error = EALREADY; 947 break; 948 } 949 } 950 error = 0; 951 break; 952 case DMSGF_REPLY | DMSGF_CREATE: 953 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE: 954 /* 955 * When receiving a reply with CREATE set the original 956 * persistent state message should already exist. 957 */ 958 if (state == &iocom->state0) { 959 kdio_printf(iocom, 1, 960 "msgrx: no state match for " 961 "REPLY cmd=%08x msgid=%016jx\n", 962 msg->any.head.cmd, 963 (intmax_t)msg->any.head.msgid); 964 error = EINVAL; 965 break; 966 } 967 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE; 968 error = 0; 969 break; 970 case DMSGF_REPLY | DMSGF_DELETE: 971 /* 972 * Received REPLY+ABORT+DELETE in case where msgid has 973 * already been fully closed, ignore the message. 974 */ 975 if (state == &iocom->state0) { 976 if (msg->any.head.cmd & DMSGF_ABORT) { 977 error = EALREADY; 978 } else { 979 kdio_printf(iocom, 1, "%s\n", 980 "msgrx: no state match " 981 "for REPLY|DELETE"); 982 error = EINVAL; 983 } 984 break; 985 } 986 987 /* 988 * Received REPLY+ABORT+DELETE in case where msgid has 989 * already been reused for an unrelated message, 990 * ignore the message. 991 */ 992 if ((state->rxcmd & DMSGF_CREATE) == 0) { 993 if (msg->any.head.cmd & DMSGF_ABORT) { 994 error = EALREADY; 995 } else { 996 kdio_printf(iocom, 1, "%s\n", 997 "msgrx: state reused " 998 "for REPLY|DELETE"); 999 error = EINVAL; 1000 } 1001 break; 1002 } 1003 error = 0; 1004 break; 1005 case DMSGF_REPLY: 1006 /* 1007 * Check for mid-stream ABORT reply received to sent command. 1008 */ 1009 if (msg->any.head.cmd & DMSGF_ABORT) { 1010 if (state == &iocom->state0 || 1011 (state->rxcmd & DMSGF_CREATE) == 0) { 1012 error = EALREADY; 1013 break; 1014 } 1015 } 1016 error = 0; 1017 break; 1018 } 1019 1020 /* 1021 * Calculate the easy-switch() transactional command. Represents 1022 * the outer-transaction command for any transaction-create or 1023 * transaction-delete, and the inner message command for any 1024 * non-transaction or inside-transaction command. tcmd will be 1025 * set to 0 if the message state is illegal. 1026 * 1027 * The two can be told apart because outer-transaction commands 1028 * always have a DMSGF_CREATE and/or DMSGF_DELETE flag. 1029 */ 1030 done: 1031 if (msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE)) { 1032 if (state != &iocom->state0) { 1033 msg->tcmd = (msg->state->icmd & DMSGF_BASECMDMASK) | 1034 (msg->any.head.cmd & (DMSGF_CREATE | 1035 DMSGF_DELETE | 1036 DMSGF_REPLY)); 1037 } else { 1038 msg->tcmd = 0; 1039 } 1040 } else { 1041 msg->tcmd = msg->any.head.cmd & DMSGF_CMDSWMASK; 1042 } 1043 1044 /* 1045 * Adjust the state for DELETE handling now, before making the 1046 * callback so we are atomic with other state updates. 1047 * 1048 * Subq/parent linkages are cleaned up after the callback. 1049 * If an error occurred the message is ignored and state is not 1050 * updated. 1051 */ 1052 if ((state = msg->state) == NULL || error != 0) { 1053 kdio_printf(iocom, 1, 1054 "msgrx: state=%p error %d\n", 1055 state, error); 1056 } else if (msg->any.head.cmd & DMSGF_DELETE) { 1057 KKASSERT((state->rxcmd & DMSGF_DELETE) == 0); 1058 state->rxcmd |= DMSGF_DELETE; 1059 if (state->txcmd & DMSGF_DELETE) { 1060 KKASSERT(state->flags & KDMSG_STATE_RBINSERTED); 1061 if (state->rxcmd & DMSGF_REPLY) { 1062 KKASSERT(msg->any.head.cmd & 1063 DMSGF_REPLY); 1064 RB_REMOVE(kdmsg_state_tree, 1065 &iocom->statewr_tree, state); 1066 } else { 1067 KKASSERT((msg->any.head.cmd & 1068 DMSGF_REPLY) == 0); 1069 RB_REMOVE(kdmsg_state_tree, 1070 &iocom->staterd_tree, state); 1071 } 1072 state->flags &= ~KDMSG_STATE_RBINSERTED; 1073 kdmsg_state_drop(state); /* state on rbtree */ 1074 } 1075 } 1076 lockmgr(&iocom->msglk, LK_RELEASE); 1077 1078 return (error); 1079 } 1080 1081 /* 1082 * Called instead of iocom->rcvmsg() if any of the AUTO flags are set. 1083 * This routine must call iocom->rcvmsg() for anything not automatically 1084 * handled. 1085 */ 1086 static int 1087 kdmsg_autorxmsg(kdmsg_msg_t *msg) 1088 { 1089 kdmsg_iocom_t *iocom = msg->state->iocom; 1090 kdmsg_msg_t *rep; 1091 int error = 0; 1092 uint32_t cmd; 1093 1094 /* 1095 * Main switch processes transaction create/delete sequences only. 1096 * Use icmd (DELETEs use DMSG_LNK_ERROR 1097 * 1098 * NOTE: If processing in-transaction messages you generally want 1099 * an inner switch on msg->any.head.cmd. 1100 */ 1101 if (msg->state) { 1102 cmd = (msg->state->icmd & DMSGF_BASECMDMASK) | 1103 (msg->any.head.cmd & (DMSGF_CREATE | 1104 DMSGF_DELETE | 1105 DMSGF_REPLY)); 1106 } else { 1107 cmd = 0; 1108 } 1109 1110 switch(cmd) { 1111 case DMSG_LNK_PING: 1112 /* 1113 * Received ping, send reply 1114 */ 1115 rep = kdmsg_msg_alloc(msg->state, DMSG_LNK_PING | DMSGF_REPLY, 1116 NULL, NULL); 1117 kdmsg_msg_write(rep); 1118 break; 1119 case DMSG_LNK_PING | DMSGF_REPLY: 1120 /* ignore replies */ 1121 break; 1122 case DMSG_LNK_CONN | DMSGF_CREATE: 1123 case DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_DELETE: 1124 /* 1125 * Received LNK_CONN transaction. Transmit response and 1126 * leave transaction open, which allows the other end to 1127 * start to the SPAN protocol. 1128 * 1129 * Handle shim after acknowledging the CONN. 1130 */ 1131 if ((msg->any.head.cmd & DMSGF_DELETE) == 0) { 1132 if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) { 1133 kdmsg_msg_result(msg, 0); 1134 if (iocom->auto_callback) 1135 iocom->auto_callback(msg); 1136 } else { 1137 error = iocom->rcvmsg(msg); 1138 } 1139 break; 1140 } 1141 /* fall through */ 1142 case DMSG_LNK_CONN | DMSGF_DELETE: 1143 /* 1144 * This message is usually simulated after a link is lost 1145 * to clean up the transaction. 1146 */ 1147 if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) { 1148 if (iocom->auto_callback) 1149 iocom->auto_callback(msg); 1150 kdmsg_msg_reply(msg, 0); 1151 } else { 1152 error = iocom->rcvmsg(msg); 1153 } 1154 break; 1155 case DMSG_LNK_SPAN | DMSGF_CREATE: 1156 case DMSG_LNK_SPAN | DMSGF_CREATE | DMSGF_DELETE: 1157 /* 1158 * Received LNK_SPAN transaction. We do not have to respond 1159 * (except on termination), but we must leave the transaction 1160 * open. 1161 * 1162 * Handle shim after acknowledging the SPAN. 1163 */ 1164 if (iocom->flags & KDMSG_IOCOMF_AUTORXSPAN) { 1165 if ((msg->any.head.cmd & DMSGF_DELETE) == 0) { 1166 if (iocom->auto_callback) 1167 iocom->auto_callback(msg); 1168 break; 1169 } 1170 /* fall through */ 1171 } else { 1172 error = iocom->rcvmsg(msg); 1173 break; 1174 } 1175 /* fall through */ 1176 case DMSG_LNK_SPAN | DMSGF_DELETE: 1177 /* 1178 * Process shims (auto_callback) before cleaning up the 1179 * circuit structure and closing the transactions. Device 1180 * driver should ensure that the circuit is not used after 1181 * the auto_callback() returns. 1182 * 1183 * Handle shim before closing the SPAN transaction. 1184 */ 1185 if (iocom->flags & KDMSG_IOCOMF_AUTORXSPAN) { 1186 if (iocom->auto_callback) 1187 iocom->auto_callback(msg); 1188 kdmsg_msg_reply(msg, 0); 1189 } else { 1190 error = iocom->rcvmsg(msg); 1191 } 1192 break; 1193 default: 1194 /* 1195 * Anything unhandled goes into rcvmsg. 1196 * 1197 * NOTE: Replies to link-level messages initiated by our side 1198 * are handled by the state callback, they are NOT 1199 * handled here. 1200 */ 1201 error = iocom->rcvmsg(msg); 1202 break; 1203 } 1204 return (error); 1205 } 1206 1207 /* 1208 * Post-receive-handling message and state cleanup. This routine is called 1209 * after the state function handling/callback to properly dispose of the 1210 * message and unlink the state's parent/subq linkage if the state is 1211 * completely closed. 1212 * 1213 * msglk is not held. 1214 */ 1215 static 1216 void 1217 kdmsg_state_cleanuprx(kdmsg_msg_t *msg) 1218 { 1219 kdmsg_state_t *state = msg->state; 1220 kdmsg_iocom_t *iocom = state->iocom; 1221 1222 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1223 if (state != &iocom->state0) { 1224 /* 1225 * When terminating a transaction (in either direction), all 1226 * sub-states are aborted. 1227 */ 1228 if ((msg->any.head.cmd & DMSGF_DELETE) && 1229 TAILQ_FIRST(&msg->state->subq)) { 1230 kdio_printf(iocom, 2, 1231 "simulate failure for substates of " 1232 "state %p cmd %08x/%08x\n", 1233 msg->state, 1234 msg->state->rxcmd, 1235 msg->state->txcmd); 1236 kdmsg_simulate_failure(msg->state, 1237 0, DMSG_ERR_LOSTLINK); 1238 } 1239 1240 /* 1241 * Once the state is fully closed we can (try to) remove it 1242 * from the subq topology. 1243 */ 1244 if ((state->flags & KDMSG_STATE_SUBINSERTED) && 1245 (state->rxcmd & DMSGF_DELETE) && 1246 (state->txcmd & DMSGF_DELETE)) { 1247 /* 1248 * Remove parent linkage if state is completely closed. 1249 */ 1250 kdmsg_subq_delete(state); 1251 } 1252 } 1253 kdmsg_msg_free(msg); 1254 1255 lockmgr(&iocom->msglk, LK_RELEASE); 1256 } 1257 1258 /* 1259 * Remove state from its parent's subq. This can wind up recursively 1260 * dropping the parent upward. 1261 * 1262 * NOTE: Once we drop the parent, our pstate pointer may become invalid. 1263 */ 1264 static 1265 void 1266 kdmsg_subq_delete(kdmsg_state_t *state) 1267 { 1268 kdmsg_state_t *pstate; 1269 1270 if (state->flags & KDMSG_STATE_SUBINSERTED) { 1271 pstate = state->parent; 1272 KKASSERT(pstate); 1273 if (pstate->scan == state) 1274 pstate->scan = NULL; 1275 TAILQ_REMOVE(&pstate->subq, state, entry); 1276 state->flags &= ~KDMSG_STATE_SUBINSERTED; 1277 state->parent = NULL; 1278 if (TAILQ_EMPTY(&pstate->subq)) { 1279 kdmsg_state_drop(pstate);/* pstate->subq */ 1280 } 1281 pstate = NULL; /* safety */ 1282 kdmsg_state_drop(state); /* pstate->subq */ 1283 } else { 1284 KKASSERT(state->parent == NULL); 1285 } 1286 } 1287 1288 /* 1289 * Simulate receiving a message which terminates an active transaction 1290 * state. Our simulated received message must set DELETE and may also 1291 * have to set CREATE. It must also ensure that all fields are set such 1292 * that the receive handling code can find the state (kdmsg_state_msgrx()) 1293 * or an endless loop will ensue. 1294 * 1295 * This is used when the other end of the link is dead so the device driver 1296 * gets a completed transaction for all pending states. 1297 * 1298 * Called with iocom locked. 1299 */ 1300 static 1301 void 1302 kdmsg_simulate_failure(kdmsg_state_t *state, int meto, int error) 1303 { 1304 kdmsg_state_t *substate; 1305 1306 kdmsg_state_hold(state); /* aborting */ 1307 1308 /* 1309 * Abort parent state first. Parent will not actually disappear 1310 * until children are gone. Device drivers must handle the situation. 1311 * The advantage of this is that device drivers can flag the situation 1312 * as an interlock against new operations on dying states. And since 1313 * device operations are often asynchronous anyway, this sequence of 1314 * events works out better. 1315 */ 1316 if (meto) 1317 kdmsg_state_abort(state); 1318 1319 /* 1320 * Recurse through any children. 1321 */ 1322 again: 1323 TAILQ_FOREACH(substate, &state->subq, entry) { 1324 if (substate->flags & KDMSG_STATE_ABORTING) 1325 continue; 1326 state->scan = substate; 1327 kdmsg_simulate_failure(substate, 1, error); 1328 if (state->scan != substate) 1329 goto again; 1330 } 1331 kdmsg_state_drop(state); /* aborting */ 1332 } 1333 1334 static 1335 void 1336 kdmsg_state_abort(kdmsg_state_t *state) 1337 { 1338 kdmsg_msg_t *msg; 1339 1340 /* 1341 * Set ABORTING and DYING, return if already set. If the state was 1342 * just allocated we defer the abort operation until the related 1343 * message is processed. 1344 */ 1345 KKASSERT((state->flags & KDMSG_STATE_ABORTING) == 0); 1346 if (state->flags & KDMSG_STATE_ABORTING) 1347 return; 1348 state->flags |= KDMSG_STATE_ABORTING; 1349 kdmsg_state_dying(state); 1350 if (state->flags & KDMSG_STATE_NEW) { 1351 kdio_printf(iocom, 5, 1352 "kdmsg_state_abort(0): state %p rxcmd %08x " 1353 "txcmd %08x flags %08x - in NEW state\n", 1354 state, state->rxcmd, 1355 state->txcmd, state->flags); 1356 return; 1357 } 1358 1359 /* 1360 * NOTE: The DELETE flag might already be set due to an early 1361 * termination. 1362 * 1363 * NOTE: Args to kdmsg_msg_alloc() to avoid dynamic state allocation. 1364 * 1365 * NOTE: We are simulating a received message using our state 1366 * (vs a message generated by the other side using its state), 1367 * so we must invert DMSGF_REVTRANS and DMSGF_REVCIRC. 1368 */ 1369 kdio_printf(iocom, 5, 1370 "kdmsg_state_abort(1): state %p rxcmd %08x txcmd %08x\n", 1371 state, state->rxcmd, state->txcmd); 1372 if ((state->rxcmd & DMSGF_DELETE) == 0) { 1373 msg = kdmsg_msg_alloc(state, DMSG_LNK_ERROR, NULL, NULL); 1374 if ((state->rxcmd & DMSGF_CREATE) == 0) 1375 msg->any.head.cmd |= DMSGF_CREATE; 1376 msg->any.head.cmd |= DMSGF_DELETE | 1377 (state->rxcmd & DMSGF_REPLY); 1378 msg->any.head.cmd ^= (DMSGF_REVTRANS | DMSGF_REVCIRC); 1379 msg->any.head.error = DMSG_ERR_LOSTLINK; 1380 kdio_printf(iocom, 5, 1381 "kdmsg_state_abort(a): state %p msgcmd %08x\n", 1382 state, msg->any.head.cmd); 1383 /* circuit not initialized */ 1384 lockmgr(&state->iocom->msglk, LK_RELEASE); 1385 kdmsg_msg_receive_handling(msg); 1386 lockmgr(&state->iocom->msglk, LK_EXCLUSIVE); 1387 msg = NULL; 1388 } 1389 kdio_printf(iocom, 5, 1390 "kdmsg_state_abort(2): state %p rxcmd %08x txcmd %08x\n", 1391 state, state->rxcmd, state->txcmd); 1392 } 1393 1394 /* 1395 * Recursively sets KDMSG_STATE_DYING on state and all sub-states, preventing 1396 * the transmission of any new messages on these states. This is done 1397 * atomically when parent state is terminating, whereas setting ABORTING is 1398 * not atomic and can leak races. 1399 */ 1400 static 1401 void 1402 kdmsg_state_dying(kdmsg_state_t *state) 1403 { 1404 kdmsg_state_t *scan; 1405 1406 if ((state->flags & KDMSG_STATE_DYING) == 0) { 1407 state->flags |= KDMSG_STATE_DYING; 1408 TAILQ_FOREACH(scan, &state->subq, entry) 1409 kdmsg_state_dying(scan); 1410 } 1411 } 1412 1413 /* 1414 * Process state tracking for a message prior to transmission. 1415 * 1416 * Called with msglk held and the msg dequeued. Returns non-zero if 1417 * the message is bad and should be deleted by the caller. 1418 * 1419 * One-off messages are usually with dummy state and msg->state may be NULL 1420 * in this situation. 1421 * 1422 * New transactions (when CREATE is set) will insert the state. 1423 * 1424 * May request that caller discard the message by setting *discardp to 1. 1425 * A NULL state may be returned in this case. 1426 */ 1427 static 1428 int 1429 kdmsg_state_msgtx(kdmsg_msg_t *msg) 1430 { 1431 kdmsg_iocom_t *iocom = msg->state->iocom; 1432 kdmsg_state_t *state; 1433 int error; 1434 1435 /* 1436 * Make sure a state structure is ready to go in case we need a new 1437 * one. This is the only routine which uses freewr_state so no 1438 * races are possible. 1439 */ 1440 if ((state = iocom->freewr_state) == NULL) { 1441 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 1442 state->flags = KDMSG_STATE_DYNAMIC; 1443 state->iocom = iocom; 1444 state->refs = 1; 1445 TAILQ_INIT(&state->subq); 1446 iocom->freewr_state = state; 1447 } 1448 1449 /* 1450 * Lock RB tree. If persistent state is present it will have already 1451 * been assigned to msg. 1452 */ 1453 state = msg->state; 1454 1455 /* 1456 * Short-cut one-off or mid-stream messages (state may be NULL). 1457 */ 1458 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 1459 DMSGF_ABORT)) == 0) { 1460 return(0); 1461 } 1462 1463 1464 /* 1465 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from 1466 * inside the case statements. 1467 */ 1468 switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 1469 DMSGF_REPLY)) { 1470 case DMSGF_CREATE: 1471 case DMSGF_CREATE | DMSGF_DELETE: 1472 /* 1473 * Insert the new persistent message state and mark 1474 * half-closed if DELETE is set. Since this is a new 1475 * message it isn't possible to transition into the fully 1476 * closed state here. 1477 * 1478 * XXX state must be assigned and inserted by 1479 * kdmsg_msg_write(). txcmd is assigned by us 1480 * on-transmit. 1481 */ 1482 KKASSERT(state != NULL); 1483 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK; 1484 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE; 1485 state->rxcmd = DMSGF_REPLY; 1486 state->flags &= ~KDMSG_STATE_NEW; 1487 error = 0; 1488 break; 1489 case DMSGF_DELETE: 1490 /* 1491 * Sent ABORT+DELETE in case where msgid has already 1492 * been fully closed, ignore the message. 1493 */ 1494 if (state == &iocom->state0) { 1495 if (msg->any.head.cmd & DMSGF_ABORT) { 1496 error = EALREADY; 1497 } else { 1498 kdio_printf(iocom, 1, 1499 "msgtx: no state match " 1500 "for DELETE cmd=%08x msgid=%016jx\n", 1501 msg->any.head.cmd, 1502 (intmax_t)msg->any.head.msgid); 1503 error = EINVAL; 1504 } 1505 break; 1506 } 1507 1508 /* 1509 * Sent ABORT+DELETE in case where msgid has 1510 * already been reused for an unrelated message, 1511 * ignore the message. 1512 */ 1513 if ((state->txcmd & DMSGF_CREATE) == 0) { 1514 if (msg->any.head.cmd & DMSGF_ABORT) { 1515 error = EALREADY; 1516 } else { 1517 kdio_printf(iocom, 1, "%s\n", 1518 "msgtx: state reused " 1519 "for DELETE"); 1520 error = EINVAL; 1521 } 1522 break; 1523 } 1524 error = 0; 1525 break; 1526 default: 1527 /* 1528 * Check for mid-stream ABORT command sent 1529 */ 1530 if (msg->any.head.cmd & DMSGF_ABORT) { 1531 if (state == &state->iocom->state0 || 1532 (state->txcmd & DMSGF_CREATE) == 0) { 1533 error = EALREADY; 1534 break; 1535 } 1536 } 1537 error = 0; 1538 break; 1539 case DMSGF_REPLY | DMSGF_CREATE: 1540 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE: 1541 /* 1542 * When transmitting a reply with CREATE set the original 1543 * persistent state message should already exist. 1544 */ 1545 if (state == &state->iocom->state0) { 1546 kdio_printf(iocom, 1, "%s\n", 1547 "msgtx: no state match " 1548 "for REPLY | CREATE"); 1549 error = EINVAL; 1550 break; 1551 } 1552 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE; 1553 error = 0; 1554 break; 1555 case DMSGF_REPLY | DMSGF_DELETE: 1556 /* 1557 * When transmitting a reply with DELETE set the original 1558 * persistent state message should already exist. 1559 * 1560 * This is very similar to the REPLY|CREATE|* case except 1561 * txcmd is already stored, so we just add the DELETE flag. 1562 * 1563 * Sent REPLY+ABORT+DELETE in case where msgid has 1564 * already been fully closed, ignore the message. 1565 */ 1566 if (state == &state->iocom->state0) { 1567 if (msg->any.head.cmd & DMSGF_ABORT) { 1568 error = EALREADY; 1569 } else { 1570 kdio_printf(iocom, 1, "%s\n", 1571 "msgtx: no state match " 1572 "for REPLY | DELETE"); 1573 error = EINVAL; 1574 } 1575 break; 1576 } 1577 1578 /* 1579 * Sent REPLY+ABORT+DELETE in case where msgid has already 1580 * been reused for an unrelated message, ignore the message. 1581 */ 1582 if ((state->txcmd & DMSGF_CREATE) == 0) { 1583 if (msg->any.head.cmd & DMSGF_ABORT) { 1584 error = EALREADY; 1585 } else { 1586 kdio_printf(iocom, 1, "%s\n", 1587 "msgtx: state reused " 1588 "for REPLY | DELETE"); 1589 error = EINVAL; 1590 } 1591 break; 1592 } 1593 error = 0; 1594 break; 1595 case DMSGF_REPLY: 1596 /* 1597 * Check for mid-stream ABORT reply sent. 1598 * 1599 * One-off REPLY messages are allowed for e.g. status updates. 1600 */ 1601 if (msg->any.head.cmd & DMSGF_ABORT) { 1602 if (state == &state->iocom->state0 || 1603 (state->txcmd & DMSGF_CREATE) == 0) { 1604 error = EALREADY; 1605 break; 1606 } 1607 } 1608 error = 0; 1609 break; 1610 } 1611 1612 /* 1613 * Set interlock (XXX hack) in case the send side blocks and a 1614 * response is returned before kdmsg_state_cleanuptx() can be 1615 * run. 1616 */ 1617 if (state && error == 0) 1618 state->flags |= KDMSG_STATE_INTERLOCK; 1619 1620 return (error); 1621 } 1622 1623 /* 1624 * Called with iocom locked. 1625 */ 1626 static 1627 void 1628 kdmsg_state_cleanuptx(kdmsg_msg_t *msg) 1629 { 1630 kdmsg_iocom_t *iocom = msg->state->iocom; 1631 kdmsg_state_t *state; 1632 1633 if ((state = msg->state) == NULL) { 1634 kdmsg_msg_free(msg); 1635 return; 1636 } 1637 1638 /* 1639 * Clear interlock (XXX hack) in case the send side blocks and a 1640 * response is returned in the other thread before 1641 * kdmsg_state_cleanuptx() can be run. We maintain our hold on 1642 * iocom->msglk so we can do this before completing our task. 1643 */ 1644 if (state->flags & KDMSG_STATE_SIGNAL) { 1645 kdio_printf(iocom, 1, "state %p interlock!\n", state); 1646 wakeup(state); 1647 } 1648 state->flags &= ~(KDMSG_STATE_INTERLOCK | KDMSG_STATE_SIGNAL); 1649 kdmsg_state_hold(state); 1650 1651 if (msg->any.head.cmd & DMSGF_DELETE) { 1652 KKASSERT((state->txcmd & DMSGF_DELETE) == 0); 1653 state->txcmd |= DMSGF_DELETE; 1654 if (state->rxcmd & DMSGF_DELETE) { 1655 KKASSERT(state->flags & KDMSG_STATE_RBINSERTED); 1656 if (state->txcmd & DMSGF_REPLY) { 1657 KKASSERT(msg->any.head.cmd & 1658 DMSGF_REPLY); 1659 RB_REMOVE(kdmsg_state_tree, 1660 &iocom->staterd_tree, state); 1661 } else { 1662 KKASSERT((msg->any.head.cmd & 1663 DMSGF_REPLY) == 0); 1664 RB_REMOVE(kdmsg_state_tree, 1665 &iocom->statewr_tree, state); 1666 } 1667 state->flags &= ~KDMSG_STATE_RBINSERTED; 1668 1669 /* 1670 * The subq recursion is used for parent linking and 1671 * scanning the topology for aborts, we can only 1672 * remove leafs. The circuit is effectively dead now, 1673 * but topology won't be torn down until all of its 1674 * children have finished/aborted. 1675 * 1676 * This is particularly important for end-point 1677 * devices which might need to access private data 1678 * in parent states. Out of order disconnects can 1679 * occur if an end-point device is processing a 1680 * message transaction asynchronously because abort 1681 * requests are basically synchronous and it probably 1682 * isn't convenient (or possible) for the end-point 1683 * to abort an asynchronous operation. 1684 */ 1685 if (TAILQ_EMPTY(&state->subq)) 1686 kdmsg_subq_delete(state); 1687 kdmsg_msg_free(msg); 1688 kdmsg_state_drop(state); /* state on rbtree */ 1689 } else { 1690 kdmsg_msg_free(msg); 1691 } 1692 } else { 1693 kdmsg_msg_free(msg); 1694 } 1695 1696 /* 1697 * Deferred abort after transmission. 1698 */ 1699 if ((state->flags & (KDMSG_STATE_ABORTING | KDMSG_STATE_DYING)) && 1700 (state->rxcmd & DMSGF_DELETE) == 0) { 1701 kdio_printf(iocom, 5, 1702 "kdmsg_state_cleanuptx: state=%p " 1703 "executing deferred abort\n", 1704 state); 1705 state->flags &= ~KDMSG_STATE_ABORTING; 1706 kdmsg_state_abort(state); 1707 } 1708 kdmsg_state_drop(state); 1709 } 1710 1711 static 1712 void 1713 _kdmsg_state_hold(kdmsg_state_t *state KDMSG_DEBUG_ARGS) 1714 { 1715 atomic_add_int(&state->refs, 1); 1716 #if KDMSG_DEBUG 1717 kd_printf(4, "state %p +%d\t%s:%d\n", state, state->refs, file, line); 1718 #endif 1719 } 1720 1721 static 1722 void 1723 _kdmsg_state_drop(kdmsg_state_t *state KDMSG_DEBUG_ARGS) 1724 { 1725 KKASSERT(state->refs > 0); 1726 #if KDMSG_DEBUG 1727 kd_printf(4, "state %p -%d\t%s:%d\n", state, state->refs, file, line); 1728 #endif 1729 if (atomic_fetchadd_int(&state->refs, -1) == 1) 1730 kdmsg_state_free(state); 1731 } 1732 1733 static 1734 void 1735 kdmsg_state_free(kdmsg_state_t *state) 1736 { 1737 kdmsg_iocom_t *iocom = state->iocom; 1738 1739 KKASSERT((state->flags & KDMSG_STATE_RBINSERTED) == 0); 1740 KKASSERT((state->flags & KDMSG_STATE_SUBINSERTED) == 0); 1741 KKASSERT(TAILQ_EMPTY(&state->subq)); 1742 1743 if (state != &state->iocom->state0) 1744 kfree(state, iocom->mmsg); 1745 } 1746 1747 kdmsg_msg_t * 1748 kdmsg_msg_alloc(kdmsg_state_t *state, uint32_t cmd, 1749 int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data) 1750 { 1751 kdmsg_iocom_t *iocom = state->iocom; 1752 kdmsg_state_t *pstate; 1753 kdmsg_msg_t *msg; 1754 size_t hbytes; 1755 1756 KKASSERT(iocom != NULL); 1757 hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN; 1758 msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes, 1759 iocom->mmsg, M_WAITOK | M_ZERO); 1760 msg->hdr_size = hbytes; 1761 1762 if ((cmd & (DMSGF_CREATE | DMSGF_REPLY)) == DMSGF_CREATE) { 1763 /* 1764 * New transaction, requires tracking state and a unique 1765 * msgid to be allocated. 1766 * 1767 * It is possible to race a circuit failure, inherit the 1768 * parent's STATE_DYING flag to trigger an abort sequence 1769 * in the transmit path. By not inheriting ABORTING the 1770 * abort sequence can recurse. 1771 * 1772 * NOTE: The transactions has not yet been initiated so we 1773 * cannot set DMSGF_CREATE/DELETE bits in txcmd or rxcmd. 1774 * We have to properly setup DMSGF_REPLY, however. 1775 */ 1776 pstate = state; 1777 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 1778 TAILQ_INIT(&state->subq); 1779 state->iocom = iocom; 1780 state->parent = pstate; 1781 state->flags = KDMSG_STATE_DYNAMIC | 1782 KDMSG_STATE_NEW; 1783 state->func = func; 1784 state->any.any = data; 1785 state->msgid = (uint64_t)(uintptr_t)state; 1786 /*msg->any.head.msgid = state->msgid;XXX*/ 1787 1788 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1789 if (RB_INSERT(kdmsg_state_tree, &iocom->statewr_tree, state)) 1790 panic("duplicate msgid allocated"); 1791 if (TAILQ_EMPTY(&pstate->subq)) 1792 kdmsg_state_hold(pstate);/* pstate->subq */ 1793 TAILQ_INSERT_TAIL(&pstate->subq, state, entry); 1794 state->flags |= KDMSG_STATE_RBINSERTED | 1795 KDMSG_STATE_SUBINSERTED; 1796 state->flags |= pstate->flags & KDMSG_STATE_DYING; 1797 kdmsg_state_hold(state); /* pstate->subq */ 1798 kdmsg_state_hold(state); /* state on rbtree */ 1799 kdmsg_state_hold(state); /* msg->state */ 1800 lockmgr(&iocom->msglk, LK_RELEASE); 1801 } else { 1802 pstate = state->parent; 1803 KKASSERT(pstate != NULL); 1804 kdmsg_state_hold(state); /* msg->state */ 1805 } 1806 1807 if (state->flags & KDMSG_STATE_OPPOSITE) 1808 cmd |= DMSGF_REVTRANS; 1809 if (pstate->flags & KDMSG_STATE_OPPOSITE) 1810 cmd |= DMSGF_REVCIRC; 1811 1812 msg->any.head.magic = DMSG_HDR_MAGIC; 1813 msg->any.head.cmd = cmd; 1814 msg->any.head.msgid = state->msgid; 1815 msg->any.head.circuit = pstate->msgid; 1816 msg->state = state; 1817 1818 return (msg); 1819 } 1820 1821 void 1822 kdmsg_msg_free(kdmsg_msg_t *msg) 1823 { 1824 kdmsg_iocom_t *iocom = msg->state->iocom; 1825 kdmsg_state_t *state; 1826 1827 if ((msg->flags & KDMSG_FLAG_AUXALLOC) && 1828 msg->aux_data && msg->aux_size) { 1829 kfree(msg->aux_data, iocom->mmsg); 1830 msg->flags &= ~KDMSG_FLAG_AUXALLOC; 1831 } 1832 if ((state = msg->state) != NULL) { 1833 msg->state = NULL; 1834 kdmsg_state_drop(state); /* msg->state */ 1835 } 1836 msg->aux_data = NULL; 1837 msg->aux_size = 0; 1838 1839 kfree(msg, iocom->mmsg); 1840 } 1841 1842 void 1843 kdmsg_detach_aux_data(kdmsg_msg_t *msg, kdmsg_data_t *data) 1844 { 1845 if (msg->flags & KDMSG_FLAG_AUXALLOC) { 1846 data->aux_data = msg->aux_data; 1847 data->aux_size = msg->aux_size; 1848 data->iocom = msg->state->iocom; 1849 msg->flags &= ~KDMSG_FLAG_AUXALLOC; 1850 } else { 1851 data->aux_data = NULL; 1852 data->aux_size = 0; 1853 data->iocom = msg->state->iocom; 1854 } 1855 } 1856 1857 void 1858 kdmsg_free_aux_data(kdmsg_data_t *data) 1859 { 1860 if (data->aux_data) 1861 kfree(data->aux_data, data->iocom->mmsg); 1862 } 1863 1864 /* 1865 * Indexed messages are stored in a red-black tree indexed by their 1866 * msgid. Only persistent messages are indexed. 1867 */ 1868 int 1869 kdmsg_state_cmp(kdmsg_state_t *state1, kdmsg_state_t *state2) 1870 { 1871 if (state1->iocom < state2->iocom) 1872 return(-1); 1873 if (state1->iocom > state2->iocom) 1874 return(1); 1875 if (state1->msgid < state2->msgid) 1876 return(-1); 1877 if (state1->msgid > state2->msgid) 1878 return(1); 1879 return(0); 1880 } 1881 1882 /* 1883 * Write a message. All requisit command flags have been set. 1884 * 1885 * If msg->state is non-NULL the message is written to the existing 1886 * transaction. msgid will be set accordingly. 1887 * 1888 * If msg->state is NULL and CREATE is set new state is allocated and 1889 * (func, data) is installed. A msgid is assigned. 1890 * 1891 * If msg->state is NULL and CREATE is not set the message is assumed 1892 * to be a one-way message. The originator must assign the msgid 1893 * (or leave it 0, which is typical. 1894 * 1895 * This function merely queues the message to the management thread, it 1896 * does not write to the message socket/pipe. 1897 */ 1898 void 1899 kdmsg_msg_write(kdmsg_msg_t *msg) 1900 { 1901 kdmsg_iocom_t *iocom = msg->state->iocom; 1902 1903 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1904 kdmsg_msg_write_locked(iocom, msg); 1905 lockmgr(&iocom->msglk, LK_RELEASE); 1906 } 1907 1908 static void 1909 kdmsg_msg_write_locked(kdmsg_iocom_t *iocom, kdmsg_msg_t *msg) 1910 { 1911 kdmsg_state_t *state; 1912 1913 if (msg->state) { 1914 /* 1915 * Continuance or termination of existing transaction. 1916 * The transaction could have been initiated by either end. 1917 * 1918 * (Function callback and aux data for the receive side can 1919 * be replaced or left alone). 1920 */ 1921 state = msg->state; 1922 msg->any.head.msgid = state->msgid; 1923 } else { 1924 /* 1925 * One-off message (always uses msgid 0 to distinguish 1926 * between a possibly lost in-transaction message due to 1927 * competing aborts and a real one-off message?) 1928 */ 1929 state = NULL; 1930 msg->any.head.msgid = 0; 1931 } 1932 1933 #if 0 1934 /* 1935 * XXX removed - don't make this a panic, allow the state checks 1936 * below to catch the situation. 1937 * 1938 * This flag is not set until after the tx thread has drained 1939 * the tx msgq and simulated responses. After that point the 1940 * txthread is dead and can no longer simulate responses. 1941 * 1942 * Device drivers should never try to send a message once this 1943 * flag is set. They should have detected (through the state 1944 * closures) that the link is in trouble. 1945 */ 1946 if (iocom->flags & KDMSG_IOCOMF_EXITNOACC) { 1947 lockmgr(&iocom->msglk, LK_RELEASE); 1948 panic("kdmsg_msg_write: Attempt to write message to " 1949 "terminated iocom\n"); 1950 } 1951 #endif 1952 1953 /* 1954 * For stateful messages, if the circuit is dead or dying we have 1955 * to abort the potentially newly-created state and discard the 1956 * message. 1957 * 1958 * - We must discard the message because the other end will not 1959 * be expecting any more messages over the dead or dying circuit 1960 * and might not be able to receive them. 1961 * 1962 * - We abort the state by simulating a failure to generate a fake 1963 * incoming DELETE. This will trigger the state callback and allow 1964 * the device to clean things up and reply, closing the outgoing 1965 * direction and allowing the state to be freed. 1966 * 1967 * This situation occurs quite often, particularly as SPANs stabilize. 1968 * End-points must do the right thing. 1969 */ 1970 if (state) { 1971 KKASSERT((state->txcmd & DMSGF_DELETE) == 0); 1972 if (state->flags & KDMSG_STATE_DYING) { 1973 #if 0 1974 if ((state->flags & KDMSG_STATE_DYING) || 1975 (state->parent->txcmd & DMSGF_DELETE) || 1976 (state->parent->flags & KDMSG_STATE_DYING)) { 1977 #endif 1978 kdio_printf(iocom, 4, 1979 "kdmsg_msg_write: Write to dying circuit " 1980 "state=%p " 1981 "ptxcmd=%08x prxcmd=%08x flags=%08x\n", 1982 state, 1983 state->parent->rxcmd, 1984 state->parent->txcmd, 1985 state->parent->flags); 1986 kdmsg_state_hold(state); 1987 kdmsg_state_msgtx(msg); 1988 kdmsg_state_cleanuptx(msg); 1989 kdmsg_state_drop(state); 1990 return; 1991 } 1992 } 1993 1994 /* 1995 * Finish up the msg fields. Note that msg->aux_size and the 1996 * aux_bytes stored in the message header represent the unaligned 1997 * (actual) bytes of data, but the buffer is sized to an aligned 1998 * size and the CRC is generated over the aligned length. 1999 */ 2000 msg->any.head.salt = /* (random << 8) | */ (iocom->msg_seq & 255); 2001 ++iocom->msg_seq; 2002 2003 if (msg->aux_data && msg->aux_size) { 2004 uint32_t abytes = DMSG_DOALIGN(msg->aux_size); 2005 2006 msg->any.head.aux_bytes = msg->aux_size; 2007 msg->any.head.aux_crc = iscsi_crc32(msg->aux_data, abytes); 2008 } 2009 msg->any.head.hdr_crc = 0; 2010 msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size); 2011 2012 TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry); 2013 2014 if (iocom->msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) { 2015 atomic_clear_int(&iocom->msg_ctl, 2016 KDMSG_CLUSTERCTL_SLEEPING); 2017 wakeup(&iocom->msg_ctl); 2018 } 2019 } 2020 2021 /* 2022 * Reply to a message and terminate our side of the transaction. 2023 * 2024 * If msg->state is non-NULL we are replying to a one-way message. 2025 */ 2026 void 2027 kdmsg_msg_reply(kdmsg_msg_t *msg, uint32_t error) 2028 { 2029 kdmsg_state_t *state = msg->state; 2030 kdmsg_msg_t *nmsg; 2031 uint32_t cmd; 2032 2033 /* 2034 * Reply with a simple error code and terminate the transaction. 2035 */ 2036 cmd = DMSG_LNK_ERROR; 2037 2038 /* 2039 * Check if our direction has even been initiated yet, set CREATE. 2040 * 2041 * Check what direction this is (command or reply direction). Note 2042 * that txcmd might not have been initiated yet. 2043 * 2044 * If our direction has already been closed we just return without 2045 * doing anything. 2046 */ 2047 if (state != &state->iocom->state0) { 2048 if (state->txcmd & DMSGF_DELETE) 2049 return; 2050 if ((state->txcmd & DMSGF_CREATE) == 0) 2051 cmd |= DMSGF_CREATE; 2052 if (state->txcmd & DMSGF_REPLY) 2053 cmd |= DMSGF_REPLY; 2054 cmd |= DMSGF_DELETE; 2055 } else { 2056 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) 2057 cmd |= DMSGF_REPLY; 2058 } 2059 2060 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 2061 nmsg->any.head.error = error; 2062 kdmsg_msg_write(nmsg); 2063 } 2064 2065 /* 2066 * Reply to a message and continue our side of the transaction. 2067 * 2068 * If msg->state is non-NULL we are replying to a one-way message and this 2069 * function degenerates into the same as kdmsg_msg_reply(). 2070 */ 2071 void 2072 kdmsg_msg_result(kdmsg_msg_t *msg, uint32_t error) 2073 { 2074 kdmsg_state_t *state = msg->state; 2075 kdmsg_msg_t *nmsg; 2076 uint32_t cmd; 2077 2078 /* 2079 * Return a simple result code, do NOT terminate the transaction. 2080 */ 2081 cmd = DMSG_LNK_ERROR; 2082 2083 /* 2084 * Check if our direction has even been initiated yet, set CREATE. 2085 * 2086 * Check what direction this is (command or reply direction). Note 2087 * that txcmd might not have been initiated yet. 2088 * 2089 * If our direction has already been closed we just return without 2090 * doing anything. 2091 */ 2092 if (state != &state->iocom->state0) { 2093 if (state->txcmd & DMSGF_DELETE) 2094 return; 2095 if ((state->txcmd & DMSGF_CREATE) == 0) 2096 cmd |= DMSGF_CREATE; 2097 if (state->txcmd & DMSGF_REPLY) 2098 cmd |= DMSGF_REPLY; 2099 /* continuing transaction, do not set MSGF_DELETE */ 2100 } else { 2101 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) 2102 cmd |= DMSGF_REPLY; 2103 } 2104 2105 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 2106 nmsg->any.head.error = error; 2107 kdmsg_msg_write(nmsg); 2108 } 2109 2110 /* 2111 * Reply to a message and terminate our side of the transaction. 2112 * 2113 * If msg->state is non-NULL we are replying to a one-way message. 2114 */ 2115 void 2116 kdmsg_state_reply(kdmsg_state_t *state, uint32_t error) 2117 { 2118 kdmsg_msg_t *nmsg; 2119 uint32_t cmd; 2120 2121 /* 2122 * Reply with a simple error code and terminate the transaction. 2123 */ 2124 cmd = DMSG_LNK_ERROR; 2125 2126 /* 2127 * Check if our direction has even been initiated yet, set CREATE. 2128 * 2129 * Check what direction this is (command or reply direction). Note 2130 * that txcmd might not have been initiated yet. 2131 * 2132 * If our direction has already been closed we just return without 2133 * doing anything. 2134 */ 2135 KKASSERT(state); 2136 if (state->txcmd & DMSGF_DELETE) 2137 return; 2138 if ((state->txcmd & DMSGF_CREATE) == 0) 2139 cmd |= DMSGF_CREATE; 2140 if (state->txcmd & DMSGF_REPLY) 2141 cmd |= DMSGF_REPLY; 2142 cmd |= DMSGF_DELETE; 2143 2144 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 2145 nmsg->any.head.error = error; 2146 kdmsg_msg_write(nmsg); 2147 } 2148 2149 /* 2150 * Reply to a message and continue our side of the transaction. 2151 * 2152 * If msg->state is non-NULL we are replying to a one-way message and this 2153 * function degenerates into the same as kdmsg_msg_reply(). 2154 */ 2155 void 2156 kdmsg_state_result(kdmsg_state_t *state, uint32_t error) 2157 { 2158 kdmsg_msg_t *nmsg; 2159 uint32_t cmd; 2160 2161 /* 2162 * Return a simple result code, do NOT terminate the transaction. 2163 */ 2164 cmd = DMSG_LNK_ERROR; 2165 2166 /* 2167 * Check if our direction has even been initiated yet, set CREATE. 2168 * 2169 * Check what direction this is (command or reply direction). Note 2170 * that txcmd might not have been initiated yet. 2171 * 2172 * If our direction has already been closed we just return without 2173 * doing anything. 2174 */ 2175 KKASSERT(state); 2176 if (state->txcmd & DMSGF_DELETE) 2177 return; 2178 if ((state->txcmd & DMSGF_CREATE) == 0) 2179 cmd |= DMSGF_CREATE; 2180 if (state->txcmd & DMSGF_REPLY) 2181 cmd |= DMSGF_REPLY; 2182 /* continuing transaction, do not set MSGF_DELETE */ 2183 2184 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 2185 nmsg->any.head.error = error; 2186 kdmsg_msg_write(nmsg); 2187 } 2188