1 /*- 2 * Copyright (c) 2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * TODO: txcmd CREATE state is deferred by tx msgq, need to calculate 36 * a streaming response. See subr_diskiocom()'s diskiodone(). 37 */ 38 #include <sys/param.h> 39 #include <sys/types.h> 40 #include <sys/kernel.h> 41 #include <sys/conf.h> 42 #include <sys/systm.h> 43 #include <sys/queue.h> 44 #include <sys/tree.h> 45 #include <sys/malloc.h> 46 #include <sys/mount.h> 47 #include <sys/socket.h> 48 #include <sys/vnode.h> 49 #include <sys/sysctl.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/priv.h> 53 #include <sys/thread.h> 54 #include <sys/globaldata.h> 55 #include <sys/limits.h> 56 57 #include <sys/dmsg.h> 58 59 RB_GENERATE(kdmsg_state_tree, kdmsg_state, rbnode, kdmsg_state_cmp); 60 61 SYSCTL_NODE(, OID_AUTO, kdmsg, CTLFLAG_RW, 0, "kdmsg"); 62 static int kdmsg_debug = 1; 63 SYSCTL_INT(_kdmsg, OID_AUTO, debug, CTLFLAG_RW, &kdmsg_debug, 0, 64 "Set debug level for kernel dmsg layer"); 65 66 #define kd_printf(level, ctl, ...) \ 67 if (kdmsg_debug >= (level)) kprintf("kdmsg: " ctl, __VA_ARGS__) 68 69 #define kdio_printf(iocom, level, ctl, ...) \ 70 if (kdmsg_debug >= (level)) kprintf("kdmsg: " ctl, __VA_ARGS__) 71 72 static int kdmsg_msg_receive_handling(kdmsg_msg_t *msg); 73 static int kdmsg_state_msgrx(kdmsg_msg_t *msg); 74 static int kdmsg_state_msgtx(kdmsg_msg_t *msg); 75 static void kdmsg_msg_write_locked(kdmsg_iocom_t *iocom, kdmsg_msg_t *msg); 76 static void kdmsg_state_cleanuprx(kdmsg_msg_t *msg); 77 static void kdmsg_state_cleanuptx(kdmsg_msg_t *msg); 78 static void kdmsg_subq_delete(kdmsg_state_t *state); 79 static void kdmsg_simulate_failure(kdmsg_state_t *state, int meto, int error); 80 static void kdmsg_state_abort(kdmsg_state_t *state); 81 static void kdmsg_state_dying(kdmsg_state_t *state); 82 static void kdmsg_state_free(kdmsg_state_t *state); 83 84 #ifdef KDMSG_DEBUG 85 #define KDMSG_DEBUG_ARGS , const char *file, int line 86 #define kdmsg_state_hold(state) _kdmsg_state_hold(state, __FILE__, __LINE__) 87 #define kdmsg_state_drop(state) _kdmsg_state_drop(state, __FILE__, __LINE__) 88 #else 89 #define KDMSG_DEBUG 0 90 #define KDMSG_DEBUG_ARGS 91 #define kdmsg_state_hold(state) _kdmsg_state_hold(state) 92 #define kdmsg_state_drop(state) _kdmsg_state_drop(state) 93 #endif 94 static void _kdmsg_state_hold(kdmsg_state_t *state KDMSG_DEBUG_ARGS); 95 static void _kdmsg_state_drop(kdmsg_state_t *state KDMSG_DEBUG_ARGS); 96 97 static void kdmsg_iocom_thread_rd(void *arg); 98 static void kdmsg_iocom_thread_wr(void *arg); 99 static int kdmsg_autorxmsg(kdmsg_msg_t *msg); 100 101 /*static struct lwkt_token kdmsg_token = LWKT_TOKEN_INITIALIZER(kdmsg_token);*/ 102 103 /* 104 * Initialize the roll-up communications structure for a network 105 * messaging session. This function does not install the socket. 106 */ 107 void 108 kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle, uint32_t flags, 109 struct malloc_type *mmsg, 110 int (*rcvmsg)(kdmsg_msg_t *msg)) 111 { 112 bzero(iocom, sizeof(*iocom)); 113 iocom->handle = handle; 114 iocom->mmsg = mmsg; 115 iocom->rcvmsg = rcvmsg; 116 iocom->flags = flags; 117 lockinit(&iocom->msglk, "h2msg", 0, 0); 118 TAILQ_INIT(&iocom->msgq); 119 RB_INIT(&iocom->staterd_tree); 120 RB_INIT(&iocom->statewr_tree); 121 122 iocom->state0.iocom = iocom; 123 iocom->state0.parent = &iocom->state0; 124 TAILQ_INIT(&iocom->state0.subq); 125 } 126 127 /* 128 * [Re]connect using the passed file pointer. The caller must ref the 129 * fp for us. We own that ref now. 130 */ 131 void 132 kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp, 133 const char *subsysname) 134 { 135 /* 136 * Destroy the current connection 137 */ 138 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 139 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX); 140 while (iocom->msgrd_td || iocom->msgwr_td) { 141 wakeup(&iocom->msg_ctl); 142 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz); 143 } 144 145 /* 146 * Drop communications descriptor 147 */ 148 if (iocom->msg_fp) { 149 fdrop(iocom->msg_fp); 150 iocom->msg_fp = NULL; 151 } 152 153 /* 154 * Setup new communications descriptor 155 */ 156 iocom->msg_ctl = 0; 157 iocom->msg_fp = fp; 158 iocom->msg_seq = 0; 159 iocom->flags &= ~KDMSG_IOCOMF_EXITNOACC; 160 161 lwkt_create(kdmsg_iocom_thread_rd, iocom, &iocom->msgrd_td, 162 NULL, 0, -1, "%s-msgrd", subsysname); 163 lwkt_create(kdmsg_iocom_thread_wr, iocom, &iocom->msgwr_td, 164 NULL, 0, -1, "%s-msgwr", subsysname); 165 lockmgr(&iocom->msglk, LK_RELEASE); 166 } 167 168 /* 169 * Caller sets up iocom->auto_lnk_conn and iocom->auto_lnk_span, then calls 170 * this function to handle the state machine for LNK_CONN and LNK_SPAN. 171 */ 172 static int kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 173 static int kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 174 175 void 176 kdmsg_iocom_autoinitiate(kdmsg_iocom_t *iocom, 177 void (*auto_callback)(kdmsg_msg_t *msg)) 178 { 179 kdmsg_msg_t *msg; 180 181 iocom->auto_callback = auto_callback; 182 183 msg = kdmsg_msg_alloc(&iocom->state0, 184 DMSG_LNK_CONN | DMSGF_CREATE, 185 kdmsg_lnk_conn_reply, NULL); 186 iocom->auto_lnk_conn.head = msg->any.head; 187 msg->any.lnk_conn = iocom->auto_lnk_conn; 188 iocom->conn_state = msg->state; 189 kdmsg_state_hold(msg->state); /* iocom->conn_state */ 190 kdmsg_msg_write(msg); 191 } 192 193 static 194 int 195 kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 196 { 197 kdmsg_iocom_t *iocom = state->iocom; 198 kdmsg_msg_t *rmsg; 199 200 /* 201 * Upon receipt of the LNK_CONN acknowledgement initiate an 202 * automatic SPAN if we were asked to. Used by e.g. xdisk, but 203 * not used by HAMMER2 which must manage more than one transmitted 204 * SPAN. 205 */ 206 if ((msg->any.head.cmd & DMSGF_CREATE) && 207 (iocom->flags & KDMSG_IOCOMF_AUTOTXSPAN)) { 208 rmsg = kdmsg_msg_alloc(&iocom->state0, 209 DMSG_LNK_SPAN | DMSGF_CREATE, 210 kdmsg_lnk_span_reply, NULL); 211 iocom->auto_lnk_span.head = rmsg->any.head; 212 rmsg->any.lnk_span = iocom->auto_lnk_span; 213 kdmsg_msg_write(rmsg); 214 } 215 216 /* 217 * Process shim after the CONN is acknowledged and before the CONN 218 * transaction is deleted. For deletions this gives device drivers 219 * the ability to interlock new operations on the circuit before 220 * it becomes illegal and panics. 221 */ 222 if (iocom->auto_callback) 223 iocom->auto_callback(msg); 224 225 if ((state->txcmd & DMSGF_DELETE) == 0 && 226 (msg->any.head.cmd & DMSGF_DELETE)) { 227 /* 228 * iocom->conn_state has a state ref, drop it when clearing. 229 */ 230 if (iocom->conn_state) 231 kdmsg_state_drop(iocom->conn_state); 232 iocom->conn_state = NULL; 233 kdmsg_msg_reply(msg, 0); 234 } 235 236 return (0); 237 } 238 239 static 240 int 241 kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 242 { 243 /* 244 * Be sure to process shim before terminating the SPAN 245 * transaction. Gives device drivers the ability to 246 * interlock new operations on the circuit before it 247 * becomes illegal and panics. 248 */ 249 if (state->iocom->auto_callback) 250 state->iocom->auto_callback(msg); 251 252 if ((state->txcmd & DMSGF_DELETE) == 0 && 253 (msg->any.head.cmd & DMSGF_DELETE)) { 254 kdmsg_msg_reply(msg, 0); 255 } 256 return (0); 257 } 258 259 /* 260 * Disconnect and clean up 261 */ 262 void 263 kdmsg_iocom_uninit(kdmsg_iocom_t *iocom) 264 { 265 kdmsg_state_t *state; 266 kdmsg_msg_t *msg; 267 int retries; 268 269 /* 270 * Ask the cluster controller to go away by setting 271 * KILLRX. Send a PING to get a response to unstick reading 272 * from the pipe. 273 * 274 * After 10 seconds shitcan the pipe and do an unclean shutdown. 275 */ 276 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 277 278 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX); 279 msg = kdmsg_msg_alloc(&iocom->state0, DMSG_LNK_PING, NULL, NULL); 280 kdmsg_msg_write_locked(iocom, msg); 281 282 retries = 10; 283 while (iocom->msgrd_td || iocom->msgwr_td) { 284 wakeup(&iocom->msg_ctl); 285 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz); 286 if (--retries == 0 && iocom->msg_fp) { 287 kdio_printf(iocom, 0, "%s\n", 288 "iocom_uninit: " 289 "shitcanning unresponsive pipe"); 290 fp_shutdown(iocom->msg_fp, SHUT_RDWR); 291 /* retries allowed to go negative, keep looping */ 292 } 293 } 294 295 /* 296 * Cleanup caches 297 */ 298 if ((state = iocom->freerd_state) != NULL) { 299 iocom->freerd_state = NULL; 300 kdmsg_state_drop(state); 301 } 302 303 if ((state = iocom->freewr_state) != NULL) { 304 iocom->freewr_state = NULL; 305 kdmsg_state_drop(state); 306 } 307 308 /* 309 * Drop communications descriptor 310 */ 311 if (iocom->msg_fp) { 312 fdrop(iocom->msg_fp); 313 iocom->msg_fp = NULL; 314 } 315 lockmgr(&iocom->msglk, LK_RELEASE); 316 } 317 318 /* 319 * Cluster controller thread. Perform messaging functions. We have one 320 * thread for the reader and one for the writer. The writer handles 321 * shutdown requests (which should break the reader thread). 322 */ 323 static 324 void 325 kdmsg_iocom_thread_rd(void *arg) 326 { 327 kdmsg_iocom_t *iocom = arg; 328 dmsg_hdr_t hdr; 329 kdmsg_msg_t *msg = NULL; 330 size_t hbytes; 331 size_t abytes; 332 int error = 0; 333 334 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLRX) == 0) { 335 /* 336 * Retrieve the message from the pipe or socket. 337 */ 338 error = fp_read(iocom->msg_fp, &hdr, sizeof(hdr), 339 NULL, 1, UIO_SYSSPACE); 340 if (error) 341 break; 342 if (hdr.magic != DMSG_HDR_MAGIC) { 343 kdio_printf(iocom, 1, "bad magic: %04x\n", hdr.magic); 344 error = EINVAL; 345 break; 346 } 347 hbytes = (hdr.cmd & DMSGF_SIZE) * DMSG_ALIGN; 348 if (hbytes < sizeof(hdr) || hbytes > DMSG_HDR_MAX) { 349 kdio_printf(iocom, 1, "bad header size %zd\n", hbytes); 350 error = EINVAL; 351 break; 352 } 353 354 /* XXX messy: mask cmd to avoid allocating state */ 355 msg = kdmsg_msg_alloc(&iocom->state0, 356 hdr.cmd & DMSGF_BASECMDMASK, 357 NULL, NULL); 358 msg->any.head = hdr; 359 msg->hdr_size = hbytes; 360 if (hbytes > sizeof(hdr)) { 361 error = fp_read(iocom->msg_fp, &msg->any.head + 1, 362 hbytes - sizeof(hdr), 363 NULL, 1, UIO_SYSSPACE); 364 if (error) { 365 kdio_printf(iocom, 1, "%s\n", 366 "short msg received"); 367 error = EINVAL; 368 break; 369 } 370 } 371 msg->aux_size = hdr.aux_bytes; 372 if (msg->aux_size > DMSG_AUX_MAX) { 373 kdio_printf(iocom, 1, 374 "illegal msg payload size %zd\n", 375 msg->aux_size); 376 error = EINVAL; 377 break; 378 } 379 if (msg->aux_size) { 380 abytes = DMSG_DOALIGN(msg->aux_size); 381 msg->aux_data = kmalloc(abytes, iocom->mmsg, M_WAITOK); 382 msg->flags |= KDMSG_FLAG_AUXALLOC; 383 error = fp_read(iocom->msg_fp, msg->aux_data, 384 abytes, NULL, 1, UIO_SYSSPACE); 385 if (error) { 386 kdio_printf(iocom, 1, "%s\n", 387 "short msg payload received"); 388 break; 389 } 390 } 391 392 error = kdmsg_msg_receive_handling(msg); 393 msg = NULL; 394 } 395 396 kdio_printf(iocom, 1, "read thread terminating error=%d\n", error); 397 398 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 399 if (msg) 400 kdmsg_msg_free(msg); 401 402 /* 403 * Shutdown the socket and set KILLRX for consistency in case the 404 * shutdown was not commanded. Signal the transmit side to shutdown 405 * by setting KILLTX and waking it up. 406 */ 407 fp_shutdown(iocom->msg_fp, SHUT_RDWR); 408 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX | 409 KDMSG_CLUSTERCTL_KILLTX); 410 iocom->msgrd_td = NULL; 411 lockmgr(&iocom->msglk, LK_RELEASE); 412 wakeup(&iocom->msg_ctl); 413 414 /* 415 * iocom can be ripped out at any time once the lock is 416 * released with msgrd_td set to NULL. The wakeup()s are safe but 417 * that is all. 418 */ 419 wakeup(iocom); 420 lwkt_exit(); 421 } 422 423 static 424 void 425 kdmsg_iocom_thread_wr(void *arg) 426 { 427 kdmsg_iocom_t *iocom = arg; 428 kdmsg_msg_t *msg; 429 ssize_t res; 430 size_t abytes; 431 int error = 0; 432 int save_ticks; 433 int didwarn; 434 435 /* 436 * Transmit loop 437 */ 438 msg = NULL; 439 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 440 441 while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLTX) == 0 && error == 0) { 442 /* 443 * Sleep if no messages pending. Interlock with flag while 444 * holding msglk. 445 */ 446 if (TAILQ_EMPTY(&iocom->msgq)) { 447 atomic_set_int(&iocom->msg_ctl, 448 KDMSG_CLUSTERCTL_SLEEPING); 449 lksleep(&iocom->msg_ctl, &iocom->msglk, 0, "msgwr", hz); 450 atomic_clear_int(&iocom->msg_ctl, 451 KDMSG_CLUSTERCTL_SLEEPING); 452 } 453 454 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) { 455 /* 456 * Remove msg from the transmit queue and do 457 * persist and half-closed state handling. 458 */ 459 TAILQ_REMOVE(&iocom->msgq, msg, qentry); 460 461 error = kdmsg_state_msgtx(msg); 462 if (error == EALREADY) { 463 error = 0; 464 kdmsg_msg_free(msg); 465 continue; 466 } 467 if (error) { 468 kdmsg_msg_free(msg); 469 break; 470 } 471 472 /* 473 * Dump the message to the pipe or socket. 474 * 475 * We have to clean up the message as if the transmit 476 * succeeded even if it failed. 477 */ 478 lockmgr(&iocom->msglk, LK_RELEASE); 479 error = fp_write(iocom->msg_fp, &msg->any, 480 msg->hdr_size, &res, UIO_SYSSPACE); 481 if (error || res != msg->hdr_size) { 482 if (error == 0) 483 error = EINVAL; 484 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 485 kdmsg_state_cleanuptx(msg); 486 break; 487 } 488 if (msg->aux_size) { 489 abytes = DMSG_DOALIGN(msg->aux_size); 490 error = fp_write(iocom->msg_fp, 491 msg->aux_data, abytes, 492 &res, UIO_SYSSPACE); 493 if (error || res != abytes) { 494 if (error == 0) 495 error = EINVAL; 496 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 497 kdmsg_state_cleanuptx(msg); 498 break; 499 } 500 } 501 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 502 kdmsg_state_cleanuptx(msg); 503 } 504 } 505 506 kdio_printf(iocom, 1, "write thread terminating error=%d\n", error); 507 508 /* 509 * Shutdown the socket and set KILLTX for consistency in case the 510 * shutdown was not commanded. Signal the receive side to shutdown 511 * by setting KILLRX and waking it up. 512 */ 513 fp_shutdown(iocom->msg_fp, SHUT_RDWR); 514 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX | 515 KDMSG_CLUSTERCTL_KILLTX); 516 wakeup(&iocom->msg_ctl); 517 518 /* 519 * The transmit thread is responsible for final cleanups, wait 520 * for the receive side to terminate to prevent new received 521 * states from interfering with our cleanup. 522 * 523 * Do not set msgwr_td to NULL until we actually exit. 524 */ 525 while (iocom->msgrd_td) { 526 wakeup(&iocom->msg_ctl); 527 lksleep(iocom, &iocom->msglk, 0, "clstrkt", hz); 528 } 529 530 /* 531 * We can no longer receive new messages. We must drain the transmit 532 * message queue and simulate received messages to close anay remaining 533 * states. 534 * 535 * Loop until all the states are gone and there are no messages 536 * pending transmit. 537 */ 538 save_ticks = ticks; 539 didwarn = 0; 540 541 while (TAILQ_FIRST(&iocom->msgq) || 542 RB_ROOT(&iocom->staterd_tree) || 543 RB_ROOT(&iocom->statewr_tree)) { 544 /* 545 * Simulate failure for all sub-states of state0. 546 */ 547 kdmsg_drain_msgq(iocom); 548 kdio_printf(iocom, 2, "%s\n", 549 "simulate failure for all substates of state0"); 550 kdmsg_simulate_failure(&iocom->state0, 0, DMSG_ERR_LOSTLINK); 551 552 lksleep(iocom, &iocom->msglk, 0, "clstrtk", hz / 2); 553 554 if ((int)(ticks - save_ticks) > hz*2 && didwarn == 0) { 555 didwarn = 1; 556 kdio_printf(iocom, 0, 557 "Warning, write thread on %p " 558 "still terminating\n", 559 iocom); 560 } 561 if ((int)(ticks - save_ticks) > hz*15 && didwarn == 1) { 562 didwarn = 2; 563 kdio_printf(iocom, 0, 564 "Warning, write thread on %p " 565 "still terminating\n", 566 iocom); 567 } 568 if ((int)(ticks - save_ticks) > hz*60) { 569 kdio_printf(iocom, 0, 570 "Can't terminate: msgq %p " 571 "rd_tree %p wr_tree %p\n", 572 TAILQ_FIRST(&iocom->msgq), 573 RB_ROOT(&iocom->staterd_tree), 574 RB_ROOT(&iocom->statewr_tree)); 575 lksleep(iocom, &iocom->msglk, 0, "clstrtk", hz * 10); 576 } 577 } 578 579 /* 580 * Exit handling is done by the write thread. 581 */ 582 iocom->flags |= KDMSG_IOCOMF_EXITNOACC; 583 lockmgr(&iocom->msglk, LK_RELEASE); 584 585 /* 586 * The state trees had better be empty now 587 */ 588 KKASSERT(RB_EMPTY(&iocom->staterd_tree)); 589 KKASSERT(RB_EMPTY(&iocom->statewr_tree)); 590 KKASSERT(iocom->conn_state == NULL); 591 592 if (iocom->exit_func) { 593 /* 594 * iocom is invalid after we call the exit function. 595 */ 596 iocom->msgwr_td = NULL; 597 iocom->exit_func(iocom); 598 } else { 599 /* 600 * iocom can be ripped out from under us once msgwr_td is 601 * set to NULL. The wakeup is safe. 602 */ 603 iocom->msgwr_td = NULL; 604 wakeup(iocom); 605 } 606 lwkt_exit(); 607 } 608 609 /* 610 * This cleans out the pending transmit message queue, adjusting any 611 * persistent states properly in the process. 612 * 613 * Called with iocom locked. 614 */ 615 void 616 kdmsg_drain_msgq(kdmsg_iocom_t *iocom) 617 { 618 kdmsg_msg_t *msg; 619 620 /* 621 * Clean out our pending transmit queue, executing the 622 * appropriate state adjustments. If this tries to open 623 * any new outgoing transactions we have to loop up and 624 * clean them out. 625 */ 626 while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) { 627 TAILQ_REMOVE(&iocom->msgq, msg, qentry); 628 if (kdmsg_state_msgtx(msg)) 629 kdmsg_msg_free(msg); 630 else 631 kdmsg_state_cleanuptx(msg); 632 } 633 } 634 635 /* 636 * Do all processing required to handle a freshly received message 637 * after its low level header has been validated. 638 * 639 * iocom is not locked. 640 */ 641 static 642 int 643 kdmsg_msg_receive_handling(kdmsg_msg_t *msg) 644 { 645 kdmsg_iocom_t *iocom = msg->state->iocom; 646 int error; 647 648 /* 649 * State machine tracking, state assignment for msg, 650 * returns error and discard status. Errors are fatal 651 * to the connection except for EALREADY which forces 652 * a discard without execution. 653 */ 654 error = kdmsg_state_msgrx(msg); 655 if (msg->state->flags & KDMSG_STATE_ABORTING) { 656 kdio_printf(iocom, 5, 657 "kdmsg_state_abort(b): state %p rxcmd=%08x " 658 "txcmd=%08x msgrx error %d\n", 659 msg->state, msg->state->rxcmd, 660 msg->state->txcmd, error); 661 } 662 if (error) { 663 /* 664 * Raw protocol or connection error 665 */ 666 if (msg->state->flags & KDMSG_STATE_ABORTING) 667 kdio_printf(iocom, 5, 668 "X1 state %p error %d\n", 669 msg->state, error); 670 kdmsg_msg_free(msg); 671 if (error == EALREADY) 672 error = 0; 673 } else if (msg->state && msg->state->func) { 674 /* 675 * Message related to state which already has a 676 * handling function installed for it. 677 */ 678 if (msg->state->flags & KDMSG_STATE_ABORTING) 679 kdio_printf(iocom, 5, 680 "X2 state %p func %p\n", 681 msg->state, msg->state->func); 682 error = msg->state->func(msg->state, msg); 683 kdmsg_state_cleanuprx(msg); 684 } else if (iocom->flags & KDMSG_IOCOMF_AUTOANY) { 685 if (msg->state->flags & KDMSG_STATE_ABORTING) 686 kdio_printf(iocom, 5, 687 "X3 state %p\n", msg->state); 688 error = kdmsg_autorxmsg(msg); 689 kdmsg_state_cleanuprx(msg); 690 } else { 691 if (msg->state->flags & KDMSG_STATE_ABORTING) 692 kdio_printf(iocom, 5, 693 "X4 state %p\n", msg->state); 694 error = iocom->rcvmsg(msg); 695 kdmsg_state_cleanuprx(msg); 696 } 697 return error; 698 } 699 700 /* 701 * Process state tracking for a message after reception and dequeueing, 702 * prior to execution of the state callback. The state is updated and 703 * will be removed from the RBTREE if completely closed, but the state->parent 704 * and subq linkage is not cleaned up until after the callback (see 705 * cleanuprx()). 706 * 707 * msglk is not held. 708 * 709 * NOTE: A message transaction can consist of several messages in either 710 * direction. 711 * 712 * NOTE: The msgid is unique to the initiator, not necessarily unique for 713 * us or for any relay or for the return direction for that matter. 714 * That is, two sides sending a new message can use the same msgid 715 * without colliding. 716 * 717 * -- 718 * 719 * ABORT sequences work by setting the ABORT flag along with normal message 720 * state. However, ABORTs can also be sent on half-closed messages, that is 721 * even if the command or reply side has already sent a DELETE, as long as 722 * the message has not been fully closed it can still send an ABORT+DELETE 723 * to terminate the half-closed message state. 724 * 725 * Since ABORT+DELETEs can race we silently discard ABORT's for message 726 * state which has already been fully closed. REPLY+ABORT+DELETEs can 727 * also race, and in this situation the other side might have already 728 * initiated a new unrelated command with the same message id. Since 729 * the abort has not set the CREATE flag the situation can be detected 730 * and the message will also be discarded. 731 * 732 * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE]. 733 * The ABORT request is essentially integrated into the command instead 734 * of being sent later on. In this situation the command implementation 735 * detects that CREATE and ABORT are both set (vs ABORT alone) and can 736 * special-case non-blocking operation for the command. 737 * 738 * NOTE! Messages with ABORT set without CREATE or DELETE are considered 739 * to be mid-stream aborts for command/reply sequences. ABORTs on 740 * one-way messages are not supported. 741 * 742 * NOTE! If a command sequence does not support aborts the ABORT flag is 743 * simply ignored. 744 * 745 * -- 746 * 747 * One-off messages (no reply expected) are sent with neither CREATE or DELETE 748 * set. One-off messages cannot be aborted and typically aren't processed 749 * by these routines. The REPLY bit can be used to distinguish whether a 750 * one-off message is a command or reply. For example, one-off replies 751 * will typically just contain status updates. 752 */ 753 static 754 int 755 kdmsg_state_msgrx(kdmsg_msg_t *msg) 756 { 757 kdmsg_iocom_t *iocom = msg->state->iocom; 758 kdmsg_state_t *state; 759 kdmsg_state_t *pstate; 760 kdmsg_state_t sdummy; 761 int error; 762 763 bzero(&sdummy, sizeof(sdummy)); /* avoid gcc warnings */ 764 765 /* 766 * Make sure a state structure is ready to go in case we need a new 767 * one. This is the only routine which uses freerd_state so no 768 * races are possible. 769 */ 770 if ((state = iocom->freerd_state) == NULL) { 771 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 772 state->flags = KDMSG_STATE_DYNAMIC; 773 state->iocom = iocom; 774 state->refs = 1; 775 TAILQ_INIT(&state->subq); 776 iocom->freerd_state = state; 777 } 778 state = NULL; /* safety */ 779 780 /* 781 * Lock RB tree and locate existing persistent state, if any. 782 * 783 * If received msg is a command state is on staterd_tree. 784 * If received msg is a reply state is on statewr_tree. 785 */ 786 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 787 788 again: 789 if (msg->state == &iocom->state0) { 790 sdummy.msgid = msg->any.head.msgid; 791 sdummy.iocom = iocom; 792 if (msg->any.head.cmd & DMSGF_REVTRANS) { 793 state = RB_FIND(kdmsg_state_tree, &iocom->statewr_tree, 794 &sdummy); 795 } else { 796 state = RB_FIND(kdmsg_state_tree, &iocom->staterd_tree, 797 &sdummy); 798 } 799 800 /* 801 * Set message state unconditionally. If this is a CREATE 802 * message this state will become the parent state and new 803 * state will be allocated for the message state. 804 */ 805 if (state == NULL) 806 state = &iocom->state0; 807 if (state->flags & KDMSG_STATE_INTERLOCK) { 808 state->flags |= KDMSG_STATE_SIGNAL; 809 lksleep(state, &iocom->msglk, 0, "dmrace", hz); 810 goto again; 811 } 812 kdmsg_state_hold(state); 813 kdmsg_state_drop(msg->state); /* iocom->state0 */ 814 msg->state = state; 815 } else { 816 state = msg->state; 817 } 818 819 /* 820 * Short-cut one-off or mid-stream messages. 821 */ 822 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 823 DMSGF_ABORT)) == 0) { 824 error = 0; 825 goto done; 826 } 827 828 /* 829 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from 830 * inside the case statements. 831 */ 832 switch(msg->any.head.cmd & (DMSGF_CREATE|DMSGF_DELETE|DMSGF_REPLY)) { 833 case DMSGF_CREATE: 834 case DMSGF_CREATE | DMSGF_DELETE: 835 /* 836 * New persistant command received. 837 */ 838 if (state != &iocom->state0) { 839 kdio_printf(iocom, 1, "%s\n", 840 "duplicate transaction"); 841 error = EINVAL; 842 break; 843 } 844 845 /* 846 * Lookup the circuit. The circuit is an open transaction. 847 * the REVCIRC bit in the message tells us which side 848 * initiated the transaction representing the circuit. 849 */ 850 if (msg->any.head.circuit) { 851 sdummy.msgid = msg->any.head.circuit; 852 853 if (msg->any.head.cmd & DMSGF_REVCIRC) { 854 pstate = RB_FIND(kdmsg_state_tree, 855 &iocom->statewr_tree, 856 &sdummy); 857 } else { 858 pstate = RB_FIND(kdmsg_state_tree, 859 &iocom->staterd_tree, 860 &sdummy); 861 } 862 if (pstate == NULL) { 863 kdio_printf(iocom, 1, "%s\n", 864 "missing parent in " 865 "stacked trans"); 866 error = EINVAL; 867 break; 868 } 869 } else { 870 pstate = &iocom->state0; 871 } 872 873 /* 874 * Allocate new state. 875 * 876 * msg->state becomes the owner of the ref we inherit from 877 * freerd_stae. 878 */ 879 kdmsg_state_drop(state); 880 state = iocom->freerd_state; 881 iocom->freerd_state = NULL; 882 883 msg->state = state; /* inherits freerd ref */ 884 state->parent = pstate; 885 KKASSERT(state->iocom == iocom); 886 state->flags |= KDMSG_STATE_RBINSERTED | 887 KDMSG_STATE_SUBINSERTED | 888 KDMSG_STATE_OPPOSITE; 889 if (TAILQ_EMPTY(&pstate->subq)) 890 kdmsg_state_hold(pstate);/* states on pstate->subq */ 891 kdmsg_state_hold(state); /* state on pstate->subq */ 892 kdmsg_state_hold(state); /* state on rbtree */ 893 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK; 894 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE; 895 state->txcmd = DMSGF_REPLY; 896 state->msgid = msg->any.head.msgid; 897 state->flags &= ~KDMSG_STATE_NEW; 898 RB_INSERT(kdmsg_state_tree, &iocom->staterd_tree, state); 899 TAILQ_INSERT_TAIL(&pstate->subq, state, entry); 900 error = 0; 901 break; 902 case DMSGF_DELETE: 903 /* 904 * Persistent state is expected but might not exist if an 905 * ABORT+DELETE races the close. 906 */ 907 if (state == &iocom->state0) { 908 if (msg->any.head.cmd & DMSGF_ABORT) { 909 kdio_printf(iocom, 1, "%s\n", 910 "msgrx: " 911 "state already A"); 912 error = EALREADY; 913 } else { 914 kdio_printf(iocom, 1, "%s\n", 915 "msgrx: no state for DELETE"); 916 error = EINVAL; 917 } 918 break; 919 } 920 921 /* 922 * Handle another ABORT+DELETE case if the msgid has already 923 * been reused. 924 */ 925 if ((state->rxcmd & DMSGF_CREATE) == 0) { 926 if (msg->any.head.cmd & DMSGF_ABORT) { 927 kdio_printf(iocom, 1, "%s\n", 928 "msgrx: state already B"); 929 error = EALREADY; 930 } else { 931 kdio_printf(iocom, 1, "%s\n", 932 "msgrx: state reused for DELETE"); 933 error = EINVAL; 934 } 935 break; 936 } 937 error = 0; 938 break; 939 default: 940 /* 941 * Check for mid-stream ABORT command received, otherwise 942 * allow. 943 */ 944 if (msg->any.head.cmd & DMSGF_ABORT) { 945 if (state == &iocom->state0 || 946 (state->rxcmd & DMSGF_CREATE) == 0) { 947 error = EALREADY; 948 break; 949 } 950 } 951 error = 0; 952 break; 953 case DMSGF_REPLY | DMSGF_CREATE: 954 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE: 955 /* 956 * When receiving a reply with CREATE set the original 957 * persistent state message should already exist. 958 */ 959 if (state == &iocom->state0) { 960 kdio_printf(iocom, 1, 961 "msgrx: no state match for " 962 "REPLY cmd=%08x msgid=%016jx\n", 963 msg->any.head.cmd, 964 (intmax_t)msg->any.head.msgid); 965 error = EINVAL; 966 break; 967 } 968 state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE; 969 error = 0; 970 break; 971 case DMSGF_REPLY | DMSGF_DELETE: 972 /* 973 * Received REPLY+ABORT+DELETE in case where msgid has 974 * already been fully closed, ignore the message. 975 */ 976 if (state == &iocom->state0) { 977 if (msg->any.head.cmd & DMSGF_ABORT) { 978 error = EALREADY; 979 } else { 980 kdio_printf(iocom, 1, "%s\n", 981 "msgrx: no state match " 982 "for REPLY|DELETE"); 983 error = EINVAL; 984 } 985 break; 986 } 987 988 /* 989 * Received REPLY+ABORT+DELETE in case where msgid has 990 * already been reused for an unrelated message, 991 * ignore the message. 992 */ 993 if ((state->rxcmd & DMSGF_CREATE) == 0) { 994 if (msg->any.head.cmd & DMSGF_ABORT) { 995 error = EALREADY; 996 } else { 997 kdio_printf(iocom, 1, "%s\n", 998 "msgrx: state reused " 999 "for REPLY|DELETE"); 1000 error = EINVAL; 1001 } 1002 break; 1003 } 1004 error = 0; 1005 break; 1006 case DMSGF_REPLY: 1007 /* 1008 * Check for mid-stream ABORT reply received to sent command. 1009 */ 1010 if (msg->any.head.cmd & DMSGF_ABORT) { 1011 if (state == &iocom->state0 || 1012 (state->rxcmd & DMSGF_CREATE) == 0) { 1013 error = EALREADY; 1014 break; 1015 } 1016 } 1017 error = 0; 1018 break; 1019 } 1020 1021 /* 1022 * Calculate the easy-switch() transactional command. Represents 1023 * the outer-transaction command for any transaction-create or 1024 * transaction-delete, and the inner message command for any 1025 * non-transaction or inside-transaction command. tcmd will be 1026 * set to 0 if the message state is illegal. 1027 * 1028 * The two can be told apart because outer-transaction commands 1029 * always have a DMSGF_CREATE and/or DMSGF_DELETE flag. 1030 */ 1031 done: 1032 if (msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE)) { 1033 if (state != &iocom->state0) { 1034 msg->tcmd = (msg->state->icmd & DMSGF_BASECMDMASK) | 1035 (msg->any.head.cmd & (DMSGF_CREATE | 1036 DMSGF_DELETE | 1037 DMSGF_REPLY)); 1038 } else { 1039 msg->tcmd = 0; 1040 } 1041 } else { 1042 msg->tcmd = msg->any.head.cmd & DMSGF_CMDSWMASK; 1043 } 1044 1045 /* 1046 * Adjust the state for DELETE handling now, before making the 1047 * callback so we are atomic with other state updates. 1048 * 1049 * Subq/parent linkages are cleaned up after the callback. 1050 * If an error occurred the message is ignored and state is not 1051 * updated. 1052 */ 1053 if ((state = msg->state) == NULL || error != 0) { 1054 kdio_printf(iocom, 1, 1055 "msgrx: state=%p error %d\n", 1056 state, error); 1057 } else if (msg->any.head.cmd & DMSGF_DELETE) { 1058 KKASSERT((state->rxcmd & DMSGF_DELETE) == 0); 1059 state->rxcmd |= DMSGF_DELETE; 1060 if (state->txcmd & DMSGF_DELETE) { 1061 KKASSERT(state->flags & KDMSG_STATE_RBINSERTED); 1062 if (state->rxcmd & DMSGF_REPLY) { 1063 KKASSERT(msg->any.head.cmd & 1064 DMSGF_REPLY); 1065 RB_REMOVE(kdmsg_state_tree, 1066 &iocom->statewr_tree, state); 1067 } else { 1068 KKASSERT((msg->any.head.cmd & 1069 DMSGF_REPLY) == 0); 1070 RB_REMOVE(kdmsg_state_tree, 1071 &iocom->staterd_tree, state); 1072 } 1073 state->flags &= ~KDMSG_STATE_RBINSERTED; 1074 kdmsg_state_drop(state); /* state on rbtree */ 1075 } 1076 } 1077 lockmgr(&iocom->msglk, LK_RELEASE); 1078 1079 return (error); 1080 } 1081 1082 /* 1083 * Called instead of iocom->rcvmsg() if any of the AUTO flags are set. 1084 * This routine must call iocom->rcvmsg() for anything not automatically 1085 * handled. 1086 */ 1087 static int 1088 kdmsg_autorxmsg(kdmsg_msg_t *msg) 1089 { 1090 kdmsg_iocom_t *iocom = msg->state->iocom; 1091 kdmsg_msg_t *rep; 1092 int error = 0; 1093 uint32_t cmd; 1094 1095 /* 1096 * Main switch processes transaction create/delete sequences only. 1097 * Use icmd (DELETEs use DMSG_LNK_ERROR 1098 * 1099 * NOTE: If processing in-transaction messages you generally want 1100 * an inner switch on msg->any.head.cmd. 1101 */ 1102 if (msg->state) { 1103 cmd = (msg->state->icmd & DMSGF_BASECMDMASK) | 1104 (msg->any.head.cmd & (DMSGF_CREATE | 1105 DMSGF_DELETE | 1106 DMSGF_REPLY)); 1107 } else { 1108 cmd = 0; 1109 } 1110 1111 switch(cmd) { 1112 case DMSG_LNK_PING: 1113 /* 1114 * Received ping, send reply 1115 */ 1116 rep = kdmsg_msg_alloc(msg->state, DMSG_LNK_PING | DMSGF_REPLY, 1117 NULL, NULL); 1118 kdmsg_msg_write(rep); 1119 break; 1120 case DMSG_LNK_PING | DMSGF_REPLY: 1121 /* ignore replies */ 1122 break; 1123 case DMSG_LNK_CONN | DMSGF_CREATE: 1124 case DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_DELETE: 1125 /* 1126 * Received LNK_CONN transaction. Transmit response and 1127 * leave transaction open, which allows the other end to 1128 * start to the SPAN protocol. 1129 * 1130 * Handle shim after acknowledging the CONN. 1131 */ 1132 if ((msg->any.head.cmd & DMSGF_DELETE) == 0) { 1133 if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) { 1134 kdmsg_msg_result(msg, 0); 1135 if (iocom->auto_callback) 1136 iocom->auto_callback(msg); 1137 } else { 1138 error = iocom->rcvmsg(msg); 1139 } 1140 break; 1141 } 1142 /* fall through */ 1143 case DMSG_LNK_CONN | DMSGF_DELETE: 1144 /* 1145 * This message is usually simulated after a link is lost 1146 * to clean up the transaction. 1147 */ 1148 if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) { 1149 if (iocom->auto_callback) 1150 iocom->auto_callback(msg); 1151 kdmsg_msg_reply(msg, 0); 1152 } else { 1153 error = iocom->rcvmsg(msg); 1154 } 1155 break; 1156 case DMSG_LNK_SPAN | DMSGF_CREATE: 1157 case DMSG_LNK_SPAN | DMSGF_CREATE | DMSGF_DELETE: 1158 /* 1159 * Received LNK_SPAN transaction. We do not have to respond 1160 * (except on termination), but we must leave the transaction 1161 * open. 1162 * 1163 * Handle shim after acknowledging the SPAN. 1164 */ 1165 if (iocom->flags & KDMSG_IOCOMF_AUTORXSPAN) { 1166 if ((msg->any.head.cmd & DMSGF_DELETE) == 0) { 1167 if (iocom->auto_callback) 1168 iocom->auto_callback(msg); 1169 break; 1170 } 1171 /* fall through */ 1172 } else { 1173 error = iocom->rcvmsg(msg); 1174 break; 1175 } 1176 /* fall through */ 1177 case DMSG_LNK_SPAN | DMSGF_DELETE: 1178 /* 1179 * Process shims (auto_callback) before cleaning up the 1180 * circuit structure and closing the transactions. Device 1181 * driver should ensure that the circuit is not used after 1182 * the auto_callback() returns. 1183 * 1184 * Handle shim before closing the SPAN transaction. 1185 */ 1186 if (iocom->flags & KDMSG_IOCOMF_AUTORXSPAN) { 1187 if (iocom->auto_callback) 1188 iocom->auto_callback(msg); 1189 kdmsg_msg_reply(msg, 0); 1190 } else { 1191 error = iocom->rcvmsg(msg); 1192 } 1193 break; 1194 default: 1195 /* 1196 * Anything unhandled goes into rcvmsg. 1197 * 1198 * NOTE: Replies to link-level messages initiated by our side 1199 * are handled by the state callback, they are NOT 1200 * handled here. 1201 */ 1202 error = iocom->rcvmsg(msg); 1203 break; 1204 } 1205 return (error); 1206 } 1207 1208 /* 1209 * Post-receive-handling message and state cleanup. This routine is called 1210 * after the state function handling/callback to properly dispose of the 1211 * message and unlink the state's parent/subq linkage if the state is 1212 * completely closed. 1213 * 1214 * msglk is not held. 1215 */ 1216 static 1217 void 1218 kdmsg_state_cleanuprx(kdmsg_msg_t *msg) 1219 { 1220 kdmsg_state_t *state = msg->state; 1221 kdmsg_iocom_t *iocom = state->iocom; 1222 1223 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1224 if (state != &iocom->state0) { 1225 /* 1226 * When terminating a transaction (in either direction), all 1227 * sub-states are aborted. 1228 */ 1229 if ((msg->any.head.cmd & DMSGF_DELETE) && 1230 TAILQ_FIRST(&msg->state->subq)) { 1231 kdio_printf(iocom, 2, 1232 "simulate failure for substates of " 1233 "state %p cmd %08x/%08x\n", 1234 msg->state, 1235 msg->state->rxcmd, 1236 msg->state->txcmd); 1237 kdmsg_simulate_failure(msg->state, 1238 0, DMSG_ERR_LOSTLINK); 1239 } 1240 1241 /* 1242 * Once the state is fully closed we can (try to) remove it 1243 * from the subq topology. 1244 */ 1245 if ((state->flags & KDMSG_STATE_SUBINSERTED) && 1246 (state->rxcmd & DMSGF_DELETE) && 1247 (state->txcmd & DMSGF_DELETE)) { 1248 /* 1249 * Remove parent linkage if state is completely closed. 1250 */ 1251 kdmsg_subq_delete(state); 1252 } 1253 } 1254 kdmsg_msg_free(msg); 1255 1256 lockmgr(&iocom->msglk, LK_RELEASE); 1257 } 1258 1259 /* 1260 * Remove state from its parent's subq. This can wind up recursively 1261 * dropping the parent upward. 1262 * 1263 * NOTE: Once we drop the parent, our pstate pointer may become invalid. 1264 */ 1265 static 1266 void 1267 kdmsg_subq_delete(kdmsg_state_t *state) 1268 { 1269 kdmsg_state_t *pstate; 1270 1271 if (state->flags & KDMSG_STATE_SUBINSERTED) { 1272 pstate = state->parent; 1273 KKASSERT(pstate); 1274 if (pstate->scan == state) 1275 pstate->scan = NULL; 1276 TAILQ_REMOVE(&pstate->subq, state, entry); 1277 state->flags &= ~KDMSG_STATE_SUBINSERTED; 1278 state->parent = NULL; 1279 if (TAILQ_EMPTY(&pstate->subq)) { 1280 kdmsg_state_drop(pstate);/* pstate->subq */ 1281 } 1282 pstate = NULL; /* safety */ 1283 kdmsg_state_drop(state); /* pstate->subq */ 1284 } else { 1285 KKASSERT(state->parent == NULL); 1286 } 1287 } 1288 1289 /* 1290 * Simulate receiving a message which terminates an active transaction 1291 * state. Our simulated received message must set DELETE and may also 1292 * have to set CREATE. It must also ensure that all fields are set such 1293 * that the receive handling code can find the state (kdmsg_state_msgrx()) 1294 * or an endless loop will ensue. 1295 * 1296 * This is used when the other end of the link is dead so the device driver 1297 * gets a completed transaction for all pending states. 1298 * 1299 * Called with iocom locked. 1300 */ 1301 static 1302 void 1303 kdmsg_simulate_failure(kdmsg_state_t *state, int meto, int error) 1304 { 1305 kdmsg_state_t *substate; 1306 1307 kdmsg_state_hold(state); /* aborting */ 1308 1309 /* 1310 * Abort parent state first. Parent will not actually disappear 1311 * until children are gone. Device drivers must handle the situation. 1312 * The advantage of this is that device drivers can flag the situation 1313 * as an interlock against new operations on dying states. And since 1314 * device operations are often asynchronous anyway, this sequence of 1315 * events works out better. 1316 */ 1317 if (meto) 1318 kdmsg_state_abort(state); 1319 1320 /* 1321 * Recurse through any children. 1322 */ 1323 again: 1324 TAILQ_FOREACH(substate, &state->subq, entry) { 1325 if (substate->flags & KDMSG_STATE_ABORTING) 1326 continue; 1327 state->scan = substate; 1328 kdmsg_simulate_failure(substate, 1, error); 1329 if (state->scan != substate) 1330 goto again; 1331 } 1332 kdmsg_state_drop(state); /* aborting */ 1333 } 1334 1335 static 1336 void 1337 kdmsg_state_abort(kdmsg_state_t *state) 1338 { 1339 kdmsg_msg_t *msg; 1340 1341 /* 1342 * Set ABORTING and DYING, return if already set. If the state was 1343 * just allocated we defer the abort operation until the related 1344 * message is processed. 1345 */ 1346 KKASSERT((state->flags & KDMSG_STATE_ABORTING) == 0); 1347 if (state->flags & KDMSG_STATE_ABORTING) 1348 return; 1349 state->flags |= KDMSG_STATE_ABORTING; 1350 kdmsg_state_dying(state); 1351 if (state->flags & KDMSG_STATE_NEW) { 1352 kdio_printf(iocom, 5, 1353 "kdmsg_state_abort(0): state %p rxcmd %08x " 1354 "txcmd %08x flags %08x - in NEW state\n", 1355 state, state->rxcmd, 1356 state->txcmd, state->flags); 1357 return; 1358 } 1359 1360 /* 1361 * NOTE: The DELETE flag might already be set due to an early 1362 * termination. 1363 * 1364 * NOTE: Args to kdmsg_msg_alloc() to avoid dynamic state allocation. 1365 * 1366 * NOTE: We are simulating a received message using our state 1367 * (vs a message generated by the other side using its state), 1368 * so we must invert DMSGF_REVTRANS and DMSGF_REVCIRC. 1369 */ 1370 kdio_printf(iocom, 5, 1371 "kdmsg_state_abort(1): state %p rxcmd %08x txcmd %08x\n", 1372 state, state->rxcmd, state->txcmd); 1373 if ((state->rxcmd & DMSGF_DELETE) == 0) { 1374 msg = kdmsg_msg_alloc(state, DMSG_LNK_ERROR, NULL, NULL); 1375 if ((state->rxcmd & DMSGF_CREATE) == 0) 1376 msg->any.head.cmd |= DMSGF_CREATE; 1377 msg->any.head.cmd |= DMSGF_DELETE | 1378 (state->rxcmd & DMSGF_REPLY); 1379 msg->any.head.cmd ^= (DMSGF_REVTRANS | DMSGF_REVCIRC); 1380 msg->any.head.error = DMSG_ERR_LOSTLINK; 1381 kdio_printf(iocom, 5, 1382 "kdmsg_state_abort(a): state %p msgcmd %08x\n", 1383 state, msg->any.head.cmd); 1384 /* circuit not initialized */ 1385 lockmgr(&state->iocom->msglk, LK_RELEASE); 1386 kdmsg_msg_receive_handling(msg); 1387 lockmgr(&state->iocom->msglk, LK_EXCLUSIVE); 1388 msg = NULL; 1389 } 1390 kdio_printf(iocom, 5, 1391 "kdmsg_state_abort(2): state %p rxcmd %08x txcmd %08x\n", 1392 state, state->rxcmd, state->txcmd); 1393 } 1394 1395 /* 1396 * Recursively sets KDMSG_STATE_DYING on state and all sub-states, preventing 1397 * the transmission of any new messages on these states. This is done 1398 * atomically when parent state is terminating, whereas setting ABORTING is 1399 * not atomic and can leak races. 1400 */ 1401 static 1402 void 1403 kdmsg_state_dying(kdmsg_state_t *state) 1404 { 1405 kdmsg_state_t *scan; 1406 1407 if ((state->flags & KDMSG_STATE_DYING) == 0) { 1408 state->flags |= KDMSG_STATE_DYING; 1409 TAILQ_FOREACH(scan, &state->subq, entry) 1410 kdmsg_state_dying(scan); 1411 } 1412 } 1413 1414 /* 1415 * Process state tracking for a message prior to transmission. 1416 * 1417 * Called with msglk held and the msg dequeued. Returns non-zero if 1418 * the message is bad and should be deleted by the caller. 1419 * 1420 * One-off messages are usually with dummy state and msg->state may be NULL 1421 * in this situation. 1422 * 1423 * New transactions (when CREATE is set) will insert the state. 1424 * 1425 * May request that caller discard the message by setting *discardp to 1. 1426 * A NULL state may be returned in this case. 1427 */ 1428 static 1429 int 1430 kdmsg_state_msgtx(kdmsg_msg_t *msg) 1431 { 1432 kdmsg_iocom_t *iocom = msg->state->iocom; 1433 kdmsg_state_t *state; 1434 int error; 1435 1436 /* 1437 * Make sure a state structure is ready to go in case we need a new 1438 * one. This is the only routine which uses freewr_state so no 1439 * races are possible. 1440 */ 1441 if ((state = iocom->freewr_state) == NULL) { 1442 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 1443 state->flags = KDMSG_STATE_DYNAMIC; 1444 state->iocom = iocom; 1445 state->refs = 1; 1446 TAILQ_INIT(&state->subq); 1447 iocom->freewr_state = state; 1448 } 1449 1450 /* 1451 * Lock RB tree. If persistent state is present it will have already 1452 * been assigned to msg. 1453 */ 1454 state = msg->state; 1455 1456 /* 1457 * Short-cut one-off or mid-stream messages (state may be NULL). 1458 */ 1459 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 1460 DMSGF_ABORT)) == 0) { 1461 return(0); 1462 } 1463 1464 1465 /* 1466 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from 1467 * inside the case statements. 1468 */ 1469 switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 1470 DMSGF_REPLY)) { 1471 case DMSGF_CREATE: 1472 case DMSGF_CREATE | DMSGF_DELETE: 1473 /* 1474 * Insert the new persistent message state and mark 1475 * half-closed if DELETE is set. Since this is a new 1476 * message it isn't possible to transition into the fully 1477 * closed state here. 1478 * 1479 * XXX state must be assigned and inserted by 1480 * kdmsg_msg_write(). txcmd is assigned by us 1481 * on-transmit. 1482 */ 1483 KKASSERT(state != NULL); 1484 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK; 1485 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE; 1486 state->rxcmd = DMSGF_REPLY; 1487 state->flags &= ~KDMSG_STATE_NEW; 1488 error = 0; 1489 break; 1490 case DMSGF_DELETE: 1491 /* 1492 * Sent ABORT+DELETE in case where msgid has already 1493 * been fully closed, ignore the message. 1494 */ 1495 if (state == &iocom->state0) { 1496 if (msg->any.head.cmd & DMSGF_ABORT) { 1497 error = EALREADY; 1498 } else { 1499 kdio_printf(iocom, 1, 1500 "msgtx: no state match " 1501 "for DELETE cmd=%08x msgid=%016jx\n", 1502 msg->any.head.cmd, 1503 (intmax_t)msg->any.head.msgid); 1504 error = EINVAL; 1505 } 1506 break; 1507 } 1508 1509 /* 1510 * Sent ABORT+DELETE in case where msgid has 1511 * already been reused for an unrelated message, 1512 * ignore the message. 1513 */ 1514 if ((state->txcmd & DMSGF_CREATE) == 0) { 1515 if (msg->any.head.cmd & DMSGF_ABORT) { 1516 error = EALREADY; 1517 } else { 1518 kdio_printf(iocom, 1, "%s\n", 1519 "msgtx: state reused " 1520 "for DELETE"); 1521 error = EINVAL; 1522 } 1523 break; 1524 } 1525 error = 0; 1526 break; 1527 default: 1528 /* 1529 * Check for mid-stream ABORT command sent 1530 */ 1531 if (msg->any.head.cmd & DMSGF_ABORT) { 1532 if (state == &state->iocom->state0 || 1533 (state->txcmd & DMSGF_CREATE) == 0) { 1534 error = EALREADY; 1535 break; 1536 } 1537 } 1538 error = 0; 1539 break; 1540 case DMSGF_REPLY | DMSGF_CREATE: 1541 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE: 1542 /* 1543 * When transmitting a reply with CREATE set the original 1544 * persistent state message should already exist. 1545 */ 1546 if (state == &state->iocom->state0) { 1547 kdio_printf(iocom, 1, "%s\n", 1548 "msgtx: no state match " 1549 "for REPLY | CREATE"); 1550 error = EINVAL; 1551 break; 1552 } 1553 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE; 1554 error = 0; 1555 break; 1556 case DMSGF_REPLY | DMSGF_DELETE: 1557 /* 1558 * When transmitting a reply with DELETE set the original 1559 * persistent state message should already exist. 1560 * 1561 * This is very similar to the REPLY|CREATE|* case except 1562 * txcmd is already stored, so we just add the DELETE flag. 1563 * 1564 * Sent REPLY+ABORT+DELETE in case where msgid has 1565 * already been fully closed, ignore the message. 1566 */ 1567 if (state == &state->iocom->state0) { 1568 if (msg->any.head.cmd & DMSGF_ABORT) { 1569 error = EALREADY; 1570 } else { 1571 kdio_printf(iocom, 1, "%s\n", 1572 "msgtx: no state match " 1573 "for REPLY | DELETE"); 1574 error = EINVAL; 1575 } 1576 break; 1577 } 1578 1579 /* 1580 * Sent REPLY+ABORT+DELETE in case where msgid has already 1581 * been reused for an unrelated message, ignore the message. 1582 */ 1583 if ((state->txcmd & DMSGF_CREATE) == 0) { 1584 if (msg->any.head.cmd & DMSGF_ABORT) { 1585 error = EALREADY; 1586 } else { 1587 kdio_printf(iocom, 1, "%s\n", 1588 "msgtx: state reused " 1589 "for REPLY | DELETE"); 1590 error = EINVAL; 1591 } 1592 break; 1593 } 1594 error = 0; 1595 break; 1596 case DMSGF_REPLY: 1597 /* 1598 * Check for mid-stream ABORT reply sent. 1599 * 1600 * One-off REPLY messages are allowed for e.g. status updates. 1601 */ 1602 if (msg->any.head.cmd & DMSGF_ABORT) { 1603 if (state == &state->iocom->state0 || 1604 (state->txcmd & DMSGF_CREATE) == 0) { 1605 error = EALREADY; 1606 break; 1607 } 1608 } 1609 error = 0; 1610 break; 1611 } 1612 1613 /* 1614 * Set interlock (XXX hack) in case the send side blocks and a 1615 * response is returned before kdmsg_state_cleanuptx() can be 1616 * run. 1617 */ 1618 if (state && error == 0) 1619 state->flags |= KDMSG_STATE_INTERLOCK; 1620 1621 return (error); 1622 } 1623 1624 /* 1625 * Called with iocom locked. 1626 */ 1627 static 1628 void 1629 kdmsg_state_cleanuptx(kdmsg_msg_t *msg) 1630 { 1631 kdmsg_iocom_t *iocom = msg->state->iocom; 1632 kdmsg_state_t *state; 1633 1634 if ((state = msg->state) == NULL) { 1635 kdmsg_msg_free(msg); 1636 return; 1637 } 1638 1639 /* 1640 * Clear interlock (XXX hack) in case the send side blocks and a 1641 * response is returned in the other thread before 1642 * kdmsg_state_cleanuptx() can be run. We maintain our hold on 1643 * iocom->msglk so we can do this before completing our task. 1644 */ 1645 if (state->flags & KDMSG_STATE_SIGNAL) { 1646 kdio_printf(iocom, 1, "state %p interlock!\n", state); 1647 wakeup(state); 1648 } 1649 state->flags &= ~(KDMSG_STATE_INTERLOCK | KDMSG_STATE_SIGNAL); 1650 kdmsg_state_hold(state); 1651 1652 if (msg->any.head.cmd & DMSGF_DELETE) { 1653 KKASSERT((state->txcmd & DMSGF_DELETE) == 0); 1654 state->txcmd |= DMSGF_DELETE; 1655 if (state->rxcmd & DMSGF_DELETE) { 1656 KKASSERT(state->flags & KDMSG_STATE_RBINSERTED); 1657 if (state->txcmd & DMSGF_REPLY) { 1658 KKASSERT(msg->any.head.cmd & 1659 DMSGF_REPLY); 1660 RB_REMOVE(kdmsg_state_tree, 1661 &iocom->staterd_tree, state); 1662 } else { 1663 KKASSERT((msg->any.head.cmd & 1664 DMSGF_REPLY) == 0); 1665 RB_REMOVE(kdmsg_state_tree, 1666 &iocom->statewr_tree, state); 1667 } 1668 state->flags &= ~KDMSG_STATE_RBINSERTED; 1669 1670 /* 1671 * The subq recursion is used for parent linking and 1672 * scanning the topology for aborts, we can only 1673 * remove leafs. The circuit is effectively dead now, 1674 * but topology won't be torn down until all of its 1675 * children have finished/aborted. 1676 * 1677 * This is particularly important for end-point 1678 * devices which might need to access private data 1679 * in parent states. Out of order disconnects can 1680 * occur if an end-point device is processing a 1681 * message transaction asynchronously because abort 1682 * requests are basically synchronous and it probably 1683 * isn't convenient (or possible) for the end-point 1684 * to abort an asynchronous operation. 1685 */ 1686 if (TAILQ_EMPTY(&state->subq)) 1687 kdmsg_subq_delete(state); 1688 kdmsg_msg_free(msg); 1689 kdmsg_state_drop(state); /* state on rbtree */ 1690 } else { 1691 kdmsg_msg_free(msg); 1692 } 1693 } else { 1694 kdmsg_msg_free(msg); 1695 } 1696 1697 /* 1698 * Deferred abort after transmission. 1699 */ 1700 if ((state->flags & (KDMSG_STATE_ABORTING | KDMSG_STATE_DYING)) && 1701 (state->rxcmd & DMSGF_DELETE) == 0) { 1702 kdio_printf(iocom, 5, 1703 "kdmsg_state_cleanuptx: state=%p " 1704 "executing deferred abort\n", 1705 state); 1706 state->flags &= ~KDMSG_STATE_ABORTING; 1707 kdmsg_state_abort(state); 1708 } 1709 kdmsg_state_drop(state); 1710 } 1711 1712 static 1713 void 1714 _kdmsg_state_hold(kdmsg_state_t *state KDMSG_DEBUG_ARGS) 1715 { 1716 atomic_add_int(&state->refs, 1); 1717 #if KDMSG_DEBUG 1718 kd_printf(4, "state %p +%d\t%s:%d\n", state, state->refs, file, line); 1719 #endif 1720 } 1721 1722 static 1723 void 1724 _kdmsg_state_drop(kdmsg_state_t *state KDMSG_DEBUG_ARGS) 1725 { 1726 KKASSERT(state->refs > 0); 1727 #if KDMSG_DEBUG 1728 kd_printf(4, "state %p -%d\t%s:%d\n", state, state->refs, file, line); 1729 #endif 1730 if (atomic_fetchadd_int(&state->refs, -1) == 1) 1731 kdmsg_state_free(state); 1732 } 1733 1734 static 1735 void 1736 kdmsg_state_free(kdmsg_state_t *state) 1737 { 1738 kdmsg_iocom_t *iocom = state->iocom; 1739 1740 KKASSERT((state->flags & KDMSG_STATE_RBINSERTED) == 0); 1741 KKASSERT((state->flags & KDMSG_STATE_SUBINSERTED) == 0); 1742 KKASSERT(TAILQ_EMPTY(&state->subq)); 1743 1744 if (state != &state->iocom->state0) 1745 kfree(state, iocom->mmsg); 1746 } 1747 1748 kdmsg_msg_t * 1749 kdmsg_msg_alloc(kdmsg_state_t *state, uint32_t cmd, 1750 int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data) 1751 { 1752 kdmsg_iocom_t *iocom = state->iocom; 1753 kdmsg_state_t *pstate; 1754 kdmsg_msg_t *msg; 1755 size_t hbytes; 1756 1757 KKASSERT(iocom != NULL); 1758 hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN; 1759 msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes, 1760 iocom->mmsg, M_WAITOK | M_ZERO); 1761 msg->hdr_size = hbytes; 1762 1763 if ((cmd & (DMSGF_CREATE | DMSGF_REPLY)) == DMSGF_CREATE) { 1764 /* 1765 * New transaction, requires tracking state and a unique 1766 * msgid to be allocated. 1767 * 1768 * It is possible to race a circuit failure, inherit the 1769 * parent's STATE_DYING flag to trigger an abort sequence 1770 * in the transmit path. By not inheriting ABORTING the 1771 * abort sequence can recurse. 1772 * 1773 * NOTE: The transactions has not yet been initiated so we 1774 * cannot set DMSGF_CREATE/DELETE bits in txcmd or rxcmd. 1775 * We have to properly setup DMSGF_REPLY, however. 1776 */ 1777 pstate = state; 1778 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 1779 TAILQ_INIT(&state->subq); 1780 state->iocom = iocom; 1781 state->parent = pstate; 1782 state->flags = KDMSG_STATE_DYNAMIC | 1783 KDMSG_STATE_NEW; 1784 state->func = func; 1785 state->any.any = data; 1786 state->msgid = (uint64_t)(uintptr_t)state; 1787 /*msg->any.head.msgid = state->msgid;XXX*/ 1788 1789 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1790 if (RB_INSERT(kdmsg_state_tree, &iocom->statewr_tree, state)) 1791 panic("duplicate msgid allocated"); 1792 if (TAILQ_EMPTY(&pstate->subq)) 1793 kdmsg_state_hold(pstate);/* pstate->subq */ 1794 TAILQ_INSERT_TAIL(&pstate->subq, state, entry); 1795 state->flags |= KDMSG_STATE_RBINSERTED | 1796 KDMSG_STATE_SUBINSERTED; 1797 state->flags |= pstate->flags & KDMSG_STATE_DYING; 1798 kdmsg_state_hold(state); /* pstate->subq */ 1799 kdmsg_state_hold(state); /* state on rbtree */ 1800 kdmsg_state_hold(state); /* msg->state */ 1801 lockmgr(&iocom->msglk, LK_RELEASE); 1802 } else { 1803 pstate = state->parent; 1804 KKASSERT(pstate != NULL); 1805 kdmsg_state_hold(state); /* msg->state */ 1806 } 1807 1808 if (state->flags & KDMSG_STATE_OPPOSITE) 1809 cmd |= DMSGF_REVTRANS; 1810 if (pstate->flags & KDMSG_STATE_OPPOSITE) 1811 cmd |= DMSGF_REVCIRC; 1812 1813 msg->any.head.magic = DMSG_HDR_MAGIC; 1814 msg->any.head.cmd = cmd; 1815 msg->any.head.msgid = state->msgid; 1816 msg->any.head.circuit = pstate->msgid; 1817 msg->state = state; 1818 1819 return (msg); 1820 } 1821 1822 void 1823 kdmsg_msg_free(kdmsg_msg_t *msg) 1824 { 1825 kdmsg_iocom_t *iocom = msg->state->iocom; 1826 kdmsg_state_t *state; 1827 1828 if ((msg->flags & KDMSG_FLAG_AUXALLOC) && 1829 msg->aux_data && msg->aux_size) { 1830 kfree(msg->aux_data, iocom->mmsg); 1831 msg->flags &= ~KDMSG_FLAG_AUXALLOC; 1832 } 1833 if ((state = msg->state) != NULL) { 1834 msg->state = NULL; 1835 kdmsg_state_drop(state); /* msg->state */ 1836 } 1837 msg->aux_data = NULL; 1838 msg->aux_size = 0; 1839 1840 kfree(msg, iocom->mmsg); 1841 } 1842 1843 void 1844 kdmsg_detach_aux_data(kdmsg_msg_t *msg, kdmsg_data_t *data) 1845 { 1846 if (msg->flags & KDMSG_FLAG_AUXALLOC) { 1847 data->aux_data = msg->aux_data; 1848 data->aux_size = msg->aux_size; 1849 data->iocom = msg->state->iocom; 1850 msg->flags &= ~KDMSG_FLAG_AUXALLOC; 1851 } else { 1852 data->aux_data = NULL; 1853 data->aux_size = 0; 1854 data->iocom = msg->state->iocom; 1855 } 1856 } 1857 1858 void 1859 kdmsg_free_aux_data(kdmsg_data_t *data) 1860 { 1861 if (data->aux_data) 1862 kfree(data->aux_data, data->iocom->mmsg); 1863 } 1864 1865 /* 1866 * Indexed messages are stored in a red-black tree indexed by their 1867 * msgid. Only persistent messages are indexed. 1868 */ 1869 int 1870 kdmsg_state_cmp(kdmsg_state_t *state1, kdmsg_state_t *state2) 1871 { 1872 if (state1->iocom < state2->iocom) 1873 return(-1); 1874 if (state1->iocom > state2->iocom) 1875 return(1); 1876 if (state1->msgid < state2->msgid) 1877 return(-1); 1878 if (state1->msgid > state2->msgid) 1879 return(1); 1880 return(0); 1881 } 1882 1883 /* 1884 * Write a message. All requisit command flags have been set. 1885 * 1886 * If msg->state is non-NULL the message is written to the existing 1887 * transaction. msgid will be set accordingly. 1888 * 1889 * If msg->state is NULL and CREATE is set new state is allocated and 1890 * (func, data) is installed. A msgid is assigned. 1891 * 1892 * If msg->state is NULL and CREATE is not set the message is assumed 1893 * to be a one-way message. The originator must assign the msgid 1894 * (or leave it 0, which is typical. 1895 * 1896 * This function merely queues the message to the management thread, it 1897 * does not write to the message socket/pipe. 1898 */ 1899 void 1900 kdmsg_msg_write(kdmsg_msg_t *msg) 1901 { 1902 kdmsg_iocom_t *iocom = msg->state->iocom; 1903 1904 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1905 kdmsg_msg_write_locked(iocom, msg); 1906 lockmgr(&iocom->msglk, LK_RELEASE); 1907 } 1908 1909 static void 1910 kdmsg_msg_write_locked(kdmsg_iocom_t *iocom, kdmsg_msg_t *msg) 1911 { 1912 kdmsg_state_t *state; 1913 1914 if (msg->state) { 1915 /* 1916 * Continuance or termination of existing transaction. 1917 * The transaction could have been initiated by either end. 1918 * 1919 * (Function callback and aux data for the receive side can 1920 * be replaced or left alone). 1921 */ 1922 state = msg->state; 1923 msg->any.head.msgid = state->msgid; 1924 } else { 1925 /* 1926 * One-off message (always uses msgid 0 to distinguish 1927 * between a possibly lost in-transaction message due to 1928 * competing aborts and a real one-off message?) 1929 */ 1930 state = NULL; 1931 msg->any.head.msgid = 0; 1932 } 1933 1934 #if 0 1935 /* 1936 * XXX removed - don't make this a panic, allow the state checks 1937 * below to catch the situation. 1938 * 1939 * This flag is not set until after the tx thread has drained 1940 * the tx msgq and simulated responses. After that point the 1941 * txthread is dead and can no longer simulate responses. 1942 * 1943 * Device drivers should never try to send a message once this 1944 * flag is set. They should have detected (through the state 1945 * closures) that the link is in trouble. 1946 */ 1947 if (iocom->flags & KDMSG_IOCOMF_EXITNOACC) { 1948 lockmgr(&iocom->msglk, LK_RELEASE); 1949 panic("kdmsg_msg_write: Attempt to write message to " 1950 "terminated iocom\n"); 1951 } 1952 #endif 1953 1954 /* 1955 * For stateful messages, if the circuit is dead or dying we have 1956 * to abort the potentially newly-created state and discard the 1957 * message. 1958 * 1959 * - We must discard the message because the other end will not 1960 * be expecting any more messages over the dead or dying circuit 1961 * and might not be able to receive them. 1962 * 1963 * - We abort the state by simulating a failure to generate a fake 1964 * incoming DELETE. This will trigger the state callback and allow 1965 * the device to clean things up and reply, closing the outgoing 1966 * direction and allowing the state to be freed. 1967 * 1968 * This situation occurs quite often, particularly as SPANs stabilize. 1969 * End-points must do the right thing. 1970 */ 1971 if (state) { 1972 KKASSERT((state->txcmd & DMSGF_DELETE) == 0); 1973 if (state->flags & KDMSG_STATE_DYING) { 1974 #if 0 1975 if ((state->flags & KDMSG_STATE_DYING) || 1976 (state->parent->txcmd & DMSGF_DELETE) || 1977 (state->parent->flags & KDMSG_STATE_DYING)) { 1978 #endif 1979 kdio_printf(iocom, 4, 1980 "kdmsg_msg_write: Write to dying circuit " 1981 "state=%p " 1982 "ptxcmd=%08x prxcmd=%08x flags=%08x\n", 1983 state, 1984 state->parent->rxcmd, 1985 state->parent->txcmd, 1986 state->parent->flags); 1987 kdmsg_state_hold(state); 1988 kdmsg_state_msgtx(msg); 1989 kdmsg_state_cleanuptx(msg); 1990 kdmsg_state_drop(state); 1991 return; 1992 } 1993 } 1994 1995 /* 1996 * Finish up the msg fields. Note that msg->aux_size and the 1997 * aux_bytes stored in the message header represent the unaligned 1998 * (actual) bytes of data, but the buffer is sized to an aligned 1999 * size and the CRC is generated over the aligned length. 2000 */ 2001 msg->any.head.salt = /* (random << 8) | */ (iocom->msg_seq & 255); 2002 ++iocom->msg_seq; 2003 2004 if (msg->aux_data && msg->aux_size) { 2005 uint32_t abytes = DMSG_DOALIGN(msg->aux_size); 2006 2007 msg->any.head.aux_bytes = msg->aux_size; 2008 msg->any.head.aux_crc = iscsi_crc32(msg->aux_data, abytes); 2009 } 2010 msg->any.head.hdr_crc = 0; 2011 msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size); 2012 2013 TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry); 2014 2015 if (iocom->msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) { 2016 atomic_clear_int(&iocom->msg_ctl, 2017 KDMSG_CLUSTERCTL_SLEEPING); 2018 wakeup(&iocom->msg_ctl); 2019 } 2020 } 2021 2022 /* 2023 * Reply to a message and terminate our side of the transaction. 2024 * 2025 * If msg->state is non-NULL we are replying to a one-way message. 2026 */ 2027 void 2028 kdmsg_msg_reply(kdmsg_msg_t *msg, uint32_t error) 2029 { 2030 kdmsg_state_t *state = msg->state; 2031 kdmsg_msg_t *nmsg; 2032 uint32_t cmd; 2033 2034 /* 2035 * Reply with a simple error code and terminate the transaction. 2036 */ 2037 cmd = DMSG_LNK_ERROR; 2038 2039 /* 2040 * Check if our direction has even been initiated yet, set CREATE. 2041 * 2042 * Check what direction this is (command or reply direction). Note 2043 * that txcmd might not have been initiated yet. 2044 * 2045 * If our direction has already been closed we just return without 2046 * doing anything. 2047 */ 2048 if (state != &state->iocom->state0) { 2049 if (state->txcmd & DMSGF_DELETE) 2050 return; 2051 if ((state->txcmd & DMSGF_CREATE) == 0) 2052 cmd |= DMSGF_CREATE; 2053 if (state->txcmd & DMSGF_REPLY) 2054 cmd |= DMSGF_REPLY; 2055 cmd |= DMSGF_DELETE; 2056 } else { 2057 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) 2058 cmd |= DMSGF_REPLY; 2059 } 2060 2061 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 2062 nmsg->any.head.error = error; 2063 kdmsg_msg_write(nmsg); 2064 } 2065 2066 /* 2067 * Reply to a message and continue our side of the transaction. 2068 * 2069 * If msg->state is non-NULL we are replying to a one-way message and this 2070 * function degenerates into the same as kdmsg_msg_reply(). 2071 */ 2072 void 2073 kdmsg_msg_result(kdmsg_msg_t *msg, uint32_t error) 2074 { 2075 kdmsg_state_t *state = msg->state; 2076 kdmsg_msg_t *nmsg; 2077 uint32_t cmd; 2078 2079 /* 2080 * Return a simple result code, do NOT terminate the transaction. 2081 */ 2082 cmd = DMSG_LNK_ERROR; 2083 2084 /* 2085 * Check if our direction has even been initiated yet, set CREATE. 2086 * 2087 * Check what direction this is (command or reply direction). Note 2088 * that txcmd might not have been initiated yet. 2089 * 2090 * If our direction has already been closed we just return without 2091 * doing anything. 2092 */ 2093 if (state != &state->iocom->state0) { 2094 if (state->txcmd & DMSGF_DELETE) 2095 return; 2096 if ((state->txcmd & DMSGF_CREATE) == 0) 2097 cmd |= DMSGF_CREATE; 2098 if (state->txcmd & DMSGF_REPLY) 2099 cmd |= DMSGF_REPLY; 2100 /* continuing transaction, do not set MSGF_DELETE */ 2101 } else { 2102 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) 2103 cmd |= DMSGF_REPLY; 2104 } 2105 2106 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 2107 nmsg->any.head.error = error; 2108 kdmsg_msg_write(nmsg); 2109 } 2110 2111 /* 2112 * Reply to a message and terminate our side of the transaction. 2113 * 2114 * If msg->state is non-NULL we are replying to a one-way message. 2115 */ 2116 void 2117 kdmsg_state_reply(kdmsg_state_t *state, uint32_t error) 2118 { 2119 kdmsg_msg_t *nmsg; 2120 uint32_t cmd; 2121 2122 /* 2123 * Reply with a simple error code and terminate the transaction. 2124 */ 2125 cmd = DMSG_LNK_ERROR; 2126 2127 /* 2128 * Check if our direction has even been initiated yet, set CREATE. 2129 * 2130 * Check what direction this is (command or reply direction). Note 2131 * that txcmd might not have been initiated yet. 2132 * 2133 * If our direction has already been closed we just return without 2134 * doing anything. 2135 */ 2136 KKASSERT(state); 2137 if (state->txcmd & DMSGF_DELETE) 2138 return; 2139 if ((state->txcmd & DMSGF_CREATE) == 0) 2140 cmd |= DMSGF_CREATE; 2141 if (state->txcmd & DMSGF_REPLY) 2142 cmd |= DMSGF_REPLY; 2143 cmd |= DMSGF_DELETE; 2144 2145 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 2146 nmsg->any.head.error = error; 2147 kdmsg_msg_write(nmsg); 2148 } 2149 2150 /* 2151 * Reply to a message and continue our side of the transaction. 2152 * 2153 * If msg->state is non-NULL we are replying to a one-way message and this 2154 * function degenerates into the same as kdmsg_msg_reply(). 2155 */ 2156 void 2157 kdmsg_state_result(kdmsg_state_t *state, uint32_t error) 2158 { 2159 kdmsg_msg_t *nmsg; 2160 uint32_t cmd; 2161 2162 /* 2163 * Return a simple result code, do NOT terminate the transaction. 2164 */ 2165 cmd = DMSG_LNK_ERROR; 2166 2167 /* 2168 * Check if our direction has even been initiated yet, set CREATE. 2169 * 2170 * Check what direction this is (command or reply direction). Note 2171 * that txcmd might not have been initiated yet. 2172 * 2173 * If our direction has already been closed we just return without 2174 * doing anything. 2175 */ 2176 KKASSERT(state); 2177 if (state->txcmd & DMSGF_DELETE) 2178 return; 2179 if ((state->txcmd & DMSGF_CREATE) == 0) 2180 cmd |= DMSGF_CREATE; 2181 if (state->txcmd & DMSGF_REPLY) 2182 cmd |= DMSGF_REPLY; 2183 /* continuing transaction, do not set MSGF_DELETE */ 2184 2185 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 2186 nmsg->any.head.error = error; 2187 kdmsg_msg_write(nmsg); 2188 } 2189