1 /*- 2 * Copyright (c) 2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * TODO: txcmd CREATE state is deferred by txmsgq, need to calculate 36 * a streaming response. See subr_diskiocom()'s diskiodone(). 
37 */ 38 #include <sys/param.h> 39 #include <sys/types.h> 40 #include <sys/kernel.h> 41 #include <sys/conf.h> 42 #include <sys/systm.h> 43 #include <sys/queue.h> 44 #include <sys/tree.h> 45 #include <sys/malloc.h> 46 #include <sys/mount.h> 47 #include <sys/socket.h> 48 #include <sys/vnode.h> 49 #include <sys/file.h> 50 #include <sys/proc.h> 51 #include <sys/priv.h> 52 #include <sys/thread.h> 53 #include <sys/globaldata.h> 54 #include <sys/limits.h> 55 56 #include <sys/dmsg.h> 57 58 RB_GENERATE(kdmsg_state_tree, kdmsg_state, rbnode, kdmsg_state_cmp); 59 60 static int kdmsg_msg_receive_handling(kdmsg_msg_t *msg); 61 static int kdmsg_state_msgrx(kdmsg_msg_t *msg); 62 static int kdmsg_state_msgtx(kdmsg_msg_t *msg); 63 static void kdmsg_state_cleanuprx(kdmsg_msg_t *msg); 64 static void kdmsg_state_cleanuptx(kdmsg_msg_t *msg); 65 static void kdmsg_state_abort(kdmsg_state_t *state); 66 static void kdmsg_state_free(kdmsg_state_t *state); 67 68 static void kdmsg_iocom_thread_rd(void *arg); 69 static void kdmsg_iocom_thread_wr(void *arg); 70 static int kdmsg_autorxmsg(kdmsg_msg_t *msg); 71 72 /*static struct lwkt_token kdmsg_token = LWKT_TOKEN_INITIALIZER(kdmsg_token);*/ 73 74 /* 75 * Initialize the roll-up communications structure for a network 76 * messaging session. This function does not install the socket. 77 */ 78 void 79 kdmsg_iocom_init(kdmsg_iocom_t *iocom, void *handle, uint32_t flags, 80 struct malloc_type *mmsg, 81 int (*rcvmsg)(kdmsg_msg_t *msg)) 82 { 83 bzero(iocom, sizeof(*iocom)); 84 iocom->handle = handle; 85 iocom->mmsg = mmsg; 86 iocom->rcvmsg = rcvmsg; 87 iocom->flags = flags; 88 lockinit(&iocom->msglk, "h2msg", 0, 0); 89 TAILQ_INIT(&iocom->msgq); 90 RB_INIT(&iocom->staterd_tree); 91 RB_INIT(&iocom->statewr_tree); 92 93 iocom->state0.iocom = iocom; 94 iocom->state0.parent = &iocom->state0; 95 TAILQ_INIT(&iocom->state0.subq); 96 } 97 98 /* 99 * [Re]connect using the passed file pointer. The caller must ref the 100 * fp for us. We own that ref now. 
101 */ 102 void 103 kdmsg_iocom_reconnect(kdmsg_iocom_t *iocom, struct file *fp, 104 const char *subsysname) 105 { 106 /* 107 * Destroy the current connection 108 */ 109 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 110 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL); 111 while (iocom->msgrd_td || iocom->msgwr_td) { 112 wakeup(&iocom->msg_ctl); 113 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz); 114 } 115 116 /* 117 * Drop communications descriptor 118 */ 119 if (iocom->msg_fp) { 120 fdrop(iocom->msg_fp); 121 iocom->msg_fp = NULL; 122 } 123 124 /* 125 * Setup new communications descriptor 126 */ 127 iocom->msg_ctl = 0; 128 iocom->msg_fp = fp; 129 iocom->msg_seq = 0; 130 iocom->flags &= ~KDMSG_IOCOMF_EXITNOACC; 131 132 lwkt_create(kdmsg_iocom_thread_rd, iocom, &iocom->msgrd_td, 133 NULL, 0, -1, "%s-msgrd", subsysname); 134 lwkt_create(kdmsg_iocom_thread_wr, iocom, &iocom->msgwr_td, 135 NULL, 0, -1, "%s-msgwr", subsysname); 136 lockmgr(&iocom->msglk, LK_RELEASE); 137 } 138 139 /* 140 * Caller sets up iocom->auto_lnk_conn and iocom->auto_lnk_span, then calls 141 * this function to handle the state machine for LNK_CONN and LNK_SPAN. 
142 */ 143 static int kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 144 static int kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg); 145 146 void 147 kdmsg_iocom_autoinitiate(kdmsg_iocom_t *iocom, 148 void (*auto_callback)(kdmsg_msg_t *msg)) 149 { 150 kdmsg_msg_t *msg; 151 152 iocom->auto_callback = auto_callback; 153 154 msg = kdmsg_msg_alloc(&iocom->state0, 155 DMSG_LNK_CONN | DMSGF_CREATE, 156 kdmsg_lnk_conn_reply, NULL); 157 iocom->auto_lnk_conn.head = msg->any.head; 158 msg->any.lnk_conn = iocom->auto_lnk_conn; 159 iocom->conn_state = msg->state; 160 kdmsg_msg_write(msg); 161 } 162 163 static 164 int 165 kdmsg_lnk_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 166 { 167 kdmsg_iocom_t *iocom = state->iocom; 168 kdmsg_msg_t *rmsg; 169 170 /* 171 * Upon receipt of the LNK_CONN acknowledgement initiate an 172 * automatic SPAN if we were asked to. Used by e.g. xdisk, but 173 * not used by HAMMER2 which must manage more than one transmitted 174 * SPAN. 175 */ 176 if ((msg->any.head.cmd & DMSGF_CREATE) && 177 (iocom->flags & KDMSG_IOCOMF_AUTOTXSPAN)) { 178 rmsg = kdmsg_msg_alloc(&iocom->state0, 179 DMSG_LNK_SPAN | DMSGF_CREATE, 180 kdmsg_lnk_span_reply, NULL); 181 iocom->auto_lnk_span.head = rmsg->any.head; 182 rmsg->any.lnk_span = iocom->auto_lnk_span; 183 kdmsg_msg_write(rmsg); 184 } 185 186 /* 187 * Process shim after the CONN is acknowledged and before the CONN 188 * transaction is deleted. For deletions this gives device drivers 189 * the ability to interlock new operations on the circuit before 190 * it becomes illegal and panics. 
191 */ 192 if (iocom->auto_callback) 193 iocom->auto_callback(msg); 194 195 if ((state->txcmd & DMSGF_DELETE) == 0 && 196 (msg->any.head.cmd & DMSGF_DELETE)) { 197 iocom->conn_state = NULL; 198 kdmsg_msg_reply(msg, 0); 199 } 200 201 return (0); 202 } 203 204 static 205 int 206 kdmsg_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg) 207 { 208 /* 209 * Be sure to process shim before terminating the SPAN 210 * transaction. Gives device drivers the ability to 211 * interlock new operations on the circuit before it 212 * becomes illegal and panics. 213 */ 214 if (state->iocom->auto_callback) 215 state->iocom->auto_callback(msg); 216 217 if ((state->txcmd & DMSGF_DELETE) == 0 && 218 (msg->any.head.cmd & DMSGF_DELETE)) { 219 kdmsg_msg_reply(msg, 0); 220 } 221 return (0); 222 } 223 224 /* 225 * Disconnect and clean up 226 */ 227 void 228 kdmsg_iocom_uninit(kdmsg_iocom_t *iocom) 229 { 230 kdmsg_state_t *state; 231 232 /* 233 * Ask the cluster controller to go away 234 */ 235 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 236 atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL); 237 238 while (iocom->msgrd_td || iocom->msgwr_td) { 239 wakeup(&iocom->msg_ctl); 240 lksleep(iocom, &iocom->msglk, 0, "clstrkl", hz); 241 } 242 243 /* 244 * Cleanup caches 245 */ 246 if ((state = iocom->freerd_state) != NULL) { 247 iocom->freerd_state = NULL; 248 kdmsg_state_free(state); 249 } 250 251 if ((state = iocom->freewr_state) != NULL) { 252 iocom->freewr_state = NULL; 253 kdmsg_state_free(state); 254 } 255 256 /* 257 * Drop communications descriptor 258 */ 259 if (iocom->msg_fp) { 260 fdrop(iocom->msg_fp); 261 iocom->msg_fp = NULL; 262 } 263 lockmgr(&iocom->msglk, LK_RELEASE); 264 } 265 266 /* 267 * Cluster controller thread. Perform messaging functions. We have one 268 * thread for the reader and one for the writer. The writer handles 269 * shutdown requests (which should break the reader thread). 
 *
 * Reader side: pulls one message at a time off iocom->msg_fp (base
 * header, any extended header, then the aligned aux payload) and hands
 * it to kdmsg_msg_receive_handling().  On exit it shuts the descriptor
 * down, sets KILL and KILLRX, waits for the writer to set KILLTX and
 * finally clears iocom->msgrd_td.
 */
static
void
kdmsg_iocom_thread_rd(void *arg)
{
	kdmsg_iocom_t *iocom = arg;
	dmsg_hdr_t hdr;
	kdmsg_msg_t *msg = NULL;
	size_t hbytes;
	size_t abytes;
	int error = 0;

	/*
	 * NOTE(review): only the KILL flag terminates this loop.  A
	 * non-zero return from kdmsg_msg_receive_handling() is not
	 * tested here and is overwritten by the next fp_read() -
	 * confirm that is intended.
	 */
	while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0) {
		/*
		 * Retrieve the message from the pipe or socket.
		 */
		error = fp_read(iocom->msg_fp, &hdr, sizeof(hdr),
				NULL, 1, UIO_SYSSPACE);
		if (error)
			break;
		if (hdr.magic != DMSG_HDR_MAGIC) {
			kprintf("kdmsg: bad magic: %04x\n", hdr.magic);
			error = EINVAL;
			break;
		}

		/*
		 * Total header size is encoded in the low bits of cmd
		 * in units of DMSG_ALIGN.
		 *
		 * NOTE(review): the upper bound used here is
		 * DMSG_AUX_MAX, the same constant that limits the aux
		 * payload below - confirm a header-specific maximum
		 * was not intended.
		 */
		hbytes = (hdr.cmd & DMSGF_SIZE) * DMSG_ALIGN;
		if (hbytes < sizeof(hdr) || hbytes > DMSG_AUX_MAX) {
			kprintf("kdmsg: bad header size %zd\n", hbytes);
			error = EINVAL;
			break;
		}

		/* XXX messy: mask cmd to avoid allocating state */
		msg = kdmsg_msg_alloc(&iocom->state0,
				      hdr.cmd & DMSGF_BASECMDMASK,
				      NULL, NULL);
		msg->any.head = hdr;
		msg->hdr_size = hbytes;

		/*
		 * Read the extended header, if any, directly behind the
		 * base header already copied into the message.
		 */
		if (hbytes > sizeof(hdr)) {
			error = fp_read(iocom->msg_fp, &msg->any.head + 1,
					hbytes - sizeof(hdr),
					NULL, 1, UIO_SYSSPACE);
			if (error) {
				kprintf("kdmsg: short msg received\n");
				error = EINVAL;
				break;
			}
		}
		msg->aux_size = hdr.aux_bytes;
		if (msg->aux_size > DMSG_AUX_MAX) {
			kprintf("kdmsg: illegal msg payload size %zd\n",
				msg->aux_size);
			error = EINVAL;
			break;
		}

		/*
		 * The payload is transferred rounded up via
		 * DMSG_DOALIGN().  KDMSG_FLAG_AUXALLOC records that
		 * aux_data was allocated here.
		 */
		if (msg->aux_size) {
			abytes = DMSG_DOALIGN(msg->aux_size);
			msg->aux_data = kmalloc(abytes, iocom->mmsg, M_WAITOK);
			msg->flags |= KDMSG_FLAG_AUXALLOC;
			error = fp_read(iocom->msg_fp, msg->aux_data,
					abytes, NULL, 1, UIO_SYSSPACE);
			if (error) {
				kprintf("kdmsg: short msg payload received\n");
				break;
			}
		}

		/*
		 * The handler disposes of the message on every path, so
		 * forget our pointer to avoid freeing it again below.
		 */
		error = kdmsg_msg_receive_handling(msg);
		msg = NULL;
	}

	if (error)
		kprintf("kdmsg: read failed error %d\n", error);

	/*
	 * A message that was allocated but never handed to the receive
	 * handler (an error break above) is freed here.
	 */
	lockmgr(&iocom->msglk, LK_EXCLUSIVE);
	if (msg)
		kdmsg_msg_free(msg);

	/*
	 * Shutdown the socket before waiting for the transmit side.
	 *
	 * If we are dying due to e.g. a socket disconnect versus being
	 * killed explicitly we have to set KILL in order to kick the tx
	 * side when it might not have any other work to do.  KILL might
	 * already be set if we are in an unmount or reconnect.
	 */
	fp_shutdown(iocom->msg_fp, SHUT_RDWR);

	atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILL);
	wakeup(&iocom->msg_ctl);

	/*
	 * Wait for the transmit side to drain remaining messages
	 * before cleaning up the rx state.  The transmit side will
	 * set KILLTX and wait for the rx side to completely finish
	 * (set msgrd_td to NULL) before cleaning up any remaining
	 * tx states.
	 */
	lockmgr(&iocom->msglk, LK_RELEASE);
	atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLRX);
	wakeup(&iocom->msg_ctl);
	while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILLTX) == 0) {
		wakeup(&iocom->msg_ctl);
		tsleep(iocom, 0, "clstrkw", hz);
	}

	iocom->msgrd_td = NULL;

	/*
	 * iocom can be ripped out from under us at this point but
	 * wakeup() is safe.
	 */
	wakeup(iocom);
	lwkt_exit();
}

/*
 * Writer side of the cluster controller.
 *
 * Dequeues messages from iocom->msgq, runs transmit-side state tracking
 * on each and writes header plus aligned aux payload to iocom->msg_fp.
 * When KILL is set or a write fails it shuts the descriptor down, sets
 * KILLTX, waits for the reader to finish, then repeatedly drains the
 * queue and simulates DELETEs until both state trees are empty.
 */
static
void
kdmsg_iocom_thread_wr(void *arg)
{
	kdmsg_iocom_t *iocom = arg;
	kdmsg_msg_t *msg;
	kdmsg_state_t *state;
	ssize_t res;
	size_t abytes;
	int error = 0;
	int retries = 20;

	/*
	 * Transmit loop
	 */
	msg = NULL;
	lockmgr(&iocom->msglk, LK_EXCLUSIVE);

	while ((iocom->msg_ctl & KDMSG_CLUSTERCTL_KILL) == 0 && error == 0) {
		/*
		 * Sleep if no messages pending.  Interlock with flag while
		 * holding msglk.
		 */
		if (TAILQ_EMPTY(&iocom->msgq)) {
			atomic_set_int(&iocom->msg_ctl,
				       KDMSG_CLUSTERCTL_SLEEPING);
			lksleep(&iocom->msg_ctl, &iocom->msglk, 0, "msgwr", hz);
			atomic_clear_int(&iocom->msg_ctl,
					 KDMSG_CLUSTERCTL_SLEEPING);
		}

		/*
		 * msglk is held at the top of every iteration and is
		 * re-acquired on every path that continues or leaves
		 * this inner loop.
		 */
		while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
			/*
			 * Remove msg from the transmit queue and do
			 * persist and half-closed state handling.
			 */
			TAILQ_REMOVE(&iocom->msgq, msg, qentry);
			lockmgr(&iocom->msglk, LK_RELEASE);

			/*
			 * EALREADY means discard silently and keep
			 * going; any other error ends transmission.
			 */
			error = kdmsg_state_msgtx(msg);
			if (error == EALREADY) {
				error = 0;
				kdmsg_msg_free(msg);
				lockmgr(&iocom->msglk, LK_EXCLUSIVE);
				continue;
			}
			if (error) {
				kdmsg_msg_free(msg);
				lockmgr(&iocom->msglk, LK_EXCLUSIVE);
				break;
			}

			/*
			 * Dump the message to the pipe or socket.
			 *
			 * We have to clean up the message as if the transmit
			 * succeeded even if it failed.
			 */
			error = fp_write(iocom->msg_fp, &msg->any,
					 msg->hdr_size, &res, UIO_SYSSPACE);
			if (error || res != msg->hdr_size) {
				/* a short write is reported as EINVAL */
				if (error == 0)
					error = EINVAL;
				kdmsg_state_cleanuptx(msg);
				lockmgr(&iocom->msglk, LK_EXCLUSIVE);
				break;
			}
			if (msg->aux_size) {
				abytes = DMSG_DOALIGN(msg->aux_size);
				error = fp_write(iocom->msg_fp,
						 msg->aux_data, abytes,
						 &res, UIO_SYSSPACE);
				if (error || res != abytes) {
					if (error == 0)
						error = EINVAL;
					kdmsg_state_cleanuptx(msg);
					lockmgr(&iocom->msglk, LK_EXCLUSIVE);
					break;
				}
			}
			kdmsg_state_cleanuptx(msg);
			lockmgr(&iocom->msglk, LK_EXCLUSIVE);
		}
	}

	/*
	 * Cleanup messages pending transmission and release msgq lock.
	 */
	if (error)
		kprintf("kdmsg: write failed error %d\n", error);
	kprintf("thread_wr: Terminating iocom\n");

	/*
	 * Shutdown the socket.  This will cause the rx thread to get an
	 * EOF and ensure that both threads get to a termination state.
	 */
	fp_shutdown(iocom->msg_fp, SHUT_RDWR);

	/*
	 * Set KILLTX (which the rx side waits for), then wait for the RX
	 * side to completely finish before we clean out any remaining
	 * command states.
	 */
	lockmgr(&iocom->msglk, LK_RELEASE);
	atomic_set_int(&iocom->msg_ctl, KDMSG_CLUSTERCTL_KILLTX);
	wakeup(&iocom->msg_ctl);
	while (iocom->msgrd_td) {
		wakeup(&iocom->msg_ctl);
		tsleep(iocom, 0, "clstrkw", hz);
	}
	lockmgr(&iocom->msglk, LK_EXCLUSIVE);

	/*
	 * Simulate received MSGF_DELETE's for any remaining states.
	 * (For remote masters).
	 *
	 * Drain the message queue to handle any device initiated writes
	 * due to state callbacks.
	 *
	 * Every abort below drops msglk, so the scan restarts from the
	 * top via cleanuprd instead of continuing the iteration.
	 */
cleanuprd:
	RB_FOREACH(state, kdmsg_state_tree, &iocom->staterd_tree)
		atomic_set_int(&state->flags, KDMSG_STATE_DYING);
	RB_FOREACH(state, kdmsg_state_tree, &iocom->statewr_tree)
		atomic_set_int(&state->flags, KDMSG_STATE_DYING);
	kdmsg_drain_msgq(iocom);
	RB_FOREACH(state, kdmsg_state_tree, &iocom->staterd_tree) {
		if ((state->rxcmd & DMSGF_DELETE) == 0) {
			lockmgr(&iocom->msglk, LK_RELEASE);
			kdmsg_state_abort(state);
			lockmgr(&iocom->msglk, LK_EXCLUSIVE);
			goto cleanuprd;
		}
	}

	/*
	 * Simulate received MSGF_DELETE's for any remaining states.
	 * (For local masters).
	 */
	kdmsg_drain_msgq(iocom);
	RB_FOREACH(state, kdmsg_state_tree, &iocom->statewr_tree) {
		if ((state->rxcmd & DMSGF_DELETE) == 0) {
			lockmgr(&iocom->msglk, LK_RELEASE);
			kdmsg_state_abort(state);
			lockmgr(&iocom->msglk, LK_EXCLUSIVE);
			goto cleanuprd;
		}
	}

	/*
	 * Retry until all work is done
	 *
	 * retries is only decremented when a full pass reaches this
	 * point.  The panic is tested before the emptiness check, so
	 * the final permitted pass panics even if everything has
	 * drained.
	 */
	if (--retries == 0)
		panic("kdmsg: comm thread shutdown couldn't drain");
	if (TAILQ_FIRST(&iocom->msgq) ||
	    RB_ROOT(&iocom->staterd_tree) ||
	    RB_ROOT(&iocom->statewr_tree)) {
		goto cleanuprd;
	}
	iocom->flags |= KDMSG_IOCOMF_EXITNOACC;

	lockmgr(&iocom->msglk, LK_RELEASE);

	/*
	 * The state trees had better be empty now
	 */
	KKASSERT(RB_EMPTY(&iocom->staterd_tree));
	KKASSERT(RB_EMPTY(&iocom->statewr_tree));
	KKASSERT(iocom->conn_state == NULL);

	if (iocom->exit_func) {
		/*
		 * iocom is invalid after we call the exit function.
		 */
		iocom->msgwr_td = NULL;
		iocom->exit_func(iocom);
	} else {
		/*
		 * iocom can be ripped out from under us once msgwr_td is
		 * set to NULL.  The wakeup is safe.
		 */
		iocom->msgwr_td = NULL;
		wakeup(iocom);
	}
	lwkt_exit();
}

/*
 * This cleans out the pending transmit message queue, adjusting any
 * persistent states properly in the process.
 *
 * Caller must hold iocom->msglk.  The lock is dropped around the
 * per-message state processing and is held again on return.
 */
void
kdmsg_drain_msgq(kdmsg_iocom_t *iocom)
{
	kdmsg_msg_t *msg;

	/*
	 * Clean out our pending transmit queue, executing the
	 * appropriate state adjustments.  If this tries to open
	 * any new outgoing transactions we have to loop up and
	 * clean them out.
	 */
	while ((msg = TAILQ_FIRST(&iocom->msgq)) != NULL) {
		TAILQ_REMOVE(&iocom->msgq, msg, qentry);
		lockmgr(&iocom->msglk, LK_RELEASE);
		/*
		 * A non-zero return means the message was rejected by
		 * state tracking and is simply freed; otherwise it is
		 * cleaned up as though it had been transmitted.
		 */
		if (kdmsg_state_msgtx(msg))
			kdmsg_msg_free(msg);
		else
			kdmsg_state_cleanuptx(msg);
		lockmgr(&iocom->msglk, LK_EXCLUSIVE);
	}
}

/*
 * Do all processing required to handle a freshly received message
 * after its low level header has been validated.
 *
 * The message is always consumed: it is freed directly when state
 * tracking rejects it, otherwise kdmsg_state_cleanuprx() disposes of
 * it after the handler has run.  Returns 0 or the error from state
 * tracking / the handler (EALREADY is converted to 0).
 */
static
int
kdmsg_msg_receive_handling(kdmsg_msg_t *msg)
{
	kdmsg_iocom_t *iocom = msg->state->iocom;
	int error;

	/*
	 * State machine tracking, state assignment for msg,
	 * returns error and discard status.  Errors are fatal
	 * to the connection except for EALREADY which forces
	 * a discard without execution.
	 */
	error = kdmsg_state_msgrx(msg);
	if (error) {
		/*
		 * Raw protocol or connection error
		 */
		kdmsg_msg_free(msg);
		if (error == EALREADY)
			error = 0;
	} else if (msg->state && msg->state->func) {
		/*
		 * Message related to state which already has a
		 * handling function installed for it.
		 */
		error = msg->state->func(msg->state, msg);
		kdmsg_state_cleanuprx(msg);
	} else if (iocom->flags & KDMSG_IOCOMF_AUTOANY) {
		/*
		 * Automatic LNK_CONN / LNK_SPAN handling is enabled.
		 */
		error = kdmsg_autorxmsg(msg);
		kdmsg_state_cleanuprx(msg);
	} else {
		/*
		 * Everything else goes to the iocom's receive callback.
		 */
		error = iocom->rcvmsg(msg);
		kdmsg_state_cleanuprx(msg);
	}
	return error;
}

/*
 * Process state tracking for a message after reception, prior to
 * execution.
 *
 * Called without msglk held; the lock is acquired internally while the
 * state trees are examined and modified.
 *
 * All messages are called with dummy state and return actual state.
 * (One-off messages often just return the same dummy state).
 *
 * May request that the caller discard the message by returning
 * EALREADY.  Any other non-zero return is a protocol error.
 *
 * --
 *
 * These routines handle persistent and command/reply message state via the
 * CREATE and DELETE flags.  The first message in a command or reply sequence
 * sets CREATE, the last message in a command or reply sequence sets DELETE.
 *
 * There can be any number of intermediate messages belonging to the same
 * sequence sent in between the CREATE message and the DELETE message,
 * which set neither flag.  This represents a streaming command or reply.
 *
 * Any command message received with CREATE set expects a reply sequence to
 * be returned.  Reply sequences work the same as command sequences except the
 * REPLY bit is also set.  Both the command side and reply side can
 * degenerate into a single message with both CREATE and DELETE set.  Note
 * that one side can be streaming and the other side not, or neither, or both.
 *
 * The msgid is unique for the initiator.  That is, two sides sending a new
 * message can use the same msgid without colliding.
 *
 * --
 *
 * ABORT sequences work by setting the ABORT flag along with normal message
 * state.  However, ABORTs can also be sent on half-closed messages, that is
 * even if the command or reply side has already sent a DELETE, as long as
 * the message has not been fully closed it can still send an ABORT+DELETE
 * to terminate the half-closed message state.
 *
 * Since ABORT+DELETEs can race we silently discard ABORTs for message
 * state which has already been fully closed.  REPLY+ABORT+DELETEs can
 * also race, and in this situation the other side might have already
 * initiated a new unrelated command with the same message id.  Since
 * the abort has not set the CREATE flag the situation can be detected
 * and the message will also be discarded.
 *
 * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE].
 * The ABORT request is essentially integrated into the command instead
 * of being sent later on.  In this situation the command implementation
 * detects that CREATE and ABORT are both set (vs ABORT alone) and can
 * special-case non-blocking operation for the command.
 *
 * NOTE!  Messages with ABORT set without CREATE or DELETE are considered
 *	  to be mid-stream aborts for command/reply sequences.  ABORTs on
 *	  one-way messages are not supported.
 *
 * NOTE!  If a command sequence does not support aborts the ABORT flag is
 *	  simply ignored.
 *
 * --
 *
 * One-off messages (no reply expected) are sent with neither CREATE or DELETE
 * set.  One-off messages cannot be aborted and typically aren't processed
 * by these routines.  The REPLY bit can be used to distinguish whether a
 * one-off message is a command or reply.  For example, one-off replies
 * will typically just contain status updates.
 *
 * --
 *
 * Returns 0 when the message should be executed, EALREADY when it
 * should be silently discarded, or EINVAL on a protocol violation.
 * msg->state and msg->tcmd are assigned on every return path.
 */
static
int
kdmsg_state_msgrx(kdmsg_msg_t *msg)
{
	kdmsg_iocom_t *iocom = msg->state->iocom;
	kdmsg_state_t *state;
	kdmsg_state_t *pstate;
	kdmsg_state_t sdummy;
	int error;

	/*
	 * Make sure a state structure is ready to go in case we need a new
	 * one.  This is the only routine which uses freerd_state so no
	 * races are possible.
	 *
	 * The allocation uses M_WAITOK, so it is done before msglk is
	 * taken below.
	 */
	if ((state = iocom->freerd_state) == NULL) {
		state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO);
		state->flags = KDMSG_STATE_DYNAMIC;
		state->iocom = iocom;
		TAILQ_INIT(&state->subq);
		iocom->freerd_state = state;
	}

	/*
	 * Lock RB tree and locate existing persistent state, if any.
	 *
	 * If received msg is a command state is on staterd_tree.
	 * If received msg is a reply state is on statewr_tree.
	 *
	 * The tree is selected by DMSGF_REVTRANS.  sdummy is a stack
	 * search key; only its msgid and iocom fields are filled in.
	 */
	lockmgr(&iocom->msglk, LK_EXCLUSIVE);

	sdummy.msgid = msg->any.head.msgid;
	sdummy.iocom = iocom;
	if (msg->any.head.cmd & DMSGF_REVTRANS) {
		state = RB_FIND(kdmsg_state_tree, &iocom->statewr_tree,
				&sdummy);
	} else {
		state = RB_FIND(kdmsg_state_tree, &iocom->staterd_tree,
				&sdummy);
	}
	/* no match: the message is associated with the root state */
	if (state == NULL)
		state = &iocom->state0;
	msg->state = state;

	/*
	 * Short-cut one-off or mid-stream messages.
	 */
	if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE |
				  DMSGF_ABORT)) == 0) {
		error = 0;
		goto done;
	}

	/*
	 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
	 * inside the case statements.
	 */
	switch(msg->any.head.cmd & (DMSGF_CREATE|DMSGF_DELETE|DMSGF_REPLY)) {
	case DMSGF_CREATE:
	case DMSGF_CREATE | DMSGF_DELETE:
		/*
		 * New persistent command received.
		 */
		if (state != &iocom->state0) {
			kprintf("kdmsg_state_msgrx: duplicate transaction\n");
			error = EINVAL;
			break;
		}

		/*
		 * Lookup the circuit.  The circuit is an open transaction.
		 * the REVCIRC bit in the message tells us which side
		 * initiated the transaction representing the circuit.
		 */
		if (msg->any.head.circuit) {
			sdummy.msgid = msg->any.head.circuit;

			if (msg->any.head.cmd & DMSGF_REVCIRC) {
				pstate = RB_FIND(kdmsg_state_tree,
						 &iocom->statewr_tree,
						 &sdummy);
			} else {
				pstate = RB_FIND(kdmsg_state_tree,
						 &iocom->staterd_tree,
						 &sdummy);
			}
			if (pstate == NULL) {
				kprintf("kdmsg_state_msgrx: "
					"missing parent in stacked trans\n");
				error = EINVAL;
				break;
			}
		} else {
			pstate = &iocom->state0;
		}

		/*
		 * Allocate new state
		 *
		 * The pre-allocated spare is consumed.  DELETE is masked
		 * out of rxcmd here; kdmsg_state_cleanuprx() sets it
		 * after the message has been executed.
		 */
		state = iocom->freerd_state;
		iocom->freerd_state = NULL;

		msg->state = state;
		state->parent = pstate;
		KKASSERT(state->iocom == iocom);
		state->flags |= KDMSG_STATE_INSERTED |
				KDMSG_STATE_OPPOSITE;
		state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK;
		state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
		state->txcmd = DMSGF_REPLY;
		state->msgid = msg->any.head.msgid;
		RB_INSERT(kdmsg_state_tree, &iocom->staterd_tree, state);
		TAILQ_INSERT_TAIL(&pstate->subq, state, entry);
		error = 0;
		break;
	case DMSGF_DELETE:
		/*
		 * Persistent state is expected but might not exist if an
		 * ABORT+DELETE races the close.
		 */
		if (state == &iocom->state0) {
			if (msg->any.head.cmd & DMSGF_ABORT) {
				error = EALREADY;
			} else {
				kprintf("kdmsg_state_msgrx: "
					"no state for DELETE\n");
				error = EINVAL;
			}
			break;
		}

		/*
		 * Handle another ABORT+DELETE case if the msgid has already
		 * been reused.
		 */
		if ((state->rxcmd & DMSGF_CREATE) == 0) {
			if (msg->any.head.cmd & DMSGF_ABORT) {
				error = EALREADY;
			} else {
				kprintf("kdmsg_state_msgrx: "
					"state reused for DELETE\n");
				error = EINVAL;
			}
			break;
		}
		error = 0;
		break;
	default:
		/*
		 * Check for mid-stream ABORT command received, otherwise
		 * allow.
		 */
		if (msg->any.head.cmd & DMSGF_ABORT) {
			if (state == &iocom->state0 ||
			    (state->rxcmd & DMSGF_CREATE) == 0) {
				error = EALREADY;
				break;
			}
		}
		error = 0;
		break;
	case DMSGF_REPLY | DMSGF_CREATE:
	case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE:
		/*
		 * When receiving a reply with CREATE set the original
		 * persistent state message should already exist.
		 */
		if (state == &iocom->state0) {
			kprintf("kdmsg_state_msgrx: no state match for "
				"REPLY cmd=%08x msgid=%016jx\n",
				msg->any.head.cmd,
				(intmax_t)msg->any.head.msgid);
			error = EINVAL;
			break;
		}
		state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE;
		error = 0;
		break;
	case DMSGF_REPLY | DMSGF_DELETE:
		/*
		 * Received REPLY+ABORT+DELETE in case where msgid has
		 * already been fully closed, ignore the message.
		 */
		if (state == &iocom->state0) {
			if (msg->any.head.cmd & DMSGF_ABORT) {
				error = EALREADY;
			} else {
				kprintf("kdmsg_state_msgrx: no state match "
					"for REPLY|DELETE\n");
				error = EINVAL;
			}
			break;
		}

		/*
		 * Received REPLY+ABORT+DELETE in case where msgid has
		 * already been reused for an unrelated message,
		 * ignore the message.
		 */
		if ((state->rxcmd & DMSGF_CREATE) == 0) {
			if (msg->any.head.cmd & DMSGF_ABORT) {
				error = EALREADY;
			} else {
				kprintf("kdmsg_state_msgrx: state reused "
					"for REPLY|DELETE\n");
				error = EINVAL;
			}
			break;
		}
		error = 0;
		break;
	case DMSGF_REPLY:
		/*
		 * Check for mid-stream ABORT reply received to sent command.
		 */
		if (msg->any.head.cmd & DMSGF_ABORT) {
			if (state == &iocom->state0 ||
			    (state->rxcmd & DMSGF_CREATE) == 0) {
				error = EALREADY;
				break;
			}
		}
		error = 0;
		break;
	}

	/*
	 * Calculate the easy-switch() transactional command.  Represents
	 * the outer-transaction command for any transaction-create or
	 * transaction-delete, and the inner message command for any
	 * non-transaction or inside-transaction command.  tcmd will be
	 * set to 0 if the message state is illegal.
	 *
	 * The two can be told apart because outer-transaction commands
	 * always have a DMSGF_CREATE and/or DMSGF_DELETE flag.
	 */
done:
	lockmgr(&iocom->msglk, LK_RELEASE);

	if (msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE)) {
		if (state != &iocom->state0) {
			msg->tcmd = (msg->state->icmd & DMSGF_BASECMDMASK) |
				    (msg->any.head.cmd & (DMSGF_CREATE |
							  DMSGF_DELETE |
							  DMSGF_REPLY));
		} else {
			msg->tcmd = 0;
		}
	} else {
		msg->tcmd = msg->any.head.cmd & DMSGF_CMDSWMASK;
	}
	return (error);
}

/*
 * Called instead of iocom->rcvmsg() if any of the AUTO flags are set.
 * This routine must call iocom->rcvmsg() for anything not automatically
 * handled.
 *
 * Returns 0 or the value returned by iocom->rcvmsg().
 */
static int
kdmsg_autorxmsg(kdmsg_msg_t *msg)
{
	kdmsg_iocom_t *iocom = msg->state->iocom;
	int error = 0;
	uint32_t cmd;

	/*
	 * Main switch processes transaction create/delete sequences only.
	 * Use the state's icmd for the base command rather than the
	 * command in the message header (DELETEs use DMSG_LNK_ERROR).
	 *
	 * NOTE: If processing in-transaction messages you generally want
	 *	 an inner switch on msg->any.head.cmd.
	 *
	 * NOTE(review): msg->state has already been dereferenced in the
	 *	 initializer above, so the else branch below looks
	 *	 unreachable - confirm.
	 */
	if (msg->state) {
		cmd = (msg->state->icmd & DMSGF_BASECMDMASK) |
		      (msg->any.head.cmd & (DMSGF_CREATE |
					    DMSGF_DELETE |
					    DMSGF_REPLY));
	} else {
		cmd = 0;
	}

	switch(cmd) {
	case DMSG_LNK_CONN | DMSGF_CREATE:
	case DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_DELETE:
		/*
		 * Received LNK_CONN transaction.  Transmit response and
		 * leave transaction open, which allows the other end to
		 * start to the SPAN protocol.
		 *
		 * Handle shim after acknowledging the CONN.
		 */
		if ((msg->any.head.cmd & DMSGF_DELETE) == 0) {
			if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) {
				kdmsg_msg_result(msg, 0);
				if (iocom->auto_callback)
					iocom->auto_callback(msg);
			} else {
				error = iocom->rcvmsg(msg);
			}
			break;
		}
		/* fall through */
	case DMSG_LNK_CONN | DMSGF_DELETE:
		/*
		 * This message is usually simulated after a link is lost
		 * to clean up the transaction.
		 */
		if (iocom->flags & KDMSG_IOCOMF_AUTOCONN) {
			if (iocom->auto_callback)
				iocom->auto_callback(msg);
			kdmsg_msg_reply(msg, 0);
		} else {
			error = iocom->rcvmsg(msg);
		}
		break;
	case DMSG_LNK_SPAN | DMSGF_CREATE:
	case DMSG_LNK_SPAN | DMSGF_CREATE | DMSGF_DELETE:
		/*
		 * Received LNK_SPAN transaction.  We do not have to respond
		 * (except on termination), but we must leave the transaction
		 * open.
		 *
		 * Handle shim after acknowledging the SPAN.
		 */
		if (iocom->flags & KDMSG_IOCOMF_AUTORXSPAN) {
			if ((msg->any.head.cmd & DMSGF_DELETE) == 0) {
				if (iocom->auto_callback)
					iocom->auto_callback(msg);
				break;
			}
			/* fall through */
		} else {
			error = iocom->rcvmsg(msg);
			break;
		}
		/* fall through */
	case DMSG_LNK_SPAN | DMSGF_DELETE:
		/*
		 * Process shims (auto_callback) before cleaning up the
		 * circuit structure and closing the transactions.  Device
		 * driver should ensure that the circuit is not used after
		 * the auto_callback() returns.
		 *
		 * Handle shim before closing the SPAN transaction.
		 */
		if (iocom->flags & KDMSG_IOCOMF_AUTORXSPAN) {
			if (iocom->auto_callback)
				iocom->auto_callback(msg);
			kdmsg_msg_reply(msg, 0);
		} else {
			error = iocom->rcvmsg(msg);
		}
		break;
	default:
		/*
		 * Anything unhandled goes into rcvmsg.
		 *
		 * NOTE: Replies to link-level messages initiated by our side
		 *	 are handled by the state callback, they are NOT
		 *	 handled here.
		 */
		error = iocom->rcvmsg(msg);
		break;
	}
	return (error);
}

/*
 * Post-receive-handling message and state cleanup.  This routine is called
 * after the state function handling/callback to properly dispose of the
 * message and update or dispose of the state.
 *
 * The message is freed on every path.  When the received message carries
 * DELETE the state is marked rx-closed; if the tx side has also sent
 * DELETE the state is removed from its tree and from its parent's subq.
 */
static
void
kdmsg_state_cleanuprx(kdmsg_msg_t *msg)
{
	kdmsg_iocom_t *iocom = msg->state->iocom;
	kdmsg_state_t *state;
	kdmsg_state_t *pstate;

	/*
	 * NOTE(review): msg->state is dereferenced in the initializer
	 * above, so the NULL branch below looks unreachable - confirm.
	 */
	if ((state = msg->state) == NULL) {
		kdmsg_msg_free(msg);
	} else if (msg->any.head.cmd & DMSGF_DELETE) {
		lockmgr(&iocom->msglk, LK_EXCLUSIVE);
		KKASSERT((state->rxcmd & DMSGF_DELETE) == 0);
		state->rxcmd |= DMSGF_DELETE;
		if (state->txcmd & DMSGF_DELETE) {
			/*
			 * Both directions are now closed.  The REPLY bit
			 * in rxcmd selects which tree the state is on.
			 */
			KKASSERT(state->flags & KDMSG_STATE_INSERTED);
			if (state->rxcmd & DMSGF_REPLY) {
				KKASSERT(msg->any.head.cmd &
					 DMSGF_REPLY);
				RB_REMOVE(kdmsg_state_tree,
					  &iocom->statewr_tree, state);
			} else {
				KKASSERT((msg->any.head.cmd &
					  DMSGF_REPLY) == 0);
				RB_REMOVE(kdmsg_state_tree,
					  &iocom->staterd_tree, state);
			}

			/*
			 * Unlink from the parent.  A parent that is not
			 * the root state, has no remaining sub-states
			 * and is no longer inserted is freed here.
			 */
			pstate = state->parent;
			TAILQ_REMOVE(&pstate->subq, state, entry);
			if (pstate != &pstate->iocom->state0 &&
			    TAILQ_EMPTY(&pstate->subq) &&
			    (pstate->flags & KDMSG_STATE_INSERTED) == 0) {
				kdmsg_state_free(pstate);
			}
			state->flags &= ~KDMSG_STATE_INSERTED;
			state->parent = NULL;
			kdmsg_msg_free(msg);

			/*
			 * The state itself is only freed once it has no
			 * sub-states of its own.
			 */
			if (TAILQ_EMPTY(&state->subq))
				kdmsg_state_free(state);
			lockmgr(&iocom->msglk, LK_RELEASE);
		} else {
			/* half-closed: the tx side is still open */
			kdmsg_msg_free(msg);
			lockmgr(&iocom->msglk, LK_RELEASE);
		}
	} else {
		kdmsg_msg_free(msg);
	}
}

/*
 * Simulate receiving a message which terminates an active transaction
 * state.
Our simulated received message must set DELETE and may also 1134 * have to set CREATE. It must also ensure that all fields are set such 1135 * that the receive handling code can find the state (kdmsg_state_msgrx()) 1136 * or an endless loop will ensue. 1137 * 1138 * This is used when the other end of the link is dead so the device driver 1139 * gets a completed transaction for all pending states. 1140 */ 1141 static 1142 void 1143 kdmsg_state_abort(kdmsg_state_t *state) 1144 { 1145 kdmsg_msg_t *msg; 1146 1147 /* 1148 * Prevent recursive aborts which could otherwise occur if the 1149 * simulated message reception runs state->func which then turns 1150 * around and tries to reply to a broken circuit when then calls 1151 * the state abort code again. 1152 */ 1153 if (state->flags & KDMSG_STATE_ABORTING) 1154 return; 1155 state->flags |= KDMSG_STATE_ABORTING; 1156 1157 /* 1158 * NOTE: Args to kdmsg_msg_alloc() to avoid dynamic state allocation. 1159 * 1160 * NOTE: We are simulating a received message using our state 1161 * (vs a message generated by the other side using its state), 1162 * so we must invert DMSGF_REVTRANS and DMSGF_REVCIRC. 1163 */ 1164 msg = kdmsg_msg_alloc(state, DMSG_LNK_ERROR, NULL, NULL); 1165 if ((state->rxcmd & DMSGF_CREATE) == 0) 1166 msg->any.head.cmd |= DMSGF_CREATE; 1167 msg->any.head.cmd |= DMSGF_DELETE | (state->rxcmd & DMSGF_REPLY); 1168 msg->any.head.cmd ^= (DMSGF_REVTRANS | DMSGF_REVCIRC); 1169 msg->any.head.error = DMSG_ERR_LOSTLINK; 1170 kdmsg_msg_receive_handling(msg); 1171 } 1172 1173 /* 1174 * Process state tracking for a message prior to transmission. 1175 * 1176 * Called with msglk held and the msg dequeued. Returns non-zero if 1177 * the message is bad and should be deleted by the caller. 1178 * 1179 * One-off messages are usually with dummy state and msg->state may be NULL 1180 * in this situation. 1181 * 1182 * New transactions (when CREATE is set) will insert the state. 
1183 * 1184 * May request that caller discard the message by setting *discardp to 1. 1185 * A NULL state may be returned in this case. 1186 */ 1187 static 1188 int 1189 kdmsg_state_msgtx(kdmsg_msg_t *msg) 1190 { 1191 kdmsg_iocom_t *iocom = msg->state->iocom; 1192 kdmsg_state_t *state; 1193 int error; 1194 1195 /* 1196 * Make sure a state structure is ready to go in case we need a new 1197 * one. This is the only routine which uses freewr_state so no 1198 * races are possible. 1199 */ 1200 if ((state = iocom->freewr_state) == NULL) { 1201 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 1202 state->flags = KDMSG_STATE_DYNAMIC; 1203 state->iocom = iocom; 1204 iocom->freewr_state = state; 1205 } 1206 1207 /* 1208 * Lock RB tree. If persistent state is present it will have already 1209 * been assigned to msg. 1210 */ 1211 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1212 state = msg->state; 1213 1214 /* 1215 * Short-cut one-off or mid-stream messages (state may be NULL). 1216 */ 1217 if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 1218 DMSGF_ABORT)) == 0) { 1219 lockmgr(&iocom->msglk, LK_RELEASE); 1220 return(0); 1221 } 1222 1223 1224 /* 1225 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from 1226 * inside the case statements. 1227 */ 1228 switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 1229 DMSGF_REPLY)) { 1230 case DMSGF_CREATE: 1231 case DMSGF_CREATE | DMSGF_DELETE: 1232 /* 1233 * Insert the new persistent message state and mark 1234 * half-closed if DELETE is set. Since this is a new 1235 * message it isn't possible to transition into the fully 1236 * closed state here. 1237 * 1238 * XXX state must be assigned and inserted by 1239 * kdmsg_msg_write(). txcmd is assigned by us 1240 * on-transmit. 
1241 */ 1242 KKASSERT(state != NULL); 1243 state->icmd = msg->any.head.cmd & DMSGF_BASECMDMASK; 1244 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE; 1245 state->rxcmd = DMSGF_REPLY; 1246 error = 0; 1247 break; 1248 case DMSGF_DELETE: 1249 /* 1250 * Sent ABORT+DELETE in case where msgid has already 1251 * been fully closed, ignore the message. 1252 */ 1253 if (state == &iocom->state0) { 1254 if (msg->any.head.cmd & DMSGF_ABORT) { 1255 error = EALREADY; 1256 } else { 1257 kprintf("kdmsg_state_msgtx: no state match " 1258 "for DELETE cmd=%08x msgid=%016jx\n", 1259 msg->any.head.cmd, 1260 (intmax_t)msg->any.head.msgid); 1261 error = EINVAL; 1262 } 1263 break; 1264 } 1265 1266 /* 1267 * Sent ABORT+DELETE in case where msgid has 1268 * already been reused for an unrelated message, 1269 * ignore the message. 1270 */ 1271 if ((state->txcmd & DMSGF_CREATE) == 0) { 1272 if (msg->any.head.cmd & DMSGF_ABORT) { 1273 error = EALREADY; 1274 } else { 1275 kprintf("kdmsg_state_msgtx: state reused " 1276 "for DELETE\n"); 1277 error = EINVAL; 1278 } 1279 break; 1280 } 1281 error = 0; 1282 break; 1283 default: 1284 /* 1285 * Check for mid-stream ABORT command sent 1286 */ 1287 if (msg->any.head.cmd & DMSGF_ABORT) { 1288 if (state == &state->iocom->state0 || 1289 (state->txcmd & DMSGF_CREATE) == 0) { 1290 error = EALREADY; 1291 break; 1292 } 1293 } 1294 error = 0; 1295 break; 1296 case DMSGF_REPLY | DMSGF_CREATE: 1297 case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE: 1298 /* 1299 * When transmitting a reply with CREATE set the original 1300 * persistent state message should already exist. 
1301 */ 1302 if (state == &state->iocom->state0) { 1303 kprintf("kdmsg_state_msgtx: no state match " 1304 "for REPLY | CREATE\n"); 1305 error = EINVAL; 1306 break; 1307 } 1308 state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE; 1309 error = 0; 1310 break; 1311 case DMSGF_REPLY | DMSGF_DELETE: 1312 /* 1313 * When transmitting a reply with DELETE set the original 1314 * persistent state message should already exist. 1315 * 1316 * This is very similar to the REPLY|CREATE|* case except 1317 * txcmd is already stored, so we just add the DELETE flag. 1318 * 1319 * Sent REPLY+ABORT+DELETE in case where msgid has 1320 * already been fully closed, ignore the message. 1321 */ 1322 if (state == &state->iocom->state0) { 1323 if (msg->any.head.cmd & DMSGF_ABORT) { 1324 error = EALREADY; 1325 } else { 1326 kprintf("kdmsg_state_msgtx: no state match " 1327 "for REPLY | DELETE\n"); 1328 error = EINVAL; 1329 } 1330 break; 1331 } 1332 1333 /* 1334 * Sent REPLY+ABORT+DELETE in case where msgid has already 1335 * been reused for an unrelated message, ignore the message. 1336 */ 1337 if ((state->txcmd & DMSGF_CREATE) == 0) { 1338 if (msg->any.head.cmd & DMSGF_ABORT) { 1339 error = EALREADY; 1340 } else { 1341 kprintf("kdmsg_state_msgtx: state reused " 1342 "for REPLY | DELETE\n"); 1343 error = EINVAL; 1344 } 1345 break; 1346 } 1347 error = 0; 1348 break; 1349 case DMSGF_REPLY: 1350 /* 1351 * Check for mid-stream ABORT reply sent. 1352 * 1353 * One-off REPLY messages are allowed for e.g. status updates. 
1354 */ 1355 if (msg->any.head.cmd & DMSGF_ABORT) { 1356 if (state == &state->iocom->state0 || 1357 (state->txcmd & DMSGF_CREATE) == 0) { 1358 error = EALREADY; 1359 break; 1360 } 1361 } 1362 error = 0; 1363 break; 1364 } 1365 lockmgr(&iocom->msglk, LK_RELEASE); 1366 return (error); 1367 } 1368 1369 static 1370 void 1371 kdmsg_state_cleanuptx(kdmsg_msg_t *msg) 1372 { 1373 kdmsg_iocom_t *iocom = msg->state->iocom; 1374 kdmsg_state_t *state; 1375 kdmsg_state_t *pstate; 1376 1377 if ((state = msg->state) == NULL) { 1378 kdmsg_msg_free(msg); 1379 } else if (msg->any.head.cmd & DMSGF_DELETE) { 1380 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1381 KKASSERT((state->txcmd & DMSGF_DELETE) == 0); 1382 state->txcmd |= DMSGF_DELETE; 1383 if (state->rxcmd & DMSGF_DELETE) { 1384 KKASSERT(state->flags & KDMSG_STATE_INSERTED); 1385 if (state->txcmd & DMSGF_REPLY) { 1386 KKASSERT(msg->any.head.cmd & 1387 DMSGF_REPLY); 1388 RB_REMOVE(kdmsg_state_tree, 1389 &iocom->staterd_tree, state); 1390 } else { 1391 KKASSERT((msg->any.head.cmd & 1392 DMSGF_REPLY) == 0); 1393 RB_REMOVE(kdmsg_state_tree, 1394 &iocom->statewr_tree, state); 1395 } 1396 pstate = state->parent; 1397 TAILQ_REMOVE(&pstate->subq, state, entry); 1398 if (pstate != &pstate->iocom->state0 && 1399 TAILQ_EMPTY(&pstate->subq) && 1400 (pstate->flags & KDMSG_STATE_INSERTED) == 0) { 1401 kdmsg_state_free(pstate); 1402 } 1403 state->flags &= ~KDMSG_STATE_INSERTED; 1404 state->parent = NULL; 1405 kdmsg_msg_free(msg); 1406 if (TAILQ_EMPTY(&state->subq)) 1407 kdmsg_state_free(state); 1408 lockmgr(&iocom->msglk, LK_RELEASE); 1409 } else { 1410 kdmsg_msg_free(msg); 1411 lockmgr(&iocom->msglk, LK_RELEASE); 1412 } 1413 } else { 1414 kdmsg_msg_free(msg); 1415 } 1416 } 1417 1418 static 1419 void 1420 kdmsg_state_free(kdmsg_state_t *state) 1421 { 1422 kdmsg_iocom_t *iocom = state->iocom; 1423 1424 KKASSERT((state->flags & KDMSG_STATE_INSERTED) == 0); 1425 kfree(state, iocom->mmsg); 1426 } 1427 1428 kdmsg_msg_t * 1429 
kdmsg_msg_alloc(kdmsg_state_t *state, uint32_t cmd, 1430 int (*func)(kdmsg_state_t *, kdmsg_msg_t *), void *data) 1431 { 1432 kdmsg_iocom_t *iocom = state->iocom; 1433 kdmsg_state_t *pstate; 1434 kdmsg_msg_t *msg; 1435 size_t hbytes; 1436 1437 KKASSERT(iocom != NULL); 1438 hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN; 1439 msg = kmalloc(offsetof(struct kdmsg_msg, any) + hbytes, 1440 iocom->mmsg, M_WAITOK | M_ZERO); 1441 msg->hdr_size = hbytes; 1442 1443 if ((cmd & (DMSGF_CREATE | DMSGF_REPLY)) == DMSGF_CREATE) { 1444 /* 1445 * New transaction, requires tracking state and a unique 1446 * msgid to be allocated. 1447 */ 1448 pstate = state; 1449 state = kmalloc(sizeof(*state), iocom->mmsg, M_WAITOK | M_ZERO); 1450 TAILQ_INIT(&state->subq); 1451 state->iocom = iocom; 1452 state->parent = pstate; 1453 state->flags = KDMSG_STATE_DYNAMIC; 1454 state->func = func; 1455 state->any.any = data; 1456 state->msgid = (uint64_t)(uintptr_t)state; 1457 /*msg->any.head.msgid = state->msgid;XXX*/ 1458 1459 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1460 if (RB_INSERT(kdmsg_state_tree, &iocom->statewr_tree, state)) 1461 panic("duplicate msgid allocated"); 1462 TAILQ_INSERT_TAIL(&pstate->subq, state, entry); 1463 state->flags |= KDMSG_STATE_INSERTED; 1464 lockmgr(&iocom->msglk, LK_RELEASE); 1465 } else { 1466 pstate = state->parent; 1467 } 1468 1469 if (state->flags & KDMSG_STATE_OPPOSITE) 1470 cmd |= DMSGF_REVTRANS; 1471 if (pstate->flags & KDMSG_STATE_OPPOSITE) 1472 cmd |= DMSGF_REVCIRC; 1473 1474 msg->any.head.magic = DMSG_HDR_MAGIC; 1475 msg->any.head.cmd = cmd; 1476 msg->any.head.msgid = state->msgid; 1477 msg->any.head.circuit = pstate->msgid; 1478 msg->state = state; 1479 1480 return (msg); 1481 } 1482 1483 void 1484 kdmsg_msg_free(kdmsg_msg_t *msg) 1485 { 1486 kdmsg_iocom_t *iocom = msg->state->iocom; 1487 1488 if ((msg->flags & KDMSG_FLAG_AUXALLOC) && 1489 msg->aux_data && msg->aux_size) { 1490 kfree(msg->aux_data, iocom->mmsg); 1491 msg->flags &= ~KDMSG_FLAG_AUXALLOC; 1492 } 1493 
msg->state = NULL; 1494 msg->aux_data = NULL; 1495 msg->aux_size = 0; 1496 1497 kfree(msg, iocom->mmsg); 1498 } 1499 1500 /* 1501 * Indexed messages are stored in a red-black tree indexed by their 1502 * msgid. Only persistent messages are indexed. 1503 */ 1504 int 1505 kdmsg_state_cmp(kdmsg_state_t *state1, kdmsg_state_t *state2) 1506 { 1507 if (state1->iocom < state2->iocom) 1508 return(-1); 1509 if (state1->iocom > state2->iocom) 1510 return(1); 1511 if (state1->msgid < state2->msgid) 1512 return(-1); 1513 if (state1->msgid > state2->msgid) 1514 return(1); 1515 return(0); 1516 } 1517 1518 /* 1519 * Write a message. All requisit command flags have been set. 1520 * 1521 * If msg->state is non-NULL the message is written to the existing 1522 * transaction. msgid will be set accordingly. 1523 * 1524 * If msg->state is NULL and CREATE is set new state is allocated and 1525 * (func, data) is installed. A msgid is assigned. 1526 * 1527 * If msg->state is NULL and CREATE is not set the message is assumed 1528 * to be a one-way message. The originator must assign the msgid 1529 * (or leave it 0, which is typical. 1530 * 1531 * This function merely queues the message to the management thread, it 1532 * does not write to the message socket/pipe. 1533 */ 1534 void 1535 kdmsg_msg_write(kdmsg_msg_t *msg) 1536 { 1537 kdmsg_iocom_t *iocom = msg->state->iocom; 1538 kdmsg_state_t *state; 1539 1540 if (msg->state) { 1541 /* 1542 * Continuance or termination of existing transaction. 1543 * The transaction could have been initiated by either end. 1544 * 1545 * (Function callback and aux data for the receive side can 1546 * be replaced or left alone). 1547 */ 1548 state = msg->state; 1549 msg->any.head.msgid = state->msgid; 1550 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1551 } else { 1552 /* 1553 * One-off message (always uses msgid 0 to distinguish 1554 * between a possibly lost in-transaction message due to 1555 * competing aborts and a real one-off message?) 
1556 */ 1557 state = NULL; 1558 msg->any.head.msgid = 0; 1559 lockmgr(&iocom->msglk, LK_EXCLUSIVE); 1560 } 1561 1562 /* 1563 * This flag is not set until after the tx thread has drained 1564 * the txmsgq and simulated responses. After that point the 1565 * txthread is dead and can no longer simulate responses. 1566 * 1567 * Device drivers should never try to send a message once this 1568 * flag is set. They should have detected (through the state 1569 * closures) that the link is in trouble. 1570 */ 1571 if (iocom->flags & KDMSG_IOCOMF_EXITNOACC) { 1572 lockmgr(&iocom->msglk, LK_RELEASE); 1573 panic("kdmsg_msg_write: Attempt to write message to " 1574 "terminated iocom\n"); 1575 } 1576 1577 /* 1578 * Finish up the msg fields. Note that msg->aux_size and the 1579 * aux_bytes stored in the message header represent the unaligned 1580 * (actual) bytes of data, but the buffer is sized to an aligned 1581 * size and the CRC is generated over the aligned length. 1582 */ 1583 msg->any.head.salt = /* (random << 8) | */ (iocom->msg_seq & 255); 1584 ++iocom->msg_seq; 1585 1586 if (msg->aux_data && msg->aux_size) { 1587 uint32_t abytes = DMSG_DOALIGN(msg->aux_size); 1588 1589 msg->any.head.aux_bytes = msg->aux_size; 1590 msg->any.head.aux_crc = iscsi_crc32(msg->aux_data, abytes); 1591 } 1592 msg->any.head.hdr_crc = 0; 1593 msg->any.head.hdr_crc = iscsi_crc32(msg->any.buf, msg->hdr_size); 1594 1595 TAILQ_INSERT_TAIL(&iocom->msgq, msg, qentry); 1596 1597 if (iocom->msg_ctl & KDMSG_CLUSTERCTL_SLEEPING) { 1598 atomic_clear_int(&iocom->msg_ctl, 1599 KDMSG_CLUSTERCTL_SLEEPING); 1600 wakeup(&iocom->msg_ctl); 1601 } 1602 1603 lockmgr(&iocom->msglk, LK_RELEASE); 1604 } 1605 1606 /* 1607 * Reply to a message and terminate our side of the transaction. 1608 * 1609 * If msg->state is non-NULL we are replying to a one-way message. 
1610 */ 1611 void 1612 kdmsg_msg_reply(kdmsg_msg_t *msg, uint32_t error) 1613 { 1614 kdmsg_state_t *state = msg->state; 1615 kdmsg_msg_t *nmsg; 1616 uint32_t cmd; 1617 1618 /* 1619 * Reply with a simple error code and terminate the transaction. 1620 */ 1621 cmd = DMSG_LNK_ERROR; 1622 1623 /* 1624 * Check if our direction has even been initiated yet, set CREATE. 1625 * 1626 * Check what direction this is (command or reply direction). Note 1627 * that txcmd might not have been initiated yet. 1628 * 1629 * If our direction has already been closed we just return without 1630 * doing anything. 1631 */ 1632 if (state != &state->iocom->state0) { 1633 if (state->txcmd & DMSGF_DELETE) 1634 return; 1635 if ((state->txcmd & DMSGF_CREATE) == 0) 1636 cmd |= DMSGF_CREATE; 1637 if (state->txcmd & DMSGF_REPLY) 1638 cmd |= DMSGF_REPLY; 1639 cmd |= DMSGF_DELETE; 1640 } else { 1641 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) 1642 cmd |= DMSGF_REPLY; 1643 } 1644 1645 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 1646 nmsg->any.head.error = error; 1647 kdmsg_msg_write(nmsg); 1648 } 1649 1650 /* 1651 * Reply to a message and continue our side of the transaction. 1652 * 1653 * If msg->state is non-NULL we are replying to a one-way message and this 1654 * function degenerates into the same as kdmsg_msg_reply(). 1655 */ 1656 void 1657 kdmsg_msg_result(kdmsg_msg_t *msg, uint32_t error) 1658 { 1659 kdmsg_state_t *state = msg->state; 1660 kdmsg_msg_t *nmsg; 1661 uint32_t cmd; 1662 1663 /* 1664 * Return a simple result code, do NOT terminate the transaction. 1665 */ 1666 cmd = DMSG_LNK_ERROR; 1667 1668 /* 1669 * Check if our direction has even been initiated yet, set CREATE. 1670 * 1671 * Check what direction this is (command or reply direction). Note 1672 * that txcmd might not have been initiated yet. 1673 * 1674 * If our direction has already been closed we just return without 1675 * doing anything. 
1676 */ 1677 if (state != &state->iocom->state0) { 1678 if (state->txcmd & DMSGF_DELETE) 1679 return; 1680 if ((state->txcmd & DMSGF_CREATE) == 0) 1681 cmd |= DMSGF_CREATE; 1682 if (state->txcmd & DMSGF_REPLY) 1683 cmd |= DMSGF_REPLY; 1684 /* continuing transaction, do not set MSGF_DELETE */ 1685 } else { 1686 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) 1687 cmd |= DMSGF_REPLY; 1688 } 1689 1690 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 1691 nmsg->any.head.error = error; 1692 kdmsg_msg_write(nmsg); 1693 } 1694 1695 /* 1696 * Reply to a message and terminate our side of the transaction. 1697 * 1698 * If msg->state is non-NULL we are replying to a one-way message. 1699 */ 1700 void 1701 kdmsg_state_reply(kdmsg_state_t *state, uint32_t error) 1702 { 1703 kdmsg_msg_t *nmsg; 1704 uint32_t cmd; 1705 1706 /* 1707 * Reply with a simple error code and terminate the transaction. 1708 */ 1709 cmd = DMSG_LNK_ERROR; 1710 1711 /* 1712 * Check if our direction has even been initiated yet, set CREATE. 1713 * 1714 * Check what direction this is (command or reply direction). Note 1715 * that txcmd might not have been initiated yet. 1716 * 1717 * If our direction has already been closed we just return without 1718 * doing anything. 1719 */ 1720 KKASSERT(state); 1721 if (state->txcmd & DMSGF_DELETE) 1722 return; 1723 if ((state->txcmd & DMSGF_CREATE) == 0) 1724 cmd |= DMSGF_CREATE; 1725 if (state->txcmd & DMSGF_REPLY) 1726 cmd |= DMSGF_REPLY; 1727 cmd |= DMSGF_DELETE; 1728 1729 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 1730 nmsg->any.head.error = error; 1731 kdmsg_msg_write(nmsg); 1732 } 1733 1734 /* 1735 * Reply to a message and continue our side of the transaction. 1736 * 1737 * If msg->state is non-NULL we are replying to a one-way message and this 1738 * function degenerates into the same as kdmsg_msg_reply(). 
1739 */ 1740 void 1741 kdmsg_state_result(kdmsg_state_t *state, uint32_t error) 1742 { 1743 kdmsg_msg_t *nmsg; 1744 uint32_t cmd; 1745 1746 /* 1747 * Return a simple result code, do NOT terminate the transaction. 1748 */ 1749 cmd = DMSG_LNK_ERROR; 1750 1751 /* 1752 * Check if our direction has even been initiated yet, set CREATE. 1753 * 1754 * Check what direction this is (command or reply direction). Note 1755 * that txcmd might not have been initiated yet. 1756 * 1757 * If our direction has already been closed we just return without 1758 * doing anything. 1759 */ 1760 KKASSERT(state); 1761 if (state->txcmd & DMSGF_DELETE) 1762 return; 1763 if ((state->txcmd & DMSGF_CREATE) == 0) 1764 cmd |= DMSGF_CREATE; 1765 if (state->txcmd & DMSGF_REPLY) 1766 cmd |= DMSGF_REPLY; 1767 /* continuing transaction, do not set MSGF_DELETE */ 1768 1769 nmsg = kdmsg_msg_alloc(state, cmd, NULL, NULL); 1770 nmsg->any.head.error = error; 1771 kdmsg_msg_write(nmsg); 1772 } 1773