10c3a8cd0SMatthew Dillon /* 2e96cef49SMatthew Dillon * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved. 30c3a8cd0SMatthew Dillon * 40c3a8cd0SMatthew Dillon * This code is derived from software contributed to The DragonFly Project 50c3a8cd0SMatthew Dillon * by Matthew Dillon <dillon@dragonflybsd.org> 60c3a8cd0SMatthew Dillon * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 70c3a8cd0SMatthew Dillon * 80c3a8cd0SMatthew Dillon * Redistribution and use in source and binary forms, with or without 90c3a8cd0SMatthew Dillon * modification, are permitted provided that the following conditions 100c3a8cd0SMatthew Dillon * are met: 110c3a8cd0SMatthew Dillon * 120c3a8cd0SMatthew Dillon * 1. Redistributions of source code must retain the above copyright 130c3a8cd0SMatthew Dillon * notice, this list of conditions and the following disclaimer. 140c3a8cd0SMatthew Dillon * 2. Redistributions in binary form must reproduce the above copyright 150c3a8cd0SMatthew Dillon * notice, this list of conditions and the following disclaimer in 160c3a8cd0SMatthew Dillon * the documentation and/or other materials provided with the 170c3a8cd0SMatthew Dillon * distribution. 180c3a8cd0SMatthew Dillon * 3. Neither the name of The DragonFly Project nor the names of its 190c3a8cd0SMatthew Dillon * contributors may be used to endorse or promote products derived 200c3a8cd0SMatthew Dillon * from this software without specific, prior written permission. 210c3a8cd0SMatthew Dillon * 220c3a8cd0SMatthew Dillon * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 230c3a8cd0SMatthew Dillon * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 240c3a8cd0SMatthew Dillon * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 250c3a8cd0SMatthew Dillon * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 260c3a8cd0SMatthew Dillon * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 270c3a8cd0SMatthew Dillon * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 280c3a8cd0SMatthew Dillon * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 290c3a8cd0SMatthew Dillon * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 300c3a8cd0SMatthew Dillon * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 310c3a8cd0SMatthew Dillon * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 320c3a8cd0SMatthew Dillon * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 330c3a8cd0SMatthew Dillon * SUCH DAMAGE. 340c3a8cd0SMatthew Dillon */ 350c3a8cd0SMatthew Dillon 360c3a8cd0SMatthew Dillon #include "dmsg_local.h" 370c3a8cd0SMatthew Dillon 380a9eefcaSMatthew Dillon #define DMSG_BLOCK_DEBUG 390a9eefcaSMatthew Dillon 400c3a8cd0SMatthew Dillon int DMsgDebugOpt; 41*157f2a25STomohiro Kusumi static unsigned int dmsg_state_count; 427adbba57SMatthew Dillon #ifdef DMSG_BLOCK_DEBUG 43*157f2a25STomohiro Kusumi static unsigned int biocount; 447adbba57SMatthew Dillon #endif 450c3a8cd0SMatthew Dillon 460a9eefcaSMatthew Dillon static int dmsg_state_msgrx(dmsg_msg_t *msg, int mstate); 471b8eded1SMatthew Dillon static void dmsg_state_cleanuptx(dmsg_iocom_t *iocom, dmsg_msg_t *msg); 48a2179323SMatthew Dillon static void dmsg_msg_free_locked(dmsg_msg_t *msg); 49323c0947SMatthew Dillon static void dmsg_state_free(dmsg_state_t *state); 500a9eefcaSMatthew Dillon static void dmsg_subq_delete(dmsg_state_t *state); 510a9eefcaSMatthew Dillon static void dmsg_simulate_failure(dmsg_state_t *state, int meto, int error); 520a9eefcaSMatthew Dillon static void dmsg_state_abort(dmsg_state_t *state); 530a9eefcaSMatthew Dillon static void dmsg_state_dying(dmsg_state_t *state); 540c3a8cd0SMatthew Dillon 550d20ec8aSMatthew Dillon RB_GENERATE(dmsg_state_tree, dmsg_state, rbnode, 
dmsg_state_cmp); 560c3a8cd0SMatthew Dillon 570c3a8cd0SMatthew Dillon /* 580c3a8cd0SMatthew Dillon * STATE TREE - Represents open transactions which are indexed by their 590d20ec8aSMatthew Dillon * { msgid } relative to the governing iocom. 600c3a8cd0SMatthew Dillon */ 610c3a8cd0SMatthew Dillon int 620c3a8cd0SMatthew Dillon dmsg_state_cmp(dmsg_state_t *state1, dmsg_state_t *state2) 630c3a8cd0SMatthew Dillon { 640c3a8cd0SMatthew Dillon if (state1->msgid < state2->msgid) 650c3a8cd0SMatthew Dillon return(-1); 660c3a8cd0SMatthew Dillon if (state1->msgid > state2->msgid) 670c3a8cd0SMatthew Dillon return(1); 680c3a8cd0SMatthew Dillon return(0); 690c3a8cd0SMatthew Dillon } 700c3a8cd0SMatthew Dillon 710d20ec8aSMatthew Dillon /* 720c3a8cd0SMatthew Dillon * Initialize a low-level ioq 730c3a8cd0SMatthew Dillon */ 740c3a8cd0SMatthew Dillon void 750c3a8cd0SMatthew Dillon dmsg_ioq_init(dmsg_iocom_t *iocom __unused, dmsg_ioq_t *ioq) 760c3a8cd0SMatthew Dillon { 770c3a8cd0SMatthew Dillon bzero(ioq, sizeof(*ioq)); 780c3a8cd0SMatthew Dillon ioq->state = DMSG_MSGQ_STATE_HEADER1; 790c3a8cd0SMatthew Dillon TAILQ_INIT(&ioq->msgq); 800c3a8cd0SMatthew Dillon } 810c3a8cd0SMatthew Dillon 820c3a8cd0SMatthew Dillon /* 830c3a8cd0SMatthew Dillon * Cleanup queue. 840c3a8cd0SMatthew Dillon * 850c3a8cd0SMatthew Dillon * caller holds iocom->mtx. 
860c3a8cd0SMatthew Dillon */ 870c3a8cd0SMatthew Dillon void 880c3a8cd0SMatthew Dillon dmsg_ioq_done(dmsg_iocom_t *iocom __unused, dmsg_ioq_t *ioq) 890c3a8cd0SMatthew Dillon { 900c3a8cd0SMatthew Dillon dmsg_msg_t *msg; 910c3a8cd0SMatthew Dillon 920c3a8cd0SMatthew Dillon while ((msg = TAILQ_FIRST(&ioq->msgq)) != NULL) { 930c3a8cd0SMatthew Dillon assert(0); /* shouldn't happen */ 940c3a8cd0SMatthew Dillon TAILQ_REMOVE(&ioq->msgq, msg, qentry); 950c3a8cd0SMatthew Dillon dmsg_msg_free(msg); 960c3a8cd0SMatthew Dillon } 970c3a8cd0SMatthew Dillon if ((msg = ioq->msg) != NULL) { 980c3a8cd0SMatthew Dillon ioq->msg = NULL; 990c3a8cd0SMatthew Dillon dmsg_msg_free(msg); 1000c3a8cd0SMatthew Dillon } 1010c3a8cd0SMatthew Dillon } 1020c3a8cd0SMatthew Dillon 1030c3a8cd0SMatthew Dillon /* 1040c3a8cd0SMatthew Dillon * Initialize a low-level communications channel. 1050c3a8cd0SMatthew Dillon * 1060c3a8cd0SMatthew Dillon * NOTE: The signal_func() is called at least once from the loop and can be 1070c3a8cd0SMatthew Dillon * re-armed via dmsg_iocom_restate(). 
1080c3a8cd0SMatthew Dillon */ 1090c3a8cd0SMatthew Dillon void 1100c3a8cd0SMatthew Dillon dmsg_iocom_init(dmsg_iocom_t *iocom, int sock_fd, int alt_fd, 11101e43224SMatthew Dillon void (*signal_func)(dmsg_iocom_t *iocom), 11201e43224SMatthew Dillon void (*rcvmsg_func)(dmsg_msg_t *msg), 11301e43224SMatthew Dillon void (*usrmsg_func)(dmsg_msg_t *msg, int unmanaged), 11401e43224SMatthew Dillon void (*altmsg_func)(dmsg_iocom_t *iocom)) 1150c3a8cd0SMatthew Dillon { 1160c3a8cd0SMatthew Dillon struct stat st; 1170c3a8cd0SMatthew Dillon 1180c3a8cd0SMatthew Dillon bzero(iocom, sizeof(*iocom)); 1190c3a8cd0SMatthew Dillon 120f306de83SMatthew Dillon asprintf(&iocom->label, "iocom-%p", iocom); 1210d20ec8aSMatthew Dillon iocom->signal_callback = signal_func; 1220d20ec8aSMatthew Dillon iocom->rcvmsg_callback = rcvmsg_func; 1230d20ec8aSMatthew Dillon iocom->altmsg_callback = altmsg_func; 12401e43224SMatthew Dillon iocom->usrmsg_callback = usrmsg_func; 1250c3a8cd0SMatthew Dillon 1260c3a8cd0SMatthew Dillon pthread_mutex_init(&iocom->mtx, NULL); 1271b8eded1SMatthew Dillon RB_INIT(&iocom->staterd_tree); 1281b8eded1SMatthew Dillon RB_INIT(&iocom->statewr_tree); 1290d20ec8aSMatthew Dillon TAILQ_INIT(&iocom->txmsgq); 1300c3a8cd0SMatthew Dillon iocom->sock_fd = sock_fd; 1310c3a8cd0SMatthew Dillon iocom->alt_fd = alt_fd; 13298126869SMatthew Dillon iocom->flags = DMSG_IOCOMF_RREQ | DMSG_IOCOMF_CLOSEALT; 1330c3a8cd0SMatthew Dillon if (signal_func) 1340c3a8cd0SMatthew Dillon iocom->flags |= DMSG_IOCOMF_SWORK; 1350c3a8cd0SMatthew Dillon dmsg_ioq_init(iocom, &iocom->ioq_rx); 1360c3a8cd0SMatthew Dillon dmsg_ioq_init(iocom, &iocom->ioq_tx); 137323c0947SMatthew Dillon iocom->state0.refs = 1; /* should never trigger a free */ 1381b8eded1SMatthew Dillon iocom->state0.iocom = iocom; 1391b8eded1SMatthew Dillon iocom->state0.parent = &iocom->state0; 140d30cab67SMatthew Dillon iocom->state0.flags = DMSG_STATE_ROOT; 1411b8eded1SMatthew Dillon TAILQ_INIT(&iocom->state0.subq); 1421b8eded1SMatthew Dillon 
1430c3a8cd0SMatthew Dillon if (pipe(iocom->wakeupfds) < 0) 1440c3a8cd0SMatthew Dillon assert(0); 1450c3a8cd0SMatthew Dillon fcntl(iocom->wakeupfds[0], F_SETFL, O_NONBLOCK); 1460c3a8cd0SMatthew Dillon fcntl(iocom->wakeupfds[1], F_SETFL, O_NONBLOCK); 1470c3a8cd0SMatthew Dillon 1480c3a8cd0SMatthew Dillon /* 1490c3a8cd0SMatthew Dillon * Negotiate session crypto synchronously. This will mark the 1500c3a8cd0SMatthew Dillon * connection as error'd if it fails. If this is a pipe it's 1510c3a8cd0SMatthew Dillon * a linkage that we set up ourselves to the filesystem and there 1520c3a8cd0SMatthew Dillon * is no crypto. 1530c3a8cd0SMatthew Dillon */ 1540c3a8cd0SMatthew Dillon if (fstat(sock_fd, &st) < 0) 1550c3a8cd0SMatthew Dillon assert(0); 1560c3a8cd0SMatthew Dillon if (S_ISSOCK(st.st_mode)) 1570c3a8cd0SMatthew Dillon dmsg_crypto_negotiate(iocom); 1580c3a8cd0SMatthew Dillon 1590c3a8cd0SMatthew Dillon /* 1600c3a8cd0SMatthew Dillon * Make sure our fds are set to non-blocking for the iocom core. 1610c3a8cd0SMatthew Dillon */ 1620c3a8cd0SMatthew Dillon if (sock_fd >= 0) 1630c3a8cd0SMatthew Dillon fcntl(sock_fd, F_SETFL, O_NONBLOCK); 1640c3a8cd0SMatthew Dillon #if 0 1650c3a8cd0SMatthew Dillon /* if line buffered our single fgets() should be fine */ 1660c3a8cd0SMatthew Dillon if (alt_fd >= 0) 1670c3a8cd0SMatthew Dillon fcntl(alt_fd, F_SETFL, O_NONBLOCK); 1680c3a8cd0SMatthew Dillon #endif 1690c3a8cd0SMatthew Dillon } 1700c3a8cd0SMatthew Dillon 171f306de83SMatthew Dillon void 172f306de83SMatthew Dillon dmsg_iocom_label(dmsg_iocom_t *iocom, const char *ctl, ...) 
173f306de83SMatthew Dillon { 174f306de83SMatthew Dillon va_list va; 175f306de83SMatthew Dillon char *optr; 176f306de83SMatthew Dillon 177f306de83SMatthew Dillon va_start(va, ctl); 178f306de83SMatthew Dillon optr = iocom->label; 179f306de83SMatthew Dillon vasprintf(&iocom->label, ctl, va); 180f306de83SMatthew Dillon va_end(va); 181f306de83SMatthew Dillon if (optr) 182f306de83SMatthew Dillon free(optr); 183f306de83SMatthew Dillon } 184f306de83SMatthew Dillon 1850c3a8cd0SMatthew Dillon /* 1860c3a8cd0SMatthew Dillon * May only be called from a callback from iocom_core. 1870c3a8cd0SMatthew Dillon * 1880c3a8cd0SMatthew Dillon * Adjust state machine functions, set flags to guarantee that both 1890c3a8cd0SMatthew Dillon * the recevmsg_func and the sendmsg_func is called at least once. 1900c3a8cd0SMatthew Dillon */ 1910c3a8cd0SMatthew Dillon void 1920d20ec8aSMatthew Dillon dmsg_iocom_restate(dmsg_iocom_t *iocom, 1930d20ec8aSMatthew Dillon void (*signal_func)(dmsg_iocom_t *), 19401e43224SMatthew Dillon void (*rcvmsg_func)(dmsg_msg_t *msg)) 1950c3a8cd0SMatthew Dillon { 196a2179323SMatthew Dillon pthread_mutex_lock(&iocom->mtx); 1970d20ec8aSMatthew Dillon iocom->signal_callback = signal_func; 1980d20ec8aSMatthew Dillon iocom->rcvmsg_callback = rcvmsg_func; 1990c3a8cd0SMatthew Dillon if (signal_func) 200a2179323SMatthew Dillon atomic_set_int(&iocom->flags, DMSG_IOCOMF_SWORK); 2010c3a8cd0SMatthew Dillon else 202a2179323SMatthew Dillon atomic_clear_int(&iocom->flags, DMSG_IOCOMF_SWORK); 203a2179323SMatthew Dillon pthread_mutex_unlock(&iocom->mtx); 2040c3a8cd0SMatthew Dillon } 2050c3a8cd0SMatthew Dillon 2060c3a8cd0SMatthew Dillon void 2070d20ec8aSMatthew Dillon dmsg_iocom_signal(dmsg_iocom_t *iocom) 2080c3a8cd0SMatthew Dillon { 209a2179323SMatthew Dillon pthread_mutex_lock(&iocom->mtx); 2100d20ec8aSMatthew Dillon if (iocom->signal_callback) 211a2179323SMatthew Dillon atomic_set_int(&iocom->flags, DMSG_IOCOMF_SWORK); 212a2179323SMatthew Dillon pthread_mutex_unlock(&iocom->mtx); 
2130c3a8cd0SMatthew Dillon } 2140c3a8cd0SMatthew Dillon 2150c3a8cd0SMatthew Dillon /* 2160c3a8cd0SMatthew Dillon * Cleanup a terminating iocom. 2170c3a8cd0SMatthew Dillon * 2180c3a8cd0SMatthew Dillon * Caller should not hold iocom->mtx. The iocom has already been disconnected 2190c3a8cd0SMatthew Dillon * from all possible references to it. 2200c3a8cd0SMatthew Dillon */ 2210c3a8cd0SMatthew Dillon void 2220c3a8cd0SMatthew Dillon dmsg_iocom_done(dmsg_iocom_t *iocom) 2230c3a8cd0SMatthew Dillon { 2240c3a8cd0SMatthew Dillon if (iocom->sock_fd >= 0) { 2250c3a8cd0SMatthew Dillon close(iocom->sock_fd); 2260c3a8cd0SMatthew Dillon iocom->sock_fd = -1; 2270c3a8cd0SMatthew Dillon } 22898126869SMatthew Dillon if (iocom->alt_fd >= 0 && (iocom->flags & DMSG_IOCOMF_CLOSEALT)) { 2290c3a8cd0SMatthew Dillon close(iocom->alt_fd); 2300c3a8cd0SMatthew Dillon iocom->alt_fd = -1; 2310c3a8cd0SMatthew Dillon } 2320c3a8cd0SMatthew Dillon dmsg_ioq_done(iocom, &iocom->ioq_rx); 2330c3a8cd0SMatthew Dillon dmsg_ioq_done(iocom, &iocom->ioq_tx); 2340c3a8cd0SMatthew Dillon if (iocom->wakeupfds[0] >= 0) { 2350c3a8cd0SMatthew Dillon close(iocom->wakeupfds[0]); 2360c3a8cd0SMatthew Dillon iocom->wakeupfds[0] = -1; 2370c3a8cd0SMatthew Dillon } 2380c3a8cd0SMatthew Dillon if (iocom->wakeupfds[1] >= 0) { 2390c3a8cd0SMatthew Dillon close(iocom->wakeupfds[1]); 2400c3a8cd0SMatthew Dillon iocom->wakeupfds[1] = -1; 2410c3a8cd0SMatthew Dillon } 2420c3a8cd0SMatthew Dillon pthread_mutex_destroy(&iocom->mtx); 2430c3a8cd0SMatthew Dillon } 2440c3a8cd0SMatthew Dillon 2450c3a8cd0SMatthew Dillon /* 2461b8eded1SMatthew Dillon * Allocate a new message using the specified transaction state. 247a2179323SMatthew Dillon * 2481b8eded1SMatthew Dillon * If CREATE is set a new transaction is allocated relative to the passed-in 249d30cab67SMatthew Dillon * transaction (the 'state' argument becomes pstate). 
2501b8eded1SMatthew Dillon * 2511b8eded1SMatthew Dillon * If CREATE is not set the message is associated with the passed-in 2521b8eded1SMatthew Dillon * transaction. 2530c3a8cd0SMatthew Dillon */ 2540c3a8cd0SMatthew Dillon dmsg_msg_t * 2551b8eded1SMatthew Dillon dmsg_msg_alloc(dmsg_state_t *state, 2560d20ec8aSMatthew Dillon size_t aux_size, uint32_t cmd, 2570c3a8cd0SMatthew Dillon void (*func)(dmsg_msg_t *), void *data) 2580c3a8cd0SMatthew Dillon { 2591b8eded1SMatthew Dillon dmsg_iocom_t *iocom = state->iocom; 260323c0947SMatthew Dillon dmsg_msg_t *msg; 261323c0947SMatthew Dillon 262323c0947SMatthew Dillon pthread_mutex_lock(&iocom->mtx); 263323c0947SMatthew Dillon msg = dmsg_msg_alloc_locked(state, aux_size, cmd, func, data); 264323c0947SMatthew Dillon pthread_mutex_unlock(&iocom->mtx); 265323c0947SMatthew Dillon 266323c0947SMatthew Dillon return msg; 267323c0947SMatthew Dillon } 268323c0947SMatthew Dillon 269323c0947SMatthew Dillon dmsg_msg_t * 270323c0947SMatthew Dillon dmsg_msg_alloc_locked(dmsg_state_t *state, 271323c0947SMatthew Dillon size_t aux_size, uint32_t cmd, 272323c0947SMatthew Dillon void (*func)(dmsg_msg_t *), void *data) 273323c0947SMatthew Dillon { 274323c0947SMatthew Dillon dmsg_iocom_t *iocom = state->iocom; 2751b8eded1SMatthew Dillon dmsg_state_t *pstate; 2760c3a8cd0SMatthew Dillon dmsg_msg_t *msg; 2770c3a8cd0SMatthew Dillon int hbytes; 278f306de83SMatthew Dillon size_t aligned_size; 2790c3a8cd0SMatthew Dillon 280f306de83SMatthew Dillon aligned_size = DMSG_DOALIGN(aux_size); 2810c3a8cd0SMatthew Dillon if ((cmd & (DMSGF_CREATE | DMSGF_REPLY)) == DMSGF_CREATE) { 2820c3a8cd0SMatthew Dillon /* 2831b8eded1SMatthew Dillon * When CREATE is set without REPLY the caller is 2841b8eded1SMatthew Dillon * initiating a new transaction stacked under the specified 2851b8eded1SMatthew Dillon * circuit. 
2860c3a8cd0SMatthew Dillon * 2870a9eefcaSMatthew Dillon * It is possible to race a circuit failure, inherit the 2880a9eefcaSMatthew Dillon * parent's STATE_DYING flag to trigger an abort sequence 2890a9eefcaSMatthew Dillon * in the transmit path. By not inheriting ABORTING the 2900a9eefcaSMatthew Dillon * abort sequence can recurse. 2910a9eefcaSMatthew Dillon * 2920c3a8cd0SMatthew Dillon * NOTE: CREATE in txcmd handled by dmsg_msg_write() 2930c3a8cd0SMatthew Dillon * NOTE: DELETE in txcmd handled by dmsg_state_cleanuptx() 2940c3a8cd0SMatthew Dillon */ 2951b8eded1SMatthew Dillon pstate = state; 2960c3a8cd0SMatthew Dillon state = malloc(sizeof(*state)); 2970c3a8cd0SMatthew Dillon bzero(state, sizeof(*state)); 2980a9eefcaSMatthew Dillon atomic_add_int(&dmsg_state_count, 1); 2990a9eefcaSMatthew Dillon 3001b8eded1SMatthew Dillon TAILQ_INIT(&state->subq); 3011b8eded1SMatthew Dillon state->parent = pstate; 3020c3a8cd0SMatthew Dillon state->iocom = iocom; 3030c3a8cd0SMatthew Dillon state->flags = DMSG_STATE_DYNAMIC; 3040c3a8cd0SMatthew Dillon state->msgid = (uint64_t)(uintptr_t)state; 3050c3a8cd0SMatthew Dillon state->txcmd = cmd & ~(DMSGF_CREATE | DMSGF_DELETE); 3060c3a8cd0SMatthew Dillon state->rxcmd = DMSGF_REPLY; 3070d20ec8aSMatthew Dillon state->icmd = state->txcmd & DMSGF_BASECMDMASK; 3080c3a8cd0SMatthew Dillon state->func = func; 3090c3a8cd0SMatthew Dillon state->any.any = data; 310d30cab67SMatthew Dillon 311a06d536bSMatthew Dillon state->flags |= DMSG_STATE_SUBINSERTED | 312a06d536bSMatthew Dillon DMSG_STATE_RBINSERTED; 3130a9eefcaSMatthew Dillon state->flags |= pstate->flags & DMSG_STATE_DYING; 3140a9eefcaSMatthew Dillon if (TAILQ_EMPTY(&pstate->subq)) 3150a9eefcaSMatthew Dillon dmsg_state_hold(pstate); 3160a9eefcaSMatthew Dillon RB_INSERT(dmsg_state_tree, &iocom->statewr_tree, state); 3170a9eefcaSMatthew Dillon TAILQ_INSERT_TAIL(&pstate->subq, state, entry); 3180a9eefcaSMatthew Dillon dmsg_state_hold(state); /* state on pstate->subq */ 3190a9eefcaSMatthew Dillon 
dmsg_state_hold(state); /* state on rbtree */ 3200a9eefcaSMatthew Dillon dmsg_state_hold(state); /* msg->state */ 3211b8eded1SMatthew Dillon } else { 3221b8eded1SMatthew Dillon /* 3231b8eded1SMatthew Dillon * Otherwise the message is transmitted over the existing 3241b8eded1SMatthew Dillon * open transaction. 3251b8eded1SMatthew Dillon */ 3261b8eded1SMatthew Dillon pstate = state->parent; 3270a9eefcaSMatthew Dillon dmsg_state_hold(state); /* msg->state */ 3280c3a8cd0SMatthew Dillon } 3291b8eded1SMatthew Dillon 330a2179323SMatthew Dillon /* XXX SMP race for state */ 331a2179323SMatthew Dillon hbytes = (cmd & DMSGF_SIZE) * DMSG_ALIGN; 3320a9eefcaSMatthew Dillon assert((size_t)hbytes >= sizeof(struct dmsg_hdr)); 3330a9eefcaSMatthew Dillon msg = malloc(offsetof(struct dmsg_msg, any.head) + hbytes); 334a2179323SMatthew Dillon bzero(msg, offsetof(struct dmsg_msg, any.head)); 335f306de83SMatthew Dillon 336f306de83SMatthew Dillon /* 337f306de83SMatthew Dillon * [re]allocate the auxillary data buffer. The caller knows that 338f306de83SMatthew Dillon * a size-aligned buffer will be allocated but we do not want to 339f306de83SMatthew Dillon * force the caller to zero any tail piece, so we do that ourself. 
340f306de83SMatthew Dillon */ 3410c3a8cd0SMatthew Dillon if (msg->aux_size != aux_size) { 3420c3a8cd0SMatthew Dillon if (msg->aux_data) { 3430c3a8cd0SMatthew Dillon free(msg->aux_data); 3440c3a8cd0SMatthew Dillon msg->aux_data = NULL; 3450c3a8cd0SMatthew Dillon msg->aux_size = 0; 3460c3a8cd0SMatthew Dillon } 3470c3a8cd0SMatthew Dillon if (aux_size) { 348f306de83SMatthew Dillon msg->aux_data = malloc(aligned_size); 3490c3a8cd0SMatthew Dillon msg->aux_size = aux_size; 350f306de83SMatthew Dillon if (aux_size != aligned_size) { 351f306de83SMatthew Dillon bzero(msg->aux_data + aux_size, 352f306de83SMatthew Dillon aligned_size - aux_size); 353f306de83SMatthew Dillon } 3540c3a8cd0SMatthew Dillon } 3550c3a8cd0SMatthew Dillon } 3561b8eded1SMatthew Dillon 3571b8eded1SMatthew Dillon /* 3581b8eded1SMatthew Dillon * Set REVTRANS if the transaction was remotely initiated 3591b8eded1SMatthew Dillon * Set REVCIRC if the circuit was remotely initiated 3601b8eded1SMatthew Dillon */ 3611b8eded1SMatthew Dillon if (state->flags & DMSG_STATE_OPPOSITE) 3621b8eded1SMatthew Dillon cmd |= DMSGF_REVTRANS; 3631b8eded1SMatthew Dillon if (pstate->flags & DMSG_STATE_OPPOSITE) 3641b8eded1SMatthew Dillon cmd |= DMSGF_REVCIRC; 3651b8eded1SMatthew Dillon 3661b8eded1SMatthew Dillon /* 3671b8eded1SMatthew Dillon * Finish filling out the header. 
3681b8eded1SMatthew Dillon */ 3690c3a8cd0SMatthew Dillon bzero(&msg->any.head, hbytes); 3700c3a8cd0SMatthew Dillon msg->hdr_size = hbytes; 3710d20ec8aSMatthew Dillon msg->any.head.magic = DMSG_HDR_MAGIC; 3720c3a8cd0SMatthew Dillon msg->any.head.cmd = cmd; 3730c3a8cd0SMatthew Dillon msg->any.head.aux_descr = 0; 3740c3a8cd0SMatthew Dillon msg->any.head.aux_crc = 0; 3750c3a8cd0SMatthew Dillon msg->any.head.msgid = state->msgid; 3761b8eded1SMatthew Dillon msg->any.head.circuit = pstate->msgid; 3771b8eded1SMatthew Dillon msg->state = state; 3781b8eded1SMatthew Dillon 3790c3a8cd0SMatthew Dillon return (msg); 3800c3a8cd0SMatthew Dillon } 3810c3a8cd0SMatthew Dillon 3820c3a8cd0SMatthew Dillon /* 3830c3a8cd0SMatthew Dillon * Free a message so it can be reused afresh. 3840c3a8cd0SMatthew Dillon * 3850c3a8cd0SMatthew Dillon * NOTE: aux_size can be 0 with a non-NULL aux_data. 3860c3a8cd0SMatthew Dillon */ 3870c3a8cd0SMatthew Dillon static 3880c3a8cd0SMatthew Dillon void 3890c3a8cd0SMatthew Dillon dmsg_msg_free_locked(dmsg_msg_t *msg) 3900c3a8cd0SMatthew Dillon { 3910a9eefcaSMatthew Dillon dmsg_state_t *state; 392e96cef49SMatthew Dillon 3930a9eefcaSMatthew Dillon if ((state = msg->state) != NULL) { 3940a9eefcaSMatthew Dillon dmsg_state_drop(state); 395323c0947SMatthew Dillon msg->state = NULL; /* safety */ 3960a9eefcaSMatthew Dillon } 397a2179323SMatthew Dillon if (msg->aux_data) { 398a2179323SMatthew Dillon free(msg->aux_data); 3990a9eefcaSMatthew Dillon msg->aux_data = NULL; /* safety */ 400a2179323SMatthew Dillon } 401a2179323SMatthew Dillon msg->aux_size = 0; 402a2179323SMatthew Dillon free (msg); 4030c3a8cd0SMatthew Dillon } 4040c3a8cd0SMatthew Dillon 4050c3a8cd0SMatthew Dillon void 4060c3a8cd0SMatthew Dillon dmsg_msg_free(dmsg_msg_t *msg) 4070c3a8cd0SMatthew Dillon { 4081b8eded1SMatthew Dillon dmsg_iocom_t *iocom = msg->state->iocom; 4090c3a8cd0SMatthew Dillon 4100c3a8cd0SMatthew Dillon pthread_mutex_lock(&iocom->mtx); 4110c3a8cd0SMatthew Dillon dmsg_msg_free_locked(msg); 
4120c3a8cd0SMatthew Dillon pthread_mutex_unlock(&iocom->mtx); 4130c3a8cd0SMatthew Dillon } 4140c3a8cd0SMatthew Dillon 4150c3a8cd0SMatthew Dillon /* 4160c3a8cd0SMatthew Dillon * I/O core loop for an iocom. 4170c3a8cd0SMatthew Dillon * 4180c3a8cd0SMatthew Dillon * Thread localized, iocom->mtx not held. 4190c3a8cd0SMatthew Dillon */ 4200c3a8cd0SMatthew Dillon void 4210c3a8cd0SMatthew Dillon dmsg_iocom_core(dmsg_iocom_t *iocom) 4220c3a8cd0SMatthew Dillon { 4230c3a8cd0SMatthew Dillon struct pollfd fds[3]; 4240c3a8cd0SMatthew Dillon char dummybuf[256]; 4250c3a8cd0SMatthew Dillon dmsg_msg_t *msg; 4260c3a8cd0SMatthew Dillon int timeout; 4270c3a8cd0SMatthew Dillon int count; 4280c3a8cd0SMatthew Dillon int wi; /* wakeup pipe */ 4290c3a8cd0SMatthew Dillon int si; /* socket */ 4300c3a8cd0SMatthew Dillon int ai; /* alt bulk path socket */ 4310c3a8cd0SMatthew Dillon 4320c3a8cd0SMatthew Dillon while ((iocom->flags & DMSG_IOCOMF_EOF) == 0) { 433a2179323SMatthew Dillon /* 434a2179323SMatthew Dillon * These iocom->flags are only manipulated within the 435a2179323SMatthew Dillon * context of the current thread. However, modifications 436a2179323SMatthew Dillon * still require atomic ops. 437a2179323SMatthew Dillon */ 4385ab1caedSMatthew Dillon dmio_printf(iocom, 5, "iocom %p %08x\n", 4395ab1caedSMatthew Dillon iocom, iocom->flags); 4400c3a8cd0SMatthew Dillon if ((iocom->flags & (DMSG_IOCOMF_RWORK | 4410c3a8cd0SMatthew Dillon DMSG_IOCOMF_WWORK | 4420c3a8cd0SMatthew Dillon DMSG_IOCOMF_PWORK | 4430c3a8cd0SMatthew Dillon DMSG_IOCOMF_SWORK | 4440c3a8cd0SMatthew Dillon DMSG_IOCOMF_ARWORK | 4450c3a8cd0SMatthew Dillon DMSG_IOCOMF_AWWORK)) == 0) { 4460c3a8cd0SMatthew Dillon /* 4470c3a8cd0SMatthew Dillon * Only poll if no immediate work is pending. 4480c3a8cd0SMatthew Dillon * Otherwise we are just wasting our time calling 4490c3a8cd0SMatthew Dillon * poll. 
4500c3a8cd0SMatthew Dillon */ 4510c3a8cd0SMatthew Dillon timeout = 5000; 4520c3a8cd0SMatthew Dillon 4530c3a8cd0SMatthew Dillon count = 0; 4540c3a8cd0SMatthew Dillon wi = -1; 4550c3a8cd0SMatthew Dillon si = -1; 4560c3a8cd0SMatthew Dillon ai = -1; 4570c3a8cd0SMatthew Dillon 4580c3a8cd0SMatthew Dillon /* 4590c3a8cd0SMatthew Dillon * Always check the inter-thread pipe, e.g. 4600c3a8cd0SMatthew Dillon * for iocom->txmsgq work. 4610c3a8cd0SMatthew Dillon */ 4620c3a8cd0SMatthew Dillon wi = count++; 4630c3a8cd0SMatthew Dillon fds[wi].fd = iocom->wakeupfds[0]; 4640c3a8cd0SMatthew Dillon fds[wi].events = POLLIN; 4650c3a8cd0SMatthew Dillon fds[wi].revents = 0; 4660c3a8cd0SMatthew Dillon 4670c3a8cd0SMatthew Dillon /* 4680c3a8cd0SMatthew Dillon * Check the socket input/output direction as 4690c3a8cd0SMatthew Dillon * requested 4700c3a8cd0SMatthew Dillon */ 4710c3a8cd0SMatthew Dillon if (iocom->flags & (DMSG_IOCOMF_RREQ | 4720c3a8cd0SMatthew Dillon DMSG_IOCOMF_WREQ)) { 4730c3a8cd0SMatthew Dillon si = count++; 4740c3a8cd0SMatthew Dillon fds[si].fd = iocom->sock_fd; 4750c3a8cd0SMatthew Dillon fds[si].events = 0; 4760c3a8cd0SMatthew Dillon fds[si].revents = 0; 4770c3a8cd0SMatthew Dillon 4780c3a8cd0SMatthew Dillon if (iocom->flags & DMSG_IOCOMF_RREQ) 4790c3a8cd0SMatthew Dillon fds[si].events |= POLLIN; 4800c3a8cd0SMatthew Dillon if (iocom->flags & DMSG_IOCOMF_WREQ) 4810c3a8cd0SMatthew Dillon fds[si].events |= POLLOUT; 4820c3a8cd0SMatthew Dillon } 4830c3a8cd0SMatthew Dillon 4840c3a8cd0SMatthew Dillon /* 4850c3a8cd0SMatthew Dillon * Check the alternative fd for work. 
4860c3a8cd0SMatthew Dillon */ 4870c3a8cd0SMatthew Dillon if (iocom->alt_fd >= 0) { 4880c3a8cd0SMatthew Dillon ai = count++; 4890c3a8cd0SMatthew Dillon fds[ai].fd = iocom->alt_fd; 4900c3a8cd0SMatthew Dillon fds[ai].events = POLLIN; 4910c3a8cd0SMatthew Dillon fds[ai].revents = 0; 4920c3a8cd0SMatthew Dillon } 4930c3a8cd0SMatthew Dillon poll(fds, count, timeout); 4940c3a8cd0SMatthew Dillon 4950c3a8cd0SMatthew Dillon if (wi >= 0 && (fds[wi].revents & POLLIN)) 496a2179323SMatthew Dillon atomic_set_int(&iocom->flags, 497a2179323SMatthew Dillon DMSG_IOCOMF_PWORK); 4980c3a8cd0SMatthew Dillon if (si >= 0 && (fds[si].revents & POLLIN)) 499a2179323SMatthew Dillon atomic_set_int(&iocom->flags, 500a2179323SMatthew Dillon DMSG_IOCOMF_RWORK); 5010c3a8cd0SMatthew Dillon if (si >= 0 && (fds[si].revents & POLLOUT)) 502a2179323SMatthew Dillon atomic_set_int(&iocom->flags, 503a2179323SMatthew Dillon DMSG_IOCOMF_WWORK); 5040c3a8cd0SMatthew Dillon if (wi >= 0 && (fds[wi].revents & POLLOUT)) 505a2179323SMatthew Dillon atomic_set_int(&iocom->flags, 506a2179323SMatthew Dillon DMSG_IOCOMF_WWORK); 5070c3a8cd0SMatthew Dillon if (ai >= 0 && (fds[ai].revents & POLLIN)) 508a2179323SMatthew Dillon atomic_set_int(&iocom->flags, 509a2179323SMatthew Dillon DMSG_IOCOMF_ARWORK); 5100c3a8cd0SMatthew Dillon } else { 5110c3a8cd0SMatthew Dillon /* 5120c3a8cd0SMatthew Dillon * Always check the pipe 5130c3a8cd0SMatthew Dillon */ 514a2179323SMatthew Dillon atomic_set_int(&iocom->flags, DMSG_IOCOMF_PWORK); 5150c3a8cd0SMatthew Dillon } 5160c3a8cd0SMatthew Dillon 5170c3a8cd0SMatthew Dillon if (iocom->flags & DMSG_IOCOMF_SWORK) { 518a2179323SMatthew Dillon atomic_clear_int(&iocom->flags, DMSG_IOCOMF_SWORK); 5190d20ec8aSMatthew Dillon iocom->signal_callback(iocom); 5200c3a8cd0SMatthew Dillon } 5210c3a8cd0SMatthew Dillon 5220c3a8cd0SMatthew Dillon /* 5230c3a8cd0SMatthew Dillon * Pending message queues from other threads wake us up 5240c3a8cd0SMatthew Dillon * with a write to the wakeupfds[] pipe. 
We have to clear 5250c3a8cd0SMatthew Dillon * the pipe with a dummy read. 5260c3a8cd0SMatthew Dillon */ 5270c3a8cd0SMatthew Dillon if (iocom->flags & DMSG_IOCOMF_PWORK) { 528a2179323SMatthew Dillon atomic_clear_int(&iocom->flags, DMSG_IOCOMF_PWORK); 5290c3a8cd0SMatthew Dillon read(iocom->wakeupfds[0], dummybuf, sizeof(dummybuf)); 530a2179323SMatthew Dillon atomic_set_int(&iocom->flags, DMSG_IOCOMF_RWORK); 531a2179323SMatthew Dillon atomic_set_int(&iocom->flags, DMSG_IOCOMF_WWORK); 5320c3a8cd0SMatthew Dillon } 5330c3a8cd0SMatthew Dillon 5340c3a8cd0SMatthew Dillon /* 5350c3a8cd0SMatthew Dillon * Message write sequencing 5360c3a8cd0SMatthew Dillon */ 5370c3a8cd0SMatthew Dillon if (iocom->flags & DMSG_IOCOMF_WWORK) 5380c3a8cd0SMatthew Dillon dmsg_iocom_flush1(iocom); 5390c3a8cd0SMatthew Dillon 5400c3a8cd0SMatthew Dillon /* 5410c3a8cd0SMatthew Dillon * Message read sequencing. Run this after the write 5420c3a8cd0SMatthew Dillon * sequencing in case the write sequencing allowed another 5430c3a8cd0SMatthew Dillon * auto-DELETE to occur on the read side. 
5440c3a8cd0SMatthew Dillon */ 5450c3a8cd0SMatthew Dillon if (iocom->flags & DMSG_IOCOMF_RWORK) { 5460c3a8cd0SMatthew Dillon while ((iocom->flags & DMSG_IOCOMF_EOF) == 0 && 5470c3a8cd0SMatthew Dillon (msg = dmsg_ioq_read(iocom)) != NULL) { 5485ab1caedSMatthew Dillon dmio_printf(iocom, 4, "receive %s\n", 5490c3a8cd0SMatthew Dillon dmsg_msg_str(msg)); 5500d20ec8aSMatthew Dillon iocom->rcvmsg_callback(msg); 5510a9eefcaSMatthew Dillon pthread_mutex_lock(&iocom->mtx); 5520c3a8cd0SMatthew Dillon dmsg_state_cleanuprx(iocom, msg); 5530a9eefcaSMatthew Dillon pthread_mutex_unlock(&iocom->mtx); 5540c3a8cd0SMatthew Dillon } 5550c3a8cd0SMatthew Dillon } 5560c3a8cd0SMatthew Dillon 5570c3a8cd0SMatthew Dillon if (iocom->flags & DMSG_IOCOMF_ARWORK) { 558a2179323SMatthew Dillon atomic_clear_int(&iocom->flags, DMSG_IOCOMF_ARWORK); 5590d20ec8aSMatthew Dillon iocom->altmsg_callback(iocom); 5600c3a8cd0SMatthew Dillon } 5610c3a8cd0SMatthew Dillon } 5620c3a8cd0SMatthew Dillon } 5630c3a8cd0SMatthew Dillon 5640c3a8cd0SMatthew Dillon /* 5650c3a8cd0SMatthew Dillon * Make sure there's enough room in the FIFO to hold the 5660c3a8cd0SMatthew Dillon * needed data. 5670c3a8cd0SMatthew Dillon * 5680c3a8cd0SMatthew Dillon * Assume worst case encrypted form is 2x the size of the 5690c3a8cd0SMatthew Dillon * plaintext equivalent. 
5700c3a8cd0SMatthew Dillon */ 5710c3a8cd0SMatthew Dillon static 5720c3a8cd0SMatthew Dillon size_t 5730c3a8cd0SMatthew Dillon dmsg_ioq_makeroom(dmsg_ioq_t *ioq, size_t needed) 5740c3a8cd0SMatthew Dillon { 5750c3a8cd0SMatthew Dillon size_t bytes; 5760c3a8cd0SMatthew Dillon size_t nmax; 5770c3a8cd0SMatthew Dillon 5780c3a8cd0SMatthew Dillon bytes = ioq->fifo_cdx - ioq->fifo_beg; 5790c3a8cd0SMatthew Dillon nmax = sizeof(ioq->buf) - ioq->fifo_end; 5800c3a8cd0SMatthew Dillon if (bytes + nmax / 2 < needed) { 5810c3a8cd0SMatthew Dillon if (bytes) { 5820c3a8cd0SMatthew Dillon bcopy(ioq->buf + ioq->fifo_beg, 5830c3a8cd0SMatthew Dillon ioq->buf, 5840c3a8cd0SMatthew Dillon bytes); 5850c3a8cd0SMatthew Dillon } 5860c3a8cd0SMatthew Dillon ioq->fifo_cdx -= ioq->fifo_beg; 5870c3a8cd0SMatthew Dillon ioq->fifo_beg = 0; 5880c3a8cd0SMatthew Dillon if (ioq->fifo_cdn < ioq->fifo_end) { 5890c3a8cd0SMatthew Dillon bcopy(ioq->buf + ioq->fifo_cdn, 5900c3a8cd0SMatthew Dillon ioq->buf + ioq->fifo_cdx, 5910c3a8cd0SMatthew Dillon ioq->fifo_end - ioq->fifo_cdn); 5920c3a8cd0SMatthew Dillon } 5930c3a8cd0SMatthew Dillon ioq->fifo_end -= ioq->fifo_cdn - ioq->fifo_cdx; 5940c3a8cd0SMatthew Dillon ioq->fifo_cdn = ioq->fifo_cdx; 5950c3a8cd0SMatthew Dillon nmax = sizeof(ioq->buf) - ioq->fifo_end; 5960c3a8cd0SMatthew Dillon } 5970c3a8cd0SMatthew Dillon return(nmax); 5980c3a8cd0SMatthew Dillon } 5990c3a8cd0SMatthew Dillon 6000c3a8cd0SMatthew Dillon /* 6010c3a8cd0SMatthew Dillon * Read the next ready message from the ioq, issuing I/O if needed. 6020c3a8cd0SMatthew Dillon * Caller should retry on a read-event when NULL is returned. 6030c3a8cd0SMatthew Dillon * 6040c3a8cd0SMatthew Dillon * If an error occurs during reception a DMSG_LNK_ERROR msg will 6050c3a8cd0SMatthew Dillon * be returned for each open transaction, then the ioq and iocom 6060c3a8cd0SMatthew Dillon * will be errored out and a non-transactional DMSG_LNK_ERROR 6070c3a8cd0SMatthew Dillon * msg will be returned as the final message. 
The caller should not call 6080c3a8cd0SMatthew Dillon * us again after the final message is returned. 6090c3a8cd0SMatthew Dillon * 6100c3a8cd0SMatthew Dillon * Thread localized, iocom->mtx not held. 6110c3a8cd0SMatthew Dillon */ 6120c3a8cd0SMatthew Dillon dmsg_msg_t * 6130c3a8cd0SMatthew Dillon dmsg_ioq_read(dmsg_iocom_t *iocom) 6140c3a8cd0SMatthew Dillon { 6150c3a8cd0SMatthew Dillon dmsg_ioq_t *ioq = &iocom->ioq_rx; 6160c3a8cd0SMatthew Dillon dmsg_msg_t *msg; 6170c3a8cd0SMatthew Dillon dmsg_hdr_t *head; 6180c3a8cd0SMatthew Dillon ssize_t n; 6190c3a8cd0SMatthew Dillon size_t bytes; 6200c3a8cd0SMatthew Dillon size_t nmax; 621f306de83SMatthew Dillon uint32_t aux_size; 6220c3a8cd0SMatthew Dillon uint32_t xcrc32; 6230c3a8cd0SMatthew Dillon int error; 6240c3a8cd0SMatthew Dillon 6250c3a8cd0SMatthew Dillon again: 6260c3a8cd0SMatthew Dillon /* 6270c3a8cd0SMatthew Dillon * If a message is already pending we can just remove and 6280c3a8cd0SMatthew Dillon * return it. Message state has already been processed. 6290c3a8cd0SMatthew Dillon * (currently not implemented) 6300c3a8cd0SMatthew Dillon */ 6310c3a8cd0SMatthew Dillon if ((msg = TAILQ_FIRST(&ioq->msgq)) != NULL) { 6320c3a8cd0SMatthew Dillon TAILQ_REMOVE(&ioq->msgq, msg, qentry); 633a06d536bSMatthew Dillon 634a06d536bSMatthew Dillon if (msg->state == &iocom->state0) { 635a06d536bSMatthew Dillon atomic_set_int(&iocom->flags, DMSG_IOCOMF_EOF); 6365ab1caedSMatthew Dillon dmio_printf(iocom, 1, 6375ab1caedSMatthew Dillon "EOF ON SOCKET %d\n", 6385ab1caedSMatthew Dillon iocom->sock_fd); 639a06d536bSMatthew Dillon } 6400c3a8cd0SMatthew Dillon return (msg); 6410c3a8cd0SMatthew Dillon } 642a2179323SMatthew Dillon atomic_clear_int(&iocom->flags, DMSG_IOCOMF_RREQ | DMSG_IOCOMF_RWORK); 6430c3a8cd0SMatthew Dillon 6440c3a8cd0SMatthew Dillon /* 6450c3a8cd0SMatthew Dillon * If the stream is errored out we stop processing it. 
6460c3a8cd0SMatthew Dillon */ 6470c3a8cd0SMatthew Dillon if (ioq->error) 6480c3a8cd0SMatthew Dillon goto skip; 6490c3a8cd0SMatthew Dillon 6500c3a8cd0SMatthew Dillon /* 6510c3a8cd0SMatthew Dillon * Message read in-progress (msg is NULL at the moment). We don't 6520c3a8cd0SMatthew Dillon * allocate a msg until we have its core header. 6530c3a8cd0SMatthew Dillon */ 6540c3a8cd0SMatthew Dillon nmax = sizeof(ioq->buf) - ioq->fifo_end; 6550c3a8cd0SMatthew Dillon bytes = ioq->fifo_cdx - ioq->fifo_beg; /* already decrypted */ 6560c3a8cd0SMatthew Dillon msg = ioq->msg; 6570c3a8cd0SMatthew Dillon 6580c3a8cd0SMatthew Dillon switch(ioq->state) { 6590c3a8cd0SMatthew Dillon case DMSG_MSGQ_STATE_HEADER1: 6600c3a8cd0SMatthew Dillon /* 6610c3a8cd0SMatthew Dillon * Load the primary header, fail on any non-trivial read 6620c3a8cd0SMatthew Dillon * error or on EOF. Since the primary header is the same 6630c3a8cd0SMatthew Dillon * size is the message alignment it will never straddle 6640c3a8cd0SMatthew Dillon * the end of the buffer. 
6650c3a8cd0SMatthew Dillon */ 6660c3a8cd0SMatthew Dillon nmax = dmsg_ioq_makeroom(ioq, sizeof(msg->any.head)); 6670c3a8cd0SMatthew Dillon if (bytes < sizeof(msg->any.head)) { 6680c3a8cd0SMatthew Dillon n = read(iocom->sock_fd, 6690c3a8cd0SMatthew Dillon ioq->buf + ioq->fifo_end, 6700c3a8cd0SMatthew Dillon nmax); 6710c3a8cd0SMatthew Dillon if (n <= 0) { 6720c3a8cd0SMatthew Dillon if (n == 0) { 6730c3a8cd0SMatthew Dillon ioq->error = DMSG_IOQ_ERROR_EOF; 6740c3a8cd0SMatthew Dillon break; 6750c3a8cd0SMatthew Dillon } 6760c3a8cd0SMatthew Dillon if (errno != EINTR && 6770c3a8cd0SMatthew Dillon errno != EINPROGRESS && 6780c3a8cd0SMatthew Dillon errno != EAGAIN) { 6790c3a8cd0SMatthew Dillon ioq->error = DMSG_IOQ_ERROR_SOCK; 6800c3a8cd0SMatthew Dillon break; 6810c3a8cd0SMatthew Dillon } 6820c3a8cd0SMatthew Dillon n = 0; 6830c3a8cd0SMatthew Dillon /* fall through */ 6840c3a8cd0SMatthew Dillon } 6850c3a8cd0SMatthew Dillon ioq->fifo_end += (size_t)n; 6860c3a8cd0SMatthew Dillon nmax -= (size_t)n; 6870c3a8cd0SMatthew Dillon } 6880c3a8cd0SMatthew Dillon 6890c3a8cd0SMatthew Dillon /* 6900c3a8cd0SMatthew Dillon * Decrypt data received so far. Data will be decrypted 6910c3a8cd0SMatthew Dillon * in-place but might create gaps in the FIFO. Partial 6920c3a8cd0SMatthew Dillon * blocks are not immediately decrypted. 6930c3a8cd0SMatthew Dillon * 6940c3a8cd0SMatthew Dillon * WARNING! The header might be in the wrong endian, we 6950c3a8cd0SMatthew Dillon * do not fix it up until we get the entire 6960c3a8cd0SMatthew Dillon * extended header. 
6970c3a8cd0SMatthew Dillon */ 6980c3a8cd0SMatthew Dillon if (iocom->flags & DMSG_IOCOMF_CRYPTED) { 6990c3a8cd0SMatthew Dillon dmsg_crypto_decrypt(iocom, ioq); 7000c3a8cd0SMatthew Dillon } else { 7010c3a8cd0SMatthew Dillon ioq->fifo_cdx = ioq->fifo_end; 7020c3a8cd0SMatthew Dillon ioq->fifo_cdn = ioq->fifo_end; 7030c3a8cd0SMatthew Dillon } 7040c3a8cd0SMatthew Dillon bytes = ioq->fifo_cdx - ioq->fifo_beg; 7050c3a8cd0SMatthew Dillon 7060c3a8cd0SMatthew Dillon /* 7070c3a8cd0SMatthew Dillon * Insufficient data accumulated (msg is NULL, caller will 7080c3a8cd0SMatthew Dillon * retry on event). 7090c3a8cd0SMatthew Dillon */ 7100c3a8cd0SMatthew Dillon assert(msg == NULL); 7110c3a8cd0SMatthew Dillon if (bytes < sizeof(msg->any.head)) 7120c3a8cd0SMatthew Dillon break; 7130c3a8cd0SMatthew Dillon 7140c3a8cd0SMatthew Dillon /* 7150c3a8cd0SMatthew Dillon * Check and fixup the core header. Note that the icrc 7160c3a8cd0SMatthew Dillon * has to be calculated before any fixups, but the crc 7170c3a8cd0SMatthew Dillon * fields in the msg may have to be swapped like everything 7180c3a8cd0SMatthew Dillon * else. 
7190c3a8cd0SMatthew Dillon */ 7200c3a8cd0SMatthew Dillon head = (void *)(ioq->buf + ioq->fifo_beg); 7210c3a8cd0SMatthew Dillon if (head->magic != DMSG_HDR_MAGIC && 7220c3a8cd0SMatthew Dillon head->magic != DMSG_HDR_MAGIC_REV) { 7235ab1caedSMatthew Dillon dmio_printf(iocom, 1, 7245ab1caedSMatthew Dillon "%s: head->magic is bad %02x\n", 725f306de83SMatthew Dillon iocom->label, head->magic); 726f306de83SMatthew Dillon if (iocom->flags & DMSG_IOCOMF_CRYPTED) 7275ab1caedSMatthew Dillon dmio_printf(iocom, 1, "%s\n", 7285ab1caedSMatthew Dillon "(on encrypted link)"); 7290c3a8cd0SMatthew Dillon ioq->error = DMSG_IOQ_ERROR_SYNC; 7300c3a8cd0SMatthew Dillon break; 7310c3a8cd0SMatthew Dillon } 7320c3a8cd0SMatthew Dillon 7330c3a8cd0SMatthew Dillon /* 7340c3a8cd0SMatthew Dillon * Calculate the full header size and aux data size 7350c3a8cd0SMatthew Dillon */ 7360c3a8cd0SMatthew Dillon if (head->magic == DMSG_HDR_MAGIC_REV) { 7370c3a8cd0SMatthew Dillon ioq->hbytes = (bswap32(head->cmd) & DMSGF_SIZE) * 7380c3a8cd0SMatthew Dillon DMSG_ALIGN; 739f306de83SMatthew Dillon aux_size = bswap32(head->aux_bytes); 7400c3a8cd0SMatthew Dillon } else { 7410c3a8cd0SMatthew Dillon ioq->hbytes = (head->cmd & DMSGF_SIZE) * 7420c3a8cd0SMatthew Dillon DMSG_ALIGN; 743f306de83SMatthew Dillon aux_size = head->aux_bytes; 7440c3a8cd0SMatthew Dillon } 745f306de83SMatthew Dillon ioq->abytes = DMSG_DOALIGN(aux_size); 746f306de83SMatthew Dillon ioq->unaligned_aux_size = aux_size; 7470c3a8cd0SMatthew Dillon if (ioq->hbytes < sizeof(msg->any.head) || 7480c3a8cd0SMatthew Dillon ioq->hbytes > sizeof(msg->any) || 7490c3a8cd0SMatthew Dillon ioq->abytes > DMSG_AUX_MAX) { 7500c3a8cd0SMatthew Dillon ioq->error = DMSG_IOQ_ERROR_FIELD; 7510c3a8cd0SMatthew Dillon break; 7520c3a8cd0SMatthew Dillon } 7530c3a8cd0SMatthew Dillon 7540c3a8cd0SMatthew Dillon /* 7550c3a8cd0SMatthew Dillon * Allocate the message, the next state will fill it in. 
7561b8eded1SMatthew Dillon * 7571b8eded1SMatthew Dillon * NOTE: The aux_data buffer will be sized to an aligned 7581b8eded1SMatthew Dillon * value and the aligned remainder zero'd for 7591b8eded1SMatthew Dillon * convenience. 7601b8eded1SMatthew Dillon * 7611b8eded1SMatthew Dillon * NOTE: Supply dummy state and a degenerate cmd without 7621b8eded1SMatthew Dillon * CREATE set. The message will temporarily be 7631b8eded1SMatthew Dillon * associated with state0 until later post-processing. 7640c3a8cd0SMatthew Dillon */ 7651b8eded1SMatthew Dillon msg = dmsg_msg_alloc(&iocom->state0, aux_size, 766a2179323SMatthew Dillon ioq->hbytes / DMSG_ALIGN, 7670c3a8cd0SMatthew Dillon NULL, NULL); 7680c3a8cd0SMatthew Dillon ioq->msg = msg; 7690c3a8cd0SMatthew Dillon 7700c3a8cd0SMatthew Dillon /* 7710c3a8cd0SMatthew Dillon * Fall through to the next state. Make sure that the 7720c3a8cd0SMatthew Dillon * extended header does not straddle the end of the buffer. 7730c3a8cd0SMatthew Dillon * We still want to issue larger reads into our buffer, 7740c3a8cd0SMatthew Dillon * book-keeping is easier if we don't bcopy() yet. 7750c3a8cd0SMatthew Dillon * 7760c3a8cd0SMatthew Dillon * Make sure there is enough room for bloated encrypt data. 7770c3a8cd0SMatthew Dillon */ 7780c3a8cd0SMatthew Dillon nmax = dmsg_ioq_makeroom(ioq, ioq->hbytes); 7790c3a8cd0SMatthew Dillon ioq->state = DMSG_MSGQ_STATE_HEADER2; 7800c3a8cd0SMatthew Dillon /* fall through */ 7810c3a8cd0SMatthew Dillon case DMSG_MSGQ_STATE_HEADER2: 7820c3a8cd0SMatthew Dillon /* 7830c3a8cd0SMatthew Dillon * Fill out the extended header. 
7840c3a8cd0SMatthew Dillon */ 7850c3a8cd0SMatthew Dillon assert(msg != NULL); 7860c3a8cd0SMatthew Dillon if (bytes < ioq->hbytes) { 7870a9eefcaSMatthew Dillon assert(nmax > 0); 7880c3a8cd0SMatthew Dillon n = read(iocom->sock_fd, 7890c3a8cd0SMatthew Dillon ioq->buf + ioq->fifo_end, 7900c3a8cd0SMatthew Dillon nmax); 7910c3a8cd0SMatthew Dillon if (n <= 0) { 7920c3a8cd0SMatthew Dillon if (n == 0) { 7930c3a8cd0SMatthew Dillon ioq->error = DMSG_IOQ_ERROR_EOF; 7940c3a8cd0SMatthew Dillon break; 7950c3a8cd0SMatthew Dillon } 7960c3a8cd0SMatthew Dillon if (errno != EINTR && 7970c3a8cd0SMatthew Dillon errno != EINPROGRESS && 7980c3a8cd0SMatthew Dillon errno != EAGAIN) { 7990c3a8cd0SMatthew Dillon ioq->error = DMSG_IOQ_ERROR_SOCK; 8000c3a8cd0SMatthew Dillon break; 8010c3a8cd0SMatthew Dillon } 8020c3a8cd0SMatthew Dillon n = 0; 8030c3a8cd0SMatthew Dillon /* fall through */ 8040c3a8cd0SMatthew Dillon } 8050c3a8cd0SMatthew Dillon ioq->fifo_end += (size_t)n; 8060c3a8cd0SMatthew Dillon nmax -= (size_t)n; 8070c3a8cd0SMatthew Dillon } 8080c3a8cd0SMatthew Dillon 8090c3a8cd0SMatthew Dillon if (iocom->flags & DMSG_IOCOMF_CRYPTED) { 8100c3a8cd0SMatthew Dillon dmsg_crypto_decrypt(iocom, ioq); 8110c3a8cd0SMatthew Dillon } else { 8120c3a8cd0SMatthew Dillon ioq->fifo_cdx = ioq->fifo_end; 8130c3a8cd0SMatthew Dillon ioq->fifo_cdn = ioq->fifo_end; 8140c3a8cd0SMatthew Dillon } 8150c3a8cd0SMatthew Dillon bytes = ioq->fifo_cdx - ioq->fifo_beg; 8160c3a8cd0SMatthew Dillon 8170c3a8cd0SMatthew Dillon /* 8180c3a8cd0SMatthew Dillon * Insufficient data accumulated (set msg NULL so caller will 8190c3a8cd0SMatthew Dillon * retry on event). 8200c3a8cd0SMatthew Dillon */ 8210c3a8cd0SMatthew Dillon if (bytes < ioq->hbytes) { 8220c3a8cd0SMatthew Dillon msg = NULL; 8230c3a8cd0SMatthew Dillon break; 8240c3a8cd0SMatthew Dillon } 8250c3a8cd0SMatthew Dillon 8260c3a8cd0SMatthew Dillon /* 8270c3a8cd0SMatthew Dillon * Calculate the extended header, decrypt data received 8280c3a8cd0SMatthew Dillon * so far. 
Handle endian-conversion for the entire extended 8290c3a8cd0SMatthew Dillon * header. 8300c3a8cd0SMatthew Dillon */ 8310c3a8cd0SMatthew Dillon head = (void *)(ioq->buf + ioq->fifo_beg); 8320c3a8cd0SMatthew Dillon 8330c3a8cd0SMatthew Dillon /* 8340c3a8cd0SMatthew Dillon * Check the CRC. 8350c3a8cd0SMatthew Dillon */ 8360c3a8cd0SMatthew Dillon if (head->magic == DMSG_HDR_MAGIC_REV) 8370c3a8cd0SMatthew Dillon xcrc32 = bswap32(head->hdr_crc); 8380c3a8cd0SMatthew Dillon else 8390c3a8cd0SMatthew Dillon xcrc32 = head->hdr_crc; 8400c3a8cd0SMatthew Dillon head->hdr_crc = 0; 8410c3a8cd0SMatthew Dillon if (dmsg_icrc32(head, ioq->hbytes) != xcrc32) { 8420c3a8cd0SMatthew Dillon ioq->error = DMSG_IOQ_ERROR_XCRC; 8435ab1caedSMatthew Dillon dmio_printf(iocom, 1, "BAD-XCRC(%08x,%08x) %s\n", 8440c3a8cd0SMatthew Dillon xcrc32, dmsg_icrc32(head, ioq->hbytes), 8450c3a8cd0SMatthew Dillon dmsg_msg_str(msg)); 8460c3a8cd0SMatthew Dillon assert(0); 8470c3a8cd0SMatthew Dillon break; 8480c3a8cd0SMatthew Dillon } 8490c3a8cd0SMatthew Dillon head->hdr_crc = xcrc32; 8500c3a8cd0SMatthew Dillon 8510c3a8cd0SMatthew Dillon if (head->magic == DMSG_HDR_MAGIC_REV) { 8520c3a8cd0SMatthew Dillon dmsg_bswap_head(head); 8530c3a8cd0SMatthew Dillon } 8540c3a8cd0SMatthew Dillon 8550c3a8cd0SMatthew Dillon /* 8560c3a8cd0SMatthew Dillon * Copy the extended header into the msg and adjust the 8570c3a8cd0SMatthew Dillon * FIFO. 8580c3a8cd0SMatthew Dillon */ 8590c3a8cd0SMatthew Dillon bcopy(head, &msg->any, ioq->hbytes); 8600c3a8cd0SMatthew Dillon 8610c3a8cd0SMatthew Dillon /* 8620c3a8cd0SMatthew Dillon * We are either done or we fall-through. 8630c3a8cd0SMatthew Dillon */ 8640c3a8cd0SMatthew Dillon if (ioq->abytes == 0) { 8650c3a8cd0SMatthew Dillon ioq->fifo_beg += ioq->hbytes; 8660c3a8cd0SMatthew Dillon break; 8670c3a8cd0SMatthew Dillon } 8680c3a8cd0SMatthew Dillon 8690c3a8cd0SMatthew Dillon /* 8700c3a8cd0SMatthew Dillon * Must adjust bytes (and the state) when falling through. 
8710c3a8cd0SMatthew Dillon * nmax doesn't change. 8720c3a8cd0SMatthew Dillon */ 8730c3a8cd0SMatthew Dillon ioq->fifo_beg += ioq->hbytes; 8740c3a8cd0SMatthew Dillon bytes -= ioq->hbytes; 8750c3a8cd0SMatthew Dillon ioq->state = DMSG_MSGQ_STATE_AUXDATA1; 8760c3a8cd0SMatthew Dillon /* fall through */ 8770c3a8cd0SMatthew Dillon case DMSG_MSGQ_STATE_AUXDATA1: 8780c3a8cd0SMatthew Dillon /* 879a2179323SMatthew Dillon * Copy the partial or complete [decrypted] payload from 880a2179323SMatthew Dillon * remaining bytes in the FIFO in order to optimize the 881a2179323SMatthew Dillon * makeroom call in the AUXDATA2 state. We have to 882a2179323SMatthew Dillon * fall-through either way so we can check the crc. 8830c3a8cd0SMatthew Dillon * 8840c3a8cd0SMatthew Dillon * msg->aux_size tracks our aux data. 885a2179323SMatthew Dillon * 886a2179323SMatthew Dillon * (Lets not complicate matters if the data is encrypted, 887a2179323SMatthew Dillon * since the data in-stream is not the same size as the 888a2179323SMatthew Dillon * data decrypted). 
8890c3a8cd0SMatthew Dillon */ 8900c3a8cd0SMatthew Dillon if (bytes >= ioq->abytes) { 8910c3a8cd0SMatthew Dillon bcopy(ioq->buf + ioq->fifo_beg, msg->aux_data, 8920c3a8cd0SMatthew Dillon ioq->abytes); 8930c3a8cd0SMatthew Dillon msg->aux_size = ioq->abytes; 8940c3a8cd0SMatthew Dillon ioq->fifo_beg += ioq->abytes; 8950c3a8cd0SMatthew Dillon assert(ioq->fifo_beg <= ioq->fifo_cdx); 8960c3a8cd0SMatthew Dillon assert(ioq->fifo_cdx <= ioq->fifo_cdn); 8970c3a8cd0SMatthew Dillon bytes -= ioq->abytes; 8980c3a8cd0SMatthew Dillon } else if (bytes) { 8990c3a8cd0SMatthew Dillon bcopy(ioq->buf + ioq->fifo_beg, msg->aux_data, 9000c3a8cd0SMatthew Dillon bytes); 9010c3a8cd0SMatthew Dillon msg->aux_size = bytes; 9020c3a8cd0SMatthew Dillon ioq->fifo_beg += bytes; 9030c3a8cd0SMatthew Dillon if (ioq->fifo_cdx < ioq->fifo_beg) 9040c3a8cd0SMatthew Dillon ioq->fifo_cdx = ioq->fifo_beg; 9050c3a8cd0SMatthew Dillon assert(ioq->fifo_beg <= ioq->fifo_cdx); 9060c3a8cd0SMatthew Dillon assert(ioq->fifo_cdx <= ioq->fifo_cdn); 9070c3a8cd0SMatthew Dillon bytes = 0; 9080c3a8cd0SMatthew Dillon } else { 9090c3a8cd0SMatthew Dillon msg->aux_size = 0; 9100c3a8cd0SMatthew Dillon } 9110c3a8cd0SMatthew Dillon ioq->state = DMSG_MSGQ_STATE_AUXDATA2; 9120c3a8cd0SMatthew Dillon /* fall through */ 9130c3a8cd0SMatthew Dillon case DMSG_MSGQ_STATE_AUXDATA2: 9140c3a8cd0SMatthew Dillon /* 9150c3a8cd0SMatthew Dillon * Make sure there is enough room for more data. 9160c3a8cd0SMatthew Dillon */ 9170c3a8cd0SMatthew Dillon assert(msg); 9180c3a8cd0SMatthew Dillon nmax = dmsg_ioq_makeroom(ioq, ioq->abytes - msg->aux_size); 9190c3a8cd0SMatthew Dillon 9200c3a8cd0SMatthew Dillon /* 9210c3a8cd0SMatthew Dillon * Read and decrypt more of the payload. 
9220c3a8cd0SMatthew Dillon */ 9230c3a8cd0SMatthew Dillon if (msg->aux_size < ioq->abytes) { 9240a9eefcaSMatthew Dillon assert(nmax > 0); 9250c3a8cd0SMatthew Dillon assert(bytes == 0); 9260c3a8cd0SMatthew Dillon n = read(iocom->sock_fd, 9270c3a8cd0SMatthew Dillon ioq->buf + ioq->fifo_end, 9280c3a8cd0SMatthew Dillon nmax); 9290c3a8cd0SMatthew Dillon if (n <= 0) { 9300c3a8cd0SMatthew Dillon if (n == 0) { 9310c3a8cd0SMatthew Dillon ioq->error = DMSG_IOQ_ERROR_EOF; 9320c3a8cd0SMatthew Dillon break; 9330c3a8cd0SMatthew Dillon } 9340c3a8cd0SMatthew Dillon if (errno != EINTR && 9350c3a8cd0SMatthew Dillon errno != EINPROGRESS && 9360c3a8cd0SMatthew Dillon errno != EAGAIN) { 9370c3a8cd0SMatthew Dillon ioq->error = DMSG_IOQ_ERROR_SOCK; 9380c3a8cd0SMatthew Dillon break; 9390c3a8cd0SMatthew Dillon } 9400c3a8cd0SMatthew Dillon n = 0; 9410c3a8cd0SMatthew Dillon /* fall through */ 9420c3a8cd0SMatthew Dillon } 9430c3a8cd0SMatthew Dillon ioq->fifo_end += (size_t)n; 9440c3a8cd0SMatthew Dillon nmax -= (size_t)n; 9450c3a8cd0SMatthew Dillon } 9460c3a8cd0SMatthew Dillon 9470c3a8cd0SMatthew Dillon if (iocom->flags & DMSG_IOCOMF_CRYPTED) { 9480c3a8cd0SMatthew Dillon dmsg_crypto_decrypt(iocom, ioq); 9490c3a8cd0SMatthew Dillon } else { 9500c3a8cd0SMatthew Dillon ioq->fifo_cdx = ioq->fifo_end; 9510c3a8cd0SMatthew Dillon ioq->fifo_cdn = ioq->fifo_end; 9520c3a8cd0SMatthew Dillon } 9530c3a8cd0SMatthew Dillon bytes = ioq->fifo_cdx - ioq->fifo_beg; 9540c3a8cd0SMatthew Dillon 9550c3a8cd0SMatthew Dillon if (bytes > ioq->abytes - msg->aux_size) 9560c3a8cd0SMatthew Dillon bytes = ioq->abytes - msg->aux_size; 9570c3a8cd0SMatthew Dillon 9580c3a8cd0SMatthew Dillon if (bytes) { 9590c3a8cd0SMatthew Dillon bcopy(ioq->buf + ioq->fifo_beg, 9600c3a8cd0SMatthew Dillon msg->aux_data + msg->aux_size, 9610c3a8cd0SMatthew Dillon bytes); 9620c3a8cd0SMatthew Dillon msg->aux_size += bytes; 9630c3a8cd0SMatthew Dillon ioq->fifo_beg += bytes; 9640c3a8cd0SMatthew Dillon } 9650c3a8cd0SMatthew Dillon 9660c3a8cd0SMatthew 
Dillon /* 9670c3a8cd0SMatthew Dillon * Insufficient data accumulated (set msg NULL so caller will 9680c3a8cd0SMatthew Dillon * retry on event). 969f306de83SMatthew Dillon * 970f306de83SMatthew Dillon * Assert the auxillary data size is correct, then record the 971f306de83SMatthew Dillon * original unaligned size from the message header. 9720c3a8cd0SMatthew Dillon */ 9730c3a8cd0SMatthew Dillon if (msg->aux_size < ioq->abytes) { 9740c3a8cd0SMatthew Dillon msg = NULL; 9750c3a8cd0SMatthew Dillon break; 9760c3a8cd0SMatthew Dillon } 9770c3a8cd0SMatthew Dillon assert(msg->aux_size == ioq->abytes); 978f306de83SMatthew Dillon msg->aux_size = ioq->unaligned_aux_size; 9790c3a8cd0SMatthew Dillon 9800c3a8cd0SMatthew Dillon /* 981f306de83SMatthew Dillon * Check aux_crc, then we are done. Note that the crc 982f306de83SMatthew Dillon * is calculated over the aligned size, not the actual 983f306de83SMatthew Dillon * size. 9840c3a8cd0SMatthew Dillon */ 985f306de83SMatthew Dillon xcrc32 = dmsg_icrc32(msg->aux_data, ioq->abytes); 9860c3a8cd0SMatthew Dillon if (xcrc32 != msg->any.head.aux_crc) { 9870c3a8cd0SMatthew Dillon ioq->error = DMSG_IOQ_ERROR_ACRC; 9885ab1caedSMatthew Dillon dmio_printf(iocom, 1, 989d30cab67SMatthew Dillon "iocom: ACRC error %08x vs %08x " 990d30cab67SMatthew Dillon "msgid %016jx msgcmd %08x auxsize %d\n", 991d30cab67SMatthew Dillon xcrc32, 992d30cab67SMatthew Dillon msg->any.head.aux_crc, 993d30cab67SMatthew Dillon (intmax_t)msg->any.head.msgid, 994d30cab67SMatthew Dillon msg->any.head.cmd, 995d30cab67SMatthew Dillon msg->any.head.aux_bytes); 9960c3a8cd0SMatthew Dillon break; 9970c3a8cd0SMatthew Dillon } 9980c3a8cd0SMatthew Dillon break; 9990c3a8cd0SMatthew Dillon case DMSG_MSGQ_STATE_ERROR: 10000c3a8cd0SMatthew Dillon /* 10010c3a8cd0SMatthew Dillon * Continued calls to drain recorded transactions (returning 10020c3a8cd0SMatthew Dillon * a LNK_ERROR for each one), before we return the final 10030c3a8cd0SMatthew Dillon * LNK_ERROR. 
10040c3a8cd0SMatthew Dillon */ 10050c3a8cd0SMatthew Dillon assert(msg == NULL); 10060c3a8cd0SMatthew Dillon break; 10070c3a8cd0SMatthew Dillon default: 10080c3a8cd0SMatthew Dillon /* 10090c3a8cd0SMatthew Dillon * We don't double-return errors, the caller should not 10100c3a8cd0SMatthew Dillon * have called us again after getting an error msg. 10110c3a8cd0SMatthew Dillon */ 10120c3a8cd0SMatthew Dillon assert(0); 10130c3a8cd0SMatthew Dillon break; 10140c3a8cd0SMatthew Dillon } 10150c3a8cd0SMatthew Dillon 10160c3a8cd0SMatthew Dillon /* 10170c3a8cd0SMatthew Dillon * Check the message sequence. The iv[] should prevent any 10180c3a8cd0SMatthew Dillon * possibility of a replay but we add this check anyway. 10190c3a8cd0SMatthew Dillon */ 10200c3a8cd0SMatthew Dillon if (msg && ioq->error == 0) { 10210c3a8cd0SMatthew Dillon if ((msg->any.head.salt & 255) != (ioq->seq & 255)) { 10220c3a8cd0SMatthew Dillon ioq->error = DMSG_IOQ_ERROR_MSGSEQ; 10230c3a8cd0SMatthew Dillon } else { 10240c3a8cd0SMatthew Dillon ++ioq->seq; 10250c3a8cd0SMatthew Dillon } 10260c3a8cd0SMatthew Dillon } 10270c3a8cd0SMatthew Dillon 10280c3a8cd0SMatthew Dillon /* 10290c3a8cd0SMatthew Dillon * Handle error, RREQ, or completion 10300c3a8cd0SMatthew Dillon * 10310c3a8cd0SMatthew Dillon * NOTE: nmax and bytes are invalid at this point, we don't bother 10320c3a8cd0SMatthew Dillon * to update them when breaking out. 10330c3a8cd0SMatthew Dillon */ 10340c3a8cd0SMatthew Dillon if (ioq->error) { 10350c3a8cd0SMatthew Dillon skip: 10360c3a8cd0SMatthew Dillon /* 10370c3a8cd0SMatthew Dillon * An unrecoverable error causes all active receive 10380c3a8cd0SMatthew Dillon * transactions to be terminated with a LNK_ERROR message. 
10390c3a8cd0SMatthew Dillon * 10400c3a8cd0SMatthew Dillon * Once all active transactions are exhausted we set the 10410c3a8cd0SMatthew Dillon * iocom ERROR flag and return a non-transactional LNK_ERROR 10420c3a8cd0SMatthew Dillon * message, which should cause master processing loops to 10430c3a8cd0SMatthew Dillon * terminate. 10440c3a8cd0SMatthew Dillon */ 10455ab1caedSMatthew Dillon dmio_printf(iocom, 1, "IOQ ERROR %d\n", ioq->error); 10460c3a8cd0SMatthew Dillon assert(ioq->msg == msg); 10470c3a8cd0SMatthew Dillon if (msg) { 10480c3a8cd0SMatthew Dillon dmsg_msg_free(msg); 10490c3a8cd0SMatthew Dillon ioq->msg = NULL; 1050323c0947SMatthew Dillon msg = NULL; 10510c3a8cd0SMatthew Dillon } 10520c3a8cd0SMatthew Dillon 10530c3a8cd0SMatthew Dillon /* 10540c3a8cd0SMatthew Dillon * No more I/O read processing 10550c3a8cd0SMatthew Dillon */ 10560c3a8cd0SMatthew Dillon ioq->state = DMSG_MSGQ_STATE_ERROR; 10570c3a8cd0SMatthew Dillon 10580c3a8cd0SMatthew Dillon /* 10590c3a8cd0SMatthew Dillon * Simulate a remote LNK_ERROR DELETE msg for any open 10600c3a8cd0SMatthew Dillon * transactions, ending with a final non-transactional 10610c3a8cd0SMatthew Dillon * LNK_ERROR (that the session can detect) when no 10620c3a8cd0SMatthew Dillon * transactions remain. 10630d20ec8aSMatthew Dillon * 10641b8eded1SMatthew Dillon * NOTE: Temporarily supply state0 and a degenerate cmd 10651b8eded1SMatthew Dillon * without CREATE set. The real state will be 10661b8eded1SMatthew Dillon * assigned in the loop. 10671b8eded1SMatthew Dillon * 10681b8eded1SMatthew Dillon * NOTE: We are simulating a received message using our 10691b8eded1SMatthew Dillon * side of the state, so the DMSGF_REV* bits have 10701b8eded1SMatthew Dillon * to be reversed. 
10710c3a8cd0SMatthew Dillon */ 10720c3a8cd0SMatthew Dillon pthread_mutex_lock(&iocom->mtx); 10730c3a8cd0SMatthew Dillon dmsg_iocom_drain(iocom); 10740a9eefcaSMatthew Dillon dmsg_simulate_failure(&iocom->state0, 0, ioq->error); 10750c3a8cd0SMatthew Dillon pthread_mutex_unlock(&iocom->mtx); 1076323c0947SMatthew Dillon if (TAILQ_FIRST(&ioq->msgq)) 1077323c0947SMatthew Dillon goto again; 10780c3a8cd0SMatthew Dillon 1079323c0947SMatthew Dillon #if 0 10800c3a8cd0SMatthew Dillon /* 10810c3a8cd0SMatthew Dillon * For the iocom error case we want to set RWORK to indicate 10820c3a8cd0SMatthew Dillon * that more messages might be pending. 10830c3a8cd0SMatthew Dillon * 10840c3a8cd0SMatthew Dillon * It is possible to return NULL when there is more work to 10850c3a8cd0SMatthew Dillon * do because each message has to be DELETEd in both 10860c3a8cd0SMatthew Dillon * directions before we continue on with the next (though 10870c3a8cd0SMatthew Dillon * this could be optimized). The transmit direction will 10880c3a8cd0SMatthew Dillon * re-set RWORK. 10890c3a8cd0SMatthew Dillon */ 10900c3a8cd0SMatthew Dillon if (msg) 1091a2179323SMatthew Dillon atomic_set_int(&iocom->flags, DMSG_IOCOMF_RWORK); 1092323c0947SMatthew Dillon #endif 10930c3a8cd0SMatthew Dillon } else if (msg == NULL) { 10940c3a8cd0SMatthew Dillon /* 10950c3a8cd0SMatthew Dillon * Insufficient data received to finish building the message, 10960c3a8cd0SMatthew Dillon * set RREQ and return NULL. 10970c3a8cd0SMatthew Dillon * 10980c3a8cd0SMatthew Dillon * Leave ioq->msg intact. 10990c3a8cd0SMatthew Dillon * Leave the FIFO intact. 11000c3a8cd0SMatthew Dillon */ 1101a2179323SMatthew Dillon atomic_set_int(&iocom->flags, DMSG_IOCOMF_RREQ); 11020c3a8cd0SMatthew Dillon } else { 11030c3a8cd0SMatthew Dillon /* 11040d20ec8aSMatthew Dillon * Continue processing msg. 11050c3a8cd0SMatthew Dillon * 11060c3a8cd0SMatthew Dillon * The fifo has already been advanced past the message. 
11070c3a8cd0SMatthew Dillon * Trivially reset the FIFO indices if possible. 11080c3a8cd0SMatthew Dillon * 11090c3a8cd0SMatthew Dillon * clear the FIFO if it is now empty and set RREQ to wait 11100c3a8cd0SMatthew Dillon * for more from the socket. If the FIFO is not empty set 11110c3a8cd0SMatthew Dillon * TWORK to bypass the poll so we loop immediately. 11120c3a8cd0SMatthew Dillon */ 11130c3a8cd0SMatthew Dillon if (ioq->fifo_beg == ioq->fifo_cdx && 11140c3a8cd0SMatthew Dillon ioq->fifo_cdn == ioq->fifo_end) { 1115a2179323SMatthew Dillon atomic_set_int(&iocom->flags, DMSG_IOCOMF_RREQ); 11160c3a8cd0SMatthew Dillon ioq->fifo_cdx = 0; 11170c3a8cd0SMatthew Dillon ioq->fifo_cdn = 0; 11180c3a8cd0SMatthew Dillon ioq->fifo_beg = 0; 11190c3a8cd0SMatthew Dillon ioq->fifo_end = 0; 11200c3a8cd0SMatthew Dillon } else { 1121a2179323SMatthew Dillon atomic_set_int(&iocom->flags, DMSG_IOCOMF_RWORK); 11220c3a8cd0SMatthew Dillon } 11230c3a8cd0SMatthew Dillon ioq->state = DMSG_MSGQ_STATE_HEADER1; 11240c3a8cd0SMatthew Dillon ioq->msg = NULL; 11250d20ec8aSMatthew Dillon 11260d20ec8aSMatthew Dillon /* 11270d20ec8aSMatthew Dillon * Handle message routing. Validates non-zero sources 11280d20ec8aSMatthew Dillon * and routes message. Error will be 0 if the message is 11290d20ec8aSMatthew Dillon * destined for us. 11300d20ec8aSMatthew Dillon * 11310d20ec8aSMatthew Dillon * State processing only occurs for messages destined for us. 11320d20ec8aSMatthew Dillon */ 11335ab1caedSMatthew Dillon dmio_printf(iocom, 5, 11340a9eefcaSMatthew Dillon "rxmsg cmd=%08x circ=%016jx\n", 1135a2179323SMatthew Dillon msg->any.head.cmd, 1136a2179323SMatthew Dillon (intmax_t)msg->any.head.circuit); 11377adbba57SMatthew Dillon 11380a9eefcaSMatthew Dillon error = dmsg_state_msgrx(msg, 0); 11390d20ec8aSMatthew Dillon 11400d20ec8aSMatthew Dillon if (error) { 11410d20ec8aSMatthew Dillon /* 11420d20ec8aSMatthew Dillon * Abort-after-closure, throw message away and 11430d20ec8aSMatthew Dillon * start reading another. 
11440d20ec8aSMatthew Dillon */ 11450d20ec8aSMatthew Dillon if (error == DMSG_IOQ_ERROR_EALREADY) { 11460d20ec8aSMatthew Dillon dmsg_msg_free(msg); 11470d20ec8aSMatthew Dillon goto again; 11480d20ec8aSMatthew Dillon } 11490d20ec8aSMatthew Dillon 11500d20ec8aSMatthew Dillon /* 11510d20ec8aSMatthew Dillon * Process real error and throw away message. 11520d20ec8aSMatthew Dillon */ 11530d20ec8aSMatthew Dillon ioq->error = error; 11540d20ec8aSMatthew Dillon goto skip; 11550d20ec8aSMatthew Dillon } 1156a06d536bSMatthew Dillon 1157a06d536bSMatthew Dillon /* 1158a06d536bSMatthew Dillon * No error and not routed 1159a06d536bSMatthew Dillon */ 11600d20ec8aSMatthew Dillon /* no error, not routed. Fall through and return msg */ 11610c3a8cd0SMatthew Dillon } 11620c3a8cd0SMatthew Dillon return (msg); 11630c3a8cd0SMatthew Dillon } 11640c3a8cd0SMatthew Dillon 11650c3a8cd0SMatthew Dillon /* 11660c3a8cd0SMatthew Dillon * Calculate the header and data crc's and write a low-level message to 11670c3a8cd0SMatthew Dillon * the connection. If aux_crc is non-zero the aux_data crc is already 11680c3a8cd0SMatthew Dillon * assumed to have been set. 11690c3a8cd0SMatthew Dillon * 11700c3a8cd0SMatthew Dillon * A non-NULL msg is added to the queue but not necessarily flushed. 11710c3a8cd0SMatthew Dillon * Calling this function with msg == NULL will get a flush going. 
11720c3a8cd0SMatthew Dillon * 1173a2179323SMatthew Dillon * (called from iocom_core only) 11740c3a8cd0SMatthew Dillon */ 11750c3a8cd0SMatthew Dillon void 11760c3a8cd0SMatthew Dillon dmsg_iocom_flush1(dmsg_iocom_t *iocom) 11770c3a8cd0SMatthew Dillon { 11780c3a8cd0SMatthew Dillon dmsg_ioq_t *ioq = &iocom->ioq_tx; 11790c3a8cd0SMatthew Dillon dmsg_msg_t *msg; 11800c3a8cd0SMatthew Dillon uint32_t xcrc32; 1181f306de83SMatthew Dillon size_t hbytes; 1182f306de83SMatthew Dillon size_t abytes; 11830c3a8cd0SMatthew Dillon dmsg_msg_queue_t tmpq; 11840c3a8cd0SMatthew Dillon 1185a2179323SMatthew Dillon atomic_clear_int(&iocom->flags, DMSG_IOCOMF_WREQ | DMSG_IOCOMF_WWORK); 11860c3a8cd0SMatthew Dillon TAILQ_INIT(&tmpq); 11870c3a8cd0SMatthew Dillon pthread_mutex_lock(&iocom->mtx); 11880d20ec8aSMatthew Dillon while ((msg = TAILQ_FIRST(&iocom->txmsgq)) != NULL) { 11890d20ec8aSMatthew Dillon TAILQ_REMOVE(&iocom->txmsgq, msg, qentry); 11900c3a8cd0SMatthew Dillon TAILQ_INSERT_TAIL(&tmpq, msg, qentry); 11910c3a8cd0SMatthew Dillon } 11920c3a8cd0SMatthew Dillon pthread_mutex_unlock(&iocom->mtx); 11930c3a8cd0SMatthew Dillon 11940a9eefcaSMatthew Dillon /* 11950a9eefcaSMatthew Dillon * Flush queue, doing all required encryption and CRC generation, 11960a9eefcaSMatthew Dillon * with the mutex unlocked. 11970a9eefcaSMatthew Dillon */ 11980c3a8cd0SMatthew Dillon while ((msg = TAILQ_FIRST(&tmpq)) != NULL) { 11990c3a8cd0SMatthew Dillon /* 12000c3a8cd0SMatthew Dillon * Process terminal connection errors. 12010c3a8cd0SMatthew Dillon */ 12020c3a8cd0SMatthew Dillon TAILQ_REMOVE(&tmpq, msg, qentry); 12030c3a8cd0SMatthew Dillon if (ioq->error) { 12040c3a8cd0SMatthew Dillon TAILQ_INSERT_TAIL(&ioq->msgq, msg, qentry); 12050c3a8cd0SMatthew Dillon ++ioq->msgcount; 12060c3a8cd0SMatthew Dillon continue; 12070c3a8cd0SMatthew Dillon } 12080c3a8cd0SMatthew Dillon 12090c3a8cd0SMatthew Dillon /* 12100c3a8cd0SMatthew Dillon * Finish populating the msg fields. 
The salt ensures that 12110c3a8cd0SMatthew Dillon * the iv[] array is ridiculously randomized and we also 12120c3a8cd0SMatthew Dillon * re-seed our PRNG every 32768 messages just to be sure. 12130c3a8cd0SMatthew Dillon */ 12140c3a8cd0SMatthew Dillon msg->any.head.magic = DMSG_HDR_MAGIC; 12150c3a8cd0SMatthew Dillon msg->any.head.salt = (random() << 8) | (ioq->seq & 255); 12160c3a8cd0SMatthew Dillon ++ioq->seq; 12170a9eefcaSMatthew Dillon if ((ioq->seq & 32767) == 0) { 12180a9eefcaSMatthew Dillon pthread_mutex_lock(&iocom->mtx); 12190c3a8cd0SMatthew Dillon srandomdev(); 12200a9eefcaSMatthew Dillon pthread_mutex_unlock(&iocom->mtx); 12210a9eefcaSMatthew Dillon } 12220c3a8cd0SMatthew Dillon 12230c3a8cd0SMatthew Dillon /* 12240c3a8cd0SMatthew Dillon * Calculate aux_crc if 0, then calculate hdr_crc. 12250c3a8cd0SMatthew Dillon */ 12260c3a8cd0SMatthew Dillon if (msg->aux_size && msg->any.head.aux_crc == 0) { 1227f306de83SMatthew Dillon abytes = DMSG_DOALIGN(msg->aux_size); 1228f306de83SMatthew Dillon xcrc32 = dmsg_icrc32(msg->aux_data, abytes); 12290c3a8cd0SMatthew Dillon msg->any.head.aux_crc = xcrc32; 12300c3a8cd0SMatthew Dillon } 1231f306de83SMatthew Dillon msg->any.head.aux_bytes = msg->aux_size; 12320c3a8cd0SMatthew Dillon 12330c3a8cd0SMatthew Dillon hbytes = (msg->any.head.cmd & DMSGF_SIZE) * 12340c3a8cd0SMatthew Dillon DMSG_ALIGN; 12350c3a8cd0SMatthew Dillon msg->any.head.hdr_crc = 0; 12360c3a8cd0SMatthew Dillon msg->any.head.hdr_crc = dmsg_icrc32(&msg->any.head, hbytes); 12370c3a8cd0SMatthew Dillon 12380c3a8cd0SMatthew Dillon /* 12390c3a8cd0SMatthew Dillon * Enqueue the message (the flush codes handles stream 12400c3a8cd0SMatthew Dillon * encryption). 
12410c3a8cd0SMatthew Dillon */ 12420c3a8cd0SMatthew Dillon TAILQ_INSERT_TAIL(&ioq->msgq, msg, qentry); 12430c3a8cd0SMatthew Dillon ++ioq->msgcount; 12440c3a8cd0SMatthew Dillon } 12450c3a8cd0SMatthew Dillon dmsg_iocom_flush2(iocom); 12460c3a8cd0SMatthew Dillon } 12470c3a8cd0SMatthew Dillon 12480c3a8cd0SMatthew Dillon /* 12490c3a8cd0SMatthew Dillon * Thread localized, iocom->mtx not held by caller. 1250a2179323SMatthew Dillon * 1251a2179323SMatthew Dillon * (called from iocom_core via iocom_flush1 only) 12520c3a8cd0SMatthew Dillon */ 12530c3a8cd0SMatthew Dillon void 12540c3a8cd0SMatthew Dillon dmsg_iocom_flush2(dmsg_iocom_t *iocom) 12550c3a8cd0SMatthew Dillon { 12560c3a8cd0SMatthew Dillon dmsg_ioq_t *ioq = &iocom->ioq_tx; 12570c3a8cd0SMatthew Dillon dmsg_msg_t *msg; 12580c3a8cd0SMatthew Dillon ssize_t n; 12590c3a8cd0SMatthew Dillon struct iovec iov[DMSG_IOQ_MAXIOVEC]; 12600c3a8cd0SMatthew Dillon size_t nact; 12610c3a8cd0SMatthew Dillon size_t hbytes; 12620c3a8cd0SMatthew Dillon size_t abytes; 12630c3a8cd0SMatthew Dillon size_t hoff; 12640c3a8cd0SMatthew Dillon size_t aoff; 12650c3a8cd0SMatthew Dillon int iovcnt; 12667adbba57SMatthew Dillon int save_errno; 12670c3a8cd0SMatthew Dillon 12680c3a8cd0SMatthew Dillon if (ioq->error) { 12690c3a8cd0SMatthew Dillon dmsg_iocom_drain(iocom); 12700c3a8cd0SMatthew Dillon return; 12710c3a8cd0SMatthew Dillon } 12720c3a8cd0SMatthew Dillon 12730c3a8cd0SMatthew Dillon /* 12740c3a8cd0SMatthew Dillon * Pump messages out the connection by building an iovec. 12750c3a8cd0SMatthew Dillon * 12760c3a8cd0SMatthew Dillon * ioq->hbytes/ioq->abytes tracks how much of the first message 12770c3a8cd0SMatthew Dillon * in the queue has been successfully written out, so we can 12780c3a8cd0SMatthew Dillon * resume writing. 
12790c3a8cd0SMatthew Dillon */ 12800c3a8cd0SMatthew Dillon iovcnt = 0; 12810c3a8cd0SMatthew Dillon nact = 0; 12820c3a8cd0SMatthew Dillon hoff = ioq->hbytes; 12830c3a8cd0SMatthew Dillon aoff = ioq->abytes; 12840c3a8cd0SMatthew Dillon 12850c3a8cd0SMatthew Dillon TAILQ_FOREACH(msg, &ioq->msgq, qentry) { 12860c3a8cd0SMatthew Dillon hbytes = (msg->any.head.cmd & DMSGF_SIZE) * 12870c3a8cd0SMatthew Dillon DMSG_ALIGN; 12888d6d37b8SMatthew Dillon abytes = DMSG_DOALIGN(msg->aux_size); 12890c3a8cd0SMatthew Dillon assert(hoff <= hbytes && aoff <= abytes); 12900c3a8cd0SMatthew Dillon 12910c3a8cd0SMatthew Dillon if (hoff < hbytes) { 1292024de405SMatthew Dillon size_t maxlen = hbytes - hoff; 1293024de405SMatthew Dillon if (maxlen > sizeof(ioq->buf) / 2) 1294024de405SMatthew Dillon maxlen = sizeof(ioq->buf) / 2; 12950c3a8cd0SMatthew Dillon iov[iovcnt].iov_base = (char *)&msg->any.head + hoff; 1296024de405SMatthew Dillon iov[iovcnt].iov_len = maxlen; 1297024de405SMatthew Dillon nact += maxlen; 12980c3a8cd0SMatthew Dillon ++iovcnt; 1299024de405SMatthew Dillon if (iovcnt == DMSG_IOQ_MAXIOVEC || 1300024de405SMatthew Dillon maxlen != hbytes - hoff) { 13010c3a8cd0SMatthew Dillon break; 13020c3a8cd0SMatthew Dillon } 1303024de405SMatthew Dillon } 13040c3a8cd0SMatthew Dillon if (aoff < abytes) { 1305024de405SMatthew Dillon size_t maxlen = abytes - aoff; 1306024de405SMatthew Dillon if (maxlen > sizeof(ioq->buf) / 2) 1307024de405SMatthew Dillon maxlen = sizeof(ioq->buf) / 2; 1308024de405SMatthew Dillon 13090c3a8cd0SMatthew Dillon assert(msg->aux_data != NULL); 13100c3a8cd0SMatthew Dillon iov[iovcnt].iov_base = (char *)msg->aux_data + aoff; 1311024de405SMatthew Dillon iov[iovcnt].iov_len = maxlen; 1312024de405SMatthew Dillon nact += maxlen; 13130c3a8cd0SMatthew Dillon ++iovcnt; 1314024de405SMatthew Dillon if (iovcnt == DMSG_IOQ_MAXIOVEC || 1315024de405SMatthew Dillon maxlen != abytes - aoff) { 13160c3a8cd0SMatthew Dillon break; 13170c3a8cd0SMatthew Dillon } 1318024de405SMatthew Dillon } 
13190c3a8cd0SMatthew Dillon hoff = 0; 13200c3a8cd0SMatthew Dillon aoff = 0; 13210c3a8cd0SMatthew Dillon } 13227adbba57SMatthew Dillon 13237adbba57SMatthew Dillon /* 13247adbba57SMatthew Dillon * Shortcut if no work to do. Be sure to check for old work still 13257adbba57SMatthew Dillon * pending in the FIFO. 13267adbba57SMatthew Dillon */ 13277adbba57SMatthew Dillon if (iovcnt == 0 && ioq->fifo_beg == ioq->fifo_cdx) 13280c3a8cd0SMatthew Dillon return; 13290c3a8cd0SMatthew Dillon 13300c3a8cd0SMatthew Dillon /* 13310c3a8cd0SMatthew Dillon * Encrypt and write the data. The crypto code will move the 13320c3a8cd0SMatthew Dillon * data into the fifo and adjust the iov as necessary. If 13330c3a8cd0SMatthew Dillon * encryption is disabled the iov is left alone. 13340c3a8cd0SMatthew Dillon * 13350c3a8cd0SMatthew Dillon * May return a smaller iov (thus a smaller n), with aggregated 13360c3a8cd0SMatthew Dillon * chunks. May reduce nmax to what fits in the FIFO. 13370c3a8cd0SMatthew Dillon * 13380c3a8cd0SMatthew Dillon * This function sets nact to the number of original bytes now 13390c3a8cd0SMatthew Dillon * encrypted, adding to the FIFO some number of bytes that might 13400c3a8cd0SMatthew Dillon * be greater depending on the crypto mechanic. iov[] is adjusted 13410c3a8cd0SMatthew Dillon * to point at the FIFO if necessary. 13420c3a8cd0SMatthew Dillon * 1343f2239a4eSMatthew Dillon * NOTE: nact is the number of bytes eaten from the message. For 1344f2239a4eSMatthew Dillon * encrypted data this is the number of bytes processed for 1345f2239a4eSMatthew Dillon * encryption and not necessarily the number of bytes writable. 1346f2239a4eSMatthew Dillon * The return value from the writev() is the post-encrypted 1347f2239a4eSMatthew Dillon * byte count which might be larger. 1348f2239a4eSMatthew Dillon * 1349f2239a4eSMatthew Dillon * NOTE: For direct writes, nact is the return value from the writev(). 
13500c3a8cd0SMatthew Dillon */ 13510c3a8cd0SMatthew Dillon if (iocom->flags & DMSG_IOCOMF_CRYPTED) { 13520c3a8cd0SMatthew Dillon /* 13530c3a8cd0SMatthew Dillon * Make sure the FIFO has a reasonable amount of space 13540c3a8cd0SMatthew Dillon * left (if not completely full). 1355a2179323SMatthew Dillon * 1356a2179323SMatthew Dillon * In this situation we are staging the encrypted message 1357a2179323SMatthew Dillon * data in the FIFO. (nact) represents how much plaintext 1358a2179323SMatthew Dillon * has been staged, (n) represents how much encrypted data 1359a2179323SMatthew Dillon * has been flushed. The two are independent of each other. 13600c3a8cd0SMatthew Dillon */ 13610c3a8cd0SMatthew Dillon if (ioq->fifo_beg > sizeof(ioq->buf) / 2 && 1362a2179323SMatthew Dillon sizeof(ioq->buf) - ioq->fifo_end < DMSG_ALIGN * 2) { 13630c3a8cd0SMatthew Dillon bcopy(ioq->buf + ioq->fifo_beg, ioq->buf, 13640c3a8cd0SMatthew Dillon ioq->fifo_end - ioq->fifo_beg); 13650c3a8cd0SMatthew Dillon ioq->fifo_cdx -= ioq->fifo_beg; 13660c3a8cd0SMatthew Dillon ioq->fifo_cdn -= ioq->fifo_beg; 13670c3a8cd0SMatthew Dillon ioq->fifo_end -= ioq->fifo_beg; 13680c3a8cd0SMatthew Dillon ioq->fifo_beg = 0; 13690c3a8cd0SMatthew Dillon } 13700c3a8cd0SMatthew Dillon 1371f2239a4eSMatthew Dillon /* 1372f2239a4eSMatthew Dillon * beg .... cdx ............ cdn ............. end 1373f2239a4eSMatthew Dillon * [WRITABLE] [PARTIALENCRYPT] [NOTYETENCRYPTED] 1374f2239a4eSMatthew Dillon * 1375e96cef49SMatthew Dillon * Advance fifo_beg on a successful write. 
1376f2239a4eSMatthew Dillon */ 13770c3a8cd0SMatthew Dillon iovcnt = dmsg_crypto_encrypt(iocom, ioq, iov, iovcnt, &nact); 13780c3a8cd0SMatthew Dillon n = writev(iocom->sock_fd, iov, iovcnt); 13797adbba57SMatthew Dillon save_errno = errno; 13800c3a8cd0SMatthew Dillon if (n > 0) { 13810c3a8cd0SMatthew Dillon ioq->fifo_beg += n; 13820c3a8cd0SMatthew Dillon if (ioq->fifo_beg == ioq->fifo_end) { 13830c3a8cd0SMatthew Dillon ioq->fifo_beg = 0; 13840c3a8cd0SMatthew Dillon ioq->fifo_cdn = 0; 13850c3a8cd0SMatthew Dillon ioq->fifo_cdx = 0; 13860c3a8cd0SMatthew Dillon ioq->fifo_end = 0; 13870c3a8cd0SMatthew Dillon } 13880c3a8cd0SMatthew Dillon } 13897adbba57SMatthew Dillon 1390a2179323SMatthew Dillon /* 1391a2179323SMatthew Dillon * We don't mess with the nact returned by the crypto_encrypt 1392a2179323SMatthew Dillon * call, which represents the filling of the FIFO. (n) tells 1393a2179323SMatthew Dillon * us how much we were able to write from the FIFO. The two 1394a2179323SMatthew Dillon * are different beasts when encrypting. 1395a2179323SMatthew Dillon */ 13960c3a8cd0SMatthew Dillon } else { 1397a2179323SMatthew Dillon /* 1398a2179323SMatthew Dillon * In this situation we are not staging the messages to the 1399a2179323SMatthew Dillon * FIFO but instead writing them directly from the msg 1400f2239a4eSMatthew Dillon * structure(s) unencrypted, so (nact) is basically (n). 1401a2179323SMatthew Dillon */ 14020c3a8cd0SMatthew Dillon n = writev(iocom->sock_fd, iov, iovcnt); 14037adbba57SMatthew Dillon save_errno = errno; 14040c3a8cd0SMatthew Dillon if (n > 0) 14050c3a8cd0SMatthew Dillon nact = n; 14060c3a8cd0SMatthew Dillon else 14070c3a8cd0SMatthew Dillon nact = 0; 14080c3a8cd0SMatthew Dillon } 14090c3a8cd0SMatthew Dillon 14100c3a8cd0SMatthew Dillon /* 14110c3a8cd0SMatthew Dillon * Clean out the transmit queue based on what we successfully 14127adbba57SMatthew Dillon * encrypted (nact is the plaintext count) and is now in the FIFO. 
14137adbba57SMatthew Dillon * ioq->hbytes/abytes represents the portion of the first message 14147adbba57SMatthew Dillon * previously sent. 14150c3a8cd0SMatthew Dillon */ 14160c3a8cd0SMatthew Dillon while ((msg = TAILQ_FIRST(&ioq->msgq)) != NULL) { 14170c3a8cd0SMatthew Dillon hbytes = (msg->any.head.cmd & DMSGF_SIZE) * 14180c3a8cd0SMatthew Dillon DMSG_ALIGN; 14198d6d37b8SMatthew Dillon abytes = DMSG_DOALIGN(msg->aux_size); 14200c3a8cd0SMatthew Dillon 14210c3a8cd0SMatthew Dillon if ((size_t)nact < hbytes - ioq->hbytes) { 14220c3a8cd0SMatthew Dillon ioq->hbytes += nact; 14230c3a8cd0SMatthew Dillon nact = 0; 14240c3a8cd0SMatthew Dillon break; 14250c3a8cd0SMatthew Dillon } 14260c3a8cd0SMatthew Dillon nact -= hbytes - ioq->hbytes; 14270c3a8cd0SMatthew Dillon ioq->hbytes = hbytes; 14280c3a8cd0SMatthew Dillon if ((size_t)nact < abytes - ioq->abytes) { 14290c3a8cd0SMatthew Dillon ioq->abytes += nact; 14300c3a8cd0SMatthew Dillon nact = 0; 14310c3a8cd0SMatthew Dillon break; 14320c3a8cd0SMatthew Dillon } 14330c3a8cd0SMatthew Dillon nact -= abytes - ioq->abytes; 1434a2179323SMatthew Dillon /* ioq->abytes = abytes; optimized out */ 1435a2179323SMatthew Dillon 14365ab1caedSMatthew Dillon dmio_printf(iocom, 5, 14370a9eefcaSMatthew Dillon "txmsg cmd=%08x circ=%016jx\n", 1438a2179323SMatthew Dillon msg->any.head.cmd, 1439a2179323SMatthew Dillon (intmax_t)msg->any.head.circuit); 14400c3a8cd0SMatthew Dillon 14417adbba57SMatthew Dillon #ifdef DMSG_BLOCK_DEBUG 14427adbba57SMatthew Dillon uint32_t tcmd; 14437adbba57SMatthew Dillon 14447adbba57SMatthew Dillon if (msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE)) { 14457adbba57SMatthew Dillon if ((msg->state->flags & DMSG_STATE_ROOT) == 0) { 14467adbba57SMatthew Dillon tcmd = (msg->state->icmd & DMSGF_BASECMDMASK) | 14477adbba57SMatthew Dillon (msg->any.head.cmd & (DMSGF_CREATE | 14487adbba57SMatthew Dillon DMSGF_DELETE | 14497adbba57SMatthew Dillon DMSGF_REPLY)); 14507adbba57SMatthew Dillon } else { 14517adbba57SMatthew Dillon tcmd = 0; 
14527adbba57SMatthew Dillon } 14537adbba57SMatthew Dillon } else { 14547adbba57SMatthew Dillon tcmd = msg->any.head.cmd & DMSGF_CMDSWMASK; 14557adbba57SMatthew Dillon } 14567adbba57SMatthew Dillon 14577adbba57SMatthew Dillon switch (tcmd) { 14587adbba57SMatthew Dillon case DMSG_BLK_READ | DMSGF_CREATE | DMSGF_DELETE: 14597adbba57SMatthew Dillon case DMSG_BLK_WRITE | DMSGF_CREATE | DMSGF_DELETE: 14605ab1caedSMatthew Dillon dmio_printf(iocom, 4, 14615ab1caedSMatthew Dillon "write BIO %-3d %016jx %d@%016jx\n", 14627adbba57SMatthew Dillon biocount, msg->any.head.msgid, 14637adbba57SMatthew Dillon msg->any.blk_read.bytes, 14647adbba57SMatthew Dillon msg->any.blk_read.offset); 14657adbba57SMatthew Dillon break; 14667adbba57SMatthew Dillon case DMSG_BLK_READ | DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY: 14677adbba57SMatthew Dillon case DMSG_BLK_WRITE | DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY: 14685ab1caedSMatthew Dillon dmio_printf(iocom, 4, 14695ab1caedSMatthew Dillon "wretr BIO %-3d %016jx %d@%016jx\n", 14707adbba57SMatthew Dillon biocount, msg->any.head.msgid, 14717adbba57SMatthew Dillon msg->any.blk_read.bytes, 14727adbba57SMatthew Dillon msg->any.blk_read.offset); 14737adbba57SMatthew Dillon break; 14747adbba57SMatthew Dillon default: 14757adbba57SMatthew Dillon break; 14767adbba57SMatthew Dillon } 14777adbba57SMatthew Dillon #endif 14787adbba57SMatthew Dillon 14790c3a8cd0SMatthew Dillon TAILQ_REMOVE(&ioq->msgq, msg, qentry); 14800c3a8cd0SMatthew Dillon --ioq->msgcount; 14810c3a8cd0SMatthew Dillon ioq->hbytes = 0; 14820c3a8cd0SMatthew Dillon ioq->abytes = 0; 1483323c0947SMatthew Dillon dmsg_msg_free(msg); 14840c3a8cd0SMatthew Dillon } 14850c3a8cd0SMatthew Dillon assert(nact == 0); 14860c3a8cd0SMatthew Dillon 14870c3a8cd0SMatthew Dillon /* 14880c3a8cd0SMatthew Dillon * Process the return value from the write w/regards to blocking. 
14890c3a8cd0SMatthew Dillon */ 14900c3a8cd0SMatthew Dillon if (n < 0) { 14917adbba57SMatthew Dillon if (save_errno != EINTR && 14927adbba57SMatthew Dillon save_errno != EINPROGRESS && 14937adbba57SMatthew Dillon save_errno != EAGAIN) { 14940c3a8cd0SMatthew Dillon /* 14950c3a8cd0SMatthew Dillon * Fatal write error 14960c3a8cd0SMatthew Dillon */ 14970c3a8cd0SMatthew Dillon ioq->error = DMSG_IOQ_ERROR_SOCK; 14980c3a8cd0SMatthew Dillon dmsg_iocom_drain(iocom); 14990c3a8cd0SMatthew Dillon } else { 15000c3a8cd0SMatthew Dillon /* 15017adbba57SMatthew Dillon * Wait for socket buffer space, do not try to 15027adbba57SMatthew Dillon * process more packets for transmit until space 15037adbba57SMatthew Dillon * is available. 15040c3a8cd0SMatthew Dillon */ 1505a2179323SMatthew Dillon atomic_set_int(&iocom->flags, DMSG_IOCOMF_WREQ); 15060c3a8cd0SMatthew Dillon } 15077adbba57SMatthew Dillon } else if (TAILQ_FIRST(&ioq->msgq) || 15087adbba57SMatthew Dillon TAILQ_FIRST(&iocom->txmsgq) || 15097adbba57SMatthew Dillon ioq->fifo_beg != ioq->fifo_cdx) { 15107adbba57SMatthew Dillon /* 15117adbba57SMatthew Dillon * If the write succeeded and more messages are pending 15127adbba57SMatthew Dillon * in either msgq, or the FIFO WWORK must remain set. 15137adbba57SMatthew Dillon */ 15147adbba57SMatthew Dillon atomic_set_int(&iocom->flags, DMSG_IOCOMF_WWORK); 15150c3a8cd0SMatthew Dillon } 15167adbba57SMatthew Dillon /* else no transmit-side work remains */ 15177adbba57SMatthew Dillon 15180c3a8cd0SMatthew Dillon if (ioq->error) { 15190c3a8cd0SMatthew Dillon dmsg_iocom_drain(iocom); 15200c3a8cd0SMatthew Dillon } 15210c3a8cd0SMatthew Dillon } 15220c3a8cd0SMatthew Dillon 15230c3a8cd0SMatthew Dillon /* 15240c3a8cd0SMatthew Dillon * Kill pending msgs on ioq_tx and adjust the flags such that no more 15250c3a8cd0SMatthew Dillon * write events will occur. 
We don't kill read msgs because we want 15260c3a8cd0SMatthew Dillon * the caller to pull off our contrived terminal error msg to detect 15270c3a8cd0SMatthew Dillon * the connection failure. 15280c3a8cd0SMatthew Dillon * 1529a2179323SMatthew Dillon * Localized to iocom_core thread, iocom->mtx not held by caller. 15300c3a8cd0SMatthew Dillon */ 15310c3a8cd0SMatthew Dillon void 15320c3a8cd0SMatthew Dillon dmsg_iocom_drain(dmsg_iocom_t *iocom) 15330c3a8cd0SMatthew Dillon { 15340c3a8cd0SMatthew Dillon dmsg_ioq_t *ioq = &iocom->ioq_tx; 15350c3a8cd0SMatthew Dillon dmsg_msg_t *msg; 15360c3a8cd0SMatthew Dillon 1537a2179323SMatthew Dillon atomic_clear_int(&iocom->flags, DMSG_IOCOMF_WREQ | DMSG_IOCOMF_WWORK); 15380c3a8cd0SMatthew Dillon ioq->hbytes = 0; 15390c3a8cd0SMatthew Dillon ioq->abytes = 0; 15400c3a8cd0SMatthew Dillon 15410c3a8cd0SMatthew Dillon while ((msg = TAILQ_FIRST(&ioq->msgq)) != NULL) { 15420c3a8cd0SMatthew Dillon TAILQ_REMOVE(&ioq->msgq, msg, qentry); 15430c3a8cd0SMatthew Dillon --ioq->msgcount; 1544323c0947SMatthew Dillon dmsg_msg_free(msg); 15450c3a8cd0SMatthew Dillon } 15460c3a8cd0SMatthew Dillon } 15470c3a8cd0SMatthew Dillon 15480c3a8cd0SMatthew Dillon /* 15490c3a8cd0SMatthew Dillon * Write a message to an iocom, with additional state processing. 
15500c3a8cd0SMatthew Dillon */ 15510c3a8cd0SMatthew Dillon void 15520c3a8cd0SMatthew Dillon dmsg_msg_write(dmsg_msg_t *msg) 15530c3a8cd0SMatthew Dillon { 15541b8eded1SMatthew Dillon dmsg_iocom_t *iocom = msg->state->iocom; 15550c3a8cd0SMatthew Dillon dmsg_state_t *state; 15560c3a8cd0SMatthew Dillon char dummy; 15570c3a8cd0SMatthew Dillon 15580c3a8cd0SMatthew Dillon pthread_mutex_lock(&iocom->mtx); 15591b8eded1SMatthew Dillon state = msg->state; 1560d30cab67SMatthew Dillon 15615ab1caedSMatthew Dillon dmio_printf(iocom, 5, 15620a9eefcaSMatthew Dillon "msgtx: cmd=%08x msgid=%016jx " 15630a9eefcaSMatthew Dillon "state %p(%08x) error=%d\n", 15640a9eefcaSMatthew Dillon msg->any.head.cmd, msg->any.head.msgid, 15650a9eefcaSMatthew Dillon state, (state ? state->icmd : 0), 15660a9eefcaSMatthew Dillon msg->any.head.error); 15670a9eefcaSMatthew Dillon 15680a9eefcaSMatthew Dillon 1569a06d536bSMatthew Dillon #if 0 1570323c0947SMatthew Dillon /* 1571323c0947SMatthew Dillon * Make sure the parent transaction is still open in the transmit 1572323c0947SMatthew Dillon * direction. If it isn't the message is dead and we have to 1573323c0947SMatthew Dillon * potentially simulate a rxmsg terminating the transaction. 
1574323c0947SMatthew Dillon */ 1575a06d536bSMatthew Dillon if ((state->parent->txcmd & DMSGF_DELETE) || 1576a06d536bSMatthew Dillon (state->parent->rxcmd & DMSGF_DELETE)) { 15775ab1caedSMatthew Dillon dmio_printf(iocom, 4, "dmsg_msg_write: EARLY TERMINATION\n"); 1578a06d536bSMatthew Dillon dmsg_simulate_failure(state, DMSG_ERR_LOSTLINK); 1579323c0947SMatthew Dillon dmsg_state_cleanuptx(iocom, msg); 1580323c0947SMatthew Dillon dmsg_msg_free(msg); 1581323c0947SMatthew Dillon pthread_mutex_unlock(&iocom->mtx); 1582323c0947SMatthew Dillon return; 1583323c0947SMatthew Dillon } 1584a06d536bSMatthew Dillon #endif 1585323c0947SMatthew Dillon /* 1586323c0947SMatthew Dillon * Process state data into the message as needed, then update the 1587323c0947SMatthew Dillon * state based on the message. 1588323c0947SMatthew Dillon */ 1589d30cab67SMatthew Dillon if ((state->flags & DMSG_STATE_ROOT) == 0) { 15900c3a8cd0SMatthew Dillon /* 15910c3a8cd0SMatthew Dillon * Existing transaction (could be reply). It is also 15920c3a8cd0SMatthew Dillon * possible for this to be the first reply (CREATE is set), 15930c3a8cd0SMatthew Dillon * in which case we populate state->txcmd. 15940c3a8cd0SMatthew Dillon * 15950c3a8cd0SMatthew Dillon * state->txcmd is adjusted to hold the final message cmd, 15960c3a8cd0SMatthew Dillon * and we also be sure to set the CREATE bit here. We did 15970c3a8cd0SMatthew Dillon * not set it in dmsg_msg_alloc() because that would have 15980c3a8cd0SMatthew Dillon * not been serialized (state could have gotten ripped out 15990c3a8cd0SMatthew Dillon * from under the message prior to it being transmitted). 
16000c3a8cd0SMatthew Dillon */ 16010c3a8cd0SMatthew Dillon if ((msg->any.head.cmd & (DMSGF_CREATE | DMSGF_REPLY)) == 16020c3a8cd0SMatthew Dillon DMSGF_CREATE) { 16030c3a8cd0SMatthew Dillon state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE; 16040d20ec8aSMatthew Dillon state->icmd = state->txcmd & DMSGF_BASECMDMASK; 16050a9eefcaSMatthew Dillon state->flags &= ~DMSG_STATE_NEW; 16060c3a8cd0SMatthew Dillon } 16070c3a8cd0SMatthew Dillon msg->any.head.msgid = state->msgid; 16081b8eded1SMatthew Dillon 16090d20ec8aSMatthew Dillon if (msg->any.head.cmd & DMSGF_CREATE) { 16100c3a8cd0SMatthew Dillon state->txcmd = msg->any.head.cmd & ~DMSGF_DELETE; 16110c3a8cd0SMatthew Dillon } 16120d20ec8aSMatthew Dillon } 16131b8eded1SMatthew Dillon 16140c3a8cd0SMatthew Dillon /* 16150a9eefcaSMatthew Dillon * Discard messages sent to transactions which are already dead. 16160c3a8cd0SMatthew Dillon */ 16170a9eefcaSMatthew Dillon if (state && (state->txcmd & DMSGF_DELETE)) { 16185ab1caedSMatthew Dillon dmio_printf(iocom, 4, 16195ab1caedSMatthew Dillon "dmsg_msg_write: drop msg %08x to dead " 16200a9eefcaSMatthew Dillon "circuit state=%p\n", 16210a9eefcaSMatthew Dillon msg->any.head.cmd, state); 16220a9eefcaSMatthew Dillon dmsg_msg_free(msg); 16230a9eefcaSMatthew Dillon return; 16240a9eefcaSMatthew Dillon } 16250a9eefcaSMatthew Dillon 16260a9eefcaSMatthew Dillon /* 16270a9eefcaSMatthew Dillon * Normally we queue the msg for output. However, if the circuit is 16280a9eefcaSMatthew Dillon * dead or dying we must simulate a failure in the return direction 16290a9eefcaSMatthew Dillon * and throw the message away. The other end is not expecting any 16300a9eefcaSMatthew Dillon * further messages from us on this state. 16310a9eefcaSMatthew Dillon * 16320a9eefcaSMatthew Dillon * Note that the I/O thread is responsible for generating the CRCs 16330a9eefcaSMatthew Dillon * and encryption. 
16340a9eefcaSMatthew Dillon */ 16350a9eefcaSMatthew Dillon if (state->flags & DMSG_STATE_DYING) { 16360a9eefcaSMatthew Dillon #if 0 16370a9eefcaSMatthew Dillon if ((state->parent->txcmd & DMSGF_DELETE) || 16380a9eefcaSMatthew Dillon (state->parent->flags & DMSG_STATE_DYING) || 16390a9eefcaSMatthew Dillon (state->flags & DMSG_STATE_DYING)) { 16400a9eefcaSMatthew Dillon #endif 16410a9eefcaSMatthew Dillon /* 16420a9eefcaSMatthew Dillon * Illegal message, kill state and related sub-state. 16430a9eefcaSMatthew Dillon * Cannot transmit if state is already dying. 16440a9eefcaSMatthew Dillon */ 16455ab1caedSMatthew Dillon dmio_printf(iocom, 4, 16465ab1caedSMatthew Dillon "dmsg_msg_write: Write to dying circuit " 16470a9eefcaSMatthew Dillon "ptxcmd=%08x prxcmd=%08x flags=%08x\n", 16480a9eefcaSMatthew Dillon state->parent->rxcmd, 16490a9eefcaSMatthew Dillon state->parent->txcmd, 16500a9eefcaSMatthew Dillon state->parent->flags); 16510a9eefcaSMatthew Dillon dmsg_state_hold(state); 16520a9eefcaSMatthew Dillon dmsg_state_cleanuptx(iocom, msg); 16530a9eefcaSMatthew Dillon if ((state->flags & DMSG_STATE_ABORTING) == 0) { 16540a9eefcaSMatthew Dillon dmsg_simulate_failure(state, 1, DMSG_ERR_LOSTLINK); 16550a9eefcaSMatthew Dillon } 16560a9eefcaSMatthew Dillon dmsg_state_drop(state); 16570a9eefcaSMatthew Dillon dmsg_msg_free(msg); 16580a9eefcaSMatthew Dillon } else { 16590a9eefcaSMatthew Dillon /* 16600a9eefcaSMatthew Dillon * Queue the message, clean up transmit state prior to queueing 16610a9eefcaSMatthew Dillon * to avoid SMP races. 
16620a9eefcaSMatthew Dillon */ 16635ab1caedSMatthew Dillon dmio_printf(iocom, 5, 16645ab1caedSMatthew Dillon "dmsg_msg_write: commit msg state=%p to txkmsgq\n", 16655ab1caedSMatthew Dillon state); 16660a9eefcaSMatthew Dillon dmsg_state_cleanuptx(iocom, msg); 16670d20ec8aSMatthew Dillon TAILQ_INSERT_TAIL(&iocom->txmsgq, msg, qentry); 16680c3a8cd0SMatthew Dillon dummy = 0; 16690c3a8cd0SMatthew Dillon write(iocom->wakeupfds[1], &dummy, 1); /* XXX optimize me */ 16700a9eefcaSMatthew Dillon } 16710c3a8cd0SMatthew Dillon pthread_mutex_unlock(&iocom->mtx); 16720c3a8cd0SMatthew Dillon } 16730c3a8cd0SMatthew Dillon 16740c3a8cd0SMatthew Dillon /* 16750a9eefcaSMatthew Dillon * Remove state from its parent's subq. This can wind up recursively 16760a9eefcaSMatthew Dillon * dropping the parent upward. 16770a9eefcaSMatthew Dillon * 16780a9eefcaSMatthew Dillon * NOTE: iocom must be locked. 16790a9eefcaSMatthew Dillon * 16800a9eefcaSMatthew Dillon * NOTE: Once we drop the parent, our pstate pointer may become invalid. 
16810a9eefcaSMatthew Dillon */ 16820a9eefcaSMatthew Dillon static 16830a9eefcaSMatthew Dillon void 16840a9eefcaSMatthew Dillon dmsg_subq_delete(dmsg_state_t *state) 16850a9eefcaSMatthew Dillon { 16860a9eefcaSMatthew Dillon dmsg_state_t *pstate; 16870a9eefcaSMatthew Dillon 16880a9eefcaSMatthew Dillon if (state->flags & DMSG_STATE_SUBINSERTED) { 16890a9eefcaSMatthew Dillon pstate = state->parent; 16900a9eefcaSMatthew Dillon assert(pstate); 16910a9eefcaSMatthew Dillon if (pstate->scan == state) 16920a9eefcaSMatthew Dillon pstate->scan = NULL; 16930a9eefcaSMatthew Dillon TAILQ_REMOVE(&pstate->subq, state, entry); 16940a9eefcaSMatthew Dillon state->flags &= ~DMSG_STATE_SUBINSERTED; 16950a9eefcaSMatthew Dillon state->parent = NULL; 16960a9eefcaSMatthew Dillon if (TAILQ_EMPTY(&pstate->subq)) 16970a9eefcaSMatthew Dillon dmsg_state_drop(pstate);/* pstate->subq */ 16980a9eefcaSMatthew Dillon pstate = NULL; /* safety */ 16990a9eefcaSMatthew Dillon dmsg_state_drop(state); /* pstate->subq */ 17000a9eefcaSMatthew Dillon } else { 17010a9eefcaSMatthew Dillon assert(state->parent == NULL); 17020a9eefcaSMatthew Dillon } 17030a9eefcaSMatthew Dillon } 17040a9eefcaSMatthew Dillon 17050a9eefcaSMatthew Dillon /* 1706a06d536bSMatthew Dillon * Simulate reception of a transaction DELETE message when the link goes 1707a06d536bSMatthew Dillon * bad. This routine must recurse through state->subq and generate messages 1708a06d536bSMatthew Dillon * and callbacks bottom-up. 1709a06d536bSMatthew Dillon * 1710323c0947SMatthew Dillon * iocom->mtx must be held by caller. 
1711323c0947SMatthew Dillon */ 1712323c0947SMatthew Dillon static 1713323c0947SMatthew Dillon void 17140a9eefcaSMatthew Dillon dmsg_simulate_failure(dmsg_state_t *state, int meto, int error) 1715323c0947SMatthew Dillon { 1716a06d536bSMatthew Dillon dmsg_state_t *substate; 17170a9eefcaSMatthew Dillon 17180a9eefcaSMatthew Dillon dmsg_state_hold(state); 17190a9eefcaSMatthew Dillon if (meto) 17200a9eefcaSMatthew Dillon dmsg_state_abort(state); 17210a9eefcaSMatthew Dillon 17220a9eefcaSMatthew Dillon /* 17230a9eefcaSMatthew Dillon * Recurse through sub-states. 17240a9eefcaSMatthew Dillon */ 17250a9eefcaSMatthew Dillon again: 17260a9eefcaSMatthew Dillon TAILQ_FOREACH(substate, &state->subq, entry) { 17270a9eefcaSMatthew Dillon if (substate->flags & DMSG_STATE_ABORTING) 17280a9eefcaSMatthew Dillon continue; 17290a9eefcaSMatthew Dillon state->scan = substate; 17300a9eefcaSMatthew Dillon dmsg_simulate_failure(substate, 1, error); 17310a9eefcaSMatthew Dillon if (state->scan != substate) 17320a9eefcaSMatthew Dillon goto again; 17330a9eefcaSMatthew Dillon } 17340a9eefcaSMatthew Dillon 17350a9eefcaSMatthew Dillon dmsg_state_drop(state); 17360a9eefcaSMatthew Dillon } 17370a9eefcaSMatthew Dillon 17380a9eefcaSMatthew Dillon static 17390a9eefcaSMatthew Dillon void 17400a9eefcaSMatthew Dillon dmsg_state_abort(dmsg_state_t *state) 17410a9eefcaSMatthew Dillon { 1742a06d536bSMatthew Dillon dmsg_iocom_t *iocom; 1743323c0947SMatthew Dillon dmsg_msg_t *msg; 1744323c0947SMatthew Dillon 17450a9eefcaSMatthew Dillon /* 17460a9eefcaSMatthew Dillon * Set ABORTING and DYING, return if already set. If the state was 17470a9eefcaSMatthew Dillon * just allocated we defer the abort operation until the related 17480a9eefcaSMatthew Dillon * message is processed. 
17490a9eefcaSMatthew Dillon */ 17500a9eefcaSMatthew Dillon if (state->flags & DMSG_STATE_ABORTING) 17510a9eefcaSMatthew Dillon return; 17520a9eefcaSMatthew Dillon state->flags |= DMSG_STATE_ABORTING; 17530a9eefcaSMatthew Dillon dmsg_state_dying(state); 17540a9eefcaSMatthew Dillon if (state->flags & DMSG_STATE_NEW) { 17555ab1caedSMatthew Dillon dmio_printf(iocom, 4, 17565ab1caedSMatthew Dillon "dmsg_state_abort(0): state %p rxcmd %08x " 17575ab1caedSMatthew Dillon "txcmd %08x flags %08x - in NEW state\n", 17585ab1caedSMatthew Dillon state, state->rxcmd, 17595ab1caedSMatthew Dillon state->txcmd, state->flags); 17600a9eefcaSMatthew Dillon return; 1761a06d536bSMatthew Dillon } 1762323c0947SMatthew Dillon 1763323c0947SMatthew Dillon /* 17640a9eefcaSMatthew Dillon * Simulate parent state failure before child states. Device 17650a9eefcaSMatthew Dillon * drivers need to understand this and flag the situation but might 17660a9eefcaSMatthew Dillon * have asynchronous operations in progress that they cannot stop. 17670a9eefcaSMatthew Dillon * To make things easier, parent states will not actually disappear 17680a9eefcaSMatthew Dillon * until the children are all gone. 
1769323c0947SMatthew Dillon */ 1770a06d536bSMatthew Dillon if ((state->rxcmd & DMSGF_DELETE) == 0) { 17715ab1caedSMatthew Dillon dmio_printf(iocom, 5, 17725ab1caedSMatthew Dillon "dmsg_state_abort() on state %p\n", 17735ab1caedSMatthew Dillon state); 17740a9eefcaSMatthew Dillon msg = dmsg_msg_alloc_locked(state, 0, DMSG_LNK_ERROR, 1775323c0947SMatthew Dillon NULL, NULL); 1776323c0947SMatthew Dillon if ((state->rxcmd & DMSGF_CREATE) == 0) 1777323c0947SMatthew Dillon msg->any.head.cmd |= DMSGF_CREATE; 17780a9eefcaSMatthew Dillon msg->any.head.cmd |= DMSGF_DELETE | 17790a9eefcaSMatthew Dillon (state->rxcmd & DMSGF_REPLY); 17800a9eefcaSMatthew Dillon msg->any.head.cmd ^= (DMSGF_REVTRANS | DMSGF_REVCIRC); 17810a9eefcaSMatthew Dillon msg->any.head.error = DMSG_ERR_LOSTLINK; 17820a9eefcaSMatthew Dillon msg->any.head.cmd |= DMSGF_ABORT; 17830a9eefcaSMatthew Dillon 17840a9eefcaSMatthew Dillon /* 17850a9eefcaSMatthew Dillon * Issue callback synchronously even though this isn't 17860a9eefcaSMatthew Dillon * the receiver thread. We need to issue the callback 17870a9eefcaSMatthew Dillon * before removing state from the subq in order to allow 17880a9eefcaSMatthew Dillon * the callback to reply. 
17890a9eefcaSMatthew Dillon */ 17900a9eefcaSMatthew Dillon iocom = state->iocom; 17910a9eefcaSMatthew Dillon dmsg_state_msgrx(msg, 1); 17920a9eefcaSMatthew Dillon pthread_mutex_unlock(&iocom->mtx); 17930a9eefcaSMatthew Dillon iocom->rcvmsg_callback(msg); 17940a9eefcaSMatthew Dillon pthread_mutex_lock(&iocom->mtx); 17950a9eefcaSMatthew Dillon dmsg_state_cleanuprx(iocom, msg); 17960a9eefcaSMatthew Dillon #if 0 1797323c0947SMatthew Dillon TAILQ_INSERT_TAIL(&iocom->ioq_rx.msgq, msg, qentry); 1798323c0947SMatthew Dillon atomic_set_int(&iocom->flags, DMSG_IOCOMF_RWORK); 17990a9eefcaSMatthew Dillon #endif 18000a9eefcaSMatthew Dillon } 18010a9eefcaSMatthew Dillon } 18020a9eefcaSMatthew Dillon 18030a9eefcaSMatthew Dillon 18040a9eefcaSMatthew Dillon /* 18050a9eefcaSMatthew Dillon * Recursively sets DMSG_STATE_DYING on state and all sub-states, preventing 18060a9eefcaSMatthew Dillon * the transmission of any new messages on these states. This is done 18070a9eefcaSMatthew Dillon * atomically when parent state is terminating, whereas setting ABORTING is 18080a9eefcaSMatthew Dillon * not atomic and can leak races. 
18090a9eefcaSMatthew Dillon */ 18100a9eefcaSMatthew Dillon static 18110a9eefcaSMatthew Dillon void 18120a9eefcaSMatthew Dillon dmsg_state_dying(dmsg_state_t *state) 18130a9eefcaSMatthew Dillon { 18140a9eefcaSMatthew Dillon dmsg_state_t *scan; 18150a9eefcaSMatthew Dillon 18160a9eefcaSMatthew Dillon if ((state->flags & DMSG_STATE_DYING) == 0) { 18170a9eefcaSMatthew Dillon state->flags |= DMSG_STATE_DYING; 18180a9eefcaSMatthew Dillon TAILQ_FOREACH(scan, &state->subq, entry) 18190a9eefcaSMatthew Dillon dmsg_state_dying(scan); 1820323c0947SMatthew Dillon } 1821323c0947SMatthew Dillon } 1822323c0947SMatthew Dillon 1823323c0947SMatthew Dillon /* 18240c3a8cd0SMatthew Dillon * This is a shortcut to formulate a reply to msg with a simple error code, 18250c3a8cd0SMatthew Dillon * It can reply to and terminate a transaction, or it can reply to a one-way 18260c3a8cd0SMatthew Dillon * messages. A DMSG_LNK_ERROR command code is utilized to encode 18270c3a8cd0SMatthew Dillon * the error code (which can be 0). Not all transactions are terminated 18280c3a8cd0SMatthew Dillon * with DMSG_LNK_ERROR status (the low level only cares about the 18290c3a8cd0SMatthew Dillon * MSGF_DELETE flag), but most are. 18300c3a8cd0SMatthew Dillon * 18310c3a8cd0SMatthew Dillon * Replies to one-way messages are a bit of an oxymoron but the feature 18320c3a8cd0SMatthew Dillon * is used by the debug (DBG) protocol. 18330c3a8cd0SMatthew Dillon * 18340c3a8cd0SMatthew Dillon * The reply contains no extended data. 18350c3a8cd0SMatthew Dillon */ 18360c3a8cd0SMatthew Dillon void 18370c3a8cd0SMatthew Dillon dmsg_msg_reply(dmsg_msg_t *msg, uint32_t error) 18380c3a8cd0SMatthew Dillon { 18390c3a8cd0SMatthew Dillon dmsg_state_t *state = msg->state; 18400c3a8cd0SMatthew Dillon dmsg_msg_t *nmsg; 18410c3a8cd0SMatthew Dillon uint32_t cmd; 18420c3a8cd0SMatthew Dillon 18430c3a8cd0SMatthew Dillon /* 18440c3a8cd0SMatthew Dillon * Reply with a simple error code and terminate the transaction. 
18450c3a8cd0SMatthew Dillon */ 18460c3a8cd0SMatthew Dillon cmd = DMSG_LNK_ERROR; 18470c3a8cd0SMatthew Dillon 18480c3a8cd0SMatthew Dillon /* 18490c3a8cd0SMatthew Dillon * Check if our direction has even been initiated yet, set CREATE. 18500c3a8cd0SMatthew Dillon * 18510c3a8cd0SMatthew Dillon * Check what direction this is (command or reply direction). Note 18520c3a8cd0SMatthew Dillon * that txcmd might not have been initiated yet. 18530c3a8cd0SMatthew Dillon * 18540c3a8cd0SMatthew Dillon * If our direction has already been closed we just return without 18550c3a8cd0SMatthew Dillon * doing anything. 18560c3a8cd0SMatthew Dillon */ 1857d30cab67SMatthew Dillon if ((state->flags & DMSG_STATE_ROOT) == 0) { 18580c3a8cd0SMatthew Dillon if (state->txcmd & DMSGF_DELETE) 18590c3a8cd0SMatthew Dillon return; 18600c3a8cd0SMatthew Dillon if (state->txcmd & DMSGF_REPLY) 18610c3a8cd0SMatthew Dillon cmd |= DMSGF_REPLY; 18620c3a8cd0SMatthew Dillon cmd |= DMSGF_DELETE; 18630c3a8cd0SMatthew Dillon } else { 18640c3a8cd0SMatthew Dillon if ((msg->any.head.cmd & DMSGF_REPLY) == 0) 18650c3a8cd0SMatthew Dillon cmd |= DMSGF_REPLY; 18660c3a8cd0SMatthew Dillon } 18670c3a8cd0SMatthew Dillon 18680c3a8cd0SMatthew Dillon /* 18690c3a8cd0SMatthew Dillon * Allocate the message and associate it with the existing state. 18700d20ec8aSMatthew Dillon * We cannot pass DMSGF_CREATE to msg_alloc() because that may 18710c3a8cd0SMatthew Dillon * allocate new state. We have our state already. 
18720c3a8cd0SMatthew Dillon */ 18731b8eded1SMatthew Dillon nmsg = dmsg_msg_alloc(state, 0, cmd, NULL, NULL); 1874d30cab67SMatthew Dillon if ((state->flags & DMSG_STATE_ROOT) == 0) { 18750c3a8cd0SMatthew Dillon if ((state->txcmd & DMSGF_CREATE) == 0) 18760c3a8cd0SMatthew Dillon nmsg->any.head.cmd |= DMSGF_CREATE; 18770c3a8cd0SMatthew Dillon } 18780c3a8cd0SMatthew Dillon nmsg->any.head.error = error; 18791b8eded1SMatthew Dillon 18800c3a8cd0SMatthew Dillon dmsg_msg_write(nmsg); 18810c3a8cd0SMatthew Dillon } 18820c3a8cd0SMatthew Dillon 18830c3a8cd0SMatthew Dillon /* 18840c3a8cd0SMatthew Dillon * Similar to dmsg_msg_reply() but leave the transaction open. That is, 18850c3a8cd0SMatthew Dillon * we are generating a streaming reply or an intermediate acknowledgement 18860c3a8cd0SMatthew Dillon * of some sort as part of the higher level protocol, with more to come 18870c3a8cd0SMatthew Dillon * later. 18880c3a8cd0SMatthew Dillon */ 18890c3a8cd0SMatthew Dillon void 18900c3a8cd0SMatthew Dillon dmsg_msg_result(dmsg_msg_t *msg, uint32_t error) 18910c3a8cd0SMatthew Dillon { 18920c3a8cd0SMatthew Dillon dmsg_state_t *state = msg->state; 18930c3a8cd0SMatthew Dillon dmsg_msg_t *nmsg; 18940c3a8cd0SMatthew Dillon uint32_t cmd; 18950c3a8cd0SMatthew Dillon 18960c3a8cd0SMatthew Dillon 18970c3a8cd0SMatthew Dillon /* 18980c3a8cd0SMatthew Dillon * Reply with a simple error code and terminate the transaction. 18990c3a8cd0SMatthew Dillon */ 19000c3a8cd0SMatthew Dillon cmd = DMSG_LNK_ERROR; 19010c3a8cd0SMatthew Dillon 19020c3a8cd0SMatthew Dillon /* 19030c3a8cd0SMatthew Dillon * Check if our direction has even been initiated yet, set CREATE. 19040c3a8cd0SMatthew Dillon * 19050c3a8cd0SMatthew Dillon * Check what direction this is (command or reply direction). Note 19060c3a8cd0SMatthew Dillon * that txcmd might not have been initiated yet. 
19070c3a8cd0SMatthew Dillon * 19080c3a8cd0SMatthew Dillon * If our direction has already been closed we just return without 19090c3a8cd0SMatthew Dillon * doing anything. 19100c3a8cd0SMatthew Dillon */ 1911d30cab67SMatthew Dillon if ((state->flags & DMSG_STATE_ROOT) == 0) { 19120c3a8cd0SMatthew Dillon if (state->txcmd & DMSGF_DELETE) 19130c3a8cd0SMatthew Dillon return; 19140c3a8cd0SMatthew Dillon if (state->txcmd & DMSGF_REPLY) 19150c3a8cd0SMatthew Dillon cmd |= DMSGF_REPLY; 19160c3a8cd0SMatthew Dillon /* continuing transaction, do not set MSGF_DELETE */ 19170c3a8cd0SMatthew Dillon } else { 19180c3a8cd0SMatthew Dillon if ((msg->any.head.cmd & DMSGF_REPLY) == 0) 19190c3a8cd0SMatthew Dillon cmd |= DMSGF_REPLY; 19200c3a8cd0SMatthew Dillon } 19211b8eded1SMatthew Dillon nmsg = dmsg_msg_alloc(state, 0, cmd, NULL, NULL); 1922d30cab67SMatthew Dillon if ((state->flags & DMSG_STATE_ROOT) == 0) { 19230c3a8cd0SMatthew Dillon if ((state->txcmd & DMSGF_CREATE) == 0) 19240c3a8cd0SMatthew Dillon nmsg->any.head.cmd |= DMSGF_CREATE; 19250c3a8cd0SMatthew Dillon } 19260c3a8cd0SMatthew Dillon nmsg->any.head.error = error; 19271b8eded1SMatthew Dillon 19280c3a8cd0SMatthew Dillon dmsg_msg_write(nmsg); 19290c3a8cd0SMatthew Dillon } 19300c3a8cd0SMatthew Dillon 19310c3a8cd0SMatthew Dillon /* 19320c3a8cd0SMatthew Dillon * Terminate a transaction given a state structure by issuing a DELETE. 
19331b8eded1SMatthew Dillon * (the state structure must not be &iocom->state0) 19340c3a8cd0SMatthew Dillon */ 19350c3a8cd0SMatthew Dillon void 19360c3a8cd0SMatthew Dillon dmsg_state_reply(dmsg_state_t *state, uint32_t error) 19370c3a8cd0SMatthew Dillon { 19380c3a8cd0SMatthew Dillon dmsg_msg_t *nmsg; 19390c3a8cd0SMatthew Dillon uint32_t cmd = DMSG_LNK_ERROR | DMSGF_DELETE; 19400c3a8cd0SMatthew Dillon 19410c3a8cd0SMatthew Dillon /* 19420c3a8cd0SMatthew Dillon * Nothing to do if we already transmitted a delete 19430c3a8cd0SMatthew Dillon */ 19440c3a8cd0SMatthew Dillon if (state->txcmd & DMSGF_DELETE) 19450c3a8cd0SMatthew Dillon return; 19460c3a8cd0SMatthew Dillon 19470c3a8cd0SMatthew Dillon /* 19480c3a8cd0SMatthew Dillon * Set REPLY if the other end initiated the command. Otherwise 19490c3a8cd0SMatthew Dillon * we are the command direction. 19500c3a8cd0SMatthew Dillon */ 19510c3a8cd0SMatthew Dillon if (state->txcmd & DMSGF_REPLY) 19520c3a8cd0SMatthew Dillon cmd |= DMSGF_REPLY; 19530c3a8cd0SMatthew Dillon 19541b8eded1SMatthew Dillon nmsg = dmsg_msg_alloc(state, 0, cmd, NULL, NULL); 1955d30cab67SMatthew Dillon if ((state->flags & DMSG_STATE_ROOT) == 0) { 19560c3a8cd0SMatthew Dillon if ((state->txcmd & DMSGF_CREATE) == 0) 19570c3a8cd0SMatthew Dillon nmsg->any.head.cmd |= DMSGF_CREATE; 19580c3a8cd0SMatthew Dillon } 19590c3a8cd0SMatthew Dillon nmsg->any.head.error = error; 19600d20ec8aSMatthew Dillon dmsg_msg_write(nmsg); 19610d20ec8aSMatthew Dillon } 19620d20ec8aSMatthew Dillon 19630d20ec8aSMatthew Dillon /* 19640d20ec8aSMatthew Dillon * Terminate a transaction given a state structure by issuing a DELETE. 
19651b8eded1SMatthew Dillon * (the state structure must not be &iocom->state0) 19660d20ec8aSMatthew Dillon */ 19670d20ec8aSMatthew Dillon void 19680d20ec8aSMatthew Dillon dmsg_state_result(dmsg_state_t *state, uint32_t error) 19690d20ec8aSMatthew Dillon { 19700d20ec8aSMatthew Dillon dmsg_msg_t *nmsg; 19710d20ec8aSMatthew Dillon uint32_t cmd = DMSG_LNK_ERROR; 19720d20ec8aSMatthew Dillon 19730d20ec8aSMatthew Dillon /* 19740d20ec8aSMatthew Dillon * Nothing to do if we already transmitted a delete 19750d20ec8aSMatthew Dillon */ 19760d20ec8aSMatthew Dillon if (state->txcmd & DMSGF_DELETE) 19770d20ec8aSMatthew Dillon return; 19780d20ec8aSMatthew Dillon 19790d20ec8aSMatthew Dillon /* 19800d20ec8aSMatthew Dillon * Set REPLY if the other end initiated the command. Otherwise 19810d20ec8aSMatthew Dillon * we are the command direction. 19820d20ec8aSMatthew Dillon */ 19830d20ec8aSMatthew Dillon if (state->txcmd & DMSGF_REPLY) 19840d20ec8aSMatthew Dillon cmd |= DMSGF_REPLY; 19850d20ec8aSMatthew Dillon 19861b8eded1SMatthew Dillon nmsg = dmsg_msg_alloc(state, 0, cmd, NULL, NULL); 1987d30cab67SMatthew Dillon if ((state->flags & DMSG_STATE_ROOT) == 0) { 19880d20ec8aSMatthew Dillon if ((state->txcmd & DMSGF_CREATE) == 0) 19890d20ec8aSMatthew Dillon nmsg->any.head.cmd |= DMSGF_CREATE; 19900d20ec8aSMatthew Dillon } 19910d20ec8aSMatthew Dillon nmsg->any.head.error = error; 19920c3a8cd0SMatthew Dillon dmsg_msg_write(nmsg); 19930c3a8cd0SMatthew Dillon } 19940c3a8cd0SMatthew Dillon 19950c3a8cd0SMatthew Dillon /************************************************************************ 19960c3a8cd0SMatthew Dillon * TRANSACTION STATE HANDLING * 19970c3a8cd0SMatthew Dillon ************************************************************************ 19980c3a8cd0SMatthew Dillon * 19990c3a8cd0SMatthew Dillon */ 20000c3a8cd0SMatthew Dillon 20010c3a8cd0SMatthew Dillon /* 2002d30cab67SMatthew Dillon * Process state tracking for a message after reception, prior to execution. 
 * Possibly route the message (consuming it).
 *
 * Called with msglk held and the msg dequeued.
 *
 * All messages are called with dummy state and return actual state.
 * (One-off messages often just return the same dummy state).
 *
 * May request that caller discard the message by setting *discardp to 1.
 * The returned state is not used in this case and is allowed to be NULL.
 *
 * --
 *
 * These routines handle persistent and command/reply message state via the
 * CREATE and DELETE flags.  The first message in a command or reply sequence
 * sets CREATE, the last message in a command or reply sequence sets DELETE.
 *
 * There can be any number of intermediate messages belonging to the same
 * sequence sent in between the CREATE message and the DELETE message,
 * which set neither flag.  This represents a streaming command or reply.
 *
 * Any command message received with CREATE set expects a reply sequence to
 * be returned.  Reply sequences work the same as command sequences except the
 * REPLY bit is also sent.  Both the command side and reply side can
 * degenerate into a single message with both CREATE and DELETE set.  Note
 * that one side can be streaming and the other side not, or neither, or both.
 *
 * The msgid is unique for the initiator.  That is, two sides sending a new
 * message can use the same msgid without colliding.
 *
 * --
 *
 * The message may be running over a circuit.  If the circuit is half-deleted
 * the message is typically racing against a link failure and must be thrown
 * out.  As the circuit deletion propagates the library will automatically
 * generate terminations for sub states.
 *
 * --
 *
 * ABORT sequences work by setting the ABORT flag along with normal message
 * state.  However, ABORTs can also be sent on half-closed messages, that is
 * even if the command or reply side has already sent a DELETE, as long as
 * the message has not been fully closed it can still send an ABORT+DELETE
 * to terminate the half-closed message state.
 *
 * Since ABORT+DELETEs can race we silently discard ABORT's for message
 * state which has already been fully closed.  REPLY+ABORT+DELETEs can
 * also race, and in this situation the other side might have already
 * initiated a new unrelated command with the same message id.  Since
 * the abort has not set the CREATE flag the situation can be detected
 * and the message will also be discarded.
 *
 * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE].
 * The ABORT request is essentially integrated into the command instead
 * of being sent later on.  In this situation the command implementation
 * detects that CREATE and ABORT are both set (vs ABORT alone) and can
 * special-case non-blocking operation for the command.
 *
 * NOTE!  Messages with ABORT set without CREATE or DELETE are considered
 *	  to be mid-stream aborts for command/reply sequences.  ABORTs on
 *	  one-way messages are not supported.
 *
 * NOTE!  If a command sequence does not support aborts the ABORT flag is
 *	  simply ignored.
 *
 * --
 *
 * One-off messages (no reply expected) are sent without an established
 * transaction.  CREATE and DELETE are left clear and the msgid is usually 0.
 * For one-off messages sent over circuits msgid generally MUST be 0.
 *
 * One-off messages cannot be aborted and typically aren't processed
 * by these routines.  Order is still guaranteed for messages sent over
 * the same circuit.  The REPLY bit can be used to distinguish whether
 * a one-off message is a command or reply.  For example, one-off replies
 * will typically just contain status updates.
20780c3a8cd0SMatthew Dillon */ 20790c3a8cd0SMatthew Dillon static int 20800a9eefcaSMatthew Dillon dmsg_state_msgrx(dmsg_msg_t *msg, int mstate) 20810c3a8cd0SMatthew Dillon { 20821b8eded1SMatthew Dillon dmsg_iocom_t *iocom = msg->state->iocom; 20830c3a8cd0SMatthew Dillon dmsg_state_t *state; 20841b8eded1SMatthew Dillon dmsg_state_t *pstate; 20850d20ec8aSMatthew Dillon dmsg_state_t sdummy; 20860c3a8cd0SMatthew Dillon int error; 20870c3a8cd0SMatthew Dillon 20880d20ec8aSMatthew Dillon pthread_mutex_lock(&iocom->mtx); 20890d20ec8aSMatthew Dillon 20900a9eefcaSMatthew Dillon if (DMsgDebugOpt) { 20915ab1caedSMatthew Dillon dmio_printf(iocom, 5, 20920a9eefcaSMatthew Dillon "msgrx: cmd=%08x msgid=%016jx " 20930a9eefcaSMatthew Dillon "circuit=%016jx error=%d\n", 20940a9eefcaSMatthew Dillon msg->any.head.cmd, 20950a9eefcaSMatthew Dillon msg->any.head.msgid, 20960a9eefcaSMatthew Dillon msg->any.head.circuit, 20970a9eefcaSMatthew Dillon msg->any.head.error); 20980a9eefcaSMatthew Dillon } 20990a9eefcaSMatthew Dillon 21000c3a8cd0SMatthew Dillon /* 2101d30cab67SMatthew Dillon * Lookup the circuit (pstate). The circuit will be an open 2102d30cab67SMatthew Dillon * transaction. The REVCIRC bit in the message tells us which side 2103d30cab67SMatthew Dillon * initiated it. 21040a9eefcaSMatthew Dillon * 21050a9eefcaSMatthew Dillon * If mstate is non-zero the state has already been incorporated 21060a9eefcaSMatthew Dillon * into the message as part of a simulated abort. Note that in this 21070a9eefcaSMatthew Dillon * situation the parent state may have already been removed from 21080a9eefcaSMatthew Dillon * the RBTREE. 
21091b8eded1SMatthew Dillon */ 21100a9eefcaSMatthew Dillon if (mstate) { 21110a9eefcaSMatthew Dillon pstate = msg->state->parent; 21120a9eefcaSMatthew Dillon } else if (msg->any.head.circuit) { 21131b8eded1SMatthew Dillon sdummy.msgid = msg->any.head.circuit; 21141b8eded1SMatthew Dillon 21151b8eded1SMatthew Dillon if (msg->any.head.cmd & DMSGF_REVCIRC) { 21161b8eded1SMatthew Dillon pstate = RB_FIND(dmsg_state_tree, 21171b8eded1SMatthew Dillon &iocom->statewr_tree, 21181b8eded1SMatthew Dillon &sdummy); 21191b8eded1SMatthew Dillon } else { 21201b8eded1SMatthew Dillon pstate = RB_FIND(dmsg_state_tree, 21211b8eded1SMatthew Dillon &iocom->staterd_tree, 21221b8eded1SMatthew Dillon &sdummy); 21231b8eded1SMatthew Dillon } 21240a9eefcaSMatthew Dillon 21250a9eefcaSMatthew Dillon /* 21260a9eefcaSMatthew Dillon * If we cannot find the circuit throw the message away. 21270a9eefcaSMatthew Dillon * The state will have already been taken care of by 21280a9eefcaSMatthew Dillon * the simulated failure code. This case can occur due 21290a9eefcaSMatthew Dillon * to a failure propagating in one direction crossing a 21300a9eefcaSMatthew Dillon * request on the failed circuit propagating in the other 21310a9eefcaSMatthew Dillon * direction. 
21320a9eefcaSMatthew Dillon */ 21331b8eded1SMatthew Dillon if (pstate == NULL) { 21345ab1caedSMatthew Dillon dmio_printf(iocom, 4, 21351b8eded1SMatthew Dillon "missing parent in stacked trans %s\n", 21361b8eded1SMatthew Dillon dmsg_msg_str(msg)); 21371b8eded1SMatthew Dillon pthread_mutex_unlock(&iocom->mtx); 21380a9eefcaSMatthew Dillon error = DMSG_IOQ_ERROR_EALREADY; 21390a9eefcaSMatthew Dillon 21400a9eefcaSMatthew Dillon return error; 21411b8eded1SMatthew Dillon } 21421b8eded1SMatthew Dillon } else { 21431b8eded1SMatthew Dillon pstate = &iocom->state0; 21441b8eded1SMatthew Dillon } 21450a9eefcaSMatthew Dillon /* WARNING: pstate not (yet) refd */ 21461b8eded1SMatthew Dillon 21471b8eded1SMatthew Dillon /* 2148d30cab67SMatthew Dillon * Lookup the msgid. 2149d30cab67SMatthew Dillon * 21500a9eefcaSMatthew Dillon * If mstate is non-zero the state has already been incorporated 21510a9eefcaSMatthew Dillon * into the message as part of a simulated abort. Note that in this 21520a9eefcaSMatthew Dillon * situation the state may have already been removed from the RBTREE. 21530a9eefcaSMatthew Dillon * 2154d30cab67SMatthew Dillon * If received msg is a command state is on staterd_tree. 2155d30cab67SMatthew Dillon * If received msg is a reply state is on statewr_tree. 
2156d30cab67SMatthew Dillon * Otherwise there is no state (retain &iocom->state0) 2157d30cab67SMatthew Dillon */ 21580a9eefcaSMatthew Dillon if (mstate) { 21590a9eefcaSMatthew Dillon state = msg->state; 21600a9eefcaSMatthew Dillon } else { 2161d30cab67SMatthew Dillon sdummy.msgid = msg->any.head.msgid; 21620a9eefcaSMatthew Dillon if (msg->any.head.cmd & DMSGF_REVTRANS) { 21630a9eefcaSMatthew Dillon state = RB_FIND(dmsg_state_tree, 21640a9eefcaSMatthew Dillon &iocom->statewr_tree, &sdummy); 21650a9eefcaSMatthew Dillon } else { 21660a9eefcaSMatthew Dillon state = RB_FIND(dmsg_state_tree, 21670a9eefcaSMatthew Dillon &iocom->staterd_tree, &sdummy); 21680a9eefcaSMatthew Dillon } 21690a9eefcaSMatthew Dillon } 2170d30cab67SMatthew Dillon 21710a9eefcaSMatthew Dillon if (DMsgDebugOpt) { 21725ab1caedSMatthew Dillon dmio_printf(iocom, 5, "msgrx:\tstate %p(%08x)", 21730a9eefcaSMatthew Dillon state, (state ? state->icmd : 0)); 21740a9eefcaSMatthew Dillon if (pstate != &iocom->state0) { 21755ab1caedSMatthew Dillon dmio_printf(iocom, 5, 21760a9eefcaSMatthew Dillon " pstate %p(%08x)", 21770a9eefcaSMatthew Dillon pstate, pstate->icmd); 21780a9eefcaSMatthew Dillon } 21795ab1caedSMatthew Dillon dmio_printf(iocom, 5, "%s\n", ""); 21800a9eefcaSMatthew Dillon } 21810a9eefcaSMatthew Dillon 21820a9eefcaSMatthew Dillon if (mstate) { 21830a9eefcaSMatthew Dillon /* state already assigned to msg */ 21840a9eefcaSMatthew Dillon } else if (state) { 2185d30cab67SMatthew Dillon /* 2186d30cab67SMatthew Dillon * Message over an existing transaction (CREATE should not 2187d30cab67SMatthew Dillon * be set). 2188d30cab67SMatthew Dillon */ 21890a9eefcaSMatthew Dillon dmsg_state_drop(msg->state); 21900a9eefcaSMatthew Dillon dmsg_state_hold(state); 2191d30cab67SMatthew Dillon msg->state = state; 2192d30cab67SMatthew Dillon assert(pstate == state->parent); 2193d30cab67SMatthew Dillon } else { 2194d30cab67SMatthew Dillon /* 2195d30cab67SMatthew Dillon * Either a new transaction (if CREATE set) or a one-off. 
2196d30cab67SMatthew Dillon */ 2197d30cab67SMatthew Dillon state = pstate; 2198d30cab67SMatthew Dillon } 2199d30cab67SMatthew Dillon 2200d30cab67SMatthew Dillon /* 2201d30cab67SMatthew Dillon * Switch on CREATE, DELETE, REPLY, and also handle ABORT from 2202d30cab67SMatthew Dillon * inside the case statements. 2203d30cab67SMatthew Dillon * 2204d30cab67SMatthew Dillon * Construct new state as necessary. 2205d30cab67SMatthew Dillon */ 2206d30cab67SMatthew Dillon switch(msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE | 2207d30cab67SMatthew Dillon DMSGF_REPLY)) { 2208d30cab67SMatthew Dillon case DMSGF_CREATE: 2209d30cab67SMatthew Dillon case DMSGF_CREATE | DMSGF_DELETE: 2210d30cab67SMatthew Dillon /* 2211d30cab67SMatthew Dillon * Create new sub-transaction under pstate. 2212d30cab67SMatthew Dillon * (any DELETE is handled in post-processing of msg). 2213d30cab67SMatthew Dillon * 2214d30cab67SMatthew Dillon * (During routing the msgid was made unique for this 2215d30cab67SMatthew Dillon * direction over the comlink, so our RB trees can be 2216d30cab67SMatthew Dillon * iocom-based instead of state-based). 2217d30cab67SMatthew Dillon */ 2218d30cab67SMatthew Dillon if (state != pstate) { 22195ab1caedSMatthew Dillon dmio_printf(iocom, 2, 2220d30cab67SMatthew Dillon "duplicate transaction %s\n", 2221d30cab67SMatthew Dillon dmsg_msg_str(msg)); 2222d30cab67SMatthew Dillon error = DMSG_IOQ_ERROR_TRANS; 2223d30cab67SMatthew Dillon assert(0); 2224d30cab67SMatthew Dillon break; 2225d30cab67SMatthew Dillon } 2226d30cab67SMatthew Dillon 2227d30cab67SMatthew Dillon /* 2228d30cab67SMatthew Dillon * Allocate the new state. 
22291b8eded1SMatthew Dillon */ 22300c3a8cd0SMatthew Dillon state = malloc(sizeof(*state)); 22310c3a8cd0SMatthew Dillon bzero(state, sizeof(*state)); 22320a9eefcaSMatthew Dillon atomic_add_int(&dmsg_state_count, 1); 22330a9eefcaSMatthew Dillon 22341b8eded1SMatthew Dillon TAILQ_INIT(&state->subq); 2235323c0947SMatthew Dillon dmsg_state_hold(pstate); 22361b8eded1SMatthew Dillon state->parent = pstate; 22370c3a8cd0SMatthew Dillon state->iocom = iocom; 22381b8eded1SMatthew Dillon state->flags = DMSG_STATE_DYNAMIC | 2239d30cab67SMatthew Dillon DMSG_STATE_OPPOSITE; 22401b8eded1SMatthew Dillon state->msgid = msg->any.head.msgid; 22410c3a8cd0SMatthew Dillon state->txcmd = DMSGF_REPLY; 22420c3a8cd0SMatthew Dillon state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE; 22430d20ec8aSMatthew Dillon state->icmd = state->rxcmd & DMSGF_BASECMDMASK; 22440a9eefcaSMatthew Dillon state->flags &= ~DMSG_STATE_NEW; 22450c3a8cd0SMatthew Dillon msg->state = state; 22460a9eefcaSMatthew Dillon 22471b8eded1SMatthew Dillon RB_INSERT(dmsg_state_tree, &iocom->staterd_tree, state); 22480a9eefcaSMatthew Dillon if (TAILQ_EMPTY(&pstate->subq)) 22490a9eefcaSMatthew Dillon dmsg_state_hold(pstate);/* pstate->subq */ 22501b8eded1SMatthew Dillon TAILQ_INSERT_TAIL(&pstate->subq, state, entry); 2251a06d536bSMatthew Dillon state->flags |= DMSG_STATE_SUBINSERTED | 2252a06d536bSMatthew Dillon DMSG_STATE_RBINSERTED; 22530a9eefcaSMatthew Dillon dmsg_state_hold(state); /* pstate->subq */ 22540a9eefcaSMatthew Dillon dmsg_state_hold(state); /* state on rbtree */ 22550a9eefcaSMatthew Dillon dmsg_state_hold(state); /* msg->state */ 2256d30cab67SMatthew Dillon 2257d30cab67SMatthew Dillon /* 2258d30cab67SMatthew Dillon * If the parent is a relay set up the state handler to 2259d30cab67SMatthew Dillon * automatically route the message. Local processing will 2260d30cab67SMatthew Dillon * not occur if set. 
2261d30cab67SMatthew Dillon * 2262d30cab67SMatthew Dillon * (state relays are seeded by SPAN processing) 2263d30cab67SMatthew Dillon */ 2264d30cab67SMatthew Dillon if (pstate->relay) 2265d30cab67SMatthew Dillon state->func = dmsg_state_relay; 22660c3a8cd0SMatthew Dillon error = 0; 22670c3a8cd0SMatthew Dillon break; 22680c3a8cd0SMatthew Dillon case DMSGF_DELETE: 22690c3a8cd0SMatthew Dillon /* 22700c3a8cd0SMatthew Dillon * Persistent state is expected but might not exist if an 22710c3a8cd0SMatthew Dillon * ABORT+DELETE races the close. 2272d30cab67SMatthew Dillon * 2273d30cab67SMatthew Dillon * (any DELETE is handled in post-processing of msg). 22740c3a8cd0SMatthew Dillon */ 2275d30cab67SMatthew Dillon if (state == pstate) { 22760c3a8cd0SMatthew Dillon if (msg->any.head.cmd & DMSGF_ABORT) { 22770c3a8cd0SMatthew Dillon error = DMSG_IOQ_ERROR_EALREADY; 22780c3a8cd0SMatthew Dillon } else { 22795ab1caedSMatthew Dillon dmio_printf(iocom, 2, 22805ab1caedSMatthew Dillon "missing-state %s\n", 22810c3a8cd0SMatthew Dillon dmsg_msg_str(msg)); 22820c3a8cd0SMatthew Dillon error = DMSG_IOQ_ERROR_TRANS; 22830c3a8cd0SMatthew Dillon assert(0); 22840c3a8cd0SMatthew Dillon } 22850c3a8cd0SMatthew Dillon break; 22860c3a8cd0SMatthew Dillon } 22870c3a8cd0SMatthew Dillon 22880c3a8cd0SMatthew Dillon /* 22890c3a8cd0SMatthew Dillon * Handle another ABORT+DELETE case if the msgid has already 22900c3a8cd0SMatthew Dillon * been reused. 
22910c3a8cd0SMatthew Dillon */ 22920c3a8cd0SMatthew Dillon if ((state->rxcmd & DMSGF_CREATE) == 0) { 22930c3a8cd0SMatthew Dillon if (msg->any.head.cmd & DMSGF_ABORT) { 22940c3a8cd0SMatthew Dillon error = DMSG_IOQ_ERROR_EALREADY; 22950c3a8cd0SMatthew Dillon } else { 22965ab1caedSMatthew Dillon dmio_printf(iocom, 2, 22975ab1caedSMatthew Dillon "reused-state %s\n", 22980c3a8cd0SMatthew Dillon dmsg_msg_str(msg)); 22990c3a8cd0SMatthew Dillon error = DMSG_IOQ_ERROR_TRANS; 23000c3a8cd0SMatthew Dillon assert(0); 23010c3a8cd0SMatthew Dillon } 23020c3a8cd0SMatthew Dillon break; 23030c3a8cd0SMatthew Dillon } 23040c3a8cd0SMatthew Dillon error = 0; 23050c3a8cd0SMatthew Dillon break; 23060c3a8cd0SMatthew Dillon default: 23070c3a8cd0SMatthew Dillon /* 23080c3a8cd0SMatthew Dillon * Check for mid-stream ABORT command received, otherwise 23090c3a8cd0SMatthew Dillon * allow. 23100c3a8cd0SMatthew Dillon */ 23110c3a8cd0SMatthew Dillon if (msg->any.head.cmd & DMSGF_ABORT) { 2312d30cab67SMatthew Dillon if ((state == pstate) || 23130c3a8cd0SMatthew Dillon (state->rxcmd & DMSGF_CREATE) == 0) { 23140c3a8cd0SMatthew Dillon error = DMSG_IOQ_ERROR_EALREADY; 23150c3a8cd0SMatthew Dillon break; 23160c3a8cd0SMatthew Dillon } 23170c3a8cd0SMatthew Dillon } 23180c3a8cd0SMatthew Dillon error = 0; 23190c3a8cd0SMatthew Dillon break; 23200c3a8cd0SMatthew Dillon case DMSGF_REPLY | DMSGF_CREATE: 23210c3a8cd0SMatthew Dillon case DMSGF_REPLY | DMSGF_CREATE | DMSGF_DELETE: 23220c3a8cd0SMatthew Dillon /* 23230c3a8cd0SMatthew Dillon * When receiving a reply with CREATE set the original 23240c3a8cd0SMatthew Dillon * persistent state message should already exist. 
23250c3a8cd0SMatthew Dillon */ 2326d30cab67SMatthew Dillon if (state == pstate) { 23275ab1caedSMatthew Dillon dmio_printf(iocom, 2, "no-state(r) %s\n", 23280c3a8cd0SMatthew Dillon dmsg_msg_str(msg)); 23290c3a8cd0SMatthew Dillon error = DMSG_IOQ_ERROR_TRANS; 23300c3a8cd0SMatthew Dillon assert(0); 23310c3a8cd0SMatthew Dillon break; 23320c3a8cd0SMatthew Dillon } 2333d30cab67SMatthew Dillon assert(((state->rxcmd ^ msg->any.head.cmd) & DMSGF_REPLY) == 0); 23340c3a8cd0SMatthew Dillon state->rxcmd = msg->any.head.cmd & ~DMSGF_DELETE; 23350c3a8cd0SMatthew Dillon error = 0; 23360c3a8cd0SMatthew Dillon break; 23370c3a8cd0SMatthew Dillon case DMSGF_REPLY | DMSGF_DELETE: 23380c3a8cd0SMatthew Dillon /* 23390c3a8cd0SMatthew Dillon * Received REPLY+ABORT+DELETE in case where msgid has 23400c3a8cd0SMatthew Dillon * already been fully closed, ignore the message. 23410c3a8cd0SMatthew Dillon */ 2342d30cab67SMatthew Dillon if (state == pstate) { 23430c3a8cd0SMatthew Dillon if (msg->any.head.cmd & DMSGF_ABORT) { 23440c3a8cd0SMatthew Dillon error = DMSG_IOQ_ERROR_EALREADY; 23450c3a8cd0SMatthew Dillon } else { 23465ab1caedSMatthew Dillon dmio_printf(iocom, 2, 23475ab1caedSMatthew Dillon "no-state(r,d) %s\n", 23480c3a8cd0SMatthew Dillon dmsg_msg_str(msg)); 23490c3a8cd0SMatthew Dillon error = DMSG_IOQ_ERROR_TRANS; 23500c3a8cd0SMatthew Dillon assert(0); 23510c3a8cd0SMatthew Dillon } 23520c3a8cd0SMatthew Dillon break; 23530c3a8cd0SMatthew Dillon } 23540c3a8cd0SMatthew Dillon 23550c3a8cd0SMatthew Dillon /* 23560c3a8cd0SMatthew Dillon * Received REPLY+ABORT+DELETE in case where msgid has 23570c3a8cd0SMatthew Dillon * already been reused for an unrelated message, 23580c3a8cd0SMatthew Dillon * ignore the message. 
23590c3a8cd0SMatthew Dillon */ 23600c3a8cd0SMatthew Dillon if ((state->rxcmd & DMSGF_CREATE) == 0) { 23610c3a8cd0SMatthew Dillon if (msg->any.head.cmd & DMSGF_ABORT) { 23620c3a8cd0SMatthew Dillon error = DMSG_IOQ_ERROR_EALREADY; 23630c3a8cd0SMatthew Dillon } else { 23645ab1caedSMatthew Dillon dmio_printf(iocom, 2, 23655ab1caedSMatthew Dillon "reused-state(r,d) %s\n", 23660c3a8cd0SMatthew Dillon dmsg_msg_str(msg)); 23670c3a8cd0SMatthew Dillon error = DMSG_IOQ_ERROR_TRANS; 23680c3a8cd0SMatthew Dillon assert(0); 23690c3a8cd0SMatthew Dillon } 23700c3a8cd0SMatthew Dillon break; 23710c3a8cd0SMatthew Dillon } 23720c3a8cd0SMatthew Dillon error = 0; 23730c3a8cd0SMatthew Dillon break; 23740c3a8cd0SMatthew Dillon case DMSGF_REPLY: 23750c3a8cd0SMatthew Dillon /* 23760c3a8cd0SMatthew Dillon * Check for mid-stream ABORT reply received to sent command. 23770c3a8cd0SMatthew Dillon */ 23780c3a8cd0SMatthew Dillon if (msg->any.head.cmd & DMSGF_ABORT) { 2379d30cab67SMatthew Dillon if (state == pstate || 23800c3a8cd0SMatthew Dillon (state->rxcmd & DMSGF_CREATE) == 0) { 23810c3a8cd0SMatthew Dillon error = DMSG_IOQ_ERROR_EALREADY; 23820c3a8cd0SMatthew Dillon break; 23830c3a8cd0SMatthew Dillon } 23840c3a8cd0SMatthew Dillon } 23850c3a8cd0SMatthew Dillon error = 0; 23860c3a8cd0SMatthew Dillon break; 23870c3a8cd0SMatthew Dillon } 23888e226bc8SMatthew Dillon 23898e226bc8SMatthew Dillon /* 23908e226bc8SMatthew Dillon * Calculate the easy-switch() transactional command. Represents 23918e226bc8SMatthew Dillon * the outer-transaction command for any transaction-create or 23928e226bc8SMatthew Dillon * transaction-delete, and the inner message command for any 23938e226bc8SMatthew Dillon * non-transaction or inside-transaction command. tcmd will be 23948e226bc8SMatthew Dillon * set to 0 for any messaging error condition. 
23958e226bc8SMatthew Dillon * 23968e226bc8SMatthew Dillon * The two can be told apart because outer-transaction commands 23978e226bc8SMatthew Dillon * always have a DMSGF_CREATE and/or DMSGF_DELETE flag. 23988e226bc8SMatthew Dillon */ 23998e226bc8SMatthew Dillon if (msg->any.head.cmd & (DMSGF_CREATE | DMSGF_DELETE)) { 24007adbba57SMatthew Dillon if ((msg->state->flags & DMSG_STATE_ROOT) == 0) { 24010a9eefcaSMatthew Dillon msg->tcmd = (state->icmd & DMSGF_BASECMDMASK) | 24028e226bc8SMatthew Dillon (msg->any.head.cmd & (DMSGF_CREATE | 24038e226bc8SMatthew Dillon DMSGF_DELETE | 24048e226bc8SMatthew Dillon DMSGF_REPLY)); 24058e226bc8SMatthew Dillon } else { 24068e226bc8SMatthew Dillon msg->tcmd = 0; 24078e226bc8SMatthew Dillon } 24088e226bc8SMatthew Dillon } else { 24098e226bc8SMatthew Dillon msg->tcmd = msg->any.head.cmd & DMSGF_CMDSWMASK; 24108e226bc8SMatthew Dillon } 24117adbba57SMatthew Dillon 24127adbba57SMatthew Dillon #ifdef DMSG_BLOCK_DEBUG 24137adbba57SMatthew Dillon switch (msg->tcmd) { 24147adbba57SMatthew Dillon case DMSG_BLK_READ | DMSGF_CREATE | DMSGF_DELETE: 24157adbba57SMatthew Dillon case DMSG_BLK_WRITE | DMSGF_CREATE | DMSGF_DELETE: 24165ab1caedSMatthew Dillon dmio_printf(iocom, 4, 24175ab1caedSMatthew Dillon "read BIO %-3d %016jx %d@%016jx\n", 24187adbba57SMatthew Dillon biocount, msg->any.head.msgid, 24197adbba57SMatthew Dillon msg->any.blk_read.bytes, 24207adbba57SMatthew Dillon msg->any.blk_read.offset); 24217adbba57SMatthew Dillon break; 24227adbba57SMatthew Dillon case DMSG_BLK_READ | DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY: 24237adbba57SMatthew Dillon case DMSG_BLK_WRITE | DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY: 24245ab1caedSMatthew Dillon dmio_printf(iocom, 4, 24255ab1caedSMatthew Dillon "rread BIO %-3d %016jx %d@%016jx\n", 24267adbba57SMatthew Dillon biocount, msg->any.head.msgid, 24277adbba57SMatthew Dillon msg->any.blk_read.bytes, 24287adbba57SMatthew Dillon msg->any.blk_read.offset); 24297adbba57SMatthew Dillon break; 
24307adbba57SMatthew Dillon default: 24317adbba57SMatthew Dillon break; 24327adbba57SMatthew Dillon } 24337adbba57SMatthew Dillon #endif 24347adbba57SMatthew Dillon 24350a9eefcaSMatthew Dillon /* 24360a9eefcaSMatthew Dillon * Adjust state, mark receive side as DELETED if appropriate and 24370a9eefcaSMatthew Dillon * adjust RB tree if both sides are DELETED. cleanuprx handles 24380a9eefcaSMatthew Dillon * the rest after the state callback returns. 24390a9eefcaSMatthew Dillon */ 24400a9eefcaSMatthew Dillon assert(msg->state->iocom == iocom); 24410a9eefcaSMatthew Dillon assert(msg->state == state); 24420a9eefcaSMatthew Dillon 24430a9eefcaSMatthew Dillon if (state->flags & DMSG_STATE_ROOT) { 24440a9eefcaSMatthew Dillon /* 24450a9eefcaSMatthew Dillon * Nothing to do for non-transactional messages. 24460a9eefcaSMatthew Dillon */ 24470a9eefcaSMatthew Dillon } else if (msg->any.head.cmd & DMSGF_DELETE) { 24480a9eefcaSMatthew Dillon /* 24490a9eefcaSMatthew Dillon * Message terminating transaction, remove the state from 24500a9eefcaSMatthew Dillon * the RB tree if the full transaction is now complete. 24510a9eefcaSMatthew Dillon * The related state, subq, and parent link is retained 24520a9eefcaSMatthew Dillon * until after the state callback is complete. 
24530a9eefcaSMatthew Dillon */ 24540a9eefcaSMatthew Dillon assert((state->rxcmd & DMSGF_DELETE) == 0); 24550a9eefcaSMatthew Dillon state->rxcmd |= DMSGF_DELETE; 24560a9eefcaSMatthew Dillon if (state->txcmd & DMSGF_DELETE) { 24570a9eefcaSMatthew Dillon assert(state->flags & DMSG_STATE_RBINSERTED); 24580a9eefcaSMatthew Dillon if (state->rxcmd & DMSGF_REPLY) { 24590a9eefcaSMatthew Dillon assert(msg->any.head.cmd & DMSGF_REPLY); 24600a9eefcaSMatthew Dillon RB_REMOVE(dmsg_state_tree, 24610a9eefcaSMatthew Dillon &iocom->statewr_tree, state); 24620a9eefcaSMatthew Dillon } else { 24630a9eefcaSMatthew Dillon assert((msg->any.head.cmd & DMSGF_REPLY) == 0); 24640a9eefcaSMatthew Dillon RB_REMOVE(dmsg_state_tree, 24650a9eefcaSMatthew Dillon &iocom->staterd_tree, state); 24660a9eefcaSMatthew Dillon } 24670a9eefcaSMatthew Dillon state->flags &= ~DMSG_STATE_RBINSERTED; 24680a9eefcaSMatthew Dillon dmsg_state_drop(state); 24690a9eefcaSMatthew Dillon } 24700a9eefcaSMatthew Dillon } 24710a9eefcaSMatthew Dillon 24720a9eefcaSMatthew Dillon pthread_mutex_unlock(&iocom->mtx); 24730a9eefcaSMatthew Dillon 24740a9eefcaSMatthew Dillon if (DMsgDebugOpt && error) 24755ab1caedSMatthew Dillon dmio_printf(iocom, 1, "msgrx: error %d\n", error); 24760a9eefcaSMatthew Dillon 24770c3a8cd0SMatthew Dillon return (error); 24780c3a8cd0SMatthew Dillon } 24790c3a8cd0SMatthew Dillon 24801b8eded1SMatthew Dillon /* 2481d30cab67SMatthew Dillon * Route the message and handle pair-state processing. 
24821b8eded1SMatthew Dillon */ 2483d30cab67SMatthew Dillon void 2484d30cab67SMatthew Dillon dmsg_state_relay(dmsg_msg_t *lmsg) 24851b8eded1SMatthew Dillon { 2486d30cab67SMatthew Dillon dmsg_state_t *lpstate; 2487d30cab67SMatthew Dillon dmsg_state_t *rpstate; 2488d30cab67SMatthew Dillon dmsg_state_t *lstate; 2489d30cab67SMatthew Dillon dmsg_state_t *rstate; 2490d30cab67SMatthew Dillon dmsg_msg_t *rmsg; 24911b8eded1SMatthew Dillon 24927adbba57SMatthew Dillon #ifdef DMSG_BLOCK_DEBUG 24937adbba57SMatthew Dillon switch (lmsg->tcmd) { 24940a9eefcaSMatthew Dillon case DMSG_BLK_OPEN | DMSGF_CREATE: 24955ab1caedSMatthew Dillon dmio_printf(iocom, 4, "%s\n", 24965ab1caedSMatthew Dillon "relay BIO_OPEN (CREATE)"); 24970a9eefcaSMatthew Dillon break; 24980a9eefcaSMatthew Dillon case DMSG_BLK_OPEN | DMSGF_DELETE: 24995ab1caedSMatthew Dillon dmio_printf(iocom, 4, "%s\n", 25005ab1caedSMatthew Dillon "relay BIO_OPEN (DELETE)"); 25010a9eefcaSMatthew Dillon break; 25027adbba57SMatthew Dillon case DMSG_BLK_READ | DMSGF_CREATE | DMSGF_DELETE: 25037adbba57SMatthew Dillon case DMSG_BLK_WRITE | DMSGF_CREATE | DMSGF_DELETE: 25047adbba57SMatthew Dillon atomic_add_int(&biocount, 1); 25055ab1caedSMatthew Dillon dmio_printf(iocom, 4, 25065ab1caedSMatthew Dillon "relay BIO %-3d %016jx %d@%016jx\n", 25077adbba57SMatthew Dillon biocount, lmsg->any.head.msgid, 25087adbba57SMatthew Dillon lmsg->any.blk_read.bytes, 25097adbba57SMatthew Dillon lmsg->any.blk_read.offset); 25107adbba57SMatthew Dillon break; 25117adbba57SMatthew Dillon case DMSG_BLK_READ | DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY: 25127adbba57SMatthew Dillon case DMSG_BLK_WRITE | DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY: 25135ab1caedSMatthew Dillon dmio_printf(iocom, 4, 25145ab1caedSMatthew Dillon "retrn BIO %-3d %016jx %d@%016jx\n", 25157adbba57SMatthew Dillon biocount, lmsg->any.head.msgid, 25167adbba57SMatthew Dillon lmsg->any.blk_read.bytes, 25177adbba57SMatthew Dillon lmsg->any.blk_read.offset); 25187adbba57SMatthew Dillon 
atomic_add_int(&biocount, -1); 25197adbba57SMatthew Dillon break; 25207adbba57SMatthew Dillon default: 25217adbba57SMatthew Dillon break; 25227adbba57SMatthew Dillon } 25237adbba57SMatthew Dillon #endif 25247adbba57SMatthew Dillon 2525d30cab67SMatthew Dillon if ((lmsg->any.head.cmd & (DMSGF_CREATE | DMSGF_REPLY)) == 2526d30cab67SMatthew Dillon DMSGF_CREATE) { 25271b8eded1SMatthew Dillon /* 2528d30cab67SMatthew Dillon * New sub-transaction, establish new state and relay. 25291b8eded1SMatthew Dillon */ 2530d30cab67SMatthew Dillon lstate = lmsg->state; 2531d30cab67SMatthew Dillon lpstate = lstate->parent; 2532d30cab67SMatthew Dillon rpstate = lpstate->relay; 2533d30cab67SMatthew Dillon assert(lstate->relay == NULL); 2534d30cab67SMatthew Dillon assert(rpstate != NULL); 25351b8eded1SMatthew Dillon 2536e96cef49SMatthew Dillon rmsg = dmsg_msg_alloc(rpstate, 0, 2537d30cab67SMatthew Dillon lmsg->any.head.cmd, 2538d30cab67SMatthew Dillon dmsg_state_relay, NULL); 2539d30cab67SMatthew Dillon rstate = rmsg->state; 2540d30cab67SMatthew Dillon rstate->relay = lstate; 2541d30cab67SMatthew Dillon lstate->relay = rstate; 2542323c0947SMatthew Dillon dmsg_state_hold(lstate); 2543323c0947SMatthew Dillon dmsg_state_hold(rstate); 25441b8eded1SMatthew Dillon } else { 25451b8eded1SMatthew Dillon /* 2546d30cab67SMatthew Dillon * State & relay already established 25471b8eded1SMatthew Dillon */ 2548d30cab67SMatthew Dillon lstate = lmsg->state; 2549d30cab67SMatthew Dillon rstate = lstate->relay; 2550d30cab67SMatthew Dillon assert(rstate != NULL); 2551d30cab67SMatthew Dillon 25520a9eefcaSMatthew Dillon assert((rstate->txcmd & DMSGF_DELETE) == 0); 25530a9eefcaSMatthew Dillon 25540a9eefcaSMatthew Dillon #if 0 25550a9eefcaSMatthew Dillon if (lstate->flags & DMSG_STATE_ABORTING) { 25565ab1caedSMatthew Dillon dmio_printf(iocom, 4, 25570a9eefcaSMatthew Dillon "relay: relay lost link l=%p r=%p\n", 25580a9eefcaSMatthew Dillon lstate, rstate); 25590a9eefcaSMatthew Dillon dmsg_simulate_failure(rstate, 0, 
DMSG_ERR_LOSTLINK); 25600a9eefcaSMatthew Dillon } 25610a9eefcaSMatthew Dillon #endif 25620a9eefcaSMatthew Dillon 2563e96cef49SMatthew Dillon rmsg = dmsg_msg_alloc(rstate, 0, 2564d30cab67SMatthew Dillon lmsg->any.head.cmd, 2565d30cab67SMatthew Dillon dmsg_state_relay, NULL); 25661b8eded1SMatthew Dillon } 2567d30cab67SMatthew Dillon if (lmsg->hdr_size > sizeof(lmsg->any.head)) { 2568d30cab67SMatthew Dillon bcopy(&lmsg->any.head + 1, &rmsg->any.head + 1, 2569d30cab67SMatthew Dillon lmsg->hdr_size - sizeof(lmsg->any.head)); 2570d30cab67SMatthew Dillon } 2571d30cab67SMatthew Dillon rmsg->any.head.error = lmsg->any.head.error; 2572d30cab67SMatthew Dillon rmsg->any.head.reserved02 = lmsg->any.head.reserved02; 2573d30cab67SMatthew Dillon rmsg->any.head.reserved18 = lmsg->any.head.reserved18; 2574e96cef49SMatthew Dillon rmsg->aux_size = lmsg->aux_size; 2575d30cab67SMatthew Dillon rmsg->aux_data = lmsg->aux_data; 2576d30cab67SMatthew Dillon lmsg->aux_data = NULL; 25770a9eefcaSMatthew Dillon 2578d30cab67SMatthew Dillon dmsg_msg_write(rmsg); 25791b8eded1SMatthew Dillon } 25801b8eded1SMatthew Dillon 2581d30cab67SMatthew Dillon /* 25820a9eefcaSMatthew Dillon * Cleanup and retire msg after issuing the state callback. The state 25830a9eefcaSMatthew Dillon * has already been removed from the RB tree. The subq and msg must be 25840a9eefcaSMatthew Dillon * cleaned up. 25850a9eefcaSMatthew Dillon * 25860a9eefcaSMatthew Dillon * Called with the iocom mutex held (to handle subq disconnection). 
2587d30cab67SMatthew Dillon */ 25880c3a8cd0SMatthew Dillon void 25890c3a8cd0SMatthew Dillon dmsg_state_cleanuprx(dmsg_iocom_t *iocom, dmsg_msg_t *msg) 25900c3a8cd0SMatthew Dillon { 25910c3a8cd0SMatthew Dillon dmsg_state_t *state; 25920c3a8cd0SMatthew Dillon 25931b8eded1SMatthew Dillon assert(msg->state->iocom == iocom); 25941b8eded1SMatthew Dillon state = msg->state; 2595d30cab67SMatthew Dillon if (state->flags & DMSG_STATE_ROOT) { 25960c3a8cd0SMatthew Dillon /* 25970c3a8cd0SMatthew Dillon * Free a non-transactional message, there is no state 25980c3a8cd0SMatthew Dillon * to worry about. 25990c3a8cd0SMatthew Dillon */ 26000c3a8cd0SMatthew Dillon dmsg_msg_free(msg); 26010a9eefcaSMatthew Dillon } else if ((state->flags & DMSG_STATE_SUBINSERTED) && 26020a9eefcaSMatthew Dillon (state->rxcmd & DMSGF_DELETE) && 26030a9eefcaSMatthew Dillon (state->txcmd & DMSGF_DELETE)) { 26040c3a8cd0SMatthew Dillon /* 26050a9eefcaSMatthew Dillon * Must disconnect from parent and drop relay. 26060c3a8cd0SMatthew Dillon */ 26070a9eefcaSMatthew Dillon dmsg_subq_delete(state); 2608d30cab67SMatthew Dillon if (state->relay) { 2609323c0947SMatthew Dillon dmsg_state_drop(state->relay); 2610d30cab67SMatthew Dillon state->relay = NULL; 2611d30cab67SMatthew Dillon } 26121b8eded1SMatthew Dillon dmsg_msg_free(msg); 26131b8eded1SMatthew Dillon } else { 26140c3a8cd0SMatthew Dillon /* 26150c3a8cd0SMatthew Dillon * Message not terminating transaction, leave state intact 26160c3a8cd0SMatthew Dillon * and free message if it isn't the CREATE message. 26170c3a8cd0SMatthew Dillon */ 26180c3a8cd0SMatthew Dillon dmsg_msg_free(msg); 26190c3a8cd0SMatthew Dillon } 26200c3a8cd0SMatthew Dillon } 26210c3a8cd0SMatthew Dillon 2622323c0947SMatthew Dillon /* 2623323c0947SMatthew Dillon * Clean up the state after pulling out needed fields and queueing the 2624323c0947SMatthew Dillon * message for transmission. This occurs in dmsg_msg_write(). 
26250a9eefcaSMatthew Dillon * 26260a9eefcaSMatthew Dillon * Called with the mutex locked. 2627323c0947SMatthew Dillon */ 26280c3a8cd0SMatthew Dillon static void 26291b8eded1SMatthew Dillon dmsg_state_cleanuptx(dmsg_iocom_t *iocom, dmsg_msg_t *msg) 26300c3a8cd0SMatthew Dillon { 26310c3a8cd0SMatthew Dillon dmsg_state_t *state; 26320c3a8cd0SMatthew Dillon 26331b8eded1SMatthew Dillon assert(iocom == msg->state->iocom); 26341b8eded1SMatthew Dillon state = msg->state; 26350a9eefcaSMatthew Dillon 26360a9eefcaSMatthew Dillon dmsg_state_hold(state); 26370a9eefcaSMatthew Dillon 2638d30cab67SMatthew Dillon if (state->flags & DMSG_STATE_ROOT) { 2639323c0947SMatthew Dillon ; 26400c3a8cd0SMatthew Dillon } else if (msg->any.head.cmd & DMSGF_DELETE) { 2641323c0947SMatthew Dillon /* 2642323c0947SMatthew Dillon * Message terminating transaction, destroy the related 2643323c0947SMatthew Dillon * state, the original message, and this message (if it 2644323c0947SMatthew Dillon * isn't the original message due to a CREATE|DELETE). 2645323c0947SMatthew Dillon * 2646323c0947SMatthew Dillon * It's possible for governing state to terminate while 2647323c0947SMatthew Dillon * sub-transactions still exist. This is allowed but 2648323c0947SMatthew Dillon * will cause sub-transactions to recursively fail. 2649323c0947SMatthew Dillon * Further reception of sub-transaction messages will be 2650323c0947SMatthew Dillon * impossible because the circuit will no longer exist. 2651323c0947SMatthew Dillon * (XXX need code to make sure that happens properly). 26520a9eefcaSMatthew Dillon * 26530a9eefcaSMatthew Dillon * NOTE: It is possible for a fafilure to terminate the 26540a9eefcaSMatthew Dillon * state after we have written the message but before 26550a9eefcaSMatthew Dillon * we are able to call cleanuptx, so txcmd might already 26560a9eefcaSMatthew Dillon * have DMSGF_DELETE set. 
2657323c0947SMatthew Dillon */ 26580a9eefcaSMatthew Dillon if ((state->txcmd & DMSGF_DELETE) == 0 && 26590a9eefcaSMatthew Dillon (state->rxcmd & DMSGF_DELETE)) { 26600c3a8cd0SMatthew Dillon state->txcmd |= DMSGF_DELETE; 2661a06d536bSMatthew Dillon assert(state->flags & DMSG_STATE_RBINSERTED); 26620c3a8cd0SMatthew Dillon if (state->txcmd & DMSGF_REPLY) { 26630c3a8cd0SMatthew Dillon assert(msg->any.head.cmd & DMSGF_REPLY); 26640c3a8cd0SMatthew Dillon RB_REMOVE(dmsg_state_tree, 26651b8eded1SMatthew Dillon &iocom->staterd_tree, state); 26660c3a8cd0SMatthew Dillon } else { 26670c3a8cd0SMatthew Dillon assert((msg->any.head.cmd & DMSGF_REPLY) == 0); 26680c3a8cd0SMatthew Dillon RB_REMOVE(dmsg_state_tree, 26691b8eded1SMatthew Dillon &iocom->statewr_tree, state); 26701b8eded1SMatthew Dillon } 2671a06d536bSMatthew Dillon state->flags &= ~DMSG_STATE_RBINSERTED; 26720a9eefcaSMatthew Dillon dmsg_subq_delete(state); 2673d30cab67SMatthew Dillon 2674d30cab67SMatthew Dillon if (state->relay) { 2675323c0947SMatthew Dillon dmsg_state_drop(state->relay); 2676d30cab67SMatthew Dillon state->relay = NULL; 2677d30cab67SMatthew Dillon } 26780a9eefcaSMatthew Dillon dmsg_state_drop(state); /* state->rbtree */ 26790a9eefcaSMatthew Dillon } else if ((state->txcmd & DMSGF_DELETE) == 0) { 26800a9eefcaSMatthew Dillon state->txcmd |= DMSGF_DELETE; 26810c3a8cd0SMatthew Dillon } 26820c3a8cd0SMatthew Dillon } 26830a9eefcaSMatthew Dillon 26840a9eefcaSMatthew Dillon /* 26850a9eefcaSMatthew Dillon * Deferred abort after transmission. 
26860a9eefcaSMatthew Dillon */ 26870a9eefcaSMatthew Dillon if ((state->flags & (DMSG_STATE_ABORTING | DMSG_STATE_DYING)) && 26880a9eefcaSMatthew Dillon (state->rxcmd & DMSGF_DELETE) == 0) { 26895ab1caedSMatthew Dillon dmio_printf(iocom, 4, 26905ab1caedSMatthew Dillon "cleanuptx: state=%p " 26910a9eefcaSMatthew Dillon "executing deferred abort\n", 26920a9eefcaSMatthew Dillon state); 26930a9eefcaSMatthew Dillon state->flags &= ~DMSG_STATE_ABORTING; 26940a9eefcaSMatthew Dillon dmsg_simulate_failure(state, 1, DMSG_ERR_LOSTLINK); 26950a9eefcaSMatthew Dillon } 26960a9eefcaSMatthew Dillon 26970a9eefcaSMatthew Dillon dmsg_state_drop(state); 26980c3a8cd0SMatthew Dillon } 26990c3a8cd0SMatthew Dillon 27000c3a8cd0SMatthew Dillon /* 2701323c0947SMatthew Dillon * Called with or without locks 2702323c0947SMatthew Dillon */ 2703323c0947SMatthew Dillon void 2704323c0947SMatthew Dillon dmsg_state_hold(dmsg_state_t *state) 2705323c0947SMatthew Dillon { 2706323c0947SMatthew Dillon atomic_add_int(&state->refs, 1); 2707323c0947SMatthew Dillon } 2708323c0947SMatthew Dillon 2709323c0947SMatthew Dillon void 2710323c0947SMatthew Dillon dmsg_state_drop(dmsg_state_t *state) 2711323c0947SMatthew Dillon { 27120a9eefcaSMatthew Dillon assert(state->refs > 0); 2713323c0947SMatthew Dillon if (atomic_fetchadd_int(&state->refs, -1) == 1) 2714323c0947SMatthew Dillon dmsg_state_free(state); 2715323c0947SMatthew Dillon } 2716323c0947SMatthew Dillon 2717323c0947SMatthew Dillon /* 27180c3a8cd0SMatthew Dillon * Called with iocom locked 27190c3a8cd0SMatthew Dillon */ 2720323c0947SMatthew Dillon static void 27210c3a8cd0SMatthew Dillon dmsg_state_free(dmsg_state_t *state) 27220c3a8cd0SMatthew Dillon { 2723323c0947SMatthew Dillon atomic_add_int(&dmsg_state_count, -1); 27245ab1caedSMatthew Dillon dmio_printf(state->iocom, 5, "terminate state %p\n", state); 2725a06d536bSMatthew Dillon assert((state->flags & (DMSG_STATE_ROOT | 2726a06d536bSMatthew Dillon DMSG_STATE_SUBINSERTED | 2727a06d536bSMatthew Dillon 
DMSG_STATE_RBINSERTED)) == 0); 2728323c0947SMatthew Dillon assert(TAILQ_EMPTY(&state->subq)); 2729323c0947SMatthew Dillon assert(state->refs == 0); 2730f306de83SMatthew Dillon if (state->any.any != NULL) /* XXX avoid deadlock w/exit & kernel */ 2731f306de83SMatthew Dillon closefrom(3); 27320c3a8cd0SMatthew Dillon assert(state->any.any == NULL); 27330c3a8cd0SMatthew Dillon free(state); 27340d20ec8aSMatthew Dillon } 27350c3a8cd0SMatthew Dillon 27360c3a8cd0SMatthew Dillon /* 27370c3a8cd0SMatthew Dillon * This swaps endian for a hammer2_msg_hdr. Note that the extended 27380c3a8cd0SMatthew Dillon * header is not adjusted, just the core header. 27390c3a8cd0SMatthew Dillon */ 27400c3a8cd0SMatthew Dillon void 27410c3a8cd0SMatthew Dillon dmsg_bswap_head(dmsg_hdr_t *head) 27420c3a8cd0SMatthew Dillon { 27430c3a8cd0SMatthew Dillon head->magic = bswap16(head->magic); 27440c3a8cd0SMatthew Dillon head->reserved02 = bswap16(head->reserved02); 27450c3a8cd0SMatthew Dillon head->salt = bswap32(head->salt); 27460c3a8cd0SMatthew Dillon 27470c3a8cd0SMatthew Dillon head->msgid = bswap64(head->msgid); 27480d20ec8aSMatthew Dillon head->circuit = bswap64(head->circuit); 27490d20ec8aSMatthew Dillon head->reserved18= bswap64(head->reserved18); 27500c3a8cd0SMatthew Dillon 27510c3a8cd0SMatthew Dillon head->cmd = bswap32(head->cmd); 27520c3a8cd0SMatthew Dillon head->aux_crc = bswap32(head->aux_crc); 27530c3a8cd0SMatthew Dillon head->aux_bytes = bswap32(head->aux_bytes); 27540c3a8cd0SMatthew Dillon head->error = bswap32(head->error); 27550c3a8cd0SMatthew Dillon head->aux_descr = bswap64(head->aux_descr); 27560c3a8cd0SMatthew Dillon head->reserved38= bswap32(head->reserved38); 27570c3a8cd0SMatthew Dillon head->hdr_crc = bswap32(head->hdr_crc); 27580c3a8cd0SMatthew Dillon } 2759