1*bf21cd93STycho Nightingale /*- 2*bf21cd93STycho Nightingale * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> 3*bf21cd93STycho Nightingale * All rights reserved. 4*bf21cd93STycho Nightingale * 5*bf21cd93STycho Nightingale * Redistribution and use in source and binary forms, with or without 6*bf21cd93STycho Nightingale * modification, are permitted provided that the following conditions 7*bf21cd93STycho Nightingale * are met: 8*bf21cd93STycho Nightingale * 1. Redistributions of source code must retain the above copyright 9*bf21cd93STycho Nightingale * notice, this list of conditions and the following disclaimer. 10*bf21cd93STycho Nightingale * 2. Redistributions in binary form must reproduce the above copyright 11*bf21cd93STycho Nightingale * notice, this list of conditions and the following disclaimer in the 12*bf21cd93STycho Nightingale * documentation and/or other materials provided with the distribution. 13*bf21cd93STycho Nightingale * 14*bf21cd93STycho Nightingale * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 15*bf21cd93STycho Nightingale * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16*bf21cd93STycho Nightingale * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17*bf21cd93STycho Nightingale * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18*bf21cd93STycho Nightingale * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19*bf21cd93STycho Nightingale * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20*bf21cd93STycho Nightingale * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21*bf21cd93STycho Nightingale * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22*bf21cd93STycho Nightingale * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23*bf21cd93STycho Nightingale * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24*bf21cd93STycho Nightingale * SUCH DAMAGE. 25*bf21cd93STycho Nightingale * 26*bf21cd93STycho Nightingale * $FreeBSD: head/usr.sbin/bhyve/block_if.c 274330 2014-11-09 21:08:52Z tychon $ 27*bf21cd93STycho Nightingale */ 28*bf21cd93STycho Nightingale 29*bf21cd93STycho Nightingale #include <sys/cdefs.h> 30*bf21cd93STycho Nightingale __FBSDID("$FreeBSD: head/usr.sbin/bhyve/block_if.c 274330 2014-11-09 21:08:52Z tychon $"); 31*bf21cd93STycho Nightingale 32*bf21cd93STycho Nightingale #include <sys/param.h> 33*bf21cd93STycho Nightingale #include <sys/queue.h> 34*bf21cd93STycho Nightingale #include <sys/errno.h> 35*bf21cd93STycho Nightingale #include <sys/stat.h> 36*bf21cd93STycho Nightingale #include <sys/ioctl.h> 37*bf21cd93STycho Nightingale #include <sys/disk.h> 38*bf21cd93STycho Nightingale 39*bf21cd93STycho Nightingale #include <assert.h> 40*bf21cd93STycho Nightingale #include <fcntl.h> 41*bf21cd93STycho Nightingale #include <stdio.h> 42*bf21cd93STycho Nightingale #include <stdlib.h> 43*bf21cd93STycho Nightingale #include <string.h> 44*bf21cd93STycho Nightingale #include <pthread.h> 45*bf21cd93STycho Nightingale #include <pthread_np.h> 46*bf21cd93STycho Nightingale #include <signal.h> 47*bf21cd93STycho Nightingale #include <unistd.h> 48*bf21cd93STycho Nightingale 49*bf21cd93STycho Nightingale #include <machine/atomic.h> 50*bf21cd93STycho Nightingale 51*bf21cd93STycho Nightingale #include "bhyverun.h" 52*bf21cd93STycho Nightingale #ifdef __FreeBSD__ 53*bf21cd93STycho Nightingale #include "mevent.h" 54*bf21cd93STycho Nightingale #endif 55*bf21cd93STycho Nightingale #include "block_if.h" 56*bf21cd93STycho Nightingale 57*bf21cd93STycho Nightingale #define BLOCKIF_SIG 0xb109b109 58*bf21cd93STycho Nightingale 59*bf21cd93STycho Nightingale #define BLOCKIF_MAXREQ 33 60*bf21cd93STycho Nightingale 61*bf21cd93STycho Nightingale enum blockop { 62*bf21cd93STycho Nightingale BOP_READ, 63*bf21cd93STycho Nightingale BOP_WRITE, 64*bf21cd93STycho Nightingale BOP_FLUSH 65*bf21cd93STycho Nightingale }; 66*bf21cd93STycho Nightingale 67*bf21cd93STycho Nightingale enum blockstat { 68*bf21cd93STycho Nightingale BST_FREE, 69*bf21cd93STycho Nightingale BST_PEND, 70*bf21cd93STycho Nightingale BST_BUSY, 71*bf21cd93STycho Nightingale BST_DONE 72*bf21cd93STycho Nightingale }; 73*bf21cd93STycho Nightingale 74*bf21cd93STycho Nightingale struct blockif_elem { 75*bf21cd93STycho Nightingale TAILQ_ENTRY(blockif_elem) be_link; 76*bf21cd93STycho Nightingale struct blockif_req *be_req; 77*bf21cd93STycho Nightingale enum blockop be_op; 78*bf21cd93STycho Nightingale enum blockstat be_status; 79*bf21cd93STycho Nightingale pthread_t be_tid; 80*bf21cd93STycho Nightingale }; 81*bf21cd93STycho Nightingale 82*bf21cd93STycho Nightingale struct blockif_ctxt { 83*bf21cd93STycho Nightingale int bc_magic; 84*bf21cd93STycho Nightingale int bc_fd; 85*bf21cd93STycho Nightingale int bc_rdonly; 86*bf21cd93STycho Nightingale off_t bc_size; 87*bf21cd93STycho Nightingale int bc_sectsz; 88*bf21cd93STycho Nightingale pthread_t bc_btid; 89*bf21cd93STycho Nightingale pthread_mutex_t bc_mtx; 90*bf21cd93STycho Nightingale pthread_cond_t bc_cond; 91*bf21cd93STycho Nightingale int bc_closing; 92*bf21cd93STycho Nightingale 93*bf21cd93STycho Nightingale /* Request elements and free/pending/busy queues */ 94*bf21cd93STycho Nightingale TAILQ_HEAD(, blockif_elem) bc_freeq; 95*bf21cd93STycho Nightingale TAILQ_HEAD(, blockif_elem) bc_pendq; 96*bf21cd93STycho Nightingale TAILQ_HEAD(, blockif_elem) bc_busyq; 97*bf21cd93STycho Nightingale u_int bc_req_count; 98*bf21cd93STycho Nightingale struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; 99*bf21cd93STycho Nightingale }; 100*bf21cd93STycho Nightingale 101*bf21cd93STycho Nightingale static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; 102*bf21cd93STycho Nightingale 103*bf21cd93STycho Nightingale struct blockif_sig_elem { 104*bf21cd93STycho Nightingale pthread_mutex_t bse_mtx; 105*bf21cd93STycho Nightingale pthread_cond_t bse_cond; 106*bf21cd93STycho Nightingale int bse_pending; 107*bf21cd93STycho Nightingale struct blockif_sig_elem *bse_next; 108*bf21cd93STycho Nightingale }; 109*bf21cd93STycho Nightingale 110*bf21cd93STycho Nightingale static struct blockif_sig_elem *blockif_bse_head; 111*bf21cd93STycho Nightingale 112*bf21cd93STycho Nightingale static int 113*bf21cd93STycho Nightingale blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, 114*bf21cd93STycho Nightingale enum blockop op) 115*bf21cd93STycho Nightingale { 116*bf21cd93STycho Nightingale struct blockif_elem *be; 117*bf21cd93STycho Nightingale 118*bf21cd93STycho Nightingale assert(bc->bc_req_count < BLOCKIF_MAXREQ); 119*bf21cd93STycho Nightingale 120*bf21cd93STycho Nightingale be = TAILQ_FIRST(&bc->bc_freeq); 121*bf21cd93STycho Nightingale assert(be != NULL); 122*bf21cd93STycho Nightingale assert(be->be_status == BST_FREE); 123*bf21cd93STycho Nightingale 124*bf21cd93STycho Nightingale TAILQ_REMOVE(&bc->bc_freeq, be, be_link); 125*bf21cd93STycho Nightingale be->be_status = BST_PEND; 126*bf21cd93STycho Nightingale be->be_req = breq; 127*bf21cd93STycho Nightingale be->be_op = op; 128*bf21cd93STycho Nightingale TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); 129*bf21cd93STycho Nightingale 130*bf21cd93STycho Nightingale bc->bc_req_count++; 131*bf21cd93STycho Nightingale 132*bf21cd93STycho Nightingale return (0); 133*bf21cd93STycho Nightingale } 134*bf21cd93STycho Nightingale 135*bf21cd93STycho Nightingale static int 136*bf21cd93STycho Nightingale blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem **bep) 137*bf21cd93STycho Nightingale { 138*bf21cd93STycho Nightingale struct blockif_elem *be; 139*bf21cd93STycho Nightingale 140*bf21cd93STycho Nightingale if (bc->bc_req_count == 0) 141*bf21cd93STycho Nightingale return (ENOENT); 142*bf21cd93STycho Nightingale 143*bf21cd93STycho Nightingale be = TAILQ_FIRST(&bc->bc_pendq); 144*bf21cd93STycho Nightingale assert(be != NULL); 145*bf21cd93STycho Nightingale assert(be->be_status == BST_PEND); 146*bf21cd93STycho Nightingale TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 147*bf21cd93STycho Nightingale be->be_status = BST_BUSY; 148*bf21cd93STycho Nightingale be->be_tid = bc->bc_btid; 149*bf21cd93STycho Nightingale TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); 150*bf21cd93STycho Nightingale 151*bf21cd93STycho Nightingale *bep = be; 152*bf21cd93STycho Nightingale 153*bf21cd93STycho Nightingale return (0); 154*bf21cd93STycho Nightingale } 155*bf21cd93STycho Nightingale 156*bf21cd93STycho Nightingale static void 157*bf21cd93STycho Nightingale blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) 158*bf21cd93STycho Nightingale { 159*bf21cd93STycho Nightingale assert(be->be_status == BST_DONE); 160*bf21cd93STycho Nightingale 161*bf21cd93STycho Nightingale TAILQ_REMOVE(&bc->bc_busyq, be, be_link); 162*bf21cd93STycho Nightingale be->be_tid = 0; 163*bf21cd93STycho Nightingale be->be_status = BST_FREE; 164*bf21cd93STycho Nightingale be->be_req = NULL; 165*bf21cd93STycho Nightingale TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); 166*bf21cd93STycho Nightingale 167*bf21cd93STycho Nightingale bc->bc_req_count--; 168*bf21cd93STycho Nightingale } 169*bf21cd93STycho Nightingale 170*bf21cd93STycho Nightingale static void 171*bf21cd93STycho Nightingale blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) 172*bf21cd93STycho Nightingale { 173*bf21cd93STycho Nightingale struct blockif_req *br; 174*bf21cd93STycho Nightingale int err; 175*bf21cd93STycho Nightingale 176*bf21cd93STycho Nightingale br = be->be_req; 177*bf21cd93STycho Nightingale err = 0; 178*bf21cd93STycho Nightingale 179*bf21cd93STycho Nightingale switch (be->be_op) { 180*bf21cd93STycho Nightingale case BOP_READ: 181*bf21cd93STycho Nightingale if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, 182*bf21cd93STycho Nightingale br->br_offset) < 0) 183*bf21cd93STycho Nightingale err = errno; 184*bf21cd93STycho Nightingale break; 185*bf21cd93STycho Nightingale case BOP_WRITE: 186*bf21cd93STycho Nightingale if (bc->bc_rdonly) 187*bf21cd93STycho Nightingale err = EROFS; 188*bf21cd93STycho Nightingale else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, 189*bf21cd93STycho Nightingale br->br_offset) < 0) 190*bf21cd93STycho Nightingale err = errno; 191*bf21cd93STycho Nightingale break; 192*bf21cd93STycho Nightingale case BOP_FLUSH: 193*bf21cd93STycho Nightingale break; 194*bf21cd93STycho Nightingale default: 195*bf21cd93STycho Nightingale err = EINVAL; 196*bf21cd93STycho Nightingale break; 197*bf21cd93STycho Nightingale } 198*bf21cd93STycho Nightingale 199*bf21cd93STycho Nightingale be->be_status = BST_DONE; 200*bf21cd93STycho Nightingale 201*bf21cd93STycho Nightingale (*br->br_callback)(br, err); 202*bf21cd93STycho Nightingale } 203*bf21cd93STycho Nightingale 204*bf21cd93STycho Nightingale static void * 205*bf21cd93STycho Nightingale blockif_thr(void *arg) 206*bf21cd93STycho Nightingale { 207*bf21cd93STycho Nightingale struct blockif_ctxt *bc; 208*bf21cd93STycho Nightingale struct blockif_elem *be; 209*bf21cd93STycho Nightingale 210*bf21cd93STycho Nightingale bc = arg; 211*bf21cd93STycho Nightingale 212*bf21cd93STycho Nightingale for (;;) { 213*bf21cd93STycho Nightingale pthread_mutex_lock(&bc->bc_mtx); 214*bf21cd93STycho Nightingale while (!blockif_dequeue(bc, &be)) { 215*bf21cd93STycho Nightingale pthread_mutex_unlock(&bc->bc_mtx); 216*bf21cd93STycho Nightingale blockif_proc(bc, be); 217*bf21cd93STycho Nightingale pthread_mutex_lock(&bc->bc_mtx); 218*bf21cd93STycho Nightingale blockif_complete(bc, be); 219*bf21cd93STycho Nightingale } 220*bf21cd93STycho Nightingale pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); 221*bf21cd93STycho Nightingale pthread_mutex_unlock(&bc->bc_mtx); 222*bf21cd93STycho Nightingale 223*bf21cd93STycho Nightingale /* 224*bf21cd93STycho Nightingale * Check ctxt status here to see if exit requested 225*bf21cd93STycho Nightingale */ 226*bf21cd93STycho Nightingale if (bc->bc_closing) 227*bf21cd93STycho Nightingale pthread_exit(NULL); 228*bf21cd93STycho Nightingale } 229*bf21cd93STycho Nightingale 230*bf21cd93STycho Nightingale /* Not reached */ 231*bf21cd93STycho Nightingale return (NULL); 232*bf21cd93STycho Nightingale } 233*bf21cd93STycho Nightingale 234*bf21cd93STycho Nightingale #ifdef __FreeBSD__ 235*bf21cd93STycho Nightingale static void 236*bf21cd93STycho Nightingale blockif_sigcont_handler(int signal, enum ev_type type, void *arg) 237*bf21cd93STycho Nightingale #else 238*bf21cd93STycho Nightingale static void 239*bf21cd93STycho Nightingale blockif_sigcont_handler(int signal) 240*bf21cd93STycho Nightingale #endif 241*bf21cd93STycho Nightingale { 242*bf21cd93STycho Nightingale struct blockif_sig_elem *bse; 243*bf21cd93STycho Nightingale 244*bf21cd93STycho Nightingale for (;;) { 245*bf21cd93STycho Nightingale /* 246*bf21cd93STycho Nightingale * Process the entire list even if not intended for 247*bf21cd93STycho Nightingale * this thread. 248*bf21cd93STycho Nightingale */ 249*bf21cd93STycho Nightingale do { 250*bf21cd93STycho Nightingale bse = blockif_bse_head; 251*bf21cd93STycho Nightingale if (bse == NULL) 252*bf21cd93STycho Nightingale return; 253*bf21cd93STycho Nightingale } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 254*bf21cd93STycho Nightingale (uintptr_t)bse, 255*bf21cd93STycho Nightingale (uintptr_t)bse->bse_next)); 256*bf21cd93STycho Nightingale 257*bf21cd93STycho Nightingale pthread_mutex_lock(&bse->bse_mtx); 258*bf21cd93STycho Nightingale bse->bse_pending = 0; 259*bf21cd93STycho Nightingale pthread_cond_signal(&bse->bse_cond); 260*bf21cd93STycho Nightingale pthread_mutex_unlock(&bse->bse_mtx); 261*bf21cd93STycho Nightingale } 262*bf21cd93STycho Nightingale } 263*bf21cd93STycho Nightingale 264*bf21cd93STycho Nightingale static void 265*bf21cd93STycho Nightingale blockif_init(void) 266*bf21cd93STycho Nightingale { 267*bf21cd93STycho Nightingale #ifdef __FreeBSD__ 268*bf21cd93STycho Nightingale mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); 269*bf21cd93STycho Nightingale (void) signal(SIGCONT, SIG_IGN); 270*bf21cd93STycho Nightingale #else 271*bf21cd93STycho Nightingale (void) sigset(SIGCONT, blockif_sigcont_handler); 272*bf21cd93STycho Nightingale #endif 273*bf21cd93STycho Nightingale } 274*bf21cd93STycho Nightingale 275*bf21cd93STycho Nightingale struct blockif_ctxt * 276*bf21cd93STycho Nightingale blockif_open(const char *optstr, const char *ident) 277*bf21cd93STycho Nightingale { 278*bf21cd93STycho Nightingale char tname[MAXCOMLEN + 1]; 279*bf21cd93STycho Nightingale char *nopt, *xopts; 280*bf21cd93STycho Nightingale struct blockif_ctxt *bc; 281*bf21cd93STycho Nightingale struct stat sbuf; 282*bf21cd93STycho Nightingale off_t size; 283*bf21cd93STycho Nightingale int extra, fd, i, sectsz; 284*bf21cd93STycho Nightingale int nocache, sync, ro; 285*bf21cd93STycho Nightingale 286*bf21cd93STycho Nightingale pthread_once(&blockif_once, blockif_init); 287*bf21cd93STycho Nightingale 288*bf21cd93STycho Nightingale nocache = 0; 289*bf21cd93STycho Nightingale sync = 0; 290*bf21cd93STycho Nightingale ro = 0; 291*bf21cd93STycho Nightingale 292*bf21cd93STycho Nightingale /* 293*bf21cd93STycho Nightingale * The first element in the optstring is always a pathname. 294*bf21cd93STycho Nightingale * Optional elements follow 295*bf21cd93STycho Nightingale */ 296*bf21cd93STycho Nightingale nopt = strdup(optstr); 297*bf21cd93STycho Nightingale for (xopts = strtok(nopt, ","); 298*bf21cd93STycho Nightingale xopts != NULL; 299*bf21cd93STycho Nightingale xopts = strtok(NULL, ",")) { 300*bf21cd93STycho Nightingale if (!strcmp(xopts, "nocache")) 301*bf21cd93STycho Nightingale nocache = 1; 302*bf21cd93STycho Nightingale else if (!strcmp(xopts, "sync")) 303*bf21cd93STycho Nightingale sync = 1; 304*bf21cd93STycho Nightingale else if (!strcmp(xopts, "ro")) 305*bf21cd93STycho Nightingale ro = 1; 306*bf21cd93STycho Nightingale } 307*bf21cd93STycho Nightingale 308*bf21cd93STycho Nightingale extra = 0; 309*bf21cd93STycho Nightingale if (nocache) 310*bf21cd93STycho Nightingale extra |= O_DIRECT; 311*bf21cd93STycho Nightingale if (sync) 312*bf21cd93STycho Nightingale extra |= O_SYNC; 313*bf21cd93STycho Nightingale 314*bf21cd93STycho Nightingale fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); 315*bf21cd93STycho Nightingale if (fd < 0 && !ro) { 316*bf21cd93STycho Nightingale /* Attempt a r/w fail with a r/o open */ 317*bf21cd93STycho Nightingale fd = open(nopt, O_RDONLY | extra); 318*bf21cd93STycho Nightingale ro = 1; 319*bf21cd93STycho Nightingale } 320*bf21cd93STycho Nightingale 321*bf21cd93STycho Nightingale if (fd < 0) { 322*bf21cd93STycho Nightingale perror("Could not open backing file"); 323*bf21cd93STycho Nightingale return (NULL); 324*bf21cd93STycho Nightingale } 325*bf21cd93STycho Nightingale 326*bf21cd93STycho Nightingale if (fstat(fd, &sbuf) < 0) { 327*bf21cd93STycho Nightingale perror("Could not stat backing file"); 328*bf21cd93STycho Nightingale close(fd); 329*bf21cd93STycho Nightingale return (NULL); 330*bf21cd93STycho Nightingale } 331*bf21cd93STycho Nightingale 332*bf21cd93STycho Nightingale /* 333*bf21cd93STycho Nightingale * Deal with raw devices 334*bf21cd93STycho Nightingale */ 335*bf21cd93STycho Nightingale size = sbuf.st_size; 336*bf21cd93STycho Nightingale sectsz = DEV_BSIZE; 337*bf21cd93STycho Nightingale #ifdef __FreeBSD__ 338*bf21cd93STycho Nightingale if (S_ISCHR(sbuf.st_mode)) { 339*bf21cd93STycho Nightingale if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 340*bf21cd93STycho Nightingale ioctl(fd, DIOCGSECTORSIZE, §sz)) { 341*bf21cd93STycho Nightingale perror("Could not fetch dev blk/sector size"); 342*bf21cd93STycho Nightingale close(fd); 343*bf21cd93STycho Nightingale return (NULL); 344*bf21cd93STycho Nightingale } 345*bf21cd93STycho Nightingale assert(size != 0); 346*bf21cd93STycho Nightingale assert(sectsz != 0); 347*bf21cd93STycho Nightingale } 348*bf21cd93STycho Nightingale #endif 349*bf21cd93STycho Nightingale 350*bf21cd93STycho Nightingale bc = calloc(1, sizeof(struct blockif_ctxt)); 351*bf21cd93STycho Nightingale if (bc == NULL) { 352*bf21cd93STycho Nightingale close(fd); 353*bf21cd93STycho Nightingale return (NULL); 354*bf21cd93STycho Nightingale } 355*bf21cd93STycho Nightingale 356*bf21cd93STycho Nightingale bc->bc_magic = BLOCKIF_SIG; 357*bf21cd93STycho Nightingale bc->bc_fd = fd; 358*bf21cd93STycho Nightingale bc->bc_rdonly = ro; 359*bf21cd93STycho Nightingale bc->bc_size = size; 360*bf21cd93STycho Nightingale bc->bc_sectsz = sectsz; 361*bf21cd93STycho Nightingale pthread_mutex_init(&bc->bc_mtx, NULL); 362*bf21cd93STycho Nightingale pthread_cond_init(&bc->bc_cond, NULL); 363*bf21cd93STycho Nightingale TAILQ_INIT(&bc->bc_freeq); 364*bf21cd93STycho Nightingale TAILQ_INIT(&bc->bc_pendq); 365*bf21cd93STycho Nightingale TAILQ_INIT(&bc->bc_busyq); 366*bf21cd93STycho Nightingale bc->bc_req_count = 0; 367*bf21cd93STycho Nightingale for (i = 0; i < BLOCKIF_MAXREQ; i++) { 368*bf21cd93STycho Nightingale bc->bc_reqs[i].be_status = BST_FREE; 369*bf21cd93STycho Nightingale TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); 370*bf21cd93STycho Nightingale } 371*bf21cd93STycho Nightingale 372*bf21cd93STycho Nightingale pthread_create(&bc->bc_btid, NULL, blockif_thr, bc); 373*bf21cd93STycho Nightingale 374*bf21cd93STycho Nightingale snprintf(tname, sizeof(tname), "blk-%s", ident); 375*bf21cd93STycho Nightingale pthread_set_name_np(bc->bc_btid, tname); 376*bf21cd93STycho Nightingale 377*bf21cd93STycho Nightingale return (bc); 378*bf21cd93STycho Nightingale } 379*bf21cd93STycho Nightingale 380*bf21cd93STycho Nightingale static int 381*bf21cd93STycho Nightingale blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, 382*bf21cd93STycho Nightingale enum blockop op) 383*bf21cd93STycho Nightingale { 384*bf21cd93STycho Nightingale int err; 385*bf21cd93STycho Nightingale 386*bf21cd93STycho Nightingale err = 0; 387*bf21cd93STycho Nightingale 388*bf21cd93STycho Nightingale pthread_mutex_lock(&bc->bc_mtx); 389*bf21cd93STycho Nightingale if (bc->bc_req_count < BLOCKIF_MAXREQ) { 390*bf21cd93STycho Nightingale /* 391*bf21cd93STycho Nightingale * Enqueue and inform the block i/o thread 392*bf21cd93STycho Nightingale * that there is work available 393*bf21cd93STycho Nightingale */ 394*bf21cd93STycho Nightingale blockif_enqueue(bc, breq, op); 395*bf21cd93STycho Nightingale pthread_cond_signal(&bc->bc_cond); 396*bf21cd93STycho Nightingale } else { 397*bf21cd93STycho Nightingale /* 398*bf21cd93STycho Nightingale * Callers are not allowed to enqueue more than 399*bf21cd93STycho Nightingale * the specified blockif queue limit. Return an 400*bf21cd93STycho Nightingale * error to indicate that the queue length has been 401*bf21cd93STycho Nightingale * exceeded. 402*bf21cd93STycho Nightingale */ 403*bf21cd93STycho Nightingale err = E2BIG; 404*bf21cd93STycho Nightingale } 405*bf21cd93STycho Nightingale pthread_mutex_unlock(&bc->bc_mtx); 406*bf21cd93STycho Nightingale 407*bf21cd93STycho Nightingale return (err); 408*bf21cd93STycho Nightingale } 409*bf21cd93STycho Nightingale 410*bf21cd93STycho Nightingale int 411*bf21cd93STycho Nightingale blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) 412*bf21cd93STycho Nightingale { 413*bf21cd93STycho Nightingale 414*bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 415*bf21cd93STycho Nightingale return (blockif_request(bc, breq, BOP_READ)); 416*bf21cd93STycho Nightingale } 417*bf21cd93STycho Nightingale 418*bf21cd93STycho Nightingale int 419*bf21cd93STycho Nightingale blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) 420*bf21cd93STycho Nightingale { 421*bf21cd93STycho Nightingale 422*bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 423*bf21cd93STycho Nightingale return (blockif_request(bc, breq, BOP_WRITE)); 424*bf21cd93STycho Nightingale } 425*bf21cd93STycho Nightingale 426*bf21cd93STycho Nightingale int 427*bf21cd93STycho Nightingale blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) 428*bf21cd93STycho Nightingale { 429*bf21cd93STycho Nightingale 430*bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 431*bf21cd93STycho Nightingale return (blockif_request(bc, breq, BOP_FLUSH)); 432*bf21cd93STycho Nightingale } 433*bf21cd93STycho Nightingale 434*bf21cd93STycho Nightingale int 435*bf21cd93STycho Nightingale blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) 436*bf21cd93STycho Nightingale { 437*bf21cd93STycho Nightingale struct blockif_elem *be; 438*bf21cd93STycho Nightingale 439*bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 440*bf21cd93STycho Nightingale 441*bf21cd93STycho Nightingale pthread_mutex_lock(&bc->bc_mtx); 442*bf21cd93STycho Nightingale /* 443*bf21cd93STycho Nightingale * Check pending requests. 444*bf21cd93STycho Nightingale */ 445*bf21cd93STycho Nightingale TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { 446*bf21cd93STycho Nightingale if (be->be_req == breq) 447*bf21cd93STycho Nightingale break; 448*bf21cd93STycho Nightingale } 449*bf21cd93STycho Nightingale if (be != NULL) { 450*bf21cd93STycho Nightingale /* 451*bf21cd93STycho Nightingale * Found it. 452*bf21cd93STycho Nightingale */ 453*bf21cd93STycho Nightingale TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 454*bf21cd93STycho Nightingale be->be_status = BST_FREE; 455*bf21cd93STycho Nightingale be->be_req = NULL; 456*bf21cd93STycho Nightingale TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); 457*bf21cd93STycho Nightingale bc->bc_req_count--; 458*bf21cd93STycho Nightingale pthread_mutex_unlock(&bc->bc_mtx); 459*bf21cd93STycho Nightingale 460*bf21cd93STycho Nightingale return (0); 461*bf21cd93STycho Nightingale } 462*bf21cd93STycho Nightingale 463*bf21cd93STycho Nightingale /* 464*bf21cd93STycho Nightingale * Check in-flight requests. 465*bf21cd93STycho Nightingale */ 466*bf21cd93STycho Nightingale TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { 467*bf21cd93STycho Nightingale if (be->be_req == breq) 468*bf21cd93STycho Nightingale break; 469*bf21cd93STycho Nightingale } 470*bf21cd93STycho Nightingale if (be == NULL) { 471*bf21cd93STycho Nightingale /* 472*bf21cd93STycho Nightingale * Didn't find it. 473*bf21cd93STycho Nightingale */ 474*bf21cd93STycho Nightingale pthread_mutex_unlock(&bc->bc_mtx); 475*bf21cd93STycho Nightingale return (EINVAL); 476*bf21cd93STycho Nightingale } 477*bf21cd93STycho Nightingale 478*bf21cd93STycho Nightingale /* 479*bf21cd93STycho Nightingale * Interrupt the processing thread to force it return 480*bf21cd93STycho Nightingale * prematurely via it's normal callback path. 481*bf21cd93STycho Nightingale */ 482*bf21cd93STycho Nightingale while (be->be_status == BST_BUSY) { 483*bf21cd93STycho Nightingale struct blockif_sig_elem bse, *old_head; 484*bf21cd93STycho Nightingale 485*bf21cd93STycho Nightingale pthread_mutex_init(&bse.bse_mtx, NULL); 486*bf21cd93STycho Nightingale pthread_cond_init(&bse.bse_cond, NULL); 487*bf21cd93STycho Nightingale 488*bf21cd93STycho Nightingale bse.bse_pending = 1; 489*bf21cd93STycho Nightingale 490*bf21cd93STycho Nightingale do { 491*bf21cd93STycho Nightingale old_head = blockif_bse_head; 492*bf21cd93STycho Nightingale bse.bse_next = old_head; 493*bf21cd93STycho Nightingale } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 494*bf21cd93STycho Nightingale (uintptr_t)old_head, 495*bf21cd93STycho Nightingale (uintptr_t)&bse)); 496*bf21cd93STycho Nightingale 497*bf21cd93STycho Nightingale pthread_kill(be->be_tid, SIGCONT); 498*bf21cd93STycho Nightingale 499*bf21cd93STycho Nightingale pthread_mutex_lock(&bse.bse_mtx); 500*bf21cd93STycho Nightingale while (bse.bse_pending) 501*bf21cd93STycho Nightingale pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); 502*bf21cd93STycho Nightingale pthread_mutex_unlock(&bse.bse_mtx); 503*bf21cd93STycho Nightingale } 504*bf21cd93STycho Nightingale 505*bf21cd93STycho Nightingale pthread_mutex_unlock(&bc->bc_mtx); 506*bf21cd93STycho Nightingale 507*bf21cd93STycho Nightingale /* 508*bf21cd93STycho Nightingale * The processing thread has been interrupted. Since it's not 509*bf21cd93STycho Nightingale * clear if the callback has been invoked yet, return EBUSY. 510*bf21cd93STycho Nightingale */ 511*bf21cd93STycho Nightingale return (EBUSY); 512*bf21cd93STycho Nightingale } 513*bf21cd93STycho Nightingale 514*bf21cd93STycho Nightingale int 515*bf21cd93STycho Nightingale blockif_close(struct blockif_ctxt *bc) 516*bf21cd93STycho Nightingale { 517*bf21cd93STycho Nightingale void *jval; 518*bf21cd93STycho Nightingale int err; 519*bf21cd93STycho Nightingale 520*bf21cd93STycho Nightingale err = 0; 521*bf21cd93STycho Nightingale 522*bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 523*bf21cd93STycho Nightingale 524*bf21cd93STycho Nightingale /* 525*bf21cd93STycho Nightingale * Stop the block i/o thread 526*bf21cd93STycho Nightingale */ 527*bf21cd93STycho Nightingale bc->bc_closing = 1; 528*bf21cd93STycho Nightingale pthread_cond_signal(&bc->bc_cond); 529*bf21cd93STycho Nightingale pthread_join(bc->bc_btid, &jval); 530*bf21cd93STycho Nightingale 531*bf21cd93STycho Nightingale /* XXX Cancel queued i/o's ??? */ 532*bf21cd93STycho Nightingale 533*bf21cd93STycho Nightingale /* 534*bf21cd93STycho Nightingale * Release resources 535*bf21cd93STycho Nightingale */ 536*bf21cd93STycho Nightingale bc->bc_magic = 0; 537*bf21cd93STycho Nightingale close(bc->bc_fd); 538*bf21cd93STycho Nightingale free(bc); 539*bf21cd93STycho Nightingale 540*bf21cd93STycho Nightingale return (0); 541*bf21cd93STycho Nightingale } 542*bf21cd93STycho Nightingale 543*bf21cd93STycho Nightingale /* 544*bf21cd93STycho Nightingale * Return virtual C/H/S values for a given block. Use the algorithm 545*bf21cd93STycho Nightingale * outlined in the VHD specification to calculate values. 546*bf21cd93STycho Nightingale */ 547*bf21cd93STycho Nightingale void 548*bf21cd93STycho Nightingale blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) 549*bf21cd93STycho Nightingale { 550*bf21cd93STycho Nightingale off_t sectors; /* total sectors of the block dev */ 551*bf21cd93STycho Nightingale off_t hcyl; /* cylinders times heads */ 552*bf21cd93STycho Nightingale uint16_t secpt; /* sectors per track */ 553*bf21cd93STycho Nightingale uint8_t heads; 554*bf21cd93STycho Nightingale 555*bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 556*bf21cd93STycho Nightingale 557*bf21cd93STycho Nightingale sectors = bc->bc_size / bc->bc_sectsz; 558*bf21cd93STycho Nightingale 559*bf21cd93STycho Nightingale /* Clamp the size to the largest possible with CHS */ 560*bf21cd93STycho Nightingale if (sectors > 65535UL*16*255) 561*bf21cd93STycho Nightingale sectors = 65535UL*16*255; 562*bf21cd93STycho Nightingale 563*bf21cd93STycho Nightingale if (sectors >= 65536UL*16*63) { 564*bf21cd93STycho Nightingale secpt = 255; 565*bf21cd93STycho Nightingale heads = 16; 566*bf21cd93STycho Nightingale hcyl = sectors / secpt; 567*bf21cd93STycho Nightingale } else { 568*bf21cd93STycho Nightingale secpt = 17; 569*bf21cd93STycho Nightingale hcyl = sectors / secpt; 570*bf21cd93STycho Nightingale heads = (hcyl + 1023) / 1024; 571*bf21cd93STycho Nightingale 572*bf21cd93STycho Nightingale if (heads < 4) 573*bf21cd93STycho Nightingale heads = 4; 574*bf21cd93STycho Nightingale 575*bf21cd93STycho Nightingale if (hcyl >= (heads * 1024) || heads > 16) { 576*bf21cd93STycho Nightingale secpt = 31; 577*bf21cd93STycho Nightingale heads = 16; 578*bf21cd93STycho Nightingale hcyl = sectors / secpt; 579*bf21cd93STycho Nightingale } 580*bf21cd93STycho Nightingale if (hcyl >= (heads * 1024)) { 581*bf21cd93STycho Nightingale secpt = 63; 582*bf21cd93STycho Nightingale heads = 16; 583*bf21cd93STycho Nightingale hcyl = sectors / secpt; 584*bf21cd93STycho Nightingale } 585*bf21cd93STycho Nightingale } 586*bf21cd93STycho Nightingale 587*bf21cd93STycho Nightingale *c = hcyl / heads; 588*bf21cd93STycho Nightingale *h = heads; 589*bf21cd93STycho Nightingale *s = secpt; 590*bf21cd93STycho Nightingale } 591*bf21cd93STycho Nightingale 592*bf21cd93STycho Nightingale /* 593*bf21cd93STycho Nightingale * Accessors 594*bf21cd93STycho Nightingale */ 595*bf21cd93STycho Nightingale off_t 596*bf21cd93STycho Nightingale blockif_size(struct blockif_ctxt *bc) 597*bf21cd93STycho Nightingale { 598*bf21cd93STycho Nightingale 599*bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 600*bf21cd93STycho Nightingale return (bc->bc_size); 601*bf21cd93STycho Nightingale } 602*bf21cd93STycho Nightingale 603*bf21cd93STycho Nightingale int 604*bf21cd93STycho Nightingale blockif_sectsz(struct blockif_ctxt *bc) 605*bf21cd93STycho Nightingale { 606*bf21cd93STycho Nightingale 607*bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 608*bf21cd93STycho Nightingale return (bc->bc_sectsz); 609*bf21cd93STycho Nightingale } 610*bf21cd93STycho Nightingale 611*bf21cd93STycho Nightingale int 612*bf21cd93STycho Nightingale blockif_queuesz(struct blockif_ctxt *bc) 613*bf21cd93STycho Nightingale { 614*bf21cd93STycho Nightingale 615*bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 616*bf21cd93STycho Nightingale return (BLOCKIF_MAXREQ - 1); 617*bf21cd93STycho Nightingale } 618*bf21cd93STycho Nightingale 619*bf21cd93STycho Nightingale int 620*bf21cd93STycho Nightingale blockif_is_ro(struct blockif_ctxt *bc) 621*bf21cd93STycho Nightingale { 622*bf21cd93STycho Nightingale 623*bf21cd93STycho Nightingale assert(bc->bc_magic == BLOCKIF_SIG); 624*bf21cd93STycho Nightingale return (bc->bc_rdonly); 625*bf21cd93STycho Nightingale } 626