xref: /illumos-gate/usr/src/cmd/bhyve/block_if.c (revision bf21cd93)
1*bf21cd93STycho Nightingale /*-
2*bf21cd93STycho Nightingale  * Copyright (c) 2013  Peter Grehan <grehan@freebsd.org>
3*bf21cd93STycho Nightingale  * All rights reserved.
4*bf21cd93STycho Nightingale  *
5*bf21cd93STycho Nightingale  * Redistribution and use in source and binary forms, with or without
6*bf21cd93STycho Nightingale  * modification, are permitted provided that the following conditions
7*bf21cd93STycho Nightingale  * are met:
8*bf21cd93STycho Nightingale  * 1. Redistributions of source code must retain the above copyright
9*bf21cd93STycho Nightingale  *    notice, this list of conditions and the following disclaimer.
10*bf21cd93STycho Nightingale  * 2. Redistributions in binary form must reproduce the above copyright
11*bf21cd93STycho Nightingale  *    notice, this list of conditions and the following disclaimer in the
12*bf21cd93STycho Nightingale  *    documentation and/or other materials provided with the distribution.
13*bf21cd93STycho Nightingale  *
14*bf21cd93STycho Nightingale  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15*bf21cd93STycho Nightingale  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16*bf21cd93STycho Nightingale  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17*bf21cd93STycho Nightingale  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18*bf21cd93STycho Nightingale  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19*bf21cd93STycho Nightingale  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20*bf21cd93STycho Nightingale  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21*bf21cd93STycho Nightingale  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22*bf21cd93STycho Nightingale  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23*bf21cd93STycho Nightingale  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24*bf21cd93STycho Nightingale  * SUCH DAMAGE.
25*bf21cd93STycho Nightingale  *
26*bf21cd93STycho Nightingale  * $FreeBSD: head/usr.sbin/bhyve/block_if.c 274330 2014-11-09 21:08:52Z tychon $
27*bf21cd93STycho Nightingale  */
28*bf21cd93STycho Nightingale 
29*bf21cd93STycho Nightingale #include <sys/cdefs.h>
30*bf21cd93STycho Nightingale __FBSDID("$FreeBSD: head/usr.sbin/bhyve/block_if.c 274330 2014-11-09 21:08:52Z tychon $");
31*bf21cd93STycho Nightingale 
32*bf21cd93STycho Nightingale #include <sys/param.h>
33*bf21cd93STycho Nightingale #include <sys/queue.h>
34*bf21cd93STycho Nightingale #include <sys/errno.h>
35*bf21cd93STycho Nightingale #include <sys/stat.h>
36*bf21cd93STycho Nightingale #include <sys/ioctl.h>
37*bf21cd93STycho Nightingale #include <sys/disk.h>
38*bf21cd93STycho Nightingale 
39*bf21cd93STycho Nightingale #include <assert.h>
40*bf21cd93STycho Nightingale #include <fcntl.h>
41*bf21cd93STycho Nightingale #include <stdio.h>
42*bf21cd93STycho Nightingale #include <stdlib.h>
43*bf21cd93STycho Nightingale #include <string.h>
44*bf21cd93STycho Nightingale #include <pthread.h>
45*bf21cd93STycho Nightingale #include <pthread_np.h>
46*bf21cd93STycho Nightingale #include <signal.h>
47*bf21cd93STycho Nightingale #include <unistd.h>
48*bf21cd93STycho Nightingale 
49*bf21cd93STycho Nightingale #include <machine/atomic.h>
50*bf21cd93STycho Nightingale 
51*bf21cd93STycho Nightingale #include "bhyverun.h"
52*bf21cd93STycho Nightingale #ifdef	__FreeBSD__
53*bf21cd93STycho Nightingale #include "mevent.h"
54*bf21cd93STycho Nightingale #endif
55*bf21cd93STycho Nightingale #include "block_if.h"
56*bf21cd93STycho Nightingale 
/* Magic value kept in bc_magic while a context is open (see blockif_close) */
#define BLOCKIF_SIG	0xb109b109

/*
 * Number of request elements embedded in each context.  blockif_queuesz()
 * reports one less than this value to its callers.
 */
#define BLOCKIF_MAXREQ	33
60*bf21cd93STycho Nightingale 
/* Operation carried by a queued request element; dispatched in blockif_proc() */
enum blockop {
	BOP_READ = 0,
	BOP_WRITE,
	BOP_FLUSH
};
66*bf21cd93STycho Nightingale 
/* Life-cycle state of a request element */
enum blockstat {
	BST_FREE = 0,	/* unused, linked on bc_freeq */
	BST_PEND,	/* queued on bc_pendq, not yet picked up */
	BST_BUSY,	/* moved to bc_busyq by blockif_dequeue() */
	BST_DONE	/* i/o finished; set by blockif_proc() */
};
73*bf21cd93STycho Nightingale 
74*bf21cd93STycho Nightingale struct blockif_elem {
75*bf21cd93STycho Nightingale 	TAILQ_ENTRY(blockif_elem) be_link;
76*bf21cd93STycho Nightingale 	struct blockif_req  *be_req;
77*bf21cd93STycho Nightingale 	enum blockop	     be_op;
78*bf21cd93STycho Nightingale 	enum blockstat	     be_status;
79*bf21cd93STycho Nightingale 	pthread_t            be_tid;
80*bf21cd93STycho Nightingale };
81*bf21cd93STycho Nightingale 
82*bf21cd93STycho Nightingale struct blockif_ctxt {
83*bf21cd93STycho Nightingale 	int			bc_magic;
84*bf21cd93STycho Nightingale 	int			bc_fd;
85*bf21cd93STycho Nightingale 	int			bc_rdonly;
86*bf21cd93STycho Nightingale 	off_t			bc_size;
87*bf21cd93STycho Nightingale 	int			bc_sectsz;
88*bf21cd93STycho Nightingale 	pthread_t		bc_btid;
89*bf21cd93STycho Nightingale         pthread_mutex_t		bc_mtx;
90*bf21cd93STycho Nightingale         pthread_cond_t		bc_cond;
91*bf21cd93STycho Nightingale 	int			bc_closing;
92*bf21cd93STycho Nightingale 
93*bf21cd93STycho Nightingale 	/* Request elements and free/pending/busy queues */
94*bf21cd93STycho Nightingale 	TAILQ_HEAD(, blockif_elem) bc_freeq;
95*bf21cd93STycho Nightingale 	TAILQ_HEAD(, blockif_elem) bc_pendq;
96*bf21cd93STycho Nightingale 	TAILQ_HEAD(, blockif_elem) bc_busyq;
97*bf21cd93STycho Nightingale 	u_int			bc_req_count;
98*bf21cd93STycho Nightingale 	struct blockif_elem	bc_reqs[BLOCKIF_MAXREQ];
99*bf21cd93STycho Nightingale };
100*bf21cd93STycho Nightingale 
/* Guards the one-time call to blockif_init() made from blockif_open() */
static pthread_once_t blockif_once = PTHREAD_ONCE_INIT;

/*
 * Rendezvous record used by blockif_cancel(): the canceller pushes one of
 * these on the global list and sleeps on bse_cond until the SIGCONT
 * handler pops it and clears bse_pending.
 */
struct blockif_sig_elem {
	pthread_mutex_t			bse_mtx;	/* protects bse_pending */
	pthread_cond_t			bse_cond;	/* wakes the canceller */
	int				bse_pending;	/* 1 until handled */
	struct blockif_sig_elem		*bse_next;	/* next list entry */
};

/* Head of the singly-linked list above; updated with atomic_cmpset_ptr() */
static struct blockif_sig_elem *blockif_bse_head;
111*bf21cd93STycho Nightingale 
112*bf21cd93STycho Nightingale static int
113*bf21cd93STycho Nightingale blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
114*bf21cd93STycho Nightingale 		enum blockop op)
115*bf21cd93STycho Nightingale {
116*bf21cd93STycho Nightingale 	struct blockif_elem *be;
117*bf21cd93STycho Nightingale 
118*bf21cd93STycho Nightingale 	assert(bc->bc_req_count < BLOCKIF_MAXREQ);
119*bf21cd93STycho Nightingale 
120*bf21cd93STycho Nightingale 	be = TAILQ_FIRST(&bc->bc_freeq);
121*bf21cd93STycho Nightingale 	assert(be != NULL);
122*bf21cd93STycho Nightingale 	assert(be->be_status == BST_FREE);
123*bf21cd93STycho Nightingale 
124*bf21cd93STycho Nightingale 	TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
125*bf21cd93STycho Nightingale 	be->be_status = BST_PEND;
126*bf21cd93STycho Nightingale 	be->be_req = breq;
127*bf21cd93STycho Nightingale 	be->be_op = op;
128*bf21cd93STycho Nightingale 	TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
129*bf21cd93STycho Nightingale 
130*bf21cd93STycho Nightingale 	bc->bc_req_count++;
131*bf21cd93STycho Nightingale 
132*bf21cd93STycho Nightingale 	return (0);
133*bf21cd93STycho Nightingale }
134*bf21cd93STycho Nightingale 
135*bf21cd93STycho Nightingale static int
136*bf21cd93STycho Nightingale blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem **bep)
137*bf21cd93STycho Nightingale {
138*bf21cd93STycho Nightingale 	struct blockif_elem *be;
139*bf21cd93STycho Nightingale 
140*bf21cd93STycho Nightingale 	if (bc->bc_req_count == 0)
141*bf21cd93STycho Nightingale 		return (ENOENT);
142*bf21cd93STycho Nightingale 
143*bf21cd93STycho Nightingale 	be = TAILQ_FIRST(&bc->bc_pendq);
144*bf21cd93STycho Nightingale 	assert(be != NULL);
145*bf21cd93STycho Nightingale 	assert(be->be_status == BST_PEND);
146*bf21cd93STycho Nightingale 	TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
147*bf21cd93STycho Nightingale 	be->be_status = BST_BUSY;
148*bf21cd93STycho Nightingale 	be->be_tid = bc->bc_btid;
149*bf21cd93STycho Nightingale 	TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
150*bf21cd93STycho Nightingale 
151*bf21cd93STycho Nightingale 	*bep = be;
152*bf21cd93STycho Nightingale 
153*bf21cd93STycho Nightingale 	return (0);
154*bf21cd93STycho Nightingale }
155*bf21cd93STycho Nightingale 
156*bf21cd93STycho Nightingale static void
157*bf21cd93STycho Nightingale blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
158*bf21cd93STycho Nightingale {
159*bf21cd93STycho Nightingale 	assert(be->be_status == BST_DONE);
160*bf21cd93STycho Nightingale 
161*bf21cd93STycho Nightingale 	TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
162*bf21cd93STycho Nightingale 	be->be_tid = 0;
163*bf21cd93STycho Nightingale 	be->be_status = BST_FREE;
164*bf21cd93STycho Nightingale 	be->be_req = NULL;
165*bf21cd93STycho Nightingale 	TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
166*bf21cd93STycho Nightingale 
167*bf21cd93STycho Nightingale 	bc->bc_req_count--;
168*bf21cd93STycho Nightingale }
169*bf21cd93STycho Nightingale 
170*bf21cd93STycho Nightingale static void
171*bf21cd93STycho Nightingale blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
172*bf21cd93STycho Nightingale {
173*bf21cd93STycho Nightingale 	struct blockif_req *br;
174*bf21cd93STycho Nightingale 	int err;
175*bf21cd93STycho Nightingale 
176*bf21cd93STycho Nightingale 	br = be->be_req;
177*bf21cd93STycho Nightingale 	err = 0;
178*bf21cd93STycho Nightingale 
179*bf21cd93STycho Nightingale 	switch (be->be_op) {
180*bf21cd93STycho Nightingale 	case BOP_READ:
181*bf21cd93STycho Nightingale 		if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
182*bf21cd93STycho Nightingale 			   br->br_offset) < 0)
183*bf21cd93STycho Nightingale 			err = errno;
184*bf21cd93STycho Nightingale 		break;
185*bf21cd93STycho Nightingale 	case BOP_WRITE:
186*bf21cd93STycho Nightingale 		if (bc->bc_rdonly)
187*bf21cd93STycho Nightingale 			err = EROFS;
188*bf21cd93STycho Nightingale 		else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
189*bf21cd93STycho Nightingale 			     br->br_offset) < 0)
190*bf21cd93STycho Nightingale 			err = errno;
191*bf21cd93STycho Nightingale 		break;
192*bf21cd93STycho Nightingale 	case BOP_FLUSH:
193*bf21cd93STycho Nightingale 		break;
194*bf21cd93STycho Nightingale 	default:
195*bf21cd93STycho Nightingale 		err = EINVAL;
196*bf21cd93STycho Nightingale 		break;
197*bf21cd93STycho Nightingale 	}
198*bf21cd93STycho Nightingale 
199*bf21cd93STycho Nightingale 	be->be_status = BST_DONE;
200*bf21cd93STycho Nightingale 
201*bf21cd93STycho Nightingale 	(*br->br_callback)(br, err);
202*bf21cd93STycho Nightingale }
203*bf21cd93STycho Nightingale 
204*bf21cd93STycho Nightingale static void *
205*bf21cd93STycho Nightingale blockif_thr(void *arg)
206*bf21cd93STycho Nightingale {
207*bf21cd93STycho Nightingale 	struct blockif_ctxt *bc;
208*bf21cd93STycho Nightingale 	struct blockif_elem *be;
209*bf21cd93STycho Nightingale 
210*bf21cd93STycho Nightingale 	bc = arg;
211*bf21cd93STycho Nightingale 
212*bf21cd93STycho Nightingale 	for (;;) {
213*bf21cd93STycho Nightingale 		pthread_mutex_lock(&bc->bc_mtx);
214*bf21cd93STycho Nightingale 		while (!blockif_dequeue(bc, &be)) {
215*bf21cd93STycho Nightingale 			pthread_mutex_unlock(&bc->bc_mtx);
216*bf21cd93STycho Nightingale 			blockif_proc(bc, be);
217*bf21cd93STycho Nightingale 			pthread_mutex_lock(&bc->bc_mtx);
218*bf21cd93STycho Nightingale 			blockif_complete(bc, be);
219*bf21cd93STycho Nightingale 		}
220*bf21cd93STycho Nightingale 		pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
221*bf21cd93STycho Nightingale 		pthread_mutex_unlock(&bc->bc_mtx);
222*bf21cd93STycho Nightingale 
223*bf21cd93STycho Nightingale 		/*
224*bf21cd93STycho Nightingale 		 * Check ctxt status here to see if exit requested
225*bf21cd93STycho Nightingale 		 */
226*bf21cd93STycho Nightingale 		if (bc->bc_closing)
227*bf21cd93STycho Nightingale 			pthread_exit(NULL);
228*bf21cd93STycho Nightingale 	}
229*bf21cd93STycho Nightingale 
230*bf21cd93STycho Nightingale 	/* Not reached */
231*bf21cd93STycho Nightingale 	return (NULL);
232*bf21cd93STycho Nightingale }
233*bf21cd93STycho Nightingale 
234*bf21cd93STycho Nightingale #ifdef	__FreeBSD__
235*bf21cd93STycho Nightingale static void
236*bf21cd93STycho Nightingale blockif_sigcont_handler(int signal, enum ev_type type, void *arg)
237*bf21cd93STycho Nightingale #else
238*bf21cd93STycho Nightingale static void
239*bf21cd93STycho Nightingale blockif_sigcont_handler(int signal)
240*bf21cd93STycho Nightingale #endif
241*bf21cd93STycho Nightingale {
242*bf21cd93STycho Nightingale 	struct blockif_sig_elem *bse;
243*bf21cd93STycho Nightingale 
244*bf21cd93STycho Nightingale 	for (;;) {
245*bf21cd93STycho Nightingale 		/*
246*bf21cd93STycho Nightingale 		 * Process the entire list even if not intended for
247*bf21cd93STycho Nightingale 		 * this thread.
248*bf21cd93STycho Nightingale 		 */
249*bf21cd93STycho Nightingale 		do {
250*bf21cd93STycho Nightingale 			bse = blockif_bse_head;
251*bf21cd93STycho Nightingale 			if (bse == NULL)
252*bf21cd93STycho Nightingale 				return;
253*bf21cd93STycho Nightingale 		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
254*bf21cd93STycho Nightingale 					    (uintptr_t)bse,
255*bf21cd93STycho Nightingale 					    (uintptr_t)bse->bse_next));
256*bf21cd93STycho Nightingale 
257*bf21cd93STycho Nightingale 		pthread_mutex_lock(&bse->bse_mtx);
258*bf21cd93STycho Nightingale 		bse->bse_pending = 0;
259*bf21cd93STycho Nightingale 		pthread_cond_signal(&bse->bse_cond);
260*bf21cd93STycho Nightingale 		pthread_mutex_unlock(&bse->bse_mtx);
261*bf21cd93STycho Nightingale 	}
262*bf21cd93STycho Nightingale }
263*bf21cd93STycho Nightingale 
264*bf21cd93STycho Nightingale static void
265*bf21cd93STycho Nightingale blockif_init(void)
266*bf21cd93STycho Nightingale {
267*bf21cd93STycho Nightingale #ifdef	__FreeBSD__
268*bf21cd93STycho Nightingale 	mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL);
269*bf21cd93STycho Nightingale 	(void) signal(SIGCONT, SIG_IGN);
270*bf21cd93STycho Nightingale #else
271*bf21cd93STycho Nightingale 	(void) sigset(SIGCONT, blockif_sigcont_handler);
272*bf21cd93STycho Nightingale #endif
273*bf21cd93STycho Nightingale }
274*bf21cd93STycho Nightingale 
275*bf21cd93STycho Nightingale struct blockif_ctxt *
276*bf21cd93STycho Nightingale blockif_open(const char *optstr, const char *ident)
277*bf21cd93STycho Nightingale {
278*bf21cd93STycho Nightingale 	char tname[MAXCOMLEN + 1];
279*bf21cd93STycho Nightingale 	char *nopt, *xopts;
280*bf21cd93STycho Nightingale 	struct blockif_ctxt *bc;
281*bf21cd93STycho Nightingale 	struct stat sbuf;
282*bf21cd93STycho Nightingale 	off_t size;
283*bf21cd93STycho Nightingale 	int extra, fd, i, sectsz;
284*bf21cd93STycho Nightingale 	int nocache, sync, ro;
285*bf21cd93STycho Nightingale 
286*bf21cd93STycho Nightingale 	pthread_once(&blockif_once, blockif_init);
287*bf21cd93STycho Nightingale 
288*bf21cd93STycho Nightingale 	nocache = 0;
289*bf21cd93STycho Nightingale 	sync = 0;
290*bf21cd93STycho Nightingale 	ro = 0;
291*bf21cd93STycho Nightingale 
292*bf21cd93STycho Nightingale 	/*
293*bf21cd93STycho Nightingale 	 * The first element in the optstring is always a pathname.
294*bf21cd93STycho Nightingale 	 * Optional elements follow
295*bf21cd93STycho Nightingale 	 */
296*bf21cd93STycho Nightingale 	nopt = strdup(optstr);
297*bf21cd93STycho Nightingale 	for (xopts = strtok(nopt, ",");
298*bf21cd93STycho Nightingale 	     xopts != NULL;
299*bf21cd93STycho Nightingale 	     xopts = strtok(NULL, ",")) {
300*bf21cd93STycho Nightingale 		if (!strcmp(xopts, "nocache"))
301*bf21cd93STycho Nightingale 			nocache = 1;
302*bf21cd93STycho Nightingale 		else if (!strcmp(xopts, "sync"))
303*bf21cd93STycho Nightingale 			sync = 1;
304*bf21cd93STycho Nightingale 		else if (!strcmp(xopts, "ro"))
305*bf21cd93STycho Nightingale 			ro = 1;
306*bf21cd93STycho Nightingale 	}
307*bf21cd93STycho Nightingale 
308*bf21cd93STycho Nightingale 	extra = 0;
309*bf21cd93STycho Nightingale 	if (nocache)
310*bf21cd93STycho Nightingale 		extra |= O_DIRECT;
311*bf21cd93STycho Nightingale 	if (sync)
312*bf21cd93STycho Nightingale 		extra |= O_SYNC;
313*bf21cd93STycho Nightingale 
314*bf21cd93STycho Nightingale 	fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra);
315*bf21cd93STycho Nightingale 	if (fd < 0 && !ro) {
316*bf21cd93STycho Nightingale 		/* Attempt a r/w fail with a r/o open */
317*bf21cd93STycho Nightingale 		fd = open(nopt, O_RDONLY | extra);
318*bf21cd93STycho Nightingale 		ro = 1;
319*bf21cd93STycho Nightingale 	}
320*bf21cd93STycho Nightingale 
321*bf21cd93STycho Nightingale 	if (fd < 0) {
322*bf21cd93STycho Nightingale 		perror("Could not open backing file");
323*bf21cd93STycho Nightingale 		return (NULL);
324*bf21cd93STycho Nightingale 	}
325*bf21cd93STycho Nightingale 
326*bf21cd93STycho Nightingale         if (fstat(fd, &sbuf) < 0) {
327*bf21cd93STycho Nightingale                 perror("Could not stat backing file");
328*bf21cd93STycho Nightingale                 close(fd);
329*bf21cd93STycho Nightingale                 return (NULL);
330*bf21cd93STycho Nightingale         }
331*bf21cd93STycho Nightingale 
332*bf21cd93STycho Nightingale         /*
333*bf21cd93STycho Nightingale 	 * Deal with raw devices
334*bf21cd93STycho Nightingale 	 */
335*bf21cd93STycho Nightingale         size = sbuf.st_size;
336*bf21cd93STycho Nightingale 	sectsz = DEV_BSIZE;
337*bf21cd93STycho Nightingale #ifdef	__FreeBSD__
338*bf21cd93STycho Nightingale 	if (S_ISCHR(sbuf.st_mode)) {
339*bf21cd93STycho Nightingale 		if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
340*bf21cd93STycho Nightingale 		    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
341*bf21cd93STycho Nightingale 			perror("Could not fetch dev blk/sector size");
342*bf21cd93STycho Nightingale 			close(fd);
343*bf21cd93STycho Nightingale 			return (NULL);
344*bf21cd93STycho Nightingale 		}
345*bf21cd93STycho Nightingale 		assert(size != 0);
346*bf21cd93STycho Nightingale 		assert(sectsz != 0);
347*bf21cd93STycho Nightingale 	}
348*bf21cd93STycho Nightingale #endif
349*bf21cd93STycho Nightingale 
350*bf21cd93STycho Nightingale 	bc = calloc(1, sizeof(struct blockif_ctxt));
351*bf21cd93STycho Nightingale 	if (bc == NULL) {
352*bf21cd93STycho Nightingale 		close(fd);
353*bf21cd93STycho Nightingale 		return (NULL);
354*bf21cd93STycho Nightingale 	}
355*bf21cd93STycho Nightingale 
356*bf21cd93STycho Nightingale 	bc->bc_magic = BLOCKIF_SIG;
357*bf21cd93STycho Nightingale 	bc->bc_fd = fd;
358*bf21cd93STycho Nightingale 	bc->bc_rdonly = ro;
359*bf21cd93STycho Nightingale 	bc->bc_size = size;
360*bf21cd93STycho Nightingale 	bc->bc_sectsz = sectsz;
361*bf21cd93STycho Nightingale 	pthread_mutex_init(&bc->bc_mtx, NULL);
362*bf21cd93STycho Nightingale 	pthread_cond_init(&bc->bc_cond, NULL);
363*bf21cd93STycho Nightingale 	TAILQ_INIT(&bc->bc_freeq);
364*bf21cd93STycho Nightingale 	TAILQ_INIT(&bc->bc_pendq);
365*bf21cd93STycho Nightingale 	TAILQ_INIT(&bc->bc_busyq);
366*bf21cd93STycho Nightingale 	bc->bc_req_count = 0;
367*bf21cd93STycho Nightingale 	for (i = 0; i < BLOCKIF_MAXREQ; i++) {
368*bf21cd93STycho Nightingale 		bc->bc_reqs[i].be_status = BST_FREE;
369*bf21cd93STycho Nightingale 		TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
370*bf21cd93STycho Nightingale 	}
371*bf21cd93STycho Nightingale 
372*bf21cd93STycho Nightingale 	pthread_create(&bc->bc_btid, NULL, blockif_thr, bc);
373*bf21cd93STycho Nightingale 
374*bf21cd93STycho Nightingale 	snprintf(tname, sizeof(tname), "blk-%s", ident);
375*bf21cd93STycho Nightingale 	pthread_set_name_np(bc->bc_btid, tname);
376*bf21cd93STycho Nightingale 
377*bf21cd93STycho Nightingale 	return (bc);
378*bf21cd93STycho Nightingale }
379*bf21cd93STycho Nightingale 
380*bf21cd93STycho Nightingale static int
381*bf21cd93STycho Nightingale blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
382*bf21cd93STycho Nightingale 		enum blockop op)
383*bf21cd93STycho Nightingale {
384*bf21cd93STycho Nightingale 	int err;
385*bf21cd93STycho Nightingale 
386*bf21cd93STycho Nightingale 	err = 0;
387*bf21cd93STycho Nightingale 
388*bf21cd93STycho Nightingale 	pthread_mutex_lock(&bc->bc_mtx);
389*bf21cd93STycho Nightingale 	if (bc->bc_req_count < BLOCKIF_MAXREQ) {
390*bf21cd93STycho Nightingale 		/*
391*bf21cd93STycho Nightingale 		 * Enqueue and inform the block i/o thread
392*bf21cd93STycho Nightingale 		 * that there is work available
393*bf21cd93STycho Nightingale 		 */
394*bf21cd93STycho Nightingale 		blockif_enqueue(bc, breq, op);
395*bf21cd93STycho Nightingale 		pthread_cond_signal(&bc->bc_cond);
396*bf21cd93STycho Nightingale 	} else {
397*bf21cd93STycho Nightingale 		/*
398*bf21cd93STycho Nightingale 		 * Callers are not allowed to enqueue more than
399*bf21cd93STycho Nightingale 		 * the specified blockif queue limit. Return an
400*bf21cd93STycho Nightingale 		 * error to indicate that the queue length has been
401*bf21cd93STycho Nightingale 		 * exceeded.
402*bf21cd93STycho Nightingale 		 */
403*bf21cd93STycho Nightingale 		err = E2BIG;
404*bf21cd93STycho Nightingale 	}
405*bf21cd93STycho Nightingale 	pthread_mutex_unlock(&bc->bc_mtx);
406*bf21cd93STycho Nightingale 
407*bf21cd93STycho Nightingale 	return (err);
408*bf21cd93STycho Nightingale }
409*bf21cd93STycho Nightingale 
410*bf21cd93STycho Nightingale int
411*bf21cd93STycho Nightingale blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
412*bf21cd93STycho Nightingale {
413*bf21cd93STycho Nightingale 
414*bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
415*bf21cd93STycho Nightingale 	return (blockif_request(bc, breq, BOP_READ));
416*bf21cd93STycho Nightingale }
417*bf21cd93STycho Nightingale 
418*bf21cd93STycho Nightingale int
419*bf21cd93STycho Nightingale blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
420*bf21cd93STycho Nightingale {
421*bf21cd93STycho Nightingale 
422*bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
423*bf21cd93STycho Nightingale 	return (blockif_request(bc, breq, BOP_WRITE));
424*bf21cd93STycho Nightingale }
425*bf21cd93STycho Nightingale 
426*bf21cd93STycho Nightingale int
427*bf21cd93STycho Nightingale blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
428*bf21cd93STycho Nightingale {
429*bf21cd93STycho Nightingale 
430*bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
431*bf21cd93STycho Nightingale 	return (blockif_request(bc, breq, BOP_FLUSH));
432*bf21cd93STycho Nightingale }
433*bf21cd93STycho Nightingale 
434*bf21cd93STycho Nightingale int
435*bf21cd93STycho Nightingale blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
436*bf21cd93STycho Nightingale {
437*bf21cd93STycho Nightingale 	struct blockif_elem *be;
438*bf21cd93STycho Nightingale 
439*bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
440*bf21cd93STycho Nightingale 
441*bf21cd93STycho Nightingale 	pthread_mutex_lock(&bc->bc_mtx);
442*bf21cd93STycho Nightingale 	/*
443*bf21cd93STycho Nightingale 	 * Check pending requests.
444*bf21cd93STycho Nightingale 	 */
445*bf21cd93STycho Nightingale 	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
446*bf21cd93STycho Nightingale 		if (be->be_req == breq)
447*bf21cd93STycho Nightingale 			break;
448*bf21cd93STycho Nightingale 	}
449*bf21cd93STycho Nightingale 	if (be != NULL) {
450*bf21cd93STycho Nightingale 		/*
451*bf21cd93STycho Nightingale 		 * Found it.
452*bf21cd93STycho Nightingale 		 */
453*bf21cd93STycho Nightingale 		TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
454*bf21cd93STycho Nightingale 		be->be_status = BST_FREE;
455*bf21cd93STycho Nightingale 		be->be_req = NULL;
456*bf21cd93STycho Nightingale 		TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
457*bf21cd93STycho Nightingale 		bc->bc_req_count--;
458*bf21cd93STycho Nightingale 		pthread_mutex_unlock(&bc->bc_mtx);
459*bf21cd93STycho Nightingale 
460*bf21cd93STycho Nightingale 		return (0);
461*bf21cd93STycho Nightingale 	}
462*bf21cd93STycho Nightingale 
463*bf21cd93STycho Nightingale 	/*
464*bf21cd93STycho Nightingale 	 * Check in-flight requests.
465*bf21cd93STycho Nightingale 	 */
466*bf21cd93STycho Nightingale 	TAILQ_FOREACH(be, &bc->bc_busyq, be_link) {
467*bf21cd93STycho Nightingale 		if (be->be_req == breq)
468*bf21cd93STycho Nightingale 			break;
469*bf21cd93STycho Nightingale 	}
470*bf21cd93STycho Nightingale 	if (be == NULL) {
471*bf21cd93STycho Nightingale 		/*
472*bf21cd93STycho Nightingale 		 * Didn't find it.
473*bf21cd93STycho Nightingale 		 */
474*bf21cd93STycho Nightingale 		pthread_mutex_unlock(&bc->bc_mtx);
475*bf21cd93STycho Nightingale 		return (EINVAL);
476*bf21cd93STycho Nightingale 	}
477*bf21cd93STycho Nightingale 
478*bf21cd93STycho Nightingale 	/*
479*bf21cd93STycho Nightingale 	 * Interrupt the processing thread to force it return
480*bf21cd93STycho Nightingale 	 * prematurely via it's normal callback path.
481*bf21cd93STycho Nightingale 	 */
482*bf21cd93STycho Nightingale 	while (be->be_status == BST_BUSY) {
483*bf21cd93STycho Nightingale 		struct blockif_sig_elem bse, *old_head;
484*bf21cd93STycho Nightingale 
485*bf21cd93STycho Nightingale 		pthread_mutex_init(&bse.bse_mtx, NULL);
486*bf21cd93STycho Nightingale 		pthread_cond_init(&bse.bse_cond, NULL);
487*bf21cd93STycho Nightingale 
488*bf21cd93STycho Nightingale 		bse.bse_pending = 1;
489*bf21cd93STycho Nightingale 
490*bf21cd93STycho Nightingale 		do {
491*bf21cd93STycho Nightingale 			old_head = blockif_bse_head;
492*bf21cd93STycho Nightingale 			bse.bse_next = old_head;
493*bf21cd93STycho Nightingale 		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
494*bf21cd93STycho Nightingale 					    (uintptr_t)old_head,
495*bf21cd93STycho Nightingale 					    (uintptr_t)&bse));
496*bf21cd93STycho Nightingale 
497*bf21cd93STycho Nightingale 		pthread_kill(be->be_tid, SIGCONT);
498*bf21cd93STycho Nightingale 
499*bf21cd93STycho Nightingale 		pthread_mutex_lock(&bse.bse_mtx);
500*bf21cd93STycho Nightingale 		while (bse.bse_pending)
501*bf21cd93STycho Nightingale 			pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx);
502*bf21cd93STycho Nightingale 		pthread_mutex_unlock(&bse.bse_mtx);
503*bf21cd93STycho Nightingale 	}
504*bf21cd93STycho Nightingale 
505*bf21cd93STycho Nightingale 	pthread_mutex_unlock(&bc->bc_mtx);
506*bf21cd93STycho Nightingale 
507*bf21cd93STycho Nightingale 	/*
508*bf21cd93STycho Nightingale 	 * The processing thread has been interrupted.  Since it's not
509*bf21cd93STycho Nightingale 	 * clear if the callback has been invoked yet, return EBUSY.
510*bf21cd93STycho Nightingale 	 */
511*bf21cd93STycho Nightingale 	return (EBUSY);
512*bf21cd93STycho Nightingale }
513*bf21cd93STycho Nightingale 
514*bf21cd93STycho Nightingale int
515*bf21cd93STycho Nightingale blockif_close(struct blockif_ctxt *bc)
516*bf21cd93STycho Nightingale {
517*bf21cd93STycho Nightingale 	void *jval;
518*bf21cd93STycho Nightingale 	int err;
519*bf21cd93STycho Nightingale 
520*bf21cd93STycho Nightingale 	err = 0;
521*bf21cd93STycho Nightingale 
522*bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
523*bf21cd93STycho Nightingale 
524*bf21cd93STycho Nightingale 	/*
525*bf21cd93STycho Nightingale 	 * Stop the block i/o thread
526*bf21cd93STycho Nightingale 	 */
527*bf21cd93STycho Nightingale 	bc->bc_closing = 1;
528*bf21cd93STycho Nightingale 	pthread_cond_signal(&bc->bc_cond);
529*bf21cd93STycho Nightingale 	pthread_join(bc->bc_btid, &jval);
530*bf21cd93STycho Nightingale 
531*bf21cd93STycho Nightingale 	/* XXX Cancel queued i/o's ??? */
532*bf21cd93STycho Nightingale 
533*bf21cd93STycho Nightingale 	/*
534*bf21cd93STycho Nightingale 	 * Release resources
535*bf21cd93STycho Nightingale 	 */
536*bf21cd93STycho Nightingale 	bc->bc_magic = 0;
537*bf21cd93STycho Nightingale 	close(bc->bc_fd);
538*bf21cd93STycho Nightingale 	free(bc);
539*bf21cd93STycho Nightingale 
540*bf21cd93STycho Nightingale 	return (0);
541*bf21cd93STycho Nightingale }
542*bf21cd93STycho Nightingale 
543*bf21cd93STycho Nightingale /*
544*bf21cd93STycho Nightingale  * Return virtual C/H/S values for a given block. Use the algorithm
545*bf21cd93STycho Nightingale  * outlined in the VHD specification to calculate values.
546*bf21cd93STycho Nightingale  */
547*bf21cd93STycho Nightingale void
548*bf21cd93STycho Nightingale blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
549*bf21cd93STycho Nightingale {
550*bf21cd93STycho Nightingale 	off_t sectors;		/* total sectors of the block dev */
551*bf21cd93STycho Nightingale 	off_t hcyl;		/* cylinders times heads */
552*bf21cd93STycho Nightingale 	uint16_t secpt;		/* sectors per track */
553*bf21cd93STycho Nightingale 	uint8_t heads;
554*bf21cd93STycho Nightingale 
555*bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
556*bf21cd93STycho Nightingale 
557*bf21cd93STycho Nightingale 	sectors = bc->bc_size / bc->bc_sectsz;
558*bf21cd93STycho Nightingale 
559*bf21cd93STycho Nightingale 	/* Clamp the size to the largest possible with CHS */
560*bf21cd93STycho Nightingale 	if (sectors > 65535UL*16*255)
561*bf21cd93STycho Nightingale 		sectors = 65535UL*16*255;
562*bf21cd93STycho Nightingale 
563*bf21cd93STycho Nightingale 	if (sectors >= 65536UL*16*63) {
564*bf21cd93STycho Nightingale 		secpt = 255;
565*bf21cd93STycho Nightingale 		heads = 16;
566*bf21cd93STycho Nightingale 		hcyl = sectors / secpt;
567*bf21cd93STycho Nightingale 	} else {
568*bf21cd93STycho Nightingale 		secpt = 17;
569*bf21cd93STycho Nightingale 		hcyl = sectors / secpt;
570*bf21cd93STycho Nightingale 		heads = (hcyl + 1023) / 1024;
571*bf21cd93STycho Nightingale 
572*bf21cd93STycho Nightingale 		if (heads < 4)
573*bf21cd93STycho Nightingale 			heads = 4;
574*bf21cd93STycho Nightingale 
575*bf21cd93STycho Nightingale 		if (hcyl >= (heads * 1024) || heads > 16) {
576*bf21cd93STycho Nightingale 			secpt = 31;
577*bf21cd93STycho Nightingale 			heads = 16;
578*bf21cd93STycho Nightingale 			hcyl = sectors / secpt;
579*bf21cd93STycho Nightingale 		}
580*bf21cd93STycho Nightingale 		if (hcyl >= (heads * 1024)) {
581*bf21cd93STycho Nightingale 			secpt = 63;
582*bf21cd93STycho Nightingale 			heads = 16;
583*bf21cd93STycho Nightingale 			hcyl = sectors / secpt;
584*bf21cd93STycho Nightingale 		}
585*bf21cd93STycho Nightingale 	}
586*bf21cd93STycho Nightingale 
587*bf21cd93STycho Nightingale 	*c = hcyl / heads;
588*bf21cd93STycho Nightingale 	*h = heads;
589*bf21cd93STycho Nightingale 	*s = secpt;
590*bf21cd93STycho Nightingale }
591*bf21cd93STycho Nightingale 
592*bf21cd93STycho Nightingale /*
593*bf21cd93STycho Nightingale  * Accessors
594*bf21cd93STycho Nightingale  */
595*bf21cd93STycho Nightingale off_t
596*bf21cd93STycho Nightingale blockif_size(struct blockif_ctxt *bc)
597*bf21cd93STycho Nightingale {
598*bf21cd93STycho Nightingale 
599*bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
600*bf21cd93STycho Nightingale 	return (bc->bc_size);
601*bf21cd93STycho Nightingale }
602*bf21cd93STycho Nightingale 
603*bf21cd93STycho Nightingale int
604*bf21cd93STycho Nightingale blockif_sectsz(struct blockif_ctxt *bc)
605*bf21cd93STycho Nightingale {
606*bf21cd93STycho Nightingale 
607*bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
608*bf21cd93STycho Nightingale 	return (bc->bc_sectsz);
609*bf21cd93STycho Nightingale }
610*bf21cd93STycho Nightingale 
611*bf21cd93STycho Nightingale int
612*bf21cd93STycho Nightingale blockif_queuesz(struct blockif_ctxt *bc)
613*bf21cd93STycho Nightingale {
614*bf21cd93STycho Nightingale 
615*bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
616*bf21cd93STycho Nightingale 	return (BLOCKIF_MAXREQ - 1);
617*bf21cd93STycho Nightingale }
618*bf21cd93STycho Nightingale 
619*bf21cd93STycho Nightingale int
620*bf21cd93STycho Nightingale blockif_is_ro(struct blockif_ctxt *bc)
621*bf21cd93STycho Nightingale {
622*bf21cd93STycho Nightingale 
623*bf21cd93STycho Nightingale 	assert(bc->bc_magic == BLOCKIF_SIG);
624*bf21cd93STycho Nightingale 	return (bc->bc_rdonly);
625*bf21cd93STycho Nightingale }
626