xref: /openbsd/sys/arch/sparc64/dev/vdsp.c (revision 5a38ef86)
1 /*	$OpenBSD: vdsp.c,v 1.48 2021/10/24 17:05:04 mpi Exp $	*/
2 /*
3  * Copyright (c) 2009, 2011, 2014 Mark Kettenis
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <sys/param.h>
19 #include <sys/conf.h>
20 #include <sys/proc.h>
21 #include <sys/buf.h>
22 #include <sys/device.h>
23 #include <sys/disklabel.h>
24 #include <sys/fcntl.h>
25 #include <sys/lock.h>
26 #include <sys/malloc.h>
27 #include <sys/mutex.h>
28 #include <sys/namei.h>
29 #include <sys/systm.h>
30 #include <sys/task.h>
31 #include <sys/vnode.h>
32 #include <sys/dkio.h>
33 #include <sys/specdev.h>
34 
35 #include <machine/autoconf.h>
36 #include <machine/conf.h>
37 #include <machine/hypervisor.h>
38 #include <machine/mdesc.h>
39 
40 #include <uvm/uvm_extern.h>
41 
42 #include <scsi/scsi_all.h>
43 #include <scsi/scsi_disk.h>
44 #include <scsi/scsiconf.h>
45 
46 #include <isofs/cd9660/iso.h>
47 
48 #include <dev/sun/disklabel.h>
49 
50 #include <sparc64/dev/cbusvar.h>
51 #include <sparc64/dev/ldcvar.h>
52 #include <sparc64/dev/viovar.h>
53 
54 #ifdef VDSP_DEBUG
55 #define DPRINTF(x)	printf x
56 #else
57 #define DPRINTF(x)
58 #endif
59 
60 #define VDSK_TX_ENTRIES			64
61 #define VDSK_RX_ENTRIES			64
62 
63 #define VDSK_MAX_DESCRIPTORS		1024
64 #define VDSK_MAX_DESCRIPTOR_SIZE	512
65 
/*
 * ATTR_INFO payload exchanged during the vDisk handshake
 * (see vdsp_rx_vio_attr_info() and vdsp_open()).
 */
struct vd_attr_info {
	struct vio_msg_tag	tag;
	uint8_t			xfer_mode;	/* VIO_DESC_MODE or VIO_DRING_MODE */
	uint8_t			vd_type;	/* VD_DISK_TYPE_* */
	uint8_t			vd_mtype;	/* VD_MEDIA_TYPE_*, vDisk 1.1+ only */
	uint8_t			_reserved1;
	uint32_t		vdisk_block_size;	/* bytes per block */
	uint64_t		operations;	/* bitmask of supported VD_OP_* */
	uint64_t		vdisk_size;	/* disk size in blocks */
	uint64_t		max_xfer_sz;	/* max transfer size in blocks */
	uint64_t		_reserved2[2];
};
78 
79 #define VD_DISK_TYPE_SLICE	0x01
80 #define VD_DISK_TYPE_DISK	0x02
81 
82 #define VD_MEDIA_TYPE_FIXED	0x01
83 #define VD_MEDIA_TYPE_CD	0x02
84 #define VD_MEDIA_TYPE_DVD	0x03
85 
86 /* vDisk version 1.0. */
87 #define VD_OP_BREAD		0x01
88 #define VD_OP_BWRITE		0x02
89 #define VD_OP_FLUSH		0x03
90 #define VD_OP_GET_WCE		0x04
91 #define VD_OP_SET_WCE		0x05
92 #define VD_OP_GET_VTOC		0x06
93 #define VD_OP_SET_VTOC		0x07
94 #define VD_OP_GET_DISKGEOM	0x08
95 #define VD_OP_SET_DISKGEOM	0x09
96 #define VD_OP_GET_DEVID		0x0b
97 #define VD_OP_GET_EFI		0x0c
98 #define VD_OP_SET_EFI		0x0d
99 
100 /* vDisk version 1.1 */
101 #define VD_OP_SCSICMD		0x0a
102 #define VD_OP_RESET		0x0e
103 #define VD_OP_GET_ACCESS	0x0f
104 #define VD_OP_SET_ACCESS	0x10
105 #define VD_OP_GET_CAPACITY	0x11
106 
/* Sun standard fields. */
struct sun_vtoc_preamble {
	char	sl_text[128];
	u_int	sl_version;	/* label version */
	char	sl_volume[8];	/* short volume name */
	u_short	sl_nparts;	/* partition count */

	struct sun_partinfo sl_part[8];

	u_int	sl_bootinfo[3];
	u_int	sl_sanity;
};

/* vDisk protocol VTOC partition entry. */
struct vd_vtoc_part {
	uint16_t	id_tag;
	uint16_t	perm;
	uint32_t	reserved;
	uint64_t	start;
	uint64_t	nblocks;

};
/* vDisk protocol VTOC structure. */
struct vd_vtoc {
	uint8_t		volume_name[8];
	uint16_t	sector_size;
	uint16_t	num_partitions;
	uint32_t	reserved;
	uint8_t		ascii_label[128];
	struct vd_vtoc_part partition[8];
};

/* vDisk protocol disk geometry; field names mirror a Sun disklabel. */
struct vd_diskgeom {
	uint16_t	ncyl;
	uint16_t	acyl;
	uint16_t	bcyl;
	uint16_t	nhead;
	uint16_t	nsect;
	uint16_t	intrlv;
	uint16_t	apc;
	uint16_t	rpm;
	uint16_t	pcyl;
	uint16_t	write_reinstruct;
	uint16_t	read_reinstruct;
};
150 
/*
 * In-ring I/O descriptor, used in dring transfer mode.  The trailing
 * cookie array is variable length (ncookies entries).
 */
struct vd_desc {
	struct vio_dring_hdr	hdr;
	uint64_t		req_id;
	uint8_t			operation;	/* VD_OP_* */
	uint8_t			slice;
	uint16_t		_reserved1;
	uint32_t		status;		/* errno-style result */
	uint64_t		offset;		/* in DEV_BSIZE units */
	uint64_t		size;		/* transfer size in bytes */
	uint32_t		ncookies;
	uint32_t		_reserved2;
	struct ldc_cookie	cookie[1];	/* first of ncookies */
};
164 
165 #define VD_SLICE_NONE		0xff
166 
/*
 * In-band descriptor message, used in descriptor transfer mode
 * (handled by vdsp_rx_vio_desc_data() / vdsp_read_desc()).
 * The I/O fields mirror struct vd_desc.
 */
struct vdsk_desc_msg {
	struct vio_msg_tag	tag;
	uint64_t		seq_no;
	uint64_t		desc_handle;
	uint64_t		req_id;
	uint8_t			operation;	/* VD_OP_* */
	uint8_t			slice;
	uint16_t		_reserved1;
	uint32_t		status;		/* errno-style result */
	uint64_t		offset;		/* in DEV_BSIZE units */
	uint64_t		size;		/* transfer size in bytes */
	uint32_t		ncookies;
	uint32_t		_reserved2;
	struct ldc_cookie	cookie[1];	/* first of ncookies */
};
182 
183 /*
184  * We support vDisk 1.1.
185  */
186 #define VDSK_MAJOR	1
187 #define VDSK_MINOR	1
188 
189 /*
190  * But we only support a subset of the defined commands.
191  */
192 #define VD_OP_MASK \
193     ((1 << VD_OP_BREAD) | (1 << VD_OP_BWRITE) | (1 << VD_OP_FLUSH) | \
194      (1 << VD_OP_GET_WCE) | (1 << VD_OP_SET_WCE) | \
195      (1 << VD_OP_GET_VTOC) | (1 << VD_OP_SET_VTOC) | \
196      (1 << VD_OP_GET_DISKGEOM))
197 
/*
 * Per-port software state for the virtual disk server; one instance
 * per "vds-port" machine description node.
 */
struct vdsp_softc {
	struct device	sc_dv;
	int		sc_idx;		/* machine description index */
	bus_space_tag_t	sc_bustag;
	bus_dma_tag_t	sc_dmatag;

	uint64_t	sc_tx_ino;	/* tx interrupt vector */
	uint64_t	sc_rx_ino;	/* rx interrupt vector */
	void		*sc_tx_ih;
	void		*sc_rx_ih;

	struct ldc_conn	sc_lc;

	/* VIO handshake progress, a bitmask of the flags below. */
	uint16_t	sc_vio_state;
#define VIO_SND_VER_INFO	0x0001
#define VIO_ACK_VER_INFO	0x0002
#define VIO_RCV_VER_INFO	0x0004
#define VIO_SND_ATTR_INFO	0x0008
#define VIO_ACK_ATTR_INFO	0x0010
#define VIO_RCV_ATTR_INFO	0x0020
#define VIO_SND_DRING_REG	0x0040
#define VIO_ACK_DRING_REG	0x0080
#define VIO_RCV_DRING_REG	0x0100
#define VIO_SND_RDX		0x0200
#define VIO_ACK_RDX		0x0400
#define VIO_RCV_RDX		0x0800

	uint16_t	sc_major;	/* negotiated protocol version */
	uint16_t	sc_minor;

	uint8_t		sc_xfer_mode;	/* VIO_DESC_MODE or VIO_DRING_MODE */

	uint32_t	sc_local_sid;
	uint64_t	sc_seq_no;

	/* Client descriptor ring registered via DRING_REG. */
	uint64_t	sc_dring_ident;
	uint32_t	sc_num_descriptors;
	uint32_t	sc_descriptor_size;
	struct ldc_cookie sc_dring_cookie;

	/* Deferred work that may sleep; runs on systq. */
	struct task	sc_open_task;
	struct task	sc_alloc_task;
	struct task	sc_close_task;

	/* Descriptor-mode request queue, protected by sc_desc_mtx. */
	struct mutex	sc_desc_mtx;
	struct vdsk_desc_msg *sc_desc_msg[VDSK_RX_ENTRIES];
	int		sc_desc_head;
	int		sc_desc_tail;

	struct task	sc_read_task;

	/* Local shadow of the client's descriptor ring contents. */
	caddr_t		sc_vd;
	struct task	sc_vd_task;
	struct vd_desc	**sc_vd_ring;
	u_int		sc_vd_prod;
	u_int		sc_vd_cons;

	uint32_t	sc_vdisk_block_size;	/* bytes per block */
	uint64_t	sc_vdisk_size;		/* size in blocks */

	struct vnode	*sc_vp;		/* backing block device or file */

	/* Cached Sun disklabel read from the backing store. */
	struct sun_disklabel *sc_label;
	uint16_t	sc_ncyl;
	uint16_t	sc_acyl;
	uint16_t	sc_nhead;
	uint16_t	sc_nsect;
};
266 
267 int	vdsp_match(struct device *, void *, void *);
268 void	vdsp_attach(struct device *, struct device *, void *);
269 
/* Autoconf glue. */
const struct cfattach vdsp_ca = {
	sizeof(struct vdsp_softc), vdsp_match, vdsp_attach
};

struct cfdriver vdsp_cd = {
	NULL, "vdsp", DV_DULL
};
277 
278 int	vdsp_tx_intr(void *);
279 int	vdsp_rx_intr(void *);
280 
281 void	vdsp_rx_data(struct ldc_conn *, struct ldc_pkt *);
282 void	vdsp_rx_vio_ctrl(struct vdsp_softc *, struct vio_msg *);
283 void	vdsp_rx_vio_ver_info(struct vdsp_softc *, struct vio_msg_tag *);
284 void	vdsp_rx_vio_attr_info(struct vdsp_softc *, struct vio_msg_tag *);
285 void	vdsp_rx_vio_dring_reg(struct vdsp_softc *, struct vio_msg_tag *);
286 void	vdsp_rx_vio_rdx(struct vdsp_softc *sc, struct vio_msg_tag *);
287 void	vdsp_rx_vio_data(struct vdsp_softc *sc, struct vio_msg *);
288 void	vdsp_rx_vio_dring_data(struct vdsp_softc *sc,
289 	    struct vio_msg_tag *);
290 void	vdsp_rx_vio_desc_data(struct vdsp_softc *sc, struct vio_msg_tag *);
291 
292 void	vdsp_ldc_reset(struct ldc_conn *);
293 void	vdsp_ldc_start(struct ldc_conn *);
294 
295 void	vdsp_sendmsg(struct vdsp_softc *, void *, size_t, int dowait);
296 
297 void	vdsp_open(void *);
298 void	vdsp_close(void *);
299 void	vdsp_alloc(void *);
300 void	vdsp_readlabel(struct vdsp_softc *);
301 int	vdsp_writelabel(struct vdsp_softc *);
302 int	vdsp_is_iso(struct vdsp_softc *);
303 void	vdsp_read(void *);
304 void	vdsp_read_desc(struct vdsp_softc *, struct vdsk_desc_msg *);
305 void	vdsp_vd_task(void *);
306 void	vdsp_read_dring(void *, void *);
307 void	vdsp_write_dring(void *, void *);
308 void	vdsp_flush_dring(void *, void *);
309 void	vdsp_get_vtoc(void *, void *);
310 void	vdsp_set_vtoc(void *, void *);
311 void	vdsp_get_diskgeom(void *, void *);
312 void	vdsp_unimp(void *, void *);
313 
314 void	vdsp_ack_desc(struct vdsp_softc *, struct vd_desc *);
315 
316 int
317 vdsp_match(struct device *parent, void *match, void *aux)
318 {
319 	struct cbus_attach_args *ca = aux;
320 
321 	if (strcmp(ca->ca_name, "vds-port") == 0)
322 		return (1);
323 
324 	return (0);
325 }
326 
/*
 * Attach a virtual disk server port: set up the LDC channel, its
 * interrupt handlers and queues, and register the process-context
 * tasks that service the vDisk protocol.
 */
void
vdsp_attach(struct device *parent, struct device *self, void *aux)
{
	struct vdsp_softc *sc = (struct vdsp_softc *)self;
	struct cbus_attach_args *ca = aux;
	struct ldc_conn *lc;

	sc->sc_idx = ca->ca_idx;
	sc->sc_bustag = ca->ca_bustag;
	sc->sc_dmatag = ca->ca_dmatag;
	sc->sc_tx_ino = ca->ca_tx_ino;
	sc->sc_rx_ino = ca->ca_rx_ino;

	printf(": ivec 0x%llx, 0x%llx", sc->sc_tx_ino, sc->sc_rx_ino);

	mtx_init(&sc->sc_desc_mtx, IPL_BIO);

	/*
	 * Un-configure queues before registering interrupt handlers,
	 * such that we dont get any stale LDC packets or events.
	 */
	hv_ldc_tx_qconf(ca->ca_id, 0, 0);
	hv_ldc_rx_qconf(ca->ca_id, 0, 0);

	sc->sc_tx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_tx_ino,
	    IPL_BIO, BUS_INTR_ESTABLISH_MPSAFE, vdsp_tx_intr, sc,
	    sc->sc_dv.dv_xname);
	sc->sc_rx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_rx_ino,
	    IPL_BIO, BUS_INTR_ESTABLISH_MPSAFE, vdsp_rx_intr, sc,
	    sc->sc_dv.dv_xname);
	if (sc->sc_tx_ih == NULL || sc->sc_rx_ih == NULL) {
		printf(", can't establish interrupt\n");
		return;
	}

	lc = &sc->sc_lc;
	lc->lc_id = ca->ca_id;
	lc->lc_sc = sc;
	lc->lc_reset = vdsp_ldc_reset;
	lc->lc_start = vdsp_ldc_start;
	lc->lc_rx_data = vdsp_rx_data;

	/* Transmit and receive queues for the LDC channel itself. */
	lc->lc_txq = ldc_queue_alloc(sc->sc_dmatag, VDSK_TX_ENTRIES);
	if (lc->lc_txq == NULL) {
		printf(", can't allocate tx queue\n");
		return;
	}

	lc->lc_rxq = ldc_queue_alloc(sc->sc_dmatag, VDSK_RX_ENTRIES);
	if (lc->lc_rxq == NULL) {
		printf(", can't allocate rx queue\n");
		goto free_txqueue;
	}

	/* Work that may sleep is deferred to these tasks on systq. */
	task_set(&sc->sc_open_task, vdsp_open, sc);
	task_set(&sc->sc_alloc_task, vdsp_alloc, sc);
	task_set(&sc->sc_close_task, vdsp_close, sc);
	task_set(&sc->sc_read_task, vdsp_read, sc);

	printf("\n");

	return;

#if 0
free_rxqueue:
	ldc_queue_free(sc->sc_dmatag, lc->lc_rxq);
#endif
free_txqueue:
	ldc_queue_free(sc->sc_dmatag, lc->lc_txq);
}
397 
398 int
399 vdsp_tx_intr(void *arg)
400 {
401 	struct vdsp_softc *sc = arg;
402 	struct ldc_conn *lc = &sc->sc_lc;
403 	uint64_t tx_head, tx_tail, tx_state;
404 	int err;
405 
406 	err = hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
407 	if (err != H_EOK) {
408 		printf("hv_ldc_rx_get_state %d\n", err);
409 		return (0);
410 	}
411 
412 	if (tx_state != lc->lc_tx_state) {
413 		switch (tx_state) {
414 		case LDC_CHANNEL_DOWN:
415 			DPRINTF(("%s: Tx link down\n", __func__));
416 			break;
417 		case LDC_CHANNEL_UP:
418 			DPRINTF(("%s: Tx link up\n", __func__));
419 			break;
420 		case LDC_CHANNEL_RESET:
421 			DPRINTF(("%s: Tx link reset\n", __func__));
422 			break;
423 		}
424 		lc->lc_tx_state = tx_state;
425 	}
426 
427 	wakeup(lc->lc_txq);
428 	return (1);
429 }
430 
/*
 * Receive interrupt: handle LDC channel state transitions and process
 * one incoming packet at the receive queue head.
 *
 * Returns 1 if the interrupt was handled, 0 otherwise.
 */
int
vdsp_rx_intr(void *arg)
{
	struct vdsp_softc *sc = arg;
	struct ldc_conn *lc = &sc->sc_lc;
	uint64_t rx_head, rx_tail, rx_state;
	struct ldc_pkt *lp;
	int err;

	err = hv_ldc_rx_get_state(lc->lc_id, &rx_head, &rx_tail, &rx_state);
	if (err == H_EINVAL)
		return (0);
	if (err != H_EOK) {
		printf("hv_ldc_rx_get_state %d\n", err);
		return (0);
	}

	if (rx_state != lc->lc_rx_state) {
		/*
		 * Channel state changed: on down or reset, restart the
		 * connection handshake from scratch.
		 */
		switch (rx_state) {
		case LDC_CHANNEL_DOWN:
			DPRINTF(("%s: Rx link down\n", __func__));
			lc->lc_tx_seqid = 0;
			lc->lc_state = 0;
			lc->lc_reset(lc);
			break;
		case LDC_CHANNEL_UP:
			DPRINTF(("%s: Rx link up\n", __func__));
			break;
		case LDC_CHANNEL_RESET:
			DPRINTF(("%s: Rx link reset\n", __func__));
			lc->lc_tx_seqid = 0;
			lc->lc_state = 0;
			lc->lc_reset(lc);
			break;
		}
		lc->lc_rx_state = rx_state;
		return (1);
	}

	if (lc->lc_rx_state == LDC_CHANNEL_DOWN)
		return (1);

	/* Dispatch the packet at the current queue head. */
	lp = (struct ldc_pkt *)(lc->lc_rxq->lq_va + rx_head);
	switch (lp->type) {
	case LDC_CTRL:
		ldc_rx_ctrl(lc, lp);
		break;

	case LDC_DATA:
		ldc_rx_data(lc, lp);
		break;

	default:
		DPRINTF(("0x%02x/0x%02x/0x%02x\n", lp->type, lp->stype,
		    lp->ctrl));
		ldc_reset(lc);
		break;
	}

	/* Advance the head; the queue size is a power of two, so mask. */
	rx_head += sizeof(*lp);
	rx_head &= ((lc->lc_rxq->lq_nentries * sizeof(*lp)) - 1);
	err = hv_ldc_rx_set_qhead(lc->lc_id, rx_head);
	if (err != H_EOK)
		printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);

	return (1);
}
498 
499 void
500 vdsp_rx_data(struct ldc_conn *lc, struct ldc_pkt *lp)
501 {
502 	struct vio_msg *vm = (struct vio_msg *)lp;
503 
504 	switch (vm->type) {
505 	case VIO_TYPE_CTRL:
506 		if ((lp->env & LDC_FRAG_START) == 0 &&
507 		    (lp->env & LDC_FRAG_STOP) == 0)
508 			return;
509 		vdsp_rx_vio_ctrl(lc->lc_sc, vm);
510 		break;
511 
512 	case VIO_TYPE_DATA:
513 		if((lp->env & LDC_FRAG_START) == 0)
514 			return;
515 		vdsp_rx_vio_data(lc->lc_sc, vm);
516 		break;
517 
518 	default:
519 		DPRINTF(("Unhandled packet type 0x%02x\n", vm->type));
520 		ldc_reset(lc);
521 		break;
522 	}
523 }
524 
525 void
526 vdsp_rx_vio_ctrl(struct vdsp_softc *sc, struct vio_msg *vm)
527 {
528 	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;
529 
530 	switch (tag->stype_env) {
531 	case VIO_VER_INFO:
532 		vdsp_rx_vio_ver_info(sc, tag);
533 		break;
534 	case VIO_ATTR_INFO:
535 		vdsp_rx_vio_attr_info(sc, tag);
536 		break;
537 	case VIO_DRING_REG:
538 		vdsp_rx_vio_dring_reg(sc, tag);
539 		break;
540 	case VIO_RDX:
541 		vdsp_rx_vio_rdx(sc, tag);
542 		break;
543 	default:
544 		DPRINTF(("CTRL/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
545 		break;
546 	}
547 }
548 
549 void
550 vdsp_rx_vio_ver_info(struct vdsp_softc *sc, struct vio_msg_tag *tag)
551 {
552 	struct vio_ver_info *vi = (struct vio_ver_info *)tag;
553 
554 	switch (vi->tag.stype) {
555 	case VIO_SUBTYPE_INFO:
556 		DPRINTF(("CTRL/INFO/VER_INFO\n"));
557 
558 		/* Make sure we're talking to a virtual disk. */
559 		if (vi->dev_class != VDEV_DISK) {
560 			/* Huh, we're not talking to a disk device? */
561 			printf("%s: peer is not a disk device\n",
562 			    sc->sc_dv.dv_xname);
563 			vi->tag.stype = VIO_SUBTYPE_NACK;
564 			vi->major = 0;
565 			vdsp_sendmsg(sc, vi, sizeof(*vi), 0);
566 			return;
567 		}
568 
569 		if (vi->major != VDSK_MAJOR) {
570 			vi->tag.stype = VIO_SUBTYPE_NACK;
571 			vi->major = VDSK_MAJOR;
572 			vi->minor = VDSK_MINOR;
573 			vdsp_sendmsg(sc, vi, sizeof(*vi), 0);
574 			return;
575 		}
576 
577 		sc->sc_major = vi->major;
578 		sc->sc_minor = vi->minor;
579 		sc->sc_local_sid = vi->tag.sid;
580 
581 		vi->tag.stype = VIO_SUBTYPE_ACK;
582 		if (vi->minor > VDSK_MINOR)
583 			vi->minor = VDSK_MINOR;
584 		vi->dev_class = VDEV_DISK_SERVER;
585 		vdsp_sendmsg(sc, vi, sizeof(*vi), 0);
586 		sc->sc_vio_state |= VIO_RCV_VER_INFO;
587 		break;
588 
589 	case VIO_SUBTYPE_ACK:
590 		DPRINTF(("CTRL/ACK/VER_INFO\n"));
591 		break;
592 
593 	default:
594 		DPRINTF(("CTRL/0x%02x/VER_INFO\n", vi->tag.stype));
595 		break;
596 	}
597 }
598 
599 void
600 vdsp_rx_vio_attr_info(struct vdsp_softc *sc, struct vio_msg_tag *tag)
601 {
602 	struct vd_attr_info *ai = (struct vd_attr_info *)tag;
603 
604 	switch (ai->tag.stype) {
605 	case VIO_SUBTYPE_INFO:
606 		DPRINTF(("CTRL/INFO/ATTR_INFO\n"));
607 
608 		if (ai->xfer_mode != VIO_DESC_MODE &&
609 		    ai->xfer_mode != VIO_DRING_MODE) {
610 			printf("%s: peer uses unsupported xfer mode 0x%02x\n",
611 			    sc->sc_dv.dv_xname, ai->xfer_mode);
612 			ai->tag.stype = VIO_SUBTYPE_NACK;
613 			vdsp_sendmsg(sc, ai, sizeof(*ai), 0);
614 			return;
615 		}
616 		sc->sc_xfer_mode = ai->xfer_mode;
617 		sc->sc_vio_state |= VIO_RCV_ATTR_INFO;
618 
619 		task_add(systq, &sc->sc_open_task);
620 		break;
621 
622 	case VIO_SUBTYPE_ACK:
623 		DPRINTF(("CTRL/ACK/ATTR_INFO\n"));
624 		break;
625 
626 	default:
627 		DPRINTF(("CTRL/0x%02x/ATTR_INFO\n", ai->tag.stype));
628 		break;
629 	}
630 }
631 
632 void
633 vdsp_rx_vio_dring_reg(struct vdsp_softc *sc, struct vio_msg_tag *tag)
634 {
635 	struct vio_dring_reg *dr = (struct vio_dring_reg *)tag;
636 
637 	switch (dr->tag.stype) {
638 	case VIO_SUBTYPE_INFO:
639 		DPRINTF(("CTRL/INFO/DRING_REG\n"));
640 
641 		if (dr->num_descriptors > VDSK_MAX_DESCRIPTORS ||
642 		    dr->descriptor_size > VDSK_MAX_DESCRIPTOR_SIZE ||
643 		    dr->ncookies > 1) {
644 			dr->tag.stype = VIO_SUBTYPE_NACK;
645 			vdsp_sendmsg(sc, dr, sizeof(*dr), 0);
646 			return;
647 		}
648 		sc->sc_num_descriptors = dr->num_descriptors;
649 		sc->sc_descriptor_size = dr->descriptor_size;
650 		sc->sc_dring_cookie = dr->cookie[0];
651 		sc->sc_vio_state |= VIO_RCV_DRING_REG;
652 
653 		task_add(systq, &sc->sc_alloc_task);
654 		break;
655 
656 	case VIO_SUBTYPE_ACK:
657 		DPRINTF(("CTRL/ACK/DRING_REG\n"));
658 		break;
659 
660 	default:
661 		DPRINTF(("CTRL/0x%02x/DRING_REG\n", dr->tag.stype));
662 		break;
663 	}
664 }
665 
666 void
667 vdsp_rx_vio_rdx(struct vdsp_softc *sc, struct vio_msg_tag *tag)
668 {
669 	switch(tag->stype) {
670 	case VIO_SUBTYPE_INFO:
671 		DPRINTF(("CTRL/INFO/RDX\n"));
672 
673 		tag->stype = VIO_SUBTYPE_ACK;
674 		tag->sid = sc->sc_local_sid;
675 		vdsp_sendmsg(sc, tag, sizeof(*tag), 0);
676 		sc->sc_vio_state |= VIO_RCV_RDX;
677 		break;
678 
679 	case VIO_SUBTYPE_ACK:
680 		DPRINTF(("CTRL/ACK/RDX\n"));
681 		break;
682 
683 	default:
684 		DPRINTF(("CTRL/0x%02x/RDX (VIO)\n", tag->stype));
685 		break;
686 	}
687 }
688 
689 void
690 vdsp_rx_vio_data(struct vdsp_softc *sc, struct vio_msg *vm)
691 {
692 	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;
693 
694 	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX)) {
695 		DPRINTF(("Spurious DATA/0x%02x/0x%04x\n", tag->stype,
696 		    tag->stype_env));
697 		return;
698 	}
699 
700 	switch(tag->stype_env) {
701 	case VIO_DESC_DATA:
702 		vdsp_rx_vio_desc_data(sc, tag);
703 		break;
704 
705 	case VIO_DRING_DATA:
706 		vdsp_rx_vio_dring_data(sc, tag);
707 		break;
708 
709 	default:
710 		DPRINTF(("DATA/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
711 		break;
712 	}
713 }
714 
/*
 * DRING_DATA: the client signals that a descriptor in its ring is
 * ready.  Copy the descriptor into our local shadow via hv_ldc_copy()
 * and hand it to vdsp_vd_task() for processing.
 */
void
vdsp_rx_vio_dring_data(struct vdsp_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_dring_msg *dm = (struct vio_dring_msg *)tag;
	struct vd_desc *vd;
	vaddr_t va;
	paddr_t pa;
	uint64_t size, off;
	psize_t nbytes;
	int err;

	switch(tag->stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("DATA/INFO/DRING_DATA\n"));

		/* NACK unknown rings and out-of-range indexes. */
		if (dm->dring_ident != sc->sc_dring_ident ||
		    dm->start_idx >= sc->sc_num_descriptors) {
			dm->tag.stype = VIO_SUBTYPE_NACK;
			vdsp_sendmsg(sc, dm, sizeof(*dm), 0);
			return;
		}

		/*
		 * Copy the descriptor in, walking it page by page since
		 * hv_ldc_copy() takes physical addresses and the buffer
		 * may cross page boundaries.
		 */
		off = dm->start_idx * sc->sc_descriptor_size;
		vd = (struct vd_desc *)(sc->sc_vd + off);
		va = (vaddr_t)vd;
		size = sc->sc_descriptor_size;
		while (size > 0) {
			pmap_extract(pmap_kernel(), va, &pa);
			nbytes = MIN(size, PAGE_SIZE - (off & PAGE_MASK));
			err = hv_ldc_copy(sc->sc_lc.lc_id, LDC_COPY_IN,
			    sc->sc_dring_cookie.addr + off, pa,
			    nbytes, &nbytes);
			if (err != H_EOK) {
				printf("%s: hv_ldc_copy %d\n", __func__, err);
				return;
			}
			va += nbytes;
			size -= nbytes;
			off += nbytes;
		}

		/*
		 * Publish to the consumer task; membar_producer()
		 * orders the ring slot store before the index update.
		 */
		sc->sc_vd_ring[sc->sc_vd_prod % sc->sc_num_descriptors] = vd;
		membar_producer();
		sc->sc_vd_prod++;
		task_add(systq, &sc->sc_vd_task);

		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("DATA/ACK/DRING_DATA\n"));
		break;

	case VIO_SUBTYPE_NACK:
		DPRINTF(("DATA/NACK/DRING_DATA\n"));
		break;

	default:
		DPRINTF(("DATA/0x%02x/DRING_DATA\n", tag->stype));
		break;
	}
}
776 
/*
 * Process-context task: consume descriptors queued by
 * vdsp_rx_vio_dring_data() and dispatch them by operation code.
 */
void
vdsp_vd_task(void *xsc)
{
	struct vdsp_softc *sc = xsc;
	struct vd_desc *vd;

	while (sc->sc_vd_cons != sc->sc_vd_prod) {
		/* Pairs with membar_producer() in the receive path. */
		membar_consumer();
		vd = sc->sc_vd_ring[sc->sc_vd_cons++ % sc->sc_num_descriptors];

		DPRINTF(("%s: operation %x\n", sc->sc_dv.dv_xname,
		    vd->operation));
		switch (vd->operation) {
		case VD_OP_BREAD:
			vdsp_read_dring(sc, vd);
			break;
		case VD_OP_BWRITE:
			vdsp_write_dring(sc, vd);
			break;
		case VD_OP_FLUSH:
			vdsp_flush_dring(sc, vd);
			break;
		case VD_OP_GET_VTOC:
			vdsp_get_vtoc(sc, vd);
			break;
		case VD_OP_SET_VTOC:
			vdsp_set_vtoc(sc, vd);
			break;
		case VD_OP_GET_DISKGEOM:
			vdsp_get_diskgeom(sc, vd);
			break;
		case VD_OP_GET_WCE:
		case VD_OP_SET_WCE:
		case VD_OP_GET_DEVID:
			/*
			 * Solaris issues VD_OP_GET_DEVID despite the
			 * fact that we don't advertise it.  It seems
			 * to be able to handle failure just fine, so
			 * we silently ignore it.
			 */
			vdsp_unimp(sc, vd);
			break;
		default:
			printf("%s: unsupported operation 0x%02x\n",
			    sc->sc_dv.dv_xname, vd->operation);
			vdsp_unimp(sc, vd);
			break;
		}
	}
}
827 
/*
 * DESC_DATA: descriptor-mode request.  Queue the message on the
 * mutex-protected ring and let the read task do the I/O, which may
 * sleep.  Only reads are supported in this mode.
 */
void
vdsp_rx_vio_desc_data(struct vdsp_softc *sc, struct vio_msg_tag *tag)
{
	struct vdsk_desc_msg *dm = (struct vdsk_desc_msg *)tag;

	switch(tag->stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("DATA/INFO/DESC_DATA\n"));

		switch (dm->operation) {
		case VD_OP_BREAD:
			mtx_enter(&sc->sc_desc_mtx);
			sc->sc_desc_msg[sc->sc_desc_head++] = dm;
			sc->sc_desc_head &= (VDSK_RX_ENTRIES - 1);
			/* The ring must never wrap onto the tail. */
			KASSERT(sc->sc_desc_head != sc->sc_desc_tail);
			mtx_leave(&sc->sc_desc_mtx);
			task_add(systq, &sc->sc_read_task);
			break;
		default:
			printf("%s: unsupported operation 0x%02x\n",
			    sc->sc_dv.dv_xname, dm->operation);
			break;
		}
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("DATA/ACK/DESC_DATA\n"));
		break;

	case VIO_SUBTYPE_NACK:
		DPRINTF(("DATA/NACK/DESC_DATA\n"));
		break;

	default:
		DPRINTF(("DATA/0x%02x/DESC_DATA\n", tag->stype));
		break;
	}
}
866 
867 void
868 vdsp_ldc_reset(struct ldc_conn *lc)
869 {
870 	struct vdsp_softc *sc = lc->lc_sc;
871 
872 	sc->sc_vio_state = 0;
873 	task_add(systq, &sc->sc_close_task);
874 }
875 
/*
 * LDC start hook.  Intentionally empty: the vDisk protocol requires
 * the client, not the server, to initiate the handshake.
 */
void
vdsp_ldc_start(struct ldc_conn *lc)
{
}
881 
/*
 * Send a VIO message over the LDC channel.  If "dowait" is set, keep
 * retrying while the transmit queue is full, sleeping briefly between
 * attempts; otherwise a full queue drops the message silently.
 */
void
vdsp_sendmsg(struct vdsp_softc *sc, void *msg, size_t len, int dowait)
{
	struct ldc_conn *lc = &sc->sc_lc;
	int err;

	do {
		err = ldc_send_unreliable(lc, msg, len);
		if (dowait && err == EWOULDBLOCK) {
			/*
			 * Seems like the hypervisor doesn't actually
			 * generate interrupts for transmit queues, so
			 * we specify a timeout such that we don't
			 * block forever.
			 */
			err = tsleep_nsec(lc->lc_txq, PWAIT, "vdsp",
			    MSEC_TO_NSEC(10));
		}
	} while (dowait && err == EWOULDBLOCK);
}
902 
/*
 * Open task (process context): open the backing block device or file
 * named by the "vds-block-device" machine description property, then
 * ACK the client's ATTR_INFO message with our disk attributes.
 */
void
vdsp_open(void *arg1)
{
	struct vdsp_softc *sc = arg1;
	struct proc *p = curproc;
	struct vd_attr_info ai;

	if (sc->sc_vp == NULL) {
		struct nameidata nd;
		struct vattr va;
		struct partinfo pi;
		const char *name;
		dev_t dev;
		int error;

		name = mdesc_get_prop_str(sc->sc_idx, "vds-block-device");
		if (name == NULL)
			return;

		NDINIT(&nd, 0, 0, UIO_SYSSPACE, name, p);
		error = vn_open(&nd, FREAD | FWRITE, 0);
		if (error) {
			printf("VOP_OPEN: %s, %d\n", name, error);
			return;
		}

		if (nd.ni_vp->v_type == VBLK) {
			/* Block device: geometry from the disklabel. */
			dev = nd.ni_vp->v_rdev;
			error = (*bdevsw[major(dev)].d_ioctl)(dev,
			    DIOCGPART, (caddr_t)&pi, FREAD, curproc);
			if (error)
				printf("DIOCGPART: %s, %d\n", name, error);
			/*
			 * NOTE(review): pi is consumed below even when
			 * DIOCGPART failed, in which case it is
			 * uninitialized — worth confirming upstream.
			 */
			sc->sc_vdisk_block_size = pi.disklab->d_secsize;
			sc->sc_vdisk_size = DL_GETPSIZE(pi.part);
		} else {
			/* Regular file: size from the vnode attributes. */
			error = VOP_GETATTR(nd.ni_vp, &va, p->p_ucred, p);
			if (error)
				printf("VOP_GETATTR: %s, %d\n", name, error);
			sc->sc_vdisk_block_size = DEV_BSIZE;
			sc->sc_vdisk_size = va.va_size / DEV_BSIZE;
		}

		VOP_UNLOCK(nd.ni_vp);
		sc->sc_vp = nd.ni_vp;

		vdsp_readlabel(sc);
	}

	bzero(&ai, sizeof(ai));
	ai.tag.type = VIO_TYPE_CTRL;
	ai.tag.stype = VIO_SUBTYPE_ACK;
	ai.tag.stype_env = VIO_ATTR_INFO;
	ai.tag.sid = sc->sc_local_sid;
	ai.xfer_mode = sc->sc_xfer_mode;
	ai.vd_type = VD_DISK_TYPE_DISK;
	/* The media type field exists only in vDisk 1.1 and later. */
	if (sc->sc_major > 1 || sc->sc_minor >= 1) {
		if (vdsp_is_iso(sc))
			ai.vd_mtype = VD_MEDIA_TYPE_CD;
		else
			ai.vd_mtype = VD_MEDIA_TYPE_FIXED;
	}
	ai.vdisk_block_size = sc->sc_vdisk_block_size;
	ai.operations = VD_OP_MASK;
	ai.vdisk_size = sc->sc_vdisk_size;
	ai.max_xfer_sz = MAXPHYS / sc->sc_vdisk_block_size;
	vdsp_sendmsg(sc, &ai, sizeof(ai), 1);
}
970 
/*
 * Close task (process context): release per-connection state after an
 * LDC reset and close the backing vnode.
 */
void
vdsp_close(void *arg1)
{
	struct vdsp_softc *sc = arg1;
	struct proc *p = curproc;

	sc->sc_seq_no = 0;

	free(sc->sc_vd, M_DEVBUF, 0);
	sc->sc_vd = NULL;
	free(sc->sc_vd_ring, M_DEVBUF,
	     sc->sc_num_descriptors * sizeof(*sc->sc_vd_ring));
	sc->sc_vd_ring = NULL;
	free(sc->sc_label, M_DEVBUF, 0);
	sc->sc_label = NULL;
	if (sc->sc_vp) {
		vn_close(sc->sc_vp, FREAD | FWRITE, p->p_ucred, p);
		sc->sc_vp = NULL;
	}
}
991 
/*
 * Read the Sun disklabel from the start of the backing store into
 * sc_label.  On read failure sc_label is left NULL.
 */
void
vdsp_readlabel(struct vdsp_softc *sc)
{
	struct proc *p = curproc;
	struct iovec iov;
	struct uio uio;
	int err;

	if (sc->sc_vp == NULL)
		return;

	sc->sc_label = malloc(sizeof(*sc->sc_label), M_DEVBUF, M_WAITOK);

	iov.iov_base = sc->sc_label;
	iov.iov_len = sizeof(*sc->sc_label);
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = sizeof(*sc->sc_label);
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_procp = p;

	vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	err = VOP_READ(sc->sc_vp, &uio, 0, p->p_ucred);
	VOP_UNLOCK(sc->sc_vp);
	if (err) {
		free(sc->sc_label, M_DEVBUF, 0);
		sc->sc_label = NULL;
	}
}
1023 
/*
 * Write the cached Sun disklabel in sc_label back to the start of the
 * backing store.  Returns 0 on success or an errno value.
 */
int
vdsp_writelabel(struct vdsp_softc *sc)
{
	struct proc *p = curproc;
	struct iovec iov;
	struct uio uio;
	int err;

	if (sc->sc_vp == NULL || sc->sc_label == NULL)
		return (EINVAL);

	iov.iov_base = sc->sc_label;
	iov.iov_len = sizeof(*sc->sc_label);
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = sizeof(*sc->sc_label);
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_WRITE;
	uio.uio_procp = p;

	vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	err = VOP_WRITE(sc->sc_vp, &uio, 0, p->p_ucred);
	VOP_UNLOCK(sc->sc_vp);

	return (err);
}
1051 
/*
 * Detect an ISO 9660 image: read the primary volume descriptor at
 * sector 16 and check its standard identifier.  Returns 1 when the
 * backing store looks like an ISO image, 0 otherwise.
 */
int
vdsp_is_iso(struct vdsp_softc *sc)
{
	struct proc *p = curproc;
	struct iovec iov;
	struct uio uio;
	struct iso_volume_descriptor *vdp;
	int err;

	if (sc->sc_vp == NULL)
		return (0);

	vdp = malloc(sizeof(*vdp), M_DEVBUF, M_WAITOK);

	iov.iov_base = vdp;
	iov.iov_len = sizeof(*vdp);
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 16 * ISO_DEFAULT_BLOCK_SIZE;
	uio.uio_resid = sizeof(*vdp);
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_procp = p;

	vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	err = VOP_READ(sc->sc_vp, &uio, 0, p->p_ucred);
	VOP_UNLOCK(sc->sc_vp);

	/* A read that succeeds but lacks the magic is not an ISO. */
	if (err == 0 && memcmp(vdp->id, ISO_STANDARD_ID, sizeof(vdp->id)))
		err = ENOENT;

	free(vdp, M_DEVBUF, 0);
	return (err == 0);
}
1086 
1087 void
1088 vdsp_alloc(void *arg1)
1089 {
1090 	struct vdsp_softc *sc = arg1;
1091 	struct vio_dring_reg dr;
1092 
1093 	KASSERT(sc->sc_num_descriptors <= VDSK_MAX_DESCRIPTORS);
1094 	KASSERT(sc->sc_descriptor_size <= VDSK_MAX_DESCRIPTOR_SIZE);
1095 	sc->sc_vd = mallocarray(sc->sc_num_descriptors,
1096 	    sc->sc_descriptor_size, M_DEVBUF, M_WAITOK);
1097 	sc->sc_vd_ring = mallocarray(sc->sc_num_descriptors,
1098 	    sizeof(*sc->sc_vd_ring), M_DEVBUF, M_WAITOK);
1099 	task_set(&sc->sc_vd_task, vdsp_vd_task, sc);
1100 
1101 	bzero(&dr, sizeof(dr));
1102 	dr.tag.type = VIO_TYPE_CTRL;
1103 	dr.tag.stype = VIO_SUBTYPE_ACK;
1104 	dr.tag.stype_env = VIO_DRING_REG;
1105 	dr.tag.sid = sc->sc_local_sid;
1106 	dr.dring_ident = ++sc->sc_dring_ident;
1107 	vdsp_sendmsg(sc, &dr, sizeof(dr), 1);
1108 }
1109 
/*
 * Read task: drain the descriptor-message ring filled by
 * vdsp_rx_vio_desc_data().  The mutex is dropped around the actual
 * I/O since vdsp_read_desc() may sleep; the tail is only advanced
 * after the entry has been processed.
 */
void
vdsp_read(void *arg1)
{
	struct vdsp_softc *sc = arg1;

	mtx_enter(&sc->sc_desc_mtx);
	while (sc->sc_desc_tail != sc->sc_desc_head) {
		mtx_leave(&sc->sc_desc_mtx);
		vdsp_read_desc(sc, sc->sc_desc_msg[sc->sc_desc_tail]);
		mtx_enter(&sc->sc_desc_mtx);
		sc->sc_desc_tail++;
		sc->sc_desc_tail &= (VDSK_RX_ENTRIES - 1);
	}
	mtx_leave(&sc->sc_desc_mtx);
}
1125 
/*
 * Service one descriptor-mode read: read from the backing store into
 * a bounce buffer, copy the data out to the client's cookies via
 * hv_ldc_copy(), and ACK the descriptor with the resulting status.
 */
void
vdsp_read_desc(struct vdsp_softc *sc, struct vdsk_desc_msg *dm)
{
	struct ldc_conn *lc = &sc->sc_lc;
	struct proc *p = curproc;
	struct iovec iov;
	struct uio uio;
	caddr_t buf;
	vaddr_t va;
	paddr_t pa;
	uint64_t size, off;
	psize_t nbytes;
	int err, i;

	if (sc->sc_vp == NULL)
		return;

	buf = malloc(dm->size, M_DEVBUF, M_WAITOK);

	iov.iov_base = buf;
	iov.iov_len = dm->size;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = dm->offset * DEV_BSIZE;
	uio.uio_resid = dm->size;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_procp = p;

	vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	dm->status = VOP_READ(sc->sc_vp, &uio, 0, p->p_ucred);
	VOP_UNLOCK(sc->sc_vp);

	/* The hypervisor copy loop does not need the kernel lock. */
	KERNEL_UNLOCK();
	if (dm->status == 0) {
		i = 0;
		va = (vaddr_t)buf;
		size = dm->size;
		off = 0;
		/*
		 * Walk the client's cookie list, copying out at most a
		 * page (and at most the rest of the current cookie) at
		 * a time; hv_ldc_copy() takes physical addresses.
		 */
		while (size > 0 && i < dm->ncookies) {
			pmap_extract(pmap_kernel(), va, &pa);
			nbytes = MIN(size, dm->cookie[i].size - off);
			nbytes = MIN(nbytes, PAGE_SIZE - (off & PAGE_MASK));
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT,
			    dm->cookie[i].addr + off, pa, nbytes, &nbytes);
			if (err != H_EOK) {
				printf("%s: hv_ldc_copy: %d\n", __func__, err);
				dm->status = EIO;
				KERNEL_LOCK();
				goto fail;
			}
			va += nbytes;
			size -= nbytes;
			off += nbytes;
			if (off >= dm->cookie[i].size) {
				off = 0;
				i++;
			}
		}
	}
	KERNEL_LOCK();

fail:
	free(buf, M_DEVBUF, 0);

	/* ACK the descriptor. */
	dm->tag.stype = VIO_SUBTYPE_ACK;
	dm->tag.sid = sc->sc_local_sid;
	vdsp_sendmsg(sc, dm, sizeof(*dm) +
	    (dm->ncookies - 1) * sizeof(struct ldc_cookie), 1);
}
1197 
1198 void
1199 vdsp_read_dring(void *arg1, void *arg2)
1200 {
1201 	struct vdsp_softc *sc = arg1;
1202 	struct ldc_conn *lc = &sc->sc_lc;
1203 	struct vd_desc *vd = arg2;
1204 	struct proc *p = curproc;
1205 	struct iovec iov;
1206 	struct uio uio;
1207 	caddr_t buf;
1208 	vaddr_t va;
1209 	paddr_t pa;
1210 	uint64_t size, off;
1211 	psize_t nbytes;
1212 	int err, i;
1213 
1214 	if (sc->sc_vp == NULL)
1215 		return;
1216 
1217 	buf = malloc(vd->size, M_DEVBUF, M_WAITOK);
1218 
1219 	iov.iov_base = buf;
1220 	iov.iov_len = vd->size;
1221 	uio.uio_iov = &iov;
1222 	uio.uio_iovcnt = 1;
1223 	uio.uio_offset = vd->offset * DEV_BSIZE;
1224 	uio.uio_resid = vd->size;
1225 	uio.uio_segflg = UIO_SYSSPACE;
1226 	uio.uio_rw = UIO_READ;
1227 	uio.uio_procp = p;
1228 
1229 	vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY);
1230 	vd->status = VOP_READ(sc->sc_vp, &uio, 0, p->p_ucred);
1231 	VOP_UNLOCK(sc->sc_vp);
1232 
1233 	KERNEL_UNLOCK();
1234 	if (vd->status == 0) {
1235 		i = 0;
1236 		va = (vaddr_t)buf;
1237 		size = vd->size;
1238 		off = 0;
1239 		while (size > 0 && i < vd->ncookies) {
1240 			pmap_extract(pmap_kernel(), va, &pa);
1241 			nbytes = MIN(size, vd->cookie[i].size - off);
1242 			nbytes = MIN(nbytes, PAGE_SIZE - (off & PAGE_MASK));
1243 			err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT,
1244 			    vd->cookie[i].addr + off, pa, nbytes, &nbytes);
1245 			if (err != H_EOK) {
1246 				printf("%s: hv_ldc_copy: %d\n", __func__, err);
1247 				vd->status = EIO;
1248 				KERNEL_LOCK();
1249 				goto fail;
1250 			}
1251 			va += nbytes;
1252 			size -= nbytes;
1253 			off += nbytes;
1254 			if (off >= vd->cookie[i].size) {
1255 				off = 0;
1256 				i++;
1257 			}
1258 		}
1259 	}
1260 	KERNEL_LOCK();
1261 
1262 fail:
1263 	free(buf, M_DEVBUF, 0);
1264 
1265 	/* ACK the descriptor. */
1266 	vd->hdr.dstate = VIO_DESC_DONE;
1267 	vdsp_ack_desc(sc, vd);
1268 }
1269 
1270 void
1271 vdsp_write_dring(void *arg1, void *arg2)
1272 {
1273 	struct vdsp_softc *sc = arg1;
1274 	struct ldc_conn *lc = &sc->sc_lc;
1275 	struct vd_desc *vd = arg2;
1276 	struct proc *p = curproc;
1277 	struct iovec iov;
1278 	struct uio uio;
1279 	caddr_t buf;
1280 	vaddr_t va;
1281 	paddr_t pa;
1282 	uint64_t size, off;
1283 	psize_t nbytes;
1284 	int err, i;
1285 
1286 	if (sc->sc_vp == NULL)
1287 		return;
1288 
1289 	buf = malloc(vd->size, M_DEVBUF, M_WAITOK);
1290 
1291 	KERNEL_UNLOCK();
1292 	i = 0;
1293 	va = (vaddr_t)buf;
1294 	size = vd->size;
1295 	off = 0;
1296 	while (size > 0 && i < vd->ncookies) {
1297 		pmap_extract(pmap_kernel(), va, &pa);
1298 		nbytes = MIN(size, vd->cookie[i].size - off);
1299 		nbytes = MIN(nbytes, PAGE_SIZE - (off & PAGE_MASK));
1300 		err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
1301 		    vd->cookie[i].addr + off, pa, nbytes, &nbytes);
1302 		if (err != H_EOK) {
1303 			printf("%s: hv_ldc_copy: %d\n", __func__, err);
1304 			vd->status = EIO;
1305 			KERNEL_LOCK();
1306 			goto fail;
1307 		}
1308 		va += nbytes;
1309 		size -= nbytes;
1310 		off += nbytes;
1311 		if (off >= vd->cookie[i].size) {
1312 			off = 0;
1313 			i++;
1314 		}
1315 	}
1316 	KERNEL_LOCK();
1317 
1318 	iov.iov_base = buf;
1319 	iov.iov_len = vd->size;
1320 	uio.uio_iov = &iov;
1321 	uio.uio_iovcnt = 1;
1322 	uio.uio_offset = vd->offset * DEV_BSIZE;
1323 	uio.uio_resid = vd->size;
1324 	uio.uio_segflg = UIO_SYSSPACE;
1325 	uio.uio_rw = UIO_WRITE;
1326 	uio.uio_procp = p;
1327 
1328 	vn_lock(sc->sc_vp, LK_EXCLUSIVE | LK_RETRY);
1329 	vd->status = VOP_WRITE(sc->sc_vp, &uio, 0, p->p_ucred);
1330 	VOP_UNLOCK(sc->sc_vp);
1331 
1332 fail:
1333 	free(buf, M_DEVBUF, 0);
1334 
1335 	/* ACK the descriptor. */
1336 	vd->hdr.dstate = VIO_DESC_DONE;
1337 	vdsp_ack_desc(sc, vd);
1338 }
1339 
1340 void
1341 vdsp_flush_dring(void *arg1, void *arg2)
1342 {
1343 	struct vdsp_softc *sc = arg1;
1344 	struct vd_desc *vd = arg2;
1345 
1346 	if (sc->sc_vp == NULL)
1347 		return;
1348 
1349 	/* ACK the descriptor. */
1350 	vd->status = 0;
1351 	vd->hdr.dstate = VIO_DESC_DONE;
1352 	vdsp_ack_desc(sc, vd);
1353 }
1354 
1355 void
1356 vdsp_get_vtoc(void *arg1, void *arg2)
1357 {
1358 	struct vdsp_softc *sc = arg1;
1359 	struct ldc_conn *lc = &sc->sc_lc;
1360 	struct vd_desc *vd = arg2;
1361 	struct sun_vtoc_preamble *sl;
1362 	struct vd_vtoc *vt;
1363 	vaddr_t va;
1364 	paddr_t pa;
1365 	uint64_t size, off;
1366 	psize_t nbytes;
1367 	int err, i;
1368 
1369 	vt = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
1370 
1371 	if (sc->sc_label == NULL)
1372 		vdsp_readlabel(sc);
1373 
1374 	if (sc->sc_label && sc->sc_label->sl_magic == SUN_DKMAGIC) {
1375 		sl = (struct sun_vtoc_preamble *)sc->sc_label;
1376 
1377 		memcpy(vt->ascii_label, sl->sl_text, sizeof(sl->sl_text));
1378 		memcpy(vt->volume_name, sl->sl_volume, sizeof(sl->sl_volume));
1379 		vt->sector_size = DEV_BSIZE;
1380 		vt->num_partitions = sl->sl_nparts;
1381 		for (i = 0; i < vt->num_partitions; i++) {
1382 			vt->partition[i].id_tag = sl->sl_part[i].spi_tag;
1383 			vt->partition[i].perm = sl->sl_part[i].spi_flag;
1384 			vt->partition[i].start =
1385 			    sc->sc_label->sl_part[i].sdkp_cyloffset *
1386 				sc->sc_label->sl_ntracks *
1387 				sc->sc_label->sl_nsectors;
1388 			vt->partition[i].nblocks =
1389 			    sc->sc_label->sl_part[i].sdkp_nsectors;
1390 		}
1391 	} else {
1392 		uint64_t disk_size;
1393 		int unit;
1394 
1395 		/* Human-readable disk size. */
1396 		disk_size = sc->sc_vdisk_size * sc->sc_vdisk_block_size;
1397 		disk_size >>= 10;
1398 		unit = 'K';
1399 		if (disk_size > (2 << 10)) {
1400 			disk_size >>= 10;
1401 			unit = 'M';
1402 		}
1403 		if (disk_size > (2 << 10)) {
1404 			disk_size >>= 10;
1405 			unit = 'G';
1406 		}
1407 
1408 		snprintf(vt->ascii_label, sizeof(vt->ascii_label),
1409 		    "OpenBSD-DiskImage-%lld%cB cyl %d alt %d hd %d sec %d",
1410 		    disk_size, unit, sc->sc_ncyl, sc->sc_acyl,
1411 		    sc->sc_nhead, sc->sc_nsect);
1412 		vt->sector_size = sc->sc_vdisk_block_size;
1413 		vt->num_partitions = 8;
1414 		vt->partition[2].id_tag = SPTAG_WHOLE_DISK;
1415 		vt->partition[2].nblocks =
1416 		    sc->sc_ncyl * sc->sc_nhead * sc->sc_nsect;
1417 	}
1418 
1419 	i = 0;
1420 	va = (vaddr_t)vt;
1421 	size = roundup(sizeof(*vt), 64);
1422 	off = 0;
1423 	while (size > 0 && i < vd->ncookies) {
1424 		pmap_extract(pmap_kernel(), va, &pa);
1425 		nbytes = MIN(size, vd->cookie[i].size - off);
1426 		nbytes = MIN(nbytes, PAGE_SIZE - (off & PAGE_MASK));
1427 		err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT,
1428 		    vd->cookie[i].addr + off, pa, nbytes, &nbytes);
1429 		if (err != H_EOK) {
1430 			printf("%s: hv_ldc_copy: %d\n", __func__, err);
1431 			vd->status = EIO;
1432 			goto fail;
1433 		}
1434 		va += nbytes;
1435 		size -= nbytes;
1436 		off += nbytes;
1437 		if (off >= vd->cookie[i].size) {
1438 			off = 0;
1439 			i++;
1440 		}
1441 	}
1442 
1443 	vd->status = 0;
1444 
1445 fail:
1446 	free(vt, M_DEVBUF, 0);
1447 
1448 	/* ACK the descriptor. */
1449 	vd->hdr.dstate = VIO_DESC_DONE;
1450 	vdsp_ack_desc(sc, vd);
1451 }
1452 
1453 void
1454 vdsp_set_vtoc(void *arg1, void *arg2)
1455 {
1456 	struct vdsp_softc *sc = arg1;
1457 	struct ldc_conn *lc = &sc->sc_lc;
1458 	struct vd_desc *vd = arg2;
1459 	struct sun_vtoc_preamble *sl;
1460 	struct vd_vtoc *vt;
1461 	u_short cksum = 0, *sp1, *sp2;
1462 	vaddr_t va;
1463 	paddr_t pa;
1464 	uint64_t size, off;
1465 	psize_t nbytes;
1466 	int err, i;
1467 
1468 	vt = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
1469 
1470 	i = 0;
1471 	va = (vaddr_t)vt;
1472 	size = sizeof(*vt);
1473 	off = 0;
1474 	while (size > 0 && i < vd->ncookies) {
1475 		pmap_extract(pmap_kernel(), va, &pa);
1476 		nbytes = MIN(size, vd->cookie[i].size - off);
1477 		nbytes = MIN(nbytes, PAGE_SIZE - (off & PAGE_MASK));
1478 		err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
1479 		    vd->cookie[i].addr + off, pa, nbytes, &nbytes);
1480 		if (err != H_EOK) {
1481 			printf("%s: hv_ldc_copy: %d\n", __func__, err);
1482 			vd->status = EIO;
1483 			goto fail;
1484 		}
1485 		va += nbytes;
1486 		size -= nbytes;
1487 		off += nbytes;
1488 		if (off >= vd->cookie[i].size) {
1489 			off = 0;
1490 			i++;
1491 		}
1492 	}
1493 
1494 	if (vt->num_partitions > nitems(sc->sc_label->sl_part)) {
1495 		vd->status = EINVAL;
1496 		goto fail;
1497 	}
1498 
1499 	if (sc->sc_label == NULL || sc->sc_label->sl_magic != SUN_DKMAGIC) {
1500 		sc->sc_label = malloc(sizeof(*sc->sc_label),
1501 		    M_DEVBUF, M_WAITOK | M_ZERO);
1502 
1503 		sc->sc_label->sl_ntracks = sc->sc_nhead;
1504 		sc->sc_label->sl_nsectors = sc->sc_nsect;
1505 		sc->sc_label->sl_ncylinders = sc->sc_ncyl;
1506 		sc->sc_label->sl_acylinders = sc->sc_acyl;
1507 		sc->sc_label->sl_pcylinders = sc->sc_ncyl + sc->sc_acyl;
1508 		sc->sc_label->sl_rpm = 3600;
1509 
1510 		sc->sc_label->sl_magic = SUN_DKMAGIC;
1511 	}
1512 
1513 	sl = (struct sun_vtoc_preamble *)sc->sc_label;
1514 	memcpy(sl->sl_text, vt->ascii_label, sizeof(sl->sl_text));
1515 	sl->sl_version = 0x01;
1516 	memcpy(sl->sl_volume, vt->volume_name, sizeof(sl->sl_volume));
1517 	sl->sl_nparts = vt->num_partitions;
1518 	for (i = 0; i < vt->num_partitions; i++) {
1519 		sl->sl_part[i].spi_tag = vt->partition[i].id_tag;
1520 		sl->sl_part[i].spi_flag = vt->partition[i].perm;
1521 		sc->sc_label->sl_part[i].sdkp_cyloffset =
1522 		    vt->partition[i].start / (sc->sc_nhead * sc->sc_nsect);
1523 		sc->sc_label->sl_part[i].sdkp_nsectors =
1524 		    vt->partition[i].nblocks;
1525 	}
1526 	sl->sl_sanity = 0x600ddeee;
1527 
1528 	/* Compute the checksum. */
1529 	sp1 = (u_short *)sc->sc_label;
1530 	sp2 = (u_short *)(sc->sc_label + 1);
1531 	while (sp1 < sp2)
1532 		cksum ^= *sp1++;
1533 	sc->sc_label->sl_cksum = cksum;
1534 
1535 	vd->status = vdsp_writelabel(sc);
1536 
1537 fail:
1538 	free(vt, M_DEVBUF, 0);
1539 
1540 	/* ACK the descriptor. */
1541 	vd->hdr.dstate = VIO_DESC_DONE;
1542 	vdsp_ack_desc(sc, vd);
1543 }
1544 
1545 void
1546 vdsp_get_diskgeom(void *arg1, void *arg2)
1547 {
1548 	struct vdsp_softc *sc = arg1;
1549 	struct ldc_conn *lc = &sc->sc_lc;
1550 	struct vd_desc *vd = arg2;
1551 	struct vd_diskgeom *vg;
1552 	vaddr_t va;
1553 	paddr_t pa;
1554 	uint64_t size, off;
1555 	psize_t nbytes;
1556 	int err, i;
1557 
1558 	vg = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
1559 
1560 	if (sc->sc_label == NULL)
1561 		vdsp_readlabel(sc);
1562 
1563 	if (sc->sc_label && sc->sc_label->sl_magic == SUN_DKMAGIC) {
1564 		vg->ncyl = sc->sc_label->sl_ncylinders;
1565 		vg->acyl = sc->sc_label->sl_acylinders;
1566 		vg->nhead = sc->sc_label->sl_ntracks;
1567 		vg->nsect = sc->sc_label->sl_nsectors;
1568 		vg->intrlv = sc->sc_label->sl_interleave;
1569 		vg->apc = sc->sc_label->sl_sparespercyl;
1570 		vg->rpm = sc->sc_label->sl_rpm;
1571 		vg->pcyl = sc->sc_label->sl_pcylinders;
1572 	} else {
1573 		uint64_t disk_size, block_size;
1574 
1575 		disk_size = sc->sc_vdisk_size * sc->sc_vdisk_block_size;
1576 		block_size = sc->sc_vdisk_block_size;
1577 
1578 		if (disk_size >= 8L * 1024 * 1024 * 1024) {
1579 			vg->nhead = 96;
1580 			vg->nsect = 768;
1581 		} else if (disk_size >= 2 *1024 * 1024) {
1582 			vg->nhead = 1;
1583 			vg->nsect = 600;
1584 		} else {
1585 			vg->nhead = 1;
1586 			vg->nsect = 200;
1587 		}
1588 
1589 		vg->pcyl = disk_size / (block_size * vg->nhead * vg->nsect);
1590 		if (vg->pcyl == 0)
1591 			vg->pcyl = 1;
1592 		if (vg->pcyl > 2)
1593 			vg->acyl = 2;
1594 		vg->ncyl = vg->pcyl - vg->acyl;
1595 
1596 		vg->rpm = 3600;
1597 	}
1598 
1599 	sc->sc_ncyl = vg->ncyl;
1600 	sc->sc_acyl = vg->acyl;
1601 	sc->sc_nhead = vg->nhead;
1602 	sc->sc_nsect = vg->nsect;
1603 
1604 	i = 0;
1605 	va = (vaddr_t)vg;
1606 	size = roundup(sizeof(*vg), 64);
1607 	off = 0;
1608 	while (size > 0 && i < vd->ncookies) {
1609 		pmap_extract(pmap_kernel(), va, &pa);
1610 		nbytes = MIN(size, vd->cookie[i].size - off);
1611 		nbytes = MIN(nbytes, PAGE_SIZE - (off & PAGE_MASK));
1612 		err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT,
1613 		    vd->cookie[i].addr + off, pa, nbytes, &nbytes);
1614 		if (err != H_EOK) {
1615 			printf("%s: hv_ldc_copy: %d\n", __func__, err);
1616 			vd->status = EIO;
1617 			goto fail;
1618 		}
1619 		va += nbytes;
1620 		size -= nbytes;
1621 		off += nbytes;
1622 		if (off >= vd->cookie[i].size) {
1623 			off = 0;
1624 			i++;
1625 		}
1626 	}
1627 
1628 	vd->status = 0;
1629 
1630 fail:
1631 	free(vg, M_DEVBUF, 0);
1632 
1633 	/* ACK the descriptor. */
1634 	vd->hdr.dstate = VIO_DESC_DONE;
1635 	vdsp_ack_desc(sc, vd);
1636 }
1637 
1638 void
1639 vdsp_unimp(void *arg1, void *arg2)
1640 {
1641 	struct vdsp_softc *sc = arg1;
1642 	struct vd_desc *vd = arg2;
1643 
1644 	/* ACK the descriptor. */
1645 	vd->status = ENOTSUP;
1646 	vd->hdr.dstate = VIO_DESC_DONE;
1647 	vdsp_ack_desc(sc, vd);
1648 }
1649 
/*
 * Copy a completed descriptor back into the guest's descriptor ring
 * and send a VIO_SUBTYPE_ACK dring message for it.
 */
void
vdsp_ack_desc(struct vdsp_softc *sc, struct vd_desc *vd)
{
	struct vio_dring_msg dm;
	vaddr_t va;
	paddr_t pa;
	uint64_t size, off;
	psize_t nbytes;
	int err;

	/*
	 * Write the updated descriptor back into the guest's ring, one
	 * chunk at a time, never crossing a page boundary per copy.
	 * 'off' is the descriptor's byte offset within our ring copy;
	 * the chunking assumes our copy and the guest's dring cookie
	 * share page alignment -- NOTE(review): confirm.
	 */
	va = (vaddr_t)vd;
	off = (caddr_t)vd - sc->sc_vd;
	size = sc->sc_descriptor_size;
	while (size > 0) {
		pmap_extract(pmap_kernel(), va, &pa);
		nbytes = MIN(size, PAGE_SIZE - (off & PAGE_MASK));
		err = hv_ldc_copy(sc->sc_lc.lc_id, LDC_COPY_OUT,
		    sc->sc_dring_cookie.addr + off, pa, nbytes, &nbytes);
		if (err != H_EOK) {
			/* Give up silently; no ACK is sent to the guest. */
			printf("%s: hv_ldc_copy %d\n", __func__, err);
			return;
		}
		va += nbytes;
		size -= nbytes;
		off += nbytes;
	}

	/* ACK the descriptor: start and end index both name this slot. */
	bzero(&dm, sizeof(dm));
	dm.tag.type = VIO_TYPE_DATA;
	dm.tag.stype = VIO_SUBTYPE_ACK;
	dm.tag.stype_env = VIO_DRING_DATA;
	dm.tag.sid = sc->sc_local_sid;
	dm.seq_no = ++sc->sc_seq_no;
	dm.dring_ident = sc->sc_dring_ident;
	off = (caddr_t)vd - sc->sc_vd;
	dm.start_idx = off / sc->sc_descriptor_size;
	dm.end_idx = off / sc->sc_descriptor_size;
	vdsp_sendmsg(sc, &dm, sizeof(dm), 1);
}
1690 
1691 int
1692 vdspopen(dev_t dev, int flag, int mode, struct proc *p)
1693 {
1694 	struct vdsp_softc *sc;
1695 	struct ldc_conn *lc;
1696 	int unit = minor(dev);
1697 	int err;
1698 
1699 	if (unit >= vdsp_cd.cd_ndevs)
1700 		return (ENXIO);
1701 	sc = vdsp_cd.cd_devs[unit];
1702 	if (sc == NULL)
1703 		return (ENXIO);
1704 
1705 	lc = &sc->sc_lc;
1706 
1707 	err = hv_ldc_tx_qconf(lc->lc_id,
1708 	    lc->lc_txq->lq_map->dm_segs[0].ds_addr, lc->lc_txq->lq_nentries);
1709 	if (err != H_EOK)
1710 		printf("%s: hv_ldc_tx_qconf %d\n", __func__, err);
1711 
1712 	err = hv_ldc_rx_qconf(lc->lc_id,
1713 	    lc->lc_rxq->lq_map->dm_segs[0].ds_addr, lc->lc_rxq->lq_nentries);
1714 	if (err != H_EOK)
1715 		printf("%s: hv_ldc_rx_qconf %d\n", __func__, err);
1716 
1717 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_ENABLED);
1718 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_ENABLED);
1719 
1720 	return (0);
1721 }
1722 
1723 int
1724 vdspclose(dev_t dev, int flag, int mode, struct proc *p)
1725 {
1726 	struct vdsp_softc *sc;
1727 	int unit = minor(dev);
1728 
1729 	if (unit >= vdsp_cd.cd_ndevs)
1730 		return (ENXIO);
1731 	sc = vdsp_cd.cd_devs[unit];
1732 	if (sc == NULL)
1733 		return (ENXIO);
1734 
1735 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_DISABLED);
1736 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_DISABLED);
1737 
1738 	hv_ldc_tx_qconf(sc->sc_lc.lc_id, 0, 0);
1739 	hv_ldc_rx_qconf(sc->sc_lc.lc_id, 0, 0);
1740 
1741 	task_add(systq, &sc->sc_close_task);
1742 	return (0);
1743 }
1744 
1745 int
1746 vdspioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
1747 {
1748 	struct vdsp_softc *sc;
1749 	int unit = minor(dev);
1750 
1751 	if (unit >= vdsp_cd.cd_ndevs)
1752 		return (ENXIO);
1753 	sc = vdsp_cd.cd_devs[unit];
1754 	if (sc == NULL)
1755 		return (ENXIO);
1756 
1757 	return (ENOTTY);
1758 }
1759