xref: /freebsd/usr.sbin/bhyve/pci_ahci.c (revision 325151a3)
1 /*-
2  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/linker_set.h>
34 #include <sys/stat.h>
35 #include <sys/uio.h>
36 #include <sys/ioctl.h>
37 #include <sys/disk.h>
38 #include <sys/ata.h>
39 #include <sys/endian.h>
40 
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <stdint.h>
46 #include <string.h>
47 #include <strings.h>
48 #include <unistd.h>
49 #include <assert.h>
50 #include <pthread.h>
51 #include <pthread_np.h>
52 #include <inttypes.h>
53 #include <md5.h>
54 
55 #include "bhyverun.h"
56 #include "pci_emul.h"
57 #include "ahci.h"
58 #include "block_if.h"
59 
/* Size of the per-controller port array (struct pci_ahci_softc.port[]). */
#define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */

/*
 * Device signatures stored in a port's 'sig' register by ahci_port_reset(),
 * chosen by whether the attached device is ATAPI.
 */
#define	PxSIG_ATA	0x00000101 /* ATA drive */
#define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
64 
/*
 * SATA Frame Information Structure (FIS) type codes.
 *
 * The ahci_write_fis_*() helpers store one of these in byte 0 of the FIS
 * they build and pass the same value to ahci_write_fis(), which uses it to
 * pick the destination offset inside the received-FIS area.  Only REGD2H,
 * SETDEVBITS and PIOSETUP are accepted by ahci_write_fis(); any other type
 * is reported as unsupported.
 */
enum sata_fis_type {
	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
};
75 
/*
 * SCSI opcodes
 *
 * Command codes of the packet commands carried inside an ATAPI request.
 * NOTE(review): presumably compared against the first byte of the packet
 * found at cfis + 0x40 - confirm against the ATAPI dispatch code.
 */
#define	TEST_UNIT_READY		0x00
#define	REQUEST_SENSE		0x03
#define	INQUIRY			0x12
#define	START_STOP_UNIT		0x1B
#define	PREVENT_ALLOW		0x1E
#define	READ_CAPACITY		0x25
#define	READ_10			0x28
#define	POSITION_TO_ELEMENT	0x2B
#define	READ_TOC		0x43
#define	GET_EVENT_STATUS_NOTIFICATION 0x4A
#define	MODE_SENSE_10		0x5A
#define	REPORT_LUNS		0xA0
#define	READ_12			0xA8
#define	READ_CD			0xBE

/*
 * SCSI mode page codes
 */
#define	MODEPAGE_RW_ERROR_RECOVERY	0x01
#define	MODEPAGE_CD_CAPABILITIES	0x2A

/*
 * ATA commands
 *
 * NOTE(review): the ATA_SF_ / ATA_SATA_SF_ prefixes suggest these are
 * SET FEATURES subcommand and SATA feature identifiers rather than command
 * opcodes - confirm and retitle this section if so.
 */
#define	ATA_SF_ENAB_SATA_SF		0x10
#define		ATA_SATA_SF_AN		0x05
#define	ATA_SF_DIS_SATA_SF		0x90
107 /*
108  * Debug printf
109  */
110 #ifdef AHCI_DEBUG
111 static FILE *dbg;
112 #define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
113 #else
114 #define DPRINTF(format, arg...)
115 #endif
116 #define WPRINTF(format, arg...) printf(format, ##arg)
117 
/*
 * State for one block-layer request issued on behalf of a command slot.
 * Each request sits either on its port's free list (iofhd) or on the busy
 * list (iobhd) while a blockif operation is outstanding.
 */
struct ahci_ioreq {
	struct blockif_req io_req;	/* request passed to the blockif_*() calls */
	struct ahci_port *io_pr;	/* presumably the owning port - confirm */
	STAILQ_ENTRY(ahci_ioreq) io_flist;	/* free-list linkage */
	TAILQ_ENTRY(ahci_ioreq) io_blist;	/* busy-list linkage */
	uint8_t *cfis;		/* command FIS of the command being serviced */
	uint32_t len;		/* total size of the command (0 for flush) */
	uint32_t done;		/* progress so far: bytes queued for read/write,
				   offset into the range list for TRIM */
	int slot;		/* command slot this request belongs to */
	int more;		/* non-zero if the command needs another pass */
};
129 
/*
 * Per-port emulation state: backend handle, host mappings of the guest's
 * command list and received-FIS area, device-side ATA/ATAPI state, the
 * port register values and the pool of block I/O requests.
 */
struct ahci_port {
	struct blockif_ctxt *bctx;	/* block backend; NULL means no device */
	struct pci_ahci_softc *pr_sc;	/* owning controller */
	uint8_t *cmd_lst;		/* command list (AHCI_CL_SIZE bytes per slot) */
	uint8_t *rfis;			/* received-FIS area; NULL until set up */
	char ident[20 + 1];		/* string reported at IDENTIFY word 10 */
	int atapi;			/* non-zero for an ATAPI device */
	int reset;
	int waitforclear;		/* set when an error status FIS is posted,
					   cleared once the port is fully stopped */
	int mult_sectors;		/* reset to 128 by ahci_port_reset() */
	uint8_t xfermode;		/* reset to ATA_UDMA6 by ahci_port_reset() */
	uint8_t err_cfis[20];		/* snapshot of the last failed command,
					   returned by ahci_handle_read_log() */
	uint8_t sense_key;		/* ATAPI sense key of the last error */
	uint8_t asc;			/* ATAPI additional sense code */
	u_int ccs;
	uint32_t pending;		/* bitmask of slots with I/O in flight */

	/*
	 * Port register values.
	 * NOTE(review): this run of uint32_t fields looks like it mirrors the
	 * AHCI port register layout (hence 'unused0'); confirm before
	 * reordering or inserting fields.
	 */
	uint32_t clb;
	uint32_t clbu;
	uint32_t fb;
	uint32_t fbu;
	uint32_t is;
	uint32_t ie;
	uint32_t cmd;
	uint32_t unused0;
	uint32_t tfd;
	uint32_t sig;
	uint32_t ssts;
	uint32_t sctl;
	uint32_t serr;
	uint32_t sact;
	uint32_t ci;
	uint32_t sntf;
	uint32_t fbs;

	/*
	 * i/o request info
	 */
	struct ahci_ioreq *ioreq;
	int ioqsz;
	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;	/* free requests */
	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;	/* in-flight requests */
};
173 
/*
 * One command header in the guest's command list.  The header for a slot
 * is located at cmd_lst + slot * AHCI_CL_SIZE.
 */
struct ahci_cmd_hdr {
	uint16_t flags;
	uint16_t prdtl;		/* number of PRDT entries for this command */
	uint32_t prdbc;		/* byte count; write_prdt() stores bytes copied */
	uint64_t ctba;		/* presumably the command table address - confirm */
	uint32_t reserved[4];
};
181 
/*
 * One physical region descriptor: a guest-physical buffer and its size.
 * The table for a command starts at offset 0x80 from its command FIS.
 */
struct ahci_prdt_entry {
	uint64_t dba;		/* guest-physical address of the buffer */
	uint32_t reserved;
#define	DBCMASK		0x3fffff
	uint32_t dbc;		/* (dbc & DBCMASK) + 1 is the length in bytes */
};
188 
/*
 * Controller-wide state: the PCI device instance, the lock protecting the
 * emulation, the global HBA register values and the per-port state.
 */
struct pci_ahci_softc {
	struct pci_devinst *asc_pi;	/* PCI device instance */
	pthread_mutex_t	mtx;		/* must be held by ahci_port_stop() */
	int ports;			/* number of entries of port[] in use */
	uint32_t cap;
	uint32_t ghc;			/* AHCI_GHC_IE gates interrupt delivery */
	uint32_t is;			/* one bit per port with a pending cause */
	uint32_t pi;
	uint32_t vs;
	uint32_t ccc_ctl;
	uint32_t ccc_pts;
	uint32_t em_loc;
	uint32_t em_ctl;
	uint32_t cap2;
	uint32_t bohc;
	uint32_t lintr;			/* non-zero while the legacy pin is asserted */
	struct ahci_port port[MAX_PORTS];
};
/* VM context of the guest that owns controller 'sc' (for guest memory lookups). */
#define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)

/* Forward declaration so earlier code (e.g. ahci_handle_dsm_trim) can call it. */
static void ahci_handle_port(struct ahci_port *p);
210 
/*
 * Convert a logical block address to minute/second/frame form.
 *
 * A fixed offset of 150 frames is added first; 75 frames make a second and
 * 60 seconds a minute.  The three result bytes are stored in buf[0..2] as
 * minute, second, frame.
 */
static inline void lba_to_msf(uint8_t *buf, int lba)
{
	int frames, seconds;

	frames = lba + 150;
	seconds = frames / 75;

	buf[0] = seconds / 60;
	buf[1] = seconds % 60;
	buf[2] = frames % 75;
}
218 
219 /*
220  * generate HBA intr depending on whether or not ports within
221  * the controller have an interrupt pending.
222  */
223 static void
224 ahci_generate_intr(struct pci_ahci_softc *sc)
225 {
226 	struct pci_devinst *pi;
227 	int i;
228 
229 	pi = sc->asc_pi;
230 
231 	for (i = 0; i < sc->ports; i++) {
232 		struct ahci_port *pr;
233 		pr = &sc->port[i];
234 		if (pr->is & pr->ie)
235 			sc->is |= (1 << i);
236 	}
237 
238 	DPRINTF("%s %x\n", __func__, sc->is);
239 
240 	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {
241 		if (pci_msi_enabled(pi)) {
242 			/*
243 			 * Generate an MSI interrupt on every edge
244 			 */
245 			pci_generate_msi(pi, 0);
246 		} else if (!sc->lintr) {
247 			/*
248 			 * Only generate a pin-based interrupt if one wasn't
249 			 * in progress
250 			 */
251 			sc->lintr = 1;
252 			pci_lintr_assert(pi);
253 		}
254 	} else if (sc->lintr) {
255 		/*
256 		 * No interrupts: deassert pin-based signal if it had
257 		 * been asserted
258 		 */
259 		pci_lintr_deassert(pi);
260 		sc->lintr = 0;
261 	}
262 }
263 
264 static void
265 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
266 {
267 	int offset, len, irq;
268 
269 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
270 		return;
271 
272 	switch (ft) {
273 	case FIS_TYPE_REGD2H:
274 		offset = 0x40;
275 		len = 20;
276 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0;
277 		break;
278 	case FIS_TYPE_SETDEVBITS:
279 		offset = 0x58;
280 		len = 8;
281 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0;
282 		break;
283 	case FIS_TYPE_PIOSETUP:
284 		offset = 0x20;
285 		len = 20;
286 		irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0;
287 		break;
288 	default:
289 		WPRINTF("unsupported fis type %d\n", ft);
290 		return;
291 	}
292 	if (fis[2] & ATA_S_ERROR) {
293 		p->waitforclear = 1;
294 		irq |= AHCI_P_IX_TFE;
295 	}
296 	memcpy(p->rfis + offset, fis, len);
297 	if (irq) {
298 		p->is |= irq;
299 		ahci_generate_intr(p->pr_sc);
300 	}
301 }
302 
303 static void
304 ahci_write_fis_piosetup(struct ahci_port *p)
305 {
306 	uint8_t fis[20];
307 
308 	memset(fis, 0, sizeof(fis));
309 	fis[0] = FIS_TYPE_PIOSETUP;
310 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
311 }
312 
313 static void
314 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
315 {
316 	uint8_t fis[8];
317 	uint8_t error;
318 
319 	error = (tfd >> 8) & 0xff;
320 	tfd &= 0x77;
321 	memset(fis, 0, sizeof(fis));
322 	fis[0] = FIS_TYPE_SETDEVBITS;
323 	fis[1] = (1 << 6);
324 	fis[2] = tfd;
325 	fis[3] = error;
326 	if (fis[2] & ATA_S_ERROR) {
327 		p->err_cfis[0] = slot;
328 		p->err_cfis[2] = tfd;
329 		p->err_cfis[3] = error;
330 		memcpy(&p->err_cfis[4], cfis + 4, 16);
331 	} else {
332 		*(uint32_t *)(fis + 4) = (1 << slot);
333 		p->sact &= ~(1 << slot);
334 	}
335 	p->tfd &= ~0x77;
336 	p->tfd |= tfd;
337 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
338 }
339 
340 static void
341 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
342 {
343 	uint8_t fis[20];
344 	uint8_t error;
345 
346 	error = (tfd >> 8) & 0xff;
347 	memset(fis, 0, sizeof(fis));
348 	fis[0] = FIS_TYPE_REGD2H;
349 	fis[1] = (1 << 6);
350 	fis[2] = tfd & 0xff;
351 	fis[3] = error;
352 	fis[4] = cfis[4];
353 	fis[5] = cfis[5];
354 	fis[6] = cfis[6];
355 	fis[7] = cfis[7];
356 	fis[8] = cfis[8];
357 	fis[9] = cfis[9];
358 	fis[10] = cfis[10];
359 	fis[11] = cfis[11];
360 	fis[12] = cfis[12];
361 	fis[13] = cfis[13];
362 	if (fis[2] & ATA_S_ERROR) {
363 		p->err_cfis[0] = 0x80;
364 		p->err_cfis[2] = tfd & 0xff;
365 		p->err_cfis[3] = error;
366 		memcpy(&p->err_cfis[4], cfis + 4, 16);
367 	} else
368 		p->ci &= ~(1 << slot);
369 	p->tfd = tfd;
370 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
371 }
372 
373 static void
374 ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot)
375 {
376 	uint8_t fis[20];
377 
378 	p->tfd = ATA_S_READY | ATA_S_DSC;
379 	memset(fis, 0, sizeof(fis));
380 	fis[0] = FIS_TYPE_REGD2H;
381 	fis[1] = 0;			/* No interrupt */
382 	fis[2] = p->tfd;		/* Status */
383 	fis[3] = 0;			/* No error */
384 	p->ci &= ~(1 << slot);
385 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
386 }
387 
388 static void
389 ahci_write_reset_fis_d2h(struct ahci_port *p)
390 {
391 	uint8_t fis[20];
392 
393 	memset(fis, 0, sizeof(fis));
394 	fis[0] = FIS_TYPE_REGD2H;
395 	fis[3] = 1;
396 	fis[4] = 1;
397 	if (p->atapi) {
398 		fis[5] = 0x14;
399 		fis[6] = 0xeb;
400 	}
401 	fis[12] = 1;
402 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
403 }
404 
405 static void
406 ahci_check_stopped(struct ahci_port *p)
407 {
408 	/*
409 	 * If we are no longer processing the command list and nothing
410 	 * is in-flight, clear the running bit, the current command
411 	 * slot, the command issue and active bits.
412 	 */
413 	if (!(p->cmd & AHCI_P_CMD_ST)) {
414 		if (p->pending == 0) {
415 			p->ccs = 0;
416 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
417 			p->ci = 0;
418 			p->sact = 0;
419 			p->waitforclear = 0;
420 		}
421 	}
422 }
423 
424 static void
425 ahci_port_stop(struct ahci_port *p)
426 {
427 	struct ahci_ioreq *aior;
428 	uint8_t *cfis;
429 	int slot;
430 	int ncq;
431 	int error;
432 
433 	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
434 
435 	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
436 		/*
437 		 * Try to cancel the outstanding blockif request.
438 		 */
439 		error = blockif_cancel(p->bctx, &aior->io_req);
440 		if (error != 0)
441 			continue;
442 
443 		slot = aior->slot;
444 		cfis = aior->cfis;
445 		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
446 		    cfis[2] == ATA_READ_FPDMA_QUEUED ||
447 		    cfis[2] == ATA_SEND_FPDMA_QUEUED)
448 			ncq = 1;
449 
450 		if (ncq)
451 			p->sact &= ~(1 << slot);
452 		else
453 			p->ci &= ~(1 << slot);
454 
455 		/*
456 		 * This command is now done.
457 		 */
458 		p->pending &= ~(1 << slot);
459 
460 		/*
461 		 * Delete the blockif request from the busy list
462 		 */
463 		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
464 
465 		/*
466 		 * Move the blockif request back to the free list
467 		 */
468 		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
469 	}
470 
471 	ahci_check_stopped(p);
472 }
473 
474 static void
475 ahci_port_reset(struct ahci_port *pr)
476 {
477 	pr->serr = 0;
478 	pr->sact = 0;
479 	pr->xfermode = ATA_UDMA6;
480 	pr->mult_sectors = 128;
481 
482 	if (!pr->bctx) {
483 		pr->ssts = ATA_SS_DET_NO_DEVICE;
484 		pr->sig = 0xFFFFFFFF;
485 		pr->tfd = 0x7F;
486 		return;
487 	}
488 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
489 	if (pr->sctl & ATA_SC_SPD_MASK)
490 		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
491 	else
492 		pr->ssts |= ATA_SS_SPD_GEN3;
493 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
494 	if (!pr->atapi) {
495 		pr->sig = PxSIG_ATA;
496 		pr->tfd |= ATA_S_READY;
497 	} else
498 		pr->sig = PxSIG_ATAPI;
499 	ahci_write_reset_fis_d2h(pr);
500 }
501 
502 static void
503 ahci_reset(struct pci_ahci_softc *sc)
504 {
505 	int i;
506 
507 	sc->ghc = AHCI_GHC_AE;
508 	sc->is = 0;
509 
510 	if (sc->lintr) {
511 		pci_lintr_deassert(sc->asc_pi);
512 		sc->lintr = 0;
513 	}
514 
515 	for (i = 0; i < sc->ports; i++) {
516 		sc->port[i].ie = 0;
517 		sc->port[i].is = 0;
518 		sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD);
519 		if (sc->port[i].bctx)
520 			sc->port[i].cmd |= AHCI_P_CMD_CPS;
521 		sc->port[i].sctl = 0;
522 		ahci_port_reset(&sc->port[i]);
523 	}
524 }
525 
/*
 * Store 'src' into a 'len'-byte ATA string field.
 *
 * Characters are written with each pair of bytes swapped (index i goes to
 * i ^ 1) and the field is padded with spaces once 'src' is exhausted.  No
 * terminator is written.  'len' is expected to be even.
 */
static void
ata_string(uint8_t *dest, const char *src, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos ^ 1] = (*src != '\0') ? *src++ : ' ';
}
538 
/*
 * Store 'src' into a 'len'-byte field in natural byte order, padding with
 * spaces once 'src' is exhausted.  No terminator is written.
 */
static void
atapi_string(uint8_t *dest, const char *src, int len)
{
	int left;

	for (left = len; left > 0; left--)
		*dest++ = (*src != '\0') ? *src++ : ' ';
}
551 
552 /*
553  * Build up the iovec based on the PRDT, 'done' and 'len'.
554  */
555 static void
556 ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
557     struct ahci_prdt_entry *prdt, uint16_t prdtl)
558 {
559 	struct blockif_req *breq = &aior->io_req;
560 	int i, j, skip, todo, left, extra;
561 	uint32_t dbcsz;
562 
563 	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
564 	skip = aior->done;
565 	left = aior->len - aior->done;
566 	todo = 0;
567 	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
568 	    i++, prdt++) {
569 		dbcsz = (prdt->dbc & DBCMASK) + 1;
570 		/* Skip already done part of the PRDT */
571 		if (dbcsz <= skip) {
572 			skip -= dbcsz;
573 			continue;
574 		}
575 		dbcsz -= skip;
576 		if (dbcsz > left)
577 			dbcsz = left;
578 		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
579 		    prdt->dba + skip, dbcsz);
580 		breq->br_iov[j].iov_len = dbcsz;
581 		todo += dbcsz;
582 		left -= dbcsz;
583 		skip = 0;
584 		j++;
585 	}
586 
587 	/* If we got limited by IOV length, round I/O down to sector size. */
588 	if (j == BLOCKIF_IOV_MAX) {
589 		extra = todo % blockif_sectsz(p->bctx);
590 		todo -= extra;
591 		assert(todo > 0);
592 		while (extra > 0) {
593 			if (breq->br_iov[j - 1].iov_len > extra) {
594 				breq->br_iov[j - 1].iov_len -= extra;
595 				break;
596 			}
597 			extra -= breq->br_iov[j - 1].iov_len;
598 			j--;
599 		}
600 	}
601 
602 	breq->br_iovcnt = j;
603 	breq->br_resid = todo;
604 	aior->done += todo;
605 	aior->more = (aior->done < aior->len && i < prdtl);
606 }
607 
608 static void
609 ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
610 {
611 	struct ahci_ioreq *aior;
612 	struct blockif_req *breq;
613 	struct ahci_prdt_entry *prdt;
614 	struct ahci_cmd_hdr *hdr;
615 	uint64_t lba;
616 	uint32_t len;
617 	int err, first, ncq, readop;
618 
619 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
620 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
621 	ncq = 0;
622 	readop = 1;
623 	first = (done == 0);
624 
625 	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
626 	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
627 	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
628 	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
629 		readop = 0;
630 
631 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
632 	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
633 		lba = ((uint64_t)cfis[10] << 40) |
634 			((uint64_t)cfis[9] << 32) |
635 			((uint64_t)cfis[8] << 24) |
636 			((uint64_t)cfis[6] << 16) |
637 			((uint64_t)cfis[5] << 8) |
638 			cfis[4];
639 		len = cfis[11] << 8 | cfis[3];
640 		if (!len)
641 			len = 65536;
642 		ncq = 1;
643 	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
644 	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
645 	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
646 		lba = ((uint64_t)cfis[10] << 40) |
647 			((uint64_t)cfis[9] << 32) |
648 			((uint64_t)cfis[8] << 24) |
649 			((uint64_t)cfis[6] << 16) |
650 			((uint64_t)cfis[5] << 8) |
651 			cfis[4];
652 		len = cfis[13] << 8 | cfis[12];
653 		if (!len)
654 			len = 65536;
655 	} else {
656 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
657 			(cfis[5] << 8) | cfis[4];
658 		len = cfis[12];
659 		if (!len)
660 			len = 256;
661 	}
662 	lba *= blockif_sectsz(p->bctx);
663 	len *= blockif_sectsz(p->bctx);
664 
665 	/* Pull request off free list */
666 	aior = STAILQ_FIRST(&p->iofhd);
667 	assert(aior != NULL);
668 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
669 
670 	aior->cfis = cfis;
671 	aior->slot = slot;
672 	aior->len = len;
673 	aior->done = done;
674 	breq = &aior->io_req;
675 	breq->br_offset = lba + done;
676 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
677 
678 	/* Mark this command in-flight. */
679 	p->pending |= 1 << slot;
680 
681 	/* Stuff request onto busy list. */
682 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
683 
684 	if (ncq && first)
685 		ahci_write_fis_d2h_ncq(p, slot);
686 
687 	if (readop)
688 		err = blockif_read(p->bctx, breq);
689 	else
690 		err = blockif_write(p->bctx, breq);
691 	assert(err == 0);
692 }
693 
694 static void
695 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
696 {
697 	struct ahci_ioreq *aior;
698 	struct blockif_req *breq;
699 	int err;
700 
701 	/*
702 	 * Pull request off free list
703 	 */
704 	aior = STAILQ_FIRST(&p->iofhd);
705 	assert(aior != NULL);
706 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
707 	aior->cfis = cfis;
708 	aior->slot = slot;
709 	aior->len = 0;
710 	aior->done = 0;
711 	aior->more = 0;
712 	breq = &aior->io_req;
713 
714 	/*
715 	 * Mark this command in-flight.
716 	 */
717 	p->pending |= 1 << slot;
718 
719 	/*
720 	 * Stuff request onto busy list
721 	 */
722 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
723 
724 	err = blockif_flush(p->bctx, breq);
725 	assert(err == 0);
726 }
727 
728 static inline void
729 read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
730 		void *buf, int size)
731 {
732 	struct ahci_cmd_hdr *hdr;
733 	struct ahci_prdt_entry *prdt;
734 	void *to;
735 	int i, len;
736 
737 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
738 	len = size;
739 	to = buf;
740 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
741 	for (i = 0; i < hdr->prdtl && len; i++) {
742 		uint8_t *ptr;
743 		uint32_t dbcsz;
744 		int sublen;
745 
746 		dbcsz = (prdt->dbc & DBCMASK) + 1;
747 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
748 		sublen = len < dbcsz ? len : dbcsz;
749 		memcpy(to, ptr, sublen);
750 		len -= sublen;
751 		to += sublen;
752 		prdt++;
753 	}
754 }
755 
756 static void
757 ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
758 {
759 	struct ahci_ioreq *aior;
760 	struct blockif_req *breq;
761 	uint8_t *entry;
762 	uint64_t elba;
763 	uint32_t len, elen;
764 	int err, first, ncq;
765 	uint8_t buf[512];
766 
767 	first = (done == 0);
768 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
769 		len = (uint16_t)cfis[13] << 8 | cfis[12];
770 		len *= 512;
771 		ncq = 0;
772 	} else { /* ATA_SEND_FPDMA_QUEUED */
773 		len = (uint16_t)cfis[11] << 8 | cfis[3];
774 		len *= 512;
775 		ncq = 1;
776 	}
777 	read_prdt(p, slot, cfis, buf, sizeof(buf));
778 
779 next:
780 	entry = &buf[done];
781 	elba = ((uint64_t)entry[5] << 40) |
782 		((uint64_t)entry[4] << 32) |
783 		((uint64_t)entry[3] << 24) |
784 		((uint64_t)entry[2] << 16) |
785 		((uint64_t)entry[1] << 8) |
786 		entry[0];
787 	elen = (uint16_t)entry[7] << 8 | entry[6];
788 	done += 8;
789 	if (elen == 0) {
790 		if (done >= len) {
791 			ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
792 			p->pending &= ~(1 << slot);
793 			ahci_check_stopped(p);
794 			if (!first)
795 				ahci_handle_port(p);
796 			return;
797 		}
798 		goto next;
799 	}
800 
801 	/*
802 	 * Pull request off free list
803 	 */
804 	aior = STAILQ_FIRST(&p->iofhd);
805 	assert(aior != NULL);
806 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
807 	aior->cfis = cfis;
808 	aior->slot = slot;
809 	aior->len = len;
810 	aior->done = done;
811 	aior->more = (len != done);
812 
813 	breq = &aior->io_req;
814 	breq->br_offset = elba * blockif_sectsz(p->bctx);
815 	breq->br_resid = elen * blockif_sectsz(p->bctx);
816 
817 	/*
818 	 * Mark this command in-flight.
819 	 */
820 	p->pending |= 1 << slot;
821 
822 	/*
823 	 * Stuff request onto busy list
824 	 */
825 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
826 
827 	if (ncq && first)
828 		ahci_write_fis_d2h_ncq(p, slot);
829 
830 	err = blockif_delete(p->bctx, breq);
831 	assert(err == 0);
832 }
833 
834 static inline void
835 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
836 		void *buf, int size)
837 {
838 	struct ahci_cmd_hdr *hdr;
839 	struct ahci_prdt_entry *prdt;
840 	void *from;
841 	int i, len;
842 
843 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
844 	len = size;
845 	from = buf;
846 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
847 	for (i = 0; i < hdr->prdtl && len; i++) {
848 		uint8_t *ptr;
849 		uint32_t dbcsz;
850 		int sublen;
851 
852 		dbcsz = (prdt->dbc & DBCMASK) + 1;
853 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
854 		sublen = len < dbcsz ? len : dbcsz;
855 		memcpy(ptr, from, sublen);
856 		len -= sublen;
857 		from += sublen;
858 		prdt++;
859 	}
860 	hdr->prdbc = size - len;
861 }
862 
/*
 * Store a checksum in the last byte of 'buf' so that the 8-bit sum of all
 * 'size' bytes is zero.
 *
 * The previous content of the last byte is ignored.  A non-positive 'size'
 * leaves the buffer untouched; without this guard the store would land
 * before the start of the buffer.
 */
static void
ahci_checksum(uint8_t *buf, int size)
{
	int i;
	uint8_t sum = 0;

	if (size <= 0)
		return;

	for (i = 0; i < size - 1; i++)
		sum += buf[i];
	buf[size - 1] = 0x100 - sum;
}
873 
874 static void
875 ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
876 {
877 	struct ahci_cmd_hdr *hdr;
878 	uint8_t buf[512];
879 
880 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
881 	if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 ||
882 	    cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
883 		ahci_write_fis_d2h(p, slot, cfis,
884 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
885 		return;
886 	}
887 
888 	memset(buf, 0, sizeof(buf));
889 	memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
890 	ahci_checksum(buf, sizeof(buf));
891 
892 	if (cfis[2] == ATA_READ_LOG_EXT)
893 		ahci_write_fis_piosetup(p);
894 	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
895 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
896 }
897 
898 static void
899 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
900 {
901 	struct ahci_cmd_hdr *hdr;
902 
903 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
904 	if (p->atapi || hdr->prdtl == 0) {
905 		ahci_write_fis_d2h(p, slot, cfis,
906 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
907 	} else {
908 		uint16_t buf[256];
909 		uint64_t sectors;
910 		int sectsz, psectsz, psectoff, candelete, ro;
911 		uint16_t cyl;
912 		uint8_t sech, heads;
913 
914 		ro = blockif_is_ro(p->bctx);
915 		candelete = blockif_candelete(p->bctx);
916 		sectsz = blockif_sectsz(p->bctx);
917 		sectors = blockif_size(p->bctx) / sectsz;
918 		blockif_chs(p->bctx, &cyl, &heads, &sech);
919 		blockif_psectsz(p->bctx, &psectsz, &psectoff);
920 		memset(buf, 0, sizeof(buf));
921 		buf[0] = 0x0040;
922 		buf[1] = cyl;
923 		buf[3] = heads;
924 		buf[6] = sech;
925 		ata_string((uint8_t *)(buf+10), p->ident, 20);
926 		ata_string((uint8_t *)(buf+23), "001", 8);
927 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
928 		buf[47] = (0x8000 | 128);
929 		buf[48] = 0;
930 		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
931 		buf[50] = (1 << 14);
932 		buf[53] = (1 << 1 | 1 << 2);
933 		if (p->mult_sectors)
934 			buf[59] = (0x100 | p->mult_sectors);
935 		if (sectors <= 0x0fffffff) {
936 			buf[60] = sectors;
937 			buf[61] = (sectors >> 16);
938 		} else {
939 			buf[60] = 0xffff;
940 			buf[61] = 0x0fff;
941 		}
942 		buf[63] = 0x7;
943 		if (p->xfermode & ATA_WDMA0)
944 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
945 		buf[64] = 0x3;
946 		buf[65] = 120;
947 		buf[66] = 120;
948 		buf[67] = 120;
949 		buf[68] = 120;
950 		buf[69] = 0;
951 		buf[75] = 31;
952 		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
953 			   ATA_SUPPORT_NCQ);
954 		buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
955 			   (p->ssts & ATA_SS_SPD_MASK) >> 3);
956 		buf[80] = 0x3f0;
957 		buf[81] = 0x28;
958 		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
959 			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
960 		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
961 			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
962 		buf[84] = (1 << 14);
963 		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
964 			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
965 		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
966 			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
967 		buf[87] = (1 << 14);
968 		buf[88] = 0x7f;
969 		if (p->xfermode & ATA_UDMA0)
970 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
971 		buf[100] = sectors;
972 		buf[101] = (sectors >> 16);
973 		buf[102] = (sectors >> 32);
974 		buf[103] = (sectors >> 48);
975 		if (candelete && !ro) {
976 			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
977 			buf[105] = 1;
978 			buf[169] = ATA_SUPPORT_DSM_TRIM;
979 		}
980 		buf[106] = 0x4000;
981 		buf[209] = 0x4000;
982 		if (psectsz > sectsz) {
983 			buf[106] |= 0x2000;
984 			buf[106] |= ffsl(psectsz / sectsz) - 1;
985 			buf[209] |= (psectoff / sectsz);
986 		}
987 		if (sectsz > 512) {
988 			buf[106] |= 0x1000;
989 			buf[117] = sectsz / 2;
990 			buf[118] = ((sectsz / 2) >> 16);
991 		}
992 		buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
993 		buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
994 		buf[222] = 0x1020;
995 		buf[255] = 0x00a5;
996 		ahci_checksum((uint8_t *)buf, sizeof(buf));
997 		ahci_write_fis_piosetup(p);
998 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
999 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1000 	}
1001 }
1002 
1003 static void
1004 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
1005 {
1006 	if (!p->atapi) {
1007 		ahci_write_fis_d2h(p, slot, cfis,
1008 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1009 	} else {
1010 		uint16_t buf[256];
1011 
1012 		memset(buf, 0, sizeof(buf));
1013 		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
1014 		ata_string((uint8_t *)(buf+10), p->ident, 20);
1015 		ata_string((uint8_t *)(buf+23), "001", 8);
1016 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
1017 		buf[49] = (1 << 9 | 1 << 8);
1018 		buf[50] = (1 << 14 | 1);
1019 		buf[53] = (1 << 2 | 1 << 1);
1020 		buf[62] = 0x3f;
1021 		buf[63] = 7;
1022 		if (p->xfermode & ATA_WDMA0)
1023 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
1024 		buf[64] = 3;
1025 		buf[65] = 120;
1026 		buf[66] = 120;
1027 		buf[67] = 120;
1028 		buf[68] = 120;
1029 		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
1030 		buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
1031 		buf[78] = (1 << 5);
1032 		buf[80] = 0x3f0;
1033 		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1034 			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1035 		buf[83] = (1 << 14);
1036 		buf[84] = (1 << 14);
1037 		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
1038 			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
1039 		buf[87] = (1 << 14);
1040 		buf[88] = 0x7f;
1041 		if (p->xfermode & ATA_UDMA0)
1042 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
1043 		buf[222] = 0x1020;
1044 		buf[255] = 0x00a5;
1045 		ahci_checksum((uint8_t *)buf, sizeof(buf));
1046 		ahci_write_fis_piosetup(p);
1047 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
1048 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
1049 	}
1050 }
1051 
1052 static void
1053 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
1054 {
1055 	uint8_t buf[36];
1056 	uint8_t *acmd;
1057 	int len;
1058 	uint32_t tfd;
1059 
1060 	acmd = cfis + 0x40;
1061 
1062 	if (acmd[1] & 1) {		/* VPD */
1063 		if (acmd[2] == 0) {	/* Supported VPD pages */
1064 			buf[0] = 0x05;
1065 			buf[1] = 0;
1066 			buf[2] = 0;
1067 			buf[3] = 1;
1068 			buf[4] = 0;
1069 			len = 4 + buf[3];
1070 		} else {
1071 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1072 			p->asc = 0x24;
1073 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1074 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1075 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1076 			return;
1077 		}
1078 	} else {
1079 		buf[0] = 0x05;
1080 		buf[1] = 0x80;
1081 		buf[2] = 0x00;
1082 		buf[3] = 0x21;
1083 		buf[4] = 31;
1084 		buf[5] = 0;
1085 		buf[6] = 0;
1086 		buf[7] = 0;
1087 		atapi_string(buf + 8, "BHYVE", 8);
1088 		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
1089 		atapi_string(buf + 32, "001", 4);
1090 		len = sizeof(buf);
1091 	}
1092 
1093 	if (len > acmd[4])
1094 		len = acmd[4];
1095 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1096 	write_prdt(p, slot, cfis, buf, len);
1097 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1098 }
1099 
1100 static void
1101 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
1102 {
1103 	uint8_t buf[8];
1104 	uint64_t sectors;
1105 
1106 	sectors = blockif_size(p->bctx) / 2048;
1107 	be32enc(buf, sectors - 1);
1108 	be32enc(buf + 4, 2048);
1109 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1110 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1111 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1112 }
1113 
1114 static void
1115 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
1116 {
1117 	uint8_t *acmd;
1118 	uint8_t format;
1119 	int len;
1120 
1121 	acmd = cfis + 0x40;
1122 
1123 	len = be16dec(acmd + 7);
1124 	format = acmd[9] >> 6;
1125 	switch (format) {
1126 	case 0:
1127 	{
1128 		int msf, size;
1129 		uint64_t sectors;
1130 		uint8_t start_track, buf[20], *bp;
1131 
1132 		msf = (acmd[1] >> 1) & 1;
1133 		start_track = acmd[6];
1134 		if (start_track > 1 && start_track != 0xaa) {
1135 			uint32_t tfd;
1136 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1137 			p->asc = 0x24;
1138 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1139 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1140 			ahci_write_fis_d2h(p, slot, cfis, tfd);
1141 			return;
1142 		}
1143 		bp = buf + 2;
1144 		*bp++ = 1;
1145 		*bp++ = 1;
1146 		if (start_track <= 1) {
1147 			*bp++ = 0;
1148 			*bp++ = 0x14;
1149 			*bp++ = 1;
1150 			*bp++ = 0;
1151 			if (msf) {
1152 				*bp++ = 0;
1153 				lba_to_msf(bp, 0);
1154 				bp += 3;
1155 			} else {
1156 				*bp++ = 0;
1157 				*bp++ = 0;
1158 				*bp++ = 0;
1159 				*bp++ = 0;
1160 			}
1161 		}
1162 		*bp++ = 0;
1163 		*bp++ = 0x14;
1164 		*bp++ = 0xaa;
1165 		*bp++ = 0;
1166 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1167 		sectors >>= 2;
1168 		if (msf) {
1169 			*bp++ = 0;
1170 			lba_to_msf(bp, sectors);
1171 			bp += 3;
1172 		} else {
1173 			be32enc(bp, sectors);
1174 			bp += 4;
1175 		}
1176 		size = bp - buf;
1177 		be16enc(buf, size - 2);
1178 		if (len > size)
1179 			len = size;
1180 		write_prdt(p, slot, cfis, buf, len);
1181 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1182 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1183 		break;
1184 	}
1185 	case 1:
1186 	{
1187 		uint8_t buf[12];
1188 
1189 		memset(buf, 0, sizeof(buf));
1190 		buf[1] = 0xa;
1191 		buf[2] = 0x1;
1192 		buf[3] = 0x1;
1193 		if (len > sizeof(buf))
1194 			len = sizeof(buf);
1195 		write_prdt(p, slot, cfis, buf, len);
1196 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1197 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1198 		break;
1199 	}
1200 	case 2:
1201 	{
1202 		int msf, size;
1203 		uint64_t sectors;
1204 		uint8_t start_track, *bp, buf[50];
1205 
1206 		msf = (acmd[1] >> 1) & 1;
1207 		start_track = acmd[6];
1208 		bp = buf + 2;
1209 		*bp++ = 1;
1210 		*bp++ = 1;
1211 
1212 		*bp++ = 1;
1213 		*bp++ = 0x14;
1214 		*bp++ = 0;
1215 		*bp++ = 0xa0;
1216 		*bp++ = 0;
1217 		*bp++ = 0;
1218 		*bp++ = 0;
1219 		*bp++ = 0;
1220 		*bp++ = 1;
1221 		*bp++ = 0;
1222 		*bp++ = 0;
1223 
1224 		*bp++ = 1;
1225 		*bp++ = 0x14;
1226 		*bp++ = 0;
1227 		*bp++ = 0xa1;
1228 		*bp++ = 0;
1229 		*bp++ = 0;
1230 		*bp++ = 0;
1231 		*bp++ = 0;
1232 		*bp++ = 1;
1233 		*bp++ = 0;
1234 		*bp++ = 0;
1235 
1236 		*bp++ = 1;
1237 		*bp++ = 0x14;
1238 		*bp++ = 0;
1239 		*bp++ = 0xa2;
1240 		*bp++ = 0;
1241 		*bp++ = 0;
1242 		*bp++ = 0;
1243 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
1244 		sectors >>= 2;
1245 		if (msf) {
1246 			*bp++ = 0;
1247 			lba_to_msf(bp, sectors);
1248 			bp += 3;
1249 		} else {
1250 			be32enc(bp, sectors);
1251 			bp += 4;
1252 		}
1253 
1254 		*bp++ = 1;
1255 		*bp++ = 0x14;
1256 		*bp++ = 0;
1257 		*bp++ = 1;
1258 		*bp++ = 0;
1259 		*bp++ = 0;
1260 		*bp++ = 0;
1261 		if (msf) {
1262 			*bp++ = 0;
1263 			lba_to_msf(bp, 0);
1264 			bp += 3;
1265 		} else {
1266 			*bp++ = 0;
1267 			*bp++ = 0;
1268 			*bp++ = 0;
1269 			*bp++ = 0;
1270 		}
1271 
1272 		size = bp - buf;
1273 		be16enc(buf, size - 2);
1274 		if (len > size)
1275 			len = size;
1276 		write_prdt(p, slot, cfis, buf, len);
1277 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1278 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1279 		break;
1280 	}
1281 	default:
1282 	{
1283 		uint32_t tfd;
1284 
1285 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1286 		p->asc = 0x24;
1287 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1288 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1289 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1290 		break;
1291 	}
1292 	}
1293 }
1294 
1295 static void
1296 atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
1297 {
1298 	uint8_t buf[16];
1299 
1300 	memset(buf, 0, sizeof(buf));
1301 	buf[3] = 8;
1302 
1303 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1304 	write_prdt(p, slot, cfis, buf, sizeof(buf));
1305 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1306 }
1307 
1308 static void
1309 atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
1310 {
1311 	struct ahci_ioreq *aior;
1312 	struct ahci_cmd_hdr *hdr;
1313 	struct ahci_prdt_entry *prdt;
1314 	struct blockif_req *breq;
1315 	struct pci_ahci_softc *sc;
1316 	uint8_t *acmd;
1317 	uint64_t lba;
1318 	uint32_t len;
1319 	int err;
1320 
1321 	sc = p->pr_sc;
1322 	acmd = cfis + 0x40;
1323 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1324 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1325 
1326 	lba = be32dec(acmd + 2);
1327 	if (acmd[0] == READ_10)
1328 		len = be16dec(acmd + 7);
1329 	else
1330 		len = be32dec(acmd + 6);
1331 	if (len == 0) {
1332 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1333 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1334 	}
1335 	lba *= 2048;
1336 	len *= 2048;
1337 
1338 	/*
1339 	 * Pull request off free list
1340 	 */
1341 	aior = STAILQ_FIRST(&p->iofhd);
1342 	assert(aior != NULL);
1343 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
1344 	aior->cfis = cfis;
1345 	aior->slot = slot;
1346 	aior->len = len;
1347 	aior->done = done;
1348 	breq = &aior->io_req;
1349 	breq->br_offset = lba + done;
1350 	ahci_build_iov(p, aior, prdt, hdr->prdtl);
1351 
1352 	/* Mark this command in-flight. */
1353 	p->pending |= 1 << slot;
1354 
1355 	/* Stuff request onto busy list. */
1356 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
1357 
1358 	err = blockif_read(p->bctx, breq);
1359 	assert(err == 0);
1360 }
1361 
1362 static void
1363 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1364 {
1365 	uint8_t buf[64];
1366 	uint8_t *acmd;
1367 	int len;
1368 
1369 	acmd = cfis + 0x40;
1370 	len = acmd[4];
1371 	if (len > sizeof(buf))
1372 		len = sizeof(buf);
1373 	memset(buf, 0, len);
1374 	buf[0] = 0x70 | (1 << 7);
1375 	buf[2] = p->sense_key;
1376 	buf[7] = 10;
1377 	buf[12] = p->asc;
1378 	write_prdt(p, slot, cfis, buf, len);
1379 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1380 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1381 }
1382 
1383 static void
1384 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
1385 {
1386 	uint8_t *acmd = cfis + 0x40;
1387 	uint32_t tfd;
1388 
1389 	switch (acmd[4] & 3) {
1390 	case 0:
1391 	case 1:
1392 	case 3:
1393 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1394 		tfd = ATA_S_READY | ATA_S_DSC;
1395 		break;
1396 	case 2:
1397 		/* TODO eject media */
1398 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1399 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1400 		p->asc = 0x53;
1401 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1402 		break;
1403 	}
1404 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1405 }
1406 
1407 static void
1408 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
1409 {
1410 	uint8_t *acmd;
1411 	uint32_t tfd;
1412 	uint8_t pc, code;
1413 	int len;
1414 
1415 	acmd = cfis + 0x40;
1416 	len = be16dec(acmd + 7);
1417 	pc = acmd[2] >> 6;
1418 	code = acmd[2] & 0x3f;
1419 
1420 	switch (pc) {
1421 	case 0:
1422 		switch (code) {
1423 		case MODEPAGE_RW_ERROR_RECOVERY:
1424 		{
1425 			uint8_t buf[16];
1426 
1427 			if (len > sizeof(buf))
1428 				len = sizeof(buf);
1429 
1430 			memset(buf, 0, sizeof(buf));
1431 			be16enc(buf, 16 - 2);
1432 			buf[2] = 0x70;
1433 			buf[8] = 0x01;
1434 			buf[9] = 16 - 10;
1435 			buf[11] = 0x05;
1436 			write_prdt(p, slot, cfis, buf, len);
1437 			tfd = ATA_S_READY | ATA_S_DSC;
1438 			break;
1439 		}
1440 		case MODEPAGE_CD_CAPABILITIES:
1441 		{
1442 			uint8_t buf[30];
1443 
1444 			if (len > sizeof(buf))
1445 				len = sizeof(buf);
1446 
1447 			memset(buf, 0, sizeof(buf));
1448 			be16enc(buf, 30 - 2);
1449 			buf[2] = 0x70;
1450 			buf[8] = 0x2A;
1451 			buf[9] = 30 - 10;
1452 			buf[10] = 0x08;
1453 			buf[12] = 0x71;
1454 			be16enc(&buf[18], 2);
1455 			be16enc(&buf[20], 512);
1456 			write_prdt(p, slot, cfis, buf, len);
1457 			tfd = ATA_S_READY | ATA_S_DSC;
1458 			break;
1459 		}
1460 		default:
1461 			goto error;
1462 			break;
1463 		}
1464 		break;
1465 	case 3:
1466 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1467 		p->asc = 0x39;
1468 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1469 		break;
1470 error:
1471 	case 1:
1472 	case 2:
1473 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1474 		p->asc = 0x24;
1475 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1476 		break;
1477 	}
1478 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1479 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1480 }
1481 
1482 static void
1483 atapi_get_event_status_notification(struct ahci_port *p, int slot,
1484     uint8_t *cfis)
1485 {
1486 	uint8_t *acmd;
1487 	uint32_t tfd;
1488 
1489 	acmd = cfis + 0x40;
1490 
1491 	/* we don't support asynchronous operation */
1492 	if (!(acmd[1] & 1)) {
1493 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1494 		p->asc = 0x24;
1495 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1496 	} else {
1497 		uint8_t buf[8];
1498 		int len;
1499 
1500 		len = be16dec(acmd + 7);
1501 		if (len > sizeof(buf))
1502 			len = sizeof(buf);
1503 
1504 		memset(buf, 0, sizeof(buf));
1505 		be16enc(buf, 8 - 2);
1506 		buf[2] = 0x04;
1507 		buf[3] = 0x10;
1508 		buf[5] = 0x02;
1509 		write_prdt(p, slot, cfis, buf, len);
1510 		tfd = ATA_S_READY | ATA_S_DSC;
1511 	}
1512 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1513 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1514 }
1515 
1516 static void
1517 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1518 {
1519 	uint8_t *acmd;
1520 
1521 	acmd = cfis + 0x40;
1522 
1523 #ifdef AHCI_DEBUG
1524 	{
1525 		int i;
1526 		DPRINTF("ACMD:");
1527 		for (i = 0; i < 16; i++)
1528 			DPRINTF("%02x ", acmd[i]);
1529 		DPRINTF("\n");
1530 	}
1531 #endif
1532 
1533 	switch (acmd[0]) {
1534 	case TEST_UNIT_READY:
1535 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1536 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1537 		break;
1538 	case INQUIRY:
1539 		atapi_inquiry(p, slot, cfis);
1540 		break;
1541 	case READ_CAPACITY:
1542 		atapi_read_capacity(p, slot, cfis);
1543 		break;
1544 	case PREVENT_ALLOW:
1545 		/* TODO */
1546 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1547 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1548 		break;
1549 	case READ_TOC:
1550 		atapi_read_toc(p, slot, cfis);
1551 		break;
1552 	case REPORT_LUNS:
1553 		atapi_report_luns(p, slot, cfis);
1554 		break;
1555 	case READ_10:
1556 	case READ_12:
1557 		atapi_read(p, slot, cfis, 0);
1558 		break;
1559 	case REQUEST_SENSE:
1560 		atapi_request_sense(p, slot, cfis);
1561 		break;
1562 	case START_STOP_UNIT:
1563 		atapi_start_stop_unit(p, slot, cfis);
1564 		break;
1565 	case MODE_SENSE_10:
1566 		atapi_mode_sense(p, slot, cfis);
1567 		break;
1568 	case GET_EVENT_STATUS_NOTIFICATION:
1569 		atapi_get_event_status_notification(p, slot, cfis);
1570 		break;
1571 	default:
1572 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1573 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1574 		p->asc = 0x20;
1575 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
1576 				ATA_S_READY | ATA_S_ERROR);
1577 		break;
1578 	}
1579 }
1580 
1581 static void
1582 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
1583 {
1584 
1585 	p->tfd |= ATA_S_BUSY;
1586 	switch (cfis[2]) {
1587 	case ATA_ATA_IDENTIFY:
1588 		handle_identify(p, slot, cfis);
1589 		break;
1590 	case ATA_SETFEATURES:
1591 	{
1592 		switch (cfis[3]) {
1593 		case ATA_SF_ENAB_SATA_SF:
1594 			switch (cfis[12]) {
1595 			case ATA_SATA_SF_AN:
1596 				p->tfd = ATA_S_DSC | ATA_S_READY;
1597 				break;
1598 			default:
1599 				p->tfd = ATA_S_ERROR | ATA_S_READY;
1600 				p->tfd |= (ATA_ERROR_ABORT << 8);
1601 				break;
1602 			}
1603 			break;
1604 		case ATA_SF_ENAB_WCACHE:
1605 		case ATA_SF_DIS_WCACHE:
1606 		case ATA_SF_ENAB_RCACHE:
1607 		case ATA_SF_DIS_RCACHE:
1608 			p->tfd = ATA_S_DSC | ATA_S_READY;
1609 			break;
1610 		case ATA_SF_SETXFER:
1611 		{
1612 			switch (cfis[12] & 0xf8) {
1613 			case ATA_PIO:
1614 			case ATA_PIO0:
1615 				break;
1616 			case ATA_WDMA0:
1617 			case ATA_UDMA0:
1618 				p->xfermode = (cfis[12] & 0x7);
1619 				break;
1620 			}
1621 			p->tfd = ATA_S_DSC | ATA_S_READY;
1622 			break;
1623 		}
1624 		default:
1625 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1626 			p->tfd |= (ATA_ERROR_ABORT << 8);
1627 			break;
1628 		}
1629 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1630 		break;
1631 	}
1632 	case ATA_SET_MULTI:
1633 		if (cfis[12] != 0 &&
1634 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
1635 			p->tfd = ATA_S_ERROR | ATA_S_READY;
1636 			p->tfd |= (ATA_ERROR_ABORT << 8);
1637 		} else {
1638 			p->mult_sectors = cfis[12];
1639 			p->tfd = ATA_S_DSC | ATA_S_READY;
1640 		}
1641 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
1642 		break;
1643 	case ATA_READ:
1644 	case ATA_WRITE:
1645 	case ATA_READ48:
1646 	case ATA_WRITE48:
1647 	case ATA_READ_MUL:
1648 	case ATA_WRITE_MUL:
1649 	case ATA_READ_MUL48:
1650 	case ATA_WRITE_MUL48:
1651 	case ATA_READ_DMA:
1652 	case ATA_WRITE_DMA:
1653 	case ATA_READ_DMA48:
1654 	case ATA_WRITE_DMA48:
1655 	case ATA_READ_FPDMA_QUEUED:
1656 	case ATA_WRITE_FPDMA_QUEUED:
1657 		ahci_handle_rw(p, slot, cfis, 0);
1658 		break;
1659 	case ATA_FLUSHCACHE:
1660 	case ATA_FLUSHCACHE48:
1661 		ahci_handle_flush(p, slot, cfis);
1662 		break;
1663 	case ATA_DATA_SET_MANAGEMENT:
1664 		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
1665 		    cfis[13] == 0 && cfis[12] == 1) {
1666 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1667 			break;
1668 		}
1669 		ahci_write_fis_d2h(p, slot, cfis,
1670 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1671 		break;
1672 	case ATA_SEND_FPDMA_QUEUED:
1673 		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
1674 		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
1675 		    cfis[11] == 0 && cfis[13] == 1) {
1676 			ahci_handle_dsm_trim(p, slot, cfis, 0);
1677 			break;
1678 		}
1679 		ahci_write_fis_d2h(p, slot, cfis,
1680 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1681 		break;
1682 	case ATA_READ_LOG_EXT:
1683 	case ATA_READ_LOG_DMA_EXT:
1684 		ahci_handle_read_log(p, slot, cfis);
1685 		break;
1686 	case ATA_SECURITY_FREEZE_LOCK:
1687 	case ATA_SMART_CMD:
1688 	case ATA_NOP:
1689 		ahci_write_fis_d2h(p, slot, cfis,
1690 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1691 		break;
1692 	case ATA_CHECK_POWER_MODE:
1693 		cfis[12] = 0xff;	/* always on */
1694 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1695 		break;
1696 	case ATA_STANDBY_CMD:
1697 	case ATA_STANDBY_IMMEDIATE:
1698 	case ATA_IDLE_CMD:
1699 	case ATA_IDLE_IMMEDIATE:
1700 	case ATA_SLEEP:
1701 	case ATA_READ_VERIFY:
1702 	case ATA_READ_VERIFY48:
1703 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
1704 		break;
1705 	case ATA_ATAPI_IDENTIFY:
1706 		handle_atapi_identify(p, slot, cfis);
1707 		break;
1708 	case ATA_PACKET_CMD:
1709 		if (!p->atapi) {
1710 			ahci_write_fis_d2h(p, slot, cfis,
1711 			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1712 		} else
1713 			handle_packet_cmd(p, slot, cfis);
1714 		break;
1715 	default:
1716 		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
1717 		ahci_write_fis_d2h(p, slot, cfis,
1718 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
1719 		break;
1720 	}
1721 }
1722 
1723 static void
1724 ahci_handle_slot(struct ahci_port *p, int slot)
1725 {
1726 	struct ahci_cmd_hdr *hdr;
1727 	struct ahci_prdt_entry *prdt;
1728 	struct pci_ahci_softc *sc;
1729 	uint8_t *cfis;
1730 	int cfl;
1731 
1732 	sc = p->pr_sc;
1733 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1734 	cfl = (hdr->flags & 0x1f) * 4;
1735 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
1736 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
1737 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
1738 
1739 #ifdef AHCI_DEBUG
1740 	DPRINTF("\ncfis:");
1741 	for (i = 0; i < cfl; i++) {
1742 		if (i % 10 == 0)
1743 			DPRINTF("\n");
1744 		DPRINTF("%02x ", cfis[i]);
1745 	}
1746 	DPRINTF("\n");
1747 
1748 	for (i = 0; i < hdr->prdtl; i++) {
1749 		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
1750 		prdt++;
1751 	}
1752 #endif
1753 
1754 	if (cfis[0] != FIS_TYPE_REGH2D) {
1755 		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
1756 		return;
1757 	}
1758 
1759 	if (cfis[1] & 0x80) {
1760 		ahci_handle_cmd(p, slot, cfis);
1761 	} else {
1762 		if (cfis[15] & (1 << 2))
1763 			p->reset = 1;
1764 		else if (p->reset) {
1765 			p->reset = 0;
1766 			ahci_port_reset(p);
1767 		}
1768 		p->ci &= ~(1 << slot);
1769 	}
1770 }
1771 
1772 static void
1773 ahci_handle_port(struct ahci_port *p)
1774 {
1775 
1776 	if (!(p->cmd & AHCI_P_CMD_ST))
1777 		return;
1778 
1779 	/*
1780 	 * Search for any new commands to issue ignoring those that
1781 	 * are already in-flight.  Stop if device is busy or in error.
1782 	 */
1783 	for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) {
1784 		if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0)
1785 			break;
1786 		if (p->waitforclear)
1787 			break;
1788 		if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) {
1789 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
1790 			p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT;
1791 			ahci_handle_slot(p, p->ccs);
1792 		}
1793 	}
1794 }
1795 
1796 /*
1797  * blockif callback routine - this runs in the context of the blockif
1798  * i/o thread, so the mutex needs to be acquired.
1799  */
1800 static void
1801 ata_ioreq_cb(struct blockif_req *br, int err)
1802 {
1803 	struct ahci_cmd_hdr *hdr;
1804 	struct ahci_ioreq *aior;
1805 	struct ahci_port *p;
1806 	struct pci_ahci_softc *sc;
1807 	uint32_t tfd;
1808 	uint8_t *cfis;
1809 	int slot, ncq, dsm;
1810 
1811 	DPRINTF("%s %d\n", __func__, err);
1812 
1813 	ncq = dsm = 0;
1814 	aior = br->br_param;
1815 	p = aior->io_pr;
1816 	cfis = aior->cfis;
1817 	slot = aior->slot;
1818 	sc = p->pr_sc;
1819 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
1820 
1821 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
1822 	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
1823 	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
1824 		ncq = 1;
1825 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
1826 	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
1827 	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
1828 		dsm = 1;
1829 
1830 	pthread_mutex_lock(&sc->mtx);
1831 
1832 	/*
1833 	 * Delete the blockif request from the busy list
1834 	 */
1835 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1836 
1837 	/*
1838 	 * Move the blockif request back to the free list
1839 	 */
1840 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1841 
1842 	if (!err)
1843 		hdr->prdbc = aior->done;
1844 
1845 	if (!err && aior->more) {
1846 		if (dsm)
1847 			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
1848 		else
1849 			ahci_handle_rw(p, slot, cfis, aior->done);
1850 		goto out;
1851 	}
1852 
1853 	if (!err)
1854 		tfd = ATA_S_READY | ATA_S_DSC;
1855 	else
1856 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
1857 	if (ncq)
1858 		ahci_write_fis_sdb(p, slot, cfis, tfd);
1859 	else
1860 		ahci_write_fis_d2h(p, slot, cfis, tfd);
1861 
1862 	/*
1863 	 * This command is now complete.
1864 	 */
1865 	p->pending &= ~(1 << slot);
1866 
1867 	ahci_check_stopped(p);
1868 	ahci_handle_port(p);
1869 out:
1870 	pthread_mutex_unlock(&sc->mtx);
1871 	DPRINTF("%s exit\n", __func__);
1872 }
1873 
1874 static void
1875 atapi_ioreq_cb(struct blockif_req *br, int err)
1876 {
1877 	struct ahci_cmd_hdr *hdr;
1878 	struct ahci_ioreq *aior;
1879 	struct ahci_port *p;
1880 	struct pci_ahci_softc *sc;
1881 	uint8_t *cfis;
1882 	uint32_t tfd;
1883 	int slot;
1884 
1885 	DPRINTF("%s %d\n", __func__, err);
1886 
1887 	aior = br->br_param;
1888 	p = aior->io_pr;
1889 	cfis = aior->cfis;
1890 	slot = aior->slot;
1891 	sc = p->pr_sc;
1892 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
1893 
1894 	pthread_mutex_lock(&sc->mtx);
1895 
1896 	/*
1897 	 * Delete the blockif request from the busy list
1898 	 */
1899 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
1900 
1901 	/*
1902 	 * Move the blockif request back to the free list
1903 	 */
1904 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
1905 
1906 	if (!err)
1907 		hdr->prdbc = aior->done;
1908 
1909 	if (!err && aior->more) {
1910 		atapi_read(p, slot, cfis, aior->done);
1911 		goto out;
1912 	}
1913 
1914 	if (!err) {
1915 		tfd = ATA_S_READY | ATA_S_DSC;
1916 	} else {
1917 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
1918 		p->asc = 0x21;
1919 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
1920 	}
1921 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
1922 	ahci_write_fis_d2h(p, slot, cfis, tfd);
1923 
1924 	/*
1925 	 * This command is now complete.
1926 	 */
1927 	p->pending &= ~(1 << slot);
1928 
1929 	ahci_check_stopped(p);
1930 	ahci_handle_port(p);
1931 out:
1932 	pthread_mutex_unlock(&sc->mtx);
1933 	DPRINTF("%s exit\n", __func__);
1934 }
1935 
1936 static void
1937 pci_ahci_ioreq_init(struct ahci_port *pr)
1938 {
1939 	struct ahci_ioreq *vr;
1940 	int i;
1941 
1942 	pr->ioqsz = blockif_queuesz(pr->bctx);
1943 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
1944 	STAILQ_INIT(&pr->iofhd);
1945 
1946 	/*
1947 	 * Add all i/o request entries to the free queue
1948 	 */
1949 	for (i = 0; i < pr->ioqsz; i++) {
1950 		vr = &pr->ioreq[i];
1951 		vr->io_pr = pr;
1952 		if (!pr->atapi)
1953 			vr->io_req.br_callback = ata_ioreq_cb;
1954 		else
1955 			vr->io_req.br_callback = atapi_ioreq_cb;
1956 		vr->io_req.br_param = vr;
1957 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
1958 	}
1959 
1960 	TAILQ_INIT(&pr->iobhd);
1961 }
1962 
1963 static void
1964 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
1965 {
1966 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
1967 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
1968 	struct ahci_port *p = &sc->port[port];
1969 
1970 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
1971 		port, offset, value);
1972 
1973 	switch (offset) {
1974 	case AHCI_P_CLB:
1975 		p->clb = value;
1976 		break;
1977 	case AHCI_P_CLBU:
1978 		p->clbu = value;
1979 		break;
1980 	case AHCI_P_FB:
1981 		p->fb = value;
1982 		break;
1983 	case AHCI_P_FBU:
1984 		p->fbu = value;
1985 		break;
1986 	case AHCI_P_IS:
1987 		p->is &= ~value;
1988 		break;
1989 	case AHCI_P_IE:
1990 		p->ie = value & 0xFDC000FF;
1991 		ahci_generate_intr(sc);
1992 		break;
1993 	case AHCI_P_CMD:
1994 	{
1995 		p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
1996 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
1997 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
1998 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK);
1999 		p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD |
2000 		    AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE |
2001 		    AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE |
2002 		    AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value;
2003 
2004 		if (!(value & AHCI_P_CMD_ST)) {
2005 			ahci_port_stop(p);
2006 		} else {
2007 			uint64_t clb;
2008 
2009 			p->cmd |= AHCI_P_CMD_CR;
2010 			clb = (uint64_t)p->clbu << 32 | p->clb;
2011 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
2012 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
2013 		}
2014 
2015 		if (value & AHCI_P_CMD_FRE) {
2016 			uint64_t fb;
2017 
2018 			p->cmd |= AHCI_P_CMD_FR;
2019 			fb = (uint64_t)p->fbu << 32 | p->fb;
2020 			/* we don't support FBSCP, so rfis size is 256Bytes */
2021 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
2022 		} else {
2023 			p->cmd &= ~AHCI_P_CMD_FR;
2024 		}
2025 
2026 		if (value & AHCI_P_CMD_CLO) {
2027 			p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ);
2028 			p->cmd &= ~AHCI_P_CMD_CLO;
2029 		}
2030 
2031 		if (value & AHCI_P_CMD_ICC_MASK) {
2032 			p->cmd &= ~AHCI_P_CMD_ICC_MASK;
2033 		}
2034 
2035 		ahci_handle_port(p);
2036 		break;
2037 	}
2038 	case AHCI_P_TFD:
2039 	case AHCI_P_SIG:
2040 	case AHCI_P_SSTS:
2041 		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
2042 		break;
2043 	case AHCI_P_SCTL:
2044 		p->sctl = value;
2045 		if (!(p->cmd & AHCI_P_CMD_ST)) {
2046 			if (value & ATA_SC_DET_RESET)
2047 				ahci_port_reset(p);
2048 		}
2049 		break;
2050 	case AHCI_P_SERR:
2051 		p->serr &= ~value;
2052 		break;
2053 	case AHCI_P_SACT:
2054 		p->sact |= value;
2055 		break;
2056 	case AHCI_P_CI:
2057 		p->ci |= value;
2058 		ahci_handle_port(p);
2059 		break;
2060 	case AHCI_P_SNTF:
2061 	case AHCI_P_FBS:
2062 	default:
2063 		break;
2064 	}
2065 }
2066 
2067 static void
2068 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
2069 {
2070 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
2071 		offset, value);
2072 
2073 	switch (offset) {
2074 	case AHCI_CAP:
2075 	case AHCI_PI:
2076 	case AHCI_VS:
2077 	case AHCI_CAP2:
2078 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
2079 		break;
2080 	case AHCI_GHC:
2081 		if (value & AHCI_GHC_HR)
2082 			ahci_reset(sc);
2083 		else if (value & AHCI_GHC_IE) {
2084 			sc->ghc |= AHCI_GHC_IE;
2085 			ahci_generate_intr(sc);
2086 		}
2087 		break;
2088 	case AHCI_IS:
2089 		sc->is &= ~value;
2090 		ahci_generate_intr(sc);
2091 		break;
2092 	default:
2093 		break;
2094 	}
2095 }
2096 
2097 static void
2098 pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
2099 		int baridx, uint64_t offset, int size, uint64_t value)
2100 {
2101 	struct pci_ahci_softc *sc = pi->pi_arg;
2102 
2103 	assert(baridx == 5);
2104 	assert((offset % 4) == 0 && size == 4);
2105 
2106 	pthread_mutex_lock(&sc->mtx);
2107 
2108 	if (offset < AHCI_OFFSET)
2109 		pci_ahci_host_write(sc, offset, value);
2110 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2111 		pci_ahci_port_write(sc, offset, value);
2112 	else
2113 		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
2114 
2115 	pthread_mutex_unlock(&sc->mtx);
2116 }
2117 
2118 static uint64_t
2119 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
2120 {
2121 	uint32_t value;
2122 
2123 	switch (offset) {
2124 	case AHCI_CAP:
2125 	case AHCI_GHC:
2126 	case AHCI_IS:
2127 	case AHCI_PI:
2128 	case AHCI_VS:
2129 	case AHCI_CCCC:
2130 	case AHCI_CCCP:
2131 	case AHCI_EM_LOC:
2132 	case AHCI_EM_CTL:
2133 	case AHCI_CAP2:
2134 	{
2135 		uint32_t *p = &sc->cap;
2136 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
2137 		value = *p;
2138 		break;
2139 	}
2140 	default:
2141 		value = 0;
2142 		break;
2143 	}
2144 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
2145 		offset, value);
2146 
2147 	return (value);
2148 }
2149 
2150 static uint64_t
2151 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
2152 {
2153 	uint32_t value;
2154 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
2155 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
2156 
2157 	switch (offset) {
2158 	case AHCI_P_CLB:
2159 	case AHCI_P_CLBU:
2160 	case AHCI_P_FB:
2161 	case AHCI_P_FBU:
2162 	case AHCI_P_IS:
2163 	case AHCI_P_IE:
2164 	case AHCI_P_CMD:
2165 	case AHCI_P_TFD:
2166 	case AHCI_P_SIG:
2167 	case AHCI_P_SSTS:
2168 	case AHCI_P_SCTL:
2169 	case AHCI_P_SERR:
2170 	case AHCI_P_SACT:
2171 	case AHCI_P_CI:
2172 	case AHCI_P_SNTF:
2173 	case AHCI_P_FBS:
2174 	{
2175 		uint32_t *p= &sc->port[port].clb;
2176 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
2177 		value = *p;
2178 		break;
2179 	}
2180 	default:
2181 		value = 0;
2182 		break;
2183 	}
2184 
2185 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
2186 		port, offset, value);
2187 
2188 	return value;
2189 }
2190 
2191 static uint64_t
2192 pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
2193     uint64_t regoff, int size)
2194 {
2195 	struct pci_ahci_softc *sc = pi->pi_arg;
2196 	uint64_t offset;
2197 	uint32_t value;
2198 
2199 	assert(baridx == 5);
2200 	assert(size == 1 || size == 2 || size == 4);
2201 	assert((regoff & (size - 1)) == 0);
2202 
2203 	pthread_mutex_lock(&sc->mtx);
2204 
2205 	offset = regoff & ~0x3;	    /* round down to a multiple of 4 bytes */
2206 	if (offset < AHCI_OFFSET)
2207 		value = pci_ahci_host_read(sc, offset);
2208 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
2209 		value = pci_ahci_port_read(sc, offset);
2210 	else {
2211 		value = 0;
2212 		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n",
2213 		    regoff);
2214 	}
2215 	value >>= 8 * (regoff & 0x3);
2216 
2217 	pthread_mutex_unlock(&sc->mtx);
2218 
2219 	return (value);
2220 }
2221 
2222 static int
2223 pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
2224 {
2225 	char bident[sizeof("XX:X:X")];
2226 	struct blockif_ctxt *bctxt;
2227 	struct pci_ahci_softc *sc;
2228 	int ret, slots;
2229 	MD5_CTX mdctx;
2230 	u_char digest[16];
2231 
2232 	ret = 0;
2233 
2234 	if (opts == NULL) {
2235 		fprintf(stderr, "pci_ahci: backing device required\n");
2236 		return (1);
2237 	}
2238 
2239 #ifdef AHCI_DEBUG
2240 	dbg = fopen("/tmp/log", "w+");
2241 #endif
2242 
2243 	sc = calloc(1, sizeof(struct pci_ahci_softc));
2244 	pi->pi_arg = sc;
2245 	sc->asc_pi = pi;
2246 	sc->ports = MAX_PORTS;
2247 
2248 	/*
2249 	 * Only use port 0 for a backing device. All other ports will be
2250 	 * marked as unused
2251 	 */
2252 	sc->port[0].atapi = atapi;
2253 
2254 	/*
2255 	 * Attempt to open the backing image. Use the PCI
2256 	 * slot/func for the identifier string.
2257 	 */
2258 	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
2259 	bctxt = blockif_open(opts, bident);
2260 	if (bctxt == NULL) {
2261 		ret = 1;
2262 		goto open_fail;
2263 	}
2264 	sc->port[0].bctx = bctxt;
2265 	sc->port[0].pr_sc = sc;
2266 
2267 	/*
2268 	 * Create an identifier for the backing file. Use parts of the
2269 	 * md5 sum of the filename
2270 	 */
2271 	MD5Init(&mdctx);
2272 	MD5Update(&mdctx, opts, strlen(opts));
2273 	MD5Final(digest, &mdctx);
2274 	sprintf(sc->port[0].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
2275 	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
2276 
2277 	/*
2278 	 * Allocate blockif request structures and add them
2279 	 * to the free list
2280 	 */
2281 	pci_ahci_ioreq_init(&sc->port[0]);
2282 
2283 	pthread_mutex_init(&sc->mtx, NULL);
2284 
2285 	/* Intel ICH8 AHCI */
2286 	slots = sc->port[0].ioqsz;
2287 	if (slots > 32)
2288 		slots = 32;
2289 	--slots;
2290 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
2291 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
2292 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
2293 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
2294 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
2295 
2296 	/* Only port 0 implemented */
2297 	sc->pi = 1;
2298 	sc->vs = 0x10300;
2299 	sc->cap2 = AHCI_CAP2_APST;
2300 	ahci_reset(sc);
2301 
2302 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
2303 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
2304 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
2305 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
2306 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
2307 	pci_emul_add_msicap(pi, 1);
2308 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
2309 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
2310 
2311 	pci_lintr_request(pi);
2312 
2313 open_fail:
2314 	if (ret) {
2315 		if (sc->port[0].bctx != NULL)
2316 			blockif_close(sc->port[0].bctx);
2317 		free(sc);
2318 	}
2319 
2320 	return (ret);
2321 }
2322 
/*
 * Init entry point for the "ahci-hd" emulation: set up the controller
 * with port 0 as a disk (atapi = 0).
 */
static int
pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 0;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2329 
/*
 * Init entry point for the "ahci-cd" emulation: set up the controller
 * with port 0 as an ATAPI device (atapi = 1).
 */
static int
pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	const int is_atapi = 1;

	return (pci_ahci_init(ctx, pi, opts, is_atapi));
}
2336 
2337 /*
2338  * Use separate emulation names to distinguish drive and atapi devices
2339  */
2340 struct pci_devemu pci_de_ahci_hd = {
2341 	.pe_emu =	"ahci-hd",
2342 	.pe_init =	pci_ahci_hd_init,
2343 	.pe_barwrite =	pci_ahci_write,
2344 	.pe_barread =	pci_ahci_read
2345 };
2346 PCI_EMUL_SET(pci_de_ahci_hd);
2347 
2348 struct pci_devemu pci_de_ahci_cd = {
2349 	.pe_emu =	"ahci-cd",
2350 	.pe_init =	pci_ahci_atapi_init,
2351 	.pe_barwrite =	pci_ahci_write,
2352 	.pe_barread =	pci_ahci_read
2353 };
2354 PCI_EMUL_SET(pci_de_ahci_cd);
2355