1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
5  * Copyright (c) 2012 NetApp Inc.
6  * Copyright (c) 2012 Citrix Inc.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice unmodified, this list of conditions, and the following
14  *    disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /**
32  * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
 * to the Common Access Method (CAM) layer.  CAM control blocks (CCBs) are
34  * converted into VSCSI protocol messages which are delivered to the parent
35  * partition StorVSP driver over the Hyper-V VMBUS.
36  */
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include <sys/param.h>
41 #include <sys/proc.h>
42 #include <sys/condvar.h>
43 #include <sys/time.h>
44 #include <sys/systm.h>
45 #include <sys/sysctl.h>
46 #include <sys/sockio.h>
47 #include <sys/mbuf.h>
48 #include <sys/malloc.h>
49 #include <sys/module.h>
50 #include <sys/kernel.h>
51 #include <sys/queue.h>
52 #include <sys/lock.h>
53 #include <sys/sx.h>
54 #include <sys/taskqueue.h>
55 #include <sys/bus.h>
56 #include <sys/mutex.h>
57 #include <sys/callout.h>
58 #include <sys/smp.h>
59 #include <vm/vm.h>
60 #include <vm/pmap.h>
61 #include <vm/uma.h>
62 #include <sys/lock.h>
63 #include <sys/sema.h>
64 #include <sys/eventhandler.h>
65 #include <machine/bus.h>
66 
67 #include <cam/cam.h>
68 #include <cam/cam_ccb.h>
69 #include <cam/cam_periph.h>
70 #include <cam/cam_sim.h>
71 #include <cam/cam_xpt_sim.h>
72 #include <cam/cam_xpt_internal.h>
73 #include <cam/cam_debug.h>
74 #include <cam/scsi/scsi_all.h>
75 #include <cam/scsi/scsi_message.h>
76 
77 #include <dev/hyperv/include/hyperv.h>
78 #include <dev/hyperv/include/vmbus.h>
79 #include "hv_vstorage.h"
80 #include "vmbus_if.h"
81 
/*
 * Driver limits.  The LUN/disk counts and the I/O request counts are the
 * values placed into g_drv_props_table for the SCSI (storvsc) and IDE
 * (blkvsc) personalities.
 */
#define STORVSC_MAX_LUNS_PER_TARGET	(64)
#define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
#define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
#define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
#define STORVSC_MAX_TARGETS		(2)

/*
 * Number of vstor_packet bytes handed to the channel send routines.  This is
 * a run-time value: vmscsi_size_delta is rewritten when the protocol version
 * is negotiated in hv_storvsc_channel_init().
 */
#define VSTOR_PKT_SIZE	(sizeof(struct vstor_packet) - vmscsi_size_delta)
89 
90 /*
91  * 33 segments are needed to allow 128KB maxio, in case the data
92  * in the first page is _not_ PAGE_SIZE aligned, e.g.
93  *
94  *     |<----------- 128KB ----------->|
95  *     |                               |
96  *  0  2K 4K    8K   16K   124K  128K  130K
97  *  |  |  |     |     |       |     |  |
98  *  +--+--+-----+-----+.......+-----+--+--+
99  *  |  |  |     |     |       |     |  |  | DATA
100  *  |  |  |     |     |       |     |  |  |
101  *  +--+--+-----+-----+.......------+--+--+
102  *     |  |                         |  |
103  *     | 1|            31           | 1| ...... # of segments
104  */
/* Maximum number of page-sized segments in one request (see diagram above). */
#define STORVSC_DATA_SEGCNT_MAX		33
/* Each segment covers at most one page. */
#define STORVSC_DATA_SEGSZ_MAX		PAGE_SIZE
/*
 * Largest transfer: one segment fewer than the maximum, because an unaligned
 * buffer spends an extra segment on its partial first/last pages.
 */
#define STORVSC_DATA_SIZE_MAX		\
	((STORVSC_DATA_SEGCNT_MAX - 1) * STORVSC_DATA_SEGSZ_MAX)
109 
/* Forward declaration; the full definition follows hv_storvsc_request. */
struct storvsc_softc;

/*
 * Fixed-capacity scatter/gather list holding up to STORVSC_DATA_SEGCNT_MAX
 * iovec entries.
 */
struct hv_sglist {
	struct iovec sg_iov[STORVSC_DATA_SEGCNT_MAX];
	u_short	sg_nseg;	/* presumably the number of entries in use */
	u_short	sg_maxseg;	/* presumably the capacity of sg_iov -- TODO confirm */
};
117 
/* List node wrapping one hv_sglist so it can sit on a pool list. */
struct hv_sgl_node {
	LIST_ENTRY(hv_sgl_node) link;
	struct hv_sglist *sgl_data;
};
122 
/*
 * Pool of hv_sgl_node entries, split into an in-use list and a free list.
 * g_hv_sgl_page_pool is the single file-scope instance of the pool.
 */
struct hv_sgl_page_pool{
	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
	LIST_HEAD(, hv_sgl_node) free_sgl_list;
	boolean_t                is_init;	/* presumably set once the pool is populated */
} g_hv_sgl_page_pool;
128 
/* Data direction classification of a request. */
enum storvsc_request_type {
	WRITE_TYPE,
	READ_TYPE,
	UNKNOWN_TYPE
};
134 
/* Root of the driver-wide knobs: hw.storvsc.* */
SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
	"Hyper-V storage interface");

/* hw.storvsc.use_win8ext_flags: writable at run time. */
static u_int hv_storvsc_use_win8ext_flags = 1;
SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_win8ext_flags, CTLFLAG_RW,
	&hv_storvsc_use_win8ext_flags, 0,
	"Use win8 extension flags or not");

/* hw.storvsc.use_pim_unmapped: read-only sysctl, settable as a tunable. */
static u_int hv_storvsc_use_pim_unmapped = 1;
SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN,
	&hv_storvsc_use_pim_unmapped, 0,
	"Optimize storvsc by using unmapped I/O");

/*
 * hw.storvsc.ringbuffer_size: read-only sysctl, settable as a tunable.
 * Also feeds the STORVSC_MAX_IO computation.
 */
static u_int hv_storvsc_ringbuffer_size = (64 * PAGE_SIZE);
SYSCTL_UINT(_hw_storvsc, OID_AUTO, ringbuffer_size, CTLFLAG_RDTUN,
	&hv_storvsc_ringbuffer_size, 0, "Hyper-V storage ringbuffer size");

/* hw.storvsc.max_io: read-only sysctl, settable as a tunable. */
static u_int hv_storvsc_max_io = 512;
SYSCTL_UINT(_hw_storvsc, OID_AUTO, max_io, CTLFLAG_RDTUN,
	&hv_storvsc_max_io, 0, "Hyper-V storage max io limit");

/*
 * hw.storvsc.chan_cnt: when positive and smaller than what the host offers,
 * it caps the total number of channels (primary + sub-channels) requested
 * in hv_storvsc_channel_init().  0 leaves the host's value in effect.
 */
static int hv_storvsc_chan_cnt = 0;
SYSCTL_INT(_hw_storvsc, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
	&hv_storvsc_chan_cnt, 0, "# of channels to use");
#ifdef DIAGNOSTIC
/*
 * hw.storvsc.srb_status: srb_status value to inject (DIAGNOSTIC kernels
 * only); the default of -1 presumably means "do not inject".
 *
 * NOTE(review): the tunable below is named "hw_storvsc.srb_status" while
 * the sysctl lives under "hw.storvsc" -- confirm the spelling is intended.
 */
static int hv_storvsc_srb_status = -1;
SYSCTL_INT(_hw_storvsc, OID_AUTO, srb_status,  CTLFLAG_RW,
	&hv_storvsc_srb_status, 0, "srb_status to inject");
TUNABLE_INT("hw_storvsc.srb_status", &hv_storvsc_srb_status);
#endif /* DIAGNOSTIC */
165 
/*
 * Value of vmbus_chan_prplist_nelem() for the configured ring buffer size,
 * the maximum segment count and the current on-wire packet size.  Evaluated
 * at run time because both the ring size and VSTOR_PKT_SIZE are variables.
 */
#define STORVSC_MAX_IO						\
	vmbus_chan_prplist_nelem(hv_storvsc_ringbuffer_size,	\
	   STORVSC_DATA_SEGCNT_MAX, VSTOR_PKT_SIZE)
169 
/* Per-adapter statistics counters exported through the device sysctl tree. */
struct hv_storvsc_sysctl {
	u_long		data_bio_cnt;		/* "# of bio data block" */
	u_long		data_vaddr_cnt;		/* "# of vaddr data block" */
	u_long		data_sg_cnt;		/* "# of sg data block" */
	/* Successful sends, indexed by the channel selector used to send. */
	u_long		chan_send_cnt[MAXCPU];
};
176 
/*
 * A vmbus_gpa_range header immediately followed by room for
 * STORVSC_DATA_SEGCNT_MAX page entries.  Packed so the page array directly
 * follows the header; the address of gpa_range is what gets passed to
 * vmbus_chan_send_prplist().
 */
struct storvsc_gpa_range {
	struct vmbus_gpa_range	gpa_range;
	uint64_t		gpa_page[STORVSC_DATA_SEGCNT_MAX];
} __packed;
181 
/*
 * Driver-side state for one request sent to the host.  The request's address
 * is used as the VMBus transaction id, so the channel callback can map a
 * completion back to its request.
 */
struct hv_storvsc_request {
	LIST_ENTRY(hv_storvsc_request)	link;		/* linkage on softc hs_free_list */
	struct vstor_packet		vstor_packet;	/* packet sent to / filled from the host */
	int				prp_cnt;	/* page count passed to vmbus_chan_send_prplist() */
	struct storvsc_gpa_range	prp_list;	/* guest physical page list for the data */
	void				*sense_data;	/* destination for autosense data */
	uint8_t				sense_info_len;	/* sense buffer size in; valid length out */
	uint8_t				retries;
	union ccb			*ccb;		/* presumably the CAM CCB being serviced */
	struct storvsc_softc		*softc;		/* owning adapter */
	struct callout			callout;
	struct sema			synch_sema;	/* posted by the channel callback for init/reset requests */
	struct hv_sglist		*bounce_sgl;
	unsigned int			bounce_sgl_count;
	uint64_t			not_aligned_seg_bits;
	bus_dmamap_t			data_dmap;	/* created in storvsc_init_requests() */
};
199 
/* Per-adapter software context. */
struct storvsc_softc {
	struct vmbus_channel		*hs_chan;	/* primary VMBus channel */
	LIST_HEAD(, hv_storvsc_request)	hs_free_list;	/* preallocated requests */
	struct mtx			hs_lock;	/* dropped around channel sends in hv_storvsc_io_request() */
	struct storvsc_driver_props	*hs_drv_props;	/* entry of g_drv_props_table in use */
	int 				hs_unit;
	uint32_t			hs_frozen;
	struct cam_sim			*hs_sim;
	struct cam_path 		*hs_path;
	uint32_t			hs_num_out_reqs; /* sent but not yet completed requests */
	boolean_t			hs_destroy;
	boolean_t			hs_drain_notify; /* post hs_drain_sema when hs_num_out_reqs hits 0 */
	struct sema 			hs_drain_sema;
	struct hv_storvsc_request	hs_init_req;	/* used for the initialization handshake */
	struct hv_storvsc_request	hs_reset_req;	/* used by the (disabled) host reset */
	device_t			hs_dev;
	bus_dma_tag_t			storvsc_req_dtag; /* tag for the per-request data maps */
	struct hv_storvsc_sysctl	sysctl_data;	/* statistics counters */
	uint32_t			hs_nchan;	/* primary channel + opened sub-channels */
	/* Channel selection table; slot 0 is the primary channel. */
	struct vmbus_channel		*hs_sel_chan[MAXCPU];
};
221 
/* Event handler registration tag kept by the driver. */
static eventhandler_tag storvsc_handler_tag;
/*
 * The size of the vmscsi_request has changed in win8. The
 * additional size is for the newly added elements in the
 * structure. These elements are valid only when we are talking
 * to a win8 host.
 * Track the correct size we need to apply.  The initial value assumes a
 * pre-win8 host; hv_storvsc_channel_init() overwrites it with the value
 * from vmstor_proto_list for the negotiated protocol version.
 */
static int vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
231 
/**
 * HyperV storvsc timeout testing cases:
 * a. IO returned after first timeout;
 * b. IO returned after second timeout and queue freeze;
 * c. IO returned while timer handler is running
 * The first can be tested by "sg_senddiag -vv /dev/daX",
 * and the second and third can be done by
 * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
 *
 * Set to non-zero to compile in the timeout test code.
 */
#define HVS_TIMEOUT_TEST 0

/*
 * Bus/adapter reset functionality on the Hyper-V host is
 * buggy and it will be disabled until
 * it can be further tested.  While this is 0, hv_storvsc_host_reset()
 * is not compiled.
 */
#define HVS_HOST_RESET 0
249 
/* Static properties of one driver personality; see g_drv_props_table. */
struct storvsc_driver_props {
	char		*drv_name;
	char		*drv_desc;		/* device description set at probe time */
	uint8_t		drv_max_luns_per_target;
	uint32_t	drv_max_ios_per_target;	/* number of requests preallocated per adapter */
	uint32_t	drv_ringbuffer_size;	/* tx and rx ring size used when opening channels */
};
257 
/*
 * Storage personality of a device.  DRIVER_BLKVSC and DRIVER_STORVSC are
 * also used as indexes into g_drv_props_table, so their order must match
 * the order of that table.
 */
enum hv_storage_type {
	DRIVER_BLKVSC,
	DRIVER_STORVSC,
	DRIVER_UNKNOWN
};
263 
#define HS_MAX_ADAPTERS 10

/* Bit in the channel properties flags: the host supports multi-channel. */
#define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
267 
/*
 * Device type GUIDs.  Note the byte order: the first three fields of the
 * textual GUID appear byte-swapped in the arrays below.
 */

/* SCSI (storvsc): {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
static const struct hyperv_guid gStorVscDeviceType={
	.hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
};

/* IDE (blkvsc): {32412632-86cb-44a2-9b5c-50d1417354f5} */
static const struct hyperv_guid gBlkVscDeviceType={
	.hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
};
279 
/*
 * Properties per personality, indexed by enum hv_storage_type
 * (DRIVER_BLKVSC first, DRIVER_STORVSC second).
 */
static struct storvsc_driver_props g_drv_props_table[] = {
	{"blkvsc", "Hyper-V IDE",
	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
	 20*PAGE_SIZE},
	{"storvsc", "Hyper-V SCSI",
	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
	 20*PAGE_SIZE}
};
288 
/*
 * Sense buffer size changed in win8; have a run-time
 * variable to track the size we should use.  Starts at the pre-win8 size
 * and is updated by hv_storvsc_channel_init() once a version is agreed.
 */
static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;

/*
 * The storage protocol version is determined during the
 * initial exchange with the host.  It will indicate which
 * storage functionality is available in the host.
 */
static int vmstor_proto_version;
301 
/*
 * One negotiable protocol version together with the run-time parameters
 * that go with it.
 */
struct vmstor_proto {
        int proto_version;	/* value sent in QUERYPROTOCOLVERSION */
        int sense_buffer_size;	/* copied to the global sense_buffer_size */
        int vmscsi_size_delta;	/* copied to the global vmscsi_size_delta */
};
307 
/*
 * Protocol versions offered to the host.  hv_storvsc_channel_init() walks
 * this table from the first entry to the last and stops at the first
 * version the host accepts, so the order here is the preference order.
 * Entries for WIN8 and later use no size delta; WIN7 and WIN6 exclude the
 * win8 extension from the on-wire packet.
 */
static const struct vmstor_proto vmstor_proto_list[] = {
        {
                VMSTOR_PROTOCOL_VERSION_WIN10,
                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
                0
        },
        {
                VMSTOR_PROTOCOL_VERSION_WIN8_1,
                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
                0
        },
        {
                VMSTOR_PROTOCOL_VERSION_WIN8,
                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
                0
        },
        {
                VMSTOR_PROTOCOL_VERSION_WIN7,
                PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
                sizeof(struct vmscsi_win8_extension),
        },
        {
                VMSTOR_PROTOCOL_VERSION_WIN6,
                PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
                sizeof(struct vmscsi_win8_extension),
        }
};
335 
/* Forward declarations of functions defined later in this file. */

/* newbus device methods */
static int storvsc_probe(device_t dev);
static int storvsc_attach(device_t dev);
static int storvsc_detach(device_t dev);
/* CAM SIM entry points */
static void storvsc_poll(struct cam_sim * sim);
static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
/* request helpers */
static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
static enum hv_storage_type storvsc_get_storage_type(device_t dev);
static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
/* VMBus channel handling */
static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc);
static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
					struct vstor_packet *vstor_packet,
					struct hv_storvsc_request *request);
static int hv_storvsc_connect_vsp(struct storvsc_softc *);
static void storvsc_io_done(struct hv_storvsc_request *reqp);
/* bounce buffer copies */
static void storvsc_copy_sgl_to_bounce_buf(struct hv_sglist *bounce_sgl,
				bus_dma_segment_t *orig_sgl,
				unsigned int orig_sgl_count,
				uint64_t seg_bits);
/*
 * NOTE(review): unlike its siblings this one is not declared static --
 * confirm whether external linkage is actually required.
 */
void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
				unsigned int dest_sgl_count,
				struct hv_sglist *src_sgl,
				uint64_t seg_bits);
360 
/* newbus method table: probe/attach/detach are local, shutdown is generic. */
static device_method_t storvsc_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		storvsc_probe),
	DEVMETHOD(device_attach,	storvsc_attach),
	DEVMETHOD(device_detach,	storvsc_detach),
	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
	DEVMETHOD_END
};

/* Driver description; the softc is a struct storvsc_softc. */
static driver_t storvsc_driver = {
	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
};

/* Register the driver on the vmbus bus and declare the dependency on it. */
static devclass_t storvsc_devclass;
DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0);
MODULE_VERSION(storvsc, 1);
MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
378 
379 static void
380 storvsc_subchan_attach(struct storvsc_softc *sc,
381     struct vmbus_channel *new_channel)
382 {
383 	struct vmstor_chan_props props;
384 
385 	memset(&props, 0, sizeof(props));
386 
387 	vmbus_chan_cpu_rr(new_channel);
388 	vmbus_chan_open(new_channel,
389 	    sc->hs_drv_props->drv_ringbuffer_size,
390   	    sc->hs_drv_props->drv_ringbuffer_size,
391 	    (void *)&props,
392 	    sizeof(struct vmstor_chan_props),
393 	    hv_storvsc_on_channel_callback, sc);
394 }
395 
396 /**
397  * @brief Send multi-channel creation request to host
398  *
399  * @param device  a Hyper-V device pointer
400  * @param max_chans  the max channels supported by vmbus
401  */
402 static void
403 storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_subch)
404 {
405 	struct vmbus_channel **subchan;
406 	struct hv_storvsc_request *request;
407 	struct vstor_packet *vstor_packet;
408 	int request_subch;
409 	int i;
410 
411 	/* get sub-channel count that need to create */
412 	request_subch = MIN(max_subch, mp_ncpus - 1);
413 
414 	request = &sc->hs_init_req;
415 
416 	/* request the host to create multi-channel */
417 	memset(request, 0, sizeof(struct hv_storvsc_request));
418 
419 	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
420 
421 	vstor_packet = &request->vstor_packet;
422 
423 	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
424 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
425 	vstor_packet->u.multi_channels_cnt = request_subch;
426 
427 	vmbus_chan_send(sc->hs_chan,
428 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
429 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
430 
431 	sema_wait(&request->synch_sema);
432 
433 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
434 	    vstor_packet->status != 0) {
435 		printf("Storvsc_error: create multi-channel invalid operation "
436 		    "(%d) or statue (%u)\n",
437 		    vstor_packet->operation, vstor_packet->status);
438 		return;
439 	}
440 
441 	/* Update channel count */
442 	sc->hs_nchan = request_subch + 1;
443 
444 	/* Wait for sub-channels setup to complete. */
445 	subchan = vmbus_subchan_get(sc->hs_chan, request_subch);
446 
447 	/* Attach the sub-channels. */
448 	for (i = 0; i < request_subch; ++i)
449 		storvsc_subchan_attach(sc, subchan[i]);
450 
451 	/* Release the sub-channels. */
452 	vmbus_subchan_rel(subchan, request_subch);
453 
454 	if (bootverbose)
455 		printf("Storvsc create multi-channel success!\n");
456 }
457 
458 /**
459  * @brief initialize channel connection to parent partition
460  *
461  * @param dev  a Hyper-V device pointer
462  * @returns  0 on success, non-zero error on failure
463  */
464 static int
465 hv_storvsc_channel_init(struct storvsc_softc *sc)
466 {
467 	int ret = 0, i;
468 	struct hv_storvsc_request *request;
469 	struct vstor_packet *vstor_packet;
470 	uint16_t max_subch;
471 	boolean_t support_multichannel;
472 	uint32_t version;
473 
474 	max_subch = 0;
475 	support_multichannel = FALSE;
476 
477 	request = &sc->hs_init_req;
478 	memset(request, 0, sizeof(struct hv_storvsc_request));
479 	vstor_packet = &request->vstor_packet;
480 	request->softc = sc;
481 
482 	/**
483 	 * Initiate the vsc/vsp initialization protocol on the open channel
484 	 */
485 	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
486 
487 	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
488 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
489 
490 
491 	ret = vmbus_chan_send(sc->hs_chan,
492 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
493 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
494 
495 	if (ret != 0)
496 		goto cleanup;
497 
498 	sema_wait(&request->synch_sema);
499 
500 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
501 		vstor_packet->status != 0) {
502 		goto cleanup;
503 	}
504 
505 	for (i = 0; i < nitems(vmstor_proto_list); i++) {
506 		/* reuse the packet for version range supported */
507 
508 		memset(vstor_packet, 0, sizeof(struct vstor_packet));
509 		vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
510 		vstor_packet->flags = REQUEST_COMPLETION_FLAG;
511 
512 		vstor_packet->u.version.major_minor =
513 			vmstor_proto_list[i].proto_version;
514 
515 		/* revision is only significant for Windows guests */
516 		vstor_packet->u.version.revision = 0;
517 
518 		ret = vmbus_chan_send(sc->hs_chan,
519 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
520 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
521 
522 		if (ret != 0)
523 			goto cleanup;
524 
525 		sema_wait(&request->synch_sema);
526 
527 		if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
528 			ret = EINVAL;
529 			goto cleanup;
530 		}
531 		if (vstor_packet->status == 0) {
532 			vmstor_proto_version =
533 				vmstor_proto_list[i].proto_version;
534 			sense_buffer_size =
535 				vmstor_proto_list[i].sense_buffer_size;
536 			vmscsi_size_delta =
537 				vmstor_proto_list[i].vmscsi_size_delta;
538 			break;
539 		}
540 	}
541 
542 	if (vstor_packet->status != 0) {
543 		ret = EINVAL;
544 		goto cleanup;
545 	}
546 	/**
547 	 * Query channel properties
548 	 */
549 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
550 	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
551 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
552 
553 	ret = vmbus_chan_send(sc->hs_chan,
554 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
555 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
556 
557 	if ( ret != 0)
558 		goto cleanup;
559 
560 	sema_wait(&request->synch_sema);
561 
562 	/* TODO: Check returned version */
563 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
564 	    vstor_packet->status != 0) {
565 		goto cleanup;
566 	}
567 
568 	max_subch = vstor_packet->u.chan_props.max_channel_cnt;
569 	if (hv_storvsc_chan_cnt > 0 && hv_storvsc_chan_cnt < (max_subch + 1))
570 		max_subch = hv_storvsc_chan_cnt - 1;
571 
572 	/* multi-channels feature is supported by WIN8 and above version */
573 	version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev);
574 	if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 &&
575 	    (vstor_packet->u.chan_props.flags &
576 	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
577 		support_multichannel = TRUE;
578 	}
579 	if (bootverbose) {
580 		device_printf(sc->hs_dev, "max chans %d%s\n", max_subch + 1,
581 		    support_multichannel ? ", multi-chan capable" : "");
582 	}
583 
584 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
585 	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
586 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
587 
588 	ret = vmbus_chan_send(sc->hs_chan,
589 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
590 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
591 
592 	if (ret != 0) {
593 		goto cleanup;
594 	}
595 
596 	sema_wait(&request->synch_sema);
597 
598 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
599 	    vstor_packet->status != 0)
600 		goto cleanup;
601 
602 	/*
603 	 * If multi-channel is supported, send multichannel create
604 	 * request to host.
605 	 */
606 	if (support_multichannel && max_subch > 0)
607 		storvsc_send_multichannel_request(sc, max_subch);
608 cleanup:
609 	sema_destroy(&request->synch_sema);
610 	return (ret);
611 }
612 
613 /**
614  * @brief Open channel connection to paraent partition StorVSP driver
615  *
616  * Open and initialize channel connection to parent partition StorVSP driver.
617  *
618  * @param pointer to a Hyper-V device
619  * @returns 0 on success, non-zero error on failure
620  */
621 static int
622 hv_storvsc_connect_vsp(struct storvsc_softc *sc)
623 {
624 	int ret = 0;
625 	struct vmstor_chan_props props;
626 
627 	memset(&props, 0, sizeof(struct vmstor_chan_props));
628 
629 	/*
630 	 * Open the channel
631 	 */
632 	vmbus_chan_cpu_rr(sc->hs_chan);
633 	ret = vmbus_chan_open(
634 		sc->hs_chan,
635 		sc->hs_drv_props->drv_ringbuffer_size,
636 		sc->hs_drv_props->drv_ringbuffer_size,
637 		(void *)&props,
638 		sizeof(struct vmstor_chan_props),
639 		hv_storvsc_on_channel_callback, sc);
640 
641 	if (ret != 0) {
642 		return ret;
643 	}
644 
645 	ret = hv_storvsc_channel_init(sc);
646 	return (ret);
647 }
648 
#if HVS_HOST_RESET
/**
 * @brief Ask the host to reset the bus
 *
 * Sends a RESETBUS operation on the primary channel using the softc's
 * hs_reset_req and waits for the host's answer.  Only compiled when
 * HVS_HOST_RESET is non-zero.
 *
 * @param sc  softc of the adapter
 * @returns 0 on success, the send error otherwise
 */
static int
hv_storvsc_host_reset(struct storvsc_softc *sc)
{
	int ret = 0;

	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;

	request = &sc->hs_reset_req;
	request->softc = sc;
	vstor_packet = &request->vstor_packet;

	sema_init(&request->synch_sema, 0, "stor synch sema");

	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	/* The primary channel lives in the softc; there is no "dev" here. */
	ret = vmbus_chan_send(sc->hs_chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    vstor_packet, VSTOR_PKT_SIZE,
	    (uint64_t)(uintptr_t)&sc->hs_reset_req);

	if (ret != 0) {
		goto cleanup;
	}

	sema_wait(&request->synch_sema);

	/*
	 * At this point, all outstanding requests in the adapter
	 * should have been flushed out and return to us
	 */

cleanup:
	sema_destroy(&request->synch_sema);
	return (ret);
}
#endif /* HVS_HOST_RESET */
688 
689 /**
690  * @brief Function to initiate an I/O request
691  *
692  * @param device Hyper-V device pointer
693  * @param request pointer to a request structure
694  * @returns 0 on success, non-zero error on failure
695  */
696 static int
697 hv_storvsc_io_request(struct storvsc_softc *sc,
698 					  struct hv_storvsc_request *request)
699 {
700 	struct vstor_packet *vstor_packet = &request->vstor_packet;
701 	struct vmbus_channel* outgoing_channel = NULL;
702 	int ret = 0, ch_sel;
703 
704 	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
705 
706 	vstor_packet->u.vm_srb.length =
707 	    sizeof(struct vmscsi_req) - vmscsi_size_delta;
708 
709 	vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
710 
711 	vstor_packet->u.vm_srb.transfer_len =
712 	    request->prp_list.gpa_range.gpa_len;
713 
714 	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
715 
716 	ch_sel = (vstor_packet->u.vm_srb.lun + curcpu) % sc->hs_nchan;
717 	/*
718 	 * If we are panic'ing, then we are dumping core. Since storvsc_polls
719 	 * always uses sc->hs_chan, then we must send to that channel or a poll
720 	 * timeout will occur.
721 	 */
722 	if (panicstr) {
723 		outgoing_channel = sc->hs_chan;
724 	} else {
725 		outgoing_channel = sc->hs_sel_chan[ch_sel];
726 	}
727 
728 	mtx_unlock(&request->softc->hs_lock);
729 	if (request->prp_list.gpa_range.gpa_len) {
730 		ret = vmbus_chan_send_prplist(outgoing_channel,
731 		    &request->prp_list.gpa_range, request->prp_cnt,
732 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
733 	} else {
734 		ret = vmbus_chan_send(outgoing_channel,
735 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
736 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
737 	}
738 	/* statistic for successful request sending on each channel */
739 	if (!ret) {
740 		sc->sysctl_data.chan_send_cnt[ch_sel]++;
741 	}
742 	mtx_lock(&request->softc->hs_lock);
743 
744 	if (ret != 0) {
745 		printf("Unable to send packet %p ret %d", vstor_packet, ret);
746 	} else {
747 		atomic_add_int(&sc->hs_num_out_reqs, 1);
748 	}
749 
750 	return (ret);
751 }
752 
753 
754 /**
755  * Process IO_COMPLETION_OPERATION and ready
756  * the result to be completed for upper layer
757  * processing by the CAM layer.
758  */
759 static void
760 hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
761 			   struct vstor_packet *vstor_packet,
762 			   struct hv_storvsc_request *request)
763 {
764 	struct vmscsi_req *vm_srb;
765 
766 	vm_srb = &vstor_packet->u.vm_srb;
767 
768 	/*
769 	 * Copy some fields of the host's response into the request structure,
770 	 * because the fields will be used later in storvsc_io_done().
771 	 */
772 	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
773 	request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
774 	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;
775 
776 	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
777 			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
778 		/* Autosense data available */
779 
780 		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
781 				("vm_srb->sense_info_len <= "
782 				 "request->sense_info_len"));
783 
784 		memcpy(request->sense_data, vm_srb->u.sense_data,
785 			vm_srb->sense_info_len);
786 
787 		request->sense_info_len = vm_srb->sense_info_len;
788 	}
789 
790 	/* Complete request by passing to the CAM layer */
791 	storvsc_io_done(request);
792 	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
793 	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
794 		sema_post(&sc->hs_drain_sema);
795 	}
796 }
797 
798 static void
799 hv_storvsc_rescan_target(struct storvsc_softc *sc)
800 {
801 	path_id_t pathid;
802 	target_id_t targetid;
803 	union ccb *ccb;
804 
805 	pathid = cam_sim_path(sc->hs_sim);
806 	targetid = CAM_TARGET_WILDCARD;
807 
808 	/*
809 	 * Allocate a CCB and schedule a rescan.
810 	 */
811 	ccb = xpt_alloc_ccb_nowait();
812 	if (ccb == NULL) {
813 		printf("unable to alloc CCB for rescan\n");
814 		return;
815 	}
816 
817 	if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
818 	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
819 		printf("unable to create path for rescan, pathid: %u,"
820 		    "targetid: %u\n", pathid, targetid);
821 		xpt_free_ccb(ccb);
822 		return;
823 	}
824 
825 	if (targetid == CAM_TARGET_WILDCARD)
826 		ccb->ccb_h.func_code = XPT_SCAN_BUS;
827 	else
828 		ccb->ccb_h.func_code = XPT_SCAN_TGT;
829 
830 	xpt_rescan(ccb);
831 }
832 
833 static void
834 hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc)
835 {
836 	int ret = 0;
837 	struct storvsc_softc *sc = xsc;
838 	uint32_t bytes_recvd;
839 	uint64_t request_id;
840 	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
841 	struct hv_storvsc_request *request;
842 	struct vstor_packet *vstor_packet;
843 
844 	bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8);
845 	ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id);
846 	KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough"));
847 	/* XXX check bytes_recvd to make sure that it contains enough data */
848 
849 	while ((ret == 0) && (bytes_recvd > 0)) {
850 		request = (struct hv_storvsc_request *)(uintptr_t)request_id;
851 
852 		if ((request == &sc->hs_init_req) ||
853 			(request == &sc->hs_reset_req)) {
854 			memcpy(&request->vstor_packet, packet,
855 				   sizeof(struct vstor_packet));
856 			sema_post(&request->synch_sema);
857 		} else {
858 			vstor_packet = (struct vstor_packet *)packet;
859 			switch(vstor_packet->operation) {
860 			case VSTOR_OPERATION_COMPLETEIO:
861 				if (request == NULL)
862 					panic("VMBUS: storvsc received a "
863 					    "packet with NULL request id in "
864 					    "COMPLETEIO operation.");
865 
866 				hv_storvsc_on_iocompletion(sc,
867 							vstor_packet, request);
868 				break;
869 			case VSTOR_OPERATION_REMOVEDEVICE:
870 				printf("VMBUS: storvsc operation %d not "
871 				    "implemented.\n", vstor_packet->operation);
872 				/* TODO: implement */
873 				break;
874 			case VSTOR_OPERATION_ENUMERATE_BUS:
875 				hv_storvsc_rescan_target(sc);
876 				break;
877 			default:
878 				break;
879 			}
880 		}
881 
882 		bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8),
883 		ret = vmbus_chan_recv(channel, packet, &bytes_recvd,
884 		    &request_id);
885 		KASSERT(ret != ENOBUFS,
886 		    ("storvsc recvbuf is not large enough"));
887 		/*
888 		 * XXX check bytes_recvd to make sure that it contains
889 		 * enough data
890 		 */
891 	}
892 }
893 
894 /**
895  * @brief StorVSC probe function
896  *
897  * Device probe function.  Returns 0 if the input device is a StorVSC
898  * device.  Otherwise, a ENXIO is returned.  If the input device is
899  * for BlkVSC (paravirtual IDE) device and this support is disabled in
900  * favor of the emulated ATA/IDE device, return ENXIO.
901  *
902  * @param a device
903  * @returns 0 on success, ENXIO if not a matcing StorVSC device
904  */
905 static int
906 storvsc_probe(device_t dev)
907 {
908 	int ret	= ENXIO;
909 
910 	switch (storvsc_get_storage_type(dev)) {
911 	case DRIVER_BLKVSC:
912 		if(bootverbose)
913 			device_printf(dev,
914 			    "Enlightened ATA/IDE detected\n");
915 		device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc);
916 		ret = BUS_PROBE_DEFAULT;
917 		break;
918 	case DRIVER_STORVSC:
919 		if(bootverbose)
920 			device_printf(dev, "Enlightened SCSI device detected\n");
921 		device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc);
922 		ret = BUS_PROBE_DEFAULT;
923 		break;
924 	default:
925 		ret = ENXIO;
926 	}
927 	return (ret);
928 }
929 
930 static void
931 storvsc_create_chan_sel(struct storvsc_softc *sc)
932 {
933 	struct vmbus_channel **subch;
934 	int i, nsubch;
935 
936 	sc->hs_sel_chan[0] = sc->hs_chan;
937 	nsubch = sc->hs_nchan - 1;
938 	if (nsubch == 0)
939 		return;
940 
941 	subch = vmbus_subchan_get(sc->hs_chan, nsubch);
942 	for (i = 0; i < nsubch; i++)
943 		sc->hs_sel_chan[i + 1] = subch[i];
944 	vmbus_subchan_rel(subch, nsubch);
945 }
946 
947 static int
948 storvsc_init_requests(device_t dev)
949 {
950 	struct storvsc_softc *sc = device_get_softc(dev);
951 	struct hv_storvsc_request *reqp;
952 	int error, i;
953 
954 	LIST_INIT(&sc->hs_free_list);
955 
956 	error = bus_dma_tag_create(
957 		bus_get_dma_tag(dev),		/* parent */
958 		1,				/* alignment */
959 		PAGE_SIZE,			/* boundary */
960 		BUS_SPACE_MAXADDR,		/* lowaddr */
961 		BUS_SPACE_MAXADDR,		/* highaddr */
962 		NULL, NULL,			/* filter, filterarg */
963 		STORVSC_DATA_SIZE_MAX,		/* maxsize */
964 		STORVSC_DATA_SEGCNT_MAX,	/* nsegments */
965 		STORVSC_DATA_SEGSZ_MAX,		/* maxsegsize */
966 		0,				/* flags */
967 		NULL,				/* lockfunc */
968 		NULL,				/* lockfuncarg */
969 		&sc->storvsc_req_dtag);
970 	if (error) {
971 		device_printf(dev, "failed to create storvsc dma tag\n");
972 		return (error);
973 	}
974 
975 	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
976 		reqp = malloc(sizeof(struct hv_storvsc_request),
977 				 M_DEVBUF, M_WAITOK|M_ZERO);
978 		reqp->softc = sc;
979 		error = bus_dmamap_create(sc->storvsc_req_dtag, 0,
980 				&reqp->data_dmap);
981 		if (error) {
982 			device_printf(dev, "failed to allocate storvsc "
983 			    "data dmamap\n");
984 			goto cleanup;
985 		}
986 		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
987 	}
988 	return (0);
989 
990 cleanup:
991 	while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) {
992 		LIST_REMOVE(reqp, link);
993 		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
994 		free(reqp, M_DEVBUF);
995 	}
996 	return (error);
997 }
998 
999 static void
1000 storvsc_sysctl(device_t dev)
1001 {
1002 	struct sysctl_oid_list *child;
1003 	struct sysctl_ctx_list *ctx;
1004 	struct sysctl_oid *ch_tree, *chid_tree;
1005 	struct storvsc_softc *sc;
1006 	char name[16];
1007 	int i;
1008 
1009 	sc = device_get_softc(dev);
1010 	ctx = device_get_sysctl_ctx(dev);
1011 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
1012 
1013 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt",
1014 		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_bio_cnt,
1015 		"# of bio data block");
1016 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt",
1017 		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_vaddr_cnt,
1018 		"# of vaddr data block");
1019 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt",
1020 		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_sg_cnt,
1021 		"# of sg data block");
1022 
1023 	/* dev.storvsc.UNIT.channel */
1024 	ch_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "channel",
1025 		CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1026 	if (ch_tree == NULL)
1027 		return;
1028 
1029 	for (i = 0; i < sc->hs_nchan; i++) {
1030 		uint32_t ch_id;
1031 
1032 		ch_id = vmbus_chan_id(sc->hs_sel_chan[i]);
1033 		snprintf(name, sizeof(name), "%d", ch_id);
1034 		/* dev.storvsc.UNIT.channel.CHID */
1035 		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
1036 			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1037 		if (chid_tree == NULL)
1038 			return;
1039 		/* dev.storvsc.UNIT.channel.CHID.send_req */
1040 		SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
1041 			"send_req", CTLFLAG_RD, &sc->sysctl_data.chan_send_cnt[i],
1042 			"# of request sending from this channel");
1043 	}
1044 }
1045 
/**
 * @brief StorVSC attach function
 *
 * Function responsible for allocating per-device structures and
 * setting up the CAM interfaces.  In order, it:
 *  - selects the driver property table entry for the storage type,
 *  - initializes the softc lock and the request pool,
 *  - creates the global scatter/gather bounce page pool (first caller only),
 *  - connects to the VSP and builds the channel selection table,
 *  - allocates the CAM device queue and SIM, registers the bus and
 *    creates a wildcard path,
 *  - registers the sysctl nodes.
 *
 * @param dev a device
 * @returns 0 on success or an error on failure
 */
static int
storvsc_attach(device_t dev)
{
	enum hv_storage_type stor_type;
	struct storvsc_softc *sc;
	struct cam_devq *devq;
	int ret, i, j;
	struct hv_storvsc_request *reqp;
	struct root_hold_token *root_mount_token = NULL;
	struct hv_sgl_node *sgl_node = NULL;
	void *tmp_buff = NULL;

	/*
	 * We need to serialize storvsc attach calls.
	 * The token taken here is released on both the success path and
	 * the cleanup path below.
	 */
	root_mount_token = root_mount_hold("storvsc");

	sc = device_get_softc(dev);
	/* Start with the primary channel only. */
	sc->hs_nchan = 1;
	sc->hs_chan = vmbus_get_channel(dev);

	stor_type = storvsc_get_storage_type(dev);

	if (stor_type == DRIVER_UNKNOWN) {
		/*
		 * NOTE(review): this jumps to cleanup before hs_free_list is
		 * initialized by storvsc_init_requests(); the cleanup loop
		 * presumably relies on the softc being zero-filled — confirm.
		 */
		ret = ENODEV;
		goto cleanup;
	}

	/* fill in driver specific properties */
	sc->hs_drv_props = &g_drv_props_table[stor_type];
	sc->hs_drv_props->drv_ringbuffer_size = hv_storvsc_ringbuffer_size;
	/* The tunable is capped at STORVSC_MAX_IO. */
	sc->hs_drv_props->drv_max_ios_per_target =
		MIN(STORVSC_MAX_IO, hv_storvsc_max_io);
	if (bootverbose) {
		printf("storvsc ringbuffer size: %d, max_io: %d\n",
			sc->hs_drv_props->drv_ringbuffer_size,
			sc->hs_drv_props->drv_max_ios_per_target);
	}
	/* fill in device specific properties */
	sc->hs_unit	= device_get_unit(dev);
	sc->hs_dev	= dev;

	/* hs_lock is also handed to CAM as the SIM lock below. */
	mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);

	ret = storvsc_init_requests(dev);
	if (ret != 0)
		goto cleanup;

	/* create sg-list page pool */
	/* The pool is global; only the first attach that gets here fills it. */
	if (FALSE == g_hv_sgl_page_pool.is_init) {
		g_hv_sgl_page_pool.is_init = TRUE;
		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);

		/*
		 * Pre-create SG list, each SG list with
		 * STORVSC_DATA_SEGCNT_MAX segments, each
		 * segment has one page buffer
		 */
		for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; i++) {
	        	sgl_node = malloc(sizeof(struct hv_sgl_node),
			    M_DEVBUF, M_WAITOK|M_ZERO);

			sgl_node->sgl_data = malloc(sizeof(struct hv_sglist),
			    M_DEVBUF, M_WAITOK|M_ZERO);

			for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
				tmp_buff = malloc(PAGE_SIZE,
				    M_DEVBUF, M_WAITOK|M_ZERO);

				sgl_node->sgl_data->sg_iov[j].iov_base =
				    tmp_buff;
			}

			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
			    sgl_node, link);
		}
	}

	sc->hs_destroy = FALSE;
	sc->hs_drain_notify = FALSE;
	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");

	ret = hv_storvsc_connect_vsp(sc);
	if (ret != 0) {
		goto cleanup;
	}

	/* Construct cpu to channel mapping */
	storvsc_create_chan_sel(sc);

	/*
	 * Create the device queue.
	 * Hyper-V maps each target to one SCSI HBA
	 */
	devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
	if (devq == NULL) {
		device_printf(dev, "Failed to alloc device queue\n");
		ret = ENOMEM;
		goto cleanup;
	}

	sc->hs_sim = cam_sim_alloc(storvsc_action,
				storvsc_poll,
				sc->hs_drv_props->drv_name,
				sc,
				sc->hs_unit,
				&sc->hs_lock, 1,
				sc->hs_drv_props->drv_max_ios_per_target,
				devq);

	if (sc->hs_sim == NULL) {
		device_printf(dev, "Failed to alloc sim\n");
		/* The SIM never took the queue, so free it here. */
		cam_simq_free(devq);
		ret = ENOMEM;
		goto cleanup;
	}

	/* Bus registration and path creation are done under hs_lock. */
	mtx_lock(&sc->hs_lock);
	/* bus_id is set to 0, need to get it from VMBUS channel query? */
	if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
		mtx_unlock(&sc->hs_lock);
		device_printf(dev, "Unable to register SCSI bus\n");
		ret = ENXIO;
		goto cleanup;
	}

	if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
		 cam_sim_path(sc->hs_sim),
		CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
		/* Undo the bus registration before freeing the SIM. */
		xpt_bus_deregister(cam_sim_path(sc->hs_sim));
		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
		mtx_unlock(&sc->hs_lock);
		device_printf(dev, "Unable to create path\n");
		ret = ENXIO;
		goto cleanup;
	}

	mtx_unlock(&sc->hs_lock);

	storvsc_sysctl(dev);

	root_mount_rel(root_mount_token);
	return (0);


cleanup:
	/*
	 * Common failure path: release the root mount hold, then free the
	 * request pool and the bounce page pool.
	 *
	 * NOTE(review): nothing here undoes mtx_init(), sema_init(),
	 * hv_storvsc_connect_vsp() or the DMA tag creation — confirm whether
	 * that is handled elsewhere.
	 */
	root_mount_rel(root_mount_token);
	while (!LIST_EMPTY(&sc->hs_free_list)) {
		reqp = LIST_FIRST(&sc->hs_free_list);
		LIST_REMOVE(reqp, link);
		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
		free(reqp, M_DEVBUF);
	}

	/*
	 * NOTE(review): this frees the global free_sgl_list but leaves
	 * g_hv_sgl_page_pool.is_init TRUE, so a later attach skips
	 * re-creating the pool — confirm this is intended.
	 */
	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
		LIST_REMOVE(sgl_node, link);
		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
			free(sgl_node->sgl_data->sg_iov[j].iov_base, M_DEVBUF);
		}
		free(sgl_node->sgl_data, M_DEVBUF);
		free(sgl_node, M_DEVBUF);
	}

	return (ret);
}
1224 
1225 /**
1226  * @brief StorVSC device detach function
1227  *
1228  * This function is responsible for safely detaching a
1229  * StorVSC device.  This includes waiting for inbound responses
1230  * to complete and freeing associated per-device structures.
1231  *
1232  * @param dev a device
1233  * returns 0 on success
1234  */
1235 static int
1236 storvsc_detach(device_t dev)
1237 {
1238 	struct storvsc_softc *sc = device_get_softc(dev);
1239 	struct hv_storvsc_request *reqp = NULL;
1240 	struct hv_sgl_node *sgl_node = NULL;
1241 	int j = 0;
1242 
1243 	sc->hs_destroy = TRUE;
1244 
1245 	/*
1246 	 * At this point, all outbound traffic should be disabled. We
1247 	 * only allow inbound traffic (responses) to proceed so that
1248 	 * outstanding requests can be completed.
1249 	 */
1250 
1251 	sc->hs_drain_notify = TRUE;
1252 	sema_wait(&sc->hs_drain_sema);
1253 	sc->hs_drain_notify = FALSE;
1254 
1255 	/*
1256 	 * Since we have already drained, we don't need to busy wait.
1257 	 * The call to close the channel will reset the callback
1258 	 * under the protection of the incoming channel lock.
1259 	 */
1260 
1261 	vmbus_chan_close(sc->hs_chan);
1262 
1263 	mtx_lock(&sc->hs_lock);
1264 	while (!LIST_EMPTY(&sc->hs_free_list)) {
1265 		reqp = LIST_FIRST(&sc->hs_free_list);
1266 		LIST_REMOVE(reqp, link);
1267 		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1268 		free(reqp, M_DEVBUF);
1269 	}
1270 	mtx_unlock(&sc->hs_lock);
1271 
1272 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1273 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1274 		LIST_REMOVE(sgl_node, link);
1275 		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){
1276 			free(sgl_node->sgl_data->sg_iov[j].iov_base, M_DEVBUF);
1277 		}
1278 		free(sgl_node->sgl_data, M_DEVBUF);
1279 		free(sgl_node, M_DEVBUF);
1280 	}
1281 
1282 	return (0);
1283 }
1284 
1285 #if HVS_TIMEOUT_TEST
1286 /**
1287  * @brief unit test for timed out operations
1288  *
1289  * This function provides unit testing capability to simulate
1290  * timed out operations.  Recompilation with HV_TIMEOUT_TEST=1
1291  * is required.
1292  *
1293  * @param reqp pointer to a request structure
1294  * @param opcode SCSI operation being performed
1295  * @param wait if 1, wait for I/O to complete
1296  */
1297 static void
1298 storvsc_timeout_test(struct hv_storvsc_request *reqp,
1299 		uint8_t opcode, int wait)
1300 {
1301 	int ret;
1302 	union ccb *ccb = reqp->ccb;
1303 	struct storvsc_softc *sc = reqp->softc;
1304 
1305 	if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) {
1306 		return;
1307 	}
1308 
1309 	if (wait) {
1310 		mtx_lock(&reqp->event.mtx);
1311 	}
1312 	ret = hv_storvsc_io_request(sc, reqp);
1313 	if (ret != 0) {
1314 		if (wait) {
1315 			mtx_unlock(&reqp->event.mtx);
1316 		}
1317 		printf("%s: io_request failed with %d.\n",
1318 				__func__, ret);
1319 		ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1320 		mtx_lock(&sc->hs_lock);
1321 		storvsc_free_request(sc, reqp);
1322 		xpt_done(ccb);
1323 		mtx_unlock(&sc->hs_lock);
1324 		return;
1325 	}
1326 
1327 	if (wait) {
1328 		xpt_print(ccb->ccb_h.path,
1329 				"%u: %s: waiting for IO return.\n",
1330 				ticks, __func__);
1331 		ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
1332 		mtx_unlock(&reqp->event.mtx);
1333 		xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
1334 				ticks, __func__, (ret == 0)?
1335 				"IO return detected" :
1336 				"IO return not detected");
1337 		/*
1338 		 * Now both the timer handler and io done are running
1339 		 * simultaneously. We want to confirm the io done always
1340 		 * finishes after the timer handler exits. So reqp used by
1341 		 * timer handler is not freed or stale. Do busy loop for
1342 		 * another 1/10 second to make sure io done does
1343 		 * wait for the timer handler to complete.
1344 		 */
1345 		DELAY(100*1000);
1346 		mtx_lock(&sc->hs_lock);
1347 		xpt_print(ccb->ccb_h.path,
1348 				"%u: %s: finishing, queue frozen %d, "
1349 				"ccb status 0x%x scsi_status 0x%x.\n",
1350 				ticks, __func__, sc->hs_frozen,
1351 				ccb->ccb_h.status,
1352 				ccb->csio.scsi_status);
1353 		mtx_unlock(&sc->hs_lock);
1354 	}
1355 }
1356 #endif /* HVS_TIMEOUT_TEST */
1357 
1358 #ifdef notyet
1359 /**
1360  * @brief timeout handler for requests
1361  *
1362  * This function is called as a result of a callout expiring.
1363  *
1364  * @param arg pointer to a request
1365  */
1366 static void
1367 storvsc_timeout(void *arg)
1368 {
1369 	struct hv_storvsc_request *reqp = arg;
1370 	struct storvsc_softc *sc = reqp->softc;
1371 	union ccb *ccb = reqp->ccb;
1372 
1373 	if (reqp->retries == 0) {
1374 		mtx_lock(&sc->hs_lock);
1375 		xpt_print(ccb->ccb_h.path,
1376 		    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
1377 		    ticks, reqp, ccb->ccb_h.timeout / 1000);
1378 		cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
1379 		mtx_unlock(&sc->hs_lock);
1380 
1381 		reqp->retries++;
1382 		callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
1383 		    0, storvsc_timeout, reqp, 0);
1384 #if HVS_TIMEOUT_TEST
1385 		storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
1386 #endif
1387 		return;
1388 	}
1389 
1390 	mtx_lock(&sc->hs_lock);
1391 	xpt_print(ccb->ccb_h.path,
1392 		"%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
1393 		ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
1394 		(sc->hs_frozen == 0)?
1395 		"freezing the queue" : "the queue is already frozen");
1396 	if (sc->hs_frozen == 0) {
1397 		sc->hs_frozen = 1;
1398 		xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
1399 	}
1400 	mtx_unlock(&sc->hs_lock);
1401 
1402 #if HVS_TIMEOUT_TEST
1403 	storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
1404 #endif
1405 }
1406 #endif
1407 
1408 /**
1409  * @brief StorVSC device poll function
1410  *
1411  * This function is responsible for servicing requests when
1412  * interrupts are disabled (i.e when we are dumping core.)
1413  *
1414  * @param sim a pointer to a CAM SCSI interface module
1415  */
1416 static void
1417 storvsc_poll(struct cam_sim *sim)
1418 {
1419 	struct storvsc_softc *sc = cam_sim_softc(sim);
1420 
1421 	mtx_assert(&sc->hs_lock, MA_OWNED);
1422 	mtx_unlock(&sc->hs_lock);
1423 	hv_storvsc_on_channel_callback(sc->hs_chan, sc);
1424 	mtx_lock(&sc->hs_lock);
1425 }
1426 
1427 /**
1428  * @brief StorVSC device action function
1429  *
1430  * This function is responsible for handling SCSI operations which
1431  * are passed from the CAM layer.  The requests are in the form of
1432  * CAM control blocks which indicate the action being performed.
1433  * Not all actions require converting the request to a VSCSI protocol
1434  * message - these actions can be responded to by this driver.
1435  * Requests which are destined for a backend storage device are converted
1436  * to a VSCSI protocol message and sent on the channel connection associated
1437  * with this device.
1438  *
1439  * @param sim pointer to a CAM SCSI interface module
1440  * @param ccb pointer to a CAM control block
1441  */
1442 static void
1443 storvsc_action(struct cam_sim *sim, union ccb *ccb)
1444 {
1445 	struct storvsc_softc *sc = cam_sim_softc(sim);
1446 	int res;
1447 
1448 	mtx_assert(&sc->hs_lock, MA_OWNED);
1449 	switch (ccb->ccb_h.func_code) {
1450 	case XPT_PATH_INQ: {
1451 		struct ccb_pathinq *cpi = &ccb->cpi;
1452 
1453 		cpi->version_num = 1;
1454 		cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
1455 		cpi->target_sprt = 0;
1456 		cpi->hba_misc = PIM_NOBUSRESET;
1457 		if (hv_storvsc_use_pim_unmapped)
1458 			cpi->hba_misc |= PIM_UNMAPPED;
1459 		cpi->maxio = STORVSC_DATA_SIZE_MAX;
1460 		cpi->hba_eng_cnt = 0;
1461 		cpi->max_target = STORVSC_MAX_TARGETS;
1462 		cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
1463 		cpi->initiator_id = cpi->max_target;
1464 		cpi->bus_id = cam_sim_bus(sim);
1465 		cpi->base_transfer_speed = 300000;
1466 		cpi->transport = XPORT_SAS;
1467 		cpi->transport_version = 0;
1468 		cpi->protocol = PROTO_SCSI;
1469 		cpi->protocol_version = SCSI_REV_SPC2;
1470 		strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
1471 		strlcpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
1472 		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
1473 		cpi->unit_number = cam_sim_unit(sim);
1474 
1475 		ccb->ccb_h.status = CAM_REQ_CMP;
1476 		xpt_done(ccb);
1477 		return;
1478 	}
1479 	case XPT_GET_TRAN_SETTINGS: {
1480 		struct  ccb_trans_settings *cts = &ccb->cts;
1481 
1482 		cts->transport = XPORT_SAS;
1483 		cts->transport_version = 0;
1484 		cts->protocol = PROTO_SCSI;
1485 		cts->protocol_version = SCSI_REV_SPC2;
1486 
1487 		/* enable tag queuing and disconnected mode */
1488 		cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
1489 		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
1490 		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
1491 		cts->xport_specific.valid = CTS_SPI_VALID_DISC;
1492 		cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
1493 
1494 		ccb->ccb_h.status = CAM_REQ_CMP;
1495 		xpt_done(ccb);
1496 		return;
1497 	}
1498 	case XPT_SET_TRAN_SETTINGS:	{
1499 		ccb->ccb_h.status = CAM_REQ_CMP;
1500 		xpt_done(ccb);
1501 		return;
1502 	}
1503 	case XPT_CALC_GEOMETRY:{
1504 		cam_calc_geometry(&ccb->ccg, 1);
1505 		xpt_done(ccb);
1506 		return;
1507 	}
1508 	case  XPT_RESET_BUS:
1509 	case  XPT_RESET_DEV:{
1510 #if HVS_HOST_RESET
1511 		if ((res = hv_storvsc_host_reset(sc)) != 0) {
1512 			xpt_print(ccb->ccb_h.path,
1513 				"hv_storvsc_host_reset failed with %d\n", res);
1514 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1515 			xpt_done(ccb);
1516 			return;
1517 		}
1518 		ccb->ccb_h.status = CAM_REQ_CMP;
1519 		xpt_done(ccb);
1520 		return;
1521 #else
1522 		xpt_print(ccb->ccb_h.path,
1523 				  "%s reset not supported.\n",
1524 				  (ccb->ccb_h.func_code == XPT_RESET_BUS)?
1525 				  "bus" : "dev");
1526 		ccb->ccb_h.status = CAM_REQ_INVALID;
1527 		xpt_done(ccb);
1528 		return;
1529 #endif	/* HVS_HOST_RESET */
1530 	}
1531 	case XPT_SCSI_IO:
1532 	case XPT_IMMED_NOTIFY: {
1533 		struct hv_storvsc_request *reqp = NULL;
1534 		bus_dmamap_t dmap_saved;
1535 
1536 		if (ccb->csio.cdb_len == 0) {
1537 			panic("cdl_len is 0\n");
1538 		}
1539 
1540 		if (LIST_EMPTY(&sc->hs_free_list)) {
1541 			ccb->ccb_h.status = CAM_REQUEUE_REQ;
1542 			if (sc->hs_frozen == 0) {
1543 				sc->hs_frozen = 1;
1544 				xpt_freeze_simq(sim, /* count*/1);
1545 			}
1546 			xpt_done(ccb);
1547 			return;
1548 		}
1549 
1550 		reqp = LIST_FIRST(&sc->hs_free_list);
1551 		LIST_REMOVE(reqp, link);
1552 
1553 		/* Save the data_dmap before reset request */
1554 		dmap_saved = reqp->data_dmap;
1555 
1556 		/* XXX this is ugly */
1557 		bzero(reqp, sizeof(struct hv_storvsc_request));
1558 
1559 		/* Restore necessary bits */
1560 		reqp->data_dmap = dmap_saved;
1561 		reqp->softc = sc;
1562 
1563 		ccb->ccb_h.status |= CAM_SIM_QUEUED;
1564 		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
1565 			ccb->ccb_h.status = CAM_REQ_INVALID;
1566 			xpt_done(ccb);
1567 			return;
1568 		}
1569 
1570 #ifdef notyet
1571 		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
1572 			callout_init(&reqp->callout, 1);
1573 			callout_reset_sbt(&reqp->callout,
1574 			    SBT_1MS * ccb->ccb_h.timeout, 0,
1575 			    storvsc_timeout, reqp, 0);
1576 #if HVS_TIMEOUT_TEST
1577 			cv_init(&reqp->event.cv, "storvsc timeout cv");
1578 			mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
1579 					NULL, MTX_DEF);
1580 			switch (reqp->vstor_packet.vm_srb.cdb[0]) {
1581 				case MODE_SELECT_10:
1582 				case SEND_DIAGNOSTIC:
1583 					/* To have timer send the request. */
1584 					return;
1585 				default:
1586 					break;
1587 			}
1588 #endif /* HVS_TIMEOUT_TEST */
1589 		}
1590 #endif
1591 
1592 		if ((res = hv_storvsc_io_request(sc, reqp)) != 0) {
1593 			xpt_print(ccb->ccb_h.path,
1594 				"hv_storvsc_io_request failed with %d\n", res);
1595 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1596 			storvsc_free_request(sc, reqp);
1597 			xpt_done(ccb);
1598 			return;
1599 		}
1600 		return;
1601 	}
1602 
1603 	default:
1604 		ccb->ccb_h.status = CAM_REQ_INVALID;
1605 		xpt_done(ccb);
1606 		return;
1607 	}
1608 }
1609 
1610 /**
1611  * @brief destroy bounce buffer
1612  *
1613  * This function is responsible for destroy a Scatter/Gather list
1614  * that create by storvsc_create_bounce_buffer()
1615  *
1616  * @param sgl- the Scatter/Gather need be destroy
1617  * @param sg_count- page count of the SG list.
1618  *
1619  */
1620 static void
1621 storvsc_destroy_bounce_buffer(struct hv_sglist *sgl)
1622 {
1623 	struct hv_sgl_node *sgl_node = NULL;
1624 	if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
1625 		printf("storvsc error: not enough in use sgl\n");
1626 		return;
1627 	}
1628 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
1629 	LIST_REMOVE(sgl_node, link);
1630 	sgl_node->sgl_data = sgl;
1631 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
1632 }
1633 
1634 /**
1635  * @brief create bounce buffer
1636  *
1637  * This function is responsible for create a Scatter/Gather list,
1638  * which hold several pages that can be aligned with page size.
1639  *
1640  * @param seg_count- SG-list segments count
1641  * @param write - if WRITE_TYPE, set SG list page used size to 0,
1642  * otherwise set used size to page size.
1643  *
1644  * return NULL if create failed
1645  */
1646 static struct hv_sglist *
1647 storvsc_create_bounce_buffer(uint16_t seg_count, int write)
1648 {
1649 	int i = 0;
1650 	struct hv_sglist *bounce_sgl = NULL;
1651 	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
1652 	struct hv_sgl_node *sgl_node = NULL;
1653 
1654 	/* get struct hv_sglist from free_sgl_list */
1655 	if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1656 		printf("storvsc error: not enough free sgl\n");
1657 		return NULL;
1658 	}
1659 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1660 	LIST_REMOVE(sgl_node, link);
1661 	bounce_sgl = sgl_node->sgl_data;
1662 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
1663 
1664 	bounce_sgl->sg_maxseg = seg_count;
1665 
1666 	if (write == WRITE_TYPE)
1667 		bounce_sgl->sg_nseg = 0;
1668 	else
1669 		bounce_sgl->sg_nseg = seg_count;
1670 
1671 	for (i = 0; i < seg_count; i++)
1672 	        bounce_sgl->sg_iov[i].iov_len = buf_len;
1673 
1674 	return bounce_sgl;
1675 }
1676 
1677 /**
1678  * @brief copy data from SG list to bounce buffer
1679  *
1680  * This function is responsible for copy data from one SG list's segments
1681  * to another SG list which used as bounce buffer.
1682  *
1683  * @param bounce_sgl - the destination SG list
1684  * @param orig_sgl - the segment of the source SG list.
1685  * @param orig_sgl_count - the count of segments.
1686  * @param orig_sgl_count - indicate which segment need bounce buffer,
1687  *  set 1 means need.
1688  *
1689  */
1690 static void
1691 storvsc_copy_sgl_to_bounce_buf(struct hv_sglist *bounce_sgl,
1692 			       bus_dma_segment_t *orig_sgl,
1693 			       unsigned int orig_sgl_count,
1694 			       uint64_t seg_bits)
1695 {
1696 	int src_sgl_idx = 0;
1697 
1698 	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
1699 		if (seg_bits & (1 << src_sgl_idx)) {
1700 			memcpy(bounce_sgl->sg_iov[src_sgl_idx].iov_base,
1701 			    (void*)orig_sgl[src_sgl_idx].ds_addr,
1702 			    orig_sgl[src_sgl_idx].ds_len);
1703 
1704 			bounce_sgl->sg_iov[src_sgl_idx].iov_len =
1705 			    orig_sgl[src_sgl_idx].ds_len;
1706 		}
1707 	}
1708 }
1709 
1710 /**
1711  * @brief copy data from SG list which used as bounce to another SG list
1712  *
1713  * This function is responsible for copy data from one SG list with bounce
1714  * buffer to another SG list's segments.
1715  *
1716  * @param dest_sgl - the destination SG list's segments
1717  * @param dest_sgl_count - the count of destination SG list's segment.
1718  * @param src_sgl - the source SG list.
1719  * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
1720  *
1721  */
1722 void
1723 storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
1724 				    unsigned int dest_sgl_count,
1725 				    struct hv_sglist* src_sgl,
1726 				    uint64_t seg_bits)
1727 {
1728 	int sgl_idx = 0;
1729 
1730 	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
1731 		if (seg_bits & (1 << sgl_idx)) {
1732 			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
1733 			    src_sgl->sg_iov[sgl_idx].iov_base,
1734 			    src_sgl->sg_iov[sgl_idx].iov_len);
1735 		}
1736 	}
1737 }
1738 
1739 /**
1740  * @brief check SG list with bounce buffer or not
1741  *
1742  * This function is responsible for check if need bounce buffer for SG list.
1743  *
1744  * @param sgl - the SG list's segments
1745  * @param sg_count - the count of SG list's segment.
1746  * @param bits - segmengs number that need bounce buffer
1747  *
1748  * return -1 if SG list needless bounce buffer
1749  */
1750 static int
1751 storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
1752 				unsigned int sg_count,
1753 				uint64_t *bits)
1754 {
1755 	int i = 0;
1756 	int offset = 0;
1757 	uint64_t phys_addr = 0;
1758 	uint64_t tmp_bits = 0;
1759 	boolean_t found_hole = FALSE;
1760 	boolean_t pre_aligned = TRUE;
1761 
1762 	if (sg_count < 2){
1763 		return -1;
1764 	}
1765 
1766 	*bits = 0;
1767 
1768 	phys_addr = vtophys(sgl[0].ds_addr);
1769 	offset =  phys_addr - trunc_page(phys_addr);
1770 
1771 	if (offset != 0) {
1772 		pre_aligned = FALSE;
1773 		tmp_bits |= 1;
1774 	}
1775 
1776 	for (i = 1; i < sg_count; i++) {
1777 		phys_addr = vtophys(sgl[i].ds_addr);
1778 		offset =  phys_addr - trunc_page(phys_addr);
1779 
1780 		if (offset == 0) {
1781 			if (FALSE == pre_aligned){
1782 				/*
1783 				 * This segment is aligned, if the previous
1784 				 * one is not aligned, find a hole
1785 				 */
1786 				found_hole = TRUE;
1787 			}
1788 			pre_aligned = TRUE;
1789 		} else {
1790 			tmp_bits |= 1ULL << i;
1791 			if (!pre_aligned) {
1792 				if (phys_addr != vtophys(sgl[i-1].ds_addr +
1793 				    sgl[i-1].ds_len)) {
1794 					/*
1795 					 * Check whether connect to previous
1796 					 * segment,if not, find the hole
1797 					 */
1798 					found_hole = TRUE;
1799 				}
1800 			} else {
1801 				found_hole = TRUE;
1802 			}
1803 			pre_aligned = FALSE;
1804 		}
1805 	}
1806 
1807 	if (!found_hole) {
1808 		return (-1);
1809 	} else {
1810 		*bits = tmp_bits;
1811 		return 0;
1812 	}
1813 }
1814 
1815 /**
1816  * Copy bus_dma segments to multiple page buffer, which requires
1817  * the pages are compact composed except for the 1st and last pages.
1818  */
1819 static void
1820 storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
1821 {
1822 	struct hv_storvsc_request *reqp = arg;
1823 	union ccb *ccb = reqp->ccb;
1824 	struct ccb_scsiio *csio = &ccb->csio;
1825 	struct storvsc_gpa_range *prplist;
1826 	int i;
1827 
1828 	prplist = &reqp->prp_list;
1829 	prplist->gpa_range.gpa_len = csio->dxfer_len;
1830 	prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK;
1831 
1832 	for (i = 0; i < nsegs; i++) {
1833 #ifdef INVARIANTS
1834 		if (nsegs > 1) {
1835 			if (i == 0) {
1836 				KASSERT((segs[i].ds_addr & PAGE_MASK) +
1837 				    segs[i].ds_len == PAGE_SIZE,
1838 				    ("invalid 1st page, ofs 0x%jx, len %zu",
1839 				     (uintmax_t)segs[i].ds_addr,
1840 				     segs[i].ds_len));
1841 			} else if (i == nsegs - 1) {
1842 				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0,
1843 				    ("invalid last page, ofs 0x%jx",
1844 				     (uintmax_t)segs[i].ds_addr));
1845 			} else {
1846 				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 &&
1847 				    segs[i].ds_len == PAGE_SIZE,
1848 				    ("not a full page, ofs 0x%jx, len %zu",
1849 				     (uintmax_t)segs[i].ds_addr,
1850 				     segs[i].ds_len));
1851 			}
1852 		}
1853 #endif
1854 		prplist->gpa_page[i] = atop(segs[i].ds_addr);
1855 	}
1856 	reqp->prp_cnt = nsegs;
1857 }
1858 
1859 /**
1860  * @brief Fill in a request structure based on a CAM control block
1861  *
1862  * Fills in a request structure based on the contents of a CAM control
1863  * block.  The request structure holds the payload information for
1864  * VSCSI protocol request.
1865  *
 * @param ccb pointer to a CAM control block
1867  * @param reqp pointer to a request structure
1868  */
1869 static int
1870 create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
1871 {
1872 	struct ccb_scsiio *csio = &ccb->csio;
1873 	uint64_t phys_addr;
1874 	uint32_t pfn;
1875 	uint64_t not_aligned_seg_bits = 0;
1876 	int error;
1877 
1878 	/* refer to struct vmscsi_req for meanings of these two fields */
1879 	reqp->vstor_packet.u.vm_srb.port =
1880 		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
1881 	reqp->vstor_packet.u.vm_srb.path_id =
1882 		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
1883 
1884 	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
1885 	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
1886 
1887 	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
1888 	if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
1889 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
1890 			csio->cdb_len);
1891 	} else {
1892 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
1893 			csio->cdb_len);
1894 	}
1895 
1896 	if (hv_storvsc_use_win8ext_flags) {
1897 		reqp->vstor_packet.u.vm_srb.win8_extension.time_out_value = 60;
1898 		reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1899 			SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
1900 	}
1901 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1902 	case CAM_DIR_OUT:
1903 		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
1904 		if (hv_storvsc_use_win8ext_flags) {
1905 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1906 				SRB_FLAGS_DATA_OUT;
1907 		}
1908 		break;
1909 	case CAM_DIR_IN:
1910 		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
1911 		if (hv_storvsc_use_win8ext_flags) {
1912 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1913 				SRB_FLAGS_DATA_IN;
1914 		}
1915 		break;
1916 	case CAM_DIR_NONE:
1917 		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1918 		if (hv_storvsc_use_win8ext_flags) {
1919 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1920 				SRB_FLAGS_NO_DATA_TRANSFER;
1921 		}
1922 		break;
1923 	default:
1924 		printf("Error: unexpected data direction: 0x%x\n",
1925 			ccb->ccb_h.flags & CAM_DIR_MASK);
1926 		return (EINVAL);
1927 	}
1928 
1929 	reqp->sense_data     = &csio->sense_data;
1930 	reqp->sense_info_len = csio->sense_len;
1931 
1932 	reqp->ccb = ccb;
1933 	ccb->ccb_h.spriv_ptr0 = reqp;
1934 
1935 	if (0 == csio->dxfer_len) {
1936 		return (0);
1937 	}
1938 
1939 	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
1940 	case CAM_DATA_BIO:
1941 	case CAM_DATA_VADDR:
1942 		error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag,
1943 		    reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp,
1944 		    BUS_DMA_NOWAIT);
1945 		if (error) {
1946 			xpt_print(ccb->ccb_h.path,
1947 			    "bus_dmamap_load_ccb failed: %d\n", error);
1948 			return (error);
1949 		}
1950 		if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
1951 			reqp->softc->sysctl_data.data_bio_cnt++;
1952 		else
1953 			reqp->softc->sysctl_data.data_vaddr_cnt++;
1954 		break;
1955 
1956 	case CAM_DATA_SG:
1957 	{
1958 		struct storvsc_gpa_range *prplist;
1959 		int i = 0;
1960 		int offset = 0;
1961 		int ret;
1962 
1963 		bus_dma_segment_t *storvsc_sglist =
1964 		    (bus_dma_segment_t *)ccb->csio.data_ptr;
1965 		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
1966 
1967 		prplist = &reqp->prp_list;
1968 		prplist->gpa_range.gpa_len = csio->dxfer_len;
1969 
1970 		printf("Storvsc: get SG I/O operation, %d\n",
1971 		    reqp->vstor_packet.u.vm_srb.data_in);
1972 
1973 		if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX){
1974 			printf("Storvsc: %d segments is too much, "
1975 			    "only support %d segments\n",
1976 			    storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX);
1977 			return (EINVAL);
1978 		}
1979 
		/*
		 * We currently implement our own bounce buffer handling.
		 * Ideally we should use the BUS_DMA(9) framework, but with
		 * the current BUS_DMA code there is no callback API to check
		 * the page alignment of middle segments before busdma decides
		 * whether a bounce buffer is needed for a particular segment.
		 * There is a callback, "bus_dma_filter_t *filter", but its
		 * parameters are not sufficient for the storvsc driver.
		 * TODO:
		 *	Add page alignment check in BUS_DMA(9) callback. Once
		 *	this is complete, switch the following code to use
		 *	BUS_DMA(9) for storvsc bounce buffer support.
		 */
1993 		/* check if we need to create bounce buffer */
1994 		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
1995 		    storvsc_sg_count, &not_aligned_seg_bits);
1996 		if (ret != -1) {
1997 			reqp->bounce_sgl =
1998 			    storvsc_create_bounce_buffer(storvsc_sg_count,
1999 			    reqp->vstor_packet.u.vm_srb.data_in);
2000 			if (NULL == reqp->bounce_sgl) {
2001 				printf("Storvsc_error: "
2002 				    "create bounce buffer failed.\n");
2003 				return (ENOMEM);
2004 			}
2005 
2006 			reqp->bounce_sgl_count = storvsc_sg_count;
2007 			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
2008 
2009 			/*
2010 			 * if it is write, we need copy the original data
2011 			 *to bounce buffer
2012 			 */
2013 			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2014 				storvsc_copy_sgl_to_bounce_buf(
2015 				    reqp->bounce_sgl,
2016 				    storvsc_sglist,
2017 				    storvsc_sg_count,
2018 				    reqp->not_aligned_seg_bits);
2019 			}
2020 
2021 			/* transfer virtual address to physical frame number */
2022 			if (reqp->not_aligned_seg_bits & 0x1){
2023  				phys_addr =
2024 				    vtophys(reqp->bounce_sgl->sg_iov[0].iov_base);
2025 			}else{
2026  				phys_addr =
2027 					vtophys(storvsc_sglist[0].ds_addr);
2028 			}
2029 			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2030 
2031 			pfn = phys_addr >> PAGE_SHIFT;
2032 			prplist->gpa_page[0] = pfn;
2033 
2034 			for (i = 1; i < storvsc_sg_count; i++) {
2035 				if (reqp->not_aligned_seg_bits & (1 << i)) {
2036 					phys_addr =
2037 					    vtophys(reqp->bounce_sgl->sg_iov[i].iov_base);
2038 				} else {
2039 					phys_addr =
2040 					    vtophys(storvsc_sglist[i].ds_addr);
2041 				}
2042 
2043 				pfn = phys_addr >> PAGE_SHIFT;
2044 				prplist->gpa_page[i] = pfn;
2045 			}
2046 			reqp->prp_cnt = i;
2047 		} else {
2048 			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
2049 
2050 			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2051 
2052 			for (i = 0; i < storvsc_sg_count; i++) {
2053 				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
2054 				pfn = phys_addr >> PAGE_SHIFT;
2055 				prplist->gpa_page[i] = pfn;
2056 			}
2057 			reqp->prp_cnt = i;
2058 
2059 			/* check the last segment cross boundary or not */
2060 			offset = phys_addr & PAGE_MASK;
2061 			if (offset) {
2062 				/* Add one more PRP entry */
2063 				phys_addr =
2064 				    vtophys(storvsc_sglist[i-1].ds_addr +
2065 				    PAGE_SIZE - offset);
2066 				pfn = phys_addr >> PAGE_SHIFT;
2067 				prplist->gpa_page[i] = pfn;
2068 				reqp->prp_cnt++;
2069 			}
2070 
2071 			reqp->bounce_sgl_count = 0;
2072 		}
2073 		reqp->softc->sysctl_data.data_sg_cnt++;
2074 		break;
2075 	}
2076 	default:
2077 		printf("Unknow flags: %d\n", ccb->ccb_h.flags);
2078 		return(EINVAL);
2079 	}
2080 
2081 	return(0);
2082 }
2083 
2084 static uint32_t
2085 is_scsi_valid(const struct scsi_inquiry_data *inq_data)
2086 {
2087 	u_int8_t type;
2088 
2089 	type = SID_TYPE(inq_data);
2090 	if (type == T_NODEVICE)
2091 		return (0);
2092 	if (SID_QUAL(inq_data) == SID_QUAL_BAD_LU)
2093 		return (0);
2094 	return (1);
2095 }
2096 
2097 /**
2098  * @brief completion function before returning to CAM
2099  *
2100  * I/O process has been completed and the result needs
2101  * to be passed to the CAM layer.
2102  * Free resources related to this request.
2103  *
2104  * @param reqp pointer to a request structure
2105  */
2106 static void
2107 storvsc_io_done(struct hv_storvsc_request *reqp)
2108 {
2109 	union ccb *ccb = reqp->ccb;
2110 	struct ccb_scsiio *csio = &ccb->csio;
2111 	struct storvsc_softc *sc = reqp->softc;
2112 	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
2113 	bus_dma_segment_t *ori_sglist = NULL;
2114 	int ori_sg_count = 0;
2115 	const struct scsi_generic *cmd;
2116 
2117 	/* destroy bounce buffer if it is used */
2118 	if (reqp->bounce_sgl_count) {
2119 		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
2120 		ori_sg_count = ccb->csio.sglist_cnt;
2121 
2122 		/*
2123 		 * If it is READ operation, we should copy back the data
2124 		 * to original SG list.
2125 		 */
2126 		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2127 			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
2128 			    ori_sg_count,
2129 			    reqp->bounce_sgl,
2130 			    reqp->not_aligned_seg_bits);
2131 		}
2132 
2133 		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
2134 		reqp->bounce_sgl_count = 0;
2135 	}
2136 
2137 	if (reqp->retries > 0) {
2138 		mtx_lock(&sc->hs_lock);
2139 #if HVS_TIMEOUT_TEST
2140 		xpt_print(ccb->ccb_h.path,
2141 			"%u: IO returned after timeout, "
2142 			"waking up timer handler if any.\n", ticks);
2143 		mtx_lock(&reqp->event.mtx);
2144 		cv_signal(&reqp->event.cv);
2145 		mtx_unlock(&reqp->event.mtx);
2146 #endif
2147 		reqp->retries = 0;
2148 		xpt_print(ccb->ccb_h.path,
2149 			"%u: IO returned after timeout, "
2150 			"stopping timer if any.\n", ticks);
2151 		mtx_unlock(&sc->hs_lock);
2152 	}
2153 
2154 #ifdef notyet
2155 	/*
2156 	 * callout_drain() will wait for the timer handler to finish
2157 	 * if it is running. So we don't need any lock to synchronize
2158 	 * between this routine and the timer handler.
2159 	 * Note that we need to make sure reqp is not freed when timer
2160 	 * handler is using or will use it.
2161 	 */
2162 	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
2163 		callout_drain(&reqp->callout);
2164 	}
2165 #endif
2166 	cmd = (const struct scsi_generic *)
2167 	    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
2168 	     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
2169 
2170 	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
2171 	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
2172 	int srb_status = SRB_STATUS(vm_srb->srb_status);
2173 #ifdef DIAGNOSTIC
2174 	if (hv_storvsc_srb_status != -1) {
2175 		srb_status = SRB_STATUS(hv_storvsc_srb_status & 0x3f);
2176 		hv_storvsc_srb_status = -1;
2177 	}
2178 #endif /* DIAGNOSTIC */
2179 	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
2180 		if (srb_status != SRB_STATUS_SUCCESS) {
2181 			bool log_error = true;
2182 			switch (srb_status) {
2183 				case SRB_STATUS_PENDING:
2184 					/* We should never get this */
2185 					panic("storvsc_io_done: SRB_STATUS_PENDING");
2186 					break;
2187 				case SRB_STATUS_ABORTED:
2188 					/*
2189 					 * storvsc doesn't support aborts yet
2190 					 * but if we ever get this status
2191 					 * the I/O is complete - treat it as a
2192 					 * timeout
2193 					 */
2194 					ccb->ccb_h.status |= CAM_CMD_TIMEOUT;
2195 					break;
2196 				case SRB_STATUS_ABORT_FAILED:
2197 					/* We should never get this */
2198 					panic("storvsc_io_done: SRB_STATUS_ABORT_FAILED");
2199 					break;
2200 				case SRB_STATUS_ERROR:
2201 					/*
2202 					 * We should never get this.
2203 					 * Treat it as a CAM_UNREC_HBA_ERROR.
2204 					 * It will be retried
2205 					 */
2206 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2207 					break;
2208 				case SRB_STATUS_BUSY:
2209 					/* Host is busy. Delay and retry */
2210 					ccb->ccb_h.status |= CAM_BUSY;
2211 					break;
2212 				case SRB_STATUS_INVALID_REQUEST:
2213 				case SRB_STATUS_INVALID_PATH_ID:
2214 				case SRB_STATUS_NO_DEVICE:
2215 				case SRB_STATUS_INVALID_TARGET_ID:
2216 					/*
2217 					 * These indicate an invalid address
2218 					 * and really should never be seen.
2219 					 * A CAM_PATH_INVALID could be
2220 					 * used here but I want to run
2221 					 * down retries.  Do a CAM_BUSY
2222 					 * since the host might be having issues.
2223 					 */
2224 					ccb->ccb_h.status |= CAM_BUSY;
2225 					break;
2226 				case SRB_STATUS_TIMEOUT:
2227 				case SRB_STATUS_COMMAND_TIMEOUT:
2228 					/* The backend has timed this out */
2229 					ccb->ccb_h.status |= CAM_BUSY;
2230 					break;
2231 				/* Some old pSCSI errors below */
2232 				case SRB_STATUS_SELECTION_TIMEOUT:
2233 				case SRB_STATUS_MESSAGE_REJECTED:
2234 				case SRB_STATUS_PARITY_ERROR:
2235 				case SRB_STATUS_NO_HBA:
2236 				case SRB_STATUS_DATA_OVERRUN:
2237 				case SRB_STATUS_UNEXPECTED_BUS_FREE:
2238 				case SRB_STATUS_PHASE_SEQUENCE_FAILURE:
2239 					/*
2240 					 * Old pSCSI responses, should never get.
2241 					 * If we do treat as a CAM_UNREC_HBA_ERROR
2242 					 * which will be retried
2243 					 */
2244 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2245 					break;
2246 				case SRB_STATUS_BUS_RESET:
2247 					ccb->ccb_h.status |= CAM_SCSI_BUS_RESET;
2248 					break;
2249 				case SRB_STATUS_BAD_SRB_BLOCK_LENGTH:
2250 					/*
2251 					 * The request block is malformed and
2252 					 * I doubt it is from the guest. Just retry.
2253 					 */
2254 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2255 					break;
2256 				/* Not used statuses just retry */
2257 				case SRB_STATUS_REQUEST_FLUSHED:
2258 				case SRB_STATUS_BAD_FUNCTION:
2259 				case SRB_STATUS_NOT_POWERED:
2260 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2261 					break;
2262 				case SRB_STATUS_INVALID_LUN:
2263 					/*
2264 					 * Don't log an EMS for this response since
2265 					 * there is no device at this LUN. This is a
2266 					 * normal and expected response when a device
2267 					 * is detached.
2268 					 */
2269 					ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
2270 					log_error = false;
2271 					break;
2272 				case SRB_STATUS_ERROR_RECOVERY:
2273 				case SRB_STATUS_LINK_DOWN:
2274 					/*
2275 					 * I don't ever expect these from
2276 					 * the host but if we ever get
2277 					 * retry after a delay
2278 					 */
2279 					ccb->ccb_h.status |= CAM_BUSY;
2280 					break;
2281 				default:
2282 					/*
2283 					 * An undefined response assert on
2284 					 * on debug builds else retry
2285 					 */
2286 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2287 					KASSERT(srb_status <= SRB_STATUS_LINK_DOWN,
2288 					    ("storvsc: %s, unexpected srb_status of 0x%x",
2289 					    __func__, srb_status));
2290 					break;
2291 			}
2292 			if (log_error) {
2293 				xpt_print(ccb->ccb_h.path, "The hypervisor's I/O adapter "
2294 					"driver received an unexpected response code 0x%x "
2295 					"for operation: %s. If this continues to occur, "
2296 					"report the condition to your hypervisor vendor so "
2297 					"they can rectify the issue.\n", srb_status,
2298 					scsi_op_desc(cmd->opcode, NULL));
2299 			}
2300 		} else {
2301 			ccb->ccb_h.status |= CAM_REQ_CMP;
2302 		}
2303 
2304 		if (cmd->opcode == INQUIRY &&
2305 		    srb_status == SRB_STATUS_SUCCESS) {
2306 			int resp_xfer_len, resp_buf_len, data_len;
2307 			uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
2308 			struct scsi_inquiry_data *inq_data =
2309 			    (struct scsi_inquiry_data *)csio->data_ptr;
2310 
2311 			/* Get the buffer length reported by host */
2312 			resp_xfer_len = vm_srb->transfer_len;
2313 
2314 			/* Get the available buffer length */
2315 			resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
2316 			data_len = (resp_buf_len < resp_xfer_len) ?
2317 			    resp_buf_len : resp_xfer_len;
2318 			if (bootverbose && data_len >= 5) {
2319 				xpt_print(ccb->ccb_h.path, "storvsc inquiry "
2320 				    "(%d) [%x %x %x %x %x ... ]\n", data_len,
2321 				    resp_buf[0], resp_buf[1], resp_buf[2],
2322 				    resp_buf[3], resp_buf[4]);
2323 			}
2324 			/*
2325 			 * XXX: Hyper-V (since win2012r2) responses inquiry with
2326 			 * unknown version (0) for GEN-2 DVD device.
2327 			 * Manually set the version number to SPC3 in order to
2328 			 * ask CAM to continue probing with "PROBE_REPORT_LUNS".
2329 			 * see probedone() in scsi_xpt.c
2330 			 */
2331 			if (SID_TYPE(inq_data) == T_CDROM &&
2332 			    inq_data->version == 0 &&
2333 			    (vmstor_proto_version >= VMSTOR_PROTOCOL_VERSION_WIN8)) {
2334 				inq_data->version = SCSI_REV_SPC3;
2335 				if (bootverbose) {
2336 					xpt_print(ccb->ccb_h.path,
2337 					    "set version from 0 to %d\n",
2338 					    inq_data->version);
2339 				}
2340 			}
2341 			/*
2342 			 * XXX: Manually fix the wrong response returned from WS2012
2343 			 */
2344 			if (!is_scsi_valid(inq_data) &&
2345 			    (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2346 			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8 ||
2347 			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN7)) {
2348 				if (data_len >= 4 &&
2349 				    (resp_buf[2] == 0 || resp_buf[3] == 0)) {
2350 					resp_buf[2] = SCSI_REV_SPC3;
2351 					resp_buf[3] = 2; // resp fmt must be 2
2352 					if (bootverbose)
2353 						xpt_print(ccb->ccb_h.path,
2354 						    "fix version and resp fmt for 0x%x\n",
2355 						    vmstor_proto_version);
2356 				}
2357 			} else if (data_len >= SHORT_INQUIRY_LENGTH) {
2358 				char vendor[16];
2359 
2360 				cam_strvis(vendor, inq_data->vendor,
2361 				    sizeof(inq_data->vendor), sizeof(vendor));
2362 				/*
2363 				 * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
2364 				 * WIN2012 R2 in order to support UNMAP feature.
2365 				 */
2366 				if (!strncmp(vendor, "Msft", 4) &&
2367 				    SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
2368 				    (vmstor_proto_version ==
2369 				     VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2370 				     vmstor_proto_version ==
2371 				     VMSTOR_PROTOCOL_VERSION_WIN8)) {
2372 					inq_data->version = SCSI_REV_SPC3;
2373 					if (bootverbose) {
2374 						xpt_print(ccb->ccb_h.path,
2375 						    "storvsc upgrades "
2376 						    "SPC2 to SPC3\n");
2377 					}
2378 				}
2379 			}
2380 		}
2381 	} else {
2382 		/**
2383 		 * On Some Windows hosts TEST_UNIT_READY command can return
2384 		 * SRB_STATUS_ERROR and sense data, for example, asc=0x3a,1
2385 		 * "(Medium not present - tray closed)". This error can be
2386 		 * ignored since it will be sent to host periodically.
2387 		 */
2388 		boolean_t unit_not_ready = \
2389 		    vm_srb->scsi_status == SCSI_STATUS_CHECK_COND &&
2390 		    cmd->opcode == TEST_UNIT_READY &&
2391 		    srb_status == SRB_STATUS_ERROR;
2392 		if (!unit_not_ready && bootverbose) {
2393 			mtx_lock(&sc->hs_lock);
2394 			xpt_print(ccb->ccb_h.path,
2395 				"storvsc scsi_status = %d, srb_status = %d\n",
2396 				vm_srb->scsi_status, srb_status);
2397 			mtx_unlock(&sc->hs_lock);
2398 		}
2399 		ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
2400 	}
2401 
2402 	ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
2403 	if (srb_status == SRB_STATUS_SUCCESS ||
2404 	    srb_status == SRB_STATUS_DATA_OVERRUN)
2405 		ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;
2406 	else
2407 		ccb->csio.resid = ccb->csio.dxfer_len;
2408 
2409 	if ((vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) != 0 &&
2410 	    reqp->sense_info_len != 0) {
2411 		csio->sense_resid = csio->sense_len - reqp->sense_info_len;
2412 		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
2413 	}
2414 
2415 	mtx_lock(&sc->hs_lock);
2416 	if (reqp->softc->hs_frozen == 1) {
2417 		xpt_print(ccb->ccb_h.path,
2418 			"%u: storvsc unfreezing softc 0x%p.\n",
2419 			ticks, reqp->softc);
2420 		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
2421 		reqp->softc->hs_frozen = 0;
2422 	}
2423 	storvsc_free_request(sc, reqp);
2424 	mtx_unlock(&sc->hs_lock);
2425 
2426 	xpt_done_direct(ccb);
2427 }
2428 
2429 /**
2430  * @brief Free a request structure
2431  *
2432  * Free a request structure by returning it to the free list
2433  *
2434  * @param sc pointer to a softc
2435  * @param reqp pointer to a request structure
2436  */
2437 static void
2438 storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
2439 {
2440 
2441 	LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
2442 }
2443 
2444 /**
2445  * @brief Determine type of storage device from GUID
2446  *
2447  * Using the type GUID, determine if this is a StorVSC (paravirtual
2448  * SCSI or BlkVSC (paravirtual IDE) device.
2449  *
2450  * @param dev a device
2451  * returns an enum
2452  */
2453 static enum hv_storage_type
2454 storvsc_get_storage_type(device_t dev)
2455 {
2456 	device_t parent = device_get_parent(dev);
2457 
2458 	if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0)
2459 		return DRIVER_BLKVSC;
2460 	if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0)
2461 		return DRIVER_STORVSC;
2462 	return DRIVER_UNKNOWN;
2463 }
2464 
2465 #define	PCI_VENDOR_INTEL	0x8086
2466 #define	PCI_PRODUCT_PIIX4	0x7111
2467 
2468 static void
2469 storvsc_ada_probe_veto(void *arg __unused, struct cam_path *path,
2470     struct ata_params *ident_buf __unused, int *veto)
2471 {
2472 
2473 	/*
2474 	 * The ATA disks are shared with the controllers managed
2475 	 * by this driver, so veto the ATA disks' attachment; the
2476 	 * ATA disks will be attached as SCSI disks once this driver
2477 	 * attached.
2478 	 */
2479 	if (path->device->protocol == PROTO_ATA) {
2480 		struct ccb_pathinq cpi;
2481 
2482 		xpt_path_inq(&cpi, path);
2483 		if (cpi.ccb_h.status == CAM_REQ_CMP &&
2484 		    cpi.hba_vendor == PCI_VENDOR_INTEL &&
2485 		    cpi.hba_device == PCI_PRODUCT_PIIX4) {
2486 			(*veto)++;
2487 			if (bootverbose) {
2488 				xpt_print(path,
2489 				    "Disable ATA disks on "
2490 				    "simulated ATA controller (0x%04x%04x)\n",
2491 				    cpi.hba_device, cpi.hba_vendor);
2492 			}
2493 		}
2494 	}
2495 }
2496 
2497 static void
2498 storvsc_sysinit(void *arg __unused)
2499 {
2500 	if (vm_guest == VM_GUEST_HV) {
2501 		storvsc_handler_tag = EVENTHANDLER_REGISTER(ada_probe_veto,
2502 		    storvsc_ada_probe_veto, NULL, EVENTHANDLER_PRI_ANY);
2503 	}
2504 }
2505 SYSINIT(storvsc_sys_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysinit,
2506     NULL);
2507 
2508 static void
2509 storvsc_sysuninit(void *arg __unused)
2510 {
2511 	if (storvsc_handler_tag != NULL)
2512 		EVENTHANDLER_DEREGISTER(ada_probe_veto, storvsc_handler_tag);
2513 }
2514 SYSUNINIT(storvsc_sys_uninit, SI_SUB_DRIVERS, SI_ORDER_SECOND,
2515     storvsc_sysuninit, NULL);
2516