1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
5  * Copyright (c) 2012 NetApp Inc.
6  * Copyright (c) 2012 Citrix Inc.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice unmodified, this list of conditions, and the following
14  *    disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /**
32  * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
 * to the Common Access Method (CAM) layer.  CAM control blocks (CCBs) are
34  * converted into VSCSI protocol messages which are delivered to the parent
35  * partition StorVSP driver over the Hyper-V VMBUS.
36  */
37 
38 #include <sys/param.h>
39 #include <sys/proc.h>
40 #include <sys/condvar.h>
41 #include <sys/time.h>
42 #include <sys/systm.h>
43 #include <sys/sysctl.h>
44 #include <sys/sockio.h>
45 #include <sys/mbuf.h>
46 #include <sys/malloc.h>
47 #include <sys/module.h>
48 #include <sys/kernel.h>
49 #include <sys/queue.h>
50 #include <sys/lock.h>
51 #include <sys/sx.h>
52 #include <sys/taskqueue.h>
53 #include <sys/bus.h>
54 #include <sys/mutex.h>
55 #include <sys/callout.h>
56 #include <sys/smp.h>
57 #include <vm/vm.h>
58 #include <vm/pmap.h>
59 #include <vm/uma.h>
61 #include <sys/sema.h>
62 #include <sys/eventhandler.h>
63 #include <machine/bus.h>
64 
65 #include <cam/cam.h>
66 #include <cam/cam_ccb.h>
67 #include <cam/cam_periph.h>
68 #include <cam/cam_sim.h>
69 #include <cam/cam_xpt_sim.h>
70 #include <cam/cam_xpt_internal.h>
71 #include <cam/cam_debug.h>
72 #include <cam/scsi/scsi_all.h>
73 #include <cam/scsi/scsi_message.h>
74 
75 #include <dev/hyperv/include/hyperv.h>
76 #include <dev/hyperv/include/vmbus.h>
77 #include "hv_vstorage.h"
78 #include "vmbus_if.h"
79 
80 #define STORVSC_MAX_LUNS_PER_TARGET	(64)
81 #define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
82 #define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
83 #define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
84 #define STORVSC_MAX_TARGETS		(2)
85 
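/*
 * On-the-wire packet size.  When an older (pre-Win8) protocol is
 * negotiated, the trailing Win8 extension is not sent, so each packet
 * shrinks by vmscsi_size_delta bytes (see vmstor_proto_list below).
 */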
86 #define VSTOR_PKT_SIZE	(sizeof(struct vstor_packet) - vmscsi_size_delta)
87 
88 /*
89  * 33 segments are needed to allow 128KB maxio, in case the data
90  * in the first page is _not_ PAGE_SIZE aligned, e.g.
91  *
92  *     |<----------- 128KB ----------->|
93  *     |                               |
94  *  0  2K 4K    8K   16K   124K  128K  130K
95  *  |  |  |     |     |       |     |  |
96  *  +--+--+-----+-----+.......+-----+--+--+
97  *  |  |  |     |     |       |     |  |  | DATA
98  *  |  |  |     |     |       |     |  |  |
 *  +--+--+-----+-----+.......+-----+--+--+
100  *     |  |                         |  |
101  *     | 1|            31           | 1| ...... # of segments
102  */
103 #define STORVSC_DATA_SEGCNT_MAX		33
104 #define STORVSC_DATA_SEGSZ_MAX		PAGE_SIZE
105 #define STORVSC_DATA_SIZE_MAX		\
106 	((STORVSC_DATA_SEGCNT_MAX - 1) * STORVSC_DATA_SEGSZ_MAX)
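/*
 * With 4KB pages this yields (33 - 1) * 4096 = 128KB of maxio; one
 * segment is reserved for the case where the data does not start on
 * a page boundary, as shown in the diagram above.
 */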
107 
108 struct storvsc_softc;
109 
110 struct hv_sglist {
111 	struct iovec sg_iov[STORVSC_DATA_SEGCNT_MAX];
112 	u_short	sg_nseg;
113 	u_short	sg_maxseg;
114 };
115 
116 struct hv_sgl_node {
117 	LIST_ENTRY(hv_sgl_node) link;
118 	struct hv_sglist *sgl_data;
119 };
120 
struct hv_sgl_page_pool {
122 	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
123 	LIST_HEAD(, hv_sgl_node) free_sgl_list;
124 	boolean_t                is_init;
125 } g_hv_sgl_page_pool;
126 
127 enum storvsc_request_type {
128 	WRITE_TYPE,
129 	READ_TYPE,
130 	UNKNOWN_TYPE
131 };
132 
133 SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
134 	"Hyper-V storage interface");
135 
136 static u_int hv_storvsc_use_win8ext_flags = 1;
137 SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_win8ext_flags, CTLFLAG_RW,
138 	&hv_storvsc_use_win8ext_flags, 0,
139 	"Use win8 extension flags or not");
140 
141 static u_int hv_storvsc_use_pim_unmapped = 1;
142 SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN,
143 	&hv_storvsc_use_pim_unmapped, 0,
144 	"Optimize storvsc by using unmapped I/O");
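/*
 * When enabled, PIM_UNMAPPED is advertised in XPT_PATH_INQ below, so
 * CAM may pass down unmapped data buffers; only the physical pages
 * are needed to build the VMBUS GPA list, saving KVA mapping work.
 */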
145 
146 static u_int hv_storvsc_ringbuffer_size = (64 * PAGE_SIZE);
147 SYSCTL_UINT(_hw_storvsc, OID_AUTO, ringbuffer_size, CTLFLAG_RDTUN,
148 	&hv_storvsc_ringbuffer_size, 0, "Hyper-V storage ringbuffer size");
149 
150 static u_int hv_storvsc_max_io = 512;
151 SYSCTL_UINT(_hw_storvsc, OID_AUTO, max_io, CTLFLAG_RDTUN,
152 	&hv_storvsc_max_io, 0, "Hyper-V storage max io limit");
153 
154 static int hv_storvsc_chan_cnt = 0;
155 SYSCTL_INT(_hw_storvsc, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
156 	&hv_storvsc_chan_cnt, 0, "# of channels to use");
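/*
 * The CTLFLAG_RDTUN knobs above are loader tunables.  Example
 * loader.conf(5) settings (values are illustrative only):
 *
 *   hw.storvsc.ringbuffer_size="262144"
 *   hw.storvsc.max_io="256"
 *   hw.storvsc.chan_cnt="4"
 */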
157 #ifdef DIAGNOSTIC
158 static int hv_storvsc_srb_status = -1;
159 SYSCTL_INT(_hw_storvsc, OID_AUTO, srb_status,  CTLFLAG_RW,
160 	&hv_storvsc_srb_status, 0, "srb_status to inject");
TUNABLE_INT("hw.storvsc.srb_status", &hv_storvsc_srb_status);
162 #endif /* DIAGNOSTIC */
163 
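/*
 * Estimate of how many maximally-sized PRP-list packets fit in the
 * ring buffer; storvsc_attach() uses it to cap the number of
 * outstanding I/O requests.
 */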
164 #define STORVSC_MAX_IO						\
165 	vmbus_chan_prplist_nelem(hv_storvsc_ringbuffer_size,	\
166 	   STORVSC_DATA_SEGCNT_MAX, VSTOR_PKT_SIZE)
167 
168 struct hv_storvsc_sysctl {
169 	u_long		data_bio_cnt;
170 	u_long		data_vaddr_cnt;
171 	u_long		data_sg_cnt;
172 	u_long		chan_send_cnt[MAXCPU];
173 };
174 
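/*
 * A guest-physical-address (GPA) range descriptor: gpa_page[] holds
 * the page frame numbers backing the I/O buffer, while gpa_range
 * carries the total byte length and the offset into the first page.
 */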
175 struct storvsc_gpa_range {
176 	struct vmbus_gpa_range	gpa_range;
177 	uint64_t		gpa_page[STORVSC_DATA_SEGCNT_MAX];
178 } __packed;
179 
180 struct hv_storvsc_request {
181 	LIST_ENTRY(hv_storvsc_request)	link;
182 	struct vstor_packet		vstor_packet;
183 	int				prp_cnt;
184 	struct storvsc_gpa_range	prp_list;
185 	void				*sense_data;
186 	uint8_t				sense_info_len;
187 	uint8_t				retries;
188 	union ccb			*ccb;
189 	struct storvsc_softc		*softc;
190 	struct callout			callout;
191 	struct sema			synch_sema; /*Synchronize the request/response if needed */
192 	struct hv_sglist		*bounce_sgl;
193 	unsigned int			bounce_sgl_count;
194 	uint64_t			not_aligned_seg_bits;
195 	bus_dmamap_t			data_dmap;
196 };
197 
198 struct storvsc_softc {
199 	struct vmbus_channel		*hs_chan;
200 	LIST_HEAD(, hv_storvsc_request)	hs_free_list;
201 	struct mtx			hs_lock;
202 	struct storvsc_driver_props	*hs_drv_props;
203 	int 				hs_unit;
204 	uint32_t			hs_frozen;
205 	struct cam_sim			*hs_sim;
206 	struct cam_path 		*hs_path;
207 	uint32_t			hs_num_out_reqs;
208 	boolean_t			hs_destroy;
209 	boolean_t			hs_drain_notify;
210 	struct sema 			hs_drain_sema;
211 	struct hv_storvsc_request	hs_init_req;
212 	struct hv_storvsc_request	hs_reset_req;
213 	device_t			hs_dev;
214 	bus_dma_tag_t			storvsc_req_dtag;
215 	struct hv_storvsc_sysctl	sysctl_data;
216 	uint32_t			hs_nchan;
217 	struct vmbus_channel		*hs_sel_chan[MAXCPU];
218 };
219 
220 static eventhandler_tag storvsc_handler_tag;
221 /*
222  * The size of the vmscsi_request has changed in win8. The
223  * additional size is for the newly added elements in the
224  * structure. These elements are valid only when we are talking
225  * to a win8 host.
226  * Track the correct size we need to apply.
227  */
228 static int vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
229 
230 /**
231  * HyperV storvsc timeout testing cases:
232  * a. IO returned after first timeout;
233  * b. IO returned after second timeout and queue freeze;
234  * c. IO returned while timer handler is running
235  * The first can be tested by "sg_senddiag -vv /dev/daX",
236  * and the second and third can be done by
237  * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
238  */
239 #define HVS_TIMEOUT_TEST 0
240 
241 /*
242  * Bus/adapter reset functionality on the Hyper-V host is
243  * buggy and it will be disabled until
244  * it can be further tested.
245  */
246 #define HVS_HOST_RESET 0
247 
248 struct storvsc_driver_props {
249 	char		*drv_name;
250 	char		*drv_desc;
251 	uint8_t		drv_max_luns_per_target;
252 	uint32_t	drv_max_ios_per_target;
253 	uint32_t	drv_ringbuffer_size;
254 };
255 
256 enum hv_storage_type {
257 	DRIVER_BLKVSC,
258 	DRIVER_STORVSC,
259 	DRIVER_UNKNOWN
260 };
261 
262 #define HS_MAX_ADAPTERS 10
263 
264 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
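/* Reported by the host in chan_props.flags (VSTOR_OPERATION_QUERYPROPERTIES). */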
265 
266 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
267 static const struct hyperv_guid gStorVscDeviceType={
268 	.hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
269 		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
270 };
271 
272 /* {32412632-86cb-44a2-9b5c-50d1417354f5} */
273 static const struct hyperv_guid gBlkVscDeviceType={
274 	.hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
275 		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
276 };
277 
278 static struct storvsc_driver_props g_drv_props_table[] = {
279 	{"blkvsc", "Hyper-V IDE",
280 	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
281 	 20*PAGE_SIZE},
282 	{"storvsc", "Hyper-V SCSI",
283 	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
284 	 20*PAGE_SIZE}
285 };
286 
287 /*
288  * Sense buffer size changed in win8; have a run-time
289  * variable to track the size we should use.
290  */
291 static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
292 
293 /*
294  * The storage protocol version is determined during the
295  * initial exchange with the host.  It will indicate which
296  * storage functionality is available in the host.
 */
298 static int vmstor_proto_version;
299 
300 struct vmstor_proto {
301         int proto_version;
302         int sense_buffer_size;
303         int vmscsi_size_delta;
304 };
305 
306 static const struct vmstor_proto vmstor_proto_list[] = {
307         {
308                 VMSTOR_PROTOCOL_VERSION_WIN10,
309                 POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
310                 0
311         },
312         {
313                 VMSTOR_PROTOCOL_VERSION_WIN8_1,
314                 POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
315                 0
316         },
317         {
318                 VMSTOR_PROTOCOL_VERSION_WIN8,
319                 POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
320                 0
321         },
322         {
323                 VMSTOR_PROTOCOL_VERSION_WIN7,
324                 PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
325                 sizeof(struct vmscsi_win8_extension),
326         },
327         {
328                 VMSTOR_PROTOCOL_VERSION_WIN6,
329                 PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
330                 sizeof(struct vmscsi_win8_extension),
331         }
332 };
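/*
 * The table above is ordered newest-first; hv_storvsc_channel_init()
 * walks it in order and settles on the first version the host accepts.
 */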
333 
334 /* static functions */
335 static int storvsc_probe(device_t dev);
336 static int storvsc_attach(device_t dev);
337 static int storvsc_detach(device_t dev);
338 static void storvsc_poll(struct cam_sim * sim);
339 static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
340 static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
341 static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
342 static enum hv_storage_type storvsc_get_storage_type(device_t dev);
343 static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
344 static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc);
345 static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
346 					struct vstor_packet *vstor_packet,
347 					struct hv_storvsc_request *request);
348 static int hv_storvsc_connect_vsp(struct storvsc_softc *);
349 static void storvsc_io_done(struct hv_storvsc_request *reqp);
350 static void storvsc_copy_sgl_to_bounce_buf(struct hv_sglist *bounce_sgl,
351 				bus_dma_segment_t *orig_sgl,
352 				unsigned int orig_sgl_count,
353 				uint64_t seg_bits);
354 void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
355 				unsigned int dest_sgl_count,
356 				struct hv_sglist *src_sgl,
357 				uint64_t seg_bits);
358 
359 static device_method_t storvsc_methods[] = {
360 	/* Device interface */
361 	DEVMETHOD(device_probe,		storvsc_probe),
362 	DEVMETHOD(device_attach,	storvsc_attach),
363 	DEVMETHOD(device_detach,	storvsc_detach),
364 	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
365 	DEVMETHOD_END
366 };
367 
368 static driver_t storvsc_driver = {
369 	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
370 };
371 
372 DRIVER_MODULE(storvsc, vmbus, storvsc_driver, 0, 0);
373 MODULE_VERSION(storvsc, 1);
374 MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
375 
376 static void
377 storvsc_subchan_attach(struct storvsc_softc *sc,
378     struct vmbus_channel *new_channel)
379 {
380 	struct vmstor_chan_props props;
381 
382 	memset(&props, 0, sizeof(props));
383 
384 	vmbus_chan_cpu_rr(new_channel);
385 	vmbus_chan_open(new_channel,
386 	    sc->hs_drv_props->drv_ringbuffer_size,
387   	    sc->hs_drv_props->drv_ringbuffer_size,
388 	    (void *)&props,
389 	    sizeof(struct vmstor_chan_props),
390 	    hv_storvsc_on_channel_callback, sc);
391 }
392 
393 /**
394  * @brief Send multi-channel creation request to host
395  *
 * @param sc  the storvsc softc
 * @param max_subch  the max number of sub-channels supported by the host
398  */
399 static void
400 storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_subch)
401 {
402 	struct vmbus_channel **subchan;
403 	struct hv_storvsc_request *request;
404 	struct vstor_packet *vstor_packet;
405 	int request_subch;
406 	int i;
407 
	/* Compute the number of sub-channels to create. */
409 	request_subch = MIN(max_subch, mp_ncpus - 1);
410 
411 	request = &sc->hs_init_req;
412 
	/* Ask the host to create the sub-channels. */
414 	memset(request, 0, sizeof(struct hv_storvsc_request));
415 
416 	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
417 
418 	vstor_packet = &request->vstor_packet;
419 
420 	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
421 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
422 	vstor_packet->u.multi_channels_cnt = request_subch;
423 
424 	vmbus_chan_send(sc->hs_chan,
425 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
426 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
427 
428 	sema_wait(&request->synch_sema);
429 
430 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
431 	    vstor_packet->status != 0) {
		printf("Storvsc_error: create multi-channel invalid operation "
		    "(%d) or status (%u)\n",
434 		    vstor_packet->operation, vstor_packet->status);
435 		return;
436 	}
437 
438 	/* Update channel count */
439 	sc->hs_nchan = request_subch + 1;
440 
441 	/* Wait for sub-channels setup to complete. */
442 	subchan = vmbus_subchan_get(sc->hs_chan, request_subch);
443 
444 	/* Attach the sub-channels. */
445 	for (i = 0; i < request_subch; ++i)
446 		storvsc_subchan_attach(sc, subchan[i]);
447 
448 	/* Release the sub-channels. */
449 	vmbus_subchan_rel(subchan, request_subch);
450 
451 	if (bootverbose)
452 		printf("Storvsc create multi-channel success!\n");
453 }
454 
455 /**
456  * @brief initialize channel connection to parent partition
457  *
 * @param sc  the storvsc softc
459  * @returns  0 on success, non-zero error on failure
460  */
461 static int
462 hv_storvsc_channel_init(struct storvsc_softc *sc)
463 {
464 	int ret = 0, i;
465 	struct hv_storvsc_request *request;
466 	struct vstor_packet *vstor_packet;
467 	uint16_t max_subch;
468 	boolean_t support_multichannel;
469 	uint32_t version;
470 
471 	max_subch = 0;
472 	support_multichannel = FALSE;
473 
474 	request = &sc->hs_init_req;
475 	memset(request, 0, sizeof(struct hv_storvsc_request));
476 	vstor_packet = &request->vstor_packet;
477 	request->softc = sc;
478 
479 	/**
480 	 * Initiate the vsc/vsp initialization protocol on the open channel
481 	 */
482 	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
483 
484 	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
485 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
486 
487 
488 	ret = vmbus_chan_send(sc->hs_chan,
489 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
490 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
491 
492 	if (ret != 0)
493 		goto cleanup;
494 
495 	sema_wait(&request->synch_sema);
496 
497 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
498 		vstor_packet->status != 0) {
499 		goto cleanup;
500 	}
501 
502 	for (i = 0; i < nitems(vmstor_proto_list); i++) {
503 		/* reuse the packet for version range supported */
504 
505 		memset(vstor_packet, 0, sizeof(struct vstor_packet));
506 		vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
507 		vstor_packet->flags = REQUEST_COMPLETION_FLAG;
508 
509 		vstor_packet->u.version.major_minor =
510 			vmstor_proto_list[i].proto_version;
511 
512 		/* revision is only significant for Windows guests */
513 		vstor_packet->u.version.revision = 0;
514 
515 		ret = vmbus_chan_send(sc->hs_chan,
516 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
517 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
518 
519 		if (ret != 0)
520 			goto cleanup;
521 
522 		sema_wait(&request->synch_sema);
523 
524 		if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
525 			ret = EINVAL;
526 			goto cleanup;
527 		}
528 		if (vstor_packet->status == 0) {
529 			vmstor_proto_version =
530 				vmstor_proto_list[i].proto_version;
531 			sense_buffer_size =
532 				vmstor_proto_list[i].sense_buffer_size;
533 			vmscsi_size_delta =
534 				vmstor_proto_list[i].vmscsi_size_delta;
535 			break;
536 		}
537 	}
538 
539 	if (vstor_packet->status != 0) {
540 		ret = EINVAL;
541 		goto cleanup;
542 	}
543 	/**
544 	 * Query channel properties
545 	 */
546 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
547 	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
548 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
549 
550 	ret = vmbus_chan_send(sc->hs_chan,
551 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
552 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
553 
	if (ret != 0)
555 		goto cleanup;
556 
557 	sema_wait(&request->synch_sema);
558 
559 	/* TODO: Check returned version */
560 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
561 	    vstor_packet->status != 0) {
562 		goto cleanup;
563 	}
564 
565 	max_subch = vstor_packet->u.chan_props.max_channel_cnt;
566 	if (hv_storvsc_chan_cnt > 0 && hv_storvsc_chan_cnt < (max_subch + 1))
567 		max_subch = hv_storvsc_chan_cnt - 1;
568 
	/* The multi-channel feature is supported by WIN8 and newer hosts. */
570 	version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev);
571 	if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 &&
572 	    (vstor_packet->u.chan_props.flags &
573 	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
574 		support_multichannel = TRUE;
575 	}
576 	if (bootverbose) {
577 		device_printf(sc->hs_dev, "max chans %d%s\n", max_subch + 1,
578 		    support_multichannel ? ", multi-chan capable" : "");
579 	}
580 
581 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
582 	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
583 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
584 
585 	ret = vmbus_chan_send(sc->hs_chan,
586 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
587 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
588 
589 	if (ret != 0) {
590 		goto cleanup;
591 	}
592 
593 	sema_wait(&request->synch_sema);
594 
595 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
596 	    vstor_packet->status != 0)
597 		goto cleanup;
598 
599 	/*
600 	 * If multi-channel is supported, send multichannel create
601 	 * request to host.
602 	 */
603 	if (support_multichannel && max_subch > 0)
604 		storvsc_send_multichannel_request(sc, max_subch);
605 cleanup:
606 	sema_destroy(&request->synch_sema);
607 	return (ret);
608 }
609 
610 /**
 * @brief Open channel connection to parent partition StorVSP driver
612  *
613  * Open and initialize channel connection to parent partition StorVSP driver.
614  *
 * @param sc  the storvsc softc
616  * @returns 0 on success, non-zero error on failure
617  */
618 static int
619 hv_storvsc_connect_vsp(struct storvsc_softc *sc)
620 {
621 	int ret = 0;
622 	struct vmstor_chan_props props;
623 
624 	memset(&props, 0, sizeof(struct vmstor_chan_props));
625 
626 	/*
627 	 * Open the channel
628 	 */
629 	vmbus_chan_cpu_rr(sc->hs_chan);
630 	ret = vmbus_chan_open(
631 		sc->hs_chan,
632 		sc->hs_drv_props->drv_ringbuffer_size,
633 		sc->hs_drv_props->drv_ringbuffer_size,
634 		(void *)&props,
635 		sizeof(struct vmstor_chan_props),
636 		hv_storvsc_on_channel_callback, sc);
637 
638 	if (ret != 0) {
639 		return ret;
640 	}
641 
642 	ret = hv_storvsc_channel_init(sc);
643 	return (ret);
644 }
645 
646 #if HVS_HOST_RESET
647 static int
648 hv_storvsc_host_reset(struct storvsc_softc *sc)
649 {
650 	int ret = 0;
651 
652 	struct hv_storvsc_request *request;
653 	struct vstor_packet *vstor_packet;
654 
655 	request = &sc->hs_reset_req;
656 	request->softc = sc;
657 	vstor_packet = &request->vstor_packet;
658 
659 	sema_init(&request->synch_sema, 0, "stor synch sema");
660 
661 	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
662 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
663 
	ret = vmbus_chan_send(sc->hs_chan,
665 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
666 	    vstor_packet, VSTOR_PKT_SIZE,
667 	    (uint64_t)(uintptr_t)&sc->hs_reset_req);
668 
669 	if (ret != 0) {
670 		goto cleanup;
671 	}
672 
673 	sema_wait(&request->synch_sema);
674 
675 	/*
676 	 * At this point, all outstanding requests in the adapter
	 * should have been flushed out and returned to us
678 	 */
679 
680 cleanup:
681 	sema_destroy(&request->synch_sema);
682 	return (ret);
683 }
684 #endif /* HVS_HOST_RESET */
685 
686 /**
687  * @brief Function to initiate an I/O request
688  *
 * @param sc pointer to the storvsc softc
690  * @param request pointer to a request structure
691  * @returns 0 on success, non-zero error on failure
692  */
693 static int
694 hv_storvsc_io_request(struct storvsc_softc *sc,
695 					  struct hv_storvsc_request *request)
696 {
697 	struct vstor_packet *vstor_packet = &request->vstor_packet;
698 	struct vmbus_channel* outgoing_channel = NULL;
699 	int ret = 0, ch_sel;
700 
701 	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
702 
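	/*
	 * The SRB length on the wire excludes the Win8 extension when an
	 * older protocol was negotiated (vmscsi_size_delta != 0).
	 */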
703 	vstor_packet->u.vm_srb.length =
704 	    sizeof(struct vmscsi_req) - vmscsi_size_delta;
705 
706 	vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
707 
708 	vstor_packet->u.vm_srb.transfer_len =
709 	    request->prp_list.gpa_range.gpa_len;
710 
711 	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
712 
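	/*
	 * Select the outgoing channel by round-robining on LUN plus the
	 * current CPU, spreading requests across all sub-channels.
	 */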
713 	ch_sel = (vstor_packet->u.vm_srb.lun + curcpu) % sc->hs_nchan;
714 	/*
	 * If the kernel has panicked, we are dumping core.  Since
	 * storvsc_poll() always polls sc->hs_chan, we must send on that
	 * channel or a poll timeout will occur.
718 	 */
719 	if (KERNEL_PANICKED()) {
720 		outgoing_channel = sc->hs_chan;
721 	} else {
722 		outgoing_channel = sc->hs_sel_chan[ch_sel];
723 	}
724 
725 	mtx_unlock(&request->softc->hs_lock);
726 	if (request->prp_list.gpa_range.gpa_len) {
727 		ret = vmbus_chan_send_prplist(outgoing_channel,
728 		    &request->prp_list.gpa_range, request->prp_cnt,
729 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
730 	} else {
731 		ret = vmbus_chan_send(outgoing_channel,
732 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
733 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
734 	}
735 	/* statistic for successful request sending on each channel */
736 	if (!ret) {
737 		sc->sysctl_data.chan_send_cnt[ch_sel]++;
738 	}
739 	mtx_lock(&request->softc->hs_lock);
740 
741 	if (ret != 0) {
		printf("Unable to send packet %p ret %d\n", vstor_packet, ret);
743 	} else {
744 		atomic_add_int(&sc->hs_num_out_reqs, 1);
745 	}
746 
747 	return (ret);
748 }
749 
750 
751 /**
752  * Process IO_COMPLETION_OPERATION and ready
753  * the result to be completed for upper layer
754  * processing by the CAM layer.
755  */
756 static void
757 hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
758 			   struct vstor_packet *vstor_packet,
759 			   struct hv_storvsc_request *request)
760 {
761 	struct vmscsi_req *vm_srb;
762 
763 	vm_srb = &vstor_packet->u.vm_srb;
764 
765 	/*
766 	 * Copy some fields of the host's response into the request structure,
767 	 * because the fields will be used later in storvsc_io_done().
768 	 */
769 	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
770 	request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
771 	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;
772 
773 	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
774 			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
775 		/* Autosense data available */
776 
777 		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
778 				("vm_srb->sense_info_len <= "
779 				 "request->sense_info_len"));
780 
781 		memcpy(request->sense_data, vm_srb->u.sense_data,
782 			vm_srb->sense_info_len);
783 
784 		request->sense_info_len = vm_srb->sense_info_len;
785 	}
786 
787 	/* Complete request by passing to the CAM layer */
788 	storvsc_io_done(request);
789 	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
790 	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
791 		sema_post(&sc->hs_drain_sema);
792 	}
793 }
794 
795 static void
796 hv_storvsc_rescan_target(struct storvsc_softc *sc)
797 {
798 	path_id_t pathid;
799 	target_id_t targetid;
800 	union ccb *ccb;
801 
802 	pathid = cam_sim_path(sc->hs_sim);
803 	targetid = CAM_TARGET_WILDCARD;
804 
805 	/*
806 	 * Allocate a CCB and schedule a rescan.
807 	 */
808 	ccb = xpt_alloc_ccb_nowait();
809 	if (ccb == NULL) {
810 		printf("unable to alloc CCB for rescan\n");
811 		return;
812 	}
813 
814 	if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
815 	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
		printf("unable to create path for rescan, pathid: %u, "
817 		    "targetid: %u\n", pathid, targetid);
818 		xpt_free_ccb(ccb);
819 		return;
820 	}
821 
822 	if (targetid == CAM_TARGET_WILDCARD)
823 		ccb->ccb_h.func_code = XPT_SCAN_BUS;
824 	else
825 		ccb->ccb_h.func_code = XPT_SCAN_TGT;
826 
827 	xpt_rescan(ccb);
828 }
829 
830 static void
831 hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc)
832 {
833 	int ret = 0;
834 	struct storvsc_softc *sc = xsc;
835 	uint32_t bytes_recvd;
836 	uint64_t request_id;
837 	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
838 	struct hv_storvsc_request *request;
839 	struct vstor_packet *vstor_packet;
840 
841 	bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8);
842 	ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id);
843 	KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough"));
844 	/* XXX check bytes_recvd to make sure that it contains enough data */
845 
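	/*
	 * request_id echoes the transaction id passed to vmbus_chan_send*(),
	 * i.e. the hv_storvsc_request pointer, so the originating request
	 * can be recovered directly from the completion.
	 */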
846 	while ((ret == 0) && (bytes_recvd > 0)) {
847 		request = (struct hv_storvsc_request *)(uintptr_t)request_id;
848 
849 		if ((request == &sc->hs_init_req) ||
850 			(request == &sc->hs_reset_req)) {
851 			memcpy(&request->vstor_packet, packet,
852 				   sizeof(struct vstor_packet));
853 			sema_post(&request->synch_sema);
854 		} else {
855 			vstor_packet = (struct vstor_packet *)packet;
856 			switch(vstor_packet->operation) {
857 			case VSTOR_OPERATION_COMPLETEIO:
858 				if (request == NULL)
859 					panic("VMBUS: storvsc received a "
860 					    "packet with NULL request id in "
861 					    "COMPLETEIO operation.");
862 
863 				hv_storvsc_on_iocompletion(sc,
864 							vstor_packet, request);
865 				break;
866 			case VSTOR_OPERATION_REMOVEDEVICE:
867 				printf("VMBUS: storvsc operation %d not "
868 				    "implemented.\n", vstor_packet->operation);
869 				/* TODO: implement */
870 				break;
871 			case VSTOR_OPERATION_ENUMERATE_BUS:
872 				hv_storvsc_rescan_target(sc);
873 				break;
874 			default:
875 				break;
876 			}
877 		}
878 
		bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8);
880 		ret = vmbus_chan_recv(channel, packet, &bytes_recvd,
881 		    &request_id);
882 		KASSERT(ret != ENOBUFS,
883 		    ("storvsc recvbuf is not large enough"));
884 		/*
885 		 * XXX check bytes_recvd to make sure that it contains
886 		 * enough data
887 		 */
888 	}
889 }
890 
891 /**
892  * @brief StorVSC probe function
893  *
 * Device probe function.  Returns BUS_PROBE_DEFAULT if the input device
 * is a StorVSC device.  Otherwise, ENXIO is returned.  If the input device is
896  * for BlkVSC (paravirtual IDE) device and this support is disabled in
897  * favor of the emulated ATA/IDE device, return ENXIO.
898  *
 * @param dev a device
 * @returns BUS_PROBE_DEFAULT on success, ENXIO if not a matching StorVSC device
901  */
902 static int
903 storvsc_probe(device_t dev)
904 {
905 	int ret	= ENXIO;
906 
907 	switch (storvsc_get_storage_type(dev)) {
908 	case DRIVER_BLKVSC:
		if (bootverbose)
910 			device_printf(dev,
911 			    "Enlightened ATA/IDE detected\n");
912 		device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc);
913 		ret = BUS_PROBE_DEFAULT;
914 		break;
915 	case DRIVER_STORVSC:
		if (bootverbose)
917 			device_printf(dev, "Enlightened SCSI device detected\n");
918 		device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc);
919 		ret = BUS_PROBE_DEFAULT;
920 		break;
921 	default:
922 		ret = ENXIO;
923 	}
924 	return (ret);
925 }
926 
927 static void
928 storvsc_create_chan_sel(struct storvsc_softc *sc)
929 {
930 	struct vmbus_channel **subch;
931 	int i, nsubch;
932 
933 	sc->hs_sel_chan[0] = sc->hs_chan;
934 	nsubch = sc->hs_nchan - 1;
935 	if (nsubch == 0)
936 		return;
937 
938 	subch = vmbus_subchan_get(sc->hs_chan, nsubch);
939 	for (i = 0; i < nsubch; i++)
940 		sc->hs_sel_chan[i + 1] = subch[i];
941 	vmbus_subchan_rel(subch, nsubch);
942 }
943 
944 static int
945 storvsc_init_requests(device_t dev)
946 {
947 	struct storvsc_softc *sc = device_get_softc(dev);
948 	struct hv_storvsc_request *reqp;
949 	int error, i;
950 
951 	LIST_INIT(&sc->hs_free_list);
952 
953 	error = bus_dma_tag_create(
954 		bus_get_dma_tag(dev),		/* parent */
955 		1,				/* alignment */
956 		PAGE_SIZE,			/* boundary */
957 		BUS_SPACE_MAXADDR,		/* lowaddr */
958 		BUS_SPACE_MAXADDR,		/* highaddr */
959 		NULL, NULL,			/* filter, filterarg */
960 		STORVSC_DATA_SIZE_MAX,		/* maxsize */
961 		STORVSC_DATA_SEGCNT_MAX,	/* nsegments */
962 		STORVSC_DATA_SEGSZ_MAX,		/* maxsegsize */
963 		0,				/* flags */
964 		NULL,				/* lockfunc */
965 		NULL,				/* lockfuncarg */
966 		&sc->storvsc_req_dtag);
967 	if (error) {
968 		device_printf(dev, "failed to create storvsc dma tag\n");
969 		return (error);
970 	}
971 
972 	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
973 		reqp = malloc(sizeof(struct hv_storvsc_request),
974 				 M_DEVBUF, M_WAITOK|M_ZERO);
975 		reqp->softc = sc;
976 		error = bus_dmamap_create(sc->storvsc_req_dtag, 0,
977 				&reqp->data_dmap);
978 		if (error) {
979 			device_printf(dev, "failed to allocate storvsc "
980 			    "data dmamap\n");
981 			goto cleanup;
982 		}
983 		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
984 	}
985 	return (0);
986 
987 cleanup:
988 	while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) {
989 		LIST_REMOVE(reqp, link);
990 		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
991 		free(reqp, M_DEVBUF);
992 	}
993 	return (error);
994 }
995 
996 static void
997 storvsc_sysctl(device_t dev)
998 {
999 	struct sysctl_oid_list *child;
1000 	struct sysctl_ctx_list *ctx;
1001 	struct sysctl_oid *ch_tree, *chid_tree;
1002 	struct storvsc_softc *sc;
1003 	char name[16];
1004 	int i;
1005 
1006 	sc = device_get_softc(dev);
1007 	ctx = device_get_sysctl_ctx(dev);
1008 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
1009 
1010 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt",
1011 		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_bio_cnt,
		"# of bio data blocks");
1013 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt",
1014 		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_vaddr_cnt,
		"# of vaddr data blocks");
1016 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt",
1017 		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_sg_cnt,
		"# of sg data blocks");
1019 
1020 	/* dev.storvsc.UNIT.channel */
1021 	ch_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "channel",
1022 		CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1023 	if (ch_tree == NULL)
1024 		return;
1025 
1026 	for (i = 0; i < sc->hs_nchan; i++) {
1027 		uint32_t ch_id;
1028 
1029 		ch_id = vmbus_chan_id(sc->hs_sel_chan[i]);
1030 		snprintf(name, sizeof(name), "%d", ch_id);
1031 		/* dev.storvsc.UNIT.channel.CHID */
1032 		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
1033 			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1034 		if (chid_tree == NULL)
1035 			return;
1036 		/* dev.storvsc.UNIT.channel.CHID.send_req */
1037 		SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
1038 			"send_req", CTLFLAG_RD, &sc->sysctl_data.chan_send_cnt[i],
			"# of requests sent from this channel");
1040 	}
1041 }
1042 
1043 /**
1044  * @brief StorVSC attach function
1045  *
1046  * Function responsible for allocating per-device structures,
1047  * setting up CAM interfaces and scanning for available LUNs to
1048  * be used for SCSI device peripherals.
1049  *
 * @param dev a device
1051  * @returns 0 on success or an error on failure
1052  */
1053 static int
1054 storvsc_attach(device_t dev)
1055 {
1056 	enum hv_storage_type stor_type;
1057 	struct storvsc_softc *sc;
1058 	struct cam_devq *devq;
1059 	int ret, i, j;
1060 	struct hv_storvsc_request *reqp;
1061 	struct root_hold_token *root_mount_token = NULL;
1062 	struct hv_sgl_node *sgl_node = NULL;
1063 	void *tmp_buff = NULL;
1064 
1065 	/*
1066 	 * We need to serialize storvsc attach calls.
1067 	 */
1068 	root_mount_token = root_mount_hold("storvsc");
1069 
1070 	sc = device_get_softc(dev);
1071 	sc->hs_nchan = 1;
1072 	sc->hs_chan = vmbus_get_channel(dev);
1073 
1074 	stor_type = storvsc_get_storage_type(dev);
1075 
1076 	if (stor_type == DRIVER_UNKNOWN) {
1077 		ret = ENODEV;
1078 		goto cleanup;
1079 	}
1080 
1081 	/* fill in driver specific properties */
1082 	sc->hs_drv_props = &g_drv_props_table[stor_type];
1083 	sc->hs_drv_props->drv_ringbuffer_size = hv_storvsc_ringbuffer_size;
1084 	sc->hs_drv_props->drv_max_ios_per_target =
1085 		MIN(STORVSC_MAX_IO, hv_storvsc_max_io);
1086 	if (bootverbose) {
1087 		printf("storvsc ringbuffer size: %d, max_io: %d\n",
1088 			sc->hs_drv_props->drv_ringbuffer_size,
1089 			sc->hs_drv_props->drv_max_ios_per_target);
1090 	}
1091 	/* fill in device specific properties */
1092 	sc->hs_unit	= device_get_unit(dev);
1093 	sc->hs_dev	= dev;
1094 
1095 	mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
1096 
1097 	ret = storvsc_init_requests(dev);
1098 	if (ret != 0)
1099 		goto cleanup;
1100 
1101 	/* create sg-list page pool */
1102 	if (FALSE == g_hv_sgl_page_pool.is_init) {
1103 		g_hv_sgl_page_pool.is_init = TRUE;
1104 		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
1105 		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
1106 
1107 		/*
1108 		 * Pre-create SG list, each SG list with
1109 		 * STORVSC_DATA_SEGCNT_MAX segments, each
1110 		 * segment has one page buffer
1111 		 */
1112 		for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; i++) {
			sgl_node = malloc(sizeof(struct hv_sgl_node),
1114 			    M_DEVBUF, M_WAITOK|M_ZERO);
1115 
1116 			sgl_node->sgl_data = malloc(sizeof(struct hv_sglist),
1117 			    M_DEVBUF, M_WAITOK|M_ZERO);
1118 
1119 			for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
1120 				tmp_buff = malloc(PAGE_SIZE,
1121 				    M_DEVBUF, M_WAITOK|M_ZERO);
1122 
1123 				sgl_node->sgl_data->sg_iov[j].iov_base =
1124 				    tmp_buff;
1125 			}
1126 
1127 			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
1128 			    sgl_node, link);
1129 		}
1130 	}
1131 
1132 	sc->hs_destroy = FALSE;
1133 	sc->hs_drain_notify = FALSE;
1134 	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
1135 
1136 	ret = hv_storvsc_connect_vsp(sc);
1137 	if (ret != 0) {
1138 		goto cleanup;
1139 	}
1140 
1141 	/* Construct cpu to channel mapping */
1142 	storvsc_create_chan_sel(sc);
1143 
1144 	/*
1145 	 * Create the device queue.
1146 	 * Hyper-V maps each target to one SCSI HBA
1147 	 */
1148 	devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
1149 	if (devq == NULL) {
1150 		device_printf(dev, "Failed to alloc device queue\n");
1151 		ret = ENOMEM;
1152 		goto cleanup;
1153 	}
1154 
1155 	sc->hs_sim = cam_sim_alloc(storvsc_action,
1156 				storvsc_poll,
1157 				sc->hs_drv_props->drv_name,
1158 				sc,
1159 				sc->hs_unit,
1160 				&sc->hs_lock, 1,
1161 				sc->hs_drv_props->drv_max_ios_per_target,
1162 				devq);
1163 
1164 	if (sc->hs_sim == NULL) {
1165 		device_printf(dev, "Failed to alloc sim\n");
1166 		cam_simq_free(devq);
1167 		ret = ENOMEM;
1168 		goto cleanup;
1169 	}
1170 
1171 	mtx_lock(&sc->hs_lock);
1172 	/* bus_id is set to 0, need to get it from VMBUS channel query? */
1173 	if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
1174 		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1175 		mtx_unlock(&sc->hs_lock);
1176 		device_printf(dev, "Unable to register SCSI bus\n");
1177 		ret = ENXIO;
1178 		goto cleanup;
1179 	}
1180 
1181 	if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
1182 		 cam_sim_path(sc->hs_sim),
1183 		CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
1184 		xpt_bus_deregister(cam_sim_path(sc->hs_sim));
1185 		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1186 		mtx_unlock(&sc->hs_lock);
1187 		device_printf(dev, "Unable to create path\n");
1188 		ret = ENXIO;
1189 		goto cleanup;
1190 	}
1191 
1192 	mtx_unlock(&sc->hs_lock);
1193 
1194 	storvsc_sysctl(dev);
1195 
1196 	root_mount_rel(root_mount_token);
1197 	return (0);
1198 
1199 
1200 cleanup:
1201 	root_mount_rel(root_mount_token);
1202 	while (!LIST_EMPTY(&sc->hs_free_list)) {
1203 		reqp = LIST_FIRST(&sc->hs_free_list);
1204 		LIST_REMOVE(reqp, link);
1205 		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1206 		free(reqp, M_DEVBUF);
1207 	}
1208 
1209 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1210 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1211 		LIST_REMOVE(sgl_node, link);
1212 		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
1213 			free(sgl_node->sgl_data->sg_iov[j].iov_base, M_DEVBUF);
1214 		}
1215 		free(sgl_node->sgl_data, M_DEVBUF);
1216 		free(sgl_node, M_DEVBUF);
1217 	}
1218 
1219 	return (ret);
1220 }
1221 
1222 /**
1223  * @brief StorVSC device detach function
1224  *
1225  * This function is responsible for safely detaching a
1226  * StorVSC device.  This includes waiting for inbound responses
1227  * to complete and freeing associated per-device structures.
1228  *
1229  * @param dev a device
 * @returns 0 on success
1231  */
1232 static int
1233 storvsc_detach(device_t dev)
1234 {
1235 	struct storvsc_softc *sc = device_get_softc(dev);
1236 	struct hv_storvsc_request *reqp = NULL;
1237 	struct hv_sgl_node *sgl_node = NULL;
1238 	int j = 0;
1239 
1240 	sc->hs_destroy = TRUE;
1241 
1242 	/*
1243 	 * At this point, all outbound traffic should be disabled. We
1244 	 * only allow inbound traffic (responses) to proceed so that
1245 	 * outstanding requests can be completed.
1246 	 */
1247 
1248 	sc->hs_drain_notify = TRUE;
1249 	sema_wait(&sc->hs_drain_sema);
1250 	sc->hs_drain_notify = FALSE;
1251 
1252 	/*
1253 	 * Since we have already drained, we don't need to busy wait.
1254 	 * The call to close the channel will reset the callback
1255 	 * under the protection of the incoming channel lock.
1256 	 */
1257 
1258 	vmbus_chan_close(sc->hs_chan);
1259 
1260 	mtx_lock(&sc->hs_lock);
1261 	while (!LIST_EMPTY(&sc->hs_free_list)) {
1262 		reqp = LIST_FIRST(&sc->hs_free_list);
1263 		LIST_REMOVE(reqp, link);
1264 		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1265 		free(reqp, M_DEVBUF);
1266 	}
1267 	mtx_unlock(&sc->hs_lock);
1268 
1269 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1270 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1271 		LIST_REMOVE(sgl_node, link);
1272 		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){
1273 			free(sgl_node->sgl_data->sg_iov[j].iov_base, M_DEVBUF);
1274 		}
1275 		free(sgl_node->sgl_data, M_DEVBUF);
1276 		free(sgl_node, M_DEVBUF);
1277 	}
1278 
1279 	return (0);
1280 }
1281 
1282 #if HVS_TIMEOUT_TEST
1283 /**
1284  * @brief unit test for timed out operations
1285  *
1286  * This function provides unit testing capability to simulate
 * timed out operations.  Recompilation with HVS_TIMEOUT_TEST=1
1288  * is required.
1289  *
1290  * @param reqp pointer to a request structure
1291  * @param opcode SCSI operation being performed
1292  * @param wait if 1, wait for I/O to complete
1293  */
1294 static void
1295 storvsc_timeout_test(struct hv_storvsc_request *reqp,
1296 		uint8_t opcode, int wait)
1297 {
1298 	int ret;
1299 	union ccb *ccb = reqp->ccb;
1300 	struct storvsc_softc *sc = reqp->softc;
1301 
	if (reqp->vstor_packet.u.vm_srb.cdb[0] != opcode) {
1303 		return;
1304 	}
1305 
1306 	if (wait) {
1307 		mtx_lock(&reqp->event.mtx);
1308 	}
1309 	ret = hv_storvsc_io_request(sc, reqp);
1310 	if (ret != 0) {
1311 		if (wait) {
1312 			mtx_unlock(&reqp->event.mtx);
1313 		}
1314 		printf("%s: io_request failed with %d.\n",
1315 				__func__, ret);
1316 		ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1317 		mtx_lock(&sc->hs_lock);
1318 		storvsc_free_request(sc, reqp);
1319 		xpt_done(ccb);
1320 		mtx_unlock(&sc->hs_lock);
1321 		return;
1322 	}
1323 
1324 	if (wait) {
1325 		xpt_print(ccb->ccb_h.path,
1326 				"%u: %s: waiting for IO return.\n",
1327 				ticks, __func__);
1328 		ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
1329 		mtx_unlock(&reqp->event.mtx);
1330 		xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
1331 				ticks, __func__, (ret == 0)?
1332 				"IO return detected" :
1333 				"IO return not detected");
1334 		/*
1335 		 * Now both the timer handler and io done are running
1336 		 * simultaneously. We want to confirm the io done always
1337 		 * finishes after the timer handler exits. So reqp used by
1338 		 * timer handler is not freed or stale. Do busy loop for
1339 		 * another 1/10 second to make sure io done does
1340 		 * wait for the timer handler to complete.
1341 		 */
1342 		DELAY(100*1000);
1343 		mtx_lock(&sc->hs_lock);
1344 		xpt_print(ccb->ccb_h.path,
1345 				"%u: %s: finishing, queue frozen %d, "
1346 				"ccb status 0x%x scsi_status 0x%x.\n",
1347 				ticks, __func__, sc->hs_frozen,
1348 				ccb->ccb_h.status,
1349 				ccb->csio.scsi_status);
1350 		mtx_unlock(&sc->hs_lock);
1351 	}
1352 }
1353 #endif /* HVS_TIMEOUT_TEST */
1354 
1355 #ifdef notyet
1356 /**
1357  * @brief timeout handler for requests
1358  *
1359  * This function is called as a result of a callout expiring.
1360  *
1361  * @param arg pointer to a request
1362  */
1363 static void
1364 storvsc_timeout(void *arg)
1365 {
1366 	struct hv_storvsc_request *reqp = arg;
1367 	struct storvsc_softc *sc = reqp->softc;
1368 	union ccb *ccb = reqp->ccb;
1369 
1370 	if (reqp->retries == 0) {
1371 		mtx_lock(&sc->hs_lock);
1372 		xpt_print(ccb->ccb_h.path,
1373 		    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
1374 		    ticks, reqp, ccb->ccb_h.timeout / 1000);
1375 		cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
1376 		mtx_unlock(&sc->hs_lock);
1377 
1378 		reqp->retries++;
1379 		callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
1380 		    0, storvsc_timeout, reqp, 0);
1381 #if HVS_TIMEOUT_TEST
1382 		storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
1383 #endif
1384 		return;
1385 	}
1386 
1387 	mtx_lock(&sc->hs_lock);
1388 	xpt_print(ccb->ccb_h.path,
1389 		"%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
1390 		ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
1391 		(sc->hs_frozen == 0)?
1392 		"freezing the queue" : "the queue is already frozen");
1393 	if (sc->hs_frozen == 0) {
1394 		sc->hs_frozen = 1;
1395 		xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
1396 	}
1397 	mtx_unlock(&sc->hs_lock);
1398 
1399 #if HVS_TIMEOUT_TEST
1400 	storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
1401 #endif
1402 }
1403 #endif
1404 
1405 /**
1406  * @brief StorVSC device poll function
1407  *
1408  * This function is responsible for servicing requests when
1409  * interrupts are disabled (i.e when we are dumping core.)
1410  *
1411  * @param sim a pointer to a CAM SCSI interface module
1412  */
1413 static void
1414 storvsc_poll(struct cam_sim *sim)
1415 {
1416 	struct storvsc_softc *sc = cam_sim_softc(sim);
1417 
1418 	mtx_assert(&sc->hs_lock, MA_OWNED);
1419 	mtx_unlock(&sc->hs_lock);
1420 	hv_storvsc_on_channel_callback(sc->hs_chan, sc);
1421 	mtx_lock(&sc->hs_lock);
1422 }
1423 
1424 /**
1425  * @brief StorVSC device action function
1426  *
1427  * This function is responsible for handling SCSI operations which
1428  * are passed from the CAM layer.  The requests are in the form of
1429  * CAM control blocks which indicate the action being performed.
1430  * Not all actions require converting the request to a VSCSI protocol
1431  * message - these actions can be responded to by this driver.
1432  * Requests which are destined for a backend storage device are converted
1433  * to a VSCSI protocol message and sent on the channel connection associated
1434  * with this device.
1435  *
1436  * @param sim pointer to a CAM SCSI interface module
1437  * @param ccb pointer to a CAM control block
1438  */
1439 static void
1440 storvsc_action(struct cam_sim *sim, union ccb *ccb)
1441 {
1442 	struct storvsc_softc *sc = cam_sim_softc(sim);
1443 	int res;
1444 
1445 	mtx_assert(&sc->hs_lock, MA_OWNED);
1446 	switch (ccb->ccb_h.func_code) {
1447 	case XPT_PATH_INQ: {
1448 		struct ccb_pathinq *cpi = &ccb->cpi;
1449 
1450 		cpi->version_num = 1;
1451 		cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
1452 		cpi->target_sprt = 0;
1453 		cpi->hba_misc = PIM_NOBUSRESET;
1454 		if (hv_storvsc_use_pim_unmapped)
1455 			cpi->hba_misc |= PIM_UNMAPPED;
1456 		cpi->maxio = STORVSC_DATA_SIZE_MAX;
1457 		cpi->hba_eng_cnt = 0;
1458 		cpi->max_target = STORVSC_MAX_TARGETS;
1459 		cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
1460 		cpi->initiator_id = cpi->max_target;
1461 		cpi->bus_id = cam_sim_bus(sim);
1462 		cpi->base_transfer_speed = 300000;
1463 		cpi->transport = XPORT_SAS;
1464 		cpi->transport_version = 0;
1465 		cpi->protocol = PROTO_SCSI;
1466 		cpi->protocol_version = SCSI_REV_SPC2;
1467 		strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
1468 		strlcpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
1469 		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
1470 		cpi->unit_number = cam_sim_unit(sim);
1471 
1472 		ccb->ccb_h.status = CAM_REQ_CMP;
1473 		xpt_done(ccb);
1474 		return;
1475 	}
1476 	case XPT_GET_TRAN_SETTINGS: {
1477 		struct  ccb_trans_settings *cts = &ccb->cts;
1478 
1479 		cts->transport = XPORT_SAS;
1480 		cts->transport_version = 0;
1481 		cts->protocol = PROTO_SCSI;
1482 		cts->protocol_version = SCSI_REV_SPC2;
1483 
1484 		/* enable tag queuing and disconnected mode */
1485 		cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
1486 		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
1487 		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
1488 		cts->xport_specific.valid = CTS_SPI_VALID_DISC;
1489 		cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
1490 
1491 		ccb->ccb_h.status = CAM_REQ_CMP;
1492 		xpt_done(ccb);
1493 		return;
1494 	}
1495 	case XPT_SET_TRAN_SETTINGS:	{
1496 		ccb->ccb_h.status = CAM_REQ_CMP;
1497 		xpt_done(ccb);
1498 		return;
1499 	}
1500 	case XPT_CALC_GEOMETRY:{
1501 		cam_calc_geometry(&ccb->ccg, 1);
1502 		xpt_done(ccb);
1503 		return;
1504 	}
1505 	case  XPT_RESET_BUS:
1506 	case  XPT_RESET_DEV:{
1507 #if HVS_HOST_RESET
1508 		if ((res = hv_storvsc_host_reset(sc)) != 0) {
1509 			xpt_print(ccb->ccb_h.path,
1510 				"hv_storvsc_host_reset failed with %d\n", res);
1511 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1512 			xpt_done(ccb);
1513 			return;
1514 		}
1515 		ccb->ccb_h.status = CAM_REQ_CMP;
1516 		xpt_done(ccb);
1517 		return;
1518 #else
1519 		xpt_print(ccb->ccb_h.path,
1520 				  "%s reset not supported.\n",
1521 				  (ccb->ccb_h.func_code == XPT_RESET_BUS)?
1522 				  "bus" : "dev");
1523 		ccb->ccb_h.status = CAM_REQ_INVALID;
1524 		xpt_done(ccb);
1525 		return;
1526 #endif	/* HVS_HOST_RESET */
1527 	}
1528 	case XPT_SCSI_IO:
1529 	case XPT_IMMED_NOTIFY: {
1530 		struct hv_storvsc_request *reqp = NULL;
1531 		bus_dmamap_t dmap_saved;
1532 
1533 		if (ccb->csio.cdb_len == 0) {
			panic("cdb_len is 0\n");
1535 		}
1536 
1537 		if (LIST_EMPTY(&sc->hs_free_list)) {
1538 			ccb->ccb_h.status = CAM_REQUEUE_REQ;
1539 			if (sc->hs_frozen == 0) {
1540 				sc->hs_frozen = 1;
1541 				xpt_freeze_simq(sim, /* count*/1);
1542 			}
1543 			xpt_done(ccb);
1544 			return;
1545 		}
1546 
1547 		reqp = LIST_FIRST(&sc->hs_free_list);
1548 		LIST_REMOVE(reqp, link);
1549 
1550 		/* Save the data_dmap before reset request */
1551 		dmap_saved = reqp->data_dmap;
1552 
1553 		/* XXX this is ugly */
1554 		bzero(reqp, sizeof(struct hv_storvsc_request));
1555 
1556 		/* Restore necessary bits */
1557 		reqp->data_dmap = dmap_saved;
1558 		reqp->softc = sc;
1559 
1560 		ccb->ccb_h.status |= CAM_SIM_QUEUED;
1561 		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
1562 			ccb->ccb_h.status = CAM_REQ_INVALID;
1563 			xpt_done(ccb);
1564 			return;
1565 		}
1566 
1567 #ifdef notyet
1568 		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
1569 			callout_init(&reqp->callout, 1);
1570 			callout_reset_sbt(&reqp->callout,
1571 			    SBT_1MS * ccb->ccb_h.timeout, 0,
1572 			    storvsc_timeout, reqp, 0);
1573 #if HVS_TIMEOUT_TEST
1574 			cv_init(&reqp->event.cv, "storvsc timeout cv");
1575 			mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
1576 					NULL, MTX_DEF);
			switch (reqp->vstor_packet.u.vm_srb.cdb[0]) {
1578 				case MODE_SELECT_10:
1579 				case SEND_DIAGNOSTIC:
1580 					/* To have timer send the request. */
1581 					return;
1582 				default:
1583 					break;
1584 			}
1585 #endif /* HVS_TIMEOUT_TEST */
1586 		}
1587 #endif
1588 
1589 		if ((res = hv_storvsc_io_request(sc, reqp)) != 0) {
1590 			xpt_print(ccb->ccb_h.path,
1591 				"hv_storvsc_io_request failed with %d\n", res);
1592 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1593 			storvsc_free_request(sc, reqp);
1594 			xpt_done(ccb);
1595 			return;
1596 		}
1597 		return;
1598 	}
1599 
1600 	default:
1601 		ccb->ccb_h.status = CAM_REQ_INVALID;
1602 		xpt_done(ccb);
1603 		return;
1604 	}
1605 }
1606 
1607 /**
1608  * @brief destroy bounce buffer
1609  *
 * This function returns a Scatter/Gather list created by
 * storvsc_create_bounce_buffer() to the free pool.
 *
 * @param sgl - the Scatter/Gather list to be destroyed
1615  *
1616  */
1617 static void
1618 storvsc_destroy_bounce_buffer(struct hv_sglist *sgl)
1619 {
1620 	struct hv_sgl_node *sgl_node = NULL;
1621 	if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
1622 		printf("storvsc error: not enough in use sgl\n");
1623 		return;
1624 	}
1625 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
1626 	LIST_REMOVE(sgl_node, link);
1627 	sgl_node->sgl_data = sgl;
1628 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
1629 }
1630 
1631 /**
1632  * @brief create bounce buffer
1633  *
 * This function obtains a pre-allocated Scatter/Gather list whose
 * segments are backed by page-aligned, page-size buffers.
 *
 * @param seg_count - number of SG-list segments
 * @param write - if WRITE_TYPE, set each segment's used size to 0;
 * otherwise set the used size to the page size.
 *
 * Returns NULL on failure.
1642  */
1643 static struct hv_sglist *
1644 storvsc_create_bounce_buffer(uint16_t seg_count, int write)
1645 {
1646 	int i = 0;
1647 	struct hv_sglist *bounce_sgl = NULL;
1648 	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
1649 	struct hv_sgl_node *sgl_node = NULL;
1650 
1651 	/* get struct hv_sglist from free_sgl_list */
1652 	if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1653 		printf("storvsc error: not enough free sgl\n");
1654 		return NULL;
1655 	}
1656 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1657 	LIST_REMOVE(sgl_node, link);
1658 	bounce_sgl = sgl_node->sgl_data;
1659 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
1660 
1661 	bounce_sgl->sg_maxseg = seg_count;
1662 
1663 	if (write == WRITE_TYPE)
1664 		bounce_sgl->sg_nseg = 0;
1665 	else
1666 		bounce_sgl->sg_nseg = seg_count;
1667 
1668 	for (i = 0; i < seg_count; i++)
1669 	        bounce_sgl->sg_iov[i].iov_len = buf_len;
1670 
1671 	return bounce_sgl;
1672 }
1673 
1674 /**
1675  * @brief copy data from SG list to bounce buffer
1676  *
 * This function copies data from the segments of one SG list into
 * another SG list that is used as a bounce buffer.
 *
 * @param bounce_sgl - the destination SG list
 * @param orig_sgl - the segments of the source SG list
 * @param orig_sgl_count - the number of segments
 * @param seg_bits - bitmask of segments needing the bounce buffer;
 *  a set bit means the segment is copied.
1685  *
1686  */
1687 static void
1688 storvsc_copy_sgl_to_bounce_buf(struct hv_sglist *bounce_sgl,
1689 			       bus_dma_segment_t *orig_sgl,
1690 			       unsigned int orig_sgl_count,
1691 			       uint64_t seg_bits)
1692 {
1693 	int src_sgl_idx = 0;
1694 
1695 	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
		if (seg_bits & (1ULL << src_sgl_idx)) {
1697 			memcpy(bounce_sgl->sg_iov[src_sgl_idx].iov_base,
1698 			    (void*)orig_sgl[src_sgl_idx].ds_addr,
1699 			    orig_sgl[src_sgl_idx].ds_len);
1700 
1701 			bounce_sgl->sg_iov[src_sgl_idx].iov_len =
1702 			    orig_sgl[src_sgl_idx].ds_len;
1703 		}
1704 	}
1705 }
1706 
1707 /**
 * @brief copy data from a bounce-buffer SG list to another SG list
 *
 * This function copies data from an SG list used as a bounce buffer
 * back into the segments of another SG list.
 *
 * @param dest_sgl - the destination SG list's segments
 * @param dest_sgl_count - the number of destination segments
 * @param src_sgl - the source (bounce) SG list
 * @param seg_bits - bitmask of source segments that used the bounce buffer
1717  *
1718  */
1719 void
1720 storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
1721 				    unsigned int dest_sgl_count,
1722 				    struct hv_sglist* src_sgl,
1723 				    uint64_t seg_bits)
1724 {
1725 	int sgl_idx = 0;
1726 
1727 	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
		if (seg_bits & (1ULL << sgl_idx)) {
1729 			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
1730 			    src_sgl->sg_iov[sgl_idx].iov_base,
1731 			    src_sgl->sg_iov[sgl_idx].iov_len);
1732 		}
1733 	}
1734 }
1735 
1736 /**
 * @brief check whether an SG list needs a bounce buffer
 *
 * This function checks whether a bounce buffer is needed for the SG list.
 *
 * @param sgl - the SG list's segments
 * @param sg_count - the number of segments
 * @param bits - returned bitmask of segments needing the bounce buffer
 *
 * Returns -1 if the SG list does not need a bounce buffer, 0 otherwise.
1746  */
1747 static int
1748 storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
1749 				unsigned int sg_count,
1750 				uint64_t *bits)
1751 {
1752 	int i = 0;
1753 	int offset = 0;
1754 	uint64_t phys_addr = 0;
1755 	uint64_t tmp_bits = 0;
1756 	boolean_t found_hole = FALSE;
1757 	boolean_t pre_aligned = TRUE;
1758 
	if (sg_count < 2) {
1760 		return -1;
1761 	}
1762 
1763 	*bits = 0;
1764 
1765 	phys_addr = vtophys(sgl[0].ds_addr);
	offset = phys_addr - trunc_page(phys_addr);
1767 
1768 	if (offset != 0) {
1769 		pre_aligned = FALSE;
1770 		tmp_bits |= 1;
1771 	}
1772 
1773 	for (i = 1; i < sg_count; i++) {
1774 		phys_addr = vtophys(sgl[i].ds_addr);
		offset = phys_addr - trunc_page(phys_addr);
1776 
1777 		if (offset == 0) {
			if (FALSE == pre_aligned) {
				/*
				 * This segment starts page aligned but
				 * the previous one did not: we have
				 * found a hole.
				 */
				found_hole = TRUE;
			}
1785 			pre_aligned = TRUE;
1786 		} else {
1787 			tmp_bits |= 1ULL << i;
1788 			if (!pre_aligned) {
1789 				if (phys_addr != vtophys(sgl[i-1].ds_addr +
1790 				    sgl[i-1].ds_len)) {
1791 					/*
1792 					 * Check whether connect to previous
1793 					 * segment,if not, find the hole
1794 					 */
1795 					found_hole = TRUE;
1796 				}
1797 			} else {
1798 				found_hole = TRUE;
1799 			}
1800 			pre_aligned = FALSE;
1801 		}
1802 	}
1803 
1804 	if (!found_hole) {
1805 		return (-1);
1806 	} else {
1807 		*bits = tmp_bits;
		return (0);
1809 	}
1810 }
1811 
1812 /**
1813  * Copy bus_dma segments to multiple page buffer, which requires
1814  * the pages are compact composed except for the 1st and last pages.
1815  */
1816 static void
1817 storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
1818 {
1819 	struct hv_storvsc_request *reqp = arg;
1820 	union ccb *ccb = reqp->ccb;
1821 	struct ccb_scsiio *csio = &ccb->csio;
1822 	struct storvsc_gpa_range *prplist;
1823 	int i;
1824 
1825 	prplist = &reqp->prp_list;
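	/*
	 * A GPA range is a list of PFNs plus the byte offset of the
	 * data within the first page (gpa_ofs) and the total transfer
	 * length (gpa_len).
	 */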
1826 	prplist->gpa_range.gpa_len = csio->dxfer_len;
1827 	prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK;
1828 
1829 	for (i = 0; i < nsegs; i++) {
1830 #ifdef INVARIANTS
1831 #if !defined(__aarch64__)
1832 		if (nsegs > 1) {
1833 			if (i == 0) {
1834 				KASSERT((segs[i].ds_addr & PAGE_MASK) +
1835 				    segs[i].ds_len == PAGE_SIZE,
1836 				    ("invalid 1st page, ofs 0x%jx, len %zu",
1837 				     (uintmax_t)segs[i].ds_addr,
1838 				     segs[i].ds_len));
1839 			} else if (i == nsegs - 1) {
1840 				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0,
1841 				    ("invalid last page, ofs 0x%jx",
1842 				     (uintmax_t)segs[i].ds_addr));
1843 			} else {
1844 				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 &&
1845 				    segs[i].ds_len == PAGE_SIZE,
1846 				    ("not a full page, ofs 0x%jx, len %zu",
1847 				     (uintmax_t)segs[i].ds_addr,
1848 				     segs[i].ds_len));
1849 			}
1850 		}
1851 #endif
1852 #endif
1853 		prplist->gpa_page[i] = atop(segs[i].ds_addr);
1854 	}
1855 	reqp->prp_cnt = nsegs;
1856 
1857 	if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
1858 		bus_dmasync_op_t op;
1859 
1860 		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN)
1861 			op = BUS_DMASYNC_PREREAD;
1862 		else
1863 			op = BUS_DMASYNC_PREWRITE;
1864 
1865 		bus_dmamap_sync(reqp->softc->storvsc_req_dtag,
1866 		    reqp->data_dmap, op);
1867 	}
1868 }
1869 
1870 /**
1871  * @brief Fill in a request structure based on a CAM control block
1872  *
1873  * Fills in a request structure based on the contents of a CAM control
1874  * block.  The request structure holds the payload information for
1875  * VSCSI protocol request.
1876  *
 * @param ccb pointer to a CAM control block
1878  * @param reqp pointer to a request structure
1879  */
1880 static int
1881 create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
1882 {
1883 	struct ccb_scsiio *csio = &ccb->csio;
1884 	uint64_t phys_addr;
1885 	uint32_t pfn;
1886 	uint64_t not_aligned_seg_bits = 0;
1887 	int error;
1888 
1889 	/* refer to struct vmscsi_req for meanings of these two fields */
1890 	reqp->vstor_packet.u.vm_srb.port =
1891 		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
1892 	reqp->vstor_packet.u.vm_srb.path_id =
1893 		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
1894 
1895 	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
1896 	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
1897 
1898 	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
	if (ccb->ccb_h.flags & CAM_CDB_POINTER) {
1900 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
1901 			csio->cdb_len);
1902 	} else {
1903 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
1904 			csio->cdb_len);
1905 	}
1906 
1907 	if (hv_storvsc_use_win8ext_flags) {
1908 		reqp->vstor_packet.u.vm_srb.win8_extension.time_out_value = 60;
1909 		reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1910 			SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
1911 	}
1912 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1913 	case CAM_DIR_OUT:
1914 		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
1915 		if (hv_storvsc_use_win8ext_flags) {
1916 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1917 				SRB_FLAGS_DATA_OUT;
1918 		}
1919 		break;
1920 	case CAM_DIR_IN:
1921 		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
1922 		if (hv_storvsc_use_win8ext_flags) {
1923 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1924 				SRB_FLAGS_DATA_IN;
1925 		}
1926 		break;
1927 	case CAM_DIR_NONE:
1928 		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1929 		if (hv_storvsc_use_win8ext_flags) {
1930 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1931 				SRB_FLAGS_NO_DATA_TRANSFER;
1932 		}
1933 		break;
1934 	default:
1935 		printf("Error: unexpected data direction: 0x%x\n",
1936 			ccb->ccb_h.flags & CAM_DIR_MASK);
1937 		return (EINVAL);
1938 	}
1939 
1940 	reqp->sense_data     = &csio->sense_data;
1941 	reqp->sense_info_len = csio->sense_len;
1942 
1943 	reqp->ccb = ccb;
1944 	ccb->ccb_h.spriv_ptr0 = reqp;
1945 
1946 	if (0 == csio->dxfer_len) {
1947 		return (0);
1948 	}
1949 
1950 	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
1951 	case CAM_DATA_BIO:
1952 	case CAM_DATA_VADDR:
1953 		error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag,
1954 		    reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp,
1955 		    BUS_DMA_NOWAIT);
1956 		if (error) {
1957 			xpt_print(ccb->ccb_h.path,
1958 			    "bus_dmamap_load_ccb failed: %d\n", error);
1959 			return (error);
1960 		}
1961 		if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
1962 			reqp->softc->sysctl_data.data_bio_cnt++;
1963 		else
1964 			reqp->softc->sysctl_data.data_vaddr_cnt++;
1965 		break;
1966 
1967 	case CAM_DATA_SG:
1968 	{
1969 		struct storvsc_gpa_range *prplist;
1970 		int i = 0;
1971 		int offset = 0;
1972 		int ret;
1973 
1974 		bus_dma_segment_t *storvsc_sglist =
1975 		    (bus_dma_segment_t *)ccb->csio.data_ptr;
1976 		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
1977 
1978 		prplist = &reqp->prp_list;
1979 		prplist->gpa_range.gpa_len = csio->dxfer_len;
1980 
		printf("Storvsc: SG I/O operation, direction %d\n",
		    reqp->vstor_packet.u.vm_srb.data_in);
1983 
		if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX) {
			printf("Storvsc: %d segments exceed the "
			    "supported maximum of %d\n",
			    storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX);
1988 			return (EINVAL);
1989 		}
1990 
1991 		/*
1992 		 * We create our own bounce buffer function currently. Idealy
1993 		 * we should use BUS_DMA(9) framework. But with current BUS_DMA
1994 		 * code there is no callback API to check the page alignment of
1995 		 * middle segments before busdma can decide if a bounce buffer
1996 		 * is needed for particular segment. There is callback,
1997 		 * "bus_dma_filter_t *filter", but the parrameters are not
1998 		 * sufficient for storvsc driver.
1999 		 * TODO:
2000 		 *	Add page alignment check in BUS_DMA(9) callback. Once
2001 		 *	this is complete, switch the following code to use
2002 		 *	BUS_DMA(9) for storvsc bounce buffer support.
2003 		 */
2004 		/* check if we need to create bounce buffer */
2005 		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
2006 		    storvsc_sg_count, &not_aligned_seg_bits);
2007 		if (ret != -1) {
2008 			reqp->bounce_sgl =
2009 			    storvsc_create_bounce_buffer(storvsc_sg_count,
2010 			    reqp->vstor_packet.u.vm_srb.data_in);
2011 			if (NULL == reqp->bounce_sgl) {
2012 				printf("Storvsc_error: "
2013 				    "create bounce buffer failed.\n");
2014 				return (ENOMEM);
2015 			}
2016 
2017 			reqp->bounce_sgl_count = storvsc_sg_count;
2018 			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
2019 
2020 			/*
2021 			 * if it is write, we need copy the original data
2022 			 *to bounce buffer
2023 			 */
2024 			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2025 				storvsc_copy_sgl_to_bounce_buf(
2026 				    reqp->bounce_sgl,
2027 				    storvsc_sglist,
2028 				    storvsc_sg_count,
2029 				    reqp->not_aligned_seg_bits);
2030 			}
2031 
			/* translate virtual addresses to physical frame numbers */
			if (reqp->not_aligned_seg_bits & 0x1) {
				phys_addr =
				    vtophys(reqp->bounce_sgl->sg_iov[0].iov_base);
			} else {
				phys_addr =
				    vtophys(storvsc_sglist[0].ds_addr);
			}
2040 			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2041 
2042 			pfn = phys_addr >> PAGE_SHIFT;
2043 			prplist->gpa_page[0] = pfn;
2044 
2045 			for (i = 1; i < storvsc_sg_count; i++) {
				if (reqp->not_aligned_seg_bits & (1ULL << i)) {
2047 					phys_addr =
2048 					    vtophys(reqp->bounce_sgl->sg_iov[i].iov_base);
2049 				} else {
2050 					phys_addr =
2051 					    vtophys(storvsc_sglist[i].ds_addr);
2052 				}
2053 
2054 				pfn = phys_addr >> PAGE_SHIFT;
2055 				prplist->gpa_page[i] = pfn;
2056 			}
2057 			reqp->prp_cnt = i;
2058 		} else {
2059 			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
2060 
2061 			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2062 
2063 			for (i = 0; i < storvsc_sg_count; i++) {
2064 				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
2065 				pfn = phys_addr >> PAGE_SHIFT;
2066 				prplist->gpa_page[i] = pfn;
2067 			}
2068 			reqp->prp_cnt = i;
2069 
			/* check whether the last segment crosses a page boundary */
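			/*
			 * Example: if the last segment starts at byte
			 * offset 0x600 within its page, the transfer is
			 * assumed to spill into the following page, so
			 * that page's PFN is appended as one more PRP
			 * entry.
			 */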
2071 			offset = phys_addr & PAGE_MASK;
2072 			if (offset) {
2073 				/* Add one more PRP entry */
2074 				phys_addr =
2075 				    vtophys(storvsc_sglist[i-1].ds_addr +
2076 				    PAGE_SIZE - offset);
2077 				pfn = phys_addr >> PAGE_SHIFT;
2078 				prplist->gpa_page[i] = pfn;
2079 				reqp->prp_cnt++;
2080 			}
2081 
2082 			reqp->bounce_sgl_count = 0;
2083 		}
2084 		reqp->softc->sysctl_data.data_sg_cnt++;
2085 		break;
2086 	}
2087 	default:
		printf("Unknown flags: %d\n", ccb->ccb_h.flags);
		return (EINVAL);
2090 	}
2091 
	return (0);
2093 }
2094 
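/*
 * Return non-zero if the INQUIRY data describes a usable device:
 * the peripheral device type must not be T_NODEVICE and the LU
 * qualifier must not report an unsupported LU.
 */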
2095 static uint32_t
2096 is_scsi_valid(const struct scsi_inquiry_data *inq_data)
2097 {
2098 	u_int8_t type;
2099 
2100 	type = SID_TYPE(inq_data);
2101 	if (type == T_NODEVICE)
2102 		return (0);
2103 	if (SID_QUAL(inq_data) == SID_QUAL_BAD_LU)
2104 		return (0);
2105 	return (1);
2106 }
2107 
2108 /**
2109  * @brief completion function before returning to CAM
2110  *
 * The I/O process has been completed and the result needs
2112  * to be passed to the CAM layer.
2113  * Free resources related to this request.
2114  *
2115  * @param reqp pointer to a request structure
2116  */
2117 static void
2118 storvsc_io_done(struct hv_storvsc_request *reqp)
2119 {
2120 	union ccb *ccb = reqp->ccb;
2121 	struct ccb_scsiio *csio = &ccb->csio;
2122 	struct storvsc_softc *sc = reqp->softc;
2123 	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
2124 	bus_dma_segment_t *ori_sglist = NULL;
2125 	int ori_sg_count = 0;
2126 	const struct scsi_generic *cmd;
2127 
2128 	if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
2129 		bus_dmasync_op_t op;
2130 
2131 		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN)
2132 			op = BUS_DMASYNC_POSTREAD;
2133 		else
2134 			op = BUS_DMASYNC_POSTWRITE;
2135 
2136 		bus_dmamap_sync(reqp->softc->storvsc_req_dtag,
2137 		    reqp->data_dmap, op);
2138 		bus_dmamap_unload(sc->storvsc_req_dtag, reqp->data_dmap);
2139 	}
2140 
2141 	/* destroy bounce buffer if it is used */
2142 	if (reqp->bounce_sgl_count) {
2143 		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
2144 		ori_sg_count = ccb->csio.sglist_cnt;
2145 
2146 		/*
2147 		 * If it is READ operation, we should copy back the data
2148 		 * to original SG list.
2149 		 */
2150 		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2151 			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
2152 			    ori_sg_count,
2153 			    reqp->bounce_sgl,
2154 			    reqp->not_aligned_seg_bits);
2155 		}
2156 
2157 		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
2158 		reqp->bounce_sgl_count = 0;
2159 	}
2160 
2161 	if (reqp->retries > 0) {
2162 		mtx_lock(&sc->hs_lock);
2163 #if HVS_TIMEOUT_TEST
2164 		xpt_print(ccb->ccb_h.path,
2165 			"%u: IO returned after timeout, "
2166 			"waking up timer handler if any.\n", ticks);
2167 		mtx_lock(&reqp->event.mtx);
2168 		cv_signal(&reqp->event.cv);
2169 		mtx_unlock(&reqp->event.mtx);
2170 #endif
2171 		reqp->retries = 0;
2172 		xpt_print(ccb->ccb_h.path,
2173 			"%u: IO returned after timeout, "
2174 			"stopping timer if any.\n", ticks);
2175 		mtx_unlock(&sc->hs_lock);
2176 	}
2177 
2178 #ifdef notyet
2179 	/*
2180 	 * callout_drain() will wait for the timer handler to finish
2181 	 * if it is running. So we don't need any lock to synchronize
2182 	 * between this routine and the timer handler.
2183 	 * Note that we need to make sure reqp is not freed when timer
2184 	 * handler is using or will use it.
2185 	 */
2186 	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
2187 		callout_drain(&reqp->callout);
2188 	}
2189 #endif
2190 	cmd = (const struct scsi_generic *)
2191 	    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
2192 	     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
2193 
2194 	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
2195 	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
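	/*
	 * SRB_STATUS() masks off the flag bits (such as
	 * SRB_STATUS_AUTOSENSE_VALID, which is tested on the raw
	 * value further below), leaving only the status code.
	 */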
2196 	int srb_status = SRB_STATUS(vm_srb->srb_status);
2197 #ifdef DIAGNOSTIC
2198 	if (hv_storvsc_srb_status != -1) {
2199 		srb_status = SRB_STATUS(hv_storvsc_srb_status & 0x3f);
2200 		hv_storvsc_srb_status = -1;
2201 	}
2202 #endif /* DIAGNOSTIC */
2203 	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
2204 		if (srb_status != SRB_STATUS_SUCCESS) {
2205 			bool log_error = true;
2206 			switch (srb_status) {
2207 				case SRB_STATUS_PENDING:
2208 					/* We should never get this */
2209 					panic("storvsc_io_done: SRB_STATUS_PENDING");
2210 					break;
2211 				case SRB_STATUS_ABORTED:
2212 					/*
2213 					 * storvsc doesn't support aborts yet
2214 					 * but if we ever get this status
2215 					 * the I/O is complete - treat it as a
2216 					 * timeout
2217 					 */
2218 					ccb->ccb_h.status |= CAM_CMD_TIMEOUT;
2219 					break;
2220 				case SRB_STATUS_ABORT_FAILED:
2221 					/* We should never get this */
2222 					panic("storvsc_io_done: SRB_STATUS_ABORT_FAILED");
2223 					break;
2224 				case SRB_STATUS_ERROR:
2225 					/*
2226 					 * We should never get this.
2227 					 * Treat it as a CAM_UNREC_HBA_ERROR.
2228 					 * It will be retried
2229 					 */
2230 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2231 					break;
2232 				case SRB_STATUS_BUSY:
2233 					/* Host is busy. Delay and retry */
2234 					ccb->ccb_h.status |= CAM_BUSY;
2235 					break;
2236 				case SRB_STATUS_INVALID_REQUEST:
2237 				case SRB_STATUS_INVALID_PATH_ID:
2238 				case SRB_STATUS_NO_DEVICE:
2239 				case SRB_STATUS_INVALID_TARGET_ID:
2240 					/*
2241 					 * These indicate an invalid address
2242 					 * and really should never be seen.
2243 					 * A CAM_PATH_INVALID could be
2244 					 * used here but I want to run
2245 					 * down retries.  Do a CAM_BUSY
2246 					 * since the host might be having issues.
2247 					 */
2248 					ccb->ccb_h.status |= CAM_BUSY;
2249 					break;
2250 				case SRB_STATUS_TIMEOUT:
2251 				case SRB_STATUS_COMMAND_TIMEOUT:
2252 					/* The backend has timed this out */
2253 					ccb->ccb_h.status |= CAM_BUSY;
2254 					break;
2255 				/* Some old pSCSI errors below */
2256 				case SRB_STATUS_SELECTION_TIMEOUT:
2257 				case SRB_STATUS_MESSAGE_REJECTED:
2258 				case SRB_STATUS_PARITY_ERROR:
2259 				case SRB_STATUS_NO_HBA:
2260 				case SRB_STATUS_DATA_OVERRUN:
2261 				case SRB_STATUS_UNEXPECTED_BUS_FREE:
2262 				case SRB_STATUS_PHASE_SEQUENCE_FAILURE:
2263 					/*
2264 					 * Old pSCSI responses, should never get.
2265 					 * If we do treat as a CAM_UNREC_HBA_ERROR
2266 					 * which will be retried
2267 					 */
2268 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2269 					break;
2270 				case SRB_STATUS_BUS_RESET:
2271 					ccb->ccb_h.status |= CAM_SCSI_BUS_RESET;
2272 					break;
2273 				case SRB_STATUS_BAD_SRB_BLOCK_LENGTH:
2274 					/*
2275 					 * The request block is malformed and
2276 					 * I doubt it is from the guest. Just retry.
2277 					 */
2278 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2279 					break;
				/* Unused statuses, just retry */
2281 				case SRB_STATUS_REQUEST_FLUSHED:
2282 				case SRB_STATUS_BAD_FUNCTION:
2283 				case SRB_STATUS_NOT_POWERED:
2284 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2285 					break;
2286 				case SRB_STATUS_INVALID_LUN:
2287 					/*
2288 					 * Don't log an EMS for this response since
2289 					 * there is no device at this LUN. This is a
2290 					 * normal and expected response when a device
2291 					 * is detached.
2292 					 */
2293 					ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
2294 					log_error = false;
2295 					break;
2296 				case SRB_STATUS_ERROR_RECOVERY:
2297 				case SRB_STATUS_LINK_DOWN:
2298 					/*
2299 					 * I don't ever expect these from
2300 					 * the host but if we ever get
2301 					 * retry after a delay
2302 					 */
2303 					ccb->ccb_h.status |= CAM_BUSY;
2304 					break;
2305 				default:
2306 					/*
2307 					 * An undefined response assert on
2308 					 * on debug builds else retry
2309 					 */
2310 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2311 					KASSERT(srb_status <= SRB_STATUS_LINK_DOWN,
2312 					    ("storvsc: %s, unexpected srb_status of 0x%x",
2313 					    __func__, srb_status));
2314 					break;
2315 			}
2316 			if (log_error) {
2317 				xpt_print(ccb->ccb_h.path, "The hypervisor's I/O adapter "
2318 					"driver received an unexpected response code 0x%x "
2319 					"for operation: %s. If this continues to occur, "
2320 					"report the condition to your hypervisor vendor so "
2321 					"they can rectify the issue.\n", srb_status,
2322 					scsi_op_desc(cmd->opcode, NULL));
2323 			}
2324 		} else {
2325 			ccb->ccb_h.status |= CAM_REQ_CMP;
2326 		}
2327 
2328 		if (cmd->opcode == INQUIRY &&
2329 		    srb_status == SRB_STATUS_SUCCESS) {
2330 			int resp_xfer_len, resp_buf_len, data_len;
2331 			uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
2332 			struct scsi_inquiry_data *inq_data =
2333 			    (struct scsi_inquiry_data *)csio->data_ptr;
2334 
2335 			/* Get the buffer length reported by host */
2336 			resp_xfer_len = vm_srb->transfer_len;
2337 
			/*
			 * Get the available buffer length: byte 4 of an
			 * INQUIRY response holds the ADDITIONAL LENGTH
			 * field, so the full response is resp_buf[4] + 5
			 * bytes.
			 */
2339 			resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
2340 			data_len = (resp_buf_len < resp_xfer_len) ?
2341 			    resp_buf_len : resp_xfer_len;
2342 			if (bootverbose && data_len >= 5) {
2343 				xpt_print(ccb->ccb_h.path, "storvsc inquiry "
2344 				    "(%d) [%x %x %x %x %x ... ]\n", data_len,
2345 				    resp_buf[0], resp_buf[1], resp_buf[2],
2346 				    resp_buf[3], resp_buf[4]);
2347 			}
2348 			/*
2349 			 * XXX: Hyper-V (since win2012r2) responses inquiry with
2350 			 * unknown version (0) for GEN-2 DVD device.
2351 			 * Manually set the version number to SPC3 in order to
2352 			 * ask CAM to continue probing with "PROBE_REPORT_LUNS".
2353 			 * see probedone() in scsi_xpt.c
2354 			 */
2355 			if (SID_TYPE(inq_data) == T_CDROM &&
2356 			    inq_data->version == 0 &&
2357 			    (vmstor_proto_version >= VMSTOR_PROTOCOL_VERSION_WIN8)) {
2358 				inq_data->version = SCSI_REV_SPC3;
2359 				if (bootverbose) {
2360 					xpt_print(ccb->ccb_h.path,
2361 					    "set version from 0 to %d\n",
2362 					    inq_data->version);
2363 				}
2364 			}
2365 			/*
2366 			 * XXX: Manually fix the wrong response returned from WS2012
2367 			 */
2368 			if (!is_scsi_valid(inq_data) &&
2369 			    (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2370 			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8 ||
2371 			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN7)) {
2372 				if (data_len >= 4 &&
2373 				    (resp_buf[2] == 0 || resp_buf[3] == 0)) {
2374 					resp_buf[2] = SCSI_REV_SPC3;
					resp_buf[3] = 2; /* resp fmt must be 2 */
2376 					if (bootverbose)
2377 						xpt_print(ccb->ccb_h.path,
2378 						    "fix version and resp fmt for 0x%x\n",
2379 						    vmstor_proto_version);
2380 				}
2381 			} else if (data_len >= SHORT_INQUIRY_LENGTH) {
2382 				char vendor[16];
2383 
2384 				cam_strvis(vendor, inq_data->vendor,
2385 				    sizeof(inq_data->vendor), sizeof(vendor));
2386 				/*
2387 				 * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
2388 				 * WIN2012 R2 in order to support UNMAP feature.
2389 				 */
2390 				if (!strncmp(vendor, "Msft", 4) &&
2391 				    SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
2392 				    (vmstor_proto_version ==
2393 				     VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2394 				     vmstor_proto_version ==
2395 				     VMSTOR_PROTOCOL_VERSION_WIN8)) {
2396 					inq_data->version = SCSI_REV_SPC3;
2397 					if (bootverbose) {
2398 						xpt_print(ccb->ccb_h.path,
2399 						    "storvsc upgrades "
2400 						    "SPC2 to SPC3\n");
2401 					}
2402 				}
2403 			}
2404 		}
2405 	} else {
2406 		/**
2407 		 * On Some Windows hosts TEST_UNIT_READY command can return
2408 		 * SRB_STATUS_ERROR and sense data, for example, asc=0x3a,1
2409 		 * "(Medium not present - tray closed)". This error can be
2410 		 * ignored since it will be sent to host periodically.
2411 		 */
		boolean_t unit_not_ready =
2413 		    vm_srb->scsi_status == SCSI_STATUS_CHECK_COND &&
2414 		    cmd->opcode == TEST_UNIT_READY &&
2415 		    srb_status == SRB_STATUS_ERROR;
2416 		if (!unit_not_ready && bootverbose) {
2417 			mtx_lock(&sc->hs_lock);
2418 			xpt_print(ccb->ccb_h.path,
2419 				"storvsc scsi_status = %d, srb_status = %d\n",
2420 				vm_srb->scsi_status, srb_status);
2421 			mtx_unlock(&sc->hs_lock);
2422 		}
2423 		ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
2424 	}
2425 
2426 	ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
2427 	if (srb_status == SRB_STATUS_SUCCESS ||
2428 	    srb_status == SRB_STATUS_DATA_OVERRUN)
2429 		ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;
2430 	else
2431 		ccb->csio.resid = ccb->csio.dxfer_len;
2432 
2433 	if ((vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) != 0 &&
2434 	    reqp->sense_info_len != 0) {
2435 		csio->sense_resid = csio->sense_len - reqp->sense_info_len;
2436 		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
2437 	}
2438 
2439 	mtx_lock(&sc->hs_lock);
2440 	if (reqp->softc->hs_frozen == 1) {
2441 		xpt_print(ccb->ccb_h.path,
2442 			"%u: storvsc unfreezing softc 0x%p.\n",
2443 			ticks, reqp->softc);
2444 		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
2445 		reqp->softc->hs_frozen = 0;
2446 	}
2447 	storvsc_free_request(sc, reqp);
2448 	mtx_unlock(&sc->hs_lock);
2449 
2450 	xpt_done_direct(ccb);
2451 }
2452 
2453 /**
2454  * @brief Free a request structure
2455  *
2456  * Free a request structure by returning it to the free list
2457  *
2458  * @param sc pointer to a softc
2459  * @param reqp pointer to a request structure
2460  */
2461 static void
2462 storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
2463 {
2464 
2465 	LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
2466 }
2467 
2468 /**
2469  * @brief Determine type of storage device from GUID
2470  *
 * Using the type GUID, determine if this is a StorVSC (paravirtual
 * SCSI) or BlkVSC (paravirtual IDE) device.
2473  *
2474  * @param dev a device
 * returns an enum hv_storage_type
2476  */
2477 static enum hv_storage_type
2478 storvsc_get_storage_type(device_t dev)
2479 {
2480 	device_t parent = device_get_parent(dev);
2481 
2482 	if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0)
2483 		return DRIVER_BLKVSC;
2484 	if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0)
2485 		return DRIVER_STORVSC;
2486 	return DRIVER_UNKNOWN;
2487 }
2488 
2489 #define	PCI_VENDOR_INTEL	0x8086
2490 #define	PCI_PRODUCT_PIIX4	0x7111
2491 
2492 static void
2493 storvsc_ada_probe_veto(void *arg __unused, struct cam_path *path,
2494     struct ata_params *ident_buf __unused, int *veto)
2495 {
2496 
2497 	/*
2498 	 * The ATA disks are shared with the controllers managed
2499 	 * by this driver, so veto the ATA disks' attachment; the
2500 	 * ATA disks will be attached as SCSI disks once this driver
2501 	 * attached.
2502 	 */
2503 	if (path->device->protocol == PROTO_ATA) {
2504 		struct ccb_pathinq cpi;
2505 
2506 		xpt_path_inq(&cpi, path);
2507 		if (cpi.ccb_h.status == CAM_REQ_CMP &&
2508 		    cpi.hba_vendor == PCI_VENDOR_INTEL &&
2509 		    cpi.hba_device == PCI_PRODUCT_PIIX4) {
2510 			(*veto)++;
2511 			if (bootverbose) {
2512 				xpt_print(path,
2513 				    "Disable ATA disks on "
2514 				    "simulated ATA controller (0x%04x%04x)\n",
2515 				    cpi.hba_device, cpi.hba_vendor);
2516 			}
2517 		}
2518 	}
2519 }
2520 
2521 static void
2522 storvsc_sysinit(void *arg __unused)
2523 {
2524 	if (vm_guest == VM_GUEST_HV) {
2525 		storvsc_handler_tag = EVENTHANDLER_REGISTER(ada_probe_veto,
2526 		    storvsc_ada_probe_veto, NULL, EVENTHANDLER_PRI_ANY);
2527 	}
2528 }
2529 SYSINIT(storvsc_sys_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysinit,
2530     NULL);
2531 
2532 static void
2533 storvsc_sysuninit(void *arg __unused)
2534 {
2535 	if (storvsc_handler_tag != NULL)
2536 		EVENTHANDLER_DEREGISTER(ada_probe_veto, storvsc_handler_tag);
2537 }
2538 SYSUNINIT(storvsc_sys_uninit, SI_SUB_DRIVERS, SI_ORDER_SECOND,
2539     storvsc_sysuninit, NULL);
2540