1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
5  * Copyright (c) 2012 NetApp Inc.
6  * Copyright (c) 2012 Citrix Inc.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice unmodified, this list of conditions, and the following
14  *    disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /**
32  * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
 * to the Common Access Method (CAM) layer.  CAM control blocks (CCBs) are
34  * converted into VSCSI protocol messages which are delivered to the parent
35  * partition StorVSP driver over the Hyper-V VMBUS.
36  */
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include <sys/param.h>
41 #include <sys/proc.h>
42 #include <sys/condvar.h>
43 #include <sys/time.h>
44 #include <sys/systm.h>
45 #include <sys/sysctl.h>
46 #include <sys/sockio.h>
47 #include <sys/mbuf.h>
48 #include <sys/malloc.h>
49 #include <sys/module.h>
50 #include <sys/kernel.h>
51 #include <sys/queue.h>
52 #include <sys/lock.h>
53 #include <sys/sx.h>
54 #include <sys/taskqueue.h>
55 #include <sys/bus.h>
56 #include <sys/mutex.h>
57 #include <sys/callout.h>
58 #include <sys/smp.h>
59 #include <vm/vm.h>
60 #include <vm/pmap.h>
61 #include <vm/uma.h>
63 #include <sys/sema.h>
64 #include <sys/sglist.h>
65 #include <sys/eventhandler.h>
66 #include <machine/bus.h>
67 
68 #include <cam/cam.h>
69 #include <cam/cam_ccb.h>
70 #include <cam/cam_periph.h>
71 #include <cam/cam_sim.h>
72 #include <cam/cam_xpt_sim.h>
73 #include <cam/cam_xpt_internal.h>
74 #include <cam/cam_debug.h>
75 #include <cam/scsi/scsi_all.h>
76 #include <cam/scsi/scsi_message.h>
77 
78 #include <dev/hyperv/include/hyperv.h>
79 #include <dev/hyperv/include/vmbus.h>
80 #include "hv_vstorage.h"
81 #include "vmbus_if.h"
82 
83 #define STORVSC_MAX_LUNS_PER_TARGET	(64)
84 #define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
85 #define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
86 #define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
87 #define STORVSC_MAX_TARGETS		(2)
88 
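/*
 * Size of a vstor packet as sent on the wire.  vmscsi_size_delta is
 * non-zero when talking to pre-WIN8 hosts, which do not understand the
 * win8 extension fields at the tail of struct vstor_packet.
 */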
89 #define VSTOR_PKT_SIZE	(sizeof(struct vstor_packet) - vmscsi_size_delta)
90 
91 /*
92  * 33 segments are needed to allow 128KB maxio, in case the data
93  * in the first page is _not_ PAGE_SIZE aligned, e.g.
94  *
95  *     |<----------- 128KB ----------->|
96  *     |                               |
97  *  0  2K 4K    8K   16K   124K  128K  130K
98  *  |  |  |     |     |       |     |  |
99  *  +--+--+-----+-----+.......+-----+--+--+
100  *  |  |  |     |     |       |     |  |  | DATA
101  *  |  |  |     |     |       |     |  |  |
102  *  +--+--+-----+-----+.......------+--+--+
103  *     |  |                         |  |
104  *     | 1|            31           | 1| ...... # of segments
105  */
106 #define STORVSC_DATA_SEGCNT_MAX		33
107 #define STORVSC_DATA_SEGSZ_MAX		PAGE_SIZE
108 #define STORVSC_DATA_SIZE_MAX		\
109 	((STORVSC_DATA_SEGCNT_MAX - 1) * STORVSC_DATA_SEGSZ_MAX)
110 
111 struct storvsc_softc;
112 
113 struct hv_sgl_node {
114 	LIST_ENTRY(hv_sgl_node) link;
115 	struct sglist *sgl_data;
116 };
117 
118 struct hv_sgl_page_pool{
119 	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
120 	LIST_HEAD(, hv_sgl_node) free_sgl_list;
121 	boolean_t                is_init;
122 } g_hv_sgl_page_pool;
123 
124 enum storvsc_request_type {
125 	WRITE_TYPE,
126 	READ_TYPE,
127 	UNKNOWN_TYPE
128 };
129 
130 SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
131 	"Hyper-V storage interface");
132 
133 static u_int hv_storvsc_use_win8ext_flags = 1;
134 SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_win8ext_flags, CTLFLAG_RW,
135 	&hv_storvsc_use_win8ext_flags, 0,
136 	"Use win8 extension flags or not");
137 
138 static u_int hv_storvsc_use_pim_unmapped = 1;
139 SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN,
140 	&hv_storvsc_use_pim_unmapped, 0,
141 	"Optimize storvsc by using unmapped I/O");
142 
143 static u_int hv_storvsc_ringbuffer_size = (64 * PAGE_SIZE);
144 SYSCTL_UINT(_hw_storvsc, OID_AUTO, ringbuffer_size, CTLFLAG_RDTUN,
145 	&hv_storvsc_ringbuffer_size, 0, "Hyper-V storage ringbuffer size");
146 
147 static u_int hv_storvsc_max_io = 512;
148 SYSCTL_UINT(_hw_storvsc, OID_AUTO, max_io, CTLFLAG_RDTUN,
149 	&hv_storvsc_max_io, 0, "Hyper-V storage max io limit");
150 
151 static int hv_storvsc_chan_cnt = 0;
152 SYSCTL_INT(_hw_storvsc, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
153 	&hv_storvsc_chan_cnt, 0, "# of channels to use");
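
/*
 * The CTLFLAG_RDTUN knobs above are loader tunables; they can be set
 * from /boot/loader.conf, e.g. (values are illustrative only):
 *
 *   hw.storvsc.chan_cnt=4
 *   hw.storvsc.ringbuffer_size=262144
 */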
154 
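/*
 * Upper bound on in-flight I/O: the number of inband packets, each
 * carrying a maximum-sized PRP list, that fit in the ring buffer.
 */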
155 #define STORVSC_MAX_IO						\
156 	vmbus_chan_prplist_nelem(hv_storvsc_ringbuffer_size,	\
157 	   STORVSC_DATA_SEGCNT_MAX, VSTOR_PKT_SIZE)
158 
159 struct hv_storvsc_sysctl {
160 	u_long		data_bio_cnt;
161 	u_long		data_vaddr_cnt;
162 	u_long		data_sg_cnt;
163 	u_long		chan_send_cnt[MAXCPU];
164 };
165 
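/*
 * A GPA (guest physical address) range header followed by its page
 * (PFN) list; handed to vmbus_chan_send_prplist() to describe the data
 * buffer of an I/O request.
 */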
166 struct storvsc_gpa_range {
167 	struct vmbus_gpa_range	gpa_range;
168 	uint64_t		gpa_page[STORVSC_DATA_SEGCNT_MAX];
169 } __packed;
170 
171 struct hv_storvsc_request {
172 	LIST_ENTRY(hv_storvsc_request)	link;
173 	struct vstor_packet		vstor_packet;
174 	int				prp_cnt;
175 	struct storvsc_gpa_range	prp_list;
176 	void				*sense_data;
177 	uint8_t				sense_info_len;
178 	uint8_t				retries;
179 	union ccb			*ccb;
180 	struct storvsc_softc		*softc;
181 	struct callout			callout;
	struct sema			synch_sema; /* Synchronize the request/response if needed */
183 	struct sglist			*bounce_sgl;
184 	unsigned int			bounce_sgl_count;
185 	uint64_t			not_aligned_seg_bits;
186 	bus_dmamap_t			data_dmap;
187 };
188 
189 struct storvsc_softc {
190 	struct vmbus_channel		*hs_chan;
191 	LIST_HEAD(, hv_storvsc_request)	hs_free_list;
192 	struct mtx			hs_lock;
193 	struct storvsc_driver_props	*hs_drv_props;
194 	int 				hs_unit;
195 	uint32_t			hs_frozen;
196 	struct cam_sim			*hs_sim;
197 	struct cam_path 		*hs_path;
198 	uint32_t			hs_num_out_reqs;
199 	boolean_t			hs_destroy;
200 	boolean_t			hs_drain_notify;
201 	struct sema 			hs_drain_sema;
202 	struct hv_storvsc_request	hs_init_req;
203 	struct hv_storvsc_request	hs_reset_req;
204 	device_t			hs_dev;
205 	bus_dma_tag_t			storvsc_req_dtag;
206 	struct hv_storvsc_sysctl	sysctl_data;
207 	uint32_t			hs_nchan;
208 	struct vmbus_channel		*hs_sel_chan[MAXCPU];
209 };
210 
211 static eventhandler_tag storvsc_handler_tag;
212 /*
213  * The size of the vmscsi_request has changed in win8. The
214  * additional size is for the newly added elements in the
215  * structure. These elements are valid only when we are talking
216  * to a win8 host.
217  * Track the correct size we need to apply.
218  */
219 static int vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
220 
221 /**
222  * HyperV storvsc timeout testing cases:
223  * a. IO returned after first timeout;
224  * b. IO returned after second timeout and queue freeze;
225  * c. IO returned while timer handler is running
226  * The first can be tested by "sg_senddiag -vv /dev/daX",
227  * and the second and third can be done by
228  * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
229  */
230 #define HVS_TIMEOUT_TEST 0
231 
232 /*
233  * Bus/adapter reset functionality on the Hyper-V host is
234  * buggy and it will be disabled until
235  * it can be further tested.
236  */
237 #define HVS_HOST_RESET 0
238 
239 struct storvsc_driver_props {
240 	char		*drv_name;
241 	char		*drv_desc;
242 	uint8_t		drv_max_luns_per_target;
243 	uint32_t	drv_max_ios_per_target;
244 	uint32_t	drv_ringbuffer_size;
245 };
246 
247 enum hv_storage_type {
248 	DRIVER_BLKVSC,
249 	DRIVER_STORVSC,
250 	DRIVER_UNKNOWN
251 };
252 
253 #define HS_MAX_ADAPTERS 10
254 
255 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
256 
257 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
258 static const struct hyperv_guid gStorVscDeviceType={
259 	.hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
260 		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
261 };
262 
263 /* {32412632-86cb-44a2-9b5c-50d1417354f5} */
264 static const struct hyperv_guid gBlkVscDeviceType={
265 	.hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
266 		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
267 };
268 
269 static struct storvsc_driver_props g_drv_props_table[] = {
270 	{"blkvsc", "Hyper-V IDE",
271 	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
272 	 20*PAGE_SIZE},
273 	{"storvsc", "Hyper-V SCSI",
274 	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
275 	 20*PAGE_SIZE}
276 };
277 
278 /*
279  * Sense buffer size changed in win8; have a run-time
280  * variable to track the size we should use.
281  */
282 static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
283 
284 /*
285  * The storage protocol version is determined during the
286  * initial exchange with the host.  It will indicate which
287  * storage functionality is available in the host.
288 */
289 static int vmstor_proto_version;
290 
291 struct vmstor_proto {
292         int proto_version;
293         int sense_buffer_size;
294         int vmscsi_size_delta;
295 };
296 
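/*
 * Ordered newest first; hv_storvsc_channel_init() walks this table
 * until the host accepts a protocol version.
 */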
297 static const struct vmstor_proto vmstor_proto_list[] = {
298         {
299                 VMSTOR_PROTOCOL_VERSION_WIN10,
300                 POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
301                 0
302         },
303         {
304                 VMSTOR_PROTOCOL_VERSION_WIN8_1,
305                 POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
306                 0
307         },
308         {
309                 VMSTOR_PROTOCOL_VERSION_WIN8,
310                 POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
311                 0
312         },
313         {
314                 VMSTOR_PROTOCOL_VERSION_WIN7,
315                 PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
316                 sizeof(struct vmscsi_win8_extension),
317         },
318         {
319                 VMSTOR_PROTOCOL_VERSION_WIN6,
320                 PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
321                 sizeof(struct vmscsi_win8_extension),
322         }
323 };
324 
325 /* static functions */
326 static int storvsc_probe(device_t dev);
327 static int storvsc_attach(device_t dev);
328 static int storvsc_detach(device_t dev);
329 static void storvsc_poll(struct cam_sim * sim);
330 static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
331 static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
332 static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
333 static enum hv_storage_type storvsc_get_storage_type(device_t dev);
334 static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
335 static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc);
336 static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
337 					struct vstor_packet *vstor_packet,
338 					struct hv_storvsc_request *request);
339 static int hv_storvsc_connect_vsp(struct storvsc_softc *);
340 static void storvsc_io_done(struct hv_storvsc_request *reqp);
341 static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
342 				bus_dma_segment_t *orig_sgl,
343 				unsigned int orig_sgl_count,
344 				uint64_t seg_bits);
345 void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
346 				unsigned int dest_sgl_count,
347 				struct sglist* src_sgl,
348 				uint64_t seg_bits);
349 
350 static device_method_t storvsc_methods[] = {
351 	/* Device interface */
352 	DEVMETHOD(device_probe,		storvsc_probe),
353 	DEVMETHOD(device_attach,	storvsc_attach),
354 	DEVMETHOD(device_detach,	storvsc_detach),
355 	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
356 	DEVMETHOD_END
357 };
358 
359 static driver_t storvsc_driver = {
360 	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
361 };
362 
363 static devclass_t storvsc_devclass;
364 DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0);
365 MODULE_VERSION(storvsc, 1);
366 MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
367 
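/**
 * @brief Open a sub-channel offered by the host
 *
 * Opens the given sub-channel with the same ring buffer size and
 * callback as the primary channel.
 *
 * @param sc  per-device softc
 * @param new_channel  the sub-channel to open
 */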
368 static void
369 storvsc_subchan_attach(struct storvsc_softc *sc,
370     struct vmbus_channel *new_channel)
371 {
372 	struct vmstor_chan_props props;
373 	int ret = 0;
374 
375 	memset(&props, 0, sizeof(props));
376 
377 	vmbus_chan_cpu_rr(new_channel);
	ret = vmbus_chan_open(new_channel,
	    sc->hs_drv_props->drv_ringbuffer_size,
	    sc->hs_drv_props->drv_ringbuffer_size,
	    (void *)&props,
	    sizeof(struct vmstor_chan_props),
	    hv_storvsc_on_channel_callback, sc);
	if (ret != 0) {
		device_printf(sc->hs_dev,
		    "failed to open sub-channel: %d\n", ret);
	}
}
385 
386 /**
387  * @brief Send multi-channel creation request to host
388  *
389  * @param device  a Hyper-V device pointer
390  * @param max_chans  the max channels supported by vmbus
391  */
392 static void
393 storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_subch)
394 {
395 	struct vmbus_channel **subchan;
396 	struct hv_storvsc_request *request;
397 	struct vstor_packet *vstor_packet;
398 	int request_subch;
399 	int ret, i;
400 
	/* Get the number of sub-channels to create. */
402 	request_subch = MIN(max_subch, mp_ncpus - 1);
403 
404 	request = &sc->hs_init_req;
405 
	/* Request the host to create the sub-channels. */
407 	memset(request, 0, sizeof(struct hv_storvsc_request));
408 
	sema_init(&request->synch_sema, 0, "stor_synch_sema");
410 
411 	vstor_packet = &request->vstor_packet;
412 
413 	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
414 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
415 	vstor_packet->u.multi_channels_cnt = request_subch;
416 
	ret = vmbus_chan_send(sc->hs_chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
	if (ret != 0) {
		printf("Storvsc_error: send multi-channel request failed: "
		    "%d\n", ret);
		return;
	}

	sema_wait(&request->synch_sema);
422 
423 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
424 	    vstor_packet->status != 0) {
425 		printf("Storvsc_error: create multi-channel invalid operation "
426 		    "(%d) or statue (%u)\n",
427 		    vstor_packet->operation, vstor_packet->status);
428 		return;
429 	}
430 
431 	/* Update channel count */
432 	sc->hs_nchan = request_subch + 1;
433 
434 	/* Wait for sub-channels setup to complete. */
435 	subchan = vmbus_subchan_get(sc->hs_chan, request_subch);
436 
437 	/* Attach the sub-channels. */
438 	for (i = 0; i < request_subch; ++i)
439 		storvsc_subchan_attach(sc, subchan[i]);
440 
441 	/* Release the sub-channels. */
442 	vmbus_subchan_rel(subchan, request_subch);
443 
444 	if (bootverbose)
445 		printf("Storvsc create multi-channel success!\n");
446 }
447 
448 /**
449  * @brief initialize channel connection to parent partition
450  *
451  * @param dev  a Hyper-V device pointer
452  * @returns  0 on success, non-zero error on failure
453  */
454 static int
455 hv_storvsc_channel_init(struct storvsc_softc *sc)
456 {
457 	int ret = 0, i;
458 	struct hv_storvsc_request *request;
459 	struct vstor_packet *vstor_packet;
460 	uint16_t max_subch;
461 	boolean_t support_multichannel;
462 	uint32_t version;
463 
464 	max_subch = 0;
465 	support_multichannel = FALSE;
466 
467 	request = &sc->hs_init_req;
468 	memset(request, 0, sizeof(struct hv_storvsc_request));
469 	vstor_packet = &request->vstor_packet;
470 	request->softc = sc;
471 
472 	/**
473 	 * Initiate the vsc/vsp initialization protocol on the open channel
474 	 */
	sema_init(&request->synch_sema, 0, "stor_synch_sema");
476 
477 	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
478 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
479 
480 
481 	ret = vmbus_chan_send(sc->hs_chan,
482 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
483 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
484 
485 	if (ret != 0)
486 		goto cleanup;
487 
488 	sema_wait(&request->synch_sema);
489 
	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
	    vstor_packet->status != 0) {
		ret = EINVAL;
		goto cleanup;
	}
494 
495 	for (i = 0; i < nitems(vmstor_proto_list); i++) {
496 		/* reuse the packet for version range supported */
497 
498 		memset(vstor_packet, 0, sizeof(struct vstor_packet));
499 		vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
500 		vstor_packet->flags = REQUEST_COMPLETION_FLAG;
501 
502 		vstor_packet->u.version.major_minor =
503 			vmstor_proto_list[i].proto_version;
504 
505 		/* revision is only significant for Windows guests */
506 		vstor_packet->u.version.revision = 0;
507 
508 		ret = vmbus_chan_send(sc->hs_chan,
509 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
510 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
511 
512 		if (ret != 0)
513 			goto cleanup;
514 
515 		sema_wait(&request->synch_sema);
516 
517 		if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
518 			ret = EINVAL;
519 			goto cleanup;
520 		}
521 		if (vstor_packet->status == 0) {
522 			vmstor_proto_version =
523 				vmstor_proto_list[i].proto_version;
524 			sense_buffer_size =
525 				vmstor_proto_list[i].sense_buffer_size;
526 			vmscsi_size_delta =
527 				vmstor_proto_list[i].vmscsi_size_delta;
528 			break;
529 		}
530 	}
531 
532 	if (vstor_packet->status != 0) {
533 		ret = EINVAL;
534 		goto cleanup;
535 	}
536 	/**
537 	 * Query channel properties
538 	 */
539 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
540 	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
541 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
542 
543 	ret = vmbus_chan_send(sc->hs_chan,
544 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
545 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
546 
	if (ret != 0)
548 		goto cleanup;
549 
550 	sema_wait(&request->synch_sema);
551 
552 	/* TODO: Check returned version */
	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
	    vstor_packet->status != 0) {
		ret = EINVAL;
		goto cleanup;
	}
557 
558 	max_subch = vstor_packet->u.chan_props.max_channel_cnt;
559 	if (hv_storvsc_chan_cnt > 0 && hv_storvsc_chan_cnt < (max_subch + 1))
560 		max_subch = hv_storvsc_chan_cnt - 1;
561 
	/* The multi-channel feature is supported by WIN8 and newer hosts. */
563 	version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev);
564 	if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 &&
565 	    (vstor_packet->u.chan_props.flags &
566 	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
567 		support_multichannel = TRUE;
568 	}
569 	if (bootverbose) {
570 		device_printf(sc->hs_dev, "max chans %d%s\n", max_subch + 1,
571 		    support_multichannel ? ", multi-chan capable" : "");
572 	}
573 
574 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
575 	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
576 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
577 
578 	ret = vmbus_chan_send(sc->hs_chan,
579 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
580 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
581 
582 	if (ret != 0) {
583 		goto cleanup;
584 	}
585 
586 	sema_wait(&request->synch_sema);
587 
	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
	    vstor_packet->status != 0) {
		ret = EINVAL;
		goto cleanup;
	}
591 
592 	/*
593 	 * If multi-channel is supported, send multichannel create
594 	 * request to host.
595 	 */
596 	if (support_multichannel && max_subch > 0)
597 		storvsc_send_multichannel_request(sc, max_subch);
598 cleanup:
599 	sema_destroy(&request->synch_sema);
600 	return (ret);
601 }
602 
603 /**
604  * @brief Open channel connection to paraent partition StorVSP driver
605  *
606  * Open and initialize channel connection to parent partition StorVSP driver.
607  *
608  * @param pointer to a Hyper-V device
609  * @returns 0 on success, non-zero error on failure
610  */
611 static int
612 hv_storvsc_connect_vsp(struct storvsc_softc *sc)
613 {
614 	int ret = 0;
615 	struct vmstor_chan_props props;
616 
617 	memset(&props, 0, sizeof(struct vmstor_chan_props));
618 
619 	/*
620 	 * Open the channel
621 	 */
622 	vmbus_chan_cpu_rr(sc->hs_chan);
623 	ret = vmbus_chan_open(
624 		sc->hs_chan,
625 		sc->hs_drv_props->drv_ringbuffer_size,
626 		sc->hs_drv_props->drv_ringbuffer_size,
627 		(void *)&props,
628 		sizeof(struct vmstor_chan_props),
629 		hv_storvsc_on_channel_callback, sc);
630 
631 	if (ret != 0) {
632 		return ret;
633 	}
634 
635 	ret = hv_storvsc_channel_init(sc);
636 	return (ret);
637 }
638 
639 #if HVS_HOST_RESET
640 static int
641 hv_storvsc_host_reset(struct storvsc_softc *sc)
642 {
643 	int ret = 0;
644 
645 	struct hv_storvsc_request *request;
646 	struct vstor_packet *vstor_packet;
647 
648 	request = &sc->hs_reset_req;
649 	request->softc = sc;
650 	vstor_packet = &request->vstor_packet;
651 
652 	sema_init(&request->synch_sema, 0, "stor synch sema");
653 
654 	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
655 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
656 
	ret = vmbus_chan_send(sc->hs_chan,
658 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
659 	    vstor_packet, VSTOR_PKT_SIZE,
660 	    (uint64_t)(uintptr_t)&sc->hs_reset_req);
661 
662 	if (ret != 0) {
663 		goto cleanup;
664 	}
665 
666 	sema_wait(&request->synch_sema);
667 
668 	/*
669 	 * At this point, all outstanding requests in the adapter
670 	 * should have been flushed out and return to us
671 	 */
672 
673 cleanup:
674 	sema_destroy(&request->synch_sema);
675 	return (ret);
676 }
677 #endif /* HVS_HOST_RESET */
678 
679 /**
680  * @brief Function to initiate an I/O request
681  *
682  * @param device Hyper-V device pointer
683  * @param request pointer to a request structure
684  * @returns 0 on success, non-zero error on failure
685  */
686 static int
687 hv_storvsc_io_request(struct storvsc_softc *sc,
688 					  struct hv_storvsc_request *request)
689 {
690 	struct vstor_packet *vstor_packet = &request->vstor_packet;
691 	struct vmbus_channel* outgoing_channel = NULL;
692 	int ret = 0, ch_sel;
693 
694 	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
695 
696 	vstor_packet->u.vm_srb.length =
697 	    sizeof(struct vmscsi_req) - vmscsi_size_delta;
698 
699 	vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
700 
701 	vstor_packet->u.vm_srb.transfer_len =
702 	    request->prp_list.gpa_range.gpa_len;
703 
704 	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
705 
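	/*
	 * Spread I/O across channels: hash the LUN and the current CPU
	 * into the channel map built by storvsc_create_chan_sel().
	 */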
706 	ch_sel = (vstor_packet->u.vm_srb.lun + curcpu) % sc->hs_nchan;
707 	outgoing_channel = sc->hs_sel_chan[ch_sel];
708 
709 	mtx_unlock(&request->softc->hs_lock);
710 	if (request->prp_list.gpa_range.gpa_len) {
711 		ret = vmbus_chan_send_prplist(outgoing_channel,
712 		    &request->prp_list.gpa_range, request->prp_cnt,
713 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
714 	} else {
715 		ret = vmbus_chan_send(outgoing_channel,
716 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
717 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
718 	}
	/* Record successful sends on each channel for statistics. */
720 	if (!ret) {
721 		sc->sysctl_data.chan_send_cnt[ch_sel]++;
722 	}
723 	mtx_lock(&request->softc->hs_lock);
724 
725 	if (ret != 0) {
726 		printf("Unable to send packet %p ret %d", vstor_packet, ret);
727 	} else {
728 		atomic_add_int(&sc->hs_num_out_reqs, 1);
729 	}
730 
731 	return (ret);
732 }
733 
734 
735 /**
736  * Process IO_COMPLETION_OPERATION and ready
737  * the result to be completed for upper layer
738  * processing by the CAM layer.
739  */
740 static void
741 hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
742 			   struct vstor_packet *vstor_packet,
743 			   struct hv_storvsc_request *request)
744 {
745 	struct vmscsi_req *vm_srb;
746 
747 	vm_srb = &vstor_packet->u.vm_srb;
748 
749 	/*
750 	 * Copy some fields of the host's response into the request structure,
751 	 * because the fields will be used later in storvsc_io_done().
752 	 */
753 	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
754 	request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
755 	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;
756 
757 	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
758 			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
759 		/* Autosense data available */
760 
761 		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
762 				("vm_srb->sense_info_len <= "
763 				 "request->sense_info_len"));
764 
765 		memcpy(request->sense_data, vm_srb->u.sense_data,
766 			vm_srb->sense_info_len);
767 
768 		request->sense_info_len = vm_srb->sense_info_len;
769 	}
770 
771 	/* Complete request by passing to the CAM layer */
772 	storvsc_io_done(request);
773 	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
774 	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
775 		sema_post(&sc->hs_drain_sema);
776 	}
777 }
778 
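/**
 * @brief Rescan the target in response to a host enumeration request
 *
 * Schedules a wildcard CAM rescan so that added or removed LUNs are
 * discovered.
 */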
779 static void
780 hv_storvsc_rescan_target(struct storvsc_softc *sc)
781 {
782 	path_id_t pathid;
783 	target_id_t targetid;
784 	union ccb *ccb;
785 
786 	pathid = cam_sim_path(sc->hs_sim);
787 	targetid = CAM_TARGET_WILDCARD;
788 
789 	/*
790 	 * Allocate a CCB and schedule a rescan.
791 	 */
792 	ccb = xpt_alloc_ccb_nowait();
793 	if (ccb == NULL) {
794 		printf("unable to alloc CCB for rescan\n");
795 		return;
796 	}
797 
798 	if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
799 	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
800 		printf("unable to create path for rescan, pathid: %u,"
801 		    "targetid: %u\n", pathid, targetid);
802 		xpt_free_ccb(ccb);
803 		return;
804 	}
805 
806 	if (targetid == CAM_TARGET_WILDCARD)
807 		ccb->ccb_h.func_code = XPT_SCAN_BUS;
808 	else
809 		ccb->ccb_h.func_code = XPT_SCAN_TGT;
810 
811 	xpt_rescan(ccb);
812 }
813 
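/**
 * @brief VMBUS channel callback
 *
 * Drains the channel's ring buffer, posting completions for I/O and
 * control requests and handling host-initiated operations.
 */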
814 static void
815 hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc)
816 {
817 	int ret = 0;
818 	struct storvsc_softc *sc = xsc;
819 	uint32_t bytes_recvd;
820 	uint64_t request_id;
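	/*
	 * VMBUS channel packets are padded to an 8-byte boundary; size
	 * the receive buffer accordingly.
	 */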
821 	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
822 	struct hv_storvsc_request *request;
823 	struct vstor_packet *vstor_packet;
824 
825 	bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8);
826 	ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id);
827 	KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough"));
828 	/* XXX check bytes_recvd to make sure that it contains enough data */
829 
830 	while ((ret == 0) && (bytes_recvd > 0)) {
831 		request = (struct hv_storvsc_request *)(uintptr_t)request_id;
832 
833 		if ((request == &sc->hs_init_req) ||
834 			(request == &sc->hs_reset_req)) {
835 			memcpy(&request->vstor_packet, packet,
836 				   sizeof(struct vstor_packet));
837 			sema_post(&request->synch_sema);
838 		} else {
839 			vstor_packet = (struct vstor_packet *)packet;
840 			switch(vstor_packet->operation) {
841 			case VSTOR_OPERATION_COMPLETEIO:
842 				if (request == NULL)
843 					panic("VMBUS: storvsc received a "
844 					    "packet with NULL request id in "
845 					    "COMPLETEIO operation.");
846 
847 				hv_storvsc_on_iocompletion(sc,
848 							vstor_packet, request);
849 				break;
850 			case VSTOR_OPERATION_REMOVEDEVICE:
851 				printf("VMBUS: storvsc operation %d not "
852 				    "implemented.\n", vstor_packet->operation);
853 				/* TODO: implement */
854 				break;
855 			case VSTOR_OPERATION_ENUMERATE_BUS:
856 				hv_storvsc_rescan_target(sc);
857 				break;
858 			default:
859 				break;
860 			}
861 		}
862 
		bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8);
864 		ret = vmbus_chan_recv(channel, packet, &bytes_recvd,
865 		    &request_id);
866 		KASSERT(ret != ENOBUFS,
867 		    ("storvsc recvbuf is not large enough"));
868 		/*
869 		 * XXX check bytes_recvd to make sure that it contains
870 		 * enough data
871 		 */
872 	}
873 }
874 
875 /**
876  * @brief StorVSC probe function
877  *
 * Device probe function.  Returns 0 if the input device is a StorVSC
 * device; otherwise, ENXIO is returned.  If the input device is a
 * BlkVSC (paravirtual IDE) device and this support is disabled in
 * favor of the emulated ATA/IDE device, ENXIO is returned as well.
882  *
 * @param dev a device
 * @returns 0 on success, ENXIO if not a matching StorVSC device
885  */
886 static int
887 storvsc_probe(device_t dev)
888 {
	int ret = ENXIO;
890 
891 	switch (storvsc_get_storage_type(dev)) {
892 	case DRIVER_BLKVSC:
		if (bootverbose)
894 			device_printf(dev,
895 			    "Enlightened ATA/IDE detected\n");
896 		device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc);
897 		ret = BUS_PROBE_DEFAULT;
898 		break;
899 	case DRIVER_STORVSC:
		if (bootverbose)
901 			device_printf(dev, "Enlightened SCSI device detected\n");
902 		device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc);
903 		ret = BUS_PROBE_DEFAULT;
904 		break;
905 	default:
906 		ret = ENXIO;
907 	}
908 	return (ret);
909 }
910 
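/**
 * @brief Construct the CPU-to-channel mapping table
 *
 * Entry 0 is the primary channel; entries 1..n are the sub-channels
 * returned by the host.
 */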
911 static void
912 storvsc_create_chan_sel(struct storvsc_softc *sc)
913 {
914 	struct vmbus_channel **subch;
915 	int i, nsubch;
916 
917 	sc->hs_sel_chan[0] = sc->hs_chan;
918 	nsubch = sc->hs_nchan - 1;
919 	if (nsubch == 0)
920 		return;
921 
922 	subch = vmbus_subchan_get(sc->hs_chan, nsubch);
923 	for (i = 0; i < nsubch; i++)
924 		sc->hs_sel_chan[i + 1] = subch[i];
925 	vmbus_subchan_rel(subch, nsubch);
926 }
927 
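/**
 * @brief Pre-allocate request structures and DMA maps
 *
 * Creates the DMA tag for data buffers and populates the free list
 * with drv_max_ios_per_target requests.
 *
 * @param dev a device
 * @returns 0 on success or an error on failure
 */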
928 static int
929 storvsc_init_requests(device_t dev)
930 {
931 	struct storvsc_softc *sc = device_get_softc(dev);
932 	struct hv_storvsc_request *reqp;
933 	int error, i;
934 
935 	LIST_INIT(&sc->hs_free_list);
936 
937 	error = bus_dma_tag_create(
938 		bus_get_dma_tag(dev),		/* parent */
939 		1,				/* alignment */
940 		PAGE_SIZE,			/* boundary */
941 		BUS_SPACE_MAXADDR,		/* lowaddr */
942 		BUS_SPACE_MAXADDR,		/* highaddr */
943 		NULL, NULL,			/* filter, filterarg */
944 		STORVSC_DATA_SIZE_MAX,		/* maxsize */
945 		STORVSC_DATA_SEGCNT_MAX,	/* nsegments */
946 		STORVSC_DATA_SEGSZ_MAX,		/* maxsegsize */
947 		0,				/* flags */
948 		NULL,				/* lockfunc */
949 		NULL,				/* lockfuncarg */
950 		&sc->storvsc_req_dtag);
951 	if (error) {
952 		device_printf(dev, "failed to create storvsc dma tag\n");
953 		return (error);
954 	}
955 
956 	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
957 		reqp = malloc(sizeof(struct hv_storvsc_request),
958 				 M_DEVBUF, M_WAITOK|M_ZERO);
959 		reqp->softc = sc;
960 		error = bus_dmamap_create(sc->storvsc_req_dtag, 0,
961 				&reqp->data_dmap);
962 		if (error) {
963 			device_printf(dev, "failed to allocate storvsc "
964 			    "data dmamap\n");
965 			goto cleanup;
966 		}
967 		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
968 	}
969 	return (0);
970 
971 cleanup:
972 	while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) {
973 		LIST_REMOVE(reqp, link);
974 		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
975 		free(reqp, M_DEVBUF);
976 	}
977 	return (error);
978 }
979 
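/**
 * @brief Create the dev.storvsc.UNIT sysctl tree
 *
 * Exposes I/O classification counters and a per-channel send counter.
 */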
980 static void
981 storvsc_sysctl(device_t dev)
982 {
983 	struct sysctl_oid_list *child;
984 	struct sysctl_ctx_list *ctx;
985 	struct sysctl_oid *ch_tree, *chid_tree;
986 	struct storvsc_softc *sc;
987 	char name[16];
988 	int i;
989 
990 	sc = device_get_softc(dev);
991 	ctx = device_get_sysctl_ctx(dev);
992 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
993 
994 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt", CTLFLAG_RW,
995 		&sc->sysctl_data.data_bio_cnt, "# of bio data block");
996 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt", CTLFLAG_RW,
997 		&sc->sysctl_data.data_vaddr_cnt, "# of vaddr data block");
998 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt", CTLFLAG_RW,
999 		&sc->sysctl_data.data_sg_cnt, "# of sg data block");
1000 
1001 	/* dev.storvsc.UNIT.channel */
1002 	ch_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "channel",
1003 		CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1004 	if (ch_tree == NULL)
1005 		return;
1006 
1007 	for (i = 0; i < sc->hs_nchan; i++) {
1008 		uint32_t ch_id;
1009 
1010 		ch_id = vmbus_chan_id(sc->hs_sel_chan[i]);
1011 		snprintf(name, sizeof(name), "%d", ch_id);
1012 		/* dev.storvsc.UNIT.channel.CHID */
1013 		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
1014 			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1015 		if (chid_tree == NULL)
1016 			return;
1017 		/* dev.storvsc.UNIT.channel.CHID.send_req */
1018 		SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
1019 			"send_req", CTLFLAG_RD, &sc->sysctl_data.chan_send_cnt[i],
1020 			"# of request sending from this channel");
1021 	}
1022 }
1023 
1024 /**
1025  * @brief StorVSC attach function
1026  *
1027  * Function responsible for allocating per-device structures,
1028  * setting up CAM interfaces and scanning for available LUNs to
1029  * be used for SCSI device peripherals.
1030  *
1031  * @param a device
1032  * @returns 0 on success or an error on failure
1033  */
1034 static int
1035 storvsc_attach(device_t dev)
1036 {
1037 	enum hv_storage_type stor_type;
1038 	struct storvsc_softc *sc;
1039 	struct cam_devq *devq;
1040 	int ret, i, j;
1041 	struct hv_storvsc_request *reqp;
1042 	struct root_hold_token *root_mount_token = NULL;
1043 	struct hv_sgl_node *sgl_node = NULL;
1044 	void *tmp_buff = NULL;
1045 
1046 	/*
1047 	 * We need to serialize storvsc attach calls.
1048 	 */
1049 	root_mount_token = root_mount_hold("storvsc");
1050 
1051 	sc = device_get_softc(dev);
1052 	sc->hs_nchan = 1;
1053 	sc->hs_chan = vmbus_get_channel(dev);
1054 
1055 	stor_type = storvsc_get_storage_type(dev);
1056 
1057 	if (stor_type == DRIVER_UNKNOWN) {
1058 		ret = ENODEV;
1059 		goto cleanup;
1060 	}
1061 
1062 	/* fill in driver specific properties */
1063 	sc->hs_drv_props = &g_drv_props_table[stor_type];
1064 	sc->hs_drv_props->drv_ringbuffer_size = hv_storvsc_ringbuffer_size;
1065 	sc->hs_drv_props->drv_max_ios_per_target =
1066 		MIN(STORVSC_MAX_IO, hv_storvsc_max_io);
1067 	if (bootverbose) {
1068 		printf("storvsc ringbuffer size: %d, max_io: %d\n",
1069 			sc->hs_drv_props->drv_ringbuffer_size,
1070 			sc->hs_drv_props->drv_max_ios_per_target);
1071 	}
1072 	/* fill in device specific properties */
1073 	sc->hs_unit	= device_get_unit(dev);
1074 	sc->hs_dev	= dev;
1075 
1076 	mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
1077 
1078 	ret = storvsc_init_requests(dev);
1079 	if (ret != 0)
1080 		goto cleanup;
1081 
1082 	/* create sg-list page pool */
1083 	if (FALSE == g_hv_sgl_page_pool.is_init) {
1084 		g_hv_sgl_page_pool.is_init = TRUE;
1085 		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
1086 		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
1087 
1088 		/*
1089 		 * Pre-create SG list, each SG list with
1090 		 * STORVSC_DATA_SEGCNT_MAX segments, each
1091 		 * segment has one page buffer
1092 		 */
1093 		for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; i++) {
			sgl_node = malloc(sizeof(struct hv_sgl_node),
1095 			    M_DEVBUF, M_WAITOK|M_ZERO);
1096 
1097 			sgl_node->sgl_data =
1098 			    sglist_alloc(STORVSC_DATA_SEGCNT_MAX,
1099 			    M_WAITOK|M_ZERO);
1100 
1101 			for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
1102 				tmp_buff = malloc(PAGE_SIZE,
1103 				    M_DEVBUF, M_WAITOK|M_ZERO);
1104 
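				/*
				 * Note: the buffer's kernel virtual
				 * address is stashed in ss_paddr; it is
				 * cast back to a pointer when the pool
				 * is torn down.
				 */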
1105 				sgl_node->sgl_data->sg_segs[j].ss_paddr =
1106 				    (vm_paddr_t)tmp_buff;
1107 			}
1108 
1109 			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
1110 			    sgl_node, link);
1111 		}
1112 	}
1113 
1114 	sc->hs_destroy = FALSE;
1115 	sc->hs_drain_notify = FALSE;
1116 	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
1117 
1118 	ret = hv_storvsc_connect_vsp(sc);
1119 	if (ret != 0) {
1120 		goto cleanup;
1121 	}
1122 
1123 	/* Construct cpu to channel mapping */
1124 	storvsc_create_chan_sel(sc);
1125 
1126 	/*
1127 	 * Create the device queue.
1128 	 * Hyper-V maps each target to one SCSI HBA
1129 	 */
1130 	devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
1131 	if (devq == NULL) {
1132 		device_printf(dev, "Failed to alloc device queue\n");
1133 		ret = ENOMEM;
1134 		goto cleanup;
1135 	}
1136 
1137 	sc->hs_sim = cam_sim_alloc(storvsc_action,
1138 				storvsc_poll,
1139 				sc->hs_drv_props->drv_name,
1140 				sc,
1141 				sc->hs_unit,
1142 				&sc->hs_lock, 1,
1143 				sc->hs_drv_props->drv_max_ios_per_target,
1144 				devq);
1145 
1146 	if (sc->hs_sim == NULL) {
1147 		device_printf(dev, "Failed to alloc sim\n");
1148 		cam_simq_free(devq);
1149 		ret = ENOMEM;
1150 		goto cleanup;
1151 	}
1152 
1153 	mtx_lock(&sc->hs_lock);
1154 	/* bus_id is set to 0, need to get it from VMBUS channel query? */
1155 	if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
1156 		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1157 		mtx_unlock(&sc->hs_lock);
1158 		device_printf(dev, "Unable to register SCSI bus\n");
1159 		ret = ENXIO;
1160 		goto cleanup;
1161 	}
1162 
1163 	if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
1164 		 cam_sim_path(sc->hs_sim),
1165 		CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
1166 		xpt_bus_deregister(cam_sim_path(sc->hs_sim));
1167 		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1168 		mtx_unlock(&sc->hs_lock);
1169 		device_printf(dev, "Unable to create path\n");
1170 		ret = ENXIO;
1171 		goto cleanup;
1172 	}
1173 
1174 	mtx_unlock(&sc->hs_lock);
1175 
1176 	storvsc_sysctl(dev);
1177 
1178 	root_mount_rel(root_mount_token);
1179 	return (0);
1180 
1181 
1182 cleanup:
1183 	root_mount_rel(root_mount_token);
1184 	while (!LIST_EMPTY(&sc->hs_free_list)) {
1185 		reqp = LIST_FIRST(&sc->hs_free_list);
1186 		LIST_REMOVE(reqp, link);
1187 		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1188 		free(reqp, M_DEVBUF);
1189 	}
1190 
1191 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1192 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1193 		LIST_REMOVE(sgl_node, link);
1194 		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
1195 			if (NULL !=
1196 			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
1197 				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
1198 			}
1199 		}
1200 		sglist_free(sgl_node->sgl_data);
1201 		free(sgl_node, M_DEVBUF);
1202 	}
1203 
1204 	return (ret);
1205 }
1206 
1207 /**
1208  * @brief StorVSC device detach function
1209  *
1210  * This function is responsible for safely detaching a
1211  * StorVSC device.  This includes waiting for inbound responses
1212  * to complete and freeing associated per-device structures.
1213  *
1214  * @param dev a device
 * @returns 0 on success
1216  */
1217 static int
1218 storvsc_detach(device_t dev)
1219 {
1220 	struct storvsc_softc *sc = device_get_softc(dev);
1221 	struct hv_storvsc_request *reqp = NULL;
1222 	struct hv_sgl_node *sgl_node = NULL;
1223 	int j = 0;
1224 
1225 	sc->hs_destroy = TRUE;
1226 
1227 	/*
1228 	 * At this point, all outbound traffic should be disabled. We
1229 	 * only allow inbound traffic (responses) to proceed so that
1230 	 * outstanding requests can be completed.
1231 	 */
1232 
1233 	sc->hs_drain_notify = TRUE;
1234 	sema_wait(&sc->hs_drain_sema);
1235 	sc->hs_drain_notify = FALSE;
1236 
1237 	/*
1238 	 * Since we have already drained, we don't need to busy wait.
1239 	 * The call to close the channel will reset the callback
1240 	 * under the protection of the incoming channel lock.
1241 	 */
1242 
1243 	vmbus_chan_close(sc->hs_chan);
1244 
1245 	mtx_lock(&sc->hs_lock);
1246 	while (!LIST_EMPTY(&sc->hs_free_list)) {
1247 		reqp = LIST_FIRST(&sc->hs_free_list);
1248 		LIST_REMOVE(reqp, link);
1249 		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1250 		free(reqp, M_DEVBUF);
1251 	}
1252 	mtx_unlock(&sc->hs_lock);
1253 
1254 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1255 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1256 		LIST_REMOVE(sgl_node, link);
1257 		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){
1258 			if (NULL !=
1259 			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
1260 				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
1261 			}
1262 		}
1263 		sglist_free(sgl_node->sgl_data);
1264 		free(sgl_node, M_DEVBUF);
1265 	}
1266 
1267 	return (0);
1268 }
1269 
1270 #if HVS_TIMEOUT_TEST
1271 /**
1272  * @brief unit test for timed out operations
1273  *
1274  * This function provides unit testing capability to simulate
1275  * timed out operations.  Recompilation with HV_TIMEOUT_TEST=1
1276  * is required.
1277  *
1278  * @param reqp pointer to a request structure
1279  * @param opcode SCSI operation being performed
1280  * @param wait if 1, wait for I/O to complete
1281  */
1282 static void
1283 storvsc_timeout_test(struct hv_storvsc_request *reqp,
1284 		uint8_t opcode, int wait)
1285 {
1286 	int ret;
1287 	union ccb *ccb = reqp->ccb;
1288 	struct storvsc_softc *sc = reqp->softc;
1289 
	if (reqp->vstor_packet.u.vm_srb.u.cdb[0] != opcode) {
1291 		return;
1292 	}
1293 
1294 	if (wait) {
1295 		mtx_lock(&reqp->event.mtx);
1296 	}
1297 	ret = hv_storvsc_io_request(sc, reqp);
1298 	if (ret != 0) {
1299 		if (wait) {
1300 			mtx_unlock(&reqp->event.mtx);
1301 		}
1302 		printf("%s: io_request failed with %d.\n",
1303 				__func__, ret);
1304 		ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1305 		mtx_lock(&sc->hs_lock);
1306 		storvsc_free_request(sc, reqp);
1307 		xpt_done(ccb);
1308 		mtx_unlock(&sc->hs_lock);
1309 		return;
1310 	}
1311 
1312 	if (wait) {
1313 		xpt_print(ccb->ccb_h.path,
1314 				"%u: %s: waiting for IO return.\n",
1315 				ticks, __func__);
1316 		ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
1317 		mtx_unlock(&reqp->event.mtx);
1318 		xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
1319 				ticks, __func__, (ret == 0)?
1320 				"IO return detected" :
1321 				"IO return not detected");
1322 		/*
1323 		 * Now both the timer handler and io done are running
1324 		 * simultaneously. We want to confirm the io done always
1325 		 * finishes after the timer handler exits. So reqp used by
1326 		 * timer handler is not freed or stale. Do busy loop for
1327 		 * another 1/10 second to make sure io done does
1328 		 * wait for the timer handler to complete.
1329 		 */
1330 		DELAY(100*1000);
1331 		mtx_lock(&sc->hs_lock);
1332 		xpt_print(ccb->ccb_h.path,
1333 				"%u: %s: finishing, queue frozen %d, "
1334 				"ccb status 0x%x scsi_status 0x%x.\n",
1335 				ticks, __func__, sc->hs_frozen,
1336 				ccb->ccb_h.status,
1337 				ccb->csio.scsi_status);
1338 		mtx_unlock(&sc->hs_lock);
1339 	}
1340 }
1341 #endif /* HVS_TIMEOUT_TEST */
1342 
1343 #ifdef notyet
1344 /**
1345  * @brief timeout handler for requests
1346  *
1347  * This function is called as a result of a callout expiring.
1348  *
1349  * @param arg pointer to a request
1350  */
1351 static void
1352 storvsc_timeout(void *arg)
1353 {
1354 	struct hv_storvsc_request *reqp = arg;
1355 	struct storvsc_softc *sc = reqp->softc;
1356 	union ccb *ccb = reqp->ccb;
1357 
1358 	if (reqp->retries == 0) {
1359 		mtx_lock(&sc->hs_lock);
1360 		xpt_print(ccb->ccb_h.path,
1361 		    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
1362 		    ticks, reqp, ccb->ccb_h.timeout / 1000);
1363 		cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
1364 		mtx_unlock(&sc->hs_lock);
1365 
1366 		reqp->retries++;
1367 		callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
1368 		    0, storvsc_timeout, reqp, 0);
1369 #if HVS_TIMEOUT_TEST
1370 		storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
1371 #endif
1372 		return;
1373 	}
1374 
1375 	mtx_lock(&sc->hs_lock);
1376 	xpt_print(ccb->ccb_h.path,
1377 		"%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
1378 		ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
1379 		(sc->hs_frozen == 0)?
1380 		"freezing the queue" : "the queue is already frozen");
1381 	if (sc->hs_frozen == 0) {
1382 		sc->hs_frozen = 1;
1383 		xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
1384 	}
1385 	mtx_unlock(&sc->hs_lock);
1386 
1387 #if HVS_TIMEOUT_TEST
1388 	storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
1389 #endif
1390 }
1391 #endif
1392 
1393 /**
1394  * @brief StorVSC device poll function
1395  *
1396  * This function is responsible for servicing requests when
1397  * interrupts are disabled (i.e when we are dumping core.)
1398  *
1399  * @param sim a pointer to a CAM SCSI interface module
1400  */
1401 static void
1402 storvsc_poll(struct cam_sim *sim)
1403 {
1404 	struct storvsc_softc *sc = cam_sim_softc(sim);
1405 
1406 	mtx_assert(&sc->hs_lock, MA_OWNED);
1407 	mtx_unlock(&sc->hs_lock);
1408 	hv_storvsc_on_channel_callback(sc->hs_chan, sc);
1409 	mtx_lock(&sc->hs_lock);
1410 }
1411 
1412 /**
1413  * @brief StorVSC device action function
1414  *
1415  * This function is responsible for handling SCSI operations which
1416  * are passed from the CAM layer.  The requests are in the form of
1417  * CAM control blocks which indicate the action being performed.
1418  * Not all actions require converting the request to a VSCSI protocol
1419  * message - these actions can be responded to by this driver.
1420  * Requests which are destined for a backend storage device are converted
1421  * to a VSCSI protocol message and sent on the channel connection associated
1422  * with this device.
1423  *
1424  * @param sim pointer to a CAM SCSI interface module
1425  * @param ccb pointer to a CAM control block
1426  */
1427 static void
1428 storvsc_action(struct cam_sim *sim, union ccb *ccb)
1429 {
1430 	struct storvsc_softc *sc = cam_sim_softc(sim);
1431 	int res;
1432 
1433 	mtx_assert(&sc->hs_lock, MA_OWNED);
1434 	switch (ccb->ccb_h.func_code) {
1435 	case XPT_PATH_INQ: {
1436 		struct ccb_pathinq *cpi = &ccb->cpi;
1437 
1438 		cpi->version_num = 1;
1439 		cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
1440 		cpi->target_sprt = 0;
1441 		cpi->hba_misc = PIM_NOBUSRESET;
1442 		if (hv_storvsc_use_pim_unmapped)
1443 			cpi->hba_misc |= PIM_UNMAPPED;
1444 		cpi->maxio = STORVSC_DATA_SIZE_MAX;
1445 		cpi->hba_eng_cnt = 0;
1446 		cpi->max_target = STORVSC_MAX_TARGETS;
1447 		cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
1448 		cpi->initiator_id = cpi->max_target;
1449 		cpi->bus_id = cam_sim_bus(sim);
1450 		cpi->base_transfer_speed = 300000;
1451 		cpi->transport = XPORT_SAS;
1452 		cpi->transport_version = 0;
1453 		cpi->protocol = PROTO_SCSI;
1454 		cpi->protocol_version = SCSI_REV_SPC2;
1455 		strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
1456 		strlcpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
1457 		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
1458 		cpi->unit_number = cam_sim_unit(sim);
1459 
1460 		ccb->ccb_h.status = CAM_REQ_CMP;
1461 		xpt_done(ccb);
1462 		return;
1463 	}
1464 	case XPT_GET_TRAN_SETTINGS: {
1465 		struct  ccb_trans_settings *cts = &ccb->cts;
1466 
1467 		cts->transport = XPORT_SAS;
1468 		cts->transport_version = 0;
1469 		cts->protocol = PROTO_SCSI;
1470 		cts->protocol_version = SCSI_REV_SPC2;
1471 
1472 		/* enable tag queuing and disconnected mode */
1473 		cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
1474 		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
1475 		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
1476 		cts->xport_specific.valid = CTS_SPI_VALID_DISC;
1477 		cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
1478 
1479 		ccb->ccb_h.status = CAM_REQ_CMP;
1480 		xpt_done(ccb);
1481 		return;
1482 	}
1483 	case XPT_SET_TRAN_SETTINGS:	{
1484 		ccb->ccb_h.status = CAM_REQ_CMP;
1485 		xpt_done(ccb);
1486 		return;
1487 	}
1488 	case XPT_CALC_GEOMETRY:{
1489 		cam_calc_geometry(&ccb->ccg, 1);
1490 		xpt_done(ccb);
1491 		return;
1492 	}
1493 	case  XPT_RESET_BUS:
1494 	case  XPT_RESET_DEV:{
1495 #if HVS_HOST_RESET
1496 		if ((res = hv_storvsc_host_reset(sc)) != 0) {
1497 			xpt_print(ccb->ccb_h.path,
1498 				"hv_storvsc_host_reset failed with %d\n", res);
1499 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1500 			xpt_done(ccb);
1501 			return;
1502 		}
1503 		ccb->ccb_h.status = CAM_REQ_CMP;
1504 		xpt_done(ccb);
1505 		return;
1506 #else
1507 		xpt_print(ccb->ccb_h.path,
1508 				  "%s reset not supported.\n",
1509 				  (ccb->ccb_h.func_code == XPT_RESET_BUS)?
1510 				  "bus" : "dev");
1511 		ccb->ccb_h.status = CAM_REQ_INVALID;
1512 		xpt_done(ccb);
1513 		return;
1514 #endif	/* HVS_HOST_RESET */
1515 	}
1516 	case XPT_SCSI_IO:
1517 	case XPT_IMMED_NOTIFY: {
1518 		struct hv_storvsc_request *reqp = NULL;
1519 		bus_dmamap_t dmap_saved;
1520 
1521 		if (ccb->csio.cdb_len == 0) {
1522 			panic("cdl_len is 0\n");
1523 		}
1524 
1525 		if (LIST_EMPTY(&sc->hs_free_list)) {
1526 			ccb->ccb_h.status = CAM_REQUEUE_REQ;
1527 			if (sc->hs_frozen == 0) {
1528 				sc->hs_frozen = 1;
1529 				xpt_freeze_simq(sim, /* count*/1);
1530 			}
1531 			xpt_done(ccb);
1532 			return;
1533 		}
1534 
1535 		reqp = LIST_FIRST(&sc->hs_free_list);
1536 		LIST_REMOVE(reqp, link);
1537 
1538 		/* Save the data_dmap before reset request */
1539 		dmap_saved = reqp->data_dmap;
1540 
1541 		/* XXX this is ugly */
1542 		bzero(reqp, sizeof(struct hv_storvsc_request));
1543 
1544 		/* Restore necessary bits */
1545 		reqp->data_dmap = dmap_saved;
1546 		reqp->softc = sc;
1547 
1548 		ccb->ccb_h.status |= CAM_SIM_QUEUED;
1549 		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
1550 			ccb->ccb_h.status = CAM_REQ_INVALID;
1551 			xpt_done(ccb);
1552 			return;
1553 		}
1554 
1555 #ifdef notyet
1556 		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
1557 			callout_init(&reqp->callout, 1);
1558 			callout_reset_sbt(&reqp->callout,
1559 			    SBT_1MS * ccb->ccb_h.timeout, 0,
1560 			    storvsc_timeout, reqp, 0);
1561 #if HVS_TIMEOUT_TEST
1562 			cv_init(&reqp->event.cv, "storvsc timeout cv");
1563 			mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
1564 					NULL, MTX_DEF);
			switch (reqp->vstor_packet.u.vm_srb.u.cdb[0]) {
1566 				case MODE_SELECT_10:
1567 				case SEND_DIAGNOSTIC:
1568 					/* To have timer send the request. */
1569 					return;
1570 				default:
1571 					break;
1572 			}
1573 #endif /* HVS_TIMEOUT_TEST */
1574 		}
1575 #endif
1576 
1577 		if ((res = hv_storvsc_io_request(sc, reqp)) != 0) {
1578 			xpt_print(ccb->ccb_h.path,
1579 				"hv_storvsc_io_request failed with %d\n", res);
1580 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1581 			storvsc_free_request(sc, reqp);
1582 			xpt_done(ccb);
1583 			return;
1584 		}
1585 		return;
1586 	}
1587 
1588 	default:
1589 		ccb->ccb_h.status = CAM_REQ_INVALID;
1590 		xpt_done(ccb);
1591 		return;
1592 	}
1593 }
1594 
1595 /**
1596  * @brief destroy bounce buffer
1597  *
1598  * This function is responsible for destroy a Scatter/Gather list
1599  * that create by storvsc_create_bounce_buffer()
1600  *
1601  * @param sgl- the Scatter/Gather need be destroy
1602  * @param sg_count- page count of the SG list.
1603  *
1604  */
1605 static void
1606 storvsc_destroy_bounce_buffer(struct sglist *sgl)
1607 {
1608 	struct hv_sgl_node *sgl_node = NULL;
1609 	if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
1610 		printf("storvsc error: not enough in use sgl\n");
1611 		return;
1612 	}
1613 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
1614 	LIST_REMOVE(sgl_node, link);
1615 	sgl_node->sgl_data = sgl;
1616 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
1617 }
1618 
1619 /**
1620  * @brief create bounce buffer
1621  *
1622  * This function is responsible for create a Scatter/Gather list,
1623  * which hold several pages that can be aligned with page size.
1624  *
1625  * @param seg_count- SG-list segments count
1626  * @param write - if WRITE_TYPE, set SG list page used size to 0,
1627  * otherwise set used size to page size.
1628  *
1629  * return NULL if create failed
1630  */
1631 static struct sglist *
1632 storvsc_create_bounce_buffer(uint16_t seg_count, int write)
1633 {
1634 	int i = 0;
1635 	struct sglist *bounce_sgl = NULL;
1636 	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
1637 	struct hv_sgl_node *sgl_node = NULL;
1638 
1639 	/* get struct sglist from free_sgl_list */
1640 	if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1641 		printf("storvsc error: not enough free sgl\n");
1642 		return NULL;
1643 	}
1644 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1645 	LIST_REMOVE(sgl_node, link);
1646 	bounce_sgl = sgl_node->sgl_data;
1647 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
1648 
1649 	bounce_sgl->sg_maxseg = seg_count;
1650 
1651 	if (write == WRITE_TYPE)
1652 		bounce_sgl->sg_nseg = 0;
1653 	else
1654 		bounce_sgl->sg_nseg = seg_count;
1655 
1656 	for (i = 0; i < seg_count; i++)
1657 	        bounce_sgl->sg_segs[i].ss_len = buf_len;
1658 
1659 	return bounce_sgl;
1660 }
1661 
1662 /**
1663  * @brief copy data from SG list to bounce buffer
1664  *
1665  * This function is responsible for copy data from one SG list's segments
1666  * to another SG list which used as bounce buffer.
1667  *
1668  * @param bounce_sgl - the destination SG list
1669  * @param orig_sgl - the segment of the source SG list.
1670  * @param orig_sgl_count - the count of segments.
1671  * @param orig_sgl_count - indicate which segment need bounce buffer,
1672  *  set 1 means need.
1673  *
1674  */
1675 static void
1676 storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
1677 			       bus_dma_segment_t *orig_sgl,
1678 			       unsigned int orig_sgl_count,
1679 			       uint64_t seg_bits)
1680 {
1681 	int src_sgl_idx = 0;
1682 
1683 	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
		if (seg_bits & (1ULL << src_sgl_idx)) {
1685 			memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
1686 			    (void*)orig_sgl[src_sgl_idx].ds_addr,
1687 			    orig_sgl[src_sgl_idx].ds_len);
1688 
1689 			bounce_sgl->sg_segs[src_sgl_idx].ss_len =
1690 			    orig_sgl[src_sgl_idx].ds_len;
1691 		}
1692 	}
1693 }
1694 
1695 /**
1696  * @brief copy data from SG list which used as bounce to another SG list
1697  *
1698  * This function is responsible for copy data from one SG list with bounce
1699  * buffer to another SG list's segments.
1700  *
1701  * @param dest_sgl - the destination SG list's segments
1702  * @param dest_sgl_count - the count of destination SG list's segment.
1703  * @param src_sgl - the source SG list.
1704  * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
1705  *
1706  */
1707 void
1708 storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
1709 				    unsigned int dest_sgl_count,
1710 				    struct sglist* src_sgl,
1711 				    uint64_t seg_bits)
1712 {
1713 	int sgl_idx = 0;
1714 
1715 	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
		if (seg_bits & (1ULL << sgl_idx)) {
1717 			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
1718 			    (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
1719 			    src_sgl->sg_segs[sgl_idx].ss_len);
1720 		}
1721 	}
1722 }
1723 
1724 /**
1725  * @brief check SG list with bounce buffer or not
1726  *
1727  * This function is responsible for check if need bounce buffer for SG list.
1728  *
1729  * @param sgl - the SG list's segments
1730  * @param sg_count - the count of SG list's segment.
1731  * @param bits - segmengs number that need bounce buffer
1732  *
1733  * return -1 if SG list needless bounce buffer
1734  */
1735 static int
1736 storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
1737 				unsigned int sg_count,
1738 				uint64_t *bits)
1739 {
1740 	int i = 0;
1741 	int offset = 0;
1742 	uint64_t phys_addr = 0;
1743 	uint64_t tmp_bits = 0;
1744 	boolean_t found_hole = FALSE;
1745 	boolean_t pre_aligned = TRUE;
1746 
	if (sg_count < 2) {
		return (-1);
	}
1750 
1751 	*bits = 0;
1752 
1753 	phys_addr = vtophys(sgl[0].ds_addr);
	offset = phys_addr - trunc_page(phys_addr);
1755 
1756 	if (offset != 0) {
1757 		pre_aligned = FALSE;
1758 		tmp_bits |= 1;
1759 	}
1760 
1761 	for (i = 1; i < sg_count; i++) {
1762 		phys_addr = vtophys(sgl[i].ds_addr);
		offset = phys_addr - trunc_page(phys_addr);
1764 
1765 		if (offset == 0) {
			if (!pre_aligned) {
				/*
				 * This segment is aligned; if the
				 * previous one was not, we have found
				 * a hole.
				 */
				found_hole = TRUE;
			}
1773 			pre_aligned = TRUE;
1774 		} else {
1775 			tmp_bits |= 1ULL << i;
1776 			if (!pre_aligned) {
1777 				if (phys_addr != vtophys(sgl[i-1].ds_addr +
1778 				    sgl[i-1].ds_len)) {
1779 					/*
1780 					 * Check whether connect to previous
1781 					 * segment,if not, find the hole
1782 					 */
1783 					found_hole = TRUE;
1784 				}
1785 			} else {
1786 				found_hole = TRUE;
1787 			}
1788 			pre_aligned = FALSE;
1789 		}
1790 	}
1791 
1792 	if (!found_hole) {
1793 		return (-1);
1794 	} else {
1795 		*bits = tmp_bits;
		return (0);
1797 	}
1798 }
1799 
1800 /**
1801  * Copy bus_dma segments to multiple page buffer, which requires
1802  * the pages are compact composed except for the 1st and last pages.
1803  */
1804 static void
1805 storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
1806 {
1807 	struct hv_storvsc_request *reqp = arg;
1808 	union ccb *ccb = reqp->ccb;
1809 	struct ccb_scsiio *csio = &ccb->csio;
1810 	struct storvsc_gpa_range *prplist;
1811 	int i;
1812 
1813 	prplist = &reqp->prp_list;
1814 	prplist->gpa_range.gpa_len = csio->dxfer_len;
1815 	prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK;
1816 
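	/*
	 * The GPA range describes dxfer_len bytes starting at the
	 * first segment's offset within its page; the loop below
	 * records the page frame number of each segment.
	 */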
1817 	for (i = 0; i < nsegs; i++) {
1818 #ifdef INVARIANTS
1819 		if (nsegs > 1) {
1820 			if (i == 0) {
1821 				KASSERT((segs[i].ds_addr & PAGE_MASK) +
1822 				    segs[i].ds_len == PAGE_SIZE,
1823 				    ("invalid 1st page, ofs 0x%jx, len %zu",
1824 				     (uintmax_t)segs[i].ds_addr,
1825 				     segs[i].ds_len));
1826 			} else if (i == nsegs - 1) {
1827 				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0,
1828 				    ("invalid last page, ofs 0x%jx",
1829 				     (uintmax_t)segs[i].ds_addr));
1830 			} else {
1831 				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 &&
1832 				    segs[i].ds_len == PAGE_SIZE,
1833 				    ("not a full page, ofs 0x%jx, len %zu",
1834 				     (uintmax_t)segs[i].ds_addr,
1835 				     segs[i].ds_len));
1836 			}
1837 		}
1838 #endif
1839 		prplist->gpa_page[i] = atop(segs[i].ds_addr);
1840 	}
1841 	reqp->prp_cnt = nsegs;
1842 }
1843 
1844 /**
1845  * @brief Fill in a request structure based on a CAM control block
1846  *
1847  * Fills in a request structure based on the contents of a CAM control
1848  * block.  The request structure holds the payload information for
1849  * VSCSI protocol request.
1850  *
 * @param ccb pointer to a CAM control block
1852  * @param reqp pointer to a request structure
1853  */
1854 static int
1855 create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
1856 {
1857 	struct ccb_scsiio *csio = &ccb->csio;
1858 	uint64_t phys_addr;
1859 	uint32_t pfn;
1860 	uint64_t not_aligned_seg_bits = 0;
1861 	int error;
1862 
1863 	/* refer to struct vmscsi_req for meanings of these two fields */
1864 	reqp->vstor_packet.u.vm_srb.port =
1865 		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
1866 	reqp->vstor_packet.u.vm_srb.path_id =
1867 		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
1868 
1869 	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
1870 	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
1871 
1872 	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
	if (ccb->ccb_h.flags & CAM_CDB_POINTER) {
1874 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
1875 			csio->cdb_len);
1876 	} else {
1877 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
1878 			csio->cdb_len);
1879 	}
1880 
1881 	if (hv_storvsc_use_win8ext_flags) {
1882 		reqp->vstor_packet.u.vm_srb.win8_extension.time_out_value = 60;
1883 		reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1884 			SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
1885 	}
1886 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1887 	case CAM_DIR_OUT:
1888 		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
1889 		if (hv_storvsc_use_win8ext_flags) {
1890 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1891 				SRB_FLAGS_DATA_OUT;
1892 		}
1893 		break;
1894 	case CAM_DIR_IN:
1895 		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
1896 		if (hv_storvsc_use_win8ext_flags) {
1897 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1898 				SRB_FLAGS_DATA_IN;
1899 		}
1900 		break;
1901 	case CAM_DIR_NONE:
1902 		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1903 		if (hv_storvsc_use_win8ext_flags) {
1904 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1905 				SRB_FLAGS_NO_DATA_TRANSFER;
1906 		}
1907 		break;
1908 	default:
1909 		printf("Error: unexpected data direction: 0x%x\n",
1910 			ccb->ccb_h.flags & CAM_DIR_MASK);
1911 		return (EINVAL);
1912 	}
1913 
1914 	reqp->sense_data     = &csio->sense_data;
1915 	reqp->sense_info_len = csio->sense_len;
1916 
1917 	reqp->ccb = ccb;
1918 	ccb->ccb_h.spriv_ptr0 = reqp;
1919 
1920 	if (0 == csio->dxfer_len) {
1921 		return (0);
1922 	}
1923 
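	/*
	 * Map the data buffer.  For BIO and VADDR payloads, busdma
	 * builds the page list via the storvsc_xferbuf_prepare()
	 * callback; for pre-built SG lists, the code below constructs
	 * the GPA range itself and bounces any misaligned segments.
	 */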
1924 	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
1925 	case CAM_DATA_BIO:
1926 	case CAM_DATA_VADDR:
1927 		error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag,
1928 		    reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp,
1929 		    BUS_DMA_NOWAIT);
1930 		if (error) {
1931 			xpt_print(ccb->ccb_h.path,
1932 			    "bus_dmamap_load_ccb failed: %d\n", error);
1933 			return (error);
1934 		}
1935 		if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
1936 			reqp->softc->sysctl_data.data_bio_cnt++;
1937 		else
1938 			reqp->softc->sysctl_data.data_vaddr_cnt++;
1939 		break;
1940 
1941 	case CAM_DATA_SG:
1942 	{
1943 		struct storvsc_gpa_range *prplist;
1944 		int i = 0;
1945 		int offset = 0;
1946 		int ret;
1947 
1948 		bus_dma_segment_t *storvsc_sglist =
1949 		    (bus_dma_segment_t *)ccb->csio.data_ptr;
1950 		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
1951 
1952 		prplist = &reqp->prp_list;
1953 		prplist->gpa_range.gpa_len = csio->dxfer_len;
1954 
1955 		printf("Storvsc: get SG I/O operation, %d\n",
1956 		    reqp->vstor_packet.u.vm_srb.data_in);
1957 
		if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX) {
			printf("Storvsc: %d segments are too many; "
			    "only %d segments are supported\n",
			    storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX);
			return (EINVAL);
		}
1964 
1965 		/*
1966 		 * We create our own bounce buffer function currently. Idealy
1967 		 * we should use BUS_DMA(9) framework. But with current BUS_DMA
1968 		 * code there is no callback API to check the page alignment of
1969 		 * middle segments before busdma can decide if a bounce buffer
1970 		 * is needed for particular segment. There is callback,
1971 		 * "bus_dma_filter_t *filter", but the parrameters are not
1972 		 * sufficient for storvsc driver.
1973 		 * TODO:
1974 		 *	Add page alignment check in BUS_DMA(9) callback. Once
1975 		 *	this is complete, switch the following code to use
1976 		 *	BUS_DMA(9) for storvsc bounce buffer support.
1977 		 */
1978 		/* check if we need to create bounce buffer */
1979 		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
1980 		    storvsc_sg_count, &not_aligned_seg_bits);
1981 		if (ret != -1) {
1982 			reqp->bounce_sgl =
1983 			    storvsc_create_bounce_buffer(storvsc_sg_count,
1984 			    reqp->vstor_packet.u.vm_srb.data_in);
1985 			if (NULL == reqp->bounce_sgl) {
1986 				printf("Storvsc_error: "
1987 				    "create bounce buffer failed.\n");
1988 				return (ENOMEM);
1989 			}
1990 
1991 			reqp->bounce_sgl_count = storvsc_sg_count;
1992 			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
1993 
1994 			/*
1995 			 * if it is write, we need copy the original data
1996 			 *to bounce buffer
1997 			 */
1998 			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
1999 				storvsc_copy_sgl_to_bounce_buf(
2000 				    reqp->bounce_sgl,
2001 				    storvsc_sglist,
2002 				    storvsc_sg_count,
2003 				    reqp->not_aligned_seg_bits);
2004 			}
2005 
2006 			/* transfer virtual address to physical frame number */
			if (reqp->not_aligned_seg_bits & 0x1) {
				phys_addr =
				    vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
			} else {
				phys_addr =
				    vtophys(storvsc_sglist[0].ds_addr);
			}
2014 			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2015 
2016 			pfn = phys_addr >> PAGE_SHIFT;
2017 			prplist->gpa_page[0] = pfn;
2018 
2019 			for (i = 1; i < storvsc_sg_count; i++) {
				if (reqp->not_aligned_seg_bits & (1ULL << i)) {
2021 					phys_addr =
2022 					    vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
2023 				} else {
2024 					phys_addr =
2025 					    vtophys(storvsc_sglist[i].ds_addr);
2026 				}
2027 
2028 				pfn = phys_addr >> PAGE_SHIFT;
2029 				prplist->gpa_page[i] = pfn;
2030 			}
2031 			reqp->prp_cnt = i;
2032 		} else {
2033 			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
2034 
2035 			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2036 
2037 			for (i = 0; i < storvsc_sg_count; i++) {
2038 				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
2039 				pfn = phys_addr >> PAGE_SHIFT;
2040 				prplist->gpa_page[i] = pfn;
2041 			}
2042 			reqp->prp_cnt = i;
2043 
			/*
			 * Check whether the last segment crosses a
			 * page boundary.
			 */
2045 			offset = phys_addr & PAGE_MASK;
2046 			if (offset) {
2047 				/* Add one more PRP entry */
2048 				phys_addr =
2049 				    vtophys(storvsc_sglist[i-1].ds_addr +
2050 				    PAGE_SIZE - offset);
2051 				pfn = phys_addr >> PAGE_SHIFT;
2052 				prplist->gpa_page[i] = pfn;
2053 				reqp->prp_cnt++;
2054 			}
2055 
2056 			reqp->bounce_sgl_count = 0;
2057 		}
2058 		reqp->softc->sysctl_data.data_sg_cnt++;
2059 		break;
2060 	}
	default:
		printf("Unknown flags: 0x%x\n", ccb->ccb_h.flags);
		return (EINVAL);
2064 	}
2065 
	return (0);
2067 }
2068 
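/*
 * Sanity-check INQUIRY data: reject responses that report no device
 * type or a bad-LUN peripheral qualifier.
 */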
2069 static uint32_t
2070 is_scsi_valid(const struct scsi_inquiry_data *inq_data)
2071 {
2072 	u_int8_t type;
2073 
2074 	type = SID_TYPE(inq_data);
2075 	if (type == T_NODEVICE)
2076 		return (0);
2077 	if (SID_QUAL(inq_data) == SID_QUAL_BAD_LU)
2078 		return (0);
2079 	return (1);
2080 }
2081 
2082 /**
2083  * @brief completion function before returning to CAM
2084  *
2085  * I/O process has been completed and the result needs
2086  * to be passed to the CAM layer.
2087  * Free resources related to this request.
2088  *
2089  * @param reqp pointer to a request structure
2090  */
2091 static void
2092 storvsc_io_done(struct hv_storvsc_request *reqp)
2093 {
2094 	union ccb *ccb = reqp->ccb;
2095 	struct ccb_scsiio *csio = &ccb->csio;
2096 	struct storvsc_softc *sc = reqp->softc;
2097 	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
2098 	bus_dma_segment_t *ori_sglist = NULL;
2099 	int ori_sg_count = 0;
2100 	const struct scsi_generic *cmd;
2101 
2102 	/* destroy bounce buffer if it is used */
2103 	if (reqp->bounce_sgl_count) {
2104 		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
2105 		ori_sg_count = ccb->csio.sglist_cnt;
2106 
2107 		/*
2108 		 * If it is READ operation, we should copy back the data
2109 		 * to original SG list.
2110 		 */
2111 		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2112 			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
2113 			    ori_sg_count,
2114 			    reqp->bounce_sgl,
2115 			    reqp->not_aligned_seg_bits);
2116 		}
2117 
2118 		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
2119 		reqp->bounce_sgl_count = 0;
2120 	}
2121 
2122 	if (reqp->retries > 0) {
2123 		mtx_lock(&sc->hs_lock);
2124 #if HVS_TIMEOUT_TEST
2125 		xpt_print(ccb->ccb_h.path,
2126 			"%u: IO returned after timeout, "
2127 			"waking up timer handler if any.\n", ticks);
2128 		mtx_lock(&reqp->event.mtx);
2129 		cv_signal(&reqp->event.cv);
2130 		mtx_unlock(&reqp->event.mtx);
2131 #endif
2132 		reqp->retries = 0;
2133 		xpt_print(ccb->ccb_h.path,
2134 			"%u: IO returned after timeout, "
2135 			"stopping timer if any.\n", ticks);
2136 		mtx_unlock(&sc->hs_lock);
2137 	}
2138 
2139 #ifdef notyet
2140 	/*
2141 	 * callout_drain() will wait for the timer handler to finish
2142 	 * if it is running. So we don't need any lock to synchronize
2143 	 * between this routine and the timer handler.
2144 	 * Note that we need to make sure reqp is not freed when timer
2145 	 * handler is using or will use it.
2146 	 */
2147 	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
2148 		callout_drain(&reqp->callout);
2149 	}
2150 #endif
2151 	cmd = (const struct scsi_generic *)
2152 	    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
2153 	     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
2154 
2155 	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
2156 	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
2157 	int srb_status = SRB_STATUS(vm_srb->srb_status);
2158 	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
2159 		if (srb_status != SRB_STATUS_SUCCESS) {
2160 			/*
2161 			 * If there are errors, for example, invalid LUN,
2162 			 * host will inform VM through SRB status.
2163 			 */
2164 			if (bootverbose) {
2165 				if (srb_status == SRB_STATUS_INVALID_LUN) {
2166 					xpt_print(ccb->ccb_h.path,
2167 					    "invalid LUN %d for op: %s\n",
2168 					    vm_srb->lun,
2169 					    scsi_op_desc(cmd->opcode, NULL));
2170 				} else {
2171 					xpt_print(ccb->ccb_h.path,
2172 					    "Unknown SRB flag: %d for op: %s\n",
2173 					    srb_status,
2174 					    scsi_op_desc(cmd->opcode, NULL));
2175 				}
2176 			}
2177 			ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
2178 		} else {
2179 			ccb->ccb_h.status |= CAM_REQ_CMP;
2180 		}
2181 
2182 		if (cmd->opcode == INQUIRY &&
2183 		    srb_status == SRB_STATUS_SUCCESS) {
2184 			int resp_xfer_len, resp_buf_len, data_len;
2185 			uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
2186 			struct scsi_inquiry_data *inq_data =
2187 			    (struct scsi_inquiry_data *)csio->data_ptr;
2188 
2189 			/* Get the buffer length reported by host */
2190 			resp_xfer_len = vm_srb->transfer_len;
2191 
			/*
			 * Compute the valid response length: byte 4 of
			 * standard INQUIRY data is the ADDITIONAL LENGTH
			 * field (n - 4), so the full response occupies
			 * resp_buf[4] + 5 bytes.
			 */
2193 			resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
2194 			data_len = (resp_buf_len < resp_xfer_len) ?
2195 			    resp_buf_len : resp_xfer_len;
2196 			if (bootverbose && data_len >= 5) {
2197 				xpt_print(ccb->ccb_h.path, "storvsc inquiry "
2198 				    "(%d) [%x %x %x %x %x ... ]\n", data_len,
2199 				    resp_buf[0], resp_buf[1], resp_buf[2],
2200 				    resp_buf[3], resp_buf[4]);
2201 			}
2202 			/*
2203 			 * XXX: Hyper-V (since win2012r2) responses inquiry with
2204 			 * unknown version (0) for GEN-2 DVD device.
2205 			 * Manually set the version number to SPC3 in order to
2206 			 * ask CAM to continue probing with "PROBE_REPORT_LUNS".
2207 			 * see probedone() in scsi_xpt.c
2208 			 */
2209 			if (SID_TYPE(inq_data) == T_CDROM &&
2210 			    inq_data->version == 0 &&
2211 			    (vmstor_proto_version >= VMSTOR_PROTOCOL_VERSION_WIN8)) {
2212 				inq_data->version = SCSI_REV_SPC3;
2213 				if (bootverbose) {
2214 					xpt_print(ccb->ccb_h.path,
2215 					    "set version from 0 to %d\n",
2216 					    inq_data->version);
2217 				}
2218 			}
2219 			/*
2220 			 * XXX: Manually fix the wrong response returned from WS2012
2221 			 */
2222 			if (!is_scsi_valid(inq_data) &&
2223 			    (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2224 			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8 ||
2225 			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN7)) {
2226 				if (data_len >= 4 &&
2227 				    (resp_buf[2] == 0 || resp_buf[3] == 0)) {
2228 					resp_buf[2] = SCSI_REV_SPC3;
					resp_buf[3] = 2; /* resp fmt must be 2 */
2230 					if (bootverbose)
2231 						xpt_print(ccb->ccb_h.path,
2232 						    "fix version and resp fmt for 0x%x\n",
2233 						    vmstor_proto_version);
2234 				}
2235 			} else if (data_len >= SHORT_INQUIRY_LENGTH) {
2236 				char vendor[16];
2237 
2238 				cam_strvis(vendor, inq_data->vendor,
2239 				    sizeof(inq_data->vendor), sizeof(vendor));
2240 				/*
2241 				 * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
2242 				 * WIN2012 R2 in order to support UNMAP feature.
2243 				 */
2244 				if (!strncmp(vendor, "Msft", 4) &&
2245 				    SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
2246 				    (vmstor_proto_version ==
2247 				     VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2248 				     vmstor_proto_version ==
2249 				     VMSTOR_PROTOCOL_VERSION_WIN8)) {
2250 					inq_data->version = SCSI_REV_SPC3;
2251 					if (bootverbose) {
2252 						xpt_print(ccb->ccb_h.path,
2253 						    "storvsc upgrades "
2254 						    "SPC2 to SPC3\n");
2255 					}
2256 				}
2257 			}
2258 		}
2259 	} else {
2260 		/**
2261 		 * On Some Windows hosts TEST_UNIT_READY command can return
2262 		 * SRB_STATUS_ERROR and sense data, for example, asc=0x3a,1
2263 		 * "(Medium not present - tray closed)". This error can be
2264 		 * ignored since it will be sent to host periodically.
2265 		 */
2266 		boolean_t unit_not_ready = \
2267 		    vm_srb->scsi_status == SCSI_STATUS_CHECK_COND &&
2268 		    cmd->opcode == TEST_UNIT_READY &&
2269 		    srb_status == SRB_STATUS_ERROR;
2270 		if (!unit_not_ready && bootverbose) {
2271 			mtx_lock(&sc->hs_lock);
2272 			xpt_print(ccb->ccb_h.path,
2273 				"storvsc scsi_status = %d, srb_status = %d\n",
2274 				vm_srb->scsi_status, srb_status);
2275 			mtx_unlock(&sc->hs_lock);
2276 		}
2277 		ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
2278 	}
2279 
2280 	ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
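	/*
	 * Compute the residual: on success or data overrun the host
	 * reports the number of bytes actually transferred; for other
	 * failures assume nothing was transferred.
	 */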
2281 	if (srb_status == SRB_STATUS_SUCCESS ||
2282 	    srb_status == SRB_STATUS_DATA_OVERRUN)
2283 		ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;
2284 	else
2285 		ccb->csio.resid = ccb->csio.dxfer_len;
2286 
2287 	if (reqp->sense_info_len != 0) {
2288 		csio->sense_resid = csio->sense_len - reqp->sense_info_len;
2289 		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
2290 	}
2291 
2292 	mtx_lock(&sc->hs_lock);
2293 	if (reqp->softc->hs_frozen == 1) {
2294 		xpt_print(ccb->ccb_h.path,
2295 			"%u: storvsc unfreezing softc 0x%p.\n",
2296 			ticks, reqp->softc);
2297 		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
2298 		reqp->softc->hs_frozen = 0;
2299 	}
2300 	storvsc_free_request(sc, reqp);
2301 	mtx_unlock(&sc->hs_lock);
2302 
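	/*
	 * Complete the CCB in the caller's context instead of
	 * deferring it to the CAM done queue.
	 */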
2303 	xpt_done_direct(ccb);
2304 }
2305 
2306 /**
2307  * @brief Free a request structure
2308  *
2309  * Free a request structure by returning it to the free list
2310  *
2311  * @param sc pointer to a softc
2312  * @param reqp pointer to a request structure
2313  */
2314 static void
2315 storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
2316 {
2317 
2318 	LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
2319 }
2320 
2321 /**
2322  * @brief Determine type of storage device from GUID
2323  *
2324  * Using the type GUID, determine if this is a StorVSC (paravirtual
2325  * SCSI or BlkVSC (paravirtual IDE) device.
2326  *
2327  * @param dev a device
2328  * returns an enum
2329  */
2330 static enum hv_storage_type
2331 storvsc_get_storage_type(device_t dev)
2332 {
2333 	device_t parent = device_get_parent(dev);
2334 
	if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0)
		return (DRIVER_BLKVSC);
	if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0)
		return (DRIVER_STORVSC);
	return (DRIVER_UNKNOWN);
2340 }
2341 
2342 #define	PCI_VENDOR_INTEL	0x8086
2343 #define	PCI_PRODUCT_PIIX4	0x7111
2344 
2345 static void
2346 storvsc_ada_probe_veto(void *arg __unused, struct cam_path *path,
2347     struct ata_params *ident_buf __unused, int *veto)
2348 {
2349 
2350 	/*
2351 	 * The ATA disks are shared with the controllers managed
2352 	 * by this driver, so veto the ATA disks' attachment; the
2353 	 * ATA disks will be attached as SCSI disks once this driver
2354 	 * attached.
2355 	 */
2356 	if (path->device->protocol == PROTO_ATA) {
2357 		struct ccb_pathinq cpi;
2358 
2359 		xpt_path_inq(&cpi, path);
2360 		if (cpi.ccb_h.status == CAM_REQ_CMP &&
2361 		    cpi.hba_vendor == PCI_VENDOR_INTEL &&
2362 		    cpi.hba_device == PCI_PRODUCT_PIIX4) {
2363 			(*veto)++;
2364 			if (bootverbose) {
2365 				xpt_print(path,
2366 				    "Disable ATA disks on "
2367 				    "simulated ATA controller (0x%04x%04x)\n",
2368 				    cpi.hba_device, cpi.hba_vendor);
2369 			}
2370 		}
2371 	}
2372 }
2373 
2374 static void
2375 storvsc_sysinit(void *arg __unused)
2376 {
2377 	if (vm_guest == VM_GUEST_HV) {
2378 		storvsc_handler_tag = EVENTHANDLER_REGISTER(ada_probe_veto,
2379 		    storvsc_ada_probe_veto, NULL, EVENTHANDLER_PRI_ANY);
2380 	}
2381 }
2382 SYSINIT(storvsc_sys_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysinit,
2383     NULL);
2384 
2385 static void
2386 storvsc_sysuninit(void *arg __unused)
2387 {
2388 	if (storvsc_handler_tag != NULL)
2389 		EVENTHANDLER_DEREGISTER(ada_probe_veto, storvsc_handler_tag);
2390 }
2391 SYSUNINIT(storvsc_sys_uninit, SI_SUB_DRIVERS, SI_ORDER_SECOND,
2392     storvsc_sysuninit, NULL);
2393