1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
5  * Copyright (c) 2012 NetApp Inc.
6  * Copyright (c) 2012 Citrix Inc.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice unmodified, this list of conditions, and the following
14  *    disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /**
32  * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
33  * to the Comman Access Method (CAM) layer.  CAM control blocks (CCBs) are
34  * converted into VSCSI protocol messages which are delivered to the parent
35  * partition StorVSP driver over the Hyper-V VMBUS.
36  */
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include <sys/param.h>
41 #include <sys/proc.h>
42 #include <sys/condvar.h>
43 #include <sys/time.h>
44 #include <sys/systm.h>
45 #include <sys/sysctl.h>
46 #include <sys/sockio.h>
47 #include <sys/mbuf.h>
48 #include <sys/malloc.h>
49 #include <sys/module.h>
50 #include <sys/kernel.h>
51 #include <sys/queue.h>
52 #include <sys/lock.h>
53 #include <sys/sx.h>
54 #include <sys/taskqueue.h>
55 #include <sys/bus.h>
56 #include <sys/mutex.h>
57 #include <sys/callout.h>
58 #include <sys/smp.h>
59 #include <vm/vm.h>
60 #include <vm/pmap.h>
61 #include <vm/uma.h>
62 #include <sys/lock.h>
63 #include <sys/sema.h>
64 #include <sys/eventhandler.h>
65 #include <machine/bus.h>
66 
67 #include <cam/cam.h>
68 #include <cam/cam_ccb.h>
69 #include <cam/cam_periph.h>
70 #include <cam/cam_sim.h>
71 #include <cam/cam_xpt_sim.h>
72 #include <cam/cam_xpt_internal.h>
73 #include <cam/cam_debug.h>
74 #include <cam/scsi/scsi_all.h>
75 #include <cam/scsi/scsi_message.h>
76 
77 #include <dev/hyperv/include/hyperv.h>
78 #include <dev/hyperv/include/vmbus.h>
79 #include "hv_vstorage.h"
80 #include "vmbus_if.h"
81 
82 #define STORVSC_MAX_LUNS_PER_TARGET	(64)
83 #define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
84 #define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
85 #define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
86 #define STORVSC_MAX_TARGETS		(2)
87 
88 #define VSTOR_PKT_SIZE	(sizeof(struct vstor_packet) - vmscsi_size_delta)
89 
90 /*
91  * 33 segments are needed to allow 128KB maxio, in case the data
92  * in the first page is _not_ PAGE_SIZE aligned, e.g.
93  *
94  *     |<----------- 128KB ----------->|
95  *     |                               |
96  *  0  2K 4K    8K   16K   124K  128K  130K
97  *  |  |  |     |     |       |     |  |
98  *  +--+--+-----+-----+.......+-----+--+--+
99  *  |  |  |     |     |       |     |  |  | DATA
100  *  |  |  |     |     |       |     |  |  |
101  *  +--+--+-----+-----+.......------+--+--+
102  *     |  |                         |  |
103  *     | 1|            31           | 1| ...... # of segments
104  */
105 #define STORVSC_DATA_SEGCNT_MAX		33
106 #define STORVSC_DATA_SEGSZ_MAX		PAGE_SIZE
107 #define STORVSC_DATA_SIZE_MAX		\
108 	((STORVSC_DATA_SEGCNT_MAX - 1) * STORVSC_DATA_SEGSZ_MAX)
109 
110 struct storvsc_softc;
111 
/*
 * Fixed-capacity scatter/gather list used for bounce buffering.
 * Holds up to STORVSC_DATA_SEGCNT_MAX iovec segments.
 */
struct hv_sglist {
	struct iovec sg_iov[STORVSC_DATA_SEGCNT_MAX];
	u_short	sg_nseg;	/* # of segments currently in use */
	u_short	sg_maxseg;	/* capacity of sg_iov */
};
117 
/* Linkage node that carries one hv_sglist on a free/in-use list. */
struct hv_sgl_node {
	LIST_ENTRY(hv_sgl_node) link;
	struct hv_sglist *sgl_data;
};
122 
/*
 * Global pool of pre-allocated bounce-buffer SG lists, split into a
 * free list and an in-use list.  is_init guards one-time setup.
 */
struct hv_sgl_page_pool{
	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
	LIST_HEAD(, hv_sgl_node) free_sgl_list;
	boolean_t                is_init;
} g_hv_sgl_page_pool;
128 
/* Direction of the data transfer associated with a request. */
enum storvsc_request_type {
	WRITE_TYPE,
	READ_TYPE,
	UNKNOWN_TYPE
};
134 
/* hw.storvsc sysctl/tunable tree. */
SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
	"Hyper-V storage interface");

/* Enable win8 extension flags in outgoing SRBs (run-time togglable). */
static u_int hv_storvsc_use_win8ext_flags = 1;
SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_win8ext_flags, CTLFLAG_RW,
	&hv_storvsc_use_win8ext_flags, 0,
	"Use win8 extension flags or not");

/* Prefer unmapped (physical-address) I/O when possible; boot tunable. */
static u_int hv_storvsc_use_pim_unmapped = 1;
SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN,
	&hv_storvsc_use_pim_unmapped, 0,
	"Optimize storvsc by using unmapped I/O");

/* Per-channel VMBUS ring buffer size in bytes; boot tunable. */
static u_int hv_storvsc_ringbuffer_size = (64 * PAGE_SIZE);
SYSCTL_UINT(_hw_storvsc, OID_AUTO, ringbuffer_size, CTLFLAG_RDTUN,
	&hv_storvsc_ringbuffer_size, 0, "Hyper-V storage ringbuffer size");

/* Upper bound on outstanding I/O requests; boot tunable. */
static u_int hv_storvsc_max_io = 512;
SYSCTL_UINT(_hw_storvsc, OID_AUTO, max_io, CTLFLAG_RDTUN,
	&hv_storvsc_max_io, 0, "Hyper-V storage max io limit");

/* 0 = use whatever the host offers; otherwise caps total channels. */
static int hv_storvsc_chan_cnt = 0;
SYSCTL_INT(_hw_storvsc, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
	&hv_storvsc_chan_cnt, 0, "# of channels to use");
#ifdef DIAGNOSTIC
/* Fault injection: force a specific srb_status on completions (-1 = off). */
static int hv_storvsc_srb_status = -1;
SYSCTL_INT(_hw_storvsc, OID_AUTO, srb_status,  CTLFLAG_RW,
	&hv_storvsc_srb_status, 0, "srb_status to inject");
TUNABLE_INT("hw_storvsc.srb_status", &hv_storvsc_srb_status);
#endif /* DIAGNOSTIC */

/* Max # of PRP-list packets the configured ring buffer can hold. */
#define STORVSC_MAX_IO						\
	vmbus_chan_prplist_nelem(hv_storvsc_ringbuffer_size,	\
	   STORVSC_DATA_SEGCNT_MAX, VSTOR_PKT_SIZE)
169 
/* Per-device statistics exported via sysctl. */
struct hv_storvsc_sysctl {
	u_long		data_bio_cnt;		/* requests carrying bio data */
	u_long		data_vaddr_cnt;		/* requests carrying vaddr data */
	u_long		data_sg_cnt;		/* requests carrying SG data */
	u_long		chan_send_cnt[MAXCPU];	/* successful sends per channel */
};
176 
/*
 * GPA (guest physical address) range header followed by its page array,
 * laid out contiguously as the VMBUS PRP list wire format requires.
 */
struct storvsc_gpa_range {
	struct vmbus_gpa_range	gpa_range;
	uint64_t		gpa_page[STORVSC_DATA_SEGCNT_MAX];
} __packed;
181 
/* Per-I/O request tracking structure, paired 1:1 with a CCB. */
struct hv_storvsc_request {
	LIST_ENTRY(hv_storvsc_request)	link;		/* free-list linkage */
	struct vstor_packet		vstor_packet;	/* wire packet (req & resp copy) */
	int				prp_cnt;	/* # of pages in prp_list */
	struct storvsc_gpa_range	prp_list;	/* data buffer page list */
	void				*sense_data;	/* where autosense is copied */
	uint8_t				sense_info_len;	/* capacity in, valid len out */
	uint8_t				retries;
	union ccb			*ccb;		/* owning CAM CCB, if any */
	struct storvsc_softc		*softc;		/* back-pointer to device */
	struct callout			callout;	/* request timeout timer */
	struct sema			synch_sema; /*Synchronize the request/response if needed */
	struct hv_sglist		*bounce_sgl;	/* bounce buffer, if unaligned */
	unsigned int			bounce_sgl_count;
	uint64_t			not_aligned_seg_bits;	/* bitmask of unaligned segs */
	bus_dmamap_t			data_dmap;	/* busdma map for data buffer */
};
199 
/* Per-adapter software state. */
struct storvsc_softc {
	struct vmbus_channel		*hs_chan;	/* primary VMBUS channel */
	LIST_HEAD(, hv_storvsc_request)	hs_free_list;	/* pre-allocated requests */
	struct mtx			hs_lock;
	struct storvsc_driver_props	*hs_drv_props;	/* blkvsc vs storvsc props */
	int 				hs_unit;
	uint32_t			hs_frozen;	/* SIM queue frozen flag */
	struct cam_sim			*hs_sim;
	struct cam_path 		*hs_path;
	uint32_t			hs_num_out_reqs;	/* outstanding I/O count */
	boolean_t			hs_destroy;	/* detach in progress */
	boolean_t			hs_drain_notify; /* post hs_drain_sema at 0 reqs */
	struct sema 			hs_drain_sema;
	struct hv_storvsc_request	hs_init_req;	/* handshake request */
	struct hv_storvsc_request	hs_reset_req;	/* bus-reset request */
	device_t			hs_dev;
	bus_dma_tag_t			storvsc_req_dtag;	/* data buffer DMA tag */
	struct hv_storvsc_sysctl	sysctl_data;
	uint32_t			hs_nchan;	/* total channels (1 + subch) */
	struct vmbus_channel		*hs_sel_chan[MAXCPU];	/* channel selection table */
};
221 
/* Event handler tag; registration site is outside this chunk. */
static eventhandler_tag storvsc_handler_tag;
/*
 * The size of the vmscsi_request has changed in win8. The
 * additional size is for the newly added elements in the
 * structure. These elements are valid only when we are talking
 * to a win8 host.
 * Track the correct size we need to apply.
 */
static int vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
231 
232 /**
233  * HyperV storvsc timeout testing cases:
234  * a. IO returned after first timeout;
235  * b. IO returned after second timeout and queue freeze;
236  * c. IO returned while timer handler is running
237  * The first can be tested by "sg_senddiag -vv /dev/daX",
238  * and the second and third can be done by
239  * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
240  */
241 #define HVS_TIMEOUT_TEST 0
242 
243 /*
244  * Bus/adapter reset functionality on the Hyper-V host is
245  * buggy and it will be disabled until
246  * it can be further tested.
247  */
248 #define HVS_HOST_RESET 0
249 
/* Static per-flavor (blkvsc/storvsc) driver limits and naming. */
struct storvsc_driver_props {
	char		*drv_name;
	char		*drv_desc;
	uint8_t		drv_max_luns_per_target;
	uint32_t	drv_max_ios_per_target;
	uint32_t	drv_ringbuffer_size;	/* bytes, per channel */
};
257 
/*
 * Flavor of Hyper-V storage device; also indexes g_drv_props_table,
 * so the enumerator order must match that table.
 */
enum hv_storage_type {
	DRIVER_BLKVSC,
	DRIVER_STORVSC,
	DRIVER_UNKNOWN
};
263 
#define HS_MAX_ADAPTERS 10

/* Host-advertised channel-properties flag bit. */
#define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1

/* VMBUS device type GUID for storvsc (SCSI). */
/* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
static const struct hyperv_guid gStorVscDeviceType={
	.hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
};

/* VMBUS device type GUID for blkvsc (IDE). */
/* {32412632-86cb-44a2-9b5c-50d1417354f5} */
static const struct hyperv_guid gBlkVscDeviceType={
	.hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
};

/* Indexed by enum hv_storage_type (DRIVER_BLKVSC, DRIVER_STORVSC). */
static struct storvsc_driver_props g_drv_props_table[] = {
	{"blkvsc", "Hyper-V IDE",
	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
	 20*PAGE_SIZE},
	{"storvsc", "Hyper-V SCSI",
	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
	 20*PAGE_SIZE}
};

/*
 * Sense buffer size changed in win8; have a run-time
 * variable to track the size we should use.
 */
static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;

/*
 * The storage protocol version is determined during the
 * initial exchange with the host.  It will indicate which
 * storage functionality is available in the host.
*/
static int vmstor_proto_version;
301 
/* One negotiable protocol version and its size parameters. */
struct vmstor_proto {
        int proto_version;
        int sense_buffer_size;
        int vmscsi_size_delta;
};
307 
/*
 * Protocol versions to offer the host during negotiation, newest
 * first; hv_storvsc_channel_init() walks this list in order and
 * stops at the first version the host accepts.
 */
static const struct vmstor_proto vmstor_proto_list[] = {
        {
                VMSTOR_PROTOCOL_VERSION_WIN10,
                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
                0
        },
        {
                VMSTOR_PROTOCOL_VERSION_WIN8_1,
                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
                0
        },
        {
                VMSTOR_PROTOCOL_VERSION_WIN8,
                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
                0
        },
        {
                VMSTOR_PROTOCOL_VERSION_WIN7,
                PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
                sizeof(struct vmscsi_win8_extension),
        },
        {
                VMSTOR_PROTOCOL_VERSION_WIN6,
                PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
                sizeof(struct vmscsi_win8_extension),
        }
};
335 
336 /* static functions */
337 static int storvsc_probe(device_t dev);
338 static int storvsc_attach(device_t dev);
339 static int storvsc_detach(device_t dev);
340 static void storvsc_poll(struct cam_sim * sim);
341 static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
342 static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
343 static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
344 static enum hv_storage_type storvsc_get_storage_type(device_t dev);
345 static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
346 static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc);
347 static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
348 					struct vstor_packet *vstor_packet,
349 					struct hv_storvsc_request *request);
350 static int hv_storvsc_connect_vsp(struct storvsc_softc *);
351 static void storvsc_io_done(struct hv_storvsc_request *reqp);
352 static void storvsc_copy_sgl_to_bounce_buf(struct hv_sglist *bounce_sgl,
353 				bus_dma_segment_t *orig_sgl,
354 				unsigned int orig_sgl_count,
355 				uint64_t seg_bits);
356 void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
357 				unsigned int dest_sgl_count,
358 				struct hv_sglist *src_sgl,
359 				uint64_t seg_bits);
360 
/* newbus glue: method table, driver declaration, module registration. */
static device_method_t storvsc_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		storvsc_probe),
	DEVMETHOD(device_attach,	storvsc_attach),
	DEVMETHOD(device_detach,	storvsc_detach),
	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
	DEVMETHOD_END
};

static driver_t storvsc_driver = {
	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
};

DRIVER_MODULE(storvsc, vmbus, storvsc_driver, 0, 0);
MODULE_VERSION(storvsc, 1);
MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
377 
378 static void
379 storvsc_subchan_attach(struct storvsc_softc *sc,
380     struct vmbus_channel *new_channel)
381 {
382 	struct vmstor_chan_props props;
383 
384 	memset(&props, 0, sizeof(props));
385 
386 	vmbus_chan_cpu_rr(new_channel);
387 	vmbus_chan_open(new_channel,
388 	    sc->hs_drv_props->drv_ringbuffer_size,
389   	    sc->hs_drv_props->drv_ringbuffer_size,
390 	    (void *)&props,
391 	    sizeof(struct vmstor_chan_props),
392 	    hv_storvsc_on_channel_callback, sc);
393 }
394 
395 /**
396  * @brief Send multi-channel creation request to host
397  *
398  * @param device  a Hyper-V device pointer
399  * @param max_chans  the max channels supported by vmbus
400  */
401 static void
402 storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_subch)
403 {
404 	struct vmbus_channel **subchan;
405 	struct hv_storvsc_request *request;
406 	struct vstor_packet *vstor_packet;
407 	int request_subch;
408 	int i;
409 
410 	/* get sub-channel count that need to create */
411 	request_subch = MIN(max_subch, mp_ncpus - 1);
412 
413 	request = &sc->hs_init_req;
414 
415 	/* request the host to create multi-channel */
416 	memset(request, 0, sizeof(struct hv_storvsc_request));
417 
418 	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
419 
420 	vstor_packet = &request->vstor_packet;
421 
422 	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
423 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
424 	vstor_packet->u.multi_channels_cnt = request_subch;
425 
426 	vmbus_chan_send(sc->hs_chan,
427 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
428 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
429 
430 	sema_wait(&request->synch_sema);
431 
432 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
433 	    vstor_packet->status != 0) {
434 		printf("Storvsc_error: create multi-channel invalid operation "
435 		    "(%d) or statue (%u)\n",
436 		    vstor_packet->operation, vstor_packet->status);
437 		return;
438 	}
439 
440 	/* Update channel count */
441 	sc->hs_nchan = request_subch + 1;
442 
443 	/* Wait for sub-channels setup to complete. */
444 	subchan = vmbus_subchan_get(sc->hs_chan, request_subch);
445 
446 	/* Attach the sub-channels. */
447 	for (i = 0; i < request_subch; ++i)
448 		storvsc_subchan_attach(sc, subchan[i]);
449 
450 	/* Release the sub-channels. */
451 	vmbus_subchan_rel(subchan, request_subch);
452 
453 	if (bootverbose)
454 		printf("Storvsc create multi-channel success!\n");
455 }
456 
457 /**
458  * @brief initialize channel connection to parent partition
459  *
460  * @param dev  a Hyper-V device pointer
461  * @returns  0 on success, non-zero error on failure
462  */
463 static int
464 hv_storvsc_channel_init(struct storvsc_softc *sc)
465 {
466 	int ret = 0, i;
467 	struct hv_storvsc_request *request;
468 	struct vstor_packet *vstor_packet;
469 	uint16_t max_subch;
470 	boolean_t support_multichannel;
471 	uint32_t version;
472 
473 	max_subch = 0;
474 	support_multichannel = FALSE;
475 
476 	request = &sc->hs_init_req;
477 	memset(request, 0, sizeof(struct hv_storvsc_request));
478 	vstor_packet = &request->vstor_packet;
479 	request->softc = sc;
480 
481 	/**
482 	 * Initiate the vsc/vsp initialization protocol on the open channel
483 	 */
484 	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
485 
486 	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
487 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
488 
489 
490 	ret = vmbus_chan_send(sc->hs_chan,
491 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
492 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
493 
494 	if (ret != 0)
495 		goto cleanup;
496 
497 	sema_wait(&request->synch_sema);
498 
499 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
500 		vstor_packet->status != 0) {
501 		goto cleanup;
502 	}
503 
504 	for (i = 0; i < nitems(vmstor_proto_list); i++) {
505 		/* reuse the packet for version range supported */
506 
507 		memset(vstor_packet, 0, sizeof(struct vstor_packet));
508 		vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
509 		vstor_packet->flags = REQUEST_COMPLETION_FLAG;
510 
511 		vstor_packet->u.version.major_minor =
512 			vmstor_proto_list[i].proto_version;
513 
514 		/* revision is only significant for Windows guests */
515 		vstor_packet->u.version.revision = 0;
516 
517 		ret = vmbus_chan_send(sc->hs_chan,
518 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
519 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
520 
521 		if (ret != 0)
522 			goto cleanup;
523 
524 		sema_wait(&request->synch_sema);
525 
526 		if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
527 			ret = EINVAL;
528 			goto cleanup;
529 		}
530 		if (vstor_packet->status == 0) {
531 			vmstor_proto_version =
532 				vmstor_proto_list[i].proto_version;
533 			sense_buffer_size =
534 				vmstor_proto_list[i].sense_buffer_size;
535 			vmscsi_size_delta =
536 				vmstor_proto_list[i].vmscsi_size_delta;
537 			break;
538 		}
539 	}
540 
541 	if (vstor_packet->status != 0) {
542 		ret = EINVAL;
543 		goto cleanup;
544 	}
545 	/**
546 	 * Query channel properties
547 	 */
548 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
549 	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
550 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
551 
552 	ret = vmbus_chan_send(sc->hs_chan,
553 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
554 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
555 
556 	if ( ret != 0)
557 		goto cleanup;
558 
559 	sema_wait(&request->synch_sema);
560 
561 	/* TODO: Check returned version */
562 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
563 	    vstor_packet->status != 0) {
564 		goto cleanup;
565 	}
566 
567 	max_subch = vstor_packet->u.chan_props.max_channel_cnt;
568 	if (hv_storvsc_chan_cnt > 0 && hv_storvsc_chan_cnt < (max_subch + 1))
569 		max_subch = hv_storvsc_chan_cnt - 1;
570 
571 	/* multi-channels feature is supported by WIN8 and above version */
572 	version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev);
573 	if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 &&
574 	    (vstor_packet->u.chan_props.flags &
575 	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
576 		support_multichannel = TRUE;
577 	}
578 	if (bootverbose) {
579 		device_printf(sc->hs_dev, "max chans %d%s\n", max_subch + 1,
580 		    support_multichannel ? ", multi-chan capable" : "");
581 	}
582 
583 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
584 	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
585 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
586 
587 	ret = vmbus_chan_send(sc->hs_chan,
588 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
589 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
590 
591 	if (ret != 0) {
592 		goto cleanup;
593 	}
594 
595 	sema_wait(&request->synch_sema);
596 
597 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
598 	    vstor_packet->status != 0)
599 		goto cleanup;
600 
601 	/*
602 	 * If multi-channel is supported, send multichannel create
603 	 * request to host.
604 	 */
605 	if (support_multichannel && max_subch > 0)
606 		storvsc_send_multichannel_request(sc, max_subch);
607 cleanup:
608 	sema_destroy(&request->synch_sema);
609 	return (ret);
610 }
611 
612 /**
613  * @brief Open channel connection to paraent partition StorVSP driver
614  *
615  * Open and initialize channel connection to parent partition StorVSP driver.
616  *
617  * @param pointer to a Hyper-V device
618  * @returns 0 on success, non-zero error on failure
619  */
620 static int
621 hv_storvsc_connect_vsp(struct storvsc_softc *sc)
622 {
623 	int ret = 0;
624 	struct vmstor_chan_props props;
625 
626 	memset(&props, 0, sizeof(struct vmstor_chan_props));
627 
628 	/*
629 	 * Open the channel
630 	 */
631 	vmbus_chan_cpu_rr(sc->hs_chan);
632 	ret = vmbus_chan_open(
633 		sc->hs_chan,
634 		sc->hs_drv_props->drv_ringbuffer_size,
635 		sc->hs_drv_props->drv_ringbuffer_size,
636 		(void *)&props,
637 		sizeof(struct vmstor_chan_props),
638 		hv_storvsc_on_channel_callback, sc);
639 
640 	if (ret != 0) {
641 		return ret;
642 	}
643 
644 	ret = hv_storvsc_channel_init(sc);
645 	return (ret);
646 }
647 
#if HVS_HOST_RESET
/**
 * @brief Issue a bus reset to the host StorVSP driver
 *
 * Sends a RESETBUS request on the primary channel and waits for the
 * host's acknowledgement.  On return, all outstanding requests are
 * expected to have been flushed back by the host.
 *
 * @param sc  adapter softc
 * @returns 0 on success, non-zero error on failure
 */
static int
hv_storvsc_host_reset(struct storvsc_softc *sc)
{
	int ret = 0;

	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;

	request = &sc->hs_reset_req;
	request->softc = sc;
	vstor_packet = &request->vstor_packet;

	sema_init(&request->synch_sema, 0, "stor synch sema");

	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	/*
	 * Fixed: this used to reference the undeclared "dev->channel",
	 * which would not compile with HVS_HOST_RESET enabled; the
	 * primary channel lives in the softc.
	 */
	ret = vmbus_chan_send(sc->hs_chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    vstor_packet, VSTOR_PKT_SIZE,
	    (uint64_t)(uintptr_t)&sc->hs_reset_req);

	if (ret != 0) {
		goto cleanup;
	}

	sema_wait(&request->synch_sema);

	/*
	 * At this point, all outstanding requests in the adapter
	 * should have been flushed out and return to us
	 */

cleanup:
	sema_destroy(&request->synch_sema);
	return (ret);
}
#endif /* HVS_HOST_RESET */
687 
688 /**
689  * @brief Function to initiate an I/O request
690  *
691  * @param device Hyper-V device pointer
692  * @param request pointer to a request structure
693  * @returns 0 on success, non-zero error on failure
694  */
695 static int
696 hv_storvsc_io_request(struct storvsc_softc *sc,
697 					  struct hv_storvsc_request *request)
698 {
699 	struct vstor_packet *vstor_packet = &request->vstor_packet;
700 	struct vmbus_channel* outgoing_channel = NULL;
701 	int ret = 0, ch_sel;
702 
703 	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
704 
705 	vstor_packet->u.vm_srb.length =
706 	    sizeof(struct vmscsi_req) - vmscsi_size_delta;
707 
708 	vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
709 
710 	vstor_packet->u.vm_srb.transfer_len =
711 	    request->prp_list.gpa_range.gpa_len;
712 
713 	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
714 
715 	ch_sel = (vstor_packet->u.vm_srb.lun + curcpu) % sc->hs_nchan;
716 	/*
717 	 * If we are panic'ing, then we are dumping core. Since storvsc_polls
718 	 * always uses sc->hs_chan, then we must send to that channel or a poll
719 	 * timeout will occur.
720 	 */
721 	if (panicstr) {
722 		outgoing_channel = sc->hs_chan;
723 	} else {
724 		outgoing_channel = sc->hs_sel_chan[ch_sel];
725 	}
726 
727 	mtx_unlock(&request->softc->hs_lock);
728 	if (request->prp_list.gpa_range.gpa_len) {
729 		ret = vmbus_chan_send_prplist(outgoing_channel,
730 		    &request->prp_list.gpa_range, request->prp_cnt,
731 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
732 	} else {
733 		ret = vmbus_chan_send(outgoing_channel,
734 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
735 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
736 	}
737 	/* statistic for successful request sending on each channel */
738 	if (!ret) {
739 		sc->sysctl_data.chan_send_cnt[ch_sel]++;
740 	}
741 	mtx_lock(&request->softc->hs_lock);
742 
743 	if (ret != 0) {
744 		printf("Unable to send packet %p ret %d", vstor_packet, ret);
745 	} else {
746 		atomic_add_int(&sc->hs_num_out_reqs, 1);
747 	}
748 
749 	return (ret);
750 }
751 
752 
753 /**
754  * Process IO_COMPLETION_OPERATION and ready
755  * the result to be completed for upper layer
756  * processing by the CAM layer.
757  */
758 static void
759 hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
760 			   struct vstor_packet *vstor_packet,
761 			   struct hv_storvsc_request *request)
762 {
763 	struct vmscsi_req *vm_srb;
764 
765 	vm_srb = &vstor_packet->u.vm_srb;
766 
767 	/*
768 	 * Copy some fields of the host's response into the request structure,
769 	 * because the fields will be used later in storvsc_io_done().
770 	 */
771 	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
772 	request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
773 	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;
774 
775 	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
776 			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
777 		/* Autosense data available */
778 
779 		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
780 				("vm_srb->sense_info_len <= "
781 				 "request->sense_info_len"));
782 
783 		memcpy(request->sense_data, vm_srb->u.sense_data,
784 			vm_srb->sense_info_len);
785 
786 		request->sense_info_len = vm_srb->sense_info_len;
787 	}
788 
789 	/* Complete request by passing to the CAM layer */
790 	storvsc_io_done(request);
791 	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
792 	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
793 		sema_post(&sc->hs_drain_sema);
794 	}
795 }
796 
797 static void
798 hv_storvsc_rescan_target(struct storvsc_softc *sc)
799 {
800 	path_id_t pathid;
801 	target_id_t targetid;
802 	union ccb *ccb;
803 
804 	pathid = cam_sim_path(sc->hs_sim);
805 	targetid = CAM_TARGET_WILDCARD;
806 
807 	/*
808 	 * Allocate a CCB and schedule a rescan.
809 	 */
810 	ccb = xpt_alloc_ccb_nowait();
811 	if (ccb == NULL) {
812 		printf("unable to alloc CCB for rescan\n");
813 		return;
814 	}
815 
816 	if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
817 	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
818 		printf("unable to create path for rescan, pathid: %u,"
819 		    "targetid: %u\n", pathid, targetid);
820 		xpt_free_ccb(ccb);
821 		return;
822 	}
823 
824 	if (targetid == CAM_TARGET_WILDCARD)
825 		ccb->ccb_h.func_code = XPT_SCAN_BUS;
826 	else
827 		ccb->ccb_h.func_code = XPT_SCAN_TGT;
828 
829 	xpt_rescan(ccb);
830 }
831 
/*
 * VMBUS channel callback: drain all pending packets from the channel.
 * Completions for the init/reset control requests just wake their
 * waiter; everything else is dispatched by vstor operation code.
 */
static void
hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc)
{
	int ret = 0;
	struct storvsc_softc *sc = xsc;
	uint32_t bytes_recvd;
	uint64_t request_id;
	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;

	bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8);
	ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id);
	KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough"));
	/* XXX check bytes_recvd to make sure that it contains enough data */

	while ((ret == 0) && (bytes_recvd > 0)) {
		/* request_id is the transaction id we attached at send time */
		request = (struct hv_storvsc_request *)(uintptr_t)request_id;

		if ((request == &sc->hs_init_req) ||
			(request == &sc->hs_reset_req)) {
			/* Control request: hand the reply to the waiter. */
			memcpy(&request->vstor_packet, packet,
				   sizeof(struct vstor_packet));
			sema_post(&request->synch_sema);
		} else {
			vstor_packet = (struct vstor_packet *)packet;
			switch(vstor_packet->operation) {
			case VSTOR_OPERATION_COMPLETEIO:
				if (request == NULL)
					panic("VMBUS: storvsc received a "
					    "packet with NULL request id in "
					    "COMPLETEIO operation.");

				hv_storvsc_on_iocompletion(sc,
							vstor_packet, request);
				break;
			case VSTOR_OPERATION_REMOVEDEVICE:
				printf("VMBUS: storvsc operation %d not "
				    "implemented.\n", vstor_packet->operation);
				/* TODO: implement */
				break;
			case VSTOR_OPERATION_ENUMERATE_BUS:
				/* Host reports bus topology change. */
				hv_storvsc_rescan_target(sc);
				break;
			default:
				break;
			}
		}

		/* NOTE(review): comma operator below — a ';' was likely meant */
		bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8),
		ret = vmbus_chan_recv(channel, packet, &bytes_recvd,
		    &request_id);
		KASSERT(ret != ENOBUFS,
		    ("storvsc recvbuf is not large enough"));
		/*
		 * XXX check bytes_recvd to make sure that it contains
		 * enough data
		 */
	}
}
892 
893 /**
894  * @brief StorVSC probe function
895  *
896  * Device probe function.  Returns 0 if the input device is a StorVSC
897  * device.  Otherwise, a ENXIO is returned.  If the input device is
898  * for BlkVSC (paravirtual IDE) device and this support is disabled in
899  * favor of the emulated ATA/IDE device, return ENXIO.
900  *
901  * @param a device
902  * @returns 0 on success, ENXIO if not a matcing StorVSC device
903  */
904 static int
905 storvsc_probe(device_t dev)
906 {
907 	int ret	= ENXIO;
908 
909 	switch (storvsc_get_storage_type(dev)) {
910 	case DRIVER_BLKVSC:
911 		if(bootverbose)
912 			device_printf(dev,
913 			    "Enlightened ATA/IDE detected\n");
914 		device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc);
915 		ret = BUS_PROBE_DEFAULT;
916 		break;
917 	case DRIVER_STORVSC:
918 		if(bootverbose)
919 			device_printf(dev, "Enlightened SCSI device detected\n");
920 		device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc);
921 		ret = BUS_PROBE_DEFAULT;
922 		break;
923 	default:
924 		ret = ENXIO;
925 	}
926 	return (ret);
927 }
928 
929 static void
930 storvsc_create_chan_sel(struct storvsc_softc *sc)
931 {
932 	struct vmbus_channel **subch;
933 	int i, nsubch;
934 
935 	sc->hs_sel_chan[0] = sc->hs_chan;
936 	nsubch = sc->hs_nchan - 1;
937 	if (nsubch == 0)
938 		return;
939 
940 	subch = vmbus_subchan_get(sc->hs_chan, nsubch);
941 	for (i = 0; i < nsubch; i++)
942 		sc->hs_sel_chan[i + 1] = subch[i];
943 	vmbus_subchan_rel(subch, nsubch);
944 }
945 
946 static int
947 storvsc_init_requests(device_t dev)
948 {
949 	struct storvsc_softc *sc = device_get_softc(dev);
950 	struct hv_storvsc_request *reqp;
951 	int error, i;
952 
953 	LIST_INIT(&sc->hs_free_list);
954 
955 	error = bus_dma_tag_create(
956 		bus_get_dma_tag(dev),		/* parent */
957 		1,				/* alignment */
958 		PAGE_SIZE,			/* boundary */
959 		BUS_SPACE_MAXADDR,		/* lowaddr */
960 		BUS_SPACE_MAXADDR,		/* highaddr */
961 		NULL, NULL,			/* filter, filterarg */
962 		STORVSC_DATA_SIZE_MAX,		/* maxsize */
963 		STORVSC_DATA_SEGCNT_MAX,	/* nsegments */
964 		STORVSC_DATA_SEGSZ_MAX,		/* maxsegsize */
965 		0,				/* flags */
966 		NULL,				/* lockfunc */
967 		NULL,				/* lockfuncarg */
968 		&sc->storvsc_req_dtag);
969 	if (error) {
970 		device_printf(dev, "failed to create storvsc dma tag\n");
971 		return (error);
972 	}
973 
974 	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
975 		reqp = malloc(sizeof(struct hv_storvsc_request),
976 				 M_DEVBUF, M_WAITOK|M_ZERO);
977 		reqp->softc = sc;
978 		error = bus_dmamap_create(sc->storvsc_req_dtag, 0,
979 				&reqp->data_dmap);
980 		if (error) {
981 			device_printf(dev, "failed to allocate storvsc "
982 			    "data dmamap\n");
983 			goto cleanup;
984 		}
985 		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
986 	}
987 	return (0);
988 
989 cleanup:
990 	while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) {
991 		LIST_REMOVE(reqp, link);
992 		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
993 		free(reqp, M_DEVBUF);
994 	}
995 	return (error);
996 }
997 
998 static void
999 storvsc_sysctl(device_t dev)
1000 {
1001 	struct sysctl_oid_list *child;
1002 	struct sysctl_ctx_list *ctx;
1003 	struct sysctl_oid *ch_tree, *chid_tree;
1004 	struct storvsc_softc *sc;
1005 	char name[16];
1006 	int i;
1007 
1008 	sc = device_get_softc(dev);
1009 	ctx = device_get_sysctl_ctx(dev);
1010 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
1011 
1012 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt",
1013 		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_bio_cnt,
1014 		"# of bio data block");
1015 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt",
1016 		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_vaddr_cnt,
1017 		"# of vaddr data block");
1018 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt",
1019 		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_sg_cnt,
1020 		"# of sg data block");
1021 
1022 	/* dev.storvsc.UNIT.channel */
1023 	ch_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "channel",
1024 		CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1025 	if (ch_tree == NULL)
1026 		return;
1027 
1028 	for (i = 0; i < sc->hs_nchan; i++) {
1029 		uint32_t ch_id;
1030 
1031 		ch_id = vmbus_chan_id(sc->hs_sel_chan[i]);
1032 		snprintf(name, sizeof(name), "%d", ch_id);
1033 		/* dev.storvsc.UNIT.channel.CHID */
1034 		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
1035 			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1036 		if (chid_tree == NULL)
1037 			return;
1038 		/* dev.storvsc.UNIT.channel.CHID.send_req */
1039 		SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
1040 			"send_req", CTLFLAG_RD, &sc->sysctl_data.chan_send_cnt[i],
1041 			"# of request sending from this channel");
1042 	}
1043 }
1044 
1045 /**
1046  * @brief StorVSC attach function
1047  *
1048  * Function responsible for allocating per-device structures,
1049  * setting up CAM interfaces and scanning for available LUNs to
1050  * be used for SCSI device peripherals.
1051  *
 * @param dev the device being attached
1053  * @returns 0 on success or an error on failure
1054  */
static int
storvsc_attach(device_t dev)
{
	enum hv_storage_type stor_type;
	struct storvsc_softc *sc;
	struct cam_devq *devq;
	int ret, i, j;
	struct hv_storvsc_request *reqp;
	struct root_hold_token *root_mount_token = NULL;
	struct hv_sgl_node *sgl_node = NULL;
	void *tmp_buff = NULL;

	/*
	 * We need to serialize storvsc attach calls.
	 */
	root_mount_token = root_mount_hold("storvsc");

	sc = device_get_softc(dev);
	sc->hs_nchan = 1;
	sc->hs_chan = vmbus_get_channel(dev);

	stor_type = storvsc_get_storage_type(dev);

	if (stor_type == DRIVER_UNKNOWN) {
		ret = ENODEV;
		goto cleanup;
	}

	/* fill in driver specific properties */
	sc->hs_drv_props = &g_drv_props_table[stor_type];
	sc->hs_drv_props->drv_ringbuffer_size = hv_storvsc_ringbuffer_size;
	/* Cap the in-flight I/O count at the driver-wide maximum. */
	sc->hs_drv_props->drv_max_ios_per_target =
		MIN(STORVSC_MAX_IO, hv_storvsc_max_io);
	if (bootverbose) {
		printf("storvsc ringbuffer size: %d, max_io: %d\n",
			sc->hs_drv_props->drv_ringbuffer_size,
			sc->hs_drv_props->drv_max_ios_per_target);
	}
	/* fill in device specific properties */
	sc->hs_unit	= device_get_unit(dev);
	sc->hs_dev	= dev;

	mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);

	/* Pre-allocate the request pool and its DMA tag/maps. */
	ret = storvsc_init_requests(dev);
	if (ret != 0)
		goto cleanup;

	/*
	 * create sg-list page pool
	 *
	 * The pool is a file-scope global shared by all storvsc
	 * instances; the is_init flag makes sure only the first
	 * attach populates it (attaches are serialized above).
	 */
	if (FALSE == g_hv_sgl_page_pool.is_init) {
		g_hv_sgl_page_pool.is_init = TRUE;
		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);

		/*
		 * Pre-create SG list, each SG list with
		 * STORVSC_DATA_SEGCNT_MAX segments, each
		 * segment has one page buffer
		 */
		for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; i++) {
	        	sgl_node = malloc(sizeof(struct hv_sgl_node),
			    M_DEVBUF, M_WAITOK|M_ZERO);

			sgl_node->sgl_data = malloc(sizeof(struct hv_sglist),
			    M_DEVBUF, M_WAITOK|M_ZERO);

			for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
				tmp_buff = malloc(PAGE_SIZE,
				    M_DEVBUF, M_WAITOK|M_ZERO);

				sgl_node->sgl_data->sg_iov[j].iov_base =
				    tmp_buff;
			}

			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
			    sgl_node, link);
		}
	}

	sc->hs_destroy = FALSE;
	sc->hs_drain_notify = FALSE;
	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");

	/* Negotiate the VSP protocol and open the VMBUS channel(s). */
	ret = hv_storvsc_connect_vsp(sc);
	if (ret != 0) {
		goto cleanup;
	}

	/* Construct cpu to channel mapping */
	storvsc_create_chan_sel(sc);

	/*
	 * Create the device queue.
	 * Hyper-V maps each target to one SCSI HBA
	 */
	devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
	if (devq == NULL) {
		device_printf(dev, "Failed to alloc device queue\n");
		ret = ENOMEM;
		goto cleanup;
	}

	sc->hs_sim = cam_sim_alloc(storvsc_action,
				storvsc_poll,
				sc->hs_drv_props->drv_name,
				sc,
				sc->hs_unit,
				&sc->hs_lock, 1,
				sc->hs_drv_props->drv_max_ios_per_target,
				devq);

	if (sc->hs_sim == NULL) {
		device_printf(dev, "Failed to alloc sim\n");
		/* cam_sim_alloc failed; the devq is still ours to free. */
		cam_simq_free(devq);
		ret = ENOMEM;
		goto cleanup;
	}

	mtx_lock(&sc->hs_lock);
	/* bus_id is set to 0, need to get it from VMBUS channel query? */
	if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
		/* free_devq=TRUE: the sim owns devq from here on. */
		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
		mtx_unlock(&sc->hs_lock);
		device_printf(dev, "Unable to register SCSI bus\n");
		ret = ENXIO;
		goto cleanup;
	}

	if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
		 cam_sim_path(sc->hs_sim),
		CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
		xpt_bus_deregister(cam_sim_path(sc->hs_sim));
		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
		mtx_unlock(&sc->hs_lock);
		device_printf(dev, "Unable to create path\n");
		ret = ENXIO;
		goto cleanup;
	}

	mtx_unlock(&sc->hs_lock);

	storvsc_sysctl(dev);

	root_mount_rel(root_mount_token);
	return (0);


cleanup:
	/*
	 * NOTE(review): hs_lock and hs_drain_sema are not destroyed on
	 * this failure path, and the DMA tag created by
	 * storvsc_init_requests() is not freed — confirm whether these
	 * leaks on a failed attach are acceptable.
	 */
	root_mount_rel(root_mount_token);
	while (!LIST_EMPTY(&sc->hs_free_list)) {
		reqp = LIST_FIRST(&sc->hs_free_list);
		LIST_REMOVE(reqp, link);
		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
		free(reqp, M_DEVBUF);
	}

	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
		LIST_REMOVE(sgl_node, link);
		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
			free(sgl_node->sgl_data->sg_iov[j].iov_base, M_DEVBUF);
		}
		free(sgl_node->sgl_data, M_DEVBUF);
		free(sgl_node, M_DEVBUF);
	}

	return (ret);
}
1223 
1224 /**
1225  * @brief StorVSC device detach function
1226  *
1227  * This function is responsible for safely detaching a
1228  * StorVSC device.  This includes waiting for inbound responses
1229  * to complete and freeing associated per-device structures.
1230  *
1231  * @param dev a device
 * @returns 0 on success
1233  */
1234 static int
1235 storvsc_detach(device_t dev)
1236 {
1237 	struct storvsc_softc *sc = device_get_softc(dev);
1238 	struct hv_storvsc_request *reqp = NULL;
1239 	struct hv_sgl_node *sgl_node = NULL;
1240 	int j = 0;
1241 
1242 	sc->hs_destroy = TRUE;
1243 
1244 	/*
1245 	 * At this point, all outbound traffic should be disabled. We
1246 	 * only allow inbound traffic (responses) to proceed so that
1247 	 * outstanding requests can be completed.
1248 	 */
1249 
1250 	sc->hs_drain_notify = TRUE;
1251 	sema_wait(&sc->hs_drain_sema);
1252 	sc->hs_drain_notify = FALSE;
1253 
1254 	/*
1255 	 * Since we have already drained, we don't need to busy wait.
1256 	 * The call to close the channel will reset the callback
1257 	 * under the protection of the incoming channel lock.
1258 	 */
1259 
1260 	vmbus_chan_close(sc->hs_chan);
1261 
1262 	mtx_lock(&sc->hs_lock);
1263 	while (!LIST_EMPTY(&sc->hs_free_list)) {
1264 		reqp = LIST_FIRST(&sc->hs_free_list);
1265 		LIST_REMOVE(reqp, link);
1266 		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1267 		free(reqp, M_DEVBUF);
1268 	}
1269 	mtx_unlock(&sc->hs_lock);
1270 
1271 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1272 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1273 		LIST_REMOVE(sgl_node, link);
1274 		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){
1275 			free(sgl_node->sgl_data->sg_iov[j].iov_base, M_DEVBUF);
1276 		}
1277 		free(sgl_node->sgl_data, M_DEVBUF);
1278 		free(sgl_node, M_DEVBUF);
1279 	}
1280 
1281 	return (0);
1282 }
1283 
#if HVS_TIMEOUT_TEST
/**
 * @brief unit test for timed out operations
 *
 * This function provides unit testing capability to simulate
 * timed out operations.  Recompilation with HV_TIMEOUT_TEST=1
 * is required.
 *
 * @param reqp pointer to a request structure
 * @param opcode SCSI operation being performed
 * @param wait if 1, wait for I/O to complete
 */
static void
storvsc_timeout_test(struct hv_storvsc_request *reqp,
		uint8_t opcode, int wait)
{
	int ret;
	union ccb *ccb = reqp->ccb;
	struct storvsc_softc *sc = reqp->softc;

	/* Only act on the opcode under test; ignore all other requests. */
	if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) {
		return;
	}

	/*
	 * Take the event mutex before issuing the I/O so the completion
	 * signal cannot be missed between io_request and cv_timedwait.
	 */
	if (wait) {
		mtx_lock(&reqp->event.mtx);
	}
	ret = hv_storvsc_io_request(sc, reqp);
	if (ret != 0) {
		if (wait) {
			mtx_unlock(&reqp->event.mtx);
		}
		printf("%s: io_request failed with %d.\n",
				__func__, ret);
		/* Fail the CCB and return the request to the free pool. */
		ccb->ccb_h.status = CAM_PROVIDE_FAIL;
		mtx_lock(&sc->hs_lock);
		storvsc_free_request(sc, reqp);
		xpt_done(ccb);
		mtx_unlock(&sc->hs_lock);
		return;
	}

	if (wait) {
		xpt_print(ccb->ccb_h.path,
				"%u: %s: waiting for IO return.\n",
				ticks, __func__);
		/* Wait up to 60 seconds for the completion path to signal. */
		ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
		mtx_unlock(&reqp->event.mtx);
		xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
				ticks, __func__, (ret == 0)?
				"IO return detected" :
				"IO return not detected");
		/*
		 * Now both the timer handler and io done are running
		 * simultaneously. We want to confirm the io done always
		 * finishes after the timer handler exits. So reqp used by
		 * timer handler is not freed or stale. Do busy loop for
		 * another 1/10 second to make sure io done does
		 * wait for the timer handler to complete.
		 */
		DELAY(100*1000);
		mtx_lock(&sc->hs_lock);
		xpt_print(ccb->ccb_h.path,
				"%u: %s: finishing, queue frozen %d, "
				"ccb status 0x%x scsi_status 0x%x.\n",
				ticks, __func__, sc->hs_frozen,
				ccb->ccb_h.status,
				ccb->csio.scsi_status);
		mtx_unlock(&sc->hs_lock);
	}
}
#endif /* HVS_TIMEOUT_TEST */
1356 
#ifdef notyet
/**
 * @brief timeout handler for requests
 *
 * This function is called as a result of a callout expiring.
 * On the first expiry the timeout is logged and the callout is
 * re-armed once; on the second expiry the SIM queue is frozen.
 *
 * @param arg pointer to a request
 */
static void
storvsc_timeout(void *arg)
{
	struct hv_storvsc_request *reqp = arg;
	struct storvsc_softc *sc = reqp->softc;
	union ccb *ccb = reqp->ccb;

	if (reqp->retries == 0) {
		/* First expiry: log, then grant one more timeout period. */
		mtx_lock(&sc->hs_lock);
		xpt_print(ccb->ccb_h.path,
		    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
		    ticks, reqp, ccb->ccb_h.timeout / 1000);
		cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
		mtx_unlock(&sc->hs_lock);

		reqp->retries++;
		callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
		    0, storvsc_timeout, reqp, 0);
#if HVS_TIMEOUT_TEST
		storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
#endif
		return;
	}

	/* Second expiry: freeze the SIM queue if not already frozen. */
	mtx_lock(&sc->hs_lock);
	xpt_print(ccb->ccb_h.path,
		"%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
		ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
		(sc->hs_frozen == 0)?
		"freezing the queue" : "the queue is already frozen");
	if (sc->hs_frozen == 0) {
		sc->hs_frozen = 1;
		xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
	}
	mtx_unlock(&sc->hs_lock);

#if HVS_TIMEOUT_TEST
	storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
#endif
}
#endif
1406 
1407 /**
1408  * @brief StorVSC device poll function
1409  *
1410  * This function is responsible for servicing requests when
1411  * interrupts are disabled (i.e when we are dumping core.)
1412  *
1413  * @param sim a pointer to a CAM SCSI interface module
1414  */
1415 static void
1416 storvsc_poll(struct cam_sim *sim)
1417 {
1418 	struct storvsc_softc *sc = cam_sim_softc(sim);
1419 
1420 	mtx_assert(&sc->hs_lock, MA_OWNED);
1421 	mtx_unlock(&sc->hs_lock);
1422 	hv_storvsc_on_channel_callback(sc->hs_chan, sc);
1423 	mtx_lock(&sc->hs_lock);
1424 }
1425 
1426 /**
1427  * @brief StorVSC device action function
1428  *
1429  * This function is responsible for handling SCSI operations which
1430  * are passed from the CAM layer.  The requests are in the form of
1431  * CAM control blocks which indicate the action being performed.
1432  * Not all actions require converting the request to a VSCSI protocol
1433  * message - these actions can be responded to by this driver.
1434  * Requests which are destined for a backend storage device are converted
1435  * to a VSCSI protocol message and sent on the channel connection associated
1436  * with this device.
1437  *
1438  * @param sim pointer to a CAM SCSI interface module
1439  * @param ccb pointer to a CAM control block
1440  */
static void
storvsc_action(struct cam_sim *sim, union ccb *ccb)
{
	struct storvsc_softc *sc = cam_sim_softc(sim);
	int res;

	mtx_assert(&sc->hs_lock, MA_OWNED);
	switch (ccb->ccb_h.func_code) {
	case XPT_PATH_INQ: {
		/* Report HBA capabilities and limits to CAM. */
		struct ccb_pathinq *cpi = &ccb->cpi;

		cpi->version_num = 1;
		cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
		cpi->target_sprt = 0;
		cpi->hba_misc = PIM_NOBUSRESET;
		if (hv_storvsc_use_pim_unmapped)
			cpi->hba_misc |= PIM_UNMAPPED;
		cpi->maxio = STORVSC_DATA_SIZE_MAX;
		cpi->hba_eng_cnt = 0;
		cpi->max_target = STORVSC_MAX_TARGETS;
		cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
		cpi->initiator_id = cpi->max_target;
		cpi->bus_id = cam_sim_bus(sim);
		cpi->base_transfer_speed = 300000;
		cpi->transport = XPORT_SAS;
		cpi->transport_version = 0;
		cpi->protocol = PROTO_SCSI;
		cpi->protocol_version = SCSI_REV_SPC2;
		strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
		strlcpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
		cpi->unit_number = cam_sim_unit(sim);

		ccb->ccb_h.status = CAM_REQ_CMP;
		xpt_done(ccb);
		return;
	}
	case XPT_GET_TRAN_SETTINGS: {
		struct  ccb_trans_settings *cts = &ccb->cts;

		cts->transport = XPORT_SAS;
		cts->transport_version = 0;
		cts->protocol = PROTO_SCSI;
		cts->protocol_version = SCSI_REV_SPC2;

		/* enable tag queuing and disconnected mode */
		cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
		cts->xport_specific.valid = CTS_SPI_VALID_DISC;
		cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;

		ccb->ccb_h.status = CAM_REQ_CMP;
		xpt_done(ccb);
		return;
	}
	case XPT_SET_TRAN_SETTINGS:	{
		/* Settings changes are accepted but not acted on. */
		ccb->ccb_h.status = CAM_REQ_CMP;
		xpt_done(ccb);
		return;
	}
	case XPT_CALC_GEOMETRY:{
		cam_calc_geometry(&ccb->ccg, 1);
		xpt_done(ccb);
		return;
	}
	case  XPT_RESET_BUS:
	case  XPT_RESET_DEV:{
#if HVS_HOST_RESET
		if ((res = hv_storvsc_host_reset(sc)) != 0) {
			xpt_print(ccb->ccb_h.path,
				"hv_storvsc_host_reset failed with %d\n", res);
			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
			xpt_done(ccb);
			return;
		}
		ccb->ccb_h.status = CAM_REQ_CMP;
		xpt_done(ccb);
		return;
#else
		xpt_print(ccb->ccb_h.path,
				  "%s reset not supported.\n",
				  (ccb->ccb_h.func_code == XPT_RESET_BUS)?
				  "bus" : "dev");
		ccb->ccb_h.status = CAM_REQ_INVALID;
		xpt_done(ccb);
		return;
#endif	/* HVS_HOST_RESET */
	}
	case XPT_SCSI_IO:
	case XPT_IMMED_NOTIFY: {
		struct hv_storvsc_request *reqp = NULL;
		bus_dmamap_t dmap_saved;

		/* NOTE(review): the panic message says "cdl_len" — typo
		 * for "cdb_len"; left as-is here since changing a runtime
		 * string is a code change. */
		if (ccb->csio.cdb_len == 0) {
			panic("cdl_len is 0\n");
		}

		/*
		 * No free request available: ask CAM to requeue and
		 * freeze the SIM queue until a request is released.
		 */
		if (LIST_EMPTY(&sc->hs_free_list)) {
			ccb->ccb_h.status = CAM_REQUEUE_REQ;
			if (sc->hs_frozen == 0) {
				sc->hs_frozen = 1;
				xpt_freeze_simq(sim, /* count*/1);
			}
			xpt_done(ccb);
			return;
		}

		reqp = LIST_FIRST(&sc->hs_free_list);
		LIST_REMOVE(reqp, link);

		/* Save the data_dmap before reset request */
		dmap_saved = reqp->data_dmap;

		/* XXX this is ugly */
		bzero(reqp, sizeof(struct hv_storvsc_request));

		/* Restore necessary bits */
		reqp->data_dmap = dmap_saved;
		reqp->softc = sc;

		ccb->ccb_h.status |= CAM_SIM_QUEUED;
		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
			ccb->ccb_h.status = CAM_REQ_INVALID;
			xpt_done(ccb);
			return;
		}

#ifdef notyet
		/* Arm a per-request timeout callout unless infinite. */
		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
			callout_init(&reqp->callout, 1);
			callout_reset_sbt(&reqp->callout,
			    SBT_1MS * ccb->ccb_h.timeout, 0,
			    storvsc_timeout, reqp, 0);
#if HVS_TIMEOUT_TEST
			cv_init(&reqp->event.cv, "storvsc timeout cv");
			mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
					NULL, MTX_DEF);
			switch (reqp->vstor_packet.vm_srb.cdb[0]) {
				case MODE_SELECT_10:
				case SEND_DIAGNOSTIC:
					/* To have timer send the request. */
					return;
				default:
					break;
			}
#endif /* HVS_TIMEOUT_TEST */
		}
#endif

		/* Hand the request to the host; on failure return it. */
		if ((res = hv_storvsc_io_request(sc, reqp)) != 0) {
			xpt_print(ccb->ccb_h.path,
				"hv_storvsc_io_request failed with %d\n", res);
			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
			storvsc_free_request(sc, reqp);
			xpt_done(ccb);
			return;
		}
		return;
	}

	default:
		ccb->ccb_h.status = CAM_REQ_INVALID;
		xpt_done(ccb);
		return;
	}
}
1608 
1609 /**
1610  * @brief destroy bounce buffer
1611  *
 * This function is responsible for destroying a Scatter/Gather list
 * that was created by storvsc_create_bounce_buffer()
1614  *
1615  * @param sgl- the Scatter/Gather need be destroy
1616  * @param sg_count- page count of the SG list.
1617  *
1618  */
1619 static void
1620 storvsc_destroy_bounce_buffer(struct hv_sglist *sgl)
1621 {
1622 	struct hv_sgl_node *sgl_node = NULL;
1623 	if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
1624 		printf("storvsc error: not enough in use sgl\n");
1625 		return;
1626 	}
1627 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
1628 	LIST_REMOVE(sgl_node, link);
1629 	sgl_node->sgl_data = sgl;
1630 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
1631 }
1632 
1633 /**
1634  * @brief create bounce buffer
1635  *
 * This function is responsible for creating a Scatter/Gather list
 * which holds several pages that can be aligned with page size.
1638  *
1639  * @param seg_count- SG-list segments count
1640  * @param write - if WRITE_TYPE, set SG list page used size to 0,
1641  * otherwise set used size to page size.
1642  *
1643  * return NULL if create failed
1644  */
1645 static struct hv_sglist *
1646 storvsc_create_bounce_buffer(uint16_t seg_count, int write)
1647 {
1648 	int i = 0;
1649 	struct hv_sglist *bounce_sgl = NULL;
1650 	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
1651 	struct hv_sgl_node *sgl_node = NULL;
1652 
1653 	/* get struct hv_sglist from free_sgl_list */
1654 	if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1655 		printf("storvsc error: not enough free sgl\n");
1656 		return NULL;
1657 	}
1658 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1659 	LIST_REMOVE(sgl_node, link);
1660 	bounce_sgl = sgl_node->sgl_data;
1661 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
1662 
1663 	bounce_sgl->sg_maxseg = seg_count;
1664 
1665 	if (write == WRITE_TYPE)
1666 		bounce_sgl->sg_nseg = 0;
1667 	else
1668 		bounce_sgl->sg_nseg = seg_count;
1669 
1670 	for (i = 0; i < seg_count; i++)
1671 	        bounce_sgl->sg_iov[i].iov_len = buf_len;
1672 
1673 	return bounce_sgl;
1674 }
1675 
1676 /**
1677  * @brief copy data from SG list to bounce buffer
1678  *
1679  * This function is responsible for copy data from one SG list's segments
1680  * to another SG list which used as bounce buffer.
1681  *
1682  * @param bounce_sgl - the destination SG list
1683  * @param orig_sgl - the segment of the source SG list.
1684  * @param orig_sgl_count - the count of segments.
1685  * @param orig_sgl_count - indicate which segment need bounce buffer,
1686  *  set 1 means need.
1687  *
1688  */
1689 static void
1690 storvsc_copy_sgl_to_bounce_buf(struct hv_sglist *bounce_sgl,
1691 			       bus_dma_segment_t *orig_sgl,
1692 			       unsigned int orig_sgl_count,
1693 			       uint64_t seg_bits)
1694 {
1695 	int src_sgl_idx = 0;
1696 
1697 	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
1698 		if (seg_bits & (1 << src_sgl_idx)) {
1699 			memcpy(bounce_sgl->sg_iov[src_sgl_idx].iov_base,
1700 			    (void*)orig_sgl[src_sgl_idx].ds_addr,
1701 			    orig_sgl[src_sgl_idx].ds_len);
1702 
1703 			bounce_sgl->sg_iov[src_sgl_idx].iov_len =
1704 			    orig_sgl[src_sgl_idx].ds_len;
1705 		}
1706 	}
1707 }
1708 
1709 /**
1710  * @brief copy data from SG list which used as bounce to another SG list
1711  *
1712  * This function is responsible for copy data from one SG list with bounce
1713  * buffer to another SG list's segments.
1714  *
1715  * @param dest_sgl - the destination SG list's segments
1716  * @param dest_sgl_count - the count of destination SG list's segment.
1717  * @param src_sgl - the source SG list.
1718  * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
1719  *
1720  */
1721 void
1722 storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
1723 				    unsigned int dest_sgl_count,
1724 				    struct hv_sglist* src_sgl,
1725 				    uint64_t seg_bits)
1726 {
1727 	int sgl_idx = 0;
1728 
1729 	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
1730 		if (seg_bits & (1 << sgl_idx)) {
1731 			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
1732 			    src_sgl->sg_iov[sgl_idx].iov_base,
1733 			    src_sgl->sg_iov[sgl_idx].iov_len);
1734 		}
1735 	}
1736 }
1737 
1738 /**
1739  * @brief check SG list with bounce buffer or not
1740  *
1741  * This function is responsible for check if need bounce buffer for SG list.
1742  *
1743  * @param sgl - the SG list's segments
1744  * @param sg_count - the count of SG list's segment.
 * @param bits - bitmask of segments that need a bounce buffer
1746  *
1747  * return -1 if SG list needless bounce buffer
1748  */
static int
storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
				unsigned int sg_count,
				uint64_t *bits)
{
	int i = 0;
	int offset = 0;
	uint64_t phys_addr = 0;
	uint64_t tmp_bits = 0;
	boolean_t found_hole = FALSE;
	boolean_t pre_aligned = TRUE;

	/* A single segment can never contain a hole. */
	if (sg_count < 2){
		return -1;
	}

	*bits = 0;

	/* Check whether the first segment starts on a page boundary. */
	phys_addr = vtophys(sgl[0].ds_addr);
	offset =  phys_addr - trunc_page(phys_addr);

	if (offset != 0) {
		pre_aligned = FALSE;
		tmp_bits |= 1;
	}

	/*
	 * Walk the remaining segments, flagging each unaligned one in
	 * tmp_bits and looking for a "hole": any point where the
	 * segments are not physically contiguous and page-aligned.
	 */
	for (i = 1; i < sg_count; i++) {
		phys_addr = vtophys(sgl[i].ds_addr);
		offset =  phys_addr - trunc_page(phys_addr);

		if (offset == 0) {
			if (FALSE == pre_aligned){
				/*
				 * This segment is aligned, if the previous
				 * one is not aligned, find a hole
				 */
				found_hole = TRUE;
			}
			pre_aligned = TRUE;
		} else {
			tmp_bits |= 1ULL << i;
			if (!pre_aligned) {
				if (phys_addr != vtophys(sgl[i-1].ds_addr +
				    sgl[i-1].ds_len)) {
					/*
					 * Check whether connect to previous
					 * segment,if not, find the hole
					 */
					found_hole = TRUE;
				}
			} else {
				found_hole = TRUE;
			}
			pre_aligned = FALSE;
		}
	}

	/* Only report the bitmask when a bounce buffer is required. */
	if (!found_hole) {
		return (-1);
	} else {
		*bits = tmp_bits;
		return 0;
	}
}
1813 
1814 /**
1815  * Copy bus_dma segments to multiple page buffer, which requires
1816  * the pages are compact composed except for the 1st and last pages.
1817  */
static void
storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	struct hv_storvsc_request *reqp = arg;
	union ccb *ccb = reqp->ccb;
	struct ccb_scsiio *csio = &ccb->csio;
	struct storvsc_gpa_range *prplist;
	int i;

	/*
	 * NOTE(review): the 'error' argument is ignored here; confirm
	 * that load failures are handled by the bus_dmamap_load_ccb
	 * caller (BUS_DMA_NOWAIT) and cannot reach this callback.
	 */

	/* Total transfer length and the offset within the first page. */
	prplist = &reqp->prp_list;
	prplist->gpa_range.gpa_len = csio->dxfer_len;
	prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK;

	/*
	 * Record the page frame number of each segment.  The INVARIANTS
	 * checks assert the "compact" layout the host expects: the first
	 * segment runs to the end of its page, the last starts on a page
	 * boundary, and every middle segment is exactly one full page.
	 */
	for (i = 0; i < nsegs; i++) {
#ifdef INVARIANTS
		if (nsegs > 1) {
			if (i == 0) {
				KASSERT((segs[i].ds_addr & PAGE_MASK) +
				    segs[i].ds_len == PAGE_SIZE,
				    ("invalid 1st page, ofs 0x%jx, len %zu",
				     (uintmax_t)segs[i].ds_addr,
				     segs[i].ds_len));
			} else if (i == nsegs - 1) {
				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0,
				    ("invalid last page, ofs 0x%jx",
				     (uintmax_t)segs[i].ds_addr));
			} else {
				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 &&
				    segs[i].ds_len == PAGE_SIZE,
				    ("not a full page, ofs 0x%jx, len %zu",
				     (uintmax_t)segs[i].ds_addr,
				     segs[i].ds_len));
			}
		}
#endif
		prplist->gpa_page[i] = atop(segs[i].ds_addr);
	}
	reqp->prp_cnt = nsegs;
}
1857 
1858 /**
1859  * @brief Fill in a request structure based on a CAM control block
1860  *
1861  * Fills in a request structure based on the contents of a CAM control
1862  * block.  The request structure holds the payload information for
1863  * VSCSI protocol request.
1864  *
1865  * @param ccb pointer to a CAM contorl block
1866  * @param reqp pointer to a request structure
1867  */
1868 static int
1869 create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
1870 {
1871 	struct ccb_scsiio *csio = &ccb->csio;
1872 	uint64_t phys_addr;
1873 	uint32_t pfn;
1874 	uint64_t not_aligned_seg_bits = 0;
1875 	int error;
1876 
1877 	/* refer to struct vmscsi_req for meanings of these two fields */
1878 	reqp->vstor_packet.u.vm_srb.port =
1879 		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
1880 	reqp->vstor_packet.u.vm_srb.path_id =
1881 		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
1882 
1883 	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
1884 	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
1885 
1886 	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
1887 	if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
1888 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
1889 			csio->cdb_len);
1890 	} else {
1891 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
1892 			csio->cdb_len);
1893 	}
1894 
1895 	if (hv_storvsc_use_win8ext_flags) {
1896 		reqp->vstor_packet.u.vm_srb.win8_extension.time_out_value = 60;
1897 		reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1898 			SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
1899 	}
1900 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1901 	case CAM_DIR_OUT:
1902 		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
1903 		if (hv_storvsc_use_win8ext_flags) {
1904 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1905 				SRB_FLAGS_DATA_OUT;
1906 		}
1907 		break;
1908 	case CAM_DIR_IN:
1909 		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
1910 		if (hv_storvsc_use_win8ext_flags) {
1911 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1912 				SRB_FLAGS_DATA_IN;
1913 		}
1914 		break;
1915 	case CAM_DIR_NONE:
1916 		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1917 		if (hv_storvsc_use_win8ext_flags) {
1918 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1919 				SRB_FLAGS_NO_DATA_TRANSFER;
1920 		}
1921 		break;
1922 	default:
1923 		printf("Error: unexpected data direction: 0x%x\n",
1924 			ccb->ccb_h.flags & CAM_DIR_MASK);
1925 		return (EINVAL);
1926 	}
1927 
1928 	reqp->sense_data     = &csio->sense_data;
1929 	reqp->sense_info_len = csio->sense_len;
1930 
1931 	reqp->ccb = ccb;
1932 	ccb->ccb_h.spriv_ptr0 = reqp;
1933 
1934 	if (0 == csio->dxfer_len) {
1935 		return (0);
1936 	}
1937 
1938 	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
1939 	case CAM_DATA_BIO:
1940 	case CAM_DATA_VADDR:
1941 		error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag,
1942 		    reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp,
1943 		    BUS_DMA_NOWAIT);
1944 		if (error) {
1945 			xpt_print(ccb->ccb_h.path,
1946 			    "bus_dmamap_load_ccb failed: %d\n", error);
1947 			return (error);
1948 		}
1949 		if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
1950 			reqp->softc->sysctl_data.data_bio_cnt++;
1951 		else
1952 			reqp->softc->sysctl_data.data_vaddr_cnt++;
1953 		break;
1954 
1955 	case CAM_DATA_SG:
1956 	{
1957 		struct storvsc_gpa_range *prplist;
1958 		int i = 0;
1959 		int offset = 0;
1960 		int ret;
1961 
1962 		bus_dma_segment_t *storvsc_sglist =
1963 		    (bus_dma_segment_t *)ccb->csio.data_ptr;
1964 		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
1965 
1966 		prplist = &reqp->prp_list;
1967 		prplist->gpa_range.gpa_len = csio->dxfer_len;
1968 
1969 		printf("Storvsc: get SG I/O operation, %d\n",
1970 		    reqp->vstor_packet.u.vm_srb.data_in);
1971 
1972 		if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX){
1973 			printf("Storvsc: %d segments is too much, "
1974 			    "only support %d segments\n",
1975 			    storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX);
1976 			return (EINVAL);
1977 		}
1978 
1979 		/*
1980 		 * We create our own bounce buffer function currently. Idealy
1981 		 * we should use BUS_DMA(9) framework. But with current BUS_DMA
1982 		 * code there is no callback API to check the page alignment of
1983 		 * middle segments before busdma can decide if a bounce buffer
1984 		 * is needed for particular segment. There is callback,
1985 		 * "bus_dma_filter_t *filter", but the parrameters are not
1986 		 * sufficient for storvsc driver.
1987 		 * TODO:
1988 		 *	Add page alignment check in BUS_DMA(9) callback. Once
1989 		 *	this is complete, switch the following code to use
1990 		 *	BUS_DMA(9) for storvsc bounce buffer support.
1991 		 */
1992 		/* check if we need to create bounce buffer */
1993 		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
1994 		    storvsc_sg_count, &not_aligned_seg_bits);
1995 		if (ret != -1) {
1996 			reqp->bounce_sgl =
1997 			    storvsc_create_bounce_buffer(storvsc_sg_count,
1998 			    reqp->vstor_packet.u.vm_srb.data_in);
1999 			if (NULL == reqp->bounce_sgl) {
2000 				printf("Storvsc_error: "
2001 				    "create bounce buffer failed.\n");
2002 				return (ENOMEM);
2003 			}
2004 
2005 			reqp->bounce_sgl_count = storvsc_sg_count;
2006 			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
2007 
2008 			/*
2009 			 * if it is write, we need copy the original data
2010 			 *to bounce buffer
2011 			 */
2012 			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2013 				storvsc_copy_sgl_to_bounce_buf(
2014 				    reqp->bounce_sgl,
2015 				    storvsc_sglist,
2016 				    storvsc_sg_count,
2017 				    reqp->not_aligned_seg_bits);
2018 			}
2019 
2020 			/* transfer virtual address to physical frame number */
2021 			if (reqp->not_aligned_seg_bits & 0x1){
2022  				phys_addr =
2023 				    vtophys(reqp->bounce_sgl->sg_iov[0].iov_base);
2024 			}else{
2025  				phys_addr =
2026 					vtophys(storvsc_sglist[0].ds_addr);
2027 			}
2028 			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2029 
2030 			pfn = phys_addr >> PAGE_SHIFT;
2031 			prplist->gpa_page[0] = pfn;
2032 
2033 			for (i = 1; i < storvsc_sg_count; i++) {
2034 				if (reqp->not_aligned_seg_bits & (1 << i)) {
2035 					phys_addr =
2036 					    vtophys(reqp->bounce_sgl->sg_iov[i].iov_base);
2037 				} else {
2038 					phys_addr =
2039 					    vtophys(storvsc_sglist[i].ds_addr);
2040 				}
2041 
2042 				pfn = phys_addr >> PAGE_SHIFT;
2043 				prplist->gpa_page[i] = pfn;
2044 			}
2045 			reqp->prp_cnt = i;
2046 		} else {
2047 			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
2048 
2049 			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2050 
2051 			for (i = 0; i < storvsc_sg_count; i++) {
2052 				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
2053 				pfn = phys_addr >> PAGE_SHIFT;
2054 				prplist->gpa_page[i] = pfn;
2055 			}
2056 			reqp->prp_cnt = i;
2057 
2058 			/* check the last segment cross boundary or not */
2059 			offset = phys_addr & PAGE_MASK;
2060 			if (offset) {
2061 				/* Add one more PRP entry */
2062 				phys_addr =
2063 				    vtophys(storvsc_sglist[i-1].ds_addr +
2064 				    PAGE_SIZE - offset);
2065 				pfn = phys_addr >> PAGE_SHIFT;
2066 				prplist->gpa_page[i] = pfn;
2067 				reqp->prp_cnt++;
2068 			}
2069 
2070 			reqp->bounce_sgl_count = 0;
2071 		}
2072 		reqp->softc->sysctl_data.data_sg_cnt++;
2073 		break;
2074 	}
2075 	default:
2076 		printf("Unknow flags: %d\n", ccb->ccb_h.flags);
2077 		return(EINVAL);
2078 	}
2079 
2080 	return(0);
2081 }
2082 
2083 static uint32_t
2084 is_scsi_valid(const struct scsi_inquiry_data *inq_data)
2085 {
2086 	u_int8_t type;
2087 
2088 	type = SID_TYPE(inq_data);
2089 	if (type == T_NODEVICE)
2090 		return (0);
2091 	if (SID_QUAL(inq_data) == SID_QUAL_BAD_LU)
2092 		return (0);
2093 	return (1);
2094 }
2095 
2096 /**
2097  * @brief completion function before returning to CAM
2098  *
2099  * I/O process has been completed and the result needs
2100  * to be passed to the CAM layer.
2101  * Free resources related to this request.
2102  *
2103  * @param reqp pointer to a request structure
2104  */
static void
storvsc_io_done(struct hv_storvsc_request *reqp)
{
	union ccb *ccb = reqp->ccb;
	struct ccb_scsiio *csio = &ccb->csio;
	struct storvsc_softc *sc = reqp->softc;
	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
	bus_dma_segment_t *ori_sglist = NULL;
	int ori_sg_count = 0;
	const struct scsi_generic *cmd;

	/* destroy bounce buffer if it is used */
	if (reqp->bounce_sgl_count) {
		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
		ori_sg_count = ccb->csio.sglist_cnt;

		/*
		 * If it is READ operation, we should copy back the data
		 * to original SG list.
		 */
		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
			    ori_sg_count,
			    reqp->bounce_sgl,
			    reqp->not_aligned_seg_bits);
		}

		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
		reqp->bounce_sgl_count = 0;
	}

	/*
	 * A non-zero retry count means a timeout handler fired for this
	 * request before the host completed it; reset the count and log.
	 */
	if (reqp->retries > 0) {
		mtx_lock(&sc->hs_lock);
#if HVS_TIMEOUT_TEST
		xpt_print(ccb->ccb_h.path,
			"%u: IO returned after timeout, "
			"waking up timer handler if any.\n", ticks);
		mtx_lock(&reqp->event.mtx);
		cv_signal(&reqp->event.cv);
		mtx_unlock(&reqp->event.mtx);
#endif
		reqp->retries = 0;
		xpt_print(ccb->ccb_h.path,
			"%u: IO returned after timeout, "
			"stopping timer if any.\n", ticks);
		mtx_unlock(&sc->hs_lock);
	}

#ifdef notyet
	/*
	 * callout_drain() will wait for the timer handler to finish
	 * if it is running. So we don't need any lock to synchronize
	 * between this routine and the timer handler.
	 * Note that we need to make sure reqp is not freed when timer
	 * handler is using or will use it.
	 */
	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
		callout_drain(&reqp->callout);
	}
#endif
	/*
	 * The CDB either lives inside the CCB or is referenced through a
	 * pointer, depending on CAM_CDB_POINTER; we only need the opcode
	 * for logging and the INQUIRY fixups below.
	 */
	cmd = (const struct scsi_generic *)
	    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
	     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);

	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
	/*
	 * SRB_STATUS() extracts the base status code; flag bits such as
	 * SRB_STATUS_AUTOSENSE_VALID are tested on the raw srb_status
	 * value further below.
	 */
	int srb_status = SRB_STATUS(vm_srb->srb_status);
#ifdef DIAGNOSTIC
	/*
	 * NOTE(review): hv_storvsc_srb_status appears to be a debug knob
	 * (defined elsewhere in this file) for injecting an SRB status
	 * into the next completion; it is consumed one-shot here.
	 */
	if (hv_storvsc_srb_status != -1) {
		srb_status = SRB_STATUS(hv_storvsc_srb_status & 0x3f);
		hv_storvsc_srb_status = -1;
	}
#endif /* DIAGNOSTIC */
	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
		/*
		 * SCSI layer reported success but the host's SRB status
		 * may still indicate a transport-level problem; map each
		 * SRB status onto the closest CAM status.
		 */
		if (srb_status != SRB_STATUS_SUCCESS) {
			bool log_error = true;
			switch (srb_status) {
				case SRB_STATUS_PENDING:
					/* We should never get this */
					panic("storvsc_io_done: SRB_STATUS_PENDING");
					break;
				case SRB_STATUS_ABORTED:
					/*
					 * storvsc doesn't support aborts yet
					 * but if we ever get this status
					 * the I/O is complete - treat it as a
					 * timeout
					 */
					ccb->ccb_h.status |= CAM_CMD_TIMEOUT;
					break;
				case SRB_STATUS_ABORT_FAILED:
					/* We should never get this */
					panic("storvsc_io_done: SRB_STATUS_ABORT_FAILED");
					break;
				case SRB_STATUS_ERROR:
					/*
					 * We should never get this.
					 * Treat it as a CAM_UNREC_HBA_ERROR.
					 * It will be retried
					 */
					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
					break;
				case SRB_STATUS_BUSY:
					/* Host is busy. Delay and retry */
					ccb->ccb_h.status |= CAM_BUSY;
					break;
				case SRB_STATUS_INVALID_REQUEST:
				case SRB_STATUS_INVALID_PATH_ID:
				case SRB_STATUS_NO_DEVICE:
				case SRB_STATUS_INVALID_TARGET_ID:
					/*
					 * These indicate an invalid address
					 * and really should never be seen.
					 * A CAM_PATH_INVALID could be
					 * used here but I want to run
					 * down retries.  Do a CAM_BUSY
					 * since the host might be having issues.
					 */
					ccb->ccb_h.status |= CAM_BUSY;
					break;
				case SRB_STATUS_TIMEOUT:
				case SRB_STATUS_COMMAND_TIMEOUT:
					/* The backend has timed this out */
					ccb->ccb_h.status |= CAM_BUSY;
					break;
				/* Some old pSCSI errors below */
				case SRB_STATUS_SELECTION_TIMEOUT:
				case SRB_STATUS_MESSAGE_REJECTED:
				case SRB_STATUS_PARITY_ERROR:
				case SRB_STATUS_NO_HBA:
				case SRB_STATUS_DATA_OVERRUN:
				case SRB_STATUS_UNEXPECTED_BUS_FREE:
				case SRB_STATUS_PHASE_SEQUENCE_FAILURE:
					/*
					 * Old pSCSI responses, should never get.
					 * If we do treat as a CAM_UNREC_HBA_ERROR
					 * which will be retried
					 */
					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
					break;
				case SRB_STATUS_BUS_RESET:
					ccb->ccb_h.status |= CAM_SCSI_BUS_RESET;
					break;
				case SRB_STATUS_BAD_SRB_BLOCK_LENGTH:
					/*
					 * The request block is malformed and
					 * I doubt it is from the guest. Just retry.
					 */
					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
					break;
				/* Not used statuses just retry */
				case SRB_STATUS_REQUEST_FLUSHED:
				case SRB_STATUS_BAD_FUNCTION:
				case SRB_STATUS_NOT_POWERED:
					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
					break;
				case SRB_STATUS_INVALID_LUN:
					/*
					 * Don't log an EMS for this response since
					 * there is no device at this LUN. This is a
					 * normal and expected response when a device
					 * is detached.
					 */
					ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
					log_error = false;
					break;
				case SRB_STATUS_ERROR_RECOVERY:
				case SRB_STATUS_LINK_DOWN:
					/*
					 * I don't ever expect these from
					 * the host but if we ever get
					 * retry after a delay
					 */
					ccb->ccb_h.status |= CAM_BUSY;
					break;
				default:
					/*
					 * An undefined response assert on
					 * on debug builds else retry
					 */
					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
					KASSERT(srb_status <= SRB_STATUS_LINK_DOWN,
					    ("storvsc: %s, unexpected srb_status of 0x%x",
					    __func__, srb_status));
					break;
			}
			if (log_error) {
				xpt_print(ccb->ccb_h.path, "The hypervisor's I/O adapter "
					"driver received an unexpected response code 0x%x "
					"for operation: %s. If this continues to occur, "
					"report the condition to your hypervisor vendor so "
					"they can rectify the issue.\n", srb_status,
					scsi_op_desc(cmd->opcode, NULL));
			}
		} else {
			ccb->ccb_h.status |= CAM_REQ_CMP;
		}

		/*
		 * Successful INQUIRY responses get post-processed here to
		 * work around several host-side quirks (see the XXX
		 * comments below).
		 */
		if (cmd->opcode == INQUIRY &&
		    srb_status == SRB_STATUS_SUCCESS) {
			int resp_xfer_len, resp_buf_len, data_len;
			uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
			struct scsi_inquiry_data *inq_data =
			    (struct scsi_inquiry_data *)csio->data_ptr;

			/* Get the buffer length reported by host */
			resp_xfer_len = vm_srb->transfer_len;

			/*
			 * Get the available buffer length: byte 4 of a
			 * standard INQUIRY response is the ADDITIONAL
			 * LENGTH field, so the full response is that
			 * value plus the 5 header bytes (see SPC).
			 */
			resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
			data_len = (resp_buf_len < resp_xfer_len) ?
			    resp_buf_len : resp_xfer_len;
			if (bootverbose && data_len >= 5) {
				xpt_print(ccb->ccb_h.path, "storvsc inquiry "
				    "(%d) [%x %x %x %x %x ... ]\n", data_len,
				    resp_buf[0], resp_buf[1], resp_buf[2],
				    resp_buf[3], resp_buf[4]);
			}
			/*
			 * XXX: Hyper-V (since win2012r2) responses inquiry with
			 * unknown version (0) for GEN-2 DVD device.
			 * Manually set the version number to SPC3 in order to
			 * ask CAM to continue probing with "PROBE_REPORT_LUNS".
			 * see probedone() in scsi_xpt.c
			 */
			if (SID_TYPE(inq_data) == T_CDROM &&
			    inq_data->version == 0 &&
			    (vmstor_proto_version >= VMSTOR_PROTOCOL_VERSION_WIN8)) {
				inq_data->version = SCSI_REV_SPC3;
				if (bootverbose) {
					xpt_print(ccb->ccb_h.path,
					    "set version from 0 to %d\n",
					    inq_data->version);
				}
			}
			/*
			 * XXX: Manually fix the wrong response returned from WS2012
			 */
			if (!is_scsi_valid(inq_data) &&
			    (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8 ||
			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN7)) {
				if (data_len >= 4 &&
				    (resp_buf[2] == 0 || resp_buf[3] == 0)) {
					resp_buf[2] = SCSI_REV_SPC3;
					resp_buf[3] = 2; // resp fmt must be 2
					if (bootverbose)
						xpt_print(ccb->ccb_h.path,
						    "fix version and resp fmt for 0x%x\n",
						    vmstor_proto_version);
				}
			} else if (data_len >= SHORT_INQUIRY_LENGTH) {
				char vendor[16];

				cam_strvis(vendor, inq_data->vendor,
				    sizeof(inq_data->vendor), sizeof(vendor));
				/*
				 * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
				 * WIN2012 R2 in order to support UNMAP feature.
				 */
				if (!strncmp(vendor, "Msft", 4) &&
				    SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
				    (vmstor_proto_version ==
				     VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
				     vmstor_proto_version ==
				     VMSTOR_PROTOCOL_VERSION_WIN8)) {
					inq_data->version = SCSI_REV_SPC3;
					if (bootverbose) {
						xpt_print(ccb->ccb_h.path,
						    "storvsc upgrades "
						    "SPC2 to SPC3\n");
					}
				}
			}
		}
	} else {
		/**
		 * On Some Windows hosts TEST_UNIT_READY command can return
		 * SRB_STATUS_ERROR and sense data, for example, asc=0x3a,1
		 * "(Medium not present - tray closed)". This error can be
		 * ignored since it will be sent to host periodically.
		 */
		boolean_t unit_not_ready = \
		    vm_srb->scsi_status == SCSI_STATUS_CHECK_COND &&
		    cmd->opcode == TEST_UNIT_READY &&
		    srb_status == SRB_STATUS_ERROR;
		if (!unit_not_ready && bootverbose) {
			mtx_lock(&sc->hs_lock);
			xpt_print(ccb->ccb_h.path,
				"storvsc scsi_status = %d, srb_status = %d\n",
				vm_srb->scsi_status, srb_status);
			mtx_unlock(&sc->hs_lock);
		}
		ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
	}

	/*
	 * On success or data overrun the host reports the actual byte
	 * count in transfer_len; for any other status assume nothing
	 * was transferred.
	 */
	ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
	if (srb_status == SRB_STATUS_SUCCESS ||
	    srb_status == SRB_STATUS_DATA_OVERRUN)
		ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;
	else
		ccb->csio.resid = ccb->csio.dxfer_len;

	/* Propagate autosense data to CAM if the host supplied any. */
	if ((vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) != 0 &&
	    reqp->sense_info_len != 0) {
		csio->sense_resid = csio->sense_len - reqp->sense_info_len;
		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
	}

	/*
	 * If the SIM queue was frozen (hs_frozen is set elsewhere in
	 * this driver), ask CAM to release it now that a request slot
	 * is being freed.
	 */
	mtx_lock(&sc->hs_lock);
	if (reqp->softc->hs_frozen == 1) {
		xpt_print(ccb->ccb_h.path,
			"%u: storvsc unfreezing softc 0x%p.\n",
			ticks, reqp->softc);
		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
		reqp->softc->hs_frozen = 0;
	}
	storvsc_free_request(sc, reqp);
	mtx_unlock(&sc->hs_lock);

	xpt_done_direct(ccb);
}
2427 
2428 /**
2429  * @brief Free a request structure
2430  *
2431  * Free a request structure by returning it to the free list
2432  *
2433  * @param sc pointer to a softc
2434  * @param reqp pointer to a request structure
2435  */
static void
storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
{

	/*
	 * NOTE(review): hs_free_list is not locked here; the visible
	 * caller (storvsc_io_done) holds sc->hs_lock around this call —
	 * confirm all other call sites do the same.
	 */
	LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
}
2442 
2443 /**
2444  * @brief Determine type of storage device from GUID
2445  *
2446  * Using the type GUID, determine if this is a StorVSC (paravirtual
2447  * SCSI or BlkVSC (paravirtual IDE) device.
2448  *
2449  * @param dev a device
2450  * returns an enum
2451  */
2452 static enum hv_storage_type
2453 storvsc_get_storage_type(device_t dev)
2454 {
2455 	device_t parent = device_get_parent(dev);
2456 
2457 	if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0)
2458 		return DRIVER_BLKVSC;
2459 	if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0)
2460 		return DRIVER_STORVSC;
2461 	return DRIVER_UNKNOWN;
2462 }
2463 
2464 #define	PCI_VENDOR_INTEL	0x8086
2465 #define	PCI_PRODUCT_PIIX4	0x7111
2466 
2467 static void
2468 storvsc_ada_probe_veto(void *arg __unused, struct cam_path *path,
2469     struct ata_params *ident_buf __unused, int *veto)
2470 {
2471 
2472 	/*
2473 	 * The ATA disks are shared with the controllers managed
2474 	 * by this driver, so veto the ATA disks' attachment; the
2475 	 * ATA disks will be attached as SCSI disks once this driver
2476 	 * attached.
2477 	 */
2478 	if (path->device->protocol == PROTO_ATA) {
2479 		struct ccb_pathinq cpi;
2480 
2481 		xpt_path_inq(&cpi, path);
2482 		if (cpi.ccb_h.status == CAM_REQ_CMP &&
2483 		    cpi.hba_vendor == PCI_VENDOR_INTEL &&
2484 		    cpi.hba_device == PCI_PRODUCT_PIIX4) {
2485 			(*veto)++;
2486 			if (bootverbose) {
2487 				xpt_print(path,
2488 				    "Disable ATA disks on "
2489 				    "simulated ATA controller (0x%04x%04x)\n",
2490 				    cpi.hba_device, cpi.hba_vendor);
2491 			}
2492 		}
2493 	}
2494 }
2495 
2496 static void
2497 storvsc_sysinit(void *arg __unused)
2498 {
2499 	if (vm_guest == VM_GUEST_HV) {
2500 		storvsc_handler_tag = EVENTHANDLER_REGISTER(ada_probe_veto,
2501 		    storvsc_ada_probe_veto, NULL, EVENTHANDLER_PRI_ANY);
2502 	}
2503 }
2504 SYSINIT(storvsc_sys_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysinit,
2505     NULL);
2506 
2507 static void
2508 storvsc_sysuninit(void *arg __unused)
2509 {
2510 	if (storvsc_handler_tag != NULL)
2511 		EVENTHANDLER_DEREGISTER(ada_probe_veto, storvsc_handler_tag);
2512 }
2513 SYSUNINIT(storvsc_sys_uninit, SI_SUB_DRIVERS, SI_ORDER_SECOND,
2514     storvsc_sysuninit, NULL);
2515