1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
5  * Copyright (c) 2012 NetApp Inc.
6  * Copyright (c) 2012 Citrix Inc.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice unmodified, this list of conditions, and the following
14  *    disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /**
32  * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
 * to the Common Access Method (CAM) layer.  CAM control blocks (CCBs) are
34  * converted into VSCSI protocol messages which are delivered to the parent
35  * partition StorVSP driver over the Hyper-V VMBUS.
36  */
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include <sys/param.h>
41 #include <sys/proc.h>
42 #include <sys/condvar.h>
43 #include <sys/time.h>
44 #include <sys/systm.h>
45 #include <sys/sysctl.h>
46 #include <sys/sockio.h>
47 #include <sys/mbuf.h>
48 #include <sys/malloc.h>
49 #include <sys/module.h>
50 #include <sys/kernel.h>
51 #include <sys/queue.h>
52 #include <sys/lock.h>
53 #include <sys/sx.h>
54 #include <sys/taskqueue.h>
55 #include <sys/bus.h>
56 #include <sys/mutex.h>
57 #include <sys/callout.h>
58 #include <sys/smp.h>
59 #include <vm/vm.h>
60 #include <vm/pmap.h>
61 #include <vm/uma.h>
62 #include <sys/lock.h>
63 #include <sys/sema.h>
64 #include <sys/sglist.h>
65 #include <sys/eventhandler.h>
66 #include <machine/bus.h>
67 
68 #include <cam/cam.h>
69 #include <cam/cam_ccb.h>
70 #include <cam/cam_periph.h>
71 #include <cam/cam_sim.h>
72 #include <cam/cam_xpt_sim.h>
73 #include <cam/cam_xpt_internal.h>
74 #include <cam/cam_debug.h>
75 #include <cam/scsi/scsi_all.h>
76 #include <cam/scsi/scsi_message.h>
77 
78 #include <dev/hyperv/include/hyperv.h>
79 #include <dev/hyperv/include/vmbus.h>
80 #include "hv_vstorage.h"
81 #include "vmbus_if.h"
82 
83 #define STORVSC_MAX_LUNS_PER_TARGET	(64)
84 #define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
85 #define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
86 #define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
87 #define STORVSC_MAX_TARGETS		(2)
88 
89 #define VSTOR_PKT_SIZE	(sizeof(struct vstor_packet) - vmscsi_size_delta)
90 
91 /*
92  * 33 segments are needed to allow 128KB maxio, in case the data
93  * in the first page is _not_ PAGE_SIZE aligned, e.g.
94  *
95  *     |<----------- 128KB ----------->|
96  *     |                               |
97  *  0  2K 4K    8K   16K   124K  128K  130K
98  *  |  |  |     |     |       |     |  |
99  *  +--+--+-----+-----+.......+-----+--+--+
100  *  |  |  |     |     |       |     |  |  | DATA
101  *  |  |  |     |     |       |     |  |  |
102  *  +--+--+-----+-----+.......------+--+--+
103  *     |  |                         |  |
104  *     | 1|            31           | 1| ...... # of segments
105  */
106 #define STORVSC_DATA_SEGCNT_MAX		33
107 #define STORVSC_DATA_SEGSZ_MAX		PAGE_SIZE
108 #define STORVSC_DATA_SIZE_MAX		\
109 	((STORVSC_DATA_SEGCNT_MAX - 1) * STORVSC_DATA_SEGSZ_MAX)
110 
111 struct storvsc_softc;
112 
/* A pooled scatter/gather list wrapper, linked into one of the pool lists. */
struct hv_sgl_node {
	LIST_ENTRY(hv_sgl_node) link;		/* pool list linkage */
	struct sglist *sgl_data;		/* the wrapped sglist */
};

/*
 * Global pool of pre-allocated bounce-buffer scatter/gather lists,
 * shared by all storvsc adapter instances.
 */
struct hv_sgl_page_pool{
	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;	/* SGLs currently borrowed */
	LIST_HEAD(, hv_sgl_node) free_sgl_list;		/* SGLs available for use */
	boolean_t                is_init;		/* pool initialized? */
} g_hv_sgl_page_pool;
123 
124 enum storvsc_request_type {
125 	WRITE_TYPE,
126 	READ_TYPE,
127 	UNKNOWN_TYPE
128 };
129 
130 SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
131 	"Hyper-V storage interface");
132 
133 static u_int hv_storvsc_use_win8ext_flags = 1;
134 SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_win8ext_flags, CTLFLAG_RW,
135 	&hv_storvsc_use_win8ext_flags, 0,
136 	"Use win8 extension flags or not");
137 
138 static u_int hv_storvsc_use_pim_unmapped = 1;
139 SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN,
140 	&hv_storvsc_use_pim_unmapped, 0,
141 	"Optimize storvsc by using unmapped I/O");
142 
143 static u_int hv_storvsc_ringbuffer_size = (64 * PAGE_SIZE);
144 SYSCTL_UINT(_hw_storvsc, OID_AUTO, ringbuffer_size, CTLFLAG_RDTUN,
145 	&hv_storvsc_ringbuffer_size, 0, "Hyper-V storage ringbuffer size");
146 
147 static u_int hv_storvsc_max_io = 512;
148 SYSCTL_UINT(_hw_storvsc, OID_AUTO, max_io, CTLFLAG_RDTUN,
149 	&hv_storvsc_max_io, 0, "Hyper-V storage max io limit");
150 
151 static int hv_storvsc_chan_cnt = 0;
152 SYSCTL_INT(_hw_storvsc, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
153 	&hv_storvsc_chan_cnt, 0, "# of channels to use");
154 #ifdef DIAGNOSTIC
155 static int hv_storvsc_srb_status = -1;
156 SYSCTL_INT(_hw_storvsc, OID_AUTO, srb_status,  CTLFLAG_RW,
157 	&hv_storvsc_srb_status, 0, "srb_status to inject");
158 TUNABLE_INT("hw_storvsc.srb_status", &hv_storvsc_srb_status);
159 #endif /* DIAGNOSTIC */
160 
161 #define STORVSC_MAX_IO						\
162 	vmbus_chan_prplist_nelem(hv_storvsc_ringbuffer_size,	\
163 	   STORVSC_DATA_SEGCNT_MAX, VSTOR_PKT_SIZE)
164 
165 struct hv_storvsc_sysctl {
166 	u_long		data_bio_cnt;
167 	u_long		data_vaddr_cnt;
168 	u_long		data_sg_cnt;
169 	u_long		chan_send_cnt[MAXCPU];
170 };
171 
172 struct storvsc_gpa_range {
173 	struct vmbus_gpa_range	gpa_range;
174 	uint64_t		gpa_page[STORVSC_DATA_SEGCNT_MAX];
175 } __packed;
176 
/*
 * Per-I/O request tracking structure.  One of these is paired with a CAM
 * CCB for the lifetime of the request; its address doubles as the VMBUS
 * transaction id so completions can find it again.
 */
struct hv_storvsc_request {
	LIST_ENTRY(hv_storvsc_request)	link;		/* free-list linkage */
	struct vstor_packet		vstor_packet;	/* wire-format VSCSI packet */
	int				prp_cnt;	/* # of pages in prp_list */
	struct storvsc_gpa_range	prp_list;	/* guest-physical data pages */
	void				*sense_data;	/* where to copy autosense data */
	uint8_t				sense_info_len;	/* size of sense_data buffer */
	uint8_t				retries;
	union ccb			*ccb;		/* associated CAM CCB, if any */
	struct storvsc_softc		*softc;		/* owning adapter */
	struct callout			callout;	/* per-request timeout timer */
	struct sema			synch_sema; /*Synchronize the request/response if needed */
	struct sglist			*bounce_sgl;	/* bounce buffer for unaligned I/O */
	unsigned int			bounce_sgl_count;
	uint64_t			not_aligned_seg_bits;	/* bitmap of unaligned segments */
	bus_dmamap_t			data_dmap;	/* DMA map for the data buffer */
};
194 
/* Per-adapter (per VMBUS device) software state. */
struct storvsc_softc {
	struct vmbus_channel		*hs_chan;	/* primary channel */
	LIST_HEAD(, hv_storvsc_request)	hs_free_list;	/* free request pool */
	struct mtx			hs_lock;	/* protects free list et al. */
	struct storvsc_driver_props	*hs_drv_props;	/* blkvsc vs storvsc props */
	int 				hs_unit;
	uint32_t			hs_frozen;	/* CAM queue frozen? */
	struct cam_sim			*hs_sim;	/* CAM SIM for this HBA */
	struct cam_path 		*hs_path;
	uint32_t			hs_num_out_reqs;	/* outstanding I/O count */
	boolean_t			hs_destroy;	/* detach in progress */
	boolean_t			hs_drain_notify;	/* post hs_drain_sema at 0 reqs */
	struct sema 			hs_drain_sema;	/* detach waits here for drain */
	struct hv_storvsc_request	hs_init_req;	/* request for init handshake */
	struct hv_storvsc_request	hs_reset_req;	/* request for bus reset */
	device_t			hs_dev;
	bus_dma_tag_t			storvsc_req_dtag;	/* tag for data dmamaps */
	struct hv_storvsc_sysctl	sysctl_data;	/* statistics counters */
	uint32_t			hs_nchan;	/* total channels (primary + sub) */
	struct vmbus_channel		*hs_sel_chan[MAXCPU];	/* channel selection table */
};
216 
217 static eventhandler_tag storvsc_handler_tag;
218 /*
219  * The size of the vmscsi_request has changed in win8. The
220  * additional size is for the newly added elements in the
221  * structure. These elements are valid only when we are talking
222  * to a win8 host.
223  * Track the correct size we need to apply.
224  */
225 static int vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
226 
227 /**
228  * HyperV storvsc timeout testing cases:
229  * a. IO returned after first timeout;
230  * b. IO returned after second timeout and queue freeze;
231  * c. IO returned while timer handler is running
232  * The first can be tested by "sg_senddiag -vv /dev/daX",
233  * and the second and third can be done by
234  * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
235  */
236 #define HVS_TIMEOUT_TEST 0
237 
238 /*
239  * Bus/adapter reset functionality on the Hyper-V host is
240  * buggy and it will be disabled until
241  * it can be further tested.
242  */
243 #define HVS_HOST_RESET 0
244 
245 struct storvsc_driver_props {
246 	char		*drv_name;
247 	char		*drv_desc;
248 	uint8_t		drv_max_luns_per_target;
249 	uint32_t	drv_max_ios_per_target;
250 	uint32_t	drv_ringbuffer_size;
251 };
252 
253 enum hv_storage_type {
254 	DRIVER_BLKVSC,
255 	DRIVER_STORVSC,
256 	DRIVER_UNKNOWN
257 };
258 
259 #define HS_MAX_ADAPTERS 10
260 
261 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
262 
263 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
264 static const struct hyperv_guid gStorVscDeviceType={
265 	.hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
266 		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
267 };
268 
269 /* {32412632-86cb-44a2-9b5c-50d1417354f5} */
270 static const struct hyperv_guid gBlkVscDeviceType={
271 	.hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
272 		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
273 };
274 
275 static struct storvsc_driver_props g_drv_props_table[] = {
276 	{"blkvsc", "Hyper-V IDE",
277 	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
278 	 20*PAGE_SIZE},
279 	{"storvsc", "Hyper-V SCSI",
280 	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
281 	 20*PAGE_SIZE}
282 };
283 
284 /*
285  * Sense buffer size changed in win8; have a run-time
286  * variable to track the size we should use.
287  */
288 static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
289 
290 /*
291  * The storage protocol version is determined during the
292  * initial exchange with the host.  It will indicate which
293  * storage functionality is available in the host.
294 */
295 static int vmstor_proto_version;
296 
297 struct vmstor_proto {
298         int proto_version;
299         int sense_buffer_size;
300         int vmscsi_size_delta;
301 };
302 
303 static const struct vmstor_proto vmstor_proto_list[] = {
304         {
305                 VMSTOR_PROTOCOL_VERSION_WIN10,
306                 POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
307                 0
308         },
309         {
310                 VMSTOR_PROTOCOL_VERSION_WIN8_1,
311                 POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
312                 0
313         },
314         {
315                 VMSTOR_PROTOCOL_VERSION_WIN8,
316                 POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
317                 0
318         },
319         {
320                 VMSTOR_PROTOCOL_VERSION_WIN7,
321                 PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
322                 sizeof(struct vmscsi_win8_extension),
323         },
324         {
325                 VMSTOR_PROTOCOL_VERSION_WIN6,
326                 PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
327                 sizeof(struct vmscsi_win8_extension),
328         }
329 };
330 
331 /* static functions */
332 static int storvsc_probe(device_t dev);
333 static int storvsc_attach(device_t dev);
334 static int storvsc_detach(device_t dev);
335 static void storvsc_poll(struct cam_sim * sim);
336 static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
337 static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
338 static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
339 static enum hv_storage_type storvsc_get_storage_type(device_t dev);
340 static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
341 static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc);
342 static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
343 					struct vstor_packet *vstor_packet,
344 					struct hv_storvsc_request *request);
345 static int hv_storvsc_connect_vsp(struct storvsc_softc *);
346 static void storvsc_io_done(struct hv_storvsc_request *reqp);
347 static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
348 				bus_dma_segment_t *orig_sgl,
349 				unsigned int orig_sgl_count,
350 				uint64_t seg_bits);
351 void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
352 				unsigned int dest_sgl_count,
353 				struct sglist* src_sgl,
354 				uint64_t seg_bits);
355 
356 static device_method_t storvsc_methods[] = {
357 	/* Device interface */
358 	DEVMETHOD(device_probe,		storvsc_probe),
359 	DEVMETHOD(device_attach,	storvsc_attach),
360 	DEVMETHOD(device_detach,	storvsc_detach),
361 	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
362 	DEVMETHOD_END
363 };
364 
365 static driver_t storvsc_driver = {
366 	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
367 };
368 
369 static devclass_t storvsc_devclass;
370 DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0);
371 MODULE_VERSION(storvsc, 1);
372 MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
373 
374 static void
375 storvsc_subchan_attach(struct storvsc_softc *sc,
376     struct vmbus_channel *new_channel)
377 {
378 	struct vmstor_chan_props props;
379 	int ret = 0;
380 
381 	memset(&props, 0, sizeof(props));
382 
383 	vmbus_chan_cpu_rr(new_channel);
384 	ret = vmbus_chan_open(new_channel,
385 	    sc->hs_drv_props->drv_ringbuffer_size,
386   	    sc->hs_drv_props->drv_ringbuffer_size,
387 	    (void *)&props,
388 	    sizeof(struct vmstor_chan_props),
389 	    hv_storvsc_on_channel_callback, sc);
390 }
391 
392 /**
393  * @brief Send multi-channel creation request to host
394  *
395  * @param device  a Hyper-V device pointer
396  * @param max_chans  the max channels supported by vmbus
397  */
398 static void
399 storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_subch)
400 {
401 	struct vmbus_channel **subchan;
402 	struct hv_storvsc_request *request;
403 	struct vstor_packet *vstor_packet;
404 	int request_subch;
405 	int ret, i;
406 
407 	/* get sub-channel count that need to create */
408 	request_subch = MIN(max_subch, mp_ncpus - 1);
409 
410 	request = &sc->hs_init_req;
411 
412 	/* request the host to create multi-channel */
413 	memset(request, 0, sizeof(struct hv_storvsc_request));
414 
415 	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
416 
417 	vstor_packet = &request->vstor_packet;
418 
419 	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
420 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
421 	vstor_packet->u.multi_channels_cnt = request_subch;
422 
423 	ret = vmbus_chan_send(sc->hs_chan,
424 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
425 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
426 
427 	sema_wait(&request->synch_sema);
428 
429 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
430 	    vstor_packet->status != 0) {
431 		printf("Storvsc_error: create multi-channel invalid operation "
432 		    "(%d) or statue (%u)\n",
433 		    vstor_packet->operation, vstor_packet->status);
434 		return;
435 	}
436 
437 	/* Update channel count */
438 	sc->hs_nchan = request_subch + 1;
439 
440 	/* Wait for sub-channels setup to complete. */
441 	subchan = vmbus_subchan_get(sc->hs_chan, request_subch);
442 
443 	/* Attach the sub-channels. */
444 	for (i = 0; i < request_subch; ++i)
445 		storvsc_subchan_attach(sc, subchan[i]);
446 
447 	/* Release the sub-channels. */
448 	vmbus_subchan_rel(subchan, request_subch);
449 
450 	if (bootverbose)
451 		printf("Storvsc create multi-channel success!\n");
452 }
453 
454 /**
455  * @brief initialize channel connection to parent partition
456  *
457  * @param dev  a Hyper-V device pointer
458  * @returns  0 on success, non-zero error on failure
459  */
460 static int
461 hv_storvsc_channel_init(struct storvsc_softc *sc)
462 {
463 	int ret = 0, i;
464 	struct hv_storvsc_request *request;
465 	struct vstor_packet *vstor_packet;
466 	uint16_t max_subch;
467 	boolean_t support_multichannel;
468 	uint32_t version;
469 
470 	max_subch = 0;
471 	support_multichannel = FALSE;
472 
473 	request = &sc->hs_init_req;
474 	memset(request, 0, sizeof(struct hv_storvsc_request));
475 	vstor_packet = &request->vstor_packet;
476 	request->softc = sc;
477 
478 	/**
479 	 * Initiate the vsc/vsp initialization protocol on the open channel
480 	 */
481 	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
482 
483 	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
484 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
485 
486 
487 	ret = vmbus_chan_send(sc->hs_chan,
488 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
489 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
490 
491 	if (ret != 0)
492 		goto cleanup;
493 
494 	sema_wait(&request->synch_sema);
495 
496 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
497 		vstor_packet->status != 0) {
498 		goto cleanup;
499 	}
500 
501 	for (i = 0; i < nitems(vmstor_proto_list); i++) {
502 		/* reuse the packet for version range supported */
503 
504 		memset(vstor_packet, 0, sizeof(struct vstor_packet));
505 		vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
506 		vstor_packet->flags = REQUEST_COMPLETION_FLAG;
507 
508 		vstor_packet->u.version.major_minor =
509 			vmstor_proto_list[i].proto_version;
510 
511 		/* revision is only significant for Windows guests */
512 		vstor_packet->u.version.revision = 0;
513 
514 		ret = vmbus_chan_send(sc->hs_chan,
515 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
516 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
517 
518 		if (ret != 0)
519 			goto cleanup;
520 
521 		sema_wait(&request->synch_sema);
522 
523 		if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
524 			ret = EINVAL;
525 			goto cleanup;
526 		}
527 		if (vstor_packet->status == 0) {
528 			vmstor_proto_version =
529 				vmstor_proto_list[i].proto_version;
530 			sense_buffer_size =
531 				vmstor_proto_list[i].sense_buffer_size;
532 			vmscsi_size_delta =
533 				vmstor_proto_list[i].vmscsi_size_delta;
534 			break;
535 		}
536 	}
537 
538 	if (vstor_packet->status != 0) {
539 		ret = EINVAL;
540 		goto cleanup;
541 	}
542 	/**
543 	 * Query channel properties
544 	 */
545 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
546 	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
547 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
548 
549 	ret = vmbus_chan_send(sc->hs_chan,
550 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
551 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
552 
553 	if ( ret != 0)
554 		goto cleanup;
555 
556 	sema_wait(&request->synch_sema);
557 
558 	/* TODO: Check returned version */
559 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
560 	    vstor_packet->status != 0) {
561 		goto cleanup;
562 	}
563 
564 	max_subch = vstor_packet->u.chan_props.max_channel_cnt;
565 	if (hv_storvsc_chan_cnt > 0 && hv_storvsc_chan_cnt < (max_subch + 1))
566 		max_subch = hv_storvsc_chan_cnt - 1;
567 
568 	/* multi-channels feature is supported by WIN8 and above version */
569 	version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev);
570 	if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 &&
571 	    (vstor_packet->u.chan_props.flags &
572 	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
573 		support_multichannel = TRUE;
574 	}
575 	if (bootverbose) {
576 		device_printf(sc->hs_dev, "max chans %d%s\n", max_subch + 1,
577 		    support_multichannel ? ", multi-chan capable" : "");
578 	}
579 
580 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
581 	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
582 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
583 
584 	ret = vmbus_chan_send(sc->hs_chan,
585 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
586 	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
587 
588 	if (ret != 0) {
589 		goto cleanup;
590 	}
591 
592 	sema_wait(&request->synch_sema);
593 
594 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
595 	    vstor_packet->status != 0)
596 		goto cleanup;
597 
598 	/*
599 	 * If multi-channel is supported, send multichannel create
600 	 * request to host.
601 	 */
602 	if (support_multichannel && max_subch > 0)
603 		storvsc_send_multichannel_request(sc, max_subch);
604 cleanup:
605 	sema_destroy(&request->synch_sema);
606 	return (ret);
607 }
608 
609 /**
 * @brief Open channel connection to parent partition StorVSP driver
611  *
612  * Open and initialize channel connection to parent partition StorVSP driver.
613  *
614  * @param pointer to a Hyper-V device
615  * @returns 0 on success, non-zero error on failure
616  */
617 static int
618 hv_storvsc_connect_vsp(struct storvsc_softc *sc)
619 {
620 	int ret = 0;
621 	struct vmstor_chan_props props;
622 
623 	memset(&props, 0, sizeof(struct vmstor_chan_props));
624 
625 	/*
626 	 * Open the channel
627 	 */
628 	vmbus_chan_cpu_rr(sc->hs_chan);
629 	ret = vmbus_chan_open(
630 		sc->hs_chan,
631 		sc->hs_drv_props->drv_ringbuffer_size,
632 		sc->hs_drv_props->drv_ringbuffer_size,
633 		(void *)&props,
634 		sizeof(struct vmstor_chan_props),
635 		hv_storvsc_on_channel_callback, sc);
636 
637 	if (ret != 0) {
638 		return ret;
639 	}
640 
641 	ret = hv_storvsc_channel_init(sc);
642 	return (ret);
643 }
644 
#if HVS_HOST_RESET
/**
 * @brief Issue a bus/adapter reset to the host
 *
 * Sends a RESETBUS packet on the primary channel and waits for its
 * completion; afterwards all outstanding requests should have been
 * flushed back to us.  Compiled out while HVS_HOST_RESET is 0.
 *
 * @param sc  storvsc adapter softc
 * @returns 0 on success, non-zero error on failure
 */
static int
hv_storvsc_host_reset(struct storvsc_softc *sc)
{
	int ret = 0;

	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;

	request = &sc->hs_reset_req;
	request->softc = sc;
	vstor_packet = &request->vstor_packet;

	sema_init(&request->synch_sema, 0, "stor synch sema");

	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	/*
	 * Fix: the original referenced the undefined 'dev->channel';
	 * use the softc's primary channel instead.
	 */
	ret = vmbus_chan_send(sc->hs_chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    vstor_packet, VSTOR_PKT_SIZE,
	    (uint64_t)(uintptr_t)&sc->hs_reset_req);

	if (ret != 0) {
		goto cleanup;
	}

	sema_wait(&request->synch_sema);

	/*
	 * At this point, all outstanding requests in the adapter
	 * should have been flushed out and return to us
	 */

cleanup:
	sema_destroy(&request->synch_sema);
	return (ret);
}
#endif /* HVS_HOST_RESET */
684 
685 /**
686  * @brief Function to initiate an I/O request
687  *
688  * @param device Hyper-V device pointer
689  * @param request pointer to a request structure
690  * @returns 0 on success, non-zero error on failure
691  */
692 static int
693 hv_storvsc_io_request(struct storvsc_softc *sc,
694 					  struct hv_storvsc_request *request)
695 {
696 	struct vstor_packet *vstor_packet = &request->vstor_packet;
697 	struct vmbus_channel* outgoing_channel = NULL;
698 	int ret = 0, ch_sel;
699 
700 	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
701 
702 	vstor_packet->u.vm_srb.length =
703 	    sizeof(struct vmscsi_req) - vmscsi_size_delta;
704 
705 	vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
706 
707 	vstor_packet->u.vm_srb.transfer_len =
708 	    request->prp_list.gpa_range.gpa_len;
709 
710 	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
711 
712 	ch_sel = (vstor_packet->u.vm_srb.lun + curcpu) % sc->hs_nchan;
713 	/*
714 	 * If we are panic'ing, then we are dumping core. Since storvsc_polls
715 	 * always uses sc->hs_chan, then we must send to that channel or a poll
716 	 * timeout will occur.
717 	 */
718 	if (panicstr) {
719 		outgoing_channel = sc->hs_chan;
720 	} else {
721 		outgoing_channel = sc->hs_sel_chan[ch_sel];
722 	}
723 
724 	mtx_unlock(&request->softc->hs_lock);
725 	if (request->prp_list.gpa_range.gpa_len) {
726 		ret = vmbus_chan_send_prplist(outgoing_channel,
727 		    &request->prp_list.gpa_range, request->prp_cnt,
728 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
729 	} else {
730 		ret = vmbus_chan_send(outgoing_channel,
731 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
732 		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
733 	}
734 	/* statistic for successful request sending on each channel */
735 	if (!ret) {
736 		sc->sysctl_data.chan_send_cnt[ch_sel]++;
737 	}
738 	mtx_lock(&request->softc->hs_lock);
739 
740 	if (ret != 0) {
741 		printf("Unable to send packet %p ret %d", vstor_packet, ret);
742 	} else {
743 		atomic_add_int(&sc->hs_num_out_reqs, 1);
744 	}
745 
746 	return (ret);
747 }
748 
749 
750 /**
751  * Process IO_COMPLETION_OPERATION and ready
752  * the result to be completed for upper layer
753  * processing by the CAM layer.
754  */
static void
hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
			   struct vstor_packet *vstor_packet,
			   struct hv_storvsc_request *request)
{
	struct vmscsi_req *vm_srb;

	vm_srb = &vstor_packet->u.vm_srb;

	/*
	 * Copy some fields of the host's response into the request structure,
	 * because the fields will be used later in storvsc_io_done().
	 */
	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
	request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;

	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
		/* Autosense data available */

		/* Host must not return more sense bytes than we have room for. */
		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
				("vm_srb->sense_info_len <= "
				 "request->sense_info_len"));

		memcpy(request->sense_data, vm_srb->u.sense_data,
			vm_srb->sense_info_len);

		request->sense_info_len = vm_srb->sense_info_len;
	}

	/* Complete request by passing to the CAM layer */
	storvsc_io_done(request);
	/* Wake up a draining detach once the last outstanding I/O returns. */
	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
		sema_post(&sc->hs_drain_sema);
	}
}
793 
794 static void
795 hv_storvsc_rescan_target(struct storvsc_softc *sc)
796 {
797 	path_id_t pathid;
798 	target_id_t targetid;
799 	union ccb *ccb;
800 
801 	pathid = cam_sim_path(sc->hs_sim);
802 	targetid = CAM_TARGET_WILDCARD;
803 
804 	/*
805 	 * Allocate a CCB and schedule a rescan.
806 	 */
807 	ccb = xpt_alloc_ccb_nowait();
808 	if (ccb == NULL) {
809 		printf("unable to alloc CCB for rescan\n");
810 		return;
811 	}
812 
813 	if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
814 	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
815 		printf("unable to create path for rescan, pathid: %u,"
816 		    "targetid: %u\n", pathid, targetid);
817 		xpt_free_ccb(ccb);
818 		return;
819 	}
820 
821 	if (targetid == CAM_TARGET_WILDCARD)
822 		ccb->ccb_h.func_code = XPT_SCAN_BUS;
823 	else
824 		ccb->ccb_h.func_code = XPT_SCAN_TGT;
825 
826 	xpt_rescan(ccb);
827 }
828 
829 static void
830 hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc)
831 {
832 	int ret = 0;
833 	struct storvsc_softc *sc = xsc;
834 	uint32_t bytes_recvd;
835 	uint64_t request_id;
836 	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
837 	struct hv_storvsc_request *request;
838 	struct vstor_packet *vstor_packet;
839 
840 	bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8);
841 	ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id);
842 	KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough"));
843 	/* XXX check bytes_recvd to make sure that it contains enough data */
844 
845 	while ((ret == 0) && (bytes_recvd > 0)) {
846 		request = (struct hv_storvsc_request *)(uintptr_t)request_id;
847 
848 		if ((request == &sc->hs_init_req) ||
849 			(request == &sc->hs_reset_req)) {
850 			memcpy(&request->vstor_packet, packet,
851 				   sizeof(struct vstor_packet));
852 			sema_post(&request->synch_sema);
853 		} else {
854 			vstor_packet = (struct vstor_packet *)packet;
855 			switch(vstor_packet->operation) {
856 			case VSTOR_OPERATION_COMPLETEIO:
857 				if (request == NULL)
858 					panic("VMBUS: storvsc received a "
859 					    "packet with NULL request id in "
860 					    "COMPLETEIO operation.");
861 
862 				hv_storvsc_on_iocompletion(sc,
863 							vstor_packet, request);
864 				break;
865 			case VSTOR_OPERATION_REMOVEDEVICE:
866 				printf("VMBUS: storvsc operation %d not "
867 				    "implemented.\n", vstor_packet->operation);
868 				/* TODO: implement */
869 				break;
870 			case VSTOR_OPERATION_ENUMERATE_BUS:
871 				hv_storvsc_rescan_target(sc);
872 				break;
873 			default:
874 				break;
875 			}
876 		}
877 
878 		bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8),
879 		ret = vmbus_chan_recv(channel, packet, &bytes_recvd,
880 		    &request_id);
881 		KASSERT(ret != ENOBUFS,
882 		    ("storvsc recvbuf is not large enough"));
883 		/*
884 		 * XXX check bytes_recvd to make sure that it contains
885 		 * enough data
886 		 */
887 	}
888 }
889 
890 /**
891  * @brief StorVSC probe function
892  *
893  * Device probe function.  Returns 0 if the input device is a StorVSC
894  * device.  Otherwise, a ENXIO is returned.  If the input device is
895  * for BlkVSC (paravirtual IDE) device and this support is disabled in
896  * favor of the emulated ATA/IDE device, return ENXIO.
897  *
898  * @param a device
 * @returns 0 on success, ENXIO if not a matching StorVSC device
900  */
901 static int
902 storvsc_probe(device_t dev)
903 {
904 	int ret	= ENXIO;
905 
906 	switch (storvsc_get_storage_type(dev)) {
907 	case DRIVER_BLKVSC:
908 		if(bootverbose)
909 			device_printf(dev,
910 			    "Enlightened ATA/IDE detected\n");
911 		device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc);
912 		ret = BUS_PROBE_DEFAULT;
913 		break;
914 	case DRIVER_STORVSC:
915 		if(bootverbose)
916 			device_printf(dev, "Enlightened SCSI device detected\n");
917 		device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc);
918 		ret = BUS_PROBE_DEFAULT;
919 		break;
920 	default:
921 		ret = ENXIO;
922 	}
923 	return (ret);
924 }
925 
926 static void
927 storvsc_create_chan_sel(struct storvsc_softc *sc)
928 {
929 	struct vmbus_channel **subch;
930 	int i, nsubch;
931 
932 	sc->hs_sel_chan[0] = sc->hs_chan;
933 	nsubch = sc->hs_nchan - 1;
934 	if (nsubch == 0)
935 		return;
936 
937 	subch = vmbus_subchan_get(sc->hs_chan, nsubch);
938 	for (i = 0; i < nsubch; i++)
939 		sc->hs_sel_chan[i + 1] = subch[i];
940 	vmbus_subchan_rel(subch, nsubch);
941 }
942 
943 static int
944 storvsc_init_requests(device_t dev)
945 {
946 	struct storvsc_softc *sc = device_get_softc(dev);
947 	struct hv_storvsc_request *reqp;
948 	int error, i;
949 
950 	LIST_INIT(&sc->hs_free_list);
951 
952 	error = bus_dma_tag_create(
953 		bus_get_dma_tag(dev),		/* parent */
954 		1,				/* alignment */
955 		PAGE_SIZE,			/* boundary */
956 		BUS_SPACE_MAXADDR,		/* lowaddr */
957 		BUS_SPACE_MAXADDR,		/* highaddr */
958 		NULL, NULL,			/* filter, filterarg */
959 		STORVSC_DATA_SIZE_MAX,		/* maxsize */
960 		STORVSC_DATA_SEGCNT_MAX,	/* nsegments */
961 		STORVSC_DATA_SEGSZ_MAX,		/* maxsegsize */
962 		0,				/* flags */
963 		NULL,				/* lockfunc */
964 		NULL,				/* lockfuncarg */
965 		&sc->storvsc_req_dtag);
966 	if (error) {
967 		device_printf(dev, "failed to create storvsc dma tag\n");
968 		return (error);
969 	}
970 
971 	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
972 		reqp = malloc(sizeof(struct hv_storvsc_request),
973 				 M_DEVBUF, M_WAITOK|M_ZERO);
974 		reqp->softc = sc;
975 		error = bus_dmamap_create(sc->storvsc_req_dtag, 0,
976 				&reqp->data_dmap);
977 		if (error) {
978 			device_printf(dev, "failed to allocate storvsc "
979 			    "data dmamap\n");
980 			goto cleanup;
981 		}
982 		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
983 	}
984 	return (0);
985 
986 cleanup:
987 	while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) {
988 		LIST_REMOVE(reqp, link);
989 		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
990 		free(reqp, M_DEVBUF);
991 	}
992 	return (error);
993 }
994 
995 static void
996 storvsc_sysctl(device_t dev)
997 {
998 	struct sysctl_oid_list *child;
999 	struct sysctl_ctx_list *ctx;
1000 	struct sysctl_oid *ch_tree, *chid_tree;
1001 	struct storvsc_softc *sc;
1002 	char name[16];
1003 	int i;
1004 
1005 	sc = device_get_softc(dev);
1006 	ctx = device_get_sysctl_ctx(dev);
1007 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
1008 
1009 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt",
1010 		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_bio_cnt,
1011 		"# of bio data block");
1012 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt",
1013 		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_vaddr_cnt,
1014 		"# of vaddr data block");
1015 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt",
1016 		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_sg_cnt,
1017 		"# of sg data block");
1018 
1019 	/* dev.storvsc.UNIT.channel */
1020 	ch_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "channel",
1021 		CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1022 	if (ch_tree == NULL)
1023 		return;
1024 
1025 	for (i = 0; i < sc->hs_nchan; i++) {
1026 		uint32_t ch_id;
1027 
1028 		ch_id = vmbus_chan_id(sc->hs_sel_chan[i]);
1029 		snprintf(name, sizeof(name), "%d", ch_id);
1030 		/* dev.storvsc.UNIT.channel.CHID */
1031 		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
1032 			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1033 		if (chid_tree == NULL)
1034 			return;
1035 		/* dev.storvsc.UNIT.channel.CHID.send_req */
1036 		SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
1037 			"send_req", CTLFLAG_RD, &sc->sysctl_data.chan_send_cnt[i],
1038 			"# of request sending from this channel");
1039 	}
1040 }
1041 
1042 /**
1043  * @brief StorVSC attach function
1044  *
1045  * Function responsible for allocating per-device structures,
1046  * setting up CAM interfaces and scanning for available LUNs to
1047  * be used for SCSI device peripherals.
1048  *
1049  * @param a device
1050  * @returns 0 on success or an error on failure
1051  */
1052 static int
1053 storvsc_attach(device_t dev)
1054 {
1055 	enum hv_storage_type stor_type;
1056 	struct storvsc_softc *sc;
1057 	struct cam_devq *devq;
1058 	int ret, i, j;
1059 	struct hv_storvsc_request *reqp;
1060 	struct root_hold_token *root_mount_token = NULL;
1061 	struct hv_sgl_node *sgl_node = NULL;
1062 	void *tmp_buff = NULL;
1063 
1064 	/*
1065 	 * We need to serialize storvsc attach calls.
1066 	 */
1067 	root_mount_token = root_mount_hold("storvsc");
1068 
1069 	sc = device_get_softc(dev);
1070 	sc->hs_nchan = 1;
1071 	sc->hs_chan = vmbus_get_channel(dev);
1072 
1073 	stor_type = storvsc_get_storage_type(dev);
1074 
1075 	if (stor_type == DRIVER_UNKNOWN) {
1076 		ret = ENODEV;
1077 		goto cleanup;
1078 	}
1079 
1080 	/* fill in driver specific properties */
1081 	sc->hs_drv_props = &g_drv_props_table[stor_type];
1082 	sc->hs_drv_props->drv_ringbuffer_size = hv_storvsc_ringbuffer_size;
1083 	sc->hs_drv_props->drv_max_ios_per_target =
1084 		MIN(STORVSC_MAX_IO, hv_storvsc_max_io);
1085 	if (bootverbose) {
1086 		printf("storvsc ringbuffer size: %d, max_io: %d\n",
1087 			sc->hs_drv_props->drv_ringbuffer_size,
1088 			sc->hs_drv_props->drv_max_ios_per_target);
1089 	}
1090 	/* fill in device specific properties */
1091 	sc->hs_unit	= device_get_unit(dev);
1092 	sc->hs_dev	= dev;
1093 
1094 	mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
1095 
1096 	ret = storvsc_init_requests(dev);
1097 	if (ret != 0)
1098 		goto cleanup;
1099 
1100 	/* create sg-list page pool */
1101 	if (FALSE == g_hv_sgl_page_pool.is_init) {
1102 		g_hv_sgl_page_pool.is_init = TRUE;
1103 		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
1104 		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
1105 
1106 		/*
1107 		 * Pre-create SG list, each SG list with
1108 		 * STORVSC_DATA_SEGCNT_MAX segments, each
1109 		 * segment has one page buffer
1110 		 */
1111 		for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; i++) {
1112 	        	sgl_node = malloc(sizeof(struct hv_sgl_node),
1113 			    M_DEVBUF, M_WAITOK|M_ZERO);
1114 
1115 			sgl_node->sgl_data =
1116 			    sglist_alloc(STORVSC_DATA_SEGCNT_MAX,
1117 			    M_WAITOK|M_ZERO);
1118 
1119 			for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
1120 				tmp_buff = malloc(PAGE_SIZE,
1121 				    M_DEVBUF, M_WAITOK|M_ZERO);
1122 
1123 				sgl_node->sgl_data->sg_segs[j].ss_paddr =
1124 				    (vm_paddr_t)tmp_buff;
1125 			}
1126 
1127 			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
1128 			    sgl_node, link);
1129 		}
1130 	}
1131 
1132 	sc->hs_destroy = FALSE;
1133 	sc->hs_drain_notify = FALSE;
1134 	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
1135 
1136 	ret = hv_storvsc_connect_vsp(sc);
1137 	if (ret != 0) {
1138 		goto cleanup;
1139 	}
1140 
1141 	/* Construct cpu to channel mapping */
1142 	storvsc_create_chan_sel(sc);
1143 
1144 	/*
1145 	 * Create the device queue.
1146 	 * Hyper-V maps each target to one SCSI HBA
1147 	 */
1148 	devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
1149 	if (devq == NULL) {
1150 		device_printf(dev, "Failed to alloc device queue\n");
1151 		ret = ENOMEM;
1152 		goto cleanup;
1153 	}
1154 
1155 	sc->hs_sim = cam_sim_alloc(storvsc_action,
1156 				storvsc_poll,
1157 				sc->hs_drv_props->drv_name,
1158 				sc,
1159 				sc->hs_unit,
1160 				&sc->hs_lock, 1,
1161 				sc->hs_drv_props->drv_max_ios_per_target,
1162 				devq);
1163 
1164 	if (sc->hs_sim == NULL) {
1165 		device_printf(dev, "Failed to alloc sim\n");
1166 		cam_simq_free(devq);
1167 		ret = ENOMEM;
1168 		goto cleanup;
1169 	}
1170 
1171 	mtx_lock(&sc->hs_lock);
1172 	/* bus_id is set to 0, need to get it from VMBUS channel query? */
1173 	if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
1174 		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1175 		mtx_unlock(&sc->hs_lock);
1176 		device_printf(dev, "Unable to register SCSI bus\n");
1177 		ret = ENXIO;
1178 		goto cleanup;
1179 	}
1180 
1181 	if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
1182 		 cam_sim_path(sc->hs_sim),
1183 		CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
1184 		xpt_bus_deregister(cam_sim_path(sc->hs_sim));
1185 		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1186 		mtx_unlock(&sc->hs_lock);
1187 		device_printf(dev, "Unable to create path\n");
1188 		ret = ENXIO;
1189 		goto cleanup;
1190 	}
1191 
1192 	mtx_unlock(&sc->hs_lock);
1193 
1194 	storvsc_sysctl(dev);
1195 
1196 	root_mount_rel(root_mount_token);
1197 	return (0);
1198 
1199 
1200 cleanup:
1201 	root_mount_rel(root_mount_token);
1202 	while (!LIST_EMPTY(&sc->hs_free_list)) {
1203 		reqp = LIST_FIRST(&sc->hs_free_list);
1204 		LIST_REMOVE(reqp, link);
1205 		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1206 		free(reqp, M_DEVBUF);
1207 	}
1208 
1209 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1210 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1211 		LIST_REMOVE(sgl_node, link);
1212 		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
1213 			if (NULL !=
1214 			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
1215 				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
1216 			}
1217 		}
1218 		sglist_free(sgl_node->sgl_data);
1219 		free(sgl_node, M_DEVBUF);
1220 	}
1221 
1222 	return (ret);
1223 }
1224 
1225 /**
1226  * @brief StorVSC device detach function
1227  *
1228  * This function is responsible for safely detaching a
1229  * StorVSC device.  This includes waiting for inbound responses
1230  * to complete and freeing associated per-device structures.
1231  *
1232  * @param dev a device
1233  * returns 0 on success
1234  */
1235 static int
1236 storvsc_detach(device_t dev)
1237 {
1238 	struct storvsc_softc *sc = device_get_softc(dev);
1239 	struct hv_storvsc_request *reqp = NULL;
1240 	struct hv_sgl_node *sgl_node = NULL;
1241 	int j = 0;
1242 
1243 	sc->hs_destroy = TRUE;
1244 
1245 	/*
1246 	 * At this point, all outbound traffic should be disabled. We
1247 	 * only allow inbound traffic (responses) to proceed so that
1248 	 * outstanding requests can be completed.
1249 	 */
1250 
1251 	sc->hs_drain_notify = TRUE;
1252 	sema_wait(&sc->hs_drain_sema);
1253 	sc->hs_drain_notify = FALSE;
1254 
1255 	/*
1256 	 * Since we have already drained, we don't need to busy wait.
1257 	 * The call to close the channel will reset the callback
1258 	 * under the protection of the incoming channel lock.
1259 	 */
1260 
1261 	vmbus_chan_close(sc->hs_chan);
1262 
1263 	mtx_lock(&sc->hs_lock);
1264 	while (!LIST_EMPTY(&sc->hs_free_list)) {
1265 		reqp = LIST_FIRST(&sc->hs_free_list);
1266 		LIST_REMOVE(reqp, link);
1267 		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1268 		free(reqp, M_DEVBUF);
1269 	}
1270 	mtx_unlock(&sc->hs_lock);
1271 
1272 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1273 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1274 		LIST_REMOVE(sgl_node, link);
1275 		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){
1276 			if (NULL !=
1277 			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
1278 				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
1279 			}
1280 		}
1281 		sglist_free(sgl_node->sgl_data);
1282 		free(sgl_node, M_DEVBUF);
1283 	}
1284 
1285 	return (0);
1286 }
1287 
1288 #if HVS_TIMEOUT_TEST
1289 /**
1290  * @brief unit test for timed out operations
1291  *
1292  * This function provides unit testing capability to simulate
1293  * timed out operations.  Recompilation with HV_TIMEOUT_TEST=1
1294  * is required.
1295  *
1296  * @param reqp pointer to a request structure
1297  * @param opcode SCSI operation being performed
1298  * @param wait if 1, wait for I/O to complete
1299  */
1300 static void
1301 storvsc_timeout_test(struct hv_storvsc_request *reqp,
1302 		uint8_t opcode, int wait)
1303 {
1304 	int ret;
1305 	union ccb *ccb = reqp->ccb;
1306 	struct storvsc_softc *sc = reqp->softc;
1307 
1308 	if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) {
1309 		return;
1310 	}
1311 
1312 	if (wait) {
1313 		mtx_lock(&reqp->event.mtx);
1314 	}
1315 	ret = hv_storvsc_io_request(sc, reqp);
1316 	if (ret != 0) {
1317 		if (wait) {
1318 			mtx_unlock(&reqp->event.mtx);
1319 		}
1320 		printf("%s: io_request failed with %d.\n",
1321 				__func__, ret);
1322 		ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1323 		mtx_lock(&sc->hs_lock);
1324 		storvsc_free_request(sc, reqp);
1325 		xpt_done(ccb);
1326 		mtx_unlock(&sc->hs_lock);
1327 		return;
1328 	}
1329 
1330 	if (wait) {
1331 		xpt_print(ccb->ccb_h.path,
1332 				"%u: %s: waiting for IO return.\n",
1333 				ticks, __func__);
1334 		ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
1335 		mtx_unlock(&reqp->event.mtx);
1336 		xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
1337 				ticks, __func__, (ret == 0)?
1338 				"IO return detected" :
1339 				"IO return not detected");
1340 		/*
1341 		 * Now both the timer handler and io done are running
1342 		 * simultaneously. We want to confirm the io done always
1343 		 * finishes after the timer handler exits. So reqp used by
1344 		 * timer handler is not freed or stale. Do busy loop for
1345 		 * another 1/10 second to make sure io done does
1346 		 * wait for the timer handler to complete.
1347 		 */
1348 		DELAY(100*1000);
1349 		mtx_lock(&sc->hs_lock);
1350 		xpt_print(ccb->ccb_h.path,
1351 				"%u: %s: finishing, queue frozen %d, "
1352 				"ccb status 0x%x scsi_status 0x%x.\n",
1353 				ticks, __func__, sc->hs_frozen,
1354 				ccb->ccb_h.status,
1355 				ccb->csio.scsi_status);
1356 		mtx_unlock(&sc->hs_lock);
1357 	}
1358 }
1359 #endif /* HVS_TIMEOUT_TEST */
1360 
1361 #ifdef notyet
1362 /**
1363  * @brief timeout handler for requests
1364  *
1365  * This function is called as a result of a callout expiring.
1366  *
1367  * @param arg pointer to a request
1368  */
1369 static void
1370 storvsc_timeout(void *arg)
1371 {
1372 	struct hv_storvsc_request *reqp = arg;
1373 	struct storvsc_softc *sc = reqp->softc;
1374 	union ccb *ccb = reqp->ccb;
1375 
1376 	if (reqp->retries == 0) {
1377 		mtx_lock(&sc->hs_lock);
1378 		xpt_print(ccb->ccb_h.path,
1379 		    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
1380 		    ticks, reqp, ccb->ccb_h.timeout / 1000);
1381 		cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
1382 		mtx_unlock(&sc->hs_lock);
1383 
1384 		reqp->retries++;
1385 		callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
1386 		    0, storvsc_timeout, reqp, 0);
1387 #if HVS_TIMEOUT_TEST
1388 		storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
1389 #endif
1390 		return;
1391 	}
1392 
1393 	mtx_lock(&sc->hs_lock);
1394 	xpt_print(ccb->ccb_h.path,
1395 		"%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
1396 		ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
1397 		(sc->hs_frozen == 0)?
1398 		"freezing the queue" : "the queue is already frozen");
1399 	if (sc->hs_frozen == 0) {
1400 		sc->hs_frozen = 1;
1401 		xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
1402 	}
1403 	mtx_unlock(&sc->hs_lock);
1404 
1405 #if HVS_TIMEOUT_TEST
1406 	storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
1407 #endif
1408 }
1409 #endif
1410 
1411 /**
1412  * @brief StorVSC device poll function
1413  *
1414  * This function is responsible for servicing requests when
1415  * interrupts are disabled (i.e when we are dumping core.)
1416  *
1417  * @param sim a pointer to a CAM SCSI interface module
1418  */
1419 static void
1420 storvsc_poll(struct cam_sim *sim)
1421 {
1422 	struct storvsc_softc *sc = cam_sim_softc(sim);
1423 
1424 	mtx_assert(&sc->hs_lock, MA_OWNED);
1425 	mtx_unlock(&sc->hs_lock);
1426 	hv_storvsc_on_channel_callback(sc->hs_chan, sc);
1427 	mtx_lock(&sc->hs_lock);
1428 }
1429 
1430 /**
1431  * @brief StorVSC device action function
1432  *
1433  * This function is responsible for handling SCSI operations which
1434  * are passed from the CAM layer.  The requests are in the form of
1435  * CAM control blocks which indicate the action being performed.
1436  * Not all actions require converting the request to a VSCSI protocol
1437  * message - these actions can be responded to by this driver.
1438  * Requests which are destined for a backend storage device are converted
1439  * to a VSCSI protocol message and sent on the channel connection associated
1440  * with this device.
1441  *
1442  * @param sim pointer to a CAM SCSI interface module
1443  * @param ccb pointer to a CAM control block
1444  */
1445 static void
1446 storvsc_action(struct cam_sim *sim, union ccb *ccb)
1447 {
1448 	struct storvsc_softc *sc = cam_sim_softc(sim);
1449 	int res;
1450 
1451 	mtx_assert(&sc->hs_lock, MA_OWNED);
1452 	switch (ccb->ccb_h.func_code) {
1453 	case XPT_PATH_INQ: {
1454 		struct ccb_pathinq *cpi = &ccb->cpi;
1455 
1456 		cpi->version_num = 1;
1457 		cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
1458 		cpi->target_sprt = 0;
1459 		cpi->hba_misc = PIM_NOBUSRESET;
1460 		if (hv_storvsc_use_pim_unmapped)
1461 			cpi->hba_misc |= PIM_UNMAPPED;
1462 		cpi->maxio = STORVSC_DATA_SIZE_MAX;
1463 		cpi->hba_eng_cnt = 0;
1464 		cpi->max_target = STORVSC_MAX_TARGETS;
1465 		cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
1466 		cpi->initiator_id = cpi->max_target;
1467 		cpi->bus_id = cam_sim_bus(sim);
1468 		cpi->base_transfer_speed = 300000;
1469 		cpi->transport = XPORT_SAS;
1470 		cpi->transport_version = 0;
1471 		cpi->protocol = PROTO_SCSI;
1472 		cpi->protocol_version = SCSI_REV_SPC2;
1473 		strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
1474 		strlcpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
1475 		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
1476 		cpi->unit_number = cam_sim_unit(sim);
1477 
1478 		ccb->ccb_h.status = CAM_REQ_CMP;
1479 		xpt_done(ccb);
1480 		return;
1481 	}
1482 	case XPT_GET_TRAN_SETTINGS: {
1483 		struct  ccb_trans_settings *cts = &ccb->cts;
1484 
1485 		cts->transport = XPORT_SAS;
1486 		cts->transport_version = 0;
1487 		cts->protocol = PROTO_SCSI;
1488 		cts->protocol_version = SCSI_REV_SPC2;
1489 
1490 		/* enable tag queuing and disconnected mode */
1491 		cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
1492 		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
1493 		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
1494 		cts->xport_specific.valid = CTS_SPI_VALID_DISC;
1495 		cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
1496 
1497 		ccb->ccb_h.status = CAM_REQ_CMP;
1498 		xpt_done(ccb);
1499 		return;
1500 	}
1501 	case XPT_SET_TRAN_SETTINGS:	{
1502 		ccb->ccb_h.status = CAM_REQ_CMP;
1503 		xpt_done(ccb);
1504 		return;
1505 	}
1506 	case XPT_CALC_GEOMETRY:{
1507 		cam_calc_geometry(&ccb->ccg, 1);
1508 		xpt_done(ccb);
1509 		return;
1510 	}
1511 	case  XPT_RESET_BUS:
1512 	case  XPT_RESET_DEV:{
1513 #if HVS_HOST_RESET
1514 		if ((res = hv_storvsc_host_reset(sc)) != 0) {
1515 			xpt_print(ccb->ccb_h.path,
1516 				"hv_storvsc_host_reset failed with %d\n", res);
1517 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1518 			xpt_done(ccb);
1519 			return;
1520 		}
1521 		ccb->ccb_h.status = CAM_REQ_CMP;
1522 		xpt_done(ccb);
1523 		return;
1524 #else
1525 		xpt_print(ccb->ccb_h.path,
1526 				  "%s reset not supported.\n",
1527 				  (ccb->ccb_h.func_code == XPT_RESET_BUS)?
1528 				  "bus" : "dev");
1529 		ccb->ccb_h.status = CAM_REQ_INVALID;
1530 		xpt_done(ccb);
1531 		return;
1532 #endif	/* HVS_HOST_RESET */
1533 	}
1534 	case XPT_SCSI_IO:
1535 	case XPT_IMMED_NOTIFY: {
1536 		struct hv_storvsc_request *reqp = NULL;
1537 		bus_dmamap_t dmap_saved;
1538 
1539 		if (ccb->csio.cdb_len == 0) {
1540 			panic("cdl_len is 0\n");
1541 		}
1542 
1543 		if (LIST_EMPTY(&sc->hs_free_list)) {
1544 			ccb->ccb_h.status = CAM_REQUEUE_REQ;
1545 			if (sc->hs_frozen == 0) {
1546 				sc->hs_frozen = 1;
1547 				xpt_freeze_simq(sim, /* count*/1);
1548 			}
1549 			xpt_done(ccb);
1550 			return;
1551 		}
1552 
1553 		reqp = LIST_FIRST(&sc->hs_free_list);
1554 		LIST_REMOVE(reqp, link);
1555 
1556 		/* Save the data_dmap before reset request */
1557 		dmap_saved = reqp->data_dmap;
1558 
1559 		/* XXX this is ugly */
1560 		bzero(reqp, sizeof(struct hv_storvsc_request));
1561 
1562 		/* Restore necessary bits */
1563 		reqp->data_dmap = dmap_saved;
1564 		reqp->softc = sc;
1565 
1566 		ccb->ccb_h.status |= CAM_SIM_QUEUED;
1567 		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
1568 			ccb->ccb_h.status = CAM_REQ_INVALID;
1569 			xpt_done(ccb);
1570 			return;
1571 		}
1572 
1573 #ifdef notyet
1574 		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
1575 			callout_init(&reqp->callout, 1);
1576 			callout_reset_sbt(&reqp->callout,
1577 			    SBT_1MS * ccb->ccb_h.timeout, 0,
1578 			    storvsc_timeout, reqp, 0);
1579 #if HVS_TIMEOUT_TEST
1580 			cv_init(&reqp->event.cv, "storvsc timeout cv");
1581 			mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
1582 					NULL, MTX_DEF);
1583 			switch (reqp->vstor_packet.vm_srb.cdb[0]) {
1584 				case MODE_SELECT_10:
1585 				case SEND_DIAGNOSTIC:
1586 					/* To have timer send the request. */
1587 					return;
1588 				default:
1589 					break;
1590 			}
1591 #endif /* HVS_TIMEOUT_TEST */
1592 		}
1593 #endif
1594 
1595 		if ((res = hv_storvsc_io_request(sc, reqp)) != 0) {
1596 			xpt_print(ccb->ccb_h.path,
1597 				"hv_storvsc_io_request failed with %d\n", res);
1598 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1599 			storvsc_free_request(sc, reqp);
1600 			xpt_done(ccb);
1601 			return;
1602 		}
1603 		return;
1604 	}
1605 
1606 	default:
1607 		ccb->ccb_h.status = CAM_REQ_INVALID;
1608 		xpt_done(ccb);
1609 		return;
1610 	}
1611 }
1612 
1613 /**
1614  * @brief destroy bounce buffer
1615  *
1616  * This function is responsible for destroy a Scatter/Gather list
1617  * that create by storvsc_create_bounce_buffer()
1618  *
1619  * @param sgl- the Scatter/Gather need be destroy
1620  * @param sg_count- page count of the SG list.
1621  *
1622  */
1623 static void
1624 storvsc_destroy_bounce_buffer(struct sglist *sgl)
1625 {
1626 	struct hv_sgl_node *sgl_node = NULL;
1627 	if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
1628 		printf("storvsc error: not enough in use sgl\n");
1629 		return;
1630 	}
1631 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
1632 	LIST_REMOVE(sgl_node, link);
1633 	sgl_node->sgl_data = sgl;
1634 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
1635 }
1636 
1637 /**
1638  * @brief create bounce buffer
1639  *
1640  * This function is responsible for create a Scatter/Gather list,
1641  * which hold several pages that can be aligned with page size.
1642  *
1643  * @param seg_count- SG-list segments count
1644  * @param write - if WRITE_TYPE, set SG list page used size to 0,
1645  * otherwise set used size to page size.
1646  *
1647  * return NULL if create failed
1648  */
1649 static struct sglist *
1650 storvsc_create_bounce_buffer(uint16_t seg_count, int write)
1651 {
1652 	int i = 0;
1653 	struct sglist *bounce_sgl = NULL;
1654 	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
1655 	struct hv_sgl_node *sgl_node = NULL;
1656 
1657 	/* get struct sglist from free_sgl_list */
1658 	if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1659 		printf("storvsc error: not enough free sgl\n");
1660 		return NULL;
1661 	}
1662 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1663 	LIST_REMOVE(sgl_node, link);
1664 	bounce_sgl = sgl_node->sgl_data;
1665 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
1666 
1667 	bounce_sgl->sg_maxseg = seg_count;
1668 
1669 	if (write == WRITE_TYPE)
1670 		bounce_sgl->sg_nseg = 0;
1671 	else
1672 		bounce_sgl->sg_nseg = seg_count;
1673 
1674 	for (i = 0; i < seg_count; i++)
1675 	        bounce_sgl->sg_segs[i].ss_len = buf_len;
1676 
1677 	return bounce_sgl;
1678 }
1679 
1680 /**
1681  * @brief copy data from SG list to bounce buffer
1682  *
1683  * This function is responsible for copy data from one SG list's segments
1684  * to another SG list which used as bounce buffer.
1685  *
1686  * @param bounce_sgl - the destination SG list
1687  * @param orig_sgl - the segment of the source SG list.
1688  * @param orig_sgl_count - the count of segments.
1689  * @param orig_sgl_count - indicate which segment need bounce buffer,
1690  *  set 1 means need.
1691  *
1692  */
1693 static void
1694 storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
1695 			       bus_dma_segment_t *orig_sgl,
1696 			       unsigned int orig_sgl_count,
1697 			       uint64_t seg_bits)
1698 {
1699 	int src_sgl_idx = 0;
1700 
1701 	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
1702 		if (seg_bits & (1 << src_sgl_idx)) {
1703 			memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
1704 			    (void*)orig_sgl[src_sgl_idx].ds_addr,
1705 			    orig_sgl[src_sgl_idx].ds_len);
1706 
1707 			bounce_sgl->sg_segs[src_sgl_idx].ss_len =
1708 			    orig_sgl[src_sgl_idx].ds_len;
1709 		}
1710 	}
1711 }
1712 
1713 /**
1714  * @brief copy data from SG list which used as bounce to another SG list
1715  *
1716  * This function is responsible for copy data from one SG list with bounce
1717  * buffer to another SG list's segments.
1718  *
1719  * @param dest_sgl - the destination SG list's segments
1720  * @param dest_sgl_count - the count of destination SG list's segment.
1721  * @param src_sgl - the source SG list.
1722  * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
1723  *
1724  */
1725 void
1726 storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
1727 				    unsigned int dest_sgl_count,
1728 				    struct sglist* src_sgl,
1729 				    uint64_t seg_bits)
1730 {
1731 	int sgl_idx = 0;
1732 
1733 	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
1734 		if (seg_bits & (1 << sgl_idx)) {
1735 			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
1736 			    (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
1737 			    src_sgl->sg_segs[sgl_idx].ss_len);
1738 		}
1739 	}
1740 }
1741 
1742 /**
1743  * @brief check SG list with bounce buffer or not
1744  *
1745  * This function is responsible for check if need bounce buffer for SG list.
1746  *
1747  * @param sgl - the SG list's segments
1748  * @param sg_count - the count of SG list's segment.
1749  * @param bits - segmengs number that need bounce buffer
1750  *
1751  * return -1 if SG list needless bounce buffer
1752  */
1753 static int
1754 storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
1755 				unsigned int sg_count,
1756 				uint64_t *bits)
1757 {
1758 	int i = 0;
1759 	int offset = 0;
1760 	uint64_t phys_addr = 0;
1761 	uint64_t tmp_bits = 0;
1762 	boolean_t found_hole = FALSE;
1763 	boolean_t pre_aligned = TRUE;
1764 
1765 	if (sg_count < 2){
1766 		return -1;
1767 	}
1768 
1769 	*bits = 0;
1770 
1771 	phys_addr = vtophys(sgl[0].ds_addr);
1772 	offset =  phys_addr - trunc_page(phys_addr);
1773 
1774 	if (offset != 0) {
1775 		pre_aligned = FALSE;
1776 		tmp_bits |= 1;
1777 	}
1778 
1779 	for (i = 1; i < sg_count; i++) {
1780 		phys_addr = vtophys(sgl[i].ds_addr);
1781 		offset =  phys_addr - trunc_page(phys_addr);
1782 
1783 		if (offset == 0) {
1784 			if (FALSE == pre_aligned){
1785 				/*
1786 				 * This segment is aligned, if the previous
1787 				 * one is not aligned, find a hole
1788 				 */
1789 				found_hole = TRUE;
1790 			}
1791 			pre_aligned = TRUE;
1792 		} else {
1793 			tmp_bits |= 1ULL << i;
1794 			if (!pre_aligned) {
1795 				if (phys_addr != vtophys(sgl[i-1].ds_addr +
1796 				    sgl[i-1].ds_len)) {
1797 					/*
1798 					 * Check whether connect to previous
1799 					 * segment,if not, find the hole
1800 					 */
1801 					found_hole = TRUE;
1802 				}
1803 			} else {
1804 				found_hole = TRUE;
1805 			}
1806 			pre_aligned = FALSE;
1807 		}
1808 	}
1809 
1810 	if (!found_hole) {
1811 		return (-1);
1812 	} else {
1813 		*bits = tmp_bits;
1814 		return 0;
1815 	}
1816 }
1817 
1818 /**
1819  * Copy bus_dma segments to multiple page buffer, which requires
1820  * the pages are compact composed except for the 1st and last pages.
1821  */
1822 static void
1823 storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
1824 {
1825 	struct hv_storvsc_request *reqp = arg;
1826 	union ccb *ccb = reqp->ccb;
1827 	struct ccb_scsiio *csio = &ccb->csio;
1828 	struct storvsc_gpa_range *prplist;
1829 	int i;
1830 
1831 	prplist = &reqp->prp_list;
1832 	prplist->gpa_range.gpa_len = csio->dxfer_len;
1833 	prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK;
1834 
1835 	for (i = 0; i < nsegs; i++) {
1836 #ifdef INVARIANTS
1837 		if (nsegs > 1) {
1838 			if (i == 0) {
1839 				KASSERT((segs[i].ds_addr & PAGE_MASK) +
1840 				    segs[i].ds_len == PAGE_SIZE,
1841 				    ("invalid 1st page, ofs 0x%jx, len %zu",
1842 				     (uintmax_t)segs[i].ds_addr,
1843 				     segs[i].ds_len));
1844 			} else if (i == nsegs - 1) {
1845 				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0,
1846 				    ("invalid last page, ofs 0x%jx",
1847 				     (uintmax_t)segs[i].ds_addr));
1848 			} else {
1849 				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 &&
1850 				    segs[i].ds_len == PAGE_SIZE,
1851 				    ("not a full page, ofs 0x%jx, len %zu",
1852 				     (uintmax_t)segs[i].ds_addr,
1853 				     segs[i].ds_len));
1854 			}
1855 		}
1856 #endif
1857 		prplist->gpa_page[i] = atop(segs[i].ds_addr);
1858 	}
1859 	reqp->prp_cnt = nsegs;
1860 }
1861 
1862 /**
1863  * @brief Fill in a request structure based on a CAM control block
1864  *
1865  * Fills in a request structure based on the contents of a CAM control
1866  * block.  The request structure holds the payload information for
1867  * VSCSI protocol request.
1868  *
1869  * @param ccb pointer to a CAM contorl block
1870  * @param reqp pointer to a request structure
1871  */
1872 static int
1873 create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
1874 {
1875 	struct ccb_scsiio *csio = &ccb->csio;
1876 	uint64_t phys_addr;
1877 	uint32_t pfn;
1878 	uint64_t not_aligned_seg_bits = 0;
1879 	int error;
1880 
1881 	/* refer to struct vmscsi_req for meanings of these two fields */
1882 	reqp->vstor_packet.u.vm_srb.port =
1883 		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
1884 	reqp->vstor_packet.u.vm_srb.path_id =
1885 		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
1886 
1887 	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
1888 	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
1889 
1890 	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
1891 	if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
1892 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
1893 			csio->cdb_len);
1894 	} else {
1895 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
1896 			csio->cdb_len);
1897 	}
1898 
1899 	if (hv_storvsc_use_win8ext_flags) {
1900 		reqp->vstor_packet.u.vm_srb.win8_extension.time_out_value = 60;
1901 		reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1902 			SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
1903 	}
1904 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1905 	case CAM_DIR_OUT:
1906 		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
1907 		if (hv_storvsc_use_win8ext_flags) {
1908 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1909 				SRB_FLAGS_DATA_OUT;
1910 		}
1911 		break;
1912 	case CAM_DIR_IN:
1913 		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
1914 		if (hv_storvsc_use_win8ext_flags) {
1915 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1916 				SRB_FLAGS_DATA_IN;
1917 		}
1918 		break;
1919 	case CAM_DIR_NONE:
1920 		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1921 		if (hv_storvsc_use_win8ext_flags) {
1922 			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1923 				SRB_FLAGS_NO_DATA_TRANSFER;
1924 		}
1925 		break;
1926 	default:
1927 		printf("Error: unexpected data direction: 0x%x\n",
1928 			ccb->ccb_h.flags & CAM_DIR_MASK);
1929 		return (EINVAL);
1930 	}
1931 
1932 	reqp->sense_data     = &csio->sense_data;
1933 	reqp->sense_info_len = csio->sense_len;
1934 
1935 	reqp->ccb = ccb;
1936 	ccb->ccb_h.spriv_ptr0 = reqp;
1937 
1938 	if (0 == csio->dxfer_len) {
1939 		return (0);
1940 	}
1941 
1942 	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
1943 	case CAM_DATA_BIO:
1944 	case CAM_DATA_VADDR:
1945 		error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag,
1946 		    reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp,
1947 		    BUS_DMA_NOWAIT);
1948 		if (error) {
1949 			xpt_print(ccb->ccb_h.path,
1950 			    "bus_dmamap_load_ccb failed: %d\n", error);
1951 			return (error);
1952 		}
1953 		if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
1954 			reqp->softc->sysctl_data.data_bio_cnt++;
1955 		else
1956 			reqp->softc->sysctl_data.data_vaddr_cnt++;
1957 		break;
1958 
1959 	case CAM_DATA_SG:
1960 	{
1961 		struct storvsc_gpa_range *prplist;
1962 		int i = 0;
1963 		int offset = 0;
1964 		int ret;
1965 
1966 		bus_dma_segment_t *storvsc_sglist =
1967 		    (bus_dma_segment_t *)ccb->csio.data_ptr;
1968 		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
1969 
1970 		prplist = &reqp->prp_list;
1971 		prplist->gpa_range.gpa_len = csio->dxfer_len;
1972 
1973 		printf("Storvsc: get SG I/O operation, %d\n",
1974 		    reqp->vstor_packet.u.vm_srb.data_in);
1975 
1976 		if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX){
1977 			printf("Storvsc: %d segments is too much, "
1978 			    "only support %d segments\n",
1979 			    storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX);
1980 			return (EINVAL);
1981 		}
1982 
1983 		/*
1984 		 * We create our own bounce buffer function currently. Idealy
1985 		 * we should use BUS_DMA(9) framework. But with current BUS_DMA
1986 		 * code there is no callback API to check the page alignment of
1987 		 * middle segments before busdma can decide if a bounce buffer
1988 		 * is needed for particular segment. There is callback,
1989 		 * "bus_dma_filter_t *filter", but the parrameters are not
1990 		 * sufficient for storvsc driver.
1991 		 * TODO:
1992 		 *	Add page alignment check in BUS_DMA(9) callback. Once
1993 		 *	this is complete, switch the following code to use
1994 		 *	BUS_DMA(9) for storvsc bounce buffer support.
1995 		 */
1996 		/* check if we need to create bounce buffer */
1997 		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
1998 		    storvsc_sg_count, &not_aligned_seg_bits);
1999 		if (ret != -1) {
2000 			reqp->bounce_sgl =
2001 			    storvsc_create_bounce_buffer(storvsc_sg_count,
2002 			    reqp->vstor_packet.u.vm_srb.data_in);
2003 			if (NULL == reqp->bounce_sgl) {
2004 				printf("Storvsc_error: "
2005 				    "create bounce buffer failed.\n");
2006 				return (ENOMEM);
2007 			}
2008 
2009 			reqp->bounce_sgl_count = storvsc_sg_count;
2010 			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
2011 
2012 			/*
2013 			 * if it is write, we need copy the original data
2014 			 *to bounce buffer
2015 			 */
2016 			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2017 				storvsc_copy_sgl_to_bounce_buf(
2018 				    reqp->bounce_sgl,
2019 				    storvsc_sglist,
2020 				    storvsc_sg_count,
2021 				    reqp->not_aligned_seg_bits);
2022 			}
2023 
2024 			/* transfer virtual address to physical frame number */
2025 			if (reqp->not_aligned_seg_bits & 0x1){
2026  				phys_addr =
2027 				    vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
2028 			}else{
2029  				phys_addr =
2030 					vtophys(storvsc_sglist[0].ds_addr);
2031 			}
2032 			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2033 
2034 			pfn = phys_addr >> PAGE_SHIFT;
2035 			prplist->gpa_page[0] = pfn;
2036 
2037 			for (i = 1; i < storvsc_sg_count; i++) {
2038 				if (reqp->not_aligned_seg_bits & (1 << i)) {
2039 					phys_addr =
2040 					    vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
2041 				} else {
2042 					phys_addr =
2043 					    vtophys(storvsc_sglist[i].ds_addr);
2044 				}
2045 
2046 				pfn = phys_addr >> PAGE_SHIFT;
2047 				prplist->gpa_page[i] = pfn;
2048 			}
2049 			reqp->prp_cnt = i;
2050 		} else {
2051 			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
2052 
2053 			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2054 
2055 			for (i = 0; i < storvsc_sg_count; i++) {
2056 				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
2057 				pfn = phys_addr >> PAGE_SHIFT;
2058 				prplist->gpa_page[i] = pfn;
2059 			}
2060 			reqp->prp_cnt = i;
2061 
2062 			/* check the last segment cross boundary or not */
2063 			offset = phys_addr & PAGE_MASK;
2064 			if (offset) {
2065 				/* Add one more PRP entry */
2066 				phys_addr =
2067 				    vtophys(storvsc_sglist[i-1].ds_addr +
2068 				    PAGE_SIZE - offset);
2069 				pfn = phys_addr >> PAGE_SHIFT;
2070 				prplist->gpa_page[i] = pfn;
2071 				reqp->prp_cnt++;
2072 			}
2073 
2074 			reqp->bounce_sgl_count = 0;
2075 		}
2076 		reqp->softc->sysctl_data.data_sg_cnt++;
2077 		break;
2078 	}
2079 	default:
2080 		printf("Unknow flags: %d\n", ccb->ccb_h.flags);
2081 		return(EINVAL);
2082 	}
2083 
2084 	return(0);
2085 }
2086 
2087 static uint32_t
2088 is_scsi_valid(const struct scsi_inquiry_data *inq_data)
2089 {
2090 	u_int8_t type;
2091 
2092 	type = SID_TYPE(inq_data);
2093 	if (type == T_NODEVICE)
2094 		return (0);
2095 	if (SID_QUAL(inq_data) == SID_QUAL_BAD_LU)
2096 		return (0);
2097 	return (1);
2098 }
2099 
2100 /**
2101  * @brief completion function before returning to CAM
2102  *
2103  * I/O process has been completed and the result needs
2104  * to be passed to the CAM layer.
2105  * Free resources related to this request.
2106  *
2107  * @param reqp pointer to a request structure
2108  */
2109 static void
2110 storvsc_io_done(struct hv_storvsc_request *reqp)
2111 {
2112 	union ccb *ccb = reqp->ccb;
2113 	struct ccb_scsiio *csio = &ccb->csio;
2114 	struct storvsc_softc *sc = reqp->softc;
2115 	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
2116 	bus_dma_segment_t *ori_sglist = NULL;
2117 	int ori_sg_count = 0;
2118 	const struct scsi_generic *cmd;
2119 
2120 	/* destroy bounce buffer if it is used */
2121 	if (reqp->bounce_sgl_count) {
2122 		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
2123 		ori_sg_count = ccb->csio.sglist_cnt;
2124 
2125 		/*
2126 		 * If it is READ operation, we should copy back the data
2127 		 * to original SG list.
2128 		 */
2129 		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2130 			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
2131 			    ori_sg_count,
2132 			    reqp->bounce_sgl,
2133 			    reqp->not_aligned_seg_bits);
2134 		}
2135 
2136 		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
2137 		reqp->bounce_sgl_count = 0;
2138 	}
2139 
2140 	if (reqp->retries > 0) {
2141 		mtx_lock(&sc->hs_lock);
2142 #if HVS_TIMEOUT_TEST
2143 		xpt_print(ccb->ccb_h.path,
2144 			"%u: IO returned after timeout, "
2145 			"waking up timer handler if any.\n", ticks);
2146 		mtx_lock(&reqp->event.mtx);
2147 		cv_signal(&reqp->event.cv);
2148 		mtx_unlock(&reqp->event.mtx);
2149 #endif
2150 		reqp->retries = 0;
2151 		xpt_print(ccb->ccb_h.path,
2152 			"%u: IO returned after timeout, "
2153 			"stopping timer if any.\n", ticks);
2154 		mtx_unlock(&sc->hs_lock);
2155 	}
2156 
2157 #ifdef notyet
2158 	/*
2159 	 * callout_drain() will wait for the timer handler to finish
2160 	 * if it is running. So we don't need any lock to synchronize
2161 	 * between this routine and the timer handler.
2162 	 * Note that we need to make sure reqp is not freed when timer
2163 	 * handler is using or will use it.
2164 	 */
2165 	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
2166 		callout_drain(&reqp->callout);
2167 	}
2168 #endif
2169 	cmd = (const struct scsi_generic *)
2170 	    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
2171 	     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
2172 
2173 	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
2174 	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
2175 	int srb_status = SRB_STATUS(vm_srb->srb_status);
2176 #ifdef DIAGNOSTIC
2177 	if (hv_storvsc_srb_status != -1) {
2178 		srb_status = SRB_STATUS(hv_storvsc_srb_status & 0x3f);
2179 		hv_storvsc_srb_status = -1;
2180 	}
2181 #endif /* DIAGNOSTIC */
2182 	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
2183 		if (srb_status != SRB_STATUS_SUCCESS) {
2184 			bool log_error = true;
2185 			switch (srb_status) {
2186 				case SRB_STATUS_PENDING:
2187 					/* We should never get this */
2188 					panic("storvsc_io_done: SRB_STATUS_PENDING");
2189 					break;
2190 				case SRB_STATUS_ABORTED:
2191 					/*
2192 					 * storvsc doesn't support aborts yet
2193 					 * but if we ever get this status
2194 					 * the I/O is complete - treat it as a
2195 					 * timeout
2196 					 */
2197 					ccb->ccb_h.status |= CAM_CMD_TIMEOUT;
2198 					break;
2199 				case SRB_STATUS_ABORT_FAILED:
2200 					/* We should never get this */
2201 					panic("storvsc_io_done: SRB_STATUS_ABORT_FAILED");
2202 					break;
2203 				case SRB_STATUS_ERROR:
2204 					/*
2205 					 * We should never get this.
2206 					 * Treat it as a CAM_UNREC_HBA_ERROR.
2207 					 * It will be retried
2208 					 */
2209 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2210 					break;
2211 				case SRB_STATUS_BUSY:
2212 					/* Host is busy. Delay and retry */
2213 					ccb->ccb_h.status |= CAM_BUSY;
2214 					break;
2215 				case SRB_STATUS_INVALID_REQUEST:
2216 				case SRB_STATUS_INVALID_PATH_ID:
2217 				case SRB_STATUS_NO_DEVICE:
2218 				case SRB_STATUS_INVALID_TARGET_ID:
2219 					/*
2220 					 * These indicate an invalid address
2221 					 * and really should never be seen.
2222 					 * A CAM_PATH_INVALID could be
2223 					 * used here but I want to run
2224 					 * down retries.  Do a CAM_BUSY
2225 					 * since the host might be having issues.
2226 					 */
2227 					ccb->ccb_h.status |= CAM_BUSY;
2228 					break;
2229 				case SRB_STATUS_TIMEOUT:
2230 				case SRB_STATUS_COMMAND_TIMEOUT:
2231 					/* The backend has timed this out */
2232 					ccb->ccb_h.status |= CAM_BUSY;
2233 					break;
2234 				/* Some old pSCSI errors below */
2235 				case SRB_STATUS_SELECTION_TIMEOUT:
2236 				case SRB_STATUS_MESSAGE_REJECTED:
2237 				case SRB_STATUS_PARITY_ERROR:
2238 				case SRB_STATUS_NO_HBA:
2239 				case SRB_STATUS_DATA_OVERRUN:
2240 				case SRB_STATUS_UNEXPECTED_BUS_FREE:
2241 				case SRB_STATUS_PHASE_SEQUENCE_FAILURE:
2242 					/*
2243 					 * Old pSCSI responses, should never get.
2244 					 * If we do treat as a CAM_UNREC_HBA_ERROR
2245 					 * which will be retried
2246 					 */
2247 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2248 					break;
2249 				case SRB_STATUS_BUS_RESET:
2250 					ccb->ccb_h.status |= CAM_SCSI_BUS_RESET;
2251 					break;
2252 				case SRB_STATUS_BAD_SRB_BLOCK_LENGTH:
2253 					/*
2254 					 * The request block is malformed and
2255 					 * I doubt it is from the guest. Just retry.
2256 					 */
2257 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2258 					break;
2259 				/* Not used statuses just retry */
2260 				case SRB_STATUS_REQUEST_FLUSHED:
2261 				case SRB_STATUS_BAD_FUNCTION:
2262 				case SRB_STATUS_NOT_POWERED:
2263 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2264 					break;
2265 				case SRB_STATUS_INVALID_LUN:
2266 					/*
2267 					 * Don't log an EMS for this response since
2268 					 * there is no device at this LUN. This is a
2269 					 * normal and expected response when a device
2270 					 * is detached.
2271 					 */
2272 					ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
2273 					log_error = false;
2274 					break;
2275 				case SRB_STATUS_ERROR_RECOVERY:
2276 				case SRB_STATUS_LINK_DOWN:
2277 					/*
2278 					 * I don't ever expect these from
2279 					 * the host but if we ever get
2280 					 * retry after a delay
2281 					 */
2282 					ccb->ccb_h.status |= CAM_BUSY;
2283 					break;
2284 				default:
2285 					/*
2286 					 * An undefined response assert on
2287 					 * on debug builds else retry
2288 					 */
2289 					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2290 					KASSERT(srb_status <= SRB_STATUS_LINK_DOWN,
2291 					    ("storvsc: %s, unexpected srb_status of 0x%x",
2292 					    __func__, srb_status));
2293 					break;
2294 			}
2295 			if (log_error) {
2296 				xpt_print(ccb->ccb_h.path, "The hypervisor's I/O adapter "
2297 					"driver received an unexpected response code 0x%x "
2298 					"for operation: %s. If this continues to occur, "
2299 					"report the condition to your hypervisor vendor so "
2300 					"they can rectify the issue.\n", srb_status,
2301 					scsi_op_desc(cmd->opcode, NULL));
2302 			}
2303 		} else {
2304 			ccb->ccb_h.status |= CAM_REQ_CMP;
2305 		}
2306 
2307 		if (cmd->opcode == INQUIRY &&
2308 		    srb_status == SRB_STATUS_SUCCESS) {
2309 			int resp_xfer_len, resp_buf_len, data_len;
2310 			uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
2311 			struct scsi_inquiry_data *inq_data =
2312 			    (struct scsi_inquiry_data *)csio->data_ptr;
2313 
2314 			/* Get the buffer length reported by host */
2315 			resp_xfer_len = vm_srb->transfer_len;
2316 
2317 			/* Get the available buffer length */
2318 			resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
2319 			data_len = (resp_buf_len < resp_xfer_len) ?
2320 			    resp_buf_len : resp_xfer_len;
2321 			if (bootverbose && data_len >= 5) {
2322 				xpt_print(ccb->ccb_h.path, "storvsc inquiry "
2323 				    "(%d) [%x %x %x %x %x ... ]\n", data_len,
2324 				    resp_buf[0], resp_buf[1], resp_buf[2],
2325 				    resp_buf[3], resp_buf[4]);
2326 			}
2327 			/*
2328 			 * XXX: Hyper-V (since win2012r2) responses inquiry with
2329 			 * unknown version (0) for GEN-2 DVD device.
2330 			 * Manually set the version number to SPC3 in order to
2331 			 * ask CAM to continue probing with "PROBE_REPORT_LUNS".
2332 			 * see probedone() in scsi_xpt.c
2333 			 */
2334 			if (SID_TYPE(inq_data) == T_CDROM &&
2335 			    inq_data->version == 0 &&
2336 			    (vmstor_proto_version >= VMSTOR_PROTOCOL_VERSION_WIN8)) {
2337 				inq_data->version = SCSI_REV_SPC3;
2338 				if (bootverbose) {
2339 					xpt_print(ccb->ccb_h.path,
2340 					    "set version from 0 to %d\n",
2341 					    inq_data->version);
2342 				}
2343 			}
2344 			/*
2345 			 * XXX: Manually fix the wrong response returned from WS2012
2346 			 */
2347 			if (!is_scsi_valid(inq_data) &&
2348 			    (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2349 			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8 ||
2350 			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN7)) {
2351 				if (data_len >= 4 &&
2352 				    (resp_buf[2] == 0 || resp_buf[3] == 0)) {
2353 					resp_buf[2] = SCSI_REV_SPC3;
2354 					resp_buf[3] = 2; // resp fmt must be 2
2355 					if (bootverbose)
2356 						xpt_print(ccb->ccb_h.path,
2357 						    "fix version and resp fmt for 0x%x\n",
2358 						    vmstor_proto_version);
2359 				}
2360 			} else if (data_len >= SHORT_INQUIRY_LENGTH) {
2361 				char vendor[16];
2362 
2363 				cam_strvis(vendor, inq_data->vendor,
2364 				    sizeof(inq_data->vendor), sizeof(vendor));
2365 				/*
2366 				 * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
2367 				 * WIN2012 R2 in order to support UNMAP feature.
2368 				 */
2369 				if (!strncmp(vendor, "Msft", 4) &&
2370 				    SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
2371 				    (vmstor_proto_version ==
2372 				     VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2373 				     vmstor_proto_version ==
2374 				     VMSTOR_PROTOCOL_VERSION_WIN8)) {
2375 					inq_data->version = SCSI_REV_SPC3;
2376 					if (bootverbose) {
2377 						xpt_print(ccb->ccb_h.path,
2378 						    "storvsc upgrades "
2379 						    "SPC2 to SPC3\n");
2380 					}
2381 				}
2382 			}
2383 		}
2384 	} else {
2385 		/**
2386 		 * On Some Windows hosts TEST_UNIT_READY command can return
2387 		 * SRB_STATUS_ERROR and sense data, for example, asc=0x3a,1
2388 		 * "(Medium not present - tray closed)". This error can be
2389 		 * ignored since it will be sent to host periodically.
2390 		 */
2391 		boolean_t unit_not_ready = \
2392 		    vm_srb->scsi_status == SCSI_STATUS_CHECK_COND &&
2393 		    cmd->opcode == TEST_UNIT_READY &&
2394 		    srb_status == SRB_STATUS_ERROR;
2395 		if (!unit_not_ready && bootverbose) {
2396 			mtx_lock(&sc->hs_lock);
2397 			xpt_print(ccb->ccb_h.path,
2398 				"storvsc scsi_status = %d, srb_status = %d\n",
2399 				vm_srb->scsi_status, srb_status);
2400 			mtx_unlock(&sc->hs_lock);
2401 		}
2402 		ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
2403 	}
2404 
2405 	ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
2406 	if (srb_status == SRB_STATUS_SUCCESS ||
2407 	    srb_status == SRB_STATUS_DATA_OVERRUN)
2408 		ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;
2409 	else
2410 		ccb->csio.resid = ccb->csio.dxfer_len;
2411 
2412 	if ((vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) != 0 &&
2413 	    reqp->sense_info_len != 0) {
2414 		csio->sense_resid = csio->sense_len - reqp->sense_info_len;
2415 		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
2416 	}
2417 
2418 	mtx_lock(&sc->hs_lock);
2419 	if (reqp->softc->hs_frozen == 1) {
2420 		xpt_print(ccb->ccb_h.path,
2421 			"%u: storvsc unfreezing softc 0x%p.\n",
2422 			ticks, reqp->softc);
2423 		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
2424 		reqp->softc->hs_frozen = 0;
2425 	}
2426 	storvsc_free_request(sc, reqp);
2427 	mtx_unlock(&sc->hs_lock);
2428 
2429 	xpt_done_direct(ccb);
2430 }
2431 
2432 /**
2433  * @brief Free a request structure
2434  *
2435  * Free a request structure by returning it to the free list
2436  *
2437  * @param sc pointer to a softc
2438  * @param reqp pointer to a request structure
2439  */
2440 static void
2441 storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
2442 {
2443 
2444 	LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
2445 }
2446 
2447 /**
2448  * @brief Determine type of storage device from GUID
2449  *
2450  * Using the type GUID, determine if this is a StorVSC (paravirtual
2451  * SCSI or BlkVSC (paravirtual IDE) device.
2452  *
2453  * @param dev a device
2454  * returns an enum
2455  */
2456 static enum hv_storage_type
2457 storvsc_get_storage_type(device_t dev)
2458 {
2459 	device_t parent = device_get_parent(dev);
2460 
2461 	if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0)
2462 		return DRIVER_BLKVSC;
2463 	if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0)
2464 		return DRIVER_STORVSC;
2465 	return DRIVER_UNKNOWN;
2466 }
2467 
2468 #define	PCI_VENDOR_INTEL	0x8086
2469 #define	PCI_PRODUCT_PIIX4	0x7111
2470 
2471 static void
2472 storvsc_ada_probe_veto(void *arg __unused, struct cam_path *path,
2473     struct ata_params *ident_buf __unused, int *veto)
2474 {
2475 
2476 	/*
2477 	 * The ATA disks are shared with the controllers managed
2478 	 * by this driver, so veto the ATA disks' attachment; the
2479 	 * ATA disks will be attached as SCSI disks once this driver
2480 	 * attached.
2481 	 */
2482 	if (path->device->protocol == PROTO_ATA) {
2483 		struct ccb_pathinq cpi;
2484 
2485 		xpt_path_inq(&cpi, path);
2486 		if (cpi.ccb_h.status == CAM_REQ_CMP &&
2487 		    cpi.hba_vendor == PCI_VENDOR_INTEL &&
2488 		    cpi.hba_device == PCI_PRODUCT_PIIX4) {
2489 			(*veto)++;
2490 			if (bootverbose) {
2491 				xpt_print(path,
2492 				    "Disable ATA disks on "
2493 				    "simulated ATA controller (0x%04x%04x)\n",
2494 				    cpi.hba_device, cpi.hba_vendor);
2495 			}
2496 		}
2497 	}
2498 }
2499 
2500 static void
2501 storvsc_sysinit(void *arg __unused)
2502 {
2503 	if (vm_guest == VM_GUEST_HV) {
2504 		storvsc_handler_tag = EVENTHANDLER_REGISTER(ada_probe_veto,
2505 		    storvsc_ada_probe_veto, NULL, EVENTHANDLER_PRI_ANY);
2506 	}
2507 }
2508 SYSINIT(storvsc_sys_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysinit,
2509     NULL);
2510 
static void
storvsc_sysuninit(void *arg __unused)
{
	/* Tear down the ada probe-veto hook installed by storvsc_sysinit(). */
	if (storvsc_handler_tag != NULL)
		EVENTHANDLER_DEREGISTER(ada_probe_veto, storvsc_handler_tag);
}
SYSUNINIT(storvsc_sys_uninit, SI_SUB_DRIVERS, SI_ORDER_SECOND,
    storvsc_sysuninit, NULL);
2519