1 /*-
2  * Copyright (c) 2009-2012 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /**
30  * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
 * to the Common Access Method (CAM) layer.  CAM control blocks (CCBs) are
32  * converted into VSCSI protocol messages which are delivered to the parent
33  * partition StorVSP driver over the Hyper-V VMBUS.
34  */
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include <sys/param.h>
39 #include <sys/proc.h>
40 #include <sys/condvar.h>
41 #include <sys/time.h>
42 #include <sys/systm.h>
43 #include <sys/sockio.h>
44 #include <sys/mbuf.h>
45 #include <sys/malloc.h>
46 #include <sys/module.h>
47 #include <sys/kernel.h>
48 #include <sys/queue.h>
49 #include <sys/lock.h>
50 #include <sys/sx.h>
51 #include <sys/taskqueue.h>
52 #include <sys/bus.h>
53 #include <sys/mutex.h>
54 #include <sys/callout.h>
55 #include <vm/vm.h>
56 #include <vm/pmap.h>
57 #include <vm/uma.h>
58 #include <sys/lock.h>
59 #include <sys/sema.h>
60 #include <sys/sglist.h>
61 #include <machine/bus.h>
62 #include <sys/bus_dma.h>
63 
64 #include <cam/cam.h>
65 #include <cam/cam_ccb.h>
66 #include <cam/cam_periph.h>
67 #include <cam/cam_sim.h>
68 #include <cam/cam_xpt_sim.h>
69 #include <cam/cam_xpt_internal.h>
70 #include <cam/cam_debug.h>
71 #include <cam/scsi/scsi_all.h>
72 #include <cam/scsi/scsi_message.h>
73 
74 #include <dev/hyperv/include/hyperv.h>
75 #include "hv_vstorage.h"
76 
/* Size of the VMBus ring buffer used for each channel. */
#define STORVSC_RINGBUFFER_SIZE		(20*PAGE_SIZE)
/* Maximum LUNs exposed per SCSI target (storvsc flavor). */
#define STORVSC_MAX_LUNS_PER_TARGET	(64)
/* Depth of the pre-allocated request pool and the CAM simq. */
#define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
/* The IDE (blkvsc) flavor exposes a single disk per target. */
#define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
#define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
#define STORVSC_MAX_TARGETS		(2)

/* VMSTOR protocol version negotiated with pre-Win8 hosts. */
#define STORVSC_WIN7_MAJOR 4
#define STORVSC_WIN7_MINOR 2

/* VMSTOR protocol version negotiated with Win8+ hosts. */
#define STORVSC_WIN8_MAJOR 5
#define STORVSC_WIN8_MINOR 1

/*
 * On-the-wire packet size; vmscsi_size_delta subtracts the win8-only
 * extension when talking to an older host (set in storvsc_probe()).
 */
#define VSTOR_PKT_SIZE	(sizeof(struct vstor_packet) - vmscsi_size_delta)

#define HV_ALIGN(x, a) roundup2(x, a)
93 
94 struct storvsc_softc;
95 
/* One pre-allocated scatter/gather list, linked into the global pool. */
struct hv_sgl_node {
	LIST_ENTRY(hv_sgl_node) link;	/* linkage in free/in-use lists */
	struct sglist *sgl_data;	/* the pre-allocated sg list itself */
};
100 
/*
 * Global pool of pre-allocated sg lists used for bounce buffering;
 * populated once (is_init) on the first storvsc_attach().
 */
struct hv_sgl_page_pool{
	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
	LIST_HEAD(, hv_sgl_node) free_sgl_list;
	boolean_t                is_init;	/* TRUE once pool is created */
} g_hv_sgl_page_pool;
106 
/*
 * Total page buffers backing the sg-list pool.  Parenthesized so the
 * macro expands correctly inside larger expressions (the unparenthesized
 * form would mis-associate with neighboring operators, e.g. division).
 */
#define STORVSC_MAX_SG_PAGE_CNT (STORVSC_MAX_IO_REQUESTS * HV_MAX_MULTIPAGE_BUFFER_COUNT)
108 
/*
 * Direction of an I/O request.
 * NOTE(review): not referenced in this chunk — presumably consumed by
 * the CCB-to-VSCSI translation code; confirm against the rest of file.
 */
enum storvsc_request_type {
	WRITE_TYPE,
	READ_TYPE,
	UNKNOWN_TYPE
};
114 
/*
 * Per-I/O request tracking structure.  A pool of these hangs off
 * storvsc_softc::hs_free_list; the request's address doubles as the
 * VMBus transaction id (see hv_storvsc_on_channel_callback()).
 */
struct hv_storvsc_request {
	LIST_ENTRY(hv_storvsc_request) link;	/* free-list linkage */
	struct vstor_packet	vstor_packet;	/* wire packet sent/received */
	hv_vmbus_multipage_buffer data_buf;	/* data payload description */
	void *sense_data;		/* autosense destination buffer */
	uint8_t sense_info_len;		/* capacity in, valid length out */
	uint8_t retries;		/* retry counter — usage not visible here */
	union ccb *ccb;			/* originating CAM CCB, if any */
	struct storvsc_softc *softc;	/* owning adapter */
	struct callout callout;		/* per-request timeout handling */
	struct sema synch_sema; /*Synchronize the request/response if needed */
	struct sglist *bounce_sgl;	/* bounce buffer for unaligned segs */
	unsigned int bounce_sgl_count;
	uint64_t not_aligned_seg_bits;	/* bitmap of segments needing bounce */
};
130 
/* Per-adapter (per-VMBus-device) state. */
struct storvsc_softc {
	struct hv_device		*hs_dev;	/* VMBus device handle */
	LIST_HEAD(, hv_storvsc_request)	hs_free_list;	/* request pool */
	struct mtx			hs_lock;	/* protects SIM + lists */
	struct storvsc_driver_props	*hs_drv_props;	/* blkvsc/storvsc tunables */
	int 				hs_unit;	/* device unit number */
	uint32_t			hs_frozen;	/* simq freeze state */
	struct cam_sim			*hs_sim;	/* CAM SIM for this HBA */
	struct cam_path 		*hs_path;	/* wildcard path for rescans */
	uint32_t			hs_num_out_reqs; /* in-flight requests (atomic) */
	boolean_t			hs_destroy;	/* set on detach; gates new outbound I/O */
	boolean_t			hs_drain_notify; /* detach waits for drain */
	boolean_t			hs_open_multi_channel; /* subchannel offers accepted */
	struct sema 			hs_drain_sema;	/* posted when last request completes */
	struct hv_storvsc_request	hs_init_req;	/* dedicated init-protocol request */
	struct hv_storvsc_request	hs_reset_req;	/* dedicated bus-reset request */
};
148 
149 
150 /**
151  * HyperV storvsc timeout testing cases:
152  * a. IO returned after first timeout;
153  * b. IO returned after second timeout and queue freeze;
154  * c. IO returned while timer handler is running
155  * The first can be tested by "sg_senddiag -vv /dev/daX",
156  * and the second and third can be done by
157  * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
158  */
159 #define HVS_TIMEOUT_TEST 0
160 
161 /*
162  * Bus/adapter reset functionality on the Hyper-V host is
163  * buggy and it will be disabled until
164  * it can be further tested.
165  */
166 #define HVS_HOST_RESET 0
167 
/* Per-flavor (blkvsc vs. storvsc) tunables; see g_drv_props_table. */
struct storvsc_driver_props {
	char		*drv_name;	/* SIM name, e.g. "storvsc" */
	char		*drv_desc;	/* human-readable description */
	uint8_t		drv_max_luns_per_target;
	uint8_t		drv_max_ios_per_target;	/* request pool / simq depth */
	uint32_t	drv_ringbuffer_size;	/* VMBus ring size, bytes */
};
175 
/*
 * Device flavor; the enum value directly indexes g_drv_props_table
 * (see storvsc_attach()), so the order must match that table.
 */
enum hv_storage_type {
	DRIVER_BLKVSC,
	DRIVER_STORVSC,
	DRIVER_UNKNOWN
};
181 
182 #define HS_MAX_ADAPTERS 10
183 
184 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
185 
/* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
/*
 * VMBus device-type GUID for the synthetic SCSI controller.  The bytes
 * are stored in the GUID's on-the-wire (mixed-endian) layout, so the
 * first three fields appear byte-swapped relative to the string above.
 */
static const hv_guid gStorVscDeviceType={
	.data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
};
191 
/* {32412632-86cb-44a2-9b5c-50d1417354f5} */
/*
 * VMBus device-type GUID for the synthetic IDE controller (blkvsc),
 * stored in the same mixed-endian byte layout as gStorVscDeviceType.
 */
static const hv_guid gBlkVscDeviceType={
	.data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
};
197 
/*
 * Driver properties, indexed by enum hv_storage_type:
 * [DRIVER_BLKVSC] = IDE flavor, [DRIVER_STORVSC] = SCSI flavor.
 */
static struct storvsc_driver_props g_drv_props_table[] = {
	{"blkvsc", "Hyper-V IDE Storage Interface",
	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
	 STORVSC_RINGBUFFER_SIZE},
	{"storvsc", "Hyper-V SCSI Storage Interface",
	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
	 STORVSC_RINGBUFFER_SIZE}
};
206 
207 /*
208  * Sense buffer size changed in win8; have a run-time
209  * variable to track the size we should use.
210  */
211 static int sense_buffer_size;
212 
213 /*
214  * The size of the vmscsi_request has changed in win8. The
215  * additional size is for the newly added elements in the
216  * structure. These elements are valid only when we are talking
217  * to a win8 host.
218  * Track the correct size we need to apply.
219  */
220 static int vmscsi_size_delta;
221 
222 static int storvsc_current_major;
223 static int storvsc_current_minor;
224 
225 /* static functions */
226 static int storvsc_probe(device_t dev);
227 static int storvsc_attach(device_t dev);
228 static int storvsc_detach(device_t dev);
229 static void storvsc_poll(struct cam_sim * sim);
230 static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
231 static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
232 static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
233 static enum hv_storage_type storvsc_get_storage_type(device_t dev);
234 static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
235 static void hv_storvsc_on_channel_callback(void *context);
236 static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
237 					struct vstor_packet *vstor_packet,
238 					struct hv_storvsc_request *request);
239 static int hv_storvsc_connect_vsp(struct hv_device *device);
240 static void storvsc_io_done(struct hv_storvsc_request *reqp);
241 static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
242 				bus_dma_segment_t *orig_sgl,
243 				unsigned int orig_sgl_count,
244 				uint64_t seg_bits);
245 void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
246 				unsigned int dest_sgl_count,
247 				struct sglist* src_sgl,
248 				uint64_t seg_bits);
249 
250 static device_method_t storvsc_methods[] = {
251 	/* Device interface */
252 	DEVMETHOD(device_probe,		storvsc_probe),
253 	DEVMETHOD(device_attach,	storvsc_attach),
254 	DEVMETHOD(device_detach,	storvsc_detach),
255 	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
256 	DEVMETHOD_END
257 };
258 
259 static driver_t storvsc_driver = {
260 	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
261 };
262 
263 static devclass_t storvsc_devclass;
264 DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0);
265 MODULE_VERSION(storvsc, 1);
266 MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
267 
268 
269 /**
270  * The host is capable of sending messages to us that are
271  * completely unsolicited. So, we need to address the race
272  * condition where we may be in the process of unloading the
273  * driver when the host may send us an unsolicited message.
274  * We address this issue by implementing a sequentially
275  * consistent protocol:
276  *
 * 1. Channel callback is invoked while holding the channel lock
278  *    and an unloading driver will reset the channel callback under
279  *    the protection of this channel lock.
280  *
281  * 2. To ensure bounded wait time for unloading a driver, we don't
282  *    permit outgoing traffic once the device is marked as being
283  *    destroyed.
284  *
285  * 3. Once the device is marked as being destroyed, we only
286  *    permit incoming traffic to properly account for
287  *    packets already sent out.
288  */
289 static inline struct storvsc_softc *
290 get_stor_device(struct hv_device *device,
291 				boolean_t outbound)
292 {
293 	struct storvsc_softc *sc;
294 
295 	sc = device_get_softc(device->device);
296 	if (sc == NULL) {
297 		return NULL;
298 	}
299 
300 	if (outbound) {
301 		/*
302 		 * Here we permit outgoing I/O only
303 		 * if the device is not being destroyed.
304 		 */
305 
306 		if (sc->hs_destroy) {
307 			sc = NULL;
308 		}
309 	} else {
310 		/*
311 		 * inbound case; if being destroyed
312 		 * only permit to account for
313 		 * messages already sent out.
314 		 */
315 		if (sc->hs_destroy && (sc->hs_num_out_reqs == 0)) {
316 			sc = NULL;
317 		}
318 	}
319 	return sc;
320 }
321 
322 /**
 * @brief Callback handler, invoked when a multi-channel offer is received
324  *
325  * @param context  new multi-channel
326  */
327 static void
328 storvsc_handle_sc_creation(void *context)
329 {
330 	hv_vmbus_channel *new_channel;
331 	struct hv_device *device;
332 	struct storvsc_softc *sc;
333 	struct vmstor_chan_props props;
334 	int ret = 0;
335 
336 	new_channel = (hv_vmbus_channel *)context;
337 	device = new_channel->device;
338 	sc = get_stor_device(device, TRUE);
339 	if (sc == NULL)
340 		return;
341 
342 	if (FALSE == sc->hs_open_multi_channel)
343 		return;
344 
345 	memset(&props, 0, sizeof(props));
346 
347 	ret = hv_vmbus_channel_open(new_channel,
348 	    sc->hs_drv_props->drv_ringbuffer_size,
349   	    sc->hs_drv_props->drv_ringbuffer_size,
350 	    (void *)&props,
351 	    sizeof(struct vmstor_chan_props),
352 	    hv_storvsc_on_channel_callback,
353 	    new_channel);
354 
355 	return;
356 }
357 
358 /**
359  * @brief Send multi-channel creation request to host
360  *
 * @param dev  a Hyper-V device pointer
362  * @param max_chans  the max channels supported by vmbus
363  */
364 static void
365 storvsc_send_multichannel_request(struct hv_device *dev, int max_chans)
366 {
367 	struct storvsc_softc *sc;
368 	struct hv_storvsc_request *request;
369 	struct vstor_packet *vstor_packet;
370 	int request_channels_cnt = 0;
371 	int ret;
372 
373 	/* get multichannels count that need to create */
374 	request_channels_cnt = MIN(max_chans, mp_ncpus);
375 
376 	sc = get_stor_device(dev, TRUE);
377 	if (sc == NULL) {
378 		printf("Storvsc_error: get sc failed while send mutilchannel "
379 		    "request\n");
380 		return;
381 	}
382 
383 	request = &sc->hs_init_req;
384 
385 	/* Establish a handler for multi-channel */
386 	dev->channel->sc_creation_callback = storvsc_handle_sc_creation;
387 
388 	/* request the host to create multi-channel */
389 	memset(request, 0, sizeof(struct hv_storvsc_request));
390 
391 	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
392 
393 	vstor_packet = &request->vstor_packet;
394 
395 	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
396 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
397 	vstor_packet->u.multi_channels_cnt = request_channels_cnt;
398 
399 	ret = hv_vmbus_channel_send_packet(
400 	    dev->channel,
401 	    vstor_packet,
402 	    VSTOR_PKT_SIZE,
403 	    (uint64_t)(uintptr_t)request,
404 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
405 	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
406 
407 	/* wait for 5 seconds */
408 	ret = sema_timedwait(&request->synch_sema, 5 * hz);
409 	if (ret != 0) {
410 		printf("Storvsc_error: create multi-channel timeout, %d\n",
411 		    ret);
412 		return;
413 	}
414 
415 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
416 	    vstor_packet->status != 0) {
417 		printf("Storvsc_error: create multi-channel invalid operation "
418 		    "(%d) or statue (%u)\n",
419 		    vstor_packet->operation, vstor_packet->status);
420 		return;
421 	}
422 
423 	sc->hs_open_multi_channel = TRUE;
424 
425 	if (bootverbose)
426 		printf("Storvsc create multi-channel success!\n");
427 }
428 
429 /**
430  * @brief initialize channel connection to parent partition
431  *
432  * @param dev  a Hyper-V device pointer
433  * @returns  0 on success, non-zero error on failure
434  */
static int
hv_storvsc_channel_init(struct hv_device *dev)
{
	int ret = 0;
	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;
	struct storvsc_softc *sc;
	uint16_t max_chans = 0;
	boolean_t support_multichannel = FALSE;

	/* NOTE(review): redundant — both already initialized above. */
	max_chans = 0;
	support_multichannel = FALSE;

	sc = get_stor_device(dev, TRUE);
	if (sc == NULL)
		return (ENODEV);

	/* hs_init_req is the dedicated request for the init handshake;
	 * its address is recognized in hv_storvsc_on_channel_callback(). */
	request = &sc->hs_init_req;
	memset(request, 0, sizeof(struct hv_storvsc_request));
	vstor_packet = &request->vstor_packet;
	request->softc = sc;

	/**
	 * Initiate the vsc/vsp initialization protocol on the open channel
	 */
	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));

	/* Step 1: BEGIN_INITIALIZATION */
	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;


	ret = hv_vmbus_channel_send_packet(
			dev->channel,
			vstor_packet,
			VSTOR_PKT_SIZE,
			(uint64_t)(uintptr_t)request,
			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0)
		goto cleanup;

	/* wait 5 seconds */
	ret = sema_timedwait(&request->synch_sema, 5 * hz);
	if (ret != 0)
		goto cleanup;

	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
		vstor_packet->status != 0) {
		goto cleanup;
	}

	/* reuse the packet for version range supported */

	/* Step 2: QUERY_PROTOCOL_VERSION — propose our protocol version. */
	memset(vstor_packet, 0, sizeof(struct vstor_packet));
	vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	vstor_packet->u.version.major_minor =
	    VMSTOR_PROTOCOL_VERSION(storvsc_current_major, storvsc_current_minor);

	/* revision is only significant for Windows guests */
	vstor_packet->u.version.revision = 0;

	ret = hv_vmbus_channel_send_packet(
			dev->channel,
			vstor_packet,
			VSTOR_PKT_SIZE,
			(uint64_t)(uintptr_t)request,
			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0)
		goto cleanup;

	/* wait 5 seconds */
	ret = sema_timedwait(&request->synch_sema, 5 * hz);

	if (ret)
		goto cleanup;

	/* TODO: Check returned version */
	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
		vstor_packet->status != 0)
		goto cleanup;

	/**
	 * Query channel properties
	 */
	/* Step 3: QUERY_PROPERTIES — learn host capabilities (flags, channels). */
	memset(vstor_packet, 0, sizeof(struct vstor_packet));
	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	ret = hv_vmbus_channel_send_packet(
				dev->channel,
				vstor_packet,
				VSTOR_PKT_SIZE,
				(uint64_t)(uintptr_t)request,
				HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
				HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if ( ret != 0)
		goto cleanup;

	/* wait 5 seconds */
	ret = sema_timedwait(&request->synch_sema, 5 * hz);

	if (ret != 0)
		goto cleanup;

	/* TODO: Check returned version */
	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
	    vstor_packet->status != 0) {
		goto cleanup;
	}

	/* multi-channels feature is supported by WIN8 and above version */
	max_chans = vstor_packet->u.chan_props.max_channel_cnt;
	if ((hv_vmbus_protocal_version != HV_VMBUS_VERSION_WIN7) &&
	    (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) &&
	    (vstor_packet->u.chan_props.flags &
	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
		support_multichannel = TRUE;
	}

	/* Step 4: END_INITIALIZATION — conclude the handshake. */
	memset(vstor_packet, 0, sizeof(struct vstor_packet));
	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	ret = hv_vmbus_channel_send_packet(
			dev->channel,
			vstor_packet,
			VSTOR_PKT_SIZE,
			(uint64_t)(uintptr_t)request,
			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0) {
		goto cleanup;
	}

	/* wait 5 seconds */
	ret = sema_timedwait(&request->synch_sema, 5 * hz);

	if (ret != 0)
		goto cleanup;

	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
	    vstor_packet->status != 0)
		goto cleanup;

	/*
	 * If multi-channel is supported, send multichannel create
	 * request to host.
	 */
	/* NOTE(review): the callee memsets and re-inits hs_init_req's sema;
	 * the cleanup below then destroys that re-initialized sema. */
	if (support_multichannel)
		storvsc_send_multichannel_request(dev, max_chans);

cleanup:
	sema_destroy(&request->synch_sema);
	return (ret);
}
597 
598 /**
 * @brief Open channel connection to parent partition StorVSP driver
600  *
601  * Open and initialize channel connection to parent partition StorVSP driver.
602  *
603  * @param pointer to a Hyper-V device
604  * @returns 0 on success, non-zero error on failure
605  */
606 static int
607 hv_storvsc_connect_vsp(struct hv_device *dev)
608 {
609 	int ret = 0;
610 	struct vmstor_chan_props props;
611 	struct storvsc_softc *sc;
612 
613 	sc = device_get_softc(dev->device);
614 
615 	memset(&props, 0, sizeof(struct vmstor_chan_props));
616 
617 	/*
618 	 * Open the channel
619 	 */
620 
621 	ret = hv_vmbus_channel_open(
622 		dev->channel,
623 		sc->hs_drv_props->drv_ringbuffer_size,
624 		sc->hs_drv_props->drv_ringbuffer_size,
625 		(void *)&props,
626 		sizeof(struct vmstor_chan_props),
627 		hv_storvsc_on_channel_callback,
628 		dev->channel);
629 
630 	if (ret != 0) {
631 		return ret;
632 	}
633 
634 	ret = hv_storvsc_channel_init(dev);
635 
636 	return (ret);
637 }
638 
#if HVS_HOST_RESET
/*
 * Issue a synchronous bus reset to the host and wait (up to 5 s) for
 * completion.  Compiled out by default (HVS_HOST_RESET is 0) because
 * host-side reset support is buggy; see the comment at HVS_HOST_RESET.
 */
static int
hv_storvsc_host_reset(struct hv_device *dev)
{
	int ret = 0;
	struct storvsc_softc *sc;

	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;

	sc = get_stor_device(dev, TRUE);
	if (sc == NULL) {
		return ENODEV;
	}

	/* hs_reset_req is recognized by address in the channel callback. */
	request = &sc->hs_reset_req;
	request->softc = sc;
	vstor_packet = &request->vstor_packet;

	sema_init(&request->synch_sema, 0, "stor synch sema");

	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	ret = hv_vmbus_channel_send_packet(dev->channel,
			vstor_packet,
			VSTOR_PKT_SIZE,
			(uint64_t)(uintptr_t)&sc->hs_reset_req,
			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0) {
		goto cleanup;
	}

	ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */

	if (ret) {
		goto cleanup;
	}


	/*
	 * At this point, all outstanding requests in the adapter
	 * should have been flushed out and return to us
	 */

cleanup:
	sema_destroy(&request->synch_sema);
	return (ret);
}
#endif /* HVS_HOST_RESET */
691 
692 /**
693  * @brief Function to initiate an I/O request
694  *
695  * @param device Hyper-V device pointer
696  * @param request pointer to a request structure
697  * @returns 0 on success, non-zero error on failure
698  */
699 static int
700 hv_storvsc_io_request(struct hv_device *device,
701 					  struct hv_storvsc_request *request)
702 {
703 	struct storvsc_softc *sc;
704 	struct vstor_packet *vstor_packet = &request->vstor_packet;
705 	struct hv_vmbus_channel* outgoing_channel = NULL;
706 	int ret = 0;
707 
708 	sc = get_stor_device(device, TRUE);
709 
710 	if (sc == NULL) {
711 		return ENODEV;
712 	}
713 
714 	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
715 
716 	vstor_packet->u.vm_srb.length = VSTOR_PKT_SIZE;
717 
718 	vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
719 
720 	vstor_packet->u.vm_srb.transfer_len = request->data_buf.length;
721 
722 	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
723 
724 	outgoing_channel = vmbus_select_outgoing_channel(device->channel);
725 
726 	mtx_unlock(&request->softc->hs_lock);
727 	if (request->data_buf.length) {
728 		ret = hv_vmbus_channel_send_packet_multipagebuffer(
729 				outgoing_channel,
730 				&request->data_buf,
731 				vstor_packet,
732 				VSTOR_PKT_SIZE,
733 				(uint64_t)(uintptr_t)request);
734 
735 	} else {
736 		ret = hv_vmbus_channel_send_packet(
737 			outgoing_channel,
738 			vstor_packet,
739 			VSTOR_PKT_SIZE,
740 			(uint64_t)(uintptr_t)request,
741 			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
742 			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
743 	}
744 	mtx_lock(&request->softc->hs_lock);
745 
746 	if (ret != 0) {
747 		printf("Unable to send packet %p ret %d", vstor_packet, ret);
748 	} else {
749 		atomic_add_int(&sc->hs_num_out_reqs, 1);
750 	}
751 
752 	return (ret);
753 }
754 
755 
756 /**
757  * Process IO_COMPLETION_OPERATION and ready
758  * the result to be completed for upper layer
759  * processing by the CAM layer.
760  */
761 static void
762 hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
763 			   struct vstor_packet *vstor_packet,
764 			   struct hv_storvsc_request *request)
765 {
766 	struct vmscsi_req *vm_srb;
767 
768 	vm_srb = &vstor_packet->u.vm_srb;
769 
770 	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
771 			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
772 		/* Autosense data available */
773 
774 		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
775 				("vm_srb->sense_info_len <= "
776 				 "request->sense_info_len"));
777 
778 		memcpy(request->sense_data, vm_srb->u.sense_data,
779 			vm_srb->sense_info_len);
780 
781 		request->sense_info_len = vm_srb->sense_info_len;
782 	}
783 
784 	/* Complete request by passing to the CAM layer */
785 	storvsc_io_done(request);
786 	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
787 	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
788 		sema_post(&sc->hs_drain_sema);
789 	}
790 }
791 
792 static void
793 hv_storvsc_rescan_target(struct storvsc_softc *sc)
794 {
795 	path_id_t pathid;
796 	target_id_t targetid;
797 	union ccb *ccb;
798 
799 	pathid = cam_sim_path(sc->hs_sim);
800 	targetid = CAM_TARGET_WILDCARD;
801 
802 	/*
803 	 * Allocate a CCB and schedule a rescan.
804 	 */
805 	ccb = xpt_alloc_ccb_nowait();
806 	if (ccb == NULL) {
807 		printf("unable to alloc CCB for rescan\n");
808 		return;
809 	}
810 
811 	if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
812 	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
813 		printf("unable to create path for rescan, pathid: %u,"
814 		    "targetid: %u\n", pathid, targetid);
815 		xpt_free_ccb(ccb);
816 		return;
817 	}
818 
819 	if (targetid == CAM_TARGET_WILDCARD)
820 		ccb->ccb_h.func_code = XPT_SCAN_BUS;
821 	else
822 		ccb->ccb_h.func_code = XPT_SCAN_TGT;
823 
824 	xpt_rescan(ccb);
825 }
826 
/*
 * VMBus channel callback: drain all pending packets from the channel.
 * The 64-bit transaction id carried in each packet is the address of the
 * originating hv_storvsc_request, which routes completions either to the
 * synchronous init/reset waiters or to normal I/O completion handling.
 */
static void
hv_storvsc_on_channel_callback(void *context)
{
	int ret = 0;
	hv_vmbus_channel *channel = (hv_vmbus_channel *)context;
	struct hv_device *device = NULL;
	struct storvsc_softc *sc;
	uint32_t bytes_recvd;
	uint64_t request_id;
	/* On-stack receive buffer, 8-byte aligned size for the ring copy. */
	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;

	device = channel->device;
	KASSERT(device, ("device is NULL"));

	/* Inbound direction: allowed even while draining during destroy. */
	sc = get_stor_device(device, FALSE);
	if (sc == NULL) {
		printf("Storvsc_error: get stor device failed.\n");
		return;
	}

	ret = hv_vmbus_channel_recv_packet(
			channel,
			packet,
			roundup2(VSTOR_PKT_SIZE, 8),
			&bytes_recvd,
			&request_id);

	while ((ret == 0) && (bytes_recvd > 0)) {
		request = (struct hv_storvsc_request *)(uintptr_t)request_id;

		/*
		 * Init/reset requests are waited on synchronously: copy
		 * the reply into the request and wake the waiter.
		 */
		if ((request == &sc->hs_init_req) ||
			(request == &sc->hs_reset_req)) {
			memcpy(&request->vstor_packet, packet,
				   sizeof(struct vstor_packet));
			sema_post(&request->synch_sema);
		} else {
			vstor_packet = (struct vstor_packet *)packet;
			switch(vstor_packet->operation) {
			case VSTOR_OPERATION_COMPLETEIO:
				if (request == NULL)
					panic("VMBUS: storvsc received a "
					    "packet with NULL request id in "
					    "COMPLETEIO operation.");

				hv_storvsc_on_iocompletion(sc,
							vstor_packet, request);
				break;
			case VSTOR_OPERATION_REMOVEDEVICE:
				printf("VMBUS: storvsc operation %d not "
				    "implemented.\n", vstor_packet->operation);
				/* TODO: implement */
				break;
			case VSTOR_OPERATION_ENUMERATE_BUS:
				/* Host-initiated topology change: rescan. */
				hv_storvsc_rescan_target(sc);
				break;
			default:
				break;
			}
		}
		ret = hv_vmbus_channel_recv_packet(
				channel,
				packet,
				roundup2(VSTOR_PKT_SIZE, 8),
				&bytes_recvd,
				&request_id);
	}
}
896 
897 /**
898  * @brief StorVSC probe function
899  *
900  * Device probe function.  Returns 0 if the input device is a StorVSC
901  * device.  Otherwise, a ENXIO is returned.  If the input device is
902  * for BlkVSC (paravirtual IDE) device and this support is disabled in
903  * favor of the emulated ATA/IDE device, return ENXIO.
904  *
905  * @param a device
 * @returns 0 on success, ENXIO if not a matching StorVSC device
907  */
908 static int
909 storvsc_probe(device_t dev)
910 {
911 	int ata_disk_enable = 0;
912 	int ret	= ENXIO;
913 
914 	if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 ||
915 	    hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) {
916 		sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
917 		vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
918 		storvsc_current_major = STORVSC_WIN7_MAJOR;
919 		storvsc_current_minor = STORVSC_WIN7_MINOR;
920 	} else {
921 		sense_buffer_size = POST_WIN7_STORVSC_SENSE_BUFFER_SIZE;
922 		vmscsi_size_delta = 0;
923 		storvsc_current_major = STORVSC_WIN8_MAJOR;
924 		storvsc_current_minor = STORVSC_WIN8_MINOR;
925 	}
926 
927 	switch (storvsc_get_storage_type(dev)) {
928 	case DRIVER_BLKVSC:
929 		if(bootverbose)
930 			device_printf(dev, "DRIVER_BLKVSC-Emulated ATA/IDE probe\n");
931 		if (!getenv_int("hw.ata.disk_enable", &ata_disk_enable)) {
932 			if(bootverbose)
933 				device_printf(dev,
934 					"Enlightened ATA/IDE detected\n");
935 			ret = BUS_PROBE_DEFAULT;
936 		} else if(bootverbose)
937 			device_printf(dev, "Emulated ATA/IDE set (hw.ata.disk_enable set)\n");
938 		break;
939 	case DRIVER_STORVSC:
940 		if(bootverbose)
941 			device_printf(dev, "Enlightened SCSI device detected\n");
942 		ret = BUS_PROBE_DEFAULT;
943 		break;
944 	default:
945 		ret = ENXIO;
946 	}
947 	return (ret);
948 }
949 
950 /**
951  * @brief StorVSC attach function
952  *
953  * Function responsible for allocating per-device structures,
954  * setting up CAM interfaces and scanning for available LUNs to
955  * be used for SCSI device peripherals.
956  *
957  * @param a device
958  * @returns 0 on success or an error on failure
959  */
960 static int
961 storvsc_attach(device_t dev)
962 {
963 	struct hv_device *hv_dev = vmbus_get_devctx(dev);
964 	enum hv_storage_type stor_type;
965 	struct storvsc_softc *sc;
966 	struct cam_devq *devq;
967 	int ret, i, j;
968 	struct hv_storvsc_request *reqp;
969 	struct root_hold_token *root_mount_token = NULL;
970 	struct hv_sgl_node *sgl_node = NULL;
971 	void *tmp_buff = NULL;
972 
973 	/*
974 	 * We need to serialize storvsc attach calls.
975 	 */
976 	root_mount_token = root_mount_hold("storvsc");
977 
978 	sc = device_get_softc(dev);
979 	if (sc == NULL) {
980 		ret = ENOMEM;
981 		goto cleanup;
982 	}
983 
984 	stor_type = storvsc_get_storage_type(dev);
985 
986 	if (stor_type == DRIVER_UNKNOWN) {
987 		ret = ENODEV;
988 		goto cleanup;
989 	}
990 
991 	bzero(sc, sizeof(struct storvsc_softc));
992 
993 	/* fill in driver specific properties */
994 	sc->hs_drv_props = &g_drv_props_table[stor_type];
995 
996 	/* fill in device specific properties */
997 	sc->hs_unit	= device_get_unit(dev);
998 	sc->hs_dev	= hv_dev;
999 	device_set_desc(dev, g_drv_props_table[stor_type].drv_desc);
1000 
1001 	LIST_INIT(&sc->hs_free_list);
1002 	mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
1003 
1004 	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
1005 		reqp = malloc(sizeof(struct hv_storvsc_request),
1006 				 M_DEVBUF, M_WAITOK|M_ZERO);
1007 		reqp->softc = sc;
1008 
1009 		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
1010 	}
1011 
1012 	/* create sg-list page pool */
1013 	if (FALSE == g_hv_sgl_page_pool.is_init) {
1014 		g_hv_sgl_page_pool.is_init = TRUE;
1015 		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
1016 		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
1017 
1018 		/*
1019 		 * Pre-create SG list, each SG list with
1020 		 * HV_MAX_MULTIPAGE_BUFFER_COUNT segments, each
1021 		 * segment has one page buffer
1022 		 */
1023 		for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) {
1024 	        	sgl_node = malloc(sizeof(struct hv_sgl_node),
1025 			    M_DEVBUF, M_WAITOK|M_ZERO);
1026 
1027 			sgl_node->sgl_data =
1028 			    sglist_alloc(HV_MAX_MULTIPAGE_BUFFER_COUNT,
1029 			    M_WAITOK|M_ZERO);
1030 
1031 			for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
1032 				tmp_buff = malloc(PAGE_SIZE,
1033 				    M_DEVBUF, M_WAITOK|M_ZERO);
1034 
1035 				sgl_node->sgl_data->sg_segs[j].ss_paddr =
1036 				    (vm_paddr_t)tmp_buff;
1037 			}
1038 
1039 			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
1040 			    sgl_node, link);
1041 		}
1042 	}
1043 
1044 	sc->hs_destroy = FALSE;
1045 	sc->hs_drain_notify = FALSE;
1046 	sc->hs_open_multi_channel = FALSE;
1047 	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
1048 
1049 	ret = hv_storvsc_connect_vsp(hv_dev);
1050 	if (ret != 0) {
1051 		goto cleanup;
1052 	}
1053 
1054 	/*
1055 	 * Create the device queue.
1056 	 * Hyper-V maps each target to one SCSI HBA
1057 	 */
1058 	devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
1059 	if (devq == NULL) {
1060 		device_printf(dev, "Failed to alloc device queue\n");
1061 		ret = ENOMEM;
1062 		goto cleanup;
1063 	}
1064 
1065 	sc->hs_sim = cam_sim_alloc(storvsc_action,
1066 				storvsc_poll,
1067 				sc->hs_drv_props->drv_name,
1068 				sc,
1069 				sc->hs_unit,
1070 				&sc->hs_lock, 1,
1071 				sc->hs_drv_props->drv_max_ios_per_target,
1072 				devq);
1073 
1074 	if (sc->hs_sim == NULL) {
1075 		device_printf(dev, "Failed to alloc sim\n");
1076 		cam_simq_free(devq);
1077 		ret = ENOMEM;
1078 		goto cleanup;
1079 	}
1080 
1081 	mtx_lock(&sc->hs_lock);
1082 	/* bus_id is set to 0, need to get it from VMBUS channel query? */
1083 	if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
1084 		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1085 		mtx_unlock(&sc->hs_lock);
1086 		device_printf(dev, "Unable to register SCSI bus\n");
1087 		ret = ENXIO;
1088 		goto cleanup;
1089 	}
1090 
1091 	if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
1092 		 cam_sim_path(sc->hs_sim),
1093 		CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
1094 		xpt_bus_deregister(cam_sim_path(sc->hs_sim));
1095 		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1096 		mtx_unlock(&sc->hs_lock);
1097 		device_printf(dev, "Unable to create path\n");
1098 		ret = ENXIO;
1099 		goto cleanup;
1100 	}
1101 
1102 	mtx_unlock(&sc->hs_lock);
1103 
1104 	root_mount_rel(root_mount_token);
1105 	return (0);
1106 
1107 
1108 cleanup:
1109 	root_mount_rel(root_mount_token);
1110 	while (!LIST_EMPTY(&sc->hs_free_list)) {
1111 		reqp = LIST_FIRST(&sc->hs_free_list);
1112 		LIST_REMOVE(reqp, link);
1113 		free(reqp, M_DEVBUF);
1114 	}
1115 
1116 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1117 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1118 		LIST_REMOVE(sgl_node, link);
1119 		for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
1120 			if (NULL !=
1121 			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
1122 				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
1123 			}
1124 		}
1125 		sglist_free(sgl_node->sgl_data);
1126 		free(sgl_node, M_DEVBUF);
1127 	}
1128 
1129 	return (ret);
1130 }
1131 
1132 /**
1133  * @brief StorVSC device detach function
1134  *
1135  * This function is responsible for safely detaching a
1136  * StorVSC device.  This includes waiting for inbound responses
1137  * to complete and freeing associated per-device structures.
1138  *
1139  * @param dev a device
1140  * returns 0 on success
1141  */
1142 static int
1143 storvsc_detach(device_t dev)
1144 {
1145 	struct storvsc_softc *sc = device_get_softc(dev);
1146 	struct hv_storvsc_request *reqp = NULL;
1147 	struct hv_device *hv_device = vmbus_get_devctx(dev);
1148 	struct hv_sgl_node *sgl_node = NULL;
1149 	int j = 0;
1150 
1151 	sc->hs_destroy = TRUE;
1152 
1153 	/*
1154 	 * At this point, all outbound traffic should be disabled. We
1155 	 * only allow inbound traffic (responses) to proceed so that
1156 	 * outstanding requests can be completed.
1157 	 */
1158 
1159 	sc->hs_drain_notify = TRUE;
1160 	sema_wait(&sc->hs_drain_sema);
1161 	sc->hs_drain_notify = FALSE;
1162 
1163 	/*
1164 	 * Since we have already drained, we don't need to busy wait.
1165 	 * The call to close the channel will reset the callback
1166 	 * under the protection of the incoming channel lock.
1167 	 */
1168 
1169 	hv_vmbus_channel_close(hv_device->channel);
1170 
1171 	mtx_lock(&sc->hs_lock);
1172 	while (!LIST_EMPTY(&sc->hs_free_list)) {
1173 		reqp = LIST_FIRST(&sc->hs_free_list);
1174 		LIST_REMOVE(reqp, link);
1175 
1176 		free(reqp, M_DEVBUF);
1177 	}
1178 	mtx_unlock(&sc->hs_lock);
1179 
1180 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1181 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1182 		LIST_REMOVE(sgl_node, link);
1183 		for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++){
1184 			if (NULL !=
1185 			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
1186 				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
1187 			}
1188 		}
1189 		sglist_free(sgl_node->sgl_data);
1190 		free(sgl_node, M_DEVBUF);
1191 	}
1192 
1193 	return (0);
1194 }
1195 
1196 #if HVS_TIMEOUT_TEST
1197 /**
1198  * @brief unit test for timed out operations
1199  *
1200  * This function provides unit testing capability to simulate
1201  * timed out operations.  Recompilation with HV_TIMEOUT_TEST=1
1202  * is required.
1203  *
1204  * @param reqp pointer to a request structure
1205  * @param opcode SCSI operation being performed
1206  * @param wait if 1, wait for I/O to complete
1207  */
1208 static void
1209 storvsc_timeout_test(struct hv_storvsc_request *reqp,
1210 		uint8_t opcode, int wait)
1211 {
1212 	int ret;
1213 	union ccb *ccb = reqp->ccb;
1214 	struct storvsc_softc *sc = reqp->softc;
1215 
1216 	if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) {
1217 		return;
1218 	}
1219 
1220 	if (wait) {
1221 		mtx_lock(&reqp->event.mtx);
1222 	}
1223 	ret = hv_storvsc_io_request(sc->hs_dev, reqp);
1224 	if (ret != 0) {
1225 		if (wait) {
1226 			mtx_unlock(&reqp->event.mtx);
1227 		}
1228 		printf("%s: io_request failed with %d.\n",
1229 				__func__, ret);
1230 		ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1231 		mtx_lock(&sc->hs_lock);
1232 		storvsc_free_request(sc, reqp);
1233 		xpt_done(ccb);
1234 		mtx_unlock(&sc->hs_lock);
1235 		return;
1236 	}
1237 
1238 	if (wait) {
1239 		xpt_print(ccb->ccb_h.path,
1240 				"%u: %s: waiting for IO return.\n",
1241 				ticks, __func__);
1242 		ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
1243 		mtx_unlock(&reqp->event.mtx);
1244 		xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
1245 				ticks, __func__, (ret == 0)?
1246 				"IO return detected" :
1247 				"IO return not detected");
1248 		/*
1249 		 * Now both the timer handler and io done are running
1250 		 * simultaneously. We want to confirm the io done always
1251 		 * finishes after the timer handler exits. So reqp used by
1252 		 * timer handler is not freed or stale. Do busy loop for
1253 		 * another 1/10 second to make sure io done does
1254 		 * wait for the timer handler to complete.
1255 		 */
1256 		DELAY(100*1000);
1257 		mtx_lock(&sc->hs_lock);
1258 		xpt_print(ccb->ccb_h.path,
1259 				"%u: %s: finishing, queue frozen %d, "
1260 				"ccb status 0x%x scsi_status 0x%x.\n",
1261 				ticks, __func__, sc->hs_frozen,
1262 				ccb->ccb_h.status,
1263 				ccb->csio.scsi_status);
1264 		mtx_unlock(&sc->hs_lock);
1265 	}
1266 }
1267 #endif /* HVS_TIMEOUT_TEST */
1268 
1269 /**
1270  * @brief timeout handler for requests
1271  *
1272  * This function is called as a result of a callout expiring.
1273  *
1274  * @param arg pointer to a request
1275  */
static void
storvsc_timeout(void *arg)
{
	struct hv_storvsc_request *reqp = arg;
	struct storvsc_softc *sc = reqp->softc;
	union ccb *ccb = reqp->ccb;

	/*
	 * First expiry: log the stall and re-arm the callout for one
	 * more full timeout period before taking any further action.
	 */
	if (reqp->retries == 0) {
		mtx_lock(&sc->hs_lock);
		xpt_print(ccb->ccb_h.path,
		    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
		    ticks, reqp, ccb->ccb_h.timeout / 1000);
		cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
		mtx_unlock(&sc->hs_lock);

		reqp->retries++;
		/* Re-arm for the same timeout; timeout is in milliseconds. */
		callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
		    0, storvsc_timeout, reqp, 0);
#if HVS_TIMEOUT_TEST
		storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
#endif
		return;
	}

	/*
	 * Second expiry: the I/O is considered stuck.  Freeze the SIM
	 * queue (once) so CAM stops dispatching new requests; the queue
	 * is released in storvsc_io_done() when the I/O finally returns.
	 */
	mtx_lock(&sc->hs_lock);
	xpt_print(ccb->ccb_h.path,
		"%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
		ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
		(sc->hs_frozen == 0)?
		"freezing the queue" : "the queue is already frozen");
	if (sc->hs_frozen == 0) {
		sc->hs_frozen = 1;
		xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
	}
	mtx_unlock(&sc->hs_lock);

#if HVS_TIMEOUT_TEST
	storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
#endif
}
1316 
1317 /**
1318  * @brief StorVSC device poll function
1319  *
1320  * This function is responsible for servicing requests when
1321  * interrupts are disabled (i.e when we are dumping core.)
1322  *
1323  * @param sim a pointer to a CAM SCSI interface module
1324  */
1325 static void
1326 storvsc_poll(struct cam_sim *sim)
1327 {
1328 	struct storvsc_softc *sc = cam_sim_softc(sim);
1329 
1330 	mtx_assert(&sc->hs_lock, MA_OWNED);
1331 	mtx_unlock(&sc->hs_lock);
1332 	hv_storvsc_on_channel_callback(sc->hs_dev->channel);
1333 	mtx_lock(&sc->hs_lock);
1334 }
1335 
1336 /**
1337  * @brief StorVSC device action function
1338  *
1339  * This function is responsible for handling SCSI operations which
1340  * are passed from the CAM layer.  The requests are in the form of
1341  * CAM control blocks which indicate the action being performed.
1342  * Not all actions require converting the request to a VSCSI protocol
1343  * message - these actions can be responded to by this driver.
1344  * Requests which are destined for a backend storage device are converted
1345  * to a VSCSI protocol message and sent on the channel connection associated
1346  * with this device.
1347  *
1348  * @param sim pointer to a CAM SCSI interface module
1349  * @param ccb pointer to a CAM control block
1350  */
1351 static void
1352 storvsc_action(struct cam_sim *sim, union ccb *ccb)
1353 {
1354 	struct storvsc_softc *sc = cam_sim_softc(sim);
1355 	int res;
1356 
1357 	mtx_assert(&sc->hs_lock, MA_OWNED);
1358 	switch (ccb->ccb_h.func_code) {
1359 	case XPT_PATH_INQ: {
1360 		struct ccb_pathinq *cpi = &ccb->cpi;
1361 
1362 		cpi->version_num = 1;
1363 		cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
1364 		cpi->target_sprt = 0;
1365 		cpi->hba_misc = PIM_NOBUSRESET;
1366 		cpi->hba_eng_cnt = 0;
1367 		cpi->max_target = STORVSC_MAX_TARGETS;
1368 		cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
1369 		cpi->initiator_id = cpi->max_target;
1370 		cpi->bus_id = cam_sim_bus(sim);
1371 		cpi->base_transfer_speed = 300000;
1372 		cpi->transport = XPORT_SAS;
1373 		cpi->transport_version = 0;
1374 		cpi->protocol = PROTO_SCSI;
1375 		cpi->protocol_version = SCSI_REV_SPC2;
1376 		strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
1377 		strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
1378 		strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
1379 		cpi->unit_number = cam_sim_unit(sim);
1380 
1381 		ccb->ccb_h.status = CAM_REQ_CMP;
1382 		xpt_done(ccb);
1383 		return;
1384 	}
1385 	case XPT_GET_TRAN_SETTINGS: {
1386 		struct  ccb_trans_settings *cts = &ccb->cts;
1387 
1388 		cts->transport = XPORT_SAS;
1389 		cts->transport_version = 0;
1390 		cts->protocol = PROTO_SCSI;
1391 		cts->protocol_version = SCSI_REV_SPC2;
1392 
1393 		/* enable tag queuing and disconnected mode */
1394 		cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
1395 		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
1396 		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
1397 		cts->xport_specific.valid = CTS_SPI_VALID_DISC;
1398 		cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
1399 
1400 		ccb->ccb_h.status = CAM_REQ_CMP;
1401 		xpt_done(ccb);
1402 		return;
1403 	}
1404 	case XPT_SET_TRAN_SETTINGS:	{
1405 		ccb->ccb_h.status = CAM_REQ_CMP;
1406 		xpt_done(ccb);
1407 		return;
1408 	}
1409 	case XPT_CALC_GEOMETRY:{
1410 		cam_calc_geometry(&ccb->ccg, 1);
1411 		xpt_done(ccb);
1412 		return;
1413 	}
1414 	case  XPT_RESET_BUS:
1415 	case  XPT_RESET_DEV:{
1416 #if HVS_HOST_RESET
1417 		if ((res = hv_storvsc_host_reset(sc->hs_dev)) != 0) {
1418 			xpt_print(ccb->ccb_h.path,
1419 				"hv_storvsc_host_reset failed with %d\n", res);
1420 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1421 			xpt_done(ccb);
1422 			return;
1423 		}
1424 		ccb->ccb_h.status = CAM_REQ_CMP;
1425 		xpt_done(ccb);
1426 		return;
1427 #else
1428 		xpt_print(ccb->ccb_h.path,
1429 				  "%s reset not supported.\n",
1430 				  (ccb->ccb_h.func_code == XPT_RESET_BUS)?
1431 				  "bus" : "dev");
1432 		ccb->ccb_h.status = CAM_REQ_INVALID;
1433 		xpt_done(ccb);
1434 		return;
1435 #endif	/* HVS_HOST_RESET */
1436 	}
1437 	case XPT_SCSI_IO:
1438 	case XPT_IMMED_NOTIFY: {
1439 		struct hv_storvsc_request *reqp = NULL;
1440 
1441 		if (ccb->csio.cdb_len == 0) {
1442 			panic("cdl_len is 0\n");
1443 		}
1444 
1445 		if (LIST_EMPTY(&sc->hs_free_list)) {
1446 			ccb->ccb_h.status = CAM_REQUEUE_REQ;
1447 			if (sc->hs_frozen == 0) {
1448 				sc->hs_frozen = 1;
1449 				xpt_freeze_simq(sim, /* count*/1);
1450 			}
1451 			xpt_done(ccb);
1452 			return;
1453 		}
1454 
1455 		reqp = LIST_FIRST(&sc->hs_free_list);
1456 		LIST_REMOVE(reqp, link);
1457 
1458 		bzero(reqp, sizeof(struct hv_storvsc_request));
1459 		reqp->softc = sc;
1460 
1461 		ccb->ccb_h.status |= CAM_SIM_QUEUED;
1462 		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
1463 			ccb->ccb_h.status = CAM_REQ_INVALID;
1464 			xpt_done(ccb);
1465 			return;
1466 		}
1467 
1468 		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
1469 			callout_init(&reqp->callout, 1);
1470 			callout_reset_sbt(&reqp->callout,
1471 			    SBT_1MS * ccb->ccb_h.timeout, 0,
1472 			    storvsc_timeout, reqp, 0);
1473 #if HVS_TIMEOUT_TEST
1474 			cv_init(&reqp->event.cv, "storvsc timeout cv");
1475 			mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
1476 					NULL, MTX_DEF);
1477 			switch (reqp->vstor_packet.vm_srb.cdb[0]) {
1478 				case MODE_SELECT_10:
1479 				case SEND_DIAGNOSTIC:
1480 					/* To have timer send the request. */
1481 					return;
1482 				default:
1483 					break;
1484 			}
1485 #endif /* HVS_TIMEOUT_TEST */
1486 		}
1487 
1488 		if ((res = hv_storvsc_io_request(sc->hs_dev, reqp)) != 0) {
1489 			xpt_print(ccb->ccb_h.path,
1490 				"hv_storvsc_io_request failed with %d\n", res);
1491 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1492 			storvsc_free_request(sc, reqp);
1493 			xpt_done(ccb);
1494 			return;
1495 		}
1496 		return;
1497 	}
1498 
1499 	default:
1500 		ccb->ccb_h.status = CAM_REQ_INVALID;
1501 		xpt_done(ccb);
1502 		return;
1503 	}
1504 }
1505 
1506 /**
1507  * @brief destroy bounce buffer
1508  *
1509  * This function is responsible for destroy a Scatter/Gather list
1510  * that create by storvsc_create_bounce_buffer()
1511  *
1512  * @param sgl- the Scatter/Gather need be destroy
1513  * @param sg_count- page count of the SG list.
1514  *
1515  */
1516 static void
1517 storvsc_destroy_bounce_buffer(struct sglist *sgl)
1518 {
1519 	struct hv_sgl_node *sgl_node = NULL;
1520 	if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
1521 		printf("storvsc error: not enough in use sgl\n");
1522 		return;
1523 	}
1524 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
1525 	LIST_REMOVE(sgl_node, link);
1526 	sgl_node->sgl_data = sgl;
1527 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
1528 }
1529 
1530 /**
1531  * @brief create bounce buffer
1532  *
1533  * This function is responsible for create a Scatter/Gather list,
1534  * which hold several pages that can be aligned with page size.
1535  *
1536  * @param seg_count- SG-list segments count
1537  * @param write - if WRITE_TYPE, set SG list page used size to 0,
1538  * otherwise set used size to page size.
1539  *
1540  * return NULL if create failed
1541  */
1542 static struct sglist *
1543 storvsc_create_bounce_buffer(uint16_t seg_count, int write)
1544 {
1545 	int i = 0;
1546 	struct sglist *bounce_sgl = NULL;
1547 	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
1548 	struct hv_sgl_node *sgl_node = NULL;
1549 
1550 	/* get struct sglist from free_sgl_list */
1551 	if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1552 		printf("storvsc error: not enough free sgl\n");
1553 		return NULL;
1554 	}
1555 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1556 	LIST_REMOVE(sgl_node, link);
1557 	bounce_sgl = sgl_node->sgl_data;
1558 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
1559 
1560 	bounce_sgl->sg_maxseg = seg_count;
1561 
1562 	if (write == WRITE_TYPE)
1563 		bounce_sgl->sg_nseg = 0;
1564 	else
1565 		bounce_sgl->sg_nseg = seg_count;
1566 
1567 	for (i = 0; i < seg_count; i++)
1568 	        bounce_sgl->sg_segs[i].ss_len = buf_len;
1569 
1570 	return bounce_sgl;
1571 }
1572 
1573 /**
1574  * @brief copy data from SG list to bounce buffer
1575  *
1576  * This function is responsible for copy data from one SG list's segments
1577  * to another SG list which used as bounce buffer.
1578  *
1579  * @param bounce_sgl - the destination SG list
1580  * @param orig_sgl - the segment of the source SG list.
1581  * @param orig_sgl_count - the count of segments.
1582  * @param orig_sgl_count - indicate which segment need bounce buffer,
1583  *  set 1 means need.
1584  *
1585  */
1586 static void
1587 storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
1588 			       bus_dma_segment_t *orig_sgl,
1589 			       unsigned int orig_sgl_count,
1590 			       uint64_t seg_bits)
1591 {
1592 	int src_sgl_idx = 0;
1593 
1594 	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
1595 		if (seg_bits & (1 << src_sgl_idx)) {
1596 			memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
1597 			    (void*)orig_sgl[src_sgl_idx].ds_addr,
1598 			    orig_sgl[src_sgl_idx].ds_len);
1599 
1600 			bounce_sgl->sg_segs[src_sgl_idx].ss_len =
1601 			    orig_sgl[src_sgl_idx].ds_len;
1602 		}
1603 	}
1604 }
1605 
1606 /**
1607  * @brief copy data from SG list which used as bounce to another SG list
1608  *
1609  * This function is responsible for copy data from one SG list with bounce
1610  * buffer to another SG list's segments.
1611  *
1612  * @param dest_sgl - the destination SG list's segments
1613  * @param dest_sgl_count - the count of destination SG list's segment.
1614  * @param src_sgl - the source SG list.
1615  * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
1616  *
1617  */
1618 void
1619 storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
1620 				    unsigned int dest_sgl_count,
1621 				    struct sglist* src_sgl,
1622 				    uint64_t seg_bits)
1623 {
1624 	int sgl_idx = 0;
1625 
1626 	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
1627 		if (seg_bits & (1 << sgl_idx)) {
1628 			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
1629 			    (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
1630 			    src_sgl->sg_segs[sgl_idx].ss_len);
1631 		}
1632 	}
1633 }
1634 
1635 /**
1636  * @brief check SG list with bounce buffer or not
1637  *
1638  * This function is responsible for check if need bounce buffer for SG list.
1639  *
1640  * @param sgl - the SG list's segments
1641  * @param sg_count - the count of SG list's segment.
1642  * @param bits - segmengs number that need bounce buffer
1643  *
1644  * return -1 if SG list needless bounce buffer
1645  */
1646 static int
1647 storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
1648 				unsigned int sg_count,
1649 				uint64_t *bits)
1650 {
1651 	int i = 0;
1652 	int offset = 0;
1653 	uint64_t phys_addr = 0;
1654 	uint64_t tmp_bits = 0;
1655 	boolean_t found_hole = FALSE;
1656 	boolean_t pre_aligned = TRUE;
1657 
1658 	if (sg_count < 2){
1659 		return -1;
1660 	}
1661 
1662 	*bits = 0;
1663 
1664 	phys_addr = vtophys(sgl[0].ds_addr);
1665 	offset =  phys_addr - trunc_page(phys_addr);
1666 
1667 	if (offset != 0) {
1668 		pre_aligned = FALSE;
1669 		tmp_bits |= 1;
1670 	}
1671 
1672 	for (i = 1; i < sg_count; i++) {
1673 		phys_addr = vtophys(sgl[i].ds_addr);
1674 		offset =  phys_addr - trunc_page(phys_addr);
1675 
1676 		if (offset == 0) {
1677 			if (FALSE == pre_aligned){
1678 				/*
1679 				 * This segment is aligned, if the previous
1680 				 * one is not aligned, find a hole
1681 				 */
1682 				found_hole = TRUE;
1683 			}
1684 			pre_aligned = TRUE;
1685 		} else {
1686 			tmp_bits |= 1 << i;
1687 			if (!pre_aligned) {
1688 				if (phys_addr != vtophys(sgl[i-1].ds_addr +
1689 				    sgl[i-1].ds_len)) {
1690 					/*
1691 					 * Check whether connect to previous
1692 					 * segment,if not, find the hole
1693 					 */
1694 					found_hole = TRUE;
1695 				}
1696 			} else {
1697 				found_hole = TRUE;
1698 			}
1699 			pre_aligned = FALSE;
1700 		}
1701 	}
1702 
1703 	if (!found_hole) {
1704 		return (-1);
1705 	} else {
1706 		*bits = tmp_bits;
1707 		return 0;
1708 	}
1709 }
1710 
1711 /**
1712  * @brief Fill in a request structure based on a CAM control block
1713  *
1714  * Fills in a request structure based on the contents of a CAM control
1715  * block.  The request structure holds the payload information for
1716  * VSCSI protocol request.
1717  *
1718  * @param ccb pointer to a CAM contorl block
1719  * @param reqp pointer to a request structure
1720  */
1721 static int
1722 create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
1723 {
1724 	struct ccb_scsiio *csio = &ccb->csio;
1725 	uint64_t phys_addr;
1726 	uint32_t bytes_to_copy = 0;
1727 	uint32_t pfn_num = 0;
1728 	uint32_t pfn;
1729 	uint64_t not_aligned_seg_bits = 0;
1730 
1731 	/* refer to struct vmscsi_req for meanings of these two fields */
1732 	reqp->vstor_packet.u.vm_srb.port =
1733 		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
1734 	reqp->vstor_packet.u.vm_srb.path_id =
1735 		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
1736 
1737 	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
1738 	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
1739 
1740 	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
1741 	if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
1742 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
1743 			csio->cdb_len);
1744 	} else {
1745 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
1746 			csio->cdb_len);
1747 	}
1748 
1749 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1750 	case CAM_DIR_OUT:
1751 		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
1752 		break;
1753 	case CAM_DIR_IN:
1754 		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
1755 		break;
1756 	case CAM_DIR_NONE:
1757 		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1758 		break;
1759 	default:
1760 		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1761 		break;
1762 	}
1763 
1764 	reqp->sense_data     = &csio->sense_data;
1765 	reqp->sense_info_len = csio->sense_len;
1766 
1767 	reqp->ccb = ccb;
1768 
1769 	if (0 == csio->dxfer_len) {
1770 		return (0);
1771 	}
1772 
1773 	reqp->data_buf.length = csio->dxfer_len;
1774 
1775 	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
1776 	case CAM_DATA_VADDR:
1777 	{
1778 		bytes_to_copy = csio->dxfer_len;
1779 		phys_addr = vtophys(csio->data_ptr);
1780 		reqp->data_buf.offset = phys_addr & PAGE_MASK;
1781 
1782 		while (bytes_to_copy != 0) {
1783 			int bytes, page_offset;
1784 			phys_addr =
1785 			    vtophys(&csio->data_ptr[reqp->data_buf.length -
1786 			    bytes_to_copy]);
1787 			pfn = phys_addr >> PAGE_SHIFT;
1788 			reqp->data_buf.pfn_array[pfn_num] = pfn;
1789 			page_offset = phys_addr & PAGE_MASK;
1790 
1791 			bytes = min(PAGE_SIZE - page_offset, bytes_to_copy);
1792 
1793 			bytes_to_copy -= bytes;
1794 			pfn_num++;
1795 		}
1796 		break;
1797 	}
1798 
1799 	case CAM_DATA_SG:
1800 	{
1801 		int i = 0;
1802 		int offset = 0;
1803 		int ret;
1804 
1805 		bus_dma_segment_t *storvsc_sglist =
1806 		    (bus_dma_segment_t *)ccb->csio.data_ptr;
1807 		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
1808 
1809 		printf("Storvsc: get SG I/O operation, %d\n",
1810 		    reqp->vstor_packet.u.vm_srb.data_in);
1811 
1812 		if (storvsc_sg_count > HV_MAX_MULTIPAGE_BUFFER_COUNT){
1813 			printf("Storvsc: %d segments is too much, "
1814 			    "only support %d segments\n",
1815 			    storvsc_sg_count, HV_MAX_MULTIPAGE_BUFFER_COUNT);
1816 			return (EINVAL);
1817 		}
1818 
1819 		/*
1820 		 * We create our own bounce buffer function currently. Idealy
1821 		 * we should use BUS_DMA(9) framework. But with current BUS_DMA
1822 		 * code there is no callback API to check the page alignment of
1823 		 * middle segments before busdma can decide if a bounce buffer
1824 		 * is needed for particular segment. There is callback,
1825 		 * "bus_dma_filter_t *filter", but the parrameters are not
1826 		 * sufficient for storvsc driver.
1827 		 * TODO:
1828 		 *	Add page alignment check in BUS_DMA(9) callback. Once
1829 		 *	this is complete, switch the following code to use
1830 		 *	BUS_DMA(9) for storvsc bounce buffer support.
1831 		 */
1832 		/* check if we need to create bounce buffer */
1833 		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
1834 		    storvsc_sg_count, &not_aligned_seg_bits);
1835 		if (ret != -1) {
1836 			reqp->bounce_sgl =
1837 			    storvsc_create_bounce_buffer(storvsc_sg_count,
1838 			    reqp->vstor_packet.u.vm_srb.data_in);
1839 			if (NULL == reqp->bounce_sgl) {
1840 				printf("Storvsc_error: "
1841 				    "create bounce buffer failed.\n");
1842 				return (ENOMEM);
1843 			}
1844 
1845 			reqp->bounce_sgl_count = storvsc_sg_count;
1846 			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
1847 
1848 			/*
1849 			 * if it is write, we need copy the original data
1850 			 *to bounce buffer
1851 			 */
1852 			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
1853 				storvsc_copy_sgl_to_bounce_buf(
1854 				    reqp->bounce_sgl,
1855 				    storvsc_sglist,
1856 				    storvsc_sg_count,
1857 				    reqp->not_aligned_seg_bits);
1858 			}
1859 
1860 			/* transfer virtual address to physical frame number */
1861 			if (reqp->not_aligned_seg_bits & 0x1){
1862  				phys_addr =
1863 				    vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
1864 			}else{
1865  				phys_addr =
1866 					vtophys(storvsc_sglist[0].ds_addr);
1867 			}
1868 			reqp->data_buf.offset = phys_addr & PAGE_MASK;
1869 
1870 			pfn = phys_addr >> PAGE_SHIFT;
1871 			reqp->data_buf.pfn_array[0] = pfn;
1872 
1873 			for (i = 1; i < storvsc_sg_count; i++) {
1874 				if (reqp->not_aligned_seg_bits & (1 << i)) {
1875 					phys_addr =
1876 					    vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
1877 				} else {
1878 					phys_addr =
1879 					    vtophys(storvsc_sglist[i].ds_addr);
1880 				}
1881 
1882 				pfn = phys_addr >> PAGE_SHIFT;
1883 				reqp->data_buf.pfn_array[i] = pfn;
1884 			}
1885 		} else {
1886 			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
1887 
1888 			reqp->data_buf.offset = phys_addr & PAGE_MASK;
1889 
1890 			for (i = 0; i < storvsc_sg_count; i++) {
1891 				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
1892 				pfn = phys_addr >> PAGE_SHIFT;
1893 				reqp->data_buf.pfn_array[i] = pfn;
1894 			}
1895 
1896 			/* check the last segment cross boundary or not */
1897 			offset = phys_addr & PAGE_MASK;
1898 			if (offset) {
1899 				phys_addr =
1900 				    vtophys(storvsc_sglist[i-1].ds_addr +
1901 				    PAGE_SIZE - offset);
1902 				pfn = phys_addr >> PAGE_SHIFT;
1903 				reqp->data_buf.pfn_array[i] = pfn;
1904 			}
1905 
1906 			reqp->bounce_sgl_count = 0;
1907 		}
1908 		break;
1909 	}
1910 	default:
1911 		printf("Unknow flags: %d\n", ccb->ccb_h.flags);
1912 		return(EINVAL);
1913 	}
1914 
1915 	return(0);
1916 }
1917 
1918 /*
1919  * Modified based on scsi_print_inquiry which is responsible to
1920  * print the detail information for scsi_inquiry_data.
1921  *
1922  * Return 1 if it is valid, 0 otherwise.
1923  */
1924 static inline int
1925 is_inquiry_valid(const struct scsi_inquiry_data *inq_data)
1926 {
1927 	uint8_t type;
1928 	char vendor[16], product[48], revision[16];
1929 
1930 	/*
1931 	 * Check device type and qualifier
1932 	 */
1933 	if (!(SID_QUAL_IS_VENDOR_UNIQUE(inq_data) ||
1934 	    SID_QUAL(inq_data) == SID_QUAL_LU_CONNECTED))
1935 		return (0);
1936 
1937 	type = SID_TYPE(inq_data);
1938 	switch (type) {
1939 	case T_DIRECT:
1940 	case T_SEQUENTIAL:
1941 	case T_PRINTER:
1942 	case T_PROCESSOR:
1943 	case T_WORM:
1944 	case T_CDROM:
1945 	case T_SCANNER:
1946 	case T_OPTICAL:
1947 	case T_CHANGER:
1948 	case T_COMM:
1949 	case T_STORARRAY:
1950 	case T_ENCLOSURE:
1951 	case T_RBC:
1952 	case T_OCRW:
1953 	case T_OSD:
1954 	case T_ADC:
1955 		break;
1956 	case T_NODEVICE:
1957 	default:
1958 		return (0);
1959 	}
1960 
1961 	/*
1962 	 * Check vendor, product, and revision
1963 	 */
1964 	cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor),
1965 	    sizeof(vendor));
1966 	cam_strvis(product, inq_data->product, sizeof(inq_data->product),
1967 	    sizeof(product));
1968 	cam_strvis(revision, inq_data->revision, sizeof(inq_data->revision),
1969 	    sizeof(revision));
1970 	if (strlen(vendor) == 0  ||
1971 	    strlen(product) == 0 ||
1972 	    strlen(revision) == 0)
1973 		return (0);
1974 
1975 	return (1);
1976 }
1977 
1978 /**
1979  * @brief completion function before returning to CAM
1980  *
1981  * I/O process has been completed and the result needs
1982  * to be passed to the CAM layer.
1983  * Free resources related to this request.
1984  *
1985  * @param reqp pointer to a request structure
1986  */
static void
storvsc_io_done(struct hv_storvsc_request *reqp)
{
	union ccb *ccb = reqp->ccb;
	struct ccb_scsiio *csio = &ccb->csio;
	struct storvsc_softc *sc = reqp->softc;
	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
	bus_dma_segment_t *ori_sglist = NULL;
	int ori_sg_count = 0;

	/* destroy bounce buffer if it is used */
	if (reqp->bounce_sgl_count) {
		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
		ori_sg_count = ccb->csio.sglist_cnt;

		/*
		 * If it is READ operation, we should copy back the data
		 * to original SG list.
		 */
		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
			    ori_sg_count,
			    reqp->bounce_sgl,
			    reqp->not_aligned_seg_bits);
		}

		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
		reqp->bounce_sgl_count = 0;
	}

	/*
	 * A non-zero retry count means storvsc_timeout() already fired
	 * for this request; log the late completion (and, in the test
	 * build, wake any waiter in storvsc_timeout_test()).
	 */
	if (reqp->retries > 0) {
		mtx_lock(&sc->hs_lock);
#if HVS_TIMEOUT_TEST
		xpt_print(ccb->ccb_h.path,
			"%u: IO returned after timeout, "
			"waking up timer handler if any.\n", ticks);
		mtx_lock(&reqp->event.mtx);
		cv_signal(&reqp->event.cv);
		mtx_unlock(&reqp->event.mtx);
#endif
		reqp->retries = 0;
		xpt_print(ccb->ccb_h.path,
			"%u: IO returned after timeout, "
			"stopping timer if any.\n", ticks);
		mtx_unlock(&sc->hs_lock);
	}

	/*
	 * callout_drain() will wait for the timer handler to finish
	 * if it is running. So we don't need any lock to synchronize
	 * between this routine and the timer handler.
	 * Note that we need to make sure reqp is not freed when timer
	 * handler is using or will use it.
	 */
	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
		callout_drain(&reqp->callout);
	}

	/* Clear the queued flag and old status before setting the result. */
	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
		const struct scsi_generic *cmd;

		/*
		 * Check whether the data for INQUIRY cmd is valid or
		 * not.  Windows 10 and Windows 2016 send all zero
		 * inquiry data to VM even for unpopulated slots.
		 */
		cmd = (const struct scsi_generic *)
		    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
		     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
		if (cmd->opcode == INQUIRY &&
		    is_inquiry_valid(
		    (const struct scsi_inquiry_data *)csio->data_ptr) == 0) {
			/* Report the slot as empty rather than completing. */
			ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
			if (bootverbose) {
				mtx_lock(&sc->hs_lock);
				xpt_print(ccb->ccb_h.path,
				    "storvsc uninstalled device\n");
				mtx_unlock(&sc->hs_lock);
			}
		} else {
			ccb->ccb_h.status |= CAM_REQ_CMP;
		}
	} else {
		mtx_lock(&sc->hs_lock);
		xpt_print(ccb->ccb_h.path,
			"storvsc scsi_status = %d\n",
			vm_srb->scsi_status);
		mtx_unlock(&sc->hs_lock);
		ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
	}

	/* Propagate SCSI status and residual count back to CAM. */
	ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
	ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;

	if (reqp->sense_info_len != 0) {
		csio->sense_resid = csio->sense_len - reqp->sense_info_len;
		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
	}

	/*
	 * Release the SIM queue if a timeout (or slot exhaustion)
	 * froze it, then recycle the request and complete the CCB.
	 */
	mtx_lock(&sc->hs_lock);
	if (reqp->softc->hs_frozen == 1) {
		xpt_print(ccb->ccb_h.path,
			"%u: storvsc unfreezing softc 0x%p.\n",
			ticks, reqp->softc);
		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
		reqp->softc->hs_frozen = 0;
	}
	storvsc_free_request(sc, reqp);
	xpt_done(ccb);
	mtx_unlock(&sc->hs_lock);
}
2100 
2101 /**
2102  * @brief Free a request structure
2103  *
2104  * Free a request structure by returning it to the free list
2105  *
2106  * @param sc pointer to a softc
2107  * @param reqp pointer to a request structure
2108  */
2109 static void
2110 storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
2111 {
2112 
2113 	LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
2114 }
2115 
2116 /**
2117  * @brief Determine type of storage device from GUID
2118  *
2119  * Using the type GUID, determine if this is a StorVSC (paravirtual
2120  * SCSI or BlkVSC (paravirtual IDE) device.
2121  *
2122  * @param dev a device
2123  * returns an enum
2124  */
2125 static enum hv_storage_type
2126 storvsc_get_storage_type(device_t dev)
2127 {
2128 	const char *p = vmbus_get_type(dev);
2129 
2130 	if (!memcmp(p, &gBlkVscDeviceType, sizeof(hv_guid))) {
2131 		return DRIVER_BLKVSC;
2132 	} else if (!memcmp(p, &gStorVscDeviceType, sizeof(hv_guid))) {
2133 		return DRIVER_STORVSC;
2134 	}
2135 	return (DRIVER_UNKNOWN);
2136 }
2137 
2138