xref: /freebsd/sys/dev/hyperv/vmbus/vmbus.c (revision 716fd348)
1 /*-
2  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * VM Bus Driver Implementation
31  */
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/kernel.h>
38 #include <sys/linker.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/module.h>
42 #include <sys/mutex.h>
43 #include <sys/sbuf.h>
44 #include <sys/smp.h>
45 #include <sys/sysctl.h>
46 #include <sys/systm.h>
47 #include <sys/taskqueue.h>
48 
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/pmap.h>
52 
53 #include <machine/bus.h>
54 #include <machine/intr_machdep.h>
55 #include <machine/metadata.h>
56 #include <machine/md_var.h>
57 #include <machine/resource.h>
58 #include <x86/include/apicvar.h>
59 
60 #include <contrib/dev/acpica/include/acpi.h>
61 #include <dev/acpica/acpivar.h>
62 
63 #include <dev/hyperv/include/hyperv.h>
64 #include <dev/hyperv/include/vmbus_xact.h>
65 #include <dev/hyperv/vmbus/hyperv_reg.h>
66 #include <dev/hyperv/vmbus/hyperv_var.h>
67 #include <dev/hyperv/vmbus/vmbus_reg.h>
68 #include <dev/hyperv/vmbus/vmbus_var.h>
69 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
70 
71 #include "acpi_if.h"
72 #include "pcib_if.h"
73 #include "vmbus_if.h"
74 
75 #define VMBUS_GPADL_START		0xe1e10
76 
77 struct vmbus_msghc {
78 	struct vmbus_xact		*mh_xact;
79 	struct hypercall_postmsg_in	mh_inprm_save;
80 };
81 
82 static void			vmbus_identify(driver_t *, device_t);
83 static int			vmbus_probe(device_t);
84 static int			vmbus_attach(device_t);
85 static int			vmbus_detach(device_t);
86 static int			vmbus_read_ivar(device_t, device_t, int,
87 				    uintptr_t *);
88 static int			vmbus_child_pnpinfo(device_t, device_t, struct sbuf *);
89 static struct resource		*vmbus_alloc_resource(device_t dev,
90 				    device_t child, int type, int *rid,
91 				    rman_res_t start, rman_res_t end,
92 				    rman_res_t count, u_int flags);
93 static int			vmbus_alloc_msi(device_t bus, device_t dev,
94 				    int count, int maxcount, int *irqs);
95 static int			vmbus_release_msi(device_t bus, device_t dev,
96 				    int count, int *irqs);
97 static int			vmbus_alloc_msix(device_t bus, device_t dev,
98 				    int *irq);
99 static int			vmbus_release_msix(device_t bus, device_t dev,
100 				    int irq);
101 static int			vmbus_map_msi(device_t bus, device_t dev,
102 				    int irq, uint64_t *addr, uint32_t *data);
103 static uint32_t			vmbus_get_version_method(device_t, device_t);
104 static int			vmbus_probe_guid_method(device_t, device_t,
105 				    const struct hyperv_guid *);
106 static uint32_t			vmbus_get_vcpu_id_method(device_t bus,
107 				    device_t dev, int cpu);
108 static struct taskqueue		*vmbus_get_eventtq_method(device_t, device_t,
109 				    int);
110 #ifdef EARLY_AP_STARTUP
111 static void			vmbus_intrhook(void *);
112 #endif
113 
114 static int			vmbus_init(struct vmbus_softc *);
115 static int			vmbus_connect(struct vmbus_softc *, uint32_t);
116 static int			vmbus_req_channels(struct vmbus_softc *sc);
117 static void			vmbus_disconnect(struct vmbus_softc *);
118 static int			vmbus_scan(struct vmbus_softc *);
119 static void			vmbus_scan_teardown(struct vmbus_softc *);
120 static void			vmbus_scan_done(struct vmbus_softc *,
121 				    const struct vmbus_message *);
122 static void			vmbus_chanmsg_handle(struct vmbus_softc *,
123 				    const struct vmbus_message *);
124 static void			vmbus_msg_task(void *, int);
125 static void			vmbus_synic_setup(void *);
126 static void			vmbus_synic_teardown(void *);
127 static int			vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
128 static int			vmbus_dma_alloc(struct vmbus_softc *);
129 static void			vmbus_dma_free(struct vmbus_softc *);
130 static int			vmbus_intr_setup(struct vmbus_softc *);
131 static void			vmbus_intr_teardown(struct vmbus_softc *);
132 static int			vmbus_doattach(struct vmbus_softc *);
133 static void			vmbus_event_proc_dummy(struct vmbus_softc *,
134 				    int);
135 
136 static struct vmbus_softc	*vmbus_sc;
137 
138 SYSCTL_NODE(_hw, OID_AUTO, vmbus, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
139     "Hyper-V vmbus");
140 
141 static int			vmbus_pin_evttask = 1;
142 SYSCTL_INT(_hw_vmbus, OID_AUTO, pin_evttask, CTLFLAG_RDTUN,
143     &vmbus_pin_evttask, 0, "Pin event tasks to their respective CPU");
144 
145 extern inthand_t IDTVEC(vmbus_isr), IDTVEC(vmbus_isr_pti);
146 #define VMBUS_ISR_ADDR	trunc_page((uintptr_t)IDTVEC(vmbus_isr_pti))
147 
148 uint32_t			vmbus_current_version;
149 
150 static const uint32_t		vmbus_version[] = {
151 	VMBUS_VERSION_WIN10,
152 	VMBUS_VERSION_WIN8_1,
153 	VMBUS_VERSION_WIN8,
154 	VMBUS_VERSION_WIN7,
155 	VMBUS_VERSION_WS2008
156 };
157 
158 static const vmbus_chanmsg_proc_t
159 vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
160 	VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
161 	VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
162 };
163 
164 static device_method_t vmbus_methods[] = {
165 	/* Device interface */
166 	DEVMETHOD(device_identify,		vmbus_identify),
167 	DEVMETHOD(device_probe,			vmbus_probe),
168 	DEVMETHOD(device_attach,		vmbus_attach),
169 	DEVMETHOD(device_detach,		vmbus_detach),
170 	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
171 	DEVMETHOD(device_suspend,		bus_generic_suspend),
172 	DEVMETHOD(device_resume,		bus_generic_resume),
173 
174 	/* Bus interface */
175 	DEVMETHOD(bus_add_child,		bus_generic_add_child),
176 	DEVMETHOD(bus_print_child,		bus_generic_print_child),
177 	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
178 	DEVMETHOD(bus_child_pnpinfo,		vmbus_child_pnpinfo),
179 	DEVMETHOD(bus_alloc_resource,		vmbus_alloc_resource),
180 	DEVMETHOD(bus_release_resource,		bus_generic_release_resource),
181 	DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
182 	DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
183 	DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
184 	DEVMETHOD(bus_teardown_intr,		bus_generic_teardown_intr),
185 #if __FreeBSD_version >= 1100000
186 	DEVMETHOD(bus_get_cpus,			bus_generic_get_cpus),
187 #endif
188 
189 	/* pcib interface */
190 	DEVMETHOD(pcib_alloc_msi,		vmbus_alloc_msi),
191 	DEVMETHOD(pcib_release_msi,		vmbus_release_msi),
192 	DEVMETHOD(pcib_alloc_msix,		vmbus_alloc_msix),
193 	DEVMETHOD(pcib_release_msix,		vmbus_release_msix),
194 	DEVMETHOD(pcib_map_msi,			vmbus_map_msi),
195 
196 	/* Vmbus interface */
197 	DEVMETHOD(vmbus_get_version,		vmbus_get_version_method),
198 	DEVMETHOD(vmbus_probe_guid,		vmbus_probe_guid_method),
199 	DEVMETHOD(vmbus_get_vcpu_id,		vmbus_get_vcpu_id_method),
200 	DEVMETHOD(vmbus_get_event_taskq,	vmbus_get_eventtq_method),
201 
202 	DEVMETHOD_END
203 };
204 
205 static driver_t vmbus_driver = {
206 	"vmbus",
207 	vmbus_methods,
208 	sizeof(struct vmbus_softc)
209 };
210 
211 DRIVER_MODULE(vmbus, pcib, vmbus_driver, NULL, NULL);
212 DRIVER_MODULE(vmbus, acpi_syscontainer, vmbus_driver, NULL, NULL);
213 
214 MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
215 MODULE_DEPEND(vmbus, pci, 1, 1, 1);
216 MODULE_VERSION(vmbus, 1);
217 
218 static __inline struct vmbus_softc *
219 vmbus_get_softc(void)
220 {
221 	return vmbus_sc;
222 }
223 
224 void
225 vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
226 {
227 	struct hypercall_postmsg_in *inprm;
228 
229 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
230 		panic("invalid data size %zu", dsize);
231 
232 	inprm = vmbus_xact_req_data(mh->mh_xact);
233 	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
234 	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
235 	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
236 	inprm->hc_dsize = dsize;
237 }
238 
239 struct vmbus_msghc *
240 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
241 {
242 	struct vmbus_msghc *mh;
243 	struct vmbus_xact *xact;
244 
245 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
246 		panic("invalid data size %zu", dsize);
247 
248 	xact = vmbus_xact_get(sc->vmbus_xc,
249 	    dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
250 	if (xact == NULL)
251 		return (NULL);
252 
253 	mh = vmbus_xact_priv(xact, sizeof(*mh));
254 	mh->mh_xact = xact;
255 
256 	vmbus_msghc_reset(mh, dsize);
257 	return (mh);
258 }
259 
260 void
261 vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
262 {
263 
264 	vmbus_xact_put(mh->mh_xact);
265 }
266 
267 void *
268 vmbus_msghc_dataptr(struct vmbus_msghc *mh)
269 {
270 	struct hypercall_postmsg_in *inprm;
271 
272 	inprm = vmbus_xact_req_data(mh->mh_xact);
273 	return (inprm->hc_data);
274 }
275 
276 int
277 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
278 {
279 	sbintime_t time = SBT_1MS;
280 	struct hypercall_postmsg_in *inprm;
281 	bus_addr_t inprm_paddr;
282 	int i;
283 
284 	inprm = vmbus_xact_req_data(mh->mh_xact);
285 	inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact);
286 
287 	/*
288 	 * Save the input parameter so that we could restore the input
289 	 * parameter if the Hypercall failed.
290 	 *
291 	 * XXX
292 	 * Is this really necessary?!  i.e. Will the Hypercall ever
293 	 * overwrite the input parameter?
294 	 */
295 	memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE);
296 
297 	/*
298 	 * In order to cope with transient failures, e.g. insufficient
299 	 * resources on host side, we retry the post message Hypercall
300 	 * several times.  20 retries seem sufficient.
301 	 */
302 #define HC_RETRY_MAX	20
303 
304 	for (i = 0; i < HC_RETRY_MAX; ++i) {
305 		uint64_t status;
306 
307 		status = hypercall_post_message(inprm_paddr);
308 		if (status == HYPERCALL_STATUS_SUCCESS)
309 			return 0;
310 
311 		pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
312 		if (time < SBT_1S * 2)
313 			time *= 2;
314 
315 		/* Restore input parameter and try again */
316 		memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
317 	}
318 
319 #undef HC_RETRY_MAX
320 
321 	return EIO;
322 }
323 
324 int
325 vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
326 {
327 	int error;
328 
329 	vmbus_xact_activate(mh->mh_xact);
330 	error = vmbus_msghc_exec_noresult(mh);
331 	if (error)
332 		vmbus_xact_deactivate(mh->mh_xact);
333 	return error;
334 }
335 
336 void
337 vmbus_msghc_exec_cancel(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
338 {
339 
340 	vmbus_xact_deactivate(mh->mh_xact);
341 }
342 
343 const struct vmbus_message *
344 vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
345 {
346 	size_t resp_len;
347 
348 	return (vmbus_xact_wait(mh->mh_xact, &resp_len));
349 }
350 
351 const struct vmbus_message *
352 vmbus_msghc_poll_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
353 {
354 	size_t resp_len;
355 
356 	return (vmbus_xact_poll(mh->mh_xact, &resp_len));
357 }
358 
359 void
360 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
361 {
362 
363 	vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
364 }
365 
366 uint32_t
367 vmbus_gpadl_alloc(struct vmbus_softc *sc)
368 {
369 	uint32_t gpadl;
370 
371 again:
372 	gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
373 	if (gpadl == 0)
374 		goto again;
375 	return (gpadl);
376 }
377 
378 /* Used for Hyper-V socket when guest client connects to host */
379 int
380 vmbus_req_tl_connect(struct hyperv_guid *guest_srv_id,
381     struct hyperv_guid *host_srv_id)
382 {
383 	struct vmbus_softc *sc = vmbus_get_softc();
384 	struct vmbus_chanmsg_tl_connect *req;
385 	struct vmbus_msghc *mh;
386 	int error;
387 
388 	if (!sc)
389 		return ENXIO;
390 
391 	mh = vmbus_msghc_get(sc, sizeof(*req));
392 	if (mh == NULL) {
393 		device_printf(sc->vmbus_dev,
394 		    "can not get msg hypercall for tl connect\n");
395 		return ENXIO;
396 	}
397 
398 	req = vmbus_msghc_dataptr(mh);
399 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_TL_CONN;
400 	req->guest_endpoint_id = *guest_srv_id;
401 	req->host_service_id = *host_srv_id;
402 
403 	error = vmbus_msghc_exec_noresult(mh);
404 	vmbus_msghc_put(sc, mh);
405 
406 	if (error) {
407 		device_printf(sc->vmbus_dev,
408 		    "tl connect msg hypercall failed\n");
409 	}
410 
411 	return error;
412 }
413 
414 static int
415 vmbus_connect(struct vmbus_softc *sc, uint32_t version)
416 {
417 	struct vmbus_chanmsg_connect *req;
418 	const struct vmbus_message *msg;
419 	struct vmbus_msghc *mh;
420 	int error, done = 0;
421 
422 	mh = vmbus_msghc_get(sc, sizeof(*req));
423 	if (mh == NULL)
424 		return ENXIO;
425 
426 	req = vmbus_msghc_dataptr(mh);
427 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
428 	req->chm_ver = version;
429 	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
430 	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
431 	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
432 
433 	error = vmbus_msghc_exec(sc, mh);
434 	if (error) {
435 		vmbus_msghc_put(sc, mh);
436 		return error;
437 	}
438 
439 	msg = vmbus_msghc_wait_result(sc, mh);
440 	done = ((const struct vmbus_chanmsg_connect_resp *)
441 	    msg->msg_data)->chm_done;
442 
443 	vmbus_msghc_put(sc, mh);
444 
445 	return (done ? 0 : EOPNOTSUPP);
446 }
447 
448 static int
449 vmbus_init(struct vmbus_softc *sc)
450 {
451 	int i;
452 
453 	for (i = 0; i < nitems(vmbus_version); ++i) {
454 		int error;
455 
456 		error = vmbus_connect(sc, vmbus_version[i]);
457 		if (!error) {
458 			vmbus_current_version = vmbus_version[i];
459 			sc->vmbus_version = vmbus_version[i];
460 			device_printf(sc->vmbus_dev, "version %u.%u\n",
461 			    VMBUS_VERSION_MAJOR(sc->vmbus_version),
462 			    VMBUS_VERSION_MINOR(sc->vmbus_version));
463 			return 0;
464 		}
465 	}
466 	return ENXIO;
467 }
468 
469 static void
470 vmbus_disconnect(struct vmbus_softc *sc)
471 {
472 	struct vmbus_chanmsg_disconnect *req;
473 	struct vmbus_msghc *mh;
474 	int error;
475 
476 	mh = vmbus_msghc_get(sc, sizeof(*req));
477 	if (mh == NULL) {
478 		device_printf(sc->vmbus_dev,
479 		    "can not get msg hypercall for disconnect\n");
480 		return;
481 	}
482 
483 	req = vmbus_msghc_dataptr(mh);
484 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;
485 
486 	error = vmbus_msghc_exec_noresult(mh);
487 	vmbus_msghc_put(sc, mh);
488 
489 	if (error) {
490 		device_printf(sc->vmbus_dev,
491 		    "disconnect msg hypercall failed\n");
492 	}
493 }
494 
495 static int
496 vmbus_req_channels(struct vmbus_softc *sc)
497 {
498 	struct vmbus_chanmsg_chrequest *req;
499 	struct vmbus_msghc *mh;
500 	int error;
501 
502 	mh = vmbus_msghc_get(sc, sizeof(*req));
503 	if (mh == NULL)
504 		return ENXIO;
505 
506 	req = vmbus_msghc_dataptr(mh);
507 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;
508 
509 	error = vmbus_msghc_exec_noresult(mh);
510 	vmbus_msghc_put(sc, mh);
511 
512 	return error;
513 }
514 
515 static void
516 vmbus_scan_done_task(void *xsc, int pending __unused)
517 {
518 	struct vmbus_softc *sc = xsc;
519 
520 	bus_topo_lock();
521 	sc->vmbus_scandone = true;
522 	bus_topo_unlock();
523 	wakeup(&sc->vmbus_scandone);
524 }
525 
526 static void
527 vmbus_scan_done(struct vmbus_softc *sc,
528     const struct vmbus_message *msg __unused)
529 {
530 
531 	taskqueue_enqueue(sc->vmbus_devtq, &sc->vmbus_scandone_task);
532 }
533 
534 static int
535 vmbus_scan(struct vmbus_softc *sc)
536 {
537 	int error;
538 
539 	/*
540 	 * Identify, probe and attach for non-channel devices.
541 	 */
542 	bus_generic_probe(sc->vmbus_dev);
543 	bus_generic_attach(sc->vmbus_dev);
544 
545 	/*
546 	 * This taskqueue serializes vmbus devices' attach and detach
547 	 * for channel offer and rescind messages.
548 	 */
549 	sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK,
550 	    taskqueue_thread_enqueue, &sc->vmbus_devtq);
551 	taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev");
552 	TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc);
553 
554 	/*
555 	 * This taskqueue handles sub-channel detach, so that vmbus
556 	 * device's detach running in vmbus_devtq can drain its sub-
557 	 * channels.
558 	 */
559 	sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK,
560 	    taskqueue_thread_enqueue, &sc->vmbus_subchtq);
561 	taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch");
562 
563 	/*
564 	 * Start vmbus scanning.
565 	 */
566 	error = vmbus_req_channels(sc);
567 	if (error) {
568 		device_printf(sc->vmbus_dev, "channel request failed: %d\n",
569 		    error);
570 		return (error);
571 	}
572 
573 	/*
574 	 * Wait for all vmbus devices from the initial channel offers to be
575 	 * attached.
576 	 */
577 	bus_topo_assert();
578 	while (!sc->vmbus_scandone)
579 		mtx_sleep(&sc->vmbus_scandone, bus_topo_mtx(), 0, "vmbusdev", 0);
580 
581 	if (bootverbose) {
582 		device_printf(sc->vmbus_dev, "device scan, probe and attach "
583 		    "done\n");
584 	}
585 	return (0);
586 }
587 
588 static void
589 vmbus_scan_teardown(struct vmbus_softc *sc)
590 {
591 
592 	bus_topo_assert();
593 	if (sc->vmbus_devtq != NULL) {
594 		bus_topo_unlock();
595 		taskqueue_free(sc->vmbus_devtq);
596 		bus_topo_lock();
597 		sc->vmbus_devtq = NULL;
598 	}
599 	if (sc->vmbus_subchtq != NULL) {
600 		bus_topo_unlock();
601 		taskqueue_free(sc->vmbus_subchtq);
602 		bus_topo_lock();
603 		sc->vmbus_subchtq = NULL;
604 	}
605 }
606 
607 static void
608 vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
609 {
610 	vmbus_chanmsg_proc_t msg_proc;
611 	uint32_t msg_type;
612 
613 	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
614 	if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
615 		device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
616 		    msg_type);
617 		return;
618 	}
619 
620 	msg_proc = vmbus_chanmsg_handlers[msg_type];
621 	if (msg_proc != NULL)
622 		msg_proc(sc, msg);
623 
624 	/* Channel specific processing */
625 	vmbus_chan_msgproc(sc, msg);
626 }
627 
628 static void
629 vmbus_msg_task(void *xsc, int pending __unused)
630 {
631 	struct vmbus_softc *sc = xsc;
632 	volatile struct vmbus_message *msg;
633 
634 	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
635 	for (;;) {
636 		if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
637 			/* No message */
638 			break;
639 		} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
640 			/* Channel message */
641 			vmbus_chanmsg_handle(sc,
642 			    __DEVOLATILE(const struct vmbus_message *, msg));
643 		}
644 
645 		msg->msg_type = HYPERV_MSGTYPE_NONE;
646 		/*
647 		 * Make sure the write to msg_type (i.e. set to
648 		 * HYPERV_MSGTYPE_NONE) happens before we read the
649 		 * msg_flags and EOMing. Otherwise, the EOMing will
650 		 * not deliver any more messages since there is no
651 		 * empty slot
652 		 *
653 		 * NOTE:
654 		 * mb() is used here, since atomic_thread_fence_seq_cst()
655 		 * will become compiler fence on UP kernel.
656 		 */
657 		mb();
658 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
659 			/*
660 			 * This will cause message queue rescan to possibly
661 			 * deliver another msg from the hypervisor
662 			 */
663 			wrmsr(MSR_HV_EOM, 0);
664 		}
665 	}
666 }
667 
668 static __inline int
669 vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
670 {
671 	volatile struct vmbus_message *msg;
672 	struct vmbus_message *msg_base;
673 
674 	msg_base = VMBUS_PCPU_GET(sc, message, cpu);
675 
676 	/*
677 	 * Check event timer.
678 	 *
679 	 * TODO: move this to independent IDT vector.
680 	 */
681 	msg = msg_base + VMBUS_SINT_TIMER;
682 	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
683 		msg->msg_type = HYPERV_MSGTYPE_NONE;
684 
685 		vmbus_et_intr(frame);
686 
687 		/*
688 		 * Make sure the write to msg_type (i.e. set to
689 		 * HYPERV_MSGTYPE_NONE) happens before we read the
690 		 * msg_flags and EOMing. Otherwise, the EOMing will
691 		 * not deliver any more messages since there is no
692 		 * empty slot
693 		 *
694 		 * NOTE:
695 		 * mb() is used here, since atomic_thread_fence_seq_cst()
696 		 * will become compiler fence on UP kernel.
697 		 */
698 		mb();
699 		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
700 			/*
701 			 * This will cause message queue rescan to possibly
702 			 * deliver another msg from the hypervisor
703 			 */
704 			wrmsr(MSR_HV_EOM, 0);
705 		}
706 	}
707 
708 	/*
709 	 * Check events.  Hot path for network and storage I/O data; high rate.
710 	 *
711 	 * NOTE:
712 	 * As recommended by the Windows guest fellows, we check events before
713 	 * checking messages.
714 	 */
715 	sc->vmbus_event_proc(sc, cpu);
716 
717 	/*
718 	 * Check messages.  Mainly management stuffs; ultra low rate.
719 	 */
720 	msg = msg_base + VMBUS_SINT_MESSAGE;
721 	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
722 		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
723 		    VMBUS_PCPU_PTR(sc, message_task, cpu));
724 	}
725 
726 	return (FILTER_HANDLED);
727 }
728 
729 void
730 vmbus_handle_intr(struct trapframe *trap_frame)
731 {
732 	struct vmbus_softc *sc = vmbus_get_softc();
733 	int cpu = curcpu;
734 
735 	/*
736 	 * Disable preemption.
737 	 */
738 	critical_enter();
739 
740 	/*
741 	 * Do a little interrupt counting.
742 	 */
743 	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
744 
745 	vmbus_handle_intr1(sc, trap_frame, cpu);
746 
747 	/*
748 	 * Enable preemption.
749 	 */
750 	critical_exit();
751 }
752 
753 static void
754 vmbus_synic_setup(void *xsc)
755 {
756 	struct vmbus_softc *sc = xsc;
757 	int cpu = curcpu;
758 	uint64_t val, orig;
759 	uint32_t sint;
760 
761 	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
762 		/* Save virtual processor id. */
763 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
764 	} else {
765 		/* Set virtual processor id to 0 for compatibility. */
766 		VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
767 	}
768 
769 	/*
770 	 * Setup the SynIC message.
771 	 */
772 	orig = rdmsr(MSR_HV_SIMP);
773 	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
774 	    ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
775 	     MSR_HV_SIMP_PGSHIFT);
776 	wrmsr(MSR_HV_SIMP, val);
777 
778 	/*
779 	 * Setup the SynIC event flags.
780 	 */
781 	orig = rdmsr(MSR_HV_SIEFP);
782 	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
783 	    ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
784 	      >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
785 	wrmsr(MSR_HV_SIEFP, val);
786 
787 
788 	/*
789 	 * Configure and unmask SINT for message and event flags.
790 	 */
791 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
792 	orig = rdmsr(sint);
793 	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
794 	    (orig & MSR_HV_SINT_RSVD_MASK);
795 	wrmsr(sint, val);
796 
797 	/*
798 	 * Configure and unmask SINT for timer.
799 	 */
800 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
801 	orig = rdmsr(sint);
802 	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
803 	    (orig & MSR_HV_SINT_RSVD_MASK);
804 	wrmsr(sint, val);
805 
806 	/*
807 	 * All done; enable SynIC.
808 	 */
809 	orig = rdmsr(MSR_HV_SCONTROL);
810 	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
811 	wrmsr(MSR_HV_SCONTROL, val);
812 }
813 
814 static void
815 vmbus_synic_teardown(void *arg)
816 {
817 	uint64_t orig;
818 	uint32_t sint;
819 
820 	/*
821 	 * Disable SynIC.
822 	 */
823 	orig = rdmsr(MSR_HV_SCONTROL);
824 	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
825 
826 	/*
827 	 * Mask message and event flags SINT.
828 	 */
829 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
830 	orig = rdmsr(sint);
831 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
832 
833 	/*
834 	 * Mask timer SINT.
835 	 */
836 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
837 	orig = rdmsr(sint);
838 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
839 
840 	/*
841 	 * Teardown SynIC message.
842 	 */
843 	orig = rdmsr(MSR_HV_SIMP);
844 	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
845 
846 	/*
847 	 * Teardown SynIC event flags.
848 	 */
849 	orig = rdmsr(MSR_HV_SIEFP);
850 	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
851 }
852 
853 static int
854 vmbus_dma_alloc(struct vmbus_softc *sc)
855 {
856 	bus_dma_tag_t parent_dtag;
857 	uint8_t *evtflags;
858 	int cpu;
859 
860 	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
861 	CPU_FOREACH(cpu) {
862 		void *ptr;
863 
864 		/*
865 		 * Per-cpu messages and event flags.
866 		 */
867 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
868 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu),
869 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
870 		if (ptr == NULL)
871 			return ENOMEM;
872 		VMBUS_PCPU_GET(sc, message, cpu) = ptr;
873 
874 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
875 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
876 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
877 		if (ptr == NULL)
878 			return ENOMEM;
879 		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
880 	}
881 
882 	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
883 	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
884 	if (evtflags == NULL)
885 		return ENOMEM;
886 	sc->vmbus_rx_evtflags = (u_long *)evtflags;
887 	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
888 	sc->vmbus_evtflags = evtflags;
889 
890 	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
891 	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
892 	if (sc->vmbus_mnf1 == NULL)
893 		return ENOMEM;
894 
895 	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
896 	    sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma,
897 	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
898 	if (sc->vmbus_mnf2 == NULL)
899 		return ENOMEM;
900 
901 	return 0;
902 }
903 
904 static void
905 vmbus_dma_free(struct vmbus_softc *sc)
906 {
907 	int cpu;
908 
909 	if (sc->vmbus_evtflags != NULL) {
910 		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
911 		sc->vmbus_evtflags = NULL;
912 		sc->vmbus_rx_evtflags = NULL;
913 		sc->vmbus_tx_evtflags = NULL;
914 	}
915 	if (sc->vmbus_mnf1 != NULL) {
916 		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
917 		sc->vmbus_mnf1 = NULL;
918 	}
919 	if (sc->vmbus_mnf2 != NULL) {
920 		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
921 		sc->vmbus_mnf2 = NULL;
922 	}
923 
924 	CPU_FOREACH(cpu) {
925 		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
926 			hyperv_dmamem_free(
927 			    VMBUS_PCPU_PTR(sc, message_dma, cpu),
928 			    VMBUS_PCPU_GET(sc, message, cpu));
929 			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
930 		}
931 		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
932 			hyperv_dmamem_free(
933 			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
934 			    VMBUS_PCPU_GET(sc, event_flags, cpu));
935 			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
936 		}
937 	}
938 }
939 
940 static int
941 vmbus_intr_setup(struct vmbus_softc *sc)
942 {
943 	int cpu;
944 
945 	CPU_FOREACH(cpu) {
946 		char buf[MAXCOMLEN + 1];
947 		cpuset_t cpu_mask;
948 
949 		/* Allocate an interrupt counter for Hyper-V interrupt */
950 		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
951 		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));
952 
953 		/*
954 		 * Setup taskqueue to handle events.  Task will be per-
955 		 * channel.
956 		 */
957 		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
958 		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
959 		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
960 		if (vmbus_pin_evttask) {
961 			CPU_SETOF(cpu, &cpu_mask);
962 			taskqueue_start_threads_cpuset(
963 			    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
964 			    &cpu_mask, "hvevent%d", cpu);
965 		} else {
966 			taskqueue_start_threads(
967 			    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
968 			    "hvevent%d", cpu);
969 		}
970 
971 		/*
972 		 * Setup tasks and taskqueues to handle messages.
973 		 */
974 		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
975 		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
976 		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
977 		CPU_SETOF(cpu, &cpu_mask);
978 		taskqueue_start_threads_cpuset(
979 		    VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask,
980 		    "hvmsg%d", cpu);
981 		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
982 		    vmbus_msg_task, sc);
983 	}
984 
985 #if defined(__amd64__) && defined(KLD_MODULE)
986 	pmap_pti_add_kva(VMBUS_ISR_ADDR, VMBUS_ISR_ADDR + PAGE_SIZE, true);
987 #endif
988 
989 	/*
990 	 * All Hyper-V ISR required resources are setup, now let's find a
991 	 * free IDT vector for Hyper-V ISR and set it up.
992 	 */
993 	sc->vmbus_idtvec = lapic_ipi_alloc(pti ? IDTVEC(vmbus_isr_pti) :
994 	    IDTVEC(vmbus_isr));
995 	if (sc->vmbus_idtvec < 0) {
996 #if defined(__amd64__) && defined(KLD_MODULE)
997 		pmap_pti_remove_kva(VMBUS_ISR_ADDR, VMBUS_ISR_ADDR + PAGE_SIZE);
998 #endif
999 		device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
1000 		return ENXIO;
1001 	}
1002 	if (bootverbose) {
1003 		device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
1004 		    sc->vmbus_idtvec);
1005 	}
1006 	return 0;
1007 }
1008 
1009 static void
1010 vmbus_intr_teardown(struct vmbus_softc *sc)
1011 {
1012 	int cpu;
1013 
1014 	if (sc->vmbus_idtvec >= 0) {
1015 		lapic_ipi_free(sc->vmbus_idtvec);
1016 		sc->vmbus_idtvec = -1;
1017 	}
1018 
1019 #if defined(__amd64__) && defined(KLD_MODULE)
1020 	pmap_pti_remove_kva(VMBUS_ISR_ADDR, VMBUS_ISR_ADDR + PAGE_SIZE);
1021 #endif
1022 
1023 	CPU_FOREACH(cpu) {
1024 		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
1025 			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
1026 			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
1027 		}
1028 		if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
1029 			taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
1030 			    VMBUS_PCPU_PTR(sc, message_task, cpu));
1031 			taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
1032 			VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
1033 		}
1034 	}
1035 }
1036 
1037 static int
1038 vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
1039 {
1040 	return (ENOENT);
1041 }
1042 
1043 static int
1044 vmbus_child_pnpinfo(device_t dev, device_t child, struct sbuf *sb)
1045 {
1046 	const struct vmbus_channel *chan;
1047 	char guidbuf[HYPERV_GUID_STRLEN];
1048 
1049 	chan = vmbus_get_channel(child);
1050 	if (chan == NULL) {
1051 		/* Event timer device, which does not belong to a channel */
1052 		return (0);
1053 	}
1054 
1055 	hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf));
1056 	sbuf_printf(sb, "classid=%s", guidbuf);
1057 
1058 	hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf));
1059 	sbuf_printf(sb, " deviceid=%s", guidbuf);
1060 
1061 	return (0);
1062 }
1063 
1064 int
1065 vmbus_add_child(struct vmbus_channel *chan)
1066 {
1067 	struct vmbus_softc *sc = chan->ch_vmbus;
1068 	device_t parent = sc->vmbus_dev;
1069 
1070 	bus_topo_lock();
1071 	chan->ch_dev = device_add_child(parent, NULL, -1);
1072 	if (chan->ch_dev == NULL) {
1073 		bus_topo_unlock();
1074 		device_printf(parent, "device_add_child for chan%u failed\n",
1075 		    chan->ch_id);
1076 		return (ENXIO);
1077 	}
1078 	device_set_ivars(chan->ch_dev, chan);
1079 	device_probe_and_attach(chan->ch_dev);
1080 	bus_topo_unlock();
1081 
1082 	return (0);
1083 }
1084 
1085 int
1086 vmbus_delete_child(struct vmbus_channel *chan)
1087 {
1088 	int error = 0;
1089 
1090 	bus_topo_lock();
1091 	if (chan->ch_dev != NULL) {
1092 		error = device_delete_child(chan->ch_vmbus->vmbus_dev,
1093 		    chan->ch_dev);
1094 		chan->ch_dev = NULL;
1095 	}
1096 	bus_topo_unlock();
1097 	return (error);
1098 }
1099 
1100 static int
1101 vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
1102 {
1103 	struct vmbus_softc *sc = arg1;
1104 	char verstr[16];
1105 
1106 	snprintf(verstr, sizeof(verstr), "%u.%u",
1107 	    VMBUS_VERSION_MAJOR(sc->vmbus_version),
1108 	    VMBUS_VERSION_MINOR(sc->vmbus_version));
1109 	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
1110 }
1111 
1112 /*
1113  * We need the function to make sure the MMIO resource is allocated from the
1114  * ranges found in _CRS.
1115  *
1116  * For the release function, we can use bus_generic_release_resource().
1117  */
1118 static struct resource *
1119 vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid,
1120     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1121 {
1122 	device_t parent = device_get_parent(dev);
1123 	struct resource *res;
1124 
1125 #ifdef NEW_PCIB
1126 	if (type == SYS_RES_MEMORY) {
1127 		struct vmbus_softc *sc = device_get_softc(dev);
1128 
1129 		res = pcib_host_res_alloc(&sc->vmbus_mmio_res, child, type,
1130 		    rid, start, end, count, flags);
1131 	} else
1132 #endif
1133 	{
1134 		res = BUS_ALLOC_RESOURCE(parent, child, type, rid, start,
1135 		    end, count, flags);
1136 	}
1137 
1138 	return (res);
1139 }
1140 
1141 static int
1142 vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs)
1143 {
1144 
1145 	return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount,
1146 	    irqs));
1147 }
1148 
1149 static int
1150 vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs)
1151 {
1152 
1153 	return (PCIB_RELEASE_MSI(device_get_parent(bus), dev, count, irqs));
1154 }
1155 
1156 static int
1157 vmbus_alloc_msix(device_t bus, device_t dev, int *irq)
1158 {
1159 
1160 	return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq));
1161 }
1162 
1163 static int
1164 vmbus_release_msix(device_t bus, device_t dev, int irq)
1165 {
1166 
1167 	return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq));
1168 }
1169 
1170 static int
1171 vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr,
1172 	uint32_t *data)
1173 {
1174 
1175 	return (PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data));
1176 }
1177 
1178 static uint32_t
1179 vmbus_get_version_method(device_t bus, device_t dev)
1180 {
1181 	struct vmbus_softc *sc = device_get_softc(bus);
1182 
1183 	return sc->vmbus_version;
1184 }
1185 
1186 static int
1187 vmbus_probe_guid_method(device_t bus, device_t dev,
1188     const struct hyperv_guid *guid)
1189 {
1190 	const struct vmbus_channel *chan = vmbus_get_channel(dev);
1191 
1192 	if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
1193 		return 0;
1194 	return ENXIO;
1195 }
1196 
1197 static uint32_t
1198 vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu)
1199 {
1200 	const struct vmbus_softc *sc = device_get_softc(bus);
1201 
1202 	return (VMBUS_PCPU_GET(sc, vcpuid, cpu));
1203 }
1204 
1205 static struct taskqueue *
1206 vmbus_get_eventtq_method(device_t bus, device_t dev __unused, int cpu)
1207 {
1208 	const struct vmbus_softc *sc = device_get_softc(bus);
1209 
1210 	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu%d", cpu));
1211 	return (VMBUS_PCPU_GET(sc, event_tq, cpu));
1212 }
1213 
1214 #ifdef NEW_PCIB
1215 #define VTPM_BASE_ADDR 0xfed40000
1216 #define FOUR_GB (1ULL << 32)
1217 
1218 enum parse_pass { parse_64, parse_32 };
1219 
1220 struct parse_context {
1221 	device_t vmbus_dev;
1222 	enum parse_pass pass;
1223 };
1224 
1225 static ACPI_STATUS
1226 parse_crs(ACPI_RESOURCE *res, void *ctx)
1227 {
1228 	const struct parse_context *pc = ctx;
1229 	device_t vmbus_dev = pc->vmbus_dev;
1230 
1231 	struct vmbus_softc *sc = device_get_softc(vmbus_dev);
1232 	UINT64 start, end;
1233 
1234 	switch (res->Type) {
1235 	case ACPI_RESOURCE_TYPE_ADDRESS32:
1236 		start = res->Data.Address32.Address.Minimum;
1237 		end = res->Data.Address32.Address.Maximum;
1238 		break;
1239 
1240 	case ACPI_RESOURCE_TYPE_ADDRESS64:
1241 		start = res->Data.Address64.Address.Minimum;
1242 		end = res->Data.Address64.Address.Maximum;
1243 		break;
1244 
1245 	default:
1246 		/* Unused types. */
1247 		return (AE_OK);
1248 	}
1249 
1250 	/*
1251 	 * We don't use <1MB addresses.
1252 	 */
1253 	if (end < 0x100000)
1254 		return (AE_OK);
1255 
1256 	/* Don't conflict with vTPM. */
1257 	if (end >= VTPM_BASE_ADDR && start < VTPM_BASE_ADDR)
1258 		end = VTPM_BASE_ADDR - 1;
1259 
1260 	if ((pc->pass == parse_32 && start < FOUR_GB) ||
1261 	    (pc->pass == parse_64 && start >= FOUR_GB))
1262 		pcib_host_res_decodes(&sc->vmbus_mmio_res, SYS_RES_MEMORY,
1263 		    start, end, 0);
1264 
1265 	return (AE_OK);
1266 }
1267 
1268 static void
1269 vmbus_get_crs(device_t dev, device_t vmbus_dev, enum parse_pass pass)
1270 {
1271 	struct parse_context pc;
1272 	ACPI_STATUS status;
1273 
1274 	if (bootverbose)
1275 		device_printf(dev, "walking _CRS, pass=%d\n", pass);
1276 
1277 	pc.vmbus_dev = vmbus_dev;
1278 	pc.pass = pass;
1279 	status = AcpiWalkResources(acpi_get_handle(dev), "_CRS",
1280 			parse_crs, &pc);
1281 
1282 	if (bootverbose && ACPI_FAILURE(status))
1283 		device_printf(dev, "_CRS: not found, pass=%d\n", pass);
1284 }
1285 
1286 static void
1287 vmbus_get_mmio_res_pass(device_t dev, enum parse_pass pass)
1288 {
1289 	device_t acpi0, parent;
1290 
1291 	parent = device_get_parent(dev);
1292 
1293 	acpi0 = device_get_parent(parent);
1294 	if (strcmp("acpi0", device_get_nameunit(acpi0)) == 0) {
1295 		device_t *children;
1296 		int count;
1297 
1298 		/*
1299 		 * Try to locate VMBUS resources and find _CRS on them.
1300 		 */
1301 		if (device_get_children(acpi0, &children, &count) == 0) {
1302 			int i;
1303 
1304 			for (i = 0; i < count; ++i) {
1305 				if (!device_is_attached(children[i]))
1306 					continue;
1307 
1308 				if (strcmp("vmbus_res",
1309 				    device_get_name(children[i])) == 0)
1310 					vmbus_get_crs(children[i], dev, pass);
1311 			}
1312 			free(children, M_TEMP);
1313 		}
1314 
1315 		/*
1316 		 * Try to find _CRS on acpi.
1317 		 */
1318 		vmbus_get_crs(acpi0, dev, pass);
1319 	} else {
1320 		device_printf(dev, "not grandchild of acpi\n");
1321 	}
1322 
1323 	/*
1324 	 * Try to find _CRS on parent.
1325 	 */
1326 	vmbus_get_crs(parent, dev, pass);
1327 }
1328 
1329 static void
1330 vmbus_get_mmio_res(device_t dev)
1331 {
1332 	struct vmbus_softc *sc = device_get_softc(dev);
1333 	/*
1334 	 * We walk the resources twice to make sure that: in the resource
1335 	 * list, the 32-bit resources appear behind the 64-bit resources.
1336 	 * NB: resource_list_add() uses INSERT_TAIL. This way, when we
1337 	 * iterate through the list to find a range for a 64-bit BAR in
1338 	 * vmbus_alloc_resource(), we can make sure we try to use >4GB
1339 	 * ranges first.
1340 	 */
1341 	pcib_host_res_init(dev, &sc->vmbus_mmio_res);
1342 
1343 	vmbus_get_mmio_res_pass(dev, parse_64);
1344 	vmbus_get_mmio_res_pass(dev, parse_32);
1345 }
1346 
/*
 * On Gen2 VMs, Hyper-V provides MMIO space for the framebuffer.
 * This MMIO address range is not usable by other PCI devices.
 * Currently only the efifb and vbefb drivers use this range, and they
 * do so without reserving it from the system.
 * Therefore, the vmbus driver reserves it before any other PCI device
 * driver starts to request MMIO addresses.
 */
1355 static struct resource *hv_fb_res;
1356 
1357 static void
1358 vmbus_fb_mmio_res(device_t dev)
1359 {
1360 	struct efi_fb *efifb;
1361 	struct vbe_fb *vbefb;
1362 	rman_res_t fb_start, fb_end, fb_count;
1363 	int fb_height, fb_width;
1364 	caddr_t kmdp;
1365 
1366 	struct vmbus_softc *sc = device_get_softc(dev);
1367 	int rid = 0;
1368 
1369 	kmdp = preload_search_by_type("elf kernel");
1370 	if (kmdp == NULL)
1371 		kmdp = preload_search_by_type("elf64 kernel");
1372 	efifb = (struct efi_fb *)preload_search_info(kmdp,
1373 	    MODINFO_METADATA | MODINFOMD_EFI_FB);
1374 	vbefb = (struct vbe_fb *)preload_search_info(kmdp,
1375 	    MODINFO_METADATA | MODINFOMD_VBE_FB);
1376 	if (efifb != NULL) {
1377 		fb_start = efifb->fb_addr;
1378 		fb_end = efifb->fb_addr + efifb->fb_size;
1379 		fb_count = efifb->fb_size;
1380 		fb_height = efifb->fb_height;
1381 		fb_width = efifb->fb_width;
1382 	} else if (vbefb != NULL) {
1383 		fb_start = vbefb->fb_addr;
1384 		fb_end = vbefb->fb_addr + vbefb->fb_size;
1385 		fb_count = vbefb->fb_size;
1386 		fb_height = vbefb->fb_height;
1387 		fb_width = vbefb->fb_width;
1388 	} else {
1389 		if (bootverbose)
1390 			device_printf(dev,
1391 			    "no preloaded kernel fb information\n");
1392 		/* We are on Gen1 VM, just return. */
1393 		return;
1394 	}
1395 
1396 	if (bootverbose)
1397 		device_printf(dev,
1398 		    "fb: fb_addr: %#jx, size: %#jx, "
1399 		    "actual size needed: 0x%x\n",
1400 		    fb_start, fb_count, fb_height * fb_width);
1401 
1402 	hv_fb_res = pcib_host_res_alloc(&sc->vmbus_mmio_res, dev,
1403 	    SYS_RES_MEMORY, &rid, fb_start, fb_end, fb_count,
1404 	    RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE));
1405 
1406 	if (hv_fb_res && bootverbose)
1407 		device_printf(dev,
1408 		    "successfully reserved memory for framebuffer "
1409 		    "starting at %#jx, size %#jx\n",
1410 		    fb_start, fb_count);
1411 }
1412 
1413 static void
1414 vmbus_free_mmio_res(device_t dev)
1415 {
1416 	struct vmbus_softc *sc = device_get_softc(dev);
1417 
1418 	pcib_host_res_free(dev, &sc->vmbus_mmio_res);
1419 
1420 	if (hv_fb_res)
1421 		hv_fb_res = NULL;
1422 }
1423 #endif	/* NEW_PCIB */
1424 
1425 static void
1426 vmbus_identify(driver_t *driver, device_t parent)
1427 {
1428 
1429 	if (device_get_unit(parent) != 0 || vm_guest != VM_GUEST_HV ||
1430 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1431 		return;
1432 	device_add_child(parent, "vmbus", -1);
1433 }
1434 
1435 static int
1436 vmbus_probe(device_t dev)
1437 {
1438 
1439 	if (device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
1440 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1441 		return (ENXIO);
1442 
1443 	device_set_desc(dev, "Hyper-V Vmbus");
1444 	return (BUS_PROBE_DEFAULT);
1445 }
1446 
1447 /**
1448  * @brief Main vmbus driver initialization routine.
1449  *
1450  * Here, we
1451  * - initialize the vmbus driver context
1452  * - setup various driver entry points
1453  * - invoke the vmbus hv main init routine
1454  * - get the irq resource
1455  * - invoke the vmbus to add the vmbus root device
1456  * - setup the vmbus root device
1457  * - retrieve the channel offers
1458  */
1459 static int
1460 vmbus_doattach(struct vmbus_softc *sc)
1461 {
1462 	struct sysctl_oid_list *child;
1463 	struct sysctl_ctx_list *ctx;
1464 	int ret;
1465 
1466 	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
1467 		return (0);
1468 
1469 #ifdef NEW_PCIB
1470 	vmbus_get_mmio_res(sc->vmbus_dev);
1471 	vmbus_fb_mmio_res(sc->vmbus_dev);
1472 #endif
1473 
1474 	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
1475 
1476 	sc->vmbus_gpadl = VMBUS_GPADL_START;
1477 	mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
1478 	TAILQ_INIT(&sc->vmbus_prichans);
1479 	mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF);
1480 	TAILQ_INIT(&sc->vmbus_chans);
1481 	sc->vmbus_chmap = malloc(
1482 	    sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
1483 	    M_WAITOK | M_ZERO);
1484 
1485 	/*
1486 	 * Create context for "post message" Hypercalls
1487 	 */
1488 	sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
1489 	    HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
1490 	    sizeof(struct vmbus_msghc));
1491 	if (sc->vmbus_xc == NULL) {
1492 		ret = ENXIO;
1493 		goto cleanup;
1494 	}
1495 
1496 	/*
1497 	 * Allocate DMA stuffs.
1498 	 */
1499 	ret = vmbus_dma_alloc(sc);
1500 	if (ret != 0)
1501 		goto cleanup;
1502 
1503 	/*
1504 	 * Setup interrupt.
1505 	 */
1506 	ret = vmbus_intr_setup(sc);
1507 	if (ret != 0)
1508 		goto cleanup;
1509 
1510 	/*
1511 	 * Setup SynIC.
1512 	 */
1513 	if (bootverbose)
1514 		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
1515 	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
1516 	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
1517 
1518 	/*
1519 	 * Initialize vmbus, e.g. connect to Hypervisor.
1520 	 */
1521 	ret = vmbus_init(sc);
1522 	if (ret != 0)
1523 		goto cleanup;
1524 
1525 	if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1526 	    sc->vmbus_version == VMBUS_VERSION_WIN7)
1527 		sc->vmbus_event_proc = vmbus_event_proc_compat;
1528 	else
1529 		sc->vmbus_event_proc = vmbus_event_proc;
1530 
1531 	ret = vmbus_scan(sc);
1532 	if (ret != 0)
1533 		goto cleanup;
1534 
1535 	ctx = device_get_sysctl_ctx(sc->vmbus_dev);
1536 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
1537 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
1538 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
1539 	    vmbus_sysctl_version, "A", "vmbus version");
1540 
1541 	return (ret);
1542 
1543 cleanup:
1544 	vmbus_scan_teardown(sc);
1545 	vmbus_intr_teardown(sc);
1546 	vmbus_dma_free(sc);
1547 	if (sc->vmbus_xc != NULL) {
1548 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1549 		sc->vmbus_xc = NULL;
1550 	}
1551 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1552 	mtx_destroy(&sc->vmbus_prichan_lock);
1553 	mtx_destroy(&sc->vmbus_chan_lock);
1554 
1555 	return (ret);
1556 }
1557 
1558 static void
1559 vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
1560 {
1561 }
1562 
1563 #ifdef EARLY_AP_STARTUP
1564 
1565 static void
1566 vmbus_intrhook(void *xsc)
1567 {
1568 	struct vmbus_softc *sc = xsc;
1569 
1570 	if (bootverbose)
1571 		device_printf(sc->vmbus_dev, "intrhook\n");
1572 	vmbus_doattach(sc);
1573 	config_intrhook_disestablish(&sc->vmbus_intrhook);
1574 }
1575 
1576 #endif	/* EARLY_AP_STARTUP */
1577 
1578 static int
1579 vmbus_attach(device_t dev)
1580 {
1581 	vmbus_sc = device_get_softc(dev);
1582 	vmbus_sc->vmbus_dev = dev;
1583 	vmbus_sc->vmbus_idtvec = -1;
1584 
1585 	/*
1586 	 * Event processing logic will be configured:
1587 	 * - After the vmbus protocol version negotiation.
1588 	 * - Before we request channel offers.
1589 	 */
1590 	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
1591 
1592 #ifdef EARLY_AP_STARTUP
1593 	/*
1594 	 * Defer the real attach until the pause(9) works as expected.
1595 	 */
1596 	vmbus_sc->vmbus_intrhook.ich_func = vmbus_intrhook;
1597 	vmbus_sc->vmbus_intrhook.ich_arg = vmbus_sc;
1598 	config_intrhook_establish(&vmbus_sc->vmbus_intrhook);
1599 #else	/* !EARLY_AP_STARTUP */
1600 	/*
1601 	 * If the system has already booted and thread
1602 	 * scheduling is possible indicated by the global
1603 	 * cold set to zero, we just call the driver
1604 	 * initialization directly.
1605 	 */
1606 	if (!cold)
1607 		vmbus_doattach(vmbus_sc);
1608 #endif	/* EARLY_AP_STARTUP */
1609 
1610 	return (0);
1611 }
1612 
1613 static int
1614 vmbus_detach(device_t dev)
1615 {
1616 	struct vmbus_softc *sc = device_get_softc(dev);
1617 
1618 	bus_generic_detach(dev);
1619 	vmbus_chan_destroy_all(sc);
1620 
1621 	vmbus_scan_teardown(sc);
1622 
1623 	vmbus_disconnect(sc);
1624 
1625 	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
1626 		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
1627 		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
1628 	}
1629 
1630 	vmbus_intr_teardown(sc);
1631 	vmbus_dma_free(sc);
1632 
1633 	if (sc->vmbus_xc != NULL) {
1634 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1635 		sc->vmbus_xc = NULL;
1636 	}
1637 
1638 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1639 	mtx_destroy(&sc->vmbus_prichan_lock);
1640 	mtx_destroy(&sc->vmbus_chan_lock);
1641 
1642 #ifdef NEW_PCIB
1643 	vmbus_free_mmio_res(dev);
1644 #endif
1645 
1646 	return (0);
1647 }
1648 
1649 #ifndef EARLY_AP_STARTUP
1650 
1651 static void
1652 vmbus_sysinit(void *arg __unused)
1653 {
1654 	struct vmbus_softc *sc = vmbus_get_softc();
1655 
1656 	if (vm_guest != VM_GUEST_HV || sc == NULL)
1657 		return;
1658 
1659 	/*
1660 	 * If the system has already booted and thread
1661 	 * scheduling is possible, as indicated by the
1662 	 * global cold set to zero, we just call the driver
1663 	 * initialization directly.
1664 	 */
1665 	if (!cold)
1666 		vmbus_doattach(sc);
1667 }
1668 /*
1669  * NOTE:
1670  * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is
1671  * initialized.
1672  */
1673 SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
1674 
1675 #endif	/* !EARLY_AP_STARTUP */
1676