xref: /freebsd/sys/dev/hyperv/vmbus/vmbus.c (revision e17f5b1d)
1 /*-
2  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * VM Bus Driver Implementation
31  */
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mutex.h>
42 #include <sys/smp.h>
43 #include <sys/sysctl.h>
44 #include <sys/systm.h>
45 #include <sys/taskqueue.h>
46 
47 #include <machine/bus.h>
48 #include <machine/intr_machdep.h>
49 #include <machine/md_var.h>
50 #include <machine/resource.h>
51 #include <x86/include/apicvar.h>
52 
53 #include <contrib/dev/acpica/include/acpi.h>
54 #include <dev/acpica/acpivar.h>
55 
56 #include <dev/hyperv/include/hyperv.h>
57 #include <dev/hyperv/include/vmbus_xact.h>
58 #include <dev/hyperv/vmbus/hyperv_reg.h>
59 #include <dev/hyperv/vmbus/hyperv_var.h>
60 #include <dev/hyperv/vmbus/vmbus_reg.h>
61 #include <dev/hyperv/vmbus/vmbus_var.h>
62 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
63 
64 #include "acpi_if.h"
65 #include "pcib_if.h"
66 #include "vmbus_if.h"
67 
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * the initial value of the GPADL id counter -- confirm against the attach
 * code.
 */
#define VMBUS_GPADL_START		0xe1e10

/*
 * Message Hypercall context.  It lives in the private area of a vmbus
 * transaction (see vmbus_msghc_get()).
 */
struct vmbus_msghc {
	/* Transaction that backs this message Hypercall. */
	struct vmbus_xact		*mh_xact;
	/*
	 * Copy of the Hypercall input parameter, used to restore the
	 * input before a retry (see vmbus_msghc_exec_noresult()).
	 */
	struct hypercall_postmsg_in	mh_inprm_save;
};
74 
/*
 * Forward declarations.
 *
 * Device, bus, pcib and vmbus interface methods.
 */
static void			vmbus_identify(driver_t *, device_t);
static int			vmbus_probe(device_t);
static int			vmbus_attach(device_t);
static int			vmbus_detach(device_t);
static int			vmbus_read_ivar(device_t, device_t, int,
				    uintptr_t *);
static int			vmbus_child_pnpinfo_str(device_t, device_t,
				    char *, size_t);
static struct resource		*vmbus_alloc_resource(device_t dev,
				    device_t child, int type, int *rid,
				    rman_res_t start, rman_res_t end,
				    rman_res_t count, u_int flags);
static int			vmbus_alloc_msi(device_t bus, device_t dev,
				    int count, int maxcount, int *irqs);
static int			vmbus_release_msi(device_t bus, device_t dev,
				    int count, int *irqs);
static int			vmbus_alloc_msix(device_t bus, device_t dev,
				    int *irq);
static int			vmbus_release_msix(device_t bus, device_t dev,
				    int irq);
static int			vmbus_map_msi(device_t bus, device_t dev,
				    int irq, uint64_t *addr, uint32_t *data);
static uint32_t			vmbus_get_version_method(device_t, device_t);
static int			vmbus_probe_guid_method(device_t, device_t,
				    const struct hyperv_guid *);
static uint32_t			vmbus_get_vcpu_id_method(device_t bus,
				    device_t dev, int cpu);
static struct taskqueue		*vmbus_get_eventtq_method(device_t, device_t,
				    int);
#ifdef EARLY_AP_STARTUP
static void			vmbus_intrhook(void *);
#endif

/*
 * Internal helpers: connection setup, device scan, message handling,
 * SynIC, DMA memory and interrupt management.
 */
static int			vmbus_init(struct vmbus_softc *);
static int			vmbus_connect(struct vmbus_softc *, uint32_t);
static int			vmbus_req_channels(struct vmbus_softc *sc);
static void			vmbus_disconnect(struct vmbus_softc *);
static int			vmbus_scan(struct vmbus_softc *);
static void			vmbus_scan_teardown(struct vmbus_softc *);
static void			vmbus_scan_done(struct vmbus_softc *,
				    const struct vmbus_message *);
static void			vmbus_chanmsg_handle(struct vmbus_softc *,
				    const struct vmbus_message *);
static void			vmbus_msg_task(void *, int);
static void			vmbus_synic_setup(void *);
static void			vmbus_synic_teardown(void *);
static int			vmbus_sysctl_version(SYSCTL_HANDLER_ARGS);
static int			vmbus_dma_alloc(struct vmbus_softc *);
static void			vmbus_dma_free(struct vmbus_softc *);
static int			vmbus_intr_setup(struct vmbus_softc *);
static void			vmbus_intr_teardown(struct vmbus_softc *);
static int			vmbus_doattach(struct vmbus_softc *);
static void			vmbus_event_proc_dummy(struct vmbus_softc *,
				    int);
129 
/* File-wide softc pointer; handed out by vmbus_get_softc(). */
static struct vmbus_softc	*vmbus_sc;

SYSCTL_NODE(_hw, OID_AUTO, vmbus, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V vmbus");

/*
 * Tunable: when non-zero, vmbus_intr_setup() starts each per-cpu event
 * taskqueue thread with a cpuset containing only that CPU.
 */
static int			vmbus_pin_evttask = 1;
SYSCTL_INT(_hw_vmbus, OID_AUTO, pin_evttask, CTLFLAG_RDTUN,
    &vmbus_pin_evttask, 0, "Pin event tasks to their respective CPU");

/* ISR entry points; vmbus_intr_setup() picks the _pti one when pti is set. */
extern inthand_t IDTVEC(vmbus_isr), IDTVEC(vmbus_isr_pti);

/* Version accepted by the host; set by vmbus_init(). */
uint32_t			vmbus_current_version;

/*
 * Versions offered to the host by vmbus_init(), tried in array order;
 * the first one the host accepts is used.
 */
static const uint32_t		vmbus_version[] = {
	VMBUS_VERSION_WIN10,
	VMBUS_VERSION_WIN8_1,
	VMBUS_VERSION_WIN8,
	VMBUS_VERSION_WIN7,
	VMBUS_VERSION_WS2008
};

/*
 * Per-message-type handlers, indexed by chm_type in
 * vmbus_chanmsg_handle(); entries left NULL are skipped there.
 */
static const vmbus_chanmsg_proc_t
vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = {
	VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done),
	VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP)
};
156 
/*
 * Method table of the vmbus driver.  Methods without a vmbus_ prefix
 * are the generic bus implementations.
 */
static device_method_t vmbus_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,		vmbus_identify),
	DEVMETHOD(device_probe,			vmbus_probe),
	DEVMETHOD(device_attach,		vmbus_attach),
	DEVMETHOD(device_detach,		vmbus_detach),
	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
	DEVMETHOD(device_suspend,		bus_generic_suspend),
	DEVMETHOD(device_resume,		bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child,		bus_generic_add_child),
	DEVMETHOD(bus_print_child,		bus_generic_print_child),
	DEVMETHOD(bus_read_ivar,		vmbus_read_ivar),
	DEVMETHOD(bus_child_pnpinfo_str,	vmbus_child_pnpinfo_str),
	DEVMETHOD(bus_alloc_resource,		vmbus_alloc_resource),
	DEVMETHOD(bus_release_resource,		bus_generic_release_resource),
	DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
	DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,		bus_generic_teardown_intr),
#if __FreeBSD_version >= 1100000
	DEVMETHOD(bus_get_cpus,			bus_generic_get_cpus),
#endif

	/* pcib interface */
	DEVMETHOD(pcib_alloc_msi,		vmbus_alloc_msi),
	DEVMETHOD(pcib_release_msi,		vmbus_release_msi),
	DEVMETHOD(pcib_alloc_msix,		vmbus_alloc_msix),
	DEVMETHOD(pcib_release_msix,		vmbus_release_msix),
	DEVMETHOD(pcib_map_msi,			vmbus_map_msi),

	/* Vmbus interface */
	DEVMETHOD(vmbus_get_version,		vmbus_get_version_method),
	DEVMETHOD(vmbus_probe_guid,		vmbus_probe_guid_method),
	DEVMETHOD(vmbus_get_vcpu_id,		vmbus_get_vcpu_id_method),
	DEVMETHOD(vmbus_get_event_taskq,	vmbus_get_eventtq_method),

	DEVMETHOD_END
};

/* Driver description: name, method table and softc size. */
static driver_t vmbus_driver = {
	"vmbus",
	vmbus_methods,
	sizeof(struct vmbus_softc)
};

static devclass_t vmbus_devclass;

/* The driver is registered under both the pcib and acpi_syscontainer buses. */
DRIVER_MODULE(vmbus, pcib, vmbus_driver, vmbus_devclass, NULL, NULL);
DRIVER_MODULE(vmbus, acpi_syscontainer, vmbus_driver, vmbus_devclass,
    NULL, NULL);

MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
MODULE_DEPEND(vmbus, pci, 1, 1, 1);
MODULE_VERSION(vmbus, 1);
213 
214 static __inline struct vmbus_softc *
215 vmbus_get_softc(void)
216 {
217 	return vmbus_sc;
218 }
219 
220 void
221 vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize)
222 {
223 	struct hypercall_postmsg_in *inprm;
224 
225 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
226 		panic("invalid data size %zu", dsize);
227 
228 	inprm = vmbus_xact_req_data(mh->mh_xact);
229 	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
230 	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
231 	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
232 	inprm->hc_dsize = dsize;
233 }
234 
235 struct vmbus_msghc *
236 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
237 {
238 	struct vmbus_msghc *mh;
239 	struct vmbus_xact *xact;
240 
241 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
242 		panic("invalid data size %zu", dsize);
243 
244 	xact = vmbus_xact_get(sc->vmbus_xc,
245 	    dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0]));
246 	if (xact == NULL)
247 		return (NULL);
248 
249 	mh = vmbus_xact_priv(xact, sizeof(*mh));
250 	mh->mh_xact = xact;
251 
252 	vmbus_msghc_reset(mh, dsize);
253 	return (mh);
254 }
255 
256 void
257 vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
258 {
259 
260 	vmbus_xact_put(mh->mh_xact);
261 }
262 
263 void *
264 vmbus_msghc_dataptr(struct vmbus_msghc *mh)
265 {
266 	struct hypercall_postmsg_in *inprm;
267 
268 	inprm = vmbus_xact_req_data(mh->mh_xact);
269 	return (inprm->hc_data);
270 }
271 
272 int
273 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
274 {
275 	sbintime_t time = SBT_1MS;
276 	struct hypercall_postmsg_in *inprm;
277 	bus_addr_t inprm_paddr;
278 	int i;
279 
280 	inprm = vmbus_xact_req_data(mh->mh_xact);
281 	inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact);
282 
283 	/*
284 	 * Save the input parameter so that we could restore the input
285 	 * parameter if the Hypercall failed.
286 	 *
287 	 * XXX
288 	 * Is this really necessary?!  i.e. Will the Hypercall ever
289 	 * overwrite the input parameter?
290 	 */
291 	memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE);
292 
293 	/*
294 	 * In order to cope with transient failures, e.g. insufficient
295 	 * resources on host side, we retry the post message Hypercall
296 	 * several times.  20 retries seem sufficient.
297 	 */
298 #define HC_RETRY_MAX	20
299 
300 	for (i = 0; i < HC_RETRY_MAX; ++i) {
301 		uint64_t status;
302 
303 		status = hypercall_post_message(inprm_paddr);
304 		if (status == HYPERCALL_STATUS_SUCCESS)
305 			return 0;
306 
307 		pause_sbt("hcpmsg", time, 0, C_HARDCLOCK);
308 		if (time < SBT_1S * 2)
309 			time *= 2;
310 
311 		/* Restore input parameter and try again */
312 		memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE);
313 	}
314 
315 #undef HC_RETRY_MAX
316 
317 	return EIO;
318 }
319 
320 int
321 vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
322 {
323 	int error;
324 
325 	vmbus_xact_activate(mh->mh_xact);
326 	error = vmbus_msghc_exec_noresult(mh);
327 	if (error)
328 		vmbus_xact_deactivate(mh->mh_xact);
329 	return error;
330 }
331 
332 void
333 vmbus_msghc_exec_cancel(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
334 {
335 
336 	vmbus_xact_deactivate(mh->mh_xact);
337 }
338 
339 const struct vmbus_message *
340 vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
341 {
342 	size_t resp_len;
343 
344 	return (vmbus_xact_wait(mh->mh_xact, &resp_len));
345 }
346 
347 const struct vmbus_message *
348 vmbus_msghc_poll_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh)
349 {
350 	size_t resp_len;
351 
352 	return (vmbus_xact_poll(mh->mh_xact, &resp_len));
353 }
354 
355 void
356 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
357 {
358 
359 	vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg));
360 }
361 
362 uint32_t
363 vmbus_gpadl_alloc(struct vmbus_softc *sc)
364 {
365 	uint32_t gpadl;
366 
367 again:
368 	gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
369 	if (gpadl == 0)
370 		goto again;
371 	return (gpadl);
372 }
373 
374 /* Used for Hyper-V socket when guest client connects to host */
375 int
376 vmbus_req_tl_connect(struct hyperv_guid *guest_srv_id,
377     struct hyperv_guid *host_srv_id)
378 {
379 	struct vmbus_softc *sc = vmbus_get_softc();
380 	struct vmbus_chanmsg_tl_connect *req;
381 	struct vmbus_msghc *mh;
382 	int error;
383 
384 	if (!sc)
385 		return ENXIO;
386 
387 	mh = vmbus_msghc_get(sc, sizeof(*req));
388 	if (mh == NULL) {
389 		device_printf(sc->vmbus_dev,
390 		    "can not get msg hypercall for tl connect\n");
391 		return ENXIO;
392 	}
393 
394 	req = vmbus_msghc_dataptr(mh);
395 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_TL_CONN;
396 	req->guest_endpoint_id = *guest_srv_id;
397 	req->host_service_id = *host_srv_id;
398 
399 	error = vmbus_msghc_exec_noresult(mh);
400 	vmbus_msghc_put(sc, mh);
401 
402 	if (error) {
403 		device_printf(sc->vmbus_dev,
404 		    "tl connect msg hypercall failed\n");
405 	}
406 
407 	return error;
408 }
409 
410 static int
411 vmbus_connect(struct vmbus_softc *sc, uint32_t version)
412 {
413 	struct vmbus_chanmsg_connect *req;
414 	const struct vmbus_message *msg;
415 	struct vmbus_msghc *mh;
416 	int error, done = 0;
417 
418 	mh = vmbus_msghc_get(sc, sizeof(*req));
419 	if (mh == NULL)
420 		return ENXIO;
421 
422 	req = vmbus_msghc_dataptr(mh);
423 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT;
424 	req->chm_ver = version;
425 	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
426 	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
427 	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
428 
429 	error = vmbus_msghc_exec(sc, mh);
430 	if (error) {
431 		vmbus_msghc_put(sc, mh);
432 		return error;
433 	}
434 
435 	msg = vmbus_msghc_wait_result(sc, mh);
436 	done = ((const struct vmbus_chanmsg_connect_resp *)
437 	    msg->msg_data)->chm_done;
438 
439 	vmbus_msghc_put(sc, mh);
440 
441 	return (done ? 0 : EOPNOTSUPP);
442 }
443 
444 static int
445 vmbus_init(struct vmbus_softc *sc)
446 {
447 	int i;
448 
449 	for (i = 0; i < nitems(vmbus_version); ++i) {
450 		int error;
451 
452 		error = vmbus_connect(sc, vmbus_version[i]);
453 		if (!error) {
454 			vmbus_current_version = vmbus_version[i];
455 			sc->vmbus_version = vmbus_version[i];
456 			device_printf(sc->vmbus_dev, "version %u.%u\n",
457 			    VMBUS_VERSION_MAJOR(sc->vmbus_version),
458 			    VMBUS_VERSION_MINOR(sc->vmbus_version));
459 			return 0;
460 		}
461 	}
462 	return ENXIO;
463 }
464 
465 static void
466 vmbus_disconnect(struct vmbus_softc *sc)
467 {
468 	struct vmbus_chanmsg_disconnect *req;
469 	struct vmbus_msghc *mh;
470 	int error;
471 
472 	mh = vmbus_msghc_get(sc, sizeof(*req));
473 	if (mh == NULL) {
474 		device_printf(sc->vmbus_dev,
475 		    "can not get msg hypercall for disconnect\n");
476 		return;
477 	}
478 
479 	req = vmbus_msghc_dataptr(mh);
480 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT;
481 
482 	error = vmbus_msghc_exec_noresult(mh);
483 	vmbus_msghc_put(sc, mh);
484 
485 	if (error) {
486 		device_printf(sc->vmbus_dev,
487 		    "disconnect msg hypercall failed\n");
488 	}
489 }
490 
491 static int
492 vmbus_req_channels(struct vmbus_softc *sc)
493 {
494 	struct vmbus_chanmsg_chrequest *req;
495 	struct vmbus_msghc *mh;
496 	int error;
497 
498 	mh = vmbus_msghc_get(sc, sizeof(*req));
499 	if (mh == NULL)
500 		return ENXIO;
501 
502 	req = vmbus_msghc_dataptr(mh);
503 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST;
504 
505 	error = vmbus_msghc_exec_noresult(mh);
506 	vmbus_msghc_put(sc, mh);
507 
508 	return error;
509 }
510 
511 static void
512 vmbus_scan_done_task(void *xsc, int pending __unused)
513 {
514 	struct vmbus_softc *sc = xsc;
515 
516 	mtx_lock(&Giant);
517 	sc->vmbus_scandone = true;
518 	mtx_unlock(&Giant);
519 	wakeup(&sc->vmbus_scandone);
520 }
521 
522 static void
523 vmbus_scan_done(struct vmbus_softc *sc,
524     const struct vmbus_message *msg __unused)
525 {
526 
527 	taskqueue_enqueue(sc->vmbus_devtq, &sc->vmbus_scandone_task);
528 }
529 
/*
 * Attach non-channel children, create the device and sub-channel
 * taskqueues, request channel offers from the host, and sleep until
 * vmbus_scan_done_task() sets vmbus_scandone.
 *
 * Must be called with Giant held (GIANT_REQUIRED below).
 *
 * Returns 0 on success, or the error from vmbus_req_channels().  On that
 * error path the taskqueues created here are left in place; they are
 * released by vmbus_scan_teardown().
 */
static int
vmbus_scan(struct vmbus_softc *sc)
{
	int error;

	/*
	 * Identify, probe and attach for non-channel devices.
	 */
	bus_generic_probe(sc->vmbus_dev);
	bus_generic_attach(sc->vmbus_dev);

	/*
	 * This taskqueue serializes vmbus devices' attach and detach
	 * for channel offer and rescind messages.
	 */
	sc->vmbus_devtq = taskqueue_create("vmbus dev", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->vmbus_devtq);
	taskqueue_start_threads(&sc->vmbus_devtq, 1, PI_NET, "vmbusdev");
	TASK_INIT(&sc->vmbus_scandone_task, 0, vmbus_scan_done_task, sc);

	/*
	 * This taskqueue handles sub-channel detach, so that vmbus
	 * device's detach running in vmbus_devtq can drain its sub-
	 * channels.
	 */
	sc->vmbus_subchtq = taskqueue_create("vmbus subch", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->vmbus_subchtq);
	taskqueue_start_threads(&sc->vmbus_subchtq, 1, PI_NET, "vmbussch");

	/*
	 * Start vmbus scanning.
	 */
	error = vmbus_req_channels(sc);
	if (error) {
		device_printf(sc->vmbus_dev, "channel request failed: %d\n",
		    error);
		return (error);
	}

	/*
	 * Wait for all vmbus devices from the initial channel offers to be
	 * attached.
	 */
	GIANT_REQUIRED;
	/* Giant is dropped while asleep, letting the scan-done task run. */
	while (!sc->vmbus_scandone)
		mtx_sleep(&sc->vmbus_scandone, &Giant, 0, "vmbusdev", 0);

	if (bootverbose) {
		device_printf(sc->vmbus_dev, "device scan, probe and attach "
		    "done\n");
	}
	return (0);
}
583 
584 static void
585 vmbus_scan_teardown(struct vmbus_softc *sc)
586 {
587 
588 	GIANT_REQUIRED;
589 	if (sc->vmbus_devtq != NULL) {
590 		mtx_unlock(&Giant);
591 		taskqueue_free(sc->vmbus_devtq);
592 		mtx_lock(&Giant);
593 		sc->vmbus_devtq = NULL;
594 	}
595 	if (sc->vmbus_subchtq != NULL) {
596 		mtx_unlock(&Giant);
597 		taskqueue_free(sc->vmbus_subchtq);
598 		mtx_lock(&Giant);
599 		sc->vmbus_subchtq = NULL;
600 	}
601 }
602 
603 static void
604 vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg)
605 {
606 	vmbus_chanmsg_proc_t msg_proc;
607 	uint32_t msg_type;
608 
609 	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
610 	if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) {
611 		device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
612 		    msg_type);
613 		return;
614 	}
615 
616 	msg_proc = vmbus_chanmsg_handlers[msg_type];
617 	if (msg_proc != NULL)
618 		msg_proc(sc, msg);
619 
620 	/* Channel specific processing */
621 	vmbus_chan_msgproc(sc, msg);
622 }
623 
/*
 * Task body draining the VMBUS_SINT_MESSAGE slot of the message page
 * belonging to the CPU this task runs on (curcpu).
 *
 * The loop runs until the slot reads HYPERV_MSGTYPE_NONE.  Channel
 * messages are passed to vmbus_chanmsg_handle(); messages of any other
 * type are discarded.  After each message the slot is marked free and,
 * if the message had VMBUS_MSGFLAG_PENDING set, MSR_HV_EOM is written.
 */
static void
vmbus_msg_task(void *xsc, int pending __unused)
{
	struct vmbus_softc *sc = xsc;
	volatile struct vmbus_message *msg;

	msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE;
	for (;;) {
		if (msg->msg_type == HYPERV_MSGTYPE_NONE) {
			/* No message */
			break;
		} else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
			/* Channel message */
			vmbus_chanmsg_handle(sc,
			    __DEVOLATILE(const struct vmbus_message *, msg));
		}

		/* Mark the slot free. */
		msg->msg_type = HYPERV_MSGTYPE_NONE;
		/*
		 * Make sure the write to msg_type (i.e. set to
		 * HYPERV_MSGTYPE_NONE) happens before we read the
		 * msg_flags and EOMing. Otherwise, the EOMing will
		 * not deliver any more messages since there is no
		 * empty slot
		 *
		 * NOTE:
		 * mb() is used here, since atomic_thread_fence_seq_cst()
		 * will become compiler fence on UP kernel.
		 */
		mb();
		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
			/*
			 * This will cause message queue rescan to possibly
			 * deliver another msg from the hypervisor
			 */
			wrmsr(MSR_HV_EOM, 0);
		}
	}
}
663 
/*
 * Core of the vmbus interrupt handling for 'cpu'; called from
 * vmbus_handle_intr() inside a critical section.
 *
 * In order: handles an expired event timer message (VMBUS_SINT_TIMER
 * slot), runs the softc's event processing callback, and, if the
 * VMBUS_SINT_MESSAGE slot is not empty, enqueues the per-cpu message
 * task.  Messages are not processed here; that is left to
 * vmbus_msg_task().
 *
 * Always returns FILTER_HANDLED.
 */
static __inline int
vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu)
{
	volatile struct vmbus_message *msg;
	struct vmbus_message *msg_base;

	msg_base = VMBUS_PCPU_GET(sc, message, cpu);

	/*
	 * Check event timer.
	 *
	 * TODO: move this to independent IDT vector.
	 */
	msg = msg_base + VMBUS_SINT_TIMER;
	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) {
		/* Free the slot first, then run the event timer handler. */
		msg->msg_type = HYPERV_MSGTYPE_NONE;

		vmbus_et_intr(frame);

		/*
		 * Make sure the write to msg_type (i.e. set to
		 * HYPERV_MSGTYPE_NONE) happens before we read the
		 * msg_flags and EOMing. Otherwise, the EOMing will
		 * not deliver any more messages since there is no
		 * empty slot
		 *
		 * NOTE:
		 * mb() is used here, since atomic_thread_fence_seq_cst()
		 * will become compiler fence on UP kernel.
		 */
		mb();
		if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
			/*
			 * This will cause message queue rescan to possibly
			 * deliver another msg from the hypervisor
			 */
			wrmsr(MSR_HV_EOM, 0);
		}
	}

	/*
	 * Check events.  Hot path for network and storage I/O data; high rate.
	 *
	 * NOTE:
	 * As recommended by the Windows guest fellows, we check events before
	 * checking messages.
	 */
	sc->vmbus_event_proc(sc, cpu);

	/*
	 * Check messages.  Mainly management stuffs; ultra low rate.
	 */
	msg = msg_base + VMBUS_SINT_MESSAGE;
	if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
		taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu),
		    VMBUS_PCPU_PTR(sc, message_task, cpu));
	}

	return (FILTER_HANDLED);
}
724 
725 void
726 vmbus_handle_intr(struct trapframe *trap_frame)
727 {
728 	struct vmbus_softc *sc = vmbus_get_softc();
729 	int cpu = curcpu;
730 
731 	/*
732 	 * Disable preemption.
733 	 */
734 	critical_enter();
735 
736 	/*
737 	 * Do a little interrupt counting.
738 	 */
739 	(*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++;
740 
741 	vmbus_handle_intr1(sc, trap_frame, cpu);
742 
743 	/*
744 	 * Enable preemption.
745 	 */
746 	critical_exit();
747 }
748 
/*
 * Set up the SynIC of the CPU this function runs on (curcpu): record
 * the virtual processor id, point the SynIC at this CPU's message and
 * event flags pages, route the message and timer SINTs to the vmbus IDT
 * vector, and finally enable the SynIC.
 *
 * Every MSR is updated read-modify-write so that its reserved bits
 * (the *_RSVD_MASK parts) keep the value read back.
 *
 * NOTE(review): the caller is not visible here; presumably this is run
 * once on every CPU -- confirm.
 */
static void
vmbus_synic_setup(void *xsc)
{
	struct vmbus_softc *sc = xsc;
	int cpu = curcpu;
	uint64_t val, orig;
	uint32_t sint;

	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
		/* Save virtual processor id. */
		VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX);
	} else {
		/* Set virtual processor id to 0 for compatibility. */
		VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0;
	}

	/*
	 * Setup the SynIC message.
	 */
	orig = rdmsr(MSR_HV_SIMP);
	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
	    ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) <<
	     MSR_HV_SIMP_PGSHIFT);
	wrmsr(MSR_HV_SIMP, val);

	/*
	 * Setup the SynIC event flags.
	 */
	orig = rdmsr(MSR_HV_SIEFP);
	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
	    ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu)
	      >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT);
	wrmsr(MSR_HV_SIEFP, val);


	/*
	 * Configure and unmask SINT for message and event flags.
	 */
	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
	orig = rdmsr(sint);
	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
	    (orig & MSR_HV_SINT_RSVD_MASK);
	wrmsr(sint, val);

	/*
	 * Configure and unmask SINT for timer.
	 */
	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
	orig = rdmsr(sint);
	val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI |
	    (orig & MSR_HV_SINT_RSVD_MASK);
	wrmsr(sint, val);

	/*
	 * All done; enable SynIC.
	 */
	orig = rdmsr(MSR_HV_SCONTROL);
	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
	wrmsr(MSR_HV_SCONTROL, val);
}
809 
/*
 * Undo vmbus_synic_setup() on the CPU this function runs on: disable
 * the SynIC, mask the message and timer SINTs, then clear the message
 * and event flags page registers.  Only the reserved bits of SCONTROL,
 * SIMP and SIEFP are written back.
 *
 * 'arg' is unused.
 */
static void
vmbus_synic_teardown(void *arg)
{
	uint64_t orig;
	uint32_t sint;

	/*
	 * Disable SynIC.
	 */
	orig = rdmsr(MSR_HV_SCONTROL);
	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));

	/*
	 * Mask message and event flags SINT.
	 */
	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
	orig = rdmsr(sint);
	wrmsr(sint, orig | MSR_HV_SINT_MASKED);

	/*
	 * Mask timer SINT.
	 */
	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
	orig = rdmsr(sint);
	wrmsr(sint, orig | MSR_HV_SINT_MASKED);

	/*
	 * Teardown SynIC message.
	 */
	orig = rdmsr(MSR_HV_SIMP);
	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));

	/*
	 * Teardown SynIC event flags.
	 */
	orig = rdmsr(MSR_HV_SIEFP);
	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
}
848 
849 static int
850 vmbus_dma_alloc(struct vmbus_softc *sc)
851 {
852 	bus_dma_tag_t parent_dtag;
853 	uint8_t *evtflags;
854 	int cpu;
855 
856 	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
857 	CPU_FOREACH(cpu) {
858 		void *ptr;
859 
860 		/*
861 		 * Per-cpu messages and event flags.
862 		 */
863 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
864 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu),
865 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
866 		if (ptr == NULL)
867 			return ENOMEM;
868 		VMBUS_PCPU_GET(sc, message, cpu) = ptr;
869 
870 		ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
871 		    PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
872 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
873 		if (ptr == NULL)
874 			return ENOMEM;
875 		VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr;
876 	}
877 
878 	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
879 	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
880 	if (evtflags == NULL)
881 		return ENOMEM;
882 	sc->vmbus_rx_evtflags = (u_long *)evtflags;
883 	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
884 	sc->vmbus_evtflags = evtflags;
885 
886 	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
887 	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
888 	if (sc->vmbus_mnf1 == NULL)
889 		return ENOMEM;
890 
891 	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
892 	    sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma,
893 	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
894 	if (sc->vmbus_mnf2 == NULL)
895 		return ENOMEM;
896 
897 	return 0;
898 }
899 
900 static void
901 vmbus_dma_free(struct vmbus_softc *sc)
902 {
903 	int cpu;
904 
905 	if (sc->vmbus_evtflags != NULL) {
906 		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
907 		sc->vmbus_evtflags = NULL;
908 		sc->vmbus_rx_evtflags = NULL;
909 		sc->vmbus_tx_evtflags = NULL;
910 	}
911 	if (sc->vmbus_mnf1 != NULL) {
912 		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
913 		sc->vmbus_mnf1 = NULL;
914 	}
915 	if (sc->vmbus_mnf2 != NULL) {
916 		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
917 		sc->vmbus_mnf2 = NULL;
918 	}
919 
920 	CPU_FOREACH(cpu) {
921 		if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) {
922 			hyperv_dmamem_free(
923 			    VMBUS_PCPU_PTR(sc, message_dma, cpu),
924 			    VMBUS_PCPU_GET(sc, message, cpu));
925 			VMBUS_PCPU_GET(sc, message, cpu) = NULL;
926 		}
927 		if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) {
928 			hyperv_dmamem_free(
929 			    VMBUS_PCPU_PTR(sc, event_flags_dma, cpu),
930 			    VMBUS_PCPU_GET(sc, event_flags, cpu));
931 			VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL;
932 		}
933 	}
934 }
935 
/*
 * Prepare everything the vmbus ISR needs.
 *
 * For every CPU: register an interrupt counter named "cpu<N>:hyperv",
 * create the event taskqueue (its thread restricted to that CPU only
 * when vmbus_pin_evttask is set), and create the message taskqueue
 * (its thread always restricted to that CPU) together with the message
 * task running vmbus_msg_task().
 *
 * Afterwards an IDT vector is allocated for the ISR; the PTI variant of
 * the ISR is installed when 'pti' is set.
 *
 * Returns 0 on success, or ENXIO if no IDT vector is available.  The
 * per-cpu resources are not released here on failure; see
 * vmbus_intr_teardown().
 */
static int
vmbus_intr_setup(struct vmbus_softc *sc)
{
	int cpu;

	CPU_FOREACH(cpu) {
		char buf[MAXCOMLEN + 1];
		cpuset_t cpu_mask;

		/* Allocate an interrupt counter for Hyper-V interrupt */
		snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu);
		intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu));

		/*
		 * Setup taskqueue to handle events.  Task will be per-
		 * channel.
		 */
		VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast(
		    "hyperv event", M_WAITOK, taskqueue_thread_enqueue,
		    VMBUS_PCPU_PTR(sc, event_tq, cpu));
		if (vmbus_pin_evttask) {
			CPU_SETOF(cpu, &cpu_mask);
			taskqueue_start_threads_cpuset(
			    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
			    &cpu_mask, "hvevent%d", cpu);
		} else {
			taskqueue_start_threads(
			    VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET,
			    "hvevent%d", cpu);
		}

		/*
		 * Setup tasks and taskqueues to handle messages.
		 */
		VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast(
		    "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
		    VMBUS_PCPU_PTR(sc, message_tq, cpu));
		/* vmbus_msg_task() reads curcpu's message page. */
		CPU_SETOF(cpu, &cpu_mask);
		taskqueue_start_threads_cpuset(
		    VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask,
		    "hvmsg%d", cpu);
		TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0,
		    vmbus_msg_task, sc);
	}

	/*
	 * All Hyper-V ISR required resources are setup, now let's find a
	 * free IDT vector for Hyper-V ISR and set it up.
	 */
	sc->vmbus_idtvec = lapic_ipi_alloc(pti ? IDTVEC(vmbus_isr_pti) :
	    IDTVEC(vmbus_isr));
	if (sc->vmbus_idtvec < 0) {
		device_printf(sc->vmbus_dev, "cannot find free IDT vector\n");
		return ENXIO;
	}
	if (bootverbose) {
		device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n",
		    sc->vmbus_idtvec);
	}
	return 0;
}
997 
998 static void
999 vmbus_intr_teardown(struct vmbus_softc *sc)
1000 {
1001 	int cpu;
1002 
1003 	if (sc->vmbus_idtvec >= 0) {
1004 		lapic_ipi_free(sc->vmbus_idtvec);
1005 		sc->vmbus_idtvec = -1;
1006 	}
1007 
1008 	CPU_FOREACH(cpu) {
1009 		if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) {
1010 			taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu));
1011 			VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL;
1012 		}
1013 		if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) {
1014 			taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu),
1015 			    VMBUS_PCPU_PTR(sc, message_task, cpu));
1016 			taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu));
1017 			VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL;
1018 		}
1019 	}
1020 }
1021 
1022 static int
1023 vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
1024 {
1025 	return (ENOENT);
1026 }
1027 
1028 static int
1029 vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
1030 {
1031 	const struct vmbus_channel *chan;
1032 	char guidbuf[HYPERV_GUID_STRLEN];
1033 
1034 	chan = vmbus_get_channel(child);
1035 	if (chan == NULL) {
1036 		/* Event timer device, which does not belong to a channel */
1037 		return (0);
1038 	}
1039 
1040 	strlcat(buf, "classid=", buflen);
1041 	hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf));
1042 	strlcat(buf, guidbuf, buflen);
1043 
1044 	strlcat(buf, " deviceid=", buflen);
1045 	hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf));
1046 	strlcat(buf, guidbuf, buflen);
1047 
1048 	return (0);
1049 }
1050 
1051 int
1052 vmbus_add_child(struct vmbus_channel *chan)
1053 {
1054 	struct vmbus_softc *sc = chan->ch_vmbus;
1055 	device_t parent = sc->vmbus_dev;
1056 
1057 	mtx_lock(&Giant);
1058 
1059 	chan->ch_dev = device_add_child(parent, NULL, -1);
1060 	if (chan->ch_dev == NULL) {
1061 		mtx_unlock(&Giant);
1062 		device_printf(parent, "device_add_child for chan%u failed\n",
1063 		    chan->ch_id);
1064 		return (ENXIO);
1065 	}
1066 	device_set_ivars(chan->ch_dev, chan);
1067 	device_probe_and_attach(chan->ch_dev);
1068 
1069 	mtx_unlock(&Giant);
1070 	return (0);
1071 }
1072 
1073 int
1074 vmbus_delete_child(struct vmbus_channel *chan)
1075 {
1076 	int error = 0;
1077 
1078 	mtx_lock(&Giant);
1079 	if (chan->ch_dev != NULL) {
1080 		error = device_delete_child(chan->ch_vmbus->vmbus_dev,
1081 		    chan->ch_dev);
1082 		chan->ch_dev = NULL;
1083 	}
1084 	mtx_unlock(&Giant);
1085 	return (error);
1086 }
1087 
1088 static int
1089 vmbus_sysctl_version(SYSCTL_HANDLER_ARGS)
1090 {
1091 	struct vmbus_softc *sc = arg1;
1092 	char verstr[16];
1093 
1094 	snprintf(verstr, sizeof(verstr), "%u.%u",
1095 	    VMBUS_VERSION_MAJOR(sc->vmbus_version),
1096 	    VMBUS_VERSION_MINOR(sc->vmbus_version));
1097 	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
1098 }
1099 
1100 /*
1101  * We need the function to make sure the MMIO resource is allocated from the
1102  * ranges found in _CRS.
1103  *
1104  * For the release function, we can use bus_generic_release_resource().
1105  */
1106 static struct resource *
1107 vmbus_alloc_resource(device_t dev, device_t child, int type, int *rid,
1108     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1109 {
1110 	device_t parent = device_get_parent(dev);
1111 	struct resource *res;
1112 
1113 #ifdef NEW_PCIB
1114 	if (type == SYS_RES_MEMORY) {
1115 		struct vmbus_softc *sc = device_get_softc(dev);
1116 
1117 		res = pcib_host_res_alloc(&sc->vmbus_mmio_res, child, type,
1118 		    rid, start, end, count, flags);
1119 	} else
1120 #endif
1121 	{
1122 		res = BUS_ALLOC_RESOURCE(parent, child, type, rid, start,
1123 		    end, count, flags);
1124 	}
1125 
1126 	return (res);
1127 }
1128 
1129 static int
1130 vmbus_alloc_msi(device_t bus, device_t dev, int count, int maxcount, int *irqs)
1131 {
1132 
1133 	return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount,
1134 	    irqs));
1135 }
1136 
1137 static int
1138 vmbus_release_msi(device_t bus, device_t dev, int count, int *irqs)
1139 {
1140 
1141 	return (PCIB_RELEASE_MSI(device_get_parent(bus), dev, count, irqs));
1142 }
1143 
1144 static int
1145 vmbus_alloc_msix(device_t bus, device_t dev, int *irq)
1146 {
1147 
1148 	return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq));
1149 }
1150 
1151 static int
1152 vmbus_release_msix(device_t bus, device_t dev, int irq)
1153 {
1154 
1155 	return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq));
1156 }
1157 
1158 static int
1159 vmbus_map_msi(device_t bus, device_t dev, int irq, uint64_t *addr,
1160 	uint32_t *data)
1161 {
1162 
1163 	return (PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data));
1164 }
1165 
1166 static uint32_t
1167 vmbus_get_version_method(device_t bus, device_t dev)
1168 {
1169 	struct vmbus_softc *sc = device_get_softc(bus);
1170 
1171 	return sc->vmbus_version;
1172 }
1173 
1174 static int
1175 vmbus_probe_guid_method(device_t bus, device_t dev,
1176     const struct hyperv_guid *guid)
1177 {
1178 	const struct vmbus_channel *chan = vmbus_get_channel(dev);
1179 
1180 	if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0)
1181 		return 0;
1182 	return ENXIO;
1183 }
1184 
1185 static uint32_t
1186 vmbus_get_vcpu_id_method(device_t bus, device_t dev, int cpu)
1187 {
1188 	const struct vmbus_softc *sc = device_get_softc(bus);
1189 
1190 	return (VMBUS_PCPU_GET(sc, vcpuid, cpu));
1191 }
1192 
1193 static struct taskqueue *
1194 vmbus_get_eventtq_method(device_t bus, device_t dev __unused, int cpu)
1195 {
1196 	const struct vmbus_softc *sc = device_get_softc(bus);
1197 
1198 	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu%d", cpu));
1199 	return (VMBUS_PCPU_GET(sc, event_tq, cpu));
1200 }
1201 
1202 #ifdef NEW_PCIB
1203 #define VTPM_BASE_ADDR 0xfed40000
1204 #define FOUR_GB (1ULL << 32)
1205 
1206 enum parse_pass { parse_64, parse_32 };
1207 
1208 struct parse_context {
1209 	device_t vmbus_dev;
1210 	enum parse_pass pass;
1211 };
1212 
1213 static ACPI_STATUS
1214 parse_crs(ACPI_RESOURCE *res, void *ctx)
1215 {
1216 	const struct parse_context *pc = ctx;
1217 	device_t vmbus_dev = pc->vmbus_dev;
1218 
1219 	struct vmbus_softc *sc = device_get_softc(vmbus_dev);
1220 	UINT64 start, end;
1221 
1222 	switch (res->Type) {
1223 	case ACPI_RESOURCE_TYPE_ADDRESS32:
1224 		start = res->Data.Address32.Address.Minimum;
1225 		end = res->Data.Address32.Address.Maximum;
1226 		break;
1227 
1228 	case ACPI_RESOURCE_TYPE_ADDRESS64:
1229 		start = res->Data.Address64.Address.Minimum;
1230 		end = res->Data.Address64.Address.Maximum;
1231 		break;
1232 
1233 	default:
1234 		/* Unused types. */
1235 		return (AE_OK);
1236 	}
1237 
1238 	/*
1239 	 * We don't use <1MB addresses.
1240 	 */
1241 	if (end < 0x100000)
1242 		return (AE_OK);
1243 
1244 	/* Don't conflict with vTPM. */
1245 	if (end >= VTPM_BASE_ADDR && start < VTPM_BASE_ADDR)
1246 		end = VTPM_BASE_ADDR - 1;
1247 
1248 	if ((pc->pass == parse_32 && start < FOUR_GB) ||
1249 	    (pc->pass == parse_64 && start >= FOUR_GB))
1250 		pcib_host_res_decodes(&sc->vmbus_mmio_res, SYS_RES_MEMORY,
1251 		    start, end, 0);
1252 
1253 	return (AE_OK);
1254 }
1255 
1256 static void
1257 vmbus_get_crs(device_t dev, device_t vmbus_dev, enum parse_pass pass)
1258 {
1259 	struct parse_context pc;
1260 	ACPI_STATUS status;
1261 
1262 	if (bootverbose)
1263 		device_printf(dev, "walking _CRS, pass=%d\n", pass);
1264 
1265 	pc.vmbus_dev = vmbus_dev;
1266 	pc.pass = pass;
1267 	status = AcpiWalkResources(acpi_get_handle(dev), "_CRS",
1268 			parse_crs, &pc);
1269 
1270 	if (bootverbose && ACPI_FAILURE(status))
1271 		device_printf(dev, "_CRS: not found, pass=%d\n", pass);
1272 }
1273 
1274 static void
1275 vmbus_get_mmio_res_pass(device_t dev, enum parse_pass pass)
1276 {
1277 	device_t acpi0, parent;
1278 
1279 	parent = device_get_parent(dev);
1280 
1281 	acpi0 = device_get_parent(parent);
1282 	if (strcmp("acpi0", device_get_nameunit(acpi0)) == 0) {
1283 		device_t *children;
1284 		int count;
1285 
1286 		/*
1287 		 * Try to locate VMBUS resources and find _CRS on them.
1288 		 */
1289 		if (device_get_children(acpi0, &children, &count) == 0) {
1290 			int i;
1291 
1292 			for (i = 0; i < count; ++i) {
1293 				if (!device_is_attached(children[i]))
1294 					continue;
1295 
1296 				if (strcmp("vmbus_res",
1297 				    device_get_name(children[i])) == 0)
1298 					vmbus_get_crs(children[i], dev, pass);
1299 			}
1300 			free(children, M_TEMP);
1301 		}
1302 
1303 		/*
1304 		 * Try to find _CRS on acpi.
1305 		 */
1306 		vmbus_get_crs(acpi0, dev, pass);
1307 	} else {
1308 		device_printf(dev, "not grandchild of acpi\n");
1309 	}
1310 
1311 	/*
1312 	 * Try to find _CRS on parent.
1313 	 */
1314 	vmbus_get_crs(parent, dev, pass);
1315 }
1316 
1317 static void
1318 vmbus_get_mmio_res(device_t dev)
1319 {
1320 	struct vmbus_softc *sc = device_get_softc(dev);
1321 	/*
1322 	 * We walk the resources twice to make sure that: in the resource
1323 	 * list, the 32-bit resources appear behind the 64-bit resources.
1324 	 * NB: resource_list_add() uses INSERT_TAIL. This way, when we
1325 	 * iterate through the list to find a range for a 64-bit BAR in
1326 	 * vmbus_alloc_resource(), we can make sure we try to use >4GB
1327 	 * ranges first.
1328 	 */
1329 	pcib_host_res_init(dev, &sc->vmbus_mmio_res);
1330 
1331 	vmbus_get_mmio_res_pass(dev, parse_64);
1332 	vmbus_get_mmio_res_pass(dev, parse_32);
1333 }
1334 
1335 static void
1336 vmbus_free_mmio_res(device_t dev)
1337 {
1338 	struct vmbus_softc *sc = device_get_softc(dev);
1339 
1340 	pcib_host_res_free(dev, &sc->vmbus_mmio_res);
1341 }
1342 #endif	/* NEW_PCIB */
1343 
1344 static void
1345 vmbus_identify(driver_t *driver, device_t parent)
1346 {
1347 
1348 	if (device_get_unit(parent) != 0 || vm_guest != VM_GUEST_HV ||
1349 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1350 		return;
1351 	device_add_child(parent, "vmbus", -1);
1352 }
1353 
1354 static int
1355 vmbus_probe(device_t dev)
1356 {
1357 
1358 	if (device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV ||
1359 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
1360 		return (ENXIO);
1361 
1362 	device_set_desc(dev, "Hyper-V Vmbus");
1363 	return (BUS_PROBE_DEFAULT);
1364 }
1365 
1366 /**
1367  * @brief Main vmbus driver initialization routine.
1368  *
1369  * Here, we
1370  * - initialize the vmbus driver context
1371  * - setup various driver entry points
1372  * - invoke the vmbus hv main init routine
1373  * - get the irq resource
1374  * - invoke the vmbus to add the vmbus root device
1375  * - setup the vmbus root device
1376  * - retrieve the channel offers
1377  */
1378 static int
1379 vmbus_doattach(struct vmbus_softc *sc)
1380 {
1381 	struct sysctl_oid_list *child;
1382 	struct sysctl_ctx_list *ctx;
1383 	int ret;
1384 
1385 	if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED)
1386 		return (0);
1387 
1388 #ifdef NEW_PCIB
1389 	vmbus_get_mmio_res(sc->vmbus_dev);
1390 #endif
1391 
1392 	sc->vmbus_flags |= VMBUS_FLAG_ATTACHED;
1393 
1394 	sc->vmbus_gpadl = VMBUS_GPADL_START;
1395 	mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF);
1396 	TAILQ_INIT(&sc->vmbus_prichans);
1397 	mtx_init(&sc->vmbus_chan_lock, "vmbus channel", NULL, MTX_DEF);
1398 	TAILQ_INIT(&sc->vmbus_chans);
1399 	sc->vmbus_chmap = malloc(
1400 	    sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF,
1401 	    M_WAITOK | M_ZERO);
1402 
1403 	/*
1404 	 * Create context for "post message" Hypercalls
1405 	 */
1406 	sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev),
1407 	    HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE,
1408 	    sizeof(struct vmbus_msghc));
1409 	if (sc->vmbus_xc == NULL) {
1410 		ret = ENXIO;
1411 		goto cleanup;
1412 	}
1413 
1414 	/*
1415 	 * Allocate DMA stuffs.
1416 	 */
1417 	ret = vmbus_dma_alloc(sc);
1418 	if (ret != 0)
1419 		goto cleanup;
1420 
1421 	/*
1422 	 * Setup interrupt.
1423 	 */
1424 	ret = vmbus_intr_setup(sc);
1425 	if (ret != 0)
1426 		goto cleanup;
1427 
1428 	/*
1429 	 * Setup SynIC.
1430 	 */
1431 	if (bootverbose)
1432 		device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started);
1433 	smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc);
1434 	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
1435 
1436 	/*
1437 	 * Initialize vmbus, e.g. connect to Hypervisor.
1438 	 */
1439 	ret = vmbus_init(sc);
1440 	if (ret != 0)
1441 		goto cleanup;
1442 
1443 	if (sc->vmbus_version == VMBUS_VERSION_WS2008 ||
1444 	    sc->vmbus_version == VMBUS_VERSION_WIN7)
1445 		sc->vmbus_event_proc = vmbus_event_proc_compat;
1446 	else
1447 		sc->vmbus_event_proc = vmbus_event_proc;
1448 
1449 	ret = vmbus_scan(sc);
1450 	if (ret != 0)
1451 		goto cleanup;
1452 
1453 	ctx = device_get_sysctl_ctx(sc->vmbus_dev);
1454 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev));
1455 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version",
1456 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
1457 	    vmbus_sysctl_version, "A", "vmbus version");
1458 
1459 	return (ret);
1460 
1461 cleanup:
1462 	vmbus_scan_teardown(sc);
1463 	vmbus_intr_teardown(sc);
1464 	vmbus_dma_free(sc);
1465 	if (sc->vmbus_xc != NULL) {
1466 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1467 		sc->vmbus_xc = NULL;
1468 	}
1469 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1470 	mtx_destroy(&sc->vmbus_prichan_lock);
1471 	mtx_destroy(&sc->vmbus_chan_lock);
1472 
1473 	return (ret);
1474 }
1475 
1476 static void
1477 vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused)
1478 {
1479 }
1480 
1481 #ifdef EARLY_AP_STARTUP
1482 
1483 static void
1484 vmbus_intrhook(void *xsc)
1485 {
1486 	struct vmbus_softc *sc = xsc;
1487 
1488 	if (bootverbose)
1489 		device_printf(sc->vmbus_dev, "intrhook\n");
1490 	vmbus_doattach(sc);
1491 	config_intrhook_disestablish(&sc->vmbus_intrhook);
1492 }
1493 
1494 #endif	/* EARLY_AP_STARTUP */
1495 
1496 static int
1497 vmbus_attach(device_t dev)
1498 {
1499 	vmbus_sc = device_get_softc(dev);
1500 	vmbus_sc->vmbus_dev = dev;
1501 	vmbus_sc->vmbus_idtvec = -1;
1502 
1503 	/*
1504 	 * Event processing logic will be configured:
1505 	 * - After the vmbus protocol version negotiation.
1506 	 * - Before we request channel offers.
1507 	 */
1508 	vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy;
1509 
1510 #ifdef EARLY_AP_STARTUP
1511 	/*
1512 	 * Defer the real attach until the pause(9) works as expected.
1513 	 */
1514 	vmbus_sc->vmbus_intrhook.ich_func = vmbus_intrhook;
1515 	vmbus_sc->vmbus_intrhook.ich_arg = vmbus_sc;
1516 	config_intrhook_establish(&vmbus_sc->vmbus_intrhook);
1517 #else	/* !EARLY_AP_STARTUP */
1518 	/*
1519 	 * If the system has already booted and thread
1520 	 * scheduling is possible indicated by the global
1521 	 * cold set to zero, we just call the driver
1522 	 * initialization directly.
1523 	 */
1524 	if (!cold)
1525 		vmbus_doattach(vmbus_sc);
1526 #endif	/* EARLY_AP_STARTUP */
1527 
1528 	return (0);
1529 }
1530 
1531 static int
1532 vmbus_detach(device_t dev)
1533 {
1534 	struct vmbus_softc *sc = device_get_softc(dev);
1535 
1536 	bus_generic_detach(dev);
1537 	vmbus_chan_destroy_all(sc);
1538 
1539 	vmbus_scan_teardown(sc);
1540 
1541 	vmbus_disconnect(sc);
1542 
1543 	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) {
1544 		sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC;
1545 		smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL);
1546 	}
1547 
1548 	vmbus_intr_teardown(sc);
1549 	vmbus_dma_free(sc);
1550 
1551 	if (sc->vmbus_xc != NULL) {
1552 		vmbus_xact_ctx_destroy(sc->vmbus_xc);
1553 		sc->vmbus_xc = NULL;
1554 	}
1555 
1556 	free(__DEVOLATILE(void *, sc->vmbus_chmap), M_DEVBUF);
1557 	mtx_destroy(&sc->vmbus_prichan_lock);
1558 	mtx_destroy(&sc->vmbus_chan_lock);
1559 
1560 #ifdef NEW_PCIB
1561 	vmbus_free_mmio_res(dev);
1562 #endif
1563 
1564 	return (0);
1565 }
1566 
1567 #ifndef EARLY_AP_STARTUP
1568 
1569 static void
1570 vmbus_sysinit(void *arg __unused)
1571 {
1572 	struct vmbus_softc *sc = vmbus_get_softc();
1573 
1574 	if (vm_guest != VM_GUEST_HV || sc == NULL)
1575 		return;
1576 
1577 	/*
1578 	 * If the system has already booted and thread
1579 	 * scheduling is possible, as indicated by the
1580 	 * global cold set to zero, we just call the driver
1581 	 * initialization directly.
1582 	 */
1583 	if (!cold)
1584 		vmbus_doattach(sc);
1585 }
/*
 * NOTE:
 * vmbus_sysinit() is registered at SI_SUB_SMP with SI_ORDER_ANY because
 * it has to run as the last step of SI_SUB_SMP, i.e. after SMP is
 * initialized.
 */
SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL);
1592 
1593 #endif	/* !EARLY_AP_STARTUP */
1594