xref: /dragonfly/sys/dev/virtual/hyperv/vmbus/vmbus.c (revision 0de090e1)
1 /*-
2  * Copyright (c) 2009-2012,2016 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "opt_acpi.h"
30 
31 #include <sys/param.h>
32 #include <sys/bus.h>
33 #include <sys/kernel.h>
34 #include <sys/module.h>
35 #include <sys/rman.h>
36 #include <sys/systimer.h>
37 #include <sys/thread.h>
38 #include <sys/thread2.h>
39 
40 #include <machine/intr_machdep.h>
41 #include <machine/smp.h>
42 
43 #include <dev/virtual/hyperv/include/hyperv_busdma.h>
44 #include <dev/virtual/hyperv/vmbus/hyperv_machdep.h>
45 #include <dev/virtual/hyperv/vmbus/hyperv_reg.h>
46 #include <dev/virtual/hyperv/vmbus/hyperv_var.h>
47 #include <dev/virtual/hyperv/vmbus/vmbus_reg.h>
48 #include <dev/virtual/hyperv/vmbus/vmbus_var.h>
49 
50 #include "acpi.h"
51 #include "acpi_if.h"
52 #include "pcib_if.h"
53 
/*
 * SINT selector for the STIMER0 configuration value: VMBUS_SINT_TIMER
 * shifted and masked into the STIMER_CFG SINT field.  It is OR'ed into
 * the value written to MSR_HV_STIMER0_CONFIG by vmbus_timer_config().
 */
#define MSR_HV_STIMER0_CFG_SINT		\
	((((uint64_t)VMBUS_SINT_TIMER) << MSR_HV_STIMER_CFG_SINT_SHIFT) & \
	 MSR_HV_STIMER_CFG_SINT_MASK)

/*
 * Two additionally required features:
 * - SynIC is needed for interrupt generation.
 * - Time reference counter is needed to set ABS reference count to
 *   STIMER0_COUNT.
 */
#define CPUID_HV_TIMER_MASK		(CPUID_HV_MSR_TIME_REFCNT |	\
					 CPUID_HV_MSR_SYNIC |		\
					 CPUID_HV_MSR_SYNTIMER)

/*
 * SINT used for vmbus messages.  It is both the offset from
 * MSR_HV_SINT0 of the SINT register that gets programmed and the index
 * of the per-cpu message slot examined by vmbus_intr().
 *
 * NOTE: DO NOT CHANGE THIS.
 */
#define VMBUS_SINT_MESSAGE		2
/*
 * SINT used for the interrupt timer; same dual role as above, with the
 * message slot examined by vmbus_timer_msgintr().
 *
 * NOTE:
 * - DO NOT set it to the same value as VMBUS_SINT_MESSAGE.
 * - DO NOT set it to 0.
 */
#define VMBUS_SINT_TIMER		4

/*
 * Connection ids.  VMBUS_CONNID_MESSAGE is stored into hc_connid of
 * every "post message" Hypercall input built by vmbus_msghc_get().
 *
 * NOTE: DO NOT CHANGE THESE
 */
#define VMBUS_CONNID_MESSAGE		1
#define VMBUS_CONNID_EVENT		2
84 
/*
 * One "post message" Hypercall request together with the storage for
 * its response.
 */
struct vmbus_msghc {
	/* Hypercall input parameter; DMA memory described by mh_inprm_dma. */
	struct hypercall_postmsg_in	*mh_inprm;
	/* Copy of *mh_inprm taken before posting, used to restore on retry. */
	struct hypercall_postmsg_in	mh_inprm_save;
	/* DMA descriptor; its hv_paddr is what gets passed to the Hypercall. */
	struct hyperv_dma		mh_inprm_dma;

	/* NULL until a response has been copied in; then &mh_resp0. */
	struct vmbus_message		*mh_resp;
	/* Backing storage for the response message. */
	struct vmbus_message		mh_resp0;
};
93 
/*
 * Context for "post message" Hypercalls.  It owns a single vmbus_msghc,
 * which is either parked in mhc_free or handed out to a caller.
 */
struct vmbus_msghc_ctx {
	/* The free Hypercall message, or NULL while it is handed out. */
	struct vmbus_msghc		*mhc_free;
	/* Held while mhc_free and mhc_flags are examined or changed. */
	struct lwkt_token		mhc_free_token;
	/* VMBUS_MSGHC_CTXF_ flags. */
	uint32_t			mhc_flags;

	/* Hypercall message that has been posted and awaits its response. */
	struct vmbus_msghc		*mhc_active;
	/* Held while mhc_active and the response fields are updated. */
	struct lwkt_token		mhc_active_token;
};

/* Set by vmbus_msghc_ctx_destroy(); makes vmbus_msghc_get() return NULL. */
#define VMBUS_MSGHC_CTXF_DESTROY	0x0001
104 
/* Device methods, interrupt handler and cputimer_intr callbacks. */
static int			vmbus_probe(device_t);
static int			vmbus_attach(device_t);
static int			vmbus_detach(device_t);
static void			vmbus_intr(void *);
static void			vmbus_timer_intr_reload(struct cputimer_intr *,
				    sysclock_t);
static void			vmbus_timer_intr_pcpuhand(
				    struct cputimer_intr *);
static void			vmbus_timer_intr_restart(
				    struct cputimer_intr *);

/* Resource setup/teardown, SynIC and timer programming, version setup. */
static int			vmbus_dma_alloc(struct vmbus_softc *);
static void			vmbus_dma_free(struct vmbus_softc *);
static int			vmbus_intr_setup(struct vmbus_softc *);
static void			vmbus_intr_teardown(struct vmbus_softc *);
static void			vmbus_synic_setup(void *);
static void			vmbus_synic_teardown(void *);
static void			vmbus_timer_stop(void *);
static void			vmbus_timer_config(void *);
static int			vmbus_init(struct vmbus_softc *);
static int			vmbus_init_contact(struct vmbus_softc *,
				    uint32_t);
static void			vmbus_timer_restart(void *);
static void			vmbus_timer_msgintr(struct vmbus_pcpu_data *);

/* Channel message dispatch. */
static void			vmbus_chan_msgproc(struct vmbus_softc *,
				    const struct vmbus_message *);

/* "Post message" Hypercall context management. */
static struct vmbus_msghc_ctx	*vmbus_msghc_ctx_create(bus_dma_tag_t);
static void			vmbus_msghc_ctx_destroy(
				    struct vmbus_msghc_ctx *);
static void			vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *);
static struct vmbus_msghc	*vmbus_msghc_alloc(bus_dma_tag_t);
static void			vmbus_msghc_free(struct vmbus_msghc *);
static struct vmbus_msghc	*vmbus_msghc_get1(struct vmbus_msghc_ctx *,
				    uint32_t);
141 
/*
 * Device method table: probe/attach/detach are implemented in this
 * file, the remaining methods use the generic bus implementations.
 */
static device_method_t vmbus_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,			vmbus_probe),
	DEVMETHOD(device_attach,		vmbus_attach),
	DEVMETHOD(device_detach,		vmbus_detach),
	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
	DEVMETHOD(device_suspend,		bus_generic_suspend),
	DEVMETHOD(device_resume,		bus_generic_resume),

	DEVMETHOD_END
};

/* Driver description; the softc is a struct vmbus_softc. */
static driver_t vmbus_driver = {
	"vmbus",
	vmbus_methods,
	sizeof(struct vmbus_softc)
};

static devclass_t vmbus_devclass;

/* The driver is registered on the acpi bus and depends on the acpi module. */
DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
MODULE_VERSION(vmbus, 1);
165 
/*
 * Interrupt cputimer backed by STIMER0.  Only reload, restart and
 * pcpuhand are implemented in this file; the other callbacks are the
 * cputimer_intr defaults.  'priv' is pointed at the softc by
 * vmbus_attach() before the timer is registered.
 */
static struct cputimer_intr vmbus_cputimer_intr = {
	.freq = HYPERV_TIMER_FREQ,
	.reload = vmbus_timer_intr_reload,
	.enable = cputimer_intr_default_enable,
	.config = cputimer_intr_default_config,
	.restart = vmbus_timer_intr_restart,
	.pmfixup = cputimer_intr_default_pmfixup,
	.initclock = cputimer_intr_default_initclock,
	.pcpuhand = vmbus_timer_intr_pcpuhand,
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "hyperv",
	.type = CPUTIMER_INTR_VMM,
	.prio = CPUTIMER_INTR_PRIO_VMM,
	.caps = CPUTIMER_INTR_CAP_PS,
	.priv = NULL
};
182 
/*
 * vmbus versions offered to the hypervisor.  vmbus_init() tries them in
 * array order and keeps the first one that is accepted.
 */
static const uint32_t	vmbus_version[] = {
	VMBUS_VERSION_WIN8_1,
	VMBUS_VERSION_WIN8,
	VMBUS_VERSION_WIN7,
	VMBUS_VERSION_WS2008
};

/*
 * When non-zero (the default), vmbus_attach() selects the vmbus
 * interrupt timer after registering it.
 */
static int		vmbus_timer_intr_enable = 1;
TUNABLE_INT("hw.vmbus.timer_intr.enable", &vmbus_timer_intr_enable);
192 
193 static int
194 vmbus_probe(device_t dev)
195 {
196 	char *id[] = { "VMBUS", NULL };
197 
198 	if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL ||
199 	    device_get_unit(dev) != 0 || vmm_guest != VMM_GUEST_HYPERV ||
200 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
201 		return (ENXIO);
202 
203 	device_set_desc(dev, "Hyper-V vmbus");
204 
205 	return (0);
206 }
207 
208 static int
209 vmbus_attach(device_t dev)
210 {
211 	struct vmbus_softc *sc = device_get_softc(dev);
212 	int error, cpu, use_timer;
213 
214 	/*
215 	 * Basic setup.
216 	 */
217 	sc->vmbus_dev = dev;
218 	for (cpu = 0; cpu < ncpus; ++cpu) {
219 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
220 
221 		psc->sc = sc;
222 		psc->cpuid = cpu;
223 		psc->timer_last = UINT64_MAX;
224 	}
225 
226 	/*
227 	 * Should we use interrupt timer?
228 	 */
229 	use_timer = 0;
230 	if (device_get_unit(dev) == 0 &&
231 	    (hyperv_features & CPUID_HV_TIMER_MASK) == CPUID_HV_TIMER_MASK)
232 		use_timer = 1;
233 
234 	/*
235 	 * Create context for "post message" Hypercalls
236 	 */
237 	sc->vmbus_msg_hc = vmbus_msghc_ctx_create(
238 	    bus_get_dma_tag(sc->vmbus_dev));
239 	if (sc->vmbus_msg_hc == NULL)
240 		return ENXIO;
241 
242 	/*
243 	 * Allocate DMA stuffs.
244 	 */
245 	error = vmbus_dma_alloc(sc);
246 	if (error)
247 		goto failed;
248 
249 	/*
250 	 * Setup interrupt.
251 	 */
252 	error = vmbus_intr_setup(sc);
253 	if (error)
254 		goto failed;
255 
256 	if (use_timer) {
257 		/*
258 		 * Make sure that interrupt timer is stopped.
259 		 */
260 		lwkt_cpusync_simple(smp_active_mask, vmbus_timer_stop, sc);
261 	}
262 
263 	/*
264 	 * Setup SynIC.
265 	 */
266 	lwkt_cpusync_simple(smp_active_mask, vmbus_synic_setup, sc);
267 	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
268 
269 	/*
270 	 * Initialize vmbus.
271 	 */
272 	error = vmbus_init(sc);
273 	if (error)
274 		goto failed;
275 
276 	if (use_timer) {
277 		/*
278 		 * Configure and register vmbus interrupt timer.
279 		 */
280 		lwkt_cpusync_simple(smp_active_mask, vmbus_timer_config, sc);
281 		vmbus_cputimer_intr.priv = sc;
282 		cputimer_intr_register(&vmbus_cputimer_intr);
283 		if (vmbus_timer_intr_enable)
284 			cputimer_intr_select(&vmbus_cputimer_intr, 0);
285 	}
286 
287 	return 0;
288 failed:
289 	vmbus_detach(dev);
290 	return error;
291 }
292 
293 static int
294 vmbus_detach(device_t dev)
295 {
296 	struct vmbus_softc *sc = device_get_softc(dev);
297 
298 	/* TODO: uninitialize vmbus. */
299 	/* TODO: stop and deregister timer */
300 
301 	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC)
302 		lwkt_cpusync_simple(smp_active_mask, vmbus_synic_teardown, sc);
303 	vmbus_intr_teardown(sc);
304 	vmbus_dma_free(sc);
305 
306 	if (sc->vmbus_msg_hc != NULL) {
307 		vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc);
308 		sc->vmbus_msg_hc = NULL;
309 	}
310 	return (0);
311 }
312 
313 static __inline void
314 vmbus_msg_reset(volatile struct vmbus_message *msg)
315 {
316 	msg->msg_type = HYPERV_MSGTYPE_NONE;
317 	/*
318 	 * Make sure that the write to msg_type (i.e. set to
319 	 * HYPERV_MSGTYPE_NONE) happens before we read the
320 	 * msg_flags and send EOM to the hypervisor.
321 	 */
322 	cpu_mfence();
323 	if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
324 		/*
325 		 * Ask the hypervisor to rescan message queue,
326 		 * and deliver new message if any.
327 		 */
328 		wrmsr(MSR_HV_EOM, 0);
329 	}
330 }
331 
332 static void
333 vmbus_intr(void *xpsc)
334 {
335 	struct vmbus_pcpu_data *psc = xpsc;
336 	volatile struct vmbus_message *msg;
337 
338 	msg = psc->message + VMBUS_SINT_MESSAGE;
339 	while (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
340 		if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
341 			/* Channel message */
342 			vmbus_chan_msgproc(psc->sc,
343 			    __DEVOLATILE(const struct vmbus_message *, msg));
344 		}
345 		vmbus_msg_reset(msg);
346 	}
347 }
348 
349 static __inline void
350 vmbus_timer_oneshot(struct vmbus_pcpu_data *psc, uint64_t current)
351 {
352 	psc->timer_last = current;
353 	wrmsr(MSR_HV_STIMER0_COUNT, current);
354 }
355 
356 static void
357 vmbus_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
358 {
359 	struct globaldata *gd = mycpu;
360 	struct vmbus_softc *sc = cti->priv;
361 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, gd->gd_cpuid);
362 	uint64_t current;
363 
364 	reload = (uint64_t)reload * cti->freq / sys_cputimer->freq;
365 	current = rdmsr(MSR_HV_TIME_REF_COUNT) + reload;
366 
367 	if (gd->gd_timer_running) {
368 		if (current < psc->timer_last)
369 			vmbus_timer_oneshot(psc, current);
370 	} else {
371 		gd->gd_timer_running = 1;
372 		vmbus_timer_oneshot(psc, current);
373 	}
374 }
375 
376 static void
377 vmbus_timer_intr_pcpuhand(struct cputimer_intr *cti)
378 {
379 	struct vmbus_softc *sc = cti->priv;
380 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, mycpuid);
381 
382 	vmbus_timer_msgintr(psc);
383 }
384 
385 static void
386 vmbus_timer_intr_restart(struct cputimer_intr *cti)
387 {
388 	lwkt_send_ipiq_mask(smp_active_mask, vmbus_timer_restart, cti->priv);
389 }
390 
391 static struct vmbus_msghc *
392 vmbus_msghc_alloc(bus_dma_tag_t parent_dtag)
393 {
394 	struct vmbus_msghc *mh;
395 
396 	mh = kmalloc(sizeof(*mh), M_DEVBUF, M_WAITOK | M_ZERO);
397 
398 	mh->mh_inprm = hyperv_dmamem_alloc(parent_dtag,
399 	    HYPERCALL_POSTMSGIN_ALIGN, 0, HYPERCALL_POSTMSGIN_SIZE,
400 	    &mh->mh_inprm_dma, BUS_DMA_WAITOK);
401 	if (mh->mh_inprm == NULL) {
402 		kfree(mh, M_DEVBUF);
403 		return NULL;
404 	}
405 	return mh;
406 }
407 
408 static void
409 vmbus_msghc_free(struct vmbus_msghc *mh)
410 {
411 	hyperv_dmamem_free(&mh->mh_inprm_dma, mh->mh_inprm);
412 	kfree(mh, M_DEVBUF);
413 }
414 
415 static void
416 vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *mhc)
417 {
418 	KASSERT(mhc->mhc_active == NULL, ("still have active msg hypercall"));
419 	KASSERT(mhc->mhc_free == NULL, ("still have hypercall msg"));
420 
421 	lwkt_token_uninit(&mhc->mhc_free_token);
422 	lwkt_token_uninit(&mhc->mhc_active_token);
423 	kfree(mhc, M_DEVBUF);
424 }
425 
426 static struct vmbus_msghc_ctx *
427 vmbus_msghc_ctx_create(bus_dma_tag_t parent_dtag)
428 {
429 	struct vmbus_msghc_ctx *mhc;
430 
431 	mhc = kmalloc(sizeof(*mhc), M_DEVBUF, M_WAITOK | M_ZERO);
432 	lwkt_token_init(&mhc->mhc_free_token, "msghcf");
433 	lwkt_token_init(&mhc->mhc_active_token, "msghca");
434 
435 	mhc->mhc_free = vmbus_msghc_alloc(parent_dtag);
436 	if (mhc->mhc_free == NULL) {
437 		vmbus_msghc_ctx_free(mhc);
438 		return NULL;
439 	}
440 	return mhc;
441 }
442 
443 static struct vmbus_msghc *
444 vmbus_msghc_get1(struct vmbus_msghc_ctx *mhc, uint32_t dtor_flag)
445 {
446 	struct vmbus_msghc *mh;
447 
448 	lwkt_gettoken(&mhc->mhc_free_token);
449 
450 	while ((mhc->mhc_flags & dtor_flag) == 0 && mhc->mhc_free == NULL)
451 		tsleep(&mhc->mhc_free, 0, "gmsghc", 0);
452 	if (mhc->mhc_flags & dtor_flag) {
453 		/* Being destroyed */
454 		mh = NULL;
455 	} else {
456 		mh = mhc->mhc_free;
457 		KASSERT(mh != NULL, ("no free hypercall msg"));
458 		KASSERT(mh->mh_resp == NULL,
459 		    ("hypercall msg has pending response"));
460 		mhc->mhc_free = NULL;
461 	}
462 
463 	lwkt_reltoken(&mhc->mhc_free_token);
464 
465 	return mh;
466 }
467 
468 struct vmbus_msghc *
469 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
470 {
471 	struct hypercall_postmsg_in *inprm;
472 	struct vmbus_msghc *mh;
473 
474 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
475 		return NULL;
476 
477 	mh = vmbus_msghc_get1(sc->vmbus_msg_hc, VMBUS_MSGHC_CTXF_DESTROY);
478 	if (mh == NULL)
479 		return NULL;
480 
481 	inprm = mh->mh_inprm;
482 	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
483 	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
484 	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
485 	inprm->hc_dsize = dsize;
486 
487 	return mh;
488 }
489 
490 void
491 vmbus_msghc_put(struct vmbus_softc *sc, struct vmbus_msghc *mh)
492 {
493 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
494 
495 	KASSERT(mhc->mhc_active == NULL, ("msg hypercall is active"));
496 	mh->mh_resp = NULL;
497 
498 	lwkt_gettoken(&mhc->mhc_free_token);
499 	KASSERT(mhc->mhc_free == NULL, ("has free hypercall msg"));
500 	mhc->mhc_free = mh;
501 	lwkt_reltoken(&mhc->mhc_free_token);
502 	wakeup(&mhc->mhc_free);
503 }
504 
505 void *
506 vmbus_msghc_dataptr(struct vmbus_msghc *mh)
507 {
508 	return mh->mh_inprm->hc_data;
509 }
510 
511 static void
512 vmbus_msghc_ctx_destroy(struct vmbus_msghc_ctx *mhc)
513 {
514 	struct vmbus_msghc *mh;
515 
516 	lwkt_gettoken(&mhc->mhc_free_token);
517 	mhc->mhc_flags |= VMBUS_MSGHC_CTXF_DESTROY;
518 	lwkt_reltoken(&mhc->mhc_free_token);
519 	wakeup(&mhc->mhc_free);
520 
521 	mh = vmbus_msghc_get1(mhc, 0);
522 	if (mh == NULL)
523 		panic("can't get msghc");
524 
525 	vmbus_msghc_free(mh);
526 	vmbus_msghc_ctx_free(mhc);
527 }
528 
529 int
530 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
531 {
532 	int i, wait_ticks = 1;
533 
534 	/*
535 	 * Save the input parameter so that we could restore the input
536 	 * parameter if the Hypercall failed.
537 	 *
538 	 * XXX
539 	 * Is this really necessary?!  i.e. Will the Hypercall ever
540 	 * overwrite the input parameter?
541 	 */
542 	memcpy(&mh->mh_inprm_save, mh->mh_inprm, HYPERCALL_POSTMSGIN_SIZE);
543 
544 	/*
545 	 * In order to cope with transient failures, e.g. insufficient
546 	 * resources on host side, we retry the post message Hypercall
547 	 * several times.  20 retries seem sufficient.
548 	 */
549 #define HC_RETRY_MAX	20
550 
551 	for (i = 0; i < HC_RETRY_MAX; ++i) {
552 		uint64_t status;
553 
554 		status = hypercall_post_message(mh->mh_inprm_dma.hv_paddr);
555 		if (status == HYPERCALL_STATUS_SUCCESS)
556 			return 0;
557 
558 		tsleep(&status, 0, "hcpmsg", wait_ticks);
559 		if (wait_ticks < hz)
560 			wait_ticks *= 2;
561 
562 		/* Restore input parameter and try again */
563 		memcpy(mh->mh_inprm, &mh->mh_inprm_save,
564 		    HYPERCALL_POSTMSGIN_SIZE);
565 	}
566 
567 #undef HC_RETRY_MAX
568 
569 	return EIO;
570 }
571 
572 int
573 vmbus_msghc_exec(struct vmbus_softc *sc, struct vmbus_msghc *mh)
574 {
575 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
576 	int error;
577 
578 	KASSERT(mh->mh_resp == NULL, ("hypercall msg has pending response"));
579 
580 	lwkt_gettoken(&mhc->mhc_active_token);
581 	KASSERT(mhc->mhc_active == NULL, ("pending active msg hypercall"));
582 	mhc->mhc_active = mh;
583 	lwkt_reltoken(&mhc->mhc_active_token);
584 
585 	error = vmbus_msghc_exec_noresult(mh);
586 	if (error) {
587 		lwkt_gettoken(&mhc->mhc_active_token);
588 		KASSERT(mhc->mhc_active == mh, ("msghc mismatch"));
589 		mhc->mhc_active = NULL;
590 		lwkt_reltoken(&mhc->mhc_active_token);
591 	}
592 	return error;
593 }
594 
595 const struct vmbus_message *
596 vmbus_msghc_wait_result(struct vmbus_softc *sc, struct vmbus_msghc *mh)
597 {
598 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
599 
600 	lwkt_gettoken(&mhc->mhc_active_token);
601 
602 	KASSERT(mhc->mhc_active == mh, ("msghc mismatch"));
603 	while (mh->mh_resp == NULL)
604 		tsleep(&mhc->mhc_active, 0, "wmsghc", 0);
605 	mhc->mhc_active = NULL;
606 
607 	lwkt_reltoken(&mhc->mhc_active_token);
608 
609 	return mh->mh_resp;
610 }
611 
612 void
613 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
614 {
615 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
616 	struct vmbus_msghc *mh;
617 
618 	lwkt_gettoken(&mhc->mhc_active_token);
619 
620 	mh = mhc->mhc_active;
621 	KASSERT(mh != NULL, ("no pending msg hypercall"));
622 	memcpy(&mh->mh_resp0, msg, sizeof(mh->mh_resp0));
623 	mh->mh_resp = &mh->mh_resp0;
624 
625 	lwkt_reltoken(&mhc->mhc_active_token);
626 	wakeup(&mhc->mhc_active);
627 }
628 
629 static int
630 vmbus_dma_alloc(struct vmbus_softc *sc)
631 {
632 	bus_dma_tag_t parent_dtag;
633 	uint8_t *evtflags;
634 	int cpu;
635 
636 	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
637 	for (cpu = 0; cpu < ncpus; ++cpu) {
638 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
639 
640 		/*
641 		 * Per-cpu messages and event flags.
642 		 */
643 		psc->message = hyperv_dmamem_alloc(parent_dtag,
644 		    PAGE_SIZE, 0, PAGE_SIZE, &psc->message_dma,
645 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
646 		if (psc->message == NULL)
647 			return ENOMEM;
648 
649 		psc->event_flags = hyperv_dmamem_alloc(parent_dtag,
650 		    PAGE_SIZE, 0, PAGE_SIZE, &psc->event_flags_dma,
651 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
652 		if (psc->event_flags == NULL)
653 			return ENOMEM;
654 	}
655 
656 	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
657 	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
658 	if (evtflags == NULL)
659 		return ENOMEM;
660 	sc->vmbus_rx_evtflags = (u_long *)evtflags;
661 	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
662 	sc->vmbus_evtflags = evtflags;
663 
664 	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
665 	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
666 	if (sc->vmbus_mnf1 == NULL)
667 		return ENOMEM;
668 
669 	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
670 	    PAGE_SIZE, &sc->vmbus_mnf2_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
671 	if (sc->vmbus_mnf2 == NULL)
672 		return ENOMEM;
673 
674 	return 0;
675 }
676 
677 static void
678 vmbus_dma_free(struct vmbus_softc *sc)
679 {
680 	int cpu;
681 
682 	if (sc->vmbus_evtflags != NULL) {
683 		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
684 		sc->vmbus_evtflags = NULL;
685 		sc->vmbus_rx_evtflags = NULL;
686 		sc->vmbus_tx_evtflags = NULL;
687 	}
688 	if (sc->vmbus_mnf1 != NULL) {
689 		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
690 		sc->vmbus_mnf1 = NULL;
691 	}
692 	if (sc->vmbus_mnf2 != NULL) {
693 		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
694 		sc->vmbus_mnf2 = NULL;
695 	}
696 
697 	for (cpu = 0; cpu < ncpus; ++cpu) {
698 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
699 
700 		if (psc->message != NULL) {
701 			hyperv_dmamem_free(&psc->message_dma, psc->message);
702 			psc->message = NULL;
703 		}
704 		if (psc->event_flags != NULL) {
705 			hyperv_dmamem_free(&psc->event_flags_dma,
706 			    psc->event_flags);
707 			psc->event_flags = NULL;
708 		}
709 	}
710 }
711 
712 static int
713 vmbus_intr_setup(struct vmbus_softc *sc)
714 {
715 	device_t dev = sc->vmbus_dev;
716 	device_t bus = device_get_parent(device_get_parent(dev));
717 	int rid, cpu;
718 
719 	rid = 0;
720 	for (cpu = 0; cpu < ncpus; ++cpu) {
721 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
722 		uint64_t msi_addr;
723 		uint32_t msi_data;
724 		int error;
725 
726 		error = PCIB_ALLOC_MSIX(bus, dev, &psc->intr_irq, cpu);
727 		if (error) {
728 			device_printf(dev, "alloc vector on cpu%d failed: %d\n",
729 			    cpu, error);
730 			return ENXIO;
731 		}
732 		psc->intr_rid = ++rid;
733 
734 		psc->intr_res = BUS_ALLOC_RESOURCE(bus, dev, SYS_RES_IRQ,
735 		    &psc->intr_rid, psc->intr_irq, psc->intr_irq, 1,
736 		    RF_ACTIVE, cpu);
737 		if (psc->intr_res == NULL) {
738 			device_printf(dev, "alloc irq on cpu%d failed: %d\n",
739 			    cpu, error);
740 			return ENXIO;
741 		}
742 
743 		error = PCIB_MAP_MSI(bus, dev, rman_get_start(psc->intr_res),
744 		    &msi_addr, &msi_data, cpu);
745 		if (error) {
746 			device_printf(dev, "map irq on cpu%d failed: %d\n",
747 			    cpu, error);
748 			return ENXIO;
749 		}
750 		psc->intr_vec = hyperv_msi2vector(msi_addr, msi_data);
751 
752 		if (bootverbose) {
753 			device_printf(dev, "vector %d irq %d on cpu%d\n",
754 			    psc->intr_vec, psc->intr_irq, cpu);
755 		}
756 
757 		ksnprintf(psc->intr_desc, sizeof(psc->intr_desc), "%s cpu%d",
758 		    device_get_nameunit(dev), cpu);
759 		error = bus_setup_intr_descr(dev, psc->intr_res, INTR_MPSAFE,
760 		    vmbus_intr, psc, &psc->intr_hand, NULL, psc->intr_desc);
761 		if (error) {
762 			device_printf(dev, "setup intr on cpu%d failed: %d\n",
763 			    cpu, error);
764 			return ENXIO;
765 		}
766 	}
767 	return 0;
768 }
769 
770 static void
771 vmbus_intr_teardown(struct vmbus_softc *sc)
772 {
773 	device_t dev = sc->vmbus_dev;
774 	device_t bus = device_get_parent(device_get_parent(dev));
775 	int cpu;
776 
777 	for (cpu = 0; cpu < ncpus; ++cpu) {
778 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
779 
780 		if (psc->intr_hand != NULL) {
781 			bus_teardown_intr(dev, psc->intr_res, psc->intr_hand);
782 			psc->intr_hand = NULL;
783 		}
784 
785 		if (psc->intr_res != NULL) {
786 			BUS_RELEASE_RESOURCE(bus, dev, SYS_RES_IRQ,
787 			    psc->intr_rid, psc->intr_res);
788 			psc->intr_res = NULL;
789 		}
790 
791 		if (psc->intr_rid != 0) {
792 			PCIB_RELEASE_MSIX(bus, dev, psc->intr_irq, psc->cpuid);
793 			psc->intr_rid = 0;
794 		}
795 	}
796 }
797 
798 static void
799 vmbus_synic_setup(void *xsc)
800 {
801 	struct vmbus_softc *sc = xsc;
802 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, mycpuid);
803 	uint64_t val, orig;
804 	uint32_t sint;
805 
806 	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
807 		/*
808 		 * Save virtual processor id.
809 		 */
810 		psc->vcpuid = rdmsr(MSR_HV_VP_INDEX);
811 	} else {
812 		/*
813 		 * XXX
814 		 * Virtual processoor id is only used by a pretty broken
815 		 * channel selection code from storvsc.  It's nothing
816 		 * critical even if CPUID_HV_MSR_VP_INDEX is not set; keep
817 		 * moving on.
818 		 */
819 		psc->vcpuid = mycpuid;
820 	}
821 
822 	/*
823 	 * Setup the SynIC message.
824 	 */
825 	orig = rdmsr(MSR_HV_SIMP);
826 	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
827 	    ((psc->message_dma.hv_paddr >> PAGE_SHIFT) << MSR_HV_SIMP_PGSHIFT);
828 	wrmsr(MSR_HV_SIMP, val);
829 
830 	/*
831 	 * Setup the SynIC event flags.
832 	 */
833 	orig = rdmsr(MSR_HV_SIEFP);
834 	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
835 	    ((psc->event_flags_dma.hv_paddr >> PAGE_SHIFT) <<
836 	     MSR_HV_SIEFP_PGSHIFT);
837 	wrmsr(MSR_HV_SIEFP, val);
838 
839 
840 	/*
841 	 * Configure and unmask SINT for message and event flags.
842 	 */
843 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
844 	orig = rdmsr(sint);
845 	val = psc->intr_vec | /* MSR_HV_SINT_AUTOEOI | notyet */
846 	    (orig & MSR_HV_SINT_RSVD_MASK);
847 	wrmsr(sint, val);
848 
849 	/*
850 	 * Configure and unmask SINT for timer.
851 	 */
852 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
853 	orig = rdmsr(sint);
854 	val = XTIMER_OFFSET | /* MSR_HV_SINT_AUTOEOI | notyet */
855 	    (orig & MSR_HV_SINT_RSVD_MASK);
856 	wrmsr(sint, val);
857 
858 	/*
859 	 * All done; enable SynIC.
860 	 */
861 	orig = rdmsr(MSR_HV_SCONTROL);
862 	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
863 	wrmsr(MSR_HV_SCONTROL, val);
864 }
865 
866 static void
867 vmbus_timer_stop(void *arg __unused)
868 {
869 	for (;;) {
870 		uint64_t val;
871 
872 		/* Stop counting, and this also implies disabling STIMER0 */
873 		wrmsr(MSR_HV_STIMER0_COUNT, 0);
874 
875 		val = rdmsr(MSR_HV_STIMER0_CONFIG);
876 		if ((val & MSR_HV_STIMER_CFG_ENABLE) == 0)
877 			break;
878 		cpu_pause();
879 	}
880 }
881 
882 static void
883 vmbus_timer_config(void *arg __unused)
884 {
885 	/*
886 	 * Make sure that STIMER0 is really disabled before writing
887 	 * to STIMER0_CONFIG.
888 	 *
889 	 * "Writing to the configuration register of a timer that
890 	 *  is already enabled may result in undefined behaviour."
891 	 */
892 	vmbus_timer_stop(arg);
893 	wrmsr(MSR_HV_STIMER0_CONFIG,
894 	    MSR_HV_STIMER_CFG_AUTOEN | MSR_HV_STIMER0_CFG_SINT);
895 }
896 
897 static void
898 vmbus_timer_msgintr(struct vmbus_pcpu_data *psc)
899 {
900 	volatile struct vmbus_message *msg;
901 
902 	msg = psc->message + VMBUS_SINT_TIMER;
903 	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED)
904 		vmbus_msg_reset(msg);
905 }
906 
907 static void
908 vmbus_timer_restart(void *xsc)
909 {
910 	struct vmbus_softc *sc = xsc;
911 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, mycpuid);
912 
913 	crit_enter();
914 	vmbus_timer_msgintr(psc);
915 	vmbus_timer_oneshot(psc, rdmsr(MSR_HV_TIME_REF_COUNT) + 1);
916 	crit_exit();
917 }
918 
919 static void
920 vmbus_synic_teardown(void *arg __unused)
921 {
922 	uint64_t orig;
923 	uint32_t sint;
924 
925 	/*
926 	 * Disable SynIC.
927 	 */
928 	orig = rdmsr(MSR_HV_SCONTROL);
929 	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
930 
931 	/*
932 	 * Mask message and event flags SINT.
933 	 */
934 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
935 	orig = rdmsr(sint);
936 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
937 
938 	/*
939 	 * Mask timer SINT.
940 	 */
941 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
942 	orig = rdmsr(sint);
943 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
944 
945 	/*
946 	 * Teardown SynIC message.
947 	 */
948 	orig = rdmsr(MSR_HV_SIMP);
949 	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
950 
951 	/*
952 	 * Teardown SynIC event flags.
953 	 */
954 	orig = rdmsr(MSR_HV_SIEFP);
955 	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
956 }
957 
958 static int
959 vmbus_init_contact(struct vmbus_softc *sc, uint32_t version)
960 {
961 	struct vmbus_chanmsg_init_contact *req;
962 	const struct vmbus_chanmsg_version_resp *resp;
963 	const struct vmbus_message *msg;
964 	struct vmbus_msghc *mh;
965 	int error, supp = 0;
966 
967 	mh = vmbus_msghc_get(sc, sizeof(*req));
968 	if (mh == NULL)
969 		return ENXIO;
970 
971 	req = vmbus_msghc_dataptr(mh);
972 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_INIT_CONTACT;
973 	req->chm_ver = version;
974 	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
975 	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
976 	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
977 
978 	error = vmbus_msghc_exec(sc, mh);
979 	if (error) {
980 		vmbus_msghc_put(sc, mh);
981 		return error;
982 	}
983 
984 	msg = vmbus_msghc_wait_result(sc, mh);
985 	resp = (const struct vmbus_chanmsg_version_resp *)msg->msg_data;
986 	supp = resp->chm_supp;
987 
988 	vmbus_msghc_put(sc, mh);
989 
990 	return (supp ? 0 : EOPNOTSUPP);
991 }
992 
993 static int
994 vmbus_init(struct vmbus_softc *sc)
995 {
996 	int i;
997 
998 	for (i = 0; i < nitems(vmbus_version); ++i) {
999 		int error;
1000 
1001 		error = vmbus_init_contact(sc, vmbus_version[i]);
1002 		if (!error) {
1003 			sc->vmbus_version = vmbus_version[i];
1004 			device_printf(sc->vmbus_dev, "version %u.%u\n",
1005 			    (sc->vmbus_version >> 16),
1006 			    (sc->vmbus_version & 0xffff));
1007 			return 0;
1008 		}
1009 	}
1010 	return ENXIO;
1011 }
1012 
1013 static void
1014 vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
1015 {
1016 	const struct vmbus_chanmsg_hdr *hdr;
1017 
1018 	hdr = (const struct vmbus_chanmsg_hdr *)msg->msg_data;
1019 
1020 	/* TODO */
1021 	if (hdr->chm_type == VMBUS_CHANMSG_TYPE_VERSION_RESP)
1022 		vmbus_msghc_wakeup(sc, msg);
1023 }
1024