xref: /dragonfly/sys/dev/virtual/hyperv/vmbus/vmbus.c (revision 49837aef)
1 /*-
2  * Copyright (c) 2009-2012,2016 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "opt_acpi.h"
30 
31 #include <sys/param.h>
32 #include <sys/bus.h>
33 #include <sys/kernel.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/rman.h>
37 #include <sys/systimer.h>
38 #include <sys/thread.h>
39 #include <sys/thread2.h>
40 
41 #include <machine/intr_machdep.h>
42 #include <machine/smp.h>
43 
44 #include <dev/virtual/hyperv/hyperv_busdma.h>
45 #include <dev/virtual/hyperv/hyperv_machdep.h>
46 #include <dev/virtual/hyperv/hyperv_reg.h>
47 #include <dev/virtual/hyperv/hyperv_var.h>
48 #include <dev/virtual/hyperv/vmbus/vmbus_reg.h>
49 #include <dev/virtual/hyperv/vmbus/vmbus_var.h>
50 
51 #include "acpi.h"
52 #include "acpi_if.h"
53 #include "pcib_if.h"
54 
/*
 * SINT field of the STIMER0 configuration MSR, selecting VMBUS_SINT_TIMER
 * as the synthetic interrupt source used by STIMER0.  The value is shifted
 * into place and clipped to the field mask; it is OR'ed into the value
 * written to MSR_HV_STIMER0_CONFIG by vmbus_timer_config().
 */
#define MSR_HV_STIMER0_CFG_SINT		\
	((((uint64_t)VMBUS_SINT_TIMER) << MSR_HV_STIMER_CFG_SINT_SHIFT) & \
	 MSR_HV_STIMER_CFG_SINT_MASK)
58 
/*
 * Hyper-V feature bits that must all be set before vmbus_attach() will
 * use the interrupt timer.
 *
 * Additionally required feature:
 * - SynIC is needed for interrupt generation.
 */
#define CPUID_HV_TIMER_MASK		(CPUID_HV_MSR_SYNIC |		\
					 CPUID_HV_MSR_SYNTIMER)
65 
/*
 * SINT index used for vmbus messages; also the index of the per-cpu
 * message slot polled by vmbus_intr().
 *
 * NOTE: DO NOT CHANGE THIS.
 */
#define VMBUS_SINT_MESSAGE		2
/*
 * SINT index used for the interrupt timer; also the index of the per-cpu
 * message slot checked by vmbus_timer_msgintr().
 *
 * NOTE:
 * - DO NOT set it to the same value as VMBUS_SINT_MESSAGE.
 * - DO NOT set it to 0.
 */
#define VMBUS_SINT_TIMER		4
76 
/*
 * Connection ids.  VMBUS_CONNID_MESSAGE is the connection id placed in
 * every "post message" Hypercall input by vmbus_msghc_get().
 *
 * NOTE: DO NOT CHANGE THESE
 */
#define VMBUS_CONNID_MESSAGE		1
#define VMBUS_CONNID_EVENT		2
82 
/*
 * One "post message" Hypercall request together with the storage for
 * its response.
 */
struct vmbus_msghc {
	/* Hypercall input parameter; DMA memory from vmbus_msghc_alloc(). */
	struct hypercall_postmsg_in	*mh_inprm;
	/* Copy of *mh_inprm, used to restore it before a retry. */
	struct hypercall_postmsg_in	mh_inprm_save;
	/* DMA bookkeeping for mh_inprm; hv_paddr is passed to the Hypercall. */
	struct hyperv_dma		mh_inprm_dma;

	/* NULL until a response is delivered, then points at mh_resp0. */
	struct vmbus_message		*mh_resp;
	/* Storage the response message is copied into. */
	struct vmbus_message		mh_resp0;
};
91 
/*
 * Context for "post message" Hypercalls.  It owns exactly one
 * vmbus_msghc, which is either parked in mhc_free or handed out to a
 * caller.
 */
struct vmbus_msghc_ctx {
	/* The free request, or NULL while it is handed out. */
	struct vmbus_msghc		*mhc_free;
	/* Held around accesses to mhc_free and mhc_flags. */
	struct lwkt_token		mhc_free_token;
	/* VMBUS_MSGHC_CTXF_ flags. */
	uint32_t			mhc_flags;

	/* Request currently waiting for its response, or NULL. */
	struct vmbus_msghc		*mhc_active;
	/* Held around accesses to mhc_active. */
	struct lwkt_token		mhc_active_token;
};

/* Set once vmbus_msghc_ctx_destroy() has started tearing the context down. */
#define VMBUS_MSGHC_CTXF_DESTROY	0x0001
102 
/*
 * Forward declarations.
 */

/* Device methods, interrupt handler and cputimer_intr callbacks. */
static int			vmbus_probe(device_t);
static int			vmbus_attach(device_t);
static int			vmbus_detach(device_t);
static void			vmbus_intr(void *);
static void			vmbus_timer_intr_reload(struct cputimer_intr *,
				    sysclock_t);
static void			vmbus_timer_intr_pcpuhand(
				    struct cputimer_intr *);
static void			vmbus_timer_intr_restart(
				    struct cputimer_intr *);

/* Resource setup/teardown, SynIC and timer helpers, vmbus handshake. */
static int			vmbus_dma_alloc(struct vmbus_softc *);
static void			vmbus_dma_free(struct vmbus_softc *);
static int			vmbus_intr_setup(struct vmbus_softc *);
static void			vmbus_intr_teardown(struct vmbus_softc *);
static void			vmbus_synic_setup(void *);
static void			vmbus_synic_teardown(void *);
static void			vmbus_timer_stop(void *);
static void			vmbus_timer_config(void *);
static int			vmbus_init(struct vmbus_softc *);
static int			vmbus_init_contact(struct vmbus_softc *,
				    uint32_t);
static void			vmbus_timer_restart(void *);
static void			vmbus_timer_msgintr(struct vmbus_pcpu_data *);

/* Channel message dispatch. */
static void			vmbus_chan_msgproc(struct vmbus_softc *,
				    const struct vmbus_message *);

/* "Post message" Hypercall context management. */
static struct vmbus_msghc_ctx	*vmbus_msghc_ctx_create(bus_dma_tag_t);
static void			vmbus_msghc_ctx_destroy(
				    struct vmbus_msghc_ctx *);
static void			vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *);
static struct vmbus_msghc	*vmbus_msghc_alloc(bus_dma_tag_t);
static void			vmbus_msghc_free(struct vmbus_msghc *);
static struct vmbus_msghc	*vmbus_msghc_get1(struct vmbus_msghc_ctx *,
				    uint32_t);
139 
/*
 * Device method table: probe/attach/detach are implemented in this
 * file; shutdown/suspend/resume use the generic bus implementations.
 */
static device_method_t vmbus_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,			vmbus_probe),
	DEVMETHOD(device_attach,		vmbus_attach),
	DEVMETHOD(device_detach,		vmbus_detach),
	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
	DEVMETHOD(device_suspend,		bus_generic_suspend),
	DEVMETHOD(device_resume,		bus_generic_resume),

	DEVMETHOD_END
};
151 
/* Driver description: name, method table and softc size. */
static driver_t vmbus_driver = {
	"vmbus",
	vmbus_methods,
	sizeof(struct vmbus_softc)
};

static devclass_t vmbus_devclass;

/* Register vmbus as a child driver of the acpi bus. */
DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
MODULE_VERSION(vmbus, 1);
163 
/*
 * Interrupt cputimer description for the vmbus timer.  The reload,
 * restart and pcpuhand callbacks are implemented in this file; the rest
 * use the cputimer_intr defaults.  .priv is pointed at the softc by
 * vmbus_attach() right before the timer is registered.
 */
static struct cputimer_intr vmbus_cputimer_intr = {
	.freq = HYPERV_TIMER_FREQ,
	.reload = vmbus_timer_intr_reload,
	.enable = cputimer_intr_default_enable,
	.config = cputimer_intr_default_config,
	.restart = vmbus_timer_intr_restart,
	.pmfixup = cputimer_intr_default_pmfixup,
	.initclock = cputimer_intr_default_initclock,
	.pcpuhand = vmbus_timer_intr_pcpuhand,
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "hyperv",
	.type = CPUTIMER_INTR_VMM,
	.prio = CPUTIMER_INTR_PRIO_VMM,
	.caps = CPUTIMER_INTR_CAP_PS,
	.priv = NULL
};
180 
/*
 * Protocol versions offered to the host.  vmbus_init() tries them in
 * array order and keeps the first one the host accepts.
 */
static const uint32_t	vmbus_version[] = {
	VMBUS_VERSION_WIN8_1,
	VMBUS_VERSION_WIN8,
	VMBUS_VERSION_WIN7,
	VMBUS_VERSION_WS2008
};
187 
/*
 * When non-zero (the default), vmbus_attach() selects the vmbus
 * interrupt timer after registering it.
 */
static int		vmbus_timer_intr_enable = 1;
TUNABLE_INT("hw.vmbus.timer_intr.enable", &vmbus_timer_intr_enable);
190 
191 static int
192 vmbus_probe(device_t dev)
193 {
194 	char *id[] = { "VMBUS", NULL };
195 
196 	if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL ||
197 	    device_get_unit(dev) != 0 || vmm_guest != VMM_GUEST_HYPERV ||
198 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
199 		return (ENXIO);
200 
201 	device_set_desc(dev, "Hyper-V vmbus");
202 
203 	return (0);
204 }
205 
206 static int
207 vmbus_attach(device_t dev)
208 {
209 	struct vmbus_softc *sc = device_get_softc(dev);
210 	int error, cpu, use_timer;
211 
212 	/*
213 	 * Basic setup.
214 	 */
215 	sc->vmbus_dev = dev;
216 	for (cpu = 0; cpu < ncpus; ++cpu) {
217 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
218 
219 		psc->sc = sc;
220 		psc->cpuid = cpu;
221 		psc->timer_last = UINT64_MAX;
222 	}
223 
224 	/*
225 	 * Should we use interrupt timer?
226 	 */
227 	use_timer = 0;
228 	if (device_get_unit(dev) == 0 &&
229 	    (hyperv_features & CPUID_HV_TIMER_MASK) == CPUID_HV_TIMER_MASK &&
230 	    hyperv_tc64 != NULL)
231 		use_timer = 1;
232 
233 	/*
234 	 * Create context for "post message" Hypercalls
235 	 */
236 	sc->vmbus_msg_hc = vmbus_msghc_ctx_create(
237 	    bus_get_dma_tag(sc->vmbus_dev));
238 	if (sc->vmbus_msg_hc == NULL)
239 		return ENXIO;
240 
241 	/*
242 	 * Allocate DMA stuffs.
243 	 */
244 	error = vmbus_dma_alloc(sc);
245 	if (error)
246 		goto failed;
247 
248 	/*
249 	 * Setup interrupt.
250 	 */
251 	error = vmbus_intr_setup(sc);
252 	if (error)
253 		goto failed;
254 
255 	if (use_timer) {
256 		/*
257 		 * Make sure that interrupt timer is stopped.
258 		 */
259 		lwkt_cpusync_simple(smp_active_mask, vmbus_timer_stop, sc);
260 	}
261 
262 	/*
263 	 * Setup SynIC.
264 	 */
265 	lwkt_cpusync_simple(smp_active_mask, vmbus_synic_setup, sc);
266 	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
267 
268 	/*
269 	 * Initialize vmbus.
270 	 */
271 	error = vmbus_init(sc);
272 	if (error)
273 		goto failed;
274 
275 	if (use_timer) {
276 		/*
277 		 * Configure and register vmbus interrupt timer.
278 		 */
279 		lwkt_cpusync_simple(smp_active_mask, vmbus_timer_config, sc);
280 		vmbus_cputimer_intr.priv = sc;
281 		cputimer_intr_register(&vmbus_cputimer_intr);
282 		if (vmbus_timer_intr_enable)
283 			cputimer_intr_select(&vmbus_cputimer_intr, 0);
284 	}
285 
286 	return 0;
287 failed:
288 	vmbus_detach(dev);
289 	return error;
290 }
291 
292 static int
293 vmbus_detach(device_t dev)
294 {
295 	struct vmbus_softc *sc = device_get_softc(dev);
296 
297 	/* TODO: uninitialize vmbus. */
298 	/* TODO: stop and deregister timer */
299 
300 	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC)
301 		lwkt_cpusync_simple(smp_active_mask, vmbus_synic_teardown, sc);
302 	vmbus_intr_teardown(sc);
303 	vmbus_dma_free(sc);
304 
305 	if (sc->vmbus_msg_hc != NULL) {
306 		vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc);
307 		sc->vmbus_msg_hc = NULL;
308 	}
309 	return (0);
310 }
311 
312 static __inline void
313 vmbus_msg_reset(volatile struct vmbus_message *msg)
314 {
315 	msg->msg_type = HYPERV_MSGTYPE_NONE;
316 	/*
317 	 * Make sure that the write to msg_type (i.e. set to
318 	 * HYPERV_MSGTYPE_NONE) happens before we read the
319 	 * msg_flags and send EOM to the hypervisor.
320 	 */
321 	cpu_mfence();
322 	if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
323 		/*
324 		 * Ask the hypervisor to rescan message queue,
325 		 * and deliver new message if any.
326 		 */
327 		wrmsr(MSR_HV_EOM, 0);
328 	}
329 }
330 
331 static void
332 vmbus_intr(void *xpsc)
333 {
334 	struct vmbus_pcpu_data *psc = xpsc;
335 	volatile struct vmbus_message *msg;
336 
337 	msg = psc->message + VMBUS_SINT_MESSAGE;
338 	while (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
339 		if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
340 			/* Channel message */
341 			vmbus_chan_msgproc(psc->sc,
342 			    __DEVOLATILE(const struct vmbus_message *, msg));
343 		}
344 		vmbus_msg_reset(msg);
345 	}
346 }
347 
348 static __inline void
349 vmbus_timer_oneshot(struct vmbus_pcpu_data *psc, uint64_t current)
350 {
351 	psc->timer_last = current;
352 	wrmsr(MSR_HV_STIMER0_COUNT, current);
353 }
354 
355 static void
356 vmbus_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
357 {
358 	struct globaldata *gd = mycpu;
359 	struct vmbus_softc *sc = cti->priv;
360 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, gd->gd_cpuid);
361 	uint64_t current;
362 
363 	reload = (uint64_t)reload * cti->freq / sys_cputimer->freq;
364 	current = hyperv_tc64() + reload;
365 
366 	if (gd->gd_timer_running) {
367 		if (current < psc->timer_last)
368 			vmbus_timer_oneshot(psc, current);
369 	} else {
370 		gd->gd_timer_running = 1;
371 		vmbus_timer_oneshot(psc, current);
372 	}
373 }
374 
375 static void
376 vmbus_timer_intr_pcpuhand(struct cputimer_intr *cti)
377 {
378 	struct vmbus_softc *sc = cti->priv;
379 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, mycpuid);
380 
381 	vmbus_timer_msgintr(psc);
382 }
383 
384 static void
385 vmbus_timer_intr_restart(struct cputimer_intr *cti)
386 {
387 	lwkt_send_ipiq_mask(smp_active_mask, vmbus_timer_restart, cti->priv);
388 }
389 
390 static struct vmbus_msghc *
391 vmbus_msghc_alloc(bus_dma_tag_t parent_dtag)
392 {
393 	struct vmbus_msghc *mh;
394 
395 	mh = kmalloc(sizeof(*mh), M_DEVBUF, M_WAITOK | M_ZERO);
396 
397 	mh->mh_inprm = hyperv_dmamem_alloc(parent_dtag,
398 	    HYPERCALL_POSTMSGIN_ALIGN, 0, HYPERCALL_POSTMSGIN_SIZE,
399 	    &mh->mh_inprm_dma, BUS_DMA_WAITOK);
400 	if (mh->mh_inprm == NULL) {
401 		kfree(mh, M_DEVBUF);
402 		return NULL;
403 	}
404 	return mh;
405 }
406 
407 static void
408 vmbus_msghc_free(struct vmbus_msghc *mh)
409 {
410 	hyperv_dmamem_free(&mh->mh_inprm_dma, mh->mh_inprm);
411 	kfree(mh, M_DEVBUF);
412 }
413 
414 static void
415 vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *mhc)
416 {
417 	KASSERT(mhc->mhc_active == NULL, ("still have active msg hypercall"));
418 	KASSERT(mhc->mhc_free == NULL, ("still have hypercall msg"));
419 
420 	lwkt_token_uninit(&mhc->mhc_free_token);
421 	lwkt_token_uninit(&mhc->mhc_active_token);
422 	kfree(mhc, M_DEVBUF);
423 }
424 
425 static struct vmbus_msghc_ctx *
426 vmbus_msghc_ctx_create(bus_dma_tag_t parent_dtag)
427 {
428 	struct vmbus_msghc_ctx *mhc;
429 
430 	mhc = kmalloc(sizeof(*mhc), M_DEVBUF, M_WAITOK | M_ZERO);
431 	lwkt_token_init(&mhc->mhc_free_token, "msghcf");
432 	lwkt_token_init(&mhc->mhc_active_token, "msghca");
433 
434 	mhc->mhc_free = vmbus_msghc_alloc(parent_dtag);
435 	if (mhc->mhc_free == NULL) {
436 		vmbus_msghc_ctx_free(mhc);
437 		return NULL;
438 	}
439 	return mhc;
440 }
441 
442 static struct vmbus_msghc *
443 vmbus_msghc_get1(struct vmbus_msghc_ctx *mhc, uint32_t dtor_flag)
444 {
445 	struct vmbus_msghc *mh;
446 
447 	lwkt_gettoken(&mhc->mhc_free_token);
448 
449 	while ((mhc->mhc_flags & dtor_flag) == 0 && mhc->mhc_free == NULL)
450 		tsleep(&mhc->mhc_free, 0, "gmsghc", 0);
451 	if (mhc->mhc_flags & dtor_flag) {
452 		/* Being destroyed */
453 		mh = NULL;
454 	} else {
455 		mh = mhc->mhc_free;
456 		KASSERT(mh != NULL, ("no free hypercall msg"));
457 		KASSERT(mh->mh_resp == NULL,
458 		    ("hypercall msg has pending response"));
459 		mhc->mhc_free = NULL;
460 	}
461 
462 	lwkt_reltoken(&mhc->mhc_free_token);
463 
464 	return mh;
465 }
466 
467 struct vmbus_msghc *
468 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
469 {
470 	struct hypercall_postmsg_in *inprm;
471 	struct vmbus_msghc *mh;
472 
473 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
474 		return NULL;
475 
476 	mh = vmbus_msghc_get1(sc->vmbus_msg_hc, VMBUS_MSGHC_CTXF_DESTROY);
477 	if (mh == NULL)
478 		return NULL;
479 
480 	inprm = mh->mh_inprm;
481 	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
482 	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
483 	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
484 	inprm->hc_dsize = dsize;
485 
486 	return mh;
487 }
488 
489 void
490 vmbus_msghc_put(struct vmbus_softc *sc, struct vmbus_msghc *mh)
491 {
492 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
493 
494 	KASSERT(mhc->mhc_active == NULL, ("msg hypercall is active"));
495 	mh->mh_resp = NULL;
496 
497 	lwkt_gettoken(&mhc->mhc_free_token);
498 	KASSERT(mhc->mhc_free == NULL, ("has free hypercall msg"));
499 	mhc->mhc_free = mh;
500 	lwkt_reltoken(&mhc->mhc_free_token);
501 	wakeup(&mhc->mhc_free);
502 }
503 
504 void *
505 vmbus_msghc_dataptr(struct vmbus_msghc *mh)
506 {
507 	return mh->mh_inprm->hc_data;
508 }
509 
510 static void
511 vmbus_msghc_ctx_destroy(struct vmbus_msghc_ctx *mhc)
512 {
513 	struct vmbus_msghc *mh;
514 
515 	lwkt_gettoken(&mhc->mhc_free_token);
516 	mhc->mhc_flags |= VMBUS_MSGHC_CTXF_DESTROY;
517 	lwkt_reltoken(&mhc->mhc_free_token);
518 	wakeup(&mhc->mhc_free);
519 
520 	mh = vmbus_msghc_get1(mhc, 0);
521 	if (mh == NULL)
522 		panic("can't get msghc");
523 
524 	vmbus_msghc_free(mh);
525 	vmbus_msghc_ctx_free(mhc);
526 }
527 
528 int
529 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
530 {
531 	int i, wait_ticks = 1;
532 
533 	/*
534 	 * Save the input parameter so that we could restore the input
535 	 * parameter if the Hypercall failed.
536 	 *
537 	 * XXX
538 	 * Is this really necessary?!  i.e. Will the Hypercall ever
539 	 * overwrite the input parameter?
540 	 */
541 	memcpy(&mh->mh_inprm_save, mh->mh_inprm, HYPERCALL_POSTMSGIN_SIZE);
542 
543 	/*
544 	 * In order to cope with transient failures, e.g. insufficient
545 	 * resources on host side, we retry the post message Hypercall
546 	 * several times.  20 retries seem sufficient.
547 	 */
548 #define HC_RETRY_MAX	20
549 
550 	for (i = 0; i < HC_RETRY_MAX; ++i) {
551 		uint64_t status;
552 
553 		status = hypercall_post_message(mh->mh_inprm_dma.hv_paddr);
554 		if (status == HYPERCALL_STATUS_SUCCESS)
555 			return 0;
556 
557 		tsleep(&status, 0, "hcpmsg", wait_ticks);
558 		if (wait_ticks < hz)
559 			wait_ticks *= 2;
560 
561 		/* Restore input parameter and try again */
562 		memcpy(mh->mh_inprm, &mh->mh_inprm_save,
563 		    HYPERCALL_POSTMSGIN_SIZE);
564 	}
565 
566 #undef HC_RETRY_MAX
567 
568 	return EIO;
569 }
570 
571 int
572 vmbus_msghc_exec(struct vmbus_softc *sc, struct vmbus_msghc *mh)
573 {
574 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
575 	int error;
576 
577 	KASSERT(mh->mh_resp == NULL, ("hypercall msg has pending response"));
578 
579 	lwkt_gettoken(&mhc->mhc_active_token);
580 	KASSERT(mhc->mhc_active == NULL, ("pending active msg hypercall"));
581 	mhc->mhc_active = mh;
582 	lwkt_reltoken(&mhc->mhc_active_token);
583 
584 	error = vmbus_msghc_exec_noresult(mh);
585 	if (error) {
586 		lwkt_gettoken(&mhc->mhc_active_token);
587 		KASSERT(mhc->mhc_active == mh, ("msghc mismatch"));
588 		mhc->mhc_active = NULL;
589 		lwkt_reltoken(&mhc->mhc_active_token);
590 	}
591 	return error;
592 }
593 
594 const struct vmbus_message *
595 vmbus_msghc_wait_result(struct vmbus_softc *sc, struct vmbus_msghc *mh)
596 {
597 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
598 
599 	lwkt_gettoken(&mhc->mhc_active_token);
600 
601 	KASSERT(mhc->mhc_active == mh, ("msghc mismatch"));
602 	while (mh->mh_resp == NULL)
603 		tsleep(&mhc->mhc_active, 0, "wmsghc", 0);
604 	mhc->mhc_active = NULL;
605 
606 	lwkt_reltoken(&mhc->mhc_active_token);
607 
608 	return mh->mh_resp;
609 }
610 
611 void
612 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
613 {
614 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
615 	struct vmbus_msghc *mh;
616 
617 	lwkt_gettoken(&mhc->mhc_active_token);
618 
619 	mh = mhc->mhc_active;
620 	KASSERT(mh != NULL, ("no pending msg hypercall"));
621 	memcpy(&mh->mh_resp0, msg, sizeof(mh->mh_resp0));
622 	mh->mh_resp = &mh->mh_resp0;
623 
624 	lwkt_reltoken(&mhc->mhc_active_token);
625 	wakeup(&mhc->mhc_active);
626 }
627 
628 static int
629 vmbus_dma_alloc(struct vmbus_softc *sc)
630 {
631 	bus_dma_tag_t parent_dtag;
632 	uint8_t *evtflags;
633 	int cpu;
634 
635 	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
636 	for (cpu = 0; cpu < ncpus; ++cpu) {
637 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
638 
639 		/*
640 		 * Per-cpu messages and event flags.
641 		 */
642 		psc->message = hyperv_dmamem_alloc(parent_dtag,
643 		    PAGE_SIZE, 0, PAGE_SIZE, &psc->message_dma,
644 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
645 		if (psc->message == NULL)
646 			return ENOMEM;
647 
648 		psc->event_flags = hyperv_dmamem_alloc(parent_dtag,
649 		    PAGE_SIZE, 0, PAGE_SIZE, &psc->event_flags_dma,
650 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
651 		if (psc->event_flags == NULL)
652 			return ENOMEM;
653 	}
654 
655 	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
656 	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
657 	if (evtflags == NULL)
658 		return ENOMEM;
659 	sc->vmbus_rx_evtflags = (u_long *)evtflags;
660 	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
661 	sc->vmbus_evtflags = evtflags;
662 
663 	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
664 	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
665 	if (sc->vmbus_mnf1 == NULL)
666 		return ENOMEM;
667 
668 	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
669 	    PAGE_SIZE, &sc->vmbus_mnf2_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
670 	if (sc->vmbus_mnf2 == NULL)
671 		return ENOMEM;
672 
673 	return 0;
674 }
675 
676 static void
677 vmbus_dma_free(struct vmbus_softc *sc)
678 {
679 	int cpu;
680 
681 	if (sc->vmbus_evtflags != NULL) {
682 		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
683 		sc->vmbus_evtflags = NULL;
684 		sc->vmbus_rx_evtflags = NULL;
685 		sc->vmbus_tx_evtflags = NULL;
686 	}
687 	if (sc->vmbus_mnf1 != NULL) {
688 		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
689 		sc->vmbus_mnf1 = NULL;
690 	}
691 	if (sc->vmbus_mnf2 != NULL) {
692 		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
693 		sc->vmbus_mnf2 = NULL;
694 	}
695 
696 	for (cpu = 0; cpu < ncpus; ++cpu) {
697 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
698 
699 		if (psc->message != NULL) {
700 			hyperv_dmamem_free(&psc->message_dma, psc->message);
701 			psc->message = NULL;
702 		}
703 		if (psc->event_flags != NULL) {
704 			hyperv_dmamem_free(&psc->event_flags_dma,
705 			    psc->event_flags);
706 			psc->event_flags = NULL;
707 		}
708 	}
709 }
710 
711 static int
712 vmbus_intr_setup(struct vmbus_softc *sc)
713 {
714 	device_t dev = sc->vmbus_dev;
715 	device_t bus = device_get_parent(device_get_parent(dev));
716 	int rid, cpu;
717 
718 	rid = 0;
719 	for (cpu = 0; cpu < ncpus; ++cpu) {
720 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
721 		uint64_t msi_addr;
722 		uint32_t msi_data;
723 		int error;
724 
725 		error = PCIB_ALLOC_MSIX(bus, dev, &psc->intr_irq, cpu);
726 		if (error) {
727 			device_printf(dev, "alloc vector on cpu%d failed: %d\n",
728 			    cpu, error);
729 			return ENXIO;
730 		}
731 		psc->intr_rid = ++rid;
732 
733 		psc->intr_res = BUS_ALLOC_RESOURCE(bus, dev, SYS_RES_IRQ,
734 		    &psc->intr_rid, psc->intr_irq, psc->intr_irq, 1,
735 		    RF_ACTIVE, cpu);
736 		if (psc->intr_res == NULL) {
737 			device_printf(dev, "alloc irq on cpu%d failed: %d\n",
738 			    cpu, error);
739 			return ENXIO;
740 		}
741 
742 		error = PCIB_MAP_MSI(bus, dev, rman_get_start(psc->intr_res),
743 		    &msi_addr, &msi_data, cpu);
744 		if (error) {
745 			device_printf(dev, "map irq on cpu%d failed: %d\n",
746 			    cpu, error);
747 			return ENXIO;
748 		}
749 		psc->intr_vec = hyperv_msi2vector(msi_addr, msi_data);
750 
751 		if (bootverbose) {
752 			device_printf(dev, "vector %d irq %d on cpu%d\n",
753 			    psc->intr_vec, psc->intr_irq, cpu);
754 		}
755 
756 		ksnprintf(psc->intr_desc, sizeof(psc->intr_desc), "%s cpu%d",
757 		    device_get_nameunit(dev), cpu);
758 		error = bus_setup_intr_descr(dev, psc->intr_res, INTR_MPSAFE,
759 		    vmbus_intr, psc, &psc->intr_hand, NULL, psc->intr_desc);
760 		if (error) {
761 			device_printf(dev, "setup intr on cpu%d failed: %d\n",
762 			    cpu, error);
763 			return ENXIO;
764 		}
765 	}
766 	return 0;
767 }
768 
769 static void
770 vmbus_intr_teardown(struct vmbus_softc *sc)
771 {
772 	device_t dev = sc->vmbus_dev;
773 	device_t bus = device_get_parent(device_get_parent(dev));
774 	int cpu;
775 
776 	for (cpu = 0; cpu < ncpus; ++cpu) {
777 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
778 
779 		if (psc->intr_hand != NULL) {
780 			bus_teardown_intr(dev, psc->intr_res, psc->intr_hand);
781 			psc->intr_hand = NULL;
782 		}
783 
784 		if (psc->intr_res != NULL) {
785 			BUS_RELEASE_RESOURCE(bus, dev, SYS_RES_IRQ,
786 			    psc->intr_rid, psc->intr_res);
787 			psc->intr_res = NULL;
788 		}
789 
790 		if (psc->intr_rid != 0) {
791 			PCIB_RELEASE_MSIX(bus, dev, psc->intr_irq, psc->cpuid);
792 			psc->intr_rid = 0;
793 		}
794 	}
795 }
796 
797 static void
798 vmbus_synic_setup(void *xsc)
799 {
800 	struct vmbus_softc *sc = xsc;
801 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, mycpuid);
802 	uint64_t val, orig;
803 	uint32_t sint;
804 
805 	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
806 		/*
807 		 * Save virtual processor id.
808 		 */
809 		psc->vcpuid = rdmsr(MSR_HV_VP_INDEX);
810 	} else {
811 		/*
812 		 * XXX
813 		 * Virtual processoor id is only used by a pretty broken
814 		 * channel selection code from storvsc.  It's nothing
815 		 * critical even if CPUID_HV_MSR_VP_INDEX is not set; keep
816 		 * moving on.
817 		 */
818 		psc->vcpuid = mycpuid;
819 	}
820 
821 	/*
822 	 * Setup the SynIC message.
823 	 */
824 	orig = rdmsr(MSR_HV_SIMP);
825 	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
826 	    ((psc->message_dma.hv_paddr >> PAGE_SHIFT) << MSR_HV_SIMP_PGSHIFT);
827 	wrmsr(MSR_HV_SIMP, val);
828 
829 	/*
830 	 * Setup the SynIC event flags.
831 	 */
832 	orig = rdmsr(MSR_HV_SIEFP);
833 	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
834 	    ((psc->event_flags_dma.hv_paddr >> PAGE_SHIFT) <<
835 	     MSR_HV_SIEFP_PGSHIFT);
836 	wrmsr(MSR_HV_SIEFP, val);
837 
838 
839 	/*
840 	 * Configure and unmask SINT for message and event flags.
841 	 */
842 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
843 	orig = rdmsr(sint);
844 	val = psc->intr_vec | /* MSR_HV_SINT_AUTOEOI | notyet */
845 	    (orig & MSR_HV_SINT_RSVD_MASK);
846 	wrmsr(sint, val);
847 
848 	/*
849 	 * Configure and unmask SINT for timer.
850 	 */
851 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
852 	orig = rdmsr(sint);
853 	val = XTIMER_OFFSET | /* MSR_HV_SINT_AUTOEOI | notyet */
854 	    (orig & MSR_HV_SINT_RSVD_MASK);
855 	wrmsr(sint, val);
856 
857 	/*
858 	 * All done; enable SynIC.
859 	 */
860 	orig = rdmsr(MSR_HV_SCONTROL);
861 	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
862 	wrmsr(MSR_HV_SCONTROL, val);
863 }
864 
865 static void
866 vmbus_timer_stop(void *arg __unused)
867 {
868 	for (;;) {
869 		uint64_t val;
870 
871 		/* Stop counting, and this also implies disabling STIMER0 */
872 		wrmsr(MSR_HV_STIMER0_COUNT, 0);
873 
874 		val = rdmsr(MSR_HV_STIMER0_CONFIG);
875 		if ((val & MSR_HV_STIMER_CFG_ENABLE) == 0)
876 			break;
877 		cpu_pause();
878 	}
879 }
880 
881 static void
882 vmbus_timer_config(void *arg __unused)
883 {
884 	/*
885 	 * Make sure that STIMER0 is really disabled before writing
886 	 * to STIMER0_CONFIG.
887 	 *
888 	 * "Writing to the configuration register of a timer that
889 	 *  is already enabled may result in undefined behaviour."
890 	 */
891 	vmbus_timer_stop(arg);
892 	wrmsr(MSR_HV_STIMER0_CONFIG,
893 	    MSR_HV_STIMER_CFG_AUTOEN | MSR_HV_STIMER0_CFG_SINT);
894 }
895 
896 static void
897 vmbus_timer_msgintr(struct vmbus_pcpu_data *psc)
898 {
899 	volatile struct vmbus_message *msg;
900 
901 	msg = psc->message + VMBUS_SINT_TIMER;
902 	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED)
903 		vmbus_msg_reset(msg);
904 }
905 
906 static void
907 vmbus_timer_restart(void *xsc)
908 {
909 	struct vmbus_softc *sc = xsc;
910 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, mycpuid);
911 
912 	crit_enter();
913 	vmbus_timer_msgintr(psc);
914 	vmbus_timer_oneshot(psc, hyperv_tc64() + 1);
915 	crit_exit();
916 }
917 
918 static void
919 vmbus_synic_teardown(void *arg __unused)
920 {
921 	uint64_t orig;
922 	uint32_t sint;
923 
924 	/*
925 	 * Disable SynIC.
926 	 */
927 	orig = rdmsr(MSR_HV_SCONTROL);
928 	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
929 
930 	/*
931 	 * Mask message and event flags SINT.
932 	 */
933 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
934 	orig = rdmsr(sint);
935 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
936 
937 	/*
938 	 * Mask timer SINT.
939 	 */
940 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
941 	orig = rdmsr(sint);
942 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
943 
944 	/*
945 	 * Teardown SynIC message.
946 	 */
947 	orig = rdmsr(MSR_HV_SIMP);
948 	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
949 
950 	/*
951 	 * Teardown SynIC event flags.
952 	 */
953 	orig = rdmsr(MSR_HV_SIEFP);
954 	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
955 }
956 
957 static int
958 vmbus_init_contact(struct vmbus_softc *sc, uint32_t version)
959 {
960 	struct vmbus_chanmsg_init_contact *req;
961 	const struct vmbus_chanmsg_version_resp *resp;
962 	const struct vmbus_message *msg;
963 	struct vmbus_msghc *mh;
964 	int error, supp = 0;
965 
966 	mh = vmbus_msghc_get(sc, sizeof(*req));
967 	if (mh == NULL)
968 		return ENXIO;
969 
970 	req = vmbus_msghc_dataptr(mh);
971 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_INIT_CONTACT;
972 	req->chm_ver = version;
973 	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
974 	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
975 	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
976 
977 	error = vmbus_msghc_exec(sc, mh);
978 	if (error) {
979 		vmbus_msghc_put(sc, mh);
980 		return error;
981 	}
982 
983 	msg = vmbus_msghc_wait_result(sc, mh);
984 	resp = (const struct vmbus_chanmsg_version_resp *)msg->msg_data;
985 	supp = resp->chm_supp;
986 
987 	vmbus_msghc_put(sc, mh);
988 
989 	return (supp ? 0 : EOPNOTSUPP);
990 }
991 
992 static int
993 vmbus_init(struct vmbus_softc *sc)
994 {
995 	int i;
996 
997 	for (i = 0; i < nitems(vmbus_version); ++i) {
998 		int error;
999 
1000 		error = vmbus_init_contact(sc, vmbus_version[i]);
1001 		if (!error) {
1002 			sc->vmbus_version = vmbus_version[i];
1003 			device_printf(sc->vmbus_dev, "version %u.%u\n",
1004 			    (sc->vmbus_version >> 16),
1005 			    (sc->vmbus_version & 0xffff));
1006 			return 0;
1007 		}
1008 	}
1009 	return ENXIO;
1010 }
1011 
1012 static void
1013 vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
1014 {
1015 	const struct vmbus_chanmsg_hdr *hdr;
1016 
1017 	hdr = (const struct vmbus_chanmsg_hdr *)msg->msg_data;
1018 
1019 	/* TODO */
1020 	if (hdr->chm_type == VMBUS_CHANMSG_TYPE_VERSION_RESP)
1021 		vmbus_msghc_wakeup(sc, msg);
1022 }
1023