xref: /dragonfly/sys/dev/virtual/hyperv/vmbus/vmbus.c (revision 52cb6762)
1 /*-
2  * Copyright (c) 2009-2012,2016 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "opt_acpi.h"
30 
31 #include <sys/param.h>
32 #include <sys/bus.h>
33 #include <sys/kernel.h>
34 #include <sys/module.h>
35 #include <sys/rman.h>
36 #include <sys/systimer.h>
37 #include <sys/thread.h>
38 #include <sys/thread2.h>
39 
40 #include <machine/intr_machdep.h>
41 #include <machine/smp.h>
42 
43 #include <dev/virtual/hyperv/include/hyperv_busdma.h>
44 #include <dev/virtual/hyperv/vmbus/hyperv_machdep.h>
45 #include <dev/virtual/hyperv/vmbus/hyperv_reg.h>
46 #include <dev/virtual/hyperv/vmbus/hyperv_var.h>
47 #include <dev/virtual/hyperv/vmbus/vmbus_reg.h>
48 #include <dev/virtual/hyperv/vmbus/vmbus_var.h>
49 
50 #include "acpi.h"
51 #include "acpi_if.h"
52 #include "pcib_if.h"
53 
/*
 * SINT field of STIMER0_CONFIG, pre-computed for the SINT that this
 * driver dedicates to the timer (VMBUS_SINT_TIMER).
 */
#define MSR_HV_STIMER0_CFG_SINT						\
	((((uint64_t)VMBUS_SINT_TIMER) << MSR_HV_STIMER_CFG_SINT_SHIFT) & \
	 MSR_HV_STIMER_CFG_SINT_MASK)

/*
 * Feature bits that must all be set before the interrupt timer is used.
 * On top of the synthetic timer itself:
 * - SynIC, because it generates the interrupt.
 * - Time reference counter, because STIMER0_COUNT is programmed with
 *   an ABS reference count.
 */
#define CPUID_HV_TIMER_MASK						\
	(CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_SYNIC | CPUID_HV_MSR_SYNTIMER)
67 
/*
 * SINT used for messages and event flags.
 * NOTE: DO NOT CHANGE THIS.
 */
#define VMBUS_SINT_MESSAGE		2

/*
 * SINT used for the interrupt timer.
 * NOTE:
 * - It must differ from VMBUS_SINT_MESSAGE.
 * - It must not be 0.
 */
#define VMBUS_SINT_TIMER		4

/*
 * Connection ids.
 * NOTE: DO NOT CHANGE THESE
 */
#define VMBUS_CONNID_MESSAGE		1
#define VMBUS_CONNID_EVENT		2
84 
85 struct vmbus_msghc {
86 	struct hypercall_postmsg_in	*mh_inprm;
87 	struct hypercall_postmsg_in	mh_inprm_save;
88 	struct hyperv_dma		mh_inprm_dma;
89 
90 	struct vmbus_message		*mh_resp;
91 	struct vmbus_message		mh_resp0;
92 };
93 
94 struct vmbus_msghc_ctx {
95 	struct vmbus_msghc		*mhc_free;
96 	struct lwkt_token		mhc_free_token;
97 	uint32_t			mhc_flags;
98 
99 	struct vmbus_msghc		*mhc_active;
100 	struct lwkt_token		mhc_active_token;
101 };
102 
103 #define VMBUS_MSGHC_CTXF_DESTROY	0x0001
104 
105 static int			vmbus_probe(device_t);
106 static int			vmbus_attach(device_t);
107 static int			vmbus_detach(device_t);
108 static void			vmbus_intr(void *);
109 static void			vmbus_timer_intr_reload(struct cputimer_intr *,
110 				    sysclock_t);
111 static void			vmbus_timer_intr_pcpuhand(
112 				    struct cputimer_intr *);
113 static void			vmbus_timer_intr_restart(
114 				    struct cputimer_intr *);
115 
116 static int			vmbus_dma_alloc(struct vmbus_softc *);
117 static void			vmbus_dma_free(struct vmbus_softc *);
118 static int			vmbus_intr_setup(struct vmbus_softc *);
119 static void			vmbus_intr_teardown(struct vmbus_softc *);
120 static int			vmbus_intr_rid(struct resource_list *, int);
121 static void			vmbus_synic_setup(void *);
122 static void			vmbus_synic_teardown(void *);
123 static void			vmbus_timer_stop(void *);
124 static void			vmbus_timer_config(void *);
125 static int			vmbus_init(struct vmbus_softc *);
126 static int			vmbus_init_contact(struct vmbus_softc *,
127 				    uint32_t);
128 static void			vmbus_timer_restart(void *);
129 static void			vmbus_timer_msgintr(struct vmbus_pcpu_data *);
130 
131 static void			vmbus_chan_msgproc(struct vmbus_softc *,
132 				    const struct vmbus_message *);
133 
134 static struct vmbus_msghc_ctx	*vmbus_msghc_ctx_create(bus_dma_tag_t);
135 static void			vmbus_msghc_ctx_destroy(
136 				    struct vmbus_msghc_ctx *);
137 static void			vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *);
138 static struct vmbus_msghc	*vmbus_msghc_alloc(bus_dma_tag_t);
139 static void			vmbus_msghc_free(struct vmbus_msghc *);
140 static struct vmbus_msghc	*vmbus_msghc_get1(struct vmbus_msghc_ctx *,
141 				    uint32_t);
142 
143 static device_method_t vmbus_methods[] = {
144 	/* Device interface */
145 	DEVMETHOD(device_probe,			vmbus_probe),
146 	DEVMETHOD(device_attach,		vmbus_attach),
147 	DEVMETHOD(device_detach,		vmbus_detach),
148 	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
149 	DEVMETHOD(device_suspend,		bus_generic_suspend),
150 	DEVMETHOD(device_resume,		bus_generic_resume),
151 
152 	DEVMETHOD_END
153 };
154 
155 static driver_t vmbus_driver = {
156 	"vmbus",
157 	vmbus_methods,
158 	sizeof(struct vmbus_softc)
159 };
160 
161 static devclass_t vmbus_devclass;
162 
163 DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
164 MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
165 MODULE_VERSION(vmbus, 1);
166 
167 static struct cputimer_intr vmbus_cputimer_intr = {
168 	.freq = HYPERV_TIMER_FREQ,
169 	.reload = vmbus_timer_intr_reload,
170 	.enable = cputimer_intr_default_enable,
171 	.config = cputimer_intr_default_config,
172 	.restart = vmbus_timer_intr_restart,
173 	.pmfixup = cputimer_intr_default_pmfixup,
174 	.initclock = cputimer_intr_default_initclock,
175 	.pcpuhand = vmbus_timer_intr_pcpuhand,
176 	.next = SLIST_ENTRY_INITIALIZER,
177 	.name = "hyperv",
178 	.type = CPUTIMER_INTR_VMM,
179 	.prio = CPUTIMER_INTR_PRIO_VMM,
180 	.caps = CPUTIMER_INTR_CAP_PS,
181 	.priv = NULL
182 };
183 
184 static const uint32_t	vmbus_version[] = {
185 	VMBUS_VERSION_WIN8_1,
186 	VMBUS_VERSION_WIN8,
187 	VMBUS_VERSION_WIN7,
188 	VMBUS_VERSION_WS2008
189 };
190 
191 static int		vmbus_timer_intr_enable = 1;
192 TUNABLE_INT("hw.vmbus.timer_intr.enable", &vmbus_timer_intr_enable);
193 
194 static int
195 vmbus_probe(device_t dev)
196 {
197 	char *id[] = { "VMBUS", NULL };
198 
199 	if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL ||
200 	    device_get_unit(dev) != 0 || vmm_guest != VMM_GUEST_HYPERV ||
201 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
202 		return (ENXIO);
203 
204 	device_set_desc(dev, "Hyper-V vmbus");
205 
206 	return (0);
207 }
208 
209 static int
210 vmbus_attach(device_t dev)
211 {
212 	struct vmbus_softc *sc = device_get_softc(dev);
213 	int error, cpu, use_timer;
214 
215 	/*
216 	 * Basic setup.
217 	 */
218 	sc->vmbus_dev = dev;
219 	for (cpu = 0; cpu < ncpus; ++cpu) {
220 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
221 
222 		psc->sc = sc;
223 		psc->cpuid = cpu;
224 		psc->timer_last = UINT64_MAX;
225 	}
226 
227 	/*
228 	 * Should we use interrupt timer?
229 	 */
230 	use_timer = 0;
231 	if (device_get_unit(dev) == 0 &&
232 	    (hyperv_features & CPUID_HV_TIMER_MASK) == CPUID_HV_TIMER_MASK)
233 		use_timer = 1;
234 
235 	/*
236 	 * Create context for "post message" Hypercalls
237 	 */
238 	sc->vmbus_msg_hc = vmbus_msghc_ctx_create(
239 	    bus_get_dma_tag(sc->vmbus_dev));
240 	if (sc->vmbus_msg_hc == NULL)
241 		return ENXIO;
242 
243 	/*
244 	 * Allocate DMA stuffs.
245 	 */
246 	error = vmbus_dma_alloc(sc);
247 	if (error)
248 		goto failed;
249 
250 	/*
251 	 * Setup interrupt.
252 	 */
253 	error = vmbus_intr_setup(sc);
254 	if (error)
255 		goto failed;
256 
257 	if (use_timer) {
258 		/*
259 		 * Make sure that interrupt timer is stopped.
260 		 */
261 		lwkt_cpusync_simple(smp_active_mask, vmbus_timer_stop, sc);
262 	}
263 
264 	/*
265 	 * Setup SynIC.
266 	 */
267 	lwkt_cpusync_simple(smp_active_mask, vmbus_synic_setup, sc);
268 	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
269 
270 	/*
271 	 * Initialize vmbus.
272 	 */
273 	error = vmbus_init(sc);
274 	if (error)
275 		goto failed;
276 
277 	if (use_timer) {
278 		/*
279 		 * Configure and register vmbus interrupt timer.
280 		 */
281 		lwkt_cpusync_simple(smp_active_mask, vmbus_timer_config, sc);
282 		vmbus_cputimer_intr.priv = sc;
283 		cputimer_intr_register(&vmbus_cputimer_intr);
284 		if (vmbus_timer_intr_enable)
285 			cputimer_intr_select(&vmbus_cputimer_intr, 0);
286 	}
287 
288 	return 0;
289 failed:
290 	vmbus_detach(dev);
291 	return error;
292 }
293 
294 static int
295 vmbus_detach(device_t dev)
296 {
297 	struct vmbus_softc *sc = device_get_softc(dev);
298 
299 	/* TODO: uninitialize vmbus. */
300 	/* TODO: stop and deregister timer */
301 
302 	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC)
303 		lwkt_cpusync_simple(smp_active_mask, vmbus_synic_teardown, sc);
304 	vmbus_intr_teardown(sc);
305 	vmbus_dma_free(sc);
306 
307 	if (sc->vmbus_msg_hc != NULL) {
308 		vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc);
309 		sc->vmbus_msg_hc = NULL;
310 	}
311 	return (0);
312 }
313 
314 static __inline void
315 vmbus_msg_reset(volatile struct vmbus_message *msg)
316 {
317 	msg->msg_type = HYPERV_MSGTYPE_NONE;
318 	/*
319 	 * Make sure that the write to msg_type (i.e. set to
320 	 * HYPERV_MSGTYPE_NONE) happens before we read the
321 	 * msg_flags and send EOM to the hypervisor.
322 	 */
323 	cpu_mfence();
324 	if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
325 		/*
326 		 * Ask the hypervisor to rescan message queue,
327 		 * and deliver new message if any.
328 		 */
329 		wrmsr(MSR_HV_EOM, 0);
330 	}
331 }
332 
333 static void
334 vmbus_intr(void *xpsc)
335 {
336 	struct vmbus_pcpu_data *psc = xpsc;
337 	volatile struct vmbus_message *msg;
338 
339 	msg = psc->message + VMBUS_SINT_MESSAGE;
340 	while (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
341 		if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
342 			/* Channel message */
343 			vmbus_chan_msgproc(psc->sc,
344 			    __DEVOLATILE(const struct vmbus_message *, msg));
345 		}
346 		vmbus_msg_reset(msg);
347 	}
348 }
349 
350 static __inline void
351 vmbus_timer_oneshot(struct vmbus_pcpu_data *psc, uint64_t current)
352 {
353 	psc->timer_last = current;
354 	wrmsr(MSR_HV_STIMER0_COUNT, current);
355 }
356 
357 static void
358 vmbus_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
359 {
360 	struct globaldata *gd = mycpu;
361 	struct vmbus_softc *sc = cti->priv;
362 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, gd->gd_cpuid);
363 	uint64_t current;
364 
365 	reload = (uint64_t)reload * cti->freq / sys_cputimer->freq;
366 	current = rdmsr(MSR_HV_TIME_REF_COUNT) + reload;
367 
368 	if (gd->gd_timer_running) {
369 		if (current < psc->timer_last)
370 			vmbus_timer_oneshot(psc, current);
371 	} else {
372 		gd->gd_timer_running = 1;
373 		vmbus_timer_oneshot(psc, current);
374 	}
375 }
376 
377 static void
378 vmbus_timer_intr_pcpuhand(struct cputimer_intr *cti)
379 {
380 	struct vmbus_softc *sc = cti->priv;
381 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, mycpuid);
382 
383 	vmbus_timer_msgintr(psc);
384 }
385 
386 static void
387 vmbus_timer_intr_restart(struct cputimer_intr *cti)
388 {
389 	lwkt_send_ipiq_mask(smp_active_mask, vmbus_timer_restart, cti->priv);
390 }
391 
392 static struct vmbus_msghc *
393 vmbus_msghc_alloc(bus_dma_tag_t parent_dtag)
394 {
395 	struct vmbus_msghc *mh;
396 
397 	mh = kmalloc(sizeof(*mh), M_DEVBUF, M_WAITOK | M_ZERO);
398 
399 	mh->mh_inprm = hyperv_dmamem_alloc(parent_dtag,
400 	    HYPERCALL_POSTMSGIN_ALIGN, 0, HYPERCALL_POSTMSGIN_SIZE,
401 	    &mh->mh_inprm_dma, BUS_DMA_WAITOK);
402 	if (mh->mh_inprm == NULL) {
403 		kfree(mh, M_DEVBUF);
404 		return NULL;
405 	}
406 	return mh;
407 }
408 
409 static void
410 vmbus_msghc_free(struct vmbus_msghc *mh)
411 {
412 	hyperv_dmamem_free(&mh->mh_inprm_dma, mh->mh_inprm);
413 	kfree(mh, M_DEVBUF);
414 }
415 
416 static void
417 vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *mhc)
418 {
419 	KASSERT(mhc->mhc_active == NULL, ("still have active msg hypercall"));
420 	KASSERT(mhc->mhc_free == NULL, ("still have hypercall msg"));
421 
422 	lwkt_token_uninit(&mhc->mhc_free_token);
423 	lwkt_token_uninit(&mhc->mhc_active_token);
424 	kfree(mhc, M_DEVBUF);
425 }
426 
427 static struct vmbus_msghc_ctx *
428 vmbus_msghc_ctx_create(bus_dma_tag_t parent_dtag)
429 {
430 	struct vmbus_msghc_ctx *mhc;
431 
432 	mhc = kmalloc(sizeof(*mhc), M_DEVBUF, M_WAITOK | M_ZERO);
433 	lwkt_token_init(&mhc->mhc_free_token, "msghcf");
434 	lwkt_token_init(&mhc->mhc_active_token, "msghca");
435 
436 	mhc->mhc_free = vmbus_msghc_alloc(parent_dtag);
437 	if (mhc->mhc_free == NULL) {
438 		vmbus_msghc_ctx_free(mhc);
439 		return NULL;
440 	}
441 	return mhc;
442 }
443 
444 static struct vmbus_msghc *
445 vmbus_msghc_get1(struct vmbus_msghc_ctx *mhc, uint32_t dtor_flag)
446 {
447 	struct vmbus_msghc *mh;
448 
449 	lwkt_gettoken(&mhc->mhc_free_token);
450 
451 	while ((mhc->mhc_flags & dtor_flag) == 0 && mhc->mhc_free == NULL)
452 		tsleep(&mhc->mhc_free, 0, "gmsghc", 0);
453 	if (mhc->mhc_flags & dtor_flag) {
454 		/* Being destroyed */
455 		mh = NULL;
456 	} else {
457 		mh = mhc->mhc_free;
458 		KASSERT(mh != NULL, ("no free hypercall msg"));
459 		KASSERT(mh->mh_resp == NULL,
460 		    ("hypercall msg has pending response"));
461 		mhc->mhc_free = NULL;
462 	}
463 
464 	lwkt_reltoken(&mhc->mhc_free_token);
465 
466 	return mh;
467 }
468 
469 struct vmbus_msghc *
470 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
471 {
472 	struct hypercall_postmsg_in *inprm;
473 	struct vmbus_msghc *mh;
474 
475 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
476 		return NULL;
477 
478 	mh = vmbus_msghc_get1(sc->vmbus_msg_hc, VMBUS_MSGHC_CTXF_DESTROY);
479 	if (mh == NULL)
480 		return NULL;
481 
482 	inprm = mh->mh_inprm;
483 	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
484 	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
485 	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
486 	inprm->hc_dsize = dsize;
487 
488 	return mh;
489 }
490 
491 void
492 vmbus_msghc_put(struct vmbus_softc *sc, struct vmbus_msghc *mh)
493 {
494 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
495 
496 	KASSERT(mhc->mhc_active == NULL, ("msg hypercall is active"));
497 	mh->mh_resp = NULL;
498 
499 	lwkt_gettoken(&mhc->mhc_free_token);
500 	KASSERT(mhc->mhc_free == NULL, ("has free hypercall msg"));
501 	mhc->mhc_free = mh;
502 	lwkt_reltoken(&mhc->mhc_free_token);
503 	wakeup(&mhc->mhc_free);
504 }
505 
506 void *
507 vmbus_msghc_dataptr(struct vmbus_msghc *mh)
508 {
509 	return mh->mh_inprm->hc_data;
510 }
511 
512 static void
513 vmbus_msghc_ctx_destroy(struct vmbus_msghc_ctx *mhc)
514 {
515 	struct vmbus_msghc *mh;
516 
517 	lwkt_gettoken(&mhc->mhc_free_token);
518 	mhc->mhc_flags |= VMBUS_MSGHC_CTXF_DESTROY;
519 	lwkt_reltoken(&mhc->mhc_free_token);
520 	wakeup(&mhc->mhc_free);
521 
522 	mh = vmbus_msghc_get1(mhc, 0);
523 	if (mh == NULL)
524 		panic("can't get msghc");
525 
526 	vmbus_msghc_free(mh);
527 	vmbus_msghc_ctx_free(mhc);
528 }
529 
530 int
531 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
532 {
533 	int i, wait_ticks = 1;
534 
535 	/*
536 	 * Save the input parameter so that we could restore the input
537 	 * parameter if the Hypercall failed.
538 	 *
539 	 * XXX
540 	 * Is this really necessary?!  i.e. Will the Hypercall ever
541 	 * overwrite the input parameter?
542 	 */
543 	memcpy(&mh->mh_inprm_save, mh->mh_inprm, HYPERCALL_POSTMSGIN_SIZE);
544 
545 	/*
546 	 * In order to cope with transient failures, e.g. insufficient
547 	 * resources on host side, we retry the post message Hypercall
548 	 * several times.  20 retries seem sufficient.
549 	 */
550 #define HC_RETRY_MAX	20
551 
552 	for (i = 0; i < HC_RETRY_MAX; ++i) {
553 		uint64_t status;
554 
555 		status = hypercall_post_message(mh->mh_inprm_dma.hv_paddr);
556 		if (status == HYPERCALL_STATUS_SUCCESS)
557 			return 0;
558 
559 		tsleep(&status, 0, "hcpmsg", wait_ticks);
560 		if (wait_ticks < hz)
561 			wait_ticks *= 2;
562 
563 		/* Restore input parameter and try again */
564 		memcpy(mh->mh_inprm, &mh->mh_inprm_save,
565 		    HYPERCALL_POSTMSGIN_SIZE);
566 	}
567 
568 #undef HC_RETRY_MAX
569 
570 	return EIO;
571 }
572 
573 int
574 vmbus_msghc_exec(struct vmbus_softc *sc, struct vmbus_msghc *mh)
575 {
576 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
577 	int error;
578 
579 	KASSERT(mh->mh_resp == NULL, ("hypercall msg has pending response"));
580 
581 	lwkt_gettoken(&mhc->mhc_active_token);
582 	KASSERT(mhc->mhc_active == NULL, ("pending active msg hypercall"));
583 	mhc->mhc_active = mh;
584 	lwkt_reltoken(&mhc->mhc_active_token);
585 
586 	error = vmbus_msghc_exec_noresult(mh);
587 	if (error) {
588 		lwkt_gettoken(&mhc->mhc_active_token);
589 		KASSERT(mhc->mhc_active == mh, ("msghc mismatch"));
590 		mhc->mhc_active = NULL;
591 		lwkt_reltoken(&mhc->mhc_active_token);
592 	}
593 	return error;
594 }
595 
596 const struct vmbus_message *
597 vmbus_msghc_wait_result(struct vmbus_softc *sc, struct vmbus_msghc *mh)
598 {
599 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
600 
601 	lwkt_gettoken(&mhc->mhc_active_token);
602 
603 	KASSERT(mhc->mhc_active == mh, ("msghc mismatch"));
604 	while (mh->mh_resp == NULL)
605 		tsleep(&mhc->mhc_active, 0, "wmsghc", 0);
606 	mhc->mhc_active = NULL;
607 
608 	lwkt_reltoken(&mhc->mhc_active_token);
609 
610 	return mh->mh_resp;
611 }
612 
613 void
614 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
615 {
616 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
617 	struct vmbus_msghc *mh;
618 
619 	lwkt_gettoken(&mhc->mhc_active_token);
620 
621 	mh = mhc->mhc_active;
622 	KASSERT(mh != NULL, ("no pending msg hypercall"));
623 	memcpy(&mh->mh_resp0, msg, sizeof(mh->mh_resp0));
624 	mh->mh_resp = &mh->mh_resp0;
625 
626 	lwkt_reltoken(&mhc->mhc_active_token);
627 	wakeup(&mhc->mhc_active);
628 }
629 
630 static int
631 vmbus_dma_alloc(struct vmbus_softc *sc)
632 {
633 	bus_dma_tag_t parent_dtag;
634 	uint8_t *evtflags;
635 	int cpu;
636 
637 	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
638 	for (cpu = 0; cpu < ncpus; ++cpu) {
639 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
640 
641 		/*
642 		 * Per-cpu messages and event flags.
643 		 */
644 		psc->message = hyperv_dmamem_alloc(parent_dtag,
645 		    PAGE_SIZE, 0, PAGE_SIZE, &psc->message_dma,
646 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
647 		if (psc->message == NULL)
648 			return ENOMEM;
649 
650 		psc->event_flags = hyperv_dmamem_alloc(parent_dtag,
651 		    PAGE_SIZE, 0, PAGE_SIZE, &psc->event_flags_dma,
652 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
653 		if (psc->event_flags == NULL)
654 			return ENOMEM;
655 	}
656 
657 	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
658 	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
659 	if (evtflags == NULL)
660 		return ENOMEM;
661 	sc->vmbus_rx_evtflags = (u_long *)evtflags;
662 	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
663 	sc->vmbus_evtflags = evtflags;
664 
665 	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
666 	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
667 	if (sc->vmbus_mnf1 == NULL)
668 		return ENOMEM;
669 
670 	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
671 	    PAGE_SIZE, &sc->vmbus_mnf2_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
672 	if (sc->vmbus_mnf2 == NULL)
673 		return ENOMEM;
674 
675 	return 0;
676 }
677 
678 static void
679 vmbus_dma_free(struct vmbus_softc *sc)
680 {
681 	int cpu;
682 
683 	if (sc->vmbus_evtflags != NULL) {
684 		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
685 		sc->vmbus_evtflags = NULL;
686 		sc->vmbus_rx_evtflags = NULL;
687 		sc->vmbus_tx_evtflags = NULL;
688 	}
689 	if (sc->vmbus_mnf1 != NULL) {
690 		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
691 		sc->vmbus_mnf1 = NULL;
692 	}
693 	if (sc->vmbus_mnf2 != NULL) {
694 		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
695 		sc->vmbus_mnf2 = NULL;
696 	}
697 
698 	for (cpu = 0; cpu < ncpus; ++cpu) {
699 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
700 
701 		if (psc->message != NULL) {
702 			hyperv_dmamem_free(&psc->message_dma, psc->message);
703 			psc->message = NULL;
704 		}
705 		if (psc->event_flags != NULL) {
706 			hyperv_dmamem_free(&psc->event_flags_dma,
707 			    psc->event_flags);
708 			psc->event_flags = NULL;
709 		}
710 	}
711 }
712 
713 static int
714 vmbus_intr_rid(struct resource_list *rl, int rid)
715 {
716 	do {
717 		++rid;
718 		if (resource_list_find(rl, SYS_RES_IRQ, rid) == NULL)
719 			break;
720 	} while (1);
721 	return rid;
722 }
723 
724 static int
725 vmbus_intr_setup(struct vmbus_softc *sc)
726 {
727 	device_t dev = sc->vmbus_dev;
728 	device_t parent = device_get_parent(dev);
729 	device_t bus = device_get_parent(parent);
730 	struct resource_list *rl;
731 	int rid, cpu;
732 
733 	rl = BUS_GET_RESOURCE_LIST(parent, dev);
734 	if (rl == NULL)
735 		return ENXIO;
736 
737 	rid = 0;
738 	for (cpu = 0; cpu < ncpus; ++cpu) {
739 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
740 		struct resource *res;
741 		uint64_t msi_addr;
742 		uint32_t msi_data;
743 		int irq, error;
744 
745 		error = PCIB_ALLOC_MSIX(bus, dev, &irq, cpu);
746 		if (error) {
747 			device_printf(dev, "alloc vector on cpu%d failed: %d\n",
748 			    cpu, error);
749 			return ENXIO;
750 		}
751 		rid = vmbus_intr_rid(rl, rid);
752 		resource_list_add(rl, SYS_RES_IRQ, rid, irq, irq, 1, cpu);
753 		psc->intr_rid = rid;
754 
755 		res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE);
756 		if (res == NULL) {
757 			device_printf(dev, "alloc irq on cpu%d failed: %d\n",
758 			    cpu, error);
759 			return ENXIO;
760 		}
761 		psc->intr_res = res;
762 
763 		error = PCIB_MAP_MSI(bus, dev, rman_get_start(res),
764 		    &msi_addr, &msi_data, cpu);
765 		if (error) {
766 			device_printf(dev, "map irq on cpu%d failed: %d\n",
767 			    cpu, error);
768 			return ENXIO;
769 		}
770 		psc->intr_vec = hyperv_msi2vector(msi_addr, msi_data);
771 
772 		if (bootverbose) {
773 			device_printf(dev, "vector %d irq %d on cpu%d\n",
774 			    psc->intr_vec, irq, cpu);
775 		}
776 
777 		ksnprintf(psc->intr_desc, sizeof(psc->intr_desc), "%s cpu%d",
778 		    device_get_nameunit(dev), cpu);
779 		error = bus_setup_intr_descr(dev, res, INTR_MPSAFE, vmbus_intr,
780 		    psc, &psc->intr_hand, NULL, psc->intr_desc);
781 		if (error) {
782 			device_printf(dev, "setup intr on cpu%d failed: %d\n",
783 			    cpu, error);
784 			return ENXIO;
785 		}
786 	}
787 	return 0;
788 }
789 
790 static void
791 vmbus_intr_teardown(struct vmbus_softc *sc)
792 {
793 	device_t dev = sc->vmbus_dev;
794 	device_t parent = device_get_parent(dev);
795 	struct resource_list *rl;
796 	int cpu;
797 
798 	rl = BUS_GET_RESOURCE_LIST(parent, dev);
799 	if (rl == NULL)
800 		return;
801 
802 	for (cpu = 0; cpu < ncpus; ++cpu) {
803 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
804 
805 		if (psc->intr_hand != NULL) {
806 			bus_teardown_intr(dev, psc->intr_res, psc->intr_hand);
807 			psc->intr_hand = NULL;
808 		}
809 
810 		if (psc->intr_res != NULL) {
811 			bus_release_resource(dev, SYS_RES_IRQ, psc->intr_rid,
812 			    psc->intr_res);
813 			psc->intr_res = NULL;
814 		}
815 
816 		if (psc->intr_rid != 0) {
817 			struct resource_list_entry *rle;
818 			int irq;
819 
820 			rle = resource_list_find(rl, SYS_RES_IRQ,
821 			    psc->intr_rid);
822 			irq = rle->start;
823 			resource_list_delete(rl, SYS_RES_IRQ, psc->intr_rid);
824 
825 			PCIB_RELEASE_MSIX(device_get_parent(parent), dev, irq,
826 			    psc->cpuid);
827 			psc->intr_rid = 0;
828 		}
829 	}
830 }
831 
832 static void
833 vmbus_synic_setup(void *xsc)
834 {
835 	struct vmbus_softc *sc = xsc;
836 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, mycpuid);
837 	uint64_t val, orig;
838 	uint32_t sint;
839 
840 	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
841 		/*
842 		 * Save virtual processor id.
843 		 */
844 		psc->vcpuid = rdmsr(MSR_HV_VP_INDEX);
845 	} else {
846 		/*
847 		 * XXX
848 		 * Virtual processoor id is only used by a pretty broken
849 		 * channel selection code from storvsc.  It's nothing
850 		 * critical even if CPUID_HV_MSR_VP_INDEX is not set; keep
851 		 * moving on.
852 		 */
853 		psc->vcpuid = mycpuid;
854 	}
855 
856 	/*
857 	 * Setup the SynIC message.
858 	 */
859 	orig = rdmsr(MSR_HV_SIMP);
860 	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
861 	    ((psc->message_dma.hv_paddr >> PAGE_SHIFT) << MSR_HV_SIMP_PGSHIFT);
862 	wrmsr(MSR_HV_SIMP, val);
863 
864 	/*
865 	 * Setup the SynIC event flags.
866 	 */
867 	orig = rdmsr(MSR_HV_SIEFP);
868 	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
869 	    ((psc->event_flags_dma.hv_paddr >> PAGE_SHIFT) <<
870 	     MSR_HV_SIEFP_PGSHIFT);
871 	wrmsr(MSR_HV_SIEFP, val);
872 
873 
874 	/*
875 	 * Configure and unmask SINT for message and event flags.
876 	 */
877 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
878 	orig = rdmsr(sint);
879 	val = psc->intr_vec | /* MSR_HV_SINT_AUTOEOI | notyet */
880 	    (orig & MSR_HV_SINT_RSVD_MASK);
881 	wrmsr(sint, val);
882 
883 	/*
884 	 * Configure and unmask SINT for timer.
885 	 */
886 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
887 	orig = rdmsr(sint);
888 	val = XTIMER_OFFSET | /* MSR_HV_SINT_AUTOEOI | notyet */
889 	    (orig & MSR_HV_SINT_RSVD_MASK);
890 	wrmsr(sint, val);
891 
892 	/*
893 	 * All done; enable SynIC.
894 	 */
895 	orig = rdmsr(MSR_HV_SCONTROL);
896 	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
897 	wrmsr(MSR_HV_SCONTROL, val);
898 }
899 
900 static void
901 vmbus_timer_stop(void *arg __unused)
902 {
903 	for (;;) {
904 		uint64_t val;
905 
906 		/* Stop counting, and this also implies disabling STIMER0 */
907 		wrmsr(MSR_HV_STIMER0_COUNT, 0);
908 
909 		val = rdmsr(MSR_HV_STIMER0_CONFIG);
910 		if ((val & MSR_HV_STIMER_CFG_ENABLE) == 0)
911 			break;
912 		cpu_pause();
913 	}
914 }
915 
916 static void
917 vmbus_timer_config(void *arg __unused)
918 {
919 	/*
920 	 * Make sure that STIMER0 is really disabled before writing
921 	 * to STIMER0_CONFIG.
922 	 *
923 	 * "Writing to the configuration register of a timer that
924 	 *  is already enabled may result in undefined behaviour."
925 	 */
926 	vmbus_timer_stop(arg);
927 	wrmsr(MSR_HV_STIMER0_CONFIG,
928 	    MSR_HV_STIMER_CFG_AUTOEN | MSR_HV_STIMER0_CFG_SINT);
929 }
930 
931 static void
932 vmbus_timer_msgintr(struct vmbus_pcpu_data *psc)
933 {
934 	volatile struct vmbus_message *msg;
935 
936 	msg = psc->message + VMBUS_SINT_TIMER;
937 	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED)
938 		vmbus_msg_reset(msg);
939 }
940 
941 static void
942 vmbus_timer_restart(void *xsc)
943 {
944 	struct vmbus_softc *sc = xsc;
945 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, mycpuid);
946 
947 	crit_enter();
948 	vmbus_timer_msgintr(psc);
949 	vmbus_timer_oneshot(psc, rdmsr(MSR_HV_TIME_REF_COUNT) + 1);
950 	crit_exit();
951 }
952 
953 static void
954 vmbus_synic_teardown(void *arg __unused)
955 {
956 	uint64_t orig;
957 	uint32_t sint;
958 
959 	/*
960 	 * Disable SynIC.
961 	 */
962 	orig = rdmsr(MSR_HV_SCONTROL);
963 	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
964 
965 	/*
966 	 * Mask message and event flags SINT.
967 	 */
968 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
969 	orig = rdmsr(sint);
970 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
971 
972 	/*
973 	 * Mask timer SINT.
974 	 */
975 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
976 	orig = rdmsr(sint);
977 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
978 
979 	/*
980 	 * Teardown SynIC message.
981 	 */
982 	orig = rdmsr(MSR_HV_SIMP);
983 	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
984 
985 	/*
986 	 * Teardown SynIC event flags.
987 	 */
988 	orig = rdmsr(MSR_HV_SIEFP);
989 	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
990 }
991 
992 static int
993 vmbus_init_contact(struct vmbus_softc *sc, uint32_t version)
994 {
995 	struct vmbus_chanmsg_init_contact *req;
996 	const struct vmbus_chanmsg_version_resp *resp;
997 	const struct vmbus_message *msg;
998 	struct vmbus_msghc *mh;
999 	int error, supp = 0;
1000 
1001 	mh = vmbus_msghc_get(sc, sizeof(*req));
1002 	if (mh == NULL)
1003 		return ENXIO;
1004 
1005 	req = vmbus_msghc_dataptr(mh);
1006 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_INIT_CONTACT;
1007 	req->chm_ver = version;
1008 	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
1009 	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
1010 	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
1011 
1012 	error = vmbus_msghc_exec(sc, mh);
1013 	if (error) {
1014 		vmbus_msghc_put(sc, mh);
1015 		return error;
1016 	}
1017 
1018 	msg = vmbus_msghc_wait_result(sc, mh);
1019 	resp = (const struct vmbus_chanmsg_version_resp *)msg->msg_data;
1020 	supp = resp->chm_supp;
1021 
1022 	vmbus_msghc_put(sc, mh);
1023 
1024 	return (supp ? 0 : EOPNOTSUPP);
1025 }
1026 
1027 static int
1028 vmbus_init(struct vmbus_softc *sc)
1029 {
1030 	int i;
1031 
1032 	for (i = 0; i < nitems(vmbus_version); ++i) {
1033 		int error;
1034 
1035 		error = vmbus_init_contact(sc, vmbus_version[i]);
1036 		if (!error) {
1037 			sc->vmbus_version = vmbus_version[i];
1038 			device_printf(sc->vmbus_dev, "version %u.%u\n",
1039 			    (sc->vmbus_version >> 16),
1040 			    (sc->vmbus_version & 0xffff));
1041 			return 0;
1042 		}
1043 	}
1044 	return ENXIO;
1045 }
1046 
1047 static void
1048 vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
1049 {
1050 	const struct vmbus_chanmsg_hdr *hdr;
1051 
1052 	hdr = (const struct vmbus_chanmsg_hdr *)msg->msg_data;
1053 
1054 	/* TODO */
1055 	if (hdr->chm_type == VMBUS_CHANMSG_TYPE_VERSION_RESP)
1056 		vmbus_msghc_wakeup(sc, msg);
1057 }
1058