xref: /dragonfly/sys/dev/virtual/hyperv/vmbus/vmbus.c (revision 1fe7e945)
1 /*-
2  * Copyright (c) 2009-2012,2016 Microsoft Corp.
3  * Copyright (c) 2012 NetApp Inc.
4  * Copyright (c) 2012 Citrix Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "opt_acpi.h"
30 
31 #include <sys/param.h>
32 #include <sys/bus.h>
33 #include <sys/kernel.h>
34 #include <sys/module.h>
35 #include <sys/rman.h>
36 #include <sys/systimer.h>
37 #include <sys/thread.h>
38 #include <sys/thread2.h>
39 
40 #include <machine/intr_machdep.h>
41 #include <machine/smp.h>
42 
43 #include <dev/virtual/hyperv/hyperv_busdma.h>
44 #include <dev/virtual/hyperv/hyperv_machdep.h>
45 #include <dev/virtual/hyperv/hyperv_reg.h>
46 #include <dev/virtual/hyperv/hyperv_var.h>
47 #include <dev/virtual/hyperv/vmbus/vmbus_reg.h>
48 #include <dev/virtual/hyperv/vmbus/vmbus_var.h>
49 
50 #include "acpi.h"
51 #include "acpi_if.h"
52 #include "pcib_if.h"
53 
/*
 * SINT field of the STIMER0 configuration value, selecting
 * VMBUS_SINT_TIMER.  vmbus_timer_config() ORs this into the value it
 * writes to MSR_HV_STIMER0_CONFIG.
 */
#define MSR_HV_STIMER0_CFG_SINT		\
	((((uint64_t)VMBUS_SINT_TIMER) << MSR_HV_STIMER_CFG_SINT_SHIFT) & \
	 MSR_HV_STIMER_CFG_SINT_MASK)

/*
 * Hyper-V features that must all be present before vmbus_attach()
 * decides to use the interrupt timer.
 *
 * Additionally required feature:
 * - SynIC is needed for interrupt generation.
 */
#define CPUID_HV_TIMER_MASK		(CPUID_HV_MSR_SYNIC |		\
					 CPUID_HV_MSR_SYNTIMER)

/*
 * SINT used for vmbus messages and event flags.  It is used both as
 * the offset from MSR_HV_SINT0 and as the slot index into the per-cpu
 * message page.
 *
 * NOTE: DO NOT CHANGE THIS.
 */
#define VMBUS_SINT_MESSAGE		2
/*
 * SINT used for the interrupt timer; used as the offset from
 * MSR_HV_SINT0 and as the slot index into the per-cpu message page.
 *
 * NOTE:
 * - DO NOT set it to the same value as VMBUS_SINT_MESSAGE.
 * - DO NOT set it to 0.
 */
#define VMBUS_SINT_TIMER		4

/*
 * Connection ids.  VMBUS_CONNID_MESSAGE is stored in the hc_connid
 * field of the "post message" Hypercall input by vmbus_msghc_get().
 *
 * NOTE: DO NOT CHANGE THESE
 */
#define VMBUS_CONNID_MESSAGE		1
#define VMBUS_CONNID_EVENT		2
81 
/*
 * One "post message" Hypercall request together with the storage for
 * its response.
 */
struct vmbus_msghc {
	/* Hypercall input parameter; allocated by vmbus_msghc_alloc(). */
	struct hypercall_postmsg_in	*mh_inprm;
	/* Copy of *mh_inprm, used to restore it after a failed Hypercall. */
	struct hypercall_postmsg_in	mh_inprm_save;
	/* DMA bookkeeping of mh_inprm; its hv_paddr is passed to the Hypercall. */
	struct hyperv_dma		mh_inprm_dma;

	/* NULL until vmbus_msghc_wakeup() points it at mh_resp0. */
	struct vmbus_message		*mh_resp;
	/* Private copy of the response message. */
	struct vmbus_message		mh_resp0;
};

/*
 * Context of the "post message" Hypercalls.  It holds at most one free
 * vmbus_msghc and at most one active (response pending) vmbus_msghc.
 */
struct vmbus_msghc_ctx {
	/* The free Hypercall message, or NULL if it is handed out. */
	struct vmbus_msghc		*mhc_free;
	/* Held while mhc_free and mhc_flags are examined or changed. */
	struct lwkt_token		mhc_free_token;
	/* VMBUS_MSGHC_CTXF_ flags. */
	uint32_t			mhc_flags;

	/* The Hypercall message whose response is being waited for. */
	struct vmbus_msghc		*mhc_active;
	/* Held while mhc_active is examined or changed. */
	struct lwkt_token		mhc_active_token;
};

/* Set by vmbus_msghc_ctx_destroy(); makes vmbus_msghc_get() return NULL. */
#define VMBUS_MSGHC_CTXF_DESTROY	0x0001
101 
/*
 * Forward declarations of the file-local functions.
 */

/* Device interface and interrupt/cputimer callbacks. */
static int			vmbus_probe(device_t);
static int			vmbus_attach(device_t);
static int			vmbus_detach(device_t);
static void			vmbus_intr(void *);
static void			vmbus_timer_intr_reload(struct cputimer_intr *,
				    sysclock_t);
static void			vmbus_timer_intr_pcpuhand(
				    struct cputimer_intr *);
static void			vmbus_timer_intr_restart(
				    struct cputimer_intr *);

/* Resource setup/teardown, SynIC, timer and version negotiation. */
static int			vmbus_dma_alloc(struct vmbus_softc *);
static void			vmbus_dma_free(struct vmbus_softc *);
static int			vmbus_intr_setup(struct vmbus_softc *);
static void			vmbus_intr_teardown(struct vmbus_softc *);
static void			vmbus_synic_setup(void *);
static void			vmbus_synic_teardown(void *);
static void			vmbus_timer_stop(void *);
static void			vmbus_timer_config(void *);
static int			vmbus_init(struct vmbus_softc *);
static int			vmbus_init_contact(struct vmbus_softc *,
				    uint32_t);
static void			vmbus_timer_restart(void *);
static void			vmbus_timer_msgintr(struct vmbus_pcpu_data *);

/* Channel message processing. */
static void			vmbus_chan_msgproc(struct vmbus_softc *,
				    const struct vmbus_message *);

/* "Post message" Hypercall context management. */
static struct vmbus_msghc_ctx	*vmbus_msghc_ctx_create(bus_dma_tag_t);
static void			vmbus_msghc_ctx_destroy(
				    struct vmbus_msghc_ctx *);
static void			vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *);
static struct vmbus_msghc	*vmbus_msghc_alloc(bus_dma_tag_t);
static void			vmbus_msghc_free(struct vmbus_msghc *);
static struct vmbus_msghc	*vmbus_msghc_get1(struct vmbus_msghc_ctx *,
				    uint32_t);
138 
/*
 * Device method table: probe/attach/detach are implemented in this
 * file, shutdown/suspend/resume use the generic bus implementations.
 */
static device_method_t vmbus_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,			vmbus_probe),
	DEVMETHOD(device_attach,		vmbus_attach),
	DEVMETHOD(device_detach,		vmbus_detach),
	DEVMETHOD(device_shutdown,		bus_generic_shutdown),
	DEVMETHOD(device_suspend,		bus_generic_suspend),
	DEVMETHOD(device_resume,		bus_generic_resume),

	DEVMETHOD_END
};

/* Driver description; the softc is a struct vmbus_softc. */
static driver_t vmbus_driver = {
	"vmbus",
	vmbus_methods,
	sizeof(struct vmbus_softc)
};

static devclass_t vmbus_devclass;

/* The vmbus driver hangs off the acpi bus and depends on the acpi module. */
DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL);
MODULE_DEPEND(vmbus, acpi, 1, 1, 1);
MODULE_VERSION(vmbus, 1);
162 
/*
 * Description of the vmbus interrupt timer.  The reload, restart and
 * pcpuhand callbacks are implemented in this file; the remaining
 * callbacks use the cputimer_intr defaults.  The priv field is set to
 * the softc by vmbus_attach() before the timer is registered.
 */
static struct cputimer_intr vmbus_cputimer_intr = {
	.freq = HYPERV_TIMER_FREQ,
	.reload = vmbus_timer_intr_reload,
	.enable = cputimer_intr_default_enable,
	.config = cputimer_intr_default_config,
	.restart = vmbus_timer_intr_restart,
	.pmfixup = cputimer_intr_default_pmfixup,
	.initclock = cputimer_intr_default_initclock,
	.pcpuhand = vmbus_timer_intr_pcpuhand,
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "hyperv",
	.type = CPUTIMER_INTR_VMM,
	.prio = CPUTIMER_INTR_PRIO_VMM,
	.caps = CPUTIMER_INTR_CAP_PS,
	.priv = NULL
};
179 
/*
 * vmbus versions offered to the host.  vmbus_init() tries them in
 * array order and keeps the first one that is accepted.
 */
static const uint32_t	vmbus_version[] = {
	VMBUS_VERSION_WIN8_1,
	VMBUS_VERSION_WIN8,
	VMBUS_VERSION_WIN7,
	VMBUS_VERSION_WS2008
};

/*
 * If non-zero, vmbus_attach() selects the vmbus interrupt timer right
 * after registering it.
 */
static int		vmbus_timer_intr_enable = 1;
TUNABLE_INT("hw.vmbus.timer_intr.enable", &vmbus_timer_intr_enable);
189 
190 static int
191 vmbus_probe(device_t dev)
192 {
193 	char *id[] = { "VMBUS", NULL };
194 
195 	if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL ||
196 	    device_get_unit(dev) != 0 || vmm_guest != VMM_GUEST_HYPERV ||
197 	    (hyperv_features & CPUID_HV_MSR_SYNIC) == 0)
198 		return (ENXIO);
199 
200 	device_set_desc(dev, "Hyper-V vmbus");
201 
202 	return (0);
203 }
204 
205 static int
206 vmbus_attach(device_t dev)
207 {
208 	struct vmbus_softc *sc = device_get_softc(dev);
209 	int error, cpu, use_timer;
210 
211 	/*
212 	 * Basic setup.
213 	 */
214 	sc->vmbus_dev = dev;
215 	for (cpu = 0; cpu < ncpus; ++cpu) {
216 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
217 
218 		psc->sc = sc;
219 		psc->cpuid = cpu;
220 		psc->timer_last = UINT64_MAX;
221 	}
222 
223 	/*
224 	 * Should we use interrupt timer?
225 	 */
226 	use_timer = 0;
227 	if (device_get_unit(dev) == 0 &&
228 	    (hyperv_features & CPUID_HV_TIMER_MASK) == CPUID_HV_TIMER_MASK &&
229 	    hyperv_tc64 != NULL)
230 		use_timer = 1;
231 
232 	/*
233 	 * Create context for "post message" Hypercalls
234 	 */
235 	sc->vmbus_msg_hc = vmbus_msghc_ctx_create(
236 	    bus_get_dma_tag(sc->vmbus_dev));
237 	if (sc->vmbus_msg_hc == NULL)
238 		return ENXIO;
239 
240 	/*
241 	 * Allocate DMA stuffs.
242 	 */
243 	error = vmbus_dma_alloc(sc);
244 	if (error)
245 		goto failed;
246 
247 	/*
248 	 * Setup interrupt.
249 	 */
250 	error = vmbus_intr_setup(sc);
251 	if (error)
252 		goto failed;
253 
254 	if (use_timer) {
255 		/*
256 		 * Make sure that interrupt timer is stopped.
257 		 */
258 		lwkt_cpusync_simple(smp_active_mask, vmbus_timer_stop, sc);
259 	}
260 
261 	/*
262 	 * Setup SynIC.
263 	 */
264 	lwkt_cpusync_simple(smp_active_mask, vmbus_synic_setup, sc);
265 	sc->vmbus_flags |= VMBUS_FLAG_SYNIC;
266 
267 	/*
268 	 * Initialize vmbus.
269 	 */
270 	error = vmbus_init(sc);
271 	if (error)
272 		goto failed;
273 
274 	if (use_timer) {
275 		/*
276 		 * Configure and register vmbus interrupt timer.
277 		 */
278 		lwkt_cpusync_simple(smp_active_mask, vmbus_timer_config, sc);
279 		vmbus_cputimer_intr.priv = sc;
280 		cputimer_intr_register(&vmbus_cputimer_intr);
281 		if (vmbus_timer_intr_enable)
282 			cputimer_intr_select(&vmbus_cputimer_intr, 0);
283 	}
284 
285 	return 0;
286 failed:
287 	vmbus_detach(dev);
288 	return error;
289 }
290 
291 static int
292 vmbus_detach(device_t dev)
293 {
294 	struct vmbus_softc *sc = device_get_softc(dev);
295 
296 	/* TODO: uninitialize vmbus. */
297 	/* TODO: stop and deregister timer */
298 
299 	if (sc->vmbus_flags & VMBUS_FLAG_SYNIC)
300 		lwkt_cpusync_simple(smp_active_mask, vmbus_synic_teardown, sc);
301 	vmbus_intr_teardown(sc);
302 	vmbus_dma_free(sc);
303 
304 	if (sc->vmbus_msg_hc != NULL) {
305 		vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc);
306 		sc->vmbus_msg_hc = NULL;
307 	}
308 	return (0);
309 }
310 
311 static __inline void
312 vmbus_msg_reset(volatile struct vmbus_message *msg)
313 {
314 	msg->msg_type = HYPERV_MSGTYPE_NONE;
315 	/*
316 	 * Make sure that the write to msg_type (i.e. set to
317 	 * HYPERV_MSGTYPE_NONE) happens before we read the
318 	 * msg_flags and send EOM to the hypervisor.
319 	 */
320 	cpu_mfence();
321 	if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) {
322 		/*
323 		 * Ask the hypervisor to rescan message queue,
324 		 * and deliver new message if any.
325 		 */
326 		wrmsr(MSR_HV_EOM, 0);
327 	}
328 }
329 
330 static void
331 vmbus_intr(void *xpsc)
332 {
333 	struct vmbus_pcpu_data *psc = xpsc;
334 	volatile struct vmbus_message *msg;
335 
336 	msg = psc->message + VMBUS_SINT_MESSAGE;
337 	while (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) {
338 		if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) {
339 			/* Channel message */
340 			vmbus_chan_msgproc(psc->sc,
341 			    __DEVOLATILE(const struct vmbus_message *, msg));
342 		}
343 		vmbus_msg_reset(msg);
344 	}
345 }
346 
/*
 * Program STIMER0 with the expiration value 'current' and remember
 * that value in psc->timer_last.
 *
 * The callers in this file compute 'current' as hyperv_tc64() plus an
 * interval and pass the vmbus_pcpu_data of the cpu they are running on.
 */
static __inline void
vmbus_timer_oneshot(struct vmbus_pcpu_data *psc, uint64_t current)
{
	psc->timer_last = current;
	wrmsr(MSR_HV_STIMER0_COUNT, current);
}
353 
354 static void
355 vmbus_timer_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
356 {
357 	struct globaldata *gd = mycpu;
358 	struct vmbus_softc *sc = cti->priv;
359 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, gd->gd_cpuid);
360 	uint64_t current;
361 
362 	reload = (uint64_t)reload * cti->freq / sys_cputimer->freq;
363 	current = hyperv_tc64() + reload;
364 
365 	if (gd->gd_timer_running) {
366 		if (current < psc->timer_last)
367 			vmbus_timer_oneshot(psc, current);
368 	} else {
369 		gd->gd_timer_running = 1;
370 		vmbus_timer_oneshot(psc, current);
371 	}
372 }
373 
374 static void
375 vmbus_timer_intr_pcpuhand(struct cputimer_intr *cti)
376 {
377 	struct vmbus_softc *sc = cti->priv;
378 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, mycpuid);
379 
380 	vmbus_timer_msgintr(psc);
381 }
382 
383 static void
384 vmbus_timer_intr_restart(struct cputimer_intr *cti)
385 {
386 	lwkt_send_ipiq_mask(smp_active_mask, vmbus_timer_restart, cti->priv);
387 }
388 
389 static struct vmbus_msghc *
390 vmbus_msghc_alloc(bus_dma_tag_t parent_dtag)
391 {
392 	struct vmbus_msghc *mh;
393 
394 	mh = kmalloc(sizeof(*mh), M_DEVBUF, M_WAITOK | M_ZERO);
395 
396 	mh->mh_inprm = hyperv_dmamem_alloc(parent_dtag,
397 	    HYPERCALL_POSTMSGIN_ALIGN, 0, HYPERCALL_POSTMSGIN_SIZE,
398 	    &mh->mh_inprm_dma, BUS_DMA_WAITOK);
399 	if (mh->mh_inprm == NULL) {
400 		kfree(mh, M_DEVBUF);
401 		return NULL;
402 	}
403 	return mh;
404 }
405 
/*
 * Free a Hypercall message allocated by vmbus_msghc_alloc().
 *
 * The DMA memory is released first, since its bookkeeping
 * (mh_inprm_dma) lives inside the message itself.
 */
static void
vmbus_msghc_free(struct vmbus_msghc *mh)
{
	hyperv_dmamem_free(&mh->mh_inprm_dma, mh->mh_inprm);
	kfree(mh, M_DEVBUF);
}
412 
/*
 * Free the Hypercall context itself.
 *
 * The context must not own any Hypercall message anymore: neither an
 * active one nor a free one.  The caller is expected to have taken
 * care of the free message before calling this.
 */
static void
vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *mhc)
{
	KASSERT(mhc->mhc_active == NULL, ("still have active msg hypercall"));
	KASSERT(mhc->mhc_free == NULL, ("still have hypercall msg"));

	lwkt_token_uninit(&mhc->mhc_free_token);
	lwkt_token_uninit(&mhc->mhc_active_token);
	kfree(mhc, M_DEVBUF);
}
423 
424 static struct vmbus_msghc_ctx *
425 vmbus_msghc_ctx_create(bus_dma_tag_t parent_dtag)
426 {
427 	struct vmbus_msghc_ctx *mhc;
428 
429 	mhc = kmalloc(sizeof(*mhc), M_DEVBUF, M_WAITOK | M_ZERO);
430 	lwkt_token_init(&mhc->mhc_free_token, "msghcf");
431 	lwkt_token_init(&mhc->mhc_active_token, "msghca");
432 
433 	mhc->mhc_free = vmbus_msghc_alloc(parent_dtag);
434 	if (mhc->mhc_free == NULL) {
435 		vmbus_msghc_ctx_free(mhc);
436 		return NULL;
437 	}
438 	return mhc;
439 }
440 
441 static struct vmbus_msghc *
442 vmbus_msghc_get1(struct vmbus_msghc_ctx *mhc, uint32_t dtor_flag)
443 {
444 	struct vmbus_msghc *mh;
445 
446 	lwkt_gettoken(&mhc->mhc_free_token);
447 
448 	while ((mhc->mhc_flags & dtor_flag) == 0 && mhc->mhc_free == NULL)
449 		tsleep(&mhc->mhc_free, 0, "gmsghc", 0);
450 	if (mhc->mhc_flags & dtor_flag) {
451 		/* Being destroyed */
452 		mh = NULL;
453 	} else {
454 		mh = mhc->mhc_free;
455 		KASSERT(mh != NULL, ("no free hypercall msg"));
456 		KASSERT(mh->mh_resp == NULL,
457 		    ("hypercall msg has pending response"));
458 		mhc->mhc_free = NULL;
459 	}
460 
461 	lwkt_reltoken(&mhc->mhc_free_token);
462 
463 	return mh;
464 }
465 
466 struct vmbus_msghc *
467 vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize)
468 {
469 	struct hypercall_postmsg_in *inprm;
470 	struct vmbus_msghc *mh;
471 
472 	if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX)
473 		return NULL;
474 
475 	mh = vmbus_msghc_get1(sc->vmbus_msg_hc, VMBUS_MSGHC_CTXF_DESTROY);
476 	if (mh == NULL)
477 		return NULL;
478 
479 	inprm = mh->mh_inprm;
480 	memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE);
481 	inprm->hc_connid = VMBUS_CONNID_MESSAGE;
482 	inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
483 	inprm->hc_dsize = dsize;
484 
485 	return mh;
486 }
487 
488 void
489 vmbus_msghc_put(struct vmbus_softc *sc, struct vmbus_msghc *mh)
490 {
491 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
492 
493 	KASSERT(mhc->mhc_active == NULL, ("msg hypercall is active"));
494 	mh->mh_resp = NULL;
495 
496 	lwkt_gettoken(&mhc->mhc_free_token);
497 	KASSERT(mhc->mhc_free == NULL, ("has free hypercall msg"));
498 	mhc->mhc_free = mh;
499 	lwkt_reltoken(&mhc->mhc_free_token);
500 	wakeup(&mhc->mhc_free);
501 }
502 
/*
 * Return the data area (hc_data) of the Hypercall message's input
 * parameter, where the caller stores the message to be posted.
 */
void *
vmbus_msghc_dataptr(struct vmbus_msghc *mh)
{
	return mh->mh_inprm->hc_data;
}
508 
509 static void
510 vmbus_msghc_ctx_destroy(struct vmbus_msghc_ctx *mhc)
511 {
512 	struct vmbus_msghc *mh;
513 
514 	lwkt_gettoken(&mhc->mhc_free_token);
515 	mhc->mhc_flags |= VMBUS_MSGHC_CTXF_DESTROY;
516 	lwkt_reltoken(&mhc->mhc_free_token);
517 	wakeup(&mhc->mhc_free);
518 
519 	mh = vmbus_msghc_get1(mhc, 0);
520 	if (mh == NULL)
521 		panic("can't get msghc");
522 
523 	vmbus_msghc_free(mh);
524 	vmbus_msghc_ctx_free(mhc);
525 }
526 
527 int
528 vmbus_msghc_exec_noresult(struct vmbus_msghc *mh)
529 {
530 	int i, wait_ticks = 1;
531 
532 	/*
533 	 * Save the input parameter so that we could restore the input
534 	 * parameter if the Hypercall failed.
535 	 *
536 	 * XXX
537 	 * Is this really necessary?!  i.e. Will the Hypercall ever
538 	 * overwrite the input parameter?
539 	 */
540 	memcpy(&mh->mh_inprm_save, mh->mh_inprm, HYPERCALL_POSTMSGIN_SIZE);
541 
542 	/*
543 	 * In order to cope with transient failures, e.g. insufficient
544 	 * resources on host side, we retry the post message Hypercall
545 	 * several times.  20 retries seem sufficient.
546 	 */
547 #define HC_RETRY_MAX	20
548 
549 	for (i = 0; i < HC_RETRY_MAX; ++i) {
550 		uint64_t status;
551 
552 		status = hypercall_post_message(mh->mh_inprm_dma.hv_paddr);
553 		if (status == HYPERCALL_STATUS_SUCCESS)
554 			return 0;
555 
556 		tsleep(&status, 0, "hcpmsg", wait_ticks);
557 		if (wait_ticks < hz)
558 			wait_ticks *= 2;
559 
560 		/* Restore input parameter and try again */
561 		memcpy(mh->mh_inprm, &mh->mh_inprm_save,
562 		    HYPERCALL_POSTMSGIN_SIZE);
563 	}
564 
565 #undef HC_RETRY_MAX
566 
567 	return EIO;
568 }
569 
570 int
571 vmbus_msghc_exec(struct vmbus_softc *sc, struct vmbus_msghc *mh)
572 {
573 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
574 	int error;
575 
576 	KASSERT(mh->mh_resp == NULL, ("hypercall msg has pending response"));
577 
578 	lwkt_gettoken(&mhc->mhc_active_token);
579 	KASSERT(mhc->mhc_active == NULL, ("pending active msg hypercall"));
580 	mhc->mhc_active = mh;
581 	lwkt_reltoken(&mhc->mhc_active_token);
582 
583 	error = vmbus_msghc_exec_noresult(mh);
584 	if (error) {
585 		lwkt_gettoken(&mhc->mhc_active_token);
586 		KASSERT(mhc->mhc_active == mh, ("msghc mismatch"));
587 		mhc->mhc_active = NULL;
588 		lwkt_reltoken(&mhc->mhc_active_token);
589 	}
590 	return error;
591 }
592 
593 const struct vmbus_message *
594 vmbus_msghc_wait_result(struct vmbus_softc *sc, struct vmbus_msghc *mh)
595 {
596 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
597 
598 	lwkt_gettoken(&mhc->mhc_active_token);
599 
600 	KASSERT(mhc->mhc_active == mh, ("msghc mismatch"));
601 	while (mh->mh_resp == NULL)
602 		tsleep(&mhc->mhc_active, 0, "wmsghc", 0);
603 	mhc->mhc_active = NULL;
604 
605 	lwkt_reltoken(&mhc->mhc_active_token);
606 
607 	return mh->mh_resp;
608 }
609 
610 void
611 vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg)
612 {
613 	struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc;
614 	struct vmbus_msghc *mh;
615 
616 	lwkt_gettoken(&mhc->mhc_active_token);
617 
618 	mh = mhc->mhc_active;
619 	KASSERT(mh != NULL, ("no pending msg hypercall"));
620 	memcpy(&mh->mh_resp0, msg, sizeof(mh->mh_resp0));
621 	mh->mh_resp = &mh->mh_resp0;
622 
623 	lwkt_reltoken(&mhc->mhc_active_token);
624 	wakeup(&mhc->mhc_active);
625 }
626 
627 static int
628 vmbus_dma_alloc(struct vmbus_softc *sc)
629 {
630 	bus_dma_tag_t parent_dtag;
631 	uint8_t *evtflags;
632 	int cpu;
633 
634 	parent_dtag = bus_get_dma_tag(sc->vmbus_dev);
635 	for (cpu = 0; cpu < ncpus; ++cpu) {
636 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
637 
638 		/*
639 		 * Per-cpu messages and event flags.
640 		 */
641 		psc->message = hyperv_dmamem_alloc(parent_dtag,
642 		    PAGE_SIZE, 0, PAGE_SIZE, &psc->message_dma,
643 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
644 		if (psc->message == NULL)
645 			return ENOMEM;
646 
647 		psc->event_flags = hyperv_dmamem_alloc(parent_dtag,
648 		    PAGE_SIZE, 0, PAGE_SIZE, &psc->event_flags_dma,
649 		    BUS_DMA_WAITOK | BUS_DMA_ZERO);
650 		if (psc->event_flags == NULL)
651 			return ENOMEM;
652 	}
653 
654 	evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
655 	    PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
656 	if (evtflags == NULL)
657 		return ENOMEM;
658 	sc->vmbus_rx_evtflags = (u_long *)evtflags;
659 	sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2));
660 	sc->vmbus_evtflags = evtflags;
661 
662 	sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
663 	    PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
664 	if (sc->vmbus_mnf1 == NULL)
665 		return ENOMEM;
666 
667 	sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0,
668 	    PAGE_SIZE, &sc->vmbus_mnf2_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
669 	if (sc->vmbus_mnf2 == NULL)
670 		return ENOMEM;
671 
672 	return 0;
673 }
674 
675 static void
676 vmbus_dma_free(struct vmbus_softc *sc)
677 {
678 	int cpu;
679 
680 	if (sc->vmbus_evtflags != NULL) {
681 		hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags);
682 		sc->vmbus_evtflags = NULL;
683 		sc->vmbus_rx_evtflags = NULL;
684 		sc->vmbus_tx_evtflags = NULL;
685 	}
686 	if (sc->vmbus_mnf1 != NULL) {
687 		hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1);
688 		sc->vmbus_mnf1 = NULL;
689 	}
690 	if (sc->vmbus_mnf2 != NULL) {
691 		hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2);
692 		sc->vmbus_mnf2 = NULL;
693 	}
694 
695 	for (cpu = 0; cpu < ncpus; ++cpu) {
696 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
697 
698 		if (psc->message != NULL) {
699 			hyperv_dmamem_free(&psc->message_dma, psc->message);
700 			psc->message = NULL;
701 		}
702 		if (psc->event_flags != NULL) {
703 			hyperv_dmamem_free(&psc->event_flags_dma,
704 			    psc->event_flags);
705 			psc->event_flags = NULL;
706 		}
707 	}
708 }
709 
710 static int
711 vmbus_intr_setup(struct vmbus_softc *sc)
712 {
713 	device_t dev = sc->vmbus_dev;
714 	device_t bus = device_get_parent(device_get_parent(dev));
715 	int rid, cpu;
716 
717 	rid = 0;
718 	for (cpu = 0; cpu < ncpus; ++cpu) {
719 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
720 		uint64_t msi_addr;
721 		uint32_t msi_data;
722 		int error;
723 
724 		error = PCIB_ALLOC_MSIX(bus, dev, &psc->intr_irq, cpu);
725 		if (error) {
726 			device_printf(dev, "alloc vector on cpu%d failed: %d\n",
727 			    cpu, error);
728 			return ENXIO;
729 		}
730 		psc->intr_rid = ++rid;
731 
732 		psc->intr_res = BUS_ALLOC_RESOURCE(bus, dev, SYS_RES_IRQ,
733 		    &psc->intr_rid, psc->intr_irq, psc->intr_irq, 1,
734 		    RF_ACTIVE, cpu);
735 		if (psc->intr_res == NULL) {
736 			device_printf(dev, "alloc irq on cpu%d failed: %d\n",
737 			    cpu, error);
738 			return ENXIO;
739 		}
740 
741 		error = PCIB_MAP_MSI(bus, dev, rman_get_start(psc->intr_res),
742 		    &msi_addr, &msi_data, cpu);
743 		if (error) {
744 			device_printf(dev, "map irq on cpu%d failed: %d\n",
745 			    cpu, error);
746 			return ENXIO;
747 		}
748 		psc->intr_vec = hyperv_msi2vector(msi_addr, msi_data);
749 
750 		if (bootverbose) {
751 			device_printf(dev, "vector %d irq %d on cpu%d\n",
752 			    psc->intr_vec, psc->intr_irq, cpu);
753 		}
754 
755 		ksnprintf(psc->intr_desc, sizeof(psc->intr_desc), "%s cpu%d",
756 		    device_get_nameunit(dev), cpu);
757 		error = bus_setup_intr_descr(dev, psc->intr_res, INTR_MPSAFE,
758 		    vmbus_intr, psc, &psc->intr_hand, NULL, psc->intr_desc);
759 		if (error) {
760 			device_printf(dev, "setup intr on cpu%d failed: %d\n",
761 			    cpu, error);
762 			return ENXIO;
763 		}
764 	}
765 	return 0;
766 }
767 
768 static void
769 vmbus_intr_teardown(struct vmbus_softc *sc)
770 {
771 	device_t dev = sc->vmbus_dev;
772 	device_t bus = device_get_parent(device_get_parent(dev));
773 	int cpu;
774 
775 	for (cpu = 0; cpu < ncpus; ++cpu) {
776 		struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, cpu);
777 
778 		if (psc->intr_hand != NULL) {
779 			bus_teardown_intr(dev, psc->intr_res, psc->intr_hand);
780 			psc->intr_hand = NULL;
781 		}
782 
783 		if (psc->intr_res != NULL) {
784 			BUS_RELEASE_RESOURCE(bus, dev, SYS_RES_IRQ,
785 			    psc->intr_rid, psc->intr_res);
786 			psc->intr_res = NULL;
787 		}
788 
789 		if (psc->intr_rid != 0) {
790 			PCIB_RELEASE_MSIX(bus, dev, psc->intr_irq, psc->cpuid);
791 			psc->intr_rid = 0;
792 		}
793 	}
794 }
795 
796 static void
797 vmbus_synic_setup(void *xsc)
798 {
799 	struct vmbus_softc *sc = xsc;
800 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, mycpuid);
801 	uint64_t val, orig;
802 	uint32_t sint;
803 
804 	if (hyperv_features & CPUID_HV_MSR_VP_INDEX) {
805 		/*
806 		 * Save virtual processor id.
807 		 */
808 		psc->vcpuid = rdmsr(MSR_HV_VP_INDEX);
809 	} else {
810 		/*
811 		 * XXX
812 		 * Virtual processoor id is only used by a pretty broken
813 		 * channel selection code from storvsc.  It's nothing
814 		 * critical even if CPUID_HV_MSR_VP_INDEX is not set; keep
815 		 * moving on.
816 		 */
817 		psc->vcpuid = mycpuid;
818 	}
819 
820 	/*
821 	 * Setup the SynIC message.
822 	 */
823 	orig = rdmsr(MSR_HV_SIMP);
824 	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
825 	    ((psc->message_dma.hv_paddr >> PAGE_SHIFT) << MSR_HV_SIMP_PGSHIFT);
826 	wrmsr(MSR_HV_SIMP, val);
827 
828 	/*
829 	 * Setup the SynIC event flags.
830 	 */
831 	orig = rdmsr(MSR_HV_SIEFP);
832 	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
833 	    ((psc->event_flags_dma.hv_paddr >> PAGE_SHIFT) <<
834 	     MSR_HV_SIEFP_PGSHIFT);
835 	wrmsr(MSR_HV_SIEFP, val);
836 
837 
838 	/*
839 	 * Configure and unmask SINT for message and event flags.
840 	 */
841 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
842 	orig = rdmsr(sint);
843 	val = psc->intr_vec | /* MSR_HV_SINT_AUTOEOI | notyet */
844 	    (orig & MSR_HV_SINT_RSVD_MASK);
845 	wrmsr(sint, val);
846 
847 	/*
848 	 * Configure and unmask SINT for timer.
849 	 */
850 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
851 	orig = rdmsr(sint);
852 	val = XTIMER_OFFSET | /* MSR_HV_SINT_AUTOEOI | notyet */
853 	    (orig & MSR_HV_SINT_RSVD_MASK);
854 	wrmsr(sint, val);
855 
856 	/*
857 	 * All done; enable SynIC.
858 	 */
859 	orig = rdmsr(MSR_HV_SCONTROL);
860 	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
861 	wrmsr(MSR_HV_SCONTROL, val);
862 }
863 
864 static void
865 vmbus_timer_stop(void *arg __unused)
866 {
867 	for (;;) {
868 		uint64_t val;
869 
870 		/* Stop counting, and this also implies disabling STIMER0 */
871 		wrmsr(MSR_HV_STIMER0_COUNT, 0);
872 
873 		val = rdmsr(MSR_HV_STIMER0_CONFIG);
874 		if ((val & MSR_HV_STIMER_CFG_ENABLE) == 0)
875 			break;
876 		cpu_pause();
877 	}
878 }
879 
880 static void
881 vmbus_timer_config(void *arg __unused)
882 {
883 	/*
884 	 * Make sure that STIMER0 is really disabled before writing
885 	 * to STIMER0_CONFIG.
886 	 *
887 	 * "Writing to the configuration register of a timer that
888 	 *  is already enabled may result in undefined behaviour."
889 	 */
890 	vmbus_timer_stop(arg);
891 	wrmsr(MSR_HV_STIMER0_CONFIG,
892 	    MSR_HV_STIMER_CFG_AUTOEN | MSR_HV_STIMER0_CFG_SINT);
893 }
894 
895 static void
896 vmbus_timer_msgintr(struct vmbus_pcpu_data *psc)
897 {
898 	volatile struct vmbus_message *msg;
899 
900 	msg = psc->message + VMBUS_SINT_TIMER;
901 	if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED)
902 		vmbus_msg_reset(msg);
903 }
904 
905 static void
906 vmbus_timer_restart(void *xsc)
907 {
908 	struct vmbus_softc *sc = xsc;
909 	struct vmbus_pcpu_data *psc = VMBUS_PCPU(sc, mycpuid);
910 
911 	crit_enter();
912 	vmbus_timer_msgintr(psc);
913 	vmbus_timer_oneshot(psc, hyperv_tc64() + 1);
914 	crit_exit();
915 }
916 
917 static void
918 vmbus_synic_teardown(void *arg __unused)
919 {
920 	uint64_t orig;
921 	uint32_t sint;
922 
923 	/*
924 	 * Disable SynIC.
925 	 */
926 	orig = rdmsr(MSR_HV_SCONTROL);
927 	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
928 
929 	/*
930 	 * Mask message and event flags SINT.
931 	 */
932 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
933 	orig = rdmsr(sint);
934 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
935 
936 	/*
937 	 * Mask timer SINT.
938 	 */
939 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
940 	orig = rdmsr(sint);
941 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
942 
943 	/*
944 	 * Teardown SynIC message.
945 	 */
946 	orig = rdmsr(MSR_HV_SIMP);
947 	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
948 
949 	/*
950 	 * Teardown SynIC event flags.
951 	 */
952 	orig = rdmsr(MSR_HV_SIEFP);
953 	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
954 }
955 
956 static int
957 vmbus_init_contact(struct vmbus_softc *sc, uint32_t version)
958 {
959 	struct vmbus_chanmsg_init_contact *req;
960 	const struct vmbus_chanmsg_version_resp *resp;
961 	const struct vmbus_message *msg;
962 	struct vmbus_msghc *mh;
963 	int error, supp = 0;
964 
965 	mh = vmbus_msghc_get(sc, sizeof(*req));
966 	if (mh == NULL)
967 		return ENXIO;
968 
969 	req = vmbus_msghc_dataptr(mh);
970 	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_INIT_CONTACT;
971 	req->chm_ver = version;
972 	req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr;
973 	req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr;
974 	req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr;
975 
976 	error = vmbus_msghc_exec(sc, mh);
977 	if (error) {
978 		vmbus_msghc_put(sc, mh);
979 		return error;
980 	}
981 
982 	msg = vmbus_msghc_wait_result(sc, mh);
983 	resp = (const struct vmbus_chanmsg_version_resp *)msg->msg_data;
984 	supp = resp->chm_supp;
985 
986 	vmbus_msghc_put(sc, mh);
987 
988 	return (supp ? 0 : EOPNOTSUPP);
989 }
990 
991 static int
992 vmbus_init(struct vmbus_softc *sc)
993 {
994 	int i;
995 
996 	for (i = 0; i < nitems(vmbus_version); ++i) {
997 		int error;
998 
999 		error = vmbus_init_contact(sc, vmbus_version[i]);
1000 		if (!error) {
1001 			sc->vmbus_version = vmbus_version[i];
1002 			device_printf(sc->vmbus_dev, "version %u.%u\n",
1003 			    (sc->vmbus_version >> 16),
1004 			    (sc->vmbus_version & 0xffff));
1005 			return 0;
1006 		}
1007 	}
1008 	return ENXIO;
1009 }
1010 
1011 static void
1012 vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
1013 {
1014 	const struct vmbus_chanmsg_hdr *hdr;
1015 
1016 	hdr = (const struct vmbus_chanmsg_hdr *)msg->msg_data;
1017 
1018 	/* TODO */
1019 	if (hdr->chm_type == VMBUS_CHANMSG_TYPE_VERSION_RESP)
1020 		vmbus_msghc_wakeup(sc, msg);
1021 }
1022