xref: /netbsd/sys/arch/x86/x86/hyperv.c (revision c2ac0d61)
1 /*	$NetBSD: hyperv.c,v 1.15 2022/05/20 13:55:16 nonaka Exp $	*/
2 
3 /*-
4  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
5  * Copyright (c) 2012 NetApp Inc.
6  * Copyright (c) 2012 Citrix Inc.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice unmodified, this list of conditions, and the following
14  *    disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /**
32  * Implements low-level interactions with Hyper-V/Azure
33  */
34 #include <sys/cdefs.h>
35 #ifdef __KERNEL_RCSID
36 __KERNEL_RCSID(0, "$NetBSD: hyperv.c,v 1.15 2022/05/20 13:55:16 nonaka Exp $");
37 #endif
38 #ifdef __FBSDID
39 __FBSDID("$FreeBSD: head/sys/dev/hyperv/vmbus/hyperv.c 331757 2018-03-30 02:25:12Z emaste $");
40 #endif
41 
42 #ifdef _KERNEL_OPT
43 #include "lapic.h"
44 #include "genfb.h"
45 #include "opt_ddb.h"
46 #include "vmbus.h"
47 #include "wsdisplay.h"
48 #endif
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/kernel.h>
53 #include <sys/device.h>
54 #include <sys/bus.h>
55 #include <sys/cpu.h>
56 #include <sys/kmem.h>
57 #include <sys/module.h>
58 #include <sys/pmf.h>
59 #include <sys/sysctl.h>
60 #include <sys/timetc.h>
61 
62 #include <uvm/uvm_extern.h>
63 
64 #include <machine/autoconf.h>
65 #include <machine/bootinfo.h>
66 #include <machine/cpufunc.h>
67 #include <machine/cputypes.h>
68 #include <machine/cpuvar.h>
69 #include <machine/cpu_counter.h>
70 #include <x86/apicvar.h>
71 #include <x86/efi.h>
72 
73 #include <dev/wsfb/genfbvar.h>
74 #include <x86/genfb_machdep.h>
75 
76 #include <x86/x86/hypervreg.h>
77 #include <x86/x86/hypervvar.h>
78 #include <dev/hyperv/vmbusvar.h>
79 #include <dev/hyperv/genfb_vmbusvar.h>
80 
81 #ifdef DDB
82 #include <machine/db_machdep.h>
83 #include <ddb/db_sym.h>
84 #include <ddb/db_extern.h>
85 #endif
86 
/*
 * Software state of the hyperv device instance (the softc registered
 * through CFATTACH_DECL_NEW below).
 */
struct hyperv_softc {
	device_t		sc_dev;		/* our own device_t; set in hyperv_attach() */

	struct sysctllog	*sc_log;	/* log handed to sysctl_createv()/sysctl_teardown() */
};
92 
/*
 * Location of the hypercall page.  Both members are filled in by
 * hyperv_init_hypercall(); hc_addr is reset to NULL by
 * hyperv_hypercall_memfree().
 */
struct hyperv_hypercall_ctx {
	void		*hc_addr;	/* kernel virtual address, NULL when unavailable */
	paddr_t		hc_paddr;	/* physical address obtained with vtophys() */
};
97 
/*
 * Machine-dependent per-CPU data; an instance is stored in the md_cookie
 * member of struct vmbus_percpu_data by vmbus_init_interrupts_md().
 */
struct hyperv_percpu_data {
	int	pd_idtvec;	/* IDT vector used for the Hyper-V interrupt */
};
101 
/* Hypercall page bookkeeping; filled in by hyperv_init_hypercall(). */
static struct hyperv_hypercall_ctx hyperv_hypercall_ctx;

/*
 * Page-sized, page-aligned buffer placed in the .text section.  Its
 * physical address is programmed into MSR_HV_HYPERCALL by
 * hyperv_init_hypercall().  Only the first byte is explicitly
 * initialised (to 0xcc).
 */
static char hyperv_hypercall_page[PAGE_SIZE]
    __section(".text") __aligned(PAGE_SIZE) = { 0xcc };

static u_int	hyperv_get_timecount(struct timecounter *);

static u_int hyperv_features;		/* CPUID_HV_MSR_ */
static u_int hyperv_recommends;		/* regs[0] of CPUID_LEAF_HV_RECOMMENDS */

static u_int hyperv_pm_features;	/* regs[2] of CPUID_LEAF_HV_FEATURES */
static u_int hyperv_features3;		/* regs[3] of CPUID_LEAF_HV_FEATURES */

/* Human readable strings built by hyperv_identify(); exported via sysctl. */
static char hyperv_version_str[64];
static char hyperv_features_str[256];
static char hyperv_pm_features_str[256];
static char hyperv_features3_str[256];

/*
 * Virtual processor index per CPU, indexed by CPU index.  Seeded by
 * hyperv_early_init() and updated per CPU by hyperv_init_cpu().
 */
uint32_t hyperv_vcpuid[MAXCPUS];

/*
 * Timecounter backed by MSR_HV_TIME_REF_COUNT (see hyperv_get_timecount());
 * registered by hyperv_init() when CPUID_HV_MSR_TIME_REFCNT is available.
 */
static struct timecounter hyperv_timecounter = {
	.tc_get_timecount = hyperv_get_timecount,
	.tc_counter_mask = 0xffffffff,
	.tc_frequency = HYPERV_TIMER_FREQ,
	.tc_name = "Hyper-V",
	.tc_quality = 2000,
};
129 
static void	hyperv_proc_dummy(void *, struct cpu_info *);

/*
 * A handler together with its argument, invoked from hyperv_intr().
 */
struct hyperv_proc {
	hyperv_proc_t	func;	/* handler; never NULL by default (see below) */
	void		*arg;	/* first argument passed to func */
};

/*
 * Event handler slot; replaced with hyperv_set_event_proc().  It starts
 * out as a no-op so hyperv_intr() can call it unconditionally.
 */
static struct hyperv_proc hyperv_event_proc = {
	.func = hyperv_proc_dummy,
};

/*
 * Message handler slot; replaced with hyperv_set_message_proc().  It
 * starts out as a no-op so hyperv_intr() can call it unconditionally.
 */
static struct hyperv_proc hyperv_message_proc = {
	.func = hyperv_proc_dummy,
};

/* autoconf(9) entry points. */
static int	hyperv_match(device_t, cfdata_t, void *);
static void	hyperv_attach(device_t, device_t, void *);
static int	hyperv_detach(device_t, int);

CFATTACH_DECL_NEW(hyperv, sizeof(struct hyperv_softc),
    hyperv_match, hyperv_attach, hyperv_detach, NULL);

/* Helpers defined further down in this file. */
static void	hyperv_hypercall_memfree(void);
static bool	hyperv_init_hypercall(void);
static int	hyperv_sysctl_setup_root(struct hyperv_softc *);
155 
156 static u_int
hyperv_get_timecount(struct timecounter * tc)157 hyperv_get_timecount(struct timecounter *tc)
158 {
159 
160 	return (u_int)rdmsr(MSR_HV_TIME_REF_COUNT);
161 }
162 
163 static uint64_t
hyperv_tc64_rdmsr(void)164 hyperv_tc64_rdmsr(void)
165 {
166 
167 	return rdmsr(MSR_HV_TIME_REF_COUNT);
168 }
169 
170 #ifdef __amd64__
/*
 * Reference TSC
 *
 * State of the reference TSC page that hyperv_tsc_tcinit() allocates and
 * hands to the hypervisor through MSR_HV_REFERENCE_TSC.
 */
struct hyperv_ref_tsc {
	struct hyperv_reftsc	*tsc_ref;	/* kernel mapping of the page, NULL if not set up */
	paddr_t			tsc_paddr;	/* physical address of the page */
};

static struct hyperv_ref_tsc hyperv_ref_tsc;

static u_int	hyperv_tsc_timecount(struct timecounter *);

/*
 * Timecounter read through the reference TSC page; it is given a higher
 * quality (3000) than the MSR based hyperv_timecounter (2000).
 */
static struct timecounter hyperv_tsc_timecounter = {
	.tc_get_timecount = hyperv_tsc_timecount,
	.tc_counter_mask = 0xffffffff,
	.tc_frequency = HYPERV_TIMER_FREQ,
	.tc_name = "Hyper-V-TSC",
	.tc_quality = 3000,
};
190 
191 static __inline u_int
atomic_load_acq_int(volatile u_int * p)192 atomic_load_acq_int(volatile u_int *p)
193 {
194 	u_int r = *p;
195 	__insn_barrier();
196 	return r;
197 }
198 
/*
 * Return the current 64-bit reference time computed from the CPU counter
 * and the scale/offset published in the reference TSC page
 * (hyperv_ref_tsc.tsc_ref).
 *
 * The page's sequence number is sampled before and after the
 * computation; the result is returned only if both samples are equal,
 * otherwise the computation is repeated.  A sequence number of zero ends
 * the loop and the value is read from MSR_HV_TIME_REF_COUNT instead.
 */
static uint64_t
hyperv_tc64_tsc(void)
{
	struct hyperv_reftsc *tsc_ref = hyperv_ref_tsc.tsc_ref;
	uint32_t seq;

	while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
		uint64_t disc, ret, tsc;
		uint64_t scale = tsc_ref->tsc_scale;
		int64_t ofs = tsc_ref->tsc_ofs;

		tsc = cpu_counter();

		/*
		 * ret = ((tsc * scale) >> 64) + ofs
		 *
		 * mulq leaves the high half of the 128-bit product in rdx
		 * (ret) and the low half in rax (disc, discarded).
		 */
		__asm__ __volatile__ ("mulq %3" :
		    "=d" (ret), "=a" (disc) :
		    "a" (tsc), "r" (scale));
		ret += ofs;

		/* Re-read the sequence number only after the computation. */
		__insn_barrier();
		if (tsc_ref->tsc_seq == seq)
			return ret;

		/* Sequence changed; re-sync. */
	}
	/* Fallback to the generic timecounter, i.e. rdmsr. */
	return rdmsr(MSR_HV_TIME_REF_COUNT);
}
227 
228 static u_int
hyperv_tsc_timecount(struct timecounter * tc __unused)229 hyperv_tsc_timecount(struct timecounter *tc __unused)
230 {
231 
232 	return hyperv_tc64_tsc();
233 }
234 
235 static bool
hyperv_tsc_tcinit(void)236 hyperv_tsc_tcinit(void)
237 {
238 	uint64_t orig_msr, msr;
239 
240 	if ((hyperv_features &
241 	     (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC)) !=
242 	    (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC) ||
243 	    (cpu_feature[0] & CPUID_SSE2) == 0)	/* SSE2 for mfence/lfence */
244 		return false;
245 
246 	hyperv_ref_tsc.tsc_ref = (void *)uvm_km_alloc(kernel_map,
247 	    PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
248 	if (hyperv_ref_tsc.tsc_ref == NULL) {
249 		aprint_error("Hyper-V: reference TSC page allocation failed\n");
250 		return false;
251 	}
252 
253 	if (!pmap_extract(pmap_kernel(), (vaddr_t)hyperv_ref_tsc.tsc_ref,
254 	    &hyperv_ref_tsc.tsc_paddr)) {
255 		aprint_error("Hyper-V: reference TSC page setup failed\n");
256 		uvm_km_free(kernel_map, (vaddr_t)hyperv_ref_tsc.tsc_ref,
257 		    PAGE_SIZE, UVM_KMF_WIRED);
258 		hyperv_ref_tsc.tsc_ref = NULL;
259 		return false;
260 	}
261 
262 	orig_msr = rdmsr(MSR_HV_REFERENCE_TSC);
263 	msr = MSR_HV_REFTSC_ENABLE | (orig_msr & MSR_HV_REFTSC_RSVD_MASK) |
264 	    (atop(hyperv_ref_tsc.tsc_paddr) << MSR_HV_REFTSC_PGSHIFT);
265 	wrmsr(MSR_HV_REFERENCE_TSC, msr);
266 
267 	/* Install 64 bits timecounter method for other modules to use. */
268 	hyperv_tc64 = hyperv_tc64_tsc;
269 
270 	/* Register "enlightened" timecounter. */
271 	tc_init(&hyperv_tsc_timecounter);
272 
273 	return true;
274 }
275 #endif /* __amd64__ */
276 
277 static void
delay_tc(unsigned int n)278 delay_tc(unsigned int n)
279 {
280 	struct timecounter *tc;
281 	uint64_t end, now;
282 	u_int last, u;
283 
284 	tc = timecounter;
285 	if (tc->tc_quality <= 0) {
286 		x86_delay(n);
287 		return;
288 	}
289 
290 	now = 0;
291 	end = tc->tc_frequency * n / 1000000;
292 	last = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
293 	do {
294 		x86_pause();
295 		u = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
296 		if (u < last)
297 			now += tc->tc_counter_mask - last + u + 1;
298 		else
299 			now += u - last;
300 		last = u;
301 	} while (now < end);
302 }
303 
304 static void
delay_msr(unsigned int n)305 delay_msr(unsigned int n)
306 {
307 	uint64_t end, now;
308 	u_int last, u;
309 
310 	now = 0;
311 	end = HYPERV_TIMER_FREQ * n / 1000000ULL;
312 	last = (u_int)rdmsr(MSR_HV_TIME_REF_COUNT);
313 	do {
314 		x86_pause();
315 		u = (u_int)rdmsr(MSR_HV_TIME_REF_COUNT);
316 		if (u < last)
317 			now += 0xffffffff - last + u + 1;
318 		else
319 			now += u - last;
320 		last = u;
321 	} while (now < end);
322 }
323 
324 static __inline uint64_t
hyperv_hypercall_md(volatile void * hc_addr,uint64_t in_val,uint64_t in_paddr,uint64_t out_paddr)325 hyperv_hypercall_md(volatile void *hc_addr, uint64_t in_val, uint64_t in_paddr,
326     uint64_t out_paddr)
327 {
328 	uint64_t status;
329 
330 #ifdef __amd64__
331 	__asm__ __volatile__ ("mov %0, %%r8" : : "r" (out_paddr): "r8");
332 	__asm__ __volatile__ ("call *%3" : "=a" (status) : "c" (in_val),
333 	    "d" (in_paddr), "m" (hc_addr));
334 #else
335 	uint32_t in_val_hi = in_val >> 32;
336 	uint32_t in_val_lo = in_val & 0xFFFFFFFF;
337 	uint32_t status_hi, status_lo;
338 	uint32_t in_paddr_hi = in_paddr >> 32;
339 	uint32_t in_paddr_lo = in_paddr & 0xFFFFFFFF;
340 	uint32_t out_paddr_hi = out_paddr >> 32;
341 	uint32_t out_paddr_lo = out_paddr & 0xFFFFFFFF;
342 
343 	__asm__ __volatile__ ("call *%8" : "=d" (status_hi), "=a" (status_lo) :
344 	    "d" (in_val_hi), "a" (in_val_lo),
345 	    "b" (in_paddr_hi), "c" (in_paddr_lo),
346 	    "D" (out_paddr_hi), "S" (out_paddr_lo),
347 	    "m" (hc_addr));
348 	status = status_lo | ((uint64_t)status_hi << 32);
349 #endif
350 
351 	return status;
352 }
353 
354 uint64_t
hyperv_hypercall(uint64_t control,paddr_t in_paddr,paddr_t out_paddr)355 hyperv_hypercall(uint64_t control, paddr_t in_paddr, paddr_t out_paddr)
356 {
357 
358 	if (hyperv_hypercall_ctx.hc_addr == NULL)
359 		return ~HYPERCALL_STATUS_SUCCESS;
360 
361 	return hyperv_hypercall_md(hyperv_hypercall_ctx.hc_addr, control,
362 	    in_paddr, out_paddr);
363 }
364 
365 static bool
hyperv_probe(u_int * maxleaf,u_int * features,u_int * pm_features,u_int * features3)366 hyperv_probe(u_int *maxleaf, u_int *features, u_int *pm_features,
367     u_int *features3)
368 {
369 	u_int regs[4];
370 
371 	if (vm_guest != VM_GUEST_HV)
372 		return false;
373 
374 	x86_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
375 	*maxleaf = regs[0];
376 	if (*maxleaf < CPUID_LEAF_HV_LIMITS)
377 		return false;
378 
379 	x86_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
380 	if (regs[0] != CPUID_HV_IFACE_HYPERV)
381 		return false;
382 
383 	x86_cpuid(CPUID_LEAF_HV_FEATURES, regs);
384 	if (!(regs[0] & CPUID_HV_MSR_HYPERCALL)) {
385 		/*
386 		 * Hyper-V w/o Hypercall is impossible; someone
387 		 * is faking Hyper-V.
388 		 */
389 		return false;
390 	}
391 
392 	*features = regs[0];
393 	*pm_features = regs[2];
394 	*features3 = regs[3];
395 
396 	return true;
397 }
398 
399 static bool
hyperv_identify(void)400 hyperv_identify(void)
401 {
402 	char buf[256];
403 	u_int regs[4];
404 	u_int maxleaf;
405 
406 	if (!hyperv_probe(&maxleaf, &hyperv_features, &hyperv_pm_features,
407 	    &hyperv_features3))
408 		return false;
409 
410 	x86_cpuid(CPUID_LEAF_HV_IDENTITY, regs);
411 	hyperv_ver_major = regs[1] >> 16;
412 	snprintf(hyperv_version_str, sizeof(hyperv_version_str),
413 	    "%d.%d.%d [SP%d]",
414 	    hyperv_ver_major, regs[1] & 0xffff, regs[0], regs[2]);
415 	aprint_verbose("Hyper-V Version: %s\n", hyperv_version_str);
416 
417 	snprintb(hyperv_features_str, sizeof(hyperv_features_str),
418 	    "\020"
419 	    "\001VPRUNTIME"	/* MSR_HV_VP_RUNTIME */
420 	    "\002TMREFCNT"	/* MSR_HV_TIME_REF_COUNT */
421 	    "\003SYNIC"		/* MSRs for SynIC */
422 	    "\004SYNTM"		/* MSRs for SynTimer */
423 	    "\005APIC"		/* MSR_HV_{EOI,ICR,TPR} */
424 	    "\006HYPERCALL"	/* MSR_HV_{GUEST_OS_ID,HYPERCALL} */
425 	    "\007VPINDEX"	/* MSR_HV_VP_INDEX */
426 	    "\010RESET"		/* MSR_HV_RESET */
427 	    "\011STATS"		/* MSR_HV_STATS_ */
428 	    "\012REFTSC"	/* MSR_HV_REFERENCE_TSC */
429 	    "\013IDLE"		/* MSR_HV_GUEST_IDLE */
430 	    "\014TMFREQ"	/* MSR_HV_{TSC,APIC}_FREQUENCY */
431 	    "\015DEBUG",	/* MSR_HV_SYNTH_DEBUG_ */
432 	    hyperv_features);
433 	aprint_verbose("  Features=%s\n", hyperv_features_str);
434 	snprintb(buf, sizeof(buf),
435 	    "\020"
436 	    "\005C3HPET",	/* HPET is required for C3 state */
437 	    (hyperv_pm_features & ~CPUPM_HV_CSTATE_MASK));
438 	snprintf(hyperv_pm_features_str, sizeof(hyperv_pm_features_str),
439 	    "%s [C%u]", buf, CPUPM_HV_CSTATE(hyperv_pm_features));
440 	aprint_verbose("  PM Features=%s\n", hyperv_pm_features_str);
441 	snprintb(hyperv_features3_str, sizeof(hyperv_features3_str),
442 	    "\020"
443 	    "\001MWAIT"		/* MWAIT */
444 	    "\002DEBUG"		/* guest debug support */
445 	    "\003PERFMON"	/* performance monitor */
446 	    "\004PCPUDPE"	/* physical CPU dynamic partition event */
447 	    "\005XMMHC"		/* hypercall input through XMM regs */
448 	    "\006IDLE"		/* guest idle support */
449 	    "\007SLEEP"		/* hypervisor sleep support */
450 	    "\010NUMA"		/* NUMA distance query support */
451 	    "\011TMFREQ"	/* timer frequency query (TSC, LAPIC) */
452 	    "\012SYNCMC"	/* inject synthetic machine checks */
453 	    "\013CRASH"		/* MSRs for guest crash */
454 	    "\014DEBUGMSR"	/* MSRs for guest debug */
455 	    "\015NPIEP"		/* NPIEP */
456 	    "\016HVDIS",	/* disabling hypervisor */
457 	    hyperv_features3);
458 	aprint_verbose("  Features3=%s\n", hyperv_features3_str);
459 
460 	x86_cpuid(CPUID_LEAF_HV_RECOMMENDS, regs);
461 	hyperv_recommends = regs[0];
462 	aprint_verbose("  Recommends: %08x %08x\n", regs[0], regs[1]);
463 
464 	x86_cpuid(CPUID_LEAF_HV_LIMITS, regs);
465 	aprint_verbose("  Limits: Vcpu:%d Lcpu:%d Int:%d\n",
466 	    regs[0], regs[1], regs[2]);
467 
468 	if (maxleaf >= CPUID_LEAF_HV_HWFEATURES) {
469 		x86_cpuid(CPUID_LEAF_HV_HWFEATURES, regs);
470 		aprint_verbose("  HW Features: %08x, AMD: %08x\n",
471 		    regs[0], regs[3]);
472 	}
473 
474 	return true;
475 }
476 
477 void
hyperv_early_init(void)478 hyperv_early_init(void)
479 {
480 	u_int features, pm_features, features3;
481 	u_int maxleaf;
482 	int i;
483 
484 	if (!hyperv_probe(&maxleaf, &features, &pm_features, &features3))
485 		return;
486 
487 	if (features & CPUID_HV_MSR_TIME_REFCNT)
488 		x86_delay = delay_func = delay_msr;
489 
490 	if (features & CPUID_HV_MSR_VP_INDEX) {
491 		/* Save virtual processor id. */
492 		hyperv_vcpuid[0] = rdmsr(MSR_HV_VP_INDEX);
493 	} else {
494 		/* Set virtual processor id to 0 for compatibility. */
495 		hyperv_vcpuid[0] = 0;
496 	}
497 	for (i = 1; i < MAXCPUS; i++)
498 		hyperv_vcpuid[i] = hyperv_vcpuid[0];
499 }
500 
501 void
hyperv_init_cpu(struct cpu_info * ci)502 hyperv_init_cpu(struct cpu_info *ci)
503 {
504 	u_int features, pm_features, features3;
505 	u_int maxleaf;
506 
507 	if (!hyperv_probe(&maxleaf, &features, &pm_features, &features3))
508 		return;
509 
510 	if (features & CPUID_HV_MSR_VP_INDEX)
511 		hyperv_vcpuid[ci->ci_index] = rdmsr(MSR_HV_VP_INDEX);
512 }
513 
514 uint32_t
hyperv_get_vcpuid(cpuid_t cpu)515 hyperv_get_vcpuid(cpuid_t cpu)
516 {
517 
518 	if (cpu < MAXCPUS)
519 		return hyperv_vcpuid[cpu];
520 	return 0;
521 }
522 
523 static bool
hyperv_init(void)524 hyperv_init(void)
525 {
526 
527 	if (!hyperv_identify()) {
528 		/* Not Hyper-V; reset guest id to the generic one. */
529 		if (vm_guest == VM_GUEST_HV)
530 			vm_guest = VM_GUEST_VM;
531 		return false;
532 	}
533 
534 	/* Set guest id */
535 	wrmsr(MSR_HV_GUEST_OS_ID, MSR_HV_GUESTID_OSTYPE_NETBSD |
536 	    (uint64_t)__NetBSD_Version__ << MSR_HV_GUESTID_VERSION_SHIFT);
537 
538 	if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT) {
539 		/* Register Hyper-V timecounter */
540 		tc_init(&hyperv_timecounter);
541 
542 		/*
543 		 * Install 64 bits timecounter method for other modules to use.
544 		 */
545 		hyperv_tc64 = hyperv_tc64_rdmsr;
546 #ifdef __amd64__
547 		hyperv_tsc_tcinit();
548 #endif
549 
550 		/* delay with timecounter */
551 		x86_delay = delay_func = delay_tc;
552 	}
553 
554 #if NLAPIC > 0
555 	if ((hyperv_features & CPUID_HV_MSR_TIME_FREQ) &&
556 	    (hyperv_features3 & CPUID3_HV_TIME_FREQ))
557 		lapic_per_second = rdmsr(MSR_HV_APIC_FREQUENCY);
558 #endif
559 
560 	return hyperv_init_hypercall();
561 }
562 
563 static bool
hyperv_is_initialized(void)564 hyperv_is_initialized(void)
565 {
566 	uint64_t msr;
567 
568 	if (vm_guest != VM_GUEST_HV)
569 		return false;
570 	if (rdmsr_safe(MSR_HV_HYPERCALL, &msr) == EFAULT)
571 		return false;
572 	return (msr & MSR_HV_HYPERCALL_ENABLE) ? true : false;
573 }
574 
575 static int
hyperv_match(device_t parent,cfdata_t cf,void * aux)576 hyperv_match(device_t parent, cfdata_t cf, void *aux)
577 {
578 	struct cpufeature_attach_args *cfaa = aux;
579 	struct cpu_info *ci = cfaa->ci;
580 
581 	if (strcmp(cfaa->name, "vm") != 0)
582 		return 0;
583 	if ((ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY)) == 0)
584 		return 0;
585 	if (vm_guest != VM_GUEST_HV)
586 		return 0;
587 
588 	return 1;
589 }
590 
591 static void
hyperv_attach(device_t parent,device_t self,void * aux)592 hyperv_attach(device_t parent, device_t self, void *aux)
593 {
594 	struct hyperv_softc *sc = device_private(self);
595 
596 	sc->sc_dev = self;
597 
598 	aprint_naive("\n");
599 	aprint_normal(": Hyper-V\n");
600 
601 	if (!hyperv_is_initialized()) {
602 		if (rdmsr(MSR_HV_GUEST_OS_ID) == 0) {
603 			if (!hyperv_init()) {
604 				aprint_error_dev(self, "initialize failed\n");
605 				return;
606 			}
607 		}
608 		hyperv_init_hypercall();
609 	}
610 
611 	(void) pmf_device_register(self, NULL, NULL);
612 
613 	(void) hyperv_sysctl_setup_root(sc);
614 }
615 
616 static int
hyperv_detach(device_t self,int flags)617 hyperv_detach(device_t self, int flags)
618 {
619 	struct hyperv_softc *sc = device_private(self);
620 	uint64_t hc;
621 
622 	/* Disable Hypercall */
623 	hc = rdmsr(MSR_HV_HYPERCALL);
624 	wrmsr(MSR_HV_HYPERCALL, hc & MSR_HV_HYPERCALL_RSVD_MASK);
625 	hyperv_hypercall_memfree();
626 
627 	if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT)
628 		tc_detach(&hyperv_timecounter);
629 
630 	wrmsr(MSR_HV_GUEST_OS_ID, 0);
631 
632 	pmf_device_deregister(self);
633 
634 	if (sc->sc_log != NULL) {
635 		sysctl_teardown(&sc->sc_log);
636 		sc->sc_log = NULL;
637 	}
638 
639 	return 0;
640 }
641 
642 void
hyperv_intr(void)643 hyperv_intr(void)
644 {
645 	struct cpu_info *ci = curcpu();
646 
647 	(*hyperv_event_proc.func)(hyperv_event_proc.arg, ci);
648 	(*hyperv_message_proc.func)(hyperv_message_proc.arg, ci);
649 }
650 
651 void hyperv_hypercall_intr(struct trapframe *);
652 void
hyperv_hypercall_intr(struct trapframe * frame __unused)653 hyperv_hypercall_intr(struct trapframe *frame __unused)
654 {
655 	struct cpu_info *ci = curcpu();
656 
657 	ci->ci_isources[LIR_HV]->is_evcnt.ev_count++;
658 
659 	hyperv_intr();
660 }
661 
/*
 * Default event/message handler: does nothing.  It is the initial value
 * of hyperv_event_proc.func and hyperv_message_proc.func.
 */
static void
hyperv_proc_dummy(void *arg __unused, struct cpu_info *ci __unused)
{
}
666 
667 void
hyperv_set_event_proc(void (* func)(void *,struct cpu_info *),void * arg)668 hyperv_set_event_proc(void (*func)(void *, struct cpu_info *), void *arg)
669 {
670 
671 	hyperv_event_proc.func = func;
672 	hyperv_event_proc.arg = arg;
673 }
674 
675 void
hyperv_set_message_proc(void (* func)(void *,struct cpu_info *),void * arg)676 hyperv_set_message_proc(void (*func)(void *, struct cpu_info *), void *arg)
677 {
678 
679 	hyperv_message_proc.func = func;
680 	hyperv_message_proc.arg = arg;
681 }
682 
/*
 * Forget the hypercall page address so that hyperv_hypercall() refuses
 * further calls.  The page itself is a static buffer, so nothing is
 * actually freed.
 */
static void
hyperv_hypercall_memfree(void)
{

	hyperv_hypercall_ctx.hc_addr = NULL;
}
689 
690 static bool
hyperv_init_hypercall(void)691 hyperv_init_hypercall(void)
692 {
693 	uint64_t hc, hc_orig;
694 
695 	hyperv_hypercall_ctx.hc_addr = hyperv_hypercall_page;
696 	hyperv_hypercall_ctx.hc_paddr = vtophys((vaddr_t)hyperv_hypercall_page);
697 	KASSERT(hyperv_hypercall_ctx.hc_paddr != 0);
698 
699 	/* Get the 'reserved' bits, which requires preservation. */
700 	hc_orig = rdmsr(MSR_HV_HYPERCALL);
701 
702 	/*
703 	 * Setup the Hypercall page.
704 	 *
705 	 * NOTE: 'reserved' bits MUST be preserved.
706 	 */
707 	hc = (atop(hyperv_hypercall_ctx.hc_paddr) << MSR_HV_HYPERCALL_PGSHIFT) |
708 	    (hc_orig & MSR_HV_HYPERCALL_RSVD_MASK) |
709 	    MSR_HV_HYPERCALL_ENABLE;
710 	wrmsr(MSR_HV_HYPERCALL, hc);
711 
712 	/*
713 	 * Confirm that Hypercall page did get setup.
714 	 */
715 	hc = rdmsr(MSR_HV_HYPERCALL);
716 	if (!(hc & MSR_HV_HYPERCALL_ENABLE)) {
717 		aprint_error("Hyper-V: Hypercall setup failed\n");
718 		hyperv_hypercall_memfree();
719 		/* Can't perform any Hyper-V specific actions */
720 		vm_guest = VM_GUEST_VM;
721 		return false;
722 	}
723 
724 	return true;
725 }
726 
/*
 * Public wrapper around hyperv_is_initialized(): 1 when the hypercall
 * page is enabled, 0 otherwise.
 */
int
hyperv_hypercall_enabled(void)
{

	return hyperv_is_initialized() ? 1 : 0;
}
733 
734 int
hyperv_synic_supported(void)735 hyperv_synic_supported(void)
736 {
737 
738 	return (hyperv_features & CPUID_HV_MSR_SYNIC) ? 1 : 0;
739 }
740 
/*
 * Return 1 when efi_probe() reports false, i.e. a non-EFI boot is
 * treated as a generation 1 virtual machine; 0 otherwise.
 */
int
hyperv_is_gen1(void)
{

	return efi_probe() ? 0 : 1;
}
747 
/*
 * Write 0 to MSR_HV_EOM (presumably the SynIC end-of-message signal --
 * confirm against the Hyper-V specification).
 */
void
hyperv_send_eom(void)
{

	wrmsr(MSR_HV_EOM, 0);
}
754 
755 void
vmbus_init_interrupts_md(struct vmbus_softc * sc,cpuid_t cpu)756 vmbus_init_interrupts_md(struct vmbus_softc *sc, cpuid_t cpu)
757 {
758 	extern void Xintr_hyperv_hypercall(void);
759 	struct vmbus_percpu_data *pd;
760 	struct hyperv_percpu_data *hv_pd;
761 	struct cpu_info *ci;
762 	struct idt_vec *iv;
763 	int hyperv_idtvec;
764 	cpuid_t cpu0;
765 
766 	cpu0 = cpu_index(&cpu_info_primary);
767 
768 	if (cpu == cpu0 || idt_vec_is_pcpu()) {
769 		/*
770 		 * All Hyper-V ISR required resources are setup, now let's find a
771 		 * free IDT vector for Hyper-V ISR and set it up.
772 		 */
773 		ci = cpu_lookup(cpu);
774 		iv = &ci->ci_idtvec;
775 		mutex_enter(&cpu_lock);
776 		hyperv_idtvec = idt_vec_alloc(iv,
777 		    APIC_LEVEL(NIPL), IDT_INTR_HIGH);
778 		mutex_exit(&cpu_lock);
779 		KASSERT(hyperv_idtvec > 0);
780 		idt_vec_set(iv, hyperv_idtvec, Xintr_hyperv_hypercall);
781 	} else {
782 		pd = &sc->sc_percpu[cpu0];
783 		hv_pd = pd->md_cookie;
784 		KASSERT(hv_pd != NULL && hv_pd->pd_idtvec > 0);
785 		hyperv_idtvec = hv_pd->pd_idtvec;
786 	}
787 
788 	hv_pd = kmem_zalloc(sizeof(*hv_pd), KM_SLEEP);
789 	hv_pd->pd_idtvec = hyperv_idtvec;
790 	pd = &sc->sc_percpu[cpu];
791 	pd->md_cookie = (void *)hv_pd;
792 }
793 
794 void
vmbus_deinit_interrupts_md(struct vmbus_softc * sc,cpuid_t cpu)795 vmbus_deinit_interrupts_md(struct vmbus_softc *sc, cpuid_t cpu)
796 {
797 	struct vmbus_percpu_data *pd;
798 	struct hyperv_percpu_data *hv_pd;
799 	struct cpu_info *ci;
800 	struct idt_vec *iv;
801 
802 	pd = &sc->sc_percpu[cpu];
803 	hv_pd = pd->md_cookie;
804 	KASSERT(hv_pd != NULL);
805 
806 	if (cpu == cpu_index(&cpu_info_primary) ||
807 	    idt_vec_is_pcpu()) {
808 		ci = cpu_lookup(cpu);
809 		iv = &ci->ci_idtvec;
810 
811 		if (hv_pd->pd_idtvec > 0) {
812 			idt_vec_free(iv, hv_pd->pd_idtvec);
813 		}
814 	}
815 
816 	pd->md_cookie = NULL;
817 	kmem_free(hv_pd, sizeof(*hv_pd));
818 }
819 
820 void
vmbus_init_synic_md(struct vmbus_softc * sc,cpuid_t cpu)821 vmbus_init_synic_md(struct vmbus_softc *sc, cpuid_t cpu)
822 {
823 	extern void Xintr_hyperv_hypercall(void);
824 	struct vmbus_percpu_data *pd;
825 	struct hyperv_percpu_data *hv_pd;
826 	uint64_t val, orig;
827 	uint32_t sint;
828 	int hyperv_idtvec;
829 
830 	pd = &sc->sc_percpu[cpu];
831 	hv_pd = pd->md_cookie;
832 	hyperv_idtvec = hv_pd->pd_idtvec;
833 
834 	/*
835 	 * Setup the SynIC message.
836 	 */
837 	orig = rdmsr(MSR_HV_SIMP);
838 	val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) |
839 	    (atop(hyperv_dma_get_paddr(&pd->simp_dma)) << MSR_HV_SIMP_PGSHIFT);
840 	wrmsr(MSR_HV_SIMP, val);
841 
842 	/*
843 	 * Setup the SynIC event flags.
844 	 */
845 	orig = rdmsr(MSR_HV_SIEFP);
846 	val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) |
847 	    (atop(hyperv_dma_get_paddr(&pd->siep_dma)) << MSR_HV_SIEFP_PGSHIFT);
848 	wrmsr(MSR_HV_SIEFP, val);
849 
850 	/*
851 	 * Configure and unmask SINT for message and event flags.
852 	 */
853 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
854 	orig = rdmsr(sint);
855 	val = hyperv_idtvec | MSR_HV_SINT_AUTOEOI |
856 	    (orig & MSR_HV_SINT_RSVD_MASK);
857 	wrmsr(sint, val);
858 
859 	/*
860 	 * Configure and unmask SINT for timer.
861 	 */
862 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
863 	orig = rdmsr(sint);
864 	val = hyperv_idtvec | MSR_HV_SINT_AUTOEOI |
865 	    (orig & MSR_HV_SINT_RSVD_MASK);
866 	wrmsr(sint, val);
867 
868 	/*
869 	 * All done; enable SynIC.
870 	 */
871 	orig = rdmsr(MSR_HV_SCONTROL);
872 	val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK);
873 	wrmsr(MSR_HV_SCONTROL, val);
874 }
875 
876 void
vmbus_deinit_synic_md(struct vmbus_softc * sc,cpuid_t cpu)877 vmbus_deinit_synic_md(struct vmbus_softc *sc, cpuid_t cpu)
878 {
879 	uint64_t orig;
880 	uint32_t sint;
881 
882 	/*
883 	 * Disable SynIC.
884 	 */
885 	orig = rdmsr(MSR_HV_SCONTROL);
886 	wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK));
887 
888 	/*
889 	 * Mask message and event flags SINT.
890 	 */
891 	sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE;
892 	orig = rdmsr(sint);
893 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
894 
895 	/*
896 	 * Mask timer SINT.
897 	 */
898 	sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER;
899 	orig = rdmsr(sint);
900 	wrmsr(sint, orig | MSR_HV_SINT_MASKED);
901 
902 	/*
903 	 * Teardown SynIC message.
904 	 */
905 	orig = rdmsr(MSR_HV_SIMP);
906 	wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK));
907 
908 	/*
909 	 * Teardown SynIC event flags.
910 	 */
911 	orig = rdmsr(MSR_HV_SIEFP);
912 	wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK));
913 }
914 
915 static int
hyperv_sysctl_setup(struct hyperv_softc * sc,const struct sysctlnode * hyperv_node)916 hyperv_sysctl_setup(struct hyperv_softc *sc,
917     const struct sysctlnode *hyperv_node)
918 {
919 	int error;
920 
921 	error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
922 	    CTLFLAG_READONLY, CTLTYPE_STRING, "version", NULL,
923 	    NULL, 0, hyperv_version_str,
924 	    0, CTL_CREATE, CTL_EOL);
925 	if (error)
926 		return error;
927 
928 	error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
929 	    CTLFLAG_READONLY, CTLTYPE_STRING, "features", NULL,
930 	    NULL, 0, hyperv_features_str,
931 	    0, CTL_CREATE, CTL_EOL);
932 	if (error)
933 		return error;
934 
935 	error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
936 	    CTLFLAG_READONLY, CTLTYPE_STRING, "pm_features", NULL,
937 	    NULL, 0, hyperv_pm_features_str,
938 	    0, CTL_CREATE, CTL_EOL);
939 	if (error)
940 		return error;
941 
942 	error = sysctl_createv(&sc->sc_log, 0, &hyperv_node, NULL,
943 	    CTLFLAG_READONLY, CTLTYPE_STRING, "features3", NULL,
944 	    NULL, 0, hyperv_features3_str,
945 	    0, CTL_CREATE, CTL_EOL);
946 	if (error)
947 		return error;
948 
949 	return 0;
950 }
951 
952 static int
hyperv_sysctl_setup_root(struct hyperv_softc * sc)953 hyperv_sysctl_setup_root(struct hyperv_softc *sc)
954 {
955 	const struct sysctlnode *machdep_node, *hyperv_node;
956 	int error;
957 
958 	error = sysctl_createv(&sc->sc_log, 0, NULL, &machdep_node,
959 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
960 	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);
961 	if (error)
962 		goto fail;
963 
964 	error = sysctl_createv(&sc->sc_log, 0, &machdep_node, &hyperv_node,
965 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "hyperv", NULL,
966 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
967 	if (error)
968 		goto fail;
969 
970 	error = hyperv_sysctl_setup(sc, hyperv_node);
971 	if (error)
972 		goto fail;
973 
974 	return 0;
975 
976 fail:
977 	sysctl_teardown(&sc->sc_log);
978 	sc->sc_log = NULL;
979 	return error;
980 }
981 
982 MODULE(MODULE_CLASS_DRIVER, hyperv, NULL);
983 
984 #ifdef _MODULE
985 #include "ioconf.c"
986 #endif
987 
988 static int
hyperv_modcmd(modcmd_t cmd,void * aux)989 hyperv_modcmd(modcmd_t cmd, void *aux)
990 {
991 	int rv = 0;
992 
993 	switch (cmd) {
994 	case MODULE_CMD_INIT:
995 #ifdef _MODULE
996 		rv = config_init_component(cfdriver_ioconf_hyperv,
997 		    cfattach_ioconf_hyperv, cfdata_ioconf_hyperv);
998 #endif
999 		hyperv_init();
1000 		break;
1001 
1002 	case MODULE_CMD_FINI:
1003 #ifdef _MODULE
1004 		rv = config_fini_component(cfdriver_ioconf_hyperv,
1005 		    cfattach_ioconf_hyperv, cfdata_ioconf_hyperv);
1006 #endif
1007 		break;
1008 
1009 	default:
1010 		rv = ENOTTY;
1011 		break;
1012 	}
1013 
1014 	return rv;
1015 }
1016 
1017 #if NVMBUS > 0
/*
 * genfb at vmbus
 *
 * Callback tables whose addresses are published as device properties:
 * pmf_cb as "pmf_callback" by device_hyperv_register(), mode_cb as
 * "mode_callback" by populate_fbinfo().
 */
static struct genfb_pmf_callback pmf_cb;
static struct genfb_mode_callback mode_cb;
1023 
/*
 * Mode-set callback installed in mode_cb: nothing to do, always reports
 * success.
 */
static bool
x86_genfb_setmode(struct genfb_softc *sc, int newmode)
{
	return true;
}
1029 
/*
 * Suspend callback installed in pmf_cb: nothing to do, always reports
 * success.
 */
static bool
x86_genfb_suspend(device_t dev, const pmf_qual_t *qual)
{
	return true;
}
1035 
/*
 * Resume callback installed in pmf_cb: restore the genfb palette when
 * genfb is configured into the kernel.  Always reports success.
 */
static bool
x86_genfb_resume(device_t dev, const pmf_qual_t *qual)
{
#if NGENFB > 0
	struct genfb_vmbus_softc *sc = device_private(dev);

	genfb_restore_palette(&sc->sc_gen);
#endif
	return true;
}
1046 
1047 static void
populate_fbinfo(device_t dev,prop_dictionary_t dict)1048 populate_fbinfo(device_t dev, prop_dictionary_t dict)
1049 {
1050 #if NWSDISPLAY > 0 && NGENFB > 0
1051 	extern struct vcons_screen x86_genfb_console_screen;
1052 	struct rasops_info *ri = &x86_genfb_console_screen.scr_ri;
1053 #endif
1054 	const void *fbptr = lookup_bootinfo(BTINFO_FRAMEBUFFER);
1055 	struct btinfo_framebuffer fbinfo;
1056 
1057 	if (fbptr == NULL)
1058 		return;
1059 
1060 	memcpy(&fbinfo, fbptr, sizeof(fbinfo));
1061 
1062 	if (fbinfo.physaddr != 0) {
1063 		prop_dictionary_set_uint32(dict, "width", fbinfo.width);
1064 		prop_dictionary_set_uint32(dict, "height", fbinfo.height);
1065 		prop_dictionary_set_uint8(dict, "depth", fbinfo.depth);
1066 		prop_dictionary_set_uint16(dict, "linebytes", fbinfo.stride);
1067 
1068 		prop_dictionary_set_uint64(dict, "address", fbinfo.physaddr);
1069 #if NWSDISPLAY > 0 && NGENFB > 0
1070 		if (ri->ri_bits != NULL) {
1071 			prop_dictionary_set_uint64(dict, "virtual_address",
1072 			    ri->ri_hwbits != NULL ?
1073 			    (vaddr_t)ri->ri_hworigbits :
1074 			    (vaddr_t)ri->ri_origbits);
1075 		}
1076 #endif
1077 	}
1078 #if notyet
1079 	prop_dictionary_set_bool(dict, "splash",
1080 	    (fbinfo.flags & BI_FB_SPLASH) != 0);
1081 #endif
1082 #if 0
1083 	if (fbinfo.depth == 8) {
1084 		gfb_cb.gcc_cookie = NULL;
1085 		gfb_cb.gcc_set_mapreg = x86_genfb_set_mapreg;
1086 		prop_dictionary_set_uint64(dict, "cmap_callback",
1087 		    (uint64_t)(uintptr_t)&gfb_cb);
1088 	}
1089 #endif
1090 	if (fbinfo.physaddr != 0) {
1091 		mode_cb.gmc_setmode = x86_genfb_setmode;
1092 		prop_dictionary_set_uint64(dict, "mode_callback",
1093 		    (uint64_t)(uintptr_t)&mode_cb);
1094 	}
1095 
1096 #if NWSDISPLAY > 0 && NGENFB > 0
1097 	if (device_is_a(dev, "genfb")) {
1098 		prop_dictionary_set_bool(dict, "enable_shadowfb",
1099 		    ri->ri_hwbits != NULL);
1100 
1101 		x86_genfb_set_console_dev(dev);
1102 #ifdef DDB
1103 		db_trap_callback = x86_genfb_ddb_trap_callback;
1104 #endif
1105 	}
1106 #endif
1107 }
1108 #endif
1109 
1110 device_t
device_hyperv_register(device_t dev,void * aux)1111 device_hyperv_register(device_t dev, void *aux)
1112 {
1113 #if NVMBUS > 0
1114 	device_t parent = device_parent(dev);
1115 
1116 	if (parent && device_is_a(parent, "vmbus") && !x86_found_console) {
1117 		struct vmbus_attach_args *aa = aux;
1118 
1119 		if (memcmp(aa->aa_type, &hyperv_guid_video,
1120 		    sizeof(*aa->aa_type)) == 0) {
1121 			prop_dictionary_t dict = device_properties(dev);
1122 
1123 			/* Initialize genfb for serial console */
1124 			x86_genfb_init();
1125 
1126 			/*
1127 			 * framebuffer drivers other than genfb can work
1128 			 * without the address property
1129 			 */
1130 			populate_fbinfo(dev, dict);
1131 
1132 #if 1 && NWSDISPLAY > 0 && NGENFB > 0
1133 			/* XXX */
1134 			if (device_is_a(dev, "genfb")) {
1135 				prop_dictionary_set_bool(dict, "is_console",
1136 				    genfb_is_console());
1137 			} else
1138 #endif
1139 			prop_dictionary_set_bool(dict, "is_console", true);
1140 
1141 			prop_dictionary_set_bool(dict, "clear-screen", false);
1142 #if NWSDISPLAY > 0 && NGENFB > 0
1143 			extern struct vcons_screen x86_genfb_console_screen;
1144 			prop_dictionary_set_uint16(dict, "cursor-row",
1145 			    x86_genfb_console_screen.scr_ri.ri_crow);
1146 #endif
1147 			pmf_cb.gpc_suspend = x86_genfb_suspend;
1148 			pmf_cb.gpc_resume = x86_genfb_resume;
1149 			prop_dictionary_set_uint64(dict, "pmf_callback",
1150 			    (uint64_t)(uintptr_t)&pmf_cb);
1151 			x86_found_console = true;
1152 			return NULL;
1153 		}
1154 	}
1155 #endif
1156 	return NULL;
1157 }
1158