xref: /freebsd/sys/dev/kvm_clock/kvm_clock.c (revision 4e8d558c)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2014 Bryan Venteicher <bryanv@FreeBSD.org>
5  * Copyright (c) 2021 Mathieu Chouquet-Stringer
6  * Copyright (c) 2021 Juniper Networks, Inc.
7  * Copyright (c) 2021 Klara, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
32  * Linux KVM paravirtual clock support
33  *
34  * References:
35  *     - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
36  *     - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include <sys/param.h>
43 #include <sys/bus.h>
44 #include <sys/domainset.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/module.h>
48 #include <sys/smp.h>
49 #include <sys/sysctl.h>
50 
51 #include <vm/vm.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_extern.h>
54 
55 #include <machine/pvclock.h>
56 #include <x86/kvm.h>
57 
58 #include "clock_if.h"
59 
/*
 * Name shared by the driver, its device instances, and the timecounter
 * registered through pvclock_init().
 */
#define	KVM_CLOCK_DEVNAME		"kvmclock"

/*
 * Timecounter quality.  The value is deliberately placed:
 *   (1) above the HPET's quality (always 950),
 *   (2) above the TSC's default quality of 800, and
 *   (3) below the quality the TSC gets when it supports the "Invariant TSC"
 *       feature and is believed to be synchronized across all CPUs.
 */
#define	KVM_CLOCK_TC_QUALITY		975
67 
68 struct kvm_clock_softc {
69 	struct pvclock			 pvc;
70 	struct pvclock_wall_clock	 wc;
71 	struct pvclock_vcpu_time_info	*timeinfos;
72 	u_int				 msr_tc;
73 	u_int				 msr_wc;
74 #ifndef EARLY_AP_STARTUP
75 	int				 firstcpu;
76 #endif
77 };
78 
79 static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg);
80 static void	kvm_clock_system_time_enable(struct kvm_clock_softc *sc,
81 		    const cpuset_t *cpus);
82 static void	kvm_clock_system_time_enable_pcpu(void *arg);
83 static void	kvm_clock_setup_sysctl(device_t);
84 
85 static struct pvclock_wall_clock *
86 kvm_clock_get_wallclock(void *arg)
87 {
88 	struct kvm_clock_softc *sc = arg;
89 
90 	wrmsr(sc->msr_wc, vtophys(&sc->wc));
91 	return (&sc->wc);
92 }
93 
94 static void
95 kvm_clock_system_time_enable(struct kvm_clock_softc *sc, const cpuset_t *cpus)
96 {
97 	smp_rendezvous_cpus(*cpus, NULL, kvm_clock_system_time_enable_pcpu,
98 	    NULL, sc);
99 }
100 
101 static void
102 kvm_clock_system_time_enable_pcpu(void *arg)
103 {
104 	struct kvm_clock_softc *sc = arg;
105 
106 	/*
107 	 * See [2]; the lsb of this MSR is the system time enable bit.
108 	 */
109 	wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1);
110 }
111 
112 #ifndef EARLY_AP_STARTUP
113 static void
114 kvm_clock_init_smp(void *arg __unused)
115 {
116 	devclass_t kvm_clock_devclass;
117 	cpuset_t cpus;
118 	struct kvm_clock_softc *sc;
119 
120 	kvm_clock_devclass = devclass_find(KVM_CLOCK_DEVNAME);
121 	sc = devclass_get_softc(kvm_clock_devclass, 0);
122 	if (sc == NULL || mp_ncpus == 1)
123 		return;
124 
125 	/*
126 	 * Register with the hypervisor on all CPUs except the one that
127 	 * registered in kvm_clock_attach().
128 	 */
129 	cpus = all_cpus;
130 	KASSERT(CPU_ISSET(sc->firstcpu, &cpus),
131 	    ("%s: invalid first CPU %d", __func__, sc->firstcpu));
132 	CPU_CLR(sc->firstcpu, &cpus);
133 	kvm_clock_system_time_enable(sc, &cpus);
134 }
135 SYSINIT(kvm_clock, SI_SUB_SMP, SI_ORDER_ANY, kvm_clock_init_smp, NULL);
136 #endif
137 
138 static void
139 kvm_clock_identify(driver_t *driver, device_t parent)
140 {
141 	u_int regs[4];
142 
143 	kvm_cpuid_get_features(regs);
144 	if ((regs[0] &
145 	    (KVM_FEATURE_CLOCKSOURCE2 | KVM_FEATURE_CLOCKSOURCE)) == 0)
146 		return;
147 	if (device_find_child(parent, KVM_CLOCK_DEVNAME, -1))
148 		return;
149 	BUS_ADD_CHILD(parent, 0, KVM_CLOCK_DEVNAME, 0);
150 }
151 
152 static int
153 kvm_clock_probe(device_t dev)
154 {
155 	device_set_desc(dev, "KVM paravirtual clock");
156 	return (BUS_PROBE_DEFAULT);
157 }
158 
159 static int
160 kvm_clock_attach(device_t dev)
161 {
162 	u_int regs[4];
163 	struct kvm_clock_softc *sc = device_get_softc(dev);
164 	bool stable_flag_supported;
165 
166 	/* Process KVM "features" CPUID leaf content: */
167 	kvm_cpuid_get_features(regs);
168 	if ((regs[0] & KVM_FEATURE_CLOCKSOURCE2) != 0) {
169 		sc->msr_tc = KVM_MSR_SYSTEM_TIME_NEW;
170 		sc->msr_wc = KVM_MSR_WALL_CLOCK_NEW;
171 	} else {
172 		KASSERT((regs[0] & KVM_FEATURE_CLOCKSOURCE) != 0,
173 		    ("Clocksource feature flags disappeared since "
174 		    "kvm_clock_identify: regs[0] %#0x.", regs[0]));
175 		sc->msr_tc = KVM_MSR_SYSTEM_TIME;
176 		sc->msr_wc = KVM_MSR_WALL_CLOCK;
177 	}
178 	stable_flag_supported =
179 	    (regs[0] & KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) != 0;
180 
181 	/* Set up 'struct pvclock_vcpu_time_info' page(s): */
182 	sc->timeinfos = kmem_malloc(mp_ncpus *
183 	    sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO);
184 #ifdef EARLY_AP_STARTUP
185 	kvm_clock_system_time_enable(sc, &all_cpus);
186 #else
187 	sc->firstcpu = curcpu;
188 	kvm_clock_system_time_enable_pcpu(sc);
189 #endif
190 
191 	/*
192 	 * Init pvclock; register KVM clock wall clock, register KVM clock
193 	 * timecounter, and set up the requisite infrastructure for vDSO access
194 	 * to this timecounter.
195 	 *     Regarding 'tc_flags': Since the KVM MSR documentation does not
196 	 *     specifically discuss suspend/resume scenarios, conservatively
197 	 *     leave 'TC_FLAGS_SUSPEND_SAFE' cleared and assume that the system
198 	 *     time must be re-inited in such cases.
199 	 */
200 	sc->pvc.get_wallclock = kvm_clock_get_wallclock;
201 	sc->pvc.get_wallclock_arg = sc;
202 	sc->pvc.timeinfos = sc->timeinfos;
203 	sc->pvc.stable_flag_supported = stable_flag_supported;
204 	pvclock_init(&sc->pvc, dev, KVM_CLOCK_DEVNAME, KVM_CLOCK_TC_QUALITY, 0);
205 	kvm_clock_setup_sysctl(dev);
206 	return (0);
207 }
208 
209 static int
210 kvm_clock_detach(device_t dev)
211 {
212 	struct kvm_clock_softc *sc = device_get_softc(dev);
213 
214 	return (pvclock_destroy(&sc->pvc));
215 }
216 
217 static int
218 kvm_clock_suspend(device_t dev)
219 {
220 	return (0);
221 }
222 
223 static int
224 kvm_clock_resume(device_t dev)
225 {
226 	/*
227 	 * See note in 'kvm_clock_attach()' regarding 'TC_FLAGS_SUSPEND_SAFE';
228 	 * conservatively assume that the system time must be re-inited in
229 	 * suspend/resume scenarios.
230 	 */
231 	kvm_clock_system_time_enable(device_get_softc(dev), &all_cpus);
232 	pvclock_resume();
233 	inittodr(time_second);
234 	return (0);
235 }
236 
237 static int
238 kvm_clock_gettime(device_t dev, struct timespec *ts)
239 {
240 	struct kvm_clock_softc *sc = device_get_softc(dev);
241 
242 	pvclock_gettime(&sc->pvc, ts);
243 	return (0);
244 }
245 
246 static int
247 kvm_clock_settime(device_t dev, struct timespec *ts)
248 {
249 	/*
250 	 * Even though it is not possible to set the KVM clock's wall clock, to
251 	 * avoid the possibility of periodic benign error messages from
252 	 * 'settime_task_func()', report success rather than, e.g., 'ENODEV'.
253 	 */
254 	return (0);
255 }
256 
257 static int
258 kvm_clock_tsc_freq_sysctl(SYSCTL_HANDLER_ARGS)
259 {
260 	struct kvm_clock_softc *sc = oidp->oid_arg1;
261         uint64_t freq = pvclock_tsc_freq(sc->timeinfos);
262 
263         return (sysctl_handle_64(oidp, &freq, 0, req));
264 }
265 
266 static void
267 kvm_clock_setup_sysctl(device_t dev)
268 {
269 	struct kvm_clock_softc *sc = device_get_softc(dev);
270         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
271         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
272         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
273 
274         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tsc_freq",
275             CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
276             kvm_clock_tsc_freq_sysctl, "QU",
277             "Time Stamp Counter frequency");
278 }
279 
280 static device_method_t kvm_clock_methods[] = {
281 	DEVMETHOD(device_identify,	kvm_clock_identify),
282 	DEVMETHOD(device_probe,		kvm_clock_probe),
283 	DEVMETHOD(device_attach,	kvm_clock_attach),
284 	DEVMETHOD(device_detach,	kvm_clock_detach),
285 	DEVMETHOD(device_suspend,	kvm_clock_suspend),
286 	DEVMETHOD(device_resume,	kvm_clock_resume),
287 	/* clock interface */
288 	DEVMETHOD(clock_gettime,	kvm_clock_gettime),
289 	DEVMETHOD(clock_settime,	kvm_clock_settime),
290 
291 	DEVMETHOD_END
292 };
293 
294 static driver_t kvm_clock_driver = {
295 	KVM_CLOCK_DEVNAME,
296 	kvm_clock_methods,
297 	sizeof(struct kvm_clock_softc),
298 };
299 
300 DRIVER_MODULE(kvm_clock, nexus, kvm_clock_driver, 0, 0);
301