xref: /freebsd/sys/x86/x86/tsc.c (revision dd7d207d)
1dd7d207dSJung-uk Kim /*-
2dd7d207dSJung-uk Kim  * Copyright (c) 1998-2003 Poul-Henning Kamp
3dd7d207dSJung-uk Kim  * All rights reserved.
4dd7d207dSJung-uk Kim  *
5dd7d207dSJung-uk Kim  * Redistribution and use in source and binary forms, with or without
6dd7d207dSJung-uk Kim  * modification, are permitted provided that the following conditions
7dd7d207dSJung-uk Kim  * are met:
8dd7d207dSJung-uk Kim  * 1. Redistributions of source code must retain the above copyright
9dd7d207dSJung-uk Kim  *    notice, this list of conditions and the following disclaimer.
10dd7d207dSJung-uk Kim  * 2. Redistributions in binary form must reproduce the above copyright
11dd7d207dSJung-uk Kim  *    notice, this list of conditions and the following disclaimer in the
12dd7d207dSJung-uk Kim  *    documentation and/or other materials provided with the distribution.
13dd7d207dSJung-uk Kim  *
14dd7d207dSJung-uk Kim  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15dd7d207dSJung-uk Kim  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16dd7d207dSJung-uk Kim  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17dd7d207dSJung-uk Kim  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18dd7d207dSJung-uk Kim  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19dd7d207dSJung-uk Kim  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20dd7d207dSJung-uk Kim  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21dd7d207dSJung-uk Kim  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22dd7d207dSJung-uk Kim  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23dd7d207dSJung-uk Kim  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24dd7d207dSJung-uk Kim  * SUCH DAMAGE.
25dd7d207dSJung-uk Kim  */
26dd7d207dSJung-uk Kim 
27dd7d207dSJung-uk Kim #include <sys/cdefs.h>
28dd7d207dSJung-uk Kim __FBSDID("$FreeBSD$");
29dd7d207dSJung-uk Kim 
30dd7d207dSJung-uk Kim #include "opt_clock.h"
31dd7d207dSJung-uk Kim 
32dd7d207dSJung-uk Kim #include <sys/param.h>
33dd7d207dSJung-uk Kim #include <sys/bus.h>
34dd7d207dSJung-uk Kim #include <sys/cpu.h>
35dd7d207dSJung-uk Kim #include <sys/malloc.h>
36dd7d207dSJung-uk Kim #include <sys/systm.h>
37dd7d207dSJung-uk Kim #include <sys/sysctl.h>
38dd7d207dSJung-uk Kim #include <sys/time.h>
39dd7d207dSJung-uk Kim #include <sys/timetc.h>
40dd7d207dSJung-uk Kim #include <sys/kernel.h>
41dd7d207dSJung-uk Kim #include <sys/power.h>
42dd7d207dSJung-uk Kim #include <sys/smp.h>
43dd7d207dSJung-uk Kim #include <machine/clock.h>
44dd7d207dSJung-uk Kim #include <machine/cputypes.h>
45dd7d207dSJung-uk Kim #include <machine/md_var.h>
46dd7d207dSJung-uk Kim #include <machine/specialreg.h>
47dd7d207dSJung-uk Kim 
48dd7d207dSJung-uk Kim #include "cpufreq_if.h"
49dd7d207dSJung-uk Kim 
50dd7d207dSJung-uk Kim uint64_t	tsc_freq;
51dd7d207dSJung-uk Kim int		tsc_is_broken;
52dd7d207dSJung-uk Kim int		tsc_is_invariant;
53dd7d207dSJung-uk Kim int		tsc_present;
54dd7d207dSJung-uk Kim static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;
55dd7d207dSJung-uk Kim 
56dd7d207dSJung-uk Kim SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
57dd7d207dSJung-uk Kim     &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");
58dd7d207dSJung-uk Kim TUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant);
59dd7d207dSJung-uk Kim 
60dd7d207dSJung-uk Kim #ifdef SMP
61dd7d207dSJung-uk Kim static int	smp_tsc;
62dd7d207dSJung-uk Kim SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
63dd7d207dSJung-uk Kim     "Indicates whether the TSC is safe to use in SMP mode");
64dd7d207dSJung-uk Kim TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);
65dd7d207dSJung-uk Kim #endif
66dd7d207dSJung-uk Kim 
67dd7d207dSJung-uk Kim static void tsc_freq_changed(void *arg, const struct cf_level *level,
68dd7d207dSJung-uk Kim     int status);
69dd7d207dSJung-uk Kim static void tsc_freq_changing(void *arg, const struct cf_level *level,
70dd7d207dSJung-uk Kim     int *status);
71dd7d207dSJung-uk Kim static	unsigned tsc_get_timecount(struct timecounter *tc);
72dd7d207dSJung-uk Kim static void tsc_levels_changed(void *arg, int unit);
73dd7d207dSJung-uk Kim 
74dd7d207dSJung-uk Kim static struct timecounter tsc_timecounter = {
75dd7d207dSJung-uk Kim 	tsc_get_timecount,	/* get_timecount */
76dd7d207dSJung-uk Kim 	0,			/* no poll_pps */
77dd7d207dSJung-uk Kim 	~0u,			/* counter_mask */
78dd7d207dSJung-uk Kim 	0,			/* frequency */
79dd7d207dSJung-uk Kim 	"TSC",			/* name */
80dd7d207dSJung-uk Kim 	800,			/* quality (adjusted in code) */
81dd7d207dSJung-uk Kim };
82dd7d207dSJung-uk Kim 
83dd7d207dSJung-uk Kim void
84dd7d207dSJung-uk Kim init_TSC(void)
85dd7d207dSJung-uk Kim {
86dd7d207dSJung-uk Kim 	u_int64_t tscval[2];
87dd7d207dSJung-uk Kim 
88dd7d207dSJung-uk Kim 	if (cpu_feature & CPUID_TSC)
89dd7d207dSJung-uk Kim 		tsc_present = 1;
90dd7d207dSJung-uk Kim 	else
91dd7d207dSJung-uk Kim 		tsc_present = 0;
92dd7d207dSJung-uk Kim 
93dd7d207dSJung-uk Kim 	if (!tsc_present)
94dd7d207dSJung-uk Kim 		return;
95dd7d207dSJung-uk Kim 
96dd7d207dSJung-uk Kim 	if (bootverbose)
97dd7d207dSJung-uk Kim 	        printf("Calibrating TSC clock ... ");
98dd7d207dSJung-uk Kim 
99dd7d207dSJung-uk Kim 	tscval[0] = rdtsc();
100dd7d207dSJung-uk Kim 	DELAY(1000000);
101dd7d207dSJung-uk Kim 	tscval[1] = rdtsc();
102dd7d207dSJung-uk Kim 
103dd7d207dSJung-uk Kim 	tsc_freq = tscval[1] - tscval[0];
104dd7d207dSJung-uk Kim 	if (bootverbose)
105dd7d207dSJung-uk Kim 		printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
106dd7d207dSJung-uk Kim 
107dd7d207dSJung-uk Kim 	switch (cpu_vendor_id) {
108dd7d207dSJung-uk Kim 	case CPU_VENDOR_AMD:
109dd7d207dSJung-uk Kim 		if ((amd_pminfo & AMDPM_TSC_INVARIANT) ||
110dd7d207dSJung-uk Kim 		    CPUID_TO_FAMILY(cpu_id) >= 0x10 || cpu_id == 0x60fb2)
111dd7d207dSJung-uk Kim 			tsc_is_invariant = 1;
112dd7d207dSJung-uk Kim 		break;
113dd7d207dSJung-uk Kim 	case CPU_VENDOR_INTEL:
114dd7d207dSJung-uk Kim 		if ((amd_pminfo & AMDPM_TSC_INVARIANT) ||
115dd7d207dSJung-uk Kim 		    (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
116dd7d207dSJung-uk Kim 		    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
117dd7d207dSJung-uk Kim 		    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
118dd7d207dSJung-uk Kim 		    CPUID_TO_MODEL(cpu_id) >= 0x3))
119dd7d207dSJung-uk Kim 			tsc_is_invariant = 1;
120dd7d207dSJung-uk Kim 		break;
121dd7d207dSJung-uk Kim 	case CPU_VENDOR_CENTAUR:
122dd7d207dSJung-uk Kim 		if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
123dd7d207dSJung-uk Kim 		    CPUID_TO_MODEL(cpu_id) >= 0xf &&
124dd7d207dSJung-uk Kim 		    (rdmsr(0x1203) & 0x100000000ULL) == 0)
125dd7d207dSJung-uk Kim 			tsc_is_invariant = 1;
126dd7d207dSJung-uk Kim 		break;
127dd7d207dSJung-uk Kim 	}
128dd7d207dSJung-uk Kim 
129dd7d207dSJung-uk Kim 	/*
130dd7d207dSJung-uk Kim 	 * Inform CPU accounting about our boot-time clock rate.  This will
131dd7d207dSJung-uk Kim 	 * be updated if someone loads a cpufreq driver after boot that
132dd7d207dSJung-uk Kim 	 * discovers a new max frequency.
133dd7d207dSJung-uk Kim 	 */
134dd7d207dSJung-uk Kim 	set_cputicker(rdtsc, tsc_freq, 1);
135dd7d207dSJung-uk Kim 
136dd7d207dSJung-uk Kim 	if (tsc_is_invariant)
137dd7d207dSJung-uk Kim 		return;
138dd7d207dSJung-uk Kim 
139dd7d207dSJung-uk Kim 	/* Register to find out about changes in CPU frequency. */
140dd7d207dSJung-uk Kim 	tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
141dd7d207dSJung-uk Kim 	    tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
142dd7d207dSJung-uk Kim 	tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
143dd7d207dSJung-uk Kim 	    tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
144dd7d207dSJung-uk Kim 	tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
145dd7d207dSJung-uk Kim 	    tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
146dd7d207dSJung-uk Kim }
147dd7d207dSJung-uk Kim 
148dd7d207dSJung-uk Kim void
149dd7d207dSJung-uk Kim init_TSC_tc(void)
150dd7d207dSJung-uk Kim {
151dd7d207dSJung-uk Kim 
152dd7d207dSJung-uk Kim 	if (!tsc_present)
153dd7d207dSJung-uk Kim 		return;
154dd7d207dSJung-uk Kim 
155dd7d207dSJung-uk Kim 	/*
156dd7d207dSJung-uk Kim 	 * We can not use the TSC if we support APM.  Precise timekeeping
157dd7d207dSJung-uk Kim 	 * on an APM'ed machine is at best a fools pursuit, since
158dd7d207dSJung-uk Kim 	 * any and all of the time spent in various SMM code can't
159dd7d207dSJung-uk Kim 	 * be reliably accounted for.  Reading the RTC is your only
160dd7d207dSJung-uk Kim 	 * source of reliable time info.  The i8254 loses too, of course,
161dd7d207dSJung-uk Kim 	 * but we need to have some kind of time...
162dd7d207dSJung-uk Kim 	 * We don't know at this point whether APM is going to be used
163dd7d207dSJung-uk Kim 	 * or not, nor when it might be activated.  Play it safe.
164dd7d207dSJung-uk Kim 	 */
165dd7d207dSJung-uk Kim 	if (power_pm_get_type() == POWER_PM_TYPE_APM) {
166dd7d207dSJung-uk Kim 		tsc_timecounter.tc_quality = -1000;
167dd7d207dSJung-uk Kim 		if (bootverbose)
168dd7d207dSJung-uk Kim 			printf("TSC timecounter disabled: APM enabled.\n");
169dd7d207dSJung-uk Kim 	}
170dd7d207dSJung-uk Kim 
171dd7d207dSJung-uk Kim #ifdef SMP
172dd7d207dSJung-uk Kim 	/*
173dd7d207dSJung-uk Kim 	 * We can not use the TSC in SMP mode unless the TSCs on all CPUs
174dd7d207dSJung-uk Kim 	 * are somehow synchronized.  Some hardware configurations do
175dd7d207dSJung-uk Kim 	 * this, but we have no way of determining whether this is the
176dd7d207dSJung-uk Kim 	 * case, so we do not use the TSC in multi-processor systems
177dd7d207dSJung-uk Kim 	 * unless the user indicated (by setting kern.timecounter.smp_tsc
178dd7d207dSJung-uk Kim 	 * to 1) that he believes that his TSCs are synchronized.
179dd7d207dSJung-uk Kim 	 */
180dd7d207dSJung-uk Kim 	if (mp_ncpus > 1 && !smp_tsc)
181dd7d207dSJung-uk Kim 		tsc_timecounter.tc_quality = -100;
182dd7d207dSJung-uk Kim #endif
183dd7d207dSJung-uk Kim 
184dd7d207dSJung-uk Kim 	if (tsc_freq != 0 && !tsc_is_broken) {
185dd7d207dSJung-uk Kim 		tsc_timecounter.tc_frequency = tsc_freq;
186dd7d207dSJung-uk Kim 		tc_init(&tsc_timecounter);
187dd7d207dSJung-uk Kim 	}
188dd7d207dSJung-uk Kim }
189dd7d207dSJung-uk Kim 
190dd7d207dSJung-uk Kim /*
191dd7d207dSJung-uk Kim  * When cpufreq levels change, find out about the (new) max frequency.  We
192dd7d207dSJung-uk Kim  * use this to update CPU accounting in case it got a lower estimate at boot.
193dd7d207dSJung-uk Kim  */
194dd7d207dSJung-uk Kim static void
195dd7d207dSJung-uk Kim tsc_levels_changed(void *arg, int unit)
196dd7d207dSJung-uk Kim {
197dd7d207dSJung-uk Kim 	device_t cf_dev;
198dd7d207dSJung-uk Kim 	struct cf_level *levels;
199dd7d207dSJung-uk Kim 	int count, error;
200dd7d207dSJung-uk Kim 	uint64_t max_freq;
201dd7d207dSJung-uk Kim 
202dd7d207dSJung-uk Kim 	/* Only use values from the first CPU, assuming all are equal. */
203dd7d207dSJung-uk Kim 	if (unit != 0)
204dd7d207dSJung-uk Kim 		return;
205dd7d207dSJung-uk Kim 
206dd7d207dSJung-uk Kim 	/* Find the appropriate cpufreq device instance. */
207dd7d207dSJung-uk Kim 	cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
208dd7d207dSJung-uk Kim 	if (cf_dev == NULL) {
209dd7d207dSJung-uk Kim 		printf("tsc_levels_changed() called but no cpufreq device?\n");
210dd7d207dSJung-uk Kim 		return;
211dd7d207dSJung-uk Kim 	}
212dd7d207dSJung-uk Kim 
213dd7d207dSJung-uk Kim 	/* Get settings from the device and find the max frequency. */
214dd7d207dSJung-uk Kim 	count = 64;
215dd7d207dSJung-uk Kim 	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
216dd7d207dSJung-uk Kim 	if (levels == NULL)
217dd7d207dSJung-uk Kim 		return;
218dd7d207dSJung-uk Kim 	error = CPUFREQ_LEVELS(cf_dev, levels, &count);
219dd7d207dSJung-uk Kim 	if (error == 0 && count != 0) {
220dd7d207dSJung-uk Kim 		max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
221dd7d207dSJung-uk Kim 		set_cputicker(rdtsc, max_freq, 1);
222dd7d207dSJung-uk Kim 	} else
223dd7d207dSJung-uk Kim 		printf("tsc_levels_changed: no max freq found\n");
224dd7d207dSJung-uk Kim 	free(levels, M_TEMP);
225dd7d207dSJung-uk Kim }
226dd7d207dSJung-uk Kim 
227dd7d207dSJung-uk Kim /*
228dd7d207dSJung-uk Kim  * If the TSC timecounter is in use, veto the pending change.  It may be
229dd7d207dSJung-uk Kim  * possible in the future to handle a dynamically-changing timecounter rate.
230dd7d207dSJung-uk Kim  */
231dd7d207dSJung-uk Kim static void
232dd7d207dSJung-uk Kim tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
233dd7d207dSJung-uk Kim {
234dd7d207dSJung-uk Kim 
235dd7d207dSJung-uk Kim 	if (*status != 0 || timecounter != &tsc_timecounter)
236dd7d207dSJung-uk Kim 		return;
237dd7d207dSJung-uk Kim 
238dd7d207dSJung-uk Kim 	printf("timecounter TSC must not be in use when "
239dd7d207dSJung-uk Kim 	    "changing frequencies; change denied\n");
240dd7d207dSJung-uk Kim 	*status = EBUSY;
241dd7d207dSJung-uk Kim }
242dd7d207dSJung-uk Kim 
243dd7d207dSJung-uk Kim /* Update TSC freq with the value indicated by the caller. */
244dd7d207dSJung-uk Kim static void
245dd7d207dSJung-uk Kim tsc_freq_changed(void *arg, const struct cf_level *level, int status)
246dd7d207dSJung-uk Kim {
247dd7d207dSJung-uk Kim 
248dd7d207dSJung-uk Kim 	/* If there was an error during the transition, don't do anything. */
249dd7d207dSJung-uk Kim 	if (status != 0)
250dd7d207dSJung-uk Kim 		return;
251dd7d207dSJung-uk Kim 
252dd7d207dSJung-uk Kim 	/* Total setting for this level gives the new frequency in MHz. */
253dd7d207dSJung-uk Kim 	tsc_freq = (uint64_t)level->total_set.freq * 1000000;
254dd7d207dSJung-uk Kim 	tsc_timecounter.tc_frequency = tsc_freq;
255dd7d207dSJung-uk Kim }
256dd7d207dSJung-uk Kim 
257dd7d207dSJung-uk Kim static int
258dd7d207dSJung-uk Kim sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
259dd7d207dSJung-uk Kim {
260dd7d207dSJung-uk Kim 	int error;
261dd7d207dSJung-uk Kim 	uint64_t freq;
262dd7d207dSJung-uk Kim 
263dd7d207dSJung-uk Kim 	if (tsc_timecounter.tc_frequency == 0)
264dd7d207dSJung-uk Kim 		return (EOPNOTSUPP);
265dd7d207dSJung-uk Kim 	freq = tsc_freq;
266dd7d207dSJung-uk Kim 	error = sysctl_handle_quad(oidp, &freq, 0, req);
267dd7d207dSJung-uk Kim 	if (error == 0 && req->newptr != NULL) {
268dd7d207dSJung-uk Kim 		tsc_freq = freq;
269dd7d207dSJung-uk Kim 		tsc_timecounter.tc_frequency = tsc_freq;
270dd7d207dSJung-uk Kim 	}
271dd7d207dSJung-uk Kim 	return (error);
272dd7d207dSJung-uk Kim }
273dd7d207dSJung-uk Kim 
274dd7d207dSJung-uk Kim SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_QUAD | CTLFLAG_RW,
275dd7d207dSJung-uk Kim     0, 0, sysctl_machdep_tsc_freq, "QU", "");
276dd7d207dSJung-uk Kim 
/*
 * Timecounter read method: return the current TSC value converted to
 * unsigned, which is what the timecounter interface expects.
 */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	unsigned ticks_now;

	ticks_now = (unsigned)rdtsc();
	return (ticks_now);
}
282