xref: /dragonfly/sys/platform/pc64/isa/clock.c (revision 6e4c95df)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * Copyright (c) 2008 The DragonFly Project.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * William Jolitz and Don Ahn.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
34  * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
35  */
36 
37 /*
38  * Routines to handle clock hardware.
39  */
40 
41 /*
42  * inittodr, settodr and support routines written
43  * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
44  *
45  * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
46  */
47 
48 #if 0
49 #include "opt_clock.h"
50 #endif
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/eventhandler.h>
55 #include <sys/time.h>
56 #include <sys/kernel.h>
57 #include <sys/bus.h>
58 #include <sys/sysctl.h>
59 #include <sys/cons.h>
60 #include <sys/kbio.h>
61 #include <sys/systimer.h>
62 #include <sys/globaldata.h>
63 #include <sys/machintr.h>
64 #include <sys/interrupt.h>
65 
66 #include <sys/thread2.h>
67 
68 #include <machine/clock.h>
69 #include <machine/cputypes.h>
70 #include <machine/frame.h>
71 #include <machine/ipl.h>
72 #include <machine/limits.h>
73 #include <machine/md_var.h>
74 #include <machine/psl.h>
75 #include <machine/segments.h>
76 #include <machine/smp.h>
77 #include <machine/specialreg.h>
78 #include <machine/intr_machdep.h>
79 
80 #include <machine_base/apic/ioapic.h>
81 #include <machine_base/apic/ioapic_abi.h>
82 #include <machine_base/icu/icu.h>
83 #include <bus/isa/isa.h>
84 #include <bus/isa/rtc.h>
85 #include <machine_base/isa/timerreg.h>
86 
/*
 * Forward declarations for file-local helpers that are referenced
 * (by DODELAY()/CHECKTIMEOUT() and startrtclock() respectively) before
 * their definitions appear further down in this file.
 */
87 static void i8254_restore(void);
88 static void resettodr_on_shutdown(void *arg __unused);
89 
90 /*
91  * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
92  * can use a simple formula for leap years.
 *
 * LEAPYEAR(y) evaluates to 1 when y is divisible by 4 and to 0 otherwise.
 * DAYSPERYEAR is the length of a non-leap year (February counted as 28).
93  */
94 #define	LEAPYEAR(y) ((u_int)(y) % 4 == 0)
95 #define DAYSPERYEAR   (31+28+31+30+31+30+31+31+30+31+30+31)
96 
/*
 * Default i8254 input frequency in Hz.  It seeds the .freq fields of
 * i8254_cputimer and i8254_cputimer_intr below and can be overridden by
 * defining TIMER_FREQ before this point.
 */
97 #ifndef TIMER_FREQ
98 #define TIMER_FREQ   1193182
99 #endif
100 
/*
 * Mode-port select bits and data port of the 8254 counter that serves as
 * the free-running wall timer.  Both are filled in by
 * i8254_cputimer_construct() and read by i8254_cputimer_count().
 */
101 static uint8_t i8254_walltimer_sel;
102 static uint16_t i8254_walltimer_cntr;
103 
104 int	adjkerntz;		/* local offset from GMT in seconds */
105 int	disable_rtc_set;	/* disable resettodr() if != 0 */
/* tsc_present / tsc_invariant / tsc_frequency are set by startrtclock() */
106 int	tsc_present;
107 int	tsc_invariant;
108 int	tsc_mpsync;
109 int64_t	tsc_frequency;
110 int	tsc_is_broken;
111 int	wall_cmos_clock;	/* wall CMOS clock assumed if != 0 */
/* set when timer0 is armed by i8254_intr_reload(), cleared by clkintr() */
112 int	timer0_running;
/* ownership state of each of the three 8254 counters */
113 enum tstate { RELEASED, ACQUIRED };
114 enum tstate timer0_state;
115 enum tstate timer1_state;
116 enum tstate timer2_state;
117 
/* non-zero while a sysbeep() is in progress; cleared by sysbeepstop() */
118 static	int	beeping = 0;
/* month lengths of a non-leap year, indexed by month - 1 */
119 static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
/* values written to the RTC status A / status B registers */
120 static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
121 static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
/* set to 1 once inittodr() has successfully read the RTC */
122 static  int	rtc_loaded;
123 
/*
 * Divisor computed by i8254_intr_config() to convert system timebase
 * ticks into 8254 ticks; 0 means no simple divisor exists.
 */
124 static int i8254_cputimer_div;
125 
/*
 * When i8254_nointr is non-zero i8254_restore() does not register the
 * 8254 as an interrupt timer.  NOTE(review): the code that sets it is
 * not visible in this part of the file.
 */
126 static int i8254_nointr;
127 static int i8254_intr_disable = 1;
128 TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);
129 
/*
 * When 0, startrtclock() does not adopt RTC-based calibration results
 * and keeps the default i8254 frequency.
 */
130 static int calibrate_timers_with_rtc = 0;
131 TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);
132 
/* callout used by sysbeep() to schedule sysbeepstop() */
133 static struct callout sysbeepstop_ch;
134 
135 static sysclock_t i8254_cputimer_count(void);
136 static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
137 static void i8254_cputimer_destruct(struct cputimer *cputimer);
138 
/*
 * cputimer descriptor for the free-running 8254 counter.  The name and
 * type fields are overwritten by i8254_cputimer_construct() once it is
 * known whether timer 1 or timer 2 is used; freq starts at TIMER_FREQ and
 * may be replaced by a calibrated value in startrtclock().
 */
139 static struct cputimer	i8254_cputimer = {
140     .next		= SLIST_ENTRY_INITIALIZER,
141     .name		= "i8254",
142     .pri		= CPUTIMER_PRI_8254,
143     .type		= 0,	/* determined later */
144     .count		= i8254_cputimer_count,
145     .fromhz		= cputimer_default_fromhz,
146     .fromus		= cputimer_default_fromus,
147     .construct		= i8254_cputimer_construct,
148     .destruct		= i8254_cputimer_destruct,
149     .freq		= TIMER_FREQ
150 };
151 
/*
 * TSC based cputimer.  Two count routines are declared (mfence and lfence
 * variants); which one ends up in .count, and the value of .freq, are
 * filled in later by code outside this part of the file.
 */
152 static sysclock_t tsc_cputimer_count_mfence(void);
153 static sysclock_t tsc_cputimer_count_lfence(void);
154 static void tsc_cputimer_construct(struct cputimer *, sysclock_t);
155 
156 static struct cputimer	tsc_cputimer = {
157     .next		= SLIST_ENTRY_INITIALIZER,
158     .name		= "TSC",
159     .pri		= CPUTIMER_PRI_TSC,
160     .type		= CPUTIMER_TSC,
161     .count		= NULL,	/* determined later */
162     .fromhz		= cputimer_default_fromhz,
163     .fromus		= cputimer_default_fromus,
164     .construct		= tsc_cputimer_construct,
165     .destruct		= cputimer_default_destruct,
166     .freq		= 0	/* determined later */
167 };
168 
/*
 * TSC based cpucounter descriptor.  Only the priority and type are known
 * statically; freq, count and flags are placeholders that are filled in
 * later by code outside this part of the file.
 */
169 static struct cpucounter tsc_cpucounter = {
170     .freq		= 0,	/* determined later */
171     .count		= NULL,	/* determined later */
172     .flags		= 0,	/* adjusted later */
173     .prio		= CPUCOUNTER_PRIO_TSC,
174     .type		= CPUCOUNTER_TSC
175 };
176 
177 static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
178 static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
179 static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);
180 
/*
 * Interrupt timer descriptor for 8254 timer0.  The reload, config and
 * initclock hooks are implemented in this file; the remaining hooks use
 * the cputimer_intr defaults.  freq starts at TIMER_FREQ and may be
 * replaced by a calibrated value in startrtclock().
 */
181 static struct cputimer_intr i8254_cputimer_intr = {
182     .freq = TIMER_FREQ,
183     .reload = i8254_intr_reload,
184     .enable = cputimer_intr_default_enable,
185     .config = i8254_intr_config,
186     .restart = cputimer_intr_default_restart,
187     .pmfixup = cputimer_intr_default_pmfixup,
188     .initclock = i8254_intr_initclock,
189     .pcpuhand = NULL,
190     .next = SLIST_ENTRY_INITIALIZER,
191     .name = "i8254",
192     .type = CPUTIMER_INTR_8254,
193     .prio = CPUTIMER_INTR_PRIO_8254,
194     .caps = CPUTIMER_INTR_CAP_PS,
195     .priv = NULL
196 };
197 
198 /*
199  * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
200  * counting as of this interrupt.  We use timer1 in free-running mode (not
201  * generating any interrupts) as our main counter.  Each cpu has timeouts
202  * pending.
203  *
204  * This code is INTR_MPSAFE and may be called without the BGL held.
205  */
206 static void
207 clkintr(void *dummy, void *frame_arg)
208 {
209 	static sysclock_t sysclock_count;	/* NOTE! Must be static */
210 	struct globaldata *gd = mycpu;
211 	struct globaldata *gscan;
212 	int n;
213 
214 	/*
215 	 * SWSTROBE mode is a one-shot, the timer is no longer running
216 	 */
217 	timer0_running = 0;
218 
219 	/*
220 	 * XXX the dispatcher needs work.  right now we call systimer_intr()
221 	 * directly or via IPI for any cpu with systimers queued, which is
222 	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
223 	 */
224 	sysclock_count = sys_cputimer->count();
225 	for (n = 0; n < ncpus; ++n) {
226 	    gscan = globaldata_find(n);
227 	    if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
228 		continue;
229 	    if (gscan != gd) {
230 		lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
231 				&sysclock_count, 1);
232 	    } else {
233 		systimer_intr(&sysclock_count, 0, frame_arg);
234 	    }
235 	}
236 }
237 
238 
239 /*
240  * NOTE! not MP safe.
241  */
242 int
243 acquire_timer2(int mode)
244 {
245 	if (timer2_state != RELEASED)
246 		return (-1);
247 	timer2_state = ACQUIRED;
248 
249 	/*
250 	 * This access to the timer registers is as atomic as possible
251 	 * because it is a single instruction.  We could do better if we
252 	 * knew the rate.
253 	 */
254 	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
255 	return (0);
256 }
257 
258 int
259 release_timer2(void)
260 {
261 	if (timer2_state != ACQUIRED)
262 		return (-1);
263 	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
264 	timer2_state = RELEASED;
265 	return (0);
266 }
267 
268 #include "opt_ddb.h"
269 #ifdef DDB
270 #include <ddb/ddb.h>
271 
/*
 * Kernel debugger command (only built with DDB) that dumps the RTC
 * year/month/day, hours:minutes:seconds and the status A, status B and
 * interrupt registers.  Each value is printed as the raw register
 * content in two-digit hex, exactly as returned by rtcin().
 */
272 DB_SHOW_COMMAND(rtc, rtc)
273 {
274 	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
275 	       rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
276 	       rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
277 	       rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
278 }
279 #endif /* DDB */
280 
281 /*
282  * Return the current cpu timer count as a 32 bit integer.
283  */
284 static
285 sysclock_t
286 i8254_cputimer_count(void)
287 {
288 	static uint16_t cputimer_last;
289 	uint16_t count;
290 	sysclock_t ret;
291 
292 	clock_lock();
293 	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
294 	count = (uint8_t)inb(i8254_walltimer_cntr);		/* get countdown */
295 	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
296 	count = -count;					/* -> countup */
297 	if (count < cputimer_last)			/* rollover */
298 		i8254_cputimer.base += 0x00010000;
299 	ret = i8254_cputimer.base | count;
300 	cputimer_last = count;
301 	clock_unlock();
302 	return(ret);
303 }
304 
305 /*
306  * This function is called whenever the system timebase changes, allowing
307  * us to calculate what is needed to convert a system timebase tick
308  * into an 8254 tick for the interrupt timer.  If we can convert to a
309  * simple shift, multiplication, or division, we do so.  Otherwise 64
310  * bit arithmatic is required every time the interrupt timer is reloaded.
311  */
312 static void
313 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
314 {
315     int freq;
316     int div;
317 
318     /*
319      * Will a simple divide do the trick?
320      */
321     div = (timer->freq + (cti->freq / 2)) / cti->freq;
322     freq = cti->freq * div;
323 
324     if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
325 	i8254_cputimer_div = div;
326     else
327 	i8254_cputimer_div = 0;
328 }
329 
330 /*
331  * Reload for the next timeout.  It is possible for the reload value
332  * to be 0 or negative, indicating that an immediate timer interrupt
333  * is desired.  For now make the minimum 2 ticks.
334  *
335  * We may have to convert from the system timebase to the 8254 timebase.
336  */
337 static void
338 i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
339 {
340     uint16_t count;
341 
342     if (i8254_cputimer_div)
343 	reload /= i8254_cputimer_div;
344     else
345 	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
346 
347     if ((int)reload < 2)
348 	reload = 2;
349 
350     clock_lock();
351     if (timer0_running) {
352 	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* count-down timer */
353 	count = (uint8_t)inb(TIMER_CNTR0);		/* lsb */
354 	count |= ((uint8_t)inb(TIMER_CNTR0) << 8);	/* msb */
355 	if (reload < count) {
356 	    outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
357 	    outb(TIMER_CNTR0, (uint8_t)reload); 	/* lsb */
358 	    outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
359 	}
360     } else {
361 	timer0_running = 1;
362 	if (reload > 0xFFFF)
363 	    reload = 0;		/* full count */
364 	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
365 	outb(TIMER_CNTR0, (uint8_t)reload); 		/* lsb */
366 	outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
367     }
368     clock_unlock();
369 }
370 
371 /*
372  * DELAY(usec)	     - Spin for the specified number of microseconds.
373  * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
374  *		       but do a thread switch in the loop
375  *
376  * Relies on timer 1 counting down from (cputimer_freq / hz)
377  * Note: timer had better have been programmed before this is first used!
378  */
379 static void
380 DODELAY(int n, int doswitch)
381 {
382 	ssysclock_t delta, ticks_left;
383 	sysclock_t prev_tick, tick;
384 
385 #ifdef DELAYDEBUG
386 	int getit_calls = 1;
387 	int n1;
388 	static int state = 0;
389 
390 	if (state == 0) {
391 		state = 1;
392 		for (n1 = 1; n1 <= 10000000; n1 *= 10)
393 			DELAY(n1);
394 		state = 2;
395 	}
396 	if (state == 1)
397 		kprintf("DELAY(%d)...", n);
398 #endif
399 	/*
400 	 * Guard against the timer being uninitialized if we are called
401 	 * early for console i/o.
402 	 */
403 	if (timer0_state == RELEASED)
404 		i8254_restore();
405 
406 	/*
407 	 * Read the counter first, so that the rest of the setup overhead is
408 	 * counted.  Then calculate the number of hardware timer ticks
409 	 * required, rounding up to be sure we delay at least the requested
410 	 * number of microseconds.
411 	 */
412 	prev_tick = sys_cputimer->count();
413 	ticks_left = ((u_int)n * (int64_t)sys_cputimer->freq + 999999) /
414 		     1000000;
415 
416 	/*
417 	 * Loop until done.
418 	 */
419 	while (ticks_left > 0) {
420 		tick = sys_cputimer->count();
421 #ifdef DELAYDEBUG
422 		++getit_calls;
423 #endif
424 		delta = tick - prev_tick;
425 		prev_tick = tick;
426 		if (delta < 0)
427 			delta = 0;
428 		ticks_left -= delta;
429 		if (doswitch && ticks_left > 0)
430 			lwkt_switch();
431 		cpu_pause();
432 	}
433 #ifdef DELAYDEBUG
434 	if (state == 1)
435 		kprintf(" %d calls to getit() at %d usec each\n",
436 		       getit_calls, (n + 5) / getit_calls);
437 #endif
438 }
439 
/*
 * Spin for at least 'n' microseconds.  DELAY() never switches threads.
 */
void
DELAY(int n)
{
	const int no_switch = 0;

	DODELAY(n, no_switch);
}
448 
449 /*
450  * Returns non-zero if the specified time period has elapsed.  Call
451  * first with last_clock set to 0.
452  */
453 int
454 CHECKTIMEOUT(TOTALDELAY *tdd)
455 {
456 	sysclock_t delta;
457 	int us;
458 
459 	if (tdd->started == 0) {
460 		if (timer0_state == RELEASED)
461 			i8254_restore();
462 		tdd->last_clock = sys_cputimer->count();
463 		tdd->started = 1;
464 		return(0);
465 	}
466 	delta = sys_cputimer->count() - tdd->last_clock;
467 	us = (u_int64_t)delta * (u_int64_t)1000000 /
468 	     (u_int64_t)sys_cputimer->freq;
469 	tdd->last_clock += (u_int64_t)us * (u_int64_t)sys_cputimer->freq /
470 			   1000000;
471 	tdd->us -= us;
472 	return (tdd->us < 0);
473 }
474 
475 
476 /*
477  * DRIVERSLEEP() does not switch if called with a spinlock held or
478  * from a hard interrupt.
479  */
480 void
481 DRIVERSLEEP(int usec)
482 {
483 	globaldata_t gd = mycpu;
484 
485 	if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
486 		DODELAY(usec, 0);
487 	} else {
488 		DODELAY(usec, 1);
489 	}
490 }
491 
492 static void
493 sysbeepstop(void *chan)
494 {
495 	outb(IO_PPI, inb(IO_PPI)&0xFC);	/* disable counter2 output to speaker */
496 	beeping = 0;
497 	release_timer2();
498 }
499 
500 int
501 sysbeep(int pitch, int period)
502 {
503 	if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
504 		return(-1);
505 	if (sysbeep_enable == 0)
506 		return(-1);
507 	/*
508 	 * Nobody else is using timer2, we do not need the clock lock
509 	 */
510 	outb(TIMER_CNTR2, pitch);
511 	outb(TIMER_CNTR2, (pitch>>8));
512 	if (!beeping) {
513 		/* enable counter2 output to speaker */
514 		outb(IO_PPI, inb(IO_PPI) | 3);
515 		beeping = period;
516 		callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
517 	}
518 	return (0);
519 }
520 
521 /*
522  * RTC support routines
523  */
524 
525 int
526 rtcin(int reg)
527 {
528 	u_char val;
529 
530 	crit_enter();
531 	outb(IO_RTC, reg);
532 	inb(0x84);
533 	val = inb(IO_RTC + 1);
534 	inb(0x84);
535 	crit_exit();
536 	return (val);
537 }
538 
539 static __inline void
540 writertc(u_char reg, u_char val)
541 {
542 	crit_enter();
543 	inb(0x84);
544 	outb(IO_RTC, reg);
545 	inb(0x84);
546 	outb(IO_RTC + 1, val);
547 	inb(0x84);		/* XXX work around wrong order in rtcin() */
548 	crit_exit();
549 }
550 
/*
 * Read RTC register 'port' and convert the BCD value to binary.
 */
static __inline int
readrtc(int port)
{
	int bcd;

	bcd = rtcin(port);
	return(bcd2bin(bcd));
}
556 
557 static u_int
558 calibrate_clocks(void)
559 {
560 	u_int64_t old_tsc;
561 	u_int tot_count;
562 	sysclock_t count, prev_count;
563 	int sec, start_sec, timeout;
564 
565 	if (bootverbose)
566 	        kprintf("Calibrating clock(s) ...\n");
567 	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
568 		goto fail;
569 	timeout = 100000000;
570 
571 	/* Read the mc146818A seconds counter. */
572 	for (;;) {
573 		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
574 			sec = rtcin(RTC_SEC);
575 			break;
576 		}
577 		if (--timeout == 0)
578 			goto fail;
579 	}
580 
581 	/* Wait for the mC146818A seconds counter to change. */
582 	start_sec = sec;
583 	for (;;) {
584 		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
585 			sec = rtcin(RTC_SEC);
586 			if (sec != start_sec)
587 				break;
588 		}
589 		if (--timeout == 0)
590 			goto fail;
591 	}
592 
593 	/* Start keeping track of the i8254 counter. */
594 	prev_count = sys_cputimer->count();
595 	tot_count = 0;
596 
597 	if (tsc_present)
598 		old_tsc = rdtsc();
599 	else
600 		old_tsc = 0;		/* shut up gcc */
601 
602 	/*
603 	 * Wait for the mc146818A seconds counter to change.  Read the i8254
604 	 * counter for each iteration since this is convenient and only
605 	 * costs a few usec of inaccuracy. The timing of the final reads
606 	 * of the counters almost matches the timing of the initial reads,
607 	 * so the main cause of inaccuracy is the varying latency from
608 	 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
609 	 * rtcin(RTC_SEC) that returns a changed seconds count.  The
610 	 * maximum inaccuracy from this cause is < 10 usec on 486's.
611 	 */
612 	start_sec = sec;
613 	for (;;) {
614 		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
615 			sec = rtcin(RTC_SEC);
616 		count = sys_cputimer->count();
617 		tot_count += (int)(count - prev_count);
618 		prev_count = count;
619 		if (sec != start_sec)
620 			break;
621 		if (--timeout == 0)
622 			goto fail;
623 	}
624 
625 	/*
626 	 * Read the cpu cycle counter.  The timing considerations are
627 	 * similar to those for the i8254 clock.
628 	 */
629 	if (tsc_present) {
630 		tsc_frequency = rdtsc() - old_tsc;
631 		if (bootverbose) {
632 			kprintf("TSC clock: %jd Hz (Method A)\n",
633 			    (intmax_t)tsc_frequency);
634 		}
635 	}
636 
637 	kprintf("i8254 clock: %u Hz\n", tot_count);
638 	return (tot_count);
639 
640 fail:
641 	kprintf("failed, using default i8254 clock of %u Hz\n",
642 		i8254_cputimer.freq);
643 	return (i8254_cputimer.freq);
644 }
645 
646 static void
647 i8254_restore(void)
648 {
649 	timer0_state = ACQUIRED;
650 
651 	clock_lock();
652 
653 	/*
654 	 * Timer0 is our fine-grained variable clock interrupt
655 	 */
656 	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
657 	outb(TIMER_CNTR0, 2);	/* lsb */
658 	outb(TIMER_CNTR0, 0);	/* msb */
659 	clock_unlock();
660 
661 	if (!i8254_nointr) {
662 		cputimer_intr_register(&i8254_cputimer_intr);
663 		cputimer_intr_select(&i8254_cputimer_intr, 0);
664 	}
665 
666 	/*
667 	 * Timer1 or timer2 is our free-running clock, but only if another
668 	 * has not been selected.
669 	 */
670 	cputimer_register(&i8254_cputimer);
671 	cputimer_select(&i8254_cputimer, 0);
672 }
673 
674 static void
675 i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
676 {
677  	int which;
678 
679 	/*
680 	 * Should we use timer 1 or timer 2 ?
681 	 */
682 	which = 0;
683 	TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
684 	if (which != 1 && which != 2)
685 		which = 2;
686 
687 	switch(which) {
688 	case 1:
689 		timer->name = "i8254_timer1";
690 		timer->type = CPUTIMER_8254_SEL1;
691 		i8254_walltimer_sel = TIMER_SEL1;
692 		i8254_walltimer_cntr = TIMER_CNTR1;
693 		timer1_state = ACQUIRED;
694 		break;
695 	case 2:
696 		timer->name = "i8254_timer2";
697 		timer->type = CPUTIMER_8254_SEL2;
698 		i8254_walltimer_sel = TIMER_SEL2;
699 		i8254_walltimer_cntr = TIMER_CNTR2;
700 		timer2_state = ACQUIRED;
701 		break;
702 	}
703 
704 	timer->base = (oldclock + 0xFFFF) & ~0xFFFF;
705 
706 	clock_lock();
707 	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
708 	outb(i8254_walltimer_cntr, 0);	/* lsb */
709 	outb(i8254_walltimer_cntr, 0);	/* msb */
710 	outb(IO_PPI, inb(IO_PPI) | 1);	/* bit 0: enable gate, bit 1: spkr */
711 	clock_unlock();
712 }
713 
714 static void
715 i8254_cputimer_destruct(struct cputimer *timer)
716 {
717 	switch(timer->type) {
718 	case CPUTIMER_8254_SEL1:
719 	    timer1_state = RELEASED;
720 	    break;
721 	case CPUTIMER_8254_SEL2:
722 	    timer2_state = RELEASED;
723 	    break;
724 	default:
725 	    break;
726 	}
727 	timer->type = 0;
728 }
729 
730 static void
731 rtc_restore(void)
732 {
733 	/* Restore all of the RTC's "status" (actually, control) registers. */
734 	writertc(RTC_STATUSB, RTCSB_24HR);
735 	writertc(RTC_STATUSA, rtc_statusa);
736 	writertc(RTC_STATUSB, rtc_statusb);
737 }
738 
/*
 * Restore all the timers.
 *
 * Called to resynchronize core timekeeping after a long halt (e.g. from
 * apm_default_resume() and friends) and when munging of the 8254 has
 * been detected after a BIOS call.  Without this the cputimer_count()
 * delta may have grown too large for nanouptime() and friends to handle,
 * or (for 8254 munging) the SYSTIMER code might trigger prematurely.
 *
 * Both restores run inside a single critical section.
 */
void
timer_restore(void)
{
	crit_enter();

	/* restore timer_freq and hz */
	i8254_restore();

	/* reenable RTC interrupts */
	rtc_restore();

	crit_exit();
}
757 
758 /*
759  * Initialize 8254 timer 0 early so that it can be used in DELAY().
 *
 * In order, this routine:
 *  - records whether a TSC exists and whether it is flagged invariant,
 *  - puts the RTC status registers into a known state,
 *  - programs the 8254 via i8254_restore(),
 *  - optionally calibrates the i8254 (and TSC) against the RTC,
 *  - falls back to measuring the TSC over a one second DELAY(), and
 *  - registers resettodr_on_shutdown() as a shutdown_post_sync handler.
760  */
761 void
762 startrtclock(void)
763 {
764 	u_int delta, freq;
765 
766 	/*
767 	 * Can we use the TSC?
768 	 *
769 	 * NOTE: If running under qemu, probably a good idea to force the
770 	 *	 TSC because we are not likely to detect it as being
771 	 *	 invariant or mpsyncd if you don't.  This will greatly
772 	 *	 reduce SMP contention.
773 	 */
774 	if (cpu_feature & CPUID_TSC) {
775 		tsc_present = 1;
776 		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);
777 
778 		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
779 		     cpu_vendor_id == CPU_VENDOR_AMD) &&
780 		    cpu_exthigh >= 0x80000007) {
781 			u_int regs[4];
782 
783 			do_cpuid(0x80000007, regs);
			/*
			 * NOTE(review): bit 0x100 of regs[3] is presumably
			 * the invariant-TSC flag - confirm against the
			 * CPUID documentation.
			 */
784 			if (regs[3] & 0x100)
785 				tsc_invariant = 1;
786 		}
787 	} else {
788 		tsc_present = 0;
789 	}
790 
791 	/*
792 	 * Initial RTC state, don't do anything unexpected
793 	 */
794 	writertc(RTC_STATUSA, rtc_statusa);
795 	writertc(RTC_STATUSB, RTCSB_24HR);
796 
797 	/*
798 	 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
799 	 * generate an interrupt, which we will ignore for now.
800 	 *
801 	 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
802 	 * (so it counts a full 2^16 and repeats).  We will use this timer
803 	 * for our counting.
804 	 */
805 	i8254_restore();
806 
807 	/*
808 	 * When booting without verbose messages, it's pointless to run the
809 	 * calibrate_clocks() calibration code, when we don't use the
810 	 * results in any way. With bootverbose, we are at least printing
811 	 *  this information to the kernel log.
812 	 */
813 	if (calibrate_timers_with_rtc == 0 && !bootverbose)
814 		goto skip_rtc_based;
815 
816 	freq = calibrate_clocks();
817 #ifdef CLK_CALIBRATION_LOOP
	/* Debug aid: keep recalibrating until a console key is pressed. */
818 	if (bootverbose) {
819 		int c;
820 
821 		cnpoll(TRUE);
822 		kprintf("Press a key on the console to "
823 			"abort clock calibration\n");
824 		while ((c = cncheckc()) == -1 || c == NOKEY)
825 			calibrate_clocks();
826 		cnpoll(FALSE);
827 	}
828 #endif
829 
830 	/*
831 	 * Use the calibrated i8254 frequency if it seems reasonable.
832 	 * Otherwise use the default, and don't use the calibrated i586
833 	 * frequency.
834 	 */
	/* delta = |measured - default|; accepted when below 1% of default */
835 	delta = freq > i8254_cputimer.freq ?
836 			freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
837 	if (delta < i8254_cputimer.freq / 100) {
838 		if (calibrate_timers_with_rtc == 0) {
839 			kprintf(
840 "hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
841 			freq = i8254_cputimer.freq;
842 		}
843 		/*
844 		 * NOTE:
845 		 * Interrupt timer's freq must be adjusted
846 		 * before we change the cputimer's frequency.
847 		 */
848 		i8254_cputimer_intr.freq = freq;
849 		cputimer_set_frequency(&i8254_cputimer, freq);
850 	} else {
851 		if (bootverbose)
852 			kprintf(
853 		    "%d Hz differs from default of %d Hz by more than 1%%\n",
854 			       freq, i8254_cputimer.freq);
		/* discard the TSC value measured by calibrate_clocks() */
855 		tsc_frequency = 0;
856 	}
857 
858 	if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
859 		kprintf(
860 "hw.calibrate_timers_with_rtc not set - using old calibration method\n");
861 		tsc_frequency = 0;
862 	}
863 
864 skip_rtc_based:
865 	if (tsc_present && tsc_frequency == 0) {
866 		/*
867 		 * Calibration of the i586 clock relative to the mc146818A
868 		 * clock failed.  Do a less accurate calibration relative
869 		 * to the i8254 clock.
870 		 */
871 		u_int64_t old_tsc = rdtsc();
872 
873 		DELAY(1000000);
874 		tsc_frequency = rdtsc() - old_tsc;
875 		if (bootverbose && calibrate_timers_with_rtc) {
876 			kprintf("TSC clock: %jd Hz (Method B)\n",
877 			    (intmax_t)tsc_frequency);
878 		}
879 	}
880 
881 	if (tsc_present) {
882 		kprintf("TSC%s clock: %jd Hz\n",
883 		    tsc_invariant ? " invariant" : "",
884 		    (intmax_t)tsc_frequency);
885 	}
886 
	/* Write the time of day back to the RTC late in shutdown. */
887 	EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown, NULL, SHUTDOWN_PRI_LAST);
888 }
889 
890 /*
891  * Sync the time of day back to the RTC on shutdown, but only if
892  * we have already loaded it and have not crashed.
893  */
894 static void
895 resettodr_on_shutdown(void *arg __unused)
896 {
897  	if (rtc_loaded && panicstr == NULL) {
898 		resettodr();
899 	}
900 }
901 
902 /*
903  * Initialize the time of day register, based on the time base which is, e.g.
904  * from a filesystem.
905  */
906 void
907 inittodr(time_t base)
908 {
909 	unsigned long	sec, days;
910 	int		year, month;
911 	int		y, m;
912 	struct timespec ts;
913 
914 	if (base) {
915 		ts.tv_sec = base;
916 		ts.tv_nsec = 0;
917 		set_timeofday(&ts);
918 	}
919 
920 	/* Look if we have a RTC present and the time is valid */
921 	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
922 		goto wrong_time;
923 
924 	/* wait for time update to complete */
925 	/* If RTCSA_TUP is zero, we have at least 244us before next update */
926 	crit_enter();
927 	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
928 		crit_exit();
929 		crit_enter();
930 	}
931 
932 	days = 0;
933 #ifdef USE_RTC_CENTURY
934 	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
935 #else
936 	year = readrtc(RTC_YEAR) + 1900;
937 	if (year < 1970)
938 		year += 100;
939 #endif
940 	if (year < 1970) {
941 		crit_exit();
942 		goto wrong_time;
943 	}
944 	month = readrtc(RTC_MONTH);
945 	for (m = 1; m < month; m++)
946 		days += daysinmonth[m-1];
947 	if ((month > 2) && LEAPYEAR(year))
948 		days ++;
949 	days += readrtc(RTC_DAY) - 1;
950 	for (y = 1970; y < year; y++)
951 		days += DAYSPERYEAR + LEAPYEAR(y);
952 	sec = ((( days * 24 +
953 		  readrtc(RTC_HRS)) * 60 +
954 		  readrtc(RTC_MIN)) * 60 +
955 		  readrtc(RTC_SEC));
956 	/* sec now contains the number of seconds, since Jan 1 1970,
957 	   in the local time zone */
958 
959 	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
960 
961 	y = (int)(time_second - sec);
962 	if (y <= -2 || y >= 2) {
963 		/* badly off, adjust it */
964 		ts.tv_sec = sec;
965 		ts.tv_nsec = 0;
966 		set_timeofday(&ts);
967 	}
968 	rtc_loaded = 1;
969 	crit_exit();
970 	return;
971 
972 wrong_time:
973 	kprintf("Invalid time in real time clock.\n");
974 	kprintf("Check and reset the date immediately!\n");
975 }
976 
977 /*
978  * Write system time back to RTC
979  */
980 void
981 resettodr(void)
982 {
983 	struct timeval tv;
984 	unsigned long tm;
985 	int m;
986 	int y;
987 
988 	if (disable_rtc_set)
989 		return;
990 
991 	microtime(&tv);
992 	tm = tv.tv_sec;
993 
994 	crit_enter();
995 	/* Disable RTC updates and interrupts. */
996 	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
997 
998 	/* Calculate local time to put in RTC */
999 
1000 	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
1001 
1002 	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
1003 	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
1004 	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */
1005 
1006 	/* We have now the days since 01-01-1970 in tm */
1007 	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
1008 	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
1009 	     tm >= m;
1010 	     y++,      m = DAYSPERYEAR + LEAPYEAR(y))
1011 	     tm -= m;
1012 
1013 	/* Now we have the years in y and the day-of-the-year in tm */
1014 	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
1015 #ifdef USE_RTC_CENTURY
1016 	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
1017 #endif
1018 	for (m = 0; ; m++) {
1019 		int ml;
1020 
1021 		ml = daysinmonth[m];
1022 		if (m == 1 && LEAPYEAR(y))
1023 			ml++;
1024 		if (tm < ml)
1025 			break;
1026 		tm -= ml;
1027 	}
1028 
1029 	writertc(RTC_MONTH, bin2bcd(m + 1));            /* Write back Month   */
1030 	writertc(RTC_DAY, bin2bcd(tm + 1));             /* Write back Month Day */
1031 
1032 	/* Reenable RTC updates and interrupts. */
1033 	writertc(RTC_STATUSB, rtc_statusb);
1034 	crit_exit();
1035 }
1036 
1037 static int
1038 i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
1039 {
1040 	sysclock_t base;
1041 	long lastcnt;
1042 
1043 	/*
1044 	 * Following code assumes the 8254 is the cpu timer,
1045 	 * so make sure it is.
1046 	 */
1047 	KKASSERT(sys_cputimer == &i8254_cputimer);
1048 	KKASSERT(cti == &i8254_cputimer_intr);
1049 
1050 	lastcnt = get_interrupt_counter(irq, mycpuid);
1051 
1052 	/*
1053 	 * Force an 8254 Timer0 interrupt and wait 1/100s for
1054 	 * it to happen, then see if we got it.
1055 	 */
1056 	kprintf("IOAPIC: testing 8254 interrupt delivery\n");
1057 
1058 	i8254_intr_reload(cti, 2);
1059 	base = sys_cputimer->count();
1060 	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
1061 		; /* nothing */
1062 
1063 	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0)
1064 		return ENOENT;
1065 	return 0;
1066 }
1067 
/*
 * Start both clocks running.  DragonFly note: the stat clock is no longer
 * used.  Instead, 8254 based systimers are used for all major clock
 * interrupts.
 *
 * cti      - the interrupt cputimer being initialized; it is handed to
 *            i8254_ioapic_trial() and deregistered on the nointr path.
 * selected - when false, setup failures are reported with kprintf() and
 *            the timer is deregistered; when true, the same failures
 *            panic.
 *
 * With the I/O APIC enabled the clock interrupt is first looked up as a
 * legacy edge/active-high IRQ 0.  If that lookup fails, or if the
 * delivery trial on the found IRQ fails, "mixed mode" is configured via
 * ioapic_conf_legacy_extint() and the registration and trial are
 * repeated once.
 */
static void
i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
{
	void *clkdesc = NULL;
	int irq = 0, mixed_mode = 0, error;

	/* Must be called on cpu 0. */
	KKASSERT(mycpuid == 0);
	callout_init_mp(&sysbeepstop_ch);

	/* Not the selected timer and explicitly disabled: give up now. */
	if (!selected && i8254_intr_disable)
		goto nointr;

	/*
	 * The stat interrupt mask is different without the
	 * statistics clock.  Also, don't set the interrupt
	 * flag which would normally cause the RTC to generate
	 * interrupts.
	 */
	rtc_statusb = RTCSB_24HR;

	/* Finish initializing 8254 timer 0. */
	if (ioapic_enable) {
		irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
			INTR_POLARITY_HIGH);
		if (irq < 0) {
			/*
			 * Reached either by falling through (no IRQ found
			 * above) or by the goto issued after a failed
			 * delivery trial further down.
			 */
mixed_mode_setup:
			error = ioapic_conf_legacy_extint(0);
			if (!error) {
				irq = machintr_legacy_intr_find(0,
				    INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
				if (irq < 0)
					error = ENOENT;
			}

			if (error) {
				if (!selected) {
					kprintf("IOAPIC: setup mixed mode for "
						"irq 0 failed: %d\n", error);
					goto nointr;
				} else {
					panic("IOAPIC: setup mixed mode for "
					      "irq 0 failed: %d\n", error);
				}
			}
			/* Remember that the fallback is already in use. */
			mixed_mode = 1;
		}
		/*
		 * Keep the handle so the handler can be unregistered if
		 * the delivery trial fails and mixed mode is tried.
		 */
		clkdesc = register_int(irq, clkintr, NULL, "clk",
				       NULL,
				       INTR_EXCL | INTR_CLOCK |
				       INTR_NOPOLL | INTR_MPSAFE |
				       INTR_NOENTROPY, 0);
	} else {
		/* No I/O APIC: the clock handler is registered on IRQ 0. */
		register_int(0, clkintr, NULL, "clk", NULL,
			     INTR_EXCL | INTR_CLOCK |
			     INTR_NOPOLL | INTR_MPSAFE |
			     INTR_NOENTROPY, 0);
	}

	/* Initialize RTC. */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	if (ioapic_enable) {
		/* Verify that the interrupt really arrives on 'irq'. */
		error = i8254_ioapic_trial(irq, cti);
		if (error) {
			if (mixed_mode) {
				/*
				 * Mixed mode was already the fallback; there
				 * is nothing else to try.
				 *
				 * NOTE(review): the handler registered above
				 * (clkdesc) is not unregistered before the
				 * jump to nointr -- confirm this is intended.
				 */
				if (!selected) {
					kprintf("IOAPIC: mixed mode for irq %d "
						"trial failed: %d\n",
						irq, error);
					goto nointr;
				} else {
					panic("IOAPIC: mixed mode for irq %d "
					      "trial failed: %d\n", irq, error);
				}
			} else {
				/*
				 * Drop the handler registered on the IRQ
				 * that failed the trial and retry through
				 * the mixed mode setup path above.
				 */
				kprintf("IOAPIC: warning 8254 is not connected "
					"to the correct pin, try mixed mode\n");
				unregister_int(clkdesc, 0);
				goto mixed_mode_setup;
			}
		}
	}
	return;

nointr:
	/* The 8254 cannot (or must not) be used as interrupt cputimer. */
	i8254_nointr = 1; /* don't try to register again */
	cputimer_intr_deregister(cti);
}
1162 
1163 void
1164 setstatclockrate(int newhz)
1165 {
1166 	if (newhz == RTC_PROFRATE)
1167 		rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1168 	else
1169 		rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1170 	writertc(RTC_STATUSA, rtc_statusa);
1171 }
1172 
#if 0
/*
 * Compiled out.  Timecounter read routine that returns the current
 * rdtsc() value converted to 'unsigned'; the 'tc' argument is not used.
 */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	return (rdtsc());
}
#endif
1180 
#ifdef KERN_TIMESTAMP
/*
 * Debug timestamp ring, exported read-only as debug.timestamp.  Records
 * are stored as pairs: the low 32 bits of the TSC followed by the
 * caller supplied tag.
 */
#define KERN_TIMESTAMP_SIZE 16384
static u_long tsc[KERN_TIMESTAMP_SIZE] ;
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");

/*
 * Append one (TSC, x) record to the ring, wrap to the start once the
 * end of the array is reached, and zero the slot that will be written
 * next so the most recent record can be located.
 */
void
_TSTMP(u_int32_t x)
{
	static int slot;
	u_long *entry = &tsc[slot];

	entry[0] = (u_int32_t)rdtsc();
	entry[1] = x;

	slot += 2;
	if (slot >= KERN_TIMESTAMP_SIZE)
		slot = 0;
	tsc[slot] = 0; /* mark last entry */
}
#endif /* KERN_TIMESTAMP */
1199 
1200 /*
1201  *
1202  */
1203 
1204 static int
1205 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
1206 {
1207     sysclock_t count;
1208     uint64_t tscval;
1209     char buf[32];
1210 
1211     crit_enter();
1212     if (sys_cputimer == &i8254_cputimer)
1213 	count = sys_cputimer->count();
1214     else
1215 	count = 0;
1216     if (tsc_present)
1217 	tscval = rdtsc();
1218     else
1219 	tscval = 0;
1220     crit_exit();
1221     ksnprintf(buf, sizeof(buf), "%08x %016llx", count, (long long)tscval);
1222     return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
1223 }
1224 
/*
 * State shared between the cpu running the TSC MP synchronization test
 * and the remote callback, tsc_mpsync_test_remote().
 */
struct tsc_mpsync_arg {
	/* TSC value sampled by the testing cpu for the remote cpus to compare against */
	volatile uint64_t	tsc_target;
	/* set to 1 by the tester; cleared when a remote TSC reads below tsc_target */
	volatile int		tsc_mpsync;
};
1229 
/*
 * Completion bookkeeping shared between tsc_mpsync_test() and the
 * per-cpu test threads (tsc_mpsync_ap_thread()).
 */
struct tsc_mpsync_thr {
	/* number of cpus that have finished their test run */
	volatile int		tsc_done_cnt;
	/* number of cpus whose test run reported the TSC as synchronized */
	volatile int		tsc_mpsync_cnt;
};
1234 
1235 static void
1236 tsc_mpsync_test_remote(void *xarg)
1237 {
1238 	struct tsc_mpsync_arg *arg = xarg;
1239 	uint64_t tsc;
1240 
1241 	tsc = rdtsc_ordered();
1242 	if (tsc < arg->tsc_target)
1243 		arg->tsc_mpsync = 0;
1244 }
1245 
/*
 * Run the TSC MP synchronization test from the current cpu against all
 * other cpus (gd_other_cpus).
 *
 * arg - test state; fully initialized here.  On return arg->tsc_mpsync
 *       is 1 if no remote cpu ever read a TSC value below the target
 *       sampled on this cpu, and 0 otherwise (including the case where
 *       the TSC did not advance at all).
 *
 * The loop stops as soon as a failure is seen, once the TSC has passed
 * the end of the test window and at least TSC_TEST_TRYMIN iterations
 * were done, or after TSC_TEST_TRYMAX iterations.
 */
static void
tsc_mpsync_test_loop(struct tsc_mpsync_arg *arg)
{
	struct globaldata *gd = mycpu;
	uint64_t test_end, test_begin;
	u_int i;

	if (bootverbose) {
		kprintf("cpu%d: TSC testing MP synchronization ...\n",
		    gd->gd_cpuid);
	}

	test_begin = rdtsc_ordered();
	/* Run test for 100ms */
	test_end = test_begin + (tsc_frequency / 10);

	/* Assume success until a remote cpu proves otherwise. */
	arg->tsc_mpsync = 1;
	arg->tsc_target = test_begin;

#define TSC_TEST_TRYMAX		1000000	/* Make sure we could stop */
#define TSC_TEST_TRYMIN		50000

	for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
		struct lwkt_cpusync cs;

		/*
		 * Sample the target TSC while the cpusync is interlocked
		 * and make the store visible before releasing it.
		 * NOTE(review): presumably the remote cpus run
		 * tsc_mpsync_test_remote() only after the deinterlock --
		 * confirm against the lwkt_cpusync implementation.
		 */
		crit_enter();
		lwkt_cpusync_init(&cs, gd->gd_other_cpus,
		    tsc_mpsync_test_remote, arg);
		lwkt_cpusync_interlock(&cs);
		arg->tsc_target = rdtsc_ordered();
		cpu_mfence();
		lwkt_cpusync_deinterlock(&cs);
		crit_exit();

		/* A remote cpu saw a TSC below our target: test failed. */
		if (!arg->tsc_mpsync) {
			kprintf("cpu%d: TSC is not MP synchronized @%u\n",
			    gd->gd_cpuid, i);
			break;
		}
		/* Test window elapsed and the minimum try count was met. */
		if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
			break;
	}

#undef TSC_TEST_TRYMIN
#undef TSC_TEST_TRYMAX

	/* The last sampled target still equals the very first reading. */
	if (arg->tsc_target == test_begin) {
		kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
		/* XXX disable TSC? */
		tsc_invariant = 0;
		arg->tsc_mpsync = 0;
		return;
	}

	if (arg->tsc_mpsync && bootverbose) {
		kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
		    gd->gd_cpuid, i);
	}
}
1305 
1306 static void
1307 tsc_mpsync_ap_thread(void *xthr)
1308 {
1309 	struct tsc_mpsync_thr *thr = xthr;
1310 	struct tsc_mpsync_arg arg;
1311 
1312 	tsc_mpsync_test_loop(&arg);
1313 	if (arg.tsc_mpsync) {
1314 		atomic_add_int(&thr->tsc_mpsync_cnt, 1);
1315 		cpu_sfence();
1316 	}
1317 	atomic_add_int(&thr->tsc_done_cnt, 1);
1318 
1319 	lwkt_exit();
1320 }
1321 
1322 static void
1323 tsc_mpsync_test(void)
1324 {
1325 	struct tsc_mpsync_arg arg;
1326 
1327 	if (!tsc_invariant) {
1328 		/* Not even invariant TSC */
1329 		return;
1330 	}
1331 
1332 	if (ncpus == 1) {
1333 		/* Only one CPU */
1334 		tsc_mpsync = 1;
1335 		return;
1336 	}
1337 
1338 	/*
1339 	 * Forcing can be used w/qemu to reduce contention
1340 	 */
1341 	TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);
1342 
1343 	if (tsc_mpsync == 0) {
1344 		switch(cpu_vendor_id) {
1345 		case CPU_VENDOR_INTEL:
1346 			/*
1347 			 * Intel probably works
1348 			 */
1349 			break;
1350 		case CPU_VENDOR_AMD:
1351 			/*
1352 			 * AMD < Ryzen probably doesn't work
1353 			 */
1354 			if (CPUID_TO_FAMILY(cpu_id) < 0x17)
1355 				return;
1356 			break;
1357 		default:
1358 			/* probably won't work */
1359 			return;
1360 		}
1361 	}
1362 
1363 	/*
1364 	 * Test even if forced above.  If forced, we will use the TSC
1365 	 * even if the test fails.
1366 	 */
1367 	kprintf("TSC testing MP synchronization ...\n");
1368 
1369 	tsc_mpsync_test_loop(&arg);
1370 	if (arg.tsc_mpsync) {
1371 		struct tsc_mpsync_thr thr;
1372 		int cpu;
1373 
1374 		/*
1375 		 * Test TSC MP synchronization on APs.
1376 		 */
1377 
1378 		thr.tsc_done_cnt = 1;
1379 		thr.tsc_mpsync_cnt = 1;
1380 
1381 		for (cpu = 0; cpu < ncpus; ++cpu) {
1382 			if (cpu == mycpuid)
1383 				continue;
1384 
1385 			lwkt_create(tsc_mpsync_ap_thread, &thr, NULL,
1386 			    NULL, 0, cpu, "tsc mpsync %d", cpu);
1387 		}
1388 
1389 		while (thr.tsc_done_cnt != ncpus) {
1390 			cpu_pause();
1391 			cpu_lfence();
1392 		}
1393 		if (thr.tsc_mpsync_cnt == ncpus)
1394 			tsc_mpsync = 1;
1395 	}
1396 
1397 	if (tsc_mpsync)
1398 		kprintf("TSC is MP synchronized\n");
1399 	else
1400 		kprintf("TSC is not MP synchronized\n");
1401 }
1402 SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);
1403 
/*
 * Upper bound for the frequency of the TSC based cputimer.  When the
 * TSC runs faster, tsc_cputimer_register() halves the frequency until
 * it fits and counts the halvings in tsc_cputimer_shift.
 */
#define TSC_CPUTIMER_FREQMAX	128000000	/* 128Mhz */

/* Right shift applied to the raw TSC value by tsc_cputimer_count(). */
static int tsc_cputimer_shift;
1407 
1408 static void
1409 tsc_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
1410 {
1411 	timer->base = 0;
1412 	timer->base = oldclock - timer->count();
1413 }
1414 
1415 static __inline sysclock_t
1416 tsc_cputimer_count(void)
1417 {
1418 	uint64_t tsc;
1419 
1420 	tsc = rdtsc();
1421 	tsc >>= tsc_cputimer_shift;
1422 
1423 	return (tsc + tsc_cputimer.base);
1424 }
1425 
1426 static sysclock_t
1427 tsc_cputimer_count_lfence(void)
1428 {
1429 	cpu_lfence();
1430 	return tsc_cputimer_count();
1431 }
1432 
1433 static sysclock_t
1434 tsc_cputimer_count_mfence(void)
1435 {
1436 	cpu_mfence();
1437 	return tsc_cputimer_count();
1438 }
1439 
/*
 * cpucounter count routine: load fence, then return the raw TSC.
 */
static uint64_t
tsc_cpucounter_count_lfence(void)
{
	uint64_t now;

	cpu_lfence();
	now = rdtsc();
	return (now);
}
1447 
/*
 * cpucounter count routine: full memory fence, then return the raw TSC.
 */
static uint64_t
tsc_cpucounter_count_mfence(void)
{
	uint64_t now;

	cpu_mfence();
	now = rdtsc();
	return (now);
}
1455 
1456 static void
1457 tsc_cputimer_register(void)
1458 {
1459 	uint64_t freq;
1460 	int enable = 1;
1461 
1462 	if (!tsc_mpsync) {
1463 		if (tsc_invariant) {
1464 			/* Per-cpu cpucounter still works. */
1465 			goto regcnt;
1466 		}
1467 		return;
1468 	}
1469 
1470 	TUNABLE_INT_FETCH("hw.tsc_cputimer_enable", &enable);
1471 	if (!enable)
1472 		return;
1473 
1474 	freq = tsc_frequency;
1475 	while (freq > TSC_CPUTIMER_FREQMAX) {
1476 		freq >>= 1;
1477 		++tsc_cputimer_shift;
1478 	}
1479 	kprintf("TSC: cputimer freq %ju, shift %d\n",
1480 	    (uintmax_t)freq, tsc_cputimer_shift);
1481 
1482 	tsc_cputimer.freq = freq;
1483 
1484 	if (cpu_vendor_id == CPU_VENDOR_INTEL)
1485 		tsc_cputimer.count = tsc_cputimer_count_lfence;
1486 	else
1487 		tsc_cputimer.count = tsc_cputimer_count_mfence; /* safe bet */
1488 
1489 	cputimer_register(&tsc_cputimer);
1490 	cputimer_select(&tsc_cputimer, 0);
1491 
1492 	tsc_cpucounter.flags |= CPUCOUNTER_FLAG_MPSYNC;
1493 regcnt:
1494 	tsc_cpucounter.freq = tsc_frequency;
1495 	if (cpu_vendor_id == CPU_VENDOR_INTEL) {
1496 		tsc_cpucounter.count =
1497 		    tsc_cpucounter_count_lfence;
1498 	} else {
1499 		tsc_cpucounter.count =
1500 		    tsc_cpucounter_count_mfence; /* safe bet */
1501 	}
1502 	cpucounter_register(&tsc_cpucounter);
1503 }
1504 SYSINIT(tsc_cputimer_reg, SI_BOOT2_POST_SMP, SI_ORDER_FIRST,
1505 	tsc_cputimer_register, NULL);
1506 
/*
 * hw.i8254 node: the 8254 cputimer frequency and a combined 8254/TSC
 * timestamp string produced by hw_i8254_timestamp().
 */
SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
	    "frequency");
SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
	    0, 0, hw_i8254_timestamp, "A", "");

/* Read-only views of the TSC state variables. */
SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
	    &tsc_present, 0, "TSC Available");
SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
	    &tsc_invariant, 0, "Invariant TSC");
SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
	    &tsc_mpsync, 0, "TSC is synchronized across CPUs");
SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
	    &tsc_frequency, 0, "TSC Frequency");
1521