xref: /dragonfly/sys/platform/pc64/isa/clock.c (revision 689ddcfa)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * Copyright (c) 2008 The DragonFly Project.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * William Jolitz and Don Ahn.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
34  * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
35  */
36 
37 /*
38  * Routines to handle clock hardware.
39  */
40 
41 /*
42  * inittodr, settodr and support routines written
43  * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
44  *
45  * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
46  */
47 
48 #if 0
49 #include "opt_clock.h"
50 #endif
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/eventhandler.h>
55 #include <sys/time.h>
56 #include <sys/kernel.h>
57 #include <sys/bus.h>
58 #include <sys/sysctl.h>
59 #include <sys/cons.h>
60 #include <sys/kbio.h>
61 #include <sys/systimer.h>
62 #include <sys/globaldata.h>
63 #include <sys/machintr.h>
64 #include <sys/interrupt.h>
65 
66 #include <sys/thread2.h>
67 
68 #include <machine/clock.h>
69 #include <machine/cputypes.h>
70 #include <machine/frame.h>
71 #include <machine/ipl.h>
72 #include <machine/limits.h>
73 #include <machine/md_var.h>
74 #include <machine/psl.h>
75 #include <machine/segments.h>
76 #include <machine/smp.h>
77 #include <machine/specialreg.h>
78 #include <machine/intr_machdep.h>
79 
80 #include <machine_base/apic/ioapic.h>
81 #include <machine_base/apic/ioapic_abi.h>
82 #include <machine_base/icu/icu.h>
83 #include <bus/isa/isa.h>
84 #include <bus/isa/rtc.h>
85 #include <machine_base/isa/timerreg.h>
86 
static void i8254_restore(void);
static void resettodr_on_shutdown(void *arg __unused);

/*
 * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
 * can use a simple formula for leap years.
 */
#define	LEAPYEAR(y) ((u_int)(y) % 4 == 0)
#define DAYSPERYEAR   (31+28+31+30+31+30+31+31+30+31+30+31)

/* i8254 PIT input clock in Hz (overridable from kernel config). */
#ifndef TIMER_FREQ
#define TIMER_FREQ   1193182
#endif

/* Select bits and counter I/O port of the 8254 timer used as walltimer */
static uint8_t i8254_walltimer_sel;
static uint16_t i8254_walltimer_cntr;

int	adjkerntz;		/* local offset from GMT in seconds */
int	disable_rtc_set;	/* disable resettodr() if != 0 */
int	tsc_present;		/* CPU advertises CPUID_TSC */
int	tsc_invariant;		/* TSC advertised/forced invariant */
int	tsc_mpsync;		/* TSC assumed synchronized across cpus */
int	tsc_is_broken;
int	wall_cmos_clock;	/* wall CMOS clock assumed if != 0 */
int	timer0_running;		/* timer0 one-shot currently armed */
tsc_uclock_t tsc_frequency;	/* measured TSC frequency, 0 if unknown */
tsc_uclock_t tsc_oneus_approx;	/* always at least 1, approx only */

/* Ownership state for each of the three 8254 timers */
enum tstate { RELEASED, ACQUIRED };
enum tstate timer0_state;
enum tstate timer1_state;
enum tstate timer2_state;

static	int	beeping = 0;	/* speaker beep in progress, timer2 owned */
static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
static  int	rtc_loaded;	/* inittodr() successfully read the RTC */

/* Non-zero if system->8254 tick conversion is a simple integer divide */
static int i8254_cputimer_div;

static int i8254_nointr;
static int i8254_intr_disable = 1;
TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);

static int calibrate_timers_with_rtc = 0;
TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);

/* Callout used to terminate a sysbeep() after its period expires */
static struct callout sysbeepstop_ch;
136 
static sysclock_t i8254_cputimer_count(void);
static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
static void i8254_cputimer_destruct(struct cputimer *cputimer);

/*
 * 8254-backed system cputimer.  The actual walltimer (timer1 or timer2)
 * and hence .type are chosen at construct time.
 */
static struct cputimer	i8254_cputimer = {
    .next		= SLIST_ENTRY_INITIALIZER,
    .name		= "i8254",
    .pri		= CPUTIMER_PRI_8254,
    .type		= 0,	/* determined later */
    .count		= i8254_cputimer_count,
    .fromhz		= cputimer_default_fromhz,
    .fromus		= cputimer_default_fromus,
    .construct		= i8254_cputimer_construct,
    .destruct		= i8254_cputimer_destruct,
    .freq		= TIMER_FREQ
};
153 
static sysclock_t tsc_cputimer_count_mfence(void);
static sysclock_t tsc_cputimer_count_lfence(void);
static void tsc_cputimer_construct(struct cputimer *, sysclock_t);

/*
 * TSC-backed cputimer.  The count method (mfence vs lfence variant) and
 * the frequency are filled in at runtime once the TSC is qualified.
 */
static struct cputimer	tsc_cputimer = {
    .next		= SLIST_ENTRY_INITIALIZER,
    .name		= "TSC",
    .pri		= CPUTIMER_PRI_TSC,
    .type		= CPUTIMER_TSC,
    .count		= NULL,	/* determined later */
    .fromhz		= cputimer_default_fromhz,
    .fromus		= cputimer_default_fromus,
    .construct		= tsc_cputimer_construct,
    .destruct		= cputimer_default_destruct,
    .freq		= 0	/* determined later */
};
170 
/* TSC-backed cpucounter; freq/count/flags are filled in at runtime. */
static struct cpucounter tsc_cpucounter = {
    .freq		= 0,	/* determined later */
    .count		= NULL,	/* determined later */
    .flags		= 0,	/* adjusted later */
    .prio		= CPUCOUNTER_PRIO_TSC,
    .type		= CPUCOUNTER_TSC
};
178 
static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);

/*
 * 8254 timer0 based interrupt cputimer; drives one-shot systimer
 * interrupts via i8254_intr_reload().
 */
static struct cputimer_intr i8254_cputimer_intr = {
    .freq = TIMER_FREQ,
    .reload = i8254_intr_reload,
    .enable = cputimer_intr_default_enable,
    .config = i8254_intr_config,
    .restart = cputimer_intr_default_restart,
    .pmfixup = cputimer_intr_default_pmfixup,
    .initclock = i8254_intr_initclock,
    .pcpuhand = NULL,
    .next = SLIST_ENTRY_INITIALIZER,
    .name = "i8254",
    .type = CPUTIMER_INTR_8254,
    .prio = CPUTIMER_INTR_PRIO_8254,
    .caps = CPUTIMER_INTR_CAP_PS,
    .priv = NULL
};
199 
200 /*
201  * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
202  * counting as of this interrupt.  We use timer1 in free-running mode (not
203  * generating any interrupts) as our main counter.  Each cpu has timeouts
204  * pending.
205  *
206  * This code is INTR_MPSAFE and may be called without the BGL held.
207  */
208 static void
209 clkintr(void *dummy, void *frame_arg)
210 {
211 	static sysclock_t sysclock_count;	/* NOTE! Must be static */
212 	struct globaldata *gd = mycpu;
213 	struct globaldata *gscan;
214 	int n;
215 
216 	/*
217 	 * SWSTROBE mode is a one-shot, the timer is no longer running
218 	 */
219 	timer0_running = 0;
220 
221 	/*
222 	 * XXX the dispatcher needs work.  right now we call systimer_intr()
223 	 * directly or via IPI for any cpu with systimers queued, which is
224 	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
225 	 */
226 	sysclock_count = sys_cputimer->count();
227 	for (n = 0; n < ncpus; ++n) {
228 	    gscan = globaldata_find(n);
229 	    if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
230 		continue;
231 	    if (gscan != gd) {
232 		lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
233 				&sysclock_count, 1);
234 	    } else {
235 		systimer_intr(&sysclock_count, 0, frame_arg);
236 	    }
237 	}
238 }
239 
240 
241 /*
242  * NOTE! not MP safe.
243  */
244 int
245 acquire_timer2(int mode)
246 {
247 	if (timer2_state != RELEASED)
248 		return (-1);
249 	timer2_state = ACQUIRED;
250 
251 	/*
252 	 * This access to the timer registers is as atomic as possible
253 	 * because it is a single instruction.  We could do better if we
254 	 * knew the rate.
255 	 */
256 	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
257 	return (0);
258 }
259 
260 int
261 release_timer2(void)
262 {
263 	if (timer2_state != ACQUIRED)
264 		return (-1);
265 	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
266 	timer2_state = RELEASED;
267 	return (0);
268 }
269 
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

/*
 * DDB "show rtc": dump the raw (BCD) RTC date/time registers and the
 * A/B/C status registers.
 */
DB_SHOW_COMMAND(rtc, rtc)
{
	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
	       rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
	       rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
	       rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
}
#endif /* DDB */
282 
/*
 * Return the current cpu timer count as a 32 bit integer.
 *
 * The 8254 walltimer is a 16-bit count-down; we latch it, negate it into
 * a count-up value and widen it with a software base that is advanced by
 * 0x10000 whenever a rollover is observed.  clock_lock() serializes the
 * latch/read sequence, which must not interleave with other 8254 access.
 */
static
sysclock_t
i8254_cputimer_count(void)
{
	static uint16_t cputimer_last;	/* previous countup, rollover detect */
	uint16_t count;
	sysclock_t ret;

	clock_lock();
	/* Latch the count, then read lsb followed by msb */
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
	count = (uint8_t)inb(i8254_walltimer_cntr);		/* get countdown */
	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
	count = -count;					/* -> countup */
	if (count < cputimer_last)			/* rollover */
		i8254_cputimer.base += 0x00010000;
	ret = i8254_cputimer.base | count;
	cputimer_last = count;
	clock_unlock();
	return(ret);
}
306 
307 /*
308  * This function is called whenever the system timebase changes, allowing
309  * us to calculate what is needed to convert a system timebase tick
310  * into an 8254 tick for the interrupt timer.  If we can convert to a
311  * simple shift, multiplication, or division, we do so.  Otherwise 64
 * bit arithmetic is required every time the interrupt timer is reloaded.
313  */
314 static void
315 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
316 {
317     int freq;
318     int div;
319 
320     /*
321      * Will a simple divide do the trick?
322      */
323     div = (timer->freq + (cti->freq / 2)) / cti->freq;
324     freq = cti->freq * div;
325 
326     if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
327 	i8254_cputimer_div = div;
328     else
329 	i8254_cputimer_div = 0;
330 }
331 
/*
 * Reload for the next timeout.  It is possible for the reload value
 * to be 0 or negative, indicating that an immediate timer interrupt
 * is desired.  For now make the minimum 2 ticks.
 *
 * We may have to convert from the system timebase to the 8254 timebase.
 */
static void
i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
    uint16_t count;

    /* Convert system timebase ticks to 8254 ticks */
    if (i8254_cputimer_div)
	reload /= i8254_cputimer_div;
    else
	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;

    /* 0 or negative (when viewed as int) means "as soon as possible" */
    if ((int)reload < 2)
	reload = 2;

    clock_lock();
    if (timer0_running) {
	/*
	 * A one-shot is already counting down; only shorten it (never
	 * lengthen), otherwise the earlier deadline would be missed.
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* count-down timer */
	count = (uint8_t)inb(TIMER_CNTR0);		/* lsb */
	count |= ((uint8_t)inb(TIMER_CNTR0) << 8);	/* msb */
	if (reload < count) {
	    outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	    outb(TIMER_CNTR0, (uint8_t)reload); 	/* lsb */
	    outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
	}
    } else {
	/* Timer idle, start a fresh one-shot */
	timer0_running = 1;
	if (reload > 0xFFFF)
	    reload = 0;		/* full count */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, (uint8_t)reload); 		/* lsb */
	outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
    }
    clock_unlock();
}
372 
/*
 * DELAY(usec)	     - Spin for the specified number of microseconds.
 * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
 *		       but do a thread switch in the loop
 *
 * Relies on timer 1 counting down from (cputimer_freq / hz)
 * Note: timer had better have been programmed before this is first used!
 */
static void
DODELAY(int n, int doswitch)
{
	ssysclock_t delta, ticks_left;
	sysclock_t prev_tick, tick;

#ifdef DELAYDEBUG
	int getit_calls = 1;
	int n1;
	static int state = 0;

	/* One-time self test exercising DELAY() over several magnitudes */
	if (state == 0) {
		state = 1;
		for (n1 = 1; n1 <= 10000000; n1 *= 10)
			DELAY(n1);
		state = 2;
	}
	if (state == 1)
		kprintf("DELAY(%d)...", n);
#endif
	/*
	 * Guard against the timer being uninitialized if we are called
	 * early for console i/o.
	 */
	if (timer0_state == RELEASED)
		i8254_restore();

	/*
	 * Read the counter first, so that the rest of the setup overhead is
	 * counted.  Then calculate the number of hardware timer ticks
	 * required, rounding up to be sure we delay at least the requested
	 * number of microseconds.
	 */
	prev_tick = sys_cputimer->count();
	ticks_left = ((u_int)n * (int64_t)sys_cputimer->freq + 999999) /
		     1000000;

	/*
	 * Loop until done.
	 */
	while (ticks_left > 0) {
		tick = sys_cputimer->count();
#ifdef DELAYDEBUG
		++getit_calls;
#endif
		delta = tick - prev_tick;
		prev_tick = tick;
		if (delta < 0)	/* don't go backwards if timebase was reset */
			delta = 0;
		ticks_left -= delta;
		if (doswitch && ticks_left > 0)
			lwkt_switch();
		cpu_pause();
	}
#ifdef DELAYDEBUG
	if (state == 1)
		kprintf(" %d calls to getit() at %d usec each\n",
		       getit_calls, (n + 5) / getit_calls);
#endif
}
441 
/*
 * DELAY() - spin for at least n microseconds; never thread-switches.
 */
void
DELAY(int n)
{
	DODELAY(n, 0);
}
450 
451 /*
452  * Returns non-zero if the specified time period has elapsed.  Call
453  * first with last_clock set to 0.
454  */
455 int
456 CHECKTIMEOUT(TOTALDELAY *tdd)
457 {
458 	sysclock_t delta;
459 	int us;
460 
461 	if (tdd->started == 0) {
462 		if (timer0_state == RELEASED)
463 			i8254_restore();
464 		tdd->last_clock = sys_cputimer->count();
465 		tdd->started = 1;
466 		return(0);
467 	}
468 	delta = sys_cputimer->count() - tdd->last_clock;
469 	us = (u_int64_t)delta * (u_int64_t)1000000 /
470 	     (u_int64_t)sys_cputimer->freq;
471 	tdd->last_clock += (u_int64_t)us * (u_int64_t)sys_cputimer->freq /
472 			   1000000;
473 	tdd->us -= us;
474 	return (tdd->us < 0);
475 }
476 
477 
478 /*
479  * DRIVERSLEEP() does not switch if called with a spinlock held or
480  * from a hard interrupt.
481  */
482 void
483 DRIVERSLEEP(int usec)
484 {
485 	globaldata_t gd = mycpu;
486 
487 	if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
488 		DODELAY(usec, 0);
489 	} else {
490 		DODELAY(usec, 1);
491 	}
492 }
493 
/*
 * Callout handler: silence the speaker tone started by sysbeep() and
 * give timer2 back.
 */
static void
sysbeepstop(void *chan)
{
	outb(IO_PPI, inb(IO_PPI)&0xFC);	/* disable counter2 output to speaker */
	beeping = 0;
	release_timer2();
}
501 
502 int
503 sysbeep(int pitch, int period)
504 {
505 	if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
506 		return(-1);
507 	if (sysbeep_enable == 0)
508 		return(-1);
509 	/*
510 	 * Nobody else is using timer2, we do not need the clock lock
511 	 */
512 	outb(TIMER_CNTR2, pitch);
513 	outb(TIMER_CNTR2, (pitch>>8));
514 	if (!beeping) {
515 		/* enable counter2 output to speaker */
516 		outb(IO_PPI, inb(IO_PPI) | 3);
517 		beeping = period;
518 		callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
519 	}
520 	return (0);
521 }
522 
523 /*
524  * RTC support routines
525  */
526 
/*
 * Read one RTC register.  The dummy reads of port 0x84 provide the
 * short I/O recovery delay the RTC requires between accesses.
 */
int
rtcin(int reg)
{
	u_char val;

	crit_enter();
	outb(IO_RTC, reg);		/* select register */
	inb(0x84);			/* I/O delay */
	val = inb(IO_RTC + 1);		/* read data */
	inb(0x84);			/* I/O delay */
	crit_exit();
	return (val);
}
540 
/*
 * Write one RTC register.  Mirrors rtcin(); the dummy reads of port
 * 0x84 provide inter-access recovery delay.
 */
static __inline void
writertc(u_char reg, u_char val)
{
	crit_enter();
	inb(0x84);
	outb(IO_RTC, reg);		/* select register */
	inb(0x84);
	outb(IO_RTC + 1, val);		/* write data */
	inb(0x84);		/* XXX work around wrong order in rtcin() */
	crit_exit();
}
552 
/*
 * Read an RTC register and convert the stored BCD value to binary.
 */
static __inline int
readrtc(int port)
{
	int bcd = rtcin(port);

	return (bcd2bin(bcd));
}
558 
/*
 * Measure the i8254 (and, when present, the TSC) frequency against the
 * RTC's 1 Hz seconds counter.  Returns the measured i8254 frequency in
 * Hz, or the compiled-in default if the RTC is unusable or times out.
 */
static u_int
calibrate_clocks(void)
{
	tsc_uclock_t old_tsc;
	u_int tot_count;
	sysclock_t count, prev_count;
	int sec, start_sec, timeout;

	if (bootverbose)
	        kprintf("Calibrating clock(s) ...\n");
	/* RTC contents are only trustworthy if it reports valid power */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto fail;
	timeout = 100000000;

	/* Read the mc146818A seconds counter. */
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Wait for the mc146818A seconds counter to change. */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			if (sec != start_sec)
				break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Start keeping track of the i8254 counter. */
	prev_count = sys_cputimer->count();
	tot_count = 0;

	if (tsc_present)
		old_tsc = rdtsc();
	else
		old_tsc = 0;		/* shut up gcc */

	/*
	 * Wait for the mc146818A seconds counter to change.  Read the i8254
	 * counter for each iteration since this is convenient and only
	 * costs a few usec of inaccuracy. The timing of the final reads
	 * of the counters almost matches the timing of the initial reads,
	 * so the main cause of inaccuracy is the varying latency from
	 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
	 * rtcin(RTC_SEC) that returns a changed seconds count.  The
	 * maximum inaccuracy from this cause is < 10 usec on 486's.
	 */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
			sec = rtcin(RTC_SEC);
		count = sys_cputimer->count();
		tot_count += (int)(count - prev_count);
		prev_count = count;
		if (sec != start_sec)
			break;
		if (--timeout == 0)
			goto fail;
	}

	/*
	 * Read the cpu cycle counter.  The timing considerations are
	 * similar to those for the i8254 clock.
	 */
	if (tsc_present) {
		tsc_frequency = rdtsc() - old_tsc;
		if (bootverbose) {
			kprintf("TSC clock: %jd Hz (Method A)\n",
			    (intmax_t)tsc_frequency);
		}
	}
	/* The |1 keeps the numerator non-zero so the result is >= 1 */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	kprintf("i8254 clock: %u Hz\n", tot_count);
	return (tot_count);

fail:
	kprintf("failed, using default i8254 clock of %u Hz\n",
		i8254_cputimer.freq);
	return (i8254_cputimer.freq);
}
648 
/*
 * (Re)program the 8254: timer0 as the one-shot interrupt timer, and
 * register our cputimer/cputimer_intr objects with the framework (the
 * walltimer itself is programmed by i8254_cputimer_construct()).
 */
static void
i8254_restore(void)
{
	timer0_state = ACQUIRED;

	clock_lock();

	/*
	 * Timer0 is our fine-grained variable clock interrupt
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, 2);	/* lsb */
	outb(TIMER_CNTR0, 0);	/* msb */
	clock_unlock();

	if (!i8254_nointr) {
		cputimer_intr_register(&i8254_cputimer_intr);
		cputimer_intr_select(&i8254_cputimer_intr, 0);
	}

	/*
	 * Timer1 or timer2 is our free-running clock, but only if another
	 * has not been selected.
	 */
	cputimer_register(&i8254_cputimer);
	cputimer_select(&i8254_cputimer, 0);
}
676 
/*
 * cputimer construct method: choose timer1 or timer2 as the free-running
 * walltimer (tunable hw.i8254.walltimer, default 2), program it into
 * rate-generator mode with a full 2^16 count, and carry the previous
 * clock base forward so the timebase does not jump backwards.
 */
static void
i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
{
 	int which;

	/*
	 * Should we use timer 1 or timer 2 ?
	 */
	which = 0;
	TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
	if (which != 1 && which != 2)
		which = 2;

	switch(which) {
	case 1:
		timer->name = "i8254_timer1";
		timer->type = CPUTIMER_8254_SEL1;
		i8254_walltimer_sel = TIMER_SEL1;
		i8254_walltimer_cntr = TIMER_CNTR1;
		timer1_state = ACQUIRED;
		break;
	case 2:
		timer->name = "i8254_timer2";
		timer->type = CPUTIMER_8254_SEL2;
		i8254_walltimer_sel = TIMER_SEL2;
		i8254_walltimer_cntr = TIMER_CNTR2;
		timer2_state = ACQUIRED;
		break;
	}

	/* Round up to a 16-bit boundary so the hardware count ORs in cleanly */
	timer->base = (oldclock + 0xFFFF) & ~0xFFFF;

	clock_lock();
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
	outb(i8254_walltimer_cntr, 0);	/* lsb */
	outb(i8254_walltimer_cntr, 0);	/* msb */
	outb(IO_PPI, inb(IO_PPI) | 1);	/* bit 0: enable gate, bit 1: spkr */
	clock_unlock();
}
716 
717 static void
718 i8254_cputimer_destruct(struct cputimer *timer)
719 {
720 	switch(timer->type) {
721 	case CPUTIMER_8254_SEL1:
722 	    timer1_state = RELEASED;
723 	    break;
724 	case CPUTIMER_8254_SEL2:
725 	    timer2_state = RELEASED;
726 	    break;
727 	default:
728 	    break;
729 	}
730 	timer->type = 0;
731 }
732 
/*
 * Reprogram the RTC control registers (e.g. after resume): write a safe
 * STATUSB first, then STATUSA, then the real STATUSB value.
 */
static void
rtc_restore(void)
{
	/* Restore all of the RTC's "status" (actually, control) registers. */
	writertc(RTC_STATUSB, RTCSB_24HR);
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, rtc_statusb);
}
741 
/*
 * Restore all the timers.
 *
 * This function is called to resynchronize our core timekeeping after a
 * long halt, e.g. from apm_default_resume() and friends.  It is also
 * called if after a BIOS call we have detected munging of the 8254.
 * It is necessary because cputimer_count() counter's delta may have grown
 * too large for nanouptime() and friends to handle, or (in the case of 8254
 * munging) might cause the SYSTIMER code to prematurely trigger.
 */
void
timer_restore(void)
{
	crit_enter();		/* no preemption while reprogramming */
	i8254_restore();		/* restore timer_freq and hz */
	rtc_restore();			/* reenable RTC interrupts */
	crit_exit();
}
760 
761 /*
762  * Initialize 8254 timer 0 early so that it can be used in DELAY().
763  */
void
startrtclock(void)
{
	u_int delta, freq;

	/*
	 * Can we use the TSC?
	 *
	 * NOTE: If running under qemu, probably a good idea to force the
	 *	 TSC because we are not likely to detect it as being
	 *	 invariant or mpsyncd if you don't.  This will greatly
	 *	 reduce SMP contention.
	 */
	if (cpu_feature & CPUID_TSC) {
		tsc_present = 1;
		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);

		/* CPUID 0x80000007 EDX bit 8 advertises an invariant TSC */
		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
		     cpu_vendor_id == CPU_VENDOR_AMD) &&
		    cpu_exthigh >= 0x80000007) {
			u_int regs[4];

			do_cpuid(0x80000007, regs);
			if (regs[3] & 0x100)
				tsc_invariant = 1;
		}
	} else {
		tsc_present = 0;
	}

	/*
	 * Initial RTC state, don't do anything unexpected
	 */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	/*
	 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
	 * generate an interrupt, which we will ignore for now.
	 *
	 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
	 * (so it counts a full 2^16 and repeats).  We will use this timer
	 * for our counting.
	 */
	i8254_restore();

	/*
	 * When booting without verbose messages, it's pointless to run the
	 * calibrate_clocks() calibration code, when we don't use the
	 * results in any way. With bootverbose, we are at least printing
	 * this information to the kernel log.
	 */
	if (calibrate_timers_with_rtc == 0 && !bootverbose)
		goto skip_rtc_based;

	freq = calibrate_clocks();
#ifdef CLK_CALIBRATION_LOOP
	if (bootverbose) {
		int c;

		cnpoll(TRUE);
		kprintf("Press a key on the console to "
			"abort clock calibration\n");
		while ((c = cncheckc()) == -1 || c == NOKEY)
			calibrate_clocks();
		cnpoll(FALSE);
	}
#endif

	/*
	 * Use the calibrated i8254 frequency if it seems reasonable.
	 * Otherwise use the default, and don't use the calibrated i586
	 * frequency.
	 */
	delta = freq > i8254_cputimer.freq ?
			freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
	if (delta < i8254_cputimer.freq / 100) {
		if (calibrate_timers_with_rtc == 0) {
			kprintf(
"hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
			freq = i8254_cputimer.freq;
		}
		/*
		 * NOTE:
		 * Interrupt timer's freq must be adjusted
		 * before we change the cputimer's frequency.
		 */
		i8254_cputimer_intr.freq = freq;
		cputimer_set_frequency(&i8254_cputimer, freq);
	} else {
		if (bootverbose)
			kprintf("%d Hz differs from default of %d Hz "
				"by more than 1%%\n",
			        freq, i8254_cputimer.freq);
		tsc_frequency = 0;
	}

	if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
		kprintf("hw.calibrate_timers_with_rtc not "
			"set - using old calibration method\n");
		tsc_frequency = 0;
	}

skip_rtc_based:
	if (tsc_present && tsc_frequency == 0) {
		/*
		 * Calibration of the i586 clock relative to the mc146818A
		 * clock failed.  Do a less accurate calibration relative
		 * to the i8254 clock.
		 */
		u_int64_t old_tsc = rdtsc();

		DELAY(1000000);
		tsc_frequency = rdtsc() - old_tsc;
		if (bootverbose && calibrate_timers_with_rtc) {
			kprintf("TSC clock: %jd Hz (Method B)\n",
			    (intmax_t)tsc_frequency);
		}
	}

	if (tsc_present) {
		kprintf("TSC%s clock: %jd Hz\n",
		    tsc_invariant ? " invariant" : "",
		    (intmax_t)tsc_frequency);
	}
	/* The |1 keeps the numerator non-zero so the result is >= 1 */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
			      NULL, SHUTDOWN_PRI_LAST);
}
894 
895 /*
896  * Sync the time of day back to the RTC on shutdown, but only if
897  * we have already loaded it and have not crashed.
898  */
899 static void
900 resettodr_on_shutdown(void *arg __unused)
901 {
902  	if (rtc_loaded && panicstr == NULL) {
903 		resettodr();
904 	}
905 }
906 
907 /*
908  * Initialize the time of day register, based on the time base which is, e.g.
909  * from a filesystem.
910  */
void
inittodr(time_t base)
{
	unsigned long	sec, days;
	int		year, month;
	int		y, m;
	struct timespec ts;

	/* Use the filesystem-supplied base as a first approximation */
	if (base) {
		ts.tv_sec = base;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}

	/* Look if we have a RTC present and the time is valid */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto wrong_time;

	/* wait for time update to complete */
	/* If RTCSA_TUP is zero, we have at least 244us before next update */
	crit_enter();
	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
		/* briefly leave the critical section between polls */
		crit_exit();
		crit_enter();
	}

	days = 0;
#ifdef USE_RTC_CENTURY
	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
#else
	year = readrtc(RTC_YEAR) + 1900;
	if (year < 1970)
		year += 100;
#endif
	if (year < 1970) {
		crit_exit();
		goto wrong_time;
	}
	/* Accumulate days: completed months this year, then day-of-month */
	month = readrtc(RTC_MONTH);
	for (m = 1; m < month; m++)
		days += daysinmonth[m-1];
	if ((month > 2) && LEAPYEAR(year))
		days ++;
	days += readrtc(RTC_DAY) - 1;
	/* ... then all completed years since the epoch */
	for (y = 1970; y < year; y++)
		days += DAYSPERYEAR + LEAPYEAR(y);
	sec = ((( days * 24 +
		  readrtc(RTC_HRS)) * 60 +
		  readrtc(RTC_MIN)) * 60 +
		  readrtc(RTC_SEC));
	/* sec now contains the number of seconds, since Jan 1 1970,
	   in the local time zone */

	/* Convert local time to UTC */
	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	y = (int)(time_second - sec);
	if (y <= -2 || y >= 2) {
		/* badly off, adjust it */
		ts.tv_sec = sec;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}
	rtc_loaded = 1;
	crit_exit();
	return;

wrong_time:
	kprintf("Invalid time in real time clock.\n");
	kprintf("Check and reset the date immediately!\n");
}
981 
982 /*
983  * Write system time back to RTC
984  */
void
resettodr(void)
{
	struct timeval tv;
	unsigned long tm;
	int m;
	int y;

	if (disable_rtc_set)
		return;

	microtime(&tv);
	tm = tv.tv_sec;

	crit_enter();
	/* Disable RTC updates and interrupts. */
	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);

	/* Calculate local time to put in RTC */

	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */

	/* We have now the days since 01-01-1970 in tm */
	/* Jan 1 1970 was a Thursday, hence the +4 in the weekday below */
	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
	/* Strip whole years, leaving the day-of-year in tm */
	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
	     tm >= m;
	     y++,      m = DAYSPERYEAR + LEAPYEAR(y))
	     tm -= m;

	/* Now we have the years in y and the day-of-the-year in tm */
	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
#ifdef USE_RTC_CENTURY
	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
#endif
	/* Convert day-of-year into month / day-of-month */
	for (m = 0; ; m++) {
		int ml;

		ml = daysinmonth[m];
		if (m == 1 && LEAPYEAR(y))
			ml++;
		if (tm < ml)
			break;
		tm -= ml;
	}

	writertc(RTC_MONTH, bin2bcd(m + 1));            /* Write back Month   */
	writertc(RTC_DAY, bin2bcd(tm + 1));             /* Write back Month Day */

	/* Reenable RTC updates and interrupts. */
	writertc(RTC_STATUSB, rtc_statusb);
	crit_exit();
}
1041 
1042 static int
1043 i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
1044 {
1045 	sysclock_t base;
1046 	long lastcnt;
1047 
1048 	/*
1049 	 * Following code assumes the 8254 is the cpu timer,
1050 	 * so make sure it is.
1051 	 */
1052 	KKASSERT(sys_cputimer == &i8254_cputimer);
1053 	KKASSERT(cti == &i8254_cputimer_intr);
1054 
1055 	lastcnt = get_interrupt_counter(irq, mycpuid);
1056 
1057 	/*
1058 	 * Force an 8254 Timer0 interrupt and wait 1/100s for
1059 	 * it to happen, then see if we got it.
1060 	 */
1061 	kprintf("IOAPIC: testing 8254 interrupt delivery\n");
1062 
1063 	i8254_intr_reload(cti, 2);
1064 	base = sys_cputimer->count();
1065 	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
1066 		; /* nothing */
1067 
1068 	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0)
1069 		return ENOENT;
1070 	return 0;
1071 }
1072 
1073 /*
1074  * Start both clocks running.  DragonFly note: the stat clock is no longer
1075  * used.  Instead, 8254 based systimers are used for all major clock
1076  * interrupts.
1077  */
static void
i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
{
	void *clkdesc = NULL;
	int irq = 0, mixed_mode = 0, error;

	/* Clock interrupt setup is done on the BSP only. */
	KKASSERT(mycpuid == 0);
	callout_init_mp(&sysbeepstop_ch);

	/* Administratively disabled and not the selected intr: bail. */
	if (!selected && i8254_intr_disable)
		goto nointr;

	/*
	 * The stat interrupt mask is different without the
	 * statistics clock.  Also, don't set the interrupt
	 * flag which would normally cause the RTC to generate
	 * interrupts.
	 */
	rtc_statusb = RTCSB_24HR;

	/* Finish initializing 8254 timer 0. */
	if (ioapic_enable) {
		/* Look for irq0 routed through the IOAPIC (edge/high). */
		irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
			INTR_POLARITY_HIGH);
		if (irq < 0) {
mixed_mode_setup:
			/*
			 * No directly routed IOAPIC pin for irq0; fall
			 * back to "mixed mode" (ExtINT delivery).  This
			 * label is also reached from below when the
			 * trial on the directly routed pin fails.
			 */
			error = ioapic_conf_legacy_extint(0);
			if (!error) {
				irq = machintr_legacy_intr_find(0,
				    INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
				if (irq < 0)
					error = ENOENT;
			}

			if (error) {
				if (!selected) {
					kprintf("IOAPIC: setup mixed mode for "
						"irq 0 failed: %d\n", error);
					goto nointr;
				} else {
					panic("IOAPIC: setup mixed mode for "
					      "irq 0 failed: %d\n", error);
				}
			}
			mixed_mode = 1;
		}
		/* Keep the descriptor so the trial can unregister it. */
		clkdesc = register_int(irq, clkintr, NULL, "clk",
				       NULL,
				       INTR_EXCL | INTR_CLOCK |
				       INTR_NOPOLL | INTR_MPSAFE |
				       INTR_NOENTROPY, 0);
	} else {
		/* Legacy 8259 PIC path: irq0 directly. */
		register_int(0, clkintr, NULL, "clk", NULL,
			     INTR_EXCL | INTR_CLOCK |
			     INTR_NOPOLL | INTR_MPSAFE |
			     INTR_NOENTROPY, 0);
	}

	/* Initialize RTC. */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	if (ioapic_enable) {
		/* Verify the 8254 interrupt actually arrives on this irq. */
		error = i8254_ioapic_trial(irq, cti);
		if (error) {
			if (mixed_mode) {
				if (!selected) {
					kprintf("IOAPIC: mixed mode for irq %d "
						"trial failed: %d\n",
						irq, error);
					goto nointr;
				} else {
					panic("IOAPIC: mixed mode for irq %d "
					      "trial failed: %d\n", irq, error);
				}
			} else {
				kprintf("IOAPIC: warning 8254 is not connected "
					"to the correct pin, try mixed mode\n");
				unregister_int(clkdesc, 0);
				goto mixed_mode_setup;
			}
		}
	}
	return;

nointr:
	i8254_nointr = 1; /* don't try to register again */
	cputimer_intr_deregister(cti);
}
1167 
1168 void
1169 setstatclockrate(int newhz)
1170 {
1171 	if (newhz == RTC_PROFRATE)
1172 		rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1173 	else
1174 		rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1175 	writertc(RTC_STATUSA, rtc_statusa);
1176 }
1177 
#if 0
/*
 * Unused timecounter hook that would return the raw TSC; kept for
 * reference only (compiled out).
 */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	return (rdtsc());
}
#endif
1185 
#ifdef KERN_TIMESTAMP
#define KERN_TIMESTAMP_SIZE 16384
/* Ring buffer of (tsc, tag) pairs, exported read-only via sysctl. */
static u_long tsc[KERN_TIMESTAMP_SIZE] ;
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");
/*
 * Record a timestamp entry: the low 32 bits of the TSC followed by the
 * caller-supplied tag x.  The slot after the newest pair is zeroed so
 * readers can find the logical end of the ring.
 */
void
_TSTMP(u_int32_t x)
{
	static int i;	/* next free slot; wraps at KERN_TIMESTAMP_SIZE */

	tsc[i] = (u_int32_t)rdtsc();
	tsc[i+1] = x;
	i = i + 2;
	if (i >= KERN_TIMESTAMP_SIZE)
		i = 0;
	tsc[i] = 0; /* mark last entry */
}
#endif /* KERN_TIMESTAMP */
1204 
1205 /*
1206  *
1207  */
1208 
1209 static int
1210 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
1211 {
1212     sysclock_t count;
1213     uint64_t tscval;
1214     char buf[32];
1215 
1216     crit_enter();
1217     if (sys_cputimer == &i8254_cputimer)
1218 	count = sys_cputimer->count();
1219     else
1220 	count = 0;
1221     if (tsc_present)
1222 	tscval = rdtsc();
1223     else
1224 	tscval = 0;
1225     crit_exit();
1226     ksnprintf(buf, sizeof(buf), "%08x %016llx", count, (long long)tscval);
1227     return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
1228 }
1229 
/* Shared state for one TSC MP-synchronization test pass. */
struct tsc_mpsync_arg {
	volatile uint64_t	tsc_target;	/* TSC value published by initiator */
	volatile int		tsc_mpsync;	/* cleared by any cpu seeing an older TSC */
};

/* Aggregated results from the per-AP test threads. */
struct tsc_mpsync_thr {
	volatile int		tsc_done_cnt;	/* # of threads finished */
	volatile int		tsc_mpsync_cnt;	/* # of threads that saw sync */
};
1239 
1240 static void
1241 tsc_mpsync_test_remote(void *xarg)
1242 {
1243 	struct tsc_mpsync_arg *arg = xarg;
1244 	uint64_t tsc;
1245 
1246 	tsc = rdtsc_ordered();
1247 	if (tsc < arg->tsc_target)
1248 		arg->tsc_mpsync = 0;
1249 }
1250 
/*
 * Run the TSC MP synchronization test from the current cpu: repeatedly
 * publish our TSC reading and have all other cpus verify (via cpusync)
 * that their own TSC is not behind it.  Clears arg->tsc_mpsync on
 * failure; also clears tsc_invariant if the TSC never advances.
 */
static void
tsc_mpsync_test_loop(struct tsc_mpsync_arg *arg)
{
	struct globaldata *gd = mycpu;
	tsc_uclock_t test_end, test_begin;
	u_int i;

	if (bootverbose) {
		kprintf("cpu%d: TSC testing MP synchronization ...\n",
		    gd->gd_cpuid);
	}

	test_begin = rdtsc_ordered();
	/* Run test for 100ms */
	test_end = test_begin + (tsc_frequency / 10);

	arg->tsc_mpsync = 1;
	arg->tsc_target = test_begin;

#define TSC_TEST_TRYMAX		1000000	/* Make sure we could stop */
#define TSC_TEST_TRYMIN		50000

	for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
		struct lwkt_cpusync cs;

		/*
		 * Publish our TSC while the other cpus are interlocked;
		 * the deinterlock runs tsc_mpsync_test_remote() on them.
		 * The mfence makes the tsc_target store globally visible
		 * before the remote cpus read their own TSC.
		 */
		crit_enter();
		lwkt_cpusync_init(&cs, gd->gd_other_cpus,
		    tsc_mpsync_test_remote, arg);
		lwkt_cpusync_interlock(&cs);
		cpu_pause();
		arg->tsc_target = rdtsc_ordered();
		cpu_mfence();
		lwkt_cpusync_deinterlock(&cs);
		crit_exit();
		cpu_pause();

		if (!arg->tsc_mpsync) {
			kprintf("cpu%d: TSC is not MP synchronized @%u\n",
			    gd->gd_cpuid, i);
			break;
		}
		/* Stop once the time budget is spent and enough tries ran. */
		if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
			break;
	}

#undef TSC_TEST_TRYMIN
#undef TSC_TEST_TRYMAX

	/* A target that never moved means the counter is not ticking. */
	if (arg->tsc_target == test_begin) {
		kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
		/* XXX disable TSC? */
		tsc_invariant = 0;
		arg->tsc_mpsync = 0;
		return;
	}

	if (arg->tsc_mpsync && bootverbose) {
		kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
		    gd->gd_cpuid, i);
	}
}
1312 
/*
 * Per-AP test thread: run the sync test loop from this cpu and fold
 * the result into the shared counters.  The sfence orders the
 * tsc_mpsync_cnt update before the tsc_done_cnt update that the
 * initiating cpu polls on.
 */
static void
tsc_mpsync_ap_thread(void *xthr)
{
	struct tsc_mpsync_thr *thr = xthr;
	struct tsc_mpsync_arg arg;

	tsc_mpsync_test_loop(&arg);
	if (arg.tsc_mpsync) {
		atomic_add_int(&thr->tsc_mpsync_cnt, 1);
		cpu_sfence();
	}
	atomic_add_int(&thr->tsc_done_cnt, 1);

	lwkt_exit();
}
1328 
/*
 * Boot-time check whether the TSC is usable as an MP-synchronized
 * timebase; sets tsc_mpsync accordingly.  The hw.tsc_cputimer_force
 * tunable can force the result on (positive) or disable the test
 * entirely (negative).
 */
static void
tsc_mpsync_test(void)
{
	struct tsc_mpsync_arg arg;

	if (!tsc_invariant) {
		/* Not even invariant TSC */
		return;
	}

	if (ncpus == 1) {
		/* Only one CPU */
		tsc_mpsync = 1;
		return;
	}

	/*
	 * Forcing can be used w/qemu to reduce contention
	 */
	TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);

	if (tsc_mpsync == 0) {
		/* Vendor-based pre-filter before running the real test. */
		switch (cpu_vendor_id) {
		case CPU_VENDOR_INTEL:
			/*
			 * Intel probably works
			 */
			break;

		case CPU_VENDOR_AMD:
			/*
			 * AMD < Ryzen probably doesn't work
			 */
			if (CPUID_TO_FAMILY(cpu_id) < 0x17)
				return;
			break;

		default:
			/* probably won't work */
			return;
		}
	} else if (tsc_mpsync < 0) {
		kprintf("TSC MP synchronization test is disabled\n");
		tsc_mpsync = 0;
		return;
	}

	/*
	 * Test even if forced above.  If forced, we will use the TSC
	 * even if the test fails.
	 */
	kprintf("TSC testing MP synchronization ...\n");

	tsc_mpsync_test_loop(&arg);
	if (arg.tsc_mpsync) {
		struct tsc_mpsync_thr thr;
		int cpu;

		/*
		 * Test TSC MP synchronization on APs.  Both counters
		 * start at 1 to account for this cpu, which already
		 * passed the loop above.
		 */

		thr.tsc_done_cnt = 1;
		thr.tsc_mpsync_cnt = 1;

		for (cpu = 0; cpu < ncpus; ++cpu) {
			if (cpu == mycpuid)
				continue;

			lwkt_create(tsc_mpsync_ap_thread, &thr, NULL,
			    NULL, 0, cpu, "tsc mpsync %d", cpu);
		}

		/* Wait for all APs; lfence pairs with the APs' sfence. */
		while (thr.tsc_done_cnt != ncpus) {
			cpu_pause();
			cpu_lfence();
		}
		if (thr.tsc_mpsync_cnt == ncpus)
			tsc_mpsync = 1;
	}

	if (tsc_mpsync)
		kprintf("TSC is MP synchronized\n");
	else
		kprintf("TSC is not MP synchronized\n");
}
SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);
1416 
/* Upper bound for the TSC-derived cputimer frequency. */
#define TSC_CPUTIMER_FREQMAX	128000000	/* 128Mhz */

/* Right-shift applied to rdtsc() so the effective freq fits under FREQMAX. */
static int tsc_cputimer_shift;
1420 
1421 static void
1422 tsc_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
1423 {
1424 	timer->base = 0;
1425 	timer->base = oldclock - timer->count();
1426 }
1427 
1428 static __inline sysclock_t
1429 tsc_cputimer_count(void)
1430 {
1431 	uint64_t tsc;
1432 
1433 	tsc = rdtsc();
1434 	tsc >>= tsc_cputimer_shift;
1435 
1436 	return (tsc + tsc_cputimer.base);
1437 }
1438 
1439 static sysclock_t
1440 tsc_cputimer_count_lfence(void)
1441 {
1442 	cpu_lfence();
1443 	return tsc_cputimer_count();
1444 }
1445 
1446 static sysclock_t
1447 tsc_cputimer_count_mfence(void)
1448 {
1449 	cpu_mfence();
1450 	return tsc_cputimer_count();
1451 }
1452 
1453 static uint64_t
1454 tsc_cpucounter_count_lfence(void)
1455 {
1456 
1457 	cpu_lfence();
1458 	return (rdtsc());
1459 }
1460 
1461 static uint64_t
1462 tsc_cpucounter_count_mfence(void)
1463 {
1464 
1465 	cpu_mfence();
1466 	return (rdtsc());
1467 }
1468 
/*
 * Register the TSC as the system cputimer (when MP synchronized and
 * not disabled via hw.tsc_cputimer_enable) and as the per-cpu
 * cpucounter (when at least invariant).  The cputimer frequency is
 * the TSC frequency right-shifted until it fits under FREQMAX.
 */
static void
tsc_cputimer_register(void)
{
	uint64_t freq;
	int enable = 1;

	if (!tsc_mpsync) {
		if (tsc_invariant) {
			/* Per-cpu cpucounter still works. */
			goto regcnt;
		}
		return;
	}

	TUNABLE_INT_FETCH("hw.tsc_cputimer_enable", &enable);
	if (!enable)
		return;

	/* Halve until the reported cputimer frequency is <= FREQMAX. */
	freq = tsc_frequency;
	while (freq > TSC_CPUTIMER_FREQMAX) {
		freq >>= 1;
		++tsc_cputimer_shift;
	}
	kprintf("TSC: cputimer freq %ju, shift %d\n",
	    (uintmax_t)freq, tsc_cputimer_shift);

	tsc_cputimer.freq = freq;

	/* lfence-based count is used on Intel, mfence elsewhere. */
	if (cpu_vendor_id == CPU_VENDOR_INTEL)
		tsc_cputimer.count = tsc_cputimer_count_lfence;
	else
		tsc_cputimer.count = tsc_cputimer_count_mfence; /* safe bet */

	cputimer_register(&tsc_cputimer);
	cputimer_select(&tsc_cputimer, 0);

	tsc_cpucounter.flags |= CPUCOUNTER_FLAG_MPSYNC;
regcnt:
	/* Always register the cpucounter when we get here. */
	tsc_cpucounter.freq = tsc_frequency;
	if (cpu_vendor_id == CPU_VENDOR_INTEL) {
		tsc_cpucounter.count =
		    tsc_cpucounter_count_lfence;
	} else {
		tsc_cpucounter.count =
		    tsc_cpucounter_count_mfence; /* safe bet */
	}
	cpucounter_register(&tsc_cpucounter);
}
SYSINIT(tsc_cputimer_reg, SI_BOOT2_POST_SMP, SI_ORDER_FIRST,
	tsc_cputimer_register, NULL);

/* hw.i8254 tree: timer frequency and combined 8254/TSC timestamp probe. */
SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
	    "frequency");
SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
	    0, 0, hw_i8254_timestamp, "A", "");

/* Read-only TSC capability/status knobs. */
SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
	    &tsc_present, 0, "TSC Available");
SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
	    &tsc_invariant, 0, "Invariant TSC");
SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
	    &tsc_mpsync, 0, "TSC is synchronized across CPUs");
SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
	    &tsc_frequency, 0, "TSC Frequency");
1534