1 /*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * Copyright (c) 2008-2021 The DragonFly Project. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * William Jolitz and Don Ahn.
7 *
8 * This code is derived from software contributed to The DragonFly Project
9 * by Matthew Dillon <dillon@backplane.com>
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * from: @(#)clock.c 7.2 (Berkeley) 5/12/91
36 * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
37 */
38
39 /*
40 * Routines to handle clock hardware.
41 */
42
43 /*
44 * inittodr, settodr and support routines written
45 * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
46 *
47 * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
48 */
49
50 #if 0
51 #include "opt_clock.h"
52 #endif
53
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/eventhandler.h>
57 #include <sys/time.h>
58 #include <sys/kernel.h>
59 #include <sys/bus.h>
60 #include <sys/sysctl.h>
61 #include <sys/cons.h>
62 #include <sys/kbio.h>
63 #include <sys/systimer.h>
64 #include <sys/globaldata.h>
65 #include <sys/machintr.h>
66 #include <sys/interrupt.h>
67
68 #include <sys/thread2.h>
69
70 #include <machine/clock.h>
71 #include <machine/cputypes.h>
72 #include <machine/frame.h>
73 #include <machine/ipl.h>
74 #include <machine/limits.h>
75 #include <machine/md_var.h>
76 #include <machine/psl.h>
77 #include <machine/segments.h>
78 #include <machine/smp.h>
79 #include <machine/specialreg.h>
80 #include <machine/intr_machdep.h>
81
82 #include <machine_base/apic/ioapic.h>
83 #include <machine_base/apic/ioapic_abi.h>
84 #include <machine_base/icu/icu.h>
85 #include <bus/isa/isa.h>
86 #include <bus/isa/rtc.h>
87 #include <machine_base/isa/timerreg.h>
88
/* Linker set of early timecounter initializers, run from startrtclock() */
SET_DECLARE(timecounter_init_set, const timecounter_init_t);
TIMECOUNTER_INIT(placeholder, NULL);

static void i8254_restore(void);
static void resettodr_on_shutdown(void *arg __unused);

/*
 * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
 * can use a simple formula for leap years.
 */
#define LEAPYEAR(y) ((u_int)(y) % 4 == 0)
#define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31)

/* i8254 input clock frequency in Hz, overridable by config */
#ifndef TIMER_FREQ
#define TIMER_FREQ 1193182
#endif
105
/* Which i8254 counter the free-running walltimer uses (set at construct) */
static uint8_t i8254_walltimer_sel;	/* TIMER_SELn select bits */
static uint16_t i8254_walltimer_cntr;	/* counter I/O port */
static int timer0_running;		/* timer0 one-shot currently armed */

int adjkerntz;		/* local offset from GMT in seconds */
int disable_rtc_set;	/* disable resettodr() if != 0 */
int tsc_present;
int tsc_invariant;
int tsc_mpsync;
int wall_cmos_clock;	/* wall CMOS clock assumed if != 0 */
tsc_uclock_t tsc_frequency;
tsc_uclock_t tsc_oneus_approx;	/* always at least 1, approx only */

/* Ownership state for each of the three i8254 counters */
enum tstate { RELEASED, ACQUIRED };
static enum tstate timer0_state;
static enum tstate timer1_state;
static enum tstate timer2_state;

int i8254_cputimer_disable;	/* No need to initialize i8254 cputimer. */

static int beeping = 0;
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
static int rtc_loaded;		/* set once inittodr() loads a valid time */

/* Timebase->i8254 tick divisor; 0 means fall back to muldivu64() */
static sysclock_t i8254_cputimer_div;

static int i8254_nointr;
static int i8254_intr_disable = 1;
TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);

static int calibrate_timers_with_rtc = 0;
TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);

static int calibrate_tsc_fast = 1;
TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast);

static int calibrate_test;
TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test);

/* Callout used by sysbeep() to stop the speaker after "period" ticks */
static struct callout sysbeepstop_ch;
148
149 static sysclock_t i8254_cputimer_count(void);
150 static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
151 static void i8254_cputimer_destruct(struct cputimer *cputimer);
152
/*
 * Free-running i8254 walltimer cputimer.  The counter used (1 or 2) and
 * the type are chosen later by i8254_cputimer_construct().
 */
static struct cputimer i8254_cputimer = {
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "i8254",
	.pri = CPUTIMER_PRI_8254,
	.type = 0,	/* determined later */
	.count = i8254_cputimer_count,
	.fromhz = cputimer_default_fromhz,
	.fromus = cputimer_default_fromus,
	.construct = i8254_cputimer_construct,
	.destruct = i8254_cputimer_destruct,
	.freq = TIMER_FREQ
};
165
166 static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
167 static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
168 static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);
169
/*
 * Interrupt cputimer backed by i8254 timer0 in one-shot (SWSTROBE) mode,
 * reloaded via i8254_intr_reload() and dispatched by clkintr().
 */
static struct cputimer_intr i8254_cputimer_intr = {
	.freq = TIMER_FREQ,
	.reload = i8254_intr_reload,
	.enable = cputimer_intr_default_enable,
	.config = i8254_intr_config,
	.restart = cputimer_intr_default_restart,
	.pmfixup = cputimer_intr_default_pmfixup,
	.initclock = i8254_intr_initclock,
	.pcpuhand = NULL,
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "i8254",
	.type = CPUTIMER_INTR_8254,
	.prio = CPUTIMER_INTR_PRIO_8254,
	.caps = CPUTIMER_INTR_CAP_PS,
	.priv = NULL
};
186
187 /*
188 * Use this to lwkt_switch() when the scheduler clock is not
189 * yet running, otherwise lwkt_switch() won't do anything.
190 * XXX needs cleaning up in lwkt_thread.c
191 */
static void
lwkt_force_switch(void)
{
	/*
	 * Manually run the scheduler clock for the current thread inside
	 * a critical section so the lwkt_switch() below has a reschedule
	 * request to act on.
	 */
	crit_enter();
	lwkt_schedulerclock(curthread);
	crit_exit();
	lwkt_switch();
}
200
201 /*
202 * timer0 clock interrupt. Timer0 is in one-shot mode and has stopped
203 * counting as of this interrupt. We use timer1 in free-running mode (not
204 * generating any interrupts) as our main counter. Each cpu has timeouts
205 * pending.
206 *
207 * This code is INTR_MPSAFE and may be called without the BGL held.
208 */
static void
clkintr(void *dummy, void *frame_arg)
{
	static sysclock_t sysclock_count;	/* NOTE! Must be static */
	struct globaldata *gd = mycpu;
	struct globaldata *gscan;
	int n;

	/*
	 * SWSTROBE mode is a one-shot, the timer is no longer running
	 */
	timer0_running = 0;

	/*
	 * XXX the dispatcher needs work.  right now we call systimer_intr()
	 * directly or via IPI for any cpu with systimers queued, which is
	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
	 */
	sysclock_count = sys_cputimer->count();
	for (n = 0; n < ncpus; ++n) {
		gscan = globaldata_find(n);
		/* Skip cpus that have no systimers queued */
		if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
			continue;
		if (gscan != gd) {
			/*
			 * Remote cpu: run its systimers via IPI.  The
			 * count is passed by pointer, which is why
			 * sysclock_count must be static (it is read by
			 * the target after this function returns).
			 */
			lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
					&sysclock_count, 1);
		} else {
			/* Local cpu: dispatch directly with our frame */
			systimer_intr(&sysclock_count, 0, frame_arg);
		}
	}
}
240
241
242 /*
243 * NOTE! not MP safe.
244 */
245 int
acquire_timer2(int mode)246 acquire_timer2(int mode)
247 {
248 if (timer2_state != RELEASED)
249 return (-1);
250 timer2_state = ACQUIRED;
251
252 /*
253 * This access to the timer registers is as atomic as possible
254 * because it is a single instruction. We could do better if we
255 * knew the rate.
256 */
257 outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
258 return (0);
259 }
260
261 int
release_timer2(void)262 release_timer2(void)
263 {
264 if (timer2_state != ACQUIRED)
265 return (-1);
266 outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
267 timer2_state = RELEASED;
268 return (0);
269 }
270
271 #include "opt_ddb.h"
272 #ifdef DDB
273 #include <ddb/ddb.h>
274
/*
 * ddb "show rtc" command: dump the RTC date/time and control/status
 * registers for debugging.
 */
DB_SHOW_COMMAND(rtc, rtc)
{
	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
		rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
		rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
		rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
}
282 #endif /* DDB */
283
284 /*
285 * Return the current cpu timer count as a 32 bit integer.
286 */
static
sysclock_t
i8254_cputimer_count(void)
{
	static uint16_t cputimer_last;	/* previous countup, for rollover */
	uint16_t count;
	sysclock_t ret;

	clock_lock();
	/*
	 * Latch the walltimer and read its 16 bit countdown value,
	 * lsb first then msb, then negate to get a countup value.
	 */
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
	count = (uint8_t)inb(i8254_walltimer_cntr);	/* get countdown */
	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
	count = -count;			/* -> countup */
	if (count < cputimer_last)	/* rollover */
		i8254_cputimer.base += 0x00010000U;
	/* base supplies the upper bits, the hardware count the low 16 */
	ret = i8254_cputimer.base | count;
	cputimer_last = count;
	clock_unlock();

	return(ret);
}
308
309 /*
310 * This function is called whenever the system timebase changes, allowing
311 * us to calculate what is needed to convert a system timebase tick
312 * into an 8254 tick for the interrupt timer. If we can convert to a
313 * simple shift, multiplication, or division, we do so. Otherwise 64
314 * bit arithmatic is required every time the interrupt timer is reloaded.
315 */
316 static void
i8254_intr_config(struct cputimer_intr * cti,const struct cputimer * timer)317 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
318 {
319 sysclock_t freq;
320 sysclock_t div;
321
322 /*
323 * Will a simple divide do the trick?
324 */
325 div = (timer->freq + (cti->freq / 2)) / cti->freq;
326 freq = cti->freq * div;
327
328 if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
329 i8254_cputimer_div = div;
330 else
331 i8254_cputimer_div = 0;
332 }
333
334 /*
335 * Reload for the next timeout. It is possible for the reload value
336 * to be 0 or negative, indicating that an immediate timer interrupt
337 * is desired. For now make the minimum 2 ticks.
338 *
339 * We may have to convert from the system timebase to the 8254 timebase.
340 */
static void
i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	uint16_t count;

	/* A wrapped (negative) reload means an immediate timeout is wanted */
	if ((ssysclock_t)reload < 0)
		reload = 1;
	/* Convert from system timebase ticks to i8254 ticks */
	if (i8254_cputimer_div)
		reload /= i8254_cputimer_div;
	else
		reload = muldivu64(reload, cti->freq, sys_cputimer->freq);

	if (reload < 2)
		reload = 2;	/* minimum count */
	if (reload > 0xFFFF)
		reload = 0xFFFF; /* almost full count (0 is full count) */

	clock_lock();
	if (timer0_running) {
		/*
		 * Timer is already armed.  Latch and read the remaining
		 * count and only reprogram if the new timeout would fire
		 * earlier; otherwise let the pending one-shot stand.
		 */
		outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); /* count-down timer */
		count = (uint8_t)inb(TIMER_CNTR0);	    /* lsb */
		count |= ((uint8_t)inb(TIMER_CNTR0) << 8);  /* msb */
		if (reload < count) {
			outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
			outb(TIMER_CNTR0, (uint8_t)reload);	   /* lsb */
			outb(TIMER_CNTR0, (uint8_t)(reload >> 8)); /* msb */
		}
	} else {
		/* Timer idle: arm a new one-shot */
		timer0_running = 1;
		outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
		outb(TIMER_CNTR0, (uint8_t)reload);	   /* lsb */
		outb(TIMER_CNTR0, (uint8_t)(reload >> 8)); /* msb */
	}
	clock_unlock();
}
376
377 /*
378 * DELAY(usec) - Spin for the specified number of microseconds.
379 * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
380 * but do a thread switch in the loop
381 *
382 * Relies on timer 1 counting down from (cputimer_freq / hz)
383 * Note: timer had better have been programmed before this is first used!
384 */
static void
DODELAY(int n, int doswitch)
{
	ssysclock_t delta, ticks_left;
	sysclock_t prev_tick, tick;

#ifdef DELAYDEBUG
	int getit_calls = 1;
	int n1;
	static int state = 0;

	/* One-time self-test: exercise DELAY() over several magnitudes */
	if (state == 0) {
		state = 1;
		for (n1 = 1; n1 <= 10000000; n1 *= 10)
			DELAY(n1);
		state = 2;
	}
	if (state == 1)
		kprintf("DELAY(%d)...", n);
#endif
	/*
	 * Guard against the timer being uninitialized if we are called
	 * early for console i/o.
	 */
	if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
		i8254_restore();

	/*
	 * Read the counter first, so that the rest of the setup overhead is
	 * counted.  Then calculate the number of hardware timer ticks
	 * required, rounding up to be sure we delay at least the requested
	 * number of microseconds.
	 */
	prev_tick = sys_cputimer->count();
	ticks_left = muldivu64(n, sys_cputimer->freq + 999999, 1000000);

	/*
	 * Loop until done.
	 */
	while (ticks_left > 0) {
		tick = sys_cputimer->count();
#ifdef DELAYDEBUG
		++getit_calls;
#endif
		/*
		 * Clamp negative deltas; with the unsigned counter these
		 * only appear if the timer is reprogrammed underneath us.
		 */
		delta = tick - prev_tick;
		prev_tick = tick;
		if (delta < 0)
			delta = 0;
		ticks_left -= delta;
		/* Optionally yield the cpu while waiting (DRIVERSLEEP) */
		if (doswitch && ticks_left > 0)
			lwkt_switch();
		cpu_pause();
	}
#ifdef DELAYDEBUG
	if (state == 1)
		kprintf(" %d calls to getit() at %d usec each\n",
			getit_calls, (n + 5) / getit_calls);
#endif
}
444
445 /*
446 * DELAY() never switches.
447 */
void
DELAY(int n)
{
	/* Pure spin for n microseconds, never switches threads */
	DODELAY(n, 0);
}
453
454 /*
455 * Returns non-zero if the specified time period has elapsed. Call
456 * first with last_clock set to 0.
457 */
int
CHECKTIMEOUT(TOTALDELAY *tdd)
{
	sysclock_t delta;
	int us;

	if (tdd->started == 0) {
		/*
		 * First call: make sure the timer is running and record
		 * the starting count; no time is charged yet.
		 */
		if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
			i8254_restore();
		tdd->last_clock = sys_cputimer->count();
		tdd->started = 1;
		return(0);
	}
	/*
	 * Convert the elapsed count to whole microseconds and charge it
	 * against the budget.  last_clock is advanced by only the
	 * whole-microsecond portion so sub-microsecond residue is not
	 * lost across calls.
	 */
	delta = sys_cputimer->count() - tdd->last_clock;
	us = muldivu64(delta, 1000000, sys_cputimer->freq);
	tdd->last_clock += muldivu64(us, sys_cputimer->freq, 1000000);
	tdd->us -= us;

	return (tdd->us < 0);
}
478
479
480 /*
481 * DRIVERSLEEP() does not switch if called with a spinlock held or
482 * from a hard interrupt.
483 */
484 void
DRIVERSLEEP(int usec)485 DRIVERSLEEP(int usec)
486 {
487 globaldata_t gd = mycpu;
488
489 if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
490 DODELAY(usec, 0);
491 } else {
492 DODELAY(usec, 1);
493 }
494 }
495
/*
 * Callout handler: silence the speaker and give timer2 back once the
 * beep period set by sysbeep() expires.
 */
static void
sysbeepstop(void *chan)
{
	outb(IO_PPI, inb(IO_PPI)&0xFC);	/* disable counter2 output to speaker */
	beeping = 0;
	release_timer2();
}
503
504 int
sysbeep(int pitch,int period)505 sysbeep(int pitch, int period)
506 {
507 if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
508 return(-1);
509 if (sysbeep_enable == 0)
510 return(-1);
511 /*
512 * Nobody else is using timer2, we do not need the clock lock
513 */
514 outb(TIMER_CNTR2, pitch);
515 outb(TIMER_CNTR2, (pitch>>8));
516 if (!beeping) {
517 /* enable counter2 output to speaker */
518 outb(IO_PPI, inb(IO_PPI) | 3);
519 beeping = period;
520 callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
521 }
522 return (0);
523 }
524
525 /*
526 * RTC support routines
527 */
528
/*
 * Read an RTC/CMOS register.  The dummy reads of port 0x84 provide the
 * I/O recovery delay the mc146818 needs between accesses.
 */
int
rtcin(int reg)
{
	u_char val;

	crit_enter();
	outb(IO_RTC, reg);	/* select register */
	inb(0x84);
	val = inb(IO_RTC + 1);	/* read its data byte */
	inb(0x84);
	crit_exit();
	return (val);
}
542
/*
 * Write an RTC/CMOS register: select the register, then write the data
 * byte, with 0x84 dummy reads for I/O recovery delays.
 */
static __inline void
writertc(u_char reg, u_char val)
{
	crit_enter();
	inb(0x84);
	outb(IO_RTC, reg);	/* select register */
	inb(0x84);
	outb(IO_RTC + 1, val);	/* write its data byte */
	inb(0x84);		/* XXX work around wrong order in rtcin() */
	crit_exit();
}
554
/*
 * Read an RTC register and convert its BCD contents to binary.
 */
static __inline int
readrtc(int port)
{
	int bcdval;

	bcdval = rtcin(port);
	return (bcd2bin(bcdval));
}
560
/*
 * Calibrate the i8254 (and, if present, the TSC) against the RTC's
 * 1 Hz seconds counter.  Returns the measured i8254 frequency in Hz,
 * or the default frequency if the RTC is unusable.
 */
static u_int
calibrate_clocks(void)
{
	tsc_uclock_t old_tsc;
	sysclock_t tot_count;
	sysclock_t count, prev_count;
	int sec, start_sec, timeout;

	if (bootverbose)
		kprintf("Calibrating clock(s) ...\n");
	/* Bail if the RTC reports its time/battery as invalid */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto fail;
	timeout = 100000000;

	/* Read the mc146818A seconds counter. */
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Wait for the mC146818A seconds counter to change. */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			if (sec != start_sec)
				break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Start keeping track of the i8254 counter. */
	prev_count = sys_cputimer->count();
	tot_count = 0;

	if (tsc_present)
		old_tsc = rdtsc();
	else
		old_tsc = 0;		/* shut up gcc */

	/*
	 * Wait for the mc146818A seconds counter to change.  Read the i8254
	 * counter for each iteration since this is convenient and only
	 * costs a few usec of inaccuracy.  The timing of the final reads
	 * of the counters almost matches the timing of the initial reads,
	 * so the main cause of inaccuracy is the varying latency from
	 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
	 * rtcin(RTC_SEC) that returns a changed seconds count.  The
	 * maximum inaccuracy from this cause is < 10 usec on 486's.
	 */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
			sec = rtcin(RTC_SEC);
		count = sys_cputimer->count();
		/* accumulate deltas so 16-bit counter wraps are handled */
		tot_count += (sysclock_t)(count - prev_count);
		prev_count = count;
		if (sec != start_sec)
			break;
		if (--timeout == 0)
			goto fail;
	}

	/*
	 * Read the cpu cycle counter.  The timing considerations are
	 * similar to those for the i8254 clock.
	 */
	if (tsc_present) {
		tsc_frequency = rdtsc() - old_tsc;
		if (bootverbose) {
			kprintf("TSC clock: %jd Hz (Method A)\n",
			    (intmax_t)tsc_frequency);
		}
	}
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	kprintf("i8254 clock: %lu Hz\n", tot_count);
	return (tot_count);

fail:
	kprintf("failed, using default i8254 clock of %lu Hz\n",
		i8254_cputimer.freq);
	return (i8254_cputimer.freq);
}
650
/*
 * (Re)program the i8254: arm timer0 as the one-shot interrupt timer and
 * register/select our cputimer and cputimer_intr implementations.
 */
static void
i8254_restore(void)
{
	timer0_state = ACQUIRED;

	clock_lock();

	/*
	 * Timer0 is our fine-grained variable clock interrupt
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, 2);	/* lsb */
	outb(TIMER_CNTR0, 0);	/* msb */
	clock_unlock();

	if (!i8254_nointr) {
		cputimer_intr_register(&i8254_cputimer_intr);
		cputimer_intr_select(&i8254_cputimer_intr, 0);
	}

	/*
	 * Timer1 or timer2 is our free-running clock, but only if another
	 * has not been selected.
	 */
	cputimer_register(&i8254_cputimer);
	cputimer_select(&i8254_cputimer, 0);
}
678
/*
 * cputimer construct hook: pick which i8254 counter (1 or 2) serves as
 * the free-running walltimer and start it in rate-generator mode.
 */
static void
i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
{
	int which;

	/*
	 * Should we use timer 1 or timer 2 ?
	 */
	which = 0;
	TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
	if (which != 1 && which != 2)
		which = 2;

	switch(which) {
	case 1:
		timer->name = "i8254_timer1";
		timer->type = CPUTIMER_8254_SEL1;
		i8254_walltimer_sel = TIMER_SEL1;
		i8254_walltimer_cntr = TIMER_CNTR1;
		timer1_state = ACQUIRED;
		break;
	case 2:
		timer->name = "i8254_timer2";
		timer->type = CPUTIMER_8254_SEL2;
		i8254_walltimer_sel = TIMER_SEL2;
		i8254_walltimer_cntr = TIMER_CNTR2;
		timer2_state = ACQUIRED;
		break;
	}

	/*
	 * Round the base up past the old clock so time does not appear
	 * to run backwards; the low 16 bits come from the hardware count.
	 */
	timer->base = (oldclock + 0xFFFF) & 0xFFFFFFFFFFFF0000LU;

	clock_lock();
	/* Rate generator with a 0 load counts the full 2^16 and repeats */
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
	outb(i8254_walltimer_cntr, 0);	/* lsb */
	outb(i8254_walltimer_cntr, 0);	/* msb */
	outb(IO_PPI, inb(IO_PPI) | 1);	/* bit 0: enable gate, bit 1: spkr */
	clock_unlock();
}
718
719 static void
i8254_cputimer_destruct(struct cputimer * timer)720 i8254_cputimer_destruct(struct cputimer *timer)
721 {
722 switch(timer->type) {
723 case CPUTIMER_8254_SEL1:
724 timer1_state = RELEASED;
725 break;
726 case CPUTIMER_8254_SEL2:
727 timer2_state = RELEASED;
728 break;
729 default:
730 break;
731 }
732 timer->type = 0;
733 }
734
static void
rtc_restore(void)
{
	/* Restore all of the RTC's "status" (actually, control) registers. */
	/*
	 * STATUSB is written first without the periodic-interrupt enable
	 * (rtc_statusb includes RTCSB_PINTR), then STATUSA, then the
	 * full rtc_statusb value.
	 */
	writertc(RTC_STATUSB, RTCSB_24HR);
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, rtc_statusb);
}
743
744 /*
745 * Restore all the timers.
746 *
747 * This function is called to resynchronize our core timekeeping after a
748 * long halt, e.g. from apm_default_resume() and friends. It is also
749 * called if after a BIOS call we have detected munging of the 8254.
750 * It is necessary because cputimer_count() counter's delta may have grown
751 * too large for nanouptime() and friends to handle, or (in the case of 8254
752 * munging) might cause the SYSTIMER code to prematurely trigger.
753 */
void
timer_restore(void)
{
	/* Critical section keeps interrupts out while reprogramming */
	crit_enter();
	if (i8254_cputimer_disable == 0)
		i8254_restore();	/* restore timer_freq and hz */
	rtc_restore();			/* reenable RTC interrupts */
	crit_exit();
}
763
764 #define MAX_MEASURE_RETRIES 100
765
/*
 * Take one low-noise sample of the cputimer.  Returns the TSC value
 * read just before the cputimer; *latency gets the TSC delta bracketing
 * the read, *time the cputimer count, *retries how many samples were
 * discarded.  timer_latency is the expected TSC cost of one read.
 */
static u_int64_t
do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time,
    int *retries)
{
	u_int64_t tsc1, tsc2;
	u_int64_t threshold;
	sysclock_t val;
	int cnt = 0;

	do {
		/*
		 * Reject samples whose bracketing TSC delta exceeds the
		 * threshold (i.e. the read was disturbed).  Start with
		 * expected latency +25%, relax to 2x after half the
		 * retry budget is used.
		 */
		if (cnt > MAX_MEASURE_RETRIES/2)
			threshold = timer_latency << 1;
		else
			threshold = timer_latency + (timer_latency >> 2);

		cnt++;
		tsc1 = rdtsc_ordered();
		val = sys_cputimer->count();
		tsc2 = rdtsc_ordered();
	} while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES &&
	    tsc2 - tsc1 > threshold);

	*retries = cnt - 1;
	*latency = tsc2 - tsc1;
	*time = val;
	return tsc1;
}
793
/*
 * Estimate the TSC frequency (Hz) by timing a DELAY(usecs) interval
 * against sys_cputimer.  timer_latency is the measured TSC cost of one
 * cputimer read, used by do_measure() to reject noisy samples.
 */
static u_int64_t
do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency)
{
	if (calibrate_tsc_fast) {
		u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1;
		u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2;
		u_int64_t freq1, freq2;
		sysclock_t start1, end1, start2, end2;
		int retries1, retries2, retries3, retries4;

		/*
		 * Take two start samples and two end samples (20ms
		 * apart) bracketing the DELAY(usecs) window and average
		 * the two resulting frequency estimates.
		 */
		DELAY(1000);
		old_tsc1 = do_measure(timer_latency, &start_lat1, &start1,
		    &retries1);
		DELAY(20000);
		old_tsc2 = do_measure(timer_latency, &start_lat2, &start2,
		    &retries2);
		DELAY(usecs);
		new_tsc1 = do_measure(timer_latency, &end_lat1, &end1,
		    &retries3);
		DELAY(20000);
		new_tsc2 = do_measure(timer_latency, &end_lat2, &end2,
		    &retries4);

		/* Compensate for the latency of the cputimer reads */
		old_tsc1 += start_lat1;
		old_tsc2 += start_lat2;
		freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2;
		freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2;
		end1 -= start1;
		end2 -= start2;
		/* This should in practice be safe from overflows. */
		freq1 = muldivu64(freq1, sys_cputimer->freq, end1);
		freq2 = muldivu64(freq2, sys_cputimer->freq, end2);
		if (calibrate_test && (retries1 > 0 || retries2 > 0)) {
			kprintf("%s: retries: %d, %d, %d, %d\n",
			    __func__, retries1, retries2, retries3, retries4);
		}
		if (calibrate_test) {
			kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n",
			    __func__, freq1, freq2, (freq1 + freq2) / 2);
		}
		return (freq1 + freq2) / 2;
	} else {
		u_int64_t old_tsc, new_tsc;
		u_int64_t freq;

		/* Slow method: one long DELAY() bracketed by TSC reads */
		old_tsc = rdtsc_ordered();
		DELAY(usecs);
		new_tsc = rdtsc();
		freq = new_tsc - old_tsc;
		/* This should in practice be safe from overflows. */
		freq = (freq * 1000 * 1000) / usecs;
		return freq;
	}
}
848
849 /*
850 * Initialize 8254 timer 0 early so that it can be used in DELAY().
851 */
void
startrtclock(void)
{
	const timecounter_init_t **list;
	sysclock_t delta, freq;

	callout_init_mp(&sysbeepstop_ch);

	/*
	 * Can we use the TSC?
	 *
	 * NOTE: If running under qemu, probably a good idea to force the
	 *	 TSC because we are not likely to detect it as being
	 *	 invariant or mpsyncd if you don't.  This will greatly
	 *	 reduce SMP contention.
	 */
	if (cpu_feature & CPUID_TSC) {
		tsc_present = 1;
		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);

		/*
		 * Check the InvariantTSC bit (CPUID leaf 0x80000007,
		 * EDX bit 8) on Intel and AMD cpus.
		 */
		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
		     cpu_vendor_id == CPU_VENDOR_AMD) &&
		    cpu_exthigh >= 0x80000007) {
			u_int regs[4];

			do_cpuid(0x80000007, regs);
			if (regs[3] & 0x100)
				tsc_invariant = 1;
		}
	} else {
		tsc_present = 0;
	}

	/*
	 * Initial RTC state, don't do anything unexpected
	 */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	/* Run any registered timecounter initializers */
	SET_FOREACH(list, timecounter_init_set) {
		if ((*list)->configure != NULL)
			(*list)->configure();
	}

	/*
	 * If tsc_frequency is already initialized now, and a flag is set
	 * that i8254 timer is unneeded, we are done.
	 */
	if (tsc_frequency != 0 && i8254_cputimer_disable != 0)
		goto done;

	/*
	 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
	 * generate an interrupt, which we will ignore for now.
	 *
	 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
	 * (so it counts a full 2^16 and repeats).  We will use this timer
	 * for our counting.
	 */
	if (i8254_cputimer_disable == 0)
		i8254_restore();

	kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name);

	/*
	 * When booting without verbose messages, it's pointless to run the
	 * calibrate_clocks() calibration code, when we don't use the
	 * results in any way.  With bootverbose, we are at least printing
	 * this information to the kernel log.
	 */
	if (i8254_cputimer_disable != 0 ||
	    (calibrate_timers_with_rtc == 0 && !bootverbose)) {
		goto skip_rtc_based;
	}

	freq = calibrate_clocks();
#ifdef CLK_CALIBRATION_LOOP
	if (bootverbose) {
		int c;

		cnpoll(TRUE);
		kprintf("Press a key on the console to "
			"abort clock calibration\n");
		while ((c = cncheckc()) == -1 || c == NOKEY)
			calibrate_clocks();
		cnpoll(FALSE);
	}
#endif

	/*
	 * Use the calibrated i8254 frequency if it seems reasonable.
	 * Otherwise use the default, and don't use the calibrated i586
	 * frequency.
	 */
	delta = freq > i8254_cputimer.freq ?
		freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
	if (delta < i8254_cputimer.freq / 100) {
		/* within 1% of the default: plausible calibration */
		if (calibrate_timers_with_rtc == 0) {
			kprintf(
"hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
			freq = i8254_cputimer.freq;
		}
		/*
		 * NOTE:
		 * Interrupt timer's freq must be adjusted
		 * before we change the cputimer's frequency.
		 */
		i8254_cputimer_intr.freq = freq;
		cputimer_set_frequency(&i8254_cputimer, freq);
	} else {
		if (bootverbose)
			kprintf("%lu Hz differs from default of %lu Hz "
				"by more than 1%%\n",
				freq, i8254_cputimer.freq);
		/* distrust the RTC-based TSC measurement as well */
		tsc_frequency = 0;
	}

	if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
		kprintf("hw.calibrate_timers_with_rtc not "
			"set - using old calibration method\n");
		tsc_frequency = 0;
	}

skip_rtc_based:
	if (tsc_present && tsc_frequency == 0) {
		u_int cnt;
		u_int64_t cputime_latency_tsc = 0, max = 0, min = 0;
		int i;

		for (i = 0; i < 10; i++) {
			/* Warm up */
			(void)sys_cputimer->count();
		}
		/*
		 * Sample the TSC cost of one cputimer read 100 times,
		 * tracking min/max and the average.
		 */
		for (i = 0; i < 100; i++) {
			u_int64_t old_tsc, new_tsc;

			old_tsc = rdtsc_ordered();
			(void)sys_cputimer->count();
			new_tsc = rdtsc_ordered();
			cputime_latency_tsc += (new_tsc - old_tsc);
			if (max < (new_tsc - old_tsc))
				max = new_tsc - old_tsc;
			if (min == 0 || min > (new_tsc - old_tsc))
				min = new_tsc - old_tsc;
		}
		cputime_latency_tsc /= 100;
		kprintf(
		    "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n",
		    cputime_latency_tsc, min, max);
		/* XXX Instead of this, properly filter out outliers. */
		cputime_latency_tsc = min;

		if (calibrate_test > 0) {
			/* Diagnostic: calibrate at 100ms..2s and compare */
			u_int64_t values[20], avg = 0;
			for (i = 1; i <= 20; i++) {
				u_int64_t freq;

				freq = do_calibrate_cputimer(i * 100 * 1000,
				    cputime_latency_tsc);
				values[i - 1] = freq;
			}
			/* Compute an average TSC for the 1s to 2s delays. */
			for (i = 10; i < 20; i++)
				avg += values[i];
			avg /= 10;
			for (i = 0; i < 20; i++) {
				kprintf("%ums: %lu (Diff from average: %ld)\n",
				    (i + 1) * 100, values[i],
				    (int64_t)(values[i] - avg));
			}
		}

		/*
		 * Choose the calibration window: the fast method can use
		 * a shorter window when the cputimer is high resolution.
		 */
		if (calibrate_tsc_fast > 0) {
			/* HPET would typically be >10MHz */
			if (sys_cputimer->freq >= 10000000)
				cnt = 200000;
			else
				cnt = 500000;
		} else {
			cnt = 1000000;
		}

		tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc);
		if (bootverbose && calibrate_timers_with_rtc) {
			kprintf("TSC clock: %jd Hz (Method B)\n",
			    (intmax_t)tsc_frequency);
		}
	}

done:
	if (tsc_present) {
		kprintf("TSC%s clock: %jd Hz\n",
		    tsc_invariant ? " invariant" : "",
		    (intmax_t)tsc_frequency);
	}
	/* Approximate TSC ticks per microsecond, always at least 1 */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
			      NULL, SHUTDOWN_PRI_LAST);
}
1052
1053 /*
1054 * Sync the time of day back to the RTC on shutdown, but only if
1055 * we have already loaded it and have not crashed.
1056 */
1057 static void
resettodr_on_shutdown(void * arg __unused)1058 resettodr_on_shutdown(void *arg __unused)
1059 {
1060 if (rtc_loaded && panicstr == NULL) {
1061 resettodr();
1062 }
1063 }
1064
1065 /*
1066 * Initialize the time of day register, based on the time base which is, e.g.
1067 * from a filesystem.
1068 */
void
inittodr(time_t base)
{
	unsigned long sec, days;
	int year, month;
	int y, m;
	struct timespec ts;

	/*
	 * Seed the clock from the filesystem-supplied base time; the
	 * RTC value read below overrides it only if the two differ
	 * by 2 seconds or more.
	 */
	if (base) {
		ts.tv_sec = base;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}

	/* Look if we have a RTC present and the time is valid */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto wrong_time;

	/* wait for time update to complete */
	/* If RTCSA_TUP is zero, we have at least 244us before next update */
	crit_enter();
	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
		/* briefly drop the critical section between polls */
		crit_exit();
		crit_enter();
	}

	days = 0;
#ifdef USE_RTC_CENTURY
	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
#else
	year = readrtc(RTC_YEAR) + 1900;
	if (year < 1970)
		year += 100;
#endif
	if (year < 1970) {
		crit_exit();
		goto wrong_time;
	}
	/* Days contributed by completed months of the current year */
	month = readrtc(RTC_MONTH);
	for (m = 1; m < month; m++)
		days += daysinmonth[m-1];
	if ((month > 2) && LEAPYEAR(year))
		days ++;
	days += readrtc(RTC_DAY) - 1;
	/* Whole days for each year since the 1970 epoch */
	for (y = 1970; y < year; y++)
		days += DAYSPERYEAR + LEAPYEAR(y);
	sec = ((( days * 24 +
		  readrtc(RTC_HRS)) * 60 +
		  readrtc(RTC_MIN)) * 60 +
		  readrtc(RTC_SEC));
	/* sec now contains the number of seconds, since Jan 1 1970,
	   in the local time zone */

	/* Convert from local time to UTC */
	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	y = (int)(time_second - sec);
	if (y <= -2 || y >= 2) {
		/* badly off, adjust it */
		ts.tv_sec = sec;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}
	/* allow resettodr_on_shutdown() to write the clock back */
	rtc_loaded = 1;
	crit_exit();
	return;

wrong_time:
	kprintf("Invalid time in real time clock.\n");
	kprintf("Check and reset the date immediately!\n");
}
1139
1140 /*
1141 * Write system time back to RTC
1142 */
1143 void
resettodr(void)1144 resettodr(void)
1145 {
1146 struct timeval tv;
1147 unsigned long tm;
1148 int m;
1149 int y;
1150
1151 if (disable_rtc_set)
1152 return;
1153
1154 microtime(&tv);
1155 tm = tv.tv_sec;
1156
1157 crit_enter();
1158 /* Disable RTC updates and interrupts. */
1159 writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
1160
1161 /* Calculate local time to put in RTC */
1162
1163 tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
1164
1165 writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */
1166 writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */
1167 writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */
1168
1169 /* We have now the days since 01-01-1970 in tm */
1170 writertc(RTC_WDAY, (tm+4)%7); /* Write back Weekday */
1171 for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
1172 tm >= m;
1173 y++, m = DAYSPERYEAR + LEAPYEAR(y))
1174 tm -= m;
1175
1176 /* Now we have the years in y and the day-of-the-year in tm */
1177 writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */
1178 #ifdef USE_RTC_CENTURY
1179 writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */
1180 #endif
1181 for (m = 0; ; m++) {
1182 int ml;
1183
1184 ml = daysinmonth[m];
1185 if (m == 1 && LEAPYEAR(y))
1186 ml++;
1187 if (tm < ml)
1188 break;
1189 tm -= ml;
1190 }
1191
1192 writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */
1193 writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */
1194
1195 /* Reenable RTC updates and interrupts. */
1196 writertc(RTC_STATUSB, rtc_statusb);
1197 crit_exit();
1198 }
1199
/*
 * Verify that 8254 timer0 interrupts are actually delivered through the
 * I/O APIC on the given irq.  Forces a short timer reload, spins for
 * 1/100s on the cputimer, and checks whether the irq's interrupt count
 * advanced.  Returns 0 on success, ENOENT if no interrupt arrived.
 */
static int
i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
{
	sysclock_t base;
	long lastcnt;

	/*
	 * Following code assumes the 8254 is the cpu timer,
	 * so make sure it is.
	 */
	/*KKASSERT(sys_cputimer == &i8254_cputimer); (tested by CuteLarva) */
	KKASSERT(cti == &i8254_cputimer_intr);

	lastcnt = get_interrupt_counter(irq, mycpuid);

	/*
	 * Force an 8254 Timer0 interrupt and wait 1/100s for
	 * it to happen, then see if we got it.
	 */
	kprintf("IOAPIC: testing 8254 interrupt delivery...");

	/* reload with a 2us count so the interrupt fires almost at once */
	i8254_intr_reload(cti, sys_cputimer->fromus(2));
	base = sys_cputimer->count();
	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
		;			/* nothing */

	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0) {
		kprintf(" failed\n");
		return ENOENT;
	} else {
		kprintf(" success\n");
	}
	return 0;
}
1234
1235 /*
1236 * Start both clocks running. DragonFly note: the stat clock is no longer
1237 * used. Instead, 8254 based systimers are used for all major clock
1238 * interrupts.
1239 */
1240 static void
i8254_intr_initclock(struct cputimer_intr * cti,boolean_t selected)1241 i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
1242 {
1243 void *clkdesc = NULL;
1244 int irq = 0, mixed_mode = 0, error;
1245
1246 KKASSERT(mycpuid == 0);
1247
1248 if (!selected && i8254_intr_disable)
1249 goto nointr;
1250
1251 /*
1252 * The stat interrupt mask is different without the
1253 * statistics clock. Also, don't set the interrupt
1254 * flag which would normally cause the RTC to generate
1255 * interrupts.
1256 */
1257 rtc_statusb = RTCSB_24HR;
1258
1259 /* Finish initializing 8254 timer 0. */
1260 if (ioapic_enable) {
1261 irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
1262 INTR_POLARITY_HIGH);
1263 if (irq < 0) {
1264 mixed_mode_setup:
1265 error = ioapic_conf_legacy_extint(0);
1266 if (!error) {
1267 irq = machintr_legacy_intr_find(0,
1268 INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
1269 if (irq < 0)
1270 error = ENOENT;
1271 }
1272
1273 if (error) {
1274 if (!selected) {
1275 kprintf("IOAPIC: setup mixed mode for "
1276 "irq 0 failed: %d\n", error);
1277 goto nointr;
1278 } else {
1279 panic("IOAPIC: setup mixed mode for "
1280 "irq 0 failed: %d\n", error);
1281 }
1282 }
1283 mixed_mode = 1;
1284 }
1285 clkdesc = register_int(irq, clkintr, NULL, "clk",
1286 NULL,
1287 INTR_EXCL | INTR_CLOCK |
1288 INTR_NOPOLL | INTR_MPSAFE |
1289 INTR_NOENTROPY, 0);
1290 } else {
1291 register_int(0, clkintr, NULL, "clk", NULL,
1292 INTR_EXCL | INTR_CLOCK |
1293 INTR_NOPOLL | INTR_MPSAFE |
1294 INTR_NOENTROPY, 0);
1295 }
1296
1297 /* Initialize RTC. */
1298 writertc(RTC_STATUSA, rtc_statusa);
1299 writertc(RTC_STATUSB, RTCSB_24HR);
1300
1301 if (ioapic_enable) {
1302 error = i8254_ioapic_trial(irq, cti);
1303 if (error) {
1304 if (mixed_mode) {
1305 if (!selected) {
1306 kprintf("IOAPIC: mixed mode for irq %d "
1307 "trial failed: %d\n",
1308 irq, error);
1309 goto nointr;
1310 } else {
1311 panic("IOAPIC: mixed mode for irq %d "
1312 "trial failed: %d\n", irq, error);
1313 }
1314 } else {
1315 kprintf("IOAPIC: warning 8254 is not connected "
1316 "to the correct pin, try mixed mode\n");
1317 unregister_int(clkdesc, 0);
1318 goto mixed_mode_setup;
1319 }
1320 }
1321 }
1322 return;
1323
1324 nointr:
1325 i8254_nointr = 1; /* don't try to register again */
1326 cputimer_intr_deregister(cti);
1327 }
1328
1329 void
setstatclockrate(int newhz)1330 setstatclockrate(int newhz)
1331 {
1332 if (newhz == RTC_PROFRATE)
1333 rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1334 else
1335 rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1336 writertc(RTC_STATUSA, rtc_statusa);
1337 }
1338
#if 0
/*
 * Disabled: raw TSC read for a timecounter interface, kept for reference.
 */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	return (rdtsc());
}
#endif
1346
#ifdef KERN_TIMESTAMP
#define KERN_TIMESTAMP_SIZE 16384
/* Ring buffer of (tsc, tag) pairs, exported via debug.timestamp. */
static u_long tsc[KERN_TIMESTAMP_SIZE] ;
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");
/*
 * Record a timestamp entry: low 32 bits of the TSC followed by the
 * caller-supplied tag 'x'.  The slot after the newest pair is zeroed to
 * mark the logical end of the ring.
 *
 * NOTE(review): the static index is not atomic; concurrent callers may
 * interleave entries — appears tolerated for this debug facility.
 */
void
_TSTMP(u_int32_t x)
{
	static int i;

	tsc[i] = (u_int32_t)rdtsc();
	tsc[i+1] = x;
	i = i + 2;
	if (i >= KERN_TIMESTAMP_SIZE)
		i = 0;
	tsc[i] = 0; /* mark last entry */
}
#endif /* KERN_TIMESTAMP */
1365
1366 /*
1367 *
1368 */
1369
1370 static int
hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)1371 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
1372 {
1373 sysclock_t count;
1374 uint64_t tscval;
1375 char buf[32];
1376
1377 crit_enter();
1378 if (sys_cputimer == &i8254_cputimer)
1379 count = sys_cputimer->count();
1380 else
1381 count = 0;
1382 if (tsc_present)
1383 tscval = rdtsc();
1384 else
1385 tscval = 0;
1386 crit_exit();
1387 ksnprintf(buf, sizeof(buf), "%016lx %016lx", count, tscval);
1388 return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
1389 }
1390
/*
 * Shared state for the per-cpu TSC concurrency test
 * (tsc_mpsync_ap_thread / tsc_mpsync_test).
 */
struct tsc_mpsync_info {
	volatile int	tsc_ready_cnt;	/* # of AP threads ready to start */
	volatile int	tsc_done_cnt;	/* # of AP threads finished */
	volatile int	tsc_command;	/* set non-zero to start the test */
	volatile int	unused01[5];	/* pad */
	struct {
		uint64_t	v;	/* last TSC value saved by this cpu */
		uint64_t	unused02;
	} tsc_saved[MAXCPU];
} __cachealign;
1401
#if 0
/*
 * Disabled older MP-sync test loop, kept for reference.
 *
 * NOTE(review): this dead code references 'arg' while its parameter is
 * named 'info' (and 'struct tsc_mpsync_thr' is not visible here) — it
 * would not compile if re-enabled as-is.
 */
static void
tsc_mpsync_test_loop(struct tsc_mpsync_thr *info)
{
	struct globaldata *gd = mycpu;
	tsc_uclock_t test_end, test_begin;
	u_int i;

	if (bootverbose) {
		kprintf("cpu%d: TSC testing MP synchronization ...\n",
			gd->gd_cpuid);
	}

	test_begin = rdtsc_ordered();
	/* Run test for 100ms */
	test_end = test_begin + (tsc_frequency / 10);

	arg->tsc_mpsync = 1;
	arg->tsc_target = test_begin;

#define TSC_TEST_TRYMAX		1000000	/* Make sure we could stop */
#define TSC_TEST_TRYMIN		50000

	for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
		struct lwkt_cpusync cs;

		crit_enter();
		lwkt_cpusync_init(&cs, gd->gd_other_cpus,
				  tsc_mpsync_test_remote, arg);
		lwkt_cpusync_interlock(&cs);
		cpu_pause();
		arg->tsc_target = rdtsc_ordered();
		cpu_mfence();
		lwkt_cpusync_deinterlock(&cs);
		crit_exit();
		cpu_pause();

		if (!arg->tsc_mpsync) {
			kprintf("cpu%d: TSC is not MP synchronized @%u\n",
			    gd->gd_cpuid, i);
			break;
		}
		if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
			break;
	}

#undef TSC_TEST_TRYMIN
#undef TSC_TEST_TRYMAX

	if (arg->tsc_target == test_begin) {
		kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
		/* XXX disable TSC? */
		tsc_invariant = 0;
		arg->tsc_mpsync = 0;
		return;
	}

	if (arg->tsc_mpsync && bootverbose) {
		kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
			gd->gd_cpuid, i);
	}
}

#endif
1466
#define TSC_TEST_COUNT		50000

/*
 * Per-cpu worker for the TSC concurrency test.  Signals readiness,
 * waits for tsc_command, then repeatedly stores ordered TSC reads into
 * its tsc_saved[] slot until either TSC_TEST_COUNT iterations elapse or
 * another cpu finishes first.
 */
static void
tsc_mpsync_ap_thread(void *xinfo)
{
	struct tsc_mpsync_info *info = xinfo;
	int cpu = mycpuid;
	int i;

	/*
	 * Tell main loop that we are ready and wait for initiation
	 */
	atomic_add_int(&info->tsc_ready_cnt, 1);
	while (info->tsc_command == 0) {
		lwkt_force_switch();
	}

	/*
	 * Run test for TSC_TEST_COUNT loops or until tsc_done_cnt != 0
	 * (another cpu has finished its test), then increment done.
	 */
	crit_enter();
	for (i = 0; i < TSC_TEST_COUNT && info->tsc_done_cnt == 0; ++i) {
		info->tsc_saved[cpu].v = rdtsc_ordered();
	}
	crit_exit();
	atomic_add_int(&info->tsc_done_cnt, 1);

	lwkt_exit();
}
1497
1498 static void
tsc_mpsync_test(void)1499 tsc_mpsync_test(void)
1500 {
1501 enum { TSCOK, TSCNEG, TSCSPAN } error = TSCOK;
1502 int cpu;
1503 int try;
1504
1505 if (!tsc_invariant) {
1506 /* Not even invariant TSC */
1507 kprintf("TSC is not invariant, "
1508 "no further tests will be performed\n");
1509 return;
1510 }
1511
1512 if (ncpus == 1) {
1513 /* Only one CPU */
1514 tsc_mpsync = 1;
1515 return;
1516 }
1517
1518 /*
1519 * Forcing can be used w/qemu to reduce contention
1520 */
1521 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);
1522
1523 if (tsc_mpsync == 0) {
1524 switch (cpu_vendor_id) {
1525 case CPU_VENDOR_INTEL:
1526 /*
1527 * Intel probably works
1528 */
1529 break;
1530
1531 case CPU_VENDOR_AMD:
1532 /*
1533 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar
1534 * architectures) we have to watch out for
1535 * Erratum 778:
1536 * "Processor Core Time Stamp Counters May
1537 * Experience Drift"
1538 * This Erratum is only listed for cpus in Family
1539 * 15h < Model 30h and for 16h < Model 30h.
1540 *
1541 * AMD < Bulldozer probably doesn't work
1542 */
1543 if (CPUID_TO_FAMILY(cpu_id) == 0x15 ||
1544 CPUID_TO_FAMILY(cpu_id) == 0x16) {
1545 if (CPUID_TO_MODEL(cpu_id) < 0x30)
1546 return;
1547 } else if (CPUID_TO_FAMILY(cpu_id) < 0x17) {
1548 return;
1549 }
1550 break;
1551
1552 default:
1553 /* probably won't work */
1554 return;
1555 }
1556 } else if (tsc_mpsync < 0) {
1557 kprintf("TSC MP synchronization test is disabled\n");
1558 tsc_mpsync = 0;
1559 return;
1560 }
1561
1562 /*
1563 * Test even if forced to 1 above. If forced, we will use the TSC
1564 * even if the test fails. (set forced to -1 to disable entirely).
1565 */
1566 kprintf("TSC testing MP synchronization ...\n");
1567 kprintf("TSC testing MP: NOTE! CPU pwrsave will inflate latencies!\n");
1568
1569 /*
1570 * Test that the TSC is monotonically increasing across CPU
1571 * switches. Otherwise time will get really messed up if the
1572 * TSC is selected as the timebase.
1573 *
1574 * Test 4 times
1575 */
1576 for (try = 0; tsc_frequency && try < 4; ++try) {
1577 tsc_uclock_t last;
1578 tsc_uclock_t next;
1579 tsc_sclock_t delta;
1580 tsc_sclock_t lo_delta = 0x7FFFFFFFFFFFFFFFLL;
1581 tsc_sclock_t hi_delta = -0x7FFFFFFFFFFFFFFFLL;
1582
1583 last = rdtsc();
1584 for (cpu = 0; cpu < ncpus; ++cpu) {
1585 lwkt_migratecpu(cpu);
1586 next = rdtsc();
1587 if (cpu == 0) {
1588 last = next;
1589 continue;
1590 }
1591
1592 delta = next - last;
1593 if (delta < 0) {
1594 kprintf("TSC cpu-delta NEGATIVE: "
1595 "cpu %d to %d (%ld)\n",
1596 cpu - 1, cpu, delta);
1597 error = TSCNEG;
1598 }
1599 if (lo_delta > delta)
1600 lo_delta = delta;
1601 if (hi_delta < delta)
1602 hi_delta = delta;
1603 last = next;
1604 }
1605 last = rdtsc();
1606 for (cpu = ncpus - 2; cpu >= 0; --cpu) {
1607 lwkt_migratecpu(cpu);
1608 next = rdtsc();
1609 delta = next - last;
1610 if (delta <= 0) {
1611 kprintf("TSC cpu-delta WAS NEGATIVE! "
1612 "cpu %d to %d (%ld)\n",
1613 cpu + 1, cpu, delta);
1614 error = TSCNEG;
1615 }
1616 if (lo_delta > delta)
1617 lo_delta = delta;
1618 if (hi_delta < delta)
1619 hi_delta = delta;
1620 last = next;
1621 }
1622 kprintf("TSC cpu-delta test complete, %ldnS to %ldnS ",
1623 muldivu64(lo_delta, 1000000000, tsc_frequency),
1624 muldivu64(hi_delta, 1000000000, tsc_frequency));
1625 if (error != TSCOK) {
1626 kprintf("FAILURE\n");
1627 break;
1628 }
1629 kprintf("SUCCESS\n");
1630 }
1631
1632 /*
1633 * Test TSC MP synchronization on APs.
1634 *
1635 * Test 4 times.
1636 */
1637 for (try = 0; tsc_frequency && try < 4; ++try) {
1638 struct tsc_mpsync_info info;
1639 uint64_t last;
1640 int64_t xworst;
1641 int64_t xdelta;
1642 int64_t delta;
1643
1644 bzero(&info, sizeof(info));
1645
1646 for (cpu = 0; cpu < ncpus; ++cpu) {
1647 thread_t td;
1648 lwkt_create(tsc_mpsync_ap_thread, &info, &td,
1649 NULL, TDF_NOSTART, cpu,
1650 "tsc mpsync %d", cpu);
1651 lwkt_setpri_initial(td, curthread->td_pri);
1652 lwkt_schedule(td);
1653 }
1654 while (info.tsc_ready_cnt != ncpus)
1655 lwkt_force_switch();
1656
1657 /*
1658 * All threads are ready, start the test and wait for
1659 * completion.
1660 */
1661 info.tsc_command = 1;
1662 while (info.tsc_done_cnt != ncpus)
1663 lwkt_force_switch();
1664
1665 /*
1666 * Process results
1667 */
1668 last = info.tsc_saved[0].v;
1669 delta = 0;
1670 xworst = 0;
1671 for (cpu = 0; cpu < ncpus; ++cpu) {
1672 xdelta = (int64_t)(info.tsc_saved[cpu].v - last);
1673 last = info.tsc_saved[cpu].v;
1674 if (xdelta < 0)
1675 xdelta = -xdelta;
1676 if (xworst < xdelta)
1677 xworst = xdelta;
1678 delta += xdelta;
1679
1680 }
1681
1682 /*
1683 * Result from attempt. Break-out if we succeeds, otherwise
1684 * try again (up to 4 times). This might be in a VM so we
1685 * need to be robust.
1686 */
1687 kprintf("TSC cpu concurrency test complete, worst=%ldns, "
1688 "avg=%ldns ",
1689 muldivu64(xworst, 1000000000, tsc_frequency),
1690 muldivu64(delta / ncpus, 1000000000, tsc_frequency));
1691 if (delta / ncpus > tsc_frequency / 100) {
1692 kprintf("FAILURE\n");
1693 }
1694 if (delta / ncpus < tsc_frequency / 100000) {
1695 kprintf("SUCCESS\n");
1696 if (error == TSCOK)
1697 tsc_mpsync = 1;
1698 break;
1699 }
1700 kprintf("INDETERMINATE\n");
1701 }
1702
1703 if (tsc_mpsync)
1704 kprintf("TSC is MP synchronized\n");
1705 else
1706 kprintf("TSC is not MP synchronized\n");
1707 }
/* Run the TSC MP synchronization test once all APs have been started. */
SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);

/* hw.i8254.*: 8254 cputimer frequency and raw timestamp sampling. */
static SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
	    "frequency");
SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
	    0, 0, hw_i8254_timestamp, "A", "");

/* hw.tsc_*: TSC capability flags and measured frequency. */
SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
	    &tsc_present, 0, "TSC Available");
SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
	    &tsc_invariant, 0, "Invariant TSC");
SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
	    &tsc_mpsync, 0, "TSC is synchronized across CPUs");
SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
	    &tsc_frequency, 0, "TSC Frequency");
1724