1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * Copyright (c) 2008-2021 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * William Jolitz and Don Ahn. 7 * 8 * This code is derived from software contributed to The DragonFly Project 9 * by Matthew Dillon <dillon@backplane.com> 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 36 * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $ 37 */ 38 39 /* 40 * Routines to handle clock hardware. 41 */ 42 43 /* 44 * inittodr, settodr and support routines written 45 * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at> 46 * 47 * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94 48 */ 49 50 #if 0 51 #include "opt_clock.h" 52 #endif 53 54 #include <sys/param.h> 55 #include <sys/systm.h> 56 #include <sys/eventhandler.h> 57 #include <sys/time.h> 58 #include <sys/kernel.h> 59 #include <sys/bus.h> 60 #include <sys/sysctl.h> 61 #include <sys/cons.h> 62 #include <sys/kbio.h> 63 #include <sys/systimer.h> 64 #include <sys/globaldata.h> 65 #include <sys/machintr.h> 66 #include <sys/interrupt.h> 67 68 #include <sys/thread2.h> 69 70 #include <machine/clock.h> 71 #include <machine/cputypes.h> 72 #include <machine/frame.h> 73 #include <machine/ipl.h> 74 #include <machine/limits.h> 75 #include <machine/md_var.h> 76 #include <machine/psl.h> 77 #include <machine/segments.h> 78 #include <machine/smp.h> 79 #include <machine/specialreg.h> 80 #include <machine/intr_machdep.h> 81 82 #include <machine_base/apic/ioapic.h> 83 #include <machine_base/apic/ioapic_abi.h> 84 #include <machine_base/icu/icu.h> 85 #include <bus/isa/isa.h> 86 #include <bus/isa/rtc.h> 87 #include <machine_base/isa/timerreg.h> 88 89 SET_DECLARE(timecounter_init_set, const timecounter_init_t); 90 TIMECOUNTER_INIT(placeholder, NULL); 91 92 static void i8254_restore(void); 93 static void resettodr_on_shutdown(void *arg __unused); 94 95 /* 96 * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we 97 * can use a simple formula for leap years. 
 */
#define LEAPYEAR(y) ((u_int)(y) % 4 == 0)
#define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31)

#ifndef TIMER_FREQ
#define TIMER_FREQ 1193182
#endif

/* Which 8254 counter (and its data port) acts as the free-running wall timer */
static uint8_t i8254_walltimer_sel;
static uint16_t i8254_walltimer_cntr;
static int timer0_running;

int adjkerntz;			/* local offset from GMT in seconds */
int disable_rtc_set;		/* disable resettodr() if != 0 */
int tsc_present;
int tsc_invariant;
int tsc_mpsync;
int wall_cmos_clock;		/* wall CMOS clock assumed if != 0 */
tsc_uclock_t tsc_frequency;
tsc_uclock_t tsc_oneus_approx;	/* always at least 1, approx only */

/* Simple ownership tracking for the three 8254 counters */
enum tstate { RELEASED, ACQUIRED };
static enum tstate timer0_state;
static enum tstate timer1_state;
static enum tstate timer2_state;

int i8254_cputimer_disable;	/* No need to initialize i8254 cputimer. */

static int beeping = 0;
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
static int rtc_loaded;

/*
 * Non-zero when the system timebase converts to 8254 ticks with a plain
 * integer divide; 0 forces the 64-bit muldivu64() path in i8254_intr_reload().
 */
static sysclock_t i8254_cputimer_div;

static int i8254_nointr;
static int i8254_intr_disable = 1;
TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);

static int calibrate_timers_with_rtc = 0;
TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);

static int calibrate_tsc_fast = 1;
TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast);

static int calibrate_test;
TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test);

static struct callout sysbeepstop_ch;

static sysclock_t i8254_cputimer_count(void);
static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
static void i8254_cputimer_destruct(struct cputimer *cputimer);

static struct cputimer	i8254_cputimer = {
    .next		= SLIST_ENTRY_INITIALIZER,
    .name		= "i8254",
    .pri		= CPUTIMER_PRI_8254,
    .type		= 0,	/* determined later */
    .count		= i8254_cputimer_count,
    .fromhz		= cputimer_default_fromhz,
    .fromus		= cputimer_default_fromus,
    .construct		= i8254_cputimer_construct,
    .destruct		= i8254_cputimer_destruct,
    .freq		= TIMER_FREQ
};

static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);

static struct cputimer_intr i8254_cputimer_intr = {
    .freq	= TIMER_FREQ,
    .reload	= i8254_intr_reload,
    .enable	= cputimer_intr_default_enable,
    .config	= i8254_intr_config,
    .restart	= cputimer_intr_default_restart,
    .pmfixup	= cputimer_intr_default_pmfixup,
    .initclock	= i8254_intr_initclock,
    .pcpuhand	= NULL,
    .next	= SLIST_ENTRY_INITIALIZER,
    .name	= "i8254",
    .type	= CPUTIMER_INTR_8254,
    .prio	= CPUTIMER_INTR_PRIO_8254,
    .caps	= CPUTIMER_INTR_CAP_PS,
    .priv	= NULL
};

/*
 * Use this to lwkt_switch() when the scheduler clock is not
 * yet running, otherwise lwkt_switch() won't do anything.
 * XXX needs cleaning up in lwkt_thread.c
 */
static void
lwkt_force_switch(void)
{
	crit_enter();
	lwkt_schedulerclock(curthread);
	crit_exit();
	lwkt_switch();
}

/*
 * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
 * counting as of this interrupt.  We use timer1 in free-running mode (not
 * generating any interrupts) as our main counter.  Each cpu has timeouts
 * pending.
 *
 * This code is INTR_MPSAFE and may be called without the BGL held.
 */
static void
clkintr(void *dummy, void *frame_arg)
{
	static sysclock_t sysclock_count;	/* NOTE! Must be static: its
						 * address is handed to remote
						 * cpus via the IPI below. */
	struct globaldata *gd = mycpu;
	struct globaldata *gscan;
	int n;

	/*
	 * SWSTROBE mode is a one-shot, the timer is no longer running
	 */
	timer0_running = 0;

	/*
	 * XXX the dispatcher needs work.  right now we call systimer_intr()
	 * directly or via IPI for any cpu with systimers queued, which is
	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
	 */
	sysclock_count = sys_cputimer->count();
	for (n = 0; n < ncpus; ++n) {
		gscan = globaldata_find(n);
		if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
			continue;
		if (gscan != gd) {
			lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
					&sysclock_count, 1);
		} else {
			systimer_intr(&sysclock_count, 0, frame_arg);
		}
	}
}


/*
 * Grab exclusive use of 8254 timer2 and program its mode.
 *
 * NOTE! not MP safe.
 */
int
acquire_timer2(int mode)
{
	if (timer2_state != RELEASED)
		return (-1);
	timer2_state = ACQUIRED;

	/*
	 * This access to the timer registers is as atomic as possible
	 * because it is a single instruction.  We could do better if we
	 * knew the rate.
	 */
	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
	return (0);
}

/*
 * Release timer2 and restore its default square-wave mode.
 */
int
release_timer2(void)
{
	if (timer2_state != ACQUIRED)
		return (-1);
	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
	timer2_state = RELEASED;
	return (0);
}

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

/* Kernel debugger command: dump the raw (BCD) RTC registers */
DB_SHOW_COMMAND(rtc, rtc)
{
	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
		rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
		rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
		rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
}
#endif /* DDB */

/*
 * Return the current cpu timer count as a 32 bit integer.
 */
static
sysclock_t
i8254_cputimer_count(void)
{
	static uint16_t cputimer_last;	/* last 16-bit countup value seen */
	uint16_t count;
	sysclock_t ret;

	clock_lock();
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
	count = (uint8_t)inb(i8254_walltimer_cntr);		/* get countdown */
	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
	count = -count;					/* -> countup */
	if (count < cputimer_last)			/* rollover */
		i8254_cputimer.base += 0x00010000U;
	ret = i8254_cputimer.base | count;
	cputimer_last = count;
	clock_unlock();

	return(ret);
}

/*
 * This function is called whenever the system timebase changes, allowing
 * us to calculate what is needed to convert a system timebase tick
 * into an 8254 tick for the interrupt timer.  If we can convert to a
 * simple shift, multiplication, or division, we do so.  Otherwise 64
 * bit arithmatic is required every time the interrupt timer is reloaded.
 */
static void
i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
{
	sysclock_t freq;
	sysclock_t div;

	/*
	 * Will a simple divide do the trick?  Accept a divisor that
	 * reproduces the timebase frequency to within +/- 1 Hz.
	 */
	div = (timer->freq + (cti->freq / 2)) / cti->freq;
	freq = cti->freq * div;

	if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
		i8254_cputimer_div = div;
	else
		i8254_cputimer_div = 0;	/* fall back to muldivu64() */
}

/*
 * Reload for the next timeout.  It is possible for the reload value
 * to be 0 or negative, indicating that an immediate timer interrupt
 * is desired.  For now make the minimum 2 ticks.
 *
 * We may have to convert from the system timebase to the 8254 timebase.
 */
static void
i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	uint16_t count;

	if ((ssysclock_t)reload < 0)
		reload = 1;
	if (i8254_cputimer_div)
		reload /= i8254_cputimer_div;
	else
		reload = muldivu64(reload, cti->freq, sys_cputimer->freq);

	if (reload < 2)
		reload = 2;		/* minimum count */
	if (reload > 0xFFFF)
		reload = 0xFFFF;	/* almost full count (0 is full count) */

	clock_lock();
	if (timer0_running) {
		/*
		 * Timer0 is still counting; only reprogram it if the new
		 * deadline is earlier than the one currently loaded.
		 */
		outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* count-down timer */
		count = (uint8_t)inb(TIMER_CNTR0);		/* lsb */
		count |= ((uint8_t)inb(TIMER_CNTR0) << 8);	/* msb */
		if (reload < count) {
			outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
			outb(TIMER_CNTR0, (uint8_t)reload); 	/* lsb */
			outb(TIMER_CNTR0, (uint8_t)(reload >> 8)); /* msb */
		}
	} else {
		timer0_running = 1;
		outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
		outb(TIMER_CNTR0, (uint8_t)reload); 		/* lsb */
		outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
	}
	clock_unlock();
}

/*
 * DELAY(usec)	     - Spin for the specified number of microseconds.
 * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
 *		       but do a thread switch in the loop
 *
 * Relies on timer 1 counting down from (cputimer_freq / hz)
 * Note: timer had better have been programmed before this is first used!
 */
static void
DODELAY(int n, int doswitch)
{
	ssysclock_t delta, ticks_left;
	sysclock_t prev_tick, tick;

#ifdef DELAYDEBUG
	int getit_calls = 1;
	int n1;
	static int state = 0;

	if (state == 0) {
		state = 1;
		for (n1 = 1; n1 <= 10000000; n1 *= 10)
			DELAY(n1);
		state = 2;
	}
	if (state == 1)
		kprintf("DELAY(%d)...", n);
#endif
	/*
	 * Guard against the timer being uninitialized if we are called
	 * early for console i/o.
	 */
	if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
		i8254_restore();

	/*
	 * Read the counter first, so that the rest of the setup overhead is
	 * counted.  Then calculate the number of hardware timer ticks
	 * required, rounding up to be sure we delay at least the requested
	 * number of microseconds.
	 */
	prev_tick = sys_cputimer->count();
	ticks_left = muldivu64(n, sys_cputimer->freq + 999999, 1000000);

	/*
	 * Loop until done.
	 */
	while (ticks_left > 0) {
		tick = sys_cputimer->count();
#ifdef DELAYDEBUG
		++getit_calls;
#endif
		delta = tick - prev_tick;
		prev_tick = tick;
		if (delta < 0)
			delta = 0;
		ticks_left -= delta;
		if (doswitch && ticks_left > 0)
			lwkt_switch();
		cpu_pause();
	}
#ifdef DELAYDEBUG
	if (state == 1)
		kprintf(" %d calls to getit() at %d usec each\n",
		       getit_calls, (n + 5) / getit_calls);
#endif
}

/*
 * DELAY() never switches.
 */
void
DELAY(int n)
{
	DODELAY(n, 0);
}

/*
 * Returns non-zero if the specified time period has elapsed.  Call
 * first with last_clock set to 0.
 */
int
CHECKTIMEOUT(TOTALDELAY *tdd)
{
	sysclock_t delta;
	int us;

	if (tdd->started == 0) {
		/* First call: arm the timeout and return "not yet" */
		if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
			i8254_restore();
		tdd->last_clock = sys_cputimer->count();
		tdd->started = 1;
		return(0);
	}
	delta = sys_cputimer->count() - tdd->last_clock;
	us = muldivu64(delta, 1000000, sys_cputimer->freq);
	/*
	 * Only advance last_clock by the whole microseconds actually
	 * accounted for, so fractional ticks are not lost across calls.
	 */
	tdd->last_clock += muldivu64(us, sys_cputimer->freq, 1000000);
	tdd->us -= us;

	return (tdd->us < 0);
}


/*
 * DRIVERSLEEP() does not switch if called with a spinlock held or
 * from a hard interrupt.
483 */ 484 void 485 DRIVERSLEEP(int usec) 486 { 487 globaldata_t gd = mycpu; 488 489 if (gd->gd_intr_nesting_level || gd->gd_spinlocks) { 490 DODELAY(usec, 0); 491 } else { 492 DODELAY(usec, 1); 493 } 494 } 495 496 static void 497 sysbeepstop(void *chan) 498 { 499 outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ 500 beeping = 0; 501 release_timer2(); 502 } 503 504 int 505 sysbeep(int pitch, int period) 506 { 507 if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) 508 return(-1); 509 if (sysbeep_enable == 0) 510 return(-1); 511 /* 512 * Nobody else is using timer2, we do not need the clock lock 513 */ 514 outb(TIMER_CNTR2, pitch); 515 outb(TIMER_CNTR2, (pitch>>8)); 516 if (!beeping) { 517 /* enable counter2 output to speaker */ 518 outb(IO_PPI, inb(IO_PPI) | 3); 519 beeping = period; 520 callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL); 521 } 522 return (0); 523 } 524 525 /* 526 * RTC support routines 527 */ 528 529 int 530 rtcin(int reg) 531 { 532 u_char val; 533 534 crit_enter(); 535 outb(IO_RTC, reg); 536 inb(0x84); 537 val = inb(IO_RTC + 1); 538 inb(0x84); 539 crit_exit(); 540 return (val); 541 } 542 543 static __inline void 544 writertc(u_char reg, u_char val) 545 { 546 crit_enter(); 547 inb(0x84); 548 outb(IO_RTC, reg); 549 inb(0x84); 550 outb(IO_RTC + 1, val); 551 inb(0x84); /* XXX work around wrong order in rtcin() */ 552 crit_exit(); 553 } 554 555 static __inline int 556 readrtc(int port) 557 { 558 return(bcd2bin(rtcin(port))); 559 } 560 561 static u_int 562 calibrate_clocks(void) 563 { 564 tsc_uclock_t old_tsc; 565 sysclock_t tot_count; 566 sysclock_t count, prev_count; 567 int sec, start_sec, timeout; 568 569 if (bootverbose) 570 kprintf("Calibrating clock(s) ...\n"); 571 if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) 572 goto fail; 573 timeout = 100000000; 574 575 /* Read the mc146818A seconds counter. 
*/ 576 for (;;) { 577 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { 578 sec = rtcin(RTC_SEC); 579 break; 580 } 581 if (--timeout == 0) 582 goto fail; 583 } 584 585 /* Wait for the mC146818A seconds counter to change. */ 586 start_sec = sec; 587 for (;;) { 588 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { 589 sec = rtcin(RTC_SEC); 590 if (sec != start_sec) 591 break; 592 } 593 if (--timeout == 0) 594 goto fail; 595 } 596 597 /* Start keeping track of the i8254 counter. */ 598 prev_count = sys_cputimer->count(); 599 tot_count = 0; 600 601 if (tsc_present) 602 old_tsc = rdtsc(); 603 else 604 old_tsc = 0; /* shut up gcc */ 605 606 /* 607 * Wait for the mc146818A seconds counter to change. Read the i8254 608 * counter for each iteration since this is convenient and only 609 * costs a few usec of inaccuracy. The timing of the final reads 610 * of the counters almost matches the timing of the initial reads, 611 * so the main cause of inaccuracy is the varying latency from 612 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the 613 * rtcin(RTC_SEC) that returns a changed seconds count. The 614 * maximum inaccuracy from this cause is < 10 usec on 486's. 615 */ 616 start_sec = sec; 617 for (;;) { 618 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) 619 sec = rtcin(RTC_SEC); 620 count = sys_cputimer->count(); 621 tot_count += (sysclock_t)(count - prev_count); 622 prev_count = count; 623 if (sec != start_sec) 624 break; 625 if (--timeout == 0) 626 goto fail; 627 } 628 629 /* 630 * Read the cpu cycle counter. The timing considerations are 631 * similar to those for the i8254 clock. 
632 */ 633 if (tsc_present) { 634 tsc_frequency = rdtsc() - old_tsc; 635 if (bootverbose) { 636 kprintf("TSC clock: %jd Hz (Method A)\n", 637 (intmax_t)tsc_frequency); 638 } 639 } 640 tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000; 641 642 kprintf("i8254 clock: %lu Hz\n", tot_count); 643 return (tot_count); 644 645 fail: 646 kprintf("failed, using default i8254 clock of %lu Hz\n", 647 i8254_cputimer.freq); 648 return (i8254_cputimer.freq); 649 } 650 651 static void 652 i8254_restore(void) 653 { 654 timer0_state = ACQUIRED; 655 656 clock_lock(); 657 658 /* 659 * Timer0 is our fine-grained variable clock interrupt 660 */ 661 outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT); 662 outb(TIMER_CNTR0, 2); /* lsb */ 663 outb(TIMER_CNTR0, 0); /* msb */ 664 clock_unlock(); 665 666 if (!i8254_nointr) { 667 cputimer_intr_register(&i8254_cputimer_intr); 668 cputimer_intr_select(&i8254_cputimer_intr, 0); 669 } 670 671 /* 672 * Timer1 or timer2 is our free-running clock, but only if another 673 * has not been selected. 674 */ 675 cputimer_register(&i8254_cputimer); 676 cputimer_select(&i8254_cputimer, 0); 677 } 678 679 static void 680 i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock) 681 { 682 int which; 683 684 /* 685 * Should we use timer 1 or timer 2 ? 
686 */ 687 which = 0; 688 TUNABLE_INT_FETCH("hw.i8254.walltimer", &which); 689 if (which != 1 && which != 2) 690 which = 2; 691 692 switch(which) { 693 case 1: 694 timer->name = "i8254_timer1"; 695 timer->type = CPUTIMER_8254_SEL1; 696 i8254_walltimer_sel = TIMER_SEL1; 697 i8254_walltimer_cntr = TIMER_CNTR1; 698 timer1_state = ACQUIRED; 699 break; 700 case 2: 701 timer->name = "i8254_timer2"; 702 timer->type = CPUTIMER_8254_SEL2; 703 i8254_walltimer_sel = TIMER_SEL2; 704 i8254_walltimer_cntr = TIMER_CNTR2; 705 timer2_state = ACQUIRED; 706 break; 707 } 708 709 timer->base = (oldclock + 0xFFFF) & 0xFFFFFFFFFFFF0000LU; 710 711 clock_lock(); 712 outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT); 713 outb(i8254_walltimer_cntr, 0); /* lsb */ 714 outb(i8254_walltimer_cntr, 0); /* msb */ 715 outb(IO_PPI, inb(IO_PPI) | 1); /* bit 0: enable gate, bit 1: spkr */ 716 clock_unlock(); 717 } 718 719 static void 720 i8254_cputimer_destruct(struct cputimer *timer) 721 { 722 switch(timer->type) { 723 case CPUTIMER_8254_SEL1: 724 timer1_state = RELEASED; 725 break; 726 case CPUTIMER_8254_SEL2: 727 timer2_state = RELEASED; 728 break; 729 default: 730 break; 731 } 732 timer->type = 0; 733 } 734 735 static void 736 rtc_restore(void) 737 { 738 /* Restore all of the RTC's "status" (actually, control) registers. */ 739 writertc(RTC_STATUSB, RTCSB_24HR); 740 writertc(RTC_STATUSA, rtc_statusa); 741 writertc(RTC_STATUSB, rtc_statusb); 742 } 743 744 /* 745 * Restore all the timers. 746 * 747 * This function is called to resynchronize our core timekeeping after a 748 * long halt, e.g. from apm_default_resume() and friends. It is also 749 * called if after a BIOS call we have detected munging of the 8254. 750 * It is necessary because cputimer_count() counter's delta may have grown 751 * too large for nanouptime() and friends to handle, or (in the case of 8254 752 * munging) might cause the SYSTIMER code to prematurely trigger. 
 */
void
timer_restore(void)
{
	crit_enter();
	if (i8254_cputimer_disable == 0)
		i8254_restore();		/* restore timer_freq and hz */
	rtc_restore();			/* reenable RTC interrupts */
	crit_exit();
}

#define MAX_MEASURE_RETRIES	100

/*
 * Take one (TSC, cputimer count) sample, retrying while the measured
 * TSC window around the cputimer read exceeds a threshold derived from
 * the expected timer read latency (i.e. we were probably interrupted).
 * Returns the starting TSC value; the window length, sampled count and
 * retry count are returned through the out parameters.
 */
static u_int64_t
do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time,
    int *retries)
{
	u_int64_t tsc1, tsc2;
	u_int64_t threshold;
	sysclock_t val;
	int cnt = 0;

	do {
		/* Loosen the acceptance threshold after many retries */
		if (cnt > MAX_MEASURE_RETRIES/2)
			threshold = timer_latency << 1;
		else
			threshold = timer_latency + (timer_latency >> 2);

		cnt++;
		tsc1 = rdtsc_ordered();
		val = sys_cputimer->count();
		tsc2 = rdtsc_ordered();
	} while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES &&
	    tsc2 - tsc1 > threshold);

	*retries = cnt - 1;
	*latency = tsc2 - tsc1;
	*time = val;
	return tsc1;
}

/*
 * Estimate the TSC frequency (Hz) by measuring TSC deltas against the
 * system cputimer over roughly 'usecs' microseconds.  The fast path
 * averages two latency-compensated measurement pairs; the slow path is
 * a plain rdtsc around DELAY(usecs).
 */
static u_int64_t
do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency)
{
	if (calibrate_tsc_fast) {
		u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1;
		u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2;
		u_int64_t freq1, freq2;
		sysclock_t start1, end1, start2, end2;
		int retries1, retries2, retries3, retries4;

		DELAY(1000);
		old_tsc1 = do_measure(timer_latency, &start_lat1, &start1,
		    &retries1);
		DELAY(20000);
		old_tsc2 = do_measure(timer_latency, &start_lat2, &start2,
		    &retries2);
		DELAY(usecs);
		new_tsc1 = do_measure(timer_latency, &end_lat1, &end1,
		    &retries3);
		DELAY(20000);
		new_tsc2 = do_measure(timer_latency, &end_lat2, &end2,
		    &retries4);

		/* Compensate the TSC deltas for the measurement latencies */
		old_tsc1 += start_lat1;
		old_tsc2 += start_lat2;
		freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2;
		freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2;
		end1 -= start1;
		end2 -= start2;
		/* This should in practice be safe from overflows. */
		freq1 = muldivu64(freq1, sys_cputimer->freq, end1);
		freq2 = muldivu64(freq2, sys_cputimer->freq, end2);
		if (calibrate_test && (retries1 > 0 || retries2 > 0)) {
			kprintf("%s: retries: %d, %d, %d, %d\n",
			    __func__, retries1, retries2, retries3, retries4);
		}
		if (calibrate_test) {
			kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n",
			    __func__, freq1, freq2, (freq1 + freq2) / 2);
		}
		return (freq1 + freq2) / 2;
	} else {
		u_int64_t old_tsc, new_tsc;
		u_int64_t freq;

		old_tsc = rdtsc_ordered();
		DELAY(usecs);
		new_tsc = rdtsc();
		freq = new_tsc - old_tsc;
		/* This should in practice be safe from overflows. */
		freq = (freq * 1000 * 1000) / usecs;
		return freq;
	}
}

/*
 * Initialize 8254 timer 0 early so that it can be used in DELAY().
 */
void
startrtclock(void)
{
	const timecounter_init_t **list;
	sysclock_t delta, freq;

	callout_init_mp(&sysbeepstop_ch);

	/*
	 * Can we use the TSC?
	 *
	 * NOTE: If running under qemu, probably a good idea to force the
	 *	 TSC because we are not likely to detect it as being
	 *	 invariant or mpsyncd if you don't.  This will greatly
	 *	 reduce SMP contention.
	 */
	if (cpu_feature & CPUID_TSC) {
		tsc_present = 1;
		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);

		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
		     cpu_vendor_id == CPU_VENDOR_AMD) &&
		    cpu_exthigh >= 0x80000007) {
			u_int regs[4];

			do_cpuid(0x80000007, regs);
			/* CPUID 0x80000007 EDX bit 8: invariant TSC */
			if (regs[3] & 0x100)
				tsc_invariant = 1;
		}
	} else {
		tsc_present = 0;
	}

	/*
	 * Initial RTC state, don't do anything unexpected
	 */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	SET_FOREACH(list, timecounter_init_set) {
		if ((*list)->configure != NULL)
			(*list)->configure();
	}

	/*
	 * If tsc_frequency is already initialized now, and a flag is set
	 * that i8254 timer is unneeded, we are done.
	 */
	if (tsc_frequency != 0 && i8254_cputimer_disable != 0)
		goto done;

	/*
	 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
	 * generate an interrupt, which we will ignore for now.
	 *
	 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
	 * (so it counts a full 2^16 and repeats).  We will use this timer
	 * for our counting.
	 */
	if (i8254_cputimer_disable == 0)
		i8254_restore();

	kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name);

	/*
	 * When booting without verbose messages, it's pointless to run the
	 * calibrate_clocks() calibration code, when we don't use the
	 * results in any way.  With bootverbose, we are at least printing
	 * this information to the kernel log.
	 */
	if (i8254_cputimer_disable != 0 ||
	    (calibrate_timers_with_rtc == 0 && !bootverbose)) {
		goto skip_rtc_based;
	}

	freq = calibrate_clocks();
#ifdef CLK_CALIBRATION_LOOP
	if (bootverbose) {
		int c;

		cnpoll(TRUE);
		kprintf("Press a key on the console to "
			"abort clock calibration\n");
		while ((c = cncheckc()) == -1 || c == NOKEY)
			calibrate_clocks();
		cnpoll(FALSE);
	}
#endif

	/*
	 * Use the calibrated i8254 frequency if it seems reasonable.
	 * Otherwise use the default, and don't use the calibrated i586
	 * frequency.
	 */
	delta = freq > i8254_cputimer.freq ?
		freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
	if (delta < i8254_cputimer.freq / 100) {
		if (calibrate_timers_with_rtc == 0) {
			kprintf(
"hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
			freq = i8254_cputimer.freq;
		}
		/*
		 * NOTE:
		 * Interrupt timer's freq must be adjusted
		 * before we change the cuptimer's frequency.
		 */
		i8254_cputimer_intr.freq = freq;
		cputimer_set_frequency(&i8254_cputimer, freq);
	} else {
		if (bootverbose)
			kprintf("%lu Hz differs from default of %lu Hz "
				"by more than 1%%\n",
			        freq, i8254_cputimer.freq);
		tsc_frequency = 0;
	}

	if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
		kprintf("hw.calibrate_timers_with_rtc not "
			"set - using old calibration method\n");
		tsc_frequency = 0;
	}

skip_rtc_based:
	if (tsc_present && tsc_frequency == 0) {
		u_int cnt;
		u_int64_t cputime_latency_tsc = 0, max = 0, min = 0;
		int i;

		for (i = 0; i < 10; i++) {
			/* Warm up */
			(void)sys_cputimer->count();
		}
		/* Estimate cputimer read latency in TSC ticks */
		for (i = 0; i < 100; i++) {
			u_int64_t old_tsc, new_tsc;

			old_tsc = rdtsc_ordered();
			(void)sys_cputimer->count();
			new_tsc = rdtsc_ordered();
			cputime_latency_tsc += (new_tsc - old_tsc);
			if (max < (new_tsc - old_tsc))
				max = new_tsc - old_tsc;
			if (min == 0 || min > (new_tsc - old_tsc))
				min = new_tsc - old_tsc;
		}
		cputime_latency_tsc /= 100;
		kprintf(
		    "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n",
		    cputime_latency_tsc, min, max);
		/* XXX Instead of this, properly filter out outliers. */
		cputime_latency_tsc = min;

		if (calibrate_test > 0) {
			u_int64_t values[20], avg = 0;
			for (i = 1; i <= 20; i++) {
				u_int64_t freq;

				freq = do_calibrate_cputimer(i * 100 * 1000,
				    cputime_latency_tsc);
				values[i - 1] = freq;
			}
			/* Compute an average TSC for the 1s to 2s delays. */
			for (i = 10; i < 20; i++)
				avg += values[i];
			avg /= 10;
			for (i = 0; i < 20; i++) {
				kprintf("%ums: %lu (Diff from average: %ld)\n",
				    (i + 1) * 100, values[i],
				    (int64_t)(values[i] - avg));
			}
		}

		if (calibrate_tsc_fast > 0) {
			/* HPET would typically be >10MHz */
			if (sys_cputimer->freq >= 10000000)
				cnt = 200000;
			else
				cnt = 500000;
		} else {
			cnt = 1000000;
		}

		tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc);
		if (bootverbose && calibrate_timers_with_rtc) {
			kprintf("TSC clock: %jd Hz (Method B)\n",
			    (intmax_t)tsc_frequency);
		}
	}

done:
	if (tsc_present) {
		kprintf("TSC%s clock: %jd Hz\n",
			tsc_invariant ? " invariant" : "",
			(intmax_t)tsc_frequency);
	}
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
			      NULL, SHUTDOWN_PRI_LAST);
}

/*
 * Sync the time of day back to the RTC on shutdown, but only if
 * we have already loaded it and have not crashed.
 */
static void
resettodr_on_shutdown(void *arg __unused)
{
	if (rtc_loaded && panicstr == NULL) {
		resettodr();
	}
}

/*
 * Initialize the time of day register, based on the time base which is, e.g.
 * from a filesystem.
 */
void
inittodr(time_t base)
{
	unsigned long	sec, days;
	int		year, month;
	int		y, m;
	struct timespec ts;

	if (base) {
		/* Seed the clock from the caller-supplied base first */
		ts.tv_sec = base;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}

	/* Look if we have a RTC present and the time is valid */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto wrong_time;

	/* wait for time update to complete */
	/* If RTCSA_TUP is zero, we have at least 244us before next update */
	crit_enter();
	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
		crit_exit();
		crit_enter();
	}

	days = 0;
#ifdef USE_RTC_CENTURY
	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
#else
	year = readrtc(RTC_YEAR) + 1900;
	if (year < 1970)
		year += 100;
#endif
	if (year < 1970) {
		crit_exit();
		goto wrong_time;
	}
	month = readrtc(RTC_MONTH);
	for (m = 1; m < month; m++)
		days += daysinmonth[m-1];
	if ((month > 2) && LEAPYEAR(year))
		days ++;
	days += readrtc(RTC_DAY) - 1;
	for (y = 1970; y < year; y++)
		days += DAYSPERYEAR + LEAPYEAR(y);
	sec = ((( days * 24 +
		  readrtc(RTC_HRS)) * 60 +
		  readrtc(RTC_MIN)) * 60 +
		  readrtc(RTC_SEC));
	/* sec now contains the number of seconds, since Jan 1 1970,
	   in the local time zone */

	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	/* Only step the clock if it disagrees by 2 seconds or more */
	y = (int)(time_second - sec);
	if (y <= -2 || y >= 2) {
		/* badly off, adjust it */
		ts.tv_sec = sec;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}
	rtc_loaded = 1;
	crit_exit();
	return;

wrong_time:
	kprintf("Invalid time in real time clock.\n");
	kprintf("Check and reset the date immediately!\n");
}

/*
 * Write system time back to RTC
 */
void
resettodr(void)
{
	struct timeval tv;
	unsigned long tm;
	int m;
	int y;

	if (disable_rtc_set)
		return;

	microtime(&tv);
	tm = tv.tv_sec;

	crit_enter();
	/* Disable RTC updates and interrupts. */
	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);

	/* Calculate local time to put in RTC */

	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */

	/* We have now the days since 01-01-1970 in tm */
	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
	     tm >= m;
	     y++, m = DAYSPERYEAR + LEAPYEAR(y))
		tm -= m;

	/* Now we have the years in y and the day-of-the-year in tm */
	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
#ifdef USE_RTC_CENTURY
	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
#endif
	for (m = 0; ; m++) {
		int ml;

		ml = daysinmonth[m];
		if (m == 1 && LEAPYEAR(y))
			ml++;
		if (tm < ml)
			break;
		tm -= ml;
	}

	writertc(RTC_MONTH, bin2bcd(m + 1));		/* Write back Month   */
	writertc(RTC_DAY, bin2bcd(tm + 1));		/* Write back Month Day */

	/* Reenable RTC updates and interrupts. */
	writertc(RTC_STATUSB, rtc_statusb);
	crit_exit();
}

/*
 * Force a timer0 one-shot and busy-wait ~1/100s to verify that the 8254
 * interrupt is actually delivered on the given IOAPIC irq.  Returns 0 on
 * success, ENOENT if no interrupt arrived.
 */
static int
i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
{
	sysclock_t base;
	long lastcnt;

	/*
	 * Following code assumes the 8254 is the cpu timer,
	 * so make sure it is.
	 */
	/*KKASSERT(sys_cputimer == &i8254_cputimer); (tested by CuteLarva) */
	KKASSERT(cti == &i8254_cputimer_intr);

	lastcnt = get_interrupt_counter(irq, mycpuid);

	/*
	 * Force an 8254 Timer0 interrupt and wait 1/100s for
	 * it to happen, then see if we got it.
	 */
	kprintf("IOAPIC: testing 8254 interrupt delivery...");

	i8254_intr_reload(cti, sys_cputimer->fromus(2));
	base = sys_cputimer->count();
	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
		;	/* nothing */

	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0) {
		kprintf(" failed\n");
		return ENOENT;
	} else {
		kprintf(" success\n");
	}
	return 0;
}

/*
 * Start both clocks running.  DragonFly note: the stat clock is no longer
 * used.  Instead, 8254 based systimers are used for all major clock
 * interrupts.
 */
static void
i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
{
	void *clkdesc = NULL;
	int irq = 0, mixed_mode = 0, error;

	KKASSERT(mycpuid == 0);

	if (!selected && i8254_intr_disable)
		goto nointr;

	/*
	 * The stat interrupt mask is different without the
	 * statistics clock.  Also, don't set the interrupt
	 * flag which would normally cause the RTC to generate
	 * interrupts.
	 */
	rtc_statusb = RTCSB_24HR;

	/* Finish initializing 8254 timer 0. */
	if (ioapic_enable) {
		irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
			INTR_POLARITY_HIGH);
		if (irq < 0) {
mixed_mode_setup:
			/* Fall back to routing irq0 through the ExtINT pin */
			error = ioapic_conf_legacy_extint(0);
			if (!error) {
				irq = machintr_legacy_intr_find(0,
				    INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
				if (irq < 0)
					error = ENOENT;
			}

			if (error) {
				if (!selected) {
					kprintf("IOAPIC: setup mixed mode for "
						"irq 0 failed: %d\n", error);
					goto nointr;
				} else {
					panic("IOAPIC: setup mixed mode for "
					      "irq 0 failed: %d\n", error);
				}
			}
			mixed_mode = 1;
		}
		clkdesc = register_int(irq, clkintr, NULL, "clk",
				       NULL,
				       INTR_EXCL | INTR_CLOCK |
				       INTR_NOPOLL | INTR_MPSAFE |
				       INTR_NOENTROPY, 0);
	} else {
		register_int(0, clkintr, NULL, "clk", NULL,
			     INTR_EXCL | INTR_CLOCK |
			     INTR_NOPOLL | INTR_MPSAFE |
			     INTR_NOENTROPY, 0);
	}

	/* Initialize RTC. */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	if (ioapic_enable) {
		error = i8254_ioapic_trial(irq, cti);
		if (error) {
			if (mixed_mode) {
				if (!selected) {
					kprintf("IOAPIC: mixed mode for irq %d "
						"trial failed: %d\n",
						irq, error);
					goto nointr;
				} else {
					panic("IOAPIC: mixed mode for irq %d "
					      "trial failed: %d\n", irq, error);
				}
			} else {
				kprintf("IOAPIC: warning 8254 is not connected "
					"to the correct pin, try mixed mode\n");
				unregister_int(clkdesc, 0);
				goto mixed_mode_setup;
			}
		}
	}
	return;

nointr:
	i8254_nointr = 1;	/* don't try to register again */
	cputimer_intr_deregister(cti);
}

/*
 * Select the RTC status-A divider/rate for profiling vs normal stat rate.
 */
void
setstatclockrate(int newhz)
{
	if (newhz == RTC_PROFRATE)
		rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
	else
		rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
	writertc(RTC_STATUSA, rtc_statusa);
}

#if 0
static
unsigned 1341 tsc_get_timecount(struct timecounter *tc) 1342 { 1343 return (rdtsc()); 1344 } 1345 #endif 1346 1347 #ifdef KERN_TIMESTAMP 1348 #define KERN_TIMESTAMP_SIZE 16384 1349 static u_long tsc[KERN_TIMESTAMP_SIZE] ; 1350 SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc, 1351 sizeof(tsc), "LU", "Kernel timestamps"); 1352 void 1353 _TSTMP(u_int32_t x) 1354 { 1355 static int i; 1356 1357 tsc[i] = (u_int32_t)rdtsc(); 1358 tsc[i+1] = x; 1359 i = i + 2; 1360 if (i >= KERN_TIMESTAMP_SIZE) 1361 i = 0; 1362 tsc[i] = 0; /* mark last entry */ 1363 } 1364 #endif /* KERN_TIMESTAMP */ 1365 1366 /* 1367 * 1368 */ 1369 1370 static int 1371 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS) 1372 { 1373 sysclock_t count; 1374 uint64_t tscval; 1375 char buf[32]; 1376 1377 crit_enter(); 1378 if (sys_cputimer == &i8254_cputimer) 1379 count = sys_cputimer->count(); 1380 else 1381 count = 0; 1382 if (tsc_present) 1383 tscval = rdtsc(); 1384 else 1385 tscval = 0; 1386 crit_exit(); 1387 ksnprintf(buf, sizeof(buf), "%016lx %016lx", count, tscval); 1388 return(SYSCTL_OUT(req, buf, strlen(buf) + 1)); 1389 } 1390 1391 struct tsc_mpsync_info { 1392 volatile int tsc_ready_cnt; 1393 volatile int tsc_done_cnt; 1394 volatile int tsc_command; 1395 volatile int unused01[5]; 1396 struct { 1397 uint64_t v; 1398 uint64_t unused02; 1399 } tsc_saved[MAXCPU]; 1400 } __cachealign; 1401 1402 #if 0 1403 static void 1404 tsc_mpsync_test_loop(struct tsc_mpsync_thr *info) 1405 { 1406 struct globaldata *gd = mycpu; 1407 tsc_uclock_t test_end, test_begin; 1408 u_int i; 1409 1410 if (bootverbose) { 1411 kprintf("cpu%d: TSC testing MP synchronization ...\n", 1412 gd->gd_cpuid); 1413 } 1414 1415 test_begin = rdtsc_ordered(); 1416 /* Run test for 100ms */ 1417 test_end = test_begin + (tsc_frequency / 10); 1418 1419 arg->tsc_mpsync = 1; 1420 arg->tsc_target = test_begin; 1421 1422 #define TSC_TEST_TRYMAX 1000000 /* Make sure we could stop */ 1423 #define TSC_TEST_TRYMIN 50000 1424 1425 for (i = 0; i < 
TSC_TEST_TRYMAX; ++i) { 1426 struct lwkt_cpusync cs; 1427 1428 crit_enter(); 1429 lwkt_cpusync_init(&cs, gd->gd_other_cpus, 1430 tsc_mpsync_test_remote, arg); 1431 lwkt_cpusync_interlock(&cs); 1432 cpu_pause(); 1433 arg->tsc_target = rdtsc_ordered(); 1434 cpu_mfence(); 1435 lwkt_cpusync_deinterlock(&cs); 1436 crit_exit(); 1437 cpu_pause(); 1438 1439 if (!arg->tsc_mpsync) { 1440 kprintf("cpu%d: TSC is not MP synchronized @%u\n", 1441 gd->gd_cpuid, i); 1442 break; 1443 } 1444 if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN) 1445 break; 1446 } 1447 1448 #undef TSC_TEST_TRYMIN 1449 #undef TSC_TEST_TRYMAX 1450 1451 if (arg->tsc_target == test_begin) { 1452 kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid); 1453 /* XXX disable TSC? */ 1454 tsc_invariant = 0; 1455 arg->tsc_mpsync = 0; 1456 return; 1457 } 1458 1459 if (arg->tsc_mpsync && bootverbose) { 1460 kprintf("cpu%d: TSC is MP synchronized after %u tries\n", 1461 gd->gd_cpuid, i); 1462 } 1463 } 1464 1465 #endif 1466 1467 #define TSC_TEST_COUNT 50000 1468 1469 static void 1470 tsc_mpsync_ap_thread(void *xinfo) 1471 { 1472 struct tsc_mpsync_info *info = xinfo; 1473 int cpu = mycpuid; 1474 int i; 1475 1476 /* 1477 * Tell main loop that we are ready and wait for initiation 1478 */ 1479 atomic_add_int(&info->tsc_ready_cnt, 1); 1480 while (info->tsc_command == 0) { 1481 lwkt_force_switch(); 1482 } 1483 1484 /* 1485 * Run test for 10000 loops or until tsc_done_cnt != 0 (another 1486 * cpu has finished its test), then increment done. 
1487 */ 1488 crit_enter(); 1489 for (i = 0; i < TSC_TEST_COUNT && info->tsc_done_cnt == 0; ++i) { 1490 info->tsc_saved[cpu].v = rdtsc_ordered(); 1491 } 1492 crit_exit(); 1493 atomic_add_int(&info->tsc_done_cnt, 1); 1494 1495 lwkt_exit(); 1496 } 1497 1498 static void 1499 tsc_mpsync_test(void) 1500 { 1501 enum { TSCOK, TSCNEG, TSCSPAN } error = TSCOK; 1502 int cpu; 1503 int try; 1504 1505 if (!tsc_invariant) { 1506 /* Not even invariant TSC */ 1507 kprintf("TSC is not invariant, " 1508 "no further tests will be performed\n"); 1509 return; 1510 } 1511 1512 if (ncpus == 1) { 1513 /* Only one CPU */ 1514 tsc_mpsync = 1; 1515 return; 1516 } 1517 1518 /* 1519 * Forcing can be used w/qemu to reduce contention 1520 */ 1521 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync); 1522 1523 if (tsc_mpsync == 0) { 1524 switch (cpu_vendor_id) { 1525 case CPU_VENDOR_INTEL: 1526 /* 1527 * Intel probably works 1528 */ 1529 break; 1530 1531 case CPU_VENDOR_AMD: 1532 /* 1533 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar 1534 * architectures) we have to watch out for 1535 * Erratum 778: 1536 * "Processor Core Time Stamp Counters May 1537 * Experience Drift" 1538 * This Erratum is only listed for cpus in Family 1539 * 15h < Model 30h and for 16h < Model 30h. 1540 * 1541 * AMD < Bulldozer probably doesn't work 1542 */ 1543 if (CPUID_TO_FAMILY(cpu_id) == 0x15 || 1544 CPUID_TO_FAMILY(cpu_id) == 0x16) { 1545 if (CPUID_TO_MODEL(cpu_id) < 0x30) 1546 return; 1547 } else if (CPUID_TO_FAMILY(cpu_id) < 0x17) { 1548 return; 1549 } 1550 break; 1551 1552 default: 1553 /* probably won't work */ 1554 return; 1555 } 1556 } else if (tsc_mpsync < 0) { 1557 kprintf("TSC MP synchronization test is disabled\n"); 1558 tsc_mpsync = 0; 1559 return; 1560 } 1561 1562 /* 1563 * Test even if forced to 1 above. If forced, we will use the TSC 1564 * even if the test fails. (set forced to -1 to disable entirely). 
1565 */ 1566 kprintf("TSC testing MP synchronization ...\n"); 1567 kprintf("TSC testing MP: NOTE! CPU pwrsave will inflate latencies!\n"); 1568 1569 /* 1570 * Test that the TSC is monotonically increasing across CPU 1571 * switches. Otherwise time will get really messed up if the 1572 * TSC is selected as the timebase. 1573 * 1574 * Test 4 times 1575 */ 1576 for (try = 0; tsc_frequency && try < 4; ++try) { 1577 tsc_uclock_t last; 1578 tsc_uclock_t next; 1579 tsc_sclock_t delta; 1580 tsc_sclock_t lo_delta = 0x7FFFFFFFFFFFFFFFLL; 1581 tsc_sclock_t hi_delta = -0x7FFFFFFFFFFFFFFFLL; 1582 1583 last = rdtsc(); 1584 for (cpu = 0; cpu < ncpus; ++cpu) { 1585 lwkt_migratecpu(cpu); 1586 next = rdtsc(); 1587 if (cpu == 0) { 1588 last = next; 1589 continue; 1590 } 1591 1592 delta = next - last; 1593 if (delta < 0) { 1594 kprintf("TSC cpu-delta NEGATIVE: " 1595 "cpu %d to %d (%ld)\n", 1596 cpu - 1, cpu, delta); 1597 error = TSCNEG; 1598 } 1599 if (lo_delta > delta) 1600 lo_delta = delta; 1601 if (hi_delta < delta) 1602 hi_delta = delta; 1603 last = next; 1604 } 1605 last = rdtsc(); 1606 for (cpu = ncpus - 2; cpu >= 0; --cpu) { 1607 lwkt_migratecpu(cpu); 1608 next = rdtsc(); 1609 delta = next - last; 1610 if (delta <= 0) { 1611 kprintf("TSC cpu-delta WAS NEGATIVE! " 1612 "cpu %d to %d (%ld)\n", 1613 cpu + 1, cpu, delta); 1614 error = TSCNEG; 1615 } 1616 if (lo_delta > delta) 1617 lo_delta = delta; 1618 if (hi_delta < delta) 1619 hi_delta = delta; 1620 last = next; 1621 } 1622 kprintf("TSC cpu-delta test complete, %ldnS to %ldnS ", 1623 muldivu64(lo_delta, 1000000000, tsc_frequency), 1624 muldivu64(hi_delta, 1000000000, tsc_frequency)); 1625 if (error != TSCOK) { 1626 kprintf("FAILURE\n"); 1627 break; 1628 } 1629 kprintf("SUCCESS\n"); 1630 } 1631 1632 /* 1633 * Test TSC MP synchronization on APs. 1634 * 1635 * Test 4 times. 
1636 */ 1637 for (try = 0; tsc_frequency && try < 4; ++try) { 1638 struct tsc_mpsync_info info; 1639 uint64_t last; 1640 int64_t xworst; 1641 int64_t xdelta; 1642 int64_t delta; 1643 1644 bzero(&info, sizeof(info)); 1645 1646 for (cpu = 0; cpu < ncpus; ++cpu) { 1647 thread_t td; 1648 lwkt_create(tsc_mpsync_ap_thread, &info, &td, 1649 NULL, TDF_NOSTART, cpu, 1650 "tsc mpsync %d", cpu); 1651 lwkt_setpri_initial(td, curthread->td_pri); 1652 lwkt_schedule(td); 1653 } 1654 while (info.tsc_ready_cnt != ncpus) 1655 lwkt_force_switch(); 1656 1657 /* 1658 * All threads are ready, start the test and wait for 1659 * completion. 1660 */ 1661 info.tsc_command = 1; 1662 while (info.tsc_done_cnt != ncpus) 1663 lwkt_force_switch(); 1664 1665 /* 1666 * Process results 1667 */ 1668 last = info.tsc_saved[0].v; 1669 delta = 0; 1670 xworst = 0; 1671 for (cpu = 0; cpu < ncpus; ++cpu) { 1672 xdelta = (int64_t)(info.tsc_saved[cpu].v - last); 1673 last = info.tsc_saved[cpu].v; 1674 if (xdelta < 0) 1675 xdelta = -xdelta; 1676 if (xworst < xdelta) 1677 xworst = xdelta; 1678 delta += xdelta; 1679 1680 } 1681 1682 /* 1683 * Result from attempt. Break-out if we succeeds, otherwise 1684 * try again (up to 4 times). This might be in a VM so we 1685 * need to be robust. 
1686 */ 1687 kprintf("TSC cpu concurrency test complete, worst=%ldns, " 1688 "avg=%ldns ", 1689 muldivu64(xworst, 1000000000, tsc_frequency), 1690 muldivu64(delta / ncpus, 1000000000, tsc_frequency)); 1691 if (delta / ncpus > tsc_frequency / 100) { 1692 kprintf("FAILURE\n"); 1693 } 1694 if (delta / ncpus < tsc_frequency / 100000) { 1695 kprintf("SUCCESS\n"); 1696 if (error == TSCOK) 1697 tsc_mpsync = 1; 1698 break; 1699 } 1700 kprintf("INDETERMINATE\n"); 1701 } 1702 1703 if (tsc_mpsync) 1704 kprintf("TSC is MP synchronized\n"); 1705 else 1706 kprintf("TSC is not MP synchronized\n"); 1707 } 1708 SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL); 1709 1710 static SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254"); 1711 SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0, 1712 "frequency"); 1713 SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD, 1714 0, 0, hw_i8254_timestamp, "A", ""); 1715 1716 SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD, 1717 &tsc_present, 0, "TSC Available"); 1718 SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD, 1719 &tsc_invariant, 0, "Invariant TSC"); 1720 SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD, 1721 &tsc_mpsync, 0, "TSC is synchronized across CPUs"); 1722 SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD, 1723 &tsc_frequency, 0, "TSC Frequency"); 1724