1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * Copyright (c) 2008 The DragonFly Project. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * William Jolitz and Don Ahn. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 34 * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $ 35 */ 36 37 /* 38 * Routines to handle clock hardware. 
39 */ 40 41 /* 42 * inittodr, settodr and support routines written 43 * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at> 44 * 45 * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94 46 */ 47 48 #if 0 49 #include "opt_clock.h" 50 #endif 51 52 #include <sys/param.h> 53 #include <sys/systm.h> 54 #include <sys/eventhandler.h> 55 #include <sys/time.h> 56 #include <sys/kernel.h> 57 #include <sys/bus.h> 58 #include <sys/sysctl.h> 59 #include <sys/cons.h> 60 #include <sys/kbio.h> 61 #include <sys/systimer.h> 62 #include <sys/globaldata.h> 63 #include <sys/machintr.h> 64 #include <sys/interrupt.h> 65 66 #include <sys/thread2.h> 67 68 #include <machine/clock.h> 69 #include <machine/cputypes.h> 70 #include <machine/frame.h> 71 #include <machine/ipl.h> 72 #include <machine/limits.h> 73 #include <machine/md_var.h> 74 #include <machine/psl.h> 75 #include <machine/segments.h> 76 #include <machine/smp.h> 77 #include <machine/specialreg.h> 78 #include <machine/intr_machdep.h> 79 80 #include <machine_base/apic/ioapic.h> 81 #include <machine_base/apic/ioapic_abi.h> 82 #include <machine_base/icu/icu.h> 83 #include <bus/isa/isa.h> 84 #include <bus/isa/rtc.h> 85 #include <machine_base/isa/timerreg.h> 86 87 SET_DECLARE(timecounter_init_set, const timecounter_init_t); 88 TIMECOUNTER_INIT(placeholder, NULL); 89 90 static void i8254_restore(void); 91 static void resettodr_on_shutdown(void *arg __unused); 92 93 /* 94 * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we 95 * can use a simple formula for leap years. 
 */
#define LEAPYEAR(y) ((u_int)(y) % 4 == 0)
#define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31)

#ifndef TIMER_FREQ
#define TIMER_FREQ 1193182
#endif

/* Mode-select and counter I/O ports of whichever 8254 channel is the walltimer */
static uint8_t i8254_walltimer_sel;
static uint16_t i8254_walltimer_cntr;

int adjkerntz;			/* local offset from GMT in seconds */
int disable_rtc_set;		/* disable resettodr() if != 0 */
int tsc_present;
int tsc_invariant;
int tsc_mpsync;
int tsc_is_broken;
int wall_cmos_clock;		/* wall CMOS clock assumed if != 0 */
int timer0_running;
tsc_uclock_t tsc_frequency;
tsc_uclock_t tsc_oneus_approx;	/* always at least 1, approx only */

/* Ownership state for each of the three 8254 channels */
enum tstate { RELEASED, ACQUIRED };
enum tstate timer0_state;
enum tstate timer1_state;
enum tstate timer2_state;

int i8254_cputimer_disable;	/* No need to initialize i8254 cputimer. */

static int beeping = 0;
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
static int rtc_loaded;

/* Non-zero when systimebase -> 8254 tick conversion reduces to a divide */
static int i8254_cputimer_div;

static int i8254_nointr;
static int i8254_intr_disable = 1;
TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);

static int calibrate_timers_with_rtc = 0;
TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);

static int calibrate_tsc_fast = 1;
TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast);

static int calibrate_test;
TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test);

static struct callout sysbeepstop_ch;

static sysclock_t i8254_cputimer_count(void);
static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
static void i8254_cputimer_destruct(struct cputimer *cputimer);

/* Free-running 8254 walltimer registered with the cputimer framework */
static struct cputimer i8254_cputimer = {
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "i8254",
	.pri = CPUTIMER_PRI_8254,
	.type = 0,		/* determined later */
	.count = i8254_cputimer_count,
	.fromhz = cputimer_default_fromhz,
	.fromus = cputimer_default_fromus,
	.construct = i8254_cputimer_construct,
	.destruct = i8254_cputimer_destruct,
	.freq = TIMER_FREQ
};

static sysclock_t tsc_cputimer_count_mfence(void);
static sysclock_t tsc_cputimer_count_lfence(void);
static void tsc_cputimer_construct(struct cputimer *, sysclock_t);

/* TSC-based cputimer; count method and frequency filled in once calibrated */
static struct cputimer tsc_cputimer = {
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "TSC",
	.pri = CPUTIMER_PRI_TSC,
	.type = CPUTIMER_TSC,
	.count = NULL,		/* determined later */
	.fromhz = cputimer_default_fromhz,
	.fromus = cputimer_default_fromus,
	.construct = tsc_cputimer_construct,
	.destruct = cputimer_default_destruct,
	.freq = 0		/* determined later */
};

static struct cpucounter tsc_cpucounter = {
	.freq = 0,		/* determined later */
	.count = NULL,		/* determined later */
	.flags = 0,		/* adjusted later */
	.prio = CPUCOUNTER_PRIO_TSC,
	.type = CPUCOUNTER_TSC
};

static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);

/* 8254 timer0 used as the one-shot systimer interrupt source */
static struct cputimer_intr i8254_cputimer_intr = {
	.freq = TIMER_FREQ,
	.reload = i8254_intr_reload,
	.enable = cputimer_intr_default_enable,
	.config = i8254_intr_config,
	.restart = cputimer_intr_default_restart,
	.pmfixup = cputimer_intr_default_pmfixup,
	.initclock = i8254_intr_initclock,
	.pcpuhand = NULL,
	.next = SLIST_ENTRY_INITIALIZER,
	.name = "i8254",
	.type = CPUTIMER_INTR_8254,
	.prio = CPUTIMER_INTR_PRIO_8254,
	.caps = CPUTIMER_INTR_CAP_PS,
	.priv = NULL
};

/*
 * timer0 clock interrupt.
 * Timer0 is in one-shot mode and has stopped
 * counting as of this interrupt.  We use timer1 in free-running mode (not
 * generating any interrupts) as our main counter.  Each cpu has timeouts
 * pending.
 *
 * This code is INTR_MPSAFE and may be called without the BGL held.
 */
static void
clkintr(void *dummy, void *frame_arg)
{
	static sysclock_t sysclock_count;	/* NOTE! Must be static */
	struct globaldata *gd = mycpu;
	struct globaldata *gscan;
	int n;

	/*
	 * SWSTROBE mode is a one-shot, the timer is no longer running
	 */
	timer0_running = 0;

	/*
	 * XXX the dispatcher needs work.  right now we call systimer_intr()
	 * directly or via IPI for any cpu with systimers queued, which is
	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
	 */
	sysclock_count = sys_cputimer->count();
	for (n = 0; n < ncpus; ++n) {
		gscan = globaldata_find(n);
		if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
			continue;
		if (gscan != gd) {
			lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
					&sysclock_count, 1);
		} else {
			systimer_intr(&sysclock_count, 0, frame_arg);
		}
	}
}


/*
 * Claim exclusive use of 8254 timer2 and program its mode.  Returns -1
 * if timer2 is already owned.
 *
 * NOTE! not MP safe.
 */
int
acquire_timer2(int mode)
{
	if (timer2_state != RELEASED)
		return (-1);
	timer2_state = ACQUIRED;

	/*
	 * This access to the timer registers is as atomic as possible
	 * because it is a single instruction.  We could do better if we
	 * knew the rate.
	 */
	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
	return (0);
}

/* Release timer2 and restore its default square-wave mode */
int
release_timer2(void)
{
	if (timer2_state != ACQUIRED)
		return (-1);
	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
	timer2_state = RELEASED;
	return (0);
}

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

/* ddb "show rtc" - dump RTC date/time and control registers */
DB_SHOW_COMMAND(rtc, rtc)
{
	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
		rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
		rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
		rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
}
#endif /* DDB */

/*
 * Return the current cpu timer count as a 32 bit integer.
 */
static
sysclock_t
i8254_cputimer_count(void)
{
	static uint16_t cputimer_last;
	uint16_t count;
	sysclock_t ret;

	clock_lock();
	/* Latch then read the 16-bit count, lsb first */
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
	count = (uint8_t)inb(i8254_walltimer_cntr);	/* get countdown */
	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
	count = -count;			/* -> countup */
	if (count < cputimer_last)	/* rollover */
		i8254_cputimer.base += 0x00010000;
	ret = i8254_cputimer.base | count;
	cputimer_last = count;
	clock_unlock();
	return(ret);
}

/*
 * This function is called whenever the system timebase changes, allowing
 * us to calculate what is needed to convert a system timebase tick
 * into an 8254 tick for the interrupt timer.  If we can convert to a
 * simple shift, multiplication, or division, we do so.  Otherwise 64
 * bit arithmatic is required every time the interrupt timer is reloaded.
 */
static void
i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
{
	int freq;
	int div;

	/*
	 * Will a simple divide do the trick?  Allow the rounded divisor
	 * to be off by at most 1 Hz either way.
	 */
	div = (timer->freq + (cti->freq / 2)) / cti->freq;
	freq = cti->freq * div;

	if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
		i8254_cputimer_div = div;
	else
		i8254_cputimer_div = 0;	/* fall back to 64-bit math */
}

/*
 * Reload for the next timeout.  It is possible for the reload value
 * to be 0 or negative, indicating that an immediate timer interrupt
 * is desired.  For now make the minimum 2 ticks.
 *
 * We may have to convert from the system timebase to the 8254 timebase.
 */
static void
i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	uint16_t count;

	if (i8254_cputimer_div)
		reload /= i8254_cputimer_div;
	else
		reload = (int64_t)reload * cti->freq / sys_cputimer->freq;

	if ((int)reload < 2)
		reload = 2;

	clock_lock();
	if (timer0_running) {
		/* Only shorten the pending one-shot, never lengthen it */
		outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* count-down timer */
		count = (uint8_t)inb(TIMER_CNTR0);		/* lsb */
		count |= ((uint8_t)inb(TIMER_CNTR0) << 8);	/* msb */
		if (reload < count) {
			outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
			outb(TIMER_CNTR0, (uint8_t)reload);	/* lsb */
			outb(TIMER_CNTR0, (uint8_t)(reload >> 8)); /* msb */
		}
	} else {
		timer0_running = 1;
		if (reload > 0xFFFF)
			reload = 0;		/* full count */
		outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
		outb(TIMER_CNTR0, (uint8_t)reload);		/* lsb */
		outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
	}
	clock_unlock();
}

/*
 * DELAY(usec)	     - Spin for the specified number of microseconds.
 * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
 *		       but do a thread switch in the loop
 *
 * Relies on timer 1 counting down from (cputimer_freq / hz)
 * Note: timer had better have been programmed before this is first used!
 */
static void
DODELAY(int n, int doswitch)
{
	ssysclock_t delta, ticks_left;
	sysclock_t prev_tick, tick;

#ifdef DELAYDEBUG
	int getit_calls = 1;
	int n1;
	static int state = 0;

	if (state == 0) {
		state = 1;
		for (n1 = 1; n1 <= 10000000; n1 *= 10)
			DELAY(n1);
		state = 2;
	}
	if (state == 1)
		kprintf("DELAY(%d)...", n);
#endif
	/*
	 * Guard against the timer being uninitialized if we are called
	 * early for console i/o.
	 */
	if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
		i8254_restore();

	/*
	 * Read the counter first, so that the rest of the setup overhead is
	 * counted.  Then calculate the number of hardware timer ticks
	 * required, rounding up to be sure we delay at least the requested
	 * number of microseconds.
	 */
	prev_tick = sys_cputimer->count();
	ticks_left = ((u_int)n * (int64_t)sys_cputimer->freq + 999999) /
		     1000000;

	/*
	 * Loop until done.
	 */
	while (ticks_left > 0) {
		tick = sys_cputimer->count();
#ifdef DELAYDEBUG
		++getit_calls;
#endif
		delta = tick - prev_tick;
		prev_tick = tick;
		if (delta < 0)
			delta = 0;
		ticks_left -= delta;
		if (doswitch && ticks_left > 0)
			lwkt_switch();
		cpu_pause();
	}
#ifdef DELAYDEBUG
	if (state == 1)
		kprintf(" %d calls to getit() at %d usec each\n",
			getit_calls, (n + 5) / getit_calls);
#endif
}

/*
 * DELAY() never switches.
 */
void
DELAY(int n)
{
	DODELAY(n, 0);
}

/*
 * Returns non-zero if the specified time period has elapsed.  Call
 * first with last_clock set to 0.
465 */ 466 int 467 CHECKTIMEOUT(TOTALDELAY *tdd) 468 { 469 sysclock_t delta; 470 int us; 471 472 if (tdd->started == 0) { 473 if (timer0_state == RELEASED && i8254_cputimer_disable == 0) 474 i8254_restore(); 475 tdd->last_clock = sys_cputimer->count(); 476 tdd->started = 1; 477 return(0); 478 } 479 delta = sys_cputimer->count() - tdd->last_clock; 480 us = (u_int64_t)delta * (u_int64_t)1000000 / 481 (u_int64_t)sys_cputimer->freq; 482 tdd->last_clock += (u_int64_t)us * (u_int64_t)sys_cputimer->freq / 483 1000000; 484 tdd->us -= us; 485 return (tdd->us < 0); 486 } 487 488 489 /* 490 * DRIVERSLEEP() does not switch if called with a spinlock held or 491 * from a hard interrupt. 492 */ 493 void 494 DRIVERSLEEP(int usec) 495 { 496 globaldata_t gd = mycpu; 497 498 if (gd->gd_intr_nesting_level || gd->gd_spinlocks) { 499 DODELAY(usec, 0); 500 } else { 501 DODELAY(usec, 1); 502 } 503 } 504 505 static void 506 sysbeepstop(void *chan) 507 { 508 outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ 509 beeping = 0; 510 release_timer2(); 511 } 512 513 int 514 sysbeep(int pitch, int period) 515 { 516 if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) 517 return(-1); 518 if (sysbeep_enable == 0) 519 return(-1); 520 /* 521 * Nobody else is using timer2, we do not need the clock lock 522 */ 523 outb(TIMER_CNTR2, pitch); 524 outb(TIMER_CNTR2, (pitch>>8)); 525 if (!beeping) { 526 /* enable counter2 output to speaker */ 527 outb(IO_PPI, inb(IO_PPI) | 3); 528 beeping = period; 529 callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL); 530 } 531 return (0); 532 } 533 534 /* 535 * RTC support routines 536 */ 537 538 int 539 rtcin(int reg) 540 { 541 u_char val; 542 543 crit_enter(); 544 outb(IO_RTC, reg); 545 inb(0x84); 546 val = inb(IO_RTC + 1); 547 inb(0x84); 548 crit_exit(); 549 return (val); 550 } 551 552 static __inline void 553 writertc(u_char reg, u_char val) 554 { 555 crit_enter(); 556 inb(0x84); 557 outb(IO_RTC, reg); 558 inb(0x84); 559 outb(IO_RTC + 1, val); 560 
inb(0x84); /* XXX work around wrong order in rtcin() */ 561 crit_exit(); 562 } 563 564 static __inline int 565 readrtc(int port) 566 { 567 return(bcd2bin(rtcin(port))); 568 } 569 570 static u_int 571 calibrate_clocks(void) 572 { 573 tsc_uclock_t old_tsc; 574 u_int tot_count; 575 sysclock_t count, prev_count; 576 int sec, start_sec, timeout; 577 578 if (bootverbose) 579 kprintf("Calibrating clock(s) ...\n"); 580 if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) 581 goto fail; 582 timeout = 100000000; 583 584 /* Read the mc146818A seconds counter. */ 585 for (;;) { 586 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { 587 sec = rtcin(RTC_SEC); 588 break; 589 } 590 if (--timeout == 0) 591 goto fail; 592 } 593 594 /* Wait for the mC146818A seconds counter to change. */ 595 start_sec = sec; 596 for (;;) { 597 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { 598 sec = rtcin(RTC_SEC); 599 if (sec != start_sec) 600 break; 601 } 602 if (--timeout == 0) 603 goto fail; 604 } 605 606 /* Start keeping track of the i8254 counter. */ 607 prev_count = sys_cputimer->count(); 608 tot_count = 0; 609 610 if (tsc_present) 611 old_tsc = rdtsc(); 612 else 613 old_tsc = 0; /* shut up gcc */ 614 615 /* 616 * Wait for the mc146818A seconds counter to change. Read the i8254 617 * counter for each iteration since this is convenient and only 618 * costs a few usec of inaccuracy. The timing of the final reads 619 * of the counters almost matches the timing of the initial reads, 620 * so the main cause of inaccuracy is the varying latency from 621 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the 622 * rtcin(RTC_SEC) that returns a changed seconds count. The 623 * maximum inaccuracy from this cause is < 10 usec on 486's. 
624 */ 625 start_sec = sec; 626 for (;;) { 627 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) 628 sec = rtcin(RTC_SEC); 629 count = sys_cputimer->count(); 630 tot_count += (int)(count - prev_count); 631 prev_count = count; 632 if (sec != start_sec) 633 break; 634 if (--timeout == 0) 635 goto fail; 636 } 637 638 /* 639 * Read the cpu cycle counter. The timing considerations are 640 * similar to those for the i8254 clock. 641 */ 642 if (tsc_present) { 643 tsc_frequency = rdtsc() - old_tsc; 644 if (bootverbose) { 645 kprintf("TSC clock: %jd Hz (Method A)\n", 646 (intmax_t)tsc_frequency); 647 } 648 } 649 tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000; 650 651 kprintf("i8254 clock: %u Hz\n", tot_count); 652 return (tot_count); 653 654 fail: 655 kprintf("failed, using default i8254 clock of %u Hz\n", 656 i8254_cputimer.freq); 657 return (i8254_cputimer.freq); 658 } 659 660 static void 661 i8254_restore(void) 662 { 663 timer0_state = ACQUIRED; 664 665 clock_lock(); 666 667 /* 668 * Timer0 is our fine-grained variable clock interrupt 669 */ 670 outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT); 671 outb(TIMER_CNTR0, 2); /* lsb */ 672 outb(TIMER_CNTR0, 0); /* msb */ 673 clock_unlock(); 674 675 if (!i8254_nointr) { 676 cputimer_intr_register(&i8254_cputimer_intr); 677 cputimer_intr_select(&i8254_cputimer_intr, 0); 678 } 679 680 /* 681 * Timer1 or timer2 is our free-running clock, but only if another 682 * has not been selected. 683 */ 684 cputimer_register(&i8254_cputimer); 685 cputimer_select(&i8254_cputimer, 0); 686 } 687 688 static void 689 i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock) 690 { 691 int which; 692 693 /* 694 * Should we use timer 1 or timer 2 ? 
695 */ 696 which = 0; 697 TUNABLE_INT_FETCH("hw.i8254.walltimer", &which); 698 if (which != 1 && which != 2) 699 which = 2; 700 701 switch(which) { 702 case 1: 703 timer->name = "i8254_timer1"; 704 timer->type = CPUTIMER_8254_SEL1; 705 i8254_walltimer_sel = TIMER_SEL1; 706 i8254_walltimer_cntr = TIMER_CNTR1; 707 timer1_state = ACQUIRED; 708 break; 709 case 2: 710 timer->name = "i8254_timer2"; 711 timer->type = CPUTIMER_8254_SEL2; 712 i8254_walltimer_sel = TIMER_SEL2; 713 i8254_walltimer_cntr = TIMER_CNTR2; 714 timer2_state = ACQUIRED; 715 break; 716 } 717 718 timer->base = (oldclock + 0xFFFF) & ~0xFFFF; 719 720 clock_lock(); 721 outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT); 722 outb(i8254_walltimer_cntr, 0); /* lsb */ 723 outb(i8254_walltimer_cntr, 0); /* msb */ 724 outb(IO_PPI, inb(IO_PPI) | 1); /* bit 0: enable gate, bit 1: spkr */ 725 clock_unlock(); 726 } 727 728 static void 729 i8254_cputimer_destruct(struct cputimer *timer) 730 { 731 switch(timer->type) { 732 case CPUTIMER_8254_SEL1: 733 timer1_state = RELEASED; 734 break; 735 case CPUTIMER_8254_SEL2: 736 timer2_state = RELEASED; 737 break; 738 default: 739 break; 740 } 741 timer->type = 0; 742 } 743 744 static void 745 rtc_restore(void) 746 { 747 /* Restore all of the RTC's "status" (actually, control) registers. */ 748 writertc(RTC_STATUSB, RTCSB_24HR); 749 writertc(RTC_STATUSA, rtc_statusa); 750 writertc(RTC_STATUSB, rtc_statusb); 751 } 752 753 /* 754 * Restore all the timers. 755 * 756 * This function is called to resynchronize our core timekeeping after a 757 * long halt, e.g. from apm_default_resume() and friends. It is also 758 * called if after a BIOS call we have detected munging of the 8254. 759 * It is necessary because cputimer_count() counter's delta may have grown 760 * too large for nanouptime() and friends to handle, or (in the case of 8254 761 * munging) might cause the SYSTIMER code to prematurely trigger. 
762 */ 763 void 764 timer_restore(void) 765 { 766 crit_enter(); 767 if (i8254_cputimer_disable == 0) 768 i8254_restore(); /* restore timer_freq and hz */ 769 rtc_restore(); /* reenable RTC interrupts */ 770 crit_exit(); 771 } 772 773 #define MAX_MEASURE_RETRIES 100 774 775 static u_int64_t 776 do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time, 777 int *retries) 778 { 779 u_int64_t tsc1, tsc2; 780 u_int64_t threshold; 781 sysclock_t val; 782 int cnt = 0; 783 784 do { 785 if (cnt > MAX_MEASURE_RETRIES/2) 786 threshold = timer_latency << 1; 787 else 788 threshold = timer_latency + (timer_latency >> 2); 789 790 cnt++; 791 tsc1 = rdtsc_ordered(); 792 val = sys_cputimer->count(); 793 tsc2 = rdtsc_ordered(); 794 } while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES && 795 tsc2 - tsc1 > threshold); 796 797 *retries = cnt - 1; 798 *latency = tsc2 - tsc1; 799 *time = val; 800 return tsc1; 801 } 802 803 static u_int64_t 804 do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency) 805 { 806 if (calibrate_tsc_fast) { 807 u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1; 808 u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2; 809 u_int64_t freq1, freq2; 810 sysclock_t start1, end1, start2, end2; 811 int retries1, retries2, retries3, retries4; 812 813 DELAY(1000); 814 old_tsc1 = do_measure(timer_latency, &start_lat1, &start1, 815 &retries1); 816 DELAY(20000); 817 old_tsc2 = do_measure(timer_latency, &start_lat2, &start2, 818 &retries2); 819 DELAY(usecs); 820 new_tsc1 = do_measure(timer_latency, &end_lat1, &end1, 821 &retries3); 822 DELAY(20000); 823 new_tsc2 = do_measure(timer_latency, &end_lat2, &end2, 824 &retries4); 825 826 old_tsc1 += start_lat1; 827 old_tsc2 += start_lat2; 828 freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2; 829 freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2; 830 end1 -= start1; 831 end2 -= start2; 832 /* This should in practice be safe from overflows. 
*/ 833 freq1 = (freq1 * sys_cputimer->freq) / end1; 834 freq2 = (freq2 * sys_cputimer->freq) / end2; 835 if (calibrate_test && (retries1 > 0 || retries2 > 0)) { 836 kprintf("%s: retries: %d, %d, %d, %d\n", 837 __func__, retries1, retries2, retries3, retries4); 838 } 839 if (calibrate_test) { 840 kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n", 841 __func__, freq1, freq2, (freq1 + freq2) / 2); 842 } 843 return (freq1 + freq2) / 2; 844 } else { 845 u_int64_t old_tsc, new_tsc; 846 u_int64_t freq; 847 848 old_tsc = rdtsc_ordered(); 849 DELAY(usecs); 850 new_tsc = rdtsc(); 851 freq = new_tsc - old_tsc; 852 /* This should in practice be safe from overflows. */ 853 freq = (freq * 1000 * 1000) / usecs; 854 return freq; 855 } 856 } 857 858 /* 859 * Initialize 8254 timer 0 early so that it can be used in DELAY(). 860 */ 861 void 862 startrtclock(void) 863 { 864 const timecounter_init_t **list; 865 u_int delta, freq; 866 867 callout_init_mp(&sysbeepstop_ch); 868 869 /* 870 * Can we use the TSC? 871 * 872 * NOTE: If running under qemu, probably a good idea to force the 873 * TSC because we are not likely to detect it as being 874 * invariant or mpsyncd if you don't. This will greatly 875 * reduce SMP contention. 
876 */ 877 if (cpu_feature & CPUID_TSC) { 878 tsc_present = 1; 879 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant); 880 881 if ((cpu_vendor_id == CPU_VENDOR_INTEL || 882 cpu_vendor_id == CPU_VENDOR_AMD) && 883 cpu_exthigh >= 0x80000007) { 884 u_int regs[4]; 885 886 do_cpuid(0x80000007, regs); 887 if (regs[3] & 0x100) 888 tsc_invariant = 1; 889 } 890 } else { 891 tsc_present = 0; 892 } 893 894 /* 895 * Initial RTC state, don't do anything unexpected 896 */ 897 writertc(RTC_STATUSA, rtc_statusa); 898 writertc(RTC_STATUSB, RTCSB_24HR); 899 900 SET_FOREACH(list, timecounter_init_set) { 901 if ((*list)->configure != NULL) 902 (*list)->configure(); 903 } 904 905 /* 906 * If tsc_frequency is already initialized now, and a flag is set 907 * that i8254 timer is unneeded, we are done. 908 */ 909 if (tsc_frequency != 0 && i8254_cputimer_disable != 0) 910 goto done; 911 912 /* 913 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to 914 * generate an interrupt, which we will ignore for now. 915 * 916 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000 917 * (so it counts a full 2^16 and repeats). We will use this timer 918 * for our counting. 919 */ 920 if (i8254_cputimer_disable == 0) 921 i8254_restore(); 922 923 kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name); 924 925 /* 926 * When booting without verbose messages, it's pointless to run the 927 * calibrate_clocks() calibration code, when we don't use the 928 * results in any way. With bootverbose, we are at least printing 929 * this information to the kernel log. 
930 */ 931 if (i8254_cputimer_disable != 0 || 932 (calibrate_timers_with_rtc == 0 && !bootverbose)) { 933 goto skip_rtc_based; 934 } 935 936 freq = calibrate_clocks(); 937 #ifdef CLK_CALIBRATION_LOOP 938 if (bootverbose) { 939 int c; 940 941 cnpoll(TRUE); 942 kprintf("Press a key on the console to " 943 "abort clock calibration\n"); 944 while ((c = cncheckc()) == -1 || c == NOKEY) 945 calibrate_clocks(); 946 cnpoll(FALSE); 947 } 948 #endif 949 950 /* 951 * Use the calibrated i8254 frequency if it seems reasonable. 952 * Otherwise use the default, and don't use the calibrated i586 953 * frequency. 954 */ 955 delta = freq > i8254_cputimer.freq ? 956 freq - i8254_cputimer.freq : i8254_cputimer.freq - freq; 957 if (delta < i8254_cputimer.freq / 100) { 958 if (calibrate_timers_with_rtc == 0) { 959 kprintf( 960 "hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n"); 961 freq = i8254_cputimer.freq; 962 } 963 /* 964 * NOTE: 965 * Interrupt timer's freq must be adjusted 966 * before we change the cuptimer's frequency. 
967 */ 968 i8254_cputimer_intr.freq = freq; 969 cputimer_set_frequency(&i8254_cputimer, freq); 970 } else { 971 if (bootverbose) 972 kprintf("%d Hz differs from default of %d Hz " 973 "by more than 1%%\n", 974 freq, i8254_cputimer.freq); 975 tsc_frequency = 0; 976 } 977 978 if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) { 979 kprintf("hw.calibrate_timers_with_rtc not " 980 "set - using old calibration method\n"); 981 tsc_frequency = 0; 982 } 983 984 skip_rtc_based: 985 if (tsc_present && tsc_frequency == 0) { 986 u_int cnt; 987 u_int64_t cputime_latency_tsc = 0, max = 0, min = 0; 988 int i; 989 990 for (i = 0; i < 10; i++) { 991 /* Warm up */ 992 (void)sys_cputimer->count(); 993 } 994 for (i = 0; i < 100; i++) { 995 u_int64_t old_tsc, new_tsc; 996 997 old_tsc = rdtsc_ordered(); 998 (void)sys_cputimer->count(); 999 new_tsc = rdtsc_ordered(); 1000 cputime_latency_tsc += (new_tsc - old_tsc); 1001 if (max < (new_tsc - old_tsc)) 1002 max = new_tsc - old_tsc; 1003 if (min == 0 || min > (new_tsc - old_tsc)) 1004 min = new_tsc - old_tsc; 1005 } 1006 cputime_latency_tsc /= 100; 1007 kprintf( 1008 "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n", 1009 cputime_latency_tsc, min, max); 1010 /* XXX Instead of this, properly filter out outliers. */ 1011 cputime_latency_tsc = min; 1012 1013 if (calibrate_test > 0) { 1014 u_int64_t values[20], avg = 0; 1015 for (i = 1; i <= 20; i++) { 1016 u_int64_t freq; 1017 1018 freq = do_calibrate_cputimer(i * 100 * 1000, 1019 cputime_latency_tsc); 1020 values[i - 1] = freq; 1021 } 1022 /* Compute an average TSC for the 1s to 2s delays. 
*/ 1023 for (i = 10; i < 20; i++) 1024 avg += values[i]; 1025 avg /= 10; 1026 for (i = 0; i < 20; i++) { 1027 kprintf("%ums: %lu (Diff from average: %ld)\n", 1028 (i + 1) * 100, values[i], 1029 (int64_t)(values[i] - avg)); 1030 } 1031 } 1032 1033 if (calibrate_tsc_fast > 0) { 1034 /* HPET would typically be >10MHz */ 1035 if (sys_cputimer->freq >= 10000000) 1036 cnt = 200000; 1037 else 1038 cnt = 500000; 1039 } else { 1040 cnt = 1000000; 1041 } 1042 1043 tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc); 1044 if (bootverbose && calibrate_timers_with_rtc) { 1045 kprintf("TSC clock: %jd Hz (Method B)\n", 1046 (intmax_t)tsc_frequency); 1047 } 1048 } 1049 1050 done: 1051 if (tsc_present) { 1052 kprintf("TSC%s clock: %jd Hz\n", 1053 tsc_invariant ? " invariant" : "", 1054 (intmax_t)tsc_frequency); 1055 } 1056 tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000; 1057 1058 EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown, 1059 NULL, SHUTDOWN_PRI_LAST); 1060 } 1061 1062 /* 1063 * Sync the time of day back to the RTC on shutdown, but only if 1064 * we have already loaded it and have not crashed. 1065 */ 1066 static void 1067 resettodr_on_shutdown(void *arg __unused) 1068 { 1069 if (rtc_loaded && panicstr == NULL) { 1070 resettodr(); 1071 } 1072 } 1073 1074 /* 1075 * Initialize the time of day register, based on the time base which is, e.g. 1076 * from a filesystem. 
 */
void
inittodr(time_t base)
{
	unsigned long sec, days;
	int year, month;
	int y, m;
	struct timespec ts;

	if (base) {
		ts.tv_sec = base;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}

	/* Look if we have a RTC present and the time is valid */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto wrong_time;

	/* wait for time update to complete */
	/* If RTCSA_TUP is zero, we have at least 244us before next update */
	crit_enter();
	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
		crit_exit();
		crit_enter();
	}

	days = 0;
#ifdef USE_RTC_CENTURY
	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
#else
	year = readrtc(RTC_YEAR) + 1900;
	if (year < 1970)
		year += 100;
#endif
	if (year < 1970) {
		crit_exit();
		goto wrong_time;
	}
	month = readrtc(RTC_MONTH);
	for (m = 1; m < month; m++)
		days += daysinmonth[m-1];
	if ((month > 2) && LEAPYEAR(year))
		days ++;
	days += readrtc(RTC_DAY) - 1;
	for (y = 1970; y < year; y++)
		days += DAYSPERYEAR + LEAPYEAR(y);
	sec = ((( days * 24 +
		  readrtc(RTC_HRS)) * 60 +
		  readrtc(RTC_MIN)) * 60 +
		  readrtc(RTC_SEC));
	/* sec now contains the number of seconds, since Jan 1 1970,
	   in the local time zone */

	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	/* only step the clock if it is off by 2 seconds or more */
	y = (int)(time_second - sec);
	if (y <= -2 || y >= 2) {
		/* badly off, adjust it */
		ts.tv_sec = sec;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}
	rtc_loaded = 1;
	crit_exit();
	return;

wrong_time:
	kprintf("Invalid time in real time clock.\n");
	kprintf("Check and reset the date immediately!\n");
}

/*
 * Write system time back to RTC
 */
void
resettodr(void)
{
	struct timeval tv;
	unsigned long tm;
	int m;
	int y;

	if (disable_rtc_set)
		return;

	microtime(&tv);
	tm = tv.tv_sec;

	crit_enter();
	/* Disable RTC updates and interrupts. */
	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);

	/* Calculate local time to put in RTC */

	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */

	/* We have now the days since 01-01-1970 in tm */
	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
	     tm >= m;
	     y++, m = DAYSPERYEAR + LEAPYEAR(y))
		tm -= m;

	/* Now we have the years in y and the day-of-the-year in tm */
	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
#ifdef USE_RTC_CENTURY
	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
#endif
	for (m = 0; ; m++) {
		int ml;

		ml = daysinmonth[m];
		if (m == 1 && LEAPYEAR(y))	/* February of a leap year */
			ml++;
		if (tm < ml)
			break;
		tm -= ml;
	}

	writertc(RTC_MONTH, bin2bcd(m + 1));		/* Write back Month   */
	writertc(RTC_DAY, bin2bcd(tm + 1));		/* Write back Month Day */

	/* Reenable RTC updates and interrupts. */
	writertc(RTC_STATUSB, rtc_statusb);
	crit_exit();
}

/*
 * Check whether 8254 timer0 interrupts are delivered through the ioapic
 * on the given irq.  Returns 0 on success, ENOENT if no interrupt was
 * observed within 1/100s.
 */
static int
i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
{
	sysclock_t base;
	long lastcnt;

	/*
	 * Following code assumes the 8254 is the cpu timer,
	 * so make sure it is.
	 */
	KKASSERT(sys_cputimer == &i8254_cputimer);
	KKASSERT(cti == &i8254_cputimer_intr);

	lastcnt = get_interrupt_counter(irq, mycpuid);

	/*
	 * Force an 8254 Timer0 interrupt and wait 1/100s for
	 * it to happen, then see if we got it.
	 */
	kprintf("IOAPIC: testing 8254 interrupt delivery\n");

	i8254_intr_reload(cti, 2);
	base = sys_cputimer->count();
	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
		;	/* nothing */

	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0)
		return ENOENT;
	return 0;
}

/*
 * Start both clocks running.  DragonFly note: the stat clock is no longer
 * used.  Instead, 8254 based systimers are used for all major clock
 * interrupts.
 */
static void
i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
{
	void *clkdesc = NULL;
	int irq = 0, mixed_mode = 0, error;

	KKASSERT(mycpuid == 0);

	if (!selected && i8254_intr_disable)
		goto nointr;

	/*
	 * The stat interrupt mask is different without the
	 * statistics clock.  Also, don't set the interrupt
	 * flag which would normally cause the RTC to generate
	 * interrupts.
	 */
	rtc_statusb = RTCSB_24HR;

	/* Finish initializing 8254 timer 0.
*/ 1265 if (ioapic_enable) { 1266 irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE, 1267 INTR_POLARITY_HIGH); 1268 if (irq < 0) { 1269 mixed_mode_setup: 1270 error = ioapic_conf_legacy_extint(0); 1271 if (!error) { 1272 irq = machintr_legacy_intr_find(0, 1273 INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH); 1274 if (irq < 0) 1275 error = ENOENT; 1276 } 1277 1278 if (error) { 1279 if (!selected) { 1280 kprintf("IOAPIC: setup mixed mode for " 1281 "irq 0 failed: %d\n", error); 1282 goto nointr; 1283 } else { 1284 panic("IOAPIC: setup mixed mode for " 1285 "irq 0 failed: %d\n", error); 1286 } 1287 } 1288 mixed_mode = 1; 1289 } 1290 clkdesc = register_int(irq, clkintr, NULL, "clk", 1291 NULL, 1292 INTR_EXCL | INTR_CLOCK | 1293 INTR_NOPOLL | INTR_MPSAFE | 1294 INTR_NOENTROPY, 0); 1295 } else { 1296 register_int(0, clkintr, NULL, "clk", NULL, 1297 INTR_EXCL | INTR_CLOCK | 1298 INTR_NOPOLL | INTR_MPSAFE | 1299 INTR_NOENTROPY, 0); 1300 } 1301 1302 /* Initialize RTC. */ 1303 writertc(RTC_STATUSA, rtc_statusa); 1304 writertc(RTC_STATUSB, RTCSB_24HR); 1305 1306 if (ioapic_enable) { 1307 error = i8254_ioapic_trial(irq, cti); 1308 if (error) { 1309 if (mixed_mode) { 1310 if (!selected) { 1311 kprintf("IOAPIC: mixed mode for irq %d " 1312 "trial failed: %d\n", 1313 irq, error); 1314 goto nointr; 1315 } else { 1316 panic("IOAPIC: mixed mode for irq %d " 1317 "trial failed: %d\n", irq, error); 1318 } 1319 } else { 1320 kprintf("IOAPIC: warning 8254 is not connected " 1321 "to the correct pin, try mixed mode\n"); 1322 unregister_int(clkdesc, 0); 1323 goto mixed_mode_setup; 1324 } 1325 } 1326 } 1327 return; 1328 1329 nointr: 1330 i8254_nointr = 1; /* don't try to register again */ 1331 cputimer_intr_deregister(cti); 1332 } 1333 1334 void 1335 setstatclockrate(int newhz) 1336 { 1337 if (newhz == RTC_PROFRATE) 1338 rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; 1339 else 1340 rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; 1341 writertc(RTC_STATUSA, rtc_statusa); 1342 } 1343 1344 #if 0 1345 static 
unsigned 1346 tsc_get_timecount(struct timecounter *tc) 1347 { 1348 return (rdtsc()); 1349 } 1350 #endif 1351 1352 #ifdef KERN_TIMESTAMP 1353 #define KERN_TIMESTAMP_SIZE 16384 1354 static u_long tsc[KERN_TIMESTAMP_SIZE] ; 1355 SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc, 1356 sizeof(tsc), "LU", "Kernel timestamps"); 1357 void 1358 _TSTMP(u_int32_t x) 1359 { 1360 static int i; 1361 1362 tsc[i] = (u_int32_t)rdtsc(); 1363 tsc[i+1] = x; 1364 i = i + 2; 1365 if (i >= KERN_TIMESTAMP_SIZE) 1366 i = 0; 1367 tsc[i] = 0; /* mark last entry */ 1368 } 1369 #endif /* KERN_TIMESTAMP */ 1370 1371 /* 1372 * 1373 */ 1374 1375 static int 1376 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS) 1377 { 1378 sysclock_t count; 1379 uint64_t tscval; 1380 char buf[32]; 1381 1382 crit_enter(); 1383 if (sys_cputimer == &i8254_cputimer) 1384 count = sys_cputimer->count(); 1385 else 1386 count = 0; 1387 if (tsc_present) 1388 tscval = rdtsc(); 1389 else 1390 tscval = 0; 1391 crit_exit(); 1392 ksnprintf(buf, sizeof(buf), "%08x %016llx", count, (long long)tscval); 1393 return(SYSCTL_OUT(req, buf, strlen(buf) + 1)); 1394 } 1395 1396 struct tsc_mpsync_arg { 1397 volatile uint64_t tsc_target; 1398 volatile int tsc_mpsync; 1399 }; 1400 1401 struct tsc_mpsync_thr { 1402 volatile int tsc_done_cnt; 1403 volatile int tsc_mpsync_cnt; 1404 }; 1405 1406 static void 1407 tsc_mpsync_test_remote(void *xarg) 1408 { 1409 struct tsc_mpsync_arg *arg = xarg; 1410 uint64_t tsc; 1411 1412 tsc = rdtsc_ordered(); 1413 if (tsc < arg->tsc_target) 1414 arg->tsc_mpsync = 0; 1415 } 1416 1417 static void 1418 tsc_mpsync_test_loop(struct tsc_mpsync_arg *arg) 1419 { 1420 struct globaldata *gd = mycpu; 1421 tsc_uclock_t test_end, test_begin; 1422 u_int i; 1423 1424 if (bootverbose) { 1425 kprintf("cpu%d: TSC testing MP synchronization ...\n", 1426 gd->gd_cpuid); 1427 } 1428 1429 test_begin = rdtsc_ordered(); 1430 /* Run test for 100ms */ 1431 test_end = test_begin + (tsc_frequency / 10); 1432 1433 arg->tsc_mpsync = 1; 1434 
arg->tsc_target = test_begin; 1435 1436 #define TSC_TEST_TRYMAX 1000000 /* Make sure we could stop */ 1437 #define TSC_TEST_TRYMIN 50000 1438 1439 for (i = 0; i < TSC_TEST_TRYMAX; ++i) { 1440 struct lwkt_cpusync cs; 1441 1442 crit_enter(); 1443 lwkt_cpusync_init(&cs, gd->gd_other_cpus, 1444 tsc_mpsync_test_remote, arg); 1445 lwkt_cpusync_interlock(&cs); 1446 cpu_pause(); 1447 arg->tsc_target = rdtsc_ordered(); 1448 cpu_mfence(); 1449 lwkt_cpusync_deinterlock(&cs); 1450 crit_exit(); 1451 cpu_pause(); 1452 1453 if (!arg->tsc_mpsync) { 1454 kprintf("cpu%d: TSC is not MP synchronized @%u\n", 1455 gd->gd_cpuid, i); 1456 break; 1457 } 1458 if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN) 1459 break; 1460 } 1461 1462 #undef TSC_TEST_TRYMIN 1463 #undef TSC_TEST_TRYMAX 1464 1465 if (arg->tsc_target == test_begin) { 1466 kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid); 1467 /* XXX disable TSC? */ 1468 tsc_invariant = 0; 1469 arg->tsc_mpsync = 0; 1470 return; 1471 } 1472 1473 if (arg->tsc_mpsync && bootverbose) { 1474 kprintf("cpu%d: TSC is MP synchronized after %u tries\n", 1475 gd->gd_cpuid, i); 1476 } 1477 } 1478 1479 static void 1480 tsc_mpsync_ap_thread(void *xthr) 1481 { 1482 struct tsc_mpsync_thr *thr = xthr; 1483 struct tsc_mpsync_arg arg; 1484 1485 tsc_mpsync_test_loop(&arg); 1486 if (arg.tsc_mpsync) { 1487 atomic_add_int(&thr->tsc_mpsync_cnt, 1); 1488 cpu_sfence(); 1489 } 1490 atomic_add_int(&thr->tsc_done_cnt, 1); 1491 1492 lwkt_exit(); 1493 } 1494 1495 static void 1496 tsc_mpsync_test(void) 1497 { 1498 struct tsc_mpsync_arg arg; 1499 1500 if (!tsc_invariant) { 1501 /* Not even invariant TSC */ 1502 return; 1503 } 1504 1505 if (ncpus == 1) { 1506 /* Only one CPU */ 1507 tsc_mpsync = 1; 1508 return; 1509 } 1510 1511 /* 1512 * Forcing can be used w/qemu to reduce contention 1513 */ 1514 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync); 1515 1516 if (tsc_mpsync == 0) { 1517 switch (cpu_vendor_id) { 1518 case CPU_VENDOR_INTEL: 1519 /* 1520 * Intel 
probably works 1521 */ 1522 break; 1523 1524 case CPU_VENDOR_AMD: 1525 /* 1526 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar 1527 * architectures) we have to watch out for 1528 * Erratum 778: 1529 * "Processor Core Time Stamp Counters May 1530 * Experience Drift" 1531 * This Erratum is only listed for cpus in Family 1532 * 15h < Model 30h and for 16h < Model 30h. 1533 * 1534 * AMD < Bulldozer probably doesn't work 1535 */ 1536 if (CPUID_TO_FAMILY(cpu_id) == 0x15 || 1537 CPUID_TO_FAMILY(cpu_id) == 0x16) { 1538 if (CPUID_TO_MODEL(cpu_id) < 0x30) 1539 return; 1540 } else if (CPUID_TO_FAMILY(cpu_id) < 0x17) { 1541 return; 1542 } 1543 break; 1544 1545 default: 1546 /* probably won't work */ 1547 return; 1548 } 1549 } else if (tsc_mpsync < 0) { 1550 kprintf("TSC MP synchronization test is disabled\n"); 1551 tsc_mpsync = 0; 1552 return; 1553 } 1554 1555 /* 1556 * Test even if forced above. If forced, we will use the TSC 1557 * even if the test fails. 1558 */ 1559 kprintf("TSC testing MP synchronization ...\n"); 1560 1561 tsc_mpsync_test_loop(&arg); 1562 if (arg.tsc_mpsync) { 1563 struct tsc_mpsync_thr thr; 1564 int cpu; 1565 1566 /* 1567 * Test TSC MP synchronization on APs. 
1568 */ 1569 1570 thr.tsc_done_cnt = 1; 1571 thr.tsc_mpsync_cnt = 1; 1572 1573 for (cpu = 0; cpu < ncpus; ++cpu) { 1574 if (cpu == mycpuid) 1575 continue; 1576 1577 lwkt_create(tsc_mpsync_ap_thread, &thr, NULL, 1578 NULL, 0, cpu, "tsc mpsync %d", cpu); 1579 } 1580 1581 while (thr.tsc_done_cnt != ncpus) { 1582 cpu_pause(); 1583 cpu_lfence(); 1584 } 1585 if (thr.tsc_mpsync_cnt == ncpus) 1586 tsc_mpsync = 1; 1587 } 1588 1589 if (tsc_mpsync) 1590 kprintf("TSC is MP synchronized\n"); 1591 else 1592 kprintf("TSC is not MP synchronized\n"); 1593 } 1594 SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL); 1595 1596 #define TSC_CPUTIMER_FREQMAX 128000000 /* 128Mhz */ 1597 1598 static int tsc_cputimer_shift; 1599 1600 static void 1601 tsc_cputimer_construct(struct cputimer *timer, sysclock_t oldclock) 1602 { 1603 timer->base = 0; 1604 timer->base = oldclock - timer->count(); 1605 } 1606 1607 static __inline sysclock_t 1608 tsc_cputimer_count(void) 1609 { 1610 uint64_t tsc; 1611 1612 tsc = rdtsc(); 1613 tsc >>= tsc_cputimer_shift; 1614 1615 return (tsc + tsc_cputimer.base); 1616 } 1617 1618 static sysclock_t 1619 tsc_cputimer_count_lfence(void) 1620 { 1621 cpu_lfence(); 1622 return tsc_cputimer_count(); 1623 } 1624 1625 static sysclock_t 1626 tsc_cputimer_count_mfence(void) 1627 { 1628 cpu_mfence(); 1629 return tsc_cputimer_count(); 1630 } 1631 1632 static uint64_t 1633 tsc_cpucounter_count_lfence(void) 1634 { 1635 1636 cpu_lfence(); 1637 return (rdtsc()); 1638 } 1639 1640 static uint64_t 1641 tsc_cpucounter_count_mfence(void) 1642 { 1643 1644 cpu_mfence(); 1645 return (rdtsc()); 1646 } 1647 1648 static void 1649 tsc_cputimer_register(void) 1650 { 1651 uint64_t freq; 1652 int enable = 1; 1653 1654 if (!tsc_mpsync) { 1655 if (tsc_invariant) { 1656 /* Per-cpu cpucounter still works. 
*/ 1657 goto regcnt; 1658 } 1659 return; 1660 } 1661 1662 TUNABLE_INT_FETCH("hw.tsc_cputimer_enable", &enable); 1663 if (!enable) 1664 return; 1665 1666 freq = tsc_frequency; 1667 while (freq > TSC_CPUTIMER_FREQMAX) { 1668 freq >>= 1; 1669 ++tsc_cputimer_shift; 1670 } 1671 kprintf("TSC: cputimer freq %ju, shift %d\n", 1672 (uintmax_t)freq, tsc_cputimer_shift); 1673 1674 tsc_cputimer.freq = freq; 1675 1676 if (cpu_vendor_id == CPU_VENDOR_INTEL) 1677 tsc_cputimer.count = tsc_cputimer_count_lfence; 1678 else 1679 tsc_cputimer.count = tsc_cputimer_count_mfence; /* safe bet */ 1680 1681 cputimer_register(&tsc_cputimer); 1682 cputimer_select(&tsc_cputimer, 0); 1683 1684 tsc_cpucounter.flags |= CPUCOUNTER_FLAG_MPSYNC; 1685 regcnt: 1686 tsc_cpucounter.freq = tsc_frequency; 1687 if (cpu_vendor_id == CPU_VENDOR_INTEL) { 1688 tsc_cpucounter.count = 1689 tsc_cpucounter_count_lfence; 1690 } else { 1691 tsc_cpucounter.count = 1692 tsc_cpucounter_count_mfence; /* safe bet */ 1693 } 1694 cpucounter_register(&tsc_cpucounter); 1695 } 1696 SYSINIT(tsc_cputimer_reg, SI_BOOT2_POST_SMP, SI_ORDER_FIRST, 1697 tsc_cputimer_register, NULL); 1698 1699 SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254"); 1700 SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0, 1701 "frequency"); 1702 SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD, 1703 0, 0, hw_i8254_timestamp, "A", ""); 1704 1705 SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD, 1706 &tsc_present, 0, "TSC Available"); 1707 SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD, 1708 &tsc_invariant, 0, "Invariant TSC"); 1709 SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD, 1710 &tsc_mpsync, 0, "TSC is synchronized across CPUs"); 1711 SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD, 1712 &tsc_frequency, 0, "TSC Frequency"); 1713