1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * Copyright (c) 2008 The DragonFly Project. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * William Jolitz and Don Ahn. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 34 * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $ 35 */ 36 37 /* 38 * Routines to handle clock hardware. 
39 */ 40 41 /* 42 * inittodr, settodr and support routines written 43 * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at> 44 * 45 * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94 46 */ 47 48 #if 0 49 #include "opt_clock.h" 50 #endif 51 52 #include <sys/param.h> 53 #include <sys/systm.h> 54 #include <sys/eventhandler.h> 55 #include <sys/time.h> 56 #include <sys/kernel.h> 57 #include <sys/bus.h> 58 #include <sys/sysctl.h> 59 #include <sys/cons.h> 60 #include <sys/kbio.h> 61 #include <sys/systimer.h> 62 #include <sys/globaldata.h> 63 #include <sys/machintr.h> 64 #include <sys/interrupt.h> 65 66 #include <sys/thread2.h> 67 68 #include <machine/clock.h> 69 #include <machine/cputypes.h> 70 #include <machine/frame.h> 71 #include <machine/ipl.h> 72 #include <machine/limits.h> 73 #include <machine/md_var.h> 74 #include <machine/psl.h> 75 #include <machine/segments.h> 76 #include <machine/smp.h> 77 #include <machine/specialreg.h> 78 #include <machine/intr_machdep.h> 79 80 #include <machine_base/apic/ioapic.h> 81 #include <machine_base/apic/ioapic_abi.h> 82 #include <machine_base/icu/icu.h> 83 #include <bus/isa/isa.h> 84 #include <bus/isa/rtc.h> 85 #include <machine_base/isa/timerreg.h> 86 87 SET_DECLARE(timecounter_init_set, const timecounter_init_t); 88 TIMECOUNTER_INIT(placeholder, NULL); 89 90 static void i8254_restore(void); 91 static void resettodr_on_shutdown(void *arg __unused); 92 93 /* 94 * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we 95 * can use a simple formula for leap years. 
 */
#define LEAPYEAR(y) ((u_int)(y) % 4 == 0)
#define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31)

#ifndef TIMER_FREQ
#define TIMER_FREQ	1193182
#endif

/* Which 8254 channel backs the free-running wall timer (set at construct) */
static uint8_t i8254_walltimer_sel;
static uint16_t i8254_walltimer_cntr;
static int timer0_running;

int	adjkerntz;		/* local offset from GMT in seconds */
int	disable_rtc_set;	/* disable resettodr() if != 0 */
int	tsc_present;
int	tsc_invariant;
int	tsc_mpsync;
int	wall_cmos_clock;	/* wall CMOS clock assumed if != 0 */
tsc_uclock_t tsc_frequency;
tsc_uclock_t tsc_oneus_approx;	/* always at least 1, approx only */

/* Ownership state for each of the three 8254 channels */
enum tstate { RELEASED, ACQUIRED };
static enum tstate timer0_state;
static enum tstate timer1_state;
static enum tstate timer2_state;

int	i8254_cputimer_disable;	/* No need to initialize i8254 cputimer. */

static int beeping = 0;
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
static int rtc_loaded;

/* Non-zero when sys timebase ticks convert to 8254 ticks by a simple divide */
static sysclock_t i8254_cputimer_div;

static int i8254_nointr;
static int i8254_intr_disable = 1;
TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);

static int calibrate_timers_with_rtc = 0;
TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);

static int calibrate_tsc_fast = 1;
TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast);

static int calibrate_test;
TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test);

static struct callout sysbeepstop_ch;

static sysclock_t i8254_cputimer_count(void);
static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
static void i8254_cputimer_destruct(struct cputimer *cputimer);

/* Free-running 8254 timebase descriptor, registered with the cputimer core */
static struct cputimer i8254_cputimer = {
    .next	= SLIST_ENTRY_INITIALIZER,
    .name	= "i8254",
    .pri	= CPUTIMER_PRI_8254,
    .type	= 0,	/* determined later */
    .count	= i8254_cputimer_count,
    .fromhz	= cputimer_default_fromhz,
    .fromus	= cputimer_default_fromus,
    .construct	= i8254_cputimer_construct,
    .destruct	= i8254_cputimer_destruct,
    .freq	= TIMER_FREQ
};

static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);

/* One-shot timer0 interrupt-timer descriptor */
static struct cputimer_intr i8254_cputimer_intr = {
    .freq = TIMER_FREQ,
    .reload = i8254_intr_reload,
    .enable = cputimer_intr_default_enable,
    .config = i8254_intr_config,
    .restart = cputimer_intr_default_restart,
    .pmfixup = cputimer_intr_default_pmfixup,
    .initclock = i8254_intr_initclock,
    .pcpuhand = NULL,
    .next = SLIST_ENTRY_INITIALIZER,
    .name = "i8254",
    .type = CPUTIMER_INTR_8254,
    .prio = CPUTIMER_INTR_PRIO_8254,
    .caps = CPUTIMER_INTR_CAP_PS,
    .priv = NULL
};

/*
 * Use this to lwkt_switch() when the scheduler clock is not
 * yet running, otherwise lwkt_switch() won't do anything.
 * XXX needs cleaning up in lwkt_thread.c
 */
static void
lwkt_force_switch(void)
{
	crit_enter();
	lwkt_schedulerclock(curthread);
	crit_exit();
	lwkt_switch();
}

/*
 * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
 * counting as of this interrupt.  We use timer1 in free-running mode (not
 * generating any interrupts) as our main counter.  Each cpu has timeouts
 * pending.
 *
 * This code is INTR_MPSAFE and may be called without the BGL held.
 */
static void
clkintr(void *dummy, void *frame_arg)
{
	static sysclock_t sysclock_count;	/* NOTE! Must be static */
	struct globaldata *gd = mycpu;
	struct globaldata *gscan;
	int n;

	/*
	 * SWSTROBE mode is a one-shot, the timer is no longer running
	 */
	timer0_running = 0;

	/*
	 * XXX the dispatcher needs work.  right now we call systimer_intr()
	 * directly or via IPI for any cpu with systimers queued, which is
	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
	 */
	sysclock_count = sys_cputimer->count();
	for (n = 0; n < ncpus; ++n) {
		gscan = globaldata_find(n);
		if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
			continue;
		if (gscan != gd) {
			lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
					&sysclock_count, 1);
		} else {
			systimer_intr(&sysclock_count, 0, frame_arg);
		}
	}
}

/*
 * Claim exclusive use of 8254 timer2 and program its mode.
 * Returns 0 on success, -1 if timer2 is already owned.
 *
 * NOTE! not MP safe.
 */
int
acquire_timer2(int mode)
{
	if (timer2_state != RELEASED)
		return (-1);
	timer2_state = ACQUIRED;

	/*
	 * This access to the timer registers is as atomic as possible
	 * because it is a single instruction.  We could do better if we
	 * knew the rate.
	 */
	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
	return (0);
}

/*
 * Give timer2 back, restoring its default square-wave mode.
 * Returns 0 on success, -1 if timer2 was not acquired.
 */
int
release_timer2(void)
{
	if (timer2_state != ACQUIRED)
		return (-1);
	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
	timer2_state = RELEASED;
	return (0);
}

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

/* Kernel debugger command: dump raw RTC date/time and status registers */
DB_SHOW_COMMAND(rtc, rtc)
{
	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
		rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
		rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
		rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
}
#endif /* DDB */

/*
 * Return the current cpu timer count as a 32 bit integer.
 */
static
sysclock_t
i8254_cputimer_count(void)
{
	static uint16_t cputimer_last;	/* last countup value, for rollover */
	uint16_t count;
	sysclock_t ret;

	clock_lock();
	/* Latch then read lsb/msb of the free-running wall timer channel */
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
	count = (uint8_t)inb(i8254_walltimer_cntr);		/* get countdown */
	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
	count = -count;					/* -> countup */
	if (count < cputimer_last)			/* rollover */
		i8254_cputimer.base += 0x00010000U;
	ret = i8254_cputimer.base | count;
	cputimer_last = count;
	clock_unlock();

	return(ret);
}

/*
 * This function is called whenever the system timebase changes, allowing
 * us to calculate what is needed to convert a system timebase tick
 * into an 8254 tick for the interrupt timer.  If we can convert to a
 * simple shift, multiplication, or division, we do so.  Otherwise 64
 * bit arithmatic is required every time the interrupt timer is reloaded.
 */
static void
i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
{
	sysclock_t freq;
	sysclock_t div;

	/*
	 * Will a simple divide do the trick?  Accept the divisor only if
	 * it reproduces the timebase frequency to within +/- 1 Hz.
	 */
	div = (timer->freq + (cti->freq / 2)) / cti->freq;
	freq = cti->freq * div;

	if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
		i8254_cputimer_div = div;
	else
		i8254_cputimer_div = 0;
}

/*
 * Reload for the next timeout.  It is possible for the reload value
 * to be 0 or negative, indicating that an immediate timer interrupt
 * is desired.  For now make the minimum 2 ticks.
 *
 * We may have to convert from the system timebase to the 8254 timebase.
 */
static void
i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	uint16_t count;

	if ((ssysclock_t)reload < 0)
		reload = 1;
	if (i8254_cputimer_div)
		reload /= i8254_cputimer_div;
	else
		reload = muldivu64(reload, cti->freq, sys_cputimer->freq);

	if (reload < 2)
		reload = 2;		/* minimum count */
	if (reload > 0xFFFF)
		reload = 0xFFFF;	/* almost full count (0 is full count) */

	clock_lock();
	if (timer0_running) {
		/*
		 * Timer is already armed; only reprogram it if the new
		 * deadline is earlier than the remaining count.
		 */
		outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* count-down timer */
		count = (uint8_t)inb(TIMER_CNTR0);		/* lsb */
		count |= ((uint8_t)inb(TIMER_CNTR0) << 8);	/* msb */
		if (reload < count) {
			outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
			outb(TIMER_CNTR0, (uint8_t)reload);	/* lsb */
			outb(TIMER_CNTR0, (uint8_t)(reload >> 8)); /* msb */
		}
	} else {
		timer0_running = 1;
		outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
		outb(TIMER_CNTR0, (uint8_t)reload);		/* lsb */
		outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
	}
	clock_unlock();
}

/*
 * DELAY(usec)	     - Spin for the specified number of microseconds.
 * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
 *		       but do a thread switch in the loop
 *
 * Relies on timer 1 counting down from (cputimer_freq / hz)
 * Note: timer had better have been programmed before this is first used!
 */
static void
DODELAY(int n, int doswitch)
{
	ssysclock_t delta, ticks_left;
	sysclock_t prev_tick, tick;

#ifdef DELAYDEBUG
	int getit_calls = 1;
	int n1;
	static int state = 0;

	if (state == 0) {
		state = 1;
		for (n1 = 1; n1 <= 10000000; n1 *= 10)
			DELAY(n1);
		state = 2;
	}
	if (state == 1)
		kprintf("DELAY(%d)...", n);
#endif
	/*
	 * Guard against the timer being uninitialized if we are called
	 * early for console i/o.
	 */
	if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
		i8254_restore();

	/*
	 * Read the counter first, so that the rest of the setup overhead is
	 * counted.  Then calculate the number of hardware timer ticks
	 * required, rounding up to be sure we delay at least the requested
	 * number of microseconds.
	 */
	prev_tick = sys_cputimer->count();
	ticks_left = muldivu64(n, sys_cputimer->freq + 999999, 1000000);

	/*
	 * Loop until done.
	 */
	while (ticks_left > 0) {
		tick = sys_cputimer->count();
#ifdef DELAYDEBUG
		++getit_calls;
#endif
		delta = tick - prev_tick;
		prev_tick = tick;
		if (delta < 0)
			delta = 0;
		ticks_left -= delta;
		if (doswitch && ticks_left > 0)
			lwkt_switch();
		cpu_pause();
	}
#ifdef DELAYDEBUG
	if (state == 1)
		kprintf(" %d calls to getit() at %d usec each\n",
			getit_calls, (n + 5) / getit_calls);
#endif
}

/*
 * DELAY() never switches.
 */
void
DELAY(int n)
{
	DODELAY(n, 0);
}

/*
 * Returns non-zero if the specified time period has elapsed.  Call
 * first with last_clock set to 0.
 */
int
CHECKTIMEOUT(TOTALDELAY *tdd)
{
	sysclock_t delta;
	int us;

	if (tdd->started == 0) {
		if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
			i8254_restore();
		tdd->last_clock = sys_cputimer->count();
		tdd->started = 1;
		return(0);
	}
	delta = sys_cputimer->count() - tdd->last_clock;
	us = muldivu64(delta, 1000000, sys_cputimer->freq);
	/* Only advance last_clock by whole microseconds actually consumed */
	tdd->last_clock += muldivu64(us, sys_cputimer->freq, 1000000);
	tdd->us -= us;

	return (tdd->us < 0);
}

/*
 * DRIVERSLEEP() does not switch if called with a spinlock held or
 * from a hard interrupt.
481 */ 482 void 483 DRIVERSLEEP(int usec) 484 { 485 globaldata_t gd = mycpu; 486 487 if (gd->gd_intr_nesting_level || gd->gd_spinlocks) { 488 DODELAY(usec, 0); 489 } else { 490 DODELAY(usec, 1); 491 } 492 } 493 494 static void 495 sysbeepstop(void *chan) 496 { 497 outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ 498 beeping = 0; 499 release_timer2(); 500 } 501 502 int 503 sysbeep(int pitch, int period) 504 { 505 if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) 506 return(-1); 507 if (sysbeep_enable == 0) 508 return(-1); 509 /* 510 * Nobody else is using timer2, we do not need the clock lock 511 */ 512 outb(TIMER_CNTR2, pitch); 513 outb(TIMER_CNTR2, (pitch>>8)); 514 if (!beeping) { 515 /* enable counter2 output to speaker */ 516 outb(IO_PPI, inb(IO_PPI) | 3); 517 beeping = period; 518 callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL); 519 } 520 return (0); 521 } 522 523 /* 524 * RTC support routines 525 */ 526 527 int 528 rtcin(int reg) 529 { 530 u_char val; 531 532 crit_enter(); 533 outb(IO_RTC, reg); 534 inb(0x84); 535 val = inb(IO_RTC + 1); 536 inb(0x84); 537 crit_exit(); 538 return (val); 539 } 540 541 static __inline void 542 writertc(u_char reg, u_char val) 543 { 544 crit_enter(); 545 inb(0x84); 546 outb(IO_RTC, reg); 547 inb(0x84); 548 outb(IO_RTC + 1, val); 549 inb(0x84); /* XXX work around wrong order in rtcin() */ 550 crit_exit(); 551 } 552 553 static __inline int 554 readrtc(int port) 555 { 556 return(bcd2bin(rtcin(port))); 557 } 558 559 static u_int 560 calibrate_clocks(void) 561 { 562 tsc_uclock_t old_tsc; 563 sysclock_t tot_count; 564 sysclock_t count, prev_count; 565 int sec, start_sec, timeout; 566 567 if (bootverbose) 568 kprintf("Calibrating clock(s) ...\n"); 569 if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) 570 goto fail; 571 timeout = 100000000; 572 573 /* Read the mc146818A seconds counter. 
*/ 574 for (;;) { 575 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { 576 sec = rtcin(RTC_SEC); 577 break; 578 } 579 if (--timeout == 0) 580 goto fail; 581 } 582 583 /* Wait for the mC146818A seconds counter to change. */ 584 start_sec = sec; 585 for (;;) { 586 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { 587 sec = rtcin(RTC_SEC); 588 if (sec != start_sec) 589 break; 590 } 591 if (--timeout == 0) 592 goto fail; 593 } 594 595 /* Start keeping track of the i8254 counter. */ 596 prev_count = sys_cputimer->count(); 597 tot_count = 0; 598 599 if (tsc_present) 600 old_tsc = rdtsc(); 601 else 602 old_tsc = 0; /* shut up gcc */ 603 604 /* 605 * Wait for the mc146818A seconds counter to change. Read the i8254 606 * counter for each iteration since this is convenient and only 607 * costs a few usec of inaccuracy. The timing of the final reads 608 * of the counters almost matches the timing of the initial reads, 609 * so the main cause of inaccuracy is the varying latency from 610 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the 611 * rtcin(RTC_SEC) that returns a changed seconds count. The 612 * maximum inaccuracy from this cause is < 10 usec on 486's. 613 */ 614 start_sec = sec; 615 for (;;) { 616 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) 617 sec = rtcin(RTC_SEC); 618 count = sys_cputimer->count(); 619 tot_count += (sysclock_t)(count - prev_count); 620 prev_count = count; 621 if (sec != start_sec) 622 break; 623 if (--timeout == 0) 624 goto fail; 625 } 626 627 /* 628 * Read the cpu cycle counter. The timing considerations are 629 * similar to those for the i8254 clock. 
630 */ 631 if (tsc_present) { 632 tsc_frequency = rdtsc() - old_tsc; 633 if (bootverbose) { 634 kprintf("TSC clock: %jd Hz (Method A)\n", 635 (intmax_t)tsc_frequency); 636 } 637 } 638 tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000; 639 640 kprintf("i8254 clock: %lu Hz\n", tot_count); 641 return (tot_count); 642 643 fail: 644 kprintf("failed, using default i8254 clock of %lu Hz\n", 645 i8254_cputimer.freq); 646 return (i8254_cputimer.freq); 647 } 648 649 static void 650 i8254_restore(void) 651 { 652 timer0_state = ACQUIRED; 653 654 clock_lock(); 655 656 /* 657 * Timer0 is our fine-grained variable clock interrupt 658 */ 659 outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT); 660 outb(TIMER_CNTR0, 2); /* lsb */ 661 outb(TIMER_CNTR0, 0); /* msb */ 662 clock_unlock(); 663 664 if (!i8254_nointr) { 665 cputimer_intr_register(&i8254_cputimer_intr); 666 cputimer_intr_select(&i8254_cputimer_intr, 0); 667 } 668 669 /* 670 * Timer1 or timer2 is our free-running clock, but only if another 671 * has not been selected. 672 */ 673 cputimer_register(&i8254_cputimer); 674 cputimer_select(&i8254_cputimer, 0); 675 } 676 677 static void 678 i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock) 679 { 680 int which; 681 682 /* 683 * Should we use timer 1 or timer 2 ? 
684 */ 685 which = 0; 686 TUNABLE_INT_FETCH("hw.i8254.walltimer", &which); 687 if (which != 1 && which != 2) 688 which = 2; 689 690 switch(which) { 691 case 1: 692 timer->name = "i8254_timer1"; 693 timer->type = CPUTIMER_8254_SEL1; 694 i8254_walltimer_sel = TIMER_SEL1; 695 i8254_walltimer_cntr = TIMER_CNTR1; 696 timer1_state = ACQUIRED; 697 break; 698 case 2: 699 timer->name = "i8254_timer2"; 700 timer->type = CPUTIMER_8254_SEL2; 701 i8254_walltimer_sel = TIMER_SEL2; 702 i8254_walltimer_cntr = TIMER_CNTR2; 703 timer2_state = ACQUIRED; 704 break; 705 } 706 707 timer->base = (oldclock + 0xFFFF) & 0xFFFFFFFFFFFF0000LU; 708 709 clock_lock(); 710 outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT); 711 outb(i8254_walltimer_cntr, 0); /* lsb */ 712 outb(i8254_walltimer_cntr, 0); /* msb */ 713 outb(IO_PPI, inb(IO_PPI) | 1); /* bit 0: enable gate, bit 1: spkr */ 714 clock_unlock(); 715 } 716 717 static void 718 i8254_cputimer_destruct(struct cputimer *timer) 719 { 720 switch(timer->type) { 721 case CPUTIMER_8254_SEL1: 722 timer1_state = RELEASED; 723 break; 724 case CPUTIMER_8254_SEL2: 725 timer2_state = RELEASED; 726 break; 727 default: 728 break; 729 } 730 timer->type = 0; 731 } 732 733 static void 734 rtc_restore(void) 735 { 736 /* Restore all of the RTC's "status" (actually, control) registers. */ 737 writertc(RTC_STATUSB, RTCSB_24HR); 738 writertc(RTC_STATUSA, rtc_statusa); 739 writertc(RTC_STATUSB, rtc_statusb); 740 } 741 742 /* 743 * Restore all the timers. 744 * 745 * This function is called to resynchronize our core timekeeping after a 746 * long halt, e.g. from apm_default_resume() and friends. It is also 747 * called if after a BIOS call we have detected munging of the 8254. 748 * It is necessary because cputimer_count() counter's delta may have grown 749 * too large for nanouptime() and friends to handle, or (in the case of 8254 750 * munging) might cause the SYSTIMER code to prematurely trigger. 
 */
void
timer_restore(void)
{
	crit_enter();
	if (i8254_cputimer_disable == 0)
		i8254_restore();	/* restore timer_freq and hz */
	rtc_restore();			/* reenable RTC interrupts */
	crit_exit();
}

#define MAX_MEASURE_RETRIES	100

/*
 * Take one (TSC, cputimer) sample pair, retrying when the bracketing
 * TSC reads indicate the cputimer read took abnormally long (likely a
 * preemption or SMI).  Returns the first TSC read; the bracketing
 * latency, cputimer value and retry count are returned via pointers.
 */
static u_int64_t
do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time,
    int *retries)
{
	u_int64_t tsc1, tsc2;
	u_int64_t threshold;
	sysclock_t val;
	int cnt = 0;

	do {
		/* Loosen the acceptance threshold after many retries */
		if (cnt > MAX_MEASURE_RETRIES/2)
			threshold = timer_latency << 1;
		else
			threshold = timer_latency + (timer_latency >> 2);

		cnt++;
		tsc1 = rdtsc_ordered();
		val = sys_cputimer->count();
		tsc2 = rdtsc_ordered();
	} while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES &&
	    tsc2 - tsc1 > threshold);

	*retries = cnt - 1;
	*latency = tsc2 - tsc1;
	*time = val;
	return tsc1;
}

/*
 * Measure the TSC frequency against sys_cputimer over roughly 'usecs'
 * microseconds.  The fast path averages two independent measurement
 * windows and compensates for cputimer read latency; the slow path is
 * a plain DELAY()-bracketed TSC delta.
 */
static u_int64_t
do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency)
{
	if (calibrate_tsc_fast) {
		u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1;
		u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2;
		u_int64_t freq1, freq2;
		sysclock_t start1, end1, start2, end2;
		int retries1, retries2, retries3, retries4;

		DELAY(1000);
		old_tsc1 = do_measure(timer_latency, &start_lat1, &start1,
		    &retries1);
		DELAY(20000);
		old_tsc2 = do_measure(timer_latency, &start_lat2, &start2,
		    &retries2);
		DELAY(usecs);
		new_tsc1 = do_measure(timer_latency, &end_lat1, &end1,
		    &retries3);
		DELAY(20000);
		new_tsc2 = do_measure(timer_latency, &end_lat2, &end2,
		    &retries4);

		old_tsc1 += start_lat1;
		old_tsc2 += start_lat2;
		freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2;
		freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2;
		end1 -= start1;
		end2 -= start2;
		/* This should in practice be safe from overflows. */
		freq1 = muldivu64(freq1, sys_cputimer->freq, end1);
		freq2 = muldivu64(freq2, sys_cputimer->freq, end2);
		if (calibrate_test && (retries1 > 0 || retries2 > 0)) {
			kprintf("%s: retries: %d, %d, %d, %d\n",
			    __func__, retries1, retries2, retries3, retries4);
		}
		if (calibrate_test) {
			kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n",
			    __func__, freq1, freq2, (freq1 + freq2) / 2);
		}
		return (freq1 + freq2) / 2;
	} else {
		u_int64_t old_tsc, new_tsc;
		u_int64_t freq;

		old_tsc = rdtsc_ordered();
		DELAY(usecs);
		new_tsc = rdtsc();
		freq = new_tsc - old_tsc;
		/* This should in practice be safe from overflows. */
		freq = (freq * 1000 * 1000) / usecs;
		return freq;
	}
}

/*
 * Initialize 8254 timer 0 early so that it can be used in DELAY().
 */
void
startrtclock(void)
{
	const timecounter_init_t **list;
	sysclock_t delta, freq;

	callout_init_mp(&sysbeepstop_ch);

	/*
	 * Can we use the TSC?
	 *
	 * NOTE: If running under qemu, probably a good idea to force the
	 *	 TSC because we are not likely to detect it as being
	 *	 invariant or mpsyncd if you don't.  This will greatly
	 *	 reduce SMP contention.
	 */
	if (cpu_feature & CPUID_TSC) {
		tsc_present = 1;
		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);

		/* CPUID leaf 0x80000007 EDX bit 8 = invariant TSC */
		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
		     cpu_vendor_id == CPU_VENDOR_AMD) &&
		    cpu_exthigh >= 0x80000007) {
			u_int regs[4];

			do_cpuid(0x80000007, regs);
			if (regs[3] & 0x100)
				tsc_invariant = 1;
		}
	} else {
		tsc_present = 0;
	}

	/*
	 * Initial RTC state, don't do anything unexpected
	 */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	/* Run any platform timecounter configure hooks */
	SET_FOREACH(list, timecounter_init_set) {
		if ((*list)->configure != NULL)
			(*list)->configure();
	}

	/*
	 * If tsc_frequency is already initialized now, and a flag is set
	 * that i8254 timer is unneeded, we are done.
	 */
	if (tsc_frequency != 0 && i8254_cputimer_disable != 0)
		goto done;

	/*
	 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
	 * generate an interrupt, which we will ignore for now.
	 *
	 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
	 * (so it counts a full 2^16 and repeats).  We will use this timer
	 * for our counting.
	 */
	if (i8254_cputimer_disable == 0)
		i8254_restore();

	kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name);

	/*
	 * When booting without verbose messages, it's pointless to run the
	 * calibrate_clocks() calibration code, when we don't use the
	 * results in any way.  With bootverbose, we are at least printing
	 * this information to the kernel log.
	 */
	if (i8254_cputimer_disable != 0 ||
	    (calibrate_timers_with_rtc == 0 && !bootverbose)) {
		goto skip_rtc_based;
	}

	freq = calibrate_clocks();
#ifdef CLK_CALIBRATION_LOOP
	if (bootverbose) {
		int c;

		cnpoll(TRUE);
		kprintf("Press a key on the console to "
			"abort clock calibration\n");
		while ((c = cncheckc()) == -1 || c == NOKEY)
			calibrate_clocks();
		cnpoll(FALSE);
	}
#endif

	/*
	 * Use the calibrated i8254 frequency if it seems reasonable.
	 * Otherwise use the default, and don't use the calibrated i586
	 * frequency.
	 */
	delta = freq > i8254_cputimer.freq ?
		freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
	if (delta < i8254_cputimer.freq / 100) {
		if (calibrate_timers_with_rtc == 0) {
			kprintf(
"hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
			freq = i8254_cputimer.freq;
		}
		/*
		 * NOTE:
		 * Interrupt timer's freq must be adjusted
		 * before we change the cuptimer's frequency.
		 */
		i8254_cputimer_intr.freq = freq;
		cputimer_set_frequency(&i8254_cputimer, freq);
	} else {
		if (bootverbose)
			kprintf("%lu Hz differs from default of %lu Hz "
				"by more than 1%%\n",
				freq, i8254_cputimer.freq);
		tsc_frequency = 0;
	}

	if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
		kprintf("hw.calibrate_timers_with_rtc not "
			"set - using old calibration method\n");
		tsc_frequency = 0;
	}

skip_rtc_based:
	if (tsc_present && tsc_frequency == 0) {
		u_int cnt;
		u_int64_t cputime_latency_tsc = 0, max = 0, min = 0;
		int i;

		for (i = 0; i < 10; i++) {
			/* Warm up */
			(void)sys_cputimer->count();
		}
		/* Estimate cputimer read latency in TSC ticks */
		for (i = 0; i < 100; i++) {
			u_int64_t old_tsc, new_tsc;

			old_tsc = rdtsc_ordered();
			(void)sys_cputimer->count();
			new_tsc = rdtsc_ordered();
			cputime_latency_tsc += (new_tsc - old_tsc);
			if (max < (new_tsc - old_tsc))
				max = new_tsc - old_tsc;
			if (min == 0 || min > (new_tsc - old_tsc))
				min = new_tsc - old_tsc;
		}
		cputime_latency_tsc /= 100;
		kprintf(
		    "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n",
		    cputime_latency_tsc, min, max);
		/* XXX Instead of this, properly filter out outliers. */
		cputime_latency_tsc = min;

		if (calibrate_test > 0) {
			u_int64_t values[20], avg = 0;
			for (i = 1; i <= 20; i++) {
				u_int64_t freq;

				freq = do_calibrate_cputimer(i * 100 * 1000,
				    cputime_latency_tsc);
				values[i - 1] = freq;
			}
			/* Compute an average TSC for the 1s to 2s delays. */
			for (i = 10; i < 20; i++)
				avg += values[i];
			avg /= 10;
			for (i = 0; i < 20; i++) {
				kprintf("%ums: %lu (Diff from average: %ld)\n",
				    (i + 1) * 100, values[i],
				    (int64_t)(values[i] - avg));
			}
		}

		if (calibrate_tsc_fast > 0) {
			/* HPET would typically be >10MHz */
			if (sys_cputimer->freq >= 10000000)
				cnt = 200000;
			else
				cnt = 500000;
		} else {
			cnt = 1000000;
		}

		tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc);
		if (bootverbose && calibrate_timers_with_rtc) {
			kprintf("TSC clock: %jd Hz (Method B)\n",
			    (intmax_t)tsc_frequency);
		}
	}

done:
	if (tsc_present) {
		kprintf("TSC%s clock: %jd Hz\n",
		    tsc_invariant ? " invariant" : "",
		    (intmax_t)tsc_frequency);
	}
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
			      NULL, SHUTDOWN_PRI_LAST);
}

/*
 * Sync the time of day back to the RTC on shutdown, but only if
 * we have already loaded it and have not crashed.
 */
static void
resettodr_on_shutdown(void *arg __unused)
{
	if (rtc_loaded && panicstr == NULL) {
		resettodr();
	}
}

/*
 * Initialize the time of day register, based on the time base which is, e.g.
 * from a filesystem.
 */
void
inittodr(time_t base)
{
	unsigned long	sec, days;
	int		year, month;
	int		y, m;
	struct timespec ts;

	if (base) {
		ts.tv_sec = base;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}

	/* Look if we have a RTC present and the time is valid */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto wrong_time;

	/* wait for time update to complete */
	/* If RTCSA_TUP is zero, we have at least 244us before next update */
	crit_enter();
	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
		crit_exit();
		crit_enter();
	}

	days = 0;
#ifdef USE_RTC_CENTURY
	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
#else
	year = readrtc(RTC_YEAR) + 1900;
	if (year < 1970)
		year += 100;
#endif
	if (year < 1970) {
		crit_exit();
		goto wrong_time;
	}
	month = readrtc(RTC_MONTH);
	for (m = 1; m < month; m++)
		days += daysinmonth[m-1];
	if ((month > 2) && LEAPYEAR(year))
		days ++;
	days += readrtc(RTC_DAY) - 1;
	for (y = 1970; y < year; y++)
		days += DAYSPERYEAR + LEAPYEAR(y);
	sec = ((( days * 24 +
		  readrtc(RTC_HRS)) * 60 +
		  readrtc(RTC_MIN)) * 60 +
		  readrtc(RTC_SEC));
	/* sec now contains the number of seconds, since Jan 1 1970,
	   in the local time zone */

	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	/* Only step the clock if it is off by 2 or more seconds */
	y = (int)(time_second - sec);
	if (y <= -2 || y >= 2) {
		/* badly off, adjust it */
		ts.tv_sec = sec;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}
	rtc_loaded = 1;
	crit_exit();
	return;

wrong_time:
	kprintf("Invalid time in real time clock.\n");
	kprintf("Check and reset the date immediately!\n");
}

/*
 * Write system time back to RTC
 */
void
resettodr(void)
{
	struct timeval tv;
	unsigned long tm;
	int m;
	int y;

	if (disable_rtc_set)
		return;

	microtime(&tv);
	tm = tv.tv_sec;

	crit_enter();
	/* Disable RTC updates and interrupts. */
	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);

	/* Calculate local time to put in RTC */

	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */

	/* We have now the days since 01-01-1970 in tm */
	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
	     tm >= m;
	     y++, m = DAYSPERYEAR + LEAPYEAR(y))
		tm -= m;

	/* Now we have the years in y and the day-of-the-year in tm */
	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
#ifdef USE_RTC_CENTURY
	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
#endif
	for (m = 0; ; m++) {
		int ml;

		ml = daysinmonth[m];
		if (m == 1 && LEAPYEAR(y))
			ml++;
		if (tm < ml)
			break;
		tm -= ml;
	}

	writertc(RTC_MONTH, bin2bcd(m + 1));		/* Write back Month   */
	writertc(RTC_DAY, bin2bcd(tm + 1));		/* Write back Month Day */

	/* Reenable RTC updates and interrupts. */
	writertc(RTC_STATUSB, rtc_statusb);
	crit_exit();
}

/*
 * Verify that a timer0 interrupt actually arrives on the given irq:
 * force an 8254 timer0 interrupt, spin 1/100s, and check whether the
 * interrupt counter advanced.  Returns 0 on success, ENOENT on failure.
 */
static int
i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
{
	sysclock_t base;
	long lastcnt;

	/*
	 * Following code assumes the 8254 is the cpu timer,
	 * so make sure it is.
	 */
	/*KKASSERT(sys_cputimer == &i8254_cputimer); (tested by CuteLarva) */
	KKASSERT(cti == &i8254_cputimer_intr);

	lastcnt = get_interrupt_counter(irq, mycpuid);

	/*
	 * Force an 8254 Timer0 interrupt and wait 1/100s for
	 * it to happen, then see if we got it.
	 */
	kprintf("IOAPIC: testing 8254 interrupt delivery...");

	i8254_intr_reload(cti, sys_cputimer->fromus(2));
	base = sys_cputimer->count();
	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
		;	/* nothing */

	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0) {
		kprintf(" failed\n");
		return ENOENT;
	} else {
		kprintf(" success\n");
	}
	return 0;
}

/*
 * Start both clocks running.  DragonFly note: the stat clock is no longer
 * used.  Instead, 8254 based systimers are used for all major clock
 * interrupts.
 */
static void
i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
{
	void *clkdesc = NULL;
	int irq = 0, mixed_mode = 0, error;

	KKASSERT(mycpuid == 0);

	if (!selected && i8254_intr_disable)
		goto nointr;

	/*
	 * The stat interrupt mask is different without the
	 * statistics clock.  Also, don't set the interrupt
	 * flag which would normally cause the RTC to generate
	 * interrupts.
	 */
	rtc_statusb = RTCSB_24HR;

	/* Finish initializing 8254 timer 0. */
	if (ioapic_enable) {
		irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
			INTR_POLARITY_HIGH);
		if (irq < 0) {
mixed_mode_setup:
			/* Route irq0 through the 8259 as an EXTINT pin */
			error = ioapic_conf_legacy_extint(0);
			if (!error) {
				irq = machintr_legacy_intr_find(0,
				    INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
				if (irq < 0)
					error = ENOENT;
			}

			if (error) {
				if (!selected) {
					kprintf("IOAPIC: setup mixed mode for "
						"irq 0 failed: %d\n", error);
					goto nointr;
				} else {
					panic("IOAPIC: setup mixed mode for "
					      "irq 0 failed: %d\n", error);
				}
			}
			mixed_mode = 1;
		}
		clkdesc = register_int(irq, clkintr, NULL, "clk",
				       NULL,
				       INTR_EXCL | INTR_CLOCK |
				       INTR_NOPOLL | INTR_MPSAFE |
				       INTR_NOENTROPY, 0);
	} else {
		register_int(0, clkintr, NULL, "clk", NULL,
			     INTR_EXCL | INTR_CLOCK |
			     INTR_NOPOLL | INTR_MPSAFE |
			     INTR_NOENTROPY, 0);
	}

	/* Initialize RTC. */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	if (ioapic_enable) {
		error = i8254_ioapic_trial(irq, cti);
		if (error) {
			if (mixed_mode) {
				if (!selected) {
					kprintf("IOAPIC: mixed mode for irq %d "
						"trial failed: %d\n",
						irq, error);
					goto nointr;
				} else {
					panic("IOAPIC: mixed mode for irq %d "
					      "trial failed: %d\n", irq, error);
				}
			} else {
				kprintf("IOAPIC: warning 8254 is not connected "
					"to the correct pin, try mixed mode\n");
				unregister_int(clkdesc, 0);
				goto mixed_mode_setup;
			}
		}
	}
	return;

nointr:
	i8254_nointr = 1;	/* don't try to register again */
	cputimer_intr_deregister(cti);
}

/*
 * Select the RTC status-A divider/rate for profiling vs normal stat rate
 * and write it to the hardware.
 */
void
setstatclockrate(int newhz)
{
	if (newhz == RTC_PROFRATE)
		rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
	else
		rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
	writertc(RTC_STATUSA, rtc_statusa);
}

#if 0
static
unsigned 1339 tsc_get_timecount(struct timecounter *tc) 1340 { 1341 return (rdtsc()); 1342 } 1343 #endif 1344 1345 #ifdef KERN_TIMESTAMP 1346 #define KERN_TIMESTAMP_SIZE 16384 1347 static u_long tsc[KERN_TIMESTAMP_SIZE] ; 1348 SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc, 1349 sizeof(tsc), "LU", "Kernel timestamps"); 1350 void 1351 _TSTMP(u_int32_t x) 1352 { 1353 static int i; 1354 1355 tsc[i] = (u_int32_t)rdtsc(); 1356 tsc[i+1] = x; 1357 i = i + 2; 1358 if (i >= KERN_TIMESTAMP_SIZE) 1359 i = 0; 1360 tsc[i] = 0; /* mark last entry */ 1361 } 1362 #endif /* KERN_TIMESTAMP */ 1363 1364 /* 1365 * 1366 */ 1367 1368 static int 1369 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS) 1370 { 1371 sysclock_t count; 1372 uint64_t tscval; 1373 char buf[32]; 1374 1375 crit_enter(); 1376 if (sys_cputimer == &i8254_cputimer) 1377 count = sys_cputimer->count(); 1378 else 1379 count = 0; 1380 if (tsc_present) 1381 tscval = rdtsc(); 1382 else 1383 tscval = 0; 1384 crit_exit(); 1385 ksnprintf(buf, sizeof(buf), "%016lx %016lx", count, tscval); 1386 return(SYSCTL_OUT(req, buf, strlen(buf) + 1)); 1387 } 1388 1389 struct tsc_mpsync_info { 1390 volatile int tsc_ready_cnt; 1391 volatile int tsc_done_cnt; 1392 volatile int tsc_command; 1393 volatile int unused01[5]; 1394 struct { 1395 uint64_t v; 1396 uint64_t unused02; 1397 } tsc_saved[MAXCPU]; 1398 } __cachealign; 1399 1400 #if 0 1401 static void 1402 tsc_mpsync_test_loop(struct tsc_mpsync_thr *info) 1403 { 1404 struct globaldata *gd = mycpu; 1405 tsc_uclock_t test_end, test_begin; 1406 u_int i; 1407 1408 if (bootverbose) { 1409 kprintf("cpu%d: TSC testing MP synchronization ...\n", 1410 gd->gd_cpuid); 1411 } 1412 1413 test_begin = rdtsc_ordered(); 1414 /* Run test for 100ms */ 1415 test_end = test_begin + (tsc_frequency / 10); 1416 1417 arg->tsc_mpsync = 1; 1418 arg->tsc_target = test_begin; 1419 1420 #define TSC_TEST_TRYMAX 1000000 /* Make sure we could stop */ 1421 #define TSC_TEST_TRYMIN 50000 1422 1423 for (i = 0; i < 
TSC_TEST_TRYMAX; ++i) { 1424 struct lwkt_cpusync cs; 1425 1426 crit_enter(); 1427 lwkt_cpusync_init(&cs, gd->gd_other_cpus, 1428 tsc_mpsync_test_remote, arg); 1429 lwkt_cpusync_interlock(&cs); 1430 cpu_pause(); 1431 arg->tsc_target = rdtsc_ordered(); 1432 cpu_mfence(); 1433 lwkt_cpusync_deinterlock(&cs); 1434 crit_exit(); 1435 cpu_pause(); 1436 1437 if (!arg->tsc_mpsync) { 1438 kprintf("cpu%d: TSC is not MP synchronized @%u\n", 1439 gd->gd_cpuid, i); 1440 break; 1441 } 1442 if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN) 1443 break; 1444 } 1445 1446 #undef TSC_TEST_TRYMIN 1447 #undef TSC_TEST_TRYMAX 1448 1449 if (arg->tsc_target == test_begin) { 1450 kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid); 1451 /* XXX disable TSC? */ 1452 tsc_invariant = 0; 1453 arg->tsc_mpsync = 0; 1454 return; 1455 } 1456 1457 if (arg->tsc_mpsync && bootverbose) { 1458 kprintf("cpu%d: TSC is MP synchronized after %u tries\n", 1459 gd->gd_cpuid, i); 1460 } 1461 } 1462 1463 #endif 1464 1465 #define TSC_TEST_COUNT 50000 1466 1467 static void 1468 tsc_mpsync_ap_thread(void *xinfo) 1469 { 1470 struct tsc_mpsync_info *info = xinfo; 1471 int cpu = mycpuid; 1472 int i; 1473 1474 /* 1475 * Tell main loop that we are ready and wait for initiation 1476 */ 1477 atomic_add_int(&info->tsc_ready_cnt, 1); 1478 while (info->tsc_command == 0) { 1479 lwkt_force_switch(); 1480 } 1481 1482 /* 1483 * Run test for 10000 loops or until tsc_done_cnt != 0 (another 1484 * cpu has finished its test), then increment done. 
1485 */ 1486 crit_enter(); 1487 for (i = 0; i < TSC_TEST_COUNT && info->tsc_done_cnt == 0; ++i) { 1488 info->tsc_saved[cpu].v = rdtsc_ordered(); 1489 } 1490 crit_exit(); 1491 atomic_add_int(&info->tsc_done_cnt, 1); 1492 1493 lwkt_exit(); 1494 } 1495 1496 static void 1497 tsc_mpsync_test(void) 1498 { 1499 int cpu; 1500 int try; 1501 1502 if (!tsc_invariant) { 1503 /* Not even invariant TSC */ 1504 return; 1505 } 1506 1507 if (ncpus == 1) { 1508 /* Only one CPU */ 1509 tsc_mpsync = 1; 1510 return; 1511 } 1512 1513 /* 1514 * Forcing can be used w/qemu to reduce contention 1515 */ 1516 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync); 1517 1518 if (tsc_mpsync == 0) { 1519 switch (cpu_vendor_id) { 1520 case CPU_VENDOR_INTEL: 1521 /* 1522 * Intel probably works 1523 */ 1524 break; 1525 1526 case CPU_VENDOR_AMD: 1527 /* 1528 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar 1529 * architectures) we have to watch out for 1530 * Erratum 778: 1531 * "Processor Core Time Stamp Counters May 1532 * Experience Drift" 1533 * This Erratum is only listed for cpus in Family 1534 * 15h < Model 30h and for 16h < Model 30h. 1535 * 1536 * AMD < Bulldozer probably doesn't work 1537 */ 1538 if (CPUID_TO_FAMILY(cpu_id) == 0x15 || 1539 CPUID_TO_FAMILY(cpu_id) == 0x16) { 1540 if (CPUID_TO_MODEL(cpu_id) < 0x30) 1541 return; 1542 } else if (CPUID_TO_FAMILY(cpu_id) < 0x17) { 1543 return; 1544 } 1545 break; 1546 1547 default: 1548 /* probably won't work */ 1549 return; 1550 } 1551 } else if (tsc_mpsync < 0) { 1552 kprintf("TSC MP synchronization test is disabled\n"); 1553 tsc_mpsync = 0; 1554 return; 1555 } 1556 1557 /* 1558 * Test even if forced to 1 above. If forced, we will use the TSC 1559 * even if the test fails. (set forced to -1 to disable entirely). 1560 */ 1561 kprintf("TSC testing MP synchronization ...\n"); 1562 1563 /* 1564 * Test TSC MP synchronization on APs. Try up to 4 times. 
1565 */ 1566 for (try = 0; try < 4; ++try) { 1567 struct tsc_mpsync_info info; 1568 uint64_t last; 1569 int64_t xdelta; 1570 int64_t delta; 1571 1572 bzero(&info, sizeof(info)); 1573 1574 for (cpu = 0; cpu < ncpus; ++cpu) { 1575 thread_t td; 1576 lwkt_create(tsc_mpsync_ap_thread, &info, &td, 1577 NULL, TDF_NOSTART, cpu, 1578 "tsc mpsync %d", cpu); 1579 lwkt_setpri_initial(td, curthread->td_pri); 1580 lwkt_schedule(td); 1581 } 1582 while (info.tsc_ready_cnt != ncpus) 1583 lwkt_force_switch(); 1584 1585 /* 1586 * All threads are ready, start the test and wait for 1587 * completion. 1588 */ 1589 info.tsc_command = 1; 1590 while (info.tsc_done_cnt != ncpus) 1591 lwkt_force_switch(); 1592 1593 /* 1594 * Process results 1595 */ 1596 last = info.tsc_saved[0].v; 1597 delta = 0; 1598 for (cpu = 0; cpu < ncpus; ++cpu) { 1599 xdelta = (int64_t)(info.tsc_saved[cpu].v - last); 1600 last = info.tsc_saved[cpu].v; 1601 if (xdelta < 0) 1602 xdelta = -xdelta; 1603 delta += xdelta; 1604 1605 } 1606 1607 /* 1608 * Result from attempt. If its too wild just stop now. 1609 * Also break out if we succeed, no need to try further. 
1610 */ 1611 kprintf("TSC MPSYNC TEST %jd %d -> %jd (10uS=%jd)\n", 1612 delta, ncpus, delta / ncpus, 1613 tsc_frequency / 100000); 1614 if (delta / ncpus > tsc_frequency / 100) 1615 break; 1616 if (delta / ncpus < tsc_frequency / 100000) { 1617 tsc_mpsync = 1; 1618 break; 1619 } 1620 } 1621 1622 if (tsc_mpsync) 1623 kprintf("TSC is MP synchronized\n"); 1624 else 1625 kprintf("TSC is not MP synchronized\n"); 1626 } 1627 SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL); 1628 1629 static SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254"); 1630 SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0, 1631 "frequency"); 1632 SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD, 1633 0, 0, hw_i8254_timestamp, "A", ""); 1634 1635 SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD, 1636 &tsc_present, 0, "TSC Available"); 1637 SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD, 1638 &tsc_invariant, 0, "Invariant TSC"); 1639 SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD, 1640 &tsc_mpsync, 0, "TSC is synchronized across CPUs"); 1641 SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD, 1642 &tsc_frequency, 0, "TSC Frequency"); 1643