/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2009-2010, Intel Corporation.
 * All rights reserved.
 */
/*
 * Copyright 2019 Joyent, Inc.
 */

#include <sys/x86_archext.h>
#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <sys/stat.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/cpu_acpi.h>
#include <sys/cpu_idle.h>
#include <sys/cpupm.h>
#include <sys/cpu_event.h>
#include <sys/hpet.h>
#include <sys/archsystm.h>
#include <vm/hat_i86.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>
#include <sys/callb.h>

#define	CSTATE_USING_HPET	1
#define	CSTATE_USING_LAT	2

#define	CPU_IDLE_STOP_TIMEOUT	1000

extern void cpu_idle_adaptive(void);
extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data,
    cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start);

static int cpu_idle_init(cpu_t *);
static void cpu_idle_fini(cpu_t *);
static void cpu_idle_stop(cpu_t *);
static boolean_t cpu_deep_idle_callb(void *arg, int code);
static boolean_t cpu_idle_cpr_callb(void *arg, int code);
static void acpi_cpu_cstate(cpu_acpi_cstate_t *cstate);

static boolean_t cstate_use_timer(hrtime_t *lapic_expire, int timer);

/*
 * cpu_cstate_arat: set if the CPU has an always-running local APIC timer.
 * cpu_cstate_hpet: set if the HPET timer is used as a proxy in deep C-states.
 */
static boolean_t cpu_cstate_arat = B_FALSE;
static boolean_t cpu_cstate_hpet = B_FALSE;

/*
 * Interfaces for modules implementing Intel's deep c-state.
 */
cpupm_state_ops_t cpu_idle_ops = {
	"Generic ACPI C-state Support",
	cpu_idle_init,
	cpu_idle_fini,
	NULL,
	cpu_idle_stop
};

static kmutex_t cpu_idle_callb_mutex;
static callb_id_t cpu_deep_idle_callb_id;
static callb_id_t cpu_idle_cpr_callb_id;
static uint_t cpu_idle_cfg_state;

static kmutex_t cpu_idle_mutex;

cpu_idle_kstat_t cpu_idle_kstat = {
	{ "address_space_id",	KSTAT_DATA_STRING },
	{ "latency",		KSTAT_DATA_UINT32 },
	{ "power",		KSTAT_DATA_UINT32 },
};

/*
 * kstat update function for the C-state information.
 */
static int
cpu_idle_kstat_update(kstat_t *ksp, int flag)
{
	cpu_acpi_cstate_t *cstate = ksp->ks_private;

	if (flag == KSTAT_WRITE) {
		return (EACCES);
	}

	if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "FFixedHW");
	} else if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_SYSTEM_IO) {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "SystemIO");
	} else {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "Unsupported");
	}

	cpu_idle_kstat.cs_latency.value.ui32 = cstate->cs_latency;
	cpu_idle_kstat.cs_power.value.ui32 = cstate->cs_power;

	return (0);
}

/*
 * Used during configuration callbacks to manage implementation specific
 * details of the hardware timer used during Deep C-state.
 */
boolean_t
cstate_timer_callback(int code)
{
	if (cpu_cstate_arat) {
		return (B_TRUE);
	} else if (cpu_cstate_hpet) {
		return (hpet.callback(code));
	}
	return (B_FALSE);
}

/*
 * Some Local APIC Timers do not work during Deep C-states.
 * The Deep C-state idle function uses this function to ensure it is using a
 * hardware timer that works during Deep C-states.  This function also
 * switches the timer back to the LAPIC Timer after Deep C-state.
 */
static boolean_t
cstate_use_timer(hrtime_t *lapic_expire, int timer)
{
	if (cpu_cstate_arat)
		return (B_TRUE);

	/*
	 * We have to return B_FALSE if there is neither ARAT nor HPET
	 * support.
	 */
	if (!cpu_cstate_hpet)
		return (B_FALSE);

	switch (timer) {
	case CSTATE_USING_HPET:
		return (hpet.use_hpet_timer(lapic_expire));
	case CSTATE_USING_LAT:
		hpet.use_lapic_timer(*lapic_expire);
		return (B_TRUE);
	default:
		return (B_FALSE);
	}
}

/*
 * c-state wakeup function.
 * Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals
 * with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State.
 */
void
cstate_wakeup(cpu_t *cp, int bound)
{
	struct machcpu *mcpu = &(cp->cpu_m);
	volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait;
	cpupart_t *cpu_part;
	uint_t cpu_found;
	processorid_t cpu_sid;

	cpu_part = cp->cpu_part;
	cpu_sid = cp->cpu_seqid;
	/*
	 * Clear the halted bit for that CPU since it will be woken up
	 * in a moment.
	 */
	if (bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
		/*
		 * Clear the halted bit for that CPU since it will be
		 * poked in a moment.
		 */
		bitset_atomic_del(&cpu_part->cp_haltset, cpu_sid);

		/*
		 * We may find the current CPU present in the halted cpuset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in cpu_idle().
		 * Waking ourself is obviously unnecessary, since if
		 * we're here, we're not halted.
		 */
		if (cp != CPU) {
			/*
			 * Use correct wakeup mechanism
			 */
			if ((mcpu_mwait != NULL) &&
			    (*mcpu_mwait == MWAIT_HALTED))
				MWAIT_WAKEUP(cp);
			else
				poke_cpu(cp->cpu_id);
		}
		return;
	} else {
		/*
		 * This cpu isn't halted, but it's idle or undergoing a
		 * context switch. No need to awaken anyone else.
		 */
		if (cp->cpu_thread == cp->cpu_idle_thread ||
		    cp->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/*
	 * No need to wake up other CPUs if the thread we just enqueued
	 * is bound.
	 */
	if (bound)
		return;

	/*
	 * See if there are any other halted CPUs. If there are, then
	 * select one, and awaken it.
	 * It's possible that after we find a CPU, somebody else
	 * will awaken it before we get the chance.
	 * In that case, look again.
	 */
	do {
		cpu_found = bitset_find(&cpu_part->cp_haltset);
		if (cpu_found == (uint_t)-1)
			return;

	} while (bitset_atomic_test_and_del(&cpu_part->cp_haltset,
	    cpu_found) < 0);

	/*
	 * Must use correct wakeup mechanism to avoid lost wakeup of
	 * alternate cpu.
	 */
	if (cpu_found != CPU->cpu_seqid) {
		mcpu_mwait = cpu_seq[cpu_found]->cpu_m.mcpu_mwait;
		if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED))
			MWAIT_WAKEUP(cpu_seq[cpu_found]);
		else
			poke_cpu(cpu_seq[cpu_found]->cpu_id);
	}
}

/*
 * Function called by the CPU idle notification framework to check whether
 * the CPU has been awakened.  It will be called with interrupts disabled.
 * If the CPU has been awakened, call cpu_idle_exit() to notify the CPU idle
 * notification framework.
 */
static void
acpi_cpu_mwait_check_wakeup(void *arg)
{
	volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;

	ASSERT(arg != NULL);
	if (*mcpu_mwait != MWAIT_HALTED) {
		/*
		 * CPU has been awakened, notify CPU idle notification system.
		 */
		cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
	} else {
		/*
		 * Toggle interrupt flag to detect pending interrupts.
		 * If interrupt happened, do_interrupt() will notify CPU idle
		 * notification framework so no need to call cpu_idle_exit()
		 * here.
		 */
		sti();
		SMT_PAUSE();
		cli();
	}
}

/*
 * As above, but for CPUs idling with *mcpu_mwait set to MWAIT_WAKEUP_IPI
 * (C-states entered via SYSTEM_IO ports).
 */
static void
acpi_cpu_mwait_ipi_check_wakeup(void *arg)
{
	volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;

	ASSERT(arg != NULL);
	if (*mcpu_mwait != MWAIT_WAKEUP_IPI) {
		/*
		 * CPU has been awakened, notify CPU idle notification system.
		 */
		cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
	} else {
		/*
		 * Toggle interrupt flag to detect pending interrupts.
		 * If interrupt happened, do_interrupt() will notify CPU idle
		 * notification framework so no need to call cpu_idle_exit()
		 * here.
		 */
		sti();
		SMT_PAUSE();
		cli();
	}
}

/*ARGSUSED*/
static void
acpi_cpu_check_wakeup(void *arg)
{
	/*
	 * Toggle interrupt flag to detect pending interrupts.
	 * If interrupt happened, do_interrupt() will notify CPU idle
	 * notification framework so no need to call cpu_idle_exit() here.
	 */
	sti();
	SMT_PAUSE();
	cli();
}

/*
 * Enter deep C-state handler.
 */
static void
acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
{
	volatile uint32_t *mcpu_mwait = CPU->cpu_m.mcpu_mwait;
	cpu_t *cpup = CPU;
	processorid_t cpu_sid = cpup->cpu_seqid;
	cpupart_t *cp = cpup->cpu_part;
	hrtime_t lapic_expire;
	uint8_t type = cstate->cs_addrspace_id;
	uint32_t cs_type = cstate->cs_type;
	int hset_update = 1;
	boolean_t using_timer;
	cpu_idle_check_wakeup_t check_func = &acpi_cpu_check_wakeup;

	/*
	 * Set our mcpu_mwait here, so we can tell if anyone tries to
	 * wake us between now and when we call mwait.  No other cpu will
	 * attempt to set our mcpu_mwait until we add ourself to the haltset.
	 */
	if (mcpu_mwait) {
		if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
			*mcpu_mwait = MWAIT_WAKEUP_IPI;
			check_func = &acpi_cpu_mwait_ipi_check_wakeup;
		} else {
			*mcpu_mwait = MWAIT_HALTED;
			check_func = &acpi_cpu_mwait_check_wakeup;
		}
	}

	/*
	 * If this CPU is online, and there are multiple CPUs
	 * in the system, then we should note our halting
	 * by adding ourselves to the partition's halted CPU
	 * bitmap. This allows other CPUs to find/awaken us when
	 * work becomes available.
	 */
	if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitmask
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpuset is checked to determine who
	 * (if anyone) should be awakened. We therefore need to first
	 * add ourselves to the halted cpuset, and then check if there
	 * is any work available.
	 *
	 * Note that memory barriers after updating the HALTED flag
	 * are not necessary since an atomic operation (updating the bitmap)
	 * immediately follows. On x86 the atomic operation acts as a
	 * memory barrier for the update of cpu_disp_flags.
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check. We'll be notified through the clearing of our
	 * bit in the halted CPU bitmask, and a write to our mcpu_mwait.
	 *
	 * disp_anywork() checks disp_nrunnable, so we do not have to later.
	 */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * We're on our way to being halted.
	 *
	 * The local APIC timer can stop in ACPI C2 and deeper c-states.
	 * Try to program the HPET hardware to substitute for this CPU's
	 * LAPIC timer.
	 * cstate_use_timer() could disable the LAPIC Timer.  Make sure
	 * to start the LAPIC Timer again before leaving this function.
	 *
	 * Disable interrupts here so we will awaken immediately after halting
	 * if someone tries to poke us between now and the time we actually
	 * halt.
	 */
	cli();
	using_timer = cstate_use_timer(&lapic_expire, CSTATE_USING_HPET);

	/*
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return. If the bit is cleared after
	 * we check, then cstate_wakeup() will pop us out of the halted
	 * state.
	 *
	 * This means that the ordering of clearing the bit and issuing the
	 * wakeup is important.
	 * cstate_wakeup() must clear our cp_haltset bit, and then wake us.
	 * acpi_cpu_cstate() must disable interrupts, then check for the bit.
	 */
	if (hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid) == 0) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		return;
	}

	/*
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness. disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 */
	if (cpup->cpu_disp->disp_nrunnable != 0) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	if (using_timer == B_FALSE) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();

		/*
		 * We are currently unable to program the HPET to act as this
		 * CPU's proxy LAPIC timer. This CPU cannot enter C2 or deeper
		 * because no timer is set to wake it up while its LAPIC timer
		 * stalls in deep C-States.
		 * Enter C1 instead.
		 *
		 * cstate_wakeup() will wake this CPU with an IPI, which
		 * works with MWAIT.
		 */
		i86_monitor(mcpu_mwait, 0, 0);
		if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) {
			if (cpu_idle_enter(IDLE_STATE_C1, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) ==
				    MWAIT_HALTED) {
					i86_mwait(0, 0);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}

		/*
		 * We're no longer halted
		 */
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) {
		/*
		 * We're on our way to being halted.
		 * To avoid a lost wakeup, arm the monitor before checking
		 * if another cpu wrote to mcpu_mwait to wake us up.
		 */
		i86_monitor(mcpu_mwait, 0, 0);
		if (*mcpu_mwait == MWAIT_HALTED) {
			if (cpu_idle_enter((uint_t)cs_type, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if (*mcpu_mwait == MWAIT_HALTED) {
					i86_mwait(cstate->cs_address, 1);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}
	} else if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
		uint32_t value;
		ACPI_TABLE_FADT *gbl_FADT;

		if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
			if (cpu_idle_enter((uint_t)cs_type, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
					/*
					 * The following calls will cause us to
					 * halt, which will cause the store
					 * buffer to be repartitioned,
					 * potentially exposing us to the Intel
					 * CPU vulnerability MDS. As such, we
					 * need to call x86_md_clear()
					 * explicitly here. The other idle
					 * methods in this function do this
					 * automatically as part of the
					 * implementation of i86_mwait().
					 */
					x86_md_clear();
					(void) cpu_acpi_read_port(
					    cstate->cs_address, &value, 8);
					acpica_get_global_FADT(&gbl_FADT);
					(void) cpu_acpi_read_port(
					    gbl_FADT->XPmTimerBlock.Address,
					    &value, 32);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}
	}

	/*
	 * The LAPIC timer may have stopped in deep c-state.
	 * Reprogram this CPU's LAPIC here before enabling interrupts.
	 */
	(void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT);
	sti();

	/*
	 * We're no longer halted
	 */
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}

/*
 * Idle the current CPU, where deep C-states are supported.
 */
void
cpu_acpi_idle(void)
{
	cpu_t *cp = CPU;
	cpu_acpi_handle_t handle;
	cma_c_state_t *cs_data;
	cpu_acpi_cstate_t *cstates;
	hrtime_t start, end;
	int cpu_max_cstates;
	uint32_t cs_indx;
	uint16_t cs_type;

	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	handle = mach_state->ms_acpi_handle;
	ASSERT(CPU_ACPI_CSTATES(handle) != NULL);

	cs_data = mach_state->ms_cstate.cma_state.cstate;
	cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	ASSERT(cstates != NULL);
	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
	if (cpu_max_cstates > CPU_MAX_CSTATES)
		cpu_max_cstates = CPU_MAX_CSTATES;
	if (cpu_max_cstates == 1) {	/* no ACPI c-state data */
		(*non_deep_idle_cpu)();
		return;
	}

	start = gethrtime_unscaled();

	cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start);

	cs_type = cstates[cs_indx].cs_type;

	switch (cs_type) {
	default:
		/* FALLTHROUGH */
	case CPU_ACPI_C1:
		(*non_deep_idle_cpu)();
		break;

	case CPU_ACPI_C2:
		acpi_cpu_cstate(&cstates[cs_indx]);
		break;

	case CPU_ACPI_C3:
		/*
		 * All supported Intel processors maintain cache coherency
		 * during C3.  Currently, when entering C3, processors flush
		 * core caches to the higher level shared cache. The shared
		 * cache maintains state and supports probes during C3.
		 * Consequently there is no need to handle cache coherency
		 * and Bus Master activity here with the cache flush, BM_RLD
		 * bit, BM_STS bit, or PM2_CNT.ARB_DIS mechanisms described
		 * in section 8.1.4 of the ACPI Specification 4.0.
		 */
		acpi_cpu_cstate(&cstates[cs_indx]);
		break;
	}

	end = gethrtime_unscaled();

	/*
	 * Update statistics
	 */
	cpupm_wakeup_cstate_data(cs_data, end);
}

boolean_t
cpu_deep_cstates_supported(void)
{
	extern int idle_cpu_no_deep_c;

	if (idle_cpu_no_deep_c)
		return (B_FALSE);

	if (!cpuid_deep_cstates_supported())
		return (B_FALSE);

	if (cpuid_arat_supported()) {
		cpu_cstate_arat = B_TRUE;
		return (B_TRUE);
	}

	if ((hpet.supported == HPET_FULL_SUPPORT) &&
	    hpet.install_proxy()) {
		cpu_cstate_hpet = B_TRUE;
		return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Validate that this processor supports deep cstate and if so,
 * get the c-state data from ACPI and cache it.
 */
static int
cpu_idle_init(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	char name[KSTAT_STRLEN];
	int cpu_max_cstates, i;
	int ret;

	/*
	 * Cache the C-state specific ACPI data.
	 */
	if ((ret = cpu_acpi_cache_cstate_data(handle)) != 0) {
		if (ret < 0)
			cmn_err(CE_NOTE,
			    "!Support for CPU deep idle states is being "
			    "disabled due to errors parsing ACPI C-state "
			    "objects exported by BIOS.");
		cpu_idle_fini(cp);
		return (-1);
	}

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);

	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

	for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
		(void) snprintf(name, KSTAT_STRLEN - 1, "c%d", cstate->cs_type);
		/*
		 * Allocate, initialize and install cstate kstat
		 */
		cstate->cs_ksp = kstat_create("cstate", cp->cpu_id,
		    name, "misc",
		    KSTAT_TYPE_NAMED,
		    sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL);

		if (cstate->cs_ksp == NULL) {
			cmn_err(CE_NOTE, "kstat_create(c_state) fail");
		} else {
			cstate->cs_ksp->ks_data = &cpu_idle_kstat;
			cstate->cs_ksp->ks_lock = &cpu_idle_mutex;
			cstate->cs_ksp->ks_update = cpu_idle_kstat_update;
			cstate->cs_ksp->ks_data_size += MAXNAMELEN;
			cstate->cs_ksp->ks_private = cstate;
			kstat_install(cstate->cs_ksp);
		}
		cstate++;
	}

	cpupm_alloc_domains(cp, CPUPM_C_STATES);
	cpupm_alloc_ms_cstate(cp);

	if (cpu_deep_cstates_supported()) {
		uint32_t value;

		mutex_enter(&cpu_idle_callb_mutex);
		if (cpu_deep_idle_callb_id == (callb_id_t)0)
			cpu_deep_idle_callb_id = callb_add(&cpu_deep_idle_callb,
			    (void *)NULL, CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle");
		if (cpu_idle_cpr_callb_id == (callb_id_t)0)
			cpu_idle_cpr_callb_id = callb_add(&cpu_idle_cpr_callb,
			    (void *)NULL, CB_CL_CPR_PM, "cpu_idle_cpr");
		mutex_exit(&cpu_idle_callb_mutex);

		/*
		 * All supported CPUs (Nehalem and later) will remain in C3
		 * during Bus Master activity.
		 * All CPUs set ACPI_BITREG_BUS_MASTER_RLD to 0 here if it
		 * is not already 0 before enabling Deeper C-states.
		 */
		cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_RLD, &value);
		if (value & 1)
			cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
	}

	return (0);
}

/*
 * Free resources allocated by cpu_idle_init().
 */
static void
cpu_idle_fini(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t cpu_max_cstates, i;

	/*
	 * idle cpu points back to the generic one
	 */
	idle_cpu = cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
	disp_enq_thread = non_deep_idle_disp_enq_thread;

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}

	cpupm_free_ms_cstate(cp);
	cpupm_free_domains(&cpupm_cstate_domains);
	cpu_acpi_free_cstate_data(handle);

	mutex_enter(&cpu_idle_callb_mutex);
	if (cpu_deep_idle_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_deep_idle_callb_id);
		cpu_deep_idle_callb_id = (callb_id_t)0;
	}
	if (cpu_idle_cpr_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_idle_cpr_callb_id);
		cpu_idle_cpr_callb_id = (callb_id_t)0;
	}
	mutex_exit(&cpu_idle_callb_mutex);
}

/*
 * This function is introduced to resolve a race between the master and
 * the slave CPUs touching the C-state data structures.  After the slave
 * calls this idle function and switches to the non-deep idle function,
 * the master can go on to reclaim the resources.
 */
static void
cpu_idle_stop_sync(void)
{
	/* switch to the non deep idle function */
	CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
}

static void
cpu_idle_stop(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t cpu_max_cstates, i = 0;

	mutex_enter(&cpu_idle_callb_mutex);
	if (idle_cpu == cpu_idle_adaptive) {
		/*
		 * Ask the slave CPU to switch to the synchronous idle
		 * function.
		 */
		cp->cpu_m.mcpu_idle_cpu = cpu_idle_stop_sync;
		poke_cpu(cp->cpu_id);

		/*
		 * Wait until the slave switches to the non-deep idle
		 * function, so that the master can safely go on to reclaim
		 * the resources.
		 */
		while (cp->cpu_m.mcpu_idle_cpu != non_deep_idle_cpu) {
			drv_usecwait(10);
			if ((++i % CPU_IDLE_STOP_TIMEOUT) == 0)
				cmn_err(CE_NOTE, "!cpu_idle_stop: the slave"
				    " idle stop timeout");
		}
	}
	mutex_exit(&cpu_idle_callb_mutex);

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}
	cpupm_free_ms_cstate(cp);
	cpupm_remove_domains(cp, CPUPM_C_STATES, &cpupm_cstate_domains);
	cpu_acpi_free_cstate_data(handle);
}

/*ARGSUSED*/
static boolean_t
cpu_deep_idle_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case PM_DEFAULT_CPU_DEEP_IDLE:
		/*
		 * Default policy is same as enable
		 */
		/*FALLTHROUGH*/
	case PM_ENABLE_CPU_DEEP_IDLE:
		if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0)
			break;

		if (cstate_timer_callback(PM_ENABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
			cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG;
		} else {
			rslt = B_FALSE;
		}
		break;

	case PM_DISABLE_CPU_DEEP_IDLE:
		if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
			break;

		idle_cpu = non_deep_idle_cpu;
		if (cstate_timer_callback(PM_DISABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG;
		}
		break;

	default:
		cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d\n",
		    code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}

/*ARGSUSED*/
static boolean_t
cpu_idle_cpr_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case CB_CODE_CPR_RESUME:
		if (cstate_timer_callback(CB_CODE_CPR_RESUME)) {
			/*
			 * Do not enable dispatcher hooks if disabled by user.
			 */
			if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
				break;

			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
		} else {
			rslt = B_FALSE;
		}
		break;

	case CB_CODE_CPR_CHKPT:
		idle_cpu = non_deep_idle_cpu;
		disp_enq_thread = non_deep_idle_disp_enq_thread;
		(void) cstate_timer_callback(CB_CODE_CPR_CHKPT);
		break;

	default:
		cmn_err(CE_NOTE, "!cpudvr cpr_callb: invalid code %d\n", code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}

/*
 * Handle _CST notification.
 */
void
cpuidle_cstate_instance(cpu_t *cp)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t handle;
	struct machcpu *mcpu;
	cpuset_t dom_cpu_set;
	kmutex_t *pm_lock;
	int result = 0;
	processorid_t cpu_id;

	if (mach_state == NULL) {
		return;
	}

	ASSERT(mach_state->ms_cstate.cma_domain != NULL);
	dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus;
	pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock;

	/*
	 * Do this for all the CPUs in the domain.
	 */
	mutex_enter(pm_lock);
	do {
		CPUSET_FIND(dom_cpu_set, cpu_id);
		if (cpu_id == CPUSET_NOTINSET)
			break;

		ASSERT(cpu_id >= 0 && cpu_id < NCPU);
		cp = cpu[cpu_id];
		mach_state = (cpupm_mach_state_t *)
		    cp->cpu_m.mcpu_pm_mach_state;
		if (!(mach_state->ms_caps & CPUPM_C_STATES)) {
			mutex_exit(pm_lock);
			return;
		}
		handle = mach_state->ms_acpi_handle;
		ASSERT(handle != NULL);

		/*
		 * re-evaluate cstate object
		 */
		if (cpu_acpi_cache_cstate_data(handle) != 0) {
			cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state"
			    " object Instance: %d", cpu_id);
		}
		mcpu = &(cp->cpu_m);
		mcpu->max_cstates = cpu_acpi_get_max_cstates(handle);
		if (mcpu->max_cstates > CPU_ACPI_C1) {
			(void) cstate_timer_callback(
			    CST_EVENT_MULTIPLE_CSTATES);
			disp_enq_thread = cstate_wakeup;
			cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
		} else if (mcpu->max_cstates == CPU_ACPI_C1) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
			(void) cstate_timer_callback(CST_EVENT_ONE_CSTATE);
		}

		CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result);
	} while (result < 0);
	mutex_exit(pm_lock);
#endif
}

/*
 * Handle a change in the number or type of the available processor power
 * states.
 */
void
cpuidle_manage_cstates(void *ctx)
{
	cpu_t *cp = ctx;
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	boolean_t is_ready;

	if (mach_state == NULL) {
		return;
	}

	/*
	 * We currently refuse to power manage if the CPU is not ready to
	 * take cross calls (cross calls fail silently if CPU is not ready
	 * for it).
	 *
	 * Additionally, for x86 platforms we cannot power manage an instance
	 * until it has been initialized.
	 */
	is_ready = (cp->cpu_flags & CPU_READY) && cpupm_cstate_ready(cp);
	if (!is_ready)
		return;

	cpuidle_cstate_instance(cp);
}