// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/string_helpers.h>

#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "i915_irq.h"
#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
#include "intel_gt_regs.h"
#include "intel_mchbar_regs.h"
#include "intel_pcode.h"
#include "intel_rps.h"
#include "vlv_sideband.h"
#ifdef __linux__
#include "../../../platform/x86/intel_ips.h"
#endif

#define BUSY_MAX_EI	20u /* ms */

/*
 * Lock protecting IPS related data structures
 */
static DEFINE_SPINLOCK(mchdev_lock);

static struct intel_gt *rps_to_gt(struct intel_rps *rps)
{
	return container_of(rps, struct intel_gt, rps);
}

static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
{
	return rps_to_gt(rps)->i915;
}

static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
{
	return rps_to_gt(rps)->uncore;
}

static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	return &gt->uc.guc.slpc;
}

static bool rps_uses_slpc(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	return intel_uc_uses_guc_slpc(&gt->uc);
}

static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
{
	return mask & ~rps->pm_intrmsk_mbz;
}

static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
{
	intel_uncore_write_fw(uncore, reg, val);
}

static void rps_timer(void *arg)
{
	struct intel_rps *rps = arg;
	struct intel_engine_cs *engine;
	ktime_t dt, last, timestamp;
	enum intel_engine_id id;
	s64 max_busy[3] = {};

	timestamp = 0;
	for_each_engine(engine, rps_to_gt(rps), id) {
		s64 busy;
		int i;

		dt = intel_engine_get_busy_time(engine, &timestamp);
		last = engine->stats.rps;
		engine->stats.rps = dt;

		busy = ktime_to_ns(ktime_sub(dt, last));
		for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
			if (busy > max_busy[i])
				swap(busy, max_busy[i]);
		}
	}
	last = rps->pm_timestamp;
	rps->pm_timestamp = timestamp;

	if (intel_rps_is_active(rps)) {
		s64 busy;
		int i;

		dt = ktime_sub(timestamp, last);

		/*
		 * Our goal is to evaluate each engine independently, so we run
		 * at the lowest clocks required to sustain the heaviest
		 * workload. However, a task may be split into sequential
		 * dependent operations across a set of engines, such that
		 * the independent contributions do not account for high load,
		 * but overall the task is GPU bound. For example, consider
		 * video decode on vcs followed by colour post-processing
		 * on vecs, followed by general post-processing on rcs.
		 * Since multiple engines being active does not necessarily
		 * imply a single continuous workload across all engines, we
		 * hedge our bets by only contributing a factor of the
		 * distributed load into our busyness calculation.
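		 * As an illustration (with made-up numbers): per-engine
		 * busyness of 8ms, 4ms and 2ms over a 10ms interval gives
		 * busy = 8 + 4/2 + 2/4 = 10.5ms, i.e. ~105% of dt, which is
		 * above the 95% up threshold programmed by rps_set_power()
		 * and so flags GEN6_PM_RP_UP_THRESHOLD and kicks the RPS
		 * worker below.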
117 */ 118 busy = max_busy[0]; 119 for (i = 1; i < ARRAY_SIZE(max_busy); i++) { 120 if (!max_busy[i]) 121 break; 122 123 busy += div_u64(max_busy[i], 1 << i); 124 } 125 GT_TRACE(rps_to_gt(rps), 126 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n", 127 busy, (int)div64_u64(100 * busy, dt), 128 max_busy[0], max_busy[1], max_busy[2], 129 rps->pm_interval); 130 131 if (100 * busy > rps->power.up_threshold * dt && 132 rps->cur_freq < rps->max_freq_softlimit) { 133 rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; 134 rps->pm_interval = 1; 135 schedule_work(&rps->work); 136 } else if (100 * busy < rps->power.down_threshold * dt && 137 rps->cur_freq > rps->min_freq_softlimit) { 138 rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD; 139 rps->pm_interval = 1; 140 schedule_work(&rps->work); 141 } else { 142 rps->last_adj = 0; 143 } 144 145 mod_timer(&rps->timer, 146 jiffies + msecs_to_jiffies(rps->pm_interval)); 147 rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI); 148 } 149 } 150 151 static void rps_start_timer(struct intel_rps *rps) 152 { 153 rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); 154 rps->pm_interval = 1; 155 mod_timer(&rps->timer, jiffies + 1); 156 } 157 158 static void rps_stop_timer(struct intel_rps *rps) 159 { 160 del_timer_sync(&rps->timer); 161 rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); 162 cancel_work_sync(&rps->work); 163 } 164 165 static u32 rps_pm_mask(struct intel_rps *rps, u8 val) 166 { 167 u32 mask = 0; 168 169 /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */ 170 if (val > rps->min_freq_softlimit) 171 mask |= (GEN6_PM_RP_UP_EI_EXPIRED | 172 GEN6_PM_RP_DOWN_THRESHOLD | 173 GEN6_PM_RP_DOWN_TIMEOUT); 174 175 if (val < rps->max_freq_softlimit) 176 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; 177 178 mask &= rps->pm_events; 179 180 return rps_pm_sanitize_mask(rps, ~mask); 181 } 182 183 static void rps_reset_ei(struct intel_rps *rps) 184 { 185 memset(&rps->ei, 0, sizeof(rps->ei)); 186 } 187 188 static void rps_enable_interrupts(struct intel_rps *rps) 189 { 190 struct intel_gt *gt = rps_to_gt(rps); 191 192 GEM_BUG_ON(rps_uses_slpc(rps)); 193 194 GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n", 195 rps->pm_events, rps_pm_mask(rps, rps->last_freq)); 196 197 rps_reset_ei(rps); 198 199 spin_lock_irq(gt->irq_lock); 200 gen6_gt_pm_enable_irq(gt, rps->pm_events); 201 spin_unlock_irq(gt->irq_lock); 202 203 intel_uncore_write(gt->uncore, 204 GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq)); 205 } 206 207 static void gen6_rps_reset_interrupts(struct intel_rps *rps) 208 { 209 gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS); 210 } 211 212 static void gen11_rps_reset_interrupts(struct intel_rps *rps) 213 { 214 while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM)) 215 ; 216 } 217 218 static void rps_reset_interrupts(struct intel_rps *rps) 219 { 220 struct intel_gt *gt = rps_to_gt(rps); 221 222 spin_lock_irq(gt->irq_lock); 223 if (GRAPHICS_VER(gt->i915) >= 11) 224 gen11_rps_reset_interrupts(rps); 225 else 226 gen6_rps_reset_interrupts(rps); 227 228 rps->pm_iir = 0; 229 spin_unlock_irq(gt->irq_lock); 230 } 231 232 static void rps_disable_interrupts(struct intel_rps *rps) 233 { 234 struct intel_gt *gt = rps_to_gt(rps); 235 236 intel_uncore_write(gt->uncore, 237 GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u)); 238 239 spin_lock_irq(gt->irq_lock); 240 gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); 241 spin_unlock_irq(gt->irq_lock); 242 243 intel_synchronize_irq(gt->i915); 244 245 /* 246 * Now 
that we will not be generating any more work, flush any 247 * outstanding tasks. As we are called on the RPS idle path, 248 * we will reset the GPU to minimum frequencies, so the current 249 * state of the worker can be discarded. 250 */ 251 cancel_work_sync(&rps->work); 252 253 rps_reset_interrupts(rps); 254 GT_TRACE(gt, "interrupts:off\n"); 255 } 256 257 static const struct cparams { 258 u16 i; 259 u16 t; 260 u16 m; 261 u16 c; 262 } cparams[] = { 263 { 1, 1333, 301, 28664 }, 264 { 1, 1066, 294, 24460 }, 265 { 1, 800, 294, 25192 }, 266 { 0, 1333, 276, 27605 }, 267 { 0, 1066, 276, 27605 }, 268 { 0, 800, 231, 23784 }, 269 }; 270 271 static void gen5_rps_init(struct intel_rps *rps) 272 { 273 struct drm_i915_private *i915 = rps_to_i915(rps); 274 struct intel_uncore *uncore = rps_to_uncore(rps); 275 u8 fmax, fmin, fstart; 276 u32 rgvmodectl; 277 int c_m, i; 278 279 if (i915->fsb_freq <= 3200) 280 c_m = 0; 281 else if (i915->fsb_freq <= 4800) 282 c_m = 1; 283 else 284 c_m = 2; 285 286 for (i = 0; i < ARRAY_SIZE(cparams); i++) { 287 if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) { 288 rps->ips.m = cparams[i].m; 289 rps->ips.c = cparams[i].c; 290 break; 291 } 292 } 293 294 rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); 295 296 /* Set up min, max, and cur for interrupt handling */ 297 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; 298 fmin = (rgvmodectl & MEMMODE_FMIN_MASK); 299 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 300 MEMMODE_FSTART_SHIFT; 301 drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n", 302 fmax, fmin, fstart); 303 304 rps->min_freq = fmax; 305 rps->efficient_freq = fstart; 306 rps->max_freq = fmin; 307 } 308 309 static unsigned long 310 __ips_chipset_val(struct intel_ips *ips) 311 { 312 struct intel_uncore *uncore = 313 rps_to_uncore(container_of(ips, struct intel_rps, ips)); 314 unsigned long now = jiffies_to_msecs(jiffies), dt; 315 unsigned long result; 316 u64 total, delta; 317 318 lockdep_assert_held(&mchdev_lock); 319 320 /* 321 * Prevent division-by-zero if we are asking too fast. 322 * Also, we don't get interesting results if we are polling 323 * faster than once in 10ms, so just return the saved value 324 * in such cases. 
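	 * (The value computed below is a simple linear model of the energy
	 * counters: chipset_power ~ (m * delta / dt + c) / 10, where m and c
	 * come from the cparams[] entry selected in gen5_rps_init().)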
325 */ 326 dt = now - ips->last_time1; 327 if (dt <= 10) 328 return ips->chipset_power; 329 330 /* FIXME: handle per-counter overflow */ 331 total = intel_uncore_read(uncore, DMIEC); 332 total += intel_uncore_read(uncore, DDREC); 333 total += intel_uncore_read(uncore, CSIEC); 334 335 delta = total - ips->last_count1; 336 337 result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10); 338 339 ips->last_count1 = total; 340 ips->last_time1 = now; 341 342 ips->chipset_power = result; 343 344 return result; 345 } 346 347 static unsigned long ips_mch_val(struct intel_uncore *uncore) 348 { 349 unsigned int m, x, b; 350 u32 tsfs; 351 352 tsfs = intel_uncore_read(uncore, TSFS); 353 x = intel_uncore_read8(uncore, TR1); 354 355 b = tsfs & TSFS_INTR_MASK; 356 m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT; 357 358 return m * x / 127 - b; 359 } 360 361 static int _pxvid_to_vd(u8 pxvid) 362 { 363 if (pxvid == 0) 364 return 0; 365 366 if (pxvid >= 8 && pxvid < 31) 367 pxvid = 31; 368 369 return (pxvid + 2) * 125; 370 } 371 372 static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid) 373 { 374 const int vd = _pxvid_to_vd(pxvid); 375 376 if (INTEL_INFO(i915)->is_mobile) 377 return max(vd - 1125, 0); 378 379 return vd; 380 } 381 382 static void __gen5_ips_update(struct intel_ips *ips) 383 { 384 struct intel_uncore *uncore = 385 rps_to_uncore(container_of(ips, struct intel_rps, ips)); 386 u64 now, delta, dt; 387 u32 count; 388 389 lockdep_assert_held(&mchdev_lock); 390 391 now = ktime_get_raw_ns(); 392 dt = now - ips->last_time2; 393 do_div(dt, NSEC_PER_MSEC); 394 395 /* Don't divide by 0 */ 396 if (dt <= 10) 397 return; 398 399 count = intel_uncore_read(uncore, GFXEC); 400 delta = count - ips->last_count2; 401 402 ips->last_count2 = count; 403 ips->last_time2 = now; 404 405 /* More magic constants... 
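	 * The factor below scales the GFXEC energy-counter delta by an
	 * empirically derived constant over the elapsed time (in ms) to
	 * yield a render power estimate, consumed later by __ips_gfx_val().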
*/ 406 ips->gfx_power = div_u64(delta * 1181, dt * 10); 407 } 408 409 static void gen5_rps_update(struct intel_rps *rps) 410 { 411 spin_lock_irq(&mchdev_lock); 412 __gen5_ips_update(&rps->ips); 413 spin_unlock_irq(&mchdev_lock); 414 } 415 416 static unsigned int gen5_invert_freq(struct intel_rps *rps, 417 unsigned int val) 418 { 419 /* Invert the frequency bin into an ips delay */ 420 val = rps->max_freq - val; 421 val = rps->min_freq + val; 422 423 return val; 424 } 425 426 static int __gen5_rps_set(struct intel_rps *rps, u8 val) 427 { 428 struct intel_uncore *uncore = rps_to_uncore(rps); 429 u16 rgvswctl; 430 431 lockdep_assert_held(&mchdev_lock); 432 433 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); 434 if (rgvswctl & MEMCTL_CMD_STS) { 435 DRM_DEBUG("gpu busy, RCS change rejected\n"); 436 return -EBUSY; /* still busy with another command */ 437 } 438 439 /* Invert the frequency bin into an ips delay */ 440 val = gen5_invert_freq(rps, val); 441 442 rgvswctl = 443 (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | 444 (val << MEMCTL_FREQ_SHIFT) | 445 MEMCTL_SFCAVM; 446 intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); 447 intel_uncore_posting_read16(uncore, MEMSWCTL); 448 449 rgvswctl |= MEMCTL_CMD_STS; 450 intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); 451 452 return 0; 453 } 454 455 static int gen5_rps_set(struct intel_rps *rps, u8 val) 456 { 457 int err; 458 459 spin_lock_irq(&mchdev_lock); 460 err = __gen5_rps_set(rps, val); 461 spin_unlock_irq(&mchdev_lock); 462 463 return err; 464 } 465 466 static unsigned long intel_pxfreq(u32 vidfreq) 467 { 468 int div = (vidfreq & 0x3f0000) >> 16; 469 int post = (vidfreq & 0x3000) >> 12; 470 int pre = (vidfreq & 0x7); 471 472 if (!pre) 473 return 0; 474 475 return div * 133333 / (pre << post); 476 } 477 478 static unsigned int init_emon(struct intel_uncore *uncore) 479 { 480 u8 pxw[16]; 481 int i; 482 483 /* Disable to program */ 484 intel_uncore_write(uncore, ECR, 0); 485 intel_uncore_posting_read(uncore, ECR); 486 487 /* Program energy weights for various events */ 488 intel_uncore_write(uncore, SDEW, 0x15040d00); 489 intel_uncore_write(uncore, CSIEW0, 0x007f0000); 490 intel_uncore_write(uncore, CSIEW1, 0x1e220004); 491 intel_uncore_write(uncore, CSIEW2, 0x04000004); 492 493 for (i = 0; i < 5; i++) 494 intel_uncore_write(uncore, PEW(i), 0); 495 for (i = 0; i < 3; i++) 496 intel_uncore_write(uncore, DEW(i), 0); 497 498 /* Program P-state weights to account for frequency power adjustment */ 499 for (i = 0; i < 16; i++) { 500 u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i)); 501 unsigned int freq = intel_pxfreq(pxvidfreq); 502 unsigned int vid = 503 (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; 504 unsigned int val; 505 506 val = vid * vid * freq / 1000 * 255; 507 val /= 127 * 127 * 900; 508 509 pxw[i] = val; 510 } 511 /* Render standby states get 0 weight */ 512 pxw[14] = 0; 513 pxw[15] = 0; 514 515 for (i = 0; i < 4; i++) { 516 intel_uncore_write(uncore, PXW(i), 517 pxw[i * 4 + 0] << 24 | 518 pxw[i * 4 + 1] << 16 | 519 pxw[i * 4 + 2] << 8 | 520 pxw[i * 4 + 3] << 0); 521 } 522 523 /* Adjust magic regs to magic values (more experimental results) */ 524 intel_uncore_write(uncore, OGW0, 0); 525 intel_uncore_write(uncore, OGW1, 0); 526 intel_uncore_write(uncore, EG0, 0x00007f00); 527 intel_uncore_write(uncore, EG1, 0x0000000e); 528 intel_uncore_write(uncore, EG2, 0x000e0000); 529 intel_uncore_write(uncore, EG3, 0x68000300); 530 intel_uncore_write(uncore, EG4, 0x42000000); 531 intel_uncore_write(uncore, EG5, 0x00140031); 532 
intel_uncore_write(uncore, EG6, 0); 533 intel_uncore_write(uncore, EG7, 0); 534 535 for (i = 0; i < 8; i++) 536 intel_uncore_write(uncore, PXWL(i), 0); 537 538 /* Enable PMON + select events */ 539 intel_uncore_write(uncore, ECR, 0x80000019); 540 541 return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK; 542 } 543 544 static bool gen5_rps_enable(struct intel_rps *rps) 545 { 546 struct drm_i915_private *i915 = rps_to_i915(rps); 547 struct intel_uncore *uncore = rps_to_uncore(rps); 548 u8 fstart, vstart; 549 u32 rgvmodectl; 550 551 spin_lock_irq(&mchdev_lock); 552 553 rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); 554 555 /* Enable temp reporting */ 556 intel_uncore_write16(uncore, PMMISC, 557 intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN); 558 intel_uncore_write16(uncore, TSC1, 559 intel_uncore_read16(uncore, TSC1) | TSE); 560 561 /* 100ms RC evaluation intervals */ 562 intel_uncore_write(uncore, RCUPEI, 100000); 563 intel_uncore_write(uncore, RCDNEI, 100000); 564 565 /* Set max/min thresholds to 90ms and 80ms respectively */ 566 intel_uncore_write(uncore, RCBMAXAVG, 90000); 567 intel_uncore_write(uncore, RCBMINAVG, 80000); 568 569 intel_uncore_write(uncore, MEMIHYST, 1); 570 571 /* Set up min, max, and cur for interrupt handling */ 572 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 573 MEMMODE_FSTART_SHIFT; 574 575 vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & 576 PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; 577 578 intel_uncore_write(uncore, 579 MEMINTREN, 580 MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); 581 582 intel_uncore_write(uncore, VIDSTART, vstart); 583 intel_uncore_posting_read(uncore, VIDSTART); 584 585 rgvmodectl |= MEMMODE_SWMODE_EN; 586 intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); 587 588 if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & 589 MEMCTL_CMD_STS) == 0, 10)) 590 drm_err(&uncore->i915->drm, 591 "stuck trying to change perf mode\n"); 592 mdelay(1); 593 594 __gen5_rps_set(rps, rps->cur_freq); 595 596 rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC); 597 rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); 598 rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC); 599 rps->ips.last_time1 = jiffies_to_msecs(jiffies); 600 601 rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); 602 rps->ips.last_time2 = ktime_get_raw_ns(); 603 604 spin_lock(&i915->irq_lock); 605 ilk_enable_display_irq(i915, DE_PCU_EVENT); 606 spin_unlock(&i915->irq_lock); 607 608 spin_unlock_irq(&mchdev_lock); 609 610 rps->ips.corr = init_emon(uncore); 611 612 return true; 613 } 614 615 static void gen5_rps_disable(struct intel_rps *rps) 616 { 617 struct drm_i915_private *i915 = rps_to_i915(rps); 618 struct intel_uncore *uncore = rps_to_uncore(rps); 619 u16 rgvswctl; 620 621 spin_lock_irq(&mchdev_lock); 622 623 spin_lock(&i915->irq_lock); 624 ilk_disable_display_irq(i915, DE_PCU_EVENT); 625 spin_unlock(&i915->irq_lock); 626 627 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); 628 629 /* Ack interrupts, disable EFC interrupt */ 630 intel_uncore_write(uncore, MEMINTREN, 631 intel_uncore_read(uncore, MEMINTREN) & 632 ~MEMINT_EVAL_CHG_EN); 633 intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); 634 635 /* Go back to the starting frequency */ 636 __gen5_rps_set(rps, rps->idle_freq); 637 mdelay(1); 638 rgvswctl |= MEMCTL_CMD_STS; 639 intel_uncore_write(uncore, MEMSWCTL, rgvswctl); 640 mdelay(1); 641 642 spin_unlock_irq(&mchdev_lock); 643 } 644 645 static u32 rps_limits(struct intel_rps *rps, u8 val) 646 { 647 u32 limits; 648 649 /* 650 * Only set the down 
limit when we've reached the lowest level to avoid 651 * getting more interrupts, otherwise leave this clear. This prevents a 652 * race in the hw when coming out of rc6: There's a tiny window where 653 * the hw runs at the minimal clock before selecting the desired 654 * frequency, if the down threshold expires in that window we will not 655 * receive a down interrupt. 656 */ 657 if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { 658 limits = rps->max_freq_softlimit << 23; 659 if (val <= rps->min_freq_softlimit) 660 limits |= rps->min_freq_softlimit << 14; 661 } else { 662 limits = rps->max_freq_softlimit << 24; 663 if (val <= rps->min_freq_softlimit) 664 limits |= rps->min_freq_softlimit << 16; 665 } 666 667 return limits; 668 } 669 670 static void rps_set_power(struct intel_rps *rps, int new_power) 671 { 672 struct intel_gt *gt = rps_to_gt(rps); 673 struct intel_uncore *uncore = gt->uncore; 674 u32 threshold_up = 0, threshold_down = 0; /* in % */ 675 u32 ei_up = 0, ei_down = 0; 676 677 lockdep_assert_held(&rps->power.mutex); 678 679 if (new_power == rps->power.mode) 680 return; 681 682 threshold_up = 95; 683 threshold_down = 85; 684 685 /* Note the units here are not exactly 1us, but 1280ns. */ 686 switch (new_power) { 687 case LOW_POWER: 688 ei_up = 16000; 689 ei_down = 32000; 690 break; 691 692 case BETWEEN: 693 ei_up = 13000; 694 ei_down = 32000; 695 break; 696 697 case HIGH_POWER: 698 ei_up = 10000; 699 ei_down = 32000; 700 break; 701 } 702 703 /* When byt can survive without system hang with dynamic 704 * sw freq adjustments, this restriction can be lifted. 705 */ 706 if (IS_VALLEYVIEW(gt->i915)) 707 goto skip_hw_write; 708 709 GT_TRACE(gt, 710 "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n", 711 new_power, threshold_up, ei_up, threshold_down, ei_down); 712 713 set(uncore, GEN6_RP_UP_EI, 714 intel_gt_ns_to_pm_interval(gt, ei_up * 1000)); 715 set(uncore, GEN6_RP_UP_THRESHOLD, 716 intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10)); 717 718 set(uncore, GEN6_RP_DOWN_EI, 719 intel_gt_ns_to_pm_interval(gt, ei_down * 1000)); 720 set(uncore, GEN6_RP_DOWN_THRESHOLD, 721 intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10)); 722 723 set(uncore, GEN6_RP_CONTROL, 724 (GRAPHICS_VER(gt->i915) > 9 ? 
0 : GEN6_RP_MEDIA_TURBO) | 725 GEN6_RP_MEDIA_HW_NORMAL_MODE | 726 GEN6_RP_MEDIA_IS_GFX | 727 GEN6_RP_ENABLE | 728 GEN6_RP_UP_BUSY_AVG | 729 GEN6_RP_DOWN_IDLE_AVG); 730 731 skip_hw_write: 732 rps->power.mode = new_power; 733 rps->power.up_threshold = threshold_up; 734 rps->power.down_threshold = threshold_down; 735 } 736 737 static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) 738 { 739 int new_power; 740 741 new_power = rps->power.mode; 742 switch (rps->power.mode) { 743 case LOW_POWER: 744 if (val > rps->efficient_freq + 1 && 745 val > rps->cur_freq) 746 new_power = BETWEEN; 747 break; 748 749 case BETWEEN: 750 if (val <= rps->efficient_freq && 751 val < rps->cur_freq) 752 new_power = LOW_POWER; 753 else if (val >= rps->rp0_freq && 754 val > rps->cur_freq) 755 new_power = HIGH_POWER; 756 break; 757 758 case HIGH_POWER: 759 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && 760 val < rps->cur_freq) 761 new_power = BETWEEN; 762 break; 763 } 764 /* Max/min bins are special */ 765 if (val <= rps->min_freq_softlimit) 766 new_power = LOW_POWER; 767 if (val >= rps->max_freq_softlimit) 768 new_power = HIGH_POWER; 769 770 mutex_lock(&rps->power.mutex); 771 if (rps->power.interactive) 772 new_power = HIGH_POWER; 773 rps_set_power(rps, new_power); 774 mutex_unlock(&rps->power.mutex); 775 } 776 777 void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) 778 { 779 GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", 780 str_yes_no(interactive)); 781 782 mutex_lock(&rps->power.mutex); 783 if (interactive) { 784 if (!rps->power.interactive++ && intel_rps_is_active(rps)) 785 rps_set_power(rps, HIGH_POWER); 786 } else { 787 GEM_BUG_ON(!rps->power.interactive); 788 rps->power.interactive--; 789 } 790 mutex_unlock(&rps->power.mutex); 791 } 792 793 static int gen6_rps_set(struct intel_rps *rps, u8 val) 794 { 795 struct intel_uncore *uncore = rps_to_uncore(rps); 796 struct drm_i915_private *i915 = rps_to_i915(rps); 797 u32 swreq; 798 799 GEM_BUG_ON(rps_uses_slpc(rps)); 800 801 if (GRAPHICS_VER(i915) >= 9) 802 swreq = GEN9_FREQUENCY(val); 803 else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) 804 swreq = HSW_FREQUENCY(val); 805 else 806 swreq = (GEN6_FREQUENCY(val) | 807 GEN6_OFFSET(0) | 808 GEN6_AGGRESSIVE_TURBO); 809 set(uncore, GEN6_RPNSWREQ, swreq); 810 811 GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n", 812 val, intel_gpu_freq(rps, val), swreq); 813 814 return 0; 815 } 816 817 static int vlv_rps_set(struct intel_rps *rps, u8 val) 818 { 819 struct drm_i915_private *i915 = rps_to_i915(rps); 820 int err; 821 822 vlv_punit_get(i915); 823 err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); 824 vlv_punit_put(i915); 825 826 GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n", 827 val, intel_gpu_freq(rps, val)); 828 829 return err; 830 } 831 832 static int rps_set(struct intel_rps *rps, u8 val, bool update) 833 { 834 struct drm_i915_private *i915 = rps_to_i915(rps); 835 int err; 836 837 if (val == rps->last_freq) 838 return 0; 839 840 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) 841 err = vlv_rps_set(rps, val); 842 else if (GRAPHICS_VER(i915) >= 6) 843 err = gen6_rps_set(rps, val); 844 else 845 err = gen5_rps_set(rps, val); 846 if (err) 847 return err; 848 849 if (update && GRAPHICS_VER(i915) >= 6) 850 gen6_rps_set_thresholds(rps, val); 851 rps->last_freq = val; 852 853 return 0; 854 } 855 856 void intel_rps_unpark(struct intel_rps *rps) 857 { 858 if (!intel_rps_is_enabled(rps)) 859 return; 860 861 GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq); 862 863 /* 864 * 
Use the user's desired frequency as a guide, but for better 865 * performance, jump directly to RPe as our starting frequency. 866 */ 867 mutex_lock(&rps->lock); 868 869 intel_rps_set_active(rps); 870 intel_rps_set(rps, 871 clamp(rps->cur_freq, 872 rps->min_freq_softlimit, 873 rps->max_freq_softlimit)); 874 875 mutex_unlock(&rps->lock); 876 877 rps->pm_iir = 0; 878 if (intel_rps_has_interrupts(rps)) 879 rps_enable_interrupts(rps); 880 if (intel_rps_uses_timer(rps)) 881 rps_start_timer(rps); 882 883 if (GRAPHICS_VER(rps_to_i915(rps)) == 5) 884 gen5_rps_update(rps); 885 } 886 887 void intel_rps_park(struct intel_rps *rps) 888 { 889 int adj; 890 891 if (!intel_rps_is_enabled(rps)) 892 return; 893 894 if (!intel_rps_clear_active(rps)) 895 return; 896 897 if (intel_rps_uses_timer(rps)) 898 rps_stop_timer(rps); 899 if (intel_rps_has_interrupts(rps)) 900 rps_disable_interrupts(rps); 901 902 if (rps->last_freq <= rps->idle_freq) 903 return; 904 905 /* 906 * The punit delays the write of the frequency and voltage until it 907 * determines the GPU is awake. During normal usage we don't want to 908 * waste power changing the frequency if the GPU is sleeping (rc6). 909 * However, the GPU and driver is now idle and we do not want to delay 910 * switching to minimum voltage (reducing power whilst idle) as we do 911 * not expect to be woken in the near future and so must flush the 912 * change by waking the device. 913 * 914 * We choose to take the media powerwell (either would do to trick the 915 * punit into committing the voltage change) as that takes a lot less 916 * power than the render powerwell. 917 */ 918 intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); 919 rps_set(rps, rps->idle_freq, false); 920 intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); 921 922 /* 923 * Since we will try and restart from the previously requested 924 * frequency on unparking, treat this idle point as a downclock 925 * interrupt and reduce the frequency for resume. If we park/unpark 926 * more frequently than the rps worker can run, we will not respond 927 * to any EI and never see a change in frequency. 928 * 929 * (Note we accommodate Cherryview's limitation of only using an 930 * even bin by applying it to all.) 
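	 * For example, if the previous adjustment was -2, parking doubles it
	 * to -4, so frequent park/unpark cycles back off the restart
	 * frequency geometrically until we clamp at the efficient frequency
	 * and reset last_adj below.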
931 */ 932 adj = rps->last_adj; 933 if (adj < 0) 934 adj *= 2; 935 else /* CHV needs even encode values */ 936 adj = -2; 937 rps->last_adj = adj; 938 rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq); 939 if (rps->cur_freq < rps->efficient_freq) { 940 rps->cur_freq = rps->efficient_freq; 941 rps->last_adj = 0; 942 } 943 944 GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq); 945 } 946 947 u32 intel_rps_get_boost_frequency(struct intel_rps *rps) 948 { 949 struct intel_guc_slpc *slpc; 950 951 if (rps_uses_slpc(rps)) { 952 slpc = rps_to_slpc(rps); 953 954 return slpc->boost_freq; 955 } else { 956 return intel_gpu_freq(rps, rps->boost_freq); 957 } 958 } 959 960 static int rps_set_boost_freq(struct intel_rps *rps, u32 val) 961 { 962 bool boost = false; 963 964 /* Validate against (static) hardware limits */ 965 val = intel_freq_opcode(rps, val); 966 if (val < rps->min_freq || val > rps->max_freq) 967 return -EINVAL; 968 969 mutex_lock(&rps->lock); 970 if (val != rps->boost_freq) { 971 rps->boost_freq = val; 972 boost = atomic_read(&rps->num_waiters); 973 } 974 mutex_unlock(&rps->lock); 975 if (boost) 976 schedule_work(&rps->work); 977 978 return 0; 979 } 980 981 int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq) 982 { 983 struct intel_guc_slpc *slpc; 984 985 if (rps_uses_slpc(rps)) { 986 slpc = rps_to_slpc(rps); 987 988 return intel_guc_slpc_set_boost_freq(slpc, freq); 989 } else { 990 return rps_set_boost_freq(rps, freq); 991 } 992 } 993 994 void intel_rps_dec_waiters(struct intel_rps *rps) 995 { 996 struct intel_guc_slpc *slpc; 997 998 if (rps_uses_slpc(rps)) { 999 slpc = rps_to_slpc(rps); 1000 1001 intel_guc_slpc_dec_waiters(slpc); 1002 } else { 1003 atomic_dec(&rps->num_waiters); 1004 } 1005 } 1006 1007 void intel_rps_boost(struct i915_request *rq) 1008 { 1009 struct intel_guc_slpc *slpc; 1010 1011 if (i915_request_signaled(rq) || i915_request_has_waitboost(rq)) 1012 return; 1013 1014 /* Serializes with i915_request_retire() */ 1015 if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) { 1016 struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; 1017 1018 if (rps_uses_slpc(rps)) { 1019 slpc = rps_to_slpc(rps); 1020 1021 /* Return if old value is non zero */ 1022 if (!atomic_fetch_inc(&slpc->num_waiters)) 1023 schedule_work(&slpc->boost_work); 1024 1025 return; 1026 } 1027 1028 if (atomic_fetch_inc(&rps->num_waiters)) 1029 return; 1030 1031 if (!intel_rps_is_active(rps)) 1032 return; 1033 1034 GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", 1035 rq->fence.context, rq->fence.seqno); 1036 1037 if (READ_ONCE(rps->cur_freq) < rps->boost_freq) 1038 schedule_work(&rps->work); 1039 1040 WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */ 1041 } 1042 } 1043 1044 int intel_rps_set(struct intel_rps *rps, u8 val) 1045 { 1046 int err; 1047 1048 lockdep_assert_held(&rps->lock); 1049 GEM_BUG_ON(val > rps->max_freq); 1050 GEM_BUG_ON(val < rps->min_freq); 1051 1052 if (intel_rps_is_active(rps)) { 1053 err = rps_set(rps, val, true); 1054 if (err) 1055 return err; 1056 1057 /* 1058 * Make sure we continue to get interrupts 1059 * until we hit the minimum or maximum frequencies. 
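		 * (rps_pm_mask() drops the up or down event bits once val
		 * reaches the corresponding softlimit, so we stop taking
		 * interrupts for transitions we can no longer make.)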
1060 */ 1061 if (intel_rps_has_interrupts(rps)) { 1062 struct intel_uncore *uncore = rps_to_uncore(rps); 1063 1064 set(uncore, 1065 GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val)); 1066 1067 set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val)); 1068 } 1069 } 1070 1071 rps->cur_freq = val; 1072 return 0; 1073 } 1074 1075 static u32 intel_rps_read_state_cap(struct intel_rps *rps) 1076 { 1077 struct drm_i915_private *i915 = rps_to_i915(rps); 1078 struct intel_uncore *uncore = rps_to_uncore(rps); 1079 1080 if (IS_PONTEVECCHIO(i915)) 1081 return intel_uncore_read(uncore, PVC_RP_STATE_CAP); 1082 else if (IS_XEHPSDV(i915)) 1083 return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP); 1084 else if (IS_GEN9_LP(i915)) 1085 return intel_uncore_read(uncore, BXT_RP_STATE_CAP); 1086 else 1087 return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); 1088 } 1089 1090 /** 1091 * gen6_rps_get_freq_caps - Get freq caps exposed by HW 1092 * @rps: the intel_rps structure 1093 * @caps: returned freq caps 1094 * 1095 * Returned "caps" frequencies should be converted to MHz using 1096 * intel_gpu_freq() 1097 */ 1098 void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) 1099 { 1100 struct drm_i915_private *i915 = rps_to_i915(rps); 1101 u32 rp_state_cap; 1102 1103 rp_state_cap = intel_rps_read_state_cap(rps); 1104 1105 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 1106 if (IS_GEN9_LP(i915)) { 1107 caps->rp0_freq = (rp_state_cap >> 16) & 0xff; 1108 caps->rp1_freq = (rp_state_cap >> 8) & 0xff; 1109 caps->min_freq = (rp_state_cap >> 0) & 0xff; 1110 } else { 1111 caps->rp0_freq = (rp_state_cap >> 0) & 0xff; 1112 if (GRAPHICS_VER(i915) >= 10) 1113 caps->rp1_freq = REG_FIELD_GET(RPE_MASK, 1114 intel_uncore_read(to_gt(i915)->uncore, 1115 GEN10_FREQ_INFO_REC)); 1116 else 1117 caps->rp1_freq = (rp_state_cap >> 8) & 0xff; 1118 caps->min_freq = (rp_state_cap >> 16) & 0xff; 1119 } 1120 1121 if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { 1122 /* 1123 * In this case rp_state_cap register reports frequencies in 1124 * units of 50 MHz. Convert these to the actual "hw unit", i.e. 
1125 * units of 16.67 MHz 1126 */ 1127 caps->rp0_freq *= GEN9_FREQ_SCALER; 1128 caps->rp1_freq *= GEN9_FREQ_SCALER; 1129 caps->min_freq *= GEN9_FREQ_SCALER; 1130 } 1131 } 1132 1133 static void gen6_rps_init(struct intel_rps *rps) 1134 { 1135 struct drm_i915_private *i915 = rps_to_i915(rps); 1136 struct intel_rps_freq_caps caps; 1137 1138 gen6_rps_get_freq_caps(rps, &caps); 1139 rps->rp0_freq = caps.rp0_freq; 1140 rps->rp1_freq = caps.rp1_freq; 1141 rps->min_freq = caps.min_freq; 1142 1143 /* hw_max = RP0 until we check for overclocking */ 1144 rps->max_freq = rps->rp0_freq; 1145 1146 rps->efficient_freq = rps->rp1_freq; 1147 if (IS_HASWELL(i915) || IS_BROADWELL(i915) || 1148 IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { 1149 u32 ddcc_status = 0; 1150 u32 mult = 1; 1151 1152 if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) 1153 mult = GEN9_FREQ_SCALER; 1154 if (snb_pcode_read(rps_to_gt(rps)->uncore, 1155 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 1156 &ddcc_status, NULL) == 0) 1157 rps->efficient_freq = 1158 clamp_t(u32, 1159 ((ddcc_status >> 8) & 0xff) * mult, 1160 rps->min_freq, 1161 rps->max_freq); 1162 } 1163 } 1164 1165 static bool rps_reset(struct intel_rps *rps) 1166 { 1167 struct drm_i915_private *i915 = rps_to_i915(rps); 1168 1169 /* force a reset */ 1170 rps->power.mode = -1; 1171 rps->last_freq = -1; 1172 1173 if (rps_set(rps, rps->min_freq, true)) { 1174 drm_err(&i915->drm, "Failed to reset RPS to initial values\n"); 1175 return false; 1176 } 1177 1178 rps->cur_freq = rps->min_freq; 1179 return true; 1180 } 1181 1182 /* See the Gen9_GT_PM_Programming_Guide doc for the below */ 1183 static bool gen9_rps_enable(struct intel_rps *rps) 1184 { 1185 struct intel_gt *gt = rps_to_gt(rps); 1186 struct intel_uncore *uncore = gt->uncore; 1187 1188 /* Program defaults and thresholds for RPS */ 1189 if (GRAPHICS_VER(gt->i915) == 9) 1190 intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, 1191 GEN9_FREQUENCY(rps->rp1_freq)); 1192 1193 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); 1194 1195 rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; 1196 1197 return rps_reset(rps); 1198 } 1199 1200 static bool gen8_rps_enable(struct intel_rps *rps) 1201 { 1202 struct intel_uncore *uncore = rps_to_uncore(rps); 1203 1204 intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, 1205 HSW_FREQUENCY(rps->rp1_freq)); 1206 1207 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1208 1209 rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; 1210 1211 return rps_reset(rps); 1212 } 1213 1214 static bool gen6_rps_enable(struct intel_rps *rps) 1215 { 1216 struct intel_uncore *uncore = rps_to_uncore(rps); 1217 1218 /* Power down if completely idle for over 50ms */ 1219 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); 1220 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1221 1222 rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | 1223 GEN6_PM_RP_DOWN_THRESHOLD | 1224 GEN6_PM_RP_DOWN_TIMEOUT); 1225 1226 return rps_reset(rps); 1227 } 1228 1229 static int chv_rps_max_freq(struct intel_rps *rps) 1230 { 1231 struct drm_i915_private *i915 = rps_to_i915(rps); 1232 struct intel_gt *gt = rps_to_gt(rps); 1233 u32 val; 1234 1235 val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); 1236 1237 switch (gt->info.sseu.eu_total) { 1238 case 8: 1239 /* (2 * 4) config */ 1240 val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT; 1241 break; 1242 case 12: 1243 /* (2 * 6) config */ 1244 val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT; 1245 break; 1246 case 16: 1247 /* (2 * 8) config */ 
1248 default: 1249 /* Setting (2 * 8) Min RP0 for any other combination */ 1250 val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT; 1251 break; 1252 } 1253 1254 return val & FB_GFX_FREQ_FUSE_MASK; 1255 } 1256 1257 static int chv_rps_rpe_freq(struct intel_rps *rps) 1258 { 1259 struct drm_i915_private *i915 = rps_to_i915(rps); 1260 u32 val; 1261 1262 val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG); 1263 val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT; 1264 1265 return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; 1266 } 1267 1268 static int chv_rps_guar_freq(struct intel_rps *rps) 1269 { 1270 struct drm_i915_private *i915 = rps_to_i915(rps); 1271 u32 val; 1272 1273 val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); 1274 1275 return val & FB_GFX_FREQ_FUSE_MASK; 1276 } 1277 1278 static u32 chv_rps_min_freq(struct intel_rps *rps) 1279 { 1280 struct drm_i915_private *i915 = rps_to_i915(rps); 1281 u32 val; 1282 1283 val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE); 1284 val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT; 1285 1286 return val & FB_GFX_FREQ_FUSE_MASK; 1287 } 1288 1289 static bool chv_rps_enable(struct intel_rps *rps) 1290 { 1291 struct intel_uncore *uncore = rps_to_uncore(rps); 1292 struct drm_i915_private *i915 = rps_to_i915(rps); 1293 u32 val; 1294 1295 /* 1: Program defaults and thresholds for RPS*/ 1296 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); 1297 intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); 1298 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); 1299 intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); 1300 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); 1301 1302 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1303 1304 /* 2: Enable RPS */ 1305 intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, 1306 GEN6_RP_MEDIA_HW_NORMAL_MODE | 1307 GEN6_RP_MEDIA_IS_GFX | 1308 GEN6_RP_ENABLE | 1309 GEN6_RP_UP_BUSY_AVG | 1310 GEN6_RP_DOWN_IDLE_AVG); 1311 1312 rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | 1313 GEN6_PM_RP_DOWN_THRESHOLD | 1314 GEN6_PM_RP_DOWN_TIMEOUT); 1315 1316 /* Setting Fixed Bias */ 1317 vlv_punit_get(i915); 1318 1319 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; 1320 vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); 1321 1322 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1323 1324 vlv_punit_put(i915); 1325 1326 /* RPS code assumes GPLL is used */ 1327 drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, 1328 "GPLL not enabled\n"); 1329 1330 drm_dbg(&i915->drm, "GPLL enabled? 
%s\n", 1331 str_yes_no(val & GPLLENABLE)); 1332 drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); 1333 1334 return rps_reset(rps); 1335 } 1336 1337 static int vlv_rps_guar_freq(struct intel_rps *rps) 1338 { 1339 struct drm_i915_private *i915 = rps_to_i915(rps); 1340 u32 val, rp1; 1341 1342 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); 1343 1344 rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK; 1345 rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; 1346 1347 return rp1; 1348 } 1349 1350 static int vlv_rps_max_freq(struct intel_rps *rps) 1351 { 1352 struct drm_i915_private *i915 = rps_to_i915(rps); 1353 u32 val, rp0; 1354 1355 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); 1356 1357 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; 1358 /* Clamp to max */ 1359 rp0 = min_t(u32, rp0, 0xea); 1360 1361 return rp0; 1362 } 1363 1364 static int vlv_rps_rpe_freq(struct intel_rps *rps) 1365 { 1366 struct drm_i915_private *i915 = rps_to_i915(rps); 1367 u32 val, rpe; 1368 1369 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO); 1370 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; 1371 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI); 1372 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; 1373 1374 return rpe; 1375 } 1376 1377 static int vlv_rps_min_freq(struct intel_rps *rps) 1378 { 1379 struct drm_i915_private *i915 = rps_to_i915(rps); 1380 u32 val; 1381 1382 val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff; 1383 /* 1384 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value 1385 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on 1386 * a BYT-M B0 the above register contains 0xbf. Moreover when setting 1387 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 1388 * to make sure it matches what Punit accepts. 1389 */ 1390 return max_t(u32, val, 0xc0); 1391 } 1392 1393 static bool vlv_rps_enable(struct intel_rps *rps) 1394 { 1395 struct intel_uncore *uncore = rps_to_uncore(rps); 1396 struct drm_i915_private *i915 = rps_to_i915(rps); 1397 u32 val; 1398 1399 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); 1400 intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); 1401 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); 1402 intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); 1403 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); 1404 1405 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1406 1407 intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, 1408 GEN6_RP_MEDIA_TURBO | 1409 GEN6_RP_MEDIA_HW_NORMAL_MODE | 1410 GEN6_RP_MEDIA_IS_GFX | 1411 GEN6_RP_ENABLE | 1412 GEN6_RP_UP_BUSY_AVG | 1413 GEN6_RP_DOWN_IDLE_CONT); 1414 1415 /* WaGsvRC0ResidencyMethod:vlv */ 1416 rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; 1417 1418 vlv_punit_get(i915); 1419 1420 /* Setting Fixed Bias */ 1421 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; 1422 vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); 1423 1424 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1425 1426 vlv_punit_put(i915); 1427 1428 /* RPS code assumes GPLL is used */ 1429 drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, 1430 "GPLL not enabled\n"); 1431 1432 drm_dbg(&i915->drm, "GPLL enabled? 
%s\n", 1433 str_yes_no(val & GPLLENABLE)); 1434 drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); 1435 1436 return rps_reset(rps); 1437 } 1438 1439 static unsigned long __ips_gfx_val(struct intel_ips *ips) 1440 { 1441 struct intel_rps *rps = container_of(ips, typeof(*rps), ips); 1442 struct intel_uncore *uncore = rps_to_uncore(rps); 1443 unsigned int t, state1, state2; 1444 u32 pxvid, ext_v; 1445 u64 corr, corr2; 1446 1447 lockdep_assert_held(&mchdev_lock); 1448 1449 pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq)); 1450 pxvid = (pxvid >> 24) & 0x7f; 1451 ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid); 1452 1453 state1 = ext_v; 1454 1455 /* Revel in the empirically derived constants */ 1456 1457 /* Correction factor in 1/100000 units */ 1458 t = ips_mch_val(uncore); 1459 if (t > 80) 1460 corr = t * 2349 + 135940; 1461 else if (t >= 50) 1462 corr = t * 964 + 29317; 1463 else /* < 50 */ 1464 corr = t * 301 + 1004; 1465 1466 corr = div_u64(corr * 150142 * state1, 10000) - 78642; 1467 corr2 = div_u64(corr, 100000) * ips->corr; 1468 1469 state2 = div_u64(corr2 * state1, 10000); 1470 state2 /= 100; /* convert to mW */ 1471 1472 __gen5_ips_update(ips); 1473 1474 return ips->gfx_power + state2; 1475 } 1476 1477 static bool has_busy_stats(struct intel_rps *rps) 1478 { 1479 struct intel_engine_cs *engine; 1480 enum intel_engine_id id; 1481 1482 for_each_engine(engine, rps_to_gt(rps), id) { 1483 if (!intel_engine_supports_stats(engine)) 1484 return false; 1485 } 1486 1487 return true; 1488 } 1489 1490 void intel_rps_enable(struct intel_rps *rps) 1491 { 1492 struct drm_i915_private *i915 = rps_to_i915(rps); 1493 struct intel_uncore *uncore = rps_to_uncore(rps); 1494 bool enabled = false; 1495 1496 if (!HAS_RPS(i915)) 1497 return; 1498 1499 if (rps_uses_slpc(rps)) 1500 return; 1501 1502 intel_gt_check_clock_frequency(rps_to_gt(rps)); 1503 1504 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 1505 if (rps->max_freq <= rps->min_freq) 1506 /* leave disabled, no room for dynamic reclocking */; 1507 else if (IS_CHERRYVIEW(i915)) 1508 enabled = chv_rps_enable(rps); 1509 else if (IS_VALLEYVIEW(i915)) 1510 enabled = vlv_rps_enable(rps); 1511 else if (GRAPHICS_VER(i915) >= 9) 1512 enabled = gen9_rps_enable(rps); 1513 else if (GRAPHICS_VER(i915) >= 8) 1514 enabled = gen8_rps_enable(rps); 1515 else if (GRAPHICS_VER(i915) >= 6) 1516 enabled = gen6_rps_enable(rps); 1517 else if (IS_IRONLAKE_M(i915)) 1518 enabled = gen5_rps_enable(rps); 1519 else 1520 MISSING_CASE(GRAPHICS_VER(i915)); 1521 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 1522 if (!enabled) 1523 return; 1524 1525 GT_TRACE(rps_to_gt(rps), 1526 "min:%x, max:%x, freq:[%d, %d]\n", 1527 rps->min_freq, rps->max_freq, 1528 intel_gpu_freq(rps, rps->min_freq), 1529 intel_gpu_freq(rps, rps->max_freq)); 1530 1531 GEM_BUG_ON(rps->max_freq < rps->min_freq); 1532 GEM_BUG_ON(rps->idle_freq > rps->max_freq); 1533 1534 GEM_BUG_ON(rps->efficient_freq < rps->min_freq); 1535 GEM_BUG_ON(rps->efficient_freq > rps->max_freq); 1536 1537 if (has_busy_stats(rps)) 1538 intel_rps_set_timer(rps); 1539 else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11) 1540 intel_rps_set_interrupts(rps); 1541 else 1542 /* Ironlake currently uses intel_ips.ko */ {} 1543 1544 intel_rps_set_enabled(rps); 1545 } 1546 1547 static void gen6_rps_disable(struct intel_rps *rps) 1548 { 1549 set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0); 1550 } 1551 1552 void intel_rps_disable(struct intel_rps *rps) 1553 { 1554 struct drm_i915_private *i915 = rps_to_i915(rps); 1555 1556 if 
(!intel_rps_is_enabled(rps)) 1557 return; 1558 1559 intel_rps_clear_enabled(rps); 1560 intel_rps_clear_interrupts(rps); 1561 intel_rps_clear_timer(rps); 1562 1563 if (GRAPHICS_VER(i915) >= 6) 1564 gen6_rps_disable(rps); 1565 else if (IS_IRONLAKE_M(i915)) 1566 gen5_rps_disable(rps); 1567 } 1568 1569 static int byt_gpu_freq(struct intel_rps *rps, int val) 1570 { 1571 /* 1572 * N = val - 0xb7 1573 * Slow = Fast = GPLL ref * N 1574 */ 1575 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); 1576 } 1577 1578 static int byt_freq_opcode(struct intel_rps *rps, int val) 1579 { 1580 return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; 1581 } 1582 1583 static int chv_gpu_freq(struct intel_rps *rps, int val) 1584 { 1585 /* 1586 * N = val / 2 1587 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 1588 */ 1589 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); 1590 } 1591 1592 static int chv_freq_opcode(struct intel_rps *rps, int val) 1593 { 1594 /* CHV needs even values */ 1595 return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; 1596 } 1597 1598 int intel_gpu_freq(struct intel_rps *rps, int val) 1599 { 1600 struct drm_i915_private *i915 = rps_to_i915(rps); 1601 1602 if (GRAPHICS_VER(i915) >= 9) 1603 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, 1604 GEN9_FREQ_SCALER); 1605 else if (IS_CHERRYVIEW(i915)) 1606 return chv_gpu_freq(rps, val); 1607 else if (IS_VALLEYVIEW(i915)) 1608 return byt_gpu_freq(rps, val); 1609 else if (GRAPHICS_VER(i915) >= 6) 1610 return val * GT_FREQUENCY_MULTIPLIER; 1611 else 1612 return val; 1613 } 1614 1615 int intel_freq_opcode(struct intel_rps *rps, int val) 1616 { 1617 struct drm_i915_private *i915 = rps_to_i915(rps); 1618 1619 if (GRAPHICS_VER(i915) >= 9) 1620 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, 1621 GT_FREQUENCY_MULTIPLIER); 1622 else if (IS_CHERRYVIEW(i915)) 1623 return chv_freq_opcode(rps, val); 1624 else if (IS_VALLEYVIEW(i915)) 1625 return byt_freq_opcode(rps, val); 1626 else if (GRAPHICS_VER(i915) >= 6) 1627 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); 1628 else 1629 return val; 1630 } 1631 1632 static void vlv_init_gpll_ref_freq(struct intel_rps *rps) 1633 { 1634 struct drm_i915_private *i915 = rps_to_i915(rps); 1635 1636 rps->gpll_ref_freq = 1637 vlv_get_cck_clock(i915, "GPLL ref", 1638 CCK_GPLL_CLOCK_CONTROL, 1639 i915->czclk_freq); 1640 1641 drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n", 1642 rps->gpll_ref_freq); 1643 } 1644 1645 static void vlv_rps_init(struct intel_rps *rps) 1646 { 1647 struct drm_i915_private *i915 = rps_to_i915(rps); 1648 u32 val; 1649 1650 vlv_iosf_sb_get(i915, 1651 BIT(VLV_IOSF_SB_PUNIT) | 1652 BIT(VLV_IOSF_SB_NC) | 1653 BIT(VLV_IOSF_SB_CCK)); 1654 1655 vlv_init_gpll_ref_freq(rps); 1656 1657 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1658 switch ((val >> 6) & 3) { 1659 case 0: 1660 case 1: 1661 i915->mem_freq = 800; 1662 break; 1663 case 2: 1664 i915->mem_freq = 1066; 1665 break; 1666 case 3: 1667 i915->mem_freq = 1333; 1668 break; 1669 } 1670 drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq); 1671 1672 rps->max_freq = vlv_rps_max_freq(rps); 1673 rps->rp0_freq = rps->max_freq; 1674 drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", 1675 intel_gpu_freq(rps, rps->max_freq), rps->max_freq); 1676 1677 rps->efficient_freq = vlv_rps_rpe_freq(rps); 1678 drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", 1679 intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); 1680 1681 rps->rp1_freq = vlv_rps_guar_freq(rps); 1682 
drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n", 1683 intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); 1684 1685 rps->min_freq = vlv_rps_min_freq(rps); 1686 drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", 1687 intel_gpu_freq(rps, rps->min_freq), rps->min_freq); 1688 1689 vlv_iosf_sb_put(i915, 1690 BIT(VLV_IOSF_SB_PUNIT) | 1691 BIT(VLV_IOSF_SB_NC) | 1692 BIT(VLV_IOSF_SB_CCK)); 1693 } 1694 1695 static void chv_rps_init(struct intel_rps *rps) 1696 { 1697 struct drm_i915_private *i915 = rps_to_i915(rps); 1698 u32 val; 1699 1700 vlv_iosf_sb_get(i915, 1701 BIT(VLV_IOSF_SB_PUNIT) | 1702 BIT(VLV_IOSF_SB_NC) | 1703 BIT(VLV_IOSF_SB_CCK)); 1704 1705 vlv_init_gpll_ref_freq(rps); 1706 1707 val = vlv_cck_read(i915, CCK_FUSE_REG); 1708 1709 switch ((val >> 2) & 0x7) { 1710 case 3: 1711 i915->mem_freq = 2000; 1712 break; 1713 default: 1714 i915->mem_freq = 1600; 1715 break; 1716 } 1717 drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq); 1718 1719 rps->max_freq = chv_rps_max_freq(rps); 1720 rps->rp0_freq = rps->max_freq; 1721 drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", 1722 intel_gpu_freq(rps, rps->max_freq), rps->max_freq); 1723 1724 rps->efficient_freq = chv_rps_rpe_freq(rps); 1725 drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", 1726 intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); 1727 1728 rps->rp1_freq = chv_rps_guar_freq(rps); 1729 drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n", 1730 intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); 1731 1732 rps->min_freq = chv_rps_min_freq(rps); 1733 drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", 1734 intel_gpu_freq(rps, rps->min_freq), rps->min_freq); 1735 1736 vlv_iosf_sb_put(i915, 1737 BIT(VLV_IOSF_SB_PUNIT) | 1738 BIT(VLV_IOSF_SB_NC) | 1739 BIT(VLV_IOSF_SB_CCK)); 1740 1741 drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq | 1742 rps->rp1_freq | rps->min_freq) & 1, 1743 "Odd GPU freq values\n"); 1744 } 1745 1746 static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) 1747 { 1748 ei->ktime = ktime_get_raw(); 1749 ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT); 1750 ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT); 1751 } 1752 1753 static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir) 1754 { 1755 struct intel_uncore *uncore = rps_to_uncore(rps); 1756 const struct intel_rps_ei *prev = &rps->ei; 1757 struct intel_rps_ei now; 1758 u32 events = 0; 1759 1760 if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) 1761 return 0; 1762 1763 vlv_c0_read(uncore, &now); 1764 1765 #ifdef __linux__ 1766 if (prev->ktime) { 1767 #else 1768 if (ktime_to_ns(prev->ktime)) { 1769 #endif 1770 u64 time, c0; 1771 u32 render, media; 1772 1773 time = ktime_us_delta(now.ktime, prev->ktime); 1774 1775 time *= rps_to_i915(rps)->czclk_freq; 1776 1777 /* Workload can be split between render + media, 1778 * e.g. SwapBuffers being blitted in X after being rendered in 1779 * mesa. To account for this we need to combine both engines 1780 * into our activity counter. 
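		 * We take the larger of the render and media C0 deltas,
		 * scale it to a percentage of the elapsed (czclk-weighted)
		 * time and compare it against rps->power.up_threshold /
		 * down_threshold to synthesise up/down events.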
1781 */ 1782 render = now.render_c0 - prev->render_c0; 1783 media = now.media_c0 - prev->media_c0; 1784 c0 = max(render, media); 1785 c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ 1786 1787 if (c0 > time * rps->power.up_threshold) 1788 events = GEN6_PM_RP_UP_THRESHOLD; 1789 else if (c0 < time * rps->power.down_threshold) 1790 events = GEN6_PM_RP_DOWN_THRESHOLD; 1791 } 1792 1793 rps->ei = now; 1794 return events; 1795 } 1796 1797 static void rps_work(struct work_struct *work) 1798 { 1799 struct intel_rps *rps = container_of(work, typeof(*rps), work); 1800 struct intel_gt *gt = rps_to_gt(rps); 1801 struct drm_i915_private *i915 = rps_to_i915(rps); 1802 bool client_boost = false; 1803 int new_freq, adj, min, max; 1804 u32 pm_iir = 0; 1805 1806 spin_lock_irq(gt->irq_lock); 1807 pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events; 1808 client_boost = atomic_read(&rps->num_waiters); 1809 spin_unlock_irq(gt->irq_lock); 1810 1811 /* Make sure we didn't queue anything we're not going to process. */ 1812 if (!pm_iir && !client_boost) 1813 goto out; 1814 1815 mutex_lock(&rps->lock); 1816 if (!intel_rps_is_active(rps)) { 1817 mutex_unlock(&rps->lock); 1818 return; 1819 } 1820 1821 pm_iir |= vlv_wa_c0_ei(rps, pm_iir); 1822 1823 adj = rps->last_adj; 1824 new_freq = rps->cur_freq; 1825 min = rps->min_freq_softlimit; 1826 max = rps->max_freq_softlimit; 1827 if (client_boost) 1828 max = rps->max_freq; 1829 1830 GT_TRACE(gt, 1831 "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n", 1832 pm_iir, str_yes_no(client_boost), 1833 adj, new_freq, min, max); 1834 1835 if (client_boost && new_freq < rps->boost_freq) { 1836 new_freq = rps->boost_freq; 1837 adj = 0; 1838 } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { 1839 if (adj > 0) 1840 adj *= 2; 1841 else /* CHV needs even encode values */ 1842 adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1; 1843 1844 if (new_freq >= rps->max_freq_softlimit) 1845 adj = 0; 1846 } else if (client_boost) { 1847 adj = 0; 1848 } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { 1849 if (rps->cur_freq > rps->efficient_freq) 1850 new_freq = rps->efficient_freq; 1851 else if (rps->cur_freq > rps->min_freq_softlimit) 1852 new_freq = rps->min_freq_softlimit; 1853 adj = 0; 1854 } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { 1855 if (adj < 0) 1856 adj *= 2; 1857 else /* CHV needs even encode values */ 1858 adj = IS_CHERRYVIEW(gt->i915) ? 
-2 : -1; 1859 1860 if (new_freq <= rps->min_freq_softlimit) 1861 adj = 0; 1862 } else { /* unknown event */ 1863 adj = 0; 1864 } 1865 1866 /* 1867 * sysfs frequency limits may have snuck in while 1868 * servicing the interrupt 1869 */ 1870 new_freq += adj; 1871 new_freq = clamp_t(int, new_freq, min, max); 1872 1873 if (intel_rps_set(rps, new_freq)) { 1874 drm_dbg(&i915->drm, "Failed to set new GPU frequency\n"); 1875 adj = 0; 1876 } 1877 rps->last_adj = adj; 1878 1879 mutex_unlock(&rps->lock); 1880 1881 out: 1882 spin_lock_irq(gt->irq_lock); 1883 gen6_gt_pm_unmask_irq(gt, rps->pm_events); 1884 spin_unlock_irq(gt->irq_lock); 1885 } 1886 1887 void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) 1888 { 1889 struct intel_gt *gt = rps_to_gt(rps); 1890 const u32 events = rps->pm_events & pm_iir; 1891 1892 lockdep_assert_held(gt->irq_lock); 1893 1894 if (unlikely(!events)) 1895 return; 1896 1897 GT_TRACE(gt, "irq events:%x\n", events); 1898 1899 gen6_gt_pm_mask_irq(gt, events); 1900 1901 rps->pm_iir |= events; 1902 schedule_work(&rps->work); 1903 } 1904 1905 void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) 1906 { 1907 struct intel_gt *gt = rps_to_gt(rps); 1908 u32 events; 1909 1910 events = pm_iir & rps->pm_events; 1911 if (events) { 1912 spin_lock(gt->irq_lock); 1913 1914 GT_TRACE(gt, "irq events:%x\n", events); 1915 1916 gen6_gt_pm_mask_irq(gt, events); 1917 rps->pm_iir |= events; 1918 1919 schedule_work(&rps->work); 1920 spin_unlock(gt->irq_lock); 1921 } 1922 1923 if (GRAPHICS_VER(gt->i915) >= 8) 1924 return; 1925 1926 if (pm_iir & PM_VEBOX_USER_INTERRUPT) 1927 intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10); 1928 1929 if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) 1930 DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); 1931 } 1932 1933 void gen5_rps_irq_handler(struct intel_rps *rps) 1934 { 1935 struct intel_uncore *uncore = rps_to_uncore(rps); 1936 u32 busy_up, busy_down, max_avg, min_avg; 1937 u8 new_freq; 1938 1939 spin_lock(&mchdev_lock); 1940 1941 intel_uncore_write16(uncore, 1942 MEMINTRSTS, 1943 intel_uncore_read(uncore, MEMINTRSTS)); 1944 1945 intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); 1946 busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG); 1947 busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG); 1948 max_avg = intel_uncore_read(uncore, RCBMAXAVG); 1949 min_avg = intel_uncore_read(uncore, RCBMINAVG); 1950 1951 /* Handle RCS change request from hw */ 1952 new_freq = rps->cur_freq; 1953 if (busy_up > max_avg) 1954 new_freq++; 1955 else if (busy_down < min_avg) 1956 new_freq--; 1957 new_freq = clamp(new_freq, 1958 rps->min_freq_softlimit, 1959 rps->max_freq_softlimit); 1960 1961 if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq)) 1962 rps->cur_freq = new_freq; 1963 1964 spin_unlock(&mchdev_lock); 1965 } 1966 1967 void intel_rps_init_early(struct intel_rps *rps) 1968 { 1969 rw_init(&rps->lock, "rpslk"); 1970 rw_init(&rps->power.mutex, "rpspwr"); 1971 1972 INIT_WORK(&rps->work, rps_work); 1973 #ifdef __linux__ 1974 timer_setup(&rps->timer, rps_timer, 0); 1975 #else 1976 timeout_set(&rps->timer, rps_timer, rps); 1977 #endif 1978 1979 atomic_set(&rps->num_waiters, 0); 1980 } 1981 1982 void intel_rps_init(struct intel_rps *rps) 1983 { 1984 struct drm_i915_private *i915 = rps_to_i915(rps); 1985 1986 if (rps_uses_slpc(rps)) 1987 return; 1988 1989 if (IS_CHERRYVIEW(i915)) 1990 chv_rps_init(rps); 1991 else if (IS_VALLEYVIEW(i915)) 1992 vlv_rps_init(rps); 1993 else if (GRAPHICS_VER(i915) >= 6) 1994 gen6_rps_init(rps); 1995 else if 
(IS_IRONLAKE_M(i915))
		gen5_rps_init(rps);

	/* Derive initial user preferences/limits from the hardware limits */
	rps->max_freq_softlimit = rps->max_freq;
	rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit;
	rps->min_freq_softlimit = rps->min_freq;
	rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit;

	/* After setting max-softlimit, find the overclock max freq */
	if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
		u32 params = 0;

		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_READ_OC_PARAMS, &params, NULL);
		if (params & BIT(31)) { /* OC supported */
			drm_dbg(&i915->drm,
				"Overclocking supported, max: %dMHz, overclock: %dMHz\n",
				(rps->max_freq & 0xff) * 50,
				(params & 0xff) * 50);
			rps->max_freq = params & 0xff;
		}
	}

	/* Finally allow us to boost to max by default */
	rps->boost_freq = rps->max_freq;
	rps->idle_freq = rps->min_freq;

	/* Start in the middle, from here we will autotune based on workload */
	rps->cur_freq = rps->efficient_freq;

	rps->pm_intrmsk_mbz = 0;

	/*
	 * SNB, IVB and HSW can (and VLV, CHV may) hard hang on a looping
	 * batchbuffer if GEN6_PM_UP_EI_EXPIRED is masked.
	 *
	 * TODO: verify if this can be reproduced on VLV,CHV.
	 */
	if (GRAPHICS_VER(i915) <= 7)
		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;

	if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11)
		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;

	/* GuC needs ARAT expired interrupt unmasked */
	if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc))
		rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
}

void intel_rps_sanitize(struct intel_rps *rps)
{
	if (rps_uses_slpc(rps))
		return;

	if (GRAPHICS_VER(rps_to_i915(rps)) >= 6)
		rps_disable_interrupts(rps);
}

u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 cagf;

	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
		cagf = (rpstat >> 8) & 0xff;
	else if (GRAPHICS_VER(i915) >= 9)
		cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
		cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
	else if (GRAPHICS_VER(i915) >= 6)
		cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
	else
		cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >>
					MEMSTAT_PSTATE_SHIFT);

	return cagf;
}

static u32 read_cagf(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u32 freq;

	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
		vlv_punit_get(i915);
		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
		vlv_punit_put(i915);
	} else if (GRAPHICS_VER(i915) >= 6) {
		freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
	} else {
		freq = intel_uncore_read(uncore, MEMSTAT_ILK);
	}

	return intel_rps_get_cagf(rps, freq);
}

u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
{
	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
	intel_wakeref_t wakeref;
	u32 freq = 0;

	with_intel_runtime_pm_if_in_use(rpm, wakeref)
		freq = intel_gpu_freq(rps, read_cagf(rps));

	return freq;
}

u32
u32 intel_rps_read_punit_req(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
	intel_wakeref_t wakeref;
	u32 freq = 0;

	with_intel_runtime_pm_if_in_use(rpm, wakeref)
		freq = intel_uncore_read(uncore, GEN6_RPNSWREQ);

	return freq;
}

static u32 intel_rps_get_req(u32 pureq)
{
	u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT;

	return req;
}

u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps)
{
	u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps));

	return intel_gpu_freq(rps, freq);
}

u32 intel_rps_get_requested_frequency(struct intel_rps *rps)
{
	if (rps_uses_slpc(rps))
		return intel_rps_read_punit_req_frequency(rps);
	else
		return intel_gpu_freq(rps, rps->cur_freq);
}

u32 intel_rps_get_max_frequency(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);

	if (rps_uses_slpc(rps))
		return slpc->max_freq_softlimit;
	else
		return intel_gpu_freq(rps, rps->max_freq_softlimit);
}

/**
 * intel_rps_get_max_raw_freq - returns the max frequency in raw units.
 * @rps: the intel_rps structure
 *
 * Returns the max frequency in a raw format. On newer platforms, raw values
 * are in units of 50 MHz.
 */
u32 intel_rps_get_max_raw_freq(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
	u32 freq;

	if (rps_uses_slpc(rps)) {
		return DIV_ROUND_CLOSEST(slpc->rp0_freq,
					 GT_FREQUENCY_MULTIPLIER);
	} else {
		freq = rps->max_freq;
		if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
			/* Convert GT frequency to 50 MHz units */
			freq /= GEN9_FREQ_SCALER;
		}
		return freq;
	}
}

u32 intel_rps_get_rp0_frequency(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);

	if (rps_uses_slpc(rps))
		return slpc->rp0_freq;
	else
		return intel_gpu_freq(rps, rps->rp0_freq);
}

u32 intel_rps_get_rp1_frequency(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);

	if (rps_uses_slpc(rps))
		return slpc->rp1_freq;
	else
		return intel_gpu_freq(rps, rps->rp1_freq);
}

u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);

	if (rps_uses_slpc(rps))
		return slpc->min_freq;
	else
		return intel_gpu_freq(rps, rps->min_freq);
}
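/*
 * Illustrative sketch (not part of the driver): the getters above hide the
 * SLPC/legacy split and always hand back MHz, so a caller can dump the whole
 * RP0 (max non-overclocked) / RP1 (typically the efficient frequency) /
 * RPn (minimum) ladder without caring which backend is in use. Hypothetical
 * example:
 *
 *	static void example_dump_freq_ladder(struct intel_rps *rps)
 *	{
 *		struct drm_i915_private *i915 = rps_to_i915(rps);
 *
 *		drm_dbg(&i915->drm,
 *			"RP0 %u MHz, RP1 %u MHz, RPn %u MHz (limits %u-%u MHz)\n",
 *			intel_rps_get_rp0_frequency(rps),
 *			intel_rps_get_rp1_frequency(rps),
 *			intel_rps_get_rpn_frequency(rps),
 *			intel_rps_get_min_frequency(rps),
 *			intel_rps_get_max_frequency(rps));
 *	}
 */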
static int set_max_freq(struct intel_rps *rps, u32 val)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	int ret = 0;

	mutex_lock(&rps->lock);

	val = intel_freq_opcode(rps, val);
	if (val < rps->min_freq ||
	    val > rps->max_freq ||
	    val < rps->min_freq_softlimit) {
		ret = -EINVAL;
		goto unlock;
	}

	if (val > rps->rp0_freq)
		drm_dbg(&i915->drm, "User requested overclocking to %d\n",
			intel_gpu_freq(rps, val));

	rps->max_freq_softlimit = val;

	val = clamp_t(int, rps->cur_freq,
		      rps->min_freq_softlimit,
		      rps->max_freq_softlimit);

	/*
	 * We still need *_set_rps to process the new max_delay and
	 * update the interrupt limits and PMINTRMSK even though
	 * the frequency request may be unchanged.
	 */
	intel_rps_set(rps, val);

unlock:
	mutex_unlock(&rps->lock);

	return ret;
}

int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);

	if (rps_uses_slpc(rps))
		return intel_guc_slpc_set_max_freq(slpc, val);
	else
		return set_max_freq(rps, val);
}

u32 intel_rps_get_min_frequency(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);

	if (rps_uses_slpc(rps))
		return slpc->min_freq_softlimit;
	else
		return intel_gpu_freq(rps, rps->min_freq_softlimit);
}

/**
 * intel_rps_get_min_raw_freq - returns the min frequency in raw units.
 * @rps: the intel_rps structure
 *
 * Returns the min frequency in a raw format. On newer platforms, raw values
 * are in units of 50 MHz.
 */
u32 intel_rps_get_min_raw_freq(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
	u32 freq;

	if (rps_uses_slpc(rps)) {
		return DIV_ROUND_CLOSEST(slpc->min_freq,
					 GT_FREQUENCY_MULTIPLIER);
	} else {
		freq = rps->min_freq;
		if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
			/* Convert GT frequency to 50 MHz units */
			freq /= GEN9_FREQ_SCALER;
		}
		return freq;
	}
}

static int set_min_freq(struct intel_rps *rps, u32 val)
{
	int ret = 0;

	mutex_lock(&rps->lock);

	val = intel_freq_opcode(rps, val);
	if (val < rps->min_freq ||
	    val > rps->max_freq ||
	    val > rps->max_freq_softlimit) {
		ret = -EINVAL;
		goto unlock;
	}

	rps->min_freq_softlimit = val;

	val = clamp_t(int, rps->cur_freq,
		      rps->min_freq_softlimit,
		      rps->max_freq_softlimit);

	/*
	 * We still need *_set_rps to process the new min_delay and
	 * update the interrupt limits and PMINTRMSK even though
	 * the frequency request may be unchanged.
	 */
	intel_rps_set(rps, val);

unlock:
	mutex_unlock(&rps->lock);

	return ret;
}

int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);

	if (rps_uses_slpc(rps))
		return intel_guc_slpc_set_min_freq(slpc, val);
	else
		return set_min_freq(rps, val);
}
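/*
 * Illustrative sketch (not part of the driver): set_max_freq() rejects values
 * below the current min softlimit and set_min_freq() rejects values above the
 * current max softlimit, so a caller applying a whole (min, max) window in MHz
 * has to order the two updates. A hypothetical helper, assuming both bounds
 * are valid hardware frequencies:
 *
 *	static int example_set_freq_window(struct intel_rps *rps,
 *					   u32 min_mhz, u32 max_mhz)
 *	{
 *		int err;
 *
 *		if (min_mhz > max_mhz)
 *			return -EINVAL;
 *
 *		// Apply the max first so the new min can never exceed it.
 *		err = intel_rps_set_max_frequency(rps, max_mhz);
 *		if (err)
 *			return err;
 *
 *		return intel_rps_set_min_frequency(rps, min_mhz);
 *	}
 *
 * This ordering assumes the new max is not below the current min softlimit;
 * otherwise the min would have to be lowered first.
 */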
static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u32 state = enable ? GEN9_RPSWCTL_ENABLE : GEN9_RPSWCTL_DISABLE;

	/* Allow punit to process software requests */
	intel_uncore_write(uncore, GEN6_RP_CONTROL, state);
}

void intel_rps_raise_unslice(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);

	mutex_lock(&rps->lock);

	if (rps_uses_slpc(rps)) {
		/* RP limits have not been initialized yet for SLPC path */
		struct intel_rps_freq_caps caps;

		gen6_rps_get_freq_caps(rps, &caps);

		intel_rps_set_manual(rps, true);
		intel_uncore_write(uncore, GEN6_RPNSWREQ,
				   ((caps.rp0_freq <<
				   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
				   GEN9_IGNORE_SLICE_RATIO));
		intel_rps_set_manual(rps, false);
	} else {
		intel_rps_set(rps, rps->rp0_freq);
	}

	mutex_unlock(&rps->lock);
}

void intel_rps_lower_unslice(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);

	mutex_lock(&rps->lock);

	if (rps_uses_slpc(rps)) {
		/* RP limits have not been initialized yet for SLPC path */
		struct intel_rps_freq_caps caps;

		gen6_rps_get_freq_caps(rps, &caps);

		intel_rps_set_manual(rps, true);
		intel_uncore_write(uncore, GEN6_RPNSWREQ,
				   ((caps.min_freq <<
				   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
				   GEN9_IGNORE_SLICE_RATIO));
		intel_rps_set_manual(rps, false);
	} else {
		intel_rps_set(rps, rps->min_freq);
	}

	mutex_unlock(&rps->lock);
}

static u32 rps_read_mmio(struct intel_rps *rps, i915_reg_t reg32)
{
	struct intel_gt *gt = rps_to_gt(rps);
	intel_wakeref_t wakeref;
	u32 val;

	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		val = intel_uncore_read(gt->uncore, reg32);

	return val;
}

bool rps_read_mask_mmio(struct intel_rps *rps,
			i915_reg_t reg32, u32 mask)
{
	return rps_read_mmio(rps, reg32) & mask;
}

/* External interface for intel_ips.ko */

static struct drm_i915_private __rcu *ips_mchdev;

/*
 * Tell the intel_ips driver that the i915 driver is now loaded, if
 * IPS got loaded first.
 *
 * This awkward dance is so that neither module has to depend on the
 * other in order for IPS to do the appropriate communication of
 * GPU turbo limits to i915.
 */
static void
ips_ping_for_i915_load(void)
{
#ifdef __linux__
	void (*link)(void);

	link = symbol_get(ips_link_to_i915_driver);
	if (link) {
		link();
		symbol_put(ips_link_to_i915_driver);
	}
#endif
}
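/*
 * Illustrative sketch (not part of this file): the symbol_get()/symbol_put()
 * pair above only works because intel_ips.c exports the hook with
 * EXPORT_SYMBOL_GPL(). The body below is a simplified guess at that side of
 * the handshake, not the real implementation in
 * drivers/platform/x86/intel_ips.c:
 *
 *	void ips_link_to_i915_driver(void)
 *	{
 *		// Hypothetical: record that i915 is present so the IPS
 *		// monitor starts consulting i915_read_mch_val() and
 *		// i915_gpu_busy(). (The real driver keeps this in its own
 *		// private state.)
 *	}
 *	EXPORT_SYMBOL_GPL(ips_link_to_i915_driver);
 *
 * symbol_get() pins the module that owns the symbol, so symbol_put() must
 * follow once the call has returned.
 */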
void intel_rps_driver_register(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	/*
	 * We only register the i915 ips part with intel-ips once everything is
	 * set up, to avoid intel-ips sneaking in and reading bogus values.
	 */
	if (GRAPHICS_VER(gt->i915) == 5) {
		GEM_BUG_ON(ips_mchdev);
		rcu_assign_pointer(ips_mchdev, gt->i915);
		ips_ping_for_i915_load();
	}
}

void intel_rps_driver_unregister(struct intel_rps *rps)
{
	if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
		rcu_assign_pointer(ips_mchdev, NULL);
}

static struct drm_i915_private *mchdev_get(void)
{
	struct drm_i915_private *i915;

	rcu_read_lock();
	i915 = rcu_dereference(ips_mchdev);
	if (i915 && !kref_get_unless_zero(&i915->drm.ref))
		i915 = NULL;
	rcu_read_unlock();

	return i915;
}

/**
 * i915_read_mch_val - return value for IPS use
 *
 * Calculate and return a value for the IPS driver to use when deciding whether
 * we have thermal and power headroom to increase CPU or GPU power budget.
 */
unsigned long i915_read_mch_val(void)
{
	struct drm_i915_private *i915;
	unsigned long chipset_val = 0;
	unsigned long graphics_val = 0;
	intel_wakeref_t wakeref;

	i915 = mchdev_get();
	if (!i915)
		return 0;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
		struct intel_ips *ips = &to_gt(i915)->rps.ips;

		spin_lock_irq(&mchdev_lock);
		chipset_val = __ips_chipset_val(ips);
		graphics_val = __ips_gfx_val(ips);
		spin_unlock_irq(&mchdev_lock);
	}

	drm_dev_put(&i915->drm);
	return chipset_val + graphics_val;
}
EXPORT_SYMBOL_GPL(i915_read_mch_val);

/**
 * i915_gpu_raise - raise GPU frequency limit
 *
 * Raise the limit; IPS indicates we have thermal headroom.
 */
bool i915_gpu_raise(void)
{
	struct drm_i915_private *i915;
	struct intel_rps *rps;

	i915 = mchdev_get();
	if (!i915)
		return false;

	rps = &to_gt(i915)->rps;

	spin_lock_irq(&mchdev_lock);
	if (rps->max_freq_softlimit < rps->max_freq)
		rps->max_freq_softlimit++;
	spin_unlock_irq(&mchdev_lock);

	drm_dev_put(&i915->drm);
	return true;
}
EXPORT_SYMBOL_GPL(i915_gpu_raise);

/**
 * i915_gpu_lower - lower GPU frequency limit
 *
 * IPS indicates we're close to a thermal limit, so throttle back the GPU
 * frequency maximum.
 */
bool i915_gpu_lower(void)
{
	struct drm_i915_private *i915;
	struct intel_rps *rps;

	i915 = mchdev_get();
	if (!i915)
		return false;

	rps = &to_gt(i915)->rps;

	spin_lock_irq(&mchdev_lock);
	if (rps->max_freq_softlimit > rps->min_freq)
		rps->max_freq_softlimit--;
	spin_unlock_irq(&mchdev_lock);

	drm_dev_put(&i915->drm);
	return true;
}
EXPORT_SYMBOL_GPL(i915_gpu_lower);

/**
 * i915_gpu_busy - indicate GPU busyness to IPS
 *
 * Tell the IPS driver whether or not the GPU is busy.
 */
bool i915_gpu_busy(void)
{
	struct drm_i915_private *i915;
	bool ret;

	i915 = mchdev_get();
	if (!i915)
		return false;

	ret = to_gt(i915)->awake;

	drm_dev_put(&i915->drm);
	return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_busy);
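/*
 * Illustrative sketch (not part of the driver): a hypothetical IPS-side
 * consumer of the exports above. Each hook copes with i915 being absent
 * (mchdev_get() failing) by returning 0/false, so a caller only needs to act
 * on those defaults:
 *
 *	static void example_ips_poll(unsigned long budget)
 *	{
 *		// i915_read_mch_val() returns 0 when i915 is not loaded.
 *		unsigned long val = i915_read_mch_val();
 *
 *		if (!val || !i915_gpu_busy())
 *			return;
 *
 *		// Purely illustrative policy: raise while under budget,
 *		// back off otherwise. The real IPS monitor weighs chipset
 *		// and graphics contributions against its TDP limits.
 *		if (val < budget)
 *			i915_gpu_raise();
 *		else
 *			i915_gpu_lower();
 *	}
 */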
/**
 * i915_gpu_turbo_disable - disable graphics turbo
 *
 * Disable graphics turbo by resetting the max frequency and setting the
 * current frequency to the default.
 */
bool i915_gpu_turbo_disable(void)
{
	struct drm_i915_private *i915;
	struct intel_rps *rps;
	bool ret;

	i915 = mchdev_get();
	if (!i915)
		return false;

	rps = &to_gt(i915)->rps;

	spin_lock_irq(&mchdev_lock);
	rps->max_freq_softlimit = rps->min_freq;
	ret = !__gen5_rps_set(&to_gt(i915)->rps, rps->min_freq);
	spin_unlock_irq(&mchdev_lock);

	drm_dev_put(&i915->drm);
	return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_rps.c"
#include "selftest_slpc.c"
#endif
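/*
 * Illustrative sketch (not part of the driver): the selftest sources are
 * #included into this translation unit, rather than linked separately, so
 * they can exercise static helpers such as set_max_freq() or read_cagf()
 * directly. A hypothetical check in that style, ignoring how the i915
 * selftest framework actually registers and runs it:
 *
 *	static int example_check_softlimit_reject(struct intel_rps *rps)
 *	{
 *		// A max request below the current min softlimit must be
 *		// rejected with -EINVAL by set_max_freq().
 *		u32 too_low = intel_gpu_freq(rps, rps->min_freq_softlimit) - 50;
 *
 *		if (set_max_freq(rps, too_low) != -EINVAL)
 *			return -EINVAL;	// limit was not enforced
 *
 *		return 0;
 *	}
 */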