/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include <linux/cpufreq.h>
#include <drm/drm_plane_helper.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include <linux/module.h>
#include <drm/drm_atomic_helper.h>

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using down to 0V while at this stage. This
 * stage is entered automatically when the GPU is idle when RC6 support is
 * enabled, and as soon as a new workload arises the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available in Intel GPUs, which differ in
 * the latency required to enter and leave RC6 and in the voltage consumed
 * by the GPU in different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6,
 * and RC6pp is the deepest RC6. Their support by hardware varies according
 * to the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require higher latency to switch to and wake up.
 */
#define INTEL_RC6_ENABLE			(1<<0)
#define INTEL_RC6p_ENABLE			(1<<1)
#define INTEL_RC6pp_ENABLE			(1<<2)
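
/*
 * For illustration only (not used by the code below): a platform whose
 * hardware/BIOS supports plain RC6 and deep RC6, but not deepest RC6,
 * would be described by the combined mask
 * (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE).
 */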

static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
{
	if (HAS_LLC(dev_priv)) {
		/*
		 * WaCompressedResourceDisplayNewHashMode:skl,kbl
		 * Display WA#0390: skl,kbl
		 *
		 * Must match Sampler, Pixel Back End, and Media. See
		 * WaCompressedResourceSamplerPbeMediaNewHashMode.
		 */
		I915_WRITE(CHICKEN_PAR1_1,
			   I915_READ(CHICKEN_PAR1_1) |
			   SKL_DE_COMPRESSED_HASH_MODE);
	}

	/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
	I915_WRITE(CHICKEN_PAR1_1,
		   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);

	I915_WRITE(GEN8_CONFIG0,
		   I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES);

	/* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
	I915_WRITE(GEN8_CHICKEN_DCPR_1,
		   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);

	/* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
	/* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
		   DISP_FBC_WM_DIS |
		   DISP_FBC_MEMORY_WAKE);

	/* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
		   ILK_DPFC_DISABLE_DUMMY0);

	if (IS_SKYLAKE(dev_priv)) {
		/* WaDisableDopClockGating */
		I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
			   & ~GEN7_DOP_CLOCK_GATE_ENABLE);
	}
}

static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
{
	gen9_init_clock_gating(dev_priv);

	/* WaDisableSDEUnitClockGating:bxt */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

	/*
	 * FIXME:
	 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
	 */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

	/*
	 * Wa: Backlight PWM may stop in the asserted state, causing backlight
	 * to stay fully on.
	 */
	I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
		   PWM1_GATING_DIS | PWM2_GATING_DIS);
}

static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
{
	gen9_init_clock_gating(dev_priv);

	/*
	 * WaDisablePWMClockGating:glk
	 * Backlight PWM may stop in the asserted state, causing backlight
	 * to stay fully on.
	 */
	I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
		   PWM1_GATING_DIS | PWM2_GATING_DIS);

	/* WaDDIIOTimeout:glk */
	if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
		u32 val = I915_READ(CHICKEN_MISC_2);
		val &= ~(GLK_CL0_PWR_DOWN |
			 GLK_CL1_PWR_DOWN |
			 GLK_CL2_PWR_DOWN);
		I915_WRITE(CHICKEN_MISC_2, val);
	}
}

static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
{
	u32 tmp;

	tmp = I915_READ(CLKCFG);

	switch (tmp & CLKCFG_FSB_MASK) {
	case CLKCFG_FSB_533:
		dev_priv->fsb_freq = 533; /* 133*4 */
		break;
	case CLKCFG_FSB_800:
		dev_priv->fsb_freq = 800; /* 200*4 */
		break;
	case CLKCFG_FSB_667:
		dev_priv->fsb_freq = 667; /* 167*4 */
		break;
	case CLKCFG_FSB_400:
		dev_priv->fsb_freq = 400; /* 100*4 */
		break;
	}

	switch (tmp & CLKCFG_MEM_MASK) {
	case CLKCFG_MEM_533:
		dev_priv->mem_freq = 533;
		break;
	case CLKCFG_MEM_667:
		dev_priv->mem_freq = 667;
		break;
	case CLKCFG_MEM_800:
		dev_priv->mem_freq = 800;
		break;
	}

	/* detect pineview DDR3 setting */
	tmp = I915_READ(CSHRDDR3CTL);
	dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}

static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
{
	u16 ddrpll, csipll;

	ddrpll = I915_READ16(DDRMPLL1);
	csipll = I915_READ16(CSIPLL0);

	switch (ddrpll & 0xff) {
	case 0xc:
		dev_priv->mem_freq = 800;
		break;
	case 0x10:
		dev_priv->mem_freq = 1066;
		break;
	case 0x14:
		dev_priv->mem_freq = 1333;
		break;
	case 0x18:
		dev_priv->mem_freq = 1600;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
				 ddrpll & 0xff);
		dev_priv->mem_freq = 0;
		break;
	}

	dev_priv->ips.r_t = dev_priv->mem_freq;

	switch (csipll & 0x3ff) {
	case 0x00c:
		dev_priv->fsb_freq = 3200;
		break;
	case 0x00e:
		dev_priv->fsb_freq = 3733;
		break;
	case 0x010:
		dev_priv->fsb_freq = 4266;
		break;
	case 0x012:
		dev_priv->fsb_freq = 4800;
		break;
	case 0x014:
		dev_priv->fsb_freq = 5333;
		break;
	case 0x016:
		dev_priv->fsb_freq = 5866;
		break;
	case 0x018:
		dev_priv->fsb_freq = 6400;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
				 csipll & 0x3ff);
		dev_priv->fsb_freq = 0;
		break;
	}

	if (dev_priv->fsb_freq == 3200) {
		dev_priv->ips.c_m = 0;
	} else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
		dev_priv->ips.c_m = 1;
	} else {
		dev_priv->ips.c_m = 2;
	}
}

static const struct cxsr_latency cxsr_latency_table[] = {
	{1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
	{1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
	{1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
	{1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
	{1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

	{1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
	{1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
	{1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
	{1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
	{1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

	{1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
	{1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
	{1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
	{1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
	{1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

	{0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
	{0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
	{0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
	{0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
	{0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

	{0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
	{0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
	{0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
	{0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
	{0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

	{0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
	{0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
	{0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
	{0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
	{0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};
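
/*
 * Key to the table above, following the struct cxsr_latency field order:
 * { is_desktop, is_ddr3, fsb_freq, mem_freq,
 *   display_sr, cursor_sr, display_hpll_disable, cursor_hpll_disable }.
 * Frequencies are in MHz; the latencies are in nanoseconds, as consumed
 * by intel_calculate_wm() via its latency_ns parameter.
 */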

static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
							 bool is_ddr3,
							 int fsb,
							 int mem)
{
	const struct cxsr_latency *latency;
	int i;

	if (fsb == 0 || mem == 0)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
		latency = &cxsr_latency_table[i];
		if (is_desktop == latency->is_desktop &&
		    is_ddr3 == latency->is_ddr3 &&
		    fsb == latency->fsb_freq && mem == latency->mem_freq)
			return latency;
	}

	DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

	return NULL;
}

static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
{
	u32 val;

	mutex_lock(&dev_priv->pcu_lock);

	val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
	if (enable)
		val &= ~FORCE_DDR_HIGH_FREQ;
	else
		val |= FORCE_DDR_HIGH_FREQ;
	val &= ~FORCE_DDR_LOW_FREQ;
	val |= FORCE_DDR_FREQ_REQ_ACK;
	vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);

	if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
		      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");

	mutex_unlock(&dev_priv->pcu_lock);
}

static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
{
	u32 val;

	mutex_lock(&dev_priv->pcu_lock);

	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
	if (enable)
		val |= DSP_MAXFIFO_PM5_ENABLE;
	else
		val &= ~DSP_MAXFIFO_PM5_ENABLE;
	vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);

	mutex_unlock(&dev_priv->pcu_lock);
}

#define FW_WM(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
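
/*
 * For reference, the macro above is pure token pasting; e.g. FW_WM(wm, SR)
 * expands to (((wm) << DSPFW_SR_SHIFT) & DSPFW_SR_MASK), i.e. the value is
 * shifted into the SR field and masked to that field's width.
 */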

static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
	bool was_enabled;
	u32 val;

	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
		was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
		I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
		POSTING_READ(FW_BLC_SELF_VLV);
	} else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
		was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
		I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
		POSTING_READ(FW_BLC_SELF);
	} else if (IS_PINEVIEW(dev_priv)) {
		val = I915_READ(DSPFW3);
		was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
		if (enable)
			val |= PINEVIEW_SELF_REFRESH_EN;
		else
			val &= ~PINEVIEW_SELF_REFRESH_EN;
		I915_WRITE(DSPFW3, val);
		POSTING_READ(DSPFW3);
	} else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
		was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
		val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
			       _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
		I915_WRITE(FW_BLC_SELF, val);
		POSTING_READ(FW_BLC_SELF);
	} else if (IS_I915GM(dev_priv)) {
		/*
		 * FIXME can't find a bit like this for 915G, and
		 * yet it does have the related watermark in
		 * FW_BLC_SELF. What's going on?
		 */
		was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
		val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
			       _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
		I915_WRITE(INSTPM, val);
		POSTING_READ(INSTPM);
	} else {
		return false;
	}

	trace_intel_memory_cxsr(dev_priv, was_enabled, enable);

	DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
		      enableddisabled(enable),
		      enableddisabled(was_enabled));

	return was_enabled;
}

/**
 * intel_set_memory_cxsr - Configure CxSR state
 * @dev_priv: i915 device
 * @enable: Allow vs. disallow CxSR
 *
 * Allow or disallow the system to enter a special CxSR
 * (C-state self refresh) state. What typically happens in CxSR mode
 * is that several display FIFOs may get combined into a single larger
 * FIFO for a particular plane (so called max FIFO mode) to allow the
 * system to defer memory fetches longer, and the memory will enter
 * self refresh.
 *
 * Note that enabling CxSR does not guarantee that the system will enter
 * this special mode, nor does it guarantee that the system stays
 * in that mode once entered. So this just allows/disallows the system
 * to autonomously utilize the CxSR mode. Other factors such as core
 * C-states will affect when/if the system actually enters/exits the
 * CxSR mode.
 *
 * Note that on VLV/CHV this actually only controls the max FIFO mode,
 * and the system is free to enter/exit memory self refresh at any time
 * even when the use of CxSR has been disallowed.
 *
 * While the system is actually in the CxSR/max FIFO mode, some plane
 * control registers will not get latched on vblank. Thus in order to
 * guarantee the system will respond to changes in the plane registers
 * we must always disallow CxSR prior to making changes to those registers.
 * Unfortunately the system will re-evaluate the CxSR conditions at
 * frame start which happens after vblank start (which is when the plane
 * registers would get latched), so we can't proceed with the plane update
 * during the same frame where we disallowed CxSR.
 *
 * Certain platforms also have a deeper HPLL SR mode. Fortunately the
 * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
 * the hardware w.r.t. HPLL SR when writing to plane registers.
 * Disallowing just CxSR is sufficient.
 */
bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
	bool ret;

	mutex_lock(&dev_priv->wm.wm_mutex);
	ret = _intel_set_memory_cxsr(dev_priv, enable);
	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
		dev_priv->wm.vlv.cxsr = enable;
	else if (IS_G4X(dev_priv))
		dev_priv->wm.g4x.cxsr = enable;
	mutex_unlock(&dev_priv->wm.wm_mutex);

	return ret;
}
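
/*
 * A sketch of the ordering the kerneldoc above implies (an illustrative
 * pseudo-sequence, not a literal snippet from any caller):
 *
 *	intel_set_memory_cxsr(dev_priv, false);
 *	// wait until the frame where CxSR was disallowed has passed
 *	// (the hardware re-evaluates CxSR at frame start)
 *	// ... write the plane registers ...
 *	intel_set_memory_cxsr(dev_priv, true);
 *
 * Skipping the wait risks the plane writes racing with an ongoing
 * CxSR/max FIFO frame, as described above.
 */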

/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value. It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;

#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
	((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
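
/*
 * The macro above assembles a 9-bit FIFO start offset (0-511 cachelines)
 * from two registers; e.g. for pipe B's sprite0 below,
 * VLV_FIFO_START(dsparb, dsparb2, 16, 8) takes bits 23:16 of DSPARB as
 * the low byte and bit 8 of DSPARB2 as the ninth bit.
 */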
"B" : "A", 558 size); 559 560 return size; 561 } 562 563 /* Pineview has different values for various configs */ 564 static const struct intel_watermark_params pineview_display_wm = { 565 .fifo_size = PINEVIEW_DISPLAY_FIFO, 566 .max_wm = PINEVIEW_MAX_WM, 567 .default_wm = PINEVIEW_DFT_WM, 568 .guard_size = PINEVIEW_GUARD_WM, 569 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, 570 }; 571 static const struct intel_watermark_params pineview_display_hplloff_wm = { 572 .fifo_size = PINEVIEW_DISPLAY_FIFO, 573 .max_wm = PINEVIEW_MAX_WM, 574 .default_wm = PINEVIEW_DFT_HPLLOFF_WM, 575 .guard_size = PINEVIEW_GUARD_WM, 576 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, 577 }; 578 static const struct intel_watermark_params pineview_cursor_wm = { 579 .fifo_size = PINEVIEW_CURSOR_FIFO, 580 .max_wm = PINEVIEW_CURSOR_MAX_WM, 581 .default_wm = PINEVIEW_CURSOR_DFT_WM, 582 .guard_size = PINEVIEW_CURSOR_GUARD_WM, 583 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, 584 }; 585 static const struct intel_watermark_params pineview_cursor_hplloff_wm = { 586 .fifo_size = PINEVIEW_CURSOR_FIFO, 587 .max_wm = PINEVIEW_CURSOR_MAX_WM, 588 .default_wm = PINEVIEW_CURSOR_DFT_WM, 589 .guard_size = PINEVIEW_CURSOR_GUARD_WM, 590 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, 591 }; 592 static const struct intel_watermark_params i965_cursor_wm_info = { 593 .fifo_size = I965_CURSOR_FIFO, 594 .max_wm = I965_CURSOR_MAX_WM, 595 .default_wm = I965_CURSOR_DFT_WM, 596 .guard_size = 2, 597 .cacheline_size = I915_FIFO_LINE_SIZE, 598 }; 599 static const struct intel_watermark_params i945_wm_info = { 600 .fifo_size = I945_FIFO_SIZE, 601 .max_wm = I915_MAX_WM, 602 .default_wm = 1, 603 .guard_size = 2, 604 .cacheline_size = I915_FIFO_LINE_SIZE, 605 }; 606 static const struct intel_watermark_params i915_wm_info = { 607 .fifo_size = I915_FIFO_SIZE, 608 .max_wm = I915_MAX_WM, 609 .default_wm = 1, 610 .guard_size = 2, 611 .cacheline_size = I915_FIFO_LINE_SIZE, 612 }; 613 static const struct intel_watermark_params i830_a_wm_info = { 614 .fifo_size = I855GM_FIFO_SIZE, 615 .max_wm = I915_MAX_WM, 616 .default_wm = 1, 617 .guard_size = 2, 618 .cacheline_size = I830_FIFO_LINE_SIZE, 619 }; 620 static const struct intel_watermark_params i830_bc_wm_info = { 621 .fifo_size = I855GM_FIFO_SIZE, 622 .max_wm = I915_MAX_WM/2, 623 .default_wm = 1, 624 .guard_size = 2, 625 .cacheline_size = I830_FIFO_LINE_SIZE, 626 }; 627 static const struct intel_watermark_params i845_wm_info = { 628 .fifo_size = I830_FIFO_SIZE, 629 .max_wm = I915_MAX_WM, 630 .default_wm = 1, 631 .guard_size = 2, 632 .cacheline_size = I830_FIFO_LINE_SIZE, 633 }; 634 635 /** 636 * intel_wm_method1 - Method 1 / "small buffer" watermark formula 637 * @pixel_rate: Pipe pixel rate in kHz 638 * @cpp: Plane bytes per pixel 639 * @latency: Memory wakeup latency in 0.1us units 640 * 641 * Compute the watermark using the method 1 or "small buffer" 642 * formula. The caller may additonally add extra cachelines 643 * to account for TLB misses and clock crossings. 644 * 645 * This method is concerned with the short term drain rate 646 * of the FIFO, ie. it does not account for blanking periods 647 * which would effectively reduce the average drain rate across 648 * a longer period. The name "small" refers to the fact the 649 * FIFO is relatively small compared to the amount of data 650 * fetched. 651 * 652 * The FIFO level vs. 

/**
 * intel_wm_method2 - Method 2 / "large buffer" watermark formula
 * @pixel_rate: Pipe pixel rate in kHz
 * @htotal: Pipe horizontal total
 * @width: Plane width in pixels
 * @cpp: Plane bytes per pixel
 * @latency: Memory wakeup latency in 0.1us units
 *
 * Compute the watermark using the method 2 or "large buffer"
 * formula. The caller may additionally add extra cachelines
 * to account for TLB misses and clock crossings.
 *
 * This method is concerned with the long term drain rate
 * of the FIFO, ie. it does account for blanking periods
 * which effectively reduce the average drain rate across
 * a longer period. The name "large" refers to the fact the
 * FIFO is relatively large compared to the amount of data
 * fetched.
 *
 * The FIFO level vs. time graph might look something like:
 *
 *    |\___       |\___
 *    |    \___   |    \___
 *    |        \  |        \
 * __ --__--__--__--__--__--__ (- plane active, _ blanking)
 * -> time
 *
 * Returns:
 * The watermark in bytes
 */
static unsigned int intel_wm_method2(unsigned int pixel_rate,
				     unsigned int htotal,
				     unsigned int width,
				     unsigned int cpp,
				     unsigned int latency)
{
	unsigned int ret;

	/*
	 * FIXME remove once all users are computing
	 * watermarks in the correct place.
	 */
	if (WARN_ON_ONCE(htotal == 0))
		htotal = 1;

	ret = (latency * pixel_rate) / (htotal * 10000);
	ret = (ret + 1) * width * cpp;

	return ret;
}
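
/*
 * Worked example for intel_wm_method2() with the same illustrative
 * numbers (148500 kHz, htotal 2200, width 1920, cpp 4, latency 50):
 * 50 * 148500 / (2200 * 10000) = 0 complete lines elapse during the
 * latency, so the result is (0 + 1) * 1920 * 4 = 7680 bytes, i.e. the
 * estimate is rounded up to whole lines of the plane.
 */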

/**
 * intel_calculate_wm - calculate watermark level
 * @pixel_rate: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the FIFO buffer
 * @cpp: bytes per pixel
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again). Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size. When it reaches the watermark level, it'll start
 * fetching FIFO-line-sized chunks from memory until the FIFO fills
 * past the watermark point. If the FIFO drains completely, a FIFO underrun
 * will occur, and a display engine hang could result.
 */
static unsigned int intel_calculate_wm(int pixel_rate,
				       const struct intel_watermark_params *wm,
				       int fifo_size, int cpp,
				       unsigned int latency_ns)
{
	int entries, wm_size;

	/*
	 * Note: we need to make sure we don't overflow for various clock &
	 * latency values.
	 * clocks go from a few thousand to several hundred thousand.
	 * latency is usually a few thousand.
	 */
	entries = intel_wm_method1(pixel_rate, cpp,
				   latency_ns / 100);
	entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
		wm->guard_size;
	DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);

	wm_size = fifo_size - entries;
	DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);

	/* Don't promote wm_size to unsigned... */
	if (wm_size > wm->max_wm)
		wm_size = wm->max_wm;
	if (wm_size <= 0)
		wm_size = wm->default_wm;

	/*
	 * Bspec seems to indicate that the value shouldn't be lower than
	 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
	 * Let's go for 8 which is the burst size since certain platforms
	 * already use a hardcoded 8 (which is what the spec says should be
	 * done).
	 */
	if (wm_size <= 8)
		wm_size = 8;

	return wm_size;
}
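
/*
 * Continuing the illustrative numbers above through intel_calculate_wm():
 * 2970 bytes is DIV_ROUND_UP(2970, 64) = 47 cachelines, plus a guard of 2
 * makes 49 entries; with a hypothetical 512-entry FIFO the watermark would
 * be 512 - 49 = 463, subject to the max_wm/default_wm clamps and the
 * minimum of 8 applied above.
 */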

static bool is_disabling(int old, int new, int threshold)
{
	return old >= threshold && new < threshold;
}

static bool is_enabling(int old, int new, int threshold)
{
	return old < threshold && new >= threshold;
}

static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
{
	return dev_priv->wm.max_level + 1;
}

static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
				   const struct intel_plane_state *plane_state)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);

	/* FIXME check the 'enable' instead */
	if (!crtc_state->base.active)
		return false;

	/*
	 * Treat cursor with fb as always visible since cursor updates
	 * can happen faster than the vrefresh rate, and the current
	 * watermark code doesn't handle that correctly. Cursor updates
	 * which set/clear the fb or change the cursor size are going
	 * to get throttled by intel_legacy_cursor_update() to work
	 * around this problem with the watermark code.
	 */
	if (plane->id == PLANE_CURSOR)
		return plane_state->base.fb != NULL;
	else
		return plane_state->base.visible;
}

static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
{
	struct intel_crtc *crtc, *enabled = NULL;

	for_each_intel_crtc(&dev_priv->drm, crtc) {
		if (intel_crtc_active(crtc)) {
			if (enabled)
				return NULL;
			enabled = crtc;
		}
	}

	return enabled;
}

static void pineview_update_wm(struct intel_crtc *unused_crtc)
{
	struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
	struct intel_crtc *crtc;
	const struct cxsr_latency *latency;
	u32 reg;
	unsigned int wm;

	latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
					 dev_priv->is_ddr3,
					 dev_priv->fsb_freq,
					 dev_priv->mem_freq);
	if (!latency) {
		DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
		intel_set_memory_cxsr(dev_priv, false);
		return;
	}

	crtc = single_enabled_crtc(dev_priv);
	if (crtc) {
		const struct drm_display_mode *adjusted_mode =
			&crtc->config->base.adjusted_mode;
		const struct drm_framebuffer *fb =
			crtc->base.primary->state->fb;
		int cpp = fb->format->cpp[0];
		int clock = adjusted_mode->crtc_clock;

		/* Display SR */
		wm = intel_calculate_wm(clock, &pineview_display_wm,
					pineview_display_wm.fifo_size,
					cpp, latency->display_sr);
		reg = I915_READ(DSPFW1);
		reg &= ~DSPFW_SR_MASK;
		reg |= FW_WM(wm, SR);
		I915_WRITE(DSPFW1, reg);
		DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

		/* cursor SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_wm,
					pineview_display_wm.fifo_size,
					4, latency->cursor_sr);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_CURSOR_SR_MASK;
		reg |= FW_WM(wm, CURSOR_SR);
		I915_WRITE(DSPFW3, reg);

		/* Display HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					cpp, latency->display_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_SR_MASK;
		reg |= FW_WM(wm, HPLL_SR);
		I915_WRITE(DSPFW3, reg);

		/* cursor HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					4, latency->cursor_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_CURSOR_MASK;
		reg |= FW_WM(wm, HPLL_CURSOR);
		I915_WRITE(DSPFW3, reg);
		DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

		intel_set_memory_cxsr(dev_priv, true);
	} else {
		intel_set_memory_cxsr(dev_priv, false);
	}
}

/*
 * Documentation says:
 * "If the line size is small, the TLB fetches can get in the way of the
 *  data fetches, causing some lag in the pixel data return which is not
 *  accounted for in the above formulas. The following adjustment only
 *  needs to be applied if eight whole lines fit in the buffer at once.
 *  The WM is adjusted upwards by the difference between the FIFO size
 *  and the size of 8 whole lines. This adjustment is always performed
 *  in the actual pixel depth regardless of whether FBC is enabled or not."
 */
static int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
{
	int tlb_miss = fifo_size * 64 - width * cpp * 8;

	return max(0, tlb_miss);
}
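
/*
 * Worked example for g4x_tlb_miss_wa(): a 127 cacheline FIFO holds
 * 127 * 64 = 8128 bytes. A 100 pixel wide, 4 bpp plane needs
 * 100 * 4 * 8 = 3200 bytes for eight lines, which fits, so the
 * watermark grows by 8128 - 3200 = 4928 bytes. At 1920 pixels wide,
 * eight lines (61440 bytes) do not fit and no adjustment is made.
 */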

static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
				const struct g4x_wm_values *wm)
{
	enum i915_pipe pipe;

	for_each_pipe(dev_priv, pipe)
		trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);

	I915_WRITE(DSPFW1,
		   FW_WM(wm->sr.plane, SR) |
		   FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
		   FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
		   FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
	I915_WRITE(DSPFW2,
		   (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
		   FW_WM(wm->sr.fbc, FBC_SR) |
		   FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
		   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
		   FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
		   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
	I915_WRITE(DSPFW3,
		   (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
		   FW_WM(wm->sr.cursor, CURSOR_SR) |
		   FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
		   FW_WM(wm->hpll.plane, HPLL_SR));

	POSTING_READ(DSPFW1);
}

#define FW_WM_VLV(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)

static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
				const struct vlv_wm_values *wm)
{
	enum i915_pipe pipe;

	for_each_pipe(dev_priv, pipe) {
		trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);

		I915_WRITE(VLV_DDL(pipe),
			   (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
			   (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
			   (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
			   (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
	}

	/*
	 * Zero the (unused) WM1 watermarks, and also clear all the
	 * high order bits so that there are no out of bounds values
	 * present in the registers during the reprogramming.
	 */
	I915_WRITE(DSPHOWM, 0);
	I915_WRITE(DSPHOWM1, 0);
	I915_WRITE(DSPFW4, 0);
	I915_WRITE(DSPFW5, 0);
	I915_WRITE(DSPFW6, 0);

	I915_WRITE(DSPFW1,
		   FW_WM(wm->sr.plane, SR) |
		   FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
		   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
		   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
	I915_WRITE(DSPFW2,
		   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
		   FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
		   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
	I915_WRITE(DSPFW3,
		   FW_WM(wm->sr.cursor, CURSOR_SR));

	if (IS_CHERRYVIEW(dev_priv)) {
		I915_WRITE(DSPFW7_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
		I915_WRITE(DSPFW8_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
			   FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
		I915_WRITE(DSPFW9_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
		I915_WRITE(DSPHOWM,
			   FW_WM(wm->sr.plane >> 9, SR_HI) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
	} else {
		I915_WRITE(DSPFW7,
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
		I915_WRITE(DSPHOWM,
			   FW_WM(wm->sr.plane >> 9, SR_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
	}

	POSTING_READ(DSPFW1);
}

#undef FW_WM_VLV

static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
{
	/* all latencies in usec */
	dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
	dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
	dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;

	dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
}

static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
{
	/*
	 * DSPCNTR[13] supposedly controls whether the
	 * primary plane can use the FIFO space otherwise
	 * reserved for the sprite plane. It's not 100% clear
	 * what the actual FIFO size is, but it looks like we
	 * can happily set both primary and sprite watermarks
	 * up to 127 cachelines. So that would seem to mean
	 * that either DSPCNTR[13] doesn't do anything, or that
	 * the total FIFO is >= 256 cachelines in size. Either
	 * way, we don't seem to have to worry about this
	 * repartitioning as the maximum watermark value the
	 * register can hold for each plane is lower than the
	 * minimum FIFO size.
	 */
	switch (plane_id) {
	case PLANE_CURSOR:
		return 63;
	case PLANE_PRIMARY:
		return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
	case PLANE_SPRITE0:
		return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
	default:
		MISSING_CASE(plane_id);
		return 0;
	}
}

static int g4x_fbc_fifo_size(int level)
{
	switch (level) {
	case G4X_WM_LEVEL_SR:
		return 7;
	case G4X_WM_LEVEL_HPLL:
		return 15;
	default:
		MISSING_CASE(level);
		return 0;
	}
}

static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state,
			       const struct intel_plane_state *plane_state,
			       int level)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
	const struct drm_display_mode *adjusted_mode =
		&crtc_state->base.adjusted_mode;
	int clock, htotal, cpp, width, wm;
	int latency = dev_priv->wm.pri_latency[level] * 10;

	if (latency == 0)
		return USHRT_MAX;

	if (!intel_wm_plane_visible(crtc_state, plane_state))
		return 0;

	/*
	 * Not 100% sure which way ELK should go here as the
	 * spec only says CL/CTG should assume 32bpp and BW
	 * doesn't need to. But as these things followed the
	 * mobile vs. desktop lines on gen3 as well, let's
	 * assume ELK doesn't need this.
	 *
	 * The spec also fails to list such a restriction for
	 * the HPLL watermark, which seems a little strange.
	 * Let's use 32bpp for the HPLL watermark as well.
	 */
	if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
	    level != G4X_WM_LEVEL_NORMAL)
		cpp = 4;
	else
		cpp = plane_state->base.fb->format->cpp[0];

	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;

	if (plane->id == PLANE_CURSOR)
		width = plane_state->base.crtc_w;
	else
		width = drm_rect_width(&plane_state->base.dst);

	if (plane->id == PLANE_CURSOR) {
		wm = intel_wm_method2(clock, htotal, width, cpp, latency);
	} else if (plane->id == PLANE_PRIMARY &&
		   level == G4X_WM_LEVEL_NORMAL) {
		wm = intel_wm_method1(clock, cpp, latency);
	} else {
		int small, large;

		small = intel_wm_method1(clock, cpp, latency);
		large = intel_wm_method2(clock, htotal, width, cpp, latency);

		wm = min(small, large);
	}

	wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
			      width, cpp);

	wm = DIV_ROUND_UP(wm, 64) + 2;

	return min_t(int, wm, USHRT_MAX);
}
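
/*
 * Putting g4x_compute_wm() together with the earlier illustrative
 * numbers: a primary plane at G4X_WM_LEVEL_NORMAL (5 usec -> latency 50)
 * uses method 1 only, giving 2970 bytes; with a 1920 pixel wide, 4 bpp
 * plane the TLB adjustment is 0, so the result is
 * DIV_ROUND_UP(2970, 64) + 2 = 49 cachelines, comfortably below the
 * 127 cacheline limit from g4x_plane_fifo_size().
 */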

static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
				 int level, enum plane_id plane_id, u16 value)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	bool dirty = false;

	for (; level < intel_wm_num_levels(dev_priv); level++) {
		struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];

		dirty |= raw->plane[plane_id] != value;
		raw->plane[plane_id] = value;
	}

	return dirty;
}

static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
			       int level, u16 value)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	bool dirty = false;

	/* NORMAL level doesn't have an FBC watermark */
	level = max(level, G4X_WM_LEVEL_SR);

	for (; level < intel_wm_num_levels(dev_priv); level++) {
		struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];

		dirty |= raw->fbc != value;
		raw->fbc = value;
	}

	return dirty;
}

static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
				   const struct intel_plane_state *pstate,
				   uint32_t pri_val);

static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
				     const struct intel_plane_state *plane_state)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
	int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
	enum plane_id plane_id = plane->id;
	bool dirty = false;
	int level;

	if (!intel_wm_plane_visible(crtc_state, plane_state)) {
		dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
		if (plane_id == PLANE_PRIMARY)
			dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
		goto out;
	}

	for (level = 0; level < num_levels; level++) {
		struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
		int wm, max_wm;

		wm = g4x_compute_wm(crtc_state, plane_state, level);
		max_wm = g4x_plane_fifo_size(plane_id, level);

		if (wm > max_wm)
			break;

		dirty |= raw->plane[plane_id] != wm;
		raw->plane[plane_id] = wm;

		if (plane_id != PLANE_PRIMARY ||
		    level == G4X_WM_LEVEL_NORMAL)
			continue;

		wm = ilk_compute_fbc_wm(crtc_state, plane_state,
					raw->plane[plane_id]);
		max_wm = g4x_fbc_fifo_size(level);

		/*
		 * FBC wm is not mandatory as we
		 * can always just disable its use.
		 */
		if (wm > max_wm)
			wm = USHRT_MAX;

		dirty |= raw->fbc != wm;
		raw->fbc = wm;
	}

	/* mark watermarks as invalid */
	dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);

	if (plane_id == PLANE_PRIMARY)
		dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);

out:
	if (dirty) {
		DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
			      plane->base.name,
			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
			      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);

		if (plane_id == PLANE_PRIMARY)
			DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
				      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
				      crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
	}

	return dirty;
}

static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
				      enum plane_id plane_id, int level)
{
	const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];

	return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
}

static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
				     int level)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);

	if (level > dev_priv->wm.max_level)
		return false;

	return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
		g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
		g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
}

/* mark all levels starting from 'level' as invalid */
static void g4x_invalidate_wms(struct intel_crtc *crtc,
			       struct g4x_wm_state *wm_state, int level)
{
	if (level <= G4X_WM_LEVEL_NORMAL) {
		enum plane_id plane_id;

		for_each_plane_id_on_crtc(crtc, plane_id)
			wm_state->wm.plane[plane_id] = USHRT_MAX;
	}

	if (level <= G4X_WM_LEVEL_SR) {
		wm_state->cxsr = false;
		wm_state->sr.cursor = USHRT_MAX;
		wm_state->sr.plane = USHRT_MAX;
		wm_state->sr.fbc = USHRT_MAX;
	}

	if (level <= G4X_WM_LEVEL_HPLL) {
		wm_state->hpll_en = false;
		wm_state->hpll.cursor = USHRT_MAX;
		wm_state->hpll.plane = USHRT_MAX;
		wm_state->hpll.fbc = USHRT_MAX;
	}
}

static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
	struct intel_atomic_state *state =
		to_intel_atomic_state(crtc_state->base.state);
	struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
	int num_active_planes = hweight32(crtc_state->active_planes &
					  ~BIT(PLANE_CURSOR));
	const struct g4x_pipe_wm *raw;
	const struct intel_plane_state *old_plane_state;
	const struct intel_plane_state *new_plane_state;
	struct intel_plane *plane;
	enum plane_id plane_id;
	int i, level;
	unsigned int dirty = 0;

	for_each_oldnew_intel_plane_in_state(state, plane,
					     old_plane_state,
					     new_plane_state, i) {
		if (new_plane_state->base.crtc != &crtc->base &&
		    old_plane_state->base.crtc != &crtc->base)
			continue;

		if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
			dirty |= BIT(plane->id);
	}

	if (!dirty)
		return 0;

	level = G4X_WM_LEVEL_NORMAL;
	if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
		goto out;

	raw = &crtc_state->wm.g4x.raw[level];
	for_each_plane_id_on_crtc(crtc, plane_id)
		wm_state->wm.plane[plane_id] = raw->plane[plane_id];

	level = G4X_WM_LEVEL_SR;

	if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
		goto out;

	raw = &crtc_state->wm.g4x.raw[level];
	wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
	wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
	wm_state->sr.fbc = raw->fbc;

	wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY);

	level = G4X_WM_LEVEL_HPLL;

	if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
		goto out;

	raw = &crtc_state->wm.g4x.raw[level];
	wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
	wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
	wm_state->hpll.fbc = raw->fbc;

	wm_state->hpll_en = wm_state->cxsr;

	level++;

out:
	if (level == G4X_WM_LEVEL_NORMAL)
		return -EINVAL;

	/* invalidate the higher levels */
	g4x_invalidate_wms(crtc, wm_state, level);

	/*
	 * Determine if the FBC watermark(s) can be used. If
	 * this isn't the case we prefer to disable the FBC
	 * watermark(s) rather than disable the SR/HPLL
	 * level(s) entirely.
	 */
	wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;

	if (level >= G4X_WM_LEVEL_SR &&
	    wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
		wm_state->fbc_en = false;
	else if (level >= G4X_WM_LEVEL_HPLL &&
		 wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
		wm_state->fbc_en = false;

	return 0;
}

static int g4x_compute_intermediate_wm(struct drm_device *dev,
				       struct intel_crtc *crtc,
				       struct intel_crtc_state *crtc_state)
{
	struct g4x_wm_state *intermediate = &crtc_state->wm.g4x.intermediate;
	const struct g4x_wm_state *optimal = &crtc_state->wm.g4x.optimal;
	const struct g4x_wm_state *active = &crtc->wm.active.g4x;
	enum plane_id plane_id;

	intermediate->cxsr = optimal->cxsr && active->cxsr &&
		!crtc_state->disable_cxsr;
	intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
		!crtc_state->disable_cxsr;
	intermediate->fbc_en = optimal->fbc_en && active->fbc_en;

	for_each_plane_id_on_crtc(crtc, plane_id) {
		intermediate->wm.plane[plane_id] =
			max(optimal->wm.plane[plane_id],
			    active->wm.plane[plane_id]);

		WARN_ON(intermediate->wm.plane[plane_id] >
			g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
	}

	intermediate->sr.plane = max(optimal->sr.plane,
				     active->sr.plane);
	intermediate->sr.cursor = max(optimal->sr.cursor,
				      active->sr.cursor);
	intermediate->sr.fbc = max(optimal->sr.fbc,
				   active->sr.fbc);

	intermediate->hpll.plane = max(optimal->hpll.plane,
				       active->hpll.plane);
	intermediate->hpll.cursor = max(optimal->hpll.cursor,
					active->hpll.cursor);
	intermediate->hpll.fbc = max(optimal->hpll.fbc,
				     active->hpll.fbc);

	WARN_ON((intermediate->sr.plane >
		 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
		 intermediate->sr.cursor >
		 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
		intermediate->cxsr);
	WARN_ON((intermediate->sr.plane >
		 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
		 intermediate->sr.cursor >
		 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
		intermediate->hpll_en);

	WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR) &&
		intermediate->fbc_en && intermediate->cxsr);
	WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL) &&
		intermediate->fbc_en && intermediate->hpll_en);

	/*
	 * If our intermediate WM are identical to the final WM, then we can
	 * omit the post-vblank programming; only update if it's different.
	 */
	if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
		crtc_state->wm.need_postvbl_update = true;

	return 0;
}
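
/*
 * The max() merges above exist because the intermediate watermarks must
 * be safe both before and after the plane update; e.g. if the currently
 * active state needs a plane watermark of 40 cachelines and the new
 * optimal state only 25, the intermediate value stays at 40, and the
 * cheaper 25 is only programmed after the vblank via
 * need_postvbl_update.
 */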

static void g4x_merge_wm(struct drm_i915_private *dev_priv,
			 struct g4x_wm_values *wm)
{
	struct intel_crtc *crtc;
	int num_active_crtcs = 0;

	wm->cxsr = true;
	wm->hpll_en = true;
	wm->fbc_en = true;

	for_each_intel_crtc(&dev_priv->drm, crtc) {
		const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;

		if (!crtc->active)
			continue;

		if (!wm_state->cxsr)
			wm->cxsr = false;
		if (!wm_state->hpll_en)
			wm->hpll_en = false;
		if (!wm_state->fbc_en)
			wm->fbc_en = false;

		num_active_crtcs++;
	}

	if (num_active_crtcs != 1) {
		wm->cxsr = false;
		wm->hpll_en = false;
		wm->fbc_en = false;
	}

	for_each_intel_crtc(&dev_priv->drm, crtc) {
		const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
		enum i915_pipe pipe = crtc->pipe;

		wm->pipe[pipe] = wm_state->wm;
		if (crtc->active && wm->cxsr)
			wm->sr = wm_state->sr;
		if (crtc->active && wm->hpll_en)
			wm->hpll = wm_state->hpll;
	}
}

static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
{
	struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
	struct g4x_wm_values new_wm = {};

	g4x_merge_wm(dev_priv, &new_wm);

	if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
		return;

	if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
		_intel_set_memory_cxsr(dev_priv, false);

	g4x_write_wm_values(dev_priv, &new_wm);

	if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
		_intel_set_memory_cxsr(dev_priv, true);

	*old_wm = new_wm;
}

static void g4x_initial_watermarks(struct intel_atomic_state *state,
				   struct intel_crtc_state *crtc_state)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);

	mutex_lock(&dev_priv->wm.wm_mutex);
	crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
	g4x_program_watermarks(dev_priv);
	mutex_unlock(&dev_priv->wm.wm_mutex);
}

static void g4x_optimize_watermarks(struct intel_atomic_state *state,
				    struct intel_crtc_state *crtc_state)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);

	if (!crtc_state->wm.need_postvbl_update)
		return;

	mutex_lock(&dev_priv->wm.wm_mutex);
	intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
	g4x_program_watermarks(dev_priv);
	mutex_unlock(&dev_priv->wm.wm_mutex);
}

/* latency must be in 0.1us units. */
static unsigned int vlv_wm_method2(unsigned int pixel_rate,
				   unsigned int htotal,
				   unsigned int width,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int ret;

	ret = intel_wm_method2(pixel_rate, htotal,
			       width, cpp, latency);
	ret = DIV_ROUND_UP(ret, 64);

	return ret;
}

static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
{
	/* all latencies in usec */
	dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;

	dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;

	if (IS_CHERRYVIEW(dev_priv)) {
		dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
		dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;

		dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
	}
}

static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
				     const struct intel_plane_state *plane_state,
				     int level)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
	const struct drm_display_mode *adjusted_mode =
		&crtc_state->base.adjusted_mode;
	int clock, htotal, cpp, width, wm;

	if (dev_priv->wm.pri_latency[level] == 0)
		return USHRT_MAX;

	if (!intel_wm_plane_visible(crtc_state, plane_state))
		return 0;

	cpp = plane_state->base.fb->format->cpp[0];
	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;
	width = crtc_state->pipe_src_w;

	if (plane->id == PLANE_CURSOR) {
		/*
		 * FIXME the formula gives values that are
		 * too big for the cursor FIFO, and hence we
		 * would never be able to use cursors. For
		 * now just hardcode the watermark.
		 */
		wm = 63;
	} else {
		wm = vlv_wm_method2(clock, htotal, width, cpp,
				    dev_priv->wm.pri_latency[level] * 10);
	}

	return min_t(int, wm, USHRT_MAX);
}

static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
{
	return (active_planes & (BIT(PLANE_SPRITE0) |
				 BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
}

static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
	const struct g4x_pipe_wm *raw =
		&crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
	struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
	unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
	int num_active_planes = hweight32(active_planes);
	const int fifo_size = 511;
	int fifo_extra, fifo_left = fifo_size;
	int sprite0_fifo_extra = 0;
	unsigned int total_rate;
	enum plane_id plane_id;

	/*
	 * When enabling sprite0 after sprite1 has already been enabled
	 * we tend to get an underrun unless sprite0 already has some
	 * FIFO space allocated. Hence we always allocate at least one
	 * cacheline for sprite0 whenever sprite1 is enabled.
	 *
	 * All other plane enable sequences appear immune to this problem.
	 */
	if (vlv_need_sprite0_fifo_workaround(active_planes))
		sprite0_fifo_extra = 1;

	total_rate = raw->plane[PLANE_PRIMARY] +
		raw->plane[PLANE_SPRITE0] +
		raw->plane[PLANE_SPRITE1] +
		sprite0_fifo_extra;

	if (total_rate > fifo_size)
		return -EINVAL;

	if (total_rate == 0)
		total_rate = 1;

	for_each_plane_id_on_crtc(crtc, plane_id) {
		unsigned int rate;

		if ((active_planes & BIT(plane_id)) == 0) {
			fifo_state->plane[plane_id] = 0;
			continue;
		}

		rate = raw->plane[plane_id];
		fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
		fifo_left -= fifo_state->plane[plane_id];
	}

	fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
	fifo_left -= sprite0_fifo_extra;

	fifo_state->plane[PLANE_CURSOR] = 63;

	fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);

	/* spread the remainder evenly */
	for_each_plane_id_on_crtc(crtc, plane_id) {
		int plane_extra;

		if (fifo_left == 0)
			break;

		if ((active_planes & BIT(plane_id)) == 0)
			continue;

		plane_extra = min(fifo_extra, fifo_left);
		fifo_state->plane[plane_id] += plane_extra;
		fifo_left -= plane_extra;
	}

	WARN_ON(active_planes != 0 && fifo_left != 0);

	/* give it all to the first plane if none are active */
	if (active_planes == 0) {
		WARN_ON(fifo_left != fifo_size);
		fifo_state->plane[PLANE_PRIMARY] = fifo_left;
	}

	return 0;
}
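
/*
 * Illustrative vlv_compute_fifo() distribution: with raw PM2 watermarks
 * of 40 (primary) and 20 (sprite0) cachelines and sprite1 disabled,
 * total_rate = 60 and the 511 cacheline FIFO splits into
 * 511 * 40 / 60 = 340 and 511 * 20 / 60 = 170; the single leftover
 * cacheline is then handed to the first active plane, giving 341/170.
 * The cursor always keeps its fixed 63 entries on the side.
 */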

/* mark all levels starting from 'level' as invalid */
static void vlv_invalidate_wms(struct intel_crtc *crtc,
			       struct vlv_wm_state *wm_state, int level)
{
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);

	for (; level < intel_wm_num_levels(dev_priv); level++) {
		enum plane_id plane_id;

		for_each_plane_id_on_crtc(crtc, plane_id)
			wm_state->wm[level].plane[plane_id] = USHRT_MAX;

		wm_state->sr[level].cursor = USHRT_MAX;
		wm_state->sr[level].plane = USHRT_MAX;
	}
}

static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
{
	if (wm > fifo_size)
		return USHRT_MAX;
	else
		return fifo_size - wm;
}
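
/*
 * vlv_invert_wm_value() converts "cachelines needed" into the "FIFO
 * level" convention the registers expect; e.g. a plane needing 40 of
 * its 341 allocated cachelines is programmed as 341 - 40 = 301, while a
 * requirement exceeding the allocation becomes USHRT_MAX and
 * invalidates that level.
 */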
1743 */ 1744 static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, 1745 int level, enum plane_id plane_id, u16 value) 1746 { 1747 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); 1748 int num_levels = intel_wm_num_levels(dev_priv); 1749 bool dirty = false; 1750 1751 for (; level < num_levels; level++) { 1752 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; 1753 1754 dirty |= raw->plane[plane_id] != value; 1755 raw->plane[plane_id] = value; 1756 } 1757 1758 return dirty; 1759 } 1760 1761 static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, 1762 const struct intel_plane_state *plane_state) 1763 { 1764 struct intel_plane *plane = to_intel_plane(plane_state->base.plane); 1765 enum plane_id plane_id = plane->id; 1766 int num_levels = intel_wm_num_levels(to_i915(plane->base.dev)); 1767 int level; 1768 bool dirty = false; 1769 1770 if (!intel_wm_plane_visible(crtc_state, plane_state)) { 1771 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0); 1772 goto out; 1773 } 1774 1775 for (level = 0; level < num_levels; level++) { 1776 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; 1777 int wm = vlv_compute_wm_level(crtc_state, plane_state, level); 1778 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511; 1779 1780 if (wm > max_wm) 1781 break; 1782 1783 dirty |= raw->plane[plane_id] != wm; 1784 raw->plane[plane_id] = wm; 1785 } 1786 1787 /* mark all higher levels as invalid */ 1788 dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX); 1789 1790 out: 1791 if (dirty) 1792 DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n", 1793 plane->base.name, 1794 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], 1795 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], 1796 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]); 1797 1798 return dirty; 1799 } 1800 1801 static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, 1802 enum plane_id plane_id, int level) 1803 { 1804 const struct g4x_pipe_wm *raw = 1805 &crtc_state->wm.vlv.raw[level]; 1806 const struct vlv_fifo_state *fifo_state = 1807 &crtc_state->wm.vlv.fifo_state; 1808 1809 return raw->plane[plane_id] <= fifo_state->plane[plane_id]; 1810 } 1811 1812 static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level) 1813 { 1814 return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) && 1815 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) && 1816 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) && 1817 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); 1818 } 1819 1820 static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) 1821 { 1822 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); 1823 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); 1824 struct intel_atomic_state *state = 1825 to_intel_atomic_state(crtc_state->base.state); 1826 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; 1827 const struct vlv_fifo_state *fifo_state = 1828 &crtc_state->wm.vlv.fifo_state; 1829 int num_active_planes = hweight32(crtc_state->active_planes & 1830 ~BIT(PLANE_CURSOR)); 1831 bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base); 1832 const struct intel_plane_state *old_plane_state; 1833 const struct intel_plane_state *new_plane_state; 1834 struct intel_plane *plane; 1835 enum plane_id plane_id; 1836 int level, ret, i; 1837 unsigned int dirty = 0; 1838 1839 
for_each_oldnew_intel_plane_in_state(state, plane, 1840 old_plane_state, 1841 new_plane_state, i) { 1842 if (new_plane_state->base.crtc != &crtc->base && 1843 old_plane_state->base.crtc != &crtc->base) 1844 continue; 1845 1846 if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state)) 1847 dirty |= BIT(plane->id); 1848 } 1849 1850 /* 1851 * DSPARB registers may have been reset due to the 1852 * power well being turned off. Make sure we restore 1853 * them to a consistent state even if no primary/sprite 1854 * planes are initially active. 1855 */ 1856 if (needs_modeset) 1857 crtc_state->fifo_changed = true; 1858 1859 if (!dirty) 1860 return 0; 1861 1862 /* cursor changes don't warrant a FIFO recompute */ 1863 if (dirty & ~BIT(PLANE_CURSOR)) { 1864 const struct intel_crtc_state *old_crtc_state = 1865 intel_atomic_get_old_crtc_state(state, crtc); 1866 const struct vlv_fifo_state *old_fifo_state = 1867 &old_crtc_state->wm.vlv.fifo_state; 1868 1869 ret = vlv_compute_fifo(crtc_state); 1870 if (ret) 1871 return ret; 1872 1873 if (needs_modeset || 1874 memcmp(old_fifo_state, fifo_state, 1875 sizeof(*fifo_state)) != 0) 1876 crtc_state->fifo_changed = true; 1877 } 1878 1879 /* initially allow all levels */ 1880 wm_state->num_levels = intel_wm_num_levels(dev_priv); 1881 /* 1882 * Note that enabling cxsr with no primary/sprite planes 1883 * enabled can wedge the pipe. Hence we only allow cxsr 1884 * with exactly one enabled primary/sprite plane. 1885 */ 1886 wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1; 1887 1888 for (level = 0; level < wm_state->num_levels; level++) { 1889 const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; 1890 const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; 1891 1892 if (!vlv_raw_crtc_wm_is_valid(crtc_state, level)) 1893 break; 1894 1895 for_each_plane_id_on_crtc(crtc, plane_id) { 1896 wm_state->wm[level].plane[plane_id] = 1897 vlv_invert_wm_value(raw->plane[plane_id], 1898 fifo_state->plane[plane_id]); 1899 } 1900 1901 wm_state->sr[level].plane = 1902 vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY], 1903 raw->plane[PLANE_SPRITE0], 1904 raw->plane[PLANE_SPRITE1]), 1905 sr_fifo_size); 1906 1907 wm_state->sr[level].cursor = 1908 vlv_invert_wm_value(raw->plane[PLANE_CURSOR], 1909 63); 1910 } 1911 1912 if (level == 0) 1913 return -EINVAL; 1914 1915 /* limit to only levels we can actually handle */ 1916 wm_state->num_levels = level; 1917 1918 /* invalidate the higher levels */ 1919 vlv_invalidate_wms(crtc, wm_state, level); 1920 1921 return 0; 1922 } 1923 1924 #define VLV_FIFO(plane, value) \ 1925 (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV) 1926 1927 static void vlv_atomic_update_fifo(struct intel_atomic_state *state, 1928 struct intel_crtc_state *crtc_state) 1929 { 1930 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); 1931 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); 1932 const struct vlv_fifo_state *fifo_state = 1933 &crtc_state->wm.vlv.fifo_state; 1934 int sprite0_start, sprite1_start, fifo_size; 1935 1936 if (!crtc_state->fifo_changed) 1937 return; 1938 1939 sprite0_start = fifo_state->plane[PLANE_PRIMARY]; 1940 sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start; 1941 fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start; 1942 1943 WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63); 1944 WARN_ON(fifo_size != 511); 1945 1946 trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size); 1947 1948 /* 1949 * uncore.lock serves a double purpose 
here. It allows us to 1950 * use the less expensive I915_{READ,WRITE}_FW() functions, and 1951 * it protects the DSPARB registers from getting clobbered by 1952 * parallel updates from multiple pipes. 1953 * 1954 * intel_pipe_update_start() has already disabled interrupts 1955 * for us, so a plain spin_lock() is sufficient here. 1956 */ 1957 lockmgr(&dev_priv->uncore.lock, LK_EXCLUSIVE); 1958 1959 switch (crtc->pipe) { 1960 uint32_t dsparb, dsparb2, dsparb3; 1961 case PIPE_A: 1962 dsparb = I915_READ_FW(DSPARB); 1963 dsparb2 = I915_READ_FW(DSPARB2); 1964 1965 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) | 1966 VLV_FIFO(SPRITEB, 0xff)); 1967 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) | 1968 VLV_FIFO(SPRITEB, sprite1_start)); 1969 1970 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) | 1971 VLV_FIFO(SPRITEB_HI, 0x1)); 1972 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) | 1973 VLV_FIFO(SPRITEB_HI, sprite1_start >> 8)); 1974 1975 I915_WRITE_FW(DSPARB, dsparb); 1976 I915_WRITE_FW(DSPARB2, dsparb2); 1977 break; 1978 case PIPE_B: 1979 dsparb = I915_READ_FW(DSPARB); 1980 dsparb2 = I915_READ_FW(DSPARB2); 1981 1982 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) | 1983 VLV_FIFO(SPRITED, 0xff)); 1984 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) | 1985 VLV_FIFO(SPRITED, sprite1_start)); 1986 1987 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) | 1988 VLV_FIFO(SPRITED_HI, 0xff)); 1989 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) | 1990 VLV_FIFO(SPRITED_HI, sprite1_start >> 8)); 1991 1992 I915_WRITE_FW(DSPARB, dsparb); 1993 I915_WRITE_FW(DSPARB2, dsparb2); 1994 break; 1995 case PIPE_C: 1996 dsparb3 = I915_READ_FW(DSPARB3); 1997 dsparb2 = I915_READ_FW(DSPARB2); 1998 1999 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) | 2000 VLV_FIFO(SPRITEF, 0xff)); 2001 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) | 2002 VLV_FIFO(SPRITEF, sprite1_start)); 2003 2004 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) | 2005 VLV_FIFO(SPRITEF_HI, 0xff)); 2006 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) | 2007 VLV_FIFO(SPRITEF_HI, sprite1_start >> 8)); 2008 2009 I915_WRITE_FW(DSPARB3, dsparb3); 2010 I915_WRITE_FW(DSPARB2, dsparb2); 2011 break; 2012 default: 2013 break; 2014 } 2015 2016 POSTING_READ_FW(DSPARB); 2017 2018 lockmgr(&dev_priv->uncore.lock, LK_RELEASE); 2019 } 2020 2021 #undef VLV_FIFO 2022 2023 static int vlv_compute_intermediate_wm(struct drm_device *dev, 2024 struct intel_crtc *crtc, 2025 struct intel_crtc_state *crtc_state) 2026 { 2027 struct vlv_wm_state *intermediate = &crtc_state->wm.vlv.intermediate; 2028 const struct vlv_wm_state *optimal = &crtc_state->wm.vlv.optimal; 2029 const struct vlv_wm_state *active = &crtc->wm.active.vlv; 2030 int level; 2031 2032 intermediate->num_levels = min(optimal->num_levels, active->num_levels); 2033 intermediate->cxsr = optimal->cxsr && active->cxsr && 2034 !crtc_state->disable_cxsr; 2035 2036 for (level = 0; level < intermediate->num_levels; level++) { 2037 enum plane_id plane_id; 2038 2039 for_each_plane_id_on_crtc(crtc, plane_id) { 2040 intermediate->wm[level].plane[plane_id] = 2041 min(optimal->wm[level].plane[plane_id], 2042 active->wm[level].plane[plane_id]); 2043 } 2044 2045 intermediate->sr[level].plane = min(optimal->sr[level].plane, 2046 active->sr[level].plane); 2047 intermediate->sr[level].cursor = min(optimal->sr[level].cursor, 2048 active->sr[level].cursor); 2049 } 2050 2051 vlv_invalidate_wms(crtc, intermediate, level); 2052 2053 /* 2054 * If our intermediate WM are identical to the final WM, then we can 2055 * omit the post-vblank programming; only update if it's different. 
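 * (vlv_optimize_watermarks() consumes need_postvbl_update and performs
 * that post-vblank programming.)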
2056 */ 2057 if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0) 2058 crtc_state->wm.need_postvbl_update = true; 2059 2060 return 0; 2061 } 2062 2063 static void vlv_merge_wm(struct drm_i915_private *dev_priv, 2064 struct vlv_wm_values *wm) 2065 { 2066 struct intel_crtc *crtc; 2067 int num_active_crtcs = 0; 2068 2069 wm->level = dev_priv->wm.max_level; 2070 wm->cxsr = true; 2071 2072 for_each_intel_crtc(&dev_priv->drm, crtc) { 2073 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; 2074 2075 if (!crtc->active) 2076 continue; 2077 2078 if (!wm_state->cxsr) 2079 wm->cxsr = false; 2080 2081 num_active_crtcs++; 2082 wm->level = min_t(int, wm->level, wm_state->num_levels - 1); 2083 } 2084 2085 if (num_active_crtcs != 1) 2086 wm->cxsr = false; 2087 2088 if (num_active_crtcs > 1) 2089 wm->level = VLV_WM_LEVEL_PM2; 2090 2091 for_each_intel_crtc(&dev_priv->drm, crtc) { 2092 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; 2093 enum i915_pipe pipe = crtc->pipe; 2094 2095 wm->pipe[pipe] = wm_state->wm[wm->level]; 2096 if (crtc->active && wm->cxsr) 2097 wm->sr = wm_state->sr[wm->level]; 2098 2099 wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2; 2100 wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2; 2101 wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2; 2102 wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2; 2103 } 2104 } 2105 2106 static void vlv_program_watermarks(struct drm_i915_private *dev_priv) 2107 { 2108 struct vlv_wm_values *old_wm = &dev_priv->wm.vlv; 2109 struct vlv_wm_values new_wm = {}; 2110 2111 vlv_merge_wm(dev_priv, &new_wm); 2112 2113 if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) 2114 return; 2115 2116 if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS)) 2117 chv_set_memory_dvfs(dev_priv, false); 2118 2119 if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5)) 2120 chv_set_memory_pm5(dev_priv, false); 2121 2122 if (is_disabling(old_wm->cxsr, new_wm.cxsr, true)) 2123 _intel_set_memory_cxsr(dev_priv, false); 2124 2125 vlv_write_wm_values(dev_priv, &new_wm); 2126 2127 if (is_enabling(old_wm->cxsr, new_wm.cxsr, true)) 2128 _intel_set_memory_cxsr(dev_priv, true); 2129 2130 if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5)) 2131 chv_set_memory_pm5(dev_priv, true); 2132 2133 if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS)) 2134 chv_set_memory_dvfs(dev_priv, true); 2135 2136 *old_wm = new_wm; 2137 } 2138 2139 static void vlv_initial_watermarks(struct intel_atomic_state *state, 2140 struct intel_crtc_state *crtc_state) 2141 { 2142 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); 2143 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); 2144 2145 mutex_lock(&dev_priv->wm.wm_mutex); 2146 crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate; 2147 vlv_program_watermarks(dev_priv); 2148 mutex_unlock(&dev_priv->wm.wm_mutex); 2149 } 2150 2151 static void vlv_optimize_watermarks(struct intel_atomic_state *state, 2152 struct intel_crtc_state *crtc_state) 2153 { 2154 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); 2155 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); 2156 2157 if (!crtc_state->wm.need_postvbl_update) 2158 return; 2159 2160 mutex_lock(&dev_priv->wm.wm_mutex); 2161 intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; 2162 vlv_program_watermarks(dev_priv); 2163 mutex_unlock(&dev_priv->wm.wm_mutex); 2164 } 2165 2166 static void i965_update_wm(struct intel_crtc 
*unused_crtc) 2167 { 2168 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); 2169 struct intel_crtc *crtc; 2170 int srwm = 1; 2171 int cursor_sr = 16; 2172 bool cxsr_enabled; 2173 2174 /* Calc sr entries for one plane configs */ 2175 crtc = single_enabled_crtc(dev_priv); 2176 if (crtc) { 2177 /* self-refresh has much higher latency */ 2178 static const int sr_latency_ns = 12000; 2179 const struct drm_display_mode *adjusted_mode = 2180 &crtc->config->base.adjusted_mode; 2181 const struct drm_framebuffer *fb = 2182 crtc->base.primary->state->fb; 2183 int clock = adjusted_mode->crtc_clock; 2184 int htotal = adjusted_mode->crtc_htotal; 2185 int hdisplay = crtc->config->pipe_src_w; 2186 int cpp = fb->format->cpp[0]; 2187 int entries; 2188 2189 entries = intel_wm_method2(clock, htotal, 2190 hdisplay, cpp, sr_latency_ns / 100); 2191 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE); 2192 srwm = I965_FIFO_SIZE - entries; 2193 if (srwm < 0) 2194 srwm = 1; 2195 srwm &= 0x1ff; 2196 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n", 2197 entries, srwm); 2198 2199 entries = intel_wm_method2(clock, htotal, 2200 crtc->base.cursor->state->crtc_w, 4, 2201 sr_latency_ns / 100); 2202 entries = DIV_ROUND_UP(entries, 2203 i965_cursor_wm_info.cacheline_size) + 2204 i965_cursor_wm_info.guard_size; 2205 2206 cursor_sr = i965_cursor_wm_info.fifo_size - entries; 2207 if (cursor_sr > i965_cursor_wm_info.max_wm) 2208 cursor_sr = i965_cursor_wm_info.max_wm; 2209 2210 DRM_DEBUG_KMS("self-refresh watermark: display plane %d " 2211 "cursor %d\n", srwm, cursor_sr); 2212 2213 cxsr_enabled = true; 2214 } else { 2215 cxsr_enabled = false; 2216 /* Turn off self refresh if both pipes are enabled */ 2217 intel_set_memory_cxsr(dev_priv, false); 2218 } 2219 2220 DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n", 2221 srwm); 2222 2223 /* 965 has limitations... 
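 * The plane and cursor FIFO watermarks are simply programmed to a
 * fixed 8 below; only the self-refresh values are computed.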
*/ 2224 I915_WRITE(DSPFW1, FW_WM(srwm, SR) | 2225 FW_WM(8, CURSORB) | 2226 FW_WM(8, PLANEB) | 2227 FW_WM(8, PLANEA)); 2228 I915_WRITE(DSPFW2, FW_WM(8, CURSORA) | 2229 FW_WM(8, PLANEC_OLD)); 2230 /* update cursor SR watermark */ 2231 I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR)); 2232 2233 if (cxsr_enabled) 2234 intel_set_memory_cxsr(dev_priv, true); 2235 } 2236 2237 #undef FW_WM 2238 2239 static void i9xx_update_wm(struct intel_crtc *unused_crtc) 2240 { 2241 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); 2242 const struct intel_watermark_params *wm_info; 2243 uint32_t fwater_lo; 2244 uint32_t fwater_hi; 2245 int cwm, srwm = 1; 2246 int fifo_size; 2247 int planea_wm, planeb_wm; 2248 struct intel_crtc *crtc, *enabled = NULL; 2249 2250 if (IS_I945GM(dev_priv)) 2251 wm_info = &i945_wm_info; 2252 else if (!IS_GEN2(dev_priv)) 2253 wm_info = &i915_wm_info; 2254 else 2255 wm_info = &i830_a_wm_info; 2256 2257 fifo_size = dev_priv->display.get_fifo_size(dev_priv, 0); 2258 crtc = intel_get_crtc_for_plane(dev_priv, 0); 2259 if (intel_crtc_active(crtc)) { 2260 const struct drm_display_mode *adjusted_mode = 2261 &crtc->config->base.adjusted_mode; 2262 const struct drm_framebuffer *fb = 2263 crtc->base.primary->state->fb; 2264 int cpp; 2265 2266 if (IS_GEN2(dev_priv)) 2267 cpp = 4; 2268 else 2269 cpp = fb->format->cpp[0]; 2270 2271 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, 2272 wm_info, fifo_size, cpp, 2273 pessimal_latency_ns); 2274 enabled = crtc; 2275 } else { 2276 planea_wm = fifo_size - wm_info->guard_size; 2277 if (planea_wm > (long)wm_info->max_wm) 2278 planea_wm = wm_info->max_wm; 2279 } 2280 2281 if (IS_GEN2(dev_priv)) 2282 wm_info = &i830_bc_wm_info; 2283 2284 fifo_size = dev_priv->display.get_fifo_size(dev_priv, 1); 2285 crtc = intel_get_crtc_for_plane(dev_priv, 1); 2286 if (intel_crtc_active(crtc)) { 2287 const struct drm_display_mode *adjusted_mode = 2288 &crtc->config->base.adjusted_mode; 2289 const struct drm_framebuffer *fb = 2290 crtc->base.primary->state->fb; 2291 int cpp; 2292 2293 if (IS_GEN2(dev_priv)) 2294 cpp = 4; 2295 else 2296 cpp = fb->format->cpp[0]; 2297 2298 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock, 2299 wm_info, fifo_size, cpp, 2300 pessimal_latency_ns); 2301 if (enabled == NULL) 2302 enabled = crtc; 2303 else 2304 enabled = NULL; 2305 } else { 2306 planeb_wm = fifo_size - wm_info->guard_size; 2307 if (planeb_wm > (long)wm_info->max_wm) 2308 planeb_wm = wm_info->max_wm; 2309 } 2310 2311 DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm); 2312 2313 if (IS_I915GM(dev_priv) && enabled) { 2314 struct drm_i915_gem_object *obj; 2315 2316 obj = intel_fb_obj(enabled->base.primary->state->fb); 2317 2318 /* self-refresh seems busted with untiled */ 2319 if (!i915_gem_object_is_tiled(obj)) 2320 enabled = NULL; 2321 } 2322 2323 /* 2324 * Overlay gets an aggressive default since video jitter is bad. 2325 */ 2326 cwm = 2; 2327 2328 /* Play safe and disable self-refresh before adjusting watermarks. 
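 * It is re-enabled at the bottom of this function once an enabled
 * crtc is known to exist.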
*/ 2329 intel_set_memory_cxsr(dev_priv, false); 2330 2331 /* Calc sr entries for one plane configs */ 2332 if (HAS_FW_BLC(dev_priv) && enabled) { 2333 /* self-refresh has much higher latency */ 2334 static const int sr_latency_ns = 6000; 2335 const struct drm_display_mode *adjusted_mode = 2336 &enabled->config->base.adjusted_mode; 2337 const struct drm_framebuffer *fb = 2338 enabled->base.primary->state->fb; 2339 int clock = adjusted_mode->crtc_clock; 2340 int htotal = adjusted_mode->crtc_htotal; 2341 int hdisplay = enabled->config->pipe_src_w; 2342 int cpp; 2343 int entries; 2344 2345 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv)) 2346 cpp = 4; 2347 else 2348 cpp = fb->format->cpp[0]; 2349 2350 entries = intel_wm_method2(clock, htotal, hdisplay, cpp, 2351 sr_latency_ns / 100); 2352 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size); 2353 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries); 2354 srwm = wm_info->fifo_size - entries; 2355 if (srwm < 0) 2356 srwm = 1; 2357 2358 if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) 2359 I915_WRITE(FW_BLC_SELF, 2360 FW_BLC_SELF_FIFO_MASK | (srwm & 0xff)); 2361 else 2362 I915_WRITE(FW_BLC_SELF, srwm & 0x3f); 2363 } 2364 2365 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n", 2366 planea_wm, planeb_wm, cwm, srwm); 2367 2368 fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f); 2369 fwater_hi = (cwm & 0x1f); 2370 2371 /* Set request length to 8 cachelines per fetch */ 2372 fwater_lo = fwater_lo | (1 << 24) | (1 << 8); 2373 fwater_hi = fwater_hi | (1 << 8); 2374 2375 I915_WRITE(FW_BLC, fwater_lo); 2376 I915_WRITE(FW_BLC2, fwater_hi); 2377 2378 if (enabled) 2379 intel_set_memory_cxsr(dev_priv, true); 2380 } 2381 2382 static void i845_update_wm(struct intel_crtc *unused_crtc) 2383 { 2384 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); 2385 struct intel_crtc *crtc; 2386 const struct drm_display_mode *adjusted_mode; 2387 uint32_t fwater_lo; 2388 int planea_wm; 2389 2390 crtc = single_enabled_crtc(dev_priv); 2391 if (crtc == NULL) 2392 return; 2393 2394 adjusted_mode = &crtc->config->base.adjusted_mode; 2395 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, 2396 &i845_wm_info, 2397 dev_priv->display.get_fifo_size(dev_priv, 0), 2398 4, pessimal_latency_ns); 2399 fwater_lo = I915_READ(FW_BLC) & ~0xfff; 2400 fwater_lo |= (3<<8) | planea_wm; 2401 2402 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm); 2403 2404 I915_WRITE(FW_BLC, fwater_lo); 2405 } 2406 2407 /* latency must be in 0.1us units. */ 2408 static unsigned int ilk_wm_method1(unsigned int pixel_rate, 2409 unsigned int cpp, 2410 unsigned int latency) 2411 { 2412 unsigned int ret; 2413 2414 ret = intel_wm_method1(pixel_rate, cpp, latency); 2415 ret = DIV_ROUND_UP(ret, 64) + 2; 2416 2417 return ret; 2418 } 2419 2420 /* latency must be in 0.1us units. */ 2421 static unsigned int ilk_wm_method2(unsigned int pixel_rate, 2422 unsigned int htotal, 2423 unsigned int width, 2424 unsigned int cpp, 2425 unsigned int latency) 2426 { 2427 unsigned int ret; 2428 2429 ret = intel_wm_method2(pixel_rate, htotal, 2430 width, cpp, latency); 2431 ret = DIV_ROUND_UP(ret, 64) + 2; 2432 2433 return ret; 2434 } 2435 2436 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels, 2437 uint8_t cpp) 2438 { 2439 /* 2440 * Neither of these should be possible since this function shouldn't be 2441 * called if the CRTC is off or the plane is invisible. 
But let's be 2442 * extra paranoid to avoid a potential divide-by-zero if we screw up 2443 * elsewhere in the driver. 2444 */ 2445 if (WARN_ON(!cpp)) 2446 return 0; 2447 if (WARN_ON(!horiz_pixels)) 2448 return 0; 2449 2450 return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2; 2451 } 2452 2453 struct ilk_wm_maximums { 2454 uint16_t pri; 2455 uint16_t spr; 2456 uint16_t cur; 2457 uint16_t fbc; 2458 }; 2459 2460 /* 2461 * For both WM_PIPE and WM_LP. 2462 * mem_value must be in 0.1us units. 2463 */ 2464 static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, 2465 const struct intel_plane_state *pstate, 2466 uint32_t mem_value, 2467 bool is_lp) 2468 { 2469 uint32_t method1, method2; 2470 int cpp; 2471 2472 if (!intel_wm_plane_visible(cstate, pstate)) 2473 return 0; 2474 2475 cpp = pstate->base.fb->format->cpp[0]; 2476 2477 method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); 2478 2479 if (!is_lp) 2480 return method1; 2481 2482 method2 = ilk_wm_method2(cstate->pixel_rate, 2483 cstate->base.adjusted_mode.crtc_htotal, 2484 drm_rect_width(&pstate->base.dst), 2485 cpp, mem_value); 2486 2487 return min(method1, method2); 2488 } 2489 2490 /* 2491 * For both WM_PIPE and WM_LP. 2492 * mem_value must be in 0.1us units. 2493 */ 2494 static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, 2495 const struct intel_plane_state *pstate, 2496 uint32_t mem_value) 2497 { 2498 uint32_t method1, method2; 2499 int cpp; 2500 2501 if (!intel_wm_plane_visible(cstate, pstate)) 2502 return 0; 2503 2504 cpp = pstate->base.fb->format->cpp[0]; 2505 2506 method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); 2507 method2 = ilk_wm_method2(cstate->pixel_rate, 2508 cstate->base.adjusted_mode.crtc_htotal, 2509 drm_rect_width(&pstate->base.dst), 2510 cpp, mem_value); 2511 return min(method1, method2); 2512 } 2513 2514 /* 2515 * For both WM_PIPE and WM_LP. 2516 * mem_value must be in 0.1us units. 2517 */ 2518 static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, 2519 const struct intel_plane_state *pstate, 2520 uint32_t mem_value) 2521 { 2522 int cpp; 2523 2524 if (!intel_wm_plane_visible(cstate, pstate)) 2525 return 0; 2526 2527 cpp = pstate->base.fb->format->cpp[0]; 2528 2529 return ilk_wm_method2(cstate->pixel_rate, 2530 cstate->base.adjusted_mode.crtc_htotal, 2531 pstate->base.crtc_w, cpp, mem_value); 2532 } 2533 2534 /* Only for WM_LP. */ 2535 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, 2536 const struct intel_plane_state *pstate, 2537 uint32_t pri_val) 2538 { 2539 int cpp; 2540 2541 if (!intel_wm_plane_visible(cstate, pstate)) 2542 return 0; 2543 2544 cpp = pstate->base.fb->format->cpp[0]; 2545 2546 return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp); 2547 } 2548 2549 static unsigned int 2550 ilk_display_fifo_size(const struct drm_i915_private *dev_priv) 2551 { 2552 if (INTEL_GEN(dev_priv) >= 8) 2553 return 3072; 2554 else if (INTEL_GEN(dev_priv) >= 7) 2555 return 768; 2556 else 2557 return 512; 2558 } 2559 2560 static unsigned int 2561 ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv, 2562 int level, bool is_sprite) 2563 { 2564 if (INTEL_GEN(dev_priv) >= 8) 2565 /* BDW primary/sprite plane watermarks */ 2566 return level == 0 ? 255 : 2047; 2567 else if (INTEL_GEN(dev_priv) >= 7) 2568 /* IVB/HSW primary/sprite plane watermarks */ 2569 return level == 0 ? 127 : 1023; 2570 else if (!is_sprite) 2571 /* ILK/SNB primary plane watermarks */ 2572 return level == 0 ? 
127 : 511; 2573 else 2574 /* ILK/SNB sprite plane watermarks */ 2575 return level == 0 ? 63 : 255; 2576 } 2577 2578 static unsigned int 2579 ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level) 2580 { 2581 if (INTEL_GEN(dev_priv) >= 7) 2582 return level == 0 ? 63 : 255; 2583 else 2584 return level == 0 ? 31 : 63; 2585 } 2586 2587 static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv) 2588 { 2589 if (INTEL_GEN(dev_priv) >= 8) 2590 return 31; 2591 else 2592 return 15; 2593 } 2594 2595 /* Calculate the maximum primary/sprite plane watermark */ 2596 static unsigned int ilk_plane_wm_max(const struct drm_device *dev, 2597 int level, 2598 const struct intel_wm_config *config, 2599 enum intel_ddb_partitioning ddb_partitioning, 2600 bool is_sprite) 2601 { 2602 struct drm_i915_private *dev_priv = to_i915(dev); 2603 unsigned int fifo_size = ilk_display_fifo_size(dev_priv); 2604 2605 /* if sprites aren't enabled, sprites get nothing */ 2606 if (is_sprite && !config->sprites_enabled) 2607 return 0; 2608 2609 /* HSW allows LP1+ watermarks even with multiple pipes */ 2610 if (level == 0 || config->num_pipes_active > 1) { 2611 fifo_size /= INTEL_INFO(dev_priv)->num_pipes; 2612 2613 /* 2614 * For some reason the non self refresh 2615 * FIFO size is only half of the self 2616 * refresh FIFO size on ILK/SNB. 2617 */ 2618 if (INTEL_GEN(dev_priv) <= 6) 2619 fifo_size /= 2; 2620 } 2621 2622 if (config->sprites_enabled) { 2623 /* level 0 is always calculated with 1:1 split */ 2624 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) { 2625 if (is_sprite) 2626 fifo_size *= 5; 2627 fifo_size /= 6; 2628 } else { 2629 fifo_size /= 2; 2630 } 2631 } 2632 2633 /* clamp to max that the registers can hold */ 2634 return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite)); 2635 } 2636 2637 /* Calculate the maximum cursor plane watermark */ 2638 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev, 2639 int level, 2640 const struct intel_wm_config *config) 2641 { 2642 /* HSW LP1+ watermarks w/ multiple pipes */ 2643 if (level > 0 && config->num_pipes_active > 1) 2644 return 64; 2645 2646 /* otherwise just report max that registers can hold */ 2647 return ilk_cursor_wm_reg_max(to_i915(dev), level); 2648 } 2649 2650 static void ilk_compute_wm_maximums(const struct drm_device *dev, 2651 int level, 2652 const struct intel_wm_config *config, 2653 enum intel_ddb_partitioning ddb_partitioning, 2654 struct ilk_wm_maximums *max) 2655 { 2656 max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false); 2657 max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true); 2658 max->cur = ilk_cursor_wm_max(dev, level, config); 2659 max->fbc = ilk_fbc_wm_reg_max(to_i915(dev)); 2660 } 2661 2662 static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv, 2663 int level, 2664 struct ilk_wm_maximums *max) 2665 { 2666 max->pri = ilk_plane_wm_reg_max(dev_priv, level, false); 2667 max->spr = ilk_plane_wm_reg_max(dev_priv, level, true); 2668 max->cur = ilk_cursor_wm_reg_max(dev_priv, level); 2669 max->fbc = ilk_fbc_wm_reg_max(dev_priv); 2670 } 2671 2672 static bool ilk_validate_wm_level(int level, 2673 const struct ilk_wm_maximums *max, 2674 struct intel_wm_level *result) 2675 { 2676 bool ret; 2677 2678 /* already determined to be invalid? 
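 * (when called on merged LP levels, ilk_merge_wm_level() may already
 * have cleared result->enable because some pipe could not support it)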
*/ 2679 if (!result->enable) 2680 return false; 2681 2682 result->enable = result->pri_val <= max->pri && 2683 result->spr_val <= max->spr && 2684 result->cur_val <= max->cur; 2685 2686 ret = result->enable; 2687 2688 /* 2689 * HACK until we can pre-compute everything, 2690 * and thus fail gracefully if LP0 watermarks 2691 * are exceeded... 2692 */ 2693 if (level == 0 && !result->enable) { 2694 if (result->pri_val > max->pri) 2695 DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n", 2696 level, result->pri_val, max->pri); 2697 if (result->spr_val > max->spr) 2698 DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n", 2699 level, result->spr_val, max->spr); 2700 if (result->cur_val > max->cur) 2701 DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n", 2702 level, result->cur_val, max->cur); 2703 2704 result->pri_val = min_t(uint32_t, result->pri_val, max->pri); 2705 result->spr_val = min_t(uint32_t, result->spr_val, max->spr); 2706 result->cur_val = min_t(uint32_t, result->cur_val, max->cur); 2707 result->enable = true; 2708 } 2709 2710 return ret; 2711 } 2712 2713 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, 2714 const struct intel_crtc *intel_crtc, 2715 int level, 2716 struct intel_crtc_state *cstate, 2717 const struct intel_plane_state *pristate, 2718 const struct intel_plane_state *sprstate, 2719 const struct intel_plane_state *curstate, 2720 struct intel_wm_level *result) 2721 { 2722 uint16_t pri_latency = dev_priv->wm.pri_latency[level]; 2723 uint16_t spr_latency = dev_priv->wm.spr_latency[level]; 2724 uint16_t cur_latency = dev_priv->wm.cur_latency[level]; 2725 2726 /* WM1+ latency values stored in 0.5us units */ 2727 if (level > 0) { 2728 pri_latency *= 5; 2729 spr_latency *= 5; 2730 cur_latency *= 5; 2731 } 2732 2733 if (pristate) { 2734 result->pri_val = ilk_compute_pri_wm(cstate, pristate, 2735 pri_latency, level); 2736 result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val); 2737 } 2738 2739 if (sprstate) 2740 result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency); 2741 2742 if (curstate) 2743 result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency); 2744 2745 result->enable = true; 2746 } 2747 2748 static uint32_t 2749 hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) 2750 { 2751 const struct intel_atomic_state *intel_state = 2752 to_intel_atomic_state(cstate->base.state); 2753 const struct drm_display_mode *adjusted_mode = 2754 &cstate->base.adjusted_mode; 2755 u32 linetime, ips_linetime; 2756 2757 if (!cstate->base.active) 2758 return 0; 2759 if (WARN_ON(adjusted_mode->crtc_clock == 0)) 2760 return 0; 2761 if (WARN_ON(intel_state->cdclk.logical.cdclk == 0)) 2762 return 0; 2763 2764 /* The WM are computed based on how long it takes to fill a single 2765 * row at the given clock rate, multiplied by 8.
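 * E.g. with illustrative numbers: htotal 2200 at a 148500 kHz pixel
 * clock gives DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119, i.e.
 * a ~14.8 usec line time expressed in 1/8 usec units.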
2766 * */ 2767 linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, 2768 adjusted_mode->crtc_clock); 2769 ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, 2770 intel_state->cdclk.logical.cdclk); 2771 2772 return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | 2773 PIPE_WM_LINETIME_TIME(linetime); 2774 } 2775 2776 static void intel_read_wm_latency(struct drm_i915_private *dev_priv, 2777 uint16_t wm[8]) 2778 { 2779 if (INTEL_GEN(dev_priv) >= 9) { 2780 uint32_t val; 2781 int ret, i; 2782 int level, max_level = ilk_wm_max_level(dev_priv); 2783 2784 /* read the first set of memory latencies[0:3] */ 2785 val = 0; /* data0 to be programmed to 0 for first set */ 2786 mutex_lock(&dev_priv->pcu_lock); 2787 ret = sandybridge_pcode_read(dev_priv, 2788 GEN9_PCODE_READ_MEM_LATENCY, 2789 &val); 2790 mutex_unlock(&dev_priv->pcu_lock); 2791 2792 if (ret) { 2793 DRM_ERROR("SKL Mailbox read error = %d\n", ret); 2794 return; 2795 } 2796 2797 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK; 2798 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & 2799 GEN9_MEM_LATENCY_LEVEL_MASK; 2800 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & 2801 GEN9_MEM_LATENCY_LEVEL_MASK; 2802 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & 2803 GEN9_MEM_LATENCY_LEVEL_MASK; 2804 2805 /* read the second set of memory latencies[4:7] */ 2806 val = 1; /* data0 to be programmed to 1 for second set */ 2807 mutex_lock(&dev_priv->pcu_lock); 2808 ret = sandybridge_pcode_read(dev_priv, 2809 GEN9_PCODE_READ_MEM_LATENCY, 2810 &val); 2811 mutex_unlock(&dev_priv->pcu_lock); 2812 if (ret) { 2813 DRM_ERROR("SKL Mailbox read error = %d\n", ret); 2814 return; 2815 } 2816 2817 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK; 2818 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & 2819 GEN9_MEM_LATENCY_LEVEL_MASK; 2820 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & 2821 GEN9_MEM_LATENCY_LEVEL_MASK; 2822 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & 2823 GEN9_MEM_LATENCY_LEVEL_MASK; 2824 2825 /* 2826 * If a level n (n >= 1) has a 0us latency, all levels m (m >= n) 2827 * need to be disabled. We make sure to sanitize the values out 2828 * of the punit to satisfy this requirement. 2829 */ 2830 for (level = 1; level <= max_level; level++) { 2831 if (wm[level] == 0) { 2832 for (i = level + 1; i <= max_level; i++) 2833 wm[i] = 0; 2834 break; 2835 } 2836 } 2837 2838 /* 2839 * WaWmMemoryReadLatency:skl+,glk 2840 * 2841 * punit doesn't take into account the read latency so we need 2842 * to add 2us to the various latency levels we retrieve from the 2843 * punit when level 0 response data is 0us.
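 *
 * E.g. a reported set of {0, 4, 8, 16} becomes {2, 6, 10, 18}.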
2844 */ 2845 if (wm[0] == 0) { 2846 wm[0] += 2; 2847 for (level = 1; level <= max_level; level++) { 2848 if (wm[level] == 0) 2849 break; 2850 wm[level] += 2; 2851 } 2852 } 2853 2854 } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { 2855 uint64_t sskpd = I915_READ64(MCH_SSKPD); 2856 2857 wm[0] = (sskpd >> 56) & 0xFF; 2858 if (wm[0] == 0) 2859 wm[0] = sskpd & 0xF; 2860 wm[1] = (sskpd >> 4) & 0xFF; 2861 wm[2] = (sskpd >> 12) & 0xFF; 2862 wm[3] = (sskpd >> 20) & 0x1FF; 2863 wm[4] = (sskpd >> 32) & 0x1FF; 2864 } else if (INTEL_GEN(dev_priv) >= 6) { 2865 uint32_t sskpd = I915_READ(MCH_SSKPD); 2866 2867 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK; 2868 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK; 2869 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK; 2870 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK; 2871 } else if (INTEL_GEN(dev_priv) >= 5) { 2872 uint32_t mltr = I915_READ(MLTR_ILK); 2873 2874 /* ILK primary LP0 latency is 700 ns */ 2875 wm[0] = 7; 2876 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK; 2877 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK; 2878 } else { 2879 MISSING_CASE(INTEL_DEVID(dev_priv)); 2880 } 2881 } 2882 2883 static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv, 2884 uint16_t wm[5]) 2885 { 2886 /* ILK sprite LP0 latency is 1300 ns */ 2887 if (IS_GEN5(dev_priv)) 2888 wm[0] = 13; 2889 } 2890 2891 static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv, 2892 uint16_t wm[5]) 2893 { 2894 /* ILK cursor LP0 latency is 1300 ns */ 2895 if (IS_GEN5(dev_priv)) 2896 wm[0] = 13; 2897 2898 /* WaDoubleCursorLP3Latency:ivb */ 2899 if (IS_IVYBRIDGE(dev_priv)) 2900 wm[3] *= 2; 2901 } 2902 2903 int ilk_wm_max_level(const struct drm_i915_private *dev_priv) 2904 { 2905 /* how many WM levels are we expecting */ 2906 if (INTEL_GEN(dev_priv) >= 9) 2907 return 7; 2908 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 2909 return 4; 2910 else if (INTEL_GEN(dev_priv) >= 6) 2911 return 3; 2912 else 2913 return 2; 2914 } 2915 2916 static void intel_print_wm_latency(struct drm_i915_private *dev_priv, 2917 const char *name, 2918 const uint16_t wm[8]) 2919 { 2920 int level, max_level = ilk_wm_max_level(dev_priv); 2921 2922 for (level = 0; level <= max_level; level++) { 2923 unsigned int latency = wm[level]; 2924 2925 if (latency == 0) { 2926 DRM_ERROR("%s WM%d latency not provided\n", 2927 name, level); 2928 continue; 2929 } 2930 2931 /* 2932 * - latencies are in us on gen9. 2933 * - before then, WM1+ latency values are in 0.5us units 2934 */ 2935 if (INTEL_GEN(dev_priv) >= 9) 2936 latency *= 10; 2937 else if (level > 0) 2938 latency *= 5; 2939 2940 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n", 2941 name, level, wm[level], 2942 latency / 10, latency % 10); 2943 } 2944 } 2945 2946 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, 2947 uint16_t wm[5], uint16_t min) 2948 { 2949 int level, max_level = ilk_wm_max_level(dev_priv); 2950 2951 if (wm[0] >= min) 2952 return false; 2953 2954 wm[0] = max(wm[0], min); 2955 for (level = 1; level <= max_level; level++) 2956 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5)); 2957 2958 return true; 2959 } 2960 2961 static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv) 2962 { 2963 bool changed; 2964 2965 /* 2966 * The BIOS provided WM memory latency values are often 2967 * inadequate for high resolution displays. Adjust them. 
2968 */ 2969 changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) | 2970 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) | 2971 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12); 2972 2973 if (!changed) 2974 return; 2975 2976 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n"); 2977 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency); 2978 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency); 2979 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); 2980 } 2981 2982 static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv) 2983 { 2984 intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency); 2985 2986 memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency, 2987 sizeof(dev_priv->wm.pri_latency)); 2988 memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency, 2989 sizeof(dev_priv->wm.pri_latency)); 2990 2991 intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency); 2992 intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency); 2993 2994 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency); 2995 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency); 2996 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); 2997 2998 if (IS_GEN6(dev_priv)) 2999 snb_wm_latency_quirk(dev_priv); 3000 } 3001 3002 static void skl_setup_wm_latency(struct drm_i915_private *dev_priv) 3003 { 3004 intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency); 3005 intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency); 3006 } 3007 3008 static bool ilk_validate_pipe_wm(struct drm_device *dev, 3009 struct intel_pipe_wm *pipe_wm) 3010 { 3011 /* LP0 watermark maximums depend on this pipe alone */ 3012 const struct intel_wm_config config = { 3013 .num_pipes_active = 1, 3014 .sprites_enabled = pipe_wm->sprites_enabled, 3015 .sprites_scaled = pipe_wm->sprites_scaled, 3016 }; 3017 struct ilk_wm_maximums max; 3018 3019 /* LP0 watermarks always use 1/2 DDB partitioning */ 3020 ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max); 3021 3022 /* At least LP0 must be valid */ 3023 if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) { 3024 DRM_DEBUG_KMS("LP0 watermark invalid\n"); 3025 return false; 3026 } 3027 3028 return true; 3029 } 3030 3031 /* Compute new watermarks for the pipe */ 3032 static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) 3033 { 3034 struct drm_atomic_state *state = cstate->base.state; 3035 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 3036 struct intel_pipe_wm *pipe_wm; 3037 struct drm_device *dev = state->dev; 3038 const struct drm_i915_private *dev_priv = to_i915(dev); 3039 struct drm_plane *plane; 3040 const struct drm_plane_state *plane_state; 3041 const struct intel_plane_state *pristate = NULL; 3042 const struct intel_plane_state *sprstate = NULL; 3043 const struct intel_plane_state *curstate = NULL; 3044 int level, max_level = ilk_wm_max_level(dev_priv), usable_level; 3045 struct ilk_wm_maximums max; 3046 3047 pipe_wm = &cstate->wm.ilk.optimal; 3048 3049 drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) { 3050 const struct intel_plane_state *ps = to_intel_plane_state(plane_state); 3051 3052 if (plane->type == DRM_PLANE_TYPE_PRIMARY) 3053 pristate = ps; 3054 else if (plane->type == DRM_PLANE_TYPE_OVERLAY) 3055 sprstate = ps; 3056 else if (plane->type == DRM_PLANE_TYPE_CURSOR) 3057 curstate = ps; 3058 } 3059 3060 
pipe_wm->pipe_enabled = cstate->base.active; 3061 if (sprstate) { 3062 pipe_wm->sprites_enabled = sprstate->base.visible; 3063 pipe_wm->sprites_scaled = sprstate->base.visible && 3064 (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 || 3065 drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16); 3066 } 3067 3068 usable_level = max_level; 3069 3070 /* ILK/SNB: LP2+ watermarks only w/o sprites */ 3071 if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled) 3072 usable_level = 1; 3073 3074 /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */ 3075 if (pipe_wm->sprites_scaled) 3076 usable_level = 0; 3077 3078 memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm)); 3079 ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, 3080 pristate, sprstate, curstate, &pipe_wm->wm[0]); 3081 3082 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 3083 pipe_wm->linetime = hsw_compute_linetime_wm(cstate); 3084 3085 if (!ilk_validate_pipe_wm(dev, pipe_wm)) 3086 return -EINVAL; 3087 3088 ilk_compute_wm_reg_maximums(dev_priv, 1, &max); 3089 3090 for (level = 1; level <= usable_level; level++) { 3091 struct intel_wm_level *wm = &pipe_wm->wm[level]; 3092 3093 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, 3094 pristate, sprstate, curstate, wm); 3095 3096 /* 3097 * Disable any watermark level that exceeds the 3098 * register maximums since such watermarks are 3099 * always invalid. 3100 */ 3101 if (!ilk_validate_wm_level(level, &max, wm)) { 3102 memset(wm, 0, sizeof(*wm)); 3103 break; 3104 } 3105 } 3106 3107 return 0; 3108 } 3109 3110 /* 3111 * Build a set of 'intermediate' watermark values that satisfy both the old 3112 * state and the new state. These can be programmed to the hardware 3113 * immediately. 3114 */ 3115 static int ilk_compute_intermediate_wm(struct drm_device *dev, 3116 struct intel_crtc *intel_crtc, 3117 struct intel_crtc_state *newstate) 3118 { 3119 struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate; 3120 struct intel_atomic_state *intel_state = 3121 to_intel_atomic_state(newstate->base.state); 3122 const struct intel_crtc_state *oldstate = 3123 intel_atomic_get_old_crtc_state(intel_state, intel_crtc); 3124 const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal; 3125 int level, max_level = ilk_wm_max_level(to_i915(dev)); 3126 3127 /* 3128 * Start with the final, target watermarks, then combine with the 3129 * currently active watermarks to get values that are safe both before 3130 * and after the vblank. 3131 */ 3132 *a = newstate->wm.ilk.optimal; 3133 if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base)) 3134 return 0; 3135 3136 a->pipe_enabled |= b->pipe_enabled; 3137 a->sprites_enabled |= b->sprites_enabled; 3138 a->sprites_scaled |= b->sprites_scaled; 3139 3140 for (level = 0; level <= max_level; level++) { 3141 struct intel_wm_level *a_wm = &a->wm[level]; 3142 const struct intel_wm_level *b_wm = &b->wm[level]; 3143 3144 a_wm->enable &= b_wm->enable; 3145 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val); 3146 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val); 3147 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val); 3148 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val); 3149 } 3150 3151 /* 3152 * We need to make sure that these merged watermark values are 3153 * actually a valid configuration themselves. If they're not, 3154 * there's no safe way to transition from the old state to 3155 * the new state, so we need to fail the atomic transaction. 
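 * ilk_validate_pipe_wm() only re-checks the LP0 maximums here; every
 * higher level is the max of two values that were validated when they
 * were first computed.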
3156 */ 3157 if (!ilk_validate_pipe_wm(dev, a)) 3158 return -EINVAL; 3159 3160 /* 3161 * If our intermediate WM are identical to the final WM, then we can 3162 * omit the post-vblank programming; only update if it's different. 3163 */ 3164 if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0) 3165 newstate->wm.need_postvbl_update = true; 3166 3167 return 0; 3168 } 3169 3170 /* 3171 * Merge the watermarks from all active pipes for a specific level. 3172 */ 3173 static void ilk_merge_wm_level(struct drm_device *dev, 3174 int level, 3175 struct intel_wm_level *ret_wm) 3176 { 3177 const struct intel_crtc *intel_crtc; 3178 3179 ret_wm->enable = true; 3180 3181 for_each_intel_crtc(dev, intel_crtc) { 3182 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk; 3183 const struct intel_wm_level *wm = &active->wm[level]; 3184 3185 if (!active->pipe_enabled) 3186 continue; 3187 3188 /* 3189 * The watermark values may have been used in the past, 3190 * so we must maintain them in the registers for some 3191 * time even if the level is now disabled. 3192 */ 3193 if (!wm->enable) 3194 ret_wm->enable = false; 3195 3196 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val); 3197 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val); 3198 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val); 3199 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val); 3200 } 3201 } 3202 3203 /* 3204 * Merge all low power watermarks for all active pipes. 3205 */ 3206 static void ilk_wm_merge(struct drm_device *dev, 3207 const struct intel_wm_config *config, 3208 const struct ilk_wm_maximums *max, 3209 struct intel_pipe_wm *merged) 3210 { 3211 struct drm_i915_private *dev_priv = to_i915(dev); 3212 int level, max_level = ilk_wm_max_level(dev_priv); 3213 int last_enabled_level = max_level; 3214 3215 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */ 3216 if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) && 3217 config->num_pipes_active > 1) 3218 last_enabled_level = 0; 3219 3220 /* ILK: FBC WM must be disabled always */ 3221 merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6; 3222 3223 /* merge each WM1+ level */ 3224 for (level = 1; level <= max_level; level++) { 3225 struct intel_wm_level *wm = &merged->wm[level]; 3226 3227 ilk_merge_wm_level(dev, level, wm); 3228 3229 if (level > last_enabled_level) 3230 wm->enable = false; 3231 else if (!ilk_validate_wm_level(level, max, wm)) 3232 /* make sure all following levels get disabled */ 3233 last_enabled_level = level - 1; 3234 3235 /* 3236 * The spec says it is preferred to disable 3237 * FBC WMs instead of disabling a WM level. 3238 */ 3239 if (wm->fbc_val > max->fbc) { 3240 if (wm->enable) 3241 merged->fbc_wm_enabled = false; 3242 wm->fbc_val = 0; 3243 } 3244 } 3245 3246 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */ 3247 /* 3248 * FIXME this is racy. FBC might get enabled later. 3249 * What we should check here is whether FBC can be 3250 * enabled sometime later. 
3251 */ 3252 if (IS_GEN5(dev_priv) && !merged->fbc_wm_enabled && 3253 intel_fbc_is_active(dev_priv)) { 3254 for (level = 2; level <= max_level; level++) { 3255 struct intel_wm_level *wm = &merged->wm[level]; 3256 3257 wm->enable = false; 3258 } 3259 } 3260 } 3261 3262 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm) 3263 { 3264 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */ 3265 return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable); 3266 } 3267 3268 /* The value we need to program into the WM_LPx latency field */ 3269 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level) 3270 { 3271 struct drm_i915_private *dev_priv = to_i915(dev); 3272 3273 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 3274 return 2 * level; 3275 else 3276 return dev_priv->wm.pri_latency[level]; 3277 } 3278 3279 static void ilk_compute_wm_results(struct drm_device *dev, 3280 const struct intel_pipe_wm *merged, 3281 enum intel_ddb_partitioning partitioning, 3282 struct ilk_wm_values *results) 3283 { 3284 struct drm_i915_private *dev_priv = to_i915(dev); 3285 struct intel_crtc *intel_crtc; 3286 int level, wm_lp; 3287 3288 results->enable_fbc_wm = merged->fbc_wm_enabled; 3289 results->partitioning = partitioning; 3290 3291 /* LP1+ register values */ 3292 for (wm_lp = 1; wm_lp <= 3; wm_lp++) { 3293 const struct intel_wm_level *r; 3294 3295 level = ilk_wm_lp_to_level(wm_lp, merged); 3296 3297 r = &merged->wm[level]; 3298 3299 /* 3300 * Maintain the watermark values even if the level is 3301 * disabled. Doing otherwise could cause underruns. 3302 */ 3303 results->wm_lp[wm_lp - 1] = 3304 (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) | 3305 (r->pri_val << WM1_LP_SR_SHIFT) | 3306 r->cur_val; 3307 3308 if (r->enable) 3309 results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN; 3310 3311 if (INTEL_GEN(dev_priv) >= 8) 3312 results->wm_lp[wm_lp - 1] |= 3313 r->fbc_val << WM1_LP_FBC_SHIFT_BDW; 3314 else 3315 results->wm_lp[wm_lp - 1] |= 3316 r->fbc_val << WM1_LP_FBC_SHIFT; 3317 3318 /* 3319 * Always set WM1S_LP_EN when spr_val != 0, even if the 3320 * level is disabled. Doing otherwise could cause underruns. 3321 */ 3322 if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) { 3323 WARN_ON(wm_lp != 1); 3324 results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val; 3325 } else 3326 results->wm_lp_spr[wm_lp - 1] = r->spr_val; 3327 } 3328 3329 /* LP0 register values */ 3330 for_each_intel_crtc(dev, intel_crtc) { 3331 enum i915_pipe pipe = intel_crtc->pipe; 3332 const struct intel_wm_level *r = 3333 &intel_crtc->wm.active.ilk.wm[0]; 3334 3335 if (WARN_ON(!r->enable)) 3336 continue; 3337 3338 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime; 3339 3340 results->wm_pipe[pipe] = 3341 (r->pri_val << WM0_PIPE_PLANE_SHIFT) | 3342 (r->spr_val << WM0_PIPE_SPRITE_SHIFT) | 3343 r->cur_val; 3344 } 3345 } 3346 3347 /* Find the result with the highest level enabled. Check for enable_fbc_wm in 3348 * case both are at the same level. Prefer r1 in case they're the same. 
*/ 3349 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev, 3350 struct intel_pipe_wm *r1, 3351 struct intel_pipe_wm *r2) 3352 { 3353 int level, max_level = ilk_wm_max_level(to_i915(dev)); 3354 int level1 = 0, level2 = 0; 3355 3356 for (level = 1; level <= max_level; level++) { 3357 if (r1->wm[level].enable) 3358 level1 = level; 3359 if (r2->wm[level].enable) 3360 level2 = level; 3361 } 3362 3363 if (level1 == level2) { 3364 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled) 3365 return r2; 3366 else 3367 return r1; 3368 } else if (level1 > level2) { 3369 return r1; 3370 } else { 3371 return r2; 3372 } 3373 } 3374 3375 /* dirty bits used to track which watermarks need changes */ 3376 #define WM_DIRTY_PIPE(pipe) (1 << (pipe)) 3377 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe))) 3378 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp))) 3379 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3)) 3380 #define WM_DIRTY_FBC (1 << 24) 3381 #define WM_DIRTY_DDB (1 << 25) 3382 3383 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv, 3384 const struct ilk_wm_values *old, 3385 const struct ilk_wm_values *new) 3386 { 3387 unsigned int dirty = 0; 3388 enum i915_pipe pipe; 3389 int wm_lp; 3390 3391 for_each_pipe(dev_priv, pipe) { 3392 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) { 3393 dirty |= WM_DIRTY_LINETIME(pipe); 3394 /* Must disable LP1+ watermarks too */ 3395 dirty |= WM_DIRTY_LP_ALL; 3396 } 3397 3398 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) { 3399 dirty |= WM_DIRTY_PIPE(pipe); 3400 /* Must disable LP1+ watermarks too */ 3401 dirty |= WM_DIRTY_LP_ALL; 3402 } 3403 } 3404 3405 if (old->enable_fbc_wm != new->enable_fbc_wm) { 3406 dirty |= WM_DIRTY_FBC; 3407 /* Must disable LP1+ watermarks too */ 3408 dirty |= WM_DIRTY_LP_ALL; 3409 } 3410 3411 if (old->partitioning != new->partitioning) { 3412 dirty |= WM_DIRTY_DDB; 3413 /* Must disable LP1+ watermarks too */ 3414 dirty |= WM_DIRTY_LP_ALL; 3415 } 3416 3417 /* LP1+ watermarks already deemed dirty, no need to continue */ 3418 if (dirty & WM_DIRTY_LP_ALL) 3419 return dirty; 3420 3421 /* Find the lowest numbered LP1+ watermark in need of an update... */ 3422 for (wm_lp = 1; wm_lp <= 3; wm_lp++) { 3423 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] || 3424 old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1]) 3425 break; 3426 } 3427 3428 /* ...and mark it and all higher numbered LP1+ watermarks as dirty */ 3429 for (; wm_lp <= 3; wm_lp++) 3430 dirty |= WM_DIRTY_LP(wm_lp); 3431 3432 return dirty; 3433 } 3434 3435 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv, 3436 unsigned int dirty) 3437 { 3438 struct ilk_wm_values *previous = &dev_priv->wm.hw; 3439 bool changed = false; 3440 3441 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) { 3442 previous->wm_lp[2] &= ~WM1_LP_SR_EN; 3443 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]); 3444 changed = true; 3445 } 3446 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) { 3447 previous->wm_lp[1] &= ~WM1_LP_SR_EN; 3448 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]); 3449 changed = true; 3450 } 3451 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) { 3452 previous->wm_lp[0] &= ~WM1_LP_SR_EN; 3453 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]); 3454 changed = true; 3455 } 3456 3457 /* 3458 * Don't touch WM1S_LP_EN here. 3459 * Doing so could cause underruns. 
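 * (see the matching note in ilk_compute_wm_results(): WM1S_LP_EN must
 * stay set whenever spr_val != 0, even for a disabled level)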
3460 */ 3461 3462 return changed; 3463 } 3464 3465 /* 3466 * The spec says we shouldn't write when we don't need, because every write 3467 * causes WMs to be re-evaluated, expending some power. 3468 */ 3469 static void ilk_write_wm_values(struct drm_i915_private *dev_priv, 3470 struct ilk_wm_values *results) 3471 { 3472 struct ilk_wm_values *previous = &dev_priv->wm.hw; 3473 unsigned int dirty; 3474 uint32_t val; 3475 3476 dirty = ilk_compute_wm_dirty(dev_priv, previous, results); 3477 if (!dirty) 3478 return; 3479 3480 _ilk_disable_lp_wm(dev_priv, dirty); 3481 3482 if (dirty & WM_DIRTY_PIPE(PIPE_A)) 3483 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]); 3484 if (dirty & WM_DIRTY_PIPE(PIPE_B)) 3485 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]); 3486 if (dirty & WM_DIRTY_PIPE(PIPE_C)) 3487 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]); 3488 3489 if (dirty & WM_DIRTY_LINETIME(PIPE_A)) 3490 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]); 3491 if (dirty & WM_DIRTY_LINETIME(PIPE_B)) 3492 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]); 3493 if (dirty & WM_DIRTY_LINETIME(PIPE_C)) 3494 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]); 3495 3496 if (dirty & WM_DIRTY_DDB) { 3497 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { 3498 val = I915_READ(WM_MISC); 3499 if (results->partitioning == INTEL_DDB_PART_1_2) 3500 val &= ~WM_MISC_DATA_PARTITION_5_6; 3501 else 3502 val |= WM_MISC_DATA_PARTITION_5_6; 3503 I915_WRITE(WM_MISC, val); 3504 } else { 3505 val = I915_READ(DISP_ARB_CTL2); 3506 if (results->partitioning == INTEL_DDB_PART_1_2) 3507 val &= ~DISP_DATA_PARTITION_5_6; 3508 else 3509 val |= DISP_DATA_PARTITION_5_6; 3510 I915_WRITE(DISP_ARB_CTL2, val); 3511 } 3512 } 3513 3514 if (dirty & WM_DIRTY_FBC) { 3515 val = I915_READ(DISP_ARB_CTL); 3516 if (results->enable_fbc_wm) 3517 val &= ~DISP_FBC_WM_DIS; 3518 else 3519 val |= DISP_FBC_WM_DIS; 3520 I915_WRITE(DISP_ARB_CTL, val); 3521 } 3522 3523 if (dirty & WM_DIRTY_LP(1) && 3524 previous->wm_lp_spr[0] != results->wm_lp_spr[0]) 3525 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]); 3526 3527 if (INTEL_GEN(dev_priv) >= 7) { 3528 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1]) 3529 I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]); 3530 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2]) 3531 I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]); 3532 } 3533 3534 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0]) 3535 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]); 3536 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1]) 3537 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]); 3538 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2]) 3539 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]); 3540 3541 dev_priv->wm.hw = *results; 3542 } 3543 3544 bool ilk_disable_lp_wm(struct drm_device *dev) 3545 { 3546 struct drm_i915_private *dev_priv = to_i915(dev); 3547 3548 return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); 3549 } 3550 3551 /* 3552 * FIXME: We still don't have the proper code detect if we need to apply the WA, 3553 * so assume we'll always need it in order to avoid underruns. 
3554 */ 3555 static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) 3556 { 3557 struct drm_i915_private *dev_priv = to_i915(state->base.dev); 3558 3559 if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) 3560 return true; 3561 3562 return false; 3563 } 3564 3565 static bool 3566 intel_has_sagv(struct drm_i915_private *dev_priv) 3567 { 3568 if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) || 3569 IS_CANNONLAKE(dev_priv)) 3570 return true; 3571 3572 if (IS_SKYLAKE(dev_priv) && 3573 dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED) 3574 return true; 3575 3576 return false; 3577 } 3578 3579 /* 3580 * SAGV dynamically adjusts the system agent voltage and clock frequencies 3581 * depending on power and performance requirements. The display engine access 3582 * to system memory is blocked during the adjustment time. Because of the 3583 * blocking time, having this enabled can cause full system hangs and/or pipe 3584 * underruns if we don't meet all of the following requirements: 3585 * 3586 * - <= 1 pipe enabled 3587 * - All planes can enable watermarks for latencies >= SAGV engine block time 3588 * - We're not using an interlaced display configuration 3589 */ 3590 int 3591 intel_enable_sagv(struct drm_i915_private *dev_priv) 3592 { 3593 int ret; 3594 3595 if (!intel_has_sagv(dev_priv)) 3596 return 0; 3597 3598 if (dev_priv->sagv_status == I915_SAGV_ENABLED) 3599 return 0; 3600 3601 DRM_DEBUG_KMS("Enabling the SAGV\n"); 3602 mutex_lock(&dev_priv->pcu_lock); 3603 3604 ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, 3605 GEN9_SAGV_ENABLE); 3606 3607 /* We don't need to wait for the SAGV when enabling */ 3608 mutex_unlock(&dev_priv->pcu_lock); 3609 3610 /* 3611 * Some skl systems, pre-release machines in particular, 3612 * don't actually have an SAGV. 3613 */ 3614 if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { 3615 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); 3616 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; 3617 return 0; 3618 } else if (ret < 0) { 3619 DRM_ERROR("Failed to enable the SAGV\n"); 3620 return ret; 3621 } 3622 3623 dev_priv->sagv_status = I915_SAGV_ENABLED; 3624 return 0; 3625 } 3626 3627 int 3628 intel_disable_sagv(struct drm_i915_private *dev_priv) 3629 { 3630 int ret; 3631 3632 if (!intel_has_sagv(dev_priv)) 3633 return 0; 3634 3635 if (dev_priv->sagv_status == I915_SAGV_DISABLED) 3636 return 0; 3637 3638 DRM_DEBUG_KMS("Disabling the SAGV\n"); 3639 mutex_lock(&dev_priv->pcu_lock); 3640 3641 /* bspec says to keep retrying for at least 1 ms */ 3642 ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL, 3643 GEN9_SAGV_DISABLE, 3644 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED, 3645 1); 3646 mutex_unlock(&dev_priv->pcu_lock); 3647 3648 /* 3649 * Some skl systems, pre-release machines in particular, 3650 * don't actually have an SAGV. 
3651 */ 3652 if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { 3653 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); 3654 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; 3655 return 0; 3656 } else if (ret < 0) { 3657 DRM_ERROR("Failed to disable the SAGV (%d)\n", ret); 3658 return ret; 3659 } 3660 3661 dev_priv->sagv_status = I915_SAGV_DISABLED; 3662 return 0; 3663 } 3664 3665 bool intel_can_enable_sagv(struct drm_atomic_state *state) 3666 { 3667 struct drm_device *dev = state->dev; 3668 struct drm_i915_private *dev_priv = to_i915(dev); 3669 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 3670 struct intel_crtc *crtc; 3671 struct intel_plane *plane; 3672 struct intel_crtc_state *cstate; 3673 enum i915_pipe pipe; 3674 int level, latency; 3675 int sagv_block_time_us = IS_GEN9(dev_priv) ? 30 : 20; 3676 3677 if (!intel_has_sagv(dev_priv)) 3678 return false; 3679 3680 /* 3681 * SKL+ workaround: bspec recommends we disable the SAGV when we have 3682 * more than one pipe enabled. 3683 * 3684 * If there are no active CRTCs, no additional checks need be performed. 3685 */ 3686 if (hweight32(intel_state->active_crtcs) == 0) 3687 return true; 3688 else if (hweight32(intel_state->active_crtcs) > 1) 3689 return false; 3690 3691 /* Since we're now guaranteed to only have one active CRTC... */ 3692 pipe = ffs(intel_state->active_crtcs) - 1; 3693 crtc = intel_get_crtc_for_pipe(dev_priv, pipe); 3694 cstate = to_intel_crtc_state(crtc->base.state); 3695 3696 if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) 3697 return false; 3698 3699 for_each_intel_plane_on_crtc(dev, crtc, plane) { 3700 struct skl_plane_wm *wm = 3701 &cstate->wm.skl.optimal.planes[plane->id]; 3702 3703 /* Skip this plane if it's not enabled */ 3704 if (!wm->wm[0].plane_en) 3705 continue; 3706 3707 /* Find the highest enabled wm level for this plane */ 3708 for (level = ilk_wm_max_level(dev_priv); 3709 !wm->wm[level].plane_en; --level) 3710 { } 3711 3712 latency = dev_priv->wm.skl_latency[level]; 3713 3714 if (skl_needs_memory_bw_wa(intel_state) && 3715 plane->base.state->fb->modifier == 3716 I915_FORMAT_MOD_X_TILED) 3717 latency += 15; 3718 3719 /* 3720 * If any of the planes on this pipe don't enable wm levels that 3721 * incur memory latencies higher than sagv_block_time_us, we 3722 * can't enable the SAGV.
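 * For example, with the 30 us gen9 block time, a plane whose highest enabled watermark level only covers 20 us of latency would force the SAGV to stay off.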
3723 */ 3724 if (latency < sagv_block_time_us) 3725 return false; 3726 } 3727 3728 return true; 3729 } 3730 3731 static void 3732 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, 3733 const struct intel_crtc_state *cstate, 3734 struct skl_ddb_entry *alloc, /* out */ 3735 int *num_active /* out */) 3736 { 3737 struct drm_atomic_state *state = cstate->base.state; 3738 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 3739 struct drm_i915_private *dev_priv = to_i915(dev); 3740 struct drm_crtc *for_crtc = cstate->base.crtc; 3741 unsigned int pipe_size, ddb_size; 3742 int nth_active_pipe; 3743 3744 if (WARN_ON(!state) || !cstate->base.active) { 3745 alloc->start = 0; 3746 alloc->end = 0; 3747 *num_active = hweight32(dev_priv->active_crtcs); 3748 return; 3749 } 3750 3751 if (intel_state->active_pipe_changes) 3752 *num_active = hweight32(intel_state->active_crtcs); 3753 else 3754 *num_active = hweight32(dev_priv->active_crtcs); 3755 3756 ddb_size = INTEL_INFO(dev_priv)->ddb_size; 3757 WARN_ON(ddb_size == 0); 3758 3759 ddb_size -= 4; /* 4 blocks for bypass path allocation */ 3760 3761 /* 3762 * If the state doesn't change the active CRTC's, then there's 3763 * no need to recalculate; the existing pipe allocation limits 3764 * should remain unchanged. Note that we're safe from racing 3765 * commits since any racing commit that changes the active CRTC 3766 * list would need to grab _all_ crtc locks, including the one 3767 * we currently hold. 3768 */ 3769 if (!intel_state->active_pipe_changes) { 3770 /* 3771 * alloc may be cleared by clear_intel_crtc_state, 3772 * copy from old state to be sure 3773 */ 3774 *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb; 3775 return; 3776 } 3777 3778 nth_active_pipe = hweight32(intel_state->active_crtcs & 3779 (drm_crtc_mask(for_crtc) - 1)); 3780 pipe_size = ddb_size / hweight32(intel_state->active_crtcs); 3781 alloc->start = nth_active_pipe * ddb_size / *num_active; 3782 alloc->end = alloc->start + pipe_size; 3783 } 3784 3785 static unsigned int skl_cursor_allocation(int num_active) 3786 { 3787 if (num_active == 1) 3788 return 32; 3789 3790 return 8; 3791 } 3792 3793 static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg) 3794 { 3795 entry->start = reg & 0x3ff; 3796 entry->end = (reg >> 16) & 0x3ff; 3797 if (entry->end) 3798 entry->end += 1; 3799 } 3800 3801 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, 3802 struct skl_ddb_allocation *ddb /* out */) 3803 { 3804 struct intel_crtc *crtc; 3805 3806 memset(ddb, 0, sizeof(*ddb)); 3807 3808 for_each_intel_crtc(&dev_priv->drm, crtc) { 3809 enum intel_display_power_domain power_domain; 3810 enum plane_id plane_id; 3811 enum i915_pipe pipe = crtc->pipe; 3812 3813 power_domain = POWER_DOMAIN_PIPE(pipe); 3814 if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) 3815 continue; 3816 3817 for_each_plane_id_on_crtc(crtc, plane_id) { 3818 u32 val; 3819 3820 if (plane_id != PLANE_CURSOR) 3821 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id)); 3822 else 3823 val = I915_READ(CUR_BUF_CFG(pipe)); 3824 3825 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val); 3826 } 3827 3828 intel_display_power_put(dev_priv, power_domain); 3829 } 3830 } 3831 3832 /* 3833 * Determines the downscale amount of a plane for the purposes of watermark calculations. 
3834 * The bspec defines downscale amount as: 3835 * 3836 * """ 3837 * Horizontal down scale amount = maximum[1, Horizontal source size / 3838 * Horizontal destination size] 3839 * Vertical down scale amount = maximum[1, Vertical source size / 3840 * Vertical destination size] 3841 * Total down scale amount = Horizontal down scale amount * 3842 * Vertical down scale amount 3843 * """ 3844 * 3845 * Return value is provided in 16.16 fixed point form to retain fractional part. 3846 * Caller should take care of dividing & rounding off the value. 3847 */ 3848 static uint_fixed_16_16_t 3849 skl_plane_downscale_amount(const struct intel_crtc_state *cstate, 3850 const struct intel_plane_state *pstate) 3851 { 3852 struct intel_plane *plane = to_intel_plane(pstate->base.plane); 3853 uint32_t src_w, src_h, dst_w, dst_h; 3854 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; 3855 uint_fixed_16_16_t downscale_h, downscale_w; 3856 3857 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) 3858 return u32_to_fixed16(0); 3859 3860 /* n.b., src is 16.16 fixed point, dst is whole integer */ 3861 if (plane->id == PLANE_CURSOR) { 3862 /* 3863 * Cursors only support 0/180 degree rotation, 3864 * hence no need to account for rotation here. 3865 */ 3866 src_w = pstate->base.src_w >> 16; 3867 src_h = pstate->base.src_h >> 16; 3868 dst_w = pstate->base.crtc_w; 3869 dst_h = pstate->base.crtc_h; 3870 } else { 3871 /* 3872 * Src coordinates are already rotated by 270 degrees for 3873 * the 90/270 degree plane rotation cases (to match the 3874 * GTT mapping), hence no need to account for rotation here. 3875 */ 3876 src_w = drm_rect_width(&pstate->base.src) >> 16; 3877 src_h = drm_rect_height(&pstate->base.src) >> 16; 3878 dst_w = drm_rect_width(&pstate->base.dst); 3879 dst_h = drm_rect_height(&pstate->base.dst); 3880 } 3881 3882 fp_w_ratio = div_fixed16(src_w, dst_w); 3883 fp_h_ratio = div_fixed16(src_h, dst_h); 3884 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1)); 3885 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1)); 3886 3887 return mul_fixed16(downscale_w, downscale_h); 3888 } 3889 3890 static uint_fixed_16_16_t 3891 skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state) 3892 { 3893 uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1); 3894 3895 if (!crtc_state->base.enable) 3896 return pipe_downscale; 3897 3898 if (crtc_state->pch_pfit.enabled) { 3899 uint32_t src_w, src_h, dst_w, dst_h; 3900 uint32_t pfit_size = crtc_state->pch_pfit.size; 3901 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; 3902 uint_fixed_16_16_t downscale_h, downscale_w; 3903 3904 src_w = crtc_state->pipe_src_w; 3905 src_h = crtc_state->pipe_src_h; 3906 dst_w = pfit_size >> 16; 3907 dst_h = pfit_size & 0xffff; 3908 3909 if (!dst_w || !dst_h) 3910 return pipe_downscale; 3911 3912 fp_w_ratio = div_fixed16(src_w, dst_w); 3913 fp_h_ratio = div_fixed16(src_h, dst_h); 3914 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1)); 3915 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1)); 3916 3917 pipe_downscale = mul_fixed16(downscale_w, downscale_h); 3918 } 3919 3920 return pipe_downscale; 3921 } 3922 3923 int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, 3924 struct intel_crtc_state *cstate) 3925 { 3926 struct drm_crtc_state *crtc_state = &cstate->base; 3927 struct drm_atomic_state *state = crtc_state->state; 3928 struct drm_plane *plane; 3929 const struct drm_plane_state *pstate; 3930 struct intel_plane_state *intel_pstate; 3931 int crtc_clock, dotclk; 3932 uint32_t pipe_max_pixel_rate; 3933 
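/* Illustrative fixed-point example (numbers are hypothetical, not from bspec): a 3840 wide source scaled to a 1920 wide destination gives div_fixed16(3840, 1920) = 2.0, i.e. 0x20000 in 16.16 form, which halves the supportable pixel rate computed below. */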
uint_fixed_16_16_t pipe_downscale; 3934 uint_fixed_16_16_t max_downscale = u32_to_fixed16(1); 3935 3936 if (!cstate->base.enable) 3937 return 0; 3938 3939 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { 3940 uint_fixed_16_16_t plane_downscale; 3941 uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8); 3942 int bpp; 3943 3944 if (!intel_wm_plane_visible(cstate, 3945 to_intel_plane_state(pstate))) 3946 continue; 3947 3948 if (WARN_ON(!pstate->fb)) 3949 return -EINVAL; 3950 3951 intel_pstate = to_intel_plane_state(pstate); 3952 plane_downscale = skl_plane_downscale_amount(cstate, 3953 intel_pstate); 3954 bpp = pstate->fb->format->cpp[0] * 8; 3955 if (bpp == 64) 3956 plane_downscale = mul_fixed16(plane_downscale, 3957 fp_9_div_8); 3958 3959 max_downscale = max_fixed16(plane_downscale, max_downscale); 3960 } 3961 pipe_downscale = skl_pipe_downscale_amount(cstate); 3962 3963 pipe_downscale = mul_fixed16(pipe_downscale, max_downscale); 3964 3965 crtc_clock = crtc_state->adjusted_mode.crtc_clock; 3966 dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk; 3967 3968 if (IS_GEMINILAKE(to_i915(intel_crtc->base.dev))) 3969 dotclk *= 2; 3970 3971 pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale); 3972 3973 if (pipe_max_pixel_rate < crtc_clock) { 3974 DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n"); 3975 return -EINVAL; 3976 } 3977 3978 return 0; 3979 } 3980 3981 static unsigned int 3982 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, 3983 const struct drm_plane_state *pstate, 3984 int y) 3985 { 3986 struct intel_plane *plane = to_intel_plane(pstate->plane); 3987 struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); 3988 uint32_t data_rate; 3989 uint32_t width = 0, height = 0; 3990 struct drm_framebuffer *fb; 3991 u32 format; 3992 uint_fixed_16_16_t down_scale_amount; 3993 3994 if (!intel_pstate->base.visible) 3995 return 0; 3996 3997 fb = pstate->fb; 3998 format = fb->format->format; 3999 4000 if (plane->id == PLANE_CURSOR) 4001 return 0; 4002 if (y && format != DRM_FORMAT_NV12) 4003 return 0; 4004 4005 /* 4006 * Src coordinates are already rotated by 270 degrees for 4007 * the 90/270 degree plane rotation cases (to match the 4008 * GTT mapping), hence no need to account for rotation here. 4009 */ 4010 width = drm_rect_width(&intel_pstate->base.src) >> 16; 4011 height = drm_rect_height(&intel_pstate->base.src) >> 16; 4012 4013 /* for planar format */ 4014 if (format == DRM_FORMAT_NV12) { 4015 if (y) /* y-plane data rate */ 4016 data_rate = width * height * 4017 fb->format->cpp[0]; 4018 else /* uv-plane data rate */ 4019 data_rate = (width / 2) * (height / 2) * 4020 fb->format->cpp[1]; 4021 } else { 4022 /* for packed formats */ 4023 data_rate = width * height * fb->format->cpp[0]; 4024 } 4025 4026 down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate); 4027 4028 return mul_round_up_u32_fixed16(data_rate, down_scale_amount); 4029 } 4030 4031 /* 4032 * We don't overflow 32 bits. 
Worst case is 3 planes enabled, each fetching 4033 * an 8192x4096@32bpp framebuffer: 4034 * 3 * 4096 * 8192 * 4 < 2^32 4035 */ 4036 static unsigned int 4037 skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, 4038 unsigned *plane_data_rate, 4039 unsigned *plane_y_data_rate) 4040 { 4041 struct drm_crtc_state *cstate = &intel_cstate->base; 4042 struct drm_atomic_state *state = cstate->state; 4043 struct drm_plane *plane; 4044 const struct drm_plane_state *pstate; 4045 unsigned int total_data_rate = 0; 4046 4047 if (WARN_ON(!state)) 4048 return 0; 4049 4050 /* Calculate and cache data rate for each plane */ 4051 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) { 4052 enum plane_id plane_id = to_intel_plane(plane)->id; 4053 unsigned int rate; 4054 4055 /* packed/uv */ 4056 rate = skl_plane_relative_data_rate(intel_cstate, 4057 pstate, 0); 4058 plane_data_rate[plane_id] = rate; 4059 4060 total_data_rate += rate; 4061 4062 /* y-plane */ 4063 rate = skl_plane_relative_data_rate(intel_cstate, 4064 pstate, 1); 4065 plane_y_data_rate[plane_id] = rate; 4066 4067 total_data_rate += rate; 4068 } 4069 4070 return total_data_rate; 4071 } 4072 4073 static uint16_t 4074 skl_ddb_min_alloc(const struct drm_plane_state *pstate, 4075 const int y) 4076 { 4077 struct drm_framebuffer *fb = pstate->fb; 4078 struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); 4079 uint32_t src_w, src_h; 4080 uint32_t min_scanlines = 8; 4081 uint8_t plane_bpp; 4082 4083 if (WARN_ON(!fb)) 4084 return 0; 4085 4086 /* For packed formats, no y-plane, return 0 */ 4087 if (y && fb->format->format != DRM_FORMAT_NV12) 4088 return 0; 4089 4090 /* For non-Y-tiled formats, return 8 blocks */ 4091 if (fb->modifier != I915_FORMAT_MOD_Y_TILED && 4092 fb->modifier != I915_FORMAT_MOD_Yf_TILED && 4093 fb->modifier != I915_FORMAT_MOD_Y_TILED_CCS && 4094 fb->modifier != I915_FORMAT_MOD_Yf_TILED_CCS) 4095 return 8; 4096 4097 /* 4098 * Src coordinates are already rotated by 270 degrees for 4099 * the 90/270 degree plane rotation cases (to match the 4100 * GTT mapping), hence no need to account for rotation here.
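 * (Rotation still matters just below when picking min_scanlines: e.g. a rotated cpp == 4 plane keeps min_scanlines = 8, while a cpp == 1 plane needs 32.)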
4101 */ 4102 src_w = drm_rect_width(&intel_pstate->base.src) >> 16; 4103 src_h = drm_rect_height(&intel_pstate->base.src) >> 16; 4104 4105 /* Halve UV plane width and height for NV12 */ 4106 if (fb->format->format == DRM_FORMAT_NV12 && !y) { 4107 src_w /= 2; 4108 src_h /= 2; 4109 } 4110 4111 if (fb->format->format == DRM_FORMAT_NV12 && !y) 4112 plane_bpp = fb->format->cpp[1]; 4113 else 4114 plane_bpp = fb->format->cpp[0]; 4115 4116 if (drm_rotation_90_or_270(pstate->rotation)) { 4117 switch (plane_bpp) { 4118 case 1: 4119 min_scanlines = 32; 4120 break; 4121 case 2: 4122 min_scanlines = 16; 4123 break; 4124 case 4: 4125 min_scanlines = 8; 4126 break; 4127 case 8: 4128 min_scanlines = 4; 4129 break; 4130 default: 4131 WARN(1, "Unsupported pixel depth %u for rotation", 4132 plane_bpp); 4133 min_scanlines = 32; 4134 } 4135 } 4136 4137 return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) * min_scanlines/4 + 3; 4138 } 4139 4140 static void 4141 skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active, 4142 uint16_t *minimum, uint16_t *y_minimum) 4143 { 4144 const struct drm_plane_state *pstate; 4145 struct drm_plane *plane; 4146 4147 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) { 4148 enum plane_id plane_id = to_intel_plane(plane)->id; 4149 4150 if (plane_id == PLANE_CURSOR) 4151 continue; 4152 4153 if (!pstate->visible) 4154 continue; 4155 4156 minimum[plane_id] = skl_ddb_min_alloc(pstate, 0); 4157 y_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1); 4158 } 4159 4160 minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active); 4161 } 4162 4163 static int 4164 skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, 4165 struct skl_ddb_allocation *ddb /* out */) 4166 { 4167 struct drm_atomic_state *state = cstate->base.state; 4168 struct drm_crtc *crtc = cstate->base.crtc; 4169 struct drm_device *dev = crtc->dev; 4170 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 4171 enum i915_pipe pipe = intel_crtc->pipe; 4172 struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb; 4173 uint16_t alloc_size, start; 4174 uint16_t minimum[I915_MAX_PLANES] = {}; 4175 uint16_t y_minimum[I915_MAX_PLANES] = {}; 4176 unsigned int total_data_rate; 4177 enum plane_id plane_id; 4178 int num_active; 4179 unsigned plane_data_rate[I915_MAX_PLANES] = {}; 4180 unsigned plane_y_data_rate[I915_MAX_PLANES] = {}; 4181 uint16_t total_min_blocks = 0; 4182 4183 /* Clear the partitioning for disabled planes. */ 4184 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); 4185 memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe])); 4186 4187 if (WARN_ON(!state)) 4188 return 0; 4189 4190 if (!cstate->base.active) { 4191 alloc->start = alloc->end = 0; 4192 return 0; 4193 } 4194 4195 skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active); 4196 alloc_size = skl_ddb_entry_size(alloc); 4197 if (alloc_size == 0) 4198 return 0; 4199 4200 skl_ddb_calc_min(cstate, num_active, minimum, y_minimum); 4201 4202 /* 4203 * 1. Allocate the minimum required blocks for each active plane 4204 * and allocate the cursor; it doesn't require extra allocation 4205 * proportional to the data rate.
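 * As a rough illustrative budget (sizes are examples, assuming the 896 block SKL DDB): 4 blocks go to the bypass path, so a single pipe gets 892; a 32 block cursor plus two planes at their 8 block minimum leaves 844 blocks for step 2 below.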
4206 */ 4207 4208 for_each_plane_id_on_crtc(intel_crtc, plane_id) { 4209 total_min_blocks += minimum[plane_id]; 4210 total_min_blocks += y_minimum[plane_id]; 4211 } 4212 4213 if (total_min_blocks > alloc_size) { 4214 DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations"); 4215 DRM_DEBUG_KMS("minimum required %d/%d\n", total_min_blocks, 4216 alloc_size); 4217 return -EINVAL; 4218 } 4219 4220 alloc_size -= total_min_blocks; 4221 ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR]; 4222 ddb->plane[pipe][PLANE_CURSOR].end = alloc->end; 4223 4224 /* 4225 * 2. Distribute the remaining space in proportion to the amount of 4226 * data each plane needs to fetch from memory. 4227 * 4228 * FIXME: we may not allocate every single block here. 4229 */ 4230 total_data_rate = skl_get_total_relative_data_rate(cstate, 4231 plane_data_rate, 4232 plane_y_data_rate); 4233 if (total_data_rate == 0) 4234 return 0; 4235 4236 start = alloc->start; 4237 for_each_plane_id_on_crtc(intel_crtc, plane_id) { 4238 unsigned int data_rate, y_data_rate; 4239 uint16_t plane_blocks, y_plane_blocks = 0; 4240 4241 if (plane_id == PLANE_CURSOR) 4242 continue; 4243 4244 data_rate = plane_data_rate[plane_id]; 4245 4246 /* 4247 * allocation for (packed formats) or (uv-plane part of planar format): 4248 * promote the expression to 64 bits to avoid overflowing, the 4249 * result is < available as data_rate / total_data_rate < 1 4250 */ 4251 plane_blocks = minimum[plane_id]; 4252 plane_blocks += div_u64((uint64_t)alloc_size * data_rate, 4253 total_data_rate); 4254 4255 /* Leave disabled planes at (0,0) */ 4256 if (data_rate) { 4257 ddb->plane[pipe][plane_id].start = start; 4258 ddb->plane[pipe][plane_id].end = start + plane_blocks; 4259 } 4260 4261 start += plane_blocks; 4262 4263 /* 4264 * allocation for y_plane part of planar format: 4265 */ 4266 y_data_rate = plane_y_data_rate[plane_id]; 4267 4268 y_plane_blocks = y_minimum[plane_id]; 4269 y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, 4270 total_data_rate); 4271 4272 if (y_data_rate) { 4273 ddb->y_plane[pipe][plane_id].start = start; 4274 ddb->y_plane[pipe][plane_id].end = start + y_plane_blocks; 4275 } 4276 4277 start += y_plane_blocks; 4278 } 4279 4280 return 0; 4281 } 4282 4283 /* 4284 * The max latency should be 257 (max the punit can code is 255 and we add 2us 4285 * for the read latency) and cpp should always be <= 8, so that 4286 * should allow pixel_rate up to ~2 GHz which seems sufficient since max 4287 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that. 
4288 */ 4289 static uint_fixed_16_16_t 4290 skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate, 4291 uint8_t cpp, uint32_t latency) 4292 { 4293 uint32_t wm_intermediate_val; 4294 uint_fixed_16_16_t ret; 4295 4296 if (latency == 0) 4297 return FP_16_16_MAX; 4298 4299 wm_intermediate_val = latency * pixel_rate * cpp; 4300 ret = div_fixed16(wm_intermediate_val, 1000 * 512); 4301 4302 if (INTEL_GEN(dev_priv) >= 10) 4303 ret = add_fixed16_u32(ret, 1); 4304 4305 return ret; 4306 } 4307 4308 static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate, 4309 uint32_t pipe_htotal, 4310 uint32_t latency, 4311 uint_fixed_16_16_t plane_blocks_per_line) 4312 { 4313 uint32_t wm_intermediate_val; 4314 uint_fixed_16_16_t ret; 4315 4316 if (latency == 0) 4317 return FP_16_16_MAX; 4318 4319 wm_intermediate_val = latency * pixel_rate; 4320 wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val, 4321 pipe_htotal * 1000); 4322 ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line); 4323 return ret; 4324 } 4325 4326 static uint_fixed_16_16_t 4327 intel_get_linetime_us(struct intel_crtc_state *cstate) 4328 { 4329 uint32_t pixel_rate; 4330 uint32_t crtc_htotal; 4331 uint_fixed_16_16_t linetime_us; 4332 4333 if (!cstate->base.active) 4334 return u32_to_fixed16(0); 4335 4336 pixel_rate = cstate->pixel_rate; 4337 4338 if (WARN_ON(pixel_rate == 0)) 4339 return u32_to_fixed16(0); 4340 4341 crtc_htotal = cstate->base.adjusted_mode.crtc_htotal; 4342 linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate); 4343 4344 return linetime_us; 4345 } 4346 4347 static uint32_t 4348 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, 4349 const struct intel_plane_state *pstate) 4350 { 4351 uint64_t adjusted_pixel_rate; 4352 uint_fixed_16_16_t downscale_amount; 4353 4354 /* Shouldn't reach here on disabled planes... */ 4355 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) 4356 return 0; 4357 4358 /* 4359 * Adjusted plane pixel rate is just the pipe's adjusted pixel rate 4360 * with additional adjustments for plane-specific scaling. 
4361 */ 4362 adjusted_pixel_rate = cstate->pixel_rate; 4363 downscale_amount = skl_plane_downscale_amount(cstate, pstate); 4364 4365 return mul_round_up_u32_fixed16(adjusted_pixel_rate, 4366 downscale_amount); 4367 } 4368 4369 static int 4370 skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, 4371 struct intel_crtc_state *cstate, 4372 const struct intel_plane_state *intel_pstate, 4373 struct skl_wm_params *wp) 4374 { 4375 struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); 4376 const struct drm_plane_state *pstate = &intel_pstate->base; 4377 const struct drm_framebuffer *fb = pstate->fb; 4378 uint32_t interm_pbpl; 4379 struct intel_atomic_state *state = 4380 to_intel_atomic_state(cstate->base.state); 4381 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); 4382 4383 if (!intel_wm_plane_visible(cstate, intel_pstate)) 4384 return 0; 4385 4386 wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || 4387 fb->modifier == I915_FORMAT_MOD_Yf_TILED || 4388 fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || 4389 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; 4390 wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; 4391 wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || 4392 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; 4393 4394 if (plane->id == PLANE_CURSOR) { 4395 wp->width = intel_pstate->base.crtc_w; 4396 } else { 4397 /* 4398 * Src coordinates are already rotated by 270 degrees for 4399 * the 90/270 degree plane rotation cases (to match the 4400 * GTT mapping), hence no need to account for rotation here. 4401 */ 4402 wp->width = drm_rect_width(&intel_pstate->base.src) >> 16; 4403 } 4404 4405 wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] : 4406 fb->format->cpp[0]; 4407 wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, 4408 intel_pstate); 4409 4410 if (drm_rotation_90_or_270(pstate->rotation)) { 4411 4412 switch (wp->cpp) { 4413 case 1: 4414 wp->y_min_scanlines = 16; 4415 break; 4416 case 2: 4417 wp->y_min_scanlines = 8; 4418 break; 4419 case 4: 4420 wp->y_min_scanlines = 4; 4421 break; 4422 default: 4423 MISSING_CASE(wp->cpp); 4424 return -EINVAL; 4425 } 4426 } else { 4427 wp->y_min_scanlines = 4; 4428 } 4429 4430 if (apply_memory_bw_wa) 4431 wp->y_min_scanlines *= 2; 4432 4433 wp->plane_bytes_per_line = wp->width * wp->cpp; 4434 if (wp->y_tiled) { 4435 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line * 4436 wp->y_min_scanlines, 512); 4437 4438 if (INTEL_GEN(dev_priv) >= 10) 4439 interm_pbpl++; 4440 4441 wp->plane_blocks_per_line = div_fixed16(interm_pbpl, 4442 wp->y_min_scanlines); 4443 } else if (wp->x_tiled && IS_GEN9(dev_priv)) { 4444 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512); 4445 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); 4446 } else { 4447 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1; 4448 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); 4449 } 4450 4451 wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines, 4452 wp->plane_blocks_per_line); 4453 wp->linetime_us = fixed16_to_u32_round_up( 4454 intel_get_linetime_us(cstate)); 4455 4456 return 0; 4457 } 4458 4459 static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, 4460 struct intel_crtc_state *cstate, 4461 const struct intel_plane_state *intel_pstate, 4462 uint16_t ddb_allocation, 4463 int level, 4464 const struct skl_wm_params *wp, 4465 uint16_t *out_blocks, /* out */ 4466 uint8_t *out_lines, /* out */ 4467 bool *enabled /* out */) 4468 { 4469 const struct drm_plane_state 
*pstate = &intel_pstate->base; 4470 uint32_t latency = dev_priv->wm.skl_latency[level]; 4471 uint_fixed_16_16_t method1, method2; 4472 uint_fixed_16_16_t selected_result; 4473 uint32_t res_blocks, res_lines; 4474 struct intel_atomic_state *state = 4475 to_intel_atomic_state(cstate->base.state); 4476 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); 4477 4478 if (latency == 0 || 4479 !intel_wm_plane_visible(cstate, intel_pstate)) { 4480 *enabled = false; 4481 return 0; 4482 } 4483 4484 /* Display WA #1141: kbl,cfl */ 4485 if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) || 4486 IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) && 4487 dev_priv->ipc_enabled) 4488 latency += 4; 4489 4490 if (apply_memory_bw_wa && wp->x_tiled) 4491 latency += 15; 4492 4493 method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate, 4494 wp->cpp, latency); 4495 method2 = skl_wm_method2(wp->plane_pixel_rate, 4496 cstate->base.adjusted_mode.crtc_htotal, 4497 latency, 4498 wp->plane_blocks_per_line); 4499 4500 if (wp->y_tiled) { 4501 selected_result = max_fixed16(method2, wp->y_tile_minimum); 4502 } else { 4503 if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal / 4504 512 < 1) && (wp->plane_bytes_per_line / 512 < 1)) 4505 selected_result = method2; 4506 else if (ddb_allocation >= 4507 fixed16_to_u32_round_up(wp->plane_blocks_per_line)) 4508 selected_result = min_fixed16(method1, method2); 4509 else if (latency >= wp->linetime_us) 4510 selected_result = min_fixed16(method1, method2); 4511 else 4512 selected_result = method1; 4513 } 4514 4515 res_blocks = fixed16_to_u32_round_up(selected_result) + 1; 4516 res_lines = div_round_up_fixed16(selected_result, 4517 wp->plane_blocks_per_line); 4518 4519 /* Display WA #1125: skl,bxt,kbl,glk */ 4520 if (level == 0 && wp->rc_surface) 4521 res_blocks += fixed16_to_u32_round_up(wp->y_tile_minimum); 4522 4523 /* Display WA #1126: skl,bxt,kbl,glk */ 4524 if (level >= 1 && level <= 7) { 4525 if (wp->y_tiled) { 4526 res_blocks += fixed16_to_u32_round_up( 4527 wp->y_tile_minimum); 4528 res_lines += wp->y_min_scanlines; 4529 } else { 4530 res_blocks++; 4531 } 4532 } 4533 4534 if (res_blocks >= ddb_allocation || res_lines > 31) { 4535 *enabled = false; 4536 4537 /* 4538 * If there are no valid level 0 watermarks, then we can't 4539 * support this display configuration. 
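 * A failing level 1+ watermark is merely reported as disabled, while a failing level 0 rejects the whole configuration with -EINVAL below.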
4540 */ 4541 if (level) { 4542 return 0; 4543 } else { 4544 struct drm_plane *plane = pstate->plane; 4545 4546 DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n"); 4547 DRM_DEBUG_KMS("[PLANE:%d:%s] blocks required = %u/%u, lines required = %u/31\n", 4548 plane->base.id, plane->name, 4549 res_blocks, ddb_allocation, res_lines); 4550 return -EINVAL; 4551 } 4552 } 4553 4554 *out_blocks = res_blocks; 4555 *out_lines = res_lines; 4556 *enabled = true; 4557 4558 return 0; 4559 } 4560 4561 static int 4562 skl_compute_wm_levels(const struct drm_i915_private *dev_priv, 4563 struct skl_ddb_allocation *ddb, 4564 struct intel_crtc_state *cstate, 4565 const struct intel_plane_state *intel_pstate, 4566 const struct skl_wm_params *wm_params, 4567 struct skl_plane_wm *wm) 4568 { 4569 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 4570 struct drm_plane *plane = intel_pstate->base.plane; 4571 struct intel_plane *intel_plane = to_intel_plane(plane); 4572 uint16_t ddb_blocks; 4573 enum i915_pipe pipe = intel_crtc->pipe; 4574 int level, max_level = ilk_wm_max_level(dev_priv); 4575 int ret; 4576 4577 if (WARN_ON(!intel_pstate->base.fb)) 4578 return -EINVAL; 4579 4580 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]); 4581 4582 for (level = 0; level <= max_level; level++) { 4583 struct skl_wm_level *result = &wm->wm[level]; 4584 4585 ret = skl_compute_plane_wm(dev_priv, 4586 cstate, 4587 intel_pstate, 4588 ddb_blocks, 4589 level, 4590 wm_params, 4591 &result->plane_res_b, 4592 &result->plane_res_l, 4593 &result->plane_en); 4594 if (ret) 4595 return ret; 4596 } 4597 4598 return 0; 4599 } 4600 4601 static uint32_t 4602 skl_compute_linetime_wm(struct intel_crtc_state *cstate) 4603 { 4604 struct drm_atomic_state *state = cstate->base.state; 4605 struct drm_i915_private *dev_priv = to_i915(state->dev); 4606 uint_fixed_16_16_t linetime_us; 4607 uint32_t linetime_wm; 4608 4609 linetime_us = intel_get_linetime_us(cstate); 4610 4611 if (is_fixed16_zero(linetime_us)) 4612 return 0; 4613 4614 linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); 4615 4616 /* Display WA #1135: bxt:ALL GLK:ALL */ 4617 if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) && 4618 dev_priv->ipc_enabled) 4619 linetime_wm /= 2; 4620 4621 return linetime_wm; 4622 } 4623 4624 static void skl_compute_transition_wm(struct intel_crtc_state *cstate, 4625 struct skl_wm_params *wp, 4626 struct skl_wm_level *wm_l0, 4627 uint16_t ddb_allocation, 4628 struct skl_wm_level *trans_wm /* out */) 4629 { 4630 struct drm_device *dev = cstate->base.crtc->dev; 4631 const struct drm_i915_private *dev_priv = to_i915(dev); 4632 uint16_t trans_min, trans_y_tile_min; 4633 const uint16_t trans_amount = 10; /* This is configurable amount */ 4634 uint16_t trans_offset_b, res_blocks; 4635 4636 if (!cstate->base.active) 4637 goto exit; 4638 4639 /* Transition WM are not recommended by HW team for GEN9 */ 4640 if (INTEL_GEN(dev_priv) <= 9) 4641 goto exit; 4642 4643 /* Transition WM don't make any sense if ipc is disabled */ 4644 if (!dev_priv->ipc_enabled) 4645 goto exit; 4646 4647 if (INTEL_GEN(dev_priv) >= 10) 4648 trans_min = 4; 4649 4650 trans_offset_b = trans_min + trans_amount; 4651 4652 if (wp->y_tiled) { 4653 trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2, 4654 wp->y_tile_minimum); 4655 res_blocks = max(wm_l0->plane_res_b, trans_y_tile_min) + 4656 trans_offset_b; 4657 } else { 4658 res_blocks = wm_l0->plane_res_b + trans_offset_b; 4659 4660 /* WA BUG:1938466 add one 
block for non y-tile planes */ 4661 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0)) 4662 res_blocks += 1; 4663 4664 } 4665 4666 res_blocks += 1; 4667 4668 if (res_blocks < ddb_allocation) { 4669 trans_wm->plane_res_b = res_blocks; 4670 trans_wm->plane_en = true; 4671 return; 4672 } 4673 4674 exit: 4675 trans_wm->plane_en = false; 4676 } 4677 4678 static int skl_build_pipe_wm(struct intel_crtc_state *cstate, 4679 struct skl_ddb_allocation *ddb, 4680 struct skl_pipe_wm *pipe_wm) 4681 { 4682 struct drm_device *dev = cstate->base.crtc->dev; 4683 struct drm_crtc_state *crtc_state = &cstate->base; 4684 const struct drm_i915_private *dev_priv = to_i915(dev); 4685 struct drm_plane *plane; 4686 const struct drm_plane_state *pstate; 4687 struct skl_plane_wm *wm; 4688 int ret; 4689 4690 /* 4691 * We'll only calculate watermarks for planes that are actually 4692 * enabled, so make sure all other planes are set as disabled. 4693 */ 4694 memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes)); 4695 4696 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { 4697 const struct intel_plane_state *intel_pstate = 4698 to_intel_plane_state(pstate); 4699 enum plane_id plane_id = to_intel_plane(plane)->id; 4700 struct skl_wm_params wm_params; 4701 enum i915_pipe pipe = to_intel_crtc(cstate->base.crtc)->pipe; 4702 uint16_t ddb_blocks; 4703 4704 wm = &pipe_wm->planes[plane_id]; 4705 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][plane_id]); 4706 memset(&wm_params, 0, sizeof(struct skl_wm_params)); 4707 4708 ret = skl_compute_plane_wm_params(dev_priv, cstate, 4709 intel_pstate, &wm_params); 4710 if (ret) 4711 return ret; 4712 4713 ret = skl_compute_wm_levels(dev_priv, ddb, cstate, 4714 intel_pstate, &wm_params, wm); 4715 if (ret) 4716 return ret; 4717 skl_compute_transition_wm(cstate, &wm_params, &wm->wm[0], 4718 ddb_blocks, &wm->trans_wm); 4719 } 4720 pipe_wm->linetime = skl_compute_linetime_wm(cstate); 4721 4722 return 0; 4723 } 4724 4725 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, 4726 i915_reg_t reg, 4727 const struct skl_ddb_entry *entry) 4728 { 4729 if (entry->end) 4730 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start); 4731 else 4732 I915_WRITE(reg, 0); 4733 } 4734 4735 static void skl_write_wm_level(struct drm_i915_private *dev_priv, 4736 i915_reg_t reg, 4737 const struct skl_wm_level *level) 4738 { 4739 uint32_t val = 0; 4740 4741 if (level->plane_en) { 4742 val |= PLANE_WM_EN; 4743 val |= level->plane_res_b; 4744 val |= level->plane_res_l << PLANE_WM_LINES_SHIFT; 4745 } 4746 4747 I915_WRITE(reg, val); 4748 } 4749 4750 static void skl_write_plane_wm(struct intel_crtc *intel_crtc, 4751 const struct skl_plane_wm *wm, 4752 const struct skl_ddb_allocation *ddb, 4753 enum plane_id plane_id) 4754 { 4755 struct drm_crtc *crtc = &intel_crtc->base; 4756 struct drm_device *dev = crtc->dev; 4757 struct drm_i915_private *dev_priv = to_i915(dev); 4758 int level, max_level = ilk_wm_max_level(dev_priv); 4759 enum i915_pipe pipe = intel_crtc->pipe; 4760 4761 for (level = 0; level <= max_level; level++) { 4762 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level), 4763 &wm->wm[level]); 4764 } 4765 skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id), 4766 &wm->trans_wm); 4767 4768 skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id), 4769 &ddb->plane[pipe][plane_id]); 4770 skl_ddb_entry_write(dev_priv, PLANE_NV12_BUF_CFG(pipe, plane_id), 4771 &ddb->y_plane[pipe][plane_id]); 4772 } 4773 4774 static void skl_write_cursor_wm(struct intel_crtc 
*intel_crtc, 4775 const struct skl_plane_wm *wm, 4776 const struct skl_ddb_allocation *ddb) 4777 { 4778 struct drm_crtc *crtc = &intel_crtc->base; 4779 struct drm_device *dev = crtc->dev; 4780 struct drm_i915_private *dev_priv = to_i915(dev); 4781 int level, max_level = ilk_wm_max_level(dev_priv); 4782 enum i915_pipe pipe = intel_crtc->pipe; 4783 4784 for (level = 0; level <= max_level; level++) { 4785 skl_write_wm_level(dev_priv, CUR_WM(pipe, level), 4786 &wm->wm[level]); 4787 } 4788 skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm); 4789 4790 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), 4791 &ddb->plane[pipe][PLANE_CURSOR]); 4792 } 4793 4794 bool skl_wm_level_equals(const struct skl_wm_level *l1, 4795 const struct skl_wm_level *l2) 4796 { 4797 if (l1->plane_en != l2->plane_en) 4798 return false; 4799 4800 /* If both planes aren't enabled, the rest shouldn't matter */ 4801 if (!l1->plane_en) 4802 return true; 4803 4804 return (l1->plane_res_l == l2->plane_res_l && 4805 l1->plane_res_b == l2->plane_res_b); 4806 } 4807 4808 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, 4809 const struct skl_ddb_entry *b) 4810 { 4811 return a->start < b->end && b->start < a->end; 4812 } 4813 4814 bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv, 4815 const struct skl_ddb_entry **entries, 4816 const struct skl_ddb_entry *ddb, 4817 int ignore) 4818 { 4819 enum i915_pipe pipe; 4820 4821 for_each_pipe(dev_priv, pipe) { 4822 if (pipe != ignore && entries[pipe] && 4823 skl_ddb_entries_overlap(ddb, entries[pipe])) 4824 return true; 4825 } 4826 4827 return false; 4828 } 4829 4830 static int skl_update_pipe_wm(struct drm_crtc_state *cstate, 4831 const struct skl_pipe_wm *old_pipe_wm, 4832 struct skl_pipe_wm *pipe_wm, /* out */ 4833 struct skl_ddb_allocation *ddb, /* out */ 4834 bool *changed /* out */) 4835 { 4836 struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate); 4837 int ret; 4838 4839 ret = skl_build_pipe_wm(intel_cstate, ddb, pipe_wm); 4840 if (ret) 4841 return ret; 4842 4843 if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm))) 4844 *changed = false; 4845 else 4846 *changed = true; 4847 4848 return 0; 4849 } 4850 4851 static uint32_t 4852 pipes_modified(struct drm_atomic_state *state) 4853 { 4854 struct drm_crtc *crtc; 4855 struct drm_crtc_state *cstate; 4856 uint32_t i, ret = 0; 4857 4858 for_each_new_crtc_in_state(state, crtc, cstate, i) 4859 ret |= drm_crtc_mask(crtc); 4860 4861 return ret; 4862 } 4863 4864 static int 4865 skl_ddb_add_affected_planes(struct intel_crtc_state *cstate) 4866 { 4867 struct drm_atomic_state *state = cstate->base.state; 4868 struct drm_device *dev = state->dev; 4869 struct drm_crtc *crtc = cstate->base.crtc; 4870 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 4871 struct drm_i915_private *dev_priv = to_i915(dev); 4872 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 4873 struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; 4874 struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; 4875 struct drm_plane_state *plane_state; 4876 struct drm_plane *plane; 4877 enum i915_pipe pipe = intel_crtc->pipe; 4878 4879 WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc)); 4880 4881 drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) { 4882 enum plane_id plane_id = to_intel_plane(plane)->id; 4883 4884 if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id], 4885 &new_ddb->plane[pipe][plane_id]) && 4886 
skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id], 4887 &new_ddb->y_plane[pipe][plane_id])) 4888 continue; 4889 4890 plane_state = drm_atomic_get_plane_state(state, plane); 4891 if (IS_ERR(plane_state)) 4892 return PTR_ERR(plane_state); 4893 } 4894 4895 return 0; 4896 } 4897 4898 static int 4899 skl_compute_ddb(struct drm_atomic_state *state) 4900 { 4901 struct drm_device *dev = state->dev; 4902 struct drm_i915_private *dev_priv = to_i915(dev); 4903 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 4904 struct intel_crtc *intel_crtc; 4905 struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb; 4906 uint32_t realloc_pipes = pipes_modified(state); 4907 int ret; 4908 4909 /* 4910 * If this is our first atomic update following hardware readout, 4911 * we can't trust the DDB that the BIOS programmed for us. Let's 4912 * pretend that all pipes switched active status so that we'll 4913 * ensure a full DDB recompute. 4914 */ 4915 if (dev_priv->wm.distrust_bios_wm) { 4916 ret = drm_modeset_lock(&dev->mode_config.connection_mutex, 4917 state->acquire_ctx); 4918 if (ret) 4919 return ret; 4920 4921 intel_state->active_pipe_changes = ~0; 4922 4923 /* 4924 * We usually only initialize intel_state->active_crtcs if 4925 * we're doing a modeset; make sure this field is always 4926 * initialized during the sanitization process that happens 4927 * on the first commit too. 4928 */ 4929 if (!intel_state->modeset) 4930 intel_state->active_crtcs = dev_priv->active_crtcs; 4931 } 4932 4933 /* 4934 * If the modeset changes which CRTC's are active, we need to 4935 * recompute the DDB allocation for *all* active pipes, even 4936 * those that weren't otherwise being modified in any way by this 4937 * atomic commit. Due to the shrinking of the per-pipe allocations 4938 * when new active CRTC's are added, it's possible for a pipe that 4939 * we were already using and aren't changing at all here to suddenly 4940 * become invalid if its DDB needs exceed its new allocation. 4941 * 4942 * Note that if we wind up doing a full DDB recompute, we can't let 4943 * any other display updates race with this transaction, so we need 4944 * to grab the lock on *all* CRTC's. 4945 */ 4946 if (intel_state->active_pipe_changes) { 4947 realloc_pipes = ~0; 4948 intel_state->wm_results.dirty_pipes = ~0; 4949 } 4950 4951 /* 4952 * We're not recomputing for the pipes not included in the commit, so 4953 * make sure we start with the current state.
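 * (The memcpy below seeds this state's DDB from the last committed hardware state before the loop over realloc_pipes overwrites the recomputed entries.)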
4954 */ 4955 memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb)); 4956 4957 for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) { 4958 struct intel_crtc_state *cstate; 4959 4960 cstate = intel_atomic_get_crtc_state(state, intel_crtc); 4961 if (IS_ERR(cstate)) 4962 return PTR_ERR(cstate); 4963 4964 ret = skl_allocate_pipe_ddb(cstate, ddb); 4965 if (ret) 4966 return ret; 4967 4968 ret = skl_ddb_add_affected_planes(cstate); 4969 if (ret) 4970 return ret; 4971 } 4972 4973 return 0; 4974 } 4975 4976 static void 4977 skl_copy_wm_for_pipe(struct skl_wm_values *dst, 4978 struct skl_wm_values *src, 4979 enum i915_pipe pipe) 4980 { 4981 memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe], 4982 sizeof(dst->ddb.y_plane[pipe])); 4983 memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe], 4984 sizeof(dst->ddb.plane[pipe])); 4985 } 4986 4987 static void 4988 skl_print_wm_changes(const struct drm_atomic_state *state) 4989 { 4990 const struct drm_device *dev = state->dev; 4991 const struct drm_i915_private *dev_priv = to_i915(dev); 4992 const struct intel_atomic_state *intel_state = 4993 to_intel_atomic_state(state); 4994 const struct drm_crtc *crtc; 4995 const struct drm_crtc_state *cstate; 4996 const struct intel_plane *intel_plane; 4997 const struct skl_ddb_allocation *old_ddb = &dev_priv->wm.skl_hw.ddb; 4998 const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; 4999 int i; 5000 5001 for_each_new_crtc_in_state(state, crtc, cstate, i) { 5002 const struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 5003 enum i915_pipe pipe = intel_crtc->pipe; 5004 5005 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { 5006 enum plane_id plane_id = intel_plane->id; 5007 const struct skl_ddb_entry *old, *new; 5008 5009 old = &old_ddb->plane[pipe][plane_id]; 5010 new = &new_ddb->plane[pipe][plane_id]; 5011 5012 if (skl_ddb_entry_equal(old, new)) 5013 continue; 5014 5015 DRM_DEBUG_ATOMIC("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n", 5016 intel_plane->base.base.id, 5017 intel_plane->base.name, 5018 old->start, old->end, 5019 new->start, new->end); 5020 } 5021 } 5022 } 5023 5024 static int 5025 skl_compute_wm(struct drm_atomic_state *state) 5026 { 5027 struct drm_crtc *crtc; 5028 struct drm_crtc_state *cstate; 5029 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 5030 struct skl_wm_values *results = &intel_state->wm_results; 5031 struct drm_device *dev = state->dev; 5032 struct skl_pipe_wm *pipe_wm; 5033 bool changed = false; 5034 int ret, i; 5035 5036 /* 5037 * When we distrust bios wm we always need to recompute to set the 5038 * expected DDB allocations for each CRTC. 5039 */ 5040 if (to_i915(dev)->wm.distrust_bios_wm) 5041 changed = true; 5042 5043 /* 5044 * If this transaction isn't actually touching any CRTC's, don't 5045 * bother with watermark calculation. Note that if we pass this 5046 * test, we're guaranteed to hold at least one CRTC state mutex, 5047 * which means we can safely use values like dev_priv->active_crtcs 5048 * since any racing commits that want to update them would need to 5049 * hold _all_ CRTC state mutexes. 5050 */ 5051 for_each_new_crtc_in_state(state, crtc, cstate, i) 5052 changed = true; 5053 5054 if (!changed) 5055 return 0; 5056 5057 /* Clear all dirty flags */ 5058 results->dirty_pipes = 0; 5059 5060 ret = skl_compute_ddb(state); 5061 if (ret) 5062 return ret; 5063 5064 /* 5065 * Calculate WM's for all pipes that are part of this transaction. 
5066 * Note that the DDB allocation above may have added more CRTC's that 5067 * weren't otherwise being modified (and set bits in dirty_pipes) if 5068 * pipe allocations had to change. 5069 * 5070 * FIXME: Now that we're doing this in the atomic check phase, we 5071 * should allow skl_update_pipe_wm() to return failure in cases where 5072 * no suitable watermark values can be found. 5073 */ 5074 for_each_new_crtc_in_state(state, crtc, cstate, i) { 5075 struct intel_crtc_state *intel_cstate = 5076 to_intel_crtc_state(cstate); 5077 const struct skl_pipe_wm *old_pipe_wm = 5078 &to_intel_crtc_state(crtc->state)->wm.skl.optimal; 5079 5080 pipe_wm = &intel_cstate->wm.skl.optimal; 5081 ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm, 5082 &results->ddb, &changed); 5083 if (ret) 5084 return ret; 5085 5086 if (changed) 5087 results->dirty_pipes |= drm_crtc_mask(crtc); 5088 5089 if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0) 5090 /* This pipe's WM's did not change */ 5091 continue; 5092 5093 intel_cstate->update_wm_pre = true; 5094 } 5095 5096 skl_print_wm_changes(state); 5097 5098 return 0; 5099 } 5100 5101 static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state, 5102 struct intel_crtc_state *cstate) 5103 { 5104 struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc); 5105 struct drm_i915_private *dev_priv = to_i915(state->base.dev); 5106 struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal; 5107 const struct skl_ddb_allocation *ddb = &state->wm_results.ddb; 5108 enum i915_pipe pipe = crtc->pipe; 5109 enum plane_id plane_id; 5110 5111 if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base))) 5112 return; 5113 5114 I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime); 5115 5116 for_each_plane_id_on_crtc(crtc, plane_id) { 5117 if (plane_id != PLANE_CURSOR) 5118 skl_write_plane_wm(crtc, &pipe_wm->planes[plane_id], 5119 ddb, plane_id); 5120 else 5121 skl_write_cursor_wm(crtc, &pipe_wm->planes[plane_id], 5122 ddb); 5123 } 5124 } 5125 5126 static void skl_initial_wm(struct intel_atomic_state *state, 5127 struct intel_crtc_state *cstate) 5128 { 5129 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 5130 struct drm_device *dev = intel_crtc->base.dev; 5131 struct drm_i915_private *dev_priv = to_i915(dev); 5132 struct skl_wm_values *results = &state->wm_results; 5133 struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; 5134 enum i915_pipe pipe = intel_crtc->pipe; 5135 5136 if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0) 5137 return; 5138 5139 mutex_lock(&dev_priv->wm.wm_mutex); 5140 5141 if (cstate->base.active_changed) 5142 skl_atomic_update_crtc_wm(state, cstate); 5143 5144 skl_copy_wm_for_pipe(hw_vals, results, pipe); 5145 5146 mutex_unlock(&dev_priv->wm.wm_mutex); 5147 } 5148 5149 static void ilk_compute_wm_config(struct drm_device *dev, 5150 struct intel_wm_config *config) 5151 { 5152 struct intel_crtc *crtc; 5153 5154 /* Compute the currently _active_ config */ 5155 for_each_intel_crtc(dev, crtc) { 5156 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk; 5157 5158 if (!wm->pipe_enabled) 5159 continue; 5160 5161 config->sprites_enabled |= wm->sprites_enabled; 5162 config->sprites_scaled |= wm->sprites_scaled; 5163 config->num_pipes_active++; 5164 } 5165 } 5166 5167 static void ilk_program_watermarks(struct drm_i915_private *dev_priv) 5168 { 5169 struct drm_device *dev = &dev_priv->drm; 5170 struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm; 5171 struct ilk_wm_maximums max; 5172 struct intel_wm_config 
config = {}; 5173 struct ilk_wm_values results = {}; 5174 enum intel_ddb_partitioning partitioning; 5175 5176 ilk_compute_wm_config(dev, &config); 5177 5178 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max); 5179 ilk_wm_merge(dev, &config, &max, &lp_wm_1_2); 5180 5181 /* 5/6 split only in single pipe config on IVB+ */ 5182 if (INTEL_GEN(dev_priv) >= 7 && 5183 config.num_pipes_active == 1 && config.sprites_enabled) { 5184 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max); 5185 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6); 5186 5187 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6); 5188 } else { 5189 best_lp_wm = &lp_wm_1_2; 5190 } 5191 5192 partitioning = (best_lp_wm == &lp_wm_1_2) ? 5193 INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6; 5194 5195 ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results); 5196 5197 ilk_write_wm_values(dev_priv, &results); 5198 } 5199 5200 static void ilk_initial_watermarks(struct intel_atomic_state *state, 5201 struct intel_crtc_state *cstate) 5202 { 5203 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); 5204 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 5205 5206 mutex_lock(&dev_priv->wm.wm_mutex); 5207 intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate; 5208 ilk_program_watermarks(dev_priv); 5209 mutex_unlock(&dev_priv->wm.wm_mutex); 5210 } 5211 5212 static void ilk_optimize_watermarks(struct intel_atomic_state *state, 5213 struct intel_crtc_state *cstate) 5214 { 5215 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); 5216 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 5217 5218 mutex_lock(&dev_priv->wm.wm_mutex); 5219 if (cstate->wm.need_postvbl_update) { 5220 intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal; 5221 ilk_program_watermarks(dev_priv); 5222 } 5223 mutex_unlock(&dev_priv->wm.wm_mutex); 5224 } 5225 5226 static inline void skl_wm_level_from_reg_val(uint32_t val, 5227 struct skl_wm_level *level) 5228 { 5229 level->plane_en = val & PLANE_WM_EN; 5230 level->plane_res_b = val & PLANE_WM_BLOCKS_MASK; 5231 level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) & 5232 PLANE_WM_LINES_MASK; 5233 } 5234 5235 void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc, 5236 struct skl_pipe_wm *out) 5237 { 5238 struct drm_i915_private *dev_priv = to_i915(crtc->dev); 5239 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 5240 enum i915_pipe pipe = intel_crtc->pipe; 5241 int level, max_level; 5242 enum plane_id plane_id; 5243 uint32_t val; 5244 5245 max_level = ilk_wm_max_level(dev_priv); 5246 5247 for_each_plane_id_on_crtc(intel_crtc, plane_id) { 5248 struct skl_plane_wm *wm = &out->planes[plane_id]; 5249 5250 for (level = 0; level <= max_level; level++) { 5251 if (plane_id != PLANE_CURSOR) 5252 val = I915_READ(PLANE_WM(pipe, plane_id, level)); 5253 else 5254 val = I915_READ(CUR_WM(pipe, level)); 5255 5256 skl_wm_level_from_reg_val(val, &wm->wm[level]); 5257 } 5258 5259 if (plane_id != PLANE_CURSOR) 5260 val = I915_READ(PLANE_WM_TRANS(pipe, plane_id)); 5261 else 5262 val = I915_READ(CUR_WM_TRANS(pipe)); 5263 5264 skl_wm_level_from_reg_val(val, &wm->trans_wm); 5265 } 5266 5267 if (!intel_crtc->active) 5268 return; 5269 5270 out->linetime = I915_READ(PIPE_WM_LINETIME(pipe)); 5271 } 5272 5273 void skl_wm_get_hw_state(struct drm_device *dev) 5274 { 5275 struct drm_i915_private *dev_priv = to_i915(dev); 5276 struct skl_wm_values *hw = &dev_priv->wm.skl_hw; 5277 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb; 5278 struct drm_crtc *crtc; 
5279 struct intel_crtc *intel_crtc; 5280 struct intel_crtc_state *cstate; 5281 5282 skl_ddb_get_hw_state(dev_priv, ddb); 5283 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 5284 intel_crtc = to_intel_crtc(crtc); 5285 cstate = to_intel_crtc_state(crtc->state); 5286 5287 skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal); 5288 5289 if (intel_crtc->active) 5290 hw->dirty_pipes |= drm_crtc_mask(crtc); 5291 } 5292 5293 if (dev_priv->active_crtcs) { 5294 /* Fully recompute DDB on first atomic commit */ 5295 dev_priv->wm.distrust_bios_wm = true; 5296 } else { 5297 /* Easy/common case; just sanitize DDB now if everything off */ 5298 memset(ddb, 0, sizeof(*ddb)); 5299 } 5300 } 5301 5302 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) 5303 { 5304 struct drm_device *dev = crtc->dev; 5305 struct drm_i915_private *dev_priv = to_i915(dev); 5306 struct ilk_wm_values *hw = &dev_priv->wm.hw; 5307 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 5308 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); 5309 struct intel_pipe_wm *active = &cstate->wm.ilk.optimal; 5310 enum i915_pipe pipe = intel_crtc->pipe; 5311 static const i915_reg_t wm0_pipe_reg[] = { 5312 [PIPE_A] = WM0_PIPEA_ILK, 5313 [PIPE_B] = WM0_PIPEB_ILK, 5314 [PIPE_C] = WM0_PIPEC_IVB, 5315 }; 5316 5317 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]); 5318 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 5319 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); 5320 5321 memset(active, 0, sizeof(*active)); 5322 5323 active->pipe_enabled = intel_crtc->active; 5324 5325 if (active->pipe_enabled) { 5326 u32 tmp = hw->wm_pipe[pipe]; 5327 5328 /* 5329 * For active pipes the LP0 watermark is marked as 5330 * enabled, and LP1+ watermarks as disabled since 5331 * we can't really reverse compute them in case 5332 * multiple pipes are active. 5333 */ 5334 active->wm[0].enable = true; 5335 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT; 5336 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT; 5337 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK; 5338 active->linetime = hw->wm_linetime[pipe]; 5339 } else { 5340 int level, max_level = ilk_wm_max_level(dev_priv); 5341 5342 /* 5343 * For inactive pipes, all watermark levels 5344 * should be marked as enabled but zeroed, 5345 * which is what we'd compute them to.
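 * (An enabled level with all-zero values places no constraint when the per-pipe results are later merged.)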
5346 */ 5347 for (level = 0; level <= max_level; level++) 5348 active->wm[level].enable = true; 5349 } 5350 5351 intel_crtc->wm.active.ilk = *active; 5352 } 5353 5354 #define _FW_WM(value, plane) \ 5355 (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT) 5356 #define _FW_WM_VLV(value, plane) \ 5357 (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT) 5358 5359 static void g4x_read_wm_values(struct drm_i915_private *dev_priv, 5360 struct g4x_wm_values *wm) 5361 { 5362 uint32_t tmp; 5363 5364 tmp = I915_READ(DSPFW1); 5365 wm->sr.plane = _FW_WM(tmp, SR); 5366 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB); 5367 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB); 5368 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA); 5369 5370 tmp = I915_READ(DSPFW2); 5371 wm->fbc_en = tmp & DSPFW_FBC_SR_EN; 5372 wm->sr.fbc = _FW_WM(tmp, FBC_SR); 5373 wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR); 5374 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB); 5375 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA); 5376 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA); 5377 5378 tmp = I915_READ(DSPFW3); 5379 wm->hpll_en = tmp & DSPFW_HPLL_SR_EN; 5380 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR); 5381 wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR); 5382 wm->hpll.plane = _FW_WM(tmp, HPLL_SR); 5383 } 5384 5385 static void vlv_read_wm_values(struct drm_i915_private *dev_priv, 5386 struct vlv_wm_values *wm) 5387 { 5388 enum i915_pipe pipe; 5389 uint32_t tmp; 5390 5391 for_each_pipe(dev_priv, pipe) { 5392 tmp = I915_READ(VLV_DDL(pipe)); 5393 5394 wm->ddl[pipe].plane[PLANE_PRIMARY] = 5395 (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 5396 wm->ddl[pipe].plane[PLANE_CURSOR] = 5397 (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 5398 wm->ddl[pipe].plane[PLANE_SPRITE0] = 5399 (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 5400 wm->ddl[pipe].plane[PLANE_SPRITE1] = 5401 (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 5402 } 5403 5404 tmp = I915_READ(DSPFW1); 5405 wm->sr.plane = _FW_WM(tmp, SR); 5406 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB); 5407 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB); 5408 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA); 5409 5410 tmp = I915_READ(DSPFW2); 5411 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB); 5412 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA); 5413 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA); 5414 5415 tmp = I915_READ(DSPFW3); 5416 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR); 5417 5418 if (IS_CHERRYVIEW(dev_priv)) { 5419 tmp = I915_READ(DSPFW7_CHV); 5420 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED); 5421 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC); 5422 5423 tmp = I915_READ(DSPFW8_CHV); 5424 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF); 5425 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE); 5426 5427 tmp = I915_READ(DSPFW9_CHV); 5428 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC); 5429 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC); 5430 5431 tmp = I915_READ(DSPHOWM); 5432 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9; 5433 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8; 5434 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8; 5435 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= 
_FW_WM(tmp, PLANEC_HI) << 8; 5436 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8; 5437 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8; 5438 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8; 5439 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8; 5440 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8; 5441 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8; 5442 } else { 5443 tmp = I915_READ(DSPFW7); 5444 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED); 5445 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC); 5446 5447 tmp = I915_READ(DSPHOWM); 5448 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9; 5449 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8; 5450 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8; 5451 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8; 5452 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8; 5453 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8; 5454 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8; 5455 } 5456 } 5457 5458 #undef _FW_WM 5459 #undef _FW_WM_VLV 5460 5461 void g4x_wm_get_hw_state(struct drm_device *dev) 5462 { 5463 struct drm_i915_private *dev_priv = to_i915(dev); 5464 struct g4x_wm_values *wm = &dev_priv->wm.g4x; 5465 struct intel_crtc *crtc; 5466 5467 g4x_read_wm_values(dev_priv, wm); 5468 5469 wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN; 5470 5471 for_each_intel_crtc(dev, crtc) { 5472 struct intel_crtc_state *crtc_state = 5473 to_intel_crtc_state(crtc->base.state); 5474 struct g4x_wm_state *active = &crtc->wm.active.g4x; 5475 struct g4x_pipe_wm *raw; 5476 enum i915_pipe pipe = crtc->pipe; 5477 enum plane_id plane_id; 5478 int level, max_level; 5479 5480 active->cxsr = wm->cxsr; 5481 active->hpll_en = wm->hpll_en; 5482 active->fbc_en = wm->fbc_en; 5483 5484 active->sr = wm->sr; 5485 active->hpll = wm->hpll; 5486 5487 for_each_plane_id_on_crtc(crtc, plane_id) { 5488 active->wm.plane[plane_id] = 5489 wm->pipe[pipe].plane[plane_id]; 5490 } 5491 5492 if (wm->cxsr && wm->hpll_en) 5493 max_level = G4X_WM_LEVEL_HPLL; 5494 else if (wm->cxsr) 5495 max_level = G4X_WM_LEVEL_SR; 5496 else 5497 max_level = G4X_WM_LEVEL_NORMAL; 5498 5499 level = G4X_WM_LEVEL_NORMAL; 5500 raw = &crtc_state->wm.g4x.raw[level]; 5501 for_each_plane_id_on_crtc(crtc, plane_id) 5502 raw->plane[plane_id] = active->wm.plane[plane_id]; 5503 5504 if (++level > max_level) 5505 goto out; 5506 5507 raw = &crtc_state->wm.g4x.raw[level]; 5508 raw->plane[PLANE_PRIMARY] = active->sr.plane; 5509 raw->plane[PLANE_CURSOR] = active->sr.cursor; 5510 raw->plane[PLANE_SPRITE0] = 0; 5511 raw->fbc = active->sr.fbc; 5512 5513 if (++level > max_level) 5514 goto out; 5515 5516 raw = &crtc_state->wm.g4x.raw[level]; 5517 raw->plane[PLANE_PRIMARY] = active->hpll.plane; 5518 raw->plane[PLANE_CURSOR] = active->hpll.cursor; 5519 raw->plane[PLANE_SPRITE0] = 0; 5520 raw->fbc = active->hpll.fbc; 5521 5522 out: 5523 for_each_plane_id_on_crtc(crtc, plane_id) 5524 g4x_raw_plane_wm_set(crtc_state, level, 5525 plane_id, USHRT_MAX); 5526 g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX); 5527 5528 crtc_state->wm.g4x.optimal = *active; 5529 crtc_state->wm.g4x.intermediate = *active; 5530 5531 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n", 5532 pipe_name(pipe), 5533 wm->pipe[pipe].plane[PLANE_PRIMARY], 5534 
wm->pipe[pipe].plane[PLANE_CURSOR], 5535 wm->pipe[pipe].plane[PLANE_SPRITE0]); 5536 } 5537 5538 DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n", 5539 wm->sr.plane, wm->sr.cursor, wm->sr.fbc); 5540 DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n", 5541 wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc); 5542 DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n", 5543 yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en)); 5544 } 5545 5546 void g4x_wm_sanitize(struct drm_i915_private *dev_priv) 5547 { 5548 struct intel_plane *plane; 5549 struct intel_crtc *crtc; 5550 5551 mutex_lock(&dev_priv->wm.wm_mutex); 5552 5553 for_each_intel_plane(&dev_priv->drm, plane) { 5554 struct intel_crtc *crtc = 5555 intel_get_crtc_for_pipe(dev_priv, plane->pipe); 5556 struct intel_crtc_state *crtc_state = 5557 to_intel_crtc_state(crtc->base.state); 5558 struct intel_plane_state *plane_state = 5559 to_intel_plane_state(plane->base.state); 5560 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal; 5561 enum plane_id plane_id = plane->id; 5562 int level; 5563 5564 if (plane_state->base.visible) 5565 continue; 5566 5567 for (level = 0; level < 3; level++) { 5568 struct g4x_pipe_wm *raw = 5569 &crtc_state->wm.g4x.raw[level]; 5570 5571 raw->plane[plane_id] = 0; 5572 wm_state->wm.plane[plane_id] = 0; 5573 } 5574 5575 if (plane_id == PLANE_PRIMARY) { 5576 for (level = 0; level < 3; level++) { 5577 struct g4x_pipe_wm *raw = 5578 &crtc_state->wm.g4x.raw[level]; 5579 raw->fbc = 0; 5580 } 5581 5582 wm_state->sr.fbc = 0; 5583 wm_state->hpll.fbc = 0; 5584 wm_state->fbc_en = false; 5585 } 5586 } 5587 5588 for_each_intel_crtc(&dev_priv->drm, crtc) { 5589 struct intel_crtc_state *crtc_state = 5590 to_intel_crtc_state(crtc->base.state); 5591 5592 crtc_state->wm.g4x.intermediate = 5593 crtc_state->wm.g4x.optimal; 5594 crtc->wm.active.g4x = crtc_state->wm.g4x.optimal; 5595 } 5596 5597 g4x_program_watermarks(dev_priv); 5598 5599 mutex_unlock(&dev_priv->wm.wm_mutex); 5600 } 5601 5602 void vlv_wm_get_hw_state(struct drm_device *dev) 5603 { 5604 struct drm_i915_private *dev_priv = to_i915(dev); 5605 struct vlv_wm_values *wm = &dev_priv->wm.vlv; 5606 struct intel_crtc *crtc; 5607 u32 val; 5608 5609 vlv_read_wm_values(dev_priv, wm); 5610 5611 wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; 5612 wm->level = VLV_WM_LEVEL_PM2; 5613 5614 if (IS_CHERRYVIEW(dev_priv)) { 5615 mutex_lock(&dev_priv->pcu_lock); 5616 5617 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); 5618 if (val & DSP_MAXFIFO_PM5_ENABLE) 5619 wm->level = VLV_WM_LEVEL_PM5; 5620 5621 /* 5622 * If DDR DVFS is disabled in the BIOS, Punit 5623 * will never ack the request. So if that happens 5624 * assume we don't have to enable/disable DDR DVFS 5625 * dynamically. To test that just set the REQ_ACK 5626 * bit to poke the Punit, but don't change the 5627 * HIGH/LOW bits so that we don't actually change 5628 * the current state. 
5629 */ 5630 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 5631 val |= FORCE_DDR_FREQ_REQ_ACK; 5632 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val); 5633 5634 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) & 5635 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) { 5636 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, " 5637 "assuming DDR DVFS is disabled\n"); 5638 dev_priv->wm.max_level = VLV_WM_LEVEL_PM5; 5639 } else { 5640 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 5641 if ((val & FORCE_DDR_HIGH_FREQ) == 0) 5642 wm->level = VLV_WM_LEVEL_DDR_DVFS; 5643 } 5644 5645 mutex_unlock(&dev_priv->pcu_lock); 5646 } 5647 5648 for_each_intel_crtc(dev, crtc) { 5649 struct intel_crtc_state *crtc_state = 5650 to_intel_crtc_state(crtc->base.state); 5651 struct vlv_wm_state *active = &crtc->wm.active.vlv; 5652 const struct vlv_fifo_state *fifo_state = 5653 &crtc_state->wm.vlv.fifo_state; 5654 enum i915_pipe pipe = crtc->pipe; 5655 enum plane_id plane_id; 5656 int level; 5657 5658 vlv_get_fifo_size(crtc_state); 5659 5660 active->num_levels = wm->level + 1; 5661 active->cxsr = wm->cxsr; 5662 5663 for (level = 0; level < active->num_levels; level++) { 5664 struct g4x_pipe_wm *raw = 5665 &crtc_state->wm.vlv.raw[level]; 5666 5667 active->sr[level].plane = wm->sr.plane; 5668 active->sr[level].cursor = wm->sr.cursor; 5669 5670 for_each_plane_id_on_crtc(crtc, plane_id) { 5671 active->wm[level].plane[plane_id] = 5672 wm->pipe[pipe].plane[plane_id]; 5673 5674 raw->plane[plane_id] = 5675 vlv_invert_wm_value(active->wm[level].plane[plane_id], 5676 fifo_state->plane[plane_id]); 5677 } 5678 } 5679 5680 for_each_plane_id_on_crtc(crtc, plane_id) 5681 vlv_raw_plane_wm_set(crtc_state, level, 5682 plane_id, USHRT_MAX); 5683 vlv_invalidate_wms(crtc, active, level); 5684 5685 crtc_state->wm.vlv.optimal = *active; 5686 crtc_state->wm.vlv.intermediate = *active; 5687 5688 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", 5689 pipe_name(pipe), 5690 wm->pipe[pipe].plane[PLANE_PRIMARY], 5691 wm->pipe[pipe].plane[PLANE_CURSOR], 5692 wm->pipe[pipe].plane[PLANE_SPRITE0], 5693 wm->pipe[pipe].plane[PLANE_SPRITE1]); 5694 } 5695 5696 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", 5697 wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); 5698 } 5699 5700 void vlv_wm_sanitize(struct drm_i915_private *dev_priv) 5701 { 5702 struct intel_plane *plane; 5703 struct intel_crtc *crtc; 5704 5705 mutex_lock(&dev_priv->wm.wm_mutex); 5706 5707 for_each_intel_plane(&dev_priv->drm, plane) { 5708 struct intel_crtc *crtc = 5709 intel_get_crtc_for_pipe(dev_priv, plane->pipe); 5710 struct intel_crtc_state *crtc_state = 5711 to_intel_crtc_state(crtc->base.state); 5712 struct intel_plane_state *plane_state = 5713 to_intel_plane_state(plane->base.state); 5714 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; 5715 const struct vlv_fifo_state *fifo_state = 5716 &crtc_state->wm.vlv.fifo_state; 5717 enum plane_id plane_id = plane->id; 5718 int level; 5719 5720 if (plane_state->base.visible) 5721 continue; 5722 5723 for (level = 0; level < wm_state->num_levels; level++) { 5724 struct g4x_pipe_wm *raw = 5725 &crtc_state->wm.vlv.raw[level]; 5726 5727 raw->plane[plane_id] = 0; 5728 5729 wm_state->wm[level].plane[plane_id] = 5730 vlv_invert_wm_value(raw->plane[plane_id], 5731 fifo_state->plane[plane_id]); 5732 } 5733 } 5734 5735 for_each_intel_crtc(&dev_priv->drm, crtc) { 5736 struct intel_crtc_state *crtc_state = 5737 to_intel_crtc_state(crtc->base.state); 
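		/*
		 * No watermark transition is in flight at readout time, so
		 * start from a steady state with intermediate == optimal.
		 */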
5738 5739 crtc_state->wm.vlv.intermediate = 5740 crtc_state->wm.vlv.optimal; 5741 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; 5742 } 5743 5744 vlv_program_watermarks(dev_priv); 5745 5746 mutex_unlock(&dev_priv->wm.wm_mutex); 5747 } 5748 5749 /* 5750 * FIXME should probably kill this and improve 5751 * the real watermark readout/sanitation instead 5752 */ 5753 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) 5754 { 5755 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); 5756 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); 5757 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); 5758 5759 /* 5760 * Don't touch WM1S_LP_EN here. 5761 * Doing so could cause underruns. 5762 */ 5763 } 5764 5765 void ilk_wm_get_hw_state(struct drm_device *dev) 5766 { 5767 struct drm_i915_private *dev_priv = to_i915(dev); 5768 struct ilk_wm_values *hw = &dev_priv->wm.hw; 5769 struct drm_crtc *crtc; 5770 5771 ilk_init_lp_watermarks(dev_priv); 5772 5773 for_each_crtc(dev, crtc) 5774 ilk_pipe_wm_get_hw_state(crtc); 5775 5776 hw->wm_lp[0] = I915_READ(WM1_LP_ILK); 5777 hw->wm_lp[1] = I915_READ(WM2_LP_ILK); 5778 hw->wm_lp[2] = I915_READ(WM3_LP_ILK); 5779 5780 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK); 5781 if (INTEL_GEN(dev_priv) >= 7) { 5782 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB); 5783 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB); 5784 } 5785 5786 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 5787 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ? 5788 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 5789 else if (IS_IVYBRIDGE(dev_priv)) 5790 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ? 5791 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 5792 5793 hw->enable_fbc_wm = 5794 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS); 5795 } 5796 5797 /** 5798 * intel_update_watermarks - update FIFO watermark values based on current modes 5799 * 5800 * Calculate watermark values for the various WM regs based on current mode 5801 * and plane configuration. 5802 * 5803 * There are several cases to deal with here: 5804 * - normal (i.e. non-self-refresh) 5805 * - self-refresh (SR) mode 5806 * - lines are large relative to FIFO size (buffer can hold up to 2) 5807 * - lines are small relative to FIFO size (buffer can hold more than 2 5808 * lines), so need to account for TLB latency 5809 * 5810 * The normal calculation is: 5811 * watermark = dotclock * bytes per pixel * latency 5812 * where latency is platform & configuration dependent (we assume pessimal 5813 * values here). 5814 * 5815 * The SR calculation is: 5816 * watermark = (trunc(latency/line time)+1) * surface width * 5817 * bytes per pixel 5818 * where 5819 * line time = htotal / dotclock 5820 * surface width = hdisplay for normal plane and 64 for cursor 5821 * and latency is assumed to be high, as above. 5822 * 5823 * The final value programmed to the register should always be rounded up, 5824 * and include an extra 2 entries to account for clock crossings. 5825 * 5826 * We don't use the sprite, so we can ignore that. And on Crestline we have 5827 * to set the non-SR watermarks to 8. 
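 *
 * Worked example with illustrative numbers (not taken from any real
 * mode): at a 100 MHz dot clock, 4 bytes per pixel and an assumed
 * 10 us latency, the FIFO has to cover 100e6 * 4 * 10e-6 = 4000 bytes
 * of outstanding fetch; assuming a 64-byte FIFO entry (cacheline),
 * that is 63 entries rounded up, plus the 2 extra entries above,
 * so 65 would be programmed.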
5828 */ 5829 void intel_update_watermarks(struct intel_crtc *crtc) 5830 { 5831 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); 5832 5833 if (dev_priv->display.update_wm) 5834 dev_priv->display.update_wm(crtc); 5835 } 5836 5837 void intel_enable_ipc(struct drm_i915_private *dev_priv) 5838 { 5839 u32 val; 5840 5841 /* Display WA #0477 WaDisableIPC: skl */ 5842 if (IS_SKYLAKE(dev_priv)) { 5843 dev_priv->ipc_enabled = false; 5844 return; 5845 } 5846 5847 val = I915_READ(DISP_ARB_CTL2); 5848 5849 if (dev_priv->ipc_enabled) 5850 val |= DISP_IPC_ENABLE; 5851 else 5852 val &= ~DISP_IPC_ENABLE; 5853 5854 I915_WRITE(DISP_ARB_CTL2, val); 5855 } 5856 5857 void intel_init_ipc(struct drm_i915_private *dev_priv) 5858 { 5859 dev_priv->ipc_enabled = false; 5860 if (!HAS_IPC(dev_priv)) 5861 return; 5862 5863 dev_priv->ipc_enabled = true; 5864 intel_enable_ipc(dev_priv); 5865 } 5866 5867 /* 5868 * Lock protecting IPS related data structures 5869 */ 5870 DEFINE_SPINLOCK(mchdev_lock); 5871 5872 /* Global for IPS driver to get at the current i915 device. Protected by 5873 * mchdev_lock. */ 5874 static struct drm_i915_private *i915_mch_dev; 5875 5876 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val) 5877 { 5878 u16 rgvswctl; 5879 5880 lockdep_assert_held(&mchdev_lock); 5881 5882 rgvswctl = I915_READ16(MEMSWCTL); 5883 if (rgvswctl & MEMCTL_CMD_STS) { 5884 DRM_DEBUG("gpu busy, RCS change rejected\n"); 5885 return false; /* still busy with another command */ 5886 } 5887 5888 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | 5889 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; 5890 I915_WRITE16(MEMSWCTL, rgvswctl); 5891 POSTING_READ16(MEMSWCTL); 5892 5893 rgvswctl |= MEMCTL_CMD_STS; 5894 I915_WRITE16(MEMSWCTL, rgvswctl); 5895 5896 return true; 5897 } 5898 5899 static void ironlake_enable_drps(struct drm_i915_private *dev_priv) 5900 { 5901 u32 rgvmodectl; 5902 u8 fmax, fmin, fstart, vstart; 5903 5904 spin_lock_irq(&mchdev_lock); 5905 5906 rgvmodectl = I915_READ(MEMMODECTL); 5907 5908 /* Enable temp reporting */ 5909 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN); 5910 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE); 5911 5912 /* 100ms RC evaluation intervals */ 5913 I915_WRITE(RCUPEI, 100000); 5914 I915_WRITE(RCDNEI, 100000); 5915 5916 /* Set max/min thresholds to 90ms and 80ms respectively */ 5917 I915_WRITE(RCBMAXAVG, 90000); 5918 I915_WRITE(RCBMINAVG, 80000); 5919 5920 I915_WRITE(MEMIHYST, 1); 5921 5922 /* Set up min, max, and cur for interrupt handling */ 5923 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; 5924 fmin = (rgvmodectl & MEMMODE_FMIN_MASK); 5925 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 5926 MEMMODE_FSTART_SHIFT; 5927 5928 vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >> 5929 PXVFREQ_PX_SHIFT; 5930 5931 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ 5932 dev_priv->ips.fstart = fstart; 5933 5934 dev_priv->ips.max_delay = fstart; 5935 dev_priv->ips.min_delay = fmin; 5936 dev_priv->ips.cur_delay = fstart; 5937 5938 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", 5939 fmax, fmin, fstart); 5940 5941 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); 5942 5943 /* 5944 * Interrupts will be enabled in ironlake_irq_postinstall 5945 */ 5946 5947 I915_WRITE(VIDSTART, vstart); 5948 POSTING_READ(VIDSTART); 5949 5950 rgvmodectl |= MEMMODE_SWMODE_EN; 5951 I915_WRITE(MEMMODECTL, rgvmodectl); 5952 5953 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) 5954 DRM_ERROR("stuck trying to change perf mode\n"); 
5955 mdelay(1); 5956 5957 ironlake_set_drps(dev_priv, fstart); 5958 5959 dev_priv->ips.last_count1 = I915_READ(DMIEC) + 5960 I915_READ(DDREC) + I915_READ(CSIEC); 5961 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); 5962 dev_priv->ips.last_count2 = I915_READ(GFXEC); 5963 dev_priv->ips.last_time2 = ktime_get_raw_ns(); 5964 5965 spin_unlock_irq(&mchdev_lock); 5966 } 5967 5968 static void ironlake_disable_drps(struct drm_i915_private *dev_priv) 5969 { 5970 u16 rgvswctl; 5971 5972 spin_lock_irq(&mchdev_lock); 5973 5974 rgvswctl = I915_READ16(MEMSWCTL); 5975 5976 /* Ack interrupts, disable EFC interrupt */ 5977 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN); 5978 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG); 5979 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT); 5980 I915_WRITE(DEIIR, DE_PCU_EVENT); 5981 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); 5982 5983 /* Go back to the starting frequency */ 5984 ironlake_set_drps(dev_priv, dev_priv->ips.fstart); 5985 mdelay(1); 5986 rgvswctl |= MEMCTL_CMD_STS; 5987 I915_WRITE(MEMSWCTL, rgvswctl); 5988 mdelay(1); 5989 5990 spin_unlock_irq(&mchdev_lock); 5991 } 5992 5993 /* There's a funny hw issue where the hw returns all 0 when reading from 5994 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value 5995 * ourselves, instead of doing a rmw cycle (which might result in us clearing 5996 * all limits and the gpu stuck at whatever frequency it is at atm). 5997 */ 5998 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) 5999 { 6000 struct intel_rps *rps = &dev_priv->gt_pm.rps; 6001 u32 limits; 6002 6003 /* Only set the down limit when we've reached the lowest level to avoid 6004 * getting more interrupts, otherwise leave this clear. This prevents a 6005 * race in the hw when coming out of rc6: There's a tiny window where 6006 * the hw runs at the minimal clock before selecting the desired 6007 * frequency, if the down threshold expires in that window we will not 6008 * receive a down interrupt. */ 6009 if (INTEL_GEN(dev_priv) >= 9) { 6010 limits = (rps->max_freq_softlimit) << 23; 6011 if (val <= rps->min_freq_softlimit) 6012 limits |= (rps->min_freq_softlimit) << 14; 6013 } else { 6014 limits = rps->max_freq_softlimit << 24; 6015 if (val <= rps->min_freq_softlimit) 6016 limits |= rps->min_freq_softlimit << 16; 6017 } 6018 6019 return limits; 6020 } 6021 6022 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) 6023 { 6024 struct intel_rps *rps = &dev_priv->gt_pm.rps; 6025 int new_power; 6026 u32 threshold_up = 0, threshold_down = 0; /* in % */ 6027 u32 ei_up = 0, ei_down = 0; 6028 6029 new_power = rps->power; 6030 switch (rps->power) { 6031 case LOW_POWER: 6032 if (val > rps->efficient_freq + 1 && 6033 val > rps->cur_freq) 6034 new_power = BETWEEN; 6035 break; 6036 6037 case BETWEEN: 6038 if (val <= rps->efficient_freq && 6039 val < rps->cur_freq) 6040 new_power = LOW_POWER; 6041 else if (val >= rps->rp0_freq && 6042 val > rps->cur_freq) 6043 new_power = HIGH_POWER; 6044 break; 6045 6046 case HIGH_POWER: 6047 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && 6048 val < rps->cur_freq) 6049 new_power = BETWEEN; 6050 break; 6051 } 6052 /* Max/min bins are special */ 6053 if (val <= rps->min_freq_softlimit) 6054 new_power = LOW_POWER; 6055 if (val >= rps->max_freq_softlimit) 6056 new_power = HIGH_POWER; 6057 if (new_power == rps->power) 6058 return; 6059 6060 /* Note the units here are not exactly 1us, but 1280ns. 
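	 * GT_INTERVAL_FROM_US() does the conversion, so e.g. the 16000 us
	 * LOW_POWER up interval comes out as roughly 16000 / 1.28 = 12500
	 * ticks (illustrative; the exact scaling varies by platform).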
	 */
	switch (new_power) {
	case LOW_POWER:
		/* Upclock if more than 95% busy over 16ms */
		ei_up = 16000;
		threshold_up = 95;

		/* Downclock if less than 85% busy over 32ms */
		ei_down = 32000;
		threshold_down = 85;
		break;

	case BETWEEN:
		/* Upclock if more than 90% busy over 13ms */
		ei_up = 13000;
		threshold_up = 90;

		/* Downclock if less than 75% busy over 32ms */
		ei_down = 32000;
		threshold_down = 75;
		break;

	case HIGH_POWER:
		/* Upclock if more than 85% busy over 10ms */
		ei_up = 10000;
		threshold_up = 85;

		/* Downclock if less than 60% busy over 32ms */
		ei_down = 32000;
		threshold_down = 60;
		break;
	}

	/* When BYT can survive dynamic sw freq adjustments without
	 * hanging the system, this restriction can be lifted.
	 */
	if (IS_VALLEYVIEW(dev_priv))
		goto skip_hw_write;

	I915_WRITE(GEN6_RP_UP_EI,
		   GT_INTERVAL_FROM_US(dev_priv, ei_up));
	I915_WRITE(GEN6_RP_UP_THRESHOLD,
		   GT_INTERVAL_FROM_US(dev_priv,
				       ei_up * threshold_up / 100));

	I915_WRITE(GEN6_RP_DOWN_EI,
		   GT_INTERVAL_FROM_US(dev_priv, ei_down));
	I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
		   GT_INTERVAL_FROM_US(dev_priv,
				       ei_down * threshold_down / 100));

	I915_WRITE(GEN6_RP_CONTROL,
		   GEN6_RP_MEDIA_TURBO |
		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
		   GEN6_RP_MEDIA_IS_GFX |
		   GEN6_RP_ENABLE |
		   GEN6_RP_UP_BUSY_AVG |
		   GEN6_RP_DOWN_IDLE_AVG);

skip_hw_write:
	rps->power = new_power;
	rps->up_threshold = threshold_up;
	rps->down_threshold = threshold_down;
	rps->last_adj = 0;
}

static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
{
	struct intel_rps *rps = &dev_priv->gt_pm.rps;
	u32 mask = 0;

	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
	if (val > rps->min_freq_softlimit)
		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
	if (val < rps->max_freq_softlimit)
		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;

	mask &= dev_priv->pm_rps_events;

	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
}

/* gen6_set_rps is called to update the frequency request, but should also be
 * called when the range (min_delay and max_delay) is modified so that we can
 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
{
	struct intel_rps *rps = &dev_priv->gt_pm.rps;

	/* min/max delay may still have been modified so be sure to
	 * write the limits value.
	 */
	if (val != rps->cur_freq) {
		gen6_set_rps_thresholds(dev_priv, val);

		if (INTEL_GEN(dev_priv) >= 9)
			I915_WRITE(GEN6_RPNSWREQ,
				   GEN9_FREQUENCY(val));
		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
			I915_WRITE(GEN6_RPNSWREQ,
				   HSW_FREQUENCY(val));
		else
			I915_WRITE(GEN6_RPNSWREQ,
				   GEN6_FREQUENCY(val) |
				   GEN6_OFFSET(0) |
				   GEN6_AGGRESSIVE_TURBO);
	}

	/* Make sure we continue to get interrupts
	 * until we hit the minimum or maximum frequencies.
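	 * (gen6_rps_pm_mask() masks off the up events once we sit at the
	 * soft maximum and the down events once we sit at the soft minimum.)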
	 */
	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));

	rps->cur_freq = val;
	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));

	return 0;
}

static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
{
	int err;

	if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
		      "Odd GPU freq value\n"))
		val &= ~1;

	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));

	if (val != dev_priv->gt_pm.rps.cur_freq) {
		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
		if (err)
			return err;

		gen6_set_rps_thresholds(dev_priv, val);
	}

	dev_priv->gt_pm.rps.cur_freq = val;
	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));

	return 0;
}

/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
 *
 * If Gfx is idle:
 * 1. Forcewake the media well.
 * 2. Request the idle frequency.
 * 3. Release forcewake of the media well.
 */
static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
{
	struct intel_rps *rps = &dev_priv->gt_pm.rps;
	u32 val = rps->idle_freq;
	int err;

	if (rps->cur_freq <= val)
		return;

	/* The punit delays the write of the frequency and voltage until it
	 * determines the GPU is awake. During normal usage we don't want to
	 * waste power changing the frequency if the GPU is sleeping (rc6).
	 * However, the GPU and driver are now idle and we do not want to delay
	 * switching to minimum voltage (reducing power whilst idle) as we do
	 * not expect to be woken in the near future and so must flush the
	 * change by waking the device.
	 *
	 * We choose to take the media powerwell (either would do to trick the
	 * punit into committing the voltage change) as that takes a lot less
	 * power than the render powerwell.
	 */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
	err = valleyview_set_rps(dev_priv, val);
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);

	if (err)
		DRM_ERROR("Failed to set RPS for idle\n");
}

void gen6_rps_busy(struct drm_i915_private *dev_priv)
{
	struct intel_rps *rps = &dev_priv->gt_pm.rps;

	mutex_lock(&dev_priv->pcu_lock);
	if (rps->enabled) {
		u8 freq;

		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
			gen6_rps_reset_ei(dev_priv);
		I915_WRITE(GEN6_PMINTRMSK,
			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));

		gen6_enable_rps_interrupts(dev_priv);

		/* Use the user's desired frequency as a guide, but for better
		 * performance, jump directly to RPe as our starting frequency.
		 */
		freq = max(rps->cur_freq,
			   rps->efficient_freq);

		if (intel_set_rps(dev_priv,
				  clamp(freq,
					rps->min_freq_softlimit,
					rps->max_freq_softlimit)))
			DRM_DEBUG_DRIVER("Failed to set busy frequency\n");
	}
	mutex_unlock(&dev_priv->pcu_lock);
}

void gen6_rps_idle(struct drm_i915_private *dev_priv)
{
	struct intel_rps *rps = &dev_priv->gt_pm.rps;

	/* Flush our bottom-half so that it does not race with us
	 * setting the idle frequency and so that it is bounded by
	 * our rpm wakeref. And then disable the interrupts to stop any
	 * further RPS reclocking whilst we are asleep.
6278 */ 6279 gen6_disable_rps_interrupts(dev_priv); 6280 6281 mutex_lock(&dev_priv->pcu_lock); 6282 if (rps->enabled) { 6283 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) 6284 vlv_set_rps_idle(dev_priv); 6285 else 6286 gen6_set_rps(dev_priv, rps->idle_freq); 6287 rps->last_adj = 0; 6288 I915_WRITE(GEN6_PMINTRMSK, 6289 gen6_sanitize_rps_pm_mask(dev_priv, ~0)); 6290 } 6291 mutex_unlock(&dev_priv->pcu_lock); 6292 } 6293 6294 void gen6_rps_boost(struct drm_i915_gem_request *rq, 6295 struct intel_rps_client *rps_client) 6296 { 6297 struct intel_rps *rps = &rq->i915->gt_pm.rps; 6298 unsigned long flags; 6299 bool boost; 6300 6301 /* This is intentionally racy! We peek at the state here, then 6302 * validate inside the RPS worker. 6303 */ 6304 if (!rps->enabled) 6305 return; 6306 6307 boost = false; 6308 spin_lock_irqsave(&rq->lock, flags); 6309 if (!rq->waitboost && !i915_gem_request_completed(rq)) { 6310 atomic_inc(&rps->num_waiters); 6311 rq->waitboost = true; 6312 boost = true; 6313 } 6314 spin_unlock_irqrestore(&rq->lock, flags); 6315 if (!boost) 6316 return; 6317 6318 if (READ_ONCE(rps->cur_freq) < rps->boost_freq) 6319 schedule_work(&rps->work); 6320 6321 atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts); 6322 } 6323 6324 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) 6325 { 6326 struct intel_rps *rps = &dev_priv->gt_pm.rps; 6327 int err; 6328 6329 lockdep_assert_held(&dev_priv->pcu_lock); 6330 GEM_BUG_ON(val > rps->max_freq); 6331 GEM_BUG_ON(val < rps->min_freq); 6332 6333 if (!rps->enabled) { 6334 rps->cur_freq = val; 6335 return 0; 6336 } 6337 6338 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) 6339 err = valleyview_set_rps(dev_priv, val); 6340 else 6341 err = gen6_set_rps(dev_priv, val); 6342 6343 return err; 6344 } 6345 6346 static void gen9_disable_rc6(struct drm_i915_private *dev_priv) 6347 { 6348 I915_WRITE(GEN6_RC_CONTROL, 0); 6349 I915_WRITE(GEN9_PG_ENABLE, 0); 6350 } 6351 6352 static void gen9_disable_rps(struct drm_i915_private *dev_priv) 6353 { 6354 I915_WRITE(GEN6_RP_CONTROL, 0); 6355 } 6356 6357 static void gen6_disable_rc6(struct drm_i915_private *dev_priv) 6358 { 6359 I915_WRITE(GEN6_RC_CONTROL, 0); 6360 } 6361 6362 static void gen6_disable_rps(struct drm_i915_private *dev_priv) 6363 { 6364 I915_WRITE(GEN6_RPNSWREQ, 1 << 31); 6365 I915_WRITE(GEN6_RP_CONTROL, 0); 6366 } 6367 6368 static void cherryview_disable_rc6(struct drm_i915_private *dev_priv) 6369 { 6370 I915_WRITE(GEN6_RC_CONTROL, 0); 6371 } 6372 6373 static void cherryview_disable_rps(struct drm_i915_private *dev_priv) 6374 { 6375 I915_WRITE(GEN6_RP_CONTROL, 0); 6376 } 6377 6378 static void valleyview_disable_rc6(struct drm_i915_private *dev_priv) 6379 { 6380 /* We're doing forcewake before Disabling RC6, 6381 * This what the BIOS expects when going into suspend */ 6382 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6383 6384 I915_WRITE(GEN6_RC_CONTROL, 0); 6385 6386 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6387 } 6388 6389 static void valleyview_disable_rps(struct drm_i915_private *dev_priv) 6390 { 6391 I915_WRITE(GEN6_RP_CONTROL, 0); 6392 } 6393 6394 static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode) 6395 { 6396 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { 6397 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) 6398 mode = GEN6_RC_CTL_RC6_ENABLE; 6399 else 6400 mode = 0; 6401 } 6402 if (HAS_RC6p(dev_priv)) 6403 DRM_DEBUG_DRIVER("Enabling RC6 states: " 6404 "RC6 %s RC6p %s RC6pp %s\n", 6405 onoff(mode & 
GEN6_RC_CTL_RC6_ENABLE), 6406 onoff(mode & GEN6_RC_CTL_RC6p_ENABLE), 6407 onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE)); 6408 6409 else 6410 DRM_DEBUG_DRIVER("Enabling RC6 states: RC6 %s\n", 6411 onoff(mode & GEN6_RC_CTL_RC6_ENABLE)); 6412 } 6413 6414 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) 6415 { 6416 struct i915_ggtt *ggtt = &dev_priv->ggtt; 6417 bool enable_rc6 = true; 6418 unsigned long rc6_ctx_base; 6419 u32 rc_ctl; 6420 int rc_sw_target; 6421 6422 rc_ctl = I915_READ(GEN6_RC_CONTROL); 6423 rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >> 6424 RC_SW_TARGET_STATE_SHIFT; 6425 DRM_DEBUG_DRIVER("BIOS enabled RC states: " 6426 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", 6427 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), 6428 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), 6429 rc_sw_target); 6430 6431 if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) { 6432 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n"); 6433 enable_rc6 = false; 6434 } 6435 6436 /* 6437 * The exact context size is not known for BXT, so assume a page size 6438 * for this check. 6439 */ 6440 rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK; 6441 if (!((rc6_ctx_base >= ggtt->stolen_reserved_base) && 6442 (rc6_ctx_base + PAGE_SIZE <= ggtt->stolen_reserved_base + 6443 ggtt->stolen_reserved_size))) { 6444 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); 6445 enable_rc6 = false; 6446 } 6447 6448 if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) && 6449 ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) && 6450 ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) && 6451 ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) { 6452 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); 6453 enable_rc6 = false; 6454 } 6455 6456 if (!I915_READ(GEN8_PUSHBUS_CONTROL) || 6457 !I915_READ(GEN8_PUSHBUS_ENABLE) || 6458 !I915_READ(GEN8_PUSHBUS_SHIFT)) { 6459 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); 6460 enable_rc6 = false; 6461 } 6462 6463 if (!I915_READ(GEN6_GFXPAUSE)) { 6464 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); 6465 enable_rc6 = false; 6466 } 6467 6468 if (!I915_READ(GEN8_MISC_CTRL0)) { 6469 DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); 6470 enable_rc6 = false; 6471 } 6472 6473 return enable_rc6; 6474 } 6475 6476 int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6) 6477 { 6478 /* No RC6 before Ironlake and code is gone for ilk. 
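	 * The enable_rc6 modparam is a bitmask of the INTEL_RC6*_ENABLE
	 * flags, so e.g. (illustratively) 1 requests plain RC6 only and
	 * 3 requests RC6 | RC6p; unsupported bits are trimmed against
	 * the mask computed below.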
*/ 6479 if (INTEL_INFO(dev_priv)->gen < 6) 6480 return 0; 6481 6482 if (!enable_rc6) 6483 return 0; 6484 6485 if (IS_GEN9_LP(dev_priv) && !bxt_check_bios_rc6_setup(dev_priv)) { 6486 DRM_INFO("RC6 disabled by BIOS\n"); 6487 return 0; 6488 } 6489 6490 /* Respect the kernel parameter if it is set */ 6491 if (enable_rc6 >= 0) { 6492 int mask; 6493 6494 if (HAS_RC6p(dev_priv)) 6495 mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | 6496 INTEL_RC6pp_ENABLE; 6497 else 6498 mask = INTEL_RC6_ENABLE; 6499 6500 if ((enable_rc6 & mask) != enable_rc6) 6501 DRM_DEBUG_DRIVER("Adjusting RC6 mask to %d " 6502 "(requested %d, valid %d)\n", 6503 enable_rc6 & mask, enable_rc6, mask); 6504 6505 return enable_rc6 & mask; 6506 } 6507 6508 if (IS_IVYBRIDGE(dev_priv)) 6509 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); 6510 6511 return INTEL_RC6_ENABLE; 6512 } 6513 6514 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) 6515 { 6516 struct intel_rps *rps = &dev_priv->gt_pm.rps; 6517 6518 /* All of these values are in units of 50MHz */ 6519 6520 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 6521 if (IS_GEN9_LP(dev_priv)) { 6522 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); 6523 rps->rp0_freq = (rp_state_cap >> 16) & 0xff; 6524 rps->rp1_freq = (rp_state_cap >> 8) & 0xff; 6525 rps->min_freq = (rp_state_cap >> 0) & 0xff; 6526 } else { 6527 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); 6528 rps->rp0_freq = (rp_state_cap >> 0) & 0xff; 6529 rps->rp1_freq = (rp_state_cap >> 8) & 0xff; 6530 rps->min_freq = (rp_state_cap >> 16) & 0xff; 6531 } 6532 /* hw_max = RP0 until we check for overclocking */ 6533 rps->max_freq = rps->rp0_freq; 6534 6535 rps->efficient_freq = rps->rp1_freq; 6536 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || 6537 IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { 6538 u32 ddcc_status = 0; 6539 6540 if (sandybridge_pcode_read(dev_priv, 6541 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 6542 &ddcc_status) == 0) 6543 rps->efficient_freq = 6544 clamp_t(u8, 6545 ((ddcc_status >> 8) & 0xff), 6546 rps->min_freq, 6547 rps->max_freq); 6548 } 6549 6550 if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { 6551 /* Store the frequency values in 16.66 MHZ units, which is 6552 * the natural hardware unit for SKL 6553 */ 6554 rps->rp0_freq *= GEN9_FREQ_SCALER; 6555 rps->rp1_freq *= GEN9_FREQ_SCALER; 6556 rps->min_freq *= GEN9_FREQ_SCALER; 6557 rps->max_freq *= GEN9_FREQ_SCALER; 6558 rps->efficient_freq *= GEN9_FREQ_SCALER; 6559 } 6560 } 6561 6562 static void reset_rps(struct drm_i915_private *dev_priv, 6563 int (*set)(struct drm_i915_private *, u8)) 6564 { 6565 struct intel_rps *rps = &dev_priv->gt_pm.rps; 6566 u8 freq = rps->cur_freq; 6567 6568 /* force a reset */ 6569 rps->power = -1; 6570 rps->cur_freq = -1; 6571 6572 if (set(dev_priv, freq)) 6573 DRM_ERROR("Failed to reset RPS to initial values\n"); 6574 } 6575 6576 /* See the Gen9_GT_PM_Programming_Guide doc for the below */ 6577 static void gen9_enable_rps(struct drm_i915_private *dev_priv) 6578 { 6579 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6580 6581 /* Program defaults and thresholds for RPS*/ 6582 I915_WRITE(GEN6_RC_VIDEO_FREQ, 6583 GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq)); 6584 6585 /* 1 second timeout*/ 6586 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 6587 GT_INTERVAL_FROM_US(dev_priv, 1000000)); 6588 6589 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa); 6590 6591 /* Leaning on the below call to gen6_set_rps to program/setup the 6592 * Up/Down EI & threshold registers, as well as the RP_CONTROL, 6593 * RP_INTERRUPT_LIMITS & 
RPNSWREQ registers */ 6594 reset_rps(dev_priv, gen6_set_rps); 6595 6596 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6597 } 6598 6599 static void gen9_enable_rc6(struct drm_i915_private *dev_priv) 6600 { 6601 struct intel_engine_cs *engine; 6602 enum intel_engine_id id; 6603 u32 rc6_mode, rc6_mask = 0; 6604 6605 /* 1a: Software RC state - RC0 */ 6606 I915_WRITE(GEN6_RC_STATE, 0); 6607 6608 /* 1b: Get forcewake during program sequence. Although the driver 6609 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 6610 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6611 6612 /* 2a: Disable RC states. */ 6613 I915_WRITE(GEN6_RC_CONTROL, 0); 6614 6615 /* 2b: Program RC6 thresholds.*/ 6616 6617 /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */ 6618 if (IS_SKYLAKE(dev_priv)) 6619 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); 6620 else 6621 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); 6622 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 6623 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 6624 for_each_engine(engine, dev_priv, id) 6625 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 6626 6627 if (HAS_GUC(dev_priv)) 6628 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); 6629 6630 I915_WRITE(GEN6_RC_SLEEP, 0); 6631 6632 /* 2c: Program Coarse Power Gating Policies. */ 6633 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25); 6634 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); 6635 6636 /* 3a: Enable RC6 */ 6637 if (intel_rc6_enabled() & INTEL_RC6_ENABLE) 6638 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 6639 DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); 6640 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ 6641 6642 /* WaRsUseTimeoutMode:cnl (pre-prod) */ 6643 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0)) 6644 rc6_mode = GEN7_RC_CTL_TO_MODE; 6645 else 6646 rc6_mode = GEN6_RC_CTL_EI_MODE(1); 6647 6648 I915_WRITE(GEN6_RC_CONTROL, 6649 GEN6_RC_CTL_HW_ENABLE | rc6_mode | rc6_mask); 6650 6651 /* 6652 * 3b: Enable Coarse Power Gating only when RC6 is enabled. 6653 * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6. 6654 */ 6655 if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) 6656 I915_WRITE(GEN9_PG_ENABLE, 0); 6657 else 6658 I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 6659 (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0); 6660 6661 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6662 } 6663 6664 static void gen8_enable_rc6(struct drm_i915_private *dev_priv) 6665 { 6666 struct intel_engine_cs *engine; 6667 enum intel_engine_id id; 6668 uint32_t rc6_mask = 0; 6669 6670 /* 1a: Software RC state - RC0 */ 6671 I915_WRITE(GEN6_RC_STATE, 0); 6672 6673 /* 1b: Get forcewake during program sequence. Although the driver 6674 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 6675 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6676 6677 /* 2a: Disable RC states. 
*/ 6678 I915_WRITE(GEN6_RC_CONTROL, 0); 6679 6680 /* 2b: Program RC6 thresholds.*/ 6681 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 6682 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 6683 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 6684 for_each_engine(engine, dev_priv, id) 6685 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 6686 I915_WRITE(GEN6_RC_SLEEP, 0); 6687 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ 6688 6689 /* 3: Enable RC6 */ 6690 if (intel_rc6_enabled() & INTEL_RC6_ENABLE) 6691 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 6692 intel_print_rc6_info(dev_priv, rc6_mask); 6693 6694 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 6695 GEN7_RC_CTL_TO_MODE | 6696 rc6_mask); 6697 6698 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6699 } 6700 6701 static void gen8_enable_rps(struct drm_i915_private *dev_priv) 6702 { 6703 struct intel_rps *rps = &dev_priv->gt_pm.rps; 6704 6705 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6706 6707 /* 1 Program defaults and thresholds for RPS*/ 6708 I915_WRITE(GEN6_RPNSWREQ, 6709 HSW_FREQUENCY(rps->rp1_freq)); 6710 I915_WRITE(GEN6_RC_VIDEO_FREQ, 6711 HSW_FREQUENCY(rps->rp1_freq)); 6712 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ 6713 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ 6714 6715 /* Docs recommend 900MHz, and 300 MHz respectively */ 6716 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, 6717 rps->max_freq_softlimit << 24 | 6718 rps->min_freq_softlimit << 16); 6719 6720 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ 6721 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ 6722 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ 6723 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? 
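	 * These EI registers tick in 1.28 us units, which is where the
	 * quoted figures come from: 350000 * 1.28 us ~= 448 ms and
	 * 66000 * 1.28 us ~= 84.48 ms.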
*/ 6724 6725 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 6726 6727 /* 2: Enable RPS */ 6728 I915_WRITE(GEN6_RP_CONTROL, 6729 GEN6_RP_MEDIA_TURBO | 6730 GEN6_RP_MEDIA_HW_NORMAL_MODE | 6731 GEN6_RP_MEDIA_IS_GFX | 6732 GEN6_RP_ENABLE | 6733 GEN6_RP_UP_BUSY_AVG | 6734 GEN6_RP_DOWN_IDLE_AVG); 6735 6736 reset_rps(dev_priv, gen6_set_rps); 6737 6738 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6739 } 6740 6741 static void gen6_enable_rc6(struct drm_i915_private *dev_priv) 6742 { 6743 struct intel_engine_cs *engine; 6744 enum intel_engine_id id; 6745 u32 rc6vids, rc6_mask = 0; 6746 u32 gtfifodbg; 6747 int rc6_mode; 6748 int ret; 6749 6750 I915_WRITE(GEN6_RC_STATE, 0); 6751 6752 /* Clear the DBG now so we don't confuse earlier errors */ 6753 gtfifodbg = I915_READ(GTFIFODBG); 6754 if (gtfifodbg) { 6755 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); 6756 I915_WRITE(GTFIFODBG, gtfifodbg); 6757 } 6758 6759 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6760 6761 /* disable the counters and set deterministic thresholds */ 6762 I915_WRITE(GEN6_RC_CONTROL, 0); 6763 6764 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); 6765 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); 6766 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); 6767 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 6768 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 6769 6770 for_each_engine(engine, dev_priv, id) 6771 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 6772 6773 I915_WRITE(GEN6_RC_SLEEP, 0); 6774 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); 6775 if (IS_IVYBRIDGE(dev_priv)) 6776 I915_WRITE(GEN6_RC6_THRESHOLD, 125000); 6777 else 6778 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); 6779 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); 6780 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ 6781 6782 /* Check if we are enabling RC6 */ 6783 rc6_mode = intel_rc6_enabled(); 6784 if (rc6_mode & INTEL_RC6_ENABLE) 6785 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; 6786 6787 /* We don't use those on Haswell */ 6788 if (!IS_HASWELL(dev_priv)) { 6789 if (rc6_mode & INTEL_RC6p_ENABLE) 6790 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; 6791 6792 if (rc6_mode & INTEL_RC6pp_ENABLE) 6793 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; 6794 } 6795 6796 intel_print_rc6_info(dev_priv, rc6_mask); 6797 6798 I915_WRITE(GEN6_RC_CONTROL, 6799 rc6_mask | 6800 GEN6_RC_CTL_EI_MODE(1) | 6801 GEN6_RC_CTL_HW_ENABLE); 6802 6803 rc6vids = 0; 6804 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); 6805 if (IS_GEN6(dev_priv) && ret) { 6806 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); 6807 } else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { 6808 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", 6809 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); 6810 rc6vids &= 0xffff00; 6811 rc6vids |= GEN6_ENCODE_RC6_VID(450); 6812 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); 6813 if (ret) 6814 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); 6815 } 6816 6817 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6818 } 6819 6820 static void gen6_enable_rps(struct drm_i915_private *dev_priv) 6821 { 6822 /* Here begins a magic sequence of register writes to enable 6823 * auto-downclocking. 6824 * 6825 * Perhaps there might be some value in exposing these to 6826 * userspace... 
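	 * In practice the sequence here is modest: program the 50 ms idle
	 * timeout and hysteresis below, then let reset_rps() reprogram the
	 * thresholds and the frequency request.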
6827 */ 6828 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6829 6830 /* Power down if completely idle for over 50ms */ 6831 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); 6832 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 6833 6834 reset_rps(dev_priv, gen6_set_rps); 6835 6836 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6837 } 6838 6839 static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) 6840 { 6841 struct intel_rps *rps = &dev_priv->gt_pm.rps; 6842 int min_freq = 15; 6843 unsigned int gpu_freq; 6844 unsigned int max_ia_freq, min_ring_freq; 6845 unsigned int max_gpu_freq, min_gpu_freq; 6846 int scaling_factor = 180; 6847 6848 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); 6849 6850 #if 0 6851 policy = cpufreq_cpu_get(0); 6852 if (policy) { 6853 max_ia_freq = policy->cpuinfo.max_freq; 6854 cpufreq_cpu_put(policy); 6855 } else { 6856 /* 6857 * Default to measured freq if none found, PCU will ensure we 6858 * don't go over 6859 */ 6860 max_ia_freq = tsc_khz; 6861 } 6862 #else 6863 max_ia_freq = tsc_frequency / 1000; 6864 #endif 6865 6866 /* Convert from kHz to MHz */ 6867 max_ia_freq /= 1000; 6868 6869 min_ring_freq = I915_READ(DCLK) & 0xf; 6870 /* convert DDR frequency from units of 266.6MHz to bandwidth */ 6871 min_ring_freq = mult_frac(min_ring_freq, 8, 3); 6872 6873 if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { 6874 /* Convert GT frequency to 50 HZ units */ 6875 min_gpu_freq = rps->min_freq / GEN9_FREQ_SCALER; 6876 max_gpu_freq = rps->max_freq / GEN9_FREQ_SCALER; 6877 } else { 6878 min_gpu_freq = rps->min_freq; 6879 max_gpu_freq = rps->max_freq; 6880 } 6881 6882 /* 6883 * For each potential GPU frequency, load a ring frequency we'd like 6884 * to use for memory access. We do this by specifying the IA frequency 6885 * the PCU should use as a reference to determine the ring frequency. 6886 */ 6887 for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) { 6888 int diff = max_gpu_freq - gpu_freq; 6889 unsigned int ia_freq = 0, ring_freq = 0; 6890 6891 if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { 6892 /* 6893 * ring_freq = 2 * GT. ring_freq is in 100MHz units 6894 * No floor required for ring frequency on SKL. 6895 */ 6896 ring_freq = gpu_freq; 6897 } else if (INTEL_INFO(dev_priv)->gen >= 8) { 6898 /* max(2 * GT, DDR). NB: GT is 50MHz units */ 6899 ring_freq = max(min_ring_freq, gpu_freq); 6900 } else if (IS_HASWELL(dev_priv)) { 6901 ring_freq = mult_frac(gpu_freq, 5, 4); 6902 ring_freq = max(min_ring_freq, ring_freq); 6903 /* leave ia_freq as the default, chosen by cpufreq */ 6904 } else { 6905 /* On older processors, there is no separate ring 6906 * clock domain, so in order to boost the bandwidth 6907 * of the ring, we need to upclock the CPU (ia_freq). 6908 * 6909 * For GPU frequencies less than 750MHz, 6910 * just use the lowest ring freq. 
6911 */ 6912 if (gpu_freq < min_freq) 6913 ia_freq = 800; 6914 else 6915 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); 6916 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); 6917 } 6918 6919 sandybridge_pcode_write(dev_priv, 6920 GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 6921 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | 6922 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | 6923 gpu_freq); 6924 } 6925 } 6926 6927 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) 6928 { 6929 u32 val, rp0; 6930 6931 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); 6932 6933 switch (INTEL_INFO(dev_priv)->sseu.eu_total) { 6934 case 8: 6935 /* (2 * 4) config */ 6936 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT); 6937 break; 6938 case 12: 6939 /* (2 * 6) config */ 6940 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT); 6941 break; 6942 case 16: 6943 /* (2 * 8) config */ 6944 default: 6945 /* Setting (2 * 8) Min RP0 for any other combination */ 6946 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT); 6947 break; 6948 } 6949 6950 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK); 6951 6952 return rp0; 6953 } 6954 6955 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) 6956 { 6957 u32 val, rpe; 6958 6959 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); 6960 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; 6961 6962 return rpe; 6963 } 6964 6965 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) 6966 { 6967 u32 val, rp1; 6968 6969 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); 6970 rp1 = (val & FB_GFX_FREQ_FUSE_MASK); 6971 6972 return rp1; 6973 } 6974 6975 static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv) 6976 { 6977 u32 val, rpn; 6978 6979 val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); 6980 rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & 6981 FB_GFX_FREQ_FUSE_MASK); 6982 6983 return rpn; 6984 } 6985 6986 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) 6987 { 6988 u32 val, rp1; 6989 6990 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); 6991 6992 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; 6993 6994 return rp1; 6995 } 6996 6997 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) 6998 { 6999 u32 val, rp0; 7000 7001 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); 7002 7003 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; 7004 /* Clamp to max */ 7005 rp0 = min_t(u32, rp0, 0xea); 7006 7007 return rp0; 7008 } 7009 7010 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) 7011 { 7012 u32 val, rpe; 7013 7014 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); 7015 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; 7016 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); 7017 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; 7018 7019 return rpe; 7020 } 7021 7022 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) 7023 { 7024 u32 val; 7025 7026 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; 7027 /* 7028 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value 7029 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on 7030 * a BYT-M B0 the above register contains 0xbf. Moreover when setting 7031 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 7032 * to make sure it matches what Punit accepts. 
7033 */ 7034 return max_t(u32, val, 0xc0); 7035 } 7036 7037 /* Check that the pctx buffer wasn't move under us. */ 7038 static void valleyview_check_pctx(struct drm_i915_private *dev_priv) 7039 { 7040 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; 7041 7042 WARN_ON(pctx_addr != dev_priv->mm.stolen_base + 7043 dev_priv->vlv_pctx->stolen->start); 7044 } 7045 7046 7047 /* Check that the pcbr address is not empty. */ 7048 static void cherryview_check_pctx(struct drm_i915_private *dev_priv) 7049 { 7050 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; 7051 7052 WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); 7053 } 7054 7055 static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) 7056 { 7057 struct i915_ggtt *ggtt = &dev_priv->ggtt; 7058 unsigned long pctx_paddr, paddr; 7059 u32 pcbr; 7060 int pctx_size = 32*1024; 7061 7062 pcbr = I915_READ(VLV_PCBR); 7063 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { 7064 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); 7065 paddr = (dev_priv->mm.stolen_base + 7066 (ggtt->stolen_size - pctx_size)); 7067 7068 pctx_paddr = (paddr & (~4095)); 7069 I915_WRITE(VLV_PCBR, pctx_paddr); 7070 } 7071 7072 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); 7073 } 7074 7075 static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) 7076 { 7077 struct drm_i915_gem_object *pctx; 7078 unsigned long pctx_paddr; 7079 u32 pcbr; 7080 int pctx_size = 24*1024; 7081 7082 pcbr = I915_READ(VLV_PCBR); 7083 if (pcbr) { 7084 /* BIOS set it up already, grab the pre-alloc'd space */ 7085 int pcbr_offset; 7086 7087 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base; 7088 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv, 7089 pcbr_offset, 7090 I915_GTT_OFFSET_NONE, 7091 pctx_size); 7092 goto out; 7093 } 7094 7095 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); 7096 7097 /* 7098 * From the Gunit register HAS: 7099 * The Gfx driver is expected to program this register and ensure 7100 * proper allocation within Gfx stolen memory. For example, this 7101 * register should be programmed such than the PCBR range does not 7102 * overlap with other ranges, such as the frame buffer, protected 7103 * memory, or any other relevant ranges. 
7104 */ 7105 pctx = i915_gem_object_create_stolen(dev_priv, pctx_size); 7106 if (!pctx) { 7107 DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); 7108 goto out; 7109 } 7110 7111 pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start; 7112 I915_WRITE(VLV_PCBR, pctx_paddr); 7113 7114 out: 7115 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); 7116 dev_priv->vlv_pctx = pctx; 7117 } 7118 7119 static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) 7120 { 7121 if (WARN_ON(!dev_priv->vlv_pctx)) 7122 return; 7123 7124 i915_gem_object_put(dev_priv->vlv_pctx); 7125 dev_priv->vlv_pctx = NULL; 7126 } 7127 7128 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) 7129 { 7130 dev_priv->gt_pm.rps.gpll_ref_freq = 7131 vlv_get_cck_clock(dev_priv, "GPLL ref", 7132 CCK_GPLL_CLOCK_CONTROL, 7133 dev_priv->czclk_freq); 7134 7135 DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", 7136 dev_priv->gt_pm.rps.gpll_ref_freq); 7137 } 7138 7139 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) 7140 { 7141 struct intel_rps *rps = &dev_priv->gt_pm.rps; 7142 u32 val; 7143 7144 valleyview_setup_pctx(dev_priv); 7145 7146 vlv_init_gpll_ref_freq(dev_priv); 7147 7148 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 7149 switch ((val >> 6) & 3) { 7150 case 0: 7151 case 1: 7152 dev_priv->mem_freq = 800; 7153 break; 7154 case 2: 7155 dev_priv->mem_freq = 1066; 7156 break; 7157 case 3: 7158 dev_priv->mem_freq = 1333; 7159 break; 7160 } 7161 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 7162 7163 rps->max_freq = valleyview_rps_max_freq(dev_priv); 7164 rps->rp0_freq = rps->max_freq; 7165 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 7166 intel_gpu_freq(dev_priv, rps->max_freq), 7167 rps->max_freq); 7168 7169 rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv); 7170 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 7171 intel_gpu_freq(dev_priv, rps->efficient_freq), 7172 rps->efficient_freq); 7173 7174 rps->rp1_freq = valleyview_rps_guar_freq(dev_priv); 7175 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", 7176 intel_gpu_freq(dev_priv, rps->rp1_freq), 7177 rps->rp1_freq); 7178 7179 rps->min_freq = valleyview_rps_min_freq(dev_priv); 7180 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 7181 intel_gpu_freq(dev_priv, rps->min_freq), 7182 rps->min_freq); 7183 } 7184 7185 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) 7186 { 7187 struct intel_rps *rps = &dev_priv->gt_pm.rps; 7188 u32 val; 7189 7190 cherryview_setup_pctx(dev_priv); 7191 7192 vlv_init_gpll_ref_freq(dev_priv); 7193 7194 mutex_lock(&dev_priv->sb_lock); 7195 val = vlv_cck_read(dev_priv, CCK_FUSE_REG); 7196 mutex_unlock(&dev_priv->sb_lock); 7197 7198 switch ((val >> 2) & 0x7) { 7199 case 3: 7200 dev_priv->mem_freq = 2000; 7201 break; 7202 default: 7203 dev_priv->mem_freq = 1600; 7204 break; 7205 } 7206 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 7207 7208 rps->max_freq = cherryview_rps_max_freq(dev_priv); 7209 rps->rp0_freq = rps->max_freq; 7210 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 7211 intel_gpu_freq(dev_priv, rps->max_freq), 7212 rps->max_freq); 7213 7214 rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv); 7215 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 7216 intel_gpu_freq(dev_priv, rps->efficient_freq), 7217 rps->efficient_freq); 7218 7219 rps->rp1_freq = cherryview_rps_guar_freq(dev_priv); 7220 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", 7221 intel_gpu_freq(dev_priv, rps->rp1_freq), 7222 
rps->rp1_freq); 7223 7224 rps->min_freq = cherryview_rps_min_freq(dev_priv); 7225 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 7226 intel_gpu_freq(dev_priv, rps->min_freq), 7227 rps->min_freq); 7228 7229 WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | 7230 rps->min_freq) & 1, 7231 "Odd GPU freq values\n"); 7232 } 7233 7234 static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) 7235 { 7236 valleyview_cleanup_pctx(dev_priv); 7237 } 7238 7239 static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) 7240 { 7241 struct intel_engine_cs *engine; 7242 enum intel_engine_id id; 7243 u32 gtfifodbg, rc6_mode = 0, pcbr; 7244 7245 gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | 7246 GT_FIFO_FREE_ENTRIES_CHV); 7247 if (gtfifodbg) { 7248 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", 7249 gtfifodbg); 7250 I915_WRITE(GTFIFODBG, gtfifodbg); 7251 } 7252 7253 cherryview_check_pctx(dev_priv); 7254 7255 /* 1a & 1b: Get forcewake during program sequence. Although the driver 7256 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 7257 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 7258 7259 /* Disable RC states. */ 7260 I915_WRITE(GEN6_RC_CONTROL, 0); 7261 7262 /* 2a: Program RC6 thresholds.*/ 7263 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 7264 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 7265 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 7266 7267 for_each_engine(engine, dev_priv, id) 7268 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 7269 I915_WRITE(GEN6_RC_SLEEP, 0); 7270 7271 /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ 7272 I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); 7273 7274 /* Allows RC6 residency counter to work */ 7275 I915_WRITE(VLV_COUNTER_CONTROL, 7276 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | 7277 VLV_MEDIA_RC6_COUNT_EN | 7278 VLV_RENDER_RC6_COUNT_EN)); 7279 7280 /* For now we assume BIOS is allocating and populating the PCBR */ 7281 pcbr = I915_READ(VLV_PCBR); 7282 7283 /* 3: Enable RC6 */ 7284 if ((intel_rc6_enabled() & INTEL_RC6_ENABLE) && 7285 (pcbr >> VLV_PCBR_ADDR_SHIFT)) 7286 rc6_mode = GEN7_RC_CTL_TO_MODE; 7287 7288 I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 7289 7290 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 7291 } 7292 7293 static void cherryview_enable_rps(struct drm_i915_private *dev_priv) 7294 { 7295 u32 val; 7296 7297 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 7298 7299 /* 1: Program defaults and thresholds for RPS*/ 7300 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); 7301 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); 7302 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); 7303 I915_WRITE(GEN6_RP_UP_EI, 66000); 7304 I915_WRITE(GEN6_RP_DOWN_EI, 350000); 7305 7306 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 7307 7308 /* 2: Enable RPS */ 7309 I915_WRITE(GEN6_RP_CONTROL, 7310 GEN6_RP_MEDIA_HW_NORMAL_MODE | 7311 GEN6_RP_MEDIA_IS_GFX | 7312 GEN6_RP_ENABLE | 7313 GEN6_RP_UP_BUSY_AVG | 7314 GEN6_RP_DOWN_IDLE_AVG); 7315 7316 /* Setting Fixed Bias */ 7317 val = VLV_OVERRIDE_EN | 7318 VLV_SOC_TDP_EN | 7319 CHV_BIAS_CPU_50_SOC_50; 7320 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); 7321 7322 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 7323 7324 /* RPS code assumes GPLL is used */ 7325 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 7326 7327 DRM_DEBUG_DRIVER("GPLL enabled? 
%s\n", yesno(val & GPLLENABLE)); 7328 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 7329 7330 reset_rps(dev_priv, valleyview_set_rps); 7331 7332 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 7333 } 7334 7335 static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) 7336 { 7337 struct intel_engine_cs *engine; 7338 enum intel_engine_id id; 7339 u32 gtfifodbg, rc6_mode = 0; 7340 7341 valleyview_check_pctx(dev_priv); 7342 7343 gtfifodbg = I915_READ(GTFIFODBG); 7344 if (gtfifodbg) { 7345 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", 7346 gtfifodbg); 7347 I915_WRITE(GTFIFODBG, gtfifodbg); 7348 } 7349 7350 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 7351 7352 /* Disable RC states. */ 7353 I915_WRITE(GEN6_RC_CONTROL, 0); 7354 7355 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); 7356 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 7357 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 7358 7359 for_each_engine(engine, dev_priv, id) 7360 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 7361 7362 I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); 7363 7364 /* Allows RC6 residency counter to work */ 7365 I915_WRITE(VLV_COUNTER_CONTROL, 7366 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | 7367 VLV_MEDIA_RC0_COUNT_EN | 7368 VLV_RENDER_RC0_COUNT_EN | 7369 VLV_MEDIA_RC6_COUNT_EN | 7370 VLV_RENDER_RC6_COUNT_EN)); 7371 7372 if (intel_rc6_enabled() & INTEL_RC6_ENABLE) 7373 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; 7374 7375 intel_print_rc6_info(dev_priv, rc6_mode); 7376 7377 I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 7378 7379 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 7380 } 7381 7382 static void valleyview_enable_rps(struct drm_i915_private *dev_priv) 7383 { 7384 u32 val; 7385 7386 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 7387 7388 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); 7389 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); 7390 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); 7391 I915_WRITE(GEN6_RP_UP_EI, 66000); 7392 I915_WRITE(GEN6_RP_DOWN_EI, 350000); 7393 7394 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 7395 7396 I915_WRITE(GEN6_RP_CONTROL, 7397 GEN6_RP_MEDIA_TURBO | 7398 GEN6_RP_MEDIA_HW_NORMAL_MODE | 7399 GEN6_RP_MEDIA_IS_GFX | 7400 GEN6_RP_ENABLE | 7401 GEN6_RP_UP_BUSY_AVG | 7402 GEN6_RP_DOWN_IDLE_CONT); 7403 7404 /* Setting Fixed Bias */ 7405 val = VLV_OVERRIDE_EN | 7406 VLV_SOC_TDP_EN | 7407 VLV_BIAS_CPU_125_SOC_875; 7408 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); 7409 7410 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 7411 7412 /* RPS code assumes GPLL is used */ 7413 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 7414 7415 DRM_DEBUG_DRIVER("GPLL enabled? 
%s\n", yesno(val & GPLLENABLE)); 7416 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 7417 7418 reset_rps(dev_priv, valleyview_set_rps); 7419 7420 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 7421 } 7422 7423 static unsigned long intel_pxfreq(u32 vidfreq) 7424 { 7425 unsigned long freq; 7426 int div = (vidfreq & 0x3f0000) >> 16; 7427 int post = (vidfreq & 0x3000) >> 12; 7428 int pre = (vidfreq & 0x7); 7429 7430 if (!pre) 7431 return 0; 7432 7433 freq = ((div * 133333) / ((1<<post) * pre)); 7434 7435 return freq; 7436 } 7437 7438 static const struct cparams { 7439 u16 i; 7440 u16 t; 7441 u16 m; 7442 u16 c; 7443 } cparams[] = { 7444 { 1, 1333, 301, 28664 }, 7445 { 1, 1066, 294, 24460 }, 7446 { 1, 800, 294, 25192 }, 7447 { 0, 1333, 276, 27605 }, 7448 { 0, 1066, 276, 27605 }, 7449 { 0, 800, 231, 23784 }, 7450 }; 7451 7452 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) 7453 { 7454 u64 total_count, diff, ret; 7455 u32 count1, count2, count3, m = 0, c = 0; 7456 unsigned long now = jiffies_to_msecs(jiffies), diff1; 7457 int i; 7458 7459 lockdep_assert_held(&mchdev_lock); 7460 7461 diff1 = now - dev_priv->ips.last_time1; 7462 7463 /* Prevent division-by-zero if we are asking too fast. 7464 * Also, we don't get interesting results if we are polling 7465 * faster than once in 10ms, so just return the saved value 7466 * in such cases. 7467 */ 7468 if (diff1 <= 10) 7469 return dev_priv->ips.chipset_power; 7470 7471 count1 = I915_READ(DMIEC); 7472 count2 = I915_READ(DDREC); 7473 count3 = I915_READ(CSIEC); 7474 7475 total_count = count1 + count2 + count3; 7476 7477 /* FIXME: handle per-counter overflow */ 7478 if (total_count < dev_priv->ips.last_count1) { 7479 diff = ~0UL - dev_priv->ips.last_count1; 7480 diff += total_count; 7481 } else { 7482 diff = total_count - dev_priv->ips.last_count1; 7483 } 7484 7485 for (i = 0; i < ARRAY_SIZE(cparams); i++) { 7486 if (cparams[i].i == dev_priv->ips.c_m && 7487 cparams[i].t == dev_priv->ips.r_t) { 7488 m = cparams[i].m; 7489 c = cparams[i].c; 7490 break; 7491 } 7492 } 7493 7494 diff = div_u64(diff, diff1); 7495 ret = ((m * diff) + c); 7496 ret = div_u64(ret, 10); 7497 7498 dev_priv->ips.last_count1 = total_count; 7499 dev_priv->ips.last_time1 = now; 7500 7501 dev_priv->ips.chipset_power = ret; 7502 7503 return ret; 7504 } 7505 7506 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) 7507 { 7508 unsigned long val; 7509 7510 if (INTEL_INFO(dev_priv)->gen != 5) 7511 return 0; 7512 7513 spin_lock_irq(&mchdev_lock); 7514 7515 val = __i915_chipset_val(dev_priv); 7516 7517 spin_unlock_irq(&mchdev_lock); 7518 7519 return val; 7520 } 7521 7522 unsigned long i915_mch_val(struct drm_i915_private *dev_priv) 7523 { 7524 unsigned long m, x, b; 7525 u32 tsfs; 7526 7527 tsfs = I915_READ(TSFS); 7528 7529 m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); 7530 x = I915_READ8(TR1); 7531 7532 b = tsfs & TSFS_INTR_MASK; 7533 7534 return ((m * x) / 127) - b; 7535 } 7536 7537 static int _pxvid_to_vd(u8 pxvid) 7538 { 7539 if (pxvid == 0) 7540 return 0; 7541 7542 if (pxvid >= 8 && pxvid < 31) 7543 pxvid = 31; 7544 7545 return (pxvid + 2) * 125; 7546 } 7547 7548 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) 7549 { 7550 const int vd = _pxvid_to_vd(pxvid); 7551 const int vm = vd - 1125; 7552 7553 if (INTEL_INFO(dev_priv)->is_mobile) 7554 return vm > 0 ? 
vm : 0; 7555 7556 return vd; 7557 } 7558 7559 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) 7560 { 7561 u64 now, diff, diffms; 7562 u32 count; 7563 7564 lockdep_assert_held(&mchdev_lock); 7565 7566 now = ktime_get_raw_ns(); 7567 diffms = now - dev_priv->ips.last_time2; 7568 do_div(diffms, NSEC_PER_MSEC); 7569 7570 /* Don't divide by 0 */ 7571 if (!diffms) 7572 return; 7573 7574 count = I915_READ(GFXEC); 7575 7576 if (count < dev_priv->ips.last_count2) { 7577 diff = ~0UL - dev_priv->ips.last_count2; 7578 diff += count; 7579 } else { 7580 diff = count - dev_priv->ips.last_count2; 7581 } 7582 7583 dev_priv->ips.last_count2 = count; 7584 dev_priv->ips.last_time2 = now; 7585 7586 /* More magic constants... */ 7587 diff = diff * 1181; 7588 diff = div_u64(diff, diffms * 10); 7589 dev_priv->ips.gfx_power = diff; 7590 } 7591 7592 void i915_update_gfx_val(struct drm_i915_private *dev_priv) 7593 { 7594 if (INTEL_INFO(dev_priv)->gen != 5) 7595 return; 7596 7597 spin_lock_irq(&mchdev_lock); 7598 7599 __i915_update_gfx_val(dev_priv); 7600 7601 spin_unlock_irq(&mchdev_lock); 7602 } 7603 7604 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) 7605 { 7606 unsigned long t, corr, state1, corr2, state2; 7607 u32 pxvid, ext_v; 7608 7609 lockdep_assert_held(&mchdev_lock); 7610 7611 pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq)); 7612 pxvid = (pxvid >> 24) & 0x7f; 7613 ext_v = pvid_to_extvid(dev_priv, pxvid); 7614 7615 state1 = ext_v; 7616 7617 t = i915_mch_val(dev_priv); 7618 7619 /* Revel in the empirically derived constants */ 7620 7621 /* Correction factor in 1/100000 units */ 7622 if (t > 80) 7623 corr = ((t * 2349) + 135940); 7624 else if (t >= 50) 7625 corr = ((t * 964) + 29317); 7626 else /* < 50 */ 7627 corr = ((t * 301) + 1004); 7628 7629 corr = corr * ((150142 * state1) / 10000 - 78642); 7630 corr /= 100000; 7631 corr2 = (corr * dev_priv->ips.corr); 7632 7633 state2 = (corr2 * state1) / 10000; 7634 state2 /= 100; /* convert to mW */ 7635 7636 __i915_update_gfx_val(dev_priv); 7637 7638 return dev_priv->ips.gfx_power + state2; 7639 } 7640 7641 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) 7642 { 7643 unsigned long val; 7644 7645 if (INTEL_INFO(dev_priv)->gen != 5) 7646 return 0; 7647 7648 spin_lock_irq(&mchdev_lock); 7649 7650 val = __i915_gfx_val(dev_priv); 7651 7652 spin_unlock_irq(&mchdev_lock); 7653 7654 return val; 7655 } 7656 7657 /** 7658 * i915_read_mch_val - return value for IPS use 7659 * 7660 * Calculate and return a value for the IPS driver to use when deciding whether 7661 * we have thermal and power headroom to increase CPU or GPU power budget. 7662 */ 7663 unsigned long i915_read_mch_val(void) 7664 { 7665 struct drm_i915_private *dev_priv; 7666 unsigned long chipset_val, graphics_val, ret = 0; 7667 7668 spin_lock_irq(&mchdev_lock); 7669 if (!i915_mch_dev) 7670 goto out_unlock; 7671 dev_priv = i915_mch_dev; 7672 7673 chipset_val = __i915_chipset_val(dev_priv); 7674 graphics_val = __i915_gfx_val(dev_priv); 7675 7676 ret = chipset_val + graphics_val; 7677 7678 out_unlock: 7679 spin_unlock_irq(&mchdev_lock); 7680 7681 return ret; 7682 } 7683 EXPORT_SYMBOL_GPL(i915_read_mch_val); 7684 7685 /** 7686 * i915_gpu_raise - raise GPU frequency limit 7687 * 7688 * Raise the limit; IPS indicates we have thermal headroom. 
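 *
 * Returns false if i915 has not yet registered with intel_ips (i.e.
 * i915_mch_dev is still NULL), true otherwise.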
7689 */ 7690 bool i915_gpu_raise(void) 7691 { 7692 struct drm_i915_private *dev_priv; 7693 bool ret = true; 7694 7695 spin_lock_irq(&mchdev_lock); 7696 if (!i915_mch_dev) { 7697 ret = false; 7698 goto out_unlock; 7699 } 7700 dev_priv = i915_mch_dev; 7701 7702 if (dev_priv->ips.max_delay > dev_priv->ips.fmax) 7703 dev_priv->ips.max_delay--; 7704 7705 out_unlock: 7706 spin_unlock_irq(&mchdev_lock); 7707 7708 return ret; 7709 } 7710 EXPORT_SYMBOL_GPL(i915_gpu_raise); 7711 7712 /** 7713 * i915_gpu_lower - lower GPU frequency limit 7714 * 7715 * IPS indicates we're close to a thermal limit, so throttle back the GPU 7716 * frequency maximum. 7717 */ 7718 bool i915_gpu_lower(void) 7719 { 7720 struct drm_i915_private *dev_priv; 7721 bool ret = true; 7722 7723 spin_lock_irq(&mchdev_lock); 7724 if (!i915_mch_dev) { 7725 ret = false; 7726 goto out_unlock; 7727 } 7728 dev_priv = i915_mch_dev; 7729 7730 if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) 7731 dev_priv->ips.max_delay++; 7732 7733 out_unlock: 7734 spin_unlock_irq(&mchdev_lock); 7735 7736 return ret; 7737 } 7738 EXPORT_SYMBOL_GPL(i915_gpu_lower); 7739 7740 /** 7741 * i915_gpu_busy - indicate GPU busyness to IPS 7742 * 7743 * Tell the IPS driver whether or not the GPU is busy. 7744 */ 7745 bool i915_gpu_busy(void) 7746 { 7747 bool ret = false; 7748 7749 spin_lock_irq(&mchdev_lock); 7750 if (i915_mch_dev) 7751 ret = i915_mch_dev->gt.awake; 7752 spin_unlock_irq(&mchdev_lock); 7753 7754 return ret; 7755 } 7756 EXPORT_SYMBOL_GPL(i915_gpu_busy); 7757 7758 /** 7759 * i915_gpu_turbo_disable - disable graphics turbo 7760 * 7761 * Disable graphics turbo by resetting the max frequency and setting the 7762 * current frequency to the default. 7763 */ 7764 bool i915_gpu_turbo_disable(void) 7765 { 7766 struct drm_i915_private *dev_priv; 7767 bool ret = true; 7768 7769 spin_lock_irq(&mchdev_lock); 7770 if (!i915_mch_dev) { 7771 ret = false; 7772 goto out_unlock; 7773 } 7774 dev_priv = i915_mch_dev; 7775 7776 dev_priv->ips.max_delay = dev_priv->ips.fstart; 7777 7778 if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart)) 7779 ret = false; 7780 7781 out_unlock: 7782 spin_unlock_irq(&mchdev_lock); 7783 7784 return ret; 7785 } 7786 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); 7787 7788 /** 7789 * Tells the intel_ips driver that the i915 driver is now loaded, if 7790 * IPS got loaded first. 7791 * 7792 * This awkward dance is so that neither module has to depend on the 7793 * other in order for IPS to do the appropriate communication of 7794 * GPU turbo limits to i915. 7795 */ 7796 static void 7797 ips_ping_for_i915_load(void) 7798 { 7799 #if 0 7800 void (*link)(void); 7801 7802 link = symbol_get(ips_link_to_i915_driver); 7803 if (link) { 7804 link(); 7805 symbol_put(ips_link_to_i915_driver); 7806 } 7807 #endif 7808 } 7809 7810 void intel_gpu_ips_init(struct drm_i915_private *dev_priv) 7811 { 7812 /* We only register the i915 ips part with intel-ips once everything is 7813 * set up, to avoid intel-ips sneaking in and reading bogus values.
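 *
 * i915_mch_dev itself is protected by mchdev_lock, which the exported
 * i915_gpu_*() helpers above also take before dereferencing it.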
*/ 7814 spin_lock_irq(&mchdev_lock); 7815 i915_mch_dev = dev_priv; 7816 spin_unlock_irq(&mchdev_lock); 7817 7818 ips_ping_for_i915_load(); 7819 } 7820 7821 void intel_gpu_ips_teardown(void) 7822 { 7823 spin_lock_irq(&mchdev_lock); 7824 i915_mch_dev = NULL; 7825 spin_unlock_irq(&mchdev_lock); 7826 } 7827 7828 static void intel_init_emon(struct drm_i915_private *dev_priv) 7829 { 7830 u32 lcfuse; 7831 u8 pxw[16]; 7832 int i; 7833 7834 /* Disable PMON while programming the event weights */ 7835 I915_WRITE(ECR, 0); 7836 POSTING_READ(ECR); 7837 7838 /* Program energy weights for various events */ 7839 I915_WRITE(SDEW, 0x15040d00); 7840 I915_WRITE(CSIEW0, 0x007f0000); 7841 I915_WRITE(CSIEW1, 0x1e220004); 7842 I915_WRITE(CSIEW2, 0x04000004); 7843 7844 for (i = 0; i < 5; i++) 7845 I915_WRITE(PEW(i), 0); 7846 for (i = 0; i < 3; i++) 7847 I915_WRITE(DEW(i), 0); 7848 7849 /* Program P-state weights to account for frequency power adjustment */ 7850 for (i = 0; i < 16; i++) { 7851 u32 pxvidfreq = I915_READ(PXVFREQ(i)); 7852 unsigned long freq = intel_pxfreq(pxvidfreq); 7853 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> 7854 PXVFREQ_PX_SHIFT; 7855 unsigned long val; 7856 7857 val = vid * vid; 7858 val *= (freq / 1000); 7859 val *= 255; 7860 val /= (127*127*900); 7861 if (val > 0xff) 7862 DRM_ERROR("bad pxval: %ld\n", val); 7863 pxw[i] = val; 7864 } 7865 /* Render standby states get 0 weight */ 7866 pxw[14] = 0; 7867 pxw[15] = 0; 7868 7869 for (i = 0; i < 4; i++) { 7870 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | 7871 (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); 7872 I915_WRITE(PXW(i), val); 7873 } 7874 7875 /* Adjust magic regs to magic values (more experimental results) */ 7876 I915_WRITE(OGW0, 0); 7877 I915_WRITE(OGW1, 0); 7878 I915_WRITE(EG0, 0x00007f00); 7879 I915_WRITE(EG1, 0x0000000e); 7880 I915_WRITE(EG2, 0x000e0000); 7881 I915_WRITE(EG3, 0x68000300); 7882 I915_WRITE(EG4, 0x42000000); 7883 I915_WRITE(EG5, 0x00140031); 7884 I915_WRITE(EG6, 0); 7885 I915_WRITE(EG7, 0); 7886 7887 for (i = 0; i < 8; i++) 7888 I915_WRITE(PXWL(i), 0); 7889 7890 /* Enable PMON + select events */ 7891 I915_WRITE(ECR, 0x80000019); 7892 7893 lcfuse = I915_READ(LCFUSE02); 7894 7895 dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); 7896 } 7897 7898 void intel_init_gt_powersave(struct drm_i915_private *dev_priv) 7899 { 7900 struct intel_rps *rps = &dev_priv->gt_pm.rps; 7901 7902 /* 7903 * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a 7904 * requirement.
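 *
 * When RC6 is disabled we therefore take a runtime-PM reference below to
 * keep the device awake; it is dropped again in
 * intel_cleanup_gt_powersave().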
7905 */ 7906 if (!i915_modparams.enable_rc6) { 7907 DRM_INFO("RC6 disabled, disabling runtime PM support\n"); 7908 intel_runtime_pm_get(dev_priv); 7909 } 7910 7911 mutex_lock(&dev_priv->drm.struct_mutex); 7912 mutex_lock(&dev_priv->pcu_lock); 7913 7914 /* Initialize RPS limits (for userspace) */ 7915 if (IS_CHERRYVIEW(dev_priv)) 7916 cherryview_init_gt_powersave(dev_priv); 7917 else if (IS_VALLEYVIEW(dev_priv)) 7918 valleyview_init_gt_powersave(dev_priv); 7919 else if (INTEL_GEN(dev_priv) >= 6) 7920 gen6_init_rps_frequencies(dev_priv); 7921 7922 /* Derive initial user preferences/limits from the hardware limits */ 7923 rps->idle_freq = rps->min_freq; 7924 rps->cur_freq = rps->idle_freq; 7925 7926 rps->max_freq_softlimit = rps->max_freq; 7927 rps->min_freq_softlimit = rps->min_freq; 7928 7929 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 7930 rps->min_freq_softlimit = 7931 max_t(int, 7932 rps->efficient_freq, 7933 intel_freq_opcode(dev_priv, 450)); 7934 7935 /* After setting max-softlimit, find the overclock max freq */ 7936 if (IS_GEN6(dev_priv) || 7937 IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { 7938 u32 params = 0; 7939 7940 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params); 7941 if (params & BIT(31)) { /* OC supported */ 7942 DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", 7943 (rps->max_freq & 0xff) * 50, 7944 (params & 0xff) * 50); 7945 rps->max_freq = params & 0xff; 7946 } 7947 } 7948 7949 /* Finally allow us to boost to max by default */ 7950 rps->boost_freq = rps->max_freq; 7951 7952 mutex_unlock(&dev_priv->pcu_lock); 7953 mutex_unlock(&dev_priv->drm.struct_mutex); 7954 7955 intel_autoenable_gt_powersave(dev_priv); 7956 } 7957 7958 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) 7959 { 7960 if (IS_VALLEYVIEW(dev_priv)) 7961 valleyview_cleanup_gt_powersave(dev_priv); 7962 7963 if (!i915_modparams.enable_rc6) 7964 intel_runtime_pm_put(dev_priv); 7965 } 7966 7967 /** 7968 * intel_suspend_gt_powersave - suspend PM work and helper threads 7969 * @dev_priv: i915 device 7970 * 7971 * We don't want to disable RC6 or other features here, we just want 7972 * to make sure any work we've queued has finished and won't bother 7973 * us while we're suspended. 7974 */ 7975 void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) 7976 { 7977 if (INTEL_GEN(dev_priv) < 6) 7978 return; 7979 7980 if (cancel_delayed_work_sync(&dev_priv->gt_pm.autoenable_work)) 7981 intel_runtime_pm_put(dev_priv); 7982 7983 /* gen6_rps_idle() will be called later to disable interrupts */ 7984 } 7985 7986 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) 7987 { 7988 dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */ 7989 dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ 7990 intel_disable_gt_powersave(dev_priv); 7991 7992 gen6_reset_rps_interrupts(dev_priv); 7993 } 7994 7995 static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) 7996 { 7997 lockdep_assert_held(&i915->pcu_lock); 7998 7999 if (!i915->gt_pm.llc_pstate.enabled) 8000 return; 8001 8002 /* Currently there is no HW configuration to be done to disable.
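 * The ring frequency table programmed by gen6_update_ring_freq() simply
 * remains in place; only the software enabled flag is cleared.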
*/ 8003 8004 i915->gt_pm.llc_pstate.enabled = false; 8005 } 8006 8007 static void intel_disable_rc6(struct drm_i915_private *dev_priv) 8008 { 8009 lockdep_assert_held(&dev_priv->pcu_lock); 8010 8011 if (!dev_priv->gt_pm.rc6.enabled) 8012 return; 8013 8014 if (INTEL_GEN(dev_priv) >= 9) 8015 gen9_disable_rc6(dev_priv); 8016 else if (IS_CHERRYVIEW(dev_priv)) 8017 cherryview_disable_rc6(dev_priv); 8018 else if (IS_VALLEYVIEW(dev_priv)) 8019 valleyview_disable_rc6(dev_priv); 8020 else if (INTEL_GEN(dev_priv) >= 6) 8021 gen6_disable_rc6(dev_priv); 8022 8023 dev_priv->gt_pm.rc6.enabled = false; 8024 } 8025 8026 static void intel_disable_rps(struct drm_i915_private *dev_priv) 8027 { 8028 lockdep_assert_held(&dev_priv->pcu_lock); 8029 8030 if (!dev_priv->gt_pm.rps.enabled) 8031 return; 8032 8033 if (INTEL_GEN(dev_priv) >= 9) 8034 gen9_disable_rps(dev_priv); 8035 else if (IS_CHERRYVIEW(dev_priv)) 8036 cherryview_disable_rps(dev_priv); 8037 else if (IS_VALLEYVIEW(dev_priv)) 8038 valleyview_disable_rps(dev_priv); 8039 else if (INTEL_GEN(dev_priv) >= 6) 8040 gen6_disable_rps(dev_priv); 8041 else if (IS_IRONLAKE_M(dev_priv)) 8042 ironlake_disable_drps(dev_priv); 8043 8044 dev_priv->gt_pm.rps.enabled = false; 8045 } 8046 8047 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) 8048 { 8049 mutex_lock(&dev_priv->pcu_lock); 8050 8051 intel_disable_rc6(dev_priv); 8052 intel_disable_rps(dev_priv); 8053 if (HAS_LLC(dev_priv)) 8054 intel_disable_llc_pstate(dev_priv); 8055 8056 mutex_unlock(&dev_priv->pcu_lock); 8057 } 8058 8059 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) 8060 { 8061 lockdep_assert_held(&i915->pcu_lock); 8062 8063 if (i915->gt_pm.llc_pstate.enabled) 8064 return; 8065 8066 gen6_update_ring_freq(i915); 8067 8068 i915->gt_pm.llc_pstate.enabled = true; 8069 } 8070 8071 static void intel_enable_rc6(struct drm_i915_private *dev_priv) 8072 { 8073 lockdep_assert_held(&dev_priv->pcu_lock); 8074 8075 if (dev_priv->gt_pm.rc6.enabled) 8076 return; 8077 8078 if (IS_CHERRYVIEW(dev_priv)) 8079 cherryview_enable_rc6(dev_priv); 8080 else if (IS_VALLEYVIEW(dev_priv)) 8081 valleyview_enable_rc6(dev_priv); 8082 else if (INTEL_GEN(dev_priv) >= 9) 8083 gen9_enable_rc6(dev_priv); 8084 else if (IS_BROADWELL(dev_priv)) 8085 gen8_enable_rc6(dev_priv); 8086 else if (INTEL_GEN(dev_priv) >= 6) 8087 gen6_enable_rc6(dev_priv); 8088 8089 dev_priv->gt_pm.rc6.enabled = true; 8090 } 8091 8092 static void intel_enable_rps(struct drm_i915_private *dev_priv) 8093 { 8094 struct intel_rps *rps = &dev_priv->gt_pm.rps; 8095 8096 lockdep_assert_held(&dev_priv->pcu_lock); 8097 8098 if (rps->enabled) 8099 return; 8100 8101 if (IS_CHERRYVIEW(dev_priv)) { 8102 cherryview_enable_rps(dev_priv); 8103 } else if (IS_VALLEYVIEW(dev_priv)) { 8104 valleyview_enable_rps(dev_priv); 8105 } else if (INTEL_GEN(dev_priv) >= 9) { 8106 gen9_enable_rps(dev_priv); 8107 } else if (IS_BROADWELL(dev_priv)) { 8108 gen8_enable_rps(dev_priv); 8109 } else if (INTEL_GEN(dev_priv) >= 6) { 8110 gen6_enable_rps(dev_priv); 8111 } else if (IS_IRONLAKE_M(dev_priv)) { 8112 ironlake_enable_drps(dev_priv); 8113 intel_init_emon(dev_priv); 8114 } 8115 8116 WARN_ON(rps->max_freq < rps->min_freq); 8117 WARN_ON(rps->idle_freq > rps->max_freq); 8118 8119 WARN_ON(rps->efficient_freq < rps->min_freq); 8120 WARN_ON(rps->efficient_freq > rps->max_freq); 8121 8122 rps->enabled = true; 8123 } 8124 8125 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) 8126 { 8127 /* Powersaving is controlled by the host when inside a VM */ 8128 
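	/*
	 * Enable order below: RC6 first, then RPS, and finally the LLC
	 * ring-frequency table; intel_disable_gt_powersave() walks the
	 * same set under the same pcu_lock.
	 */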
if (intel_vgpu_active(dev_priv)) 8129 return; 8130 8131 mutex_lock(&dev_priv->pcu_lock); 8132 8133 intel_enable_rc6(dev_priv); 8134 intel_enable_rps(dev_priv); 8135 if (HAS_LLC(dev_priv)) 8136 intel_enable_llc_pstate(dev_priv); 8137 8138 mutex_unlock(&dev_priv->pcu_lock); 8139 } 8140 8141 static void __intel_autoenable_gt_powersave(struct work_struct *work) 8142 { 8143 struct drm_i915_private *dev_priv = 8144 container_of(work, 8145 typeof(*dev_priv), 8146 gt_pm.autoenable_work.work); 8147 struct intel_engine_cs *rcs; 8148 struct drm_i915_gem_request *req; 8149 8150 rcs = dev_priv->engine[RCS]; 8151 if (rcs->last_retired_context) 8152 goto out; 8153 8154 if (!rcs->init_context) 8155 goto out; 8156 8157 mutex_lock(&dev_priv->drm.struct_mutex); 8158 8159 req = i915_gem_request_alloc(rcs, dev_priv->kernel_context); 8160 if (IS_ERR(req)) 8161 goto unlock; 8162 8163 if (!i915_modparams.enable_execlists && i915_switch_context(req) == 0) 8164 rcs->init_context(req); 8165 8166 /* Mark the device busy, calling intel_enable_gt_powersave() */ 8167 i915_add_request(req); 8168 8169 unlock: 8170 mutex_unlock(&dev_priv->drm.struct_mutex); 8171 out: 8172 intel_runtime_pm_put(dev_priv); 8173 } 8174 8175 void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) 8176 { 8177 if (IS_IRONLAKE_M(dev_priv)) { 8178 ironlake_enable_drps(dev_priv); 8179 intel_init_emon(dev_priv); 8180 } else if (INTEL_INFO(dev_priv)->gen >= 6) { 8181 /* 8182 * PCU communication is slow and this doesn't need to be 8183 * done at any specific time, so do this out of our fast path 8184 * to make resume and init faster. 8185 * 8186 * We depend on the HW RC6 power context save/restore 8187 * mechanism when entering D3 through runtime PM suspend. So 8188 * disable RPM until RPS/RC6 is properly setup. We can only 8189 * get here via the driver load/system resume/runtime resume 8190 * paths, so the _noresume version is enough (and in case of 8191 * runtime resume it's necessary). 8192 */ 8193 if (queue_delayed_work(dev_priv->wq, 8194 &dev_priv->gt_pm.autoenable_work, 8195 round_jiffies_up_relative(HZ))) 8196 intel_runtime_pm_get_noresume(dev_priv); 8197 } 8198 } 8199 8200 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv) 8201 { 8202 /* 8203 * On Ibex Peak and Cougar Point, we need to disable clock 8204 * gating for the panel power sequencer or it will fail to 8205 * start up when no ports are active. 
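 *
 * cpt_init_clock_gating() below applies the same workaround for Cougar
 * Point, together with two additional unit-level disables.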
8206 */ 8207 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); 8208 } 8209 8210 static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) 8211 { 8212 enum i915_pipe pipe; 8213 8214 for_each_pipe(dev_priv, pipe) { 8215 I915_WRITE(DSPCNTR(pipe), 8216 I915_READ(DSPCNTR(pipe)) | 8217 DISPPLANE_TRICKLE_FEED_DISABLE); 8218 8219 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe))); 8220 POSTING_READ(DSPSURF(pipe)); 8221 } 8222 } 8223 8224 static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) 8225 { 8226 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; 8227 8228 /* 8229 * Required for FBC 8230 * WaFbcDisableDpfcClockGating:ilk 8231 */ 8232 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | 8233 ILK_DPFCUNIT_CLOCK_GATE_DISABLE | 8234 ILK_DPFDUNIT_CLOCK_GATE_ENABLE; 8235 8236 I915_WRITE(PCH_3DCGDIS0, 8237 MARIUNIT_CLOCK_GATE_DISABLE | 8238 SVSMUNIT_CLOCK_GATE_DISABLE); 8239 I915_WRITE(PCH_3DCGDIS1, 8240 VFMUNIT_CLOCK_GATE_DISABLE); 8241 8242 /* 8243 * According to the spec the following bits should be set in 8244 * order to enable memory self-refresh 8245 * The bit 22/21 of 0x42004 8246 * The bit 5 of 0x42020 8247 * The bit 15 of 0x45000 8248 */ 8249 I915_WRITE(ILK_DISPLAY_CHICKEN2, 8250 (I915_READ(ILK_DISPLAY_CHICKEN2) | 8251 ILK_DPARB_GATE | ILK_VSDPFD_FULL)); 8252 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; 8253 I915_WRITE(DISP_ARB_CTL, 8254 (I915_READ(DISP_ARB_CTL) | 8255 DISP_FBC_WM_DIS)); 8256 8257 /* 8258 * Based on the document from hardware guys the following bits 8259 * should be set unconditionally in order to enable FBC. 8260 * The bit 22 of 0x42000 8261 * The bit 22 of 0x42004 8262 * The bit 7,8,9 of 0x42020. 8263 */ 8264 if (IS_IRONLAKE_M(dev_priv)) { 8265 /* WaFbcAsynchFlipDisableFbcQueue:ilk */ 8266 I915_WRITE(ILK_DISPLAY_CHICKEN1, 8267 I915_READ(ILK_DISPLAY_CHICKEN1) | 8268 ILK_FBCQ_DIS); 8269 I915_WRITE(ILK_DISPLAY_CHICKEN2, 8270 I915_READ(ILK_DISPLAY_CHICKEN2) | 8271 ILK_DPARB_GATE); 8272 } 8273 8274 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); 8275 8276 I915_WRITE(ILK_DISPLAY_CHICKEN2, 8277 I915_READ(ILK_DISPLAY_CHICKEN2) | 8278 ILK_ELPIN_409_SELECT); 8279 I915_WRITE(_3D_CHICKEN2, 8280 _3D_CHICKEN2_WM_READ_PIPELINED << 16 | 8281 _3D_CHICKEN2_WM_READ_PIPELINED); 8282 8283 /* WaDisableRenderCachePipelinedFlush:ilk */ 8284 I915_WRITE(CACHE_MODE_0, 8285 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); 8286 8287 /* WaDisable_RenderCache_OperationalFlush:ilk */ 8288 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 8289 8290 g4x_disable_trickle_feed(dev_priv); 8291 8292 ibx_init_clock_gating(dev_priv); 8293 } 8294 8295 static void cpt_init_clock_gating(struct drm_i915_private *dev_priv) 8296 { 8297 int pipe; 8298 uint32_t val; 8299 8300 /* 8301 * On Ibex Peak and Cougar Point, we need to disable clock 8302 * gating for the panel power sequencer or it will fail to 8303 * start up when no ports are active. 8304 */ 8305 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE | 8306 PCH_DPLUNIT_CLOCK_GATE_DISABLE | 8307 PCH_CPUNIT_CLOCK_GATE_DISABLE); 8308 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | 8309 DPLS_EDP_PPS_FIX_DIS); 8310 /* The below fixes the weird display corruption, a few pixels shifted 8311 * downward, on (only) LVDS of some HP laptops with IVY. 
8312 */ 8313 for_each_pipe(dev_priv, pipe) { 8314 val = I915_READ(TRANS_CHICKEN2(pipe)); 8315 val |= TRANS_CHICKEN2_TIMING_OVERRIDE; 8316 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; 8317 if (dev_priv->vbt.fdi_rx_polarity_inverted) 8318 val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED; 8319 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; 8320 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER; 8321 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH; 8322 I915_WRITE(TRANS_CHICKEN2(pipe), val); 8323 } 8324 /* WADP0ClockGatingDisable */ 8325 for_each_pipe(dev_priv, pipe) { 8326 I915_WRITE(TRANS_CHICKEN1(pipe), 8327 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); 8328 } 8329 } 8330 8331 static void gen6_check_mch_setup(struct drm_i915_private *dev_priv) 8332 { 8333 uint32_t tmp; 8334 8335 tmp = I915_READ(MCH_SSKPD); 8336 if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) 8337 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n", 8338 tmp); 8339 } 8340 8341 static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) 8342 { 8343 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; 8344 8345 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); 8346 8347 I915_WRITE(ILK_DISPLAY_CHICKEN2, 8348 I915_READ(ILK_DISPLAY_CHICKEN2) | 8349 ILK_ELPIN_409_SELECT); 8350 8351 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ 8352 I915_WRITE(_3D_CHICKEN, 8353 _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); 8354 8355 /* WaDisable_RenderCache_OperationalFlush:snb */ 8356 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 8357 8358 /* 8359 * BSpec recommends 8x4 when MSAA is used, 8360 * however in practice 16x4 seems fastest. 8361 * 8362 * Note that PS/WM thread counts depend on the WIZ hashing 8363 * disable bit, which we don't touch here, but it's good 8364 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 8365 */ 8366 I915_WRITE(GEN6_GT_MODE, 8367 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 8368 8369 I915_WRITE(CACHE_MODE_0, 8370 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); 8371 8372 I915_WRITE(GEN6_UCGCTL1, 8373 I915_READ(GEN6_UCGCTL1) | 8374 GEN6_BLBUNIT_CLOCK_GATE_DISABLE | 8375 GEN6_CSUNIT_CLOCK_GATE_DISABLE); 8376 8377 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock 8378 * gating disable must be set. Failure to set it results in 8379 * flickering pixels due to Z write ordering failures after 8380 * some amount of runtime in the Mesa "fire" demo, and Unigine 8381 * Sanctuary and Tropics, and apparently anything else with 8382 * alpha test or pixel discard. 8383 * 8384 * According to the spec, bit 11 (RCCUNIT) must also be set, 8385 * but we didn't debug actual testcases to find it out. 8386 * 8387 * WaDisableRCCUnitClockGating:snb 8388 * WaDisableRCPBUnitClockGating:snb 8389 */ 8390 I915_WRITE(GEN6_UCGCTL2, 8391 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | 8392 GEN6_RCCUNIT_CLOCK_GATE_DISABLE); 8393 8394 /* WaStripsFansDisableFastClipPerformanceFix:snb */ 8395 I915_WRITE(_3D_CHICKEN3, 8396 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); 8397 8398 /* 8399 * Bspec says: 8400 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and 8401 * 3DSTATE_SF number of SF output attributes is more than 16."
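 *
 * Hence pipelined attribute fetch is simply disabled unconditionally
 * below.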
8402 */ 8403 I915_WRITE(_3D_CHICKEN3, 8404 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); 8405 8406 /* 8407 * According to the spec the following bits should be 8408 * set in order to enable memory self-refresh and fbc: 8409 * The bit21 and bit22 of 0x42000 8410 * The bit21 and bit22 of 0x42004 8411 * The bit5 and bit7 of 0x42020 8412 * The bit14 of 0x70180 8413 * The bit14 of 0x71180 8414 * 8415 * WaFbcAsynchFlipDisableFbcQueue:snb 8416 */ 8417 I915_WRITE(ILK_DISPLAY_CHICKEN1, 8418 I915_READ(ILK_DISPLAY_CHICKEN1) | 8419 ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS); 8420 I915_WRITE(ILK_DISPLAY_CHICKEN2, 8421 I915_READ(ILK_DISPLAY_CHICKEN2) | 8422 ILK_DPARB_GATE | ILK_VSDPFD_FULL); 8423 I915_WRITE(ILK_DSPCLK_GATE_D, 8424 I915_READ(ILK_DSPCLK_GATE_D) | 8425 ILK_DPARBUNIT_CLOCK_GATE_ENABLE | 8426 ILK_DPFDUNIT_CLOCK_GATE_ENABLE); 8427 8428 g4x_disable_trickle_feed(dev_priv); 8429 8430 cpt_init_clock_gating(dev_priv); 8431 8432 gen6_check_mch_setup(dev_priv); 8433 } 8434 8435 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) 8436 { 8437 uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE); 8438 8439 /* 8440 * WaVSThreadDispatchOverride:ivb,vlv 8441 * 8442 * This actually overrides the dispatch 8443 * mode for all thread types. 8444 */ 8445 reg &= ~GEN7_FF_SCHED_MASK; 8446 reg |= GEN7_FF_TS_SCHED_HW; 8447 reg |= GEN7_FF_VS_SCHED_HW; 8448 reg |= GEN7_FF_DS_SCHED_HW; 8449 8450 I915_WRITE(GEN7_FF_THREAD_MODE, reg); 8451 } 8452 8453 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) 8454 { 8455 /* 8456 * TODO: this bit should only be enabled when really needed, then 8457 * disabled when not needed anymore in order to save power. 8458 */ 8459 if (HAS_PCH_LPT_LP(dev_priv)) 8460 I915_WRITE(SOUTH_DSPCLK_GATE_D, 8461 I915_READ(SOUTH_DSPCLK_GATE_D) | 8462 PCH_LP_PARTITION_LEVEL_DISABLE); 8463 8464 /* WADPOClockGatingDisable:hsw */ 8465 I915_WRITE(TRANS_CHICKEN1(PIPE_A), 8466 I915_READ(TRANS_CHICKEN1(PIPE_A)) | 8467 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); 8468 } 8469 8470 static void lpt_suspend_hw(struct drm_i915_private *dev_priv) 8471 { 8472 if (HAS_PCH_LPT_LP(dev_priv)) { 8473 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); 8474 8475 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; 8476 I915_WRITE(SOUTH_DSPCLK_GATE_D, val); 8477 } 8478 } 8479 8480 static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, 8481 int general_prio_credits, 8482 int high_prio_credits) 8483 { 8484 u32 misccpctl; 8485 u32 val; 8486 8487 /* WaTempDisableDOPClkGating:bdw */ 8488 misccpctl = I915_READ(GEN7_MISCCPCTL); 8489 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); 8490 8491 val = I915_READ(GEN8_L3SQCREG1); 8492 val &= ~L3_PRIO_CREDITS_MASK; 8493 val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits); 8494 val |= L3_HIGH_PRIO_CREDITS(high_prio_credits); 8495 I915_WRITE(GEN8_L3SQCREG1, val); 8496 8497 /* 8498 * Wait at least 100 clocks before re-enabling clock gating. 8499 * See the definition of L3SQCREG1 in BSpec. 8500 */ 8501 POSTING_READ(GEN8_L3SQCREG1); 8502 udelay(1); 8503 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 8504 } 8505 8506 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) 8507 { 8508 if (!HAS_PCH_CNP(dev_priv)) 8509 return; 8510 8511 /* Wa #1181 */ 8512 I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) | 8513 CNP_PWM_CGE_GATING_DISABLE); 8514 } 8515 8516 static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) 8517 { 8518 cnp_init_clock_gating(dev_priv); 8519 8520 /* This is not a Wa.
Enable for better image quality */ 8521 I915_WRITE(_3D_CHICKEN3, 8522 _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE)); 8523 8524 /* WaEnableChickenDCPR:cnl */ 8525 I915_WRITE(GEN8_CHICKEN_DCPR_1, 8526 I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); 8527 8528 /* WaFbcWakeMemOn:cnl */ 8529 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 8530 DISP_FBC_MEMORY_WAKE); 8531 8532 /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */ 8533 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) 8534 I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, 8535 I915_READ(SLICE_UNIT_LEVEL_CLKGATE) | 8536 SARBUNIT_CLKGATE_DIS); 8537 } 8538 8539 static void cfl_init_clock_gating(struct drm_i915_private *dev_priv) 8540 { 8541 cnp_init_clock_gating(dev_priv); 8542 gen9_init_clock_gating(dev_priv); 8543 8544 /* WaFbcNukeOnHostModify:cfl */ 8545 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | 8546 ILK_DPFC_NUKE_ON_ANY_MODIFICATION); 8547 } 8548 8549 static void kbl_init_clock_gating(struct drm_i915_private *dev_priv) 8550 { 8551 gen9_init_clock_gating(dev_priv); 8552 8553 /* WaDisableSDEUnitClockGating:kbl */ 8554 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) 8555 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 8556 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 8557 8558 /* WaDisableGamClockGating:kbl */ 8559 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) 8560 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | 8561 GEN6_GAMUNIT_CLOCK_GATE_DISABLE); 8562 8563 /* WaFbcNukeOnHostModify:kbl */ 8564 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | 8565 ILK_DPFC_NUKE_ON_ANY_MODIFICATION); 8566 } 8567 8568 static void skl_init_clock_gating(struct drm_i915_private *dev_priv) 8569 { 8570 gen9_init_clock_gating(dev_priv); 8571 8572 /* WAC6entrylatency:skl */ 8573 I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) | 8574 FBC_LLC_FULLY_OPEN); 8575 8576 /* WaFbcNukeOnHostModify:skl */ 8577 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | 8578 ILK_DPFC_NUKE_ON_ANY_MODIFICATION); 8579 } 8580 8581 static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) 8582 { 8583 /* The GTT cache must be disabled if the system is using 2M pages. */ 8584 bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv, 8585 I915_GTT_PAGE_SIZE_2M); 8586 enum i915_pipe pipe; 8587 8588 /* WaSwitchSolVfFArbitrationPriority:bdw */ 8589 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); 8590 8591 /* WaPsrDPAMaskVBlankInSRD:bdw */ 8592 I915_WRITE(CHICKEN_PAR1_1, 8593 I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); 8594 8595 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ 8596 for_each_pipe(dev_priv, pipe) { 8597 I915_WRITE(CHICKEN_PIPESL_1(pipe), 8598 I915_READ(CHICKEN_PIPESL_1(pipe)) | 8599 BDW_DPRS_MASK_VBLANK_SRD); 8600 } 8601 8602 /* WaVSRefCountFullforceMissDisable:bdw */ 8603 /* WaDSRefCountFullforceMissDisable:bdw */ 8604 I915_WRITE(GEN7_FF_THREAD_MODE, 8605 I915_READ(GEN7_FF_THREAD_MODE) & 8606 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); 8607 8608 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, 8609 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); 8610 8611 /* WaDisableSDEUnitClockGating:bdw */ 8612 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 8613 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 8614 8615 /* WaProgramL3SqcReg1Default:bdw */ 8616 gen8_set_l3sqc_credits(dev_priv, 30, 2); 8617 8618 /* WaGttCachingOffByDefault:bdw */ 8619 I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? 
GTT_CACHE_EN_ALL : 0); 8620 8621 /* WaKVMNotificationOnConfigChange:bdw */ 8622 I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1) 8623 | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); 8624 8625 lpt_init_clock_gating(dev_priv); 8626 8627 /* WaDisableDopClockGating:bdw 8628 * 8629 * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP 8630 * clock gating. 8631 */ 8632 I915_WRITE(GEN6_UCGCTL1, 8633 I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); 8634 } 8635 8636 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) 8637 { 8638 /* L3 caching of data atomics doesn't work -- disable it. */ 8639 I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); 8640 I915_WRITE(HSW_ROW_CHICKEN3, 8641 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); 8642 8643 /* This is required by WaCatErrorRejectionIssue:hsw */ 8644 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 8645 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 8646 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 8647 8648 /* WaVSRefCountFullforceMissDisable:hsw */ 8649 I915_WRITE(GEN7_FF_THREAD_MODE, 8650 I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); 8651 8652 /* WaDisable_RenderCache_OperationalFlush:hsw */ 8653 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 8654 8655 /* enable HiZ Raw Stall Optimization */ 8656 I915_WRITE(CACHE_MODE_0_GEN7, 8657 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); 8658 8659 /* WaDisable4x2SubspanOptimization:hsw */ 8660 I915_WRITE(CACHE_MODE_1, 8661 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 8662 8663 /* 8664 * BSpec recommends 8x4 when MSAA is used, 8665 * however in practice 16x4 seems fastest. 8666 * 8667 * Note that PS/WM thread counts depend on the WIZ hashing 8668 * disable bit, which we don't touch here, but it's good 8669 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 8670 */ 8671 I915_WRITE(GEN7_GT_MODE, 8672 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 8673 8674 /* WaSampleCChickenBitEnable:hsw */ 8675 I915_WRITE(HALF_SLICE_CHICKEN3, 8676 _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE)); 8677 8678 /* WaSwitchSolVfFArbitrationPriority:hsw */ 8679 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); 8680 8681 lpt_init_clock_gating(dev_priv); 8682 } 8683 8684 static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) 8685 { 8686 uint32_t snpcr; 8687 8688 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); 8689 8690 /* WaDisableEarlyCull:ivb */ 8691 I915_WRITE(_3D_CHICKEN3, 8692 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); 8693 8694 /* WaDisableBackToBackFlipFix:ivb */ 8695 I915_WRITE(IVB_CHICKEN3, 8696 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | 8697 CHICKEN3_DGMG_DONE_FIX_DISABLE); 8698 8699 /* WaDisablePSDDualDispatchEnable:ivb */ 8700 if (IS_IVB_GT1(dev_priv)) 8701 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, 8702 _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); 8703 8704 /* WaDisable_RenderCache_OperationalFlush:ivb */ 8705 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 8706 8707 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. 
*/ 8708 I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, 8709 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); 8710 8711 /* WaApplyL3ControlAndL3ChickenMode:ivb */ 8712 I915_WRITE(GEN7_L3CNTLREG1, 8713 GEN7_WA_FOR_GEN7_L3_CONTROL); 8714 I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, 8715 GEN7_WA_L3_CHICKEN_MODE); 8716 if (IS_IVB_GT1(dev_priv)) 8717 I915_WRITE(GEN7_ROW_CHICKEN2, 8718 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 8719 else { 8720 /* must write both registers */ 8721 I915_WRITE(GEN7_ROW_CHICKEN2, 8722 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 8723 I915_WRITE(GEN7_ROW_CHICKEN2_GT2, 8724 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 8725 } 8726 8727 /* WaForceL3Serialization:ivb */ 8728 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & 8729 ~L3SQ_URB_READ_CAM_MATCH_DISABLE); 8730 8731 /* 8732 * According to the spec, bit 13 (RCZUNIT) must be set on IVB. 8733 * This implements the WaDisableRCZUnitClockGating:ivb workaround. 8734 */ 8735 I915_WRITE(GEN6_UCGCTL2, 8736 GEN6_RCZUNIT_CLOCK_GATE_DISABLE); 8737 8738 /* This is required by WaCatErrorRejectionIssue:ivb */ 8739 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 8740 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 8741 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 8742 8743 g4x_disable_trickle_feed(dev_priv); 8744 8745 gen7_setup_fixed_func_scheduler(dev_priv); 8746 8747 if (0) { /* causes HiZ corruption on ivb:gt1 */ 8748 /* enable HiZ Raw Stall Optimization */ 8749 I915_WRITE(CACHE_MODE_0_GEN7, 8750 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); 8751 } 8752 8753 /* WaDisable4x2SubspanOptimization:ivb */ 8754 I915_WRITE(CACHE_MODE_1, 8755 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 8756 8757 /* 8758 * BSpec recommends 8x4 when MSAA is used, 8759 * however in practice 16x4 seems fastest. 8760 * 8761 * Note that PS/WM thread counts depend on the WIZ hashing 8762 * disable bit, which we don't touch here, but it's good 8763 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
8764 */ 8765 I915_WRITE(GEN7_GT_MODE, 8766 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 8767 8768 snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); 8769 snpcr &= ~GEN6_MBC_SNPCR_MASK; 8770 snpcr |= GEN6_MBC_SNPCR_MED; 8771 I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); 8772 8773 if (!HAS_PCH_NOP(dev_priv)) 8774 cpt_init_clock_gating(dev_priv); 8775 8776 gen6_check_mch_setup(dev_priv); 8777 } 8778 8779 static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) 8780 { 8781 /* WaDisableEarlyCull:vlv */ 8782 I915_WRITE(_3D_CHICKEN3, 8783 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); 8784 8785 /* WaDisableBackToBackFlipFix:vlv */ 8786 I915_WRITE(IVB_CHICKEN3, 8787 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | 8788 CHICKEN3_DGMG_DONE_FIX_DISABLE); 8789 8790 /* WaPsdDispatchEnable:vlv */ 8791 /* WaDisablePSDDualDispatchEnable:vlv */ 8792 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, 8793 _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | 8794 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); 8795 8796 /* WaDisable_RenderCache_OperationalFlush:vlv */ 8797 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 8798 8799 /* WaForceL3Serialization:vlv */ 8800 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & 8801 ~L3SQ_URB_READ_CAM_MATCH_DISABLE); 8802 8803 /* WaDisableDopClockGating:vlv */ 8804 I915_WRITE(GEN7_ROW_CHICKEN2, 8805 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 8806 8807 /* This is required by WaCatErrorRejectionIssue:vlv */ 8808 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 8809 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 8810 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 8811 8812 gen7_setup_fixed_func_scheduler(dev_priv); 8813 8814 /* 8815 * According to the spec, bit 13 (RCZUNIT) must be set on IVB. 8816 * This implements the WaDisableRCZUnitClockGating:vlv workaround. 8817 */ 8818 I915_WRITE(GEN6_UCGCTL2, 8819 GEN6_RCZUNIT_CLOCK_GATE_DISABLE); 8820 8821 /* WaDisableL3Bank2xClockGate:vlv 8822 * Disabling L3 clock gating - MMIO 940c[25] = 1 8823 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */ 8824 I915_WRITE(GEN7_UCGCTL4, 8825 I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); 8826 8827 /* 8828 * BSpec says this must be set, even though 8829 * WaDisable4x2SubspanOptimization isn't listed for VLV. 8830 */ 8831 I915_WRITE(CACHE_MODE_1, 8832 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 8833 8834 /* 8835 * BSpec recommends 8x4 when MSAA is used, 8836 * however in practice 16x4 seems fastest. 8837 * 8838 * Note that PS/WM thread counts depend on the WIZ hashing 8839 * disable bit, which we don't touch here, but it's good 8840 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 8841 */ 8842 I915_WRITE(GEN7_GT_MODE, 8843 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 8844 8845 /* 8846 * WaIncreaseL3CreditsForVLVB0:vlv 8847 * This is the hardware default actually. 8848 */ 8849 I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); 8850 8851 /* 8852 * WaDisableVLVClockGating_VBIIssue:vlv 8853 * Disable clock gating on the GCFG unit to prevent a delay 8854 * in the reporting of vblank events.
8855 */ 8856 I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS); 8857 } 8858 8859 static void chv_init_clock_gating(struct drm_i915_private *dev_priv) 8860 { 8861 /* WaVSRefCountFullforceMissDisable:chv */ 8862 /* WaDSRefCountFullforceMissDisable:chv */ 8863 I915_WRITE(GEN7_FF_THREAD_MODE, 8864 I915_READ(GEN7_FF_THREAD_MODE) & 8865 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); 8866 8867 /* WaDisableSemaphoreAndSyncFlipWait:chv */ 8868 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, 8869 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); 8870 8871 /* WaDisableCSUnitClockGating:chv */ 8872 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | 8873 GEN6_CSUNIT_CLOCK_GATE_DISABLE); 8874 8875 /* WaDisableSDEUnitClockGating:chv */ 8876 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 8877 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 8878 8879 /* 8880 * WaProgramL3SqcReg1Default:chv 8881 * See gfxspecs/Related Documents/Performance Guide/ 8882 * LSQC Setting Recommendations. 8883 */ 8884 gen8_set_l3sqc_credits(dev_priv, 38, 2); 8885 8886 /* 8887 * GTT cache may not work with big pages, so if those 8888 * are ever enabled GTT cache may need to be disabled. 8889 */ 8890 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); 8891 } 8892 8893 static void g4x_init_clock_gating(struct drm_i915_private *dev_priv) 8894 { 8895 uint32_t dspclk_gate; 8896 8897 I915_WRITE(RENCLK_GATE_D1, 0); 8898 I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE | 8899 GS_UNIT_CLOCK_GATE_DISABLE | 8900 CL_UNIT_CLOCK_GATE_DISABLE); 8901 I915_WRITE(RAMCLK_GATE_D, 0); 8902 dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE | 8903 OVRUNIT_CLOCK_GATE_DISABLE | 8904 OVCUNIT_CLOCK_GATE_DISABLE; 8905 if (IS_GM45(dev_priv)) 8906 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; 8907 I915_WRITE(DSPCLK_GATE_D, dspclk_gate); 8908 8909 /* WaDisableRenderCachePipelinedFlush */ 8910 I915_WRITE(CACHE_MODE_0, 8911 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); 8912 8913 /* WaDisable_RenderCache_OperationalFlush:g4x */ 8914 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 8915 8916 g4x_disable_trickle_feed(dev_priv); 8917 } 8918 8919 static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv) 8920 { 8921 I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); 8922 I915_WRITE(RENCLK_GATE_D2, 0); 8923 I915_WRITE(DSPCLK_GATE_D, 0); 8924 I915_WRITE(RAMCLK_GATE_D, 0); 8925 I915_WRITE16(DEUC, 0); 8926 I915_WRITE(MI_ARB_STATE, 8927 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 8928 8929 /* WaDisable_RenderCache_OperationalFlush:gen4 */ 8930 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 8931 } 8932 8933 static void i965g_init_clock_gating(struct drm_i915_private *dev_priv) 8934 { 8935 I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE | 8936 I965_RCC_CLOCK_GATE_DISABLE | 8937 I965_RCPB_CLOCK_GATE_DISABLE | 8938 I965_ISC_CLOCK_GATE_DISABLE | 8939 I965_FBC_CLOCK_GATE_DISABLE); 8940 I915_WRITE(RENCLK_GATE_D2, 0); 8941 I915_WRITE(MI_ARB_STATE, 8942 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 8943 8944 /* WaDisable_RenderCache_OperationalFlush:gen4 */ 8945 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 8946 } 8947 8948 static void gen3_init_clock_gating(struct drm_i915_private *dev_priv) 8949 { 8950 u32 dstate = I915_READ(D_STATE); 8951 8952 dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING | 8953 DSTATE_DOT_CLOCK_GATING; 8954 I915_WRITE(D_STATE, dstate); 8955 8956 if (IS_PINEVIEW(dev_priv)) 8957 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY)); 8958 8959 /* IIR "flip 
pending" means done if this bit is set */ 8960 I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE)); 8961 8962 /* interrupts should cause a wake up from C3 */ 8963 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN)); 8964 8965 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 8966 I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 8967 8968 I915_WRITE(MI_ARB_STATE, 8969 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 8970 } 8971 8972 static void i85x_init_clock_gating(struct drm_i915_private *dev_priv) 8973 { 8974 I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE); 8975 8976 /* interrupts should cause a wake up from C3 */ 8977 I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) | 8978 _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE)); 8979 8980 I915_WRITE(MEM_MODE, 8981 _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE)); 8982 } 8983 8984 static void i830_init_clock_gating(struct drm_i915_private *dev_priv) 8985 { 8986 I915_WRITE(MEM_MODE, 8987 _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) | 8988 _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE)); 8989 } 8990 8991 void intel_init_clock_gating(struct drm_i915_private *dev_priv) 8992 { 8993 dev_priv->display.init_clock_gating(dev_priv); 8994 } 8995 8996 void intel_suspend_hw(struct drm_i915_private *dev_priv) 8997 { 8998 if (HAS_PCH_LPT(dev_priv)) 8999 lpt_suspend_hw(dev_priv); 9000 } 9001 9002 static void nop_init_clock_gating(struct drm_i915_private *dev_priv) 9003 { 9004 DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n"); 9005 } 9006 9007 /** 9008 * intel_init_clock_gating_hooks - setup the clock gating hooks 9009 * @dev_priv: device private 9010 * 9011 * Setup the hooks that configure which clocks of a given platform can be 9012 * gated and also apply various GT and display specific workarounds for these 9013 * platforms. Note that some GT specific workarounds are applied separately 9014 * when GPU contexts or batchbuffers start their execution. 
/**
 * intel_init_clock_gating_hooks - setup the clock gating hooks
 * @dev_priv: device private
 *
 * Setup the hooks that configure which clocks of a given platform can be
 * gated and also apply various GT and display specific workarounds for these
 * platforms. Note that some GT specific workarounds are applied separately
 * when GPU contexts or batchbuffers start their execution.
 */
void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
{
	if (IS_CANNONLAKE(dev_priv))
		dev_priv->display.init_clock_gating = cnl_init_clock_gating;
	else if (IS_COFFEELAKE(dev_priv))
		dev_priv->display.init_clock_gating = cfl_init_clock_gating;
	else if (IS_SKYLAKE(dev_priv))
		dev_priv->display.init_clock_gating = skl_init_clock_gating;
	else if (IS_KABYLAKE(dev_priv))
		dev_priv->display.init_clock_gating = kbl_init_clock_gating;
	else if (IS_BROXTON(dev_priv))
		dev_priv->display.init_clock_gating = bxt_init_clock_gating;
	else if (IS_GEMINILAKE(dev_priv))
		dev_priv->display.init_clock_gating = glk_init_clock_gating;
	else if (IS_BROADWELL(dev_priv))
		dev_priv->display.init_clock_gating = bdw_init_clock_gating;
	else if (IS_CHERRYVIEW(dev_priv))
		dev_priv->display.init_clock_gating = chv_init_clock_gating;
	else if (IS_HASWELL(dev_priv))
		dev_priv->display.init_clock_gating = hsw_init_clock_gating;
	else if (IS_IVYBRIDGE(dev_priv))
		dev_priv->display.init_clock_gating = ivb_init_clock_gating;
	else if (IS_VALLEYVIEW(dev_priv))
		dev_priv->display.init_clock_gating = vlv_init_clock_gating;
	else if (IS_GEN6(dev_priv))
		dev_priv->display.init_clock_gating = gen6_init_clock_gating;
	else if (IS_GEN5(dev_priv))
		dev_priv->display.init_clock_gating = ilk_init_clock_gating;
	else if (IS_G4X(dev_priv))
		dev_priv->display.init_clock_gating = g4x_init_clock_gating;
	else if (IS_I965GM(dev_priv))
		dev_priv->display.init_clock_gating = i965gm_init_clock_gating;
	else if (IS_I965G(dev_priv))
		dev_priv->display.init_clock_gating = i965g_init_clock_gating;
	else if (IS_GEN3(dev_priv))
		dev_priv->display.init_clock_gating = gen3_init_clock_gating;
	else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
		dev_priv->display.init_clock_gating = i85x_init_clock_gating;
	else if (IS_GEN2(dev_priv))
		dev_priv->display.init_clock_gating = i830_init_clock_gating;
	else {
		MISSING_CASE(INTEL_DEVID(dev_priv));
		dev_priv->display.init_clock_gating = nop_init_clock_gating;
	}
}
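/*
 * Illustrative call order (a sketch; exact call sites vary by kernel
 * version): the hook table above is populated once at driver init, and
 * the selected hook then runs whenever the clock gating setup is
 * (re)applied, e.g. at load and on resume:
 *
 *	intel_init_clock_gating_hooks(dev_priv);
 *	...
 *	intel_init_clock_gating(dev_priv);  // -> display.init_clock_gating()
 */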
latency. " 9095 "Disable CxSR\n"); 9096 } 9097 } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { 9098 vlv_setup_wm_latency(dev_priv); 9099 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm; 9100 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm; 9101 dev_priv->display.initial_watermarks = vlv_initial_watermarks; 9102 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks; 9103 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo; 9104 } else if (IS_G4X(dev_priv)) { 9105 g4x_setup_wm_latency(dev_priv); 9106 dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm; 9107 dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm; 9108 dev_priv->display.initial_watermarks = g4x_initial_watermarks; 9109 dev_priv->display.optimize_watermarks = g4x_optimize_watermarks; 9110 } else if (IS_PINEVIEW(dev_priv)) { 9111 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), 9112 dev_priv->is_ddr3, 9113 dev_priv->fsb_freq, 9114 dev_priv->mem_freq)) { 9115 DRM_INFO("failed to find known CxSR latency " 9116 "(found ddr%s fsb freq %d, mem freq %d), " 9117 "disabling CxSR\n", 9118 (dev_priv->is_ddr3 == 1) ? "3" : "2", 9119 dev_priv->fsb_freq, dev_priv->mem_freq); 9120 /* Disable CxSR and never update its watermark again */ 9121 intel_set_memory_cxsr(dev_priv, false); 9122 dev_priv->display.update_wm = NULL; 9123 } else 9124 dev_priv->display.update_wm = pineview_update_wm; 9125 } else if (IS_GEN4(dev_priv)) { 9126 dev_priv->display.update_wm = i965_update_wm; 9127 } else if (IS_GEN3(dev_priv)) { 9128 dev_priv->display.update_wm = i9xx_update_wm; 9129 dev_priv->display.get_fifo_size = i9xx_get_fifo_size; 9130 } else if (IS_GEN2(dev_priv)) { 9131 if (INTEL_INFO(dev_priv)->num_pipes == 1) { 9132 dev_priv->display.update_wm = i845_update_wm; 9133 dev_priv->display.get_fifo_size = i845_get_fifo_size; 9134 } else { 9135 dev_priv->display.update_wm = i9xx_update_wm; 9136 dev_priv->display.get_fifo_size = i830_get_fifo_size; 9137 } 9138 } else { 9139 DRM_ERROR("unexpected fall-through in intel_init_pm\n"); 9140 } 9141 } 9142 9143 static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) 9144 { 9145 uint32_t flags = 9146 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; 9147 9148 switch (flags) { 9149 case GEN6_PCODE_SUCCESS: 9150 return 0; 9151 case GEN6_PCODE_UNIMPLEMENTED_CMD: 9152 return -ENODEV; 9153 case GEN6_PCODE_ILLEGAL_CMD: 9154 return -ENXIO; 9155 case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: 9156 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: 9157 return -EOVERFLOW; 9158 case GEN6_PCODE_TIMEOUT: 9159 return -ETIMEDOUT; 9160 default: 9161 MISSING_CASE(flags); 9162 return 0; 9163 } 9164 } 9165 9166 static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv) 9167 { 9168 uint32_t flags = 9169 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; 9170 9171 switch (flags) { 9172 case GEN6_PCODE_SUCCESS: 9173 return 0; 9174 case GEN6_PCODE_ILLEGAL_CMD: 9175 return -ENXIO; 9176 case GEN7_PCODE_TIMEOUT: 9177 return -ETIMEDOUT; 9178 case GEN7_PCODE_ILLEGAL_DATA: 9179 return -EINVAL; 9180 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: 9181 return -EOVERFLOW; 9182 default: 9183 MISSING_CASE(flags); 9184 return 0; 9185 } 9186 } 9187 9188 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) 9189 { 9190 int status; 9191 9192 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); 9193 9194 /* GEN6_PCODE_* are outside of the forcewake domain, we can 
int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
{
	int status;

	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));

	/* GEN6_PCODE_* are outside of the forcewake domain, we can
	 * use the fw I915_READ variants to reduce the amount of work
	 * required when reading/writing.
	 */

	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
				 mbox, __builtin_return_address(0));
		return -EAGAIN;
	}

	I915_WRITE_FW(GEN6_PCODE_DATA, *val);
	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);

	if (__intel_wait_for_register_fw(dev_priv,
					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
					 500, 0, NULL)) {
		DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
			  mbox, __builtin_return_address(0));
		return -ETIMEDOUT;
	}

	*val = I915_READ_FW(GEN6_PCODE_DATA);
	I915_WRITE_FW(GEN6_PCODE_DATA, 0);

	if (INTEL_GEN(dev_priv) > 6)
		status = gen7_check_mailbox_status(dev_priv);
	else
		status = gen6_check_mailbox_status(dev_priv);

	if (status) {
		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
				 mbox, __builtin_return_address(0), status);
		return status;
	}

	return 0;
}

int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
			    u32 mbox, u32 val)
{
	int status;

	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));

	/* GEN6_PCODE_* are outside of the forcewake domain, we can
	 * use the fw I915_READ variants to reduce the amount of work
	 * required when reading/writing.
	 */

	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
				 val, mbox, __builtin_return_address(0));
		return -EAGAIN;
	}

	I915_WRITE_FW(GEN6_PCODE_DATA, val);
	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);

	if (__intel_wait_for_register_fw(dev_priv,
					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
					 500, 0, NULL)) {
		DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
			  val, mbox, __builtin_return_address(0));
		return -ETIMEDOUT;
	}

	I915_WRITE_FW(GEN6_PCODE_DATA, 0);

	if (INTEL_GEN(dev_priv) > 6)
		status = gen7_check_mailbox_status(dev_priv);
	else
		status = gen6_check_mailbox_status(dev_priv);

	if (status) {
		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
				 val, mbox, __builtin_return_address(0), status);
		return status;
	}

	return 0;
}
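/*
 * Typical mailbox usage (a sketch): take pcu_lock, issue the request and
 * read back the reply dword. For example, reading the ring frequency
 * table parameters might look roughly like:
 *
 *	u32 params = 0;
 *
 *	mutex_lock(&dev_priv->pcu_lock);
 *	if (sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params) == 0)
 *		DRM_DEBUG_DRIVER("OC params: 0x%08x\n", params);
 *	mutex_unlock(&dev_priv->pcu_lock);
 *
 * GEN6_READ_OC_PARAMS is one of the existing mailbox IDs; the set of
 * commands and their reply formats is platform specific.
 */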
static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
				  u32 request, u32 reply_mask, u32 reply,
				  u32 *status)
{
	u32 val = request;

	*status = sandybridge_pcode_read(dev_priv, mbox, &val);

	return *status || ((val & reply_mask) == reply);
}

/**
 * skl_pcode_request - send PCODE request until acknowledgment
 * @dev_priv: device private
 * @mbox: PCODE mailbox ID the request is targeted for
 * @request: request ID
 * @reply_mask: mask used to check for request acknowledgment
 * @reply: value used to check for request acknowledgment
 * @timeout_base_ms: timeout for polling with preemption enabled
 *
 * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
 * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
 * The request is acknowledged once the PCODE reply dword equals @reply after
 * applying @reply_mask. Polling is first attempted with preemption enabled
 * for @timeout_base_ms; if this times out, it is retried for another 50 ms
 * with preemption disabled.
 *
 * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
 * other error as reported by PCODE.
 */
int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
		      u32 reply_mask, u32 reply, int timeout_base_ms)
{
	u32 status;
	int ret;

	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));

#define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
				   &status)

	/*
	 * Prime the PCODE by doing a request first. Normally it guarantees
	 * that a subsequent request, at most @timeout_base_ms later, succeeds.
	 * _wait_for() doesn't guarantee when its passed condition is evaluated
	 * first, so send the first request explicitly.
	 */
	if (COND) {
		ret = 0;
		goto out;
	}
	ret = _wait_for(COND, timeout_base_ms * 1000, 10);
	if (!ret)
		goto out;

	/*
	 * The above can time out if the number of requests was low (2 in the
	 * worst case) _and_ PCODE was busy for some reason even after a
	 * (queued) request and @timeout_base_ms delay. As a workaround retry
	 * the poll with preemption disabled to maximize the number of
	 * requests. Increase the timeout from @timeout_base_ms to 50ms to
	 * account for interrupts that could reduce the number of these
	 * requests, and for any quirks of the PCODE firmware that delays
	 * the request completion.
	 */
	DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
	WARN_ON_ONCE(timeout_base_ms > 3);
	preempt_disable();
	ret = wait_for_atomic(COND, 50);
	preempt_enable();

out:
	return ret ? ret : status;
#undef COND
}
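/*
 * Example (a sketch, mirroring how the CDCLK code uses this helper):
 * ask PCODE to prepare for a display clock change and poll until it
 * acknowledges, with a 3 ms base timeout:
 *
 *	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 *				SKL_CDCLK_PREPARE_FOR_CHANGE,
 *				SKL_CDCLK_READY_FOR_CHANGE,
 *				SKL_CDCLK_READY_FOR_CHANGE, 3);
 */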
static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
	struct intel_rps *rps = &dev_priv->gt_pm.rps;

	/*
	 * N = val - 0xb7
	 * Slow = Fast = GPLL ref * N
	 */
	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
}

static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
	struct intel_rps *rps = &dev_priv->gt_pm.rps;

	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
}

static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
	struct intel_rps *rps = &dev_priv->gt_pm.rps;

	/*
	 * N = val / 2
	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
	 */
	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
}

static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
	struct intel_rps *rps = &dev_priv->gt_pm.rps;

	/* CHV needs even values */
	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
}

int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
	if (INTEL_GEN(dev_priv) >= 9)
		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
					 GEN9_FREQ_SCALER);
	else if (IS_CHERRYVIEW(dev_priv))
		return chv_gpu_freq(dev_priv, val);
	else if (IS_VALLEYVIEW(dev_priv))
		return byt_gpu_freq(dev_priv, val);
	else
		return val * GT_FREQUENCY_MULTIPLIER;
}

int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
	if (INTEL_GEN(dev_priv) >= 9)
		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
					 GT_FREQUENCY_MULTIPLIER);
	else if (IS_CHERRYVIEW(dev_priv))
		return chv_freq_opcode(dev_priv, val);
	else if (IS_VALLEYVIEW(dev_priv))
		return byt_freq_opcode(dev_priv, val);
	else
		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
}
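/*
 * Worked example for the conversions above (using the current constant
 * values, GT_FREQUENCY_MULTIPLIER == 50 and GEN9_FREQ_SCALER == 3): on
 * gen9+ the hardware value is in units of 50/3 MHz (~16.67 MHz), so
 * val == 18 corresponds to 18 * 50 / 3 == 300 MHz, while on SNB..BDW the
 * same value means 18 * 50 == 900 MHz. On VLV/CHV the result instead
 * scales with the measured GPLL reference clock (gpll_ref_freq, in kHz),
 * hence the divide by 1000 to land in MHz.
 */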
void intel_pm_setup(struct drm_i915_private *dev_priv)
{
	lockinit(&dev_priv->pcu_lock, "i9pcul", 0, LK_CANRECURSE);

	INIT_DELAYED_WORK(&dev_priv->gt_pm.autoenable_work,
			  __intel_autoenable_gt_powersave);
	atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);

	dev_priv->runtime_pm.suspended = false;
	atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
}

static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
			     const i915_reg_t reg)
{
	u32 lower, upper, tmp;
	int loop = 2;

	/* The registers accessed do not need forcewake. We borrow
	 * the uncore lock to prevent concurrent access to the same
	 * register range.
	 */
	spin_lock_irq(&dev_priv->uncore.lock);

	/* vlv and chv residency counters are 40 bits in width.
	 * With a control bit, we can choose between upper or lower
	 * 32bit window into this counter.
	 *
	 * Although we always use the counter in high-range mode elsewhere,
	 * userspace may attempt to read the value before rc6 is initialised,
	 * before we have set the default VLV_COUNTER_CONTROL value. So always
	 * set the high bit to be safe.
	 */
	I915_WRITE_FW(VLV_COUNTER_CONTROL,
		      _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
	upper = I915_READ_FW(reg);
	do {
		tmp = upper;

		I915_WRITE_FW(VLV_COUNTER_CONTROL,
			      _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
		lower = I915_READ_FW(reg);

		I915_WRITE_FW(VLV_COUNTER_CONTROL,
			      _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
		upper = I915_READ_FW(reg);
	} while (upper != tmp && --loop);

	/* Everywhere else we always use VLV_COUNTER_CONTROL with the
	 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
	 * now.
	 */

	spin_unlock_irq(&dev_priv->uncore.lock);

	return lower | (u64)upper << 8;
}

u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
			   const i915_reg_t reg)
{
	u64 time_hw, units, div;

	if (!intel_rc6_enabled())
		return 0;

	intel_runtime_pm_get(dev_priv);

	/* On VLV and CHV, residency time is in CZ units rather than 1.28us */
	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
		units = 1000;
		div = dev_priv->czclk_freq;

		time_hw = vlv_residency_raw(dev_priv, reg);
	} else if (IS_GEN9_LP(dev_priv)) {
		units = 1000;
		div = 1200;		/* 833.33ns */

		time_hw = I915_READ(reg);
	} else {
		units = 128000;		/* 1.28us */
		div = 100000;

		time_hw = I915_READ(reg);
	}

	intel_runtime_pm_put(dev_priv);
	return DIV_ROUND_UP_ULL(time_hw * units, div);
}
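/*
 * Worked example for the conversion above: on big-core parts the counter
 * ticks every 1.28us, so us = ticks * 128000 / 100000 = ticks * 1.28; a
 * raw reading of 1,000,000 ticks reports 1,280,000 us of RC6 residency.
 * On VLV/CHV the raw value from vlv_residency_raw() is in CZ clock
 * cycles, so the divisor is the runtime-probed czclk_freq (in kHz) with
 * units = 1000 to yield microseconds. Note the "<< 8" when stitching the
 * two 32-bit windows together: the high-range window exposes bits [39:8]
 * of the 40-bit counter, overlapping the low window so torn reads can be
 * caught by the retry loop.
 */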