/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include <linux/cpufreq.h>
#include <drm/drm_plane_helper.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include <linux/module.h>

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using down to 0V while at this stage. This
 * stage is entered automatically when the GPU is idle when RC6 support is
 * enabled, and as soon as a new workload arises the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available on Intel GPUs, which differ in the
 * latency required to enter and leave RC6, and in the voltage consumed by
 * the GPU in different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6,
 * and RC6pp is the deepest RC6. Their support by hardware varies according
 * to the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require higher latency to switch to and wake up.
 */
#define INTEL_RC6_ENABLE		(1<<0)
#define INTEL_RC6p_ENABLE		(1<<1)
#define INTEL_RC6pp_ENABLE		(1<<2)
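
/*
 * Illustrative only: code that wants plain RC6 together with deep RC6, but
 * not the deepest state, would combine the flags above as
 * (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); whether a given state is actually
 * used still depends on GPU, BIOS, chipset and platform support.
 */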

static void gen9_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */
	I915_WRITE(CHICKEN_PAR1_1,
		   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);

	I915_WRITE(GEN8_CONFIG0,
		   I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES);

	/* WaEnableChickenDCPR:skl,bxt,kbl */
	I915_WRITE(GEN8_CHICKEN_DCPR_1,
		   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);

	/* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */
	/* WaFbcWakeMemOn:skl,bxt,kbl */
	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
		   DISP_FBC_WM_DIS |
		   DISP_FBC_MEMORY_WAKE);

	/* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl */
	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
		   ILK_DPFC_DISABLE_DUMMY0);
}

static void bxt_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	gen9_init_clock_gating(dev);

	/* WaDisableSDEUnitClockGating:bxt */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

	/*
	 * FIXME:
	 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
	 */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

	/*
	 * Wa: Backlight PWM may stop in the asserted state, causing backlight
	 * to stay fully on.
	 */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
		I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
			   PWM1_GATING_DIS | PWM2_GATING_DIS);
}

static void i915_pineview_get_mem_freq(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	u32 tmp;

	tmp = I915_READ(CLKCFG);

	switch (tmp & CLKCFG_FSB_MASK) {
	case CLKCFG_FSB_533:
		dev_priv->fsb_freq = 533; /* 133*4 */
		break;
	case CLKCFG_FSB_800:
		dev_priv->fsb_freq = 800; /* 200*4 */
		break;
	case CLKCFG_FSB_667:
		dev_priv->fsb_freq = 667; /* 167*4 */
		break;
	case CLKCFG_FSB_400:
		dev_priv->fsb_freq = 400; /* 100*4 */
		break;
	}

	switch (tmp & CLKCFG_MEM_MASK) {
	case CLKCFG_MEM_533:
		dev_priv->mem_freq = 533;
		break;
	case CLKCFG_MEM_667:
		dev_priv->mem_freq = 667;
		break;
	case CLKCFG_MEM_800:
		dev_priv->mem_freq = 800;
		break;
	}

	/* detect pineview DDR3 setting */
	tmp = I915_READ(CSHRDDR3CTL);
	dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ?
		1 : 0;
}

static void i915_ironlake_get_mem_freq(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	u16 ddrpll, csipll;

	ddrpll = I915_READ16(DDRMPLL1);
	csipll = I915_READ16(CSIPLL0);

	switch (ddrpll & 0xff) {
	case 0xc:
		dev_priv->mem_freq = 800;
		break;
	case 0x10:
		dev_priv->mem_freq = 1066;
		break;
	case 0x14:
		dev_priv->mem_freq = 1333;
		break;
	case 0x18:
		dev_priv->mem_freq = 1600;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
				 ddrpll & 0xff);
		dev_priv->mem_freq = 0;
		break;
	}

	dev_priv->ips.r_t = dev_priv->mem_freq;

	switch (csipll & 0x3ff) {
	case 0x00c:
		dev_priv->fsb_freq = 3200;
		break;
	case 0x00e:
		dev_priv->fsb_freq = 3733;
		break;
	case 0x010:
		dev_priv->fsb_freq = 4266;
		break;
	case 0x012:
		dev_priv->fsb_freq = 4800;
		break;
	case 0x014:
		dev_priv->fsb_freq = 5333;
		break;
	case 0x016:
		dev_priv->fsb_freq = 5866;
		break;
	case 0x018:
		dev_priv->fsb_freq = 6400;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
				 csipll & 0x3ff);
		dev_priv->fsb_freq = 0;
		break;
	}

	if (dev_priv->fsb_freq == 3200) {
		dev_priv->ips.c_m = 0;
	} else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
		dev_priv->ips.c_m = 1;
	} else {
		dev_priv->ips.c_m = 2;
	}
}

static const struct cxsr_latency cxsr_latency_table[] = {
	{1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
	{1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
	{1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
	{1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
	{1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

	{1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
	{1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
	{1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
	{1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
	{1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

	{1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
	{1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
	{1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
	{1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
	{1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

	{0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
	{0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
	{0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
	{0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
	{0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

	{0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
	{0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
	{0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
	{0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
	{0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

	{0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
	{0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
	{0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
	{0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
	{0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};
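
/*
 * Example lookup for the table above (values read straight from the DDR3-667
 * desktop row): intel_get_cxsr_latency(true, true, 800, 667) below matches
 * {1, 1, 800, 667, ...}, i.e. a display self-refresh latency of 6420 ns and
 * a cursor self-refresh latency of 36420 ns, assuming the cxsr_latency field
 * order implied by the initializers.
 */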

static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
							 bool is_ddr3,
							 int fsb,
							 int mem)
{
	const struct cxsr_latency *latency;
	int i;

	if (fsb == 0 || mem == 0)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
		latency = &cxsr_latency_table[i];
		if (is_desktop == latency->is_desktop &&
		    is_ddr3 == latency->is_ddr3 &&
		    fsb == latency->fsb_freq && mem == latency->mem_freq)
			return latency;
	}

	DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

	return NULL;
}

static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
{
	u32 val;

	mutex_lock(&dev_priv->rps.hw_lock);

	val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
	if (enable)
		val &= ~FORCE_DDR_HIGH_FREQ;
	else
		val |= FORCE_DDR_HIGH_FREQ;
	val &= ~FORCE_DDR_LOW_FREQ;
	val |= FORCE_DDR_FREQ_REQ_ACK;
	vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);

	if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
		      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");

	mutex_unlock(&dev_priv->rps.hw_lock);
}

static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
{
	u32 val;

	mutex_lock(&dev_priv->rps.hw_lock);

	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
	if (enable)
		val |= DSP_MAXFIFO_PM5_ENABLE;
	else
		val &= ~DSP_MAXFIFO_PM5_ENABLE;
	vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);

	mutex_unlock(&dev_priv->rps.hw_lock);
}

#define FW_WM(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)

void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
	struct drm_device *dev = &dev_priv->drm;
	u32 val;

	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
		I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
		POSTING_READ(FW_BLC_SELF_VLV);
		dev_priv->wm.vlv.cxsr = enable;
	} else if (IS_G4X(dev_priv) || IS_CRESTLINE(dev_priv)) {
		I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
		POSTING_READ(FW_BLC_SELF);
	} else if (IS_PINEVIEW(dev)) {
		val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN;
		val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0;
		I915_WRITE(DSPFW3, val);
		POSTING_READ(DSPFW3);
	} else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
		val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
			       _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
		I915_WRITE(FW_BLC_SELF, val);
		POSTING_READ(FW_BLC_SELF);
	} else if (IS_I915GM(dev_priv)) {
		/*
		 * FIXME can't find a bit like this for 915G, and
		 * yet it does have the related watermark in
		 * FW_BLC_SELF. What's going on?
		 */
		val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
			       _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
		I915_WRITE(INSTPM, val);
		POSTING_READ(INSTPM);
	} else {
		return;
	}

	DRM_DEBUG_KMS("memory self-refresh is %s\n",
		      enable ? "enabled" : "disabled");
}


/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value.
 * It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;

#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
	((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))

static int vlv_get_fifo_size(struct drm_device *dev,
			     enum i915_pipe pipe, int plane)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int sprite0_start, sprite1_start, size;

	switch (pipe) {
		uint32_t dsparb, dsparb2, dsparb3;
	case PIPE_A:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);
		sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
		sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
		break;
	case PIPE_B:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);
		sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
		sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
		break;
	case PIPE_C:
		dsparb2 = I915_READ(DSPARB2);
		dsparb3 = I915_READ(DSPARB3);
		sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
		sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
		break;
	default:
		return 0;
	}

	switch (plane) {
	case 0:
		size = sprite0_start;
		break;
	case 1:
		size = sprite1_start - sprite0_start;
		break;
	case 2:
		size = 512 - 1 - sprite1_start;
		break;
	default:
		return 0;
	}

	DRM_DEBUG_KMS("Pipe %c %s %c FIFO size: %d\n",
		      pipe_name(pipe), plane == 0 ? "primary" : "sprite",
		      plane == 0 ? plane_name(pipe) : sprite_name(pipe, plane - 1),
		      size);

	return size;
}

static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	uint32_t dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x7f;
	if (plane)
		size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A", size);

	return size;
}

static int i830_get_fifo_size(struct drm_device *dev, int plane)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	uint32_t dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x1ff;
	if (plane)
		size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
	size >>= 1; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A", size);

	return size;
}

static int i845_get_fifo_size(struct drm_device *dev, int plane)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	uint32_t dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x7f;
	size >>= 2; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A",
		      size);

	return size;
}

/* Pineview has different values for various configs */
static const struct intel_watermark_params pineview_display_wm = {
	.fifo_size = PINEVIEW_DISPLAY_FIFO,
	.max_wm = PINEVIEW_MAX_WM,
	.default_wm = PINEVIEW_DFT_WM,
	.guard_size = PINEVIEW_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_display_hplloff_wm = {
	.fifo_size = PINEVIEW_DISPLAY_FIFO,
	.max_wm = PINEVIEW_MAX_WM,
	.default_wm = PINEVIEW_DFT_HPLLOFF_WM,
	.guard_size = PINEVIEW_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_wm = {
	.fifo_size = PINEVIEW_CURSOR_FIFO,
	.max_wm = PINEVIEW_CURSOR_MAX_WM,
	.default_wm = PINEVIEW_CURSOR_DFT_WM,
	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
	.fifo_size = PINEVIEW_CURSOR_FIFO,
	.max_wm = PINEVIEW_CURSOR_MAX_WM,
	.default_wm = PINEVIEW_CURSOR_DFT_WM,
	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_wm_info = {
	.fifo_size = G4X_FIFO_SIZE,
	.max_wm = G4X_MAX_WM,
	.default_wm = G4X_MAX_WM,
	.guard_size = 2,
	.cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_cursor_wm_info = {
	.fifo_size = I965_CURSOR_FIFO,
	.max_wm = I965_CURSOR_MAX_WM,
	.default_wm = I965_CURSOR_DFT_WM,
	.guard_size = 2,
	.cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i965_cursor_wm_info = {
	.fifo_size = I965_CURSOR_FIFO,
	.max_wm = I965_CURSOR_MAX_WM,
	.default_wm = I965_CURSOR_DFT_WM,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i945_wm_info = {
	.fifo_size = I945_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i915_wm_info = {
	.fifo_size = I915_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_a_wm_info = {
	.fifo_size = I855GM_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_bc_wm_info = {
	.fifo_size = I855GM_FIFO_SIZE,
	.max_wm = I915_MAX_WM/2,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i845_wm_info = {
	.fifo_size = I830_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};

/**
 * intel_calculate_wm - calculate watermark level
 * @clock_in_khz: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the FIFO buffer
 * @cpp: bytes per pixel
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again).  Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size.  When it reaches the watermark level, it'll start
 * fetching FIFO-line-sized chunks from memory until the FIFO fills past the
 * watermark point.  If the FIFO drains completely, a FIFO underrun will
 * occur, and a display engine hang could result.
 */
static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
					const struct intel_watermark_params *wm,
					int fifo_size, int cpp,
					unsigned long latency_ns)
{
	long entries_required, wm_size;

	/*
	 * Note: we need to make sure we don't overflow for various clock &
	 * latency values.
	 * clocks go from a few thousand to several hundred thousand.
	 * latency is usually a few thousand
	 */
	entries_required = ((clock_in_khz / 1000) * cpp * latency_ns) /
		1000;
	entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);

	DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);

	wm_size = fifo_size - (entries_required + wm->guard_size);

	DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);

	/* Don't promote wm_size to unsigned... */
	if (wm_size > (long)wm->max_wm)
		wm_size = wm->max_wm;
	if (wm_size <= 0)
		wm_size = wm->default_wm;

	/*
	 * Bspec seems to indicate that the value shouldn't be lower than
	 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
	 * Let's go for 8 which is the burst size since certain platforms
	 * already use a hardcoded 8 (which is what the spec says should be
	 * done).
	 */
	if (wm_size <= 8)
		wm_size = 8;

	return wm_size;
}
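
/*
 * Worked example for intel_calculate_wm() above (illustrative numbers, not
 * taken from any platform's Bspec): with a 100000 kHz pixel clock, cpp = 4,
 * latency_ns = 5000 and a 64 byte cacheline,
 *
 *	entries_required = (100000 / 1000) * 4 * 5000 / 1000 = 2000 bytes
 *	                 = DIV_ROUND_UP(2000, 64)            = 32 cachelines
 *
 * so a 96 entry FIFO with a guard size of 2 yields a watermark level of
 * 96 - (32 + 2) = 62.
 */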

static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
{
	struct drm_crtc *crtc, *enabled = NULL;

	for_each_crtc(dev, crtc) {
		if (intel_crtc_active(crtc)) {
			if (enabled)
				return NULL;
			enabled = crtc;
		}
	}

	return enabled;
}

static void pineview_update_wm(struct drm_crtc *unused_crtc)
{
	struct drm_device *dev = unused_crtc->dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_crtc *crtc;
	const struct cxsr_latency *latency;
	u32 reg;
	unsigned long wm;

	latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
					 dev_priv->is_ddr3,
					 dev_priv->fsb_freq,
					 dev_priv->mem_freq);
	if (!latency) {
		DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
		intel_set_memory_cxsr(dev_priv, false);
		return;
	}

	crtc = single_enabled_crtc(dev);
	if (crtc) {
		const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
		int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
		int clock = adjusted_mode->crtc_clock;

		/* Display SR */
		wm = intel_calculate_wm(clock, &pineview_display_wm,
					pineview_display_wm.fifo_size,
					cpp, latency->display_sr);
		reg = I915_READ(DSPFW1);
		reg &= ~DSPFW_SR_MASK;
		reg |= FW_WM(wm, SR);
		I915_WRITE(DSPFW1, reg);
		DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

		/* cursor SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_wm,
					pineview_display_wm.fifo_size,
					cpp, latency->cursor_sr);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_CURSOR_SR_MASK;
		reg |= FW_WM(wm, CURSOR_SR);
		I915_WRITE(DSPFW3, reg);

		/* Display HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					cpp, latency->display_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_SR_MASK;
		reg |= FW_WM(wm, HPLL_SR);
		I915_WRITE(DSPFW3, reg);

		/* cursor HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					cpp, latency->cursor_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_CURSOR_MASK;
		reg |= FW_WM(wm, HPLL_CURSOR);
		I915_WRITE(DSPFW3, reg);
		DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

		intel_set_memory_cxsr(dev_priv, true);
	} else {
		intel_set_memory_cxsr(dev_priv, false);
	}
}

static bool g4x_compute_wm0(struct drm_device *dev,
			    int plane,
			    const struct intel_watermark_params *display,
			    int display_latency_ns,
			    const struct intel_watermark_params *cursor,
			    int cursor_latency_ns,
			    int *plane_wm,
			    int *cursor_wm)
{
	struct drm_crtc *crtc;
	const struct drm_display_mode *adjusted_mode;
	int htotal, hdisplay, clock, cpp;
	int line_time_us, line_count;
	int entries, tlb_miss;

	crtc = intel_get_crtc_for_plane(dev, plane);
	if (!intel_crtc_active(crtc)) {
		*cursor_wm = cursor->guard_size;
		*plane_wm = display->guard_size;
		return false;
	}

	adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;
	hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
	cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);

	/* Use the small buffer method to calculate plane watermark */
	entries = ((clock * cpp / 1000) * display_latency_ns) / 1000;
	tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
	if (tlb_miss > 0)
		entries += tlb_miss;
	entries = DIV_ROUND_UP(entries, display->cacheline_size);
	*plane_wm = entries + display->guard_size;
	if (*plane_wm > (int)display->max_wm)
		*plane_wm = display->max_wm;

	/* Use the large buffer method to calculate cursor watermark */
	line_time_us = max(htotal * 1000 / clock, 1);
	line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
	entries = line_count * crtc->cursor->state->crtc_w * cpp;
	tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
	if (tlb_miss > 0)
		entries += tlb_miss;
	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
	*cursor_wm = entries + cursor->guard_size;
	if (*cursor_wm > (int)cursor->max_wm)
		*cursor_wm = (int)cursor->max_wm;

	return true;
}
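
/*
 * Worked example for the "small buffer" method in g4x_compute_wm0() above
 * (illustrative numbers): with clock = 108000 kHz, cpp = 4 and a 5000 ns
 * latency,
 *
 *	entries = ((108000 * 4 / 1000) * 5000) / 1000 = 2160 bytes,
 *
 * which rounds up to 34 cachelines of 64 bytes, for a plane watermark of
 * 34 + guard_size (any positive tlb_miss correction is added to the byte
 * count before the division).
 */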

/*
 * Check the wm result.
 *
 * If any calculated watermark value is larger than the maximum value that
 * can be programmed into the associated watermark register, that watermark
 * must be disabled.
 */
static bool g4x_check_srwm(struct drm_device *dev,
			   int display_wm, int cursor_wm,
			   const struct intel_watermark_params *display,
			   const struct intel_watermark_params *cursor)
{
	DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
		      display_wm, cursor_wm);

	if (display_wm > display->max_wm) {
		DRM_DEBUG_KMS("display watermark is too large(%d/%u), disabling\n",
			      display_wm, display->max_wm);
		return false;
	}

	if (cursor_wm > cursor->max_wm) {
		DRM_DEBUG_KMS("cursor watermark is too large(%d/%u), disabling\n",
			      cursor_wm, cursor->max_wm);
		return false;
	}

	if (!(display_wm || cursor_wm)) {
		DRM_DEBUG_KMS("SR latency is 0, disabling\n");
		return false;
	}

	return true;
}

static bool g4x_compute_srwm(struct drm_device *dev,
			     int plane,
			     int latency_ns,
			     const struct intel_watermark_params *display,
			     const struct intel_watermark_params *cursor,
			     int *display_wm, int *cursor_wm)
{
	struct drm_crtc *crtc;
	const struct drm_display_mode *adjusted_mode;
	int hdisplay, htotal, cpp, clock;
	unsigned long line_time_us;
	int line_count, line_size;
	int small, large;
	int entries;

	if (!latency_ns) {
		*display_wm = *cursor_wm = 0;
		return false;
	}

	crtc = intel_get_crtc_for_plane(dev, plane);
	adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;
	hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
	cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);

	line_time_us = max(htotal * 1000 / clock, 1);
	line_count = (latency_ns / line_time_us + 1000) / 1000;
	line_size = hdisplay * cpp;

	/* Use the minimum of the small and large buffer method for primary */
	small = ((clock * cpp / 1000) * latency_ns) / 1000;
	large = line_count * line_size;

	entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
	*display_wm = entries + display->guard_size;

	/* calculate the self-refresh watermark for display cursor */
	entries = line_count * cpp * crtc->cursor->state->crtc_w;
	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
	*cursor_wm = entries + cursor->guard_size;

	return g4x_check_srwm(dev,
			      *display_wm, *cursor_wm,
			      display, cursor);
}

#define FW_WM_VLV(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)

static void vlv_write_wm_values(struct intel_crtc *crtc,
				const struct vlv_wm_values *wm)
{
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
	enum i915_pipe pipe = crtc->pipe;

	I915_WRITE(VLV_DDL(pipe),
		   (wm->ddl[pipe].cursor << DDL_CURSOR_SHIFT) |
		   (wm->ddl[pipe].sprite[1] << DDL_SPRITE_SHIFT(1)) |
		   (wm->ddl[pipe].sprite[0] << DDL_SPRITE_SHIFT(0)) |
		   (wm->ddl[pipe].primary << DDL_PLANE_SHIFT));

	I915_WRITE(DSPFW1,
		   FW_WM(wm->sr.plane, SR) |
		   FW_WM(wm->pipe[PIPE_B].cursor, CURSORB) |
		   FW_WM_VLV(wm->pipe[PIPE_B].primary, PLANEB) |
		   FW_WM_VLV(wm->pipe[PIPE_A].primary, PLANEA));
	I915_WRITE(DSPFW2,
		   FW_WM_VLV(wm->pipe[PIPE_A].sprite[1], SPRITEB) |
		   FW_WM(wm->pipe[PIPE_A].cursor, CURSORA) |
		   FW_WM_VLV(wm->pipe[PIPE_A].sprite[0], SPRITEA));
	I915_WRITE(DSPFW3,
		   FW_WM(wm->sr.cursor, CURSOR_SR));

	if (IS_CHERRYVIEW(dev_priv)) {
		I915_WRITE(DSPFW7_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_B].sprite[1],
				     SPRITED) |
			   FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
		I915_WRITE(DSPFW8_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_C].sprite[1], SPRITEF) |
			   FW_WM_VLV(wm->pipe[PIPE_C].sprite[0], SPRITEE));
		I915_WRITE(DSPFW9_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_C].primary, PLANEC) |
			   FW_WM(wm->pipe[PIPE_C].cursor, CURSORC));
		I915_WRITE(DSPHOWM,
			   FW_WM(wm->sr.plane >> 9, SR_HI) |
			   FW_WM(wm->pipe[PIPE_C].sprite[1] >> 8, SPRITEF_HI) |
			   FW_WM(wm->pipe[PIPE_C].sprite[0] >> 8, SPRITEE_HI) |
			   FW_WM(wm->pipe[PIPE_C].primary >> 8, PLANEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
			   FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
			   FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
	} else {
		I915_WRITE(DSPFW7,
			   FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
			   FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
		I915_WRITE(DSPHOWM,
			   FW_WM(wm->sr.plane >> 9, SR_HI) |
			   FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
			   FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
			   FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
	}

	/* zero (unused) WM1 watermarks */
	I915_WRITE(DSPFW4, 0);
	I915_WRITE(DSPFW5, 0);
	I915_WRITE(DSPFW6, 0);
	I915_WRITE(DSPHOWM1, 0);

	POSTING_READ(DSPFW1);
}

#undef FW_WM_VLV

enum vlv_wm_level {
	VLV_WM_LEVEL_PM2,
	VLV_WM_LEVEL_PM5,
	VLV_WM_LEVEL_DDR_DVFS,
};

/* latency must be in 0.1us units. */
static unsigned int vlv_wm_method2(unsigned int pixel_rate,
				   unsigned int pipe_htotal,
				   unsigned int horiz_pixels,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int ret;

	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
	ret = (ret + 1) * horiz_pixels * cpp;
	ret = DIV_ROUND_UP(ret, 64);

	return ret;
}
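
/*
 * Worked example for vlv_wm_method2() above (illustrative numbers): a
 * 1920 pixel wide, cpp = 4 plane on a 148500 kHz, htotal = 2200 mode at
 * the 33 us DDR DVFS level (latency = 330):
 *
 *	(330 * 148500) / (2200 * 10000) = 2	(truncated)
 *	(2 + 1) * 1920 * 4              = 23040 bytes
 *	DIV_ROUND_UP(23040, 64)         = 360 FIFO lines
 */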

static void vlv_setup_wm_latency(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	/* all latencies in usec */
	dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;

	dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;

	if (IS_CHERRYVIEW(dev_priv)) {
		dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
		dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;

		dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
	}
}

static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
				     struct intel_crtc *crtc,
				     const struct intel_plane_state *state,
				     int level)
{
	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
	int clock, htotal, cpp, width, wm;

	if (dev_priv->wm.pri_latency[level] == 0)
		return USHRT_MAX;

	if (!state->base.visible)
		return 0;

	cpp = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
	clock = crtc->config->base.adjusted_mode.crtc_clock;
	htotal = crtc->config->base.adjusted_mode.crtc_htotal;
	width = crtc->config->pipe_src_w;
	if (WARN_ON(htotal == 0))
		htotal = 1;

	if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
		/*
		 * FIXME the formula gives values that are
		 * too big for the cursor FIFO, and hence we
		 * would never be able to use cursors. For
		 * now just hardcode the watermark.
		 */
		wm = 63;
	} else {
		wm = vlv_wm_method2(clock, htotal, width, cpp,
				    dev_priv->wm.pri_latency[level] * 10);
	}

	return min_t(int, wm, USHRT_MAX);
}

static void vlv_compute_fifo(struct intel_crtc *crtc)
{
	struct drm_device *dev = crtc->base.dev;
	struct vlv_wm_state *wm_state = &crtc->wm_state;
	struct intel_plane *plane;
	unsigned int total_rate = 0;
	const int fifo_size = 512 - 1;
	int fifo_extra, fifo_left = fifo_size;

	for_each_intel_plane_on_crtc(dev, crtc, plane) {
		struct intel_plane_state *state =
			to_intel_plane_state(plane->base.state);

		if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
			continue;

		if (state->base.visible) {
			wm_state->num_active_planes++;
			total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0);
		}
	}

	for_each_intel_plane_on_crtc(dev, crtc, plane) {
		struct intel_plane_state *state =
			to_intel_plane_state(plane->base.state);
		unsigned int rate;

		if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
			plane->wm.fifo_size = 63;
			continue;
		}

		if (!state->base.visible) {
			plane->wm.fifo_size = 0;
			continue;
		}

		rate = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
		plane->wm.fifo_size = fifo_size * rate / total_rate;
		fifo_left -= plane->wm.fifo_size;
	}

	fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1);

	/* spread the remainder evenly */
	for_each_intel_plane_on_crtc(dev, crtc, plane) {
		int plane_extra;

		if (fifo_left == 0)
			break;

		if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
			continue;

		/* give it all to the first plane if none are active */
		if (plane->wm.fifo_size == 0 &&
		    wm_state->num_active_planes)
			continue;

		plane_extra = min(fifo_extra, fifo_left);
		plane->wm.fifo_size += plane_extra;
		fifo_left -= plane_extra;
	}

	WARN_ON(fifo_left != 0);
}
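
/*
 * Worked example for vlv_compute_fifo() above (illustrative): with two
 * visible non-cursor planes at 4 and 2 bytes per pixel, total_rate = 6,
 * so the 511 entry FIFO splits as 511 * 4 / 6 = 340 and 511 * 2 / 6 = 170.
 * That leaves 1 entry unassigned, which the remainder loop then hands to
 * the first plane.
 */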

static void vlv_invert_wms(struct intel_crtc *crtc)
{
	struct vlv_wm_state *wm_state = &crtc->wm_state;
	int level;

	for (level = 0; level < wm_state->num_levels; level++) {
		struct drm_device *dev = crtc->base.dev;
		const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
		struct intel_plane *plane;

		wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
		wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;

		for_each_intel_plane_on_crtc(dev, crtc, plane) {
			switch (plane->base.type) {
				int sprite;
			case DRM_PLANE_TYPE_CURSOR:
				wm_state->wm[level].cursor = plane->wm.fifo_size -
					wm_state->wm[level].cursor;
				break;
			case DRM_PLANE_TYPE_PRIMARY:
				wm_state->wm[level].primary = plane->wm.fifo_size -
					wm_state->wm[level].primary;
				break;
			case DRM_PLANE_TYPE_OVERLAY:
				sprite = plane->plane;
				wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
					wm_state->wm[level].sprite[sprite];
				break;
			}
		}
	}
}

static void vlv_compute_wm(struct intel_crtc *crtc)
{
	struct drm_device *dev = crtc->base.dev;
	struct vlv_wm_state *wm_state = &crtc->wm_state;
	struct intel_plane *plane;
	int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
	int level;

	memset(wm_state, 0, sizeof(*wm_state));

	wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed;
	wm_state->num_levels = to_i915(dev)->wm.max_level + 1;

	wm_state->num_active_planes = 0;

	vlv_compute_fifo(crtc);

	if (wm_state->num_active_planes != 1)
		wm_state->cxsr = false;

	if (wm_state->cxsr) {
		for (level = 0; level < wm_state->num_levels; level++) {
			wm_state->sr[level].plane = sr_fifo_size;
			wm_state->sr[level].cursor = 63;
		}
	}

	for_each_intel_plane_on_crtc(dev, crtc, plane) {
		struct intel_plane_state *state =
			to_intel_plane_state(plane->base.state);

		if (!state->base.visible)
			continue;

		/* normal watermarks */
		for (level = 0; level < wm_state->num_levels; level++) {
			int wm = vlv_compute_wm_level(plane, crtc, state, level);
			int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;

			/* hack */
			if (WARN_ON(level == 0 && wm > max_wm))
				wm = max_wm;

			if (wm > plane->wm.fifo_size)
				break;

			switch (plane->base.type) {
				int sprite;
			case DRM_PLANE_TYPE_CURSOR:
				wm_state->wm[level].cursor = wm;
				break;
			case DRM_PLANE_TYPE_PRIMARY:
				wm_state->wm[level].primary = wm;
				break;
			case DRM_PLANE_TYPE_OVERLAY:
				sprite = plane->plane;
				wm_state->wm[level].sprite[sprite] = wm;
				break;
			}
		}

		wm_state->num_levels = level;

		if (!wm_state->cxsr)
			continue;

		/* maxfifo watermarks */
		switch (plane->base.type) {
			int sprite, level;
		case DRM_PLANE_TYPE_CURSOR:
			for (level = 0; level < wm_state->num_levels; level++)
				wm_state->sr[level].cursor =
					wm_state->wm[level].cursor;
			break;
		case DRM_PLANE_TYPE_PRIMARY:
			for (level = 0; level < wm_state->num_levels; level++)
				wm_state->sr[level].plane =
					min(wm_state->sr[level].plane,
					    wm_state->wm[level].primary);
			break;
		case DRM_PLANE_TYPE_OVERLAY:
			sprite = plane->plane;
			for (level = 0; level < wm_state->num_levels; level++)
				wm_state->sr[level].plane =
					min(wm_state->sr[level].plane,
					    wm_state->wm[level].sprite[sprite]);
			break;
		}
	}

	/* clear any (partially) filled invalid levels */
	for (level = wm_state->num_levels; level < to_i915(dev)->wm.max_level + 1; level++) {
		memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
		memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
	}

	vlv_invert_wms(crtc);
}

#define VLV_FIFO(plane, value) \
	(((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)

static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc)
{
	struct drm_device *dev = crtc->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct intel_plane *plane;
	int sprite0_start = 0, sprite1_start = 0, fifo_size = 0;

	for_each_intel_plane_on_crtc(dev, crtc, plane) {
		if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
			WARN_ON(plane->wm.fifo_size != 63);
			continue;
		}

		if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
			sprite0_start = plane->wm.fifo_size;
		else if (plane->plane == 0)
			sprite1_start = sprite0_start + plane->wm.fifo_size;
		else
			fifo_size = sprite1_start + plane->wm.fifo_size;
	}

	WARN_ON(fifo_size != 512 - 1);

	DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n",
		      pipe_name(crtc->pipe), sprite0_start,
		      sprite1_start, fifo_size);

	switch (crtc->pipe) {
		uint32_t dsparb, dsparb2,
			 dsparb3;
	case PIPE_A:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);

		dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
			    VLV_FIFO(SPRITEB, 0xff));
		dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
			   VLV_FIFO(SPRITEB, sprite1_start));

		dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
			     VLV_FIFO(SPRITEB_HI, 0x1));
		dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
			    VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));

		I915_WRITE(DSPARB, dsparb);
		I915_WRITE(DSPARB2, dsparb2);
		break;
	case PIPE_B:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);

		dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
			    VLV_FIFO(SPRITED, 0xff));
		dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
			   VLV_FIFO(SPRITED, sprite1_start));

		dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
			     VLV_FIFO(SPRITED_HI, 0xff));
		dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
			    VLV_FIFO(SPRITED_HI, sprite1_start >> 8));

		I915_WRITE(DSPARB, dsparb);
		I915_WRITE(DSPARB2, dsparb2);
		break;
	case PIPE_C:
		dsparb3 = I915_READ(DSPARB3);
		dsparb2 = I915_READ(DSPARB2);

		dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
			     VLV_FIFO(SPRITEF, 0xff));
		dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
			    VLV_FIFO(SPRITEF, sprite1_start));

		dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
			     VLV_FIFO(SPRITEF_HI, 0xff));
		dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
			    VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));

		I915_WRITE(DSPARB3, dsparb3);
		I915_WRITE(DSPARB2, dsparb2);
		break;
	default:
		break;
	}
}

#undef VLV_FIFO

static void vlv_merge_wm(struct drm_device *dev,
			 struct vlv_wm_values *wm)
{
	struct intel_crtc *crtc;
	int num_active_crtcs = 0;

	wm->level = to_i915(dev)->wm.max_level;
	wm->cxsr = true;

	for_each_intel_crtc(dev, crtc) {
		const struct vlv_wm_state *wm_state = &crtc->wm_state;

		if (!crtc->active)
			continue;

		if (!wm_state->cxsr)
			wm->cxsr = false;

		num_active_crtcs++;
		wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
	}

	if (num_active_crtcs != 1)
		wm->cxsr = false;

	if (num_active_crtcs > 1)
		wm->level = VLV_WM_LEVEL_PM2;

	for_each_intel_crtc(dev, crtc) {
		struct vlv_wm_state *wm_state = &crtc->wm_state;
		enum i915_pipe pipe = crtc->pipe;

		if (!crtc->active)
			continue;

		wm->pipe[pipe] = wm_state->wm[wm->level];
		if (wm->cxsr)
			wm->sr = wm_state->sr[wm->level];

		wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
		wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
		wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
		wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
	}
}

static void vlv_update_wm(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	enum i915_pipe pipe = intel_crtc->pipe;
	struct vlv_wm_values wm = {};

	vlv_compute_wm(intel_crtc);
	vlv_merge_wm(dev, &wm);

	if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0) {
		/* FIXME should be part of crtc atomic commit */
		vlv_pipe_set_fifo_size(intel_crtc);
		return;
	}

	if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
		chv_set_memory_dvfs(dev_priv, false);

	if (wm.level < VLV_WM_LEVEL_PM5 &&
	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
		chv_set_memory_pm5(dev_priv, false);

	if (!wm.cxsr && dev_priv->wm.vlv.cxsr)
		intel_set_memory_cxsr(dev_priv, false);

	/* FIXME should be part of crtc atomic commit */
	vlv_pipe_set_fifo_size(intel_crtc);

	vlv_write_wm_values(intel_crtc, &wm);

	DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
		      "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
		      pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
		      wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
		      wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);

	if (wm.cxsr && !dev_priv->wm.vlv.cxsr)
		intel_set_memory_cxsr(dev_priv, true);

	if (wm.level >= VLV_WM_LEVEL_PM5 &&
	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
		chv_set_memory_pm5(dev_priv, true);

	if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
		chv_set_memory_dvfs(dev_priv, true);

	dev_priv->wm.vlv = wm;
}

#define single_plane_enabled(mask) is_power_of_2(mask)

static void g4x_update_wm(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	static const int sr_latency_ns = 12000;
	struct drm_i915_private *dev_priv = to_i915(dev);
	int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
	int plane_sr, cursor_sr;
	unsigned int enabled = 0;
	bool cxsr_enabled;

	if (g4x_compute_wm0(dev, PIPE_A,
			    &g4x_wm_info, pessimal_latency_ns,
			    &g4x_cursor_wm_info, pessimal_latency_ns,
			    &planea_wm, &cursora_wm))
		enabled |= 1 << PIPE_A;

	if (g4x_compute_wm0(dev, PIPE_B,
			    &g4x_wm_info, pessimal_latency_ns,
			    &g4x_cursor_wm_info, pessimal_latency_ns,
			    &planeb_wm, &cursorb_wm))
		enabled |= 1 << PIPE_B;

	if (single_plane_enabled(enabled) &&
	    g4x_compute_srwm(dev, ffs(enabled) - 1,
			     sr_latency_ns,
			     &g4x_wm_info,
			     &g4x_cursor_wm_info,
			     &plane_sr, &cursor_sr)) {
		cxsr_enabled = true;
	} else {
		cxsr_enabled = false;
		intel_set_memory_cxsr(dev_priv, false);
		plane_sr = cursor_sr = 0;
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
		      "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
		      planea_wm, cursora_wm,
		      planeb_wm, cursorb_wm,
		      plane_sr, cursor_sr);

	I915_WRITE(DSPFW1,
		   FW_WM(plane_sr, SR) |
		   FW_WM(cursorb_wm, CURSORB) |
		   FW_WM(planeb_wm, PLANEB) |
		   FW_WM(planea_wm, PLANEA));
	I915_WRITE(DSPFW2,
		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
		   FW_WM(cursora_wm, CURSORA));
	/* HPLL off in SR has some issues on G4x...
	   disable it */
	I915_WRITE(DSPFW3,
		   (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
		   FW_WM(cursor_sr, CURSOR_SR));

	if (cxsr_enabled)
		intel_set_memory_cxsr(dev_priv, true);
}

static void i965_update_wm(struct drm_crtc *unused_crtc)
{
	struct drm_device *dev = unused_crtc->dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_crtc *crtc;
	int srwm = 1;
	int cursor_sr = 16;
	bool cxsr_enabled;

	/* Calc sr entries for one plane configs */
	crtc = single_enabled_crtc(dev);
	if (crtc) {
		/* self-refresh has much higher latency */
		static const int sr_latency_ns = 12000;
		const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
		int clock = adjusted_mode->crtc_clock;
		int htotal = adjusted_mode->crtc_htotal;
		int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
		int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
		unsigned long line_time_us;
		int entries;

		line_time_us = max(htotal * 1000 / clock, 1);

		/* Use ns/us then divide to preserve precision */
		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
			cpp * hdisplay;
		entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
		srwm = I965_FIFO_SIZE - entries;
		if (srwm < 0)
			srwm = 1;
		srwm &= 0x1ff;
		DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
			      entries, srwm);

		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
			cpp * crtc->cursor->state->crtc_w;
		entries = DIV_ROUND_UP(entries,
				       i965_cursor_wm_info.cacheline_size);
		cursor_sr = i965_cursor_wm_info.fifo_size -
			(entries + i965_cursor_wm_info.guard_size);

		if (cursor_sr > i965_cursor_wm_info.max_wm)
			cursor_sr = i965_cursor_wm_info.max_wm;

		DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
			      "cursor %d\n", srwm, cursor_sr);

		cxsr_enabled = true;
	} else {
		cxsr_enabled = false;
		/* Turn off self refresh if both pipes are enabled */
		intel_set_memory_cxsr(dev_priv, false);
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
		      srwm);

	/* 965 has limitations...
	 */
	I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
		   FW_WM(8, CURSORB) |
		   FW_WM(8, PLANEB) |
		   FW_WM(8, PLANEA));
	I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
		   FW_WM(8, PLANEC_OLD));
	/* update cursor SR watermark */
	I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));

	if (cxsr_enabled)
		intel_set_memory_cxsr(dev_priv, true);
}

#undef FW_WM

static void i9xx_update_wm(struct drm_crtc *unused_crtc)
{
	struct drm_device *dev = unused_crtc->dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	const struct intel_watermark_params *wm_info;
	uint32_t fwater_lo;
	uint32_t fwater_hi;
	int cwm, srwm = 1;
	int fifo_size;
	int planea_wm, planeb_wm;
	struct drm_crtc *crtc, *enabled = NULL;

	if (IS_I945GM(dev))
		wm_info = &i945_wm_info;
	else if (!IS_GEN2(dev_priv))
		wm_info = &i915_wm_info;
	else
		wm_info = &i830_a_wm_info;

	fifo_size = dev_priv->display.get_fifo_size(dev, 0);
	crtc = intel_get_crtc_for_plane(dev, 0);
	if (intel_crtc_active(crtc)) {
		const struct drm_display_mode *adjusted_mode;
		int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
		if (IS_GEN2(dev_priv))
			cpp = 4;

		adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
		planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
					       wm_info, fifo_size, cpp,
					       pessimal_latency_ns);
		enabled = crtc;
	} else {
		planea_wm = fifo_size - wm_info->guard_size;
		if (planea_wm > (long)wm_info->max_wm)
			planea_wm = wm_info->max_wm;
	}

	if (IS_GEN2(dev_priv))
		wm_info = &i830_bc_wm_info;

	fifo_size = dev_priv->display.get_fifo_size(dev, 1);
	crtc = intel_get_crtc_for_plane(dev, 1);
	if (intel_crtc_active(crtc)) {
		const struct drm_display_mode *adjusted_mode;
		int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
		if (IS_GEN2(dev_priv))
			cpp = 4;

		adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
		planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
					       wm_info, fifo_size, cpp,
					       pessimal_latency_ns);
		if (enabled == NULL)
			enabled = crtc;
		else
			enabled = NULL;
	} else {
		planeb_wm = fifo_size - wm_info->guard_size;
		if (planeb_wm > (long)wm_info->max_wm)
			planeb_wm = wm_info->max_wm;
	}

	DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);

	if (IS_I915GM(dev_priv) && enabled) {
		struct drm_i915_gem_object *obj;

		obj = intel_fb_obj(enabled->primary->state->fb);

		/* self-refresh seems busted with untiled */
		if (!i915_gem_object_is_tiled(obj))
			enabled = NULL;
	}

	/*
	 * Overlay gets an aggressive default since video jitter is bad.
	 */
	cwm = 2;

	/* Play safe and disable self-refresh before adjusting watermarks.
	 */
	intel_set_memory_cxsr(dev_priv, false);

	/* Calc sr entries for one plane configs */
	if (HAS_FW_BLC(dev) && enabled) {
		/* self-refresh has much higher latency */
		static const int sr_latency_ns = 6000;
		const struct drm_display_mode *adjusted_mode = &to_intel_crtc(enabled)->config->base.adjusted_mode;
		int clock = adjusted_mode->crtc_clock;
		int htotal = adjusted_mode->crtc_htotal;
		int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w;
		int cpp = drm_format_plane_cpp(enabled->primary->state->fb->pixel_format, 0);
		unsigned long line_time_us;
		int entries;

		if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
			cpp = 4;

		line_time_us = max(htotal * 1000 / clock, 1);

		/* Use ns/us then divide to preserve precision */
		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
			cpp * hdisplay;
		entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
		DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
		srwm = wm_info->fifo_size - entries;
		if (srwm < 0)
			srwm = 1;

		if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
			I915_WRITE(FW_BLC_SELF,
				   FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
		else
			I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
		      planea_wm, planeb_wm, cwm, srwm);

	fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
	fwater_hi = (cwm & 0x1f);

	/* Set request length to 8 cachelines per fetch */
	fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
	fwater_hi = fwater_hi | (1 << 8);

	I915_WRITE(FW_BLC, fwater_lo);
	I915_WRITE(FW_BLC2, fwater_hi);

	if (enabled)
		intel_set_memory_cxsr(dev_priv, true);
}

static void i845_update_wm(struct drm_crtc *unused_crtc)
{
	struct drm_device *dev = unused_crtc->dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_crtc *crtc;
	const struct drm_display_mode *adjusted_mode;
	uint32_t fwater_lo;
	int planea_wm;

	crtc = single_enabled_crtc(dev);
	if (crtc == NULL)
		return;

	adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
	planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
				       &i845_wm_info,
				       dev_priv->display.get_fifo_size(dev, 0),
				       4, pessimal_latency_ns);
	fwater_lo = I915_READ(FW_BLC) & ~0xfff;
	fwater_lo |= (3<<8) | planea_wm;

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);

	I915_WRITE(FW_BLC, fwater_lo);
}

uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
{
	uint32_t pixel_rate;

	pixel_rate = pipe_config->base.adjusted_mode.crtc_clock;

	/* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
	 * adjust the pixel_rate here.
	 */
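
	/*
	 * Worked example for the panel fitter correction below (illustrative
	 * numbers): downscaling a 1920x1080 pipe into a 1280x720 pfit window
	 * scales the effective pixel rate by
	 *
	 *	(1920 * 1080) / (1280 * 720) = 2.25
	 *
	 * whereas for upscaling pipe_w/pipe_h are first clamped up to the
	 * pfit dimensions, so the ratio stays 1.
	 */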
	if (pipe_config->pch_pfit.enabled) {
		uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
		uint32_t pfit_size = pipe_config->pch_pfit.size;

		pipe_w = pipe_config->pipe_src_w;
		pipe_h = pipe_config->pipe_src_h;

		pfit_w = (pfit_size >> 16) & 0xFFFF;
		pfit_h = pfit_size & 0xFFFF;
		if (pipe_w < pfit_w)
			pipe_w = pfit_w;
		if (pipe_h < pfit_h)
			pipe_h = pfit_h;

		if (WARN_ON(!pfit_w || !pfit_h))
			return pixel_rate;

		pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
				     pfit_w * pfit_h);
	}

	return pixel_rate;
}

/* latency must be in 0.1us units. */
static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency)
{
	uint64_t ret;

	if (WARN(latency == 0, "Latency value missing\n"))
		return UINT_MAX;

	ret = (uint64_t) pixel_rate * cpp * latency;
	ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;

	return ret;
}

/* latency must be in 0.1us units. */
static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
			       uint32_t horiz_pixels, uint8_t cpp,
			       uint32_t latency)
{
	uint32_t ret;

	if (WARN(latency == 0, "Latency value missing\n"))
		return UINT_MAX;
	if (WARN_ON(!pipe_htotal))
		return UINT_MAX;

	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
	ret = (ret + 1) * horiz_pixels * cpp;
	ret = DIV_ROUND_UP(ret, 64) + 2;
	return ret;
}

static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
			   uint8_t cpp)
{
	/*
	 * Neither of these should be possible since this function shouldn't be
	 * called if the CRTC is off or the plane is invisible.  But let's be
	 * extra paranoid to avoid a potential divide-by-zero if we screw up
	 * elsewhere in the driver.
	 */
	if (WARN_ON(!cpp))
		return 0;
	if (WARN_ON(!horiz_pixels))
		return 0;

	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
}

struct ilk_wm_maximums {
	uint16_t pri;
	uint16_t spr;
	uint16_t cur;
	uint16_t fbc;
};

/*
 * For both WM_PIPE and WM_LP.
 * mem_value must be in 0.1us units.
 */
static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
				   const struct intel_plane_state *pstate,
				   uint32_t mem_value,
				   bool is_lp)
{
	int cpp = pstate->base.fb ?
		drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;
	uint32_t method1, method2;

	if (!cstate->base.active || !pstate->base.visible)
		return 0;

	method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value);

	if (!is_lp)
		return method1;

	method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
				 cstate->base.adjusted_mode.crtc_htotal,
				 drm_rect_width(&pstate->base.dst),
				 cpp, mem_value);

	return min(method1, method2);
}

/*
 * For both WM_PIPE and WM_LP.
 * mem_value must be in 0.1us units.
 */
static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
				   const struct intel_plane_state *pstate,
				   uint32_t mem_value)
{
	int cpp = pstate->base.fb ?

static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
			   uint8_t cpp)
{
	/*
	 * Neither of these should be possible since this function shouldn't be
	 * called if the CRTC is off or the plane is invisible. But let's be
	 * extra paranoid to avoid a potential divide-by-zero if we screw up
	 * elsewhere in the driver.
	 */
	if (WARN_ON(!cpp))
		return 0;
	if (WARN_ON(!horiz_pixels))
		return 0;

	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
}
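
/*
 * Worked example (illustrative numbers only): with pri_val = 49,
 * horiz_pixels = 1920 and cpp = 4, the FBC watermark above is
 * DIV_ROUND_UP(49 * 64, 1920 * 4) + 2 = 1 + 2 = 3 cachelines.
 */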

struct ilk_wm_maximums {
	uint16_t pri;
	uint16_t spr;
	uint16_t cur;
	uint16_t fbc;
};

/*
 * For both WM_PIPE and WM_LP.
 * mem_value must be in 0.1us units.
 */
static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
				   const struct intel_plane_state *pstate,
				   uint32_t mem_value,
				   bool is_lp)
{
	int cpp = pstate->base.fb ?
		drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;
	uint32_t method1, method2;

	if (!cstate->base.active || !pstate->base.visible)
		return 0;

	method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value);

	if (!is_lp)
		return method1;

	method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
				 cstate->base.adjusted_mode.crtc_htotal,
				 drm_rect_width(&pstate->base.dst),
				 cpp, mem_value);

	return min(method1, method2);
}

/*
 * For both WM_PIPE and WM_LP.
 * mem_value must be in 0.1us units.
 */
static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
				   const struct intel_plane_state *pstate,
				   uint32_t mem_value)
{
	int cpp = pstate->base.fb ?
		drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;
	uint32_t method1, method2;

	if (!cstate->base.active || !pstate->base.visible)
		return 0;

	method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value);
	method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
				 cstate->base.adjusted_mode.crtc_htotal,
				 drm_rect_width(&pstate->base.dst),
				 cpp, mem_value);
	return min(method1, method2);
}

/*
 * For both WM_PIPE and WM_LP.
 * mem_value must be in 0.1us units.
 */
static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
				   const struct intel_plane_state *pstate,
				   uint32_t mem_value)
{
	/*
	 * We treat the cursor plane as always-on for the purposes of watermark
	 * calculation. Until we have two-stage watermark programming merged,
	 * this is necessary to avoid flickering.
	 */
	int cpp = 4;
	int width = pstate->base.visible ? pstate->base.crtc_w : 64;

	if (!cstate->base.active)
		return 0;

	return ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
			      cstate->base.adjusted_mode.crtc_htotal,
			      width, cpp, mem_value);
}

/* Only for WM_LP. */
static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
				   const struct intel_plane_state *pstate,
				   uint32_t pri_val)
{
	int cpp = pstate->base.fb ?
		drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;

	if (!cstate->base.active || !pstate->base.visible)
		return 0;

	return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp);
}

static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
{
	if (INTEL_INFO(dev)->gen >= 8)
		return 3072;
	else if (INTEL_INFO(dev)->gen >= 7)
		return 768;
	else
		return 512;
}

static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev,
					 int level, bool is_sprite)
{
	if (INTEL_INFO(dev)->gen >= 8)
		/* BDW primary/sprite plane watermarks */
		return level == 0 ? 255 : 2047;
	else if (INTEL_INFO(dev)->gen >= 7)
		/* IVB/HSW primary/sprite plane watermarks */
		return level == 0 ? 127 : 1023;
	else if (!is_sprite)
		/* ILK/SNB primary plane watermarks */
		return level == 0 ? 127 : 511;
	else
		/* ILK/SNB sprite plane watermarks */
		return level == 0 ? 63 : 255;
}

static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev,
					  int level)
{
	if (INTEL_INFO(dev)->gen >= 7)
		return level == 0 ? 63 : 255;
	else
		return level == 0 ? 31 : 63;
}

static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev)
{
	if (INTEL_INFO(dev)->gen >= 8)
		return 31;
	else
		return 15;
}

/* Calculate the maximum primary/sprite plane watermark */
static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
				     int level,
				     const struct intel_wm_config *config,
				     enum intel_ddb_partitioning ddb_partitioning,
				     bool is_sprite)
{
	unsigned int fifo_size = ilk_display_fifo_size(dev);

	/* if sprites aren't enabled, sprites get nothing */
	if (is_sprite && !config->sprites_enabled)
		return 0;

	/* HSW allows LP1+ watermarks even with multiple pipes */
	if (level == 0 || config->num_pipes_active > 1) {
		fifo_size /= INTEL_INFO(dev)->num_pipes;

		/*
		 * For some reason the non self refresh
		 * FIFO size is only half of the self
		 * refresh FIFO size on ILK/SNB.
		 */
		if (INTEL_INFO(dev)->gen <= 6)
			fifo_size /= 2;
	}

	if (config->sprites_enabled) {
		/* level 0 is always calculated with 1:1 split */
		if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
			if (is_sprite)
				fifo_size *= 5;
			fifo_size /= 6;
		} else {
			fifo_size /= 2;
		}
	}

	/* clamp to max that the registers can hold */
	return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite));
}
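
/*
 * Illustrative numbers for the 5/6 split above (not from bspec): on a
 * gen7 part the LP FIFO is 768 entries. With sprites enabled and
 * INTEL_DDB_PART_5_6, the sprite gets 768 * 5 / 6 = 640 entries and the
 * primary 768 / 6 = 128; with the default 1:1 split both get 384.
 */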

/* Calculate the maximum cursor plane watermark */
static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
				      int level,
				      const struct intel_wm_config *config)
{
	/* HSW LP1+ watermarks w/ multiple pipes */
	if (level > 0 && config->num_pipes_active > 1)
		return 64;

	/* otherwise just report max that registers can hold */
	return ilk_cursor_wm_reg_max(dev, level);
}

static void ilk_compute_wm_maximums(const struct drm_device *dev,
				    int level,
				    const struct intel_wm_config *config,
				    enum intel_ddb_partitioning ddb_partitioning,
				    struct ilk_wm_maximums *max)
{
	max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
	max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
	max->cur = ilk_cursor_wm_max(dev, level, config);
	max->fbc = ilk_fbc_wm_reg_max(dev);
}

static void ilk_compute_wm_reg_maximums(struct drm_device *dev,
					int level,
					struct ilk_wm_maximums *max)
{
	max->pri = ilk_plane_wm_reg_max(dev, level, false);
	max->spr = ilk_plane_wm_reg_max(dev, level, true);
	max->cur = ilk_cursor_wm_reg_max(dev, level);
	max->fbc = ilk_fbc_wm_reg_max(dev);
}

static bool ilk_validate_wm_level(int level,
				  const struct ilk_wm_maximums *max,
				  struct intel_wm_level *result)
{
	bool ret;

	/* already determined to be invalid? */
	if (!result->enable)
		return false;

	result->enable = result->pri_val <= max->pri &&
			 result->spr_val <= max->spr &&
			 result->cur_val <= max->cur;

	ret = result->enable;

	/*
	 * HACK until we can pre-compute everything,
	 * and thus fail gracefully if LP0 watermarks
	 * are exceeded...
	 */
	if (level == 0 && !result->enable) {
		if (result->pri_val > max->pri)
			DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
				      level, result->pri_val, max->pri);
		if (result->spr_val > max->spr)
			DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
				      level, result->spr_val, max->spr);
		if (result->cur_val > max->cur)
			DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
				      level, result->cur_val, max->cur);

		result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
		result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
		result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
		result->enable = true;
	}

	return ret;
}

static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
				 const struct intel_crtc *intel_crtc,
				 int level,
				 struct intel_crtc_state *cstate,
				 struct intel_plane_state *pristate,
				 struct intel_plane_state *sprstate,
				 struct intel_plane_state *curstate,
				 struct intel_wm_level *result)
{
	uint16_t pri_latency = dev_priv->wm.pri_latency[level];
	uint16_t spr_latency = dev_priv->wm.spr_latency[level];
	uint16_t cur_latency = dev_priv->wm.cur_latency[level];

	/* WM1+ latency values stored in 0.5us units */
	if (level > 0) {
		pri_latency *= 5;
		spr_latency *= 5;
		cur_latency *= 5;
	}

	if (pristate) {
		result->pri_val = ilk_compute_pri_wm(cstate, pristate,
						     pri_latency, level);
		result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
	}

	if (sprstate)
		result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);

	if (curstate)
		result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);

	result->enable = true;
}
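
/*
 * Unit check for the *5 above (illustrative): a raw WM1 latency field of
 * 4 is in 0.5us units, so multiplying by 5 turns it into 20 in the 0.1us
 * units that ilk_wm_method1/2 expect, i.e. 2.0us.
 */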

static uint32_t
hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
{
	const struct intel_atomic_state *intel_state =
		to_intel_atomic_state(cstate->base.state);
	const struct drm_display_mode *adjusted_mode =
		&cstate->base.adjusted_mode;
	u32 linetime, ips_linetime;

	if (!cstate->base.active)
		return 0;
	if (WARN_ON(adjusted_mode->crtc_clock == 0))
		return 0;
	if (WARN_ON(intel_state->cdclk == 0))
		return 0;

	/*
	 * The watermark is computed based on how long it takes to fill a
	 * single row at the given clock rate, multiplied by 8.
	 */
	linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
				     adjusted_mode->crtc_clock);
	ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
					 intel_state->cdclk);

	return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
	       PIPE_WM_LINETIME_TIME(linetime);
}
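
/*
 * Worked example (illustrative numbers only): htotal = 2200 at
 * crtc_clock = 148500 kHz gives DIV_ROUND_CLOSEST(2200 * 8000, 148500)
 * = 119 in 1/8th usec units, i.e. a line time of ~14.9us. The IPS value
 * is the same computation against cdclk instead of the pixel clock.
 */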
2149 */ 2150 if (wm[0] == 0) { 2151 wm[0] += 2; 2152 for (level = 1; level <= max_level; level++) { 2153 if (wm[level] == 0) 2154 break; 2155 wm[level] += 2; 2156 } 2157 } 2158 2159 } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { 2160 uint64_t sskpd = I915_READ64(MCH_SSKPD); 2161 2162 wm[0] = (sskpd >> 56) & 0xFF; 2163 if (wm[0] == 0) 2164 wm[0] = sskpd & 0xF; 2165 wm[1] = (sskpd >> 4) & 0xFF; 2166 wm[2] = (sskpd >> 12) & 0xFF; 2167 wm[3] = (sskpd >> 20) & 0x1FF; 2168 wm[4] = (sskpd >> 32) & 0x1FF; 2169 } else if (INTEL_INFO(dev)->gen >= 6) { 2170 uint32_t sskpd = I915_READ(MCH_SSKPD); 2171 2172 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK; 2173 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK; 2174 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK; 2175 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK; 2176 } else if (INTEL_INFO(dev)->gen >= 5) { 2177 uint32_t mltr = I915_READ(MLTR_ILK); 2178 2179 /* ILK primary LP0 latency is 700 ns */ 2180 wm[0] = 7; 2181 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK; 2182 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK; 2183 } 2184 } 2185 2186 static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv, 2187 uint16_t wm[5]) 2188 { 2189 /* ILK sprite LP0 latency is 1300 ns */ 2190 if (IS_GEN5(dev_priv)) 2191 wm[0] = 13; 2192 } 2193 2194 static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv, 2195 uint16_t wm[5]) 2196 { 2197 /* ILK cursor LP0 latency is 1300 ns */ 2198 if (IS_GEN5(dev_priv)) 2199 wm[0] = 13; 2200 2201 /* WaDoubleCursorLP3Latency:ivb */ 2202 if (IS_IVYBRIDGE(dev_priv)) 2203 wm[3] *= 2; 2204 } 2205 2206 int ilk_wm_max_level(const struct drm_i915_private *dev_priv) 2207 { 2208 /* how many WM levels are we expecting */ 2209 if (INTEL_GEN(dev_priv) >= 9) 2210 return 7; 2211 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 2212 return 4; 2213 else if (INTEL_GEN(dev_priv) >= 6) 2214 return 3; 2215 else 2216 return 2; 2217 } 2218 2219 static void intel_print_wm_latency(struct drm_i915_private *dev_priv, 2220 const char *name, 2221 const uint16_t wm[8]) 2222 { 2223 int level, max_level = ilk_wm_max_level(dev_priv); 2224 2225 for (level = 0; level <= max_level; level++) { 2226 unsigned int latency = wm[level]; 2227 2228 if (latency == 0) { 2229 DRM_ERROR("%s WM%d latency not provided\n", 2230 name, level); 2231 continue; 2232 } 2233 2234 /* 2235 * - latencies are in us on gen9. 2236 * - before then, WM1+ latency values are in 0.5us units 2237 */ 2238 if (IS_GEN9(dev_priv)) 2239 latency *= 10; 2240 else if (level > 0) 2241 latency *= 5; 2242 2243 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n", 2244 name, level, wm[level], 2245 latency / 10, latency % 10); 2246 } 2247 } 2248 2249 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, 2250 uint16_t wm[5], uint16_t min) 2251 { 2252 int level, max_level = ilk_wm_max_level(dev_priv); 2253 2254 if (wm[0] >= min) 2255 return false; 2256 2257 wm[0] = max(wm[0], min); 2258 for (level = 1; level <= max_level; level++) 2259 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5)); 2260 2261 return true; 2262 } 2263 2264 static void snb_wm_latency_quirk(struct drm_device *dev) 2265 { 2266 struct drm_i915_private *dev_priv = to_i915(dev); 2267 bool changed; 2268 2269 /* 2270 * The BIOS provided WM memory latency values are often 2271 * inadequate for high resolution displays. Adjust them. 
2272 */ 2273 changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) | 2274 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) | 2275 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12); 2276 2277 if (!changed) 2278 return; 2279 2280 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n"); 2281 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency); 2282 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency); 2283 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); 2284 } 2285 2286 static void ilk_setup_wm_latency(struct drm_device *dev) 2287 { 2288 struct drm_i915_private *dev_priv = to_i915(dev); 2289 2290 intel_read_wm_latency(dev, dev_priv->wm.pri_latency); 2291 2292 memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency, 2293 sizeof(dev_priv->wm.pri_latency)); 2294 memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency, 2295 sizeof(dev_priv->wm.pri_latency)); 2296 2297 intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency); 2298 intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency); 2299 2300 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency); 2301 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency); 2302 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); 2303 2304 if (IS_GEN6(dev_priv)) 2305 snb_wm_latency_quirk(dev); 2306 } 2307 2308 static void skl_setup_wm_latency(struct drm_device *dev) 2309 { 2310 struct drm_i915_private *dev_priv = to_i915(dev); 2311 2312 intel_read_wm_latency(dev, dev_priv->wm.skl_latency); 2313 intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency); 2314 } 2315 2316 static bool ilk_validate_pipe_wm(struct drm_device *dev, 2317 struct intel_pipe_wm *pipe_wm) 2318 { 2319 /* LP0 watermark maximums depend on this pipe alone */ 2320 const struct intel_wm_config config = { 2321 .num_pipes_active = 1, 2322 .sprites_enabled = pipe_wm->sprites_enabled, 2323 .sprites_scaled = pipe_wm->sprites_scaled, 2324 }; 2325 struct ilk_wm_maximums max; 2326 2327 /* LP0 watermarks always use 1/2 DDB partitioning */ 2328 ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max); 2329 2330 /* At least LP0 must be valid */ 2331 if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) { 2332 DRM_DEBUG_KMS("LP0 watermark invalid\n"); 2333 return false; 2334 } 2335 2336 return true; 2337 } 2338 2339 /* Compute new watermarks for the pipe */ 2340 static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) 2341 { 2342 struct drm_atomic_state *state = cstate->base.state; 2343 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 2344 struct intel_pipe_wm *pipe_wm; 2345 struct drm_device *dev = state->dev; 2346 const struct drm_i915_private *dev_priv = to_i915(dev); 2347 struct intel_plane *intel_plane; 2348 struct intel_plane_state *pristate = NULL; 2349 struct intel_plane_state *sprstate = NULL; 2350 struct intel_plane_state *curstate = NULL; 2351 int level, max_level = ilk_wm_max_level(dev_priv), usable_level; 2352 struct ilk_wm_maximums max; 2353 2354 pipe_wm = &cstate->wm.ilk.optimal; 2355 2356 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { 2357 struct intel_plane_state *ps; 2358 2359 ps = intel_atomic_get_existing_plane_state(state, 2360 intel_plane); 2361 if (!ps) 2362 continue; 2363 2364 if (intel_plane->base.type == DRM_PLANE_TYPE_PRIMARY) 2365 pristate = ps; 2366 else if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) 2367 sprstate = ps; 
		else if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR)
			curstate = ps;
	}

	pipe_wm->pipe_enabled = cstate->base.active;
	if (sprstate) {
		pipe_wm->sprites_enabled = sprstate->base.visible;
		pipe_wm->sprites_scaled = sprstate->base.visible &&
			(drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
			 drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
	}

	usable_level = max_level;

	/* ILK/SNB: LP2+ watermarks only w/o sprites */
	if (INTEL_INFO(dev)->gen <= 6 && pipe_wm->sprites_enabled)
		usable_level = 1;

	/* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
	if (pipe_wm->sprites_scaled)
		usable_level = 0;

	ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
			     pristate, sprstate, curstate, &pipe_wm->raw_wm[0]);

	memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
	pipe_wm->wm[0] = pipe_wm->raw_wm[0];

	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
		pipe_wm->linetime = hsw_compute_linetime_wm(cstate);

	if (!ilk_validate_pipe_wm(dev, pipe_wm))
		return -EINVAL;

	ilk_compute_wm_reg_maximums(dev, 1, &max);

	for (level = 1; level <= max_level; level++) {
		struct intel_wm_level *wm = &pipe_wm->raw_wm[level];

		ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
				     pristate, sprstate, curstate, wm);

		/*
		 * Disable any watermark level that exceeds the
		 * register maximums since such watermarks are
		 * always invalid.
		 */
		if (level > usable_level)
			continue;

		if (ilk_validate_wm_level(level, &max, wm))
			pipe_wm->wm[level] = *wm;
		else
			usable_level = level;
	}

	return 0;
}

/*
 * Build a set of 'intermediate' watermark values that satisfy both the old
 * state and the new state. These can be programmed to the hardware
 * immediately.
 */
static int ilk_compute_intermediate_wm(struct drm_device *dev,
				       struct intel_crtc *intel_crtc,
				       struct intel_crtc_state *newstate)
{
	struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
	struct intel_pipe_wm *b = &intel_crtc->wm.active.ilk;
	int level, max_level = ilk_wm_max_level(to_i915(dev));

	/*
	 * Start with the final, target watermarks, then combine with the
	 * currently active watermarks to get values that are safe both before
	 * and after the vblank.
	 */
	*a = newstate->wm.ilk.optimal;
	a->pipe_enabled |= b->pipe_enabled;
	a->sprites_enabled |= b->sprites_enabled;
	a->sprites_scaled |= b->sprites_scaled;

	for (level = 0; level <= max_level; level++) {
		struct intel_wm_level *a_wm = &a->wm[level];
		const struct intel_wm_level *b_wm = &b->wm[level];

		a_wm->enable &= b_wm->enable;
		a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
		a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
		a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
		a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
	}

	/*
	 * We need to make sure that these merged watermark values are
	 * actually a valid configuration themselves. If they're not,
	 * there's no safe way to transition from the old state to
	 * the new state, so we need to fail the atomic transaction.
2466 */ 2467 if (!ilk_validate_pipe_wm(dev, a)) 2468 return -EINVAL; 2469 2470 /* 2471 * If our intermediate WM are identical to the final WM, then we can 2472 * omit the post-vblank programming; only update if it's different. 2473 */ 2474 if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) == 0) 2475 newstate->wm.need_postvbl_update = false; 2476 2477 return 0; 2478 } 2479 2480 /* 2481 * Merge the watermarks from all active pipes for a specific level. 2482 */ 2483 static void ilk_merge_wm_level(struct drm_device *dev, 2484 int level, 2485 struct intel_wm_level *ret_wm) 2486 { 2487 const struct intel_crtc *intel_crtc; 2488 2489 ret_wm->enable = true; 2490 2491 for_each_intel_crtc(dev, intel_crtc) { 2492 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk; 2493 const struct intel_wm_level *wm = &active->wm[level]; 2494 2495 if (!active->pipe_enabled) 2496 continue; 2497 2498 /* 2499 * The watermark values may have been used in the past, 2500 * so we must maintain them in the registers for some 2501 * time even if the level is now disabled. 2502 */ 2503 if (!wm->enable) 2504 ret_wm->enable = false; 2505 2506 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val); 2507 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val); 2508 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val); 2509 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val); 2510 } 2511 } 2512 2513 /* 2514 * Merge all low power watermarks for all active pipes. 2515 */ 2516 static void ilk_wm_merge(struct drm_device *dev, 2517 const struct intel_wm_config *config, 2518 const struct ilk_wm_maximums *max, 2519 struct intel_pipe_wm *merged) 2520 { 2521 struct drm_i915_private *dev_priv = to_i915(dev); 2522 int level, max_level = ilk_wm_max_level(dev_priv); 2523 int last_enabled_level = max_level; 2524 2525 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */ 2526 if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) && 2527 config->num_pipes_active > 1) 2528 last_enabled_level = 0; 2529 2530 /* ILK: FBC WM must be disabled always */ 2531 merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6; 2532 2533 /* merge each WM1+ level */ 2534 for (level = 1; level <= max_level; level++) { 2535 struct intel_wm_level *wm = &merged->wm[level]; 2536 2537 ilk_merge_wm_level(dev, level, wm); 2538 2539 if (level > last_enabled_level) 2540 wm->enable = false; 2541 else if (!ilk_validate_wm_level(level, max, wm)) 2542 /* make sure all following levels get disabled */ 2543 last_enabled_level = level - 1; 2544 2545 /* 2546 * The spec says it is preferred to disable 2547 * FBC WMs instead of disabling a WM level. 2548 */ 2549 if (wm->fbc_val > max->fbc) { 2550 if (wm->enable) 2551 merged->fbc_wm_enabled = false; 2552 wm->fbc_val = 0; 2553 } 2554 } 2555 2556 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */ 2557 /* 2558 * FIXME this is racy. FBC might get enabled later. 2559 * What we should check here is whether FBC can be 2560 * enabled sometime later. 
2561 */ 2562 if (IS_GEN5(dev_priv) && !merged->fbc_wm_enabled && 2563 intel_fbc_is_active(dev_priv)) { 2564 for (level = 2; level <= max_level; level++) { 2565 struct intel_wm_level *wm = &merged->wm[level]; 2566 2567 wm->enable = false; 2568 } 2569 } 2570 } 2571 2572 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm) 2573 { 2574 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */ 2575 return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable); 2576 } 2577 2578 /* The value we need to program into the WM_LPx latency field */ 2579 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level) 2580 { 2581 struct drm_i915_private *dev_priv = to_i915(dev); 2582 2583 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 2584 return 2 * level; 2585 else 2586 return dev_priv->wm.pri_latency[level]; 2587 } 2588 2589 static void ilk_compute_wm_results(struct drm_device *dev, 2590 const struct intel_pipe_wm *merged, 2591 enum intel_ddb_partitioning partitioning, 2592 struct ilk_wm_values *results) 2593 { 2594 struct intel_crtc *intel_crtc; 2595 int level, wm_lp; 2596 2597 results->enable_fbc_wm = merged->fbc_wm_enabled; 2598 results->partitioning = partitioning; 2599 2600 /* LP1+ register values */ 2601 for (wm_lp = 1; wm_lp <= 3; wm_lp++) { 2602 const struct intel_wm_level *r; 2603 2604 level = ilk_wm_lp_to_level(wm_lp, merged); 2605 2606 r = &merged->wm[level]; 2607 2608 /* 2609 * Maintain the watermark values even if the level is 2610 * disabled. Doing otherwise could cause underruns. 2611 */ 2612 results->wm_lp[wm_lp - 1] = 2613 (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) | 2614 (r->pri_val << WM1_LP_SR_SHIFT) | 2615 r->cur_val; 2616 2617 if (r->enable) 2618 results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN; 2619 2620 if (INTEL_INFO(dev)->gen >= 8) 2621 results->wm_lp[wm_lp - 1] |= 2622 r->fbc_val << WM1_LP_FBC_SHIFT_BDW; 2623 else 2624 results->wm_lp[wm_lp - 1] |= 2625 r->fbc_val << WM1_LP_FBC_SHIFT; 2626 2627 /* 2628 * Always set WM1S_LP_EN when spr_val != 0, even if the 2629 * level is disabled. Doing otherwise could cause underruns. 2630 */ 2631 if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) { 2632 WARN_ON(wm_lp != 1); 2633 results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val; 2634 } else 2635 results->wm_lp_spr[wm_lp - 1] = r->spr_val; 2636 } 2637 2638 /* LP0 register values */ 2639 for_each_intel_crtc(dev, intel_crtc) { 2640 enum i915_pipe pipe = intel_crtc->pipe; 2641 const struct intel_wm_level *r = 2642 &intel_crtc->wm.active.ilk.wm[0]; 2643 2644 if (WARN_ON(!r->enable)) 2645 continue; 2646 2647 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime; 2648 2649 results->wm_pipe[pipe] = 2650 (r->pri_val << WM0_PIPE_PLANE_SHIFT) | 2651 (r->spr_val << WM0_PIPE_SPRITE_SHIFT) | 2652 r->cur_val; 2653 } 2654 } 2655 2656 /* Find the result with the highest level enabled. Check for enable_fbc_wm in 2657 * case both are at the same level. Prefer r1 in case they're the same. 

/* The value we need to program into the WM_LPx latency field */
static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
		return 2 * level;
	else
		return dev_priv->wm.pri_latency[level];
}

static void ilk_compute_wm_results(struct drm_device *dev,
				   const struct intel_pipe_wm *merged,
				   enum intel_ddb_partitioning partitioning,
				   struct ilk_wm_values *results)
{
	struct intel_crtc *intel_crtc;
	int level, wm_lp;

	results->enable_fbc_wm = merged->fbc_wm_enabled;
	results->partitioning = partitioning;

	/* LP1+ register values */
	for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
		const struct intel_wm_level *r;

		level = ilk_wm_lp_to_level(wm_lp, merged);

		r = &merged->wm[level];

		/*
		 * Maintain the watermark values even if the level is
		 * disabled. Doing otherwise could cause underruns.
		 */
		results->wm_lp[wm_lp - 1] =
			(ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
			(r->pri_val << WM1_LP_SR_SHIFT) |
			r->cur_val;

		if (r->enable)
			results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;

		if (INTEL_INFO(dev)->gen >= 8)
			results->wm_lp[wm_lp - 1] |=
				r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
		else
			results->wm_lp[wm_lp - 1] |=
				r->fbc_val << WM1_LP_FBC_SHIFT;

		/*
		 * Always set WM1S_LP_EN when spr_val != 0, even if the
		 * level is disabled. Doing otherwise could cause underruns.
		 */
		if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) {
			WARN_ON(wm_lp != 1);
			results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
		} else
			results->wm_lp_spr[wm_lp - 1] = r->spr_val;
	}

	/* LP0 register values */
	for_each_intel_crtc(dev, intel_crtc) {
		enum i915_pipe pipe = intel_crtc->pipe;
		const struct intel_wm_level *r =
			&intel_crtc->wm.active.ilk.wm[0];

		if (WARN_ON(!r->enable))
			continue;

		results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;

		results->wm_pipe[pipe] =
			(r->pri_val << WM0_PIPE_PLANE_SHIFT) |
			(r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
			r->cur_val;
	}
}

/*
 * Find the result with the highest level enabled. Check for enable_fbc_wm in
 * case both are at the same level. Prefer r1 in case they're the same.
 */
static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
						  struct intel_pipe_wm *r1,
						  struct intel_pipe_wm *r2)
{
	int level, max_level = ilk_wm_max_level(to_i915(dev));
	int level1 = 0, level2 = 0;

	for (level = 1; level <= max_level; level++) {
		if (r1->wm[level].enable)
			level1 = level;
		if (r2->wm[level].enable)
			level2 = level;
	}

	if (level1 == level2) {
		if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
			return r2;
		else
			return r1;
	} else if (level1 > level2) {
		return r1;
	} else {
		return r2;
	}
}

/* dirty bits used to track which watermarks need changes */
#define WM_DIRTY_PIPE(pipe) (1 << (pipe))
#define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
#define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
#define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
#define WM_DIRTY_FBC (1 << 24)
#define WM_DIRTY_DDB (1 << 25)

static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
					 const struct ilk_wm_values *old,
					 const struct ilk_wm_values *new)
{
	unsigned int dirty = 0;
	enum i915_pipe pipe;
	int wm_lp;

	for_each_pipe(dev_priv, pipe) {
		if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
			dirty |= WM_DIRTY_LINETIME(pipe);
			/* Must disable LP1+ watermarks too */
			dirty |= WM_DIRTY_LP_ALL;
		}

		if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
			dirty |= WM_DIRTY_PIPE(pipe);
			/* Must disable LP1+ watermarks too */
			dirty |= WM_DIRTY_LP_ALL;
		}
	}

	if (old->enable_fbc_wm != new->enable_fbc_wm) {
		dirty |= WM_DIRTY_FBC;
		/* Must disable LP1+ watermarks too */
		dirty |= WM_DIRTY_LP_ALL;
	}

	if (old->partitioning != new->partitioning) {
		dirty |= WM_DIRTY_DDB;
		/* Must disable LP1+ watermarks too */
		dirty |= WM_DIRTY_LP_ALL;
	}

	/* LP1+ watermarks already deemed dirty, no need to continue */
	if (dirty & WM_DIRTY_LP_ALL)
		return dirty;

	/* Find the lowest numbered LP1+ watermark in need of an update... */
	for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
		if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
		    old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
			break;
	}

	/* ...and mark it and all higher numbered LP1+ watermarks as dirty */
	for (; wm_lp <= 3; wm_lp++)
		dirty |= WM_DIRTY_LP(wm_lp);

	return dirty;
}

static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
			       unsigned int dirty)
{
	struct ilk_wm_values *previous = &dev_priv->wm.hw;
	bool changed = false;

	if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
		previous->wm_lp[2] &= ~WM1_LP_SR_EN;
		I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
		changed = true;
	}
	if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
		previous->wm_lp[1] &= ~WM1_LP_SR_EN;
		I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
		changed = true;
	}
	if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
		previous->wm_lp[0] &= ~WM1_LP_SR_EN;
		I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
		changed = true;
	}

	/*
	 * Don't touch WM1S_LP_EN here.
	 * Doing so could cause underruns.
2769 */ 2770 2771 return changed; 2772 } 2773 2774 /* 2775 * The spec says we shouldn't write when we don't need, because every write 2776 * causes WMs to be re-evaluated, expending some power. 2777 */ 2778 static void ilk_write_wm_values(struct drm_i915_private *dev_priv, 2779 struct ilk_wm_values *results) 2780 { 2781 struct drm_device *dev = &dev_priv->drm; 2782 struct ilk_wm_values *previous = &dev_priv->wm.hw; 2783 unsigned int dirty; 2784 uint32_t val; 2785 2786 dirty = ilk_compute_wm_dirty(dev_priv, previous, results); 2787 if (!dirty) 2788 return; 2789 2790 _ilk_disable_lp_wm(dev_priv, dirty); 2791 2792 if (dirty & WM_DIRTY_PIPE(PIPE_A)) 2793 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]); 2794 if (dirty & WM_DIRTY_PIPE(PIPE_B)) 2795 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]); 2796 if (dirty & WM_DIRTY_PIPE(PIPE_C)) 2797 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]); 2798 2799 if (dirty & WM_DIRTY_LINETIME(PIPE_A)) 2800 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]); 2801 if (dirty & WM_DIRTY_LINETIME(PIPE_B)) 2802 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]); 2803 if (dirty & WM_DIRTY_LINETIME(PIPE_C)) 2804 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]); 2805 2806 if (dirty & WM_DIRTY_DDB) { 2807 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { 2808 val = I915_READ(WM_MISC); 2809 if (results->partitioning == INTEL_DDB_PART_1_2) 2810 val &= ~WM_MISC_DATA_PARTITION_5_6; 2811 else 2812 val |= WM_MISC_DATA_PARTITION_5_6; 2813 I915_WRITE(WM_MISC, val); 2814 } else { 2815 val = I915_READ(DISP_ARB_CTL2); 2816 if (results->partitioning == INTEL_DDB_PART_1_2) 2817 val &= ~DISP_DATA_PARTITION_5_6; 2818 else 2819 val |= DISP_DATA_PARTITION_5_6; 2820 I915_WRITE(DISP_ARB_CTL2, val); 2821 } 2822 } 2823 2824 if (dirty & WM_DIRTY_FBC) { 2825 val = I915_READ(DISP_ARB_CTL); 2826 if (results->enable_fbc_wm) 2827 val &= ~DISP_FBC_WM_DIS; 2828 else 2829 val |= DISP_FBC_WM_DIS; 2830 I915_WRITE(DISP_ARB_CTL, val); 2831 } 2832 2833 if (dirty & WM_DIRTY_LP(1) && 2834 previous->wm_lp_spr[0] != results->wm_lp_spr[0]) 2835 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]); 2836 2837 if (INTEL_INFO(dev)->gen >= 7) { 2838 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1]) 2839 I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]); 2840 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2]) 2841 I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]); 2842 } 2843 2844 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0]) 2845 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]); 2846 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1]) 2847 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]); 2848 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2]) 2849 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]); 2850 2851 dev_priv->wm.hw = *results; 2852 } 2853 2854 bool ilk_disable_lp_wm(struct drm_device *dev) 2855 { 2856 struct drm_i915_private *dev_priv = to_i915(dev); 2857 2858 return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); 2859 } 2860 2861 #define SKL_SAGV_BLOCK_TIME 30 /* µs */ 2862 2863 /* 2864 * Return the index of a plane in the SKL DDB and wm result arrays. Primary 2865 * plane is always in slot 0, cursor is always in slot I915_MAX_PLANES-1, and 2866 * other universal planes are in indices 1..n. Note that this may leave unused 2867 * indices between the top "sprite" plane and the cursor. 
2868 */ 2869 static int 2870 skl_wm_plane_id(const struct intel_plane *plane) 2871 { 2872 switch (plane->base.type) { 2873 case DRM_PLANE_TYPE_PRIMARY: 2874 return 0; 2875 case DRM_PLANE_TYPE_CURSOR: 2876 return PLANE_CURSOR; 2877 case DRM_PLANE_TYPE_OVERLAY: 2878 return plane->plane + 1; 2879 default: 2880 MISSING_CASE(plane->base.type); 2881 return plane->plane; 2882 } 2883 } 2884 2885 /* 2886 * FIXME: We still don't have the proper code detect if we need to apply the WA, 2887 * so assume we'll always need it in order to avoid underruns. 2888 */ 2889 static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) 2890 { 2891 struct drm_i915_private *dev_priv = to_i915(state->base.dev); 2892 2893 if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) || 2894 IS_KABYLAKE(dev_priv)) 2895 return true; 2896 2897 return false; 2898 } 2899 2900 static bool 2901 intel_has_sagv(struct drm_i915_private *dev_priv) 2902 { 2903 if (IS_KABYLAKE(dev_priv)) 2904 return true; 2905 2906 if (IS_SKYLAKE(dev_priv) && 2907 dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED) 2908 return true; 2909 2910 return false; 2911 } 2912 2913 /* 2914 * SAGV dynamically adjusts the system agent voltage and clock frequencies 2915 * depending on power and performance requirements. The display engine access 2916 * to system memory is blocked during the adjustment time. Because of the 2917 * blocking time, having this enabled can cause full system hangs and/or pipe 2918 * underruns if we don't meet all of the following requirements: 2919 * 2920 * - <= 1 pipe enabled 2921 * - All planes can enable watermarks for latencies >= SAGV engine block time 2922 * - We're not using an interlaced display configuration 2923 */ 2924 int 2925 intel_enable_sagv(struct drm_i915_private *dev_priv) 2926 { 2927 int ret; 2928 2929 if (!intel_has_sagv(dev_priv)) 2930 return 0; 2931 2932 if (dev_priv->sagv_status == I915_SAGV_ENABLED) 2933 return 0; 2934 2935 DRM_DEBUG_KMS("Enabling the SAGV\n"); 2936 mutex_lock(&dev_priv->rps.hw_lock); 2937 2938 ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, 2939 GEN9_SAGV_ENABLE); 2940 2941 /* We don't need to wait for the SAGV when enabling */ 2942 mutex_unlock(&dev_priv->rps.hw_lock); 2943 2944 /* 2945 * Some skl systems, pre-release machines in particular, 2946 * don't actually have an SAGV. 
2947 */ 2948 if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { 2949 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); 2950 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; 2951 return 0; 2952 } else if (ret < 0) { 2953 DRM_ERROR("Failed to enable the SAGV\n"); 2954 return ret; 2955 } 2956 2957 dev_priv->sagv_status = I915_SAGV_ENABLED; 2958 return 0; 2959 } 2960 2961 static int 2962 intel_do_sagv_disable(struct drm_i915_private *dev_priv) 2963 { 2964 int ret; 2965 uint32_t temp = GEN9_SAGV_DISABLE; 2966 2967 ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL, 2968 &temp); 2969 if (ret) 2970 return ret; 2971 else 2972 return temp & GEN9_SAGV_IS_DISABLED; 2973 } 2974 2975 int 2976 intel_disable_sagv(struct drm_i915_private *dev_priv) 2977 { 2978 int ret, result; 2979 2980 if (!intel_has_sagv(dev_priv)) 2981 return 0; 2982 2983 if (dev_priv->sagv_status == I915_SAGV_DISABLED) 2984 return 0; 2985 2986 DRM_DEBUG_KMS("Disabling the SAGV\n"); 2987 mutex_lock(&dev_priv->rps.hw_lock); 2988 2989 /* bspec says to keep retrying for at least 1 ms */ 2990 ret = wait_for(result = intel_do_sagv_disable(dev_priv), 1); 2991 mutex_unlock(&dev_priv->rps.hw_lock); 2992 2993 if (ret == -ETIMEDOUT) { 2994 DRM_ERROR("Request to disable SAGV timed out\n"); 2995 return -ETIMEDOUT; 2996 } 2997 2998 /* 2999 * Some skl systems, pre-release machines in particular, 3000 * don't actually have an SAGV. 3001 */ 3002 if (IS_SKYLAKE(dev_priv) && result == -ENXIO) { 3003 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); 3004 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; 3005 return 0; 3006 } else if (result < 0) { 3007 DRM_ERROR("Failed to disable the SAGV\n"); 3008 return result; 3009 } 3010 3011 dev_priv->sagv_status = I915_SAGV_DISABLED; 3012 return 0; 3013 } 3014 3015 bool intel_can_enable_sagv(struct drm_atomic_state *state) 3016 { 3017 struct drm_device *dev = state->dev; 3018 struct drm_i915_private *dev_priv = to_i915(dev); 3019 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 3020 struct intel_crtc *crtc; 3021 struct intel_plane *plane; 3022 struct intel_crtc_state *cstate; 3023 struct skl_plane_wm *wm; 3024 enum i915_pipe pipe; 3025 int level, latency; 3026 3027 if (!intel_has_sagv(dev_priv)) 3028 return false; 3029 3030 /* 3031 * SKL workaround: bspec recommends we disable the SAGV when we have 3032 * more then one pipe enabled 3033 * 3034 * If there are no active CRTCs, no additional checks need be performed 3035 */ 3036 if (hweight32(intel_state->active_crtcs) == 0) 3037 return true; 3038 else if (hweight32(intel_state->active_crtcs) > 1) 3039 return false; 3040 3041 /* Since we're now guaranteed to only have one active CRTC... 
	pipe = ffs(intel_state->active_crtcs) - 1;
	crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]);
	cstate = to_intel_crtc_state(crtc->base.state);

	if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
		return false;

	for_each_intel_plane_on_crtc(dev, crtc, plane) {
		wm = &cstate->wm.skl.optimal.planes[skl_wm_plane_id(plane)];

		/* Skip this plane if it's not enabled */
		if (!wm->wm[0].plane_en)
			continue;

		/* Find the highest enabled wm level for this plane */
		for (level = ilk_wm_max_level(dev_priv);
		     !wm->wm[level].plane_en; --level)
			{ }

		latency = dev_priv->wm.skl_latency[level];

		if (skl_needs_memory_bw_wa(intel_state) &&
		    plane->base.state->fb->modifier[0] ==
		    I915_FORMAT_MOD_X_TILED)
			latency += 15;

		/*
		 * If any of the planes on this pipe don't enable wm levels
		 * that incur memory latencies higher than 30µs we can't
		 * enable the SAGV
		 */
		if (latency < SKL_SAGV_BLOCK_TIME)
			return false;
	}

	return true;
}
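
/*
 * Example of the check above (illustrative latencies, not from bspec):
 * if a plane's highest enabled level is 3 with skl_latency[3] = 8us, an
 * X-tiled framebuffer under the memory bandwidth WA checks 8 + 15 = 23us,
 * which is below SKL_SAGV_BLOCK_TIME (30us), so the SAGV has to stay off
 * for that configuration.
 */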
3117 */ 3118 if (!intel_state->active_pipe_changes) { 3119 *alloc = to_intel_crtc(for_crtc)->hw_ddb; 3120 return; 3121 } 3122 3123 nth_active_pipe = hweight32(intel_state->active_crtcs & 3124 (drm_crtc_mask(for_crtc) - 1)); 3125 pipe_size = ddb_size / hweight32(intel_state->active_crtcs); 3126 alloc->start = nth_active_pipe * ddb_size / *num_active; 3127 alloc->end = alloc->start + pipe_size; 3128 } 3129 3130 static unsigned int skl_cursor_allocation(int num_active) 3131 { 3132 if (num_active == 1) 3133 return 32; 3134 3135 return 8; 3136 } 3137 3138 static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg) 3139 { 3140 entry->start = reg & 0x3ff; 3141 entry->end = (reg >> 16) & 0x3ff; 3142 if (entry->end) 3143 entry->end += 1; 3144 } 3145 3146 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, 3147 struct skl_ddb_allocation *ddb /* out */) 3148 { 3149 enum i915_pipe pipe; 3150 int plane; 3151 u32 val; 3152 3153 memset(ddb, 0, sizeof(*ddb)); 3154 3155 for_each_pipe(dev_priv, pipe) { 3156 enum intel_display_power_domain power_domain; 3157 3158 power_domain = POWER_DOMAIN_PIPE(pipe); 3159 if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) 3160 continue; 3161 3162 for_each_plane(dev_priv, pipe, plane) { 3163 val = I915_READ(PLANE_BUF_CFG(pipe, plane)); 3164 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane], 3165 val); 3166 } 3167 3168 val = I915_READ(CUR_BUF_CFG(pipe)); 3169 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR], 3170 val); 3171 3172 intel_display_power_put(dev_priv, power_domain); 3173 } 3174 } 3175 3176 /* 3177 * Determines the downscale amount of a plane for the purposes of watermark calculations. 3178 * The bspec defines downscale amount as: 3179 * 3180 * """ 3181 * Horizontal down scale amount = maximum[1, Horizontal source size / 3182 * Horizontal destination size] 3183 * Vertical down scale amount = maximum[1, Vertical source size / 3184 * Vertical destination size] 3185 * Total down scale amount = Horizontal down scale amount * 3186 * Vertical down scale amount 3187 * """ 3188 * 3189 * Return value is provided in 16.16 fixed point form to retain fractional part. 3190 * Caller should take care of dividing & rounding off the value. 3191 */ 3192 static uint32_t 3193 skl_plane_downscale_amount(const struct intel_plane_state *pstate) 3194 { 3195 uint32_t downscale_h, downscale_w; 3196 uint32_t src_w, src_h, dst_w, dst_h; 3197 3198 if (WARN_ON(!pstate->base.visible)) 3199 return DRM_PLANE_HELPER_NO_SCALING; 3200 3201 /* n.b., src is 16.16 fixed point, dst is whole integer */ 3202 src_w = drm_rect_width(&pstate->base.src); 3203 src_h = drm_rect_height(&pstate->base.src); 3204 dst_w = drm_rect_width(&pstate->base.dst); 3205 dst_h = drm_rect_height(&pstate->base.dst); 3206 if (intel_rotation_90_or_270(pstate->base.rotation)) 3207 swap(dst_w, dst_h); 3208 3209 downscale_h = max(src_h / dst_h, (uint32_t)DRM_PLANE_HELPER_NO_SCALING); 3210 downscale_w = max(src_w / dst_w, (uint32_t)DRM_PLANE_HELPER_NO_SCALING); 3211 3212 /* Provide result in 16.16 fixed point */ 3213 return (uint64_t)downscale_w * downscale_h >> 16; 3214 } 3215 3216 static unsigned int 3217 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, 3218 struct drm_plane_state *pstate, 3219 int y) 3220 { 3221 struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); 3222 struct drm_framebuffer *fb = pstate->fb; 3223 uint32_t down_scale_amount, data_rate; 3224 uint32_t width = 0, height = 0; 3225 unsigned format = fb ? 

static unsigned int
skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
			     struct drm_plane_state *pstate,
			     int y)
{
	struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
	struct drm_framebuffer *fb = pstate->fb;
	uint32_t down_scale_amount, data_rate;
	uint32_t width = 0, height = 0;
	unsigned format = fb ? fb->pixel_format : DRM_FORMAT_XRGB8888;

	if (!intel_pstate->base.visible)
		return 0;
	if (pstate->plane->type == DRM_PLANE_TYPE_CURSOR)
		return 0;
	if (y && format != DRM_FORMAT_NV12)
		return 0;

	width = drm_rect_width(&intel_pstate->base.src) >> 16;
	height = drm_rect_height(&intel_pstate->base.src) >> 16;

	if (intel_rotation_90_or_270(pstate->rotation))
		swap(width, height);

	/* for planar format */
	if (format == DRM_FORMAT_NV12) {
		if (y)	/* y-plane data rate */
			data_rate = width * height *
				drm_format_plane_cpp(format, 0);
		else	/* uv-plane data rate */
			data_rate = (width / 2) * (height / 2) *
				drm_format_plane_cpp(format, 1);
	} else {
		/* for packed formats */
		data_rate = width * height * drm_format_plane_cpp(format, 0);
	}

	down_scale_amount = skl_plane_downscale_amount(intel_pstate);

	return (uint64_t)data_rate * down_scale_amount >> 16;
}

/*
 * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
 * a 8192x4096@32bpp framebuffer:
 *   3 * 4096 * 8192 * 4 < 2^32
 */
static unsigned int
skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate)
{
	struct drm_crtc_state *cstate = &intel_cstate->base;
	struct drm_atomic_state *state = cstate->state;
	struct drm_crtc *crtc = cstate->crtc;
	struct drm_device *dev = crtc->dev;
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	struct drm_plane *plane;
	const struct intel_plane *intel_plane;
	struct drm_plane_state *pstate;
	unsigned int rate, total_data_rate = 0;
	int id;
	int i;

	if (WARN_ON(!state))
		return 0;

	/* Calculate and cache data rate for each plane */
	for_each_plane_in_state(state, plane, pstate, i) {
		id = skl_wm_plane_id(to_intel_plane(plane));
		intel_plane = to_intel_plane(plane);

		if (intel_plane->pipe != intel_crtc->pipe)
			continue;

		/* packed/uv */
		rate = skl_plane_relative_data_rate(intel_cstate,
						    pstate, 0);
		intel_cstate->wm.skl.plane_data_rate[id] = rate;

		/* y-plane */
		rate = skl_plane_relative_data_rate(intel_cstate,
						    pstate, 1);
		intel_cstate->wm.skl.plane_y_data_rate[id] = rate;
	}

	/* Calculate CRTC's total data rate from cached values */
	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
		int id = skl_wm_plane_id(intel_plane);

		/* packed/uv */
		total_data_rate += intel_cstate->wm.skl.plane_data_rate[id];
		total_data_rate += intel_cstate->wm.skl.plane_y_data_rate[id];
	}

	return total_data_rate;
}
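
/*
 * Illustrative data rate numbers for the functions above: a 1920x1080
 * XRGB8888 plane with no downscaling contributes 1920 * 1080 * 4 bytes
 * to the total. For NV12 the y pass (y == 1) uses the full 1 byte/pixel
 * Y plane while the uv pass uses (width / 2) * (height / 2) * 2.
 */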

static uint16_t
skl_ddb_min_alloc(struct drm_plane_state *pstate,
		  const int y)
{
	struct drm_framebuffer *fb = pstate->fb;
	struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
	uint32_t src_w, src_h;
	uint32_t min_scanlines = 8;
	uint8_t plane_bpp;

	if (WARN_ON(!fb))
		return 0;

	/* For packed formats, no y-plane, return 0 */
	if (y && fb->pixel_format != DRM_FORMAT_NV12)
		return 0;

	/* For Non Y-tile return 8-blocks */
	if (fb->modifier[0] != I915_FORMAT_MOD_Y_TILED &&
	    fb->modifier[0] != I915_FORMAT_MOD_Yf_TILED)
		return 8;

	src_w = drm_rect_width(&intel_pstate->base.src) >> 16;
	src_h = drm_rect_height(&intel_pstate->base.src) >> 16;

	if (intel_rotation_90_or_270(pstate->rotation))
		swap(src_w, src_h);

	/* Halve UV plane width and height for NV12 */
	if (fb->pixel_format == DRM_FORMAT_NV12 && !y) {
		src_w /= 2;
		src_h /= 2;
	}

	if (fb->pixel_format == DRM_FORMAT_NV12 && !y)
		plane_bpp = drm_format_plane_cpp(fb->pixel_format, 1);
	else
		plane_bpp = drm_format_plane_cpp(fb->pixel_format, 0);

	if (intel_rotation_90_or_270(pstate->rotation)) {
		switch (plane_bpp) {
		case 1:
			min_scanlines = 32;
			break;
		case 2:
			min_scanlines = 16;
			break;
		case 4:
			min_scanlines = 8;
			break;
		case 8:
			min_scanlines = 4;
			break;
		default:
			WARN(1, "Unsupported pixel depth %u for rotation",
			     plane_bpp);
			min_scanlines = 32;
		}
	}

	return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) * min_scanlines/4 + 3;
}

static int
skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
		      struct skl_ddb_allocation *ddb /* out */)
{
	struct drm_atomic_state *state = cstate->base.state;
	struct drm_crtc *crtc = cstate->base.crtc;
	struct drm_device *dev = crtc->dev;
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	struct intel_plane *intel_plane;
	struct drm_plane *plane;
	struct drm_plane_state *pstate;
	enum i915_pipe pipe = intel_crtc->pipe;
	struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
	uint16_t alloc_size, start, cursor_blocks;
	uint16_t *minimum = cstate->wm.skl.minimum_blocks;
	uint16_t *y_minimum = cstate->wm.skl.minimum_y_blocks;
	unsigned int total_data_rate;
	int num_active;
	int id, i;

	/* Clear the partitioning for disabled planes. */
	memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
	memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe]));

	if (WARN_ON(!state))
		return 0;

	if (!cstate->base.active) {
		alloc->start = alloc->end = 0;
		return 0;
	}

	skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active);
	alloc_size = skl_ddb_entry_size(alloc);
	if (alloc_size == 0) {
		memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
		return 0;
	}

	cursor_blocks = skl_cursor_allocation(num_active);
	ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - cursor_blocks;
	ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;

	alloc_size -= cursor_blocks;

	/* 1. Allocate the minimum required blocks for each active plane */
	for_each_plane_in_state(state, plane, pstate, i) {
		intel_plane = to_intel_plane(plane);
		id = skl_wm_plane_id(intel_plane);

		if (intel_plane->pipe != pipe)
			continue;

		if (!to_intel_plane_state(pstate)->base.visible) {
			minimum[id] = 0;
			y_minimum[id] = 0;
			continue;
		}
		if (plane->type == DRM_PLANE_TYPE_CURSOR) {
			minimum[id] = 0;
			y_minimum[id] = 0;
			continue;
		}

		minimum[id] = skl_ddb_min_alloc(pstate, 0);
		y_minimum[id] = skl_ddb_min_alloc(pstate, 1);
	}

	for (i = 0; i < PLANE_CURSOR; i++) {
		alloc_size -= minimum[i];
		alloc_size -= y_minimum[i];
	}

	/*
	 * 2. Distribute the remaining space in proportion to the amount of
	 * data each plane needs to fetch from memory.
	 *
	 * FIXME: we may not allocate every single block here.
	 */
	total_data_rate = skl_get_total_relative_data_rate(cstate);
	if (total_data_rate == 0)
		return 0;

	start = alloc->start;
	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
		unsigned int data_rate, y_data_rate;
		uint16_t plane_blocks, y_plane_blocks = 0;
		int id = skl_wm_plane_id(intel_plane);

		data_rate = cstate->wm.skl.plane_data_rate[id];

		/*
		 * allocation for (packed formats) or (uv-plane part of planar format):
		 * promote the expression to 64 bits to avoid overflowing, the
		 * result is less than the available space since
		 * data_rate / total_data_rate < 1
		 */
		plane_blocks = minimum[id];
		plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
					total_data_rate);

		/* Leave disabled planes at (0,0) */
		if (data_rate) {
			ddb->plane[pipe][id].start = start;
			ddb->plane[pipe][id].end = start + plane_blocks;
		}

		start += plane_blocks;

		/*
		 * allocation for y_plane part of planar format:
		 */
		y_data_rate = cstate->wm.skl.plane_y_data_rate[id];

		y_plane_blocks = y_minimum[id];
		y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
					  total_data_rate);

		if (y_data_rate) {
			ddb->y_plane[pipe][id].start = start;
			ddb->y_plane[pipe][id].end = start + y_plane_blocks;
		}

		start += y_plane_blocks;
	}

	return 0;
}

/*
 * The max latency should be 257 (max the punit can code is 255 and we add 2us
 * for the read latency) and cpp should always be <= 8, so that
 * should allow pixel_rate up to ~2 GHz which seems sufficient since max
 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
 */
static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency)
{
	uint32_t wm_intermediate_val, ret;

	if (latency == 0)
		return UINT_MAX;

	wm_intermediate_val = latency * pixel_rate * cpp / 512;
	ret = DIV_ROUND_UP(wm_intermediate_val, 1000);

	return ret;
}

static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
			       uint32_t latency, uint32_t plane_blocks_per_line)
{
	uint32_t ret;
	uint32_t wm_intermediate_val;

	if (latency == 0)
		return UINT_MAX;

	wm_intermediate_val = latency * pixel_rate;
	ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) *
	      plane_blocks_per_line;

	return ret;
}

static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
					      struct intel_plane_state *pstate)
{
	uint64_t adjusted_pixel_rate;
	uint64_t downscale_amount;
	uint64_t pixel_rate;

	/* Shouldn't reach here on disabled planes... */
	if (WARN_ON(!pstate->base.visible))
		return 0;

	/*
	 * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
	 * with additional adjustments for plane-specific scaling.
3552 */ 3553 adjusted_pixel_rate = ilk_pipe_pixel_rate(cstate); 3554 downscale_amount = skl_plane_downscale_amount(pstate); 3555 3556 pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; 3557 WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0)); 3558 3559 return pixel_rate; 3560 } 3561 3562 static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, 3563 struct intel_crtc_state *cstate, 3564 struct intel_plane_state *intel_pstate, 3565 uint16_t ddb_allocation, 3566 int level, 3567 uint16_t *out_blocks, /* out */ 3568 uint8_t *out_lines, /* out */ 3569 bool *enabled /* out */) 3570 { 3571 struct drm_plane_state *pstate = &intel_pstate->base; 3572 struct drm_framebuffer *fb = pstate->fb; 3573 uint32_t latency = dev_priv->wm.skl_latency[level]; 3574 uint32_t method1, method2; 3575 uint32_t plane_bytes_per_line, plane_blocks_per_line; 3576 uint32_t res_blocks, res_lines; 3577 uint32_t selected_result; 3578 uint8_t cpp; 3579 uint32_t width = 0, height = 0; 3580 uint32_t plane_pixel_rate; 3581 uint32_t y_tile_minimum, y_min_scanlines; 3582 struct intel_atomic_state *state = 3583 to_intel_atomic_state(cstate->base.state); 3584 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); 3585 3586 if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { 3587 *enabled = false; 3588 return 0; 3589 } 3590 3591 if (apply_memory_bw_wa && fb->modifier[0] == I915_FORMAT_MOD_X_TILED) 3592 latency += 15; 3593 3594 width = drm_rect_width(&intel_pstate->base.src) >> 16; 3595 height = drm_rect_height(&intel_pstate->base.src) >> 16; 3596 3597 if (intel_rotation_90_or_270(pstate->rotation)) 3598 swap(width, height); 3599 3600 cpp = drm_format_plane_cpp(fb->pixel_format, 0); 3601 plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); 3602 3603 if (intel_rotation_90_or_270(pstate->rotation)) { 3604 int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ? 
3605 drm_format_plane_cpp(fb->pixel_format, 1) : 3606 drm_format_plane_cpp(fb->pixel_format, 0); 3607 3608 switch (cpp) { 3609 case 1: 3610 y_min_scanlines = 16; 3611 break; 3612 case 2: 3613 y_min_scanlines = 8; 3614 break; 3615 case 4: 3616 y_min_scanlines = 4; 3617 break; 3618 default: 3619 MISSING_CASE(cpp); 3620 return -EINVAL; 3621 } 3622 } else { 3623 y_min_scanlines = 4; 3624 } 3625 3626 plane_bytes_per_line = width * cpp; 3627 if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || 3628 fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { 3629 plane_blocks_per_line = 3630 DIV_ROUND_UP(plane_bytes_per_line * y_min_scanlines, 512); 3631 plane_blocks_per_line /= y_min_scanlines; 3632 } else if (fb->modifier[0] == DRM_FORMAT_MOD_NONE) { 3633 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) 3634 + 1; 3635 } else { 3636 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); 3637 } 3638 3639 method1 = skl_wm_method1(plane_pixel_rate, cpp, latency); 3640 method2 = skl_wm_method2(plane_pixel_rate, 3641 cstate->base.adjusted_mode.crtc_htotal, 3642 latency, 3643 plane_blocks_per_line); 3644 3645 y_tile_minimum = plane_blocks_per_line * y_min_scanlines; 3646 if (apply_memory_bw_wa) 3647 y_tile_minimum *= 2; 3648 3649 if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || 3650 fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { 3651 selected_result = max(method2, y_tile_minimum); 3652 } else { 3653 if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && 3654 (plane_bytes_per_line / 512 < 1)) 3655 selected_result = method2; 3656 else if ((ddb_allocation / plane_blocks_per_line) >= 1) 3657 selected_result = min(method1, method2); 3658 else 3659 selected_result = method1; 3660 } 3661 3662 res_blocks = selected_result + 1; 3663 res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line); 3664 3665 if (level >= 1 && level <= 7) { 3666 if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || 3667 fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { 3668 res_blocks += y_tile_minimum; 3669 res_lines += y_min_scanlines; 3670 } else { 3671 res_blocks++; 3672 } 3673 } 3674 3675 if (res_blocks >= ddb_allocation || res_lines > 31) { 3676 *enabled = false; 3677 3678 /* 3679 * If there are no valid level 0 watermarks, then we can't 3680 * support this display configuration. 
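 * Higher levels are optional: a level that doesn't fit is simply
 * reported as disabled (the level != 0 branch below returns success),
 * and only a failing level 0 watermark is treated as a fatal error.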
3681 */ 3682 if (level) { 3683 return 0; 3684 } else { 3685 DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n"); 3686 DRM_DEBUG_KMS("Plane %d.%d: blocks required = %u/%u, lines required = %u/31\n", 3687 to_intel_crtc(cstate->base.crtc)->pipe, 3688 skl_wm_plane_id(to_intel_plane(pstate->plane)), 3689 res_blocks, ddb_allocation, res_lines); 3690 3691 return -EINVAL; 3692 } 3693 } 3694 3695 *out_blocks = res_blocks; 3696 *out_lines = res_lines; 3697 *enabled = true; 3698 3699 return 0; 3700 } 3701 3702 static int 3703 skl_compute_wm_level(const struct drm_i915_private *dev_priv, 3704 struct skl_ddb_allocation *ddb, 3705 struct intel_crtc_state *cstate, 3706 struct intel_plane *intel_plane, 3707 int level, 3708 struct skl_wm_level *result) 3709 { 3710 struct drm_atomic_state *state = cstate->base.state; 3711 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 3712 struct drm_plane *plane = &intel_plane->base; 3713 struct intel_plane_state *intel_pstate = NULL; 3714 uint16_t ddb_blocks; 3715 enum i915_pipe pipe = intel_crtc->pipe; 3716 int ret; 3717 int i = skl_wm_plane_id(intel_plane); 3718 3719 if (state) 3720 intel_pstate = 3721 intel_atomic_get_existing_plane_state(state, 3722 intel_plane); 3723 3724 /* 3725 * Note: If we start supporting multiple pending atomic commits against 3726 * the same planes/CRTC's in the future, plane->state will no longer be 3727 * the correct pre-state to use for the calculations here and we'll 3728 * need to change where we get the 'unchanged' plane data from. 3729 * 3730 * For now this is fine because we only allow one queued commit against 3731 * a CRTC. Even if the plane isn't modified by this transaction and we 3732 * don't have a plane lock, we still have the CRTC's lock, so we know 3733 * that no other transactions are racing with us to update it. 3734 */ 3735 if (!intel_pstate) 3736 intel_pstate = to_intel_plane_state(plane->state); 3737 3738 WARN_ON(!intel_pstate->base.fb); 3739 3740 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]); 3741 3742 ret = skl_compute_plane_wm(dev_priv, 3743 cstate, 3744 intel_pstate, 3745 ddb_blocks, 3746 level, 3747 &result->plane_res_b, 3748 &result->plane_res_l, 3749 &result->plane_en); 3750 if (ret) 3751 return ret; 3752 3753 return 0; 3754 } 3755 3756 static uint32_t 3757 skl_compute_linetime_wm(struct intel_crtc_state *cstate) 3758 { 3759 uint32_t pixel_rate; 3760 3761 if (!cstate->base.active) 3762 return 0; 3763 3764 pixel_rate = ilk_pipe_pixel_rate(cstate); 3765 3766 if (WARN_ON(pixel_rate == 0)) 3767 return 0; 3768 3769 return DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal * 1000, 3770 pixel_rate); 3771 } 3772 3773 static void skl_compute_transition_wm(struct intel_crtc_state *cstate, 3774 struct skl_wm_level *trans_wm /* out */) 3775 { 3776 if (!cstate->base.active) 3777 return; 3778 3779 /* Until we know more, just disable transition WMs */ 3780 trans_wm->plane_en = false; 3781 } 3782 3783 static int skl_build_pipe_wm(struct intel_crtc_state *cstate, 3784 struct skl_ddb_allocation *ddb, 3785 struct skl_pipe_wm *pipe_wm) 3786 { 3787 struct drm_device *dev = cstate->base.crtc->dev; 3788 const struct drm_i915_private *dev_priv = to_i915(dev); 3789 struct intel_plane *intel_plane; 3790 struct skl_plane_wm *wm; 3791 int level, max_level = ilk_wm_max_level(dev_priv); 3792 int ret; 3793 3794 /* 3795 * We'll only calculate watermarks for planes that are actually 3796 * enabled, so make sure all other planes are set as disabled. 
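 * (For reference: ilk_wm_max_level() reports 7 on SKL-class hardware,
 * so the loop below computes eight levels, 0 through 7, plus one
 * transition watermark for each enabled plane.)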
3797 */ 3798 memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes)); 3799 3800 for_each_intel_plane_mask(&dev_priv->drm, 3801 intel_plane, 3802 cstate->base.plane_mask) { 3803 wm = &pipe_wm->planes[skl_wm_plane_id(intel_plane)]; 3804 3805 for (level = 0; level <= max_level; level++) { 3806 ret = skl_compute_wm_level(dev_priv, ddb, cstate, 3807 intel_plane, level, 3808 &wm->wm[level]); 3809 if (ret) 3810 return ret; 3811 } 3812 skl_compute_transition_wm(cstate, &wm->trans_wm); 3813 } 3814 pipe_wm->linetime = skl_compute_linetime_wm(cstate); 3815 3816 return 0; 3817 } 3818 3819 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, 3820 i915_reg_t reg, 3821 const struct skl_ddb_entry *entry) 3822 { 3823 if (entry->end) 3824 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start); 3825 else 3826 I915_WRITE(reg, 0); 3827 } 3828 3829 static void skl_write_wm_level(struct drm_i915_private *dev_priv, 3830 i915_reg_t reg, 3831 const struct skl_wm_level *level) 3832 { 3833 uint32_t val = 0; 3834 3835 if (level->plane_en) { 3836 val |= PLANE_WM_EN; 3837 val |= level->plane_res_b; 3838 val |= level->plane_res_l << PLANE_WM_LINES_SHIFT; 3839 } 3840 3841 I915_WRITE(reg, val); 3842 } 3843 3844 void skl_write_plane_wm(struct intel_crtc *intel_crtc, 3845 const struct skl_plane_wm *wm, 3846 const struct skl_ddb_allocation *ddb, 3847 int plane) 3848 { 3849 struct drm_crtc *crtc = &intel_crtc->base; 3850 struct drm_device *dev = crtc->dev; 3851 struct drm_i915_private *dev_priv = to_i915(dev); 3852 int level, max_level = ilk_wm_max_level(dev_priv); 3853 enum i915_pipe pipe = intel_crtc->pipe; 3854 3855 for (level = 0; level <= max_level; level++) { 3856 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane, level), 3857 &wm->wm[level]); 3858 } 3859 skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane), 3860 &wm->trans_wm); 3861 3862 skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane), 3863 &ddb->plane[pipe][plane]); 3864 skl_ddb_entry_write(dev_priv, PLANE_NV12_BUF_CFG(pipe, plane), 3865 &ddb->y_plane[pipe][plane]); 3866 } 3867 3868 void skl_write_cursor_wm(struct intel_crtc *intel_crtc, 3869 const struct skl_plane_wm *wm, 3870 const struct skl_ddb_allocation *ddb) 3871 { 3872 struct drm_crtc *crtc = &intel_crtc->base; 3873 struct drm_device *dev = crtc->dev; 3874 struct drm_i915_private *dev_priv = to_i915(dev); 3875 int level, max_level = ilk_wm_max_level(dev_priv); 3876 enum i915_pipe pipe = intel_crtc->pipe; 3877 3878 for (level = 0; level <= max_level; level++) { 3879 skl_write_wm_level(dev_priv, CUR_WM(pipe, level), 3880 &wm->wm[level]); 3881 } 3882 skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm); 3883 3884 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), 3885 &ddb->plane[pipe][PLANE_CURSOR]); 3886 } 3887 3888 bool skl_wm_level_equals(const struct skl_wm_level *l1, 3889 const struct skl_wm_level *l2) 3890 { 3891 if (l1->plane_en != l2->plane_en) 3892 return false; 3893 3894 /* If both planes aren't enabled, the rest shouldn't matter */ 3895 if (!l1->plane_en) 3896 return true; 3897 3898 return (l1->plane_res_l == l2->plane_res_l && 3899 l1->plane_res_b == l2->plane_res_b); 3900 } 3901 3902 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, 3903 const struct skl_ddb_entry *b) 3904 { 3905 return a->start < b->end && b->start < a->end; 3906 } 3907 3908 bool skl_ddb_allocation_overlaps(struct drm_atomic_state *state, 3909 struct intel_crtc *intel_crtc) 3910 { 3911 struct drm_crtc *other_crtc; 3912 struct drm_crtc_state *other_cstate; 3913 struct intel_crtc 
*other_intel_crtc; 3914 const struct skl_ddb_entry *ddb = 3915 &to_intel_crtc_state(intel_crtc->base.state)->wm.skl.ddb; 3916 int i; 3917 3918 for_each_crtc_in_state(state, other_crtc, other_cstate, i) { 3919 other_intel_crtc = to_intel_crtc(other_crtc); 3920 3921 if (other_intel_crtc == intel_crtc) 3922 continue; 3923 3924 if (skl_ddb_entries_overlap(ddb, &other_intel_crtc->hw_ddb)) 3925 return true; 3926 } 3927 3928 return false; 3929 } 3930 3931 static int skl_update_pipe_wm(struct drm_crtc_state *cstate, 3932 struct skl_ddb_allocation *ddb, /* out */ 3933 struct skl_pipe_wm *pipe_wm, /* out */ 3934 bool *changed /* out */) 3935 { 3936 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->crtc); 3937 struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate); 3938 int ret; 3939 3940 ret = skl_build_pipe_wm(intel_cstate, ddb, pipe_wm); 3941 if (ret) 3942 return ret; 3943 3944 if (!memcmp(&intel_crtc->wm.active.skl, pipe_wm, sizeof(*pipe_wm))) 3945 *changed = false; 3946 else 3947 *changed = true; 3948 3949 return 0; 3950 } 3951 3952 static uint32_t 3953 pipes_modified(struct drm_atomic_state *state) 3954 { 3955 struct drm_crtc *crtc; 3956 struct drm_crtc_state *cstate; 3957 uint32_t i, ret = 0; 3958 3959 for_each_crtc_in_state(state, crtc, cstate, i) 3960 ret |= drm_crtc_mask(crtc); 3961 3962 return ret; 3963 } 3964 3965 static int 3966 skl_ddb_add_affected_planes(struct intel_crtc_state *cstate) 3967 { 3968 struct drm_atomic_state *state = cstate->base.state; 3969 struct drm_device *dev = state->dev; 3970 struct drm_crtc *crtc = cstate->base.crtc; 3971 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3972 struct drm_i915_private *dev_priv = to_i915(dev); 3973 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 3974 struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; 3975 struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; 3976 struct drm_plane_state *plane_state; 3977 struct drm_plane *plane; 3978 enum i915_pipe pipe = intel_crtc->pipe; 3979 int id; 3980 3981 WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc)); 3982 3983 drm_for_each_plane_mask(plane, dev, crtc->state->plane_mask) { 3984 id = skl_wm_plane_id(to_intel_plane(plane)); 3985 3986 if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][id], 3987 &new_ddb->plane[pipe][id]) && 3988 skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][id], 3989 &new_ddb->y_plane[pipe][id])) 3990 continue; 3991 3992 plane_state = drm_atomic_get_plane_state(state, plane); 3993 if (IS_ERR(plane_state)) 3994 return PTR_ERR(plane_state); 3995 } 3996 3997 return 0; 3998 } 3999 4000 static int 4001 skl_compute_ddb(struct drm_atomic_state *state) 4002 { 4003 struct drm_device *dev = state->dev; 4004 struct drm_i915_private *dev_priv = to_i915(dev); 4005 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 4006 struct intel_crtc *intel_crtc; 4007 struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb; 4008 uint32_t realloc_pipes = pipes_modified(state); 4009 int ret; 4010 4011 /* 4012 * If this is our first atomic update following hardware readout, 4013 * we can't trust the DDB that the BIOS programmed for us. Let's 4014 * pretend that all pipes switched active status so that we'll 4015 * ensure a full DDB recompute. 
4016 */ 4017 if (dev_priv->wm.distrust_bios_wm) { 4018 ret = drm_modeset_lock(&dev->mode_config.connection_mutex, 4019 state->acquire_ctx); 4020 if (ret) 4021 return ret; 4022 4023 intel_state->active_pipe_changes = ~0; 4024 4025 /* 4026 * We usually only initialize intel_state->active_crtcs if 4027 * we're doing a modeset; make sure this field is always 4028 * initialized during the sanitization process that happens 4029 * on the first commit too. 4030 */ 4031 if (!intel_state->modeset) 4032 intel_state->active_crtcs = dev_priv->active_crtcs; 4033 } 4034 4035 /* 4036 * If the modeset changes which CRTC's are active, we need to 4037 * recompute the DDB allocation for *all* active pipes, even 4038 * those that weren't otherwise being modified in any way by this 4039 * atomic commit. Due to the shrinking of the per-pipe allocations 4040 * when new active CRTC's are added, it's possible for a pipe that 4041 * we were already using and aren't changing at all here to suddenly 4042 * become invalid if its DDB needs exceed its new allocation. 4043 * 4044 * Note that if we wind up doing a full DDB recompute, we can't let 4045 * any other display updates race with this transaction, so we need 4046 * to grab the lock on *all* CRTC's. 4047 */ 4048 if (intel_state->active_pipe_changes) { 4049 realloc_pipes = ~0; 4050 intel_state->wm_results.dirty_pipes = ~0; 4051 } 4052 4053 /* 4054 * We're not recomputing for the pipes not included in the commit, so 4055 * make sure we start with the current state. 4056 */ 4057 memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb)); 4058 4059 for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) { 4060 struct intel_crtc_state *cstate; 4061 4062 cstate = intel_atomic_get_crtc_state(state, intel_crtc); 4063 if (IS_ERR(cstate)) 4064 return PTR_ERR(cstate); 4065 4066 ret = skl_allocate_pipe_ddb(cstate, ddb); 4067 if (ret) 4068 return ret; 4069 4070 ret = skl_ddb_add_affected_planes(cstate); 4071 if (ret) 4072 return ret; 4073 } 4074 4075 return 0; 4076 } 4077 4078 static void 4079 skl_copy_wm_for_pipe(struct skl_wm_values *dst, 4080 struct skl_wm_values *src, 4081 enum i915_pipe pipe) 4082 { 4083 memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe], 4084 sizeof(dst->ddb.y_plane[pipe])); 4085 memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe], 4086 sizeof(dst->ddb.plane[pipe])); 4087 } 4088 4089 static void 4090 skl_print_wm_changes(struct drm_atomic_state *state) 4091 { 4092 struct drm_device *dev = state->dev; 4093 const struct drm_i915_private *dev_priv = to_i915(dev); 4094 const struct intel_atomic_state *intel_state = 4095 to_intel_atomic_state(state); 4096 struct drm_crtc *crtc; 4097 const struct drm_crtc_state *cstate; 4098 struct drm_plane *plane; 4099 const struct intel_plane *intel_plane; 4100 const struct drm_plane_state *pstate; 4101 const struct skl_ddb_allocation *old_ddb = &dev_priv->wm.skl_hw.ddb; 4102 const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; 4103 enum i915_pipe pipe; 4104 int id; 4105 int i, j; 4106 4107 for_each_crtc_in_state(state, crtc, cstate, i) { 4108 pipe = to_intel_crtc(crtc)->pipe; 4109 4110 for_each_plane_in_state(state, plane, pstate, j) { 4111 const struct skl_ddb_entry *old, *new; 4112 4113 intel_plane = to_intel_plane(plane); 4114 id = skl_wm_plane_id(intel_plane); 4115 old = &old_ddb->plane[pipe][id]; 4116 new = &new_ddb->plane[pipe][id]; 4117 4118 if (intel_plane->pipe != pipe) 4119 continue; 4120 4121 if (skl_ddb_entry_equal(old, new)) 4122 continue; 4123 4124 if (id != PLANE_CURSOR) { 4125
DRM_DEBUG_ATOMIC("[PLANE:%d:plane %d%c] ddb (%d - %d) -> (%d - %d)\n", 4126 plane->base.id, id + 1, 4127 pipe_name(pipe), 4128 old->start, old->end, 4129 new->start, new->end); 4130 } else { 4131 DRM_DEBUG_ATOMIC("[PLANE:%d:cursor %c] ddb (%d - %d) -> (%d - %d)\n", 4132 plane->base.id, 4133 pipe_name(pipe), 4134 old->start, old->end, 4135 new->start, new->end); 4136 } 4137 } 4138 } 4139 } 4140 4141 static int 4142 skl_compute_wm(struct drm_atomic_state *state) 4143 { 4144 struct drm_crtc *crtc; 4145 struct drm_crtc_state *cstate; 4146 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 4147 struct skl_wm_values *results = &intel_state->wm_results; 4148 struct skl_pipe_wm *pipe_wm; 4149 bool changed = false; 4150 int ret, i; 4151 4152 /* 4153 * If this transaction isn't actually touching any CRTC's, don't 4154 * bother with watermark calculation. Note that if we pass this 4155 * test, we're guaranteed to hold at least one CRTC state mutex, 4156 * which means we can safely use values like dev_priv->active_crtcs 4157 * since any racing commits that want to update them would need to 4158 * hold _all_ CRTC state mutexes. 4159 */ 4160 for_each_crtc_in_state(state, crtc, cstate, i) 4161 changed = true; 4162 if (!changed) 4163 return 0; 4164 4165 /* Clear all dirty flags */ 4166 results->dirty_pipes = 0; 4167 4168 ret = skl_compute_ddb(state); 4169 if (ret) 4170 return ret; 4171 4172 /* 4173 * Calculate WM's for all pipes that are part of this transaction. 4174 * Note that the DDB allocation above may have added more CRTC's that 4175 * weren't otherwise being modified (and set bits in dirty_pipes) if 4176 * pipe allocations had to change. 4177 * 4178 * FIXME: Now that we're doing this in the atomic check phase, we 4179 * should allow skl_update_pipe_wm() to return failure in cases where 4180 * no suitable watermark values can be found. 4181 */ 4182 for_each_crtc_in_state(state, crtc, cstate, i) { 4183 struct intel_crtc_state *intel_cstate = 4184 to_intel_crtc_state(cstate); 4185 4186 pipe_wm = &intel_cstate->wm.skl.optimal; 4187 ret = skl_update_pipe_wm(cstate, &results->ddb, pipe_wm, 4188 &changed); 4189 if (ret) 4190 return ret; 4191 4192 if (changed) 4193 results->dirty_pipes |= drm_crtc_mask(crtc); 4194 4195 if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0) 4196 /* This pipe's WM's did not change */ 4197 continue; 4198 4199 intel_cstate->update_wm_pre = true; 4200 } 4201 4202 skl_print_wm_changes(state); 4203 4204 return 0; 4205 } 4206 4207 static void skl_update_wm(struct drm_crtc *crtc) 4208 { 4209 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 4210 struct drm_device *dev = crtc->dev; 4211 struct drm_i915_private *dev_priv = to_i915(dev); 4212 struct skl_wm_values *results = &dev_priv->wm.skl_results; 4213 struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; 4214 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); 4215 struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal; 4216 enum i915_pipe pipe = intel_crtc->pipe; 4217 4218 if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0) 4219 return; 4220 4221 intel_crtc->wm.active.skl = *pipe_wm; 4222 4223 mutex_lock(&dev_priv->wm.wm_mutex); 4224 4225 /* 4226 * If this pipe isn't active already, we're going to be enabling it 4227 * very soon. Since it's safe to update a pipe's ddb allocation while 4228 * the pipe's shut off, just do so here. Already active pipes will have 4229 * their watermarks updated once we update their planes. 
4230 */ 4231 if (crtc->state->active_changed) { 4232 int plane; 4233 4234 for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) 4235 skl_write_plane_wm(intel_crtc, &pipe_wm->planes[plane], 4236 &results->ddb, plane); 4237 4238 skl_write_cursor_wm(intel_crtc, &pipe_wm->planes[PLANE_CURSOR], 4239 &results->ddb); 4240 } 4241 4242 skl_copy_wm_for_pipe(hw_vals, results, pipe); 4243 4244 intel_crtc->hw_ddb = cstate->wm.skl.ddb; 4245 4246 mutex_unlock(&dev_priv->wm.wm_mutex); 4247 } 4248 4249 static void ilk_compute_wm_config(struct drm_device *dev, 4250 struct intel_wm_config *config) 4251 { 4252 struct intel_crtc *crtc; 4253 4254 /* Compute the currently _active_ config */ 4255 for_each_intel_crtc(dev, crtc) { 4256 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk; 4257 4258 if (!wm->pipe_enabled) 4259 continue; 4260 4261 config->sprites_enabled |= wm->sprites_enabled; 4262 config->sprites_scaled |= wm->sprites_scaled; 4263 config->num_pipes_active++; 4264 } 4265 } 4266 4267 static void ilk_program_watermarks(struct drm_i915_private *dev_priv) 4268 { 4269 struct drm_device *dev = &dev_priv->drm; 4270 struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm; 4271 struct ilk_wm_maximums max; 4272 struct intel_wm_config config = {}; 4273 struct ilk_wm_values results = {}; 4274 enum intel_ddb_partitioning partitioning; 4275 4276 ilk_compute_wm_config(dev, &config); 4277 4278 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max); 4279 ilk_wm_merge(dev, &config, &max, &lp_wm_1_2); 4280 4281 /* 5/6 split only in single pipe config on IVB+ */ 4282 if (INTEL_INFO(dev)->gen >= 7 && 4283 config.num_pipes_active == 1 && config.sprites_enabled) { 4284 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max); 4285 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6); 4286 4287 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6); 4288 } else { 4289 best_lp_wm = &lp_wm_1_2; 4290 } 4291 4292 partitioning = (best_lp_wm == &lp_wm_1_2) ? 
4293 INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6; 4294 4295 ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results); 4296 4297 ilk_write_wm_values(dev_priv, &results); 4298 } 4299 4300 static void ilk_initial_watermarks(struct intel_crtc_state *cstate) 4301 { 4302 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); 4303 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 4304 4305 mutex_lock(&dev_priv->wm.wm_mutex); 4306 intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate; 4307 ilk_program_watermarks(dev_priv); 4308 mutex_unlock(&dev_priv->wm.wm_mutex); 4309 } 4310 4311 static void ilk_optimize_watermarks(struct intel_crtc_state *cstate) 4312 { 4313 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); 4314 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 4315 4316 mutex_lock(&dev_priv->wm.wm_mutex); 4317 if (cstate->wm.need_postvbl_update) { 4318 intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal; 4319 ilk_program_watermarks(dev_priv); 4320 } 4321 mutex_unlock(&dev_priv->wm.wm_mutex); 4322 } 4323 4324 static inline void skl_wm_level_from_reg_val(uint32_t val, 4325 struct skl_wm_level *level) 4326 { 4327 level->plane_en = val & PLANE_WM_EN; 4328 level->plane_res_b = val & PLANE_WM_BLOCKS_MASK; 4329 level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) & 4330 PLANE_WM_LINES_MASK; 4331 } 4332 4333 void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc, 4334 struct skl_pipe_wm *out) 4335 { 4336 struct drm_device *dev = crtc->dev; 4337 struct drm_i915_private *dev_priv = to_i915(dev); 4338 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 4339 struct intel_plane *intel_plane; 4340 struct skl_plane_wm *wm; 4341 enum i915_pipe pipe = intel_crtc->pipe; 4342 int level, id, max_level; 4343 uint32_t val; 4344 4345 max_level = ilk_wm_max_level(dev_priv); 4346 4347 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { 4348 id = skl_wm_plane_id(intel_plane); 4349 wm = &out->planes[id]; 4350 4351 for (level = 0; level <= max_level; level++) { 4352 if (id != PLANE_CURSOR) 4353 val = I915_READ(PLANE_WM(pipe, id, level)); 4354 else 4355 val = I915_READ(CUR_WM(pipe, level)); 4356 4357 skl_wm_level_from_reg_val(val, &wm->wm[level]); 4358 } 4359 4360 if (id != PLANE_CURSOR) 4361 val = I915_READ(PLANE_WM_TRANS(pipe, id)); 4362 else 4363 val = I915_READ(CUR_WM_TRANS(pipe)); 4364 4365 skl_wm_level_from_reg_val(val, &wm->trans_wm); 4366 } 4367 4368 if (!intel_crtc->active) 4369 return; 4370 4371 out->linetime = I915_READ(PIPE_WM_LINETIME(pipe)); 4372 } 4373 4374 void skl_wm_get_hw_state(struct drm_device *dev) 4375 { 4376 struct drm_i915_private *dev_priv = to_i915(dev); 4377 struct skl_wm_values *hw = &dev_priv->wm.skl_hw; 4378 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb; 4379 struct drm_crtc *crtc; 4380 struct intel_crtc *intel_crtc; 4381 struct intel_crtc_state *cstate; 4382 4383 skl_ddb_get_hw_state(dev_priv, ddb); 4384 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 4385 intel_crtc = to_intel_crtc(crtc); 4386 cstate = to_intel_crtc_state(crtc->state); 4387 4388 skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal); 4389 4390 if (intel_crtc->active) { 4391 hw->dirty_pipes |= drm_crtc_mask(crtc); 4392 intel_crtc->wm.active.skl = cstate->wm.skl.optimal; 4393 } 4394 } 4395 4396 if (dev_priv->active_crtcs) { 4397 /* Fully recompute DDB on first atomic commit */ 4398 dev_priv->wm.distrust_bios_wm = true; 4399 } else { 4400 /* Easy/common case; just sanitize DDB now if everything off */ 4401 memset(ddb, 0, 
sizeof(*ddb)); 4402 } 4403 } 4404 4405 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) 4406 { 4407 struct drm_device *dev = crtc->dev; 4408 struct drm_i915_private *dev_priv = to_i915(dev); 4409 struct ilk_wm_values *hw = &dev_priv->wm.hw; 4410 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 4411 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); 4412 struct intel_pipe_wm *active = &cstate->wm.ilk.optimal; 4413 enum i915_pipe pipe = intel_crtc->pipe; 4414 static const i915_reg_t wm0_pipe_reg[] = { 4415 [PIPE_A] = WM0_PIPEA_ILK, 4416 [PIPE_B] = WM0_PIPEB_ILK, 4417 [PIPE_C] = WM0_PIPEC_IVB, 4418 }; 4419 4420 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]); 4421 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 4422 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); 4423 4424 memset(active, 0, sizeof(*active)); 4425 4426 active->pipe_enabled = intel_crtc->active; 4427 4428 if (active->pipe_enabled) { 4429 u32 tmp = hw->wm_pipe[pipe]; 4430 4431 /* 4432 * For active pipes LP0 watermark is marked as 4433 * enabled, and LP1+ watermarks as disabled since 4434 * we can't really reverse compute them in case 4435 * multiple pipes are active. 4436 */ 4437 active->wm[0].enable = true; 4438 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT; 4439 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT; 4440 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK; 4441 active->linetime = hw->wm_linetime[pipe]; 4442 } else { 4443 int level, max_level = ilk_wm_max_level(dev_priv); 4444 4445 /* 4446 * For inactive pipes, all watermark levels 4447 * should be marked as enabled but zeroed, 4448 * which is what we'd compute them to. 4449 */ 4450 for (level = 0; level <= max_level; level++) 4451 active->wm[level].enable = true; 4452 } 4453 4454 intel_crtc->wm.active.ilk = *active; 4455 } 4456 4457 #define _FW_WM(value, plane) \ 4458 (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT) 4459 #define _FW_WM_VLV(value, plane) \ 4460 (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT) 4461 4462 static void vlv_read_wm_values(struct drm_i915_private *dev_priv, 4463 struct vlv_wm_values *wm) 4464 { 4465 enum i915_pipe pipe; 4466 uint32_t tmp; 4467 4468 for_each_pipe(dev_priv, pipe) { 4469 tmp = I915_READ(VLV_DDL(pipe)); 4470 4471 wm->ddl[pipe].primary = 4472 (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 4473 wm->ddl[pipe].cursor = 4474 (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 4475 wm->ddl[pipe].sprite[0] = 4476 (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 4477 wm->ddl[pipe].sprite[1] = 4478 (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 4479 } 4480 4481 tmp = I915_READ(DSPFW1); 4482 wm->sr.plane = _FW_WM(tmp, SR); 4483 wm->pipe[PIPE_B].cursor = _FW_WM(tmp, CURSORB); 4484 wm->pipe[PIPE_B].primary = _FW_WM_VLV(tmp, PLANEB); 4485 wm->pipe[PIPE_A].primary = _FW_WM_VLV(tmp, PLANEA); 4486 4487 tmp = I915_READ(DSPFW2); 4488 wm->pipe[PIPE_A].sprite[1] = _FW_WM_VLV(tmp, SPRITEB); 4489 wm->pipe[PIPE_A].cursor = _FW_WM(tmp, CURSORA); 4490 wm->pipe[PIPE_A].sprite[0] = _FW_WM_VLV(tmp, SPRITEA); 4491 4492 tmp = I915_READ(DSPFW3); 4493 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR); 4494 4495 if (IS_CHERRYVIEW(dev_priv)) { 4496 tmp = I915_READ(DSPFW7_CHV); 4497 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED); 4498 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC); 4499 4500 tmp = I915_READ(DSPFW8_CHV);
4501 wm->pipe[PIPE_C].sprite[1] = _FW_WM_VLV(tmp, SPRITEF); 4502 wm->pipe[PIPE_C].sprite[0] = _FW_WM_VLV(tmp, SPRITEE); 4503 4504 tmp = I915_READ(DSPFW9_CHV); 4505 wm->pipe[PIPE_C].primary = _FW_WM_VLV(tmp, PLANEC); 4506 wm->pipe[PIPE_C].cursor = _FW_WM(tmp, CURSORC); 4507 4508 tmp = I915_READ(DSPHOWM); 4509 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9; 4510 wm->pipe[PIPE_C].sprite[1] |= _FW_WM(tmp, SPRITEF_HI) << 8; 4511 wm->pipe[PIPE_C].sprite[0] |= _FW_WM(tmp, SPRITEE_HI) << 8; 4512 wm->pipe[PIPE_C].primary |= _FW_WM(tmp, PLANEC_HI) << 8; 4513 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8; 4514 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8; 4515 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8; 4516 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8; 4517 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8; 4518 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8; 4519 } else { 4520 tmp = I915_READ(DSPFW7); 4521 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED); 4522 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC); 4523 4524 tmp = I915_READ(DSPHOWM); 4525 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9; 4526 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8; 4527 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8; 4528 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8; 4529 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8; 4530 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8; 4531 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8; 4532 } 4533 } 4534 4535 #undef _FW_WM 4536 #undef _FW_WM_VLV 4537 4538 void vlv_wm_get_hw_state(struct drm_device *dev) 4539 { 4540 struct drm_i915_private *dev_priv = to_i915(dev); 4541 struct vlv_wm_values *wm = &dev_priv->wm.vlv; 4542 struct intel_plane *plane; 4543 enum i915_pipe pipe; 4544 u32 val; 4545 4546 vlv_read_wm_values(dev_priv, wm); 4547 4548 for_each_intel_plane(dev, plane) { 4549 switch (plane->base.type) { 4550 int sprite; 4551 case DRM_PLANE_TYPE_CURSOR: 4552 plane->wm.fifo_size = 63; 4553 break; 4554 case DRM_PLANE_TYPE_PRIMARY: 4555 plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, 0); 4556 break; 4557 case DRM_PLANE_TYPE_OVERLAY: 4558 sprite = plane->plane; 4559 plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, sprite + 1); 4560 break; 4561 } 4562 } 4563 4564 wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; 4565 wm->level = VLV_WM_LEVEL_PM2; 4566 4567 if (IS_CHERRYVIEW(dev_priv)) { 4568 mutex_lock(&dev_priv->rps.hw_lock); 4569 4570 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); 4571 if (val & DSP_MAXFIFO_PM5_ENABLE) 4572 wm->level = VLV_WM_LEVEL_PM5; 4573 4574 /* 4575 * If DDR DVFS is disabled in the BIOS, Punit 4576 * will never ack the request. So if that happens 4577 * assume we don't have to enable/disable DDR DVFS 4578 * dynamically. To test that just set the REQ_ACK 4579 * bit to poke the Punit, but don't change the 4580 * HIGH/LOW bits so that we don't actually change 4581 * the current state. 
4582 */ 4583 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 4584 val |= FORCE_DDR_FREQ_REQ_ACK; 4585 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val); 4586 4587 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) & 4588 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) { 4589 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, " 4590 "assuming DDR DVFS is disabled\n"); 4591 dev_priv->wm.max_level = VLV_WM_LEVEL_PM5; 4592 } else { 4593 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 4594 if ((val & FORCE_DDR_HIGH_FREQ) == 0) 4595 wm->level = VLV_WM_LEVEL_DDR_DVFS; 4596 } 4597 4598 mutex_unlock(&dev_priv->rps.hw_lock); 4599 } 4600 4601 for_each_pipe(dev_priv, pipe) 4602 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", 4603 pipe_name(pipe), wm->pipe[pipe].primary, wm->pipe[pipe].cursor, 4604 wm->pipe[pipe].sprite[0], wm->pipe[pipe].sprite[1]); 4605 4606 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", 4607 wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); 4608 } 4609 4610 void ilk_wm_get_hw_state(struct drm_device *dev) 4611 { 4612 struct drm_i915_private *dev_priv = to_i915(dev); 4613 struct ilk_wm_values *hw = &dev_priv->wm.hw; 4614 struct drm_crtc *crtc; 4615 4616 for_each_crtc(dev, crtc) 4617 ilk_pipe_wm_get_hw_state(crtc); 4618 4619 hw->wm_lp[0] = I915_READ(WM1_LP_ILK); 4620 hw->wm_lp[1] = I915_READ(WM2_LP_ILK); 4621 hw->wm_lp[2] = I915_READ(WM3_LP_ILK); 4622 4623 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK); 4624 if (INTEL_INFO(dev)->gen >= 7) { 4625 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB); 4626 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB); 4627 } 4628 4629 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 4630 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ? 4631 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 4632 else if (IS_IVYBRIDGE(dev_priv)) 4633 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ? 4634 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 4635 4636 hw->enable_fbc_wm = 4637 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS); 4638 } 4639 4640 /** 4641 * intel_update_watermarks - update FIFO watermark values based on current modes 4642 * 4643 * Calculate watermark values for the various WM regs based on current mode 4644 * and plane configuration. 4645 * 4646 * There are several cases to deal with here: 4647 * - normal (i.e. non-self-refresh) 4648 * - self-refresh (SR) mode 4649 * - lines are large relative to FIFO size (buffer can hold up to 2) 4650 * - lines are small relative to FIFO size (buffer can hold more than 2 4651 * lines), so need to account for TLB latency 4652 * 4653 * The normal calculation is: 4654 * watermark = dotclock * bytes per pixel * latency 4655 * where latency is platform & configuration dependent (we assume pessimal 4656 * values here). 4657 * 4658 * The SR calculation is: 4659 * watermark = (trunc(latency/line time)+1) * surface width * 4660 * bytes per pixel 4661 * where 4662 * line time = htotal / dotclock 4663 * surface width = hdisplay for normal plane and 64 for cursor 4664 * and latency is assumed to be high, as above. 4665 * 4666 * The final value programmed to the register should always be rounded up, 4667 * and include an extra 2 entries to account for clock crossings. 4668 * 4669 * We don't use the sprite, so we can ignore that. And on Crestline we have 4670 * to set the non-SR watermarks to 8. 
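 *
 * As a purely illustrative worked example of the normal case (values
 * assumed): a 100 MHz dotclock at 4 bytes per pixel with 10us of
 * latency has to ride out 100000000 * 4 * 0.00001 = 4000 bytes of
 * scanout while the FIFO refills, before the final rounding up and
 * the 2 extra entries for clock crossings are applied.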
4671 */ 4672 void intel_update_watermarks(struct drm_crtc *crtc) 4673 { 4674 struct drm_i915_private *dev_priv = to_i915(crtc->dev); 4675 4676 if (dev_priv->display.update_wm) 4677 dev_priv->display.update_wm(crtc); 4678 } 4679 4680 /* 4681 * Lock protecting IPS related data structures 4682 */ 4683 DEFINE_SPINLOCK(mchdev_lock); 4684 4685 /* Global for IPS driver to get at the current i915 device. Protected by 4686 * mchdev_lock. */ 4687 static struct drm_i915_private *i915_mch_dev; 4688 4689 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val) 4690 { 4691 u16 rgvswctl; 4692 4693 assert_spin_locked(&mchdev_lock); 4694 4695 rgvswctl = I915_READ16(MEMSWCTL); 4696 if (rgvswctl & MEMCTL_CMD_STS) { 4697 DRM_DEBUG("gpu busy, RCS change rejected\n"); 4698 return false; /* still busy with another command */ 4699 } 4700 4701 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | 4702 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; 4703 I915_WRITE16(MEMSWCTL, rgvswctl); 4704 POSTING_READ16(MEMSWCTL); 4705 4706 rgvswctl |= MEMCTL_CMD_STS; 4707 I915_WRITE16(MEMSWCTL, rgvswctl); 4708 4709 return true; 4710 } 4711 4712 static void ironlake_enable_drps(struct drm_i915_private *dev_priv) 4713 { 4714 u32 rgvmodectl; 4715 u8 fmax, fmin, fstart, vstart; 4716 4717 spin_lock_irq(&mchdev_lock); 4718 4719 rgvmodectl = I915_READ(MEMMODECTL); 4720 4721 /* Enable temp reporting */ 4722 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN); 4723 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE); 4724 4725 /* 100ms RC evaluation intervals */ 4726 I915_WRITE(RCUPEI, 100000); 4727 I915_WRITE(RCDNEI, 100000); 4728 4729 /* Set max/min thresholds to 90ms and 80ms respectively */ 4730 I915_WRITE(RCBMAXAVG, 90000); 4731 I915_WRITE(RCBMINAVG, 80000); 4732 4733 I915_WRITE(MEMIHYST, 1); 4734 4735 /* Set up min, max, and cur for interrupt handling */ 4736 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; 4737 fmin = (rgvmodectl & MEMMODE_FMIN_MASK); 4738 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 4739 MEMMODE_FSTART_SHIFT; 4740 4741 vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >> 4742 PXVFREQ_PX_SHIFT; 4743 4744 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ 4745 dev_priv->ips.fstart = fstart; 4746 4747 dev_priv->ips.max_delay = fstart; 4748 dev_priv->ips.min_delay = fmin; 4749 dev_priv->ips.cur_delay = fstart; 4750 4751 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", 4752 fmax, fmin, fstart); 4753 4754 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); 4755 4756 /* 4757 * Interrupts will be enabled in ironlake_irq_postinstall 4758 */ 4759 4760 I915_WRITE(VIDSTART, vstart); 4761 POSTING_READ(VIDSTART); 4762 4763 rgvmodectl |= MEMMODE_SWMODE_EN; 4764 I915_WRITE(MEMMODECTL, rgvmodectl); 4765 4766 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) 4767 DRM_ERROR("stuck trying to change perf mode\n"); 4768 mdelay(1); 4769 4770 ironlake_set_drps(dev_priv, fstart); 4771 4772 dev_priv->ips.last_count1 = I915_READ(DMIEC) + 4773 I915_READ(DDREC) + I915_READ(CSIEC); 4774 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); 4775 dev_priv->ips.last_count2 = I915_READ(GFXEC); 4776 dev_priv->ips.last_time2 = ktime_get_raw_ns(); 4777 4778 spin_unlock_irq(&mchdev_lock); 4779 } 4780 4781 static void ironlake_disable_drps(struct drm_i915_private *dev_priv) 4782 { 4783 u16 rgvswctl; 4784 4785 spin_lock_irq(&mchdev_lock); 4786 4787 rgvswctl = I915_READ16(MEMSWCTL); 4788 4789 /* Ack interrupts, disable EFC interrupt */ 4790 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & 
~MEMINT_EVAL_CHG_EN); 4791 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG); 4792 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT); 4793 I915_WRITE(DEIIR, DE_PCU_EVENT); 4794 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); 4795 4796 /* Go back to the starting frequency */ 4797 ironlake_set_drps(dev_priv, dev_priv->ips.fstart); 4798 mdelay(1); 4799 rgvswctl |= MEMCTL_CMD_STS; 4800 I915_WRITE(MEMSWCTL, rgvswctl); 4801 mdelay(1); 4802 4803 spin_unlock_irq(&mchdev_lock); 4804 } 4805 4806 /* There's a funny hw issue where the hw returns all 0 when reading from 4807 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value 4808 * ourselves, instead of doing a rmw cycle (which might result in us clearing 4809 * all limits and the gpu stuck at whatever frequency it is at atm). 4810 */ 4811 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) 4812 { 4813 u32 limits; 4814 4815 /* Only set the down limit when we've reached the lowest level to avoid 4816 * getting more interrupts, otherwise leave this clear. This prevents a 4817 * race in the hw when coming out of rc6: There's a tiny window where 4818 * the hw runs at the minimal clock before selecting the desired 4819 * frequency, if the down threshold expires in that window we will not 4820 * receive a down interrupt. */ 4821 if (IS_GEN9(dev_priv)) { 4822 limits = (dev_priv->rps.max_freq_softlimit) << 23; 4823 if (val <= dev_priv->rps.min_freq_softlimit) 4824 limits |= (dev_priv->rps.min_freq_softlimit) << 14; 4825 } else { 4826 limits = dev_priv->rps.max_freq_softlimit << 24; 4827 if (val <= dev_priv->rps.min_freq_softlimit) 4828 limits |= dev_priv->rps.min_freq_softlimit << 16; 4829 } 4830 4831 return limits; 4832 } 4833 4834 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) 4835 { 4836 int new_power; 4837 u32 threshold_up = 0, threshold_down = 0; /* in % */ 4838 u32 ei_up = 0, ei_down = 0; 4839 4840 new_power = dev_priv->rps.power; 4841 switch (dev_priv->rps.power) { 4842 case LOW_POWER: 4843 if (val > dev_priv->rps.efficient_freq + 1 && 4844 val > dev_priv->rps.cur_freq) 4845 new_power = BETWEEN; 4846 break; 4847 4848 case BETWEEN: 4849 if (val <= dev_priv->rps.efficient_freq && 4850 val < dev_priv->rps.cur_freq) 4851 new_power = LOW_POWER; 4852 else if (val >= dev_priv->rps.rp0_freq && 4853 val > dev_priv->rps.cur_freq) 4854 new_power = HIGH_POWER; 4855 break; 4856 4857 case HIGH_POWER: 4858 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && 4859 val < dev_priv->rps.cur_freq) 4860 new_power = BETWEEN; 4861 break; 4862 } 4863 /* Max/min bins are special */ 4864 if (val <= dev_priv->rps.min_freq_softlimit) 4865 new_power = LOW_POWER; 4866 if (val >= dev_priv->rps.max_freq_softlimit) 4867 new_power = HIGH_POWER; 4868 if (new_power == dev_priv->rps.power) 4869 return; 4870 4871 /* Note the units here are not exactly 1us, but 1280ns. 
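 * For example (arithmetic only; the interval unit varies by platform):
 * the LOW_POWER up threshold below works out to 16000us * 95 / 100 =
 * 15200us, which at 1280ns per unit is 15200000 / 1280 = 11875
 * hardware interval units as programmed via GT_INTERVAL_FROM_US().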
*/ 4872 switch (new_power) { 4873 case LOW_POWER: 4874 /* Upclock if more than 95% busy over 16ms */ 4875 ei_up = 16000; 4876 threshold_up = 95; 4877 4878 /* Downclock if less than 85% busy over 32ms */ 4879 ei_down = 32000; 4880 threshold_down = 85; 4881 break; 4882 4883 case BETWEEN: 4884 /* Upclock if more than 90% busy over 13ms */ 4885 ei_up = 13000; 4886 threshold_up = 90; 4887 4888 /* Downclock if less than 75% busy over 32ms */ 4889 ei_down = 32000; 4890 threshold_down = 75; 4891 break; 4892 4893 case HIGH_POWER: 4894 /* Upclock if more than 85% busy over 10ms */ 4895 ei_up = 10000; 4896 threshold_up = 85; 4897 4898 /* Downclock if less than 60% busy over 32ms */ 4899 ei_down = 32000; 4900 threshold_down = 60; 4901 break; 4902 } 4903 4904 I915_WRITE(GEN6_RP_UP_EI, 4905 GT_INTERVAL_FROM_US(dev_priv, ei_up)); 4906 I915_WRITE(GEN6_RP_UP_THRESHOLD, 4907 GT_INTERVAL_FROM_US(dev_priv, 4908 ei_up * threshold_up / 100)); 4909 4910 I915_WRITE(GEN6_RP_DOWN_EI, 4911 GT_INTERVAL_FROM_US(dev_priv, ei_down)); 4912 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 4913 GT_INTERVAL_FROM_US(dev_priv, 4914 ei_down * threshold_down / 100)); 4915 4916 I915_WRITE(GEN6_RP_CONTROL, 4917 GEN6_RP_MEDIA_TURBO | 4918 GEN6_RP_MEDIA_HW_NORMAL_MODE | 4919 GEN6_RP_MEDIA_IS_GFX | 4920 GEN6_RP_ENABLE | 4921 GEN6_RP_UP_BUSY_AVG | 4922 GEN6_RP_DOWN_IDLE_AVG); 4923 4924 dev_priv->rps.power = new_power; 4925 dev_priv->rps.up_threshold = threshold_up; 4926 dev_priv->rps.down_threshold = threshold_down; 4927 dev_priv->rps.last_adj = 0; 4928 } 4929 4930 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) 4931 { 4932 u32 mask = 0; 4933 4934 if (val > dev_priv->rps.min_freq_softlimit) 4935 mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; 4936 if (val < dev_priv->rps.max_freq_softlimit) 4937 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; 4938 4939 mask &= dev_priv->pm_rps_events; 4940 4941 return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); 4942 } 4943 4944 /* gen6_set_rps is called to update the frequency request, but should also be 4945 * called when the range (min_delay and max_delay) is modified so that we can 4946 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ 4947 static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) 4948 { 4949 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ 4950 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) 4951 return; 4952 4953 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 4954 WARN_ON(val > dev_priv->rps.max_freq); 4955 WARN_ON(val < dev_priv->rps.min_freq); 4956 4957 /* min/max delay may still have been modified so be sure to 4958 * write the limits value. 4959 */ 4960 if (val != dev_priv->rps.cur_freq) { 4961 gen6_set_rps_thresholds(dev_priv, val); 4962 4963 if (IS_GEN9(dev_priv)) 4964 I915_WRITE(GEN6_RPNSWREQ, 4965 GEN9_FREQUENCY(val)); 4966 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 4967 I915_WRITE(GEN6_RPNSWREQ, 4968 HSW_FREQUENCY(val)); 4969 else 4970 I915_WRITE(GEN6_RPNSWREQ, 4971 GEN6_FREQUENCY(val) | 4972 GEN6_OFFSET(0) | 4973 GEN6_AGGRESSIVE_TURBO); 4974 } 4975 4976 /* Make sure we continue to get interrupts 4977 * until we hit the minimum or maximum frequencies. 
*/ 4979 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); 4980 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 4981 4982 POSTING_READ(GEN6_RPNSWREQ); 4983 4984 dev_priv->rps.cur_freq = val; 4985 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); 4986 } 4987 4988 static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) 4989 { 4990 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 4991 WARN_ON(val > dev_priv->rps.max_freq); 4992 WARN_ON(val < dev_priv->rps.min_freq); 4993 4994 if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1), 4995 "Odd GPU freq value\n")) 4996 val &= ~1; 4997 4998 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 4999 5000 if (val != dev_priv->rps.cur_freq) { 5001 vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); 5002 if (!IS_CHERRYVIEW(dev_priv)) 5003 gen6_set_rps_thresholds(dev_priv, val); 5004 } 5005 5006 dev_priv->rps.cur_freq = val; 5007 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); 5008 } 5009 5010 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down 5011 * 5012 * If Gfx is Idle, then 5013 * 1. Forcewake Media well. 5014 * 2. Request idle freq. 5015 * 3. Release Forcewake of Media well. 5016 */ 5017 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) 5018 { 5019 u32 val = dev_priv->rps.idle_freq; 5020 5021 if (dev_priv->rps.cur_freq <= val) 5022 return; 5023 5024 /* Wake up the media well, as that takes a lot less 5025 * power than the Render well. */ 5026 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); 5027 valleyview_set_rps(dev_priv, val); 5028 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); 5029 } 5030 5031 void gen6_rps_busy(struct drm_i915_private *dev_priv) 5032 { 5033 mutex_lock(&dev_priv->rps.hw_lock); 5034 if (dev_priv->rps.enabled) { 5035 if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) 5036 gen6_rps_reset_ei(dev_priv); 5037 I915_WRITE(GEN6_PMINTRMSK, 5038 gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); 5039 5040 gen6_enable_rps_interrupts(dev_priv); 5041 5042 /* Ensure we start at the user's desired frequency */ 5043 intel_set_rps(dev_priv, 5044 clamp(dev_priv->rps.cur_freq, 5045 dev_priv->rps.min_freq_softlimit, 5046 dev_priv->rps.max_freq_softlimit)); 5047 } 5048 mutex_unlock(&dev_priv->rps.hw_lock); 5049 } 5050 5051 void gen6_rps_idle(struct drm_i915_private *dev_priv) 5052 { 5053 /* Flush our bottom-half so that it does not race with us 5054 * setting the idle frequency and so that it is bounded by 5055 * our rpm wakeref. And then disable the interrupts to stop any 5056 * further RPS reclocking whilst we are asleep. 5057 */ 5058 gen6_disable_rps_interrupts(dev_priv); 5059 5060 mutex_lock(&dev_priv->rps.hw_lock); 5061 if (dev_priv->rps.enabled) { 5062 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) 5063 vlv_set_rps_idle(dev_priv); 5064 else 5065 gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); 5066 dev_priv->rps.last_adj = 0; 5067 I915_WRITE(GEN6_PMINTRMSK, 5068 gen6_sanitize_rps_pm_mask(dev_priv, ~0)); 5069 } 5070 mutex_unlock(&dev_priv->rps.hw_lock); 5071 5072 lockmgr(&dev_priv->rps.client_lock, LK_EXCLUSIVE); 5073 while (!list_empty(&dev_priv->rps.clients)) 5074 list_del_init(dev_priv->rps.clients.next); 5075 lockmgr(&dev_priv->rps.client_lock, LK_RELEASE); 5076 } 5077 5078 void gen6_rps_boost(struct drm_i915_private *dev_priv, 5079 struct intel_rps_client *rps, 5080 unsigned long submitted) 5081 { 5082 /* This is intentionally racy!
We peek at the state here, then 5083 * validate inside the RPS worker. 5084 */ 5085 if (!(dev_priv->gt.awake && 5086 dev_priv->rps.enabled && 5087 dev_priv->rps.cur_freq < dev_priv->rps.boost_freq)) 5088 return; 5089 5090 /* Force an RPS boost (and don't count it against the client) if 5091 * the GPU is severely congested. 5092 */ 5093 if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES)) 5094 rps = NULL; 5095 5096 lockmgr(&dev_priv->rps.client_lock, LK_EXCLUSIVE); 5097 if (rps == NULL || list_empty(&rps->link)) { 5098 spin_lock_irq(&dev_priv->irq_lock); 5099 if (dev_priv->rps.interrupts_enabled) { 5100 dev_priv->rps.client_boost = true; 5101 schedule_work(&dev_priv->rps.work); 5102 } 5103 spin_unlock_irq(&dev_priv->irq_lock); 5104 5105 if (rps != NULL) { 5106 list_add(&rps->link, &dev_priv->rps.clients); 5107 rps->boosts++; 5108 } else 5109 dev_priv->rps.boosts++; 5110 } 5111 lockmgr(&dev_priv->rps.client_lock, LK_RELEASE); 5112 } 5113 5114 void intel_set_rps(struct drm_i915_private *dev_priv, u8 val) 5115 { 5116 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) 5117 valleyview_set_rps(dev_priv, val); 5118 else 5119 gen6_set_rps(dev_priv, val); 5120 } 5121 5122 static void gen9_disable_rc6(struct drm_i915_private *dev_priv) 5123 { 5124 I915_WRITE(GEN6_RC_CONTROL, 0); 5125 I915_WRITE(GEN9_PG_ENABLE, 0); 5126 } 5127 5128 static void gen9_disable_rps(struct drm_i915_private *dev_priv) 5129 { 5130 I915_WRITE(GEN6_RP_CONTROL, 0); 5131 } 5132 5133 static void gen6_disable_rps(struct drm_i915_private *dev_priv) 5134 { 5135 I915_WRITE(GEN6_RC_CONTROL, 0); 5136 I915_WRITE(GEN6_RPNSWREQ, 1 << 31); 5137 I915_WRITE(GEN6_RP_CONTROL, 0); 5138 } 5139 5140 static void cherryview_disable_rps(struct drm_i915_private *dev_priv) 5141 { 5142 I915_WRITE(GEN6_RC_CONTROL, 0); 5143 } 5144 5145 static void valleyview_disable_rps(struct drm_i915_private *dev_priv) 5146 { 5147 /* We're doing forcewake before disabling RC6; 5148 * this is what the BIOS expects when going into suspend */ 5149 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5150 5151 I915_WRITE(GEN6_RC_CONTROL, 0); 5152 5153 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5154 } 5155 5156 static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode) 5157 { 5158 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { 5159 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) 5160 mode = GEN6_RC_CTL_RC6_ENABLE; 5161 else 5162 mode = 0; 5163 } 5164 if (HAS_RC6p(dev_priv)) 5165 DRM_DEBUG_DRIVER("Enabling RC6 states: " 5166 "RC6 %s RC6p %s RC6pp %s\n", 5167 onoff(mode & GEN6_RC_CTL_RC6_ENABLE), 5168 onoff(mode & GEN6_RC_CTL_RC6p_ENABLE), 5169 onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE)); 5170 5171 else 5172 DRM_DEBUG_DRIVER("Enabling RC6 states: RC6 %s\n", 5173 onoff(mode & GEN6_RC_CTL_RC6_ENABLE)); 5174 } 5175 5176 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) 5177 { 5178 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5179 bool enable_rc6 = true; 5180 unsigned long rc6_ctx_base; 5181 u32 rc_ctl; 5182 int rc_sw_target; 5183 5184 rc_ctl = I915_READ(GEN6_RC_CONTROL); 5185 rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >> 5186 RC_SW_TARGET_STATE_SHIFT; 5187 DRM_DEBUG_DRIVER("BIOS enabled RC states: " 5188 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", 5189 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), 5190 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), 5191 rc_sw_target); 5192 5193 if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) { 5194 DRM_DEBUG_DRIVER("RC6 Base location not set
properly.\n"); 5195 enable_rc6 = false; 5196 } 5197 5198 /* 5199 * The exact context size is not known for BXT, so assume a page size 5200 * for this check. 5201 */ 5202 rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK; 5203 if (!((rc6_ctx_base >= ggtt->stolen_reserved_base) && 5204 (rc6_ctx_base + PAGE_SIZE <= ggtt->stolen_reserved_base + 5205 ggtt->stolen_reserved_size))) { 5206 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); 5207 enable_rc6 = false; 5208 } 5209 5210 if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) && 5211 ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) && 5212 ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) && 5213 ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) { 5214 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); 5215 enable_rc6 = false; 5216 } 5217 5218 if (!I915_READ(GEN8_PUSHBUS_CONTROL) || 5219 !I915_READ(GEN8_PUSHBUS_ENABLE) || 5220 !I915_READ(GEN8_PUSHBUS_SHIFT)) { 5221 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); 5222 enable_rc6 = false; 5223 } 5224 5225 if (!I915_READ(GEN6_GFXPAUSE)) { 5226 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); 5227 enable_rc6 = false; 5228 } 5229 5230 if (!I915_READ(GEN8_MISC_CTRL0)) { 5231 DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); 5232 enable_rc6 = false; 5233 } 5234 5235 return enable_rc6; 5236 } 5237 5238 int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6) 5239 { 5240 /* No RC6 before Ironlake and code is gone for ilk. */ 5241 if (INTEL_INFO(dev_priv)->gen < 6) 5242 return 0; 5243 5244 if (!enable_rc6) 5245 return 0; 5246 5247 if (IS_BROXTON(dev_priv) && !bxt_check_bios_rc6_setup(dev_priv)) { 5248 DRM_INFO("RC6 disabled by BIOS\n"); 5249 return 0; 5250 } 5251 5252 /* Respect the kernel parameter if it is set */ 5253 if (enable_rc6 >= 0) { 5254 int mask; 5255 5256 if (HAS_RC6p(dev_priv)) 5257 mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | 5258 INTEL_RC6pp_ENABLE; 5259 else 5260 mask = INTEL_RC6_ENABLE; 5261 5262 if ((enable_rc6 & mask) != enable_rc6) 5263 DRM_DEBUG_DRIVER("Adjusting RC6 mask to %d " 5264 "(requested %d, valid %d)\n", 5265 enable_rc6 & mask, enable_rc6, mask); 5266 5267 return enable_rc6 & mask; 5268 } 5269 5270 if (IS_IVYBRIDGE(dev_priv)) 5271 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); 5272 5273 return INTEL_RC6_ENABLE; 5274 } 5275 5276 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) 5277 { 5278 /* All of these values are in units of 50MHz */ 5279 5280 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 5281 if (IS_BROXTON(dev_priv)) { 5282 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); 5283 dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; 5284 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 5285 dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; 5286 } else { 5287 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); 5288 dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; 5289 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 5290 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; 5291 } 5292 /* hw_max = RP0 until we check for overclocking */ 5293 dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; 5294 5295 dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; 5296 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || 5297 IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { 5298 u32 ddcc_status = 0; 5299 5300 if (sandybridge_pcode_read(dev_priv, 5301 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 5302 &ddcc_status) == 0) 5303 
dev_priv->rps.efficient_freq = 5304 clamp_t(u8, 5305 ((ddcc_status >> 8) & 0xff), 5306 dev_priv->rps.min_freq, 5307 dev_priv->rps.max_freq); 5308 } 5309 5310 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { 5311 /* Store the frequency values in 16.66 MHz units, which is 5312 * the natural hardware unit for SKL 5313 */ 5314 dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; 5315 dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; 5316 dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; 5317 dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; 5318 dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; 5319 } 5320 } 5321 5322 static void reset_rps(struct drm_i915_private *dev_priv, 5323 void (*set)(struct drm_i915_private *, u8)) 5324 { 5325 u8 freq = dev_priv->rps.cur_freq; 5326 5327 /* force a reset */ 5328 dev_priv->rps.power = -1; 5329 dev_priv->rps.cur_freq = -1; 5330 5331 set(dev_priv, freq); 5332 } 5333 5334 /* See the Gen9_GT_PM_Programming_Guide doc for the below */ 5335 static void gen9_enable_rps(struct drm_i915_private *dev_priv) 5336 { 5337 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5338 5339 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ 5340 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { 5341 /* 5342 * BIOS could leave the Hw Turbo enabled, so need to explicitly 5343 * clear out the Control register just to avoid inconsistency 5344 * with the debugfs interface, which will show Turbo as enabled 5345 * only and that is not expected by the user after adding the 5346 * WaGsvDisableTurbo. Apart from this there is no problem even 5347 * if the Turbo is left enabled in the Control register, as the 5348 * Up/Down interrupts would remain masked. 5349 */ 5350 gen9_disable_rps(dev_priv); 5351 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5352 return; 5353 } 5354 5355 /* Program defaults and thresholds for RPS*/ 5356 I915_WRITE(GEN6_RC_VIDEO_FREQ, 5357 GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); 5358 5359 /* 1 second timeout*/ 5360 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 5361 GT_INTERVAL_FROM_US(dev_priv, 1000000)); 5362 5363 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa); 5364 5365 /* Leaning on the below call to gen6_set_rps to program/setup the 5366 * Up/Down EI & threshold registers, as well as the RP_CONTROL, 5367 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ 5368 reset_rps(dev_priv, gen6_set_rps); 5369 5370 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5371 } 5372 5373 static void gen9_enable_rc6(struct drm_i915_private *dev_priv) 5374 { 5375 struct intel_engine_cs *engine; 5376 enum intel_engine_id id; 5377 uint32_t rc6_mask = 0; 5378 5379 /* 1a: Software RC state - RC0 */ 5380 I915_WRITE(GEN6_RC_STATE, 0); 5381 5382 /* 1b: Get forcewake during program sequence. Although the driver 5383 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 5384 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5385 5386 /* 2a: Disable RC states.
*/ 5387 I915_WRITE(GEN6_RC_CONTROL, 0); 5388 5389 /* 2b: Program RC6 thresholds.*/ 5390 5391 /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */ 5392 if (IS_SKYLAKE(dev_priv)) 5393 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); 5394 else 5395 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); 5396 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 5397 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 5398 for_each_engine(engine, dev_priv, id) 5399 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 5400 5401 if (HAS_GUC(dev_priv)) 5402 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); 5403 5404 I915_WRITE(GEN6_RC_SLEEP, 0); 5405 5406 /* 2c: Program Coarse Power Gating Policies. */ 5407 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25); 5408 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); 5409 5410 /* 3a: Enable RC6 */ 5411 if (intel_enable_rc6() & INTEL_RC6_ENABLE) 5412 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 5413 DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); 5414 /* WaRsUseTimeoutMode:bxt */ 5415 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { 5416 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */ 5417 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 5418 GEN7_RC_CTL_TO_MODE | 5419 rc6_mask); 5420 } else { 5421 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ 5422 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 5423 GEN6_RC_CTL_EI_MODE(1) | 5424 rc6_mask); 5425 } 5426 5427 /* 5428 * 3b: Enable Coarse Power Gating only when RC6 is enabled. 5429 * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6. 5430 */ 5431 if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) 5432 I915_WRITE(GEN9_PG_ENABLE, 0); 5433 else 5434 I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 5435 (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0); 5436 5437 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5438 } 5439 5440 static void gen8_enable_rps(struct drm_i915_private *dev_priv) 5441 { 5442 struct intel_engine_cs *engine; 5443 enum intel_engine_id id; 5444 uint32_t rc6_mask = 0; 5445 5446 /* 1a: Software RC state - RC0 */ 5447 I915_WRITE(GEN6_RC_STATE, 0); 5448 5449 /* 1c & 1d: Get forcewake during program sequence. Although the driver 5450 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 5451 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5452 5453 /* 2a: Disable RC states. 
*/
5454 I915_WRITE(GEN6_RC_CONTROL, 0);
5455
5456 /* 2b: Program RC6 thresholds. */
5457 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
5458 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
5459 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
5460 for_each_engine(engine, dev_priv, id)
5461 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
5462 I915_WRITE(GEN6_RC_SLEEP, 0);
5463 if (IS_BROADWELL(dev_priv))
5464 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
5465 else
5466 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
5467
5468 /* 3: Enable RC6 */
5469 if (intel_enable_rc6() & INTEL_RC6_ENABLE)
5470 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
5471 intel_print_rc6_info(dev_priv, rc6_mask);
5472 if (IS_BROADWELL(dev_priv))
5473 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
5474 GEN7_RC_CTL_TO_MODE |
5475 rc6_mask);
5476 else
5477 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
5478 GEN6_RC_CTL_EI_MODE(1) |
5479 rc6_mask);
5480
5481 /* 4: Program defaults and thresholds for RPS */
5482 I915_WRITE(GEN6_RPNSWREQ,
5483 HSW_FREQUENCY(dev_priv->rps.rp1_freq));
5484 I915_WRITE(GEN6_RC_VIDEO_FREQ,
5485 HSW_FREQUENCY(dev_priv->rps.rp1_freq));
5486 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
5487 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
5488
5489 /* Docs recommend 900MHz, and 300 MHz respectively */
5490 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
5491 dev_priv->rps.max_freq_softlimit << 24 |
5492 dev_priv->rps.min_freq_softlimit << 16);
5493
5494 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
5495 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70% */
5496 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
5497 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
5498
5499 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5500
5501 /* 5: Enable RPS */
5502 I915_WRITE(GEN6_RP_CONTROL,
5503 GEN6_RP_MEDIA_TURBO |
5504 GEN6_RP_MEDIA_HW_NORMAL_MODE |
5505 GEN6_RP_MEDIA_IS_GFX |
5506 GEN6_RP_ENABLE |
5507 GEN6_RP_UP_BUSY_AVG |
5508 GEN6_RP_DOWN_IDLE_AVG);
5509
5510 /* 6: Ring frequency + overclocking (our driver does this later) */
5511
5512 reset_rps(dev_priv, gen6_set_rps);
5513
5514 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5515 }
5516
5517 static void gen6_enable_rps(struct drm_i915_private *dev_priv)
5518 {
5519 struct intel_engine_cs *engine;
5520 enum intel_engine_id id;
5521 u32 rc6vids, rc6_mask = 0;
5522 u32 gtfifodbg;
5523 int rc6_mode;
5524 int ret;
5525
5526 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5527
5528 /* Here begins a magic sequence of register writes to enable
5529 * auto-downclocking.
5530 *
5531 * Perhaps there might be some value in exposing these to
5532 * userspace...
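 *
 * As a rough sketch of what follows (assuming a non-Haswell part booted
 * with i915.enable_rc6=7): GEN6_RC_CONTROL is first cleared, the wake
 * rate limits and idle thresholds are programmed, and rc6_mask then
 * accumulates GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_RC6p_ENABLE |
 * GEN6_RC_CTL_RC6pp_ENABLE before being written back to GEN6_RC_CONTROL
 * together with GEN6_RC_CTL_EI_MODE(1) and GEN6_RC_CTL_HW_ENABLE.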
5533 */ 5534 I915_WRITE(GEN6_RC_STATE, 0); 5535 5536 /* Clear the DBG now so we don't confuse earlier errors */ 5537 gtfifodbg = I915_READ(GTFIFODBG); 5538 if (gtfifodbg) { 5539 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); 5540 I915_WRITE(GTFIFODBG, gtfifodbg); 5541 } 5542 5543 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5544 5545 /* disable the counters and set deterministic thresholds */ 5546 I915_WRITE(GEN6_RC_CONTROL, 0); 5547 5548 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); 5549 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); 5550 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); 5551 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 5552 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 5553 5554 for_each_engine(engine, dev_priv, id) 5555 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 5556 5557 I915_WRITE(GEN6_RC_SLEEP, 0); 5558 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); 5559 if (IS_IVYBRIDGE(dev_priv)) 5560 I915_WRITE(GEN6_RC6_THRESHOLD, 125000); 5561 else 5562 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); 5563 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); 5564 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ 5565 5566 /* Check if we are enabling RC6 */ 5567 rc6_mode = intel_enable_rc6(); 5568 if (rc6_mode & INTEL_RC6_ENABLE) 5569 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; 5570 5571 /* We don't use those on Haswell */ 5572 if (!IS_HASWELL(dev_priv)) { 5573 if (rc6_mode & INTEL_RC6p_ENABLE) 5574 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; 5575 5576 if (rc6_mode & INTEL_RC6pp_ENABLE) 5577 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; 5578 } 5579 5580 intel_print_rc6_info(dev_priv, rc6_mask); 5581 5582 I915_WRITE(GEN6_RC_CONTROL, 5583 rc6_mask | 5584 GEN6_RC_CTL_EI_MODE(1) | 5585 GEN6_RC_CTL_HW_ENABLE); 5586 5587 /* Power down if completely idle for over 50ms */ 5588 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); 5589 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5590 5591 reset_rps(dev_priv, gen6_set_rps); 5592 5593 rc6vids = 0; 5594 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); 5595 if (IS_GEN6(dev_priv) && ret) { 5596 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); 5597 } else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { 5598 DRM_DEBUG_DRIVER("You should update your BIOS. 
Correcting minimum rc6 voltage (%dmV->%dmV)\n",
5599 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
5600 rc6vids &= 0xffff00;
5601 rc6vids |= GEN6_ENCODE_RC6_VID(450);
5602 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
5603 if (ret)
5604 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
5605 }
5606
5607 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5608 }
5609
5610 static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
5611 {
5612 int min_freq = 15;
5613 unsigned int gpu_freq;
5614 unsigned int max_ia_freq, min_ring_freq;
5615 unsigned int max_gpu_freq, min_gpu_freq;
5616 int scaling_factor = 180;
5617
5618 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5619
5620 #if 0
5621 policy = cpufreq_cpu_get(0);
5622 if (policy) {
5623 max_ia_freq = policy->cpuinfo.max_freq;
5624 cpufreq_cpu_put(policy);
5625 } else {
5626 /*
5627 * Default to measured freq if none found, PCU will ensure we
5628 * don't go over
5629 */
5630 max_ia_freq = tsc_khz;
5631 }
5632 #else
5633 max_ia_freq = tsc_frequency / 1000;
5634 #endif
5635
5636 /* Convert from kHz to MHz */
5637 max_ia_freq /= 1000;
5638
5639 min_ring_freq = I915_READ(DCLK) & 0xf;
5640 /* convert DDR frequency from units of 266.6MHz to bandwidth */
5641 min_ring_freq = mult_frac(min_ring_freq, 8, 3);
5642
5643 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
5644 /* Convert GT frequency back to 50 MHz units */
5645 min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER;
5646 max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER;
5647 } else {
5648 min_gpu_freq = dev_priv->rps.min_freq;
5649 max_gpu_freq = dev_priv->rps.max_freq;
5650 }
5651
5652 /*
5653 * For each potential GPU frequency, load a ring frequency we'd like
5654 * to use for memory access. We do this by specifying the IA frequency
5655 * the PCU should use as a reference to determine the ring frequency.
5656 */
5657 for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
5658 int diff = max_gpu_freq - gpu_freq;
5659 unsigned int ia_freq = 0, ring_freq = 0;
5660
5661 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
5662 /*
5663 * ring_freq = 2 * GT. ring_freq is in 100MHz units
5664 * No floor required for ring frequency on SKL.
5665 */
5666 ring_freq = gpu_freq;
5667 } else if (INTEL_INFO(dev_priv)->gen >= 8) {
5668 /* max(2 * GT, DDR). NB: GT is 50MHz units */
5669 ring_freq = max(min_ring_freq, gpu_freq);
5670 } else if (IS_HASWELL(dev_priv)) {
5671 ring_freq = mult_frac(gpu_freq, 5, 4);
5672 ring_freq = max(min_ring_freq, ring_freq);
5673 /* leave ia_freq as the default, chosen by cpufreq */
5674 } else {
5675 /* On older processors, there is no separate ring
5676 * clock domain, so in order to boost the bandwidth
5677 * of the ring, we need to upclock the CPU (ia_freq).
5678 *
5679 * For GPU frequencies less than 750MHz,
5680 * just use the lowest ring freq.
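 *
 * As an illustrative example (numbers assumed, not taken from any
 * particular SKU): with max_ia_freq = 3400 MHz and the
 * scaling_factor of 180 above, a GPU frequency four bins below max
 * yields ia_freq = 3400 - (4 * 180) / 2 = 3040, which
 * DIV_ROUND_CLOSEST(ia_freq, 100) then encodes as 30 for the pcode
 * write below.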
5681 */
5682 if (gpu_freq < min_freq)
5683 ia_freq = 800;
5684 else
5685 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
5686 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
5687 }
5688
5689 sandybridge_pcode_write(dev_priv,
5690 GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
5691 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
5692 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
5693 gpu_freq);
5694 }
5695 }
5696
5697 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
5698 {
5699 u32 val, rp0;
5700
5701 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5702
5703 switch (INTEL_INFO(dev_priv)->sseu.eu_total) {
5704 case 8:
5705 /* (2 * 4) config */
5706 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
5707 break;
5708 case 12:
5709 /* (2 * 6) config */
5710 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
5711 break;
5712 case 16:
5713 /* (2 * 8) config */
5714 default:
5715 /* Setting (2 * 8) Min RP0 for any other combination */
5716 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
5717 break;
5718 }
5719
5720 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
5721
5722 return rp0;
5723 }
5724
5725 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5726 {
5727 u32 val, rpe;
5728
5729 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
5730 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
5731
5732 return rpe;
5733 }
5734
5735 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
5736 {
5737 u32 val, rp1;
5738
5739 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5740 rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
5741
5742 return rp1;
5743 }
5744
5745 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
5746 {
5747 u32 val, rp1;
5748
5749 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5750
5751 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
5752
5753 return rp1;
5754 }
5755
5756 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
5757 {
5758 u32 val, rp0;
5759
5760 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5761
5762 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
5763 /* Clamp to max */
5764 rp0 = min_t(u32, rp0, 0xea);
5765
5766 return rp0;
5767 }
5768
5769 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5770 {
5771 u32 val, rpe;
5772
5773 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
5774 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
5775 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
5776 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
5777
5778 return rpe;
5779 }
5780
5781 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
5782 {
5783 u32 val;
5784
5785 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
5786 /*
5787 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
5788 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
5789 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
5790 * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
5791 * to make sure it matches what Punit accepts.
5792 */
5793 return max_t(u32, val, 0xc0);
5794 }
5795
5796 /* Check that the pctx buffer wasn't moved under us.
*/
5797 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
5798 {
5799 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5800
5801 WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
5802 dev_priv->vlv_pctx->stolen->start);
5803 }
5804
5805
5806 /* Check that the pcbr address is not empty. */
5807 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
5808 {
5809 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5810
5811 WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
5812 }
5813
5814 static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
5815 {
5816 struct i915_ggtt *ggtt = &dev_priv->ggtt;
5817 unsigned long pctx_paddr, paddr;
5818 u32 pcbr;
5819 int pctx_size = 32*1024;
5820
5821 pcbr = I915_READ(VLV_PCBR);
5822 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
5823 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5824 paddr = (dev_priv->mm.stolen_base +
5825 (ggtt->stolen_size - pctx_size));
5826
5827 pctx_paddr = (paddr & (~4095));
5828 I915_WRITE(VLV_PCBR, pctx_paddr);
5829 }
5830
5831 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5832 }
5833
5834 static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
5835 {
5836 struct drm_i915_gem_object *pctx;
5837 unsigned long pctx_paddr;
5838 u32 pcbr;
5839 int pctx_size = 24*1024;
5840
5841 pcbr = I915_READ(VLV_PCBR);
5842 if (pcbr) {
5843 /* BIOS set it up already, grab the pre-alloc'd space */
5844 int pcbr_offset;
5845
5846 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
5847 pctx = i915_gem_object_create_stolen_for_preallocated(&dev_priv->drm,
5848 pcbr_offset,
5849 I915_GTT_OFFSET_NONE,
5850 pctx_size);
5851 goto out;
5852 }
5853
5854 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5855
5856 /*
5857 * From the Gunit register HAS:
5858 * The Gfx driver is expected to program this register and ensure
5859 * proper allocation within Gfx stolen memory. For example, this
5860 * register should be programmed such that the PCBR range does not
5861 * overlap with other ranges, such as the frame buffer, protected
5862 * memory, or any other relevant ranges.
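 *
 * Note that both setup paths keep the low 12 bits of the address clear:
 * the CHV path masks with "& (~4095)" above, and the stolen allocator
 * used below should hand back page-aligned offsets, so VLV_PCBR only
 * ever holds a 4 KiB-aligned physical address within stolen memory.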
5863 */ 5864 pctx = i915_gem_object_create_stolen(&dev_priv->drm, pctx_size); 5865 if (!pctx) { 5866 DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); 5867 goto out; 5868 } 5869 5870 pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start; 5871 I915_WRITE(VLV_PCBR, pctx_paddr); 5872 5873 out: 5874 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); 5875 dev_priv->vlv_pctx = pctx; 5876 } 5877 5878 static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) 5879 { 5880 if (WARN_ON(!dev_priv->vlv_pctx)) 5881 return; 5882 5883 i915_gem_object_put_unlocked(dev_priv->vlv_pctx); 5884 dev_priv->vlv_pctx = NULL; 5885 } 5886 5887 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) 5888 { 5889 dev_priv->rps.gpll_ref_freq = 5890 vlv_get_cck_clock(dev_priv, "GPLL ref", 5891 CCK_GPLL_CLOCK_CONTROL, 5892 dev_priv->czclk_freq); 5893 5894 DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", 5895 dev_priv->rps.gpll_ref_freq); 5896 } 5897 5898 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) 5899 { 5900 u32 val; 5901 5902 valleyview_setup_pctx(dev_priv); 5903 5904 vlv_init_gpll_ref_freq(dev_priv); 5905 5906 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 5907 switch ((val >> 6) & 3) { 5908 case 0: 5909 case 1: 5910 dev_priv->mem_freq = 800; 5911 break; 5912 case 2: 5913 dev_priv->mem_freq = 1066; 5914 break; 5915 case 3: 5916 dev_priv->mem_freq = 1333; 5917 break; 5918 } 5919 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 5920 5921 dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); 5922 dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; 5923 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 5924 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), 5925 dev_priv->rps.max_freq); 5926 5927 dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); 5928 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 5929 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 5930 dev_priv->rps.efficient_freq); 5931 5932 dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); 5933 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", 5934 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), 5935 dev_priv->rps.rp1_freq); 5936 5937 dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); 5938 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 5939 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), 5940 dev_priv->rps.min_freq); 5941 } 5942 5943 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) 5944 { 5945 u32 val; 5946 5947 cherryview_setup_pctx(dev_priv); 5948 5949 vlv_init_gpll_ref_freq(dev_priv); 5950 5951 mutex_lock(&dev_priv->sb_lock); 5952 val = vlv_cck_read(dev_priv, CCK_FUSE_REG); 5953 mutex_unlock(&dev_priv->sb_lock); 5954 5955 switch ((val >> 2) & 0x7) { 5956 case 3: 5957 dev_priv->mem_freq = 2000; 5958 break; 5959 default: 5960 dev_priv->mem_freq = 1600; 5961 break; 5962 } 5963 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 5964 5965 dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); 5966 dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; 5967 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 5968 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), 5969 dev_priv->rps.max_freq); 5970 5971 dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); 5972 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 5973 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 5974 dev_priv->rps.efficient_freq); 5975 5976 dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); 
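/*
 * Note: RP0/RPe/RP1 above all come from PUnit fuse registers and are
 * expected to be even on CHV; the WARN_ONCE() a few lines below exists
 * to catch odd values sneaking through.
 */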
5977 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", 5978 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), 5979 dev_priv->rps.rp1_freq); 5980 5981 /* PUnit validated range is only [RPe, RP0] */ 5982 dev_priv->rps.min_freq = dev_priv->rps.efficient_freq; 5983 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 5984 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), 5985 dev_priv->rps.min_freq); 5986 5987 WARN_ONCE((dev_priv->rps.max_freq | 5988 dev_priv->rps.efficient_freq | 5989 dev_priv->rps.rp1_freq | 5990 dev_priv->rps.min_freq) & 1, 5991 "Odd GPU freq values\n"); 5992 } 5993 5994 static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) 5995 { 5996 valleyview_cleanup_pctx(dev_priv); 5997 } 5998 5999 static void cherryview_enable_rps(struct drm_i915_private *dev_priv) 6000 { 6001 struct intel_engine_cs *engine; 6002 enum intel_engine_id id; 6003 u32 gtfifodbg, val, rc6_mode = 0, pcbr; 6004 6005 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 6006 6007 gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | 6008 GT_FIFO_FREE_ENTRIES_CHV); 6009 if (gtfifodbg) { 6010 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", 6011 gtfifodbg); 6012 I915_WRITE(GTFIFODBG, gtfifodbg); 6013 } 6014 6015 cherryview_check_pctx(dev_priv); 6016 6017 /* 1a & 1b: Get forcewake during program sequence. Although the driver 6018 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 6019 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6020 6021 /* Disable RC states. */ 6022 I915_WRITE(GEN6_RC_CONTROL, 0); 6023 6024 /* 2a: Program RC6 thresholds.*/ 6025 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 6026 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 6027 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 6028 6029 for_each_engine(engine, dev_priv, id) 6030 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 6031 I915_WRITE(GEN6_RC_SLEEP, 0); 6032 6033 /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ 6034 I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); 6035 6036 /* allows RC6 residency counter to work */ 6037 I915_WRITE(VLV_COUNTER_CONTROL, 6038 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | 6039 VLV_MEDIA_RC6_COUNT_EN | 6040 VLV_RENDER_RC6_COUNT_EN)); 6041 6042 /* For now we assume BIOS is allocating and populating the PCBR */ 6043 pcbr = I915_READ(VLV_PCBR); 6044 6045 /* 3: Enable RC6 */ 6046 if ((intel_enable_rc6() & INTEL_RC6_ENABLE) && 6047 (pcbr >> VLV_PCBR_ADDR_SHIFT)) 6048 rc6_mode = GEN7_RC_CTL_TO_MODE; 6049 6050 I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 6051 6052 /* 4 Program defaults and thresholds for RPS*/ 6053 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); 6054 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); 6055 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); 6056 I915_WRITE(GEN6_RP_UP_EI, 66000); 6057 I915_WRITE(GEN6_RP_DOWN_EI, 350000); 6058 6059 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 6060 6061 /* 5: Enable RPS */ 6062 I915_WRITE(GEN6_RP_CONTROL, 6063 GEN6_RP_MEDIA_HW_NORMAL_MODE | 6064 GEN6_RP_MEDIA_IS_GFX | 6065 GEN6_RP_ENABLE | 6066 GEN6_RP_UP_BUSY_AVG | 6067 GEN6_RP_DOWN_IDLE_AVG); 6068 6069 /* Setting Fixed Bias */ 6070 val = VLV_OVERRIDE_EN | 6071 VLV_SOC_TDP_EN | 6072 CHV_BIAS_CPU_50_SOC_50; 6073 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); 6074 6075 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 6076 6077 /* RPS code assumes GPLL is used */ 6078 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 6079 6080 DRM_DEBUG_DRIVER("GPLL enabled? 
%s\n", yesno(val & GPLLENABLE)); 6081 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 6082 6083 reset_rps(dev_priv, valleyview_set_rps); 6084 6085 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6086 } 6087 6088 static void valleyview_enable_rps(struct drm_i915_private *dev_priv) 6089 { 6090 struct intel_engine_cs *engine; 6091 enum intel_engine_id id; 6092 u32 gtfifodbg, val, rc6_mode = 0; 6093 6094 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 6095 6096 valleyview_check_pctx(dev_priv); 6097 6098 gtfifodbg = I915_READ(GTFIFODBG); 6099 if (gtfifodbg) { 6100 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", 6101 gtfifodbg); 6102 I915_WRITE(GTFIFODBG, gtfifodbg); 6103 } 6104 6105 /* If VLV, Forcewake all wells, else re-direct to regular path */ 6106 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6107 6108 /* Disable RC states. */ 6109 I915_WRITE(GEN6_RC_CONTROL, 0); 6110 6111 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); 6112 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); 6113 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); 6114 I915_WRITE(GEN6_RP_UP_EI, 66000); 6115 I915_WRITE(GEN6_RP_DOWN_EI, 350000); 6116 6117 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 6118 6119 I915_WRITE(GEN6_RP_CONTROL, 6120 GEN6_RP_MEDIA_TURBO | 6121 GEN6_RP_MEDIA_HW_NORMAL_MODE | 6122 GEN6_RP_MEDIA_IS_GFX | 6123 GEN6_RP_ENABLE | 6124 GEN6_RP_UP_BUSY_AVG | 6125 GEN6_RP_DOWN_IDLE_CONT); 6126 6127 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); 6128 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 6129 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 6130 6131 for_each_engine(engine, dev_priv, id) 6132 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 6133 6134 I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); 6135 6136 /* allows RC6 residency counter to work */ 6137 I915_WRITE(VLV_COUNTER_CONTROL, 6138 _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN | 6139 VLV_RENDER_RC0_COUNT_EN | 6140 VLV_MEDIA_RC6_COUNT_EN | 6141 VLV_RENDER_RC6_COUNT_EN)); 6142 6143 if (intel_enable_rc6() & INTEL_RC6_ENABLE) 6144 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; 6145 6146 intel_print_rc6_info(dev_priv, rc6_mode); 6147 6148 I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 6149 6150 /* Setting Fixed Bias */ 6151 val = VLV_OVERRIDE_EN | 6152 VLV_SOC_TDP_EN | 6153 VLV_BIAS_CPU_125_SOC_875; 6154 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); 6155 6156 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 6157 6158 /* RPS code assumes GPLL is used */ 6159 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 6160 6161 DRM_DEBUG_DRIVER("GPLL enabled? 
%s\n", yesno(val & GPLLENABLE)); 6162 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 6163 6164 reset_rps(dev_priv, valleyview_set_rps); 6165 6166 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6167 } 6168 6169 static unsigned long intel_pxfreq(u32 vidfreq) 6170 { 6171 unsigned long freq; 6172 int div = (vidfreq & 0x3f0000) >> 16; 6173 int post = (vidfreq & 0x3000) >> 12; 6174 int pre = (vidfreq & 0x7); 6175 6176 if (!pre) 6177 return 0; 6178 6179 freq = ((div * 133333) / ((1<<post) * pre)); 6180 6181 return freq; 6182 } 6183 6184 static const struct cparams { 6185 u16 i; 6186 u16 t; 6187 u16 m; 6188 u16 c; 6189 } cparams[] = { 6190 { 1, 1333, 301, 28664 }, 6191 { 1, 1066, 294, 24460 }, 6192 { 1, 800, 294, 25192 }, 6193 { 0, 1333, 276, 27605 }, 6194 { 0, 1066, 276, 27605 }, 6195 { 0, 800, 231, 23784 }, 6196 }; 6197 6198 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) 6199 { 6200 u64 total_count, diff, ret; 6201 u32 count1, count2, count3, m = 0, c = 0; 6202 unsigned long now = jiffies_to_msecs(jiffies), diff1; 6203 int i; 6204 6205 assert_spin_locked(&mchdev_lock); 6206 6207 diff1 = now - dev_priv->ips.last_time1; 6208 6209 /* Prevent division-by-zero if we are asking too fast. 6210 * Also, we don't get interesting results if we are polling 6211 * faster than once in 10ms, so just return the saved value 6212 * in such cases. 6213 */ 6214 if (diff1 <= 10) 6215 return dev_priv->ips.chipset_power; 6216 6217 count1 = I915_READ(DMIEC); 6218 count2 = I915_READ(DDREC); 6219 count3 = I915_READ(CSIEC); 6220 6221 total_count = count1 + count2 + count3; 6222 6223 /* FIXME: handle per-counter overflow */ 6224 if (total_count < dev_priv->ips.last_count1) { 6225 diff = ~0UL - dev_priv->ips.last_count1; 6226 diff += total_count; 6227 } else { 6228 diff = total_count - dev_priv->ips.last_count1; 6229 } 6230 6231 for (i = 0; i < ARRAY_SIZE(cparams); i++) { 6232 if (cparams[i].i == dev_priv->ips.c_m && 6233 cparams[i].t == dev_priv->ips.r_t) { 6234 m = cparams[i].m; 6235 c = cparams[i].c; 6236 break; 6237 } 6238 } 6239 6240 diff = div_u64(diff, diff1); 6241 ret = ((m * diff) + c); 6242 ret = div_u64(ret, 10); 6243 6244 dev_priv->ips.last_count1 = total_count; 6245 dev_priv->ips.last_time1 = now; 6246 6247 dev_priv->ips.chipset_power = ret; 6248 6249 return ret; 6250 } 6251 6252 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) 6253 { 6254 unsigned long val; 6255 6256 if (INTEL_INFO(dev_priv)->gen != 5) 6257 return 0; 6258 6259 spin_lock_irq(&mchdev_lock); 6260 6261 val = __i915_chipset_val(dev_priv); 6262 6263 spin_unlock_irq(&mchdev_lock); 6264 6265 return val; 6266 } 6267 6268 unsigned long i915_mch_val(struct drm_i915_private *dev_priv) 6269 { 6270 unsigned long m, x, b; 6271 u32 tsfs; 6272 6273 tsfs = I915_READ(TSFS); 6274 6275 m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); 6276 x = I915_READ8(TR1); 6277 6278 b = tsfs & TSFS_INTR_MASK; 6279 6280 return ((m * x) / 127) - b; 6281 } 6282 6283 static int _pxvid_to_vd(u8 pxvid) 6284 { 6285 if (pxvid == 0) 6286 return 0; 6287 6288 if (pxvid >= 8 && pxvid < 31) 6289 pxvid = 31; 6290 6291 return (pxvid + 2) * 125; 6292 } 6293 6294 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) 6295 { 6296 const int vd = _pxvid_to_vd(pxvid); 6297 const int vm = vd - 1125; 6298 6299 if (INTEL_INFO(dev_priv)->is_mobile) 6300 return vm > 0 ? 
vm : 0; 6301 6302 return vd; 6303 } 6304 6305 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) 6306 { 6307 u64 now, diff, diffms; 6308 u32 count; 6309 6310 assert_spin_locked(&mchdev_lock); 6311 6312 now = ktime_get_raw_ns(); 6313 diffms = now - dev_priv->ips.last_time2; 6314 do_div(diffms, NSEC_PER_MSEC); 6315 6316 /* Don't divide by 0 */ 6317 if (!diffms) 6318 return; 6319 6320 count = I915_READ(GFXEC); 6321 6322 if (count < dev_priv->ips.last_count2) { 6323 diff = ~0UL - dev_priv->ips.last_count2; 6324 diff += count; 6325 } else { 6326 diff = count - dev_priv->ips.last_count2; 6327 } 6328 6329 dev_priv->ips.last_count2 = count; 6330 dev_priv->ips.last_time2 = now; 6331 6332 /* More magic constants... */ 6333 diff = diff * 1181; 6334 diff = div_u64(diff, diffms * 10); 6335 dev_priv->ips.gfx_power = diff; 6336 } 6337 6338 void i915_update_gfx_val(struct drm_i915_private *dev_priv) 6339 { 6340 if (INTEL_INFO(dev_priv)->gen != 5) 6341 return; 6342 6343 spin_lock_irq(&mchdev_lock); 6344 6345 __i915_update_gfx_val(dev_priv); 6346 6347 spin_unlock_irq(&mchdev_lock); 6348 } 6349 6350 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) 6351 { 6352 unsigned long t, corr, state1, corr2, state2; 6353 u32 pxvid, ext_v; 6354 6355 assert_spin_locked(&mchdev_lock); 6356 6357 pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq)); 6358 pxvid = (pxvid >> 24) & 0x7f; 6359 ext_v = pvid_to_extvid(dev_priv, pxvid); 6360 6361 state1 = ext_v; 6362 6363 t = i915_mch_val(dev_priv); 6364 6365 /* Revel in the empirically derived constants */ 6366 6367 /* Correction factor in 1/100000 units */ 6368 if (t > 80) 6369 corr = ((t * 2349) + 135940); 6370 else if (t >= 50) 6371 corr = ((t * 964) + 29317); 6372 else /* < 50 */ 6373 corr = ((t * 301) + 1004); 6374 6375 corr = corr * ((150142 * state1) / 10000 - 78642); 6376 corr /= 100000; 6377 corr2 = (corr * dev_priv->ips.corr); 6378 6379 state2 = (corr2 * state1) / 10000; 6380 state2 /= 100; /* convert to mW */ 6381 6382 __i915_update_gfx_val(dev_priv); 6383 6384 return dev_priv->ips.gfx_power + state2; 6385 } 6386 6387 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) 6388 { 6389 unsigned long val; 6390 6391 if (INTEL_INFO(dev_priv)->gen != 5) 6392 return 0; 6393 6394 spin_lock_irq(&mchdev_lock); 6395 6396 val = __i915_gfx_val(dev_priv); 6397 6398 spin_unlock_irq(&mchdev_lock); 6399 6400 return val; 6401 } 6402 6403 /** 6404 * i915_read_mch_val - return value for IPS use 6405 * 6406 * Calculate and return a value for the IPS driver to use when deciding whether 6407 * we have thermal and power headroom to increase CPU or GPU power budget. 6408 */ 6409 unsigned long i915_read_mch_val(void) 6410 { 6411 struct drm_i915_private *dev_priv; 6412 unsigned long chipset_val, graphics_val, ret = 0; 6413 6414 spin_lock_irq(&mchdev_lock); 6415 if (!i915_mch_dev) 6416 goto out_unlock; 6417 dev_priv = i915_mch_dev; 6418 6419 chipset_val = __i915_chipset_val(dev_priv); 6420 graphics_val = __i915_gfx_val(dev_priv); 6421 6422 ret = chipset_val + graphics_val; 6423 6424 out_unlock: 6425 spin_unlock_irq(&mchdev_lock); 6426 6427 return ret; 6428 } 6429 EXPORT_SYMBOL_GPL(i915_read_mch_val); 6430 6431 /** 6432 * i915_gpu_raise - raise GPU frequency limit 6433 * 6434 * Raise the limit; IPS indicates we have thermal headroom. 
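 *
 * Note that the Ironlake delay values run inverse to frequency: a
 * smaller max_delay means a higher allowed frequency, so "raising" the
 * limit below means decrementing ips.max_delay towards ips.fmax.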
6435 */ 6436 bool i915_gpu_raise(void) 6437 { 6438 struct drm_i915_private *dev_priv; 6439 bool ret = true; 6440 6441 spin_lock_irq(&mchdev_lock); 6442 if (!i915_mch_dev) { 6443 ret = false; 6444 goto out_unlock; 6445 } 6446 dev_priv = i915_mch_dev; 6447 6448 if (dev_priv->ips.max_delay > dev_priv->ips.fmax) 6449 dev_priv->ips.max_delay--; 6450 6451 out_unlock: 6452 spin_unlock_irq(&mchdev_lock); 6453 6454 return ret; 6455 } 6456 EXPORT_SYMBOL_GPL(i915_gpu_raise); 6457 6458 /** 6459 * i915_gpu_lower - lower GPU frequency limit 6460 * 6461 * IPS indicates we're close to a thermal limit, so throttle back the GPU 6462 * frequency maximum. 6463 */ 6464 bool i915_gpu_lower(void) 6465 { 6466 struct drm_i915_private *dev_priv; 6467 bool ret = true; 6468 6469 spin_lock_irq(&mchdev_lock); 6470 if (!i915_mch_dev) { 6471 ret = false; 6472 goto out_unlock; 6473 } 6474 dev_priv = i915_mch_dev; 6475 6476 if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) 6477 dev_priv->ips.max_delay++; 6478 6479 out_unlock: 6480 spin_unlock_irq(&mchdev_lock); 6481 6482 return ret; 6483 } 6484 EXPORT_SYMBOL_GPL(i915_gpu_lower); 6485 6486 /** 6487 * i915_gpu_busy - indicate GPU business to IPS 6488 * 6489 * Tell the IPS driver whether or not the GPU is busy. 6490 */ 6491 bool i915_gpu_busy(void) 6492 { 6493 bool ret = false; 6494 6495 spin_lock_irq(&mchdev_lock); 6496 if (i915_mch_dev) 6497 ret = i915_mch_dev->gt.awake; 6498 spin_unlock_irq(&mchdev_lock); 6499 6500 return ret; 6501 } 6502 EXPORT_SYMBOL_GPL(i915_gpu_busy); 6503 6504 /** 6505 * i915_gpu_turbo_disable - disable graphics turbo 6506 * 6507 * Disable graphics turbo by resetting the max frequency and setting the 6508 * current frequency to the default. 6509 */ 6510 bool i915_gpu_turbo_disable(void) 6511 { 6512 struct drm_i915_private *dev_priv; 6513 bool ret = true; 6514 6515 spin_lock_irq(&mchdev_lock); 6516 if (!i915_mch_dev) { 6517 ret = false; 6518 goto out_unlock; 6519 } 6520 dev_priv = i915_mch_dev; 6521 6522 dev_priv->ips.max_delay = dev_priv->ips.fstart; 6523 6524 if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart)) 6525 ret = false; 6526 6527 out_unlock: 6528 spin_unlock_irq(&mchdev_lock); 6529 6530 return ret; 6531 } 6532 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); 6533 6534 /** 6535 * Tells the intel_ips driver that the i915 driver is now loaded, if 6536 * IPS got loaded first. 6537 * 6538 * This awkward dance is so that neither module has to depend on the 6539 * other in order for IPS to do the appropriate communication of 6540 * GPU turbo limits to i915. 6541 */ 6542 static void 6543 ips_ping_for_i915_load(void) 6544 { 6545 #if 0 6546 void (*link)(void); 6547 6548 link = symbol_get(ips_link_to_i915_driver); 6549 if (link) { 6550 link(); 6551 symbol_put(ips_link_to_i915_driver); 6552 } 6553 #endif 6554 } 6555 6556 void intel_gpu_ips_init(struct drm_i915_private *dev_priv) 6557 { 6558 /* We only register the i915 ips part with intel-ips once everything is 6559 * set up, to avoid intel-ips sneaking in and reading bogus values. 
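 *
 * Registration simply publishes dev_priv through the i915_mch_dev
 * pointer under mchdev_lock; every exported i915_gpu_*() /
 * i915_read_mch_val() helper above bails out while that pointer is NULL.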
*/ 6560 spin_lock_irq(&mchdev_lock); 6561 i915_mch_dev = dev_priv; 6562 spin_unlock_irq(&mchdev_lock); 6563 6564 ips_ping_for_i915_load(); 6565 } 6566 6567 void intel_gpu_ips_teardown(void) 6568 { 6569 spin_lock_irq(&mchdev_lock); 6570 i915_mch_dev = NULL; 6571 spin_unlock_irq(&mchdev_lock); 6572 } 6573 6574 static void intel_init_emon(struct drm_i915_private *dev_priv) 6575 { 6576 u32 lcfuse; 6577 u8 pxw[16]; 6578 int i; 6579 6580 /* Disable to program */ 6581 I915_WRITE(ECR, 0); 6582 POSTING_READ(ECR); 6583 6584 /* Program energy weights for various events */ 6585 I915_WRITE(SDEW, 0x15040d00); 6586 I915_WRITE(CSIEW0, 0x007f0000); 6587 I915_WRITE(CSIEW1, 0x1e220004); 6588 I915_WRITE(CSIEW2, 0x04000004); 6589 6590 for (i = 0; i < 5; i++) 6591 I915_WRITE(PEW(i), 0); 6592 for (i = 0; i < 3; i++) 6593 I915_WRITE(DEW(i), 0); 6594 6595 /* Program P-state weights to account for frequency power adjustment */ 6596 for (i = 0; i < 16; i++) { 6597 u32 pxvidfreq = I915_READ(PXVFREQ(i)); 6598 unsigned long freq = intel_pxfreq(pxvidfreq); 6599 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> 6600 PXVFREQ_PX_SHIFT; 6601 unsigned long val; 6602 6603 val = vid * vid; 6604 val *= (freq / 1000); 6605 val *= 255; 6606 val /= (127*127*900); 6607 if (val > 0xff) 6608 DRM_ERROR("bad pxval: %ld\n", val); 6609 pxw[i] = val; 6610 } 6611 /* Render standby states get 0 weight */ 6612 pxw[14] = 0; 6613 pxw[15] = 0; 6614 6615 for (i = 0; i < 4; i++) { 6616 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | 6617 (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); 6618 I915_WRITE(PXW(i), val); 6619 } 6620 6621 /* Adjust magic regs to magic values (more experimental results) */ 6622 I915_WRITE(OGW0, 0); 6623 I915_WRITE(OGW1, 0); 6624 I915_WRITE(EG0, 0x00007f00); 6625 I915_WRITE(EG1, 0x0000000e); 6626 I915_WRITE(EG2, 0x000e0000); 6627 I915_WRITE(EG3, 0x68000300); 6628 I915_WRITE(EG4, 0x42000000); 6629 I915_WRITE(EG5, 0x00140031); 6630 I915_WRITE(EG6, 0); 6631 I915_WRITE(EG7, 0); 6632 6633 for (i = 0; i < 8; i++) 6634 I915_WRITE(PXWL(i), 0); 6635 6636 /* Enable PMON + select events */ 6637 I915_WRITE(ECR, 0x80000019); 6638 6639 lcfuse = I915_READ(LCFUSE02); 6640 6641 dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); 6642 } 6643 6644 void intel_init_gt_powersave(struct drm_i915_private *dev_priv) 6645 { 6646 /* 6647 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a 6648 * requirement. 
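 * When RC6 has been disabled with i915.enable_rc6=0, the
 * intel_runtime_pm_get() below therefore pins the device awake for the
 * driver's lifetime; the matching intel_runtime_pm_put() is issued from
 * intel_cleanup_gt_powersave().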
*/
6650 if (!i915.enable_rc6) {
6651 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
6652 intel_runtime_pm_get(dev_priv);
6653 }
6654
6655 mutex_lock(&dev_priv->drm.struct_mutex);
6656 mutex_lock(&dev_priv->rps.hw_lock);
6657
6658 /* Initialize RPS limits (for userspace) */
6659 if (IS_CHERRYVIEW(dev_priv))
6660 cherryview_init_gt_powersave(dev_priv);
6661 else if (IS_VALLEYVIEW(dev_priv))
6662 valleyview_init_gt_powersave(dev_priv);
6663 else if (INTEL_GEN(dev_priv) >= 6)
6664 gen6_init_rps_frequencies(dev_priv);
6665
6666 /* Derive initial user preferences/limits from the hardware limits */
6667 dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
6668 dev_priv->rps.cur_freq = dev_priv->rps.idle_freq;
6669
6670 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
6671 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
6672
6673 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6674 dev_priv->rps.min_freq_softlimit =
6675 max_t(int,
6676 dev_priv->rps.efficient_freq,
6677 intel_freq_opcode(dev_priv, 450));
6678
6679 /* After setting max-softlimit, find the overclock max freq */
6680 if (IS_GEN6(dev_priv) ||
6681 IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
6682 u32 params = 0;
6683
6684 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
6685 if (params & BIT(31)) { /* OC supported */
6686 DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
6687 (dev_priv->rps.max_freq & 0xff) * 50,
6688 (params & 0xff) * 50);
6689 dev_priv->rps.max_freq = params & 0xff;
6690 }
6691 }
6692
6693 /* Finally allow us to boost to max by default */
6694 dev_priv->rps.boost_freq = dev_priv->rps.max_freq;
6695
6696 mutex_unlock(&dev_priv->rps.hw_lock);
6697 mutex_unlock(&dev_priv->drm.struct_mutex);
6698
6699 intel_autoenable_gt_powersave(dev_priv);
6700 }
6701
6702 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
6703 {
6704 if (IS_VALLEYVIEW(dev_priv))
6705 valleyview_cleanup_gt_powersave(dev_priv);
6706
6707 if (!i915.enable_rc6)
6708 intel_runtime_pm_put(dev_priv);
6709 }
6710
6711 /**
6712 * intel_suspend_gt_powersave - suspend PM work and helper threads
6713 * @dev_priv: i915 device
6714 *
6715 * We don't want to disable RC6 or other features here, we just want
6716 * to make sure any work we've queued has finished and won't bother
6717 * us while we're suspended.
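 *
 * If the autoenable work queued by intel_autoenable_gt_powersave() is
 * still pending at this point, cancelling it below also drops the
 * runtime-PM reference that was taken with
 * intel_runtime_pm_get_noresume() when the work was queued.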
6718 */ 6719 void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) 6720 { 6721 if (INTEL_GEN(dev_priv) < 6) 6722 return; 6723 6724 if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) 6725 intel_runtime_pm_put(dev_priv); 6726 6727 /* gen6_rps_idle() will be called later to disable interrupts */ 6728 } 6729 6730 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) 6731 { 6732 dev_priv->rps.enabled = true; /* force disabling */ 6733 intel_disable_gt_powersave(dev_priv); 6734 6735 gen6_reset_rps_interrupts(dev_priv); 6736 } 6737 6738 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) 6739 { 6740 if (!READ_ONCE(dev_priv->rps.enabled)) 6741 return; 6742 6743 mutex_lock(&dev_priv->rps.hw_lock); 6744 6745 if (INTEL_GEN(dev_priv) >= 9) { 6746 gen9_disable_rc6(dev_priv); 6747 gen9_disable_rps(dev_priv); 6748 } else if (IS_CHERRYVIEW(dev_priv)) { 6749 cherryview_disable_rps(dev_priv); 6750 } else if (IS_VALLEYVIEW(dev_priv)) { 6751 valleyview_disable_rps(dev_priv); 6752 } else if (INTEL_GEN(dev_priv) >= 6) { 6753 gen6_disable_rps(dev_priv); 6754 } else if (IS_IRONLAKE_M(dev_priv)) { 6755 ironlake_disable_drps(dev_priv); 6756 } 6757 6758 dev_priv->rps.enabled = false; 6759 mutex_unlock(&dev_priv->rps.hw_lock); 6760 } 6761 6762 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) 6763 { 6764 /* We shouldn't be disabling as we submit, so this should be less 6765 * racy than it appears! 6766 */ 6767 if (READ_ONCE(dev_priv->rps.enabled)) 6768 return; 6769 6770 /* Powersaving is controlled by the host when inside a VM */ 6771 if (intel_vgpu_active(dev_priv)) 6772 return; 6773 6774 mutex_lock(&dev_priv->rps.hw_lock); 6775 6776 if (IS_CHERRYVIEW(dev_priv)) { 6777 cherryview_enable_rps(dev_priv); 6778 } else if (IS_VALLEYVIEW(dev_priv)) { 6779 valleyview_enable_rps(dev_priv); 6780 } else if (INTEL_GEN(dev_priv) >= 9) { 6781 gen9_enable_rc6(dev_priv); 6782 gen9_enable_rps(dev_priv); 6783 if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) 6784 gen6_update_ring_freq(dev_priv); 6785 } else if (IS_BROADWELL(dev_priv)) { 6786 gen8_enable_rps(dev_priv); 6787 gen6_update_ring_freq(dev_priv); 6788 } else if (INTEL_GEN(dev_priv) >= 6) { 6789 gen6_enable_rps(dev_priv); 6790 gen6_update_ring_freq(dev_priv); 6791 } else if (IS_IRONLAKE_M(dev_priv)) { 6792 ironlake_enable_drps(dev_priv); 6793 intel_init_emon(dev_priv); 6794 } 6795 6796 WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); 6797 WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq); 6798 6799 WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq); 6800 WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); 6801 6802 dev_priv->rps.enabled = true; 6803 mutex_unlock(&dev_priv->rps.hw_lock); 6804 } 6805 6806 static void __intel_autoenable_gt_powersave(struct work_struct *work) 6807 { 6808 struct drm_i915_private *dev_priv = 6809 container_of(work, typeof(*dev_priv), rps.autoenable_work.work); 6810 struct intel_engine_cs *rcs; 6811 struct drm_i915_gem_request *req; 6812 6813 if (READ_ONCE(dev_priv->rps.enabled)) 6814 goto out; 6815 6816 rcs = dev_priv->engine[RCS]; 6817 if (rcs->last_context) 6818 goto out; 6819 6820 if (!rcs->init_context) 6821 goto out; 6822 6823 mutex_lock(&dev_priv->drm.struct_mutex); 6824 6825 req = i915_gem_request_alloc(rcs, dev_priv->kernel_context); 6826 if (IS_ERR(req)) 6827 goto unlock; 6828 6829 if (!i915.enable_execlists && i915_switch_context(req) == 0) 6830 rcs->init_context(req); 6831 6832 /* Mark the device busy, calling 
intel_enable_gt_powersave() */ 6833 i915_add_request_no_flush(req); 6834 6835 unlock: 6836 mutex_unlock(&dev_priv->drm.struct_mutex); 6837 out: 6838 intel_runtime_pm_put(dev_priv); 6839 } 6840 6841 void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) 6842 { 6843 if (READ_ONCE(dev_priv->rps.enabled)) 6844 return; 6845 6846 if (IS_IRONLAKE_M(dev_priv)) { 6847 ironlake_enable_drps(dev_priv); 6848 intel_init_emon(dev_priv); 6849 } else if (INTEL_INFO(dev_priv)->gen >= 6) { 6850 /* 6851 * PCU communication is slow and this doesn't need to be 6852 * done at any specific time, so do this out of our fast path 6853 * to make resume and init faster. 6854 * 6855 * We depend on the HW RC6 power context save/restore 6856 * mechanism when entering D3 through runtime PM suspend. So 6857 * disable RPM until RPS/RC6 is properly setup. We can only 6858 * get here via the driver load/system resume/runtime resume 6859 * paths, so the _noresume version is enough (and in case of 6860 * runtime resume it's necessary). 6861 */ 6862 if (queue_delayed_work(dev_priv->wq, 6863 &dev_priv->rps.autoenable_work, 6864 round_jiffies_up_relative(HZ))) 6865 intel_runtime_pm_get_noresume(dev_priv); 6866 } 6867 } 6868 6869 static void ibx_init_clock_gating(struct drm_device *dev) 6870 { 6871 struct drm_i915_private *dev_priv = to_i915(dev); 6872 6873 /* 6874 * On Ibex Peak and Cougar Point, we need to disable clock 6875 * gating for the panel power sequencer or it will fail to 6876 * start up when no ports are active. 6877 */ 6878 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); 6879 } 6880 6881 static void g4x_disable_trickle_feed(struct drm_device *dev) 6882 { 6883 struct drm_i915_private *dev_priv = to_i915(dev); 6884 enum i915_pipe pipe; 6885 6886 for_each_pipe(dev_priv, pipe) { 6887 I915_WRITE(DSPCNTR(pipe), 6888 I915_READ(DSPCNTR(pipe)) | 6889 DISPPLANE_TRICKLE_FEED_DISABLE); 6890 6891 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe))); 6892 POSTING_READ(DSPSURF(pipe)); 6893 } 6894 } 6895 6896 static void ilk_init_lp_watermarks(struct drm_device *dev) 6897 { 6898 struct drm_i915_private *dev_priv = to_i915(dev); 6899 6900 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); 6901 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); 6902 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); 6903 6904 /* 6905 * Don't touch WM1S_LP_EN here. 6906 * Doing so could cause underruns. 
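 * (Only the WM1_LP_SR_EN bit in WM[1-3]_LP_ILK is cleared above; the
 * separate WM1S_LP_EN sprite-watermark enable is deliberately left
 * alone.)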
6907 */ 6908 } 6909 6910 static void ironlake_init_clock_gating(struct drm_device *dev) 6911 { 6912 struct drm_i915_private *dev_priv = to_i915(dev); 6913 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; 6914 6915 /* 6916 * Required for FBC 6917 * WaFbcDisableDpfcClockGating:ilk 6918 */ 6919 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | 6920 ILK_DPFCUNIT_CLOCK_GATE_DISABLE | 6921 ILK_DPFDUNIT_CLOCK_GATE_ENABLE; 6922 6923 I915_WRITE(PCH_3DCGDIS0, 6924 MARIUNIT_CLOCK_GATE_DISABLE | 6925 SVSMUNIT_CLOCK_GATE_DISABLE); 6926 I915_WRITE(PCH_3DCGDIS1, 6927 VFMUNIT_CLOCK_GATE_DISABLE); 6928 6929 /* 6930 * According to the spec the following bits should be set in 6931 * order to enable memory self-refresh 6932 * The bit 22/21 of 0x42004 6933 * The bit 5 of 0x42020 6934 * The bit 15 of 0x45000 6935 */ 6936 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6937 (I915_READ(ILK_DISPLAY_CHICKEN2) | 6938 ILK_DPARB_GATE | ILK_VSDPFD_FULL)); 6939 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; 6940 I915_WRITE(DISP_ARB_CTL, 6941 (I915_READ(DISP_ARB_CTL) | 6942 DISP_FBC_WM_DIS)); 6943 6944 ilk_init_lp_watermarks(dev); 6945 6946 /* 6947 * Based on the document from hardware guys the following bits 6948 * should be set unconditionally in order to enable FBC. 6949 * The bit 22 of 0x42000 6950 * The bit 22 of 0x42004 6951 * The bit 7,8,9 of 0x42020. 6952 */ 6953 if (IS_IRONLAKE_M(dev_priv)) { 6954 /* WaFbcAsynchFlipDisableFbcQueue:ilk */ 6955 I915_WRITE(ILK_DISPLAY_CHICKEN1, 6956 I915_READ(ILK_DISPLAY_CHICKEN1) | 6957 ILK_FBCQ_DIS); 6958 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6959 I915_READ(ILK_DISPLAY_CHICKEN2) | 6960 ILK_DPARB_GATE); 6961 } 6962 6963 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); 6964 6965 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6966 I915_READ(ILK_DISPLAY_CHICKEN2) | 6967 ILK_ELPIN_409_SELECT); 6968 I915_WRITE(_3D_CHICKEN2, 6969 _3D_CHICKEN2_WM_READ_PIPELINED << 16 | 6970 _3D_CHICKEN2_WM_READ_PIPELINED); 6971 6972 /* WaDisableRenderCachePipelinedFlush:ilk */ 6973 I915_WRITE(CACHE_MODE_0, 6974 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); 6975 6976 /* WaDisable_RenderCache_OperationalFlush:ilk */ 6977 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6978 6979 g4x_disable_trickle_feed(dev); 6980 6981 ibx_init_clock_gating(dev); 6982 } 6983 6984 static void cpt_init_clock_gating(struct drm_device *dev) 6985 { 6986 struct drm_i915_private *dev_priv = to_i915(dev); 6987 int pipe; 6988 uint32_t val; 6989 6990 /* 6991 * On Ibex Peak and Cougar Point, we need to disable clock 6992 * gating for the panel power sequencer or it will fail to 6993 * start up when no ports are active. 6994 */ 6995 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE | 6996 PCH_DPLUNIT_CLOCK_GATE_DISABLE | 6997 PCH_CPUNIT_CLOCK_GATE_DISABLE); 6998 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | 6999 DPLS_EDP_PPS_FIX_DIS); 7000 /* The below fixes the weird display corruption, a few pixels shifted 7001 * downward, on (only) LVDS of some HP laptops with IVY. 
7002 */
7003 for_each_pipe(dev_priv, pipe) {
7004 val = I915_READ(TRANS_CHICKEN2(pipe));
7005 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
7006 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
7007 if (dev_priv->vbt.fdi_rx_polarity_inverted)
7008 val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
7009 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
7010 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
7011 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
7012 I915_WRITE(TRANS_CHICKEN2(pipe), val);
7013 }
7014 /* WADP0ClockGatingDisable */
7015 for_each_pipe(dev_priv, pipe) {
7016 I915_WRITE(TRANS_CHICKEN1(pipe),
7017 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
7018 }
7019 }
7020
7021 static void gen6_check_mch_setup(struct drm_device *dev)
7022 {
7023 struct drm_i915_private *dev_priv = to_i915(dev);
7024 uint32_t tmp;
7025
7026 tmp = I915_READ(MCH_SSKPD);
7027 if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
7028 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x. This can cause underruns.\n",
7029 tmp);
7030 }
7031
7032 static void gen6_init_clock_gating(struct drm_device *dev)
7033 {
7034 struct drm_i915_private *dev_priv = to_i915(dev);
7035 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
7036
7037 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
7038
7039 I915_WRITE(ILK_DISPLAY_CHICKEN2,
7040 I915_READ(ILK_DISPLAY_CHICKEN2) |
7041 ILK_ELPIN_409_SELECT);
7042
7043 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
7044 I915_WRITE(_3D_CHICKEN,
7045 _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
7046
7047 /* WaDisable_RenderCache_OperationalFlush:snb */
7048 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7049
7050 /*
7051 * BSpec recommends 8x4 when MSAA is used,
7052 * however in practice 16x4 seems fastest.
7053 *
7054 * Note that PS/WM thread counts depend on the WIZ hashing
7055 * disable bit, which we don't touch here, but it's good
7056 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
7057 */
7058 I915_WRITE(GEN6_GT_MODE,
7059 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
7060
7061 ilk_init_lp_watermarks(dev);
7062
7063 I915_WRITE(CACHE_MODE_0,
7064 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
7065
7066 I915_WRITE(GEN6_UCGCTL1,
7067 I915_READ(GEN6_UCGCTL1) |
7068 GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
7069 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
7070
7071 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
7072 * gating disable must be set. Failure to set it results in
7073 * flickering pixels due to Z write ordering failures after
7074 * some amount of runtime in the Mesa "fire" demo, and Unigine
7075 * Sanctuary and Tropics, and apparently anything else with
7076 * alpha test or pixel discard.
7077 *
7078 * According to the spec, bit 11 (RCCUNIT) must also be set,
7079 * but we didn't debug actual testcases to find it out.
7080 *
7081 * WaDisableRCCUnitClockGating:snb
7082 * WaDisableRCPBUnitClockGating:snb
7083 */
7084 I915_WRITE(GEN6_UCGCTL2,
7085 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
7086 GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
7087
7088 /* WaStripsFansDisableFastClipPerformanceFix:snb */
7089 I915_WRITE(_3D_CHICKEN3,
7090 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
7091
7092 /*
7093 * Bspec says:
7094 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
7095 * 3DSTATE_SF number of SF output attributes is more than 16."
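 *
 * The bit is simply set unconditionally below, since the driver cannot
 * know in advance how many SF output attributes userspace will end up
 * using.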
7096 */ 7097 I915_WRITE(_3D_CHICKEN3, 7098 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); 7099 7100 /* 7101 * According to the spec the following bits should be 7102 * set in order to enable memory self-refresh and fbc: 7103 * The bit21 and bit22 of 0x42000 7104 * The bit21 and bit22 of 0x42004 7105 * The bit5 and bit7 of 0x42020 7106 * The bit14 of 0x70180 7107 * The bit14 of 0x71180 7108 * 7109 * WaFbcAsynchFlipDisableFbcQueue:snb 7110 */ 7111 I915_WRITE(ILK_DISPLAY_CHICKEN1, 7112 I915_READ(ILK_DISPLAY_CHICKEN1) | 7113 ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS); 7114 I915_WRITE(ILK_DISPLAY_CHICKEN2, 7115 I915_READ(ILK_DISPLAY_CHICKEN2) | 7116 ILK_DPARB_GATE | ILK_VSDPFD_FULL); 7117 I915_WRITE(ILK_DSPCLK_GATE_D, 7118 I915_READ(ILK_DSPCLK_GATE_D) | 7119 ILK_DPARBUNIT_CLOCK_GATE_ENABLE | 7120 ILK_DPFDUNIT_CLOCK_GATE_ENABLE); 7121 7122 g4x_disable_trickle_feed(dev); 7123 7124 cpt_init_clock_gating(dev); 7125 7126 gen6_check_mch_setup(dev); 7127 } 7128 7129 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) 7130 { 7131 uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE); 7132 7133 /* 7134 * WaVSThreadDispatchOverride:ivb,vlv 7135 * 7136 * This actually overrides the dispatch 7137 * mode for all thread types. 7138 */ 7139 reg &= ~GEN7_FF_SCHED_MASK; 7140 reg |= GEN7_FF_TS_SCHED_HW; 7141 reg |= GEN7_FF_VS_SCHED_HW; 7142 reg |= GEN7_FF_DS_SCHED_HW; 7143 7144 I915_WRITE(GEN7_FF_THREAD_MODE, reg); 7145 } 7146 7147 static void lpt_init_clock_gating(struct drm_device *dev) 7148 { 7149 struct drm_i915_private *dev_priv = to_i915(dev); 7150 7151 /* 7152 * TODO: this bit should only be enabled when really needed, then 7153 * disabled when not needed anymore in order to save power. 7154 */ 7155 if (HAS_PCH_LPT_LP(dev_priv)) 7156 I915_WRITE(SOUTH_DSPCLK_GATE_D, 7157 I915_READ(SOUTH_DSPCLK_GATE_D) | 7158 PCH_LP_PARTITION_LEVEL_DISABLE); 7159 7160 /* WADPOClockGatingDisable:hsw */ 7161 I915_WRITE(TRANS_CHICKEN1(PIPE_A), 7162 I915_READ(TRANS_CHICKEN1(PIPE_A)) | 7163 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); 7164 } 7165 7166 static void lpt_suspend_hw(struct drm_device *dev) 7167 { 7168 struct drm_i915_private *dev_priv = to_i915(dev); 7169 7170 if (HAS_PCH_LPT_LP(dev_priv)) { 7171 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); 7172 7173 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; 7174 I915_WRITE(SOUTH_DSPCLK_GATE_D, val); 7175 } 7176 } 7177 7178 static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, 7179 int general_prio_credits, 7180 int high_prio_credits) 7181 { 7182 u32 misccpctl; 7183 7184 /* WaTempDisableDOPClkGating:bdw */ 7185 misccpctl = I915_READ(GEN7_MISCCPCTL); 7186 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); 7187 7188 I915_WRITE(GEN8_L3SQCREG1, 7189 L3_GENERAL_PRIO_CREDITS(general_prio_credits) | 7190 L3_HIGH_PRIO_CREDITS(high_prio_credits)); 7191 7192 /* 7193 * Wait at least 100 clocks before re-enabling clock gating. 7194 * See the definition of L3SQCREG1 in BSpec. 
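 *
 * For example, the Broadwell caller below passes
 * gen8_set_l3sqc_credits(dev_priv, 30, 2), i.e. 30 general-priority and
 * 2 high-priority credits packed via L3_GENERAL_PRIO_CREDITS() /
 * L3_HIGH_PRIO_CREDITS(); the POSTING_READ() plus udelay(1) below should
 * comfortably cover the 100-clock wait.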
7195 */ 7196 POSTING_READ(GEN8_L3SQCREG1); 7197 udelay(1); 7198 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 7199 } 7200 7201 static void kabylake_init_clock_gating(struct drm_device *dev) 7202 { 7203 struct drm_i915_private *dev_priv = dev->dev_private; 7204 7205 gen9_init_clock_gating(dev); 7206 7207 /* WaDisableSDEUnitClockGating:kbl */ 7208 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) 7209 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 7210 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 7211 7212 /* WaDisableGamClockGating:kbl */ 7213 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) 7214 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | 7215 GEN6_GAMUNIT_CLOCK_GATE_DISABLE); 7216 7217 /* WaFbcNukeOnHostModify:kbl */ 7218 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | 7219 ILK_DPFC_NUKE_ON_ANY_MODIFICATION); 7220 } 7221 7222 static void skylake_init_clock_gating(struct drm_device *dev) 7223 { 7224 struct drm_i915_private *dev_priv = dev->dev_private; 7225 7226 gen9_init_clock_gating(dev); 7227 7228 /* WAC6entrylatency:skl */ 7229 I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) | 7230 FBC_LLC_FULLY_OPEN); 7231 7232 /* WaFbcNukeOnHostModify:skl */ 7233 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | 7234 ILK_DPFC_NUKE_ON_ANY_MODIFICATION); 7235 } 7236 7237 static void broadwell_init_clock_gating(struct drm_device *dev) 7238 { 7239 struct drm_i915_private *dev_priv = to_i915(dev); 7240 enum i915_pipe pipe; 7241 7242 ilk_init_lp_watermarks(dev); 7243 7244 /* WaSwitchSolVfFArbitrationPriority:bdw */ 7245 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); 7246 7247 /* WaPsrDPAMaskVBlankInSRD:bdw */ 7248 I915_WRITE(CHICKEN_PAR1_1, 7249 I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); 7250 7251 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ 7252 for_each_pipe(dev_priv, pipe) { 7253 I915_WRITE(CHICKEN_PIPESL_1(pipe), 7254 I915_READ(CHICKEN_PIPESL_1(pipe)) | 7255 BDW_DPRS_MASK_VBLANK_SRD); 7256 } 7257 7258 /* WaVSRefCountFullforceMissDisable:bdw */ 7259 /* WaDSRefCountFullforceMissDisable:bdw */ 7260 I915_WRITE(GEN7_FF_THREAD_MODE, 7261 I915_READ(GEN7_FF_THREAD_MODE) & 7262 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); 7263 7264 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, 7265 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); 7266 7267 /* WaDisableSDEUnitClockGating:bdw */ 7268 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 7269 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 7270 7271 /* WaProgramL3SqcReg1Default:bdw */ 7272 gen8_set_l3sqc_credits(dev_priv, 30, 2); 7273 7274 /* 7275 * WaGttCachingOffByDefault:bdw 7276 * GTT cache may not work with big pages, so if those 7277 * are ever enabled GTT cache may need to be disabled. 7278 */ 7279 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); 7280 7281 /* WaKVMNotificationOnConfigChange:bdw */ 7282 I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1) 7283 | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); 7284 7285 lpt_init_clock_gating(dev); 7286 } 7287 7288 static void haswell_init_clock_gating(struct drm_device *dev) 7289 { 7290 struct drm_i915_private *dev_priv = to_i915(dev); 7291 7292 ilk_init_lp_watermarks(dev); 7293 7294 /* L3 caching of data atomics doesn't work -- disable it. 
static void haswell_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	ilk_init_lp_watermarks(dev);

	/* L3 caching of data atomics doesn't work -- disable it. */
	I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
	I915_WRITE(HSW_ROW_CHICKEN3,
		   _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));

	/* This is required by WaCatErrorRejectionIssue:hsw */
	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);

	/* WaVSRefCountFullforceMissDisable:hsw */
	I915_WRITE(GEN7_FF_THREAD_MODE,
		   I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);

	/* WaDisable_RenderCache_OperationalFlush:hsw */
	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));

	/* enable HiZ Raw Stall Optimization */
	I915_WRITE(CACHE_MODE_0_GEN7,
		   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));

	/* WaDisable4x2SubspanOptimization:hsw */
	I915_WRITE(CACHE_MODE_1,
		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	I915_WRITE(GEN7_GT_MODE,
		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));

	/* WaSampleCChickenBitEnable:hsw */
	I915_WRITE(HALF_SLICE_CHICKEN3,
		   _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));

	/* WaSwitchSolVfFArbitrationPriority:hsw */
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);

	/* WaRsPkgCStateDisplayPMReq:hsw */
	I915_WRITE(CHICKEN_PAR1_1,
		   I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);

	lpt_init_clock_gating(dev);
}
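/*
 * Note on the _MASKED_BIT_*()/_MASKED_FIELD() writes used throughout
 * these functions: such registers carry a write-enable mask in their
 * upper 16 bits, so individual bits can be flipped without a
 * read-modify-write cycle. Conceptually (a sketch only; the real macros
 * live in i915_reg.h and also take care of type width):
 *
 *	#define EXAMPLE_MASKED_BIT_ENABLE(a)  (((a) << 16) | (a))
 *	#define EXAMPLE_MASKED_BIT_DISABLE(a) ((a) << 16)
 *
 * Only the bits named in the upper half are updated by the hardware;
 * everything else in the register is left untouched.
 */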
static void ivybridge_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	uint32_t snpcr;

	ilk_init_lp_watermarks(dev);

	I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableEarlyCull:ivb */
	I915_WRITE(_3D_CHICKEN3,
		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));

	/* WaDisableBackToBackFlipFix:ivb */
	I915_WRITE(IVB_CHICKEN3,
		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
		   CHICKEN3_DGMG_DONE_FIX_DISABLE);

	/* WaDisablePSDDualDispatchEnable:ivb */
	if (IS_IVB_GT1(dev_priv))
		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));

	/* WaDisable_RenderCache_OperationalFlush:ivb */
	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));

	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);

	/* WaApplyL3ControlAndL3ChickenMode:ivb */
	I915_WRITE(GEN7_L3CNTLREG1,
		   GEN7_WA_FOR_GEN7_L3_CONTROL);
	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
		   GEN7_WA_L3_CHICKEN_MODE);
	if (IS_IVB_GT1(dev_priv)) {
		I915_WRITE(GEN7_ROW_CHICKEN2,
			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
	} else {
		/* must write both registers */
		I915_WRITE(GEN7_ROW_CHICKEN2,
			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
		I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
	}

	/* WaForceL3Serialization:ivb */
	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);

	/*
	 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
	 * This implements the WaDisableRCZUnitClockGating:ivb workaround.
	 */
	I915_WRITE(GEN6_UCGCTL2,
		   GEN6_RCZUNIT_CLOCK_GATE_DISABLE);

	/* This is required by WaCatErrorRejectionIssue:ivb */
	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);

	g4x_disable_trickle_feed(dev);

	gen7_setup_fixed_func_scheduler(dev_priv);

	if (0) { /* causes HiZ corruption on ivb:gt1 */
		/* enable HiZ Raw Stall Optimization */
		I915_WRITE(CACHE_MODE_0_GEN7,
			   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
	}

	/* WaDisable4x2SubspanOptimization:ivb */
	I915_WRITE(CACHE_MODE_1,
		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	I915_WRITE(GEN7_GT_MODE,
		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));

	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
	snpcr &= ~GEN6_MBC_SNPCR_MASK;
	snpcr |= GEN6_MBC_SNPCR_MED;
	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);

	if (!HAS_PCH_NOP(dev_priv))
		cpt_init_clock_gating(dev);

	gen6_check_mch_setup(dev);
}
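/*
 * The GEN6_MBCUNIT_SNPCR update at the end of ivybridge_init_clock_gating()
 * is the standard read-modify-write for a multi-bit field in a plain
 * (non-masked) register. The same pattern, sketched with hypothetical
 * names:
 *
 *	u32 tmp = I915_READ(SOME_REG);
 *
 *	tmp &= ~SOME_FIELD_MASK;	/+ clear the whole field first +/
 *	tmp |= SOME_FIELD_VALUE;	/+ then OR in the new setting +/
 *	I915_WRITE(SOME_REG, tmp);
 *
 * (Comment markers in the sketch are written as /+ +/ to keep this block
 * a single comment.) Unlike the masked registers above, plain registers
 * need the explicit read back.
 */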
static void valleyview_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	/* WaDisableEarlyCull:vlv */
	I915_WRITE(_3D_CHICKEN3,
		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));

	/* WaDisableBackToBackFlipFix:vlv */
	I915_WRITE(IVB_CHICKEN3,
		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
		   CHICKEN3_DGMG_DONE_FIX_DISABLE);

	/* WaPsdDispatchEnable:vlv */
	/* WaDisablePSDDualDispatchEnable:vlv */
	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
				      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));

	/* WaDisable_RenderCache_OperationalFlush:vlv */
	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));

	/* WaForceL3Serialization:vlv */
	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);

	/* WaDisableDopClockGating:vlv */
	I915_WRITE(GEN7_ROW_CHICKEN2,
		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));

	/* This is required by WaCatErrorRejectionIssue:vlv */
	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);

	gen7_setup_fixed_func_scheduler(dev_priv);

	/*
	 * According to the spec, bit 13 (RCZUNIT) must be set here as
	 * well (this mirrors the IVB requirement).
	 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
	 */
	I915_WRITE(GEN6_UCGCTL2,
		   GEN6_RCZUNIT_CLOCK_GATE_DISABLE);

	/*
	 * WaDisableL3Bank2xClockGate:vlv
	 * Disabling L3 clock gating - MMIO 940c[25] = 1
	 * Set bit 25 to disable L3_BANK_2x_CLK_GATING.
	 */
	I915_WRITE(GEN7_UCGCTL4,
		   I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);

	/*
	 * BSpec says this must be set, even though
	 * WaDisable4x2SubspanOptimization isn't listed for VLV.
	 */
	I915_WRITE(CACHE_MODE_1,
		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	I915_WRITE(GEN7_GT_MODE,
		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));

	/*
	 * WaIncreaseL3CreditsForVLVB0:vlv
	 * This is the hardware default actually.
	 */
	I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);

	/*
	 * WaDisableVLVClockGating_VBIIssue:vlv
	 * Disable clock gating on the GCFG unit to prevent a delay
	 * in the reporting of vblank events.
	 */
	I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
}
static void cherryview_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	/* WaVSRefCountFullforceMissDisable:chv */
	/* WaDSRefCountFullforceMissDisable:chv */
	I915_WRITE(GEN7_FF_THREAD_MODE,
		   I915_READ(GEN7_FF_THREAD_MODE) &
		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));

	/* WaDisableSemaphoreAndSyncFlipWait:chv */
	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));

	/* WaDisableCSUnitClockGating:chv */
	I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableSDEUnitClockGating:chv */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

	/*
	 * WaProgramL3SqcReg1Default:chv
	 * See gfxspecs/Related Documents/Performance Guide/
	 * LSQC Setting Recommendations.
	 */
	gen8_set_l3sqc_credits(dev_priv, 38, 2);

	/*
	 * GTT cache may not work with big pages, so if those
	 * are ever enabled GTT cache may need to be disabled.
	 */
	I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
}

static void g4x_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	uint32_t dspclk_gate;

	I915_WRITE(RENCLK_GATE_D1, 0);
	I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
		   GS_UNIT_CLOCK_GATE_DISABLE |
		   CL_UNIT_CLOCK_GATE_DISABLE);
	I915_WRITE(RAMCLK_GATE_D, 0);
	dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
		OVRUNIT_CLOCK_GATE_DISABLE |
		OVCUNIT_CLOCK_GATE_DISABLE;
	if (IS_GM45(dev_priv))
		dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
	I915_WRITE(DSPCLK_GATE_D, dspclk_gate);

	/* WaDisableRenderCachePipelinedFlush */
	I915_WRITE(CACHE_MODE_0,
		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));

	/* WaDisable_RenderCache_OperationalFlush:g4x */
	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));

	g4x_disable_trickle_feed(dev);
}

static void crestline_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
	I915_WRITE(RENCLK_GATE_D2, 0);
	I915_WRITE(DSPCLK_GATE_D, 0);
	I915_WRITE(RAMCLK_GATE_D, 0);
	I915_WRITE16(DEUC, 0);
	I915_WRITE(MI_ARB_STATE,
		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));

	/* WaDisable_RenderCache_OperationalFlush:gen4 */
	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
}

static void broadwater_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
		   I965_RCC_CLOCK_GATE_DISABLE |
		   I965_RCPB_CLOCK_GATE_DISABLE |
		   I965_ISC_CLOCK_GATE_DISABLE |
		   I965_FBC_CLOCK_GATE_DISABLE);
	I915_WRITE(RENCLK_GATE_D2, 0);
	I915_WRITE(MI_ARB_STATE,
		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));

	/* WaDisable_RenderCache_OperationalFlush:gen4 */
	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
}
static void gen3_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	u32 dstate = I915_READ(D_STATE);

	dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
		DSTATE_DOT_CLOCK_GATING;
	I915_WRITE(D_STATE, dstate);

	if (IS_PINEVIEW(dev))
		I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));

	/* IIR "flip pending" means done if this bit is set */
	I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));

	/* interrupts should cause a wake up from C3 */
	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));

	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
	I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));

	I915_WRITE(MI_ARB_STATE,
		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
}

static void i85x_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);

	/* interrupts should cause a wake up from C3 */
	I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
		   _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));

	I915_WRITE(MEM_MODE,
		   _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
}

static void i830_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);

	I915_WRITE(MEM_MODE,
		   _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
		   _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
}

void intel_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	dev_priv->display.init_clock_gating(dev);
}

void intel_suspend_hw(struct drm_device *dev)
{
	if (HAS_PCH_LPT(to_i915(dev)))
		lpt_suspend_hw(dev);
}

static void nop_init_clock_gating(struct drm_device *dev)
{
	DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
}
/**
 * intel_init_clock_gating_hooks - setup the clock gating hooks
 * @dev_priv: device private
 *
 * Set up the hooks that configure which clocks of a given platform can be
 * gated and also apply various GT and display specific workarounds for these
 * platforms. Note that some GT specific workarounds are applied separately
 * when GPU contexts or batchbuffers start their execution.
 */
void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
{
	if (IS_SKYLAKE(dev_priv))
		dev_priv->display.init_clock_gating = skylake_init_clock_gating;
	else if (IS_KABYLAKE(dev_priv))
		dev_priv->display.init_clock_gating = kabylake_init_clock_gating;
	else if (IS_BROXTON(dev_priv))
		dev_priv->display.init_clock_gating = bxt_init_clock_gating;
	else if (IS_BROADWELL(dev_priv))
		dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
	else if (IS_CHERRYVIEW(dev_priv))
		dev_priv->display.init_clock_gating = cherryview_init_clock_gating;
	else if (IS_HASWELL(dev_priv))
		dev_priv->display.init_clock_gating = haswell_init_clock_gating;
	else if (IS_IVYBRIDGE(dev_priv))
		dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
	else if (IS_VALLEYVIEW(dev_priv))
		dev_priv->display.init_clock_gating = valleyview_init_clock_gating;
	else if (IS_GEN6(dev_priv))
		dev_priv->display.init_clock_gating = gen6_init_clock_gating;
	else if (IS_GEN5(dev_priv))
		dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
	else if (IS_G4X(dev_priv))
		dev_priv->display.init_clock_gating = g4x_init_clock_gating;
	else if (IS_CRESTLINE(dev_priv))
		dev_priv->display.init_clock_gating = crestline_init_clock_gating;
	else if (IS_BROADWATER(dev_priv))
		dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
	else if (IS_GEN3(dev_priv))
		dev_priv->display.init_clock_gating = gen3_init_clock_gating;
	else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
		dev_priv->display.init_clock_gating = i85x_init_clock_gating;
	else if (IS_GEN2(dev_priv))
		dev_priv->display.init_clock_gating = i830_init_clock_gating;
	else {
		MISSING_CASE(INTEL_DEVID(dev_priv));
		dev_priv->display.init_clock_gating = nop_init_clock_gating;
	}
}
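/*
 * Extending the if-ladder above for a new platform is a two-line change
 * (IS_NEWPLATFORM() and newplatform_init_clock_gating() are hypothetical
 * names used only for illustration):
 *
 *	else if (IS_NEWPLATFORM(dev_priv))
 *		dev_priv->display.init_clock_gating =
 *			newplatform_init_clock_gating;
 *
 * Unknown devices deliberately fall through to nop_init_clock_gating()
 * after the MISSING_CASE() warning, so the hook is never left NULL.
 */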
" 7771 "Disable CxSR\n"); 7772 } 7773 } else if (IS_CHERRYVIEW(dev_priv)) { 7774 vlv_setup_wm_latency(dev); 7775 dev_priv->display.update_wm = vlv_update_wm; 7776 } else if (IS_VALLEYVIEW(dev_priv)) { 7777 vlv_setup_wm_latency(dev); 7778 dev_priv->display.update_wm = vlv_update_wm; 7779 } else if (IS_PINEVIEW(dev)) { 7780 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), 7781 dev_priv->is_ddr3, 7782 dev_priv->fsb_freq, 7783 dev_priv->mem_freq)) { 7784 DRM_INFO("failed to find known CxSR latency " 7785 "(found ddr%s fsb freq %d, mem freq %d), " 7786 "disabling CxSR\n", 7787 (dev_priv->is_ddr3 == 1) ? "3" : "2", 7788 dev_priv->fsb_freq, dev_priv->mem_freq); 7789 /* Disable CxSR and never update its watermark again */ 7790 intel_set_memory_cxsr(dev_priv, false); 7791 dev_priv->display.update_wm = NULL; 7792 } else 7793 dev_priv->display.update_wm = pineview_update_wm; 7794 } else if (IS_G4X(dev_priv)) { 7795 dev_priv->display.update_wm = g4x_update_wm; 7796 } else if (IS_GEN4(dev_priv)) { 7797 dev_priv->display.update_wm = i965_update_wm; 7798 } else if (IS_GEN3(dev_priv)) { 7799 dev_priv->display.update_wm = i9xx_update_wm; 7800 dev_priv->display.get_fifo_size = i9xx_get_fifo_size; 7801 } else if (IS_GEN2(dev_priv)) { 7802 if (INTEL_INFO(dev)->num_pipes == 1) { 7803 dev_priv->display.update_wm = i845_update_wm; 7804 dev_priv->display.get_fifo_size = i845_get_fifo_size; 7805 } else { 7806 dev_priv->display.update_wm = i9xx_update_wm; 7807 dev_priv->display.get_fifo_size = i830_get_fifo_size; 7808 } 7809 } else { 7810 DRM_ERROR("unexpected fall-through in intel_init_pm\n"); 7811 } 7812 } 7813 7814 static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) 7815 { 7816 uint32_t flags = 7817 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; 7818 7819 switch (flags) { 7820 case GEN6_PCODE_SUCCESS: 7821 return 0; 7822 case GEN6_PCODE_UNIMPLEMENTED_CMD: 7823 case GEN6_PCODE_ILLEGAL_CMD: 7824 return -ENXIO; 7825 case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: 7826 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: 7827 return -EOVERFLOW; 7828 case GEN6_PCODE_TIMEOUT: 7829 return -ETIMEDOUT; 7830 default: 7831 MISSING_CASE(flags) 7832 return 0; 7833 } 7834 } 7835 7836 static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv) 7837 { 7838 uint32_t flags = 7839 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; 7840 7841 switch (flags) { 7842 case GEN6_PCODE_SUCCESS: 7843 return 0; 7844 case GEN6_PCODE_ILLEGAL_CMD: 7845 return -ENXIO; 7846 case GEN7_PCODE_TIMEOUT: 7847 return -ETIMEDOUT; 7848 case GEN7_PCODE_ILLEGAL_DATA: 7849 return -EINVAL; 7850 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: 7851 return -EOVERFLOW; 7852 default: 7853 MISSING_CASE(flags); 7854 return 0; 7855 } 7856 } 7857 7858 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) 7859 { 7860 int status; 7861 7862 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 7863 7864 /* GEN6_PCODE_* are outside of the forcewake domain, we can 7865 * use te fw I915_READ variants to reduce the amount of work 7866 * required when reading/writing. 
int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
{
	int status;

	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

	/* GEN6_PCODE_* are outside of the forcewake domain, we can
	 * use the fw I915_READ variants to reduce the amount of work
	 * required when reading/writing.
	 */

	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
		DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
		return -EAGAIN;
	}

	I915_WRITE_FW(GEN6_PCODE_DATA, *val);
	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);

	if (intel_wait_for_register_fw(dev_priv,
				       GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
				       500)) {
		DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
		return -ETIMEDOUT;
	}

	*val = I915_READ_FW(GEN6_PCODE_DATA);
	I915_WRITE_FW(GEN6_PCODE_DATA, 0);

	if (INTEL_GEN(dev_priv) > 6)
		status = gen7_check_mailbox_status(dev_priv);
	else
		status = gen6_check_mailbox_status(dev_priv);

	if (status) {
		DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed: %d\n",
				 status);
		return status;
	}

	return 0;
}

int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
			    u32 mbox, u32 val)
{
	int status;

	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

	/* GEN6_PCODE_* are outside of the forcewake domain, we can
	 * use the fw I915_READ variants to reduce the amount of work
	 * required when reading/writing.
	 */

	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
		DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
		return -EAGAIN;
	}

	I915_WRITE_FW(GEN6_PCODE_DATA, val);
	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);

	if (intel_wait_for_register_fw(dev_priv,
				       GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
				       500)) {
		DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
		return -ETIMEDOUT;
	}

	I915_WRITE_FW(GEN6_PCODE_DATA, 0);

	if (INTEL_GEN(dev_priv) > 6)
		status = gen7_check_mailbox_status(dev_priv);
	else
		status = gen6_check_mailbox_status(dev_priv);

	if (status) {
		DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed: %d\n",
				 status);
		return status;
	}

	return 0;
}

static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
	/*
	 * N = val - 0xb7
	 * Slow = Fast = GPLL ref * N
	 */
	return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * (val - 0xb7), 1000);
}

static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
	return DIV_ROUND_CLOSEST(1000 * val, dev_priv->rps.gpll_ref_freq) + 0xb7;
}

static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
	/*
	 * N = val / 2
	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
	 */
	return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * val, 2 * 2 * 1000);
}

static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
	/* CHV needs even values */
	return DIV_ROUND_CLOSEST(2 * 1000 * val, dev_priv->rps.gpll_ref_freq) * 2;
}

int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
	if (IS_GEN9(dev_priv))
		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
					 GEN9_FREQ_SCALER);
	else if (IS_CHERRYVIEW(dev_priv))
		return chv_gpu_freq(dev_priv, val);
	else if (IS_VALLEYVIEW(dev_priv))
		return byt_gpu_freq(dev_priv, val);
	else
		return val * GT_FREQUENCY_MULTIPLIER;
}
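/*
 * Worked example for the conversions above, assuming the driver-wide
 * constants GT_FREQUENCY_MULTIPLIER == 50 and GEN9_FREQ_SCALER == 3
 * (defined elsewhere in the driver): on gen9 a ratio value of 18 maps to
 * 18 * 50 / 3 = 300 MHz, and intel_freq_opcode(dev_priv, 300) rounds
 * back to 18; on pre-gen9 big-core parts the same 18 would be
 * 18 * 50 = 900 MHz. For VLV, with a hypothetical gpll_ref_freq of
 * 200000 kHz, opcode 0xbd gives N = 6 and 200000 * 6 / 1000 = 1200 MHz.
 */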
int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
	if (IS_GEN9(dev_priv))
		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
					 GT_FREQUENCY_MULTIPLIER);
	else if (IS_CHERRYVIEW(dev_priv))
		return chv_freq_opcode(dev_priv, val);
	else if (IS_VALLEYVIEW(dev_priv))
		return byt_freq_opcode(dev_priv, val);
	else
		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
}

struct request_boost {
	struct work_struct work;
	struct drm_i915_gem_request *req;
};

static void __intel_rps_boost_work(struct work_struct *work)
{
	struct request_boost *boost = container_of(work, struct request_boost, work);
	struct drm_i915_gem_request *req = boost->req;

	if (!i915_gem_request_completed(req))
		gen6_rps_boost(req->i915, NULL, req->emitted_jiffies);

	i915_gem_request_put(req);
	kfree(boost);
}

void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req)
{
	struct request_boost *boost;

	if (req == NULL || INTEL_GEN(req->i915) < 6)
		return;

	if (i915_gem_request_completed(req))
		return;

	boost = kmalloc(sizeof(*boost), M_DRM, GFP_ATOMIC);
	if (boost == NULL)
		return;

	boost->req = i915_gem_request_get(req);

	INIT_WORK(&boost->work, __intel_rps_boost_work);
	queue_work(req->i915->wq, &boost->work);
}

void intel_pm_setup(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	lockinit(&dev_priv->rps.hw_lock, "i915 rps.hw_lock", 0, LK_CANRECURSE);
	lockinit(&dev_priv->rps.client_lock, "i915rcl", 0, 0);

	INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work,
			  __intel_autoenable_gt_powersave);
	INIT_LIST_HEAD(&dev_priv->rps.clients);

	dev_priv->pm.suspended = false;
	atomic_set(&dev_priv->pm.wakeref_count, 0);
	atomic_set(&dev_priv->pm.atomic_seq, 0);
}
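/*
 * Usage sketch for the request-boost path above (illustrative): a caller
 * that is about to stall on a request can raise the GPU frequency first,
 * e.g.
 *
 *	intel_queue_rps_boost_for_request(req);
 *	... wait for the request to complete ...
 *
 * The helper is safe in atomic context (GFP_ATOMIC allocation, with the
 * actual boost deferred to req->i915->wq) and returns early for pre-gen6
 * devices or requests that have already completed.
 */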