/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include "i915_drv.h"
#include "intel_drv.h"
#include <linux/module.h>
#include <machine/clock.h>

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using down to 0V while at this stage. This
 * stage is entered automatically when the GPU is idle when RC6 support is
 * enabled, and as soon as a new workload arises the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available in Intel GPUs, which differ in
 * the latency required to enter and leave RC6, and in the voltage consumed
 * by the GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is the deepest RC6. Their support by hardware varies according to
 * the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require higher latency to switch to and wake up.
 */
#define INTEL_RC6_ENABLE		(1<<0)
#define INTEL_RC6p_ENABLE		(1<<1)
#define INTEL_RC6pp_ENABLE		(1<<2)
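/*
 * Illustrative only (not taken from the original source): a configuration
 * that allows both the normal and the deep RC6 states, but not RC6pp, would
 * combine the flags above as
 *
 *	(INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE)	== 0x3
 *
 * Which combinations are actually safe is platform-dependent, as the DOC
 * comment above notes.
 */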
static void gen9_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */
	I915_WRITE(CHICKEN_PAR1_1,
		   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);

	I915_WRITE(GEN8_CONFIG0,
		   I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES);

	/* WaEnableChickenDCPR:skl,bxt,kbl */
	I915_WRITE(GEN8_CHICKEN_DCPR_1,
		   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);

	/* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */
	/* WaFbcWakeMemOn:skl,bxt,kbl */
	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
		   DISP_FBC_WM_DIS |
		   DISP_FBC_MEMORY_WAKE);

	/* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl */
	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
		   ILK_DPFC_DISABLE_DUMMY0);
}

static void bxt_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	gen9_init_clock_gating(dev);

	/* WaDisableSDEUnitClockGating:bxt */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

	/*
	 * FIXME:
	 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
	 */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

	/*
	 * Wa: Backlight PWM may stop in the asserted state, causing backlight
	 * to stay fully on.
	 */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
		I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
			   PWM1_GATING_DIS | PWM2_GATING_DIS);
}

static void i915_pineview_get_mem_freq(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 tmp;

	tmp = I915_READ(CLKCFG);

	switch (tmp & CLKCFG_FSB_MASK) {
	case CLKCFG_FSB_533:
		dev_priv->fsb_freq = 533; /* 133*4 */
		break;
	case CLKCFG_FSB_800:
		dev_priv->fsb_freq = 800; /* 200*4 */
		break;
	case CLKCFG_FSB_667:
		dev_priv->fsb_freq = 667; /* 167*4 */
		break;
	case CLKCFG_FSB_400:
		dev_priv->fsb_freq = 400; /* 100*4 */
		break;
	}

	switch (tmp & CLKCFG_MEM_MASK) {
	case CLKCFG_MEM_533:
		dev_priv->mem_freq = 533;
		break;
	case CLKCFG_MEM_667:
		dev_priv->mem_freq = 667;
		break;
	case CLKCFG_MEM_800:
		dev_priv->mem_freq = 800;
		break;
	}

	/* detect pineview DDR3 setting */
	tmp = I915_READ(CSHRDDR3CTL);
	dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}
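/*
 * A sketch of the decode above, with assumed register contents: if CLKCFG
 * reads back with CLKCFG_FSB_667 and CLKCFG_MEM_800 set in their respective
 * fields, the code caches fsb_freq = 667 and mem_freq = 800 (both in MHz),
 * and is_ddr3 follows the CSHRDDR3CTL_DDR3 bit. These cached values feed
 * the CxSR latency lookup below.
 */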
static void i915_ironlake_get_mem_freq(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u16 ddrpll, csipll;

	ddrpll = I915_READ16(DDRMPLL1);
	csipll = I915_READ16(CSIPLL0);

	switch (ddrpll & 0xff) {
	case 0xc:
		dev_priv->mem_freq = 800;
		break;
	case 0x10:
		dev_priv->mem_freq = 1066;
		break;
	case 0x14:
		dev_priv->mem_freq = 1333;
		break;
	case 0x18:
		dev_priv->mem_freq = 1600;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
				 ddrpll & 0xff);
		dev_priv->mem_freq = 0;
		break;
	}

	dev_priv->ips.r_t = dev_priv->mem_freq;

	switch (csipll & 0x3ff) {
	case 0x00c:
		dev_priv->fsb_freq = 3200;
		break;
	case 0x00e:
		dev_priv->fsb_freq = 3733;
		break;
	case 0x010:
		dev_priv->fsb_freq = 4266;
		break;
	case 0x012:
		dev_priv->fsb_freq = 4800;
		break;
	case 0x014:
		dev_priv->fsb_freq = 5333;
		break;
	case 0x016:
		dev_priv->fsb_freq = 5866;
		break;
	case 0x018:
		dev_priv->fsb_freq = 6400;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
				 csipll & 0x3ff);
		dev_priv->fsb_freq = 0;
		break;
	}

	if (dev_priv->fsb_freq == 3200) {
		dev_priv->ips.c_m = 0;
	} else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
		dev_priv->ips.c_m = 1;
	} else {
		dev_priv->ips.c_m = 2;
	}
}

static const struct cxsr_latency cxsr_latency_table[] = {
	{1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
	{1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
	{1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
	{1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
	{1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

	{1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
	{1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
	{1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
	{1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
	{1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

	{1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
	{1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
	{1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
	{1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
	{1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

	{0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
	{0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
	{0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
	{0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
	{0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

	{0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
	{0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
	{0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
	{0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
	{0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

	{0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
	{0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
	{0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
	{0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
	{0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};
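/*
 * Reading the table above (field order assumed from the lookup in
 * intel_get_cxsr_latency() below and the uses in pineview_update_wm()):
 *
 *	{ is_desktop, is_ddr3, fsb_freq, mem_freq,
 *	  display_sr, display_hpll_disable, cursor_sr, cursor_hpll_disable }
 *
 * with the four latencies in nanoseconds. For example, a desktop part on
 * DDR2-667 with an 800 MHz FSB resolves to a display self-refresh latency
 * of 3354 ns.
 */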
static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
							 int is_ddr3,
							 int fsb,
							 int mem)
{
	const struct cxsr_latency *latency;
	int i;

	if (fsb == 0 || mem == 0)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
		latency = &cxsr_latency_table[i];
		if (is_desktop == latency->is_desktop &&
		    is_ddr3 == latency->is_ddr3 &&
		    fsb == latency->fsb_freq && mem == latency->mem_freq)
			return latency;
	}

	DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

	return NULL;
}

static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
{
	u32 val;

	mutex_lock(&dev_priv->rps.hw_lock);

	val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
	if (enable)
		val &= ~FORCE_DDR_HIGH_FREQ;
	else
		val |= FORCE_DDR_HIGH_FREQ;
	val &= ~FORCE_DDR_LOW_FREQ;
	val |= FORCE_DDR_FREQ_REQ_ACK;
	vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);

	if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
		      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");

	mutex_unlock(&dev_priv->rps.hw_lock);
}

static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
{
	u32 val;

	mutex_lock(&dev_priv->rps.hw_lock);

	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
	if (enable)
		val |= DSP_MAXFIFO_PM5_ENABLE;
	else
		val &= ~DSP_MAXFIFO_PM5_ENABLE;
	vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);

	mutex_unlock(&dev_priv->rps.hw_lock);
}

#define FW_WM(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)

void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
	struct drm_device *dev = dev_priv->dev;
	u32 val;

	if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
		I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
		POSTING_READ(FW_BLC_SELF_VLV);
		dev_priv->wm.vlv.cxsr = enable;
	} else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
		I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
		POSTING_READ(FW_BLC_SELF);
	} else if (IS_PINEVIEW(dev)) {
		val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN;
		val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0;
		I915_WRITE(DSPFW3, val);
		POSTING_READ(DSPFW3);
	} else if (IS_I945G(dev) || IS_I945GM(dev)) {
		val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
			       _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
		I915_WRITE(FW_BLC_SELF, val);
		POSTING_READ(FW_BLC_SELF);
	} else if (IS_I915GM(dev)) {
		val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
			       _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
		I915_WRITE(INSTPM, val);
		POSTING_READ(INSTPM);
	} else {
		return;
	}

	DRM_DEBUG_KMS("memory self-refresh is %s\n",
		      enable ? "enabled" : "disabled");
}
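/*
 * Usage note (as seen in the update_wm paths later in this file): callers
 * generally disable CxSR before reprogramming watermarks and only re-enable
 * it once the new values are in place, e.g.
 *
 *	intel_set_memory_cxsr(dev_priv, false);
 *	... write the DSPFW/FW_BLC registers ...
 *	intel_set_memory_cxsr(dev_priv, true);
 *
 * so that self-refresh never runs against stale watermarks.
 */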
/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value. It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;

#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
	((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))

static int vlv_get_fifo_size(struct drm_device *dev,
			     enum i915_pipe pipe, int plane)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int sprite0_start, sprite1_start, size;

	switch (pipe) {
		uint32_t dsparb, dsparb2, dsparb3;
	case PIPE_A:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);
		sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
		sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
		break;
	case PIPE_B:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);
		sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
		sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
		break;
	case PIPE_C:
		dsparb2 = I915_READ(DSPARB2);
		dsparb3 = I915_READ(DSPARB3);
		sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
		sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
		break;
	default:
		return 0;
	}

	switch (plane) {
	case 0:
		size = sprite0_start;
		break;
	case 1:
		size = sprite1_start - sprite0_start;
		break;
	case 2:
		size = 512 - 1 - sprite1_start;
		break;
	default:
		return 0;
	}

	DRM_DEBUG_KMS("Pipe %c %s %c FIFO size: %d\n",
		      pipe_name(pipe), plane == 0 ? "primary" : "sprite",
		      plane == 0 ? plane_name(pipe) : sprite_name(pipe, plane - 1),
		      size);

	return size;
}

static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x7f;
	if (plane)
		size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A", size);

	return size;
}

static int i830_get_fifo_size(struct drm_device *dev, int plane)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x1ff;
	if (plane)
		size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
	size >>= 1; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A", size);

	return size;
}

static int i845_get_fifo_size(struct drm_device *dev, int plane)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x7f;
	size >>= 2; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A",
		      size);

	return size;
}
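/*
 * Illustrative DSPARB decode for i9xx_get_fifo_size() above, with a made-up
 * register value and assuming DSPARB_CSTART_SHIFT is 7: if DSPARB read back
 * as 0x3f40, plane A's FIFO would be 0x3f40 & 0x7f = 64 entries, and plane
 * B's would be ((0x3f40 >> 7) & 0x7f) - 64 = 126 - 64 = 62 entries. Check
 * i915_reg.h for the real field layout.
 */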
"B" : "A", 469 size); 470 471 return size; 472 } 473 474 /* Pineview has different values for various configs */ 475 static const struct intel_watermark_params pineview_display_wm = { 476 .fifo_size = PINEVIEW_DISPLAY_FIFO, 477 .max_wm = PINEVIEW_MAX_WM, 478 .default_wm = PINEVIEW_DFT_WM, 479 .guard_size = PINEVIEW_GUARD_WM, 480 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, 481 }; 482 static const struct intel_watermark_params pineview_display_hplloff_wm = { 483 .fifo_size = PINEVIEW_DISPLAY_FIFO, 484 .max_wm = PINEVIEW_MAX_WM, 485 .default_wm = PINEVIEW_DFT_HPLLOFF_WM, 486 .guard_size = PINEVIEW_GUARD_WM, 487 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, 488 }; 489 static const struct intel_watermark_params pineview_cursor_wm = { 490 .fifo_size = PINEVIEW_CURSOR_FIFO, 491 .max_wm = PINEVIEW_CURSOR_MAX_WM, 492 .default_wm = PINEVIEW_CURSOR_DFT_WM, 493 .guard_size = PINEVIEW_CURSOR_GUARD_WM, 494 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, 495 }; 496 static const struct intel_watermark_params pineview_cursor_hplloff_wm = { 497 .fifo_size = PINEVIEW_CURSOR_FIFO, 498 .max_wm = PINEVIEW_CURSOR_MAX_WM, 499 .default_wm = PINEVIEW_CURSOR_DFT_WM, 500 .guard_size = PINEVIEW_CURSOR_GUARD_WM, 501 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, 502 }; 503 static const struct intel_watermark_params g4x_wm_info = { 504 .fifo_size = G4X_FIFO_SIZE, 505 .max_wm = G4X_MAX_WM, 506 .default_wm = G4X_MAX_WM, 507 .guard_size = 2, 508 .cacheline_size = G4X_FIFO_LINE_SIZE, 509 }; 510 static const struct intel_watermark_params g4x_cursor_wm_info = { 511 .fifo_size = I965_CURSOR_FIFO, 512 .max_wm = I965_CURSOR_MAX_WM, 513 .default_wm = I965_CURSOR_DFT_WM, 514 .guard_size = 2, 515 .cacheline_size = G4X_FIFO_LINE_SIZE, 516 }; 517 static const struct intel_watermark_params i965_cursor_wm_info = { 518 .fifo_size = I965_CURSOR_FIFO, 519 .max_wm = I965_CURSOR_MAX_WM, 520 .default_wm = I965_CURSOR_DFT_WM, 521 .guard_size = 2, 522 .cacheline_size = I915_FIFO_LINE_SIZE, 523 }; 524 static const struct intel_watermark_params i945_wm_info = { 525 .fifo_size = I945_FIFO_SIZE, 526 .max_wm = I915_MAX_WM, 527 .default_wm = 1, 528 .guard_size = 2, 529 .cacheline_size = I915_FIFO_LINE_SIZE, 530 }; 531 static const struct intel_watermark_params i915_wm_info = { 532 .fifo_size = I915_FIFO_SIZE, 533 .max_wm = I915_MAX_WM, 534 .default_wm = 1, 535 .guard_size = 2, 536 .cacheline_size = I915_FIFO_LINE_SIZE, 537 }; 538 static const struct intel_watermark_params i830_a_wm_info = { 539 .fifo_size = I855GM_FIFO_SIZE, 540 .max_wm = I915_MAX_WM, 541 .default_wm = 1, 542 .guard_size = 2, 543 .cacheline_size = I830_FIFO_LINE_SIZE, 544 }; 545 static const struct intel_watermark_params i830_bc_wm_info = { 546 .fifo_size = I855GM_FIFO_SIZE, 547 .max_wm = I915_MAX_WM/2, 548 .default_wm = 1, 549 .guard_size = 2, 550 .cacheline_size = I830_FIFO_LINE_SIZE, 551 }; 552 static const struct intel_watermark_params i845_wm_info = { 553 .fifo_size = I830_FIFO_SIZE, 554 .max_wm = I915_MAX_WM, 555 .default_wm = 1, 556 .guard_size = 2, 557 .cacheline_size = I830_FIFO_LINE_SIZE, 558 }; 559 560 /** 561 * intel_calculate_wm - calculate watermark level 562 * @clock_in_khz: pixel clock 563 * @wm: chip FIFO params 564 * @cpp: bytes per pixel 565 * @latency_ns: memory latency for the platform 566 * 567 * Calculate the watermark level (the level at which the display plane will 568 * start fetching from memory again). Each chip has a different display 569 * FIFO size and allocation, so the caller needs to figure that out and pass 570 * in the correct intel_watermark_params structure. 
/**
 * intel_calculate_wm - calculate watermark level
 * @clock_in_khz: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the display FIFO, in cachelines
 * @cpp: bytes per pixel
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again). Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size. When it reaches the watermark level, it'll start
 * fetching FIFO-line-sized chunks from memory until the FIFO fills past the
 * watermark point. If the FIFO drains completely, a FIFO underrun will
 * occur, and a display engine hang could result.
 */
static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
					const struct intel_watermark_params *wm,
					int fifo_size, int cpp,
					unsigned long latency_ns)
{
	long entries_required, wm_size;

	/*
	 * Note: we need to make sure we don't overflow for various clock &
	 * latency values.
	 * clocks go from a few thousand to several hundred thousand.
	 * latency is usually a few thousand
	 */
	entries_required = ((clock_in_khz / 1000) * cpp * latency_ns) /
		1000;
	entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);

	DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);

	wm_size = fifo_size - (entries_required + wm->guard_size);

	DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);

	/* Don't promote wm_size to unsigned... */
	if (wm_size > (long)wm->max_wm)
		wm_size = wm->max_wm;
	if (wm_size <= 0)
		wm_size = wm->default_wm;

	/*
	 * Bspec seems to indicate that the value shouldn't be lower than
	 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
	 * Let's go for 8, which is the burst size, since certain platforms
	 * already use a hardcoded 8 (which is what the spec says should be
	 * done).
	 */
	if (wm_size <= 8)
		wm_size = 8;

	return wm_size;
}
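/*
 * Worked example for intel_calculate_wm() (illustrative numbers only): with
 * a 100,000 kHz pixel clock, cpp = 4 and latency_ns = 5000, the FIFO drains
 * (100000 / 1000) * 4 * 5000 / 1000 = 2000 bytes during the latency window.
 * With a 64-byte cacheline that is DIV_ROUND_UP(2000, 64) = 32 entries, so
 * a 96-entry FIFO with guard_size = 2 yields a watermark of 96 - 34 = 62.
 */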
static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
{
	struct drm_crtc *crtc, *enabled = NULL;

	for_each_crtc(dev, crtc) {
		if (intel_crtc_active(crtc)) {
			if (enabled)
				return NULL;
			enabled = crtc;
		}
	}

	return enabled;
}

static void pineview_update_wm(struct drm_crtc *unused_crtc)
{
	struct drm_device *dev = unused_crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_crtc *crtc;
	const struct cxsr_latency *latency;
	u32 reg;
	unsigned long wm;

	latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
					 dev_priv->fsb_freq, dev_priv->mem_freq);
	if (!latency) {
		DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
		intel_set_memory_cxsr(dev_priv, false);
		return;
	}

	crtc = single_enabled_crtc(dev);
	if (crtc) {
		const struct drm_display_mode *adjusted_mode =
			&to_intel_crtc(crtc)->config->base.adjusted_mode;
		int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
		int clock = adjusted_mode->crtc_clock;

		/* Display SR */
		wm = intel_calculate_wm(clock, &pineview_display_wm,
					pineview_display_wm.fifo_size,
					cpp, latency->display_sr);
		reg = I915_READ(DSPFW1);
		reg &= ~DSPFW_SR_MASK;
		reg |= FW_WM(wm, SR);
		I915_WRITE(DSPFW1, reg);
		DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

		/* cursor SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_wm,
					pineview_display_wm.fifo_size,
					cpp, latency->cursor_sr);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_CURSOR_SR_MASK;
		reg |= FW_WM(wm, CURSOR_SR);
		I915_WRITE(DSPFW3, reg);

		/* Display HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					cpp, latency->display_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_SR_MASK;
		reg |= FW_WM(wm, HPLL_SR);
		I915_WRITE(DSPFW3, reg);

		/* cursor HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					cpp, latency->cursor_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_CURSOR_MASK;
		reg |= FW_WM(wm, HPLL_CURSOR);
		I915_WRITE(DSPFW3, reg);
		DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

		intel_set_memory_cxsr(dev_priv, true);
	} else {
		intel_set_memory_cxsr(dev_priv, false);
	}
}

static bool g4x_compute_wm0(struct drm_device *dev,
			    int plane,
			    const struct intel_watermark_params *display,
			    int display_latency_ns,
			    const struct intel_watermark_params *cursor,
			    int cursor_latency_ns,
			    int *plane_wm,
			    int *cursor_wm)
{
	struct drm_crtc *crtc;
	const struct drm_display_mode *adjusted_mode;
	int htotal, hdisplay, clock, cpp;
	int line_time_us, line_count;
	int entries, tlb_miss;

	crtc = intel_get_crtc_for_plane(dev, plane);
	if (!intel_crtc_active(crtc)) {
		*cursor_wm = cursor->guard_size;
		*plane_wm = display->guard_size;
		return false;
	}

	adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;
	hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
	cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);

	/* Use the small buffer method to calculate plane watermark */
	entries = ((clock * cpp / 1000) * display_latency_ns) / 1000;
	tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
	if (tlb_miss > 0)
		entries += tlb_miss;
	entries = DIV_ROUND_UP(entries, display->cacheline_size);
	*plane_wm = entries + display->guard_size;
	if (*plane_wm > (int)display->max_wm)
		*plane_wm = display->max_wm;

	/* Use the large buffer method to calculate cursor watermark */
	line_time_us = max(htotal * 1000 / clock, 1);
	line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
	entries = line_count * crtc->cursor->state->crtc_w * cpp;
	tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
	if (tlb_miss > 0)
		entries += tlb_miss;
	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
	*cursor_wm = entries + cursor->guard_size;
	if (*cursor_wm > (int)cursor->max_wm)
		*cursor_wm = (int)cursor->max_wm;

	return true;
}
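/*
 * Small-buffer method sketch for g4x_compute_wm0() (made-up numbers): with
 * clock = 148500 kHz, cpp = 4 and display_latency_ns = 5000, the plane
 * drains ((148500 * 4 / 1000) * 5000) / 1000 = 2970 bytes during the
 * latency window; after the TLB-miss adjustment this is rounded up to
 * cachelines and guard_size is added to get the final plane watermark.
 */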
/*
 * Check the wm result.
 *
 * If any calculated watermark value is larger than the maximum value that
 * can be programmed into the associated watermark register, that watermark
 * must be disabled.
 */
static bool g4x_check_srwm(struct drm_device *dev,
			   int display_wm, int cursor_wm,
			   const struct intel_watermark_params *display,
			   const struct intel_watermark_params *cursor)
{
	DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
		      display_wm, cursor_wm);

	if (display_wm > display->max_wm) {
		DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
			      display_wm, display->max_wm);
		return false;
	}

	if (cursor_wm > cursor->max_wm) {
		DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
			      cursor_wm, cursor->max_wm);
		return false;
	}

	if (!(display_wm || cursor_wm)) {
		DRM_DEBUG_KMS("SR latency is 0, disabling\n");
		return false;
	}

	return true;
}

static bool g4x_compute_srwm(struct drm_device *dev,
			     int plane,
			     int latency_ns,
			     const struct intel_watermark_params *display,
			     const struct intel_watermark_params *cursor,
			     int *display_wm, int *cursor_wm)
{
	struct drm_crtc *crtc;
	const struct drm_display_mode *adjusted_mode;
	int hdisplay, htotal, cpp, clock;
	unsigned long line_time_us;
	int line_count, line_size;
	int small, large;
	int entries;

	if (!latency_ns) {
		*display_wm = *cursor_wm = 0;
		return false;
	}

	crtc = intel_get_crtc_for_plane(dev, plane);
	adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;
	hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
	cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);

	line_time_us = max(htotal * 1000 / clock, 1);
	line_count = (latency_ns / line_time_us + 1000) / 1000;
	line_size = hdisplay * cpp;

	/* Use the minimum of the small and large buffer method for primary */
	small = ((clock * cpp / 1000) * latency_ns) / 1000;
	large = line_count * line_size;

	entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
	*display_wm = entries + display->guard_size;

	/* calculate the self-refresh watermark for display cursor */
	entries = line_count * cpp * crtc->cursor->state->crtc_w;
	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
	*cursor_wm = entries + cursor->guard_size;

	return g4x_check_srwm(dev,
			      *display_wm, *cursor_wm,
			      display, cursor);
}
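/*
 * Example of the line-based (large buffer) accounting above, with assumed
 * mode numbers: htotal = 2200 and clock = 148500 kHz give
 * line_time_us = max(2200 * 1000 / 148500, 1) = 14, so a 12000 ns
 * self-refresh latency covers (12000 / 14 + 1000) / 1000 = 1 full line.
 */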
#define FW_WM_VLV(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)

static void vlv_write_wm_values(struct intel_crtc *crtc,
				const struct vlv_wm_values *wm)
{
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
	enum i915_pipe pipe = crtc->pipe;

	I915_WRITE(VLV_DDL(pipe),
		   (wm->ddl[pipe].cursor << DDL_CURSOR_SHIFT) |
		   (wm->ddl[pipe].sprite[1] << DDL_SPRITE_SHIFT(1)) |
		   (wm->ddl[pipe].sprite[0] << DDL_SPRITE_SHIFT(0)) |
		   (wm->ddl[pipe].primary << DDL_PLANE_SHIFT));

	I915_WRITE(DSPFW1,
		   FW_WM(wm->sr.plane, SR) |
		   FW_WM(wm->pipe[PIPE_B].cursor, CURSORB) |
		   FW_WM_VLV(wm->pipe[PIPE_B].primary, PLANEB) |
		   FW_WM_VLV(wm->pipe[PIPE_A].primary, PLANEA));
	I915_WRITE(DSPFW2,
		   FW_WM_VLV(wm->pipe[PIPE_A].sprite[1], SPRITEB) |
		   FW_WM(wm->pipe[PIPE_A].cursor, CURSORA) |
		   FW_WM_VLV(wm->pipe[PIPE_A].sprite[0], SPRITEA));
	I915_WRITE(DSPFW3,
		   FW_WM(wm->sr.cursor, CURSOR_SR));

	if (IS_CHERRYVIEW(dev_priv)) {
		I915_WRITE(DSPFW7_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
			   FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
		I915_WRITE(DSPFW8_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_C].sprite[1], SPRITEF) |
			   FW_WM_VLV(wm->pipe[PIPE_C].sprite[0], SPRITEE));
		I915_WRITE(DSPFW9_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_C].primary, PLANEC) |
			   FW_WM(wm->pipe[PIPE_C].cursor, CURSORC));
		I915_WRITE(DSPHOWM,
			   FW_WM(wm->sr.plane >> 9, SR_HI) |
			   FW_WM(wm->pipe[PIPE_C].sprite[1] >> 8, SPRITEF_HI) |
			   FW_WM(wm->pipe[PIPE_C].sprite[0] >> 8, SPRITEE_HI) |
			   FW_WM(wm->pipe[PIPE_C].primary >> 8, PLANEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
			   FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
			   FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
	} else {
		I915_WRITE(DSPFW7,
			   FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
			   FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
		I915_WRITE(DSPHOWM,
			   FW_WM(wm->sr.plane >> 9, SR_HI) |
			   FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
			   FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
			   FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
	}

	/* zero (unused) WM1 watermarks */
	I915_WRITE(DSPFW4, 0);
	I915_WRITE(DSPFW5, 0);
	I915_WRITE(DSPFW6, 0);
	I915_WRITE(DSPHOWM1, 0);

	POSTING_READ(DSPFW1);
}

#undef FW_WM_VLV

enum vlv_wm_level {
	VLV_WM_LEVEL_PM2,
	VLV_WM_LEVEL_PM5,
	VLV_WM_LEVEL_DDR_DVFS,
};

/* latency must be in 0.1us units. */
static unsigned int vlv_wm_method2(unsigned int pixel_rate,
				   unsigned int pipe_htotal,
				   unsigned int horiz_pixels,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int ret;

	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
	ret = (ret + 1) * horiz_pixels * cpp;
	ret = DIV_ROUND_UP(ret, 64);

	return ret;
}

static void vlv_setup_wm_latency(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* all latencies in usec */
	dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;

	dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;

	if (IS_CHERRYVIEW(dev_priv)) {
		dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
		dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;

		dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
	}
}
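/*
 * Putting the two pieces above together (illustrative numbers): a CHV pipe
 * at pixel_rate = 300000 kHz, htotal = 2200, a 1920-wide plane with cpp = 4,
 * and the 33 us DDR DVFS latency (330 in 0.1 us units) gives
 * (330 * 300000) / (2200 * 10000) = 4 extra lines, so
 * (4 + 1) * 1920 * 4 = 38400 bytes, or DIV_ROUND_UP(38400, 64) = 600
 * 64-byte FIFO blocks.
 */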
static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
				     struct intel_crtc *crtc,
				     const struct intel_plane_state *state,
				     int level)
{
	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
	int clock, htotal, cpp, width, wm;

	if (dev_priv->wm.pri_latency[level] == 0)
		return USHRT_MAX;

	if (!state->visible)
		return 0;

	cpp = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
	clock = crtc->config->base.adjusted_mode.crtc_clock;
	htotal = crtc->config->base.adjusted_mode.crtc_htotal;
	width = crtc->config->pipe_src_w;
	if (WARN_ON(htotal == 0))
		htotal = 1;

	if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
		/*
		 * FIXME the formula gives values that are
		 * too big for the cursor FIFO, and hence we
		 * would never be able to use cursors. For
		 * now just hardcode the watermark.
		 */
		wm = 63;
	} else {
		wm = vlv_wm_method2(clock, htotal, width, cpp,
				    dev_priv->wm.pri_latency[level] * 10);
	}

	return min_t(int, wm, USHRT_MAX);
}

static void vlv_compute_fifo(struct intel_crtc *crtc)
{
	struct drm_device *dev = crtc->base.dev;
	struct vlv_wm_state *wm_state = &crtc->wm_state;
	struct intel_plane *plane;
	unsigned int total_rate = 0;
	const int fifo_size = 512 - 1;
	int fifo_extra, fifo_left = fifo_size;

	for_each_intel_plane_on_crtc(dev, crtc, plane) {
		struct intel_plane_state *state =
			to_intel_plane_state(plane->base.state);

		if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
			continue;

		if (state->visible) {
			wm_state->num_active_planes++;
			total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0);
		}
	}

	for_each_intel_plane_on_crtc(dev, crtc, plane) {
		struct intel_plane_state *state =
			to_intel_plane_state(plane->base.state);
		unsigned int rate;

		if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
			plane->wm.fifo_size = 63;
			continue;
		}

		if (!state->visible) {
			plane->wm.fifo_size = 0;
			continue;
		}

		rate = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
		plane->wm.fifo_size = fifo_size * rate / total_rate;
		fifo_left -= plane->wm.fifo_size;
	}

	fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1);

	/* spread the remainder evenly */
	for_each_intel_plane_on_crtc(dev, crtc, plane) {
		int plane_extra;

		if (fifo_left == 0)
			break;

		if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
			continue;

		/* give it all to the first plane if none are active */
		if (plane->wm.fifo_size == 0 &&
		    wm_state->num_active_planes)
			continue;

		plane_extra = min(fifo_extra, fifo_left);
		plane->wm.fifo_size += plane_extra;
		fifo_left -= plane_extra;
	}

	WARN_ON(fifo_left != 0);
}

static void vlv_invert_wms(struct intel_crtc *crtc)
{
	struct vlv_wm_state *wm_state = &crtc->wm_state;
	int level;

	for (level = 0; level < wm_state->num_levels; level++) {
		struct drm_device *dev = crtc->base.dev;
		const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
		struct intel_plane *plane;

		wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
		wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;

		for_each_intel_plane_on_crtc(dev, crtc, plane) {
			switch (plane->base.type) {
				int sprite;
			case DRM_PLANE_TYPE_CURSOR:
				wm_state->wm[level].cursor = plane->wm.fifo_size -
					wm_state->wm[level].cursor;
				break;
			case DRM_PLANE_TYPE_PRIMARY:
				wm_state->wm[level].primary = plane->wm.fifo_size -
					wm_state->wm[level].primary;
				break;
			case DRM_PLANE_TYPE_OVERLAY:
				sprite = plane->plane;
				wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
					wm_state->wm[level].sprite[sprite];
				break;
			}
		}
	}
}
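/*
 * FIFO split sketch for vlv_compute_fifo() above (assumed plane setup): two
 * visible non-cursor planes with cpp 4 and 2 share a 511-entry FIFO as
 * 511 * 4 / 6 = 340 and 511 * 2 / 6 = 170 entries; the 1 leftover entry is
 * then handed out by the "spread the remainder" loop.
 */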
static void vlv_compute_wm(struct intel_crtc *crtc)
{
	struct drm_device *dev = crtc->base.dev;
	struct vlv_wm_state *wm_state = &crtc->wm_state;
	struct intel_plane *plane;
	int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
	int level;

	memset(wm_state, 0, sizeof(*wm_state));

	wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed;
	wm_state->num_levels = to_i915(dev)->wm.max_level + 1;

	wm_state->num_active_planes = 0;

	vlv_compute_fifo(crtc);

	if (wm_state->num_active_planes != 1)
		wm_state->cxsr = false;

	if (wm_state->cxsr) {
		for (level = 0; level < wm_state->num_levels; level++) {
			wm_state->sr[level].plane = sr_fifo_size;
			wm_state->sr[level].cursor = 63;
		}
	}

	for_each_intel_plane_on_crtc(dev, crtc, plane) {
		struct intel_plane_state *state =
			to_intel_plane_state(plane->base.state);

		if (!state->visible)
			continue;

		/* normal watermarks */
		for (level = 0; level < wm_state->num_levels; level++) {
			int wm = vlv_compute_wm_level(plane, crtc, state, level);
			int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;

			/* hack */
			if (WARN_ON(level == 0 && wm > max_wm))
				wm = max_wm;

			if (wm > plane->wm.fifo_size)
				break;

			switch (plane->base.type) {
				int sprite;
			case DRM_PLANE_TYPE_CURSOR:
				wm_state->wm[level].cursor = wm;
				break;
			case DRM_PLANE_TYPE_PRIMARY:
				wm_state->wm[level].primary = wm;
				break;
			case DRM_PLANE_TYPE_OVERLAY:
				sprite = plane->plane;
				wm_state->wm[level].sprite[sprite] = wm;
				break;
			}
		}

		wm_state->num_levels = level;

		if (!wm_state->cxsr)
			continue;

		/* maxfifo watermarks */
		switch (plane->base.type) {
			int sprite, level;
		case DRM_PLANE_TYPE_CURSOR:
			for (level = 0; level < wm_state->num_levels; level++)
				wm_state->sr[level].cursor =
					wm_state->wm[level].cursor;
			break;
		case DRM_PLANE_TYPE_PRIMARY:
			for (level = 0; level < wm_state->num_levels; level++)
				wm_state->sr[level].plane =
					min(wm_state->sr[level].plane,
					    wm_state->wm[level].primary);
			break;
		case DRM_PLANE_TYPE_OVERLAY:
			sprite = plane->plane;
			for (level = 0; level < wm_state->num_levels; level++)
				wm_state->sr[level].plane =
					min(wm_state->sr[level].plane,
					    wm_state->wm[level].sprite[sprite]);
			break;
		}
	}

	/* clear any (partially) filled invalid levels */
	for (level = wm_state->num_levels; level < to_i915(dev)->wm.max_level + 1; level++) {
		memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
		memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
	}

	vlv_invert_wms(crtc);
}

#define VLV_FIFO(plane, value) \
	(((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
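/*
 * For reference, VLV_FIFO(SPRITEA, 0xff) expands to
 * ((0xff << DSPARB_SPRITEA_SHIFT_VLV) & DSPARB_SPRITEA_MASK_VLV), i.e. it
 * positions a FIFO split point into the SPRITEA field of DSPARB; the _HI
 * variants used below carry the ninth bit of the 512-entry FIFO offsets.
 */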
static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc)
{
	struct drm_device *dev = crtc->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct intel_plane *plane;
	int sprite0_start = 0, sprite1_start = 0, fifo_size = 0;

	for_each_intel_plane_on_crtc(dev, crtc, plane) {
		if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
			WARN_ON(plane->wm.fifo_size != 63);
			continue;
		}

		if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
			sprite0_start = plane->wm.fifo_size;
		else if (plane->plane == 0)
			sprite1_start = sprite0_start + plane->wm.fifo_size;
		else
			fifo_size = sprite1_start + plane->wm.fifo_size;
	}

	WARN_ON(fifo_size != 512 - 1);

	DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n",
		      pipe_name(crtc->pipe), sprite0_start,
		      sprite1_start, fifo_size);

	switch (crtc->pipe) {
		uint32_t dsparb, dsparb2, dsparb3;
	case PIPE_A:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);

		dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
			    VLV_FIFO(SPRITEB, 0xff));
		dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
			   VLV_FIFO(SPRITEB, sprite1_start));

		dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
			     VLV_FIFO(SPRITEB_HI, 0x1));
		dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
			    VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));

		I915_WRITE(DSPARB, dsparb);
		I915_WRITE(DSPARB2, dsparb2);
		break;
	case PIPE_B:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);

		dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
			    VLV_FIFO(SPRITED, 0xff));
		dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
			   VLV_FIFO(SPRITED, sprite1_start));

		dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
			     VLV_FIFO(SPRITED_HI, 0xff));
		dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
			    VLV_FIFO(SPRITED_HI, sprite1_start >> 8));

		I915_WRITE(DSPARB, dsparb);
		I915_WRITE(DSPARB2, dsparb2);
		break;
	case PIPE_C:
		dsparb3 = I915_READ(DSPARB3);
		dsparb2 = I915_READ(DSPARB2);

		dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
			     VLV_FIFO(SPRITEF, 0xff));
		dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
			    VLV_FIFO(SPRITEF, sprite1_start));

		dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
			     VLV_FIFO(SPRITEF_HI, 0xff));
		dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
			    VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));

		I915_WRITE(DSPARB3, dsparb3);
		I915_WRITE(DSPARB2, dsparb2);
		break;
	default:
		break;
	}
}

#undef VLV_FIFO

static void vlv_merge_wm(struct drm_device *dev,
			 struct vlv_wm_values *wm)
{
	struct intel_crtc *crtc;
	int num_active_crtcs = 0;

	wm->level = to_i915(dev)->wm.max_level;
	wm->cxsr = true;

	for_each_intel_crtc(dev, crtc) {
		const struct vlv_wm_state *wm_state = &crtc->wm_state;

		if (!crtc->active)
			continue;

		if (!wm_state->cxsr)
			wm->cxsr = false;

		num_active_crtcs++;
		wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
	}

	if (num_active_crtcs != 1)
		wm->cxsr = false;

	if (num_active_crtcs > 1)
		wm->level = VLV_WM_LEVEL_PM2;

	for_each_intel_crtc(dev, crtc) {
		struct vlv_wm_state *wm_state = &crtc->wm_state;
		enum i915_pipe pipe = crtc->pipe;

		if (!crtc->active)
			continue;

		wm->pipe[pipe] = wm_state->wm[wm->level];
		if (wm->cxsr)
			wm->sr = wm_state->sr[wm->level];

		wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
		wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
		wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
		wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
	}
}
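/*
 * The function below applies the merged state. Ordering matters: power
 * states that are being lost (DDR DVFS, PM5, CxSR) are dropped before the
 * new watermarks are written, and states being gained are enabled only
 * afterwards, so the hardware never runs a deeper power state against
 * watermarks that have not been programmed yet.
 */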
static void vlv_update_wm(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	enum i915_pipe pipe = intel_crtc->pipe;
	struct vlv_wm_values wm = {};

	vlv_compute_wm(intel_crtc);
	vlv_merge_wm(dev, &wm);

	if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0) {
		/* FIXME should be part of crtc atomic commit */
		vlv_pipe_set_fifo_size(intel_crtc);
		return;
	}

	if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
		chv_set_memory_dvfs(dev_priv, false);

	if (wm.level < VLV_WM_LEVEL_PM5 &&
	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
		chv_set_memory_pm5(dev_priv, false);

	if (!wm.cxsr && dev_priv->wm.vlv.cxsr)
		intel_set_memory_cxsr(dev_priv, false);

	/* FIXME should be part of crtc atomic commit */
	vlv_pipe_set_fifo_size(intel_crtc);

	vlv_write_wm_values(intel_crtc, &wm);

	DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
		      "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
		      pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
		      wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
		      wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);

	if (wm.cxsr && !dev_priv->wm.vlv.cxsr)
		intel_set_memory_cxsr(dev_priv, true);

	if (wm.level >= VLV_WM_LEVEL_PM5 &&
	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
		chv_set_memory_pm5(dev_priv, true);

	if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
		chv_set_memory_dvfs(dev_priv, true);

	dev_priv->wm.vlv = wm;
}

#define single_plane_enabled(mask) is_power_of_2(mask)
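/*
 * single_plane_enabled() is just a power-of-two test on the pipe mask:
 * e.g. (1 << PIPE_A) qualifies, while ((1 << PIPE_A) | (1 << PIPE_B))
 * does not, and neither does an empty mask.
 */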
static void g4x_update_wm(struct drm_crtc *crtc)
{
	struct drm_device *dev = crtc->dev;
	static const int sr_latency_ns = 12000;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
	int plane_sr, cursor_sr;
	unsigned int enabled = 0;
	bool cxsr_enabled;

	if (g4x_compute_wm0(dev, PIPE_A,
			    &g4x_wm_info, pessimal_latency_ns,
			    &g4x_cursor_wm_info, pessimal_latency_ns,
			    &planea_wm, &cursora_wm))
		enabled |= 1 << PIPE_A;

	if (g4x_compute_wm0(dev, PIPE_B,
			    &g4x_wm_info, pessimal_latency_ns,
			    &g4x_cursor_wm_info, pessimal_latency_ns,
			    &planeb_wm, &cursorb_wm))
		enabled |= 1 << PIPE_B;

	if (single_plane_enabled(enabled) &&
	    g4x_compute_srwm(dev, ffs(enabled) - 1,
			     sr_latency_ns,
			     &g4x_wm_info,
			     &g4x_cursor_wm_info,
			     &plane_sr, &cursor_sr)) {
		cxsr_enabled = true;
	} else {
		cxsr_enabled = false;
		intel_set_memory_cxsr(dev_priv, false);
		plane_sr = cursor_sr = 0;
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
		      "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
		      planea_wm, cursora_wm,
		      planeb_wm, cursorb_wm,
		      plane_sr, cursor_sr);

	I915_WRITE(DSPFW1,
		   FW_WM(plane_sr, SR) |
		   FW_WM(cursorb_wm, CURSORB) |
		   FW_WM(planeb_wm, PLANEB) |
		   FW_WM(planea_wm, PLANEA));
	I915_WRITE(DSPFW2,
		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
		   FW_WM(cursora_wm, CURSORA));
	/* HPLL off in SR has some issues on G4x... disable it */
	I915_WRITE(DSPFW3,
		   (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
		   FW_WM(cursor_sr, CURSOR_SR));

	if (cxsr_enabled)
		intel_set_memory_cxsr(dev_priv, true);
}

static void i965_update_wm(struct drm_crtc *unused_crtc)
{
	struct drm_device *dev = unused_crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_crtc *crtc;
	int srwm = 1;
	int cursor_sr = 16;
	bool cxsr_enabled;

	/* Calc sr entries for one plane configs */
	crtc = single_enabled_crtc(dev);
	if (crtc) {
		/* self-refresh has much higher latency */
		static const int sr_latency_ns = 12000;
		const struct drm_display_mode *adjusted_mode =
			&to_intel_crtc(crtc)->config->base.adjusted_mode;
		int clock = adjusted_mode->crtc_clock;
		int htotal = adjusted_mode->crtc_htotal;
		int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
		int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
		unsigned long line_time_us;
		int entries;

		line_time_us = max(htotal * 1000 / clock, 1);

		/* Use ns/us then divide to preserve precision */
		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
			cpp * hdisplay;
		entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
		srwm = I965_FIFO_SIZE - entries;
		if (srwm < 0)
			srwm = 1;
		srwm &= 0x1ff;
		DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
			      entries, srwm);

		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
			cpp * crtc->cursor->state->crtc_w;
		entries = DIV_ROUND_UP(entries,
				       i965_cursor_wm_info.cacheline_size);
		cursor_sr = i965_cursor_wm_info.fifo_size -
			(entries + i965_cursor_wm_info.guard_size);

		if (cursor_sr > i965_cursor_wm_info.max_wm)
			cursor_sr = i965_cursor_wm_info.max_wm;

		DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
			      "cursor %d\n", srwm, cursor_sr);

		cxsr_enabled = true;
	} else {
		cxsr_enabled = false;
		/* Turn off self refresh if both pipes are enabled */
		intel_set_memory_cxsr(dev_priv, false);
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
		      srwm);
	/* 965 has limitations... */
	I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
		   FW_WM(8, CURSORB) |
		   FW_WM(8, PLANEB) |
		   FW_WM(8, PLANEA));
	I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
		   FW_WM(8, PLANEC_OLD));
	/* update cursor SR watermark */
	I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));

	if (cxsr_enabled)
		intel_set_memory_cxsr(dev_priv, true);
}

#undef FW_WM

static void i9xx_update_wm(struct drm_crtc *unused_crtc)
{
	struct drm_device *dev = unused_crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const struct intel_watermark_params *wm_info;
	uint32_t fwater_lo;
	uint32_t fwater_hi;
	int cwm, srwm = 1;
	int fifo_size;
	int planea_wm, planeb_wm;
	struct drm_crtc *crtc, *enabled = NULL;

	if (IS_I945GM(dev))
		wm_info = &i945_wm_info;
	else if (!IS_GEN2(dev))
		wm_info = &i915_wm_info;
	else
		wm_info = &i830_a_wm_info;

	fifo_size = dev_priv->display.get_fifo_size(dev, 0);
	crtc = intel_get_crtc_for_plane(dev, 0);
	if (intel_crtc_active(crtc)) {
		const struct drm_display_mode *adjusted_mode;
		int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
		if (IS_GEN2(dev))
			cpp = 4;

		adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
		planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
					       wm_info, fifo_size, cpp,
					       pessimal_latency_ns);
		enabled = crtc;
	} else {
		planea_wm = fifo_size - wm_info->guard_size;
		if (planea_wm > (long)wm_info->max_wm)
			planea_wm = wm_info->max_wm;
	}

	if (IS_GEN2(dev))
		wm_info = &i830_bc_wm_info;

	fifo_size = dev_priv->display.get_fifo_size(dev, 1);
	crtc = intel_get_crtc_for_plane(dev, 1);
	if (intel_crtc_active(crtc)) {
		const struct drm_display_mode *adjusted_mode;
		int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
		if (IS_GEN2(dev))
			cpp = 4;

		adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
		planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
					       wm_info, fifo_size, cpp,
					       pessimal_latency_ns);
		if (enabled == NULL)
			enabled = crtc;
		else
			enabled = NULL;
	} else {
		planeb_wm = fifo_size - wm_info->guard_size;
		if (planeb_wm > (long)wm_info->max_wm)
			planeb_wm = wm_info->max_wm;
	}

	DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);

	if (IS_I915GM(dev) && enabled) {
		struct drm_i915_gem_object *obj;

		obj = intel_fb_obj(enabled->primary->state->fb);

		/* self-refresh seems busted with untiled */
		if (obj->tiling_mode == I915_TILING_NONE)
			enabled = NULL;
	}

	/*
	 * Overlay gets an aggressive default since video jitter is bad.
	 */
	cwm = 2;
	/* Play safe and disable self-refresh before adjusting watermarks. */
	intel_set_memory_cxsr(dev_priv, false);

	/* Calc sr entries for one plane configs */
	if (HAS_FW_BLC(dev) && enabled) {
		/* self-refresh has much higher latency */
		static const int sr_latency_ns = 6000;
		const struct drm_display_mode *adjusted_mode =
			&to_intel_crtc(enabled)->config->base.adjusted_mode;
		int clock = adjusted_mode->crtc_clock;
		int htotal = adjusted_mode->crtc_htotal;
		int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w;
		int cpp = drm_format_plane_cpp(enabled->primary->state->fb->pixel_format, 0);
		unsigned long line_time_us;
		int entries;

		line_time_us = max(htotal * 1000 / clock, 1);

		/* Use ns/us then divide to preserve precision */
		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
			cpp * hdisplay;
		entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
		DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
		srwm = wm_info->fifo_size - entries;
		if (srwm < 0)
			srwm = 1;

		if (IS_I945G(dev) || IS_I945GM(dev))
			I915_WRITE(FW_BLC_SELF,
				   FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
		else if (IS_I915GM(dev))
			I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
		      planea_wm, planeb_wm, cwm, srwm);

	fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
	fwater_hi = (cwm & 0x1f);

	/* Set request length to 8 cachelines per fetch */
	fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
	fwater_hi = fwater_hi | (1 << 8);

	I915_WRITE(FW_BLC, fwater_lo);
	I915_WRITE(FW_BLC2, fwater_hi);

	if (enabled)
		intel_set_memory_cxsr(dev_priv, true);
}

static void i845_update_wm(struct drm_crtc *unused_crtc)
{
	struct drm_device *dev = unused_crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_crtc *crtc;
	const struct drm_display_mode *adjusted_mode;
	uint32_t fwater_lo;
	int planea_wm;

	crtc = single_enabled_crtc(dev);
	if (crtc == NULL)
		return;

	adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
	planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
				       &i845_wm_info,
				       dev_priv->display.get_fifo_size(dev, 0),
				       4, pessimal_latency_ns);
	fwater_lo = I915_READ(FW_BLC) & ~0xfff;
	fwater_lo |= (3<<8) | planea_wm;

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);

	I915_WRITE(FW_BLC, fwater_lo);
}

uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
{
	uint32_t pixel_rate;

	pixel_rate = pipe_config->base.adjusted_mode.crtc_clock;

	/* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
	 * adjust the pixel_rate here. */

	if (pipe_config->pch_pfit.enabled) {
		uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
		uint32_t pfit_size = pipe_config->pch_pfit.size;

		pipe_w = pipe_config->pipe_src_w;
		pipe_h = pipe_config->pipe_src_h;

		pfit_w = (pfit_size >> 16) & 0xFFFF;
		pfit_h = pfit_size & 0xFFFF;
		if (pipe_w < pfit_w)
			pipe_w = pfit_w;
		if (pipe_h < pfit_h)
			pipe_h = pfit_h;

		if (WARN_ON(!pfit_w || !pfit_h))
			return pixel_rate;

		pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
				     pfit_w * pfit_h);
	}

	return pixel_rate;
}
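/*
 * Scaling example for ilk_pipe_pixel_rate() above (assumed mode): a
 * 1920x1080 pipe downscaled by the panel fitter to 1280x720 multiplies the
 * pixel rate by 2073600 / 921600 = 2.25, e.g. 148500 kHz becomes 334125 kHz,
 * since the pipe has to fetch more pixels per scanned-out pixel.
 */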
/* latency must be in 0.1us units. */
static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency)
{
	uint64_t ret;

	if (WARN(latency == 0, "Latency value missing\n"))
		return UINT_MAX;

	ret = (uint64_t) pixel_rate * cpp * latency;
	ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;

	return ret;
}

/* latency must be in 0.1us units. */
static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
			       uint32_t horiz_pixels, uint8_t cpp,
			       uint32_t latency)
{
	uint32_t ret;

	if (WARN(latency == 0, "Latency value missing\n"))
		return UINT_MAX;
	if (WARN_ON(!pipe_htotal))
		return UINT_MAX;

	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
	ret = (ret + 1) * horiz_pixels * cpp;
	ret = DIV_ROUND_UP(ret, 64) + 2;
	return ret;
}

static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
			   uint8_t cpp)
{
	/*
	 * Neither of these should be possible since this function shouldn't be
	 * called if the CRTC is off or the plane is invisible. But let's be
	 * extra paranoid to avoid a potential divide-by-zero if we screw up
	 * elsewhere in the driver.
	 */
	if (WARN_ON(!cpp))
		return 0;
	if (WARN_ON(!horiz_pixels))
		return 0;

	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
}

struct ilk_wm_maximums {
	uint16_t pri;
	uint16_t spr;
	uint16_t cur;
	uint16_t fbc;
};

/*
 * For both WM_PIPE and WM_LP.
 * mem_value must be in 0.1us units.
 */
static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
				   const struct intel_plane_state *pstate,
				   uint32_t mem_value,
				   bool is_lp)
{
	int cpp = pstate->base.fb ?
		drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;
	uint32_t method1, method2;

	if (!cstate->base.active || !pstate->visible)
		return 0;

	method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value);

	if (!is_lp)
		return method1;

	method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
				 cstate->base.adjusted_mode.crtc_htotal,
				 drm_rect_width(&pstate->dst),
				 cpp, mem_value);

	return min(method1, method2);
}

/*
 * For both WM_PIPE and WM_LP.
 * mem_value must be in 0.1us units.
 */
static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
				   const struct intel_plane_state *pstate,
				   uint32_t mem_value)
{
	int cpp = pstate->base.fb ?
		drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;
	uint32_t method1, method2;

	if (!cstate->base.active || !pstate->visible)
		return 0;

	method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value);
	method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
				 cstate->base.adjusted_mode.crtc_htotal,
				 drm_rect_width(&pstate->dst),
				 cpp, mem_value);
	return min(method1, method2);
}
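/*
 * Worked example for ilk_wm_method1() (illustrative numbers): with
 * pixel_rate = 148500 kHz, cpp = 4 and a 2 us latency (mem_value = 20),
 * ret = 148500 * 4 * 20 = 11880000, and
 * DIV_ROUND_UP_ULL(11880000, 64 * 10000) + 2 = 19 + 2 = 21 64-byte blocks.
 */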
/*
 * For both WM_PIPE and WM_LP.
 * mem_value must be in 0.1us units.
 */
static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
				   const struct intel_plane_state *pstate,
				   uint32_t mem_value)
{
	/*
	 * We treat the cursor plane as always-on for the purposes of watermark
	 * calculation. Until we have two-stage watermark programming merged,
	 * this is necessary to avoid flickering.
	 */
	int cpp = 4;
	int width = pstate->visible ? pstate->base.crtc_w : 64;

	if (!cstate->base.active)
		return 0;

	return ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
			      cstate->base.adjusted_mode.crtc_htotal,
			      width, cpp, mem_value);
}

/* Only for WM_LP. */
static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
				   const struct intel_plane_state *pstate,
				   uint32_t pri_val)
{
	int cpp = pstate->base.fb ?
		drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;

	if (!cstate->base.active || !pstate->visible)
		return 0;

	return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), cpp);
}

static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
{
	if (INTEL_INFO(dev)->gen >= 8)
		return 3072;
	else if (INTEL_INFO(dev)->gen >= 7)
		return 768;
	else
		return 512;
}

static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev,
					 int level, bool is_sprite)
{
	if (INTEL_INFO(dev)->gen >= 8)
		/* BDW primary/sprite plane watermarks */
		return level == 0 ? 255 : 2047;
	else if (INTEL_INFO(dev)->gen >= 7)
		/* IVB/HSW primary/sprite plane watermarks */
		return level == 0 ? 127 : 1023;
	else if (!is_sprite)
		/* ILK/SNB primary plane watermarks */
		return level == 0 ? 127 : 511;
	else
		/* ILK/SNB sprite plane watermarks */
		return level == 0 ? 63 : 255;
}

static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev,
					  int level)
{
	if (INTEL_INFO(dev)->gen >= 7)
		return level == 0 ? 63 : 255;
	else
		return level == 0 ? 31 : 63;
}

static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev)
{
	if (INTEL_INFO(dev)->gen >= 8)
		return 31;
	else
		return 15;
}
/* Calculate the maximum primary/sprite plane watermark */
static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
				     int level,
				     const struct intel_wm_config *config,
				     enum intel_ddb_partitioning ddb_partitioning,
				     bool is_sprite)
{
	unsigned int fifo_size = ilk_display_fifo_size(dev);

	/* if sprites aren't enabled, sprites get nothing */
	if (is_sprite && !config->sprites_enabled)
		return 0;

	/* HSW allows LP1+ watermarks even with multiple pipes */
	if (level == 0 || config->num_pipes_active > 1) {
		fifo_size /= INTEL_INFO(dev)->num_pipes;

		/*
		 * For some reason the non self refresh
		 * FIFO size is only half of the self
		 * refresh FIFO size on ILK/SNB.
		 */
		if (INTEL_INFO(dev)->gen <= 6)
			fifo_size /= 2;
	}

	if (config->sprites_enabled) {
		/* level 0 is always calculated with 1:1 split */
		if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
			if (is_sprite)
				fifo_size *= 5;
			fifo_size /= 6;
		} else {
			fifo_size /= 2;
		}
	}

	/* clamp to max that the registers can hold */
	return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite));
}

/* Calculate the maximum cursor plane watermark */
static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
				      int level,
				      const struct intel_wm_config *config)
{
	/* HSW LP1+ watermarks w/ multiple pipes */
	if (level > 0 && config->num_pipes_active > 1)
		return 64;

	/* otherwise just report max that registers can hold */
	return ilk_cursor_wm_reg_max(dev, level);
}

static void ilk_compute_wm_maximums(const struct drm_device *dev,
				    int level,
				    const struct intel_wm_config *config,
				    enum intel_ddb_partitioning ddb_partitioning,
				    struct ilk_wm_maximums *max)
{
	max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
	max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
	max->cur = ilk_cursor_wm_max(dev, level, config);
	max->fbc = ilk_fbc_wm_reg_max(dev);
}

static void ilk_compute_wm_reg_maximums(struct drm_device *dev,
					int level,
					struct ilk_wm_maximums *max)
{
	max->pri = ilk_plane_wm_reg_max(dev, level, false);
	max->spr = ilk_plane_wm_reg_max(dev, level, true);
	max->cur = ilk_cursor_wm_reg_max(dev, level);
	max->fbc = ilk_fbc_wm_reg_max(dev);
}
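/*
 * Worked example for the LP1+ partitioning above (illustrative, IVB-style
 * numbers): a single active pipe with sprites enabled starts from 768
 * blocks of FIFO. With INTEL_DDB_PART_5_6 at level > 0 the sprite may use
 * 768 * 5 / 6 = 640 blocks and the primary 768 / 6 = 128; with the default
 * 1:1 split each gets 768 / 2 = 384. The result is then clamped to what
 * the register can encode (1023 for LP levels on this hypothetical gen7
 * part).
 */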
static bool ilk_validate_wm_level(int level,
				  const struct ilk_wm_maximums *max,
				  struct intel_wm_level *result)
{
	bool ret;

	/* already determined to be invalid? */
	if (!result->enable)
		return false;

	result->enable = result->pri_val <= max->pri &&
			 result->spr_val <= max->spr &&
			 result->cur_val <= max->cur;

	ret = result->enable;

	/*
	 * HACK until we can pre-compute everything,
	 * and thus fail gracefully if LP0 watermarks
	 * are exceeded...
	 */
	if (level == 0 && !result->enable) {
		if (result->pri_val > max->pri)
			DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
				      level, result->pri_val, max->pri);
		if (result->spr_val > max->spr)
			DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
				      level, result->spr_val, max->spr);
		if (result->cur_val > max->cur)
			DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
				      level, result->cur_val, max->cur);

		result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
		result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
		result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
		result->enable = true;
	}

	return ret;
}

static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
				 const struct intel_crtc *intel_crtc,
				 int level,
				 struct intel_crtc_state *cstate,
				 struct intel_plane_state *pristate,
				 struct intel_plane_state *sprstate,
				 struct intel_plane_state *curstate,
				 struct intel_wm_level *result)
{
	uint16_t pri_latency = dev_priv->wm.pri_latency[level];
	uint16_t spr_latency = dev_priv->wm.spr_latency[level];
	uint16_t cur_latency = dev_priv->wm.cur_latency[level];

	/* WM1+ latency values stored in 0.5us units */
	if (level > 0) {
		pri_latency *= 5;
		spr_latency *= 5;
		cur_latency *= 5;
	}

	if (pristate) {
		result->pri_val = ilk_compute_pri_wm(cstate, pristate,
						     pri_latency, level);
		result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
	}

	if (sprstate)
		result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);

	if (curstate)
		result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);

	result->enable = true;
}

static uint32_t
hsw_compute_linetime_wm(struct drm_device *dev,
			struct intel_crtc_state *cstate)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	const struct drm_display_mode *adjusted_mode =
		&cstate->base.adjusted_mode;
	u32 linetime, ips_linetime;

	if (!cstate->base.active)
		return 0;
	if (WARN_ON(adjusted_mode->crtc_clock == 0))
		return 0;
	if (WARN_ON(dev_priv->cdclk_freq == 0))
		return 0;

	/*
	 * The watermarks are computed based on how long it takes to fill a
	 * single row at the given clock rate, multiplied by 8.
	 */
2059 * */ 2060 linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, 2061 adjusted_mode->crtc_clock); 2062 ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, 2063 dev_priv->cdclk_freq); 2064 2065 return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | 2066 PIPE_WM_LINETIME_TIME(linetime); 2067 } 2068 2069 static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8]) 2070 { 2071 struct drm_i915_private *dev_priv = dev->dev_private; 2072 2073 if (IS_GEN9(dev)) { 2074 uint32_t val; 2075 int ret, i; 2076 int level, max_level = ilk_wm_max_level(dev); 2077 2078 /* read the first set of memory latencies[0:3] */ 2079 val = 0; /* data0 to be programmed to 0 for first set */ 2080 mutex_lock(&dev_priv->rps.hw_lock); 2081 ret = sandybridge_pcode_read(dev_priv, 2082 GEN9_PCODE_READ_MEM_LATENCY, 2083 &val); 2084 mutex_unlock(&dev_priv->rps.hw_lock); 2085 2086 if (ret) { 2087 DRM_ERROR("SKL Mailbox read error = %d\n", ret); 2088 return; 2089 } 2090 2091 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK; 2092 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & 2093 GEN9_MEM_LATENCY_LEVEL_MASK; 2094 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & 2095 GEN9_MEM_LATENCY_LEVEL_MASK; 2096 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & 2097 GEN9_MEM_LATENCY_LEVEL_MASK; 2098 2099 /* read the second set of memory latencies[4:7] */ 2100 val = 1; /* data0 to be programmed to 1 for second set */ 2101 mutex_lock(&dev_priv->rps.hw_lock); 2102 ret = sandybridge_pcode_read(dev_priv, 2103 GEN9_PCODE_READ_MEM_LATENCY, 2104 &val); 2105 mutex_unlock(&dev_priv->rps.hw_lock); 2106 if (ret) { 2107 DRM_ERROR("SKL Mailbox read error = %d\n", ret); 2108 return; 2109 } 2110 2111 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK; 2112 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & 2113 GEN9_MEM_LATENCY_LEVEL_MASK; 2114 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & 2115 GEN9_MEM_LATENCY_LEVEL_MASK; 2116 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & 2117 GEN9_MEM_LATENCY_LEVEL_MASK; 2118 2119 /* 2120 * WaWmMemoryReadLatency:skl 2121 * 2122 * punit doesn't take into account the read latency so we need 2123 * to add 2us to the various latency levels we retrieve from 2124 * the punit. 2125 * - W0 is a bit special in that it's the only level that 2126 * can't be disabled if we want to have display working, so 2127 * we always add 2us there. 2128 * - For levels >=1, punit returns 0us latency when they are 2129 * disabled, so we respect that and don't add 2us then 2130 * 2131 * Additionally, if a level n (n > 1) has a 0us latency, all 2132 * levels m (m >= n) need to be disabled. We make sure to 2133 * sanitize the values out of the punit to satisfy this 2134 * requirement. 
2135 */ 2136 wm[0] += 2; 2137 for (level = 1; level <= max_level; level++) 2138 if (wm[level] != 0) 2139 wm[level] += 2; 2140 else { 2141 for (i = level + 1; i <= max_level; i++) 2142 wm[i] = 0; 2143 2144 break; 2145 } 2146 } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { 2147 uint64_t sskpd = I915_READ64(MCH_SSKPD); 2148 2149 wm[0] = (sskpd >> 56) & 0xFF; 2150 if (wm[0] == 0) 2151 wm[0] = sskpd & 0xF; 2152 wm[1] = (sskpd >> 4) & 0xFF; 2153 wm[2] = (sskpd >> 12) & 0xFF; 2154 wm[3] = (sskpd >> 20) & 0x1FF; 2155 wm[4] = (sskpd >> 32) & 0x1FF; 2156 } else if (INTEL_INFO(dev)->gen >= 6) { 2157 uint32_t sskpd = I915_READ(MCH_SSKPD); 2158 2159 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK; 2160 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK; 2161 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK; 2162 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK; 2163 } else if (INTEL_INFO(dev)->gen >= 5) { 2164 uint32_t mltr = I915_READ(MLTR_ILK); 2165 2166 /* ILK primary LP0 latency is 700 ns */ 2167 wm[0] = 7; 2168 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK; 2169 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK; 2170 } 2171 } 2172 2173 static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5]) 2174 { 2175 /* ILK sprite LP0 latency is 1300 ns */ 2176 if (INTEL_INFO(dev)->gen == 5) 2177 wm[0] = 13; 2178 } 2179 2180 static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5]) 2181 { 2182 /* ILK cursor LP0 latency is 1300 ns */ 2183 if (INTEL_INFO(dev)->gen == 5) 2184 wm[0] = 13; 2185 2186 /* WaDoubleCursorLP3Latency:ivb */ 2187 if (IS_IVYBRIDGE(dev)) 2188 wm[3] *= 2; 2189 } 2190 2191 int ilk_wm_max_level(const struct drm_device *dev) 2192 { 2193 /* how many WM levels are we expecting */ 2194 if (INTEL_INFO(dev)->gen >= 9) 2195 return 7; 2196 else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 2197 return 4; 2198 else if (INTEL_INFO(dev)->gen >= 6) 2199 return 3; 2200 else 2201 return 2; 2202 } 2203 2204 static void intel_print_wm_latency(struct drm_device *dev, 2205 const char *name, 2206 const uint16_t wm[8]) 2207 { 2208 int level, max_level = ilk_wm_max_level(dev); 2209 2210 for (level = 0; level <= max_level; level++) { 2211 unsigned int latency = wm[level]; 2212 2213 if (latency == 0) { 2214 DRM_ERROR("%s WM%d latency not provided\n", 2215 name, level); 2216 continue; 2217 } 2218 2219 /* 2220 * - latencies are in us on gen9. 2221 * - before then, WM1+ latency values are in 0.5us units 2222 */ 2223 if (IS_GEN9(dev)) 2224 latency *= 10; 2225 else if (level > 0) 2226 latency *= 5; 2227 2228 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n", 2229 name, level, wm[level], 2230 latency / 10, latency % 10); 2231 } 2232 } 2233 2234 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, 2235 uint16_t wm[5], uint16_t min) 2236 { 2237 int level, max_level = ilk_wm_max_level(dev_priv->dev); 2238 2239 if (wm[0] >= min) 2240 return false; 2241 2242 wm[0] = max(wm[0], min); 2243 for (level = 1; level <= max_level; level++) 2244 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5)); 2245 2246 return true; 2247 } 2248 2249 static void snb_wm_latency_quirk(struct drm_device *dev) 2250 { 2251 struct drm_i915_private *dev_priv = dev->dev_private; 2252 bool changed; 2253 2254 /* 2255 * The BIOS provided WM memory latency values are often 2256 * inadequate for high resolution displays. Adjust them. 
2257 */ 2258 changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) | 2259 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) | 2260 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12); 2261 2262 if (!changed) 2263 return; 2264 2265 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n"); 2266 intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency); 2267 intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency); 2268 intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency); 2269 } 2270 2271 static void ilk_setup_wm_latency(struct drm_device *dev) 2272 { 2273 struct drm_i915_private *dev_priv = dev->dev_private; 2274 2275 intel_read_wm_latency(dev, dev_priv->wm.pri_latency); 2276 2277 memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency, 2278 sizeof(dev_priv->wm.pri_latency)); 2279 memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency, 2280 sizeof(dev_priv->wm.pri_latency)); 2281 2282 intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency); 2283 intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency); 2284 2285 intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency); 2286 intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency); 2287 intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency); 2288 2289 if (IS_GEN6(dev)) 2290 snb_wm_latency_quirk(dev); 2291 } 2292 2293 static void skl_setup_wm_latency(struct drm_device *dev) 2294 { 2295 struct drm_i915_private *dev_priv = dev->dev_private; 2296 2297 intel_read_wm_latency(dev, dev_priv->wm.skl_latency); 2298 intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency); 2299 } 2300 2301 static bool ilk_validate_pipe_wm(struct drm_device *dev, 2302 struct intel_pipe_wm *pipe_wm) 2303 { 2304 /* LP0 watermark maximums depend on this pipe alone */ 2305 const struct intel_wm_config config = { 2306 .num_pipes_active = 1, 2307 .sprites_enabled = pipe_wm->sprites_enabled, 2308 .sprites_scaled = pipe_wm->sprites_scaled, 2309 }; 2310 struct ilk_wm_maximums max; 2311 2312 /* LP0 watermarks always use 1/2 DDB partitioning */ 2313 ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max); 2314 2315 /* At least LP0 must be valid */ 2316 if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) { 2317 DRM_DEBUG_KMS("LP0 watermark invalid\n"); 2318 return false; 2319 } 2320 2321 return true; 2322 } 2323 2324 /* Compute new watermarks for the pipe */ 2325 static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) 2326 { 2327 struct drm_atomic_state *state = cstate->base.state; 2328 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 2329 struct intel_pipe_wm *pipe_wm; 2330 struct drm_device *dev = state->dev; 2331 const struct drm_i915_private *dev_priv = dev->dev_private; 2332 struct intel_plane *intel_plane; 2333 struct intel_plane_state *pristate = NULL; 2334 struct intel_plane_state *sprstate = NULL; 2335 struct intel_plane_state *curstate = NULL; 2336 int level, max_level = ilk_wm_max_level(dev), usable_level; 2337 struct ilk_wm_maximums max; 2338 2339 pipe_wm = &cstate->wm.optimal.ilk; 2340 2341 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { 2342 struct intel_plane_state *ps; 2343 2344 ps = intel_atomic_get_existing_plane_state(state, 2345 intel_plane); 2346 if (!ps) 2347 continue; 2348 2349 if (intel_plane->base.type == DRM_PLANE_TYPE_PRIMARY) 2350 pristate = ps; 2351 else if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) 2352 sprstate = ps; 2353 else if (intel_plane->base.type == 
			 DRM_PLANE_TYPE_CURSOR)
			curstate = ps;
	}

	pipe_wm->pipe_enabled = cstate->base.active;
	if (sprstate) {
		pipe_wm->sprites_enabled = sprstate->visible;
		pipe_wm->sprites_scaled = sprstate->visible &&
			(drm_rect_width(&sprstate->dst) != drm_rect_width(&sprstate->src) >> 16 ||
			 drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16);
	}

	usable_level = max_level;

	/* ILK/SNB: LP2+ watermarks only w/o sprites */
	if (INTEL_INFO(dev)->gen <= 6 && pipe_wm->sprites_enabled)
		usable_level = 1;

	/* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
	if (pipe_wm->sprites_scaled)
		usable_level = 0;

	ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
			     pristate, sprstate, curstate, &pipe_wm->raw_wm[0]);

	memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
	pipe_wm->wm[0] = pipe_wm->raw_wm[0];

	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
		pipe_wm->linetime = hsw_compute_linetime_wm(dev, cstate);

	if (!ilk_validate_pipe_wm(dev, pipe_wm))
		return -EINVAL;

	ilk_compute_wm_reg_maximums(dev, 1, &max);

	for (level = 1; level <= max_level; level++) {
		struct intel_wm_level *wm = &pipe_wm->raw_wm[level];

		ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
				     pristate, sprstate, curstate, wm);

		/*
		 * Disable any watermark level that exceeds the
		 * register maximums since such watermarks are
		 * always invalid.
		 */
		if (level > usable_level)
			continue;

		if (ilk_validate_wm_level(level, &max, wm))
			pipe_wm->wm[level] = *wm;
		else
			usable_level = level;
	}

	return 0;
}

/*
 * Build a set of 'intermediate' watermark values that satisfy both the old
 * state and the new state. These can be programmed to the hardware
 * immediately.
 */
static int ilk_compute_intermediate_wm(struct drm_device *dev,
				       struct intel_crtc *intel_crtc,
				       struct intel_crtc_state *newstate)
{
	struct intel_pipe_wm *a = &newstate->wm.intermediate;
	struct intel_pipe_wm *b = &intel_crtc->wm.active.ilk;
	int level, max_level = ilk_wm_max_level(dev);

	/*
	 * Start with the final, target watermarks, then combine with the
	 * currently active watermarks to get values that are safe both before
	 * and after the vblank.
	 */
	*a = newstate->wm.optimal.ilk;
	a->pipe_enabled |= b->pipe_enabled;
	a->sprites_enabled |= b->sprites_enabled;
	a->sprites_scaled |= b->sprites_scaled;

	for (level = 0; level <= max_level; level++) {
		struct intel_wm_level *a_wm = &a->wm[level];
		const struct intel_wm_level *b_wm = &b->wm[level];

		a_wm->enable &= b_wm->enable;
		a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
		a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
		a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
		a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
	}

	/*
	 * We need to make sure that these merged watermark values are
	 * actually a valid configuration themselves. If they're not,
	 * there's no safe way to transition from the old state to
	 * the new state, so we need to fail the atomic transaction.
	 */
	if (!ilk_validate_pipe_wm(dev, a))
		return -EINVAL;

	/*
	 * If our intermediate WM are identical to the final WM, then we can
	 * omit the post-vblank programming; only update if it's different.
	 */
2458 */ 2459 if (memcmp(a, &newstate->wm.optimal.ilk, sizeof(*a)) == 0) 2460 newstate->wm.need_postvbl_update = false; 2461 2462 return 0; 2463 } 2464 2465 /* 2466 * Merge the watermarks from all active pipes for a specific level. 2467 */ 2468 static void ilk_merge_wm_level(struct drm_device *dev, 2469 int level, 2470 struct intel_wm_level *ret_wm) 2471 { 2472 struct intel_crtc *intel_crtc; 2473 2474 ret_wm->enable = true; 2475 2476 for_each_intel_crtc(dev, intel_crtc) { 2477 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk; 2478 const struct intel_wm_level *wm = &active->wm[level]; 2479 2480 if (!active->pipe_enabled) 2481 continue; 2482 2483 /* 2484 * The watermark values may have been used in the past, 2485 * so we must maintain them in the registers for some 2486 * time even if the level is now disabled. 2487 */ 2488 if (!wm->enable) 2489 ret_wm->enable = false; 2490 2491 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val); 2492 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val); 2493 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val); 2494 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val); 2495 } 2496 } 2497 2498 /* 2499 * Merge all low power watermarks for all active pipes. 2500 */ 2501 static void ilk_wm_merge(struct drm_device *dev, 2502 const struct intel_wm_config *config, 2503 const struct ilk_wm_maximums *max, 2504 struct intel_pipe_wm *merged) 2505 { 2506 struct drm_i915_private *dev_priv = dev->dev_private; 2507 int level, max_level = ilk_wm_max_level(dev); 2508 int last_enabled_level = max_level; 2509 2510 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */ 2511 if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) && 2512 config->num_pipes_active > 1) 2513 last_enabled_level = 0; 2514 2515 /* ILK: FBC WM must be disabled always */ 2516 merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6; 2517 2518 /* merge each WM1+ level */ 2519 for (level = 1; level <= max_level; level++) { 2520 struct intel_wm_level *wm = &merged->wm[level]; 2521 2522 ilk_merge_wm_level(dev, level, wm); 2523 2524 if (level > last_enabled_level) 2525 wm->enable = false; 2526 else if (!ilk_validate_wm_level(level, max, wm)) 2527 /* make sure all following levels get disabled */ 2528 last_enabled_level = level - 1; 2529 2530 /* 2531 * The spec says it is preferred to disable 2532 * FBC WMs instead of disabling a WM level. 2533 */ 2534 if (wm->fbc_val > max->fbc) { 2535 if (wm->enable) 2536 merged->fbc_wm_enabled = false; 2537 wm->fbc_val = 0; 2538 } 2539 } 2540 2541 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */ 2542 /* 2543 * FIXME this is racy. FBC might get enabled later. 2544 * What we should check here is whether FBC can be 2545 * enabled sometime later. 
2546 */ 2547 if (IS_GEN5(dev) && !merged->fbc_wm_enabled && 2548 intel_fbc_is_active(dev_priv)) { 2549 for (level = 2; level <= max_level; level++) { 2550 struct intel_wm_level *wm = &merged->wm[level]; 2551 2552 wm->enable = false; 2553 } 2554 } 2555 } 2556 2557 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm) 2558 { 2559 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */ 2560 return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable); 2561 } 2562 2563 /* The value we need to program into the WM_LPx latency field */ 2564 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level) 2565 { 2566 struct drm_i915_private *dev_priv = dev->dev_private; 2567 2568 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 2569 return 2 * level; 2570 else 2571 return dev_priv->wm.pri_latency[level]; 2572 } 2573 2574 static void ilk_compute_wm_results(struct drm_device *dev, 2575 const struct intel_pipe_wm *merged, 2576 enum intel_ddb_partitioning partitioning, 2577 struct ilk_wm_values *results) 2578 { 2579 struct intel_crtc *intel_crtc; 2580 int level, wm_lp; 2581 2582 results->enable_fbc_wm = merged->fbc_wm_enabled; 2583 results->partitioning = partitioning; 2584 2585 /* LP1+ register values */ 2586 for (wm_lp = 1; wm_lp <= 3; wm_lp++) { 2587 const struct intel_wm_level *r; 2588 2589 level = ilk_wm_lp_to_level(wm_lp, merged); 2590 2591 r = &merged->wm[level]; 2592 2593 /* 2594 * Maintain the watermark values even if the level is 2595 * disabled. Doing otherwise could cause underruns. 2596 */ 2597 results->wm_lp[wm_lp - 1] = 2598 (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) | 2599 (r->pri_val << WM1_LP_SR_SHIFT) | 2600 r->cur_val; 2601 2602 if (r->enable) 2603 results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN; 2604 2605 if (INTEL_INFO(dev)->gen >= 8) 2606 results->wm_lp[wm_lp - 1] |= 2607 r->fbc_val << WM1_LP_FBC_SHIFT_BDW; 2608 else 2609 results->wm_lp[wm_lp - 1] |= 2610 r->fbc_val << WM1_LP_FBC_SHIFT; 2611 2612 /* 2613 * Always set WM1S_LP_EN when spr_val != 0, even if the 2614 * level is disabled. Doing otherwise could cause underruns. 2615 */ 2616 if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) { 2617 WARN_ON(wm_lp != 1); 2618 results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val; 2619 } else 2620 results->wm_lp_spr[wm_lp - 1] = r->spr_val; 2621 } 2622 2623 /* LP0 register values */ 2624 for_each_intel_crtc(dev, intel_crtc) { 2625 enum i915_pipe pipe = intel_crtc->pipe; 2626 const struct intel_wm_level *r = 2627 &intel_crtc->wm.active.ilk.wm[0]; 2628 2629 if (WARN_ON(!r->enable)) 2630 continue; 2631 2632 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime; 2633 2634 results->wm_pipe[pipe] = 2635 (r->pri_val << WM0_PIPE_PLANE_SHIFT) | 2636 (r->spr_val << WM0_PIPE_SPRITE_SHIFT) | 2637 r->cur_val; 2638 } 2639 } 2640 2641 /* Find the result with the highest level enabled. Check for enable_fbc_wm in 2642 * case both are at the same level. Prefer r1 in case they're the same. 
/*
 * Find the result with the highest level enabled. Check for enable_fbc_wm in
 * case both are at the same level. Prefer r1 in case they're the same.
 */
static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
						  struct intel_pipe_wm *r1,
						  struct intel_pipe_wm *r2)
{
	int level, max_level = ilk_wm_max_level(dev);
	int level1 = 0, level2 = 0;

	for (level = 1; level <= max_level; level++) {
		if (r1->wm[level].enable)
			level1 = level;
		if (r2->wm[level].enable)
			level2 = level;
	}

	if (level1 == level2) {
		if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
			return r2;
		else
			return r1;
	} else if (level1 > level2) {
		return r1;
	} else {
		return r2;
	}
}

/* dirty bits used to track which watermarks need changes */
#define WM_DIRTY_PIPE(pipe) (1 << (pipe))
#define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
#define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
#define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
#define WM_DIRTY_FBC (1 << 24)
#define WM_DIRTY_DDB (1 << 25)

static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
					 const struct ilk_wm_values *old,
					 const struct ilk_wm_values *new)
{
	unsigned int dirty = 0;
	enum i915_pipe pipe;
	int wm_lp;

	for_each_pipe(dev_priv, pipe) {
		if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
			dirty |= WM_DIRTY_LINETIME(pipe);
			/* Must disable LP1+ watermarks too */
			dirty |= WM_DIRTY_LP_ALL;
		}

		if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
			dirty |= WM_DIRTY_PIPE(pipe);
			/* Must disable LP1+ watermarks too */
			dirty |= WM_DIRTY_LP_ALL;
		}
	}

	if (old->enable_fbc_wm != new->enable_fbc_wm) {
		dirty |= WM_DIRTY_FBC;
		/* Must disable LP1+ watermarks too */
		dirty |= WM_DIRTY_LP_ALL;
	}

	if (old->partitioning != new->partitioning) {
		dirty |= WM_DIRTY_DDB;
		/* Must disable LP1+ watermarks too */
		dirty |= WM_DIRTY_LP_ALL;
	}

	/* LP1+ watermarks already deemed dirty, no need to continue */
	if (dirty & WM_DIRTY_LP_ALL)
		return dirty;

	/* Find the lowest numbered LP1+ watermark in need of an update... */
	for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
		if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
		    old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
			break;
	}

	/* ...and mark it and all higher numbered LP1+ watermarks as dirty */
	for (; wm_lp <= 3; wm_lp++)
		dirty |= WM_DIRTY_LP(wm_lp);

	return dirty;
}
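/*
 * Example of the dirty tracking above (hypothetical diff): if only pipe
 * B's WM0 value changed, dirty = WM_DIRTY_PIPE(PIPE_B) | WM_DIRTY_LP_ALL,
 * because all LP1+ watermarks must be disabled and rewritten around a WM0
 * update. If instead only the WM2 register value changed, dirty =
 * WM_DIRTY_LP(2) | WM_DIRTY_LP(3), since every higher-numbered LP register
 * is rewritten once a lower one needs an update.
 */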
static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
			       unsigned int dirty)
{
	struct ilk_wm_values *previous = &dev_priv->wm.hw;
	bool changed = false;

	if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
		previous->wm_lp[2] &= ~WM1_LP_SR_EN;
		I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
		changed = true;
	}
	if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
		previous->wm_lp[1] &= ~WM1_LP_SR_EN;
		I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
		changed = true;
	}
	if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
		previous->wm_lp[0] &= ~WM1_LP_SR_EN;
		I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
		changed = true;
	}

	/*
	 * Don't touch WM1S_LP_EN here.
	 * Doing so could cause underruns.
	 */

	return changed;
}

/*
 * The spec says we shouldn't write when we don't need to, because every write
 * causes WMs to be re-evaluated, expending some power.
 */
static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
				struct ilk_wm_values *results)
{
	struct drm_device *dev = dev_priv->dev;
	struct ilk_wm_values *previous = &dev_priv->wm.hw;
	unsigned int dirty;
	uint32_t val;

	dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
	if (!dirty)
		return;

	_ilk_disable_lp_wm(dev_priv, dirty);

	if (dirty & WM_DIRTY_PIPE(PIPE_A))
		I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
	if (dirty & WM_DIRTY_PIPE(PIPE_B))
		I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
	if (dirty & WM_DIRTY_PIPE(PIPE_C))
		I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);

	if (dirty & WM_DIRTY_LINETIME(PIPE_A))
		I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
	if (dirty & WM_DIRTY_LINETIME(PIPE_B))
		I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
	if (dirty & WM_DIRTY_LINETIME(PIPE_C))
		I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);

	if (dirty & WM_DIRTY_DDB) {
		if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
			val = I915_READ(WM_MISC);
			if (results->partitioning == INTEL_DDB_PART_1_2)
				val &= ~WM_MISC_DATA_PARTITION_5_6;
			else
				val |= WM_MISC_DATA_PARTITION_5_6;
			I915_WRITE(WM_MISC, val);
		} else {
			val = I915_READ(DISP_ARB_CTL2);
			if (results->partitioning == INTEL_DDB_PART_1_2)
				val &= ~DISP_DATA_PARTITION_5_6;
			else
				val |= DISP_DATA_PARTITION_5_6;
			I915_WRITE(DISP_ARB_CTL2, val);
		}
	}

	if (dirty & WM_DIRTY_FBC) {
		val = I915_READ(DISP_ARB_CTL);
		if (results->enable_fbc_wm)
			val &= ~DISP_FBC_WM_DIS;
		else
			val |= DISP_FBC_WM_DIS;
		I915_WRITE(DISP_ARB_CTL, val);
	}

	if (dirty & WM_DIRTY_LP(1) &&
	    previous->wm_lp_spr[0] != results->wm_lp_spr[0])
		I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);

	if (INTEL_INFO(dev)->gen >= 7) {
		if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
			I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
		if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
			I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
	}

	if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
		I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
	if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
		I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
	if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
		I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);

	dev_priv->wm.hw = *results;
}

bool ilk_disable_lp_wm(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
}

/*
 * On gen9, we need to allocate Display Data Buffer (DDB) portions to the
 * different active planes.
 */

#define SKL_DDB_SIZE		896	/* in blocks */
#define BXT_DDB_SIZE		512

/*
 * Return the index of a plane in the SKL DDB and wm result arrays.  Primary
 * plane is always in slot 0, cursor is always in slot I915_MAX_PLANES-1, and
 * other universal planes are in indices 1..n.  Note that this may leave unused
 * indices between the top "sprite" plane and the cursor.
 */
static int
skl_wm_plane_id(const struct intel_plane *plane)
{
	switch (plane->base.type) {
	case DRM_PLANE_TYPE_PRIMARY:
		return 0;
	case DRM_PLANE_TYPE_CURSOR:
		return PLANE_CURSOR;
	case DRM_PLANE_TYPE_OVERLAY:
		return plane->plane + 1;
	default:
		MISSING_CASE(plane->base.type);
		return plane->plane;
	}
}

static void
skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
				   const struct intel_crtc_state *cstate,
				   const struct intel_wm_config *config,
				   struct skl_ddb_entry *alloc /* out */)
{
	struct drm_crtc *for_crtc = cstate->base.crtc;
	struct drm_crtc *crtc;
	unsigned int pipe_size, ddb_size;
	int nth_active_pipe;

	if (!cstate->base.active) {
		alloc->start = 0;
		alloc->end = 0;
		return;
	}

	if (IS_BROXTON(dev))
		ddb_size = BXT_DDB_SIZE;
	else
		ddb_size = SKL_DDB_SIZE;

	ddb_size -= 4; /* 4 blocks for bypass path allocation */

	nth_active_pipe = 0;
	for_each_crtc(dev, crtc) {
		if (!to_intel_crtc(crtc)->active)
			continue;

		if (crtc == for_crtc)
			break;

		nth_active_pipe++;
	}

	pipe_size = ddb_size / config->num_pipes_active;
	alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active;
	alloc->end = alloc->start + pipe_size;
}

static unsigned int skl_cursor_allocation(const struct intel_wm_config *config)
{
	if (config->num_pipes_active == 1)
		return 32;

	return 8;
}
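/*
 * Worked example for the DDB carve-up above (illustrative two-pipe SKL
 * config): ddb_size = 896 - 4 = 892 blocks, so each of the two active
 * pipes gets pipe_size = 892 / 2 = 446 blocks and the second active pipe
 * spans [446, 892). Out of each pipe's share, skl_cursor_allocation()
 * reserves 8 blocks for the cursor (32 if it is the only active pipe).
 */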
static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
{
	entry->start = reg & 0x3ff;
	entry->end = (reg >> 16) & 0x3ff;
	if (entry->end)
		entry->end += 1;
}

void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
			  struct skl_ddb_allocation *ddb /* out */)
{
	enum i915_pipe pipe;
	int plane;
	u32 val;

	memset(ddb, 0, sizeof(*ddb));

	for_each_pipe(dev_priv, pipe) {
		enum intel_display_power_domain power_domain;

		power_domain = POWER_DOMAIN_PIPE(pipe);
		if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
			continue;

		for_each_plane(dev_priv, pipe, plane) {
			val = I915_READ(PLANE_BUF_CFG(pipe, plane));
			skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane],
						   val);
		}

		val = I915_READ(CUR_BUF_CFG(pipe));
		skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR],
					   val);

		intel_display_power_put(dev_priv, power_domain);
	}
}

static unsigned int
skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
			     struct drm_plane_state *pstate,
			     int y)
{
	struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
	struct drm_framebuffer *fb = pstate->fb;
	uint32_t width = 0, height = 0;

	width = drm_rect_width(&intel_pstate->src) >> 16;
	height = drm_rect_height(&intel_pstate->src) >> 16;

	if (intel_rotation_90_or_270(pstate->rotation))
		swap(width, height);

	/* for planar format */
	if (fb->pixel_format == DRM_FORMAT_NV12) {
		if (y)	/* y-plane data rate */
			return width * height *
				drm_format_plane_cpp(fb->pixel_format, 0);
		else	/* uv-plane data rate */
			return (width / 2) * (height / 2) *
				drm_format_plane_cpp(fb->pixel_format, 1);
	}

	/* for packed formats */
	return width * height * drm_format_plane_cpp(fb->pixel_format, 0);
}

/*
 * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
 * a 8192x4096@32bpp framebuffer:
 *   3 * 4096 * 8192 * 4 < 2^32
 */
static unsigned int
skl_get_total_relative_data_rate(const struct intel_crtc_state *cstate)
{
	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
	struct drm_device *dev = intel_crtc->base.dev;
	struct intel_plane *intel_plane;
	unsigned int total_data_rate = 0;

	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
		struct drm_plane_state *pstate = intel_plane->base.state;

		if (pstate->fb == NULL)
			continue;

		if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR)
			continue;

		/* packed/uv */
		total_data_rate += skl_plane_relative_data_rate(cstate,
								pstate,
								0);

		if (pstate->fb->pixel_format == DRM_FORMAT_NV12)
			/* y-plane */
			total_data_rate += skl_plane_relative_data_rate(cstate,
									pstate,
									1);
	}

	return total_data_rate;
}
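/*
 * Worked example for the relative data rates above (illustrative plane):
 * a 1920x1080 NV12 plane contributes 1920 * 1080 * 1 = 2073600 for the
 * y-plane and (1920 / 2) * (1080 / 2) * 2 = 1036800 for the uv-plane,
 * while the same plane in a packed 4-byte format would contribute
 * 1920 * 1080 * 4 = 8294400.
 */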
3084 */ 3085 total_data_rate = skl_get_total_relative_data_rate(cstate); 3086 3087 start = alloc->start; 3088 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { 3089 struct drm_plane *plane = &intel_plane->base; 3090 struct drm_plane_state *pstate = intel_plane->base.state; 3091 unsigned int data_rate, y_data_rate; 3092 uint16_t plane_blocks, y_plane_blocks = 0; 3093 int id = skl_wm_plane_id(intel_plane); 3094 3095 if (!to_intel_plane_state(pstate)->visible) 3096 continue; 3097 if (plane->type == DRM_PLANE_TYPE_CURSOR) 3098 continue; 3099 3100 data_rate = skl_plane_relative_data_rate(cstate, pstate, 0); 3101 3102 /* 3103 * allocation for (packed formats) or (uv-plane part of planar format): 3104 * promote the expression to 64 bits to avoid overflowing, the 3105 * result is < available as data_rate / total_data_rate < 1 3106 */ 3107 plane_blocks = minimum[id]; 3108 plane_blocks += div_u64((uint64_t)alloc_size * data_rate, 3109 total_data_rate); 3110 3111 ddb->plane[pipe][id].start = start; 3112 ddb->plane[pipe][id].end = start + plane_blocks; 3113 3114 start += plane_blocks; 3115 3116 /* 3117 * allocation for y_plane part of planar format: 3118 */ 3119 if (pstate->fb->pixel_format == DRM_FORMAT_NV12) { 3120 y_data_rate = skl_plane_relative_data_rate(cstate, 3121 pstate, 3122 1); 3123 y_plane_blocks = y_minimum[id]; 3124 y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, 3125 total_data_rate); 3126 3127 ddb->y_plane[pipe][id].start = start; 3128 ddb->y_plane[pipe][id].end = start + y_plane_blocks; 3129 3130 start += y_plane_blocks; 3131 } 3132 3133 } 3134 3135 } 3136 3137 static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config) 3138 { 3139 /* TODO: Take into account the scalers once we support them */ 3140 return config->base.adjusted_mode.crtc_clock; 3141 } 3142 3143 /* 3144 * The max latency should be 257 (max the punit can code is 255 and we add 2us 3145 * for the read latency) and cpp should always be <= 8, so that 3146 * should allow pixel_rate up to ~2 GHz which seems sufficient since max 3147 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that. 
3148 */ 3149 static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency) 3150 { 3151 uint32_t wm_intermediate_val, ret; 3152 3153 if (latency == 0) 3154 return UINT_MAX; 3155 3156 wm_intermediate_val = latency * pixel_rate * cpp / 512; 3157 ret = DIV_ROUND_UP(wm_intermediate_val, 1000); 3158 3159 return ret; 3160 } 3161 3162 static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, 3163 uint32_t horiz_pixels, uint8_t cpp, 3164 uint64_t tiling, uint32_t latency) 3165 { 3166 uint32_t ret; 3167 uint32_t plane_bytes_per_line, plane_blocks_per_line; 3168 uint32_t wm_intermediate_val; 3169 3170 if (latency == 0) 3171 return UINT_MAX; 3172 3173 plane_bytes_per_line = horiz_pixels * cpp; 3174 3175 if (tiling == I915_FORMAT_MOD_Y_TILED || 3176 tiling == I915_FORMAT_MOD_Yf_TILED) { 3177 plane_bytes_per_line *= 4; 3178 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); 3179 plane_blocks_per_line /= 4; 3180 } else { 3181 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); 3182 } 3183 3184 wm_intermediate_val = latency * pixel_rate; 3185 ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) * 3186 plane_blocks_per_line; 3187 3188 return ret; 3189 } 3190 3191 static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb, 3192 const struct intel_crtc *intel_crtc) 3193 { 3194 struct drm_device *dev = intel_crtc->base.dev; 3195 struct drm_i915_private *dev_priv = dev->dev_private; 3196 const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; 3197 3198 /* 3199 * If ddb allocation of pipes changed, it may require recalculation of 3200 * watermarks 3201 */ 3202 if (memcmp(new_ddb->pipe, cur_ddb->pipe, sizeof(new_ddb->pipe))) 3203 return true; 3204 3205 return false; 3206 } 3207 3208 static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv, 3209 struct intel_crtc_state *cstate, 3210 struct intel_plane *intel_plane, 3211 uint16_t ddb_allocation, 3212 int level, 3213 uint16_t *out_blocks, /* out */ 3214 uint8_t *out_lines /* out */) 3215 { 3216 struct drm_plane *plane = &intel_plane->base; 3217 struct drm_framebuffer *fb = plane->state->fb; 3218 struct intel_plane_state *intel_pstate = 3219 to_intel_plane_state(plane->state); 3220 uint32_t latency = dev_priv->wm.skl_latency[level]; 3221 uint32_t method1, method2; 3222 uint32_t plane_bytes_per_line, plane_blocks_per_line; 3223 uint32_t res_blocks, res_lines; 3224 uint32_t selected_result; 3225 uint8_t cpp; 3226 uint32_t width = 0, height = 0; 3227 3228 if (latency == 0 || !cstate->base.active || !intel_pstate->visible) 3229 return false; 3230 3231 width = drm_rect_width(&intel_pstate->src) >> 16; 3232 height = drm_rect_height(&intel_pstate->src) >> 16; 3233 3234 if (intel_rotation_90_or_270(plane->state->rotation)) 3235 swap(width, height); 3236 3237 cpp = drm_format_plane_cpp(fb->pixel_format, 0); 3238 method1 = skl_wm_method1(skl_pipe_pixel_rate(cstate), 3239 cpp, latency); 3240 method2 = skl_wm_method2(skl_pipe_pixel_rate(cstate), 3241 cstate->base.adjusted_mode.crtc_htotal, 3242 width, 3243 cpp, 3244 fb->modifier[0], 3245 latency); 3246 3247 plane_bytes_per_line = width * cpp; 3248 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); 3249 3250 if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || 3251 fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { 3252 uint32_t min_scanlines = 4; 3253 uint32_t y_tile_minimum; 3254 if (intel_rotation_90_or_270(plane->state->rotation)) { 3255 int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ? 
static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb,
				       const struct intel_crtc *intel_crtc)
{
	struct drm_device *dev = intel_crtc->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;

	/*
	 * If ddb allocation of pipes changed, it may require recalculation of
	 * watermarks
	 */
	if (memcmp(new_ddb->pipe, cur_ddb->pipe, sizeof(new_ddb->pipe)))
		return true;

	return false;
}

static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
				 struct intel_crtc_state *cstate,
				 struct intel_plane *intel_plane,
				 uint16_t ddb_allocation,
				 int level,
				 uint16_t *out_blocks, /* out */
				 uint8_t *out_lines /* out */)
{
	struct drm_plane *plane = &intel_plane->base;
	struct drm_framebuffer *fb = plane->state->fb;
	struct intel_plane_state *intel_pstate =
		to_intel_plane_state(plane->state);
	uint32_t latency = dev_priv->wm.skl_latency[level];
	uint32_t method1, method2;
	uint32_t plane_bytes_per_line, plane_blocks_per_line;
	uint32_t res_blocks, res_lines;
	uint32_t selected_result;
	uint8_t cpp;
	uint32_t width = 0, height = 0;

	if (latency == 0 || !cstate->base.active || !intel_pstate->visible)
		return false;

	width = drm_rect_width(&intel_pstate->src) >> 16;
	height = drm_rect_height(&intel_pstate->src) >> 16;

	if (intel_rotation_90_or_270(plane->state->rotation))
		swap(width, height);

	cpp = drm_format_plane_cpp(fb->pixel_format, 0);
	method1 = skl_wm_method1(skl_pipe_pixel_rate(cstate),
				 cpp, latency);
	method2 = skl_wm_method2(skl_pipe_pixel_rate(cstate),
				 cstate->base.adjusted_mode.crtc_htotal,
				 width,
				 cpp,
				 fb->modifier[0],
				 latency);

	plane_bytes_per_line = width * cpp;
	plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);

	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
		uint32_t min_scanlines = 4;
		uint32_t y_tile_minimum;
		if (intel_rotation_90_or_270(plane->state->rotation)) {
			int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ?
				drm_format_plane_cpp(fb->pixel_format, 1) :
				drm_format_plane_cpp(fb->pixel_format, 0);

			switch (cpp) {
			case 1:
				min_scanlines = 16;
				break;
			case 2:
				min_scanlines = 8;
				break;
			case 8:
				WARN(1, "Unsupported pixel depth for rotation");
			}
		}
		y_tile_minimum = plane_blocks_per_line * min_scanlines;
		selected_result = max(method2, y_tile_minimum);
	} else {
		if ((ddb_allocation / plane_blocks_per_line) >= 1)
			selected_result = min(method1, method2);
		else
			selected_result = method1;
	}

	res_blocks = selected_result + 1;
	res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line);

	if (level >= 1 && level <= 7) {
		if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
		    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED)
			res_lines += 4;
		else
			res_blocks++;
	}

	if (res_blocks >= ddb_allocation || res_lines > 31)
		return false;

	*out_blocks = res_blocks;
	*out_lines = res_lines;

	return true;
}

static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
				 struct skl_ddb_allocation *ddb,
				 struct intel_crtc_state *cstate,
				 int level,
				 struct skl_wm_level *result)
{
	struct drm_device *dev = dev_priv->dev;
	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
	struct intel_plane *intel_plane;
	uint16_t ddb_blocks;
	enum i915_pipe pipe = intel_crtc->pipe;

	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
		int i = skl_wm_plane_id(intel_plane);

		ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);

		result->plane_en[i] = skl_compute_plane_wm(dev_priv,
						cstate,
						intel_plane,
						ddb_blocks,
						level,
						&result->plane_res_b[i],
						&result->plane_res_l[i]);
	}
}

static uint32_t
skl_compute_linetime_wm(struct intel_crtc_state *cstate)
{
	if (!cstate->base.active)
		return 0;

	if (WARN_ON(skl_pipe_pixel_rate(cstate) == 0))
		return 0;

	return DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal * 1000,
			    skl_pipe_pixel_rate(cstate));
}

static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
				      struct skl_wm_level *trans_wm /* out */)
{
	struct drm_crtc *crtc = cstate->base.crtc;
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	struct intel_plane *intel_plane;

	if (!cstate->base.active)
		return;

	/* Until we know more, just disable transition WMs */
	for_each_intel_plane_on_crtc(crtc->dev, intel_crtc, intel_plane) {
		int i = skl_wm_plane_id(intel_plane);

		trans_wm->plane_en[i] = false;
	}
}

static void skl_compute_pipe_wm(struct intel_crtc_state *cstate,
				struct skl_ddb_allocation *ddb,
				struct skl_pipe_wm *pipe_wm)
{
	struct drm_device *dev = cstate->base.crtc->dev;
	const struct drm_i915_private *dev_priv = dev->dev_private;
	int level, max_level = ilk_wm_max_level(dev);

	for (level = 0; level <= max_level; level++) {
		skl_compute_wm_level(dev_priv, ddb, cstate,
				     level, &pipe_wm->wm[level]);
	}
	pipe_wm->linetime = skl_compute_linetime_wm(cstate);

	skl_compute_transition_wm(cstate, &pipe_wm->trans_wm);
}

static void skl_compute_wm_results(struct drm_device *dev,
				   struct skl_pipe_wm *p_wm,
				   struct skl_wm_values *r,
				   struct intel_crtc *intel_crtc)
{
	int level, max_level = ilk_wm_max_level(dev);
	enum i915_pipe pipe = intel_crtc->pipe;
	uint32_t temp;
	int i;

	for (level = 0; level <= max_level; level++) {
		for (i = 0; i < intel_num_planes(intel_crtc); i++) {
			temp = 0;

			temp |= p_wm->wm[level].plane_res_l[i] <<
					PLANE_WM_LINES_SHIFT;
			temp |= p_wm->wm[level].plane_res_b[i];
			if (p_wm->wm[level].plane_en[i])
				temp |= PLANE_WM_EN;

			r->plane[pipe][i][level] = temp;
		}

		temp = 0;

		temp |= p_wm->wm[level].plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
		temp |= p_wm->wm[level].plane_res_b[PLANE_CURSOR];

		if (p_wm->wm[level].plane_en[PLANE_CURSOR])
			temp |= PLANE_WM_EN;

		r->plane[pipe][PLANE_CURSOR][level] = temp;
	}

	/* transition WMs */
	for (i = 0; i < intel_num_planes(intel_crtc); i++) {
		temp = 0;
		temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT;
		temp |= p_wm->trans_wm.plane_res_b[i];
		if (p_wm->trans_wm.plane_en[i])
			temp |= PLANE_WM_EN;

		r->plane_trans[pipe][i] = temp;
	}

	temp = 0;
	temp |= p_wm->trans_wm.plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
	temp |= p_wm->trans_wm.plane_res_b[PLANE_CURSOR];
	if (p_wm->trans_wm.plane_en[PLANE_CURSOR])
		temp |= PLANE_WM_EN;

	r->plane_trans[pipe][PLANE_CURSOR] = temp;

	r->wm_linetime[pipe] = p_wm->linetime;
}

static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
				i915_reg_t reg,
				const struct skl_ddb_entry *entry)
{
	if (entry->end)
		I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
	else
		I915_WRITE(reg, 0);
}

static void skl_write_wm_values(struct drm_i915_private *dev_priv,
				const struct skl_wm_values *new)
{
	struct drm_device *dev = dev_priv->dev;
	struct intel_crtc *crtc;

	for_each_intel_crtc(dev, crtc) {
		int i, level, max_level = ilk_wm_max_level(dev);
		enum i915_pipe pipe = crtc->pipe;

		if (!new->dirty[pipe])
			continue;

		I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]);

		for (level = 0; level <= max_level; level++) {
			for (i = 0; i < intel_num_planes(crtc); i++)
				I915_WRITE(PLANE_WM(pipe, i, level),
					   new->plane[pipe][i][level]);
			I915_WRITE(CUR_WM(pipe, level),
				   new->plane[pipe][PLANE_CURSOR][level]);
		}
		for (i = 0; i < intel_num_planes(crtc); i++)
			I915_WRITE(PLANE_WM_TRANS(pipe, i),
				   new->plane_trans[pipe][i]);
		I915_WRITE(CUR_WM_TRANS(pipe),
			   new->plane_trans[pipe][PLANE_CURSOR]);

		for (i = 0; i < intel_num_planes(crtc); i++) {
			skl_ddb_entry_write(dev_priv,
					    PLANE_BUF_CFG(pipe, i),
					    &new->ddb.plane[pipe][i]);
			skl_ddb_entry_write(dev_priv,
					    PLANE_NV12_BUF_CFG(pipe, i),
					    &new->ddb.y_plane[pipe][i]);
		}

		skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
				    &new->ddb.plane[pipe][PLANE_CURSOR]);
	}
}
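/*
 * Register encoding notes for the helpers above (illustrative values): a
 * DDB entry {start = 0, end = 160} is written as (160 - 1) << 16 | 0,
 * i.e. the hardware takes an inclusive end block (and
 * skl_ddb_entry_init_from_hw() adds the 1 back on readout). Likewise each
 * PLANE_WM value packs "lines << PLANE_WM_LINES_SHIFT | blocks", plus
 * PLANE_WM_EN when the level is usable.
 */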
/*
 * When setting up a new DDB allocation arrangement, we need to correctly
 * sequence the times at which the new allocations for the pipes are taken into
 * account or we'll have pipes fetching from space previously allocated to
 * another pipe.
 *
 * Roughly the sequence looks like:
 *  1. re-allocate the pipe(s) with the allocation being reduced and not
 *     overlapping with a previously lit-up pipe (another way to put it is:
 *     pipes with their new allocation strictly included into their old ones).
 *  2. re-allocate the other pipes that get their allocation reduced
 *  3. allocate the pipes having their allocation increased
 *
 * Steps 1. and 2. are here to take care of the following case:
 * - Initially DDB looks like this:
 *     |   B    |   C    |
 * - enable pipe A.
 * - pipe B has a reduced DDB allocation that overlaps with the old pipe C
 *   allocation
 *     |  A  |  B  |  C  |
 *
 * We need to sequence the re-allocation: C, B, A (and not B, C, A).
 */

static void
skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum i915_pipe pipe, int pass)
{
	int plane;

	DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass);

	for_each_plane(dev_priv, pipe, plane) {
		I915_WRITE(PLANE_SURF(pipe, plane),
			   I915_READ(PLANE_SURF(pipe, plane)));
	}
	I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe)));
}

static bool
skl_ddb_allocation_included(const struct skl_ddb_allocation *old,
			    const struct skl_ddb_allocation *new,
			    enum i915_pipe pipe)
{
	uint16_t old_size, new_size;

	old_size = skl_ddb_entry_size(&old->pipe[pipe]);
	new_size = skl_ddb_entry_size(&new->pipe[pipe]);

	return old_size != new_size &&
	       new->pipe[pipe].start >= old->pipe[pipe].start &&
	       new->pipe[pipe].end <= old->pipe[pipe].end;
}

static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
				struct skl_wm_values *new_values)
{
	struct drm_device *dev = dev_priv->dev;
	struct skl_ddb_allocation *cur_ddb, *new_ddb;
	bool reallocated[I915_MAX_PIPES] = {};
	struct intel_crtc *crtc;
	enum i915_pipe pipe;

	new_ddb = &new_values->ddb;
	cur_ddb = &dev_priv->wm.skl_hw.ddb;

	/*
	 * First pass: flush the pipes with the new allocation contained into
	 * the old space.
	 *
	 * We'll wait for the vblank on those pipes to ensure we can safely
	 * re-allocate the freed space without this pipe fetching from it.
	 */
	for_each_intel_crtc(dev, crtc) {
		if (!crtc->active)
			continue;

		pipe = crtc->pipe;

		if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe))
			continue;

		skl_wm_flush_pipe(dev_priv, pipe, 1);
		intel_wait_for_vblank(dev, pipe);

		reallocated[pipe] = true;
	}

	/*
	 * Second pass: flush the pipes that are having their allocation
	 * reduced, but overlapping with a previous allocation.
	 *
	 * Here as well we need to wait for the vblank to make sure the freed
	 * space is not used anymore.
	 */
	for_each_intel_crtc(dev, crtc) {
		if (!crtc->active)
			continue;

		pipe = crtc->pipe;

		if (reallocated[pipe])
			continue;

		if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) <
		    skl_ddb_entry_size(&cur_ddb->pipe[pipe])) {
			skl_wm_flush_pipe(dev_priv, pipe, 2);
			intel_wait_for_vblank(dev, pipe);
			reallocated[pipe] = true;
		}
	}

	/*
	 * Third pass: flush the pipes that got more space allocated.
	 *
	 * We don't need to actively wait for the update here, next vblank
	 * will just get more DDB space with the correct WM values.
	 */
3600 */ 3601 for_each_intel_crtc(dev, crtc) { 3602 if (!crtc->active) 3603 continue; 3604 3605 pipe = crtc->pipe; 3606 3607 /* 3608 * At this point, only the pipes more space than before are 3609 * left to re-allocate. 3610 */ 3611 if (reallocated[pipe]) 3612 continue; 3613 3614 skl_wm_flush_pipe(dev_priv, pipe, 3); 3615 } 3616 } 3617 3618 static bool skl_update_pipe_wm(struct drm_crtc *crtc, 3619 struct skl_ddb_allocation *ddb, /* out */ 3620 struct skl_pipe_wm *pipe_wm /* out */) 3621 { 3622 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3623 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); 3624 3625 skl_allocate_pipe_ddb(cstate, ddb); 3626 skl_compute_pipe_wm(cstate, ddb, pipe_wm); 3627 3628 if (!memcmp(&intel_crtc->wm.active.skl, pipe_wm, sizeof(*pipe_wm))) 3629 return false; 3630 3631 intel_crtc->wm.active.skl = *pipe_wm; 3632 3633 return true; 3634 } 3635 3636 static void skl_update_other_pipe_wm(struct drm_device *dev, 3637 struct drm_crtc *crtc, 3638 struct skl_wm_values *r) 3639 { 3640 struct intel_crtc *intel_crtc; 3641 struct intel_crtc *this_crtc = to_intel_crtc(crtc); 3642 3643 /* 3644 * If the WM update hasn't changed the allocation for this_crtc (the 3645 * crtc we are currently computing the new WM values for), other 3646 * enabled crtcs will keep the same allocation and we don't need to 3647 * recompute anything for them. 3648 */ 3649 if (!skl_ddb_allocation_changed(&r->ddb, this_crtc)) 3650 return; 3651 3652 /* 3653 * Otherwise, because of this_crtc being freshly enabled/disabled, the 3654 * other active pipes need new DDB allocation and WM values. 3655 */ 3656 for_each_intel_crtc(dev, intel_crtc) { 3657 struct skl_pipe_wm pipe_wm = {}; 3658 bool wm_changed; 3659 3660 if (this_crtc->pipe == intel_crtc->pipe) 3661 continue; 3662 3663 if (!intel_crtc->active) 3664 continue; 3665 3666 wm_changed = skl_update_pipe_wm(&intel_crtc->base, 3667 &r->ddb, &pipe_wm); 3668 3669 /* 3670 * If we end up re-computing the other pipe WM values, it's 3671 * because it was really needed, so we expect the WM values to 3672 * be different. 
3673 */ 3674 WARN_ON(!wm_changed); 3675 3676 skl_compute_wm_results(dev, &pipe_wm, r, intel_crtc); 3677 r->dirty[intel_crtc->pipe] = true; 3678 } 3679 } 3680 3681 static void skl_clear_wm(struct skl_wm_values *watermarks, enum i915_pipe pipe) 3682 { 3683 watermarks->wm_linetime[pipe] = 0; 3684 memset(watermarks->plane[pipe], 0, 3685 sizeof(uint32_t) * 8 * I915_MAX_PLANES); 3686 memset(watermarks->plane_trans[pipe], 3687 0, sizeof(uint32_t) * I915_MAX_PLANES); 3688 watermarks->plane_trans[pipe][PLANE_CURSOR] = 0; 3689 3690 /* Clear ddb entries for pipe */ 3691 memset(&watermarks->ddb.pipe[pipe], 0, sizeof(struct skl_ddb_entry)); 3692 memset(&watermarks->ddb.plane[pipe], 0, 3693 sizeof(struct skl_ddb_entry) * I915_MAX_PLANES); 3694 memset(&watermarks->ddb.y_plane[pipe], 0, 3695 sizeof(struct skl_ddb_entry) * I915_MAX_PLANES); 3696 memset(&watermarks->ddb.plane[pipe][PLANE_CURSOR], 0, 3697 sizeof(struct skl_ddb_entry)); 3698 3699 } 3700 3701 static void skl_update_wm(struct drm_crtc *crtc) 3702 { 3703 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3704 struct drm_device *dev = crtc->dev; 3705 struct drm_i915_private *dev_priv = dev->dev_private; 3706 struct skl_wm_values *results = &dev_priv->wm.skl_results; 3707 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); 3708 struct skl_pipe_wm *pipe_wm = &cstate->wm.optimal.skl; 3709 3710 3711 /* Clear all dirty flags */ 3712 memset(results->dirty, 0, sizeof(bool) * I915_MAX_PIPES); 3713 3714 skl_clear_wm(results, intel_crtc->pipe); 3715 3716 if (!skl_update_pipe_wm(crtc, &results->ddb, pipe_wm)) 3717 return; 3718 3719 skl_compute_wm_results(dev, pipe_wm, results, intel_crtc); 3720 results->dirty[intel_crtc->pipe] = true; 3721 3722 skl_update_other_pipe_wm(dev, crtc, results); 3723 skl_write_wm_values(dev_priv, results); 3724 skl_flush_wm_values(dev_priv, results); 3725 3726 /* store the new configuration */ 3727 dev_priv->wm.skl_hw = *results; 3728 } 3729 3730 static void ilk_compute_wm_config(struct drm_device *dev, 3731 struct intel_wm_config *config) 3732 { 3733 struct intel_crtc *crtc; 3734 3735 /* Compute the currently _active_ config */ 3736 for_each_intel_crtc(dev, crtc) { 3737 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk; 3738 3739 if (!wm->pipe_enabled) 3740 continue; 3741 3742 config->sprites_enabled |= wm->sprites_enabled; 3743 config->sprites_scaled |= wm->sprites_scaled; 3744 config->num_pipes_active++; 3745 } 3746 } 3747 3748 static void ilk_program_watermarks(struct drm_i915_private *dev_priv) 3749 { 3750 struct drm_device *dev = dev_priv->dev; 3751 struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm; 3752 struct ilk_wm_maximums max; 3753 struct intel_wm_config config = {}; 3754 struct ilk_wm_values results = {}; 3755 enum intel_ddb_partitioning partitioning; 3756 3757 ilk_compute_wm_config(dev, &config); 3758 3759 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max); 3760 ilk_wm_merge(dev, &config, &max, &lp_wm_1_2); 3761 3762 /* 5/6 split only in single pipe config on IVB+ */ 3763 if (INTEL_INFO(dev)->gen >= 7 && 3764 config.num_pipes_active == 1 && config.sprites_enabled) { 3765 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max); 3766 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6); 3767 3768 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6); 3769 } else { 3770 best_lp_wm = &lp_wm_1_2; 3771 } 3772 3773 partitioning = (best_lp_wm == &lp_wm_1_2) ? 
3774 INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6; 3775 3776 ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results); 3777 3778 ilk_write_wm_values(dev_priv, &results); 3779 } 3780 3781 static void ilk_initial_watermarks(struct intel_crtc_state *cstate) 3782 { 3783 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); 3784 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 3785 3786 mutex_lock(&dev_priv->wm.wm_mutex); 3787 intel_crtc->wm.active.ilk = cstate->wm.intermediate; 3788 ilk_program_watermarks(dev_priv); 3789 mutex_unlock(&dev_priv->wm.wm_mutex); 3790 } 3791 3792 static void ilk_optimize_watermarks(struct intel_crtc_state *cstate) 3793 { 3794 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); 3795 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 3796 3797 mutex_lock(&dev_priv->wm.wm_mutex); 3798 if (cstate->wm.need_postvbl_update) { 3799 intel_crtc->wm.active.ilk = cstate->wm.optimal.ilk; 3800 ilk_program_watermarks(dev_priv); 3801 } 3802 mutex_unlock(&dev_priv->wm.wm_mutex); 3803 } 3804 3805 static void skl_pipe_wm_active_state(uint32_t val, 3806 struct skl_pipe_wm *active, 3807 bool is_transwm, 3808 bool is_cursor, 3809 int i, 3810 int level) 3811 { 3812 bool is_enabled = (val & PLANE_WM_EN) != 0; 3813 3814 if (!is_transwm) { 3815 if (!is_cursor) { 3816 active->wm[level].plane_en[i] = is_enabled; 3817 active->wm[level].plane_res_b[i] = 3818 val & PLANE_WM_BLOCKS_MASK; 3819 active->wm[level].plane_res_l[i] = 3820 (val >> PLANE_WM_LINES_SHIFT) & 3821 PLANE_WM_LINES_MASK; 3822 } else { 3823 active->wm[level].plane_en[PLANE_CURSOR] = is_enabled; 3824 active->wm[level].plane_res_b[PLANE_CURSOR] = 3825 val & PLANE_WM_BLOCKS_MASK; 3826 active->wm[level].plane_res_l[PLANE_CURSOR] = 3827 (val >> PLANE_WM_LINES_SHIFT) & 3828 PLANE_WM_LINES_MASK; 3829 } 3830 } else { 3831 if (!is_cursor) { 3832 active->trans_wm.plane_en[i] = is_enabled; 3833 active->trans_wm.plane_res_b[i] = 3834 val & PLANE_WM_BLOCKS_MASK; 3835 active->trans_wm.plane_res_l[i] = 3836 (val >> PLANE_WM_LINES_SHIFT) & 3837 PLANE_WM_LINES_MASK; 3838 } else { 3839 active->trans_wm.plane_en[PLANE_CURSOR] = is_enabled; 3840 active->trans_wm.plane_res_b[PLANE_CURSOR] = 3841 val & PLANE_WM_BLOCKS_MASK; 3842 active->trans_wm.plane_res_l[PLANE_CURSOR] = 3843 (val >> PLANE_WM_LINES_SHIFT) & 3844 PLANE_WM_LINES_MASK; 3845 } 3846 } 3847 } 3848 3849 static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc) 3850 { 3851 struct drm_device *dev = crtc->dev; 3852 struct drm_i915_private *dev_priv = dev->dev_private; 3853 struct skl_wm_values *hw = &dev_priv->wm.skl_hw; 3854 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3855 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); 3856 struct skl_pipe_wm *active = &cstate->wm.optimal.skl; 3857 enum i915_pipe pipe = intel_crtc->pipe; 3858 int level, i, max_level; 3859 uint32_t temp; 3860 3861 max_level = ilk_wm_max_level(dev); 3862 3863 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); 3864 3865 for (level = 0; level <= max_level; level++) { 3866 for (i = 0; i < intel_num_planes(intel_crtc); i++) 3867 hw->plane[pipe][i][level] = 3868 I915_READ(PLANE_WM(pipe, i, level)); 3869 hw->plane[pipe][PLANE_CURSOR][level] = I915_READ(CUR_WM(pipe, level)); 3870 } 3871 3872 for (i = 0; i < intel_num_planes(intel_crtc); i++) 3873 hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i)); 3874 hw->plane_trans[pipe][PLANE_CURSOR] = I915_READ(CUR_WM_TRANS(pipe)); 3875 3876 if (!intel_crtc->active) 3877 return; 
3878 3879 hw->dirty[pipe] = true; 3880 3881 active->linetime = hw->wm_linetime[pipe]; 3882 3883 for (level = 0; level <= max_level; level++) { 3884 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3885 temp = hw->plane[pipe][i][level]; 3886 skl_pipe_wm_active_state(temp, active, false, 3887 false, i, level); 3888 } 3889 temp = hw->plane[pipe][PLANE_CURSOR][level]; 3890 skl_pipe_wm_active_state(temp, active, false, true, i, level); 3891 } 3892 3893 for (i = 0; i < intel_num_planes(intel_crtc); i++) { 3894 temp = hw->plane_trans[pipe][i]; 3895 skl_pipe_wm_active_state(temp, active, true, false, i, 0); 3896 } 3897 3898 temp = hw->plane_trans[pipe][PLANE_CURSOR]; 3899 skl_pipe_wm_active_state(temp, active, true, true, i, 0); 3900 3901 intel_crtc->wm.active.skl = *active; 3902 } 3903 3904 void skl_wm_get_hw_state(struct drm_device *dev) 3905 { 3906 struct drm_i915_private *dev_priv = dev->dev_private; 3907 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb; 3908 struct drm_crtc *crtc; 3909 3910 skl_ddb_get_hw_state(dev_priv, ddb); 3911 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) 3912 skl_pipe_wm_get_hw_state(crtc); 3913 } 3914 3915 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) 3916 { 3917 struct drm_device *dev = crtc->dev; 3918 struct drm_i915_private *dev_priv = dev->dev_private; 3919 struct ilk_wm_values *hw = &dev_priv->wm.hw; 3920 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3921 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); 3922 struct intel_pipe_wm *active = &cstate->wm.optimal.ilk; 3923 enum i915_pipe pipe = intel_crtc->pipe; 3924 static const i915_reg_t wm0_pipe_reg[] = { 3925 [PIPE_A] = WM0_PIPEA_ILK, 3926 [PIPE_B] = WM0_PIPEB_ILK, 3927 [PIPE_C] = WM0_PIPEC_IVB, 3928 }; 3929 3930 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]); 3931 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 3932 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); 3933 3934 memset(active, 0, sizeof(*active)); 3935 3936 active->pipe_enabled = intel_crtc->active; 3937 3938 if (active->pipe_enabled) { 3939 u32 tmp = hw->wm_pipe[pipe]; 3940 3941 /* 3942 * For active pipes LP0 watermark is marked as 3943 * enabled, and LP1+ watermarks as disabled since 3944 * we can't really reverse compute them in case 3945 * multiple pipes are active. 3946 */ 3947 active->wm[0].enable = true; 3948 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT; 3949 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT; 3950 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK; 3951 active->linetime = hw->wm_linetime[pipe]; 3952 } else { 3953 int level, max_level = ilk_wm_max_level(dev); 3954 3955 /* 3956 * For inactive pipes, all watermark levels 3957 * should be marked as enabled but zeroed, 3958 * which is what we'd compute them to.
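* That way the read-back state matches what a fresh computation would produce for an idle pipe, and later comparisons do not flag spurious mismatches.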
3959 */ 3960 for (level = 0; level <= max_level; level++) 3961 active->wm[level].enable = true; 3962 } 3963 3964 intel_crtc->wm.active.ilk = *active; 3965 } 3966 3967 #define _FW_WM(value, plane) \ 3968 (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT) 3969 #define _FW_WM_VLV(value, plane) \ 3970 (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT) 3971 3972 static void vlv_read_wm_values(struct drm_i915_private *dev_priv, 3973 struct vlv_wm_values *wm) 3974 { 3975 enum i915_pipe pipe; 3976 uint32_t tmp; 3977 3978 for_each_pipe(dev_priv, pipe) { 3979 tmp = I915_READ(VLV_DDL(pipe)); 3980 3981 wm->ddl[pipe].primary = 3982 (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 3983 wm->ddl[pipe].cursor = 3984 (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 3985 wm->ddl[pipe].sprite[0] = 3986 (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 3987 wm->ddl[pipe].sprite[1] = 3988 (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 3989 } 3990 3991 tmp = I915_READ(DSPFW1); 3992 wm->sr.plane = _FW_WM(tmp, SR); 3993 wm->pipe[PIPE_B].cursor = _FW_WM(tmp, CURSORB); 3994 wm->pipe[PIPE_B].primary = _FW_WM_VLV(tmp, PLANEB); 3995 wm->pipe[PIPE_A].primary = _FW_WM_VLV(tmp, PLANEA); 3996 3997 tmp = I915_READ(DSPFW2); 3998 wm->pipe[PIPE_A].sprite[1] = _FW_WM_VLV(tmp, SPRITEB); 3999 wm->pipe[PIPE_A].cursor = _FW_WM(tmp, CURSORA); 4000 wm->pipe[PIPE_A].sprite[0] = _FW_WM_VLV(tmp, SPRITEA); 4001 4002 tmp = I915_READ(DSPFW3); 4003 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR); 4004 4005 if (IS_CHERRYVIEW(dev_priv)) { 4006 tmp = I915_READ(DSPFW7_CHV); 4007 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED); 4008 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC); 4009 4010 tmp = I915_READ(DSPFW8_CHV); 4011 wm->pipe[PIPE_C].sprite[1] = _FW_WM_VLV(tmp, SPRITEF); 4012 wm->pipe[PIPE_C].sprite[0] = _FW_WM_VLV(tmp, SPRITEE); 4013 4014 tmp = I915_READ(DSPFW9_CHV); 4015 wm->pipe[PIPE_C].primary = _FW_WM_VLV(tmp, PLANEC); 4016 wm->pipe[PIPE_C].cursor = _FW_WM(tmp, CURSORC); 4017 4018 tmp = I915_READ(DSPHOWM); 4019 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9; 4020 wm->pipe[PIPE_C].sprite[1] |= _FW_WM(tmp, SPRITEF_HI) << 8; 4021 wm->pipe[PIPE_C].sprite[0] |= _FW_WM(tmp, SPRITEE_HI) << 8; 4022 wm->pipe[PIPE_C].primary |= _FW_WM(tmp, PLANEC_HI) << 8; 4023 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8; 4024 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8; 4025 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8; 4026 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8; 4027 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8; 4028 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8; 4029 } else { 4030 tmp = I915_READ(DSPFW7); 4031 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED); 4032 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC); 4033 4034 tmp = I915_READ(DSPHOWM); 4035 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9; 4036 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8; 4037 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8; 4038 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8; 4039 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8; 4040 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8; 4041 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8; 4042 } 4043 } 4044 4045 #undef _FW_WM 4046 #undef _FW_WM_VLV 4047 4048 void vlv_wm_get_hw_state(struct drm_device *dev) 4049 { 4050 struct drm_i915_private *dev_priv = 
to_i915(dev); 4051 struct vlv_wm_values *wm = &dev_priv->wm.vlv; 4052 struct intel_plane *plane; 4053 enum i915_pipe pipe; 4054 u32 val; 4055 4056 vlv_read_wm_values(dev_priv, wm); 4057 4058 for_each_intel_plane(dev, plane) { 4059 switch (plane->base.type) { 4060 int sprite; 4061 case DRM_PLANE_TYPE_CURSOR: 4062 plane->wm.fifo_size = 63; 4063 break; 4064 case DRM_PLANE_TYPE_PRIMARY: 4065 plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, 0); 4066 break; 4067 case DRM_PLANE_TYPE_OVERLAY: 4068 sprite = plane->plane; 4069 plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, sprite + 1); 4070 break; 4071 } 4072 } 4073 4074 wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; 4075 wm->level = VLV_WM_LEVEL_PM2; 4076 4077 if (IS_CHERRYVIEW(dev_priv)) { 4078 mutex_lock(&dev_priv->rps.hw_lock); 4079 4080 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); 4081 if (val & DSP_MAXFIFO_PM5_ENABLE) 4082 wm->level = VLV_WM_LEVEL_PM5; 4083 4084 /* 4085 * If DDR DVFS is disabled in the BIOS, Punit 4086 * will never ack the request. So if that happens 4087 * assume we don't have to enable/disable DDR DVFS 4088 * dynamically. To test that just set the REQ_ACK 4089 * bit to poke the Punit, but don't change the 4090 * HIGH/LOW bits so that we don't actually change 4091 * the current state. 4092 */ 4093 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 4094 val |= FORCE_DDR_FREQ_REQ_ACK; 4095 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val); 4096 4097 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) & 4098 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) { 4099 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, " 4100 "assuming DDR DVFS is disabled\n"); 4101 dev_priv->wm.max_level = VLV_WM_LEVEL_PM5; 4102 } else { 4103 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 4104 if ((val & FORCE_DDR_HIGH_FREQ) == 0) 4105 wm->level = VLV_WM_LEVEL_DDR_DVFS; 4106 } 4107 4108 mutex_unlock(&dev_priv->rps.hw_lock); 4109 } 4110 4111 for_each_pipe(dev_priv, pipe) 4112 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", 4113 pipe_name(pipe), wm->pipe[pipe].primary, wm->pipe[pipe].cursor, 4114 wm->pipe[pipe].sprite[0], wm->pipe[pipe].sprite[1]); 4115 4116 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", 4117 wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); 4118 } 4119 4120 void ilk_wm_get_hw_state(struct drm_device *dev) 4121 { 4122 struct drm_i915_private *dev_priv = dev->dev_private; 4123 struct ilk_wm_values *hw = &dev_priv->wm.hw; 4124 struct drm_crtc *crtc; 4125 4126 for_each_crtc(dev, crtc) 4127 ilk_pipe_wm_get_hw_state(crtc); 4128 4129 hw->wm_lp[0] = I915_READ(WM1_LP_ILK); 4130 hw->wm_lp[1] = I915_READ(WM2_LP_ILK); 4131 hw->wm_lp[2] = I915_READ(WM3_LP_ILK); 4132 4133 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK); 4134 if (INTEL_INFO(dev)->gen >= 7) { 4135 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB); 4136 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB); 4137 } 4138 4139 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 4140 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ? 4141 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 4142 else if (IS_IVYBRIDGE(dev)) 4143 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ? 
4144 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 4145 4146 hw->enable_fbc_wm = 4147 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS); 4148 } 4149 4150 /** 4151 * intel_update_watermarks - update FIFO watermark values based on current modes 4152 * 4153 * Calculate watermark values for the various WM regs based on current mode 4154 * and plane configuration. 4155 * 4156 * There are several cases to deal with here: 4157 * - normal (i.e. non-self-refresh) 4158 * - self-refresh (SR) mode 4159 * - lines are large relative to FIFO size (buffer can hold up to 2) 4160 * - lines are small relative to FIFO size (buffer can hold more than 2 4161 * lines), so need to account for TLB latency 4162 * 4163 * The normal calculation is: 4164 * watermark = dotclock * bytes per pixel * latency 4165 * where latency is platform & configuration dependent (we assume pessimal 4166 * values here). 4167 * 4168 * The SR calculation is: 4169 * watermark = (trunc(latency/line time)+1) * surface width * 4170 * bytes per pixel 4171 * where 4172 * line time = htotal / dotclock 4173 * surface width = hdisplay for normal plane and 64 for cursor 4174 * and latency is assumed to be high, as above. 4175 * 4176 * The final value programmed to the register should always be rounded up, 4177 * and include an extra 2 entries to account for clock crossings. 4178 * 4179 * We don't use the sprite, so we can ignore that. And on Crestline we have 4180 * to set the non-SR watermarks to 8. 4181 */ 4182 void intel_update_watermarks(struct drm_crtc *crtc) 4183 { 4184 struct drm_i915_private *dev_priv = crtc->dev->dev_private; 4185 4186 if (dev_priv->display.update_wm) 4187 dev_priv->display.update_wm(crtc); 4188 } 4189 4190 /* 4191 * Lock protecting IPS related data structures 4192 */ 4193 struct lock mchdev_lock; 4194 LOCK_SYSINIT(mchdev, &mchdev_lock, "mchdev", LK_CANRECURSE); 4195 4196 /* Global for IPS driver to get at the current i915 device. Protected by 4197 * mchdev_lock. 
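* (An external IPS driver is expected to look this pointer up under mchdev_lock; in the Linux original that happens through the exported i915_gpu_*() helpers, so the pointer must only be set once the device is fully initialized.)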
*/ 4198 static struct drm_i915_private *i915_mch_dev; 4199 4200 bool ironlake_set_drps(struct drm_device *dev, u8 val) 4201 { 4202 struct drm_i915_private *dev_priv = dev->dev_private; 4203 u16 rgvswctl; 4204 4205 assert_spin_locked(&mchdev_lock); 4206 4207 rgvswctl = I915_READ16(MEMSWCTL); 4208 if (rgvswctl & MEMCTL_CMD_STS) { 4209 DRM_DEBUG("gpu busy, RCS change rejected\n"); 4210 return false; /* still busy with another command */ 4211 } 4212 4213 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | 4214 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; 4215 I915_WRITE16(MEMSWCTL, rgvswctl); 4216 POSTING_READ16(MEMSWCTL); 4217 4218 rgvswctl |= MEMCTL_CMD_STS; 4219 I915_WRITE16(MEMSWCTL, rgvswctl); 4220 4221 return true; 4222 } 4223 4224 static void ironlake_enable_drps(struct drm_device *dev) 4225 { 4226 struct drm_i915_private *dev_priv = dev->dev_private; 4227 u32 rgvmodectl; 4228 u8 fmax, fmin, fstart, vstart; 4229 4230 spin_lock_irq(&mchdev_lock); 4231 4232 rgvmodectl = I915_READ(MEMMODECTL); 4233 4234 /* Enable temp reporting */ 4235 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN); 4236 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE); 4237 4238 /* 100ms RC evaluation intervals */ 4239 I915_WRITE(RCUPEI, 100000); 4240 I915_WRITE(RCDNEI, 100000); 4241 4242 /* Set max/min thresholds to 90ms and 80ms respectively */ 4243 I915_WRITE(RCBMAXAVG, 90000); 4244 I915_WRITE(RCBMINAVG, 80000); 4245 4246 I915_WRITE(MEMIHYST, 1); 4247 4248 /* Set up min, max, and cur for interrupt handling */ 4249 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; 4250 fmin = (rgvmodectl & MEMMODE_FMIN_MASK); 4251 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 4252 MEMMODE_FSTART_SHIFT; 4253 4254 vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >> 4255 PXVFREQ_PX_SHIFT; 4256 4257 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ 4258 dev_priv->ips.fstart = fstart; 4259 4260 dev_priv->ips.max_delay = fstart; 4261 dev_priv->ips.min_delay = fmin; 4262 dev_priv->ips.cur_delay = fstart; 4263 4264 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", 4265 fmax, fmin, fstart); 4266 4267 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); 4268 4269 /* 4270 * Interrupts will be enabled in ironlake_irq_postinstall 4271 */ 4272 4273 I915_WRITE(VIDSTART, vstart); 4274 POSTING_READ(VIDSTART); 4275 4276 rgvmodectl |= MEMMODE_SWMODE_EN; 4277 I915_WRITE(MEMMODECTL, rgvmodectl); 4278 4279 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) 4280 DRM_ERROR("stuck trying to change perf mode\n"); 4281 mdelay(1); 4282 4283 ironlake_set_drps(dev, fstart); 4284 4285 dev_priv->ips.last_count1 = I915_READ(DMIEC) + 4286 I915_READ(DDREC) + I915_READ(CSIEC); 4287 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); 4288 dev_priv->ips.last_count2 = I915_READ(GFXEC); 4289 dev_priv->ips.last_time2 = ktime_get_raw_ns(); 4290 4291 spin_unlock_irq(&mchdev_lock); 4292 } 4293 4294 static void ironlake_disable_drps(struct drm_device *dev) 4295 { 4296 struct drm_i915_private *dev_priv = dev->dev_private; 4297 u16 rgvswctl; 4298 4299 spin_lock_irq(&mchdev_lock); 4300 4301 rgvswctl = I915_READ16(MEMSWCTL); 4302 4303 /* Ack interrupts, disable EFC interrupt */ 4304 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN); 4305 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG); 4306 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT); 4307 I915_WRITE(DEIIR, DE_PCU_EVENT); 4308 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); 4309 4310 /* Go back to the starting frequency */ 4311 
ironlake_set_drps(dev, dev_priv->ips.fstart); 4312 mdelay(1); 4313 rgvswctl |= MEMCTL_CMD_STS; 4314 I915_WRITE(MEMSWCTL, rgvswctl); 4315 mdelay(1); 4316 4317 spin_unlock_irq(&mchdev_lock); 4318 } 4319 4320 /* There's a funny hw issue where the hw returns all 0 when reading from 4321 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value 4322 * ourselves, instead of doing a rmw cycle (which might result in us clearing 4323 * all limits and the gpu stuck at whatever frequency it is at atm). 4324 */ 4325 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) 4326 { 4327 u32 limits; 4328 4329 /* Only set the down limit when we've reached the lowest level to avoid 4330 * getting more interrupts, otherwise leave this clear. This prevents a 4331 * race in the hw when coming out of rc6: There's a tiny window where 4332 * the hw runs at the minimal clock before selecting the desired 4333 * frequency, if the down threshold expires in that window we will not 4334 * receive a down interrupt. */ 4335 if (IS_GEN9(dev_priv)) { 4336 limits = (dev_priv->rps.max_freq_softlimit) << 23; 4337 if (val <= dev_priv->rps.min_freq_softlimit) 4338 limits |= (dev_priv->rps.min_freq_softlimit) << 14; 4339 } else { 4340 limits = dev_priv->rps.max_freq_softlimit << 24; 4341 if (val <= dev_priv->rps.min_freq_softlimit) 4342 limits |= dev_priv->rps.min_freq_softlimit << 16; 4343 } 4344 4345 return limits; 4346 } 4347 4348 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) 4349 { 4350 int new_power; 4351 u32 threshold_up = 0, threshold_down = 0; /* in % */ 4352 u32 ei_up = 0, ei_down = 0; 4353 4354 new_power = dev_priv->rps.power; 4355 switch (dev_priv->rps.power) { 4356 case LOW_POWER: 4357 if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq) 4358 new_power = BETWEEN; 4359 break; 4360 4361 case BETWEEN: 4362 if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq) 4363 new_power = LOW_POWER; 4364 else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq) 4365 new_power = HIGH_POWER; 4366 break; 4367 4368 case HIGH_POWER: 4369 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq) 4370 new_power = BETWEEN; 4371 break; 4372 } 4373 /* Max/min bins are special */ 4374 if (val <= dev_priv->rps.min_freq_softlimit) 4375 new_power = LOW_POWER; 4376 if (val >= dev_priv->rps.max_freq_softlimit) 4377 new_power = HIGH_POWER; 4378 if (new_power == dev_priv->rps.power) 4379 return; 4380 4381 /* Note the units here are not exactly 1us, but 1280ns. 
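* GT_INTERVAL_FROM_US() is what maps these microsecond values onto that unit; e.g. the 16000us ei_up used for LOW_POWER below works out to roughly 16000 / 1.28 = 12500 hardware units on the 1.28us platforms (gen9 uses 1.33us units instead).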
*/ 4382 switch (new_power) { 4383 case LOW_POWER: 4384 /* Upclock if more than 95% busy over 16ms */ 4385 ei_up = 16000; 4386 threshold_up = 95; 4387 4388 /* Downclock if less than 85% busy over 32ms */ 4389 ei_down = 32000; 4390 threshold_down = 85; 4391 break; 4392 4393 case BETWEEN: 4394 /* Upclock if more than 90% busy over 13ms */ 4395 ei_up = 13000; 4396 threshold_up = 90; 4397 4398 /* Downclock if less than 75% busy over 32ms */ 4399 ei_down = 32000; 4400 threshold_down = 75; 4401 break; 4402 4403 case HIGH_POWER: 4404 /* Upclock if more than 85% busy over 10ms */ 4405 ei_up = 10000; 4406 threshold_up = 85; 4407 4408 /* Downclock if less than 60% busy over 32ms */ 4409 ei_down = 32000; 4410 threshold_down = 60; 4411 break; 4412 } 4413 4414 I915_WRITE(GEN6_RP_UP_EI, 4415 GT_INTERVAL_FROM_US(dev_priv, ei_up)); 4416 I915_WRITE(GEN6_RP_UP_THRESHOLD, 4417 GT_INTERVAL_FROM_US(dev_priv, (ei_up * threshold_up / 100))); 4418 4419 I915_WRITE(GEN6_RP_DOWN_EI, 4420 GT_INTERVAL_FROM_US(dev_priv, ei_down)); 4421 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 4422 GT_INTERVAL_FROM_US(dev_priv, (ei_down * threshold_down / 100))); 4423 4424 I915_WRITE(GEN6_RP_CONTROL, 4425 GEN6_RP_MEDIA_TURBO | 4426 GEN6_RP_MEDIA_HW_NORMAL_MODE | 4427 GEN6_RP_MEDIA_IS_GFX | 4428 GEN6_RP_ENABLE | 4429 GEN6_RP_UP_BUSY_AVG | 4430 GEN6_RP_DOWN_IDLE_AVG); 4431 4432 dev_priv->rps.power = new_power; 4433 dev_priv->rps.up_threshold = threshold_up; 4434 dev_priv->rps.down_threshold = threshold_down; 4435 dev_priv->rps.last_adj = 0; 4436 } 4437 4438 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) 4439 { 4440 u32 mask = 0; 4441 4442 if (val > dev_priv->rps.min_freq_softlimit) 4443 mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; 4444 if (val < dev_priv->rps.max_freq_softlimit) 4445 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; 4446 4447 mask &= dev_priv->pm_rps_events; 4448 4449 return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); 4450 } 4451 4452 /* gen6_set_rps is called to update the frequency request, but should also be 4453 * called when the range (min_delay and max_delay) is modified so that we can 4454 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ 4455 static void gen6_set_rps(struct drm_device *dev, u8 val) 4456 { 4457 struct drm_i915_private *dev_priv = dev->dev_private; 4458 4459 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ 4460 if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) 4461 return; 4462 4463 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 4464 WARN_ON(val > dev_priv->rps.max_freq); 4465 WARN_ON(val < dev_priv->rps.min_freq); 4466 4467 /* min/max delay may still have been modified so be sure to 4468 * write the limits value. 4469 */ 4470 if (val != dev_priv->rps.cur_freq) { 4471 gen6_set_rps_thresholds(dev_priv, val); 4472 4473 if (IS_GEN9(dev)) 4474 I915_WRITE(GEN6_RPNSWREQ, 4475 GEN9_FREQUENCY(val)); 4476 else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 4477 I915_WRITE(GEN6_RPNSWREQ, 4478 HSW_FREQUENCY(val)); 4479 else 4480 I915_WRITE(GEN6_RPNSWREQ, 4481 GEN6_FREQUENCY(val) | 4482 GEN6_OFFSET(0) | 4483 GEN6_AGGRESSIVE_TURBO); 4484 } 4485 4486 /* Make sure we continue to get interrupts 4487 * until we hit the minimum or maximum frequencies. 
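* gen6_rps_pm_mask() drops the up-threshold events once we sit at the softlimit maximum and the down-threshold events at the minimum, so a GPU parked at either limit stops generating pointless PM interrupts.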
4488 */ 4489 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); 4490 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 4491 4492 POSTING_READ(GEN6_RPNSWREQ); 4493 4494 dev_priv->rps.cur_freq = val; 4495 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); 4496 } 4497 4498 static void valleyview_set_rps(struct drm_device *dev, u8 val) 4499 { 4500 struct drm_i915_private *dev_priv = dev->dev_private; 4501 4502 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 4503 WARN_ON(val > dev_priv->rps.max_freq); 4504 WARN_ON(val < dev_priv->rps.min_freq); 4505 4506 if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1), 4507 "Odd GPU freq value\n")) 4508 val &= ~1; 4509 4510 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 4511 4512 if (val != dev_priv->rps.cur_freq) { 4513 vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); 4514 if (!IS_CHERRYVIEW(dev_priv)) 4515 gen6_set_rps_thresholds(dev_priv, val); 4516 } 4517 4518 dev_priv->rps.cur_freq = val; 4519 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); 4520 } 4521 4522 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down 4523 * 4524 * * If Gfx is Idle, then 4525 * 1. Forcewake Media well. 4526 * 2. Request idle freq. 4527 * 3. Release Forcewake of Media well. 4528 */ 4529 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) 4530 { 4531 u32 val = dev_priv->rps.idle_freq; 4532 4533 if (dev_priv->rps.cur_freq <= val) 4534 return; 4535 4536 /* Wake up the media well, as that takes a lot less 4537 * power than the Render well. */ 4538 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); 4539 valleyview_set_rps(dev_priv->dev, val); 4540 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); 4541 } 4542 4543 void gen6_rps_busy(struct drm_i915_private *dev_priv) 4544 { 4545 mutex_lock(&dev_priv->rps.hw_lock); 4546 if (dev_priv->rps.enabled) { 4547 if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) 4548 gen6_rps_reset_ei(dev_priv); 4549 I915_WRITE(GEN6_PMINTRMSK, 4550 gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); 4551 } 4552 mutex_unlock(&dev_priv->rps.hw_lock); 4553 } 4554 4555 void gen6_rps_idle(struct drm_i915_private *dev_priv) 4556 { 4557 struct drm_device *dev = dev_priv->dev; 4558 4559 mutex_lock(&dev_priv->rps.hw_lock); 4560 if (dev_priv->rps.enabled) { 4561 if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) 4562 vlv_set_rps_idle(dev_priv); 4563 else 4564 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); 4565 dev_priv->rps.last_adj = 0; 4566 I915_WRITE(GEN6_PMINTRMSK, 4567 gen6_sanitize_rps_pm_mask(dev_priv, ~0)); 4568 } 4569 mutex_unlock(&dev_priv->rps.hw_lock); 4570 4571 lockmgr(&dev_priv->rps.client_lock, LK_EXCLUSIVE); 4572 while (!list_empty(&dev_priv->rps.clients)) 4573 list_del_init(dev_priv->rps.clients.next); 4574 lockmgr(&dev_priv->rps.client_lock, LK_RELEASE); 4575 } 4576 4577 void gen6_rps_boost(struct drm_i915_private *dev_priv, 4578 struct intel_rps_client *rps, 4579 unsigned long submitted) 4580 { 4581 /* This is intentionally racy! We peek at the state here, then 4582 * validate inside the RPS worker. 4583 */ 4584 if (!(dev_priv->mm.busy && 4585 dev_priv->rps.enabled && 4586 dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)) 4587 return; 4588 4589 /* Force a RPS boost (and don't count it against the client) if 4590 * the GPU is severely congested. 
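* Such a boost is still requested; it is just accounted to the device (dev_priv->rps.boosts) rather than the client, so a throttled client does not spend its own boost budget on an already-backlogged GPU.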
*/ 4592 if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES)) 4593 rps = NULL; 4594 4595 lockmgr(&dev_priv->rps.client_lock, LK_EXCLUSIVE); 4596 if (rps == NULL || list_empty(&rps->link)) { 4597 spin_lock_irq(&dev_priv->irq_lock); 4598 if (dev_priv->rps.interrupts_enabled) { 4599 dev_priv->rps.client_boost = true; 4600 queue_work(dev_priv->wq, &dev_priv->rps.work); 4601 } 4602 spin_unlock_irq(&dev_priv->irq_lock); 4603 4604 if (rps != NULL) { 4605 list_add(&rps->link, &dev_priv->rps.clients); 4606 rps->boosts++; 4607 } else 4608 dev_priv->rps.boosts++; 4609 } 4610 lockmgr(&dev_priv->rps.client_lock, LK_RELEASE); 4611 } 4612 4613 void intel_set_rps(struct drm_device *dev, u8 val) 4614 { 4615 if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) 4616 valleyview_set_rps(dev, val); 4617 else 4618 gen6_set_rps(dev, val); 4619 } 4620 4621 static void gen9_disable_rc6(struct drm_device *dev) 4622 { 4623 struct drm_i915_private *dev_priv = dev->dev_private; 4624 4625 I915_WRITE(GEN6_RC_CONTROL, 0); 4626 I915_WRITE(GEN9_PG_ENABLE, 0); 4627 } 4628 4629 static void gen9_disable_rps(struct drm_device *dev) 4630 { 4631 struct drm_i915_private *dev_priv = dev->dev_private; 4632 4633 I915_WRITE(GEN6_RP_CONTROL, 0); 4634 } 4635 4636 static void gen6_disable_rps(struct drm_device *dev) 4637 { 4638 struct drm_i915_private *dev_priv = dev->dev_private; 4639 4640 I915_WRITE(GEN6_RC_CONTROL, 0); 4641 I915_WRITE(GEN6_RPNSWREQ, 1 << 31); 4642 I915_WRITE(GEN6_RP_CONTROL, 0); 4643 } 4644 4645 static void cherryview_disable_rps(struct drm_device *dev) 4646 { 4647 struct drm_i915_private *dev_priv = dev->dev_private; 4648 4649 I915_WRITE(GEN6_RC_CONTROL, 0); 4650 } 4651 4652 static void valleyview_disable_rps(struct drm_device *dev) 4653 { 4654 struct drm_i915_private *dev_priv = dev->dev_private; 4655 4656 /* We do forcewake before disabling RC6; 4657 * this is what the BIOS expects when going into suspend */ 4658 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4659 4660 I915_WRITE(GEN6_RC_CONTROL, 0); 4661 4662 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4663 } 4664 4665 static void intel_print_rc6_info(struct drm_device *dev, u32 mode) 4666 { 4667 if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { 4668 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) 4669 mode = GEN6_RC_CTL_RC6_ENABLE; 4670 else 4671 mode = 0; 4672 } 4673 if (HAS_RC6p(dev)) 4674 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n", 4675 onoff(mode & GEN6_RC_CTL_RC6_ENABLE), 4676 onoff(mode & GEN6_RC_CTL_RC6p_ENABLE), 4677 onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE)); 4678 4679 else 4680 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n", 4681 onoff(mode & GEN6_RC_CTL_RC6_ENABLE)); 4682 } 4683 4684 static bool bxt_check_bios_rc6_setup(const struct drm_device *dev) 4685 { 4686 struct drm_i915_private *dev_priv = to_i915(dev); 4687 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4688 bool enable_rc6 = true; 4689 unsigned long rc6_ctx_base; 4690 4691 if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) { 4692 DRM_DEBUG_KMS("RC6 Base location not set properly.\n"); 4693 enable_rc6 = false; 4694 } 4695 4696 /* 4697 * The exact context size is not known for BXT, so assume a page size 4698 * for this check.
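* A page is a conservative lower bound: if even the first 4KiB of the context image lies outside the reserved stolen range, the BIOS setup is unusable anyway.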
4699 */ 4700 rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK; 4701 if (!((rc6_ctx_base >= ggtt->stolen_reserved_base) && 4702 (rc6_ctx_base + PAGE_SIZE <= ggtt->stolen_reserved_base + 4703 ggtt->stolen_reserved_size))) { 4704 DRM_DEBUG_KMS("RC6 Base address not as expected.\n"); 4705 enable_rc6 = false; 4706 } 4707 4708 if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) && 4709 ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) && 4710 ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) && 4711 ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) { 4712 DRM_DEBUG_KMS("Engine Idle wait time not set properly.\n"); 4713 enable_rc6 = false; 4714 } 4715 4716 if (!(I915_READ(GEN6_RC_CONTROL) & (GEN6_RC_CTL_RC6_ENABLE | 4717 GEN6_RC_CTL_HW_ENABLE)) && 4718 ((I915_READ(GEN6_RC_CONTROL) & GEN6_RC_CTL_HW_ENABLE) || 4719 !(I915_READ(GEN6_RC_STATE) & RC6_STATE))) { 4720 DRM_DEBUG_KMS("HW/SW RC6 is not enabled by BIOS.\n"); 4721 enable_rc6 = false; 4722 } 4723 4724 return enable_rc6; 4725 } 4726 4727 int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6) 4728 { 4729 /* No RC6 before Ironlake and code is gone for ilk. */ 4730 if (INTEL_INFO(dev)->gen < 6) 4731 return 0; 4732 4733 if (!enable_rc6) 4734 return 0; 4735 4736 if (IS_BROXTON(dev) && !bxt_check_bios_rc6_setup(dev)) { 4737 DRM_INFO("RC6 disabled by BIOS\n"); 4738 return 0; 4739 } 4740 4741 /* Respect the kernel parameter if it is set */ 4742 if (enable_rc6 >= 0) { 4743 int mask; 4744 4745 if (HAS_RC6p(dev)) 4746 mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | 4747 INTEL_RC6pp_ENABLE; 4748 else 4749 mask = INTEL_RC6_ENABLE; 4750 4751 if ((enable_rc6 & mask) != enable_rc6) 4752 DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n", 4753 enable_rc6 & mask, enable_rc6, mask); 4754 4755 return enable_rc6 & mask; 4756 } 4757 4758 if (IS_IVYBRIDGE(dev)) 4759 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); 4760 4761 return INTEL_RC6_ENABLE; 4762 } 4763 4764 int intel_enable_rc6(const struct drm_device *dev) 4765 { 4766 return i915.enable_rc6; 4767 } 4768 4769 static void gen6_init_rps_frequencies(struct drm_device *dev) 4770 { 4771 struct drm_i915_private *dev_priv = dev->dev_private; 4772 uint32_t rp_state_cap; 4773 u32 ddcc_status = 0; 4774 int ret; 4775 4776 /* All of these values are in units of 50MHz */ 4777 dev_priv->rps.cur_freq = 0; 4778 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 4779 if (IS_BROXTON(dev)) { 4780 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); 4781 dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; 4782 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 4783 dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; 4784 } else { 4785 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); 4786 dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; 4787 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 4788 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; 4789 } 4790 4791 /* hw_max = RP0 until we check for overclocking */ 4792 dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; 4793 4794 dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; 4795 if (IS_HASWELL(dev) || IS_BROADWELL(dev) || 4796 IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { 4797 ret = sandybridge_pcode_read(dev_priv, 4798 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 4799 &ddcc_status); 4800 if (0 == ret) 4801 dev_priv->rps.efficient_freq = 4802 clamp_t(u8, 4803 ((ddcc_status >> 8) & 0xff), 4804 dev_priv->rps.min_freq, 4805 dev_priv->rps.max_freq); 4806 } 4807 4808 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { 
4809 /* Store the frequency values in 16.66 MHz units, which is 4810 the natural hardware unit for SKL */ 4811 dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; 4812 dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; 4813 dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; 4814 dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; 4815 dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; 4816 } 4817 4818 dev_priv->rps.idle_freq = dev_priv->rps.min_freq; 4819 4820 /* Preserve min/max settings in case of re-init */ 4821 if (dev_priv->rps.max_freq_softlimit == 0) 4822 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; 4823 4824 if (dev_priv->rps.min_freq_softlimit == 0) { 4825 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) 4826 dev_priv->rps.min_freq_softlimit = 4827 max_t(int, dev_priv->rps.efficient_freq, 4828 intel_freq_opcode(dev_priv, 450)); 4829 else 4830 dev_priv->rps.min_freq_softlimit = 4831 dev_priv->rps.min_freq; 4832 } 4833 } 4834 4835 /* See the Gen9_GT_PM_Programming_Guide doc for the below */ 4836 static void gen9_enable_rps(struct drm_device *dev) 4837 { 4838 struct drm_i915_private *dev_priv = dev->dev_private; 4839 4840 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4841 4842 gen6_init_rps_frequencies(dev); 4843 4844 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ 4845 if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) { 4846 /* 4847 * BIOS could leave the Hw Turbo enabled, so need to explicitly 4848 * clear out the Control register just to avoid inconsistency 4849 * with debugfs interface, which will show Turbo as enabled 4850 * only and that is not expected by the User after adding the 4851 * WaGsvDisableTurbo. Apart from this there is no problem even 4852 * if the Turbo is left enabled in the Control register, as the 4853 * Up/Down interrupts would remain masked. 4854 */ 4855 gen9_disable_rps(dev); 4856 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4857 return; 4858 } 4859 4860 /* Program defaults and thresholds for RPS */ 4861 I915_WRITE(GEN6_RC_VIDEO_FREQ, 4862 GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); 4863 4864 /* 1 second timeout */ 4865 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 4866 GT_INTERVAL_FROM_US(dev_priv, 1000000)); 4867 4868 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa); 4869 4870 /* Leaning on the below call to gen6_set_rps to program/setup the 4871 * Up/Down EI & threshold registers, as well as the RP_CONTROL, 4872 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ 4873 dev_priv->rps.power = HIGH_POWER; /* force a reset */ 4874 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); 4875 4876 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4877 } 4878 4879 static void gen9_enable_rc6(struct drm_device *dev) 4880 { 4881 struct drm_i915_private *dev_priv = dev->dev_private; 4882 struct intel_engine_cs *engine; 4883 uint32_t rc6_mask = 0; 4884 4885 /* 1a: Software RC state - RC0 */ 4886 I915_WRITE(GEN6_RC_STATE, 0); 4887 4888 /* 1b: Get forcewake during program sequence. Although the driver 4889 * hasn't enabled a state yet where we need forcewake, BIOS may have. */ 4890 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4891 4892 /* 2a: Disable RC states.
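* The staged sequence (1a/1b, 2a-2c, 3a/3b) programs all thresholds while RC states are off and only re-enables them in step 3a once everything is consistent.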
*/ 4893 I915_WRITE(GEN6_RC_CONTROL, 0); 4894 4895 /* 2b: Program RC6 thresholds.*/ 4896 4897 /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */ 4898 if (IS_SKYLAKE(dev)) 4899 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); 4900 else 4901 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); 4902 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 4903 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 4904 for_each_engine(engine, dev_priv) 4905 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 4906 4907 if (HAS_GUC_UCODE(dev)) 4908 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); 4909 4910 I915_WRITE(GEN6_RC_SLEEP, 0); 4911 4912 /* 2c: Program Coarse Power Gating Policies. */ 4913 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25); 4914 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); 4915 4916 /* 3a: Enable RC6 */ 4917 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) 4918 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 4919 DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); 4920 /* WaRsUseTimeoutMode */ 4921 if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) || 4922 IS_BXT_REVID(dev, 0, BXT_REVID_A1)) { 4923 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */ 4924 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4925 GEN7_RC_CTL_TO_MODE | 4926 rc6_mask); 4927 } else { 4928 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ 4929 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4930 GEN6_RC_CTL_EI_MODE(1) | 4931 rc6_mask); 4932 } 4933 4934 /* 4935 * 3b: Enable Coarse Power Gating only when RC6 is enabled. 4936 * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6. 4937 */ 4938 if (NEEDS_WaRsDisableCoarsePowerGating(dev)) 4939 I915_WRITE(GEN9_PG_ENABLE, 0); 4940 else 4941 I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 4942 (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0); 4943 4944 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4945 4946 } 4947 4948 static void gen8_enable_rps(struct drm_device *dev) 4949 { 4950 struct drm_i915_private *dev_priv = dev->dev_private; 4951 struct intel_engine_cs *engine; 4952 uint32_t rc6_mask = 0; 4953 4954 /* 1a: Software RC state - RC0 */ 4955 I915_WRITE(GEN6_RC_STATE, 0); 4956 4957 /* 1c & 1d: Get forcewake during program sequence. Although the driver 4958 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 4959 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4960 4961 /* 2a: Disable RC states. 
*/ 4962 I915_WRITE(GEN6_RC_CONTROL, 0); 4963 4964 /* Initialize rps frequencies */ 4965 gen6_init_rps_frequencies(dev); 4966 4967 /* 2b: Program RC6 thresholds. */ 4968 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 4969 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 4970 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 4971 for_each_engine(engine, dev_priv) 4972 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 4973 I915_WRITE(GEN6_RC_SLEEP, 0); 4974 if (IS_BROADWELL(dev)) 4975 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ 4976 else 4977 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ 4978 4979 /* 3: Enable RC6 */ 4980 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) 4981 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 4982 intel_print_rc6_info(dev, rc6_mask); 4983 if (IS_BROADWELL(dev)) 4984 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4985 GEN7_RC_CTL_TO_MODE | 4986 rc6_mask); 4987 else 4988 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 4989 GEN6_RC_CTL_EI_MODE(1) | 4990 rc6_mask); 4991 4992 /* 4: Program defaults and thresholds for RPS */ 4993 I915_WRITE(GEN6_RPNSWREQ, 4994 HSW_FREQUENCY(dev_priv->rps.rp1_freq)); 4995 I915_WRITE(GEN6_RC_VIDEO_FREQ, 4996 HSW_FREQUENCY(dev_priv->rps.rp1_freq)); 4997 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ 4998 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ 4999 5000 /* Docs recommend 900MHz, and 300 MHz respectively */ 5001 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, 5002 dev_priv->rps.max_freq_softlimit << 24 | 5003 dev_priv->rps.min_freq_softlimit << 16); 5004 5005 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ 5006 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70% */ 5007 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ 5008 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ 5009 5010 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5011 5012 /* 5: Enable RPS */ 5013 I915_WRITE(GEN6_RP_CONTROL, 5014 GEN6_RP_MEDIA_TURBO | 5015 GEN6_RP_MEDIA_HW_NORMAL_MODE | 5016 GEN6_RP_MEDIA_IS_GFX | 5017 GEN6_RP_ENABLE | 5018 GEN6_RP_UP_BUSY_AVG | 5019 GEN6_RP_DOWN_IDLE_AVG); 5020 5021 /* 6: Ring frequency + overclocking (our driver does this later) */ 5022 5023 dev_priv->rps.power = HIGH_POWER; /* force a reset */ 5024 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); 5025 5026 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5027 } 5028 5029 static void gen6_enable_rps(struct drm_device *dev) 5030 { 5031 struct drm_i915_private *dev_priv = dev->dev_private; 5032 struct intel_engine_cs *engine; 5033 u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; 5034 u32 gtfifodbg; 5035 int rc6_mode; 5036 int ret; 5037 5038 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 5039 5040 /* Here begins a magic sequence of register writes to enable 5041 * auto-downclocking. 5042 * 5043 * Perhaps there might be some value in exposing these to 5044 * userspace...
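* The rough order below: clear GEN6_RC_STATE, reset GTFIFODBG, program wake-rate limits and thresholds, enable RC6 in GEN6_RC_CONTROL, then let gen6_set_rps() pick the initial operating point.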
5045 */ 5046 I915_WRITE(GEN6_RC_STATE, 0); 5047 5048 /* Clear the DBG now so we don't confuse earlier errors */ 5049 gtfifodbg = I915_READ(GTFIFODBG); 5050 if (gtfifodbg) { 5051 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); 5052 I915_WRITE(GTFIFODBG, gtfifodbg); 5053 } 5054 5055 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5056 5057 /* Initialize rps frequencies */ 5058 gen6_init_rps_frequencies(dev); 5059 5060 /* disable the counters and set deterministic thresholds */ 5061 I915_WRITE(GEN6_RC_CONTROL, 0); 5062 5063 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); 5064 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); 5065 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); 5066 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 5067 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 5068 5069 for_each_engine(engine, dev_priv) 5070 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 5071 5072 I915_WRITE(GEN6_RC_SLEEP, 0); 5073 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); 5074 if (IS_IVYBRIDGE(dev)) 5075 I915_WRITE(GEN6_RC6_THRESHOLD, 125000); 5076 else 5077 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); 5078 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); 5079 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ 5080 5081 /* Check if we are enabling RC6 */ 5082 rc6_mode = intel_enable_rc6(dev_priv->dev); 5083 if (rc6_mode & INTEL_RC6_ENABLE) 5084 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; 5085 5086 /* We don't use those on Haswell */ 5087 if (!IS_HASWELL(dev)) { 5088 if (rc6_mode & INTEL_RC6p_ENABLE) 5089 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; 5090 5091 if (rc6_mode & INTEL_RC6pp_ENABLE) 5092 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; 5093 } 5094 5095 intel_print_rc6_info(dev, rc6_mask); 5096 5097 I915_WRITE(GEN6_RC_CONTROL, 5098 rc6_mask | 5099 GEN6_RC_CTL_EI_MODE(1) | 5100 GEN6_RC_CTL_HW_ENABLE); 5101 5102 /* Power down if completely idle for over 50ms */ 5103 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); 5104 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5105 5106 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0); 5107 if (ret) 5108 DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); 5109 5110 ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); 5111 if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */ 5112 DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n", 5113 (dev_priv->rps.max_freq_softlimit & 0xff) * 50, 5114 (pcu_mbox & 0xff) * 50); 5115 dev_priv->rps.max_freq = pcu_mbox & 0xff; 5116 } 5117 5118 dev_priv->rps.power = HIGH_POWER; /* force a reset */ 5119 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); 5120 5121 rc6vids = 0; 5122 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); 5123 if (IS_GEN6(dev) && ret) { 5124 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); 5125 } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { 5126 DRM_DEBUG_DRIVER("You should update your BIOS. 
Correcting minimum rc6 voltage (%dmV->%dmV)\n", 5127 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); 5128 rc6vids &= 0xffff00; 5129 rc6vids |= GEN6_ENCODE_RC6_VID(450); 5130 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); 5131 if (ret) 5132 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); 5133 } 5134 5135 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5136 } 5137 5138 static void __gen6_update_ring_freq(struct drm_device *dev) 5139 { 5140 struct drm_i915_private *dev_priv = dev->dev_private; 5141 int min_freq = 15; 5142 unsigned int gpu_freq; 5143 unsigned int max_ia_freq, min_ring_freq; 5144 unsigned int max_gpu_freq, min_gpu_freq; 5145 int scaling_factor = 180; 5146 5147 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 5148 5149 #if 0 5150 policy = cpufreq_cpu_get(0); 5151 if (policy) { 5152 max_ia_freq = policy->cpuinfo.max_freq; 5153 cpufreq_cpu_put(policy); 5154 } else { 5155 /* 5156 * Default to measured freq if none found, PCU will ensure we 5157 * don't go over 5158 */ 5159 max_ia_freq = tsc_khz; 5160 } 5161 #else 5162 max_ia_freq = tsc_frequency / 1000; 5163 #endif 5164 5165 /* Convert from kHz to MHz */ 5166 max_ia_freq /= 1000; 5167 5168 min_ring_freq = I915_READ(DCLK) & 0xf; 5169 /* convert DDR frequency from units of 266.6MHz to bandwidth */ 5170 min_ring_freq = mult_frac(min_ring_freq, 8, 3); 5171 5172 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { 5173 /* Convert GT frequency to 50 MHz units */ 5174 min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; 5175 max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; 5176 } else { 5177 min_gpu_freq = dev_priv->rps.min_freq; 5178 max_gpu_freq = dev_priv->rps.max_freq; 5179 } 5180 5181 /* 5182 * For each potential GPU frequency, load a ring frequency we'd like 5183 * to use for memory access. We do this by specifying the IA frequency 5184 * the PCU should use as a reference to determine the ring frequency. 5185 */ 5186 for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) { 5187 int diff = max_gpu_freq - gpu_freq; 5188 unsigned int ia_freq = 0, ring_freq = 0; 5189 5190 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { 5191 /* 5192 * ring_freq = 2 * GT. ring_freq is in 100MHz units 5193 * No floor required for ring frequency on SKL. 5194 */ 5195 ring_freq = gpu_freq; 5196 } else if (INTEL_INFO(dev)->gen >= 8) { 5197 /* max(2 * GT, DDR). NB: GT is 50MHz units */ 5198 ring_freq = max(min_ring_freq, gpu_freq); 5199 } else if (IS_HASWELL(dev)) { 5200 ring_freq = mult_frac(gpu_freq, 5, 4); 5201 ring_freq = max(min_ring_freq, ring_freq); 5202 /* leave ia_freq as the default, chosen by cpufreq */ 5203 } else { 5204 /* On older processors, there is no separate ring 5205 * clock domain, so in order to boost the bandwidth 5206 * of the ring, we need to upclock the CPU (ia_freq). 5207 * 5208 * For GPU frequencies less than 750MHz, 5209 * just use the lowest ring freq.
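* Worked example for the legacy scaling, assuming a hypothetical max_ia_freq of 3400 MHz: at diff = 4 bins below max, ia_freq = 3400 - (4 * 180) / 2 = 3040, which DIV_ROUND_CLOSEST(..., 100) turns into a ratio of 30.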
5210 */ 5211 if (gpu_freq < min_freq) 5212 ia_freq = 800; 5213 else 5214 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); 5215 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); 5216 } 5217 5218 sandybridge_pcode_write(dev_priv, 5219 GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 5220 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | 5221 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | 5222 gpu_freq); 5223 } 5224 } 5225 5226 void gen6_update_ring_freq(struct drm_device *dev) 5227 { 5228 struct drm_i915_private *dev_priv = dev->dev_private; 5229 5230 if (!HAS_CORE_RING_FREQ(dev)) 5231 return; 5232 5233 mutex_lock(&dev_priv->rps.hw_lock); 5234 __gen6_update_ring_freq(dev); 5235 mutex_unlock(&dev_priv->rps.hw_lock); 5236 } 5237 5238 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) 5239 { 5240 struct drm_device *dev = dev_priv->dev; 5241 u32 val, rp0; 5242 5243 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); 5244 5245 switch (INTEL_INFO(dev)->eu_total) { 5246 case 8: 5247 /* (2 * 4) config */ 5248 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT); 5249 break; 5250 case 12: 5251 /* (2 * 6) config */ 5252 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT); 5253 break; 5254 case 16: 5255 /* (2 * 8) config */ 5256 default: 5257 /* Setting (2 * 8) Min RP0 for any other combination */ 5258 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT); 5259 break; 5260 } 5261 5262 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK); 5263 5264 return rp0; 5265 } 5266 5267 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) 5268 { 5269 u32 val, rpe; 5270 5271 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); 5272 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; 5273 5274 return rpe; 5275 } 5276 5277 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) 5278 { 5279 u32 val, rp1; 5280 5281 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); 5282 rp1 = (val & FB_GFX_FREQ_FUSE_MASK); 5283 5284 return rp1; 5285 } 5286 5287 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) 5288 { 5289 u32 val, rp1; 5290 5291 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); 5292 5293 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; 5294 5295 return rp1; 5296 } 5297 5298 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) 5299 { 5300 u32 val, rp0; 5301 5302 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); 5303 5304 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; 5305 /* Clamp to max */ 5306 rp0 = min_t(u32, rp0, 0xea); 5307 5308 return rp0; 5309 } 5310 5311 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) 5312 { 5313 u32 val, rpe; 5314 5315 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); 5316 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; 5317 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); 5318 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; 5319 5320 return rpe; 5321 } 5322 5323 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) 5324 { 5325 u32 val; 5326 5327 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; 5328 /* 5329 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value 5330 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on 5331 * a BYT-M B0 the above register contains 0xbf. Moreover when setting 5332 * a frequency Punit will not allow values below 0xc0. 
Clamp it to 0xc0 5333 * to make sure it matches what Punit accepts. 5334 */ 5335 return max_t(u32, val, 0xc0); 5336 } 5337 5338 /* Check that the pctx buffer wasn't moved under us. */ 5339 static void valleyview_check_pctx(struct drm_i915_private *dev_priv) 5340 { 5341 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; 5342 5343 /* DragonFly - if EDID fails vlv_pctx can wind up NULL */ 5344 if (WARN_ON(!dev_priv->vlv_pctx)) 5345 return; 5346 5347 WARN_ON(pctx_addr != dev_priv->mm.stolen_base + 5348 dev_priv->vlv_pctx->stolen->start); 5349 } 5350 5351 5352 /* Check that the pcbr address is not empty. */ 5353 static void cherryview_check_pctx(struct drm_i915_private *dev_priv) 5354 { 5355 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; 5356 5357 WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); 5358 } 5359 5360 static void cherryview_setup_pctx(struct drm_device *dev) 5361 { 5362 struct drm_i915_private *dev_priv = to_i915(dev); 5363 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5364 unsigned long pctx_paddr, paddr; 5365 u32 pcbr; 5366 int pctx_size = 32*1024; 5367 5368 pcbr = I915_READ(VLV_PCBR); 5369 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { 5370 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); 5371 paddr = (dev_priv->mm.stolen_base + 5372 (ggtt->stolen_size - pctx_size)); 5373 5374 pctx_paddr = (paddr & (~4095)); 5375 I915_WRITE(VLV_PCBR, pctx_paddr); 5376 } 5377 5378 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); 5379 } 5380 5381 static void valleyview_setup_pctx(struct drm_device *dev) 5382 { 5383 struct drm_i915_private *dev_priv = dev->dev_private; 5384 struct drm_i915_gem_object *pctx; 5385 unsigned long pctx_paddr; 5386 u32 pcbr; 5387 int pctx_size = 24*1024; 5388 5389 mutex_lock(&dev->struct_mutex); 5390 5391 pcbr = I915_READ(VLV_PCBR); 5392 if (pcbr) { 5393 /* BIOS set it up already, grab the pre-alloc'd space */ 5394 int pcbr_offset; 5395 5396 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base; 5397 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev, 5398 pcbr_offset, 5399 I915_GTT_OFFSET_NONE, 5400 pctx_size); 5401 goto out; 5402 } 5403 5404 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); 5405 5406 /* 5407 * From the Gunit register HAS: 5408 * The Gfx driver is expected to program this register and ensure 5409 * proper allocation within Gfx stolen memory. For example, this 5410 * register should be programmed such that the PCBR range does not 5411 * overlap with other ranges, such as the frame buffer, protected 5412 * memory, or any other relevant ranges.
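* In practice that means carving the 24KiB context out of stolen memory ourselves below when the BIOS left PCBR unprogrammed, and writing the page-aligned physical address back into VLV_PCBR.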
	 */
	pctx = i915_gem_object_create_stolen(dev, pctx_size);
	if (!pctx) {
		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
		goto out;
	}

	pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
	I915_WRITE(VLV_PCBR, pctx_paddr);

out:
	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
	dev_priv->vlv_pctx = pctx;
	mutex_unlock(&dev->struct_mutex);
}

static void valleyview_cleanup_pctx(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (WARN_ON(!dev_priv->vlv_pctx))
		return;

	drm_gem_object_unreference_unlocked(&dev_priv->vlv_pctx->base);
	dev_priv->vlv_pctx = NULL;
}

static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
{
	dev_priv->rps.gpll_ref_freq =
		vlv_get_cck_clock(dev_priv, "GPLL ref",
				  CCK_GPLL_CLOCK_CONTROL,
				  dev_priv->czclk_freq);

	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
			 dev_priv->rps.gpll_ref_freq);
}

static void valleyview_init_gt_powersave(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 val;

	valleyview_setup_pctx(dev);

	vlv_init_gpll_ref_freq(dev_priv);

	mutex_lock(&dev_priv->rps.hw_lock);

	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
	switch ((val >> 6) & 3) {
	case 0:
	case 1:
		dev_priv->mem_freq = 800;
		break;
	case 2:
		dev_priv->mem_freq = 1066;
		break;
	case 3:
		dev_priv->mem_freq = 1333;
		break;
	}
	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);

	dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
	dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
			 dev_priv->rps.max_freq);

	dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
			 dev_priv->rps.efficient_freq);

	dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv);
	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
			 dev_priv->rps.rp1_freq);

	dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
			 dev_priv->rps.min_freq);

	dev_priv->rps.idle_freq = dev_priv->rps.min_freq;

	/* Preserve min/max settings in case of re-init */
	if (dev_priv->rps.max_freq_softlimit == 0)
		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;

	if (dev_priv->rps.min_freq_softlimit == 0)
		dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;

	mutex_unlock(&dev_priv->rps.hw_lock);
}

static void cherryview_init_gt_powersave(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 val;

	cherryview_setup_pctx(dev);

	vlv_init_gpll_ref_freq(dev_priv);

	mutex_lock(&dev_priv->rps.hw_lock);

	mutex_lock(&dev_priv->sb_lock);
	val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
	mutex_unlock(&dev_priv->sb_lock);

	switch ((val >> 2) & 0x7) {
	case 3:
		dev_priv->mem_freq = 2000;
		break;
	default:
		dev_priv->mem_freq = 1600;
		break;
	}
	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);

	dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
	dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
			 dev_priv->rps.max_freq);

	dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv);
	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
			 dev_priv->rps.efficient_freq);

	dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv);
	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
			 dev_priv->rps.rp1_freq);

	/* PUnit validated range is only [RPe, RP0] */
	dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;
	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
			 dev_priv->rps.min_freq);

	WARN_ONCE((dev_priv->rps.max_freq |
		   dev_priv->rps.efficient_freq |
		   dev_priv->rps.rp1_freq |
		   dev_priv->rps.min_freq) & 1,
		  "Odd GPU freq values\n");

	dev_priv->rps.idle_freq = dev_priv->rps.min_freq;

	/* Preserve min/max settings in case of re-init */
	if (dev_priv->rps.max_freq_softlimit == 0)
		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;

	if (dev_priv->rps.min_freq_softlimit == 0)
		dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;

	mutex_unlock(&dev_priv->rps.hw_lock);
}

static void valleyview_cleanup_gt_powersave(struct drm_device *dev)
{
	valleyview_cleanup_pctx(dev);
}

static void cherryview_enable_rps(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;
	u32 gtfifodbg, val, rc6_mode = 0, pcbr;

	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

	gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
					     GT_FIFO_FREE_ENTRIES_CHV);
	if (gtfifodbg) {
		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
				 gtfifodbg);
		I915_WRITE(GTFIFODBG, gtfifodbg);
	}

	cherryview_check_pctx(dev_priv);

	/* 1a & 1b: Get forcewake during program sequence. Although the driver
	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	/* Disable RC states.
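	 * RC6 itself is re-enabled in step 3 below, and only if the PCBR
	 * turned out to be populated.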
	 */
	I915_WRITE(GEN6_RC_CONTROL, 0);

	/* 2a: Program RC6 thresholds.*/
	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 125000 * 1280ns */
	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */

	for_each_engine(engine, dev_priv)
		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
	I915_WRITE(GEN6_RC_SLEEP, 0);

	/* TO threshold set to 500 us ( 0x186 * 1.28 us) */
	I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);

	/* allows RC6 residency counter to work */
	I915_WRITE(VLV_COUNTER_CONTROL,
		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
				      VLV_MEDIA_RC6_COUNT_EN |
				      VLV_RENDER_RC6_COUNT_EN));

	/* For now we assume BIOS is allocating and populating the PCBR */
	pcbr = I915_READ(VLV_PCBR);

	/* 3: Enable RC6 */
	if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) &&
	    (pcbr >> VLV_PCBR_ADDR_SHIFT))
		rc6_mode = GEN7_RC_CTL_TO_MODE;

	I915_WRITE(GEN6_RC_CONTROL, rc6_mode);

	/* 4 Program defaults and thresholds for RPS*/
	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
	I915_WRITE(GEN6_RP_UP_EI, 66000);
	I915_WRITE(GEN6_RP_DOWN_EI, 350000);

	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);

	/* 5: Enable RPS */
	I915_WRITE(GEN6_RP_CONTROL,
		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
		   GEN6_RP_MEDIA_IS_GFX |
		   GEN6_RP_ENABLE |
		   GEN6_RP_UP_BUSY_AVG |
		   GEN6_RP_DOWN_IDLE_AVG);

	/* Setting Fixed Bias */
	val = VLV_OVERRIDE_EN |
	      VLV_SOC_TDP_EN |
	      CHV_BIAS_CPU_50_SOC_50;
	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);

	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);

	/* RPS code assumes GPLL is used */
	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");

	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);

	dev_priv->rps.cur_freq = (val >> 8) & 0xff;
	DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
			 dev_priv->rps.cur_freq);

	DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
			 intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq),
			 dev_priv->rps.idle_freq);

	valleyview_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}

static void valleyview_enable_rps(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;
	u32 gtfifodbg, val, rc6_mode = 0;

	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

	valleyview_check_pctx(dev_priv);

	gtfifodbg = I915_READ(GTFIFODBG);
	if (gtfifodbg) {
		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
				 gtfifodbg);
		I915_WRITE(GTFIFODBG, gtfifodbg);
	}

	/* If VLV, Forcewake all wells, else re-direct to regular path */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	/* Disable RC states.
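	 * Same ordering as on CHV: clear GEN6_RC_CONTROL before touching the
	 * RP/RC thresholds, then write the final RC6 mode last.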
	 */
	I915_WRITE(GEN6_RC_CONTROL, 0);

	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
	I915_WRITE(GEN6_RP_UP_EI, 66000);
	I915_WRITE(GEN6_RP_DOWN_EI, 350000);

	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);

	I915_WRITE(GEN6_RP_CONTROL,
		   GEN6_RP_MEDIA_TURBO |
		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
		   GEN6_RP_MEDIA_IS_GFX |
		   GEN6_RP_ENABLE |
		   GEN6_RP_UP_BUSY_AVG |
		   GEN6_RP_DOWN_IDLE_CONT);

	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);

	for_each_engine(engine, dev_priv)
		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);

	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);

	/* allows RC6 residency counter to work */
	I915_WRITE(VLV_COUNTER_CONTROL,
		   _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
				      VLV_RENDER_RC0_COUNT_EN |
				      VLV_MEDIA_RC6_COUNT_EN |
				      VLV_RENDER_RC6_COUNT_EN));

	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;

	intel_print_rc6_info(dev, rc6_mode);

	I915_WRITE(GEN6_RC_CONTROL, rc6_mode);

	/* Setting Fixed Bias */
	val = VLV_OVERRIDE_EN |
	      VLV_SOC_TDP_EN |
	      VLV_BIAS_CPU_125_SOC_875;
	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);

	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);

	/* RPS code assumes GPLL is used */
	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");

	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);

	dev_priv->rps.cur_freq = (val >> 8) & 0xff;
	DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
			 dev_priv->rps.cur_freq);

	DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
			 intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq),
			 dev_priv->rps.idle_freq);

	valleyview_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}

static unsigned long intel_pxfreq(u32 vidfreq)
{
	unsigned long freq;
	int div = (vidfreq & 0x3f0000) >> 16;
	int post = (vidfreq & 0x3000) >> 12;
	int pre = (vidfreq & 0x7);

	if (!pre)
		return 0;

	freq = ((div * 133333) / ((1<<post) * pre));

	return freq;
}

static const struct cparams {
	u16 i;
	u16 t;
	u16 m;
	u16 c;
} cparams[] = {
	{ 1, 1333, 301, 28664 },
	{ 1, 1066, 294, 24460 },
	{ 1, 800, 294, 25192 },
	{ 0, 1333, 276, 27605 },
	{ 0, 1066, 276, 27605 },
	{ 0, 800, 231, 23784 },
};

static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
{
	u64 total_count, diff, ret;
	u32 count1, count2, count3, m = 0, c = 0;
	unsigned long now = jiffies_to_msecs(jiffies), diff1;
	int i;

	assert_spin_locked(&mchdev_lock);

	diff1 = now - dev_priv->ips.last_time1;

	/* Prevent division-by-zero if we are asking too fast.
	 * Also, we don't get interesting results if we are polling
	 * faster than once in 10ms, so just return the saved value
	 * in such cases.
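	 * (diff1 is in milliseconds, derived from jiffies above.)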
	 */
	if (diff1 <= 10)
		return dev_priv->ips.chipset_power;

	count1 = I915_READ(DMIEC);
	count2 = I915_READ(DDREC);
	count3 = I915_READ(CSIEC);

	total_count = count1 + count2 + count3;

	/* FIXME: handle per-counter overflow */
	if (total_count < dev_priv->ips.last_count1) {
		diff = ~0UL - dev_priv->ips.last_count1;
		diff += total_count;
	} else {
		diff = total_count - dev_priv->ips.last_count1;
	}

	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
		if (cparams[i].i == dev_priv->ips.c_m &&
		    cparams[i].t == dev_priv->ips.r_t) {
			m = cparams[i].m;
			c = cparams[i].c;
			break;
		}
	}

	diff = div_u64(diff, diff1);
	ret = ((m * diff) + c);
	ret = div_u64(ret, 10);

	dev_priv->ips.last_count1 = total_count;
	dev_priv->ips.last_time1 = now;

	dev_priv->ips.chipset_power = ret;

	return ret;
}

unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
{
	struct drm_device *dev = dev_priv->dev;
	unsigned long val;

	if (INTEL_INFO(dev)->gen != 5)
		return 0;

	spin_lock_irq(&mchdev_lock);

	val = __i915_chipset_val(dev_priv);

	spin_unlock_irq(&mchdev_lock);

	return val;
}

unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
{
	unsigned long m, x, b;
	u32 tsfs;

	tsfs = I915_READ(TSFS);

	m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
	x = I915_READ8(TR1);

	b = tsfs & TSFS_INTR_MASK;

	return ((m * x) / 127) - b;
}

static int _pxvid_to_vd(u8 pxvid)
{
	if (pxvid == 0)
		return 0;

	if (pxvid >= 8 && pxvid < 31)
		pxvid = 31;

	return (pxvid + 2) * 125;
}

static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
{
	struct drm_device *dev = dev_priv->dev;
	const int vd = _pxvid_to_vd(pxvid);
	const int vm = vd - 1125;

	if (INTEL_INFO(dev)->is_mobile)
		return vm > 0 ? vm : 0;

	return vd;
}

static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
{
	u64 now, diff, diffms;
	u32 count;

	assert_spin_locked(&mchdev_lock);

	now = ktime_get_raw_ns();
	diffms = now - dev_priv->ips.last_time2;
	do_div(diffms, NSEC_PER_MSEC);

	/* Don't divide by 0 */
	if (!diffms)
		return;

	count = I915_READ(GFXEC);

	if (count < dev_priv->ips.last_count2) {
		diff = ~0UL - dev_priv->ips.last_count2;
		diff += count;
	} else {
		diff = count - dev_priv->ips.last_count2;
	}

	dev_priv->ips.last_count2 = count;
	dev_priv->ips.last_time2 = now;

	/* More magic constants...
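	 * The GFXEC delta is scaled by 1181 and divided by the elapsed
	 * milliseconds times 10 to produce the gfx_power value cached below.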
	 */
	diff = diff * 1181;
	diff = div_u64(diff, diffms * 10);
	dev_priv->ips.gfx_power = diff;
}

void i915_update_gfx_val(struct drm_i915_private *dev_priv)
{
	struct drm_device *dev = dev_priv->dev;

	if (INTEL_INFO(dev)->gen != 5)
		return;

	spin_lock_irq(&mchdev_lock);

	__i915_update_gfx_val(dev_priv);

	spin_unlock_irq(&mchdev_lock);
}

static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
{
	unsigned long t, corr, state1, corr2, state2;
	u32 pxvid, ext_v;

	assert_spin_locked(&mchdev_lock);

	pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq));
	pxvid = (pxvid >> 24) & 0x7f;
	ext_v = pvid_to_extvid(dev_priv, pxvid);

	state1 = ext_v;

	t = i915_mch_val(dev_priv);

	/* Revel in the empirically derived constants */

	/* Correction factor in 1/100000 units */
	if (t > 80)
		corr = ((t * 2349) + 135940);
	else if (t >= 50)
		corr = ((t * 964) + 29317);
	else /* < 50 */
		corr = ((t * 301) + 1004);

	corr = corr * ((150142 * state1) / 10000 - 78642);
	corr /= 100000;
	corr2 = (corr * dev_priv->ips.corr);

	state2 = (corr2 * state1) / 10000;
	state2 /= 100; /* convert to mW */

	__i915_update_gfx_val(dev_priv);

	return dev_priv->ips.gfx_power + state2;
}

unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
{
	struct drm_device *dev = dev_priv->dev;
	unsigned long val;

	if (INTEL_INFO(dev)->gen != 5)
		return 0;

	spin_lock_irq(&mchdev_lock);

	val = __i915_gfx_val(dev_priv);

	spin_unlock_irq(&mchdev_lock);

	return val;
}

/**
 * i915_read_mch_val - return value for IPS use
 *
 * Calculate and return a value for the IPS driver to use when deciding whether
 * we have thermal and power headroom to increase CPU or GPU power budget.
 */
unsigned long i915_read_mch_val(void)
{
	struct drm_i915_private *dev_priv;
	unsigned long chipset_val, graphics_val, ret = 0;

	spin_lock_irq(&mchdev_lock);
	if (!i915_mch_dev)
		goto out_unlock;
	dev_priv = i915_mch_dev;

	chipset_val = __i915_chipset_val(dev_priv);
	graphics_val = __i915_gfx_val(dev_priv);

	ret = chipset_val + graphics_val;

out_unlock:
	spin_unlock_irq(&mchdev_lock);

	return ret;
}

/**
 * i915_gpu_raise - raise GPU frequency limit
 *
 * Raise the limit; IPS indicates we have thermal headroom.
 */
bool i915_gpu_raise(void)
{
	struct drm_i915_private *dev_priv;
	bool ret = true;

	spin_lock_irq(&mchdev_lock);
	if (!i915_mch_dev) {
		ret = false;
		goto out_unlock;
	}
	dev_priv = i915_mch_dev;

	if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
		dev_priv->ips.max_delay--;

out_unlock:
	spin_unlock_irq(&mchdev_lock);

	return ret;
}

/**
 * i915_gpu_lower - lower GPU frequency limit
 *
 * IPS indicates we're close to a thermal limit, so throttle back the GPU
 * frequency maximum.
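 *
 * Note that the ips delay values run opposite to frequency: a larger
 * max_delay means a lower frequency ceiling, hence the increment below.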
 */
bool i915_gpu_lower(void)
{
	struct drm_i915_private *dev_priv;
	bool ret = true;

	spin_lock_irq(&mchdev_lock);
	if (!i915_mch_dev) {
		ret = false;
		goto out_unlock;
	}
	dev_priv = i915_mch_dev;

	if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
		dev_priv->ips.max_delay++;

out_unlock:
	spin_unlock_irq(&mchdev_lock);

	return ret;
}

/**
 * i915_gpu_busy - indicate GPU busyness to IPS
 *
 * Tell the IPS driver whether or not the GPU is busy.
 */
bool i915_gpu_busy(void)
{
	struct drm_i915_private *dev_priv;
	struct intel_engine_cs *engine;
	bool ret = false;

	spin_lock_irq(&mchdev_lock);
	if (!i915_mch_dev)
		goto out_unlock;
	dev_priv = i915_mch_dev;

	for_each_engine(engine, dev_priv)
		ret |= !list_empty(&engine->request_list);

out_unlock:
	spin_unlock_irq(&mchdev_lock);

	return ret;
}

/**
 * i915_gpu_turbo_disable - disable graphics turbo
 *
 * Disable graphics turbo by resetting the max frequency and setting the
 * current frequency to the default.
 */
bool i915_gpu_turbo_disable(void)
{
	struct drm_i915_private *dev_priv;
	bool ret = true;

	spin_lock_irq(&mchdev_lock);
	if (!i915_mch_dev) {
		ret = false;
		goto out_unlock;
	}
	dev_priv = i915_mch_dev;

	dev_priv->ips.max_delay = dev_priv->ips.fstart;

	if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
		ret = false;

out_unlock:
	spin_unlock_irq(&mchdev_lock);

	return ret;
}

#if 0
/**
 * Tells the intel_ips driver that the i915 driver is now loaded, if
 * IPS got loaded first.
 *
 * This awkward dance is so that neither module has to depend on the
 * other in order for IPS to do the appropriate communication of
 * GPU turbo limits to i915.
 */
static void
ips_ping_for_i915_load(void)
{
	void (*link)(void);

	link = symbol_get(ips_link_to_i915_driver);
	if (link) {
		link();
		symbol_put(ips_link_to_i915_driver);
	}
}
#endif

void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
{
	/* We only register the i915 ips part with intel-ips once everything is
	 * set up, to avoid intel-ips sneaking in and reading bogus values.
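	 * All the IPS hooks above (i915_read_mch_val() and friends) bail out
	 * while i915_mch_dev is NULL, so publishing the pointer under
	 * mchdev_lock is what arms them.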
	 */
	spin_lock_irq(&mchdev_lock);
	i915_mch_dev = dev_priv;
	spin_unlock_irq(&mchdev_lock);
}

void intel_gpu_ips_teardown(void)
{
	spin_lock_irq(&mchdev_lock);
	i915_mch_dev = NULL;
	spin_unlock_irq(&mchdev_lock);
}

static void intel_init_emon(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 lcfuse;
	u8 pxw[16];
	int i;

	/* Disable to program */
	I915_WRITE(ECR, 0);
	POSTING_READ(ECR);

	/* Program energy weights for various events */
	I915_WRITE(SDEW, 0x15040d00);
	I915_WRITE(CSIEW0, 0x007f0000);
	I915_WRITE(CSIEW1, 0x1e220004);
	I915_WRITE(CSIEW2, 0x04000004);

	for (i = 0; i < 5; i++)
		I915_WRITE(PEW(i), 0);
	for (i = 0; i < 3; i++)
		I915_WRITE(DEW(i), 0);

	/* Program P-state weights to account for frequency power adjustment */
	for (i = 0; i < 16; i++) {
		u32 pxvidfreq = I915_READ(PXVFREQ(i));
		unsigned long freq = intel_pxfreq(pxvidfreq);
		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
			PXVFREQ_PX_SHIFT;
		unsigned long val;

		val = vid * vid;
		val *= (freq / 1000);
		val *= 255;
		val /= (127*127*900);
		if (val > 0xff)
			DRM_ERROR("bad pxval: %ld\n", val);
		pxw[i] = val;
	}
	/* Render standby states get 0 weight */
	pxw[14] = 0;
	pxw[15] = 0;

	for (i = 0; i < 4; i++) {
		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
		I915_WRITE(PXW(i), val);
	}

	/* Adjust magic regs to magic values (more experimental results) */
	I915_WRITE(OGW0, 0);
	I915_WRITE(OGW1, 0);
	I915_WRITE(EG0, 0x00007f00);
	I915_WRITE(EG1, 0x0000000e);
	I915_WRITE(EG2, 0x000e0000);
	I915_WRITE(EG3, 0x68000300);
	I915_WRITE(EG4, 0x42000000);
	I915_WRITE(EG5, 0x00140031);
	I915_WRITE(EG6, 0);
	I915_WRITE(EG7, 0);

	for (i = 0; i < 8; i++)
		I915_WRITE(PXWL(i), 0);

	/* Enable PMON + select events */
	I915_WRITE(ECR, 0x80000019);

	lcfuse = I915_READ(LCFUSE02);

	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
}

void intel_init_gt_powersave(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/*
	 * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a
	 * requirement.
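	 * Taking a runtime PM reference here keeps the device out of runtime
	 * D3 for as long as RC6 stays disabled on the command line; the
	 * matching put happens in intel_cleanup_gt_powersave().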
6258 */ 6259 if (!i915.enable_rc6) { 6260 DRM_INFO("RC6 disabled, disabling runtime PM support\n"); 6261 intel_runtime_pm_get(dev_priv); 6262 } 6263 6264 if (IS_CHERRYVIEW(dev)) 6265 cherryview_init_gt_powersave(dev); 6266 else if (IS_VALLEYVIEW(dev)) 6267 valleyview_init_gt_powersave(dev); 6268 } 6269 6270 void intel_cleanup_gt_powersave(struct drm_device *dev) 6271 { 6272 struct drm_i915_private *dev_priv = dev->dev_private; 6273 6274 if (IS_CHERRYVIEW(dev)) 6275 return; 6276 else if (IS_VALLEYVIEW(dev)) 6277 valleyview_cleanup_gt_powersave(dev); 6278 6279 if (!i915.enable_rc6) 6280 intel_runtime_pm_put(dev_priv); 6281 } 6282 6283 static void gen6_suspend_rps(struct drm_device *dev) 6284 { 6285 struct drm_i915_private *dev_priv = dev->dev_private; 6286 6287 flush_delayed_work(&dev_priv->rps.delayed_resume_work); 6288 6289 gen6_disable_rps_interrupts(dev); 6290 } 6291 6292 /** 6293 * intel_suspend_gt_powersave - suspend PM work and helper threads 6294 * @dev: drm device 6295 * 6296 * We don't want to disable RC6 or other features here, we just want 6297 * to make sure any work we've queued has finished and won't bother 6298 * us while we're suspended. 6299 */ 6300 void intel_suspend_gt_powersave(struct drm_device *dev) 6301 { 6302 struct drm_i915_private *dev_priv = dev->dev_private; 6303 6304 if (INTEL_INFO(dev)->gen < 6) 6305 return; 6306 6307 gen6_suspend_rps(dev); 6308 6309 /* Force GPU to min freq during suspend */ 6310 gen6_rps_idle(dev_priv); 6311 } 6312 6313 void intel_disable_gt_powersave(struct drm_device *dev) 6314 { 6315 struct drm_i915_private *dev_priv = dev->dev_private; 6316 6317 if (IS_IRONLAKE_M(dev)) { 6318 ironlake_disable_drps(dev); 6319 } else if (INTEL_INFO(dev)->gen >= 6) { 6320 intel_suspend_gt_powersave(dev); 6321 6322 mutex_lock(&dev_priv->rps.hw_lock); 6323 if (INTEL_INFO(dev)->gen >= 9) { 6324 gen9_disable_rc6(dev); 6325 gen9_disable_rps(dev); 6326 } else if (IS_CHERRYVIEW(dev)) 6327 cherryview_disable_rps(dev); 6328 else if (IS_VALLEYVIEW(dev)) 6329 valleyview_disable_rps(dev); 6330 else 6331 gen6_disable_rps(dev); 6332 6333 dev_priv->rps.enabled = false; 6334 mutex_unlock(&dev_priv->rps.hw_lock); 6335 } 6336 } 6337 6338 static void intel_gen6_powersave_work(struct work_struct *work) 6339 { 6340 struct drm_i915_private *dev_priv = 6341 container_of(work, struct drm_i915_private, 6342 rps.delayed_resume_work.work); 6343 struct drm_device *dev = dev_priv->dev; 6344 6345 mutex_lock(&dev_priv->rps.hw_lock); 6346 6347 gen6_reset_rps_interrupts(dev); 6348 6349 if (IS_CHERRYVIEW(dev)) { 6350 cherryview_enable_rps(dev); 6351 } else if (IS_VALLEYVIEW(dev)) { 6352 valleyview_enable_rps(dev); 6353 } else if (INTEL_INFO(dev)->gen >= 9) { 6354 gen9_enable_rc6(dev); 6355 gen9_enable_rps(dev); 6356 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) 6357 __gen6_update_ring_freq(dev); 6358 } else if (IS_BROADWELL(dev)) { 6359 gen8_enable_rps(dev); 6360 __gen6_update_ring_freq(dev); 6361 } else { 6362 gen6_enable_rps(dev); 6363 __gen6_update_ring_freq(dev); 6364 } 6365 6366 WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); 6367 WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq); 6368 6369 WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq); 6370 WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); 6371 6372 dev_priv->rps.enabled = true; 6373 6374 gen6_enable_rps_interrupts(dev); 6375 6376 mutex_unlock(&dev_priv->rps.hw_lock); 6377 6378 intel_runtime_pm_put(dev_priv); 6379 } 6380 6381 void intel_enable_gt_powersave(struct drm_device *dev) 6382 { 6383 
struct drm_i915_private *dev_priv = dev->dev_private; 6384 6385 /* Powersaving is controlled by the host when inside a VM */ 6386 if (intel_vgpu_active(dev)) 6387 return; 6388 6389 if (IS_IRONLAKE_M(dev)) { 6390 ironlake_enable_drps(dev); 6391 mutex_lock(&dev->struct_mutex); 6392 intel_init_emon(dev); 6393 mutex_unlock(&dev->struct_mutex); 6394 } else if (INTEL_INFO(dev)->gen >= 6) { 6395 /* 6396 * PCU communication is slow and this doesn't need to be 6397 * done at any specific time, so do this out of our fast path 6398 * to make resume and init faster. 6399 * 6400 * We depend on the HW RC6 power context save/restore 6401 * mechanism when entering D3 through runtime PM suspend. So 6402 * disable RPM until RPS/RC6 is properly setup. We can only 6403 * get here via the driver load/system resume/runtime resume 6404 * paths, so the _noresume version is enough (and in case of 6405 * runtime resume it's necessary). 6406 */ 6407 if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, 6408 round_jiffies_up_relative(HZ))) 6409 intel_runtime_pm_get_noresume(dev_priv); 6410 } 6411 } 6412 6413 void intel_reset_gt_powersave(struct drm_device *dev) 6414 { 6415 struct drm_i915_private *dev_priv = dev->dev_private; 6416 6417 if (INTEL_INFO(dev)->gen < 6) 6418 return; 6419 6420 gen6_suspend_rps(dev); 6421 dev_priv->rps.enabled = false; 6422 } 6423 6424 static void ibx_init_clock_gating(struct drm_device *dev) 6425 { 6426 struct drm_i915_private *dev_priv = dev->dev_private; 6427 6428 /* 6429 * On Ibex Peak and Cougar Point, we need to disable clock 6430 * gating for the panel power sequencer or it will fail to 6431 * start up when no ports are active. 6432 */ 6433 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); 6434 } 6435 6436 static void g4x_disable_trickle_feed(struct drm_device *dev) 6437 { 6438 struct drm_i915_private *dev_priv = dev->dev_private; 6439 enum i915_pipe pipe; 6440 6441 for_each_pipe(dev_priv, pipe) { 6442 I915_WRITE(DSPCNTR(pipe), 6443 I915_READ(DSPCNTR(pipe)) | 6444 DISPPLANE_TRICKLE_FEED_DISABLE); 6445 6446 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe))); 6447 POSTING_READ(DSPSURF(pipe)); 6448 } 6449 } 6450 6451 static void ilk_init_lp_watermarks(struct drm_device *dev) 6452 { 6453 struct drm_i915_private *dev_priv = dev->dev_private; 6454 6455 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); 6456 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); 6457 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); 6458 6459 /* 6460 * Don't touch WM1S_LP_EN here. 6461 * Doing so could cause underruns. 
6462 */ 6463 } 6464 6465 static void ironlake_init_clock_gating(struct drm_device *dev) 6466 { 6467 struct drm_i915_private *dev_priv = dev->dev_private; 6468 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; 6469 6470 /* 6471 * Required for FBC 6472 * WaFbcDisableDpfcClockGating:ilk 6473 */ 6474 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | 6475 ILK_DPFCUNIT_CLOCK_GATE_DISABLE | 6476 ILK_DPFDUNIT_CLOCK_GATE_ENABLE; 6477 6478 I915_WRITE(PCH_3DCGDIS0, 6479 MARIUNIT_CLOCK_GATE_DISABLE | 6480 SVSMUNIT_CLOCK_GATE_DISABLE); 6481 I915_WRITE(PCH_3DCGDIS1, 6482 VFMUNIT_CLOCK_GATE_DISABLE); 6483 6484 /* 6485 * According to the spec the following bits should be set in 6486 * order to enable memory self-refresh 6487 * The bit 22/21 of 0x42004 6488 * The bit 5 of 0x42020 6489 * The bit 15 of 0x45000 6490 */ 6491 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6492 (I915_READ(ILK_DISPLAY_CHICKEN2) | 6493 ILK_DPARB_GATE | ILK_VSDPFD_FULL)); 6494 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; 6495 I915_WRITE(DISP_ARB_CTL, 6496 (I915_READ(DISP_ARB_CTL) | 6497 DISP_FBC_WM_DIS)); 6498 6499 ilk_init_lp_watermarks(dev); 6500 6501 /* 6502 * Based on the document from hardware guys the following bits 6503 * should be set unconditionally in order to enable FBC. 6504 * The bit 22 of 0x42000 6505 * The bit 22 of 0x42004 6506 * The bit 7,8,9 of 0x42020. 6507 */ 6508 if (IS_IRONLAKE_M(dev)) { 6509 /* WaFbcAsynchFlipDisableFbcQueue:ilk */ 6510 I915_WRITE(ILK_DISPLAY_CHICKEN1, 6511 I915_READ(ILK_DISPLAY_CHICKEN1) | 6512 ILK_FBCQ_DIS); 6513 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6514 I915_READ(ILK_DISPLAY_CHICKEN2) | 6515 ILK_DPARB_GATE); 6516 } 6517 6518 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); 6519 6520 I915_WRITE(ILK_DISPLAY_CHICKEN2, 6521 I915_READ(ILK_DISPLAY_CHICKEN2) | 6522 ILK_ELPIN_409_SELECT); 6523 I915_WRITE(_3D_CHICKEN2, 6524 _3D_CHICKEN2_WM_READ_PIPELINED << 16 | 6525 _3D_CHICKEN2_WM_READ_PIPELINED); 6526 6527 /* WaDisableRenderCachePipelinedFlush:ilk */ 6528 I915_WRITE(CACHE_MODE_0, 6529 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); 6530 6531 /* WaDisable_RenderCache_OperationalFlush:ilk */ 6532 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 6533 6534 g4x_disable_trickle_feed(dev); 6535 6536 ibx_init_clock_gating(dev); 6537 } 6538 6539 static void cpt_init_clock_gating(struct drm_device *dev) 6540 { 6541 struct drm_i915_private *dev_priv = dev->dev_private; 6542 int pipe; 6543 uint32_t val; 6544 6545 /* 6546 * On Ibex Peak and Cougar Point, we need to disable clock 6547 * gating for the panel power sequencer or it will fail to 6548 * start up when no ports are active. 6549 */ 6550 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE | 6551 PCH_DPLUNIT_CLOCK_GATE_DISABLE | 6552 PCH_CPUNIT_CLOCK_GATE_DISABLE); 6553 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | 6554 DPLS_EDP_PPS_FIX_DIS); 6555 /* The below fixes the weird display corruption, a few pixels shifted 6556 * downward, on (only) LVDS of some HP laptops with IVY. 
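	 * The fix forces the transcoder timing override on and normalizes the
	 * FDI polarity, frame start delay and deep color bits per pipe,
	 * honouring the VBT's fdi_rx_polarity_inverted flag.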
	 */
	for_each_pipe(dev_priv, pipe) {
		val = I915_READ(TRANS_CHICKEN2(pipe));
		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
		if (dev_priv->vbt.fdi_rx_polarity_inverted)
			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
		I915_WRITE(TRANS_CHICKEN2(pipe), val);
	}
	/* WADP0ClockGatingDisable */
	for_each_pipe(dev_priv, pipe) {
		I915_WRITE(TRANS_CHICKEN1(pipe),
			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
	}
}

static void gen6_check_mch_setup(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t tmp;

	tmp = I915_READ(MCH_SSKPD);
	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
			      tmp);
}

static void gen6_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;

	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);

	I915_WRITE(ILK_DISPLAY_CHICKEN2,
		   I915_READ(ILK_DISPLAY_CHICKEN2) |
		   ILK_ELPIN_409_SELECT);

	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
	I915_WRITE(_3D_CHICKEN,
		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));

	/* WaDisable_RenderCache_OperationalFlush:snb */
	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	I915_WRITE(GEN6_GT_MODE,
		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));

	ilk_init_lp_watermarks(dev);

	I915_WRITE(CACHE_MODE_0,
		   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));

	I915_WRITE(GEN6_UCGCTL1,
		   I915_READ(GEN6_UCGCTL1) |
		   GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);

	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
	 * gating disable must be set. Failure to set it results in
	 * flickering pixels due to Z write ordering failures after
	 * some amount of runtime in the Mesa "fire" demo, and Unigine
	 * Sanctuary and Tropics, and apparently anything else with
	 * alpha test or pixel discard.
	 *
	 * According to the spec, bit 11 (RCCUNIT) must also be set,
	 * but we didn't debug actual testcases to find it out.
	 *
	 * WaDisableRCCUnitClockGating:snb
	 * WaDisableRCPBUnitClockGating:snb
	 */
	I915_WRITE(GEN6_UCGCTL2,
		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);

	/* WaStripsFansDisableFastClipPerformanceFix:snb */
	I915_WRITE(_3D_CHICKEN3,
		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));

	/*
	 * Bspec says:
	 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
	 * 3DSTATE_SF number of SF output attributes is more than 16."
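	 *
	 * The bit is set unconditionally here, since the driver can't know at
	 * init time what clip/SF state later workloads will program.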
	 */
	I915_WRITE(_3D_CHICKEN3,
		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));

	/*
	 * According to the spec the following bits should be
	 * set in order to enable memory self-refresh and fbc:
	 * The bit21 and bit22 of 0x42000
	 * The bit21 and bit22 of 0x42004
	 * The bit5 and bit7 of 0x42020
	 * The bit14 of 0x70180
	 * The bit14 of 0x71180
	 *
	 * WaFbcAsynchFlipDisableFbcQueue:snb
	 */
	I915_WRITE(ILK_DISPLAY_CHICKEN1,
		   I915_READ(ILK_DISPLAY_CHICKEN1) |
		   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
	I915_WRITE(ILK_DISPLAY_CHICKEN2,
		   I915_READ(ILK_DISPLAY_CHICKEN2) |
		   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
	I915_WRITE(ILK_DSPCLK_GATE_D,
		   I915_READ(ILK_DSPCLK_GATE_D) |
		   ILK_DPARBUNIT_CLOCK_GATE_ENABLE |
		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);

	g4x_disable_trickle_feed(dev);

	cpt_init_clock_gating(dev);

	gen6_check_mch_setup(dev);
}

static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
{
	uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);

	/*
	 * WaVSThreadDispatchOverride:ivb,vlv
	 *
	 * This actually overrides the dispatch
	 * mode for all thread types.
	 */
	reg &= ~GEN7_FF_SCHED_MASK;
	reg |= GEN7_FF_TS_SCHED_HW;
	reg |= GEN7_FF_VS_SCHED_HW;
	reg |= GEN7_FF_DS_SCHED_HW;

	I915_WRITE(GEN7_FF_THREAD_MODE, reg);
}

static void lpt_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/*
	 * TODO: this bit should only be enabled when really needed, then
	 * disabled when not needed anymore in order to save power.
	 */
	if (HAS_PCH_LPT_LP(dev))
		I915_WRITE(SOUTH_DSPCLK_GATE_D,
			   I915_READ(SOUTH_DSPCLK_GATE_D) |
			   PCH_LP_PARTITION_LEVEL_DISABLE);

	/* WADPOClockGatingDisable:hsw */
	I915_WRITE(TRANS_CHICKEN1(PIPE_A),
		   I915_READ(TRANS_CHICKEN1(PIPE_A)) |
		   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
}

static void lpt_suspend_hw(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (HAS_PCH_LPT_LP(dev)) {
		uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);

		val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
		I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
	}
}

static void kabylake_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	gen9_init_clock_gating(dev);

	/* WaDisableSDEUnitClockGating:kbl */
	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
		I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
			   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableGamClockGating:kbl */
	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
		I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
			   GEN6_GAMUNIT_CLOCK_GATE_DISABLE);

	/* WaFbcNukeOnHostModify:kbl */
	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
		   ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
}

static void skylake_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	gen9_init_clock_gating(dev);

	/* WaFbcNukeOnHostModify:skl */
	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
		   ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
}

static void broadwell_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	enum i915_pipe pipe;
	uint32_t misccpctl;

	ilk_init_lp_watermarks(dev);

	/* WaSwitchSolVfFArbitrationPriority:bdw */
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);

	/* WaPsrDPAMaskVBlankInSRD:bdw */
	I915_WRITE(CHICKEN_PAR1_1,
		   I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);

	/* WaPsrDPRSUnmaskVBlankInSRD:bdw */
	for_each_pipe(dev_priv, pipe) {
		I915_WRITE(CHICKEN_PIPESL_1(pipe),
			   I915_READ(CHICKEN_PIPESL_1(pipe)) |
			   BDW_DPRS_MASK_VBLANK_SRD);
	}

	/* WaVSRefCountFullforceMissDisable:bdw */
	/* WaDSRefCountFullforceMissDisable:bdw */
	I915_WRITE(GEN7_FF_THREAD_MODE,
		   I915_READ(GEN7_FF_THREAD_MODE) &
		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));

	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));

	/* WaDisableSDEUnitClockGating:bdw */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

	/*
	 * WaProgramL3SqcReg1Default:bdw
	 * WaTempDisableDOPClkGating:bdw
	 */
	misccpctl = I915_READ(GEN7_MISCCPCTL);
	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
	I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
	/*
	 * Wait at least 100 clocks before re-enabling clock gating. See
	 * the definition of L3SQCREG1 in BSpec.
	 */
	POSTING_READ(GEN8_L3SQCREG1);
	udelay(1);
	I915_WRITE(GEN7_MISCCPCTL, misccpctl);

	/*
	 * WaGttCachingOffByDefault:bdw
	 * GTT cache may not work with big pages, so if those
	 * are ever enabled GTT cache may need to be disabled.
	 */
	I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);

	lpt_init_clock_gating(dev);
}

static void haswell_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	ilk_init_lp_watermarks(dev);

	/* L3 caching of data atomics doesn't work -- disable it. */
	I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
	I915_WRITE(HSW_ROW_CHICKEN3,
		   _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));

	/* This is required by WaCatErrorRejectionIssue:hsw */
	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);

	/* WaVSRefCountFullforceMissDisable:hsw */
	I915_WRITE(GEN7_FF_THREAD_MODE,
		   I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);

	/* WaDisable_RenderCache_OperationalFlush:hsw */
	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));

	/* enable HiZ Raw Stall Optimization */
	I915_WRITE(CACHE_MODE_0_GEN7,
		   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));

	/* WaDisable4x2SubspanOptimization:hsw */
	I915_WRITE(CACHE_MODE_1,
		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
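	 * (GT_MODE is a masked register: the high 16 bits of the write select
	 * which of the low 16 bits actually get updated, which is what the
	 * _MASKED_FIELD() helper encodes.)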
	 */
	I915_WRITE(GEN7_GT_MODE,
		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));

	/* WaSampleCChickenBitEnable:hsw */
	I915_WRITE(HALF_SLICE_CHICKEN3,
		   _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));

	/* WaSwitchSolVfFArbitrationPriority:hsw */
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);

	/* WaRsPkgCStateDisplayPMReq:hsw */
	I915_WRITE(CHICKEN_PAR1_1,
		   I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);

	lpt_init_clock_gating(dev);
}

static void ivybridge_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t snpcr;

	ilk_init_lp_watermarks(dev);

	I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableEarlyCull:ivb */
	I915_WRITE(_3D_CHICKEN3,
		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));

	/* WaDisableBackToBackFlipFix:ivb */
	I915_WRITE(IVB_CHICKEN3,
		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
		   CHICKEN3_DGMG_DONE_FIX_DISABLE);

	/* WaDisablePSDDualDispatchEnable:ivb */
	if (IS_IVB_GT1(dev))
		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));

	/* WaDisable_RenderCache_OperationalFlush:ivb */
	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));

	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);

	/* WaApplyL3ControlAndL3ChickenMode:ivb */
	I915_WRITE(GEN7_L3CNTLREG1,
		   GEN7_WA_FOR_GEN7_L3_CONTROL);
	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
		   GEN7_WA_L3_CHICKEN_MODE);
	if (IS_IVB_GT1(dev))
		I915_WRITE(GEN7_ROW_CHICKEN2,
			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
	else {
		/* must write both registers */
		I915_WRITE(GEN7_ROW_CHICKEN2,
			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
		I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
	}

	/* WaForceL3Serialization:ivb */
	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);

	/*
	 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
	 * This implements the WaDisableRCZUnitClockGating:ivb workaround.
	 */
	I915_WRITE(GEN6_UCGCTL2,
		   GEN6_RCZUNIT_CLOCK_GATE_DISABLE);

	/* This is required by WaCatErrorRejectionIssue:ivb */
	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);

	g4x_disable_trickle_feed(dev);

	gen7_setup_fixed_func_scheduler(dev_priv);

	if (0) { /* causes HiZ corruption on ivb:gt1 */
		/* enable HiZ Raw Stall Optimization */
		I915_WRITE(CACHE_MODE_0_GEN7,
			   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
	}

	/* WaDisable4x2SubspanOptimization:ivb */
	I915_WRITE(CACHE_MODE_1,
		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	I915_WRITE(GEN7_GT_MODE,
		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));

	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
	snpcr &= ~GEN6_MBC_SNPCR_MASK;
	snpcr |= GEN6_MBC_SNPCR_MED;
	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);

	if (!HAS_PCH_NOP(dev))
		cpt_init_clock_gating(dev);

	gen6_check_mch_setup(dev);
}

static void valleyview_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* WaDisableEarlyCull:vlv */
	I915_WRITE(_3D_CHICKEN3,
		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));

	/* WaDisableBackToBackFlipFix:vlv */
	I915_WRITE(IVB_CHICKEN3,
		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
		   CHICKEN3_DGMG_DONE_FIX_DISABLE);

	/* WaPsdDispatchEnable:vlv */
	/* WaDisablePSDDualDispatchEnable:vlv */
	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
				      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));

	/* WaDisable_RenderCache_OperationalFlush:vlv */
	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));

	/* WaForceL3Serialization:vlv */
	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);

	/* WaDisableDopClockGating:vlv */
	I915_WRITE(GEN7_ROW_CHICKEN2,
		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));

	/* This is required by WaCatErrorRejectionIssue:vlv */
	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);

	gen7_setup_fixed_func_scheduler(dev_priv);

	/*
	 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
	 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
	 */
	I915_WRITE(GEN6_UCGCTL2,
		   GEN6_RCZUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableL3Bank2xClockGate:vlv
	 * Disabling L3 clock gating- MMIO 940c[25] = 1
	 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
	I915_WRITE(GEN7_UCGCTL4,
		   I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);

	/*
	 * BSpec says this must be set, even though
	 * WaDisable4x2SubspanOptimization isn't listed for VLV.
	 */
	I915_WRITE(CACHE_MODE_1,
		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	I915_WRITE(GEN7_GT_MODE,
		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));

	/*
	 * WaIncreaseL3CreditsForVLVB0:vlv
	 * This is the hardware default actually.
	 */
	I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);

	/*
	 * WaDisableVLVClockGating_VBIIssue:vlv
	 * Disable clock gating on the GCFG unit to prevent a delay
	 * in the reporting of vblank events.
	 */
	I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
}

static void cherryview_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* WaVSRefCountFullforceMissDisable:chv */
	/* WaDSRefCountFullforceMissDisable:chv */
	I915_WRITE(GEN7_FF_THREAD_MODE,
		   I915_READ(GEN7_FF_THREAD_MODE) &
		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));

	/* WaDisableSemaphoreAndSyncFlipWait:chv */
	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));

	/* WaDisableCSUnitClockGating:chv */
	I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableSDEUnitClockGating:chv */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

	/*
	 * GTT cache may not work with big pages, so if those
	 * are ever enabled GTT cache may need to be disabled.
	 */
	I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
}

static void g4x_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dspclk_gate;

	I915_WRITE(RENCLK_GATE_D1, 0);
	I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
		   GS_UNIT_CLOCK_GATE_DISABLE |
		   CL_UNIT_CLOCK_GATE_DISABLE);
	I915_WRITE(RAMCLK_GATE_D, 0);
	dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
		OVRUNIT_CLOCK_GATE_DISABLE |
		OVCUNIT_CLOCK_GATE_DISABLE;
	if (IS_GM45(dev))
		dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
	I915_WRITE(DSPCLK_GATE_D, dspclk_gate);

	/* WaDisableRenderCachePipelinedFlush */
	I915_WRITE(CACHE_MODE_0,
		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));

	/* WaDisable_RenderCache_OperationalFlush:g4x */
	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));

	g4x_disable_trickle_feed(dev);
}

static void crestline_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
	I915_WRITE(RENCLK_GATE_D2, 0);
	I915_WRITE(DSPCLK_GATE_D, 0);
	I915_WRITE(RAMCLK_GATE_D, 0);
	I915_WRITE16(DEUC, 0);
	I915_WRITE(MI_ARB_STATE,
		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));

	/* WaDisable_RenderCache_OperationalFlush:gen4 */
	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
}

static void broadwater_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
		   I965_RCC_CLOCK_GATE_DISABLE |
		   I965_RCPB_CLOCK_GATE_DISABLE |
		   I965_ISC_CLOCK_GATE_DISABLE |
		   I965_FBC_CLOCK_GATE_DISABLE);
	I915_WRITE(RENCLK_GATE_D2, 0);
	I915_WRITE(MI_ARB_STATE,
		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));

	/* WaDisable_RenderCache_OperationalFlush:gen4 */
	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
}

static void gen3_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 dstate = I915_READ(D_STATE);

	dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
		DSTATE_DOT_CLOCK_GATING;
	I915_WRITE(D_STATE, dstate);

	if (IS_PINEVIEW(dev))
		I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));

	/* IIR "flip pending" means done if this bit is set */
	I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));

	/* interrupts should cause a wake up from C3 */
	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));

	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
	I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));

	I915_WRITE(MI_ARB_STATE,
		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
}

static void i85x_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);

	/* interrupts should cause a wake up from C3 */
	I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
		   _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));

	I915_WRITE(MEM_MODE,
		   _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
}

static void i830_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);

	I915_WRITE(MEM_MODE,
		   _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
		   _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
}

void intel_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	dev_priv->display.init_clock_gating(dev);
}

void intel_suspend_hw(struct drm_device *dev)
{
	if (HAS_PCH_LPT(dev))
		lpt_suspend_hw(dev);
}

static void nop_init_clock_gating(struct drm_device *dev)
{
	DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
}

/**
 * intel_init_clock_gating_hooks - setup the clock gating hooks
 * @dev_priv: device private
 *
 * Setup the hooks that configure which clocks of a given platform can be
 * gated and also apply various GT and display specific workarounds for these
 * platforms. Note that some GT specific workarounds are applied separately
 * when GPU contexts or batchbuffers start their execution.
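 *
 * Platforms matching none of the checks below get a nop hook plus a
 * MISSING_CASE() warning; the hook chosen here is invoked later through
 * intel_init_clock_gating().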
7226 */ 7227 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) 7228 { 7229 if (IS_SKYLAKE(dev_priv)) 7230 dev_priv->display.init_clock_gating = skylake_init_clock_gating; 7231 else if (IS_KABYLAKE(dev_priv)) 7232 dev_priv->display.init_clock_gating = kabylake_init_clock_gating; 7233 else if (IS_BROXTON(dev_priv)) 7234 dev_priv->display.init_clock_gating = bxt_init_clock_gating; 7235 else if (IS_BROADWELL(dev_priv)) 7236 dev_priv->display.init_clock_gating = broadwell_init_clock_gating; 7237 else if (IS_CHERRYVIEW(dev_priv)) 7238 dev_priv->display.init_clock_gating = cherryview_init_clock_gating; 7239 else if (IS_HASWELL(dev_priv)) 7240 dev_priv->display.init_clock_gating = haswell_init_clock_gating; 7241 else if (IS_IVYBRIDGE(dev_priv)) 7242 dev_priv->display.init_clock_gating = ivybridge_init_clock_gating; 7243 else if (IS_VALLEYVIEW(dev_priv)) 7244 dev_priv->display.init_clock_gating = valleyview_init_clock_gating; 7245 else if (IS_GEN6(dev_priv)) 7246 dev_priv->display.init_clock_gating = gen6_init_clock_gating; 7247 else if (IS_GEN5(dev_priv)) 7248 dev_priv->display.init_clock_gating = ironlake_init_clock_gating; 7249 else if (IS_G4X(dev_priv)) 7250 dev_priv->display.init_clock_gating = g4x_init_clock_gating; 7251 else if (IS_CRESTLINE(dev_priv)) 7252 dev_priv->display.init_clock_gating = crestline_init_clock_gating; 7253 else if (IS_BROADWATER(dev_priv)) 7254 dev_priv->display.init_clock_gating = broadwater_init_clock_gating; 7255 else if (IS_GEN3(dev_priv)) 7256 dev_priv->display.init_clock_gating = gen3_init_clock_gating; 7257 else if (IS_I85X(dev_priv) || IS_I865G(dev_priv)) 7258 dev_priv->display.init_clock_gating = i85x_init_clock_gating; 7259 else if (IS_GEN2(dev_priv)) 7260 dev_priv->display.init_clock_gating = i830_init_clock_gating; 7261 else { 7262 MISSING_CASE(INTEL_DEVID(dev_priv)); 7263 dev_priv->display.init_clock_gating = nop_init_clock_gating; 7264 } 7265 } 7266 7267 /* Set up chip specific power management-related functions */ 7268 void intel_init_pm(struct drm_device *dev) 7269 { 7270 struct drm_i915_private *dev_priv = dev->dev_private; 7271 7272 intel_fbc_init(dev_priv); 7273 7274 /* For cxsr */ 7275 if (IS_PINEVIEW(dev)) 7276 i915_pineview_get_mem_freq(dev); 7277 else if (IS_GEN5(dev)) 7278 i915_ironlake_get_mem_freq(dev); 7279 7280 /* For FIFO watermark updates */ 7281 if (INTEL_INFO(dev)->gen >= 9) { 7282 skl_setup_wm_latency(dev); 7283 dev_priv->display.update_wm = skl_update_wm; 7284 } else if (HAS_PCH_SPLIT(dev)) { 7285 ilk_setup_wm_latency(dev); 7286 7287 if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] && 7288 dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) || 7289 (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] && 7290 dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) { 7291 dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm; 7292 dev_priv->display.compute_intermediate_wm = 7293 ilk_compute_intermediate_wm; 7294 dev_priv->display.initial_watermarks = 7295 ilk_initial_watermarks; 7296 dev_priv->display.optimize_watermarks = 7297 ilk_optimize_watermarks; 7298 } else { 7299 DRM_DEBUG_KMS("Failed to read display plane latency. 
" 7300 "Disable CxSR\n"); 7301 } 7302 } else if (IS_CHERRYVIEW(dev)) { 7303 vlv_setup_wm_latency(dev); 7304 dev_priv->display.update_wm = vlv_update_wm; 7305 } else if (IS_VALLEYVIEW(dev)) { 7306 vlv_setup_wm_latency(dev); 7307 dev_priv->display.update_wm = vlv_update_wm; 7308 } else if (IS_PINEVIEW(dev)) { 7309 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev), 7310 dev_priv->is_ddr3, 7311 dev_priv->fsb_freq, 7312 dev_priv->mem_freq)) { 7313 DRM_INFO("failed to find known CxSR latency " 7314 "(found ddr%s fsb freq %d, mem freq %d), " 7315 "disabling CxSR\n", 7316 (dev_priv->is_ddr3 == 1) ? "3" : "2", 7317 dev_priv->fsb_freq, dev_priv->mem_freq); 7318 /* Disable CxSR and never update its watermark again */ 7319 intel_set_memory_cxsr(dev_priv, false); 7320 dev_priv->display.update_wm = NULL; 7321 } else 7322 dev_priv->display.update_wm = pineview_update_wm; 7323 } else if (IS_G4X(dev)) { 7324 dev_priv->display.update_wm = g4x_update_wm; 7325 } else if (IS_GEN4(dev)) { 7326 dev_priv->display.update_wm = i965_update_wm; 7327 } else if (IS_GEN3(dev)) { 7328 dev_priv->display.update_wm = i9xx_update_wm; 7329 dev_priv->display.get_fifo_size = i9xx_get_fifo_size; 7330 } else if (IS_GEN2(dev)) { 7331 if (INTEL_INFO(dev)->num_pipes == 1) { 7332 dev_priv->display.update_wm = i845_update_wm; 7333 dev_priv->display.get_fifo_size = i845_get_fifo_size; 7334 } else { 7335 dev_priv->display.update_wm = i9xx_update_wm; 7336 dev_priv->display.get_fifo_size = i830_get_fifo_size; 7337 } 7338 } else { 7339 DRM_ERROR("unexpected fall-through in intel_init_pm\n"); 7340 } 7341 } 7342 7343 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) 7344 { 7345 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 7346 7347 if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { 7348 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n"); 7349 return -EAGAIN; 7350 } 7351 7352 I915_WRITE(GEN6_PCODE_DATA, *val); 7353 I915_WRITE(GEN6_PCODE_DATA1, 0); 7354 I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); 7355 7356 if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, 7357 500)) { 7358 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox); 7359 return -ETIMEDOUT; 7360 } 7361 7362 *val = I915_READ(GEN6_PCODE_DATA); 7363 I915_WRITE(GEN6_PCODE_DATA, 0); 7364 7365 return 0; 7366 } 7367 7368 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val) 7369 { 7370 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 7371 7372 if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { 7373 DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n"); 7374 return -EAGAIN; 7375 } 7376 7377 I915_WRITE(GEN6_PCODE_DATA, val); 7378 I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); 7379 7380 if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, 7381 500)) { 7382 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox); 7383 return -ETIMEDOUT; 7384 } 7385 7386 I915_WRITE(GEN6_PCODE_DATA, 0); 7387 7388 return 0; 7389 } 7390 7391 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) 7392 { 7393 /* 7394 * N = val - 0xb7 7395 * Slow = Fast = GPLL ref * N 7396 */ 7397 return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * (val - 0xb7), 1000); 7398 } 7399 7400 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) 7401 { 7402 return DIV_ROUND_CLOSEST(1000 * val, dev_priv->rps.gpll_ref_freq) + 0xb7; 7403 } 7404 7405 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) 7406 { 7407 
static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
	/*
	 * N = val / 2
	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
	 */
	return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * val, 2 * 2 * 1000);
}

static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
	/* CHV needs even values */
	return DIV_ROUND_CLOSEST(2 * 1000 * val, dev_priv->rps.gpll_ref_freq) * 2;
}

int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
	if (IS_GEN9(dev_priv))
		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
					 GEN9_FREQ_SCALER);
	else if (IS_CHERRYVIEW(dev_priv))
		return chv_gpu_freq(dev_priv, val);
	else if (IS_VALLEYVIEW(dev_priv))
		return byt_gpu_freq(dev_priv, val);
	else
		return val * GT_FREQUENCY_MULTIPLIER;
}

int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
	if (IS_GEN9(dev_priv))
		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
					 GT_FREQUENCY_MULTIPLIER);
	else if (IS_CHERRYVIEW(dev_priv))
		return chv_freq_opcode(dev_priv, val);
	else if (IS_VALLEYVIEW(dev_priv))
		return byt_freq_opcode(dev_priv, val);
	else
		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
}

/*
 * A boost is deferred to a worker so it can run outside the caller's
 * context; the work item holds its own reference on the request until
 * the boost decision has been made.
 */
struct request_boost {
	struct work_struct work;
	struct drm_i915_gem_request *req;
};

static void __intel_rps_boost_work(struct work_struct *work)
{
	struct request_boost *boost = container_of(work, struct request_boost, work);
	struct drm_i915_gem_request *req = boost->req;

	/* Only boost if the request is still outstanding */
	if (!i915_gem_request_completed(req, true))
		gen6_rps_boost(to_i915(req->engine->dev), NULL,
			       req->emitted_jiffies);

	i915_gem_request_unreference__unlocked(req);
	kfree(boost);
}

void intel_queue_rps_boost_for_request(struct drm_device *dev,
				       struct drm_i915_gem_request *req)
{
	struct request_boost *boost;

	if (req == NULL || INTEL_INFO(dev)->gen < 6)
		return;

	if (i915_gem_request_completed(req, true))
		return;

	boost = kmalloc(sizeof(*boost), M_DRM, M_NOWAIT);
	if (boost == NULL)
		return;

	i915_gem_request_reference(req);
	boost->req = req;

	INIT_WORK(&boost->work, __intel_rps_boost_work);
	queue_work(to_i915(dev)->wq, &boost->work);
}

void intel_pm_setup(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	lockinit(&dev_priv->rps.hw_lock, "i915 rps.hw_lock", 0, LK_CANRECURSE);
	lockinit(&dev_priv->rps.client_lock, "i915rcl", 0, LK_CANRECURSE);

	INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
			  intel_gen6_powersave_work);
	INIT_LIST_HEAD(&dev_priv->rps.clients);
	INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
	INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);

	dev_priv->pm.suspended = false;
	atomic_set(&dev_priv->pm.wakeref_count, 0);
	atomic_set(&dev_priv->pm.atomic_seq, 0);
}
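/*
 * Worked example for intel_gpu_freq()/intel_freq_opcode() above on gen9,
 * where punit opcodes count in 16.667 MHz units. Assuming the usual
 * definitions GT_FREQUENCY_MULTIPLIER = 50 and GEN9_FREQ_SCALER = 3:
 *
 *	intel_gpu_freq(dev_priv, 18)     = 18 * 50 / 3  = 300 MHz
 *	intel_freq_opcode(dev_priv, 300) = 300 * 3 / 50 = 18
 *
 * On the older big-core platforms the scaler drops out and opcodes are
 * plain 50 MHz units.
 */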