/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "i915_drv.h"
#include "intel_ringbuffer.h"
#include "intel_lrc.h"

static const struct engine_info {
	const char *name;
	unsigned int exec_id;
	unsigned int hw_id;
	u32 mmio_base;
	unsigned irq_shift;
	int (*init_legacy)(struct intel_engine_cs *engine);
	int (*init_execlists)(struct intel_engine_cs *engine);
} intel_engines[] = {
	[RCS] = {
		.name = "rcs",
		.hw_id = RCS_HW,
		.exec_id = I915_EXEC_RENDER,
		.mmio_base = RENDER_RING_BASE,
		.irq_shift = GEN8_RCS_IRQ_SHIFT,
		.init_execlists = logical_render_ring_init,
		.init_legacy = intel_init_render_ring_buffer,
	},
	[BCS] = {
		.name = "bcs",
		.hw_id = BCS_HW,
		.exec_id = I915_EXEC_BLT,
		.mmio_base = BLT_RING_BASE,
		.irq_shift = GEN8_BCS_IRQ_SHIFT,
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_blt_ring_buffer,
	},
	[VCS] = {
		.name = "vcs",
		.hw_id = VCS_HW,
		.exec_id = I915_EXEC_BSD,
		.mmio_base = GEN6_BSD_RING_BASE,
		.irq_shift = GEN8_VCS1_IRQ_SHIFT,
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_bsd_ring_buffer,
	},
	[VCS2] = {
		.name = "vcs2",
		.hw_id = VCS2_HW,
		.exec_id = I915_EXEC_BSD,
		.mmio_base = GEN8_BSD2_RING_BASE,
		.irq_shift = GEN8_VCS2_IRQ_SHIFT,
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_bsd2_ring_buffer,
	},
	[VECS] = {
		.name = "vecs",
		.hw_id = VECS_HW,
		.exec_id = I915_EXEC_VEBOX,
		.mmio_base = VEBOX_RING_BASE,
		.irq_shift = GEN8_VECS_IRQ_SHIFT,
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_vebox_ring_buffer,
	},
};

static int
intel_engine_setup(struct drm_i915_private *dev_priv,
		   enum intel_engine_id id)
{
	const struct engine_info *info = &intel_engines[id];
	struct intel_engine_cs *engine;

	GEM_BUG_ON(dev_priv->engine[id]);
	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return -ENOMEM;

	engine->id = id;
	engine->i915 = dev_priv;
	engine->name = info->name;
	engine->exec_id = info->exec_id;
	engine->hw_id = engine->guc_id = info->hw_id;
	engine->mmio_base = info->mmio_base;
	engine->irq_shift = info->irq_shift;

	/* Nothing to do here, execute in order of dependencies */
	engine->schedule = NULL;

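	/*
	 * Context-status change notifications (consumed e.g. by GVT-g as
	 * execlists contexts are scheduled in and out) may be raised from
	 * atomic context, hence the atomic notifier head.
	 */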
	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);

	dev_priv->engine[id] = engine;
	return 0;
}

/**
 * intel_engines_init_early() - allocate the Engine Command Streamers
 * @dev_priv: i915 device private
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init_early(struct drm_i915_private *dev_priv)
{
	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
	unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
	unsigned int mask = 0;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int i;
	int err;

	WARN_ON(ring_mask == 0);
	WARN_ON(ring_mask &
		GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES));

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		if (!HAS_ENGINE(dev_priv, i))
			continue;

		err = intel_engine_setup(dev_priv, i);
		if (err)
			goto cleanup;

		mask |= ENGINE_MASK(i);
	}

	/*
	 * Catch failures to update the intel_engines table when new engines
	 * are added to the driver: warn and disable the forgotten engines.
	 */
	if (WARN_ON(mask != ring_mask))
		device_info->ring_mask = mask;

	device_info->num_rings = hweight32(mask);

	return 0;

cleanup:
	for_each_engine(engine, dev_priv, id)
		kfree(engine);
	return err;
}

/**
 * intel_engines_init() - allocate, populate and init the Engine Command Streamers
 * @dev_priv: i915 device private
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init(struct drm_i915_private *dev_priv)
{
	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
	struct intel_engine_cs *engine;
	enum intel_engine_id id, err_id;
	unsigned int mask = 0;
	int err = 0;

	for_each_engine(engine, dev_priv, id) {
		int (*init)(struct intel_engine_cs *engine);

		if (i915.enable_execlists)
			init = intel_engines[id].init_execlists;
		else
			init = intel_engines[id].init_legacy;
		if (!init) {
			kfree(engine);
			dev_priv->engine[id] = NULL;
			continue;
		}

		err = init(engine);
		if (err) {
			err_id = id;
			goto cleanup;
		}

		GEM_BUG_ON(!engine->submit_request);
		mask |= ENGINE_MASK(id);
	}

	/*
	 * Catch failures to update the intel_engines table when new engines
	 * are added to the driver: warn and disable the forgotten engines.
	 */
	if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask))
		device_info->ring_mask = mask;

	device_info->num_rings = hweight32(mask);

	return 0;

cleanup:
	for_each_engine(engine, dev_priv, id) {
		if (id >= err_id)
			kfree(engine);
		else
			dev_priv->gt.cleanup_engine(engine);
	}
	return err;
}

void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
{
	struct drm_i915_private *dev_priv = engine->i915;

	/* Our semaphore implementation is strictly monotonic (i.e. we proceed
	 * so long as the semaphore value in the register/page is greater
	 * than the sync value), so whenever we reset the seqno,
	 * so long as we reset the tracking semaphore value to 0, it will
	 * always be before the next request's seqno. If we don't reset
	 * the semaphore value, then when the seqno moves backwards all
	 * future waits will complete instantly (causing rendering corruption).
	 */
	if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
		I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
		I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
		if (HAS_VEBOX(dev_priv))
			I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
	}
	if (dev_priv->semaphore) {
		struct page *page = i915_vma_first_page(dev_priv->semaphore);
		void *semaphores;

		/* Semaphores are in noncoherent memory, flush to be safe */
		semaphores = kmap_atomic(page);
		memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
		       0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
		drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
				       I915_NUM_ENGINES * gen8_semaphore_seqno_size);
		kunmap_atomic(semaphores);
	}

	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);

	GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
	engine->hangcheck.seqno = seqno;

	/* After manually advancing the seqno, fake the interrupt in case
	 * there are any waiters for that seqno.
	 */
	intel_engine_wakeup(engine);
}

static void intel_engine_init_timeline(struct intel_engine_cs *engine)
{
	engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id];
}

/**
 * intel_engine_setup_common - setup engine state not requiring hw access
 * @engine: Engine to setup.
 *
 * Initializes @engine structure members shared between legacy and execlists
 * submission modes which do not require hardware access.
 *
 * Typically done early in the submission mode specific engine setup stage.
 */
void intel_engine_setup_common(struct intel_engine_cs *engine)
{
	engine->execlist_queue = RB_ROOT;
	engine->execlist_first = NULL;

	intel_engine_init_timeline(engine);
	intel_engine_init_hangcheck(engine);
	i915_gem_batch_pool_init(engine, &engine->batch_pool);

	intel_engine_init_cmd_parser(engine);
}

int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int ret;

	WARN_ON(engine->scratch);

	obj = i915_gem_object_create_stolen(engine->i915, size);
	if (!obj)
		obj = i915_gem_object_create_internal(engine->i915, size);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate scratch page\n");
		return PTR_ERR(obj);
	}

	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}

	ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH);
	if (ret)
		goto err_unref;

	engine->scratch = vma;
	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
			 engine->name, i915_ggtt_offset(vma));
	return 0;

err_unref:
	i915_gem_object_put(obj);
	return ret;
}

static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->scratch);
}

/**
 * intel_engine_init_common - initialize engine state which might require hw access
 * @engine: Engine to initialize.
 *
 * Initializes @engine structure members shared between legacy and execlists
 * submission modes which do require hardware access.
 *
 * Typically done at later stages of submission mode specific engine setup.
 *
 * Returns zero on success or an error code on failure.
 */
int intel_engine_init_common(struct intel_engine_cs *engine)
{
	int ret;

	engine->set_default_submission(engine);

	/* We may need to do things with the shrinker which
	 * require us to immediately switch back to the default
	 * context. This can cause a problem as pinning the
	 * default context also requires GTT space which may not
	 * be available. To avoid this we always pin the default
	 * context.
	 */
	ret = engine->context_pin(engine, engine->i915->kernel_context);
	if (ret)
		return ret;

	ret = intel_engine_init_breadcrumbs(engine);
	if (ret)
		goto err_unpin;

	ret = i915_gem_render_state_init(engine);
	if (ret)
		goto err_unpin;

	return 0;

err_unpin:
	engine->context_unpin(engine, engine->i915->kernel_context);
	return ret;
}

/**
 * intel_engine_cleanup_common - cleans up the engine state created by
 * the common initializers.
 * @engine: Engine to cleanup.
 *
 * This cleans up everything created by the common helpers.
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
	intel_engine_cleanup_scratch(engine);

	i915_gem_render_state_fini(engine);
	intel_engine_fini_breadcrumbs(engine);
	intel_engine_cleanup_cmd_parser(engine);
	i915_gem_batch_pool_fini(&engine->batch_pool);

	engine->context_unpin(engine, engine->i915->kernel_context);
}

u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u64 acthd;

	if (INTEL_GEN(dev_priv) >= 8)
		acthd = I915_READ64_2x32(RING_ACTHD(engine->mmio_base),
					 RING_ACTHD_UDW(engine->mmio_base));
	else if (INTEL_GEN(dev_priv) >= 4)
		acthd = I915_READ(RING_ACTHD(engine->mmio_base));
	else
		acthd = I915_READ(ACTHD);

	return acthd;
}

u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u64 bbaddr;

	if (INTEL_GEN(dev_priv) >= 8)
		bbaddr = I915_READ64_2x32(RING_BBADDR(engine->mmio_base),
					  RING_BBADDR_UDW(engine->mmio_base));
	else
		bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));

	return bbaddr;
}

const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
{
	switch (type) {
	case I915_CACHE_NONE: return " uncached";
	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
	case I915_CACHE_L3_LLC: return " L3+LLC";
	case I915_CACHE_WT: return " WT";
	default: return "";
	}
}

static inline uint32_t
read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
		  int subslice, i915_reg_t reg)
{
	uint32_t mcr;
	uint32_t ret;
	enum forcewake_domains fw_domains;

	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
						    FW_REG_READ);
	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
						     GEN8_MCR_SELECTOR,
						     FW_REG_READ | FW_REG_WRITE);

	spin_lock_irq(&dev_priv->uncore.lock);
	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);

	mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
	/*
	 * The HW expects the slice and subslice selectors to be reset to 0
	 * after reading out the registers.
	 */
	WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK));
	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
	mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);

	ret = I915_READ_FW(reg);

	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);

	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
	spin_unlock_irq(&dev_priv->uncore.lock);

	return ret;
}

/* NB: please notice the memset */
void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u32 mmio_base = engine->mmio_base;
	int slice;
	int subslice;

	memset(instdone, 0, sizeof(*instdone));

	switch (INTEL_GEN(dev_priv)) {
	default:
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id != RCS)
			break;

		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
		for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
			instdone->sampler[slice][subslice] =
				read_subslice_reg(dev_priv, slice, subslice,
						  GEN7_SAMPLER_INSTDONE);
			instdone->row[slice][subslice] =
				read_subslice_reg(dev_priv, slice, subslice,
						  GEN7_ROW_INSTDONE);
		}
		break;
	case 7:
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id != RCS)
			break;

		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
		instdone->sampler[0][0] = I915_READ(GEN7_SAMPLER_INSTDONE);
		instdone->row[0][0] = I915_READ(GEN7_ROW_INSTDONE);

		break;
	case 6:
	case 5:
	case 4:
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id == RCS)
			/* HACK: Using the wrong struct member */
			instdone->slice_common = I915_READ(GEN4_INSTDONE1);
		break;
	case 3:
	case 2:
		instdone->instdone = I915_READ(GEN2_INSTDONE);
		break;
	}
}

static int wa_add(struct drm_i915_private *dev_priv,
		  i915_reg_t addr,
		  const u32 mask, const u32 val)
{
	const u32 idx = dev_priv->workarounds.count;

	if (WARN_ON(idx >= I915_MAX_WA_REGS))
		return -ENOSPC;

	dev_priv->workarounds.reg[idx].addr = addr;
	dev_priv->workarounds.reg[idx].value = val;
	dev_priv->workarounds.reg[idx].mask = mask;

	dev_priv->workarounds.count++;

	return 0;
}

#define WA_REG(addr, mask, val) do { \
		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
		if (r) \
			return r; \
	} while (0)

#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG(addr, mask, _MASKED_FIELD(mask, value))

#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))

#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)

static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
				 i915_reg_t reg)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct i915_workarounds *wa = &dev_priv->workarounds;
	const uint32_t index = wa->hw_whitelist_count[engine->id];

	if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
		return -EINVAL;

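	/*
	 * Programming a RING_FORCE_TO_NONPRIV slot exposes the selected
	 * register to non-privileged (userspace) batch buffers.
	 */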
	WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
		 i915_mmio_reg_offset(reg));
	wa->hw_whitelist_count[engine->id]++;

	return 0;
}

static int gen8_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 * polygons in the same 8x4 pixel/sample area to be processed without
	 * stalling waiting for the earlier ones to write to Hierarchical Z
	 * buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);

	return 0;
}

static int bdw_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen8_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));

	return 0;
}

static int chv_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen8_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);

	return 0;
}

static int gen9_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	/* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk */
	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));

	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk */
	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

	/* WaDisableKillLogic:bxt,skl,kbl */
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
		   ECOCHK_DIS_TLB);

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  FLOW_CONTROL_ENABLE |
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

	/* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
				  GEN9_DG_MIRROR_FIX_ENABLE);

	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
				  GEN9_RHWO_OPTIMIZATION_DISABLE);
		/*
		 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
		 * but we do that in per ctx batchbuffer as there is an issue
		 * with this register not getting restored on ctx restore
		 */
	}

	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */
	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
			  GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl */
	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaDisableMaskBasedCammingInRCC:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
				  PIXEL_MASK_CAMMING_DISABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been a source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT);

	/* WaDisableHDCInvalidation:skl,bxt,kbl */
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
		   BDW_DISABLE_HDC_INVALIDATION);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */
	if (IS_SKYLAKE(dev_priv) ||
	    IS_KABYLAKE(dev_priv) ||
	    IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/* WaOCLCoherentLineFlush:skl,bxt,kbl */
	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
				    GEN8_LQSC_FLUSH_COHERENT_LINES));

	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk */
	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
	if (ret)
		return ret;

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */
	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
	if (ret)
		return ret;

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk */
	ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
	if (ret)
		return ret;

	return 0;
}

static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * -> 0 <= ss <= 3;
		 */
		ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return 0;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));

	return 0;
}

static int skl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/*
	 * Actual WA is to disable percontext preemption granularity control
	 * until D0 which is the default case so this is equivalent to
	 * !WaDisablePerCtxtPreemptionGranularityControl:skl
	 */
	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));

	/* WaEnableGapsTsvCreditFix:skl */
	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
				   GEN9_GAPS_TSV_CREDIT_DISABLE));

	/* WaDisableGafsUnitClkGating:skl */
	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaDisableLSQCROPERFforOCL:skl */
	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
	if (ret)
		return ret;

	return skl_tune_iz_hashing(engine);
}

static int bxt_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaStoreMultiplePTEenable:bxt */
	/* This is a requirement according to Hardware specification */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);

	/* WaSetClckGatingDisableMedia:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
	}

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaDisablePooledEuLoadBalancingFix:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
		WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2,
				  GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
	}

	/* WaDisableSbeCacheDispatchPortSharing:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) {
		WA_SET_BIT_MASKED(
			GEN7_HALF_SLICE_CHICKEN1,
			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
	}

	/* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
	/* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
	/* WaDisableObjectLevelPreemtionForInstanceId:bxt */
	/* WaDisableLSQCROPERFforOCL:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1);
		if (ret)
			return ret;

		ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
		if (ret)
			return ret;
	}

	/* WaProgramL3SqcReg1DefaultForPerf:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
		I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
					   L3_HIGH_PRIO_CREDITS(2));

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaInPlaceDecompressionHang:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	return 0;
}

static int kbl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaEnableGapsTsvCreditFix:kbl */
	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
				   GEN9_GAPS_TSV_CREDIT_DISABLE));

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
		WA_SET_BIT(GAMT_CHKN_BIT_REG,
			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
		WA_SET_BIT_MASKED(HDC_CHICKEN0,
				  HDC_FENCE_DEST_SLM_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableGafsUnitClkGating:kbl */
	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	WA_SET_BIT_MASKED(
		GEN7_HALF_SLICE_CHICKEN1,
		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaDisableLSQCROPERFforOCL:kbl */
	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
	if (ret)
		return ret;

	return 0;
}

static int glk_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaToEnableHwFixForPushConstHWBug:glk */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	return 0;
}

int init_workarounds_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int err;

	WARN_ON(engine->id != RCS);

	dev_priv->workarounds.count = 0;
	dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;

	if (IS_BROADWELL(dev_priv))
		err = bdw_init_workarounds(engine);
	else if (IS_CHERRYVIEW(dev_priv))
		err = chv_init_workarounds(engine);
	else if (IS_SKYLAKE(dev_priv))
		err = skl_init_workarounds(engine);
	else if (IS_BROXTON(dev_priv))
		err = bxt_init_workarounds(engine);
	else if (IS_KABYLAKE(dev_priv))
		err = kbl_init_workarounds(engine);
	else if (IS_GEMINILAKE(dev_priv))
		err = glk_init_workarounds(engine);
	else
		err = 0;
	if (err)
		return err;

	DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
			 engine->name, dev_priv->workarounds.count);
	return 0;
}

int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
{
	struct i915_workarounds *w = &req->i915->workarounds;
	u32 *cs;
	int ret, i;

	if (w->count == 0)
		return 0;

	ret = req->engine->emit_flush(req, EMIT_BARRIER);
	if (ret)
		return ret;

	cs = intel_ring_begin(req, (w->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
	for (i = 0; i < w->count; i++) {
		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
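		/* each LRI entry is a register offset followed by its value */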
		*cs++ = w->reg[i].value;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(req, cs);

	ret = req->engine->emit_flush(req, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}

static bool ring_is_idle(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	bool idle = true;

	intel_runtime_pm_get(dev_priv);

	/* No bit for gen2, so assume the CS parser is idle */
	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
		idle = false;

	intel_runtime_pm_put(dev_priv);

	return idle;
}

/**
 * intel_engine_is_idle() - Report if the engine has finished processing all work
 * @engine: the intel_engine_cs
 *
 * Return true if there are no requests pending, nothing left to be submitted
 * to hardware, and the engine is idle.
 */
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
	/* Any inflight/incomplete requests? */
	if (!i915_seqno_passed(intel_engine_get_seqno(engine),
			       intel_engine_last_submit(engine)))
		return false;

	/* Interrupt/tasklet pending? */
	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
		return false;

	/* Both ports drained, no more ELSP submission? */
	if (engine->execlist_port[0].request)
		return false;

	/* Ring stopped? */
	if (!ring_is_idle(engine))
		return false;

	return true;
}

bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	if (READ_ONCE(dev_priv->gt.active_requests))
		return false;

	/* If the driver is wedged, HW state may be very inconsistent and
	 * report that it is still busy, even though we have stopped using it.
	 */
	if (i915_terminally_wedged(&dev_priv->gpu_error))
		return true;

	for_each_engine(engine, dev_priv, id) {
		if (!intel_engine_is_idle(engine))
			return false;
	}

	return true;
}

void intel_engines_reset_default_submission(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		engine->set_default_submission(engine);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_engine.c"
#endif