/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drm_print.h>

#include "i915_drv.h"
#include "i915_vgpu.h"
#include "intel_ringbuffer.h"
#include "intel_lrc.h"

/* Haswell does have the CXT_SIZE register however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)
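/*
 * (Arithmetic check: 66944 / 4096 is just under 16.4, hence the round up
 * to 17 whole pages.)
 */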
/* Same as Haswell, but 72064 bytes now. */
#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE)

#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE)

#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE)

struct engine_class_info {
	const char *name;
	int (*init_legacy)(struct intel_engine_cs *engine);
	int (*init_execlists)(struct intel_engine_cs *engine);
};

static const struct engine_class_info intel_engine_classes[] = {
	[RENDER_CLASS] = {
		.name = "rcs",
		.init_execlists = logical_render_ring_init,
		.init_legacy = intel_init_render_ring_buffer,
	},
	[COPY_ENGINE_CLASS] = {
		.name = "bcs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_blt_ring_buffer,
	},
	[VIDEO_DECODE_CLASS] = {
		.name = "vcs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_bsd_ring_buffer,
	},
	[VIDEO_ENHANCEMENT_CLASS] = {
		.name = "vecs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_vebox_ring_buffer,
	},
};

struct engine_info {
	unsigned int hw_id;
	unsigned int uabi_id;
	u8 class;
	u8 instance;
	u32 mmio_base;
	unsigned irq_shift;
};

static const struct engine_info intel_engines[] = {
	[RCS] = {
		.hw_id = RCS_HW,
		.uabi_id = I915_EXEC_RENDER,
		.class = RENDER_CLASS,
		.instance = 0,
		.mmio_base = RENDER_RING_BASE,
		.irq_shift = GEN8_RCS_IRQ_SHIFT,
	},
	[BCS] = {
		.hw_id = BCS_HW,
		.uabi_id = I915_EXEC_BLT,
		.class = COPY_ENGINE_CLASS,
		.instance = 0,
		.mmio_base = BLT_RING_BASE,
		.irq_shift = GEN8_BCS_IRQ_SHIFT,
	},
	[VCS] = {
		.hw_id = VCS_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 0,
		.mmio_base = GEN6_BSD_RING_BASE,
		.irq_shift = GEN8_VCS1_IRQ_SHIFT,
	},
	[VCS2] = {
		.hw_id = VCS2_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 1,
		.mmio_base = GEN8_BSD2_RING_BASE,
		.irq_shift = GEN8_VCS2_IRQ_SHIFT,
	},
	[VECS] = {
		.hw_id = VECS_HW,
		.uabi_id = I915_EXEC_VEBOX,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
		.mmio_base = VEBOX_RING_BASE,
		.irq_shift = GEN8_VECS_IRQ_SHIFT,
	},
};

/**
 * __intel_engine_context_size() - return the size of the context for an engine
 * @dev_priv: i915 device private
 * @class: engine class
 *
 * Each engine class may require a different amount of space for a context
 * image.
 *
 * Return: size (in bytes) of an engine class specific context image
 *
 * Note: this size includes the HWSP, which is part of the context image
 * in LRC mode, but does not include the "shared data page" used with
 * GuC submission. The caller should account for this if using the GuC.
 */
static u32
__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
{
	u32 cxt_size;

	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);

	switch (class) {
	case RENDER_CLASS:
		switch (INTEL_GEN(dev_priv)) {
		default:
			MISSING_CASE(INTEL_GEN(dev_priv));
		case 10:
			return GEN10_LR_CONTEXT_RENDER_SIZE;
		case 9:
			return GEN9_LR_CONTEXT_RENDER_SIZE;
		case 8:
			return i915_modparams.enable_execlists ?
			       GEN8_LR_CONTEXT_RENDER_SIZE :
			       GEN8_CXT_TOTAL_SIZE;
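		/*
		 * For the older gens below, the context size is read back
		 * from the hardware: the CXT_SIZE register fields (in what
		 * appear to be 64-byte units, hence the scaling) are summed
		 * and rounded up to whole pages.
		 */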
		case 7:
			if (IS_HASWELL(dev_priv))
				return HSW_CXT_TOTAL_SIZE;

			cxt_size = I915_READ(GEN7_CXT_SIZE);
			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 6:
			cxt_size = I915_READ(CXT_SIZE);
			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 5:
		case 4:
		case 3:
		case 2:
		/* For the special day when i810 gets merged. */
		case 1:
			return 0;
		}
		break;
	default:
		MISSING_CASE(class);
	case VIDEO_DECODE_CLASS:
	case VIDEO_ENHANCEMENT_CLASS:
	case COPY_ENGINE_CLASS:
		if (INTEL_GEN(dev_priv) < 8)
			return 0;
		return GEN8_LR_CONTEXT_OTHER_SIZE;
	}
}

static int
intel_engine_setup(struct drm_i915_private *dev_priv,
		   enum intel_engine_id id)
{
	const struct engine_info *info = &intel_engines[id];
	const struct engine_class_info *class_info;
	struct intel_engine_cs *engine;

	GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
	class_info = &intel_engine_classes[info->class];

	GEM_BUG_ON(dev_priv->engine[id]);
	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return -ENOMEM;

	engine->id = id;
	engine->i915 = dev_priv;
	WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u",
			 class_info->name, info->instance) >=
		sizeof(engine->name));
	engine->uabi_id = info->uabi_id;
	engine->hw_id = engine->guc_id = info->hw_id;
	engine->mmio_base = info->mmio_base;
	engine->irq_shift = info->irq_shift;
	engine->class = info->class;
	engine->instance = info->instance;

	engine->context_size = __intel_engine_context_size(dev_priv,
							   engine->class);
	if (WARN_ON(engine->context_size > BIT(20)))
		engine->context_size = 0;

	/* Nothing to do here, execute in order of dependencies */
	engine->schedule = NULL;

	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);

	dev_priv->engine[id] = engine;
	return 0;
}

/**
 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
 * @dev_priv: i915 device private
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
{
	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
	const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int mask = 0;
	unsigned int i;
	int err;

	WARN_ON(ring_mask == 0);
	WARN_ON(ring_mask &
		GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES));

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		if (!HAS_ENGINE(dev_priv, i))
			continue;

		err = intel_engine_setup(dev_priv, i);
		if (err)
			goto cleanup;

		mask |= ENGINE_MASK(i);
	}

	/*
	 * Catch failures to update intel_engines table when the new engines
	 * are added to the driver by a warning and disabling the forgotten
	 * engines.
	 */
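	/*
	 * (For example: if a new platform set a ring_mask bit for an engine
	 * that has no intel_engines[] entry yet, that bit would never be
	 * added to mask above, the WARN below would fire, and the forgotten
	 * engine would be disabled by trimming ring_mask.)
	 */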
272 */ 273 if (WARN_ON(mask != ring_mask)) 274 device_info->ring_mask = mask; 275 276 /* We always presume we have at least RCS available for later probing */ 277 if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) { 278 err = -ENODEV; 279 goto cleanup; 280 } 281 282 device_info->num_rings = hweight32(mask); 283 284 return 0; 285 286 cleanup: 287 for_each_engine(engine, dev_priv, id) 288 kfree(engine); 289 return err; 290 } 291 292 /** 293 * intel_engines_init() - init the Engine Command Streamers 294 * @dev_priv: i915 device private 295 * 296 * Return: non-zero if the initialization failed. 297 */ 298 int intel_engines_init(struct drm_i915_private *dev_priv) 299 { 300 struct intel_engine_cs *engine; 301 enum intel_engine_id id, err_id; 302 int err; 303 304 for_each_engine(engine, dev_priv, id) { 305 const struct engine_class_info *class_info = 306 &intel_engine_classes[engine->class]; 307 int (*init)(struct intel_engine_cs *engine); 308 309 if (i915_modparams.enable_execlists) 310 init = class_info->init_execlists; 311 else 312 init = class_info->init_legacy; 313 314 err = -EINVAL; 315 err_id = id; 316 317 if (GEM_WARN_ON(!init)) 318 goto cleanup; 319 320 err = init(engine); 321 if (err) 322 goto cleanup; 323 324 GEM_BUG_ON(!engine->submit_request); 325 } 326 327 return 0; 328 329 cleanup: 330 for_each_engine(engine, dev_priv, id) { 331 if (id >= err_id) { 332 kfree(engine); 333 dev_priv->engine[id] = NULL; 334 } else { 335 dev_priv->gt.cleanup_engine(engine); 336 } 337 } 338 return err; 339 } 340 341 void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) 342 { 343 struct drm_i915_private *dev_priv = engine->i915; 344 345 /* Our semaphore implementation is strictly monotonic (i.e. we proceed 346 * so long as the semaphore value in the register/page is greater 347 * than the sync value), so whenever we reset the seqno, 348 * so long as we reset the tracking semaphore value to 0, it will 349 * always be before the next request's seqno. If we don't reset 350 * the semaphore value, then when the seqno moves backwards all 351 * future waits will complete instantly (causing rendering corruption). 352 */ 353 if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { 354 I915_WRITE(RING_SYNC_0(engine->mmio_base), 0); 355 I915_WRITE(RING_SYNC_1(engine->mmio_base), 0); 356 if (HAS_VEBOX(dev_priv)) 357 I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); 358 } 359 if (dev_priv->semaphore) { 360 struct page *page = i915_vma_first_page(dev_priv->semaphore); 361 void *semaphores; 362 363 /* Semaphores are in noncoherent memory, flush to be safe */ 364 semaphores = kmap_atomic(page); 365 memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), 366 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size); 367 drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), 368 I915_NUM_ENGINES * gen8_semaphore_seqno_size); 369 kunmap_atomic(semaphores); 370 } 371 372 intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); 373 clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); 374 375 /* After manually advancing the seqno, fake the interrupt in case 376 * there are any waiters for that seqno. 
377 */ 378 intel_engine_wakeup(engine); 379 380 GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); 381 } 382 383 static void intel_engine_init_timeline(struct intel_engine_cs *engine) 384 { 385 engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id]; 386 } 387 388 static bool csb_force_mmio(struct drm_i915_private *i915) 389 { 390 /* 391 * IOMMU adds unpredictable latency causing the CSB write (from the 392 * GPU into the HWSP) to only be visible some time after the interrupt 393 * (missed breadcrumb syndrome). 394 */ 395 if (intel_vtd_active()) 396 return true; 397 398 /* Older GVT emulation depends upon intercepting CSB mmio */ 399 if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) 400 return true; 401 402 return false; 403 } 404 405 static void intel_engine_init_execlist(struct intel_engine_cs *engine) 406 { 407 struct intel_engine_execlists * const execlists = &engine->execlists; 408 409 execlists->csb_use_mmio = csb_force_mmio(engine->i915); 410 411 execlists->port_mask = 1; 412 GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); /* From Linux 5.0 */ 413 GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); 414 415 execlists->queue = LINUX_RB_ROOT; 416 execlists->first = NULL; 417 } 418 419 /** 420 * intel_engines_setup_common - setup engine state not requiring hw access 421 * @engine: Engine to setup. 422 * 423 * Initializes @engine@ structure members shared between legacy and execlists 424 * submission modes which do not require hardware access. 425 * 426 * Typically done early in the submission mode specific engine setup stage. 427 */ 428 void intel_engine_setup_common(struct intel_engine_cs *engine) 429 { 430 intel_engine_init_execlist(engine); 431 432 intel_engine_init_timeline(engine); 433 intel_engine_init_hangcheck(engine); 434 i915_gem_batch_pool_init(engine, &engine->batch_pool); 435 436 intel_engine_init_cmd_parser(engine); 437 } 438 439 int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) 440 { 441 struct drm_i915_gem_object *obj; 442 struct i915_vma *vma; 443 int ret; 444 445 WARN_ON(engine->scratch); 446 447 obj = i915_gem_object_create_stolen(engine->i915, size); 448 if (!obj) 449 obj = i915_gem_object_create_internal(engine->i915, size); 450 if (IS_ERR(obj)) { 451 DRM_ERROR("Failed to allocate scratch page\n"); 452 return PTR_ERR(obj); 453 } 454 455 vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); 456 if (IS_ERR(vma)) { 457 ret = PTR_ERR(vma); 458 goto err_unref; 459 } 460 461 ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH); 462 if (ret) 463 goto err_unref; 464 465 engine->scratch = vma; 466 DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", 467 engine->name, i915_ggtt_offset(vma)); 468 return 0; 469 470 err_unref: 471 i915_gem_object_put(obj); 472 return ret; 473 } 474 475 static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) 476 { 477 i915_vma_unpin_and_release(&engine->scratch); 478 } 479 480 static void cleanup_phys_status_page(struct intel_engine_cs *engine) 481 { 482 struct drm_i915_private *dev_priv = engine->i915; 483 484 if (!dev_priv->status_page_dmah) 485 return; 486 487 drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah); 488 engine->status_page.page_addr = NULL; 489 } 490 491 static void cleanup_status_page(struct intel_engine_cs *engine) 492 { 493 struct i915_vma *vma; 494 struct drm_i915_gem_object *obj; 495 496 vma = fetch_and_zero(&engine->status_page.vma); 497 if (!vma) 498 return; 499 500 obj = vma->obj; 501 502 
	i915_vma_unpin(vma);
	i915_vma_close(vma);

	i915_gem_object_unpin_map(obj);
	__i915_gem_object_release_unless_active(obj);
}

static int init_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned int flags;
	void *vaddr;
	int ret;

	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate status page\n");
		return PTR_ERR(obj);
	}

	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
	if (ret)
		goto err;

	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
	}

	flags = PIN_GLOBAL;
	if (!HAS_LLC(engine->i915))
		/* On g33, we cannot place HWS above 256MiB, so
		 * restrict its pinning to the low mappable arena.
		 * Though this restriction is not documented for
		 * gen4, gen5, or byt, they also behave similarly
		 * and hang if the HWS is placed at the top of the
		 * GTT. To generalise, it appears that all !llc
		 * platforms have issues with us placing the HWS
		 * above the mappable region (even though we never
		 * actually map it).
		 */
		flags |= PIN_MAPPABLE;
	else
		flags |= PIN_HIGH;
	ret = i915_vma_pin(vma, 0, 4096, flags);
	if (ret)
		goto err;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto err_unpin;
	}

	engine->status_page.vma = vma;
	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
	engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);

	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
			 engine->name, i915_ggtt_offset(vma));
	return 0;

err_unpin:
	i915_vma_unpin(vma);
err:
	i915_gem_object_put(obj);
	return ret;
}

static int init_phys_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	GEM_BUG_ON(engine->id != RCS);

	dev_priv->status_page_dmah =
		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
	if (!dev_priv->status_page_dmah)
		return -ENOMEM;

	engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
	memset(engine->status_page.page_addr, 0, PAGE_SIZE);

	return 0;
}

/**
 * intel_engine_init_common - initialize engine state which might require hw access
 * @engine: Engine to initialize.
 *
 * Initializes @engine structure members shared between legacy and execlists
 * submission modes which do require hardware access.
 *
 * Typically done at later stages of submission mode specific engine setup.
 *
 * Returns zero on success or an error code on failure.
 */
int intel_engine_init_common(struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	int ret;

	engine->set_default_submission(engine);

	/* We may need to do things with the shrinker which
	 * require us to immediately switch back to the default
	 * context. This can cause a problem as pinning the
	 * default context also requires GTT space which may not
	 * be available. To avoid this we always pin the default
	 * context.
	 */
	ring = engine->context_pin(engine, engine->i915->kernel_context);
	if (IS_ERR(ring))
		return PTR_ERR(ring);

	/*
	 * Similarly the preempt context must always be available so that
	 * we can interrupt the engine at any time.
	 */
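	/*
	 * (Otherwise, injecting the preemption request could itself require
	 * GTT space for the preempt context, which may not be available at
	 * that point.)
	 */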
622 */ 623 if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) { 624 ring = engine->context_pin(engine, 625 engine->i915->preempt_context); 626 if (IS_ERR(ring)) { 627 ret = PTR_ERR(ring); 628 goto err_unpin_kernel; 629 } 630 } 631 632 ret = intel_engine_init_breadcrumbs(engine); 633 if (ret) 634 goto err_unpin_preempt; 635 636 ret = i915_gem_render_state_init(engine); 637 if (ret) 638 goto err_breadcrumbs; 639 640 if (HWS_NEEDS_PHYSICAL(engine->i915)) 641 ret = init_phys_status_page(engine); 642 else 643 ret = init_status_page(engine); 644 if (ret) 645 goto err_rs_fini; 646 647 return 0; 648 649 err_rs_fini: 650 i915_gem_render_state_fini(engine); 651 err_breadcrumbs: 652 intel_engine_fini_breadcrumbs(engine); 653 err_unpin_preempt: 654 if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) 655 engine->context_unpin(engine, engine->i915->preempt_context); 656 err_unpin_kernel: 657 engine->context_unpin(engine, engine->i915->kernel_context); 658 return ret; 659 } 660 661 /** 662 * intel_engines_cleanup_common - cleans up the engine state created by 663 * the common initiailizers. 664 * @engine: Engine to cleanup. 665 * 666 * This cleans up everything created by the common helpers. 667 */ 668 void intel_engine_cleanup_common(struct intel_engine_cs *engine) 669 { 670 intel_engine_cleanup_scratch(engine); 671 672 if (HWS_NEEDS_PHYSICAL(engine->i915)) 673 cleanup_phys_status_page(engine); 674 else 675 cleanup_status_page(engine); 676 677 i915_gem_render_state_fini(engine); 678 intel_engine_fini_breadcrumbs(engine); 679 intel_engine_cleanup_cmd_parser(engine); 680 i915_gem_batch_pool_fini(&engine->batch_pool); 681 682 if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) 683 engine->context_unpin(engine, engine->i915->preempt_context); 684 engine->context_unpin(engine, engine->i915->kernel_context); 685 } 686 687 u64 intel_engine_get_active_head(struct intel_engine_cs *engine) 688 { 689 struct drm_i915_private *dev_priv = engine->i915; 690 u64 acthd; 691 692 if (INTEL_GEN(dev_priv) >= 8) 693 acthd = I915_READ64_2x32(RING_ACTHD(engine->mmio_base), 694 RING_ACTHD_UDW(engine->mmio_base)); 695 else if (INTEL_GEN(dev_priv) >= 4) 696 acthd = I915_READ(RING_ACTHD(engine->mmio_base)); 697 else 698 acthd = I915_READ(ACTHD); 699 700 return acthd; 701 } 702 703 u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine) 704 { 705 struct drm_i915_private *dev_priv = engine->i915; 706 u64 bbaddr; 707 708 if (INTEL_GEN(dev_priv) >= 8) 709 bbaddr = I915_READ64_2x32(RING_BBADDR(engine->mmio_base), 710 RING_BBADDR_UDW(engine->mmio_base)); 711 else 712 bbaddr = I915_READ(RING_BBADDR(engine->mmio_base)); 713 714 return bbaddr; 715 } 716 717 const char *i915_cache_level_str(struct drm_i915_private *i915, int type) 718 { 719 switch (type) { 720 case I915_CACHE_NONE: return " uncached"; 721 case I915_CACHE_LLC: return HAS_LLC(i915) ? 
" LLC" : " snooped"; 722 case I915_CACHE_L3_LLC: return " L3+LLC"; 723 case I915_CACHE_WT: return " WT"; 724 default: return ""; 725 } 726 } 727 728 static inline uint32_t 729 read_subslice_reg(struct drm_i915_private *dev_priv, int slice, 730 int subslice, i915_reg_t reg) 731 { 732 uint32_t mcr; 733 uint32_t ret; 734 enum forcewake_domains fw_domains; 735 736 fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, 737 FW_REG_READ); 738 fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, 739 GEN8_MCR_SELECTOR, 740 FW_REG_READ | FW_REG_WRITE); 741 742 spin_lock_irq(&dev_priv->uncore.lock); 743 intel_uncore_forcewake_get__locked(dev_priv, fw_domains); 744 745 mcr = I915_READ_FW(GEN8_MCR_SELECTOR); 746 /* 747 * The HW expects the slice and sublice selectors to be reset to 0 748 * after reading out the registers. 749 */ 750 WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK)); 751 mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK); 752 mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); 753 I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr); 754 755 ret = I915_READ_FW(reg); 756 757 mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK); 758 I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr); 759 760 intel_uncore_forcewake_put__locked(dev_priv, fw_domains); 761 spin_unlock_irq(&dev_priv->uncore.lock); 762 763 return ret; 764 } 765 766 /* NB: please notice the memset */ 767 void intel_engine_get_instdone(struct intel_engine_cs *engine, 768 struct intel_instdone *instdone) 769 { 770 struct drm_i915_private *dev_priv = engine->i915; 771 u32 mmio_base = engine->mmio_base; 772 int slice; 773 int subslice; 774 775 memset(instdone, 0, sizeof(*instdone)); 776 777 switch (INTEL_GEN(dev_priv)) { 778 default: 779 instdone->instdone = I915_READ(RING_INSTDONE(mmio_base)); 780 781 if (engine->id != RCS) 782 break; 783 784 instdone->slice_common = I915_READ(GEN7_SC_INSTDONE); 785 for_each_instdone_slice_subslice(dev_priv, slice, subslice) { 786 instdone->sampler[slice][subslice] = 787 read_subslice_reg(dev_priv, slice, subslice, 788 GEN7_SAMPLER_INSTDONE); 789 instdone->row[slice][subslice] = 790 read_subslice_reg(dev_priv, slice, subslice, 791 GEN7_ROW_INSTDONE); 792 } 793 break; 794 case 7: 795 instdone->instdone = I915_READ(RING_INSTDONE(mmio_base)); 796 797 if (engine->id != RCS) 798 break; 799 800 instdone->slice_common = I915_READ(GEN7_SC_INSTDONE); 801 instdone->sampler[0][0] = I915_READ(GEN7_SAMPLER_INSTDONE); 802 instdone->row[0][0] = I915_READ(GEN7_ROW_INSTDONE); 803 804 break; 805 case 6: 806 case 5: 807 case 4: 808 instdone->instdone = I915_READ(RING_INSTDONE(mmio_base)); 809 810 if (engine->id == RCS) 811 /* HACK: Using the wrong struct member */ 812 instdone->slice_common = I915_READ(GEN4_INSTDONE1); 813 break; 814 case 3: 815 case 2: 816 instdone->instdone = I915_READ(GEN2_INSTDONE); 817 break; 818 } 819 } 820 821 static int wa_add(struct drm_i915_private *dev_priv, 822 i915_reg_t addr, 823 const u32 mask, const u32 val) 824 { 825 const u32 idx = dev_priv->workarounds.count; 826 827 if (WARN_ON(idx >= I915_MAX_WA_REGS)) 828 return -ENOSPC; 829 830 dev_priv->workarounds.reg[idx].addr = addr; 831 dev_priv->workarounds.reg[idx].value = val; 832 dev_priv->workarounds.reg[idx].mask = mask; 833 834 dev_priv->workarounds.count++; 835 836 return 0; 837 } 838 839 #define WA_REG(addr, mask, val) do { \ 840 const int r = wa_add(dev_priv, (addr), (mask), (val)); \ 841 if (r) \ 842 return r; \ 843 } while (0) 844 845 #define WA_SET_BIT_MASKED(addr, mask) \ 846 WA_REG(addr, (mask), 
#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG(addr, mask, _MASKED_FIELD(mask, value))

static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
				 i915_reg_t reg)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct i915_workarounds *wa = &dev_priv->workarounds;
	const uint32_t index = wa->hw_whitelist_count[engine->id];

	if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
		return -EINVAL;

	I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
		   i915_mmio_reg_offset(reg));
	wa->hw_whitelist_count[engine->id]++;

	return 0;
}

static int gen8_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 * polygons in the same 8x4 pixel/sample area to be processed without
	 * stalling waiting for the earlier ones to write to Hierarchical Z
	 * buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);

	return 0;
}

static int bdw_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen8_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));

	return 0;
}
static int chv_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen8_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);

	return 0;
}

static int gen9_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	/* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
		   _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));

	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(dev_priv))
		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
			   ECOCHK_DIS_TLB);

	if (HAS_LLC(dev_priv)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);

		I915_WRITE(MMCD_MISC_CTRL,
			   I915_READ(MMCD_MISC_CTRL) |
			   MMCD_PCLA |
			   MMCD_HOTSPOT_EN);
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  FLOW_CONTROL_ENABLE |
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
	if (!IS_COFFEELAKE(dev_priv))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

	/* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
				  GEN9_DG_MIRROR_FIX_ENABLE);

	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
				  GEN9_RHWO_OPTIMIZATION_DISABLE);
		/*
		 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
		 * but we do that in per ctx batchbuffer as there is an issue
		 * with this register not getting restored on ctx restore
		 */
	}

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
			  GEN9_ENABLE_YV12_BUGFIX |
			  GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaDisableMaskBasedCammingInRCC:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
				  PIXEL_MASK_CAMMING_DISABLE);
	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT);

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
		   BDW_DISABLE_HDC_INVALIDATION);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(dev_priv) ||
	    IS_KABYLAKE(dev_priv) ||
	    IS_COFFEELAKE(dev_priv) ||
	    IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
				    GEN8_LQSC_FLUSH_COHERENT_LINES));

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to safe value. Userspace is
	 * still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining old contract with userspace.
	 */
	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
	if (ret)
		return ret;

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
	if (ret)
		return ret;

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
	if (ret)
		return ret;

	return 0;
}
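/*
 * Worked example for the hashing tuning below (illustrative): a slice with
 * sseu.subslice_7eu[i] == 0b0100 has exactly one subslice (2) with 7 EUs,
 * so ss = ffs(0b0100) - 1 = 2 and the value programmed for that slice is
 * vals[i] = 3 - ss = 1.
 */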
static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return 0;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));

	return 0;
}

static int skl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaEnableGapsTsvCreditFix:skl */
	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
				   GEN9_GAPS_TSV_CREDIT_DISABLE));

	/* WaDisableGafsUnitClkGating:skl */
	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));

	/* WaDisableLSQCROPERFforOCL:skl */
	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
	if (ret)
		return ret;

	return skl_tune_iz_hashing(engine);
}

static int bxt_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaStoreMultiplePTEenable:bxt */
	/* This is a requirement according to Hardware specification */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);

	/* WaSetClckGatingDisableMedia:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
	}

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaDisablePooledEuLoadBalancingFix:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
		I915_WRITE(FF_SLICE_CS_CHICKEN2,
			   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
	}

	/* WaDisableSbeCacheDispatchPortSharing:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) {
		WA_SET_BIT_MASKED(
			GEN7_HALF_SLICE_CHICKEN1,
			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
	}

	/* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
	/* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
	/* WaDisableObjectLevelPreemtionForInstanceId:bxt */
	/* WaDisableLSQCROPERFforOCL:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1);
		if (ret)
			return ret;

		ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
		if (ret)
			return ret;
	}

	/* WaProgramL3SqcReg1DefaultForPerf:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
		u32 val = I915_READ(GEN8_L3SQCREG1);

		val &= ~L3_PRIO_CREDITS_MASK;
		val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
		I915_WRITE(GEN8_L3SQCREG1, val);
	}

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaInPlaceDecompressionHang:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));

	return 0;
}

static int cnl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
		I915_WRITE(GAMT_CHKN_BIT_REG,
			   (I915_READ(GAMT_CHKN_BIT_REG) |
			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT));

	/* WaForceContextSaveRestoreNonCoherent:cnl */
	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
	if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

	/* WaInPlaceDecompressionHang:cnl */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));

	/* WaPushConstantDereferenceHoldDisable:cnl */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

	/* FtrEnableFastAnisoL1BankingFix: cnl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

	/* WaDisable3DMidCmdPreemption:cnl */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:cnl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
	if (ret)
		return ret;

	return 0;
}

static int kbl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaEnableGapsTsvCreditFix:kbl */
	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
				   GEN9_GAPS_TSV_CREDIT_DISABLE));

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
		I915_WRITE(GAMT_CHKN_BIT_REG,
			   (I915_READ(GAMT_CHKN_BIT_REG) |
			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING));

	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
		WA_SET_BIT_MASKED(HDC_CHICKEN0,
				  HDC_FENCE_DEST_SLM_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableGafsUnitClkGating:kbl */
	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	WA_SET_BIT_MASKED(
		GEN7_HALF_SLICE_CHICKEN1,
		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));

	/* WaDisableLSQCROPERFforOCL:kbl */
	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
	if (ret)
		return ret;

	return 0;
}

static int glk_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
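	/*
	 * (Whitelisting GEN9_SLICE_COMMON_ECO_CHICKEN1 below via
	 * RING_FORCE_TO_NONPRIV is what makes the register writable from
	 * userspace batches.)
	 */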
	ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
	if (ret)
		return ret;

	/* WaToEnableHwFixForPushConstHWBug:glk */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	return 0;
}

static int cfl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaEnableGapsTsvCreditFix:cfl */
	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
				   GEN9_GAPS_TSV_CREDIT_DISABLE));

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableGafsUnitClkGating:cfl */
	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	WA_SET_BIT_MASKED(
		GEN7_HALF_SLICE_CHICKEN1,
		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));

	return 0;
}

int init_workarounds_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int err;

	WARN_ON(engine->id != RCS);

	dev_priv->workarounds.count = 0;
	dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;

	if (IS_BROADWELL(dev_priv))
		err = bdw_init_workarounds(engine);
	else if (IS_CHERRYVIEW(dev_priv))
		err = chv_init_workarounds(engine);
	else if (IS_SKYLAKE(dev_priv))
		err = skl_init_workarounds(engine);
	else if (IS_BROXTON(dev_priv))
		err = bxt_init_workarounds(engine);
	else if (IS_KABYLAKE(dev_priv))
		err = kbl_init_workarounds(engine);
	else if (IS_GEMINILAKE(dev_priv))
		err = glk_init_workarounds(engine);
	else if (IS_COFFEELAKE(dev_priv))
		err = cfl_init_workarounds(engine);
	else if (IS_CANNONLAKE(dev_priv))
		err = cnl_init_workarounds(engine);
	else
		err = 0;
	if (err)
		return err;

	DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
			 engine->name, dev_priv->workarounds.count);
	return 0;
}

int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
{
	struct i915_workarounds *w = &req->i915->workarounds;
	u32 *cs;
	int ret, i;

	if (w->count == 0)
		return 0;

	ret = req->engine->emit_flush(req, EMIT_BARRIER);
	if (ret)
		return ret;

	cs = intel_ring_begin(req, (w->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
	for (i = 0; i < w->count; i++) {
		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
		*cs++ = w->reg[i].value;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(req, cs);

	ret = req->engine->emit_flush(req, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}
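/*
 * The command stream emitted above has the layout (illustrative):
 *
 *	MI_LOAD_REGISTER_IMM(count)
 *	{ register offset, value } * count
 *	MI_NOOP
 *
 * i.e. two dwords per workaround register plus the LRI header and a NOOP,
 * matching the w->count * 2 + 2 dwords reserved with intel_ring_begin().
 */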
static bool ring_is_idle(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	bool idle = true;

	intel_runtime_pm_get(dev_priv);

	/* First check that no commands are left in the ring */
	if ((I915_READ_HEAD(engine) & HEAD_ADDR) !=
	    (I915_READ_TAIL(engine) & TAIL_ADDR))
		idle = false;

	/* No bit for gen2, so assume the CS parser is idle */
	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
		idle = false;

	intel_runtime_pm_put(dev_priv);

	return idle;
}

/**
 * intel_engine_is_idle() - Report if the engine has finished processing all work
 * @engine: the intel_engine_cs
 *
 * Return true if there are no requests pending, nothing left to be submitted
 * to hardware, and that the engine is idle.
 */
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	/* More white lies, if wedged, hw state is inconsistent */
	if (i915_terminally_wedged(&dev_priv->gpu_error))
		return true;

	/* Any inflight/incomplete requests? */
	if (!i915_seqno_passed(intel_engine_get_seqno(engine),
			       intel_engine_last_submit(engine)))
		return false;

	if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock))
		return true;

	/* Interrupt/tasklet pending? */
	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
		return false;

	/* Waiting to drain ELSP? */
	if (READ_ONCE(engine->execlists.active))
		return false;

	/* ELSP is empty, but there are ready requests? */
	if (READ_ONCE(engine->execlists.first))
		return false;

	/* Ring stopped? */
	if (!ring_is_idle(engine))
		return false;

	return true;
}

bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	if (READ_ONCE(dev_priv->gt.active_requests))
		return false;

	/* If the driver is wedged, HW state may be very inconsistent and
	 * report that it is still busy, even though we have stopped using it.
	 */
	if (i915_terminally_wedged(&dev_priv->gpu_error))
		return true;

	for_each_engine(engine, dev_priv, id) {
		if (!intel_engine_is_idle(engine))
			return false;
	}

	return true;
}

void intel_engines_reset_default_submission(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		engine->set_default_submission(engine);
}

void intel_engines_mark_idle(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id) {
		intel_engine_disarm_breadcrumbs(engine);
		i915_gem_batch_pool_fini(&engine->batch_pool);
		tasklet_kill(&engine->execlists.irq_tasklet);
		engine->execlists.no_priolist = false;
	}
}

bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
{
	switch (INTEL_GEN(engine->i915)) {
	case 2:
		return false; /* uses physical not virtual addresses */
	case 3:
		/* maybe only uses physical not virtual addresses */
		return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
	case 6:
		return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
	default:
		return true;
	}
}
: "", 1637 rq->ctx->hw_id, rq->fence.seqno, 1638 rq->priotree.priority, 1639 jiffies_to_msecs(jiffies - rq->emitted_jiffies), 1640 rq->timeline->common->name); 1641 } 1642 1643 void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) 1644 { 1645 struct intel_breadcrumbs * const b = &engine->breadcrumbs; 1646 const struct intel_engine_execlists * const execlists = &engine->execlists; 1647 struct i915_gpu_error * const error = &engine->i915->gpu_error; 1648 struct drm_i915_private *dev_priv = engine->i915; 1649 struct drm_i915_gem_request *rq; 1650 struct rb_node *rb; 1651 u64 addr; 1652 1653 drm_printf(m, "%s\n", engine->name); 1654 drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%ld ms], inflight %d\n", 1655 intel_engine_get_seqno(engine), 1656 intel_engine_last_submit(engine), 1657 engine->hangcheck.seqno, 1658 jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), 1659 engine->timeline->inflight_seqnos); 1660 drm_printf(m, "\tReset count: %d\n", 1661 i915_reset_engine_count(error, engine)); 1662 1663 rcu_read_lock(); 1664 1665 drm_printf(m, "\tRequests:\n"); 1666 1667 rq = list_first_entry(&engine->timeline->requests, 1668 struct drm_i915_gem_request, link); 1669 if (&rq->link != &engine->timeline->requests) 1670 print_request(m, rq, "\t\tfirst "); 1671 1672 rq = list_last_entry(&engine->timeline->requests, 1673 struct drm_i915_gem_request, link); 1674 if (&rq->link != &engine->timeline->requests) 1675 print_request(m, rq, "\t\tlast "); 1676 1677 rq = i915_gem_find_active_request(engine); 1678 if (rq) { 1679 print_request(m, rq, "\t\tactive "); 1680 drm_printf(m, 1681 "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", 1682 rq->head, rq->postfix, rq->tail, 1683 rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, 1684 rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); 1685 } 1686 1687 drm_printf(m, "\tRING_START: 0x%08x [0x%08x]\n", 1688 I915_READ(RING_START(engine->mmio_base)), 1689 rq ? i915_ggtt_offset(rq->ring->vma) : 0); 1690 drm_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n", 1691 I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR, 1692 rq ? rq->ring->head : 0); 1693 drm_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n", 1694 I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR, 1695 rq ? rq->ring->tail : 0); 1696 drm_printf(m, "\tRING_CTL: 0x%08x [%s]\n", 1697 I915_READ(RING_CTL(engine->mmio_base)), 1698 I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? "waiting" : ""); 1699 1700 rcu_read_unlock(); 1701 1702 addr = intel_engine_get_active_head(engine); 1703 drm_printf(m, "\tACTHD: 0x%08x_%08x\n", 1704 upper_32_bits(addr), lower_32_bits(addr)); 1705 addr = intel_engine_get_last_batch_head(engine); 1706 drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", 1707 upper_32_bits(addr), lower_32_bits(addr)); 1708 1709 if (i915_modparams.enable_execlists) { 1710 const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; 1711 u32 ptr, read, write; 1712 unsigned int idx; 1713 1714 drm_printf(m, "\tExeclist status: 0x%08x %08x\n", 1715 I915_READ(RING_EXECLIST_STATUS_LO(engine)), 1716 I915_READ(RING_EXECLIST_STATUS_HI(engine))); 1717 1718 ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); 1719 read = GEN8_CSB_READ_PTR(ptr); 1720 write = GEN8_CSB_WRITE_PTR(ptr); 1721 drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? 
		if (read >= GEN8_CSB_ENTRIES)
			read = 0;
		if (write >= GEN8_CSB_ENTRIES)
			write = 0;
		if (read > write)
			write += GEN8_CSB_ENTRIES;
		while (read < write) {
			idx = ++read % GEN8_CSB_ENTRIES;
			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n",
				   idx,
				   I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
				   hws[idx * 2],
				   I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)),
				   hws[idx * 2 + 1]);
		}

		rcu_read_lock();
		for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
			unsigned int count;

			rq = port_unpack(&execlists->port[idx], &count);
			if (rq) {
				drm_printf(m, "\t\tELSP[%d] count=%d, ",
					   idx, count);
				print_request(m, rq, "rq: ");
			} else {
				drm_printf(m, "\t\tELSP[%d] idle\n",
					   idx);
			}
		}
		drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
		rcu_read_unlock();
	} else if (INTEL_GEN(dev_priv) > 6) {
		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
			   I915_READ(RING_PP_DIR_BASE(engine)));
		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
			   I915_READ(RING_PP_DIR_BASE_READ(engine)));
		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
			   I915_READ(RING_PP_DIR_DCLV(engine)));
	}

	spin_lock_irq(&engine->timeline->lock);
	list_for_each_entry(rq, &engine->timeline->requests, link)
		print_request(m, rq, "\t\tE ");
	for (rb = execlists->first; rb; rb = rb_next(rb)) {
		struct i915_priolist *p =
			rb_entry(rb, typeof(*p), node);

		list_for_each_entry(rq, &p->requests, priotree.link)
			print_request(m, rq, "\t\tQ ");
	}
	spin_unlock_irq(&engine->timeline->lock);

	spin_lock_irq(&b->rb_lock);
	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
		struct intel_wait *w = rb_entry(rb, typeof(*w), node);

		drm_printf(m, "\t%s [%d] waiting for %x\n",
			   w->tsk->comm, w->tsk->pid, w->seqno);
	}
	spin_unlock_irq(&b->rb_lock);

	drm_printf(m, "\n");
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_engine.c"
#endif