1 /* 2 * Copyright © 2008-2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * Zou Nan hai <nanhai.zou@intel.com> 26 * Xiang Hai hao<haihao.xiang@intel.com> 27 * 28 */ 29 30 #include <linux/log2.h> 31 #include <drm/drmP.h> 32 #include "i915_drv.h" 33 #include <drm/i915_drm.h> 34 #include "i915_trace.h" 35 #include "intel_drv.h" 36 37 /* Rough estimate of the typical request size, performing a flush, 38 * set-context and then emitting the batch. 39 */ 40 #define LEGACY_REQUEST_SIZE 200 41 42 static int __intel_ring_space(int head, int tail, int size) 43 { 44 int space = head - tail; 45 if (space <= 0) 46 space += size; 47 return space - I915_RING_FREE_SPACE; 48 } 49 50 void intel_ring_update_space(struct intel_ring *ring) 51 { 52 ring->space = __intel_ring_space(ring->head, ring->emit, ring->size); 53 } 54 55 static int 56 gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) 57 { 58 u32 cmd, *cs; 59 60 cmd = MI_FLUSH; 61 62 if (mode & EMIT_INVALIDATE) 63 cmd |= MI_READ_FLUSH; 64 65 cs = intel_ring_begin(req, 2); 66 if (IS_ERR(cs)) 67 return PTR_ERR(cs); 68 69 *cs++ = cmd; 70 *cs++ = MI_NOOP; 71 intel_ring_advance(req, cs); 72 73 return 0; 74 } 75 76 static int 77 gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) 78 { 79 u32 cmd, *cs; 80 81 /* 82 * read/write caches: 83 * 84 * I915_GEM_DOMAIN_RENDER is always invalidated, but is 85 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is 86 * also flushed at 2d versus 3d pipeline switches. 87 * 88 * read-only caches: 89 * 90 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if 91 * MI_READ_FLUSH is set, and is always flushed on 965. 92 * 93 * I915_GEM_DOMAIN_COMMAND may not exist? 94 * 95 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is 96 * invalidated when MI_EXE_FLUSH is set. 97 * 98 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is 99 * invalidated with every MI_FLUSH. 100 * 101 * TLBs: 102 * 103 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND 104 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and 105 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER 106 * are flushed at any MI_FLUSH. 107 */ 108 109 cmd = MI_FLUSH; 110 if (mode & EMIT_INVALIDATE) { 111 cmd |= MI_EXE_FLUSH; 112 if (IS_G4X(req->i915) || IS_GEN5(req->i915)) 113 cmd |= MI_INVALIDATE_ISP; 114 } 115 116 cs = intel_ring_begin(req, 2); 117 if (IS_ERR(cs)) 118 return PTR_ERR(cs); 119 120 *cs++ = cmd; 121 *cs++ = MI_NOOP; 122 intel_ring_advance(req, cs); 123 124 return 0; 125 } 126 127 /** 128 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for 129 * implementing two workarounds on gen6. From section 1.4.7.1 130 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: 131 * 132 * [DevSNB-C+{W/A}] Before any depth stall flush (including those 133 * produced by non-pipelined state commands), software needs to first 134 * send a PIPE_CONTROL with no bits set except Post-Sync Operation != 135 * 0. 136 * 137 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable 138 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. 139 * 140 * And the workaround for these two requires this workaround first: 141 * 142 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent 143 * BEFORE the pipe-control with a post-sync op and no write-cache 144 * flushes. 145 * 146 * And this last workaround is tricky because of the requirements on 147 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM 148 * volume 2 part 1: 149 * 150 * "1 of the following must also be set: 151 * - Render Target Cache Flush Enable ([12] of DW1) 152 * - Depth Cache Flush Enable ([0] of DW1) 153 * - Stall at Pixel Scoreboard ([1] of DW1) 154 * - Depth Stall ([13] of DW1) 155 * - Post-Sync Operation ([13] of DW1) 156 * - Notify Enable ([8] of DW1)" 157 * 158 * The cache flushes require the workaround flush that triggered this 159 * one, so we can't use it. Depth stall would trigger the same. 160 * Post-sync nonzero is what triggered this second workaround, so we 161 * can't use that one either. Notify enable is IRQs, which aren't 162 * really our business. That leaves only stall at scoreboard. 163 */ 164 static int 165 intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) 166 { 167 u32 scratch_addr = 168 i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; 169 u32 *cs; 170 171 cs = intel_ring_begin(req, 6); 172 if (IS_ERR(cs)) 173 return PTR_ERR(cs); 174 175 *cs++ = GFX_OP_PIPE_CONTROL(5); 176 *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; 177 *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; 178 *cs++ = 0; /* low dword */ 179 *cs++ = 0; /* high dword */ 180 *cs++ = MI_NOOP; 181 intel_ring_advance(req, cs); 182 183 cs = intel_ring_begin(req, 6); 184 if (IS_ERR(cs)) 185 return PTR_ERR(cs); 186 187 *cs++ = GFX_OP_PIPE_CONTROL(5); 188 *cs++ = PIPE_CONTROL_QW_WRITE; 189 *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; 190 *cs++ = 0; 191 *cs++ = 0; 192 *cs++ = MI_NOOP; 193 intel_ring_advance(req, cs); 194 195 return 0; 196 } 197 198 static int 199 gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) 200 { 201 u32 scratch_addr = 202 i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; 203 u32 *cs, flags = 0; 204 int ret; 205 206 /* Force SNB workarounds for PIPE_CONTROL flushes */ 207 ret = intel_emit_post_sync_nonzero_flush(req); 208 if (ret) 209 return ret; 210 211 /* Just flush everything. Experiments have shown that reducing the 212 * number of bits based on the write domains has little performance 213 * impact. 214 */ 215 if (mode & EMIT_FLUSH) { 216 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 217 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 218 /* 219 * Ensure that any following seqno writes only happen 220 * when the render cache is indeed flushed. 221 */ 222 flags |= PIPE_CONTROL_CS_STALL; 223 } 224 if (mode & EMIT_INVALIDATE) { 225 flags |= PIPE_CONTROL_TLB_INVALIDATE; 226 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 227 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 228 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 229 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 230 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 231 /* 232 * TLB invalidate requires a post-sync write. 233 */ 234 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; 235 } 236 237 cs = intel_ring_begin(req, 4); 238 if (IS_ERR(cs)) 239 return PTR_ERR(cs); 240 241 *cs++ = GFX_OP_PIPE_CONTROL(4); 242 *cs++ = flags; 243 *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; 244 *cs++ = 0; 245 intel_ring_advance(req, cs); 246 247 return 0; 248 } 249 250 static int 251 gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) 252 { 253 u32 *cs; 254 255 cs = intel_ring_begin(req, 4); 256 if (IS_ERR(cs)) 257 return PTR_ERR(cs); 258 259 *cs++ = GFX_OP_PIPE_CONTROL(4); 260 *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; 261 *cs++ = 0; 262 *cs++ = 0; 263 intel_ring_advance(req, cs); 264 265 return 0; 266 } 267 268 static int 269 gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) 270 { 271 u32 scratch_addr = 272 i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; 273 u32 *cs, flags = 0; 274 275 /* 276 * Ensure that any following seqno writes only happen when the render 277 * cache is indeed flushed. 278 * 279 * Workaround: 4th PIPE_CONTROL command (except the ones with only 280 * read-cache invalidate bits set) must have the CS_STALL bit set. We 281 * don't try to be clever and just set it unconditionally. 282 */ 283 flags |= PIPE_CONTROL_CS_STALL; 284 285 /* Just flush everything. Experiments have shown that reducing the 286 * number of bits based on the write domains has little performance 287 * impact. 288 */ 289 if (mode & EMIT_FLUSH) { 290 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 291 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 292 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; 293 flags |= PIPE_CONTROL_FLUSH_ENABLE; 294 } 295 if (mode & EMIT_INVALIDATE) { 296 flags |= PIPE_CONTROL_TLB_INVALIDATE; 297 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 298 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 299 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 300 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 301 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 302 flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; 303 /* 304 * TLB invalidate requires a post-sync write. 305 */ 306 flags |= PIPE_CONTROL_QW_WRITE; 307 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; 308 309 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; 310 311 /* Workaround: we must issue a pipe_control with CS-stall bit 312 * set before a pipe_control command that has the state cache 313 * invalidate bit set. */ 314 gen7_render_ring_cs_stall_wa(req); 315 } 316 317 cs = intel_ring_begin(req, 4); 318 if (IS_ERR(cs)) 319 return PTR_ERR(cs); 320 321 *cs++ = GFX_OP_PIPE_CONTROL(4); 322 *cs++ = flags; 323 *cs++ = scratch_addr; 324 *cs++ = 0; 325 intel_ring_advance(req, cs); 326 327 return 0; 328 } 329 330 static int 331 gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) 332 { 333 u32 flags; 334 u32 *cs; 335 336 cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6); 337 if (IS_ERR(cs)) 338 return PTR_ERR(cs); 339 340 flags = PIPE_CONTROL_CS_STALL; 341 342 if (mode & EMIT_FLUSH) { 343 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 344 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 345 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; 346 flags |= PIPE_CONTROL_FLUSH_ENABLE; 347 } 348 if (mode & EMIT_INVALIDATE) { 349 flags |= PIPE_CONTROL_TLB_INVALIDATE; 350 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 351 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 352 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 353 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 354 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 355 flags |= PIPE_CONTROL_QW_WRITE; 356 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; 357 358 /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ 359 cs = gen8_emit_pipe_control(cs, 360 PIPE_CONTROL_CS_STALL | 361 PIPE_CONTROL_STALL_AT_SCOREBOARD, 362 0); 363 } 364 365 cs = gen8_emit_pipe_control(cs, flags, 366 i915_ggtt_offset(req->engine->scratch) + 367 2 * CACHELINE_BYTES); 368 369 intel_ring_advance(req, cs); 370 371 return 0; 372 } 373 374 static void ring_setup_phys_status_page(struct intel_engine_cs *engine) 375 { 376 struct drm_i915_private *dev_priv = engine->i915; 377 u32 addr; 378 379 addr = dev_priv->status_page_dmah->busaddr; 380 if (INTEL_GEN(dev_priv) >= 4) 381 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0; 382 I915_WRITE(HWS_PGA, addr); 383 } 384 385 static void intel_ring_setup_status_page(struct intel_engine_cs *engine) 386 { 387 struct drm_i915_private *dev_priv = engine->i915; 388 i915_reg_t mmio; 389 390 /* The ring status page addresses are no longer next to the rest of 391 * the ring registers as of gen7. 392 */ 393 if (IS_GEN7(dev_priv)) { 394 switch (engine->id) { 395 case RCS: 396 mmio = RENDER_HWS_PGA_GEN7; 397 break; 398 case BCS: 399 mmio = BLT_HWS_PGA_GEN7; 400 break; 401 /* 402 * VCS2 actually doesn't exist on Gen7. Only shut up 403 * gcc switch check warning 404 */ 405 case VCS2: 406 case VCS: 407 mmio = BSD_HWS_PGA_GEN7; 408 break; 409 case VECS: 410 mmio = VEBOX_HWS_PGA_GEN7; 411 break; 412 } 413 } else if (IS_GEN6(dev_priv)) { 414 mmio = RING_HWS_PGA_GEN6(engine->mmio_base); 415 } else { 416 /* XXX: gen8 returns to sanity */ 417 mmio = RING_HWS_PGA(engine->mmio_base); 418 } 419 420 I915_WRITE(mmio, engine->status_page.ggtt_offset); 421 POSTING_READ(mmio); 422 423 /* 424 * Flush the TLB for this page 425 * 426 * FIXME: These two bits have disappeared on gen8, so a question 427 * arises: do we still need this and if so how should we go about 428 * invalidating the TLB? 429 */ 430 if (IS_GEN(dev_priv, 6, 7)) { 431 i915_reg_t reg = RING_INSTPM(engine->mmio_base); 432 433 /* ring should be idle before issuing a sync flush*/ 434 WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0); 435 436 I915_WRITE(reg, 437 _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | 438 INSTPM_SYNC_FLUSH)); 439 if (intel_wait_for_register(dev_priv, 440 reg, INSTPM_SYNC_FLUSH, 0, 441 1000)) 442 DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n", 443 engine->name); 444 } 445 } 446 447 static bool stop_ring(struct intel_engine_cs *engine) 448 { 449 struct drm_i915_private *dev_priv = engine->i915; 450 451 if (INTEL_GEN(dev_priv) > 2) { 452 I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING)); 453 if (intel_wait_for_register(dev_priv, 454 RING_MI_MODE(engine->mmio_base), 455 MODE_IDLE, 456 MODE_IDLE, 457 1000)) { 458 DRM_ERROR("%s : timed out trying to stop ring\n", 459 engine->name); 460 /* Sometimes we observe that the idle flag is not 461 * set even though the ring is empty. So double 462 * check before giving up. 463 */ 464 if (I915_READ_HEAD(engine) != I915_READ_TAIL(engine)) 465 return false; 466 } 467 } 468 469 I915_WRITE_CTL(engine, 0); 470 I915_WRITE_HEAD(engine, 0); 471 I915_WRITE_TAIL(engine, 0); 472 473 if (INTEL_GEN(dev_priv) > 2) { 474 (void)I915_READ_CTL(engine); 475 I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); 476 } 477 478 return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0; 479 } 480 481 static int init_ring_common(struct intel_engine_cs *engine) 482 { 483 struct drm_i915_private *dev_priv = engine->i915; 484 struct intel_ring *ring = engine->buffer; 485 int ret = 0; 486 487 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 488 489 if (!stop_ring(engine)) { 490 /* G45 ring initialization often fails to reset head to zero */ 491 DRM_DEBUG_KMS("%s head not reset to zero " 492 "ctl %08x head %08x tail %08x start %08x\n", 493 engine->name, 494 I915_READ_CTL(engine), 495 I915_READ_HEAD(engine), 496 I915_READ_TAIL(engine), 497 I915_READ_START(engine)); 498 499 if (!stop_ring(engine)) { 500 DRM_ERROR("failed to set %s head to zero " 501 "ctl %08x head %08x tail %08x start %08x\n", 502 engine->name, 503 I915_READ_CTL(engine), 504 I915_READ_HEAD(engine), 505 I915_READ_TAIL(engine), 506 I915_READ_START(engine)); 507 ret = -EIO; 508 goto out; 509 } 510 } 511 512 if (HWS_NEEDS_PHYSICAL(dev_priv)) 513 ring_setup_phys_status_page(engine); 514 else 515 intel_ring_setup_status_page(engine); 516 517 intel_engine_reset_breadcrumbs(engine); 518 519 /* Enforce ordering by reading HEAD register back */ 520 I915_READ_HEAD(engine); 521 522 /* Initialize the ring. This must happen _after_ we've cleared the ring 523 * registers with the above sequence (the readback of the HEAD registers 524 * also enforces ordering), otherwise the hw might lose the new ring 525 * register values. */ 526 I915_WRITE_START(engine, i915_ggtt_offset(ring->vma)); 527 528 /* WaClearRingBufHeadRegAtInit:ctg,elk */ 529 if (I915_READ_HEAD(engine)) 530 DRM_DEBUG("%s initialization failed [head=%08x], fudging\n", 531 engine->name, I915_READ_HEAD(engine)); 532 533 intel_ring_update_space(ring); 534 I915_WRITE_HEAD(engine, ring->head); 535 I915_WRITE_TAIL(engine, ring->tail); 536 (void)I915_READ_TAIL(engine); 537 538 I915_WRITE_CTL(engine, RING_CTL_SIZE(ring->size) | RING_VALID); 539 540 /* If the head is still not zero, the ring is dead */ 541 if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base), 542 RING_VALID, RING_VALID, 543 50)) { 544 DRM_ERROR("%s initialization failed " 545 "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", 546 engine->name, 547 I915_READ_CTL(engine), 548 I915_READ_CTL(engine) & RING_VALID, 549 I915_READ_HEAD(engine), ring->head, 550 I915_READ_TAIL(engine), ring->tail, 551 I915_READ_START(engine), 552 i915_ggtt_offset(ring->vma)); 553 ret = -EIO; 554 goto out; 555 } 556 557 intel_engine_init_hangcheck(engine); 558 559 out: 560 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 561 562 return ret; 563 } 564 565 static void reset_ring_common(struct intel_engine_cs *engine, 566 struct drm_i915_gem_request *request) 567 { 568 /* Try to restore the logical GPU state to match the continuation 569 * of the request queue. If we skip the context/PD restore, then 570 * the next request may try to execute assuming that its context 571 * is valid and loaded on the GPU and so may try to access invalid 572 * memory, prompting repeated GPU hangs. 573 * 574 * If the request was guilty, we still restore the logical state 575 * in case the next request requires it (e.g. the aliasing ppgtt), 576 * but skip over the hung batch. 577 * 578 * If the request was innocent, we try to replay the request with 579 * the restored context. 580 */ 581 if (request) { 582 struct drm_i915_private *dev_priv = request->i915; 583 struct intel_context *ce = &request->ctx->engine[engine->id]; 584 struct i915_hw_ppgtt *ppgtt; 585 586 /* FIXME consider gen8 reset */ 587 588 if (ce->state) { 589 I915_WRITE(CCID, 590 i915_ggtt_offset(ce->state) | 591 BIT(8) /* must be set! */ | 592 CCID_EXTENDED_STATE_SAVE | 593 CCID_EXTENDED_STATE_RESTORE | 594 CCID_EN); 595 } 596 597 ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt; 598 if (ppgtt) { 599 u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10; 600 601 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 602 I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset); 603 604 /* Wait for the PD reload to complete */ 605 if (intel_wait_for_register(dev_priv, 606 RING_PP_DIR_BASE(engine), 607 BIT(0), 0, 608 10)) 609 DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n"); 610 611 ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); 612 } 613 614 /* If the rq hung, jump to its breadcrumb and skip the batch */ 615 if (request->fence.error == -EIO) 616 request->ring->head = request->postfix; 617 } else { 618 engine->legacy_active_context = NULL; 619 } 620 } 621 622 static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) 623 { 624 int ret; 625 626 ret = intel_ring_workarounds_emit(req); 627 if (ret != 0) 628 return ret; 629 630 ret = i915_gem_render_state_emit(req); 631 if (ret) 632 return ret; 633 634 return 0; 635 } 636 637 static int init_render_ring(struct intel_engine_cs *engine) 638 { 639 struct drm_i915_private *dev_priv = engine->i915; 640 int ret = init_ring_common(engine); 641 if (ret) 642 return ret; 643 644 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ 645 if (IS_GEN(dev_priv, 4, 6)) 646 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); 647 648 /* We need to disable the AsyncFlip performance optimisations in order 649 * to use MI_WAIT_FOR_EVENT within the CS. It should already be 650 * programmed to '1' on all products. 651 * 652 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv 653 */ 654 if (IS_GEN(dev_priv, 6, 7)) 655 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); 656 657 /* Required for the hardware to program scanline values for waiting */ 658 /* WaEnableFlushTlbInvalidationMode:snb */ 659 if (IS_GEN6(dev_priv)) 660 I915_WRITE(GFX_MODE, 661 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT)); 662 663 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ 664 if (IS_GEN7(dev_priv)) 665 I915_WRITE(GFX_MODE_GEN7, 666 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) | 667 _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); 668 669 if (IS_GEN6(dev_priv)) { 670 /* From the Sandybridge PRM, volume 1 part 3, page 24: 671 * "If this bit is set, STCunit will have LRA as replacement 672 * policy. [...] This bit must be reset. LRA replacement 673 * policy is not supported." 674 */ 675 I915_WRITE(CACHE_MODE_0, 676 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); 677 } 678 679 if (IS_GEN(dev_priv, 6, 7)) 680 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); 681 682 if (INTEL_INFO(dev_priv)->gen >= 6) 683 I915_WRITE_IMR(engine, ~engine->irq_keep_mask); 684 685 return init_workarounds_ring(engine); 686 } 687 688 static void render_ring_cleanup(struct intel_engine_cs *engine) 689 { 690 struct drm_i915_private *dev_priv = engine->i915; 691 692 i915_vma_unpin_and_release(&dev_priv->semaphore); 693 } 694 695 static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs) 696 { 697 struct drm_i915_private *dev_priv = req->i915; 698 struct intel_engine_cs *waiter; 699 enum intel_engine_id id; 700 701 for_each_engine(waiter, dev_priv, id) { 702 u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; 703 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) 704 continue; 705 706 *cs++ = GFX_OP_PIPE_CONTROL(6); 707 *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE | 708 PIPE_CONTROL_CS_STALL; 709 *cs++ = lower_32_bits(gtt_offset); 710 *cs++ = upper_32_bits(gtt_offset); 711 *cs++ = req->global_seqno; 712 *cs++ = 0; 713 *cs++ = MI_SEMAPHORE_SIGNAL | 714 MI_SEMAPHORE_TARGET(waiter->hw_id); 715 *cs++ = 0; 716 } 717 718 return cs; 719 } 720 721 static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs) 722 { 723 struct drm_i915_private *dev_priv = req->i915; 724 struct intel_engine_cs *waiter; 725 enum intel_engine_id id; 726 727 for_each_engine(waiter, dev_priv, id) { 728 u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; 729 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) 730 continue; 731 732 *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; 733 *cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; 734 *cs++ = upper_32_bits(gtt_offset); 735 *cs++ = req->global_seqno; 736 *cs++ = MI_SEMAPHORE_SIGNAL | 737 MI_SEMAPHORE_TARGET(waiter->hw_id); 738 *cs++ = 0; 739 } 740 741 return cs; 742 } 743 744 static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) 745 { 746 struct drm_i915_private *dev_priv = req->i915; 747 struct intel_engine_cs *engine; 748 enum intel_engine_id id; 749 int num_rings = 0; 750 751 for_each_engine(engine, dev_priv, id) { 752 i915_reg_t mbox_reg; 753 754 if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK)) 755 continue; 756 757 mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; 758 if (i915_mmio_reg_valid(mbox_reg)) { 759 *cs++ = MI_LOAD_REGISTER_IMM(1); 760 *cs++ = i915_mmio_reg_offset(mbox_reg); 761 *cs++ = req->global_seqno; 762 num_rings++; 763 } 764 } 765 if (num_rings & 1) 766 *cs++ = MI_NOOP; 767 768 return cs; 769 } 770 771 static void i9xx_submit_request(struct drm_i915_gem_request *request) 772 { 773 struct drm_i915_private *dev_priv = request->i915; 774 775 i915_gem_request_submit(request); 776 777 I915_WRITE_TAIL(request->engine, 778 intel_ring_set_tail(request->ring, request->tail)); 779 } 780 781 static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) 782 { 783 *cs++ = MI_STORE_DWORD_INDEX; 784 *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; 785 *cs++ = req->global_seqno; 786 *cs++ = MI_USER_INTERRUPT; 787 788 req->tail = intel_ring_offset(req, cs); 789 assert_ring_tail_valid(req->ring, req->tail); 790 } 791 792 static const int i9xx_emit_breadcrumb_sz = 4; 793 794 /** 795 * gen6_sema_emit_breadcrumb - Update the semaphore mailbox registers 796 * 797 * @request - request to write to the ring 798 * 799 * Update the mailbox registers in the *other* rings with the current seqno. 800 * This acts like a signal in the canonical semaphore. 801 */ 802 static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) 803 { 804 return i9xx_emit_breadcrumb(req, 805 req->engine->semaphore.signal(req, cs)); 806 } 807 808 static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, 809 u32 *cs) 810 { 811 struct intel_engine_cs *engine = req->engine; 812 813 if (engine->semaphore.signal) 814 cs = engine->semaphore.signal(req, cs); 815 816 *cs++ = GFX_OP_PIPE_CONTROL(6); 817 *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | 818 PIPE_CONTROL_QW_WRITE; 819 *cs++ = intel_hws_seqno_address(engine); 820 *cs++ = 0; 821 *cs++ = req->global_seqno; 822 /* We're thrashing one dword of HWS. */ 823 *cs++ = 0; 824 *cs++ = MI_USER_INTERRUPT; 825 *cs++ = MI_NOOP; 826 827 req->tail = intel_ring_offset(req, cs); 828 assert_ring_tail_valid(req->ring, req->tail); 829 } 830 831 static const int gen8_render_emit_breadcrumb_sz = 8; 832 833 /** 834 * intel_ring_sync - sync the waiter to the signaller on seqno 835 * 836 * @waiter - ring that is waiting 837 * @signaller - ring which has, or will signal 838 * @seqno - seqno which the waiter will block on 839 */ 840 841 static int 842 gen8_ring_sync_to(struct drm_i915_gem_request *req, 843 struct drm_i915_gem_request *signal) 844 { 845 struct drm_i915_private *dev_priv = req->i915; 846 u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id); 847 struct i915_hw_ppgtt *ppgtt; 848 u32 *cs; 849 850 cs = intel_ring_begin(req, 4); 851 if (IS_ERR(cs)) 852 return PTR_ERR(cs); 853 854 *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | 855 MI_SEMAPHORE_SAD_GTE_SDD; 856 *cs++ = signal->global_seqno; 857 *cs++ = lower_32_bits(offset); 858 *cs++ = upper_32_bits(offset); 859 intel_ring_advance(req, cs); 860 861 /* When the !RCS engines idle waiting upon a semaphore, they lose their 862 * pagetables and we must reload them before executing the batch. 863 * We do this on the i915_switch_context() following the wait and 864 * before the dispatch. 865 */ 866 ppgtt = req->ctx->ppgtt; 867 if (ppgtt && req->engine->id != RCS) 868 ppgtt->pd_dirty_rings |= intel_engine_flag(req->engine); 869 return 0; 870 } 871 872 static int 873 gen6_ring_sync_to(struct drm_i915_gem_request *req, 874 struct drm_i915_gem_request *signal) 875 { 876 u32 dw1 = MI_SEMAPHORE_MBOX | 877 MI_SEMAPHORE_COMPARE | 878 MI_SEMAPHORE_REGISTER; 879 u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id]; 880 u32 *cs; 881 882 WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); 883 884 cs = intel_ring_begin(req, 4); 885 if (IS_ERR(cs)) 886 return PTR_ERR(cs); 887 888 *cs++ = dw1 | wait_mbox; 889 /* Throughout all of the GEM code, seqno passed implies our current 890 * seqno is >= the last seqno executed. However for hardware the 891 * comparison is strictly greater than. 892 */ 893 *cs++ = signal->global_seqno - 1; 894 *cs++ = 0; 895 *cs++ = MI_NOOP; 896 intel_ring_advance(req, cs); 897 898 return 0; 899 } 900 901 static void 902 gen5_seqno_barrier(struct intel_engine_cs *engine) 903 { 904 /* MI_STORE are internally buffered by the GPU and not flushed 905 * either by MI_FLUSH or SyncFlush or any other combination of 906 * MI commands. 907 * 908 * "Only the submission of the store operation is guaranteed. 909 * The write result will be complete (coherent) some time later 910 * (this is practically a finite period but there is no guaranteed 911 * latency)." 912 * 913 * Empirically, we observe that we need a delay of at least 75us to 914 * be sure that the seqno write is visible by the CPU. 915 */ 916 usleep_range(125, 250); 917 } 918 919 static void 920 gen6_seqno_barrier(struct intel_engine_cs *engine) 921 { 922 struct drm_i915_private *dev_priv = engine->i915; 923 924 /* Workaround to force correct ordering between irq and seqno writes on 925 * ivb (and maybe also on snb) by reading from a CS register (like 926 * ACTHD) before reading the status page. 927 * 928 * Note that this effectively stalls the read by the time it takes to 929 * do a memory transaction, which more or less ensures that the write 930 * from the GPU has sufficient time to invalidate the CPU cacheline. 931 * Alternatively we could delay the interrupt from the CS ring to give 932 * the write time to land, but that would incur a delay after every 933 * batch i.e. much more frequent than a delay when waiting for the 934 * interrupt (with the same net latency). 935 * 936 * Also note that to prevent whole machine hangs on gen7, we have to 937 * take the spinlock to guard against concurrent cacheline access. 938 */ 939 spin_lock_irq(&dev_priv->uncore.lock); 940 POSTING_READ_FW(RING_ACTHD(engine->mmio_base)); 941 spin_unlock_irq(&dev_priv->uncore.lock); 942 } 943 944 static void 945 gen5_irq_enable(struct intel_engine_cs *engine) 946 { 947 gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask); 948 } 949 950 static void 951 gen5_irq_disable(struct intel_engine_cs *engine) 952 { 953 gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask); 954 } 955 956 static void 957 i9xx_irq_enable(struct intel_engine_cs *engine) 958 { 959 struct drm_i915_private *dev_priv = engine->i915; 960 961 dev_priv->irq_mask &= ~engine->irq_enable_mask; 962 I915_WRITE(IMR, dev_priv->irq_mask); 963 POSTING_READ_FW(RING_IMR(engine->mmio_base)); 964 } 965 966 static void 967 i9xx_irq_disable(struct intel_engine_cs *engine) 968 { 969 struct drm_i915_private *dev_priv = engine->i915; 970 971 dev_priv->irq_mask |= engine->irq_enable_mask; 972 I915_WRITE(IMR, dev_priv->irq_mask); 973 } 974 975 static void 976 i8xx_irq_enable(struct intel_engine_cs *engine) 977 { 978 struct drm_i915_private *dev_priv = engine->i915; 979 980 dev_priv->irq_mask &= ~engine->irq_enable_mask; 981 I915_WRITE16(IMR, dev_priv->irq_mask); 982 POSTING_READ16(RING_IMR(engine->mmio_base)); 983 } 984 985 static void 986 i8xx_irq_disable(struct intel_engine_cs *engine) 987 { 988 struct drm_i915_private *dev_priv = engine->i915; 989 990 dev_priv->irq_mask |= engine->irq_enable_mask; 991 I915_WRITE16(IMR, dev_priv->irq_mask); 992 } 993 994 static int 995 bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) 996 { 997 u32 *cs; 998 999 cs = intel_ring_begin(req, 2); 1000 if (IS_ERR(cs)) 1001 return PTR_ERR(cs); 1002 1003 *cs++ = MI_FLUSH; 1004 *cs++ = MI_NOOP; 1005 intel_ring_advance(req, cs); 1006 return 0; 1007 } 1008 1009 static void 1010 gen6_irq_enable(struct intel_engine_cs *engine) 1011 { 1012 struct drm_i915_private *dev_priv = engine->i915; 1013 1014 I915_WRITE_IMR(engine, 1015 ~(engine->irq_enable_mask | 1016 engine->irq_keep_mask)); 1017 gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask); 1018 } 1019 1020 static void 1021 gen6_irq_disable(struct intel_engine_cs *engine) 1022 { 1023 struct drm_i915_private *dev_priv = engine->i915; 1024 1025 I915_WRITE_IMR(engine, ~engine->irq_keep_mask); 1026 gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask); 1027 } 1028 1029 static void 1030 hsw_vebox_irq_enable(struct intel_engine_cs *engine) 1031 { 1032 struct drm_i915_private *dev_priv = engine->i915; 1033 1034 I915_WRITE_IMR(engine, ~engine->irq_enable_mask); 1035 gen6_unmask_pm_irq(dev_priv, engine->irq_enable_mask); 1036 } 1037 1038 static void 1039 hsw_vebox_irq_disable(struct intel_engine_cs *engine) 1040 { 1041 struct drm_i915_private *dev_priv = engine->i915; 1042 1043 I915_WRITE_IMR(engine, ~0); 1044 gen6_mask_pm_irq(dev_priv, engine->irq_enable_mask); 1045 } 1046 1047 static void 1048 gen8_irq_enable(struct intel_engine_cs *engine) 1049 { 1050 struct drm_i915_private *dev_priv = engine->i915; 1051 1052 I915_WRITE_IMR(engine, 1053 ~(engine->irq_enable_mask | 1054 engine->irq_keep_mask)); 1055 POSTING_READ_FW(RING_IMR(engine->mmio_base)); 1056 } 1057 1058 static void 1059 gen8_irq_disable(struct intel_engine_cs *engine) 1060 { 1061 struct drm_i915_private *dev_priv = engine->i915; 1062 1063 I915_WRITE_IMR(engine, ~engine->irq_keep_mask); 1064 } 1065 1066 static int 1067 i965_emit_bb_start(struct drm_i915_gem_request *req, 1068 u64 offset, u32 length, 1069 unsigned int dispatch_flags) 1070 { 1071 u32 *cs; 1072 1073 cs = intel_ring_begin(req, 2); 1074 if (IS_ERR(cs)) 1075 return PTR_ERR(cs); 1076 1077 *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & 1078 I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965); 1079 *cs++ = offset; 1080 intel_ring_advance(req, cs); 1081 1082 return 0; 1083 } 1084 1085 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */ 1086 #define I830_BATCH_LIMIT (256*1024) 1087 #define I830_TLB_ENTRIES (2) 1088 #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) 1089 static int 1090 i830_emit_bb_start(struct drm_i915_gem_request *req, 1091 u64 offset, u32 len, 1092 unsigned int dispatch_flags) 1093 { 1094 u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch); 1095 1096 cs = intel_ring_begin(req, 6); 1097 if (IS_ERR(cs)) 1098 return PTR_ERR(cs); 1099 1100 /* Evict the invalid PTE TLBs */ 1101 *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; 1102 *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; 1103 *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ 1104 *cs++ = cs_offset; 1105 *cs++ = 0xdeadbeef; 1106 *cs++ = MI_NOOP; 1107 intel_ring_advance(req, cs); 1108 1109 if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { 1110 if (len > I830_BATCH_LIMIT) 1111 return -ENOSPC; 1112 1113 cs = intel_ring_begin(req, 6 + 2); 1114 if (IS_ERR(cs)) 1115 return PTR_ERR(cs); 1116 1117 /* Blit the batch (which has now all relocs applied) to the 1118 * stable batch scratch bo area (so that the CS never 1119 * stumbles over its tlb invalidation bug) ... 1120 */ 1121 *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA; 1122 *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; 1123 *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; 1124 *cs++ = cs_offset; 1125 *cs++ = 4096; 1126 *cs++ = offset; 1127 1128 *cs++ = MI_FLUSH; 1129 *cs++ = MI_NOOP; 1130 intel_ring_advance(req, cs); 1131 1132 /* ... and execute it. */ 1133 offset = cs_offset; 1134 } 1135 1136 cs = intel_ring_begin(req, 2); 1137 if (IS_ERR(cs)) 1138 return PTR_ERR(cs); 1139 1140 *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; 1141 *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : 1142 MI_BATCH_NON_SECURE); 1143 intel_ring_advance(req, cs); 1144 1145 return 0; 1146 } 1147 1148 static int 1149 i915_emit_bb_start(struct drm_i915_gem_request *req, 1150 u64 offset, u32 len, 1151 unsigned int dispatch_flags) 1152 { 1153 u32 *cs; 1154 1155 cs = intel_ring_begin(req, 2); 1156 if (IS_ERR(cs)) 1157 return PTR_ERR(cs); 1158 1159 *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; 1160 *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : 1161 MI_BATCH_NON_SECURE); 1162 intel_ring_advance(req, cs); 1163 1164 return 0; 1165 } 1166 1167 static void cleanup_phys_status_page(struct intel_engine_cs *engine) 1168 { 1169 struct drm_i915_private *dev_priv = engine->i915; 1170 1171 if (!dev_priv->status_page_dmah) 1172 return; 1173 1174 drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah); 1175 engine->status_page.page_addr = NULL; 1176 } 1177 1178 static void cleanup_status_page(struct intel_engine_cs *engine) 1179 { 1180 struct i915_vma *vma; 1181 struct drm_i915_gem_object *obj; 1182 1183 vma = fetch_and_zero(&engine->status_page.vma); 1184 if (!vma) 1185 return; 1186 1187 obj = vma->obj; 1188 1189 i915_vma_unpin(vma); 1190 i915_vma_close(vma); 1191 1192 i915_gem_object_unpin_map(obj); 1193 __i915_gem_object_release_unless_active(obj); 1194 } 1195 1196 static int init_status_page(struct intel_engine_cs *engine) 1197 { 1198 struct drm_i915_gem_object *obj; 1199 struct i915_vma *vma; 1200 unsigned int flags; 1201 void *vaddr; 1202 int ret; 1203 1204 obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); 1205 if (IS_ERR(obj)) { 1206 DRM_ERROR("Failed to allocate status page\n"); 1207 return PTR_ERR(obj); 1208 } 1209 1210 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); 1211 if (ret) 1212 goto err; 1213 1214 vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); 1215 if (IS_ERR(vma)) { 1216 ret = PTR_ERR(vma); 1217 goto err; 1218 } 1219 1220 flags = PIN_GLOBAL; 1221 if (!HAS_LLC(engine->i915)) 1222 /* On g33, we cannot place HWS above 256MiB, so 1223 * restrict its pinning to the low mappable arena. 1224 * Though this restriction is not documented for 1225 * gen4, gen5, or byt, they also behave similarly 1226 * and hang if the HWS is placed at the top of the 1227 * GTT. To generalise, it appears that all !llc 1228 * platforms have issues with us placing the HWS 1229 * above the mappable region (even though we never 1230 * actualy map it). 1231 */ 1232 flags |= PIN_MAPPABLE; 1233 ret = i915_vma_pin(vma, 0, 4096, flags); 1234 if (ret) 1235 goto err; 1236 1237 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); 1238 if (IS_ERR(vaddr)) { 1239 ret = PTR_ERR(vaddr); 1240 goto err_unpin; 1241 } 1242 1243 engine->status_page.vma = vma; 1244 engine->status_page.ggtt_offset = i915_ggtt_offset(vma); 1245 engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); 1246 1247 DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", 1248 engine->name, i915_ggtt_offset(vma)); 1249 return 0; 1250 1251 err_unpin: 1252 i915_vma_unpin(vma); 1253 err: 1254 i915_gem_object_put(obj); 1255 return ret; 1256 } 1257 1258 static int init_phys_status_page(struct intel_engine_cs *engine) 1259 { 1260 struct drm_i915_private *dev_priv = engine->i915; 1261 1262 dev_priv->status_page_dmah = 1263 drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); 1264 if (!dev_priv->status_page_dmah) 1265 return -ENOMEM; 1266 1267 engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr; 1268 memset(engine->status_page.page_addr, 0, PAGE_SIZE); 1269 1270 return 0; 1271 } 1272 1273 int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias) 1274 { 1275 unsigned int flags; 1276 enum i915_map_type map; 1277 struct i915_vma *vma = ring->vma; 1278 void *addr; 1279 int ret; 1280 1281 GEM_BUG_ON(ring->vaddr); 1282 1283 map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC; 1284 1285 flags = PIN_GLOBAL; 1286 if (offset_bias) 1287 flags |= PIN_OFFSET_BIAS | offset_bias; 1288 if (vma->obj->stolen) 1289 flags |= PIN_MAPPABLE; 1290 1291 if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { 1292 if (flags & PIN_MAPPABLE || map == I915_MAP_WC) 1293 ret = i915_gem_object_set_to_gtt_domain(vma->obj, true); 1294 else 1295 ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); 1296 if (unlikely(ret)) 1297 return ret; 1298 } 1299 1300 ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags); 1301 if (unlikely(ret)) 1302 return ret; 1303 1304 if (i915_vma_is_map_and_fenceable(vma)) 1305 addr = (void __force *)i915_vma_pin_iomap(vma); 1306 else 1307 addr = i915_gem_object_pin_map(vma->obj, map); 1308 if (IS_ERR(addr)) 1309 goto err; 1310 1311 ring->vaddr = addr; 1312 return 0; 1313 1314 err: 1315 i915_vma_unpin(vma); 1316 return PTR_ERR(addr); 1317 } 1318 1319 void intel_ring_reset(struct intel_ring *ring, u32 tail) 1320 { 1321 GEM_BUG_ON(!list_empty(&ring->request_list)); 1322 ring->tail = tail; 1323 ring->head = tail; 1324 ring->emit = tail; 1325 intel_ring_update_space(ring); 1326 } 1327 1328 void intel_ring_unpin(struct intel_ring *ring) 1329 { 1330 GEM_BUG_ON(!ring->vma); 1331 GEM_BUG_ON(!ring->vaddr); 1332 1333 /* Discard any unused bytes beyond that submitted to hw. */ 1334 intel_ring_reset(ring, ring->tail); 1335 1336 if (i915_vma_is_map_and_fenceable(ring->vma)) 1337 i915_vma_unpin_iomap(ring->vma); 1338 else 1339 i915_gem_object_unpin_map(ring->vma->obj); 1340 ring->vaddr = NULL; 1341 1342 i915_vma_unpin(ring->vma); 1343 } 1344 1345 static struct i915_vma * 1346 intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) 1347 { 1348 struct drm_i915_gem_object *obj; 1349 struct i915_vma *vma; 1350 1351 obj = i915_gem_object_create_stolen(dev_priv, size); 1352 if (!obj) 1353 obj = i915_gem_object_create(dev_priv, size); 1354 if (IS_ERR(obj)) 1355 return ERR_CAST(obj); 1356 1357 /* mark ring buffers as read-only from GPU side by default */ 1358 obj->gt_ro = 1; 1359 1360 vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); 1361 if (IS_ERR(vma)) 1362 goto err; 1363 1364 return vma; 1365 1366 err: 1367 i915_gem_object_put(obj); 1368 return vma; 1369 } 1370 1371 struct intel_ring * 1372 intel_engine_create_ring(struct intel_engine_cs *engine, int size) 1373 { 1374 struct intel_ring *ring; 1375 struct i915_vma *vma; 1376 1377 GEM_BUG_ON(!is_power_of_2(size)); 1378 GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); 1379 1380 ring = kzalloc(sizeof(*ring), GFP_KERNEL); 1381 if (!ring) 1382 return ERR_PTR(-ENOMEM); 1383 1384 ring->engine = engine; 1385 1386 INIT_LIST_HEAD(&ring->request_list); 1387 1388 ring->size = size; 1389 /* Workaround an erratum on the i830 which causes a hang if 1390 * the TAIL pointer points to within the last 2 cachelines 1391 * of the buffer. 1392 */ 1393 ring->effective_size = size; 1394 if (IS_I830(engine->i915) || IS_I845G(engine->i915)) 1395 ring->effective_size -= 2 * CACHELINE_BYTES; 1396 1397 intel_ring_update_space(ring); 1398 1399 vma = intel_ring_create_vma(engine->i915, size); 1400 if (IS_ERR(vma)) { 1401 kfree(ring); 1402 return ERR_CAST(vma); 1403 } 1404 ring->vma = vma; 1405 1406 return ring; 1407 } 1408 1409 void 1410 intel_ring_free(struct intel_ring *ring) 1411 { 1412 struct drm_i915_gem_object *obj = ring->vma->obj; 1413 1414 i915_vma_close(ring->vma); 1415 __i915_gem_object_release_unless_active(obj); 1416 1417 kfree(ring); 1418 } 1419 1420 static int context_pin(struct i915_gem_context *ctx) 1421 { 1422 struct i915_vma *vma = ctx->engine[RCS].state; 1423 int ret; 1424 1425 /* Clear this page out of any CPU caches for coherent swap-in/out. 1426 * We only want to do this on the first bind so that we do not stall 1427 * on an active context (which by nature is already on the GPU). 1428 */ 1429 if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { 1430 ret = i915_gem_object_set_to_gtt_domain(vma->obj, false); 1431 if (ret) 1432 return ret; 1433 } 1434 1435 return i915_vma_pin(vma, 0, I915_GTT_MIN_ALIGNMENT, 1436 PIN_GLOBAL | PIN_HIGH); 1437 } 1438 1439 static int intel_ring_context_pin(struct intel_engine_cs *engine, 1440 struct i915_gem_context *ctx) 1441 { 1442 struct intel_context *ce = &ctx->engine[engine->id]; 1443 int ret; 1444 1445 lockdep_assert_held(&ctx->i915->drm.struct_mutex); 1446 1447 if (ce->pin_count++) 1448 return 0; 1449 GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ 1450 1451 if (ce->state) { 1452 ret = context_pin(ctx); 1453 if (ret) 1454 goto error; 1455 1456 ce->state->obj->mm.dirty = true; 1457 } 1458 1459 /* The kernel context is only used as a placeholder for flushing the 1460 * active context. It is never used for submitting user rendering and 1461 * as such never requires the golden render context, and so we can skip 1462 * emitting it when we switch to the kernel context. This is required 1463 * as during eviction we cannot allocate and pin the renderstate in 1464 * order to initialise the context. 1465 */ 1466 if (i915_gem_context_is_kernel(ctx)) 1467 ce->initialised = true; 1468 1469 i915_gem_context_get(ctx); 1470 return 0; 1471 1472 error: 1473 ce->pin_count = 0; 1474 return ret; 1475 } 1476 1477 static void intel_ring_context_unpin(struct intel_engine_cs *engine, 1478 struct i915_gem_context *ctx) 1479 { 1480 struct intel_context *ce = &ctx->engine[engine->id]; 1481 1482 lockdep_assert_held(&ctx->i915->drm.struct_mutex); 1483 GEM_BUG_ON(ce->pin_count == 0); 1484 1485 if (--ce->pin_count) 1486 return; 1487 1488 if (ce->state) 1489 i915_vma_unpin(ce->state); 1490 1491 i915_gem_context_put(ctx); 1492 } 1493 1494 static int intel_init_ring_buffer(struct intel_engine_cs *engine) 1495 { 1496 struct drm_i915_private *dev_priv = engine->i915; 1497 struct intel_ring *ring; 1498 int ret; 1499 1500 WARN_ON(engine->buffer); 1501 1502 intel_engine_setup_common(engine); 1503 1504 ret = intel_engine_init_common(engine); 1505 if (ret) 1506 goto error; 1507 1508 ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); 1509 if (IS_ERR(ring)) { 1510 ret = PTR_ERR(ring); 1511 goto error; 1512 } 1513 1514 if (HWS_NEEDS_PHYSICAL(dev_priv)) { 1515 WARN_ON(engine->id != RCS); 1516 ret = init_phys_status_page(engine); 1517 if (ret) 1518 goto error; 1519 } else { 1520 ret = init_status_page(engine); 1521 if (ret) 1522 goto error; 1523 } 1524 1525 /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ 1526 ret = intel_ring_pin(ring, I915_GTT_PAGE_SIZE); 1527 if (ret) { 1528 intel_ring_free(ring); 1529 goto error; 1530 } 1531 engine->buffer = ring; 1532 1533 return 0; 1534 1535 error: 1536 intel_engine_cleanup(engine); 1537 return ret; 1538 } 1539 1540 void intel_engine_cleanup(struct intel_engine_cs *engine) 1541 { 1542 struct drm_i915_private *dev_priv; 1543 1544 dev_priv = engine->i915; 1545 1546 if (engine->buffer) { 1547 WARN_ON(INTEL_GEN(dev_priv) > 2 && 1548 (I915_READ_MODE(engine) & MODE_IDLE) == 0); 1549 1550 intel_ring_unpin(engine->buffer); 1551 intel_ring_free(engine->buffer); 1552 engine->buffer = NULL; 1553 } 1554 1555 if (engine->cleanup) 1556 engine->cleanup(engine); 1557 1558 if (HWS_NEEDS_PHYSICAL(dev_priv)) { 1559 WARN_ON(engine->id != RCS); 1560 cleanup_phys_status_page(engine); 1561 } else { 1562 cleanup_status_page(engine); 1563 } 1564 1565 intel_engine_cleanup_common(engine); 1566 1567 engine->i915 = NULL; 1568 dev_priv->engine[engine->id] = NULL; 1569 kfree(engine); 1570 } 1571 1572 void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) 1573 { 1574 struct intel_engine_cs *engine; 1575 enum intel_engine_id id; 1576 1577 /* Restart from the beginning of the rings for convenience */ 1578 for_each_engine(engine, dev_priv, id) 1579 intel_ring_reset(engine->buffer, 0); 1580 } 1581 1582 static int ring_request_alloc(struct drm_i915_gem_request *request) 1583 { 1584 u32 *cs; 1585 1586 GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); 1587 1588 /* Flush enough space to reduce the likelihood of waiting after 1589 * we start building the request - in which case we will just 1590 * have to repeat work. 1591 */ 1592 request->reserved_space += LEGACY_REQUEST_SIZE; 1593 1594 GEM_BUG_ON(!request->engine->buffer); 1595 request->ring = request->engine->buffer; 1596 1597 cs = intel_ring_begin(request, 0); 1598 if (IS_ERR(cs)) 1599 return PTR_ERR(cs); 1600 1601 request->reserved_space -= LEGACY_REQUEST_SIZE; 1602 return 0; 1603 } 1604 1605 static int wait_for_space(struct drm_i915_gem_request *req, int bytes) 1606 { 1607 struct intel_ring *ring = req->ring; 1608 struct drm_i915_gem_request *target; 1609 long timeout; 1610 1611 lockdep_assert_held(&req->i915->drm.struct_mutex); 1612 1613 intel_ring_update_space(ring); 1614 if (ring->space >= bytes) 1615 return 0; 1616 1617 /* 1618 * Space is reserved in the ringbuffer for finalising the request, 1619 * as that cannot be allowed to fail. During request finalisation, 1620 * reserved_space is set to 0 to stop the overallocation and the 1621 * assumption is that then we never need to wait (which has the 1622 * risk of failing with EINTR). 1623 * 1624 * See also i915_gem_request_alloc() and i915_add_request(). 1625 */ 1626 GEM_BUG_ON(!req->reserved_space); 1627 1628 list_for_each_entry(target, &ring->request_list, ring_link) { 1629 unsigned space; 1630 1631 /* Would completion of this request free enough space? */ 1632 space = __intel_ring_space(target->postfix, ring->emit, 1633 ring->size); 1634 if (space >= bytes) 1635 break; 1636 } 1637 1638 if (WARN_ON(&target->ring_link == &ring->request_list)) 1639 return -ENOSPC; 1640 1641 timeout = i915_wait_request(target, 1642 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, 1643 MAX_SCHEDULE_TIMEOUT); 1644 if (timeout < 0) 1645 return timeout; 1646 1647 i915_gem_request_retire_upto(target); 1648 1649 intel_ring_update_space(ring); 1650 GEM_BUG_ON(ring->space < bytes); 1651 return 0; 1652 } 1653 1654 u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) 1655 { 1656 struct intel_ring *ring = req->ring; 1657 int remain_actual = ring->size - ring->emit; 1658 int remain_usable = ring->effective_size - ring->emit; 1659 int bytes = num_dwords * sizeof(u32); 1660 int total_bytes, wait_bytes; 1661 bool need_wrap = false; 1662 u32 *cs; 1663 1664 total_bytes = bytes + req->reserved_space; 1665 1666 if (unlikely(bytes > remain_usable)) { 1667 /* 1668 * Not enough space for the basic request. So need to flush 1669 * out the remainder and then wait for base + reserved. 1670 */ 1671 wait_bytes = remain_actual + total_bytes; 1672 need_wrap = true; 1673 } else if (unlikely(total_bytes > remain_usable)) { 1674 /* 1675 * The base request will fit but the reserved space 1676 * falls off the end. So we don't need an immediate wrap 1677 * and only need to effectively wait for the reserved 1678 * size space from the start of ringbuffer. 1679 */ 1680 wait_bytes = remain_actual + req->reserved_space; 1681 } else { 1682 /* No wrapping required, just waiting. */ 1683 wait_bytes = total_bytes; 1684 } 1685 1686 if (wait_bytes > ring->space) { 1687 int ret = wait_for_space(req, wait_bytes); 1688 if (unlikely(ret)) 1689 return ERR_PTR(ret); 1690 } 1691 1692 if (unlikely(need_wrap)) { 1693 GEM_BUG_ON(remain_actual > ring->space); 1694 GEM_BUG_ON(ring->emit + remain_actual > ring->size); 1695 1696 /* Fill the tail with MI_NOOP */ 1697 memset(ring->vaddr + ring->emit, 0, remain_actual); 1698 ring->emit = 0; 1699 ring->space -= remain_actual; 1700 } 1701 1702 GEM_BUG_ON(ring->emit > ring->size - bytes); 1703 cs = ring->vaddr + ring->emit; 1704 ring->emit += bytes; 1705 ring->space -= bytes; 1706 GEM_BUG_ON(ring->space < 0); 1707 1708 return cs; 1709 } 1710 1711 /* Align the ring tail to a cacheline boundary */ 1712 int intel_ring_cacheline_align(struct drm_i915_gem_request *req) 1713 { 1714 int num_dwords = 1715 (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); 1716 u32 *cs; 1717 1718 if (num_dwords == 0) 1719 return 0; 1720 1721 num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; 1722 cs = intel_ring_begin(req, num_dwords); 1723 if (IS_ERR(cs)) 1724 return PTR_ERR(cs); 1725 1726 while (num_dwords--) 1727 *cs++ = MI_NOOP; 1728 1729 intel_ring_advance(req, cs); 1730 1731 return 0; 1732 } 1733 1734 static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) 1735 { 1736 struct drm_i915_private *dev_priv = request->i915; 1737 1738 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 1739 1740 /* Every tail move must follow the sequence below */ 1741 1742 /* Disable notification that the ring is IDLE. The GT 1743 * will then assume that it is busy and bring it out of rc6. 1744 */ 1745 I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL, 1746 _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); 1747 1748 /* Clear the context id. Here be magic! */ 1749 I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0); 1750 1751 /* Wait for the ring not to be idle, i.e. for it to wake up. */ 1752 if (intel_wait_for_register_fw(dev_priv, 1753 GEN6_BSD_SLEEP_PSMI_CONTROL, 1754 GEN6_BSD_SLEEP_INDICATOR, 1755 0, 1756 50)) 1757 DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); 1758 1759 /* Now that the ring is fully powered up, update the tail */ 1760 i9xx_submit_request(request); 1761 1762 /* Let the ring send IDLE messages to the GT again, 1763 * and so let it sleep to conserve power when idle. 1764 */ 1765 I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL, 1766 _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); 1767 1768 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 1769 } 1770 1771 static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) 1772 { 1773 u32 cmd, *cs; 1774 1775 cs = intel_ring_begin(req, 4); 1776 if (IS_ERR(cs)) 1777 return PTR_ERR(cs); 1778 1779 cmd = MI_FLUSH_DW; 1780 if (INTEL_GEN(req->i915) >= 8) 1781 cmd += 1; 1782 1783 /* We always require a command barrier so that subsequent 1784 * commands, such as breadcrumb interrupts, are strictly ordered 1785 * wrt the contents of the write cache being flushed to memory 1786 * (and thus being coherent from the CPU). 1787 */ 1788 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; 1789 1790 /* 1791 * Bspec vol 1c.5 - video engine command streamer: 1792 * "If ENABLED, all TLBs will be invalidated once the flush 1793 * operation is complete. This bit is only valid when the 1794 * Post-Sync Operation field is a value of 1h or 3h." 1795 */ 1796 if (mode & EMIT_INVALIDATE) 1797 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; 1798 1799 *cs++ = cmd; 1800 *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; 1801 if (INTEL_GEN(req->i915) >= 8) { 1802 *cs++ = 0; /* upper addr */ 1803 *cs++ = 0; /* value */ 1804 } else { 1805 *cs++ = 0; 1806 *cs++ = MI_NOOP; 1807 } 1808 intel_ring_advance(req, cs); 1809 return 0; 1810 } 1811 1812 static int 1813 gen8_emit_bb_start(struct drm_i915_gem_request *req, 1814 u64 offset, u32 len, 1815 unsigned int dispatch_flags) 1816 { 1817 bool ppgtt = USES_PPGTT(req->i915) && 1818 !(dispatch_flags & I915_DISPATCH_SECURE); 1819 u32 *cs; 1820 1821 cs = intel_ring_begin(req, 4); 1822 if (IS_ERR(cs)) 1823 return PTR_ERR(cs); 1824 1825 /* FIXME(BDW): Address space and security selectors. */ 1826 *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags & 1827 I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); 1828 *cs++ = lower_32_bits(offset); 1829 *cs++ = upper_32_bits(offset); 1830 *cs++ = MI_NOOP; 1831 intel_ring_advance(req, cs); 1832 1833 return 0; 1834 } 1835 1836 static int 1837 hsw_emit_bb_start(struct drm_i915_gem_request *req, 1838 u64 offset, u32 len, 1839 unsigned int dispatch_flags) 1840 { 1841 u32 *cs; 1842 1843 cs = intel_ring_begin(req, 2); 1844 if (IS_ERR(cs)) 1845 return PTR_ERR(cs); 1846 1847 *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? 1848 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | 1849 (dispatch_flags & I915_DISPATCH_RS ? 1850 MI_BATCH_RESOURCE_STREAMER : 0); 1851 /* bit0-7 is the length on GEN6+ */ 1852 *cs++ = offset; 1853 intel_ring_advance(req, cs); 1854 1855 return 0; 1856 } 1857 1858 static int 1859 gen6_emit_bb_start(struct drm_i915_gem_request *req, 1860 u64 offset, u32 len, 1861 unsigned int dispatch_flags) 1862 { 1863 u32 *cs; 1864 1865 cs = intel_ring_begin(req, 2); 1866 if (IS_ERR(cs)) 1867 return PTR_ERR(cs); 1868 1869 *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? 1870 0 : MI_BATCH_NON_SECURE_I965); 1871 /* bit0-7 is the length on GEN6+ */ 1872 *cs++ = offset; 1873 intel_ring_advance(req, cs); 1874 1875 return 0; 1876 } 1877 1878 /* Blitter support (SandyBridge+) */ 1879 1880 static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) 1881 { 1882 u32 cmd, *cs; 1883 1884 cs = intel_ring_begin(req, 4); 1885 if (IS_ERR(cs)) 1886 return PTR_ERR(cs); 1887 1888 cmd = MI_FLUSH_DW; 1889 if (INTEL_GEN(req->i915) >= 8) 1890 cmd += 1; 1891 1892 /* We always require a command barrier so that subsequent 1893 * commands, such as breadcrumb interrupts, are strictly ordered 1894 * wrt the contents of the write cache being flushed to memory 1895 * (and thus being coherent from the CPU). 1896 */ 1897 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; 1898 1899 /* 1900 * Bspec vol 1c.3 - blitter engine command streamer: 1901 * "If ENABLED, all TLBs will be invalidated once the flush 1902 * operation is complete. This bit is only valid when the 1903 * Post-Sync Operation field is a value of 1h or 3h." 1904 */ 1905 if (mode & EMIT_INVALIDATE) 1906 cmd |= MI_INVALIDATE_TLB; 1907 *cs++ = cmd; 1908 *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; 1909 if (INTEL_GEN(req->i915) >= 8) { 1910 *cs++ = 0; /* upper addr */ 1911 *cs++ = 0; /* value */ 1912 } else { 1913 *cs++ = 0; 1914 *cs++ = MI_NOOP; 1915 } 1916 intel_ring_advance(req, cs); 1917 1918 return 0; 1919 } 1920 1921 static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, 1922 struct intel_engine_cs *engine) 1923 { 1924 struct drm_i915_gem_object *obj; 1925 int ret, i; 1926 1927 if (!i915.semaphores) 1928 return; 1929 1930 if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) { 1931 struct i915_vma *vma; 1932 1933 obj = i915_gem_object_create(dev_priv, PAGE_SIZE); 1934 if (IS_ERR(obj)) 1935 goto err; 1936 1937 vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); 1938 if (IS_ERR(vma)) 1939 goto err_obj; 1940 1941 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1942 if (ret) 1943 goto err_obj; 1944 1945 ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); 1946 if (ret) 1947 goto err_obj; 1948 1949 dev_priv->semaphore = vma; 1950 } 1951 1952 if (INTEL_GEN(dev_priv) >= 8) { 1953 u32 offset = i915_ggtt_offset(dev_priv->semaphore); 1954 1955 engine->semaphore.sync_to = gen8_ring_sync_to; 1956 engine->semaphore.signal = gen8_xcs_signal; 1957 1958 for (i = 0; i < I915_NUM_ENGINES; i++) { 1959 u32 ring_offset; 1960 1961 if (i != engine->id) 1962 ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i); 1963 else 1964 ring_offset = MI_SEMAPHORE_SYNC_INVALID; 1965 1966 engine->semaphore.signal_ggtt[i] = ring_offset; 1967 } 1968 } else if (INTEL_GEN(dev_priv) >= 6) { 1969 engine->semaphore.sync_to = gen6_ring_sync_to; 1970 engine->semaphore.signal = gen6_signal; 1971 1972 /* 1973 * The current semaphore is only applied on pre-gen8 1974 * platform. And there is no VCS2 ring on the pre-gen8 1975 * platform. So the semaphore between RCS and VCS2 is 1976 * initialized as INVALID. Gen8 will initialize the 1977 * sema between VCS2 and RCS later. 1978 */ 1979 for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) { 1980 static const struct { 1981 u32 wait_mbox; 1982 i915_reg_t mbox_reg; 1983 } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = { 1984 [RCS_HW] = { 1985 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, 1986 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, 1987 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, 1988 }, 1989 [VCS_HW] = { 1990 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, 1991 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, 1992 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, 1993 }, 1994 [BCS_HW] = { 1995 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, 1996 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, 1997 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, 1998 }, 1999 [VECS_HW] = { 2000 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, 2001 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, 2002 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, 2003 }, 2004 }; 2005 u32 wait_mbox; 2006 i915_reg_t mbox_reg; 2007 2008 if (i == engine->hw_id) { 2009 wait_mbox = MI_SEMAPHORE_SYNC_INVALID; 2010 mbox_reg = GEN6_NOSYNC; 2011 } else { 2012 wait_mbox = sem_data[engine->hw_id][i].wait_mbox; 2013 mbox_reg = sem_data[engine->hw_id][i].mbox_reg; 2014 } 2015 2016 engine->semaphore.mbox.wait[i] = wait_mbox; 2017 engine->semaphore.mbox.signal[i] = mbox_reg; 2018 } 2019 } 2020 2021 return; 2022 2023 err_obj: 2024 i915_gem_object_put(obj); 2025 err: 2026 DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n"); 2027 i915.semaphores = 0; 2028 } 2029 2030 static void intel_ring_init_irq(struct drm_i915_private *dev_priv, 2031 struct intel_engine_cs *engine) 2032 { 2033 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift; 2034 2035 if (INTEL_GEN(dev_priv) >= 8) { 2036 engine->irq_enable = gen8_irq_enable; 2037 engine->irq_disable = gen8_irq_disable; 2038 engine->irq_seqno_barrier = gen6_seqno_barrier; 2039 } else if (INTEL_GEN(dev_priv) >= 6) { 2040 engine->irq_enable = gen6_irq_enable; 2041 engine->irq_disable = gen6_irq_disable; 2042 engine->irq_seqno_barrier = gen6_seqno_barrier; 2043 } else if (INTEL_GEN(dev_priv) >= 5) { 2044 engine->irq_enable = gen5_irq_enable; 2045 engine->irq_disable = gen5_irq_disable; 2046 engine->irq_seqno_barrier = gen5_seqno_barrier; 2047 } else if (INTEL_GEN(dev_priv) >= 3) { 2048 engine->irq_enable = i9xx_irq_enable; 2049 engine->irq_disable = i9xx_irq_disable; 2050 } else { 2051 engine->irq_enable = i8xx_irq_enable; 2052 engine->irq_disable = i8xx_irq_disable; 2053 } 2054 } 2055 2056 static void i9xx_set_default_submission(struct intel_engine_cs *engine) 2057 { 2058 engine->submit_request = i9xx_submit_request; 2059 } 2060 2061 static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) 2062 { 2063 engine->submit_request = gen6_bsd_submit_request; 2064 } 2065 2066 static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, 2067 struct intel_engine_cs *engine) 2068 { 2069 intel_ring_init_irq(dev_priv, engine); 2070 intel_ring_init_semaphores(dev_priv, engine); 2071 2072 engine->init_hw = init_ring_common; 2073 engine->reset_hw = reset_ring_common; 2074 2075 engine->context_pin = intel_ring_context_pin; 2076 engine->context_unpin = intel_ring_context_unpin; 2077 2078 engine->request_alloc = ring_request_alloc; 2079 2080 engine->emit_breadcrumb = i9xx_emit_breadcrumb; 2081 engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; 2082 if (i915.semaphores) { 2083 int num_rings; 2084 2085 engine->emit_breadcrumb = gen6_sema_emit_breadcrumb; 2086 2087 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1; 2088 if (INTEL_GEN(dev_priv) >= 8) { 2089 engine->emit_breadcrumb_sz += num_rings * 6; 2090 } else { 2091 engine->emit_breadcrumb_sz += num_rings * 3; 2092 if (num_rings & 1) 2093 engine->emit_breadcrumb_sz++; 2094 } 2095 } 2096 2097 engine->set_default_submission = i9xx_set_default_submission; 2098 2099 if (INTEL_GEN(dev_priv) >= 8) 2100 engine->emit_bb_start = gen8_emit_bb_start; 2101 else if (INTEL_GEN(dev_priv) >= 6) 2102 engine->emit_bb_start = gen6_emit_bb_start; 2103 else if (INTEL_GEN(dev_priv) >= 4) 2104 engine->emit_bb_start = i965_emit_bb_start; 2105 else if (IS_I830(dev_priv) || IS_I845G(dev_priv)) 2106 engine->emit_bb_start = i830_emit_bb_start; 2107 else 2108 engine->emit_bb_start = i915_emit_bb_start; 2109 } 2110 2111 int intel_init_render_ring_buffer(struct intel_engine_cs *engine) 2112 { 2113 struct drm_i915_private *dev_priv = engine->i915; 2114 int ret; 2115 2116 intel_ring_default_vfuncs(dev_priv, engine); 2117 2118 if (HAS_L3_DPF(dev_priv)) 2119 engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; 2120 2121 if (INTEL_GEN(dev_priv) >= 8) { 2122 engine->init_context = intel_rcs_ctx_init; 2123 engine->emit_breadcrumb = gen8_render_emit_breadcrumb; 2124 engine->emit_breadcrumb_sz = gen8_render_emit_breadcrumb_sz; 2125 engine->emit_flush = gen8_render_ring_flush; 2126 if (i915.semaphores) { 2127 int num_rings; 2128 2129 engine->semaphore.signal = gen8_rcs_signal; 2130 2131 num_rings = 2132 hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1; 2133 engine->emit_breadcrumb_sz += num_rings * 8; 2134 } 2135 } else if (INTEL_GEN(dev_priv) >= 6) { 2136 engine->init_context = intel_rcs_ctx_init; 2137 engine->emit_flush = gen7_render_ring_flush; 2138 if (IS_GEN6(dev_priv)) 2139 engine->emit_flush = gen6_render_ring_flush; 2140 } else if (IS_GEN5(dev_priv)) { 2141 engine->emit_flush = gen4_render_ring_flush; 2142 } else { 2143 if (INTEL_GEN(dev_priv) < 4) 2144 engine->emit_flush = gen2_render_ring_flush; 2145 else 2146 engine->emit_flush = gen4_render_ring_flush; 2147 engine->irq_enable_mask = I915_USER_INTERRUPT; 2148 } 2149 2150 if (IS_HASWELL(dev_priv)) 2151 engine->emit_bb_start = hsw_emit_bb_start; 2152 2153 engine->init_hw = init_render_ring; 2154 engine->cleanup = render_ring_cleanup; 2155 2156 ret = intel_init_ring_buffer(engine); 2157 if (ret) 2158 return ret; 2159 2160 if (INTEL_GEN(dev_priv) >= 6) { 2161 ret = intel_engine_create_scratch(engine, PAGE_SIZE); 2162 if (ret) 2163 return ret; 2164 } else if (HAS_BROKEN_CS_TLB(dev_priv)) { 2165 ret = intel_engine_create_scratch(engine, I830_WA_SIZE); 2166 if (ret) 2167 return ret; 2168 } 2169 2170 return 0; 2171 } 2172 2173 int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) 2174 { 2175 struct drm_i915_private *dev_priv = engine->i915; 2176 2177 intel_ring_default_vfuncs(dev_priv, engine); 2178 2179 if (INTEL_GEN(dev_priv) >= 6) { 2180 /* gen6 bsd needs a special wa for tail updates */ 2181 if (IS_GEN6(dev_priv)) 2182 engine->set_default_submission = gen6_bsd_set_default_submission; 2183 engine->emit_flush = gen6_bsd_ring_flush; 2184 if (INTEL_GEN(dev_priv) < 8) 2185 engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; 2186 } else { 2187 engine->mmio_base = BSD_RING_BASE; 2188 engine->emit_flush = bsd_ring_flush; 2189 if (IS_GEN5(dev_priv)) 2190 engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; 2191 else 2192 engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; 2193 } 2194 2195 return intel_init_ring_buffer(engine); 2196 } 2197 2198 /** 2199 * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3) 2200 */ 2201 int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine) 2202 { 2203 struct drm_i915_private *dev_priv = engine->i915; 2204 2205 intel_ring_default_vfuncs(dev_priv, engine); 2206 2207 engine->emit_flush = gen6_bsd_ring_flush; 2208 2209 return intel_init_ring_buffer(engine); 2210 } 2211 2212 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) 2213 { 2214 struct drm_i915_private *dev_priv = engine->i915; 2215 2216 intel_ring_default_vfuncs(dev_priv, engine); 2217 2218 engine->emit_flush = gen6_ring_flush; 2219 if (INTEL_GEN(dev_priv) < 8) 2220 engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; 2221 2222 return intel_init_ring_buffer(engine); 2223 } 2224 2225 int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) 2226 { 2227 struct drm_i915_private *dev_priv = engine->i915; 2228 2229 intel_ring_default_vfuncs(dev_priv, engine); 2230 2231 engine->emit_flush = gen6_ring_flush; 2232 2233 if (INTEL_GEN(dev_priv) < 8) { 2234 engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; 2235 engine->irq_enable = hsw_vebox_irq_enable; 2236 engine->irq_disable = hsw_vebox_irq_disable; 2237 } 2238 2239 return intel_init_ring_buffer(engine); 2240 } 2241