1 /* 2 * Copyright © 2008-2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * Zou Nan hai <nanhai.zou@intel.com> 26 * Xiang Hai hao<haihao.xiang@intel.com> 27 * 28 */ 29 30 #include <linux/log2.h> 31 #include <drm/drmP.h> 32 #include "i915_drv.h" 33 #include <drm/i915_drm.h> 34 #include "i915_trace.h" 35 #include "intel_drv.h" 36 37 /* Rough estimate of the typical request size, performing a flush, 38 * set-context and then emitting the batch. 39 */ 40 #define LEGACY_REQUEST_SIZE 200 41 42 int __intel_ring_space(int head, int tail, int size) 43 { 44 int space = head - tail; 45 if (space <= 0) 46 space += size; 47 return space - I915_RING_FREE_SPACE; 48 } 49 50 void intel_ring_update_space(struct intel_ringbuffer *ringbuf) 51 { 52 if (ringbuf->last_retired_head != -1) { 53 ringbuf->head = ringbuf->last_retired_head; 54 ringbuf->last_retired_head = -1; 55 } 56 57 ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR, 58 ringbuf->tail, ringbuf->size); 59 } 60 61 static void __intel_ring_advance(struct intel_engine_cs *engine) 62 { 63 struct intel_ringbuffer *ringbuf = engine->buffer; 64 ringbuf->tail &= ringbuf->size - 1; 65 engine->write_tail(engine, ringbuf->tail); 66 } 67 68 static int 69 gen2_render_ring_flush(struct drm_i915_gem_request *req, 70 u32 invalidate_domains, 71 u32 flush_domains) 72 { 73 struct intel_engine_cs *engine = req->engine; 74 u32 cmd; 75 int ret; 76 77 cmd = MI_FLUSH; 78 if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0) 79 cmd |= MI_NO_WRITE_FLUSH; 80 81 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) 82 cmd |= MI_READ_FLUSH; 83 84 ret = intel_ring_begin(req, 2); 85 if (ret) 86 return ret; 87 88 intel_ring_emit(engine, cmd); 89 intel_ring_emit(engine, MI_NOOP); 90 intel_ring_advance(engine); 91 92 return 0; 93 } 94 95 static int 96 gen4_render_ring_flush(struct drm_i915_gem_request *req, 97 u32 invalidate_domains, 98 u32 flush_domains) 99 { 100 struct intel_engine_cs *engine = req->engine; 101 u32 cmd; 102 int ret; 103 104 /* 105 * read/write caches: 106 * 107 * I915_GEM_DOMAIN_RENDER is always invalidated, but is 108 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is 109 * also flushed at 2d versus 3d pipeline switches. 110 * 111 * read-only caches: 112 * 113 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if 114 * MI_READ_FLUSH is set, and is always flushed on 965. 115 * 116 * I915_GEM_DOMAIN_COMMAND may not exist? 117 * 118 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is 119 * invalidated when MI_EXE_FLUSH is set. 120 * 121 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is 122 * invalidated with every MI_FLUSH. 123 * 124 * TLBs: 125 * 126 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND 127 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and 128 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER 129 * are flushed at any MI_FLUSH. 130 */ 131 132 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; 133 if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) 134 cmd &= ~MI_NO_WRITE_FLUSH; 135 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) 136 cmd |= MI_EXE_FLUSH; 137 138 if (invalidate_domains & I915_GEM_DOMAIN_COMMAND && 139 (IS_G4X(req->i915) || IS_GEN5(req->i915))) 140 cmd |= MI_INVALIDATE_ISP; 141 142 ret = intel_ring_begin(req, 2); 143 if (ret) 144 return ret; 145 146 intel_ring_emit(engine, cmd); 147 intel_ring_emit(engine, MI_NOOP); 148 intel_ring_advance(engine); 149 150 return 0; 151 } 152 153 /** 154 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for 155 * implementing two workarounds on gen6. From section 1.4.7.1 156 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: 157 * 158 * [DevSNB-C+{W/A}] Before any depth stall flush (including those 159 * produced by non-pipelined state commands), software needs to first 160 * send a PIPE_CONTROL with no bits set except Post-Sync Operation != 161 * 0. 162 * 163 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable 164 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. 165 * 166 * And the workaround for these two requires this workaround first: 167 * 168 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent 169 * BEFORE the pipe-control with a post-sync op and no write-cache 170 * flushes. 171 * 172 * And this last workaround is tricky because of the requirements on 173 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM 174 * volume 2 part 1: 175 * 176 * "1 of the following must also be set: 177 * - Render Target Cache Flush Enable ([12] of DW1) 178 * - Depth Cache Flush Enable ([0] of DW1) 179 * - Stall at Pixel Scoreboard ([1] of DW1) 180 * - Depth Stall ([13] of DW1) 181 * - Post-Sync Operation ([13] of DW1) 182 * - Notify Enable ([8] of DW1)" 183 * 184 * The cache flushes require the workaround flush that triggered this 185 * one, so we can't use it. Depth stall would trigger the same. 186 * Post-sync nonzero is what triggered this second workaround, so we 187 * can't use that one either. Notify enable is IRQs, which aren't 188 * really our business. That leaves only stall at scoreboard. 189 */ 190 static int 191 intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) 192 { 193 struct intel_engine_cs *engine = req->engine; 194 u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; 195 int ret; 196 197 ret = intel_ring_begin(req, 6); 198 if (ret) 199 return ret; 200 201 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5)); 202 intel_ring_emit(engine, PIPE_CONTROL_CS_STALL | 203 PIPE_CONTROL_STALL_AT_SCOREBOARD); 204 intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ 205 intel_ring_emit(engine, 0); /* low dword */ 206 intel_ring_emit(engine, 0); /* high dword */ 207 intel_ring_emit(engine, MI_NOOP); 208 intel_ring_advance(engine); 209 210 ret = intel_ring_begin(req, 6); 211 if (ret) 212 return ret; 213 214 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5)); 215 intel_ring_emit(engine, PIPE_CONTROL_QW_WRITE); 216 intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ 217 intel_ring_emit(engine, 0); 218 intel_ring_emit(engine, 0); 219 intel_ring_emit(engine, MI_NOOP); 220 intel_ring_advance(engine); 221 222 return 0; 223 } 224 225 static int 226 gen6_render_ring_flush(struct drm_i915_gem_request *req, 227 u32 invalidate_domains, u32 flush_domains) 228 { 229 struct intel_engine_cs *engine = req->engine; 230 u32 flags = 0; 231 u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; 232 int ret; 233 234 /* Force SNB workarounds for PIPE_CONTROL flushes */ 235 ret = intel_emit_post_sync_nonzero_flush(req); 236 if (ret) 237 return ret; 238 239 /* Just flush everything. Experiments have shown that reducing the 240 * number of bits based on the write domains has little performance 241 * impact. 242 */ 243 if (flush_domains) { 244 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 245 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 246 /* 247 * Ensure that any following seqno writes only happen 248 * when the render cache is indeed flushed. 249 */ 250 flags |= PIPE_CONTROL_CS_STALL; 251 } 252 if (invalidate_domains) { 253 flags |= PIPE_CONTROL_TLB_INVALIDATE; 254 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 255 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 256 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 257 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 258 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 259 /* 260 * TLB invalidate requires a post-sync write. 261 */ 262 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; 263 } 264 265 ret = intel_ring_begin(req, 4); 266 if (ret) 267 return ret; 268 269 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); 270 intel_ring_emit(engine, flags); 271 intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); 272 intel_ring_emit(engine, 0); 273 intel_ring_advance(engine); 274 275 return 0; 276 } 277 278 static int 279 gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) 280 { 281 struct intel_engine_cs *engine = req->engine; 282 int ret; 283 284 ret = intel_ring_begin(req, 4); 285 if (ret) 286 return ret; 287 288 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); 289 intel_ring_emit(engine, PIPE_CONTROL_CS_STALL | 290 PIPE_CONTROL_STALL_AT_SCOREBOARD); 291 intel_ring_emit(engine, 0); 292 intel_ring_emit(engine, 0); 293 intel_ring_advance(engine); 294 295 return 0; 296 } 297 298 static int 299 gen7_render_ring_flush(struct drm_i915_gem_request *req, 300 u32 invalidate_domains, u32 flush_domains) 301 { 302 struct intel_engine_cs *engine = req->engine; 303 u32 flags = 0; 304 u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; 305 int ret; 306 307 /* 308 * Ensure that any following seqno writes only happen when the render 309 * cache is indeed flushed. 310 * 311 * Workaround: 4th PIPE_CONTROL command (except the ones with only 312 * read-cache invalidate bits set) must have the CS_STALL bit set. We 313 * don't try to be clever and just set it unconditionally. 314 */ 315 flags |= PIPE_CONTROL_CS_STALL; 316 317 /* Just flush everything. Experiments have shown that reducing the 318 * number of bits based on the write domains has little performance 319 * impact. 320 */ 321 if (flush_domains) { 322 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 323 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 324 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; 325 flags |= PIPE_CONTROL_FLUSH_ENABLE; 326 } 327 if (invalidate_domains) { 328 flags |= PIPE_CONTROL_TLB_INVALIDATE; 329 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 330 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 331 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 332 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 333 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 334 flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; 335 /* 336 * TLB invalidate requires a post-sync write. 337 */ 338 flags |= PIPE_CONTROL_QW_WRITE; 339 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; 340 341 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; 342 343 /* Workaround: we must issue a pipe_control with CS-stall bit 344 * set before a pipe_control command that has the state cache 345 * invalidate bit set. */ 346 gen7_render_ring_cs_stall_wa(req); 347 } 348 349 ret = intel_ring_begin(req, 4); 350 if (ret) 351 return ret; 352 353 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); 354 intel_ring_emit(engine, flags); 355 intel_ring_emit(engine, scratch_addr); 356 intel_ring_emit(engine, 0); 357 intel_ring_advance(engine); 358 359 return 0; 360 } 361 362 static int 363 gen8_emit_pipe_control(struct drm_i915_gem_request *req, 364 u32 flags, u32 scratch_addr) 365 { 366 struct intel_engine_cs *engine = req->engine; 367 int ret; 368 369 ret = intel_ring_begin(req, 6); 370 if (ret) 371 return ret; 372 373 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(6)); 374 intel_ring_emit(engine, flags); 375 intel_ring_emit(engine, scratch_addr); 376 intel_ring_emit(engine, 0); 377 intel_ring_emit(engine, 0); 378 intel_ring_emit(engine, 0); 379 intel_ring_advance(engine); 380 381 return 0; 382 } 383 384 static int 385 gen8_render_ring_flush(struct drm_i915_gem_request *req, 386 u32 invalidate_domains, u32 flush_domains) 387 { 388 u32 flags = 0; 389 u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; 390 int ret; 391 392 flags |= PIPE_CONTROL_CS_STALL; 393 394 if (flush_domains) { 395 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 396 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 397 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; 398 flags |= PIPE_CONTROL_FLUSH_ENABLE; 399 } 400 if (invalidate_domains) { 401 flags |= PIPE_CONTROL_TLB_INVALIDATE; 402 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 403 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 404 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 405 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 406 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 407 flags |= PIPE_CONTROL_QW_WRITE; 408 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; 409 410 /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ 411 ret = gen8_emit_pipe_control(req, 412 PIPE_CONTROL_CS_STALL | 413 PIPE_CONTROL_STALL_AT_SCOREBOARD, 414 0); 415 if (ret) 416 return ret; 417 } 418 419 return gen8_emit_pipe_control(req, flags, scratch_addr); 420 } 421 422 static void ring_write_tail(struct intel_engine_cs *engine, 423 u32 value) 424 { 425 struct drm_i915_private *dev_priv = engine->i915; 426 I915_WRITE_TAIL(engine, value); 427 } 428 429 u64 intel_ring_get_active_head(struct intel_engine_cs *engine) 430 { 431 struct drm_i915_private *dev_priv = engine->i915; 432 u64 acthd; 433 434 if (INTEL_GEN(dev_priv) >= 8) 435 acthd = I915_READ64_2x32(RING_ACTHD(engine->mmio_base), 436 RING_ACTHD_UDW(engine->mmio_base)); 437 else if (INTEL_GEN(dev_priv) >= 4) 438 acthd = I915_READ(RING_ACTHD(engine->mmio_base)); 439 else 440 acthd = I915_READ(ACTHD); 441 442 return acthd; 443 } 444 445 static void ring_setup_phys_status_page(struct intel_engine_cs *engine) 446 { 447 struct drm_i915_private *dev_priv = engine->i915; 448 u32 addr; 449 450 addr = dev_priv->status_page_dmah->busaddr; 451 if (INTEL_GEN(dev_priv) >= 4) 452 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0; 453 I915_WRITE(HWS_PGA, addr); 454 } 455 456 static void intel_ring_setup_status_page(struct intel_engine_cs *engine) 457 { 458 struct drm_i915_private *dev_priv = engine->i915; 459 i915_reg_t mmio; 460 461 /* The ring status page addresses are no longer next to the rest of 462 * the ring registers as of gen7. 463 */ 464 if (IS_GEN7(dev_priv)) { 465 switch (engine->id) { 466 case RCS: 467 mmio = RENDER_HWS_PGA_GEN7; 468 break; 469 case BCS: 470 mmio = BLT_HWS_PGA_GEN7; 471 break; 472 /* 473 * VCS2 actually doesn't exist on Gen7. Only shut up 474 * gcc switch check warning 475 */ 476 case VCS2: 477 case VCS: 478 mmio = BSD_HWS_PGA_GEN7; 479 break; 480 case VECS: 481 mmio = VEBOX_HWS_PGA_GEN7; 482 break; 483 } 484 } else if (IS_GEN6(dev_priv)) { 485 mmio = RING_HWS_PGA_GEN6(engine->mmio_base); 486 } else { 487 /* XXX: gen8 returns to sanity */ 488 mmio = RING_HWS_PGA(engine->mmio_base); 489 } 490 491 I915_WRITE(mmio, (u32)engine->status_page.gfx_addr); 492 POSTING_READ(mmio); 493 494 /* 495 * Flush the TLB for this page 496 * 497 * FIXME: These two bits have disappeared on gen8, so a question 498 * arises: do we still need this and if so how should we go about 499 * invalidating the TLB? 500 */ 501 if (IS_GEN(dev_priv, 6, 7)) { 502 i915_reg_t reg = RING_INSTPM(engine->mmio_base); 503 504 /* ring should be idle before issuing a sync flush*/ 505 WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0); 506 507 I915_WRITE(reg, 508 _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | 509 INSTPM_SYNC_FLUSH)); 510 if (intel_wait_for_register(dev_priv, 511 reg, INSTPM_SYNC_FLUSH, 0, 512 1000)) 513 DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n", 514 engine->name); 515 } 516 } 517 518 static bool stop_ring(struct intel_engine_cs *engine) 519 { 520 struct drm_i915_private *dev_priv = engine->i915; 521 522 if (!IS_GEN2(dev_priv)) { 523 I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING)); 524 if (intel_wait_for_register(dev_priv, 525 RING_MI_MODE(engine->mmio_base), 526 MODE_IDLE, 527 MODE_IDLE, 528 1000)) { 529 DRM_ERROR("%s : timed out trying to stop ring\n", 530 engine->name); 531 /* Sometimes we observe that the idle flag is not 532 * set even though the ring is empty. So double 533 * check before giving up. 534 */ 535 if (I915_READ_HEAD(engine) != I915_READ_TAIL(engine)) 536 return false; 537 } 538 } 539 540 I915_WRITE_CTL(engine, 0); 541 I915_WRITE_HEAD(engine, 0); 542 engine->write_tail(engine, 0); 543 544 if (!IS_GEN2(dev_priv)) { 545 (void)I915_READ_CTL(engine); 546 I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); 547 } 548 549 return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0; 550 } 551 552 static int init_ring_common(struct intel_engine_cs *engine) 553 { 554 struct drm_i915_private *dev_priv = engine->i915; 555 struct intel_ringbuffer *ringbuf = engine->buffer; 556 struct drm_i915_gem_object *obj = ringbuf->obj; 557 int ret = 0; 558 559 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 560 561 if (!stop_ring(engine)) { 562 /* G45 ring initialization often fails to reset head to zero */ 563 DRM_DEBUG_KMS("%s head not reset to zero " 564 "ctl %08x head %08x tail %08x start %08x\n", 565 engine->name, 566 I915_READ_CTL(engine), 567 I915_READ_HEAD(engine), 568 I915_READ_TAIL(engine), 569 I915_READ_START(engine)); 570 571 if (!stop_ring(engine)) { 572 DRM_ERROR("failed to set %s head to zero " 573 "ctl %08x head %08x tail %08x start %08x\n", 574 engine->name, 575 I915_READ_CTL(engine), 576 I915_READ_HEAD(engine), 577 I915_READ_TAIL(engine), 578 I915_READ_START(engine)); 579 ret = -EIO; 580 goto out; 581 } 582 } 583 584 if (I915_NEED_GFX_HWS(dev_priv)) 585 intel_ring_setup_status_page(engine); 586 else 587 ring_setup_phys_status_page(engine); 588 589 /* Enforce ordering by reading HEAD register back */ 590 I915_READ_HEAD(engine); 591 592 /* Initialize the ring. This must happen _after_ we've cleared the ring 593 * registers with the above sequence (the readback of the HEAD registers 594 * also enforces ordering), otherwise the hw might lose the new ring 595 * register values. */ 596 I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(obj)); 597 598 /* WaClearRingBufHeadRegAtInit:ctg,elk */ 599 if (I915_READ_HEAD(engine)) 600 DRM_DEBUG("%s initialization failed [head=%08x], fudging\n", 601 engine->name, I915_READ_HEAD(engine)); 602 I915_WRITE_HEAD(engine, 0); 603 (void)I915_READ_HEAD(engine); 604 605 I915_WRITE_CTL(engine, 606 ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) 607 | RING_VALID); 608 609 /* If the head is still not zero, the ring is dead */ 610 if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 && 611 I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) && 612 (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) { 613 DRM_ERROR("%s initialization failed " 614 "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n", 615 engine->name, 616 I915_READ_CTL(engine), 617 I915_READ_CTL(engine) & RING_VALID, 618 I915_READ_HEAD(engine), I915_READ_TAIL(engine), 619 I915_READ_START(engine), 620 (unsigned long)i915_gem_obj_ggtt_offset(obj)); 621 ret = -EIO; 622 goto out; 623 } 624 625 ringbuf->last_retired_head = -1; 626 ringbuf->head = I915_READ_HEAD(engine); 627 ringbuf->tail = I915_READ_TAIL(engine) & TAIL_ADDR; 628 intel_ring_update_space(ringbuf); 629 630 intel_engine_init_hangcheck(engine); 631 632 out: 633 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 634 635 return ret; 636 } 637 638 void intel_fini_pipe_control(struct intel_engine_cs *engine) 639 { 640 if (engine->scratch.obj == NULL) 641 return; 642 643 i915_gem_object_ggtt_unpin(engine->scratch.obj); 644 i915_gem_object_put(engine->scratch.obj); 645 engine->scratch.obj = NULL; 646 } 647 648 int intel_init_pipe_control(struct intel_engine_cs *engine, int size) 649 { 650 struct drm_i915_gem_object *obj; 651 int ret; 652 653 WARN_ON(engine->scratch.obj); 654 655 obj = i915_gem_object_create_stolen(&engine->i915->drm, size); 656 if (!obj) 657 obj = i915_gem_object_create(&engine->i915->drm, size); 658 if (IS_ERR(obj)) { 659 DRM_ERROR("Failed to allocate scratch page\n"); 660 ret = PTR_ERR(obj); 661 goto err; 662 } 663 664 ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_HIGH); 665 if (ret) 666 goto err_unref; 667 668 engine->scratch.obj = obj; 669 engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj); 670 DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", 671 engine->name, engine->scratch.gtt_offset); 672 return 0; 673 674 err_unref: 675 i915_gem_object_put(engine->scratch.obj); 676 err: 677 return ret; 678 } 679 680 static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) 681 { 682 struct intel_engine_cs *engine = req->engine; 683 struct i915_workarounds *w = &req->i915->workarounds; 684 int ret, i; 685 686 if (w->count == 0) 687 return 0; 688 689 engine->gpu_caches_dirty = true; 690 ret = intel_ring_flush_all_caches(req); 691 if (ret) 692 return ret; 693 694 ret = intel_ring_begin(req, (w->count * 2 + 2)); 695 if (ret) 696 return ret; 697 698 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(w->count)); 699 for (i = 0; i < w->count; i++) { 700 intel_ring_emit_reg(engine, w->reg[i].addr); 701 intel_ring_emit(engine, w->reg[i].value); 702 } 703 intel_ring_emit(engine, MI_NOOP); 704 705 intel_ring_advance(engine); 706 707 engine->gpu_caches_dirty = true; 708 ret = intel_ring_flush_all_caches(req); 709 if (ret) 710 return ret; 711 712 DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); 713 714 return 0; 715 } 716 717 static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) 718 { 719 int ret; 720 721 ret = intel_ring_workarounds_emit(req); 722 if (ret != 0) 723 return ret; 724 725 ret = i915_gem_render_state_init(req); 726 if (ret) 727 return ret; 728 729 return 0; 730 } 731 732 static int wa_add(struct drm_i915_private *dev_priv, 733 i915_reg_t addr, 734 const u32 mask, const u32 val) 735 { 736 const u32 idx = dev_priv->workarounds.count; 737 738 if (WARN_ON(idx >= I915_MAX_WA_REGS)) 739 return -ENOSPC; 740 741 dev_priv->workarounds.reg[idx].addr = addr; 742 dev_priv->workarounds.reg[idx].value = val; 743 dev_priv->workarounds.reg[idx].mask = mask; 744 745 dev_priv->workarounds.count++; 746 747 return 0; 748 } 749 750 #define WA_REG(addr, mask, val) do { \ 751 const int r = wa_add(dev_priv, (addr), (mask), (val)); \ 752 if (r) \ 753 return r; \ 754 } while (0) 755 756 #define WA_SET_BIT_MASKED(addr, mask) \ 757 WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) 758 759 #define WA_CLR_BIT_MASKED(addr, mask) \ 760 WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) 761 762 #define WA_SET_FIELD_MASKED(addr, mask, value) \ 763 WA_REG(addr, mask, _MASKED_FIELD(mask, value)) 764 765 #define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) 766 #define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) 767 768 #define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) 769 770 static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, 771 i915_reg_t reg) 772 { 773 struct drm_i915_private *dev_priv = engine->i915; 774 struct i915_workarounds *wa = &dev_priv->workarounds; 775 const uint32_t index = wa->hw_whitelist_count[engine->id]; 776 777 if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) 778 return -EINVAL; 779 780 WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), 781 i915_mmio_reg_offset(reg)); 782 wa->hw_whitelist_count[engine->id]++; 783 784 return 0; 785 } 786 787 static int gen8_init_workarounds(struct intel_engine_cs *engine) 788 { 789 struct drm_i915_private *dev_priv = engine->i915; 790 791 WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); 792 793 /* WaDisableAsyncFlipPerfMode:bdw,chv */ 794 WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); 795 796 /* WaDisablePartialInstShootdown:bdw,chv */ 797 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, 798 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); 799 800 /* Use Force Non-Coherent whenever executing a 3D context. This is a 801 * workaround for for a possible hang in the unlikely event a TLB 802 * invalidation occurs during a PSD flush. 803 */ 804 /* WaForceEnableNonCoherent:bdw,chv */ 805 /* WaHdcDisableFetchWhenMasked:bdw,chv */ 806 WA_SET_BIT_MASKED(HDC_CHICKEN0, 807 HDC_DONOT_FETCH_MEM_WHEN_MASKED | 808 HDC_FORCE_NON_COHERENT); 809 810 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: 811 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping 812 * polygons in the same 8x4 pixel/sample area to be processed without 813 * stalling waiting for the earlier ones to write to Hierarchical Z 814 * buffer." 815 * 816 * This optimization is off by default for BDW and CHV; turn it on. 817 */ 818 WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); 819 820 /* Wa4x4STCOptimizationDisable:bdw,chv */ 821 WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); 822 823 /* 824 * BSpec recommends 8x4 when MSAA is used, 825 * however in practice 16x4 seems fastest. 826 * 827 * Note that PS/WM thread counts depend on the WIZ hashing 828 * disable bit, which we don't touch here, but it's good 829 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 830 */ 831 WA_SET_FIELD_MASKED(GEN7_GT_MODE, 832 GEN6_WIZ_HASHING_MASK, 833 GEN6_WIZ_HASHING_16x4); 834 835 return 0; 836 } 837 838 static int bdw_init_workarounds(struct intel_engine_cs *engine) 839 { 840 struct drm_i915_private *dev_priv = engine->i915; 841 int ret; 842 843 ret = gen8_init_workarounds(engine); 844 if (ret) 845 return ret; 846 847 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ 848 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); 849 850 /* WaDisableDopClockGating:bdw */ 851 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, 852 DOP_CLOCK_GATING_DISABLE); 853 854 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, 855 GEN8_SAMPLER_POWER_BYPASS_DIS); 856 857 WA_SET_BIT_MASKED(HDC_CHICKEN0, 858 /* WaForceContextSaveRestoreNonCoherent:bdw */ 859 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | 860 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ 861 (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); 862 863 return 0; 864 } 865 866 static int chv_init_workarounds(struct intel_engine_cs *engine) 867 { 868 struct drm_i915_private *dev_priv = engine->i915; 869 int ret; 870 871 ret = gen8_init_workarounds(engine); 872 if (ret) 873 return ret; 874 875 /* WaDisableThreadStallDopClockGating:chv */ 876 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); 877 878 /* Improve HiZ throughput on CHV. */ 879 WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); 880 881 return 0; 882 } 883 884 static int gen9_init_workarounds(struct intel_engine_cs *engine) 885 { 886 struct drm_i915_private *dev_priv = engine->i915; 887 int ret; 888 889 /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl */ 890 I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); 891 892 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl */ 893 I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | 894 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); 895 896 /* WaDisableKillLogic:bxt,skl,kbl */ 897 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | 898 ECOCHK_DIS_TLB); 899 900 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl */ 901 /* WaDisablePartialInstShootdown:skl,bxt,kbl */ 902 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, 903 FLOW_CONTROL_ENABLE | 904 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); 905 906 /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ 907 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, 908 GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); 909 910 /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */ 911 if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) || 912 IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) 913 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, 914 GEN9_DG_MIRROR_FIX_ENABLE); 915 916 /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */ 917 if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) || 918 IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { 919 WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, 920 GEN9_RHWO_OPTIMIZATION_DISABLE); 921 /* 922 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set 923 * but we do that in per ctx batchbuffer as there is an issue 924 * with this register not getting restored on ctx restore 925 */ 926 } 927 928 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl */ 929 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ 930 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, 931 GEN9_ENABLE_YV12_BUGFIX | 932 GEN9_ENABLE_GPGPU_PREEMPTION); 933 934 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl */ 935 /* WaDisablePartialResolveInVc:skl,bxt,kbl */ 936 WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | 937 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); 938 939 /* WaCcsTlbPrefetchDisable:skl,bxt,kbl */ 940 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, 941 GEN9_CCS_TLB_PREFETCH_ENABLE); 942 943 /* WaDisableMaskBasedCammingInRCC:skl,bxt */ 944 if (IS_SKL_REVID(dev_priv, SKL_REVID_C0, SKL_REVID_C0) || 945 IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) 946 WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, 947 PIXEL_MASK_CAMMING_DISABLE); 948 949 /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */ 950 WA_SET_BIT_MASKED(HDC_CHICKEN0, 951 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | 952 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); 953 954 /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are 955 * both tied to WaForceContextSaveRestoreNonCoherent 956 * in some hsds for skl. We keep the tie for all gen9. The 957 * documentation is a bit hazy and so we want to get common behaviour, 958 * even though there is no clear evidence we would need both on kbl/bxt. 959 * This area has been source of system hangs so we play it safe 960 * and mimic the skl regardless of what bspec says. 961 * 962 * Use Force Non-Coherent whenever executing a 3D context. This 963 * is a workaround for a possible hang in the unlikely event 964 * a TLB invalidation occurs during a PSD flush. 965 */ 966 967 /* WaForceEnableNonCoherent:skl,bxt,kbl */ 968 WA_SET_BIT_MASKED(HDC_CHICKEN0, 969 HDC_FORCE_NON_COHERENT); 970 971 /* WaDisableHDCInvalidation:skl,bxt,kbl */ 972 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | 973 BDW_DISABLE_HDC_INVALIDATION); 974 975 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */ 976 if (IS_SKYLAKE(dev_priv) || 977 IS_KABYLAKE(dev_priv) || 978 IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) 979 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, 980 GEN8_SAMPLER_POWER_BYPASS_DIS); 981 982 /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl */ 983 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); 984 985 /* WaOCLCoherentLineFlush:skl,bxt,kbl */ 986 I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | 987 GEN8_LQSC_FLUSH_COHERENT_LINES)); 988 989 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt */ 990 ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); 991 if (ret) 992 return ret; 993 994 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */ 995 ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); 996 if (ret) 997 return ret; 998 999 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl */ 1000 ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); 1001 if (ret) 1002 return ret; 1003 1004 return 0; 1005 } 1006 1007 static int skl_tune_iz_hashing(struct intel_engine_cs *engine) 1008 { 1009 struct drm_i915_private *dev_priv = engine->i915; 1010 u8 vals[3] = { 0, 0, 0 }; 1011 unsigned int i; 1012 1013 for (i = 0; i < 3; i++) { 1014 u8 ss; 1015 1016 /* 1017 * Only consider slices where one, and only one, subslice has 7 1018 * EUs 1019 */ 1020 if (!is_power_of_2(dev_priv->info.subslice_7eu[i])) 1021 continue; 1022 1023 /* 1024 * subslice_7eu[i] != 0 (because of the check above) and 1025 * ss_max == 4 (maximum number of subslices possible per slice) 1026 * 1027 * -> 0 <= ss <= 3; 1028 */ 1029 ss = ffs(dev_priv->info.subslice_7eu[i]) - 1; 1030 vals[i] = 3 - ss; 1031 } 1032 1033 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) 1034 return 0; 1035 1036 /* Tune IZ hashing. See intel_device_info_runtime_init() */ 1037 WA_SET_FIELD_MASKED(GEN7_GT_MODE, 1038 GEN9_IZ_HASHING_MASK(2) | 1039 GEN9_IZ_HASHING_MASK(1) | 1040 GEN9_IZ_HASHING_MASK(0), 1041 GEN9_IZ_HASHING(2, vals[2]) | 1042 GEN9_IZ_HASHING(1, vals[1]) | 1043 GEN9_IZ_HASHING(0, vals[0])); 1044 1045 return 0; 1046 } 1047 1048 static int skl_init_workarounds(struct intel_engine_cs *engine) 1049 { 1050 struct drm_i915_private *dev_priv = engine->i915; 1051 int ret; 1052 1053 ret = gen9_init_workarounds(engine); 1054 if (ret) 1055 return ret; 1056 1057 /* 1058 * Actual WA is to disable percontext preemption granularity control 1059 * until D0 which is the default case so this is equivalent to 1060 * !WaDisablePerCtxtPreemptionGranularityControl:skl 1061 */ 1062 if (IS_SKL_REVID(dev_priv, SKL_REVID_E0, REVID_FOREVER)) { 1063 I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, 1064 _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); 1065 } 1066 1067 if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0)) { 1068 /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */ 1069 I915_WRITE(FF_SLICE_CS_CHICKEN2, 1070 _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE)); 1071 } 1072 1073 /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes 1074 * involving this register should also be added to WA batch as required. 1075 */ 1076 if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0)) 1077 /* WaDisableLSQCROPERFforOCL:skl */ 1078 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | 1079 GEN8_LQSC_RO_PERF_DIS); 1080 1081 /* WaEnableGapsTsvCreditFix:skl */ 1082 if (IS_SKL_REVID(dev_priv, SKL_REVID_C0, REVID_FOREVER)) { 1083 I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | 1084 GEN9_GAPS_TSV_CREDIT_DISABLE)); 1085 } 1086 1087 /* WaDisablePowerCompilerClockGating:skl */ 1088 if (IS_SKL_REVID(dev_priv, SKL_REVID_B0, SKL_REVID_B0)) 1089 WA_SET_BIT_MASKED(HIZ_CHICKEN, 1090 BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE); 1091 1092 /* WaBarrierPerformanceFixDisable:skl */ 1093 if (IS_SKL_REVID(dev_priv, SKL_REVID_C0, SKL_REVID_D0)) 1094 WA_SET_BIT_MASKED(HDC_CHICKEN0, 1095 HDC_FENCE_DEST_SLM_DISABLE | 1096 HDC_BARRIER_PERFORMANCE_DISABLE); 1097 1098 /* WaDisableSbeCacheDispatchPortSharing:skl */ 1099 if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0)) 1100 WA_SET_BIT_MASKED( 1101 GEN7_HALF_SLICE_CHICKEN1, 1102 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); 1103 1104 /* WaDisableGafsUnitClkGating:skl */ 1105 WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); 1106 1107 /* WaInPlaceDecompressionHang:skl */ 1108 if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) 1109 WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, 1110 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 1111 1112 /* WaDisableLSQCROPERFforOCL:skl */ 1113 ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); 1114 if (ret) 1115 return ret; 1116 1117 return skl_tune_iz_hashing(engine); 1118 } 1119 1120 static int bxt_init_workarounds(struct intel_engine_cs *engine) 1121 { 1122 struct drm_i915_private *dev_priv = engine->i915; 1123 int ret; 1124 1125 ret = gen9_init_workarounds(engine); 1126 if (ret) 1127 return ret; 1128 1129 /* WaStoreMultiplePTEenable:bxt */ 1130 /* This is a requirement according to Hardware specification */ 1131 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) 1132 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); 1133 1134 /* WaSetClckGatingDisableMedia:bxt */ 1135 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { 1136 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & 1137 ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE)); 1138 } 1139 1140 /* WaDisableThreadStallDopClockGating:bxt */ 1141 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, 1142 STALL_DOP_GATING_DISABLE); 1143 1144 /* WaDisablePooledEuLoadBalancingFix:bxt */ 1145 if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { 1146 WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2, 1147 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); 1148 } 1149 1150 /* WaDisableSbeCacheDispatchPortSharing:bxt */ 1151 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) { 1152 WA_SET_BIT_MASKED( 1153 GEN7_HALF_SLICE_CHICKEN1, 1154 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); 1155 } 1156 1157 /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ 1158 /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ 1159 /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ 1160 /* WaDisableLSQCROPERFforOCL:bxt */ 1161 if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { 1162 ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1); 1163 if (ret) 1164 return ret; 1165 1166 ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); 1167 if (ret) 1168 return ret; 1169 } 1170 1171 /* WaProgramL3SqcReg1DefaultForPerf:bxt */ 1172 if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) 1173 I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | 1174 L3_HIGH_PRIO_CREDITS(2)); 1175 1176 /* WaToEnableHwFixForPushConstHWBug:bxt */ 1177 if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) 1178 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 1179 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 1180 1181 /* WaInPlaceDecompressionHang:bxt */ 1182 if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) 1183 WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, 1184 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 1185 1186 return 0; 1187 } 1188 1189 static int kbl_init_workarounds(struct intel_engine_cs *engine) 1190 { 1191 struct drm_i915_private *dev_priv = engine->i915; 1192 int ret; 1193 1194 ret = gen9_init_workarounds(engine); 1195 if (ret) 1196 return ret; 1197 1198 /* WaEnableGapsTsvCreditFix:kbl */ 1199 I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | 1200 GEN9_GAPS_TSV_CREDIT_DISABLE)); 1201 1202 /* WaDisableDynamicCreditSharing:kbl */ 1203 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) 1204 WA_SET_BIT(GAMT_CHKN_BIT_REG, 1205 GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); 1206 1207 /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ 1208 if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) 1209 WA_SET_BIT_MASKED(HDC_CHICKEN0, 1210 HDC_FENCE_DEST_SLM_DISABLE); 1211 1212 /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes 1213 * involving this register should also be added to WA batch as required. 1214 */ 1215 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) 1216 /* WaDisableLSQCROPERFforOCL:kbl */ 1217 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | 1218 GEN8_LQSC_RO_PERF_DIS); 1219 1220 /* WaToEnableHwFixForPushConstHWBug:kbl */ 1221 if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) 1222 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, 1223 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); 1224 1225 /* WaDisableGafsUnitClkGating:kbl */ 1226 WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); 1227 1228 /* WaDisableSbeCacheDispatchPortSharing:kbl */ 1229 WA_SET_BIT_MASKED( 1230 GEN7_HALF_SLICE_CHICKEN1, 1231 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); 1232 1233 /* WaInPlaceDecompressionHang:kbl */ 1234 WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, 1235 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); 1236 1237 /* WaDisableLSQCROPERFforOCL:kbl */ 1238 ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); 1239 if (ret) 1240 return ret; 1241 1242 return 0; 1243 } 1244 1245 int init_workarounds_ring(struct intel_engine_cs *engine) 1246 { 1247 struct drm_i915_private *dev_priv = engine->i915; 1248 1249 WARN_ON(engine->id != RCS); 1250 1251 dev_priv->workarounds.count = 0; 1252 dev_priv->workarounds.hw_whitelist_count[RCS] = 0; 1253 1254 if (IS_BROADWELL(dev_priv)) 1255 return bdw_init_workarounds(engine); 1256 1257 if (IS_CHERRYVIEW(dev_priv)) 1258 return chv_init_workarounds(engine); 1259 1260 if (IS_SKYLAKE(dev_priv)) 1261 return skl_init_workarounds(engine); 1262 1263 if (IS_BROXTON(dev_priv)) 1264 return bxt_init_workarounds(engine); 1265 1266 if (IS_KABYLAKE(dev_priv)) 1267 return kbl_init_workarounds(engine); 1268 1269 return 0; 1270 } 1271 1272 static int init_render_ring(struct intel_engine_cs *engine) 1273 { 1274 struct drm_i915_private *dev_priv = engine->i915; 1275 int ret = init_ring_common(engine); 1276 if (ret) 1277 return ret; 1278 1279 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ 1280 if (IS_GEN(dev_priv, 4, 6)) 1281 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); 1282 1283 /* We need to disable the AsyncFlip performance optimisations in order 1284 * to use MI_WAIT_FOR_EVENT within the CS. It should already be 1285 * programmed to '1' on all products. 1286 * 1287 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv 1288 */ 1289 if (IS_GEN(dev_priv, 6, 7)) 1290 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); 1291 1292 /* Required for the hardware to program scanline values for waiting */ 1293 /* WaEnableFlushTlbInvalidationMode:snb */ 1294 if (IS_GEN6(dev_priv)) 1295 I915_WRITE(GFX_MODE, 1296 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT)); 1297 1298 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ 1299 if (IS_GEN7(dev_priv)) 1300 I915_WRITE(GFX_MODE_GEN7, 1301 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) | 1302 _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); 1303 1304 if (IS_GEN6(dev_priv)) { 1305 /* From the Sandybridge PRM, volume 1 part 3, page 24: 1306 * "If this bit is set, STCunit will have LRA as replacement 1307 * policy. [...] This bit must be reset. LRA replacement 1308 * policy is not supported." 1309 */ 1310 I915_WRITE(CACHE_MODE_0, 1311 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); 1312 } 1313 1314 if (IS_GEN(dev_priv, 6, 7)) 1315 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); 1316 1317 if (INTEL_INFO(dev_priv)->gen >= 6) 1318 I915_WRITE_IMR(engine, ~engine->irq_keep_mask); 1319 1320 return init_workarounds_ring(engine); 1321 } 1322 1323 static void render_ring_cleanup(struct intel_engine_cs *engine) 1324 { 1325 struct drm_i915_private *dev_priv = engine->i915; 1326 1327 if (dev_priv->semaphore_obj) { 1328 i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj); 1329 i915_gem_object_put(dev_priv->semaphore_obj); 1330 dev_priv->semaphore_obj = NULL; 1331 } 1332 1333 intel_fini_pipe_control(engine); 1334 } 1335 1336 static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, 1337 unsigned int num_dwords) 1338 { 1339 #define MBOX_UPDATE_DWORDS 8 1340 struct intel_engine_cs *signaller = signaller_req->engine; 1341 struct drm_i915_private *dev_priv = signaller_req->i915; 1342 struct intel_engine_cs *waiter; 1343 enum intel_engine_id id; 1344 int ret, num_rings; 1345 1346 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); 1347 num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; 1348 #undef MBOX_UPDATE_DWORDS 1349 1350 ret = intel_ring_begin(signaller_req, num_dwords); 1351 if (ret) 1352 return ret; 1353 1354 for_each_engine_id(waiter, dev_priv, id) { 1355 u64 gtt_offset = signaller->semaphore.signal_ggtt[id]; 1356 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) 1357 continue; 1358 1359 intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6)); 1360 intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB | 1361 PIPE_CONTROL_QW_WRITE | 1362 PIPE_CONTROL_CS_STALL); 1363 intel_ring_emit(signaller, lower_32_bits(gtt_offset)); 1364 intel_ring_emit(signaller, upper_32_bits(gtt_offset)); 1365 intel_ring_emit(signaller, signaller_req->fence.seqno); 1366 intel_ring_emit(signaller, 0); 1367 intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | 1368 MI_SEMAPHORE_TARGET(waiter->hw_id)); 1369 intel_ring_emit(signaller, 0); 1370 } 1371 1372 return 0; 1373 } 1374 1375 static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, 1376 unsigned int num_dwords) 1377 { 1378 #define MBOX_UPDATE_DWORDS 6 1379 struct intel_engine_cs *signaller = signaller_req->engine; 1380 struct drm_i915_private *dev_priv = signaller_req->i915; 1381 struct intel_engine_cs *waiter; 1382 enum intel_engine_id id; 1383 int ret, num_rings; 1384 1385 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); 1386 num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; 1387 #undef MBOX_UPDATE_DWORDS 1388 1389 ret = intel_ring_begin(signaller_req, num_dwords); 1390 if (ret) 1391 return ret; 1392 1393 for_each_engine_id(waiter, dev_priv, id) { 1394 u64 gtt_offset = signaller->semaphore.signal_ggtt[id]; 1395 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) 1396 continue; 1397 1398 intel_ring_emit(signaller, (MI_FLUSH_DW + 1) | 1399 MI_FLUSH_DW_OP_STOREDW); 1400 intel_ring_emit(signaller, lower_32_bits(gtt_offset) | 1401 MI_FLUSH_DW_USE_GTT); 1402 intel_ring_emit(signaller, upper_32_bits(gtt_offset)); 1403 intel_ring_emit(signaller, signaller_req->fence.seqno); 1404 intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | 1405 MI_SEMAPHORE_TARGET(waiter->hw_id)); 1406 intel_ring_emit(signaller, 0); 1407 } 1408 1409 return 0; 1410 } 1411 1412 static int gen6_signal(struct drm_i915_gem_request *signaller_req, 1413 unsigned int num_dwords) 1414 { 1415 struct intel_engine_cs *signaller = signaller_req->engine; 1416 struct drm_i915_private *dev_priv = signaller_req->i915; 1417 struct intel_engine_cs *useless; 1418 enum intel_engine_id id; 1419 int ret, num_rings; 1420 1421 #define MBOX_UPDATE_DWORDS 3 1422 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); 1423 num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2); 1424 #undef MBOX_UPDATE_DWORDS 1425 1426 ret = intel_ring_begin(signaller_req, num_dwords); 1427 if (ret) 1428 return ret; 1429 1430 for_each_engine_id(useless, dev_priv, id) { 1431 i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[id]; 1432 1433 if (i915_mmio_reg_valid(mbox_reg)) { 1434 intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); 1435 intel_ring_emit_reg(signaller, mbox_reg); 1436 intel_ring_emit(signaller, signaller_req->fence.seqno); 1437 } 1438 } 1439 1440 /* If num_dwords was rounded, make sure the tail pointer is correct */ 1441 if (num_rings % 2 == 0) 1442 intel_ring_emit(signaller, MI_NOOP); 1443 1444 return 0; 1445 } 1446 1447 /** 1448 * gen6_add_request - Update the semaphore mailbox registers 1449 * 1450 * @request - request to write to the ring 1451 * 1452 * Update the mailbox registers in the *other* rings with the current seqno. 1453 * This acts like a signal in the canonical semaphore. 1454 */ 1455 static int 1456 gen6_add_request(struct drm_i915_gem_request *req) 1457 { 1458 struct intel_engine_cs *engine = req->engine; 1459 int ret; 1460 1461 if (engine->semaphore.signal) 1462 ret = engine->semaphore.signal(req, 4); 1463 else 1464 ret = intel_ring_begin(req, 4); 1465 1466 if (ret) 1467 return ret; 1468 1469 intel_ring_emit(engine, MI_STORE_DWORD_INDEX); 1470 intel_ring_emit(engine, 1471 I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); 1472 intel_ring_emit(engine, req->fence.seqno); 1473 intel_ring_emit(engine, MI_USER_INTERRUPT); 1474 __intel_ring_advance(engine); 1475 1476 return 0; 1477 } 1478 1479 static int 1480 gen8_render_add_request(struct drm_i915_gem_request *req) 1481 { 1482 struct intel_engine_cs *engine = req->engine; 1483 int ret; 1484 1485 if (engine->semaphore.signal) 1486 ret = engine->semaphore.signal(req, 8); 1487 else 1488 ret = intel_ring_begin(req, 8); 1489 if (ret) 1490 return ret; 1491 1492 intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(6)); 1493 intel_ring_emit(engine, (PIPE_CONTROL_GLOBAL_GTT_IVB | 1494 PIPE_CONTROL_CS_STALL | 1495 PIPE_CONTROL_QW_WRITE)); 1496 intel_ring_emit(engine, intel_hws_seqno_address(req->engine)); 1497 intel_ring_emit(engine, 0); 1498 intel_ring_emit(engine, i915_gem_request_get_seqno(req)); 1499 /* We're thrashing one dword of HWS. */ 1500 intel_ring_emit(engine, 0); 1501 intel_ring_emit(engine, MI_USER_INTERRUPT); 1502 intel_ring_emit(engine, MI_NOOP); 1503 __intel_ring_advance(engine); 1504 1505 return 0; 1506 } 1507 1508 static inline bool i915_gem_has_seqno_wrapped(struct drm_i915_private *dev_priv, 1509 u32 seqno) 1510 { 1511 return dev_priv->last_seqno < seqno; 1512 } 1513 1514 /** 1515 * intel_ring_sync - sync the waiter to the signaller on seqno 1516 * 1517 * @waiter - ring that is waiting 1518 * @signaller - ring which has, or will signal 1519 * @seqno - seqno which the waiter will block on 1520 */ 1521 1522 static int 1523 gen8_ring_sync(struct drm_i915_gem_request *waiter_req, 1524 struct intel_engine_cs *signaller, 1525 u32 seqno) 1526 { 1527 struct intel_engine_cs *waiter = waiter_req->engine; 1528 struct drm_i915_private *dev_priv = waiter_req->i915; 1529 u64 offset = GEN8_WAIT_OFFSET(waiter, signaller->id); 1530 struct i915_hw_ppgtt *ppgtt; 1531 int ret; 1532 1533 ret = intel_ring_begin(waiter_req, 4); 1534 if (ret) 1535 return ret; 1536 1537 intel_ring_emit(waiter, MI_SEMAPHORE_WAIT | 1538 MI_SEMAPHORE_GLOBAL_GTT | 1539 MI_SEMAPHORE_SAD_GTE_SDD); 1540 intel_ring_emit(waiter, seqno); 1541 intel_ring_emit(waiter, lower_32_bits(offset)); 1542 intel_ring_emit(waiter, upper_32_bits(offset)); 1543 intel_ring_advance(waiter); 1544 1545 /* When the !RCS engines idle waiting upon a semaphore, they lose their 1546 * pagetables and we must reload them before executing the batch. 1547 * We do this on the i915_switch_context() following the wait and 1548 * before the dispatch. 1549 */ 1550 ppgtt = waiter_req->ctx->ppgtt; 1551 if (ppgtt && waiter_req->engine->id != RCS) 1552 ppgtt->pd_dirty_rings |= intel_engine_flag(waiter_req->engine); 1553 return 0; 1554 } 1555 1556 static int 1557 gen6_ring_sync(struct drm_i915_gem_request *waiter_req, 1558 struct intel_engine_cs *signaller, 1559 u32 seqno) 1560 { 1561 struct intel_engine_cs *waiter = waiter_req->engine; 1562 u32 dw1 = MI_SEMAPHORE_MBOX | 1563 MI_SEMAPHORE_COMPARE | 1564 MI_SEMAPHORE_REGISTER; 1565 u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id]; 1566 int ret; 1567 1568 /* Throughout all of the GEM code, seqno passed implies our current 1569 * seqno is >= the last seqno executed. However for hardware the 1570 * comparison is strictly greater than. 1571 */ 1572 seqno -= 1; 1573 1574 WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); 1575 1576 ret = intel_ring_begin(waiter_req, 4); 1577 if (ret) 1578 return ret; 1579 1580 /* If seqno wrap happened, omit the wait with no-ops */ 1581 if (likely(!i915_gem_has_seqno_wrapped(waiter_req->i915, seqno))) { 1582 intel_ring_emit(waiter, dw1 | wait_mbox); 1583 intel_ring_emit(waiter, seqno); 1584 intel_ring_emit(waiter, 0); 1585 intel_ring_emit(waiter, MI_NOOP); 1586 } else { 1587 intel_ring_emit(waiter, MI_NOOP); 1588 intel_ring_emit(waiter, MI_NOOP); 1589 intel_ring_emit(waiter, MI_NOOP); 1590 intel_ring_emit(waiter, MI_NOOP); 1591 } 1592 intel_ring_advance(waiter); 1593 1594 return 0; 1595 } 1596 1597 static void 1598 gen5_seqno_barrier(struct intel_engine_cs *engine) 1599 { 1600 /* MI_STORE are internally buffered by the GPU and not flushed 1601 * either by MI_FLUSH or SyncFlush or any other combination of 1602 * MI commands. 1603 * 1604 * "Only the submission of the store operation is guaranteed. 1605 * The write result will be complete (coherent) some time later 1606 * (this is practically a finite period but there is no guaranteed 1607 * latency)." 1608 * 1609 * Empirically, we observe that we need a delay of at least 75us to 1610 * be sure that the seqno write is visible by the CPU. 1611 */ 1612 usleep_range(125, 250); 1613 } 1614 1615 static void 1616 gen6_seqno_barrier(struct intel_engine_cs *engine) 1617 { 1618 struct drm_i915_private *dev_priv = engine->i915; 1619 1620 /* Workaround to force correct ordering between irq and seqno writes on 1621 * ivb (and maybe also on snb) by reading from a CS register (like 1622 * ACTHD) before reading the status page. 1623 * 1624 * Note that this effectively stalls the read by the time it takes to 1625 * do a memory transaction, which more or less ensures that the write 1626 * from the GPU has sufficient time to invalidate the CPU cacheline. 1627 * Alternatively we could delay the interrupt from the CS ring to give 1628 * the write time to land, but that would incur a delay after every 1629 * batch i.e. much more frequent than a delay when waiting for the 1630 * interrupt (with the same net latency). 1631 * 1632 * Also note that to prevent whole machine hangs on gen7, we have to 1633 * take the spinlock to guard against concurrent cacheline access. 1634 */ 1635 spin_lock_irq(&dev_priv->uncore.lock); 1636 POSTING_READ_FW(RING_ACTHD(engine->mmio_base)); 1637 spin_unlock_irq(&dev_priv->uncore.lock); 1638 } 1639 1640 static void 1641 gen5_irq_enable(struct intel_engine_cs *engine) 1642 { 1643 gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask); 1644 } 1645 1646 static void 1647 gen5_irq_disable(struct intel_engine_cs *engine) 1648 { 1649 gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask); 1650 } 1651 1652 static void 1653 i9xx_irq_enable(struct intel_engine_cs *engine) 1654 { 1655 struct drm_i915_private *dev_priv = engine->i915; 1656 1657 dev_priv->irq_mask &= ~engine->irq_enable_mask; 1658 I915_WRITE(IMR, dev_priv->irq_mask); 1659 POSTING_READ_FW(RING_IMR(engine->mmio_base)); 1660 } 1661 1662 static void 1663 i9xx_irq_disable(struct intel_engine_cs *engine) 1664 { 1665 struct drm_i915_private *dev_priv = engine->i915; 1666 1667 dev_priv->irq_mask |= engine->irq_enable_mask; 1668 I915_WRITE(IMR, dev_priv->irq_mask); 1669 } 1670 1671 static void 1672 i8xx_irq_enable(struct intel_engine_cs *engine) 1673 { 1674 struct drm_i915_private *dev_priv = engine->i915; 1675 1676 dev_priv->irq_mask &= ~engine->irq_enable_mask; 1677 I915_WRITE16(IMR, dev_priv->irq_mask); 1678 POSTING_READ16(RING_IMR(engine->mmio_base)); 1679 } 1680 1681 static void 1682 i8xx_irq_disable(struct intel_engine_cs *engine) 1683 { 1684 struct drm_i915_private *dev_priv = engine->i915; 1685 1686 dev_priv->irq_mask |= engine->irq_enable_mask; 1687 I915_WRITE16(IMR, dev_priv->irq_mask); 1688 } 1689 1690 static int 1691 bsd_ring_flush(struct drm_i915_gem_request *req, 1692 u32 invalidate_domains, 1693 u32 flush_domains) 1694 { 1695 struct intel_engine_cs *engine = req->engine; 1696 int ret; 1697 1698 ret = intel_ring_begin(req, 2); 1699 if (ret) 1700 return ret; 1701 1702 intel_ring_emit(engine, MI_FLUSH); 1703 intel_ring_emit(engine, MI_NOOP); 1704 intel_ring_advance(engine); 1705 return 0; 1706 } 1707 1708 static int 1709 i9xx_add_request(struct drm_i915_gem_request *req) 1710 { 1711 struct intel_engine_cs *engine = req->engine; 1712 int ret; 1713 1714 ret = intel_ring_begin(req, 4); 1715 if (ret) 1716 return ret; 1717 1718 intel_ring_emit(engine, MI_STORE_DWORD_INDEX); 1719 intel_ring_emit(engine, 1720 I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); 1721 intel_ring_emit(engine, req->fence.seqno); 1722 intel_ring_emit(engine, MI_USER_INTERRUPT); 1723 __intel_ring_advance(engine); 1724 1725 return 0; 1726 } 1727 1728 static void 1729 gen6_irq_enable(struct intel_engine_cs *engine) 1730 { 1731 struct drm_i915_private *dev_priv = engine->i915; 1732 1733 I915_WRITE_IMR(engine, 1734 ~(engine->irq_enable_mask | 1735 engine->irq_keep_mask)); 1736 gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask); 1737 } 1738 1739 static void 1740 gen6_irq_disable(struct intel_engine_cs *engine) 1741 { 1742 struct drm_i915_private *dev_priv = engine->i915; 1743 1744 I915_WRITE_IMR(engine, ~engine->irq_keep_mask); 1745 gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask); 1746 } 1747 1748 static void 1749 hsw_vebox_irq_enable(struct intel_engine_cs *engine) 1750 { 1751 struct drm_i915_private *dev_priv = engine->i915; 1752 1753 I915_WRITE_IMR(engine, ~engine->irq_enable_mask); 1754 gen6_enable_pm_irq(dev_priv, engine->irq_enable_mask); 1755 } 1756 1757 static void 1758 hsw_vebox_irq_disable(struct intel_engine_cs *engine) 1759 { 1760 struct drm_i915_private *dev_priv = engine->i915; 1761 1762 I915_WRITE_IMR(engine, ~0); 1763 gen6_disable_pm_irq(dev_priv, engine->irq_enable_mask); 1764 } 1765 1766 static void 1767 gen8_irq_enable(struct intel_engine_cs *engine) 1768 { 1769 struct drm_i915_private *dev_priv = engine->i915; 1770 1771 I915_WRITE_IMR(engine, 1772 ~(engine->irq_enable_mask | 1773 engine->irq_keep_mask)); 1774 POSTING_READ_FW(RING_IMR(engine->mmio_base)); 1775 } 1776 1777 static void 1778 gen8_irq_disable(struct intel_engine_cs *engine) 1779 { 1780 struct drm_i915_private *dev_priv = engine->i915; 1781 1782 I915_WRITE_IMR(engine, ~engine->irq_keep_mask); 1783 } 1784 1785 static int 1786 i965_dispatch_execbuffer(struct drm_i915_gem_request *req, 1787 u64 offset, u32 length, 1788 unsigned dispatch_flags) 1789 { 1790 struct intel_engine_cs *engine = req->engine; 1791 int ret; 1792 1793 ret = intel_ring_begin(req, 2); 1794 if (ret) 1795 return ret; 1796 1797 intel_ring_emit(engine, 1798 MI_BATCH_BUFFER_START | 1799 MI_BATCH_GTT | 1800 (dispatch_flags & I915_DISPATCH_SECURE ? 1801 0 : MI_BATCH_NON_SECURE_I965)); 1802 intel_ring_emit(engine, offset); 1803 intel_ring_advance(engine); 1804 1805 return 0; 1806 } 1807 1808 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */ 1809 #define I830_BATCH_LIMIT (256*1024) 1810 #define I830_TLB_ENTRIES (2) 1811 #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) 1812 static int 1813 i830_dispatch_execbuffer(struct drm_i915_gem_request *req, 1814 u64 offset, u32 len, 1815 unsigned dispatch_flags) 1816 { 1817 struct intel_engine_cs *engine = req->engine; 1818 u32 cs_offset = engine->scratch.gtt_offset; 1819 int ret; 1820 1821 ret = intel_ring_begin(req, 6); 1822 if (ret) 1823 return ret; 1824 1825 /* Evict the invalid PTE TLBs */ 1826 intel_ring_emit(engine, COLOR_BLT_CMD | BLT_WRITE_RGBA); 1827 intel_ring_emit(engine, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); 1828 intel_ring_emit(engine, I830_TLB_ENTRIES << 16 | 4); /* load each page */ 1829 intel_ring_emit(engine, cs_offset); 1830 intel_ring_emit(engine, 0xdeadbeef); 1831 intel_ring_emit(engine, MI_NOOP); 1832 intel_ring_advance(engine); 1833 1834 if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { 1835 if (len > I830_BATCH_LIMIT) 1836 return -ENOSPC; 1837 1838 ret = intel_ring_begin(req, 6 + 2); 1839 if (ret) 1840 return ret; 1841 1842 /* Blit the batch (which has now all relocs applied) to the 1843 * stable batch scratch bo area (so that the CS never 1844 * stumbles over its tlb invalidation bug) ... 1845 */ 1846 intel_ring_emit(engine, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); 1847 intel_ring_emit(engine, 1848 BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096); 1849 intel_ring_emit(engine, DIV_ROUND_UP(len, 4096) << 16 | 4096); 1850 intel_ring_emit(engine, cs_offset); 1851 intel_ring_emit(engine, 4096); 1852 intel_ring_emit(engine, offset); 1853 1854 intel_ring_emit(engine, MI_FLUSH); 1855 intel_ring_emit(engine, MI_NOOP); 1856 intel_ring_advance(engine); 1857 1858 /* ... and execute it. */ 1859 offset = cs_offset; 1860 } 1861 1862 ret = intel_ring_begin(req, 2); 1863 if (ret) 1864 return ret; 1865 1866 intel_ring_emit(engine, MI_BATCH_BUFFER_START | MI_BATCH_GTT); 1867 intel_ring_emit(engine, offset | (dispatch_flags & I915_DISPATCH_SECURE ? 1868 0 : MI_BATCH_NON_SECURE)); 1869 intel_ring_advance(engine); 1870 1871 return 0; 1872 } 1873 1874 static int 1875 i915_dispatch_execbuffer(struct drm_i915_gem_request *req, 1876 u64 offset, u32 len, 1877 unsigned dispatch_flags) 1878 { 1879 struct intel_engine_cs *engine = req->engine; 1880 int ret; 1881 1882 ret = intel_ring_begin(req, 2); 1883 if (ret) 1884 return ret; 1885 1886 intel_ring_emit(engine, MI_BATCH_BUFFER_START | MI_BATCH_GTT); 1887 intel_ring_emit(engine, offset | (dispatch_flags & I915_DISPATCH_SECURE ? 1888 0 : MI_BATCH_NON_SECURE)); 1889 intel_ring_advance(engine); 1890 1891 return 0; 1892 } 1893 1894 static void cleanup_phys_status_page(struct intel_engine_cs *engine) 1895 { 1896 struct drm_i915_private *dev_priv = engine->i915; 1897 1898 if (!dev_priv->status_page_dmah) 1899 return; 1900 1901 drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah); 1902 engine->status_page.page_addr = NULL; 1903 } 1904 1905 static void cleanup_status_page(struct intel_engine_cs *engine) 1906 { 1907 struct drm_i915_gem_object *obj; 1908 1909 obj = engine->status_page.obj; 1910 if (obj == NULL) 1911 return; 1912 1913 kunmap(sg_page(obj->pages->sgl)); 1914 i915_gem_object_ggtt_unpin(obj); 1915 i915_gem_object_put(obj); 1916 engine->status_page.obj = NULL; 1917 } 1918 1919 static int init_status_page(struct intel_engine_cs *engine) 1920 { 1921 struct drm_i915_gem_object *obj = engine->status_page.obj; 1922 1923 if (obj == NULL) { 1924 unsigned flags; 1925 int ret; 1926 1927 obj = i915_gem_object_create(&engine->i915->drm, 4096); 1928 if (IS_ERR(obj)) { 1929 DRM_ERROR("Failed to allocate status page\n"); 1930 return PTR_ERR(obj); 1931 } 1932 1933 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); 1934 if (ret) 1935 goto err_unref; 1936 1937 flags = 0; 1938 if (!HAS_LLC(engine->i915)) 1939 /* On g33, we cannot place HWS above 256MiB, so 1940 * restrict its pinning to the low mappable arena. 1941 * Though this restriction is not documented for 1942 * gen4, gen5, or byt, they also behave similarly 1943 * and hang if the HWS is placed at the top of the 1944 * GTT. To generalise, it appears that all !llc 1945 * platforms have issues with us placing the HWS 1946 * above the mappable region (even though we never 1947 * actualy map it). 1948 */ 1949 flags |= PIN_MAPPABLE; 1950 ret = i915_gem_obj_ggtt_pin(obj, 4096, flags); 1951 if (ret) { 1952 err_unref: 1953 i915_gem_object_put(obj); 1954 return ret; 1955 } 1956 1957 engine->status_page.obj = obj; 1958 } 1959 1960 engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj); 1961 engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl)); 1962 memset(engine->status_page.page_addr, 0, PAGE_SIZE); 1963 1964 DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", 1965 engine->name, engine->status_page.gfx_addr); 1966 1967 return 0; 1968 } 1969 1970 static int init_phys_status_page(struct intel_engine_cs *engine) 1971 { 1972 struct drm_i915_private *dev_priv = engine->i915; 1973 1974 if (!dev_priv->status_page_dmah) { 1975 dev_priv->status_page_dmah = 1976 drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); 1977 if (!dev_priv->status_page_dmah) 1978 return -ENOMEM; 1979 } 1980 1981 engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr; 1982 memset(engine->status_page.page_addr, 0, PAGE_SIZE); 1983 1984 return 0; 1985 } 1986 1987 void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf) 1988 { 1989 GEM_BUG_ON(!ringbuf->vma); 1990 GEM_BUG_ON(!ringbuf->vaddr); 1991 1992 if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen) 1993 i915_gem_object_unpin_map(ringbuf->obj); 1994 else 1995 i915_vma_unpin_iomap(ringbuf->vma); 1996 ringbuf->vaddr = NULL; 1997 1998 i915_gem_object_ggtt_unpin(ringbuf->obj); 1999 ringbuf->vma = NULL; 2000 } 2001 2002 int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, 2003 struct intel_ringbuffer *ringbuf) 2004 { 2005 struct drm_i915_gem_object *obj = ringbuf->obj; 2006 /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ 2007 unsigned flags = PIN_OFFSET_BIAS | 4096; 2008 void *addr; 2009 int ret; 2010 2011 if (HAS_LLC(dev_priv) && !obj->stolen) { 2012 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, flags); 2013 if (ret) 2014 return ret; 2015 2016 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2017 if (ret) 2018 goto err_unpin; 2019 2020 addr = i915_gem_object_pin_map(obj); 2021 if (IS_ERR(addr)) { 2022 ret = PTR_ERR(addr); 2023 goto err_unpin; 2024 } 2025 } else { 2026 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, 2027 flags | PIN_MAPPABLE); 2028 if (ret) 2029 return ret; 2030 2031 ret = i915_gem_object_set_to_gtt_domain(obj, true); 2032 if (ret) 2033 goto err_unpin; 2034 2035 /* Access through the GTT requires the device to be awake. */ 2036 assert_rpm_wakelock_held(dev_priv); 2037 2038 addr = (void __force *) 2039 i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj)); 2040 if (IS_ERR(addr)) { 2041 ret = PTR_ERR(addr); 2042 goto err_unpin; 2043 } 2044 } 2045 2046 ringbuf->vaddr = addr; 2047 ringbuf->vma = i915_gem_obj_to_ggtt(obj); 2048 return 0; 2049 2050 err_unpin: 2051 i915_gem_object_ggtt_unpin(obj); 2052 return ret; 2053 } 2054 2055 static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) 2056 { 2057 i915_gem_object_put(ringbuf->obj); 2058 ringbuf->obj = NULL; 2059 } 2060 2061 static int intel_alloc_ringbuffer_obj(struct drm_device *dev, 2062 struct intel_ringbuffer *ringbuf) 2063 { 2064 struct drm_i915_gem_object *obj; 2065 2066 obj = NULL; 2067 if (!HAS_LLC(dev)) 2068 obj = i915_gem_object_create_stolen(dev, ringbuf->size); 2069 if (obj == NULL) 2070 obj = i915_gem_object_create(dev, ringbuf->size); 2071 if (IS_ERR(obj)) 2072 return PTR_ERR(obj); 2073 2074 /* mark ring buffers as read-only from GPU side by default */ 2075 obj->gt_ro = 1; 2076 2077 ringbuf->obj = obj; 2078 2079 return 0; 2080 } 2081 2082 struct intel_ringbuffer * 2083 intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size) 2084 { 2085 struct intel_ringbuffer *ring; 2086 int ret; 2087 2088 ring = kzalloc(sizeof(*ring), GFP_KERNEL); 2089 if (ring == NULL) { 2090 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", 2091 engine->name); 2092 return ERR_PTR(-ENOMEM); 2093 } 2094 2095 ring->engine = engine; 2096 list_add(&ring->link, &engine->buffers); 2097 2098 ring->size = size; 2099 /* Workaround an erratum on the i830 which causes a hang if 2100 * the TAIL pointer points to within the last 2 cachelines 2101 * of the buffer. 2102 */ 2103 ring->effective_size = size; 2104 if (IS_I830(engine->i915) || IS_845G(engine->i915)) 2105 ring->effective_size -= 2 * CACHELINE_BYTES; 2106 2107 ring->last_retired_head = -1; 2108 intel_ring_update_space(ring); 2109 2110 ret = intel_alloc_ringbuffer_obj(&engine->i915->drm, ring); 2111 if (ret) { 2112 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n", 2113 engine->name, ret); 2114 list_del(&ring->link); 2115 kfree(ring); 2116 return ERR_PTR(ret); 2117 } 2118 2119 return ring; 2120 } 2121 2122 void 2123 intel_ringbuffer_free(struct intel_ringbuffer *ring) 2124 { 2125 intel_destroy_ringbuffer_obj(ring); 2126 list_del(&ring->link); 2127 kfree(ring); 2128 } 2129 2130 static int intel_ring_context_pin(struct i915_gem_context *ctx, 2131 struct intel_engine_cs *engine) 2132 { 2133 struct intel_context *ce = &ctx->engine[engine->id]; 2134 int ret; 2135 2136 lockdep_assert_held(&ctx->i915->drm.struct_mutex); 2137 2138 if (ce->pin_count++) 2139 return 0; 2140 2141 if (ce->state) { 2142 ret = i915_gem_obj_ggtt_pin(ce->state, ctx->ggtt_alignment, 0); 2143 if (ret) 2144 goto error; 2145 } 2146 2147 /* The kernel context is only used as a placeholder for flushing the 2148 * active context. It is never used for submitting user rendering and 2149 * as such never requires the golden render context, and so we can skip 2150 * emitting it when we switch to the kernel context. This is required 2151 * as during eviction we cannot allocate and pin the renderstate in 2152 * order to initialise the context. 2153 */ 2154 if (ctx == ctx->i915->kernel_context) 2155 ce->initialised = true; 2156 2157 i915_gem_context_get(ctx); 2158 return 0; 2159 2160 error: 2161 ce->pin_count = 0; 2162 return ret; 2163 } 2164 2165 static void intel_ring_context_unpin(struct i915_gem_context *ctx, 2166 struct intel_engine_cs *engine) 2167 { 2168 struct intel_context *ce = &ctx->engine[engine->id]; 2169 2170 lockdep_assert_held(&ctx->i915->drm.struct_mutex); 2171 2172 if (--ce->pin_count) 2173 return; 2174 2175 if (ce->state) 2176 i915_gem_object_ggtt_unpin(ce->state); 2177 2178 i915_gem_context_put(ctx); 2179 } 2180 2181 static int intel_init_ring_buffer(struct intel_engine_cs *engine) 2182 { 2183 struct drm_i915_private *dev_priv = engine->i915; 2184 struct intel_ringbuffer *ringbuf; 2185 int ret; 2186 2187 WARN_ON(engine->buffer); 2188 2189 intel_engine_setup_common(engine); 2190 2191 memset(engine->semaphore.sync_seqno, 0, 2192 sizeof(engine->semaphore.sync_seqno)); 2193 2194 ret = intel_engine_init_common(engine); 2195 if (ret) 2196 goto error; 2197 2198 /* We may need to do things with the shrinker which 2199 * require us to immediately switch back to the default 2200 * context. This can cause a problem as pinning the 2201 * default context also requires GTT space which may not 2202 * be available. To avoid this we always pin the default 2203 * context. 2204 */ 2205 ret = intel_ring_context_pin(dev_priv->kernel_context, engine); 2206 if (ret) 2207 goto error; 2208 2209 ringbuf = intel_engine_create_ringbuffer(engine, 32 * PAGE_SIZE); 2210 if (IS_ERR(ringbuf)) { 2211 ret = PTR_ERR(ringbuf); 2212 goto error; 2213 } 2214 engine->buffer = ringbuf; 2215 2216 if (I915_NEED_GFX_HWS(dev_priv)) { 2217 ret = init_status_page(engine); 2218 if (ret) 2219 goto error; 2220 } else { 2221 WARN_ON(engine->id != RCS); 2222 ret = init_phys_status_page(engine); 2223 if (ret) 2224 goto error; 2225 } 2226 2227 ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ringbuf); 2228 if (ret) { 2229 DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", 2230 engine->name, ret); 2231 intel_destroy_ringbuffer_obj(ringbuf); 2232 goto error; 2233 } 2234 2235 return 0; 2236 2237 error: 2238 intel_cleanup_engine(engine); 2239 return ret; 2240 } 2241 2242 void intel_cleanup_engine(struct intel_engine_cs *engine) 2243 { 2244 struct drm_i915_private *dev_priv; 2245 2246 if (!intel_engine_initialized(engine)) 2247 return; 2248 2249 dev_priv = engine->i915; 2250 2251 if (engine->buffer) { 2252 intel_stop_engine(engine); 2253 WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0); 2254 2255 intel_unpin_ringbuffer_obj(engine->buffer); 2256 intel_ringbuffer_free(engine->buffer); 2257 engine->buffer = NULL; 2258 } 2259 2260 if (engine->cleanup) 2261 engine->cleanup(engine); 2262 2263 if (I915_NEED_GFX_HWS(dev_priv)) { 2264 cleanup_status_page(engine); 2265 } else { 2266 WARN_ON(engine->id != RCS); 2267 cleanup_phys_status_page(engine); 2268 } 2269 2270 i915_cmd_parser_fini_ring(engine); 2271 i915_gem_batch_pool_fini(&engine->batch_pool); 2272 intel_engine_fini_breadcrumbs(engine); 2273 2274 intel_ring_context_unpin(dev_priv->kernel_context, engine); 2275 2276 engine->i915 = NULL; 2277 } 2278 2279 int intel_engine_idle(struct intel_engine_cs *engine) 2280 { 2281 struct drm_i915_gem_request *req; 2282 2283 /* Wait upon the last request to be completed */ 2284 if (list_empty(&engine->request_list)) 2285 return 0; 2286 2287 req = list_entry(engine->request_list.prev, 2288 struct drm_i915_gem_request, 2289 list); 2290 2291 /* Make sure we do not trigger any retires */ 2292 return __i915_wait_request(req, 2293 req->i915->mm.interruptible, 2294 NULL, NULL); 2295 } 2296 2297 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) 2298 { 2299 int ret; 2300 2301 /* Flush enough space to reduce the likelihood of waiting after 2302 * we start building the request - in which case we will just 2303 * have to repeat work. 2304 */ 2305 request->reserved_space += LEGACY_REQUEST_SIZE; 2306 2307 request->ringbuf = request->engine->buffer; 2308 2309 ret = intel_ring_begin(request, 0); 2310 if (ret) 2311 return ret; 2312 2313 request->reserved_space -= LEGACY_REQUEST_SIZE; 2314 return 0; 2315 } 2316 2317 static int wait_for_space(struct drm_i915_gem_request *req, int bytes) 2318 { 2319 struct intel_ringbuffer *ringbuf = req->ringbuf; 2320 struct intel_engine_cs *engine = req->engine; 2321 struct drm_i915_gem_request *target; 2322 2323 intel_ring_update_space(ringbuf); 2324 if (ringbuf->space >= bytes) 2325 return 0; 2326 2327 /* 2328 * Space is reserved in the ringbuffer for finalising the request, 2329 * as that cannot be allowed to fail. During request finalisation, 2330 * reserved_space is set to 0 to stop the overallocation and the 2331 * assumption is that then we never need to wait (which has the 2332 * risk of failing with EINTR). 2333 * 2334 * See also i915_gem_request_alloc() and i915_add_request(). 2335 */ 2336 GEM_BUG_ON(!req->reserved_space); 2337 2338 list_for_each_entry(target, &engine->request_list, list) { 2339 unsigned space; 2340 2341 /* 2342 * The request queue is per-engine, so can contain requests 2343 * from multiple ringbuffers. Here, we must ignore any that 2344 * aren't from the ringbuffer we're considering. 2345 */ 2346 if (target->ringbuf != ringbuf) 2347 continue; 2348 2349 /* Would completion of this request free enough space? */ 2350 space = __intel_ring_space(target->postfix, ringbuf->tail, 2351 ringbuf->size); 2352 if (space >= bytes) 2353 break; 2354 } 2355 2356 if (WARN_ON(&target->list == &engine->request_list)) 2357 return -ENOSPC; 2358 2359 return i915_wait_request(target); 2360 } 2361 2362 int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) 2363 { 2364 struct intel_ringbuffer *ringbuf = req->ringbuf; 2365 int remain_actual = ringbuf->size - ringbuf->tail; 2366 int remain_usable = ringbuf->effective_size - ringbuf->tail; 2367 int bytes = num_dwords * sizeof(u32); 2368 int total_bytes, wait_bytes; 2369 bool need_wrap = false; 2370 2371 total_bytes = bytes + req->reserved_space; 2372 2373 if (unlikely(bytes > remain_usable)) { 2374 /* 2375 * Not enough space for the basic request. So need to flush 2376 * out the remainder and then wait for base + reserved. 2377 */ 2378 wait_bytes = remain_actual + total_bytes; 2379 need_wrap = true; 2380 } else if (unlikely(total_bytes > remain_usable)) { 2381 /* 2382 * The base request will fit but the reserved space 2383 * falls off the end. So we don't need an immediate wrap 2384 * and only need to effectively wait for the reserved 2385 * size space from the start of ringbuffer. 2386 */ 2387 wait_bytes = remain_actual + req->reserved_space; 2388 } else { 2389 /* No wrapping required, just waiting. */ 2390 wait_bytes = total_bytes; 2391 } 2392 2393 if (wait_bytes > ringbuf->space) { 2394 int ret = wait_for_space(req, wait_bytes); 2395 if (unlikely(ret)) 2396 return ret; 2397 2398 intel_ring_update_space(ringbuf); 2399 if (unlikely(ringbuf->space < wait_bytes)) 2400 return -EAGAIN; 2401 } 2402 2403 if (unlikely(need_wrap)) { 2404 GEM_BUG_ON(remain_actual > ringbuf->space); 2405 GEM_BUG_ON(ringbuf->tail + remain_actual > ringbuf->size); 2406 2407 /* Fill the tail with MI_NOOP */ 2408 memset(ringbuf->vaddr + ringbuf->tail, 0, remain_actual); 2409 ringbuf->tail = 0; 2410 ringbuf->space -= remain_actual; 2411 } 2412 2413 ringbuf->space -= bytes; 2414 GEM_BUG_ON(ringbuf->space < 0); 2415 return 0; 2416 } 2417 2418 /* Align the ring tail to a cacheline boundary */ 2419 int intel_ring_cacheline_align(struct drm_i915_gem_request *req) 2420 { 2421 struct intel_engine_cs *engine = req->engine; 2422 int num_dwords = (engine->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); 2423 int ret; 2424 2425 if (num_dwords == 0) 2426 return 0; 2427 2428 num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; 2429 ret = intel_ring_begin(req, num_dwords); 2430 if (ret) 2431 return ret; 2432 2433 while (num_dwords--) 2434 intel_ring_emit(engine, MI_NOOP); 2435 2436 intel_ring_advance(engine); 2437 2438 return 0; 2439 } 2440 2441 void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno) 2442 { 2443 struct drm_i915_private *dev_priv = engine->i915; 2444 2445 /* Our semaphore implementation is strictly monotonic (i.e. we proceed 2446 * so long as the semaphore value in the register/page is greater 2447 * than the sync value), so whenever we reset the seqno, 2448 * so long as we reset the tracking semaphore value to 0, it will 2449 * always be before the next request's seqno. If we don't reset 2450 * the semaphore value, then when the seqno moves backwards all 2451 * future waits will complete instantly (causing rendering corruption). 2452 */ 2453 if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { 2454 I915_WRITE(RING_SYNC_0(engine->mmio_base), 0); 2455 I915_WRITE(RING_SYNC_1(engine->mmio_base), 0); 2456 if (HAS_VEBOX(dev_priv)) 2457 I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); 2458 } 2459 if (dev_priv->semaphore_obj) { 2460 struct drm_i915_gem_object *obj = dev_priv->semaphore_obj; 2461 struct page *page = i915_gem_object_get_dirty_page(obj, 0); 2462 void *semaphores = kmap(page); 2463 memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), 2464 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size); 2465 kunmap(page); 2466 } 2467 memset(engine->semaphore.sync_seqno, 0, 2468 sizeof(engine->semaphore.sync_seqno)); 2469 2470 intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); 2471 if (engine->irq_seqno_barrier) 2472 engine->irq_seqno_barrier(engine); 2473 engine->last_submitted_seqno = seqno; 2474 2475 engine->hangcheck.seqno = seqno; 2476 2477 /* After manually advancing the seqno, fake the interrupt in case 2478 * there are any waiters for that seqno. 2479 */ 2480 rcu_read_lock(); 2481 intel_engine_wakeup(engine); 2482 rcu_read_unlock(); 2483 } 2484 2485 static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, 2486 u32 value) 2487 { 2488 struct drm_i915_private *dev_priv = engine->i915; 2489 2490 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 2491 2492 /* Every tail move must follow the sequence below */ 2493 2494 /* Disable notification that the ring is IDLE. The GT 2495 * will then assume that it is busy and bring it out of rc6. 2496 */ 2497 I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL, 2498 _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); 2499 2500 /* Clear the context id. Here be magic! */ 2501 I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0); 2502 2503 /* Wait for the ring not to be idle, i.e. for it to wake up. */ 2504 if (intel_wait_for_register_fw(dev_priv, 2505 GEN6_BSD_SLEEP_PSMI_CONTROL, 2506 GEN6_BSD_SLEEP_INDICATOR, 2507 0, 2508 50)) 2509 DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); 2510 2511 /* Now that the ring is fully powered up, update the tail */ 2512 I915_WRITE_FW(RING_TAIL(engine->mmio_base), value); 2513 POSTING_READ_FW(RING_TAIL(engine->mmio_base)); 2514 2515 /* Let the ring send IDLE messages to the GT again, 2516 * and so let it sleep to conserve power when idle. 2517 */ 2518 I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL, 2519 _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); 2520 2521 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 2522 } 2523 2524 static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, 2525 u32 invalidate, u32 flush) 2526 { 2527 struct intel_engine_cs *engine = req->engine; 2528 uint32_t cmd; 2529 int ret; 2530 2531 ret = intel_ring_begin(req, 4); 2532 if (ret) 2533 return ret; 2534 2535 cmd = MI_FLUSH_DW; 2536 if (INTEL_GEN(req->i915) >= 8) 2537 cmd += 1; 2538 2539 /* We always require a command barrier so that subsequent 2540 * commands, such as breadcrumb interrupts, are strictly ordered 2541 * wrt the contents of the write cache being flushed to memory 2542 * (and thus being coherent from the CPU). 2543 */ 2544 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; 2545 2546 /* 2547 * Bspec vol 1c.5 - video engine command streamer: 2548 * "If ENABLED, all TLBs will be invalidated once the flush 2549 * operation is complete. This bit is only valid when the 2550 * Post-Sync Operation field is a value of 1h or 3h." 2551 */ 2552 if (invalidate & I915_GEM_GPU_DOMAINS) 2553 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; 2554 2555 intel_ring_emit(engine, cmd); 2556 intel_ring_emit(engine, 2557 I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); 2558 if (INTEL_GEN(req->i915) >= 8) { 2559 intel_ring_emit(engine, 0); /* upper addr */ 2560 intel_ring_emit(engine, 0); /* value */ 2561 } else { 2562 intel_ring_emit(engine, 0); 2563 intel_ring_emit(engine, MI_NOOP); 2564 } 2565 intel_ring_advance(engine); 2566 return 0; 2567 } 2568 2569 static int 2570 gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, 2571 u64 offset, u32 len, 2572 unsigned dispatch_flags) 2573 { 2574 struct intel_engine_cs *engine = req->engine; 2575 bool ppgtt = USES_PPGTT(engine->dev) && 2576 !(dispatch_flags & I915_DISPATCH_SECURE); 2577 int ret; 2578 2579 ret = intel_ring_begin(req, 4); 2580 if (ret) 2581 return ret; 2582 2583 /* FIXME(BDW): Address space and security selectors. */ 2584 intel_ring_emit(engine, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | 2585 (dispatch_flags & I915_DISPATCH_RS ? 2586 MI_BATCH_RESOURCE_STREAMER : 0)); 2587 intel_ring_emit(engine, lower_32_bits(offset)); 2588 intel_ring_emit(engine, upper_32_bits(offset)); 2589 intel_ring_emit(engine, MI_NOOP); 2590 intel_ring_advance(engine); 2591 2592 return 0; 2593 } 2594 2595 static int 2596 hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, 2597 u64 offset, u32 len, 2598 unsigned dispatch_flags) 2599 { 2600 struct intel_engine_cs *engine = req->engine; 2601 int ret; 2602 2603 ret = intel_ring_begin(req, 2); 2604 if (ret) 2605 return ret; 2606 2607 intel_ring_emit(engine, 2608 MI_BATCH_BUFFER_START | 2609 (dispatch_flags & I915_DISPATCH_SECURE ? 2610 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | 2611 (dispatch_flags & I915_DISPATCH_RS ? 2612 MI_BATCH_RESOURCE_STREAMER : 0)); 2613 /* bit0-7 is the length on GEN6+ */ 2614 intel_ring_emit(engine, offset); 2615 intel_ring_advance(engine); 2616 2617 return 0; 2618 } 2619 2620 static int 2621 gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, 2622 u64 offset, u32 len, 2623 unsigned dispatch_flags) 2624 { 2625 struct intel_engine_cs *engine = req->engine; 2626 int ret; 2627 2628 ret = intel_ring_begin(req, 2); 2629 if (ret) 2630 return ret; 2631 2632 intel_ring_emit(engine, 2633 MI_BATCH_BUFFER_START | 2634 (dispatch_flags & I915_DISPATCH_SECURE ? 2635 0 : MI_BATCH_NON_SECURE_I965)); 2636 /* bit0-7 is the length on GEN6+ */ 2637 intel_ring_emit(engine, offset); 2638 intel_ring_advance(engine); 2639 2640 return 0; 2641 } 2642 2643 /* Blitter support (SandyBridge+) */ 2644 2645 static int gen6_ring_flush(struct drm_i915_gem_request *req, 2646 u32 invalidate, u32 flush) 2647 { 2648 struct intel_engine_cs *engine = req->engine; 2649 uint32_t cmd; 2650 int ret; 2651 2652 ret = intel_ring_begin(req, 4); 2653 if (ret) 2654 return ret; 2655 2656 cmd = MI_FLUSH_DW; 2657 if (INTEL_GEN(req->i915) >= 8) 2658 cmd += 1; 2659 2660 /* We always require a command barrier so that subsequent 2661 * commands, such as breadcrumb interrupts, are strictly ordered 2662 * wrt the contents of the write cache being flushed to memory 2663 * (and thus being coherent from the CPU). 2664 */ 2665 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; 2666 2667 /* 2668 * Bspec vol 1c.3 - blitter engine command streamer: 2669 * "If ENABLED, all TLBs will be invalidated once the flush 2670 * operation is complete. This bit is only valid when the 2671 * Post-Sync Operation field is a value of 1h or 3h." 2672 */ 2673 if (invalidate & I915_GEM_DOMAIN_RENDER) 2674 cmd |= MI_INVALIDATE_TLB; 2675 intel_ring_emit(engine, cmd); 2676 intel_ring_emit(engine, 2677 I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); 2678 if (INTEL_GEN(req->i915) >= 8) { 2679 intel_ring_emit(engine, 0); /* upper addr */ 2680 intel_ring_emit(engine, 0); /* value */ 2681 } else { 2682 intel_ring_emit(engine, 0); 2683 intel_ring_emit(engine, MI_NOOP); 2684 } 2685 intel_ring_advance(engine); 2686 2687 return 0; 2688 } 2689 2690 static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, 2691 struct intel_engine_cs *engine) 2692 { 2693 struct drm_i915_gem_object *obj; 2694 int ret, i; 2695 2696 if (!i915.semaphores) 2697 return; 2698 2699 if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) { 2700 obj = i915_gem_object_create(&dev_priv->drm, 4096); 2701 if (IS_ERR(obj)) { 2702 DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n"); 2703 i915.semaphores = 0; 2704 } else { 2705 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); 2706 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK); 2707 if (ret != 0) { 2708 i915_gem_object_put(obj); 2709 DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n"); 2710 i915.semaphores = 0; 2711 } else { 2712 dev_priv->semaphore_obj = obj; 2713 } 2714 } 2715 } 2716 2717 if (!i915.semaphores) 2718 return; 2719 2720 if (INTEL_GEN(dev_priv) >= 8) { 2721 u64 offset = i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj); 2722 2723 engine->semaphore.sync_to = gen8_ring_sync; 2724 engine->semaphore.signal = gen8_xcs_signal; 2725 2726 for (i = 0; i < I915_NUM_ENGINES; i++) { 2727 u64 ring_offset; 2728 2729 if (i != engine->id) 2730 ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i); 2731 else 2732 ring_offset = MI_SEMAPHORE_SYNC_INVALID; 2733 2734 engine->semaphore.signal_ggtt[i] = ring_offset; 2735 } 2736 } else if (INTEL_GEN(dev_priv) >= 6) { 2737 engine->semaphore.sync_to = gen6_ring_sync; 2738 engine->semaphore.signal = gen6_signal; 2739 2740 /* 2741 * The current semaphore is only applied on pre-gen8 2742 * platform. And there is no VCS2 ring on the pre-gen8 2743 * platform. So the semaphore between RCS and VCS2 is 2744 * initialized as INVALID. Gen8 will initialize the 2745 * sema between VCS2 and RCS later. 2746 */ 2747 for (i = 0; i < I915_NUM_ENGINES; i++) { 2748 static const struct { 2749 u32 wait_mbox; 2750 i915_reg_t mbox_reg; 2751 } sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = { 2752 [RCS] = { 2753 [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, 2754 [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, 2755 [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, 2756 }, 2757 [VCS] = { 2758 [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, 2759 [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, 2760 [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, 2761 }, 2762 [BCS] = { 2763 [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, 2764 [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, 2765 [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, 2766 }, 2767 [VECS] = { 2768 [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, 2769 [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, 2770 [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, 2771 }, 2772 }; 2773 u32 wait_mbox; 2774 i915_reg_t mbox_reg; 2775 2776 if (i == engine->id || i == VCS2) { 2777 wait_mbox = MI_SEMAPHORE_SYNC_INVALID; 2778 mbox_reg = GEN6_NOSYNC; 2779 } else { 2780 wait_mbox = sem_data[engine->id][i].wait_mbox; 2781 mbox_reg = sem_data[engine->id][i].mbox_reg; 2782 } 2783 2784 engine->semaphore.mbox.wait[i] = wait_mbox; 2785 engine->semaphore.mbox.signal[i] = mbox_reg; 2786 } 2787 } 2788 } 2789 2790 static void intel_ring_init_irq(struct drm_i915_private *dev_priv, 2791 struct intel_engine_cs *engine) 2792 { 2793 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift; 2794 2795 if (INTEL_GEN(dev_priv) >= 8) { 2796 engine->irq_enable = gen8_irq_enable; 2797 engine->irq_disable = gen8_irq_disable; 2798 engine->irq_seqno_barrier = gen6_seqno_barrier; 2799 } else if (INTEL_GEN(dev_priv) >= 6) { 2800 engine->irq_enable = gen6_irq_enable; 2801 engine->irq_disable = gen6_irq_disable; 2802 engine->irq_seqno_barrier = gen6_seqno_barrier; 2803 } else if (INTEL_GEN(dev_priv) >= 5) { 2804 engine->irq_enable = gen5_irq_enable; 2805 engine->irq_disable = gen5_irq_disable; 2806 engine->irq_seqno_barrier = gen5_seqno_barrier; 2807 } else if (INTEL_GEN(dev_priv) >= 3) { 2808 engine->irq_enable = i9xx_irq_enable; 2809 engine->irq_disable = i9xx_irq_disable; 2810 } else { 2811 engine->irq_enable = i8xx_irq_enable; 2812 engine->irq_disable = i8xx_irq_disable; 2813 } 2814 } 2815 2816 static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, 2817 struct intel_engine_cs *engine) 2818 { 2819 engine->init_hw = init_ring_common; 2820 engine->write_tail = ring_write_tail; 2821 2822 engine->add_request = i9xx_add_request; 2823 if (INTEL_GEN(dev_priv) >= 6) 2824 engine->add_request = gen6_add_request; 2825 2826 if (INTEL_GEN(dev_priv) >= 8) 2827 engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; 2828 else if (INTEL_GEN(dev_priv) >= 6) 2829 engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; 2830 else if (INTEL_GEN(dev_priv) >= 4) 2831 engine->dispatch_execbuffer = i965_dispatch_execbuffer; 2832 else if (IS_I830(dev_priv) || IS_845G(dev_priv)) 2833 engine->dispatch_execbuffer = i830_dispatch_execbuffer; 2834 else 2835 engine->dispatch_execbuffer = i915_dispatch_execbuffer; 2836 2837 intel_ring_init_irq(dev_priv, engine); 2838 intel_ring_init_semaphores(dev_priv, engine); 2839 } 2840 2841 int intel_init_render_ring_buffer(struct intel_engine_cs *engine) 2842 { 2843 struct drm_i915_private *dev_priv = engine->i915; 2844 int ret; 2845 2846 intel_ring_default_vfuncs(dev_priv, engine); 2847 2848 if (HAS_L3_DPF(dev_priv)) 2849 engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; 2850 2851 if (INTEL_GEN(dev_priv) >= 8) { 2852 engine->init_context = intel_rcs_ctx_init; 2853 engine->add_request = gen8_render_add_request; 2854 engine->flush = gen8_render_ring_flush; 2855 if (i915.semaphores) 2856 engine->semaphore.signal = gen8_rcs_signal; 2857 } else if (INTEL_GEN(dev_priv) >= 6) { 2858 engine->init_context = intel_rcs_ctx_init; 2859 engine->flush = gen7_render_ring_flush; 2860 if (IS_GEN6(dev_priv)) 2861 engine->flush = gen6_render_ring_flush; 2862 } else if (IS_GEN5(dev_priv)) { 2863 engine->flush = gen4_render_ring_flush; 2864 } else { 2865 if (INTEL_GEN(dev_priv) < 4) 2866 engine->flush = gen2_render_ring_flush; 2867 else 2868 engine->flush = gen4_render_ring_flush; 2869 engine->irq_enable_mask = I915_USER_INTERRUPT; 2870 } 2871 2872 if (IS_HASWELL(dev_priv)) 2873 engine->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; 2874 2875 engine->init_hw = init_render_ring; 2876 engine->cleanup = render_ring_cleanup; 2877 2878 ret = intel_init_ring_buffer(engine); 2879 if (ret) 2880 return ret; 2881 2882 if (INTEL_GEN(dev_priv) >= 6) { 2883 ret = intel_init_pipe_control(engine, 4096); 2884 if (ret) 2885 return ret; 2886 } else if (HAS_BROKEN_CS_TLB(dev_priv)) { 2887 ret = intel_init_pipe_control(engine, I830_WA_SIZE); 2888 if (ret) 2889 return ret; 2890 } 2891 2892 return 0; 2893 } 2894 2895 int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) 2896 { 2897 struct drm_i915_private *dev_priv = engine->i915; 2898 2899 intel_ring_default_vfuncs(dev_priv, engine); 2900 2901 if (INTEL_GEN(dev_priv) >= 6) { 2902 /* gen6 bsd needs a special wa for tail updates */ 2903 if (IS_GEN6(dev_priv)) 2904 engine->write_tail = gen6_bsd_ring_write_tail; 2905 engine->flush = gen6_bsd_ring_flush; 2906 if (INTEL_GEN(dev_priv) < 8) 2907 engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; 2908 } else { 2909 engine->mmio_base = BSD_RING_BASE; 2910 engine->flush = bsd_ring_flush; 2911 if (IS_GEN5(dev_priv)) 2912 engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; 2913 else 2914 engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; 2915 } 2916 2917 return intel_init_ring_buffer(engine); 2918 } 2919 2920 /** 2921 * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3) 2922 */ 2923 int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine) 2924 { 2925 struct drm_i915_private *dev_priv = engine->i915; 2926 2927 intel_ring_default_vfuncs(dev_priv, engine); 2928 2929 engine->flush = gen6_bsd_ring_flush; 2930 2931 return intel_init_ring_buffer(engine); 2932 } 2933 2934 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) 2935 { 2936 struct drm_i915_private *dev_priv = engine->i915; 2937 2938 intel_ring_default_vfuncs(dev_priv, engine); 2939 2940 engine->flush = gen6_ring_flush; 2941 if (INTEL_GEN(dev_priv) < 8) 2942 engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; 2943 2944 return intel_init_ring_buffer(engine); 2945 } 2946 2947 int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) 2948 { 2949 struct drm_i915_private *dev_priv = engine->i915; 2950 2951 intel_ring_default_vfuncs(dev_priv, engine); 2952 2953 engine->flush = gen6_ring_flush; 2954 2955 if (INTEL_GEN(dev_priv) < 8) { 2956 engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; 2957 engine->irq_enable = hsw_vebox_irq_enable; 2958 engine->irq_disable = hsw_vebox_irq_disable; 2959 } 2960 2961 return intel_init_ring_buffer(engine); 2962 } 2963 2964 int 2965 intel_ring_flush_all_caches(struct drm_i915_gem_request *req) 2966 { 2967 struct intel_engine_cs *engine = req->engine; 2968 int ret; 2969 2970 if (!engine->gpu_caches_dirty) 2971 return 0; 2972 2973 ret = engine->flush(req, 0, I915_GEM_GPU_DOMAINS); 2974 if (ret) 2975 return ret; 2976 2977 trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS); 2978 2979 engine->gpu_caches_dirty = false; 2980 return 0; 2981 } 2982 2983 int 2984 intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req) 2985 { 2986 struct intel_engine_cs *engine = req->engine; 2987 uint32_t flush_domains; 2988 int ret; 2989 2990 flush_domains = 0; 2991 if (engine->gpu_caches_dirty) 2992 flush_domains = I915_GEM_GPU_DOMAINS; 2993 2994 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, flush_domains); 2995 if (ret) 2996 return ret; 2997 2998 trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains); 2999 3000 engine->gpu_caches_dirty = false; 3001 return 0; 3002 } 3003 3004 void 3005 intel_stop_engine(struct intel_engine_cs *engine) 3006 { 3007 int ret; 3008 3009 if (!intel_engine_initialized(engine)) 3010 return; 3011 3012 ret = intel_engine_idle(engine); 3013 if (ret) 3014 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", 3015 engine->name, ret); 3016 3017 stop_ring(engine); 3018 } 3019