/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao <haihao.xiang@intel.com>
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "intel_ringbuffer.h"

/*
 * 965+ support PIPE_CONTROL commands, which provide finer grained control
 * over cache flushing.
 */
struct pipe_control {
	struct drm_i915_gem_object *obj;
	volatile u32 *cpu_page;
	u32 gtt_offset;
};

static inline int ring_space(struct intel_ring_buffer *ring)
{
	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
	if (space < 0)
		space += ring->size;
	return space;
}
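
/*
 * For illustration: ring_space() reports how many bytes can still be
 * written before the software tail would run into the hardware head,
 * always keeping I915_RING_FREE_SPACE bytes in reserve. With a
 * 4096-byte ring, head == 1024 and tail == 3072, the raw difference is
 * negative, the ring size is added back, and roughly 2048 bytes (less
 * the reserved slack) remain usable.
 */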

static int
gen2_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	u32 cmd;
	int ret;

	cmd = MI_FLUSH;
	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
		cmd |= MI_NO_WRITE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
		cmd |= MI_READ_FLUSH;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen4_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	struct drm_device *dev = ring->dev;
	u32 cmd;
	int ret;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
	    (IS_G4X(dev) || IS_GEN5(dev)))
		cmd |= MI_INVALIDATE_ISP;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6. From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it. Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either. Notify enable is IRQs, which aren't
 * really our business. That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = intel_emit_post_sync_nonzero_flush(ring);
	if (ret)
		return ret;

	/* Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
		 */
		flags |= PIPE_CONTROL_CS_STALL;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}
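
/*
 * For illustration, the flush above boils down to one four-dword
 * PIPE_CONTROL packet in the ring:
 *
 *	DW0: GFX_OP_PIPE_CONTROL(4)
 *	DW1: flags (cache flush/invalidate bits, CS stall, QW write)
 *	DW2: scratch_addr | PIPE_CONTROL_GLOBAL_GTT (post-sync write target)
 *	DW3: 0 (payload of the post-sync write)
 *
 * preceded by the two workaround packets queued by
 * intel_emit_post_sync_nonzero_flush().
 */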

static int
gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen7_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed.
	 *
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
	 * don't try to be clever and just set it unconditionally.
	 */
	flags |= PIPE_CONTROL_CS_STALL;

	/* Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE;

		/* Workaround: we must issue a pipe_control with CS-stall bit
		 * set before a pipe_control command that has the state cache
		 * invalidate bit set. */
		gen7_render_ring_cs_stall_wa(ring);
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static void ring_write_tail(struct intel_ring_buffer *ring,
			    u32 value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	I915_WRITE_TAIL(ring, value);
}

u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
			RING_ACTHD(ring->mmio_base) : ACTHD;

	return I915_READ(acthd_reg);
}
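
/*
 * In outline, bringing a ring up below follows a fixed sequence: stop the
 * ring (CTL/HEAD/TAIL to zero), read HEAD back and force it to zero where
 * the hardware fails to reset it (seen on G45), then program START and CTL
 * and poll until the ring reports valid with a zero head, all bracketed by
 * force-wake on parts that need it.
 */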

static int init_ring_common(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = ring->obj;
	int ret = 0;
	u32 head;

	if (HAS_FORCE_WAKE(dev))
		gen6_gt_force_wake_get(dev_priv);

	/* Stop the ring if it's running. */
	I915_WRITE_CTL(ring, 0);
	I915_WRITE_HEAD(ring, 0);
	ring->write_tail(ring, 0);

	head = I915_READ_HEAD(ring) & HEAD_ADDR;

	/* G45 ring initialization fails to reset head to zero */
	if (head != 0) {
		DRM_DEBUG_KMS("%s head not reset to zero "
			      "ctl %08x head %08x tail %08x start %08x\n",
			      ring->name,
			      I915_READ_CTL(ring),
			      I915_READ_HEAD(ring),
			      I915_READ_TAIL(ring),
			      I915_READ_START(ring));

		I915_WRITE_HEAD(ring, 0);

		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  ring->name,
				  I915_READ_CTL(ring),
				  I915_READ_HEAD(ring),
				  I915_READ_TAIL(ring),
				  I915_READ_START(ring));
		}
	}

	/* Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values. */
	I915_WRITE_START(ring, obj->gtt_offset);
	I915_WRITE_CTL(ring,
			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
			| RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
		     I915_READ_START(ring) == obj->gtt_offset &&
		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  ring->name,
			  I915_READ_CTL(ring),
			  I915_READ_HEAD(ring),
			  I915_READ_TAIL(ring),
			  I915_READ_START(ring));
		ret = -EIO;
		goto out;
	}

	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
		i915_kernel_lost_context(ring->dev);
	else {
		ring->head = I915_READ_HEAD(ring);
		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
		ring->space = ring_space(ring);
		ring->last_retired_head = -1;
	}

out:
	if (HAS_FORCE_WAKE(dev))
		gen6_gt_force_wake_put(dev_priv);

	return ret;
}

static int
init_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc;
	struct drm_i915_gem_object *obj;
	int ret;

	if (ring->private)
		return 0;

	pc = kmalloc(sizeof(*pc), M_DRM, M_WAITOK);
	if (!pc)
		return -ENOMEM;

	obj = i915_gem_alloc_object(ring->dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true, false);
	if (ret)
		goto err_unref;

	pc->gtt_offset = obj->gtt_offset;
	pc->cpu_page = (uint32_t *)kmem_alloc_nofault(&kernel_map, PAGE_SIZE, PAGE_SIZE);
	if (pc->cpu_page == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}
	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
				    (vm_offset_t)pc->cpu_page + PAGE_SIZE);

	pc->obj = obj;
	ring->private = pc;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	kfree(pc, M_DRM);
	return ret;
}

static void
cleanup_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	struct drm_i915_gem_object *obj;

	if (!ring->private)
		return;

	obj = pc->obj;
	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
	kmem_free(&kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);

	kfree(pc, M_DRM);
	ring->private = NULL;
}
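
/*
 * Note on the pipe_control scratch page set up above: the 4096-byte object
 * is pinned in the global GTT and also mapped for the CPU, so
 * pc->cpu_page[0] can be read directly by pc_render_get_seqno() after the
 * GPU writes the seqno there, while pc->gtt_offset + 128 provides spare
 * cachelines used purely as post-sync write targets for the PIPE_CONTROL
 * workarounds.
 */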

static int init_render_ring(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = init_ring_common(ring);

	if (INTEL_INFO(dev)->gen > 3)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 */
	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	if (INTEL_INFO(dev)->gen == 6)
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));

	if (IS_GEN7(dev))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (INTEL_INFO(dev)->gen >= 5) {
		ret = init_pipe_control(ring);
		if (ret)
			return ret;
	}

	if (IS_GEN6(dev)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));

		/* This is not explicitly set for GEN6, so read the register.
		 * see intel_ring_mi_set_context() for why we care.
		 * TODO: consider explicitly setting the bit for GEN5
		 */
		ring->itlb_before_ctx_switch =
			!!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
	}

	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	if (HAS_L3_GPU_CACHE(dev))
		I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);

	return ret;
}

static void render_ring_cleanup(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;

	if (!ring->private)
		return;

	if (HAS_BROKEN_CS_TLB(dev))
		drm_gem_object_unreference(to_gem_object(ring->private));

	cleanup_pipe_control(ring);
}

static void
update_mboxes(struct intel_ring_buffer *ring,
	      u32 mmio_offset)
{
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, mmio_offset);
	intel_ring_emit(ring, ring->outstanding_lazy_request);
}

/**
 * gen6_add_request - Update the semaphore mailbox registers
 *
 * @ring - ring that is adding a request
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
static int
gen6_add_request(struct intel_ring_buffer *ring)
{
	u32 mbox1_reg;
	u32 mbox2_reg;
	int ret;

	ret = intel_ring_begin(ring, 10);
	if (ret)
		return ret;

	mbox1_reg = ring->signal_mbox[0];
	mbox2_reg = ring->signal_mbox[1];

	update_mboxes(ring, mbox1_reg);
	update_mboxes(ring, mbox2_reg);
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, ring->outstanding_lazy_request);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	return 0;
}
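
/*
 * The ten dwords reserved by gen6_add_request() above break down as two
 * three-dword MI_LOAD_REGISTER_IMM writes (one per other ring's mailbox),
 * followed by a four-dword tail: MI_STORE_DWORD_INDEX into the hardware
 * status page and an MI_USER_INTERRUPT to signal completion.
 */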

/**
 * intel_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */
static int
gen6_ring_sync(struct intel_ring_buffer *waiter,
	       struct intel_ring_buffer *signaller,
	       u32 seqno)
{
	int ret;
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;

	/* Throughout all of the GEM code, seqno passed implies our current
	 * seqno is >= the last seqno executed. However for hardware the
	 * comparison is strictly greater than.
	 */
	seqno -= 1;

	WARN_ON(signaller->semaphore_register[waiter->id] ==
		MI_SEMAPHORE_SYNC_INVALID);

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	intel_ring_emit(waiter,
			dw1 | signaller->semaphore_register[waiter->id]);
	intel_ring_emit(waiter, seqno);
	intel_ring_emit(waiter, 0);
	intel_ring_emit(waiter, MI_NOOP);
	intel_ring_advance(waiter);

	return 0;
}

#define PIPE_CONTROL_FLUSH(ring__, addr__)					\
do {										\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |	\
		 PIPE_CONTROL_DEPTH_STALL);					\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);		\
	intel_ring_emit(ring__, 0);						\
	intel_ring_emit(ring__, 0);						\
} while (0)

static int
pc_render_add_request(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
	 * incoherent with writes to memory, i.e. completely fubar,
	 * so we need to use PIPE_NOTIFY instead.
	 *
	 * However, we also need to workaround the qword write
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
	 * memory before requesting an interrupt.
	 */
	ret = intel_ring_begin(ring, 32);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, ring->outstanding_lazy_request);
	intel_ring_emit(ring, 0);
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128; /* write to separate cachelines */
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_NOTIFY);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, ring->outstanding_lazy_request);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static u32
gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page. */
	if (!lazy_coherency)
		intel_ring_get_active_head(ring);
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	struct pipe_control *pc = ring->private;
	return pc->cpu_page[0];
}

static bool
gen5_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
gen5_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

static bool
i9xx_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
i9xx_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

static bool
i8xx_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
i8xx_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}
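
/*
 * All three irq_get/irq_put pairs above share one shape: take the irq lock
 * and only touch the hardware mask register on the 0 -> 1 (or 1 -> 0)
 * refcount transition. They differ only in which register carries the mask:
 * GTIMR on gen5, the 32-bit IMR on i9xx, and the 16-bit IMR on 8xx parts.
 */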

void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	u32 mmio = 0;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev)) {
		switch (ring->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(ring->dev)) {
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
	} else {
		mmio = RING_HWS_PGA(ring->mmio_base);
	}

	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
	POSTING_READ(mmio);
}

static int
bsd_ring_flush(struct intel_ring_buffer *ring,
	       u32 invalidate_domains,
	       u32 flush_domains)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
i9xx_add_request(struct intel_ring_buffer *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, ring->outstanding_lazy_request);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	return 0;
}

static bool
gen6_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	/* It looks like we need to prevent the gt from suspending while waiting
	 * for a notify irq, otherwise irqs seem to get lost on at least the
	 * blt/bsd rings on ivb. */
	gen6_gt_force_wake_get(dev_priv);

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
					       GEN6_RENDER_L3_PARITY_ERROR));
		else
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
gen6_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
		else
			I915_WRITE_IMR(ring, ~0);
		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	gen6_gt_force_wake_put(dev_priv);
}
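
/*
 * The dispatch_execbuffer() implementations below differ mainly in how the
 * batch start is encoded: MI_BATCH_BUFFER_START with the 965-style
 * non-secure bit on i965 and gen6+, the old two-address MI_BATCH_BUFFER
 * form on i830/845 (optionally bouncing the batch through a scratch bo to
 * avoid the CS TLB erratum), the legacy non-secure bit in the address dword
 * on i915-class parts, and a Haswell variant that can also select PPGTT.
 */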

static int
i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
			 u32 offset, u32 length,
			 unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			MI_BATCH_GTT |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT (256*1024)
static int
i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
			 u32 offset, u32 len,
			 unsigned flags)
{
	int ret;

	if (flags & I915_DISPATCH_PINNED) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
		intel_ring_emit(ring, offset + len - 8);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);
	} else {
		struct drm_i915_gem_object *obj = ring->private;
		u32 cs_offset = obj->gtt_offset;

		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		ret = intel_ring_begin(ring, 9+3);
		if (ret)
			return ret;
		/* Blit the batch (which has now all relocs applied) to the stable batch
		 * scratch bo area (so that the CS never stumbles over its tlb
		 * invalidation bug) ... */
		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
				XY_SRC_COPY_BLT_WRITE_ALPHA |
				XY_SRC_COPY_BLT_WRITE_RGB);
		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
		intel_ring_emit(ring, cs_offset);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 4096);
		intel_ring_emit(ring, offset);
		intel_ring_emit(ring, MI_FLUSH);

		/* ... and execute it. */
		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
		intel_ring_emit(ring, cs_offset + len - 8);
		intel_ring_advance(ring);
	}

	return 0;
}
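
/*
 * For illustration, the XY_SRC_COPY blit in the unpinned path above copies
 * the batch as a rectangle of 32-bit pixels with a 4096-byte pitch - 1024
 * pixels per row and DIV_ROUND_UP(len, 4096) rows - so the whole batch lands
 * contiguously in the scratch bo before the trailing MI_BATCH_BUFFER
 * executes the copy instead of the original.
 */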

static int
i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
			 u32 offset, u32 len,
			 unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
	intel_ring_advance(ring);

	return 0;
}

static void cleanup_status_page(struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;

	obj = ring->status_page.obj;
	if (obj == NULL)
		return;

	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
	kmem_free(&kernel_map, (vm_offset_t)ring->status_page.page_addr,
		  PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);
	ring->status_page.obj = NULL;
}

static int init_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate status page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true, false);
	if (ret != 0) {
		goto err_unref;
	}

	ring->status_page.gfx_addr = obj->gtt_offset;
	ring->status_page.page_addr = (void *)kmem_alloc_nofault(&kernel_map,
								  PAGE_SIZE, PAGE_SIZE);
	if (ring->status_page.page_addr == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}
	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
		    1);
	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
				    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
	ring->status_page.obj = obj;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	intel_ring_setup_status_page(ring);
	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
			 ring->name, ring->status_page.gfx_addr);

	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	return ret;
}

static int init_phys_hws_pga(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u32 addr;

	if (!dev_priv->status_page_dmah) {
		dev_priv->status_page_dmah =
			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE, ~0);
		if (!dev_priv->status_page_dmah)
			return -ENOMEM;
	}

	addr = dev_priv->status_page_dmah->busaddr;
	if (INTEL_INFO(ring->dev)->gen >= 4)
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
	I915_WRITE(HWS_PGA, addr);

	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	return 0;
}
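
/*
 * The two helpers above are the two flavours of hardware status page: rings
 * that support a GTT-backed page get a pinned 4096-byte object mapped for
 * the CPU (init_status_page), while older parts fall back to a physically
 * addressed page programmed into HWS_PGA (init_phys_hws_pga). The seqno
 * dwords read by the *_get_seqno() helpers live in whichever page is in use.
 */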

static int intel_init_ring_buffer(struct drm_device *dev,
				  struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;
	int ret;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	ring->size = 32 * PAGE_SIZE;
	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));

	init_waitqueue_head(&ring->irq_queue);

	if (I915_NEED_GFX_HWS(dev)) {
		ret = init_status_page(ring);
		if (ret)
			return ret;
	} else {
		BUG_ON(ring->id != RCS);
		ret = init_phys_hws_pga(ring);
		if (ret)
			return ret;
	}

	obj = i915_gem_alloc_object(dev, ring->size);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
		ret = -ENOMEM;
		goto err_hws;
	}

	ring->obj = obj;

	ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
	if (ret)
		goto err_unref;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto err_unpin;

	ring->virtual_start = ioremap_wc(dev->agp->base + obj->gtt_offset,
					 ring->size);
	if (ring->virtual_start == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
		ret = -EINVAL;
		goto err_unpin;
	}

	ret = ring->init(ring);
	if (ret)
		goto err_unmap;

	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	return 0;

err_unmap:
	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
	ring->obj = NULL;
err_hws:
	cleanup_status_page(ring);
	return ret;
}

void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv;
	int ret;

	if (ring->obj == NULL)
		return;

	/* Disable the ring buffer. The ring must be idle at this point */
	dev_priv = ring->dev->dev_private;
	ret = intel_ring_idle(ring);
	if (ret)
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
			  ring->name, ret);

	I915_WRITE_CTL(ring, 0);

	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);

	i915_gem_object_unpin(ring->obj);
	drm_gem_object_unreference(&ring->obj->base);
	ring->obj = NULL;

	if (ring->cleanup)
		ring->cleanup(ring);

	cleanup_status_page(ring);
}

static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	ret = i915_wait_seqno(ring, seqno);
	if (!ret)
		i915_gem_retire_requests_ring(ring);

	return ret;
}
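
/*
 * In outline, intel_ring_wait_request() below avoids spinning on the
 * hardware HEAD register: it walks the outstanding request list for the
 * first request whose recorded tail would leave at least n bytes free,
 * waits for that seqno to retire, and then advances ring->head from
 * last_retired_head so the space accounting reflects the retired work.
 */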

static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
{
	struct drm_i915_gem_request *request;
	u32 seqno = 0;
	int ret;

	i915_gem_retire_requests_ring(ring);

	if (ring->last_retired_head != -1) {
		ring->head = ring->last_retired_head;
		ring->last_retired_head = -1;
		ring->space = ring_space(ring);
		if (ring->space >= n)
			return 0;
	}

	list_for_each_entry(request, &ring->request_list, list) {
		int space;

		if (request->tail == -1)
			continue;

		space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
		if (space < 0)
			space += ring->size;
		if (space >= n) {
			seqno = request->seqno;
			break;
		}

		/* Consume this request in case we need more space than
		 * is available and so need to prevent a race between
		 * updating last_retired_head and direct reads of
		 * I915_RING_HEAD. It also provides a nice sanity check.
		 */
		request->tail = -1;
	}

	if (seqno == 0)
		return -ENOSPC;

	ret = intel_ring_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	if (WARN_ON(ring->last_retired_head == -1))
		return -ENOSPC;

	ring->head = ring->last_retired_head;
	ring->last_retired_head = -1;
	ring->space = ring_space(ring);
	if (WARN_ON(ring->space < n))
		return -ENOSPC;

	return 0;
}

static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long end;
	int ret;

	ret = intel_ring_wait_request(ring, n);
	if (ret != -ENOSPC)
		return ret;

	/* With GEM the hangcheck timer should kick us out of the loop,
	 * leaving it early runs the risk of corrupting GEM state (due
	 * to running on almost untested codepaths). But on resume
	 * timers don't work yet, so prevent a complete hang in that
	 * case by choosing an insanely large timeout. */
	end = jiffies + 60 * HZ;

	do {
		ring->head = I915_READ_HEAD(ring);
		ring->space = ring_space(ring);
		if (ring->space >= n) {
			return 0;
		}

#if 0
		if (dev->primary->master) {
			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
		}
#else
		if (dev_priv->sarea_priv)
			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
#endif

		msleep(1);

		ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
		if (ret)
			return ret;
	} while (!time_after(jiffies, end));
	return -EBUSY;
}

static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
{
	uint32_t __iomem *virt;
	int rem = ring->size - ring->tail;

	if (ring->space < rem) {
		int ret = ring_wait_for_space(ring, rem);
		if (ret)
			return ret;
	}

	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
	rem /= 4;
	while (rem--)
		iowrite32(MI_NOOP, virt++);

	ring->tail = 0;
	ring->space = ring_space(ring);

	return 0;
}

int intel_ring_idle(struct intel_ring_buffer *ring)
{
	u32 seqno;
	int ret;

	/* We need to add any requests required to flush the objects and ring */
	if (ring->outstanding_lazy_request) {
		ret = i915_add_request(ring, NULL, NULL);
		if (ret)
			return ret;
	}

	/* Wait upon the last request to be completed */
	if (list_empty(&ring->request_list))
		return 0;

	seqno = list_entry(ring->request_list.prev,
			   struct drm_i915_gem_request,
			   list)->seqno;

	return i915_wait_seqno(ring, seqno);
}

static int
intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
{
	if (ring->outstanding_lazy_request)
		return 0;

	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request);
}
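
/*
 * intel_ring_begin() below reserves space in bytes (4 * num_dwords),
 * wrapping past effective_size with MI_NOOPs first and then waiting for the
 * requested room. Callers throughout this file follow the same pattern,
 * for example:
 *
 *	ret = intel_ring_begin(ring, 2);
 *	if (ret)
 *		return ret;
 *	intel_ring_emit(ring, MI_FLUSH);
 *	intel_ring_emit(ring, MI_NOOP);
 *	intel_ring_advance(ring);
 */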

int intel_ring_begin(struct intel_ring_buffer *ring,
		     int num_dwords)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	int n = 4*num_dwords;
	int ret;

	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
	if (ret)
		return ret;

	/* Preallocate the olr before touching the ring */
	ret = intel_ring_alloc_seqno(ring);
	if (ret)
		return ret;

	if (unlikely(ring->tail + n > ring->effective_size)) {
		ret = intel_wrap_ring_buffer(ring);
		if (unlikely(ret))
			return ret;
	}

	if (unlikely(ring->space < n)) {
		ret = ring_wait_for_space(ring, n);
		if (unlikely(ret))
			return ret;
	}

	ring->space -= n;
	return 0;
}

void intel_ring_advance(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;

	ring->tail &= ring->size - 1;
	if (dev_priv->stop_rings & intel_ring_flag(ring))
		return;
	ring->write_tail(ring, ring->tail);
}

static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
				     u32 value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
		      GEN6_BSD_SLEEP_INDICATOR) == 0,
		     50))
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	I915_WRITE_TAIL(ring, value);
	POSTING_READ(RING_TAIL(ring->mmio_base));

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}

static int gen6_ring_flush(struct intel_ring_buffer *ring,
			   u32 invalidate, u32 flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	/*
	 * Bspec vol 1c.5 - video engine command streamer:
	 *   "If ENABLED, all TLBs will be invalidated once the flush
	 *    operation is complete. This bit is only valid when the
	 *    Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_GPU_DOMAINS)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
			     u32 offset, u32 len,
			     unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
			      u32 offset, u32 len,
			      unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

/* Blitter support (SandyBridge+) */

static int blt_ring_flush(struct intel_ring_buffer *ring,
			  u32 invalidate, u32 flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 *   "If ENABLED, all TLBs will be invalidated once the flush
	 *    operation is complete. This bit is only valid when the
	 *    Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_DOMAIN_RENDER)
		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
			MI_FLUSH_DW_OP_STOREDW;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}
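
/*
 * Note: on HAS_BROKEN_CS_TLB devices, intel_init_render_ring_buffer() below
 * pins a scratch batch object of I830_BATCH_LIMIT bytes and stashes it in
 * ring->private; that object is the blit destination used by
 * i830_dispatch_execbuffer(), and render_ring_cleanup() drops the extra
 * reference again.
 */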

int intel_init_render_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];

	ring->name = "render ring";
	ring->id = RCS;
	ring->mmio_base = RENDER_RING_BASE;

	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->flush = gen7_render_ring_flush;
		if (INTEL_INFO(dev)->gen == 6)
			ring->flush = gen6_render_ring_flush;
		ring->irq_get = gen6_ring_get_irq;
		ring->irq_put = gen6_ring_put_irq;
		ring->irq_enable_mask = GT_USER_INTERRUPT;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->sync_to = gen6_ring_sync;
		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_INVALID;
		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_RV;
		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_RB;
		ring->signal_mbox[0] = GEN6_VRSYNC;
		ring->signal_mbox[1] = GEN6_BRSYNC;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->flush = gen4_render_ring_flush;
		ring->get_seqno = pc_render_get_seqno;
		ring->irq_get = gen5_ring_get_irq;
		ring->irq_put = gen5_ring_put_irq;
		ring->irq_enable_mask = GT_USER_INTERRUPT | GT_PIPE_NOTIFY;
	} else {
		ring->add_request = i9xx_add_request;
		if (INTEL_INFO(dev)->gen < 4)
			ring->flush = gen2_render_ring_flush;
		else
			ring->flush = gen4_render_ring_flush;
		ring->get_seqno = ring_get_seqno;
		if (IS_GEN2(dev)) {
			ring->irq_get = i8xx_ring_get_irq;
			ring->irq_put = i8xx_ring_put_irq;
		} else {
			ring->irq_get = i9xx_ring_get_irq;
			ring->irq_put = i9xx_ring_put_irq;
		}
		ring->irq_enable_mask = I915_USER_INTERRUPT;
	}
	ring->write_tail = ring_write_tail;
	if (IS_HASWELL(dev))
		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
	else if (INTEL_INFO(dev)->gen >= 6)
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
	else if (INTEL_INFO(dev)->gen >= 4)
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	else if (IS_I830(dev) || IS_845G(dev))
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
	else
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
	ring->init = init_render_ring;
	ring->cleanup = render_ring_cleanup;

	/* Workaround batchbuffer to combat CS tlb bug. */
	if (HAS_BROKEN_CS_TLB(dev)) {
		struct drm_i915_gem_object *obj;
		int ret;

		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
		if (obj == NULL) {
			DRM_ERROR("Failed to allocate batch bo\n");
			return -ENOMEM;
		}

		ret = i915_gem_object_pin(obj, 0, true, false);
		if (ret != 0) {
			drm_gem_object_unreference(&obj->base);
			DRM_ERROR("Failed to pin batch bo\n");
			return ret;
		}

		ring->private = obj;
	}

	return intel_init_ring_buffer(dev, ring);
}

int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
	int ret;

	ring->name = "render ring";
	ring->id = RCS;
	ring->mmio_base = RENDER_RING_BASE;

	if (INTEL_INFO(dev)->gen >= 6) {
		/* non-kms not supported on gen6+ */
		return -ENODEV;
	}

	/* Note: gem is not supported on gen5/ilk without kms (the corresponding
	 * gem_init ioctl returns with -ENODEV). Hence we do not need to set up
	 * the special gen5 functions. */
	ring->add_request = i9xx_add_request;
	if (INTEL_INFO(dev)->gen < 4)
		ring->flush = gen2_render_ring_flush;
	else
		ring->flush = gen4_render_ring_flush;
	ring->get_seqno = ring_get_seqno;
	if (IS_GEN2(dev)) {
		ring->irq_get = i8xx_ring_get_irq;
		ring->irq_put = i8xx_ring_put_irq;
	} else {
		ring->irq_get = i9xx_ring_get_irq;
		ring->irq_put = i9xx_ring_put_irq;
	}
	ring->irq_enable_mask = I915_USER_INTERRUPT;
	ring->write_tail = ring_write_tail;
	if (INTEL_INFO(dev)->gen >= 4)
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	else if (IS_I830(dev) || IS_845G(dev))
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
	else
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
	ring->init = init_render_ring;
	ring->cleanup = render_ring_cleanup;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);

	ring->size = size;
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	ring->virtual_start = ioremap_wc(start, size);
	if (ring->virtual_start == NULL) {
		DRM_ERROR("can not ioremap virtual address for"
			  " ring buffer\n");
		return -ENOMEM;
	}

	if (!I915_NEED_GFX_HWS(dev)) {
		ret = init_phys_hws_pga(ring);
		if (ret)
			return ret;
	}

	return 0;
}
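
/*
 * The semaphore wiring in the ring init functions is symmetric: each ring's
 * semaphore_register[] is indexed by the waiting ring's id, holds
 * MI_SEMAPHORE_SYNC_INVALID for itself, and names the register that
 * gen6_ring_sync() polls, while signal_mbox[] lists the two mailbox
 * registers in the other rings that gen6_add_request() updates with the new
 * seqno.
 */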

int intel_init_bsd_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];

	ring->name = "bsd ring";
	ring->id = VCS;

	ring->write_tail = ring_write_tail;
	if (IS_GEN6(dev) || IS_GEN7(dev)) {
		ring->mmio_base = GEN6_BSD_RING_BASE;
		/* gen6 bsd needs a special wa for tail updates */
		if (IS_GEN6(dev))
			ring->write_tail = gen6_bsd_ring_write_tail;
		ring->flush = gen6_ring_flush;
		ring->add_request = gen6_add_request;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->irq_enable_mask = GEN6_BSD_USER_INTERRUPT;
		ring->irq_get = gen6_ring_get_irq;
		ring->irq_put = gen6_ring_put_irq;
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
		ring->sync_to = gen6_ring_sync;
		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_VR;
		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_INVALID;
		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_VB;
		ring->signal_mbox[0] = GEN6_RVSYNC;
		ring->signal_mbox[1] = GEN6_BVSYNC;
	} else {
		ring->mmio_base = BSD_RING_BASE;
		ring->flush = bsd_ring_flush;
		ring->add_request = i9xx_add_request;
		ring->get_seqno = ring_get_seqno;
		if (IS_GEN5(dev)) {
			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
			ring->irq_get = gen5_ring_get_irq;
			ring->irq_put = gen5_ring_put_irq;
		} else {
			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
			ring->irq_get = i9xx_ring_get_irq;
			ring->irq_put = i9xx_ring_put_irq;
		}
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	}
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int intel_init_blt_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];

	ring->name = "blitter ring";
	ring->id = BCS;

	ring->mmio_base = BLT_RING_BASE;
	ring->write_tail = ring_write_tail;
	ring->flush = blt_ring_flush;
	ring->add_request = gen6_add_request;
	ring->get_seqno = gen6_ring_get_seqno;
	ring->irq_enable_mask = GEN6_BLITTER_USER_INTERRUPT;
	ring->irq_get = gen6_ring_get_irq;
	ring->irq_put = gen6_ring_put_irq;
	ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
	ring->sync_to = gen6_ring_sync;
	ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_BR;
	ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_BV;
	ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_INVALID;
	ring->signal_mbox[0] = GEN6_RBSYNC;
	ring->signal_mbox[1] = GEN6_VBSYNC;
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int
intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
{
	int ret;

	if (!ring->gpu_caches_dirty)
		return 0;

	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ring->gpu_caches_dirty = false;
	return 0;
}

int
intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
{
	uint32_t flush_domains;
	int ret;

	flush_domains = 0;
	if (ring->gpu_caches_dirty)
		flush_domains = I915_GEM_GPU_DOMAINS;

	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
	if (ret)
		return ret;

	ring->gpu_caches_dirty = false;
	return 0;
}