/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao <haihao.xiang@intel.com>
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "intel_ringbuffer.h"
#include <sys/sched.h>

/*
 * 965+ support PIPE_CONTROL commands, which provide finer grained control
 * over cache flushing.
 */
struct pipe_control {
	struct drm_i915_gem_object *obj;
	volatile u32 *cpu_page;
	u32 gtt_offset;
};

static inline int ring_space(struct intel_ring_buffer *ring)
{
	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
	if (space < 0)
		space += ring->size;
	return space;
}

static int
gen2_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	u32 cmd;
	int ret;

	cmd = MI_FLUSH;
	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
		cmd |= MI_NO_WRITE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
		cmd |= MI_READ_FLUSH;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen4_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	struct drm_device *dev = ring->dev;
	u32 cmd;
	int ret;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
	    (IS_G4X(dev) || IS_GEN5(dev)))
		cmd |= MI_INVALIDATE_ISP;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation != 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable = 1,
 * a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
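 *
 * This helper therefore emits exactly that sequence: a PIPE_CONTROL with
 * CS stall and stall-at-scoreboard set, followed by a PIPE_CONTROL whose
 * only effect is the post-sync qword write to a scratch page.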
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = intel_emit_post_sync_nonzero_flush(ring);
	if (ret)
		return ret;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
		 */
		flags |= PIPE_CONTROL_CS_STALL;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen7_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed.
	 *
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
	 * don't try to be clever and just set it unconditionally.
	 */
	flags |= PIPE_CONTROL_CS_STALL;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE;

		/* Workaround: we must issue a pipe_control with CS-stall bit
		 * set before a pipe_control command that has the state cache
		 * invalidate bit set. */
		gen7_render_ring_cs_stall_wa(ring);
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static void ring_write_tail(struct intel_ring_buffer *ring,
			    u32 value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	I915_WRITE_TAIL(ring, value);
}

u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
			RING_ACTHD(ring->mmio_base) : ACTHD;

	return I915_READ(acthd_reg);
}

static int init_ring_common(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = ring->obj;
	int ret = 0;
	u32 head;

	if (HAS_FORCE_WAKE(dev))
		gen6_gt_force_wake_get(dev_priv);

	/* Stop the ring if it's running. */
	I915_WRITE_CTL(ring, 0);
	I915_WRITE_HEAD(ring, 0);
	ring->write_tail(ring, 0);

	head = I915_READ_HEAD(ring) & HEAD_ADDR;

	/* G45 ring initialization fails to reset head to zero */
	if (head != 0) {
		DRM_DEBUG_KMS("%s head not reset to zero "
			      "ctl %08x head %08x tail %08x start %08x\n",
			      ring->name,
			      I915_READ_CTL(ring),
			      I915_READ_HEAD(ring),
			      I915_READ_TAIL(ring),
			      I915_READ_START(ring));

		I915_WRITE_HEAD(ring, 0);

		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  ring->name,
				  I915_READ_CTL(ring),
				  I915_READ_HEAD(ring),
				  I915_READ_TAIL(ring),
				  I915_READ_START(ring));
		}
	}

	/* Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values. */
	I915_WRITE_START(ring, obj->gtt_offset);
	I915_WRITE_CTL(ring,
			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
			| RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
		     I915_READ_START(ring) == obj->gtt_offset &&
		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  ring->name,
			  I915_READ_CTL(ring),
			  I915_READ_HEAD(ring),
			  I915_READ_TAIL(ring),
			  I915_READ_START(ring));
		ret = -EIO;
		goto out;
	}

	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
		i915_kernel_lost_context(ring->dev);
	else {
		ring->head = I915_READ_HEAD(ring);
		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
		ring->space = ring_space(ring);
		ring->last_retired_head = -1;
	}

out:
	if (HAS_FORCE_WAKE(dev))
		gen6_gt_force_wake_put(dev_priv);

	return ret;
}

static int
init_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc;
	struct drm_i915_gem_object *obj;
	int ret;

	if (ring->private)
		return 0;

	pc = kmalloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
	if (!pc)
		return -ENOMEM;

	obj = i915_gem_alloc_object(ring->dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret)
		goto err_unref;

	pc->gtt_offset = obj->gtt_offset;
	pc->cpu_page = (uint32_t *)kmem_alloc_nofault(&kernel_map, PAGE_SIZE, PAGE_SIZE);
	if (pc->cpu_page == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}
	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);

	pc->obj = obj;
	ring->private = pc;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	drm_free(pc, DRM_I915_GEM);
	return ret;
}

static void
cleanup_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	struct drm_i915_gem_object *obj;

	if (!ring->private)
		return;

	obj = pc->obj;
	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
	kmem_free(&kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);

	drm_free(pc, DRM_I915_GEM);
	ring->private = NULL;
}

static int init_render_ring(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = init_ring_common(ring);

	if (INTEL_INFO(dev)->gen > 3)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 */
	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	if (INTEL_INFO(dev)->gen == 6)
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));

	if (IS_GEN7(dev))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (INTEL_INFO(dev)->gen >= 5) {
		ret = init_pipe_control(ring);
		if (ret)
			return ret;
	}

	if (IS_GEN6(dev)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset.  LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));

		/* This is not explicitly set for GEN6, so read the register.
		 * see intel_ring_mi_set_context() for why we care.
		 * TODO: consider explicitly setting the bit for GEN5
		 */
		ring->itlb_before_ctx_switch =
			!!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
	}

	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	if (HAS_L3_GPU_CACHE(dev))
		I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);

	return ret;
}

static void render_ring_cleanup(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;

	if (!ring->private)
		return;

	if (HAS_BROKEN_CS_TLB(dev))
		drm_gem_object_unreference(to_gem_object(ring->private));

	cleanup_pipe_control(ring);
}

static void
update_mboxes(struct intel_ring_buffer *ring,
	      u32 mmio_offset)
{
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, mmio_offset);
	intel_ring_emit(ring, ring->outstanding_lazy_request);
}

/**
 * gen6_add_request - Update the semaphore mailbox registers
 *
 * @ring - ring that is adding a request
 * @seqno - return seqno stuck into the ring
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
static int
gen6_add_request(struct intel_ring_buffer *ring)
{
	u32 mbox1_reg;
	u32 mbox2_reg;
	int ret;

	ret = intel_ring_begin(ring, 10);
	if (ret)
		return ret;

	mbox1_reg = ring->signal_mbox[0];
	mbox2_reg = ring->signal_mbox[1];

	update_mboxes(ring, mbox1_reg);
	update_mboxes(ring, mbox2_reg);
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, ring->outstanding_lazy_request);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	return 0;
}

/**
 * gen6_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */
static int
gen6_ring_sync(struct intel_ring_buffer *waiter,
	       struct intel_ring_buffer *signaller,
	       u32 seqno)
{
	int ret;
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;

	/* Throughout all of the GEM code, seqno passed implies our current
	 * seqno is >= the last seqno executed.
	 * However for hardware the comparison is strictly greater than.
	 */
	seqno -= 1;

	WARN_ON(signaller->semaphore_register[waiter->id] ==
		MI_SEMAPHORE_SYNC_INVALID);

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	intel_ring_emit(waiter,
			dw1 | signaller->semaphore_register[waiter->id]);
	intel_ring_emit(waiter, seqno);
	intel_ring_emit(waiter, 0);
	intel_ring_emit(waiter, MI_NOOP);
	intel_ring_advance(waiter);

	return 0;
}

#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \
		 PIPE_CONTROL_DEPTH_STALL);				\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)

static int
pc_render_add_request(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
	 * incoherent with writes to memory, i.e. completely fubar,
	 * so we need to use PIPE_NOTIFY instead.
	 *
	 * However, we also need to workaround the qword write
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
	 * memory before requesting an interrupt.
	 */
	ret = intel_ring_begin(ring, 32);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, ring->outstanding_lazy_request);
	intel_ring_emit(ring, 0);
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128; /* write to separate cachelines */
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_NOTIFY);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, ring->outstanding_lazy_request);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static u32
gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page. */
	if (!lazy_coherency)
		intel_ring_get_active_head(ring);
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	struct pipe_control *pc = ring->private;
	return pc->cpu_page[0];
}

static bool
gen5_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
gen5_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

static bool
i9xx_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
i9xx_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

static bool
i8xx_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
i8xx_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	u32 mmio = 0;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev)) {
		switch (ring->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(ring->dev)) {
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
	} else {
		mmio = RING_HWS_PGA(ring->mmio_base);
	}

	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
	POSTING_READ(mmio);
}

static int
bsd_ring_flush(struct intel_ring_buffer *ring,
	       u32 invalidate_domains,
	       u32 flush_domains)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
i9xx_add_request(struct intel_ring_buffer *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, ring->outstanding_lazy_request);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	return 0;
}

static bool
gen6_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	/* It looks like we need to prevent the gt from suspending while waiting
	 * for a notify irq, otherwise irqs seem to get lost on at least the
	 * blt/bsd rings on ivb. */
	gen6_gt_force_wake_get(dev_priv);

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
					       GEN6_RENDER_L3_PARITY_ERROR));
		else
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
gen6_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
		else
			I915_WRITE_IMR(ring, ~0);
		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	gen6_gt_force_wake_put(dev_priv);
}

static int
i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
			 u32 offset, u32 length,
			 unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			MI_BATCH_GTT |
			(flags & I915_DISPATCH_SECURE ?
			 0 : MI_BATCH_NON_SECURE_I965));
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT (256*1024)
static int
i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
			 u32 offset, u32 len,
			 unsigned flags)
{
	int ret;

	if (flags & I915_DISPATCH_PINNED) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
		intel_ring_emit(ring, offset + len - 8);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);
	} else {
		struct drm_i915_gem_object *obj = ring->private;
		u32 cs_offset = obj->gtt_offset;

		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		ret = intel_ring_begin(ring, 9+3);
		if (ret)
			return ret;
		/* Blit the batch (which has now all relocs applied) to the stable batch
		 * scratch bo area (so that the CS never stumbles over its tlb
		 * invalidation bug) ... */
		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
				XY_SRC_COPY_BLT_WRITE_ALPHA |
				XY_SRC_COPY_BLT_WRITE_RGB);
		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
		intel_ring_emit(ring, cs_offset);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 4096);
		intel_ring_emit(ring, offset);
		intel_ring_emit(ring, MI_FLUSH);

		/* ... and execute it. */
		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
		intel_ring_emit(ring, cs_offset + len - 8);
		intel_ring_advance(ring);
	}

	return 0;
}

static int
i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
			 u32 offset, u32 len,
			 unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ?
					0 : MI_BATCH_NON_SECURE));
	intel_ring_advance(ring);

	return 0;
}

static void cleanup_status_page(struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;

	obj = ring->status_page.obj;
	if (obj == NULL)
		return;

	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
	kmem_free(&kernel_map, (vm_offset_t)ring->status_page.page_addr,
	    PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);
	ring->status_page.obj = NULL;
}

static int init_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate status page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret != 0) {
		goto err_unref;
	}

	ring->status_page.gfx_addr = obj->gtt_offset;
	ring->status_page.page_addr = (void *)kmem_alloc_nofault(&kernel_map,
	    PAGE_SIZE, PAGE_SIZE);
	if (ring->status_page.page_addr == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}
	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
	    1);
	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
	ring->status_page.obj = obj;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	intel_ring_setup_status_page(ring);
	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
			 ring->name, ring->status_page.gfx_addr);

	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	return ret;
}

static int init_phys_hws_pga(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u32 addr;

	if (!dev_priv->status_page_dmah) {
		dev_priv->status_page_dmah =
			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE, ~0);
		if (!dev_priv->status_page_dmah)
			return -ENOMEM;
	}

	addr = dev_priv->status_page_dmah->busaddr;
	if (INTEL_INFO(ring->dev)->gen >= 4)
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
	I915_WRITE(HWS_PGA, addr);

	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	return 0;
}

static inline void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
	return pmap_mapdev_attr(phys_addr, size, VM_MEMATTR_WRITE_COMBINING);
}
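
/* Common engine setup: allocate and pin the ring buffer object, map it
 * write-combining, set up the hardware status page (GTT-backed or
 * physically addressed) and call the engine-specific init hook.
 */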
static int intel_init_ring_buffer(struct drm_device *dev,
				  struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;
	int ret;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	ring->size = 32 * PAGE_SIZE;
	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));

	init_waitqueue_head(&ring->irq_queue);

	if (I915_NEED_GFX_HWS(dev)) {
		ret = init_status_page(ring);
		if (ret)
			return ret;
	} else {
		BUG_ON(ring->id != RCS);
		ret = init_phys_hws_pga(ring);
		if (ret)
			return ret;
	}

	obj = i915_gem_alloc_object(dev, ring->size);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
		ret = -ENOMEM;
		goto err_hws;
	}

	ring->obj = obj;

	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
	if (ret)
		goto err_unref;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto err_unpin;

	ring->virtual_start = ioremap_wc(dev->agp->base + obj->gtt_offset,
					 ring->size);
	if (ring->virtual_start == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
		ret = -EINVAL;
		goto err_unpin;
	}

	ret = ring->init(ring);
	if (ret)
		goto err_unmap;

	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	return 0;

err_unmap:
	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
	ring->obj = NULL;
err_hws:
	cleanup_status_page(ring);
	return ret;
}

void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv;
	int ret;

	if (ring->obj == NULL)
		return;

	/* Disable the ring buffer. The ring must be idle at this point */
	dev_priv = ring->dev->dev_private;
	ret = intel_ring_idle(ring);
	if (ret)
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
			  ring->name, ret);

	I915_WRITE_CTL(ring, 0);

	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);

	i915_gem_object_unpin(ring->obj);
	drm_gem_object_unreference(&ring->obj->base);
	ring->obj = NULL;

	if (ring->cleanup)
		ring->cleanup(ring);

	cleanup_status_page(ring);
}

static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	ret = i915_wait_seqno(ring, seqno);
	if (!ret)
		i915_gem_retire_requests_ring(ring);

	return ret;
}

static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
{
	struct drm_i915_gem_request *request;
	u32 seqno = 0;
	int ret;

	i915_gem_retire_requests_ring(ring);

	if (ring->last_retired_head != -1) {
		ring->head = ring->last_retired_head;
		ring->last_retired_head = -1;
		ring->space = ring_space(ring);
		if (ring->space >= n)
			return 0;
	}

	list_for_each_entry(request, &ring->request_list, list) {
		int space;

		if (request->tail == -1)
			continue;

		space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
		if (space < 0)
			space += ring->size;
		if (space >= n) {
			seqno = request->seqno;
			break;
		}

		/* Consume this request in case we need more space than
		 * is available and so need to prevent a race between
		 * updating last_retired_head and direct reads of
		 * I915_RING_HEAD. It also provides a nice sanity check.
		 */
		request->tail = -1;
	}

	if (seqno == 0)
		return -ENOSPC;

	ret = intel_ring_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	if (WARN_ON(ring->last_retired_head == -1))
		return -ENOSPC;

	ring->head = ring->last_retired_head;
	ring->last_retired_head = -1;
	ring->space = ring_space(ring);
	if (WARN_ON(ring->space < n))
		return -ENOSPC;

	return 0;
}

static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long end;
	int ret;

	ret = intel_ring_wait_request(ring, n);
	if (ret != -ENOSPC)
		return ret;

	/* With GEM the hangcheck timer should kick us out of the loop,
	 * leaving it early runs the risk of corrupting GEM state (due
	 * to running on almost untested codepaths). But on resume
	 * timers don't work yet, so prevent a complete hang in that
	 * case by choosing an insanely large timeout. */
	end = ticks + 60 * hz;

	do {
		ring->head = I915_READ_HEAD(ring);
		ring->space = ring_space(ring);
		if (ring->space >= n) {
			return 0;
		}

#if 0
		if (dev->primary->master) {
			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
		}
#else
		if (dev_priv->sarea_priv)
			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
#endif

		DELAY(1000);

		ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
		if (ret)
			return ret;
	} while (!time_after(ticks, end));
	return -EBUSY;
}

static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
{
	uint32_t __iomem *virt;
	int rem = ring->size - ring->tail;

	if (ring->space < rem) {
		int ret = ring_wait_for_space(ring, rem);
		if (ret)
			return ret;
	}

	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
	rem /= 4;
	while (rem--)
		iowrite32(MI_NOOP, virt++);

	ring->tail = 0;
	ring->space = ring_space(ring);

	return 0;
}

int intel_ring_idle(struct intel_ring_buffer *ring)
{
	u32 seqno;
	int ret;

	/* We need to add any requests required to flush the objects and ring */
	if (ring->outstanding_lazy_request) {
		ret = i915_add_request(ring, NULL, NULL);
		if (ret)
			return ret;
	}

	/* Wait upon the last request to be completed */
	if (list_empty(&ring->request_list))
		return 0;

	seqno = list_entry(ring->request_list.prev,
			   struct drm_i915_gem_request,
			   list)->seqno;

	return i915_wait_seqno(ring, seqno);
}

static int
intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
{
	if (ring->outstanding_lazy_request)
		return 0;

	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request);
}

int intel_ring_begin(struct intel_ring_buffer *ring,
		     int num_dwords)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	int n = 4*num_dwords;
	int ret;

	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
	if (ret)
		return ret;

	/* Preallocate the olr before touching the ring */
	ret = intel_ring_alloc_seqno(ring);
	if (ret)
		return ret;
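
	/* If the request would run past the end of the usable ring space,
	 * pad the remainder with MI_NOOPs and wrap back to the start.
	 */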
	if (unlikely(ring->tail + n > ring->effective_size)) {
		ret = intel_wrap_ring_buffer(ring);
		if (unlikely(ret))
			return ret;
	}

	if (unlikely(ring->space < n)) {
		ret = ring_wait_for_space(ring, n);
		if (unlikely(ret))
			return ret;
	}

	ring->space -= n;
	return 0;
}

void intel_ring_advance(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;

	ring->tail &= ring->size - 1;
	if (dev_priv->stop_rings & intel_ring_flag(ring))
		return;
	ring->write_tail(ring, ring->tail);
}

static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
				     u32 value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
		      GEN6_BSD_SLEEP_INDICATOR) == 0,
		     50))
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	I915_WRITE_TAIL(ring, value);
	POSTING_READ(RING_TAIL(ring->mmio_base));

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}

static int gen6_ring_flush(struct intel_ring_buffer *ring,
			   u32 invalidate, u32 flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	/*
	 * Bspec vol 1c.5 - video engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_GPU_DOMAINS)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
			     u32 offset, u32 len,
			     unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
			      u32 offset, u32 len,
			      unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			(flags & I915_DISPATCH_SECURE ?
			 0 : MI_BATCH_NON_SECURE_I965));
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

/* Blitter support (SandyBridge+) */

static int blt_ring_flush(struct intel_ring_buffer *ring,
			  u32 invalidate, u32 flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_DOMAIN_RENDER)
		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
			MI_FLUSH_DW_OP_STOREDW;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

int intel_init_render_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];

	ring->name = "render ring";
	ring->id = RCS;
	ring->mmio_base = RENDER_RING_BASE;

	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->flush = gen7_render_ring_flush;
		if (INTEL_INFO(dev)->gen == 6)
			ring->flush = gen6_render_ring_flush;
		ring->irq_get = gen6_ring_get_irq;
		ring->irq_put = gen6_ring_put_irq;
		ring->irq_enable_mask = GT_USER_INTERRUPT;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->sync_to = gen6_ring_sync;
		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_INVALID;
		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_RV;
		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_RB;
		ring->signal_mbox[0] = GEN6_VRSYNC;
		ring->signal_mbox[1] = GEN6_BRSYNC;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->flush = gen4_render_ring_flush;
		ring->get_seqno = pc_render_get_seqno;
		ring->irq_get = gen5_ring_get_irq;
		ring->irq_put = gen5_ring_put_irq;
		ring->irq_enable_mask = GT_USER_INTERRUPT | GT_PIPE_NOTIFY;
	} else {
		ring->add_request = i9xx_add_request;
		if (INTEL_INFO(dev)->gen < 4)
			ring->flush = gen2_render_ring_flush;
		else
			ring->flush = gen4_render_ring_flush;
		ring->get_seqno = ring_get_seqno;
		if (IS_GEN2(dev)) {
			ring->irq_get = i8xx_ring_get_irq;
			ring->irq_put = i8xx_ring_put_irq;
		} else {
			ring->irq_get = i9xx_ring_get_irq;
			ring->irq_put = i9xx_ring_put_irq;
		}
		ring->irq_enable_mask = I915_USER_INTERRUPT;
	}
	ring->write_tail = ring_write_tail;
	if (IS_HASWELL(dev))
		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
	else if (INTEL_INFO(dev)->gen >= 6)
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
	else if (INTEL_INFO(dev)->gen >= 4)
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	else if (IS_I830(dev) || IS_845G(dev))
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
	else
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
	ring->init = init_render_ring;
	ring->cleanup = render_ring_cleanup;

	/* Workaround batchbuffer to combat CS tlb bug. */
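	/* On chips with the broken CS TLB, i830_dispatch_execbuffer() blits
	 * each batch into this pinned scratch bo and executes it from there,
	 * so allocate it up front (sized to the I830_BATCH_LIMIT ABI limit).
	 */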
	if (HAS_BROKEN_CS_TLB(dev)) {
		struct drm_i915_gem_object *obj;
		int ret;

		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
		if (obj == NULL) {
			DRM_ERROR("Failed to allocate batch bo\n");
			return -ENOMEM;
		}

		ret = i915_gem_object_pin(obj, 0, true);
		if (ret != 0) {
			drm_gem_object_unreference(&obj->base);
			DRM_ERROR("Failed to pin batch bo\n");
			return ret;
		}

		ring->private = obj;
	}

	return intel_init_ring_buffer(dev, ring);
}

int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
	int ret;

	ring->name = "render ring";
	ring->id = RCS;
	ring->mmio_base = RENDER_RING_BASE;

	if (INTEL_INFO(dev)->gen >= 6) {
		/* non-kms not supported on gen6+ */
		return -ENODEV;
	}

	/* Note: gem is not supported on gen5/ilk without kms (the corresponding
	 * gem_init ioctl returns with -ENODEV). Hence we do not need to set up
	 * the special gen5 functions. */
	ring->add_request = i9xx_add_request;
	if (INTEL_INFO(dev)->gen < 4)
		ring->flush = gen2_render_ring_flush;
	else
		ring->flush = gen4_render_ring_flush;
	ring->get_seqno = ring_get_seqno;
	if (IS_GEN2(dev)) {
		ring->irq_get = i8xx_ring_get_irq;
		ring->irq_put = i8xx_ring_put_irq;
	} else {
		ring->irq_get = i9xx_ring_get_irq;
		ring->irq_put = i9xx_ring_put_irq;
	}
	ring->irq_enable_mask = I915_USER_INTERRUPT;
	ring->write_tail = ring_write_tail;
	if (INTEL_INFO(dev)->gen >= 4)
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	else if (IS_I830(dev) || IS_845G(dev))
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
	else
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
	ring->init = init_render_ring;
	ring->cleanup = render_ring_cleanup;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);

	ring->size = size;
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	ring->virtual_start = ioremap_wc(start, size);
	if (ring->virtual_start == NULL) {
		DRM_ERROR("can not ioremap virtual address for"
			  " ring buffer\n");
		return -ENOMEM;
	}

	if (!I915_NEED_GFX_HWS(dev)) {
		ret = init_phys_hws_pga(ring);
		if (ret)
			return ret;
	}

	return 0;
}

int intel_init_bsd_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];

	ring->name = "bsd ring";
	ring->id = VCS;

	ring->write_tail = ring_write_tail;
	if (IS_GEN6(dev) || IS_GEN7(dev)) {
		ring->mmio_base = GEN6_BSD_RING_BASE;
		/* gen6 bsd needs a special wa for tail updates */
		if (IS_GEN6(dev))
			ring->write_tail = gen6_bsd_ring_write_tail;
		ring->flush = gen6_ring_flush;
		ring->add_request = gen6_add_request;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->irq_enable_mask = GEN6_BSD_USER_INTERRUPT;
		ring->irq_get = gen6_ring_get_irq;
		ring->irq_put = gen6_ring_put_irq;
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
		ring->sync_to = gen6_ring_sync;
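		/* semaphore_register[] is indexed by the waiting ring's id and
		 * supplies the MI_SEMAPHORE_SYNC_* value used by
		 * gen6_ring_sync(); signal_mbox[] names the mailbox registers
		 * gen6_add_request() updates in the other rings.
		 */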
		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_VR;
		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_INVALID;
		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_VB;
		ring->signal_mbox[0] = GEN6_RVSYNC;
		ring->signal_mbox[1] = GEN6_BVSYNC;
	} else {
		ring->mmio_base = BSD_RING_BASE;
		ring->flush = bsd_ring_flush;
		ring->add_request = i9xx_add_request;
		ring->get_seqno = ring_get_seqno;
		if (IS_GEN5(dev)) {
			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
			ring->irq_get = gen5_ring_get_irq;
			ring->irq_put = gen5_ring_put_irq;
		} else {
			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
			ring->irq_get = i9xx_ring_get_irq;
			ring->irq_put = i9xx_ring_put_irq;
		}
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	}
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int intel_init_blt_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];

	ring->name = "blitter ring";
	ring->id = BCS;

	ring->mmio_base = BLT_RING_BASE;
	ring->write_tail = ring_write_tail;
	ring->flush = blt_ring_flush;
	ring->add_request = gen6_add_request;
	ring->get_seqno = gen6_ring_get_seqno;
	ring->irq_enable_mask = GEN6_BLITTER_USER_INTERRUPT;
	ring->irq_get = gen6_ring_get_irq;
	ring->irq_put = gen6_ring_put_irq;
	ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
	ring->sync_to = gen6_ring_sync;
	ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_BR;
	ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_BV;
	ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_INVALID;
	ring->signal_mbox[0] = GEN6_RBSYNC;
	ring->signal_mbox[1] = GEN6_VBSYNC;
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int
intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
{
	int ret;

	if (!ring->gpu_caches_dirty)
		return 0;

	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ring->gpu_caches_dirty = false;
	return 0;
}

int
intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
{
	uint32_t flush_domains;
	int ret;

	flush_domains = 0;
	if (ring->gpu_caches_dirty)
		flush_domains = I915_GEM_GPU_DOMAINS;

	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
	if (ret)
		return ret;

	ring->gpu_caches_dirty = false;
	return 0;
}