/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao <haihao.xiang@intel.com>
 *
 * $FreeBSD: head/sys/dev/drm2/i915/intel_ringbuffer.c 253709 2013-07-27 16:42:29Z kib $
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "intel_ringbuffer.h"
#include <sys/sched.h>

/*
 * 965+ support PIPE_CONTROL commands, which provide finer grained control
 * over cache flushing.
 */
struct pipe_control {
	struct drm_i915_gem_object *obj;
	volatile u32 *cpu_page;
	u32 gtt_offset;
};

/*
 * Free space between HEAD and TAIL, keeping I915_RING_FREE_SPACE bytes in
 * reserve and wrapping around the end of the buffer.
 */
static inline int ring_space(struct intel_ring_buffer *ring)
{
	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
	if (space < 0)
		space += ring->size;
	return space;
}

static int
render_ring_flush(struct intel_ring_buffer *ring,
		  uint32_t invalidate_domains,
		  uint32_t flush_domains)
{
	struct drm_device *dev = ring->dev;
	uint32_t cmd;
	int ret;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	if (INTEL_INFO(dev)->gen < 4) {
		/*
		 * On the 965, the sampler cache always gets flushed
		 * and this bit is reserved.
		 */
		if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
			cmd |= MI_READ_FLUSH;
	}
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
	    (IS_G4X(dev) || IS_GEN5(dev)))
		cmd |= MI_INVALIDATE_ISP;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation != 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable = 1,
 * a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	intel_emit_post_sync_nonzero_flush(ring);

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0); /* lower dword */
	intel_ring_emit(ring, 0); /* upper dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static void ring_write_tail(struct intel_ring_buffer *ring,
			    uint32_t value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	I915_WRITE_TAIL(ring, value);
}

u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	uint32_t acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
			RING_ACTHD(ring->mmio_base) : ACTHD;

	return I915_READ(acthd_reg);
}

static int init_ring_common(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_object *obj = ring->obj;
	uint32_t head;

	/* Stop the ring if it's running. */
	I915_WRITE_CTL(ring, 0);
	I915_WRITE_HEAD(ring, 0);
	ring->write_tail(ring, 0);

	/* Initialize the ring. */
	I915_WRITE_START(ring, obj->gtt_offset);
	head = I915_READ_HEAD(ring) & HEAD_ADDR;

	/* G45 ring initialization fails to reset head to zero */
	if (head != 0) {
		DRM_DEBUG("%s head not reset to zero "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  ring->name,
			  I915_READ_CTL(ring),
			  I915_READ_HEAD(ring),
			  I915_READ_TAIL(ring),
			  I915_READ_START(ring));

		I915_WRITE_HEAD(ring, 0);

		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  ring->name,
				  I915_READ_CTL(ring),
				  I915_READ_HEAD(ring),
				  I915_READ_TAIL(ring),
				  I915_READ_START(ring));
		}
	}

	I915_WRITE_CTL(ring,
			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
			| RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (_intel_wait_for(ring->dev,
			    (I915_READ_CTL(ring) & RING_VALID) != 0 &&
			    I915_READ_START(ring) == obj->gtt_offset &&
			    (I915_READ_HEAD(ring) & HEAD_ADDR) == 0,
			    50, 1, "915rii")) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  ring->name,
			  I915_READ_CTL(ring),
			  I915_READ_HEAD(ring),
			  I915_READ_TAIL(ring),
			  I915_READ_START(ring));
		return -EIO;
	}

	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
		i915_kernel_lost_context(ring->dev);
	else {
		ring->head = I915_READ_HEAD(ring);
		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
		ring->space = ring_space(ring);
	}

	return 0;
}

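/*
 * The pipe_control object provides a page of scratch memory for PIPE_CONTROL
 * post-sync writes on gen5+.  It is pinned into the GTT (pc->gtt_offset) for
 * the GPU and mapped into kernel virtual space (pc->cpu_page) so that
 * pc_render_get_seqno() can read back the seqno written there.
 */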
static int
init_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc;
	struct drm_i915_gem_object *obj;
	int ret;

	if (ring->private)
		return 0;

	pc = kmalloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
	if (!pc)
		return -ENOMEM;

	obj = i915_gem_alloc_object(ring->dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret)
		goto err_unref;

	pc->gtt_offset = obj->gtt_offset;
	pc->cpu_page = (uint32_t *)kmem_alloc_nofault(&kernel_map, PAGE_SIZE, PAGE_SIZE);
	if (pc->cpu_page == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}
	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);

	pc->obj = obj;
	ring->private = pc;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	drm_free(pc, DRM_I915_GEM);
	return ret;
}

static void
cleanup_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	struct drm_i915_gem_object *obj;

	if (!ring->private)
		return;

	obj = pc->obj;
	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
	kmem_free(&kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);

	drm_free(pc, DRM_I915_GEM);
	ring->private = NULL;
}

static int init_render_ring(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = init_ring_common(ring);

	if (INTEL_INFO(dev)->gen > 3)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 */
	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	if (INTEL_INFO(dev)->gen == 6)
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));

	if (IS_GEN7(dev))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (INTEL_INFO(dev)->gen >= 5) {
		ret = init_pipe_control(ring);
		if (ret)
			return ret;
	}

	if (IS_GEN6(dev)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
	}

	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	if (HAS_L3_GPU_CACHE(dev))
		I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);

	return ret;
}

static void render_ring_cleanup(struct intel_ring_buffer *ring)
{
	if (!ring->private)
		return;

	cleanup_pipe_control(ring);
}

static void
update_mboxes(struct intel_ring_buffer *ring,
	      u32 seqno,
	      u32 mmio_offset)
{
	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
			      MI_SEMAPHORE_GLOBAL_GTT |
			      MI_SEMAPHORE_REGISTER |
			      MI_SEMAPHORE_UPDATE);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, mmio_offset);
}

/**
 * gen6_add_request - Update the semaphore mailbox registers
 *
 * @ring - ring that is adding a request
 * @seqno - return seqno stuck into the ring
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
static int
gen6_add_request(struct intel_ring_buffer *ring,
		 u32 *seqno)
{
	u32 mbox1_reg;
	u32 mbox2_reg;
	int ret;

	ret = intel_ring_begin(ring, 10);
	if (ret)
		return ret;

	mbox1_reg = ring->signal_mbox[0];
	mbox2_reg = ring->signal_mbox[1];

	*seqno = i915_gem_next_request_seqno(ring);

	update_mboxes(ring, *seqno, mbox1_reg);
	update_mboxes(ring, *seqno, mbox2_reg);
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, *seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	return 0;
}

/**
 * intel_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */
static int
intel_ring_sync(struct intel_ring_buffer *waiter,
		struct intel_ring_buffer *signaller,
		int ring,
		u32 seqno)
{
	int ret;
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
	intel_ring_emit(waiter, seqno);
	intel_ring_emit(waiter, 0);
	intel_ring_emit(waiter, MI_NOOP);
	intel_ring_advance(waiter);

	return 0;
}

int render_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);
int gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);
int gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);

/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
int
render_ring_sync_to(struct intel_ring_buffer *waiter,
		    struct intel_ring_buffer *signaller,
		    u32 seqno)
{
	KASSERT(signaller->semaphore_register[RCS] != MI_SEMAPHORE_SYNC_INVALID,
	    ("valid RCS semaphore"));
	return intel_ring_sync(waiter,
			       signaller,
			       RCS,
			       seqno);
}

/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
int
gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
		      struct intel_ring_buffer *signaller,
		      u32 seqno)
{
	KASSERT(signaller->semaphore_register[VCS] != MI_SEMAPHORE_SYNC_INVALID,
	    ("Valid VCS semaphore"));
	return intel_ring_sync(waiter,
			       signaller,
			       VCS,
			       seqno);
}

/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
int
gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
		      struct intel_ring_buffer *signaller,
		      u32 seqno)
{
	KASSERT(signaller->semaphore_register[BCS] != MI_SEMAPHORE_SYNC_INVALID,
	    ("Valid BCS semaphore"));
	return intel_ring_sync(waiter,
			       signaller,
			       BCS,
			       seqno);
}

#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \
		 PIPE_CONTROL_DEPTH_STALL);				\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)

static int
pc_render_add_request(struct intel_ring_buffer *ring,
		      uint32_t *result)
{
	u32 seqno = i915_gem_next_request_seqno(ring);
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
	 * incoherent with writes to memory, i.e. completely fubar,
	 * so we need to use PIPE_NOTIFY instead.
	 *
	 * However, we also need to workaround the qword write
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
	 * memory before requesting an interrupt.
	 */
	ret = intel_ring_begin(ring, 32);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, 0);
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128; /* write to separate cachelines */
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_NOTIFY);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

static int
render_ring_add_request(struct intel_ring_buffer *ring,
			uint32_t *result)
{
	u32 seqno = i915_gem_next_request_seqno(ring);
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

static u32
gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page. */
	if (!lazy_coherency)
		intel_ring_get_active_head(ring);
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	struct pipe_control *pc = ring->private;
	return pc->cpu_page[0];
}

static void
ironlake_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->gt_irq_mask &= ~mask;
	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
	POSTING_READ(GTIMR);
}

static void
ironlake_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->gt_irq_mask |= mask;
	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
	POSTING_READ(GTIMR);
}

static void
i915_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->irq_mask &= ~mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
	POSTING_READ(IMR);
}

static void
i915_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->irq_mask |= mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
	POSTING_READ(IMR);
}

static bool
render_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (ring->irq_refcount++ == 0) {
		if (HAS_PCH_SPLIT(dev))
			ironlake_enable_irq(dev_priv,
					    GT_PIPE_NOTIFY | GT_USER_INTERRUPT);
		else
			i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
	}

	return true;
}

static void
render_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (--ring->irq_refcount == 0) {
		if (HAS_PCH_SPLIT(dev))
			ironlake_disable_irq(dev_priv,
					     GT_USER_INTERRUPT |
					     GT_PIPE_NOTIFY);
		else
			i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
	}
}

void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t mmio = 0;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev)) {
		switch (ring->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(dev)) {
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
	} else {
		mmio = RING_HWS_PGA(ring->mmio_base);
	}

	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
	POSTING_READ(mmio);
}

static int
bsd_ring_flush(struct intel_ring_buffer *ring,
	       uint32_t invalidate_domains,
	       uint32_t flush_domains)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
ring_add_request(struct intel_ring_buffer *ring,
		 uint32_t *result)
{
	uint32_t seqno;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	seqno = i915_gem_next_request_seqno(ring);

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

static bool
gen6_ring_get_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	gen6_gt_force_wake_get(dev_priv);

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (ring->irq_refcount++ == 0) {
		ring->irq_mask &= ~rflag;
		I915_WRITE_IMR(ring, ring->irq_mask);
		ironlake_enable_irq(dev_priv, gflag);
	}

	return true;
}

static void
gen6_ring_put_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (--ring->irq_refcount == 0) {
		ring->irq_mask |= rflag;
		I915_WRITE_IMR(ring, ring->irq_mask);
		ironlake_disable_irq(dev_priv, gflag);
	}

	gen6_gt_force_wake_put(dev_priv);
}

static bool
bsd_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (ring->irq_refcount++ == 0) {
		if (IS_G4X(dev))
			i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
		else
			ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
	}

	return true;
}

static void
bsd_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (--ring->irq_refcount == 0) {
		if (IS_G4X(dev))
			i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
		else
			ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
	}
}

static int
ring_dispatch_execbuffer(struct intel_ring_buffer *ring, uint32_t offset,
			 uint32_t length)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START | (2 << 6) |
			MI_BATCH_NON_SECURE_I965);
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static int
render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
				uint32_t offset, uint32_t len)
{
	struct drm_device *dev = ring->dev;
	int ret;

	if (IS_I830(dev) || IS_845G(dev)) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
		intel_ring_emit(ring, offset + len - 8);
		intel_ring_emit(ring, 0);
	} else {
		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		if (INTEL_INFO(dev)->gen >= 4) {
			intel_ring_emit(ring,
					MI_BATCH_BUFFER_START | (2 << 6) |
					MI_BATCH_NON_SECURE_I965);
			intel_ring_emit(ring, offset);
		} else {
			intel_ring_emit(ring,
					MI_BATCH_BUFFER_START | (2 << 6));
			intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
		}
	}
	intel_ring_advance(ring);

	return 0;
}

static void cleanup_status_page(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_object *obj;

	obj = ring->status_page.obj;
	if (obj == NULL)
		return;

	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
	kmem_free(&kernel_map, (vm_offset_t)ring->status_page.page_addr,
	    PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);
	ring->status_page.obj = NULL;

	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
}

static int init_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate status page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret != 0) {
		goto err_unref;
	}

	ring->status_page.gfx_addr = obj->gtt_offset;
	ring->status_page.page_addr = (void *)kmem_alloc_nofault(&kernel_map,
	    PAGE_SIZE, PAGE_SIZE);
	if (ring->status_page.page_addr == NULL) {
		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
		ret = -ENOMEM;
		goto err_unpin;
	}
	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
	    1);
	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
	ring->status_page.obj = obj;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	intel_ring_setup_status_page(ring);
	DRM_DEBUG("i915: init_status_page %s hws offset: 0x%08x\n",
		  ring->name, ring->status_page.gfx_addr);

	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	return ret;
}

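/*
 * Common ring setup: allocate the hardware status page (when the platform
 * keeps it in graphics memory), allocate and pin the ring object, map it
 * write-combined through the GTT aperture, then run the ring-specific init
 * hook.  effective_size is shrunk on i830/845 to avoid the tail-pointer
 * erratum described below.
 */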
static int intel_init_ring_buffer(struct drm_device *dev,
				  struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;
	int ret;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);

	lockinit(&ring->irq_lock, "ringb", 0, LK_CANRECURSE);
	ring->irq_mask = ~0;

	init_waitqueue_head(&ring->irq_queue);

	if (I915_NEED_GFX_HWS(dev)) {
		ret = init_status_page(ring);
		if (ret)
			return ret;
	}

	obj = i915_gem_alloc_object(dev, ring->size);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
		ret = -ENOMEM;
		goto err_hws;
	}

	ring->obj = obj;

	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
	if (ret)
		goto err_unref;

	ring->map.size = ring->size;
	ring->map.offset = dev->agp->base + obj->gtt_offset;
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.virtual == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
		ret = -EINVAL;
		goto err_unpin;
	}

	ring->virtual_start = ring->map.virtual;
	ret = ring->init(ring);
	if (ret)
		goto err_unmap;

	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	return 0;

err_unmap:
	drm_core_ioremapfree(&ring->map, dev);
err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
	ring->obj = NULL;
err_hws:
	cleanup_status_page(ring);
	return ret;
}

void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv;
	int ret;

	if (ring->obj == NULL)
		return;

	/* Disable the ring buffer. The ring must be idle at this point */
	dev_priv = ring->dev->dev_private;
	ret = intel_ring_idle(ring);
	if (ret)
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
			  ring->name, ret);

	I915_WRITE_CTL(ring, 0);

	drm_core_ioremapfree(&ring->map, ring->dev);

	i915_gem_object_unpin(ring->obj);
	drm_gem_object_unreference(&ring->obj->base);
	ring->obj = NULL;

	if (ring->cleanup)
		ring->cleanup(ring);

	cleanup_status_page(ring);
}

static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	ret = i915_wait_seqno(ring, seqno);
	if (!ret)
		i915_gem_retire_requests_ring(ring);

	return ret;
}

static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
{
	struct drm_i915_gem_request *request;
	u32 seqno = 0;
	int ret;

	i915_gem_retire_requests_ring(ring);

	if (ring->last_retired_head != -1) {
		ring->head = ring->last_retired_head;
		ring->last_retired_head = -1;
		ring->space = ring_space(ring);
		if (ring->space >= n)
			return 0;
	}

	list_for_each_entry(request, &ring->request_list, list) {
		int space;

		if (request->tail == -1)
			continue;

		space = request->tail - (ring->tail + 8);
		if (space < 0)
			space += ring->size;
		if (space >= n) {
			seqno = request->seqno;
			break;
		}

		/* Consume this request in case we need more space than
		 * is available and so need to prevent a race between
		 * updating last_retired_head and direct reads of
		 * I915_RING_HEAD. It also provides a nice sanity check.
		 */
		request->tail = -1;
	}

	if (seqno == 0)
		return -ENOSPC;

	ret = intel_ring_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	if (ring->last_retired_head == -1)
		return -ENOSPC;

	ring->head = ring->last_retired_head;
	ring->last_retired_head = -1;
	ring->space = ring_space(ring);
	if (ring->space < n)
		return -ENOSPC;

	return 0;
}

static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int end;
	int ret;

	ret = intel_ring_wait_request(ring, n);
	if (ret != -ENOSPC)
		return ret;

	if (drm_core_check_feature(dev, DRIVER_GEM))
		/* With GEM the hangcheck timer should kick us out of the loop,
		 * leaving it early runs the risk of corrupting GEM state (due
		 * to running on almost untested codepaths). But on resume
		 * timers don't work yet, so prevent a complete hang in that
		 * case by choosing an insanely large timeout. */
		end = ticks + hz * 60;
	else
		end = ticks + hz * 3;
	do {
		ring->head = I915_READ_HEAD(ring);
		ring->space = ring_space(ring);
		if (ring->space >= n) {
			return 0;
		}

#if 0
		if (dev->primary->master) {
			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
		}
#else
		if (dev_priv->sarea_priv)
			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
#endif

		DELAY(1000);
		if (atomic_read(&dev_priv->mm.wedged) != 0) {
			return -EAGAIN;
		}
	} while (!time_after(ticks, end));
	return -EBUSY;
}

static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
{
	uint32_t __iomem *virt;
	int rem = ring->size - ring->tail;

	if (ring->space < rem) {
		int ret = ring_wait_for_space(ring, rem);
		if (ret)
			return ret;
	}

	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
	rem /= 4;
	while (rem--)
		*virt++ = MI_NOOP;

	ring->tail = 0;
	ring->space = ring_space(ring);

	return 0;
}

int intel_ring_idle(struct intel_ring_buffer *ring)
{
	return ring_wait_for_space(ring, ring->size - 8);
}

int intel_ring_begin(struct intel_ring_buffer *ring,
		     int num_dwords)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int n = 4*num_dwords;
	int ret;

	if (atomic_read(&dev_priv->mm.wedged))
		return -EIO;

	if (ring->tail + n > ring->effective_size) {
		ret = intel_wrap_ring_buffer(ring);
		if (ret != 0)
			return ret;
	}

	if (unlikely(ring->space < n)) {
		ret = ring_wait_for_space(ring, n);
		if (unlikely(ret))
			return ret;
	}

	ring->space -= n;
	return 0;
}

void intel_ring_advance(struct intel_ring_buffer *ring)
{
	ring->tail &= ring->size - 1;
	ring->write_tail(ring, ring->tail);
}

static const struct intel_ring_buffer render_ring = {
	.name = "render ring",
	.id = RCS,
	.mmio_base = RENDER_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_render_ring,
	.write_tail = ring_write_tail,
	.flush = render_ring_flush,
	.add_request = render_ring_add_request,
	.get_seqno = ring_get_seqno,
	.irq_get = render_ring_get_irq,
	.irq_put = render_ring_put_irq,
	.dispatch_execbuffer = render_ring_dispatch_execbuffer,
	.cleanup = render_ring_cleanup,
	.sync_to = render_ring_sync_to,
	.semaphore_register = {MI_SEMAPHORE_SYNC_INVALID,
			       MI_SEMAPHORE_SYNC_RV,
			       MI_SEMAPHORE_SYNC_RB},
	.signal_mbox = {GEN6_VRSYNC, GEN6_BRSYNC},
};

/* ring buffer for bit-stream decoder */

static const struct intel_ring_buffer bsd_ring = {
	.name = "bsd ring",
	.id = VCS,
	.mmio_base = BSD_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_ring_common,
	.write_tail = ring_write_tail,
	.flush = bsd_ring_flush,
	.add_request = ring_add_request,
	.get_seqno = ring_get_seqno,
	.irq_get = bsd_ring_get_irq,
	.irq_put = bsd_ring_put_irq,
	.dispatch_execbuffer = ring_dispatch_execbuffer,
};

static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
				     u32 value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
		      GEN6_BSD_SLEEP_INDICATOR) == 0,
		     50))
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	I915_WRITE_TAIL(ring, value);
	POSTING_READ(RING_TAIL(ring->mmio_base));

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}

static int gen6_ring_flush(struct intel_ring_buffer *ring,
			   uint32_t invalidate, uint32_t flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (invalidate & I915_GEM_GPU_DOMAINS)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
			      uint32_t offset, uint32_t len)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static bool
gen6_render_ring_get_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_get_irq(ring,
				 GT_USER_INTERRUPT,
				 GEN6_RENDER_USER_INTERRUPT);
}

static void
gen6_render_ring_put_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_put_irq(ring,
				 GT_USER_INTERRUPT,
				 GEN6_RENDER_USER_INTERRUPT);
}

static bool
gen6_bsd_ring_get_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_get_irq(ring,
				 GT_GEN6_BSD_USER_INTERRUPT,
				 GEN6_BSD_USER_INTERRUPT);
}

static void
gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_put_irq(ring,
				 GT_GEN6_BSD_USER_INTERRUPT,
				 GEN6_BSD_USER_INTERRUPT);
}

/* ring buffer for Video Codec for Gen6+ */
static const struct intel_ring_buffer gen6_bsd_ring = {
	.name = "gen6 bsd ring",
	.id = VCS,
	.mmio_base = GEN6_BSD_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_ring_common,
	.write_tail = gen6_bsd_ring_write_tail,
	.flush = gen6_ring_flush,
	.add_request = gen6_add_request,
	.get_seqno = gen6_ring_get_seqno,
	.irq_get = gen6_bsd_ring_get_irq,
	.irq_put = gen6_bsd_ring_put_irq,
	.dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
	.sync_to = gen6_bsd_ring_sync_to,
	.semaphore_register = {MI_SEMAPHORE_SYNC_VR,
			       MI_SEMAPHORE_SYNC_INVALID,
			       MI_SEMAPHORE_SYNC_VB},
	.signal_mbox = {GEN6_RVSYNC, GEN6_BVSYNC},
};

/* Blitter support (SandyBridge+) */

static bool
blt_ring_get_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_get_irq(ring,
				 GT_GEN6_BLT_USER_INTERRUPT,
				 GEN6_BLITTER_USER_INTERRUPT);
}

static void
blt_ring_put_irq(struct intel_ring_buffer *ring)
{
	gen6_ring_put_irq(ring,
			  GT_GEN6_BLT_USER_INTERRUPT,
			  GEN6_BLITTER_USER_INTERRUPT);
}

static int blt_ring_flush(struct intel_ring_buffer *ring,
			  uint32_t invalidate, uint32_t flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (invalidate & I915_GEM_DOMAIN_RENDER)
		cmd |= MI_INVALIDATE_TLB;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static const struct intel_ring_buffer gen6_blt_ring = {
	.name = "blt ring",
	.id = BCS,
	.mmio_base = BLT_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_ring_common,
	.write_tail = ring_write_tail,
	.flush = blt_ring_flush,
	.add_request = gen6_add_request,
	.get_seqno = gen6_ring_get_seqno,
	.irq_get = blt_ring_get_irq,
	.irq_put = blt_ring_put_irq,
	.dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
	.sync_to = gen6_blt_ring_sync_to,
	.semaphore_register = {MI_SEMAPHORE_SYNC_BR,
			       MI_SEMAPHORE_SYNC_BV,
			       MI_SEMAPHORE_SYNC_INVALID},
	.signal_mbox = {GEN6_RBSYNC, GEN6_VBSYNC},
};

int intel_init_render_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];

	*ring = render_ring;
	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->flush = gen6_render_ring_flush;
		ring->irq_get = gen6_render_ring_get_irq;
		ring->irq_put = gen6_render_ring_put_irq;
		ring->get_seqno = gen6_ring_get_seqno;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->get_seqno = pc_render_get_seqno;
	}

	if (!I915_NEED_GFX_HWS(dev)) {
		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
	}

	return intel_init_ring_buffer(dev, ring);
}

int intel_render_ring_init_dri(struct drm_device *dev, uint64_t start,
			       uint32_t size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];

	*ring = render_ring;
	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->irq_get = gen6_render_ring_get_irq;
		ring->irq_put = gen6_render_ring_put_irq;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->get_seqno = pc_render_get_seqno;
	}

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);

	ring->size = size;
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev))
		ring->effective_size -= 128;

	ring->map.offset = start;
	ring->map.size = size;
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.virtual == NULL) {
		DRM_ERROR("can not ioremap virtual address for"
			  " ring buffer\n");
		return -ENOMEM;
	}

	ring->virtual_start = (void *)ring->map.virtual;
	return 0;
}

int intel_init_bsd_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];

	if (IS_GEN6(dev) || IS_GEN7(dev))
		*ring = gen6_bsd_ring;
	else
		*ring = bsd_ring;

	return intel_init_ring_buffer(dev, ring);
}

int intel_init_blt_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];

	*ring = gen6_blt_ring;

	return intel_init_ring_buffer(dev, ring);
}

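/*
 * gpu_caches_dirty tracks a pending write-cache flush for this ring.
 * intel_ring_flush_all_caches() emits that deferred flush after a batch,
 * while intel_ring_invalidate_all_caches() invalidates the read caches
 * before a batch, folding in any pending flush at the same time.
 */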
int
intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
{
	int ret;

	if (!ring->gpu_caches_dirty)
		return 0;

	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ring->gpu_caches_dirty = false;
	return 0;
}

int
intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
{
	uint32_t flush_domains;
	int ret;

	flush_domains = 0;
	if (ring->gpu_caches_dirty)
		flush_domains = I915_GEM_GPU_DOMAINS;

	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
	if (ret)
		return ret;

	ring->gpu_caches_dirty = false;
	return 0;
}