/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao <haihao.xiang@intel.com>
 *
 * $FreeBSD: head/sys/dev/drm2/i915/intel_ringbuffer.c 253709 2013-07-27 16:42:29Z kib $
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "intel_ringbuffer.h"
#include <sys/sched.h>

/*
 * 965+ support PIPE_CONTROL commands, which provide finer grained control
 * over cache flushing.
 */
struct pipe_control {
	struct drm_i915_gem_object *obj;
	volatile u32 *cpu_page;
	u32 gtt_offset;
};

static inline int ring_space(struct intel_ring_buffer *ring)
{
	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
	if (space < 0)
		space += ring->size;
	return space;
}

static int
render_ring_flush(struct intel_ring_buffer *ring,
		  uint32_t invalidate_domains,
		  uint32_t flush_domains)
{
	struct drm_device *dev = ring->dev;
	uint32_t cmd;
	int ret;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	if (INTEL_INFO(dev)->gen < 4) {
		/*
		 * On the 965, the sampler cache always gets flushed
		 * and this bit is reserved.
		 */
		if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
			cmd |= MI_READ_FLUSH;
	}
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
	    (IS_G4X(dev) || IS_GEN5(dev)))
		cmd |= MI_INVALIDATE_ISP;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	intel_emit_post_sync_nonzero_flush(ring);

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0); /* lower dword */
	intel_ring_emit(ring, 0); /* upper dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static void ring_write_tail(struct intel_ring_buffer *ring,
			    uint32_t value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	I915_WRITE_TAIL(ring, value);
}

u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	uint32_t acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
			     RING_ACTHD(ring->mmio_base) : ACTHD;

	return I915_READ(acthd_reg);
}

static int init_ring_common(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_object *obj = ring->obj;
	uint32_t head;

	/* Stop the ring if it's running. */
	I915_WRITE_CTL(ring, 0);
	I915_WRITE_HEAD(ring, 0);
	ring->write_tail(ring, 0);

	/* Initialize the ring. */
	I915_WRITE_START(ring, obj->gtt_offset);
	head = I915_READ_HEAD(ring) & HEAD_ADDR;

	/* G45 ring initialization fails to reset head to zero */
	if (head != 0) {
		DRM_DEBUG("%s head not reset to zero "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  ring->name,
			  I915_READ_CTL(ring),
			  I915_READ_HEAD(ring),
			  I915_READ_TAIL(ring),
			  I915_READ_START(ring));

		I915_WRITE_HEAD(ring, 0);

		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  ring->name,
				  I915_READ_CTL(ring),
				  I915_READ_HEAD(ring),
				  I915_READ_TAIL(ring),
				  I915_READ_START(ring));
		}
	}

	I915_WRITE_CTL(ring,
		       ((ring->size - PAGE_SIZE) & RING_NR_PAGES)
		       | RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (_intel_wait_for(ring->dev,
			    (I915_READ_CTL(ring) & RING_VALID) != 0 &&
			    I915_READ_START(ring) == obj->gtt_offset &&
			    (I915_READ_HEAD(ring) & HEAD_ADDR) == 0,
			    50, 1, "915rii")) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  ring->name,
			  I915_READ_CTL(ring),
			  I915_READ_HEAD(ring),
			  I915_READ_TAIL(ring),
			  I915_READ_START(ring));
		return -EIO;
	}

	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
		i915_kernel_lost_context(ring->dev);
	else {
		ring->head = I915_READ_HEAD(ring);
		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
		ring->space = ring_space(ring);
	}

	return 0;
}

static int
init_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc;
	struct drm_i915_gem_object *obj;
	int ret;

	if (ring->private)
		return 0;

	pc = kmalloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
	if (!pc)
		return -ENOMEM;

	obj = i915_gem_alloc_object(ring->dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret)
		goto err_unref;

	pc->gtt_offset = obj->gtt_offset;
	pc->cpu_page = (uint32_t *)kmem_alloc_nofault(&kernel_map, PAGE_SIZE,
	    PAGE_SIZE);
	if (pc->cpu_page == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}
	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);

	pc->obj = obj;
	ring->private = pc;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	drm_free(pc, DRM_I915_GEM);
	return ret;
}

static void
cleanup_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	struct drm_i915_gem_object *obj;

	if (!ring->private)
		return;

	obj = pc->obj;
	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
	kmem_free(&kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);

	drm_free(pc, DRM_I915_GEM);
	ring->private = NULL;
}

static int init_render_ring(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = init_ring_common(ring);

	if (INTEL_INFO(dev)->gen > 3)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 */
	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	if (INTEL_INFO(dev)->gen == 6)
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));

	if (IS_GEN7(dev))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (INTEL_INFO(dev)->gen >= 5) {
		ret = init_pipe_control(ring);
		if (ret)
			return ret;
	}

	if (IS_GEN6(dev)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset.  LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
	}

	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	if (HAS_L3_GPU_CACHE(dev))
		I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);

	return ret;
}

static void render_ring_cleanup(struct intel_ring_buffer *ring)
{
	if (!ring->private)
		return;

	cleanup_pipe_control(ring);
}

static void
update_mboxes(struct intel_ring_buffer *ring,
	      u32 seqno,
	      u32 mmio_offset)
{
	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
			      MI_SEMAPHORE_GLOBAL_GTT |
			      MI_SEMAPHORE_REGISTER |
			      MI_SEMAPHORE_UPDATE);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, mmio_offset);
}

/**
 * gen6_add_request - Update the semaphore mailbox registers
 *
 * @ring - ring that is adding a request
 * @seqno - return seqno stuck into the ring
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
static int
gen6_add_request(struct intel_ring_buffer *ring,
		 u32 *seqno)
{
	u32 mbox1_reg;
	u32 mbox2_reg;
	int ret;

	ret = intel_ring_begin(ring, 10);
	if (ret)
		return ret;

	mbox1_reg = ring->signal_mbox[0];
	mbox2_reg = ring->signal_mbox[1];

	*seqno = i915_gem_next_request_seqno(ring);

	update_mboxes(ring, *seqno, mbox1_reg);
	update_mboxes(ring, *seqno, mbox2_reg);
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, *seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	return 0;
}

/**
 * intel_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */
static int
intel_ring_sync(struct intel_ring_buffer *waiter,
		struct intel_ring_buffer *signaller,
		int ring,
		u32 seqno)
{
	int ret;
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
	intel_ring_emit(waiter, seqno);
	intel_ring_emit(waiter, 0);
	intel_ring_emit(waiter, MI_NOOP);
	intel_ring_advance(waiter);

	return 0;
}

int render_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);
int gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);
int gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);

/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
int
render_ring_sync_to(struct intel_ring_buffer *waiter,
		    struct intel_ring_buffer *signaller,
		    u32 seqno)
{
	KASSERT(signaller->semaphore_register[RCS] != MI_SEMAPHORE_SYNC_INVALID,
	    ("valid RCS semaphore"));
	return intel_ring_sync(waiter,
			       signaller,
			       RCS,
			       seqno);
}

/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
int
gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
		      struct intel_ring_buffer *signaller,
		      u32 seqno)
{
	KASSERT(signaller->semaphore_register[VCS] != MI_SEMAPHORE_SYNC_INVALID,
	    ("Valid VCS semaphore"));
	return intel_ring_sync(waiter,
			       signaller,
			       VCS,
			       seqno);
}

/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
int
gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
		      struct intel_ring_buffer *signaller,
		      u32 seqno)
{
	KASSERT(signaller->semaphore_register[BCS] != MI_SEMAPHORE_SYNC_INVALID,
	    ("Valid BCS semaphore"));
	return intel_ring_sync(waiter,
			       signaller,
			       BCS,
			       seqno);
}

#define PIPE_CONTROL_FLUSH(ring__, addr__)					\
do {										\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |\
		 PIPE_CONTROL_DEPTH_STALL);					\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);		\
	intel_ring_emit(ring__, 0);						\
	intel_ring_emit(ring__, 0);						\
} while (0)

static int
pc_render_add_request(struct intel_ring_buffer *ring,
		      uint32_t *result)
{
	u32 seqno = i915_gem_next_request_seqno(ring);
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
	 * incoherent with writes to memory, i.e. completely fubar,
	 * so we need to use PIPE_NOTIFY instead.
	 *
	 * However, we also need to workaround the qword write
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
	 * memory before requesting an interrupt.
	 */
	ret = intel_ring_begin(ring, 32);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, 0);
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128; /* write to separate cachelines */
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_NOTIFY);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

static int
render_ring_add_request(struct intel_ring_buffer *ring,
			uint32_t *result)
{
	u32 seqno = i915_gem_next_request_seqno(ring);
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

static u32
gen6_ring_get_seqno(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;

	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page.
	 */
	if (/* IS_GEN6(dev) || */ IS_GEN7(dev))
		intel_ring_get_active_head(ring);
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static uint32_t
ring_get_seqno(struct intel_ring_buffer *ring)
{
	if (ring->status_page.page_addr == NULL)
		return (-1);
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static uint32_t
pc_render_get_seqno(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	if (pc != NULL)
		return pc->cpu_page[0];
	else
		return (-1);
}

static void
ironlake_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->gt_irq_mask &= ~mask;
	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
	POSTING_READ(GTIMR);
}

static void
ironlake_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->gt_irq_mask |= mask;
	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
	POSTING_READ(GTIMR);
}

static void
i915_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->irq_mask &= ~mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
	POSTING_READ(IMR);
}

static void
i915_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->irq_mask |= mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
	POSTING_READ(IMR);
}

static bool
render_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (ring->irq_refcount++ == 0) {
		if (HAS_PCH_SPLIT(dev))
			ironlake_enable_irq(dev_priv,
					    GT_PIPE_NOTIFY | GT_USER_INTERRUPT);
		else
			i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
	}

	return true;
}

static void
render_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (--ring->irq_refcount == 0) {
		if (HAS_PCH_SPLIT(dev))
			ironlake_disable_irq(dev_priv,
					     GT_USER_INTERRUPT |
					     GT_PIPE_NOTIFY);
		else
			i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
	}
}

void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t mmio = 0;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev)) {
		switch (ring->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(dev)) {
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
	} else {
		mmio = RING_HWS_PGA(ring->mmio_base);
	}

	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
	POSTING_READ(mmio);
}

static int
bsd_ring_flush(struct intel_ring_buffer *ring,
	       uint32_t invalidate_domains,
	       uint32_t flush_domains)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
ring_add_request(struct intel_ring_buffer *ring,
		 uint32_t *result)
{
	uint32_t seqno;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	seqno = i915_gem_next_request_seqno(ring);

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

static bool
gen6_ring_get_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	gen6_gt_force_wake_get(dev_priv);

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (ring->irq_refcount++ == 0) {
		ring->irq_mask &= ~rflag;
		I915_WRITE_IMR(ring, ring->irq_mask);
		ironlake_enable_irq(dev_priv, gflag);
	}

	return true;
}

static void
gen6_ring_put_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (--ring->irq_refcount == 0) {
		ring->irq_mask |= rflag;
		I915_WRITE_IMR(ring, ring->irq_mask);
		ironlake_disable_irq(dev_priv, gflag);
	}

	gen6_gt_force_wake_put(dev_priv);
}

static bool
bsd_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (ring->irq_refcount++ == 0) {
		if (IS_G4X(dev))
			i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
		else
			ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
	}

	return true;
}

static void
bsd_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (--ring->irq_refcount == 0) {
		if (IS_G4X(dev))
			i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
		else
			ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
	}
}

static int
ring_dispatch_execbuffer(struct intel_ring_buffer *ring, uint32_t offset,
    uint32_t length)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START | (2 << 6) |
			MI_BATCH_NON_SECURE_I965);
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static int
render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
				uint32_t offset, uint32_t len)
{
	struct drm_device *dev = ring->dev;
	int ret;

	if (IS_I830(dev) || IS_845G(dev)) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
		intel_ring_emit(ring, offset + len - 8);
		intel_ring_emit(ring, 0);
	} else {
		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		if (INTEL_INFO(dev)->gen >= 4) {
			intel_ring_emit(ring,
					MI_BATCH_BUFFER_START | (2 << 6) |
					MI_BATCH_NON_SECURE_I965);
			intel_ring_emit(ring, offset);
		} else {
			intel_ring_emit(ring,
					MI_BATCH_BUFFER_START | (2 << 6));
			intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
		}
	}
	intel_ring_advance(ring);

	return 0;
}

static void cleanup_status_page(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_object *obj;

	obj = ring->status_page.obj;
	if (obj == NULL)
		return;

	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
	kmem_free(&kernel_map, (vm_offset_t)ring->status_page.page_addr,
	    PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);
	ring->status_page.obj = NULL;

	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
}

static int init_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate status page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret != 0) {
		goto err_unref;
	}

	ring->status_page.gfx_addr = obj->gtt_offset;
	ring->status_page.page_addr = (void *)kmem_alloc_nofault(&kernel_map,
	    PAGE_SIZE, PAGE_SIZE);
	if (ring->status_page.page_addr == NULL) {
		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
		ret = -ENOMEM;
		goto err_unpin;
	}
	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
	    1);
	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
	ring->status_page.obj = obj;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	intel_ring_setup_status_page(ring);
	DRM_DEBUG("i915: init_status_page %s hws offset: 0x%08x\n",
	    ring->name, ring->status_page.gfx_addr);

	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	return ret;
}

static
int intel_init_ring_buffer(struct drm_device *dev,
    struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;
	int ret;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);

	lockinit(&ring->irq_lock, "ringb", 0, LK_CANRECURSE);
	ring->irq_mask = ~0;

	if (I915_NEED_GFX_HWS(dev)) {
		ret = init_status_page(ring);
		if (ret)
			return ret;
	}

	obj = i915_gem_alloc_object(dev, ring->size);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
		ret = -ENOMEM;
		goto err_hws;
	}

	ring->obj = obj;

	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
	if (ret)
		goto err_unref;

	ring->map.size = ring->size;
	ring->map.offset = dev->agp->base + obj->gtt_offset;
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.virtual == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
		ret = -EINVAL;
		goto err_unpin;
	}

	ring->virtual_start = ring->map.virtual;
	ret = ring->init(ring);
	if (ret)
		goto err_unmap;

	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	return 0;

err_unmap:
	drm_core_ioremapfree(&ring->map, dev);
err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
	ring->obj = NULL;
err_hws:
	cleanup_status_page(ring);
	return ret;
}

void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv;
	int ret;

	if (ring->obj == NULL)
		return;

	/* Disable the ring buffer. The ring must be idle at this point */
	dev_priv = ring->dev->dev_private;
	ret = intel_wait_ring_idle(ring);
	I915_WRITE_CTL(ring, 0);

	drm_core_ioremapfree(&ring->map, ring->dev);

	i915_gem_object_unpin(ring->obj);
	drm_gem_object_unreference(&ring->obj->base);
	ring->obj = NULL;

	if (ring->cleanup)
		ring->cleanup(ring);

	cleanup_status_page(ring);
}

static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
{
	unsigned int *virt;
	int rem = ring->size - ring->tail;

	if (ring->space < rem) {
		int ret = intel_wait_ring_buffer(ring, rem);
		if (ret)
			return ret;
	}

	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
	rem /= 8;
	while (rem--) {
		*virt++ = MI_NOOP;
		*virt++ = MI_NOOP;
	}

	ring->tail = 0;
	ring->space = ring_space(ring);

	return 0;
}

static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	bool was_interruptible;
	int ret;

	/* XXX As we have not yet audited all the paths to check that
	 * they are ready for ERESTARTSYS from intel_ring_begin, do not
	 * allow us to be interruptible by a signal.
	 */
	was_interruptible = dev_priv->mm.interruptible;
	dev_priv->mm.interruptible = false;

	ret = i915_wait_request(ring, seqno, true);

	dev_priv->mm.interruptible = was_interruptible;

	return ret;
}

static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
{
	struct drm_i915_gem_request *request;
	u32 seqno = 0;
	int ret;

	i915_gem_retire_requests_ring(ring);

	if (ring->last_retired_head != -1) {
		ring->head = ring->last_retired_head;
		ring->last_retired_head = -1;
		ring->space = ring_space(ring);
		if (ring->space >= n)
			return 0;
	}

	list_for_each_entry(request, &ring->request_list, list) {
		int space;

		if (request->tail == -1)
			continue;

		space = request->tail - (ring->tail + 8);
		if (space < 0)
			space += ring->size;
		if (space >= n) {
			seqno = request->seqno;
			break;
		}

		/* Consume this request in case we need more space than
		 * is available and so need to prevent a race between
		 * updating last_retired_head and direct reads of
		 * I915_RING_HEAD. It also provides a nice sanity check.
		 */
		request->tail = -1;
	}

	if (seqno == 0)
		return -ENOSPC;

	ret = intel_ring_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	if (ring->last_retired_head == -1)
		return -ENOSPC;

	ring->head = ring->last_retired_head;
	ring->last_retired_head = -1;
	ring->space = ring_space(ring);
	if (ring->space < n)
		return -ENOSPC;

	return 0;
}

int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int end;
	int ret;

	ret = intel_ring_wait_request(ring, n);
	if (ret != -ENOSPC)
		return ret;

	if (drm_core_check_feature(dev, DRIVER_GEM))
		/* With GEM the hangcheck timer should kick us out of the loop,
		 * leaving it early runs the risk of corrupting GEM state (due
		 * to running on almost untested codepaths). But on resume
		 * timers don't work yet, so prevent a complete hang in that
		 * case by choosing an insanely large timeout.
		 */
		end = ticks + hz * 60;
	else
		end = ticks + hz * 3;
	do {
		ring->head = I915_READ_HEAD(ring);
		ring->space = ring_space(ring);
		if (ring->space >= n) {
			return 0;
		}

#if 0
		if (dev->primary->master) {
			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
		}
#else
		if (dev_priv->sarea_priv)
			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
#endif

		DELAY(1000);
		if (atomic_read(&dev_priv->mm.wedged) != 0) {
			return -EAGAIN;
		}
	} while (!time_after(ticks, end));
	return -EBUSY;
}

int intel_ring_begin(struct intel_ring_buffer *ring,
		     int num_dwords)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int n = 4*num_dwords;
	int ret;

	if (atomic_read(&dev_priv->mm.wedged))
		return -EIO;

	if (ring->tail + n > ring->effective_size) {
		ret = intel_wrap_ring_buffer(ring);
		if (ret != 0)
			return ret;
	}

	if (ring->space < n) {
		ret = intel_wait_ring_buffer(ring, n);
		if (ret != 0)
			return ret;
	}

	ring->space -= n;
	return 0;
}

void intel_ring_advance(struct intel_ring_buffer *ring)
{
	ring->tail &= ring->size - 1;
	ring->write_tail(ring, ring->tail);
}
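
/*
 * Minimal usage sketch of the command-emission API above (illustrative
 * only; it mirrors the pattern already used by bsd_ring_flush earlier in
 * this file): reserve space with intel_ring_begin(), write each dword
 * with intel_ring_emit(), then publish the new tail with
 * intel_ring_advance():
 *
 *	ret = intel_ring_begin(ring, 2);
 *	if (ret)
 *		return ret;
 *	intel_ring_emit(ring, MI_FLUSH);
 *	intel_ring_emit(ring, MI_NOOP);
 *	intel_ring_advance(ring);
 */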

static const struct intel_ring_buffer render_ring = {
	.name = "render ring",
	.id = RCS,
	.mmio_base = RENDER_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_render_ring,
	.write_tail = ring_write_tail,
	.flush = render_ring_flush,
	.add_request = render_ring_add_request,
	.get_seqno = ring_get_seqno,
	.irq_get = render_ring_get_irq,
	.irq_put = render_ring_put_irq,
	.dispatch_execbuffer = render_ring_dispatch_execbuffer,
	.cleanup = render_ring_cleanup,
	.sync_to = render_ring_sync_to,
	.semaphore_register = {MI_SEMAPHORE_SYNC_INVALID,
			       MI_SEMAPHORE_SYNC_RV,
			       MI_SEMAPHORE_SYNC_RB},
	.signal_mbox = {GEN6_VRSYNC, GEN6_BRSYNC},
};

/* ring buffer for bit-stream decoder */

static const struct intel_ring_buffer bsd_ring = {
	.name = "bsd ring",
	.id = VCS,
	.mmio_base = BSD_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_ring_common,
	.write_tail = ring_write_tail,
	.flush = bsd_ring_flush,
	.add_request = ring_add_request,
	.get_seqno = ring_get_seqno,
	.irq_get = bsd_ring_get_irq,
	.irq_put = bsd_ring_put_irq,
	.dispatch_execbuffer = ring_dispatch_execbuffer,
};

static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
				     u32 value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
		      GEN6_BSD_SLEEP_INDICATOR) == 0,
		     50))
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	I915_WRITE_TAIL(ring, value);
	POSTING_READ(RING_TAIL(ring->mmio_base));

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}

static int gen6_ring_flush(struct intel_ring_buffer *ring,
			   uint32_t invalidate, uint32_t flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (invalidate & I915_GEM_GPU_DOMAINS)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
			      uint32_t offset, uint32_t len)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static bool
gen6_render_ring_get_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_get_irq(ring,
				 GT_USER_INTERRUPT,
				 GEN6_RENDER_USER_INTERRUPT);
}

static void
gen6_render_ring_put_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_put_irq(ring,
				 GT_USER_INTERRUPT,
				 GEN6_RENDER_USER_INTERRUPT);
}

static bool
gen6_bsd_ring_get_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_get_irq(ring,
				 GT_GEN6_BSD_USER_INTERRUPT,
				 GEN6_BSD_USER_INTERRUPT);
}

static void
gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_put_irq(ring,
				 GT_GEN6_BSD_USER_INTERRUPT,
				 GEN6_BSD_USER_INTERRUPT);
}

/* ring buffer for Video Codec for Gen6+ */
static const struct intel_ring_buffer gen6_bsd_ring = {
	.name = "gen6 bsd ring",
	.id = VCS,
	.mmio_base = GEN6_BSD_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_ring_common,
	.write_tail = gen6_bsd_ring_write_tail,
	.flush = gen6_ring_flush,
	.add_request = gen6_add_request,
	.get_seqno = gen6_ring_get_seqno,
	.irq_get = gen6_bsd_ring_get_irq,
	.irq_put = gen6_bsd_ring_put_irq,
	.dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
	.sync_to = gen6_bsd_ring_sync_to,
	.semaphore_register = {MI_SEMAPHORE_SYNC_VR,
			       MI_SEMAPHORE_SYNC_INVALID,
			       MI_SEMAPHORE_SYNC_VB},
	.signal_mbox = {GEN6_RVSYNC, GEN6_BVSYNC},
};

/* Blitter support (SandyBridge+) */

static bool
blt_ring_get_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_get_irq(ring,
				 GT_GEN6_BLT_USER_INTERRUPT,
				 GEN6_BLITTER_USER_INTERRUPT);
}

static void
blt_ring_put_irq(struct intel_ring_buffer *ring)
{
	gen6_ring_put_irq(ring,
			  GT_GEN6_BLT_USER_INTERRUPT,
			  GEN6_BLITTER_USER_INTERRUPT);
}

static int blt_ring_flush(struct intel_ring_buffer *ring,
			  uint32_t invalidate, uint32_t flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (invalidate & I915_GEM_DOMAIN_RENDER)
		cmd |= MI_INVALIDATE_TLB;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static const struct intel_ring_buffer gen6_blt_ring = {
	.name = "blt ring",
	.id = BCS,
	.mmio_base = BLT_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_ring_common,
	.write_tail = ring_write_tail,
	.flush = blt_ring_flush,
	.add_request = gen6_add_request,
	.get_seqno = gen6_ring_get_seqno,
	.irq_get = blt_ring_get_irq,
	.irq_put = blt_ring_put_irq,
	.dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
	.sync_to = gen6_blt_ring_sync_to,
	.semaphore_register = {MI_SEMAPHORE_SYNC_BR,
			       MI_SEMAPHORE_SYNC_BV,
			       MI_SEMAPHORE_SYNC_INVALID},
	.signal_mbox = {GEN6_RBSYNC, GEN6_VBSYNC},
};

int intel_init_render_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];

	*ring = render_ring;
	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->flush = gen6_render_ring_flush;
		ring->irq_get = gen6_render_ring_get_irq;
		ring->irq_put = gen6_render_ring_put_irq;
		ring->get_seqno = gen6_ring_get_seqno;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->get_seqno = pc_render_get_seqno;
	}

	if (!I915_NEED_GFX_HWS(dev)) {
		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
	}

	return intel_init_ring_buffer(dev, ring);
}

int intel_render_ring_init_dri(struct drm_device *dev, uint64_t start,
    uint32_t size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];

	*ring = render_ring;
	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->irq_get = gen6_render_ring_get_irq;
		ring->irq_put = gen6_render_ring_put_irq;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->get_seqno = pc_render_get_seqno;
	}

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);

	ring->size = size;
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev))
		ring->effective_size -= 128;

	ring->map.offset = start;
	ring->map.size = size;
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.virtual == NULL) {
		DRM_ERROR("can not ioremap virtual address for"
			  " ring buffer\n");
		return -ENOMEM;
	}

	ring->virtual_start = (void *)ring->map.virtual;
	return 0;
}

int intel_init_bsd_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];

	if (IS_GEN6(dev) || IS_GEN7(dev))
		*ring = gen6_bsd_ring;
	else
		*ring = bsd_ring;

	return intel_init_ring_buffer(dev, ring);
}

int intel_init_blt_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];

	*ring = gen6_blt_ring;

	return intel_init_ring_buffer(dev, ring);
}