1 #ifndef _INTEL_RINGBUFFER_H_ 2 #define _INTEL_RINGBUFFER_H_ 3 4 #include <linux/hashtable.h> 5 #include "i915_gem_batch_pool.h" 6 #include "i915_gem_request.h" 7 #include "i915_gem_timeline.h" 8 #include "i915_selftest.h" 9 10 struct drm_printer; 11 12 #define I915_CMD_HASH_ORDER 9 13 14 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, 15 * but keeps the logic simple. Indeed, the whole purpose of this macro is just 16 * to give some inclination as to some of the magic values used in the various 17 * workarounds! 18 */ 19 #define CACHELINE_BYTES 64 20 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t)) 21 22 struct intel_hw_status_page { 23 struct i915_vma *vma; 24 u32 *page_addr; 25 u32 ggtt_offset; 26 }; 27 28 #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) 29 #define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val) 30 31 #define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base)) 32 #define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val) 33 34 #define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base)) 35 #define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val) 36 37 #define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base)) 38 #define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val) 39 40 #define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base)) 41 #define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val) 42 43 #define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base)) 44 #define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val) 45 46 /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to 47 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. 48 */ 49 #define gen8_semaphore_seqno_size sizeof(uint64_t) 50 #define GEN8_SEMAPHORE_OFFSET(__from, __to) \ 51 (((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size) 52 #define GEN8_SIGNAL_OFFSET(__ring, to) \ 53 (dev_priv->semaphore->node.start + \ 54 GEN8_SEMAPHORE_OFFSET((__ring)->id, (to))) 55 #define GEN8_WAIT_OFFSET(__ring, from) \ 56 (dev_priv->semaphore->node.start + \ 57 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id)) 58 59 enum intel_engine_hangcheck_action { 60 ENGINE_IDLE = 0, 61 ENGINE_WAIT, 62 ENGINE_ACTIVE_SEQNO, 63 ENGINE_ACTIVE_HEAD, 64 ENGINE_ACTIVE_SUBUNITS, 65 ENGINE_WAIT_KICK, 66 ENGINE_DEAD, 67 }; 68 69 static inline const char * 70 hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) 71 { 72 switch (a) { 73 case ENGINE_IDLE: 74 return "idle"; 75 case ENGINE_WAIT: 76 return "wait"; 77 case ENGINE_ACTIVE_SEQNO: 78 return "active seqno"; 79 case ENGINE_ACTIVE_HEAD: 80 return "active head"; 81 case ENGINE_ACTIVE_SUBUNITS: 82 return "active subunits"; 83 case ENGINE_WAIT_KICK: 84 return "wait kick"; 85 case ENGINE_DEAD: 86 return "dead"; 87 } 88 89 return "unknown"; 90 } 91 92 #define I915_MAX_SLICES 3 93 #define I915_MAX_SUBSLICES 3 94 95 #define instdone_slice_mask(dev_priv__) \ 96 (INTEL_GEN(dev_priv__) == 7 ? \ 97 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask) 98 99 #define instdone_subslice_mask(dev_priv__) \ 100 (INTEL_GEN(dev_priv__) == 7 ? \ 101 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask) 102 103 #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ 104 for ((slice__) = 0, (subslice__) = 0; \ 105 (slice__) < I915_MAX_SLICES; \ 106 (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \ 107 (slice__) += ((subslice__) == 0)) \ 108 for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \ 109 (BIT(subslice__) & instdone_subslice_mask(dev_priv__))) 110 111 struct intel_instdone { 112 u32 instdone; 113 /* The following exist only in the RCS engine */ 114 u32 slice_common; 115 u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; 116 u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; 117 }; 118 119 struct intel_engine_hangcheck { 120 u64 acthd; 121 u32 seqno; 122 enum intel_engine_hangcheck_action action; 123 unsigned long action_timestamp; 124 int deadlock; 125 struct intel_instdone instdone; 126 struct drm_i915_gem_request *active_request; 127 bool stalled; 128 }; 129 130 struct intel_ring { 131 struct i915_vma *vma; 132 void *vaddr; 133 134 struct list_head request_list; 135 136 u32 head; 137 u32 tail; 138 u32 emit; 139 140 u32 space; 141 u32 size; 142 u32 effective_size; 143 }; 144 145 struct i915_gem_context; 146 struct drm_i915_reg_table; 147 148 /* 149 * we use a single page to load ctx workarounds so all of these 150 * values are referred in terms of dwords 151 * 152 * struct i915_wa_ctx_bb: 153 * offset: specifies batch starting position, also helpful in case 154 * if we want to have multiple batches at different offsets based on 155 * some criteria. It is not a requirement at the moment but provides 156 * an option for future use. 157 * size: size of the batch in DWORDS 158 */ 159 struct i915_ctx_workarounds { 160 struct i915_wa_ctx_bb { 161 u32 offset; 162 u32 size; 163 } indirect_ctx, per_ctx; 164 struct i915_vma *vma; 165 }; 166 167 struct drm_i915_gem_request; 168 struct intel_render_state; 169 170 /* 171 * Engine IDs definitions. 172 * Keep instances of the same type engine together. 173 */ 174 enum intel_engine_id { 175 RCS = 0, 176 BCS, 177 VCS, 178 VCS2, 179 #define _VCS(n) (VCS + (n)) 180 VECS 181 }; 182 183 struct i915_priolist { 184 struct rb_node node; 185 struct list_head requests; 186 int priority; 187 }; 188 189 /** 190 * struct intel_engine_execlists - execlist submission queue and port state 191 * 192 * The struct intel_engine_execlists represents the combined logical state of 193 * driver and the hardware state for execlist mode of submission. 194 */ 195 struct intel_engine_execlists { 196 /** 197 * @irq_tasklet: softirq tasklet for bottom handler 198 */ 199 struct tasklet_struct irq_tasklet; 200 201 /** 202 * @default_priolist: priority list for I915_PRIORITY_NORMAL 203 */ 204 struct i915_priolist default_priolist; 205 206 /** 207 * @no_priolist: priority lists disabled 208 */ 209 bool no_priolist; 210 211 /** 212 * @port: execlist port states 213 * 214 * For each hardware ELSP (ExecList Submission Port) we keep 215 * track of the last request and the number of times we submitted 216 * that port to hw. We then count the number of times the hw reports 217 * a context completion or preemption. As only one context can 218 * be active on hw, we limit resubmission of context to port[0]. This 219 * is called Lite Restore, of the context. 220 */ 221 struct execlist_port { 222 /** 223 * @request_count: combined request and submission count 224 */ 225 struct drm_i915_gem_request *request_count; 226 #define EXECLIST_COUNT_BITS 2 227 #define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) 228 #define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) 229 #define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) 230 #define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) 231 #define port_set(p, packed) ((p)->request_count = (packed)) 232 #define port_isset(p) ((p)->request_count) 233 #define port_index(p, execlists) ((p) - (execlists)->port) 234 235 /** 236 * @context_id: context ID for port 237 */ 238 GEM_DEBUG_DECL(u32 context_id); 239 240 #define EXECLIST_MAX_PORTS 2 241 } port[EXECLIST_MAX_PORTS]; 242 243 /** 244 * @active: is the HW active? We consider the HW as active after 245 * submitting any context for execution and until we have seen the 246 * last context completion event. After that, we do not expect any 247 * more events until we submit, and so can park the HW. 248 * 249 * As we have a small number of different sources from which we feed 250 * the HW, we track the state of each inside a single bitfield. 251 */ 252 unsigned int active; 253 #define EXECLISTS_ACTIVE_USER 0 254 #define EXECLISTS_ACTIVE_PREEMPT 1 255 256 /** 257 * @port_mask: number of execlist ports - 1 258 */ 259 unsigned int port_mask; 260 261 /** 262 * @queue: queue of requests, in priority lists 263 */ 264 struct rb_root queue; 265 266 /** 267 * @first: leftmost level in priority @queue 268 */ 269 struct rb_node *first; 270 271 /** 272 * @fw_domains: forcewake domains for irq tasklet 273 */ 274 unsigned int fw_domains; 275 276 /** 277 * @csb_head: context status buffer head 278 */ 279 unsigned int csb_head; 280 281 /** 282 * @csb_use_mmio: access csb through mmio, instead of hwsp 283 */ 284 bool csb_use_mmio; 285 }; 286 287 #define INTEL_ENGINE_CS_MAX_NAME 8 288 289 struct intel_engine_cs { 290 struct drm_i915_private *i915; 291 char name[INTEL_ENGINE_CS_MAX_NAME]; 292 enum intel_engine_id id; 293 unsigned int uabi_id; 294 unsigned int hw_id; 295 unsigned int guc_id; 296 297 u8 class; 298 u8 instance; 299 u32 context_size; 300 u32 mmio_base; 301 unsigned int irq_shift; 302 303 struct intel_ring *buffer; 304 struct intel_timeline *timeline; 305 306 struct intel_render_state *render_state; 307 308 atomic_t irq_count; 309 unsigned long irq_posted; 310 #define ENGINE_IRQ_BREADCRUMB 0 311 #define ENGINE_IRQ_EXECLIST 1 312 313 /* Rather than have every client wait upon all user interrupts, 314 * with the herd waking after every interrupt and each doing the 315 * heavyweight seqno dance, we delegate the task (of being the 316 * bottom-half of the user interrupt) to the first client. After 317 * every interrupt, we wake up one client, who does the heavyweight 318 * coherent seqno read and either goes back to sleep (if incomplete), 319 * or wakes up all the completed clients in parallel, before then 320 * transferring the bottom-half status to the next client in the queue. 321 * 322 * Compared to walking the entire list of waiters in a single dedicated 323 * bottom-half, we reduce the latency of the first waiter by avoiding 324 * a context switch, but incur additional coherent seqno reads when 325 * following the chain of request breadcrumbs. Since it is most likely 326 * that we have a single client waiting on each seqno, then reducing 327 * the overhead of waking that client is much preferred. 328 */ 329 struct intel_breadcrumbs { 330 spinlock_t irq_lock; /* protects irq_*; irqsafe */ 331 struct intel_wait *irq_wait; /* oldest waiter by retirement */ 332 333 spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ 334 struct rb_root waiters; /* sorted by retirement, priority */ 335 struct rb_root signals; /* sorted by retirement */ 336 struct task_struct *signaler; /* used for fence signalling */ 337 struct drm_i915_gem_request __rcu *first_signal; 338 struct timer_list fake_irq; /* used after a missed interrupt */ 339 struct timer_list hangcheck; /* detect missed interrupts */ 340 341 unsigned int hangcheck_interrupts; 342 343 bool irq_armed : 1; 344 bool irq_enabled : 1; 345 I915_SELFTEST_DECLARE(bool mock : 1); 346 } breadcrumbs; 347 348 /* 349 * A pool of objects to use as shadow copies of client batch buffers 350 * when the command parser is enabled. Prevents the client from 351 * modifying the batch contents after software parsing. 352 */ 353 struct i915_gem_batch_pool batch_pool; 354 355 struct intel_hw_status_page status_page; 356 struct i915_ctx_workarounds wa_ctx; 357 struct i915_vma *scratch; 358 359 u32 irq_keep_mask; /* always keep these interrupts */ 360 u32 irq_enable_mask; /* bitmask to enable ring interrupt */ 361 void (*irq_enable)(struct intel_engine_cs *engine); 362 void (*irq_disable)(struct intel_engine_cs *engine); 363 364 int (*init_hw)(struct intel_engine_cs *engine); 365 void (*reset_hw)(struct intel_engine_cs *engine, 366 struct drm_i915_gem_request *req); 367 368 void (*set_default_submission)(struct intel_engine_cs *engine); 369 370 struct intel_ring *(*context_pin)(struct intel_engine_cs *engine, 371 struct i915_gem_context *ctx); 372 void (*context_unpin)(struct intel_engine_cs *engine, 373 struct i915_gem_context *ctx); 374 int (*request_alloc)(struct drm_i915_gem_request *req); 375 int (*init_context)(struct drm_i915_gem_request *req); 376 377 int (*emit_flush)(struct drm_i915_gem_request *request, 378 u32 mode); 379 #define EMIT_INVALIDATE BIT(0) 380 #define EMIT_FLUSH BIT(1) 381 #define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) 382 int (*emit_bb_start)(struct drm_i915_gem_request *req, 383 u64 offset, u32 length, 384 unsigned int dispatch_flags); 385 #define I915_DISPATCH_SECURE BIT(0) 386 #define I915_DISPATCH_PINNED BIT(1) 387 #define I915_DISPATCH_RS BIT(2) 388 void (*emit_breadcrumb)(struct drm_i915_gem_request *req, 389 u32 *cs); 390 int emit_breadcrumb_sz; 391 392 /* Pass the request to the hardware queue (e.g. directly into 393 * the legacy ringbuffer or to the end of an execlist). 394 * 395 * This is called from an atomic context with irqs disabled; must 396 * be irq safe. 397 */ 398 void (*submit_request)(struct drm_i915_gem_request *req); 399 400 /* Call when the priority on a request has changed and it and its 401 * dependencies may need rescheduling. Note the request itself may 402 * not be ready to run! 403 * 404 * Called under the struct_mutex. 405 */ 406 void (*schedule)(struct drm_i915_gem_request *request, 407 int priority); 408 409 /* 410 * Cancel all requests on the hardware, or queued for execution. 411 * This should only cancel the ready requests that have been 412 * submitted to the engine (via the engine->submit_request callback). 413 * This is called when marking the device as wedged. 414 */ 415 void (*cancel_requests)(struct intel_engine_cs *engine); 416 417 /* Some chipsets are not quite as coherent as advertised and need 418 * an expensive kick to force a true read of the up-to-date seqno. 419 * However, the up-to-date seqno is not always required and the last 420 * seen value is good enough. Note that the seqno will always be 421 * monotonic, even if not coherent. 422 */ 423 void (*irq_seqno_barrier)(struct intel_engine_cs *engine); 424 void (*cleanup)(struct intel_engine_cs *engine); 425 426 /* GEN8 signal/wait table - never trust comments! 427 * signal to signal to signal to signal to signal to 428 * RCS VCS BCS VECS VCS2 429 * -------------------------------------------------------------------- 430 * RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) | 431 * |------------------------------------------------------------------- 432 * VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) | 433 * |------------------------------------------------------------------- 434 * BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) | 435 * |------------------------------------------------------------------- 436 * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP (0x90) | VCS2 (0x98) | 437 * |------------------------------------------------------------------- 438 * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP (0xc0) | 439 * |------------------------------------------------------------------- 440 * 441 * Generalization: 442 * f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id) 443 * ie. transpose of g(x, y) 444 * 445 * sync from sync from sync from sync from sync from 446 * RCS VCS BCS VECS VCS2 447 * -------------------------------------------------------------------- 448 * RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) | 449 * |------------------------------------------------------------------- 450 * VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) | 451 * |------------------------------------------------------------------- 452 * BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) | 453 * |------------------------------------------------------------------- 454 * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP (0x90) | VCS2 (0xb8) | 455 * |------------------------------------------------------------------- 456 * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP (0xc0) | 457 * |------------------------------------------------------------------- 458 * 459 * Generalization: 460 * g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id) 461 * ie. transpose of f(x, y) 462 */ 463 struct { 464 union { 465 #define GEN6_SEMAPHORE_LAST VECS_HW 466 #define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1) 467 #define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0) 468 struct { 469 /* our mbox written by others */ 470 u32 wait[GEN6_NUM_SEMAPHORES]; 471 /* mboxes this ring signals to */ 472 i915_reg_t signal[GEN6_NUM_SEMAPHORES]; 473 } mbox; 474 u64 signal_ggtt[I915_NUM_ENGINES]; 475 }; 476 477 /* AKA wait() */ 478 int (*sync_to)(struct drm_i915_gem_request *req, 479 struct drm_i915_gem_request *signal); 480 u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs); 481 } semaphore; 482 483 struct intel_engine_execlists execlists; 484 485 /* Contexts are pinned whilst they are active on the GPU. The last 486 * context executed remains active whilst the GPU is idle - the 487 * switch away and write to the context object only occurs on the 488 * next execution. Contexts are only unpinned on retirement of the 489 * following request ensuring that we can always write to the object 490 * on the context switch even after idling. Across suspend, we switch 491 * to the kernel context and trash it as the save may not happen 492 * before the hardware is powered down. 493 */ 494 struct i915_gem_context *last_retired_context; 495 496 /* We track the current MI_SET_CONTEXT in order to eliminate 497 * redudant context switches. This presumes that requests are not 498 * reordered! Or when they are the tracking is updated along with 499 * the emission of individual requests into the legacy command 500 * stream (ring). 501 */ 502 struct i915_gem_context *legacy_active_context; 503 504 /* status_notifier: list of callbacks for context-switch changes */ 505 struct atomic_notifier_head context_status_notifier; 506 507 struct intel_engine_hangcheck hangcheck; 508 509 bool needs_cmd_parser; 510 511 /* 512 * Table of commands the command parser needs to know about 513 * for this engine. 514 */ 515 DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); 516 517 /* 518 * Table of registers allowed in commands that read/write registers. 519 */ 520 const struct drm_i915_reg_table *reg_tables; 521 int reg_table_count; 522 523 /* 524 * Returns the bitmask for the length field of the specified command. 525 * Return 0 for an unrecognized/invalid command. 526 * 527 * If the command parser finds an entry for a command in the engine's 528 * cmd_tables, it gets the command's length based on the table entry. 529 * If not, it calls this function to determine the per-engine length 530 * field encoding for the command (i.e. different opcode ranges use 531 * certain bits to encode the command length in the header). 532 */ 533 u32 (*get_cmd_length_mask)(u32 cmd_header); 534 }; 535 536 static inline void 537 execlists_set_active(struct intel_engine_execlists *execlists, 538 unsigned int bit) 539 { 540 __set_bit(bit, (unsigned long *)&execlists->active); 541 } 542 543 static inline void 544 execlists_clear_active(struct intel_engine_execlists *execlists, 545 unsigned int bit) 546 { 547 __clear_bit(bit, (unsigned long *)&execlists->active); 548 } 549 550 static inline bool 551 execlists_is_active(const struct intel_engine_execlists *execlists, 552 unsigned int bit) 553 { 554 return test_bit(bit, (unsigned long *)&execlists->active); 555 } 556 557 static inline unsigned int 558 execlists_num_ports(const struct intel_engine_execlists * const execlists) 559 { 560 return execlists->port_mask + 1; 561 } 562 563 static inline void 564 execlists_port_complete(struct intel_engine_execlists * const execlists, 565 struct execlist_port * const port) 566 { 567 const unsigned int m = execlists->port_mask; 568 569 GEM_BUG_ON(port_index(port, execlists) != 0); 570 GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); 571 572 memmove(port, port + 1, m * sizeof(struct execlist_port)); 573 memset(port + m, 0, sizeof(struct execlist_port)); 574 } 575 576 static inline unsigned int 577 intel_engine_flag(const struct intel_engine_cs *engine) 578 { 579 return BIT(engine->id); 580 } 581 582 static inline u32 583 intel_read_status_page(struct intel_engine_cs *engine, int reg) 584 { 585 /* Ensure that the compiler doesn't optimize away the load. */ 586 return READ_ONCE(engine->status_page.page_addr[reg]); 587 } 588 589 static inline void 590 intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) 591 { 592 /* Writing into the status page should be done sparingly. Since 593 * we do when we are uncertain of the device state, we take a bit 594 * of extra paranoia to try and ensure that the HWS takes the value 595 * we give and that it doesn't end up trapped inside the CPU! 596 */ 597 if (static_cpu_has(X86_FEATURE_CLFLUSH)) { 598 mb(); 599 linux_clflush(&engine->status_page.page_addr[reg]); 600 engine->status_page.page_addr[reg] = value; 601 linux_clflush(&engine->status_page.page_addr[reg]); 602 mb(); 603 } else { 604 WRITE_ONCE(engine->status_page.page_addr[reg], value); 605 } 606 } 607 608 /* 609 * Reads a dword out of the status page, which is written to from the command 610 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or 611 * MI_STORE_DATA_IMM. 612 * 613 * The following dwords have a reserved meaning: 614 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes. 615 * 0x04: ring 0 head pointer 616 * 0x05: ring 1 head pointer (915-class) 617 * 0x06: ring 2 head pointer (915-class) 618 * 0x10-0x1b: Context status DWords (GM45) 619 * 0x1f: Last written status offset. (GM45) 620 * 0x20-0x2f: Reserved (Gen6+) 621 * 622 * The area from dword 0x30 to 0x3ff is available for driver usage. 623 */ 624 #define I915_GEM_HWS_INDEX 0x30 625 #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) 626 #define I915_GEM_HWS_SCRATCH_INDEX 0x40 627 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) 628 629 #define I915_HWS_CSB_BUF0_INDEX 0x10 630 #define I915_HWS_CSB_WRITE_INDEX 0x1f 631 #define CNL_HWS_CSB_WRITE_INDEX 0x2f 632 633 struct intel_ring * 634 intel_engine_create_ring(struct intel_engine_cs *engine, int size); 635 int intel_ring_pin(struct intel_ring *ring, 636 struct drm_i915_private *i915, 637 unsigned int offset_bias); 638 void intel_ring_reset(struct intel_ring *ring, u32 tail); 639 unsigned int intel_ring_update_space(struct intel_ring *ring); 640 void intel_ring_unpin(struct intel_ring *ring); 641 void intel_ring_free(struct intel_ring *ring); 642 643 void intel_engine_stop(struct intel_engine_cs *engine); 644 void intel_engine_cleanup(struct intel_engine_cs *engine); 645 646 void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); 647 648 int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); 649 650 u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, 651 unsigned int n); 652 653 static inline void 654 intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) 655 { 656 /* Dummy function. 657 * 658 * This serves as a placeholder in the code so that the reader 659 * can compare against the preceding intel_ring_begin() and 660 * check that the number of dwords emitted matches the space 661 * reserved for the command packet (i.e. the value passed to 662 * intel_ring_begin()). 663 */ 664 GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs); 665 } 666 667 static inline u32 668 intel_ring_wrap(const struct intel_ring *ring, u32 pos) 669 { 670 return pos & (ring->size - 1); 671 } 672 673 static inline u32 674 intel_ring_offset(const struct drm_i915_gem_request *req, void *addr) 675 { 676 /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ 677 u32 offset = addr - req->ring->vaddr; 678 GEM_BUG_ON(offset > req->ring->size); 679 return intel_ring_wrap(req->ring, offset); 680 } 681 682 static inline void 683 assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) 684 { 685 /* We could combine these into a single tail operation, but keeping 686 * them as seperate tests will help identify the cause should one 687 * ever fire. 688 */ 689 GEM_BUG_ON(!IS_ALIGNED(tail, 8)); 690 GEM_BUG_ON(tail >= ring->size); 691 692 /* 693 * "Ring Buffer Use" 694 * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 695 * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 696 * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 697 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the 698 * same cacheline, the Head Pointer must not be greater than the Tail 699 * Pointer." 700 * 701 * We use ring->head as the last known location of the actual RING_HEAD, 702 * it may have advanced but in the worst case it is equally the same 703 * as ring->head and so we should never program RING_TAIL to advance 704 * into the same cacheline as ring->head. 705 */ 706 #define cacheline(a) round_down(a, CACHELINE_BYTES) 707 GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && 708 tail < ring->head); 709 #undef cacheline 710 } 711 712 static inline unsigned int 713 intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) 714 { 715 /* Whilst writes to the tail are strictly order, there is no 716 * serialisation between readers and the writers. The tail may be 717 * read by i915_gem_request_retire() just as it is being updated 718 * by execlists, as although the breadcrumb is complete, the context 719 * switch hasn't been seen. 720 */ 721 assert_ring_tail_valid(ring, tail); 722 ring->tail = tail; 723 return tail; 724 } 725 726 void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); 727 728 void intel_engine_setup_common(struct intel_engine_cs *engine); 729 int intel_engine_init_common(struct intel_engine_cs *engine); 730 int intel_engine_create_scratch(struct intel_engine_cs *engine, int size); 731 void intel_engine_cleanup_common(struct intel_engine_cs *engine); 732 733 int intel_init_render_ring_buffer(struct intel_engine_cs *engine); 734 int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); 735 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); 736 int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); 737 738 u64 intel_engine_get_active_head(struct intel_engine_cs *engine); 739 u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine); 740 741 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) 742 { 743 return intel_read_status_page(engine, I915_GEM_HWS_INDEX); 744 } 745 746 static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) 747 { 748 /* We are only peeking at the tail of the submit queue (and not the 749 * queue itself) in order to gain a hint as to the current active 750 * state of the engine. Callers are not expected to be taking 751 * engine->timeline->lock, nor are they expected to be concerned 752 * wtih serialising this hint with anything, so document it as 753 * a hint and nothing more. 754 */ 755 return READ_ONCE(engine->timeline->seqno); 756 } 757 758 int init_workarounds_ring(struct intel_engine_cs *engine); 759 int intel_ring_workarounds_emit(struct drm_i915_gem_request *req); 760 761 void intel_engine_get_instdone(struct intel_engine_cs *engine, 762 struct intel_instdone *instdone); 763 764 /* 765 * Arbitrary size for largest possible 'add request' sequence. The code paths 766 * are complex and variable. Empirical measurement shows that the worst case 767 * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However, 768 * we need to allocate double the largest single packet within that emission 769 * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW). 770 */ 771 #define MIN_SPACE_FOR_ADD_REQUEST 336 772 773 static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) 774 { 775 return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; 776 } 777 778 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ 779 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); 780 781 static inline void intel_wait_init(struct intel_wait *wait, 782 struct drm_i915_gem_request *rq) 783 { 784 wait->tsk = current; 785 wait->request = rq; 786 } 787 788 static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) 789 { 790 wait->tsk = current; 791 wait->seqno = seqno; 792 } 793 794 static inline bool intel_wait_has_seqno(const struct intel_wait *wait) 795 { 796 return wait->seqno; 797 } 798 799 static inline bool 800 intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) 801 { 802 wait->seqno = seqno; 803 return intel_wait_has_seqno(wait); 804 } 805 806 static inline bool 807 intel_wait_update_request(struct intel_wait *wait, 808 const struct drm_i915_gem_request *rq) 809 { 810 return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq)); 811 } 812 813 static inline bool 814 intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno) 815 { 816 return wait->seqno == seqno; 817 } 818 819 static inline bool 820 intel_wait_check_request(const struct intel_wait *wait, 821 const struct drm_i915_gem_request *rq) 822 { 823 return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq)); 824 } 825 826 static inline bool intel_wait_complete(const struct intel_wait *wait) 827 { 828 return RB_EMPTY_NODE(&wait->node); 829 } 830 831 bool intel_engine_add_wait(struct intel_engine_cs *engine, 832 struct intel_wait *wait); 833 void intel_engine_remove_wait(struct intel_engine_cs *engine, 834 struct intel_wait *wait); 835 void intel_engine_enable_signaling(struct drm_i915_gem_request *request, 836 bool wakeup); 837 void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); 838 839 static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) 840 { 841 return READ_ONCE(engine->breadcrumbs.irq_wait); 842 } 843 844 unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); 845 #define ENGINE_WAKEUP_WAITER BIT(0) 846 #define ENGINE_WAKEUP_ASLEEP BIT(1) 847 848 void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); 849 void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); 850 851 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); 852 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); 853 bool intel_breadcrumbs_busy(struct intel_engine_cs *engine); 854 855 static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) 856 { 857 memset(batch, 0, 6 * sizeof(u32)); 858 859 batch[0] = GFX_OP_PIPE_CONTROL(6); 860 batch[1] = flags; 861 batch[2] = offset; 862 863 return batch + 6; 864 } 865 866 bool intel_engine_is_idle(struct intel_engine_cs *engine); 867 bool intel_engines_are_idle(struct drm_i915_private *dev_priv); 868 869 void intel_engines_mark_idle(struct drm_i915_private *i915); 870 void intel_engines_reset_default_submission(struct drm_i915_private *i915); 871 872 bool intel_engine_can_store_dword(struct intel_engine_cs *engine); 873 874 void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p); 875 876 #endif /* _INTEL_RINGBUFFER_H_ */ 877