/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2019 Intel Corporation
 */

#ifndef __INTEL_ENGINE_TYPES__
#define __INTEL_ENGINE_TYPES__

#include <linux/average.h>
#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "i915_gem.h"
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_uncore.h"
#include "intel_wakeref.h"
#include "intel_workarounds_types.h"

/* HW Engine class + instance */
#define RENDER_CLASS		0
#define VIDEO_DECODE_CLASS	1
#define VIDEO_ENHANCEMENT_CLASS	2
#define COPY_ENGINE_CLASS	3
#define OTHER_CLASS		4
#define COMPUTE_CLASS		5
#define MAX_ENGINE_CLASS	5
#define MAX_ENGINE_INSTANCE	7

#define I915_MAX_SLICES	3
#define I915_MAX_SUBSLICES 8

#define I915_CMD_HASH_ORDER 9

struct dma_fence;
struct drm_i915_gem_object;
struct drm_i915_reg_table;
struct i915_gem_context;
struct i915_request;
struct i915_sched_attr;
struct i915_sched_engine;
struct intel_gt;
struct intel_ring;
struct intel_uncore;
struct intel_breadcrumbs;

typedef u32 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)

struct intel_hw_status_page {
	struct list_head timelines;
	struct i915_vma *vma;
	u32 *addr;
};

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 slice_common_extra[2];
	u32 sampler[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];
	u32 row[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];

	/* Added in XeHPG */
	u32 geom_svg[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];
};

/*
 * we use a single page to load ctx workarounds so all of these
 * values are referred in terms of dwords
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies batch starting position, also helpful in case
 *    if we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

#define I915_MAX_VCS	8
#define I915_MAX_VECS	4
#define I915_MAX_SFC	(I915_MAX_VCS / 2)
#define I915_MAX_CCS	4
#define I915_MAX_RCS	1

/*
 * Engine IDs definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS0 = 0,
	BCS0,
	VCS0,
	VCS1,
	VCS2,
	VCS3,
	VCS4,
	VCS5,
	VCS6,
	VCS7,
#define _VCS(n) (VCS0 + (n))
	VECS0,
	VECS1,
	VECS2,
	VECS3,
#define _VECS(n) (VECS0 + (n))
	CCS0,
	CCS1,
	CCS2,
	CCS3,
#define _CCS(n) (CCS0 + (n))
	I915_NUM_ENGINES
#define INVALID_ENGINE ((enum intel_engine_id)-1)
};

/* A simple estimator for the round-trip latency of an engine */
DECLARE_EWMA(_engine_latency, 6, 4)

struct st_preempt_hang {
	struct completion completion;
	unsigned int count;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state of
 * driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @timer: kick the current context if its timeslice expires
	 */
	struct timer_list timer;

	/**
	 * @preempt: reset the current context if it fails to give way
	 */
	struct timer_list preempt;

	/**
	 * @ccid: identifier for contexts submitted to this engine
	 */
	u32 ccid;

	/**
	 * @yield: CCID at the time of the last semaphore-wait interrupt.
	 *
	 * Instead of leaving a semaphore busy-spinning on an engine, we would
	 * like to switch to another ready context, i.e. yielding the semaphore
	 * timeslice.
	 */
	u32 yield;

	/**
	 * @error_interrupt: CS Master EIR
	 *
	 * The CS generates an interrupt when it detects an error. We capture
	 * the first error interrupt, record the EIR and schedule the tasklet.
	 * In the tasklet, we process the pending CS events to ensure we have
	 * the guilty request, and then reset the engine.
	 *
	 * Low 16b are used by HW, with the upper 16b used as the enabling mask.
	 * Reserve the upper 16b for tracking internal errors.
	 */
	u32 error_interrupt;
#define ERROR_CSB	BIT(31)
#define ERROR_PREEMPT	BIT(30)

	/**
	 * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset
	 */
	u32 reset_ccid;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

#define EXECLIST_MAX_PORTS 2
	/**
	 * @active: the currently known context executing on HW
	 */
	struct i915_request * const *active;
	/**
	 * @inflight: the set of contexts submitted and acknowledged by HW
	 *
	 * The set of inflight contexts is managed by reading CS events
	 * from the HW. On a context-switch event (not preemption), we
	 * know the HW has transitioned from port0 to port1, and we
	 * advance our inflight/active tracking accordingly.
	 */
	struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
	/**
	 * @pending: the next set of contexts submitted to ELSP
	 *
	 * We store the array of contexts that we submit to HW (via ELSP) and
	 * promote them to the inflight array once HW has signaled the
	 * preemption or idle-to-active event.
	 */
	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @virtual: Queue of requests on a virtual engine, sorted by priority.
	 * Each RB entry is a struct i915_priolist containing a list of requests
	 * of the same priority.
	 */
	struct rb_root_cached virtual;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u64 *csb_status;

	/**
	 * @csb_size: context status buffer FIFO size
	 */
	u8 csb_size;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_execlists_stats {
	/**
	 * @active: Number of contexts currently scheduled in.
	 */
	unsigned int active;

	/**
	 * @lock: Lock protecting the below fields.
	 */
	seqcount_t lock;

	/**
	 * @total: Total time this engine was busy.
	 *
	 * Accumulated time not counting the most recent block in cases where
	 * engine is currently busy (active > 0).
	 */
	ktime_t total;

	/**
	 * @start: Timestamp of the last idle to active transition.
	 *
	 * Idle is defined as active == 0, active is active > 0.
	 */
	ktime_t start;
};

struct intel_engine_guc_stats {
	/**
	 * @running: Active state of the engine when busyness was last sampled.
	 */
	bool running;

	/**
	 * @prev_total: Previous value of total runtime clock cycles.
	 */
	u32 prev_total;

	/**
	 * @total_gt_clks: Total gt clock cycles this engine was busy.
	 */
	u64 total_gt_clks;

	/**
	 * @start_gt_clk: GT clock time of last idle to active transition.
	 */
	u64 start_gt_clk;
};

struct intel_engine_cs {
	struct drm_i915_private *i915;
	struct intel_gt *gt;
	struct intel_uncore *uncore;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	enum intel_engine_id legacy_idx;

	unsigned int guc_id;

	intel_engine_mask_t mask;
	u32 reset_domain;
	/**
	 * @logical_mask: logical mask of engine, reported to user space via
	 * query IOCTL and used to communicate with the GuC in logical space.
	 * The logical instance of a physical engine can change based on product
	 * and fusing.
	 */
	intel_engine_mask_t logical_mask;

	u8 class;
	u8 instance;

	u16 uabi_class;
	u16 uabi_instance;

	u32 uabi_capabilities;
	u32 context_size;
	u32 mmio_base;

	/*
	 * Some w/a require forcewake to be held (which prevents RC6) while
	 * a particular engine is active. If so, we set fw_domain to which
	 * domains need to be held for the duration of request activity,
	 * and 0 if none. We try to limit the duration of the hold as much
	 * as possible.
	 */
	enum forcewake_domains fw_domain;
	unsigned int fw_active;

	unsigned long context_tag;

	struct rb_node uabi_node;

	struct intel_sseu sseu;

	struct i915_sched_engine *sched_engine;

	/* keep a request in reserve for a [pm] barrier under oom */
	struct i915_request *request_pool;

	struct intel_context *hung_ce;

	struct llist_head barrier_tasks;

	struct intel_context *kernel_context; /* pinned */

	/**
	 * pinned_contexts_list: List of pinned contexts. This list is only
	 * assumed to be manipulated during driver load- or unload time and
	 * does therefore not have any additional protection.
	 */
	struct list_head pinned_contexts_list;

	intel_engine_mask_t saturated; /* submitting semaphores too late? */

	struct {
		struct delayed_work work;
		struct i915_request *systole;
		unsigned long blocked;
	} heartbeat;

	unsigned long serial;

	unsigned long wakeref_serial;
	struct intel_wakeref wakeref;
	struct file *default_state;

	struct {
		struct intel_ring *ring;
		struct intel_timeline *timeline;
	} legacy;

	/*
	 * We track the average duration of the idle pulse on parking the
	 * engine to keep an estimate of how fast the engine is
	 * under ideal conditions.
	 */
	struct ewma__engine_latency latency;

	/* Keep track of all the seqno used, a trail of breadcrumbs */
	struct intel_breadcrumbs *breadcrumbs;

	struct intel_engine_pmu {
		/**
		 * @enable: Bitmask of enable sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
	} pmu;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_wa_list ctx_wa_list;
	struct i915_wa_list wa_list;
	struct i915_wa_list whitelist;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);
	void (*irq_handler)(struct intel_engine_cs *engine, u16 iir);

	void (*sanitize)(struct intel_engine_cs *engine);
	int (*resume)(struct intel_engine_cs *engine);

	struct {
		void (*prepare)(struct intel_engine_cs *engine);

		void (*rewind)(struct intel_engine_cs *engine, bool stalled);
		void (*cancel)(struct intel_engine_cs *engine);

		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*bump_serial)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	const struct intel_context_ops *cops;

	int (*request_alloc)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
	int (*emit_init_breadcrumb)(struct i915_request *rq);
	u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
				     u32 *cs);
	unsigned int emit_fini_breadcrumb_dw;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	void (*release)(struct intel_engine_cs *engine);

	/*
	 * Add / remove request from engine active tracking
	 */
	void (*add_active_request)(struct i915_request *rq);
	void (*remove_active_request)(struct i915_request *rq);

	/*
	 * Get engine busyness and the time at which the busyness was sampled.
	 */
	ktime_t	(*busyness)(struct intel_engine_cs *engine,
			    ktime_t *now);

	struct intel_engine_execlists execlists;

	/*
	 * Keep track of completed timelines on this engine for early
	 * retirement with the goal of quickly enabling powersaving as
	 * soon as the engine is idle.
	 */
	struct intel_timeline *retire;
	struct work_struct retire_work;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

#define I915_ENGINE_USING_CMD_PARSER	BIT(0)
#define I915_ENGINE_SUPPORTS_STATS	BIT(1)
#define I915_ENGINE_HAS_PREEMPTION	BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES	BIT(3)
#define I915_ENGINE_HAS_TIMESLICES	BIT(4)
#define I915_ENGINE_IS_VIRTUAL		BIT(5)
#define I915_ENGINE_HAS_RELATIVE_MMIO	BIT(6)
#define I915_ENGINE_REQUIRES_CMD_PARSER	BIT(7)
#define I915_ENGINE_WANT_FORCED_PREEMPTION	BIT(8)
#define I915_ENGINE_HAS_RCS_REG_STATE	BIT(9)
#define I915_ENGINE_HAS_EU_PRIORITY	BIT(10)
#define I915_ENGINE_FIRST_RENDER_COMPUTE	BIT(11)
#define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT	BIT(12)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);

	struct {
		union {
			struct intel_engine_execlists_stats execlists;
			struct intel_engine_guc_stats guc;
		};

		/**
		 * @rps: Utilisation at last RPS sampling.
		 */
		ktime_t rps;
	} stats;

	struct {
		unsigned long heartbeat_interval_ms;
		unsigned long max_busywait_duration_ns;
		unsigned long preempt_timeout_ms;
		unsigned long stop_timeout_ms;
		unsigned long timeslice_duration_ms;
	} props, defaults;

	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
};

static inline bool
intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_USING_CMD_PARSER;
}

static inline bool
intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool
intel_engine_has_semaphores(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
}

612 static inline bool 613 intel_engine_has_timeslices(const struct intel_engine_cs *engine) 614 { 615 if (!CONFIG_DRM_I915_TIMESLICE_DURATION) 616 return false; 617 618 return engine->flags & I915_ENGINE_HAS_TIMESLICES; 619 } 620 621 static inline bool 622 intel_engine_is_virtual(const struct intel_engine_cs *engine) 623 { 624 return engine->flags & I915_ENGINE_IS_VIRTUAL; 625 } 626 627 static inline bool 628 intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) 629 { 630 return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO; 631 } 632 633 /* Wa_14014475959:dg2 */ 634 static inline bool 635 intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine) 636 { 637 return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 638 } 639 640 #define instdone_has_slice(dev_priv___, sseu___, slice___) \ 641 ((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___)) 642 643 #define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \ 644 (GRAPHICS_VER(dev_priv__) == 7 ? (1 & BIT(subslice__)) : \ 645 intel_sseu_has_subslice(sseu__, 0, subslice__)) 646 647 #define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \ 648 for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \ 649 (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \ 650 (slice_) += ((subslice_) == 0)) \ 651 for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ 652 (instdone_has_subslice(dev_priv_, sseu_, slice_, \ 653 subslice_))) 654 655 #define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \ 656 for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \ 657 (iter_) < GEN_SS_MASK_SIZE; \ 658 (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \ 659 (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \ 660 for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_))) 661 662 #endif /* __INTEL_ENGINE_TYPES_H__ */ 663