/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef __INTEL_ENGINE_TYPES__
#define __INTEL_ENGINE_TYPES__

#include <linux/average.h>
#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "i915_gem.h"
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
#include "intel_engine_pool_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_wakeref.h"
#include "intel_workarounds_types.h"

/* Legacy HW Engine ID */

#define RCS0_HW		0
#define VCS0_HW		1
#define BCS0_HW		2
#define VECS0_HW	3
#define VCS1_HW		4
#define VCS2_HW		6
#define VCS3_HW		7
#define VECS1_HW	12

/* Gen11+ HW Engine class + instance */
#define RENDER_CLASS		0
#define VIDEO_DECODE_CLASS	1
#define VIDEO_ENHANCEMENT_CLASS	2
#define COPY_ENGINE_CLASS	3
#define OTHER_CLASS		4
#define MAX_ENGINE_CLASS	4
#define MAX_ENGINE_INSTANCE	3

#define I915_MAX_SLICES		3
#define I915_MAX_SUBSLICES	8

#define I915_CMD_HASH_ORDER	9

struct dma_fence;
struct drm_i915_gem_object;
struct drm_i915_reg_table;
struct i915_gem_context;
struct i915_request;
struct i915_sched_attr;
struct intel_gt;
struct intel_ring;
struct intel_uncore;

typedef u8 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *addr;
};

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are expressed in terms of dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies the batch starting position, which is also helpful
 *  if we want to place multiple batches at different offsets based on
 *  some criteria. It is not a requirement at the moment but provides
 *  an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

#define I915_MAX_VCS	4
#define I915_MAX_VECS	2

/*
 * Engine IDs definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS0 = 0,
	BCS0,
	VCS0,
	VCS1,
	VCS2,
	VCS3,
#define _VCS(n) (VCS0 + (n))
	VECS0,
	VECS1,
#define _VECS(n) (VECS0 + (n))
	I915_NUM_ENGINES
#define INVALID_ENGINE ((enum intel_engine_id)-1)
};

/* A simple estimator for the round-trip latency of an engine */
DECLARE_EWMA(_engine_latency, 6, 4)

struct st_preempt_hang {
	struct completion completion;
	unsigned int count;
	bool inject_hang;
};
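
/*
 * Illustrative sketch, not part of the driver API: an engine's position in
 * enum intel_engine_id doubles as its bit in intel_engine_mask_t, so
 * per-class masks can be composed with the _VCS()/_VECS() helpers. The
 * helper name below is hypothetical.
 */
static inline intel_engine_mask_t __example_vcs_mask(unsigned int count)
{
	intel_engine_mask_t mask = 0;
	unsigned int n;

	/* BIT(_VCS(n)) selects the nth video decode engine, e.g. VCS1. */
	for (n = 0; n < count; n++)
		mask |= BIT(_VCS(n));

	return mask;
}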
/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state
 * of the driver and the hardware for the execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @timer: kick the current context if its timeslice expires
	 */
	struct timer_list timer;

	/**
	 * @preempt: reset the current context if it fails to give way
	 */
	struct timer_list preempt;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

#define EXECLIST_MAX_PORTS 2
	/**
	 * @active: the currently known context executing on HW
	 */
	struct i915_request * const *active;
	/**
	 * @inflight: the set of contexts submitted and acknowledged by HW
	 *
	 * The set of inflight contexts is managed by reading CS events
	 * from the HW. On a context-switch event (not preemption), we
	 * know the HW has transitioned from port0 to port1, and we
	 * advance our inflight/active tracking accordingly.
	 */
	struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
	/**
	 * @pending: the next set of contexts submitted to ELSP
	 *
	 * We store the array of contexts that we submit to HW (via ELSP) and
	 * promote them to the inflight array once HW has signaled the
	 * preemption or idle-to-active event.
	 */
	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @switch_priority_hint: Second context priority.
	 *
	 * We submit multiple contexts to the HW simultaneously and would
	 * like to occasionally switch between them to emulate timeslicing.
	 * To know when timeslicing is suitable, we track the priority of
	 * the context submitted second.
	 */
	int switch_priority_hint;

	/**
	 * @queue_priority_hint: Highest pending priority.
	 *
	 * When we add requests into the queue, or adjust the priority of
	 * executing requests, we compute the maximum priority of those
	 * pending requests. We can then use this value to determine if
	 * we need to preempt the executing requests to service the queue.
	 * However, since we may have recorded the priority of an inflight
	 * request that we wanted to preempt but which has since completed,
	 * at the time of dequeuing the priority hint may no longer match
	 * the highest available request priority.
	 */
	int queue_priority_hint;

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root_cached queue;
	/**
	 * @virtual: queue of ready virtual engine requests, in priority lists
	 */
	struct rb_root_cached virtual;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u32 *csb_status;

	/**
	 * @csb_size: context status buffer FIFO size
	 */
	u8 csb_size;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};
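
/*
 * Illustrative sketch, not part of the driver: both the inflight[] and
 * pending[] arrays above are sized one beyond EXECLIST_MAX_PORTS so that
 * they are always NULL-terminated, letting callers walk the occupied ports
 * without consulting port_mask, e.g. __example_count_ports(el->pending).
 * The helper name is hypothetical.
 */
static inline unsigned int
__example_count_ports(struct i915_request * const *port)
{
	unsigned int count = 0;

	/* Stop at the NULL sentinel that terminates the port array. */
	while (*port++)
		count++;

	return count;
}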
#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	struct intel_gt *gt;
	struct intel_uncore *uncore;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	enum intel_engine_id legacy_idx;

	unsigned int hw_id;
	unsigned int guc_id;

	intel_engine_mask_t mask;

	u8 class;
	u8 instance;

	u16 uabi_class;
	u16 uabi_instance;

	u32 uabi_capabilities;
	u32 context_size;
	u32 mmio_base;

	unsigned int context_tag;
#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)

	struct rb_node uabi_node;

	struct intel_sseu sseu;

	struct {
		spinlock_t lock;
		struct list_head requests;
		struct list_head hold; /* ready requests, but on hold */
	} active;

	struct llist_head barrier_tasks;

	struct intel_context *kernel_context; /* pinned */

	intel_engine_mask_t saturated; /* submitting semaphores too late? */

	struct {
		struct delayed_work work;
		struct i915_request *systole;
	} heartbeat;

	unsigned long serial;

	unsigned long wakeref_serial;
	struct intel_wakeref wakeref;
	struct drm_i915_gem_object *default_state;
	void *pinned_default_state;

	struct {
		struct intel_ring *ring;
		struct intel_timeline *timeline;
	} legacy;

	/*
	 * We track the average duration of the idle pulse on parking the
	 * engine to keep an estimate of how fast the engine is
	 * under ideal conditions.
	 */
	struct ewma__engine_latency latency;

	/*
	 * Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, then reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock;
		struct list_head signalers;

		struct irq_work irq_work; /* for use from inside irq_lock */

		unsigned int irq_enabled;

		bool irq_armed;
	} breadcrumbs;

	struct intel_engine_pmu {
		/**
		 * @enable: Bitmask of enabled sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_BUSY is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
	} pmu;
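
	/*
	 * Illustrative note (an assumption, not driver code): the sampling
	 * timer is expected to test pmu.enable before accumulating into
	 * pmu.sample, along the lines of:
	 *
	 *	if (engine->pmu.enable & BIT(I915_SAMPLE_BUSY))
	 *		add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY], val);
	 *
	 * where add_sample() is a hypothetical accumulator.
	 */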
	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct intel_engine_pool pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_wa_list ctx_wa_list;
	struct i915_wa_list wa_list;
	struct i915_wa_list whitelist;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*resume)(struct intel_engine_cs *engine);

	struct {
		void (*prepare)(struct intel_engine_cs *engine);

		void (*rewind)(struct intel_engine_cs *engine, bool stalled);
		void (*cancel)(struct intel_engine_cs *engine);

		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	const struct intel_context_ops *cops;

	int (*request_alloc)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
	int (*emit_init_breadcrumb)(struct i915_request *rq);
	u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, u32 *cs);
	unsigned int emit_fini_breadcrumb_dw;
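
	/*
	 * Illustrative sketch (an assumption, not driver code): the EMIT_*
	 * flags combine, so a caller wanting caches invalidated before and
	 * flushed after a batch might issue:
	 *
	 *	err = engine->emit_flush(rq, EMIT_BARRIER);
	 *	if (err)
	 *		return err;
	 */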
	/*
	 * Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	/*
	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
	 * request down to the bonded pairs.
	 */
	void (*bond_execute)(struct i915_request *rq,
			     struct dma_fence *signal);

	/*
	 * Called when the priority of a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 */
	void (*schedule)(struct i915_request *request,
			 const struct i915_sched_attr *attr);

	void (*release)(struct intel_engine_cs *engine);

	struct intel_engine_execlists execlists;

	/*
	 * Keep track of completed timelines on this engine for early
	 * retirement with the goal of quickly enabling powersaving as
	 * soon as the engine is idle.
	 */
	struct intel_timeline *retire;
	struct work_struct retire_work;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

#define I915_ENGINE_USING_CMD_PARSER		BIT(0)
#define I915_ENGINE_SUPPORTS_STATS		BIT(1)
#define I915_ENGINE_HAS_PREEMPTION		BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES		BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET	BIT(4)
#define I915_ENGINE_IS_VIRTUAL			BIT(5)
#define I915_ENGINE_HAS_RELATIVE_MMIO		BIT(6)
#define I915_ENGINE_REQUIRES_CMD_PARSER		BIT(7)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);

	struct {
		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqlock_t lock;
		/**
		 * @enabled: Reference count indicating number of listeners.
		 */
		unsigned int enabled;
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;
		/**
		 * @enabled_at: Timestamp when busy stats were enabled.
		 */
		ktime_t enabled_at;
		/**
		 * @start: Timestamp of the last idle-to-active transition.
		 *
		 * Idle is defined as active == 0, busy as active > 0.
		 */
		ktime_t start;
		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time, not counting the most recent block in
		 * cases where the engine is currently busy (active > 0).
		 */
		ktime_t total;
	} stats;

	struct {
		unsigned long heartbeat_interval_ms;
		unsigned long preempt_timeout_ms;
		unsigned long stop_timeout_ms;
		unsigned long timeslice_duration_ms;
	} props;
};
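
/*
 * Illustrative sketch, not the driver's implementation: given the stats
 * fields above, total busy time at instant @now is the accumulated total
 * plus the still-open busy period when the engine is active. A real reader
 * must sample under stats.lock (a seqlock); that is omitted here, and the
 * helper name is hypothetical.
 */
static inline ktime_t
__example_busy_time(const struct intel_engine_cs *engine, ktime_t now)
{
	ktime_t total = engine->stats.total;

	/* Add the most recent, still-running period of activity. */
	if (engine->stats.active)
		total = ktime_add(total, ktime_sub(now, engine->stats.start));

	return total;
}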
static inline bool
intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_USING_CMD_PARSER;
}

static inline bool
intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool
intel_engine_has_semaphores(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
}

static inline bool
intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
}

static inline bool
intel_engine_is_virtual(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_IS_VIRTUAL;
}

static inline bool
intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
{
	return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
}

#define instdone_has_slice(dev_priv___, sseu___, slice___) \
	((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))

#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
	(IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \
	 intel_sseu_has_subslice(sseu__, 0, subslice__))

#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
	for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
	     (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \
	     (slice_) += ((subslice_) == 0)) \
		for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
			    (instdone_has_subslice(dev_priv_, sseu_, slice_, \
						   subslice_)))

#endif /* __INTEL_ENGINE_TYPES__ */