/* $NetBSD: intel_engine_types.h,v 1.7 2021/12/19 11:51:59 riastradh Exp $ */

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef __INTEL_ENGINE_TYPES__
#define __INTEL_ENGINE_TYPES__

#include <linux/average.h>
#include <linux/completion.h>
#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/notifier.h>
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "i915_gem.h"
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
#include "intel_engine_pool_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_wakeref.h"
#include "intel_workarounds_types.h"

/* Legacy HW Engine ID */

#define RCS0_HW		0
#define VCS0_HW		1
#define BCS0_HW		2
#define VECS0_HW	3
#define VCS1_HW		4
#define VCS2_HW		6
#define VCS3_HW		7
#define VECS1_HW	12

/* Gen11+ HW Engine class + instance */
#define RENDER_CLASS		0
#define VIDEO_DECODE_CLASS	1
#define VIDEO_ENHANCEMENT_CLASS	2
#define COPY_ENGINE_CLASS	3
#define OTHER_CLASS		4
#define MAX_ENGINE_CLASS	4
#define MAX_ENGINE_INSTANCE	3

#define I915_MAX_SLICES	3
#define I915_MAX_SUBSLICES 8

#define I915_CMD_HASH_ORDER 9

struct dma_fence;
struct drm_i915_gem_object;
struct drm_i915_reg_table;
struct i915_gem_context;
struct i915_request;
struct i915_sched_attr;
struct intel_gt;
struct intel_ring;
struct intel_uncore;

typedef u8 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)
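
/*
 * Illustrative sketch only (not driver code): engine masks are plain
 * bitmasks, so a mask covering a subset of engines can be built with
 * BIT() over the engine ids defined below; all of I915_NUM_ENGINES
 * fits within the 8 bits of intel_engine_mask_t.
 *
 *	intel_engine_mask_t mask = BIT(VCS0) | BIT(VCS1);
 *
 *	if (mask & ALL_ENGINES) {
 *		...
 *	}
 */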

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *addr;
};

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

/*
 * We use a single page to load the context workarounds, so all of these
 * values are expressed in dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies the batch starting position, and is also helpful if
 *  we want to have multiple batches at different offsets based on some
 *  criteria. This is not a requirement at the moment, but it provides an
 *  option for future use.
 *  size: size of the batch in dwords
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};
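
/*
 * A minimal sketch, following from the dword units noted above: converting
 * a batch's offset and size to byte quantities within the backing vma
 * would look like this (hypothetical locals, not driver API):
 *
 *	u32 offset_in_bytes = wa_ctx->indirect_ctx.offset * sizeof(u32);
 *	u32 size_in_bytes = wa_ctx->indirect_ctx.size * sizeof(u32);
 */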

#define I915_MAX_VCS	4
#define I915_MAX_VECS	2

/*
 * Engine IDs definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS0 = 0,
	BCS0,
	VCS0,
	VCS1,
	VCS2,
	VCS3,
#define _VCS(n) (VCS0 + (n))
	VECS0,
	VECS1,
#define _VECS(n) (VECS0 + (n))
	I915_NUM_ENGINES
#define INVALID_ENGINE ((enum intel_engine_id)-1)
};
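
/*
 * The _VCS()/_VECS() helpers map an instance number onto the enum, which
 * works because instances of a class are kept contiguous, e.g.
 * (illustrative compile-time checks, not present in the driver):
 *
 *	BUILD_BUG_ON(_VCS(3) != VCS3);
 *	BUILD_BUG_ON(_VECS(1) != VECS1);
 */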

/* A simple estimator for the round-trip latency of an engine */
DECLARE_EWMA(_engine_latency, 6, 4)
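
/*
 * DECLARE_EWMA() generates struct ewma__engine_latency along with the
 * ewma__engine_latency_init/_add/_read() helpers (see <linux/average.h>).
 * A minimal usage sketch, assuming a latency sample in microseconds:
 *
 *	struct ewma__engine_latency avg;
 *
 *	ewma__engine_latency_init(&avg);
 *	ewma__engine_latency_add(&avg, latency_us);
 *	latency_us = ewma__engine_latency_read(&avg);
 */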

struct st_preempt_hang {
	struct completion completion;
	unsigned int count;
	bool inject_hang;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state
 * of the driver and the hardware for the execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @timer: kick the current context if its timeslice expires
	 */
	struct timer_list timer;

	/**
	 * @preempt: reset the current context if it fails to give way
	 */
	struct timer_list preempt;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

#ifdef __NetBSD__
	bus_space_tag_t bst;
	bus_space_handle_t bsh;
	bus_size_t submit_reg;
	bus_size_t ctrl_reg;
#else
	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;
#endif

#define EXECLIST_MAX_PORTS 2
	/**
	 * @active: the currently known context executing on HW
	 */
	struct i915_request * const *active;
	/**
	 * @inflight: the set of contexts submitted and acknowledged by HW
	 *
	 * The set of inflight contexts is managed by reading CS events
	 * from the HW. On a context-switch event (not preemption), we
	 * know the HW has transitioned from port0 to port1, and we
	 * advance our inflight/active tracking accordingly.
	 */
	struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
	/**
	 * @pending: the next set of contexts submitted to ELSP
	 *
	 * We store the array of contexts that we submit to HW (via ELSP) and
	 * promote them to the inflight array once HW has signaled the
	 * preemption or idle-to-active event.
	 */
	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;
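
	/*
	 * Both @inflight and @pending are NULL-terminated (hence the extra
	 * sentinel slot), so consumers can walk the occupied ports without
	 * consulting @port_mask. A sketch of such a walk (process() is a
	 * hypothetical consumer, not driver API):
	 *
	 *	struct i915_request * const *port;
	 *	struct i915_request *rq;
	 *
	 *	for (port = execlists->active; (rq = *port); port++)
	 *		process(rq);
	 */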

	/**
	 * @switch_priority_hint: Second context priority.
	 *
	 * We submit multiple contexts to the HW simultaneously and would
	 * like to occasionally switch between them to emulate timeslicing.
	 * To know when timeslicing is suitable, we track the priority of
	 * the context submitted second.
	 */
	int switch_priority_hint;

	/**
	 * @queue_priority_hint: Highest pending priority.
	 *
	 * When we add requests into the queue, or adjust the priority of
	 * executing requests, we compute the maximum priority of those
	 * pending requests. We can then use this value to determine if
	 * we need to preempt the executing requests to service the queue.
	 * However, since we may have recorded the priority of an inflight
	 * request that we wanted to preempt but that has since completed,
	 * at the time of dequeuing the priority hint may no longer match
	 * the highest available request priority.
	 */
	int queue_priority_hint;

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root_cached queue;
	struct rb_root_cached virtual;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u32 *csb_status;

	/**
	 * @csb_size: context status buffer FIFO size
	 */
	u8 csb_size;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};
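
/*
 * A rough sketch of how the CSB fields fit together, heavily simplified
 * from the real event-processing loop (handle_event() is hypothetical;
 * event decoding and error handling are omitted):
 *
 *	u8 head = execlists->csb_head;
 *
 *	while (head != READ_ONCE(*execlists->csb_write)) {
 *		if (++head == execlists->csb_size)
 *			head = 0;
 *		handle_event(execlists->csb_status[head]);
 *	}
 *	execlists->csb_head = head;
 */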

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	struct intel_gt *gt;
	struct intel_uncore *uncore;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	enum intel_engine_id legacy_idx;

	unsigned int hw_id;
	unsigned int guc_id;

	intel_engine_mask_t mask;

	u8 class;
	u8 instance;

	u16 uabi_class;
	u16 uabi_instance;

	u32 uabi_capabilities;
	u32 context_size;
	u32 mmio_base;

	unsigned int context_tag;
#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)

	union {
		struct rb_node rbtree;
		struct llist_node llist;
		struct list_head list;
	} uabi_node;

	struct intel_sseu sseu;

	struct {
		spinlock_t lock;
		struct list_head requests;
		struct list_head hold; /* ready requests, but on hold */
	} active;

	struct llist_head barrier_tasks;

	struct intel_context *kernel_context; /* pinned */

	intel_engine_mask_t saturated; /* submitting semaphores too late? */

	struct {
		struct delayed_work work;
		struct i915_request *systole;
	} heartbeat;

	unsigned long serial;

	unsigned long wakeref_serial;
	struct intel_wakeref wakeref;
	struct drm_i915_gem_object *default_state;
	void *pinned_default_state;

	struct {
		struct intel_ring *ring;
		struct intel_timeline *timeline;
	} legacy;

	/*
	 * We track the average duration of the idle pulse on parking the
	 * engine to keep an estimate of how fast the engine is under ideal
	 * conditions.
	 */
	struct ewma__engine_latency latency;

	/* Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, reducing the
	 * overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock;
		struct list_head signalers;

		struct irq_work irq_work; /* for use from inside irq_lock */

		unsigned int irq_enabled;

		bool irq_armed;
	} breadcrumbs;

	struct intel_engine_pmu {
		/**
		 * @enable: Bitmask of enabled sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
	} pmu;

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct intel_engine_pool pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_wa_list ctx_wa_list;
	struct i915_wa_list wa_list;
	struct i915_wa_list whitelist;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*resume)(struct intel_engine_cs *engine);

	struct {
		void (*prepare)(struct intel_engine_cs *engine);

		void (*rewind)(struct intel_engine_cs *engine, bool stalled);
		void (*cancel)(struct intel_engine_cs *engine);

		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	const struct intel_context_ops *cops;

	int (*request_alloc)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
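	/*
	 * A hedged usage sketch (not driver code): when building a request,
	 * the emitters above are invoked roughly as follows, with batch_addr
	 * and batch_len being hypothetical locals:
	 *
	 *	err = engine->emit_flush(rq, EMIT_INVALIDATE);
	 *	if (err == 0)
	 *		err = engine->emit_bb_start(rq, batch_addr, batch_len, 0);
	 */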

	int (*emit_init_breadcrumb)(struct i915_request *rq);
	u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, u32 *cs);
	unsigned int emit_fini_breadcrumb_dw;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	/*
	 * Called on signaling of a SUBMIT_FENCE, passing the signaling
	 * request down to the bonded pairs.
	 */
	void (*bond_execute)(struct i915_request *rq,
			     struct dma_fence *signal);

	/*
	 * Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 */
	void (*schedule)(struct i915_request *request,
			 const struct i915_sched_attr *attr);

	void (*release)(struct intel_engine_cs *engine);

	struct intel_engine_execlists execlists;

	/*
	 * Keep track of completed timelines on this engine for early
	 * retirement with the goal of quickly enabling powersaving as
	 * soon as the engine is idle.
	 */
	struct intel_timeline *retire;
	struct work_struct retire_work;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

#define I915_ENGINE_USING_CMD_PARSER	BIT(0)
#define I915_ENGINE_SUPPORTS_STATS	BIT(1)
#define I915_ENGINE_HAS_PREEMPTION	BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES	BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET	BIT(4)
#define I915_ENGINE_IS_VIRTUAL		BIT(5)
#define I915_ENGINE_HAS_RELATIVE_MMIO	BIT(6)
#define I915_ENGINE_REQUIRES_CMD_PARSER	BIT(7)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Returns 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);
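
	/*
	 * A minimal sketch of the fallback path described above ("cmd" is a
	 * hypothetical pointer into the batch and "reject" a hypothetical
	 * error path; the parser adds a small bias to the decoded value to
	 * obtain the total command length):
	 *
	 *	u32 mask = engine->get_cmd_length_mask(*cmd);
	 *
	 *	if (mask == 0)
	 *		goto reject;
	 *	length = (*cmd & mask) + 2;
	 */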

	struct {
		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqlock_t lock;
		/**
		 * @enabled: Reference count indicating number of listeners.
		 */
		unsigned int enabled;
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;
		/**
		 * @enabled_at: Timestamp when busy stats were enabled.
		 */
		ktime_t enabled_at;
		/**
		 * @start: Timestamp of the last idle-to-active transition.
		 *
		 * Idle is defined as active == 0, busy as active > 0.
		 */
		ktime_t start;
		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time, not counting the most recent block in
		 * cases where the engine is currently busy (active > 0).
		 */
		ktime_t total;
	} stats;

	struct {
		unsigned long heartbeat_interval_ms;
		unsigned long preempt_timeout_ms;
		unsigned long stop_timeout_ms;
		unsigned long timeslice_duration_ms;
	} props;
};

static inline bool
intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_USING_CMD_PARSER;
}

static inline bool
intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool
intel_engine_has_semaphores(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
}

static inline bool
intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
}

static inline bool
intel_engine_is_virtual(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_IS_VIRTUAL;
}

static inline bool
intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
{
	return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
}
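
/*
 * These predicates gate optional submission paths. A hedged sketch of a
 * caller choosing how to synchronise with another request
 * (emit_semaphore_wait/wait_for_fence are hypothetical helpers):
 *
 *	if (intel_engine_has_semaphores(engine))
 *		err = emit_semaphore_wait(rq, signal);
 *	else
 *		err = wait_for_fence(rq, signal);
 */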

#define instdone_has_slice(dev_priv___, sseu___, slice___) \
	((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))

#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
	(IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \
	 intel_sseu_has_subslice(sseu__, 0, subslice__))

#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
	for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
	     (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \
	     (slice_) += ((subslice_) == 0)) \
		for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
			    (instdone_has_subslice(dev_priv_, sseu_, slice_, \
						   subslice_)))
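
/*
 * Typical use is to visit every valid slice/subslice pair when capturing
 * or dumping instdone state, e.g. (a sketch; "instdone" is a local
 * struct intel_instdone and "m" a seq_file):
 *
 *	for_each_instdone_slice_subslice(i915, sseu, slice, subslice)
 *		seq_printf(m, "sampler[%d][%d]: 0x%08x\n", slice, subslice,
 *			   instdone->sampler[slice][subslice]);
 */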

#endif /* __INTEL_ENGINE_TYPES__ */