xref: /openbsd/sys/dev/pci/drm/i915/i915_perf_types.h (revision f005ef32)
1 /* SPDX-License-Identifier: MIT */
2 /*
3  * Copyright © 2019 Intel Corporation
4  */
5 
6 #ifndef _I915_PERF_TYPES_H_
7 #define _I915_PERF_TYPES_H_
8 
9 #include <linux/atomic.h>
10 #include <linux/device.h>
11 #include <linux/hrtimer.h>
12 #include <linux/llist.h>
13 #include <linux/poll.h>
14 #include <linux/sysfs.h>
15 #include <linux/types.h>
16 #include <linux/uuid.h>
17 #include <linux/wait.h>
18 #include <uapi/drm/i915_drm.h>
19 
20 #include "gt/intel_engine_types.h"
21 #include "gt/intel_sseu.h"
22 #include "i915_reg_defs.h"
23 #include "intel_uncore.h"
24 #include "intel_wakeref.h"
25 
26 struct drm_i915_private;
27 struct file;
28 struct i915_active;
29 struct i915_gem_context;
30 struct i915_perf;
31 struct i915_vma;
32 struct intel_context;
33 struct intel_engine_cs;
34 
/*
 * Identifiers for the OA perf groups.
 *
 * PERF_GROUP_OAG and PERF_GROUP_OAM_SAMEDIA_0 are both 0, so PERF_GROUP_MAX
 * (the next enumerator) evaluates to 1.
 *
 * NOTE(review): presumably these are indices into i915_perf_gt.group and the
 * two zero-valued groups never coexist on one GT - confirm with the code that
 * allocates the groups.
 */
35 enum {
36 	PERF_GROUP_OAG = 0,
37 	PERF_GROUP_OAM_SAMEDIA_0 = 0,
38 
39 	PERF_GROUP_MAX,
	/* Marker value, distinct from every group value above. */
40 	PERF_GROUP_INVALID = U32_MAX,
41 };
42 
/*
 * Header kind recorded in i915_oa_format.header.
 *
 * NOTE(review): the names suggest a 32-bit versus 64-bit OA report header -
 * confirm against the report parsing code.
 */
43 enum report_header {
44 	HDR_32_BIT = 0,
45 	HDR_64_BIT,
46 };
47 
/*
 * Register description of one OA unit. Embedded in struct i915_perf_group as
 * its register group for programming the OA unit.
 */
48 struct i915_perf_regs {
	/* NOTE(review): presumably the MMIO base of the unit - confirm */
49 	u32 base;
50 	i915_reg_t oa_head_ptr;
51 	i915_reg_t oa_tail_ptr;
52 	i915_reg_t oa_buffer;
53 	i915_reg_t oa_ctx_ctrl;
54 	i915_reg_t oa_ctrl;
55 	i915_reg_t oa_debug;
56 	i915_reg_t oa_status;
	/*
	 * NOTE(review): presumably the bit position of the counter format
	 * field inside oa_ctrl - confirm
	 */
57 	u32 oa_ctrl_counter_format_shift;
58 };
59 
/* Type of OA unit (OAG or OAM); stored in i915_perf_group.type. */
60 enum oa_type {
61 	TYPE_OAG,
62 	TYPE_OAM,
63 };
64 
/*
 * Description of one OA report format. Referenced from
 * i915_perf_stream.oa_buffer.format and from i915_perf.oa_formats.
 */
65 struct i915_oa_format {
	/* NOTE(review): presumably the hardware format selector value - confirm */
66 	u32 format;
	/* NOTE(review): presumably the size of one report in bytes - confirm */
67 	int size;
	/* NOTE(review): plain int; may hold an enum oa_type value - confirm */
68 	int type;
69 	enum report_header header;
70 };
71 
/*
 * A register paired with a 32-bit value. struct i915_oa_config holds arrays
 * of these for its mux, boolean counter and flex register tables.
 */
72 struct i915_oa_reg {
73 	i915_reg_t addr;
74 	u32 value;
75 };
76 
/*
 * An OA configuration: three register/value tables (mux, boolean counter and
 * flex registers) plus sysfs and lifetime bookkeeping. A stream refers to the
 * configuration it uses through i915_perf_stream.oa_config.
 */
77 struct i915_oa_config {
	/* i915_perf backpointer */
78 	struct i915_perf *perf;
79 
	/* UUID in string form; the extra byte leaves room for a terminator. */
80 	char uuid[UUID_STRING_LEN + 1];
	/* NOTE(review): presumably the key of this config in perf->metrics_idr - confirm */
81 	int id;
82 
	/*
	 * Each table pointer is followed by its *_len field.
	 * NOTE(review): presumably *_len counts entries, not bytes - confirm
	 */
83 	const struct i915_oa_reg *mux_regs;
84 	u32 mux_regs_len;
85 	const struct i915_oa_reg *b_counter_regs;
86 	u32 b_counter_regs_len;
87 	const struct i915_oa_reg *flex_regs;
88 	u32 flex_regs_len;
89 
	/*
	 * sysfs state for this config.
	 * NOTE(review): presumably attrs[] is the NULL-terminated list holding
	 * sysfs_metric_id, published through sysfs_metric - confirm
	 */
90 	struct attribute_group sysfs_metric;
91 	struct attribute *attrs[2];
92 	struct kobj_attribute sysfs_metric_id;
93 
	/*
	 * Reference count and RCU head.
	 * NOTE(review): presumably the config is released via RCU once the
	 * last reference is dropped - confirm
	 */
94 	struct kref ref;
95 	struct rcu_head rcu;
96 };
97 
98 struct i915_perf_stream;
99 
100 /**
101  * struct i915_perf_stream_ops - the callbacks implementing a specific stream type
 *
 * A stream refers to its implementation through i915_perf_stream.ops.
102  */
103 struct i915_perf_stream_ops {
104 	/**
105 	 * @enable: Enables the collection of HW samples, either in response to
106 	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
107 	 * without `I915_PERF_FLAG_DISABLED`.
108 	 */
109 	void (*enable)(struct i915_perf_stream *stream);
110 
111 	/**
112 	 * @disable: Disables the collection of HW samples, either in response
113 	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
114 	 * the stream.
115 	 */
116 	void (*disable)(struct i915_perf_stream *stream);
117 
118 	/**
119 	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
120 	 * once there is something ready to read() for the stream
	 *
	 * This member only exists when `notyet` is defined; otherwise it is
	 * compiled out of the structure.
121 	 */
122 #ifdef notyet
123 	void (*poll_wait)(struct i915_perf_stream *stream,
124 			  struct file *file,
125 			  poll_table *wait);
126 #endif
127 
128 	/**
129 	 * @wait_unlocked: For handling a blocking read, wait until there is
130 	 * something ready to read() for the stream. E.g. wait on the same
131 	 * wait queue that would be passed to poll_wait().
132 	 */
133 	int (*wait_unlocked)(struct i915_perf_stream *stream);
134 
135 	/**
136 	 * @read: Copy buffered metrics as records to userspace
137 	 * **buf**: the userspace destination buffer
138 	 * **count**: the number of bytes to copy, requested by userspace
139 	 * **offset**: zero at the start of the read, updated as the read
140 	 * proceeds, it represents how many bytes have been copied so far and
141 	 * the buffer offset for copying the next record.
142 	 *
143 	 * Copy as many buffered i915 perf samples and records for this stream
144 	 * to userspace as will fit in the given buffer.
145 	 *
146 	 * Only write complete records; returning -%ENOSPC if there isn't room
147 	 * for a complete record.
148 	 *
149 	 * Return any error condition that results in a short read such as
150 	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
151 	 * returning to userspace.
152 	 */
153 	int (*read)(struct i915_perf_stream *stream,
154 		    char __user *buf,
155 		    size_t count,
156 		    size_t *offset);
157 
158 	/**
159 	 * @destroy: Cleanup any stream specific resources.
160 	 *
161 	 * The stream will always be disabled before this is called.
162 	 */
163 	void (*destroy)(struct i915_perf_stream *stream);
164 };
165 
166 /**
167  * struct i915_perf_stream - state for a single open stream FD
168  */
169 struct i915_perf_stream {
170 	/**
171 	 * @perf: i915_perf backpointer
172 	 */
173 	struct i915_perf *perf;
174 
175 	/**
176 	 * @uncore: mmio access path
177 	 */
178 	struct intel_uncore *uncore;
179 
180 	/**
181 	 * @engine: Engine associated with this performance stream.
182 	 */
183 	struct intel_engine_cs *engine;
184 
185 	/**
186 	 * @lock: Lock associated with operations on stream
187 	 */
188 	struct rwlock lock;
189 
190 	/**
191 	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
192 	 * properties given when opening a stream, representing the contents
193 	 * of a single sample as read() by userspace.
194 	 */
195 	u32 sample_flags;
196 
197 	/**
198 	 * @sample_size: Considering the configured contents of a sample
199 	 * combined with the required header size, this is the total size
200 	 * of a single sample record.
201 	 */
202 	int sample_size;
203 
204 	/**
205 	 * @ctx: %NULL if measuring system-wide across all contexts or a
206 	 * specific context that is being monitored.
207 	 */
208 	struct i915_gem_context *ctx;
209 
210 	/**
211 	 * @enabled: Whether the stream is currently enabled, considering
212 	 * whether the stream was opened in a disabled state and based
213 	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
214 	 */
215 	bool enabled;
216 
217 	/**
218 	 * @hold_preemption: Whether preemption is put on hold for command
219 	 * submissions done on the @ctx. This is useful for some drivers that
220 	 * cannot easily post process the OA buffer context to subtract delta
221 	 * of performance counters not associated with @ctx.
222 	 */
223 	bool hold_preemption;
224 
225 	/**
226 	 * @ops: The callbacks providing the implementation of this specific
227 	 * type of configured stream.
228 	 */
229 	const struct i915_perf_stream_ops *ops;
230 
231 	/**
232 	 * @oa_config: The OA configuration used by the stream.
233 	 */
234 	struct i915_oa_config *oa_config;
235 
236 	/**
237 	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
238 	 * each time @oa_config changes.
239 	 */
240 	struct llist_head oa_config_bos;
241 
242 	/**
243 	 * @pinned_ctx: The OA context specific information.
244 	 */
245 	struct intel_context *pinned_ctx;
246 
247 	/**
248 	 * @specific_ctx_id: The id of the specific context.
249 	 */
250 	u32 specific_ctx_id;
251 
252 	/**
253 	 * @specific_ctx_id_mask: The mask used for masking specific_ctx_id bits.
254 	 */
255 	u32 specific_ctx_id_mask;
256 
257 	/**
258 	 * @poll_check_timer: High resolution timer that will periodically
259 	 * check for data in the circular OA buffer for notifying userspace
260 	 * (e.g. during a read() or poll()).
261 	 */
262 	struct hrtimer poll_check_timer;
263 
264 	/**
265 	 * @poll_wq: The wait queue that hrtimer callback wakes when it
266 	 * sees data ready to read in the circular OA buffer.
267 	 */
268 	wait_queue_head_t poll_wq;
269 
270 	/**
271 	 * @pollin: Whether there is data available to read.
272 	 */
273 	bool pollin;
274 
275 	/**
276 	 * @periodic: Whether periodic sampling is currently enabled.
277 	 */
278 	bool periodic;
279 
280 	/**
281 	 * @period_exponent: The OA unit sampling frequency is derived from this.
282 	 */
283 	int period_exponent;
284 
285 	/**
286 	 * @oa_buffer: State of the OA buffer.
287 	 */
288 	struct {
		/* Report format in use for this buffer. */
289 		const struct i915_oa_format *format;
		/* NOTE(review): presumably the VMA backing the OA buffer - confirm */
290 		struct i915_vma *vma;
		/* NOTE(review): presumably the CPU mapping of @vma - confirm */
291 		u8 *vaddr;
		/* NOTE(review): presumably the context id seen in the last report - confirm */
292 		u32 last_ctx_id;
		/* NOTE(review): presumably log2 of the buffer size - confirm */
293 		int size_exponent;
294 
295 		/**
296 		 * @ptr_lock: Locks reads and writes to all head/tail state
297 		 *
298 		 * Consider: the head and tail pointer state needs to be read
299 		 * consistently from a hrtimer callback (atomic context) and
300 		 * read() fop (user context) with tail pointer updates happening
301 		 * in atomic context and head updates in user context and the
302 		 * (unlikely) possibility of read() errors needing to reset all
303 		 * head/tail state.
304 		 *
305 		 * Note: Contention/performance aren't currently a significant
306 		 * concern here considering the relatively low frequency of
307 		 * hrtimer callbacks (5ms period) and that reads typically only
308 		 * happen in response to a hrtimer event and likely complete
309 		 * before the next callback.
		 *
		 * NOTE(review): the 5ms figure is hard-coded in this comment,
		 * while @poll_oa_period holds the check period as a value in
		 * nanoseconds - confirm the figure is still accurate.
310 		 *
311 		 * Note: This lock is not held *while* reading and copying data
312 		 * to userspace so the value of head observed in hrtimer
313 		 * callbacks won't represent any partial consumption of data.
314 		 */
315 		spinlock_t ptr_lock;
316 
317 		/**
318 		 * @head: Although we can always read back the head pointer register,
319 		 * we prefer to avoid trusting the HW state, just to avoid any
320 		 * risk that some hardware condition could somehow bump the
321 		 * head pointer unpredictably and cause us to forward the wrong
322 		 * OA buffer data to userspace.
323 		 */
324 		u32 head;
325 
326 		/**
327 		 * @tail: The last verified tail that can be read by userspace.
328 		 */
329 		u32 tail;
330 	} oa_buffer;
331 
332 	/**
333 	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
334 	 * reprogrammed.
335 	 */
336 	struct i915_vma *noa_wait;
337 
338 	/**
339 	 * @poll_oa_period: The period in nanoseconds at which the OA
340 	 * buffer should be checked for available data.
341 	 */
342 	u64 poll_oa_period;
343 
344 	/**
345 	 * @override_gucrc: GuC RC has been overridden for the perf stream,
346 	 * and we need to restore the default configuration on release.
347 	 */
348 	bool override_gucrc;
349 };
350 
351 /**
352  * struct i915_oa_ops - Gen specific implementation of an OA unit stream
 *
 * Stored by value in struct i915_perf as its ops member.
353  */
354 struct i915_oa_ops {
355 	/**
356 	 * @is_valid_b_counter_reg: Validates register's address for
357 	 * programming boolean counters for a particular platform.
358 	 */
359 	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);
360 
361 	/**
362 	 * @is_valid_mux_reg: Validates register's address for programming mux
363 	 * for a particular platform.
364 	 */
365 	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);
366 
367 	/**
368 	 * @is_valid_flex_reg: Validates register's address for programming
369 	 * flex EU filtering for a particular platform.
370 	 */
371 	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);
372 
373 	/**
374 	 * @enable_metric_set: Selects and applies any MUX configuration to set
375 	 * up the Boolean and Custom (B/C) counters that are part of the
376 	 * counter reports being sampled. May apply system constraints such as
377 	 * disabling EU clock gating as required.
378 	 */
379 	int (*enable_metric_set)(struct i915_perf_stream *stream,
380 				 struct i915_active *active);
381 
382 	/**
383 	 * @disable_metric_set: Remove system constraints associated with using
384 	 * the OA unit.
385 	 */
386 	void (*disable_metric_set)(struct i915_perf_stream *stream);
387 
388 	/**
389 	 * @oa_enable: Enable periodic sampling
390 	 */
391 	void (*oa_enable)(struct i915_perf_stream *stream);
392 
393 	/**
394 	 * @oa_disable: Disable periodic sampling
395 	 */
396 	void (*oa_disable)(struct i915_perf_stream *stream);
397 
398 	/**
399 	 * @read: Copy data from the circular OA buffer into a given userspace
400 	 * buffer.
	 *
	 * Takes the same arguments as the read callback of
	 * struct i915_perf_stream_ops.
401 	 */
402 	int (*read)(struct i915_perf_stream *stream,
403 		    char __user *buf,
404 		    size_t count,
405 		    size_t *offset);
406 
407 	/**
408 	 * @oa_hw_tail_read: read the OA tail pointer register
409 	 *
410 	 * In particular this enables us to share all the fiddly code for
411 	 * handling the OA unit tail pointer race that affects multiple
412 	 * generations.
413 	 */
414 	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
415 };
416 
/**
 * struct i915_perf_group - state of one OA group
 *
 * struct i915_perf_gt refers to its groups through its group member, one for
 * each OA buffer.
 */
417 struct i915_perf_group {
418 	/**
419 	 * @exclusive_stream: The stream currently using the OA unit. This is
420 	 * sometimes accessed outside a syscall associated to its file
421 	 * descriptor.
422 	 */
423 	struct i915_perf_stream *exclusive_stream;
424 
425 	/**
426 	 * @num_engines: The number of engines using this OA unit.
427 	 */
428 	u32 num_engines;
429 
430 	/**
431 	 * @regs: OA buffer register group for programming the OA unit.
432 	 */
433 	struct i915_perf_regs regs;
434 
435 	/**
436 	 * @type: Type of OA unit - OAM, OAG etc.
437 	 */
438 	enum oa_type type;
439 };
440 
/**
 * struct i915_perf_gt - perf state kept per GT
 */
441 struct i915_perf_gt {
442 	/**
443 	 * @lock: Lock associated with anything below within this structure.
444 	 */
445 	struct rwlock lock;
446 
447 	/**
448 	 * @sseu: sseu configuration selected to run while perf is active,
449 	 * applies to all contexts.
450 	 */
451 	struct intel_sseu sseu;
452 
453 	/**
454 	 * @num_perf_groups: number of perf groups per gt.
455 	 */
456 	u32 num_perf_groups;
457 
458 	/**
459 	 * @group: list of OA groups - one for each OA buffer.
	 *
	 * NOTE(review): presumably an array of @num_perf_groups entries -
	 * confirm with the allocation code.
460 	 */
461 	struct i915_perf_group *group;
462 };
463 
/*
 * Top-level perf state. Streams and OA configurations point back at it
 * through their perf members. It holds the set of dynamic OA configurations,
 * rate-limit state for diagnostics, the OA ops table and the table/mask of
 * OA report formats.
 */
464 struct i915_perf {
	/* The i915 device this perf state is associated with. */
465 	struct drm_i915_private *i915;
466 
	/* NOTE(review): presumably the sysfs kobject the OA configs are published under - confirm */
467 	struct kobject *metrics_kobj;
468 
469 	/*
470 	 * Lock associated with adding/modifying/removing OA configs
471 	 * in perf->metrics_idr.
472 	 */
473 	struct rwlock metrics_lock;
474 
475 	/*
476 	 * List of dynamic configurations (struct i915_oa_config), you
477 	 * need to hold perf->metrics_lock to access it.
478 	 */
479 	struct idr metrics_idr;
480 
481 	/**
482 	 * For rate limiting any notifications of spurious
483 	 * invalid OA reports
484 	 */
485 	struct ratelimit_state spurious_report_rs;
486 
487 	/**
488 	 * For rate limiting any notifications of tail pointer
489 	 * race.
490 	 */
491 	struct ratelimit_state tail_pointer_race;
492 
	/* NOTE(review): presumably a saved copy of the gen7 OASTATUS1 register - confirm */
493 	u32 gen7_latched_oastatus1;
	/* NOTE(review): presumably offsets of these registers in the context image - confirm */
494 	u32 ctx_oactxctrl_offset;
495 	u32 ctx_flexeu0_offset;
496 
497 	/**
498 	 * The RPT_ID/reason field for Gen8+ includes a bit
499 	 * to determine if the CTX ID in the report is valid
500 	 * but the specific bit differs between Gen 8 and 9
501 	 */
502 	u32 gen8_valid_ctx_bit;
503 
	/* Gen specific OA callbacks, stored by value. */
504 	struct i915_oa_ops ops;
	/* NOTE(review): presumably a table indexed by OA format value - confirm */
505 	const struct i915_oa_format *oa_formats;
506 
507 	/**
508 	 * Use a format mask to store the supported formats
509 	 * for a platform.
	 *
	 * FORMAT_MASK_SIZE is the number of unsigned longs needed to hold
	 * I915_OA_FORMAT_MAX - 1 bits. Although it is defined inside the
	 * struct body, the macro remains visible to the rest of the file.
	 *
	 * NOTE(review): if format values are used directly as bit numbers and
	 * the largest valid value is I915_OA_FORMAT_MAX - 1, the mask needs
	 * I915_OA_FORMAT_MAX bits; the sizes only differ when
	 * I915_OA_FORMAT_MAX - 1 is a multiple of BITS_PER_LONG - confirm.
510 	 */
511 #define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG)
512 	unsigned long format_mask[FORMAT_MASK_SIZE];
513 
	/*
	 * NOTE(review): presumably the delay applied while NOA is being
	 * reprogrammed (see i915_perf_stream.noa_wait); units are not
	 * visible here - confirm
	 */
514 	atomic64_t noa_programming_delay;
515 };
516 
517 #endif /* _I915_PERF_TYPES_H_ */
518