xref: /linux/drivers/gpu/drm/i915/i915_perf_types.h (revision f86fd32d)
1 /* SPDX-License-Identifier: MIT */
2 /*
3  * Copyright © 2019 Intel Corporation
4  */
5 
6 #ifndef _I915_PERF_TYPES_H_
7 #define _I915_PERF_TYPES_H_
8 
9 #include <linux/atomic.h>
10 #include <linux/device.h>
11 #include <linux/hrtimer.h>
12 #include <linux/llist.h>
13 #include <linux/poll.h>
14 #include <linux/sysfs.h>
15 #include <linux/types.h>
16 #include <linux/uuid.h>
17 #include <linux/wait.h>
18 
19 #include "i915_reg.h"
20 #include "intel_wakeref.h"
21 
22 struct drm_i915_private;
23 struct file;
24 struct i915_gem_context;
25 struct i915_perf;
26 struct i915_vma;
27 struct intel_context;
28 struct intel_engine_cs;
29 
/*
 * struct i915_oa_format - an OA report format and its size
 *
 * Pairs a raw HW report-format identifier with the size of a single report
 * in that format (presumably bytes, given its use for sizing read() sample
 * records — confirm against the users in i915_perf.c, not visible here).
 */
30 struct i915_oa_format {
31 	u32 format;
32 	int size;
33 };
34 
/*
 * struct i915_oa_reg - a single register/value pair of an OA configuration
 *
 * OA configurations (struct i915_oa_config) are held as arrays of these
 * pairs (mux, B-counter and flex-EU register writes).
 */
35 struct i915_oa_reg {
36 	i915_reg_t addr;
37 	u32 value;
38 };
39 
/*
 * struct i915_oa_config - a named set of OA unit register writes (a
 * "metric set"), identified by UUID and exposed through sysfs.
 */
40 struct i915_oa_config {
	/* Backpointer to the i915_perf instance this config belongs to. */
41 	struct i915_perf *perf;
42 
	/* NUL-terminated UUID string naming this configuration. */
43 	char uuid[UUID_STRING_LEN + 1];
	/*
	 * Numeric id of this config — presumably its handle in
	 * perf->metrics_idr; TODO(review) confirm against i915_perf.c.
	 */
44 	int id;
45 
	/*
	 * Register/value pairs and their counts for the three register
	 * classes an OA config programs: the mux, the boolean (B/C)
	 * counters and flex EU filtering (see the per-class validators
	 * in struct i915_oa_ops).
	 */
46 	const struct i915_oa_reg *mux_regs;
47 	u32 mux_regs_len;
48 	const struct i915_oa_reg *b_counter_regs;
49 	u32 b_counter_regs_len;
50 	const struct i915_oa_reg *flex_regs;
51 	u32 flex_regs_len;
52 
	/*
	 * sysfs presentation of this config (attribute group plus the
	 * single "id" attribute) — presumably registered under
	 * perf->metrics_kobj; verify against i915_perf.c.
	 */
53 	struct attribute_group sysfs_metric;
54 	struct attribute *attrs[2];
55 	struct device_attribute sysfs_metric_id;
56 
	/*
	 * Reference count, with an rcu_head for deferring the free past
	 * an RCU grace period (presumably via kfree_rcu()/call_rcu()).
	 */
57 	struct kref ref;
58 	struct rcu_head rcu;
59 };
60 
61 struct i915_perf_stream;
62 
63 /**
64  * struct i915_perf_stream_ops - the OPs to support a specific stream type
65  */
66 struct i915_perf_stream_ops {
67 	/**
68 	 * @enable: Enables the collection of HW samples, either in response to
69 	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
70 	 * without `I915_PERF_FLAG_DISABLED`.
71 	 */
72 	void (*enable)(struct i915_perf_stream *stream);
73 
74 	/**
75 	 * @disable: Disables the collection of HW samples, either in response
76 	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
77 	 * the stream.
78 	 */
79 	void (*disable)(struct i915_perf_stream *stream);
80 
81 	/**
82 	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
83 	 * once there is something ready to read() for the stream
84 	 */
85 	void (*poll_wait)(struct i915_perf_stream *stream,
86 			  struct file *file,
87 			  poll_table *wait);
88 
89 	/**
90 	 * @wait_unlocked: For handling a blocking read, wait until there is
91 	 * something ready to read() for the stream. E.g. wait on the same
92 	 * wait queue that would be passed to poll_wait().
93 	 */
94 	int (*wait_unlocked)(struct i915_perf_stream *stream);
95 
96 	/**
97 	 * @read: Copy buffered metrics as records to userspace
98 	 * **buf**: the userspace, destination buffer
99 	 * **count**: the number of bytes to copy, requested by userspace
100 	 * **offset**: zero at the start of the read, updated as the read
101 	 * proceeds, it represents how many bytes have been copied so far and
102 	 * the buffer offset for copying the next record.
103 	 *
104 	 * Copy as many buffered i915 perf samples and records for this stream
105 	 * to userspace as will fit in the given buffer.
106 	 *
107 	 * Only write complete records; returning -%ENOSPC if there isn't room
108 	 * for a complete record.
109 	 *
110 	 * Return any error condition that results in a short read such as
111 	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
112 	 * returning to userspace.
113 	 */
114 	int (*read)(struct i915_perf_stream *stream,
115 		    char __user *buf,
116 		    size_t count,
117 		    size_t *offset);
118 
119 	/**
120 	 * @destroy: Cleanup any stream specific resources.
121 	 *
122 	 * The stream will always be disabled before this is called.
123 	 */
124 	void (*destroy)(struct i915_perf_stream *stream);
125 };
126 
127 /**
128  * struct i915_perf_stream - state for a single open stream FD
129  */
130 struct i915_perf_stream {
131 	/**
132 	 * @perf: i915_perf backpointer
133 	 */
134 	struct i915_perf *perf;
135 
136 	/**
137 	 * @uncore: mmio access path
138 	 */
139 	struct intel_uncore *uncore;
140 
141 	/**
142 	 * @engine: Engine associated with this performance stream.
143 	 */
144 	struct intel_engine_cs *engine;
145 
146 	/**
147 	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
148 	 * properties given when opening a stream, representing the contents
149 	 * of a single sample as read() by userspace.
150 	 */
151 	u32 sample_flags;
152 
153 	/**
154 	 * @sample_size: Considering the configured contents of a sample
155 	 * combined with the required header size, this is the total size
156 	 * of a single sample record.
157 	 */
158 	int sample_size;
159 
160 	/**
161 	 * @ctx: %NULL if measuring system-wide across all contexts or a
162 	 * specific context that is being monitored.
163 	 */
164 	struct i915_gem_context *ctx;
165 
166 	/**
167 	 * @enabled: Whether the stream is currently enabled, considering
168 	 * whether the stream was opened in a disabled state and based
169 	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
170 	 */
171 	bool enabled;
172 
173 	/**
174 	 * @hold_preemption: Whether preemption is put on hold for command
175 	 * submissions done on the @ctx. This is useful for some drivers that
176 	 * cannot easily post process the OA buffer context to subtract delta
177 	 * of performance counters not associated with @ctx.
178 	 */
179 	bool hold_preemption;
180 
181 	/**
182 	 * @ops: The callbacks providing the implementation of this specific
183 	 * type of configured stream.
184 	 */
185 	const struct i915_perf_stream_ops *ops;
186 
187 	/**
188 	 * @oa_config: The OA configuration used by the stream.
189 	 */
190 	struct i915_oa_config *oa_config;
191 
192 	/**
193 	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
194 	 * each time @oa_config changes.
195 	 */
196 	struct llist_head oa_config_bos;
197 
198 	/**
199 	 * @pinned_ctx: The OA context specific information.
200 	 */
201 	struct intel_context *pinned_ctx;
202 
203 	/**
204 	 * @specific_ctx_id: The id of the specific context.
205 	 */
206 	u32 specific_ctx_id;
207 
208 	/**
209 	 * @specific_ctx_id_mask: The mask used to mask specific_ctx_id bits.
210 	 */
211 	u32 specific_ctx_id_mask;
212 
213 	/**
214 	 * @poll_check_timer: High resolution timer that will periodically
215 	 * check for data in the circular OA buffer for notifying userspace
216 	 * (e.g. during a read() or poll()).
217 	 */
218 	struct hrtimer poll_check_timer;
219 
220 	/**
221 	 * @poll_wq: The wait queue that hrtimer callback wakes when it
222 	 * sees data ready to read in the circular OA buffer.
223 	 */
224 	wait_queue_head_t poll_wq;
225 
226 	/**
227 	 * @pollin: Whether there is data available to read.
228 	 */
229 	bool pollin;
230 
231 	/**
232 	 * @periodic: Whether periodic sampling is currently enabled.
233 	 */
234 	bool periodic;
235 
236 	/**
237 	 * @period_exponent: The OA unit sampling frequency is derived from this.
238 	 */
239 	int period_exponent;
240 
241 	/**
242 	 * @oa_buffer: State of the OA buffer.
243 	 */
244 	struct {
		/* Backing object for the circular OA buffer and its CPU mapping. */
245 		struct i915_vma *vma;
246 		u8 *vaddr;
		/*
		 * NOTE(review): presumably the context id of the last report
		 * processed while reading — confirm against i915_perf.c.
		 */
247 		u32 last_ctx_id;
		/* HW report format in use and the size of one such report. */
248 		int format;
249 		int format_size;
		/*
		 * Buffer size expressed as an exponent — presumably the
		 * encoding programmed into the OA buffer size register;
		 * verify the exact base/shift against i915_perf.c.
		 */
250 		int size_exponent;
251 
252 		/**
253 		 * @ptr_lock: Locks reads and writes to all head/tail state
254 		 *
255 		 * Consider: the head and tail pointer state needs to be read
256 		 * consistently from a hrtimer callback (atomic context) and
257 		 * read() fop (user context) with tail pointer updates happening
258 		 * in atomic context and head updates in user context and the
259 		 * (unlikely) possibility of read() errors needing to reset all
260 		 * head/tail state.
261 		 *
262 		 * Note: Contention/performance aren't currently a significant
263 		 * concern here considering the relatively low frequency of
264 		 * hrtimer callbacks (5ms period) and that reads typically only
265 		 * happen in response to a hrtimer event and likely complete
266 		 * before the next callback.
267 		 *
268 		 * Note: This lock is not held *while* reading and copying data
269 		 * to userspace so the value of head observed in hrtimer
270 		 * callbacks won't represent any partial consumption of data.
271 		 */
272 		spinlock_t ptr_lock;
273 
274 		/**
275 		 * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to
276 		 * be used for reading.
277 		 *
278 		 * Initial values of 0xffffffff are invalid and imply that an
279 		 * update is required (and should be ignored by an attempted
280 		 * read)
281 		 */
282 		struct {
283 			u32 offset;
284 		} tails[2];
285 
286 		/**
287 		 * @aged_tail_idx: Index for the aged tail ready to read() data up to.
288 		 */
289 		unsigned int aged_tail_idx;
290 
291 		/**
292 		 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
293 		 * was read; used to determine when it is old enough to trust.
294 		 */
295 		u64 aging_timestamp;
296 
297 		/**
298 		 * @head: Although we can always read back the head pointer register,
299 		 * we prefer to avoid trusting the HW state, just to avoid any
300 		 * risk that some hardware condition could somehow bump the
301 		 * head pointer unpredictably and cause us to forward the wrong
302 		 * OA buffer data to userspace.
303 		 */
304 		u32 head;
305 	} oa_buffer;
306 
307 	/**
308 	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
309 	 * reprogrammed.
310 	 */
311 	struct i915_vma *noa_wait;
312 };
313 
314 /**
315  * struct i915_oa_ops - Gen specific implementation of an OA unit stream
316  *
317  * One instance of these ops is embedded in struct i915_perf (perf->ops);
318  * the hooks abstract the generation-specific details of driving the OA unit.
319  */
320 struct i915_oa_ops {
321 	/**
322 	 * @is_valid_b_counter_reg: Validates register's address for
323 	 * programming boolean counters for a particular platform.
324 	 */
325 	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);
326 
327 	/**
328 	 * @is_valid_mux_reg: Validates register's address for programming mux
329 	 * for a particular platform.
330 	 */
331 	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);
332 
333 	/**
334 	 * @is_valid_flex_reg: Validates register's address for programming
335 	 * flex EU filtering for a particular platform.
336 	 */
337 	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);
338 
339 	/**
340 	 * @enable_metric_set: Selects and applies any MUX configuration to set
341 	 * up the Boolean and Custom (B/C) counters that are part of the
342 	 * counter reports being sampled. May apply system constraints such as
343 	 * disabling EU clock gating as required.
344 	 */
345 	int (*enable_metric_set)(struct i915_perf_stream *stream);
346 
347 	/**
348 	 * @disable_metric_set: Remove system constraints associated with using
349 	 * the OA unit.
350 	 */
351 	void (*disable_metric_set)(struct i915_perf_stream *stream);
352 
353 	/**
354 	 * @oa_enable: Enable periodic sampling
355 	 */
356 	void (*oa_enable)(struct i915_perf_stream *stream);
357 
358 	/**
359 	 * @oa_disable: Disable periodic sampling
360 	 */
361 	void (*oa_disable)(struct i915_perf_stream *stream);
362 
363 	/**
364 	 * @read: Copy data from the circular OA buffer into a given userspace
365 	 * buffer.
366 	 */
367 	int (*read)(struct i915_perf_stream *stream,
368 		    char __user *buf,
369 		    size_t count,
370 		    size_t *offset);
371 
372 	/**
373 	 * @oa_hw_tail_read: read the OA tail pointer register
374 	 *
375 	 * In particular this enables us to share all the fiddly code for
376 	 * handling the OA unit tail pointer race that affects multiple
377 	 * generations.
378 	 */
379 	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
380 };
378 
/*
 * struct i915_perf - global i915 perf/OA state: the registered OA configs,
 * the per-platform OA ops and the single stream currently using the OA unit.
 */
379 struct i915_perf {
	/* Backpointer to the owning i915 device. */
380 	struct drm_i915_private *i915;
381 
	/*
	 * sysfs kobject under which OA configs are exposed — presumably the
	 * "metrics" directory; confirm against i915_perf.c.
	 */
382 	struct kobject *metrics_kobj;
383 
384 	/*
385 	 * Lock associated with adding/modifying/removing OA configs
386 	 * in perf->metrics_idr.
387 	 */
388 	struct mutex metrics_lock;
389 
390 	/*
391 	 * List of dynamic configurations (struct i915_oa_config), you
392 	 * need to hold perf->metrics_lock to access it.
393 	 */
394 	struct idr metrics_idr;
395 
396 	/*
397 	 * Lock associated with anything below within this structure
398 	 * except exclusive_stream.
399 	 */
400 	struct mutex lock;
401 
402 	/*
403 	 * The stream currently using the OA unit.
404 	 *
405 	 * NOTE(review): the original comment here ended mid-sentence
	 * ("If accessed outside a syscall associated to its file
	 * descriptor.") — @lock above explicitly excludes this pointer,
	 * so access outside a syscall on the stream's fd presumably needs
	 * some other serialization; confirm the rule against i915_perf.c.
406 	 */
407 	struct i915_perf_stream *exclusive_stream;
408 
409 	/**
410 	 * For rate limiting any notifications of spurious
411 	 * invalid OA reports
412 	 */
413 	struct ratelimit_state spurious_report_rs;
414 
	/*
	 * Built-in OA configuration — presumably the platform's default
	 * "test" metric set registered at init; verify in i915_perf.c.
	 */
415 	struct i915_oa_config test_config;
416 
	/*
	 * Gen-specific register state/offsets: latched OASTATUS1 bits
	 * (gen7) and the per-context image offsets of OACTXCONTROL and
	 * the first flex-EU register (gen8+) — hedged, confirm users.
	 */
417 	u32 gen7_latched_oastatus1;
418 	u32 ctx_oactxctrl_offset;
419 	u32 ctx_flexeu0_offset;
420 
421 	/**
422 	 * The RPT_ID/reason field for Gen8+ includes a bit
423 	 * to determine if the CTX ID in the report is valid
424 	 * but the specific bit differs between Gen 8 and 9
425 	 */
426 	u32 gen8_valid_ctx_bit;
427 
	/* Per-platform OA unit hooks and the table of supported formats. */
428 	struct i915_oa_ops ops;
429 	const struct i915_oa_format *oa_formats;
430 
	/*
	 * Delay (atomic 64-bit; presumably nanoseconds) used by the
	 * noa_wait batch when reprogramming the NOA logic — confirm
	 * units against i915_perf.c.
	 */
431 	atomic64_t noa_programming_delay;
432 };
433 
434 #endif /* _I915_PERF_TYPES_H_ */
435