1 /*	$NetBSD: i915_perf_types.h,v 1.6 2021/12/19 11:36:57 riastradh Exp $	*/
2 
3 /* SPDX-License-Identifier: MIT */
4 /*
5  * Copyright © 2019 Intel Corporation
6  */
7 
8 #ifndef _I915_PERF_TYPES_H_
9 #define _I915_PERF_TYPES_H_
10 
11 #include <linux/atomic.h>
12 #include <linux/device.h>
13 #include <linux/hrtimer.h>
14 #include <linux/llist.h>
15 #include <linux/poll.h>
16 #include <linux/sysfs.h>
17 #include <linux/types.h>
18 #include <linux/uuid.h>
19 #include <linux/wait.h>
20 
21 #include "i915_reg.h"
22 #include "intel_wakeref.h"
23 
/*
 * Forward declarations: in this header these types are referenced through
 * pointers only, so their full definitions are not required here.
 * (struct i915_perf itself is defined further down in this file.)
 */
24 struct drm_i915_private;
25 struct file;
26 struct i915_gem_context;
27 struct i915_perf;
28 struct i915_vma;
29 struct intel_context;
30 struct intel_engine_cs;
31 
/**
 * struct i915_oa_format - pairs a 32-bit format value with a size
 * @format: 32-bit format value. NOTE(review): presumably the hardware
 *          report-format selector - confirm against the users of this type.
 * @size: size associated with @format. NOTE(review): presumably the size
 *        of one report in bytes - confirm; the unit is not visible here.
 */
32 struct i915_oa_format {
33 	u32 format;
34 	int size;
35 };
36 
/**
 * struct i915_oa_reg - a register address paired with a 32-bit value
 * @addr: the register, expressed as an i915_reg_t
 * @value: 32-bit value associated with @addr. NOTE(review): presumably the
 *         value to be programmed into the register - confirm at the users.
 */
37 struct i915_oa_reg {
38 	i915_reg_t addr;
39 	u32 value;
40 };
41 
/**
 * struct i915_oa_config - one OA configuration
 *
 * Holds three register lists (mux, b_counter and flex), each as a pointer
 * to const &struct i915_oa_reg entries plus a u32 length. The three lists
 * mirror the three register validators declared in &struct i915_oa_ops
 * (is_valid_mux_reg, is_valid_b_counter_reg, is_valid_flex_reg).
 */
42 struct i915_oa_config {
	/* Pointer back to the i915_perf this config is associated with. */
43 	struct i915_perf *perf;
44 
	/*
	 * Room for UUID_STRING_LEN characters plus one extra byte
	 * (presumably the terminating NUL - confirm).
	 */
45 	char uuid[UUID_STRING_LEN + 1];
	/*
	 * NOTE(review): presumably the identifier under which this config is
	 * stored in perf->metrics_idr - confirm against the allocation site.
	 */
46 	int id;
47 
	/*
	 * Register lists. NOTE(review): each *_len is presumably the number
	 * of entries in the matching array (not a byte count) - confirm.
	 */
48 	const struct i915_oa_reg *mux_regs;
49 	u32 mux_regs_len;
50 	const struct i915_oa_reg *b_counter_regs;
51 	u32 b_counter_regs_len;
52 	const struct i915_oa_reg *flex_regs;
53 	u32 flex_regs_len;
54 
	/* The sysfs related members below are compiled out on NetBSD. */
55 #ifndef __NetBSD__		/* XXX sysfs */
56 	struct attribute_group sysfs_metric;
57 	struct attribute *attrs[2];
58 	struct device_attribute sysfs_metric_id;
59 #endif
60 
	/* Reference count for this config. */
61 	struct kref ref;
	/*
	 * NOTE(review): presumably used to defer freeing until after an RCU
	 * grace period - confirm against the release path.
	 */
62 	struct rcu_head rcu;
63 };
64 
/* Forward declaration: the callbacks below take pointers to the stream. */
65 struct i915_perf_stream;
66 
67 /**
68  * struct i915_perf_stream_ops - the OPs to support a specific stream type
 *
 * A table of function pointers. Every callback receives the
 * &struct i915_perf_stream it operates on as its first argument.
 * On NetBSD the @poll_wait member is not present and @read has a
 * different signature (see the individual members).
69  */
70 struct i915_perf_stream_ops {
71 	/**
72 	 * @enable: Enables the collection of HW samples, either in response to
73 	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
74 	 * without `I915_PERF_FLAG_DISABLED`.
75 	 */
76 	void (*enable)(struct i915_perf_stream *stream);
77 
78 	/**
79 	 * @disable: Disables the collection of HW samples, either in response
80 	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
81 	 * the stream.
82 	 */
83 	void (*disable)(struct i915_perf_stream *stream);
84 
85 #ifndef __NetBSD__
86 	/**
87 	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
88 	 * once there is something ready to read() for the stream
	 *
	 * This member is compiled out when __NetBSD__ is defined.
89 	 */
90 	void (*poll_wait)(struct i915_perf_stream *stream,
91 			  struct file *file,
92 			  poll_table *wait);
93 #endif
94 
95 	/**
96 	 * @wait_unlocked: For handling a blocking read, wait until there is
97 	 * something ready to read() for the stream. E.g. wait on the same
98 	 * wait queue that would be passed to poll_wait().
99 	 */
100 	int (*wait_unlocked)(struct i915_perf_stream *stream);
101 
102 	/**
103 	 * @read: Copy buffered metrics as records to userspace
104 	 * **buf**: the userspace, destination buffer
105 	 * **count**: the number of bytes to copy, requested by userspace
106 	 * **offset**: zero at the start of the read, updated as the read
107 	 * proceeds, it represents how many bytes have been copied so far and
108 	 * the buffer offset for copying the next record.
109 	 *
110 	 * Copy as many buffered i915 perf samples and records for this stream
111 	 * to userspace as will fit in the given buffer.
112 	 *
113 	 * Only write complete records; returning -%ENOSPC if there isn't room
114 	 * for a complete record.
115 	 *
116 	 * Return any error condition that results in a short read such as
117 	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
118 	 * returning to userspace.
	 *
	 * On NetBSD the signature differs: **buf** is a struct uio, and the
	 * **count** (kauth_cred_t) and **offset** (int) parameters are marked
	 * as dummies, so the description of count/offset above applies to
	 * the non-NetBSD variant only.
119 	 */
120 #ifdef __NetBSD__
121 	int (*read)(struct i915_perf_stream *stream,
122 		    struct uio *buf,
123 		    kauth_cred_t count, /* XXX dummy */
124 		    int offset);	/* XXX dummy */
125 #else
126 	int (*read)(struct i915_perf_stream *stream,
127 		    char __user *buf,
128 		    size_t count,
129 		    size_t *offset);
130 #endif
131 
132 	/**
133 	 * @destroy: Cleanup any stream specific resources.
134 	 *
135 	 * The stream will always be disabled before this is called.
136 	 */
137 	void (*destroy)(struct i915_perf_stream *stream);
138 };
139 
140 /**
141  * struct i915_perf_stream - state for a single open stream FD
 *
 * Besides the per-stream configuration, this embeds the OA buffer
 * bookkeeping (@oa_buffer) and the timer/wait-queue pair used to notify
 * readers (@poll_check_timer, @poll_wq).
142  */
143 struct i915_perf_stream {
144 	/**
145 	 * @perf: i915_perf backpointer
146 	 */
147 	struct i915_perf *perf;
148 
149 	/**
150 	 * @uncore: mmio access path
151 	 */
152 	struct intel_uncore *uncore;
153 
154 	/**
155 	 * @engine: Engine associated with this performance stream.
156 	 */
157 	struct intel_engine_cs *engine;
158 
159 	/**
160 	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
161 	 * properties given when opening a stream, representing the contents
162 	 * of a single sample as read() by userspace.
163 	 */
164 	u32 sample_flags;
165 
166 	/**
167 	 * @sample_size: Considering the configured contents of a sample
168 	 * combined with the required header size, this is the total size
169 	 * of a single sample record.
170 	 */
171 	int sample_size;
172 
173 	/**
174 	 * @ctx: %NULL if measuring system-wide across all contexts or a
175 	 * specific context that is being monitored.
176 	 */
177 	struct i915_gem_context *ctx;
178 
179 	/**
180 	 * @enabled: Whether the stream is currently enabled, considering
181 	 * whether the stream was opened in a disabled state and based
182 	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
183 	 */
184 	bool enabled;
185 
186 	/**
187 	 * @hold_preemption: Whether preemption is put on hold for command
188 	 * submissions done on the @ctx. This is useful for some drivers that
189 	 * cannot easily post process the OA buffer context to subtract delta
190 	 * of performance counters not associated with @ctx.
191 	 */
192 	bool hold_preemption;
193 
194 	/**
195 	 * @ops: The callbacks providing the implementation of this specific
196 	 * type of configured stream.
197 	 */
198 	const struct i915_perf_stream_ops *ops;
199 
200 	/**
201 	 * @oa_config: The OA configuration used by the stream.
202 	 */
203 	struct i915_oa_config *oa_config;
204 
205 	/**
206 	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
207 	 * each time @oa_config changes.
208 	 */
209 	struct llist_head oa_config_bos;
210 
211 	/**
212 	 * @pinned_ctx: The OA context specific information.
213 	 */
214 	struct intel_context *pinned_ctx;
215 
216 	/**
217 	 * @specific_ctx_id: The id of the specific context.
218 	 */
219 	u32 specific_ctx_id;
220 
221 	/**
222 	 * @specific_ctx_id_mask: The mask used for masking specific_ctx_id bits.
223 	 */
224 	u32 specific_ctx_id_mask;
225 
226 	/**
227 	 * @poll_check_timer: High resolution timer that will periodically
228 	 * check for data in the circular OA buffer for notifying userspace
229 	 * (e.g. during a read() or poll()).
230 	 */
231 	struct hrtimer poll_check_timer;
232 
233 	/**
234 	 * @poll_wq: The wait queue that hrtimer callback wakes when it
235 	 * sees data ready to read in the circular OA buffer.
	 *
	 * On NetBSD this is a drm_waitqueue_t and is accompanied by
	 * @poll_selq, a struct selinfo. NOTE(review): poll_selq is presumably
	 * the select/poll notification state that replaces the Linux
	 * poll_table mechanism - confirm against the NetBSD poll path.
236 	 */
237 #ifdef __NetBSD__
238 	drm_waitqueue_t poll_wq;
239 	struct selinfo poll_selq;
240 #else
241 	wait_queue_head_t poll_wq;
242 #endif
243 
244 	/**
245 	 * @pollin: Whether there is data available to read.
246 	 */
247 	bool pollin;
248 
249 	/**
250 	 * @periodic: Whether periodic sampling is currently enabled.
251 	 */
252 	bool periodic;
253 
254 	/**
255 	 * @period_exponent: The OA unit sampling frequency is derived from this.
256 	 */
257 	int period_exponent;
258 
259 	/**
260 	 * @oa_buffer: State of the OA buffer.
	 *
	 * The first six members carry no documentation of their own.
	 * NOTE(review), inferred from names only and to be confirmed:
	 * vma/vaddr presumably refer to the buffer object and a CPU mapping
	 * of it; last_ctx_id presumably remembers a context id seen while
	 * parsing reports; format/format_size presumably describe the
	 * selected report format (cf. struct i915_oa_format); size_exponent
	 * presumably encodes the buffer size.
261 	 */
262 	struct {
263 		struct i915_vma *vma;
264 		u8 *vaddr;
265 		u32 last_ctx_id;
266 		int format;
267 		int format_size;
268 		int size_exponent;
269 
270 		/**
271 		 * @ptr_lock: Locks reads and writes to all head/tail state
272 		 *
273 		 * Consider: the head and tail pointer state needs to be read
274 		 * consistently from a hrtimer callback (atomic context) and
275 		 * read() fop (user context) with tail pointer updates happening
276 		 * in atomic context and head updates in user context and the
277 		 * (unlikely) possibility of read() errors needing to reset all
278 		 * head/tail state.
279 		 *
280 		 * Note: Contention/performance aren't currently a significant
281 		 * concern here considering the relatively low frequency of
282 		 * hrtimer callbacks (5ms period) and that reads typically only
283 		 * happen in response to a hrtimer event and likely complete
284 		 * before the next callback.
285 		 *
286 		 * Note: This lock is not held *while* reading and copying data
287 		 * to userspace so the value of head observed in hrtimer
288 		 * callbacks won't represent any partial consumption of data.
289 		 */
290 		spinlock_t ptr_lock;
291 
292 		/**
293 		 * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to
294 		 * be used for reading.
295 		 *
296 		 * Initial values of 0xffffffff are invalid and imply that an
297 		 * update is required (and should be ignored by an attempted
298 		 * read)
299 		 */
300 		struct {
301 			u32 offset;
302 		} tails[2];
303 
304 		/**
305 		 * @aged_tail_idx: Index for the aged tail ready to read() data up to.
		 *
		 * Indexes @tails, which has exactly two entries.
306 		 */
307 		unsigned int aged_tail_idx;
308 
309 		/**
310 		 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
311 		 * was read; used to determine when it is old enough to trust.
312 		 */
313 		u64 aging_timestamp;
314 
315 		/**
316 		 * @head: Although we can always read back the head pointer register,
317 		 * we prefer to avoid trusting the HW state, just to avoid any
318 		 * risk that some hardware condition could somehow bump the
319 		 * head pointer unpredictably and cause us to forward the wrong
320 		 * OA buffer data to userspace.
321 		 */
322 		u32 head;
323 	} oa_buffer;
324 
325 	/**
326 	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
327 	 * reprogrammed.
328 	 */
329 	struct i915_vma *noa_wait;
330 };
331 
332 /**
333  * struct i915_oa_ops - Gen specific implementation of an OA unit stream
 *
 * The three is_valid_*_reg callbacks take the &struct i915_perf and a raw
 * u32 register address; all other callbacks operate on a
 * &struct i915_perf_stream.
334  */
335 struct i915_oa_ops {
336 	/**
337 	 * @is_valid_b_counter_reg: Validates register's address for
338 	 * programming boolean counters for a particular platform.
339 	 */
340 	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);
341 
342 	/**
343 	 * @is_valid_mux_reg: Validates register's address for programming mux
344 	 * for a particular platform.
345 	 */
346 	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);
347 
348 	/**
349 	 * @is_valid_flex_reg: Validates register's address for programming
350 	 * flex EU filtering for a particular platform.
351 	 */
352 	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);
353 
354 	/**
355 	 * @enable_metric_set: Selects and applies any MUX configuration to set
356 	 * up the Boolean and Custom (B/C) counters that are part of the
357 	 * counter reports being sampled. May apply system constraints such as
358 	 * disabling EU clock gating as required.
359 	 */
360 	int (*enable_metric_set)(struct i915_perf_stream *stream);
361 
362 	/**
363 	 * @disable_metric_set: Remove system constraints associated with using
364 	 * the OA unit.
365 	 */
366 	void (*disable_metric_set)(struct i915_perf_stream *stream);
367 
368 	/**
369 	 * @oa_enable: Enable periodic sampling
370 	 */
371 	void (*oa_enable)(struct i915_perf_stream *stream);
372 
373 	/**
374 	 * @oa_disable: Disable periodic sampling
375 	 */
376 	void (*oa_disable)(struct i915_perf_stream *stream);
377 
378 	/**
379 	 * @read: Copy data from the circular OA buffer into a given userspace
380 	 * buffer.
	 *
	 * The signature is the same as the @read member of
	 * &struct i915_perf_stream_ops in both build variants. On NetBSD the
	 * destination is a struct uio and the count/offset parameters are
	 * marked as dummies.
381 	 */
382 #ifdef __NetBSD__
383 	int (*read)(struct i915_perf_stream *stream,
384 		    struct uio *buf,
385 		    kauth_cred_t count, /* XXX dummy */
386 		    int offset);	/* XXX dummy */
387 #else
388 	int (*read)(struct i915_perf_stream *stream,
389 		    char __user *buf,
390 		    size_t count,
391 		    size_t *offset);
392 #endif
393 
394 	/**
395 	 * @oa_hw_tail_read: read the OA tail pointer register
396 	 *
397 	 * In particular this enables us to share all the fiddly code for
398 	 * handling the OA unit tail pointer race that affects multiple
399 	 * generations.
400 	 */
401 	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
402 };
403 
/*
 * struct i915_perf - perf state that OA configs and streams point back to
 *
 * Both struct i915_oa_config and struct i915_perf_stream carry a pointer
 * to this structure in their perf member.
 */
404 struct i915_perf {
	/* Pointer to the i915 device private data. */
405 	struct drm_i915_private *i915;
406 
	/*
	 * NOTE(review): presumably the kobject under which metric sets are
	 * exposed through sysfs - confirm; sysfs support is compiled out of
	 * struct i915_oa_config on NetBSD.
	 */
407 	struct kobject *metrics_kobj;
408 
409 	/*
410 	 * Lock associated with adding/modifying/removing OA configs
411 	 * in perf->metrics_idr.
412 	 */
413 	struct mutex metrics_lock;
414 
415 	/*
416 	 * List of dynamic configurations (struct i915_oa_config), you
417 	 * need to hold perf->metrics_lock to access it.
418 	 */
419 	struct idr metrics_idr;
420 
421 	/*
422 	 * Lock associated with anything below within this structure
423 	 * except exclusive_stream.
424 	 */
425 	struct mutex lock;
426 
427 	/*
428 	 * The stream currently using the OA unit. If accessed
429 	 * outside a syscall associated to its file
430 	 * descriptor.
	 *
	 * NOTE(review): the sentence above is incomplete - it never states
	 * what is required when the stream is accessed outside such a
	 * syscall. The comment on @lock explicitly excludes this member, so
	 * the applicable locking rule must be confirmed in the users.
431 	 */
432 	struct i915_perf_stream *exclusive_stream;
433 
434 	/**
435 	 * For rate limiting any notifications of spurious
436 	 * invalid OA reports
437 	 */
438 	struct ratelimit_state spurious_report_rs;
439 
	/* An OA config embedded by value in this structure. */
440 	struct i915_oa_config test_config;
441 
	/*
	 * NOTE(review): semantics not visible in this header. By name,
	 * gen7_latched_oastatus1 presumably caches OASTATUS1 bits on Gen7 and
	 * the two ctx_*_offset fields presumably locate registers within a
	 * context image - confirm against the users.
	 */
442 	u32 gen7_latched_oastatus1;
443 	u32 ctx_oactxctrl_offset;
444 	u32 ctx_flexeu0_offset;
445 
446 	/**
447 	 * The RPT_ID/reason field for Gen8+ includes a bit
448 	 * to determine if the CTX ID in the report is valid
449 	 * but the specific bit differs between Gen 8 and 9
450 	 */
451 	u32 gen8_valid_ctx_bit;
452 
	/* Gen specific OA callbacks, stored by value (see struct i915_oa_ops). */
453 	struct i915_oa_ops ops;
	/*
	 * Pointer to const struct i915_oa_format entries. NOTE(review):
	 * presumably a table indexed by report format - confirm.
	 */
454 	const struct i915_oa_format *oa_formats;
455 
	/*
	 * NOTE(review): presumably the delay used for the NOA reprogramming
	 * wait (cf. i915_perf_stream.noa_wait); the unit is not visible here.
	 */
456 	atomic64_t noa_programming_delay;
457 };
458 
459 #endif /* _I915_PERF_TYPES_H_ */
460