1 /* SPDX-License-Identifier: MIT */
2 /*
3  * Copyright © 2019 Intel Corporation
4  */
5 
6 #ifndef _I915_PERF_TYPES_H_
7 #define _I915_PERF_TYPES_H_
8 
9 #include <linux/atomic.h>
10 #include <linux/device.h>
11 #include <linux/hrtimer.h>
12 #include <linux/llist.h>
13 #include <linux/poll.h>
14 #include <linux/sysfs.h>
15 #include <linux/types.h>
16 #include <linux/uuid.h>
17 #include <linux/wait.h>
18 #include <uapi/drm/i915_drm.h>
19 
20 #include "gt/intel_sseu.h"
21 #include "i915_reg.h"
22 #include "intel_wakeref.h"
23 
24 struct drm_i915_private;
25 struct file;
26 struct i915_active;
27 struct i915_gem_context;
28 struct i915_perf;
29 struct i915_vma;
30 struct intel_context;
31 struct intel_engine_cs;
32 
/**
 * struct i915_oa_format - description of a single OA unit report format
 * @format: user-visible format identifier (presumably one of the
 *          drm_i915_oa_format uapi values bounded by I915_OA_FORMAT_MAX,
 *          which is used to size the format mask below — verify)
 * @size: size in bytes of a single report written in this format
 */
struct i915_oa_format {
	u32 format;
	int size;
};
37 
/**
 * struct i915_oa_reg - a register address/value pair used when programming
 * an OA configuration
 * @addr: register to program
 * @value: value to write to @addr
 */
struct i915_oa_reg {
	i915_reg_t addr;
	u32 value;
};
42 
/**
 * struct i915_oa_config - a named OA unit configuration (mux, boolean
 * counter and flex EU register programming) exposed through sysfs
 * @perf: back pointer to the owning i915_perf instance
 * @uuid: NUL-terminated UUID string uniquely identifying this config
 * @id: numeric id of the config (presumably the key under which it is
 *      stored in perf->metrics_idr — verify against i915_perf.c)
 * @mux_regs: array of mux register writes for this config
 * @mux_regs_len: number of entries in @mux_regs
 * @b_counter_regs: array of boolean counter register writes
 * @b_counter_regs_len: number of entries in @b_counter_regs
 * @flex_regs: array of flex EU register writes
 * @flex_regs_len: number of entries in @flex_regs
 * @sysfs_metric: sysfs group exposing this config under metrics_kobj
 * @attrs: attribute pointers backing @sysfs_metric (one attribute plus
 *         the NULL terminator)
 * @sysfs_metric_id: the "id" device attribute within @sysfs_metric
 * @ref: reference count for the config
 * @rcu: used to defer freeing until an RCU grace period has elapsed
 */
struct i915_oa_config {
	struct i915_perf *perf;

	char uuid[UUID_STRING_LEN + 1];
	int id;

	const struct i915_oa_reg *mux_regs;
	u32 mux_regs_len;
	const struct i915_oa_reg *b_counter_regs;
	u32 b_counter_regs_len;
	const struct i915_oa_reg *flex_regs;
	u32 flex_regs_len;

	struct attribute_group sysfs_metric;
	struct attribute *attrs[2];
	struct device_attribute sysfs_metric_id;

	struct kref ref;
	struct rcu_head rcu;
};
63 
64 struct i915_perf_stream;
65 
/**
 * struct i915_perf_stream_ops - the OPs to support a specific stream type
 */
struct i915_perf_stream_ops {
	/**
	 * @enable: Enables the collection of HW samples, either in response to
	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
	 * without `I915_PERF_FLAG_DISABLED`.
	 */
	void (*enable)(struct i915_perf_stream *stream);

	/**
	 * @disable: Disables the collection of HW samples, either in response
	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
	 * the stream.
	 */
	void (*disable)(struct i915_perf_stream *stream);

	/**
	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
	 * once there is something ready to read() for the stream
	 */
	void (*poll_wait)(struct i915_perf_stream *stream,
			  struct file *file,
			  poll_table *wait);

	/**
	 * @wait_unlocked: For handling a blocking read, wait until there is
	 * something ready to read() for the stream. E.g. wait on the same
	 * wait queue that would be passed to poll_wait().
	 */
	int (*wait_unlocked)(struct i915_perf_stream *stream);

	/**
	 * @read: Copy buffered metrics as records to userspace
	 * **buf**: the userspace, destination buffer
	 * **count**: the number of bytes to copy, requested by userspace
	 * **offset**: zero at the start of the read, updated as the read
	 * proceeds, it represents how many bytes have been copied so far and
	 * the buffer offset for copying the next record.
	 *
	 * Copy as many buffered i915 perf samples and records for this stream
	 * to userspace as will fit in the given buffer.
	 *
	 * Only write complete records; returning -%ENOSPC if there isn't room
	 * for a complete record.
	 *
	 * Return any error condition that results in a short read such as
	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
	 * returning to userspace.
	 */
	int (*read)(struct i915_perf_stream *stream,
		    char __user *buf,
		    size_t count,
		    size_t *offset);

	/**
	 * @destroy: Cleanup any stream specific resources.
	 *
	 * The stream will always be disabled before this is called.
	 */
	void (*destroy)(struct i915_perf_stream *stream);
};
129 
/**
 * struct i915_perf_stream - state for a single open stream FD
 */
struct i915_perf_stream {
	/**
	 * @perf: i915_perf backpointer
	 */
	struct i915_perf *perf;

	/**
	 * @uncore: mmio access path
	 */
	struct intel_uncore *uncore;

	/**
	 * @engine: Engine associated with this performance stream.
	 */
	struct intel_engine_cs *engine;

	/**
	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
	 * properties given when opening a stream, representing the contents
	 * of a single sample as read() by userspace.
	 */
	u32 sample_flags;

	/**
	 * @sample_size: Considering the configured contents of a sample
	 * combined with the required header size, this is the total size
	 * of a single sample record.
	 */
	int sample_size;

	/**
	 * @ctx: %NULL if measuring system-wide across all contexts or a
	 * specific context that is being monitored.
	 */
	struct i915_gem_context *ctx;

	/**
	 * @enabled: Whether the stream is currently enabled, considering
	 * whether the stream was opened in a disabled state and based
	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
	 */
	bool enabled;

	/**
	 * @hold_preemption: Whether preemption is put on hold for command
	 * submissions done on the @ctx. This is useful for some drivers that
	 * cannot easily post process the OA buffer context to subtract delta
	 * of performance counters not associated with @ctx.
	 */
	bool hold_preemption;

	/**
	 * @ops: The callbacks providing the implementation of this specific
	 * type of configured stream.
	 */
	const struct i915_perf_stream_ops *ops;

	/**
	 * @oa_config: The OA configuration used by the stream.
	 */
	struct i915_oa_config *oa_config;

	/**
	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
	 * each time @oa_config changes.
	 */
	struct llist_head oa_config_bos;

	/**
	 * @pinned_ctx: The OA context specific information.
	 */
	struct intel_context *pinned_ctx;

	/**
	 * @specific_ctx_id: The id of the specific context.
	 */
	u32 specific_ctx_id;

	/**
	 * @specific_ctx_id_mask: The mask used to mask the @specific_ctx_id
	 * bits.
	 */
	u32 specific_ctx_id_mask;

	/**
	 * @poll_check_timer: High resolution timer that will periodically
	 * check for data in the circular OA buffer for notifying userspace
	 * (e.g. during a read() or poll()).
	 */
	struct hrtimer poll_check_timer;

	/**
	 * @poll_wq: The wait queue that hrtimer callback wakes when it
	 * sees data ready to read in the circular OA buffer.
	 */
	wait_queue_head_t poll_wq;

	/**
	 * @pollin: Whether there is data available to read.
	 */
	bool pollin;

	/**
	 * @periodic: Whether periodic sampling is currently enabled.
	 */
	bool periodic;

	/**
	 * @period_exponent: The OA unit sampling frequency is derived from this.
	 */
	int period_exponent;

	/**
	 * @oa_buffer: State of the OA buffer.
	 */
	struct {
		/* GEM object backing the circular OA buffer */
		struct i915_vma *vma;
		/* CPU-side mapping of the OA buffer, presumably kmapped
		 * from @vma's pages — verify against the setup code */
		u8 *vaddr;
		/* last context id observed, presumably while parsing OA
		 * reports — NOTE(review): confirm against the read path */
		u32 last_ctx_id;
		/* selected OA report format and its size in bytes
		 * (cf. struct i915_oa_format) */
		int format;
		int format_size;
		/* buffer size expressed as a power-of-two exponent,
		 * presumably — TODO confirm the encoding */
		int size_exponent;

		/**
		 * @ptr_lock: Locks reads and writes to all head/tail state
		 *
		 * Consider: the head and tail pointer state needs to be read
		 * consistently from a hrtimer callback (atomic context) and
		 * read() fop (user context) with tail pointer updates happening
		 * in atomic context and head updates in user context and the
		 * (unlikely) possibility of read() errors needing to reset all
		 * head/tail state.
		 *
		 * Note: Contention/performance aren't currently a significant
		 * concern here considering the relatively low frequency of
		 * hrtimer callbacks (5ms period) and that reads typically only
		 * happen in response to a hrtimer event and likely complete
		 * before the next callback.
		 *
		 * Note: This lock is not held *while* reading and copying data
		 * to userspace so the value of head observed in hrtimer
		 * callbacks won't represent any partial consumption of data.
		 */
		spinlock_t ptr_lock;

		/**
		 * @aging_tail: The last HW tail reported by HW. The data
		 * might not have made it to memory yet though.
		 */
		u32 aging_tail;

		/**
		 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
		 * was read; used to determine when it is old enough to trust.
		 */
		u64 aging_timestamp;

		/**
		 * @head: Although we can always read back the head pointer register,
		 * we prefer to avoid trusting the HW state, just to avoid any
		 * risk that some hardware condition could somehow bump the
		 * head pointer unpredictably and cause us to forward the wrong
		 * OA buffer data to userspace.
		 */
		u32 head;

		/**
		 * @tail: The last verified tail that can be read by userspace.
		 */
		u32 tail;
	} oa_buffer;

	/**
	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
	 * reprogrammed.
	 */
	struct i915_vma *noa_wait;

	/**
	 * @poll_oa_period: The period in nanoseconds at which the OA
	 * buffer should be checked for available data.
	 */
	u64 poll_oa_period;
};
316 
/**
 * struct i915_oa_ops - Gen specific implementation of an OA unit stream
 */
struct i915_oa_ops {
	/**
	 * @is_valid_b_counter_reg: Validates register's address for
	 * programming boolean counters for a particular platform.
	 */
	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @is_valid_mux_reg: Validates register's address for programming mux
	 * for a particular platform.
	 */
	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @is_valid_flex_reg: Validates register's address for programming
	 * flex EU filtering for a particular platform.
	 */
	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);

	/**
	 * @enable_metric_set: Selects and applies any MUX configuration to set
	 * up the Boolean and Custom (B/C) counters that are part of the
	 * counter reports being sampled. May apply system constraints such as
	 * disabling EU clock gating as required.
	 *
	 * Presumably returns 0 on success or a negative errno, per the usual
	 * kernel convention — NOTE(review): verify against the implementations.
	 */
	int (*enable_metric_set)(struct i915_perf_stream *stream,
				 struct i915_active *active);

	/**
	 * @disable_metric_set: Remove system constraints associated with using
	 * the OA unit.
	 */
	void (*disable_metric_set)(struct i915_perf_stream *stream);

	/**
	 * @oa_enable: Enable periodic sampling
	 */
	void (*oa_enable)(struct i915_perf_stream *stream);

	/**
	 * @oa_disable: Disable periodic sampling
	 */
	void (*oa_disable)(struct i915_perf_stream *stream);

	/**
	 * @read: Copy data from the circular OA buffer into a given userspace
	 * buffer.
	 */
	int (*read)(struct i915_perf_stream *stream,
		    char __user *buf,
		    size_t count,
		    size_t *offset);

	/**
	 * @oa_hw_tail_read: read the OA tail pointer register
	 *
	 * In particular this enables us to share all the fiddly code for
	 * handling the OA unit tail pointer race that affects multiple
	 * generations.
	 */
	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
};
382 
/* Top-level perf state for the i915 driver instance. */
struct i915_perf {
	/* Back pointer to the owning device */
	struct drm_i915_private *i915;

	/* sysfs kobject under which OA metric configs are published */
	struct kobject *metrics_kobj;

	/*
	 * Lock associated with adding/modifying/removing OA configs
	 * in perf->metrics_idr.
	 */
	struct mutex metrics_lock;

	/*
	 * List of dynamic configurations (struct i915_oa_config), you
	 * need to hold perf->metrics_lock to access it.
	 */
	struct idr metrics_idr;

	/*
	 * Lock associated with anything below within this structure
	 * except exclusive_stream.
	 */
	struct mutex lock;

	/*
	 * The stream currently using the OA unit. exclusive_stream is
	 * explicitly excluded from the protection of @lock above, so it
	 * follows a different access rule when touched outside a syscall
	 * associated to its file descriptor — NOTE(review): the original
	 * comment is truncated here; confirm the exact locking/RCU rule
	 * against i915_perf.c before relying on it.
	 */
	struct i915_perf_stream *exclusive_stream;

	/**
	 * @sseu: sseu configuration selected to run while perf is active,
	 * applies to all contexts.
	 */
	struct intel_sseu sseu;

	/**
	 * For rate limiting any notifications of spurious
	 * invalid OA reports
	 */
	struct ratelimit_state spurious_report_rs;

	/**
	 * For rate limiting any notifications of tail pointer
	 * race.
	 */
	struct ratelimit_state tail_pointer_race;

	/* Gen7: OASTATUS1 bits latched across reads, presumably because the
	 * register is clear-on-read — TODO confirm */
	u32 gen7_latched_oastatus1;
	/* Offsets of the OACTXCONTROL / flex EU registers within a context
	 * image, presumably — verify against the LRC programming code */
	u32 ctx_oactxctrl_offset;
	u32 ctx_flexeu0_offset;

	/**
	 * The RPT_ID/reason field for Gen8+ includes a bit
	 * to determine if the CTX ID in the report is valid
	 * but the specific bit differs between Gen 8 and 9
	 */
	u32 gen8_valid_ctx_bit;

	/* Gen specific OA callbacks, see struct i915_oa_ops */
	struct i915_oa_ops ops;
	/* Table of report formats supported by this platform */
	const struct i915_oa_format *oa_formats;

	/**
	 * Use a format mask to store the supported formats
	 * for a platform.
	 */
#define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG)
	unsigned long format_mask[FORMAT_MASK_SIZE];

	/* Delay (presumably in ns — verify) applied when waiting for NOA
	 * logic to settle after reprogramming, cf. @noa_wait in the stream */
	atomic64_t noa_programming_delay;
};
454 
455 #endif /* _I915_PERF_TYPES_H_ */
456