1 /* SPDX-License-Identifier: MIT */ 2 /* 3 * Copyright © 2019 Intel Corporation 4 */ 5 6 #ifndef _I915_PERF_TYPES_H_ 7 #define _I915_PERF_TYPES_H_ 8 9 #include <linux/atomic.h> 10 #include <linux/device.h> 11 #include <linux/hrtimer.h> 12 #include <linux/llist.h> 13 #include <linux/poll.h> 14 #include <linux/sysfs.h> 15 #include <linux/types.h> 16 #include <linux/uuid.h> 17 #include <linux/wait.h> 18 19 #include "i915_reg.h" 20 #include "intel_wakeref.h" 21 22 struct drm_i915_private; 23 struct file; 24 struct i915_gem_context; 25 struct i915_perf; 26 struct i915_vma; 27 struct intel_context; 28 struct intel_engine_cs; 29 30 struct i915_oa_format { 31 u32 format; 32 int size; 33 }; 34 35 struct i915_oa_reg { 36 i915_reg_t addr; 37 u32 value; 38 }; 39 40 struct i915_oa_config { 41 struct i915_perf *perf; 42 43 char uuid[UUID_STRING_LEN + 1]; 44 int id; 45 46 const struct i915_oa_reg *mux_regs; 47 u32 mux_regs_len; 48 const struct i915_oa_reg *b_counter_regs; 49 u32 b_counter_regs_len; 50 const struct i915_oa_reg *flex_regs; 51 u32 flex_regs_len; 52 53 struct attribute_group sysfs_metric; 54 struct attribute *attrs[2]; 55 struct device_attribute sysfs_metric_id; 56 57 struct kref ref; 58 struct rcu_head rcu; 59 }; 60 61 struct i915_perf_stream; 62 63 /** 64 * struct i915_perf_stream_ops - the OPs to support a specific stream type 65 */ 66 struct i915_perf_stream_ops { 67 /** 68 * @enable: Enables the collection of HW samples, either in response to 69 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened 70 * without `I915_PERF_FLAG_DISABLED`. 71 */ 72 void (*enable)(struct i915_perf_stream *stream); 73 74 /** 75 * @disable: Disables the collection of HW samples, either in response 76 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying 77 * the stream. 78 */ 79 void (*disable)(struct i915_perf_stream *stream); 80 81 /** 82 * @poll_wait: Call poll_wait, passing a wait queue that will be woken 83 * once there is something ready to read() for the stream 84 */ 85 void (*poll_wait)(struct i915_perf_stream *stream, 86 struct file *file, 87 poll_table *wait); 88 89 /** 90 * @wait_unlocked: For handling a blocking read, wait until there is 91 * something to ready to read() for the stream. E.g. wait on the same 92 * wait queue that would be passed to poll_wait(). 93 */ 94 int (*wait_unlocked)(struct i915_perf_stream *stream); 95 96 /** 97 * @read: Copy buffered metrics as records to userspace 98 * **buf**: the userspace, destination buffer 99 * **count**: the number of bytes to copy, requested by userspace 100 * **offset**: zero at the start of the read, updated as the read 101 * proceeds, it represents how many bytes have been copied so far and 102 * the buffer offset for copying the next record. 103 * 104 * Copy as many buffered i915 perf samples and records for this stream 105 * to userspace as will fit in the given buffer. 106 * 107 * Only write complete records; returning -%ENOSPC if there isn't room 108 * for a complete record. 109 * 110 * Return any error condition that results in a short read such as 111 * -%ENOSPC or -%EFAULT, even though these may be squashed before 112 * returning to userspace. 113 */ 114 int (*read)(struct i915_perf_stream *stream, 115 char __user *buf, 116 size_t count, 117 size_t *offset); 118 119 /** 120 * @destroy: Cleanup any stream specific resources. 121 * 122 * The stream will always be disabled before this is called. 123 */ 124 void (*destroy)(struct i915_perf_stream *stream); 125 }; 126 127 /** 128 * struct i915_perf_stream - state for a single open stream FD 129 */ 130 struct i915_perf_stream { 131 /** 132 * @perf: i915_perf backpointer 133 */ 134 struct i915_perf *perf; 135 136 /** 137 * @uncore: mmio access path 138 */ 139 struct intel_uncore *uncore; 140 141 /** 142 * @engine: Engine associated with this performance stream. 143 */ 144 struct intel_engine_cs *engine; 145 146 /** 147 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` 148 * properties given when opening a stream, representing the contents 149 * of a single sample as read() by userspace. 150 */ 151 u32 sample_flags; 152 153 /** 154 * @sample_size: Considering the configured contents of a sample 155 * combined with the required header size, this is the total size 156 * of a single sample record. 157 */ 158 int sample_size; 159 160 /** 161 * @ctx: %NULL if measuring system-wide across all contexts or a 162 * specific context that is being monitored. 163 */ 164 struct i915_gem_context *ctx; 165 166 /** 167 * @enabled: Whether the stream is currently enabled, considering 168 * whether the stream was opened in a disabled state and based 169 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. 170 */ 171 bool enabled; 172 173 /** 174 * @hold_preemption: Whether preemption is put on hold for command 175 * submissions done on the @ctx. This is useful for some drivers that 176 * cannot easily post process the OA buffer context to subtract delta 177 * of performance counters not associated with @ctx. 178 */ 179 bool hold_preemption; 180 181 /** 182 * @ops: The callbacks providing the implementation of this specific 183 * type of configured stream. 184 */ 185 const struct i915_perf_stream_ops *ops; 186 187 /** 188 * @oa_config: The OA configuration used by the stream. 189 */ 190 struct i915_oa_config *oa_config; 191 192 /** 193 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily 194 * each time @oa_config changes. 195 */ 196 struct llist_head oa_config_bos; 197 198 /** 199 * @pinned_ctx: The OA context specific information. 200 */ 201 struct intel_context *pinned_ctx; 202 203 /** 204 * @specific_ctx_id: The id of the specific context. 205 */ 206 u32 specific_ctx_id; 207 208 /** 209 * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits. 210 */ 211 u32 specific_ctx_id_mask; 212 213 /** 214 * @poll_check_timer: High resolution timer that will periodically 215 * check for data in the circular OA buffer for notifying userspace 216 * (e.g. during a read() or poll()). 217 */ 218 struct hrtimer poll_check_timer; 219 220 /** 221 * @poll_wq: The wait queue that hrtimer callback wakes when it 222 * sees data ready to read in the circular OA buffer. 223 */ 224 wait_queue_head_t poll_wq; 225 226 /** 227 * @pollin: Whether there is data available to read. 228 */ 229 bool pollin; 230 231 /** 232 * @periodic: Whether periodic sampling is currently enabled. 233 */ 234 bool periodic; 235 236 /** 237 * @period_exponent: The OA unit sampling frequency is derived from this. 238 */ 239 int period_exponent; 240 241 /** 242 * @oa_buffer: State of the OA buffer. 243 */ 244 struct { 245 struct i915_vma *vma; 246 u8 *vaddr; 247 u32 last_ctx_id; 248 int format; 249 int format_size; 250 int size_exponent; 251 252 /** 253 * @ptr_lock: Locks reads and writes to all head/tail state 254 * 255 * Consider: the head and tail pointer state needs to be read 256 * consistently from a hrtimer callback (atomic context) and 257 * read() fop (user context) with tail pointer updates happening 258 * in atomic context and head updates in user context and the 259 * (unlikely) possibility of read() errors needing to reset all 260 * head/tail state. 261 * 262 * Note: Contention/performance aren't currently a significant 263 * concern here considering the relatively low frequency of 264 * hrtimer callbacks (5ms period) and that reads typically only 265 * happen in response to a hrtimer event and likely complete 266 * before the next callback. 267 * 268 * Note: This lock is not held *while* reading and copying data 269 * to userspace so the value of head observed in htrimer 270 * callbacks won't represent any partial consumption of data. 271 */ 272 spinlock_t ptr_lock; 273 274 /** 275 * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to 276 * used for reading. 277 * 278 * Initial values of 0xffffffff are invalid and imply that an 279 * update is required (and should be ignored by an attempted 280 * read) 281 */ 282 struct { 283 u32 offset; 284 } tails[2]; 285 286 /** 287 * @aged_tail_idx: Index for the aged tail ready to read() data up to. 288 */ 289 unsigned int aged_tail_idx; 290 291 /** 292 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer 293 * was read; used to determine when it is old enough to trust. 294 */ 295 u64 aging_timestamp; 296 297 /** 298 * @head: Although we can always read back the head pointer register, 299 * we prefer to avoid trusting the HW state, just to avoid any 300 * risk that some hardware condition could * somehow bump the 301 * head pointer unpredictably and cause us to forward the wrong 302 * OA buffer data to userspace. 303 */ 304 u32 head; 305 } oa_buffer; 306 307 /** 308 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be 309 * reprogrammed. 310 */ 311 struct i915_vma *noa_wait; 312 }; 313 314 /** 315 * struct i915_oa_ops - Gen specific implementation of an OA unit stream 316 */ 317 struct i915_oa_ops { 318 /** 319 * @is_valid_b_counter_reg: Validates register's address for 320 * programming boolean counters for a particular platform. 321 */ 322 bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr); 323 324 /** 325 * @is_valid_mux_reg: Validates register's address for programming mux 326 * for a particular platform. 327 */ 328 bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr); 329 330 /** 331 * @is_valid_flex_reg: Validates register's address for programming 332 * flex EU filtering for a particular platform. 333 */ 334 bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr); 335 336 /** 337 * @enable_metric_set: Selects and applies any MUX configuration to set 338 * up the Boolean and Custom (B/C) counters that are part of the 339 * counter reports being sampled. May apply system constraints such as 340 * disabling EU clock gating as required. 341 */ 342 int (*enable_metric_set)(struct i915_perf_stream *stream); 343 344 /** 345 * @disable_metric_set: Remove system constraints associated with using 346 * the OA unit. 347 */ 348 void (*disable_metric_set)(struct i915_perf_stream *stream); 349 350 /** 351 * @oa_enable: Enable periodic sampling 352 */ 353 void (*oa_enable)(struct i915_perf_stream *stream); 354 355 /** 356 * @oa_disable: Disable periodic sampling 357 */ 358 void (*oa_disable)(struct i915_perf_stream *stream); 359 360 /** 361 * @read: Copy data from the circular OA buffer into a given userspace 362 * buffer. 363 */ 364 int (*read)(struct i915_perf_stream *stream, 365 char __user *buf, 366 size_t count, 367 size_t *offset); 368 369 /** 370 * @oa_hw_tail_read: read the OA tail pointer register 371 * 372 * In particular this enables us to share all the fiddly code for 373 * handling the OA unit tail pointer race that affects multiple 374 * generations. 375 */ 376 u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); 377 }; 378 379 struct i915_perf { 380 struct drm_i915_private *i915; 381 382 struct kobject *metrics_kobj; 383 384 /* 385 * Lock associated with adding/modifying/removing OA configs 386 * in perf->metrics_idr. 387 */ 388 struct mutex metrics_lock; 389 390 /* 391 * List of dynamic configurations (struct i915_oa_config), you 392 * need to hold perf->metrics_lock to access it. 393 */ 394 struct idr metrics_idr; 395 396 /* 397 * Lock associated with anything below within this structure 398 * except exclusive_stream. 399 */ 400 struct mutex lock; 401 402 /* 403 * The stream currently using the OA unit. If accessed 404 * outside a syscall associated to its file 405 * descriptor. 406 */ 407 struct i915_perf_stream *exclusive_stream; 408 409 /** 410 * For rate limiting any notifications of spurious 411 * invalid OA reports 412 */ 413 struct ratelimit_state spurious_report_rs; 414 415 struct i915_oa_config test_config; 416 417 u32 gen7_latched_oastatus1; 418 u32 ctx_oactxctrl_offset; 419 u32 ctx_flexeu0_offset; 420 421 /** 422 * The RPT_ID/reason field for Gen8+ includes a bit 423 * to determine if the CTX ID in the report is valid 424 * but the specific bit differs between Gen 8 and 9 425 */ 426 u32 gen8_valid_ctx_bit; 427 428 struct i915_oa_ops ops; 429 const struct i915_oa_format *oa_formats; 430 431 atomic64_t noa_programming_delay; 432 }; 433 434 #endif /* _I915_PERF_TYPES_H_ */ 435