/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2019 Intel Corporation
 */

#ifndef _I915_PERF_TYPES_H_
#define _I915_PERF_TYPES_H_

#include <linux/atomic.h>
#include <linux/device.h>
#include <linux/hrtimer.h>
#include <linux/llist.h>
#include <linux/poll.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/uuid.h>
#include <linux/wait.h>
#include <uapi/drm/i915_drm.h>

#include "gt/intel_engine_types.h"
#include "gt/intel_sseu.h"
#include "i915_reg_defs.h"
#include "intel_uncore.h"
#include "intel_wakeref.h"

struct drm_i915_private;
struct file;
struct i915_active;
struct i915_gem_context;
struct i915_perf;
struct i915_vma;
struct intel_context;
struct intel_engine_cs;

enum {
        PERF_GROUP_OAG = 0,
        PERF_GROUP_OAM_SAMEDIA_0 = 0,

        PERF_GROUP_MAX,
        PERF_GROUP_INVALID = U32_MAX,
};

enum report_header {
        HDR_32_BIT = 0,
        HDR_64_BIT,
};

struct i915_perf_regs {
        u32 base;
        i915_reg_t oa_head_ptr;
        i915_reg_t oa_tail_ptr;
        i915_reg_t oa_buffer;
        i915_reg_t oa_ctx_ctrl;
        i915_reg_t oa_ctrl;
        i915_reg_t oa_debug;
        i915_reg_t oa_status;
        u32 oa_ctrl_counter_format_shift;
};

enum oa_type {
        TYPE_OAG,
        TYPE_OAM,
};

struct i915_oa_format {
        u32 format;
        int size;
        int type;
        enum report_header header;
};

struct i915_oa_reg {
        i915_reg_t addr;
        u32 value;
};

struct i915_oa_config {
        struct i915_perf *perf;

        char uuid[UUID_STRING_LEN + 1];
        int id;

        const struct i915_oa_reg *mux_regs;
        u32 mux_regs_len;
        const struct i915_oa_reg *b_counter_regs;
        u32 b_counter_regs_len;
        const struct i915_oa_reg *flex_regs;
        u32 flex_regs_len;

        struct attribute_group sysfs_metric;
        struct attribute *attrs[2];
        struct kobj_attribute sysfs_metric_id;

        struct kref ref;
        struct rcu_head rcu;
};

struct i915_perf_stream;

/**
 * struct i915_perf_stream_ops - the OPs to support a specific stream type
 */
struct i915_perf_stream_ops {
        /**
         * @enable: Enables the collection of HW samples, either in response to
         * `I915_PERF_IOCTL_ENABLE` or implicitly called when the stream is
         * opened without `I915_PERF_FLAG_DISABLED`.
         */
        void (*enable)(struct i915_perf_stream *stream);

        /**
         * @disable: Disables the collection of HW samples, either in response
         * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
         * the stream.
         */
        void (*disable)(struct i915_perf_stream *stream);

        /**
         * @poll_wait: Call poll_wait, passing a wait queue that will be woken
         * once there is something ready to read() for the stream.
         */
#ifdef notyet
        void (*poll_wait)(struct i915_perf_stream *stream,
                          struct file *file,
                          poll_table *wait);
#endif

        /**
         * @wait_unlocked: For handling a blocking read, wait until there is
         * something ready to read() for the stream. E.g. wait on the same
         * wait queue that would be passed to poll_wait().
         */
        int (*wait_unlocked)(struct i915_perf_stream *stream);

        /**
         * @read: Copy buffered metrics as records to userspace
         * **buf**: the userspace destination buffer
         * **count**: the number of bytes to copy, requested by userspace
         * **offset**: zero at the start of the read, updated as the read
         * proceeds; it represents how many bytes have been copied so far and
         * the buffer offset for copying the next record.
         *
         * Copy as many buffered i915 perf samples and records for this stream
         * to userspace as will fit in the given buffer.
         *
         * Only write complete records; return -%ENOSPC if there isn't room
         * for a complete record.
         *
         * Return any error condition that results in a short read such as
         * -%ENOSPC or -%EFAULT, even though these may be squashed before
         * returning to userspace.
         */
        int (*read)(struct i915_perf_stream *stream,
                    char __user *buf,
                    size_t count,
                    size_t *offset);

        /**
         * @destroy: Cleanup any stream specific resources.
         *
         * The stream will always be disabled before this is called.
         */
        void (*destroy)(struct i915_perf_stream *stream);
};
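
/*
 * Illustrative sketch only (not compiled and not part of the driver): one
 * possible shape of a &i915_perf_stream_ops->read() implementation that
 * follows the contract documented above: only complete records are written,
 * -ENOSPC is returned when the next record does not fit, and @offset
 * accumulates the bytes copied so far. example_next_record_size() and
 * example_copy_record() are hypothetical helpers standing in for the real
 * OA buffer parsing in i915_perf.c.
 */
#if 0
static int example_read(struct i915_perf_stream *stream,
                        char __user *buf,
                        size_t count,
                        size_t *offset)
{
        size_t record_size;

        while ((record_size = example_next_record_size(stream))) {
                /* Only write complete records. */
                if (*offset + record_size > count)
                        return -ENOSPC;

                if (example_copy_record(stream, buf + *offset, record_size))
                        return -EFAULT;

                /* @offset becomes the buffer offset for the next record. */
                *offset += record_size;
        }

        return 0;
}
#endif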

/**
 * struct i915_perf_stream - state for a single open stream FD
 */
struct i915_perf_stream {
        /**
         * @perf: i915_perf backpointer
         */
        struct i915_perf *perf;

        /**
         * @uncore: mmio access path
         */
        struct intel_uncore *uncore;

        /**
         * @engine: Engine associated with this performance stream.
         */
        struct intel_engine_cs *engine;

        /**
         * @lock: Lock associated with operations on stream
         */
        struct rwlock lock;

        /**
         * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
         * properties given when opening a stream, representing the contents
         * of a single sample as read() by userspace.
         */
        u32 sample_flags;

        /**
         * @sample_size: Considering the configured contents of a sample
         * combined with the required header size, this is the total size
         * of a single sample record.
         */
        int sample_size;

        /**
         * @ctx: %NULL if measuring system-wide across all contexts or a
         * specific context that is being monitored.
         */
        struct i915_gem_context *ctx;

        /**
         * @enabled: Whether the stream is currently enabled, considering
         * whether the stream was opened in a disabled state and based
         * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
         */
        bool enabled;

        /**
         * @hold_preemption: Whether preemption is put on hold for command
         * submissions done on the @ctx. This is useful for some drivers that
         * cannot easily post-process the OA buffer context to subtract the
         * delta of performance counters not associated with @ctx.
         */
        bool hold_preemption;

        /**
         * @ops: The callbacks providing the implementation of this specific
         * type of configured stream.
         */
        const struct i915_perf_stream_ops *ops;

        /**
         * @oa_config: The OA configuration used by the stream.
         */
        struct i915_oa_config *oa_config;

        /**
         * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
         * each time @oa_config changes.
         */
        struct llist_head oa_config_bos;

        /**
         * @pinned_ctx: The OA context specific information.
         */
        struct intel_context *pinned_ctx;

        /**
         * @specific_ctx_id: The id of the specific context.
         */
        u32 specific_ctx_id;

        /**
         * @specific_ctx_id_mask: The mask used to mask specific_ctx_id bits.
         */
        u32 specific_ctx_id_mask;

        /**
         * @poll_check_timer: High resolution timer that will periodically
         * check for data in the circular OA buffer for notifying userspace
         * (e.g. during a read() or poll()).
         */
        struct hrtimer poll_check_timer;

        /**
         * @poll_wq: The wait queue that the hrtimer callback wakes when it
         * sees data ready to read in the circular OA buffer.
         */
        wait_queue_head_t poll_wq;

        /**
         * @pollin: Whether there is data available to read.
         */
        bool pollin;

        /**
         * @periodic: Whether periodic sampling is currently enabled.
         */
        bool periodic;

        /**
         * @period_exponent: The OA unit sampling frequency is derived from this.
         */
        int period_exponent;

        /**
         * @oa_buffer: State of the OA buffer.
         */
        struct {
                const struct i915_oa_format *format;
                struct i915_vma *vma;
                u8 *vaddr;
                u32 last_ctx_id;
                int size_exponent;

                /**
                 * @ptr_lock: Locks reads and writes to all head/tail state
                 *
                 * Consider: the head and tail pointer state needs to be read
                 * consistently from a hrtimer callback (atomic context) and
                 * read() fop (user context) with tail pointer updates happening
                 * in atomic context and head updates in user context and the
                 * (unlikely) possibility of read() errors needing to reset all
                 * head/tail state.
                 *
                 * Note: Contention/performance aren't currently a significant
                 * concern here considering the relatively low frequency of
                 * hrtimer callbacks (5ms period) and that reads typically only
                 * happen in response to a hrtimer event and likely complete
                 * before the next callback.
                 *
                 * Note: This lock is not held *while* reading and copying data
                 * to userspace so the value of head observed in hrtimer
                 * callbacks won't represent any partial consumption of data.
                 */
                spinlock_t ptr_lock;

                /**
                 * @head: Although we can always read back the head pointer register,
                 * we prefer to avoid trusting the HW state, just to avoid any
                 * risk that some hardware condition could somehow bump the
                 * head pointer unpredictably and cause us to forward the wrong
                 * OA buffer data to userspace.
                 */
                u32 head;

                /**
                 * @tail: The last verified tail that can be read by userspace.
                 */
                u32 tail;
        } oa_buffer;

        /**
         * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
         * reprogrammed.
         */
        struct i915_vma *noa_wait;

        /**
         * @poll_oa_period: The period in nanoseconds at which the OA
         * buffer should be checked for available data.
         */
        u64 poll_oa_period;

        /**
         * @override_gucrc: GuC RC has been overridden for the perf stream,
         * and we need to restore the default configuration on release.
         */
        bool override_gucrc;
};
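
/*
 * Illustrative sketch only (not part of the driver): the locking pattern
 * described for &i915_perf_stream.oa_buffer.ptr_lock above. Head and tail
 * are sampled together under the spinlock (irqsave, since the hrtimer
 * callback runs in atomic context), and the lock is dropped again before
 * any data is copied to userspace. The helper name is hypothetical.
 */
static inline void
example_snapshot_oa_pointers(struct i915_perf_stream *stream,
                             u32 *head, u32 *tail)
{
        unsigned long flags;

        spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
        *head = stream->oa_buffer.head;
        *tail = stream->oa_buffer.tail;
        spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

        /* Any copy to userspace of the [head, tail) range happens after this. */
}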

/**
 * struct i915_oa_ops - Gen specific implementation of an OA unit stream
 */
struct i915_oa_ops {
        /**
         * @is_valid_b_counter_reg: Validates register's address for
         * programming boolean counters for a particular platform.
         */
        bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);

        /**
         * @is_valid_mux_reg: Validates register's address for programming mux
         * for a particular platform.
         */
        bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);

        /**
         * @is_valid_flex_reg: Validates register's address for programming
         * flex EU filtering for a particular platform.
         */
        bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);

        /**
         * @enable_metric_set: Selects and applies any MUX configuration to set
         * up the Boolean and Custom (B/C) counters that are part of the
         * counter reports being sampled. May apply system constraints such as
         * disabling EU clock gating as required.
         */
        int (*enable_metric_set)(struct i915_perf_stream *stream,
                                 struct i915_active *active);

        /**
         * @disable_metric_set: Remove system constraints associated with using
         * the OA unit.
         */
        void (*disable_metric_set)(struct i915_perf_stream *stream);

        /**
         * @oa_enable: Enable periodic sampling
         */
        void (*oa_enable)(struct i915_perf_stream *stream);

        /**
         * @oa_disable: Disable periodic sampling
         */
        void (*oa_disable)(struct i915_perf_stream *stream);

        /**
         * @read: Copy data from the circular OA buffer into a given userspace
         * buffer.
         */
        int (*read)(struct i915_perf_stream *stream,
                    char __user *buf,
                    size_t count,
                    size_t *offset);

        /**
         * @oa_hw_tail_read: read the OA tail pointer register
         *
         * In particular this enables us to share all the fiddly code for
         * handling the OA unit tail pointer race that affects multiple
         * generations.
         */
        u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
};

struct i915_perf_group {
        /*
         * @exclusive_stream: The stream currently using the OA unit. This is
         * sometimes accessed outside a syscall associated to its file
         * descriptor.
         */
        struct i915_perf_stream *exclusive_stream;

        /*
         * @num_engines: The number of engines using this OA unit.
         */
        u32 num_engines;

        /*
         * @regs: OA buffer register group for programming the OA unit.
         */
        struct i915_perf_regs regs;

        /*
         * @type: Type of OA unit - OAM, OAG etc.
         */
        enum oa_type type;
};

struct i915_perf_gt {
        /*
         * Lock associated with anything below within this structure.
         */
        struct rwlock lock;

        /**
         * @sseu: sseu configuration selected to run while perf is active,
         * applies to all contexts.
         */
        struct intel_sseu sseu;

        /**
         * @num_perf_groups: number of perf groups per gt.
         */
        u32 num_perf_groups;

        /*
         * @group: list of OA groups - one for each OA buffer.
         */
        struct i915_perf_group *group;
};
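
/*
 * Illustrative sketch only (not part of the driver): walking the per-gt OA
 * groups declared above. Callers would be expected to hold the gt-level
 * perf lock; the helper name and its use are hypothetical.
 */
static inline bool
example_gt_has_exclusive_stream(const struct i915_perf_gt *perf_gt)
{
        u32 i;

        for (i = 0; i < perf_gt->num_perf_groups; i++)
                if (perf_gt->group[i].exclusive_stream)
                        return true;

        return false;
}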

struct i915_perf {
        struct drm_i915_private *i915;

        struct kobject *metrics_kobj;

        /*
         * Lock associated with adding/modifying/removing OA configs
         * in perf->metrics_idr.
         */
        struct rwlock metrics_lock;

        /*
         * List of dynamic configurations (struct i915_oa_config); you
         * need to hold perf->metrics_lock to access it.
         */
        struct idr metrics_idr;

        /**
         * For rate limiting any notifications of spurious
         * invalid OA reports.
         */
        struct ratelimit_state spurious_report_rs;

        /**
         * For rate limiting any notifications of the tail pointer
         * race.
         */
        struct ratelimit_state tail_pointer_race;

        u32 gen7_latched_oastatus1;
        u32 ctx_oactxctrl_offset;
        u32 ctx_flexeu0_offset;

        /**
         * The RPT_ID/reason field for Gen8+ includes a bit
         * to determine if the CTX ID in the report is valid,
         * but the specific bit differs between Gen 8 and 9.
         */
        u32 gen8_valid_ctx_bit;

        struct i915_oa_ops ops;
        const struct i915_oa_format *oa_formats;

        /**
         * Use a format mask to store the supported formats
         * for a platform.
         */
#define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG)
        unsigned long format_mask[FORMAT_MASK_SIZE];

        atomic64_t noa_programming_delay;
};
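
/*
 * Illustrative sketch only (not part of the driver): querying the
 * format_mask declared above. Each supported OA report format sets one bit;
 * the real helpers that populate and test the mask live in i915_perf.c.
 * The function name here is hypothetical.
 */
static inline bool
example_oa_format_supported(const struct i915_perf *perf, u32 format)
{
        if (format / BITS_PER_LONG >= FORMAT_MASK_SIZE)
                return false;

        return (perf->format_mask[format / BITS_PER_LONG] >>
                (format % BITS_PER_LONG)) & 1;
}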

#endif /* _I915_PERF_TYPES_H_ */