1 /* 2 * Copyright © 2015-2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Robert Bragg <robert@sixbynine.org> 25 */ 26 27 28 /** 29 * DOC: i915 Perf Overview 30 * 31 * Gen graphics supports a large number of performance counters that can help 32 * driver and application developers understand and optimize their use of the 33 * GPU. 34 * 35 * This i915 perf interface enables userspace to configure and open a file 36 * descriptor representing a stream of GPU metrics which can then be read() as 37 * a stream of sample records. 38 * 39 * The interface is particularly suited to exposing buffered metrics that are 40 * captured by DMA from the GPU, unsynchronized with and unrelated to the CPU. 
41 * 42 * Streams representing a single context are accessible to applications with a 43 * corresponding drm file descriptor, such that OpenGL can use the interface 44 * without special privileges. Access to system-wide metrics requires root 45 * privileges by default, unless changed via the dev.i915.perf_stream_paranoid 46 * sysctl option. 47 * 48 */ 49 50 /** 51 * DOC: i915 Perf History and Comparison with Core Perf 52 * 53 * The interface was initially inspired by the core Perf infrastructure but 54 * some notable differences are: 55 * 56 * i915 perf file descriptors represent a "stream" instead of an "event"; where 57 * a perf event primarily corresponds to a single 64bit value, while a stream 58 * might sample sets of tightly-coupled counters, depending on the 59 * configuration. For example the Gen OA unit isn't designed to support 60 * orthogonal configurations of individual counters; it's configured for a set 61 * of related counters. Samples for an i915 perf stream capturing OA metrics 62 * will include a set of counter values packed in a compact HW specific format. 63 * The OA unit supports a number of different packing formats which can be 64 * selected by the user opening the stream. Perf has support for grouping 65 * events, but each event in the group is configured, validated and 66 * authenticated individually with separate system calls. 67 * 68 * i915 perf stream configurations are provided as an array of u64 (key,value) 69 * pairs, instead of a fixed struct with multiple miscellaneous config members, 70 * interleaved with event-type specific members. 71 * 72 * i915 perf doesn't support exposing metrics via an mmap'd circular buffer. 73 * The supported metrics are being written to memory by the GPU unsynchronized 74 * with the CPU, using HW specific packing formats for counter sets.
Sometimes 75 * the constraints on HW configuration require reports to be filtered before it 76 * would be acceptable to expose them to unprivileged applications - to hide 77 * the metrics of other processes/contexts. For these use cases a read() based 78 * interface is a good fit, and provides an opportunity to filter data as it 79 * gets copied from the GPU mapped buffers to userspace buffers. 80 * 81 * 82 * Issues hit with first prototype based on Core Perf 83 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 84 * 85 * The first prototype of this driver was based on the core perf 86 * infrastructure, and while we did make that mostly work, with some changes to 87 * perf, we found we were breaking or working around too many assumptions baked 88 * into perf's currently cpu centric design. 89 * 90 * In the end we didn't see a clear benefit to making perf's implementation and 91 * interface more complex by changing design assumptions while we knew we still 92 * wouldn't be able to use any existing perf based userspace tools. 93 * 94 * Also considering the Gen specific nature of the Observability hardware and 95 * how userspace will sometimes need to combine i915 perf OA metrics with 96 * side-band OA data captured via MI_REPORT_PERF_COUNT commands; we're 97 * expecting the interface to be used by a platform specific userspace such as 98 * OpenGL or tools. This is to say; we aren't inherently missing out on having 99 * a standard vendor/architecture agnostic interface by not using perf. 
100 * 101 * 102 * For posterity, in case we might re-visit trying to adapt core perf to be 103 * better suited to exposing i915 metrics these were the main pain points we 104 * hit: 105 * 106 * - The perf based OA PMU driver broke some significant design assumptions: 107 * 108 * Existing perf pmus are used for profiling work on a cpu and we were 109 * introducing the idea of _IS_DEVICE pmus with different security 110 * implications, the need to fake cpu-related data (such as user/kernel 111 * registers) to fit with perf's current design, and adding _DEVICE records 112 * as a way to forward device-specific status records. 113 * 114 * The OA unit writes reports of counters into a circular buffer, without 115 * involvement from the CPU, making our PMU driver the first of a kind. 116 * 117 * Given the way we were periodically forwarding data from the GPU-mapped, OA 118 * buffer to perf's buffer, those bursts of sample writes looked to perf like 119 * we were sampling too fast and so we had to subvert its throttling checks. 120 * 121 * Perf supports groups of counters and allows those to be read via 122 * transactions internally but transactions currently seem designed to be 123 * explicitly initiated from the cpu (say in response to a userspace read()) 124 * and while we could pull a report out of the OA buffer we can't 125 * trigger a report from the cpu on demand. 126 * 127 * Related to being report based; the OA counters are configured in HW as a 128 * set while perf generally expects counter configurations to be orthogonal. 129 * Although counters can be associated with a group leader as they are 130 * opened, there's no clear precedent for being able to provide group-wide 131 * configuration attributes (for example we want to let userspace choose the 132 * OA unit report format used to capture all counters in a set, or specify a 133 * GPU context to filter metrics on).
We avoided using perf's grouping 134 * feature and forwarded OA reports to userspace via perf's 'raw' sample 135 * field. This suited our userspace well considering how coupled the counters 136 * are when dealing with normalizing. It would be inconvenient to split 137 * counters up into separate events, only to require userspace to recombine 138 * them. For Mesa it's also convenient to be forwarded raw, periodic reports 139 * for combining with the side-band raw reports it captures using 140 * MI_REPORT_PERF_COUNT commands. 141 * 142 * - As a side note on perf's grouping feature; there was also some concern 143 * that using PERF_FORMAT_GROUP as a way to pack together counter values 144 * would quite drastically inflate our sample sizes, which would likely 145 * lower the effective sampling resolutions we could use when the available 146 * memory bandwidth is limited. 147 * 148 * With the OA unit's report formats, counters are packed together as 32 149 * or 40bit values, with the largest report size being 256 bytes. 150 * 151 * PERF_FORMAT_GROUP values are 64bit, but there doesn't appear to be a 152 * documented ordering to the values, implying PERF_FORMAT_ID must also be 153 * used to add a 64bit ID before each value; giving 16 bytes per counter. 154 * 155 * Related to counter orthogonality; we can't time share the OA unit, while 156 * event scheduling is a central design idea within perf for allowing 157 * userspace to open + enable more events than can be configured in HW at any 158 * one time. The OA unit is not designed to allow re-configuration while in 159 * use. We can't reconfigure the OA unit without losing internal OA unit 160 * state which we can't access explicitly to save and restore. Reconfiguring 161 * the OA unit is also relatively slow, involving ~100 register writes. 
From 162 * userspace Mesa also depends on a stable OA configuration when emitting 163 * MI_REPORT_PERF_COUNT commands and importantly the OA unit can't be 164 * disabled while there are outstanding MI_RPC commands lest we hang the 165 * command streamer. 166 * 167 * The contents of sample records aren't extensible by device drivers (i.e. 168 * the sample_type bits). As an example; Sourab Gupta had been looking to 169 * attach GPU timestamps to our OA samples. We were shoehorning OA reports 170 * into sample records by using the 'raw' field, but it's tricky to pack more 171 * than one thing into this field because events/core.c currently only lets a 172 * pmu give a single raw data pointer plus len which will be copied into the 173 * ring buffer. To include more than the OA report we'd have to copy the 174 * report into an intermediate larger buffer. I'd been considering allowing a 175 * vector of data+len values to be specified for copying the raw data, but 176 * it felt like a kludge to be using the raw field for this purpose. 177 * 178 * - It felt like our perf based PMU was making some technical compromises 179 * just for the sake of using perf: 180 * 181 * perf_event_open() requires events to either relate to a pid or a specific 182 * cpu core, while our device pmu related to neither. Events opened with a 183 * pid will be automatically enabled/disabled according to the scheduling of 184 * that process - so not appropriate for us. When an event is related to a 185 * cpu id, perf ensures pmu methods will be invoked via an inter-processor 186 * interrupt on that core. To avoid invasive changes our userspace opened OA 187 * perf events for a specific cpu. This was workable but it meant the 188 * majority of the OA driver ran in atomic context, including all OA report 189 * forwarding, which wasn't really necessary in our case and seems to make 190 * our locking requirements somewhat complex as we handled the interaction 191 * with the rest of the i915 driver.
192 */ 193 194 #include <linux/anon_inodes.h> 195 #include <linux/sizes.h> 196 #include <linux/uuid.h> 197 198 #include "i915_drv.h" 199 #include "i915_oa_hsw.h" 200 #include "i915_oa_bdw.h" 201 #include "i915_oa_chv.h" 202 #include "i915_oa_sklgt2.h" 203 #include "i915_oa_sklgt3.h" 204 #include "i915_oa_sklgt4.h" 205 #include "i915_oa_bxt.h" 206 #include "i915_oa_kblgt2.h" 207 #include "i915_oa_kblgt3.h" 208 #include "i915_oa_glk.h" 209 #include "i915_oa_cflgt2.h" 210 211 /* HW requires this to be a power of two, between 128k and 16M, though driver 212 * is currently generally designed assuming the largest 16M size is used such 213 * that the overflow cases are unlikely in normal operation. 214 */ 215 #define OA_BUFFER_SIZE SZ_16M 216 217 #define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1)) 218 219 /* There's a HW race condition between OA unit tail pointer register updates and 220 * writes to memory whereby the tail pointer can sometimes get ahead of what's 221 * been written out to the OA buffer so far. 222 * 223 * Although this can be observed explicitly by checking for a zeroed report-id 224 * field in tail reports, it seems preferable to account for this earlier e.g. 225 * as part of the _oa_buffer_is_empty checks to minimize -EAGAIN polling cycles 226 * in this situation. 227 * 228 * To give time for the most recent reports to land before they may be copied to 229 * userspace, the driver operates as if the tail pointer effectively lags behind 230 * the HW tail pointer by 'tail_margin' bytes. The margin in bytes is calculated 231 * based on this constant in nanoseconds, the current OA sampling exponent 232 * and current report size. 233 * 234 * There is also a fallback check while reading to simply skip over reports with 235 * a zeroed report-id. 236 */ 237 #define OA_TAIL_MARGIN_NSEC 100000ULL 238 239 /* frequency for checking whether the OA unit has written new reports to the 240 * circular OA buffer... 
241 */ 242 #define POLL_FREQUENCY 200 243 #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY) 244 245 #if 0 246 /* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */ 247 static int zero; 248 static int one = 1; 249 static u32 i915_perf_stream_paranoid = true; 250 251 /* The maximum exponent the hardware accepts is 63 (essentially it selects one 252 * of the 64bit timestamp bits to trigger reports from) but there's currently 253 * no known use case for sampling as infrequently as once per 47 thousand years. 254 * 255 * Since the timestamps included in OA reports are only 32bits it seems 256 * reasonable to limit the OA exponent where it's still possible to account for 257 * overflow in OA report timestamps. 258 */ 259 #define OA_EXPONENT_MAX 31 260 261 #define INVALID_CTX_ID 0xffffffff 262 263 /* On Gen8+ automatically triggered OA reports include a 'reason' field... */ 264 #define OAREPORT_REASON_MASK 0x3f 265 #define OAREPORT_REASON_SHIFT 19 266 #define OAREPORT_REASON_TIMER (1<<0) 267 #define OAREPORT_REASON_CTX_SWITCH (1<<3) 268 #define OAREPORT_REASON_CLK_RATIO (1<<5) 269 270 271 /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate 272 * 273 * The highest sampling frequency we can theoretically program the OA unit 274 * with is always half the timestamp frequency: E.g. 6.25Mhz for Haswell. 275 * 276 * Initialized just before we register the sysctl parameter. 277 */ 278 static int oa_sample_rate_hard_limit; 279 280 /* Theoretically we can program the OA unit to sample every 160ns but don't 281 * allow that by default unless root... 282 * 283 * The default threshold of 100000Hz is based on perf's similar 284 * kernel.perf_event_max_sample_rate sysctl parameter. 285 */ 286 static u32 i915_oa_max_sample_rate = 100000; 287 288 /* XXX: beware if future OA HW adds new report formats that the current 289 * code assumes all reports have a power-of-two size and ~(size - 1) can 290 * be used as a mask to align the OA tail pointer. 
291 */ 292 static struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = { 293 [I915_OA_FORMAT_A13] = { 0, 64 }, 294 [I915_OA_FORMAT_A29] = { 1, 128 }, 295 [I915_OA_FORMAT_A13_B8_C8] = { 2, 128 }, 296 /* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */ 297 [I915_OA_FORMAT_B4_C8] = { 4, 64 }, 298 [I915_OA_FORMAT_A45_B8_C8] = { 5, 256 }, 299 [I915_OA_FORMAT_B4_C8_A16] = { 6, 128 }, 300 [I915_OA_FORMAT_C4_B8] = { 7, 64 }, 301 }; 302 303 static struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { 304 [I915_OA_FORMAT_A12] = { 0, 64 }, 305 [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 }, 306 [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, 307 [I915_OA_FORMAT_C4_B8] = { 7, 64 }, 308 }; 309 #endif 310 311 #define SAMPLE_OA_REPORT (1<<0) 312 313 /** 314 * struct perf_open_properties - for validated properties given to open a stream 315 * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags 316 * @single_context: Whether a single or all gpu contexts should be monitored 317 * @ctx_handle: A gem ctx handle for use with @single_context 318 * @metrics_set: An ID for an OA unit metric set advertised via sysfs 319 * @oa_format: An OA unit HW report format 320 * @oa_periodic: Whether to enable periodic OA unit sampling 321 * @oa_period_exponent: The OA unit sampling period is derived from this 322 * 323 * As read_properties_unlocked() enumerates and validates the properties given 324 * to open a stream of metrics the configuration is built up in the structure 325 * which starts out zero initialized. 
326 */ 327 struct perf_open_properties { 328 u32 sample_flags; 329 330 u64 single_context:1; 331 u64 ctx_handle; 332 333 /* OA sampling state */ 334 int metrics_set; 335 int oa_format; 336 bool oa_periodic; 337 int oa_period_exponent; 338 }; 339 340 #if 0 341 /* NB: This is either called via fops or the poll check hrtimer (atomic ctx) 342 * 343 * It's safe to read OA config state here unlocked, assuming that this is only 344 * called while the stream is enabled, while the global OA configuration can't 345 * be modified. 346 * 347 * Note: we don't lock around the head/tail reads even though there's the slim 348 * possibility of read() fop errors forcing a re-init of the OA buffer 349 * pointers. A race here could result in a false positive !empty status which 350 * is acceptable. 351 */ 352 static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv) 353 { 354 int report_size = dev_priv->perf.oa.oa_buffer.format_size; 355 u32 oastatus2 = I915_READ(GEN7_OASTATUS2); 356 u32 oastatus1 = I915_READ(GEN7_OASTATUS1); 357 u32 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK; 358 u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; 359 360 return OA_TAKEN(tail, head) < 361 dev_priv->perf.oa.tail_margin + report_size; 362 } 363 364 /** 365 * append_oa_status - Appends a status record to a userspace read() buffer. 366 * @stream: An i915-perf stream opened for OA metrics 367 * @buf: destination buffer given by userspace 368 * @count: the number of bytes userspace wants to read 369 * @offset: (inout): the current position for writing into @buf 370 * @type: The kind of status to report to userspace 371 * 372 * Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`) 373 * into the userspace read() buffer. 374 * 375 * The @buf @offset will only be updated on success. 376 * 377 * Returns: 0 on success, negative error code on failure. 
378 */ 379 static int append_oa_status(struct i915_perf_stream *stream, 380 char __user *buf, 381 size_t count, 382 size_t *offset, 383 enum drm_i915_perf_record_type type) 384 { 385 struct drm_i915_perf_record_header header = { type, 0, sizeof(header) }; 386 387 if ((count - *offset) < header.size) 388 return -ENOSPC; 389 390 if (copy_to_user(buf + *offset, &header, sizeof(header))) 391 return -EFAULT; 392 393 (*offset) += header.size; 394 395 return 0; 396 } 397 398 /** 399 * append_oa_sample - Copies single OA report into userspace read() buffer. 400 * @stream: An i915-perf stream opened for OA metrics 401 * @buf: destination buffer given by userspace 402 * @count: the number of bytes userspace wants to read 403 * @offset: (inout): the current position for writing into @buf 404 * @report: A single OA report to (optionally) include as part of the sample 405 * 406 * The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*` 407 * properties when opening a stream, tracked as `stream->sample_flags`. This 408 * function copies the requested components of a single sample to the given 409 * read() @buf. 410 * 411 * The @buf @offset will only be updated on success. 412 * 413 * Returns: 0 on success, negative error code on failure. 
414 */ 415 static int append_oa_sample(struct i915_perf_stream *stream, 416 char __user *buf, 417 size_t count, 418 size_t *offset, 419 const u8 *report) 420 { 421 struct drm_i915_private *dev_priv = stream->dev_priv; 422 int report_size = dev_priv->perf.oa.oa_buffer.format_size; 423 struct drm_i915_perf_record_header header; 424 u32 sample_flags = stream->sample_flags; 425 426 header.type = DRM_I915_PERF_RECORD_SAMPLE; 427 header.pad = 0; 428 header.size = stream->sample_size; 429 430 if ((count - *offset) < header.size) 431 return -ENOSPC; 432 433 buf += *offset; 434 if (copy_to_user(buf, &header, sizeof(header))) 435 return -EFAULT; 436 buf += sizeof(header); 437 438 if (sample_flags & SAMPLE_OA_REPORT) { 439 if (copy_to_user(buf, report, report_size)) 440 return -EFAULT; 441 } 442 443 (*offset) += header.size; 444 445 return 0; 446 } 447 448 /** 449 * Copies all buffered OA reports into userspace read() buffer. 450 * @stream: An i915-perf stream opened for OA metrics 451 * @buf: destination buffer given by userspace 452 * @count: the number of bytes userspace wants to read 453 * @offset: (inout): the current position for writing into @buf 454 * 455 * Notably any error condition resulting in a short read (-%ENOSPC or 456 * -%EFAULT) will be returned even though one or more records may 457 * have been successfully copied. In this case it's up to the caller 458 * to decide if the error should be squashed before returning to 459 * userspace. 460 * 461 * Note: reports are consumed from the head, and appended to the 462 * tail, so the tail chases the head?... If you think that's mad 463 * and back-to-front you're not alone, but this follows the 464 * Gen PRM naming convention. 465 * 466 * Returns: 0 on success, negative error code on failure. 
467 */ 468 static int gen8_append_oa_reports(struct i915_perf_stream *stream, 469 char __user *buf, 470 size_t count, 471 size_t *offset) 472 { 473 struct drm_i915_private *dev_priv = stream->dev_priv; 474 int report_size = dev_priv->perf.oa.oa_buffer.format_size; 475 u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; 476 u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); 477 u32 mask = (OA_BUFFER_SIZE - 1); 478 size_t start_offset = *offset; 479 unsigned long flags; 480 unsigned int aged_tail_idx; 481 u32 head, tail; 482 u32 taken; 483 int ret = 0; 484 485 if (WARN_ON(!stream->enabled)) 486 return -EIO; 487 488 spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); 489 490 head = dev_priv->perf.oa.oa_buffer.head; 491 aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx; 492 tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset; 493 494 spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); 495 496 /* 497 * An invalid tail pointer here means we're still waiting for the poll 498 * hrtimer callback to give us a pointer 499 */ 500 if (tail == INVALID_TAIL_PTR) 501 return -EAGAIN; 502 503 /* 504 * NB: oa_buffer.head/tail include the gtt_offset which we don't want 505 * while indexing relative to oa_buf_base. 506 */ 507 head -= gtt_offset; 508 tail -= gtt_offset; 509 510 /* 511 * An out of bounds or misaligned head or tail pointer implies a driver 512 * bug since we validate + align the tail pointers we read from the 513 * hardware and we are in full control of the head pointer which should 514 * only be incremented by multiples of the report size (notably also 515 * all a power of two). 
516 */ 517 if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size || 518 tail > OA_BUFFER_SIZE || tail % report_size, 519 "Inconsistent OA buffer pointers: head = %u, tail = %u\n", 520 head, tail)) 521 return -EIO; 522 523 524 for (/* none */; 525 (taken = OA_TAKEN(tail, head)); 526 head = (head + report_size) & mask) { 527 u8 *report = oa_buf_base + head; 528 u32 *report32 = (void *)report; 529 u32 ctx_id; 530 u32 reason; 531 532 /* 533 * All the report sizes factor neatly into the buffer 534 * size so we never expect to see a report split 535 * between the beginning and end of the buffer. 536 * 537 * Given the initial alignment check a misalignment 538 * here would imply a driver bug that would result 539 * in an overrun. 540 */ 541 if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) { 542 DRM_ERROR("Spurious OA head ptr: non-integral report offset\n"); 543 break; 544 } 545 546 /* 547 * The reason field includes flags identifying what 548 * triggered this specific report (mostly timer 549 * triggered or e.g. due to a context switch). 550 * 551 * This field is never expected to be zero so we can 552 * check that the report isn't invalid before copying 553 * it to userspace... 554 */ 555 reason = ((report32[0] >> OAREPORT_REASON_SHIFT) & 556 OAREPORT_REASON_MASK); 557 if (reason == 0) { 558 if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs)) 559 DRM_NOTE("Skipping spurious, invalid OA report\n"); 560 continue; 561 } 562 563 /* 564 * XXX: Just keep the lower 21 bits for now since I'm not 565 * entirely sure if the HW touches any of the higher bits in 566 * this field 567 */ 568 ctx_id = report32[2] & 0x1fffff; 569 570 /* 571 * Squash whatever is in the CTX_ID field if it's marked as 572 * invalid to be sure we avoid false-positive, single-context 573 * filtering below... 574 * 575 * Note: that we don't clear the valid_ctx_bit so userspace can 576 * understand that the ID has been squashed by the kernel. 
577 */ 578 if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit)) 579 ctx_id = report32[2] = INVALID_CTX_ID; 580 581 /* 582 * NB: For Gen 8 the OA unit no longer supports clock gating 583 * off for a specific context and the kernel can't securely 584 * stop the counters from updating as system-wide / global 585 * values. 586 * 587 * Automatic reports now include a context ID so reports can be 588 * filtered on the cpu but it's not worth trying to 589 * automatically subtract/hide counter progress for other 590 * contexts while filtering since we can't stop userspace 591 * issuing MI_REPORT_PERF_COUNT commands which would still 592 * provide a side-band view of the real values. 593 * 594 * To allow userspace (such as Mesa/GL_INTEL_performance_query) 595 * to normalize counters for a single filtered context then it 596 * needs be forwarded bookend context-switch reports so that it 597 * can track switches in between MI_REPORT_PERF_COUNT commands 598 * and can itself subtract/ignore the progress of counters 599 * associated with other contexts. Note that the hardware 600 * automatically triggers reports when switching to a new 601 * context which are tagged with the ID of the newly active 602 * context. To avoid the complexity (and likely fragility) of 603 * reading ahead while parsing reports to try and minimize 604 * forwarding redundant context switch reports (i.e. between 605 * other, unrelated contexts) we simply elect to forward them 606 * all. 607 * 608 * We don't rely solely on the reason field to identify context 609 * switches since it's not-uncommon for periodic samples to 610 * identify a switch before any 'context switch' report. 
611 */ 612 if (!dev_priv->perf.oa.exclusive_stream->ctx || 613 dev_priv->perf.oa.specific_ctx_id == ctx_id || 614 (dev_priv->perf.oa.oa_buffer.last_ctx_id == 615 dev_priv->perf.oa.specific_ctx_id) || 616 reason & OAREPORT_REASON_CTX_SWITCH) { 617 618 /* 619 * While filtering for a single context we avoid 620 * leaking the IDs of other contexts. 621 */ 622 if (dev_priv->perf.oa.exclusive_stream->ctx && 623 dev_priv->perf.oa.specific_ctx_id != ctx_id) { 624 report32[2] = INVALID_CTX_ID; 625 } 626 627 ret = append_oa_sample(stream, buf, count, offset, 628 report); 629 if (ret) 630 break; 631 632 dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id; 633 } 634 635 /* 636 * The above reason field sanity check is based on 637 * the assumption that the OA buffer is initially 638 * zeroed and we reset the field after copying so the 639 * check is still meaningful once old reports start 640 * being overwritten. 641 */ 642 report32[0] = 0; 643 } 644 645 if (start_offset != *offset) { 646 spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); 647 648 /* 649 * We removed the gtt_offset for the copy loop above, indexing 650 * relative to oa_buf_base so put back here... 651 */ 652 head += gtt_offset; 653 654 I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK); 655 dev_priv->perf.oa.oa_buffer.head = head; 656 657 spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); 658 } 659 660 return ret; 661 } 662 663 /** 664 * gen8_oa_read - copy status records then buffered OA reports 665 * @stream: An i915-perf stream opened for OA metrics 666 * @buf: destination buffer given by userspace 667 * @count: the number of bytes userspace wants to read 668 * @offset: (inout): the current position for writing into @buf 669 * 670 * Checks OA unit status registers and if necessary appends corresponding 671 * status records for userspace (such as for a buffer full condition) and then 672 * initiate appending any buffered OA reports. 
673 * 674 * Updates @offset according to the number of bytes successfully copied into 675 * the userspace buffer. 676 * 677 * NB: some data may be successfully copied to the userspace buffer 678 * even if an error is returned, and this is reflected in the 679 * updated @offset. 680 * 681 * Returns: zero on success or a negative error code 682 */ 683 static int gen8_oa_read(struct i915_perf_stream *stream, 684 char __user *buf, 685 size_t count, 686 size_t *offset) 687 { 688 struct drm_i915_private *dev_priv = stream->dev_priv; 689 u32 oastatus; 690 int ret; 691 692 if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) 693 return -EIO; 694 695 oastatus = I915_READ(GEN8_OASTATUS); 696 697 /* 698 * We treat OABUFFER_OVERFLOW as a significant error: 699 * 700 * Although theoretically we could handle this more gracefully 701 * sometimes, some Gens don't correctly suppress certain 702 * automatically triggered reports in this condition and so we 703 * have to assume that old reports are now being trampled 704 * over. 705 * 706 * Considering how we don't currently give userspace control 707 * over the OA buffer size and always configure a large 16MB 708 * buffer, then a buffer overflow does anyway likely indicate 709 * that something has gone quite badly wrong. 
710 */ 711 if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) { 712 ret = append_oa_status(stream, buf, count, offset, 713 DRM_I915_PERF_RECORD_OA_BUFFER_LOST); 714 if (ret) 715 return ret; 716 717 DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", 718 dev_priv->perf.oa.period_exponent); 719 720 dev_priv->perf.oa.ops.oa_disable(dev_priv); 721 dev_priv->perf.oa.ops.oa_enable(dev_priv); 722 723 /* 724 * Note: .oa_enable() is expected to re-init the oabuffer and 725 * reset GEN8_OASTATUS for us 726 */ 727 oastatus = I915_READ(GEN8_OASTATUS); 728 } 729 730 if (oastatus & GEN8_OASTATUS_REPORT_LOST) { 731 ret = append_oa_status(stream, buf, count, offset, 732 DRM_I915_PERF_RECORD_OA_REPORT_LOST); 733 if (ret) 734 return ret; 735 I915_WRITE(GEN8_OASTATUS, 736 oastatus & ~GEN8_OASTATUS_REPORT_LOST); 737 } 738 739 return gen8_append_oa_reports(stream, buf, count, offset); 740 } 741 742 /** 743 * Copies all buffered OA reports into userspace read() buffer. 744 * @stream: An i915-perf stream opened for OA metrics 745 * @buf: destination buffer given by userspace 746 * @count: the number of bytes userspace wants to read 747 * @offset: (inout): the current position for writing into @buf 748 * @head_ptr: (inout): the current oa buffer cpu read position 749 * @tail: the current oa buffer gpu write position 750 * 751 * Notably any error condition resulting in a short read (-%ENOSPC or 752 * -%EFAULT) will be returned even though one or more records may 753 * have been successfully copied. In this case it's up to the caller 754 * to decide if the error should be squashed before returning to 755 * userspace. 756 * 757 * Note: reports are consumed from the head, and appended to the 758 * tail, so the head chases the tail?... If you think that's mad 759 * and back-to-front you're not alone, but this follows the 760 * Gen PRM naming convention. 761 * 762 * Returns: 0 on success, negative error code on failure. 
763 */ 764 static int gen7_append_oa_reports(struct i915_perf_stream *stream, 765 char __user *buf, 766 size_t count, 767 size_t *offset, 768 u32 *head_ptr, 769 u32 tail) 770 { 771 struct drm_i915_private *dev_priv = stream->dev_priv; 772 int report_size = dev_priv->perf.oa.oa_buffer.format_size; 773 u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; 774 int tail_margin = dev_priv->perf.oa.tail_margin; 775 u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); 776 u32 mask = (OA_BUFFER_SIZE - 1); 777 u32 head; 778 u32 taken; 779 int ret = 0; 780 781 if (WARN_ON(!stream->enabled)) 782 return -EIO; 783 784 head = *head_ptr - gtt_offset; 785 tail -= gtt_offset; 786 787 /* The OA unit is expected to wrap the tail pointer according to the OA 788 * buffer size and since we should never write a misaligned head 789 * pointer we don't expect to read one back either... 790 */ 791 if (tail > OA_BUFFER_SIZE || head > OA_BUFFER_SIZE || 792 head % report_size) { 793 DRM_ERROR("Inconsistent OA buffer pointer (head = %u, tail = %u): force restart\n", 794 head, tail); 795 dev_priv->perf.oa.ops.oa_disable(dev_priv); 796 dev_priv->perf.oa.ops.oa_enable(dev_priv); 797 *head_ptr = I915_READ(GEN7_OASTATUS2) & 798 GEN7_OASTATUS2_HEAD_MASK; 799 return -EIO; 800 } 801 802 803 /* The tail pointer increases in 64 byte increments, not in report_size 804 * steps... 805 */ 806 tail &= ~(report_size - 1); 807 808 /* Move the tail pointer back by the current tail_margin to account for 809 * the possibility that the latest reports may not have really landed 810 * in memory yet... 
811 */ 812 813 if (OA_TAKEN(tail, head) < report_size + tail_margin) 814 return -EAGAIN; 815 816 tail -= tail_margin; 817 tail &= mask; 818 819 for (/* none */; 820 (taken = OA_TAKEN(tail, head)); 821 head = (head + report_size) & mask) { 822 u8 *report = oa_buf_base + head; 823 u32 *report32 = (void *)report; 824 825 /* All the report sizes factor neatly into the buffer 826 * size so we never expect to see a report split 827 * between the beginning and end of the buffer. 828 * 829 * Given the initial alignment check a misalignment 830 * here would imply a driver bug that would result 831 * in an overrun. 832 */ 833 if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) { 834 DRM_ERROR("Spurious OA head ptr: non-integral report offset\n"); 835 break; 836 } 837 838 /* The report-ID field for periodic samples includes 839 * some undocumented flags related to what triggered 840 * the report and is never expected to be zero so we 841 * can check that the report isn't invalid before 842 * copying it to userspace... 843 */ 844 if (report32[0] == 0) { 845 DRM_NOTE("Skipping spurious, invalid OA report\n"); 846 continue; 847 } 848 849 ret = append_oa_sample(stream, buf, count, offset, report); 850 if (ret) 851 break; 852 853 /* The above report-id field sanity check is based on 854 * the assumption that the OA buffer is initially 855 * zeroed and we reset the field after copying so the 856 * check is still meaningful once old reports start 857 * being overwritten. 
858 */ 859 report32[0] = 0; 860 } 861 862 *head_ptr = gtt_offset + head; 863 864 return ret; 865 } 866 867 /** 868 * gen7_oa_read - copy status records then buffered OA reports 869 * @stream: An i915-perf stream opened for OA metrics 870 * @buf: destination buffer given by userspace 871 * @count: the number of bytes userspace wants to read 872 * @offset: (inout): the current position for writing into @buf 873 * 874 * Checks Gen 7 specific OA unit status registers and if necessary appends 875 * corresponding status records for userspace (such as for a buffer full 876 * condition) and then initiate appending any buffered OA reports. 877 * 878 * Updates @offset according to the number of bytes successfully copied into 879 * the userspace buffer. 880 * 881 * Returns: zero on success or a negative error code 882 */ 883 static int gen7_oa_read(struct i915_perf_stream *stream, 884 char __user *buf, 885 size_t count, 886 size_t *offset) 887 { 888 struct drm_i915_private *dev_priv = stream->dev_priv; 889 int report_size = dev_priv->perf.oa.oa_buffer.format_size; 890 u32 oastatus2; 891 u32 oastatus1; 892 u32 head; 893 u32 tail; 894 int ret; 895 896 if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) 897 return -EIO; 898 899 oastatus2 = I915_READ(GEN7_OASTATUS2); 900 oastatus1 = I915_READ(GEN7_OASTATUS1); 901 902 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK; 903 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; 904 905 /* XXX: On Haswell we don't have a safe way to clear oastatus1 906 * bits while the OA unit is enabled (while the tail pointer 907 * may be updated asynchronously) so we ignore status bits 908 * that have already been reported to userspace. 
909 */ 910 oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1; 911 912 /* We treat OABUFFER_OVERFLOW as a significant error: 913 * 914 * - The status can be interpreted to mean that the buffer is 915 * currently full (with a higher precedence than OA_TAKEN() 916 * which will start to report a near-empty buffer after an 917 * overflow) but it's awkward that we can't clear the status 918 * on Haswell, so without a reset we won't be able to catch 919 * the state again. 920 * 921 * - Since it also implies the HW has started overwriting old 922 * reports it may also affect our sanity checks for invalid 923 * reports when copying to userspace that assume new reports 924 * are being written to cleared memory. 925 * 926 * - In the future we may want to introduce a flight recorder 927 * mode where the driver will automatically maintain a safe 928 * guard band between head/tail, avoiding this overflow 929 * condition, but we avoid the added driver complexity for 930 * now. 931 */ 932 if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) { 933 ret = append_oa_status(stream, buf, count, offset, 934 DRM_I915_PERF_RECORD_OA_BUFFER_LOST); 935 if (ret) 936 return ret; 937 938 DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", 939 dev_priv->perf.oa.period_exponent); 940 941 dev_priv->perf.oa.ops.oa_disable(dev_priv); 942 dev_priv->perf.oa.ops.oa_enable(dev_priv); 943 944 oastatus2 = I915_READ(GEN7_OASTATUS2); 945 oastatus1 = I915_READ(GEN7_OASTATUS1); 946 947 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK; 948 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; 949 } 950 951 if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) { 952 ret = append_oa_status(stream, buf, count, offset, 953 DRM_I915_PERF_RECORD_OA_REPORT_LOST); 954 if (ret) 955 return ret; 956 dev_priv->perf.oa.gen7_latched_oastatus1 |= 957 GEN7_OASTATUS1_REPORT_LOST; 958 } 959 960 ret = gen7_append_oa_reports(stream, buf, count, offset, 961 &head, tail); 962 963 /* All the report sizes are a power of 
two and the 964 * head should always be incremented by some multiple 965 * of the report size. 966 * 967 * A warning here, but notably if we later read back a 968 * misaligned pointer we will treat that as a bug since 969 * it could lead to a buffer overrun. 970 */ 971 WARN_ONCE(head & (report_size - 1), 972 "i915: Writing misaligned OA head pointer"); 973 974 /* Note: we update the head pointer here even if an error 975 * was returned since the error may represent a short read 976 * where some some reports were successfully copied. 977 */ 978 I915_WRITE(GEN7_OASTATUS2, 979 ((head & GEN7_OASTATUS2_HEAD_MASK) | 980 OA_MEM_SELECT_GGTT)); 981 982 return ret; 983 } 984 985 /** 986 * i915_oa_wait_unlocked - handles blocking IO until OA data available 987 * @stream: An i915-perf stream opened for OA metrics 988 * 989 * Called when userspace tries to read() from a blocking stream FD opened 990 * for OA metrics. It waits until the hrtimer callback finds a non-empty 991 * OA buffer and wakes us. 992 * 993 * Note: it's acceptable to have this return with some false positives 994 * since any subsequent read handling will return -EAGAIN if there isn't 995 * really data ready for userspace yet. 
996 * 997 * Returns: zero on success or a negative error code 998 */ 999 static int i915_oa_wait_unlocked(struct i915_perf_stream *stream) 1000 { 1001 struct drm_i915_private *dev_priv = stream->dev_priv; 1002 1003 /* We would wait indefinitely if periodic sampling is not enabled */ 1004 if (!dev_priv->perf.oa.periodic) 1005 return -EIO; 1006 1007 /* Note: the oa_buffer_is_empty() condition is ok to run unlocked as it 1008 * just performs mmio reads of the OA buffer head + tail pointers and 1009 * it's assumed we're handling some operation that implies the stream 1010 * can't be destroyed until completion (such as a read()) that ensures 1011 * the device + OA buffer can't disappear 1012 */ 1013 return wait_event_interruptible(dev_priv->perf.oa.poll_wq, 1014 !dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)); 1015 } 1016 1017 /** 1018 * i915_oa_poll_wait - call poll_wait() for an OA stream poll() 1019 * @stream: An i915-perf stream opened for OA metrics 1020 * @file: An i915 perf stream file 1021 * @wait: poll() state table 1022 * 1023 * For handling userspace polling on an i915 perf stream opened for OA metrics, 1024 * this starts a poll_wait with the wait queue that our hrtimer callback wakes 1025 * when it sees data ready to read in the circular OA buffer. 1026 */ 1027 static void i915_oa_poll_wait(struct i915_perf_stream *stream, 1028 struct file *file, 1029 poll_table *wait) 1030 { 1031 struct drm_i915_private *dev_priv = stream->dev_priv; 1032 1033 poll_wait(file, &dev_priv->perf.oa.poll_wq, wait); 1034 } 1035 1036 /** 1037 * i915_oa_read - just calls through to &i915_oa_ops->read 1038 * @stream: An i915-perf stream opened for OA metrics 1039 * @buf: destination buffer given by userspace 1040 * @count: the number of bytes userspace wants to read 1041 * @offset: (inout): the current position for writing into @buf 1042 * 1043 * Updates @offset according to the number of bytes successfully copied into 1044 * the userspace buffer. 
1045 * 1046 * Returns: zero on success or a negative error code 1047 */ 1048 static int i915_oa_read(struct i915_perf_stream *stream, 1049 char __user *buf, 1050 size_t count, 1051 size_t *offset) 1052 { 1053 struct drm_i915_private *dev_priv = stream->dev_priv; 1054 1055 return dev_priv->perf.oa.ops.read(stream, buf, count, offset); 1056 } 1057 1058 /** 1059 * oa_get_render_ctx_id - determine and hold ctx hw id 1060 * @stream: An i915-perf stream opened for OA metrics 1061 * 1062 * Determine the render context hw id, and ensure it remains fixed for the 1063 * lifetime of the stream. This ensures that we don't have to worry about 1064 * updating the context ID in OACONTROL on the fly. 1065 * 1066 * Returns: zero on success or a negative error code 1067 */ 1068 static int oa_get_render_ctx_id(struct i915_perf_stream *stream) 1069 { 1070 struct drm_i915_private *dev_priv = stream->dev_priv; 1071 1072 if (i915_modparams.enable_execlists) 1073 dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id; 1074 else { 1075 struct intel_engine_cs *engine = dev_priv->engine[RCS]; 1076 struct intel_ring *ring; 1077 int ret; 1078 1079 ret = i915_mutex_lock_interruptible(&dev_priv->drm); 1080 if (ret) 1081 return ret; 1082 1083 /* 1084 * As the ID is the gtt offset of the context's vma we 1085 * pin the vma to ensure the ID remains fixed. 1086 * 1087 * NB: implied RCS engine... 
1088 */ 1089 ring = engine->context_pin(engine, stream->ctx); 1090 mutex_unlock(&dev_priv->drm.struct_mutex); 1091 if (IS_ERR(ring)) 1092 return PTR_ERR(ring); 1093 1094 1095 /* 1096 * Explicitly track the ID (instead of calling 1097 * i915_ggtt_offset() on the fly) considering the difference 1098 * with gen8+ and execlists 1099 */ 1100 dev_priv->perf.oa.specific_ctx_id = 1101 i915_ggtt_offset(stream->ctx->engine[engine->id].state); 1102 } 1103 1104 return 0; 1105 } 1106 1107 /** 1108 * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold 1109 * @stream: An i915-perf stream opened for OA metrics 1110 * 1111 * In case anything needed doing to ensure the context HW ID would remain valid 1112 * for the lifetime of the stream, then that can be undone here. 1113 */ 1114 static void oa_put_render_ctx_id(struct i915_perf_stream *stream) 1115 { 1116 struct drm_i915_private *dev_priv = stream->dev_priv; 1117 1118 if (i915_modparams.enable_execlists) { 1119 dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; 1120 } else { 1121 struct intel_engine_cs *engine = dev_priv->engine[RCS]; 1122 1123 mutex_lock(&dev_priv->drm.struct_mutex); 1124 1125 dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; 1126 engine->context_unpin(engine, stream->ctx); 1127 1128 mutex_unlock(&dev_priv->drm.struct_mutex); 1129 } 1130 } 1131 1132 static void 1133 free_oa_buffer(struct drm_i915_private *i915) 1134 { 1135 mutex_lock(&i915->drm.struct_mutex); 1136 1137 i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj); 1138 i915_vma_unpin(i915->perf.oa.oa_buffer.vma); 1139 i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj); 1140 1141 i915->perf.oa.oa_buffer.vma = NULL; 1142 i915->perf.oa.oa_buffer.vaddr = NULL; 1143 1144 mutex_unlock(&i915->drm.struct_mutex); 1145 } 1146 1147 static void i915_oa_stream_destroy(struct i915_perf_stream *stream) 1148 { 1149 struct drm_i915_private *dev_priv = stream->dev_priv; 1150 1151 BUG_ON(stream != dev_priv->perf.oa.exclusive_stream); 1152 
1153 /* 1154 * Unset exclusive_stream first, it will be checked while disabling 1155 * the metric set on gen8+. 1156 */ 1157 mutex_lock(&dev_priv->drm.struct_mutex); 1158 dev_priv->perf.oa.exclusive_stream = NULL; 1159 dev_priv->perf.oa.ops.disable_metric_set(dev_priv); 1160 mutex_unlock(&dev_priv->drm.struct_mutex); 1161 1162 free_oa_buffer(dev_priv); 1163 1164 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 1165 intel_runtime_pm_put(dev_priv); 1166 1167 if (stream->ctx) 1168 oa_put_render_ctx_id(stream); 1169 1170 dev_priv->perf.oa.exclusive_stream = NULL; 1171 } 1172 1173 static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) 1174 { 1175 u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); 1176 1177 /* Pre-DevBDW: OABUFFER must be set with counters off, 1178 * before OASTATUS1, but after OASTATUS2 1179 */ 1180 I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */ 1181 I915_WRITE(GEN7_OABUFFER, gtt_offset); 1182 I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ 1183 1184 /* On Haswell we have to track which OASTATUS1 flags we've 1185 * already seen since they can't be cleared while periodic 1186 * sampling is enabled. 1187 */ 1188 dev_priv->perf.oa.gen7_latched_oastatus1 = 0; 1189 1190 /* NB: although the OA buffer will initially be allocated 1191 * zeroed via shmfs (and so this memset is redundant when 1192 * first allocating), we may re-init the OA buffer, either 1193 * when re-enabling a stream or in error/reset paths. 1194 * 1195 * The reason we clear the buffer for each re-init is for the 1196 * sanity check in gen7_append_oa_reports() that looks at the 1197 * report-id field to make sure it's non-zero which relies on 1198 * the assumption that new reports are being written to zeroed 1199 * memory... 1200 */ 1201 memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); 1202 1203 /* Maybe make ->pollin per-stream state if we support multiple 1204 * concurrent streams in the future. 
1205 */ 1206 dev_priv->perf.oa.pollin = false; 1207 } 1208 1209 static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) 1210 { 1211 u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); 1212 unsigned long flags; 1213 1214 spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); 1215 1216 I915_WRITE(GEN8_OASTATUS, 0); 1217 I915_WRITE(GEN8_OAHEADPTR, gtt_offset); 1218 dev_priv->perf.oa.oa_buffer.head = gtt_offset; 1219 1220 I915_WRITE(GEN8_OABUFFER_UDW, 0); 1221 1222 /* 1223 * PRM says: 1224 * 1225 * "This MMIO must be set before the OATAILPTR 1226 * register and after the OAHEADPTR register. This is 1227 * to enable proper functionality of the overflow 1228 * bit." 1229 */ 1230 I915_WRITE(GEN8_OABUFFER, gtt_offset | 1231 OABUFFER_SIZE_16M | OA_MEM_SELECT_GGTT); 1232 I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); 1233 1234 /* Mark that we need updated tail pointers to read from... */ 1235 dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR; 1236 dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR; 1237 1238 /* 1239 * Reset state used to recognise context switches, affecting which 1240 * reports we will forward to userspace while filtering for a single 1241 * context. 1242 */ 1243 dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID; 1244 1245 spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); 1246 1247 /* 1248 * NB: although the OA buffer will initially be allocated 1249 * zeroed via shmfs (and so this memset is redundant when 1250 * first allocating), we may re-init the OA buffer, either 1251 * when re-enabling a stream or in error/reset paths. 1252 * 1253 * The reason we clear the buffer for each re-init is for the 1254 * sanity check in gen8_append_oa_reports() that looks at the 1255 * reason field to make sure it's non-zero which relies on 1256 * the assumption that new reports are being written to zeroed 1257 * memory... 
1258 */ 1259 memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); 1260 1261 /* 1262 * Maybe make ->pollin per-stream state if we support multiple 1263 * concurrent streams in the future. 1264 */ 1265 dev_priv->perf.oa.pollin = false; 1266 } 1267 1268 static int alloc_oa_buffer(struct drm_i915_private *dev_priv) 1269 { 1270 struct drm_i915_gem_object *bo; 1271 struct i915_vma *vma; 1272 int ret; 1273 1274 if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma)) 1275 return -ENODEV; 1276 1277 ret = i915_mutex_lock_interruptible(&dev_priv->drm); 1278 if (ret) 1279 return ret; 1280 1281 BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); 1282 BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); 1283 1284 bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE); 1285 if (IS_ERR(bo)) { 1286 DRM_ERROR("Failed to allocate OA buffer\n"); 1287 ret = PTR_ERR(bo); 1288 goto unlock; 1289 } 1290 1291 ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC); 1292 if (ret) 1293 goto err_unref; 1294 1295 /* PreHSW required 512K alignment, HSW requires 16M */ 1296 vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0); 1297 if (IS_ERR(vma)) { 1298 ret = PTR_ERR(vma); 1299 goto err_unref; 1300 } 1301 dev_priv->perf.oa.oa_buffer.vma = vma; 1302 1303 dev_priv->perf.oa.oa_buffer.vaddr = 1304 i915_gem_object_pin_map(bo, I915_MAP_WB); 1305 if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) { 1306 ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr); 1307 goto err_unpin; 1308 } 1309 1310 dev_priv->perf.oa.ops.init_oa_buffer(dev_priv); 1311 1312 DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n", 1313 i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma), 1314 dev_priv->perf.oa.oa_buffer.vaddr); 1315 1316 goto unlock; 1317 1318 err_unpin: 1319 __i915_vma_unpin(vma); 1320 1321 err_unref: 1322 i915_gem_object_put(bo); 1323 1324 dev_priv->perf.oa.oa_buffer.vaddr = NULL; 1325 dev_priv->perf.oa.oa_buffer.vma = NULL; 1326 1327 unlock: 1328 mutex_unlock(&dev_priv->drm.struct_mutex); 
1329 return ret; 1330 } 1331 1332 static void config_oa_regs(struct drm_i915_private *dev_priv, 1333 const struct i915_oa_reg *regs, 1334 u32 n_regs) 1335 { 1336 u32 i; 1337 1338 for (i = 0; i < n_regs; i++) { 1339 const struct i915_oa_reg *reg = regs + i; 1340 1341 I915_WRITE(reg->addr, reg->value); 1342 } 1343 } 1344 1345 static int hsw_enable_metric_set(struct drm_i915_private *dev_priv, 1346 const struct i915_oa_config *oa_config) 1347 { 1348 /* PRM: 1349 * 1350 * OA unit is using “crclk” for its functionality. When trunk 1351 * level clock gating takes place, OA clock would be gated, 1352 * unable to count the events from non-render clock domain. 1353 * Render clock gating must be disabled when OA is enabled to 1354 * count the events from non-render domain. Unit level clock 1355 * gating for RCS should also be disabled. 1356 */ 1357 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & 1358 ~GEN7_DOP_CLOCK_GATE_ENABLE)); 1359 I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | 1360 GEN6_CSUNIT_CLOCK_GATE_DISABLE)); 1361 1362 config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); 1363 1364 /* It apparently takes a fairly long time for a new MUX 1365 * configuration to be be applied after these register writes. 1366 * This delay duration was derived empirically based on the 1367 * render_basic config but hopefully it covers the maximum 1368 * configuration latency. 1369 * 1370 * As a fallback, the checks in _append_oa_reports() to skip 1371 * invalid OA reports do also seem to work to discard reports 1372 * generated before this config has completed - albeit not 1373 * silently. 1374 * 1375 * Unfortunately this is essentially a magic number, since we 1376 * don't currently know of a reliable mechanism for predicting 1377 * how long the MUX config will take to apply and besides 1378 * seeing invalid reports we don't know of a reliable way to 1379 * explicitly check that the MUX config has landed. 
1380 * 1381 * It's even possible we've miss characterized the underlying 1382 * problem - it just seems like the simplest explanation why 1383 * a delay at this location would mitigate any invalid reports. 1384 */ 1385 usleep_range(15000, 20000); 1386 1387 config_oa_regs(dev_priv, oa_config->b_counter_regs, 1388 oa_config->b_counter_regs_len); 1389 1390 return 0; 1391 } 1392 1393 static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) 1394 { 1395 I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) & 1396 ~GEN6_CSUNIT_CLOCK_GATE_DISABLE)); 1397 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) | 1398 GEN7_DOP_CLOCK_GATE_ENABLE)); 1399 1400 I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & 1401 ~GT_NOA_ENABLE)); 1402 } 1403 1404 /* 1405 * NB: It must always remain pointer safe to run this even if the OA unit 1406 * has been disabled. 1407 * 1408 * It's fine to put out-of-date values into these per-context registers 1409 * in the case that the OA unit has been disabled. 1410 */ 1411 static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, 1412 u32 *reg_state, 1413 const struct i915_oa_config *oa_config) 1414 { 1415 struct drm_i915_private *dev_priv = ctx->i915; 1416 u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; 1417 u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; 1418 /* The MMIO offsets for Flex EU registers aren't contiguous */ 1419 u32 flex_mmio[] = { 1420 i915_mmio_reg_offset(EU_PERF_CNTL0), 1421 i915_mmio_reg_offset(EU_PERF_CNTL1), 1422 i915_mmio_reg_offset(EU_PERF_CNTL2), 1423 i915_mmio_reg_offset(EU_PERF_CNTL3), 1424 i915_mmio_reg_offset(EU_PERF_CNTL4), 1425 i915_mmio_reg_offset(EU_PERF_CNTL5), 1426 i915_mmio_reg_offset(EU_PERF_CNTL6), 1427 }; 1428 int i; 1429 1430 reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL); 1431 reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent << 1432 GEN8_OA_TIMER_PERIOD_SHIFT) | 1433 (dev_priv->perf.oa.periodic ? 
1434 GEN8_OA_TIMER_ENABLE : 0) | 1435 GEN8_OA_COUNTER_RESUME; 1436 1437 for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) { 1438 u32 state_offset = ctx_flexeu0 + i * 2; 1439 u32 mmio = flex_mmio[i]; 1440 1441 /* 1442 * This arbitrary default will select the 'EU FPU0 Pipeline 1443 * Active' event. In the future it's anticipated that there 1444 * will be an explicit 'No Event' we can select, but not yet... 1445 */ 1446 u32 value = 0; 1447 1448 if (oa_config) { 1449 u32 j; 1450 1451 for (j = 0; j < oa_config->flex_regs_len; j++) { 1452 if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) { 1453 value = oa_config->flex_regs[j].value; 1454 break; 1455 } 1456 } 1457 } 1458 1459 reg_state[state_offset] = mmio; 1460 reg_state[state_offset+1] = value; 1461 } 1462 } 1463 1464 /* 1465 * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This 1466 * is only used by the kernel context. 1467 */ 1468 static int gen8_emit_oa_config(struct drm_i915_gem_request *req, 1469 const struct i915_oa_config *oa_config) 1470 { 1471 struct drm_i915_private *dev_priv = req->i915; 1472 /* The MMIO offsets for Flex EU registers aren't contiguous */ 1473 u32 flex_mmio[] = { 1474 i915_mmio_reg_offset(EU_PERF_CNTL0), 1475 i915_mmio_reg_offset(EU_PERF_CNTL1), 1476 i915_mmio_reg_offset(EU_PERF_CNTL2), 1477 i915_mmio_reg_offset(EU_PERF_CNTL3), 1478 i915_mmio_reg_offset(EU_PERF_CNTL4), 1479 i915_mmio_reg_offset(EU_PERF_CNTL5), 1480 i915_mmio_reg_offset(EU_PERF_CNTL6), 1481 }; 1482 u32 *cs; 1483 int i; 1484 1485 cs = intel_ring_begin(req, ARRAY_SIZE(flex_mmio) * 2 + 4); 1486 if (IS_ERR(cs)) 1487 return PTR_ERR(cs); 1488 1489 *cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1); 1490 1491 *cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL); 1492 *cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | 1493 (dev_priv->perf.oa.periodic ? 
GEN8_OA_TIMER_ENABLE : 0) | 1494 GEN8_OA_COUNTER_RESUME; 1495 1496 for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) { 1497 u32 mmio = flex_mmio[i]; 1498 1499 /* 1500 * This arbitrary default will select the 'EU FPU0 Pipeline 1501 * Active' event. In the future it's anticipated that there 1502 * will be an explicit 'No Event' we can select, but not 1503 * yet... 1504 */ 1505 u32 value = 0; 1506 1507 if (oa_config) { 1508 u32 j; 1509 1510 for (j = 0; j < oa_config->flex_regs_len; j++) { 1511 if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) { 1512 value = oa_config->flex_regs[j].value; 1513 break; 1514 } 1515 } 1516 } 1517 1518 *cs++ = mmio; 1519 *cs++ = value; 1520 } 1521 1522 *cs++ = MI_NOOP; 1523 intel_ring_advance(req, cs); 1524 1525 return 0; 1526 } 1527 1528 static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv, 1529 const struct i915_oa_config *oa_config) 1530 { 1531 struct intel_engine_cs *engine = dev_priv->engine[RCS]; 1532 struct i915_gem_timeline *timeline; 1533 struct drm_i915_gem_request *req; 1534 int ret; 1535 1536 lockdep_assert_held(&dev_priv->drm.struct_mutex); 1537 1538 i915_gem_retire_requests(dev_priv); 1539 1540 req = i915_gem_request_alloc(engine, dev_priv->kernel_context); 1541 if (IS_ERR(req)) 1542 return PTR_ERR(req); 1543 1544 ret = gen8_emit_oa_config(req, oa_config); 1545 if (ret) { 1546 i915_add_request(req); 1547 return ret; 1548 } 1549 1550 /* Queue this switch after all other activity */ 1551 list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { 1552 struct drm_i915_gem_request *prev; 1553 struct intel_timeline *tl; 1554 1555 tl = &timeline->engine[engine->id]; 1556 prev = i915_gem_active_raw(&tl->last_request, 1557 &dev_priv->drm.struct_mutex); 1558 if (prev) 1559 i915_sw_fence_await_sw_fence_gfp(&req->submit, 1560 &prev->submit, 1561 GFP_KERNEL); 1562 } 1563 1564 ret = i915_switch_context(req); 1565 i915_add_request(req); 1566 1567 return ret; 1568 } 1569 1570 /* 1571 * Manages 
updating the per-context aspects of the OA stream 1572 * configuration across all contexts. 1573 * 1574 * The awkward consideration here is that OACTXCONTROL controls the 1575 * exponent for periodic sampling which is primarily used for system 1576 * wide profiling where we'd like a consistent sampling period even in 1577 * the face of context switches. 1578 * 1579 * Our approach of updating the register state context (as opposed to 1580 * say using a workaround batch buffer) ensures that the hardware 1581 * won't automatically reload an out-of-date timer exponent even 1582 * transiently before a WA BB could be parsed. 1583 * 1584 * This function needs to: 1585 * - Ensure the currently running context's per-context OA state is 1586 * updated 1587 * - Ensure that all existing contexts will have the correct per-context 1588 * OA state if they are scheduled for use. 1589 * - Ensure any new contexts will be initialized with the correct 1590 * per-context OA state. 1591 * 1592 * Note: it's only the RCS/Render context that has any OA state. 1593 */ 1594 static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, 1595 const struct i915_oa_config *oa_config) 1596 { 1597 struct i915_gem_context *ctx; 1598 int ret; 1599 unsigned int wait_flags = I915_WAIT_LOCKED; 1600 1601 lockdep_assert_held(&dev_priv->drm.struct_mutex); 1602 1603 /* Switch away from any user context. */ 1604 ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config); 1605 if (ret) 1606 goto out; 1607 1608 /* 1609 * The OA register config is setup through the context image. This image 1610 * might be written to by the GPU on context switch (in particular on 1611 * lite-restore). This means we can't safely update a context's image, 1612 * if this context is scheduled/submitted to run on the GPU. 1613 * 1614 * We could emit the OA register config through the batch buffer but 1615 * this might leave small interval of time where the OA unit is 1616 * configured at an invalid sampling period. 
1617 * 1618 * So far the best way to work around this issue seems to be draining 1619 * the GPU from any submitted work. 1620 */ 1621 ret = i915_gem_wait_for_idle(dev_priv, wait_flags); 1622 if (ret) 1623 goto out; 1624 1625 /* Update all contexts now that we've stalled the submission. */ 1626 list_for_each_entry(ctx, &dev_priv->contexts.list, link) { 1627 struct intel_context *ce = &ctx->engine[RCS]; 1628 u32 *regs; 1629 1630 /* OA settings will be set upon first use */ 1631 if (!ce->state) 1632 continue; 1633 1634 regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); 1635 if (IS_ERR(regs)) { 1636 ret = PTR_ERR(regs); 1637 goto out; 1638 } 1639 1640 ce->state->obj->mm.dirty = true; 1641 regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); 1642 1643 gen8_update_reg_state_unlocked(ctx, regs, oa_config); 1644 1645 i915_gem_object_unpin_map(ce->state->obj); 1646 } 1647 1648 out: 1649 return ret; 1650 } 1651 1652 static int gen8_enable_metric_set(struct drm_i915_private *dev_priv, 1653 const struct i915_oa_config *oa_config) 1654 { 1655 int ret; 1656 1657 /* 1658 * We disable slice/unslice clock ratio change reports on SKL since 1659 * they are too noisy. The HW generates a lot of redundant reports 1660 * where the ratio hasn't really changed causing a lot of redundant 1661 * work to processes and increasing the chances we'll hit buffer 1662 * overruns. 1663 * 1664 * Although we don't currently use the 'disable overrun' OABUFFER 1665 * feature it's worth noting that clock ratio reports have to be 1666 * disabled before considering to use that feature since the HW doesn't 1667 * correctly block these reports. 1668 * 1669 * Currently none of the high-level metrics we have depend on knowing 1670 * this ratio to normalize. 1671 * 1672 * Note: This register is not power context saved and restored, but 1673 * that's OK considering that we disable RC6 while the OA unit is 1674 * enabled. 
1675 * 1676 * The _INCLUDE_CLK_RATIO bit allows the slice/unslice frequency to 1677 * be read back from automatically triggered reports, as part of the 1678 * RPT_ID field. 1679 */ 1680 if (IS_GEN9(dev_priv)) { 1681 I915_WRITE(GEN8_OA_DEBUG, 1682 _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | 1683 GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); 1684 } 1685 1686 /* 1687 * Update all contexts prior writing the mux configurations as we need 1688 * to make sure all slices/subslices are ON before writing to NOA 1689 * registers. 1690 */ 1691 ret = gen8_configure_all_contexts(dev_priv, oa_config); 1692 if (ret) 1693 return ret; 1694 1695 config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); 1696 1697 config_oa_regs(dev_priv, oa_config->b_counter_regs, 1698 oa_config->b_counter_regs_len); 1699 1700 return 0; 1701 } 1702 1703 static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) 1704 { 1705 /* Reset all contexts' slices/subslices configurations. */ 1706 gen8_configure_all_contexts(dev_priv, NULL); 1707 1708 I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & 1709 ~GT_NOA_ENABLE)); 1710 1711 } 1712 1713 static void gen7_oa_enable(struct drm_i915_private *dev_priv) 1714 { 1715 /* 1716 * Reset buf pointers so we don't forward reports from before now. 1717 * 1718 * Think carefully if considering trying to avoid this, since it 1719 * also ensures status flags and the buffer itself are cleared 1720 * in error paths, and we have checks for invalid reports based 1721 * on the assumption that certain fields are written to zeroed 1722 * memory which this helps maintains. 
1723 */ 1724 gen7_init_oa_buffer(dev_priv); 1725 1726 if (dev_priv->perf.oa.exclusive_stream->enabled) { 1727 struct i915_gem_context *ctx = 1728 dev_priv->perf.oa.exclusive_stream->ctx; 1729 u32 ctx_id = dev_priv->perf.oa.specific_ctx_id; 1730 1731 bool periodic = dev_priv->perf.oa.periodic; 1732 u32 period_exponent = dev_priv->perf.oa.period_exponent; 1733 u32 report_format = dev_priv->perf.oa.oa_buffer.format; 1734 1735 I915_WRITE(GEN7_OACONTROL, 1736 (ctx_id & GEN7_OACONTROL_CTX_MASK) | 1737 (period_exponent << 1738 GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | 1739 (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | 1740 (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | 1741 (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | 1742 GEN7_OACONTROL_ENABLE); 1743 } else 1744 I915_WRITE(GEN7_OACONTROL, 0); 1745 } 1746 1747 static void gen8_oa_enable(struct drm_i915_private *dev_priv) 1748 { 1749 u32 report_format = dev_priv->perf.oa.oa_buffer.format; 1750 1751 /* 1752 * Reset buf pointers so we don't forward reports from before now. 1753 * 1754 * Think carefully if considering trying to avoid this, since it 1755 * also ensures status flags and the buffer itself are cleared 1756 * in error paths, and we have checks for invalid reports based 1757 * on the assumption that certain fields are written to zeroed 1758 * memory which this helps maintains. 1759 */ 1760 gen8_init_oa_buffer(dev_priv); 1761 1762 /* 1763 * Note: we don't rely on the hardware to perform single context 1764 * filtering and instead filter on the cpu based on the context-id 1765 * field of reports 1766 */ 1767 I915_WRITE(GEN8_OACONTROL, (report_format << 1768 GEN8_OA_REPORT_FORMAT_SHIFT) | 1769 GEN8_OA_COUNTER_ENABLE); 1770 } 1771 1772 /** 1773 * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream 1774 * @stream: An i915 perf stream opened for OA metrics 1775 * 1776 * [Re]enables hardware periodic sampling according to the period configured 1777 * when opening the stream. 
This also starts a hrtimer that will periodically 1778 * check for data in the circular OA buffer for notifying userspace (e.g. 1779 * during a read() or poll()). 1780 */ 1781 static void i915_oa_stream_enable(struct i915_perf_stream *stream) 1782 { 1783 struct drm_i915_private *dev_priv = stream->dev_priv; 1784 1785 dev_priv->perf.oa.ops.oa_enable(dev_priv); 1786 1787 if (dev_priv->perf.oa.periodic) 1788 hrtimer_start(&dev_priv->perf.oa.poll_check_timer, 1789 ns_to_ktime(POLL_PERIOD), 1790 HRTIMER_MODE_REL_PINNED); 1791 } 1792 1793 static void gen7_oa_disable(struct drm_i915_private *dev_priv) 1794 { 1795 I915_WRITE(GEN7_OACONTROL, 0); 1796 } 1797 1798 static void gen8_oa_disable(struct drm_i915_private *dev_priv) 1799 { 1800 I915_WRITE(GEN8_OACONTROL, 0); 1801 } 1802 1803 /** 1804 * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream 1805 * @stream: An i915 perf stream opened for OA metrics 1806 * 1807 * Stops the OA unit from periodically writing counter reports into the 1808 * circular OA buffer. This also stops the hrtimer that periodically checks for 1809 * data in the circular OA buffer, for notifying userspace. 
1810 */ 1811 static void i915_oa_stream_disable(struct i915_perf_stream *stream) 1812 { 1813 struct drm_i915_private *dev_priv = stream->dev_priv; 1814 1815 dev_priv->perf.oa.ops.oa_disable(dev_priv); 1816 1817 if (dev_priv->perf.oa.periodic) 1818 hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer); 1819 } 1820 1821 static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) 1822 { 1823 return div_u64(1000000000ULL * (2ULL << exponent), 1824 dev_priv->perf.oa.timestamp_frequency); 1825 } 1826 1827 static const struct i915_perf_stream_ops i915_oa_stream_ops = { 1828 .destroy = i915_oa_stream_destroy, 1829 .enable = i915_oa_stream_enable, 1830 .disable = i915_oa_stream_disable, 1831 .wait_unlocked = i915_oa_wait_unlocked, 1832 .poll_wait = i915_oa_poll_wait, 1833 .read = i915_oa_read, 1834 }; 1835 1836 /** 1837 * i915_oa_stream_init - validate combined props for OA stream and init 1838 * @stream: An i915 perf stream 1839 * @param: The open parameters passed to `DRM_I915_PERF_OPEN` 1840 * @props: The property state that configures stream (individually validated) 1841 * 1842 * While read_properties_unlocked() validates properties in isolation it 1843 * doesn't ensure that the combination necessarily makes sense. 1844 * 1845 * At this point it has been determined that userspace wants a stream of 1846 * OA metrics, but still we need to further validate the combined 1847 * properties are OK. 1848 * 1849 * If the configuration makes sense then we can allocate memory for 1850 * a circular OA buffer and apply the requested metric set configuration. 1851 * 1852 * Returns: zero on success or a negative error code. 
1853 */ 1854 static int i915_oa_stream_init(struct i915_perf_stream *stream, 1855 struct drm_i915_perf_open_param *param, 1856 struct perf_open_properties *props) 1857 { 1858 struct drm_i915_private *dev_priv = stream->dev_priv; 1859 int format_size; 1860 int ret; 1861 1862 /* If the sysfs metrics/ directory wasn't registered for some 1863 * reason then don't let userspace try their luck with config 1864 * IDs 1865 */ 1866 if (!dev_priv->perf.metrics_kobj) { 1867 DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); 1868 return -EINVAL; 1869 } 1870 1871 if (!(props->sample_flags & SAMPLE_OA_REPORT)) { 1872 DRM_DEBUG("Only OA report sampling supported\n"); 1873 return -EINVAL; 1874 } 1875 1876 if (!dev_priv->perf.oa.ops.init_oa_buffer) { 1877 DRM_DEBUG("OA unit not supported\n"); 1878 return -ENODEV; 1879 } 1880 1881 /* To avoid the complexity of having to accurately filter 1882 * counter reports and marshal to the appropriate client 1883 * we currently only allow exclusive access 1884 */ 1885 if (dev_priv->perf.oa.exclusive_stream) { 1886 DRM_DEBUG("OA unit already in use\n"); 1887 return -EBUSY; 1888 } 1889 1890 if (!props->oa_format) { 1891 DRM_DEBUG("OA report format not specified\n"); 1892 return -EINVAL; 1893 } 1894 1895 stream->sample_size = sizeof(struct drm_i915_perf_record_header); 1896 1897 format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size; 1898 1899 stream->sample_flags |= SAMPLE_OA_REPORT; 1900 stream->sample_size += format_size; 1901 1902 dev_priv->perf.oa.oa_buffer.format_size = format_size; 1903 if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0)) 1904 return -EINVAL; 1905 1906 dev_priv->perf.oa.oa_buffer.format = 1907 dev_priv->perf.oa.oa_formats[props->oa_format].format; 1908 1909 dev_priv->perf.oa.periodic = props->oa_periodic; 1910 if (dev_priv->perf.oa.periodic) { 1911 u32 tail; 1912 1913 dev_priv->perf.oa.period_exponent = props->oa_period_exponent; 1914 1915 /* See comment for OA_TAIL_MARGIN_NSEC for details 1916 * 
about this tail_margin... 1917 */ 1918 tail = div64_u64(OA_TAIL_MARGIN_NSEC, 1919 oa_exponent_to_ns(dev_priv, 1920 props->oa_period_exponent)); 1921 dev_priv->perf.oa.tail_margin = (tail + 1) * format_size; 1922 } 1923 1924 if (stream->ctx) { 1925 ret = oa_get_render_ctx_id(stream); 1926 if (ret) 1927 return ret; 1928 } 1929 1930 ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); 1931 if (ret) 1932 goto err_config; 1933 1934 /* PRM - observability performance counters: 1935 * 1936 * OACONTROL, performance counter enable, note: 1937 * 1938 * "When this bit is set, in order to have coherent counts, 1939 * RC6 power state and trunk clock gating must be disabled. 1940 * This can be achieved by programming MMIO registers as 1941 * 0xA094=0 and 0xA090[31]=1" 1942 * 1943 * In our case we are expecting that taking pm + FORCEWAKE 1944 * references will effectively disable RC6. 1945 */ 1946 intel_runtime_pm_get(dev_priv); 1947 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 1948 1949 ret = alloc_oa_buffer(dev_priv); 1950 if (ret) 1951 goto err_oa_buf_alloc; 1952 1953 ret = alloc_oa_buffer(dev_priv); 1954 if (ret) 1955 goto err_oa_buf_alloc; 1956 1957 ret = i915_mutex_lock_interruptible(&dev_priv->drm); 1958 if (ret) 1959 goto err_lock; 1960 1961 ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv, 1962 stream->oa_config); 1963 if (ret) 1964 goto err_enable; 1965 1966 stream->ops = &i915_oa_stream_ops; 1967 1968 dev_priv->perf.oa.exclusive_stream = stream; 1969 1970 mutex_unlock(&dev_priv->drm.struct_mutex); 1971 1972 return 0; 1973 1974 err_enable: 1975 dev_priv->perf.oa.ops.disable_metric_set(dev_priv); 1976 mutex_unlock(&dev_priv->drm.struct_mutex); 1977 1978 err_lock: 1979 free_oa_buffer(dev_priv); 1980 1981 err_oa_buf_alloc: 1982 put_oa_config(dev_priv, stream->oa_config); 1983 1984 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 1985 intel_runtime_pm_put(dev_priv); 1986 1987 err_config: 1988 if (stream->ctx) 1989 
oa_put_render_ctx_id(stream); 1990 1991 return ret; 1992 } 1993 #endif 1994 1995 void i915_oa_init_reg_state(struct intel_engine_cs *engine, 1996 struct i915_gem_context *ctx, 1997 u32 *reg_state) 1998 { 1999 #if 0 2000 struct drm_i915_private *dev_priv = engine->i915; 2001 struct i915_perf_stream *stream = dev_priv->perf.oa.exclusive_stream; 2002 2003 if (engine->id != RCS) 2004 return; 2005 2006 if (!dev_priv->perf.initialized) 2007 return; 2008 2009 gen8_update_reg_state_unlocked(ctx, reg_state); 2010 #endif 2011 } 2012 2013 #if 0 2014 /** 2015 * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation 2016 * @stream: An i915 perf stream 2017 * @file: An i915 perf stream file 2018 * @buf: destination buffer given by userspace 2019 * @count: the number of bytes userspace wants to read 2020 * @ppos: (inout) file seek position (unused) 2021 * 2022 * Besides wrapping &i915_perf_stream_ops->read this provides a common place to 2023 * ensure that if we've successfully copied any data then reporting that takes 2024 * precedence over any internal error status, so the data isn't lost. 2025 * 2026 * For example ret will be -ENOSPC whenever there is more buffered data than 2027 * can be copied to userspace, but that's only interesting if we weren't able 2028 * to copy some data because it implies the userspace buffer is too small to 2029 * receive a single record (and we never split records). 2030 * 2031 * Another case with ret == -EFAULT is more of a grey area since it would seem 2032 * like bad form for userspace to ask us to overrun its buffer, but the user 2033 * knows best: 2034 * 2035 * http://yarchive.net/comp/linux/partial_reads_writes.html 2036 * 2037 * Returns: The number of bytes copied or a negative error code on failure. 
2038 */ 2039 static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream, 2040 struct file *file, 2041 char __user *buf, 2042 size_t count, 2043 loff_t *ppos) 2044 { 2045 /* Note we keep the offset (aka bytes read) separate from any 2046 * error status so that the final check for whether we return 2047 * the bytes read with a higher precedence than any error (see 2048 * comment below) doesn't need to be handled/duplicated in 2049 * stream->ops->read() implementations. 2050 */ 2051 size_t offset = 0; 2052 int ret = stream->ops->read(stream, buf, count, &offset); 2053 2054 return offset ?: (ret ?: -EAGAIN); 2055 } 2056 2057 /** 2058 * i915_perf_read - handles read() FOP for i915 perf stream FDs 2059 * @file: An i915 perf stream file 2060 * @buf: destination buffer given by userspace 2061 * @count: the number of bytes userspace wants to read 2062 * @ppos: (inout) file seek position (unused) 2063 * 2064 * The entry point for handling a read() on a stream file descriptor from 2065 * userspace. Most of the work is left to the i915_perf_read_locked() and 2066 * &i915_perf_stream_ops->read but to save having stream implementations (of 2067 * which we might have multiple later) we handle blocking read here. 2068 * 2069 * We can also consistently treat trying to read from a disabled stream 2070 * as an IO error so implementations can assume the stream is enabled 2071 * while reading. 2072 * 2073 * Returns: The number of bytes copied or a negative error code on failure. 2074 */ 2075 static ssize_t i915_perf_read(struct file *file, 2076 char __user *buf, 2077 size_t count, 2078 loff_t *ppos) 2079 { 2080 struct i915_perf_stream *stream = file->private_data; 2081 struct drm_i915_private *dev_priv = stream->dev_priv; 2082 ssize_t ret; 2083 2084 /* To ensure it's handled consistently we simply treat all reads of a 2085 * disabled stream as an error. In particular it might otherwise lead 2086 * to a deadlock for blocking file descriptors... 
2087 */ 2088 if (!stream->enabled) 2089 return -EIO; 2090 2091 if (!(file->f_flags & O_NONBLOCK)) { 2092 /* There's the small chance of false positives from 2093 * stream->ops->wait_unlocked. 2094 * 2095 * E.g. with single context filtering since we only wait until 2096 * oabuffer has >= 1 report we don't immediately know whether 2097 * any reports really belong to the current context 2098 */ 2099 do { 2100 ret = stream->ops->wait_unlocked(stream); 2101 if (ret) 2102 return ret; 2103 2104 mutex_lock(&dev_priv->perf.lock); 2105 ret = i915_perf_read_locked(stream, file, 2106 buf, count, ppos); 2107 mutex_unlock(&dev_priv->perf.lock); 2108 } while (ret == -EAGAIN); 2109 } else { 2110 mutex_lock(&dev_priv->perf.lock); 2111 ret = i915_perf_read_locked(stream, file, buf, count, ppos); 2112 mutex_unlock(&dev_priv->perf.lock); 2113 } 2114 2115 if (ret >= 0) { 2116 /* Maybe make ->pollin per-stream state if we support multiple 2117 * concurrent streams in the future. 2118 */ 2119 dev_priv->perf.oa.pollin = false; 2120 } 2121 2122 return ret; 2123 } 2124 2125 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) 2126 { 2127 struct drm_i915_private *dev_priv = 2128 container_of(hrtimer, typeof(*dev_priv), 2129 perf.oa.poll_check_timer); 2130 2131 if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) { 2132 dev_priv->perf.oa.pollin = true; 2133 wake_up(&dev_priv->perf.oa.poll_wq); 2134 } 2135 2136 hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD)); 2137 2138 return HRTIMER_RESTART; 2139 } 2140 2141 /** 2142 * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream 2143 * @dev_priv: i915 device instance 2144 * @stream: An i915 perf stream 2145 * @file: An i915 perf stream file 2146 * @wait: poll() state table 2147 * 2148 * For handling userspace polling on an i915 perf stream, this calls through to 2149 * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that 2150 * will be woken for new stream data. 
2151 * 2152 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize 2153 * with any non-file-operation driver hooks. 2154 * 2155 * Returns: any poll events that are ready without sleeping 2156 */ 2157 static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv, 2158 struct i915_perf_stream *stream, 2159 struct file *file, 2160 poll_table *wait) 2161 { 2162 unsigned int events = 0; 2163 2164 stream->ops->poll_wait(stream, file, wait); 2165 2166 /* Note: we don't explicitly check whether there's something to read 2167 * here since this path may be very hot depending on what else 2168 * userspace is polling, or on the timeout in use. We rely solely on 2169 * the hrtimer/oa_poll_check_timer_cb to notify us when there are 2170 * samples to read. 2171 */ 2172 if (dev_priv->perf.oa.pollin) 2173 events |= POLLIN; 2174 2175 return events; 2176 } 2177 2178 /** 2179 * i915_perf_poll - call poll_wait() with a suitable wait queue for stream 2180 * @file: An i915 perf stream file 2181 * @wait: poll() state table 2182 * 2183 * For handling userspace polling on an i915 perf stream, this ensures 2184 * poll_wait() gets called with a wait queue that will be woken for new stream 2185 * data. 2186 * 2187 * Note: Implementation deferred to i915_perf_poll_locked() 2188 * 2189 * Returns: any poll events that are ready without sleeping 2190 */ 2191 static unsigned int i915_perf_poll(struct file *file, poll_table *wait) 2192 { 2193 struct i915_perf_stream *stream = file->private_data; 2194 struct drm_i915_private *dev_priv = stream->dev_priv; 2195 int ret; 2196 2197 mutex_lock(&dev_priv->perf.lock); 2198 ret = i915_perf_poll_locked(dev_priv, stream, file, wait); 2199 mutex_unlock(&dev_priv->perf.lock); 2200 2201 return ret; 2202 } 2203 2204 /** 2205 * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl 2206 * @stream: A disabled i915 perf stream 2207 * 2208 * [Re]enables the associated capture of data for this stream. 
2209 * 2210 * If a stream was previously enabled then there's currently no intention 2211 * to provide userspace any guarantee about the preservation of previously 2212 * buffered data. 2213 */ 2214 static void i915_perf_enable_locked(struct i915_perf_stream *stream) 2215 { 2216 if (stream->enabled) 2217 return; 2218 2219 /* Allow stream->ops->enable() to refer to this */ 2220 stream->enabled = true; 2221 2222 if (stream->ops->enable) 2223 stream->ops->enable(stream); 2224 } 2225 2226 /** 2227 * i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl 2228 * @stream: An enabled i915 perf stream 2229 * 2230 * Disables the associated capture of data for this stream. 2231 * 2232 * The intention is that disabling an re-enabling a stream will ideally be 2233 * cheaper than destroying and re-opening a stream with the same configuration, 2234 * though there are no formal guarantees about what state or buffered data 2235 * must be retained between disabling and re-enabling a stream. 2236 * 2237 * Note: while a stream is disabled it's considered an error for userspace 2238 * to attempt to read from the stream (-EIO). 2239 */ 2240 static void i915_perf_disable_locked(struct i915_perf_stream *stream) 2241 { 2242 if (!stream->enabled) 2243 return; 2244 2245 /* Allow stream->ops->disable() to refer to this */ 2246 stream->enabled = false; 2247 2248 if (stream->ops->disable) 2249 stream->ops->disable(stream); 2250 } 2251 2252 /** 2253 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs 2254 * @stream: An i915 perf stream 2255 * @cmd: the ioctl request 2256 * @arg: the ioctl data 2257 * 2258 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize 2259 * with any non-file-operation driver hooks. 2260 * 2261 * Returns: zero on success or a negative error code. Returns -EINVAL for 2262 * an unknown ioctl request. 
2263 */ 2264 static long i915_perf_ioctl_locked(struct i915_perf_stream *stream, 2265 unsigned int cmd, 2266 unsigned long arg) 2267 { 2268 switch (cmd) { 2269 case I915_PERF_IOCTL_ENABLE: 2270 i915_perf_enable_locked(stream); 2271 return 0; 2272 case I915_PERF_IOCTL_DISABLE: 2273 i915_perf_disable_locked(stream); 2274 return 0; 2275 } 2276 2277 return -EINVAL; 2278 } 2279 2280 /** 2281 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs 2282 * @file: An i915 perf stream file 2283 * @cmd: the ioctl request 2284 * @arg: the ioctl data 2285 * 2286 * Implementation deferred to i915_perf_ioctl_locked(). 2287 * 2288 * Returns: zero on success or a negative error code. Returns -EINVAL for 2289 * an unknown ioctl request. 2290 */ 2291 static long i915_perf_ioctl(struct file *file, 2292 unsigned int cmd, 2293 unsigned long arg) 2294 { 2295 struct i915_perf_stream *stream = file->private_data; 2296 struct drm_i915_private *dev_priv = stream->dev_priv; 2297 long ret; 2298 2299 mutex_lock(&dev_priv->perf.lock); 2300 ret = i915_perf_ioctl_locked(stream, cmd, arg); 2301 mutex_unlock(&dev_priv->perf.lock); 2302 2303 return ret; 2304 } 2305 2306 /** 2307 * i915_perf_destroy_locked - destroy an i915 perf stream 2308 * @stream: An i915 perf stream 2309 * 2310 * Frees all resources associated with the given i915 perf @stream, disabling 2311 * any associated data capture in the process. 2312 * 2313 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize 2314 * with any non-file-operation driver hooks. 
2315 */ 2316 static void i915_perf_destroy_locked(struct i915_perf_stream *stream) 2317 { 2318 if (stream->enabled) 2319 i915_perf_disable_locked(stream); 2320 2321 if (stream->ops->destroy) 2322 stream->ops->destroy(stream); 2323 2324 list_del(&stream->link); 2325 2326 if (stream->ctx) 2327 i915_gem_context_put(stream->ctx); 2328 2329 kfree(stream); 2330 } 2331 2332 /** 2333 * i915_perf_release - handles userspace close() of a stream file 2334 * @inode: anonymous inode associated with file 2335 * @file: An i915 perf stream file 2336 * 2337 * Cleans up any resources associated with an open i915 perf stream file. 2338 * 2339 * NB: close() can't really fail from the userspace point of view. 2340 * 2341 * Returns: zero on success or a negative error code. 2342 */ 2343 static int i915_perf_release(struct inode *inode, struct file *file) 2344 { 2345 struct i915_perf_stream *stream = file->private_data; 2346 struct drm_i915_private *dev_priv = stream->dev_priv; 2347 2348 mutex_lock(&dev_priv->perf.lock); 2349 i915_perf_destroy_locked(stream); 2350 mutex_unlock(&dev_priv->perf.lock); 2351 2352 return 0; 2353 } 2354 2355 2356 static const struct file_operations fops = { 2357 .owner = THIS_MODULE, 2358 .llseek = no_llseek, 2359 .release = i915_perf_release, 2360 .poll = i915_perf_poll, 2361 .read = i915_perf_read, 2362 .unlocked_ioctl = i915_perf_ioctl, 2363 /* Our ioctl have no arguments, so it's safe to use the same function 2364 * to handle 32bits compatibility. 2365 */ 2366 .compat_ioctl = i915_perf_ioctl, 2367 }; 2368 2369 2370 /** 2371 * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD 2372 * @dev_priv: i915 device instance 2373 * @param: The open parameters passed to 'DRM_I915_PERF_OPEN` 2374 * @props: individually validated u64 property value pairs 2375 * @file: drm file 2376 * 2377 * See i915_perf_ioctl_open() for interface details. 
2378 * 2379 * Implements further stream config validation and stream initialization on 2380 * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex 2381 * taken to serialize with any non-file-operation driver hooks. 2382 * 2383 * Note: at this point the @props have only been validated in isolation and 2384 * it's still necessary to validate that the combination of properties makes 2385 * sense. 2386 * 2387 * In the case where userspace is interested in OA unit metrics then further 2388 * config validation and stream initialization details will be handled by 2389 * i915_oa_stream_init(). The code here should only validate config state that 2390 * will be relevant to all stream types / backends. 2391 * 2392 * Returns: zero on success or a negative error code. 2393 */ 2394 static int 2395 i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, 2396 struct drm_i915_perf_open_param *param, 2397 struct perf_open_properties *props, 2398 struct drm_file *file) 2399 { 2400 struct i915_gem_context *specific_ctx = NULL; 2401 struct i915_perf_stream *stream = NULL; 2402 unsigned long f_flags = 0; 2403 bool privileged_op = true; 2404 int stream_fd; 2405 int ret; 2406 2407 if (props->single_context) { 2408 u32 ctx_handle = props->ctx_handle; 2409 struct drm_i915_file_private *file_priv = file->driver_priv; 2410 2411 specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle); 2412 if (!specific_ctx) { 2413 DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n", 2414 ctx_handle); 2415 ret = -ENOENT; 2416 goto err; 2417 } 2418 } 2419 2420 /* 2421 * On Haswell the OA unit supports clock gating off for a specific 2422 * context and in this mode there's no visibility of metrics for the 2423 * rest of the system, which we consider acceptable for a 2424 * non-privileged client. 
2425 * 2426 * For Gen8+ the OA unit no longer supports clock gating off for a 2427 * specific context and the kernel can't securely stop the counters 2428 * from updating as system-wide / global values. Even though we can 2429 * filter reports based on the included context ID we can't block 2430 * clients from seeing the raw / global counter values via 2431 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to 2432 * enable the OA unit by default. 2433 */ 2434 if (IS_HASWELL(dev_priv) && specific_ctx) 2435 privileged_op = false; 2436 2437 /* Similar to perf's kernel.perf_paranoid_cpu sysctl option 2438 * we check a dev.i915.perf_stream_paranoid sysctl option 2439 * to determine if it's ok to access system wide OA counters 2440 * without CAP_SYS_ADMIN privileges. 2441 */ 2442 if (privileged_op && 2443 i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { 2444 DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n"); 2445 ret = -EACCES; 2446 goto err_ctx; 2447 } 2448 2449 stream = kzalloc(sizeof(*stream), GFP_KERNEL); 2450 if (!stream) { 2451 ret = -ENOMEM; 2452 goto err_ctx; 2453 } 2454 2455 stream->dev_priv = dev_priv; 2456 stream->ctx = specific_ctx; 2457 2458 ret = i915_oa_stream_init(stream, param, props); 2459 if (ret) 2460 goto err_alloc; 2461 2462 /* we avoid simply assigning stream->sample_flags = props->sample_flags 2463 * to have _stream_init check the combination of sample flags more 2464 * thoroughly, but still this is the expected result at this point. 
2465 */ 2466 if (WARN_ON(stream->sample_flags != props->sample_flags)) { 2467 ret = -ENODEV; 2468 goto err_flags; 2469 } 2470 2471 list_add(&stream->link, &dev_priv->perf.streams); 2472 2473 if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) 2474 f_flags |= O_CLOEXEC; 2475 if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) 2476 f_flags |= O_NONBLOCK; 2477 2478 stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags); 2479 if (stream_fd < 0) { 2480 ret = stream_fd; 2481 goto err_open; 2482 } 2483 2484 if (!(param->flags & I915_PERF_FLAG_DISABLED)) 2485 i915_perf_enable_locked(stream); 2486 2487 return stream_fd; 2488 2489 err_open: 2490 list_del(&stream->link); 2491 err_flags: 2492 if (stream->ops->destroy) 2493 stream->ops->destroy(stream); 2494 err_alloc: 2495 kfree(stream); 2496 err_ctx: 2497 if (specific_ctx) 2498 i915_gem_context_put(specific_ctx); 2499 err: 2500 return ret; 2501 } 2502 2503 static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) 2504 { 2505 return div_u64(1000000000ULL * (2ULL << exponent), 2506 dev_priv->perf.oa.timestamp_frequency); 2507 } 2508 2509 /** 2510 * read_properties_unlocked - validate + copy userspace stream open properties 2511 * @dev_priv: i915 device instance 2512 * @uprops: The array of u64 key value pairs given by userspace 2513 * @n_props: The number of key value pairs expected in @uprops 2514 * @props: The stream configuration built up while validating properties 2515 * 2516 * Note this function only validates properties in isolation it doesn't 2517 * validate that the combination of properties makes sense or that all 2518 * properties necessary for a particular kind of stream have been set. 2519 * 2520 * Note that there currently aren't any ordering requirements for properties so 2521 * we shouldn't validate or assume anything about ordering here. This doesn't 2522 * rule out defining new properties with ordering requirements in the future. 
2523 */ 2524 static int read_properties_unlocked(struct drm_i915_private *dev_priv, 2525 u64 __user *uprops, 2526 u32 n_props, 2527 struct perf_open_properties *props) 2528 { 2529 u64 __user *uprop = uprops; 2530 u32 i; 2531 2532 memset(props, 0, sizeof(struct perf_open_properties)); 2533 2534 if (!n_props) { 2535 DRM_DEBUG("No i915 perf properties given\n"); 2536 return -EINVAL; 2537 } 2538 2539 /* Considering that ID = 0 is reserved and assuming that we don't 2540 * (currently) expect any configurations to ever specify duplicate 2541 * values for a particular property ID then the last _PROP_MAX value is 2542 * one greater than the maximum number of properties we expect to get 2543 * from userspace. 2544 */ 2545 if (n_props >= DRM_I915_PERF_PROP_MAX) { 2546 DRM_DEBUG("More i915 perf properties specified than exist\n"); 2547 return -EINVAL; 2548 } 2549 2550 for (i = 0; i < n_props; i++) { 2551 u64 oa_period, oa_freq_hz; 2552 u64 id, value; 2553 int ret; 2554 2555 ret = get_user(id, uprop); 2556 if (ret) 2557 return ret; 2558 2559 ret = get_user(value, uprop + 1); 2560 if (ret) 2561 return ret; 2562 2563 if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) { 2564 DRM_DEBUG("Unknown i915 perf property ID\n"); 2565 return -EINVAL; 2566 } 2567 2568 switch ((enum drm_i915_perf_property_id)id) { 2569 case DRM_I915_PERF_PROP_CTX_HANDLE: 2570 props->single_context = 1; 2571 props->ctx_handle = value; 2572 break; 2573 case DRM_I915_PERF_PROP_SAMPLE_OA: 2574 props->sample_flags |= SAMPLE_OA_REPORT; 2575 break; 2576 case DRM_I915_PERF_PROP_OA_METRICS_SET: 2577 if (value == 0) { 2578 DRM_DEBUG("Unknown OA metric set ID\n"); 2579 return -EINVAL; 2580 } 2581 props->metrics_set = value; 2582 break; 2583 case DRM_I915_PERF_PROP_OA_FORMAT: 2584 if (value == 0 || value >= I915_OA_FORMAT_MAX) { 2585 DRM_DEBUG("Invalid OA report format\n"); 2586 return -EINVAL; 2587 } 2588 if (!dev_priv->perf.oa.oa_formats[value].size) { 2589 DRM_DEBUG("Invalid OA report format\n"); 2590 return -EINVAL; 2591 
} 2592 props->oa_format = value; 2593 break; 2594 case DRM_I915_PERF_PROP_OA_EXPONENT: 2595 if (value > OA_EXPONENT_MAX) { 2596 DRM_DEBUG("OA timer exponent too high (> %u)\n", 2597 OA_EXPONENT_MAX); 2598 return -EINVAL; 2599 } 2600 2601 /* Theoretically we can program the OA unit to sample 2602 * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns 2603 * for BXT. We don't allow such high sampling 2604 * frequencies by default unless root. 2605 */ 2606 2607 BUILD_BUG_ON(sizeof(oa_period) != 8); 2608 oa_period = oa_exponent_to_ns(dev_priv, value); 2609 2610 /* This check is primarily to ensure that oa_period <= 2611 * UINT32_MAX (before passing to do_div which only 2612 * accepts a u32 denominator), but we can also skip 2613 * checking anything < 1Hz which implicitly can't be 2614 * limited via an integer oa_max_sample_rate. 2615 */ 2616 if (oa_period <= NSEC_PER_SEC) { 2617 u64 tmp = NSEC_PER_SEC; 2618 do_div(tmp, oa_period); 2619 oa_freq_hz = tmp; 2620 } else 2621 oa_freq_hz = 0; 2622 2623 if (oa_freq_hz > i915_oa_max_sample_rate && 2624 !capable(CAP_SYS_ADMIN)) { 2625 DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n", 2626 i915_oa_max_sample_rate); 2627 return -EACCES; 2628 } 2629 2630 props->oa_periodic = true; 2631 props->oa_period_exponent = value; 2632 break; 2633 case DRM_I915_PERF_PROP_MAX: 2634 MISSING_CASE(id); 2635 return -EINVAL; 2636 } 2637 2638 uprop += 2; 2639 } 2640 2641 return 0; 2642 } 2643 #endif 2644 2645 /** 2646 * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD 2647 * @dev: drm device 2648 * @data: ioctl data copied from userspace (unvalidated) 2649 * @file: drm file 2650 * 2651 * Validates the stream open parameters given by userspace including flags 2652 * and an array of u64 key, value pair properties. 
2653 * 2654 * Very little is assumed up front about the nature of the stream being 2655 * opened (for instance we don't assume it's for periodic OA unit metrics). An 2656 * i915-perf stream is expected to be a suitable interface for other forms of 2657 * buffered data written by the GPU besides periodic OA metrics. 2658 * 2659 * Note we copy the properties from userspace outside of the i915 perf 2660 * mutex to avoid an awkward lockdep with mmap_sem. 2661 * 2662 * Most of the implementation details are handled by 2663 * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock 2664 * mutex for serializing with any non-file-operation driver hooks. 2665 * 2666 * Return: A newly opened i915 Perf stream file descriptor or negative 2667 * error code on failure. 2668 */ 2669 int i915_perf_open_ioctl(struct drm_device *dev, void *data, 2670 struct drm_file *file) 2671 { 2672 #if 0 2673 struct drm_i915_private *dev_priv = dev->dev_private; 2674 struct drm_i915_perf_open_param *param = data; 2675 struct perf_open_properties props; 2676 u32 known_open_flags; 2677 int ret; 2678 2679 if (!dev_priv->perf.initialized) { 2680 #endif 2681 DRM_DEBUG("i915 perf interface not available for this system\n"); 2682 return -ENOTSUPP; 2683 #if 0 2684 } 2685 2686 known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | 2687 I915_PERF_FLAG_FD_NONBLOCK | 2688 I915_PERF_FLAG_DISABLED; 2689 if (param->flags & ~known_open_flags) { 2690 DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n"); 2691 return -EINVAL; 2692 } 2693 2694 ret = read_properties_unlocked(dev_priv, 2695 u64_to_user_ptr(param->properties_ptr), 2696 param->num_properties, 2697 &props); 2698 if (ret) 2699 return ret; 2700 2701 mutex_lock(&dev_priv->perf.lock); 2702 ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file); 2703 mutex_unlock(&dev_priv->perf.lock); 2704 2705 return ret; 2706 #endif 2707 } 2708 2709 /** 2710 * i915_perf_register - exposes i915-perf to userspace 2711 * @dev_priv: i915 device instance 
 *
 * In particular OA metric sets are advertised under a sysfs metrics/
 * directory allowing userspace to enumerate valid IDs that can be
 * used to open an i915-perf stream.
 *
 * Note: the entire body of this function is inside ``#if 0``, so as written
 * it compiles to an empty function and nothing is created in sysfs.
 */
void i915_perf_register(struct drm_i915_private *dev_priv)
{
#if 0
	int ret;

	if (!dev_priv->perf.initialized)
		return;

	/* To be sure we're synchronized with an attempted
	 * i915_perf_open_ioctl(); considering that we register after
	 * being exposed to userspace.
	 */
	mutex_lock(&dev_priv->perf.lock);

	dev_priv->perf.metrics_kobj =
		kobject_create_and_add("metrics",
				       &dev_priv->drm.primary->kdev->kobj);
	if (!dev_priv->perf.metrics_kobj)
		goto exit;

	sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr);

	/* Pick the per-platform loader for the test config. */
	if (IS_HASWELL(dev_priv)) {
		i915_perf_load_test_config_hsw(dev_priv);
	} else if (IS_BROADWELL(dev_priv)) {
		i915_perf_load_test_config_bdw(dev_priv);
	} else if (IS_CHERRYVIEW(dev_priv)) {
		i915_perf_load_test_config_chv(dev_priv);
	} else if (IS_SKYLAKE(dev_priv)) {
		if (IS_SKL_GT2(dev_priv))
			i915_perf_load_test_config_sklgt2(dev_priv);
		else if (IS_SKL_GT3(dev_priv))
			i915_perf_load_test_config_sklgt3(dev_priv);
		else if (IS_SKL_GT4(dev_priv))
			i915_perf_load_test_config_sklgt4(dev_priv);
	} else if (IS_BROXTON(dev_priv)) {
		i915_perf_load_test_config_bxt(dev_priv);
	} else if (IS_KABYLAKE(dev_priv)) {
		if (IS_KBL_GT2(dev_priv))
			i915_perf_load_test_config_kblgt2(dev_priv);
		else if (IS_KBL_GT3(dev_priv))
			i915_perf_load_test_config_kblgt3(dev_priv);
	} else if (IS_GEMINILAKE(dev_priv)) {
		i915_perf_load_test_config_glk(dev_priv);
	} else if (IS_COFFEELAKE(dev_priv)) {
		if (IS_CFL_GT2(dev_priv))
			i915_perf_load_test_config_cflgt2(dev_priv);
	}

	/* An id of 0 is treated as "no test config was loaded". */
	if (dev_priv->perf.oa.test_config.id == 0)
		goto sysfs_error;

	ret = sysfs_create_group(dev_priv->perf.metrics_kobj,
				 &dev_priv->perf.oa.test_config.sysfs_metric);
	if (ret)
		goto sysfs_error;

	atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);

	goto exit;

sysfs_error:
	/* Drop the metrics kobject again so unregister sees NULL. */
	kobject_put(dev_priv->perf.metrics_kobj);
	dev_priv->perf.metrics_kobj = NULL;

exit:
	mutex_unlock(&dev_priv->perf.lock);
#endif
}

/**
 * i915_perf_unregister - hide i915-perf from userspace
 * @dev_priv: i915 device instance
 *
 * i915-perf state cleanup is split up into an 'unregister' and
 * 'deinit' phase where the interface is first hidden from
 * userspace by i915_perf_unregister() before cleaning up
 * remaining state in i915_perf_fini().
 *
 * Note: only the two early-return checks are compiled; the actual teardown
 * is inside ``#if 0``, so as written this function changes no state.
 *
 * NOTE(review): the disabled teardown is gated on IS_HASWELL() and calls
 * i915_perf_unregister_sysfs_hsw(), whereas the disabled code in
 * i915_perf_register() creates the group with sysfs_create_group() for many
 * platforms. The two need reconciling before either is re-enabled.
 */
void i915_perf_unregister(struct drm_i915_private *dev_priv)
{
	if (!IS_HASWELL(dev_priv))
		return;

	/* Nothing to hide if the metrics kobject was never created. */
	if (!dev_priv->perf.metrics_kobj)
		return;

#if 0
	i915_perf_unregister_sysfs_hsw(dev_priv);

	kobject_put(dev_priv->perf.metrics_kobj);
	dev_priv->perf.metrics_kobj = NULL;
#endif
}

/*
 * sysctl tables, compiled out via #if 0. The only reference visible here is
 * the (also disabled) register_sysctl_table(dev_root) call in
 * i915_perf_init(). The tables chain "dev" -> "i915" -> the two integer
 * entries through their .child members.
 *
 * NOTE(review): the overview comment at the top of this file refers to the
 * dev.i915.perf_event_paranoid sysctl, while the entry below is named
 * "perf_stream_paranoid" -- confirm which name is intended.
 */
#if 0
static struct ctl_table oa_table[] = {
	{
	 .procname = "perf_stream_paranoid",
	 .data = &i915_perf_stream_paranoid,
	 .maxlen = sizeof(i915_perf_stream_paranoid),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = &zero,
	 .extra2 = &one,
	 },
	{
	 .procname = "oa_max_sample_rate",
	 .data = &i915_oa_max_sample_rate,
	 .maxlen = sizeof(i915_oa_max_sample_rate),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = &zero,
	 .extra2 = &oa_sample_rate_hard_limit,
	 },
	{}
};

static struct ctl_table i915_root[] = {
	{
	 .procname = "i915",
	 .maxlen = 0,
	 .mode = 0555,
	 .child = oa_table,
	 },
	{}
};

static struct ctl_table dev_root[] = {
	{
	 .procname = "dev",
	 .maxlen = 0,
	 .mode = 0555,
	 .child = i915_root,
	 },
	{}
};
#endif

/**
 * i915_perf_init - initialize i915-perf state on module load
 * @dev_priv: i915 device instance
 *
 * Initializes i915-perf state without exposing anything to userspace.
 *
 * Note: i915-perf initialization is split into an 'init' and 'register'
 * phase with the i915_perf_register() exposing state to userspace.
 *
 * Note: the only statement compiled here is the reset of
 * perf.oa.timestamp_frequency to 0. The per-platform ops setup, the
 * timer/lock/list initialization and the assignment of perf.initialized are
 * all inside ``#if 0``.
 */
void i915_perf_init(struct drm_i915_private *dev_priv)
{
	/* Live code: 0 is the value the disabled code treats as "unsupported". */
	dev_priv->perf.oa.timestamp_frequency = 0;

#if 0
	if (IS_HASWELL(dev_priv)) {
		dev_priv->perf.oa.ops.is_valid_b_counter_reg =
			gen7_is_valid_b_counter_addr;
		dev_priv->perf.oa.ops.is_valid_mux_reg =
			hsw_is_valid_mux_addr;
		dev_priv->perf.oa.ops.is_valid_flex_reg = NULL;
		dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer;
		dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
		dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set;
		dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
		dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
		dev_priv->perf.oa.ops.read = gen7_oa_read;
		dev_priv->perf.oa.ops.oa_hw_tail_read =
			gen7_oa_hw_tail_read;

		dev_priv->perf.oa.timestamp_frequency = 12500000;

		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
	} else if (i915_modparams.enable_execlists) {
		/* Note that although we could theoretically also support the
		 * legacy ringbuffer mode on BDW (and earlier iterations of
		 * this driver, before upstreaming did this) it didn't seem
		 * worth the complexity to maintain now that BDW+ enable
		 * execlist mode by default.
		 */
		dev_priv->perf.oa.ops.is_valid_b_counter_reg =
			gen7_is_valid_b_counter_addr;
		dev_priv->perf.oa.ops.is_valid_mux_reg =
			gen8_is_valid_mux_addr;
		dev_priv->perf.oa.ops.is_valid_flex_reg =
			gen8_is_valid_flex_addr;

		dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer;
		dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
		dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set;
		dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable;
		dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable;
		dev_priv->perf.oa.ops.read = gen8_oa_read;
		dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

		dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats;

		if (IS_GEN8(dev_priv)) {
			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
			dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;

			dev_priv->perf.oa.timestamp_frequency = 12500000;

			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
			if (IS_CHERRYVIEW(dev_priv)) {
				dev_priv->perf.oa.ops.is_valid_mux_reg =
					chv_is_valid_mux_addr;
			}
		} else if (IS_GEN9(dev_priv)) {
			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
			dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;

			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);

			switch (dev_priv->info.platform) {
			case INTEL_BROXTON:
			case INTEL_GEMINILAKE:
				dev_priv->perf.oa.timestamp_frequency = 19200000;
				break;
			case INTEL_SKYLAKE:
			case INTEL_KABYLAKE:
			case INTEL_COFFEELAKE:
				dev_priv->perf.oa.timestamp_frequency = 12000000;
				break;
			default:
				/* Leave timestamp_frequency to 0 so we can
				 * detect unsupported platforms.
				 */
				break;
			}
		}
	}

	/* A non-zero frequency means one of the branches above matched. */
	if (dev_priv->perf.oa.timestamp_frequency) {
		hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
				CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
		init_waitqueue_head(&dev_priv->perf.oa.poll_wq);

		INIT_LIST_HEAD(&dev_priv->perf.streams);
		mutex_init(&dev_priv->perf.lock);
		spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);

		oa_sample_rate_hard_limit =
			dev_priv->perf.oa.timestamp_frequency / 2;
		dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);

		mutex_init(&dev_priv->perf.metrics_lock);
		idr_init(&dev_priv->perf.metrics_idr);

		dev_priv->perf.initialized = true;
	}
#endif
}

/**
 * i915_perf_fini - Counterpart to i915_perf_init()
 * @dev_priv: i915 device instance
 *
 * Returns early unless perf.initialized is set; otherwise clears the flag.
 * The sysctl unregistration and the zeroing of perf.oa.ops are inside
 * ``#if 0`` and are not compiled.
 *
 * NOTE(review): no code compiled in i915_perf_init() sets perf.initialized,
 * so unless it is set elsewhere this function always takes the early return.
 * Also, the disabled init path calls mutex_init()/idr_init() on
 * perf.metrics_lock/perf.metrics_idr, but the disabled code here has no
 * matching teardown -- check this before re-enabling.
 */
void i915_perf_fini(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.initialized)
		return;

#if 0
	unregister_sysctl_table(dev_priv->perf.sysctl_header);

	memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));

#endif
	dev_priv->perf.initialized = false;
}
