1 /* 2 * Copyright © 2020-2021 Collabora, Ltd. 3 * Author: Antonio Caggiano <antonio.caggiano@collabora.com> 4 * 5 * SPDX-License-Identifier: MIT 6 */ 7 8 #pragma once 9 10 #include <pps/pps_driver.h> 11 12 #include "intel_pps_perf.h" 13 14 namespace pps 15 { 16 /// Timestamp correlation between CPU/GPU. 17 struct TimestampCorrelation { 18 /// In CLOCK_MONOTONIC 19 uint64_t cpu_timestamp; 20 21 /// Engine timestamp associated with the OA unit 22 uint64_t gpu_timestamp; 23 }; 24 25 /// @brief Variable length sequence of bytes generated by Intel Obstervation Architecture (OA) 26 using PerfRecord = std::vector<uint8_t>; 27 28 /// @brief PPS Driver implementation for Intel graphics devices. 29 /// When sampling it may collect multiple perf-records at once. Each perf-record holds multiple 30 /// counter values. Those values are continuously incremented by the GPU. In order to get a delta, 31 /// the driver computes an _accumulation_ (`last_perf_record - previous_perf_record`). 32 /// For optimization purposes, it might ignore some perf-records, considering only those 33 /// perf-records close to the boundary of the sampling period range. 34 class IntelDriver : public Driver 35 { 36 public: 37 std::optional<TimestampCorrelation> query_correlation_timestamps() const; 38 void get_new_correlation(); 39 40 /// @brief OA reports only have the lower 32 bits of the timestamp 41 /// register, while correlation data has the whole 36 bits. 42 /// @param gpu_ts a 32 bit OA report GPU timestamp 43 /// @return The CPU timestamp relative to the argument 44 uint64_t correlate_gpu_timestamp(uint32_t gpu_ts); 45 46 uint64_t get_min_sampling_period_ns() override; 47 bool init_perfcnt() override; 48 void enable_counter(uint32_t counter_id) override; 49 void enable_all_counters() override; 50 void enable_perfcnt(uint64_t sampling_period_ns) override; 51 void disable_perfcnt() override; 52 bool dump_perfcnt() override; 53 uint64_t next() override; 54 55 /// @brief Requests the next perf sample 56 /// @return The sample GPU timestamp 57 uint32_t gpu_next(); 58 59 /// @brief Requests the next perf sample accumulating those which 60 /// which duration is shorter than the requested sampling period 61 /// @return The sample CPU timestamp 62 uint64_t cpu_next(); 63 64 /// @param data Buffer of bytes to parse 65 /// @param byte_count Number of bytes to parse 66 /// @return A list of perf records parsed from raw data passed as input 67 std::vector<PerfRecord> parse_perf_records(const std::vector<uint8_t> &data, size_t byte_count); 68 69 /// @brief Reads data from the GPU metric set 70 void read_data_from_metric_set(); 71 72 /// Sampling period in nanoseconds requested by the datasource 73 uint64_t sampling_period_ns = 0; 74 75 /// Keep track of the timestamp of the last sample generated 76 uint64_t last_cpu_timestamp = 0; 77 78 /// This is used to correlate CPU and GPU timestamps 79 std::array<TimestampCorrelation, 64> correlations; 80 81 /// Data buffer used to store data read from the metric set 82 std::vector<uint8_t> metric_buffer = std::vector<uint8_t>(1024, 0); 83 /// Number of bytes read so far still un-parsed. 84 /// Reset once bytes from the metric buffer are parsed to perf records 85 size_t total_bytes_read = 0; 86 87 /// List of OA perf records read so far 88 std::vector<PerfRecord> records; 89 90 std::unique_ptr<IntelPerf> perf; 91 92 // Accumulations are stored here 93 struct intel_perf_query_result result = {}; 94 }; 95 96 } // namespace pps 97