1 /*
2  * Copyright © 2020-2021 Collabora, Ltd.
3  * Author: Antonio Caggiano <antonio.caggiano@collabora.com>
4  *
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #pragma once
9 
10 #include <pps/pps_driver.h>
11 
12 #include "intel_pps_perf.h"
13 
14 namespace pps
15 {
/// @brief Timestamp correlation between CPU/GPU.
///
/// Both members carry a default initializer so that a default-initialized
/// correlation never exposes indeterminate values. The type is still an
/// aggregate, hence `TimestampCorrelation{cpu, gpu}` and `= {}` keep working.
struct TimestampCorrelation {
   /// In CLOCK_MONOTONIC
   uint64_t cpu_timestamp = 0;

   /// Engine timestamp associated with the OA unit
   uint64_t gpu_timestamp = 0;
};
24 
/// @brief One record generated by the Intel Observation Architecture (OA),
/// held as a variable-length sequence of bytes.
using PerfRecord = std::vector<uint8_t>;
27 
28 /// @brief PPS Driver implementation for Intel graphics devices.
29 /// When sampling it may collect multiple perf-records at once. Each perf-record holds multiple
30 /// counter values. Those values are continuously incremented by the GPU. In order to get a delta,
31 /// the driver computes an _accumulation_ (`last_perf_record - previous_perf_record`).
32 /// For optimization purposes, it might ignore some perf-records, considering only those
33 /// perf-records close to the boundary of the sampling period range.
34 class IntelDriver : public Driver
35 {
36    public:
37    std::optional<TimestampCorrelation> query_correlation_timestamps() const;
38    void get_new_correlation();
39 
40    /// @brief OA reports only have the lower 32 bits of the timestamp
41    /// register, while correlation data has the whole 36 bits.
42    /// @param gpu_ts a 32 bit OA report GPU timestamp
43    /// @return The CPU timestamp relative to the argument
44    uint64_t correlate_gpu_timestamp(uint32_t gpu_ts);
45 
46    uint64_t get_min_sampling_period_ns() override;
47    bool init_perfcnt() override;
48    void enable_counter(uint32_t counter_id) override;
49    void enable_all_counters() override;
50    void enable_perfcnt(uint64_t sampling_period_ns) override;
51    void disable_perfcnt() override;
52    bool dump_perfcnt() override;
53    uint64_t next() override;
54 
55    /// @brief Requests the next perf sample
56    /// @return The sample GPU timestamp
57    uint32_t gpu_next();
58 
59    /// @brief Requests the next perf sample accumulating those which
60    /// which duration is shorter than the requested sampling period
61    /// @return The sample CPU timestamp
62    uint64_t cpu_next();
63 
64    /// @param data Buffer of bytes to parse
65    /// @param byte_count Number of bytes to parse
66    /// @return A list of perf records parsed from raw data passed as input
67    std::vector<PerfRecord> parse_perf_records(const std::vector<uint8_t> &data, size_t byte_count);
68 
69    /// @brief Reads data from the GPU metric set
70    void read_data_from_metric_set();
71 
72    /// Sampling period in nanoseconds requested by the datasource
73    uint64_t sampling_period_ns = 0;
74 
75    /// Keep track of the timestamp of the last sample generated
76    uint64_t last_cpu_timestamp = 0;
77 
78    /// This is used to correlate CPU and GPU timestamps
79    std::array<TimestampCorrelation, 64> correlations;
80 
81    /// Data buffer used to store data read from the metric set
82    std::vector<uint8_t> metric_buffer = std::vector<uint8_t>(1024, 0);
83    /// Number of bytes read so far still un-parsed.
84    /// Reset once bytes from the metric buffer are parsed to perf records
85    size_t total_bytes_read = 0;
86 
87    /// List of OA perf records read so far
88    std::vector<PerfRecord> records;
89 
90    std::unique_ptr<IntelPerf> perf;
91 
92    // Accumulations are stored here
93    struct intel_perf_query_result result = {};
94 };
95 
96 } // namespace pps
97