1 /*
2  * Copyright © 2021 Collabora, Ltd.
3  * Author: Antonio Caggiano <antonio.caggiano@collabora.com>
4  *
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #include "intel_pps_perf.h"
9 
10 #include <i915_drm.h>
11 #include <math.h>
12 #include <sys/ioctl.h>
13 #include <util/ralloc.h>
14 #include <utility>
15 
16 #include <pps/pps.h>
17 #include <pps/pps_device.h>
18 
19 namespace pps
20 {
/// @brief Issues an ioctl, retrying for as long as it fails with EINTR or EAGAIN
/// @param fd File descriptor the request is sent to
/// @param request ioctl request code
/// @param arg Request-specific argument, forwarded untouched
/// @return The result of the last ioctl call
int perf_ioctl(int fd, unsigned long request, void *arg)
{
   for (;;) {
      const int result = ioctl(fd, request, arg);

      // Only a failure caused by an interruption or a temporary condition is retried
      const bool should_retry = (result == -1) && (errno == EINTR || errno == EAGAIN);
      if (!should_retry) {
         return result;
      }
   }
}
31 
IntelPerf(const int drm_fd)32 IntelPerf::IntelPerf(const int drm_fd)
33    : drm_fd {drm_fd}
34    , ralloc_ctx {ralloc_context(nullptr)}
35    , ralloc_cfg {ralloc_context(nullptr)}
36    , cfg {intel_perf_new(ralloc_cfg)}
37 {
38    assert(drm_fd >= 0 && "DRM fd is not valid");
39 
40    if (!intel_get_device_info_from_fd(drm_fd, &devinfo)) {
41       PPS_LOG_FATAL("Failed to get devinfo");
42    }
43 
44    intel_perf_init_metrics(cfg,
45       &devinfo,
46       drm_fd,
47       false, // no pipeline statistics
48       false  // no register snapshots
49    );
50 
51    // Enable RenderBasic counters
52    auto query_name = "RenderBasic";
53    query = find_query_by_name(query_name);
54    if (!query) {
55       PPS_LOG_FATAL("Failed to find %s query", query_name);
56    }
57 }
58 
IntelPerf(IntelPerf && o)59 IntelPerf::IntelPerf(IntelPerf &&o)
60    : drm_fd {o.drm_fd}
61    , ralloc_ctx {o.ralloc_ctx}
62    , ralloc_cfg {o.ralloc_cfg}
63    , ctx {o.ctx}
64    , cfg {o.cfg}
65    , devinfo {std::move(o.devinfo)}
66    , query {std::move(o.query)}
67 {
68    o.drm_fd = -1;
69    o.ralloc_ctx = nullptr;
70    o.ralloc_cfg = nullptr;
71    o.ctx = nullptr;
72    o.cfg = nullptr;
73 }
74 
operator =(IntelPerf && o)75 IntelPerf &IntelPerf::operator=(IntelPerf &&o) noexcept
76 {
77    std::swap(drm_fd, o.drm_fd);
78    std::swap(ralloc_ctx, o.ralloc_ctx);
79    std::swap(ralloc_cfg, o.ralloc_cfg);
80    std::swap(ctx, o.ctx);
81    std::swap(cfg, o.cfg);
82    std::swap(devinfo, o.devinfo);
83    std::swap(query, o.query);
84    return *this;
85 }
86 
~IntelPerf()87 IntelPerf::~IntelPerf()
88 {
89    close();
90 
91    if (ralloc_ctx) {
92       ralloc_free(ralloc_ctx);
93    }
94 
95    if (ralloc_cfg) {
96       ralloc_free(ralloc_cfg);
97    }
98 }
99 
100 /// @return A query info, which is something like a group of counters
find_query_by_name(const std::string & name) const101 std::optional<struct intel_perf_query_info> IntelPerf::find_query_by_name(
102    const std::string &name) const
103 {
104    for (int i = 0; i < cfg->n_queries; ++i) {
105       struct intel_perf_query_info query = cfg->queries[i];
106       if (name == query.symbol_name) {
107          return query;
108       }
109    }
110 
111    return std::nullopt;
112 }
113 
get_queries() const114 std::vector<struct intel_perf_query_info *> IntelPerf::get_queries() const
115 {
116    assert(cfg && "Intel perf config should be valid");
117    assert(cfg->n_queries && "Intel perf queries not initialized");
118 
119    std::vector<struct intel_perf_query_info *> queries = {};
120 
121    for (int i = 0; i < cfg->n_queries; ++i) {
122       struct intel_perf_query_info *query = &cfg->queries[i];
123       // Skip invalid queries
124       if (query && query->symbol_name) {
125          queries.push_back(query);
126       }
127    }
128 
129    return queries;
130 }
131 
query_timestamp_frequency(const int drm_fd)132 static uint64_t query_timestamp_frequency(const int drm_fd)
133 {
134    int timestamp_frequency;
135 
136    drm_i915_getparam_t gp = {};
137    gp.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY;
138    gp.value = &timestamp_frequency;
139    if (perf_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0) {
140       return timestamp_frequency;
141    }
142 
143    PPS_LOG_ERROR("Unable to query timestamp frequency from i915, guessing values...");
144    return 12000000;
145 }
146 
147 // The period_exponent gives a sampling period as follows:
148 // sample_period = timestamp_period * 2^(period_exponent + 1)
149 // where timestamp_period is 80ns for Haswell+
get_oa_exponent(const int drm_fd,const uint64_t sampling_period_ns)150 static uint32_t get_oa_exponent(const int drm_fd, const uint64_t sampling_period_ns)
151 {
152    uint64_t timestamp_frequency = query_timestamp_frequency(drm_fd);
153    return static_cast<uint32_t>(log2(sampling_period_ns * timestamp_frequency / 1000000000ull)) - 1;
154 }
155 
open(const uint64_t sampling_period_ns)156 bool IntelPerf::open(const uint64_t sampling_period_ns)
157 {
158    assert(!ctx && "Perf context should not be initialized at this point");
159 
160    ctx = intel_perf_new_context(ralloc_ctx);
161    intel_perf_init_context(ctx, cfg, nullptr, nullptr, nullptr, &devinfo, 0, drm_fd);
162 
163    auto oa_exponent = get_oa_exponent(drm_fd, sampling_period_ns);
164 
165    return intel_perf_open(ctx,
166       query->oa_metrics_set_id,
167       query->oa_format,
168       oa_exponent,
169       drm_fd,
170       INTEL_PERF_INVALID_CTX_ID,
171       true /* enable stream immediately */);
172 }
173 
close()174 void IntelPerf::close()
175 {
176    if (ctx) {
177       intel_perf_close(ctx, nullptr);
178       ctx = nullptr;
179    }
180 }
181 
oa_stream_ready() const182 bool IntelPerf::oa_stream_ready() const
183 {
184    assert(ctx && "Perf context was not open");
185    return intel_perf_oa_stream_ready(ctx);
186 }
187 
read_oa_stream(void * buf,size_t bytes) const188 ssize_t IntelPerf::read_oa_stream(void *buf, size_t bytes) const
189 {
190    assert(ctx && "Perf context was not open");
191    return intel_perf_read_oa_stream(ctx, buf, bytes);
192 }
193 
194 } // namespace pps
195