1 /*
2  * Copyright © 2021 Google, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "fd_pps_driver.h"
8 
9 #include <cstring>
10 #include <iostream>
11 #include <perfetto.h>
12 
13 #include "pps/pps.h"
14 #include "pps/pps_algorithm.h"
15 
16 namespace pps
17 {
18 
19 uint64_t
get_min_sampling_period_ns()20 FreedrenoDriver::get_min_sampling_period_ns()
21 {
22    return 100000;
23 }
24 
25 /*
26 TODO this sees like it would be largely the same for a5xx as well
27 (ie. same countable names)..
28  */
29 void
setup_a6xx_counters()30 FreedrenoDriver::setup_a6xx_counters()
31 {
32    /* TODO is there a reason to want more than one group? */
33    CounterGroup group = {};
34    group.name = "counters";
35    groups.clear();
36    counters.clear();
37    countables.clear();
38    enabled_counters.clear();
39    groups.emplace_back(std::move(group));
40 
41    /*
42     * Create the countables that we'll be using.
43     */
44 
45    auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
46    auto PERF_CP_BUSY_CYCLES  = countable("PERF_CP_BUSY_CYCLES");
47    auto PERF_RB_3D_PIXELS    = countable("PERF_RB_3D_PIXELS");
48    auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
49    auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
50    auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
51    auto PERF_SP_BUSY_CYCLES  = countable("PERF_SP_BUSY_CYCLES");
52 
53    /*
54     * And then setup the derived counters that we are exporting to
55     * pps based on the captured countable values
56     */
57 
58    counter("GPU Frequency", Counter::Units::Hertz, [=]() {
59          return PERF_CP_ALWAYS_COUNT / time;
60       }
61    );
62 
63    counter("GPU % Utilization", Counter::Units::Percent, [=]() {
64          return 100.0 * (PERF_CP_BUSY_CYCLES / time) / max_freq;
65       }
66    );
67 
68    // This one is a bit of a guess, but seems plausible..
69    counter("ALU / Fragment", Counter::Units::None, [=]() {
70          return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
71                PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / PERF_RB_3D_PIXELS;
72       }
73    );
74 
75    counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
76          return PERF_TP_L1_CACHELINE_MISSES / time;
77       }
78    );
79 
80    counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
81          return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info->num_sp_cores);
82       }
83    );
84 
85    // TODO add more.. see https://gpuinspector.dev/docs/gpu-counters/qualcomm
86    // for what blob exposes
87 }
88 
89 /**
90  * Generate an submit the cmdstream to configure the counter/countable
91  * muxing
92  */
93 void
configure_counters(bool reset,bool wait)94 FreedrenoDriver::configure_counters(bool reset, bool wait)
95 {
96    struct fd_submit *submit = fd_submit_new(pipe);
97    enum fd_ringbuffer_flags flags =
98       (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
99    struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags);
100 
101    for (auto countable : countables)
102       countable.configure(ring, reset);
103 
104    struct fd_submit_fence fence = {};
105    util_queue_fence_init(&fence.ready);
106 
107    fd_submit_flush(submit, -1, &fence);
108 
109    util_queue_fence_wait(&fence.ready);
110 
111    fd_ringbuffer_del(ring);
112    fd_submit_del(submit);
113 
114    if (wait)
115       fd_pipe_wait(pipe, &fence.fence);
116 }
117 
118 /**
119  * Read the current counter values and record the time.
120  */
121 void
collect_countables()122 FreedrenoDriver::collect_countables()
123 {
124    last_dump_ts = perfetto::base::GetBootTimeNs().count();
125 
126    for (auto countable : countables)
127       countable.collect();
128 }
129 
130 bool
init_perfcnt()131 FreedrenoDriver::init_perfcnt()
132 {
133    uint64_t val;
134 
135    dev = fd_device_new(drm_device.fd);
136    pipe = fd_pipe_new(dev, FD_PIPE_3D);
137    dev_id = fd_pipe_dev_id(pipe);
138 
139    if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) {
140       PERFETTO_FATAL("Could not get MAX_FREQ");
141       return false;
142    }
143    max_freq = val;
144 
145    if (fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val)) {
146       PERFETTO_ILOG("Could not get SUSPEND_COUNT");
147    } else {
148       suspend_count = val;
149       has_suspend_count = true;
150    }
151 
152    perfcntrs = fd_perfcntrs(fd_pipe_dev_id(pipe), &num_perfcntrs);
153    if (num_perfcntrs == 0) {
154       PERFETTO_FATAL("No hw counters available");
155       return false;
156    }
157 
158    assigned_counters.resize(num_perfcntrs);
159    assigned_counters.assign(assigned_counters.size(), 0);
160 
161    switch (fd_dev_gen(dev_id)) {
162    case 6:
163       setup_a6xx_counters();
164       break;
165    default:
166       PERFETTO_FATAL("Unsupported GPU: a%03u", fd_dev_gpu_id(dev_id));
167       return false;
168    }
169 
170    state.resize(next_countable_id);
171 
172    for (auto countable : countables)
173       countable.resolve();
174 
175    info = fd_dev_info(dev_id);
176 
177    io = fd_dt_find_io();
178    if (!io) {
179       PERFETTO_FATAL("Could not map GPU I/O space");
180       return false;
181    }
182 
183    configure_counters(true, true);
184    collect_countables();
185 
186    return true;
187 }
188 
189 void
enable_counter(const uint32_t counter_id)190 FreedrenoDriver::enable_counter(const uint32_t counter_id)
191 {
192    enabled_counters.push_back(counters[counter_id]);
193 }
194 
195 void
enable_all_counters()196 FreedrenoDriver::enable_all_counters()
197 {
198    enabled_counters.reserve(counters.size());
199    for (auto &counter : counters) {
200       enabled_counters.push_back(counter);
201    }
202 }
203 
204 void
enable_perfcnt(const uint64_t)205 FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
206 {
207 }
208 
209 bool
dump_perfcnt()210 FreedrenoDriver::dump_perfcnt()
211 {
212    if (has_suspend_count) {
213       uint64_t val;
214 
215       fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val);
216 
217       if (suspend_count != val) {
218          PERFETTO_ILOG("Device had suspended!");
219 
220          suspend_count = val;
221 
222          configure_counters(true, true);
223          collect_countables();
224 
225          /* We aren't going to have anything sensible by comparing
226           * current values to values from prior to the suspend, so
227           * just skip this sampling period.
228           */
229          return false;
230       }
231    }
232 
233    auto last_ts = last_dump_ts;
234 
235    /* Capture the timestamp from the *start* of the sampling period: */
236    last_capture_ts = last_dump_ts;
237 
238    collect_countables();
239 
240    auto elapsed_time_ns = last_dump_ts - last_ts;
241 
242    time = (float)elapsed_time_ns / 1000000000.0;
243 
244    /* On older kernels that dont' support querying the suspend-
245     * count, just send configuration cmdstream regularly to keep
246     * the GPU alive and correctly configured for the countables
247     * we want
248     */
249    if (!has_suspend_count) {
250       configure_counters(false, false);
251    }
252 
253    return true;
254 }
255 
next()256 uint64_t FreedrenoDriver::next()
257 {
258    auto ret = last_capture_ts;
259    last_capture_ts = 0;
260    return ret;
261 }
262 
disable_perfcnt()263 void FreedrenoDriver::disable_perfcnt()
264 {
265    /* There isn't really any disable, only reconfiguring which countables
266     * get muxed to which counters
267     */
268 }
269 
270 /*
271  * Countable
272  */
273 
274 FreedrenoDriver::Countable
countable(std::string name)275 FreedrenoDriver::countable(std::string name)
276 {
277    auto countable = Countable(this, name);
278    countables.emplace_back(countable);
279    return countable;
280 }
281 
Countable(FreedrenoDriver * d,std::string name)282 FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name)
283    : id {d->next_countable_id++}, d {d}, name {name}
284 {
285 }
286 
287 /* Emit register writes on ring to configure counter/countable muxing: */
288 void
configure(struct fd_ringbuffer * ring,bool reset)289 FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset)
290 {
291    const struct fd_perfcntr_countable *countable = d->state[id].countable;
292    const struct fd_perfcntr_counter   *counter   = d->state[id].counter;
293 
294    OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
295 
296    if (counter->enable && reset) {
297       OUT_PKT4(ring, counter->enable, 1);
298       OUT_RING(ring, 0);
299    }
300 
301    if (counter->clear && reset) {
302       OUT_PKT4(ring, counter->clear, 1);
303       OUT_RING(ring, 1);
304 
305       OUT_PKT4(ring, counter->clear, 1);
306       OUT_RING(ring, 0);
307    }
308 
309    OUT_PKT4(ring, counter->select_reg, 1);
310    OUT_RING(ring, countable->selector);
311 
312    if (counter->enable && reset) {
313       OUT_PKT4(ring, counter->enable, 1);
314       OUT_RING(ring, 1);
315    }
316 }
317 
318 /* Collect current counter value and calculate delta since last sample: */
319 void
collect()320 FreedrenoDriver::Countable::collect()
321 {
322    const struct fd_perfcntr_counter *counter = d->state[id].counter;
323 
324    d->state[id].last_value = d->state[id].value;
325 
326    uint32_t *reg_lo = (uint32_t *)d->io + counter->counter_reg_lo;
327    uint32_t *reg_hi = (uint32_t *)d->io + counter->counter_reg_hi;
328 
329    uint32_t lo = *reg_lo;
330    uint32_t hi = *reg_hi;
331 
332    d->state[id].value = lo | ((uint64_t)hi << 32);
333 }
334 
335 /* Resolve the countable and assign next counter from it's group: */
336 void
resolve()337 FreedrenoDriver::Countable::resolve()
338 {
339    for (unsigned i = 0; i < d->num_perfcntrs; i++) {
340       const struct fd_perfcntr_group *g = &d->perfcntrs[i];
341       for (unsigned j = 0; j < g->num_countables; j++) {
342          const struct fd_perfcntr_countable *c = &g->countables[j];
343          if (name == c->name) {
344             d->state[id].countable = c;
345 
346             /* Assign a counter from the same group: */
347             assert(d->assigned_counters[i] < g->num_counters);
348             d->state[id].counter = &g->counters[d->assigned_counters[i]++];
349 
350             std::cout << "Countable: " << name << ", group=" << g->name <<
351                   ", counter=" << d->assigned_counters[i] - 1 << "\n";
352 
353             return;
354          }
355       }
356    }
357    unreachable("no such countable!");
358 }
359 
360 uint64_t
get_value() const361 FreedrenoDriver::Countable::get_value() const
362 {
363    return d->state[id].value - d->state[id].last_value;
364 }
365 
366 /*
367  * DerivedCounter
368  */
369 
DerivedCounter(FreedrenoDriver * d,std::string name,Counter::Units units,std::function<int64_t ()> derive)370 FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name,
371                                                 Counter::Units units,
372                                                 std::function<int64_t()> derive)
373    : Counter(d->next_counter_id++, name, 0)
374 {
375    std::cout << "DerivedCounter: " << name << ", id=" << id << "\n";
376    this->units = units;
377    set_getter([=](const Counter &c, const Driver &d) {
378          return derive();
379       }
380    );
381 }
382 
383 FreedrenoDriver::DerivedCounter
counter(std::string name,Counter::Units units,std::function<int64_t ()> derive)384 FreedrenoDriver::counter(std::string name, Counter::Units units,
385                          std::function<int64_t()> derive)
386 {
387    auto counter = DerivedCounter(this, name, units, derive);
388    counters.emplace_back(counter);
389    return counter;
390 }
391 
392 } // namespace pps
393