1 /*
2  * Copyright © 2020 Google, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <getopt.h>
25 #include <inttypes.h>
26 #include <locale.h>
27 #include <stdlib.h>
28 #include <xf86drm.h>
29 
30 #include "util/u_math.h"
31 
32 #include "perfcntrs/freedreno_perfcntr.h"
33 
34 #include "main.h"
35 
/**
 * Print a buffer as rows of floating point values, eight per row.
 *
 * Every four consecutive bytes are combined into one 32-bit word, with the
 * first byte as the least significant one, independent of host byte order.
 * The word is converted with uif() and printed using "%8f".  Each row starts
 * with a tab, values inside a row are separated by one space, and the last
 * row is always terminated with a newline.
 *
 * Trailing bytes that do not form a complete 32-bit word are ignored.
 *
 * buf: start of the data to dump
 * sz:  size of the data in bytes; nothing is printed when it is less than 4
 */
static void
dump_float(void *buf, int sz)
{
   const uint8_t *ptr = (const uint8_t *)buf;
   /* Work in whole dwords so no pointer is ever formed outside the buffer. */
   const int num_dwords = sz / 4;

   for (int i = 0; i < num_dwords; i++, ptr += 4) {
      /*
       * Widen each byte to uint32_t before shifting: a plain uint8_t is
       * promoted to (signed) int, and shifting a byte >= 0x80 left by 24
       * would overflow it.
       */
      const uint32_t d = ((uint32_t)ptr[0] << 0) |
                         ((uint32_t)ptr[1] << 8) |
                         ((uint32_t)ptr[2] << 16) |
                         ((uint32_t)ptr[3] << 24);

      putchar((i % 8) ? ' ' : '\t');
      printf("%8f", uif(d));

      if ((i % 8) == 7) {
         putchar('\n');
      }
   }

   /* Finish a partially filled last row. */
   if (num_dwords > 0 && (num_dwords % 8)) {
      putchar('\n');
   }
}
66 
/**
 * Print a buffer as rows of 32-bit hexadecimal words, eight per row.
 *
 * Every four consecutive bytes are combined into one 32-bit word, with the
 * first byte as the least significant one, independent of host byte order.
 * Each word is printed as eight zero-padded lowercase hex digits.  Each row
 * starts with a tab, words inside a row are separated by one space, and the
 * last row is always terminated with a newline.
 *
 * Trailing bytes that do not form a complete 32-bit word are ignored, so the
 * function never reads past buf + sz (this matches dump_float()).
 *
 * buf: start of the data to dump
 * sz:  size of the data in bytes; nothing is printed when it is less than 4
 */
static void
dump_hex(void *buf, int sz)
{
   const uint8_t *ptr = (const uint8_t *)buf;
   /* Only whole dwords are dumped; a partial tail would need an OOB read. */
   const int num_dwords = sz / 4;

   for (int i = 0; i < num_dwords; i++, ptr += 4) {
      /*
       * Widen each byte to uint32_t before shifting: a plain uint8_t is
       * promoted to (signed) int, and shifting a byte >= 0x80 left by 24
       * would overflow it.
       */
      const uint32_t d = ((uint32_t)ptr[0] << 0) |
                         ((uint32_t)ptr[1] << 8) |
                         ((uint32_t)ptr[2] << 16) |
                         ((uint32_t)ptr[3] << 24);

      putchar((i % 8) ? ' ' : '\t');
      printf("%08" PRIx32, d);

      if ((i % 8) == 7) {
         putchar('\n');
      }
   }

   /* Finish a partially filled last row. */
   if (num_dwords > 0 && (num_dwords % 8)) {
      putchar('\n');
   }
}
97 
/* Short option string for getopt: a trailing ':' marks a required argument. */
static const char *shortopts = "df:g:hp:";

/* Long option table; every long option maps onto its short equivalent. */
static const struct option longopts[] = {
   {.name = "disasm",   .has_arg = no_argument,       .flag = NULL, .val = 'd'},
   {.name = "file",     .has_arg = required_argument, .flag = NULL, .val = 'f'},
   {.name = "groups",   .has_arg = required_argument, .flag = NULL, .val = 'g'},
   {.name = "help",     .has_arg = no_argument,       .flag = NULL, .val = 'h'},
   {.name = "perfcntr", .has_arg = required_argument, .flag = NULL, .val = 'p'},
   /* all-zero terminator */
   {.name = NULL,       .has_arg = 0,                 .flag = NULL, .val = 0},
};
104 
/**
 * Print the command line help text to stdout.
 *
 * name: program name to show in the synopsis line (main() passes argv[0])
 */
static void
usage(const char *name)
{
   /* The synopsis lists every short option accepted by shortopts. */
   printf(
      "Usage: %s [-dfghp]\n"
      "\n"
      "options:\n"
      "    -d, --disasm             print disassembled shader\n"
      "    -f, --file=FILE          read shader from file (instead of stdin)\n"
      "    -g, --groups=X,Y,Z       use specified group size\n"
      "    -h, --help               show this message\n"
      "    -p, --perfcntr=LIST      sample specified performance counters "
      "(comma\n"
      "                             separated list)\n",
      name);
}
121 
/*
 * Performance counter description.  Both variables are filled in by
 * parse_perfcntrs() from the result of fd_perfcntrs(): 'groups' is the table
 * of counter groups and 'num_groups' is the number of entries in it.
 */
static unsigned num_groups;
static const struct fd_perfcntr_group *groups;

/*
 * Track enabled counters per group: one entry per group (allocated zeroed in
 * parse_perfcntrs()), holding how many of that group's counters have already
 * been handed out by setup_counter().
 */
static unsigned *enabled_counters;
128 
129 static void
setup_counter(const char * name,struct perfcntr * c)130 setup_counter(const char *name, struct perfcntr *c)
131 {
132    for (int i = 0; i < num_groups; i++) {
133       const struct fd_perfcntr_group *group = &groups[i];
134 
135       for (int j = 0; j < group->num_countables; j++) {
136          const struct fd_perfcntr_countable *countable = &group->countables[j];
137 
138          if (strcmp(name, countable->name) != 0)
139             continue;
140 
141          /*
142           * Allocate a counter to use to monitor the requested countable:
143           */
144          if (enabled_counters[i] >= group->num_counters) {
145             errx(-1, "Too many counters selected in group: %s", group->name);
146          }
147 
148          unsigned idx = enabled_counters[i]++;
149          const struct fd_perfcntr_counter *counter = &group->counters[idx];
150 
151          /*
152           * And initialize the perfcntr struct, pulling together the info
153           * about selected counter and countable, to simplify life for the
154           * backend:
155           */
156          c->name = name;
157          c->select_reg = counter->select_reg;
158          c->counter_reg_lo = counter->counter_reg_lo;
159          c->counter_reg_hi = counter->counter_reg_hi;
160          c->selector = countable->selector;
161 
162          return;
163       }
164    }
165 
166    errx(-1, "could not find countable: %s", name);
167 }
168 
169 static struct perfcntr *
parse_perfcntrs(const struct fd_dev_id * dev_id,const char * perfcntrstr,unsigned * num_perfcntrs)170 parse_perfcntrs(const struct fd_dev_id *dev_id, const char *perfcntrstr,
171                 unsigned *num_perfcntrs)
172 {
173    struct perfcntr *counters = NULL;
174    char *cnames, *s;
175    unsigned cnt = 0;
176 
177    groups = fd_perfcntrs(dev_id, &num_groups);
178    enabled_counters = calloc(num_groups, sizeof(enabled_counters[0]));
179 
180    cnames = strdup(perfcntrstr);
181    while ((s = strstr(cnames, ","))) {
182       char *name = cnames;
183       s[0] = '\0';
184       cnames = &s[1];
185 
186       counters = realloc(counters, ++cnt * sizeof(counters[0]));
187       setup_counter(name, &counters[cnt - 1]);
188    }
189 
190    char *name = cnames;
191    counters = realloc(counters, ++cnt * sizeof(counters[0]));
192    setup_counter(name, &counters[cnt - 1]);
193 
194    *num_perfcntrs = cnt;
195 
196    return counters;
197 }
198 
199 int
main(int argc,char ** argv)200 main(int argc, char **argv)
201 {
202    FILE *in = stdin;
203    const char *perfcntrstr = NULL;
204    struct perfcntr *perfcntrs = NULL;
205    unsigned num_perfcntrs = 0;
206    bool disasm = false;
207    uint32_t grid[3] = {0};
208    int opt, ret;
209 
210    setlocale(LC_NUMERIC, "en_US.UTF-8");
211 
212    while ((opt = getopt_long_only(argc, argv, shortopts, longopts, NULL)) !=
213           -1) {
214       switch (opt) {
215       case 'd':
216          disasm = true;
217          break;
218       case 'f':
219          in = fopen(optarg, "r");
220          if (!in)
221             err(1, "could not open '%s'", optarg);
222          break;
223       case 'g':
224          ret = sscanf(optarg, "%u,%u,%u", &grid[0], &grid[1], &grid[2]);
225          if (ret != 3)
226             goto usage;
227          break;
228       case 'h':
229          goto usage;
230       case 'p':
231          perfcntrstr = optarg;
232          break;
233       default:
234          printf("unrecognized arg: %c\n", opt);
235          goto usage;
236       }
237    }
238 
239    int fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
240    if (fd < 0)
241       err(1, "could not open drm device");
242 
243    struct fd_device *dev = fd_device_new(fd);
244    struct fd_pipe *pipe = fd_pipe_new(dev, FD_PIPE_3D);
245 
246    const struct fd_dev_id *dev_id = fd_pipe_dev_id(pipe);
247 
248    printf("got gpu: %s\n", fd_dev_name(dev_id));
249 
250    struct backend *backend;
251    switch (fd_dev_gen(dev_id)) {
252    case 4:
253       backend = a4xx_init(dev, dev_id);
254       break;
255    case 6:
256       backend = a6xx_init(dev, dev_id);
257       break;
258    default:
259       err(1, "unsupported gpu generation: a%uxx", fd_dev_gen(dev_id));
260    }
261 
262    struct kernel *kernel = backend->assemble(backend, in);
263    printf("localsize: %dx%dx%d\n", kernel->local_size[0], kernel->local_size[1],
264           kernel->local_size[2]);
265    for (int i = 0; i < kernel->num_bufs; i++) {
266       printf("buf[%d]: size=%u\n", i, kernel->buf_sizes[i]);
267       kernel->bufs[i] = fd_bo_new(dev, kernel->buf_sizes[i] * 4, 0, "buf[%d]", i);
268    }
269 
270    if (disasm)
271       backend->disassemble(kernel, stdout);
272 
273    if (grid[0] == 0)
274       return 0;
275 
276    struct fd_submit *submit = fd_submit_new(pipe);
277 
278    if (perfcntrstr) {
279       if (!backend->set_perfcntrs) {
280          err(1, "performance counters not supported");
281       }
282       perfcntrs = parse_perfcntrs(dev_id, perfcntrstr, &num_perfcntrs);
283       backend->set_perfcntrs(backend, perfcntrs, num_perfcntrs);
284    }
285 
286    backend->emit_grid(kernel, grid, submit);
287 
288    struct fd_submit_fence fence = {};
289    util_queue_fence_init(&fence.ready);
290 
291    fd_submit_flush(submit, -1, &fence);
292 
293    util_queue_fence_wait(&fence.ready);
294 
295    for (int i = 0; i < kernel->num_bufs; i++) {
296       fd_bo_cpu_prep(kernel->bufs[i], pipe, FD_BO_PREP_READ);
297       void *map = fd_bo_map(kernel->bufs[i]);
298 
299       printf("buf[%d]:\n", i);
300       dump_hex(map, kernel->buf_sizes[i] * 4);
301       dump_float(map, kernel->buf_sizes[i] * 4);
302    }
303 
304    if (perfcntrstr) {
305       uint64_t results[num_perfcntrs];
306       backend->read_perfcntrs(backend, results);
307 
308       for (unsigned i = 0; i < num_perfcntrs; i++) {
309          printf("%s:\t%'" PRIu64 "\n", perfcntrs[i].name, results[i]);
310       }
311    }
312 
313    return 0;
314 
315 usage:
316    usage(argv[0]);
317    return -1;
318 }
319