1 /*
2  * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
3  * Copyright © 2018 Google, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  * Authors:
25  *    Rob Clark <robclark@freedesktop.org>
26  */
27 
28 /* NOTE: see https://github.com/freedreno/freedreno/wiki/A5xx-Queries */
29 
30 #include "freedreno_query_acc.h"
31 #include "freedreno_resource.h"
32 
33 #include "fd6_context.h"
34 #include "fd6_emit.h"
35 #include "fd6_format.h"
36 #include "fd6_query.h"
37 
/* GPU-visible sample buffer layout for simple (single-counter) queries.
 * The GPU snapshots a counter into 'start' on resume and 'stop' on pause,
 * and accumulates 'result += stop - start' via CP_MEM_TO_MEM.  PACKED so
 * the field offsets used by query_sample_idx() match the GPU's view.
 */
struct PACKED fd6_query_sample {
   uint64_t start;   /* counter value at query resume */
   uint64_t result;  /* accumulated (stop - start) across resume/pause pairs */
   uint64_t stop;    /* counter value at query pause */
};
43 
/* Expands to the (bo, offset, ...) argument list for OUT_RELOC, addressing a
 * single field of element 'idx' in an array of fd6_query_sample.  Note:
 * 'idx' is now parenthesized in the expansion — previously 'idx * sizeof(...)'
 * would have bound incorrectly for an expression argument like 'i + 1'
 * (all existing callers pass a plain identifier, so behavior is unchanged).
 */
#define query_sample_idx(aq, idx, field)                                       \
   fd_resource((aq)->prsc)->bo,                                                \
      ((idx) * sizeof(struct fd6_query_sample)) +                              \
         offsetof(struct fd6_query_sample, field),                             \
      0, 0

/* offset of a single field of fd6_query_sample: */
#define query_sample(aq, field) query_sample_idx(aq, 0, field)
53 
54 /*
55  * Occlusion Query:
56  *
57  * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
58  * interpret results
59  */
60 
/* Start/resume an occlusion query: program RB to copy its sample counter
 * to the query's 'start' slot and kick a ZPASS_DONE event to perform the
 * write.  Also bumps the context's active samples-passed query count.
 */
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   /* destination address the ZPASS_DONE event writes the counter to: */
   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, query_sample(aq, start));

   fd6_event_write(batch, ring, ZPASS_DONE, false);

   fd6_context(batch->ctx)->samples_passed_queries++;
}
76 
/* Pause an occlusion query: snapshot the sample counter into 'stop' and
 * accumulate the delta into 'result' from the epilogue ring, so the draw
 * ring does not stall on the counter write.
 */
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   /* Pre-fill 'stop' with 0xffffffff before ZPASS_DONE overwrites it with
    * the real counter value (presumably a sentinel so a not-yet-landed
    * ZPASS_DONE is distinguishable — TODO confirm against kernel/CP docs):
    */
   OUT_PKT7(ring, CP_MEM_WRITE, 4);
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0xffffffff);

   OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, query_sample(aq, stop));

   fd6_event_write(batch, ring, ZPASS_DONE, false);

   /* To avoid stalling in the draw buffer, emit the code to compute the
    * counter delta in the epilogue ring.
    */
   struct fd_ringbuffer *epilogue = fd_batch_get_epilogue(batch);
   fd_wfi(batch, epilogue);

   /* result += stop - start: */
   OUT_PKT7(epilogue, CP_MEM_TO_MEM, 9);
   OUT_RING(epilogue, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
   OUT_RELOC(epilogue, query_sample(aq, result)); /* dst */
   OUT_RELOC(epilogue, query_sample(aq, result)); /* srcA */
   OUT_RELOC(epilogue, query_sample(aq, stop));   /* srcB */
   OUT_RELOC(epilogue, query_sample(aq, start));  /* srcC */

   fd6_context(batch->ctx)->samples_passed_queries--;
}
113 
114 static void
occlusion_counter_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)115 occlusion_counter_result(struct fd_acc_query *aq, void *buf,
116                          union pipe_query_result *result)
117 {
118    struct fd6_query_sample *sp = buf;
119    result->u64 = sp->result;
120 }
121 
122 static void
occlusion_predicate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)123 occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
124                            union pipe_query_result *result)
125 {
126    struct fd6_query_sample *sp = buf;
127    result->b = !!sp->result;
128 }
129 
/* The three occlusion query types share identical GPU-side collection
 * (occlusion_resume/occlusion_pause) and differ only in how the
 * accumulated 'result' is interpreted (count vs. boolean):
 */
static const struct fd_acc_sample_provider occlusion_counter = {
   .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
   .size = sizeof(struct fd6_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_counter_result,
};

static const struct fd_acc_sample_provider occlusion_predicate = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
   .size = sizeof(struct fd6_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_predicate_result,
};

/* conservative variant reuses the exact predicate path (an exact answer
 * is a valid conservative answer): */
static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
   .size = sizeof(struct fd6_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_predicate_result,
};
153 
154 /*
155  * Timestamp Queries:
156  */
157 
/* Capture a GPU timestamp into 'start' via a timestamped CP_EVENT_WRITE
 * (RB_DONE_TS).  Shared by the TIMESTAMP and TIME_ELAPSED providers.
 */
static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring,
            CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   OUT_RELOC(ring, query_sample(aq, start));
   OUT_RING(ring, 0x00000000);

   fd_reset_wfi(batch);
}
171 
/* Capture a GPU timestamp into 'stop' and accumulate the elapsed ticks
 * (stop - start) into 'result' with CP_MEM_TO_MEM.
 */
static void
time_elapsed_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring,
            CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0x00000000);

   /* wait for the timestamp write to land before reading it back: */
   fd_reset_wfi(batch);
   fd_wfi(batch, ring);

   /* result += stop - start: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
   OUT_RELOC(ring, query_sample(aq, result)); /* dst */
   OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
   OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
   OUT_RELOC(ring, query_sample(aq, start));  /* srcC */
}
194 
/* TIMESTAMP queries only need the single sample taken at resume time. */
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
   /* We captured a timestamp in timestamp_resume(), nothing to do here. */
}
200 
/* timestamp logging for u_trace: write a raw GPU timestamp to bo+offset
 * (hooked up as ctx->record_timestamp in fd6_query_context_init()).
 */
static void
record_timestamp(struct fd_ringbuffer *ring, struct fd_bo *bo, unsigned offset)
{
   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring,
            CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   OUT_RELOC(ring, bo, offset, 0, 0);
   OUT_RING(ring, 0x00000000);
}
211 
/* Convert always-on timer ticks to nanoseconds.
 *
 * This is based on the 19.2MHz always-on rbbm timer.
 *
 * TODO we should probably query this value from kernel..
 */
static uint64_t
ticks_to_ns(uint64_t ts)
{
   const uint64_t freq = 19200000; /* 19.2 MHz */

   /* Split into whole-tick and remainder parts so the conversion is exact.
    * The previous 'ts * (1000000000 / 19200000)' truncated the per-tick
    * period from 52.083.. to 52 ns, a ~0.16% systematic error; a naive
    * 'ts * 1000000000 / freq' would instead overflow uint64_t for
    * timestamps beyond ~18.4e9 ticks (~16 minutes).
    */
   return (ts / freq) * 1000000000ull + (ts % freq) * 1000000000ull / freq;
}
221 
222 static void
time_elapsed_accumulate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)223 time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,
224                                union pipe_query_result *result)
225 {
226    struct fd6_query_sample *sp = buf;
227    result->u64 = ticks_to_ns(sp->result);
228 }
229 
230 static void
timestamp_accumulate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)231 timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,
232                             union pipe_query_result *result)
233 {
234    struct fd6_query_sample *sp = buf;
235    result->u64 = ticks_to_ns(sp->start);
236 }
237 
/* TIME_ELAPSED accumulates (stop - start) ticks across resume/pause
 * pairs; '.always' because timestamps must be captured even when no
 * draws occur.
 */
static const struct fd_acc_sample_provider time_elapsed = {
   .query_type = PIPE_QUERY_TIME_ELAPSED,
   .always = true,
   .size = sizeof(struct fd6_query_sample),
   .resume = timestamp_resume,
   .pause = time_elapsed_pause,
   .result = time_elapsed_accumulate_result,
};

/* NOTE: timestamp query isn't going to give terribly sensible results
 * on a tiler.  But it is needed by qapitrace profile heatmap.  If you
 * add in a binning pass, the results get even more non-sensical.  So
 * we just return the timestamp on the last tile and hope that is
 * kind of good enough.
 */

static const struct fd_acc_sample_provider timestamp = {
   .query_type = PIPE_QUERY_TIMESTAMP,
   .always = true,
   .size = sizeof(struct fd6_query_sample),
   .resume = timestamp_resume,
   .pause = timestamp_pause,
   .result = timestamp_accumulate_result,
};
262 
/* GPU-visible sample buffer for primitives-generated/emitted queries.
 * start/stop[4] are snapshots of VPC_SO_STREAM_COUNTS (one emitted/generated
 * pair per stream-output channel); prim_start/prim_stop hold raw
 * RBBM_PRIMCTR register snapshots (16 slots; only 'counter_count' are
 * written).  PACKED so offsetof() matches the GPU layout.
 */
struct PACKED fd6_primitives_sample {
   struct {
      uint64_t emitted, generated;
   } start[4], stop[4], result;

   uint64_t prim_start[16], prim_stop[16], prim_emitted;
};
270 
/* Emit a reloc to a field of the query's fd6_primitives_sample buffer.
 * Both macros expand to a full statement (note the trailing ';' inside
 * the expansion — some call sites rely on it).  The two are currently
 * identical; presumably 'relocw' once mapped to a write-reloc variant
 * (OUT_RELOCW) — TODO confirm, or fold into one macro.
 */
#define primitives_relocw(ring, aq, field)                                     \
   OUT_RELOC(ring, fd_resource((aq)->prsc)->bo,                                \
             offsetof(struct fd6_primitives_sample, field), 0, 0);
#define primitives_reloc(ring, aq, field)                                      \
   OUT_RELOC(ring, fd_resource((aq)->prsc)->bo,                                \
             offsetof(struct fd6_primitives_sample, field), 0, 0);
277 
278 #ifdef DEBUG_COUNTERS
279 static const unsigned counter_count = 10;
280 static const unsigned counter_base = REG_A6XX_RBBM_PRIMCTR_0_LO;
281 
282 static void
log_counters(struct fd6_primitives_sample * ps)283 log_counters(struct fd6_primitives_sample *ps)
284 {
285    const char *labels[] = {
286       "vs_vertices_in",    "vs_primitives_out",
287       "hs_vertices_in",    "hs_patches_out",
288       "ds_vertices_in",    "ds_primitives_out",
289       "gs_primitives_in",  "gs_primitives_out",
290       "ras_primitives_in", "x",
291    };
292 
293    mesa_logd("  counter\t\tstart\t\t\tstop\t\t\tdiff");
294    for (int i = 0; i < ARRAY_SIZE(labels); i++) {
295       int register_idx = i + (counter_base - REG_A6XX_RBBM_PRIMCTR_0_LO) / 2;
296       mesa_logd("  RBBM_PRIMCTR_%d\t0x%016" PRIx64 "\t0x%016" PRIx64 "\t%" PRIi64
297              "\t%s",
298              register_idx, ps->prim_start[i], ps->prim_stop[i],
299              ps->prim_stop[i] - ps->prim_start[i], labels[register_idx]);
300    }
301 
302    mesa_logd("  so counts");
303    for (int i = 0; i < ARRAY_SIZE(ps->start); i++) {
304       mesa_logd("  CHANNEL %d emitted\t0x%016" PRIx64 "\t0x%016" PRIx64
305              "\t%" PRIi64,
306              i, ps->start[i].generated, ps->stop[i].generated,
307              ps->stop[i].generated - ps->start[i].generated);
308       mesa_logd("  CHANNEL %d generated\t0x%016" PRIx64 "\t0x%016" PRIx64
309              "\t%" PRIi64,
310              i, ps->start[i].emitted, ps->stop[i].emitted,
311              ps->stop[i].emitted - ps->start[i].emitted);
312    }
313 
314    mesa_logd("generated %" PRIu64 ", emitted %" PRIu64, ps->result.generated,
315           ps->result.emitted);
316 }
317 
318 #else
319 
320 static const unsigned counter_count = 1;
321 static const unsigned counter_base = REG_A6XX_RBBM_PRIMCTR_8_LO;
322 
323 static void
log_counters(struct fd6_primitives_sample * ps)324 log_counters(struct fd6_primitives_sample *ps)
325 {
326 }
327 
328 #endif
329 
/* Start/resume a PRIMITIVES_GENERATED query: snapshot the RBBM primitive
 * counter block into prim_start and enable the counters.
 */
static void
primitives_generated_resume(struct fd_acc_query *aq,
                            struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   fd_wfi(batch, ring);

   /* CNT is in dwords, hence counter_count * 2 for 64b counters: */
   OUT_PKT7(ring, CP_REG_TO_MEM, 3);
   OUT_RING(ring, CP_REG_TO_MEM_0_64B | CP_REG_TO_MEM_0_CNT(counter_count * 2) |
                     CP_REG_TO_MEM_0_REG(counter_base));
   primitives_relocw(ring, aq, prim_start);

   fd6_event_write(batch, ring, START_PRIMITIVE_CTRS, false);
}
345 
/* Pause a PRIMITIVES_GENERATED query: snapshot the counters into
 * prim_stop, stop the counters, and accumulate the RBBM_PRIMCTR_8
 * (ras_primitives_in) delta into result.generated.
 */
static void
primitives_generated_pause(struct fd_acc_query *aq,
                           struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   fd_wfi(batch, ring);

   /* snapshot the end values: */
   OUT_PKT7(ring, CP_REG_TO_MEM, 3);
   OUT_RING(ring, CP_REG_TO_MEM_0_64B | CP_REG_TO_MEM_0_CNT(counter_count * 2) |
                     CP_REG_TO_MEM_0_REG(counter_base));
   primitives_relocw(ring, aq, prim_stop);

   fd6_event_write(batch, ring, STOP_PRIMITIVE_CTRS, false);

   /* result += stop - start: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x40000000);
   primitives_relocw(ring, aq, result.generated);
   primitives_reloc(ring, aq, prim_emitted);
   /* NOTE: no ';' between the next two calls — the primitives_reloc()
    * macro expansion already ends in ';', so this still emits two relocs
    * (clang-format mangles it into this shape):
    */
   primitives_reloc(ring, aq,
                    prim_stop[(REG_A6XX_RBBM_PRIMCTR_8_LO - counter_base) / 2])
      primitives_reloc(
         ring, aq, prim_start[(REG_A6XX_RBBM_PRIMCTR_8_LO - counter_base) / 2]);
}
372 
373 static void
primitives_generated_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)374 primitives_generated_result(struct fd_acc_query *aq, void *buf,
375                             union pipe_query_result *result)
376 {
377    struct fd6_primitives_sample *ps = buf;
378 
379    log_counters(ps);
380 
381    result->u64 = ps->result.generated;
382 }
383 
/* PRIMITIVES_GENERATED via the RBBM primitive counters: */
static const struct fd_acc_sample_provider primitives_generated = {
   .query_type = PIPE_QUERY_PRIMITIVES_GENERATED,
   .size = sizeof(struct fd6_primitives_sample),
   .resume = primitives_generated_resume,
   .pause = primitives_generated_pause,
   .result = primitives_generated_result,
};
391 
/* Start/resume a PRIMITIVES_EMITTED query: point VPC_SO_STREAM_COUNTS at
 * start[0] and have WRITE_PRIMITIVE_COUNTS dump all four stream counters.
 */
static void
primitives_emitted_resume(struct fd_acc_query *aq,
                          struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
   primitives_relocw(ring, aq, start[0]);

   fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
}
404 
/* Pause a PRIMITIVES_EMITTED query: dump the stream counters into stop[],
 * flush, and accumulate the delta for the query's SO stream
 * (aq->base.index) into result.emitted.
 */
static void
primitives_emitted_pause(struct fd_acc_query *aq,
                         struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   fd_wfi(batch, ring);

   OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
   primitives_relocw(ring, aq, stop[0]);
   fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);

   /* timestamped cache flush so the counter writes land before the CPU
    * (or the CP_MEM_TO_MEM below) reads them back:
    */
   fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);

   /* result += stop - start: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
   primitives_relocw(ring, aq, result.emitted);
   primitives_reloc(ring, aq, result.emitted);
   primitives_reloc(ring, aq, stop[aq->base.index].emitted);
   primitives_reloc(ring, aq, start[aq->base.index].emitted);
}
427 
428 static void
primitives_emitted_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)429 primitives_emitted_result(struct fd_acc_query *aq, void *buf,
430                           union pipe_query_result *result)
431 {
432    struct fd6_primitives_sample *ps = buf;
433 
434    log_counters(ps);
435 
436    result->u64 = ps->result.emitted;
437 }
438 
/* PRIMITIVES_EMITTED via the VPC stream-output counters: */
static const struct fd_acc_sample_provider primitives_emitted = {
   .query_type = PIPE_QUERY_PRIMITIVES_EMITTED,
   .size = sizeof(struct fd6_primitives_sample),
   .resume = primitives_emitted_resume,
   .pause = primitives_emitted_pause,
   .result = primitives_emitted_result,
};
446 
447 /*
448  * Performance Counter (batch) queries:
449  *
450  * Only one of these is active at a time, per design of the gallium
451  * batch_query API design.  On perfcntr query tracks N query_types,
452  * each of which has a 'fd_batch_query_entry' that maps it back to
453  * the associated group and counter.
454  */
455 
456 struct fd_batch_query_entry {
457    uint8_t gid; /* group-id */
458    uint8_t cid; /* countable-id within the group */
459 };
460 
461 struct fd_batch_query_data {
462    struct fd_screen *screen;
463    unsigned num_query_entries;
464    struct fd_batch_query_entry query_entries[];
465 };
466 
/* Start/resume a perfcntr batch query: program each requested countable's
 * selector register, then snapshot each counter's current value into the
 * per-query 'start' slot.
 */
static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_batch_query_data *data = aq->query_data;
   struct fd_screen *screen = data->screen;
   struct fd_ringbuffer *ring = batch->draw;

   /* VLA sized by the screen's group count; tracks how many counters of
    * each group have been claimed so far (counters are assigned in order):
    */
   unsigned counters_per_group[screen->num_perfcntr_groups];
   memset(counters_per_group, 0, sizeof(counters_per_group));

   fd_wfi(batch, ring);

   /* configure performance counters for the requested queries: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;

      /* guaranteed by the validation in fd6_create_batch_query(): */
      debug_assert(counter_idx < g->num_counters);

      OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
      OUT_RING(ring, g->countables[entry->cid].selector);
   }

   /* restart the per-group assignment so the second pass picks the same
    * counter_idx for each entry:
    */
   memset(counters_per_group, 0, sizeof(counters_per_group));

   /* and snapshot the start values */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;
      const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

      OUT_PKT7(ring, CP_REG_TO_MEM, 3);
      OUT_RING(ring, CP_REG_TO_MEM_0_64B |
                        CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
      OUT_RELOC(ring, query_sample_idx(aq, i, start));
   }
}
506 
/* Pause a perfcntr batch query: snapshot each counter into 'stop' and
 * accumulate (stop - start) into each entry's 'result'.
 */
static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_batch_query_data *data = aq->query_data;
   struct fd_screen *screen = data->screen;
   struct fd_ringbuffer *ring = batch->draw;

   /* same counter_idx assignment scheme as perfcntr_resume(): */
   unsigned counters_per_group[screen->num_perfcntr_groups];
   memset(counters_per_group, 0, sizeof(counters_per_group));

   fd_wfi(batch, ring);

   /* TODO do we need to bother to turn anything off? */

   /* snapshot the end values: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;
      const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

      OUT_PKT7(ring, CP_REG_TO_MEM, 3);
      OUT_RING(ring, CP_REG_TO_MEM_0_64B |
                        CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
      OUT_RELOC(ring, query_sample_idx(aq, i, stop));
   }

   /* and compute the result: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      /* result += stop - start: */
      OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
      OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
      OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
      OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
      OUT_RELOC(ring, query_sample_idx(aq, i, stop));   /* srcB */
      OUT_RELOC(ring, query_sample_idx(aq, i, start));  /* srcC */
   }
}
545 
546 static void
perfcntr_accumulate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)547 perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
548                            union pipe_query_result *result)
549 {
550    struct fd_batch_query_data *data = aq->query_data;
551    struct fd6_query_sample *sp = buf;
552 
553    for (unsigned i = 0; i < data->num_query_entries; i++) {
554       result->batch[i].u64 = sp[i].result;
555    }
556 }
557 
/* Shared provider for all perfcntr batch queries.  No '.size' here:
 * the sample buffer size depends on the number of query_types, so
 * fd6_create_batch_query() sets aq->size per-query instead.
 */
static const struct fd_acc_sample_provider perfcntr = {
   .query_type = FD_QUERY_FIRST_PERFCNTR,
   .always = true,
   .resume = perfcntr_resume,
   .pause = perfcntr_pause,
   .result = perfcntr_accumulate_result,
};
565 
566 static struct pipe_query *
fd6_create_batch_query(struct pipe_context * pctx,unsigned num_queries,unsigned * query_types)567 fd6_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
568                        unsigned *query_types)
569 {
570    struct fd_context *ctx = fd_context(pctx);
571    struct fd_screen *screen = ctx->screen;
572    struct fd_query *q;
573    struct fd_acc_query *aq;
574    struct fd_batch_query_data *data;
575 
576    data = CALLOC_VARIANT_LENGTH_STRUCT(
577       fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
578 
579    data->screen = screen;
580    data->num_query_entries = num_queries;
581 
582    /* validate the requested query_types and ensure we don't try
583     * to request more query_types of a given group than we have
584     * counters:
585     */
586    unsigned counters_per_group[screen->num_perfcntr_groups];
587    memset(counters_per_group, 0, sizeof(counters_per_group));
588 
589    for (unsigned i = 0; i < num_queries; i++) {
590       unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
591 
592       /* verify valid query_type, ie. is it actually a perfcntr? */
593       if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
594           (idx >= screen->num_perfcntr_queries)) {
595          mesa_loge("invalid batch query query_type: %u", query_types[i]);
596          goto error;
597       }
598 
599       struct fd_batch_query_entry *entry = &data->query_entries[i];
600       struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
601 
602       entry->gid = pq->group_id;
603 
604       /* the perfcntr_queries[] table flattens all the countables
605        * for each group in series, ie:
606        *
607        *   (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
608        *
609        * So to find the countable index just step back through the
610        * table to find the first entry with the same group-id.
611        */
612       while (pq > screen->perfcntr_queries) {
613          pq--;
614          if (pq->group_id == entry->gid)
615             entry->cid++;
616       }
617 
618       if (counters_per_group[entry->gid] >=
619           screen->perfcntr_groups[entry->gid].num_counters) {
620          mesa_loge("too many counters for group %u", entry->gid);
621          goto error;
622       }
623 
624       counters_per_group[entry->gid]++;
625    }
626 
627    q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
628    aq = fd_acc_query(q);
629 
630    /* sample buffer size is based on # of queries: */
631    aq->size = num_queries * sizeof(struct fd6_query_sample);
632    aq->query_data = data;
633 
634    return (struct pipe_query *)q;
635 
636 error:
637    free(data);
638    return NULL;
639 }
640 
/* Hook up the a6xx query implementation on context creation: generic
 * acc-query entry points, u_trace timestamp helpers, batch queries, and
 * all per-type sample providers.
 */
void
fd6_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->create_query = fd_acc_create_query;
   ctx->query_update_batch = fd_acc_query_update_batch;

   /* u_trace support: */
   ctx->record_timestamp = record_timestamp;
   ctx->ts_to_ns = ticks_to_ns;

   pctx->create_batch_query = fd6_create_batch_query;

   fd_acc_query_register_provider(pctx, &occlusion_counter);
   fd_acc_query_register_provider(pctx, &occlusion_predicate);
   fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);

   fd_acc_query_register_provider(pctx, &time_elapsed);
   fd_acc_query_register_provider(pctx, &timestamp);

   fd_acc_query_register_provider(pctx, &primitives_generated);
   fd_acc_query_register_provider(pctx, &primitives_emitted);
}
664