/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
26 
27 #include "pipe/p_state.h"
28 #include "util/u_inlines.h"
29 #include "util/u_memory.h"
30 
31 #include "freedreno_context.h"
32 #include "freedreno_query_hw.h"
33 #include "freedreno_resource.h"
34 #include "freedreno_util.h"
35 
36 struct fd_hw_sample_period {
37    struct fd_hw_sample *start, *end;
38    struct list_head list;
39 };
40 
41 static struct fd_hw_sample *
get_sample(struct fd_batch * batch,struct fd_ringbuffer * ring,unsigned query_type)42 get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring,
43            unsigned query_type) assert_dt
44 {
45    struct fd_context *ctx = batch->ctx;
46    struct fd_hw_sample *samp = NULL;
47    int idx = pidx(query_type);
48 
49    assume(idx >= 0); /* query never would have been created otherwise */
50 
51    if (!batch->sample_cache[idx]) {
52       struct fd_hw_sample *new_samp =
53          ctx->hw_sample_providers[idx]->get_sample(batch, ring);
54       fd_hw_sample_reference(ctx, &batch->sample_cache[idx], new_samp);
55       util_dynarray_append(&batch->samples, struct fd_hw_sample *, new_samp);
56       fd_batch_needs_flush(batch);
57    }
58 
59    fd_hw_sample_reference(ctx, &samp, batch->sample_cache[idx]);
60 
61    return samp;
62 }
63 
64 static void
clear_sample_cache(struct fd_batch * batch)65 clear_sample_cache(struct fd_batch *batch)
66 {
67    int i;
68 
69    for (i = 0; i < ARRAY_SIZE(batch->sample_cache); i++)
70       fd_hw_sample_reference(batch->ctx, &batch->sample_cache[i], NULL);
71 }
72 
73 static bool
query_active_in_batch(struct fd_batch * batch,struct fd_hw_query * hq)74 query_active_in_batch(struct fd_batch *batch, struct fd_hw_query *hq)
75 {
76    int idx = pidx(hq->provider->query_type);
77    return batch->query_providers_active & (1 << idx);
78 }
79 
80 static void
resume_query(struct fd_batch * batch,struct fd_hw_query * hq,struct fd_ringbuffer * ring)81 resume_query(struct fd_batch *batch, struct fd_hw_query *hq,
82              struct fd_ringbuffer *ring) assert_dt
83 {
84    int idx = pidx(hq->provider->query_type);
85    DBG("%p", hq);
86    assert(idx >= 0); /* query never would have been created otherwise */
87    assert(!hq->period);
88    batch->query_providers_used |= (1 << idx);
89    batch->query_providers_active |= (1 << idx);
90    hq->period = slab_alloc_st(&batch->ctx->sample_period_pool);
91    list_inithead(&hq->period->list);
92    hq->period->start = get_sample(batch, ring, hq->base.type);
93    /* NOTE: slab_alloc_st() does not zero out the buffer: */
94    hq->period->end = NULL;
95 }
96 
97 static void
pause_query(struct fd_batch * batch,struct fd_hw_query * hq,struct fd_ringbuffer * ring)98 pause_query(struct fd_batch *batch, struct fd_hw_query *hq,
99             struct fd_ringbuffer *ring) assert_dt
100 {
101    ASSERTED int idx = pidx(hq->provider->query_type);
102    DBG("%p", hq);
103    assert(idx >= 0); /* query never would have been created otherwise */
104    assert(hq->period && !hq->period->end);
105    assert(query_active_in_batch(batch, hq));
106    batch->query_providers_active &= ~(1 << idx);
107    hq->period->end = get_sample(batch, ring, hq->base.type);
108    list_addtail(&hq->period->list, &hq->periods);
109    hq->period = NULL;
110 }
111 
112 static void
destroy_periods(struct fd_context * ctx,struct fd_hw_query * hq)113 destroy_periods(struct fd_context *ctx, struct fd_hw_query *hq)
114 {
115    struct fd_hw_sample_period *period, *s;
116    LIST_FOR_EACH_ENTRY_SAFE (period, s, &hq->periods, list) {
117       fd_hw_sample_reference(ctx, &period->start, NULL);
118       fd_hw_sample_reference(ctx, &period->end, NULL);
119       list_del(&period->list);
120       slab_free_st(&ctx->sample_period_pool, period);
121    }
122 }
123 
124 static void
fd_hw_destroy_query(struct fd_context * ctx,struct fd_query * q)125 fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
126 {
127    struct fd_hw_query *hq = fd_hw_query(q);
128 
129    DBG("%p", q);
130 
131    destroy_periods(ctx, hq);
132    list_del(&hq->list);
133 
134    free(hq);
135 }
136 
137 static void
fd_hw_begin_query(struct fd_context * ctx,struct fd_query * q)138 fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q) assert_dt
139 {
140    struct fd_batch *batch = fd_context_batch_locked(ctx);
141    struct fd_hw_query *hq = fd_hw_query(q);
142 
143    DBG("%p", q);
144 
145    /* begin_query() should clear previous results: */
146    destroy_periods(ctx, hq);
147 
148    if (batch && (ctx->active_queries || hq->provider->always))
149       resume_query(batch, hq, batch->draw);
150 
151    /* add to active list: */
152    assert(list_is_empty(&hq->list));
153    list_addtail(&hq->list, &ctx->hw_active_queries);
154 
155    fd_batch_unlock_submit(batch);
156    fd_batch_reference(&batch, NULL);
157 }
158 
159 static void
fd_hw_end_query(struct fd_context * ctx,struct fd_query * q)160 fd_hw_end_query(struct fd_context *ctx, struct fd_query *q) assert_dt
161 {
162    struct fd_batch *batch = fd_context_batch_locked(ctx);
163    struct fd_hw_query *hq = fd_hw_query(q);
164 
165    DBG("%p", q);
166 
167    if (batch && (ctx->active_queries || hq->provider->always))
168       pause_query(batch, hq, batch->draw);
169 
170    /* remove from active list: */
171    list_delinit(&hq->list);
172 
173    fd_batch_unlock_submit(batch);
174    fd_batch_reference(&batch, NULL);
175 }
176 
177 /* helper to get ptr to specified sample: */
178 static void *
sampptr(struct fd_hw_sample * samp,uint32_t n,void * ptr)179 sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
180 {
181    return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
182 }
183 
184 static bool
fd_hw_get_query_result(struct fd_context * ctx,struct fd_query * q,bool wait,union pipe_query_result * result)185 fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q, bool wait,
186                        union pipe_query_result *result)
187 {
188    struct fd_hw_query *hq = fd_hw_query(q);
189    const struct fd_hw_sample_provider *p = hq->provider;
190    struct fd_hw_sample_period *period, *tmp;
191 
192    DBG("%p: wait=%d", q, wait);
193 
194    if (list_is_empty(&hq->periods))
195       return true;
196 
197    assert(list_is_empty(&hq->list));
198    assert(!hq->period);
199 
200    /* sum the result across all sample periods.  Start with the last period
201     * so that no-wait will bail quickly.
202     */
203    LIST_FOR_EACH_ENTRY_SAFE_REV (period, tmp, &hq->periods, list) {
204       struct fd_hw_sample *start = period->start;
205       ASSERTED struct fd_hw_sample *end = period->end;
206       unsigned i;
207 
208       /* start and end samples should be from same batch: */
209       assert(start->prsc == end->prsc);
210       assert(start->num_tiles == end->num_tiles);
211 
212       struct fd_resource *rsc = fd_resource(start->prsc);
213 
214       /* ARB_occlusion_query says:
215        *
216        *     "Querying the state for a given occlusion query forces that
217        *      occlusion query to complete within a finite amount of time."
218        *
219        * So, regardless of whether we are supposed to wait or not, we do need to
220        * flush now.
221        */
222       if (fd_get_query_result_in_driver_thread(q)) {
223          tc_assert_driver_thread(ctx->tc);
224          fd_context_access_begin(ctx);
225          fd_bc_flush_writer(ctx, rsc);
226          fd_context_access_end(ctx);
227       }
228 
229       /* some piglit tests at least do query with no draws, I guess: */
230       if (!rsc->bo)
231          continue;
232 
233       if (!wait) {
234          int ret = fd_resource_wait(
235             ctx, rsc, FD_BO_PREP_READ | FD_BO_PREP_NOSYNC | FD_BO_PREP_FLUSH);
236          if (ret)
237             return false;
238       } else {
239          fd_resource_wait(ctx, rsc, FD_BO_PREP_READ);
240       }
241 
242       void *ptr = fd_bo_map(rsc->bo);
243 
244       for (i = 0; i < start->num_tiles; i++) {
245          p->accumulate_result(ctx, sampptr(period->start, i, ptr),
246                               sampptr(period->end, i, ptr), result);
247       }
248 
249       fd_bo_cpu_fini(rsc->bo);
250    }
251 
252    return true;
253 }
254 
255 static const struct fd_query_funcs hw_query_funcs = {
256    .destroy_query = fd_hw_destroy_query,
257    .begin_query = fd_hw_begin_query,
258    .end_query = fd_hw_end_query,
259    .get_query_result = fd_hw_get_query_result,
260 };
261 
262 struct fd_query *
fd_hw_create_query(struct fd_context * ctx,unsigned query_type,unsigned index)263 fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
264 {
265    struct fd_hw_query *hq;
266    struct fd_query *q;
267    int idx = pidx(query_type);
268 
269    if ((idx < 0) || !ctx->hw_sample_providers[idx])
270       return NULL;
271 
272    hq = CALLOC_STRUCT(fd_hw_query);
273    if (!hq)
274       return NULL;
275 
276    DBG("%p: query_type=%u", hq, query_type);
277 
278    hq->provider = ctx->hw_sample_providers[idx];
279 
280    list_inithead(&hq->periods);
281    list_inithead(&hq->list);
282 
283    q = &hq->base;
284    q->funcs = &hw_query_funcs;
285    q->type = query_type;
286    q->index = index;
287 
288    return q;
289 }
290 
291 struct fd_hw_sample *
fd_hw_sample_init(struct fd_batch * batch,uint32_t size)292 fd_hw_sample_init(struct fd_batch *batch, uint32_t size)
293 {
294    struct fd_hw_sample *samp = slab_alloc_st(&batch->ctx->sample_pool);
295    pipe_reference_init(&samp->reference, 1);
296    samp->size = size;
297    debug_assert(util_is_power_of_two_or_zero(size));
298    batch->next_sample_offset = align(batch->next_sample_offset, size);
299    samp->offset = batch->next_sample_offset;
300    /* NOTE: slab_alloc_st() does not zero out the buffer: */
301    samp->prsc = NULL;
302    samp->num_tiles = 0;
303    samp->tile_stride = 0;
304    batch->next_sample_offset += size;
305 
306    if (!batch->query_buf) {
307       struct pipe_screen *pscreen = &batch->ctx->screen->base;
308       struct pipe_resource templ = {
309          .target = PIPE_BUFFER,
310          .format = PIPE_FORMAT_R8_UNORM,
311          .bind = PIPE_BIND_QUERY_BUFFER,
312          .width0 = 0, /* create initially zero size buffer */
313          .height0 = 1,
314          .depth0 = 1,
315          .array_size = 1,
316          .last_level = 0,
317          .nr_samples = 1,
318       };
319       batch->query_buf = pscreen->resource_create(pscreen, &templ);
320    }
321 
322    pipe_resource_reference(&samp->prsc, batch->query_buf);
323 
324    return samp;
325 }
326 
327 void
__fd_hw_sample_destroy(struct fd_context * ctx,struct fd_hw_sample * samp)328 __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
329 {
330    pipe_resource_reference(&samp->prsc, NULL);
331    slab_free_st(&ctx->sample_pool, samp);
332 }
333 
334 /* called from gmem code once total storage requirements are known (ie.
335  * number of samples times number of tiles)
336  */
337 void
fd_hw_query_prepare(struct fd_batch * batch,uint32_t num_tiles)338 fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles)
339 {
340    uint32_t tile_stride = batch->next_sample_offset;
341 
342    if (tile_stride > 0)
343       fd_resource_resize(batch->query_buf, tile_stride * num_tiles);
344 
345    batch->query_tile_stride = tile_stride;
346 
347    while (batch->samples.size > 0) {
348       struct fd_hw_sample *samp =
349          util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
350       samp->num_tiles = num_tiles;
351       samp->tile_stride = tile_stride;
352       fd_hw_sample_reference(batch->ctx, &samp, NULL);
353    }
354 
355    /* reset things for next batch: */
356    batch->next_sample_offset = 0;
357 }
358 
359 void
fd_hw_query_prepare_tile(struct fd_batch * batch,uint32_t n,struct fd_ringbuffer * ring)360 fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
361                          struct fd_ringbuffer *ring)
362 {
363    uint32_t tile_stride = batch->query_tile_stride;
364    uint32_t offset = tile_stride * n;
365 
366    /* bail if no queries: */
367    if (tile_stride == 0)
368       return;
369 
370    fd_wfi(batch, ring);
371    OUT_PKT0(ring, HW_QUERY_BASE_REG, 1);
372    OUT_RELOC(ring, fd_resource(batch->query_buf)->bo, offset, 0, 0);
373 }
374 
375 void
fd_hw_query_update_batch(struct fd_batch * batch,bool disable_all)376 fd_hw_query_update_batch(struct fd_batch *batch, bool disable_all)
377 {
378    struct fd_context *ctx = batch->ctx;
379 
380    if (disable_all || ctx->update_active_queries) {
381       struct fd_hw_query *hq;
382       LIST_FOR_EACH_ENTRY (hq, &batch->ctx->hw_active_queries, list) {
383          bool was_active = query_active_in_batch(batch, hq);
384          bool now_active =
385             !disable_all && (ctx->active_queries || hq->provider->always);
386 
387          if (now_active && !was_active)
388             resume_query(batch, hq, batch->draw);
389          else if (was_active && !now_active)
390             pause_query(batch, hq, batch->draw);
391       }
392    }
393    clear_sample_cache(batch);
394 }
395 
396 /* call the provider->enable() for all the hw queries that were active
397  * in the current batch.  This sets up perfctr selector regs statically
398  * for the duration of the batch.
399  */
400 void
fd_hw_query_enable(struct fd_batch * batch,struct fd_ringbuffer * ring)401 fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring)
402 {
403    struct fd_context *ctx = batch->ctx;
404    for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
405       if (batch->query_providers_used & (1 << idx)) {
406          assert(ctx->hw_sample_providers[idx]);
407          if (ctx->hw_sample_providers[idx]->enable)
408             ctx->hw_sample_providers[idx]->enable(ctx, ring);
409       }
410    }
411 }
412 
413 void
fd_hw_query_register_provider(struct pipe_context * pctx,const struct fd_hw_sample_provider * provider)414 fd_hw_query_register_provider(struct pipe_context *pctx,
415                               const struct fd_hw_sample_provider *provider)
416 {
417    struct fd_context *ctx = fd_context(pctx);
418    int idx = pidx(provider->query_type);
419 
420    assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
421    assert(!ctx->hw_sample_providers[idx]);
422 
423    ctx->hw_sample_providers[idx] = provider;
424 }
425 
426 void
fd_hw_query_init(struct pipe_context * pctx)427 fd_hw_query_init(struct pipe_context *pctx)
428 {
429    struct fd_context *ctx = fd_context(pctx);
430 
431    slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample), 16);
432    slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
433                16);
434 }
435 
436 void
fd_hw_query_fini(struct pipe_context * pctx)437 fd_hw_query_fini(struct pipe_context *pctx)
438 {
439    struct fd_context *ctx = fd_context(pctx);
440 
441    slab_destroy(&ctx->sample_pool);
442    slab_destroy(&ctx->sample_period_pool);
443 }
444