1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "d3d12_query.h"
25 #include "d3d12_context.h"
26 #include "d3d12_resource.h"
27 #include "d3d12_screen.h"
28 
29 #include "util/u_dump.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32 
33 #include <dxguids/dxguids.h>
34 
35 struct d3d12_query {
36    enum pipe_query_type type;
37 
38    ID3D12QueryHeap *query_heap;
39    unsigned curr_query, num_queries;
40    size_t query_size;
41    struct d3d12_query *subquery;
42 
43    D3D12_QUERY_TYPE d3d12qtype;
44 
45    pipe_resource *buffer;
46    unsigned buffer_offset;
47    uint64_t fence_value;
48 
49    struct list_head active_list;
50    struct d3d12_resource *predicate;
51 };
52 
53 static D3D12_QUERY_HEAP_TYPE
d3d12_query_heap_type(unsigned query_type)54 d3d12_query_heap_type(unsigned query_type)
55 {
56    switch (query_type) {
57    case PIPE_QUERY_OCCLUSION_COUNTER:
58    case PIPE_QUERY_OCCLUSION_PREDICATE:
59    case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
60       return D3D12_QUERY_HEAP_TYPE_OCCLUSION;
61    case PIPE_QUERY_PIPELINE_STATISTICS:
62       return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
63    case PIPE_QUERY_PRIMITIVES_GENERATED:
64    case PIPE_QUERY_PRIMITIVES_EMITTED:
65    case PIPE_QUERY_SO_STATISTICS:
66       return D3D12_QUERY_HEAP_TYPE_SO_STATISTICS;
67    case PIPE_QUERY_TIMESTAMP:
68    case PIPE_QUERY_TIME_ELAPSED:
69       return D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
70 
71    default:
72       debug_printf("unknown query: %s\n",
73                    util_str_query_type(query_type, true));
74       unreachable("d3d12: unknown query type");
75    }
76 }
77 
78 static D3D12_QUERY_TYPE
d3d12_query_type(unsigned query_type)79 d3d12_query_type(unsigned query_type)
80 {
81    switch (query_type) {
82    case PIPE_QUERY_OCCLUSION_COUNTER:
83       return D3D12_QUERY_TYPE_OCCLUSION;
84    case PIPE_QUERY_OCCLUSION_PREDICATE:
85    case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
86       return D3D12_QUERY_TYPE_BINARY_OCCLUSION;
87    case PIPE_QUERY_PIPELINE_STATISTICS:
88       return D3D12_QUERY_TYPE_PIPELINE_STATISTICS;
89    case PIPE_QUERY_PRIMITIVES_GENERATED:
90    case PIPE_QUERY_PRIMITIVES_EMITTED:
91    case PIPE_QUERY_SO_STATISTICS:
92       return D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0;
93    case PIPE_QUERY_TIMESTAMP:
94    case PIPE_QUERY_TIME_ELAPSED:
95       return D3D12_QUERY_TYPE_TIMESTAMP;
96    default:
97       debug_printf("unknown query: %s\n",
98                    util_str_query_type(query_type, true));
99       unreachable("d3d12: unknown query type");
100    }
101 }
102 
103 static struct pipe_query *
d3d12_create_query(struct pipe_context * pctx,unsigned query_type,unsigned index)104 d3d12_create_query(struct pipe_context *pctx,
105                    unsigned query_type, unsigned index)
106 {
107    struct d3d12_context *ctx = d3d12_context(pctx);
108    struct d3d12_screen *screen = d3d12_screen(pctx->screen);
109    struct d3d12_query *query = CALLOC_STRUCT(d3d12_query);
110    D3D12_QUERY_HEAP_DESC desc = {};
111 
112    if (!query)
113       return NULL;
114 
115    query->type = (pipe_query_type)query_type;
116    query->d3d12qtype = d3d12_query_type(query_type);
117    query->num_queries = 16;
118 
119    /* With timer queries we want a few more queries, especially since we need two slots
120     * per query for TIME_ELAPSED queries */
121    if (unlikely(query->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP))
122       query->num_queries = 64;
123 
124    query->curr_query = 0;
125 
126    switch (query->d3d12qtype) {
127    case D3D12_QUERY_TYPE_PIPELINE_STATISTICS:
128       query->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS);
129       break;
130    case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0:
131       query->query_size = sizeof(D3D12_QUERY_DATA_SO_STATISTICS);
132       break;
133    default:
134       query->query_size = sizeof(uint64_t);
135       break;
136    }
137 
138    desc.Count = query->num_queries;
139    desc.Type = d3d12_query_heap_type(query_type);
140    if (FAILED(screen->dev->CreateQueryHeap(&desc,
141                                            IID_PPV_ARGS(&query->query_heap)))) {
142       FREE(query);
143       return NULL;
144    }
145 
146    /* Query result goes into a readback buffer */
147    size_t buffer_size = query->query_size * query->num_queries;
148    u_suballocator_alloc(&ctx->query_allocator, buffer_size, 256,
149                         &query->buffer_offset, &query->buffer);
150 
151    return (struct pipe_query *)query;
152 }
153 
154 static void
d3d12_destroy_query(struct pipe_context * pctx,struct pipe_query * q)155 d3d12_destroy_query(struct pipe_context *pctx,
156                     struct pipe_query *q)
157 {
158    struct d3d12_query *query = (struct d3d12_query *)q;
159    pipe_resource *predicate = &query->predicate->base;
160    if (query->subquery)
161       d3d12_destroy_query(pctx, (struct pipe_query *)query->subquery);
162    pipe_resource_reference(&predicate, NULL);
163    query->query_heap->Release();
164    FREE(query);
165 }
166 
167 static bool
accumulate_result(struct d3d12_context * ctx,struct d3d12_query * q,union pipe_query_result * result,bool write)168 accumulate_result(struct d3d12_context *ctx, struct d3d12_query *q,
169                   union pipe_query_result *result, bool write)
170 {
171    struct pipe_transfer *transfer = NULL;
172    struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
173    unsigned access = PIPE_MAP_READ;
174    void *results;
175 
176    if (write)
177       access |= PIPE_MAP_WRITE;
178    results = pipe_buffer_map_range(&ctx->base, q->buffer, q->buffer_offset,
179                                    q->num_queries * q->query_size,
180                                    access, &transfer);
181 
182    if (results == NULL)
183       return false;
184 
185    uint64_t *results_u64 = (uint64_t *)results;
186    D3D12_QUERY_DATA_PIPELINE_STATISTICS *results_stats = (D3D12_QUERY_DATA_PIPELINE_STATISTICS *)results;
187    D3D12_QUERY_DATA_SO_STATISTICS *results_so = (D3D12_QUERY_DATA_SO_STATISTICS *)results;
188 
189    util_query_clear_result(result, q->type);
190    for (unsigned i = 0; i < q->curr_query; ++i) {
191       switch (q->type) {
192       case PIPE_QUERY_OCCLUSION_PREDICATE:
193       case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
194          result->b |= results_u64[i] != 0;
195          break;
196 
197       case PIPE_QUERY_OCCLUSION_COUNTER:
198          result->u64 += results_u64[i];
199          break;
200       case PIPE_QUERY_TIMESTAMP:
201          result->u64 = results_u64[i];
202          break;
203 
204       case PIPE_QUERY_PIPELINE_STATISTICS:
205          result->pipeline_statistics.ia_vertices += results_stats[i].IAVertices;
206          result->pipeline_statistics.ia_primitives += results_stats[i].IAPrimitives;
207          result->pipeline_statistics.vs_invocations += results_stats[i].VSInvocations;
208          result->pipeline_statistics.gs_invocations += results_stats[i].GSInvocations;
209          result->pipeline_statistics.gs_primitives += results_stats[i].GSPrimitives;
210          result->pipeline_statistics.c_invocations += results_stats[i].CInvocations;
211          result->pipeline_statistics.c_primitives += results_stats[i].CPrimitives;
212          result->pipeline_statistics.ps_invocations += results_stats[i].PSInvocations;
213          result->pipeline_statistics.hs_invocations += results_stats[i].HSInvocations;
214          result->pipeline_statistics.ds_invocations += results_stats[i].DSInvocations;
215          result->pipeline_statistics.cs_invocations += results_stats[i].CSInvocations;
216          break;
217 
218       case PIPE_QUERY_PRIMITIVES_GENERATED:
219          result->u64 += results_so[i].PrimitivesStorageNeeded;
220          break;
221 
222       case PIPE_QUERY_PRIMITIVES_EMITTED:
223          result->u64 += results_so[i].NumPrimitivesWritten;
224          break;
225 
226       case PIPE_QUERY_TIME_ELAPSED:
227          result->u64 += results_u64[2 * i + 1] - results_u64[2 * i];
228          break;
229 
230       case PIPE_QUERY_SO_STATISTICS:
231          result->so_statistics.num_primitives_written += results_so[i].NumPrimitivesWritten;
232          result->so_statistics.primitives_storage_needed += results_so[i].PrimitivesStorageNeeded;
233          break;
234 
235       default:
236          debug_printf("unsupported query type: %s\n",
237                       util_str_query_type(q->type, true));
238          unreachable("unexpected query type");
239       }
240    }
241 
242    if (q->subquery) {
243       union pipe_query_result subresult;
244 
245       accumulate_result(ctx, q->subquery, &subresult, false);
246       q->subquery->curr_query = 0;
247       if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED)
248          result->u64 += subresult.pipeline_statistics.ia_primitives;
249    }
250 
251    if (write) {
252       if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) {
253          results_stats[0].IAVertices = result->pipeline_statistics.ia_vertices;
254          results_stats[0].IAPrimitives = result->pipeline_statistics.ia_primitives;
255          results_stats[0].VSInvocations = result->pipeline_statistics.vs_invocations;
256          results_stats[0].GSInvocations = result->pipeline_statistics.gs_invocations;
257          results_stats[0].GSPrimitives = result->pipeline_statistics.gs_primitives;
258          results_stats[0].CInvocations = result->pipeline_statistics.c_invocations;
259          results_stats[0].CPrimitives = result->pipeline_statistics.c_primitives;
260          results_stats[0].PSInvocations = result->pipeline_statistics.ps_invocations;
261          results_stats[0].HSInvocations = result->pipeline_statistics.hs_invocations;
262          results_stats[0].DSInvocations = result->pipeline_statistics.ds_invocations;
263          results_stats[0].CSInvocations = result->pipeline_statistics.cs_invocations;
264       } else if (q->type == PIPE_QUERY_SO_STATISTICS) {
265          results_so[0].NumPrimitivesWritten = result->so_statistics.num_primitives_written;
266          results_so[0].PrimitivesStorageNeeded = result->so_statistics.primitives_storage_needed;
267       } else {
268          if (unlikely(q->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP)) {
269             results_u64[0] = 0;
270             results_u64[1] = result->u64;
271          } else {
272             results_u64[0] = result->u64;
273          }
274       }
275    }
276 
277    pipe_buffer_unmap(&ctx->base, transfer);
278 
279    if (q->type == PIPE_QUERY_TIME_ELAPSED ||
280        q->type == PIPE_QUERY_TIMESTAMP)
281       result->u64 = static_cast<uint64_t>(screen->timestamp_multiplier * result->u64);
282 
283    return true;
284 }
285 
286 static void
begin_query(struct d3d12_context * ctx,struct d3d12_query * q,bool restart)287 begin_query(struct d3d12_context *ctx, struct d3d12_query *q, bool restart)
288 {
289    if (restart) {
290       q->curr_query = 0;
291    } else if (q->curr_query == q->num_queries) {
292       union pipe_query_result result;
293 
294       /* Accumulate current results and store in first slot */
295       d3d12_flush_cmdlist_and_wait(ctx);
296       accumulate_result(ctx, q, &result, true);
297       q->curr_query = 1;
298    }
299 
300    if (q->subquery)
301       begin_query(ctx, q->subquery, restart);
302 
303    ctx->cmdlist->BeginQuery(q->query_heap, q->d3d12qtype, q->curr_query);
304 }
305 
306 
307 static void
begin_timer_query(struct d3d12_context * ctx,struct d3d12_query * q,bool restart)308 begin_timer_query(struct d3d12_context *ctx, struct d3d12_query *q, bool restart)
309 {
310    /* For PIPE_QUERY_TIME_ELAPSED we record one time with BeginQuery and one in
311     * EndQuery, so we need two query slots */
312    unsigned query_index = 2 * q->curr_query;
313 
314    if (restart) {
315       q->curr_query = 0;
316       query_index = 0;
317    } else if (query_index == q->num_queries) {
318       union pipe_query_result result;
319 
320       /* Accumulate current results and store in first slot */
321       d3d12_flush_cmdlist_and_wait(ctx);
322       accumulate_result(ctx, q, &result, true);
323       q->curr_query = 2;
324    }
325 
326    ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, query_index);
327 }
328 
329 static bool
d3d12_begin_query(struct pipe_context * pctx,struct pipe_query * q)330 d3d12_begin_query(struct pipe_context *pctx,
331                   struct pipe_query *q)
332 {
333    struct d3d12_context *ctx = d3d12_context(pctx);
334    struct d3d12_query *query = (struct d3d12_query *)q;
335 
336    assert(query->type != PIPE_QUERY_TIMESTAMP);
337 
338    if (unlikely(query->type == PIPE_QUERY_TIME_ELAPSED))
339       begin_timer_query(ctx, query, true);
340    else {
341       begin_query(ctx, query, true);
342       list_addtail(&query->active_list, &ctx->active_queries);
343    }
344 
345    return true;
346 }
347 
348 static void
end_query(struct d3d12_context * ctx,struct d3d12_query * q)349 end_query(struct d3d12_context *ctx, struct d3d12_query *q)
350 {
351    uint64_t offset = 0;
352    struct d3d12_batch *batch = d3d12_current_batch(ctx);
353    struct d3d12_resource *res = (struct d3d12_resource *)q->buffer;
354    ID3D12Resource *d3d12_res = d3d12_resource_underlying(res, &offset);
355 
356    /* End subquery first so that we can use fence value from parent */
357    if (q->subquery)
358       end_query(ctx, q->subquery);
359 
360    /* With QUERY_TIME_ELAPSED we have recorded one value at
361     * (2 * q->curr_query), and now we record a value at (2 * q->curr_query + 1)
362     * and when resolving the query we subtract the latter from the former */
363 
364    unsigned resolve_count = q->type == PIPE_QUERY_TIME_ELAPSED ? 2 : 1;
365    unsigned resolve_index = resolve_count * q->curr_query;
366    unsigned end_index = resolve_index + resolve_count - 1;
367 
368    offset += q->buffer_offset + resolve_index * q->query_size;
369    ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, end_index);
370    d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_FULL);
371    d3d12_apply_resource_states(ctx);
372    ctx->cmdlist->ResolveQueryData(q->query_heap, q->d3d12qtype, resolve_index,
373                                   resolve_count, d3d12_res, offset);
374 
375    d3d12_batch_reference_object(batch, q->query_heap);
376    d3d12_batch_reference_resource(batch, res);
377 
378    assert(q->curr_query < q->num_queries);
379    q->curr_query++;
380 }
381 
382 static bool
d3d12_end_query(struct pipe_context * pctx,struct pipe_query * q)383 d3d12_end_query(struct pipe_context *pctx,
384                struct pipe_query *q)
385 {
386    struct d3d12_context *ctx = d3d12_context(pctx);
387    struct d3d12_query *query = (struct d3d12_query *)q;
388 
389    end_query(ctx, query);
390 
391    if (query->type != PIPE_QUERY_TIMESTAMP &&
392        query->type != PIPE_QUERY_TIME_ELAPSED)
393       list_delinit(&query->active_list);
394 
395    query->fence_value = ctx->fence_value;
396    return true;
397 }
398 
399 static bool
d3d12_get_query_result(struct pipe_context * pctx,struct pipe_query * q,bool wait,union pipe_query_result * result)400 d3d12_get_query_result(struct pipe_context *pctx,
401                       struct pipe_query *q,
402                       bool wait,
403                       union pipe_query_result *result)
404 {
405    struct d3d12_context *ctx = d3d12_context(pctx);
406    struct d3d12_query *query = (struct d3d12_query *)q;
407 
408    if (ctx->cmdqueue_fence->GetCompletedValue() < query->fence_value) {
409       if (!wait)
410          return false;
411       d3d12_flush_cmdlist_and_wait(ctx);
412    }
413 
414    return accumulate_result(ctx, query, result, false);
415 }
416 
417 void
d3d12_suspend_queries(struct d3d12_context * ctx)418 d3d12_suspend_queries(struct d3d12_context *ctx)
419 {
420    list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
421       end_query(ctx, query);
422    }
423 }
424 
425 void
d3d12_resume_queries(struct d3d12_context * ctx)426 d3d12_resume_queries(struct d3d12_context *ctx)
427 {
428    list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
429       begin_query(ctx, query, false);
430    }
431 }
432 
433 void
d3d12_validate_queries(struct d3d12_context * ctx)434 d3d12_validate_queries(struct d3d12_context *ctx)
435 {
436    bool have_xfb = !!ctx->gfx_pipeline_state.num_so_targets;
437 
438    list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
439       if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED && !have_xfb && !query->subquery) {
440          struct pipe_query *subquery = d3d12_create_query(&ctx->base, PIPE_QUERY_PIPELINE_STATISTICS, 0);
441          query->subquery = (struct d3d12_query *)subquery;
442          if (!ctx->queries_disabled)
443             begin_query(ctx, query->subquery, true);
444       }
445    }
446 }
447 
448 static void
d3d12_set_active_query_state(struct pipe_context * pctx,bool enable)449 d3d12_set_active_query_state(struct pipe_context *pctx, bool enable)
450 {
451    struct d3d12_context *ctx = d3d12_context(pctx);
452    ctx->queries_disabled = !enable;
453 
454    if (enable)
455       d3d12_resume_queries(ctx);
456    else
457       d3d12_suspend_queries(ctx);
458 }
459 
460 static void
d3d12_render_condition(struct pipe_context * pctx,struct pipe_query * pquery,bool condition,enum pipe_render_cond_flag mode)461 d3d12_render_condition(struct pipe_context *pctx,
462                        struct pipe_query *pquery,
463                        bool condition,
464                        enum pipe_render_cond_flag mode)
465 {
466    struct d3d12_context *ctx = d3d12_context(pctx);
467    struct d3d12_query *query = (struct d3d12_query *)pquery;
468 
469    if (query == nullptr) {
470       ctx->cmdlist->SetPredication(nullptr, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
471       ctx->current_predication = nullptr;
472       return;
473    }
474 
475    if (!query->predicate)
476       query->predicate = d3d12_resource(pipe_buffer_create(pctx->screen, 0,
477                                                            PIPE_USAGE_DEFAULT, sizeof(uint64_t)));
478 
479    if (mode == PIPE_RENDER_COND_WAIT) {
480       d3d12_flush_cmdlist_and_wait(ctx);
481       union pipe_query_result result;
482       accumulate_result(ctx, (d3d12_query *)pquery, &result, true);
483    }
484 
485    struct d3d12_resource *res = (struct d3d12_resource *)query->buffer;
486    d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_BIND_INVALIDATE_FULL);
487    d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_NONE);
488    d3d12_apply_resource_states(ctx);
489    ctx->cmdlist->CopyBufferRegion(d3d12_resource_resource(query->predicate), 0,
490                                   d3d12_resource_resource(res), 0,
491                                   sizeof(uint64_t));
492 
493    d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_PREDICATION, D3D12_BIND_INVALIDATE_NONE);
494    d3d12_apply_resource_states(ctx);
495 
496    ctx->current_predication = query->predicate;
497    /* documentation of ID3D12GraphicsCommandList::SetPredication method:
498     * "resource manipulation commands are _not_ actually performed
499     *  if the resulting predicate data of the predicate is equal to
500     *  the operation specified."
501     */
502    ctx->cmdlist->SetPredication(d3d12_resource_resource(query->predicate), 0,
503                                 condition ? D3D12_PREDICATION_OP_NOT_EQUAL_ZERO :
504                                 D3D12_PREDICATION_OP_EQUAL_ZERO);
505 }
506 
507 void
d3d12_context_query_init(struct pipe_context * pctx)508 d3d12_context_query_init(struct pipe_context *pctx)
509 {
510    struct d3d12_context *ctx = d3d12_context(pctx);
511    list_inithead(&ctx->active_queries);
512 
513    u_suballocator_init(&ctx->query_allocator, &ctx->base, 4096, 0, PIPE_USAGE_STAGING,
514                          0, true);
515 
516    pctx->create_query = d3d12_create_query;
517    pctx->destroy_query = d3d12_destroy_query;
518    pctx->begin_query = d3d12_begin_query;
519    pctx->end_query = d3d12_end_query;
520    pctx->get_query_result = d3d12_get_query_result;
521    pctx->set_active_query_state = d3d12_set_active_query_state;
522    pctx->render_condition = d3d12_render_condition;
523 }
524