1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "d3d12_query.h"
25 #include "d3d12_context.h"
26 #include "d3d12_resource.h"
27 #include "d3d12_screen.h"
28
29 #include "util/u_dump.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32
33 #include <dxguids/dxguids.h>
34
35 struct d3d12_query {
36 enum pipe_query_type type;
37
38 ID3D12QueryHeap *query_heap;
39 unsigned curr_query, num_queries;
40 size_t query_size;
41 struct d3d12_query *subquery;
42
43 D3D12_QUERY_TYPE d3d12qtype;
44
45 pipe_resource *buffer;
46 unsigned buffer_offset;
47 uint64_t fence_value;
48
49 struct list_head active_list;
50 struct d3d12_resource *predicate;
51 };
52
53 static D3D12_QUERY_HEAP_TYPE
d3d12_query_heap_type(unsigned query_type)54 d3d12_query_heap_type(unsigned query_type)
55 {
56 switch (query_type) {
57 case PIPE_QUERY_OCCLUSION_COUNTER:
58 case PIPE_QUERY_OCCLUSION_PREDICATE:
59 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
60 return D3D12_QUERY_HEAP_TYPE_OCCLUSION;
61 case PIPE_QUERY_PIPELINE_STATISTICS:
62 return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
63 case PIPE_QUERY_PRIMITIVES_GENERATED:
64 case PIPE_QUERY_PRIMITIVES_EMITTED:
65 case PIPE_QUERY_SO_STATISTICS:
66 return D3D12_QUERY_HEAP_TYPE_SO_STATISTICS;
67 case PIPE_QUERY_TIMESTAMP:
68 case PIPE_QUERY_TIME_ELAPSED:
69 return D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
70
71 default:
72 debug_printf("unknown query: %s\n",
73 util_str_query_type(query_type, true));
74 unreachable("d3d12: unknown query type");
75 }
76 }
77
78 static D3D12_QUERY_TYPE
d3d12_query_type(unsigned query_type)79 d3d12_query_type(unsigned query_type)
80 {
81 switch (query_type) {
82 case PIPE_QUERY_OCCLUSION_COUNTER:
83 return D3D12_QUERY_TYPE_OCCLUSION;
84 case PIPE_QUERY_OCCLUSION_PREDICATE:
85 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
86 return D3D12_QUERY_TYPE_BINARY_OCCLUSION;
87 case PIPE_QUERY_PIPELINE_STATISTICS:
88 return D3D12_QUERY_TYPE_PIPELINE_STATISTICS;
89 case PIPE_QUERY_PRIMITIVES_GENERATED:
90 case PIPE_QUERY_PRIMITIVES_EMITTED:
91 case PIPE_QUERY_SO_STATISTICS:
92 return D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0;
93 case PIPE_QUERY_TIMESTAMP:
94 case PIPE_QUERY_TIME_ELAPSED:
95 return D3D12_QUERY_TYPE_TIMESTAMP;
96 default:
97 debug_printf("unknown query: %s\n",
98 util_str_query_type(query_type, true));
99 unreachable("d3d12: unknown query type");
100 }
101 }
102
103 static struct pipe_query *
d3d12_create_query(struct pipe_context * pctx,unsigned query_type,unsigned index)104 d3d12_create_query(struct pipe_context *pctx,
105 unsigned query_type, unsigned index)
106 {
107 struct d3d12_context *ctx = d3d12_context(pctx);
108 struct d3d12_screen *screen = d3d12_screen(pctx->screen);
109 struct d3d12_query *query = CALLOC_STRUCT(d3d12_query);
110 D3D12_QUERY_HEAP_DESC desc = {};
111
112 if (!query)
113 return NULL;
114
115 query->type = (pipe_query_type)query_type;
116 query->d3d12qtype = d3d12_query_type(query_type);
117 query->num_queries = 16;
118
119 /* With timer queries we want a few more queries, especially since we need two slots
120 * per query for TIME_ELAPSED queries */
121 if (unlikely(query->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP))
122 query->num_queries = 64;
123
124 query->curr_query = 0;
125
126 switch (query->d3d12qtype) {
127 case D3D12_QUERY_TYPE_PIPELINE_STATISTICS:
128 query->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS);
129 break;
130 case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0:
131 query->query_size = sizeof(D3D12_QUERY_DATA_SO_STATISTICS);
132 break;
133 default:
134 query->query_size = sizeof(uint64_t);
135 break;
136 }
137
138 desc.Count = query->num_queries;
139 desc.Type = d3d12_query_heap_type(query_type);
140 if (FAILED(screen->dev->CreateQueryHeap(&desc,
141 IID_PPV_ARGS(&query->query_heap)))) {
142 FREE(query);
143 return NULL;
144 }
145
146 /* Query result goes into a readback buffer */
147 size_t buffer_size = query->query_size * query->num_queries;
148 u_suballocator_alloc(&ctx->query_allocator, buffer_size, 256,
149 &query->buffer_offset, &query->buffer);
150
151 return (struct pipe_query *)query;
152 }
153
154 static void
d3d12_destroy_query(struct pipe_context * pctx,struct pipe_query * q)155 d3d12_destroy_query(struct pipe_context *pctx,
156 struct pipe_query *q)
157 {
158 struct d3d12_query *query = (struct d3d12_query *)q;
159 pipe_resource *predicate = &query->predicate->base;
160 if (query->subquery)
161 d3d12_destroy_query(pctx, (struct pipe_query *)query->subquery);
162 pipe_resource_reference(&predicate, NULL);
163 query->query_heap->Release();
164 FREE(query);
165 }
166
167 static bool
accumulate_result(struct d3d12_context * ctx,struct d3d12_query * q,union pipe_query_result * result,bool write)168 accumulate_result(struct d3d12_context *ctx, struct d3d12_query *q,
169 union pipe_query_result *result, bool write)
170 {
171 struct pipe_transfer *transfer = NULL;
172 struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
173 unsigned access = PIPE_MAP_READ;
174 void *results;
175
176 if (write)
177 access |= PIPE_MAP_WRITE;
178 results = pipe_buffer_map_range(&ctx->base, q->buffer, q->buffer_offset,
179 q->num_queries * q->query_size,
180 access, &transfer);
181
182 if (results == NULL)
183 return false;
184
185 uint64_t *results_u64 = (uint64_t *)results;
186 D3D12_QUERY_DATA_PIPELINE_STATISTICS *results_stats = (D3D12_QUERY_DATA_PIPELINE_STATISTICS *)results;
187 D3D12_QUERY_DATA_SO_STATISTICS *results_so = (D3D12_QUERY_DATA_SO_STATISTICS *)results;
188
189 util_query_clear_result(result, q->type);
190 for (unsigned i = 0; i < q->curr_query; ++i) {
191 switch (q->type) {
192 case PIPE_QUERY_OCCLUSION_PREDICATE:
193 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
194 result->b |= results_u64[i] != 0;
195 break;
196
197 case PIPE_QUERY_OCCLUSION_COUNTER:
198 result->u64 += results_u64[i];
199 break;
200 case PIPE_QUERY_TIMESTAMP:
201 result->u64 = results_u64[i];
202 break;
203
204 case PIPE_QUERY_PIPELINE_STATISTICS:
205 result->pipeline_statistics.ia_vertices += results_stats[i].IAVertices;
206 result->pipeline_statistics.ia_primitives += results_stats[i].IAPrimitives;
207 result->pipeline_statistics.vs_invocations += results_stats[i].VSInvocations;
208 result->pipeline_statistics.gs_invocations += results_stats[i].GSInvocations;
209 result->pipeline_statistics.gs_primitives += results_stats[i].GSPrimitives;
210 result->pipeline_statistics.c_invocations += results_stats[i].CInvocations;
211 result->pipeline_statistics.c_primitives += results_stats[i].CPrimitives;
212 result->pipeline_statistics.ps_invocations += results_stats[i].PSInvocations;
213 result->pipeline_statistics.hs_invocations += results_stats[i].HSInvocations;
214 result->pipeline_statistics.ds_invocations += results_stats[i].DSInvocations;
215 result->pipeline_statistics.cs_invocations += results_stats[i].CSInvocations;
216 break;
217
218 case PIPE_QUERY_PRIMITIVES_GENERATED:
219 result->u64 += results_so[i].PrimitivesStorageNeeded;
220 break;
221
222 case PIPE_QUERY_PRIMITIVES_EMITTED:
223 result->u64 += results_so[i].NumPrimitivesWritten;
224 break;
225
226 case PIPE_QUERY_TIME_ELAPSED:
227 result->u64 += results_u64[2 * i + 1] - results_u64[2 * i];
228 break;
229
230 case PIPE_QUERY_SO_STATISTICS:
231 result->so_statistics.num_primitives_written += results_so[i].NumPrimitivesWritten;
232 result->so_statistics.primitives_storage_needed += results_so[i].PrimitivesStorageNeeded;
233 break;
234
235 default:
236 debug_printf("unsupported query type: %s\n",
237 util_str_query_type(q->type, true));
238 unreachable("unexpected query type");
239 }
240 }
241
242 if (q->subquery) {
243 union pipe_query_result subresult;
244
245 accumulate_result(ctx, q->subquery, &subresult, false);
246 q->subquery->curr_query = 0;
247 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED)
248 result->u64 += subresult.pipeline_statistics.ia_primitives;
249 }
250
251 if (write) {
252 if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) {
253 results_stats[0].IAVertices = result->pipeline_statistics.ia_vertices;
254 results_stats[0].IAPrimitives = result->pipeline_statistics.ia_primitives;
255 results_stats[0].VSInvocations = result->pipeline_statistics.vs_invocations;
256 results_stats[0].GSInvocations = result->pipeline_statistics.gs_invocations;
257 results_stats[0].GSPrimitives = result->pipeline_statistics.gs_primitives;
258 results_stats[0].CInvocations = result->pipeline_statistics.c_invocations;
259 results_stats[0].CPrimitives = result->pipeline_statistics.c_primitives;
260 results_stats[0].PSInvocations = result->pipeline_statistics.ps_invocations;
261 results_stats[0].HSInvocations = result->pipeline_statistics.hs_invocations;
262 results_stats[0].DSInvocations = result->pipeline_statistics.ds_invocations;
263 results_stats[0].CSInvocations = result->pipeline_statistics.cs_invocations;
264 } else if (q->type == PIPE_QUERY_SO_STATISTICS) {
265 results_so[0].NumPrimitivesWritten = result->so_statistics.num_primitives_written;
266 results_so[0].PrimitivesStorageNeeded = result->so_statistics.primitives_storage_needed;
267 } else {
268 if (unlikely(q->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP)) {
269 results_u64[0] = 0;
270 results_u64[1] = result->u64;
271 } else {
272 results_u64[0] = result->u64;
273 }
274 }
275 }
276
277 pipe_buffer_unmap(&ctx->base, transfer);
278
279 if (q->type == PIPE_QUERY_TIME_ELAPSED ||
280 q->type == PIPE_QUERY_TIMESTAMP)
281 result->u64 = static_cast<uint64_t>(screen->timestamp_multiplier * result->u64);
282
283 return true;
284 }
285
286 static void
begin_query(struct d3d12_context * ctx,struct d3d12_query * q,bool restart)287 begin_query(struct d3d12_context *ctx, struct d3d12_query *q, bool restart)
288 {
289 if (restart) {
290 q->curr_query = 0;
291 } else if (q->curr_query == q->num_queries) {
292 union pipe_query_result result;
293
294 /* Accumulate current results and store in first slot */
295 d3d12_flush_cmdlist_and_wait(ctx);
296 accumulate_result(ctx, q, &result, true);
297 q->curr_query = 1;
298 }
299
300 if (q->subquery)
301 begin_query(ctx, q->subquery, restart);
302
303 ctx->cmdlist->BeginQuery(q->query_heap, q->d3d12qtype, q->curr_query);
304 }
305
306
307 static void
begin_timer_query(struct d3d12_context * ctx,struct d3d12_query * q,bool restart)308 begin_timer_query(struct d3d12_context *ctx, struct d3d12_query *q, bool restart)
309 {
310 /* For PIPE_QUERY_TIME_ELAPSED we record one time with BeginQuery and one in
311 * EndQuery, so we need two query slots */
312 unsigned query_index = 2 * q->curr_query;
313
314 if (restart) {
315 q->curr_query = 0;
316 query_index = 0;
317 } else if (query_index == q->num_queries) {
318 union pipe_query_result result;
319
320 /* Accumulate current results and store in first slot */
321 d3d12_flush_cmdlist_and_wait(ctx);
322 accumulate_result(ctx, q, &result, true);
323 q->curr_query = 2;
324 }
325
326 ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, query_index);
327 }
328
329 static bool
d3d12_begin_query(struct pipe_context * pctx,struct pipe_query * q)330 d3d12_begin_query(struct pipe_context *pctx,
331 struct pipe_query *q)
332 {
333 struct d3d12_context *ctx = d3d12_context(pctx);
334 struct d3d12_query *query = (struct d3d12_query *)q;
335
336 assert(query->type != PIPE_QUERY_TIMESTAMP);
337
338 if (unlikely(query->type == PIPE_QUERY_TIME_ELAPSED))
339 begin_timer_query(ctx, query, true);
340 else {
341 begin_query(ctx, query, true);
342 list_addtail(&query->active_list, &ctx->active_queries);
343 }
344
345 return true;
346 }
347
348 static void
end_query(struct d3d12_context * ctx,struct d3d12_query * q)349 end_query(struct d3d12_context *ctx, struct d3d12_query *q)
350 {
351 uint64_t offset = 0;
352 struct d3d12_batch *batch = d3d12_current_batch(ctx);
353 struct d3d12_resource *res = (struct d3d12_resource *)q->buffer;
354 ID3D12Resource *d3d12_res = d3d12_resource_underlying(res, &offset);
355
356 /* End subquery first so that we can use fence value from parent */
357 if (q->subquery)
358 end_query(ctx, q->subquery);
359
360 /* With QUERY_TIME_ELAPSED we have recorded one value at
361 * (2 * q->curr_query), and now we record a value at (2 * q->curr_query + 1)
362 * and when resolving the query we subtract the latter from the former */
363
364 unsigned resolve_count = q->type == PIPE_QUERY_TIME_ELAPSED ? 2 : 1;
365 unsigned resolve_index = resolve_count * q->curr_query;
366 unsigned end_index = resolve_index + resolve_count - 1;
367
368 offset += q->buffer_offset + resolve_index * q->query_size;
369 ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, end_index);
370 d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_FULL);
371 d3d12_apply_resource_states(ctx);
372 ctx->cmdlist->ResolveQueryData(q->query_heap, q->d3d12qtype, resolve_index,
373 resolve_count, d3d12_res, offset);
374
375 d3d12_batch_reference_object(batch, q->query_heap);
376 d3d12_batch_reference_resource(batch, res);
377
378 assert(q->curr_query < q->num_queries);
379 q->curr_query++;
380 }
381
382 static bool
d3d12_end_query(struct pipe_context * pctx,struct pipe_query * q)383 d3d12_end_query(struct pipe_context *pctx,
384 struct pipe_query *q)
385 {
386 struct d3d12_context *ctx = d3d12_context(pctx);
387 struct d3d12_query *query = (struct d3d12_query *)q;
388
389 end_query(ctx, query);
390
391 if (query->type != PIPE_QUERY_TIMESTAMP &&
392 query->type != PIPE_QUERY_TIME_ELAPSED)
393 list_delinit(&query->active_list);
394
395 query->fence_value = ctx->fence_value;
396 return true;
397 }
398
399 static bool
d3d12_get_query_result(struct pipe_context * pctx,struct pipe_query * q,bool wait,union pipe_query_result * result)400 d3d12_get_query_result(struct pipe_context *pctx,
401 struct pipe_query *q,
402 bool wait,
403 union pipe_query_result *result)
404 {
405 struct d3d12_context *ctx = d3d12_context(pctx);
406 struct d3d12_query *query = (struct d3d12_query *)q;
407
408 if (ctx->cmdqueue_fence->GetCompletedValue() < query->fence_value) {
409 if (!wait)
410 return false;
411 d3d12_flush_cmdlist_and_wait(ctx);
412 }
413
414 return accumulate_result(ctx, query, result, false);
415 }
416
417 void
d3d12_suspend_queries(struct d3d12_context * ctx)418 d3d12_suspend_queries(struct d3d12_context *ctx)
419 {
420 list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
421 end_query(ctx, query);
422 }
423 }
424
425 void
d3d12_resume_queries(struct d3d12_context * ctx)426 d3d12_resume_queries(struct d3d12_context *ctx)
427 {
428 list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
429 begin_query(ctx, query, false);
430 }
431 }
432
433 void
d3d12_validate_queries(struct d3d12_context * ctx)434 d3d12_validate_queries(struct d3d12_context *ctx)
435 {
436 bool have_xfb = !!ctx->gfx_pipeline_state.num_so_targets;
437
438 list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
439 if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED && !have_xfb && !query->subquery) {
440 struct pipe_query *subquery = d3d12_create_query(&ctx->base, PIPE_QUERY_PIPELINE_STATISTICS, 0);
441 query->subquery = (struct d3d12_query *)subquery;
442 if (!ctx->queries_disabled)
443 begin_query(ctx, query->subquery, true);
444 }
445 }
446 }
447
448 static void
d3d12_set_active_query_state(struct pipe_context * pctx,bool enable)449 d3d12_set_active_query_state(struct pipe_context *pctx, bool enable)
450 {
451 struct d3d12_context *ctx = d3d12_context(pctx);
452 ctx->queries_disabled = !enable;
453
454 if (enable)
455 d3d12_resume_queries(ctx);
456 else
457 d3d12_suspend_queries(ctx);
458 }
459
460 static void
d3d12_render_condition(struct pipe_context * pctx,struct pipe_query * pquery,bool condition,enum pipe_render_cond_flag mode)461 d3d12_render_condition(struct pipe_context *pctx,
462 struct pipe_query *pquery,
463 bool condition,
464 enum pipe_render_cond_flag mode)
465 {
466 struct d3d12_context *ctx = d3d12_context(pctx);
467 struct d3d12_query *query = (struct d3d12_query *)pquery;
468
469 if (query == nullptr) {
470 ctx->cmdlist->SetPredication(nullptr, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
471 ctx->current_predication = nullptr;
472 return;
473 }
474
475 if (!query->predicate)
476 query->predicate = d3d12_resource(pipe_buffer_create(pctx->screen, 0,
477 PIPE_USAGE_DEFAULT, sizeof(uint64_t)));
478
479 if (mode == PIPE_RENDER_COND_WAIT) {
480 d3d12_flush_cmdlist_and_wait(ctx);
481 union pipe_query_result result;
482 accumulate_result(ctx, (d3d12_query *)pquery, &result, true);
483 }
484
485 struct d3d12_resource *res = (struct d3d12_resource *)query->buffer;
486 d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_BIND_INVALIDATE_FULL);
487 d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_NONE);
488 d3d12_apply_resource_states(ctx);
489 ctx->cmdlist->CopyBufferRegion(d3d12_resource_resource(query->predicate), 0,
490 d3d12_resource_resource(res), 0,
491 sizeof(uint64_t));
492
493 d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_PREDICATION, D3D12_BIND_INVALIDATE_NONE);
494 d3d12_apply_resource_states(ctx);
495
496 ctx->current_predication = query->predicate;
497 /* documentation of ID3D12GraphicsCommandList::SetPredication method:
498 * "resource manipulation commands are _not_ actually performed
499 * if the resulting predicate data of the predicate is equal to
500 * the operation specified."
501 */
502 ctx->cmdlist->SetPredication(d3d12_resource_resource(query->predicate), 0,
503 condition ? D3D12_PREDICATION_OP_NOT_EQUAL_ZERO :
504 D3D12_PREDICATION_OP_EQUAL_ZERO);
505 }
506
507 void
d3d12_context_query_init(struct pipe_context * pctx)508 d3d12_context_query_init(struct pipe_context *pctx)
509 {
510 struct d3d12_context *ctx = d3d12_context(pctx);
511 list_inithead(&ctx->active_queries);
512
513 u_suballocator_init(&ctx->query_allocator, &ctx->base, 4096, 0, PIPE_USAGE_STAGING,
514 0, true);
515
516 pctx->create_query = d3d12_create_query;
517 pctx->destroy_query = d3d12_destroy_query;
518 pctx->begin_query = d3d12_begin_query;
519 pctx->end_query = d3d12_end_query;
520 pctx->get_query_result = d3d12_get_query_result;
521 pctx->set_active_query_state = d3d12_set_active_query_state;
522 pctx->render_condition = d3d12_render_condition;
523 }
524