1 /*
2 * Copyright © 2021 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_private.h"
25
26 #include "perf/intel_perf.h"
27
28 static uint32_t
command_buffers_count_utraces(struct anv_device * device,uint32_t cmd_buffer_count,struct anv_cmd_buffer ** cmd_buffers,uint32_t * utrace_copies)29 command_buffers_count_utraces(struct anv_device *device,
30 uint32_t cmd_buffer_count,
31 struct anv_cmd_buffer **cmd_buffers,
32 uint32_t *utrace_copies)
33 {
34 if (!u_trace_context_actively_tracing(&device->ds.trace_context))
35 return 0;
36
37 uint32_t utraces = 0;
38 for (uint32_t i = 0; i < cmd_buffer_count; i++) {
39 if (u_trace_has_points(&cmd_buffers[i]->trace)) {
40 utraces++;
41 if (!(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
42 *utrace_copies += list_length(&cmd_buffers[i]->trace.trace_chunks);
43 }
44 }
45
46 return utraces;
47 }
48
49 static void
anv_utrace_delete_flush_data(struct u_trace_context * utctx,void * flush_data)50 anv_utrace_delete_flush_data(struct u_trace_context *utctx,
51 void *flush_data)
52 {
53 struct anv_device *device =
54 container_of(utctx, struct anv_device, ds.trace_context);
55 struct anv_utrace_flush_copy *flush = flush_data;
56
57 intel_ds_flush_data_fini(&flush->ds);
58
59 if (flush->trace_bo) {
60 assert(flush->batch_bo);
61 anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
62 anv_device_release_bo(device, flush->batch_bo);
63 anv_device_release_bo(device, flush->trace_bo);
64 }
65
66 vk_sync_destroy(&device->vk, flush->sync);
67
68 vk_free(&device->vk.alloc, flush);
69 }
70
71 static void
anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context * utctx,void * cmdstream,void * ts_from,uint32_t from_offset,void * ts_to,uint32_t to_offset,uint32_t count)72 anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx,
73 void *cmdstream,
74 void *ts_from, uint32_t from_offset,
75 void *ts_to, uint32_t to_offset,
76 uint32_t count)
77 {
78 struct anv_device *device =
79 container_of(utctx, struct anv_device, ds.trace_context);
80 struct anv_utrace_flush_copy *flush = cmdstream;
81 struct anv_address from_addr = (struct anv_address) {
82 .bo = ts_from, .offset = from_offset * sizeof(uint64_t) };
83 struct anv_address to_addr = (struct anv_address) {
84 .bo = ts_to, .offset = to_offset * sizeof(uint64_t) };
85
86 anv_genX(&device->info, emit_so_memcpy)(&flush->memcpy_state,
87 to_addr, from_addr, count * sizeof(uint64_t));
88 }
89
90 VkResult
anv_device_utrace_flush_cmd_buffers(struct anv_queue * queue,uint32_t cmd_buffer_count,struct anv_cmd_buffer ** cmd_buffers,struct anv_utrace_flush_copy ** out_flush_data)91 anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
92 uint32_t cmd_buffer_count,
93 struct anv_cmd_buffer **cmd_buffers,
94 struct anv_utrace_flush_copy **out_flush_data)
95 {
96 struct anv_device *device = queue->device;
97 uint32_t utrace_copies = 0;
98 uint32_t utraces = command_buffers_count_utraces(device,
99 cmd_buffer_count,
100 cmd_buffers,
101 &utrace_copies);
102 if (!utraces) {
103 *out_flush_data = NULL;
104 return VK_SUCCESS;
105 }
106
107 VkResult result;
108 struct anv_utrace_flush_copy *flush =
109 vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_flush_copy),
110 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
111 if (!flush)
112 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
113
114 intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id);
115
116 result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
117 0, 0, &flush->sync);
118 if (result != VK_SUCCESS)
119 goto error_sync;
120
121 if (utrace_copies > 0) {
122 result =
123 anv_device_alloc_bo(device, "utrace-copy-buf", utrace_copies * 4096,
124 ANV_BO_ALLOC_MAPPED, 0 /* explicit_address */,
125 &flush->trace_bo);
126 if (result != VK_SUCCESS)
127 goto error_trace_buf;
128
129 result =
130 anv_device_alloc_bo(device, "utrace-copy-batch",
131 /* 128 dwords of setup + 64 dwords per copy */
132 align_u32(512 + 64 * utrace_copies, 4096),
133 ANV_BO_ALLOC_MAPPED, 0 /* explicit_address */,
134 &flush->batch_bo);
135 if (result != VK_SUCCESS)
136 goto error_batch_buf;
137
138 result = anv_reloc_list_init(&flush->relocs, &device->vk.alloc);
139 if (result != VK_SUCCESS)
140 goto error_reloc_list;
141
142 flush->batch.alloc = &device->vk.alloc;
143 flush->batch.relocs = &flush->relocs;
144 anv_batch_set_storage(&flush->batch,
145 (struct anv_address) { .bo = flush->batch_bo, },
146 flush->batch_bo->map, flush->batch_bo->size);
147
148 /* Emit the copies */
149 anv_genX(&device->info, emit_so_memcpy_init)(&flush->memcpy_state,
150 device,
151 &flush->batch);
152 for (uint32_t i = 0; i < cmd_buffer_count; i++) {
153 if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
154 u_trace_flush(&cmd_buffers[i]->trace, flush, false);
155 } else {
156 u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
157 u_trace_end_iterator(&cmd_buffers[i]->trace),
158 &flush->ds.trace,
159 flush,
160 anv_device_utrace_emit_copy_ts_buffer);
161 }
162 }
163 anv_genX(&device->info, emit_so_memcpy_fini)(&flush->memcpy_state);
164
165 u_trace_flush(&flush->ds.trace, flush, true);
166
167 if (flush->batch.status != VK_SUCCESS) {
168 result = flush->batch.status;
169 goto error_batch;
170 }
171 } else {
172 for (uint32_t i = 0; i < cmd_buffer_count; i++) {
173 assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
174 u_trace_flush(&cmd_buffers[i]->trace, flush, i == (cmd_buffer_count - 1));
175 }
176 }
177
178 flush->queue = queue;
179
180 *out_flush_data = flush;
181
182 return VK_SUCCESS;
183
184 error_batch:
185 anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
186 error_reloc_list:
187 anv_device_release_bo(device, flush->batch_bo);
188 error_batch_buf:
189 anv_device_release_bo(device, flush->trace_bo);
190 error_trace_buf:
191 vk_sync_destroy(&device->vk, flush->sync);
192 error_sync:
193 vk_free(&device->vk.alloc, flush);
194 return result;
195 }
196
197 static void *
anv_utrace_create_ts_buffer(struct u_trace_context * utctx,uint32_t size_b)198 anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
199 {
200 struct anv_device *device =
201 container_of(utctx, struct anv_device, ds.trace_context);
202
203 struct anv_bo *bo = NULL;
204 UNUSED VkResult result =
205 anv_device_alloc_bo(device, "utrace-ts", align_u32(size_b, 4096),
206 ANV_BO_ALLOC_MAPPED, 0, &bo);
207 assert(result == VK_SUCCESS);
208
209 return bo;
210 }
211
212 static void
anv_utrace_destroy_ts_buffer(struct u_trace_context * utctx,void * timestamps)213 anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
214 {
215 struct anv_device *device =
216 container_of(utctx, struct anv_device, ds.trace_context);
217 struct anv_bo *bo = timestamps;
218
219 anv_device_release_bo(device, bo);
220 }
221
222 static void
anv_utrace_record_ts(struct u_trace * ut,void * cs,void * timestamps,unsigned idx,bool end_of_pipe)223 anv_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps, unsigned idx,
224 bool end_of_pipe)
225 {
226 struct anv_cmd_buffer *cmd_buffer = cs;
227 struct anv_device *device = cmd_buffer->device;
228 struct anv_bo *bo = timestamps;
229
230 device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device,
231 (struct anv_address) {
232 .bo = bo,
233 .offset = idx * sizeof(uint64_t) },
234 end_of_pipe);
235 }
236
237 static uint64_t
anv_utrace_read_ts(struct u_trace_context * utctx,void * timestamps,unsigned idx,void * flush_data)238 anv_utrace_read_ts(struct u_trace_context *utctx,
239 void *timestamps, unsigned idx, void *flush_data)
240 {
241 struct anv_device *device =
242 container_of(utctx, struct anv_device, ds.trace_context);
243 struct anv_bo *bo = timestamps;
244 struct anv_utrace_flush_copy *flush = flush_data;
245
246 /* Only need to stall on results for the first entry: */
247 if (idx == 0) {
248 UNUSED VkResult result =
249 vk_sync_wait(&device->vk,
250 flush->sync,
251 0,
252 VK_SYNC_WAIT_COMPLETE,
253 os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
254 assert(result == VK_SUCCESS);
255 }
256
257 uint64_t *ts = bo->map;
258
259 /* Don't translate the no-timestamp marker: */
260 if (ts[idx] == U_TRACE_NO_TIMESTAMP)
261 return U_TRACE_NO_TIMESTAMP;
262
263 return intel_device_info_timebase_scale(&device->info, ts[idx]);
264 }
265
266 static const char *
queue_family_to_name(const struct anv_queue_family * family)267 queue_family_to_name(const struct anv_queue_family *family)
268 {
269 switch (family->engine_class) {
270 case I915_ENGINE_CLASS_RENDER:
271 return "render";
272 case I915_ENGINE_CLASS_COPY:
273 return "copy";
274 case I915_ENGINE_CLASS_VIDEO:
275 return "video";
276 case I915_ENGINE_CLASS_VIDEO_ENHANCE:
277 return "video-enh";
278 default:
279 return "unknown";
280 }
281 }
282
283 void
anv_device_utrace_init(struct anv_device * device)284 anv_device_utrace_init(struct anv_device *device)
285 {
286 intel_ds_device_init(&device->ds, &device->info, device->fd,
287 device->physical->local_minor - 128,
288 INTEL_DS_API_VULKAN);
289 u_trace_context_init(&device->ds.trace_context,
290 &device->ds,
291 anv_utrace_create_ts_buffer,
292 anv_utrace_destroy_ts_buffer,
293 anv_utrace_record_ts,
294 anv_utrace_read_ts,
295 anv_utrace_delete_flush_data);
296
297 for (uint32_t q = 0; q < device->queue_count; q++) {
298 struct anv_queue *queue = &device->queues[q];
299
300 queue->ds =
301 intel_ds_device_add_queue(&device->ds, "%s%u",
302 queue_family_to_name(queue->family),
303 queue->index_in_family);
304 }
305 }
306
307 void
anv_device_utrace_finish(struct anv_device * device)308 anv_device_utrace_finish(struct anv_device *device)
309 {
310 u_trace_context_process(&device->ds.trace_context, true);
311 intel_ds_device_fini(&device->ds);
312 }
313
314 enum intel_ds_stall_flag
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)315 anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)
316 {
317 static const struct {
318 enum anv_pipe_bits anv;
319 enum intel_ds_stall_flag ds;
320 } anv_to_ds_flags[] = {
321 { .anv = ANV_PIPE_DEPTH_CACHE_FLUSH_BIT, .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, },
322 { .anv = ANV_PIPE_DATA_CACHE_FLUSH_BIT, .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, },
323 { .anv = ANV_PIPE_TILE_CACHE_FLUSH_BIT, .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, },
324 { .anv = ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT, .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, },
325 { .anv = ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, },
326 { .anv = ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, },
327 { .anv = ANV_PIPE_VF_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, },
328 { .anv = ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, },
329 { .anv = ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, },
330 { .anv = ANV_PIPE_DEPTH_STALL_BIT, .ds = INTEL_DS_DEPTH_STALL_BIT, },
331 { .anv = ANV_PIPE_CS_STALL_BIT, .ds = INTEL_DS_CS_STALL_BIT, },
332 { .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT, .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
333 { .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT, .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
334 };
335
336 enum intel_ds_stall_flag ret = 0;
337 for (uint32_t i = 0; i < ARRAY_SIZE(anv_to_ds_flags); i++) {
338 if (anv_to_ds_flags[i].anv & bits)
339 ret |= anv_to_ds_flags[i].ds;
340 }
341
342 return ret;
343 }
344