1 #include "common/common.h"
2 #include "common/msg.h"
3
4 #include "ra_pl.h"
5 #include "utils.h"
6
// Backend-private state, stored in ra->priv by ra_create_pl().
struct ra_pl {
    // GPU handle passed to ra_create_pl(); every pl_* call goes through it.
    const struct pl_gpu *gpu;
    // Timer selected by timer_start_pl(), reset to NULL by timer_stop_pl().
    struct ra_timer_pl *active_timer;
};
11
get_gpu(const struct ra * ra)12 static inline const struct pl_gpu *get_gpu(const struct ra *ra)
13 {
14 struct ra_pl *p = ra->priv;
15 return p->gpu;
16 }
17
18 static struct ra_fns ra_fns_pl;
19
ra_pl_get(const struct ra * ra)20 const struct pl_gpu *ra_pl_get(const struct ra *ra)
21 {
22 return ra->fns == &ra_fns_pl ? get_gpu(ra) : NULL;
23 }
24
25 static struct pl_timer *get_active_timer(const struct ra *ra);
26
ra_create_pl(const struct pl_gpu * gpu,struct mp_log * log)27 struct ra *ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log)
28 {
29 assert(gpu);
30
31 struct ra *ra = talloc_zero(NULL, struct ra);
32 ra->log = log;
33 ra->fns = &ra_fns_pl;
34
35 struct ra_pl *p = ra->priv = talloc_zero(ra, struct ra_pl);
36 p->gpu = gpu;
37
38 ra->glsl_version = gpu->glsl.version;
39 ra->glsl_vulkan = gpu->glsl.vulkan;
40 ra->glsl_es = gpu->glsl.gles;
41
42 ra->caps = RA_CAP_DIRECT_UPLOAD | RA_CAP_NESTED_ARRAY | RA_CAP_FRAGCOORD;
43
44 if (gpu->caps & PL_GPU_CAP_COMPUTE)
45 ra->caps |= RA_CAP_COMPUTE | RA_CAP_NUM_GROUPS;
46 if (gpu->caps & PL_GPU_CAP_PARALLEL_COMPUTE)
47 ra->caps |= RA_CAP_PARALLEL_COMPUTE;
48 if (gpu->caps & PL_GPU_CAP_INPUT_VARIABLES)
49 ra->caps |= RA_CAP_GLOBAL_UNIFORM;
50
51 if (gpu->limits.max_tex_1d_dim)
52 ra->caps |= RA_CAP_TEX_1D;
53 if (gpu->limits.max_tex_3d_dim)
54 ra->caps |= RA_CAP_TEX_3D;
55 if (gpu->limits.max_ubo_size)
56 ra->caps |= RA_CAP_BUF_RO;
57 if (gpu->limits.max_ssbo_size)
58 ra->caps |= RA_CAP_BUF_RW;
59 if (gpu->limits.min_gather_offset && gpu->limits.max_gather_offset)
60 ra->caps |= RA_CAP_GATHER;
61
62 // Semi-hack: assume all textures are blittable if r8 is
63 const struct pl_fmt *r8 = pl_find_named_fmt(gpu, "r8");
64 if (r8->caps & PL_FMT_CAP_BLITTABLE)
65 ra->caps |= RA_CAP_BLIT;
66
67 ra->max_texture_wh = gpu->limits.max_tex_2d_dim;
68 ra->max_shmem = gpu->limits.max_shmem_size;
69 ra->max_pushc_size = gpu->limits.max_pushc_size;
70
71 // Set up format wrappers
72 for (int i = 0; i < gpu->num_formats; i++) {
73 const struct pl_fmt *plfmt = gpu->formats[i];
74 static const enum ra_ctype fmt_type_map[PL_FMT_TYPE_COUNT] = {
75 [PL_FMT_UNORM] = RA_CTYPE_UNORM,
76 [PL_FMT_UINT] = RA_CTYPE_UINT,
77 [PL_FMT_FLOAT] = RA_CTYPE_FLOAT,
78 };
79
80 enum ra_ctype type = fmt_type_map[plfmt->type];
81 if (!type || !(plfmt->caps & PL_FMT_CAP_SAMPLEABLE))
82 continue;
83
84 struct ra_format *rafmt = talloc_zero(ra, struct ra_format);
85 *rafmt = (struct ra_format) {
86 .name = plfmt->name,
87 .priv = (void *) plfmt,
88 .ctype = type,
89 .ordered = pl_fmt_is_ordered(plfmt),
90 .num_components = plfmt->num_components,
91 .pixel_size = plfmt->texel_size,
92 .linear_filter = plfmt->caps & PL_FMT_CAP_LINEAR,
93 .renderable = plfmt->caps & PL_FMT_CAP_RENDERABLE,
94 .storable = plfmt->caps & PL_FMT_CAP_STORABLE,
95 .glsl_format = plfmt->glsl_format,
96 };
97
98 for (int c = 0; c < plfmt->num_components; c++) {
99 rafmt->component_size[c] = plfmt->host_bits[c];
100 rafmt->component_depth[c] = plfmt->component_depth[c];
101 }
102
103 MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, rafmt);
104 }
105
106 return ra;
107 }
108
// Free the ra wrapper via talloc. No pl_* call is made here, so the pl_gpu
// itself is left untouched.
static void destroy_ra_pl(struct ra *ra)
{
    talloc_free(ra);
}
113
map_fmt(struct ra * ra,const struct pl_fmt * plfmt)114 static struct ra_format *map_fmt(struct ra *ra, const struct pl_fmt *plfmt)
115 {
116 for (int i = 0; i < ra->num_formats; i++) {
117 if (ra->formats[i]->priv == plfmt)
118 return ra->formats[i];
119 }
120
121 MP_ERR(ra, "Failed mapping pl_fmt '%s' to ra_fmt?\n", plfmt->name);
122 return NULL;
123 }
124
mppl_wrap_tex(struct ra * ra,const struct pl_tex * pltex,struct ra_tex * out_tex)125 bool mppl_wrap_tex(struct ra *ra, const struct pl_tex *pltex,
126 struct ra_tex *out_tex)
127 {
128 if (!pltex)
129 return false;
130
131 *out_tex = (struct ra_tex) {
132 .params = {
133 .dimensions = pl_tex_params_dimension(pltex->params),
134 .w = pltex->params.w,
135 .h = pltex->params.h,
136 .d = pltex->params.d,
137 .format = map_fmt(ra, pltex->params.format),
138 .render_src = pltex->params.sampleable,
139 .render_dst = pltex->params.renderable,
140 .storage_dst = pltex->params.storable,
141 .blit_src = pltex->params.blit_src,
142 .blit_dst = pltex->params.blit_dst,
143 .host_mutable = pltex->params.host_writable,
144 .downloadable = pltex->params.host_readable,
145 // These don't exist upstream, so just pick something reasonable
146 .src_linear = pltex->params.format->caps & PL_FMT_CAP_LINEAR,
147 .src_repeat = false,
148 },
149 .priv = (void *) pltex,
150 };
151
152 return !!out_tex->params.format;
153 }
154
tex_create_pl(struct ra * ra,const struct ra_tex_params * params)155 static struct ra_tex *tex_create_pl(struct ra *ra,
156 const struct ra_tex_params *params)
157 {
158 const struct pl_gpu *gpu = get_gpu(ra);
159 const struct pl_tex *pltex = pl_tex_create(gpu, &(struct pl_tex_params) {
160 .w = params->w,
161 .h = params->dimensions >= 2 ? params->h : 0,
162 .d = params->dimensions >= 3 ? params->d : 0,
163 .format = params->format->priv,
164 .sampleable = params->render_src,
165 .renderable = params->render_dst,
166 .storable = params->storage_dst,
167 .blit_src = params->blit_src,
168 .blit_dst = params->blit_dst || params->render_dst,
169 .host_writable = params->host_mutable,
170 .host_readable = params->downloadable,
171 .initial_data = params->initial_data,
172 });
173
174 struct ra_tex *ratex = talloc_ptrtype(NULL, ratex);
175 if (!mppl_wrap_tex(ra, pltex, ratex)) {
176 pl_tex_destroy(gpu, &pltex);
177 talloc_free(ratex);
178 return NULL;
179 }
180
181 // Keep track of these, so we can correctly bind them later
182 ratex->params.src_repeat = params->src_repeat;
183 ratex->params.src_linear = params->src_linear;
184
185 return ratex;
186 }
187
tex_destroy_pl(struct ra * ra,struct ra_tex * tex)188 static void tex_destroy_pl(struct ra *ra, struct ra_tex *tex)
189 {
190 if (!tex)
191 return;
192
193 pl_tex_destroy(get_gpu(ra), (const struct pl_tex **) &tex->priv);
194 talloc_free(tex);
195 }
196
tex_upload_pl(struct ra * ra,const struct ra_tex_upload_params * params)197 static bool tex_upload_pl(struct ra *ra, const struct ra_tex_upload_params *params)
198 {
199 const struct pl_gpu *gpu = get_gpu(ra);
200 const struct pl_tex *tex = params->tex->priv;
201 struct pl_tex_transfer_params pl_params = {
202 .tex = tex,
203 .buf = params->buf ? params->buf->priv : NULL,
204 .buf_offset = params->buf_offset,
205 .ptr = (void *) params->src,
206 .timer = get_active_timer(ra),
207 };
208
209 const struct pl_buf *staging = NULL;
210 if (params->tex->params.dimensions == 2) {
211 if (params->rc) {
212 pl_params.rc = (struct pl_rect3d) {
213 .x0 = params->rc->x0, .x1 = params->rc->x1,
214 .y0 = params->rc->y0, .y1 = params->rc->y1,
215 };
216 }
217
218 #if PL_API_VER >= 168
219 pl_params.row_pitch = params->stride;
220 #else
221 // Older libplacebo uses texel-sized strides, so we have to manually
222 // compensate for possibly misaligned sources (typically rgb24).
223 size_t texel_size = tex->params.format->texel_size;
224 pl_params.stride_w = params->stride / texel_size;
225 size_t stride = pl_params.stride_w * texel_size;
226
227 if (stride != params->stride) {
228 // Fall back to uploading via a staging buffer prepared in CPU
229 int lines = params->rc ? pl_rect_h(pl_params.rc) : tex->params.h;
230 staging = pl_buf_create(gpu, &(struct pl_buf_params) {
231 .size = lines * stride,
232 .memory_type = PL_BUF_MEM_HOST,
233 .host_mapped = true,
234 });
235 if (!staging)
236 return false;
237
238 const uint8_t *src = params->buf ? params->buf->data : params->src;
239 assert(src);
240 for (int y = 0; y < lines; y++)
241 memcpy(staging->data + y * stride, src + y * params->stride, stride);
242
243 pl_params.ptr = NULL;
244 pl_params.buf = staging;
245 pl_params.buf_offset = 0;
246 }
247 #endif
248 }
249
250 bool ok = pl_tex_upload(gpu, &pl_params);
251 pl_buf_destroy(gpu, &staging);
252 return ok;
253 }
254
tex_download_pl(struct ra * ra,struct ra_tex_download_params * params)255 static bool tex_download_pl(struct ra *ra, struct ra_tex_download_params *params)
256 {
257 const struct pl_tex *tex = params->tex->priv;
258 struct pl_tex_transfer_params pl_params = {
259 .tex = tex,
260 .ptr = params->dst,
261 .timer = get_active_timer(ra),
262 };
263
264 #if PL_API_VER >= 168
265 pl_params.row_pitch = params->stride;
266 return pl_tex_download(get_gpu(ra), &pl_params);
267 #else
268 size_t texel_size = tex->params.format->texel_size;
269 pl_params.stride_w = params->stride / texel_size;
270 size_t stride = pl_params.stride_w * texel_size;
271 uint8_t *staging = NULL;
272 if (stride != params->stride) {
273 staging = talloc_size(NULL, tex->params.h * stride);
274 pl_params.ptr = staging;
275 }
276
277 bool ok = pl_tex_download(get_gpu(ra), &pl_params);
278 if (ok && staging) {
279 for (int y = 0; y < tex->params.h; y++) {
280 memcpy((uint8_t *) params->dst + y * params->stride,
281 staging + y * stride,
282 stride);
283 }
284 }
285
286 talloc_free(staging);
287 return ok;
288 #endif
289 }
290
buf_create_pl(struct ra * ra,const struct ra_buf_params * params)291 static struct ra_buf *buf_create_pl(struct ra *ra,
292 const struct ra_buf_params *params)
293 {
294 const struct pl_buf *plbuf = pl_buf_create(get_gpu(ra), &(struct pl_buf_params) {
295 .size = params->size,
296 .uniform = params->type == RA_BUF_TYPE_UNIFORM,
297 .storable = params->type == RA_BUF_TYPE_SHADER_STORAGE,
298 .host_mapped = params->host_mapped,
299 .host_writable = params->host_mutable,
300 .initial_data = params->initial_data,
301 });
302
303 if (!plbuf)
304 return NULL;
305
306 struct ra_buf *rabuf = talloc_ptrtype(NULL, rabuf);
307 *rabuf = (struct ra_buf) {
308 .params = *params,
309 .data = plbuf->data,
310 .priv = (void *) plbuf,
311 };
312
313 rabuf->params.initial_data = NULL;
314 return rabuf;
315 }
316
buf_destroy_pl(struct ra * ra,struct ra_buf * buf)317 static void buf_destroy_pl(struct ra *ra, struct ra_buf *buf)
318 {
319 if (!buf)
320 return;
321
322 pl_buf_destroy(get_gpu(ra), (const struct pl_buf **) &buf->priv);
323 talloc_free(buf);
324 }
325
buf_update_pl(struct ra * ra,struct ra_buf * buf,ptrdiff_t offset,const void * data,size_t size)326 static void buf_update_pl(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
327 const void *data, size_t size)
328 {
329 pl_buf_write(get_gpu(ra), buf->priv, offset, data, size);
330 }
331
buf_poll_pl(struct ra * ra,struct ra_buf * buf)332 static bool buf_poll_pl(struct ra *ra, struct ra_buf *buf)
333 {
334 return !pl_buf_poll(get_gpu(ra), buf->priv, 0);
335 }
336
clear_pl(struct ra * ra,struct ra_tex * dst,float color[4],struct mp_rect * scissor)337 static void clear_pl(struct ra *ra, struct ra_tex *dst, float color[4],
338 struct mp_rect *scissor)
339 {
340 // TODO: implement scissor clearing by bltting a 1x1 tex instead
341 pl_tex_clear(get_gpu(ra), dst->priv, color);
342 }
343
blit_pl(struct ra * ra,struct ra_tex * dst,struct ra_tex * src,struct mp_rect * dst_rc,struct mp_rect * src_rc)344 static void blit_pl(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
345 struct mp_rect *dst_rc, struct mp_rect *src_rc)
346 {
347 struct pl_rect3d plsrc = {0}, pldst = {0};
348 if (src_rc) {
349 plsrc.x0 = MPMIN(MPMAX(src_rc->x0, 0), src->params.w);
350 plsrc.y0 = MPMIN(MPMAX(src_rc->y0, 0), src->params.h);
351 plsrc.x1 = MPMIN(MPMAX(src_rc->x1, 0), src->params.w);
352 plsrc.y1 = MPMIN(MPMAX(src_rc->y1, 0), src->params.h);
353 }
354
355 if (dst_rc) {
356 pldst.x0 = MPMIN(MPMAX(dst_rc->x0, 0), dst->params.w);
357 pldst.y0 = MPMIN(MPMAX(dst_rc->y0, 0), dst->params.h);
358 pldst.x1 = MPMIN(MPMAX(dst_rc->x1, 0), dst->params.w);
359 pldst.y1 = MPMIN(MPMAX(dst_rc->y1, 0), dst->params.h);
360 }
361
362 pl_tex_blit(get_gpu(ra), &(struct pl_tex_blit_params) {
363 .src = src->priv,
364 .dst = dst->priv,
365 .src_rc = plsrc,
366 .dst_rc = pldst,
367 .sample_mode = src->params.src_linear ? PL_TEX_SAMPLE_LINEAR
368 : PL_TEX_SAMPLE_NEAREST,
369 });
370 }
371
372 static const enum pl_var_type var_type[RA_VARTYPE_COUNT] = {
373 [RA_VARTYPE_INT] = PL_VAR_SINT,
374 [RA_VARTYPE_FLOAT] = PL_VAR_FLOAT,
375 };
376
377 static const enum pl_desc_type desc_type[RA_VARTYPE_COUNT] = {
378 [RA_VARTYPE_TEX] = PL_DESC_SAMPLED_TEX,
379 [RA_VARTYPE_IMG_W] = PL_DESC_STORAGE_IMG,
380 [RA_VARTYPE_BUF_RO] = PL_DESC_BUF_UNIFORM,
381 [RA_VARTYPE_BUF_RW] = PL_DESC_BUF_STORAGE,
382 };
383
384 static const enum pl_fmt_type fmt_type[RA_VARTYPE_COUNT] = {
385 [RA_VARTYPE_INT] = PL_FMT_SINT,
386 [RA_VARTYPE_FLOAT] = PL_FMT_FLOAT,
387 [RA_VARTYPE_BYTE_UNORM] = PL_FMT_UNORM,
388 };
389
390 static const size_t var_size[RA_VARTYPE_COUNT] = {
391 [RA_VARTYPE_INT] = sizeof(int),
392 [RA_VARTYPE_FLOAT] = sizeof(float),
393 [RA_VARTYPE_BYTE_UNORM] = sizeof(uint8_t),
394 };
395
uniform_layout_pl(struct ra_renderpass_input * inp)396 static struct ra_layout uniform_layout_pl(struct ra_renderpass_input *inp)
397 {
398 // To get the alignment requirements, we try laying this out with
399 // an offset of 1 and then see where it ends up. This will always be
400 // the minimum alignment requirement.
401 struct pl_var_layout layout = pl_buf_uniform_layout(1, &(struct pl_var) {
402 .name = inp->name,
403 .type = var_type[inp->type],
404 .dim_v = inp->dim_v,
405 .dim_m = inp->dim_m,
406 .dim_a = 1,
407 });
408
409 return (struct ra_layout) {
410 .align = layout.offset,
411 .stride = layout.stride,
412 .size = layout.size,
413 };
414 }
415
push_constant_layout_pl(struct ra_renderpass_input * inp)416 static struct ra_layout push_constant_layout_pl(struct ra_renderpass_input *inp)
417 {
418 struct pl_var_layout layout = pl_push_constant_layout(1, &(struct pl_var) {
419 .name = inp->name,
420 .type = var_type[inp->type],
421 .dim_v = inp->dim_v,
422 .dim_m = inp->dim_m,
423 .dim_a = 1,
424 });
425
426 return (struct ra_layout) {
427 .align = layout.offset,
428 .stride = layout.stride,
429 .size = layout.size,
430 };
431 }
432
desc_namespace_pl(struct ra * ra,enum ra_vartype type)433 static int desc_namespace_pl(struct ra *ra, enum ra_vartype type)
434 {
435 return pl_desc_namespace(get_gpu(ra), desc_type[type]);
436 }
437
// Per-renderpass private data, stored in ra_renderpass.priv.
struct pass_priv {
    const struct pl_pass *pl_pass;
    // Index translation map: for each ra input, its position in the pl_var
    // array (variables) or the pl_desc array (descriptors) of the pl_pass
    uint16_t *inp_index;
    // Space to hold the descriptor bindings and variable updates
    struct pl_desc_binding *binds;
    struct pl_var_update *varups;
    int num_varups;
};
446
renderpass_create_pl(struct ra * ra,const struct ra_renderpass_params * params)447 static struct ra_renderpass *renderpass_create_pl(struct ra *ra,
448 const struct ra_renderpass_params *params)
449 {
450 void *tmp = talloc_new(NULL);
451 const struct pl_gpu *gpu = get_gpu(ra);
452 struct ra_renderpass *pass = NULL;
453
454 static const enum pl_pass_type pass_type[] = {
455 [RA_RENDERPASS_TYPE_RASTER] = PL_PASS_RASTER,
456 [RA_RENDERPASS_TYPE_COMPUTE] = PL_PASS_COMPUTE,
457 };
458
459 struct pl_var *vars = NULL;
460 struct pl_desc *descs = NULL;
461 int num_vars = 0, num_descs = 0;
462
463 struct pass_priv *priv = talloc_ptrtype(tmp, priv);
464 priv->inp_index = talloc_zero_array(priv, uint16_t, params->num_inputs);
465
466 for (int i = 0; i < params->num_inputs; i++) {
467 const struct ra_renderpass_input *inp = ¶ms->inputs[i];
468 if (var_type[inp->type]) {
469 priv->inp_index[i] = num_vars;
470 MP_TARRAY_APPEND(tmp, vars, num_vars, (struct pl_var) {
471 .name = inp->name,
472 .type = var_type[inp->type],
473 .dim_v = inp->dim_v,
474 .dim_m = inp->dim_m,
475 .dim_a = 1,
476 });
477 } else if (desc_type[inp->type]) {
478 priv->inp_index[i] = num_descs;
479 MP_TARRAY_APPEND(tmp, descs, num_descs, (struct pl_desc) {
480 .name = inp->name,
481 .type = desc_type[inp->type],
482 .binding = inp->binding,
483 .access = inp->type == RA_VARTYPE_IMG_W ? PL_DESC_ACCESS_WRITEONLY
484 : inp->type == RA_VARTYPE_BUF_RW ? PL_DESC_ACCESS_READWRITE
485 : PL_DESC_ACCESS_READONLY,
486 });
487 }
488 }
489
490 // Allocate space to store the bindings map persistently
491 priv->binds = talloc_zero_array(priv, struct pl_desc_binding, num_descs);
492
493 struct pl_pass_params pl_params = {
494 .type = pass_type[params->type],
495 .variables = vars,
496 .num_variables = num_vars,
497 .descriptors = descs,
498 .num_descriptors = num_descs,
499 .push_constants_size = params->push_constants_size,
500 .glsl_shader = params->type == RA_RENDERPASS_TYPE_COMPUTE
501 ? params->compute_shader
502 : params->frag_shader,
503 .cached_program = params->cached_program.start,
504 .cached_program_len = params->cached_program.len,
505 };
506
507 struct pl_blend_params blend_params;
508
509 if (params->type == RA_RENDERPASS_TYPE_RASTER) {
510 pl_params.vertex_shader = params->vertex_shader;
511 pl_params.vertex_type = PL_PRIM_TRIANGLE_LIST;
512 pl_params.vertex_stride = params->vertex_stride;
513 pl_params.target_dummy.params.format = params->target_format->priv;
514 pl_params.load_target = !params->invalidate_target;
515
516 if (params->enable_blend) {
517 pl_params.blend_params = &blend_params;
518 blend_params = (struct pl_blend_params) {
519 // Same enum order as ra_blend
520 .src_rgb = (enum ra_blend) params->blend_src_rgb,
521 .dst_rgb = (enum ra_blend) params->blend_dst_rgb,
522 .src_alpha = (enum ra_blend) params->blend_src_alpha,
523 .dst_alpha = (enum ra_blend) params->blend_dst_alpha,
524 };
525 }
526
527 for (int i = 0; i < params->num_vertex_attribs; i++) {
528 const struct ra_renderpass_input *inp = ¶ms->vertex_attribs[i];
529 struct pl_vertex_attrib attrib = {
530 .name = inp->name,
531 .offset = inp->offset,
532 .location = i,
533 .fmt = pl_find_fmt(gpu, fmt_type[inp->type], inp->dim_v, 0,
534 var_size[inp->type] * 8, PL_FMT_CAP_VERTEX),
535 };
536
537 if (!attrib.fmt) {
538 MP_ERR(ra, "Failed mapping vertex attrib '%s' to pl_fmt?\n",
539 inp->name);
540 goto error;
541 }
542
543 MP_TARRAY_APPEND(tmp, pl_params.vertex_attribs,
544 pl_params.num_vertex_attribs, attrib);
545 }
546 }
547
548 priv->pl_pass = pl_pass_create(gpu, &pl_params);
549 if (!priv->pl_pass)
550 goto error;
551
552 pass = talloc_ptrtype(NULL, pass);
553 *pass = (struct ra_renderpass) {
554 .params = *ra_renderpass_params_copy(pass, params),
555 .priv = talloc_steal(pass, priv),
556 };
557
558 pass->params.cached_program = (struct bstr) {
559 .start = (void *) priv->pl_pass->params.cached_program,
560 .len = priv->pl_pass->params.cached_program_len,
561 };
562
563 // fall through
564 error:
565 talloc_free(tmp);
566 return pass;
567 }
568
renderpass_destroy_pl(struct ra * ra,struct ra_renderpass * pass)569 static void renderpass_destroy_pl(struct ra *ra, struct ra_renderpass *pass)
570 {
571 if (!pass)
572 return;
573
574 struct pass_priv *priv = pass->priv;
575 pl_pass_destroy(get_gpu(ra), (const struct pl_pass **) &priv->pl_pass);
576 talloc_free(pass);
577 }
578
renderpass_run_pl(struct ra * ra,const struct ra_renderpass_run_params * params)579 static void renderpass_run_pl(struct ra *ra,
580 const struct ra_renderpass_run_params *params)
581 {
582 struct pass_priv *p = params->pass->priv;
583 p->num_varups = 0;
584
585 for (int i = 0; i < params->num_values; i++) {
586 const struct ra_renderpass_input_val *val = ¶ms->values[i];
587 const struct ra_renderpass_input *inp = ¶ms->pass->params.inputs[i];
588 if (var_type[inp->type]) {
589 MP_TARRAY_APPEND(p, p->varups, p->num_varups, (struct pl_var_update) {
590 .index = p->inp_index[val->index],
591 .data = val->data,
592 });
593 } else {
594 struct pl_desc_binding bind;
595 switch (inp->type) {
596 case RA_VARTYPE_TEX:
597 case RA_VARTYPE_IMG_W: {
598 struct ra_tex *tex = *((struct ra_tex **) val->data);
599 bind.object = tex->priv;
600 bind.sample_mode = tex->params.src_linear ? PL_TEX_SAMPLE_LINEAR
601 : PL_TEX_SAMPLE_NEAREST;
602 bind.address_mode = tex->params.src_repeat ? PL_TEX_ADDRESS_REPEAT
603 : PL_TEX_ADDRESS_CLAMP;
604 break;
605 }
606 case RA_VARTYPE_BUF_RO:
607 case RA_VARTYPE_BUF_RW:
608 bind.object = (* (struct ra_buf **) val->data)->priv;
609 break;
610 default: abort();
611 };
612
613 p->binds[p->inp_index[val->index]] = bind;
614 };
615 }
616
617 struct pl_pass_run_params pl_params = {
618 .pass = p->pl_pass,
619 .var_updates = p->varups,
620 .num_var_updates = p->num_varups,
621 .desc_bindings = p->binds,
622 .push_constants = params->push_constants,
623 .timer = get_active_timer(ra),
624 };
625
626 if (p->pl_pass->params.type == PL_PASS_RASTER) {
627 pl_params.target = params->target->priv;
628 pl_params.viewport = mp_rect2d_to_pl(params->viewport);
629 pl_params.scissors = mp_rect2d_to_pl(params->scissors);
630 pl_params.vertex_data = params->vertex_data;
631 pl_params.vertex_count = params->vertex_count;
632 } else {
633 for (int i = 0; i < MP_ARRAY_SIZE(pl_params.compute_groups); i++)
634 pl_params.compute_groups[i] = params->compute_groups[i];
635 }
636
637 pl_pass_run(get_gpu(ra), &pl_params);
638 }
639
// One ra timer, backed by a growable pool of pl_timers.
struct ra_timer_pl {
    // Because libplacebo only supports one operation per timer, we need
    // to use multiple pl_timers to sum up multiple passes/transfers
    struct pl_timer **timers;
    int num_timers;     // number of pl_timers created so far
    int idx_timers;     // number handed out since the last timer_start_pl()
};
647
timer_create_pl(struct ra * ra)648 static ra_timer *timer_create_pl(struct ra *ra)
649 {
650 struct ra_timer_pl *t = talloc_zero(ra, struct ra_timer_pl);
651 return t;
652 }
653
// Destroy every pl_timer in the pool and free the ra timer.
// A NULL `timer` is ignored, like in the other destroy functions here. If
// the timer is the currently active one, it is deactivated first so that
// get_active_timer() never dereferences freed memory.
static void timer_destroy_pl(struct ra *ra, ra_timer *timer)
{
    if (!timer)
        return;

    struct ra_pl *p = ra->priv;
    struct ra_timer_pl *t = timer;

    if (p->active_timer == t)
        p->active_timer = NULL;

    for (int i = 0; i < t->num_timers; i++)
        pl_timer_destroy(p->gpu, &t->timers[i]);

    talloc_free(t);
}
664
// Make `timer` the active timer and rewind its pool of pl_timers.
static void timer_start_pl(struct ra *ra, ra_timer *timer)
{
    struct ra_pl *state = ra->priv;

    // There's nothing easy we can do when timers are nested, since
    // libplacebo only supports one timer object per operation; so "inner"
    // timers are simply ignored while another one is already active
    if (!state->active_timer) {
        struct ra_timer_pl *tm = timer;
        state->active_timer = tm;
        tm->idx_timers = 0;
    }
}
679
// Deactivate `timer` and return the sum of pl_timer_query() over every
// pl_timer handed out since timer_start_pl(). Returns 0 without touching
// anything if `timer` is not the active timer.
static uint64_t timer_stop_pl(struct ra *ra, ra_timer *timer)
{
    struct ra_pl *state = ra->priv;
    struct ra_timer_pl *tm = timer;

    if (state->active_timer != tm)
        return 0;

    state->active_timer = NULL;

    // Sum up all of the active results
    uint64_t total = 0;
    int n = 0;
    while (n < tm->idx_timers) {
        total += pl_timer_query(state->gpu, tm->timers[n]);
        n++;
    }

    return total;
}
697
get_active_timer(const struct ra * ra)698 static struct pl_timer *get_active_timer(const struct ra *ra)
699 {
700 struct ra_pl *p = ra->priv;
701 if (!p->active_timer)
702 return NULL;
703
704 struct ra_timer_pl *t = p->active_timer;
705 if (t->idx_timers == t->num_timers)
706 MP_TARRAY_APPEND(t, t->timers, t->num_timers, pl_timer_create(p->gpu));
707
708 return t->timers[t->idx_timers++];
709 }
710
711 static struct ra_fns ra_fns_pl = {
712 .destroy = destroy_ra_pl,
713 .tex_create = tex_create_pl,
714 .tex_destroy = tex_destroy_pl,
715 .tex_upload = tex_upload_pl,
716 .tex_download = tex_download_pl,
717 .buf_create = buf_create_pl,
718 .buf_destroy = buf_destroy_pl,
719 .buf_update = buf_update_pl,
720 .buf_poll = buf_poll_pl,
721 .clear = clear_pl,
722 .blit = blit_pl,
723 .uniform_layout = uniform_layout_pl,
724 .push_constant_layout = push_constant_layout_pl,
725 .desc_namespace = desc_namespace_pl,
726 .renderpass_create = renderpass_create_pl,
727 .renderpass_destroy = renderpass_destroy_pl,
728 .renderpass_run = renderpass_run_pl,
729 .timer_create = timer_create_pl,
730 .timer_destroy = timer_destroy_pl,
731 .timer_start = timer_start_pl,
732 .timer_stop = timer_stop_pl,
733 };
734
735