1 /*
2 * This file is part of libplacebo.
3 *
4 * libplacebo is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * libplacebo is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <math.h>
19
20 #include "common.h"
21 #include "log.h"
22 #include "shaders.h"
23 #include "gpu.h"
24
25 #define require(expr) \
26 do { \
27 if (!(expr)) { \
28 PL_ERR(gpu, "Validation failed: %s (%s:%d)", \
29 #expr, __FILE__, __LINE__); \
30 goto error; \
31 } \
32 } while (0)
33
34 int pl_optimal_transfer_stride(pl_gpu gpu, int dimension)
35 {
36 return PL_ALIGN2(dimension, gpu->limits.align_tex_xfer_stride);
37 }
38
39 void pl_gpu_destroy(pl_gpu gpu)
40 {
41 if (!gpu)
42 return;
43
44 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
45 impl->destroy(gpu);
46 }
47
48 bool pl_fmt_is_ordered(pl_fmt fmt)
49 {
50 bool ret = !fmt->opaque;
51 for (int i = 0; i < fmt->num_components; i++)
52 ret &= fmt->sample_order[i] == i;
53 return ret;
54 }
55
56 bool pl_fmt_is_float(pl_fmt fmt)
57 {
58 switch (fmt->type) {
59 case PL_FMT_UNKNOWN: // more likely than not
60 case PL_FMT_FLOAT:
61 case PL_FMT_UNORM:
62 case PL_FMT_SNORM:
63 return true;
64
65 case PL_FMT_UINT:
66 case PL_FMT_SINT:
67 return false;
68
69 case PL_FMT_TYPE_COUNT:
70 break;
71 }
72
73 pl_unreachable();
74 }
75
76 static int cmp_fmt(const void *pa, const void *pb)
77 {
78 pl_fmt a = *(pl_fmt *)pa;
79 pl_fmt b = *(pl_fmt *)pb;
80
81 // Always prefer non-opaque formats
82 if (a->opaque != b->opaque)
83 return PL_CMP(a->opaque, b->opaque);
84
85 // Always prefer non-emulated formats
86 if (a->emulated != b->emulated)
87 return PL_CMP(a->emulated, b->emulated);
88
89 int ca = __builtin_popcount(a->caps),
90 cb = __builtin_popcount(b->caps);
91 if (ca != cb)
92 return -PL_CMP(ca, cb); // invert to sort higher values first
93
94 // If the population count is the same but the caps are different, prefer
95 // the caps with a "lower" value (which tend to be more fundamental caps)
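// (For example, assuming the usual bit order of `enum pl_fmt_caps`, a format
// that is merely SAMPLEABLE sorts ahead of one that is merely VERTEX-capable,
// since PL_FMT_CAP_SAMPLEABLE has the lower bit value.)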
96 if (a->caps != b->caps)
97 return PL_CMP(a->caps, b->caps);
98
99 // If the capabilities are equal, sort based on the component attributes
100 for (int i = 0; i < PL_ARRAY_SIZE(a->component_depth); i++) {
101 int da = a->component_depth[i],
102 db = b->component_depth[i];
103 if (da != db)
104 return PL_CMP(da, db);
105
106 int ha = a->host_bits[i],
107 hb = b->host_bits[i];
108 if (ha != hb)
109 return PL_CMP(ha, hb);
110
111 int oa = a->sample_order[i],
112 ob = b->sample_order[i];
113 if (oa != ob)
114 return PL_CMP(oa, ob);
115 }
116
117 // Fall back to sorting by the name (for stability)
118 return strcmp(a->name, b->name);
119 }
120
121 #define FMT_BOOL(letter, cap) ((cap) ? (letter) : '-')
122 #define FMT_IDX4(f) (f)[0], (f)[1], (f)[2], (f)[3]
123
124 static void print_formats(pl_gpu gpu)
125 {
126 if (!pl_msg_test(gpu->log, PL_LOG_DEBUG))
127 return;
128
129 #define CAP_HEADER "%-12s"
130 #define CAP_FIELDS "%c%c%c%c%c%c%c%c%c%c%c%c"
131 #define CAP_VALUES \
132 FMT_BOOL('S', fmt->caps & PL_FMT_CAP_SAMPLEABLE), \
133 FMT_BOOL('s', fmt->caps & PL_FMT_CAP_STORABLE), \
134 FMT_BOOL('L', fmt->caps & PL_FMT_CAP_LINEAR), \
135 FMT_BOOL('R', fmt->caps & PL_FMT_CAP_RENDERABLE), \
136 FMT_BOOL('b', fmt->caps & PL_FMT_CAP_BLENDABLE), \
137 FMT_BOOL('B', fmt->caps & PL_FMT_CAP_BLITTABLE), \
138 FMT_BOOL('V', fmt->caps & PL_FMT_CAP_VERTEX), \
139 FMT_BOOL('u', fmt->caps & PL_FMT_CAP_TEXEL_UNIFORM), \
140 FMT_BOOL('t', fmt->caps & PL_FMT_CAP_TEXEL_STORAGE), \
141 FMT_BOOL('H', fmt->caps & PL_FMT_CAP_HOST_READABLE), \
142 FMT_BOOL('W', fmt->caps & PL_FMT_CAP_READWRITE), \
143 FMT_BOOL('G', fmt->gatherable)
144
145 PL_DEBUG(gpu, "GPU texture formats:");
146 PL_DEBUG(gpu, " %-10s %-6s %-4s %-4s " CAP_HEADER " %-3s %-13s %-13s %-10s %-10s %-6s",
147 "NAME", "TYPE", "SIZE", "COMP", "CAPS", "EMU", "DEPTH", "HOST_BITS",
148 "GLSL_TYPE", "GLSL_FMT", "FOURCC");
149 for (int n = 0; n < gpu->num_formats; n++) {
150 pl_fmt fmt = gpu->formats[n];
151
152 static const char *types[] = {
153 [PL_FMT_UNKNOWN] = "UNKNOWN",
154 [PL_FMT_UNORM] = "UNORM",
155 [PL_FMT_SNORM] = "SNORM",
156 [PL_FMT_UINT] = "UINT",
157 [PL_FMT_SINT] = "SINT",
158 [PL_FMT_FLOAT] = "FLOAT",
159 };
160
161 static const char idx_map[4] = {'R', 'G', 'B', 'A'};
162 char indices[4] = {' ', ' ', ' ', ' '};
163 if (!fmt->opaque) {
164 for (int i = 0; i < fmt->num_components; i++)
165 indices[i] = idx_map[fmt->sample_order[i]];
166 }
167
168
169 PL_DEBUG(gpu, " %-10s %-6s %-4zu %c%c%c%c " CAP_FIELDS " %-3s "
170 "{%-2d %-2d %-2d %-2d} {%-2d %-2d %-2d %-2d} %-10s %-10s %-6s",
171 fmt->name, types[fmt->type], fmt->texel_size,
172 FMT_IDX4(indices), CAP_VALUES, fmt->emulated ? "y" : "n",
173 FMT_IDX4(fmt->component_depth), FMT_IDX4(fmt->host_bits),
174 PL_DEF(fmt->glsl_type, ""), PL_DEF(fmt->glsl_format, ""),
175 PRINT_FOURCC(fmt->fourcc));
176
177 #undef CAP_HEADER
178 #undef CAP_FIELDS
179 #undef CAP_VALUES
180
181 for (int i = 0; i < fmt->num_modifiers; i++) {
182 PL_TRACE(gpu, " modifiers[%d]: %s",
183 i, PRINT_DRM_MOD(fmt->modifiers[i]));
184 }
185 }
186 }
187
188 pl_gpu pl_gpu_finalize(struct pl_gpu *gpu)
189 {
190 // Sort formats
191 qsort(gpu->formats, gpu->num_formats, sizeof(pl_fmt), cmp_fmt);
192
193 // Verification
194 pl_assert(gpu->ctx == gpu->log);
195 pl_assert(gpu->limits.max_tex_2d_dim);
196 pl_assert(gpu->limits.max_variables || gpu->limits.max_ubo_size);
197
198 for (int n = 0; n < gpu->num_formats; n++) {
199 pl_fmt fmt = gpu->formats[n];
200 pl_assert(fmt->name);
201 pl_assert(fmt->type);
202 pl_assert(fmt->num_components);
203 pl_assert(fmt->internal_size);
204 pl_assert(fmt->opaque ? !fmt->texel_size : fmt->texel_size);
205 pl_assert(!fmt->gatherable || (fmt->caps & PL_FMT_CAP_SAMPLEABLE));
206 for (int i = 0; i < fmt->num_components; i++) {
207 pl_assert(fmt->component_depth[i]);
208 pl_assert(fmt->opaque ? !fmt->host_bits[i] : fmt->host_bits[i]);
209 }
210
211 enum pl_fmt_caps texel_caps = PL_FMT_CAP_VERTEX |
212 PL_FMT_CAP_TEXEL_UNIFORM |
213 PL_FMT_CAP_TEXEL_STORAGE;
214
215 if (fmt->caps & texel_caps) {
216 pl_assert(fmt->glsl_type);
217 pl_assert(!fmt->opaque);
218 }
219 pl_assert(!fmt->opaque || !(fmt->caps & PL_FMT_CAP_HOST_READABLE));
220 if (fmt->internal_size != fmt->texel_size && !fmt->opaque)
221 pl_assert(fmt->emulated);
222
223 // Assert uniqueness of name
224 for (int o = n + 1; o < gpu->num_formats; o++)
225 pl_assert(strcmp(fmt->name, gpu->formats[o]->name) != 0);
226 }
227
228 // Print info
229 PL_INFO(gpu, "GPU information:");
230
231 #define LOG(fmt, field) \
232 PL_INFO(gpu, " %-26s %" fmt, #field ":", gpu->LOG_STRUCT.field)
233
234 #define LOG_STRUCT glsl
235 PL_INFO(gpu, " GLSL version: %d%s", gpu->glsl.version,
236 gpu->glsl.vulkan ? " (vulkan)" : gpu->glsl.gles ? " es" : "");
237 if (gpu->glsl.compute) {
238 LOG("zu", max_shmem_size);
239 LOG(PRIu32, max_group_threads);
240 LOG(PRIu32, max_group_size[0]);
241 LOG(PRIu32, max_group_size[1]);
242 LOG(PRIu32, max_group_size[2]);
243 }
244 LOG(PRIu32, subgroup_size);
245 LOG(PRIi16, min_gather_offset);
246 LOG(PRIi16, max_gather_offset);
247 #undef LOG_STRUCT
248
249 #define LOG_STRUCT limits
250 PL_INFO(gpu, " Limits:");
251 // pl_gpu
252 LOG("d", thread_safe);
253 LOG("d", callbacks);
254 // pl_buf
255 LOG("zu", max_buf_size);
256 LOG("zu", max_ubo_size);
257 LOG("zu", max_ssbo_size);
258 LOG("zu", max_vbo_size);
259 LOG("zu", max_mapped_size);
260 LOG(PRIu64, max_buffer_texels);
261 LOG("zu", align_host_ptr);
262 // pl_tex
263 LOG(PRIu32, max_tex_1d_dim);
264 LOG(PRIu32, max_tex_2d_dim);
265 LOG(PRIu32, max_tex_3d_dim);
266 LOG("d", blittable_1d_3d);
267 LOG("d", buf_transfer);
268 LOG(PRIu32, align_tex_xfer_stride);
269 LOG("zu", align_tex_xfer_offset);
270 // pl_pass
271 LOG("zu", max_variables);
272 LOG("zu", max_constants);
273 LOG("zu", max_pushc_size);
274 if (gpu->glsl.compute) {
275 LOG(PRIu32, max_dispatch[0]);
276 LOG(PRIu32, max_dispatch[1]);
277 LOG(PRIu32, max_dispatch[2]);
278 }
279 LOG(PRIu32, fragment_queues);
280 LOG(PRIu32, compute_queues);
281 #undef LOG_STRUCT
282 #undef LOG
283
284 if (pl_gpu_supports_interop(gpu)) {
285 PL_INFO(gpu, " External API interop:");
286
287 PL_INFO(gpu, " UUID: %s", PRINT_UUID(gpu->uuid));
288 PL_INFO(gpu, " PCI: %04x:%02x:%02x:%x",
289 gpu->pci.domain, gpu->pci.bus, gpu->pci.device, gpu->pci.function);
290 PL_INFO(gpu, " buf export caps: 0x%x",
291 (unsigned int) gpu->export_caps.buf);
292 PL_INFO(gpu, " buf import caps: 0x%x",
293 (unsigned int) gpu->import_caps.buf);
294 PL_INFO(gpu, " tex export caps: 0x%x",
295 (unsigned int) gpu->export_caps.tex);
296 PL_INFO(gpu, " tex import caps: 0x%x",
297 (unsigned int) gpu->import_caps.tex);
298 PL_INFO(gpu, " sync export caps: 0x%x",
299 (unsigned int) gpu->export_caps.sync);
300 PL_INFO(gpu, " sync import caps: 0x%x",
301 (unsigned int) gpu->import_caps.sync);
302 }
303
304 print_formats(gpu);
305
306 // Set `gpu->caps` for backwards compatibility
307 pl_gpu_caps caps = 0;
308 if (gpu->glsl.compute)
309 caps |= PL_GPU_CAP_COMPUTE;
310 if (gpu->limits.compute_queues > gpu->limits.fragment_queues)
311 caps |= PL_GPU_CAP_PARALLEL_COMPUTE;
312 if (gpu->limits.max_variables)
313 caps |= PL_GPU_CAP_INPUT_VARIABLES;
314 if (gpu->limits.max_mapped_size)
315 caps |= PL_GPU_CAP_MAPPED_BUFFERS;
316 if (gpu->limits.blittable_1d_3d)
317 caps |= PL_GPU_CAP_BLITTABLE_1D_3D;
318 if (gpu->glsl.subgroup_size)
319 caps |= PL_GPU_CAP_SUBGROUPS;
320 if (gpu->limits.callbacks)
321 caps |= PL_GPU_CAP_CALLBACKS;
322 if (gpu->limits.thread_safe)
323 caps |= PL_GPU_CAP_THREAD_SAFE;
324 if (gpu->limits.max_constants)
325 caps |= PL_GPU_CAP_SPEC_CONSTANTS;
326 gpu->caps = caps;
327
328 // Set the backwards compatibility fields in `limits`
329 gpu->limits.max_shmem_size = gpu->glsl.max_shmem_size;
330 gpu->limits.max_group_threads = gpu->glsl.max_group_threads;
331 for (int i = 0; i < 3; i++)
332 gpu->limits.max_group_size[i] = gpu->glsl.max_group_size[i];
333 gpu->limits.subgroup_size = gpu->glsl.subgroup_size;
334 gpu->limits.min_gather_offset = gpu->glsl.min_gather_offset;
335 gpu->limits.max_gather_offset = gpu->glsl.max_gather_offset;
336
337 return gpu;
338 }
339
340 struct glsl_fmt {
341 enum pl_fmt_type type;
342 int num_components;
343 int depth[4];
344 const char *glsl_format;
345 uint32_t drm_fourcc;
346 };
347
348 // List taken from the GLSL specification. (Yes, GLSL supports only exactly
349 // these formats with exactly these names)
350 static const struct glsl_fmt pl_glsl_fmts[] = {
351 {PL_FMT_FLOAT, 1, {16}, "r16f"},
352 {PL_FMT_FLOAT, 1, {32}, "r32f"},
353 {PL_FMT_FLOAT, 2, {16, 16}, "rg16f"},
354 {PL_FMT_FLOAT, 2, {32, 32}, "rg32f"},
355 {PL_FMT_FLOAT, 4, {16, 16, 16, 16}, "rgba16f"},
356 {PL_FMT_FLOAT, 4, {32, 32, 32, 32}, "rgba32f"},
357 {PL_FMT_FLOAT, 3, {11, 11, 10}, "r11f_g11f_b10f"},
358
359 {PL_FMT_UNORM, 1, {8}, "r8"},
360 {PL_FMT_UNORM, 1, {16}, "r16"},
361 {PL_FMT_UNORM, 2, {8, 8}, "rg8"},
362 {PL_FMT_UNORM, 2, {16, 16}, "rg16"},
363 {PL_FMT_UNORM, 4, {8, 8, 8, 8}, "rgba8"},
364 {PL_FMT_UNORM, 4, {16, 16, 16, 16}, "rgba16"},
365 {PL_FMT_UNORM, 4, {10, 10, 10, 2}, "rgb10_a2"},
366
367 {PL_FMT_SNORM, 1, {8}, "r8_snorm"},
368 {PL_FMT_SNORM, 1, {16}, "r16_snorm"},
369 {PL_FMT_SNORM, 2, {8, 8}, "rg8_snorm"},
370 {PL_FMT_SNORM, 2, {16, 16}, "rg16_snorm"},
371 {PL_FMT_SNORM, 4, {8, 8, 8, 8}, "rgba8_snorm"},
372 {PL_FMT_SNORM, 4, {16, 16, 16, 16}, "rgba16_snorm"},
373
374 {PL_FMT_UINT, 1, {8}, "r8ui"},
375 {PL_FMT_UINT, 1, {16}, "r16ui"},
376 {PL_FMT_UINT, 1, {32}, "r32ui"},
377 {PL_FMT_UINT, 2, {8, 8}, "rg8ui"},
378 {PL_FMT_UINT, 2, {16, 16}, "rg16ui"},
379 {PL_FMT_UINT, 2, {32, 32}, "rg32ui"},
380 {PL_FMT_UINT, 4, {8, 8, 8, 8}, "rgba8ui"},
381 {PL_FMT_UINT, 4, {16, 16, 16, 16}, "rgba16ui"},
382 {PL_FMT_UINT, 4, {32, 32, 32, 32}, "rgba32ui"},
383 {PL_FMT_UINT, 4, {10, 10, 10, 2}, "rgb10_a2ui"},
384
385 {PL_FMT_SINT, 1, {8}, "r8i"},
386 {PL_FMT_SINT, 1, {16}, "r16i"},
387 {PL_FMT_SINT, 1, {32}, "r32i"},
388 {PL_FMT_SINT, 2, {8, 8}, "rg8i"},
389 {PL_FMT_SINT, 2, {16, 16}, "rg16i"},
390 {PL_FMT_SINT, 2, {32, 32}, "rg32i"},
391 {PL_FMT_SINT, 4, {8, 8, 8, 8}, "rgba8i"},
392 {PL_FMT_SINT, 4, {16, 16, 16, 16}, "rgba16i"},
393 {PL_FMT_SINT, 4, {32, 32, 32, 32}, "rgba32i"},
394 };
395
396 const char *pl_fmt_glsl_format(pl_fmt fmt, int components)
397 {
398 if (fmt->opaque)
399 return NULL;
400
401 for (int n = 0; n < PL_ARRAY_SIZE(pl_glsl_fmts); n++) {
402 const struct glsl_fmt *gfmt = &pl_glsl_fmts[n];
403
404 if (fmt->type != gfmt->type)
405 continue;
406 if (components != gfmt->num_components)
407 continue;
408
409 // The component order is irrelevant, so we need to sort the depth
410 // based on the component's index
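// (Illustrative example: a BGRA-ordered 8-bit format with sample_order
// {2, 1, 0, 3} still produces depth {8, 8, 8, 8} here, so it matches the
// "rgba8" entry exactly like its RGBA-ordered sibling.)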
411 int depth[4] = {0};
412 for (int i = 0; i < fmt->num_components; i++)
413 depth[fmt->sample_order[i]] = fmt->component_depth[i];
414
415 // Copy over any emulated components
416 for (int i = fmt->num_components; i < components; i++)
417 depth[i] = gfmt->depth[i];
418
419 for (int i = 0; i < PL_ARRAY_SIZE(depth); i++) {
420 if (depth[i] != gfmt->depth[i])
421 goto next_fmt;
422 }
423
424 return gfmt->glsl_format;
425
426 next_fmt: ; // equivalent to `continue`
427 }
428
429 return NULL;
430 }
431
432 #define FOURCC(a,b,c,d) ((uint32_t)(a) | ((uint32_t)(b) << 8) | \
433 ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))
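// The characters are packed little-endian, as in drm_fourcc.h; for example,
// FOURCC('A','B','2','4') evaluates to 0x34324241, which is how
// DRM_FORMAT_ABGR8888 is spelled there.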
434
435 struct pl_fmt_fourcc {
436 const char *name;
437 uint32_t fourcc;
438 };
439
440 static const struct pl_fmt_fourcc pl_fmt_fourccs[] = {
441 // 8 bpp red
442 {"r8", FOURCC('R','8',' ',' ')},
443 // 16 bpp red
444 {"r16", FOURCC('R','1','6',' ')},
445 // 16 bpp rg
446 {"rg8", FOURCC('G','R','8','8')},
447 {"gr8", FOURCC('R','G','8','8')},
448 // 32 bpp rg
449 {"rg16", FOURCC('G','R','3','2')},
450 {"gr16", FOURCC('R','G','3','2')},
451 // 8 bpp rgb: N/A
452 // 16 bpp rgb
453 {"argb4", FOURCC('B','A','1','2')},
454 {"abgr4", FOURCC('R','A','1','2')},
455 {"rgba4", FOURCC('A','B','1','2')},
456 {"bgra4", FOURCC('A','R','1','2')},
457
458 {"a1rgb5", FOURCC('B','A','1','5')},
459 {"a1bgr5", FOURCC('R','A','1','5')},
460 {"rgb5a1", FOURCC('A','B','1','5')},
461 {"bgr5a1", FOURCC('A','R','1','5')},
462
463 {"rgb565", FOURCC('B','G','1','6')},
464 {"bgr565", FOURCC('R','G','1','6')},
465 // 24 bpp rgb
466 {"rgb8", FOURCC('B','G','2','4')},
467 {"bgr8", FOURCC('R','G','2','4')},
468 // 32 bpp rgb
469 {"argb8", FOURCC('B','A','2','4')},
470 {"abgr8", FOURCC('R','A','2','4')},
471 {"rgba8", FOURCC('A','B','2','4')},
472 {"bgra8", FOURCC('A','R','2','4')},
473
474 {"a2rgb10", FOURCC('B','A','3','0')},
475 {"a2bgr10", FOURCC('R','A','3','0')},
476 {"rgb10a2", FOURCC('A','B','3','0')},
477 {"bgr10a2", FOURCC('A','R','3','0')},
478 // 64bpp rgb
479 {"rgba16hf", FOURCC('A','B','4','H')},
480 {"bgra16hf", FOURCC('A','R','4','H')},
481
482 // no planar formats yet (tm)
483 };
484
485 uint32_t pl_fmt_fourcc(pl_fmt fmt)
486 {
487 if (fmt->opaque)
488 return 0;
489
490 for (int n = 0; n < PL_ARRAY_SIZE(pl_fmt_fourccs); n++) {
491 const struct pl_fmt_fourcc *fourcc = &pl_fmt_fourccs[n];
492 if (strcmp(fmt->name, fourcc->name) == 0)
493 return fourcc->fourcc;
494 }
495
496 return 0; // no matching format
497 }
498
499 pl_fmt pl_find_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components,
500 int min_depth, int host_bits, enum pl_fmt_caps caps)
501 {
502 for (int n = 0; n < gpu->num_formats; n++) {
503 pl_fmt fmt = gpu->formats[n];
504 if (fmt->type != type || fmt->num_components != num_components)
505 continue;
506 if ((fmt->caps & caps) != caps)
507 continue;
508
509 // When specifying some particular host representation, ensure the
510 // format is non-opaque, ordered and unpadded
511 if (host_bits && fmt->opaque)
512 continue;
513 if (host_bits && fmt->texel_size * 8 != host_bits * num_components)
514 continue;
515 if (host_bits && !pl_fmt_is_ordered(fmt))
516 continue;
517
518 for (int i = 0; i < fmt->num_components; i++) {
519 if (fmt->component_depth[i] < min_depth)
520 goto next_fmt;
521 if (host_bits && fmt->host_bits[i] != host_bits)
522 goto next_fmt;
523 }
524
525 return fmt;
526
527 next_fmt: ; // equivalent to `continue`
528 }
529
530 // ran out of formats
531 PL_DEBUG(gpu, "No matching format found");
532 return NULL;
533 }
534
535 pl_fmt pl_find_vertex_fmt(pl_gpu gpu, enum pl_fmt_type type, int comps)
536 {
537 static const size_t sizes[] = {
538 [PL_FMT_FLOAT] = sizeof(float),
539 [PL_FMT_UNORM] = sizeof(unsigned),
540 [PL_FMT_UINT] = sizeof(unsigned),
541 [PL_FMT_SNORM] = sizeof(int),
542 [PL_FMT_SINT] = sizeof(int),
543 };
544
545 return pl_find_fmt(gpu, type, comps, 0, 8 * sizes[type], PL_FMT_CAP_VERTEX);
546 }
547
548 pl_fmt pl_find_named_fmt(pl_gpu gpu, const char *name)
549 {
550 if (!name)
551 return NULL;
552
553 for (int i = 0; i < gpu->num_formats; i++) {
554 pl_fmt fmt = gpu->formats[i];
555 if (strcmp(name, fmt->name) == 0)
556 return fmt;
557 }
558
559 // ran out of formats
560 return NULL;
561 }
562
563 pl_fmt pl_find_fourcc(pl_gpu gpu, uint32_t fourcc)
564 {
565 if (!fourcc)
566 return NULL;
567
568 for (int i = 0; i < gpu->num_formats; i++) {
569 pl_fmt fmt = gpu->formats[i];
570 if (fourcc == fmt->fourcc)
571 return fmt;
572 }
573
574 // ran out of formats
575 return NULL;
576 }
577
578 static inline bool check_mod(pl_gpu gpu, pl_fmt fmt, uint64_t mod)
579 {
580 for (int i = 0; i < fmt->num_modifiers; i++) {
581 if (fmt->modifiers[i] == mod)
582 return true;
583 }
584
585
586 PL_ERR(gpu, "DRM modifier %s not available for format %s. Available modifiers:",
587 PRINT_DRM_MOD(mod), fmt->name);
588 for (int i = 0; i < fmt->num_modifiers; i++)
589 PL_ERR(gpu, " %s", PRINT_DRM_MOD(fmt->modifiers[i]));
590
591 return false;
592 }
593
594 pl_tex pl_tex_create(pl_gpu gpu, const struct pl_tex_params *params)
595 {
596 require(!params->import_handle || !params->export_handle);
597 require(!params->import_handle || !params->initial_data);
598 if (params->export_handle) {
599 require(params->export_handle & gpu->export_caps.tex);
600 require(PL_ISPOT(params->export_handle));
601 }
602 if (params->import_handle) {
603 require(params->import_handle & gpu->import_caps.tex);
604 require(PL_ISPOT(params->import_handle));
605 require(params->shared_mem.size > 0);
606 if (params->import_handle == PL_HANDLE_DMA_BUF) {
607 if (!check_mod(gpu, params->format, params->shared_mem.drm_format_mod))
608 goto error;
609 if (params->shared_mem.stride_w)
610 require(params->w && params->shared_mem.stride_w >= params->w);
611 if (params->shared_mem.stride_h)
612 require(params->h && params->shared_mem.stride_h >= params->h);
613 }
614 }
615
616 switch (pl_tex_params_dimension(*params)) {
617 case 1:
618 require(params->w > 0);
619 require(params->w <= gpu->limits.max_tex_1d_dim);
620 require(!params->renderable);
621 require(!params->blit_src || gpu->limits.blittable_1d_3d);
622 require(!params->blit_dst || gpu->limits.blittable_1d_3d);
623 break;
624 case 2:
625 require(params->w > 0 && params->h > 0);
626 require(params->w <= gpu->limits.max_tex_2d_dim);
627 require(params->h <= gpu->limits.max_tex_2d_dim);
628 break;
629 case 3:
630 require(params->w > 0 && params->h > 0 && params->d > 0);
631 require(params->w <= gpu->limits.max_tex_3d_dim);
632 require(params->h <= gpu->limits.max_tex_3d_dim);
633 require(params->d <= gpu->limits.max_tex_3d_dim);
634 require(!params->renderable);
635 require(!params->blit_src || gpu->limits.blittable_1d_3d);
636 require(!params->blit_dst || gpu->limits.blittable_1d_3d);
637 break;
638 }
639
640 pl_fmt fmt = params->format;
641 require(fmt);
642 require(!params->host_readable || fmt->caps & PL_FMT_CAP_HOST_READABLE);
643 require(!params->host_readable || !fmt->opaque);
644 require(!params->host_writable || !fmt->opaque);
645 require(!params->sampleable || fmt->caps & PL_FMT_CAP_SAMPLEABLE);
646 require(!params->renderable || fmt->caps & PL_FMT_CAP_RENDERABLE);
647 require(!params->storable || fmt->caps & PL_FMT_CAP_STORABLE);
648 require(!params->blit_src || fmt->caps & PL_FMT_CAP_BLITTABLE);
649 require(!params->blit_dst || fmt->caps & PL_FMT_CAP_BLITTABLE);
650
651 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
652 return impl->tex_create(gpu, params);
653
654 error:
655 return NULL;
656 }
657
658 void pl_tex_destroy(pl_gpu gpu, pl_tex *tex)
659 {
660 if (!*tex)
661 return;
662
663 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
664 impl->tex_destroy(gpu, *tex);
665 *tex = NULL;
666 }
667
668 static bool pl_tex_params_superset(struct pl_tex_params a, struct pl_tex_params b)
669 {
670 return a.w == b.w && a.h == b.h && a.d == b.d &&
671 a.format == b.format &&
672 (a.sampleable || !b.sampleable) &&
673 (a.renderable || !b.renderable) &&
674 (a.storable || !b.storable) &&
675 (a.blit_src || !b.blit_src) &&
676 (a.blit_dst || !b.blit_dst) &&
677 (a.host_writable || !b.host_writable) &&
678 (a.host_readable || !b.host_readable);
679 }
680
681 bool pl_tex_recreate(pl_gpu gpu, pl_tex *tex, const struct pl_tex_params *params)
682 {
683 if (params->initial_data) {
684 PL_ERR(gpu, "pl_tex_recreate may not be used with `initial_data`!");
685 return false;
686 }
687
688 if (*tex && pl_tex_params_superset((*tex)->params, *params)) {
689 pl_tex_invalidate(gpu, *tex);
690 return true;
691 }
692
693 PL_INFO(gpu, "(Re)creating %dx%dx%d texture with format %s",
694 params->w, params->h, params->d, params->format->name);
695
696 pl_tex_destroy(gpu, tex);
697 *tex = pl_tex_create(gpu, params);
698
699 return !!*tex;
700 }
701
702 void pl_tex_clear_ex(pl_gpu gpu, pl_tex dst, const union pl_clear_color color)
703 {
704 require(dst->params.blit_dst);
705
706 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
707 if (impl->tex_invalidate)
708 impl->tex_invalidate(gpu, dst);
709 impl->tex_clear_ex(gpu, dst, color);
710
711 error:
712 return;
713 }
714
715 void pl_tex_clear(pl_gpu gpu, pl_tex dst, const float color[4])
716 {
717 if (!pl_fmt_is_float(dst->params.format)) {
718 PL_ERR(gpu, "Cannot call `pl_tex_clear` on integer textures, please "
719 "use `pl_tex_clear_ex` instead.");
720 return;
721 }
722
723 const union pl_clear_color col = {
724 .f = { color[0], color[1], color[2], color[3] },
725 };
726
727 pl_tex_clear_ex(gpu, dst, col);
728 }
729
730 void pl_tex_invalidate(pl_gpu gpu, pl_tex tex)
731 {
732 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
733 if (impl->tex_invalidate)
734 impl->tex_invalidate(gpu, tex);
735 }
736
737 static void strip_coords(pl_tex tex, struct pl_rect3d *rc)
738 {
739 if (!tex->params.d) {
740 rc->z0 = 0;
741 rc->z1 = 1;
742 }
743
744 if (!tex->params.h) {
745 rc->y0 = 0;
746 rc->y1 = 1;
747 }
748 }
749
750 static void infer_rc(pl_tex tex, struct pl_rect3d *rc)
751 {
752 if (!rc->x0 && !rc->x1)
753 rc->x1 = tex->params.w;
754 if (!rc->y0 && !rc->y1)
755 rc->y1 = tex->params.h;
756 if (!rc->z0 && !rc->z1)
757 rc->z1 = tex->params.d;
758 }
759
760 void pl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params)
761 {
762 pl_tex src = params->src, dst = params->dst;
763 require(src && dst);
764 pl_fmt src_fmt = src->params.format;
765 pl_fmt dst_fmt = dst->params.format;
766 require(src_fmt->internal_size == dst_fmt->internal_size);
767 require((src_fmt->type == PL_FMT_UINT) == (dst_fmt->type == PL_FMT_UINT));
768 require((src_fmt->type == PL_FMT_SINT) == (dst_fmt->type == PL_FMT_SINT));
769 require(src->params.blit_src);
770 require(dst->params.blit_dst);
771 require(params->sample_mode != PL_TEX_SAMPLE_LINEAR || (src_fmt->caps & PL_FMT_CAP_LINEAR));
772
773 struct pl_tex_blit_params fixed = *params;
774 infer_rc(src, &fixed.src_rc);
775 infer_rc(dst, &fixed.dst_rc);
776 strip_coords(src, &fixed.src_rc);
777 strip_coords(dst, &fixed.dst_rc);
778
779 require(fixed.src_rc.x0 >= 0 && fixed.src_rc.x0 < src->params.w);
780 require(fixed.src_rc.x1 > 0 && fixed.src_rc.x1 <= src->params.w);
781 require(fixed.dst_rc.x0 >= 0 && fixed.dst_rc.x0 < dst->params.w);
782 require(fixed.dst_rc.x1 > 0 && fixed.dst_rc.x1 <= dst->params.w);
783
784 if (src->params.h) {
785 require(fixed.src_rc.y0 >= 0 && fixed.src_rc.y0 < src->params.h);
786 require(fixed.src_rc.y1 > 0 && fixed.src_rc.y1 <= src->params.h);
787 }
788
789 if (dst->params.h) {
790 require(fixed.dst_rc.y0 >= 0 && fixed.dst_rc.y0 < dst->params.h);
791 require(fixed.dst_rc.y1 > 0 && fixed.dst_rc.y1 <= dst->params.h);
792 }
793
794 if (src->params.d) {
795 require(fixed.src_rc.z0 >= 0 && fixed.src_rc.z0 < src->params.d);
796 require(fixed.src_rc.z1 > 0 && fixed.src_rc.z1 <= src->params.d);
797 }
798
799 if (dst->params.d) {
800 require(fixed.dst_rc.z0 >= 0 && fixed.dst_rc.z0 < dst->params.d);
801 require(fixed.dst_rc.z1 > 0 && fixed.dst_rc.z1 <= dst->params.d);
802 }
803
804 struct pl_rect3d full = {0, 0, 0, dst->params.w, dst->params.h, dst->params.d};
805 strip_coords(dst, &full);
806
807 struct pl_rect3d rcnorm = fixed.dst_rc;
808 pl_rect3d_normalize(&rcnorm);
809 if (pl_rect3d_eq(rcnorm, full))
810 pl_tex_invalidate(gpu, dst);
811
812 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
813 impl->tex_blit(gpu, &fixed);
814
815 error:
816 return;
817 }
818
819 size_t pl_tex_transfer_size(const struct pl_tex_transfer_params *par)
820 {
821 pl_tex tex = par->tex;
822 int w = pl_rect_w(par->rc), h = pl_rect_h(par->rc), d = pl_rect_d(par->rc);
823
824 // This generates the absolute bare minimum size of a buffer required to
825 // hold the data of a texture upload/download, by including stride padding
826 // only where strictly necessary.
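// Worked example (purely illustrative): a 10x8x4 transfer with stride_w = 16
// and stride_h = 8 needs ((4-1)*8 + (8-1)) * 16 + 10 = 506 texels -- full
// strides for every row and plane except the last ones, which only
// contribute their actual extents.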
827 int texels = ((d - 1) * par->stride_h + (h - 1)) * par->stride_w + w;
828 return texels * tex->params.format->texel_size;
829 }
830
831 static bool fix_tex_transfer(pl_gpu gpu, struct pl_tex_transfer_params *params)
832 {
833 pl_tex tex = params->tex;
834 struct pl_rect3d rc = params->rc;
835
836 // Infer the default values
837 infer_rc(tex, &rc);
838 if (!params->stride_w)
839 params->stride_w = pl_rect_w(rc);
840 if (!params->stride_h)
841 params->stride_h = pl_rect_h(rc);
842
843 // Sanitize superfluous coordinates for the benefit of the GPU
844 strip_coords(tex, &rc);
845 if (!tex->params.w)
846 params->stride_w = 1;
847 if (!tex->params.h)
848 params->stride_h = 1;
849
850 params->rc = rc;
851
852 // Check the parameters for sanity
853 switch (pl_tex_params_dimension(tex->params))
854 {
855 case 3:
856 require(rc.z1 > rc.z0);
857 require(rc.z0 >= 0 && rc.z0 < tex->params.d);
858 require(rc.z1 > 0 && rc.z1 <= tex->params.d);
859 require(params->stride_h >= pl_rect_h(rc));
860 // fall through
861 case 2:
862 require(rc.y1 > rc.y0);
863 require(rc.y0 >= 0 && rc.y0 < tex->params.h);
864 require(rc.y1 > 0 && rc.y1 <= tex->params.h);
865 require(params->stride_w >= pl_rect_w(rc));
866 // fall through
867 case 1:
868 require(rc.x1 > rc.x0);
869 require(rc.x0 >= 0 && rc.x0 < tex->params.w);
870 require(rc.x1 > 0 && rc.x1 <= tex->params.w);
871 break;
872 }
873
874 require(!params->buf ^ !params->ptr); // exactly one
875 if (params->buf) {
876 pl_buf buf = params->buf;
877 size_t size = pl_tex_transfer_size(params);
878 require(params->buf_offset + size <= buf->params.size);
879 require(gpu->limits.buf_transfer);
880 }
881
882 require(!params->callback || gpu->limits.callbacks);
883 return true;
884
885 error:
886 return false;
887 }
888
889 bool pl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params)
890 {
891 pl_tex tex = params->tex;
892 require(tex);
893 require(tex->params.host_writable);
894
895 struct pl_tex_transfer_params fixed = *params;
896 if (!fix_tex_transfer(gpu, &fixed))
897 goto error;
898
899 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
900 return impl->tex_upload(gpu, &fixed);
901
902 error:
903 return false;
904 }
905
906 bool pl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params)
907 {
908 pl_tex tex = params->tex;
909 require(tex);
910 require(tex->params.host_readable);
911
912 struct pl_tex_transfer_params fixed = *params;
913 if (!fix_tex_transfer(gpu, &fixed))
914 goto error;
915
916 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
917 return impl->tex_download(gpu, &fixed);
918
919 error:
920 return false;
921 }
922
923 bool pl_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t t)
924 {
925 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
926 return impl->tex_poll ? impl->tex_poll(gpu, tex, t) : false;
927 }
928
929 static struct pl_buf_params pl_buf_params_infer(struct pl_buf_params params)
930 {
931 switch (params.type) {
932 case PL_BUF_UNIFORM:
933 case PL_BUF_TEXEL_UNIFORM:
934 params.uniform = true;
935 break;
936 case PL_BUF_STORAGE:
937 case PL_BUF_TEXEL_STORAGE:
938 params.storable = true;
939 break;
940 case PL_BUF_TEX_TRANSFER:
941 break;
942 case PL_BUF_TYPE_COUNT:
943 pl_unreachable();
944 }
945
946 return params;
947 }
948
949 static bool warned_rounding = false;
950
951 pl_buf pl_buf_create(pl_gpu gpu, const struct pl_buf_params *pparams)
952 {
953 struct pl_buf_params params = pl_buf_params_infer(*pparams);
954
955 require(!params.import_handle || !params.export_handle);
956 if (params.export_handle) {
957 require(PL_ISPOT(params.export_handle));
958 require(params.export_handle & gpu->export_caps.buf);
959 }
960 if (params.import_handle) {
961 require(PL_ISPOT(params.import_handle));
962 require(params.import_handle & gpu->import_caps.buf);
963 struct pl_shared_mem *shmem = ¶ms.shared_mem;
964 require(shmem->offset + params.size <= shmem->size);
965 require(params.import_handle != PL_HANDLE_DMA_BUF || !shmem->drm_format_mod);
966
967 // Fix misalignment on host pointer imports
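// (Sketch of the rounding below, assuming a 4 KiB align_host_ptr: importing
// ptr = 0x...1100 with size 0x3000 yields ptr_base = 0x...1000, shifts
// shmem->offset up by 0x100, and rounds the imported size up to 0x4000.)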
968 if (params.import_handle == PL_HANDLE_HOST_PTR) {
969 uintptr_t page_mask = ~(gpu->limits.align_host_ptr - 1);
970 uintptr_t ptr_base = (uintptr_t) shmem->handle.ptr & page_mask;
971 size_t ptr_offset = (uintptr_t) shmem->handle.ptr - ptr_base;
972 size_t buf_offset = ptr_offset + shmem->offset;
973 size_t ptr_size = PL_ALIGN2(ptr_offset + shmem->size,
974 gpu->limits.align_host_ptr);
975
976 if (ptr_base != (uintptr_t) shmem->handle.ptr || ptr_size > shmem->size) {
977 if (!warned_rounding) {
978 warned_rounding = true;
979 PL_WARN(gpu, "Imported host pointer is not page-aligned. "
980 "This should normally be fine on most platforms, "
981 "but may cause issues in some rare circumstances.");
982 }
983
984 PL_TRACE(gpu, "Rounding imported host pointer %p + %zu -> %zu to "
985 "nearest page boundaries: %p + %zu -> %zu",
986 shmem->handle.ptr, shmem->offset, shmem->size,
987 (void *) ptr_base, buf_offset, ptr_size);
988 }
989
990 shmem->handle.ptr = (void *) ptr_base;
991 shmem->offset = buf_offset;
992 shmem->size = ptr_size;
993 }
994 }
995
996 require(params.size > 0 && params.size <= gpu->limits.max_buf_size);
997 require(!params.uniform || params.size <= gpu->limits.max_ubo_size);
998 require(!params.storable || params.size <= gpu->limits.max_ssbo_size);
999 require(!params.drawable || params.size <= gpu->limits.max_vbo_size);
1000 require(!params.host_mapped || params.size <= gpu->limits.max_mapped_size);
1001
1002 if (params.format) {
1003 pl_fmt fmt = params.format;
1004 require(params.size <= gpu->limits.max_buffer_texels * fmt->texel_size);
1005 require(!params.uniform || (fmt->caps & PL_FMT_CAP_TEXEL_UNIFORM));
1006 require(!params.storable || (fmt->caps & PL_FMT_CAP_TEXEL_STORAGE));
1007 }
1008
1009 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1010 pl_buf buf = impl->buf_create(gpu, ¶ms);
1011 if (buf)
1012 require(!params.host_mapped || buf->data);
1013
1014 return buf;
1015
1016 error:
1017 return NULL;
1018 }
1019
1020 void pl_buf_destroy(pl_gpu gpu, pl_buf *buf)
1021 {
1022 if (!*buf)
1023 return;
1024
1025 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1026 impl->buf_destroy(gpu, *buf);
1027 *buf = NULL;
1028 }
1029
1030 static bool pl_buf_params_superset(struct pl_buf_params a, struct pl_buf_params b)
1031 {
1032 return a.size >= b.size &&
1033 a.memory_type == b.memory_type &&
1034 a.format == b.format &&
1035 (a.host_writable || !b.host_writable) &&
1036 (a.host_readable || !b.host_readable) &&
1037 (a.host_mapped || !b.host_mapped) &&
1038 (a.uniform || !b.uniform) &&
1039 (a.storable || !b.storable) &&
1040 (a.drawable || !b.drawable);
1041 }
1042
1043 bool pl_buf_recreate(pl_gpu gpu, pl_buf *buf, const struct pl_buf_params *pparams)
1044 {
1045
1046 struct pl_buf_params params = pl_buf_params_infer(*pparams);
1047
1048 if (params.initial_data) {
1049 PL_ERR(gpu, "pl_buf_recreate may not be used with `initial_data`!");
1050 return false;
1051 }
1052
1053 if (*buf && pl_buf_params_superset((*buf)->params, params))
1054 return true;
1055
1056 PL_INFO(gpu, "(Re)creating %zu byte buffer", params.size);
1057 pl_buf_destroy(gpu, buf);
1058 *buf = pl_buf_create(gpu, ¶ms);
1059
1060 return !!*buf;
1061 }
1062
1063 void pl_buf_write(pl_gpu gpu, pl_buf buf, size_t buf_offset,
1064 const void *data, size_t size)
1065 {
1066 require(buf->params.host_writable);
1067 require(buf_offset + size <= buf->params.size);
1068 require(buf_offset == PL_ALIGN2(buf_offset, 4));
1069
1070 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1071 impl->buf_write(gpu, buf, buf_offset, data, size);
1072
1073 error:
1074 return;
1075 }
1076
1077 bool pl_buf_read(pl_gpu gpu, pl_buf buf, size_t buf_offset,
1078 void *dest, size_t size)
1079 {
1080 require(buf->params.host_readable);
1081 require(buf_offset + size <= buf->params.size);
1082
1083 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1084 return impl->buf_read(gpu, buf, buf_offset, dest, size);
1085
1086 error:
1087 return false;
1088 }
1089
1090 void pl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset,
1091 pl_buf src, size_t src_offset, size_t size)
1092 {
1093 require(src_offset + size <= src->params.size);
1094 require(dst_offset + size <= dst->params.size);
1095
1096 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1097 impl->buf_copy(gpu, dst, dst_offset, src, src_offset, size);
1098
1099 error:
1100 return;
1101 }
1102
1103 bool pl_buf_export(pl_gpu gpu, pl_buf buf)
1104 {
1105 require(buf->params.export_handle || buf->params.import_handle);
1106
1107 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1108 return impl->buf_export(gpu, buf);
1109
1110 error:
1111 return false;
1112 }
1113
1114 bool pl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t t)
1115 {
1116 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1117 return impl->buf_poll ? impl->buf_poll(gpu, buf, t) : false;
1118 }
1119
1120 size_t pl_var_type_size(enum pl_var_type type)
1121 {
1122 switch (type) {
1123 case PL_VAR_SINT: return sizeof(int);
1124 case PL_VAR_UINT: return sizeof(unsigned int);
1125 case PL_VAR_FLOAT: return sizeof(float);
1126 case PL_VAR_INVALID: // fall through
1127 case PL_VAR_TYPE_COUNT: break;
1128 }
1129
1130 pl_unreachable();
1131 }
1132
1133 #define PL_VAR(TYPE, NAME, M, V) \
1134 struct pl_var pl_var_##NAME(const char *name) { \
1135 return (struct pl_var) { \
1136 .name = name, \
1137 .type = PL_VAR_##TYPE, \
1138 .dim_m = M, \
1139 .dim_v = V, \
1140 .dim_a = 1, \
1141 }; \
1142 }
1143
1144 PL_VAR(FLOAT, float, 1, 1)
1145 PL_VAR(FLOAT, vec2, 1, 2)
1146 PL_VAR(FLOAT, vec3, 1, 3)
1147 PL_VAR(FLOAT, vec4, 1, 4)
1148 PL_VAR(FLOAT, mat2, 2, 2)
1149 PL_VAR(FLOAT, mat2x3, 2, 3)
1150 PL_VAR(FLOAT, mat2x4, 2, 4)
1151 PL_VAR(FLOAT, mat3, 3, 3)
1152 PL_VAR(FLOAT, mat3x4, 3, 4)
1153 PL_VAR(FLOAT, mat4x2, 4, 2)
1154 PL_VAR(FLOAT, mat4x3, 4, 3)
1155 PL_VAR(FLOAT, mat4, 4, 4)
1156 PL_VAR(SINT, int, 1, 1)
1157 PL_VAR(SINT, ivec2, 1, 2)
1158 PL_VAR(SINT, ivec3, 1, 3)
1159 PL_VAR(SINT, ivec4, 1, 4)
1160 PL_VAR(UINT, uint, 1, 1)
1161 PL_VAR(UINT, uvec2, 1, 2)
1162 PL_VAR(UINT, uvec3, 1, 3)
1163 PL_VAR(UINT, uvec4, 1, 4)
1164
1165 #undef PL_VAR
1166
1167 const struct pl_named_var pl_var_glsl_types[] = {
1168 // float vectors
1169 { "float", { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 1, .dim_a = 1, }},
1170 { "vec2", { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 2, .dim_a = 1, }},
1171 { "vec3", { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 3, .dim_a = 1, }},
1172 { "vec4", { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 4, .dim_a = 1, }},
1173 // float matrices
1174 { "mat2", { .type = PL_VAR_FLOAT, .dim_m = 2, .dim_v = 2, .dim_a = 1, }},
1175 { "mat2x3", { .type = PL_VAR_FLOAT, .dim_m = 2, .dim_v = 3, .dim_a = 1, }},
1176 { "mat2x4", { .type = PL_VAR_FLOAT, .dim_m = 2, .dim_v = 4, .dim_a = 1, }},
1177 { "mat3", { .type = PL_VAR_FLOAT, .dim_m = 3, .dim_v = 3, .dim_a = 1, }},
1178 { "mat3x4", { .type = PL_VAR_FLOAT, .dim_m = 3, .dim_v = 4, .dim_a = 1, }},
1179 { "mat4x2", { .type = PL_VAR_FLOAT, .dim_m = 4, .dim_v = 2, .dim_a = 1, }},
1180 { "mat4x3", { .type = PL_VAR_FLOAT, .dim_m = 4, .dim_v = 3, .dim_a = 1, }},
1181 { "mat4", { .type = PL_VAR_FLOAT, .dim_m = 4, .dim_v = 4, .dim_a = 1, }},
1182 // integer vectors
1183 { "int", { .type = PL_VAR_SINT, .dim_m = 1, .dim_v = 1, .dim_a = 1, }},
1184 { "ivec2", { .type = PL_VAR_SINT, .dim_m = 1, .dim_v = 2, .dim_a = 1, }},
1185 { "ivec3", { .type = PL_VAR_SINT, .dim_m = 1, .dim_v = 3, .dim_a = 1, }},
1186 { "ivec4", { .type = PL_VAR_SINT, .dim_m = 1, .dim_v = 4, .dim_a = 1, }},
1187 // unsigned integer vectors
1188 { "uint", { .type = PL_VAR_UINT, .dim_m = 1, .dim_v = 1, .dim_a = 1, }},
1189 { "uvec2", { .type = PL_VAR_UINT, .dim_m = 1, .dim_v = 2, .dim_a = 1, }},
1190 { "uvec3", { .type = PL_VAR_UINT, .dim_m = 1, .dim_v = 3, .dim_a = 1, }},
1191 { "uvec4", { .type = PL_VAR_UINT, .dim_m = 1, .dim_v = 4, .dim_a = 1, }},
1192
1193 {0},
1194 };
1195
1196 #define MAX_DIM 4
1197
1198 const char *pl_var_glsl_type_name(struct pl_var var)
1199 {
1200 static const char *types[PL_VAR_TYPE_COUNT][MAX_DIM+1][MAX_DIM+1] = {
1201 // float vectors
1202 [PL_VAR_FLOAT][1][1] = "float",
1203 [PL_VAR_FLOAT][1][2] = "vec2",
1204 [PL_VAR_FLOAT][1][3] = "vec3",
1205 [PL_VAR_FLOAT][1][4] = "vec4",
1206 // float matrices
1207 [PL_VAR_FLOAT][2][2] = "mat2",
1208 [PL_VAR_FLOAT][2][3] = "mat2x3",
1209 [PL_VAR_FLOAT][2][4] = "mat2x4",
1210 [PL_VAR_FLOAT][3][2] = "mat3x2",
1211 [PL_VAR_FLOAT][3][3] = "mat3",
1212 [PL_VAR_FLOAT][3][4] = "mat3x4",
1213 [PL_VAR_FLOAT][4][2] = "mat4x2",
1214 [PL_VAR_FLOAT][4][3] = "mat4x3",
1215 [PL_VAR_FLOAT][4][4] = "mat4",
1216 // integer vectors
1217 [PL_VAR_SINT][1][1] = "int",
1218 [PL_VAR_SINT][1][2] = "ivec2",
1219 [PL_VAR_SINT][1][3] = "ivec3",
1220 [PL_VAR_SINT][1][4] = "ivec4",
1221 // unsigned integer vectors
1222 [PL_VAR_UINT][1][1] = "uint",
1223 [PL_VAR_UINT][1][2] = "uvec2",
1224 [PL_VAR_UINT][1][3] = "uvec3",
1225 [PL_VAR_UINT][1][4] = "uvec4",
1226 };
1227
1228 if (var.dim_v > MAX_DIM || var.dim_m > MAX_DIM)
1229 return NULL;
1230
1231 return types[var.type][var.dim_m][var.dim_v];
1232 }
1233
1234 struct pl_var pl_var_from_fmt(pl_fmt fmt, const char *name)
1235 {
1236 static const enum pl_var_type vartypes[] = {
1237 [PL_FMT_FLOAT] = PL_VAR_FLOAT,
1238 [PL_FMT_UNORM] = PL_VAR_FLOAT,
1239 [PL_FMT_SNORM] = PL_VAR_FLOAT,
1240 [PL_FMT_UINT] = PL_VAR_UINT,
1241 [PL_FMT_SINT] = PL_VAR_SINT,
1242 };
1243
1244 pl_assert(fmt->type < PL_ARRAY_SIZE(vartypes));
1245 return (struct pl_var) {
1246 .type = vartypes[fmt->type],
1247 .name = name,
1248 .dim_v = fmt->num_components,
1249 .dim_m = 1,
1250 .dim_a = 1,
1251 };
1252 }
1253
1254 struct pl_var_layout pl_var_host_layout(size_t offset, const struct pl_var *var)
1255 {
1256 size_t col_size = pl_var_type_size(var->type) * var->dim_v;
1257 return (struct pl_var_layout) {
1258 .offset = offset,
1259 .stride = col_size,
1260 .size = col_size * var->dim_m * var->dim_a,
1261 };
1262 }
1263
1264 struct pl_var_layout pl_std140_layout(size_t offset, const struct pl_var *var)
1265 {
1266 size_t el_size = pl_var_type_size(var->type);
1267
1268 // std140 packing rules:
1269 // 1. The size of generic values is their size in bytes
1270 // 2. The size of vectors is the vector length * the base count
1271 // 3. Matrices are treated like arrays of column vectors
1272 // 4. The size of array rows is that of the element size rounded up to
1273 // the nearest multiple of vec4
1274 // 5. All values are aligned to a multiple of their size (stride for arrays),
1275 // with the exception of vec3 which is aligned like vec4
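// Worked example of the rules above (illustrative only, not used by the
// code): a vec3 gets stride 12 but alignment 16, and a float[3] array is
// padded out to a 16-byte stride, i.e. 48 bytes total.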
1276 size_t stride = el_size * var->dim_v;
1277 size_t align = stride;
1278 if (var->dim_v == 3)
1279 align += el_size;
1280 if (var->dim_m * var->dim_a > 1)
1281 stride = align = PL_ALIGN2(align, sizeof(float[4]));
1282
1283 return (struct pl_var_layout) {
1284 .offset = PL_ALIGN2(offset, align),
1285 .stride = stride,
1286 .size = stride * var->dim_m * var->dim_a,
1287 };
1288 }
1289
1290 struct pl_var_layout pl_std430_layout(size_t offset, const struct pl_var *var)
1291 {
1292 size_t el_size = pl_var_type_size(var->type);
1293
1294 // std430 packing rules: like std140, except arrays/matrices are always
1295 // "tightly" packed, even arrays/matrices of vec3s
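// For comparison with std140 (illustrative only): the same float[3] array
// stays tightly packed at a 4-byte stride (12 bytes total), while a lone
// vec3 is still aligned to 16 bytes but only occupies 12.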
1296 size_t stride = el_size * var->dim_v;
1297 size_t align = stride;
1298 if (var->dim_v == 3)
1299 align += el_size;
1300 if (var->dim_m * var->dim_a > 1)
1301 stride = align;
1302
1303 return (struct pl_var_layout) {
1304 .offset = PL_ALIGN2(offset, align),
1305 .stride = stride,
1306 .size = stride * var->dim_m * var->dim_a,
1307 };
1308 }
1309
1310 void memcpy_layout(void *dst_p, struct pl_var_layout dst_layout,
1311 const void *src_p, struct pl_var_layout src_layout)
1312 {
1313 uintptr_t src = (uintptr_t) src_p + src_layout.offset;
1314 uintptr_t dst = (uintptr_t) dst_p + dst_layout.offset;
1315
1316 if (src_layout.stride == dst_layout.stride) {
1317 pl_assert(dst_layout.size == src_layout.size);
1318 memcpy((void *) dst, (const void *) src, src_layout.size);
1319 return;
1320 }
1321
1322 size_t stride = PL_MIN(src_layout.stride, dst_layout.stride);
1323 uintptr_t end = src + src_layout.size;
1324 while (src < end) {
1325 pl_assert(dst < (uintptr_t) dst_p + dst_layout.offset + dst_layout.size);
1326 memcpy((void *) dst, (const void *) src, stride);
1327 src += src_layout.stride;
1328 dst += dst_layout.stride;
1329 }
1330 }
1331
1332 int pl_desc_namespace(pl_gpu gpu, enum pl_desc_type type)
1333 {
1334 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1335 int ret = impl->desc_namespace(gpu, type);
1336 pl_assert(ret >= 0 && ret < PL_DESC_TYPE_COUNT);
1337 return ret;
1338 }
1339
1340 const char *pl_desc_access_glsl_name(enum pl_desc_access mode)
1341 {
1342 switch (mode) {
1343 case PL_DESC_ACCESS_READWRITE: return "";
1344 case PL_DESC_ACCESS_READONLY: return "readonly";
1345 case PL_DESC_ACCESS_WRITEONLY: return "writeonly";
1346 case PL_DESC_ACCESS_COUNT: break;
1347 }
1348
1349 pl_unreachable();
1350 }
1351
1352 const struct pl_blend_params pl_alpha_overlay = {
1353 .src_rgb = PL_BLEND_SRC_ALPHA,
1354 .dst_rgb = PL_BLEND_ONE_MINUS_SRC_ALPHA,
1355 .src_alpha = PL_BLEND_ONE,
1356 .dst_alpha = PL_BLEND_ONE_MINUS_SRC_ALPHA,
1357 };
1358
1359 pl_pass pl_pass_create(pl_gpu gpu, const struct pl_pass_params *params)
1360 {
1361 require(params->glsl_shader);
1362 switch(params->type) {
1363 case PL_PASS_RASTER:
1364 require(params->vertex_shader);
1365 for (int i = 0; i < params->num_vertex_attribs; i++) {
1366 struct pl_vertex_attrib va = params->vertex_attribs[i];
1367 require(va.name);
1368 require(va.fmt);
1369 require(va.fmt->caps & PL_FMT_CAP_VERTEX);
1370 require(va.offset + va.fmt->texel_size <= params->vertex_stride);
1371 }
1372
1373 pl_fmt target_fmt = params->target_dummy.params.format;
1374 require(target_fmt);
1375 require(target_fmt->caps & PL_FMT_CAP_RENDERABLE);
1376 require(!params->blend_params || target_fmt->caps & PL_FMT_CAP_BLENDABLE);
1377 require(!params->blend_params || params->load_target);
1378 break;
1379 case PL_PASS_COMPUTE:
1380 require(gpu->glsl.compute);
1381 break;
1382 case PL_PASS_INVALID:
1383 case PL_PASS_TYPE_COUNT:
1384 pl_unreachable();
1385 }
1386
1387 require(params->num_variables <= gpu->limits.max_variables);
1388 for (int i = 0; i < params->num_variables; i++) {
1389 struct pl_var var = params->variables[i];
1390 require(var.name);
1391 require(pl_var_glsl_type_name(var));
1392 }
1393
1394 require(params->num_constants <= gpu->limits.max_constants);
1395 for (int i = 0; i < params->num_constants; i++)
1396 require(params->constants[i].type);
1397
1398 for (int i = 0; i < params->num_descriptors; i++) {
1399 struct pl_desc desc = params->descriptors[i];
1400 require(desc.name);
1401
1402 // enforce disjoint descriptor bindings for each namespace
1403 int namespace = pl_desc_namespace(gpu, desc.type);
1404 for (int j = i+1; j < params->num_descriptors; j++) {
1405 struct pl_desc other = params->descriptors[j];
1406 require(desc.binding != other.binding ||
1407 namespace != pl_desc_namespace(gpu, other.type));
1408 }
1409 }
1410
1411 require(params->push_constants_size <= gpu->limits.max_pushc_size);
1412 require(params->push_constants_size == PL_ALIGN2(params->push_constants_size, 4));
1413
1414 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1415 return impl->pass_create(gpu, params);
1416
1417 error:
1418 return NULL;
1419 }
1420
1421 void pl_pass_destroy(pl_gpu gpu, pl_pass *pass)
1422 {
1423 if (!*pass)
1424 return;
1425
1426 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1427 impl->pass_destroy(gpu, *pass);
1428 *pass = NULL;
1429 }
1430
1431 void pl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params)
1432 {
1433 pl_pass pass = params->pass;
1434 struct pl_pass_run_params new = *params;
1435
1436 for (int i = 0; i < pass->params.num_descriptors; i++) {
1437 struct pl_desc desc = pass->params.descriptors[i];
1438 struct pl_desc_binding db = params->desc_bindings[i];
1439 require(db.object);
1440 switch (desc.type) {
1441 case PL_DESC_SAMPLED_TEX: {
1442 pl_tex tex = db.object;
1443 pl_fmt fmt = tex->params.format;
1444 require(tex->params.sampleable);
1445 require(db.sample_mode != PL_TEX_SAMPLE_LINEAR || (fmt->caps & PL_FMT_CAP_LINEAR));
1446 break;
1447 }
1448 case PL_DESC_STORAGE_IMG: {
1449 pl_tex tex = db.object;
1450 pl_fmt fmt = tex->params.format;
1451 require(tex->params.storable);
1452 require(desc.access != PL_DESC_ACCESS_READWRITE || (fmt->caps & PL_FMT_CAP_READWRITE));
1453 break;
1454 }
1455 case PL_DESC_BUF_UNIFORM: {
1456 pl_buf buf = db.object;
1457 require(buf->params.uniform);
1458 break;
1459 }
1460 case PL_DESC_BUF_STORAGE: {
1461 pl_buf buf = db.object;
1462 require(buf->params.storable);
1463 break;
1464 }
1465 case PL_DESC_BUF_TEXEL_UNIFORM: {
1466 pl_buf buf = db.object;
1467 require(buf->params.uniform && buf->params.format);
1468 break;
1469 }
1470 case PL_DESC_BUF_TEXEL_STORAGE: {
1471 pl_buf buf = db.object;
1472 pl_fmt fmt = buf->params.format;
1473 require(buf->params.storable && buf->params.format);
1474 require(desc.access != PL_DESC_ACCESS_READWRITE || (fmt->caps & PL_FMT_CAP_READWRITE));
1475 break;
1476 }
1477 case PL_DESC_INVALID:
1478 case PL_DESC_TYPE_COUNT:
1479 pl_unreachable();
1480 }
1481 }
1482
1483 for (int i = 0; i < params->num_var_updates; i++) {
1484 struct pl_var_update vu = params->var_updates[i];
1485 require(vu.index >= 0 && vu.index < pass->params.num_variables);
1486 require(vu.data);
1487 }
1488
1489 require(params->push_constants || !pass->params.push_constants_size);
1490
1491 switch (pass->params.type) {
1492 case PL_PASS_RASTER: {
1493 switch (pass->params.vertex_type) {
1494 case PL_PRIM_TRIANGLE_LIST:
1495 require(params->vertex_count % 3 == 0);
1496 // fall through
1497 case PL_PRIM_TRIANGLE_STRIP:
1498 require(params->vertex_count >= 3);
1499 break;
1500 case PL_PRIM_TYPE_COUNT:
1501 pl_unreachable();
1502 }
1503
1504 require(!params->vertex_data ^ !params->vertex_buf);
1505 if (params->vertex_buf) {
1506 pl_buf vertex_buf = params->vertex_buf;
1507 require(vertex_buf->params.drawable);
1508 if (!params->index_data && !params->index_buf) {
1509 // Cannot bounds check indexed draws
1510 size_t vert_size = params->vertex_count * pass->params.vertex_stride;
1511 require(params->buf_offset + vert_size <= vertex_buf->params.size);
1512 }
1513 }
1514
1515 require(!params->index_data || !params->index_buf);
1516 if (params->index_buf) {
1517 pl_buf index_buf = params->index_buf;
1518 require(!params->vertex_data);
1519 require(index_buf->params.drawable);
1520 size_t index_size = params->vertex_count * sizeof(*params->index_data);
1521 require(params->index_offset + index_size <= index_buf->params.size);
1522 }
1523
1524 pl_tex target = params->target;
1525 require(target);
1526 require(pl_tex_params_dimension(target->params) == 2);
1527 require(target->params.format == pass->params.target_dummy.params.format);
1528 require(target->params.renderable);
1529 struct pl_rect2d *vp = &new.viewport;
1530 struct pl_rect2d *sc = &new.scissors;
1531
1532 // Sanitize viewport/scissors
1533 if (!vp->x0 && !vp->x1)
1534 vp->x1 = target->params.w;
1535 if (!vp->y0 && !vp->y1)
1536 vp->y1 = target->params.h;
1537
1538 if (!sc->x0 && !sc->x1)
1539 sc->x1 = target->params.w;
1540 if (!sc->y0 && !sc->y1)
1541 sc->y1 = target->params.h;
1542
1543 // Constrain the scissors to the target dimension (to sanitize the
1544 // underlying graphics API calls)
1545 sc->x0 = PL_CLAMP(sc->x0, 0, target->params.w);
1546 sc->y0 = PL_CLAMP(sc->y0, 0, target->params.h);
1547 sc->x1 = PL_CLAMP(sc->x1, 0, target->params.w);
1548 sc->y1 = PL_CLAMP(sc->y1, 0, target->params.h);
1549
1550 // Scissors wholly outside target -> silently drop pass (also needed
1551 // to ensure we don't cause UB by specifying invalid scissors)
1552 if (!pl_rect_w(*sc) || !pl_rect_h(*sc))
1553 return;
1554
1555 require(pl_rect_w(*vp) > 0);
1556 require(pl_rect_h(*vp) > 0);
1557 require(pl_rect_w(*sc) > 0);
1558 require(pl_rect_h(*sc) > 0);
1559
1560 if (!pass->params.load_target)
1561 pl_tex_invalidate(gpu, target);
1562 break;
1563 }
1564 case PL_PASS_COMPUTE:
1565 for (int i = 0; i < PL_ARRAY_SIZE(params->compute_groups); i++) {
1566 require(params->compute_groups[i] >= 0);
1567 require(params->compute_groups[i] <= gpu->limits.max_dispatch[i]);
1568 }
1569 break;
1570 case PL_PASS_INVALID:
1571 case PL_PASS_TYPE_COUNT:
1572 pl_unreachable();
1573 }
1574
1575 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1576 impl->pass_run(gpu, &new);
1577
1578 error:
1579 return;
1580 }
1581
1582 void pl_gpu_flush(pl_gpu gpu)
1583 {
1584 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1585 if (impl->gpu_flush)
1586 impl->gpu_flush(gpu);
1587 }
1588
1589 void pl_gpu_finish(pl_gpu gpu)
1590 {
1591 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1592 impl->gpu_finish(gpu);
1593 }
1594
1595 bool pl_gpu_is_failed(pl_gpu gpu)
1596 {
1597 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1598 if (!impl->gpu_is_failed)
1599 return false;
1600
1601 return impl->gpu_is_failed(gpu);
1602 }
1603
1604 // GPU-internal helpers
1605
1606 bool pl_tex_upload_pbo(pl_gpu gpu, const struct pl_tex_transfer_params *params)
1607 {
1608 if (params->buf)
1609 return pl_tex_upload(gpu, params);
1610
1611 pl_buf buf = NULL;
1612 struct pl_buf_params bufparams = {
1613 .size = pl_tex_transfer_size(params),
1614 };
1615
1616 // If we can import host pointers directly, and the function is being used
1617 // asynchronously, then we can use host pointer import to skip a memcpy. In
1618 // the synchronous case, we still force a host memcpy to avoid stalling the
1619 // host until the GPU memcpy completes.
1620 bool can_import = gpu->import_caps.buf & PL_HANDLE_HOST_PTR;
1621 if (can_import && params->callback && bufparams.size > 32*1024) { // 32 KiB
1622 bufparams.import_handle = PL_HANDLE_HOST_PTR;
1623 bufparams.shared_mem = (struct pl_shared_mem) {
1624 .handle.ptr = params->ptr,
1625 .size = bufparams.size,
1626 .offset = 0,
1627 };
1628
1629 // Suppress errors for this test because it may fail, in which case we
1630 // want to silently fall back.
1631 pl_log_level_cap(gpu->log, PL_LOG_DEBUG);
1632 buf = pl_buf_create(gpu, &bufparams);
1633 pl_log_level_cap(gpu->log, PL_LOG_NONE);
1634 }
1635
1636 if (!buf) {
1637 bufparams.import_handle = 0;
1638 bufparams.host_writable = true;
1639 buf = pl_buf_create(gpu, &bufparams);
1640 }
1641
1642 if (!buf)
1643 return false;
1644
1645 if (!bufparams.import_handle)
1646 pl_buf_write(gpu, buf, 0, params->ptr, buf->params.size);
1647
1648 struct pl_tex_transfer_params newparams = *params;
1649 newparams.buf = buf;
1650 newparams.ptr = NULL;
1651
1652 bool ok = pl_tex_upload(gpu, &newparams);
1653 pl_buf_destroy(gpu, &buf);
1654 return ok;
1655 }
1656
1657 struct pbo_cb_ctx {
1658 pl_gpu gpu;
1659 pl_buf buf;
1660 void *ptr;
1661 void (*callback)(void *priv);
1662 void *priv;
1663 };
1664
1665 static void pbo_download_cb(void *priv)
1666 {
1667 struct pbo_cb_ctx *p = priv;
1668 pl_buf_read(p->gpu, p->buf, 0, p->ptr, p->buf->params.size);
1669 pl_buf_destroy(p->gpu, &p->buf);
1670
1671 // Run the original callback
1672 p->callback(p->priv);
1673 pl_free(priv);
1674 }
1675
1676 bool pl_tex_download_pbo(pl_gpu gpu, const struct pl_tex_transfer_params *params)
1677 {
1678 if (params->buf)
1679 return pl_tex_download(gpu, params);
1680
1681 pl_buf buf = NULL;
1682 struct pl_buf_params bufparams = {
1683 .size = pl_tex_transfer_size(params),
1684 };
1685
1686 // If we can import host pointers directly, we can avoid an extra memcpy
1687 // (sometimes). In the cases where it isn't avoidable, the extra memcpy
1688 // will happen inside VRAM, which is typically faster anyway.
1689 bool can_import = gpu->import_caps.buf & PL_HANDLE_HOST_PTR;
1690 if (can_import && bufparams.size > 32*1024) { // 32 KiB
1691 bufparams.import_handle = PL_HANDLE_HOST_PTR;
1692 bufparams.shared_mem = (struct pl_shared_mem) {
1693 .handle.ptr = params->ptr,
1694 .size = bufparams.size,
1695 .offset = 0,
1696 };
1697
1698 // Suppress errors for this test because it may fail, in which case we
1699 // want to silently fall back.
1700 pl_log_level_cap(gpu->log, PL_LOG_DEBUG);
1701 buf = pl_buf_create(gpu, &bufparams);
1702 pl_log_level_cap(gpu->log, PL_LOG_NONE);
1703 }
1704
1705 if (!buf) {
1706 // Fallback when host pointer import is not supported
1707 bufparams.import_handle = 0;
1708 bufparams.host_readable = true;
1709 buf = pl_buf_create(gpu, &bufparams);
1710 }
1711
1712 if (!buf)
1713 return false;
1714
1715 struct pl_tex_transfer_params newparams = *params;
1716 newparams.ptr = NULL;
1717 newparams.buf = buf;
1718
1719 // If the transfer is asynchronous, propagate our host read asynchronously
1720 if (params->callback && !bufparams.import_handle) {
1721 newparams.callback = pbo_download_cb;
1722 newparams.priv = pl_alloc_struct(NULL, struct pbo_cb_ctx, {
1723 .gpu = gpu,
1724 .buf = buf,
1725 .ptr = params->ptr,
1726 .callback = params->callback,
1727 .priv = params->priv,
1728 });
1729 }
1730
1731 if (!pl_tex_download(gpu, &newparams)) {
1732 pl_buf_destroy(gpu, &buf);
1733 return false;
1734 }
1735
1736 if (!params->callback) {
1737 while (pl_buf_poll(gpu, buf, 10000000)) // 10 ms
1738 PL_TRACE(gpu, "pl_tex_download: synchronous/blocking (slow path)");
1739 }
1740
1741 bool ok;
1742 if (bufparams.import_handle) {
1743         // With an imported host pointer, completion of the buffer download
1744         // already means the data is in place; no further copy is needed.
1745         // (Note: this applies even to asynchronous downloads)
1746 ok = true;
1747 pl_buf_destroy(gpu, &buf);
1748 } else if (!params->callback) {
1749 // Synchronous read back to the host pointer
1750 ok = pl_buf_read(gpu, buf, 0, params->ptr, bufparams.size);
1751 pl_buf_destroy(gpu, &buf);
1752 } else {
1753 // Nothing left to do here, the rest will be done by pbo_download_cb
1754 ok = true;
1755 }
1756
1757 return ok;
1758 }
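
// Caller-side sketch (hypothetical texture and buffer names): an asynchronous
// download through this path always has the texels in `pixels` by the time
// `on_done` runs, whether the host-pointer import or the pl_buf_read fallback
// was taken. Assumes `tex` was created with `host_readable` set.
//
//     static void on_done(void *priv)
//     {
//         /* `pixels` now contains the downloaded texels */
//     }
//
//     bool ok = pl_tex_download(gpu, &(struct pl_tex_transfer_params) {
//         .tex      = tex,
//         .ptr      = pixels,
//         .callback = on_done,
//     });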
1759
1760 bool pl_tex_upload_texel(pl_gpu gpu, pl_dispatch dp,
1761 const struct pl_tex_transfer_params *params)
1762 {
1763 const int threads = PL_MIN(256, pl_rect_w(params->rc));
1764 pl_tex tex = params->tex;
1765 pl_fmt fmt = tex->params.format;
1766 require(params->buf);
1767
1768 pl_shader sh = pl_dispatch_begin(dp);
1769 if (!sh_try_compute(sh, threads, 1, false, 0)) {
1770 PL_ERR(gpu, "Failed emulating texture transfer!");
1771 pl_dispatch_abort(dp, &sh);
1772 return false;
1773 }
1774
1775 bool ubo = params->buf->params.uniform;
1776 ident_t buf = sh_desc(sh, (struct pl_shader_desc) {
1777 .binding.object = params->buf,
1778 .desc = {
1779 .name = "data",
1780 .type = ubo ? PL_DESC_BUF_TEXEL_UNIFORM : PL_DESC_BUF_TEXEL_STORAGE,
1781 },
1782 });
1783
1784 ident_t img = sh_desc(sh, (struct pl_shader_desc) {
1785 .binding.object = params->tex,
1786 .desc = {
1787 .name = "image",
1788 .type = PL_DESC_STORAGE_IMG,
1789 .access = PL_DESC_ACCESS_WRITEONLY,
1790 },
1791 });
1792
1793     // If the transfer width is an exact multiple of the thread count, we can
1794     // skip the bounds check. Otherwise, discard out-of-range invocations, since
1795     // they would read past the end of the buffer and write outside `rc`.
1796 int groups_x = (pl_rect_w(params->rc) + threads - 1) / threads;
1797 if (groups_x * threads != pl_rect_w(params->rc)) {
1798 GLSL("if (gl_GlobalInvocationID.x >= %d) \n"
1799 " return; \n",
1800 pl_rect_w(params->rc));
1801 }
1802
1803 GLSL("vec4 color = vec4(0.0); \n"
1804 "ivec3 pos = ivec3(gl_GlobalInvocationID) + ivec3(%d, %d, %d); \n"
1805 "int base = ((pos.z * %d + pos.y) * %d + pos.x) * %d; \n",
1806 params->rc.x0, params->rc.y0, params->rc.z0,
1807 params->stride_h, params->stride_w, fmt->num_components);
1808
1809 for (int i = 0; i < fmt->num_components; i++) {
1810 GLSL("color[%d] = %s(%s, base + %d).r; \n",
1811 i, ubo ? "texelFetch" : "imageLoad", buf, i);
1812 }
1813
1814 int dims = pl_tex_params_dimension(tex->params);
1815 static const char *coord_types[] = {
1816 [1] = "int",
1817 [2] = "ivec2",
1818 [3] = "ivec3",
1819 };
1820
1821 GLSL("imageStore(%s, %s(pos), color);\n", img, coord_types[dims]);
1822 return pl_dispatch_compute(dp, &(struct pl_dispatch_compute_params) {
1823 .shader = &sh,
1824 .dispatch_size = {
1825 groups_x,
1826 pl_rect_h(params->rc),
1827 pl_rect_d(params->rc),
1828 },
1829 });
1830
1831 error:
1832 return false;
1833 }
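
// For reference, the `base` index computed by the shader above corresponds to
// the following host-side addressing of the texel buffer (sketch; the strides
// are in texels, matching `stride_w`/`stride_h` above):
//
//     size_t texel_base(int x, int y, int z, int stride_w, int stride_h,
//                       int num_components)
//     {
//         return (((size_t) z * stride_h + y) * stride_w + x) * num_components;
//     }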
1834
1835 bool pl_tex_download_texel(pl_gpu gpu, pl_dispatch dp,
1836 const struct pl_tex_transfer_params *params)
1837 {
1838 const int threads = PL_MIN(256, pl_rect_w(params->rc));
1839 pl_tex tex = params->tex;
1840 pl_fmt fmt = tex->params.format;
1841 require(params->buf);
1842
1843 pl_shader sh = pl_dispatch_begin(dp);
1844 if (!sh_try_compute(sh, threads, 1, false, 0)) {
1845 PL_ERR(gpu, "Failed emulating texture transfer!");
1846 pl_dispatch_abort(dp, &sh);
1847 return false;
1848 }
1849
1850 ident_t buf = sh_desc(sh, (struct pl_shader_desc) {
1851 .binding.object = params->buf,
1852 .desc = {
1853 .name = "data",
1854 .type = PL_DESC_BUF_TEXEL_STORAGE,
1855 },
1856 });
1857
1858 ident_t img = sh_desc(sh, (struct pl_shader_desc) {
1859 .binding.object = params->tex,
1860 .desc = {
1861 .name = "image",
1862 .type = PL_DESC_STORAGE_IMG,
1863 .access = PL_DESC_ACCESS_READONLY,
1864 },
1865 });
1866
1867 int groups_x = (pl_rect_w(params->rc) + threads - 1) / threads;
1868 if (groups_x * threads != pl_rect_w(params->rc)) {
1869 GLSL("if (gl_GlobalInvocationID.x >= %d) \n"
1870 " return; \n",
1871 pl_rect_w(params->rc));
1872 }
1873
1874 int dims = pl_tex_params_dimension(tex->params);
1875 static const char *coord_types[] = {
1876 [1] = "int",
1877 [2] = "ivec2",
1878 [3] = "ivec3",
1879 };
1880
1881 GLSL("ivec3 pos = ivec3(gl_GlobalInvocationID) + ivec3(%d, %d, %d); \n"
1882 "int base = ((pos.z * %d + pos.y) * %d + pos.x) * %d; \n"
1883 "vec4 color = imageLoad(%s, %s(pos)); \n",
1884 params->rc.x0, params->rc.y0, params->rc.z0,
1885 params->stride_h, params->stride_w, fmt->num_components,
1886 img, coord_types[dims]);
1887
1888 for (int i = 0; i < fmt->num_components; i++)
1889 GLSL("imageStore(%s, base + %d, vec4(color[%d])); \n", buf, i, i);
1890
1891 return pl_dispatch_compute(dp, &(struct pl_dispatch_compute_params) {
1892 .shader = &sh,
1893 .dispatch_size = {
1894 groups_x,
1895 pl_rect_h(params->rc),
1896 pl_rect_d(params->rc),
1897 },
1898 });
1899
1900 error:
1901 return false;
1902 }
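
// The dispatch sizes above round the number of work groups up so that a
// partial group still covers the tail of the rectangle; equivalently (sketch):
//
//     static inline int div_ceil(int num, int den)
//     {
//         return (num + den - 1) / den;
//     }
//
//     // groups_x == div_ceil(pl_rect_w(params->rc), threads)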
1903
1904 bool pl_tex_blit_compute(pl_gpu gpu, pl_dispatch dp,
1905 const struct pl_tex_blit_params *params)
1906 {
1907 if (!params->src->params.storable || !params->dst->params.storable)
1908 return false;
1909
1910 // Normalize `dst_rc`, moving all flipping to `src_rc` instead.
1911 struct pl_rect3d src_rc = params->src_rc;
1912 struct pl_rect3d dst_rc = params->dst_rc;
1913 if (pl_rect_w(dst_rc) < 0) {
1914 PL_SWAP(src_rc.x0, src_rc.x1);
1915 PL_SWAP(dst_rc.x0, dst_rc.x1);
1916 }
1917 if (pl_rect_h(dst_rc) < 0) {
1918 PL_SWAP(src_rc.y0, src_rc.y1);
1919 PL_SWAP(dst_rc.y0, dst_rc.y1);
1920 }
1921 if (pl_rect_d(dst_rc) < 0) {
1922 PL_SWAP(src_rc.z0, src_rc.z1);
1923 PL_SWAP(dst_rc.z0, dst_rc.z1);
1924 }
1925
1926 bool needs_scaling = false;
1927 needs_scaling |= pl_rect_w(dst_rc) != abs(pl_rect_w(src_rc));
1928 needs_scaling |= pl_rect_h(dst_rc) != abs(pl_rect_h(src_rc));
1929 needs_scaling |= pl_rect_d(dst_rc) != abs(pl_rect_d(src_rc));
1930
1931 // Manual trilinear interpolation would be too slow to justify
1932 bool needs_sampling = needs_scaling && params->sample_mode != PL_TEX_SAMPLE_NEAREST;
1933 if (needs_sampling && !params->src->params.sampleable)
1934 return false;
1935
1936 const int threads = 256;
1937 int bw = PL_MIN(32, pl_rect_w(dst_rc));
1938 int bh = PL_MIN(threads / bw, pl_rect_h(dst_rc));
1939 pl_shader sh = pl_dispatch_begin(dp);
1940 if (!sh_try_compute(sh, bw, bh, false, 0)) {
1941 pl_dispatch_abort(dp, &sh);
1942 return false;
1943 }
1944
1945     // Avoid writing out of bounds of `dst`
1946 int groups_x = (pl_rect_w(dst_rc) + bw - 1) / bw;
1947 if (groups_x * bw != pl_rect_w(dst_rc)) {
1948 GLSL("if (gl_GlobalInvocationID.x >= %d) \n"
1949 " return; \n",
1950 pl_rect_w(dst_rc));
1951 }
1952
1953 int groups_y = (pl_rect_h(dst_rc) + bh - 1) / bh;
1954 if (groups_y * bh != pl_rect_h(dst_rc)) {
1955 GLSL("if (gl_GlobalInvocationID.y >= %d) \n"
1956 " return; \n",
1957 pl_rect_h(dst_rc));
1958 }
1959
1960 ident_t dst = sh_desc(sh, (struct pl_shader_desc) {
1961 .binding.object = params->dst,
1962 .desc = {
1963 .name = "dst",
1964 .type = PL_DESC_STORAGE_IMG,
1965 .access = PL_DESC_ACCESS_WRITEONLY,
1966 },
1967 });
1968
1969 static const char *vecs[] = {
1970 [1] = "float",
1971 [2] = "vec2",
1972 [3] = "vec3",
1973 [4] = "vec4",
1974 };
1975
1976 static const char *ivecs[] = {
1977 [1] = "int",
1978 [2] = "ivec2",
1979 [3] = "ivec3",
1980 [4] = "ivec4",
1981 };
1982
1983 int src_dims = pl_tex_params_dimension(params->src->params);
1984 int dst_dims = pl_tex_params_dimension(params->dst->params);
1985 GLSL("const ivec3 pos = ivec3(gl_GlobalInvocationID); \n"
1986 "%s dst_pos = %s(pos + ivec3(%d, %d, %d)); \n",
1987 ivecs[dst_dims], ivecs[dst_dims],
1988 params->dst_rc.x0, params->dst_rc.y0, params->dst_rc.z0);
1989
1990 if (needs_sampling || (needs_scaling && params->src->params.sampleable)) {
1991
1992 ident_t src = sh_desc(sh, (struct pl_shader_desc) {
1993 .desc = {
1994 .name = "src",
1995 .type = PL_DESC_SAMPLED_TEX,
1996 },
1997 .binding = {
1998 .object = params->src,
1999 .address_mode = PL_TEX_ADDRESS_CLAMP,
2000 .sample_mode = params->sample_mode,
2001 }
2002 });
2003
2004 GLSL("vec3 fpos = (vec3(pos) + vec3(0.5)) / vec3(%d.0, %d.0, %d.0); \n"
2005 "%s src_pos = %s(0.5); \n"
2006 "src_pos.x = mix(%f, %f, fpos.x); \n",
2007 pl_rect_w(dst_rc), pl_rect_h(dst_rc), pl_rect_d(dst_rc),
2008 vecs[src_dims], vecs[src_dims],
2009 (float) src_rc.x0 / params->src->params.w,
2010 (float) src_rc.x1 / params->src->params.w);
2011
2012 if (params->src->params.h) {
2013 GLSL("src_pos.y = mix(%f, %f, fpos.y); \n",
2014 (float) src_rc.y0 / params->src->params.h,
2015 (float) src_rc.y1 / params->src->params.h);
2016 }
2017
2018 if (params->src->params.d) {
2019 GLSL("src_pos.z = mix(%f, %f, fpos.z); \n",
2020 (float) src_rc.z0 / params->src->params.d,
2021 (float) src_rc.z1 / params->src->params.d);
2022 }
2023
2024 GLSL("imageStore(%s, dst_pos, %s(%s, src_pos)); \n",
2025 dst, sh_tex_fn(sh, params->src->params), src);
2026
2027 } else {
2028
2029 ident_t src = sh_desc(sh, (struct pl_shader_desc) {
2030 .binding.object = params->src,
2031 .desc = {
2032 .name = "src",
2033 .type = PL_DESC_STORAGE_IMG,
2034 .access = PL_DESC_ACCESS_READONLY,
2035 },
2036 });
2037
2038 if (needs_scaling) {
2039 GLSL("ivec3 src_pos = ivec3(round(vec3(%f, %f, %f) * vec3(pos))); \n",
2040 fabs((float) pl_rect_w(src_rc) / pl_rect_w(dst_rc)),
2041 fabs((float) pl_rect_h(src_rc) / pl_rect_h(dst_rc)),
2042 fabs((float) pl_rect_d(src_rc) / pl_rect_d(dst_rc)));
2043 } else {
2044 GLSL("ivec3 src_pos = pos; \n");
2045 }
2046
2047 GLSL("src_pos = ivec3(%d, %d, %d) * src_pos + ivec3(%d, %d, %d); \n"
2048 "imageStore(%s, dst_pos, imageLoad(%s, %s(src_pos))); \n",
2049 src_rc.x1 < src_rc.x0 ? -1 : 1,
2050 src_rc.y1 < src_rc.y0 ? -1 : 1,
2051 src_rc.z1 < src_rc.z0 ? -1 : 1,
2052 src_rc.x0, src_rc.y0, src_rc.z0,
2053 dst, src, ivecs[src_dims]);
2054
2055 }
2056
2057 return pl_dispatch_compute(dp, &(struct pl_dispatch_compute_params) {
2058 .shader = &sh,
2059 .dispatch_size = {
2060 groups_x,
2061 groups_y,
2062 pl_rect_d(dst_rc),
2063 },
2064 });
2065 }
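
// The rect normalization at the top of this function keeps `dst_rc`
// increasing along every axis by moving any flip onto `src_rc`; a minimal
// per-axis sketch of the same idea:
//
//     static void normalize_axis(int *s0, int *s1, int *d0, int *d1)
//     {
//         if (*d1 < *d0) { // dst decreasing: flip both src and dst
//             PL_SWAP(*s0, *s1);
//             PL_SWAP(*d0, *d1);
//         }
//     }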
2066
2067 void pl_tex_blit_raster(pl_gpu gpu, pl_dispatch dp,
2068 const struct pl_tex_blit_params *params)
2069 {
2070 enum pl_fmt_type src_type = params->src->params.format->type;
2071 enum pl_fmt_type dst_type = params->dst->params.format->type;
2072
2073 // Only for 2D textures
2074 pl_assert(params->src->params.h && !params->src->params.d);
2075 pl_assert(params->dst->params.h && !params->dst->params.d);
2076
2077 // Integer textures are not supported
2078 pl_assert(src_type != PL_FMT_UINT && src_type != PL_FMT_SINT);
2079 pl_assert(dst_type != PL_FMT_UINT && dst_type != PL_FMT_SINT);
2080
2081 struct pl_rect2df src_rc = {
2082 .x0 = params->src_rc.x0, .x1 = params->src_rc.x1,
2083 .y0 = params->src_rc.y0, .y1 = params->src_rc.y1,
2084 };
2085 struct pl_rect2d dst_rc = {
2086 .x0 = params->dst_rc.x0, .x1 = params->dst_rc.x1,
2087 .y0 = params->dst_rc.y0, .y1 = params->dst_rc.y1,
2088 };
2089
2090 pl_shader sh = pl_dispatch_begin(dp);
2091 sh->res.output = PL_SHADER_SIG_COLOR;
2092
2093 ident_t pos, src = sh_bind(sh, params->src, PL_TEX_ADDRESS_CLAMP,
2094 params->sample_mode, "src_tex", &src_rc, &pos, NULL, NULL);
2095
2096 GLSL("vec4 color = %s(%s, %s); \n",
2097 sh_tex_fn(sh, params->src->params), src, pos);
2098
2099 pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
2100 .shader = &sh,
2101 .target = params->dst,
2102 .rect = dst_rc,
2103 });
2104 }
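
// Sketch of how a backend's blit implementation might combine the two helpers
// above (hypothetical; real backends add further constraints): try the compute
// path first and fall back to the fragment shader path, which only handles
// 2D, non-integer formats.
//
//     if (!pl_tex_blit_compute(gpu, dp, params))
//         pl_tex_blit_raster(gpu, dp, params);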
2105
2106 void pl_pass_run_vbo(pl_gpu gpu, const struct pl_pass_run_params *params)
2107 {
2108 if (!params->vertex_data && !params->index_data)
2109 return pl_pass_run(gpu, params);
2110
2111 struct pl_pass_run_params newparams = *params;
2112 pl_buf vert = NULL, index = NULL;
2113
2114 if (params->vertex_data) {
2115 int num_vertices = 0;
2116 if (params->index_data) {
2117 // Indexed draw, so we need to store all indexed vertices
2118 for (int i = 0; i < params->vertex_count; i++)
2119 num_vertices = PL_MAX(num_vertices, params->index_data[i]);
2120 num_vertices += 1;
2121 } else {
2122 num_vertices = params->vertex_count;
2123 }
2124
2125 vert = pl_buf_create(gpu, &(struct pl_buf_params) {
2126 .size = num_vertices * params->pass->params.vertex_stride,
2127 .initial_data = params->vertex_data,
2128 .drawable = true,
2129 });
2130
2131 if (!vert) {
2132 PL_ERR(gpu, "Failed allocating vertex buffer!");
2133 return;
2134 }
2135
2136 newparams.vertex_buf = vert;
2137 newparams.vertex_data = NULL;
2138 }
2139
2140 if (params->index_data) {
2141 index = pl_buf_create(gpu, &(struct pl_buf_params) {
2142 .size = params->vertex_count * sizeof(*params->index_data),
2143 .initial_data = params->index_data,
2144 .drawable = true,
2145 });
2146
2147 if (!index) {
2148 PL_ERR(gpu, "Failed allocating index buffer!");
2149 return;
2150 }
2151
2152 newparams.index_buf = index;
2153 newparams.index_data = NULL;
2154 }
2155
2156 pl_pass_run(gpu, &newparams);
2157 pl_buf_destroy(gpu, &vert);
2158 pl_buf_destroy(gpu, &index);
2159 }
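
// Worked example of the vertex-count logic above: an indexed quad drawn as
// two triangles with index_data = {0, 1, 2, 2, 1, 3} and vertex_count = 6
// references indices up to 3, so 4 * vertex_stride bytes of vertex data are
// uploaded, while the index buffer holds all 6 indices.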
2160
2161 struct pl_pass_params pl_pass_params_copy(void *alloc, const struct pl_pass_params *params)
2162 {
2163 struct pl_pass_params new = *params;
2164 new.cached_program = NULL;
2165 new.cached_program_len = 0;
2166
2167 new.glsl_shader = pl_str0dup0(alloc, new.glsl_shader);
2168 new.vertex_shader = pl_str0dup0(alloc, new.vertex_shader);
2169 if (new.blend_params)
2170 new.blend_params = pl_memdup_ptr(alloc, new.blend_params);
2171
2172 #define DUPNAMES(field) \
2173 do { \
2174 size_t _size = new.num_##field * sizeof(new.field[0]); \
2175 new.field = pl_memdup(alloc, new.field, _size); \
2176 for (int j = 0; j < new.num_##field; j++) \
2177 new.field[j].name = pl_str0dup0(alloc, new.field[j].name); \
2178 } while (0)
2179
2180 DUPNAMES(variables);
2181 DUPNAMES(descriptors);
2182 DUPNAMES(vertex_attribs);
2183
2184 #undef DUPNAMES
2185
2186 new.constant_data = NULL;
2187 new.constants = pl_memdup(alloc, new.constants,
2188 new.num_constants * sizeof(new.constants[0]));
2189
2190 return new;
2191 }
2192
2193 pl_sync pl_sync_create(pl_gpu gpu, enum pl_handle_type handle_type)
2194 {
2195 require(handle_type);
2196 require(handle_type & gpu->export_caps.sync);
2197 require(PL_ISPOT(handle_type));
2198
2199 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
2200 return impl->sync_create(gpu, handle_type);
2201
2202 error:
2203 return NULL;
2204 }
2205
2206 void pl_sync_destroy(pl_gpu gpu, pl_sync *sync)
2207 {
2208 if (!*sync)
2209 return;
2210
2211 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
2212 impl->sync_destroy(gpu, *sync);
2213 *sync = NULL;
2214 }
2215
2216 bool pl_tex_export(pl_gpu gpu, pl_tex tex, pl_sync sync)
2217 {
2218 require(tex->params.import_handle || tex->params.export_handle);
2219
2220 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
2221 return impl->tex_export(gpu, tex, sync);
2222
2223 error:
2224 return false;
2225 }
2226
2227 pl_timer pl_timer_create(pl_gpu gpu)
2228 {
2229 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
2230 if (!impl->timer_create)
2231 return NULL;
2232
2233 return impl->timer_create(gpu);
2234 }
2235
2236 void pl_timer_destroy(pl_gpu gpu, pl_timer *timer)
2237 {
2238 if (!*timer)
2239 return;
2240
2241 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
2242 impl->timer_destroy(gpu, *timer);
2243 *timer = NULL;
2244 }
2245
2246 uint64_t pl_timer_query(pl_gpu gpu, pl_timer timer)
2247 {
2248 if (!timer)
2249 return 0;
2250
2251 const struct pl_gpu_fns *impl = PL_PRIV(gpu);
2252 return impl->timer_query(gpu, timer);
2253 }
2254
2255 const char *print_uuid(char buf[3 * UUID_SIZE], const uint8_t uuid[UUID_SIZE])
2256 {
2257 static const char *hexdigits = "0123456789ABCDEF";
2258 for (int i = 0; i < UUID_SIZE; i++) {
2259 uint8_t x = uuid[i];
2260 buf[3 * i + 0] = hexdigits[x >> 4];
2261 buf[3 * i + 1] = hexdigits[x & 0xF];
2262 buf[3 * i + 2] = i == UUID_SIZE - 1 ? '\0' : ':';
2263 }
2264
2265 return buf;
2266 }
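
// Example (hypothetical bytes): a UUID starting {0x1F, 0x2A, 0x00, ...}
// prints as "1F:2A:00:...". Every byte occupies three characters ("XX:" or
// "XX\0" for the final one), which is why the buffer is 3 * UUID_SIZE bytes.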
2267
2268 const char *print_drm_mod(char buf[DRM_MOD_SIZE], uint64_t mod)
2269 {
2270 switch (mod) {
2271 case DRM_FORMAT_MOD_LINEAR: return "LINEAR";
2272 case DRM_FORMAT_MOD_INVALID: return "INVALID";
2273 }
2274
2275 uint8_t vendor = mod >> 56;
2276 uint64_t val = mod & ((1ULL << 56) - 1);
2277
2278 const char *name = NULL;
2279 switch (vendor) {
2280 case 0x00: name = "NONE"; break;
2281 case 0x01: name = "INTEL"; break;
2282 case 0x02: name = "AMD"; break;
2283 case 0x03: name = "NVIDIA"; break;
2284 case 0x04: name = "SAMSUNG"; break;
2285 case 0x08: name = "ARM"; break;
2286 }
2287
2288 if (name) {
2289 snprintf(buf, DRM_MOD_SIZE, "%s 0x%"PRIx64, name, val);
2290 } else {
2291 snprintf(buf, DRM_MOD_SIZE, "0x%02x 0x%"PRIx64, vendor, val);
2292 }
2293
2294 return buf;
2295 }
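
// Worked example: the modifier 0x0100000000000001 carries vendor byte 0x01
// (INTEL) and value 0x1, so it prints as "INTEL 0x1"; a modifier with an
// unrecognized vendor byte, e.g. 0x7F with value 0x1, would print as
// "0x7f 0x1".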
2296