1 /*
2  * This file is part of libplacebo.
3  *
4  * libplacebo is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * libplacebo is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <math.h>
19 
20 #include "common.h"
21 #include "log.h"
22 #include "shaders.h"
23 #include "gpu.h"
24 
25 #define require(expr)                                           \
26   do {                                                          \
27       if (!(expr)) {                                            \
28           PL_ERR(gpu, "Validation failed: %s (%s:%d)",          \
29                   #expr, __FILE__, __LINE__);                   \
30           goto error;                                           \
31       }                                                         \
32   } while (0)
33 
pl_optimal_transfer_stride(pl_gpu gpu,int dimension)34 int pl_optimal_transfer_stride(pl_gpu gpu, int dimension)
35 {
36     return PL_ALIGN2(dimension, gpu->limits.align_tex_xfer_stride);
37 }
38 
pl_gpu_destroy(pl_gpu gpu)39 void pl_gpu_destroy(pl_gpu gpu)
40 {
41     if (!gpu)
42         return;
43 
44     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
45     impl->destroy(gpu);
46 }
47 
pl_fmt_is_ordered(pl_fmt fmt)48 bool pl_fmt_is_ordered(pl_fmt fmt)
49 {
50     bool ret = !fmt->opaque;
51     for (int i = 0; i < fmt->num_components; i++)
52         ret &= fmt->sample_order[i] == i;
53     return ret;
54 }
55 
pl_fmt_is_float(pl_fmt fmt)56 bool pl_fmt_is_float(pl_fmt fmt)
57 {
58     switch (fmt->type) {
59     case PL_FMT_UNKNOWN: // more likely than not
60     case PL_FMT_FLOAT:
61     case PL_FMT_UNORM:
62     case PL_FMT_SNORM:
63         return true;
64 
65     case PL_FMT_UINT:
66     case PL_FMT_SINT:
67         return false;
68 
69     case PL_FMT_TYPE_COUNT:
70         break;
71     }
72 
73     pl_unreachable();
74 }
75 
cmp_fmt(const void * pa,const void * pb)76 static int cmp_fmt(const void *pa, const void *pb)
77 {
78     pl_fmt a = *(pl_fmt *)pa;
79     pl_fmt b = *(pl_fmt *)pb;
80 
81     // Always prefer non-opaque formats
82     if (a->opaque != b->opaque)
83         return PL_CMP(a->opaque, b->opaque);
84 
85     // Always prefer non-emulated formats
86     if (a->emulated != b->emulated)
87         return PL_CMP(a->emulated, b->emulated);
88 
89     int ca = __builtin_popcount(a->caps),
90         cb = __builtin_popcount(b->caps);
91     if (ca != cb)
92         return -PL_CMP(ca, cb); // invert to sort higher values first
93 
94     // If the population count is the same but the caps are different, prefer
95     // the caps with a "lower" value (which tend to be more fundamental caps)
96     if (a->caps != b->caps)
97         return PL_CMP(a->caps, b->caps);
98 
99     // If the capabilities are equal, sort based on the component attributes
100     for (int i = 0; i < PL_ARRAY_SIZE(a->component_depth); i++) {
101         int da = a->component_depth[i],
102             db = b->component_depth[i];
103         if (da != db)
104             return PL_CMP(da, db);
105 
106         int ha = a->host_bits[i],
107             hb = b->host_bits[i];
108         if (ha != hb)
109             return PL_CMP(ha, hb);
110 
111         int oa = a->sample_order[i],
112             ob = b->sample_order[i];
113         if (oa != ob)
114             return PL_CMP(oa, ob);
115     }
116 
117     // Fall back to sorting by the name (for stability)
118     return strcmp(a->name, b->name);
119 }
120 
121 #define FMT_BOOL(letter, cap) ((cap) ? (letter) : '-')
122 #define FMT_IDX4(f) (f)[0], (f)[1], (f)[2], (f)[3]
123 
print_formats(pl_gpu gpu)124 static void print_formats(pl_gpu gpu)
125 {
126     if (!pl_msg_test(gpu->log, PL_LOG_DEBUG))
127         return;
128 
129 #define CAP_HEADER "%-12s"
130 #define CAP_FIELDS "%c%c%c%c%c%c%c%c%c%c%c%c"
131 #define CAP_VALUES \
132     FMT_BOOL('S', fmt->caps & PL_FMT_CAP_SAMPLEABLE),       \
133     FMT_BOOL('s', fmt->caps & PL_FMT_CAP_STORABLE),         \
134     FMT_BOOL('L', fmt->caps & PL_FMT_CAP_LINEAR),           \
135     FMT_BOOL('R', fmt->caps & PL_FMT_CAP_RENDERABLE),       \
136     FMT_BOOL('b', fmt->caps & PL_FMT_CAP_BLENDABLE),        \
137     FMT_BOOL('B', fmt->caps & PL_FMT_CAP_BLITTABLE),        \
138     FMT_BOOL('V', fmt->caps & PL_FMT_CAP_VERTEX),           \
139     FMT_BOOL('u', fmt->caps & PL_FMT_CAP_TEXEL_UNIFORM),    \
140     FMT_BOOL('t', fmt->caps & PL_FMT_CAP_TEXEL_STORAGE),    \
141     FMT_BOOL('H', fmt->caps & PL_FMT_CAP_HOST_READABLE),    \
142     FMT_BOOL('W', fmt->caps & PL_FMT_CAP_READWRITE),        \
143     FMT_BOOL('G', fmt->gatherable)
144 
145     PL_DEBUG(gpu,  "GPU texture formats:");
146     PL_DEBUG(gpu,  "    %-10s %-6s %-4s %-4s " CAP_HEADER " %-3s %-13s %-13s %-10s %-10s %-6s",
147             "NAME", "TYPE", "SIZE", "COMP", "CAPS", "EMU", "DEPTH", "HOST_BITS",
148             "GLSL_TYPE", "GLSL_FMT", "FOURCC");
149     for (int n = 0; n < gpu->num_formats; n++) {
150         pl_fmt fmt = gpu->formats[n];
151 
152         static const char *types[] = {
153             [PL_FMT_UNKNOWN] = "UNKNOWN",
154             [PL_FMT_UNORM]   = "UNORM",
155             [PL_FMT_SNORM]   = "SNORM",
156             [PL_FMT_UINT]    = "UINT",
157             [PL_FMT_SINT]    = "SINT",
158             [PL_FMT_FLOAT]   = "FLOAT",
159         };
160 
161         static const char idx_map[4] = {'R', 'G', 'B', 'A'};
162         char indices[4] = {' ', ' ', ' ', ' '};
163         if (!fmt->opaque) {
164             for (int i = 0; i < fmt->num_components; i++)
165                 indices[i] = idx_map[fmt->sample_order[i]];
166         }
167 
168 
169         PL_DEBUG(gpu, "    %-10s %-6s %-4zu %c%c%c%c " CAP_FIELDS " %-3s "
170                  "{%-2d %-2d %-2d %-2d} {%-2d %-2d %-2d %-2d} %-10s %-10s %-6s",
171                  fmt->name, types[fmt->type], fmt->texel_size,
172                  FMT_IDX4(indices), CAP_VALUES, fmt->emulated ? "y" : "n",
173                  FMT_IDX4(fmt->component_depth), FMT_IDX4(fmt->host_bits),
174                  PL_DEF(fmt->glsl_type, ""), PL_DEF(fmt->glsl_format, ""),
175                  PRINT_FOURCC(fmt->fourcc));
176 
177 #undef CAP_HEADER
178 #undef CAP_FIELDS
179 #undef CAP_VALUES
180 
181         for (int i = 0; i < fmt->num_modifiers; i++) {
182             PL_TRACE(gpu, "        modifiers[%d]: %s",
183                      i, PRINT_DRM_MOD(fmt->modifiers[i]));
184         }
185     }
186 }
187 
pl_gpu_finalize(struct pl_gpu * gpu)188 pl_gpu pl_gpu_finalize(struct pl_gpu *gpu)
189 {
190     // Sort formats
191     qsort(gpu->formats, gpu->num_formats, sizeof(pl_fmt), cmp_fmt);
192 
193     // Verification
194     pl_assert(gpu->ctx == gpu->log);
195     pl_assert(gpu->limits.max_tex_2d_dim);
196     pl_assert(gpu->limits.max_variables || gpu->limits.max_ubo_size);
197 
198     for (int n = 0; n < gpu->num_formats; n++) {
199         pl_fmt fmt = gpu->formats[n];
200         pl_assert(fmt->name);
201         pl_assert(fmt->type);
202         pl_assert(fmt->num_components);
203         pl_assert(fmt->internal_size);
204         pl_assert(fmt->opaque ? !fmt->texel_size : fmt->texel_size);
205         pl_assert(!fmt->gatherable || (fmt->caps & PL_FMT_CAP_SAMPLEABLE));
206         for (int i = 0; i < fmt->num_components; i++) {
207             pl_assert(fmt->component_depth[i]);
208             pl_assert(fmt->opaque ? !fmt->host_bits[i] : fmt->host_bits[i]);
209         }
210 
211         enum pl_fmt_caps texel_caps = PL_FMT_CAP_VERTEX |
212                                       PL_FMT_CAP_TEXEL_UNIFORM |
213                                       PL_FMT_CAP_TEXEL_STORAGE;
214 
215         if (fmt->caps & texel_caps) {
216             pl_assert(fmt->glsl_type);
217             pl_assert(!fmt->opaque);
218         }
219         pl_assert(!fmt->opaque || !(fmt->caps & PL_FMT_CAP_HOST_READABLE));
220         if (fmt->internal_size != fmt->texel_size && !fmt->opaque)
221             pl_assert(fmt->emulated);
222 
223         // Assert uniqueness of name
224         for (int o = n + 1; o < gpu->num_formats; o++)
225             pl_assert(strcmp(fmt->name, gpu->formats[o]->name) != 0);
226     }
227 
228     // Print info
229     PL_INFO(gpu, "GPU information:");
230 
231 #define LOG(fmt, field) \
232     PL_INFO(gpu, "      %-26s %" fmt, #field ":", gpu->LOG_STRUCT.field)
233 
234 #define LOG_STRUCT glsl
235     PL_INFO(gpu, "    GLSL version: %d%s", gpu->glsl.version,
236            gpu->glsl.vulkan ? " (vulkan)" : gpu->glsl.gles ? " es" : "");
237     if (gpu->glsl.compute) {
238         LOG("zu", max_shmem_size);
239         LOG(PRIu32, max_group_threads);
240         LOG(PRIu32, max_group_size[0]);
241         LOG(PRIu32, max_group_size[1]);
242         LOG(PRIu32, max_group_size[2]);
243     }
244     LOG(PRIu32, subgroup_size);
245     LOG(PRIi16, min_gather_offset);
246     LOG(PRIi16, max_gather_offset);
247 #undef LOG_STRUCT
248 
249 #define LOG_STRUCT limits
250     PL_INFO(gpu, "    Limits:");
251     // pl_gpu
252     LOG("d", thread_safe);
253     LOG("d", callbacks);
254     // pl_buf
255     LOG("zu", max_buf_size);
256     LOG("zu", max_ubo_size);
257     LOG("zu", max_ssbo_size);
258     LOG("zu", max_vbo_size);
259     LOG("zu", max_mapped_size);
260     LOG(PRIu64, max_buffer_texels);
261     LOG("zu", align_host_ptr);
262     // pl_tex
263     LOG(PRIu32, max_tex_1d_dim);
264     LOG(PRIu32, max_tex_2d_dim);
265     LOG(PRIu32, max_tex_3d_dim);
266     LOG("d", blittable_1d_3d);
267     LOG("d", buf_transfer);
268     LOG(PRIu32, align_tex_xfer_stride);
269     LOG("zu", align_tex_xfer_offset);
270     // pl_pass
271     LOG("zu", max_variables);
272     LOG("zu", max_constants);
273     LOG("zu", max_pushc_size);
274     if (gpu->glsl.compute) {
275         LOG(PRIu32, max_dispatch[0]);
276         LOG(PRIu32, max_dispatch[1]);
277         LOG(PRIu32, max_dispatch[2]);
278     }
279     LOG(PRIu32, fragment_queues);
280     LOG(PRIu32, compute_queues);
281 #undef LOG_STRUCT
282 #undef LOG
283 
284     if (pl_gpu_supports_interop(gpu)) {
285         PL_INFO(gpu, "    External API interop:");
286 
287         PL_INFO(gpu, "      UUID: %s", PRINT_UUID(gpu->uuid));
288         PL_INFO(gpu, "      PCI: %04x:%02x:%02x:%x",
289                 gpu->pci.domain, gpu->pci.bus, gpu->pci.device, gpu->pci.function);
290         PL_INFO(gpu, "      buf export caps: 0x%x",
291                 (unsigned int) gpu->export_caps.buf);
292         PL_INFO(gpu, "      buf import caps: 0x%x",
293                 (unsigned int) gpu->import_caps.buf);
294         PL_INFO(gpu, "      tex export caps: 0x%x",
295                 (unsigned int) gpu->export_caps.tex);
296         PL_INFO(gpu, "      tex import caps: 0x%x",
297                 (unsigned int) gpu->import_caps.tex);
298         PL_INFO(gpu, "      sync export caps: 0x%x",
299                 (unsigned int) gpu->export_caps.sync);
300         PL_INFO(gpu, "      sync import caps: 0x%x",
301                 (unsigned int) gpu->import_caps.sync);
302     }
303 
304     print_formats(gpu);
305 
306     // Set `gpu->caps` for backwards compatibility
307     pl_gpu_caps caps = 0;
308     if (gpu->glsl.compute)
309         caps |= PL_GPU_CAP_COMPUTE;
310     if (gpu->limits.compute_queues > gpu->limits.fragment_queues)
311         caps |= PL_GPU_CAP_PARALLEL_COMPUTE;
312     if (gpu->limits.max_variables)
313         caps |= PL_GPU_CAP_INPUT_VARIABLES;
314     if (gpu->limits.max_mapped_size)
315         caps |= PL_GPU_CAP_MAPPED_BUFFERS;
316     if (gpu->limits.blittable_1d_3d)
317         caps |= PL_GPU_CAP_BLITTABLE_1D_3D;
318     if (gpu->glsl.subgroup_size)
319         caps |= PL_GPU_CAP_SUBGROUPS;
320     if (gpu->limits.callbacks)
321         caps |= PL_GPU_CAP_CALLBACKS;
322     if (gpu->limits.thread_safe)
323         caps |= PL_GPU_CAP_THREAD_SAFE;
324     if (gpu->limits.max_constants)
325         caps |= PL_GPU_CAP_SPEC_CONSTANTS;
326     gpu->caps = caps;
327 
328     // Set the backwards compatibility fields in `limits`
329     gpu->limits.max_shmem_size = gpu->glsl.max_shmem_size;
330     gpu->limits.max_group_threads = gpu->glsl.max_group_threads;
331     for (int i = 0; i < 3; i++)
332         gpu->limits.max_group_size[i] = gpu->glsl.max_group_size[i];
333     gpu->limits.subgroup_size = gpu->glsl.subgroup_size;
334     gpu->limits.min_gather_offset = gpu->glsl.min_gather_offset;
335     gpu->limits.max_gather_offset = gpu->glsl.max_gather_offset;
336 
337     return gpu;
338 }
339 
340 struct glsl_fmt {
341     enum pl_fmt_type type;
342     int num_components;
343     int depth[4];
344     const char *glsl_format;
345     uint32_t drm_fourcc;
346 };
347 
348 // List taken from the GLSL specification. (Yes, GLSL supports only exactly
349 // these formats with exactly these names)
350 static const struct glsl_fmt pl_glsl_fmts[] = {
351     {PL_FMT_FLOAT, 1, {16},             "r16f"},
352     {PL_FMT_FLOAT, 1, {32},             "r32f"},
353     {PL_FMT_FLOAT, 2, {16, 16},         "rg16f"},
354     {PL_FMT_FLOAT, 2, {32, 32},         "rg32f"},
355     {PL_FMT_FLOAT, 4, {16, 16, 16, 16}, "rgba16f"},
356     {PL_FMT_FLOAT, 4, {32, 32, 32, 32}, "rgba32f"},
357     {PL_FMT_FLOAT, 3, {11, 11, 10},     "r11f_g11f_b10f"},
358 
359     {PL_FMT_UNORM, 1, {8},              "r8"},
360     {PL_FMT_UNORM, 1, {16},             "r16"},
361     {PL_FMT_UNORM, 2, {8,  8},          "rg8"},
362     {PL_FMT_UNORM, 2, {16, 16},         "rg16"},
363     {PL_FMT_UNORM, 4, {8,  8,  8,  8},  "rgba8"},
364     {PL_FMT_UNORM, 4, {16, 16, 16, 16}, "rgba16"},
365     {PL_FMT_UNORM, 4, {10, 10, 10,  2}, "rgb10_a2"},
366 
367     {PL_FMT_SNORM, 1, {8},              "r8_snorm"},
368     {PL_FMT_SNORM, 1, {16},             "r16_snorm"},
369     {PL_FMT_SNORM, 2, {8,  8},          "rg8_snorm"},
370     {PL_FMT_SNORM, 2, {16, 16},         "rg16_snorm"},
371     {PL_FMT_SNORM, 4, {8,  8,  8,  8},  "rgba8_snorm"},
372     {PL_FMT_SNORM, 4, {16, 16, 16, 16}, "rgba16_snorm"},
373 
374     {PL_FMT_UINT,  1, {8},              "r8ui"},
375     {PL_FMT_UINT,  1, {16},             "r16ui"},
376     {PL_FMT_UINT,  1, {32},             "r32ui"},
377     {PL_FMT_UINT,  2, {8,  8},          "rg8ui"},
378     {PL_FMT_UINT,  2, {16, 16},         "rg16ui"},
379     {PL_FMT_UINT,  2, {32, 32},         "rg32ui"},
380     {PL_FMT_UINT,  4, {8,  8,  8,  8},  "rgba8ui"},
381     {PL_FMT_UINT,  4, {16, 16, 16, 16}, "rgba16ui"},
382     {PL_FMT_UINT,  4, {32, 32, 32, 32}, "rgba32ui"},
383     {PL_FMT_UINT,  4, {10, 10, 10,  2}, "rgb10_a2ui"},
384 
385     {PL_FMT_SINT,  1, {8},              "r8i"},
386     {PL_FMT_SINT,  1, {16},             "r16i"},
387     {PL_FMT_SINT,  1, {32},             "r32i"},
388     {PL_FMT_SINT,  2, {8,  8},          "rg8i"},
389     {PL_FMT_SINT,  2, {16, 16},         "rg16i"},
390     {PL_FMT_SINT,  2, {32, 32},         "rg32i"},
391     {PL_FMT_SINT,  4, {8,  8,  8,  8},  "rgba8i"},
392     {PL_FMT_SINT,  4, {16, 16, 16, 16}, "rgba16i"},
393     {PL_FMT_SINT,  4, {32, 32, 32, 32}, "rgba32i"},
394 };
395 
pl_fmt_glsl_format(pl_fmt fmt,int components)396 const char *pl_fmt_glsl_format(pl_fmt fmt, int components)
397 {
398     if (fmt->opaque)
399         return NULL;
400 
401     for (int n = 0; n < PL_ARRAY_SIZE(pl_glsl_fmts); n++) {
402         const struct glsl_fmt *gfmt = &pl_glsl_fmts[n];
403 
404         if (fmt->type != gfmt->type)
405             continue;
406         if (components != gfmt->num_components)
407             continue;
408 
409         // The component order is irrelevant, so we need to sort the depth
410         // based on the component's index
411         int depth[4] = {0};
412         for (int i = 0; i < fmt->num_components; i++)
413             depth[fmt->sample_order[i]] = fmt->component_depth[i];
414 
415         // Copy over any emulated components
416         for (int i = fmt->num_components; i < components; i++)
417             depth[i] = gfmt->depth[i];
418 
419         for (int i = 0; i < PL_ARRAY_SIZE(depth); i++) {
420             if (depth[i] != gfmt->depth[i])
421                 goto next_fmt;
422         }
423 
424         return gfmt->glsl_format;
425 
426 next_fmt: ; // equivalent to `continue`
427     }
428 
429     return NULL;
430 }
431 
432 #define FOURCC(a,b,c,d) ((uint32_t)(a)        | ((uint32_t)(b) << 8) | \
433                         ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))
434 
435 struct pl_fmt_fourcc {
436     const char *name;
437     uint32_t fourcc;
438 };
439 
440 static const struct pl_fmt_fourcc pl_fmt_fourccs[] = {
441     // 8 bpp red
442     {"r8",          FOURCC('R','8',' ',' ')},
443     // 16 bpp red
444     {"r16",         FOURCC('R','1','6',' ')},
445     // 16 bpp rg
446     {"rg8",         FOURCC('G','R','8','8')},
447     {"gr8",         FOURCC('R','G','8','8')},
448     // 32 bpp rg
449     {"rg16",        FOURCC('G','R','3','2')},
450     {"gr16",        FOURCC('R','G','3','2')},
451     // 8 bpp rgb: N/A
452     // 16 bpp rgb
453     {"argb4",       FOURCC('B','A','1','2')},
454     {"abgr4",       FOURCC('R','A','1','2')},
455     {"rgba4",       FOURCC('A','B','1','2')},
456     {"bgra4",       FOURCC('A','R','1','2')},
457 
458     {"a1rgb5",      FOURCC('B','A','1','5')},
459     {"a1bgr5",      FOURCC('R','A','1','5')},
460     {"rgb5a1",      FOURCC('A','B','1','5')},
461     {"bgr5a1",      FOURCC('A','R','1','5')},
462 
463     {"rgb565",      FOURCC('B','G','1','6')},
464     {"bgr565",      FOURCC('R','G','1','6')},
465     // 24 bpp rgb
466     {"rgb8",        FOURCC('B','G','2','4')},
467     {"bgr8",        FOURCC('R','G','2','4')},
468     // 32 bpp rgb
469     {"argb8",       FOURCC('B','A','2','4')},
470     {"abgr8",       FOURCC('R','A','2','4')},
471     {"rgba8",       FOURCC('A','B','2','4')},
472     {"bgra8",       FOURCC('A','R','2','4')},
473 
474     {"a2rgb10",     FOURCC('B','A','3','0')},
475     {"a2bgr10",     FOURCC('R','A','3','0')},
476     {"rgb10a2",     FOURCC('A','B','3','0')},
477     {"bgr10a2",     FOURCC('A','R','3','0')},
478     // 64bpp rgb
479     {"rgba16hf",    FOURCC('A','B','4','H')},
480     {"bgra16hf",    FOURCC('A','R','4','H')},
481 
482     // no planar formats yet (tm)
483 };
484 
pl_fmt_fourcc(pl_fmt fmt)485 uint32_t pl_fmt_fourcc(pl_fmt fmt)
486 {
487     if (fmt->opaque)
488         return 0;
489 
490     for (int n = 0; n < PL_ARRAY_SIZE(pl_fmt_fourccs); n++) {
491         const struct pl_fmt_fourcc *fourcc = &pl_fmt_fourccs[n];
492         if (strcmp(fmt->name, fourcc->name) == 0)
493             return fourcc->fourcc;
494     }
495 
496     return 0; // no matching format
497 }
498 
pl_find_fmt(pl_gpu gpu,enum pl_fmt_type type,int num_components,int min_depth,int host_bits,enum pl_fmt_caps caps)499 pl_fmt pl_find_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components,
500                     int min_depth, int host_bits, enum pl_fmt_caps caps)
501 {
502     for (int n = 0; n < gpu->num_formats; n++) {
503         pl_fmt fmt = gpu->formats[n];
504         if (fmt->type != type || fmt->num_components != num_components)
505             continue;
506         if ((fmt->caps & caps) != caps)
507             continue;
508 
509         // When specifying some particular host representation, ensure the
510         // format is non-opaque, ordered and unpadded
511         if (host_bits && fmt->opaque)
512             continue;
513         if (host_bits && fmt->texel_size * 8 != host_bits * num_components)
514             continue;
515         if (host_bits && !pl_fmt_is_ordered(fmt))
516             continue;
517 
518         for (int i = 0; i < fmt->num_components; i++) {
519             if (fmt->component_depth[i] < min_depth)
520                 goto next_fmt;
521             if (host_bits && fmt->host_bits[i] != host_bits)
522                 goto next_fmt;
523         }
524 
525         return fmt;
526 
527 next_fmt: ; // equivalent to `continue`
528     }
529 
530     // ran out of formats
531     PL_DEBUG(gpu, "No matching format found");
532     return NULL;
533 }
534 
pl_find_vertex_fmt(pl_gpu gpu,enum pl_fmt_type type,int comps)535 pl_fmt pl_find_vertex_fmt(pl_gpu gpu, enum pl_fmt_type type, int comps)
536 {
537     static const size_t sizes[] = {
538         [PL_FMT_FLOAT] = sizeof(float),
539         [PL_FMT_UNORM] = sizeof(unsigned),
540         [PL_FMT_UINT]  = sizeof(unsigned),
541         [PL_FMT_SNORM] = sizeof(int),
542         [PL_FMT_SINT]  = sizeof(int),
543     };
544 
545     return pl_find_fmt(gpu, type, comps, 0, 8 * sizes[type], PL_FMT_CAP_VERTEX);
546 }
547 
pl_find_named_fmt(pl_gpu gpu,const char * name)548 pl_fmt pl_find_named_fmt(pl_gpu gpu, const char *name)
549 {
550     if (!name)
551         return NULL;
552 
553     for (int i = 0; i < gpu->num_formats; i++) {
554         pl_fmt fmt = gpu->formats[i];
555         if (strcmp(name, fmt->name) == 0)
556             return fmt;
557     }
558 
559     // ran out of formats
560     return NULL;
561 }
562 
pl_find_fourcc(pl_gpu gpu,uint32_t fourcc)563 pl_fmt pl_find_fourcc(pl_gpu gpu, uint32_t fourcc)
564 {
565     if (!fourcc)
566         return NULL;
567 
568     for (int i = 0; i < gpu->num_formats; i++) {
569         pl_fmt fmt = gpu->formats[i];
570         if (fourcc == fmt->fourcc)
571             return fmt;
572     }
573 
574     // ran out of formats
575     return NULL;
576 }
577 
check_mod(pl_gpu gpu,pl_fmt fmt,uint64_t mod)578 static inline bool check_mod(pl_gpu gpu, pl_fmt fmt, uint64_t mod)
579 {
580     for (int i = 0; i < fmt->num_modifiers; i++) {
581         if (fmt->modifiers[i] == mod)
582             return true;
583     }
584 
585 
586     PL_ERR(gpu, "DRM modifier %s not available for format %s. Available modifiers:",
587            PRINT_DRM_MOD(mod), fmt->name);
588     for (int i = 0; i < fmt->num_modifiers; i++)
589         PL_ERR(gpu, "    %s", PRINT_DRM_MOD(fmt->modifiers[i]));
590 
591     return false;
592 }
593 
pl_tex_create(pl_gpu gpu,const struct pl_tex_params * params)594 pl_tex pl_tex_create(pl_gpu gpu, const struct pl_tex_params *params)
595 {
596     require(!params->import_handle || !params->export_handle);
597     require(!params->import_handle || !params->initial_data);
598     if (params->export_handle) {
599         require(params->export_handle & gpu->export_caps.tex);
600         require(PL_ISPOT(params->export_handle));
601     }
602     if (params->import_handle) {
603         require(params->import_handle & gpu->import_caps.tex);
604         require(PL_ISPOT(params->import_handle));
605         require(params->shared_mem.size > 0);
606         if (params->import_handle == PL_HANDLE_DMA_BUF) {
607             if (!check_mod(gpu, params->format, params->shared_mem.drm_format_mod))
608                 goto error;
609             if (params->shared_mem.stride_w)
610                 require(params->w && params->shared_mem.stride_w >= params->w);
611             if (params->shared_mem.stride_h)
612                 require(params->h && params->shared_mem.stride_h >= params->h);
613         }
614     }
615 
616     switch (pl_tex_params_dimension(*params)) {
617     case 1:
618         require(params->w > 0);
619         require(params->w <= gpu->limits.max_tex_1d_dim);
620         require(!params->renderable);
621         require(!params->blit_src || gpu->limits.blittable_1d_3d);
622         require(!params->blit_dst || gpu->limits.blittable_1d_3d);
623         break;
624     case 2:
625         require(params->w > 0 && params->h > 0);
626         require(params->w <= gpu->limits.max_tex_2d_dim);
627         require(params->h <= gpu->limits.max_tex_2d_dim);
628         break;
629     case 3:
630         require(params->w > 0 && params->h > 0 && params->d > 0);
631         require(params->w <= gpu->limits.max_tex_3d_dim);
632         require(params->h <= gpu->limits.max_tex_3d_dim);
633         require(params->d <= gpu->limits.max_tex_3d_dim);
634         require(!params->renderable);
635         require(!params->blit_src || gpu->limits.blittable_1d_3d);
636         require(!params->blit_dst || gpu->limits.blittable_1d_3d);
637         break;
638     }
639 
640     pl_fmt fmt = params->format;
641     require(fmt);
642     require(!params->host_readable || fmt->caps & PL_FMT_CAP_HOST_READABLE);
643     require(!params->host_readable || !fmt->opaque);
644     require(!params->host_writable || !fmt->opaque);
645     require(!params->sampleable || fmt->caps & PL_FMT_CAP_SAMPLEABLE);
646     require(!params->renderable || fmt->caps & PL_FMT_CAP_RENDERABLE);
647     require(!params->storable   || fmt->caps & PL_FMT_CAP_STORABLE);
648     require(!params->blit_src   || fmt->caps & PL_FMT_CAP_BLITTABLE);
649     require(!params->blit_dst   || fmt->caps & PL_FMT_CAP_BLITTABLE);
650 
651     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
652     return impl->tex_create(gpu, params);
653 
654 error:
655     return NULL;
656 }
657 
pl_tex_destroy(pl_gpu gpu,pl_tex * tex)658 void pl_tex_destroy(pl_gpu gpu, pl_tex *tex)
659 {
660     if (!*tex)
661         return;
662 
663     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
664     impl->tex_destroy(gpu, *tex);
665     *tex = NULL;
666 }
667 
pl_tex_params_superset(struct pl_tex_params a,struct pl_tex_params b)668 static bool pl_tex_params_superset(struct pl_tex_params a, struct pl_tex_params b)
669 {
670     return a.w == b.w && a.h == b.h && a.d == b.d &&
671            a.format          == b.format &&
672            (a.sampleable     || !b.sampleable) &&
673            (a.renderable     || !b.renderable) &&
674            (a.storable       || !b.storable) &&
675            (a.blit_src       || !b.blit_src) &&
676            (a.blit_dst       || !b.blit_dst) &&
677            (a.host_writable  || !b.host_writable) &&
678            (a.host_readable  || !b.host_readable);
679 }
680 
pl_tex_recreate(pl_gpu gpu,pl_tex * tex,const struct pl_tex_params * params)681 bool pl_tex_recreate(pl_gpu gpu, pl_tex *tex, const struct pl_tex_params *params)
682 {
683     if (params->initial_data) {
684         PL_ERR(gpu, "pl_tex_recreate may not be used with `initial_data`!");
685         return false;
686     }
687 
688     if (*tex && pl_tex_params_superset((*tex)->params, *params)) {
689         pl_tex_invalidate(gpu, *tex);
690         return true;
691     }
692 
693     PL_INFO(gpu, "(Re)creating %dx%dx%d texture with format %s",
694             params->w, params->h, params->d, params->format->name);
695 
696     pl_tex_destroy(gpu, tex);
697     *tex = pl_tex_create(gpu, params);
698 
699     return !!*tex;
700 }
701 
pl_tex_clear_ex(pl_gpu gpu,pl_tex dst,const union pl_clear_color color)702 void pl_tex_clear_ex(pl_gpu gpu, pl_tex dst, const union pl_clear_color color)
703 {
704     require(dst->params.blit_dst);
705 
706     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
707     if (impl->tex_invalidate)
708         impl->tex_invalidate(gpu, dst);
709     impl->tex_clear_ex(gpu, dst, color);
710 
711 error:
712     return;
713 }
714 
pl_tex_clear(pl_gpu gpu,pl_tex dst,const float color[4])715 void pl_tex_clear(pl_gpu gpu, pl_tex dst, const float color[4])
716 {
717     if (!pl_fmt_is_float(dst->params.format)) {
718         PL_ERR(gpu, "Cannot call `pl_tex_clear` on integer textures, please "
719                "use `pl_tex_clear_ex` instead.");
720         return;
721     }
722 
723     const union pl_clear_color col = {
724         .f = { color[0], color[1], color[2], color[3] },
725     };
726 
727     pl_tex_clear_ex(gpu, dst, col);
728 }
729 
pl_tex_invalidate(pl_gpu gpu,pl_tex tex)730 void pl_tex_invalidate(pl_gpu gpu, pl_tex tex)
731 {
732     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
733     if (impl->tex_invalidate)
734         impl->tex_invalidate(gpu, tex);
735 }
736 
strip_coords(pl_tex tex,struct pl_rect3d * rc)737 static void strip_coords(pl_tex tex, struct pl_rect3d *rc)
738 {
739     if (!tex->params.d) {
740         rc->z0 = 0;
741         rc->z1 = 1;
742     }
743 
744     if (!tex->params.h) {
745         rc->y0 = 0;
746         rc->y1 = 1;
747     }
748 }
749 
infer_rc(pl_tex tex,struct pl_rect3d * rc)750 static void infer_rc(pl_tex tex, struct pl_rect3d *rc)
751 {
752     if (!rc->x0 && !rc->x1)
753         rc->x1 = tex->params.w;
754     if (!rc->y0 && !rc->y1)
755         rc->y1 = tex->params.h;
756     if (!rc->z0 && !rc->z1)
757         rc->z1 = tex->params.d;
758 }
759 
pl_tex_blit(pl_gpu gpu,const struct pl_tex_blit_params * params)760 void pl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params)
761 {
762     pl_tex src = params->src, dst = params->dst;
763     require(src && dst);
764     pl_fmt src_fmt = src->params.format;
765     pl_fmt dst_fmt = dst->params.format;
766     require(src_fmt->internal_size == dst_fmt->internal_size);
767     require((src_fmt->type == PL_FMT_UINT) == (dst_fmt->type == PL_FMT_UINT));
768     require((src_fmt->type == PL_FMT_SINT) == (dst_fmt->type == PL_FMT_SINT));
769     require(src->params.blit_src);
770     require(dst->params.blit_dst);
771     require(params->sample_mode != PL_TEX_SAMPLE_LINEAR || (src_fmt->caps & PL_FMT_CAP_LINEAR));
772 
773     struct pl_tex_blit_params fixed = *params;
774     infer_rc(src, &fixed.src_rc);
775     infer_rc(dst, &fixed.dst_rc);
776     strip_coords(src, &fixed.src_rc);
777     strip_coords(dst, &fixed.dst_rc);
778 
779     require(fixed.src_rc.x0 >= 0 && fixed.src_rc.x0 < src->params.w);
780     require(fixed.src_rc.x1 > 0 && fixed.src_rc.x1 <= src->params.w);
781     require(fixed.dst_rc.x0 >= 0 && fixed.dst_rc.x0 < dst->params.w);
782     require(fixed.dst_rc.x1 > 0 && fixed.dst_rc.x1 <= dst->params.w);
783 
784     if (src->params.h) {
785         require(fixed.src_rc.y0 >= 0 && fixed.src_rc.y0 < src->params.h);
786         require(fixed.src_rc.y1 > 0 && fixed.src_rc.y1 <= src->params.h);
787     }
788 
789     if (dst->params.h) {
790         require(fixed.dst_rc.y0 >= 0 && fixed.dst_rc.y0 < dst->params.h);
791         require(fixed.dst_rc.y1 > 0 && fixed.dst_rc.y1 <= dst->params.h);
792     }
793 
794     if (src->params.d) {
795         require(fixed.src_rc.z0 >= 0 && fixed.src_rc.z0 < src->params.d);
796         require(fixed.src_rc.z1 > 0 && fixed.src_rc.z1 <= src->params.d);
797     }
798 
799     if (dst->params.d) {
800         require(fixed.dst_rc.z0 >= 0 && fixed.dst_rc.z0 < dst->params.d);
801         require(fixed.dst_rc.z1 > 0 && fixed.dst_rc.z1 <= dst->params.d);
802     }
803 
804     struct pl_rect3d full = {0, 0, 0, dst->params.w, dst->params.h, dst->params.d};
805     strip_coords(dst, &full);
806 
807     struct pl_rect3d rcnorm = fixed.dst_rc;
808     pl_rect3d_normalize(&rcnorm);
809     if (pl_rect3d_eq(rcnorm, full))
810         pl_tex_invalidate(gpu, dst);
811 
812     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
813     impl->tex_blit(gpu, &fixed);
814 
815 error:
816     return;
817 }
818 
pl_tex_transfer_size(const struct pl_tex_transfer_params * par)819 size_t pl_tex_transfer_size(const struct pl_tex_transfer_params *par)
820 {
821     pl_tex tex = par->tex;
822     int w = pl_rect_w(par->rc), h = pl_rect_h(par->rc), d = pl_rect_d(par->rc);
823 
824     // This generates the absolute bare minimum size of a buffer required to
825     // hold the data of a texture upload/download, by including stride padding
826     // only where strictly necessary.
827     int texels = ((d - 1) * par->stride_h + (h - 1)) * par->stride_w + w;
828     return texels * tex->params.format->texel_size;
829 }
830 
fix_tex_transfer(pl_gpu gpu,struct pl_tex_transfer_params * params)831 static bool fix_tex_transfer(pl_gpu gpu, struct pl_tex_transfer_params *params)
832 {
833     pl_tex tex = params->tex;
834     struct pl_rect3d rc = params->rc;
835 
836     // Infer the default values
837     infer_rc(tex, &rc);
838     if (!params->stride_w)
839         params->stride_w = pl_rect_w(rc);
840     if (!params->stride_h)
841         params->stride_h = pl_rect_h(rc);
842 
843     // Sanitize superfluous coordinates for the benefit of the GPU
844     strip_coords(tex, &rc);
845     if (!tex->params.w)
846         params->stride_w = 1;
847     if (!tex->params.h)
848         params->stride_h = 1;
849 
850     params->rc = rc;
851 
852     // Check the parameters for sanity
853     switch (pl_tex_params_dimension(tex->params))
854     {
855     case 3:
856         require(rc.z1 > rc.z0);
857         require(rc.z0 >= 0 && rc.z0 <  tex->params.d);
858         require(rc.z1 >  0 && rc.z1 <= tex->params.d);
859         require(params->stride_h >= pl_rect_h(rc));
860         // fall through
861     case 2:
862         require(rc.y1 > rc.y0);
863         require(rc.y0 >= 0 && rc.y0 <  tex->params.h);
864         require(rc.y1 >  0 && rc.y1 <= tex->params.h);
865         require(params->stride_w >= pl_rect_w(rc));
866         // fall through
867     case 1:
868         require(rc.x1 > rc.x0);
869         require(rc.x0 >= 0 && rc.x0 <  tex->params.w);
870         require(rc.x1 >  0 && rc.x1 <= tex->params.w);
871         break;
872     }
873 
874     require(!params->buf ^ !params->ptr); // exactly one
875     if (params->buf) {
876         pl_buf buf = params->buf;
877         size_t size = pl_tex_transfer_size(params);
878         require(params->buf_offset + size <= buf->params.size);
879         require(gpu->limits.buf_transfer);
880     }
881 
882     require(!params->callback || gpu->limits.callbacks);
883     return true;
884 
885 error:
886     return false;
887 }
888 
pl_tex_upload(pl_gpu gpu,const struct pl_tex_transfer_params * params)889 bool pl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params)
890 {
891     pl_tex tex = params->tex;
892     require(tex);
893     require(tex->params.host_writable);
894 
895     struct pl_tex_transfer_params fixed = *params;
896     if (!fix_tex_transfer(gpu, &fixed))
897         goto error;
898 
899     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
900     return impl->tex_upload(gpu, &fixed);
901 
902 error:
903     return false;
904 }
905 
pl_tex_download(pl_gpu gpu,const struct pl_tex_transfer_params * params)906 bool pl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params)
907 {
908     pl_tex tex = params->tex;
909     require(tex);
910     require(tex->params.host_readable);
911 
912     struct pl_tex_transfer_params fixed = *params;
913     if (!fix_tex_transfer(gpu, &fixed))
914         goto error;
915 
916     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
917     return impl->tex_download(gpu, &fixed);
918 
919 error:
920     return false;
921 }
922 
pl_tex_poll(pl_gpu gpu,pl_tex tex,uint64_t t)923 bool pl_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t t)
924 {
925     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
926     return impl->tex_poll ? impl->tex_poll(gpu, tex, t) : false;
927 }
928 
pl_buf_params_infer(struct pl_buf_params params)929 static struct pl_buf_params pl_buf_params_infer(struct pl_buf_params params)
930 {
931     switch (params.type) {
932     case PL_BUF_UNIFORM:
933     case PL_BUF_TEXEL_UNIFORM:
934         params.uniform = true;
935         break;
936     case PL_BUF_STORAGE:
937     case PL_BUF_TEXEL_STORAGE:
938         params.storable = true;
939         break;
940     case PL_BUF_TEX_TRANSFER:
941         break;
942     case PL_BUF_TYPE_COUNT:
943         pl_unreachable();
944     }
945 
946     return params;
947 }
948 
949 static bool warned_rounding = false;
950 
pl_buf_create(pl_gpu gpu,const struct pl_buf_params * pparams)951 pl_buf pl_buf_create(pl_gpu gpu, const struct pl_buf_params *pparams)
952 {
953     struct pl_buf_params params = pl_buf_params_infer(*pparams);
954 
955     require(!params.import_handle || !params.export_handle);
956     if (params.export_handle) {
957         require(PL_ISPOT(params.export_handle));
958         require(params.export_handle & gpu->export_caps.buf);
959     }
960     if (params.import_handle) {
961         require(PL_ISPOT(params.import_handle));
962         require(params.import_handle & gpu->import_caps.buf);
963         struct pl_shared_mem *shmem = &params.shared_mem;
964         require(shmem->offset + params.size <= shmem->size);
965         require(params.import_handle != PL_HANDLE_DMA_BUF || !shmem->drm_format_mod);
966 
967         // Fix misalignment on host pointer imports
968         if (params.import_handle == PL_HANDLE_HOST_PTR) {
969             uintptr_t page_mask = ~(gpu->limits.align_host_ptr - 1);
970             uintptr_t ptr_base = (uintptr_t) shmem->handle.ptr & page_mask;
971             size_t ptr_offset = (uintptr_t) shmem->handle.ptr - ptr_base;
972             size_t buf_offset = ptr_offset + shmem->offset;
973             size_t ptr_size = PL_ALIGN2(ptr_offset + shmem->size,
974                                         gpu->limits.align_host_ptr);
975 
976             if (ptr_base != (uintptr_t) shmem->handle.ptr || ptr_size > shmem->size) {
977                 if (!warned_rounding) {
978                     warned_rounding = true;
979                     PL_WARN(gpu, "Imported host pointer is not page-aligned. "
980                             "This should normally be fine on most platforms, "
981                             "but may cause issues in some rare circumstances.");
982                 }
983 
984                 PL_TRACE(gpu, "Rounding imported host pointer %p + %zu -> %zu to "
985                          "nearest page boundaries: %p + %zu -> %zu",
986                           shmem->handle.ptr, shmem->offset, shmem->size,
987                           (void *) ptr_base, buf_offset, ptr_size);
988             }
989 
990             shmem->handle.ptr = (void *) ptr_base;
991             shmem->offset = buf_offset;
992             shmem->size = ptr_size;
993         }
994     }
995 
996     require(params.size > 0 && params.size <= gpu->limits.max_buf_size);
997     require(!params.uniform || params.size <= gpu->limits.max_ubo_size);
998     require(!params.storable || params.size <= gpu->limits.max_ssbo_size);
999     require(!params.drawable || params.size <= gpu->limits.max_vbo_size);
1000     require(!params.host_mapped || params.size <= gpu->limits.max_mapped_size);
1001 
1002     if (params.format) {
1003         pl_fmt fmt = params.format;
1004         require(params.size <= gpu->limits.max_buffer_texels * fmt->texel_size);
1005         require(!params.uniform || (fmt->caps & PL_FMT_CAP_TEXEL_UNIFORM));
1006         require(!params.storable || (fmt->caps & PL_FMT_CAP_TEXEL_STORAGE));
1007     }
1008 
1009     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1010     pl_buf buf = impl->buf_create(gpu, &params);
1011     if (buf)
1012         require(!params.host_mapped || buf->data);
1013 
1014     return buf;
1015 
1016 error:
1017     return NULL;
1018 }
1019 
pl_buf_destroy(pl_gpu gpu,pl_buf * buf)1020 void pl_buf_destroy(pl_gpu gpu, pl_buf *buf)
1021 {
1022     if (!*buf)
1023         return;
1024 
1025     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1026     impl->buf_destroy(gpu, *buf);
1027     *buf = NULL;
1028 }
1029 
pl_buf_params_superset(struct pl_buf_params a,struct pl_buf_params b)1030 static bool pl_buf_params_superset(struct pl_buf_params a, struct pl_buf_params b)
1031 {
1032     return a.size            >= b.size &&
1033            a.memory_type     == b.memory_type &&
1034            a.format          == b.format &&
1035            (a.host_writable  || !b.host_writable) &&
1036            (a.host_readable  || !b.host_readable) &&
1037            (a.host_mapped    || !b.host_mapped) &&
1038            (a.uniform        || !b.uniform) &&
1039            (a.storable       || !b.storable) &&
1040            (a.drawable       || !b.drawable);
1041 }
1042 
pl_buf_recreate(pl_gpu gpu,pl_buf * buf,const struct pl_buf_params * pparams)1043 bool pl_buf_recreate(pl_gpu gpu, pl_buf *buf, const struct pl_buf_params *pparams)
1044 {
1045 
1046     struct pl_buf_params params = pl_buf_params_infer(*pparams);
1047 
1048     if (params.initial_data) {
1049         PL_ERR(gpu, "pl_buf_recreate may not be used with `initial_data`!");
1050         return false;
1051     }
1052 
1053     if (*buf && pl_buf_params_superset((*buf)->params, params))
1054         return true;
1055 
1056     PL_INFO(gpu, "(Re)creating %zu buffer", params.size);
1057     pl_buf_destroy(gpu, buf);
1058     *buf = pl_buf_create(gpu, &params);
1059 
1060     return !!*buf;
1061 }
1062 
pl_buf_write(pl_gpu gpu,pl_buf buf,size_t buf_offset,const void * data,size_t size)1063 void pl_buf_write(pl_gpu gpu, pl_buf buf, size_t buf_offset,
1064                   const void *data, size_t size)
1065 {
1066     require(buf->params.host_writable);
1067     require(buf_offset + size <= buf->params.size);
1068     require(buf_offset == PL_ALIGN2(buf_offset, 4));
1069 
1070     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1071     impl->buf_write(gpu, buf, buf_offset, data, size);
1072 
1073 error:
1074     return;
1075 }
1076 
pl_buf_read(pl_gpu gpu,pl_buf buf,size_t buf_offset,void * dest,size_t size)1077 bool pl_buf_read(pl_gpu gpu, pl_buf buf, size_t buf_offset,
1078                  void *dest, size_t size)
1079 {
1080     require(buf->params.host_readable);
1081     require(buf_offset + size <= buf->params.size);
1082 
1083     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1084     return impl->buf_read(gpu, buf, buf_offset, dest, size);
1085 
1086 error:
1087     return false;
1088 }
1089 
pl_buf_copy(pl_gpu gpu,pl_buf dst,size_t dst_offset,pl_buf src,size_t src_offset,size_t size)1090 void pl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset,
1091                  pl_buf src, size_t src_offset, size_t size)
1092 {
1093     require(src_offset + size <= src->params.size);
1094     require(dst_offset + size <= dst->params.size);
1095 
1096     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1097     impl->buf_copy(gpu, dst, dst_offset, src, src_offset, size);
1098 
1099 error:
1100     return;
1101 }
1102 
pl_buf_export(pl_gpu gpu,pl_buf buf)1103 bool pl_buf_export(pl_gpu gpu, pl_buf buf)
1104 {
1105     require(buf->params.export_handle || buf->params.import_handle);
1106 
1107     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1108     return impl->buf_export(gpu, buf);
1109 
1110 error:
1111     return false;
1112 }
1113 
pl_buf_poll(pl_gpu gpu,pl_buf buf,uint64_t t)1114 bool pl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t t)
1115 {
1116     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1117     return impl->buf_poll ? impl->buf_poll(gpu, buf, t) : false;
1118 }
1119 
pl_var_type_size(enum pl_var_type type)1120 size_t pl_var_type_size(enum pl_var_type type)
1121 {
1122     switch (type) {
1123     case PL_VAR_SINT:  return sizeof(int);
1124     case PL_VAR_UINT:  return sizeof(unsigned int);
1125     case PL_VAR_FLOAT: return sizeof(float);
1126     case PL_VAR_INVALID: // fall through
1127     case PL_VAR_TYPE_COUNT: break;
1128     }
1129 
1130     pl_unreachable();
1131 }
1132 
1133 #define PL_VAR(TYPE, NAME, M, V)                        \
1134     struct pl_var pl_var_##NAME(const char *name) {     \
1135         return (struct pl_var) {                        \
1136             .name  = name,                              \
1137             .type  = PL_VAR_##TYPE,                     \
1138             .dim_m = M,                                 \
1139             .dim_v = V,                                 \
1140             .dim_a = 1,                                 \
1141         };                                              \
1142     }
1143 
1144 PL_VAR(FLOAT, float,    1, 1)
1145 PL_VAR(FLOAT, vec2,     1, 2)
1146 PL_VAR(FLOAT, vec3,     1, 3)
1147 PL_VAR(FLOAT, vec4,     1, 4)
1148 PL_VAR(FLOAT, mat2,     2, 2)
1149 PL_VAR(FLOAT, mat2x3,   2, 3)
1150 PL_VAR(FLOAT, mat2x4,   2, 4)
1151 PL_VAR(FLOAT, mat3,     3, 3)
1152 PL_VAR(FLOAT, mat3x4,   3, 4)
1153 PL_VAR(FLOAT, mat4x2,   4, 2)
1154 PL_VAR(FLOAT, mat4x3,   4, 3)
1155 PL_VAR(FLOAT, mat4,     4, 4)
1156 PL_VAR(SINT,  int,      1, 1)
1157 PL_VAR(SINT,  ivec2,    1, 2)
1158 PL_VAR(SINT,  ivec3,    1, 3)
1159 PL_VAR(SINT,  ivec4,    1, 4)
1160 PL_VAR(UINT,  uint,     1, 1)
1161 PL_VAR(UINT,  uvec2,    1, 2)
1162 PL_VAR(UINT,  uvec3,    1, 3)
1163 PL_VAR(UINT,  uvec4,    1, 4)
1164 
1165 #undef PL_VAR
1166 
1167 const struct pl_named_var pl_var_glsl_types[] = {
1168     // float vectors
1169     { "float",  { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 1, .dim_a = 1, }},
1170     { "vec2",   { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 2, .dim_a = 1, }},
1171     { "vec3",   { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 3, .dim_a = 1, }},
1172     { "vec4",   { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 4, .dim_a = 1, }},
1173     // float matrices
1174     { "mat2",   { .type = PL_VAR_FLOAT, .dim_m = 2, .dim_v = 2, .dim_a = 1, }},
1175     { "mat2x3", { .type = PL_VAR_FLOAT, .dim_m = 2, .dim_v = 3, .dim_a = 1, }},
1176     { "mat2x4", { .type = PL_VAR_FLOAT, .dim_m = 2, .dim_v = 4, .dim_a = 1, }},
1177     { "mat3",   { .type = PL_VAR_FLOAT, .dim_m = 3, .dim_v = 3, .dim_a = 1, }},
1178     { "mat3x4", { .type = PL_VAR_FLOAT, .dim_m = 3, .dim_v = 4, .dim_a = 1, }},
1179     { "mat4x2", { .type = PL_VAR_FLOAT, .dim_m = 4, .dim_v = 2, .dim_a = 1, }},
1180     { "mat4x3", { .type = PL_VAR_FLOAT, .dim_m = 4, .dim_v = 3, .dim_a = 1, }},
1181     { "mat4",   { .type = PL_VAR_FLOAT, .dim_m = 4, .dim_v = 4, .dim_a = 1, }},
1182     // integer vectors
1183     { "int",    { .type = PL_VAR_SINT,  .dim_m = 1, .dim_v = 1, .dim_a = 1, }},
1184     { "ivec2",  { .type = PL_VAR_SINT,  .dim_m = 1, .dim_v = 2, .dim_a = 1, }},
1185     { "ivec3",  { .type = PL_VAR_SINT,  .dim_m = 1, .dim_v = 3, .dim_a = 1, }},
1186     { "ivec4",  { .type = PL_VAR_SINT,  .dim_m = 1, .dim_v = 4, .dim_a = 1, }},
1187     // unsigned integer vectors
1188     { "uint",   { .type = PL_VAR_UINT,  .dim_m = 1, .dim_v = 1, .dim_a = 1, }},
1189     { "uvec2",  { .type = PL_VAR_UINT,  .dim_m = 1, .dim_v = 2, .dim_a = 1, }},
1190     { "uvec3",  { .type = PL_VAR_UINT,  .dim_m = 1, .dim_v = 3, .dim_a = 1, }},
1191     { "uvec4",  { .type = PL_VAR_UINT,  .dim_m = 1, .dim_v = 4, .dim_a = 1, }},
1192 
1193     {0},
1194 };
1195 
1196 #define MAX_DIM 4
1197 
pl_var_glsl_type_name(struct pl_var var)1198 const char *pl_var_glsl_type_name(struct pl_var var)
1199 {
1200     static const char *types[PL_VAR_TYPE_COUNT][MAX_DIM+1][MAX_DIM+1] = {
1201     // float vectors
1202     [PL_VAR_FLOAT][1][1] = "float",
1203     [PL_VAR_FLOAT][1][2] = "vec2",
1204     [PL_VAR_FLOAT][1][3] = "vec3",
1205     [PL_VAR_FLOAT][1][4] = "vec4",
1206     // float matrices
1207     [PL_VAR_FLOAT][2][2] = "mat2",
1208     [PL_VAR_FLOAT][2][3] = "mat2x3",
1209     [PL_VAR_FLOAT][2][4] = "mat2x4",
1210     [PL_VAR_FLOAT][3][2] = "mat3x2",
1211     [PL_VAR_FLOAT][3][3] = "mat3",
1212     [PL_VAR_FLOAT][3][4] = "mat3x4",
1213     [PL_VAR_FLOAT][4][2] = "mat4x2",
1214     [PL_VAR_FLOAT][4][3] = "mat4x3",
1215     [PL_VAR_FLOAT][4][4] = "mat4",
1216     // integer vectors
1217     [PL_VAR_SINT][1][1] = "int",
1218     [PL_VAR_SINT][1][2] = "ivec2",
1219     [PL_VAR_SINT][1][3] = "ivec3",
1220     [PL_VAR_SINT][1][4] = "ivec4",
1221     // unsigned integer vectors
1222     [PL_VAR_UINT][1][1] = "uint",
1223     [PL_VAR_UINT][1][2] = "uvec2",
1224     [PL_VAR_UINT][1][3] = "uvec3",
1225     [PL_VAR_UINT][1][4] = "uvec4",
1226     };
1227 
1228     if (var.dim_v > MAX_DIM || var.dim_m > MAX_DIM)
1229         return NULL;
1230 
1231     return types[var.type][var.dim_m][var.dim_v];
1232 }
1233 
pl_var_from_fmt(pl_fmt fmt,const char * name)1234 struct pl_var pl_var_from_fmt(pl_fmt fmt, const char *name)
1235 {
1236     static const enum pl_var_type vartypes[] = {
1237         [PL_FMT_FLOAT] = PL_VAR_FLOAT,
1238         [PL_FMT_UNORM] = PL_VAR_FLOAT,
1239         [PL_FMT_SNORM] = PL_VAR_FLOAT,
1240         [PL_FMT_UINT]  = PL_VAR_UINT,
1241         [PL_FMT_SINT]  = PL_VAR_SINT,
1242     };
1243 
1244     pl_assert(fmt->type < PL_ARRAY_SIZE(vartypes));
1245     return (struct pl_var) {
1246         .type  = vartypes[fmt->type],
1247         .name  = name,
1248         .dim_v = fmt->num_components,
1249         .dim_m = 1,
1250         .dim_a = 1,
1251     };
1252 }
1253 
pl_var_host_layout(size_t offset,const struct pl_var * var)1254 struct pl_var_layout pl_var_host_layout(size_t offset, const struct pl_var *var)
1255 {
1256     size_t col_size = pl_var_type_size(var->type) * var->dim_v;
1257     return (struct pl_var_layout) {
1258         .offset = offset,
1259         .stride = col_size,
1260         .size   = col_size * var->dim_m * var->dim_a,
1261     };
1262 }
1263 
pl_std140_layout(size_t offset,const struct pl_var * var)1264 struct pl_var_layout pl_std140_layout(size_t offset, const struct pl_var *var)
1265 {
1266     size_t el_size = pl_var_type_size(var->type);
1267 
1268     // std140 packing rules:
1269     // 1. The size of generic values is their size in bytes
1270     // 2. The size of vectors is the vector length * the base count
1271     // 3. Matrices are treated like arrays of column vectors
1272     // 4. The size of array rows is that of the element size rounded up to
1273     // the nearest multiple of vec4
1274     // 5. All values are aligned to a multiple of their size (stride for arrays),
1275     // with the exception of vec3 which is aligned like vec4
1276     size_t stride = el_size * var->dim_v;
1277     size_t align = stride;
1278     if (var->dim_v == 3)
1279         align += el_size;
1280     if (var->dim_m * var->dim_a > 1)
1281         stride = align = PL_ALIGN2(align, sizeof(float[4]));
1282 
1283     return (struct pl_var_layout) {
1284         .offset = PL_ALIGN2(offset, align),
1285         .stride = stride,
1286         .size   = stride * var->dim_m * var->dim_a,
1287     };
1288 }
1289 
pl_std430_layout(size_t offset,const struct pl_var * var)1290 struct pl_var_layout pl_std430_layout(size_t offset, const struct pl_var *var)
1291 {
1292     size_t el_size = pl_var_type_size(var->type);
1293 
1294     // std430 packing rules: like std140, except arrays/matrices are always
1295     // "tightly" packed, even arrays/matrices of vec3s
1296     size_t stride = el_size * var->dim_v;
1297     size_t align = stride;
1298     if (var->dim_v == 3)
1299         align += el_size;
1300     if (var->dim_m * var->dim_a > 1)
1301         stride = align;
1302 
1303     return (struct pl_var_layout) {
1304         .offset = PL_ALIGN2(offset, align),
1305         .stride = stride,
1306         .size   = stride * var->dim_m * var->dim_a,
1307     };
1308 }
1309 
memcpy_layout(void * dst_p,struct pl_var_layout dst_layout,const void * src_p,struct pl_var_layout src_layout)1310 void memcpy_layout(void *dst_p, struct pl_var_layout dst_layout,
1311                    const void *src_p, struct pl_var_layout src_layout)
1312 {
1313     uintptr_t src = (uintptr_t) src_p + src_layout.offset;
1314     uintptr_t dst = (uintptr_t) dst_p + dst_layout.offset;
1315 
1316     if (src_layout.stride == dst_layout.stride) {
1317         pl_assert(dst_layout.size == src_layout.size);
1318         memcpy((void *) dst, (const void *) src, src_layout.size);
1319         return;
1320     }
1321 
1322     size_t stride = PL_MIN(src_layout.stride, dst_layout.stride);
1323     uintptr_t end = src + src_layout.size;
1324     while (src < end) {
1325         pl_assert(dst < dst + dst_layout.size);
1326         memcpy((void *) dst, (const void *) src, stride);
1327         src += src_layout.stride;
1328         dst += dst_layout.stride;
1329     }
1330 }
1331 
pl_desc_namespace(pl_gpu gpu,enum pl_desc_type type)1332 int pl_desc_namespace(pl_gpu gpu, enum pl_desc_type type)
1333 {
1334     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1335     int ret = impl->desc_namespace(gpu, type);
1336     pl_assert(ret >= 0 && ret < PL_DESC_TYPE_COUNT);
1337     return ret;
1338 }
1339 
pl_desc_access_glsl_name(enum pl_desc_access mode)1340 const char *pl_desc_access_glsl_name(enum pl_desc_access mode)
1341 {
1342     switch (mode) {
1343     case PL_DESC_ACCESS_READWRITE: return "";
1344     case PL_DESC_ACCESS_READONLY:  return "readonly";
1345     case PL_DESC_ACCESS_WRITEONLY: return "writeonly";
1346     case PL_DESC_ACCESS_COUNT: break;
1347     }
1348 
1349     pl_unreachable();
1350 }
1351 
1352 const struct pl_blend_params pl_alpha_overlay = {
1353     .src_rgb = PL_BLEND_SRC_ALPHA,
1354     .dst_rgb = PL_BLEND_ONE_MINUS_SRC_ALPHA,
1355     .src_alpha = PL_BLEND_ONE,
1356     .dst_alpha = PL_BLEND_ONE_MINUS_SRC_ALPHA,
1357 };
1358 
pl_pass_create(pl_gpu gpu,const struct pl_pass_params * params)1359 pl_pass pl_pass_create(pl_gpu gpu, const struct pl_pass_params *params)
1360 {
1361     require(params->glsl_shader);
1362     switch(params->type) {
1363     case PL_PASS_RASTER:
1364         require(params->vertex_shader);
1365         for (int i = 0; i < params->num_vertex_attribs; i++) {
1366             struct pl_vertex_attrib va = params->vertex_attribs[i];
1367             require(va.name);
1368             require(va.fmt);
1369             require(va.fmt->caps & PL_FMT_CAP_VERTEX);
1370             require(va.offset + va.fmt->texel_size <= params->vertex_stride);
1371         }
1372 
1373         pl_fmt target_fmt = params->target_dummy.params.format;
1374         require(target_fmt);
1375         require(target_fmt->caps & PL_FMT_CAP_RENDERABLE);
1376         require(!params->blend_params || target_fmt->caps & PL_FMT_CAP_BLENDABLE);
1377         require(!params->blend_params || params->load_target);
1378         break;
1379     case PL_PASS_COMPUTE:
1380         require(gpu->glsl.compute);
1381         break;
1382     case PL_PASS_INVALID:
1383     case PL_PASS_TYPE_COUNT:
1384         pl_unreachable();
1385     }
1386 
1387     require(params->num_variables <= gpu->limits.max_variables);
1388     for (int i = 0; i < params->num_variables; i++) {
1389         struct pl_var var = params->variables[i];
1390         require(var.name);
1391         require(pl_var_glsl_type_name(var));
1392     }
1393 
1394     require(params->num_constants <= gpu->limits.max_constants);
1395     for (int i = 0; i < params->num_constants; i++)
1396         require(params->constants[i].type);
1397 
1398     for (int i = 0; i < params->num_descriptors; i++) {
1399         struct pl_desc desc = params->descriptors[i];
1400         require(desc.name);
1401 
1402         // enforce disjoint descriptor bindings for each namespace
1403         int namespace = pl_desc_namespace(gpu, desc.type);
1404         for (int j = i+1; j < params->num_descriptors; j++) {
1405             struct pl_desc other = params->descriptors[j];
1406             require(desc.binding != other.binding ||
1407                     namespace != pl_desc_namespace(gpu, other.type));
1408         }
1409     }
1410 
1411     require(params->push_constants_size <= gpu->limits.max_pushc_size);
1412     require(params->push_constants_size == PL_ALIGN2(params->push_constants_size, 4));
1413 
1414     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1415     return impl->pass_create(gpu, params);
1416 
1417 error:
1418     return NULL;
1419 }
1420 
pl_pass_destroy(pl_gpu gpu,pl_pass * pass)1421 void pl_pass_destroy(pl_gpu gpu, pl_pass *pass)
1422 {
1423     if (!*pass)
1424         return;
1425 
1426     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1427     impl->pass_destroy(gpu, *pass);
1428     *pass = NULL;
1429 }
1430 
pl_pass_run(pl_gpu gpu,const struct pl_pass_run_params * params)1431 void pl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params)
1432 {
1433     pl_pass pass = params->pass;
1434     struct pl_pass_run_params new = *params;
1435 
1436     for (int i = 0; i < pass->params.num_descriptors; i++) {
1437         struct pl_desc desc = pass->params.descriptors[i];
1438         struct pl_desc_binding db = params->desc_bindings[i];
1439         require(db.object);
1440         switch (desc.type) {
1441         case PL_DESC_SAMPLED_TEX: {
1442             pl_tex tex = db.object;
1443             pl_fmt fmt = tex->params.format;
1444             require(tex->params.sampleable);
1445             require(db.sample_mode != PL_TEX_SAMPLE_LINEAR || (fmt->caps & PL_FMT_CAP_LINEAR));
1446             break;
1447         }
1448         case PL_DESC_STORAGE_IMG: {
1449             pl_tex tex = db.object;
1450             pl_fmt fmt = tex->params.format;
1451             require(tex->params.storable);
1452             require(desc.access != PL_DESC_ACCESS_READWRITE || (fmt->caps & PL_FMT_CAP_READWRITE));
1453             break;
1454         }
1455         case PL_DESC_BUF_UNIFORM: {
1456             pl_buf buf = db.object;
1457             require(buf->params.uniform);
1458             break;
1459         }
1460         case PL_DESC_BUF_STORAGE: {
1461             pl_buf buf = db.object;
1462             require(buf->params.storable);
1463             break;
1464         }
1465         case PL_DESC_BUF_TEXEL_UNIFORM: {
1466             pl_buf buf = db.object;
1467             require(buf->params.uniform && buf->params.format);
1468             break;
1469         }
1470         case PL_DESC_BUF_TEXEL_STORAGE: {
1471             pl_buf buf = db.object;
1472             pl_fmt fmt = buf->params.format;
1473             require(buf->params.storable && buf->params.format);
1474             require(desc.access != PL_DESC_ACCESS_READWRITE || (fmt->caps & PL_FMT_CAP_READWRITE));
1475             break;
1476         }
1477         case PL_DESC_INVALID:
1478         case PL_DESC_TYPE_COUNT:
1479             pl_unreachable();
1480         }
1481     }
1482 
1483     for (int i = 0; i < params->num_var_updates; i++) {
1484         struct pl_var_update vu = params->var_updates[i];
1485         require(vu.index >= 0 && vu.index < pass->params.num_variables);
1486         require(vu.data);
1487     }
1488 
1489     require(params->push_constants || !pass->params.push_constants_size);
1490 
1491     switch (pass->params.type) {
1492     case PL_PASS_RASTER: {
1493         switch (pass->params.vertex_type) {
1494         case PL_PRIM_TRIANGLE_LIST:
1495             require(params->vertex_count % 3 == 0);
1496             // fall through
1497         case PL_PRIM_TRIANGLE_STRIP:
1498             require(params->vertex_count >= 3);
1499             break;
1500         case PL_PRIM_TYPE_COUNT:
1501             pl_unreachable();
1502         }
1503 
1504         require(!params->vertex_data ^ !params->vertex_buf);
1505         if (params->vertex_buf) {
1506             pl_buf vertex_buf = params->vertex_buf;
1507             require(vertex_buf->params.drawable);
1508             if (!params->index_data && !params->index_buf) {
1509                 // Cannot bounds check indexed draws
1510                 size_t vert_size = params->vertex_count * pass->params.vertex_stride;
1511                 require(params->buf_offset + vert_size <= vertex_buf->params.size);
1512             }
1513         }
1514 
1515         require(!params->index_data || !params->index_buf);
1516         if (params->index_buf) {
1517             pl_buf index_buf = params->index_buf;
1518             require(!params->vertex_data);
1519             require(index_buf->params.drawable);
1520             size_t index_size = params->vertex_count * sizeof(*params->index_data);
1521             require(params->index_offset + index_size <= index_buf->params.size);
1522         }
1523 
1524         pl_tex target = params->target;
1525         require(target);
1526         require(pl_tex_params_dimension(target->params) == 2);
1527         require(target->params.format == pass->params.target_dummy.params.format);
1528         require(target->params.renderable);
1529         struct pl_rect2d *vp = &new.viewport;
1530         struct pl_rect2d *sc = &new.scissors;
1531 
1532         // Sanitize viewport/scissors
1533         if (!vp->x0 && !vp->x1)
1534             vp->x1 = target->params.w;
1535         if (!vp->y0 && !vp->y1)
1536             vp->y1 = target->params.h;
1537 
1538         if (!sc->x0 && !sc->x1)
1539             sc->x1 = target->params.w;
1540         if (!sc->y0 && !sc->y1)
1541             sc->y1 = target->params.h;
1542 
1543         // Constrain the scissors to the target dimension (to sanitize the
1544         // underlying graphics API calls)
1545         sc->x0 = PL_CLAMP(sc->x0, 0, target->params.w);
1546         sc->y0 = PL_CLAMP(sc->y0, 0, target->params.h);
1547         sc->x1 = PL_CLAMP(sc->x1, 0, target->params.w);
1548         sc->y1 = PL_CLAMP(sc->y1, 0, target->params.h);
1549 
1550         // Scissors wholly outside target -> silently drop pass (also needed
1551         // to ensure we don't cause UB by specifying invalid scissors)
1552         if (!pl_rect_w(*sc) || !pl_rect_h(*sc))
1553             return;
1554 
1555         require(pl_rect_w(*vp) > 0);
1556         require(pl_rect_h(*vp) > 0);
1557         require(pl_rect_w(*sc) > 0);
1558         require(pl_rect_h(*sc) > 0);
1559 
1560         if (!pass->params.load_target)
1561             pl_tex_invalidate(gpu, target);
1562         break;
1563     }
1564     case PL_PASS_COMPUTE:
1565         for (int i = 0; i < PL_ARRAY_SIZE(params->compute_groups); i++) {
1566             require(params->compute_groups[i] >= 0);
1567             require(params->compute_groups[i] <= gpu->limits.max_dispatch[i]);
1568         }
1569         break;
1570     case PL_PASS_INVALID:
1571     case PL_PASS_TYPE_COUNT:
1572         pl_unreachable();
1573     }
1574 
1575     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1576     impl->pass_run(gpu, &new);
1577 
1578 error:
1579     return;
1580 }
1581 
pl_gpu_flush(pl_gpu gpu)1582 void pl_gpu_flush(pl_gpu gpu)
1583 {
1584     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1585     if (impl->gpu_flush)
1586         impl->gpu_flush(gpu);
1587 }
1588 
pl_gpu_finish(pl_gpu gpu)1589 void pl_gpu_finish(pl_gpu gpu)
1590 {
1591     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1592     impl->gpu_finish(gpu);
1593 }
1594 
pl_gpu_is_failed(pl_gpu gpu)1595 bool pl_gpu_is_failed(pl_gpu gpu)
1596 {
1597     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
1598     if (!impl->gpu_is_failed)
1599         return false;
1600 
1601     return impl->gpu_is_failed(gpu);
1602 }
1603 
1604 // GPU-internal helpers
1605 
pl_tex_upload_pbo(pl_gpu gpu,const struct pl_tex_transfer_params * params)1606 bool pl_tex_upload_pbo(pl_gpu gpu, const struct pl_tex_transfer_params *params)
1607 {
1608     if (params->buf)
1609         return pl_tex_upload(gpu, params);
1610 
1611     pl_buf buf = NULL;
1612     struct pl_buf_params bufparams = {
1613         .size = pl_tex_transfer_size(params),
1614     };
1615 
1616     // If we can import host pointers directly, and the function is being used
1617     // asynchronously, then we can use host pointer import to skip a memcpy. In
1618     // the synchronous case, we still force a host memcpy to avoid stalling the
1619     // host until the GPU memcpy completes.
1620     bool can_import = gpu->import_caps.buf & PL_HANDLE_HOST_PTR;
1621     if (can_import && params->callback && bufparams.size > 32*1024) { // 32 KiB
1622         bufparams.import_handle = PL_HANDLE_HOST_PTR;
1623         bufparams.shared_mem = (struct pl_shared_mem) {
1624             .handle.ptr = params->ptr,
1625             .size = bufparams.size,
1626             .offset = 0,
1627         };
1628 
1629         // Suppress errors for this test because it may fail, in which case we
1630         // want to silently fall back.
1631         pl_log_level_cap(gpu->log, PL_LOG_DEBUG);
1632         buf = pl_buf_create(gpu, &bufparams);
1633         pl_log_level_cap(gpu->log, PL_LOG_NONE);
1634     }
1635 
1636     if (!buf) {
1637         bufparams.import_handle = 0;
1638         bufparams.host_writable = true;
1639         buf = pl_buf_create(gpu, &bufparams);
1640     }
1641 
1642     if (!buf)
1643         return false;
1644 
1645     if (!bufparams.import_handle)
1646         pl_buf_write(gpu, buf, 0, params->ptr, buf->params.size);
1647 
1648     struct pl_tex_transfer_params newparams = *params;
1649     newparams.buf = buf;
1650     newparams.ptr = NULL;
1651 
1652     bool ok = pl_tex_upload(gpu, &newparams);
1653     pl_buf_destroy(gpu, &buf);
1654     return ok;
1655 }
1656 
1657 struct pbo_cb_ctx {
1658     pl_gpu gpu;
1659     pl_buf buf;
1660     void *ptr;
1661     void (*callback)(void *priv);
1662     void *priv;
1663 };
1664 
pbo_download_cb(void * priv)1665 static void pbo_download_cb(void *priv)
1666 {
1667     struct pbo_cb_ctx *p = priv;
1668     pl_buf_read(p->gpu, p->buf, 0, p->ptr, p->buf->params.size);
1669     pl_buf_destroy(p->gpu, &p->buf);
1670 
1671     // Run the original callback
1672     p->callback(p->priv);
1673     pl_free(priv);
1674 };
1675 
pl_tex_download_pbo(pl_gpu gpu,const struct pl_tex_transfer_params * params)1676 bool pl_tex_download_pbo(pl_gpu gpu, const struct pl_tex_transfer_params *params)
1677 {
1678     if (params->buf)
1679         return pl_tex_download(gpu, params);
1680 
1681     pl_buf buf = NULL;
1682     struct pl_buf_params bufparams = {
1683         .size = pl_tex_transfer_size(params),
1684     };
1685 
1686     // If we can import host pointers directly, we can avoid an extra memcpy
1687     // (sometimes). In the cases where it isn't avoidable, the extra memcpy
1688     // will happen inside VRAM, which is typically faster anyway.
1689     bool can_import = gpu->import_caps.buf & PL_HANDLE_HOST_PTR;
1690     if (can_import && bufparams.size > 32*1024) { // 32 KiB
1691         bufparams.import_handle = PL_HANDLE_HOST_PTR;
1692         bufparams.shared_mem = (struct pl_shared_mem) {
1693             .handle.ptr = params->ptr,
1694             .size = bufparams.size,
1695             .offset = 0,
1696         };
1697 
1698         // Suppress errors for this test because it may fail, in which case we
1699         // want to silently fall back.
1700         pl_log_level_cap(gpu->log, PL_LOG_DEBUG);
1701         buf = pl_buf_create(gpu, &bufparams);
1702         pl_log_level_cap(gpu->log, PL_LOG_NONE);
1703     }
1704 
1705     if (!buf) {
1706         // Fallback when host pointer import is not supported
1707         bufparams.import_handle = 0;
1708         bufparams.host_readable = true;
1709         buf = pl_buf_create(gpu, &bufparams);
1710     }
1711 
1712     if (!buf)
1713         return false;
1714 
1715     struct pl_tex_transfer_params newparams = *params;
1716     newparams.ptr = NULL;
1717     newparams.buf = buf;
1718 
1719     // If the transfer is asynchronous, propagate our host read asynchronously
1720     if (params->callback && !bufparams.import_handle) {
1721         newparams.callback = pbo_download_cb;
1722         newparams.priv = pl_alloc_struct(NULL, struct pbo_cb_ctx, {
1723             .gpu = gpu,
1724             .buf = buf,
1725             .ptr = params->ptr,
1726             .callback = params->callback,
1727             .priv = params->priv,
1728         });
1729     }
1730 
1731     if (!pl_tex_download(gpu, &newparams)) {
1732         pl_buf_destroy(gpu, &buf);
1733         return false;
1734     }
1735 
1736     if (!params->callback) {
1737         while (pl_buf_poll(gpu, buf, 10000000)) // 10 ms
1738             PL_TRACE(gpu, "pl_tex_download: synchronous/blocking (slow path)");
1739     }
1740 
1741     bool ok;
1742     if (bufparams.import_handle) {
1743         // Buffer download completion already means the host pointer contains
1744         // the valid data, no more need to copy. (Note: this applies even for
1745         // asynchronous downloads)
1746         ok = true;
1747         pl_buf_destroy(gpu, &buf);
1748     } else if (!params->callback) {
1749         // Synchronous read back to the host pointer
1750         ok = pl_buf_read(gpu, buf, 0, params->ptr, bufparams.size);
1751         pl_buf_destroy(gpu, &buf);
1752     } else {
1753         // Nothing left to do here, the rest will be done by pbo_download_cb
1754         ok = true;
1755     }
1756 
1757     return ok;
1758 }
1759 
pl_tex_upload_texel(pl_gpu gpu,pl_dispatch dp,const struct pl_tex_transfer_params * params)1760 bool pl_tex_upload_texel(pl_gpu gpu, pl_dispatch dp,
1761                          const struct pl_tex_transfer_params *params)
1762 {
1763     const int threads = PL_MIN(256, pl_rect_w(params->rc));
1764     pl_tex tex = params->tex;
1765     pl_fmt fmt = tex->params.format;
1766     require(params->buf);
1767 
1768     pl_shader sh = pl_dispatch_begin(dp);
1769     if (!sh_try_compute(sh, threads, 1, false, 0)) {
1770         PL_ERR(gpu, "Failed emulating texture transfer!");
1771         pl_dispatch_abort(dp, &sh);
1772         return false;
1773     }
1774 
1775     bool ubo = params->buf->params.uniform;
1776     ident_t buf = sh_desc(sh, (struct pl_shader_desc) {
1777         .binding.object = params->buf,
1778         .desc = {
1779             .name = "data",
1780             .type = ubo ? PL_DESC_BUF_TEXEL_UNIFORM : PL_DESC_BUF_TEXEL_STORAGE,
1781         },
1782     });
1783 
1784     ident_t img = sh_desc(sh, (struct pl_shader_desc) {
1785         .binding.object = params->tex,
1786         .desc = {
1787             .name = "image",
1788             .type = PL_DESC_STORAGE_IMG,
1789             .access = PL_DESC_ACCESS_WRITEONLY,
1790         },
1791     });
1792 
1793     // If the transfer width is a natural multiple of the thread size, we
1794     // can skip the bounds check. Otherwise, make sure we aren't blitting out
1795     // of the range since this would read out of bounds.
1796     int groups_x = (pl_rect_w(params->rc) + threads - 1) / threads;
1797     if (groups_x * threads != pl_rect_w(params->rc)) {
1798         GLSL("if (gl_GlobalInvocationID.x >= %d) \n"
1799              "    return;                        \n",
1800              pl_rect_w(params->rc));
1801     }
1802 
1803     GLSL("vec4 color = vec4(0.0);                                       \n"
1804          "ivec3 pos = ivec3(gl_GlobalInvocationID) + ivec3(%d, %d, %d); \n"
1805          "int base = ((pos.z * %d + pos.y) * %d + pos.x) * %d;          \n",
1806          params->rc.x0, params->rc.y0, params->rc.z0,
1807          params->stride_h, params->stride_w, fmt->num_components);
1808 
1809     for (int i = 0; i < fmt->num_components; i++) {
1810         GLSL("color[%d] = %s(%s, base + %d).r; \n",
1811              i, ubo ? "texelFetch" : "imageLoad", buf, i);
1812     }
1813 
1814     int dims = pl_tex_params_dimension(tex->params);
1815     static const char *coord_types[] = {
1816         [1] = "int",
1817         [2] = "ivec2",
1818         [3] = "ivec3",
1819     };
1820 
1821     GLSL("imageStore(%s, %s(pos), color);\n", img, coord_types[dims]);
1822     return pl_dispatch_compute(dp, &(struct pl_dispatch_compute_params) {
1823         .shader = &sh,
1824         .dispatch_size = {
1825             groups_x,
1826             pl_rect_h(params->rc),
1827             pl_rect_d(params->rc),
1828         },
1829     });
1830 
1831 error:
1832     return false;
1833 }
1834 
pl_tex_download_texel(pl_gpu gpu,pl_dispatch dp,const struct pl_tex_transfer_params * params)1835 bool pl_tex_download_texel(pl_gpu gpu, pl_dispatch dp,
1836                            const struct pl_tex_transfer_params *params)
1837 {
1838     const int threads = PL_MIN(256, pl_rect_w(params->rc));
1839     pl_tex tex = params->tex;
1840     pl_fmt fmt = tex->params.format;
1841     require(params->buf);
1842 
1843     pl_shader sh = pl_dispatch_begin(dp);
1844     if (!sh_try_compute(sh, threads, 1, false, 0)) {
1845         PL_ERR(gpu, "Failed emulating texture transfer!");
1846         pl_dispatch_abort(dp, &sh);
1847         return false;
1848     }
1849 
1850     ident_t buf = sh_desc(sh, (struct pl_shader_desc) {
1851         .binding.object = params->buf,
1852         .desc = {
1853             .name = "data",
1854             .type = PL_DESC_BUF_TEXEL_STORAGE,
1855         },
1856     });
1857 
1858     ident_t img = sh_desc(sh, (struct pl_shader_desc) {
1859         .binding.object = params->tex,
1860         .desc = {
1861             .name = "image",
1862             .type = PL_DESC_STORAGE_IMG,
1863             .access = PL_DESC_ACCESS_READONLY,
1864         },
1865     });
1866 
1867     int groups_x = (pl_rect_w(params->rc) + threads - 1) / threads;
1868     if (groups_x * threads != pl_rect_w(params->rc)) {
1869         GLSL("if (gl_GlobalInvocationID.x >= %d) \n"
1870              "    return;                        \n",
1871              pl_rect_w(params->rc));
1872     }
1873 
1874     int dims = pl_tex_params_dimension(tex->params);
1875     static const char *coord_types[] = {
1876         [1] = "int",
1877         [2] = "ivec2",
1878         [3] = "ivec3",
1879     };
1880 
1881     GLSL("ivec3 pos = ivec3(gl_GlobalInvocationID) + ivec3(%d, %d, %d); \n"
1882          "int base = ((pos.z * %d + pos.y) * %d + pos.x) * %d;          \n"
1883          "vec4 color = imageLoad(%s, %s(pos));                          \n",
1884          params->rc.x0, params->rc.y0, params->rc.z0,
1885          params->stride_h, params->stride_w, fmt->num_components,
1886          img, coord_types[dims]);
1887 
1888     for (int i = 0; i < fmt->num_components; i++)
1889         GLSL("imageStore(%s, base + %d, vec4(color[%d])); \n", buf, i, i);
1890 
1891     return pl_dispatch_compute(dp, &(struct pl_dispatch_compute_params) {
1892         .shader = &sh,
1893         .dispatch_size = {
1894             groups_x,
1895             pl_rect_h(params->rc),
1896             pl_rect_d(params->rc),
1897         },
1898     });
1899 
1900 error:
1901     return false;
1902 }
1903 
pl_tex_blit_compute(pl_gpu gpu,pl_dispatch dp,const struct pl_tex_blit_params * params)1904 bool pl_tex_blit_compute(pl_gpu gpu, pl_dispatch dp,
1905                          const struct pl_tex_blit_params *params)
1906 {
1907     if (!params->src->params.storable || !params->dst->params.storable)
1908         return false;
1909 
1910     // Normalize `dst_rc`, moving all flipping to `src_rc` instead.
1911     struct pl_rect3d src_rc = params->src_rc;
1912     struct pl_rect3d dst_rc = params->dst_rc;
1913     if (pl_rect_w(dst_rc) < 0) {
1914         PL_SWAP(src_rc.x0, src_rc.x1);
1915         PL_SWAP(dst_rc.x0, dst_rc.x1);
1916     }
1917     if (pl_rect_h(dst_rc) < 0) {
1918         PL_SWAP(src_rc.y0, src_rc.y1);
1919         PL_SWAP(dst_rc.y0, dst_rc.y1);
1920     }
1921     if (pl_rect_d(dst_rc) < 0) {
1922         PL_SWAP(src_rc.z0, src_rc.z1);
1923         PL_SWAP(dst_rc.z0, dst_rc.z1);
1924     }
1925 
1926     bool needs_scaling = false;
1927     needs_scaling |= pl_rect_w(dst_rc) != abs(pl_rect_w(src_rc));
1928     needs_scaling |= pl_rect_h(dst_rc) != abs(pl_rect_h(src_rc));
1929     needs_scaling |= pl_rect_d(dst_rc) != abs(pl_rect_d(src_rc));
1930 
1931     // Manual trilinear interpolation would be too slow to justify
1932     bool needs_sampling = needs_scaling && params->sample_mode != PL_TEX_SAMPLE_NEAREST;
1933     if (needs_sampling && !params->src->params.sampleable)
1934         return false;
1935 
1936     const int threads = 256;
1937     int bw = PL_MIN(32, pl_rect_w(dst_rc));
1938     int bh = PL_MIN(threads / bw, pl_rect_h(dst_rc));
1939     pl_shader sh = pl_dispatch_begin(dp);
1940     if (!sh_try_compute(sh, bw, bh, false, 0)) {
1941         pl_dispatch_abort(dp, &sh);
1942         return false;
1943     }
1944 
1945     // Avoid over-writing into `dst`
1946     int groups_x = (pl_rect_w(dst_rc) + bw - 1) / bw;
1947     if (groups_x * bw != pl_rect_w(dst_rc)) {
1948         GLSL("if (gl_GlobalInvocationID.x >= %d) \n"
1949              "    return;                        \n",
1950              pl_rect_w(dst_rc));
1951     }
1952 
1953     int groups_y = (pl_rect_h(dst_rc) + bh - 1) / bh;
1954     if (groups_y * bh != pl_rect_h(dst_rc)) {
1955         GLSL("if (gl_GlobalInvocationID.y >= %d) \n"
1956              "    return;                        \n",
1957              pl_rect_h(dst_rc));
1958     }
1959 
1960     ident_t dst = sh_desc(sh, (struct pl_shader_desc) {
1961         .binding.object = params->dst,
1962         .desc = {
1963             .name   = "dst",
1964             .type   = PL_DESC_STORAGE_IMG,
1965             .access = PL_DESC_ACCESS_WRITEONLY,
1966         },
1967     });
1968 
1969     static const char *vecs[] = {
1970         [1] = "float",
1971         [2] = "vec2",
1972         [3] = "vec3",
1973         [4] = "vec4",
1974     };
1975 
1976     static const char *ivecs[] = {
1977         [1] = "int",
1978         [2] = "ivec2",
1979         [3] = "ivec3",
1980         [4] = "ivec4",
1981     };
1982 
1983     int src_dims = pl_tex_params_dimension(params->src->params);
1984     int dst_dims = pl_tex_params_dimension(params->dst->params);
1985     GLSL("const ivec3 pos = ivec3(gl_GlobalInvocationID);   \n"
1986          "%s dst_pos = %s(pos + ivec3(%d, %d, %d)); \n",
1987          ivecs[dst_dims], ivecs[dst_dims],
1988          params->dst_rc.x0, params->dst_rc.y0, params->dst_rc.z0);
1989 
1990     if (needs_sampling || (needs_scaling && params->src->params.sampleable)) {
1991 
1992         ident_t src = sh_desc(sh, (struct pl_shader_desc) {
1993             .desc = {
1994                 .name = "src",
1995                 .type = PL_DESC_SAMPLED_TEX,
1996             },
1997             .binding = {
1998                 .object = params->src,
1999                 .address_mode = PL_TEX_ADDRESS_CLAMP,
2000                 .sample_mode = params->sample_mode,
2001             }
2002         });
2003 
2004         GLSL("vec3 fpos = (vec3(pos) + vec3(0.5)) / vec3(%d.0, %d.0, %d.0); \n"
2005              "%s src_pos = %s(0.5);                                         \n"
2006              "src_pos.x = mix(%f, %f, fpos.x);                              \n",
2007              pl_rect_w(dst_rc), pl_rect_h(dst_rc), pl_rect_d(dst_rc),
2008              vecs[src_dims], vecs[src_dims],
2009              (float) src_rc.x0 / params->src->params.w,
2010              (float) src_rc.x1 / params->src->params.w);
2011 
2012         if (params->src->params.h) {
2013             GLSL("src_pos.y = mix(%f, %f, fpos.y); \n",
2014                  (float) src_rc.y0 / params->src->params.h,
2015                  (float) src_rc.y1 / params->src->params.h);
2016         }
2017 
2018         if (params->src->params.d) {
2019             GLSL("src_pos.z = mix(%f, %f, fpos.z); \n",
2020                  (float) src_rc.z0 / params->src->params.d,
2021                  (float) src_rc.z1 / params->src->params.d);
2022         }
2023 
2024         GLSL("imageStore(%s, dst_pos, %s(%s, src_pos)); \n",
2025              dst, sh_tex_fn(sh, params->src->params), src);
2026 
2027     } else {
2028 
2029         ident_t src = sh_desc(sh, (struct pl_shader_desc) {
2030             .binding.object = params->src,
2031             .desc = {
2032                 .name   = "src",
2033                 .type   = PL_DESC_STORAGE_IMG,
2034                 .access = PL_DESC_ACCESS_READONLY,
2035             },
2036         });
2037 
2038         if (needs_scaling) {
2039             GLSL("ivec3 src_pos = ivec3(round(vec3(%f, %f, %f) * vec3(pos))); \n",
2040                  fabs((float) pl_rect_w(src_rc) / pl_rect_w(dst_rc)),
2041                  fabs((float) pl_rect_h(src_rc) / pl_rect_h(dst_rc)),
2042                  fabs((float) pl_rect_d(src_rc) / pl_rect_d(dst_rc)));
2043         } else {
2044             GLSL("ivec3 src_pos = pos; \n");
2045         }
2046 
2047         GLSL("src_pos = ivec3(%d, %d, %d) * src_pos + ivec3(%d, %d, %d);    \n"
2048              "imageStore(%s, dst_pos, imageLoad(%s, %s(src_pos)));          \n",
2049              src_rc.x1 < src_rc.x0 ? -1 : 1,
2050              src_rc.y1 < src_rc.y0 ? -1 : 1,
2051              src_rc.z1 < src_rc.z0 ? -1 : 1,
2052              src_rc.x0, src_rc.y0, src_rc.z0,
2053              dst, src, ivecs[src_dims]);
2054 
2055     }
2056 
2057     return pl_dispatch_compute(dp, &(struct pl_dispatch_compute_params) {
2058         .shader = &sh,
2059         .dispatch_size = {
2060             groups_x,
2061             groups_y,
2062             pl_rect_d(dst_rc),
2063         },
2064     });
2065 }
2066 
pl_tex_blit_raster(pl_gpu gpu,pl_dispatch dp,const struct pl_tex_blit_params * params)2067 void pl_tex_blit_raster(pl_gpu gpu, pl_dispatch dp,
2068                         const struct pl_tex_blit_params *params)
2069 {
2070     enum pl_fmt_type src_type = params->src->params.format->type;
2071     enum pl_fmt_type dst_type = params->dst->params.format->type;
2072 
2073     // Only for 2D textures
2074     pl_assert(params->src->params.h && !params->src->params.d);
2075     pl_assert(params->dst->params.h && !params->dst->params.d);
2076 
2077     // Integer textures are not supported
2078     pl_assert(src_type != PL_FMT_UINT && src_type != PL_FMT_SINT);
2079     pl_assert(dst_type != PL_FMT_UINT && dst_type != PL_FMT_SINT);
2080 
2081     struct pl_rect2df src_rc = {
2082         .x0 = params->src_rc.x0, .x1 = params->src_rc.x1,
2083         .y0 = params->src_rc.y0, .y1 = params->src_rc.y1,
2084     };
2085     struct pl_rect2d dst_rc = {
2086         .x0 = params->dst_rc.x0, .x1 = params->dst_rc.x1,
2087         .y0 = params->dst_rc.y0, .y1 = params->dst_rc.y1,
2088     };
2089 
2090     pl_shader sh = pl_dispatch_begin(dp);
2091     sh->res.output = PL_SHADER_SIG_COLOR;
2092 
2093     ident_t pos, src = sh_bind(sh, params->src, PL_TEX_ADDRESS_CLAMP,
2094         params->sample_mode, "src_tex", &src_rc, &pos, NULL, NULL);
2095 
2096     GLSL("vec4 color = %s(%s, %s); \n",
2097          sh_tex_fn(sh, params->src->params), src, pos);
2098 
2099     pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
2100         .shader = &sh,
2101         .target = params->dst,
2102         .rect = dst_rc,
2103     });
2104 }
2105 
pl_pass_run_vbo(pl_gpu gpu,const struct pl_pass_run_params * params)2106 void pl_pass_run_vbo(pl_gpu gpu, const struct pl_pass_run_params *params)
2107 {
2108     if (!params->vertex_data && !params->index_data)
2109         return pl_pass_run(gpu, params);
2110 
2111     struct pl_pass_run_params newparams = *params;
2112     pl_buf vert = NULL, index = NULL;
2113 
2114     if (params->vertex_data) {
2115         int num_vertices = 0;
2116         if (params->index_data) {
2117             // Indexed draw, so we need to store all indexed vertices
2118             for (int i = 0; i < params->vertex_count; i++)
2119                 num_vertices = PL_MAX(num_vertices, params->index_data[i]);
2120             num_vertices += 1;
2121         } else {
2122             num_vertices = params->vertex_count;
2123         }
2124 
2125         vert = pl_buf_create(gpu, &(struct pl_buf_params) {
2126             .size = num_vertices * params->pass->params.vertex_stride,
2127             .initial_data = params->vertex_data,
2128             .drawable = true,
2129         });
2130 
2131         if (!vert) {
2132             PL_ERR(gpu, "Failed allocating vertex buffer!");
2133             return;
2134         }
2135 
2136         newparams.vertex_buf = vert;
2137         newparams.vertex_data = NULL;
2138     }
2139 
2140     if (params->index_data) {
2141         index = pl_buf_create(gpu, &(struct pl_buf_params) {
2142             .size = params->vertex_count * sizeof(*params->index_data),
2143             .initial_data = params->index_data,
2144             .drawable = true,
2145         });
2146 
2147         if (!index) {
2148             PL_ERR(gpu, "Failed allocating index buffer!");
2149             return;
2150         }
2151 
2152         newparams.index_buf = index;
2153         newparams.index_data = NULL;
2154     }
2155 
2156     pl_pass_run(gpu, &newparams);
2157     pl_buf_destroy(gpu, &vert);
2158     pl_buf_destroy(gpu, &index);
2159 }
2160 
pl_pass_params_copy(void * alloc,const struct pl_pass_params * params)2161 struct pl_pass_params pl_pass_params_copy(void *alloc, const struct pl_pass_params *params)
2162 {
2163     struct pl_pass_params new = *params;
2164     new.cached_program = NULL;
2165     new.cached_program_len = 0;
2166 
2167     new.glsl_shader = pl_str0dup0(alloc, new.glsl_shader);
2168     new.vertex_shader = pl_str0dup0(alloc, new.vertex_shader);
2169     if (new.blend_params)
2170         new.blend_params = pl_memdup_ptr(alloc, new.blend_params);
2171 
2172 #define DUPNAMES(field)                                                 \
2173     do {                                                                \
2174         size_t _size = new.num_##field * sizeof(new.field[0]);          \
2175         new.field = pl_memdup(alloc, new.field, _size);                 \
2176         for (int j = 0; j < new.num_##field; j++)                       \
2177             new.field[j].name = pl_str0dup0(alloc, new.field[j].name);  \
2178     } while (0)
2179 
2180     DUPNAMES(variables);
2181     DUPNAMES(descriptors);
2182     DUPNAMES(vertex_attribs);
2183 
2184 #undef DUPNAMES
2185 
2186     new.constant_data = NULL;
2187     new.constants = pl_memdup(alloc, new.constants,
2188                               new.num_constants * sizeof(new.constants[0]));
2189 
2190     return new;
2191 }
2192 
pl_sync_create(pl_gpu gpu,enum pl_handle_type handle_type)2193 pl_sync pl_sync_create(pl_gpu gpu, enum pl_handle_type handle_type)
2194 {
2195     require(handle_type);
2196     require(handle_type & gpu->export_caps.sync);
2197     require(PL_ISPOT(handle_type));
2198 
2199     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
2200     return impl->sync_create(gpu, handle_type);
2201 
2202 error:
2203     return NULL;
2204 }
2205 
pl_sync_destroy(pl_gpu gpu,pl_sync * sync)2206 void pl_sync_destroy(pl_gpu gpu, pl_sync *sync)
2207 {
2208     if (!*sync)
2209         return;
2210 
2211     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
2212     impl->sync_destroy(gpu, *sync);
2213     *sync = NULL;
2214 }
2215 
pl_tex_export(pl_gpu gpu,pl_tex tex,pl_sync sync)2216 bool pl_tex_export(pl_gpu gpu, pl_tex tex, pl_sync sync)
2217 {
2218     require(tex->params.import_handle || tex->params.export_handle);
2219 
2220     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
2221     return impl->tex_export(gpu, tex, sync);
2222 
2223 error:
2224     return false;
2225 }
2226 
pl_timer_create(pl_gpu gpu)2227 pl_timer pl_timer_create(pl_gpu gpu)
2228 {
2229     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
2230     if (!impl->timer_create)
2231         return NULL;
2232 
2233     return impl->timer_create(gpu);
2234 }
2235 
pl_timer_destroy(pl_gpu gpu,pl_timer * timer)2236 void pl_timer_destroy(pl_gpu gpu, pl_timer *timer)
2237 {
2238     if (!*timer)
2239         return;
2240 
2241     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
2242     impl->timer_destroy(gpu, *timer);
2243     *timer = NULL;
2244 }
2245 
pl_timer_query(pl_gpu gpu,pl_timer timer)2246 uint64_t pl_timer_query(pl_gpu gpu, pl_timer timer)
2247 {
2248     if (!timer)
2249         return 0;
2250 
2251     const struct pl_gpu_fns *impl = PL_PRIV(gpu);
2252     return impl->timer_query(gpu, timer);
2253 }
2254 
print_uuid(char buf[3* UUID_SIZE],const uint8_t uuid[UUID_SIZE])2255 const char *print_uuid(char buf[3 * UUID_SIZE], const uint8_t uuid[UUID_SIZE])
2256 {
2257     static const char *hexdigits = "0123456789ABCDEF";
2258     for (int i = 0; i < UUID_SIZE; i++) {
2259         uint8_t x = uuid[i];
2260         buf[3 * i + 0] = hexdigits[x >> 4];
2261         buf[3 * i + 1] = hexdigits[x & 0xF];
2262         buf[3 * i + 2] = i == UUID_SIZE - 1 ? '\0' : ':';
2263     }
2264 
2265     return buf;
2266 }
2267 
print_drm_mod(char buf[DRM_MOD_SIZE],uint64_t mod)2268 const char *print_drm_mod(char buf[DRM_MOD_SIZE], uint64_t mod)
2269 {
2270     switch (mod) {
2271     case DRM_FORMAT_MOD_LINEAR: return "LINEAR";
2272     case DRM_FORMAT_MOD_INVALID: return "INVALID";
2273     }
2274 
2275     uint8_t vendor = mod >> 56;
2276     uint64_t val = mod & ((1ULL << 56) - 1);
2277 
2278     const char *name = NULL;
2279     switch (vendor) {
2280     case 0x00: name = "NONE"; break;
2281     case 0x01: name = "INTEL"; break;
2282     case 0x02: name = "AMD"; break;
2283     case 0x03: name = "NVIDIA"; break;
2284     case 0x04: name = "SAMSUNG"; break;
2285     case 0x08: name = "ARM"; break;
2286     }
2287 
2288     if (name) {
2289         snprintf(buf, DRM_MOD_SIZE, "%s 0x%"PRIx64, name, val);
2290     } else {
2291         snprintf(buf, DRM_MOD_SIZE, "0x%02x 0x%"PRIx64, vendor, val);
2292     }
2293 
2294     return buf;
2295 }
2296