1 #include "tests.h"
2 #include "shaders.h"
3 
// Exercises buffer creation, host writes, buffer-to-buffer copies, readback
// and (when the mapped-size limit allows) host-mapped access on `gpu`.
// Every scenario round-trips the same block of random reference bytes and
// requires the data read back to match it exactly.
//
// Returns without testing anything if the GPU cannot allocate `buf_size`.
static void pl_buffer_tests(pl_gpu gpu)
{
    const size_t buf_size = 1024;
    if (buf_size > gpu->limits.max_buf_size)
        return;

    // A single allocation holds both halves: the reference data first,
    // followed by the scratch area that readbacks are written into. Validate
    // the allocation before deriving the second pointer from it.
    uint8_t *test_src = malloc(buf_size * 2);
    assert(test_src);
    uint8_t *test_dst = test_src + buf_size;
    memset(test_dst, 0, buf_size);
    for (size_t i = 0; i < buf_size; i++)
        test_src[i] = (RANDOM * 256);

    pl_buf buf = NULL, tbuf = NULL;

    printf("test buffer static creation and readback\n");
    buf = pl_buf_create(gpu, &(struct pl_buf_params) {
        .size = buf_size,
        .host_readable = true,
        .initial_data = test_src,
    });

    REQUIRE(buf);
    REQUIRE(pl_buf_read(gpu, buf, 0, test_dst, buf_size));
    REQUIRE(memcmp(test_src, test_dst, buf_size) == 0);
    pl_buf_destroy(gpu, &buf);

    printf("test buffer empty creation, update and readback\n");
    memset(test_dst, 0, buf_size); // make sure stale data can't pass the test
    buf = pl_buf_create(gpu, &(struct pl_buf_params) {
        .size = buf_size,
        .host_writable = true,
        .host_readable = true,
    });

    REQUIRE(buf);
    pl_buf_write(gpu, buf, 0, test_src, buf_size);
    REQUIRE(pl_buf_read(gpu, buf, 0, test_dst, buf_size));
    REQUIRE(memcmp(test_src, test_dst, buf_size) == 0);
    pl_buf_destroy(gpu, &buf);

    printf("test buffer-buffer copy and readback\n");
    memset(test_dst, 0, buf_size);
    // Source buffer: initialized from the reference data, not host-readable
    buf = pl_buf_create(gpu, &(struct pl_buf_params) {
        .size = buf_size,
        .initial_data = test_src,
    });

    // Destination buffer: starts without data, read back after the copy
    tbuf = pl_buf_create(gpu, &(struct pl_buf_params) {
        .size = buf_size,
        .host_readable = true,
    });

    REQUIRE(buf && tbuf);
    pl_buf_copy(gpu, tbuf, 0, buf, 0, buf_size);
    REQUIRE(pl_buf_read(gpu, tbuf, 0, test_dst, buf_size));
    REQUIRE(memcmp(test_src, test_dst, buf_size) == 0);
    pl_buf_destroy(gpu, &buf);
    pl_buf_destroy(gpu, &tbuf);

    if (buf_size <= gpu->limits.max_mapped_size) {
        printf("test host mapped buffer readback\n");
        buf = pl_buf_create(gpu, &(struct pl_buf_params) {
            .size = buf_size,
            .host_mapped = true,
            .initial_data = test_src,
        });

        REQUIRE(buf);
        REQUIRE(!pl_buf_poll(gpu, buf, 0));
        // Compare directly through the mapped pointer, no explicit read
        REQUIRE(memcmp(test_src, buf->data, buf_size) == 0);
        pl_buf_destroy(gpu, &buf);
    }

    free(test_src);
}
80 
// Completion callback used by the transfer tests: `priv` must point to a
// `bool`, which is set to true so the caller can verify the callback ran.
static void test_cb(void *priv)
{
    bool *flag = priv;
    *flag = true;
}
86 
// Uploads random data to tex[0], optionally blits it to tex[1] (when the
// textures were created with blit_src / blit_dst), downloads the result and
// requires it to match the uploaded bytes.
//
// `src` and `dst` are caller-provided scratch buffers; each must be large
// enough to hold every texel of tex[0] (w * h * d * texel_size bytes).
// If either texture failed to be created, the test is skipped.
static void pl_test_roundtrip(pl_gpu gpu, pl_tex tex[2],
                              uint8_t *src, uint8_t *dst)
{
    if (!tex[0] || !tex[1]) {
        printf("failed creating test textures... skipping this test\n");
        return;
    }

    // Unused dimensions are stored as 0, so treat them as a factor of 1
    int texels = tex[0]->params.w;
    texels *= tex[0]->params.h ? tex[0]->params.h : 1;
    texels *= tex[0]->params.d ? tex[0]->params.d : 1;

    pl_fmt fmt = tex[0]->params.format;
    size_t bytes = texels * fmt->texel_size;
    memset(src, 0, bytes);
    memset(dst, 0, bytes);

    for (size_t i = 0; i < bytes; i++)
        src[i] = (RANDOM * 256);

    pl_timer ul, dl;
    ul = pl_timer_create(gpu);
    dl = pl_timer_create(gpu);

    // Set by test_cb once the corresponding transfer has completed
    bool ran_ul = false, ran_dl = false;

    REQUIRE(pl_tex_upload(gpu, &(struct pl_tex_transfer_params){
        .tex = tex[0],
        .ptr = src,
        .timer = ul,
        .callback = gpu->limits.callbacks ? test_cb : NULL,
        .priv = &ran_ul,
    }));

    // Test blitting, if possible for this format
    pl_tex dst_tex = tex[0];
    if (tex[0]->params.blit_src && tex[1]->params.blit_dst) {
        pl_tex_clear_ex(gpu, tex[1], (union pl_clear_color){0}); // for testing
        pl_tex_blit(gpu, &(struct pl_tex_blit_params) {
            .src = tex[0],
            .dst = tex[1],
        });
        dst_tex = tex[1];
    }

    REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params){
        .tex = dst_tex,
        .ptr = dst,
        .timer = dl,
        .callback = gpu->limits.callbacks ? test_cb : NULL,
        .priv = &ran_dl,
    }));

    // Both callbacks must have fired by the time the GPU has finished
    pl_gpu_finish(gpu);
    if (gpu->limits.callbacks)
        REQUIRE(ran_ul && ran_dl);

    if (fmt->emulated && fmt->type == PL_FMT_FLOAT) {
        // TODO: can't memcmp here because bits might be lost due to the
        // emulated 16/32 bit upload paths, figure out a better way to
        // generate data and verify the roundtrip!
    } else {
        REQUIRE(memcmp(src, dst, bytes) == 0);
    }

    // Report timer results
    printf("upload time: %"PRIu64", download time: %"PRIu64"\n",
           pl_timer_query(gpu, ul), pl_timer_query(gpu, dl));

    pl_timer_destroy(gpu, &ul);
    pl_timer_destroy(gpu, &dl);
}
159 
// Runs an upload/(blit)/download roundtrip for every non-opaque,
// host-readable format exposed by `gpu`, using 16-texel-wide 1D, 2D and 3D
// textures wherever the corresponding dimension limit allows it.
static void pl_texture_tests(pl_gpu gpu)
{
    // Worst case: a 16x16x16 texture of 4 double-sized components per texel.
    // One allocation holds both the source and destination scratch areas.
    const size_t max_size = 16*16*16 * 4 *sizeof(double);
    uint8_t *test_src = malloc(max_size * 2);
    assert(test_src);
    uint8_t *test_dst = test_src + max_size;

    for (int f = 0; f < gpu->num_formats; f++) {
        pl_fmt fmt = gpu->formats[f];
        if (fmt->opaque || !(fmt->caps & PL_FMT_CAP_HOST_READABLE))
            continue;

        printf("testing texture roundtrip for format %s\n", fmt->name);
        // Guards the scratch buffer sizing above
        assert(fmt->texel_size <= 4 * sizeof(double));

        // Shared parameters; each case below only fills in the dimensions
        struct pl_tex_params ref_params = {
            .format        = fmt,
            .blit_src      = (fmt->caps & PL_FMT_CAP_BLITTABLE),
            .blit_dst      = (fmt->caps & PL_FMT_CAP_BLITTABLE),
            .host_writable = true,
            .host_readable = true,
        };

        pl_tex tex[2];

        if (gpu->limits.max_tex_1d_dim >= 16) {
            printf("... 1D\n");
            struct pl_tex_params params = ref_params;
            params.w = 16;
            // Don't request blitting for 1D if the GPU doesn't support it
            if (!gpu->limits.blittable_1d_3d)
                params.blit_src = params.blit_dst = false;
            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
                tex[i] = pl_tex_create(gpu, &params);
            pl_test_roundtrip(gpu, tex, test_src, test_dst);
            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
                pl_tex_destroy(gpu, &tex[i]);
        }

        if (gpu->limits.max_tex_2d_dim >= 16) {
            printf("... 2D\n");
            struct pl_tex_params params = ref_params;
            params.w = params.h = 16;
            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
                tex[i] = pl_tex_create(gpu, &params);
            pl_test_roundtrip(gpu, tex, test_src, test_dst);
            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
                pl_tex_destroy(gpu, &tex[i]);
        }

        if (gpu->limits.max_tex_3d_dim >= 16) {
            printf("... 3D\n");
            struct pl_tex_params params = ref_params;
            params.w = params.h = params.d = 16;
            // Don't request blitting for 3D if the GPU doesn't support it
            if (!gpu->limits.blittable_1d_3d)
                params.blit_src = params.blit_dst = false;
            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
                tex[i] = pl_tex_create(gpu, &params);
            pl_test_roundtrip(gpu, tex, test_src, test_dst);
            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
                pl_tex_destroy(gpu, &tex[i]);
        }
    }

    free(test_src);
}
224 
// End-to-end shader tests against a 16x16 float FBO.
//
// A raw raster pass first renders a known pattern (red = (x + 0.5) / FBO_W,
// green = (y + 0.5) / FBO_H, blue = 0, alpha = 1). The same pattern is then
// reproduced through vertex buffers, indexed rendering, pl_dispatch with
// custom vertices, compute blits, and encode/decode round trips (transfer
// functions, color systems, light types) as well as a no-op deband pass;
// after each of those steps the FBO is downloaded and compared against the
// expected pattern. Peak detection, ICC (when PL_HAVE_LCMS is defined),
// AV1 grain and custom shaders are exercised afterwards.
//
// Returns without testing if GLSL < 410 or no suitable FBO format exists.
static void pl_shader_tests(pl_gpu gpu)
{
    if (gpu->glsl.version < 410)
        return;

    const char *vert_shader =
        "#version 410                               \n"
        "layout(location=0) in vec2 vertex_pos;     \n"
        "layout(location=1) in vec3 vertex_color;   \n"
        "layout(location=0) out vec3 frag_color;    \n"
        "void main() {                              \n"
        "    gl_Position = vec4(vertex_pos, 0, 1);  \n"
        "    frag_color = vertex_color;             \n"
        "}";

    const char *frag_shader =
        "#version 410                               \n"
        "layout(location=0) in vec3 frag_color;     \n"
        "layout(location=0) out vec4 out_color;     \n"
        "void main() {                              \n"
        "    out_color = vec4(frag_color, 1.0);     \n"
        "}";

    pl_fmt fbo_fmt;
    enum pl_fmt_caps caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE |
                            PL_FMT_CAP_LINEAR;

    fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 32, caps);
    if (!fbo_fmt)
        return;

#define FBO_W 16
#define FBO_H 16

    pl_tex fbo;
    fbo = pl_tex_create(gpu, &(struct pl_tex_params) {
        .format         = fbo_fmt,
        .w              = FBO_W,
        .h              = FBO_H,
        .renderable     = true,
        .storable       = !!(fbo_fmt->caps & PL_FMT_CAP_STORABLE),
        .host_readable  = true,
        .blit_dst       = true,
    });
    REQUIRE(fbo);

    pl_tex_clear_ex(gpu, fbo, (union pl_clear_color){0});

    pl_fmt vert_fmt;
    vert_fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3);
    REQUIRE(vert_fmt);

    // Full-screen quad as a triangle strip; the per-vertex colors interpolate
    // to the pattern checked by TEST_FBO_PATTERN below
    static const struct vertex { float pos[2]; float color[3]; } vertices[] = {
        {{-1.0, -1.0}, {0, 0, 0}},
        {{ 1.0, -1.0}, {1, 0, 0}},
        {{-1.0,  1.0}, {0, 1, 0}},
        {{ 1.0,  1.0}, {1, 1, 0}},
    };

    pl_pass pass;
    pass = pl_pass_create(gpu, &(struct pl_pass_params) {
        .type           = PL_PASS_RASTER,
        .target_dummy   = *fbo,
        .vertex_shader  = vert_shader,
        .glsl_shader    = frag_shader,

        .vertex_type    = PL_PRIM_TRIANGLE_STRIP,
        .vertex_stride  = sizeof(struct vertex),
        .num_vertex_attribs = 2,
        .vertex_attribs = (struct pl_vertex_attrib[]) {{
            .name     = "vertex_pos",
            .fmt      = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2),
            .location = 0,
            .offset   = offsetof(struct vertex, pos),
        }, {
            .name     = "vertex_color",
            .fmt      = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3),
            .location = 1,
            .offset   = offsetof(struct vertex, color),
        }},
    });
    REQUIRE(pass);
    if (pass->params.cached_program || pass->params.cached_program_len) {
        // Ensure both are set if either one is set
        REQUIRE(pass->params.cached_program);
        REQUIRE(pass->params.cached_program_len);
    }

    pl_timer timer = pl_timer_create(gpu);
    pl_pass_run(gpu, &(struct pl_pass_run_params) {
        .pass           = pass,
        .target         = fbo,
        .vertex_count   = PL_ARRAY_SIZE(vertices),
        .vertex_data    = vertices,
        .timer          = timer,
    });

    // Wait until this pass is complete and report the timer result
    pl_gpu_finish(gpu);
    printf("timer query result: %"PRIu64"\n", pl_timer_query(gpu, timer));
    pl_timer_destroy(gpu, &timer);

    // Readback target for the FBO: 4 floats per texel
    static float data[FBO_H * FBO_W * 4] = {0};

    // Test against the known pattern of `src`, only useful for roundtrip tests
#define TEST_FBO_PATTERN(eps, fmt, ...)                                     \
    do {                                                                    \
        printf("testing pattern of " fmt "\n", __VA_ARGS__);                \
        REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) {     \
            .tex = fbo,                                                     \
            .ptr = data,                                                    \
        }));                                                                \
                                                                            \
        for (int y = 0; y < FBO_H; y++) {                                   \
            for (int x = 0; x < FBO_W; x++) {                               \
                float *color = &data[(y * FBO_W + x) * 4];                  \
                REQUIRE(feq(color[0], (x + 0.5) / FBO_W, eps));             \
                REQUIRE(feq(color[1], (y + 0.5) / FBO_H, eps));             \
                REQUIRE(feq(color[2], 0.0, eps));                           \
                REQUIRE(feq(color[3], 1.0, eps));                           \
            }                                                               \
        }                                                                   \
    } while (0)

    TEST_FBO_PATTERN(1e-6, "%s", "initial rendering");

    if (sizeof(vertices) <= gpu->limits.max_vbo_size) {
        // Test the use of an explicit vertex buffer
        pl_buf vert = pl_buf_create(gpu, &(struct pl_buf_params) {
            .size = sizeof(vertices),
            .initial_data = vertices,
            .drawable = true,
        });

        REQUIRE(vert);
        pl_pass_run(gpu, &(struct pl_pass_run_params) {
            .pass           = pass,
            .target         = fbo,
            .vertex_count   = sizeof(vertices) / sizeof(struct vertex),
            .vertex_buf     = vert,
            .buf_offset     = 0,
        });

        pl_buf_destroy(gpu, &vert);
        TEST_FBO_PATTERN(1e-6, "%s", "using vertex buffer");
    }

    // Test the use of index buffers
    static const uint16_t indices[] = { 3, 2, 1, 0 };
    pl_pass_run(gpu, &(struct pl_pass_run_params) {
        .pass           = pass,
        .target         = fbo,
        .vertex_count   = PL_ARRAY_SIZE(indices),
        .vertex_data    = vertices,
        .index_data     = indices,
    });

    pl_pass_destroy(gpu, &pass);
    TEST_FBO_PATTERN(1e-6, "%s", "using indexed rendering");

    // Test the use of pl_dispatch
    pl_dispatch dp = pl_dispatch_create(gpu->log, gpu);
    pl_shader sh = pl_dispatch_begin(dp);
    REQUIRE(pl_shader_custom(sh, &(struct pl_custom_shader) {
        .body       = "color = vec4(col, 1.0);",
        .input      = PL_SHADER_SIG_NONE,
        .output     = PL_SHADER_SIG_COLOR,
    }));

    REQUIRE(pl_dispatch_vertex(dp, &(struct pl_dispatch_vertex_params) {
        .shader         = &sh,
        .target         = fbo,
        .vertex_stride  = sizeof(struct vertex),
        .vertex_position_idx = 0,
        .num_vertex_attribs = 2,
        .vertex_attribs = (struct pl_vertex_attrib[]) {{
            .name   = "pos",
            .fmt    = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2),
            .offset = offsetof(struct vertex, pos),
        }, {
            .name   = "col",
            .fmt    = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3),
            .offset = offsetof(struct vertex, color),
        }},

        .vertex_type    = PL_PRIM_TRIANGLE_STRIP,
        .vertex_coords  = PL_COORDS_NORMALIZED,
        .vertex_count   = PL_ARRAY_SIZE(vertices),
        .vertex_data    = vertices,
    }));

    TEST_FBO_PATTERN(1e-6, "%s", "using custom vertices");

    // Source texture for all the sampling tests below, initialized from the
    // most recent (already verified) FBO readback in `data`
    pl_tex src;
    src = pl_tex_create(gpu, &(struct pl_tex_params) {
        .format         = fbo_fmt,
        .w              = FBO_W,
        .h              = FBO_H,
        .storable       = fbo->params.storable,
        .sampleable     = true,
        .initial_data   = data,
    });
    // Every remaining test samples from `src`, so fail here rather than later
    REQUIRE(src);

    if (fbo->params.storable) {
        // Test 1x1 blit, to make sure the scaling code runs
        REQUIRE(pl_tex_blit_compute(gpu, dp, &(struct pl_tex_blit_params) {
            .src = src,
            .dst = fbo,
            .src_rc = {0, 0, 0, 1, 1, 1},
            .dst_rc = {0, 0, 0, FBO_W, FBO_H, 1},
            .sample_mode = PL_TEX_SAMPLE_NEAREST,
        }));

        // Test non-resizing blit, which uses the efficient imageLoad path
        REQUIRE(pl_tex_blit_compute(gpu, dp, &(struct pl_tex_blit_params) {
            .src = src,
            .dst = fbo,
            .src_rc = {0, 0, 0, FBO_W, FBO_H, 1},
            .dst_rc = {0, 0, 0, FBO_W, FBO_H, 1},
            .sample_mode = PL_TEX_SAMPLE_NEAREST,
        }));

        TEST_FBO_PATTERN(1e-6, "%s", "pl_tex_blit_compute");
    }

    // Test encoding/decoding of all gamma functions, color spaces, etc.
    for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) {
        sh = pl_dispatch_begin(dp);
        pl_shader_sample_nearest(sh, &(struct pl_sample_src) { .tex = src });
        pl_shader_delinearize(sh, (struct pl_color_space) { .transfer = trc });
        pl_shader_linearize(sh, (struct pl_color_space) { .transfer = trc });
        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
            .shader = &sh,
            .target = fbo,
        }));

        // HDR transfer functions are checked with a looser tolerance
        float epsilon = pl_color_transfer_is_hdr(trc) ? 1e-4 : 1e-6;
        TEST_FBO_PATTERN(epsilon, "transfer function %d", (int) trc);
    }

    for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) {
        sh = pl_dispatch_begin(dp);
        pl_shader_sample_nearest(sh, &(struct pl_sample_src) { .tex = src });
        pl_shader_encode_color(sh, &(struct pl_color_repr) { .sys = sys });
        pl_shader_decode_color(sh, &(struct pl_color_repr) { .sys = sys }, NULL);
        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
            .shader = &sh,
            .target = fbo,
        }));

        float epsilon;
        switch (sys) {
        case PL_COLOR_SYSTEM_BT_2020_C:
            epsilon = 1e-5;
            break;

        case PL_COLOR_SYSTEM_BT_2100_PQ:
        case PL_COLOR_SYSTEM_BT_2100_HLG:
            // These seem to be horrifically noisy and prone to breaking on
            // edge cases for some reason
            // TODO: figure out why!
            continue;

        default: epsilon = 1e-6; break;
        }

        TEST_FBO_PATTERN(epsilon, "color system %d", (int) sys);
    }

    for (enum pl_color_light light = 0; light < PL_COLOR_LIGHT_COUNT; light++) {
        struct pl_color_space dst_space = { .transfer = PL_COLOR_TRC_LINEAR };
        struct pl_color_space src_space = {
            .transfer = PL_COLOR_TRC_LINEAR,
            .light = light
        };
        sh = pl_dispatch_begin(dp);
        pl_shader_sample_nearest(sh, &(struct pl_sample_src) { .tex = src });
        // Map there and back again; the result should equal the input
        pl_shader_color_map(sh, NULL, src_space, dst_space, NULL, false);
        pl_shader_color_map(sh, NULL, dst_space, src_space, NULL, false);
        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
            .shader = &sh,
            .target = fbo,
        }));

        TEST_FBO_PATTERN(1e-6, "light %d", (int) light);
    }

    // Repeat this a few times to test the caching
    for (int i = 0; i < 10; i++) {
        if (i == 5) {
            printf("Recreating pl_dispatch to test the caching\n");
            // First call queries the required size, second fills the buffer
            size_t size = pl_dispatch_save(dp, NULL);
            REQUIRE(size > 0);
            uint8_t *cache = malloc(size);
            REQUIRE(cache);
            REQUIRE(pl_dispatch_save(dp, cache) == size);

            pl_dispatch_destroy(&dp);
            dp = pl_dispatch_create(gpu->log, gpu);
            pl_dispatch_load(dp, cache);

#ifndef MSAN
            // Test to make sure the pass regenerates the same cache, but skip
            // this on MSAN because it doesn't like it when we read from
            // program cache data generated by the non-instrumented GPU driver
            uint64_t hash = pl_str_hash((pl_str) { cache, size });
            REQUIRE(pl_dispatch_save(dp, NULL) == size);
            REQUIRE(pl_dispatch_save(dp, cache) == size);
            REQUIRE(pl_str_hash((pl_str) { cache, size }) == hash);
#endif
            free(cache);
        }

        sh = pl_dispatch_begin(dp);

        // For testing, force the use of CS if possible
        if (gpu->glsl.compute) {
            sh->is_compute = true;
            sh->res.compute_group_size[0] = 8;
            sh->res.compute_group_size[1] = 8;
        }

        // Zero iterations and zero grain: the FBO pattern check below
        // expects the output to equal the input
        pl_shader_deband(sh,
            &(struct pl_sample_src) {
                .tex            = src,
            },
            &(struct pl_deband_params) {
                .iterations     = 0,
                .grain          = 0.0,
            });

        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
            .shader = &sh,
            .target = fbo,
        }));
        TEST_FBO_PATTERN(1e-6, "deband iter %d", i);
    }

    // Test peak detection and readback if possible
    sh = pl_dispatch_begin(dp);
    pl_shader_sample_nearest(sh, &(struct pl_sample_src) { .tex = src });

    pl_shader_obj peak_state = NULL;
    struct pl_color_space csp_gamma22 = { .transfer = PL_COLOR_TRC_GAMMA22 };
    struct pl_peak_detect_params peak_params = { .minimum_peak = 0.01 };
    if (pl_shader_detect_peak(sh, csp_gamma22, &peak_state, &peak_params)) {
        REQUIRE(pl_dispatch_compute(dp, &(struct pl_dispatch_compute_params) {
            .shader = &sh,
            .width = fbo->params.w,
            .height = fbo->params.h,
        }));

        float peak, avg;
        REQUIRE(pl_get_detected_peak(peak_state, &peak, &avg));
        printf("detected peak: %f, average: %f\n", peak, avg);

        // Compute reference values on the CPU from the last FBO readback:
        // peak is the maximum of the gamma 2.2 linearized max(R, G), and
        // average is the geometric mean of the same (clamped to 0.001)
        float real_peak = 0, real_avg = 0;
        for (int y = 0; y < FBO_H; y++) {
            for (int x = 0; x < FBO_W; x++) {
                float *color = &data[(y * FBO_W + x) * 4];
                float smax = powf(PL_MAX(color[0], color[1]), 2.2);
                float slog = logf(PL_MAX(smax, 0.001));
                real_peak = PL_MAX(smax, real_peak);
                real_avg += slog;
            }
        }

        real_avg = expf(real_avg / (FBO_W * FBO_H));
        printf("real peak: %f, real average: %f\n", real_peak, real_avg);
        REQUIRE(feq(peak, real_peak, 1e-4));
        REQUIRE(feq(avg, real_avg, 1e-3));
    }

    pl_dispatch_abort(dp, &sh);
    pl_shader_obj_destroy(&peak_state);

#ifdef PL_HAVE_LCMS
    // Test the use of ICC profiles if available
    sh = pl_dispatch_begin(dp);
    pl_shader_sample_nearest(sh, &(struct pl_sample_src) { .tex = src });

    pl_shader_obj icc = NULL;
    struct pl_icc_color_space src_color = { .color = pl_color_space_bt709 };
    struct pl_icc_color_space dst_color = { .color = pl_color_space_srgb };
    struct pl_icc_result out;

    if (pl_icc_update(sh, &src_color, &dst_color, &icc, &out, NULL)) {
        pl_icc_apply(sh, &icc);
        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
            .shader = &sh,
            .target = fbo,
        }));
    }

    pl_dispatch_abort(dp, &sh);
    pl_shader_obj_destroy(&icc);
#endif

    // Test AV1 grain synthesis
    pl_shader_obj grain = NULL;
    for (int i = 0; i < 2; i++) {
        struct pl_av1_grain_params grain_params = {
            .data = av1_grain_data,
            .tex = src,
            .components = 3,
            .component_mapping = { 0, 1, 2 },
            .repr = &(struct pl_color_repr) {
                .sys = PL_COLOR_SYSTEM_BT_709,
                .levels = PL_COLOR_LEVELS_LIMITED,
                .bits = { .color_depth = 10, .sample_depth = 10 },
            },
        };
        grain_params.data.grain_seed = rand();
        // First iteration runs without overlap, second with overlap
        grain_params.data.overlap = !!i;

        sh = pl_dispatch_begin(dp);
        pl_shader_av1_grain(sh, &grain, &grain_params);
        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
            .shader = &sh,
            .target = fbo,
        }));
    }
    pl_shader_obj_destroy(&grain);

    // Test custom shaders
    struct pl_custom_shader custom = {
        .header =
            "vec3 invert(vec3 color)            \n"
            "{                                  \n"
            "    return vec3(1.0) - color;      \n"
            "}                                  \n",

        .body =
            "color = vec4(gl_FragCoord.xy, 0.0, 1.0);   \n"
            "color.rgb = invert(color.rgb) + offset;    \n",

        .input = PL_SHADER_SIG_NONE,
        .output = PL_SHADER_SIG_COLOR,

        .num_variables = 1,
        .variables = &(struct pl_shader_var) {
            .var = pl_var_float("offset"),
            .data = &(float) { 0.1 },
        },
    };

    sh = pl_dispatch_begin(dp);
    REQUIRE(pl_shader_custom(sh, &custom));
    REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
        .shader = &sh,
        .target = fbo,
    }));

    pl_dispatch_destroy(&dp);
    pl_tex_destroy(gpu, &src);
    pl_tex_destroy(gpu, &fbo);
}
682 
// Upscales a 5x5 texture containing a single bright center texel to a
// 100x100 FBO using the polar EWA Lanczos sampler. If the FBO is
// host-readable, the result is downloaded and printed to stdout as a "P2"
// text image (values clamped to [0, 1] and scaled to 0..255).
//
// Returns without testing if no suitable source/FBO format exists, and
// skips the actual test if any of the required objects can't be created.
static void pl_scaler_tests(pl_gpu gpu)
{
    pl_fmt src_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_LINEAR);
    pl_fmt fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_RENDERABLE);
    if (!src_fmt || !fbo_fmt)
        return;

    // Initialized up front so the cleanup path is valid from any exit point
    float *fbo_data = NULL;
    pl_shader_obj lut = NULL;

    static float data_5x5[5][5] = {
        { 0, 0, 0, 0, 0 },
        { 0, 0, 0, 0, 0 },
        { 0, 0, 1, 0, 0 },
        { 0, 0, 0, 0, 0 },
        { 0, 0, 0, 0, 0 },
    };

    pl_tex dot5x5 = pl_tex_create(gpu, &(struct pl_tex_params) {
        .w              = 5,
        .h              = 5,
        .format         = src_fmt,
        .sampleable     = true,
        .initial_data   = &data_5x5[0][0],
    });

    struct pl_tex_params fbo_params = {
        .w              = 100,
        .h              = 100,
        .format         = fbo_fmt,
        .renderable     = true,
        .storable       = !!(fbo_fmt->caps & PL_FMT_CAP_STORABLE),
        .host_readable  = true,
    };

    pl_tex fbo = pl_tex_create(gpu, &fbo_params);
    if (!fbo) {
        printf("Failed creating readable FBO... falling back to non-readable\n");
        fbo_params.host_readable = false;
        fbo = pl_tex_create(gpu, &fbo_params);
    }

    pl_dispatch dp = pl_dispatch_create(gpu->log, gpu);
    if (!dot5x5 || !fbo || !dp)
        goto error;

    pl_shader sh = pl_dispatch_begin(dp);
    REQUIRE(pl_shader_sample_polar(sh,
        &(struct pl_sample_src) {
            .tex        = dot5x5,
            .new_w      = fbo->params.w,
            .new_h      = fbo->params.h,
        },
        &(struct pl_sample_filter_params) {
            .filter     = pl_filter_ewa_lanczos,
            .lut        = &lut,
            // The compute path is only allowed when the FBO is storable
            .no_compute = !fbo->params.storable,
        }
    ));
    REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
        .shader = &sh,
        .target = fbo,
    }));

    if (fbo->params.host_readable) {
        // One float per texel (single-component float format)
        fbo_data = malloc(fbo->params.w * fbo->params.h * sizeof(float));
        REQUIRE(fbo_data);
        REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) {
            .tex            = fbo,
            .ptr            = fbo_data,
        }));

        int max = 255;
        printf("P2\n%d %d\n%d\n", fbo->params.w, fbo->params.h, max);
        for (int y = 0; y < fbo->params.h; y++) {
            for (int x = 0; x < fbo->params.w; x++) {
                // Rows are `w` texels apart, so the row stride is the width
                float v = fbo_data[y * fbo->params.w + x];
                printf("%d ", (int) round(fmin(fmax(v, 0.0), 1.0) * max));
            }
            printf("\n");
        }
    }

error:
    free(fbo_data);
    pl_shader_obj_destroy(&lut);
    pl_dispatch_destroy(&dp);
    pl_tex_destroy(gpu, &dot5x5);
    pl_tex_destroy(gpu, &fbo);
}
772 
773 static const char *user_shader_tests[] = {
774 
775     // Test hooking, saving and loading
776     "// Example of a comment at the beginning                               \n"
777     "                                                                       \n"
778     "//!HOOK NATIVE                                                         \n"
779     "//!DESC upscale image                                                  \n"
780     "//!BIND HOOKED                                                         \n"
781     "//!WIDTH HOOKED.w 10 *                                                 \n"
782     "//!HEIGHT HOOKED.h 10 *                                                \n"
783     "//!SAVE NATIVEBIG                                                      \n"
784     "//!WHEN NATIVE.w 500 <                                                 \n"
785     "                                                                       \n"
786     "vec4 hook()                                                            \n"
787     "{                                                                      \n"
788     "    return HOOKED_texOff(0);                                           \n"
789     "}                                                                      \n"
790     "                                                                       \n"
791     "//!HOOK MAIN                                                           \n"
792     "//!DESC downscale bigger image                                         \n"
793     "//!WHEN NATIVE.w 500 <                                                 \n"
794     "//!BIND NATIVEBIG                                                      \n"
795     "                                                                       \n"
796     "vec4 hook()                                                            \n"
797     "{                                                                      \n"
798     "    return NATIVEBIG_texOff(0);                                        \n"
799     "}                                                                      \n",
800 
801     // Test use of textures
802     "//!HOOK MAIN                                                           \n"
803     "//!DESC turn everything into colorful pixels                           \n"
804     "//!BIND HOOKED                                                         \n"
805     "//!BIND DISCO                                                          \n"
806     "//!COMPONENTS 3                                                        \n"
807     "                                                                       \n"
808     "vec4 hook()                                                            \n"
809     "{                                                                      \n"
810     "    return vec4(DISCO_tex(HOOKED_pos * 10.0).rgb, 1);                  \n"
811     "}                                                                      \n"
812     "                                                                       \n"
813     "//!TEXTURE DISCO                                                       \n"
814     "//!SIZE 3 3                                                            \n"
815     "//!FORMAT rgba32f                                                      \n"
816     "//!FILTER NEAREST                                                      \n"
817     "//!BORDER REPEAT                                                       \n"
818     "0000803f000000000000000000000000000000000000803f0000000000000000000000000"
819     "00000000000803f00000000000000000000803f0000803f000000000000803f0000000000"
820     "00803f000000000000803f0000803f00000000000000009a99993e9a99993e9a99993e000"
821     "000009a99193F9A99193f9a99193f000000000000803f0000803f0000803f00000000  \n",
822 
823     // Test use of storage/buffer resources
824     "//!HOOK MAIN                                                           \n"
825     "//!DESC attach some storage objects                                    \n"
826     "//!BIND tex_storage                                                    \n"
827     "//!BIND buf_uniform                                                    \n"
828     "//!BIND buf_storage                                                    \n"
829     "//!COMPONENTS 4                                                        \n"
830     "                                                                       \n"
831     "vec4 hook()                                                            \n"
832     "{                                                                      \n"
833     "    return vec4(foo, bar, bat);                                        \n"
834     "}                                                                      \n"
835     "                                                                       \n"
836     "//!TEXTURE tex_storage                                                 \n"
837     "//!SIZE 100 100                                                        \n"
838     "//!FORMAT r32f                                                         \n"
839     "//!STORAGE                                                             \n"
840     "                                                                       \n"
841     "//!BUFFER buf_uniform                                                  \n"
842     "//!VAR float foo                                                       \n"
843     "//!VAR float bar                                                       \n"
844     "0000000000000000                                                       \n"
845     "                                                                       \n"
846     "//!BUFFER buf_storage                                                  \n"
847     "//!VAR vec2 bat                                                        \n"
848     "//!VAR int big[32];                                                    \n"
849     "//!STORAGE                                                             \n"
850 
851 };
852 
// LUT sources handed to pl_lut_parse_cube() by the render tests. Each entry
// is one complete LUT description built from adjacent string literals.
static const char *test_luts[] = {

    // Two-entry 1D table
    [0] =
    "TITLE \"1D identity\"  \n"
    "LUT_1D_SIZE 2          \n"
    "0.0 0.0 0.0            \n"
    "1.0 1.0 1.0            \n",

    // 2x2x2 3D table
    [1] =
    "TITLE \"3D identity\"  \n"
    "LUT_3D_SIZE 2          \n"
    "0.0 0.0 0.0            \n"
    "1.0 0.0 0.0            \n"
    "0.0 1.0 0.0            \n"
    "1.0 1.0 0.0            \n"
    "0.0 0.0 1.0            \n"
    "1.0 0.0 1.0            \n"
    "0.0 1.0 1.0            \n"
    "1.0 1.0 1.0            \n",

};
872 
// `map` callback installed on the queued source frames below. The frame data
// attached to `src` is read as a `struct pl_frame` and copied into
// `out_frame` unchanged; `gpu` and `tex` are not touched. Always succeeds.
static bool frame_passthrough(pl_gpu gpu, pl_tex *tex,
                              const struct pl_source_frame *src, struct pl_frame *out_frame)
{
    *out_frame = *(const struct pl_frame *) src->frame_data;
    return true;
}
880 
get_frame_ptr(struct pl_source_frame * out_frame,const struct pl_queue_params * qparams)881 static enum pl_queue_status get_frame_ptr(struct pl_source_frame *out_frame,
882                                           const struct pl_queue_params *qparams)
883 {
884     const struct pl_source_frame **pframe = qparams->priv;
885     if (!(*pframe)->frame_data)
886         return PL_QUEUE_EOF;
887 
888     *out_frame = *(*pframe)++;
889     return PL_QUEUE_OK;
890 }
891 
render_info_cb(void * priv,const struct pl_render_info * info)892 static void render_info_cb(void *priv, const struct pl_render_info *info)
893 {
894     printf("{%d} Executed shader: %s\n", info->index,
895            info->pass->shader->description);
896 }
897 
// Exercises the renderer using a 5x5 single-channel float image as source and
// a same-sized texture as target. Covers, in order: a plain render plus
// download, every entry of `pl_scale_filters`, several parameter structs,
// signal scale changes, color adjustment / ICC LUT / AV1 grain, the user
// shaders in `user_shader_tests`, the LUTs in `test_luts`, image and target
// overlays, and frame mixing via the frame queue (both pushed and pulled).
//
// Returns silently if the textures or the renderer cannot be created; any
// failure after that point is reported through REQUIRE.
static void pl_render_tests(pl_gpu gpu)
{
    pl_tex img5x5_tex = NULL, fbo = NULL;
    pl_renderer rr = NULL;

    float *fbo_data = NULL;
    static float data_5x5[5][5] = {
        { 0.0, 0.0, 0.0, 0.0, 0.0 },
        { 0.0, 0.0, 0.0, 0.0, 0.0 },
        { 1.0, 0.0, 0.5, 0.0, 0.0 },
        { 0.0, 0.0, 0.0, 1.0, 0.0 },
        { 0.0, 0.3, 0.0, 0.0, 0.0 },
    };

    const int width = 5, height = 5;
    struct pl_plane img5x5 = {0};
    struct pl_plane_data img5x5_data = {
        .type = PL_FMT_FLOAT,
        .width = width,
        .height = height,
        .component_size = { 8 * sizeof(float) },
        .component_map  = { 0 },
        .pixel_stride = sizeof(float),
        .pixels = &data_5x5,
    };

    // The same plane description is used for the render target and the source
    if (!pl_recreate_plane(gpu, NULL, &fbo, &img5x5_data))
        return;

    if (!pl_upload_plane(gpu, &img5x5, &img5x5_tex, &img5x5_data))
        goto error;

    // Everything below dereferences the renderer, so bail out (like
    // pl_ycbcr_tests does) if it could not be created
    rr = pl_renderer_create(gpu->log, gpu);
    if (!rr)
        goto error;

    pl_tex_clear_ex(gpu, fbo, (union pl_clear_color){0});

    struct pl_frame image = {
        .num_planes     = 1,
        .planes         = { img5x5 },
        .repr = {
            .sys        = PL_COLOR_SYSTEM_BT_709,
            .levels     = PL_COLOR_LEVELS_FULL,
        },
        .color          = pl_color_space_srgb,
        // Source rect starts at x = -1, i.e. one column outside the texture
        .crop           = {-1.0, 0.0, width - 1.0, height},
    };

    struct pl_frame target = {
        .num_planes     = 1,
        .planes         = {{
            .texture            = fbo,
            .components         = 3,
            .component_mapping  = {0, 1, 2},
        }},
        .crop           = {2, 2, fbo->params.w - 2, fbo->params.h - 2},
        .repr = {
            .sys        = PL_COLOR_SYSTEM_RGB,
            .levels     = PL_COLOR_LEVELS_FULL,
        },
        .color          = pl_color_space_srgb,
    };

    REQUIRE(pl_render_image(rr, &image, &target, NULL));

    // The download writes through this pointer, so it must be valid
    fbo_data = malloc(fbo->params.w * fbo->params.h * sizeof(float[4]));
    REQUIRE(fbo_data);
    REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) {
        .tex            = fbo,
        .ptr            = fbo_data,
    }));

    // TODO: embed a reference texture and ensure it matches

    // Test a bunch of different params
    //
    // TEST renders five times for every integer value 0..LIMIT assigned to
    // `FIELD` of a copy of `DEFAULT`, attached to the render params as
    // `SNAME`, with dithering forced on.
#define TEST(SNAME, STYPE, DEFAULT, FIELD, LIMIT)                       \
    do {                                                                \
        for (int i = 0; i <= LIMIT; i++) {                              \
            printf("testing `" #STYPE "." #FIELD " = %d`\n", i);        \
            struct pl_render_params params = pl_render_default_params;  \
            params.force_dither = true;                                 \
            struct STYPE tmp = DEFAULT;                                 \
            tmp.FIELD = i;                                              \
            params.SNAME = &tmp;                                        \
            for (int p = 0; p < 5; p++) {                               \
                REQUIRE(pl_render_image(rr, &image, &target, &params)); \
                pl_gpu_flush(gpu);                                      \
            }                                                           \
        }                                                               \
    } while (0)

    // Shorthand for the common `pl_<NAME>_params` / `pl_<NAME>_default_params`
    // naming pattern
#define TEST_PARAMS(NAME, FIELD, LIMIT) \
    TEST(NAME##_params, pl_##NAME##_params, pl_##NAME##_default_params, FIELD, LIMIT)

    for (int i = 0; i < pl_num_scale_filters; i++) {
        struct pl_render_params params = pl_render_default_params;
        params.upscaler = pl_scale_filters[i].filter;
        printf("testing `params.upscaler = /* %s */`\n", pl_scale_filters[i].name);
        REQUIRE(pl_render_image(rr, &image, &target, &params));
        pl_gpu_flush(gpu);
    }

    TEST_PARAMS(deband, iterations, 3);
    TEST_PARAMS(sigmoid, center, 1);
    TEST_PARAMS(color_map, intent, PL_INTENT_ABSOLUTE_COLORIMETRIC);
    TEST_PARAMS(color_map, gamut_warning, 1);
    TEST_PARAMS(dither, method, PL_DITHER_WHITE_NOISE);
    TEST_PARAMS(dither, temporal, true);
    TEST(cone_params, pl_cone_params, pl_vision_deuteranomaly, strength, 0);

    // Test HDR stuff
    image.color.sig_scale = 10.0;
    target.color.sig_scale = 2.0;
    TEST_PARAMS(color_map, tone_mapping_algo, PL_TONE_MAPPING_BT_2390);
    TEST_PARAMS(color_map, desaturation_strength, 1);
    image.color.sig_scale = target.color.sig_scale = 0.0;

    // Test some misc stuff
    struct pl_render_params params = pl_render_default_params;
    params.color_adjustment = &(struct pl_color_adjustment) {
        .brightness = 0.1,
        .contrast = 0.9,
        .saturation = 1.5,
        .gamma = 0.8,
        .temperature = 0.3,
    };
    REQUIRE(pl_render_image(rr, &image, &target, &params));
    params = pl_render_default_params;

    params.force_icc_lut = true;
    REQUIRE(pl_render_image(rr, &image, &target, &params));
    params = pl_render_default_params;

    image.av1_grain = av1_grain_data;
    REQUIRE(pl_render_image(rr, &image, &target, &params));
    image.av1_grain = (struct pl_av1_grain_data) {0};

    // Test mpv-style custom shaders
    for (int i = 0; i < PL_ARRAY_SIZE(user_shader_tests); i++) {
        printf("testing user shader:\n\n%s\n", user_shader_tests[i]);
        const struct pl_hook *hook;
        hook = pl_mpv_user_shader_parse(gpu, user_shader_tests[i],
                                        strlen(user_shader_tests[i]));

        if (gpu->glsl.compute) {
            REQUIRE(hook);
        } else {
            // Not all shaders compile without compute shader support
            if (!hook)
                continue;
        }

        params.hooks = &hook;
        params.num_hooks = 1;
        REQUIRE(pl_render_image(rr, &image, &target, &params));

        pl_mpv_user_shader_destroy(&hook);
    }
    // Drops the pointer to the loop-local `hook` again
    params = pl_render_default_params;

    // Test custom LUTs
    for (int i = 0; i < PL_ARRAY_SIZE(test_luts); i++) {
        printf("testing custom lut %d\n", i);
        struct pl_custom_lut *lut;
        lut = pl_lut_parse_cube(gpu->log, test_luts[i], strlen(test_luts[i]));
        REQUIRE(lut);

        // Test all three at the same time to reduce the number of tests
        image.lut = target.lut = params.lut = lut;

        for (enum pl_lut_type t = PL_LUT_UNKNOWN; t <= PL_LUT_CONVERSION; t++) {
            printf("testing LUT method %d\n", t);
            image.lut_type = target.lut_type = params.lut_type = t;
            REQUIRE(pl_render_image(rr, &image, &target, &params));
        }

        // Detach the LUT everywhere before freeing it
        image.lut = target.lut = params.lut = NULL;
        pl_lut_free(&lut);
    }

    // Test overlays
    image.num_overlays = 1;
    image.overlays = &(struct pl_overlay) {
        .tex = img5x5.texture,
        .mode = PL_OVERLAY_NORMAL,
        .num_parts = 2,
        .parts = (struct pl_overlay_part[]) {{
            .src = {0, 0, 2, 2},
            .dst = {30, 100, 40, 200},
        }, {
            .src = {2, 2, 5, 5},
            .dst = {1000, -1, 3, 5},
        }},
    };
    REQUIRE(pl_render_image(rr, &image, &target, &params));
    params.disable_fbos = true;
    REQUIRE(pl_render_image(rr, &image, &target, &params));
    image.num_overlays = 0;
    params = pl_render_default_params;

    target.num_overlays = 1;
    target.overlays = &(struct pl_overlay) {
        .tex = img5x5.texture,
        .mode = PL_OVERLAY_MONOCHROME,
        .num_parts = 1,
        .parts = &(struct pl_overlay_part) {
            .src = {5, 5, 15, 15},
            .dst = {5, 5, 15, 15},
            .color = {1.0, 0.5, 0.0},
        },
    };
    REQUIRE(pl_render_image(rr, &image, &target, &params));
    // Also render the target overlay with no source image at all
    REQUIRE(pl_render_image(rr, NULL, &target, &params));
    target.num_overlays = 0;

    // Attempt frame mixing, using the mixer queue helper
    printf("testing frame mixing \n");
    struct pl_render_params mix_params = {
        .frame_mixer = &pl_filter_mitchell_clamp,
        .info_callback = render_info_cb,
    };

    struct pl_queue_params qparams = {
        .radius = pl_frame_mix_radius(&mix_params),
        .vsync_duration = 1.0 / 60.0,
        .frame_duration = 1.0 / 24.0,
    };

    // One extra zeroed entry terminates the array for get_frame_ptr()
#define NUM_MIX_FRAMES 20
    struct pl_source_frame srcframes[NUM_MIX_FRAMES+1];
    srcframes[NUM_MIX_FRAMES] = (struct pl_source_frame) {0};
    for (int i = 0; i < NUM_MIX_FRAMES; i++) {
        srcframes[i] = (struct pl_source_frame) {
            .pts = i * qparams.frame_duration,
            .map = frame_passthrough,
            .frame_data = &image,
        };
    }

    pl_queue queue = pl_queue_create(gpu);
    enum pl_queue_status ret;

    // Test pre-pushing all frames, with delayed EOF.
    for (int i = 0; i < NUM_MIX_FRAMES; i++) {
        if (!pl_queue_push_block(queue, 1, &srcframes[i])) // mini-sleep
            pl_queue_push(queue, &srcframes[i]); // push it anyway, for testing
    }

    struct pl_frame_mix mix;
    while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) {
        if (ret == PL_QUEUE_MORE) {
            REQUIRE(qparams.pts > 0.0);
            pl_queue_push(queue, NULL); // push delayed EOF
            continue;
        }

        REQUIRE(ret == PL_QUEUE_OK);
        REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params));

        // Simulate advancing vsync
        qparams.pts += qparams.vsync_duration;
    }

    // Test dynamically pulling all frames, with oversample mixer
    const struct pl_source_frame *frame_ptr = &srcframes[0];
    mix_params.frame_mixer = &pl_oversample_frame_mixer;

    // Fields not listed here (including `pts`) start over at zero
    qparams = (struct pl_queue_params) {
        .radius = pl_frame_mix_radius(&mix_params),
        .vsync_duration = qparams.vsync_duration,
        .frame_duration = qparams.frame_duration,
        .get_frame = get_frame_ptr,
        .priv = &frame_ptr,
    };

    pl_queue_reset(queue);
    while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) {
        REQUIRE(ret == PL_QUEUE_OK);
        REQUIRE(mix.num_frames <= 2);
        REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params));
        qparams.pts += qparams.vsync_duration;
    }

    // Test large PTS jump
    pl_queue_reset(queue);
    REQUIRE(pl_queue_update(queue, &mix, &qparams) == PL_QUEUE_EOF);

    pl_queue_destroy(&queue);

error:
    free(fbo_data);
    pl_renderer_destroy(&rr);
    pl_tex_destroy(gpu, &img5x5_tex);
    pl_tex_destroy(gpu, &fbo);
}
1190 
noop_hook(void * priv,const struct pl_hook_params * params)1191 static struct pl_hook_res noop_hook(void *priv, const struct pl_hook_params *params)
1192 {
1193     return (struct pl_hook_res) {0};
1194 }
1195 
// Round-trips a three-plane, 16-bit image through the renderer and checks
// that every downloaded sample is within 50 code values of what was uploaded.
// Plane 0 is 323x255; planes 1 and 2 are half that size, rounded up.
//
// Returns silently if no suitable format exists, or if the renderer, a source
// buffer, or a destination texture cannot be created.
static void pl_ycbcr_tests(pl_gpu gpu)
{
    struct pl_plane_data data[3];
    for (int i = 0; i < 3; i++) {
        const int sub = i > 0 ? 1 : 0;
        const int width = (323 + sub) >> sub;
        const int height = (255 + sub) >> sub;

        data[i] = (struct pl_plane_data) {
            .type = PL_FMT_UNORM,
            .width = width,
            .height = height,
            .component_size = {16},
            .component_map = {i},
            .pixel_stride = sizeof(uint16_t),
            .row_stride = PL_ALIGN2(width * sizeof(uint16_t),
                                    gpu->limits.align_tex_xfer_stride),
        };
    }

    // The destination textures are created both renderable and host-readable,
    // so the format must provide *both* capabilities, not just one of them.
    // This check runs before any resource is created, so returning here
    // cannot leak anything.
    const int required_caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_HOST_READABLE;
    pl_fmt fmt = pl_plane_find_fmt(gpu, NULL, &data[0]);
    if (!fmt || (fmt->caps & required_caps) != required_caps)
        return;

    pl_renderer rr = pl_renderer_create(gpu->log, gpu);
    if (!rr)
        return;

    pl_tex src_tex[3] = {0};
    pl_tex dst_tex[3] = {0};
    struct pl_frame img = {
        .num_planes = 3,
        .repr = pl_color_repr_hdtv,
        .color = pl_color_space_bt709,
    };

    struct pl_frame target = {
        .num_planes = 3,
        .repr = pl_color_repr_hdtv,
        .color = pl_color_space_bt709,
    };

    uint8_t *src_buffer[3] = {0};
    uint8_t *dst_buffer = NULL;
    for (int i = 0; i < 3; i++) {
        // Generate some arbitrary data for the buffer
        src_buffer[i] = malloc(data[i].height * data[i].row_stride);
        if (!src_buffer[i])
            goto error;

        data[i].pixels = src_buffer[i];
        for (int y = 0; y < data[i].height; y++) {
            for (int x = 0; x < data[i].width; x++) {
                size_t off = y * data[i].row_stride + x * data[i].pixel_stride;
                uint16_t *pixel = (uint16_t *) &src_buffer[i][off];
                int gx = 200 + 100 * i, gy = 300 + 150 * i;
                *pixel = (gx * x) ^ (gy * y); // whatever
            }
        }

        REQUIRE(pl_upload_plane(gpu, &img.planes[i], &src_tex[i], &data[i]));
    }

    // This co-sites chroma pixels with pixels in the RGB image, meaning we
    // get an exact round-trip when sampling both ways. This makes it useful
    // as a test case, even though it's not common in the real world.
    pl_frame_set_chroma_location(&img, PL_CHROMA_TOP_LEFT);

    for (int i = 0; i < 3; i++) {
        dst_tex[i] = pl_tex_create(gpu, &(struct pl_tex_params) {
            .format = fmt,
            .w = data[i].width,
            .h = data[i].height,
            .renderable = true,
            .host_readable = true,
            .storable = fmt->caps & PL_FMT_CAP_STORABLE,
            .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE,
        });

        if (!dst_tex[i])
            goto error;

        // Same plane description as the source, pointed at the new texture
        target.planes[i] = img.planes[i];
        target.planes[i].texture = dst_tex[i];
    }

    REQUIRE(pl_render_image(rr, &img, &target, &(struct pl_render_params) {
        .num_hooks = 1,
        .hooks = &(const struct pl_hook *){&(struct pl_hook) {
            // Forces chroma merging, to test the chroma merging code
            .stages = PL_HOOK_CHROMA_INPUT,
            .hook = noop_hook,
        }},
    }));

    // Sized for plane 0, which is the largest, and reused for every plane
    size_t buf_size = data[0].height * data[0].row_stride;
    dst_buffer = malloc(buf_size);
    if (!dst_buffer)
        goto error;

    for (int i = 0; i < 3; i++) {
        memset(dst_buffer, 0xAA, buf_size);
        REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) {
            .tex = dst_tex[i],
            .ptr = dst_buffer,
            .stride_w = data[i].row_stride / data[i].pixel_stride,
        }));

        for (int y = 0; y < data[i].height; y++) {
            for (int x = 0; x < data[i].width; x++) {
                size_t off = y * data[i].row_stride + x * data[i].pixel_stride;
                uint16_t *src_pixel = (uint16_t *) &src_buffer[i][off];
                uint16_t *dst_pixel = (uint16_t *) &dst_buffer[off];
                int diff = abs((int) *src_pixel - (int) *dst_pixel);
                REQUIRE(diff <= 50); // a little under 0.1%
            }
        }
    }

error:
    pl_renderer_destroy(&rr);
    free(dst_buffer);
    for (int i = 0; i < 3; i++) {
        free(src_buffer[i]);
        pl_tex_destroy(gpu, &src_tex[i]);
        pl_tex_destroy(gpu, &dst_tex[i]);
    }
}
1324 
// Creates a texture and then a buffer with an export handle of type
// `handle_type`, and re-imports each one through its shared memory handle.
// Either half is skipped when the GPU does not advertise both export and
// import support for that handle type; the texture half is also skipped when
// no matching format is found.
static void pl_test_export_import(pl_gpu gpu,
                                  enum pl_handle_type handle_type)
{
    // Test texture roundtrip

    // Only look for a format when both directions are supported
    pl_fmt fmt = NULL;
    if ((gpu->export_caps.tex & handle_type) &&
        (gpu->import_caps.tex & handle_type))
    {
        fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 1, 0, 0, PL_FMT_CAP_BLITTABLE);
    }

    if (fmt) {
        printf("testing texture import/export\n");

        pl_tex export = pl_tex_create(gpu, &(struct pl_tex_params) {
            .w = 32,
            .h = 32,
            .format = fmt,
            .export_handle = handle_type,
        });
        REQUIRE(export);
        REQUIRE_HANDLE(export->shared_mem, handle_type);

        pl_tex import = pl_tex_create(gpu, &(struct pl_tex_params) {
            .w = 32,
            .h = 32,
            .format = fmt,
            .import_handle = handle_type,
            .shared_mem = export->shared_mem,
        });
        REQUIRE(import);

        pl_tex_destroy(gpu, &import);
        pl_tex_destroy(gpu, &export);
    }

    // Test buffer roundtrip

    const bool can_export_buf = gpu->export_caps.buf & handle_type;
    const bool can_import_buf = gpu->import_caps.buf & handle_type;
    if (can_export_buf && can_import_buf) {
        printf("testing buffer import/export\n");

        pl_buf exp_buf = pl_buf_create(gpu, &(struct pl_buf_params) {
            .size = 32,
            .export_handle = handle_type,
        });
        REQUIRE(exp_buf);
        REQUIRE_HANDLE(exp_buf->shared_mem, handle_type);

        pl_buf imp_buf = pl_buf_create(gpu, &(struct pl_buf_params) {
            .size = 32,
            .import_handle = handle_type,
            .shared_mem = exp_buf->shared_mem,
        });
        REQUIRE(imp_buf);

        pl_buf_destroy(gpu, &imp_buf);
        pl_buf_destroy(gpu, &exp_buf);
    }
}
1388 
// Imports a slice of page-aligned host memory as a host-mapped buffer and
// checks that the mapping exposes the same bytes as the slice. Does nothing
// when the GPU cannot import host pointers, or when not built for a
// `__unix__` target.
static void pl_test_host_ptr(pl_gpu gpu)
{
    if (!(gpu->import_caps.buf & PL_HANDLE_HOST_PTR))
        return;

#ifdef __unix__

    printf("testing host ptr\n");
    REQUIRE(gpu->limits.max_mapped_size);

    const size_t size = 2 << 20;   // whole allocation: 2 MiB
    const size_t offset = 2 << 10; // start of the imported slice: 2 KiB in
    const size_t slice = 2 << 16;  // length of the imported slice: 128 KiB

    // The fill loop writes through this pointer, so fail loudly instead of
    // crashing if the allocation did not succeed
    uint8_t *data = aligned_alloc(0x1000, size);
    REQUIRE(data);
    for (size_t i = 0; i < size; i++)
        data[i] = (uint8_t) i;

    pl_buf buf = pl_buf_create(gpu, &(struct pl_buf_params) {
        .type = PL_BUF_TEX_TRANSFER,
        .size = slice,
        .import_handle = PL_HANDLE_HOST_PTR,
        .shared_mem = {
            .handle.ptr = data,
            .size = size,
            .offset = offset,
        },
        .host_mapped = true,
    });

    REQUIRE(buf);
    REQUIRE(memcmp(data + offset, buf->data, slice) == 0);

    pl_buf_destroy(gpu, &buf);
    free(data);

#endif // unix
}
1427 
// Runs the buffer, texture, shader, scaler, renderer and YCbCr test groups on
// `gpu`, in that order, then requires that pl_gpu_is_failed() reports false.
static void gpu_shader_tests(pl_gpu gpu)
{
    pl_buffer_tests(gpu);
    pl_texture_tests(gpu);
    pl_shader_tests(gpu);
    pl_scaler_tests(gpu);
    pl_render_tests(gpu);
    pl_ycbcr_tests(gpu);

    // Checked once, after all groups have run
    REQUIRE(!pl_gpu_is_failed(gpu));
}
1439 
// Runs the export/import roundtrip test with PL_HANDLE_DMA_BUF handles and
// the host pointer import test on `gpu`, then requires that
// pl_gpu_is_failed() reports false.
static void gpu_interop_tests(pl_gpu gpu)
{
    pl_test_export_import(gpu, PL_HANDLE_DMA_BUF);
    pl_test_host_ptr(gpu);

    // Checked once, after both tests have run
    REQUIRE(!pl_gpu_is_failed(gpu));
}
1447