1 #include "tests.h"
2 #include "shaders.h"
3
// Exercises buffer creation, host read/write, buffer-to-buffer copies and
// host mapping against a random 1 KiB test pattern. Skips silently if the
// GPU cannot support the test (size limits, allocation failure).
static void pl_buffer_tests(pl_gpu gpu)
{
    const size_t buf_size = 1024;
    if (buf_size > gpu->limits.max_buf_size)
        return;

    // One allocation holds both halves: [0, buf_size) is the random source
    // pattern, [buf_size, 2*buf_size) receives GPU readbacks for comparison.
    uint8_t *test_src = malloc(buf_size * 2);
    if (!test_src)
        return; // allocation failure: skip the test rather than crash
    uint8_t *test_dst = test_src + buf_size;
    memset(test_dst, 0, buf_size);
    // size_t index to match buf_size and avoid signed/unsigned comparison
    for (size_t i = 0; i < buf_size; i++)
        test_src[i] = (RANDOM * 256);

    pl_buf buf = NULL, tbuf = NULL;

    printf("test buffer static creation and readback\n");
    buf = pl_buf_create(gpu, &(struct pl_buf_params) {
        .size = buf_size,
        .host_readable = true,
        .initial_data = test_src,
    });

    REQUIRE(buf);
    REQUIRE(pl_buf_read(gpu, buf, 0, test_dst, buf_size));
    REQUIRE(memcmp(test_src, test_dst, buf_size) == 0);
    pl_buf_destroy(gpu, &buf);

    printf("test buffer empty creation, update and readback\n");
    memset(test_dst, 0, buf_size);
    buf = pl_buf_create(gpu, &(struct pl_buf_params) {
        .size = buf_size,
        .host_writable = true,
        .host_readable = true,
    });

    REQUIRE(buf);
    pl_buf_write(gpu, buf, 0, test_src, buf_size);
    REQUIRE(pl_buf_read(gpu, buf, 0, test_dst, buf_size));
    REQUIRE(memcmp(test_src, test_dst, buf_size) == 0);
    pl_buf_destroy(gpu, &buf);

    printf("test buffer-buffer copy and readback\n");
    memset(test_dst, 0, buf_size);
    buf = pl_buf_create(gpu, &(struct pl_buf_params) {
        .size = buf_size,
        .initial_data = test_src,
    });

    tbuf = pl_buf_create(gpu, &(struct pl_buf_params) {
        .size = buf_size,
        .host_readable = true,
    });

    REQUIRE(buf && tbuf);
    pl_buf_copy(gpu, tbuf, 0, buf, 0, buf_size);
    REQUIRE(pl_buf_read(gpu, tbuf, 0, test_dst, buf_size));
    REQUIRE(memcmp(test_src, test_dst, buf_size) == 0);
    pl_buf_destroy(gpu, &buf);
    pl_buf_destroy(gpu, &tbuf);

    // Host-mapped buffers only work up to the mapping size limit
    if (buf_size <= gpu->limits.max_mapped_size) {
        printf("test host mapped buffer readback\n");
        buf = pl_buf_create(gpu, &(struct pl_buf_params) {
            .size = buf_size,
            .host_mapped = true,
            .initial_data = test_src,
        });

        REQUIRE(buf);
        // Creation must have completed before the mapping is coherent
        REQUIRE(!pl_buf_poll(gpu, buf, 0));
        REQUIRE(memcmp(test_src, buf->data, buf_size) == 0);
        pl_buf_destroy(gpu, &buf);
    }

    free(test_src);
}
80
// Generic async-completion callback: sets the bool pointed to by `priv`,
// so the caller can later verify that the callback actually fired.
static void test_cb(void *priv)
{
    *(bool *) priv = true;
}
86
// Uploads a random pattern into tex[0], optionally blits it to tex[1] (when
// the format supports blitting), downloads the result and verifies it
// round-tripped intact. `src` and `dst` are caller-provided scratch buffers,
// each large enough for the full texel payload of the texture.
static void pl_test_roundtrip(pl_gpu gpu, pl_tex tex[2],
                              uint8_t *src, uint8_t *dst)
{
    if (!tex[0] || !tex[1]) {
        printf("failed creating test textures... skipping this test\n");
        return;
    }

    // Total texel count; unset h/d dimensions (1D/2D textures) count as 1
    int texels = tex[0]->params.w;
    texels *= tex[0]->params.h ? tex[0]->params.h : 1;
    texels *= tex[0]->params.d ? tex[0]->params.d : 1;

    pl_fmt fmt = tex[0]->params.format;
    size_t bytes = texels * fmt->texel_size;
    memset(src, 0, bytes);
    memset(dst, 0, bytes);

    // Fill the source with random bytes
    for (size_t i = 0; i < bytes; i++)
        src[i] = (RANDOM * 256);

    // Timers for measuring the upload and download separately
    pl_timer ul, dl;
    ul = pl_timer_create(gpu);
    dl = pl_timer_create(gpu);

    // Flags set by test_cb to prove completion callbacks fire (only when
    // the GPU advertises callback support)
    bool ran_ul = false, ran_dl = false;

    REQUIRE(pl_tex_upload(gpu, &(struct pl_tex_transfer_params){
        .tex = tex[0],
        .ptr = src,
        .timer = ul,
        .callback = gpu->limits.callbacks ? test_cb : NULL,
        .priv = &ran_ul,
    }));

    // Test blitting, if possible for this format
    pl_tex dst_tex = tex[0];
    if (tex[0]->params.blit_src && tex[1]->params.blit_dst) {
        pl_tex_clear_ex(gpu, tex[1], (union pl_clear_color){0}); // for testing
        pl_tex_blit(gpu, &(struct pl_tex_blit_params) {
            .src = tex[0],
            .dst = tex[1],
        });
        dst_tex = tex[1]; // download from the blit target instead
    }

    REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params){
        .tex = dst_tex,
        .ptr = dst,
        .timer = dl,
        .callback = gpu->limits.callbacks ? test_cb : NULL,
        .priv = &ran_dl,
    }));

    // Drain the GPU so both transfers (and their callbacks) have completed
    pl_gpu_finish(gpu);
    if (gpu->limits.callbacks)
        REQUIRE(ran_ul && ran_dl);

    if (fmt->emulated && fmt->type == PL_FMT_FLOAT) {
        // TODO: can't memcmp here because bits might be lost due to the
        // emulated 16/32 bit upload paths, figure out a better way to
        // generate data and verify the roundtrip!
    } else {
        REQUIRE(memcmp(src, dst, bytes) == 0);
    }

    // Report timer results
    printf("upload time: %"PRIu64", download time: %"PRIu64"\n",
           pl_timer_query(gpu, ul), pl_timer_query(gpu, dl));

    pl_timer_destroy(gpu, &ul);
    pl_timer_destroy(gpu, &dl);
}
159
// Runs the upload/blit/download roundtrip (pl_test_roundtrip) over every
// host-readable, non-opaque format in 1D, 2D and 3D variants (16 texels per
// dimension), subject to the GPU's dimension limits.
static void pl_texture_tests(pl_gpu gpu)
{
    // Worst-case payload: 16x16x16 texels of the largest texel size we
    // assert below (4 components of double); x2 for src + dst halves
    const size_t max_size = 16*16*16 * 4 * sizeof(double);
    uint8_t *test_src = malloc(max_size * 2);
    if (!test_src)
        return; // allocation failure: skip these tests rather than crash
    uint8_t *test_dst = test_src + max_size;

    for (int f = 0; f < gpu->num_formats; f++) {
        pl_fmt fmt = gpu->formats[f];
        if (fmt->opaque || !(fmt->caps & PL_FMT_CAP_HOST_READABLE))
            continue;

        printf("testing texture roundtrip for format %s\n", fmt->name);
        assert(fmt->texel_size <= 4 * sizeof(double));

        // Common parameters shared by all three dimensionalities
        struct pl_tex_params ref_params = {
            .format = fmt,
            .blit_src = (fmt->caps & PL_FMT_CAP_BLITTABLE),
            .blit_dst = (fmt->caps & PL_FMT_CAP_BLITTABLE),
            .host_writable = true,
            .host_readable = true,
        };

        pl_tex tex[2];

        if (gpu->limits.max_tex_1d_dim >= 16) {
            printf("... 1D\n");
            struct pl_tex_params params = ref_params;
            params.w = 16;
            // 1D/3D blits are an optional capability
            if (!gpu->limits.blittable_1d_3d)
                params.blit_src = params.blit_dst = false;
            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
                tex[i] = pl_tex_create(gpu, &params);
            pl_test_roundtrip(gpu, tex, test_src, test_dst);
            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
                pl_tex_destroy(gpu, &tex[i]);
        }

        if (gpu->limits.max_tex_2d_dim >= 16) {
            printf("... 2D\n");
            struct pl_tex_params params = ref_params;
            params.w = params.h = 16;
            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
                tex[i] = pl_tex_create(gpu, &params);
            pl_test_roundtrip(gpu, tex, test_src, test_dst);
            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
                pl_tex_destroy(gpu, &tex[i]);
        }

        if (gpu->limits.max_tex_3d_dim >= 16) {
            printf("... 3D\n");
            struct pl_tex_params params = ref_params;
            params.w = params.h = params.d = 16;
            // 1D/3D blits are an optional capability
            if (!gpu->limits.blittable_1d_3d)
                params.blit_src = params.blit_dst = false;
            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
                tex[i] = pl_tex_create(gpu, &params);
            pl_test_roundtrip(gpu, tex, test_src, test_dst);
            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
                pl_tex_destroy(gpu, &tex[i]);
        }
    }

    free(test_src);
}
224
// Exercises the raster pass / dispatch machinery: renders a known gradient
// into a 16x16 FBO and then repeatedly round-trips it through various shader
// stages (color transfer, color systems, debanding, LUTs, custom shaders,
// AV1 grain, peak detection, ICC), verifying the FBO pattern after each.
static void pl_shader_tests(pl_gpu gpu)
{
    // Requires GLSL 4.10 for the explicit attribute/varying locations below
    if (gpu->glsl.version < 410)
        return;

    // Trivial pass-through vertex shader: position + interpolated color
    const char *vert_shader =
        "#version 410 \n"
        "layout(location=0) in vec2 vertex_pos; \n"
        "layout(location=1) in vec3 vertex_color; \n"
        "layout(location=0) out vec3 frag_color; \n"
        "void main() { \n"
        " gl_Position = vec4(vertex_pos, 0, 1); \n"
        " frag_color = vertex_color; \n"
        "}";

    const char *frag_shader =
        "#version 410 \n"
        "layout(location=0) in vec3 frag_color; \n"
        "layout(location=0) out vec4 out_color; \n"
        "void main() { \n"
        " out_color = vec4(frag_color, 1.0); \n"
        "}";

    pl_fmt fbo_fmt;
    enum pl_fmt_caps caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE |
                            PL_FMT_CAP_LINEAR;

    // 4-component float format, 16-32 bits per component
    fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 32, caps);
    if (!fbo_fmt)
        return;

#define FBO_W 16
#define FBO_H 16

    pl_tex fbo;
    fbo = pl_tex_create(gpu, &(struct pl_tex_params) {
        .format = fbo_fmt,
        .w = FBO_W,
        .h = FBO_H,
        .renderable = true,
        .storable = !!(fbo_fmt->caps & PL_FMT_CAP_STORABLE),
        .host_readable = true,
        .blit_dst = true,
    });
    REQUIRE(fbo);

    pl_tex_clear_ex(gpu, fbo, (union pl_clear_color){0});

    pl_fmt vert_fmt;
    vert_fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3);
    REQUIRE(vert_fmt);

    // Fullscreen quad as a triangle strip; color encodes position so the
    // rendered result is the gradient checked by TEST_FBO_PATTERN
    static const struct vertex { float pos[2]; float color[3]; } vertices[] = {
        {{-1.0, -1.0}, {0, 0, 0}},
        {{ 1.0, -1.0}, {1, 0, 0}},
        {{-1.0, 1.0}, {0, 1, 0}},
        {{ 1.0, 1.0}, {1, 1, 0}},
    };

    pl_pass pass;
    pass = pl_pass_create(gpu, &(struct pl_pass_params) {
        .type = PL_PASS_RASTER,
        .target_dummy = *fbo,
        .vertex_shader = vert_shader,
        .glsl_shader = frag_shader,

        .vertex_type = PL_PRIM_TRIANGLE_STRIP,
        .vertex_stride = sizeof(struct vertex),
        .num_vertex_attribs = 2,
        .vertex_attribs = (struct pl_vertex_attrib[]) {{
            .name = "vertex_pos",
            .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2),
            .location = 0,
            .offset = offsetof(struct vertex, pos),
        }, {
            .name = "vertex_color",
            .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3),
            .location = 1,
            .offset = offsetof(struct vertex, color),
        }},
    });
    REQUIRE(pass);
    if (pass->params.cached_program || pass->params.cached_program_len) {
        // Ensure both are set if either one is set
        REQUIRE(pass->params.cached_program);
        REQUIRE(pass->params.cached_program_len);
    }

    pl_timer timer = pl_timer_create(gpu);
    pl_pass_run(gpu, &(struct pl_pass_run_params) {
        .pass = pass,
        .target = fbo,
        .vertex_count = PL_ARRAY_SIZE(vertices),
        .vertex_data = vertices,
        .timer = timer,
    });

    // Wait until this pass is complete and report the timer result
    pl_gpu_finish(gpu);
    printf("timer query result: %"PRIu64"\n", pl_timer_query(gpu, timer));
    pl_timer_destroy(gpu, &timer);

    // Host-side copy of the FBO contents, reused by every pattern check
    static float data[FBO_H * FBO_W * 4] = {0};

    // Test against the known pattern of `src`, only useful for roundtrip tests
#define TEST_FBO_PATTERN(eps, fmt, ...)                                     \
    do {                                                                    \
        printf("testing pattern of " fmt "\n", __VA_ARGS__);                \
        REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) {     \
            .tex = fbo,                                                     \
            .ptr = data,                                                    \
        }));                                                                \
                                                                            \
        for (int y = 0; y < FBO_H; y++) {                                   \
            for (int x = 0; x < FBO_W; x++) {                               \
                float *color = &data[(y * FBO_W + x) * 4];                  \
                REQUIRE(feq(color[0], (x + 0.5) / FBO_W, eps));             \
                REQUIRE(feq(color[1], (y + 0.5) / FBO_H, eps));             \
                REQUIRE(feq(color[2], 0.0, eps));                           \
                REQUIRE(feq(color[3], 1.0, eps));                           \
            }                                                               \
        }                                                                   \
    } while (0)

    TEST_FBO_PATTERN(1e-6, "%s", "initial rendering");

    if (sizeof(vertices) <= gpu->limits.max_vbo_size) {
        // Test the use of an explicit vertex buffer
        pl_buf vert = pl_buf_create(gpu, &(struct pl_buf_params) {
            .size = sizeof(vertices),
            .initial_data = vertices,
            .drawable = true,
        });

        REQUIRE(vert);
        pl_pass_run(gpu, &(struct pl_pass_run_params) {
            .pass = pass,
            .target = fbo,
            .vertex_count = sizeof(vertices) / sizeof(struct vertex),
            .vertex_buf = vert,
            .buf_offset = 0,
        });

        pl_buf_destroy(gpu, &vert);
        TEST_FBO_PATTERN(1e-6, "%s", "using vertex buffer");
    }

    // Test the use of index buffers (reversed order still covers the quad)
    static const uint16_t indices[] = { 3, 2, 1, 0 };
    pl_pass_run(gpu, &(struct pl_pass_run_params) {
        .pass = pass,
        .target = fbo,
        .vertex_count = PL_ARRAY_SIZE(indices),
        .vertex_data = vertices,
        .index_data = indices,
    });

    pl_pass_destroy(gpu, &pass);
    TEST_FBO_PATTERN(1e-6, "%s", "using indexed rendering");

    // Test the use of pl_dispatch
    pl_dispatch dp = pl_dispatch_create(gpu->log, gpu);
    pl_shader sh = pl_dispatch_begin(dp);
    REQUIRE(pl_shader_custom(sh, &(struct pl_custom_shader) {
        .body = "color = vec4(col, 1.0);",
        .input = PL_SHADER_SIG_NONE,
        .output = PL_SHADER_SIG_COLOR,
    }));

    REQUIRE(pl_dispatch_vertex(dp, &(struct pl_dispatch_vertex_params) {
        .shader = &sh,
        .target = fbo,
        .vertex_stride = sizeof(struct vertex),
        .vertex_position_idx = 0,
        .num_vertex_attribs = 2,
        .vertex_attribs = (struct pl_vertex_attrib[]) {{
            .name = "pos",
            .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2),
            .offset = offsetof(struct vertex, pos),
        }, {
            .name = "col",
            .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3),
            .offset = offsetof(struct vertex, color),
        }},

        .vertex_type = PL_PRIM_TRIANGLE_STRIP,
        .vertex_coords = PL_COORDS_NORMALIZED,
        .vertex_count = PL_ARRAY_SIZE(vertices),
        .vertex_data = vertices,
    }));

    TEST_FBO_PATTERN(1e-6, "%s", "using custom vertices");

    // Sampleable copy of the gradient, used as input for the shader tests
    pl_tex src;
    src = pl_tex_create(gpu, &(struct pl_tex_params) {
        .format = fbo_fmt,
        .w = FBO_W,
        .h = FBO_H,
        .storable = fbo->params.storable,
        .sampleable = true,
        .initial_data = data,
    });

    if (fbo->params.storable) {
        // Test 1x1 blit, to make sure the scaling code runs
        REQUIRE(pl_tex_blit_compute(gpu, dp, &(struct pl_tex_blit_params) {
            .src = src,
            .dst = fbo,
            .src_rc = {0, 0, 0, 1, 1, 1},
            .dst_rc = {0, 0, 0, FBO_W, FBO_H, 1},
            .sample_mode = PL_TEX_SAMPLE_NEAREST,
        }));

        // Test non-resizing blit, which uses the efficient imageLoad path
        REQUIRE(pl_tex_blit_compute(gpu, dp, &(struct pl_tex_blit_params) {
            .src = src,
            .dst = fbo,
            .src_rc = {0, 0, 0, FBO_W, FBO_H, 1},
            .dst_rc = {0, 0, 0, FBO_W, FBO_H, 1},
            .sample_mode = PL_TEX_SAMPLE_NEAREST,
        }));

        TEST_FBO_PATTERN(1e-6, "%s", "pl_tex_blit_compute");
    }

    // Test encoding/decoding of all gamma functions, color spaces, etc.
    // Each delinearize+linearize pair should be an identity transform.
    for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) {
        sh = pl_dispatch_begin(dp);
        pl_shader_sample_nearest(sh, &(struct pl_sample_src) { .tex = src });
        pl_shader_delinearize(sh, (struct pl_color_space) { .transfer = trc });
        pl_shader_linearize(sh, (struct pl_color_space) { .transfer = trc });
        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
            .shader = &sh,
            .target = fbo,
        }));

        // HDR transfers are numerically noisier, so loosen the tolerance
        float epsilon = pl_color_transfer_is_hdr(trc) ? 1e-4 : 1e-6;
        TEST_FBO_PATTERN(epsilon, "transfer function %d", (int) trc);
    }

    // Encode followed by decode should likewise round-trip exactly
    for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) {
        sh = pl_dispatch_begin(dp);
        pl_shader_sample_nearest(sh, &(struct pl_sample_src) { .tex = src });
        pl_shader_encode_color(sh, &(struct pl_color_repr) { .sys = sys });
        pl_shader_decode_color(sh, &(struct pl_color_repr) { .sys = sys }, NULL);
        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
            .shader = &sh,
            .target = fbo,
        }));

        float epsilon;
        switch (sys) {
        case PL_COLOR_SYSTEM_BT_2020_C:
            epsilon = 1e-5;
            break;

        case PL_COLOR_SYSTEM_BT_2100_PQ:
        case PL_COLOR_SYSTEM_BT_2100_HLG:
            // These seem to be horrifically noisy and prone to breaking on
            // edge cases for some reason
            // TODO: figure out why!
            continue;

        default: epsilon = 1e-6; break;
        }

        TEST_FBO_PATTERN(epsilon, "color system %d", (int) sys);
    }

    // Round-trip every light characterization through a linear space
    for (enum pl_color_light light = 0; light < PL_COLOR_LIGHT_COUNT; light++) {
        struct pl_color_space dst_space = { .transfer = PL_COLOR_TRC_LINEAR };
        struct pl_color_space src_space = {
            .transfer = PL_COLOR_TRC_LINEAR,
            .light = light
        };
        sh = pl_dispatch_begin(dp);
        pl_shader_sample_nearest(sh, &(struct pl_sample_src) { .tex = src });
        pl_shader_color_map(sh, NULL, src_space, dst_space, NULL, false);
        pl_shader_color_map(sh, NULL, dst_space, src_space, NULL, false);
        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
            .shader = &sh,
            .target = fbo,
        }));

        TEST_FBO_PATTERN(1e-6, "light %d", (int) light);
    }

    // Repeat this a few times to test the caching
    for (int i = 0; i < 10; i++) {
        if (i == 5) {
            // Halfway through, serialize the dispatch cache, tear down the
            // dispatch object, and restore it from the saved cache
            printf("Recreating pl_dispatch to test the caching\n");
            size_t size = pl_dispatch_save(dp, NULL);
            REQUIRE(size > 0);
            uint8_t *cache = malloc(size);
            REQUIRE(cache);
            REQUIRE(pl_dispatch_save(dp, cache) == size);

            pl_dispatch_destroy(&dp);
            dp = pl_dispatch_create(gpu->log, gpu);
            pl_dispatch_load(dp, cache);

#ifndef MSAN
            // Test to make sure the pass regenerates the same cache, but skip
            // this on MSAN because it doesn't like it when we read from
            // program cache data generated by the non-instrumented GPU driver
            uint64_t hash = pl_str_hash((pl_str) { cache, size });
            REQUIRE(pl_dispatch_save(dp, NULL) == size);
            REQUIRE(pl_dispatch_save(dp, cache) == size);
            REQUIRE(pl_str_hash((pl_str) { cache, size }) == hash);
#endif
            free(cache);
        }

        sh = pl_dispatch_begin(dp);

        // For testing, force the use of CS if possible
        if (gpu->glsl.compute) {
            sh->is_compute = true;
            sh->res.compute_group_size[0] = 8;
            sh->res.compute_group_size[1] = 8;
        }

        // 0 iterations / 0 grain makes debanding a no-op, so the pattern
        // check below must still pass
        pl_shader_deband(sh,
            &(struct pl_sample_src) {
                .tex = src,
            },
            &(struct pl_deband_params) {
                .iterations = 0,
                .grain = 0.0,
            });

        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
            .shader = &sh,
            .target = fbo,
        }));
        TEST_FBO_PATTERN(1e-6, "deband iter %d", i);
    }

    // Test peak detection and readback if possible
    sh = pl_dispatch_begin(dp);
    pl_shader_sample_nearest(sh, &(struct pl_sample_src) { .tex = src });

    pl_shader_obj peak_state = NULL;
    struct pl_color_space csp_gamma22 = { .transfer = PL_COLOR_TRC_GAMMA22 };
    struct pl_peak_detect_params peak_params = { .minimum_peak = 0.01 };
    if (pl_shader_detect_peak(sh, csp_gamma22, &peak_state, &peak_params)) {
        REQUIRE(pl_dispatch_compute(dp, &(struct pl_dispatch_compute_params) {
            .shader = &sh,
            .width = fbo->params.w,
            .height = fbo->params.h,
        }));

        float peak, avg;
        REQUIRE(pl_get_detected_peak(peak_state, &peak, &avg));
        printf("detected peak: %f, average: %f\n", peak, avg);

        // Recompute the expected peak/average on the CPU from `data`:
        // max component through gamma 2.2, and a log-average (geometric mean)
        float real_peak = 0, real_avg = 0;
        for (int y = 0; y < FBO_H; y++) {
            for (int x = 0; x < FBO_W; x++) {
                float *color = &data[(y * FBO_W + x) * 4];
                float smax = powf(PL_MAX(color[0], color[1]), 2.2);
                float slog = logf(PL_MAX(smax, 0.001));
                real_peak = PL_MAX(smax, real_peak);
                real_avg += slog;
            }
        }

        real_avg = expf(real_avg / (FBO_W * FBO_H));
        printf("real peak: %f, real average: %f\n", real_peak, real_avg);
        REQUIRE(feq(peak, real_peak, 1e-4));
        REQUIRE(feq(avg, real_avg, 1e-3));
    }

    pl_dispatch_abort(dp, &sh);
    pl_shader_obj_destroy(&peak_state);

#ifdef PL_HAVE_LCMS
    // Test the use of ICC profiles if available
    sh = pl_dispatch_begin(dp);
    pl_shader_sample_nearest(sh, &(struct pl_sample_src) { .tex = src });

    pl_shader_obj icc = NULL;
    struct pl_icc_color_space src_color = { .color = pl_color_space_bt709 };
    struct pl_icc_color_space dst_color = { .color = pl_color_space_srgb };
    struct pl_icc_result out;

    if (pl_icc_update(sh, &src_color, &dst_color, &icc, &out, NULL)) {
        pl_icc_apply(sh, &icc);
        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
            .shader = &sh,
            .target = fbo,
        }));
    }

    pl_dispatch_abort(dp, &sh);
    pl_shader_obj_destroy(&icc);
#endif

    // Test AV1 grain synthesis, with and without overlap
    pl_shader_obj grain = NULL;
    for (int i = 0; i < 2; i++) {
        struct pl_av1_grain_params grain_params = {
            .data = av1_grain_data,
            .tex = src,
            .components = 3,
            .component_mapping = { 0, 1, 2 },
            .repr = &(struct pl_color_repr) {
                .sys = PL_COLOR_SYSTEM_BT_709,
                .levels = PL_COLOR_LEVELS_LIMITED,
                .bits = { .color_depth = 10, .sample_depth = 10 },
            },
        };
        grain_params.data.grain_seed = rand();
        grain_params.data.overlap = !!i;

        sh = pl_dispatch_begin(dp);
        pl_shader_av1_grain(sh, &grain, &grain_params);
        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
            .shader = &sh,
            .target = fbo,
        }));
    }
    pl_shader_obj_destroy(&grain);

    // Test custom shaders (headers, uniforms/variables, body injection)
    struct pl_custom_shader custom = {
        .header =
            "vec3 invert(vec3 color) \n"
            "{ \n"
            " return vec3(1.0) - color; \n"
            "} \n",

        .body =
            "color = vec4(gl_FragCoord.xy, 0.0, 1.0); \n"
            "color.rgb = invert(color.rgb) + offset; \n",

        .input = PL_SHADER_SIG_NONE,
        .output = PL_SHADER_SIG_COLOR,

        .num_variables = 1,
        .variables = &(struct pl_shader_var) {
            .var = pl_var_float("offset"),
            .data = &(float) { 0.1 },
        },
    };

    sh = pl_dispatch_begin(dp);
    REQUIRE(pl_shader_custom(sh, &custom));
    REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
        .shader = &sh,
        .target = fbo,
    }));

    pl_dispatch_destroy(&dp);
    pl_tex_destroy(gpu, &src);
    pl_tex_destroy(gpu, &fbo);
}
682
// Upscales a 5x5 single-dot image to 100x100 with polar EWA Lanczos and,
// when the FBO is host-readable, dumps the result as an ASCII PGM (P2)
// image to stdout for visual inspection.
static void pl_scaler_tests(pl_gpu gpu)
{
    pl_fmt src_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_LINEAR);
    pl_fmt fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_RENDERABLE);
    if (!src_fmt || !fbo_fmt)
        return;

    float *fbo_data = NULL;
    pl_shader_obj lut = NULL;

    // Single bright texel in the center; the scaled output should show the
    // filter's impulse response
    static float data_5x5[5][5] = {
        { 0, 0, 0, 0, 0 },
        { 0, 0, 0, 0, 0 },
        { 0, 0, 1, 0, 0 },
        { 0, 0, 0, 0, 0 },
        { 0, 0, 0, 0, 0 },
    };

    pl_tex dot5x5 = pl_tex_create(gpu, &(struct pl_tex_params) {
        .w = 5,
        .h = 5,
        .format = src_fmt,
        .sampleable = true,
        .initial_data = &data_5x5[0][0],
    });

    struct pl_tex_params fbo_params = {
        .w = 100,
        .h = 100,
        .format = fbo_fmt,
        .renderable = true,
        .storable = !!(fbo_fmt->caps & PL_FMT_CAP_STORABLE),
        .host_readable = true,
    };

    pl_tex fbo = pl_tex_create(gpu, &fbo_params);
    if (!fbo) {
        printf("Failed creating readable FBO... falling back to non-readable\n");
        fbo_params.host_readable = false;
        fbo = pl_tex_create(gpu, &fbo_params);
    }

    pl_dispatch dp = pl_dispatch_create(gpu->log, gpu);
    if (!dot5x5 || !fbo || !dp)
        goto error;

    pl_shader sh = pl_dispatch_begin(dp);
    REQUIRE(pl_shader_sample_polar(sh,
        &(struct pl_sample_src) {
            .tex = dot5x5,
            .new_w = fbo->params.w,
            .new_h = fbo->params.h,
        },
        &(struct pl_sample_filter_params) {
            .filter = pl_filter_ewa_lanczos,
            .lut = &lut,
            // Compute shaders require a storable target
            .no_compute = !fbo->params.storable,
        }
    ));
    REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
        .shader = &sh,
        .target = fbo,
    }));

    if (fbo->params.host_readable) {
        fbo_data = malloc(fbo->params.w * fbo->params.h * sizeof(float));
        if (!fbo_data)
            goto error; // allocation failure: skip the readback/dump
        REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) {
            .tex = fbo,
            .ptr = fbo_data,
        }));

        // Emit an ASCII PGM for eyeballing the filter kernel
        int max = 255;
        printf("P2\n%d %d\n%d\n", fbo->params.w, fbo->params.h, max);
        for (int y = 0; y < fbo->params.h; y++) {
            for (int x = 0; x < fbo->params.w; x++) {
                // Row stride is the FBO *width* (was `h`, which only worked
                // because the FBO happens to be square)
                float v = fbo_data[y * fbo->params.w + x];
                printf("%d ", (int) round(fmin(fmax(v, 0.0), 1.0) * max));
            }
            printf("\n");
        }
    }

error:
    free(fbo_data);
    pl_shader_obj_destroy(&lut);
    pl_dispatch_destroy(&dp);
    pl_tex_destroy(gpu, &dot5x5);
    pl_tex_destroy(gpu, &fbo);
}
772
// mpv-style user shader sources, fed through pl_mpv_user_shader_parse by
// pl_render_tests. Each entry is a complete multi-pass .hook file.
static const char *user_shader_tests[] = {

    // Test hooking, saving and loading
    "// Example of a comment at the beginning \n"
    " \n"
    "//!HOOK NATIVE \n"
    "//!DESC upscale image \n"
    "//!BIND HOOKED \n"
    "//!WIDTH HOOKED.w 10 * \n"
    "//!HEIGHT HOOKED.h 10 * \n"
    "//!SAVE NATIVEBIG \n"
    "//!WHEN NATIVE.w 500 < \n"
    " \n"
    "vec4 hook() \n"
    "{ \n"
    " return HOOKED_texOff(0); \n"
    "} \n"
    " \n"
    "//!HOOK MAIN \n"
    "//!DESC downscale bigger image \n"
    "//!WHEN NATIVE.w 500 < \n"
    "//!BIND NATIVEBIG \n"
    " \n"
    "vec4 hook() \n"
    "{ \n"
    " return NATIVEBIG_texOff(0); \n"
    "} \n",

    // Test use of textures (embedded hex-encoded rgba32f texture payload)
    "//!HOOK MAIN \n"
    "//!DESC turn everything into colorful pixels \n"
    "//!BIND HOOKED \n"
    "//!BIND DISCO \n"
    "//!COMPONENTS 3 \n"
    " \n"
    "vec4 hook() \n"
    "{ \n"
    " return vec4(DISCO_tex(HOOKED_pos * 10.0).rgb, 1); \n"
    "} \n"
    " \n"
    "//!TEXTURE DISCO \n"
    "//!SIZE 3 3 \n"
    "//!FORMAT rgba32f \n"
    "//!FILTER NEAREST \n"
    "//!BORDER REPEAT \n"
    "0000803f000000000000000000000000000000000000803f0000000000000000000000000"
    "00000000000803f00000000000000000000803f0000803f000000000000803f0000000000"
    "00803f000000000000803f0000803f00000000000000009a99993e9a99993e9a99993e000"
    "000009a99193F9A99193f9a99193f000000000000803f0000803f0000803f00000000 \n",

    // Test use of storage/buffer resources (requires compute support)
    "//!HOOK MAIN \n"
    "//!DESC attach some storage objects \n"
    "//!BIND tex_storage \n"
    "//!BIND buf_uniform \n"
    "//!BIND buf_storage \n"
    "//!COMPONENTS 4 \n"
    " \n"
    "vec4 hook() \n"
    "{ \n"
    " return vec4(foo, bar, bat); \n"
    "} \n"
    " \n"
    "//!TEXTURE tex_storage \n"
    "//!SIZE 100 100 \n"
    "//!FORMAT r32f \n"
    "//!STORAGE \n"
    " \n"
    "//!BUFFER buf_uniform \n"
    "//!VAR float foo \n"
    "//!VAR float bar \n"
    "0000000000000000 \n"
    " \n"
    "//!BUFFER buf_storage \n"
    "//!VAR vec2 bat \n"
    "//!VAR int big[32]; \n"
    "//!STORAGE \n"

};
852
// Identity LUT sources (1D and 3D), parsed with pl_lut_parse_cube by
// pl_render_tests. Being identity transforms, applying them must not
// change the rendered output.
static const char *test_luts[] = {

    "TITLE \"1D identity\" \n"
    "LUT_1D_SIZE 2 \n"
    "0.0 0.0 0.0 \n"
    "1.0 1.0 1.0 \n",

    "TITLE \"3D identity\" \n"
    "LUT_3D_SIZE 2 \n"
    "0.0 0.0 0.0 \n"
    "1.0 0.0 0.0 \n"
    "0.0 1.0 0.0 \n"
    "1.0 1.0 0.0 \n"
    "0.0 0.0 1.0 \n"
    "1.0 0.0 1.0 \n"
    "0.0 1.0 1.0 \n"
    "1.0 1.0 1.0 \n"

};
872
// Frame-queue map callback that performs no GPU work: the source frame's
// private data already is a fully populated pl_frame, so just copy it out.
static bool frame_passthrough(pl_gpu gpu, pl_tex *tex,
                              const struct pl_source_frame *src, struct pl_frame *out_frame)
{
    *out_frame = *(const struct pl_frame *) src->frame_data;
    return true;
}
880
get_frame_ptr(struct pl_source_frame * out_frame,const struct pl_queue_params * qparams)881 static enum pl_queue_status get_frame_ptr(struct pl_source_frame *out_frame,
882 const struct pl_queue_params *qparams)
883 {
884 const struct pl_source_frame **pframe = qparams->priv;
885 if (!(*pframe)->frame_data)
886 return PL_QUEUE_EOF;
887
888 *out_frame = *(*pframe)++;
889 return PL_QUEUE_OK;
890 }
891
render_info_cb(void * priv,const struct pl_render_info * info)892 static void render_info_cb(void *priv, const struct pl_render_info *info)
893 {
894 printf("{%d} Executed shader: %s\n", info->index,
895 info->pass->shader->description);
896 }
897
pl_render_tests(pl_gpu gpu)898 static void pl_render_tests(pl_gpu gpu)
899 {
900 pl_tex img5x5_tex = NULL, fbo = NULL;
901 pl_renderer rr = NULL;
902
903 float *fbo_data = NULL;
904 static float data_5x5[5][5] = {
905 { 0.0, 0.0, 0.0, 0.0, 0.0 },
906 { 0.0, 0.0, 0.0, 0.0, 0.0 },
907 { 1.0, 0.0, 0.5, 0.0, 0.0 },
908 { 0.0, 0.0, 0.0, 1.0, 0.0 },
909 { 0.0, 0.3, 0.0, 0.0, 0.0 },
910 };
911
912 const int width = 5, height = 5;
913 struct pl_plane img5x5 = {0};
914 struct pl_plane_data img5x5_data = {
915 .type = PL_FMT_FLOAT,
916 .width = width,
917 .height = height,
918 .component_size = { 8 * sizeof(float) },
919 .component_map = { 0 },
920 .pixel_stride = sizeof(float),
921 .pixels = &data_5x5,
922 };
923
924 if (!pl_recreate_plane(gpu, NULL, &fbo, &img5x5_data))
925 return;
926
927 if (!pl_upload_plane(gpu, &img5x5, &img5x5_tex, &img5x5_data))
928 goto error;
929
930 rr = pl_renderer_create(gpu->log, gpu);
931 pl_tex_clear_ex(gpu, fbo, (union pl_clear_color){0});
932
933 struct pl_frame image = {
934 .num_planes = 1,
935 .planes = { img5x5 },
936 .repr = {
937 .sys = PL_COLOR_SYSTEM_BT_709,
938 .levels = PL_COLOR_LEVELS_FULL,
939 },
940 .color = pl_color_space_srgb,
941 .crop = {-1.0, 0.0, width - 1.0, height},
942 };
943
944 struct pl_frame target = {
945 .num_planes = 1,
946 .planes = {{
947 .texture = fbo,
948 .components = 3,
949 .component_mapping = {0, 1, 2},
950 }},
951 .crop = {2, 2, fbo->params.w - 2, fbo->params.h - 2},
952 .repr = {
953 .sys = PL_COLOR_SYSTEM_RGB,
954 .levels = PL_COLOR_LEVELS_FULL,
955 },
956 .color = pl_color_space_srgb,
957 };
958
959 REQUIRE(pl_render_image(rr, &image, &target, NULL));
960
961 fbo_data = malloc(fbo->params.w * fbo->params.h * sizeof(float[4]));
962 REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) {
963 .tex = fbo,
964 .ptr = fbo_data,
965 }));
966
967 // TODO: embed a reference texture and ensure it matches
968
969 // Test a bunch of different params
970 #define TEST(SNAME, STYPE, DEFAULT, FIELD, LIMIT) \
971 do { \
972 for (int i = 0; i <= LIMIT; i++) { \
973 printf("testing `" #STYPE "." #FIELD " = %d`\n", i); \
974 struct pl_render_params params = pl_render_default_params; \
975 params.force_dither = true; \
976 struct STYPE tmp = DEFAULT; \
977 tmp.FIELD = i; \
978 params.SNAME = &tmp; \
979 for (int p = 0; p < 5; p++) { \
980 REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); \
981 pl_gpu_flush(gpu); \
982 } \
983 } \
984 } while (0)
985
986 #define TEST_PARAMS(NAME, FIELD, LIMIT) \
987 TEST(NAME##_params, pl_##NAME##_params, pl_##NAME##_default_params, FIELD, LIMIT)
988
989 for (int i = 0; i < pl_num_scale_filters; i++) {
990 struct pl_render_params params = pl_render_default_params;
991 params.upscaler = pl_scale_filters[i].filter;
992 printf("testing `params.upscaler = /* %s */`\n", pl_scale_filters[i].name);
993 REQUIRE(pl_render_image(rr, &image, &target, ¶ms));
994 pl_gpu_flush(gpu);
995 }
996
997 TEST_PARAMS(deband, iterations, 3);
998 TEST_PARAMS(sigmoid, center, 1);
999 TEST_PARAMS(color_map, intent, PL_INTENT_ABSOLUTE_COLORIMETRIC);
1000 TEST_PARAMS(color_map, gamut_warning, 1);
1001 TEST_PARAMS(dither, method, PL_DITHER_WHITE_NOISE);
1002 TEST_PARAMS(dither, temporal, true);
1003 TEST(cone_params, pl_cone_params, pl_vision_deuteranomaly, strength, 0);
1004
1005 // Test HDR stuff
1006 image.color.sig_scale = 10.0;
1007 target.color.sig_scale = 2.0;
1008 TEST_PARAMS(color_map, tone_mapping_algo, PL_TONE_MAPPING_BT_2390);
1009 TEST_PARAMS(color_map, desaturation_strength, 1);
1010 image.color.sig_scale = target.color.sig_scale = 0.0;
1011
1012 // Test some misc stuff
1013 struct pl_render_params params = pl_render_default_params;
1014 params.color_adjustment = &(struct pl_color_adjustment) {
1015 .brightness = 0.1,
1016 .contrast = 0.9,
1017 .saturation = 1.5,
1018 .gamma = 0.8,
1019 .temperature = 0.3,
1020 };
1021 REQUIRE(pl_render_image(rr, &image, &target, ¶ms));
1022 params = pl_render_default_params;
1023
1024 params.force_icc_lut = true;
1025 REQUIRE(pl_render_image(rr, &image, &target, ¶ms));
1026 params = pl_render_default_params;
1027
1028 image.av1_grain = av1_grain_data;
1029 REQUIRE(pl_render_image(rr, &image, &target, ¶ms));
1030 image.av1_grain = (struct pl_av1_grain_data) {0};
1031
1032 // Test mpv-style custom shaders
1033 for (int i = 0; i < PL_ARRAY_SIZE(user_shader_tests); i++) {
1034 printf("testing user shader:\n\n%s\n", user_shader_tests[i]);
1035 const struct pl_hook *hook;
1036 hook = pl_mpv_user_shader_parse(gpu, user_shader_tests[i],
1037 strlen(user_shader_tests[i]));
1038
1039 if (gpu->glsl.compute) {
1040 REQUIRE(hook);
1041 } else {
1042 // Not all shaders compile without compute shader support
1043 if (!hook)
1044 continue;
1045 }
1046
1047 params.hooks = &hook;
1048 params.num_hooks = 1;
1049 REQUIRE(pl_render_image(rr, &image, &target, ¶ms));
1050
1051 pl_mpv_user_shader_destroy(&hook);
1052 }
1053 params = pl_render_default_params;
1054
1055 // Test custom LUTs
1056 for (int i = 0; i < PL_ARRAY_SIZE(test_luts); i++) {
1057 printf("testing custom lut %d\n", i);
1058 struct pl_custom_lut *lut;
1059 lut = pl_lut_parse_cube(gpu->log, test_luts[i], strlen(test_luts[i]));
1060 REQUIRE(lut);
1061
1062 // Test all three at the same time to reduce the number of tests
1063 image.lut = target.lut = params.lut = lut;
1064
1065 for (enum pl_lut_type t = PL_LUT_UNKNOWN; t <= PL_LUT_CONVERSION; t++) {
1066 printf("testing LUT method %d\n", t);
1067 image.lut_type = target.lut_type = params.lut_type = t;
1068 REQUIRE(pl_render_image(rr, &image, &target, ¶ms));
1069 }
1070
1071 image.lut = target.lut = params.lut = NULL;
1072 pl_lut_free(&lut);
1073 }
1074
1075 // Test overlays
1076 image.num_overlays = 1;
1077 image.overlays = &(struct pl_overlay) {
1078 .tex = img5x5.texture,
1079 .mode = PL_OVERLAY_NORMAL,
1080 .num_parts = 2,
1081 .parts = (struct pl_overlay_part[]) {{
1082 .src = {0, 0, 2, 2},
1083 .dst = {30, 100, 40, 200},
1084 }, {
1085 .src = {2, 2, 5, 5},
1086 .dst = {1000, -1, 3, 5},
1087 }},
1088 };
1089 REQUIRE(pl_render_image(rr, &image, &target, ¶ms));
1090 params.disable_fbos = true;
1091 REQUIRE(pl_render_image(rr, &image, &target, ¶ms));
1092 image.num_overlays = 0;
1093 params = pl_render_default_params;
1094
1095 target.num_overlays = 1;
1096 target.overlays = &(struct pl_overlay) {
1097 .tex = img5x5.texture,
1098 .mode = PL_OVERLAY_MONOCHROME,
1099 .num_parts = 1,
1100 .parts = &(struct pl_overlay_part) {
1101 .src = {5, 5, 15, 15},
1102 .dst = {5, 5, 15, 15},
1103 .color = {1.0, 0.5, 0.0},
1104 },
1105 };
1106 REQUIRE(pl_render_image(rr, &image, &target, ¶ms));
1107 REQUIRE(pl_render_image(rr, NULL, &target, ¶ms));
1108 target.num_overlays = 0;
1109
1110 // Attempt frame mixing, using the mixer queue helper
1111 printf("testing frame mixing \n");
1112 struct pl_render_params mix_params = {
1113 .frame_mixer = &pl_filter_mitchell_clamp,
1114 .info_callback = render_info_cb,
1115 };
1116
1117 struct pl_queue_params qparams = {
1118 .radius = pl_frame_mix_radius(&mix_params),
1119 .vsync_duration = 1.0 / 60.0,
1120 .frame_duration = 1.0 / 24.0,
1121 };
1122
1123 #define NUM_MIX_FRAMES 20
1124 struct pl_source_frame srcframes[NUM_MIX_FRAMES+1];
1125 srcframes[NUM_MIX_FRAMES] = (struct pl_source_frame) {0};
1126 for (int i = 0; i < NUM_MIX_FRAMES; i++) {
1127 srcframes[i] = (struct pl_source_frame) {
1128 .pts = i * qparams.frame_duration,
1129 .map = frame_passthrough,
1130 .frame_data = &image,
1131 };
1132 }
1133
1134 pl_queue queue = pl_queue_create(gpu);
1135 enum pl_queue_status ret;
1136
1137 // Test pre-pushing all frames, with delayed EOF.
1138 for (int i = 0; i < NUM_MIX_FRAMES; i++) {
1139 if (!pl_queue_push_block(queue, 1, &srcframes[i])) // mini-sleep
1140 pl_queue_push(queue, &srcframes[i]); // push it anyway, for testing
1141 }
1142
1143 struct pl_frame_mix mix;
1144 while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) {
1145 if (ret == PL_QUEUE_MORE) {
1146 REQUIRE(qparams.pts > 0.0);
1147 pl_queue_push(queue, NULL); // push delayed EOF
1148 continue;
1149 }
1150
1151 REQUIRE(ret == PL_QUEUE_OK);
1152 REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params));
1153
1154 // Simulate advancing vsync
1155 qparams.pts += qparams.vsync_duration;
1156 }
1157
1158 // Test dynamically pulling all frames, with oversample mixer
1159 const struct pl_source_frame *frame_ptr = &srcframes[0];
1160 mix_params.frame_mixer = &pl_oversample_frame_mixer;
1161
1162 qparams = (struct pl_queue_params) {
1163 .radius = pl_frame_mix_radius(&mix_params),
1164 .vsync_duration = qparams.vsync_duration,
1165 .frame_duration = qparams.frame_duration,
1166 .get_frame = get_frame_ptr,
1167 .priv = &frame_ptr,
1168 };
1169
1170 pl_queue_reset(queue);
1171 while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) {
1172 REQUIRE(ret == PL_QUEUE_OK);
1173 REQUIRE(mix.num_frames <= 2);
1174 REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params));
1175 qparams.pts += qparams.vsync_duration;
1176 }
1177
1178 // Test large PTS jump
1179 pl_queue_reset(queue);
1180 REQUIRE(pl_queue_update(queue, &mix, &qparams) == PL_QUEUE_EOF);
1181
1182 pl_queue_destroy(&queue);
1183
1184 error:
1185 free(fbo_data);
1186 pl_renderer_destroy(&rr);
1187 pl_tex_destroy(gpu, &img5x5_tex);
1188 pl_tex_destroy(gpu, &fbo);
1189 }
1190
noop_hook(void * priv,const struct pl_hook_params * params)1191 static struct pl_hook_res noop_hook(void *priv, const struct pl_hook_params *params)
1192 {
1193 return (struct pl_hook_res) {0};
1194 }
1195
// Round-trip test for planar YCbCr rendering: uploads three 16-bit planes
// (luma + 2x2-subsampled chroma), renders the image into an identically
// laid-out planar target, downloads the result, and verifies each plane
// matches the source within a small tolerance.
//
// Fix: the early return after the format check previously leaked the
// pl_renderer created at the top of the function; it is now destroyed on
// that path as well.
static void pl_ycbcr_tests(pl_gpu gpu)
{
    pl_renderer rr = pl_renderer_create(gpu->log, gpu);
    if (!rr)
        return;

    // Describe a 4:2:0-style layout: plane 0 full-res, planes 1/2 subsampled
    struct pl_plane_data data[3];
    for (int i = 0; i < 3; i++) {
        const int sub = i > 0 ? 1 : 0;
        const int width = (323 + sub) >> sub;
        const int height = (255 + sub) >> sub;

        data[i] = (struct pl_plane_data) {
            .type = PL_FMT_UNORM,
            .width = width,
            .height = height,
            .component_size = {16},
            .component_map = {i},
            .pixel_stride = sizeof(uint16_t),
            .row_stride = PL_ALIGN2(width * sizeof(uint16_t),
                                    gpu->limits.align_tex_xfer_stride),
        };
    }

    pl_fmt fmt = pl_plane_find_fmt(gpu, NULL, &data[0]);
    if (!fmt || !(fmt->caps & (PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_HOST_READABLE))) {
        // Fix: destroy the renderer before bailing out (was leaked here)
        pl_renderer_destroy(&rr);
        return;
    }

    pl_tex src_tex[3] = {0};
    pl_tex dst_tex[3] = {0};
    struct pl_frame img = {
        .num_planes = 3,
        .repr = pl_color_repr_hdtv,
        .color = pl_color_space_bt709,
    };

    struct pl_frame target = {
        .num_planes = 3,
        .repr = pl_color_repr_hdtv,
        .color = pl_color_space_bt709,
    };

    uint8_t *src_buffer[3] = {0};
    uint8_t *dst_buffer = NULL;
    for (int i = 0; i < 3; i++) {
        // Generate some arbitrary data for the buffer
        src_buffer[i] = malloc(data[i].height * data[i].row_stride);
        if (!src_buffer[i])
            goto error;

        data[i].pixels = src_buffer[i];
        for (int y = 0; y < data[i].height; y++) {
            for (int x = 0; x < data[i].width; x++) {
                size_t off = y * data[i].row_stride + x * data[i].pixel_stride;
                uint16_t *pixel = (uint16_t *) &src_buffer[i][off];
                int gx = 200 + 100 * i, gy = 300 + 150 * i;
                *pixel = (gx * x) ^ (gy * y); // whatever
            }
        }

        REQUIRE(pl_upload_plane(gpu, &img.planes[i], &src_tex[i], &data[i]));
    }

    // This co-sites chroma pixels with pixels in the RGB image, meaning we
    // get an exact round-trip when sampling both ways. This makes it useful
    // as a test case, even though it's not common in the real world.
    pl_frame_set_chroma_location(&img, PL_CHROMA_TOP_LEFT);

    for (int i = 0; i < 3; i++) {
        dst_tex[i] = pl_tex_create(gpu, &(struct pl_tex_params) {
            .format = fmt,
            .w = data[i].width,
            .h = data[i].height,
            .renderable = true,
            .host_readable = true,
            .storable = fmt->caps & PL_FMT_CAP_STORABLE,
            .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE,
        });

        if (!dst_tex[i])
            goto error;

        target.planes[i] = img.planes[i];
        target.planes[i].texture = dst_tex[i];
    }

    REQUIRE(pl_render_image(rr, &img, &target, &(struct pl_render_params) {
        .num_hooks = 1,
        .hooks = &(const struct pl_hook *){&(struct pl_hook) {
            // Forces chroma merging, to test the chroma merging code
            .stages = PL_HOOK_CHROMA_INPUT,
            .hook = noop_hook,
        }},
    }));

    // Plane 0 is the largest, so its buffer size fits all three planes
    size_t buf_size = data[0].height * data[0].row_stride;
    dst_buffer = malloc(buf_size);
    if (!dst_buffer)
        goto error;

    for (int i = 0; i < 3; i++) {
        // Poison the buffer first so stale data can't masquerade as a pass
        memset(dst_buffer, 0xAA, buf_size);
        REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) {
            .tex = dst_tex[i],
            .ptr = dst_buffer,
            .stride_w = data[i].row_stride / data[i].pixel_stride,
        }));

        for (int y = 0; y < data[i].height; y++) {
            for (int x = 0; x < data[i].width; x++) {
                size_t off = y * data[i].row_stride + x * data[i].pixel_stride;
                uint16_t *src_pixel = (uint16_t *) &src_buffer[i][off];
                uint16_t *dst_pixel = (uint16_t *) &dst_buffer[off];
                int diff = abs((int) *src_pixel - (int) *dst_pixel);
                REQUIRE(diff <= 50); // a little under 0.1%
            }
        }
    }

error:
    pl_renderer_destroy(&rr);
    free(dst_buffer);
    for (int i = 0; i < 3; i++) {
        free(src_buffer[i]);
        pl_tex_destroy(gpu, &src_tex[i]);
        pl_tex_destroy(gpu, &dst_tex[i]);
    }
}
1324
// Exercises shared-memory interop for the given handle type: exports a
// texture (and then a buffer), re-imports the exported handle into a new
// object on the same GPU, and verifies both creations succeed. Each half
// is skipped when the GPU lacks the matching export+import capability.
static void pl_test_export_import(pl_gpu gpu,
                                  enum pl_handle_type handle_type)
{
    // Test texture roundtrip

    if (!(gpu->export_caps.tex & handle_type) ||
        !(gpu->import_caps.tex & handle_type))
        goto skip_tex;

    pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 1, 0, 0, PL_FMT_CAP_BLITTABLE);
    if (!fmt)
        goto skip_tex;

    printf("testing texture import/export\n");

    pl_tex tex_exp = pl_tex_create(gpu, &(struct pl_tex_params) {
        .w = 32,
        .h = 32,
        .format = fmt,
        .export_handle = handle_type,
    });
    REQUIRE(tex_exp);
    REQUIRE_HANDLE(tex_exp->shared_mem, handle_type);

    pl_tex tex_imp = pl_tex_create(gpu, &(struct pl_tex_params) {
        .w = 32,
        .h = 32,
        .format = fmt,
        .import_handle = handle_type,
        .shared_mem = tex_exp->shared_mem,
    });
    REQUIRE(tex_imp);

    pl_tex_destroy(gpu, &tex_imp);
    pl_tex_destroy(gpu, &tex_exp);

skip_tex: ;

    // Test buffer roundtrip

    if (!(gpu->export_caps.buf & handle_type) ||
        !(gpu->import_caps.buf & handle_type))
        return;

    printf("testing buffer import/export\n");

    pl_buf buf_exp = pl_buf_create(gpu, &(struct pl_buf_params) {
        .size = 32,
        .export_handle = handle_type,
    });
    REQUIRE(buf_exp);
    REQUIRE_HANDLE(buf_exp->shared_mem, handle_type);

    pl_buf buf_imp = pl_buf_create(gpu, &(struct pl_buf_params) {
        .size = 32,
        .import_handle = handle_type,
        .shared_mem = buf_exp->shared_mem,
    });
    REQUIRE(buf_imp);

    pl_buf_destroy(gpu, &buf_imp);
    pl_buf_destroy(gpu, &buf_exp);
}
1388
// Tests importing a page-aligned host allocation as a GPU buffer via
// PL_HANDLE_HOST_PTR, mapping a slice at a non-zero offset and verifying the
// mapped contents match the host memory. Skipped when the GPU lacks host-ptr
// buffer import; the body is unix-only (aligned_alloc usage).
//
// Fixes: the aligned_alloc result was previously used unchecked (NULL
// dereference / UB on allocation failure), and the fill loop compared a
// signed int against a size_t.
static void pl_test_host_ptr(pl_gpu gpu)
{
    if (!(gpu->import_caps.buf & PL_HANDLE_HOST_PTR))
        return;

#ifdef __unix__

    printf("testing host ptr\n");
    REQUIRE(gpu->limits.max_mapped_size);

    const size_t size = 2 << 20;    // 2 MiB total allocation
    const size_t offset = 2 << 10;  // import starts 2 KiB into it
    const size_t slice = 2 << 16;   // size of the imported window

    // Note: aligned_alloc requires `size` to be a multiple of the alignment,
    // which holds here (2 MiB is a multiple of 0x1000)
    uint8_t *data = aligned_alloc(0x1000, size);
    REQUIRE(data); // fix: allocation failure was previously unchecked
    for (size_t i = 0; i < size; i++)
        data[i] = (uint8_t) i;

    pl_buf buf = pl_buf_create(gpu, &(struct pl_buf_params) {
        .type = PL_BUF_TEX_TRANSFER,
        .size = slice,
        .import_handle = PL_HANDLE_HOST_PTR,
        .shared_mem = {
            .handle.ptr = data,
            .size = size,
            .offset = offset,
        },
        .host_mapped = true,
    });

    REQUIRE(buf);
    // The mapped slice must reflect the host memory starting at `offset`
    REQUIRE(memcmp(data + offset, buf->data, slice) == 0);

    pl_buf_destroy(gpu, &buf);
    free(data);

#endif // unix
}
1427
// Runs the full suite of GPU-backed tests (buffers, textures, shaders,
// scalers, renderer, YCbCr round-trip) against the given GPU instance,
// then asserts that none of them left the GPU in a failed state.
static void gpu_shader_tests(pl_gpu gpu)
{
    pl_buffer_tests(gpu);
    pl_texture_tests(gpu);
    pl_shader_tests(gpu);
    pl_scaler_tests(gpu);
    pl_render_tests(gpu);
    pl_ycbcr_tests(gpu);

    // Any GPU-level error during the above marks the GPU as failed
    REQUIRE(!pl_gpu_is_failed(gpu));
}
1439
// Runs the interop test group: DMA-BUF texture/buffer export-import
// round-trips and host-pointer buffer import, then verifies the GPU has
// not entered a failed state. (Each sub-test self-skips when the GPU
// lacks the required capability.)
static void gpu_interop_tests(pl_gpu gpu)
{
    pl_test_export_import(gpu, PL_HANDLE_DMA_BUF);
    pl_test_host_ptr(gpu);

    REQUIRE(!pl_gpu_is_failed(gpu));
}
1447