1 #include "tests.h"
2 #include <sys/time.h>
3
4 #define TEX_SIZE 2048
5 #define CUBE_SIZE 64
6 #define NUM_FBOS 16
7 #define BENCH_DUR 3
8
create_test_img(pl_gpu gpu)9 static pl_tex create_test_img(pl_gpu gpu)
10 {
11 pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 32, PL_FMT_CAP_LINEAR);
12 REQUIRE(fmt);
13
14 int cube_stride = TEX_SIZE / CUBE_SIZE;
15 int cube_count = cube_stride * cube_stride;
16
17 assert(cube_count * CUBE_SIZE * CUBE_SIZE == TEX_SIZE * TEX_SIZE);
18 float *data = malloc(TEX_SIZE * TEX_SIZE * sizeof(float[4]));
19 for (int n = 0; n < cube_count; n++) {
20 int xbase = (n % cube_stride) * CUBE_SIZE;
21 int ybase = (n / cube_stride) * CUBE_SIZE;
22 for (int g = 0; g < CUBE_SIZE; g++) {
23 for (int r = 0; r < CUBE_SIZE; r++) {
24 int xpos = xbase + r;
25 int ypos = ybase + g;
26 assert(xpos < TEX_SIZE && ypos < TEX_SIZE);
27
28 float *color = &data[(ypos * TEX_SIZE + xpos) * 4];
29 color[0] = (float) r / CUBE_SIZE;
30 color[1] = (float) g / CUBE_SIZE;
31 color[2] = (float) n / cube_count;
32 color[3] = 1.0;
33 }
34 }
35 }
36
37 pl_tex tex = pl_tex_create(gpu, &(struct pl_tex_params) {
38 .format = fmt,
39 .w = TEX_SIZE,
40 .h = TEX_SIZE,
41 .sampleable = true,
42 .initial_data = data,
43 });
44
45 free(data);
46 REQUIRE(tex);
47 return tex;
48 }
49
// A single benchmark case. Exactly one of the two callbacks is set:
// shader-based benchmarks use `run_sh`, raw texture-transfer benchmarks
// use `run_tex`.
struct bench {
    // Records the shader work for one frame; `state` may carry persistent
    // shader objects (LUTs, peak-detection state, ...) across frames
    void (*run_sh)(pl_shader sh, pl_shader_obj *state,
                   pl_tex src);

    // Performs a direct GPU texture operation on `tex` for one frame
    void (*run_tex)(pl_gpu gpu, pl_tex tex);
};
56
// Executes one iteration of `bench`: either builds and dispatches a shader
// into `fbo` (timed by `timer` when non-NULL), or runs the raw texture
// operation directly on `fbo`.
static void run_bench(pl_gpu gpu, pl_dispatch dp,
                      pl_shader_obj *state, pl_tex src,
                      pl_tex fbo, pl_timer timer,
                      const struct bench *bench)
{
    if (!bench->run_sh) {
        bench->run_tex(gpu, fbo);
        return;
    }

    pl_shader sh = pl_dispatch_begin(dp);
    bench->run_sh(sh, state, src);

    struct pl_dispatch_params params = {
        .shader = &sh,
        .target = fbo,
        .timer = timer,
    };
    pl_dispatch_finish(dp, &params);
}
75
// Runs `bench` in a loop for BENCH_DUR seconds and prints one line of
// results: wall-clock ms/frame and FPS, plus the average GPU execution
// time when timer queries are available.
static void benchmark(pl_gpu gpu, const char *name,
                      const struct bench *bench)
{
    pl_dispatch dp = pl_dispatch_create(gpu->log, gpu);
    pl_shader_obj state = NULL;
    pl_tex src = create_test_img(gpu);

    // Create the FBOs
    pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 32,
                             PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE);
    REQUIRE(fmt);

    pl_tex fbos[NUM_FBOS] = {0};
    for (int i = 0; i < NUM_FBOS; i++) {
        fbos[i] = pl_tex_create(gpu, &(struct pl_tex_params) {
            .format = fmt,
            .w = TEX_SIZE,
            .h = TEX_SIZE,
            .renderable = true,
            .blit_dst = true,
            .host_writable = true,
            .host_readable = true,
            // storable only where the format supports it
            .storable = !!(fmt->caps & PL_FMT_CAP_STORABLE),
        });
        REQUIRE(fbos[i]);

        pl_tex_clear(gpu, fbos[i], (float[4]){ 0.0 });
    }

    // Run the benchmark and flush+block once to force shader compilation etc.
    run_bench(gpu, dp, &state, src, fbos[0], NULL, bench);
    pl_gpu_finish(gpu);

    // Perform the actual benchmark
    struct timeval start = {0}, stop = {0};
    unsigned long frames = 0;
    int index = 0;

    pl_timer timer = pl_timer_create(gpu);
    uint64_t gputime_total = 0;
    unsigned long gputime_count = 0;
    uint64_t gputime;

    gettimeofday(&start, NULL);
    do {
        frames++;
        // Round-robin over the FBOs so frames can overlap on the GPU
        run_bench(gpu, dp, &state, src, fbos[index++], timer, bench);
        index %= NUM_FBOS;
        if (index == 0) {
            // Flush (and re-check the clock) once per full FBO cycle only
            pl_gpu_flush(gpu);
            gettimeofday(&stop, NULL);
        }
        // Drain any GPU timer results that have become available
        while ((gputime = pl_timer_query(gpu, timer))) {
            gputime_total += gputime;
            gputime_count++;
        }
    } while (stop.tv_sec - start.tv_sec < BENCH_DUR);

    // Force the GPU to finish execution and re-measure the final stop time
    pl_gpu_finish(gpu);

    gettimeofday(&stop, NULL);
    while ((gputime = pl_timer_query(gpu, timer))) {
        gputime_total += gputime;
        gputime_count++;
    }

    float secs = (float) (stop.tv_sec - start.tv_sec) +
                 1e-6 * (stop.tv_usec - start.tv_usec);
    printf("'%s':\t%4lu frames in %1.6f seconds => %2.6f ms/frame (%5.2f FPS)",
           name, frames, secs, 1000 * secs / frames, frames / secs);
    if (gputime_count)
        printf(", gpu time: %2.6f ms", 1e-6 * (gputime_total / gputime_count));
    printf("\n");

    pl_timer_destroy(gpu, &timer);
    pl_shader_obj_destroy(&state);
    pl_dispatch_destroy(&dp);
    pl_tex_destroy(gpu, &src);
    for (int i = 0; i < NUM_FBOS; i++)
        pl_tex_destroy(gpu, &fbos[i]);
}
158
159 // List of benchmarks
// Debanding with default parameters
static void bench_deband(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_sample_src sample_src = { .tex = src };
    pl_shader_deband(sh, &sample_src, NULL);
}
164
// Debanding with deliberately heavy settings, to stress the shader
static void bench_deband_heavy(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_sample_src sample_src = { .tex = src };
    struct pl_deband_params deband_params = {
        .iterations = 4,
        .threshold = 4.0,
        .radius = 4.0,
        .grain = 16.0,
    };

    pl_shader_deband(sh, &sample_src, &deband_params);
}
175
// Plain bilinear sampling
static void bench_bilinear(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_sample_src sample_src = { .tex = src };
    pl_shader_sample_bilinear(sh, &sample_src);
}
180
// Bicubic sampling
static void bench_bicubic(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_sample_src sample_src = { .tex = src };
    pl_shader_sample_bicubic(sh, &sample_src);
}
185
// Dithering to 8 bits using the blue noise method; `state` caches the
// generated dither matrix across frames
static void bench_dither_blue(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_dither_params params = pl_dither_default_params;
    params.method = PL_DITHER_BLUE_NOISE;

    pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src });
    pl_shader_dither(sh, 8, state, &params); // fixed mis-encoded '&params'
}
194
// Dithering to 8 bits using white noise; `state` caches dither state
static void bench_dither_white(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_dither_params params = pl_dither_default_params;
    params.method = PL_DITHER_WHITE_NOISE;

    pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src });
    pl_shader_dither(sh, 8, state, &params); // fixed mis-encoded '&params'
}
203
// Dithering to 8 bits using the fixed ordered (bayer) matrix
static void bench_dither_ordered_fix(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_dither_params params = pl_dither_default_params;
    params.method = PL_DITHER_ORDERED_FIXED;

    pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src });
    pl_shader_dither(sh, 8, state, &params); // fixed mis-encoded '&params'
}
212
// Polar (EWA Lanczos) sampling; `state` caches the filter LUT
static void bench_polar(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_sample_filter_params params = {
        .filter = pl_filter_ewa_lanczos,
        .lut = state,
    };

    // fixed mis-encoded '&params' in the call below
    pl_shader_sample_polar(sh, &(struct pl_sample_src) { .tex = src }, &params);
}
222
// Polar sampling with the compute-shader path disabled, to benchmark the
// fragment-shader fallback
static void bench_polar_nocompute(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_sample_filter_params params = {
        .filter = pl_filter_ewa_lanczos,
        .no_compute = true,
        .lut = state,
    };

    // fixed mis-encoded '&params' in the call below
    pl_shader_sample_polar(sh, &(struct pl_sample_src) { .tex = src }, &params);
}
233
234
// HDR peak detection; `state` carries the peak-detection state object
static void bench_hdr_peak(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_sample_src sample_src = { .tex = src };
    pl_shader_sample_direct(sh, &sample_src);
    pl_shader_detect_peak(sh, pl_color_space_hdr10, state, NULL);
}
240
// AV1 film grain synthesis (no block overlap); re-seeds every frame so the
// grain texture must be regenerated each iteration
static void bench_av1_grain(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_av1_grain_params params = {
        .data = av1_grain_data,
        .tex = src,
        .components = 3,
        .component_mapping = {0, 1, 2},
        .repr = &(struct pl_color_repr) {0},
    };

    params.data.grain_seed = rand();
    pl_shader_av1_grain(sh, state, &params); // fixed mis-encoded '&params'
}
254
// AV1 film grain synthesis with block overlap enabled (heavier variant)
static void bench_av1_grain_lap(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_av1_grain_params params = {
        .data = av1_grain_data,
        .tex = src,
        .components = 3,
        .component_mapping = {0, 1, 2},
        .repr = &(struct pl_color_repr) {0},
    };

    params.data.overlap = true;
    params.data.grain_seed = rand();
    pl_shader_av1_grain(sh, state, &params); // fixed mis-encoded '&params'
}
269
// Static staging buffer for the texture up/download benchmarks; the extra
// slack allows aligning the pointer up to a 4096-byte boundary
static float data[TEX_SIZE * TEX_SIZE * 4 + 8192];
271
// Synchronous texture download into the (page-aligned) staging buffer
static void bench_download(pl_gpu gpu, pl_tex tex)
{
    uint8_t *dst = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096);
    struct pl_tex_transfer_params params = {
        .tex = tex,
        .ptr = dst,
    };
    pl_tex_download(gpu, &params);
}
279
// Synchronous texture upload from the (page-aligned) staging buffer
static void bench_upload(pl_gpu gpu, pl_tex tex)
{
    uint8_t *srcbuf = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096);
    struct pl_tex_transfer_params params = {
        .tex = tex,
        .ptr = srcbuf,
    };
    pl_tex_upload(gpu, &params);
}
287
// No-op completion callback; its mere presence forces the async code path
static void dummy_cb(void *arg)
{
    (void) arg;
}
289
// Asynchronous texture download (callback-driven) into the staging buffer
static void bench_download_async(pl_gpu gpu, pl_tex tex)
{
    uint8_t *dst = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096);
    struct pl_tex_transfer_params params = {
        .tex = tex,
        .ptr = dst,
        .callback = dummy_cb,
    };
    pl_tex_download(gpu, &params);
}
298
// Asynchronous texture upload (callback-driven) from the staging buffer
static void bench_upload_async(pl_gpu gpu, pl_tex tex)
{
    uint8_t *srcbuf = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096);
    struct pl_tex_transfer_params params = {
        .tex = tex,
        .ptr = srcbuf,
        .callback = dummy_cb,
    };
    pl_tex_upload(gpu, &params);
}
307
main()308 int main()
309 {
310 setbuf(stdout, NULL);
311 setbuf(stderr, NULL);
312
313 pl_log log = pl_log_create(PL_API_VER, &(struct pl_log_params) {
314 .log_cb = isatty(fileno(stdout)) ? pl_log_color : pl_log_simple,
315 .log_level = PL_LOG_WARN,
316 });
317
318 pl_vulkan vk = pl_vulkan_create(log, &(struct pl_vulkan_params) {
319 .allow_software = true,
320 .async_compute = true,
321 .queue_count = NUM_FBOS,
322 });
323
324 if (!vk)
325 return SKIP;
326
327 #define BENCH_SH(fn) &(struct bench) { .run_sh = fn }
328 #define BENCH_TEX(fn) &(struct bench) { .run_tex = fn }
329
330 printf("= Running benchmarks =\n");
331 benchmark(vk->gpu, "tex_download ptr", BENCH_TEX(bench_download));
332 benchmark(vk->gpu, "tex_download ptr async", BENCH_TEX(bench_download_async));
333 benchmark(vk->gpu, "tex_upload ptr", BENCH_TEX(bench_upload));
334 benchmark(vk->gpu, "tex_upload ptr async", BENCH_TEX(bench_upload_async));
335 benchmark(vk->gpu, "bilinear", BENCH_SH(bench_bilinear));
336 benchmark(vk->gpu, "bicubic", BENCH_SH(bench_bicubic));
337 benchmark(vk->gpu, "deband", BENCH_SH(bench_deband));
338 benchmark(vk->gpu, "deband_heavy", BENCH_SH(bench_deband_heavy));
339
340 // Polar sampling
341 benchmark(vk->gpu, "polar", BENCH_SH(bench_polar));
342 if (vk->gpu->glsl.compute)
343 benchmark(vk->gpu, "polar_nocompute", BENCH_SH(bench_polar_nocompute));
344
345 // Dithering algorithms
346 benchmark(vk->gpu, "dither_blue", BENCH_SH(bench_dither_blue));
347 benchmark(vk->gpu, "dither_white", BENCH_SH(bench_dither_white));
348 benchmark(vk->gpu, "dither_ordered_fixed", BENCH_SH(bench_dither_ordered_fix));
349
350 // HDR peak detection
351 if (vk->gpu->glsl.compute)
352 benchmark(vk->gpu, "hdr_peakdetect", BENCH_SH(bench_hdr_peak));
353
354 // Misc stuff
355 benchmark(vk->gpu, "av1_grain", BENCH_SH(bench_av1_grain));
356 benchmark(vk->gpu, "av1_grain_lap", BENCH_SH(bench_av1_grain_lap));
357
358 pl_vulkan_destroy(&vk);
359 pl_log_destroy(&log);
360 return 0;
361 }
362