1 /*
2 * This file is part of libplacebo.
3 *
4 * libplacebo is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * libplacebo is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <stdio.h>
19 #include <math.h>
20
21 #include "common.h"
22 #include "log.h"
23 #include "shaders.h"
24
pl_shader_alloc(pl_log log,const struct pl_shader_params * params)25 pl_shader pl_shader_alloc(pl_log log, const struct pl_shader_params *params)
26 {
27 pl_shader sh = pl_alloc_ptr(NULL, sh);
28 *sh = (struct pl_shader) {
29 .log = log,
30 .mutable = true,
31 };
32
33 // Ensure there's always at least one `tmp` object
34 PL_ARRAY_APPEND(sh, sh->tmp, pl_ref_new(NULL));
35
36 if (params)
37 sh->res.params = *params;
38
39 return sh;
40 }
41
// Destroys the shader referenced by `*psh`.
//
// Every reference stored in the shader's `tmp` array is dereferenced (in
// order) before the shader itself is released through pl_free_ptr(psh).
// Does nothing when `*psh` is NULL.
void pl_shader_free(pl_shader *psh)
{
    pl_shader sh = *psh;
    if (sh) {
        int idx = 0;
        while (idx < sh->tmp.num) {
            pl_ref_deref(&sh->tmp.elem[idx]);
            idx++;
        }

        pl_free_ptr(psh);
    }
}
53
// Resets `sh` to a freshly-allocated-like state while recycling its storage.
//
// All references in `tmp` are dropped, then the shader is overwritten with a
// zero-initialized struct that only inherits the log, the element pointers of
// the dynamic arrays and the `buf` pointers of the string buffers (counts and
// lengths restart at zero). `params` is optional and copied when non-NULL.
// Finally a new reference is appended to `tmp`, as in pl_shader_alloc().
void pl_shader_reset(pl_shader sh, const struct pl_shader_params *params)
{
    for (int n = 0; n < sh->tmp.num; n++)
        pl_ref_deref(&sh->tmp.elem[n]);

    struct pl_shader fresh = {
        .mutable = true,
        .log = sh->log,

        // Preserve array allocations
        .steps.elem = sh->steps.elem,
        .consts.elem = sh->consts.elem,
        .descs.elem = sh->descs.elem,
        .vars.elem = sh->vars.elem,
        .vas.elem = sh->vas.elem,
        .tmp.elem = sh->tmp.elem,
    };

    // Preserve buffer allocations
    for (int n = 0; n < PL_ARRAY_SIZE(fresh.buffers); n++)
        fresh.buffers[n] = (pl_str) { .buf = sh->buffers[n].buf };

    if (params)
        fresh.res.params = *params;

    *sh = fresh;
    PL_ARRAY_APPEND(sh, sh->tmp, pl_ref_new(NULL));
}
82
pl_shader_is_failed(const pl_shader sh)83 bool pl_shader_is_failed(const pl_shader sh)
84 {
85 return sh->failed;
86 }
87
sh_glsl(const pl_shader sh)88 struct pl_glsl_version sh_glsl(const pl_shader sh)
89 {
90 if (SH_PARAMS(sh).glsl.version)
91 return SH_PARAMS(sh).glsl;
92
93 if (SH_GPU(sh))
94 return SH_GPU(sh)->glsl;
95
96 return (struct pl_glsl_version) { .version = 130 };
97 }
98
// Attempts to mark `sh` as a compute shader with a `bw` x `bh` work group that
// additionally needs `mem` bytes of shared memory.
//
// `flex` states that the caller can live with a different group size than the
// one requested. Returns true if the request could be merged with whatever
// compute requirements `sh` already has, false otherwise. On failure the
// shader's compute metadata (including the shared memory total) is left as it
// was before the call.
bool sh_try_compute(pl_shader sh, int bw, int bh, bool flex, size_t mem)
{
    pl_assert(bw && bh);
    int *sh_bw = &sh->res.compute_group_size[0];
    int *sh_bh = &sh->res.compute_group_size[1];

    struct pl_glsl_version glsl = sh_glsl(sh);
    if (!glsl.compute) {
        PL_TRACE(sh, "Disabling compute shader due to missing `compute` support");
        return false;
    }

    if (sh->res.compute_shmem + mem > glsl.max_shmem_size) {
        PL_TRACE(sh, "Disabling compute shader due to insufficient shmem");
        return false;
    }

    if (bw > glsl.max_group_size[0] ||
        bh > glsl.max_group_size[1] ||
        (bw * bh) > glsl.max_group_threads)
    {
        if (!flex) {
            PL_TRACE(sh, "Disabling compute shader due to exceeded group "
                     "thread count.");
            return false;
        }

        // Pick better group sizes: clamp the width, then use as many rows as
        // the thread budget allows. The height must also respect the
        // per-dimension limit, otherwise the "fixed" size could still be
        // rejected by the device.
        bw = PL_MIN(bw, glsl.max_group_size[0]);
        bh = PL_MIN(glsl.max_group_threads / bw, glsl.max_group_size[1]);
    }

    sh->res.compute_shmem += mem;

    // If the current shader is either not a compute shader, or we have no
    // choice but to override the metadata, always do so
    if (!sh->is_compute || (sh->flexible_work_groups && !flex)) {
        *sh_bw = bw;
        *sh_bh = bh;
        sh->is_compute = true;
        // Record whether the size we just stored is negotiable. Without this
        // a rigid override of a flexible shader would keep looking flexible
        // and could be silently overridden again by a later rigid request.
        sh->flexible_work_groups = flex;
        return true;
    }

    // If both shaders are flexible, pick the larger of the two
    if (sh->flexible_work_groups && flex) {
        *sh_bw = PL_MAX(*sh_bw, bw);
        *sh_bh = PL_MAX(*sh_bh, bh);
        pl_assert(*sh_bw * *sh_bh <= glsl.max_group_threads);
        return true;
    }

    // If the other shader is rigid but this is flexible, change nothing
    if (flex)
        return true;

    // If neither are flexible, make sure the parameters match
    pl_assert(!flex && !sh->flexible_work_groups);
    if (bw != *sh_bw || bh != *sh_bh) {
        PL_TRACE(sh, "Disabling compute shader due to incompatible group "
                 "sizes %dx%d and %dx%d", *sh_bw, *sh_bh, bw, bh);
        // Undo the shared memory reservation made above
        sh->res.compute_shmem -= mem;
        return false;
    }

    return true;
}
165
pl_shader_is_compute(const pl_shader sh)166 bool pl_shader_is_compute(const pl_shader sh)
167 {
168 return sh->is_compute;
169 }
170
pl_shader_output_size(const pl_shader sh,int * w,int * h)171 bool pl_shader_output_size(const pl_shader sh, int *w, int *h)
172 {
173 if (!sh->output_w || !sh->output_h)
174 return false;
175
176 *w = sh->output_w;
177 *h = sh->output_h;
178 return true;
179 }
180
sh_fresh(pl_shader sh,const char * name)181 ident_t sh_fresh(pl_shader sh, const char *name)
182 {
183 return pl_asprintf(SH_TMP(sh), "_%s_%d_%u", PL_DEF(name, "var"),
184 sh->fresh++, SH_PARAMS(sh).id);
185 }
186
sh_var(pl_shader sh,struct pl_shader_var sv)187 ident_t sh_var(pl_shader sh, struct pl_shader_var sv)
188 {
189 sv.var.name = sh_fresh(sh, sv.var.name);
190 sv.data = pl_memdup(SH_TMP(sh), sv.data, pl_var_host_layout(0, &sv.var).size);
191 PL_ARRAY_APPEND(sh, sh->vars, sv);
192 return (ident_t) sv.var.name;
193 }
194
sh_desc(pl_shader sh,struct pl_shader_desc sd)195 ident_t sh_desc(pl_shader sh, struct pl_shader_desc sd)
196 {
197 switch (sd.desc.type) {
198 case PL_DESC_BUF_UNIFORM:
199 case PL_DESC_BUF_STORAGE:
200 case PL_DESC_BUF_TEXEL_UNIFORM:
201 case PL_DESC_BUF_TEXEL_STORAGE:
202 // Skip re-attaching the same buffer desc twice
203 // FIXME: define aliases if the variable names differ
204 for (int i = 0; i < sh->descs.num; i++) {
205 if (sh->descs.elem[i].binding.object == sd.binding.object)
206 return (ident_t) sh->descs.elem[i].desc.name;
207 }
208
209 size_t bsize = sizeof(sd.buffer_vars[0]) * sd.num_buffer_vars;
210 sd.buffer_vars = pl_memdup(SH_TMP(sh), sd.buffer_vars, bsize);
211 break;
212
213 case PL_DESC_SAMPLED_TEX:
214 case PL_DESC_STORAGE_IMG:
215 pl_assert(!sd.num_buffer_vars);
216 break;
217
218 case PL_DESC_INVALID:
219 case PL_DESC_TYPE_COUNT:
220 pl_unreachable();
221 }
222
223 sd.desc.name = sh_fresh(sh, sd.desc.name);
224 PL_ARRAY_APPEND(sh, sh->descs, sd);
225 return (ident_t) sd.desc.name;
226 }
227
sh_const(pl_shader sh,struct pl_shader_const sc)228 ident_t sh_const(pl_shader sh, struct pl_shader_const sc)
229 {
230 if (sh->res.params.dynamic_constants && !sc.compile_time) {
231 return sh_var(sh, (struct pl_shader_var) {
232 .var = {
233 .name = sc.name,
234 .type = sc.type,
235 .dim_v = 1,
236 .dim_m = 1,
237 .dim_a = 1,
238 },
239 .data = sc.data,
240 });
241 }
242
243 sc.name = sh_fresh(sh, sc.name);
244
245 pl_gpu gpu = SH_GPU(sh);
246 if (gpu && gpu->limits.max_constants) {
247 sc.data = pl_memdup(SH_TMP(sh), sc.data, pl_var_type_size(sc.type));
248 PL_ARRAY_APPEND(sh, sh->consts, sc);
249 return (ident_t) sc.name;
250 }
251
252 // Fallback for GPUs without specialization constants
253 switch (sc.type) {
254 case PL_VAR_SINT:
255 GLSLH("const int %s = %d; \n", sc.name, *(int *) sc.data);
256 return (ident_t) sc.name;
257 case PL_VAR_UINT:
258 GLSLH("const uint %s = %uu; \n", sc.name, *(unsigned int *) sc.data);
259 return (ident_t) sc.name;
260 case PL_VAR_FLOAT:
261 GLSLH("const float %s = %f; \n", sc.name, *(float *) sc.data);
262 return (ident_t) sc.name;
263 case PL_VAR_INVALID:
264 case PL_VAR_TYPE_COUNT:
265 break;
266 }
267
268 pl_unreachable();
269 }
270
sh_const_int(pl_shader sh,const char * name,int val)271 ident_t sh_const_int(pl_shader sh, const char *name, int val)
272 {
273 return sh_const(sh, (struct pl_shader_const) {
274 .type = PL_VAR_SINT,
275 .name = name,
276 .data = &val,
277 });
278 }
279
sh_const_uint(pl_shader sh,const char * name,unsigned int val)280 ident_t sh_const_uint(pl_shader sh, const char *name, unsigned int val)
281 {
282 return sh_const(sh, (struct pl_shader_const) {
283 .type = PL_VAR_UINT,
284 .name = name,
285 .data = &val,
286 });
287 }
288
sh_const_float(pl_shader sh,const char * name,float val)289 ident_t sh_const_float(pl_shader sh, const char *name, float val)
290 {
291 return sh_const(sh, (struct pl_shader_const) {
292 .type = PL_VAR_FLOAT,
293 .name = name,
294 .data = &val,
295 });
296 }
297
298
sh_attr_vec2(pl_shader sh,const char * name,const struct pl_rect2df * rc)299 ident_t sh_attr_vec2(pl_shader sh, const char *name,
300 const struct pl_rect2df *rc)
301 {
302 pl_gpu gpu = SH_GPU(sh);
303 if (!gpu) {
304 SH_FAIL(sh, "Failed adding vertex attr '%s': No GPU available!", name);
305 return NULL;
306 }
307
308 pl_fmt fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2);
309 if (!fmt) {
310 SH_FAIL(sh, "Failed adding vertex attr '%s': no vertex fmt!", name);
311 return NULL;
312 }
313
314 float vals[4][2] = {
315 { rc->x0, rc->y0 },
316 { rc->x1, rc->y0 },
317 { rc->x0, rc->y1 },
318 { rc->x1, rc->y1 },
319 };
320
321 float *data = pl_memdup(SH_TMP(sh), &vals[0][0], sizeof(vals));
322 struct pl_shader_va va = {
323 .attr = {
324 .name = sh_fresh(sh, name),
325 .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2),
326 },
327 .data = { &data[0], &data[2], &data[4], &data[6] },
328 };
329
330 PL_ARRAY_APPEND(sh, sh->vas, va);
331 return (ident_t) va.attr.name;
332 }
333
// Binds a 2D sampleable texture to the shader and returns the identifier of
// the resulting sampler descriptor (NULL, with the shader failed, when the
// texture is not 2D or not sampleable).
//
// Optional outputs (each skipped when its pointer is NULL):
//  - `out_pos`:  vertex attribute with the texture coordinates of `rect`
//                (the whole texture when `rect` is NULL), scaled by 1/size
//                unless the texture uses a PL_SAMPLER_RECT sampler
//  - `out_size`: vec2 variable holding the texture width and height
//  - `out_pt`:   vec2 variable holding the per-texel coordinate step
ident_t sh_bind(pl_shader sh, pl_tex tex,
                enum pl_tex_address_mode address_mode,
                enum pl_tex_sample_mode sample_mode,
                const char *name, const struct pl_rect2df *rect,
                ident_t *out_pos, ident_t *out_size, ident_t *out_pt)
{
    if (pl_tex_params_dimension(tex->params) != 2 || !tex->params.sampleable) {
        SH_FAIL(sh, "Failed binding texture '%s': incompatible params!", name);
        return NULL;
    }

    struct pl_shader_desc sampler = {
        .desc = {
            .name = name,
            .type = PL_DESC_SAMPLED_TEX,
        },
        .binding = {
            .object = tex,
            .address_mode = address_mode,
            .sample_mode = sample_mode,
        },
    };
    ident_t itex = sh_desc(sh, sampler);

    // Rect samplers are addressed in texels, everything else is normalized
    const bool unnormalized = tex->sampler_type == PL_SAMPLER_RECT;
    float sx = unnormalized ? 1.0 : 1.0 / tex->params.w;
    float sy = unnormalized ? 1.0 : 1.0 / tex->params.h;

    if (out_pos) {
        struct pl_rect2df whole = {
            .x1 = tex->params.w,
            .y1 = tex->params.h,
        };
        rect = PL_DEF(rect, &whole);

        struct pl_rect2df coords = {
            .x0 = sx * rect->x0, .y0 = sy * rect->y0,
            .x1 = sx * rect->x1, .y1 = sy * rect->y1,
        };
        *out_pos = sh_attr_vec2(sh, "tex_coord", &coords);
    }

    if (out_size) {
        float dims[2] = {tex->params.w, tex->params.h};
        *out_size = sh_var(sh, (struct pl_shader_var) {
            .var = pl_var_vec2("tex_size"),
            .data = dims,
        });
    }

    if (out_pt) {
        float step[2] = {sx, sy};
        *out_pt = sh_var(sh, (struct pl_shader_var) {
            .var = pl_var_vec2("tex_pt"),
            .data = step,
        });
    }

    return itex;
}
395
// Appends `new_var` to the list of buffer variables of `buf_desc`.
//
// The variable is placed after the current end of the buffer using std140
// layout rules for uniform buffers and std430 rules for storage buffers.
// Returns false (without modifying `buf_desc`) if the variable would end
// beyond the GPU's size limit for that buffer type. On success the computed
// layout is written to `*out_layout` when that pointer is non-NULL, and the
// variable list is grown using `alloc` as the allocation parent.
// Only uniform and storage buffer descriptors are valid here.
bool sh_buf_desc_append(void *alloc, pl_gpu gpu,
                        struct pl_shader_desc *buf_desc,
                        struct pl_var_layout *out_layout,
                        const struct pl_var new_var)
{
    size_t end = sh_buf_desc_size(buf_desc);
    struct pl_buffer_var entry = { .var = new_var };

    switch (buf_desc->desc.type) {
    case PL_DESC_BUF_STORAGE:
        entry.layout = pl_std430_layout(end, &new_var);
        if (entry.layout.offset + entry.layout.size > gpu->limits.max_ssbo_size)
            return false;
        break;
    case PL_DESC_BUF_UNIFORM:
        entry.layout = pl_std140_layout(end, &new_var);
        if (entry.layout.offset + entry.layout.size > gpu->limits.max_ubo_size)
            return false;
        break;
    case PL_DESC_INVALID:
    case PL_DESC_SAMPLED_TEX:
    case PL_DESC_STORAGE_IMG:
    case PL_DESC_BUF_TEXEL_UNIFORM:
    case PL_DESC_BUF_TEXEL_STORAGE:
    case PL_DESC_TYPE_COUNT:
        pl_unreachable();
    }

    if (out_layout)
        *out_layout = entry.layout;

    PL_ARRAY_APPEND_RAW(alloc, buf_desc->buffer_vars, buf_desc->num_buffer_vars, entry);
    return true;
}
429
sh_buf_desc_size(const struct pl_shader_desc * buf_desc)430 size_t sh_buf_desc_size(const struct pl_shader_desc *buf_desc)
431 {
432 if (!buf_desc->num_buffer_vars)
433 return 0;
434
435 const struct pl_buffer_var *last;
436 last = &buf_desc->buffer_vars[buf_desc->num_buffer_vars - 1];
437 return last->layout.offset + last->layout.size;
438 }
439
// Appends printf-style formatted text to the shader buffer selected by `buf`.
// Formatting is done by pl_str_append_vasprintf_c(); `buf` must be a valid
// buffer index.
void sh_append(pl_shader sh, enum pl_shader_buf buf, const char *fmt, ...)
{
    pl_assert(buf >= 0 && buf < SH_BUF_COUNT);

    pl_str *target = &sh->buffers[buf];
    va_list args;
    va_start(args, fmt);
    pl_str_append_vasprintf_c(sh, target, fmt, args);
    va_end(args);
}
449
// Appends the string `str` verbatim to the shader buffer selected by `buf`.
// `buf` must be a valid buffer index.
void sh_append_str(pl_shader sh, enum pl_shader_buf buf, pl_str str)
{
    pl_assert(buf >= 0 && buf < SH_BUF_COUNT);

    pl_str *target = &sh->buffers[buf];
    pl_str_append(sh, target, str);
}
455
456 static const char *insigs[] = {
457 [PL_SHADER_SIG_NONE] = "",
458 [PL_SHADER_SIG_COLOR] = "vec4 color",
459 };
460
461 static const char *outsigs[] = {
462 [PL_SHADER_SIG_NONE] = "void",
463 [PL_SHADER_SIG_COLOR] = "vec4",
464 };
465
466 static const char *retvals[] = {
467 [PL_SHADER_SIG_NONE] = "",
468 [PL_SHADER_SIG_COLOR] = "return color;",
469 };
470
471 // libplacebo currently only allows 2D samplers for shader signatures
472 static const char *samplers2D[] = {
473 [PL_SAMPLER_NORMAL] = "sampler2D",
474 [PL_SAMPLER_RECT] = "sampler2DRect",
475 [PL_SAMPLER_EXTERNAL] = "samplerExternalOES",
476 };
477
sh_subpass(pl_shader sh,const pl_shader sub)478 ident_t sh_subpass(pl_shader sh, const pl_shader sub)
479 {
480 pl_assert(sh->mutable);
481
482 if (SH_PARAMS(sh).id == SH_PARAMS(sub).id) {
483 PL_TRACE(sh, "Can't merge shaders: conflicting identifiers!");
484 return NULL;
485 }
486
487 // Check for shader compatibility
488 int res_w = PL_DEF(sh->output_w, sub->output_w),
489 res_h = PL_DEF(sh->output_h, sub->output_h);
490
491 if ((sub->output_w && res_w != sub->output_w) ||
492 (sub->output_h && res_h != sub->output_h))
493 {
494 PL_TRACE(sh, "Can't merge shaders: incompatible sizes: %dx%d and %dx%d",
495 sh->output_w, sh->output_h, sub->output_w, sub->output_h);
496 return NULL;
497 }
498
499 if (sub->is_compute) {
500 int subw = sub->res.compute_group_size[0],
501 subh = sub->res.compute_group_size[1];
502 bool flex = sub->flexible_work_groups;
503
504 if (!sh_try_compute(sh, subw, subh, flex, sub->res.compute_shmem)) {
505 PL_TRACE(sh, "Can't merge shaders: incompatible block sizes or "
506 "exceeded shared memory resource capabilities");
507 return NULL;
508 }
509 }
510
511 sh->output_w = res_w;
512 sh->output_h = res_h;
513
514 // Append the prelude and header
515 pl_str_append(sh, &sh->buffers[SH_BUF_PRELUDE], sub->buffers[SH_BUF_PRELUDE]);
516 pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], sub->buffers[SH_BUF_HEADER]);
517
518 // Append the body as a new header function
519 ident_t name = sh_fresh(sh, "sub");
520 if (sub->res.input == PL_SHADER_SIG_SAMPLER) {
521 pl_assert(sub->sampler_prefix);
522 GLSLH("%s %s(%c%s src_tex, vec2 tex_coord) {\n",
523 outsigs[sub->res.output], name,
524 sub->sampler_prefix, samplers2D[sub->sampler_type]);
525 } else {
526 GLSLH("%s %s(%s) {\n", outsigs[sub->res.output], name, insigs[sub->res.input]);
527 }
528 pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], sub->buffers[SH_BUF_BODY]);
529 GLSLH("%s\n}\n\n", retvals[sub->res.output]);
530
531 // Copy over all of the descriptors etc.
532 for (int i = 0; i < sub->tmp.num; i++)
533 PL_ARRAY_APPEND(sh, sh->tmp, pl_ref_dup(sub->tmp.elem[i]));
534 PL_ARRAY_CONCAT(sh, sh->vas, sub->vas);
535 PL_ARRAY_CONCAT(sh, sh->vars, sub->vars);
536 PL_ARRAY_CONCAT(sh, sh->descs, sub->descs);
537 PL_ARRAY_CONCAT(sh, sh->consts, sub->consts);
538 PL_ARRAY_CONCAT(sh, sh->steps, sub->steps);
539
540 return name;
541 }
542
543 // Finish the current shader body and return its function name
sh_split(pl_shader sh)544 static ident_t sh_split(pl_shader sh)
545 {
546 pl_assert(sh->mutable);
547
548 // Concatenate the body onto the head as a new function
549 ident_t name = sh_fresh(sh, "main");
550 if (sh->res.input == PL_SHADER_SIG_SAMPLER) {
551 pl_assert(sh->sampler_prefix);
552 GLSLH("%s %s(%c%s src_tex, vec2 tex_coord) {\n",
553 outsigs[sh->res.output], name,
554 sh->sampler_prefix, samplers2D[sh->sampler_type]);
555 } else {
556 GLSLH("%s %s(%s) {\n", outsigs[sh->res.output], name, insigs[sh->res.input]);
557 }
558
559 if (sh->buffers[SH_BUF_BODY].len) {
560 pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], sh->buffers[SH_BUF_BODY]);
561 sh->buffers[SH_BUF_BODY].len = 0;
562 sh->buffers[SH_BUF_BODY].buf[0] = '\0'; // for sanity / efficiency
563 }
564
565 if (sh->buffers[SH_BUF_FOOTER].len) {
566 pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], sh->buffers[SH_BUF_FOOTER]);
567 sh->buffers[SH_BUF_FOOTER].len = 0;
568 sh->buffers[SH_BUF_FOOTER].buf[0] = '\0';
569 }
570
571 GLSLH("%s\n}\n\n", retvals[sh->res.output]);
572 return name;
573 }
574
// Finalizes the shader and returns a pointer to its result description.
//
// Returns NULL for a failed shader. If the shader was already finalized
// (i.e. is no longer mutable), the previously generated result is returned
// unchanged. Otherwise the body is wrapped into a function, the header is
// concatenated onto the prelude to form the final GLSL, a human-readable
// description is assembled from the recorded steps, the result arrays are
// pointed at the shader's internal lists, and the shader becomes immutable.
const struct pl_shader_res *pl_shader_finalize(pl_shader sh)
{
    if (sh->failed)
        return NULL;

    if (!sh->mutable)
        return &sh->res;

    // Split the shader. This finalizes the body and adds it to the header
    sh->res.name = sh_split(sh);

    // Padding for readability
    GLSLP("\n");

    // Concatenate the header onto the prelude to form the final output
    pl_str *glsl = &sh->buffers[SH_BUF_PRELUDE];
    pl_str_append(sh, glsl, sh->buffers[SH_BUF_HEADER]);

    // Generate the pretty description
    sh->res.description = "(unknown shader)";
    if (sh->steps.num) {
        // Reuse this buffer
        pl_str *desc = &sh->buffers[SH_BUF_BODY];
        desc->len = 0;

        for (int i = 0; i < sh->steps.num; i++) {
            const char *step = sh->steps.elem[i];
            if (!step)
                continue;

            // Group together duplicates. We're okay using a weak equality
            // check here because all pass descriptions are static strings.
            // Later duplicates are cleared so they get skipped above.
            int count = 1;
            for (int j = i+1; j < sh->steps.num; j++) {
                if (sh->steps.elem[j] == step) {
                    sh->steps.elem[j] = NULL;
                    count++;
                }
            }

            // Separate entries based on what has actually been written, so
            // that skipped (NULL) leading steps never produce a stray ", "
            if (desc->len > 0)
                pl_str_append(sh, desc, pl_str0(", "));
            pl_str_append(sh, desc, pl_str0(step));
            if (count > 1)
                pl_str_append_asprintf(sh, desc, " x%d", count);
        }

        // Keep the fallback description if no step contributed any text
        if (desc->len > 0)
            sh->res.description = desc->buf;
    }

    // Set the vas/vars/descs
    sh->res.vertex_attribs = sh->vas.elem;
    sh->res.num_vertex_attribs = sh->vas.num;
    sh->res.variables = sh->vars.elem;
    sh->res.num_variables = sh->vars.num;
    sh->res.descriptors = sh->descs.elem;
    sh->res.num_descriptors = sh->descs.num;
    sh->res.constants = sh->consts.elem;
    sh->res.num_constants = sh->consts.num;
    sh->res.steps = sh->steps.elem;
    sh->res.num_steps = sh->steps.num;

    // Update the result pointer and return
    sh->res.glsl = glsl->buf;
    sh->mutable = false;
    return &sh->res;
}
642
// Validates that an operation expecting input signature `insig` and output
// size `w` x `h` (0 meaning "don't care") may be appended to `sh`.
//
// Fails the shader and returns false if it has already failed, is immutable,
// has a conflicting output size, or currently produces an output that does
// not match `insig`. If the shader has no output yet and `insig` is
// non-zero, `insig` becomes the shader's own input signature. On success the
// shader's output signature is set to PL_SHADER_SIG_COLOR and any previously
// unknown output dimension is filled in from `w` / `h`.
bool sh_require(pl_shader sh, enum pl_shader_sig insig, int w, int h)
{
    if (sh->failed) {
        SH_FAIL(sh, "Attempting to modify a failed shader!");
        return false;
    }

    if (!sh->mutable) {
        SH_FAIL(sh, "Attempted to modify an immutable shader!");
        return false;
    }

    if ((w && sh->output_w && sh->output_w != w) ||
        (h && sh->output_h && sh->output_h != h))
    {
        SH_FAIL(sh, "Illegal sequence of shader operations: Incompatible "
                "output size requirements %dx%d and %dx%d",
                sh->output_w, sh->output_h, w, h);
        return false;
    }

    // Must cover every signature that can be passed as `insig`, including
    // the input-only sampler signature, since the table is indexed with
    // `insig` in the error message below.
    static const char *names[] = {
        [PL_SHADER_SIG_NONE]    = "PL_SHADER_SIG_NONE",
        [PL_SHADER_SIG_COLOR]   = "PL_SHADER_SIG_COLOR",
        [PL_SHADER_SIG_SAMPLER] = "PL_SHADER_SIG_SAMPLER",
    };

    // If we require an input, but there is none available - just get it from
    // the user by turning it into an explicit input signature.
    if (!sh->res.output && insig) {
        pl_assert(!sh->res.input);
        sh->res.input = insig;
    } else if (sh->res.output != insig) {
        SH_FAIL(sh, "Illegal sequence of shader operations! Current output "
                "signature is '%s', but called operation expects '%s'!",
                names[sh->res.output], names[insig]);
        return false;
    }

    // All of our shaders end up returning a vec4 color
    sh->res.output = PL_SHADER_SIG_COLOR;
    sh->output_w = PL_DEF(sh->output_w, w);
    sh->output_h = PL_DEF(sh->output_h, h);
    return true;
}
687
// Destroys the shader object referenced by `*ptr` and resets `*ptr` to NULL.
//
// The object's `uninit` callback, if set, is invoked with the object's GPU
// and private data before the object is freed. Does nothing when `*ptr` is
// already NULL.
void pl_shader_obj_destroy(pl_shader_obj *ptr)
{
    pl_shader_obj obj = *ptr;
    if (obj) {
        if (obj->uninit)
            obj->uninit(obj->gpu, obj->priv);

        *ptr = NULL;
        pl_free(obj);
    }
}
700
// Returns the private data of the shader object stored in `*ptr`, creating
// the object first if `*ptr` is NULL.
//
// Returns NULL when `ptr` itself is NULL. An existing object must belong to
// the same GPU as `sh` and have the requested `type`; otherwise the shader
// is failed and NULL is returned. A newly created object gets `priv_size`
// bytes of zero-initialized private data (allocated as a child of the
// object) and remembers `uninit` for use on destruction.
void *sh_require_obj(pl_shader sh, pl_shader_obj *ptr,
                     enum pl_shader_obj_type type, size_t priv_size,
                     void (*uninit)(pl_gpu gpu, void *priv))
{
    if (!ptr)
        return NULL;

    pl_shader_obj obj = *ptr;
    if (obj) {
        if (obj->gpu != SH_GPU(sh)) {
            SH_FAIL(sh, "Passed pl_shader_obj belongs to different GPU!");
            return NULL;
        }

        if (obj->type != type) {
            SH_FAIL(sh, "Passed pl_shader_obj of wrong type! Shader objects must "
                    "always be used with the same type of shader.");
            return NULL;
        }
    } else {
        obj = pl_zalloc_ptr(NULL, obj);
        obj->gpu = SH_GPU(sh);
        obj->type = type;
        obj->priv = pl_zalloc(obj, priv_size);
        obj->uninit = uninit;
    }

    *ptr = obj;
    return obj->priv;
}
731
// Sets up a pseudo-random number generator in the shader and returns the
// name of a macro that yields a new random value on each use.
//
// Two helper functions (a permutation and the actual random function) are
// written to the header; the per-invocation PRNG state is initialized in the
// body from gl_FragCoord and a seed. With `temporal` set, the seed is a
// dynamic shader variable derived from the shader's `index` parameter,
// otherwise it is the literal "0.0". The name of the state variable is
// written to `*p_state` when that pointer is non-NULL.
ident_t sh_prng(pl_shader sh, bool temporal, ident_t *p_state)
{
    // Initialize the PRNG. This is friendly for wide usage and results in
    // a very pleasant-looking distribution across frames even if the difference
    // between input coordinates is very small. This is based on BlumBlumShub,
    // with some modifications for speed / aesthetics.
    // cf. https://briansharpe.wordpress.com/2011/10/01/gpu-texture-free-noise/
    ident_t randfun = sh_fresh(sh, "random");
    ident_t permute = sh_fresh(sh, "permute");
    GLSLH("float %s(float x) { \n"
          " x = (34.0 * x + 1.0) * x; \n"
          " return fract(x * 1.0/289.0) * 289.0; \n" // (almost) mod 289
          "} \n"
          "float %s(inout float state) { \n"
          " state = %s(state); \n"
          " return fract(state * 1.0/41.0); \n"
          "}\n", permute, randfun, permute);

    // Phi is the most irrational number, so it's a good candidate for
    // generating seed values to the PRNG
    static const double phi = 1.618033988749895;

    const char *seed = "0.0";
    if (temporal) {
        // Only the fractional part is used; the integral part is discarded
        float whole = 0;
        float seedval = modff(phi * SH_PARAMS(sh).index, &whole);
        struct pl_shader_var seed_var = {
            .var = pl_var_float("seed"),
            .data = &seedval,
            .dynamic = true,
        };
        seed = sh_var(sh, seed_var);
    }

    ident_t state = sh_fresh(sh, "prng");
    GLSL("vec2 %s_init = fract(gl_FragCoord.xy * vec2(%f)); \n"
         "vec3 %s_m = vec3(%s_init, %s) + vec3(1.0); \n"
         "float %s = %s(%s(%s(%s_m.x) + %s_m.y) + %s_m.z); \n",
         state, phi,
         state, state, seed,
         state, permute, permute, permute, state, state, state);

    if (p_state)
        *p_state = state;

    ident_t res = sh_fresh(sh, "RAND");
    GLSLH("#define %s (%s(%s))\n", res, randfun, state);
    return res;
}
778
779 // Defines a LUT position helper macro. This translates from an absolute texel
780 // scale (0.0 - 1.0) to the texture coordinate scale for the corresponding
781 // sample in a texture of dimension `lut_size`.
sh_lut_pos(pl_shader sh,int lut_size)782 static ident_t sh_lut_pos(pl_shader sh, int lut_size)
783 {
784 ident_t name = sh_fresh(sh, "LUT_POS");
785 GLSLH("#define %s(x) mix(%s, %s, (x)) \n",
786 name, SH_FLOAT(0.5 / lut_size), SH_FLOAT(1.0 - 0.5 / lut_size));
787 return name;
788 }
789
790 struct sh_lut_obj {
791 enum sh_lut_method method;
792 enum pl_var_type type;
793 bool linear;
794 int width, height, depth, comps;
795 uint64_t signature;
796
797 // weights, depending on the method
798 pl_tex tex;
799 pl_str str;
800 void *data;
801 };
802
// Destructor callback for LUT shader objects: destroys the texture, frees
// the literal string and the raw data, then zeroes the whole struct.
static void sh_lut_uninit(pl_gpu gpu, void *ptr)
{
    struct sh_lut_obj *obj = ptr;

    pl_tex_destroy(gpu, &obj->tex);
    pl_free(obj->str.buf);
    pl_free(obj->data);

    const struct sh_lut_obj cleared = {0};
    *obj = cleared;
}
812
813 // Maximum number of floats to embed as a literal array (when using SH_LUT_AUTO)
814 #define SH_LUT_MAX_LITERAL 256
815
sh_lut(pl_shader sh,const struct sh_lut_params * params)816 ident_t sh_lut(pl_shader sh, const struct sh_lut_params *params)
817 {
818 pl_gpu gpu = SH_GPU(sh);
819 void *tmp = NULL;
820 ident_t ret = NULL;
821
822 pl_assert(params->width > 0 && params->height >= 0 && params->depth >= 0);
823 pl_assert(params->comps > 0);
824 pl_assert(params->type);
825 pl_assert(!params->linear || params->type == PL_VAR_FLOAT);
826
827 int sizes[] = { params->width, params->height, params->depth };
828 int size = params->width * PL_DEF(params->height, 1) * PL_DEF(params->depth, 1);
829 int dims = params->depth ? 3 : params->height ? 2 : 1;
830
831 int texdim = 0;
832 uint32_t max_tex_dim[] = {
833 gpu ? gpu->limits.max_tex_1d_dim : 0,
834 gpu ? gpu->limits.max_tex_2d_dim : 0,
835 gpu ? gpu->limits.max_tex_3d_dim : 0,
836 };
837
838 // Try picking the right number of dimensions for the texture LUT. This
839 // allows e.g. falling back to 2D textures if 1D textures are unsupported.
840 for (int d = dims; d <= PL_ARRAY_SIZE(max_tex_dim); d++) {
841 // For a given dimension to be compatible, all coordinates need to be
842 // within the maximum texture size for that dimension
843 for (int i = 0; i < d; i++) {
844 if (sizes[i] > max_tex_dim[d - 1])
845 goto next_dim;
846 }
847
848 // All dimensions are compatible, so pick this texture dimension
849 texdim = d;
850 break;
851
852 next_dim: ; // `continue` out of the inner loop
853 }
854
855 static const enum pl_fmt_type fmt_type[PL_VAR_TYPE_COUNT] = {
856 [PL_VAR_SINT] = PL_FMT_SINT,
857 [PL_VAR_UINT] = PL_FMT_UINT,
858 [PL_VAR_FLOAT] = PL_FMT_FLOAT,
859 };
860
861 enum pl_fmt_caps texcaps = PL_FMT_CAP_SAMPLEABLE;
862 if (params->linear)
863 texcaps |= PL_FMT_CAP_LINEAR;
864
865 pl_fmt texfmt = NULL;
866 if (texdim) {
867 texfmt = pl_find_fmt(gpu, fmt_type[params->type], params->comps,
868 params->type == PL_VAR_FLOAT ? 16 : 32,
869 pl_var_type_size(params->type) * 8,
870 texcaps);
871 }
872
873 struct sh_lut_obj *lut = SH_OBJ(sh, params->object, PL_SHADER_OBJ_LUT,
874 struct sh_lut_obj, sh_lut_uninit);
875
876 if (!lut) {
877 SH_FAIL(sh, "Failed initializing LUT object!");
878 goto error;
879 }
880
881 enum sh_lut_method method = params->method;
882
883 // The linear sampling code currently only supports 1D linear interpolation
884 if (params->linear && dims > 1) {
885 if (texfmt) {
886 method = SH_LUT_TEXTURE;
887 } else {
888 SH_FAIL(sh, "Can't emulate linear LUTs for 2D/3D LUTs and no "
889 "texture support available!");
890 goto error;
891 }
892 }
893
894 // Older GLSL forbids literal array constructors
895 bool can_literal = sh_glsl(sh).version > 110;
896
897 // Pick the best method
898 if (!method && size <= SH_LUT_MAX_LITERAL && !params->dynamic && can_literal)
899 method = SH_LUT_LITERAL; // use literals for small constant LUTs
900
901 if (!method && texfmt)
902 method = SH_LUT_TEXTURE; // use textures if a texfmt exists
903
904 // Use an input variable as a last fallback
905 if (!method)
906 method = SH_LUT_UNIFORM;
907
908 // Forcibly reinitialize the existing LUT if needed
909 bool update = params->update || lut->signature != params->signature;
910 if (method != lut->method || params->type != lut->type ||
911 params->linear != lut->linear || params->width != lut->width ||
912 params->height != lut->height || params->depth != lut->depth ||
913 params->comps != lut->comps)
914 {
915 PL_DEBUG(sh, "LUT cache invalidated, regenerating..");
916 update = true;
917 }
918
919 if (update) {
920 size_t buf_size = size * params->comps * pl_var_type_size(params->type);
921 tmp = pl_zalloc(NULL, buf_size);
922 params->fill(tmp, params);
923
924 switch (method) {
925 case SH_LUT_TEXTURE: {
926 if (!texdim) {
927 SH_FAIL(sh, "Texture LUT exceeds texture dimensions!");
928 goto error;
929 }
930
931 if (!texfmt) {
932 SH_FAIL(sh, "Found no compatible texture format for LUT!");
933 goto error;
934 }
935
936 struct pl_tex_params tex_params = {
937 .w = params->width,
938 .h = PL_DEF(params->height, texdim >= 2 ? 1 : 0),
939 .d = PL_DEF(params->depth, texdim >= 3 ? 1 : 0),
940 .format = texfmt,
941 .sampleable = true,
942 .host_writable = params->dynamic,
943 .initial_data = params->dynamic ? NULL : tmp,
944 };
945
946 bool ok;
947 if (params->dynamic) {
948 ok = pl_tex_recreate(gpu, &lut->tex, &tex_params);
949 if (ok) {
950 ok = pl_tex_upload(gpu, &(struct pl_tex_transfer_params) {
951 .tex = lut->tex,
952 .ptr = tmp,
953 });
954 }
955 } else {
956 // Can't use pl_tex_recreate because of `initial_data`
957 pl_tex_destroy(gpu, &lut->tex);
958 lut->tex = pl_tex_create(gpu, &tex_params);
959 ok = lut->tex;
960 }
961
962 if (!ok) {
963 SH_FAIL(sh, "Failed creating LUT texture!");
964 goto error;
965 }
966 break;
967 }
968
969 case SH_LUT_UNIFORM:
970 pl_free(lut->data);
971 lut->data = tmp; // re-use `tmp`
972 tmp = NULL;
973 break;
974
975 case SH_LUT_LITERAL: {
976 lut->str.len = 0;
977 static const char prefix[PL_VAR_TYPE_COUNT] = {
978 [PL_VAR_SINT] = 'i',
979 [PL_VAR_UINT] = 'u',
980 [PL_VAR_FLOAT] = ' ',
981 };
982
983 for (int i = 0; i < size * params->comps; i += params->comps) {
984 if (i > 0)
985 pl_str_append_asprintf_c(lut, &lut->str, ",");
986 if (params->comps > 1) {
987 pl_str_append_asprintf_c(lut, &lut->str, "%cvec%d(",
988 prefix[params->type], params->comps);
989 }
990 for (int c = 0; c < params->comps; c++) {
991 switch (params->type) {
992 case PL_VAR_FLOAT:
993 pl_str_append_asprintf_c(lut, &lut->str, "%s%f",
994 c > 0 ? "," : "",
995 ((float *) tmp)[i+c]);
996 break;
997 case PL_VAR_UINT:
998 pl_str_append_asprintf_c(lut, &lut->str, "%s%u",
999 c > 0 ? "," : "",
1000 ((unsigned int *) tmp)[i+c]);
1001 break;
1002 case PL_VAR_SINT:
1003 pl_str_append_asprintf_c(lut, &lut->str, "%s%d",
1004 c > 0 ? "," : "",
1005 ((int *) tmp)[i+c]);
1006 break;
1007 case PL_VAR_INVALID:
1008 case PL_VAR_TYPE_COUNT:
1009 pl_unreachable();
1010 }
1011 }
1012 if (params->comps > 1)
1013 pl_str_append_asprintf_c(lut, &lut->str, ")");
1014 }
1015 break;
1016 }
1017
1018 case SH_LUT_AUTO:
1019 pl_unreachable();
1020 }
1021
1022 lut->method = method;
1023 lut->type = params->type;
1024 lut->linear = params->linear;
1025 lut->width = params->width;
1026 lut->height = params->height;
1027 lut->depth = params->depth;
1028 lut->comps = params->comps;
1029 }
1030
1031 // Done updating, generate the GLSL
1032 ident_t name = sh_fresh(sh, "lut");
1033 ident_t arr_name = NULL;
1034
1035 static const char * const swizzles[] = {"x", "xy", "xyz", "xyzw"};
1036 static const char * const vartypes[PL_VAR_TYPE_COUNT][4] = {
1037 [PL_VAR_SINT] = { "int", "ivec2", "ivec3", "ivec4" },
1038 [PL_VAR_UINT] = { "uint", "uvec2", "uvec3", "uvec4" },
1039 [PL_VAR_FLOAT] = { "float", "vec2", "vec3", "vec4" },
1040 };
1041
1042 switch (method) {
1043 case SH_LUT_TEXTURE: {
1044 assert(texdim);
1045 ident_t tex = sh_desc(sh, (struct pl_shader_desc) {
1046 .desc = {
1047 .name = "weights",
1048 .type = PL_DESC_SAMPLED_TEX,
1049 },
1050 .binding = {
1051 .object = lut->tex,
1052 .sample_mode = params->linear ? PL_TEX_SAMPLE_LINEAR
1053 : PL_TEX_SAMPLE_NEAREST,
1054 }
1055 });
1056
1057 // texelFetch requires GLSL >= 130, so fall back to the linear code
1058 if (params->linear || gpu->glsl.version < 130) {
1059 ident_t pos_macros[PL_ARRAY_SIZE(sizes)] = {0};
1060 for (int i = 0; i < dims; i++)
1061 pos_macros[i] = sh_lut_pos(sh, sizes[i]);
1062
1063 GLSLH("#define %s(pos) (%s(%s, %s(\\\n",
1064 name, sh_tex_fn(sh, lut->tex->params),
1065 tex, vartypes[PL_VAR_FLOAT][texdim - 1]);
1066
1067 for (int i = 0; i < texdim; i++) {
1068 char sep = i == 0 ? ' ' : ',';
1069 if (pos_macros[i]) {
1070 if (dims > 1) {
1071 GLSLH(" %c%s(%s(pos).%c)\\\n", sep, pos_macros[i],
1072 vartypes[PL_VAR_FLOAT][dims - 1], "xyzw"[i]);
1073 } else {
1074 GLSLH(" %c%s(float(pos))\\\n", sep, pos_macros[i]);
1075 }
1076 } else {
1077 GLSLH(" %c%f\\\n", sep, 0.5);
1078 }
1079 }
1080 GLSLH(" )).%s)\n", swizzles[params->comps - 1]);
1081 } else {
1082 GLSLH("#define %s(pos) (texelFetch(%s, %s(pos",
1083 name, tex, vartypes[PL_VAR_SINT][texdim - 1]);
1084
1085 // Fill up extra components of the index
1086 for (int i = dims; i < texdim; i++)
1087 GLSLH(", 0");
1088
1089 GLSLH("), 0).%s)\n", swizzles[params->comps - 1]);
1090 }
1091
1092 break;
1093 }
1094
1095 case SH_LUT_UNIFORM:
1096 arr_name = sh_var(sh, (struct pl_shader_var) {
1097 .var = {
1098 .name = "weights",
1099 .type = params->type,
1100 .dim_v = params->comps,
1101 .dim_m = 1,
1102 .dim_a = size,
1103 },
1104 .data = lut->data,
1105 });
1106 break;
1107
1108 case SH_LUT_LITERAL:
1109 arr_name = sh_fresh(sh, "weights");
1110 GLSLH("const %s %s[%d] = %s[](\n ",
1111 vartypes[params->type][params->comps - 1], arr_name, size,
1112 vartypes[params->type][params->comps - 1]);
1113 pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], lut->str);
1114 GLSLH(");\n");
1115 break;
1116
1117 case SH_LUT_AUTO:
1118 pl_unreachable();
1119 }
1120
1121 if (arr_name) {
1122 GLSLH("#define %s(pos) (%s[int((pos)%s)\\\n",
1123 name, arr_name, dims > 1 ? "[0]" : "");
1124 int shift = params->width;
1125 for (int i = 1; i < dims; i++) {
1126 GLSLH(" + %d * int((pos)[%d])\\\n", shift, i);
1127 shift *= sizes[i];
1128 }
1129 GLSLH(" ])\n");
1130
1131 if (params->linear) {
1132 pl_assert(dims == 1);
1133 pl_assert(params->type == PL_VAR_FLOAT);
1134 ident_t arr_lut = name;
1135 name = sh_fresh(sh, "lut_lin");
1136 GLSLH("%s %s(float fpos) { \n"
1137 " fpos = clamp(fpos, 0.0, 1.0) * %d.0; \n"
1138 " float fbase = floor(fpos); \n"
1139 " float fceil = ceil(fpos); \n"
1140 " float fcoord = fpos - fbase; \n"
1141 " return mix(%s(fbase), %s(fceil), fcoord); \n"
1142 "} \n",
1143 vartypes[PL_VAR_FLOAT][params->comps - 1], name,
1144 size - 1,
1145 arr_lut, arr_lut);
1146 }
1147 }
1148
1149 pl_assert(name);
1150 ret = name;
1151 // fall through
1152 error:
1153 pl_free(tmp);
1154 return ret;
1155 }
1156
sh_bvec(const pl_shader sh,int dims)1157 const char *sh_bvec(const pl_shader sh, int dims)
1158 {
1159 static const char *bvecs[] = {
1160 [1] = "bool",
1161 [2] = "bvec2",
1162 [3] = "bvec3",
1163 [4] = "bvec4",
1164 };
1165
1166 static const char *vecs[] = {
1167 [1] = "float",
1168 [2] = "vec2",
1169 [3] = "vec3",
1170 [4] = "vec4",
1171 };
1172
1173 pl_assert(dims > 0 && dims < PL_ARRAY_SIZE(bvecs));
1174 return sh_glsl(sh).version >= 130 ? bvecs[dims] : vecs[dims];
1175 }
1176