1 /*
2  * This file is part of libplacebo.
3  *
4  * libplacebo is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * libplacebo is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <stdio.h>
19 #include <math.h>
20 
21 #include "common.h"
22 #include "log.h"
23 #include "shaders.h"
24 
pl_shader_alloc(pl_log log,const struct pl_shader_params * params)25 pl_shader pl_shader_alloc(pl_log log, const struct pl_shader_params *params)
26 {
27     pl_shader sh = pl_alloc_ptr(NULL, sh);
28     *sh = (struct pl_shader) {
29         .log = log,
30         .mutable = true,
31     };
32 
33     // Ensure there's always at least one `tmp` object
34     PL_ARRAY_APPEND(sh, sh->tmp, pl_ref_new(NULL));
35 
36     if (params)
37         sh->res.params = *params;
38 
39     return sh;
40 }
41 
// Drops every `tmp` reference held by `*psh` and frees the shader through
// pl_free_ptr(). Does nothing if `*psh` is NULL.
void pl_shader_free(pl_shader *psh)
{
    pl_shader sh = *psh;
    if (sh) {
        for (int n = 0; n < sh->tmp.num; n++)
            pl_ref_deref(&sh->tmp.elem[n]);

        pl_free_ptr(psh);
    }
}
53 
// Returns `sh` to a freshly-allocated state while recycling its existing
// array and string buffer allocations. `params` (if non-NULL) replaces the
// shader parameters; the log is carried over unchanged.
void pl_shader_reset(pl_shader sh, const struct pl_shader_params *params)
{
    // Release the references owned by the previous incarnation
    for (int n = 0; n < sh->tmp.num; n++)
        pl_ref_deref(&sh->tmp.elem[n]);

    struct pl_shader next = {
        .mutable = true,
        .log = sh->log,
    };

    // Keep the array storage around; all other array fields start zeroed
    next.tmp.elem    = sh->tmp.elem;
    next.vas.elem    = sh->vas.elem;
    next.vars.elem   = sh->vars.elem;
    next.descs.elem  = sh->descs.elem;
    next.consts.elem = sh->consts.elem;
    next.steps.elem  = sh->steps.elem;

    // Likewise keep the string buffers, but with every other field zeroed
    for (int n = 0; n < PL_ARRAY_SIZE(next.buffers); n++)
        next.buffers[n].buf = sh->buffers[n].buf;

    if (params != NULL)
        next.res.params = *params;

    *sh = next;

    // Re-establish the mandatory first `tmp` object
    PL_ARRAY_APPEND(sh, sh->tmp, pl_ref_new(NULL));
}
82 
pl_shader_is_failed(const pl_shader sh)83 bool pl_shader_is_failed(const pl_shader sh)
84 {
85     return sh->failed;
86 }
87 
sh_glsl(const pl_shader sh)88 struct pl_glsl_version sh_glsl(const pl_shader sh)
89 {
90     if (SH_PARAMS(sh).glsl.version)
91         return SH_PARAMS(sh).glsl;
92 
93     if (SH_GPU(sh))
94         return SH_GPU(sh)->glsl;
95 
96     return (struct pl_glsl_version) { .version = 130 };
97 }
98 
// Attempts to make `sh` a compute shader using a work group of `bw` x `bh`
// threads and `mem` additional shared memory.
//
// `flex` marks the requested size as a preference only: an oversized flexible
// request is shrunk to fit the limits reported by sh_glsl() instead of being
// rejected, and a flexible request never overrides a rigid size that is
// already stored on `sh`.
//
// Returns false, leaving the stored group size and shared memory total of
// `sh` unchanged, when compute is unsupported, when a limit would be
// exceeded, or when a rigid request disagrees with the rigid size already
// stored. Returns true otherwise, with `sh` marked as a compute shader.
bool sh_try_compute(pl_shader sh, int bw, int bh, bool flex, size_t mem)
{
    pl_assert(bw && bh);
    int *sh_bw = &sh->res.compute_group_size[0];
    int *sh_bh = &sh->res.compute_group_size[1];

    struct pl_glsl_version glsl = sh_glsl(sh);
    if (!glsl.compute) {
        PL_TRACE(sh, "Disabling compute shader due to missing `compute` support");
        return false;
    }

    if (sh->res.compute_shmem + mem > glsl.max_shmem_size) {
        PL_TRACE(sh, "Disabling compute shader due to insufficient shmem");
        return false;
    }

    if (bw > glsl.max_group_size[0] ||
        bh > glsl.max_group_size[1] ||
        (bw * bh) > glsl.max_group_threads)
    {
        if (!flex) {
            PL_TRACE(sh, "Disabling compute shader due to exceeded group "
                     "thread count.");
            return false;
        }

        // Pick better group sizes. The width has to respect both its own
        // limit and the total thread count (otherwise the derived height
        // would round down to zero), and the derived height has to respect
        // the per-dimension limit as well.
        bw = PL_MIN(bw, glsl.max_group_size[0]);
        bw = PL_MIN(bw, glsl.max_group_threads);
        bh = bw > 0 ? PL_MIN(glsl.max_group_threads / bw, glsl.max_group_size[1])
                    : 0;

        // Limits of zero leave no valid group size (and would otherwise
        // result in a division by zero or an empty work group)
        if (bw < 1 || bh < 1) {
            PL_TRACE(sh, "Disabling compute shader due to unusable group "
                     "size limits");
            return false;
        }
    }

    sh->res.compute_shmem += mem;

    // If the current shader is either not a compute shader, or we have no
    // choice but to override the metadata, always do so
    if (!sh->is_compute || (sh->flexible_work_groups && !flex)) {
        *sh_bw = bw;
        *sh_bh = bh;
        sh->is_compute = true;
        return true;
    }

    // If both shaders are flexible, pick the larger of the two
    if (sh->flexible_work_groups && flex) {
        *sh_bw = PL_MAX(*sh_bw, bw);
        *sh_bh = PL_MAX(*sh_bh, bh);
        pl_assert(*sh_bw * *sh_bh <= glsl.max_group_threads);
        return true;
    }

    // If the other shader is rigid but this is flexible, change nothing
    if (flex)
        return true;

    // If neither are flexible, make sure the parameters match
    pl_assert(!flex && !sh->flexible_work_groups);
    if (bw != *sh_bw || bh != *sh_bh) {
        PL_TRACE(sh, "Disabling compute shader due to incompatible group "
                 "sizes %dx%d and %dx%d", *sh_bw, *sh_bh, bw, bh);
        // Undo the shared memory reservation made above
        sh->res.compute_shmem -= mem;
        return false;
    }

    return true;
}
165 
pl_shader_is_compute(const pl_shader sh)166 bool pl_shader_is_compute(const pl_shader sh)
167 {
168     return sh->is_compute;
169 }
170 
pl_shader_output_size(const pl_shader sh,int * w,int * h)171 bool pl_shader_output_size(const pl_shader sh, int *w, int *h)
172 {
173     if (!sh->output_w || !sh->output_h)
174         return false;
175 
176     *w = sh->output_w;
177     *h = sh->output_h;
178     return true;
179 }
180 
sh_fresh(pl_shader sh,const char * name)181 ident_t sh_fresh(pl_shader sh, const char *name)
182 {
183     return pl_asprintf(SH_TMP(sh), "_%s_%d_%u", PL_DEF(name, "var"),
184                        sh->fresh++, SH_PARAMS(sh).id);
185 }
186 
sh_var(pl_shader sh,struct pl_shader_var sv)187 ident_t sh_var(pl_shader sh, struct pl_shader_var sv)
188 {
189     sv.var.name = sh_fresh(sh, sv.var.name);
190     sv.data = pl_memdup(SH_TMP(sh), sv.data, pl_var_host_layout(0, &sv.var).size);
191     PL_ARRAY_APPEND(sh, sh->vars, sv);
192     return (ident_t) sv.var.name;
193 }
194 
sh_desc(pl_shader sh,struct pl_shader_desc sd)195 ident_t sh_desc(pl_shader sh, struct pl_shader_desc sd)
196 {
197     switch (sd.desc.type) {
198     case PL_DESC_BUF_UNIFORM:
199     case PL_DESC_BUF_STORAGE:
200     case PL_DESC_BUF_TEXEL_UNIFORM:
201     case PL_DESC_BUF_TEXEL_STORAGE:
202         // Skip re-attaching the same buffer desc twice
203         // FIXME: define aliases if the variable names differ
204         for (int i = 0; i < sh->descs.num; i++) {
205             if (sh->descs.elem[i].binding.object == sd.binding.object)
206                 return (ident_t) sh->descs.elem[i].desc.name;
207         }
208 
209         size_t bsize = sizeof(sd.buffer_vars[0]) * sd.num_buffer_vars;
210         sd.buffer_vars = pl_memdup(SH_TMP(sh), sd.buffer_vars, bsize);
211         break;
212 
213     case PL_DESC_SAMPLED_TEX:
214     case PL_DESC_STORAGE_IMG:
215         pl_assert(!sd.num_buffer_vars);
216         break;
217 
218     case PL_DESC_INVALID:
219     case PL_DESC_TYPE_COUNT:
220         pl_unreachable();
221     }
222 
223     sd.desc.name = sh_fresh(sh, sd.desc.name);
224     PL_ARRAY_APPEND(sh, sh->descs, sd);
225     return (ident_t) sd.desc.name;
226 }
227 
sh_const(pl_shader sh,struct pl_shader_const sc)228 ident_t sh_const(pl_shader sh, struct pl_shader_const sc)
229 {
230     if (sh->res.params.dynamic_constants && !sc.compile_time) {
231         return sh_var(sh, (struct pl_shader_var) {
232             .var = {
233                 .name = sc.name,
234                 .type = sc.type,
235                 .dim_v = 1,
236                 .dim_m = 1,
237                 .dim_a = 1,
238             },
239             .data = sc.data,
240         });
241     }
242 
243     sc.name = sh_fresh(sh, sc.name);
244 
245     pl_gpu gpu = SH_GPU(sh);
246     if (gpu && gpu->limits.max_constants) {
247         sc.data = pl_memdup(SH_TMP(sh), sc.data, pl_var_type_size(sc.type));
248         PL_ARRAY_APPEND(sh, sh->consts, sc);
249         return (ident_t) sc.name;
250     }
251 
252     // Fallback for GPUs without specialization constants
253     switch (sc.type) {
254     case PL_VAR_SINT:
255         GLSLH("const int %s = %d; \n", sc.name, *(int *) sc.data);
256         return (ident_t) sc.name;
257     case PL_VAR_UINT:
258         GLSLH("const uint %s = %uu; \n", sc.name, *(unsigned int *) sc.data);
259         return (ident_t) sc.name;
260     case PL_VAR_FLOAT:
261         GLSLH("const float %s = %f; \n", sc.name, *(float *) sc.data);
262         return (ident_t) sc.name;
263     case PL_VAR_INVALID:
264     case PL_VAR_TYPE_COUNT:
265         break;
266     }
267 
268     pl_unreachable();
269 }
270 
sh_const_int(pl_shader sh,const char * name,int val)271 ident_t sh_const_int(pl_shader sh, const char *name, int val)
272 {
273     return sh_const(sh, (struct pl_shader_const) {
274         .type = PL_VAR_SINT,
275         .name = name,
276         .data = &val,
277     });
278 }
279 
sh_const_uint(pl_shader sh,const char * name,unsigned int val)280 ident_t sh_const_uint(pl_shader sh, const char *name, unsigned int val)
281 {
282     return sh_const(sh, (struct pl_shader_const) {
283         .type = PL_VAR_UINT,
284         .name = name,
285         .data = &val,
286     });
287 }
288 
sh_const_float(pl_shader sh,const char * name,float val)289 ident_t sh_const_float(pl_shader sh, const char *name, float val)
290 {
291     return sh_const(sh, (struct pl_shader_const) {
292         .type = PL_VAR_FLOAT,
293         .name = name,
294         .data = &val,
295     });
296 }
297 
298 
sh_attr_vec2(pl_shader sh,const char * name,const struct pl_rect2df * rc)299 ident_t sh_attr_vec2(pl_shader sh, const char *name,
300                      const struct pl_rect2df *rc)
301 {
302     pl_gpu gpu = SH_GPU(sh);
303     if (!gpu) {
304         SH_FAIL(sh, "Failed adding vertex attr '%s': No GPU available!", name);
305         return NULL;
306     }
307 
308     pl_fmt fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2);
309     if (!fmt) {
310         SH_FAIL(sh, "Failed adding vertex attr '%s': no vertex fmt!", name);
311         return NULL;
312     }
313 
314     float vals[4][2] = {
315         { rc->x0, rc->y0 },
316         { rc->x1, rc->y0 },
317         { rc->x0, rc->y1 },
318         { rc->x1, rc->y1 },
319     };
320 
321     float *data = pl_memdup(SH_TMP(sh), &vals[0][0], sizeof(vals));
322     struct pl_shader_va va = {
323         .attr = {
324             .name     = sh_fresh(sh, name),
325             .fmt      = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2),
326         },
327         .data = { &data[0], &data[2], &data[4], &data[6] },
328     };
329 
330     PL_ARRAY_APPEND(sh, sh->vas, va);
331     return (ident_t) va.attr.name;
332 }
333 
// Binds the 2D, sampleable texture `tex` to the shader as a sampled texture
// descriptor and returns the descriptor's name (NULL on incompatible texture
// parameters). Optionally also generates:
//  - `out_pos`:  a vertex attribute with the texture coordinates of `rect`
//                (or of the whole texture if `rect` is NULL)
//  - `out_size`: a vec2 variable holding the texture size
//  - `out_pt`:   a vec2 variable holding the coordinate scale factors, which
//                are 1.0 for PL_SAMPLER_RECT textures and 1/w, 1/h otherwise
ident_t sh_bind(pl_shader sh, pl_tex tex,
                enum pl_tex_address_mode address_mode,
                enum pl_tex_sample_mode sample_mode,
                const char *name, const struct pl_rect2df *rect,
                ident_t *out_pos, ident_t *out_size, ident_t *out_pt)
{
    const bool usable = pl_tex_params_dimension(tex->params) == 2 &&
                        tex->params.sampleable;
    if (!usable) {
        SH_FAIL(sh, "Failed binding texture '%s': incompatible params!", name);
        return NULL;
    }

    struct pl_shader_desc sd = {
        .desc = {
            .name = name,
            .type = PL_DESC_SAMPLED_TEX,
        },
        .binding = {
            .object = tex,
            .address_mode = address_mode,
            .sample_mode = sample_mode,
        },
    };

    ident_t itex = sh_desc(sh, sd);

    // Scale factors translating texel positions into sampling coordinates
    const bool unnormalized = tex->sampler_type == PL_SAMPLER_RECT;
    float sx = unnormalized ? 1.0 : 1.0 / tex->params.w;
    float sy = unnormalized ? 1.0 : 1.0 / tex->params.h;

    if (out_pos) {
        // Without an explicit rect, cover the entire texture
        struct pl_rect2df full = {
            .x1 = tex->params.w,
            .y1 = tex->params.h,
        };

        const struct pl_rect2df *src = rect ? rect : &full;
        struct pl_rect2df coords = {
            .x0 = sx * src->x0, .y0 = sy * src->y0,
            .x1 = sx * src->x1, .y1 = sy * src->y1,
        };

        *out_pos = sh_attr_vec2(sh, "tex_coord", &coords);
    }

    if (out_size) {
        float tex_size[2] = { tex->params.w, tex->params.h };
        *out_size = sh_var(sh, (struct pl_shader_var) {
            .var  = pl_var_vec2("tex_size"),
            .data = tex_size,
        });
    }

    if (out_pt) {
        float tex_pt[2] = { sx, sy };
        *out_pt = sh_var(sh, (struct pl_shader_var) {
            .var  = pl_var_vec2("tex_pt"),
            .data = tex_pt,
        });
    }

    return itex;
}
395 
// Appends `new_var` to the list of variables of the buffer descriptor
// `buf_desc`, placing it after the current end of the buffer using std140
// layout rules for uniform buffers and std430 rules for storage buffers.
//
// Returns false (without modifying anything) if the variable would extend
// past the GPU's size limit for that kind of buffer. On success, the chosen
// layout is also written to `*out_layout` if that pointer is non-NULL. The
// variable list is grown with `alloc` as the allocation parent.
bool sh_buf_desc_append(void *alloc, pl_gpu gpu,
                        struct pl_shader_desc *buf_desc,
                        struct pl_var_layout *out_layout,
                        const struct pl_var new_var)
{
    struct pl_buffer_var entry = { .var = new_var };
    const size_t used = sh_buf_desc_size(buf_desc);

    switch (buf_desc->desc.type) {
    case PL_DESC_BUF_UNIFORM:
        entry.layout = pl_std140_layout(used, &new_var);
        if (gpu->limits.max_ubo_size < entry.layout.offset + entry.layout.size)
            return false;
        break;
    case PL_DESC_BUF_STORAGE:
        entry.layout = pl_std430_layout(used, &new_var);
        if (gpu->limits.max_ssbo_size < entry.layout.offset + entry.layout.size)
            return false;
        break;
    case PL_DESC_INVALID:
    case PL_DESC_SAMPLED_TEX:
    case PL_DESC_STORAGE_IMG:
    case PL_DESC_BUF_TEXEL_UNIFORM:
    case PL_DESC_BUF_TEXEL_STORAGE:
    case PL_DESC_TYPE_COUNT:
        pl_unreachable();
    }

    if (out_layout != NULL)
        *out_layout = entry.layout;

    PL_ARRAY_APPEND_RAW(alloc, buf_desc->buffer_vars, buf_desc->num_buffer_vars, entry);
    return true;
}
429 
sh_buf_desc_size(const struct pl_shader_desc * buf_desc)430 size_t sh_buf_desc_size(const struct pl_shader_desc *buf_desc)
431 {
432     if (!buf_desc->num_buffer_vars)
433         return 0;
434 
435     const struct pl_buffer_var *last;
436     last = &buf_desc->buffer_vars[buf_desc->num_buffer_vars - 1];
437     return last->layout.offset + last->layout.size;
438 }
439 
// Appends printf-style formatted text to the shader buffer selected by `buf`,
// which must be a valid buffer index below SH_BUF_COUNT.
void sh_append(pl_shader sh, enum pl_shader_buf buf, const char *fmt, ...)
{
    pl_assert(buf >= 0 && buf < SH_BUF_COUNT);
    pl_str *target = &sh->buffers[buf];

    va_list args;
    va_start(args, fmt);
    pl_str_append_vasprintf_c(sh, target, fmt, args);
    va_end(args);
}
449 
// Appends the string `str` to the shader buffer selected by `buf`, which
// must be a valid buffer index below SH_BUF_COUNT.
void sh_append_str(pl_shader sh, enum pl_shader_buf buf, pl_str str)
{
    pl_assert(buf >= 0 && buf < SH_BUF_COUNT);
    pl_str *target = &sh->buffers[buf];
    pl_str_append(sh, target, str);
}
455 
456 static const char *insigs[] = {
457     [PL_SHADER_SIG_NONE]  = "",
458     [PL_SHADER_SIG_COLOR] = "vec4 color",
459 };
460 
461 static const char *outsigs[] = {
462     [PL_SHADER_SIG_NONE]  = "void",
463     [PL_SHADER_SIG_COLOR] = "vec4",
464 };
465 
466 static const char *retvals[] = {
467     [PL_SHADER_SIG_NONE]  = "",
468     [PL_SHADER_SIG_COLOR] = "return color;",
469 };
470 
471 // libplacebo currently only allows 2D samplers for shader signatures
472 static const char *samplers2D[] = {
473     [PL_SAMPLER_NORMAL]     = "sampler2D",
474     [PL_SAMPLER_RECT]       = "sampler2DRect",
475     [PL_SAMPLER_EXTERNAL]   = "samplerExternalOES",
476 };
477 
sh_subpass(pl_shader sh,const pl_shader sub)478 ident_t sh_subpass(pl_shader sh, const pl_shader sub)
479 {
480     pl_assert(sh->mutable);
481 
482     if (SH_PARAMS(sh).id == SH_PARAMS(sub).id) {
483         PL_TRACE(sh, "Can't merge shaders: conflicting identifiers!");
484         return NULL;
485     }
486 
487     // Check for shader compatibility
488     int res_w = PL_DEF(sh->output_w, sub->output_w),
489         res_h = PL_DEF(sh->output_h, sub->output_h);
490 
491     if ((sub->output_w && res_w != sub->output_w) ||
492         (sub->output_h && res_h != sub->output_h))
493     {
494         PL_TRACE(sh, "Can't merge shaders: incompatible sizes: %dx%d and %dx%d",
495                  sh->output_w, sh->output_h, sub->output_w, sub->output_h);
496         return NULL;
497     }
498 
499     if (sub->is_compute) {
500         int subw = sub->res.compute_group_size[0],
501             subh = sub->res.compute_group_size[1];
502         bool flex = sub->flexible_work_groups;
503 
504         if (!sh_try_compute(sh, subw, subh, flex, sub->res.compute_shmem)) {
505             PL_TRACE(sh, "Can't merge shaders: incompatible block sizes or "
506                      "exceeded shared memory resource capabilities");
507             return NULL;
508         }
509     }
510 
511     sh->output_w = res_w;
512     sh->output_h = res_h;
513 
514     // Append the prelude and header
515     pl_str_append(sh, &sh->buffers[SH_BUF_PRELUDE], sub->buffers[SH_BUF_PRELUDE]);
516     pl_str_append(sh, &sh->buffers[SH_BUF_HEADER],  sub->buffers[SH_BUF_HEADER]);
517 
518     // Append the body as a new header function
519     ident_t name = sh_fresh(sh, "sub");
520     if (sub->res.input == PL_SHADER_SIG_SAMPLER) {
521         pl_assert(sub->sampler_prefix);
522         GLSLH("%s %s(%c%s src_tex, vec2 tex_coord) {\n",
523               outsigs[sub->res.output], name,
524               sub->sampler_prefix, samplers2D[sub->sampler_type]);
525     } else {
526         GLSLH("%s %s(%s) {\n", outsigs[sub->res.output], name, insigs[sub->res.input]);
527     }
528     pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], sub->buffers[SH_BUF_BODY]);
529     GLSLH("%s\n}\n\n", retvals[sub->res.output]);
530 
531     // Copy over all of the descriptors etc.
532     for (int i = 0; i < sub->tmp.num; i++)
533         PL_ARRAY_APPEND(sh, sh->tmp, pl_ref_dup(sub->tmp.elem[i]));
534     PL_ARRAY_CONCAT(sh, sh->vas, sub->vas);
535     PL_ARRAY_CONCAT(sh, sh->vars, sub->vars);
536     PL_ARRAY_CONCAT(sh, sh->descs, sub->descs);
537     PL_ARRAY_CONCAT(sh, sh->consts, sub->consts);
538     PL_ARRAY_CONCAT(sh, sh->steps, sub->steps);
539 
540     return name;
541 }
542 
543 // Finish the current shader body and return its function name
sh_split(pl_shader sh)544 static ident_t sh_split(pl_shader sh)
545 {
546     pl_assert(sh->mutable);
547 
548     // Concatenate the body onto the head as a new function
549     ident_t name = sh_fresh(sh, "main");
550     if (sh->res.input == PL_SHADER_SIG_SAMPLER) {
551         pl_assert(sh->sampler_prefix);
552         GLSLH("%s %s(%c%s src_tex, vec2 tex_coord) {\n",
553               outsigs[sh->res.output], name,
554               sh->sampler_prefix, samplers2D[sh->sampler_type]);
555     } else {
556         GLSLH("%s %s(%s) {\n", outsigs[sh->res.output], name, insigs[sh->res.input]);
557     }
558 
559     if (sh->buffers[SH_BUF_BODY].len) {
560         pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], sh->buffers[SH_BUF_BODY]);
561         sh->buffers[SH_BUF_BODY].len = 0;
562         sh->buffers[SH_BUF_BODY].buf[0] = '\0'; // for sanity / efficiency
563     }
564 
565     if (sh->buffers[SH_BUF_FOOTER].len) {
566         pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], sh->buffers[SH_BUF_FOOTER]);
567         sh->buffers[SH_BUF_FOOTER].len = 0;
568         sh->buffers[SH_BUF_FOOTER].buf[0] = '\0';
569     }
570 
571     GLSLH("%s\n}\n\n", retvals[sh->res.output]);
572     return name;
573 }
574 
// Finalizes the shader and returns a pointer to its result, or NULL if the
// shader has failed. A shader that is already immutable just returns its
// existing result again. Otherwise the body is split off into a function,
// the header is appended to the prelude to form the final GLSL, a textual
// description is assembled from the recorded steps, the result arrays are
// pointed at the shader's internal arrays, and the shader becomes immutable.
const struct pl_shader_res *pl_shader_finalize(pl_shader sh)
{
    if (sh->failed)
        return NULL;

    if (!sh->mutable)
        return &sh->res;

    // Turn the body into a function residing in the header
    sh->res.name = sh_split(sh);

    // Padding for readability
    GLSLP("\n");

    // The final output is the prelude followed by the header
    pl_str *glsl = &sh->buffers[SH_BUF_PRELUDE];
    pl_str_append(sh, glsl, sh->buffers[SH_BUF_HEADER]);

    // Assemble the human-readable description
    sh->res.description = "(unknown shader)";
    if (sh->steps.num) {
        // The body buffer is no longer needed, so recycle it
        pl_str *desc = &sh->buffers[SH_BUF_BODY];
        desc->len = 0;

        for (int i = 0; i < sh->steps.num; i++) {
            const char *cur = sh->steps.elem[i];
            if (cur) {
                // Collapse duplicates of this step. Comparing pointers is
                // good enough, since all pass descriptions are static
                // strings.
                int extra = 0;
                for (int j = i + 1; j < sh->steps.num; j++) {
                    if (sh->steps.elem[j] != cur)
                        continue;

                    sh->steps.elem[j] = NULL;
                    extra++;
                }

                if (i > 0)
                    pl_str_append(sh, desc, pl_str0(", "));
                pl_str_append(sh, desc, pl_str0(cur));
                if (extra > 0)
                    pl_str_append_asprintf(sh, desc, " x%d", extra + 1);
            }
        }

        sh->res.description = desc->buf;
    }

    // Expose the internal arrays through the result struct
    sh->res.num_vertex_attribs = sh->vas.num;
    sh->res.vertex_attribs = sh->vas.elem;
    sh->res.num_variables = sh->vars.num;
    sh->res.variables = sh->vars.elem;
    sh->res.num_descriptors = sh->descs.num;
    sh->res.descriptors = sh->descs.elem;
    sh->res.num_constants = sh->consts.num;
    sh->res.constants = sh->consts.elem;
    sh->res.num_steps = sh->steps.num;
    sh->res.steps = sh->steps.elem;

    // Publish the GLSL and lock the shader
    sh->res.glsl = glsl->buf;
    sh->mutable = false;
    return &sh->res;
}
642 
// Checks that `sh` can be extended by an operation which expects the
// signature `insig` as its input and produces an output of size `w` x `h`
// (a dimension of 0 imposes no requirement).
//
// Fails via SH_FAIL and returns false if the shader has already failed, is
// no longer mutable, already has a different output size, or has a current
// output signature other than `insig`. If the shader has no output yet and
// `insig` is nonzero, `insig` instead becomes the shader's input signature.
//
// On success, the output signature is set to PL_SHADER_SIG_COLOR and any
// output dimension that was still unset is taken from `w` / `h`.
bool sh_require(pl_shader sh, enum pl_shader_sig insig, int w, int h)
{
    if (sh->failed) {
        SH_FAIL(sh, "Attempting to modify a failed shader!");
        return false;
    }

    if (!sh->mutable) {
        SH_FAIL(sh, "Attempted to modify an immutable shader!");
        return false;
    }

    if ((w && sh->output_w && sh->output_w != w) ||
        (h && sh->output_h && sh->output_h != h))
    {
        SH_FAIL(sh, "Illegal sequence of shader operations: Incompatible "
                "output size requirements %dx%d and %dx%d",
                sh->output_w, sh->output_h, w, h);
        return false;
    }

    // Must cover every signature that can be passed in, since the error
    // message below indexes this table with `insig`
    static const char *names[] = {
        [PL_SHADER_SIG_NONE]    = "PL_SHADER_SIG_NONE",
        [PL_SHADER_SIG_COLOR]   = "PL_SHADER_SIG_COLOR",
        [PL_SHADER_SIG_SAMPLER] = "PL_SHADER_SIG_SAMPLER",
    };

    // If we require an input, but there is none available - just get it from
    // the user by turning it into an explicit input signature.
    if (!sh->res.output && insig) {
        pl_assert(!sh->res.input);
        sh->res.input = insig;
    } else if (sh->res.output != insig) {
        SH_FAIL(sh, "Illegal sequence of shader operations! Current output "
                "signature is '%s', but called operation expects '%s'!",
                names[sh->res.output], names[insig]);
        return false;
    }

    // All of our shaders end up returning a vec4 color
    sh->res.output = PL_SHADER_SIG_COLOR;
    sh->output_w = PL_DEF(sh->output_w, w);
    sh->output_h = PL_DEF(sh->output_h, h);
    return true;
}
687 
// Destroys the shader object `*ptr` and resets the pointer to NULL. The
// object's `uninit` callback (if any) is invoked with the object's GPU and
// private data before the object itself is freed. Does nothing if `*ptr` is
// already NULL.
void pl_shader_obj_destroy(pl_shader_obj *ptr)
{
    pl_shader_obj obj = *ptr;
    if (obj) {
        if (obj->uninit)
            obj->uninit(obj->gpu, obj->priv);

        *ptr = NULL;
        pl_free(obj);
    }
}
700 
// Returns the private data of the shader object stored in `*ptr`, creating
// the object first if `*ptr` is NULL. New objects are bound to the shader's
// GPU, tagged with `type`, given `priv_size` bytes of zeroed private data
// and remember `uninit` as their cleanup callback.
//
// Returns NULL if `ptr` itself is NULL, or (after SH_FAIL) if an existing
// object belongs to a different GPU or has a different type.
void *sh_require_obj(pl_shader sh, pl_shader_obj *ptr,
                     enum pl_shader_obj_type type, size_t priv_size,
                     void (*uninit)(pl_gpu gpu, void *priv))
{
    if (!ptr)
        return NULL;

    pl_shader_obj obj = *ptr;
    if (obj) {
        // Existing objects must match the current usage exactly
        if (obj->gpu != SH_GPU(sh)) {
            SH_FAIL(sh, "Passed pl_shader_obj belongs to different GPU!");
            return NULL;
        }

        if (obj->type != type) {
            SH_FAIL(sh, "Passed pl_shader_obj of wrong type! Shader objects must "
                    "always be used with the same type of shader.");
            return NULL;
        }
    } else {
        obj = pl_zalloc_ptr(NULL, obj);
        obj->gpu = SH_GPU(sh);
        obj->type = type;
        obj->priv = pl_zalloc(obj, priv_size);
        obj->uninit = uninit;
    }

    *ptr = obj;
    return obj->priv;
}
731 
// Emits a pseudo-random number generator into the shader and returns the name
// of a macro that expands to a call producing the next random value from the
// generator's state variable. If `p_state` is non-NULL, it receives the name
// of that state variable.
//
// The state is initialized from gl_FragCoord and a seed. With `temporal`
// set, the seed is a dynamic shader variable derived from the `index` shader
// parameter; otherwise it is the constant 0.0.
ident_t sh_prng(pl_shader sh, bool temporal, ident_t *p_state)
{
    // The generator is based on BlumBlumShub, with some modifications for
    // speed / aesthetics. It yields a pleasant-looking distribution across
    // frames even for very small differences between input coordinates.
    // cf. https://briansharpe.wordpress.com/2011/10/01/gpu-texture-free-noise/
    ident_t randfun = sh_fresh(sh, "random");
    ident_t permute = sh_fresh(sh, "permute");
    GLSLH("float %s(float x) {                      \n"
          "    x = (34.0 * x + 1.0) * x;            \n"
          "    return fract(x * 1.0/289.0) * 289.0; \n" // (almost) mod 289
          "}                                        \n"
          "float %s(inout float state) {            \n"
          "    state = %s(state);                   \n"
          "    return fract(state * 1.0/41.0);      \n"
          "}\n", permute, randfun, permute);

    // Phi is the most irrational number, which makes it a good basis for
    // generating seed values for the PRNG
    static const double phi = 1.618033988749895;

    const char *seed = "0.0";
    if (temporal) {
        // Only the fractional part of the product is used as the seed
        float whole = 0;
        float seedval = modff(phi * SH_PARAMS(sh).index, &whole);
        struct pl_shader_var seed_var = {
            .var  = pl_var_float("seed"),
            .data = &seedval,
            .dynamic = true,
        };

        seed = sh_var(sh, seed_var);
    }

    ident_t state = sh_fresh(sh, "prng");
    GLSL("vec2 %s_init = fract(gl_FragCoord.xy * vec2(%f)); \n"
         "vec3 %s_m = vec3(%s_init, %s) + vec3(1.0);        \n"
         "float %s = %s(%s(%s(%s_m.x) + %s_m.y) + %s_m.z);  \n",
         state, phi,
         state, state, seed,
         state, permute, permute, permute, state, state, state);

    if (p_state != NULL)
        *p_state = state;

    ident_t res = sh_fresh(sh, "RAND");
    GLSLH("#define %s (%s(%s))\n", res, randfun, state);
    return res;
}
778 
779 // Defines a LUT position helper macro. This translates from an absolute texel
780 // scale (0.0 - 1.0) to the texture coordinate scale for the corresponding
781 // sample in a texture of dimension `lut_size`.
sh_lut_pos(pl_shader sh,int lut_size)782 static ident_t sh_lut_pos(pl_shader sh, int lut_size)
783 {
784     ident_t name = sh_fresh(sh, "LUT_POS");
785     GLSLH("#define %s(x) mix(%s, %s, (x)) \n",
786           name, SH_FLOAT(0.5 / lut_size), SH_FLOAT(1.0 - 0.5 / lut_size));
787     return name;
788 }
789 
// Private state of a LUT shader object. It records the parameters the LUT
// was last generated with, so that sh_lut() can tell when the cached contents
// have to be regenerated, plus the generated contents themselves.
struct sh_lut_obj {
    // Cached generation parameters; sh_lut() compares each of these against
    // the requested parameters and regenerates the LUT on any mismatch
    enum sh_lut_method method;
    enum pl_var_type type;
    bool linear;
    int width, height, depth, comps;
    // Compared against the signature in the LUT params to detect changes
    uint64_t signature;

    // weights, depending on the method
    pl_tex tex;     // released via pl_tex_destroy() in sh_lut_uninit()
    pl_str str;     // `str.buf` is released via pl_free() in sh_lut_uninit()
    void *data;     // released via pl_free() in sh_lut_uninit()
};
802 
// Cleanup callback for LUT shader objects: destroys the texture, frees the
// string buffer and the raw data, and leaves the object zeroed.
static void sh_lut_uninit(pl_gpu gpu, void *ptr)
{
    const struct sh_lut_obj blank = {0};
    struct sh_lut_obj *lut = ptr;

    pl_tex_destroy(gpu, &lut->tex);
    pl_free(lut->str.buf);
    pl_free(lut->data);

    *lut = blank;
}
812 
// Maximum LUT size to embed as a literal array when no method is explicitly
// requested (SH_LUT_AUTO). sh_lut() compares this against the number of LUT
// entries (width * height * depth), not counting the components per entry.
#define SH_LUT_MAX_LITERAL 256
815 
sh_lut(pl_shader sh,const struct sh_lut_params * params)816 ident_t sh_lut(pl_shader sh, const struct sh_lut_params *params)
817 {
818     pl_gpu gpu = SH_GPU(sh);
819     void *tmp = NULL;
820     ident_t ret = NULL;
821 
822     pl_assert(params->width > 0 && params->height >= 0 && params->depth >= 0);
823     pl_assert(params->comps > 0);
824     pl_assert(params->type);
825     pl_assert(!params->linear || params->type == PL_VAR_FLOAT);
826 
827     int sizes[] = { params->width, params->height, params->depth };
828     int size = params->width * PL_DEF(params->height, 1) * PL_DEF(params->depth, 1);
829     int dims = params->depth ? 3 : params->height ? 2 : 1;
830 
831     int texdim = 0;
832     uint32_t max_tex_dim[] = {
833         gpu ? gpu->limits.max_tex_1d_dim : 0,
834         gpu ? gpu->limits.max_tex_2d_dim : 0,
835         gpu ? gpu->limits.max_tex_3d_dim : 0,
836     };
837 
838     // Try picking the right number of dimensions for the texture LUT. This
839     // allows e.g. falling back to 2D textures if 1D textures are unsupported.
840     for (int d = dims; d <= PL_ARRAY_SIZE(max_tex_dim); d++) {
841         // For a given dimension to be compatible, all coordinates need to be
842         // within the maximum texture size for that dimension
843         for (int i = 0; i < d; i++) {
844             if (sizes[i] > max_tex_dim[d - 1])
845                 goto next_dim;
846         }
847 
848         // All dimensions are compatible, so pick this texture dimension
849         texdim = d;
850         break;
851 
852 next_dim: ; // `continue` out of the inner loop
853     }
854 
855     static const enum pl_fmt_type fmt_type[PL_VAR_TYPE_COUNT] = {
856         [PL_VAR_SINT]   = PL_FMT_SINT,
857         [PL_VAR_UINT]   = PL_FMT_UINT,
858         [PL_VAR_FLOAT]  = PL_FMT_FLOAT,
859     };
860 
861     enum pl_fmt_caps texcaps = PL_FMT_CAP_SAMPLEABLE;
862     if (params->linear)
863         texcaps |= PL_FMT_CAP_LINEAR;
864 
865     pl_fmt texfmt = NULL;
866     if (texdim) {
867         texfmt = pl_find_fmt(gpu, fmt_type[params->type], params->comps,
868                              params->type == PL_VAR_FLOAT ? 16 : 32,
869                              pl_var_type_size(params->type) * 8,
870                              texcaps);
871     }
872 
873     struct sh_lut_obj *lut = SH_OBJ(sh, params->object, PL_SHADER_OBJ_LUT,
874                                     struct sh_lut_obj, sh_lut_uninit);
875 
876     if (!lut) {
877         SH_FAIL(sh, "Failed initializing LUT object!");
878         goto error;
879     }
880 
881     enum sh_lut_method method = params->method;
882 
883     // The linear sampling code currently only supports 1D linear interpolation
884     if (params->linear && dims > 1) {
885         if (texfmt) {
886             method = SH_LUT_TEXTURE;
887         } else {
888             SH_FAIL(sh, "Can't emulate linear LUTs for 2D/3D LUTs and no "
889                     "texture support available!");
890             goto error;
891         }
892     }
893 
894     // Older GLSL forbids literal array constructors
895     bool can_literal = sh_glsl(sh).version > 110;
896 
897     // Pick the best method
898     if (!method && size <= SH_LUT_MAX_LITERAL && !params->dynamic && can_literal)
899         method = SH_LUT_LITERAL; // use literals for small constant LUTs
900 
901     if (!method && texfmt)
902         method = SH_LUT_TEXTURE; // use textures if a texfmt exists
903 
904     // Use an input variable as a last fallback
905     if (!method)
906         method = SH_LUT_UNIFORM;
907 
908     // Forcibly reinitialize the existing LUT if needed
909     bool update = params->update || lut->signature != params->signature;
910     if (method != lut->method || params->type != lut->type ||
911         params->linear != lut->linear || params->width != lut->width ||
912         params->height != lut->height || params->depth != lut->depth ||
913         params->comps != lut->comps)
914     {
915         PL_DEBUG(sh, "LUT cache invalidated, regenerating..");
916         update = true;
917     }
918 
919     if (update) {
920         size_t buf_size = size * params->comps * pl_var_type_size(params->type);
921         tmp = pl_zalloc(NULL, buf_size);
922         params->fill(tmp, params);
923 
924         switch (method) {
925         case SH_LUT_TEXTURE: {
926             if (!texdim) {
927                 SH_FAIL(sh, "Texture LUT exceeds texture dimensions!");
928                 goto error;
929             }
930 
931             if (!texfmt) {
932                 SH_FAIL(sh, "Found no compatible texture format for LUT!");
933                 goto error;
934             }
935 
936             struct pl_tex_params tex_params = {
937                 .w              = params->width,
938                 .h              = PL_DEF(params->height, texdim >= 2 ? 1 : 0),
939                 .d              = PL_DEF(params->depth,  texdim >= 3 ? 1 : 0),
940                 .format         = texfmt,
941                 .sampleable     = true,
942                 .host_writable  = params->dynamic,
943                 .initial_data   = params->dynamic ? NULL : tmp,
944             };
945 
946             bool ok;
947             if (params->dynamic) {
948                 ok = pl_tex_recreate(gpu, &lut->tex, &tex_params);
949                 if (ok) {
950                     ok = pl_tex_upload(gpu, &(struct pl_tex_transfer_params) {
951                         .tex = lut->tex,
952                         .ptr = tmp,
953                     });
954                 }
955             } else {
956                 // Can't use pl_tex_recreate because of `initial_data`
957                 pl_tex_destroy(gpu, &lut->tex);
958                 lut->tex = pl_tex_create(gpu, &tex_params);
959                 ok = lut->tex;
960             }
961 
962             if (!ok) {
963                 SH_FAIL(sh, "Failed creating LUT texture!");
964                 goto error;
965             }
966             break;
967         }
968 
969         case SH_LUT_UNIFORM:
970             pl_free(lut->data);
971             lut->data = tmp; // re-use `tmp`
972             tmp = NULL;
973             break;
974 
975         case SH_LUT_LITERAL: {
976             lut->str.len = 0;
977             static const char prefix[PL_VAR_TYPE_COUNT] = {
978                 [PL_VAR_SINT]   = 'i',
979                 [PL_VAR_UINT]   = 'u',
980                 [PL_VAR_FLOAT]  = ' ',
981             };
982 
983             for (int i = 0; i < size * params->comps; i += params->comps) {
984                 if (i > 0)
985                     pl_str_append_asprintf_c(lut, &lut->str, ",");
986                 if (params->comps > 1) {
987                     pl_str_append_asprintf_c(lut, &lut->str, "%cvec%d(",
988                                              prefix[params->type], params->comps);
989                 }
990                 for (int c = 0; c < params->comps; c++) {
991                     switch (params->type) {
992                     case PL_VAR_FLOAT:
993                         pl_str_append_asprintf_c(lut, &lut->str, "%s%f",
994                                                  c > 0 ? "," : "",
995                                                  ((float *) tmp)[i+c]);
996                         break;
997                     case PL_VAR_UINT:
998                         pl_str_append_asprintf_c(lut, &lut->str, "%s%u",
999                                                  c > 0 ? "," : "",
1000                                                  ((unsigned int *) tmp)[i+c]);
1001                         break;
1002                     case PL_VAR_SINT:
1003                         pl_str_append_asprintf_c(lut, &lut->str, "%s%d",
1004                                                  c > 0 ? "," : "",
1005                                                  ((int *) tmp)[i+c]);
1006                         break;
1007                     case PL_VAR_INVALID:
1008                     case PL_VAR_TYPE_COUNT:
1009                         pl_unreachable();
1010                     }
1011                 }
1012                 if (params->comps > 1)
1013                     pl_str_append_asprintf_c(lut, &lut->str, ")");
1014             }
1015             break;
1016         }
1017 
1018         case SH_LUT_AUTO:
1019             pl_unreachable();
1020         }
1021 
1022         lut->method = method;
1023         lut->type = params->type;
1024         lut->linear = params->linear;
1025         lut->width = params->width;
1026         lut->height = params->height;
1027         lut->depth = params->depth;
1028         lut->comps = params->comps;
1029     }
1030 
1031     // Done updating, generate the GLSL
1032     ident_t name = sh_fresh(sh, "lut");
1033     ident_t arr_name = NULL;
1034 
1035     static const char * const swizzles[] = {"x", "xy", "xyz", "xyzw"};
1036     static const char * const vartypes[PL_VAR_TYPE_COUNT][4] = {
1037         [PL_VAR_SINT] = { "int", "ivec2", "ivec3", "ivec4" },
1038         [PL_VAR_UINT] = { "uint", "uvec2", "uvec3", "uvec4" },
1039         [PL_VAR_FLOAT] = { "float", "vec2", "vec3", "vec4" },
1040     };
1041 
1042     switch (method) {
1043     case SH_LUT_TEXTURE: {
1044         assert(texdim);
1045         ident_t tex = sh_desc(sh, (struct pl_shader_desc) {
1046             .desc = {
1047                 .name = "weights",
1048                 .type = PL_DESC_SAMPLED_TEX,
1049             },
1050             .binding = {
1051                 .object = lut->tex,
1052                 .sample_mode = params->linear ? PL_TEX_SAMPLE_LINEAR
1053                                               : PL_TEX_SAMPLE_NEAREST,
1054             }
1055         });
1056 
1057         // texelFetch requires GLSL >= 130, so fall back to the linear code
1058         if (params->linear || gpu->glsl.version < 130) {
1059             ident_t pos_macros[PL_ARRAY_SIZE(sizes)] = {0};
1060             for (int i = 0; i < dims; i++)
1061                 pos_macros[i] = sh_lut_pos(sh, sizes[i]);
1062 
1063             GLSLH("#define %s(pos) (%s(%s, %s(\\\n",
1064                   name, sh_tex_fn(sh, lut->tex->params),
1065                   tex, vartypes[PL_VAR_FLOAT][texdim - 1]);
1066 
1067             for (int i = 0; i < texdim; i++) {
1068                 char sep = i == 0 ? ' ' : ',';
1069                 if (pos_macros[i]) {
1070                     if (dims > 1) {
1071                         GLSLH("   %c%s(%s(pos).%c)\\\n", sep, pos_macros[i],
1072                               vartypes[PL_VAR_FLOAT][dims - 1], "xyzw"[i]);
1073                     } else {
1074                         GLSLH("   %c%s(float(pos))\\\n", sep, pos_macros[i]);
1075                     }
1076                 } else {
1077                     GLSLH("   %c%f\\\n", sep, 0.5);
1078                 }
1079             }
1080             GLSLH("  )).%s)\n", swizzles[params->comps - 1]);
1081         } else {
1082             GLSLH("#define %s(pos) (texelFetch(%s, %s(pos",
1083                   name, tex, vartypes[PL_VAR_SINT][texdim - 1]);
1084 
1085             // Fill up extra components of the index
1086             for (int i = dims; i < texdim; i++)
1087                 GLSLH(", 0");
1088 
1089             GLSLH("), 0).%s)\n", swizzles[params->comps - 1]);
1090         }
1091 
1092         break;
1093     }
1094 
1095     case SH_LUT_UNIFORM:
1096         arr_name = sh_var(sh, (struct pl_shader_var) {
1097             .var = {
1098                 .name = "weights",
1099                 .type = params->type,
1100                 .dim_v = params->comps,
1101                 .dim_m = 1,
1102                 .dim_a = size,
1103             },
1104             .data = lut->data,
1105         });
1106         break;
1107 
1108     case SH_LUT_LITERAL:
1109         arr_name = sh_fresh(sh, "weights");
1110         GLSLH("const %s %s[%d] = %s[](\n  ",
1111               vartypes[params->type][params->comps - 1], arr_name, size,
1112               vartypes[params->type][params->comps - 1]);
1113         pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], lut->str);
1114         GLSLH(");\n");
1115         break;
1116 
1117     case SH_LUT_AUTO:
1118         pl_unreachable();
1119     }
1120 
1121     if (arr_name) {
1122         GLSLH("#define %s(pos) (%s[int((pos)%s)\\\n",
1123               name, arr_name, dims > 1 ? "[0]" : "");
1124         int shift = params->width;
1125         for (int i = 1; i < dims; i++) {
1126             GLSLH("    + %d * int((pos)[%d])\\\n", shift, i);
1127             shift *= sizes[i];
1128         }
1129         GLSLH("  ])\n");
1130 
1131         if (params->linear) {
1132             pl_assert(dims == 1);
1133             pl_assert(params->type == PL_VAR_FLOAT);
1134             ident_t arr_lut = name;
1135             name = sh_fresh(sh, "lut_lin");
1136             GLSLH("%s %s(float fpos) {                              \n"
1137                   "    fpos = clamp(fpos, 0.0, 1.0) * %d.0;         \n"
1138                   "    float fbase = floor(fpos);                   \n"
1139                   "    float fceil = ceil(fpos);                    \n"
1140                   "    float fcoord = fpos - fbase;                 \n"
1141                   "    return mix(%s(fbase), %s(fceil), fcoord);    \n"
1142                   "}                                                \n",
1143                   vartypes[PL_VAR_FLOAT][params->comps - 1], name,
1144                   size - 1,
1145                   arr_lut, arr_lut);
1146         }
1147     }
1148 
1149     pl_assert(name);
1150     ret = name;
1151     // fall through
1152 error:
1153     pl_free(tmp);
1154     return ret;
1155 }
1156 
sh_bvec(const pl_shader sh,int dims)1157 const char *sh_bvec(const pl_shader sh, int dims)
1158 {
1159     static const char *bvecs[] = {
1160         [1] = "bool",
1161         [2] = "bvec2",
1162         [3] = "bvec3",
1163         [4] = "bvec4",
1164     };
1165 
1166     static const char *vecs[] = {
1167         [1] = "float",
1168         [2] = "vec2",
1169         [3] = "vec3",
1170         [4] = "vec4",
1171     };
1172 
1173     pl_assert(dims > 0 && dims < PL_ARRAY_SIZE(bvecs));
1174     return sh_glsl(sh).version >= 130 ? bvecs[dims] : vecs[dims];
1175 }
1176