1 #include <stddef.h>
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <stdarg.h>
6 #include <assert.h>
7 
8 #include <libavutil/sha.h>
9 #include <libavutil/mem.h>
10 
11 #include "osdep/io.h"
12 
13 #include "common/common.h"
14 #include "options/path.h"
15 #include "stream/stream.h"
16 #include "shader_cache.h"
17 #include "utils.h"
18 
// Upper bound on the number of cached shaders: once more than this many
// shaders have been created, the cache is forcibly flushed.
#define SC_MAX_ENTRIES 256
21 
// Storage for the value of one shader input. Which member is meaningful
// depends on the ra_vartype of the input it belongs to.
union uniform_val {
    float f[9];         // RA_VARTYPE_FLOAT (large enough for a 3x3 matrix)
    int i[4];           // RA_VARTYPE_INT
    struct ra_tex *tex; // RA_VARTYPE_TEX, RA_VARTYPE_IMG_*
    struct ra_buf *buf; // RA_VARTYPE_BUF_*
};
28 
// How a uniform's value is delivered to the shader.
enum sc_uniform_type {
    // global uniform (RA_CAP_GLOBAL_UNIFORM)
    SC_UNIFORM_TYPE_GLOBAL = 0,
    // uniform buffer (RA_CAP_BUF_RO)
    SC_UNIFORM_TYPE_UBO,
    // push constant (ra.max_pushc_size)
    SC_UNIFORM_TYPE_PUSHC,
};
34 
35 struct sc_uniform {
36     enum sc_uniform_type type;
37     struct ra_renderpass_input input;
38     const char *glsl_type;
39     union uniform_val v;
40     char *buffer_format;
41     // for SC_UNIFORM_TYPE_UBO/PUSHC:
42     struct ra_layout layout;
43     size_t offset; // byte offset within the buffer
44 };
45 
46 struct sc_cached_uniform {
47     union uniform_val v;
48     int index; // for ra_renderpass_input_val
49     bool set; // whether the uniform has ever been set
50 };
51 
52 struct sc_entry {
53     struct ra_renderpass *pass;
54     struct sc_cached_uniform *cached_uniforms;
55     int num_cached_uniforms;
56     bstr total;
57     struct timer_pool *timer;
58     struct ra_buf *ubo;
59     int ubo_index; // for ra_renderpass_input_val.index
60     void *pushc;
61 };
62 
63 struct gl_shader_cache {
64     struct ra *ra;
65     struct mp_log *log;
66 
67     // permanent
68     char **exts;
69     int num_exts;
70 
71     // this is modified during use (gl_sc_add() etc.) and reset for each shader
72     bstr prelude_text;
73     bstr header_text;
74     bstr text;
75 
76     // Next binding point (texture unit, image unit, buffer binding, etc.)
77     // In OpenGL these are separate for each input type
78     int next_binding[RA_VARTYPE_COUNT];
79     bool next_uniform_dynamic;
80 
81     struct ra_renderpass_params params;
82 
83     struct sc_entry **entries;
84     int num_entries;
85 
86     struct sc_entry *current_shader; // set by gl_sc_generate()
87 
88     struct sc_uniform *uniforms;
89     int num_uniforms;
90 
91     int ubo_binding;
92     size_t ubo_size;
93     size_t pushc_size;
94 
95     struct ra_renderpass_input_val *values;
96     int num_values;
97 
98     // For checking that the user is calling gl_sc_reset() properly.
99     bool needs_reset;
100 
101     bool error_state; // true if an error occurred
102 
103     // temporary buffers (avoids frequent reallocations)
104     bstr tmp[6];
105 
106     // For the disk-cache.
107     char *cache_dir;
108     struct mpv_global *global; // can be NULL
109 };
110 
gl_sc_create(struct ra * ra,struct mpv_global * global,struct mp_log * log)111 struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global,
112                                      struct mp_log *log)
113 {
114     struct gl_shader_cache *sc = talloc_ptrtype(NULL, sc);
115     *sc = (struct gl_shader_cache){
116         .ra = ra,
117         .global = global,
118         .log = log,
119     };
120     gl_sc_reset(sc);
121     return sc;
122 }
123 
124 // Reset the previous pass. This must be called after gl_sc_generate and before
125 // starting a new shader. It may also be called on errors.
gl_sc_reset(struct gl_shader_cache * sc)126 void gl_sc_reset(struct gl_shader_cache *sc)
127 {
128     sc->prelude_text.len = 0;
129     sc->header_text.len = 0;
130     sc->text.len = 0;
131     for (int n = 0; n < sc->num_uniforms; n++)
132         talloc_free((void *)sc->uniforms[n].input.name);
133     sc->num_uniforms = 0;
134     sc->ubo_binding = 0;
135     sc->ubo_size = 0;
136     sc->pushc_size = 0;
137     for (int i = 0; i < RA_VARTYPE_COUNT; i++)
138         sc->next_binding[i] = 0;
139     sc->next_uniform_dynamic = false;
140     sc->current_shader = NULL;
141     sc->params = (struct ra_renderpass_params){0};
142     sc->needs_reset = false;
143 }
144 
sc_flush_cache(struct gl_shader_cache * sc)145 static void sc_flush_cache(struct gl_shader_cache *sc)
146 {
147     MP_DBG(sc, "flushing shader cache\n");
148 
149     for (int n = 0; n < sc->num_entries; n++) {
150         struct sc_entry *e = sc->entries[n];
151         ra_buf_free(sc->ra, &e->ubo);
152         if (e->pass)
153             sc->ra->fns->renderpass_destroy(sc->ra, e->pass);
154         timer_pool_destroy(e->timer);
155         talloc_free(e);
156     }
157     sc->num_entries = 0;
158 }
159 
// Tear down the shader cache and everything it owns. Passing NULL is allowed
// and does nothing.
void gl_sc_destroy(struct gl_shader_cache *sc)
{
    if (sc) {
        // Release per-shader state first, then the cached entries, and
        // finally the cache object itself.
        gl_sc_reset(sc);
        sc_flush_cache(sc);
        talloc_free(sc);
    }
}
168 
gl_sc_error_state(struct gl_shader_cache * sc)169 bool gl_sc_error_state(struct gl_shader_cache *sc)
170 {
171     return sc->error_state;
172 }
173 
gl_sc_reset_error(struct gl_shader_cache * sc)174 void gl_sc_reset_error(struct gl_shader_cache *sc)
175 {
176     sc->error_state = false;
177 }
178 
gl_sc_enable_extension(struct gl_shader_cache * sc,char * name)179 void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name)
180 {
181     for (int n = 0; n < sc->num_exts; n++) {
182         if (strcmp(sc->exts[n], name) == 0)
183             return;
184     }
185     MP_TARRAY_APPEND(sc, sc->exts, sc->num_exts, talloc_strdup(sc, name));
186 }
187 
// Append the NUL-terminated C string s to the bstr b (allocated under sc).
#define bstr_xappend0(sc, b, s) bstr_xappend((sc), (b), bstr0(s))
189 
gl_sc_add(struct gl_shader_cache * sc,const char * text)190 void gl_sc_add(struct gl_shader_cache *sc, const char *text)
191 {
192     bstr_xappend0(sc, &sc->text, text);
193 }
194 
gl_sc_addf(struct gl_shader_cache * sc,const char * textf,...)195 void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...)
196 {
197     va_list ap;
198     va_start(ap, textf);
199     bstr_xappend_vasprintf(sc, &sc->text, textf, ap);
200     va_end(ap);
201 }
202 
gl_sc_hadd(struct gl_shader_cache * sc,const char * text)203 void gl_sc_hadd(struct gl_shader_cache *sc, const char *text)
204 {
205     bstr_xappend0(sc, &sc->header_text, text);
206 }
207 
gl_sc_haddf(struct gl_shader_cache * sc,const char * textf,...)208 void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...)
209 {
210     va_list ap;
211     va_start(ap, textf);
212     bstr_xappend_vasprintf(sc, &sc->header_text, textf, ap);
213     va_end(ap);
214 }
215 
gl_sc_hadd_bstr(struct gl_shader_cache * sc,struct bstr text)216 void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text)
217 {
218     bstr_xappend(sc, &sc->header_text, text);
219 }
220 
gl_sc_paddf(struct gl_shader_cache * sc,const char * textf,...)221 void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...)
222 {
223     va_list ap;
224     va_start(ap, textf);
225     bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, ap);
226     va_end(ap);
227 }
228 
find_uniform(struct gl_shader_cache * sc,const char * name)229 static struct sc_uniform *find_uniform(struct gl_shader_cache *sc,
230                                        const char *name)
231 {
232     struct sc_uniform new = {
233         .input = {
234             .dim_v = 1,
235             .dim_m = 1,
236         },
237     };
238 
239     for (int n = 0; n < sc->num_uniforms; n++) {
240         struct sc_uniform *u = &sc->uniforms[n];
241         if (strcmp(u->input.name, name) == 0) {
242             const char *allocname = u->input.name;
243             *u = new;
244             u->input.name = allocname;
245             return u;
246         }
247     }
248 
249     // not found -> add it
250     new.input.name = talloc_strdup(NULL, name);
251     MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new);
252     return &sc->uniforms[sc->num_uniforms - 1];
253 }
254 
gl_sc_next_binding(struct gl_shader_cache * sc,enum ra_vartype type)255 static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type)
256 {
257     return sc->next_binding[sc->ra->fns->desc_namespace(sc->ra, type)]++;
258 }
259 
gl_sc_uniform_dynamic(struct gl_shader_cache * sc)260 void gl_sc_uniform_dynamic(struct gl_shader_cache *sc)
261 {
262     sc->next_uniform_dynamic = true;
263 }
264 
265 // Updates the metadata for the given sc_uniform. Assumes sc_uniform->input
266 // and glsl_type/buffer_format are already set.
update_uniform_params(struct gl_shader_cache * sc,struct sc_uniform * u)267 static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform *u)
268 {
269     bool dynamic = sc->next_uniform_dynamic;
270     sc->next_uniform_dynamic = false;
271 
272     // Try not using push constants for "large" values like matrices, since
273     // this is likely to both exceed the VGPR budget as well as the pushc size
274     // budget
275     bool try_pushc = u->input.dim_m == 1 || dynamic;
276 
277     // Attempt using push constants first
278     if (try_pushc && sc->ra->glsl_vulkan && sc->ra->max_pushc_size) {
279         struct ra_layout layout = sc->ra->fns->push_constant_layout(&u->input);
280         size_t offset = MP_ALIGN_UP(sc->pushc_size, layout.align);
281         // Push constants have limited size, so make sure we don't exceed this
282         size_t new_size = offset + layout.size;
283         if (new_size <= sc->ra->max_pushc_size) {
284             u->type = SC_UNIFORM_TYPE_PUSHC;
285             u->layout = layout;
286             u->offset = offset;
287             sc->pushc_size = new_size;
288             return;
289         }
290     }
291 
292     // Attempt using uniform buffer next. The GLSL version 440 check is due
293     // to explicit offsets on UBO entries. In theory we could leave away
294     // the offsets and support UBOs for older GL as well, but this is a nice
295     // safety net for driver bugs (and also rules out potentially buggy drivers)
296     // Also avoid UBOs for highly dynamic stuff since that requires synchronizing
297     // the UBO writes every frame
298     bool try_ubo = !(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM) || !dynamic;
299     if (try_ubo && sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) {
300         u->type = SC_UNIFORM_TYPE_UBO;
301         u->layout = sc->ra->fns->uniform_layout(&u->input);
302         u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align);
303         sc->ubo_size = u->offset + u->layout.size;
304         return;
305     }
306 
307     // If all else fails, use global uniforms
308     assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM);
309     u->type = SC_UNIFORM_TYPE_GLOBAL;
310 }
311 
gl_sc_uniform_texture(struct gl_shader_cache * sc,char * name,struct ra_tex * tex)312 void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name,
313                            struct ra_tex *tex)
314 {
315     const char *glsl_type = "sampler2D";
316     if (tex->params.dimensions == 1) {
317         glsl_type = "sampler1D";
318     } else if (tex->params.dimensions == 3) {
319         glsl_type = "sampler3D";
320     } else if (tex->params.non_normalized) {
321         glsl_type = "sampler2DRect";
322     } else if (tex->params.external_oes) {
323         glsl_type = "samplerExternalOES";
324     } else if (tex->params.format->ctype == RA_CTYPE_UINT) {
325         glsl_type = sc->ra->glsl_es ? "highp usampler2D" : "usampler2D";
326     }
327 
328     struct sc_uniform *u = find_uniform(sc, name);
329     u->input.type = RA_VARTYPE_TEX;
330     u->glsl_type = glsl_type;
331     u->input.binding = gl_sc_next_binding(sc, u->input.type);
332     u->v.tex = tex;
333 }
334 
gl_sc_uniform_image2D_wo(struct gl_shader_cache * sc,const char * name,struct ra_tex * tex)335 void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name,
336                               struct ra_tex *tex)
337 {
338     gl_sc_enable_extension(sc, "GL_ARB_shader_image_load_store");
339 
340     struct sc_uniform *u = find_uniform(sc, name);
341     u->input.type = RA_VARTYPE_IMG_W;
342     u->glsl_type = "writeonly image2D";
343     u->input.binding = gl_sc_next_binding(sc, u->input.type);
344     u->v.tex = tex;
345 }
346 
gl_sc_ssbo(struct gl_shader_cache * sc,char * name,struct ra_buf * buf,char * format,...)347 void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, struct ra_buf *buf,
348                 char *format, ...)
349 {
350     assert(sc->ra->caps & RA_CAP_BUF_RW);
351     gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object");
352 
353     struct sc_uniform *u = find_uniform(sc, name);
354     u->input.type = RA_VARTYPE_BUF_RW;
355     u->glsl_type = "";
356     u->input.binding = gl_sc_next_binding(sc, u->input.type);
357     u->v.buf = buf;
358 
359     va_list ap;
360     va_start(ap, format);
361     u->buffer_format = ta_vasprintf(sc, format, ap);
362     va_end(ap);
363 }
364 
gl_sc_uniform_f(struct gl_shader_cache * sc,char * name,float f)365 void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f)
366 {
367     struct sc_uniform *u = find_uniform(sc, name);
368     u->input.type = RA_VARTYPE_FLOAT;
369     u->glsl_type = "float";
370     update_uniform_params(sc, u);
371     u->v.f[0] = f;
372 }
373 
gl_sc_uniform_i(struct gl_shader_cache * sc,char * name,int i)374 void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i)
375 {
376     struct sc_uniform *u = find_uniform(sc, name);
377     u->input.type = RA_VARTYPE_INT;
378     u->glsl_type = "int";
379     update_uniform_params(sc, u);
380     u->v.i[0] = i;
381 }
382 
gl_sc_uniform_vec2(struct gl_shader_cache * sc,char * name,float f[2])383 void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2])
384 {
385     struct sc_uniform *u = find_uniform(sc, name);
386     u->input.type = RA_VARTYPE_FLOAT;
387     u->input.dim_v = 2;
388     u->glsl_type = "vec2";
389     update_uniform_params(sc, u);
390     u->v.f[0] = f[0];
391     u->v.f[1] = f[1];
392 }
393 
gl_sc_uniform_vec3(struct gl_shader_cache * sc,char * name,float f[3])394 void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3])
395 {
396     struct sc_uniform *u = find_uniform(sc, name);
397     u->input.type = RA_VARTYPE_FLOAT;
398     u->input.dim_v = 3;
399     u->glsl_type = "vec3";
400     update_uniform_params(sc, u);
401     u->v.f[0] = f[0];
402     u->v.f[1] = f[1];
403     u->v.f[2] = f[2];
404 }
405 
// Transpose a 2x2 matrix stored as 4 consecutive floats, in place.
static void transpose2x2(float r[2 * 2])
{
    // Only the two off-diagonal elements trade places.
    float tmp = r[1];
    r[1] = r[2];
    r[2] = tmp;
}
410 
gl_sc_uniform_mat2(struct gl_shader_cache * sc,char * name,bool transpose,float * v)411 void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name,
412                         bool transpose, float *v)
413 {
414     struct sc_uniform *u = find_uniform(sc, name);
415     u->input.type = RA_VARTYPE_FLOAT;
416     u->input.dim_v = 2;
417     u->input.dim_m = 2;
418     u->glsl_type = "mat2";
419     update_uniform_params(sc, u);
420     for (int n = 0; n < 4; n++)
421         u->v.f[n] = v[n];
422     if (transpose)
423         transpose2x2(&u->v.f[0]);
424 }
425 
// Transpose a 3x3 matrix stored as 9 consecutive floats, in place.
static void transpose3x3(float r[3 * 3])
{
    // Swap each element above the diagonal with its mirror image below it.
    for (int row = 0; row < 3; row++) {
        for (int col = row + 1; col < 3; col++) {
            float tmp = r[col + 3 * row];
            r[col + 3 * row] = r[row + 3 * col];
            r[row + 3 * col] = tmp;
        }
    }
}
432 
gl_sc_uniform_mat3(struct gl_shader_cache * sc,char * name,bool transpose,float * v)433 void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
434                         bool transpose, float *v)
435 {
436     struct sc_uniform *u = find_uniform(sc, name);
437     u->input.type = RA_VARTYPE_FLOAT;
438     u->input.dim_v = 3;
439     u->input.dim_m = 3;
440     u->glsl_type = "mat3";
441     update_uniform_params(sc, u);
442     for (int n = 0; n < 9; n++)
443         u->v.f[n] = v[n];
444     if (transpose)
445         transpose3x3(&u->v.f[0]);
446 }
447 
gl_sc_blend(struct gl_shader_cache * sc,enum ra_blend blend_src_rgb,enum ra_blend blend_dst_rgb,enum ra_blend blend_src_alpha,enum ra_blend blend_dst_alpha)448 void gl_sc_blend(struct gl_shader_cache *sc,
449                  enum ra_blend blend_src_rgb,
450                  enum ra_blend blend_dst_rgb,
451                  enum ra_blend blend_src_alpha,
452                  enum ra_blend blend_dst_alpha)
453 {
454     sc->params.enable_blend = true;
455     sc->params.blend_src_rgb = blend_src_rgb;
456     sc->params.blend_dst_rgb = blend_dst_rgb;
457     sc->params.blend_src_alpha = blend_src_alpha;
458     sc->params.blend_dst_alpha = blend_dst_alpha;
459 }
460 
gl_sc_bvec(struct gl_shader_cache * sc,int dims)461 const char *gl_sc_bvec(struct gl_shader_cache *sc, int dims)
462 {
463     static const char *bvecs[] = {
464         [1] = "bool",
465         [2] = "bvec2",
466         [3] = "bvec3",
467         [4] = "bvec4",
468     };
469 
470     static const char *vecs[] = {
471         [1] = "float",
472         [2] = "vec2",
473         [3] = "vec3",
474         [4] = "vec4",
475     };
476 
477     assert(dims > 0 && dims < MP_ARRAY_SIZE(bvecs));
478     return sc->ra->glsl_version >= 130 ? bvecs[dims] : vecs[dims];
479 }
480 
vao_glsl_type(const struct ra_renderpass_input * e)481 static const char *vao_glsl_type(const struct ra_renderpass_input *e)
482 {
483     // pretty dumb... too dumb, but works for us
484     switch (e->dim_v) {
485     case 1: return "float";
486     case 2: return "vec2";
487     case 3: return "vec3";
488     case 4: return "vec4";
489     default: abort();
490     }
491 }
492 
update_ubo(struct ra * ra,struct ra_buf * ubo,struct sc_uniform * u)493 static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u)
494 {
495     uintptr_t src = (uintptr_t) &u->v;
496     size_t dst = u->offset;
497     struct ra_layout src_layout = ra_renderpass_input_layout(&u->input);
498     struct ra_layout dst_layout = u->layout;
499 
500     for (int i = 0; i < u->input.dim_m; i++) {
501         ra->fns->buf_update(ra, ubo, dst, (void *)src, src_layout.stride);
502         src += src_layout.stride;
503         dst += dst_layout.stride;
504     }
505 }
506 
update_pushc(struct ra * ra,void * pushc,struct sc_uniform * u)507 static void update_pushc(struct ra *ra, void *pushc, struct sc_uniform *u)
508 {
509     uintptr_t src = (uintptr_t) &u->v;
510     uintptr_t dst = (uintptr_t) pushc + (ptrdiff_t) u->offset;
511     struct ra_layout src_layout = ra_renderpass_input_layout(&u->input);
512     struct ra_layout dst_layout = u->layout;
513 
514     for (int i = 0; i < u->input.dim_m; i++) {
515         memcpy((void *)dst, (void *)src, src_layout.stride);
516         src += src_layout.stride;
517         dst += dst_layout.stride;
518     }
519 }
520 
update_uniform(struct gl_shader_cache * sc,struct sc_entry * e,struct sc_uniform * u,int n)521 static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
522                            struct sc_uniform *u, int n)
523 {
524     struct sc_cached_uniform *un = &e->cached_uniforms[n];
525     struct ra_layout layout = ra_renderpass_input_layout(&u->input);
526     if (layout.size > 0 && un->set && memcmp(&un->v, &u->v, layout.size) == 0)
527         return;
528 
529     un->v = u->v;
530     un->set = true;
531 
532     static const char *desc[] = {
533         [SC_UNIFORM_TYPE_UBO]    = "UBO",
534         [SC_UNIFORM_TYPE_PUSHC]  = "PC",
535         [SC_UNIFORM_TYPE_GLOBAL] = "global",
536     };
537     MP_TRACE(sc, "Updating %s uniform '%s'\n", desc[u->type], u->input.name);
538 
539     switch (u->type) {
540     case SC_UNIFORM_TYPE_GLOBAL: {
541         struct ra_renderpass_input_val value = {
542             .index = un->index,
543             .data = &un->v,
544         };
545         MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value);
546         break;
547     }
548     case SC_UNIFORM_TYPE_UBO:
549         assert(e->ubo);
550         update_ubo(sc->ra, e->ubo, u);
551         break;
552     case SC_UNIFORM_TYPE_PUSHC:
553         assert(e->pushc);
554         update_pushc(sc->ra, e->pushc, u);
555         break;
556     default: abort();
557     }
558 }
559 
gl_sc_set_cache_dir(struct gl_shader_cache * sc,const char * dir)560 void gl_sc_set_cache_dir(struct gl_shader_cache *sc, const char *dir)
561 {
562     talloc_free(sc->cache_dir);
563     sc->cache_dir = talloc_strdup(sc, dir);
564 }
565 
create_pass(struct gl_shader_cache * sc,struct sc_entry * entry)566 static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
567 {
568     bool ret = false;
569 
570     void *tmp = talloc_new(NULL);
571     struct ra_renderpass_params params = sc->params;
572 
573     const char *cache_header = "mpv shader cache v1\n";
574     char *cache_filename = NULL;
575     char *cache_dir = NULL;
576 
577     if (sc->cache_dir && sc->cache_dir[0]) {
578         // Try to load it from a disk cache.
579         cache_dir = mp_get_user_path(tmp, sc->global, sc->cache_dir);
580 
581         struct AVSHA *sha = av_sha_alloc();
582         if (!sha)
583             abort();
584         av_sha_init(sha, 256);
585         av_sha_update(sha, entry->total.start, entry->total.len);
586 
587         uint8_t hash[256 / 8];
588         av_sha_final(sha, hash);
589         av_free(sha);
590 
591         char hashstr[256 / 8 * 2 + 1];
592         for (int n = 0; n < 256 / 8; n++)
593             snprintf(hashstr + n * 2, sizeof(hashstr) - n * 2, "%02X", hash[n]);
594 
595         cache_filename = mp_path_join(tmp, cache_dir, hashstr);
596         if (stat(cache_filename, &(struct stat){0}) == 0) {
597             MP_DBG(sc, "Trying to load shader from disk...\n");
598             struct bstr cachedata =
599                 stream_read_file(cache_filename, tmp, sc->global, 1000000000);
600             if (bstr_eatstart0(&cachedata, cache_header))
601                 params.cached_program = cachedata;
602         }
603     }
604 
605     // If using a UBO, also make sure to add it as an input value so the RA
606     // can see it
607     if (sc->ubo_size) {
608         entry->ubo_index = sc->params.num_inputs;
609         struct ra_renderpass_input ubo_input = {
610             .name = "UBO",
611             .type = RA_VARTYPE_BUF_RO,
612             .dim_v = 1,
613             .dim_m = 1,
614             .binding = sc->ubo_binding,
615         };
616         MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input);
617     }
618 
619     if (sc->pushc_size) {
620         params.push_constants_size = MP_ALIGN_UP(sc->pushc_size, 4);
621         entry->pushc = talloc_zero_size(entry, params.push_constants_size);
622     }
623 
624     if (sc->ubo_size) {
625         struct ra_buf_params ubo_params = {
626             .type = RA_BUF_TYPE_UNIFORM,
627             .size = sc->ubo_size,
628             .host_mutable = true,
629         };
630 
631         entry->ubo = ra_buf_create(sc->ra, &ubo_params);
632         if (!entry->ubo) {
633             MP_ERR(sc, "Failed creating uniform buffer!\n");
634             goto error;
635         }
636     }
637 
638     entry->pass = sc->ra->fns->renderpass_create(sc->ra, &params);
639     if (!entry->pass)
640         goto error;
641 
642     if (entry->pass && cache_filename) {
643         bstr nc = entry->pass->params.cached_program;
644         if (nc.len && !bstr_equals(params.cached_program, nc)) {
645             mp_mkdirp(cache_dir);
646 
647             MP_DBG(sc, "Writing shader cache file: %s\n", cache_filename);
648             FILE *out = fopen(cache_filename, "wb");
649             if (out) {
650                 fwrite(cache_header, strlen(cache_header), 1, out);
651                 fwrite(nc.start, nc.len, 1, out);
652                 fclose(out);
653             }
654         }
655     }
656 
657     ret = true;
658 
659 error:
660     talloc_free(tmp);
661     return ret;
662 }
663 
// Shorthands for appending to a bstr. Both rely on a variable named `sc`
// being in scope at the point of use (it is the allocation parent).
// ADD appends printf-style formatted text, ADD_BSTR appends another bstr.
#define ADD(x, ...) bstr_xappend_asprintf(sc, (x), __VA_ARGS__)
#define ADD_BSTR(x, s) bstr_xappend(sc, (x), (s))
666 
// Emit one "layout(offset=N) <type> <name>;" member line for every registered
// uniform of the given kind.
static void add_uniform_block_members(struct gl_shader_cache *sc, bstr *dst,
                                      enum sc_uniform_type kind)
{
    for (int n = 0; n < sc->num_uniforms; n++) {
        struct sc_uniform *u = &sc->uniforms[n];
        if (u->type == kind) {
            ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type,
                u->input.name);
        }
    }
}

// Append the GLSL declarations for all registered uniforms to dst: first the
// UBO block (if any uniform lives in it), then the push constant block (ditto),
// then one declaration for each remaining "global" input.
static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
{
    // UBO-backed uniforms become members of a single std140 block.
    if (sc->ubo_size > 0) {
        ADD(dst, "layout(std140, binding=%d) uniform UBO {\n", sc->ubo_binding);
        add_uniform_block_members(sc, dst, SC_UNIFORM_TYPE_UBO);
        ADD(dst, "};\n");
    }

    // Push constants likewise get a block of their own.
    if (sc->pushc_size > 0) {
        ADD(dst, "layout(std430, push_constant) uniform PushC {\n");
        add_uniform_block_members(sc, dst, SC_UNIFORM_TYPE_PUSHC);
        ADD(dst, "};\n");
    }

    for (int n = 0; n < sc->num_uniforms; n++) {
        struct sc_uniform *u = &sc->uniforms[n];
        if (u->type != SC_UNIFORM_TYPE_GLOBAL)
            continue;

        switch (u->input.type) {
        case RA_VARTYPE_INT:
        case RA_VARTYPE_FLOAT:
            assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM);
            // fall through
        case RA_VARTYPE_TEX:
            // Vulkan needs the binding spelled out in the shader source. In
            // OpenGL that is optional and needs a higher GL version, so it is
            // left out there (ra_gl updates the bindings after program
            // creation instead).
            if (sc->ra->glsl_vulkan)
                ADD(dst, "layout(binding=%d) ", u->input.binding);
            ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name);
            break;
        case RA_VARTYPE_BUF_RO:
            ADD(dst, "layout(std140, binding=%d) uniform %s { %s };\n",
                u->input.binding, u->input.name, u->buffer_format);
            break;
        case RA_VARTYPE_BUF_RW:
            ADD(dst, "layout(std430, binding=%d) restrict coherent buffer %s { %s };\n",
                u->input.binding, u->input.name, u->buffer_format);
            break;
        case RA_VARTYPE_IMG_W: {
            // For better compatibility, explicitly label the type of data
            // that will be read from / written to this image.
            const char *fmt = u->v.tex->params.format->glsl_format;
            bool vulkan = sc->ra->glsl_vulkan;

            if (vulkan && fmt) {
                ADD(dst, "layout(binding=%d, %s) ", u->input.binding, fmt);
            } else if (vulkan) {
                ADD(dst, "layout(binding=%d) ", u->input.binding);
            } else if (fmt) {
                ADD(dst, "layout(%s) ", fmt);
            }
            ADD(dst, "uniform restrict %s %s;\n", u->glsl_type, u->input.name);
            break;
        }
        }
    }
}
740 
741 // 1. Generate vertex and fragment shaders from the fragment shader text added
742 //    with gl_sc_add(). The generated shader program is cached (based on the
743 //    text), so actual compilation happens only the first time.
744 // 2. Update the uniforms and textures set with gl_sc_uniform_*.
745 // 3. Make the new shader program current (glUseProgram()).
// After that, you render, and then you call gl_sc_reset(), which does:
747 // 1. Unbind the program and all textures.
748 // 2. Reset the sc state and prepare for a new shader program. (All uniforms
749 //    and fragment operations needed for the next program have to be re-added.)
gl_sc_generate(struct gl_shader_cache * sc,enum ra_renderpass_type type,const struct ra_format * target_format,const struct ra_renderpass_input * vao,int vao_len,size_t vertex_stride)750 static void gl_sc_generate(struct gl_shader_cache *sc,
751                            enum ra_renderpass_type type,
752                            const struct ra_format *target_format,
753                            const struct ra_renderpass_input *vao,
754                            int vao_len, size_t vertex_stride)
755 {
756     int glsl_version = sc->ra->glsl_version;
757     int glsl_es = sc->ra->glsl_es ? glsl_version : 0;
758 
759     sc->params.type = type;
760 
761     // gl_sc_reset() must be called after ending the previous render process,
762     // and before starting a new one.
763     assert(!sc->needs_reset);
764     sc->needs_reset = true;
765 
766     // If using a UBO, pick a binding (needed for shader generation)
767     if (sc->ubo_size)
768         sc->ubo_binding = gl_sc_next_binding(sc, RA_VARTYPE_BUF_RO);
769 
770     for (int n = 0; n < MP_ARRAY_SIZE(sc->tmp); n++)
771         sc->tmp[n].len = 0;
772 
773     // set up shader text (header + uniforms + body)
774     bstr *header = &sc->tmp[0];
775     ADD(header, "#version %d%s\n", glsl_version, glsl_es >= 300 ? " es" : "");
776     if (type == RA_RENDERPASS_TYPE_COMPUTE) {
777         // This extension cannot be enabled in fragment shader. Enable it as
778         // an exception for compute shader.
779         ADD(header, "#extension GL_ARB_compute_shader : enable\n");
780     }
781     for (int n = 0; n < sc->num_exts; n++)
782         ADD(header, "#extension %s : enable\n", sc->exts[n]);
783     if (glsl_es) {
784         ADD(header, "#ifdef GL_FRAGMENT_PRECISION_HIGH\n");
785         ADD(header, "precision highp float;\n");
786         ADD(header, "#else\n");
787         ADD(header, "precision mediump float;\n");
788         ADD(header, "#endif\n");
789 
790         ADD(header, "precision mediump sampler2D;\n");
791         if (sc->ra->caps & RA_CAP_TEX_3D)
792             ADD(header, "precision mediump sampler3D;\n");
793     }
794 
795     if (glsl_version >= 130) {
796         ADD(header, "#define tex1D texture\n");
797         ADD(header, "#define tex3D texture\n");
798     } else {
799         ADD(header, "#define tex1D texture1D\n");
800         ADD(header, "#define tex3D texture3D\n");
801         ADD(header, "#define texture texture2D\n");
802     }
803 
804     // Additional helpers.
805     ADD(header, "#define LUT_POS(x, lut_size)"
806                 " mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n");
807 
808     char *vert_in = glsl_version >= 130 ? "in" : "attribute";
809     char *vert_out = glsl_version >= 130 ? "out" : "varying";
810     char *frag_in = glsl_version >= 130 ? "in" : "varying";
811 
812     struct bstr *vert = NULL, *frag = NULL, *comp = NULL;
813 
814     if (type == RA_RENDERPASS_TYPE_RASTER) {
815         // vertex shader: we don't use the vertex shader, so just setup a
816         // dummy, which passes through the vertex array attributes.
817         bstr *vert_head = &sc->tmp[1];
818         ADD_BSTR(vert_head, *header);
819         bstr *vert_body = &sc->tmp[2];
820         ADD(vert_body, "void main() {\n");
821         bstr *frag_vaos = &sc->tmp[3];
822         for (int n = 0; n < vao_len; n++) {
823             const struct ra_renderpass_input *e = &vao[n];
824             const char *glsl_type = vao_glsl_type(e);
825             char loc[32] = {0};
826             if (sc->ra->glsl_vulkan)
827                 snprintf(loc, sizeof(loc), "layout(location=%d) ", n);
828             if (strcmp(e->name, "position") == 0) {
829                 // setting raster pos. requires setting gl_Position magic variable
830                 assert(e->dim_v == 2 && e->type == RA_VARTYPE_FLOAT);
831                 ADD(vert_head, "%s%s vec2 vertex_position;\n", loc, vert_in);
832                 ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n");
833             } else {
834                 ADD(vert_head, "%s%s %s vertex_%s;\n", loc, vert_in, glsl_type, e->name);
835                 ADD(vert_head, "%s%s %s %s;\n", loc, vert_out, glsl_type, e->name);
836                 ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name);
837                 ADD(frag_vaos, "%s%s %s %s;\n", loc, frag_in, glsl_type, e->name);
838             }
839         }
840         ADD(vert_body, "}\n");
841         vert = vert_head;
842         ADD_BSTR(vert, *vert_body);
843 
844         // fragment shader; still requires adding used uniforms and VAO elements
845         frag = &sc->tmp[4];
846         ADD_BSTR(frag, *header);
847         if (glsl_version >= 130) {
848             ADD(frag, "%sout vec4 out_color;\n",
849                 sc->ra->glsl_vulkan ? "layout(location=0) " : "");
850         }
851         ADD_BSTR(frag, *frag_vaos);
852         add_uniforms(sc, frag);
853 
854         ADD_BSTR(frag, sc->prelude_text);
855         ADD_BSTR(frag, sc->header_text);
856 
857         ADD(frag, "void main() {\n");
858         // we require _all_ frag shaders to write to a "vec4 color"
859         ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n");
860         ADD_BSTR(frag, sc->text);
861         if (glsl_version >= 130) {
862             ADD(frag, "out_color = color;\n");
863         } else {
864             ADD(frag, "gl_FragColor = color;\n");
865         }
866         ADD(frag, "}\n");
867 
868         // We need to fix the format of the render dst at renderpass creation
869         // time
870         assert(target_format);
871         sc->params.target_format = target_format;
872     }
873 
874     if (type == RA_RENDERPASS_TYPE_COMPUTE) {
875         comp = &sc->tmp[4];
876         ADD_BSTR(comp, *header);
877 
878         add_uniforms(sc, comp);
879 
880         ADD_BSTR(comp, sc->prelude_text);
881         ADD_BSTR(comp, sc->header_text);
882 
883         ADD(comp, "void main() {\n");
884         ADD(comp, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); // convenience
885         ADD_BSTR(comp, sc->text);
886         ADD(comp, "}\n");
887     }
888 
889     bstr *hash_total = &sc->tmp[5];
890 
891     ADD(hash_total, "type %d\n", sc->params.type);
892 
893     if (frag) {
894         ADD_BSTR(hash_total, *frag);
895         sc->params.frag_shader = frag->start;
896     }
897     ADD(hash_total, "\n");
898     if (vert) {
899         ADD_BSTR(hash_total, *vert);
900         sc->params.vertex_shader = vert->start;
901     }
902     ADD(hash_total, "\n");
903     if (comp) {
904         ADD_BSTR(hash_total, *comp);
905         sc->params.compute_shader = comp->start;
906     }
907     ADD(hash_total, "\n");
908 
909     if (sc->params.enable_blend) {
910         ADD(hash_total, "blend %d %d %d %d\n",
911             sc->params.blend_src_rgb, sc->params.blend_dst_rgb,
912             sc->params.blend_src_alpha, sc->params.blend_dst_alpha);
913     }
914 
915     if (sc->params.target_format)
916         ADD(hash_total, "format %s\n", sc->params.target_format->name);
917 
918     struct sc_entry *entry = NULL;
919     for (int n = 0; n < sc->num_entries; n++) {
920         struct sc_entry *cur = sc->entries[n];
921         if (bstr_equals(cur->total, *hash_total)) {
922             entry = cur;
923             break;
924         }
925     }
926     if (!entry) {
927         if (sc->num_entries == SC_MAX_ENTRIES)
928             sc_flush_cache(sc);
929         entry = talloc_ptrtype(NULL, entry);
930         *entry = (struct sc_entry){
931             .total = bstrdup(entry, *hash_total),
932             .timer = timer_pool_create(sc->ra),
933         };
934 
935         // The vertex shader uses mangled names for the vertex attributes, so
936         // that the fragment shader can use the "real" names. But the shader is
937         // expecting the vertex attribute names (at least with older GLSL
938         // targets for GL).
939         sc->params.vertex_stride = vertex_stride;
940         for (int n = 0; n < vao_len; n++) {
941             struct ra_renderpass_input attrib = vao[n];
942             attrib.name = talloc_asprintf(entry, "vertex_%s", attrib.name);
943             MP_TARRAY_APPEND(sc, sc->params.vertex_attribs,
944                              sc->params.num_vertex_attribs, attrib);
945         }
946 
947         for (int n = 0; n < sc->num_uniforms; n++) {
948             struct sc_cached_uniform u = {0};
949             if (sc->uniforms[n].type == SC_UNIFORM_TYPE_GLOBAL) {
950                 // global uniforms need to be made visible to the ra_renderpass
951                 u.index = sc->params.num_inputs;
952                 MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs,
953                                  sc->uniforms[n].input);
954             }
955             MP_TARRAY_APPEND(entry, entry->cached_uniforms,
956                              entry->num_cached_uniforms, u);
957         }
958         if (!create_pass(sc, entry))
959             sc->error_state = true;
960         MP_TARRAY_APPEND(sc, sc->entries, sc->num_entries, entry);
961     }
962 
963     if (!entry->pass) {
964         sc->current_shader = NULL;
965         return;
966     }
967 
968     assert(sc->num_uniforms == entry->num_cached_uniforms);
969 
970     sc->num_values = 0;
971     for (int n = 0; n < sc->num_uniforms; n++)
972         update_uniform(sc, entry, &sc->uniforms[n], n);
973 
974     // If we're using a UBO, make sure to bind it as well
975     if (sc->ubo_size) {
976         struct ra_renderpass_input_val ubo_val = {
977             .index = entry->ubo_index,
978             .data = &entry->ubo,
979         };
980         MP_TARRAY_APPEND(sc, sc->values, sc->num_values, ubo_val);
981     }
982 
983     sc->current_shader = entry;
984 }
985 
gl_sc_dispatch_draw(struct gl_shader_cache * sc,struct ra_tex * target,bool discard,const struct ra_renderpass_input * vao,int vao_len,size_t vertex_stride,void * vertices,size_t num_vertices)986 struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc,
987                                         struct ra_tex *target, bool discard,
988                                         const struct ra_renderpass_input *vao,
989                                         int vao_len, size_t vertex_stride,
990                                         void *vertices, size_t num_vertices)
991 {
992     struct timer_pool *timer = NULL;
993 
994     sc->params.invalidate_target = discard;
995     gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER, target->params.format,
996                    vao, vao_len, vertex_stride);
997     if (!sc->current_shader)
998         goto error;
999 
1000     timer = sc->current_shader->timer;
1001 
1002     struct mp_rect full_rc = {0, 0, target->params.w, target->params.h};
1003 
1004     struct ra_renderpass_run_params run = {
1005         .pass = sc->current_shader->pass,
1006         .values = sc->values,
1007         .num_values = sc->num_values,
1008         .push_constants = sc->current_shader->pushc,
1009         .target = target,
1010         .vertex_data = vertices,
1011         .vertex_count = num_vertices,
1012         .viewport = full_rc,
1013         .scissors = full_rc,
1014     };
1015 
1016     timer_pool_start(timer);
1017     sc->ra->fns->renderpass_run(sc->ra, &run);
1018     timer_pool_stop(timer);
1019 
1020 error:
1021     gl_sc_reset(sc);
1022     return timer_pool_measure(timer);
1023 }
1024 
gl_sc_dispatch_compute(struct gl_shader_cache * sc,int w,int h,int d)1025 struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc,
1026                                            int w, int h, int d)
1027 {
1028     struct timer_pool *timer = NULL;
1029 
1030     gl_sc_generate(sc, RA_RENDERPASS_TYPE_COMPUTE, NULL, NULL, 0, 0);
1031     if (!sc->current_shader)
1032         goto error;
1033 
1034     timer = sc->current_shader->timer;
1035 
1036     struct ra_renderpass_run_params run = {
1037         .pass = sc->current_shader->pass,
1038         .values = sc->values,
1039         .num_values = sc->num_values,
1040         .push_constants = sc->current_shader->pushc,
1041         .compute_groups = {w, h, d},
1042     };
1043 
1044     timer_pool_start(timer);
1045     sc->ra->fns->renderpass_run(sc->ra, &run);
1046     timer_pool_stop(timer);
1047 
1048 error:
1049     gl_sc_reset(sc);
1050     return timer_pool_measure(timer);
1051 }
1052