/*
 * This file is part of libplacebo.
 *
 * libplacebo is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
 */

#include "gpu.h"
#include "common.h"
#include "formats.h"
#include "utils.h"

#ifdef PL_HAVE_UNIX
#include <unistd.h>
#endif

#ifdef PL_HAVE_WIN32
#include <windows.h>
#include <sysinfoapi.h>
#endif

static const struct pl_gpu_fns pl_fns_gl;

static void gl_destroy_gpu(pl_gpu gpu)
{
    struct pl_gl *p = PL_PRIV(gpu);

    pl_gpu_finish(gpu);
    while (p->callbacks.num > 0)
        gl_poll_callbacks(gpu);

    pl_free((void *) gpu);
}

#ifdef EPOXY_HAS_EGL

static pl_handle_caps tex_handle_caps(pl_gpu gpu, bool import)
{
    pl_handle_caps caps = 0;
    struct pl_gl *p = PL_PRIV(gpu);

    if (!p->egl_dpy)
        return 0;

    if (import) {
        if (epoxy_has_egl_extension(p->egl_dpy, "EXT_image_dma_buf_import"))
            caps |= PL_HANDLE_DMA_BUF;
    } else if (p->egl_ctx) {
        if (epoxy_has_egl_extension(p->egl_dpy, "EGL_MESA_image_dma_buf_export"))
            caps |= PL_HANDLE_DMA_BUF;
    }

    return caps;
}

#endif // EPOXY_HAS_EGL

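// Returns the minimum alignment for imported host pointers (see the
// GL_AMD_pinned_memory path below). Note that on Win32 this reports the
// allocation granularity rather than the actual page size, since memory
// mappings must be aligned to the former.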
static inline size_t get_page_size()
{

#ifdef PL_HAVE_UNIX
    return sysconf(_SC_PAGESIZE);
#endif

#ifdef PL_HAVE_WIN32
    SYSTEM_INFO sysInfo;
    GetSystemInfo(&sysInfo);
    return sysInfo.dwAllocationGranularity;
#endif

    pl_assert(!"Unsupported platform!");
}

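// Convenience wrappers around glGetIntegerv() / glGetIntegeri_v() that store
// the result into a field of arbitrary integer type, via a GLint temporary.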
#define get(pname, field)                   \
    do {                                    \
        GLint tmp = 0;                      \
        glGetIntegerv((pname), &tmp);       \
        *(field) = tmp;                     \
    } while (0)

#define geti(pname, i, field)               \
    do {                                    \
        GLint tmp = 0;                      \
        glGetIntegeri_v((pname), i, &tmp);  \
        *(field) = tmp;                     \
    } while (0)

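// Creates a pl_gpu instance backed by the (already current) GL context,
// querying the GL/GSL version, device limits and handle capabilities.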
pl_gpu pl_gpu_create_gl(pl_log log, pl_opengl gl, const struct pl_opengl_params *params)
{
    struct pl_gpu *gpu = pl_zalloc_obj(NULL, gpu, struct pl_gl);
    gpu->log = log;
    gpu->ctx = gpu->log;

    struct pl_gl *p = PL_PRIV(gpu);
    p->impl = pl_fns_gl;
    p->gl = gl;

    struct pl_glsl_version *glsl = &gpu->glsl;
    int ver = epoxy_gl_version();
    glsl->gles = !epoxy_is_desktop_gl();
    p->gl_ver = glsl->gles ? 0 : ver;
    p->gles_ver = glsl->gles ? ver : 0;

    // If possible, query the GLSL version from the implementation
    const char *glslver = (char *) glGetString(GL_SHADING_LANGUAGE_VERSION);
    if (glslver) {
        PL_INFO(gpu, " GL_SHADING_LANGUAGE_VERSION: %s", glslver);
        int major = 0, minor = 0;
        if (sscanf(glslver, "%d.%d", &major, &minor) == 2)
            glsl->version = major * 100 + minor;
    }

    if (!glsl->version) {
        // Otherwise, use the fixed magic versions 200 and 300 for early GLES,
        // or fall back to 110 if all else fails
        if (p->gles_ver >= 30) {
            glsl->version = 300;
        } else if (p->gles_ver >= 20) {
            glsl->version = 200;
        } else {
            glsl->version = 110;
        }
    }

    if (gl_test_ext(gpu, "GL_ARB_compute_shader", 43, 0)) {
        glsl->compute = true;
        get(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, &glsl->max_shmem_size);
        get(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &glsl->max_group_threads);
        for (int i = 0; i < 3; i++)
            geti(GL_MAX_COMPUTE_WORK_GROUP_SIZE, i, &glsl->max_group_size[i]);
    }

    if (gl_test_ext(gpu, "GL_ARB_texture_gather", 40, 0)) {
        get(GL_MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB, &p->gather_comps);
        get(GL_MIN_PROGRAM_TEXTURE_GATHER_OFFSET_ARB, &glsl->min_gather_offset);
        get(GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET_ARB, &glsl->max_gather_offset);
    }

    // Query all device limits
    struct pl_gpu_limits *limits = &gpu->limits;
    limits->thread_safe = params->make_current;
    limits->callbacks = gl_test_ext(gpu, "GL_ARB_sync", 32, 30);
    if (gl_test_ext(gpu, "GL_ARB_pixel_buffer_object", 31, 0))
        limits->max_buf_size = SIZE_MAX; // no restriction imposed by GL
    if (gl_test_ext(gpu, "GL_ARB_uniform_buffer_object", 31, 0))
        get(GL_MAX_UNIFORM_BLOCK_SIZE, &limits->max_ubo_size);
    if (gl_test_ext(gpu, "GL_ARB_shader_storage_buffer_object", 43, 0))
        get(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &limits->max_ssbo_size);
    limits->max_vbo_size = limits->max_buf_size; // No additional restrictions
    if (gl_test_ext(gpu, "GL_ARB_buffer_storage", 44, 0))
        limits->max_mapped_size = limits->max_buf_size;

    get(GL_MAX_TEXTURE_SIZE, &limits->max_tex_2d_dim);
    if (gl_test_ext(gpu, "GL_EXT_texture3D", 21, 30))
        get(GL_MAX_3D_TEXTURE_SIZE, &limits->max_tex_3d_dim);
    // There's no equivalent limit for 1D textures for whatever reason, so
    // just set it to the same as the 2D limit
    if (p->gl_ver >= 21)
        limits->max_tex_1d_dim = limits->max_tex_2d_dim;
    limits->buf_transfer = true;

    // This doesn't map particularly well to OpenGL, so just set a conservative
    // upper bound, which (optimistically) assumes that all uniforms are
    // individual floats
    size_t max_vectors = 0;
    get(GL_MAX_FRAGMENT_UNIFORM_VECTORS, &max_vectors);
    limits->max_variables = max_vectors * 4;

    if (glsl->compute) {
        for (int i = 0; i < 3; i++)
            geti(GL_MAX_COMPUTE_WORK_GROUP_COUNT, i, &limits->max_dispatch[i]);
    }

    // Query import/export support
#ifdef EPOXY_HAS_EGL
    p->egl_dpy = params->egl_display;
    p->egl_ctx = params->egl_context;
    gpu->export_caps.tex = tex_handle_caps(gpu, false);
    gpu->import_caps.tex = tex_handle_caps(gpu, true);

    if (p->egl_dpy) {
        p->has_modifiers = epoxy_has_egl_extension(p->egl_dpy,
                                        "EXT_image_dma_buf_import_modifiers");
    }
#endif

    if (epoxy_has_gl_extension("GL_AMD_pinned_memory")) {
        gpu->import_caps.buf |= PL_HANDLE_HOST_PTR;
        gpu->limits.align_host_ptr = get_page_size();
    }

    // Cache some internal capability checks
    p->has_stride = gl_test_ext(gpu, "GL_EXT_unpack_subimage", 11, 30);
    p->has_vao = gl_test_ext(gpu, "GL_ARB_vertex_array_object", 30, 0);
    p->has_invalidate_fb = gl_test_ext(gpu, "GL_ARB_invalidate_subdata", 43, 30);
    p->has_invalidate_tex = gl_test_ext(gpu, "GL_ARB_invalidate_subdata", 43, 0);
    p->has_queries = gl_test_ext(gpu, "GL_ARB_timer_query", 33, 0);
    p->has_fbos = gl_test_ext(gpu, "GL_ARB_framebuffer_object", 30, 20);
    p->has_storage = gl_test_ext(gpu, "GL_ARB_shader_image_load_store", 42, 0);

    // We simply don't know, so make up some values
    limits->align_tex_xfer_offset = 32;
    limits->align_tex_xfer_stride = 1;
    limits->fragment_queues = 1;
    limits->compute_queues = 1;
    if (gl_test_ext(gpu, "GL_EXT_unpack_subimage", 11, 30))
        limits->align_tex_xfer_stride = 4;

    if (!gl_check_err(gpu, "pl_gpu_create_gl"))
        goto error;

    // Filter out error messages during format probing
    pl_log_level_cap(gpu->log, PL_LOG_INFO);
    bool formats_ok = gl_setup_formats(gpu);
    pl_log_level_cap(gpu->log, PL_LOG_NONE);
    if (!formats_ok)
        goto error;

    return pl_gpu_finalize(gpu);

error:
    gl_destroy_gpu(gpu);
    return NULL;
}

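// Destroying a buffer requires a current context; any pending fence is
// deleted and persistent mappings are released before the GL buffer itself.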
void gl_buf_destroy(pl_gpu gpu, pl_buf buf)
{
    if (!MAKE_CURRENT()) {
        PL_ERR(gpu, "Failed uninitializing buffer, leaking resources!");
        return;
    }

    struct pl_buf_gl *buf_gl = PL_PRIV(buf);
    if (buf_gl->fence)
        glDeleteSync(buf_gl->fence);

    if (buf_gl->mapped) {
        glBindBuffer(GL_COPY_WRITE_BUFFER, buf_gl->buffer);
        glUnmapBuffer(GL_COPY_WRITE_BUFFER);
        glBindBuffer(GL_COPY_WRITE_BUFFER, 0);
    }

    glDeleteBuffers(1, &buf_gl->buffer);
    gl_check_err(gpu, "gl_buf_destroy");
    RELEASE_CURRENT();
    pl_free((void *) buf);
}

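// Buffer creation takes one of two paths: immutable storage via
// GL_ARB_buffer_storage (needed for persistent mappings), or a classic
// mutable glBufferData() allocation with a usage hint guessed from the
// requested parameters. Host pointer imports via GL_AMD_pinned_memory
// always take the latter path.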
pl_buf gl_buf_create(pl_gpu gpu, const struct pl_buf_params *params)
{
    if (!MAKE_CURRENT())
        return NULL;

    struct pl_buf *buf = pl_zalloc_obj(NULL, buf, struct pl_buf_gl);
    buf->params = *params;
    buf->params.initial_data = NULL;

    struct pl_gl *p = PL_PRIV(gpu);
    struct pl_buf_gl *buf_gl = PL_PRIV(buf);
    buf_gl->id = ++p->buf_id;

    // Just use this since the generic GL_BUFFER doesn't work
    GLenum target = GL_COPY_WRITE_BUFFER;
    const void *data = params->initial_data;
    size_t total_size = params->size;
    bool import = false;

    if (params->import_handle == PL_HANDLE_HOST_PTR) {
        const struct pl_shared_mem *shmem = &params->shared_mem;
        target = GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD;

        data = shmem->handle.ptr;
        buf_gl->offset = shmem->offset;
        total_size = shmem->size;
        import = true;

        if (params->host_mapped)
            buf->data = (uint8_t *) data + buf_gl->offset;

        if (buf_gl->offset > 0 && params->drawable) {
            PL_ERR(gpu, "Cannot combine non-aligned host pointer imports with "
                   "drawable (vertex) buffers! This is a design limitation, "
                   "open an issue if you absolutely need this.");
            goto error;
        }
    }

    glGenBuffers(1, &buf_gl->buffer);
    glBindBuffer(target, buf_gl->buffer);

    if (gl_test_ext(gpu, "GL_ARB_buffer_storage", 44, 0) && !import) {

        GLbitfield mapflags = 0, storflags = 0;
        if (params->host_writable)
            storflags |= GL_DYNAMIC_STORAGE_BIT;
        if (params->host_mapped) {
            mapflags |= GL_MAP_READ_BIT | GL_MAP_WRITE_BIT |
                        GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
        }
        if (params->memory_type == PL_BUF_MEM_HOST)
            storflags |= GL_CLIENT_STORAGE_BIT; // hopefully this works

        glBufferStorage(target, total_size, data, storflags | mapflags);

        if (params->host_mapped) {
            buf_gl->mapped = true;
            buf->data = glMapBufferRange(target, buf_gl->offset, params->size,
                                         mapflags);
            if (!buf->data) {
                glBindBuffer(target, 0);
                if (!gl_check_err(gpu, "gl_buf_create: map"))
                    PL_ERR(gpu, "Failed mapping buffer: unknown reason");
                goto error;
            }
        }

    } else {

        // Make a random guess based on arbitrary criteria we can't know
        GLenum hint = GL_STREAM_DRAW;
        if (params->initial_data && !params->host_writable && !params->host_mapped)
            hint = GL_STATIC_DRAW;
        if (params->host_readable && !params->host_writable && !params->host_mapped)
            hint = GL_STREAM_READ;
        if (params->storable)
            hint = GL_DYNAMIC_COPY;

        glBufferData(target, total_size, data, hint);

        if (import && glGetError() == GL_INVALID_OPERATION) {
            PL_ERR(gpu, "Failed importing host pointer!");
            goto error;
        }

    }

    glBindBuffer(target, 0);
    if (!gl_check_err(gpu, "gl_buf_create"))
        goto error;

    if (params->storable) {
        buf_gl->barrier = GL_BUFFER_UPDATE_BARRIER_BIT | // for buf_copy etc.
                          GL_PIXEL_BUFFER_BARRIER_BIT | // for tex_upload
                          GL_SHADER_STORAGE_BARRIER_BIT;

        if (params->host_mapped)
            buf_gl->barrier |= GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT;
        if (params->uniform)
            buf_gl->barrier |= GL_UNIFORM_BARRIER_BIT;
        if (params->drawable)
            buf_gl->barrier |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT;
    }

    RELEASE_CURRENT();
    return buf;

error:
    gl_buf_destroy(gpu, buf);
    RELEASE_CURRENT();
    return NULL;
}

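// Polls (and, given a nonzero timeout, waits on) the buffer's fence to see
// whether the GPU is still using it. Returns false once the buffer is safe
// to reuse.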
bool gl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t timeout)
{
    // Non-persistently mapped buffers are always implicitly reusable in
    // OpenGL; the implementation will create more buffers under the hood if
    // needed.
    if (!buf->data)
        return false;

    if (!MAKE_CURRENT())
        return true; // conservative guess

    struct pl_buf_gl *buf_gl = PL_PRIV(buf);
    if (buf_gl->fence) {
        GLenum res = glClientWaitSync(buf_gl->fence,
                                      timeout ? GL_SYNC_FLUSH_COMMANDS_BIT : 0,
                                      timeout);
        if (res == GL_ALREADY_SIGNALED || res == GL_CONDITION_SATISFIED) {
            glDeleteSync(buf_gl->fence);
            buf_gl->fence = NULL;
        }
    }

    gl_poll_callbacks(gpu);
    RELEASE_CURRENT();
    return !!buf_gl->fence;
}

void gl_buf_write(pl_gpu gpu, pl_buf buf, size_t offset,
                  const void *data, size_t size)
{
    if (!MAKE_CURRENT())
        return;

    struct pl_buf_gl *buf_gl = PL_PRIV(buf);
    glBindBuffer(GL_COPY_WRITE_BUFFER, buf_gl->buffer);
    glBufferSubData(GL_COPY_WRITE_BUFFER, buf_gl->offset + offset, size, data);
    glBindBuffer(GL_COPY_WRITE_BUFFER, 0);
    gl_check_err(gpu, "gl_buf_write");
    RELEASE_CURRENT();
}

bool gl_buf_read(pl_gpu gpu, pl_buf buf, size_t offset,
                 void *dest, size_t size)
{
    if (!MAKE_CURRENT())
        return false;

    struct pl_buf_gl *buf_gl = PL_PRIV(buf);
    glBindBuffer(GL_COPY_READ_BUFFER, buf_gl->buffer);
    glGetBufferSubData(GL_COPY_READ_BUFFER, buf_gl->offset + offset, size, dest);
    glBindBuffer(GL_COPY_READ_BUFFER, 0);
    bool ok = gl_check_err(gpu, "gl_buf_read");
    RELEASE_CURRENT();
    return ok;
}

void gl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset,
                 pl_buf src, size_t src_offset, size_t size)
{
    if (!MAKE_CURRENT())
        return;

    struct pl_buf_gl *src_gl = PL_PRIV(src);
    struct pl_buf_gl *dst_gl = PL_PRIV(dst);
    glBindBuffer(GL_COPY_READ_BUFFER, src_gl->buffer);
    glBindBuffer(GL_COPY_WRITE_BUFFER, dst_gl->buffer);
    glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
                        src_gl->offset + src_offset,
                        dst_gl->offset + dst_offset, size);
    gl_check_err(gpu, "gl_buf_copy");
    RELEASE_CURRENT();
}

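// Each timer cycles through a small ring of GL query objects, so that results
// from earlier frames can be read back asynchronously while new measurements
// are still being recorded.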
#define QUERY_OBJECT_NUM 8

struct pl_timer {
    GLuint query[QUERY_OBJECT_NUM];
    int index_write; // next index to write to
    int index_read; // next index to read from
};

static pl_timer gl_timer_create(pl_gpu gpu)
{
    struct pl_gl *p = PL_PRIV(gpu);
    if (!p->has_queries || !MAKE_CURRENT())
        return NULL;

    pl_timer timer = pl_zalloc_ptr(NULL, timer);
    glGenQueries(QUERY_OBJECT_NUM, timer->query);
    RELEASE_CURRENT();
    return timer;
}

static void gl_timer_destroy(pl_gpu gpu, pl_timer timer)
{
    if (!MAKE_CURRENT()) {
        PL_ERR(gpu, "Failed uninitializing timer, leaking resources!");
        return;
    }

    glDeleteQueries(QUERY_OBJECT_NUM, timer->query);
    gl_check_err(gpu, "gl_timer_destroy");
    RELEASE_CURRENT();
    pl_free(timer);
}

static uint64_t gl_timer_query(pl_gpu gpu, pl_timer timer)
{
    if (timer->index_read == timer->index_write)
        return 0; // no more unprocessed results

    if (!MAKE_CURRENT())
        return 0;

    uint64_t res = 0;
    GLuint query = timer->query[timer->index_read];
    int avail = 0;
    glGetQueryObjectiv(query, GL_QUERY_RESULT_AVAILABLE, &avail);
    if (!avail)
        goto done;
    glGetQueryObjectui64v(query, GL_QUERY_RESULT, &res);

    timer->index_read = (timer->index_read + 1) % QUERY_OBJECT_NUM;
    // fall through

done:
    RELEASE_CURRENT();
    return res;
}

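// Note: gl_timer_begin/end don't call MAKE_CURRENT() themselves; they are
// only invoked during pass execution, where a context is assumed to already
// be current.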
void gl_timer_begin(pl_timer timer)
{
    if (!timer)
        return;

    glBeginQuery(GL_TIME_ELAPSED, timer->query[timer->index_write]);
}

void gl_timer_end(pl_timer timer)
{
    if (!timer)
        return;

    glEndQuery(GL_TIME_ELAPSED);

    timer->index_write = (timer->index_write + 1) % QUERY_OBJECT_NUM;
    if (timer->index_write == timer->index_read) {
        // forcibly drop the least recent result to make space
        timer->index_read = (timer->index_read + 1) % QUERY_OBJECT_NUM;
    }
}

static void gl_gpu_flush(pl_gpu gpu)
{
    if (!MAKE_CURRENT())
        return;

    glFlush();
    gl_check_err(gpu, "gl_gpu_flush");
    RELEASE_CURRENT();
}

static void gl_gpu_finish(pl_gpu gpu)
{
    if (!MAKE_CURRENT())
        return;

    glFinish();
    gl_check_err(gpu, "gl_gpu_finish");
    RELEASE_CURRENT();
}

static bool gl_gpu_is_failed(pl_gpu gpu)
{
    struct pl_gl *gl = PL_PRIV(gpu);
    return gl->failed;
}

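// Dispatch table for the GL backend, installed into p->impl by
// pl_gpu_create_gl(). Entries not defined in this file (tex_*, pass_*,
// desc_namespace) are implemented elsewhere in this backend.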
static const struct pl_gpu_fns pl_fns_gl = {
    .destroy = gl_destroy_gpu,
    .tex_create = gl_tex_create,
    .tex_destroy = gl_tex_destroy,
    .tex_invalidate = gl_tex_invalidate,
    .tex_clear_ex = gl_tex_clear_ex,
    .tex_blit = gl_tex_blit,
    .tex_upload = gl_tex_upload,
    .tex_download = gl_tex_download,
    .buf_create = gl_buf_create,
    .buf_destroy = gl_buf_destroy,
    .buf_write = gl_buf_write,
    .buf_read = gl_buf_read,
    .buf_copy = gl_buf_copy,
    .buf_poll = gl_buf_poll,
    .desc_namespace = gl_desc_namespace,
    .pass_create = gl_pass_create,
    .pass_destroy = gl_pass_destroy,
    .pass_run = gl_pass_run,
    .timer_create = gl_timer_create,
    .timer_destroy = gl_timer_destroy,
    .timer_query = gl_timer_query,
    .gpu_flush = gl_gpu_flush,
    .gpu_finish = gl_gpu_finish,
    .gpu_is_failed = gl_gpu_is_failed,
};