1 #include "HalideRuntimeOpenGL.h"
2 #include "device_interface.h"
3 #include "mini_opengl.h"
4 #include "printer.h"
5
// This constant is used to indicate that the application will take
// responsibility for binding the output render target before calling the
// Halide function.
// It is the all-ones 64-bit value. halide_opengl_device_malloc compares a
// buffer's device handle against it and, on a match, uses texture name 0
// instead of interpreting the handle as a texture ID.
#define HALIDE_OPENGL_RENDER_TARGET ((uint64_t)-1)
10
// Implementation note: all functions that directly or indirectly access the
// runtime state in halide_opengl_state must be declared as WEAK, otherwise
// the behavior at runtime is undefined.
14
// List of all OpenGL functions used by the runtime. The list is used to
// declare and initialize the dispatch table in GlobalState below.
//
// Each entry has the form GLFUNC(pointer-type, Name). A user of the list
// defines GLFUNC, expands the list, and then #undefs GLFUNC again.
// halide_opengl_init looks every entry up as "gl" + Name and fails if any
// of them cannot be resolved.
//
// NOTE(review): Enable, Uniform4iv and Uniform4fv are declared with the
// pointer types of Disable, Uniform2iv and Uniform1fv respectively.
// GLStateSaver::restore selects between Enable and Disable in a single
// conditional expression, which relies on the two members having the same
// type. Presumably the paired functions have identical signatures -- confirm
// against mini_opengl.h before changing any of these types.
#define USED_GL_FUNCTIONS \
    GLFUNC(PFNGLDELETETEXTURESPROC, DeleteTextures); \
    GLFUNC(PFNGLGENTEXTURESPROC, GenTextures); \
    GLFUNC(PFNGLBINDTEXTUREPROC, BindTexture); \
    GLFUNC(PFNGLGETERRORPROC, GetError); \
    GLFUNC(PFNGLVIEWPORTPROC, Viewport); \
    GLFUNC(PFNGLGENBUFFERSPROC, GenBuffers); \
    GLFUNC(PFNGLDELETEBUFFERSPROC, DeleteBuffers); \
    GLFUNC(PFNGLBINDBUFFERPROC, BindBuffer); \
    GLFUNC(PFNGLBUFFERDATAPROC, BufferData); \
    GLFUNC(PFNGLTEXPARAMETERIPROC, TexParameteri); \
    GLFUNC(PFNGLTEXIMAGE2DPROC, TexImage2D); \
    GLFUNC(PFNGLTEXSUBIMAGE2DPROC, TexSubImage2D); \
    GLFUNC(PFNGLDISABLEPROC, Disable); \
    GLFUNC(PFNGLDISABLEPROC, Enable); \
    GLFUNC(PFNGLCREATESHADERPROC, CreateShader); \
    GLFUNC(PFNGLACTIVETEXTUREPROC, ActiveTexture); \
    GLFUNC(PFNGLSHADERSOURCEPROC, ShaderSource); \
    GLFUNC(PFNGLCOMPILESHADERPROC, CompileShader); \
    GLFUNC(PFNGLGETSHADERIVPROC, GetShaderiv); \
    GLFUNC(PFNGLGETSHADERINFOLOGPROC, GetShaderInfoLog); \
    GLFUNC(PFNGLDELETESHADERPROC, DeleteShader); \
    GLFUNC(PFNGLCREATEPROGRAMPROC, CreateProgram); \
    GLFUNC(PFNGLATTACHSHADERPROC, AttachShader); \
    GLFUNC(PFNGLLINKPROGRAMPROC, LinkProgram); \
    GLFUNC(PFNGLGETPROGRAMIVPROC, GetProgramiv); \
    GLFUNC(PFNGLGETPROGRAMINFOLOGPROC, GetProgramInfoLog); \
    GLFUNC(PFNGLUSEPROGRAMPROC, UseProgram); \
    GLFUNC(PFNGLDELETEPROGRAMPROC, DeleteProgram); \
    GLFUNC(PFNGLGETUNIFORMLOCATIONPROC, GetUniformLocation); \
    GLFUNC(PFNGLUNIFORM1IVPROC, Uniform1iv); \
    GLFUNC(PFNGLUNIFORM2IVPROC, Uniform2iv); \
    GLFUNC(PFNGLUNIFORM2IVPROC, Uniform4iv); \
    GLFUNC(PFNGLUNIFORM1FVPROC, Uniform1fv); \
    GLFUNC(PFNGLUNIFORM1FVPROC, Uniform4fv); \
    GLFUNC(PFNGLGENFRAMEBUFFERSPROC, GenFramebuffers); \
    GLFUNC(PFNGLDELETEFRAMEBUFFERSPROC, DeleteFramebuffers); \
    GLFUNC(PFNGLCHECKFRAMEBUFFERSTATUSPROC, CheckFramebufferStatus); \
    GLFUNC(PFNGLBINDFRAMEBUFFERPROC, BindFramebuffer); \
    GLFUNC(PFNGLFRAMEBUFFERTEXTURE2DPROC, FramebufferTexture2D); \
    GLFUNC(PFNGLGETATTRIBLOCATIONPROC, GetAttribLocation); \
    GLFUNC(PFNGLVERTEXATTRIBPOINTERPROC, VertexAttribPointer); \
    GLFUNC(PFNGLDRAWELEMENTSPROC, DrawElements); \
    GLFUNC(PFNGLENABLEVERTEXATTRIBARRAYPROC, EnableVertexAttribArray); \
    GLFUNC(PFNGLDISABLEVERTEXATTRIBARRAYPROC, DisableVertexAttribArray); \
    GLFUNC(PFNGLGETVERTEXATTRIBIVPROC, GetVertexAttribiv); \
    GLFUNC(PFNGLPIXELSTOREIPROC, PixelStorei); \
    GLFUNC(PFNGLREADPIXELS, ReadPixels); \
    GLFUNC(PFNGLGETSTRINGPROC, GetString); \
    GLFUNC(PFNGLGETINTEGERV, GetIntegerv); \
    GLFUNC(PFNGLGETBOOLEANV, GetBooleanv); \
    GLFUNC(PFNGLFINISHPROC, Finish);
69
// List of all OpenGL functions used by the runtime, which may not
// exist due to an older or less capable version of GL. In using any
// of these functions, code must test if they are NULL.
//
// Entries use the same GLFUNC(pointer-type, Name) form as USED_GL_FUNCTIONS.
// init_extensions loads them with required=false, so an entry point that
// cannot be resolved leaves its pointer NULL instead of failing
// initialization.
#define OPTIONAL_GL_FUNCTIONS \
    GLFUNC(PFNGLGENVERTEXARRAYS, GenVertexArrays); \
    GLFUNC(PFNGLBINDVERTEXARRAY, BindVertexArray); \
    GLFUNC(PFNGLDELETEVERTEXARRAYS, DeleteVertexArrays); \
    GLFUNC(PFNDRAWBUFFERS, DrawBuffers)
78
79 // ---------- Types ----------
80
81 using namespace Halide::Runtime::Internal;
82
83 namespace Halide {
84 namespace Runtime {
85 namespace Internal {
86 namespace OpenGL {
87
// Declaration only (extern): makes the halide_device_interface_t object named
// opengl_device_interface visible to the code in this namespace.
extern WEAK halide_device_interface_t opengl_device_interface;
89
// Translate an OpenGL error code into the name of the matching GL_* constant.
// Any code that is not listed below yields the placeholder
// "<unknown GL error>". The returned pointer refers to a string literal and
// must not be freed.
WEAK const char *gl_error_name(int32_t err) {
    switch (err) {
    case 0x500:
        return "GL_INVALID_ENUM";
    case 0x501:
        return "GL_INVALID_VALUE";
    case 0x502:
        return "GL_INVALID_OPERATION";
    case 0x503:
        return "GL_STACK_OVERFLOW";
    case 0x504:
        return "GL_STACK_UNDERFLOW";
    case 0x505:
        return "GL_OUT_OF_MEMORY";
    case 0x506:
        return "GL_INVALID_FRAMEBUFFER_OPERATION";
    case 0x507:
        return "GL_CONTEXT_LOST";
    case 0x8031:
        return "GL_TABLE_TOO_LARGE";
    default:
        return "<unknown GL error>";
    }
}
126
// Scoped wrapper around a halide_malloc allocation: the constructor requests
// `size` bytes and the destructor hands the pointer back to halide_free with
// the same user_context.
//
// `ptr` holds whatever halide_malloc returned and is not checked here, so
// users test it before dereferencing (as make_shader does).
// NOTE(review): the copy constructor is not disabled; a copied object would
// pass the same pointer to halide_free a second time.
struct HalideMalloc {
    ALWAYS_INLINE HalideMalloc(void *user_context, size_t size)
        : user_context(user_context), ptr(halide_malloc(user_context, size)) {
    }
    ALWAYS_INLINE ~HalideMalloc() {
        halide_free(user_context, ptr);
    }
    // Context passed to both halide_malloc and halide_free.
    void *const user_context;
    // The allocation itself, exactly as returned by halide_malloc.
    void *const ptr;
};
137
// Which flavor of the API the current context provides. halide_opengl_init
// selects OpenGLES when the GL_VERSION string starts with "OpenGL ES " and is
// followed by a parseable version number, and OpenGL in every other case.
enum OpenGLProfile {
    OpenGL,
    OpenGLES
};
142
// One kernel argument, as declared by a marker line in the header comment of
// a kernel's GLSL source. Arguments of a kernel form a singly linked list.
struct Argument {
    // The kind of data stored in an argument
    enum Kind {
        Invalid,  // value assigned by parse_argument before the caller sets the real kind
        Uniform,  // uniform variable
        Varying,  // varying attribute
        Inbuf,    // input texture
        Outbuf    // output texture
    };

    // The elementary data type of the argument
    enum Type {
        Void,  // used by parse_argument to mean "no supported type matched"
        Bool,
        Float,
        Int8,
        Int16,
        Int32,
        UInt8,
        UInt16,
        UInt32
    };

    // Heap-allocated copy of the argument name (created with strndup in
    // parse_argument, released with free in delete_kernel).
    char *name;
    Kind kind;
    Type type;
    // Next argument of the same kernel, or NULL at the end of the list.
    Argument *next;
};
171
// Everything the runtime keeps about one GLSL kernel. Instances are built by
// create_kernel and destroyed by delete_kernel.
struct KernelInfo {
    // Kernel name taken from the "/// KERNEL " marker line; freed in delete_kernel.
    char *name;
    // Private, NUL-terminated copy of the kernel source; freed in delete_kernel.
    char *source;
    // Head of the linked list of parsed arguments.
    Argument *arguments;
    // GL shader object name. NOTE(review): create_kernel does not assign this
    // field and the DeleteShader call in delete_kernel is compiled out.
    GLuint shader_id;
    // GL program object name; passed to DeleteProgram in delete_kernel.
    GLuint program_id;
};
179
// Node of the global state_list. halide_opengl_device_release walks the list
// through `next`, deletes each node's kernel and resets `kernel` to NULL, but
// deliberately keeps the node itself allocated.
struct ModuleState {
    KernelInfo *kernel;
    ModuleState *next;
};
184
// All persistent state maintained by the runtime.
struct GlobalState {
    // Marks the state as not initialized, installs default version/profile
    // values and sets every GL function pointer to NULL.
    void init();
    // Calls GetError once. If an error is pending it is reported together
    // with `location` and true is returned; otherwise returns false.
    bool CheckAndReportError(void *user_context, const char *location);

    // Set to true at the end of a successful halide_opengl_init.
    bool initialized;

    // Information about the OpenGL platform we're running on.
    OpenGLProfile profile;
    int major_version, minor_version;
    // Capability flags computed by init_extensions from the version, the
    // profile and the extension string.
    bool have_vertex_array_objects;
    bool have_texture_rg;
    bool have_texture_float;
    bool have_texture_rgb8_rgba8;

    // Various objects shared by all filter kernels
    GLuint framebuffer_id;
    GLuint vertex_array_object;  // only generated when have_vertex_array_objects is set
    GLuint vertex_buffer;        // holds quad_vertices
    GLuint element_buffer;       // holds quad_indices

    // Declare one pointer member per OpenGL function used by the runtime.
    // Members from OPTIONAL_GL_FUNCTIONS may be NULL.
#define GLFUNC(PTYPE, VAR) PTYPE VAR
    USED_GL_FUNCTIONS;
    OPTIONAL_GL_FUNCTIONS;
#undef GLFUNC
};
212
CheckAndReportError(void * user_context,const char * location)213 WEAK bool GlobalState::CheckAndReportError(void *user_context, const char *location) {
214 GLenum err = GetError();
215 if (err != GL_NO_ERROR) {
216 error(user_context) << "OpenGL error " << gl_error_name(err) << "(" << (int)err << ")"
217 << " at " << location << ".\n";
218 return true;
219 }
220 return false;
221 }
222
// The runtime's single GlobalState instance. The functions in this file call
// GL entry points and read the cached capability flags through it.
WEAK GlobalState global_state;
224
// Saves & restores OpenGL state
//
// The constructor records the state listed below via save(); the destructor
// writes it back via restore(). An instance therefore brackets a region of
// code that is free to change those pieces of GL state.
class GLStateSaver {
public:
    ALWAYS_INLINE GLStateSaver() {
        save();
    }
    ALWAYS_INLINE ~GLStateSaver() {
        restore();
    }

private:
    // The state variables
    GLint active_texture;
    GLint array_buffer_binding;
    GLint element_array_buffer_binding;
    GLint framebuffer_binding;
    GLint program;
    // Only read and written when global_state.have_vertex_array_objects is set.
    GLint vertex_array_binding;
    GLint viewport[4];
    GLboolean cull_face;
    GLboolean depth_test;
    // Number of entries in texture_2d_binding.
    int max_combined_texture_image_units;
    // Per-texture-unit GL_TEXTURE_2D binding; malloc'ed in save(), freed in restore().
    GLint *texture_2d_binding;
    // Number of entries in vertex_attrib_array_enabled.
    int max_vertex_attribs;
    // Per-attribute enabled flag; malloc'ed in save(), freed in restore().
    GLint *vertex_attrib_array_enabled;

    // Define these out-of-line as WEAK, to avoid LLVM error "MachO doesn't support COMDATs"
    void save();
    void restore();
};
255
save()256 WEAK void GLStateSaver::save() {
257 global_state.GetIntegerv(GL_ACTIVE_TEXTURE, &active_texture);
258 global_state.GetIntegerv(GL_ARRAY_BUFFER_BINDING, &array_buffer_binding);
259 global_state.GetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING, &element_array_buffer_binding);
260 global_state.GetIntegerv(GL_FRAMEBUFFER_BINDING, &framebuffer_binding);
261 global_state.GetIntegerv(GL_CURRENT_PROGRAM, &program);
262 global_state.GetBooleanv(GL_CULL_FACE, &cull_face);
263 global_state.GetBooleanv(GL_DEPTH_TEST, &depth_test);
264 global_state.GetIntegerv(GL_VIEWPORT, viewport);
265
266 global_state.GetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &max_combined_texture_image_units);
267 texture_2d_binding = (GLint *)malloc(max_combined_texture_image_units * sizeof(GLint));
268 for (int i = 0; i < max_combined_texture_image_units; i++) {
269 global_state.ActiveTexture(GL_TEXTURE0 + i);
270 global_state.GetIntegerv(GL_TEXTURE_BINDING_2D, &texture_2d_binding[i]);
271 }
272
273 global_state.GetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attribs);
274 vertex_attrib_array_enabled = (GLint *)malloc(max_vertex_attribs * sizeof(GLint));
275 for (int i = 0; i < max_vertex_attribs; i++) {
276 global_state.GetVertexAttribiv(i, GL_VERTEX_ATTRIB_ARRAY_ENABLED, &vertex_attrib_array_enabled[i]);
277 }
278
279 if (global_state.have_vertex_array_objects) {
280 global_state.GetIntegerv(GL_VERTEX_ARRAY_BINDING, &vertex_array_binding);
281 }
282
283 #ifdef DEBUG_RUNTIME
284 debug(NULL) << "Saved OpenGL state\n";
285 #endif
286 }
287
restore()288 WEAK void GLStateSaver::restore() {
289 #ifdef DEBUG_RUNTIME
290 debug(NULL) << "Restoring OpenGL state\n";
291 #endif
292
293 for (int i = 0; i < max_combined_texture_image_units; i++) {
294 global_state.ActiveTexture(GL_TEXTURE0 + i);
295 global_state.BindTexture(GL_TEXTURE_2D, texture_2d_binding[i]);
296 }
297 free(texture_2d_binding);
298
299 for (int i = 0; i < max_vertex_attribs; i++) {
300 if (vertex_attrib_array_enabled[i])
301 global_state.EnableVertexAttribArray(i);
302 else
303 global_state.DisableVertexAttribArray(i);
304 }
305 free(vertex_attrib_array_enabled);
306
307 if (global_state.have_vertex_array_objects) {
308 global_state.BindVertexArray(vertex_array_binding);
309 }
310
311 global_state.ActiveTexture(active_texture);
312 global_state.BindFramebuffer(GL_FRAMEBUFFER, framebuffer_binding);
313 global_state.BindBuffer(GL_ARRAY_BUFFER, array_buffer_binding);
314 global_state.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, element_array_buffer_binding);
315 global_state.UseProgram(program);
316 global_state.Viewport(viewport[0], viewport[1], viewport[2], viewport[3]);
317 (cull_face ? global_state.Enable : global_state.Disable)(GL_CULL_FACE);
318 (depth_test ? global_state.Enable : global_state.Disable)(GL_DEPTH_TEST);
319 }
320
// A list of module-specific state. Each module corresponds to a single Halide filter
WEAK ModuleState *state_list;

// Prefixes of the marker lines that create_kernel recognizes at the start of
// a kernel's source. The text following each prefix is the kernel name
// (KERNEL) or a "type name" declaration (all others).
WEAK const char *kernel_marker = "/// KERNEL ";
WEAK const char *input_marker = "/// IN_BUFFER ";
WEAK const char *output_marker = "/// OUT_BUFFER ";
WEAK const char *uniform_marker = "/// UNIFORM ";
WEAK const char *varying_marker = "/// VARYING ";
329
330 // ---------- Helper functions ----------
331
// Return a newly malloc'ed, NUL-terminated copy of the first n bytes of s.
// Exactly n bytes are copied, so s must provide at least n readable bytes.
// The caller releases the result with free().
WEAK char *strndup(const char *s, size_t n) {
    char *const copy = (char *)malloc(n + 1);
    copy[n] = '\0';
    memcpy(copy, s, n);
    return copy;
}
338
// Truncate str in place at the first space, newline or NUL character found
// among its first n characters, and return str. If none of those characters
// occurs in that range the terminator is written at str[n], so the buffer
// must hold at least n + 1 bytes.
WEAK char *strstrip(char *str, size_t n) {
    size_t len = 0;
    while (len != n) {
        const char c = str[len];
        if (c == '\0' || c == '\n' || c == ' ') {
            break;
        }
        len++;
    }
    str[len] = '\0';
    return str;
}
349
// Stream *buf, followed by a newline, to debug(user_context).
// buf is dereferenced unconditionally and therefore must not be NULL.
WEAK void debug_buffer(void *user_context, halide_buffer_t *buf) {
    debug(user_context) << *buf << "\n";
}
353
make_shader(void * user_context,GLenum type,const char * source,GLint * length)354 WEAK GLuint make_shader(void *user_context, GLenum type,
355 const char *source, GLint *length) {
356 #ifdef DEBUG_RUNTIME
357 {
358 debug(user_context) << ((type == GL_VERTEX_SHADER) ? "GL_VERTEX_SHADER" : "GL_FRAGMENT_SHADER")
359 << " SOURCE:\n";
360 // debug() will go thru Printer<> which has a fixed, non-growing size.
361 // Just pass the source directly to halide_print instead, so it won't get clipped.
362 halide_print(user_context, source);
363 }
364 #endif
365
366 GLuint shader = global_state.CreateShader(type);
367 if (global_state.CheckAndReportError(user_context, "make_shader(1)")) {
368 return 1;
369 }
370 if (*source == '\0') {
371 debug(user_context) << "Halide GLSL: passed shader source is empty, using default.\n";
372 const char *default_shader = "varying vec2 pixcoord;\n void main() { }";
373 global_state.ShaderSource(shader, 1, (const GLchar **)&default_shader, NULL);
374 } else {
375 global_state.ShaderSource(shader, 1, (const GLchar **)&source, length);
376 }
377 if (global_state.CheckAndReportError(user_context, "make_shader(2)")) {
378 return 1;
379 }
380 global_state.CompileShader(shader);
381 if (global_state.CheckAndReportError(user_context, "make_shader(3)")) {
382 return 1;
383 }
384
385 GLint shader_ok = 0;
386 global_state.GetShaderiv(shader, GL_COMPILE_STATUS, &shader_ok);
387 if (!shader_ok) {
388 print(user_context) << "Could not compile shader:\n";
389 GLint log_len;
390 global_state.GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_len);
391 HalideMalloc log_tmp(user_context, log_len);
392 if (log_tmp.ptr) {
393 char *log = (char *)log_tmp.ptr;
394 global_state.GetShaderInfoLog(shader, log_len, NULL, log);
395 print(user_context) << log << "\n";
396 }
397 global_state.DeleteShader(shader);
398 return 0;
399 }
400 return shader;
401 }
402
// Test whether s begins with prefix.
// On a match, returns a pointer to the first character of s after the
// prefix; otherwise returns NULL. An empty prefix always matches.
WEAK const char *match_prefix(const char *s, const char *prefix) {
    const size_t prefix_len = strlen(prefix);
    return (strncmp(s, prefix, prefix_len) == 0) ? (s + prefix_len) : NULL;
}
411
412 // Parse declaration of the form "type name" and construct matching Argument.
parse_argument(void * user_context,const char * src,const char * end)413 WEAK Argument *parse_argument(void *user_context, const char *src,
414 const char *end) {
415 const char *name;
416 Argument::Type type = Argument::Void;
417 if ((name = match_prefix(src, "float "))) {
418 type = Argument::Float;
419 } else if ((name = match_prefix(src, "bool "))) {
420 type = Argument::Bool;
421 } else if ((name = match_prefix(src, "int8_t "))) {
422 type = Argument::Int8;
423 } else if ((name = match_prefix(src, "int16_t "))) {
424 type = Argument::Int16;
425 } else if ((name = match_prefix(src, "int32_t "))) {
426 type = Argument::Int32;
427 } else if ((name = match_prefix(src, "uint8_t "))) {
428 type = Argument::UInt8;
429 } else if ((name = match_prefix(src, "uint16_t "))) {
430 type = Argument::UInt16;
431 } else if ((name = match_prefix(src, "uint32_t "))) {
432 type = Argument::UInt32;
433 }
434 if (type == Argument::Void) {
435 error(user_context) << "Internal error: argument type not supported";
436 return NULL;
437 }
438
439 Argument *arg = (Argument *)malloc(sizeof(Argument));
440 arg->name = strndup(name, end - name);
441 arg->type = type;
442 arg->kind = Argument::Invalid;
443 arg->next = 0;
444 return arg;
445 }
446
447 // Create KernelInfo for a piece of GLSL code
create_kernel(void * user_context,const char * src,int size)448 WEAK KernelInfo *create_kernel(void *user_context, const char *src, int size) {
449 KernelInfo *kernel = (KernelInfo *)malloc(sizeof(KernelInfo));
450
451 kernel->source = strndup(src, size);
452 kernel->arguments = NULL;
453 kernel->program_id = 0;
454
455 debug(user_context) << "Compiling GLSL kernel (size = " << size << "):\n";
456
457 // Parse initial comment block
458 const char *line = kernel->source;
459 while (*line) {
460 const char *next_line = strchr(line, '\n') + 1;
461 if (!next_line)
462 next_line = line + size;
463
464 const char *args;
465 if ((args = match_prefix(line, kernel_marker))) {
466 // set name
467 kernel->name = strstrip(strndup(args, next_line - args), next_line - args);
468 } else if ((args = match_prefix(line, uniform_marker))) {
469 if (Argument *arg =
470 parse_argument(user_context, args, next_line - 1)) {
471 arg->kind = Argument::Uniform;
472 arg->next = kernel->arguments;
473 kernel->arguments = arg;
474 } else {
475 halide_error(user_context, "Invalid VAR marker");
476 goto error;
477 }
478 } else if ((args = match_prefix(line, varying_marker))) {
479 if (Argument *arg =
480 parse_argument(user_context, args, next_line - 1)) {
481 arg->kind = Argument::Varying;
482 arg->next = kernel->arguments;
483 kernel->arguments = arg;
484 } else {
485 halide_error(user_context, "Invalid VARYING marker");
486 goto error;
487 }
488 } else if ((args = match_prefix(line, input_marker))) {
489 if (Argument *arg = parse_argument(user_context, args, next_line - 1)) {
490 arg->kind = Argument::Inbuf;
491 arg->next = kernel->arguments;
492 kernel->arguments = arg;
493 } else {
494 error(user_context) << "Invalid IN_BUFFER marker";
495 goto error;
496 }
497 } else if ((args = match_prefix(line, output_marker))) {
498 if (Argument *arg = parse_argument(user_context, args, next_line - 1)) {
499 arg->kind = Argument::Outbuf;
500 arg->next = kernel->arguments;
501 kernel->arguments = arg;
502 } else {
503 error(user_context) << "Invalid OUT_BUFFER marker";
504 goto error;
505 }
506 } else {
507 // Stop parsing if we encounter something we don't recognize
508 break;
509 }
510 line = next_line;
511 }
512
513 // Arguments are currently in reverse order, flip the list.
514 {
515 Argument *cur = kernel->arguments;
516 kernel->arguments = NULL;
517 while (cur) {
518 Argument *next = cur->next;
519 cur->next = kernel->arguments;
520 kernel->arguments = cur;
521 cur = next;
522 }
523 }
524
525 return kernel;
526 error:
527 free(kernel);
528 return NULL;
529 }
530
531 // Delete all data associated with a kernel. Also release associated OpenGL
532 // shader and program.
delete_kernel(void * user_context,KernelInfo * kernel)533 WEAK void delete_kernel(void *user_context, KernelInfo *kernel) {
534 global_state.DeleteProgram(kernel->program_id);
535 #if 0 // TODO figure out why this got deleted.
536 global_state.DeleteShader(kernel->shader_id);
537 #endif
538
539 Argument *arg = kernel->arguments;
540 while (arg) {
541 Argument *next = arg->next;
542 free(arg->name);
543 free(arg);
544 arg = next;
545 }
546 free(kernel->source);
547 free(kernel->name);
548 free(kernel);
549 }
550
// Vertices and their order in a triangle strip for rendering a quad
// ranging from (-1,-1) to (1,1).
// halide_opengl_init uploads quad_vertices into the GL_ARRAY_BUFFER object and
// quad_indices into the GL_ELEMENT_ARRAY_BUFFER object that it creates.
WEAK GLfloat quad_vertices[] = {
    -1.0f, -1.0f, 1.0f, -1.0f,
    -1.0f, 1.0f, 1.0f, 1.0f};
WEAK GLuint quad_indices[] = {0, 1, 2, 3};
557
init()558 WEAK void GlobalState::init() {
559 initialized = false;
560 profile = OpenGL;
561 major_version = 2;
562 minor_version = 0;
563 framebuffer_id = 0;
564 vertex_array_object = vertex_buffer = element_buffer = 0;
565 have_vertex_array_objects = false;
566 have_texture_rg = false;
567 have_texture_rgb8_rgba8 = false;
568 // Initialize all GL function pointers to NULL
569 #define GLFUNC(type, name) name = NULL;
570 USED_GL_FUNCTIONS;
571 OPTIONAL_GL_FUNCTIONS;
572 #undef GLFUNC
573 }
574
load_gl_func(void * user_context,const char * name,void ** ptr,bool required)575 WEAK int load_gl_func(void *user_context, const char *name, void **ptr, bool required) {
576 void *p = halide_opengl_get_proc_address(user_context, name);
577 if (!p && required) {
578 error(user_context) << "Could not load function pointer for " << name;
579 return -1;
580 }
581 *ptr = p;
582 return 0;
583 }
584
extension_supported(void * user_context,const char * name)585 WEAK bool extension_supported(void *user_context, const char *name) {
586 // Iterate over space delimited extension strings. Note that glGetStringi
587 // is not part of GL ES 2.0, and not reliable in all implementations of
588 // GL ES 3.0.
589 const char *start = (const char *)global_state.GetString(GL_EXTENSIONS);
590 if (!start) {
591 return false;
592 }
593 while (const char *pos = strstr(start, name)) {
594 const char *end = pos + strlen(name);
595 // Ensure the found match is a full word, not a substring.
596 if ((pos == start || pos[-1] == ' ') &&
597 (*end == ' ' || *end == '\0')) {
598 return true;
599 }
600 start = end;
601 }
602
603 return false;
604 }
605
606 // Check for availability of various version- and extension-specific features
607 // and hook up functions pointers as necessary
init_extensions(void * user_context)608 WEAK void init_extensions(void *user_context) {
609 if (global_state.major_version >= 3) { // This is likely valid for both OpenGL and OpenGL ES
610 load_gl_func(user_context, "glGenVertexArrays", (void **)&global_state.GenVertexArrays, false);
611 load_gl_func(user_context, "glBindVertexArray", (void **)&global_state.BindVertexArray, false);
612 load_gl_func(user_context, "glDeleteVertexArrays", (void **)&global_state.DeleteVertexArrays, false);
613 if (global_state.GenVertexArrays && global_state.BindVertexArray && global_state.DeleteVertexArrays) {
614 global_state.have_vertex_array_objects = true;
615 }
616 }
617 load_gl_func(user_context, "glDrawBuffers", (void **)&global_state.DrawBuffers, false);
618
619 global_state.have_texture_rg =
620 global_state.major_version >= 3 ||
621 (global_state.profile == OpenGL &&
622 extension_supported(user_context, "GL_ARB_texture_rg")) ||
623 (global_state.profile == OpenGLES &&
624 extension_supported(user_context, "GL_EXT_texture_rg"));
625
626 global_state.have_texture_rgb8_rgba8 =
627 global_state.major_version >= 3 ||
628 (global_state.profile == OpenGLES &&
629 extension_supported(user_context, "GL_OES_rgb8_rgba8"));
630
631 global_state.have_texture_float =
632 (global_state.major_version >= 3) ||
633 (global_state.profile == OpenGL &&
634 extension_supported(user_context, "GL_ARB_texture_float")) ||
635 (global_state.profile == OpenGLES &&
636 extension_supported(user_context, "GL_OES_texture_float"));
637 }
638
// Parse a run of decimal digits at the start of str.
// On success stores the value in *val and returns a pointer to the first
// character after the digits. If str does not start with a digit, returns
// NULL and leaves *val untouched. No sign is accepted and overflow is not
// detected.
WEAK const char *parse_int(const char *str, int *val) {
    const char *cursor = str;
    int value = 0;
    for (; *cursor >= '0' && *cursor <= '9'; cursor++) {
        value = 10 * value + (*cursor - '0');
    }
    if (cursor == str) {
        return NULL;
    }
    *val = value;
    return cursor;
}
652
parse_opengl_version(const char * str,int * major,int * minor)653 WEAK const char *parse_opengl_version(const char *str, int *major, int *minor) {
654 str = parse_int(str, major);
655 if (str == NULL || *str != '.') {
656 return NULL;
657 }
658 return parse_int(str + 1, minor);
659 }
660
661 // Initialize the OpenGL-specific parts of the runtime.
halide_opengl_init(void * user_context)662 WEAK int halide_opengl_init(void *user_context) {
663 if (global_state.initialized) {
664 return 0;
665 }
666
667 #ifdef DEBUG_RUNTIME
668 halide_start_clock(user_context);
669 #endif
670
671 global_state.init();
672
673 // Make a context if there isn't one
674 if (halide_opengl_create_context(user_context)) {
675 error(user_context) << "Failed to make OpenGL context";
676 return -1;
677 }
678
679 // Initialize pointers to core OpenGL functions.
680 #define GLFUNC(TYPE, VAR) \
681 if (load_gl_func(user_context, "gl" #VAR, (void **)&global_state.VAR, true) < 0) { \
682 return -1; \
683 }
684 USED_GL_FUNCTIONS;
685 #undef GLFUNC
686
687 const char *version = (const char *)global_state.GetString(GL_VERSION);
688 const char *gles_version = match_prefix(version, "OpenGL ES ");
689 int major, minor;
690 if (gles_version && parse_opengl_version(gles_version, &major, &minor)) {
691 global_state.profile = OpenGLES;
692 global_state.major_version = major;
693 global_state.minor_version = minor;
694 } else if (parse_opengl_version(version, &major, &minor)) {
695 global_state.profile = OpenGL;
696 global_state.major_version = major;
697 global_state.minor_version = minor;
698 } else {
699 global_state.profile = OpenGL;
700 global_state.major_version = 2;
701 global_state.minor_version = 0;
702 }
703 init_extensions(user_context);
704 debug(user_context)
705 << "Halide running on OpenGL " << ((global_state.profile == OpenGL) ? "" : "ES ") << major << "." << minor << "\n"
706 << " vertex_array_objects: " << (global_state.have_vertex_array_objects ? "yes\n" : "no\n")
707 << " texture_rg: " << (global_state.have_texture_rg ? "yes\n" : "no\n")
708 << " have_texture_rgb8_rgba8: " << (global_state.have_texture_rgb8_rgba8 ? "yes\n" : "no\n")
709 << " texture_float: " << (global_state.have_texture_float ? "yes\n" : "no\n");
710
711 // Initialize framebuffer.
712 global_state.GenFramebuffers(1, &global_state.framebuffer_id);
713 if (global_state.CheckAndReportError(user_context, "halide_opengl_init GenFramebuffers")) {
714 return 1;
715 }
716
717 // Initialize vertex and element buffers.
718 GLuint buf[2];
719 global_state.GenBuffers(2, buf);
720 global_state.BindBuffer(GL_ARRAY_BUFFER, buf[0]);
721 global_state.BufferData(GL_ARRAY_BUFFER, sizeof(quad_vertices), quad_vertices, GL_STATIC_DRAW);
722 global_state.BindBuffer(GL_ARRAY_BUFFER, 0);
723 global_state.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, buf[1]);
724 global_state.BufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(quad_indices), quad_indices, GL_STATIC_DRAW);
725 global_state.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
726 global_state.vertex_buffer = buf[0];
727 global_state.element_buffer = buf[1];
728
729 if (global_state.have_vertex_array_objects) {
730 global_state.GenVertexArrays(1, &global_state.vertex_array_object);
731 if (global_state.CheckAndReportError(user_context, "halide_opengl_init GenVertexArrays")) {
732 return 1;
733 }
734 }
735
736 global_state.initialized = true;
737 return 0;
738 }
739
740 // Release all data allocated by the runtime.
741 //
742 // The OpenGL context itself is generally managed by the host application, so
743 // we leave it untouched.
halide_opengl_device_release(void * user_context)744 WEAK int halide_opengl_device_release(void *user_context) {
745 if (!global_state.initialized) {
746 return 0;
747 }
748
749 debug(user_context) << "halide_opengl_release\n";
750 global_state.DeleteFramebuffers(1, &global_state.framebuffer_id);
751
752 ModuleState *mod = state_list;
753 while (mod) {
754 delete_kernel(user_context, mod->kernel);
755 mod->kernel = NULL;
756 ModuleState *next = mod->next;
757 // do not call free(mod) to avoid dangling pointers: the module state
758 // is still referenced in the code generated by Halide (see
759 // CodeGen_GPU_Host::get_module_state).
760 mod = next;
761 }
762
763 global_state.DeleteBuffers(1, &global_state.vertex_buffer);
764 global_state.DeleteBuffers(1, &global_state.element_buffer);
765 if (global_state.have_vertex_array_objects) {
766 global_state.DeleteVertexArrays(1, &global_state.vertex_array_object);
767 }
768
769 global_state = GlobalState();
770
771 return 0;
772 }
773
774 // Determine OpenGL texture format and channel type for a given halide_buffer_t.
get_texture_format(void * user_context,halide_buffer_t * buf,GLint * internal_format,GLint * format,GLint * type)775 WEAK bool get_texture_format(void *user_context, halide_buffer_t *buf,
776 GLint *internal_format, GLint *format, GLint *type) {
777 if (buf->type == halide_type_of<uint8_t>()) {
778 *type = GL_UNSIGNED_BYTE;
779 } else if (buf->type == halide_type_of<uint16_t>()) {
780 *type = GL_UNSIGNED_SHORT;
781 } else if (buf->type == halide_type_of<float>()) {
782 *type = GL_FLOAT;
783 } else {
784 error(user_context) << "OpenGL: Only uint8, uint16, and float textures are supported.";
785 return false;
786 }
787
788 const int channels = (buf->dimensions > 2) ? buf->dim[2].extent : 0;
789
790 // GL_LUMINANCE and GL_LUMINANCE_ALPHA aren't color-renderable in ES2, period,
791 // thus can't be read back via ReadPixels, thus are nearly useless to us.
792 // GL_RED and GL_RG are technically optional in ES2 (required in ES3),
793 // but as a practical matter, they are supported on pretty much every recent device
794 // (iOS: everything >= iPhone 4s; Android: everything >= 4.3 plus various older devices).
795 // This is definitely suboptimal; the only real alternative would be to implement
796 // these as GL_RGB or GL_RGBA, ignoring the extra channels.
797 if (channels <= 2 && !global_state.have_texture_rg) {
798 error(user_context) << "OpenGL: 1 and 2 channel textures are not supported for this version of OpenGL.";
799 return false;
800 }
801
802 // Common formats supported by both GLES 2.0 and GL 2.1 are selected below
803 //
804 switch (channels) {
805 case 0:
806 case 1:
807 *format = GL_RED;
808 break;
809 case 2:
810 *format = GL_RG;
811 break;
812 case 3:
813 *format = GL_RGB;
814 break;
815 case 4:
816 *format = GL_RGBA;
817 break;
818 default:
819 error(user_context) << "OpenGL: Invalid number of color channels: " << channels;
820 return false;
821 }
822
823 switch (global_state.profile) {
824 case OpenGLES:
825 // For OpenGL ES, the texture format has to match the pixel format
826 // since there no conversion is performed during texture transfers.
827 // See OES_texture_float.
828 *internal_format = *format;
829 break;
830 case OpenGL:
831 // For desktop OpenGL, the internal format specifiers include the
832 // precise data type, see ARB_texture_float.
833 if (*type == GL_FLOAT) {
834 switch (*format) {
835 case GL_RED:
836 case GL_RG:
837 case GL_RGB:
838 case GL_RGBA:
839 *internal_format = GL_RGBA32F;
840 break;
841 default:
842 error(user_context) << "OpenGL: Cannot select internal format for format " << *format;
843 return false;
844 }
845 } else {
846 *internal_format = *format;
847 }
848 break;
849 }
850
851 return true;
852 }
853
854 // This function returns the width, height and number of color channels that the
855 // texture for the specified halide_buffer_t will contain. It provides a single place
856 // to implement the logic snapping zero sized dimensions to one element.
get_texture_dimensions(void * user_context,halide_buffer_t * buf,GLint * width,GLint * height,GLint * channels)857 WEAK bool get_texture_dimensions(void *user_context, halide_buffer_t *buf, GLint *width,
858 GLint *height, GLint *channels) {
859 if (buf->dimensions > 3) {
860 error(user_context) << "The GL backend supports buffers of at most 3 dimensions\n";
861 return false;
862 }
863
864 *width = buf->dim[0].extent;
865 if (*width == 0) {
866 error(user_context) << "Invalid dim[0].extent: " << *width << "\n";
867 return false;
868 }
869
870 // GLES 2.0 supports GL_TEXTURE_2D (plus cube map), but not 1d or 3d. If we
871 // end up with a buffer that has a zero extent, set the corresponding size
872 // to one.
873 *height = (buf->dimensions > 1) ? buf->dim[1].extent : 1;
874 *channels = (buf->dimensions > 2) ? buf->dim[2].extent : 1;
875
876 return true;
877 }
878
879 // Allocate a new texture matching the dimension and color format of the
880 // specified buffer.
halide_opengl_device_malloc(void * user_context,halide_buffer_t * buf)881 WEAK int halide_opengl_device_malloc(void *user_context, halide_buffer_t *buf) {
882 if (int error = halide_opengl_init(user_context)) {
883 return error;
884 }
885
886 if (!buf) {
887 error(user_context) << "Invalid buffer";
888 return 1;
889 }
890
891 // If the texture was already created by the host application, check that
892 // it has the correct format. Otherwise, allocate and set up an
893 // appropriate texture.
894 GLuint tex = 0;
895 bool halide_allocated = false;
896
897 if (buf->device) {
898 #ifdef HAVE_GLES3
899 // Look up the width and the height from the existing texture. Note that
900 // glGetTexLevelParameteriv does not support GL_TEXTURE_WIDTH or
901 // GL_TEXTURE_HEIGHT in GLES 2.0
902 GLint width, height;
903 global_state.BindTexture(GL_TEXTURE_2D, tex);
904 global_state.GetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &width);
905 global_state.GetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &height);
906 if (global_state.CheckAndReportError(user_context, "halide_opengl_device_malloc binding texture (GLES3)")) {
907 return 1;
908 }
909 if (width < buf->dim[0].extent || height < buf->dim[1].extent) {
910 error(user_context)
911 << "Existing texture is smaller than buffer. "
912 << "Texture size: " << width << "x" << height
913 << ", buffer size: " << buf->dim[0].extent << "x" << buf->dim[1].extent;
914 return 1;
915 }
916 #endif
917 uint64_t handle = buf->device;
918 tex = (handle == HALIDE_OPENGL_RENDER_TARGET) ? 0 : (GLuint)handle;
919 } else {
920 if (buf->dimensions > 3) {
921 error(user_context) << "high-dimensional textures are not supported";
922 return 1;
923 }
924
925 // Generate texture ID
926 global_state.GenTextures(1, &tex);
927 if (global_state.CheckAndReportError(user_context, "halide_opengl_device_malloc GenTextures")) {
928 global_state.DeleteTextures(1, &tex);
929 return 1;
930 }
931
932 // Set parameters for this texture: no interpolation and clamp to edges.
933 global_state.BindTexture(GL_TEXTURE_2D, tex);
934 global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
935 global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
936 global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
937 global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
938 if (global_state.CheckAndReportError(user_context, "halide_opengl_device_malloc binding texture")) {
939 global_state.DeleteTextures(1, &tex);
940 return 1;
941 }
942
943 // Create empty texture here and fill it with glTexSubImage2D later.
944 GLint internal_format, format, type;
945 if (!get_texture_format(user_context, buf, &internal_format, &format, &type)) {
946 error(user_context) << "Invalid texture format";
947 global_state.DeleteTextures(1, &tex);
948 return 1;
949 }
950
951 GLint width, height, channels;
952 if (!get_texture_dimensions(user_context, buf, &width, &height, &channels)) {
953 error(user_context) << "Invalid texture dimensions";
954 return 1;
955 }
956
957 global_state.TexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, type, NULL);
958 if (global_state.CheckAndReportError(user_context, "halide_opengl_device_malloc TexImage2D")) {
959 global_state.DeleteTextures(1, &tex);
960 return 1;
961 }
962
963 buf->device = tex;
964 buf->device_interface = &opengl_device_interface;
965 buf->device_interface->impl->use_module();
966 halide_allocated = true;
967 debug(user_context) << "Allocated texture " << tex
968 << " of size " << width << " x " << height << "\n";
969
970 global_state.BindTexture(GL_TEXTURE_2D, 0);
971 }
972
973 return 0;
974 }
975
976 // Delete all texture information associated with a buffer.
halide_opengl_device_free(void * user_context,halide_buffer_t * buf)977 WEAK int halide_opengl_device_free(void *user_context, halide_buffer_t *buf) {
978 if (!global_state.initialized) {
979 error(user_context) << "OpenGL runtime not initialized in call to halide_opengl_device_free.";
980 return 1;
981 }
982
983 if (buf->device == 0) {
984 return 0;
985 }
986
987 uint64_t handle = buf->device;
988 GLuint tex = (handle == HALIDE_OPENGL_RENDER_TARGET) ? 0 : (GLuint)handle;
989
990 int result = 0;
991 debug(user_context) << "halide_opengl_device_free: Deleting texture " << tex << "\n";
992 global_state.DeleteTextures(1, &tex);
993 if (global_state.CheckAndReportError(user_context, "halide_opengl_device_free DeleteTextures")) {
994 result = 1;
995 // do not return: we want to zero out the interface and
996 // device fields even if we can't delete the texture.
997 }
998 buf->device = 0;
999 buf->device_interface->impl->release_module();
1000 buf->device_interface = NULL;
1001
1002 return result;
1003 }
1004
1005 // Can't use std::min, std::max in Halide runtime.
// Returns `a` when it compares strictly less than `b`, and `b` otherwise.
template<typename T>
ALWAYS_INLINE T std_min(T a, T b) {
    if (a < b) {
        return a;
    }
    return b;
}
// Returns `a` when it compares strictly greater than `b`, and `b` otherwise.
template<typename T>
ALWAYS_INLINE T std_max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
1014
1015 // This method copies image data from the layout specified by the strides of the
1016 // halide_buffer_t to the packed interleaved format needed by GL. It is assumed that
1017 // src and dst have the same number of channels.
1018 template<class T>
halide_to_interleaved(const halide_buffer_t * src_buf,T * dst)1019 ALWAYS_INLINE void halide_to_interleaved(const halide_buffer_t *src_buf, T *dst) {
1020 const T *src = reinterpret_cast<const T *>(src_buf->host);
1021 int width = (src_buf->dimensions > 0) ? src_buf->dim[0].extent : 1;
1022 int height = (src_buf->dimensions > 1) ? src_buf->dim[1].extent : 1;
1023 int channels = (src_buf->dimensions > 2) ? src_buf->dim[2].extent : 1;
1024 int x_stride = (src_buf->dimensions > 0) ? src_buf->dim[0].stride : 0;
1025 int y_stride = (src_buf->dimensions > 1) ? src_buf->dim[1].stride : 0;
1026 int c_stride = (src_buf->dimensions > 2) ? src_buf->dim[2].stride : 0;
1027 for (int y = 0; y < height; y++) {
1028 int dstidx = y * width * channels;
1029 for (int x = 0; x < width; x++) {
1030 int srcidx = y * y_stride + x * x_stride;
1031 for (int c = 0; c < channels; c++) {
1032 dst[dstidx] = src[srcidx];
1033 srcidx += c_stride;
1034 dstidx += 1;
1035 }
1036 }
1037 }
1038 }
1039
1040 // This method copies image data from the packed interleaved format needed by GL
1041 // to the arbitrary strided layout specified by the halide_buffer_t. If src has fewer
1042 // channels than dst, the excess in dst will be left untouched; if src has
1043 // more channels than dst, the excess will be ignored.
1044 template<class T>
interleaved_to_halide(void * user_context,const T * src,int src_channels,halide_buffer_t * dst_buf)1045 ALWAYS_INLINE void interleaved_to_halide(void *user_context, const T *src, int src_channels, halide_buffer_t *dst_buf) {
1046 T *dst = reinterpret_cast<T *>(dst_buf->host);
1047 int width = (dst_buf->dimensions > 0) ? dst_buf->dim[0].extent : 1;
1048 int height = (dst_buf->dimensions > 1) ? dst_buf->dim[1].extent : 1;
1049 int dst_channels = (dst_buf->dimensions > 2) ? dst_buf->dim[2].extent : 1;
1050 int x_stride = (dst_buf->dimensions > 0) ? dst_buf->dim[0].stride : 0;
1051 int y_stride = (dst_buf->dimensions > 1) ? dst_buf->dim[1].stride : 0;
1052 int c_stride = (dst_buf->dimensions > 2) ? dst_buf->dim[2].stride : 0;
1053 int src_skip = std_max(0, src_channels - dst_channels);
1054 int channels = std_min<int>(src_channels, dst_channels);
1055
1056 for (int y = 0; y < height; y++) {
1057 int srcidx = y * width * src_channels;
1058 for (int x = 0; x < width; x++) {
1059 int dstidx = y * y_stride + x * x_stride;
1060 for (int c = 0; c < channels; c++) {
1061 dst[dstidx] = src[srcidx];
1062 srcidx += 1;
1063 dstidx += c_stride;
1064 }
1065 srcidx += src_skip;
1066 }
1067 }
1068 }
1069
1070 // Copy image data from host memory to texture.
halide_opengl_copy_to_device(void * user_context,halide_buffer_t * buf)1071 WEAK int halide_opengl_copy_to_device(void *user_context, halide_buffer_t *buf) {
1072 if (!global_state.initialized) {
1073 error(user_context) << "OpenGL runtime not initialized (halide_opengl_copy_to_device).";
1074 return 1;
1075 }
1076
1077 GLStateSaver state_saver;
1078
1079 int err = halide_opengl_device_malloc(user_context, buf);
1080 if (err) {
1081 return err;
1082 }
1083
1084 if (!buf->host || !buf->device) {
1085 debug_buffer(user_context, buf);
1086 error(user_context) << "Invalid copy_to_device operation: host or device NULL";
1087 return 1;
1088 }
1089
1090 uint64_t handle = buf->device;
1091 if (handle == HALIDE_OPENGL_RENDER_TARGET) {
1092 // TODO: this isn't correct; we want to ensure we copy to the current render_target.
1093 debug(user_context) << "halide_opengl_copy_to_device: called for HALIDE_OPENGL_RENDER_TARGET\n";
1094 return 0;
1095 }
1096 GLuint tex = (GLuint)handle;
1097 debug(user_context) << "halide_opengl_copy_to_device: " << tex << "\n";
1098
1099 global_state.BindTexture(GL_TEXTURE_2D, tex);
1100 if (global_state.CheckAndReportError(user_context, "halide_opengl_copy_to_device BindTexture")) {
1101 return 1;
1102 }
1103 GLint internal_format, format, type;
1104 if (!get_texture_format(user_context, buf, &internal_format, &format, &type)) {
1105 error(user_context) << "Invalid texture format";
1106 return 1;
1107 }
1108
1109 GLint width, height, buffer_channels;
1110 if (!get_texture_dimensions(user_context, buf, &width, &height, &buffer_channels)) {
1111 error(user_context) << "Invalid texture dimensions";
1112 return 1;
1113 }
1114
1115 // To use TexSubImage2D directly, the colors must be stored interleaved
1116 // and rows must be stored consecutively.
1117 // (Single-channel buffers are "interleaved" for our purposes here.)
1118 bool is_interleaved = (buffer_channels == 1) || (buf->dim[2].stride == 1 && buf->dim[0].stride == buf->dim[2].extent);
1119 bool is_packed = (buf->dim[1].stride == buf->dim[0].extent * buf->dim[0].stride);
1120 if (is_interleaved && is_packed) {
1121 global_state.PixelStorei(GL_UNPACK_ALIGNMENT, 1);
1122 global_state.TexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, type, buf->host);
1123 if (global_state.CheckAndReportError(user_context, "halide_opengl_copy_to_device TexSubImage2D(1)")) {
1124 return 1;
1125 }
1126 } else {
1127 debug(user_context)
1128 << "Warning: In copy_to_device, host buffer is not interleaved. Doing slow interleave.\n";
1129
1130 size_t texture_size = width * height * buffer_channels * buf->type.bytes();
1131 HalideMalloc tmp(user_context, texture_size);
1132 if (!tmp.ptr) {
1133 error(user_context) << "halide_malloc failed inside copy_to_device";
1134 return -1;
1135 }
1136
1137 switch (type) {
1138 case GL_UNSIGNED_BYTE:
1139 halide_to_interleaved<uint8_t>(buf, (uint8_t *)tmp.ptr);
1140 break;
1141 case GL_UNSIGNED_SHORT:
1142 halide_to_interleaved<uint16_t>(buf, (uint16_t *)tmp.ptr);
1143 break;
1144 case GL_FLOAT:
1145 halide_to_interleaved<float>(buf, (float *)tmp.ptr);
1146 break;
1147 }
1148
1149 global_state.PixelStorei(GL_UNPACK_ALIGNMENT, 1);
1150 global_state.TexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, format, type, tmp.ptr);
1151 if (global_state.CheckAndReportError(user_context, "halide_opengl_copy_to_device TexSubImage2D(2)")) {
1152 return 1;
1153 }
1154 }
1155
1156 return 0;
1157 }
1158
1159 // Copy image data from texture back to host memory.
halide_opengl_copy_to_host(void * user_context,halide_buffer_t * buf)1160 WEAK int halide_opengl_copy_to_host(void *user_context, halide_buffer_t *buf) {
1161 if (!global_state.initialized) {
1162 error(user_context) << "OpenGL runtime not initialized (halide_opengl_copy_to_host).";
1163 return 1;
1164 }
1165
1166 GLStateSaver state_saver;
1167
1168 if (!buf->host || !buf->device) {
1169 debug_buffer(user_context, buf);
1170 error(user_context) << "Invalid copy_to_host operation: host or dev NULL";
1171 return 1;
1172 }
1173
1174 GLint internal_format, format, type;
1175 if (!get_texture_format(user_context, buf, &internal_format, &format, &type)) {
1176 error(user_context) << "Invalid texture format";
1177 return 1;
1178 }
1179
1180 GLint width, height, buffer_channels;
1181 if (!get_texture_dimensions(user_context, buf, &width, &height, &buffer_channels)) {
1182 error(user_context) << "Invalid texture dimensions";
1183 return 1;
1184 }
1185 GLint texture_channels = buffer_channels;
1186
1187 uint64_t handle = buf->device;
1188 if (handle != HALIDE_OPENGL_RENDER_TARGET) {
1189 GLuint tex = (GLuint)handle;
1190 debug(user_context) << "halide_copy_to_host: texture " << tex << "\n";
1191 global_state.BindFramebuffer(GL_FRAMEBUFFER, global_state.framebuffer_id);
1192 if (global_state.CheckAndReportError(user_context, "copy_to_host BindFramebuffer")) {
1193 return 1;
1194 }
1195 global_state.FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex, 0);
1196 if (global_state.CheckAndReportError(user_context, "copy_to_host FramebufferTexture2D")) {
1197 return 1;
1198 }
1199 } else {
1200 debug(user_context) << "halide_copy_to_host: HALIDE_OPENGL_RENDER_TARGET\n";
1201 }
1202
1203 // Check that framebuffer is set up correctly
1204 GLenum status = global_state.CheckFramebufferStatus(GL_FRAMEBUFFER);
1205 if (status != GL_FRAMEBUFFER_COMPLETE) {
1206 error(user_context)
1207 << "Setting up GL framebuffer " << global_state.framebuffer_id << " failed " << status;
1208 return 1;
1209 }
1210
1211 // The only format/type pairs guaranteed to be readable in GLES2 are GL_RGBA+GL_UNSIGNED_BYTE,
1212 // plus one other implementation-dependent pair specified here. Spoiler alert:
1213 // some ES2 implementations return that very same pair here (i.e., they don't support
1214 // any other formats); in that case, we need to read as RGBA and manually convert to
1215 // what we need (usually GL_RGB).
1216 // NOTE: this requires the currently-bound Framebuffer is correct.
1217 // TODO: short and float will require even more effort on top of this.
1218 if (global_state.profile == OpenGLES && format == GL_RGB) {
1219 GLint extra_format, extra_type;
1220 global_state.GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE, &extra_type);
1221 if (type != GL_UNSIGNED_BYTE && type != extra_type) {
1222 error(user_context) << "ReadPixels does not support our type; we don't handle this yet.\n";
1223 return 1;
1224 }
1225 global_state.GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &extra_format);
1226 if (format != GL_RGBA && format != extra_format) {
1227 debug(user_context) << "ReadPixels does not support our format; falling back to GL_RGBA\n";
1228 format = GL_RGBA;
1229 texture_channels = 4;
1230 }
1231 }
1232
1233 // To download the texture directly, the colors must be stored interleaved
1234 // and rows must be stored consecutively.
1235 // (Single-channel buffers are "interleaved" for our purposes here.)
1236 bool is_interleaved = (buffer_channels == 1) || (buf->dim[2].stride == 1 && buf->dim[0].stride == buf->dim[2].extent);
1237 bool is_packed = (buf->dim[1].stride == buf->dim[0].extent * buf->dim[0].stride);
1238 if (is_interleaved && is_packed && texture_channels == buffer_channels) {
1239 global_state.PixelStorei(GL_PACK_ALIGNMENT, 1);
1240 #ifdef DEBUG_RUNTIME
1241 int64_t t1 = halide_current_time_ns(user_context);
1242 #endif
1243 global_state.ReadPixels(0, 0, buf->dim[0].extent, buf->dim[1].extent, format, type, buf->host);
1244 #ifdef DEBUG_RUNTIME
1245 int64_t t2 = halide_current_time_ns(user_context);
1246 #endif
1247 if (global_state.CheckAndReportError(user_context, "copy_to_host ReadPixels (1)")) {
1248 return 1;
1249 }
1250 #ifdef DEBUG_RUNTIME
1251 debug(user_context) << "ReadPixels(1) time: " << (t2 - t1) / 1e3 << "usec\n";
1252 #endif
1253 } else {
1254 debug(user_context)
1255 << "Warning: In copy_to_host, host buffer is not interleaved, or not a native format. Doing slow deinterleave.\n";
1256
1257 size_t texture_size = width * height * texture_channels * buf->type.bytes();
1258 HalideMalloc tmp(user_context, texture_size);
1259 if (!tmp.ptr) {
1260 error(user_context) << "halide_malloc failed inside copy_to_host";
1261 return -1;
1262 }
1263
1264 global_state.PixelStorei(GL_PACK_ALIGNMENT, 1);
1265 #ifdef DEBUG_RUNTIME
1266 int64_t t1 = halide_current_time_ns(user_context);
1267 #endif
1268 global_state.ReadPixels(0, 0, buf->dim[0].extent, buf->dim[1].extent, format, type, tmp.ptr);
1269 #ifdef DEBUG_RUNTIME
1270 int64_t t2 = halide_current_time_ns(user_context);
1271 debug(user_context) << "ReadPixels(2) time: " << (t2 - t1) / 1e3 << "usec\n";
1272 #endif
1273 if (global_state.CheckAndReportError(user_context, "copy_to_host ReadPixels (2)")) {
1274 return 1;
1275 }
1276
1277 // Premature optimization warning: interleaved_to_halide() could definitely
1278 // be optimized, but ReadPixels() typically takes ~2-10x as long (especially on
1279 // mobile devices), so the returns will be modest.
1280 #ifdef DEBUG_RUNTIME
1281 int64_t t3 = halide_current_time_ns(user_context);
1282 #endif
1283 switch (type) {
1284 case GL_UNSIGNED_BYTE:
1285 interleaved_to_halide<uint8_t>(user_context, (uint8_t *)tmp.ptr, texture_channels, buf);
1286 break;
1287 case GL_UNSIGNED_SHORT:
1288 interleaved_to_halide<uint16_t>(user_context, (uint16_t *)tmp.ptr, texture_channels, buf);
1289 break;
1290 case GL_FLOAT:
1291 interleaved_to_halide<float>(user_context, (float *)tmp.ptr, texture_channels, buf);
1292 break;
1293 }
1294 #ifdef DEBUG_RUNTIME
1295 int64_t t4 = halide_current_time_ns(user_context);
1296 debug(user_context) << "deinterleave time: " << (t4 - t3) / 1e3 << "usec\n";
1297 #endif
1298 }
1299
1300 return 0;
1301 }
1302
1303 } // namespace OpenGL
1304 } // namespace Internal
1305 } // namespace Runtime
1306 } // namespace Halide
1307
1308 using namespace Halide::Runtime::Internal::OpenGL;
1309
1310 // Find the correct module for the called function
1311 // TODO: This currently takes O(# of GLSL'd stages) and can
1312 // be optimized
find_module(const char * stage_name)1313 WEAK ModuleState *find_module(const char *stage_name) {
1314 ModuleState *state_ptr = state_list;
1315
1316 while (state_ptr != NULL) {
1317 KernelInfo *kernel = state_ptr->kernel;
1318 if (kernel && strcmp(stage_name, kernel->name) == 0) {
1319 return state_ptr;
1320 }
1321 state_ptr = state_ptr->next;
1322 }
1323
1324 return NULL;
1325 }
1326
1327 // Create wrappers that satisfy old naming conventions
1328
1329 extern "C" {
1330
halide_opengl_run(void * user_context,void * state_ptr,const char * entry_name,int blocksX,int blocksY,int blocksZ,int threadsX,int threadsY,int threadsZ,int shared_mem_bytes,size_t arg_sizes[],void * args[],int8_t is_buffer[],int num_padded_attributes,float * vertex_buffer,int num_coords_dim0,int num_coords_dim1)1331 WEAK int halide_opengl_run(void *user_context,
1332 void *state_ptr,
1333 const char *entry_name,
1334 int blocksX, int blocksY, int blocksZ,
1335 int threadsX, int threadsY, int threadsZ,
1336 int shared_mem_bytes,
1337 size_t arg_sizes[], void *args[], int8_t is_buffer[],
1338 int num_padded_attributes,
1339 float *vertex_buffer,
1340 int num_coords_dim0,
1341 int num_coords_dim1) {
1342 if (!global_state.initialized) {
1343 error(user_context) << "OpenGL runtime not initialized (halide_opengl_run).";
1344 return 1;
1345 }
1346
1347 GLStateSaver state_saver;
1348
1349 // Find the right module
1350 ModuleState *mod = find_module(entry_name);
1351 if (!mod) {
1352 error(user_context) << "Internal error: module state for stage " << entry_name << " not found\n";
1353 return 1;
1354 }
1355
1356 KernelInfo *kernel = mod->kernel;
1357
1358 global_state.UseProgram(kernel->program_id);
1359 if (global_state.CheckAndReportError(user_context, "halide_opengl_run UseProgram")) {
1360 return 1;
1361 }
1362
1363 // TODO(abstephensg) it would be great to codegen these vec4 uniform buffers
1364 // directly, instead of passing an array of arguments and then copying them
1365 // out at runtime.
1366
1367 // Determine the number of float and int uniform parameters. This code
1368 // follows the argument packing convention in CodeGen_GPU_Host and
1369 // CodeGen_OpenGL_Dev
1370 int num_uniform_floats = 0;
1371 int num_uniform_ints = 0;
1372
1373 Argument *kernel_arg = kernel->arguments;
1374 for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) {
1375
1376 // Check for a mismatch between the number of arguments declared in the
1377 // fragment shader source header and the number passed to this function
1378 if (!kernel_arg) {
1379 error(user_context)
1380 << "Too many arguments passed to halide_opengl_run\n"
1381 << "Argument " << i << ": size=" << i << " value=" << args[i];
1382 return 1;
1383 }
1384
1385 // Count the number of float and int uniform parameters.
1386 if (kernel_arg->kind == Argument::Uniform) {
1387 switch (kernel_arg->type) {
1388 case Argument::Float:
1389 // Integer parameters less than 32 bits wide are passed as
1390 // normalized float values
1391 case Argument::Int8:
1392 case Argument::UInt8:
1393 case Argument::Int16:
1394 case Argument::UInt16:
1395 ++num_uniform_floats;
1396 break;
1397 case Argument::Bool:
1398 case Argument::Int32:
1399 case Argument::UInt32:
1400 ++num_uniform_ints;
1401 break;
1402 default:
1403 error(user_context) << "GLSL: Encountered invalid kernel argument type";
1404 return 1;
1405 }
1406 }
1407 }
1408
1409 // Pad up to a multiple of four
1410 int num_padded_uniform_floats = (num_uniform_floats + 0x3) & ~0x3;
1411 int num_padded_uniform_ints = (num_uniform_ints + 0x3) & ~0x3;
1412
1413 // Allocate storage for the packed arguments
1414 float uniform_float[num_padded_uniform_floats];
1415 int uniform_int[num_padded_uniform_ints];
1416
1417 bool bind_render_targets = true;
1418
1419 // Copy input arguments to corresponding GLSL uniforms.
1420 GLint num_active_textures = 0;
1421 int uniform_float_idx = 0;
1422 int uniform_int_idx = 0;
1423
1424 kernel_arg = kernel->arguments;
1425 for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) {
1426
1427 if (kernel_arg->kind == Argument::Outbuf) {
1428 halide_assert(user_context, is_buffer[i] && "OpenGL Outbuf argument is not a buffer.");
1429 // Check if the output buffer will be bound by the client instead of
1430 // the Halide runtime
1431 uint64_t handle = ((halide_buffer_t *)args[i])->device;
1432 if (!handle) {
1433 error(user_context) << "GLSL: Encountered invalid NULL dev pointer";
1434 return 1;
1435 }
1436 if (handle == HALIDE_OPENGL_RENDER_TARGET) {
1437 bind_render_targets = false;
1438 }
1439 // Outbuf textures are handled explicitly below
1440 continue;
1441 } else if (kernel_arg->kind == Argument::Inbuf) {
1442 halide_assert(user_context, is_buffer[i] && "OpenGL Inbuf argument is not a buffer.")
1443 GLint loc =
1444 global_state.GetUniformLocation(kernel->program_id, kernel_arg->name);
1445 if (global_state.CheckAndReportError(user_context, "halide_opengl_run GetUniformLocation(InBuf)")) {
1446 return 1;
1447 }
1448 if (loc == -1) {
1449 error(user_context) << "No sampler defined for input texture.";
1450 return 1;
1451 }
1452 uint64_t handle = ((halide_buffer_t *)args[i])->device;
1453 if (!handle) {
1454 error(user_context) << "GLSL: Encountered invalid NULL dev pointer";
1455 return 1;
1456 }
1457 global_state.ActiveTexture(GL_TEXTURE0 + num_active_textures);
1458 global_state.BindTexture(GL_TEXTURE_2D, handle == HALIDE_OPENGL_RENDER_TARGET ? 0 : (GLuint)handle);
1459 global_state.Uniform1iv(loc, 1, &num_active_textures);
1460
1461 // Textures not created by the Halide runtime might not have
1462 // parameters set, or might have had parameters set differently
1463 global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1464 global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1465 global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
1466 global_state.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
1467
1468 num_active_textures++;
1469 // TODO: check maximum number of active textures
1470 } else if (kernel_arg->kind == Argument::Uniform) {
1471 // Copy the uniform parameter into the packed scalar list
1472 // corresponding to its type.
1473
1474 // Note: small integers are represented as floats in GLSL.
1475 switch (kernel_arg->type) {
1476 case Argument::Float:
1477 uniform_float[uniform_float_idx++] = *(float *)args[i];
1478 break;
1479 case Argument::Bool:
1480 uniform_int[uniform_int_idx++] = *((bool *)args[i]) ? 1 : 0;
1481 break;
1482 case Argument::Int8:
1483 uniform_float[uniform_float_idx++] = *((int8_t *)args[i]);
1484 break;
1485 case Argument::UInt8:
1486 uniform_float[uniform_float_idx++] = *((uint8_t *)args[i]);
1487 break;
1488 case Argument::Int16: {
1489 uniform_float[uniform_float_idx++] = *((int16_t *)args[i]);
1490 break;
1491 }
1492 case Argument::UInt16: {
1493 uniform_float[uniform_float_idx++] = *((uint16_t *)args[i]);
1494 break;
1495 }
1496 case Argument::Int32: {
1497 uniform_int[uniform_int_idx++] = *((int32_t *)args[i]);
1498 break;
1499 }
1500 case Argument::UInt32: {
1501 uint32_t value = *((uint32_t *)args[i]);
1502 if (value > 0x7fffffff) {
1503 error(user_context)
1504 << "OpenGL: argument '" << kernel_arg->name << "' is too large for GLint";
1505 return -1;
1506 }
1507 uniform_int[uniform_int_idx++] = static_cast<GLint>(value);
1508 break;
1509 }
1510 case Argument::Void:
1511 error(user_context) << "OpenGL: Encountered invalid kernel argument type";
1512 return 1;
1513 }
1514 }
1515 }
1516
1517 if (kernel_arg) {
1518 error(user_context) << "Too few arguments passed to halide_opengl_run";
1519 return 1;
1520 }
1521
1522 // Set the packed uniform int parameters
1523 for (int idx = 0; idx != num_padded_uniform_ints; idx += 4) {
1524
1525 // Produce the uniform parameter name without using the std library.
1526 Printer<StringStreamPrinter, 16> name(user_context);
1527 name << "_uniformi" << (idx / 4);
1528
1529 GLint loc = global_state.GetUniformLocation(kernel->program_id, name.str());
1530 if (global_state.CheckAndReportError(user_context, "halide_opengl_run GetUniformLocation")) {
1531 return 1;
1532 }
1533 if (loc == -1) {
1534 // Argument was probably optimized away by GLSL compiler.
1535 continue;
1536 }
1537
1538 global_state.Uniform4iv(loc, 1, &uniform_int[idx]);
1539 }
1540
1541 // Set the packed uniform float parameters
1542 for (int idx = 0; idx != num_padded_uniform_floats; idx += 4) {
1543
1544 // Produce the uniform parameter name without using the std library.
1545 Printer<StringStreamPrinter, 16> name(user_context);
1546 name << "_uniformf" << (idx / 4);
1547
1548 GLint loc = global_state.GetUniformLocation(kernel->program_id, name.str());
1549 if (global_state.CheckAndReportError(user_context, "halide_opengl_run GetUniformLocation(2)")) {
1550 return 1;
1551 }
1552 if (loc == -1) {
1553 // Argument was probably optimized away by GLSL compiler.
1554 continue;
1555 }
1556
1557 global_state.Uniform4fv(loc, 1, &uniform_float[idx]);
1558 }
1559
1560 // Prepare framebuffer for rendering to output textures.
1561 GLint output_min[2] = {0, 0};
1562 GLint output_extent[2] = {0, 0};
1563
1564 if (bind_render_targets) {
1565 global_state.BindFramebuffer(GL_FRAMEBUFFER, global_state.framebuffer_id);
1566 }
1567
1568 global_state.Disable(GL_CULL_FACE);
1569 global_state.Disable(GL_DEPTH_TEST);
1570
1571 GLint num_output_textures = 0;
1572 kernel_arg = kernel->arguments;
1573 for (int i = 0; args[i]; i++, kernel_arg = kernel_arg->next) {
1574 if (kernel_arg->kind != Argument::Outbuf) continue;
1575
1576 halide_assert(user_context, is_buffer[i] && "OpenGL Outbuf argument is not a buffer.")
1577
1578 // TODO: GL_MAX_COLOR_ATTACHMENTS
1579 if (num_output_textures >= 1) {
1580 error(user_context)
1581 << "OpenGL ES 2.0 only supports one single output texture";
1582 return 1;
1583 }
1584
1585 halide_buffer_t *buf = (halide_buffer_t *)args[i];
1586 halide_assert(user_context, buf->dimensions >= 2);
1587 uint64_t handle = buf->device;
1588 if (!handle) {
1589 error(user_context) << "GLSL: Encountered invalid NULL dev pointer";
1590 return 1;
1591 }
1592 GLuint tex = (handle == HALIDE_OPENGL_RENDER_TARGET) ? 0 : (GLuint)handle;
1593
1594 // Check to see if the object name is actually a FBO
1595 if (bind_render_targets) {
1596 debug(user_context)
1597 << "Output texture " << num_output_textures << ": " << tex << "\n";
1598 global_state.FramebufferTexture2D(GL_FRAMEBUFFER,
1599 GL_COLOR_ATTACHMENT0 + num_output_textures,
1600 GL_TEXTURE_2D, tex, 0);
1601 if (global_state.CheckAndReportError(user_context, "halide_opengl_run FramebufferTexture2D")) {
1602 return 1;
1603 }
1604 }
1605
1606 output_min[0] = buf->dim[0].min;
1607 output_min[1] = buf->dim[1].min;
1608 output_extent[0] = buf->dim[0].extent;
1609 output_extent[1] = buf->dim[1].extent;
1610 num_output_textures++;
1611 }
1612 // TODO: GL_MAX_DRAW_BUFFERS
1613 if (num_output_textures == 0) {
1614 error(user_context) << "halide_opengl_run: kernel has no output\n";
1615 // TODO: cleanup
1616 return 1;
1617 } else if (num_output_textures > 1) {
1618 if (global_state.DrawBuffers) {
1619 HalideMalloc draw_buffers_tmp(user_context, num_output_textures * sizeof(GLenum));
1620 if (!draw_buffers_tmp.ptr) {
1621 error(user_context) << "halide_malloc";
1622 return 1;
1623 }
1624 GLenum *draw_buffers = (GLenum *)draw_buffers_tmp.ptr;
1625 for (int i = 0; i < num_output_textures; i++) {
1626 draw_buffers[i] = GL_COLOR_ATTACHMENT0 + i;
1627 }
1628 global_state.DrawBuffers(num_output_textures, draw_buffers);
1629 if (global_state.CheckAndReportError(user_context, "halide_opengl_run DrawBuffers")) {
1630 return 1;
1631 }
1632 } else {
1633 error(user_context) << "halide_opengl_run: kernel has more than one output and DrawBuffers is not available (earlier than GL ES 3.0?).\n";
1634 // TODO: cleanup
1635 return 1;
1636 }
1637 }
1638
1639 if (bind_render_targets) {
1640 // Check that framebuffer is set up correctly
1641 GLenum status = global_state.CheckFramebufferStatus(GL_FRAMEBUFFER);
1642 if (global_state.CheckAndReportError(user_context, "halide_opengl_run CheckFramebufferStatus")) {
1643 return 1;
1644 }
1645 if (status != GL_FRAMEBUFFER_COMPLETE) {
1646 error(user_context)
1647 << "Setting up GL framebuffer " << global_state.framebuffer_id
1648 << " failed (" << status << ")";
1649 // TODO: cleanup
1650 return 1;
1651 }
1652 }
1653
1654 // Set vertex attributes
1655 GLint loc = global_state.GetUniformLocation(kernel->program_id, "output_extent");
1656 global_state.Uniform2iv(loc, 1, output_extent);
1657 if (global_state.CheckAndReportError(user_context, "halide_opengl_run Uniform2iv(output_extent)")) {
1658 return 1;
1659 }
1660 loc = global_state.GetUniformLocation(kernel->program_id, "output_min");
1661 global_state.Uniform2iv(loc, 1, output_min);
1662 if (global_state.CheckAndReportError(user_context, "halide_opengl_run Uniform2iv(output_min)")) {
1663 return 1;
1664 }
1665
1666 #if 0 // DEBUG_RUNTIME
1667 debug(user_context) << "output_extent: " << output_extent[0] << "," << output_extent[1] << "\n";
1668 debug(user_context) << "output_min: " << output_min[0] << "," << output_min[1] << "\n";
1669 #endif
1670
1671 // TODO(abestephensg): Sort coordinate dimensions when the linear solver is integrated
1672 // Sort the coordinates
1673
1674 // Construct an element buffer using the sorted vertex order.
1675 // Note that this is "width" and "height" of the vertices, not the output image.
1676 int width = num_coords_dim0;
1677 int height = num_coords_dim1;
1678
1679 int vertex_buffer_size = width * height * num_padded_attributes;
1680
1681 int element_buffer_size = (width - 1) * (height - 1) * 6;
1682 int element_buffer[element_buffer_size];
1683
1684 int idx = 0;
1685 for (int h = 0; h != (height - 1); ++h) {
1686 for (int w = 0; w != (width - 1); ++w) {
1687
1688 // TODO(abestephensg): Use sorted coordinates when integrated
1689 int v = w + h * width;
1690 element_buffer[idx++] = v;
1691 element_buffer[idx++] = v + 1;
1692 element_buffer[idx++] = v + width + 1;
1693
1694 element_buffer[idx++] = v + width + 1;
1695 element_buffer[idx++] = v + width;
1696 element_buffer[idx++] = v;
1697 }
1698 }
1699
1700 #if 0 // DEBUG_RUNTIME
1701 debug(user_context) << "Vertex buffer:";
1702 for (int i=0;i!=vertex_buffer_size;++i) {
1703 if (!(i%num_padded_attributes)) {
1704 debug(user_context) << "\n";
1705 }
1706 debug(user_context) << vertex_buffer[i] << " ";
1707 }
1708 debug(user_context) << "\n";
1709 debug(user_context) << "\n";
1710
1711 debug(user_context) << "Element buffer:";
1712 for (int i=0;i!=element_buffer_size;++i) {
1713 if (!(i%3)) {
1714 debug(user_context) << "\n";
1715 }
1716 debug(user_context) << element_buffer[i] << " ";
1717 }
1718 debug(user_context) << "\n";
1719 #endif
1720
1721 // Setup viewport
1722 global_state.Viewport(0, 0, output_extent[0], output_extent[1]);
1723
1724 // Setup the vertex and element buffers
1725 GLuint vertex_array_object = 0;
1726 if (global_state.have_vertex_array_objects) {
1727 global_state.GenVertexArrays(1, &vertex_array_object);
1728 global_state.BindVertexArray(vertex_array_object);
1729 }
1730
1731 GLuint vertex_buffer_id;
1732 global_state.GenBuffers(1, &vertex_buffer_id);
1733 global_state.BindBuffer(GL_ARRAY_BUFFER, vertex_buffer_id);
1734 global_state.BufferData(GL_ARRAY_BUFFER, sizeof(float) * vertex_buffer_size, vertex_buffer, GL_STATIC_DRAW);
1735 if (global_state.CheckAndReportError(user_context, "halide_opengl_run vertex BufferData et al")) {
1736 return 1;
1737 }
1738
1739 GLuint element_buffer_id;
1740 global_state.GenBuffers(1, &element_buffer_id);
1741 global_state.BindBuffer(GL_ELEMENT_ARRAY_BUFFER, element_buffer_id);
1742 global_state.BufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(float) * element_buffer_size, element_buffer, GL_STATIC_DRAW);
1743 if (global_state.CheckAndReportError(user_context, "halide_opengl_run element BufferData et al")) {
1744 return 1;
1745 }
1746
1747 // The num_padded_attributes argument is the number of vertex attributes,
1748 // including the spatial x and y coordinates, padded up to a multiple of
1749 // four so that the attributes may be packed into vec4 slots.
1750 int num_packed_attributes = num_padded_attributes / 4;
1751
1752 // Set up the per vertex attributes
1753 GLint attrib_ids[num_packed_attributes];
1754
1755 for (int i = 0; i != num_packed_attributes; i++) {
1756
1757 // The attribute names can synthesized by the runtime based on the
1758 // number of packed varying attributes
1759 Printer<StringStreamPrinter> attribute_name(user_context);
1760 attribute_name << "_varyingf" << i << "_attrib";
1761
1762 // TODO(abstephensg): Switch to glBindAttribLocation
1763 GLint attrib_id = global_state.GetAttribLocation(kernel->program_id, attribute_name.buf);
1764 attrib_ids[i] = attrib_id;
1765
1766 // Check to see if the varying attribute was simplified out of the
1767 // program by the GLSL compiler.
1768 if (attrib_id == -1) {
1769 continue;
1770 }
1771
1772 global_state.VertexAttribPointer(attrib_id, 4, GL_FLOAT, GL_FALSE /* Normalized */, sizeof(GLfloat) * num_padded_attributes, (void *)(i * sizeof(GLfloat) * 4));
1773 if (global_state.CheckAndReportError(user_context, "halide_opengl_run VertexAttribPointer et al")) {
1774 return 1;
1775 }
1776
1777 global_state.EnableVertexAttribArray(attrib_id);
1778 if (global_state.CheckAndReportError(user_context, "halide_opengl_run EnableVertexAttribArray et al")) {
1779 return 1;
1780 }
1781 }
1782
1783 // Draw the scene
1784 global_state.DrawElements(GL_TRIANGLES, element_buffer_size, GL_UNSIGNED_INT, NULL);
1785 if (global_state.CheckAndReportError(user_context, "halide_opengl_run DrawElements et al")) {
1786 return 1;
1787 }
1788
1789 // Cleanup
1790 if (global_state.have_vertex_array_objects) {
1791 global_state.DeleteVertexArrays(1, &vertex_array_object);
1792 }
1793
1794 global_state.DeleteBuffers(1, &vertex_buffer_id);
1795 global_state.DeleteBuffers(1, &element_buffer_id);
1796
1797 return 0;
1798 }
1799
halide_opengl_device_sync(void * user_context,struct halide_buffer_t *)1800 WEAK int halide_opengl_device_sync(void *user_context, struct halide_buffer_t *) {
1801 if (!global_state.initialized) {
1802 error(user_context) << "OpenGL runtime not initialized (halide_opengl_device_sync).";
1803 return 1;
1804 }
1805 #ifdef DEBUG_RUNTIME
1806 int64_t t0 = halide_current_time_ns(user_context);
1807 #endif
1808 global_state.Finish();
1809 #ifdef DEBUG_RUNTIME
1810 int64_t t1 = halide_current_time_ns(user_context);
1811 debug(user_context) << "halide_opengl_device_sync: took " << (t1 - t0) / 1e3 << "usec\n";
1812 #endif
1813 return 0;
1814 }
1815
1816 // Called at the beginning of a code block generated by Halide. This function
1817 // is responsible for setting up the OpenGL environment and compiling the GLSL
1818 // code into a fragment shader.
halide_opengl_initialize_kernels(void * user_context,void ** state_ptr,const char * src,int size)1819 WEAK int halide_opengl_initialize_kernels(void *user_context, void **state_ptr,
1820 const char *src, int size) {
1821 debug(user_context) << "In initialize_kernels\n";
1822
1823 if (int error = halide_opengl_init(user_context)) {
1824 return error;
1825 }
1826
1827 const char *this_kernel = src;
1828
1829 ModuleState **state = (ModuleState **)state_ptr;
1830 ModuleState *module = *state;
1831
1832 while (this_kernel) {
1833 // Find the start of the next kernel
1834 const char *next_kernel = strstr(this_kernel + 1, kernel_marker);
1835
1836 // Use that to compute the length of this kernel
1837 int len = 0;
1838 if (!next_kernel) {
1839 len = strlen(this_kernel);
1840 } else {
1841 len = next_kernel - this_kernel;
1842 }
1843
1844 // Construct a new ModuleState and add it to the global list
1845 module = (ModuleState *)malloc(sizeof(ModuleState));
1846 module->kernel = NULL;
1847 module->next = state_list;
1848 state_list = module;
1849 *state = module;
1850
1851 KernelInfo *kernel = module->kernel;
1852 if (!kernel) {
1853 kernel = create_kernel(user_context, this_kernel, len);
1854 if (!kernel) {
1855 error(user_context) << "Invalid kernel: " << this_kernel;
1856 return -1;
1857 }
1858 module->kernel = kernel;
1859 }
1860
1861 // Create the vertex shader. The runtime will output boilerplate for the
1862 // vertex shader based on a fixed program plus arguments obtained from
1863 // the comment header passed in the fragment shader. Since there are a
1864 // relatively small number of vertices (i.e. usually only four), per-vertex
1865 // expressions interpolated by varying attributes are evaluated
1866 // by host code on the CPU and passed to the GPU as values in the
1867 // vertex buffer.
1868 enum { PrinterLength = 1024 * 4 };
1869 Printer<StringStreamPrinter, PrinterLength> vertex_src(user_context);
1870
1871 // Count the number of varying attributes, this is 2 for the spatial
1872 // x and y coordinates, plus the number of scalar varying attribute
1873 // expressions pulled out of the fragment shader.
1874 int num_varying_float = 2;
1875
1876 for (Argument *arg = kernel->arguments; arg; arg = arg->next) {
1877 if (arg->kind == Argument::Varying)
1878 ++num_varying_float;
1879 }
1880
1881 int num_packed_varying_float = ((num_varying_float + 3) & ~0x3) / 4;
1882
1883 for (int i = 0; i != num_packed_varying_float; ++i) {
1884 vertex_src << "attribute vec4 _varyingf" << i << "_attrib;\n";
1885 vertex_src << "varying vec4 _varyingf" << i << ";\n";
1886 }
1887
1888 vertex_src << "uniform ivec2 output_min;\n"
1889 << "uniform ivec2 output_extent;\n"
1890 << "void main() {\n"
1891
1892 // Host codegen always passes the spatial vertex coordinates
1893 // in the first two elements of the _varyingf0_attrib
1894 << " vec2 position = vec2(_varyingf0_attrib[0], _varyingf0_attrib[1]);\n"
1895 << " gl_Position = vec4(position, 0.0, 1.0);\n"
1896 << " vec2 texcoord = 0.5 * position + 0.5;\n"
1897 << " vec2 pixcoord = texcoord * vec2(output_extent.xy) + vec2(output_min.xy);\n";
1898
1899 // Copy through all of the varying attributes
1900 for (int i = 0; i != num_packed_varying_float; ++i) {
1901 vertex_src << " _varyingf" << i << " = _varyingf" << i << "_attrib;\n";
1902 }
1903
1904 vertex_src << " _varyingf0.xy = pixcoord;\n";
1905
1906 vertex_src << "}\n";
1907
1908 // Check to see if there was sufficient storage for the vertex program.
1909 if (vertex_src.size() >= PrinterLength) {
1910 error(user_context) << "Vertex shader source truncated";
1911 return 1;
1912 }
1913
1914 // Initialize vertex shader.
1915 GLuint vertex_shader_id = make_shader(user_context,
1916 GL_VERTEX_SHADER, vertex_src.buf, NULL);
1917 if (vertex_shader_id == 0) {
1918 halide_error(user_context, "Failed to create vertex shader");
1919 return 1;
1920 }
1921
1922 // Create the fragment shader
1923 GLuint fragment_shader_id = make_shader(user_context, GL_FRAGMENT_SHADER,
1924 kernel->source, NULL);
1925 // Link GLSL program
1926 GLuint program = global_state.CreateProgram();
1927 global_state.AttachShader(program, vertex_shader_id);
1928 global_state.AttachShader(program, fragment_shader_id);
1929 global_state.LinkProgram(program);
1930
1931 // Release the individual shaders
1932 global_state.DeleteShader(vertex_shader_id);
1933 global_state.DeleteShader(fragment_shader_id);
1934
1935 GLint status;
1936 global_state.GetProgramiv(program, GL_LINK_STATUS, &status);
1937 if (!status) {
1938 GLint log_len;
1939 global_state.GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_len);
1940 HalideMalloc log_tmp(user_context, log_len);
1941 if (log_tmp.ptr) {
1942 char *log = (char *)log_tmp.ptr;
1943 global_state.GetProgramInfoLog(program, log_len, NULL, log);
1944 debug(user_context) << "Could not link GLSL program:\n"
1945 << log << "\n";
1946 }
1947 global_state.DeleteProgram(program);
1948 return -1;
1949 }
1950 kernel->program_id = program;
1951
1952 this_kernel = next_kernel;
1953 }
1954 return 0;
1955 }
1956
halide_opengl_device_and_host_malloc(void * user_context,struct halide_buffer_t * buf)1957 WEAK int halide_opengl_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf) {
1958 return halide_default_device_and_host_malloc(user_context, buf, &opengl_device_interface);
1959 }
1960
halide_opengl_device_and_host_free(void * user_context,struct halide_buffer_t * buf)1961 WEAK int halide_opengl_device_and_host_free(void *user_context, struct halide_buffer_t *buf) {
1962 return halide_default_device_and_host_free(user_context, buf, &opengl_device_interface);
1963 }
1964
halide_opengl_device_interface()1965 WEAK const halide_device_interface_t *halide_opengl_device_interface() {
1966 return &opengl_device_interface;
1967 }
1968
halide_opengl_context_lost(void * user_context)1969 WEAK void halide_opengl_context_lost(void *user_context) {
1970 if (!global_state.initialized) return;
1971
1972 debug(user_context) << "halide_opengl_context_lost\n";
1973 for (ModuleState *mod = state_list; mod; mod = mod->next) {
1974 // Reset program handle to force recompilation.
1975 mod->kernel->program_id = 0;
1976 }
1977
1978 global_state.init();
1979 return;
1980 }
1981
halide_opengl_wrap_texture(void * user_context,halide_buffer_t * buf,uint64_t texture_id)1982 WEAK int halide_opengl_wrap_texture(void *user_context, halide_buffer_t *buf, uint64_t texture_id) {
1983 if (!global_state.initialized) {
1984 if (int error = halide_opengl_init(user_context)) {
1985 return error;
1986 }
1987 }
1988 if (texture_id == 0) {
1989 error(user_context) << "Texture " << texture_id << " is not a valid texture name.";
1990 return -3;
1991 }
1992 halide_assert(user_context, buf->device == 0);
1993 if (buf->device != 0) {
1994 return -2;
1995 }
1996 buf->device = texture_id;
1997 buf->device_interface = &opengl_device_interface;
1998 buf->device_interface->impl->use_module();
1999 return 0;
2000 }
2001
halide_opengl_wrap_render_target(void * user_context,halide_buffer_t * buf)2002 WEAK int halide_opengl_wrap_render_target(void *user_context, halide_buffer_t *buf) {
2003 if (!global_state.initialized) {
2004 if (int error = halide_opengl_init(user_context)) {
2005 return error;
2006 }
2007 }
2008 halide_assert(user_context, buf->device == 0);
2009 if (buf->device != 0) {
2010 return -2;
2011 }
2012 buf->device = HALIDE_OPENGL_RENDER_TARGET;
2013 buf->device_interface = &opengl_device_interface;
2014 buf->device_interface->impl->use_module();
2015 return 0;
2016 }
2017
halide_opengl_detach_texture(void * user_context,halide_buffer_t * buf)2018 WEAK int halide_opengl_detach_texture(void *user_context, halide_buffer_t *buf) {
2019 if (buf->device == 0) {
2020 return 0;
2021 }
2022
2023 halide_assert(user_context, buf->device_interface == &opengl_device_interface);
2024 buf->device = 0;
2025 buf->device_interface->impl->release_module();
2026 buf->device_interface = NULL;
2027 return 0;
2028 }
2029
halide_opengl_get_texture(void * user_context,halide_buffer_t * buf)2030 WEAK uintptr_t halide_opengl_get_texture(void *user_context, halide_buffer_t *buf) {
2031 if (buf->device == 0) {
2032 return 0;
2033 }
2034 halide_assert(user_context, buf->device_interface == &opengl_device_interface);
2035 uint64_t handle = buf->device;
2036 // client_bound always return 0 here.
2037 return handle == HALIDE_OPENGL_RENDER_TARGET ? 0 : (uintptr_t)handle;
2038 }
2039
2040 namespace {
halide_opengl_cleanup()2041 WEAK __attribute__((destructor)) void halide_opengl_cleanup() {
2042 halide_opengl_device_release(NULL);
2043 }
2044 } // namespace
2045
2046 } // extern "C"
2047
namespace Halide {
namespace Runtime {
namespace Internal {
namespace OpenGL {

// OpenGL-specific implementation table. The initializer is positional, so
// the order of the entries must match the field order of
// halide_device_interface_impl_t.
WEAK halide_device_interface_impl_t opengl_device_interface_impl = {
    // Module reference counting hooks (presumably the use_module /
    // release_module members invoked by the wrap/detach functions — confirm
    // against the struct declaration).
    halide_use_jit_module,
    halide_release_jit_module,
    // Operations implemented by the OpenGL runtime itself.
    halide_opengl_device_malloc,
    halide_opengl_device_free,
    halide_opengl_device_sync,
    halide_opengl_device_release,
    halide_opengl_copy_to_host,
    halide_opengl_copy_to_device,
    halide_opengl_device_and_host_malloc,
    halide_opengl_device_and_host_free,
    // Operations that use the runtime's halide_default_* implementations.
    halide_default_buffer_copy,
    halide_default_device_crop,
    halide_default_device_slice,
    halide_default_device_release_crop,
    // The last two slots are filled with the texture wrap/detach functions.
    halide_opengl_wrap_texture,
    halide_opengl_detach_texture};

// Device interface table for OpenGL. Its address is returned by
// halide_opengl_device_interface() and stored in buffers by the wrap
// functions. The initializer is positional, so the order of the entries must
// match the field order of halide_device_interface_t.
WEAK halide_device_interface_t opengl_device_interface = {
    halide_device_malloc,
    halide_device_free,
    halide_device_sync,
    halide_device_release,
    halide_copy_to_host,
    halide_copy_to_device,
    halide_device_and_host_malloc,
    halide_device_and_host_free,
    halide_buffer_copy,
    halide_device_crop,
    halide_device_slice,
    halide_device_release_crop,
    halide_device_wrap_native,
    halide_device_detach_native,
    // NOTE(review): this slot is deliberately left NULL; which member it
    // corresponds to should be confirmed against halide_device_interface_t.
    NULL,
    // Pointer to the OpenGL implementation table above (accessed as ->impl).
    &opengl_device_interface_impl};

}  // namespace OpenGL
}  // namespace Internal
}  // namespace Runtime
}  // namespace Halide
2093