1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 #include <stdlib.h>
6 #include <stdint.h>
7 #include <string.h>
8 #include <assert.h>
9 #include <stdio.h>
10 #include <math.h>
11 
12 #ifdef __MACH__
13 #  include <mach/mach.h>
14 #  include <mach/mach_time.h>
15 #else
16 #  include <time.h>
17 #endif
18 
19 #ifdef NDEBUG
20 #  define debugf(...)
21 #else
22 #  define debugf(...) printf(__VA_ARGS__)
23 #endif
24 
25 // #define PRINT_TIMINGS
26 
27 #ifdef _WIN32
28 #  define ALWAYS_INLINE __forceinline
29 #  define NO_INLINE __declspec(noinline)
30 
31 // Including Windows.h brings a huge amount of namespace polution so just
32 // define a couple of things manually
33 typedef int BOOL;
34 #  define WINAPI __stdcall
35 #  define DECLSPEC_IMPORT __declspec(dllimport)
36 #  define WINBASEAPI DECLSPEC_IMPORT
37 typedef unsigned long DWORD;
38 typedef long LONG;
39 typedef __int64 LONGLONG;
40 #  define DUMMYSTRUCTNAME
41 
42 typedef union _LARGE_INTEGER {
43   struct {
44     DWORD LowPart;
45     LONG HighPart;
46   } DUMMYSTRUCTNAME;
47   struct {
48     DWORD LowPart;
49     LONG HighPart;
50   } u;
51   LONGLONG QuadPart;
52 } LARGE_INTEGER;
53 extern "C" {
54 WINBASEAPI BOOL WINAPI
55 QueryPerformanceCounter(LARGE_INTEGER* lpPerformanceCount);
56 
57 WINBASEAPI BOOL WINAPI QueryPerformanceFrequency(LARGE_INTEGER* lpFrequency);
58 }
59 
60 #else
61 // GCC is slower when dealing with always_inline, especially in debug builds.
62 // When using Clang, use always_inline more aggressively.
63 #  if defined(__clang__) || defined(NDEBUG)
64 #    define ALWAYS_INLINE __attribute__((always_inline)) inline
65 #  else
66 #    define ALWAYS_INLINE inline
67 #  endif
68 #  define NO_INLINE __attribute__((noinline))
69 #endif
70 
71 // Some functions may cause excessive binary bloat if inlined in debug or with
72 // GCC builds, so use PREFER_INLINE on these instead of ALWAYS_INLINE.
73 #if defined(__clang__) && defined(NDEBUG)
74 #  define PREFER_INLINE ALWAYS_INLINE
75 #else
76 #  define PREFER_INLINE inline
77 #endif
78 
79 #define UNREACHABLE __builtin_unreachable()
80 
81 #define UNUSED [[maybe_unused]]
82 
83 #define FALLTHROUGH [[fallthrough]]
84 
85 #if defined(MOZILLA_CLIENT) && defined(MOZ_CLANG_PLUGIN)
86 #  define IMPLICIT __attribute__((annotate("moz_implicit")))
87 #else
88 #  define IMPLICIT
89 #endif
90 
91 #include "gl_defs.h"
92 #include "glsl.h"
93 #include "program.h"
94 #include "texture.h"
95 
96 using namespace glsl;
97 
98 typedef ivec2_scalar IntPoint;
99 
100 struct IntRect {
101   int x0;
102   int y0;
103   int x1;
104   int y1;
105 
IntRectIntRect106   IntRect() : x0(0), y0(0), x1(0), y1(0) {}
IntRectIntRect107   IntRect(int x0, int y0, int x1, int y1) : x0(x0), y0(y0), x1(x1), y1(y1) {}
IntRectIntRect108   IntRect(IntPoint origin, IntPoint size)
109       : x0(origin.x),
110         y0(origin.y),
111         x1(origin.x + size.x),
112         y1(origin.y + size.y) {}
113 
widthIntRect114   int width() const { return x1 - x0; }
heightIntRect115   int height() const { return y1 - y0; }
is_emptyIntRect116   bool is_empty() const { return width() <= 0 || height() <= 0; }
117 
originIntRect118   IntPoint origin() const { return IntPoint(x0, y0); }
119 
same_sizeIntRect120   bool same_size(const IntRect& o) const {
121     return width() == o.width() && height() == o.height();
122   }
123 
containsIntRect124   bool contains(const IntRect& o) const {
125     return o.x0 >= x0 && o.y0 >= y0 && o.x1 <= x1 && o.y1 <= y1;
126   }
127 
intersectIntRect128   IntRect& intersect(const IntRect& o) {
129     x0 = max(x0, o.x0);
130     y0 = max(y0, o.y0);
131     x1 = min(x1, o.x1);
132     y1 = min(y1, o.y1);
133     return *this;
134   }
135 
intersectionIntRect136   IntRect intersection(const IntRect& o) {
137     IntRect result = *this;
138     result.intersect(o);
139     return result;
140   }
141 
142   // Scale from source-space to dest-space, optionally rounding inward
scaleIntRect143   IntRect& scale(int srcWidth, int srcHeight, int dstWidth, int dstHeight,
144                  bool roundIn = false) {
145     x0 = (x0 * dstWidth + (roundIn ? srcWidth - 1 : 0)) / srcWidth;
146     y0 = (y0 * dstHeight + (roundIn ? srcHeight - 1 : 0)) / srcHeight;
147     x1 = (x1 * dstWidth) / srcWidth;
148     y1 = (y1 * dstHeight) / srcHeight;
149     return *this;
150   }
151 
152   // Flip the rect's Y coords around inflection point at Y=offset
invert_yIntRect153   void invert_y(int offset) {
154     y0 = offset - y0;
155     y1 = offset - y1;
156     swap(y0, y1);
157   }
158 
offsetIntRect159   IntRect& offset(const IntPoint& o) {
160     x0 += o.x;
161     y0 += o.y;
162     x1 += o.x;
163     y1 += o.y;
164     return *this;
165   }
166 
operator +IntRect167   IntRect operator+(const IntPoint& o) const {
168     return IntRect(*this).offset(o);
169   }
operator -IntRect170   IntRect operator-(const IntPoint& o) const {
171     return IntRect(*this).offset(-o);
172   }
173 };
174 
175 typedef vec2_scalar Point2D;
176 typedef vec4_scalar Point3D;
177 
// A 1-dimensional integer span [start, end).
struct IntRange {
  int start;
  int end;

  // Number of elements covered by the span.
  int len() const { return end - start; }

  // Overlap of this span with r; the result may be empty (len() <= 0).
  IntRange intersect(IntRange r) const {
    IntRange clipped = *this;
    if (r.start > clipped.start) clipped.start = r.start;
    if (r.end < clipped.end) clipped.end = r.end;
    return clipped;
  }
};
188 
189 struct FloatRange {
190   float start;
191   float end;
192 
clipFloatRange193   float clip(float x) const { return clamp(x, start, end); }
194 
clipFloatRange195   FloatRange clip(FloatRange r) const { return {clip(r.start), clip(r.end)}; }
196 
mergeFloatRange197   FloatRange merge(FloatRange r) const {
198     return {min(start, r.start), max(end, r.end)};
199   }
200 
roundFloatRange201   IntRange round() const {
202     return {int(floor(start + 0.5f)), int(floor(end + 0.5f))};
203   }
204 
round_outFloatRange205   IntRange round_out() const { return {int(floor(start)), int(ceil(end))}; }
206 };
207 
208 template <typename P>
x_range(P p0,P p1)209 static inline FloatRange x_range(P p0, P p1) {
210   return {min(p0.x, p1.x), max(p0.x, p1.x)};
211 }
212 
// Captured state for one vertex attribute slot, mirroring the parameters of
// glVertexAttribPointer plus the resolved buffer pointer used at draw time.
struct VertexAttrib {
  size_t size = 0;  // in bytes
  GLenum type = 0;
  bool normalized = false;
  GLsizei stride = 0;
  GLuint offset = 0;
  bool enabled = false;
  // Instancing divisor as in glVertexAttribDivisor: 0 means per-vertex
  // advancement, non-zero means per-instance.
  GLuint divisor = 0;
  // Object names of the VAO and VBO this attribute was captured from.
  int vertex_array = 0;
  int vertex_buffer = 0;
  char* buf = nullptr;  // XXX: this can easily dangle
  size_t buf_size = 0;  // this will let us bounds check

  // Mark the buffer as invalid so we don't accidentally use stale data.
  void disable() {
    enabled = false;
    buf = nullptr;
    buf_size = 0;
  }
};
233 
bytes_for_internal_format(GLenum internal_format)234 static int bytes_for_internal_format(GLenum internal_format) {
235   switch (internal_format) {
236     case GL_RGBA32F:
237       return 4 * 4;
238     case GL_RGBA32I:
239       return 4 * 4;
240     case GL_RGBA8:
241     case GL_BGRA8:
242     case GL_RGBA:
243       return 4;
244     case GL_R8:
245     case GL_RED:
246       return 1;
247     case GL_RG8:
248     case GL_RG:
249       return 2;
250     case GL_DEPTH_COMPONENT:
251     case GL_DEPTH_COMPONENT16:
252     case GL_DEPTH_COMPONENT24:
253     case GL_DEPTH_COMPONENT32:
254       return 4;
255     case GL_RGB_RAW_422_APPLE:
256       return 2;
257     case GL_R16:
258       return 2;
259     case GL_RG16:
260       return 4;
261     default:
262       debugf("internal format: %x\n", internal_format);
263       assert(0);
264       return 0;
265   }
266 }
267 
// Round a row length in bytes up to the next multiple of 4 (the default GL
// unpack alignment).
static inline int aligned_stride(int row_bytes) {
  const int align_mask = 3;
  return (row_bytes + align_mask) & ~align_mask;
}
269 
gl_format_to_texture_format(int type)270 static TextureFormat gl_format_to_texture_format(int type) {
271   switch (type) {
272     case GL_RGBA32F:
273       return TextureFormat::RGBA32F;
274     case GL_RGBA32I:
275       return TextureFormat::RGBA32I;
276     case GL_RGBA8:
277       return TextureFormat::RGBA8;
278     case GL_R8:
279       return TextureFormat::R8;
280     case GL_RG8:
281       return TextureFormat::RG8;
282     case GL_R16:
283       return TextureFormat::R16;
284     case GL_RG16:
285       return TextureFormat::RG16;
286     case GL_RGB_RAW_422_APPLE:
287       return TextureFormat::YUV422;
288     default:
289       assert(0);
290       return TextureFormat::RGBA8;
291   }
292 }
293 
// GL query object; holds the accumulated counter value (e.g. elapsed time
// or samples passed, depending on the query target).
struct Query {
  uint64_t value = 0;
};
297 
// A growable byte buffer backing a GL buffer object. The allocation is kept
// when shrinking so repeated resize cycles avoid reallocation churn.
struct Buffer {
  char* buf = nullptr;
  size_t size = 0;      // bytes currently in use
  size_t capacity = 0;  // bytes actually allocated

  // Resize to new_size bytes. Only reallocates when the request exceeds the
  // current capacity. Returns true on success; on allocation failure the
  // buffer is reset to the empty state and false is returned.
  bool allocate(size_t new_size) {
    // A request within capacity (including an unchanged size, since size
    // never exceeds capacity) just adjusts the in-use size.
    if (new_size <= capacity) {
      size = new_size;
      return true;
    }
    // Need a larger allocation to satisfy the request.
    char* grown = (char*)realloc(buf, new_size);
    assert(grown);
    if (!grown) {
      // Don't leave stale allocation state behind on failure.
      cleanup();
      return false;
    }
    buf = grown;
    size = new_size;
    capacity = new_size;
    return true;
  }

  // Free the allocation and return to the empty state.
  void cleanup() {
    if (!buf) {
      return;
    }
    free(buf);
    buf = nullptr;
    size = 0;
    capacity = 0;
  }

  ~Buffer() { cleanup(); }
};
343 
// Framebuffer object state: the object names of the attached color and
// depth buffers (0 when nothing is attached).
struct Framebuffer {
  GLuint color_attachment = 0;
  GLuint depth_attachment = 0;
};
348 
// Renderbuffer object; backed by a texture object name.
struct Renderbuffer {
  GLuint texture = 0;

  // Cleanup hook invoked by ObjectStore::erase via its SFINAE dispatch;
  // defined later in the file.
  void on_erase();
};
354 
gl_filter_to_texture_filter(int type)355 TextureFilter gl_filter_to_texture_filter(int type) {
356   switch (type) {
357     case GL_NEAREST:
358       return TextureFilter::NEAREST;
359     case GL_NEAREST_MIPMAP_LINEAR:
360       return TextureFilter::NEAREST;
361     case GL_NEAREST_MIPMAP_NEAREST:
362       return TextureFilter::NEAREST;
363     case GL_LINEAR:
364       return TextureFilter::LINEAR;
365     case GL_LINEAR_MIPMAP_LINEAR:
366       return TextureFilter::LINEAR;
367     case GL_LINEAR_MIPMAP_NEAREST:
368       return TextureFilter::LINEAR;
369     default:
370       assert(0);
371       return TextureFilter::NEAREST;
372   }
373 }
374 
// A texture object and its backing pixel storage. The storage may be
// allocated internally (SHOULD_FREE flag) or supplied externally via
// set_buffer(). Also implements the delayed-clear optimization for
// framebuffer attachments.
struct Texture {
  GLenum internal_format = 0;
  int width = 0;
  int height = 0;
  // Backing pixel storage and its geometry.
  char* buf = nullptr;
  size_t buf_size = 0;
  uint32_t buf_stride = 0;  // bytes per row
  uint8_t buf_bpp = 0;      // bytes per pixel
  GLenum min_filter = GL_NEAREST;
  GLenum mag_filter = GL_LINEAR;
  // The number of active locks on this texture. If this texture has any active
  // locks, we need to disallow modifying or destroying the texture as it may
  // be accessed by other threads where modifications could lead to races.
  int32_t locked = 0;
  // When used as an attachment of a framebuffer, rendering to the texture
  // behaves as if it is located at the given offset such that the offset is
  // subtracted from all transformed vertexes after the viewport is applied.
  IntPoint offset;

  enum FLAGS {
    // If the buffer is internally-allocated by SWGL
    SHOULD_FREE = 1 << 1,
    // If the buffer has been cleared to initialize it. Currently this is only
    // utilized by depth buffers which need to know when depth runs have reset
    // to a valid row state. When unset, the depth runs may contain garbage.
    CLEARED = 1 << 2,
  };
  int flags = SHOULD_FREE;
  bool should_free() const { return bool(flags & SHOULD_FREE); }
  bool cleared() const { return bool(flags & CLEARED); }

  // Set or clear a single FLAGS bit.
  void set_flag(int flag, bool val) {
    if (val) {
      flags |= flag;
    } else {
      flags &= ~flag;
    }
  }
  void set_should_free(bool val) {
    // buf must be null before SHOULD_FREE can be safely toggled. Otherwise, we
    // might accidentally mistakenly realloc an externally allocated buffer as
    // if it were an internally allocated one.
    assert(!buf);
    set_flag(SHOULD_FREE, val);
  }
  void set_cleared(bool val) { set_flag(CLEARED, val); }

  // Delayed-clearing state. When a clear of an FB is requested, we don't
  // immediately clear each row, as the rows may be subsequently overwritten
  // by draw calls, allowing us to skip the work of clearing the affected rows
  // either fully or partially. Instead, we keep a bit vector of rows that need
  // to be cleared later and save the value they need to be cleared with so
  // that we can clear these rows individually when they are touched by draws.
  // This currently only works for 2D textures, but not on texture arrays.
  int delay_clear = 0;  // number of rows still pending a clear
  uint32_t clear_val = 0;
  // One bit per row; a set bit means the row has been cleared (or never
  // needs clearing). Allocated lazily by enable_delayed_clear().
  uint32_t* cleared_rows = nullptr;

  void init_depth_runs(uint32_t z);
  void fill_depth_runs(uint32_t z, const IntRect& scissor);

  // Arm delayed clearing: mark every row as pending a clear to `val`.
  void enable_delayed_clear(uint32_t val) {
    delay_clear = height;
    clear_val = val;
    // NOTE(review): cleared_rows is only allocated when currently null; if
    // height grows between calls this bitmask could be undersized — confirm
    // callers always go through disable_delayed_clear() when resizing.
    if (!cleared_rows) {
      cleared_rows = new uint32_t[(height + 31) / 32];
    }
    memset(cleared_rows, 0, ((height + 31) / 32) * sizeof(uint32_t));
    // Pre-set the out-of-range bits of the final word so they never count
    // as rows needing a clear.
    if (height & 31) {
      cleared_rows[height / 32] = ~0U << (height & 31);
    }
  }

  // Drop any pending delayed clear and release the row bitmask.
  void disable_delayed_clear() {
    if (cleared_rows) {
      delete[] cleared_rows;
      cleared_rows = nullptr;
      delay_clear = 0;
    }
  }

  int bpp() const { return buf_bpp; }
  // Derive bytes-per-pixel from the current internal format.
  void set_bpp() { buf_bpp = bytes_for_internal_format(internal_format); }

  size_t stride() const { return buf_stride; }
  // Derive the row stride from the current bpp and width.
  void set_stride() { buf_stride = aligned_stride(buf_bpp * width); }

  // Set an external backing buffer of this texture.
  void set_buffer(void* new_buf, size_t new_stride) {
    assert(!should_free());
    // Ensure that the supplied stride is at least as big as the row data and
    // is aligned to the smaller of either the BPP or word-size. We need to at
    // least be able to sample data from within a row and sample whole pixels
    // of smaller formats without risking unaligned access.
    set_bpp();
    set_stride();
    assert(new_stride >= size_t(bpp() * width) &&
           new_stride % min(bpp(), sizeof(uint32_t)) == 0);

    buf = (char*)new_buf;
    buf_size = 0;
    buf_stride = new_stride;
  }

  // Returns true if re-allocation succeeded, false otherwise...
  bool allocate(bool force = false, int min_width = 0, int min_height = 0) {
    assert(!locked);  // Locked textures shouldn't be reallocated
    // If we get here, some GL API call that invalidates the texture was used.
    // Mark the buffer as not-cleared to signal this.
    set_cleared(false);
    // Check if there is either no buffer currently or if we forced validation
    // of the buffer size because some dimension might have changed.
    if ((!buf || force) && should_free()) {
      // Initialize the buffer's BPP and stride, since they may have changed.
      set_bpp();
      set_stride();
      // Compute new size based on the maximum potential stride, rather than
      // the current stride, to hopefully avoid reallocations when size would
      // otherwise change too much...
      size_t max_stride = max(buf_stride, aligned_stride(buf_bpp * min_width));
      size_t size = max_stride * max(height, min_height);
      if ((!buf && size > 0) || size > buf_size) {
        // Allocate with a SIMD register-sized tail of padding at the end so we
        // can safely read or write past the end of the texture with SIMD ops.
        // Currently only the flat Z-buffer texture needs this padding due to
        // full-register loads and stores in check_depth and discard_depth. In
        // case some code in the future accidentally uses a linear filter on a
        // texture with less than 2 pixels per row, we also add this padding
        // just to be safe. All other texture types and use-cases should be
        // safe to omit padding.
        size_t padding =
            internal_format == GL_DEPTH_COMPONENT24 || max(width, min_width) < 2
                ? sizeof(Float)
                : 0;
        char* new_buf = (char*)realloc(buf, size + padding);
        assert(new_buf);
        if (new_buf) {
          // Successfully reallocated the buffer, so go ahead and set it.
          buf = new_buf;
          buf_size = size;
          return true;
        }
        // Allocation failed, so ensure we don't leave stale buffer state.
        cleanup();
        return false;
      }
    }
    // Nothing changed...
    return true;
  }

  // Release the backing buffer (only if internally owned) and reset all
  // buffer-related state, including any pending delayed clear.
  void cleanup() {
    assert(!locked);  // Locked textures shouldn't be destroyed
    if (buf) {
      // If we need to toggle SHOULD_FREE state, ensure that buf is nulled out,
      // regardless of whether we internally allocated it. This will prevent us
      // from wrongly treating buf as having been internally allocated for when
      // we go to realloc if it actually was externally allocted.
      if (should_free()) {
        free(buf);
      }
      buf = nullptr;
      buf_size = 0;
      buf_bpp = 0;
      buf_stride = 0;
    }
    disable_delayed_clear();
  }

  ~Texture() { cleanup(); }

  // Rect covering the whole texture in texture-local coordinates.
  IntRect bounds() const { return IntRect{0, 0, width, height}; }
  IntRect offset_bounds() const { return bounds() + offset; }

  // Find the valid sampling bounds relative to the requested region
  IntRect sample_bounds(const IntRect& req, bool invertY = false) const {
    IntRect bb = bounds().intersect(req) - req.origin();
    if (invertY) bb.invert_y(req.height());
    return bb;
  }

  // Get a pointer for sampling at the given offset
  char* sample_ptr(int x, int y) const {
    return buf + y * stride() + x * bpp();
  }

  // Get a pointer for sampling the requested region and limit to the provided
  // sampling bounds
  char* sample_ptr(const IntRect& req, const IntRect& bounds,
                   bool invertY = false) const {
    // Offset the sample pointer by the clamped bounds
    int x = req.x0 + bounds.x0;
    // Invert the Y offset if necessary
    int y = invertY ? req.y1 - 1 - bounds.y0 : req.y0 + bounds.y0;
    return sample_ptr(x, y);
  }
};
572 
573 // The last vertex attribute is reserved as a null attribute in case a vertex
574 // attribute is used without being set.
575 #define MAX_ATTRIBS 17
576 #define NULL_ATTRIB 16
// Vertex array object (VAO): the full set of vertex attribute bindings.
struct VertexArray {
  VertexAttrib attribs[MAX_ATTRIBS];
  // Largest attribute index in use, or -1 when none — presumably maintained
  // by the attribute setup paths elsewhere in the file.
  int max_attrib = -1;
  // The GL spec defines element array buffer binding to be part of VAO state.
  GLuint element_array_buffer_binding = 0;

  // Re-resolve attribute buffer pointers; defined later in the file.
  void validate();
};
585 
// Shader object: records the shader stage type and the loader used to
// instantiate the corresponding precompiled program implementation.
struct Shader {
  GLenum type = 0;
  ProgramLoader loader = nullptr;
};
590 
// Linked program object wrapping the precompiled program implementation and
// its per-stage entry points.
struct Program {
  ProgramImpl* impl = nullptr;
  VertexShaderImpl* vert_impl = nullptr;
  FragmentShaderImpl* frag_impl = nullptr;
  // Set when glDeleteProgram is requested while the program is still in use.
  bool deleted = false;

  // NOTE(review): only impl is deleted here; vert_impl/frag_impl appear to
  // be owned by impl — confirm before adding separate deletes.
  ~Program() { delete impl; }
};
599 
600 // clang-format off
601 // Fully-expand GL defines while ignoring more than 4 suffixes
602 #define CONCAT_KEY(prefix, x, y, z, w, ...) prefix##x##y##z##w
603 // Generate a blend key enum symbol
604 #define BLEND_KEY(...) CONCAT_KEY(BLEND_, __VA_ARGS__, 0, 0, 0)
605 #define MASK_BLEND_KEY(...) CONCAT_KEY(MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
606 #define AA_BLEND_KEY(...) CONCAT_KEY(AA_BLEND_, __VA_ARGS__, 0, 0, 0)
607 #define AA_MASK_BLEND_KEY(...) CONCAT_KEY(AA_MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
608 
609 // Utility macro to easily generate similar code for all implemented blend modes
610 #define FOR_EACH_BLEND_KEY(macro)                                              \
611   macro(GL_ONE, GL_ZERO, 0, 0)                                                 \
612   macro(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA)  \
613   macro(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, 0, 0)                                  \
614   macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, 0, 0)                                 \
615   macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, GL_ZERO, GL_ONE)                      \
616   macro(GL_ZERO, GL_ONE_MINUS_SRC_ALPHA, 0, 0)                                 \
617   macro(GL_ZERO, GL_SRC_COLOR, 0, 0)                                           \
618   macro(GL_ONE, GL_ONE, 0, 0)                                                  \
619   macro(GL_ONE, GL_ONE, GL_ONE, GL_ONE_MINUS_SRC_ALPHA)                        \
620   macro(GL_ONE_MINUS_DST_ALPHA, GL_ONE, GL_ZERO, GL_ONE)                       \
621   macro(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_COLOR, 0, 0)                       \
622   macro(GL_ONE, GL_ONE_MINUS_SRC1_COLOR, 0, 0)                                 \
623   macro(GL_MIN, 0, 0, 0)                                                       \
624   macro(GL_MAX, 0, 0, 0)                                                       \
625   macro(GL_MULTIPLY_KHR, 0, 0, 0)                                              \
626   macro(GL_SCREEN_KHR, 0, 0, 0)                                                \
627   macro(GL_OVERLAY_KHR, 0, 0, 0)                                               \
628   macro(GL_DARKEN_KHR, 0, 0, 0)                                                \
629   macro(GL_LIGHTEN_KHR, 0, 0, 0)                                               \
630   macro(GL_COLORDODGE_KHR, 0, 0, 0)                                            \
631   macro(GL_COLORBURN_KHR, 0, 0, 0)                                             \
632   macro(GL_HARDLIGHT_KHR, 0, 0, 0)                                             \
633   macro(GL_SOFTLIGHT_KHR, 0, 0, 0)                                             \
634   macro(GL_DIFFERENCE_KHR, 0, 0, 0)                                            \
635   macro(GL_EXCLUSION_KHR, 0, 0, 0)                                             \
636   macro(GL_HSL_HUE_KHR, 0, 0, 0)                                               \
637   macro(GL_HSL_SATURATION_KHR, 0, 0, 0)                                        \
638   macro(GL_HSL_COLOR_KHR, 0, 0, 0)                                             \
639   macro(GL_HSL_LUMINOSITY_KHR, 0, 0, 0)                                        \
640   macro(SWGL_BLEND_DROP_SHADOW, 0, 0, 0)                                       \
641   macro(SWGL_BLEND_SUBPIXEL_TEXT, 0, 0, 0)
642 
// Expand the blend-mode list four times so each (masked?, anti-aliased?)
// combination gets its own contiguous run of enum values.
#define DEFINE_BLEND_KEY(...) BLEND_KEY(__VA_ARGS__),
#define DEFINE_MASK_BLEND_KEY(...) MASK_BLEND_KEY(__VA_ARGS__),
#define DEFINE_AA_BLEND_KEY(...) AA_BLEND_KEY(__VA_ARGS__),
#define DEFINE_AA_MASK_BLEND_KEY(...) AA_MASK_BLEND_KEY(__VA_ARGS__),
enum BlendKey : uint8_t {
  FOR_EACH_BLEND_KEY(DEFINE_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_MASK_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_AA_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_AA_MASK_BLEND_KEY)
  // The *_NONE keys alias the GL_ONE/GL_ZERO (no-blend) entry of each run.
  BLEND_KEY_NONE = BLEND_KEY(GL_ONE, GL_ZERO),
  MASK_BLEND_KEY_NONE = MASK_BLEND_KEY(GL_ONE, GL_ZERO),
  AA_BLEND_KEY_NONE = AA_BLEND_KEY(GL_ONE, GL_ZERO),
  AA_MASK_BLEND_KEY_NONE = AA_MASK_BLEND_KEY(GL_ONE, GL_ZERO),
};
// clang-format on
658 
659 const size_t MAX_TEXTURE_UNITS = 16;
660 
// If `binding` currently refers to object n, reset it to 0 (the null
// object) and report whether anything was unbound.
template <typename T>
static inline bool unlink(T& binding, T n) {
  bool was_bound = binding == n;
  if (was_bound) {
    binding = 0;
  }
  return was_bound;
}
669 
// A sparse table of heap-allocated GL objects of type O, addressed by
// integer object name. Name 0 is reserved as the null object. Out-of-range
// lookups through operator[] yield the shared `invalid` object instead of
// faulting.
template <typename O>
struct ObjectStore {
  O** objects = nullptr;
  size_t size = 0;
  // reserve object 0 as null
  size_t first_free = 1;
  // Fallback object handed out for out-of-range accesses.
  O invalid;

  ~ObjectStore() {
    if (objects) {
      for (size_t i = 0; i < size; i++) delete objects[i];
      free(objects);
    }
  }

  // Grow the table so index i is addressable (geometric 1.5x growth).
  // Returns false if allocation failed; existing entries are left intact.
  bool grow(size_t i) {
    size_t new_size = size ? size : 8;
    while (new_size <= i) new_size += new_size / 2;
    O** new_objects = (O**)realloc(objects, new_size * sizeof(O*));
    assert(new_objects);
    if (!new_objects) return false;
    while (size < new_size) new_objects[size++] = nullptr;
    objects = new_objects;
    return true;
  }

  // Place a copy of o at index i, unless an object already lives there.
  void insert(size_t i, const O& o) {
    if (i >= size && !grow(i)) return;
    if (!objects[i]) objects[i] = new O(o);
  }

  // Scan forward from the cached free-slot hint to the next unused index.
  size_t next_free() {
    size_t i = first_free;
    while (i < size && objects[i]) i++;
    first_free = i;
    return i;
  }

  // Insert a copy of o at the next free index and return that index.
  size_t insert(const O& o = O()) {
    size_t i = next_free();
    insert(i, o);
    return i;
  }

  // Look up index i, default-constructing an object there on demand.
  // Returns `invalid` if the table could not be grown to cover i.
  O& operator[](size_t i) {
    insert(i, O());
    return i < size ? *objects[i] : invalid;
  }

  // Look up index i without creating anything; null when absent.
  O* find(size_t i) const { return i < size ? objects[i] : nullptr; }

  // SFINAE dispatch: the second overload is selected (and calls
  // o->on_erase()) only when O declares an on_erase() member.
  template <typename T>
  void on_erase(T*, ...) {}
  template <typename T>
  void on_erase(T* o, decltype(&T::on_erase)) {
    o->on_erase();
  }

  // Destroy the object at index i, if any. Returns true if one existed.
  bool erase(size_t i) {
    if (i < size && objects[i]) {
      on_erase(objects[i], nullptr);
      delete objects[i];
      objects[i] = nullptr;
      if (i < first_free) first_free = i;
      return true;
    }
    return false;
  }

  // NOTE(review): when the store is empty, end() forms &nullptr[0] —
  // apparently benign here but technically questionable; confirm callers
  // never iterate an empty store.
  O** begin() const { return objects; }
  O** end() const { return &objects[size]; }
};
742 
// The complete state of one software GL context. A single instance is
// installed in the global `ctx` pointer below.
struct Context {
  // Reference count used by context create/destroy management.
  int32_t references = 1;

  // Name tables for every GL object type.
  ObjectStore<Query> queries;
  ObjectStore<Buffer> buffers;
  ObjectStore<Texture> textures;
  ObjectStore<VertexArray> vertex_arrays;
  ObjectStore<Framebuffer> framebuffers;
  ObjectStore<Renderbuffer> renderbuffers;
  ObjectStore<Shader> shaders;
  ObjectStore<Program> programs;

  // Sticky error code reported by glGetError.
  GLenum last_error = GL_NO_ERROR;

  IntRect viewport = {0, 0, 0, 0};

  // Blend state (glBlendFunc*/glBlendEquation/glBlendColor) plus the
  // derived blend_key selecting a specialized blend routine.
  bool blend = false;
  GLenum blendfunc_srgb = GL_ONE;
  GLenum blendfunc_drgb = GL_ZERO;
  GLenum blendfunc_sa = GL_ONE;
  GLenum blendfunc_da = GL_ZERO;
  GLenum blend_equation = GL_FUNC_ADD;
  V8<uint16_t> blendcolor = 0;
  BlendKey blend_key = BLEND_KEY_NONE;

  // Depth test state.
  bool depthtest = false;
  bool depthmask = true;
  GLenum depthfunc = GL_LESS;

  // Scissor test state.
  bool scissortest = false;
  IntRect scissor = {0, 0, 0, 0};

  // Clear values used by glClear.
  GLfloat clearcolor[4] = {0, 0, 0, 0};
  GLdouble cleardepth = 1;

  int unpack_row_length = 0;

  // Rasterization statistics.
  int shaded_rows = 0;
  int shaded_pixels = 0;

  // Per-unit texture bindings for the two supported texture targets.
  struct TextureUnit {
    GLuint texture_2d_binding = 0;
    GLuint texture_rectangle_binding = 0;

    // Clear any binding in this unit that refers to texture n.
    void unlink(GLuint n) {
      ::unlink(texture_2d_binding, n);
      ::unlink(texture_rectangle_binding, n);
    }
  };
  TextureUnit texture_units[MAX_TEXTURE_UNITS];
  int active_texture_unit = 0;

  GLuint current_program = 0;

  GLuint current_vertex_array = 0;
  bool validate_vertex_array = true;

  // Current object binding for each bind target.
  GLuint pixel_pack_buffer_binding = 0;
  GLuint pixel_unpack_buffer_binding = 0;
  GLuint array_buffer_binding = 0;
  GLuint time_elapsed_query = 0;
  GLuint samples_passed_query = 0;
  GLuint renderbuffer_binding = 0;
  GLuint draw_framebuffer_binding = 0;
  GLuint read_framebuffer_binding = 0;
  // Dummy slot returned (after asserting) for unrecognized targets.
  GLuint unknown_binding = 0;

  // Map a GL bind target enum to the field that stores its binding.
  GLuint& get_binding(GLenum name) {
    switch (name) {
      case GL_PIXEL_PACK_BUFFER:
        return pixel_pack_buffer_binding;
      case GL_PIXEL_UNPACK_BUFFER:
        return pixel_unpack_buffer_binding;
      case GL_ARRAY_BUFFER:
        return array_buffer_binding;
      case GL_ELEMENT_ARRAY_BUFFER:
        // Element array binding is part of the current VAO's state.
        return vertex_arrays[current_vertex_array].element_array_buffer_binding;
      case GL_TEXTURE_2D:
        return texture_units[active_texture_unit].texture_2d_binding;
      case GL_TEXTURE_RECTANGLE:
        return texture_units[active_texture_unit].texture_rectangle_binding;
      case GL_TIME_ELAPSED:
        return time_elapsed_query;
      case GL_SAMPLES_PASSED:
        return samples_passed_query;
      case GL_RENDERBUFFER:
        return renderbuffer_binding;
      case GL_DRAW_FRAMEBUFFER:
        return draw_framebuffer_binding;
      case GL_READ_FRAMEBUFFER:
        return read_framebuffer_binding;
      default:
        debugf("unknown binding %x\n", name);
        assert(false);
        return unknown_binding;
    }
  }

  // Resolve the texture bound to the given sampler type's target on a unit.
  Texture& get_texture(sampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(isampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(sampler2DRect, int unit) {
    return textures[texture_units[unit].texture_rectangle_binding];
  }

  // Clip bb against the scissor rect (translated by origin) when scissor
  // testing is enabled; otherwise pass bb through unchanged.
  IntRect apply_scissor(IntRect bb,
                        const IntPoint& origin = IntPoint(0, 0)) const {
    return scissortest ? bb.intersect(scissor - origin) : bb;
  }

  IntRect apply_scissor(const Texture& t) const {
    return apply_scissor(t.bounds(), t.offset);
  }
};
862 static Context* ctx = nullptr;
863 static VertexShaderImpl* vertex_shader = nullptr;
864 static FragmentShaderImpl* fragment_shader = nullptr;
865 static BlendKey blend_key = BLEND_KEY_NONE;
866 
867 static void prepare_texture(Texture& t, const IntRect* skip = nullptr);
868 
869 template <typename S>
init_filter(S * s,Texture & t)870 static inline void init_filter(S* s, Texture& t) {
871   // If the width is not at least 2 pixels, then we can't safely sample the end
872   // of the row with a linear filter. In that case, just punt to using nearest
873   // filtering instead.
874   s->filter = t.width >= 2 ? gl_filter_to_texture_filter(t.mag_filter)
875                            : TextureFilter::NEAREST;
876 }
877 
878 template <typename S>
init_sampler(S * s,Texture & t)879 static inline void init_sampler(S* s, Texture& t) {
880   prepare_texture(t);
881   s->width = t.width;
882   s->height = t.height;
883   s->stride = t.stride();
884   int bpp = t.bpp();
885   if (bpp >= 4)
886     s->stride /= 4;
887   else if (bpp == 2)
888     s->stride /= 2;
889   else
890     assert(bpp == 1);
891   // Use uint32_t* for easier sampling, but need to cast to uint8_t* or
892   // uint16_t* for formats with bpp < 4.
893   s->buf = (uint32_t*)t.buf;
894   s->format = gl_format_to_texture_format(t.internal_format);
895 }
896 
897 template <typename S>
null_sampler(S * s)898 static inline void null_sampler(S* s) {
899   // For null texture data, just make the sampler provide a 1x1 buffer that is
900   // transparent black. Ensure buffer holds at least a SIMD vector of zero data
901   // for SIMD padding of unaligned loads.
902   static const uint32_t zeroBuf[sizeof(Float) / sizeof(uint32_t)] = {0};
903   s->width = 1;
904   s->height = 1;
905   s->stride = s->width;
906   s->buf = (uint32_t*)zeroBuf;
907   s->format = TextureFormat::RGBA8;
908 }
909 
// For a null (1x1) texture, nearest filtering is the only meaningful mode.
template <typename S>
static inline void null_filter(S* s) {
  s->filter = TextureFilter::NEAREST;
}
914 
915 template <typename S>
lookup_sampler(S * s,int texture)916 S* lookup_sampler(S* s, int texture) {
917   Texture& t = ctx->get_texture(s, texture);
918   if (!t.buf) {
919     null_sampler(s);
920     null_filter(s);
921   } else {
922     init_sampler(s, t);
923     init_filter(s, t);
924   }
925   return s;
926 }
927 
928 template <typename S>
lookup_isampler(S * s,int texture)929 S* lookup_isampler(S* s, int texture) {
930   Texture& t = ctx->get_texture(s, texture);
931   if (!t.buf) {
932     null_sampler(s);
933   } else {
934     init_sampler(s, t);
935   }
936   return s;
937 }
938 
bytes_per_type(GLenum type)939 int bytes_per_type(GLenum type) {
940   switch (type) {
941     case GL_INT:
942       return 4;
943     case GL_FLOAT:
944       return 4;
945     case GL_UNSIGNED_SHORT:
946       return 2;
947     case GL_UNSIGNED_BYTE:
948       return 1;
949     default:
950       assert(0);
951       return 0;
952   }
953 }
954 
955 template <typename S, typename C>
expand_attrib(const char * buf,size_t size,bool normalized)956 static inline S expand_attrib(const char* buf, size_t size, bool normalized) {
957   typedef typename ElementType<S>::ty elem_type;
958   S scalar = {0};
959   const C* src = reinterpret_cast<const C*>(buf);
960   if (normalized) {
961     const float scale = 1.0f / ((1 << (8 * sizeof(C))) - 1);
962     for (size_t i = 0; i < size / sizeof(C); i++) {
963       put_nth_component(scalar, i, elem_type(src[i]) * scale);
964     }
965   } else {
966     for (size_t i = 0; i < size / sizeof(C); i++) {
967       put_nth_component(scalar, i, elem_type(src[i]));
968     }
969   }
970   return scalar;
971 }
972 
973 template <typename S>
load_attrib_scalar(VertexAttrib & va,const char * src)974 static inline S load_attrib_scalar(VertexAttrib& va, const char* src) {
975   if (sizeof(S) <= va.size) {
976     return *reinterpret_cast<const S*>(src);
977   }
978   if (va.type == GL_UNSIGNED_SHORT) {
979     return expand_attrib<S, uint16_t>(src, va.size, va.normalized);
980   }
981   if (va.type == GL_UNSIGNED_BYTE) {
982     return expand_attrib<S, uint8_t>(src, va.size, va.normalized);
983   }
984   assert(sizeof(typename ElementType<S>::ty) == bytes_per_type(va.type));
985   S scalar = {0};
986   memcpy(&scalar, src, va.size);
987   return scalar;
988 }
989 
// Load a per-vertex attribute into the SIMD attribute register, filling the
// four shader lanes according to WR's fixed primitive vertex order/winding.
// `start` indexes the first vertex, `instance` selects the row for
// instanced (divisor != 0) attributes, and `count` is the vertex count of
// the primitive (2 = line, 3 = triangle, 4 = quad).
template <typename T>
void load_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
                 int count) {
  typedef decltype(force_scalar(attrib)) scalar_type;
  // If no buffer is available, just use a zero default.
  if (!va.buf_size) {
    attrib = T(scalar_type{0});
  } else if (va.divisor != 0) {
    // Instanced attribute: one value broadcast to all lanes.
    char* src = (char*)va.buf + va.stride * instance + va.offset;
    assert(src + va.size <= va.buf + va.buf_size);
    attrib = T(load_attrib_scalar<scalar_type>(va, src));
  } else {
    // Specialized for WR's primitive vertex order/winding.
    if (!count) return;
    assert(count >= 2 && count <= 4);
    char* src = (char*)va.buf + va.stride * start + va.offset;
    switch (count) {
      case 2: {
        // Lines must be indexed at offsets 0, 1.
        // Line vertexes fill vertex shader SIMD lanes as 0, 1, 1, 0.
        scalar_type lanes[2] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride)};
        attrib = (T){lanes[0], lanes[1], lanes[1], lanes[0]};
        break;
      }
      case 3: {
        // Triangles must be indexed at offsets 0, 1, 2.
        // Triangle vertexes fill vertex shader SIMD lanes as 0, 1, 2, 2.
        scalar_type lanes[3] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride),
            load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        attrib = (T){lanes[0], lanes[1], lanes[2], lanes[2]};
        break;
      }
      default:
        // Quads must be successive triangles indexed at offsets 0, 1, 2, 2,
        // 1, 3. Quad vertexes fill vertex shader SIMD lanes as 0, 1, 3, 2, so
        // that the points form a convex path that can be traversed by the
        // rasterizer.
        attrib = (T){load_attrib_scalar<scalar_type>(va, src),
                     load_attrib_scalar<scalar_type>(va, src + va.stride),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 3),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        break;
    }
  }
}
1039 
1040 template <typename T>
load_flat_attrib(T & attrib,VertexAttrib & va,uint32_t start,int instance,int count)1041 void load_flat_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
1042                       int count) {
1043   typedef decltype(force_scalar(attrib)) scalar_type;
1044   // If no buffer is available, just use a zero default.
1045   if (!va.buf_size) {
1046     attrib = T{0};
1047     return;
1048   }
1049   char* src = nullptr;
1050   if (va.divisor != 0) {
1051     src = (char*)va.buf + va.stride * instance + va.offset;
1052   } else {
1053     if (!count) return;
1054     src = (char*)va.buf + va.stride * start + va.offset;
1055   }
1056   assert(src + va.size <= va.buf + va.buf_size);
1057   attrib = T(load_attrib_scalar<scalar_type>(va, src));
1058 }
1059 
setup_program(GLuint program)1060 void setup_program(GLuint program) {
1061   if (!program) {
1062     vertex_shader = nullptr;
1063     fragment_shader = nullptr;
1064     return;
1065   }
1066   Program& p = ctx->programs[program];
1067   assert(p.impl);
1068   assert(p.vert_impl);
1069   assert(p.frag_impl);
1070   vertex_shader = p.vert_impl;
1071   fragment_shader = p.frag_impl;
1072 }
1073 
// Resolves a shader name to its compiled program loader; defined elsewhere.
extern ProgramLoader load_shader(const char* name);
1075 
1076 extern "C" {
1077 
UseProgram(GLuint program)1078 void UseProgram(GLuint program) {
1079   if (ctx->current_program && program != ctx->current_program) {
1080     auto* p = ctx->programs.find(ctx->current_program);
1081     if (p && p->deleted) {
1082       ctx->programs.erase(ctx->current_program);
1083     }
1084   }
1085   ctx->current_program = program;
1086   setup_program(program);
1087 }
1088 
SetViewport(GLint x,GLint y,GLsizei width,GLsizei height)1089 void SetViewport(GLint x, GLint y, GLsizei width, GLsizei height) {
1090   ctx->viewport = IntRect{x, y, x + width, y + height};
1091 }
1092 
Enable(GLenum cap)1093 void Enable(GLenum cap) {
1094   switch (cap) {
1095     case GL_BLEND:
1096       ctx->blend = true;
1097       break;
1098     case GL_DEPTH_TEST:
1099       ctx->depthtest = true;
1100       break;
1101     case GL_SCISSOR_TEST:
1102       ctx->scissortest = true;
1103       break;
1104   }
1105 }
1106 
Disable(GLenum cap)1107 void Disable(GLenum cap) {
1108   switch (cap) {
1109     case GL_BLEND:
1110       ctx->blend = false;
1111       break;
1112     case GL_DEPTH_TEST:
1113       ctx->depthtest = false;
1114       break;
1115     case GL_SCISSOR_TEST:
1116       ctx->scissortest = false;
1117       break;
1118   }
1119 }
1120 
1121 // Report the last error generated and clear the error status.
GetError()1122 GLenum GetError() {
1123   GLenum error = ctx->last_error;
1124   ctx->last_error = GL_NO_ERROR;
1125   return error;
1126 }
1127 
1128 // Sets the error status to out-of-memory to indicate that a buffer
1129 // or texture re-allocation failed.
out_of_memory()1130 static void out_of_memory() { ctx->last_error = GL_OUT_OF_MEMORY; }
1131 
// Extensions advertised through GL_NUM_EXTENSIONS / GetStringi(GL_EXTENSIONS).
static const char* const extensions[] = {
    "GL_ARB_blend_func_extended",
    "GL_ARB_clear_texture",
    "GL_ARB_copy_image",
    "GL_ARB_draw_instanced",
    "GL_ARB_explicit_attrib_location",
    "GL_ARB_instanced_arrays",
    "GL_ARB_invalidate_subdata",
    "GL_ARB_texture_storage",
    "GL_EXT_timer_query",
    "GL_KHR_blend_equation_advanced",
    "GL_KHR_blend_equation_advanced_coherent",
    "GL_APPLE_rgb_422",
};
1146 
GetIntegerv(GLenum pname,GLint * params)1147 void GetIntegerv(GLenum pname, GLint* params) {
1148   assert(params);
1149   switch (pname) {
1150     case GL_MAX_TEXTURE_UNITS:
1151     case GL_MAX_TEXTURE_IMAGE_UNITS:
1152       params[0] = MAX_TEXTURE_UNITS;
1153       break;
1154     case GL_MAX_TEXTURE_SIZE:
1155       params[0] = 1 << 15;
1156       break;
1157     case GL_MAX_ARRAY_TEXTURE_LAYERS:
1158       params[0] = 0;
1159       break;
1160     case GL_READ_FRAMEBUFFER_BINDING:
1161       params[0] = ctx->read_framebuffer_binding;
1162       break;
1163     case GL_DRAW_FRAMEBUFFER_BINDING:
1164       params[0] = ctx->draw_framebuffer_binding;
1165       break;
1166     case GL_PIXEL_PACK_BUFFER_BINDING:
1167       params[0] = ctx->pixel_pack_buffer_binding;
1168       break;
1169     case GL_PIXEL_UNPACK_BUFFER_BINDING:
1170       params[0] = ctx->pixel_unpack_buffer_binding;
1171       break;
1172     case GL_NUM_EXTENSIONS:
1173       params[0] = sizeof(extensions) / sizeof(extensions[0]);
1174       break;
1175     case GL_MAJOR_VERSION:
1176       params[0] = 3;
1177       break;
1178     case GL_MINOR_VERSION:
1179       params[0] = 2;
1180       break;
1181     case GL_MIN_PROGRAM_TEXEL_OFFSET:
1182       params[0] = 0;
1183       break;
1184     case GL_MAX_PROGRAM_TEXEL_OFFSET:
1185       params[0] = MAX_TEXEL_OFFSET;
1186       break;
1187     default:
1188       debugf("unhandled glGetIntegerv parameter %x\n", pname);
1189       assert(false);
1190   }
1191 }
1192 
GetBooleanv(GLenum pname,GLboolean * params)1193 void GetBooleanv(GLenum pname, GLboolean* params) {
1194   assert(params);
1195   switch (pname) {
1196     case GL_DEPTH_WRITEMASK:
1197       params[0] = ctx->depthmask;
1198       break;
1199     default:
1200       debugf("unhandled glGetBooleanv parameter %x\n", pname);
1201       assert(false);
1202   }
1203 }
1204 
GetString(GLenum name)1205 const char* GetString(GLenum name) {
1206   switch (name) {
1207     case GL_VENDOR:
1208       return "Mozilla Gfx";
1209     case GL_RENDERER:
1210       return "Software WebRender";
1211     case GL_VERSION:
1212       return "3.2";
1213     case GL_SHADING_LANGUAGE_VERSION:
1214       return "1.50";
1215     default:
1216       debugf("unhandled glGetString parameter %x\n", name);
1217       assert(false);
1218       return nullptr;
1219   }
1220 }
1221 
GetStringi(GLenum name,GLuint index)1222 const char* GetStringi(GLenum name, GLuint index) {
1223   switch (name) {
1224     case GL_EXTENSIONS:
1225       if (index >= sizeof(extensions) / sizeof(extensions[0])) {
1226         return nullptr;
1227       }
1228       return extensions[index];
1229     default:
1230       debugf("unhandled glGetStringi parameter %x\n", name);
1231       assert(false);
1232       return nullptr;
1233   }
1234 }
1235 
remap_blendfunc(GLenum rgb,GLenum a)1236 GLenum remap_blendfunc(GLenum rgb, GLenum a) {
1237   switch (a) {
1238     case GL_SRC_ALPHA:
1239       if (rgb == GL_SRC_COLOR) a = GL_SRC_COLOR;
1240       break;
1241     case GL_ONE_MINUS_SRC_ALPHA:
1242       if (rgb == GL_ONE_MINUS_SRC_COLOR) a = GL_ONE_MINUS_SRC_COLOR;
1243       break;
1244     case GL_DST_ALPHA:
1245       if (rgb == GL_DST_COLOR) a = GL_DST_COLOR;
1246       break;
1247     case GL_ONE_MINUS_DST_ALPHA:
1248       if (rgb == GL_ONE_MINUS_DST_COLOR) a = GL_ONE_MINUS_DST_COLOR;
1249       break;
1250     case GL_CONSTANT_ALPHA:
1251       if (rgb == GL_CONSTANT_COLOR) a = GL_CONSTANT_COLOR;
1252       break;
1253     case GL_ONE_MINUS_CONSTANT_ALPHA:
1254       if (rgb == GL_ONE_MINUS_CONSTANT_COLOR) a = GL_ONE_MINUS_CONSTANT_COLOR;
1255       break;
1256     case GL_SRC_COLOR:
1257       if (rgb == GL_SRC_ALPHA) a = GL_SRC_ALPHA;
1258       break;
1259     case GL_ONE_MINUS_SRC_COLOR:
1260       if (rgb == GL_ONE_MINUS_SRC_ALPHA) a = GL_ONE_MINUS_SRC_ALPHA;
1261       break;
1262     case GL_DST_COLOR:
1263       if (rgb == GL_DST_ALPHA) a = GL_DST_ALPHA;
1264       break;
1265     case GL_ONE_MINUS_DST_COLOR:
1266       if (rgb == GL_ONE_MINUS_DST_ALPHA) a = GL_ONE_MINUS_DST_ALPHA;
1267       break;
1268     case GL_CONSTANT_COLOR:
1269       if (rgb == GL_CONSTANT_ALPHA) a = GL_CONSTANT_ALPHA;
1270       break;
1271     case GL_ONE_MINUS_CONSTANT_COLOR:
1272       if (rgb == GL_ONE_MINUS_CONSTANT_ALPHA) a = GL_ONE_MINUS_CONSTANT_ALPHA;
1273       break;
1274     case GL_SRC1_ALPHA:
1275       if (rgb == GL_SRC1_COLOR) a = GL_SRC1_COLOR;
1276       break;
1277     case GL_ONE_MINUS_SRC1_ALPHA:
1278       if (rgb == GL_ONE_MINUS_SRC1_COLOR) a = GL_ONE_MINUS_SRC1_COLOR;
1279       break;
1280     case GL_SRC1_COLOR:
1281       if (rgb == GL_SRC1_ALPHA) a = GL_SRC1_ALPHA;
1282       break;
1283     case GL_ONE_MINUS_SRC1_COLOR:
1284       if (rgb == GL_ONE_MINUS_SRC1_ALPHA) a = GL_ONE_MINUS_SRC1_ALPHA;
1285       break;
1286   }
1287   return a;
1288 }
1289 
1290 // Generate a hashed blend key based on blend func and equation state. This
1291 // allows all the blend state to be processed down to a blend key that can be
1292 // dealt with inside a single switch statement.
hash_blend_key()1293 static void hash_blend_key() {
1294   GLenum srgb = ctx->blendfunc_srgb;
1295   GLenum drgb = ctx->blendfunc_drgb;
1296   GLenum sa = ctx->blendfunc_sa;
1297   GLenum da = ctx->blendfunc_da;
1298   GLenum equation = ctx->blend_equation;
1299 #define HASH_BLEND_KEY(x, y, z, w) ((x << 4) | (y) | (z << 24) | (w << 20))
1300   // Basic non-separate blend funcs used the two argument form
1301   int hash = HASH_BLEND_KEY(srgb, drgb, 0, 0);
1302   // Separate alpha blend funcs use the 4 argument hash
1303   if (srgb != sa || drgb != da) hash |= HASH_BLEND_KEY(0, 0, sa, da);
1304   // Any other blend equation than the default func_add ignores the func and
1305   // instead generates a one-argument hash based on the equation
1306   if (equation != GL_FUNC_ADD) hash = HASH_BLEND_KEY(equation, 0, 0, 0);
1307   switch (hash) {
1308 #define MAP_BLEND_KEY(...)                   \
1309   case HASH_BLEND_KEY(__VA_ARGS__):          \
1310     ctx->blend_key = BLEND_KEY(__VA_ARGS__); \
1311     break;
1312     FOR_EACH_BLEND_KEY(MAP_BLEND_KEY)
1313     default:
1314       debugf("blendfunc: %x, %x, separate: %x, %x, equation: %x\n", srgb, drgb,
1315              sa, da, equation);
1316       assert(false);
1317       break;
1318   }
1319 }
1320 
BlendFunc(GLenum srgb,GLenum drgb,GLenum sa,GLenum da)1321 void BlendFunc(GLenum srgb, GLenum drgb, GLenum sa, GLenum da) {
1322   ctx->blendfunc_srgb = srgb;
1323   ctx->blendfunc_drgb = drgb;
1324   sa = remap_blendfunc(srgb, sa);
1325   da = remap_blendfunc(drgb, da);
1326   ctx->blendfunc_sa = sa;
1327   ctx->blendfunc_da = da;
1328 
1329   hash_blend_key();
1330 }
1331 
BlendColor(GLfloat r,GLfloat g,GLfloat b,GLfloat a)1332 void BlendColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
1333   I32 c = round_pixel((Float){b, g, r, a});
1334   ctx->blendcolor = CONVERT(c, U16).xyzwxyzw;
1335 }
1336 
BlendEquation(GLenum mode)1337 void BlendEquation(GLenum mode) {
1338   assert(mode == GL_FUNC_ADD || mode == GL_MIN || mode == GL_MAX ||
1339          (mode >= GL_MULTIPLY_KHR && mode <= GL_HSL_LUMINOSITY_KHR));
1340   if (mode != ctx->blend_equation) {
1341     ctx->blend_equation = mode;
1342     hash_blend_key();
1343   }
1344 }
1345 
DepthMask(GLboolean flag)1346 void DepthMask(GLboolean flag) { ctx->depthmask = flag; }
1347 
DepthFunc(GLenum func)1348 void DepthFunc(GLenum func) {
1349   switch (func) {
1350     case GL_LESS:
1351     case GL_LEQUAL:
1352       break;
1353     default:
1354       assert(false);
1355   }
1356   ctx->depthfunc = func;
1357 }
1358 
SetScissor(GLint x,GLint y,GLsizei width,GLsizei height)1359 void SetScissor(GLint x, GLint y, GLsizei width, GLsizei height) {
1360   ctx->scissor = IntRect{x, y, x + width, y + height};
1361 }
1362 
ClearColor(GLfloat r,GLfloat g,GLfloat b,GLfloat a)1363 void ClearColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
1364   ctx->clearcolor[0] = r;
1365   ctx->clearcolor[1] = g;
1366   ctx->clearcolor[2] = b;
1367   ctx->clearcolor[3] = a;
1368 }
1369 
ClearDepth(GLdouble depth)1370 void ClearDepth(GLdouble depth) { ctx->cleardepth = depth; }
1371 
ActiveTexture(GLenum texture)1372 void ActiveTexture(GLenum texture) {
1373   assert(texture >= GL_TEXTURE0);
1374   assert(texture < GL_TEXTURE0 + MAX_TEXTURE_UNITS);
1375   ctx->active_texture_unit =
1376       clamp(int(texture - GL_TEXTURE0), 0, int(MAX_TEXTURE_UNITS - 1));
1377 }
1378 
GenQueries(GLsizei n,GLuint * result)1379 void GenQueries(GLsizei n, GLuint* result) {
1380   for (int i = 0; i < n; i++) {
1381     Query q;
1382     result[i] = ctx->queries.insert(q);
1383   }
1384 }
1385 
DeleteQuery(GLuint n)1386 void DeleteQuery(GLuint n) {
1387   if (n && ctx->queries.erase(n)) {
1388     unlink(ctx->time_elapsed_query, n);
1389     unlink(ctx->samples_passed_query, n);
1390   }
1391 }
1392 
GenBuffers(int n,GLuint * result)1393 void GenBuffers(int n, GLuint* result) {
1394   for (int i = 0; i < n; i++) {
1395     Buffer b;
1396     result[i] = ctx->buffers.insert(b);
1397   }
1398 }
1399 
DeleteBuffer(GLuint n)1400 void DeleteBuffer(GLuint n) {
1401   if (n && ctx->buffers.erase(n)) {
1402     unlink(ctx->pixel_pack_buffer_binding, n);
1403     unlink(ctx->pixel_unpack_buffer_binding, n);
1404     unlink(ctx->array_buffer_binding, n);
1405   }
1406 }
1407 
GenVertexArrays(int n,GLuint * result)1408 void GenVertexArrays(int n, GLuint* result) {
1409   for (int i = 0; i < n; i++) {
1410     VertexArray v;
1411     result[i] = ctx->vertex_arrays.insert(v);
1412   }
1413 }
1414 
DeleteVertexArray(GLuint n)1415 void DeleteVertexArray(GLuint n) {
1416   if (n && ctx->vertex_arrays.erase(n)) {
1417     unlink(ctx->current_vertex_array, n);
1418   }
1419 }
1420 
CreateShader(GLenum type)1421 GLuint CreateShader(GLenum type) {
1422   Shader s;
1423   s.type = type;
1424   return ctx->shaders.insert(s);
1425 }
1426 
ShaderSourceByName(GLuint shader,char * name)1427 void ShaderSourceByName(GLuint shader, char* name) {
1428   Shader& s = ctx->shaders[shader];
1429   s.loader = load_shader(name);
1430   if (!s.loader) {
1431     debugf("unknown shader %s\n", name);
1432   }
1433 }
1434 
AttachShader(GLuint program,GLuint shader)1435 void AttachShader(GLuint program, GLuint shader) {
1436   Program& p = ctx->programs[program];
1437   Shader& s = ctx->shaders[shader];
1438   if (s.type == GL_VERTEX_SHADER) {
1439     if (!p.impl && s.loader) p.impl = s.loader();
1440   } else if (s.type == GL_FRAGMENT_SHADER) {
1441     if (!p.impl && s.loader) p.impl = s.loader();
1442   } else {
1443     assert(0);
1444   }
1445 }
1446 
DeleteShader(GLuint n)1447 void DeleteShader(GLuint n) {
1448   if (n) ctx->shaders.erase(n);
1449 }
1450 
CreateProgram()1451 GLuint CreateProgram() {
1452   Program p;
1453   return ctx->programs.insert(p);
1454 }
1455 
DeleteProgram(GLuint n)1456 void DeleteProgram(GLuint n) {
1457   if (!n) return;
1458   if (ctx->current_program == n) {
1459     if (auto* p = ctx->programs.find(n)) {
1460       p->deleted = true;
1461     }
1462   } else {
1463     ctx->programs.erase(n);
1464   }
1465 }
1466 
LinkProgram(GLuint program)1467 void LinkProgram(GLuint program) {
1468   Program& p = ctx->programs[program];
1469   assert(p.impl);
1470   if (!p.impl) {
1471     return;
1472   }
1473   assert(p.impl->interpolants_size() <= sizeof(Interpolants));
1474   if (!p.vert_impl) p.vert_impl = p.impl->get_vertex_shader();
1475   if (!p.frag_impl) p.frag_impl = p.impl->get_fragment_shader();
1476 }
1477 
GetLinkStatus(GLuint program)1478 GLint GetLinkStatus(GLuint program) {
1479   if (auto* p = ctx->programs.find(program)) {
1480     return p->impl ? 1 : 0;
1481   }
1482   return 0;
1483 }
1484 
BindAttribLocation(GLuint program,GLuint index,char * name)1485 void BindAttribLocation(GLuint program, GLuint index, char* name) {
1486   Program& p = ctx->programs[program];
1487   assert(p.impl);
1488   if (!p.impl) {
1489     return;
1490   }
1491   p.impl->bind_attrib(name, index);
1492 }
1493 
GetAttribLocation(GLuint program,char * name)1494 GLint GetAttribLocation(GLuint program, char* name) {
1495   Program& p = ctx->programs[program];
1496   assert(p.impl);
1497   if (!p.impl) {
1498     return -1;
1499   }
1500   return p.impl->get_attrib(name);
1501 }
1502 
GetUniformLocation(GLuint program,char * name)1503 GLint GetUniformLocation(GLuint program, char* name) {
1504   Program& p = ctx->programs[program];
1505   assert(p.impl);
1506   if (!p.impl) {
1507     return -1;
1508   }
1509   GLint loc = p.impl->get_uniform(name);
1510   // debugf("location: %d\n", loc);
1511   return loc;
1512 }
1513 
get_time_value()1514 static uint64_t get_time_value() {
1515 #ifdef __MACH__
1516   return mach_absolute_time();
1517 #elif defined(_WIN32)
1518   LARGE_INTEGER time;
1519   static bool have_frequency = false;
1520   static LARGE_INTEGER frequency;
1521   if (!have_frequency) {
1522     QueryPerformanceFrequency(&frequency);
1523     have_frequency = true;
1524   }
1525   QueryPerformanceCounter(&time);
1526   return time.QuadPart * 1000000000ULL / frequency.QuadPart;
1527 #else
1528   return ({
1529     struct timespec tp;
1530     clock_gettime(CLOCK_MONOTONIC, &tp);
1531     tp.tv_sec * 1000000000ULL + tp.tv_nsec;
1532   });
1533 #endif
1534 }
1535 
BeginQuery(GLenum target,GLuint id)1536 void BeginQuery(GLenum target, GLuint id) {
1537   ctx->get_binding(target) = id;
1538   Query& q = ctx->queries[id];
1539   switch (target) {
1540     case GL_SAMPLES_PASSED:
1541       q.value = 0;
1542       break;
1543     case GL_TIME_ELAPSED:
1544       q.value = get_time_value();
1545       break;
1546     default:
1547       debugf("unknown query target %x for query %d\n", target, id);
1548       assert(false);
1549   }
1550 }
1551 
EndQuery(GLenum target)1552 void EndQuery(GLenum target) {
1553   Query& q = ctx->queries[ctx->get_binding(target)];
1554   switch (target) {
1555     case GL_SAMPLES_PASSED:
1556       break;
1557     case GL_TIME_ELAPSED:
1558       q.value = get_time_value() - q.value;
1559       break;
1560     default:
1561       debugf("unknown query target %x\n", target);
1562       assert(false);
1563   }
1564   ctx->get_binding(target) = 0;
1565 }
1566 
GetQueryObjectui64v(GLuint id,GLenum pname,GLuint64 * params)1567 void GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64* params) {
1568   Query& q = ctx->queries[id];
1569   switch (pname) {
1570     case GL_QUERY_RESULT:
1571       assert(params);
1572       params[0] = q.value;
1573       break;
1574     default:
1575       assert(false);
1576   }
1577 }
1578 
BindVertexArray(GLuint vertex_array)1579 void BindVertexArray(GLuint vertex_array) {
1580   if (vertex_array != ctx->current_vertex_array) {
1581     ctx->validate_vertex_array = true;
1582   }
1583   ctx->current_vertex_array = vertex_array;
1584 }
1585 
BindTexture(GLenum target,GLuint texture)1586 void BindTexture(GLenum target, GLuint texture) {
1587   ctx->get_binding(target) = texture;
1588 }
1589 
BindBuffer(GLenum target,GLuint buffer)1590 void BindBuffer(GLenum target, GLuint buffer) {
1591   ctx->get_binding(target) = buffer;
1592 }
1593 
BindFramebuffer(GLenum target,GLuint fb)1594 void BindFramebuffer(GLenum target, GLuint fb) {
1595   if (target == GL_FRAMEBUFFER) {
1596     ctx->read_framebuffer_binding = fb;
1597     ctx->draw_framebuffer_binding = fb;
1598   } else {
1599     assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
1600     ctx->get_binding(target) = fb;
1601   }
1602 }
1603 
BindRenderbuffer(GLenum target,GLuint rb)1604 void BindRenderbuffer(GLenum target, GLuint rb) {
1605   ctx->get_binding(target) = rb;
1606 }
1607 
PixelStorei(GLenum name,GLint param)1608 void PixelStorei(GLenum name, GLint param) {
1609   if (name == GL_UNPACK_ALIGNMENT) {
1610     assert(param == 1);
1611   } else if (name == GL_UNPACK_ROW_LENGTH) {
1612     ctx->unpack_row_length = param;
1613   }
1614 }
1615 
remap_internal_format(GLenum format)1616 static GLenum remap_internal_format(GLenum format) {
1617   switch (format) {
1618     case GL_DEPTH_COMPONENT:
1619       return GL_DEPTH_COMPONENT24;
1620     case GL_RGBA:
1621       return GL_RGBA8;
1622     case GL_RED:
1623       return GL_R8;
1624     case GL_RG:
1625       return GL_RG8;
1626     case GL_RGB_422_APPLE:
1627       return GL_RGB_RAW_422_APPLE;
1628     default:
1629       return format;
1630   }
1631 }
1632 
1633 }  // extern "C"
1634 
format_requires_conversion(GLenum external_format,GLenum internal_format)1635 static bool format_requires_conversion(GLenum external_format,
1636                                        GLenum internal_format) {
1637   switch (external_format) {
1638     case GL_RGBA:
1639       return internal_format == GL_RGBA8;
1640     default:
1641       return false;
1642   }
1643 }
1644 
copy_bgra8_to_rgba8(uint32_t * dest,const uint32_t * src,int width)1645 static inline void copy_bgra8_to_rgba8(uint32_t* dest, const uint32_t* src,
1646                                        int width) {
1647   for (; width >= 4; width -= 4, dest += 4, src += 4) {
1648     U32 p = unaligned_load<U32>(src);
1649     U32 rb = p & 0x00FF00FF;
1650     unaligned_store(dest, (p & 0xFF00FF00) | (rb << 16) | (rb >> 16));
1651   }
1652   for (; width > 0; width--, dest++, src++) {
1653     uint32_t p = *src;
1654     uint32_t rb = p & 0x00FF00FF;
1655     *dest = (p & 0xFF00FF00) | (rb << 16) | (rb >> 16);
1656   }
1657 }
1658 
convert_copy(GLenum external_format,GLenum internal_format,uint8_t * dst_buf,size_t dst_stride,const uint8_t * src_buf,size_t src_stride,size_t width,size_t height)1659 static void convert_copy(GLenum external_format, GLenum internal_format,
1660                          uint8_t* dst_buf, size_t dst_stride,
1661                          const uint8_t* src_buf, size_t src_stride,
1662                          size_t width, size_t height) {
1663   switch (external_format) {
1664     case GL_RGBA:
1665       if (internal_format == GL_RGBA8) {
1666         for (; height; height--) {
1667           copy_bgra8_to_rgba8((uint32_t*)dst_buf, (const uint32_t*)src_buf,
1668                               width);
1669           dst_buf += dst_stride;
1670           src_buf += src_stride;
1671         }
1672         return;
1673       }
1674       break;
1675     default:
1676       break;
1677   }
1678   size_t row_bytes = width * bytes_for_internal_format(internal_format);
1679   for (; height; height--) {
1680     memcpy(dst_buf, src_buf, row_bytes);
1681     dst_buf += dst_stride;
1682     src_buf += src_stride;
1683   }
1684 }
1685 
set_tex_storage(Texture & t,GLenum external_format,GLsizei width,GLsizei height,void * buf=nullptr,GLsizei stride=0,GLsizei min_width=0,GLsizei min_height=0)1686 static void set_tex_storage(Texture& t, GLenum external_format, GLsizei width,
1687                             GLsizei height, void* buf = nullptr,
1688                             GLsizei stride = 0, GLsizei min_width = 0,
1689                             GLsizei min_height = 0) {
1690   GLenum internal_format = remap_internal_format(external_format);
1691   bool changed = false;
1692   if (t.width != width || t.height != height ||
1693       t.internal_format != internal_format) {
1694     changed = true;
1695     t.internal_format = internal_format;
1696     t.width = width;
1697     t.height = height;
1698   }
1699   // If we are changed from an internally managed buffer to an externally
1700   // supplied one or vice versa, ensure that we clean up old buffer state.
1701   // However, if we have to convert the data from a non-native format, then
1702   // always treat it as internally managed since we will need to copy to an
1703   // internally managed native format buffer.
1704   bool should_free = buf == nullptr || format_requires_conversion(
1705                                            external_format, internal_format);
1706   if (t.should_free() != should_free) {
1707     changed = true;
1708     t.cleanup();
1709     t.set_should_free(should_free);
1710   }
1711   // If now an external buffer, explicitly set it...
1712   if (!should_free) {
1713     t.set_buffer(buf, stride);
1714   }
1715   t.disable_delayed_clear();
1716   if (!t.allocate(changed, min_width, min_height)) {
1717     out_of_memory();
1718   }
1719   // If we have a buffer that needs format conversion, then do that now.
1720   if (buf && should_free) {
1721     convert_copy(external_format, internal_format, (uint8_t*)t.buf, t.stride(),
1722                  (const uint8_t*)buf, stride, width, height);
1723   }
1724 }
1725 
1726 extern "C" {
1727 
TexStorage2D(GLenum target,GLint levels,GLenum internal_format,GLsizei width,GLsizei height)1728 void TexStorage2D(GLenum target, GLint levels, GLenum internal_format,
1729                   GLsizei width, GLsizei height) {
1730   assert(levels == 1);
1731   Texture& t = ctx->textures[ctx->get_binding(target)];
1732   set_tex_storage(t, internal_format, width, height);
1733 }
1734 
internal_format_for_data(GLenum format,GLenum ty)1735 GLenum internal_format_for_data(GLenum format, GLenum ty) {
1736   if (format == GL_RED && ty == GL_UNSIGNED_BYTE) {
1737     return GL_R8;
1738   } else if ((format == GL_RGBA || format == GL_BGRA) &&
1739              (ty == GL_UNSIGNED_BYTE || ty == GL_UNSIGNED_INT_8_8_8_8_REV)) {
1740     return GL_RGBA8;
1741   } else if (format == GL_RGBA && ty == GL_FLOAT) {
1742     return GL_RGBA32F;
1743   } else if (format == GL_RGBA_INTEGER && ty == GL_INT) {
1744     return GL_RGBA32I;
1745   } else if (format == GL_RG && ty == GL_UNSIGNED_BYTE) {
1746     return GL_RG8;
1747   } else if (format == GL_RGB_422_APPLE &&
1748              ty == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
1749     return GL_RGB_RAW_422_APPLE;
1750   } else if (format == GL_RED && ty == GL_UNSIGNED_SHORT) {
1751     return GL_R16;
1752   } else if (format == GL_RG && ty == GL_UNSIGNED_SHORT) {
1753     return GL_RG16;
1754   } else {
1755     debugf("unknown internal format for format %x, type %x\n", format, ty);
1756     assert(false);
1757     return 0;
1758   }
1759 }
1760 
get_pixel_pack_buffer()1761 static Buffer* get_pixel_pack_buffer() {
1762   return ctx->pixel_pack_buffer_binding
1763              ? &ctx->buffers[ctx->pixel_pack_buffer_binding]
1764              : nullptr;
1765 }
1766 
get_pixel_pack_buffer_data(void * data)1767 static void* get_pixel_pack_buffer_data(void* data) {
1768   if (Buffer* b = get_pixel_pack_buffer()) {
1769     return b->buf ? b->buf + (size_t)data : nullptr;
1770   }
1771   return data;
1772 }
1773 
get_pixel_unpack_buffer()1774 static Buffer* get_pixel_unpack_buffer() {
1775   return ctx->pixel_unpack_buffer_binding
1776              ? &ctx->buffers[ctx->pixel_unpack_buffer_binding]
1777              : nullptr;
1778 }
1779 
get_pixel_unpack_buffer_data(void * data)1780 static void* get_pixel_unpack_buffer_data(void* data) {
1781   if (Buffer* b = get_pixel_unpack_buffer()) {
1782     return b->buf ? b->buf + (size_t)data : nullptr;
1783   }
1784   return data;
1785 }
1786 
// Upload a sub-rectangle of pixel data into the texture bound to `target`.
// Only mip level 0 is supported. `data` may be an offset into a bound
// GL_PIXEL_UNPACK_BUFFER (see get_pixel_unpack_buffer_data).
void TexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset,
                   GLsizei width, GLsizei height, GLenum format, GLenum ty,
                   void* data) {
  if (level != 0) {
    assert(false);
    return;
  }
  data = get_pixel_unpack_buffer_data(data);
  if (!data) return;
  Texture& t = ctx->textures[ctx->get_binding(target)];
  // The destination rect is about to be overwritten, so any pending delayed
  // clear may skip it.
  IntRect skip = {xoffset, yoffset, xoffset + width, yoffset + height};
  prepare_texture(t, &skip);
  assert(xoffset + width <= t.width);
  assert(yoffset + height <= t.height);
  assert(ctx->unpack_row_length == 0 || ctx->unpack_row_length >= width);
  // Source rows honor GL_UNPACK_ROW_LENGTH when it is set.
  GLsizei row_length =
      ctx->unpack_row_length != 0 ? ctx->unpack_row_length : width;
  assert(t.internal_format == internal_format_for_data(format, ty));
  // When the source format needs conversion, the source stride is sized by
  // the source format's bytes-per-pixel rather than the texture's.
  int src_bpp = format_requires_conversion(format, t.internal_format)
                    ? bytes_for_internal_format(format)
                    : t.bpp();
  if (!src_bpp || !t.buf) return;
  convert_copy(format, t.internal_format,
               (uint8_t*)t.sample_ptr(xoffset, yoffset), t.stride(),
               (const uint8_t*)data, row_length * src_bpp, width, height);
}
1813 
TexImage2D(GLenum target,GLint level,GLint internal_format,GLsizei width,GLsizei height,GLint border,GLenum format,GLenum ty,void * data)1814 void TexImage2D(GLenum target, GLint level, GLint internal_format,
1815                 GLsizei width, GLsizei height, GLint border, GLenum format,
1816                 GLenum ty, void* data) {
1817   if (level != 0) {
1818     assert(false);
1819     return;
1820   }
1821   assert(border == 0);
1822   TexStorage2D(target, 1, internal_format, width, height);
1823   TexSubImage2D(target, 0, 0, 0, width, height, format, ty, data);
1824 }
1825 
// Intentionally a no-op: this implementation does not generate mipmaps.
void GenerateMipmap(UNUSED GLenum target) {
  // TODO: support mipmaps
}
1829 
SetTextureParameter(GLuint texid,GLenum pname,GLint param)1830 void SetTextureParameter(GLuint texid, GLenum pname, GLint param) {
1831   Texture& t = ctx->textures[texid];
1832   switch (pname) {
1833     case GL_TEXTURE_WRAP_S:
1834       assert(param == GL_CLAMP_TO_EDGE);
1835       break;
1836     case GL_TEXTURE_WRAP_T:
1837       assert(param == GL_CLAMP_TO_EDGE);
1838       break;
1839     case GL_TEXTURE_MIN_FILTER:
1840       t.min_filter = param;
1841       break;
1842     case GL_TEXTURE_MAG_FILTER:
1843       t.mag_filter = param;
1844       break;
1845     default:
1846       break;
1847   }
1848 }
1849 
TexParameteri(GLenum target,GLenum pname,GLint param)1850 void TexParameteri(GLenum target, GLenum pname, GLint param) {
1851   SetTextureParameter(ctx->get_binding(target), pname, param);
1852 }
1853 
GenTextures(int n,GLuint * result)1854 void GenTextures(int n, GLuint* result) {
1855   for (int i = 0; i < n; i++) {
1856     Texture t;
1857     result[i] = ctx->textures.insert(t);
1858   }
1859 }
1860 
DeleteTexture(GLuint n)1861 void DeleteTexture(GLuint n) {
1862   if (n && ctx->textures.erase(n)) {
1863     for (size_t i = 0; i < MAX_TEXTURE_UNITS; i++) {
1864       ctx->texture_units[i].unlink(n);
1865     }
1866   }
1867 }
1868 
GenRenderbuffers(int n,GLuint * result)1869 void GenRenderbuffers(int n, GLuint* result) {
1870   for (int i = 0; i < n; i++) {
1871     Renderbuffer r;
1872     result[i] = ctx->renderbuffers.insert(r);
1873   }
1874 }
1875 
on_erase()1876 void Renderbuffer::on_erase() {
1877   for (auto* fb : ctx->framebuffers) {
1878     if (fb) {
1879       unlink(fb->color_attachment, texture);
1880       unlink(fb->depth_attachment, texture);
1881     }
1882   }
1883   DeleteTexture(texture);
1884 }
1885 
DeleteRenderbuffer(GLuint n)1886 void DeleteRenderbuffer(GLuint n) {
1887   if (n && ctx->renderbuffers.erase(n)) {
1888     unlink(ctx->renderbuffer_binding, n);
1889   }
1890 }
1891 
GenFramebuffers(int n,GLuint * result)1892 void GenFramebuffers(int n, GLuint* result) {
1893   for (int i = 0; i < n; i++) {
1894     Framebuffer f;
1895     result[i] = ctx->framebuffers.insert(f);
1896   }
1897 }
1898 
DeleteFramebuffer(GLuint n)1899 void DeleteFramebuffer(GLuint n) {
1900   if (n && ctx->framebuffers.erase(n)) {
1901     unlink(ctx->read_framebuffer_binding, n);
1902     unlink(ctx->draw_framebuffer_binding, n);
1903   }
1904 }
1905 
RenderbufferStorage(GLenum target,GLenum internal_format,GLsizei width,GLsizei height)1906 void RenderbufferStorage(GLenum target, GLenum internal_format, GLsizei width,
1907                          GLsizei height) {
1908   // Just refer a renderbuffer to a texture to simplify things for now...
1909   Renderbuffer& r = ctx->renderbuffers[ctx->get_binding(target)];
1910   if (!r.texture) {
1911     GenTextures(1, &r.texture);
1912   }
1913   switch (internal_format) {
1914     case GL_DEPTH_COMPONENT:
1915     case GL_DEPTH_COMPONENT16:
1916     case GL_DEPTH_COMPONENT24:
1917     case GL_DEPTH_COMPONENT32:
1918       // Force depth format to 24 bits...
1919       internal_format = GL_DEPTH_COMPONENT24;
1920       break;
1921   }
1922   set_tex_storage(ctx->textures[r.texture], internal_format, width, height);
1923 }
1924 
VertexAttribPointer(GLuint index,GLint size,GLenum type,bool normalized,GLsizei stride,GLuint offset)1925 void VertexAttribPointer(GLuint index, GLint size, GLenum type, bool normalized,
1926                          GLsizei stride, GLuint offset) {
1927   // debugf("cva: %d\n", ctx->current_vertex_array);
1928   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1929   if (index >= NULL_ATTRIB) {
1930     assert(0);
1931     return;
1932   }
1933   VertexAttrib& va = v.attribs[index];
1934   va.size = size * bytes_per_type(type);
1935   va.type = type;
1936   va.normalized = normalized;
1937   va.stride = stride;
1938   va.offset = offset;
1939   // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding];
1940   va.vertex_buffer = ctx->array_buffer_binding;
1941   va.vertex_array = ctx->current_vertex_array;
1942   ctx->validate_vertex_array = true;
1943 }
1944 
VertexAttribIPointer(GLuint index,GLint size,GLenum type,GLsizei stride,GLuint offset)1945 void VertexAttribIPointer(GLuint index, GLint size, GLenum type, GLsizei stride,
1946                           GLuint offset) {
1947   // debugf("cva: %d\n", ctx->current_vertex_array);
1948   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1949   if (index >= NULL_ATTRIB) {
1950     assert(0);
1951     return;
1952   }
1953   VertexAttrib& va = v.attribs[index];
1954   va.size = size * bytes_per_type(type);
1955   va.type = type;
1956   va.normalized = false;
1957   va.stride = stride;
1958   va.offset = offset;
1959   // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding];
1960   va.vertex_buffer = ctx->array_buffer_binding;
1961   va.vertex_array = ctx->current_vertex_array;
1962   ctx->validate_vertex_array = true;
1963 }
1964 
EnableVertexAttribArray(GLuint index)1965 void EnableVertexAttribArray(GLuint index) {
1966   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1967   if (index >= NULL_ATTRIB) {
1968     assert(0);
1969     return;
1970   }
1971   VertexAttrib& va = v.attribs[index];
1972   if (!va.enabled) {
1973     ctx->validate_vertex_array = true;
1974   }
1975   va.enabled = true;
1976   v.max_attrib = max(v.max_attrib, (int)index);
1977 }
1978 
DisableVertexAttribArray(GLuint index)1979 void DisableVertexAttribArray(GLuint index) {
1980   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1981   if (index >= NULL_ATTRIB) {
1982     assert(0);
1983     return;
1984   }
1985   VertexAttrib& va = v.attribs[index];
1986   if (va.enabled) {
1987     ctx->validate_vertex_array = true;
1988   }
1989   va.disable();
1990 }
1991 
VertexAttribDivisor(GLuint index,GLuint divisor)1992 void VertexAttribDivisor(GLuint index, GLuint divisor) {
1993   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1994   // Only support divisor being 0 (per-vertex) or 1 (per-instance).
1995   if (index >= NULL_ATTRIB || divisor > 1) {
1996     assert(0);
1997     return;
1998   }
1999   VertexAttrib& va = v.attribs[index];
2000   va.divisor = divisor;
2001 }
2002 
BufferData(GLenum target,GLsizeiptr size,void * data,UNUSED GLenum usage)2003 void BufferData(GLenum target, GLsizeiptr size, void* data,
2004                 UNUSED GLenum usage) {
2005   Buffer& b = ctx->buffers[ctx->get_binding(target)];
2006   if (size != b.size) {
2007     if (!b.allocate(size)) {
2008       out_of_memory();
2009     }
2010     ctx->validate_vertex_array = true;
2011   }
2012   if (data && b.buf && size <= b.size) {
2013     memcpy(b.buf, data, size);
2014   }
2015 }
2016 
BufferSubData(GLenum target,GLintptr offset,GLsizeiptr size,void * data)2017 void BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
2018                    void* data) {
2019   Buffer& b = ctx->buffers[ctx->get_binding(target)];
2020   assert(offset + size <= b.size);
2021   if (data && b.buf && offset + size <= b.size) {
2022     memcpy(&b.buf[offset], data, size);
2023   }
2024 }
2025 
MapBuffer(GLenum target,UNUSED GLbitfield access)2026 void* MapBuffer(GLenum target, UNUSED GLbitfield access) {
2027   Buffer& b = ctx->buffers[ctx->get_binding(target)];
2028   return b.buf;
2029 }
2030 
MapBufferRange(GLenum target,GLintptr offset,GLsizeiptr length,UNUSED GLbitfield access)2031 void* MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length,
2032                      UNUSED GLbitfield access) {
2033   Buffer& b = ctx->buffers[ctx->get_binding(target)];
2034   if (b.buf && offset >= 0 && length > 0 && offset + length <= b.size) {
2035     return b.buf + offset;
2036   }
2037   return nullptr;
2038 }
2039 
UnmapBuffer(GLenum target)2040 GLboolean UnmapBuffer(GLenum target) {
2041   Buffer& b = ctx->buffers[ctx->get_binding(target)];
2042   return b.buf != nullptr;
2043 }
2044 
Uniform1i(GLint location,GLint V0)2045 void Uniform1i(GLint location, GLint V0) {
2046   // debugf("tex: %d\n", (int)ctx->textures.size);
2047   if (vertex_shader) {
2048     vertex_shader->set_uniform_1i(location, V0);
2049   }
2050 }
Uniform4fv(GLint location,GLsizei count,const GLfloat * v)2051 void Uniform4fv(GLint location, GLsizei count, const GLfloat* v) {
2052   assert(count == 1);
2053   if (vertex_shader) {
2054     vertex_shader->set_uniform_4fv(location, v);
2055   }
2056 }
UniformMatrix4fv(GLint location,GLsizei count,GLboolean transpose,const GLfloat * value)2057 void UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
2058                       const GLfloat* value) {
2059   assert(count == 1);
2060   assert(!transpose);
2061   if (vertex_shader) {
2062     vertex_shader->set_uniform_matrix4fv(location, value);
2063   }
2064 }
2065 
FramebufferTexture2D(GLenum target,GLenum attachment,GLenum textarget,GLuint texture,GLint level)2066 void FramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget,
2067                           GLuint texture, GLint level) {
2068   assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
2069   assert(textarget == GL_TEXTURE_2D || textarget == GL_TEXTURE_RECTANGLE);
2070   assert(level == 0);
2071   Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
2072   if (attachment == GL_COLOR_ATTACHMENT0) {
2073     fb.color_attachment = texture;
2074   } else if (attachment == GL_DEPTH_ATTACHMENT) {
2075     fb.depth_attachment = texture;
2076   } else {
2077     assert(0);
2078   }
2079 }
2080 
FramebufferRenderbuffer(GLenum target,GLenum attachment,GLenum renderbuffertarget,GLuint renderbuffer)2081 void FramebufferRenderbuffer(GLenum target, GLenum attachment,
2082                              GLenum renderbuffertarget, GLuint renderbuffer) {
2083   assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
2084   assert(renderbuffertarget == GL_RENDERBUFFER);
2085   Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
2086   Renderbuffer& rb = ctx->renderbuffers[renderbuffer];
2087   if (attachment == GL_COLOR_ATTACHMENT0) {
2088     fb.color_attachment = rb.texture;
2089   } else if (attachment == GL_DEPTH_ATTACHMENT) {
2090     fb.depth_attachment = rb.texture;
2091   } else {
2092     assert(0);
2093   }
2094 }
2095 
2096 }  // extern "C"
2097 
get_framebuffer(GLenum target,bool fallback=false)2098 static inline Framebuffer* get_framebuffer(GLenum target,
2099                                            bool fallback = false) {
2100   if (target == GL_FRAMEBUFFER) {
2101     target = GL_DRAW_FRAMEBUFFER;
2102   }
2103   Framebuffer* fb = ctx->framebuffers.find(ctx->get_binding(target));
2104   if (fallback && !fb) {
2105     // If the specified framebuffer isn't found and a fallback is requested,
2106     // use the default framebuffer.
2107     fb = &ctx->framebuffers[0];
2108   }
2109   return fb;
2110 }
2111 
// Assign `val` to each of the `n` elements starting at `dst`.
template <typename T>
static inline void fill_n(T* dst, size_t n, T val) {
  for (size_t i = 0; i < n; i++) {
    dst[i] = val;
  }
}
2116 
#if USE_SSE2
// x86 specialization: fill `n` 32-bit words starting at `dst` with `val`
// using the `rep stosl` string-store instruction. EDI ("+D") and ECX ("+c")
// are consumed by the instruction, so they are read-write operands.
template <>
inline void fill_n<uint32_t>(uint32_t* dst, size_t n, uint32_t val) {
  __asm__ __volatile__("rep stosl\n"
                       : "+D"(dst), "+c"(n)
                       : "a"(val)
                       : "memory", "cc");
}
#endif
2126 
// Broadcast a byte into all four lanes of a 32-bit clear chunk.
static inline uint32_t clear_chunk(uint8_t value) {
  uint32_t v = value;
  return (v << 24) | (v << 16) | (v << 8) | v;
}
2130 
// Duplicate a 16-bit value into both halves of a 32-bit clear chunk.
static inline uint32_t clear_chunk(uint16_t value) {
  return (uint32_t)value * 0x00010001U;
}
2134 
// A 32-bit value already is a full clear chunk; pass it through.
static inline uint32_t clear_chunk(uint32_t value) {
  return value;
}
2136 
// Clear a row of `len` elements of type T to `value`. `chunk` is `value`
// replicated to fill a 32-bit word (see clear_chunk), so the bulk of the row
// can be written one word at a time.
template <typename T>
static inline void clear_row(T* buf, size_t len, T value, uint32_t chunk) {
  // Number of T elements per 32-bit word (1, 2, or 4).
  const size_t N = sizeof(uint32_t) / sizeof(T);
  // fill any leading unaligned values
  if (N > 1) {
    // Elements needed to reach the next 4-byte boundary.
    size_t align = (-(intptr_t)buf & (sizeof(uint32_t) - 1)) / sizeof(T);
    if (align <= len) {
      fill_n(buf, align, value);
      len -= align;
      buf += align;
    }
  }
  // fill as many aligned chunks as possible
  fill_n((uint32_t*)buf, len / N, chunk);
  // fill any remaining values
  if (N > 1) {
    fill_n(buf + (len & ~(N - 1)), len & (N - 1), value);
  }
}
2156 
// Clear the rectangle `bb` of texture `t` to `value`, optionally leaving the
// horizontal span [skip_start, skip_end) of each row untouched.
template <typename T>
static void clear_buffer(Texture& t, T value, IntRect bb, int skip_start = 0,
                         int skip_end = 0) {
  if (!t.buf) return;
  // Normalize the skip span so skip_start <= skip_end and it starts inside bb.
  skip_start = max(skip_start, bb.x0);
  skip_end = max(skip_end, skip_start);
  assert(sizeof(T) == t.bpp());
  size_t stride = t.stride();
  // When clearing multiple full-width rows, collapse them into a single large
  // "row" to avoid redundant setup from clearing each row individually. Note
  // that we can only safely do this if the stride is tightly packed.
  if (bb.width() == t.width && bb.height() > 1 && skip_start >= skip_end &&
      (t.should_free() || stride == t.width * sizeof(T))) {
    bb.x1 += (stride / sizeof(T)) * (bb.height() - 1);
    bb.y1 = bb.y0 + 1;
  }
  T* buf = (T*)t.sample_ptr(bb.x0, bb.y0);
  // Precompute the word-replicated value once for all rows.
  uint32_t chunk = clear_chunk(value);
  for (int rows = bb.height(); rows > 0; rows--) {
    // Clear the portion of the row before the skip span...
    if (bb.x0 < skip_start) {
      clear_row(buf, skip_start - bb.x0, value, chunk);
    }
    // ...and the portion after it.
    if (skip_end < bb.x1) {
      clear_row(buf + (skip_end - bb.x0), bb.x1 - skip_end, value, chunk);
    }
    buf += stride / sizeof(T);
  }
}
2185 
// Clear a single row `y` of texture `t` to its stored clear value, leaving
// the span [skip_start, skip_end) untouched.
template <typename T>
static inline void force_clear_row(Texture& t, int y, int skip_start = 0,
                                   int skip_end = 0) {
  assert(t.buf != nullptr);
  assert(sizeof(T) == t.bpp());
  assert(skip_start <= skip_end);
  T* buf = (T*)t.sample_ptr(0, y);
  uint32_t chunk = clear_chunk((T)t.clear_val);
  // Clear everything left of the skip span...
  if (skip_start > 0) {
    clear_row<T>(buf, skip_start, t.clear_val, chunk);
  }
  // ...and everything right of it.
  if (skip_end < t.width) {
    clear_row<T>(buf + skip_end, t.width - skip_end, t.clear_val, chunk);
  }
}
2201 
// Resolve a pending delayed clear on texture `t`, optionally leaving the
// rectangle `skip` untouched (its contents are about to be overwritten
// anyway). `t.cleared_rows` is a bitmask with one word per 32 rows; a set bit
// appears to mark a row whose contents are already valid, while zero bits
// mark rows still awaiting the delayed clear.
template <typename T>
static void force_clear(Texture& t, const IntRect* skip = nullptr) {
  if (!t.delay_clear || !t.cleared_rows) {
    return;
  }
  int y0 = 0;
  int y1 = t.height;
  int skip_start = 0;
  int skip_end = 0;
  if (skip) {
    // Clamp the skip rect to the texture bounds.
    y0 = clamp(skip->y0, 0, t.height);
    y1 = clamp(skip->y1, y0, t.height);
    skip_start = clamp(skip->x0, 0, t.width);
    skip_end = clamp(skip->x1, skip_start, t.width);
    // If the skip rect covers the whole texture, nothing needs clearing.
    if (skip_start <= 0 && skip_end >= t.width && y0 <= 0 && y1 >= t.height) {
      t.disable_delayed_clear();
      return;
    }
  }
  int num_masks = (y1 + 31) / 32;
  uint32_t* rows = t.cleared_rows;
  for (int i = y0 / 32; i < num_masks; i++) {
    uint32_t mask = rows[i];
    if (mask != ~0U) {
      // Mark this word's rows as valid, then clear the runs of pending rows.
      rows[i] = ~0U;
      int start = i * 32;
      while (mask) {
        // Trailing zero bits form a run of rows that still need clearing.
        int count = __builtin_ctz(mask);
        if (count > 0) {
          clear_buffer<T>(t, t.clear_val,
                          IntRect{0, start, t.width, start + count}, skip_start,
                          skip_end);
          t.delay_clear -= count;
          start += count;
          mask >>= count;
        }
        // Trailing one bits form a run of already-valid rows to skip over.
        count = __builtin_ctz(mask + 1);
        start += count;
        mask >>= count;
      }
      // Clear any pending rows left at the top end of this 32-row word.
      int count = (i + 1) * 32 - start;
      if (count > 0) {
        clear_buffer<T>(t, t.clear_val,
                        IntRect{0, start, t.width, start + count}, skip_start,
                        skip_end);
        t.delay_clear -= count;
      }
    }
  }
  // Once no rows remain pending, drop the delayed-clear state entirely.
  if (t.delay_clear <= 0) t.disable_delayed_clear();
}
2253 
prepare_texture(Texture & t,const IntRect * skip)2254 static void prepare_texture(Texture& t, const IntRect* skip) {
2255   if (t.delay_clear) {
2256     switch (t.internal_format) {
2257       case GL_RGBA8:
2258         force_clear<uint32_t>(t, skip);
2259         break;
2260       case GL_R8:
2261         force_clear<uint8_t>(t, skip);
2262         break;
2263       case GL_RG8:
2264         force_clear<uint16_t>(t, skip);
2265         break;
2266       default:
2267         assert(false);
2268         break;
2269     }
2270   }
2271 }
2272 
2273 // Setup a clear on a texture. This may either force an immediate clear or
2274 // potentially punt to a delayed clear, if applicable.
2275 template <typename T>
request_clear(Texture & t,T value,const IntRect & scissor)2276 static void request_clear(Texture& t, T value, const IntRect& scissor) {
2277   // If the clear would require a scissor, force clear anything outside
2278   // the scissor, and then immediately clear anything inside the scissor.
2279   if (!scissor.contains(t.offset_bounds())) {
2280     IntRect skip = scissor - t.offset;
2281     force_clear<T>(t, &skip);
2282     clear_buffer<T>(t, value, skip.intersection(t.bounds()));
2283   } else {
2284     // Do delayed clear for 2D texture without scissor.
2285     t.enable_delayed_clear(value);
2286   }
2287 }
2288 
2289 template <typename T>
request_clear(Texture & t,T value)2290 static inline void request_clear(Texture& t, T value) {
2291   // If scissoring is enabled, use the scissor rect. Otherwise, just scissor to
2292   // the entire texture bounds.
2293   request_clear(t, value, ctx->scissortest ? ctx->scissor : t.offset_bounds());
2294 }
2295 
2296 extern "C" {
2297 
InitDefaultFramebuffer(int x,int y,int width,int height,int stride,void * buf)2298 void InitDefaultFramebuffer(int x, int y, int width, int height, int stride,
2299                             void* buf) {
2300   Framebuffer& fb = ctx->framebuffers[0];
2301   if (!fb.color_attachment) {
2302     GenTextures(1, &fb.color_attachment);
2303   }
2304   // If the dimensions or buffer properties changed, we need to reallocate
2305   // the underlying storage for the color buffer texture.
2306   Texture& colortex = ctx->textures[fb.color_attachment];
2307   set_tex_storage(colortex, GL_RGBA8, width, height, buf, stride);
2308   colortex.offset = IntPoint(x, y);
2309   if (!fb.depth_attachment) {
2310     GenTextures(1, &fb.depth_attachment);
2311   }
2312   // Ensure dimensions of the depth buffer match the color buffer.
2313   Texture& depthtex = ctx->textures[fb.depth_attachment];
2314   set_tex_storage(depthtex, GL_DEPTH_COMPONENT24, width, height);
2315   depthtex.offset = IntPoint(x, y);
2316 }
2317 
GetColorBuffer(GLuint fbo,GLboolean flush,int32_t * width,int32_t * height,int32_t * stride)2318 void* GetColorBuffer(GLuint fbo, GLboolean flush, int32_t* width,
2319                      int32_t* height, int32_t* stride) {
2320   Framebuffer* fb = ctx->framebuffers.find(fbo);
2321   if (!fb || !fb->color_attachment) {
2322     return nullptr;
2323   }
2324   Texture& colortex = ctx->textures[fb->color_attachment];
2325   if (flush) {
2326     prepare_texture(colortex);
2327   }
2328   assert(colortex.offset == IntPoint(0, 0));
2329   if (width) {
2330     *width = colortex.width;
2331   }
2332   if (height) {
2333     *height = colortex.height;
2334   }
2335   if (stride) {
2336     *stride = colortex.stride();
2337   }
2338   return colortex.buf ? colortex.sample_ptr(0, 0) : nullptr;
2339 }
2340 
ResolveFramebuffer(GLuint fbo)2341 void ResolveFramebuffer(GLuint fbo) {
2342   Framebuffer* fb = ctx->framebuffers.find(fbo);
2343   if (!fb || !fb->color_attachment) {
2344     return;
2345   }
2346   Texture& colortex = ctx->textures[fb->color_attachment];
2347   prepare_texture(colortex);
2348 }
2349 
SetTextureBuffer(GLuint texid,GLenum internal_format,GLsizei width,GLsizei height,GLsizei stride,void * buf,GLsizei min_width,GLsizei min_height)2350 void SetTextureBuffer(GLuint texid, GLenum internal_format, GLsizei width,
2351                       GLsizei height, GLsizei stride, void* buf,
2352                       GLsizei min_width, GLsizei min_height) {
2353   Texture& t = ctx->textures[texid];
2354   set_tex_storage(t, internal_format, width, height, buf, stride, min_width,
2355                   min_height);
2356 }
2357 
CheckFramebufferStatus(GLenum target)2358 GLenum CheckFramebufferStatus(GLenum target) {
2359   Framebuffer* fb = get_framebuffer(target);
2360   if (!fb || !fb->color_attachment) {
2361     return GL_FRAMEBUFFER_UNSUPPORTED;
2362   }
2363   return GL_FRAMEBUFFER_COMPLETE;
2364 }
2365 
// Clear a sub-rectangle of texture `texture` to a constant value decoded
// from (format, type, data). Only level 0 and a single 2D layer are
// supported. Depth textures fill depth runs; color textures go through
// request_clear, which may defer the clear.
void ClearTexSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset,
                      GLint zoffset, GLsizei width, GLsizei height,
                      GLsizei depth, GLenum format, GLenum type,
                      const void* data) {
  if (level != 0) {
    assert(false);
    return;
  }
  Texture& t = ctx->textures[texture];
  assert(!t.locked);
  if (width <= 0 || height <= 0 || depth <= 0) {
    return;
  }
  assert(zoffset == 0 && depth == 1);
  IntRect scissor = {xoffset, yoffset, xoffset + width, yoffset + height};
  if (t.internal_format == GL_DEPTH_COMPONENT24) {
    // Depth clear: scale the normalized depth value to 24-bit fixed point.
    uint32_t value = 0xFFFFFF;
    switch (format) {
      case GL_DEPTH_COMPONENT:
        switch (type) {
          case GL_DOUBLE:
            value = uint32_t(*(const GLdouble*)data * 0xFFFFFF);
            break;
          case GL_FLOAT:
            value = uint32_t(*(const GLfloat*)data * 0xFFFFFF);
            break;
          default:
            assert(false);
            break;
        }
        break;
      default:
        assert(false);
        break;
    }
    if (t.cleared() && !scissor.contains(t.offset_bounds())) {
      // If we need to scissor the clear and the depth buffer was already
      // initialized, then just fill runs for that scissor area.
      t.fill_depth_runs(value, scissor);
    } else {
      // Otherwise, the buffer is either uninitialized or the clear would
      // encompass the entire buffer. If uninitialized, we can safely fill
      // the entire buffer with any value and thus ignore any scissoring.
      t.init_depth_runs(value);
    }
    return;
  }

  // Color clear: decode the clear value into a packed 32-bit RGBA color,
  // defaulting to opaque black. The switch fall-throughs fill the channels
  // present in `format` and leave the rest at their defaults.
  uint32_t color = 0xFF000000;
  switch (type) {
    case GL_FLOAT: {
      const GLfloat* f = (const GLfloat*)data;
      Float v = {0.0f, 0.0f, 0.0f, 1.0f};
      switch (format) {
        case GL_RGBA:
          v.w = f[3];  // alpha
          FALLTHROUGH;
        case GL_RGB:
          v.z = f[2];  // blue
          FALLTHROUGH;
        case GL_RG:
          v.y = f[1];  // green
          FALLTHROUGH;
        case GL_RED:
          v.x = f[0];  // red
          break;
        default:
          assert(false);
          break;
      }
      color = bit_cast<uint32_t>(CONVERT(round_pixel(v), U8));
      break;
    }
    case GL_UNSIGNED_BYTE: {
      const GLubyte* b = (const GLubyte*)data;
      switch (format) {
        case GL_RGBA:
          color = (color & ~0xFF000000) | (uint32_t(b[3]) << 24);  // alpha
          FALLTHROUGH;
        case GL_RGB:
          color = (color & ~0x00FF0000) | (uint32_t(b[2]) << 16);  // blue
          FALLTHROUGH;
        case GL_RG:
          color = (color & ~0x0000FF00) | (uint32_t(b[1]) << 8);  // green
          FALLTHROUGH;
        case GL_RED:
          color = (color & ~0x000000FF) | uint32_t(b[0]);  // red
          break;
        default:
          assert(false);
          break;
      }
      break;
    }
    default:
      assert(false);
      break;
  }

  // Dispatch the clear by texel size of the destination format.
  switch (t.internal_format) {
    case GL_RGBA8:
      // Clear color needs to swizzle to BGRA.
      request_clear<uint32_t>(t,
                              (color & 0xFF00FF00) |
                                  ((color << 16) & 0xFF0000) |
                                  ((color >> 16) & 0xFF),
                              scissor);
      break;
    case GL_R8:
      request_clear<uint8_t>(t, uint8_t(color & 0xFF), scissor);
      break;
    case GL_RG8:
      request_clear<uint16_t>(t, uint16_t(color & 0xFFFF), scissor);
      break;
    default:
      assert(false);
      break;
  }
}
2485 
ClearTexImage(GLuint texture,GLint level,GLenum format,GLenum type,const void * data)2486 void ClearTexImage(GLuint texture, GLint level, GLenum format, GLenum type,
2487                    const void* data) {
2488   Texture& t = ctx->textures[texture];
2489   IntRect scissor = t.offset_bounds();
2490   ClearTexSubImage(texture, level, scissor.x0, scissor.y0, 0, scissor.width(),
2491                    scissor.height(), 1, format, type, data);
2492 }
2493 
Clear(GLbitfield mask)2494 void Clear(GLbitfield mask) {
2495   Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
2496   if ((mask & GL_COLOR_BUFFER_BIT) && fb.color_attachment) {
2497     Texture& t = ctx->textures[fb.color_attachment];
2498     IntRect scissor = ctx->scissortest
2499                           ? ctx->scissor.intersection(t.offset_bounds())
2500                           : t.offset_bounds();
2501     ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
2502                      scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
2503                      ctx->clearcolor);
2504   }
2505   if ((mask & GL_DEPTH_BUFFER_BIT) && fb.depth_attachment) {
2506     Texture& t = ctx->textures[fb.depth_attachment];
2507     IntRect scissor = ctx->scissortest
2508                           ? ctx->scissor.intersection(t.offset_bounds())
2509                           : t.offset_bounds();
2510     ClearTexSubImage(fb.depth_attachment, 0, scissor.x0, scissor.y0, 0,
2511                      scissor.width(), scissor.height(), 1, GL_DEPTH_COMPONENT,
2512                      GL_DOUBLE, &ctx->cleardepth);
2513   }
2514 }
2515 
ClearColorRect(GLuint fbo,GLint xoffset,GLint yoffset,GLsizei width,GLsizei height,GLfloat r,GLfloat g,GLfloat b,GLfloat a)2516 void ClearColorRect(GLuint fbo, GLint xoffset, GLint yoffset, GLsizei width,
2517                     GLsizei height, GLfloat r, GLfloat g, GLfloat b,
2518                     GLfloat a) {
2519   GLfloat color[] = {r, g, b, a};
2520   Framebuffer& fb = ctx->framebuffers[fbo];
2521   Texture& t = ctx->textures[fb.color_attachment];
2522   IntRect scissor =
2523       IntRect{xoffset, yoffset, xoffset + width, yoffset + height}.intersection(
2524           t.offset_bounds());
2525   ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
2526                    scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
2527                    color);
2528 }
2529 
InvalidateFramebuffer(GLenum target,GLsizei num_attachments,const GLenum * attachments)2530 void InvalidateFramebuffer(GLenum target, GLsizei num_attachments,
2531                            const GLenum* attachments) {
2532   Framebuffer* fb = get_framebuffer(target);
2533   if (!fb || num_attachments <= 0 || !attachments) {
2534     return;
2535   }
2536   for (GLsizei i = 0; i < num_attachments; i++) {
2537     switch (attachments[i]) {
2538       case GL_DEPTH_ATTACHMENT: {
2539         Texture& t = ctx->textures[fb->depth_attachment];
2540         t.set_cleared(false);
2541         break;
2542       }
2543       case GL_COLOR_ATTACHMENT0: {
2544         Texture& t = ctx->textures[fb->color_attachment];
2545         t.disable_delayed_clear();
2546         break;
2547       }
2548     }
2549   }
2550 }
2551 
// Read back a rectangle of pixels from the read framebuffer's color
// attachment into `data` (which may be an offset into a bound
// GL_PIXEL_PACK_BUFFER). The requested rect is clipped against the texture,
// advancing the destination pointer for any clipped-off leading rows/columns.
void ReadPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format,
                GLenum type, void* data) {
  data = get_pixel_pack_buffer_data(data);
  if (!data) return;
  Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
  if (!fb) return;
  assert(format == GL_RED || format == GL_RGBA || format == GL_RGBA_INTEGER ||
         format == GL_BGRA || format == GL_RG);
  Texture& t = ctx->textures[fb->color_attachment];
  if (!t.buf) return;
  // Resolve any pending delayed clear before reading pixel data.
  prepare_texture(t);
  // debugf("read pixels %d, %d, %d, %d from fb %d with format %x\n", x, y,
  // width, height, ctx->read_framebuffer_binding, t.internal_format);
  // Translate from window coordinates into the texture's local space.
  x -= t.offset.x;
  y -= t.offset.y;
  assert(x >= 0 && y >= 0);
  assert(x + width <= t.width);
  assert(y + height <= t.height);
  if (internal_format_for_data(format, type) != t.internal_format) {
    debugf("mismatched format for read pixels: %x vs %x\n", t.internal_format,
           internal_format_for_data(format, type));
    assert(false);
    return;
  }
  // Only support readback conversions that are reversible
  assert(!format_requires_conversion(format, t.internal_format) ||
         bytes_for_internal_format(format) == t.bpp());
  uint8_t* dest = (uint8_t*)data;
  // Destination rows are tightly packed at the requested width.
  size_t destStride = width * t.bpp();
  // Clip the rect to the texture, skipping the corresponding destination
  // rows/columns so surviving pixels land at their original positions.
  if (y < 0) {
    dest += -y * destStride;
    height += y;
    y = 0;
  }
  if (y + height > t.height) {
    height = t.height - y;
  }
  if (x < 0) {
    dest += -x * t.bpp();
    width += x;
    x = 0;
  }
  if (x + width > t.width) {
    width = t.width - x;
  }
  if (width <= 0 || height <= 0) {
    return;
  }
  convert_copy(format, t.internal_format, dest, destStride,
               (const uint8_t*)t.sample_ptr(x, y), t.stride(), width, height);
}
2603 
CopyImageSubData(GLuint srcName,GLenum srcTarget,UNUSED GLint srcLevel,GLint srcX,GLint srcY,GLint srcZ,GLuint dstName,GLenum dstTarget,UNUSED GLint dstLevel,GLint dstX,GLint dstY,GLint dstZ,GLsizei srcWidth,GLsizei srcHeight,GLsizei srcDepth)2604 void CopyImageSubData(GLuint srcName, GLenum srcTarget, UNUSED GLint srcLevel,
2605                       GLint srcX, GLint srcY, GLint srcZ, GLuint dstName,
2606                       GLenum dstTarget, UNUSED GLint dstLevel, GLint dstX,
2607                       GLint dstY, GLint dstZ, GLsizei srcWidth,
2608                       GLsizei srcHeight, GLsizei srcDepth) {
2609   assert(srcLevel == 0 && dstLevel == 0);
2610   assert(srcZ == 0 && srcDepth == 1 && dstZ == 0);
2611   if (srcTarget == GL_RENDERBUFFER) {
2612     Renderbuffer& rb = ctx->renderbuffers[srcName];
2613     srcName = rb.texture;
2614   }
2615   if (dstTarget == GL_RENDERBUFFER) {
2616     Renderbuffer& rb = ctx->renderbuffers[dstName];
2617     dstName = rb.texture;
2618   }
2619   Texture& srctex = ctx->textures[srcName];
2620   if (!srctex.buf) return;
2621   prepare_texture(srctex);
2622   Texture& dsttex = ctx->textures[dstName];
2623   if (!dsttex.buf) return;
2624   assert(!dsttex.locked);
2625   IntRect skip = {dstX, dstY, dstX + srcWidth, dstY + srcHeight};
2626   prepare_texture(dsttex, &skip);
2627   assert(srctex.internal_format == dsttex.internal_format);
2628   assert(srcWidth >= 0);
2629   assert(srcHeight >= 0);
2630   assert(srcX + srcWidth <= srctex.width);
2631   assert(srcY + srcHeight <= srctex.height);
2632   assert(dstX + srcWidth <= dsttex.width);
2633   assert(dstY + srcHeight <= dsttex.height);
2634   int bpp = srctex.bpp();
2635   int src_stride = srctex.stride();
2636   int dest_stride = dsttex.stride();
2637   char* dest = dsttex.sample_ptr(dstX, dstY);
2638   char* src = srctex.sample_ptr(srcX, srcY);
2639   for (int y = 0; y < srcHeight; y++) {
2640     memcpy(dest, src, srcWidth * bpp);
2641     dest += dest_stride;
2642     src += src_stride;
2643   }
2644 }
2645 
CopyTexSubImage2D(GLenum target,UNUSED GLint level,GLint xoffset,GLint yoffset,GLint x,GLint y,GLsizei width,GLsizei height)2646 void CopyTexSubImage2D(GLenum target, UNUSED GLint level, GLint xoffset,
2647                        GLint yoffset, GLint x, GLint y, GLsizei width,
2648                        GLsizei height) {
2649   assert(level == 0);
2650   Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
2651   if (!fb) return;
2652   CopyImageSubData(fb->color_attachment, GL_TEXTURE_2D, 0, x, y, 0,
2653                    ctx->get_binding(target), GL_TEXTURE_2D, 0, xoffset, yoffset,
2654                    0, width, height, 1);
2655 }
2656 
2657 }  // extern "C"
2658 
2659 #include "blend.h"
2660 #include "composite.h"
2661 #include "swgl_ext.h"
2662 
2663 #pragma GCC diagnostic push
2664 #pragma GCC diagnostic ignored "-Wuninitialized"
2665 #pragma GCC diagnostic ignored "-Wunused-function"
2666 #pragma GCC diagnostic ignored "-Wunused-parameter"
2667 #pragma GCC diagnostic ignored "-Wunused-variable"
2668 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
2669 #ifdef __clang__
2670 #  pragma GCC diagnostic ignored "-Wunused-private-field"
2671 #else
2672 #  pragma GCC diagnostic ignored "-Wunused-but-set-variable"
2673 #endif
2674 #include "load_shader.h"
2675 #pragma GCC diagnostic pop
2676 
2677 #include "rasterize.h"
2678 
validate()2679 void VertexArray::validate() {
2680   int last_enabled = -1;
2681   for (int i = 0; i <= max_attrib; i++) {
2682     VertexAttrib& attr = attribs[i];
2683     if (attr.enabled) {
2684       // VertexArray &v = ctx->vertex_arrays[attr.vertex_array];
2685       Buffer& vertex_buf = ctx->buffers[attr.vertex_buffer];
2686       attr.buf = vertex_buf.buf;
2687       attr.buf_size = vertex_buf.size;
2688       // debugf("%d %x %d %d %d %d\n", i, attr.type, attr.size, attr.stride,
2689       // attr.offset, attr.divisor);
2690       last_enabled = i;
2691     }
2692   }
2693   max_attrib = last_enabled;
2694 }
2695 
2696 extern "C" {
2697 
// Draws `instancecount` instances of `count` indexed vertices from the current
// vertex array into the draw framebuffer. Supports GL_UNSIGNED_SHORT and
// GL_UNSIGNED_INT element types (triangles only), plus a non-standard GL_NONE
// element type that draws without an index buffer, like DrawArrays.
void DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
                           GLintptr offset, GLsizei instancecount) {
  // Nothing to do without sane parameters and both shader stages loaded.
  if (offset < 0 || count <= 0 || instancecount <= 0 || !vertex_shader ||
      !fragment_shader) {
    return;
  }

  Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
  if (!fb.color_attachment) {
    return;
  }
  Texture& colortex = ctx->textures[fb.color_attachment];
  if (!colortex.buf) {
    return;
  }
  assert(!colortex.locked);
  assert(colortex.internal_format == GL_RGBA8 ||
         colortex.internal_format == GL_R8);
  // When depth testing is off this indexes texture 0 — presumably a null
  // entry with no buf, so the depthtex.buf check below skips it. TODO confirm.
  Texture& depthtex = ctx->textures[ctx->depthtest ? fb.depth_attachment : 0];
  if (depthtex.buf) {
    assert(depthtex.internal_format == GL_DEPTH_COMPONENT24);
    assert(colortex.width == depthtex.width &&
           colortex.height == depthtex.height);
    assert(colortex.offset == depthtex.offset);
  }

  // debugf("current_vertex_array %d\n", ctx->current_vertex_array);
  // debugf("indices size: %d\n", indices_buf.size);
  // Lazily re-resolve attrib buffer pointers if the vertex array changed.
  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
  if (ctx->validate_vertex_array) {
    ctx->validate_vertex_array = false;
    v.validate();
  }

#ifdef PRINT_TIMINGS
  uint64_t start = get_time_value();
#endif

  // Reset per-draw statistics; updated by the rasterizer as quads are shaded.
  ctx->shaded_rows = 0;
  ctx->shaded_pixels = 0;

  vertex_shader->init_batch();

  switch (type) {
    case GL_UNSIGNED_SHORT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint16_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_UNSIGNED_INT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint32_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_NONE:
      // Non-standard GL extension - if element type is GL_NONE, then we don't
      // use any element buffer and behave as if DrawArrays was called instead.
      for (GLsizei instance = 0; instance < instancecount; instance++) {
        switch (mode) {
          case GL_LINES:
            // Each pair of vertices forms one line; trailing odd vertex is
            // ignored.
            for (GLsizei i = 0; i + 2 <= count; i += 2) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 2);
              draw_quad(2, colortex, depthtex);
            }
            break;
          case GL_TRIANGLES:
            // Each triple of vertices forms one triangle; trailing partial
            // primitive is ignored.
            for (GLsizei i = 0; i + 3 <= count; i += 3) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 3);
              draw_quad(3, colortex, depthtex);
            }
            break;
          default:
            assert(false);
            break;
        }
      }
      break;
    default:
      assert(false);
      break;
  }

  // Accumulate shaded pixel count into any active occlusion query.
  if (ctx->samples_passed_query) {
    Query& q = ctx->queries[ctx->samples_passed_query];
    q.value += ctx->shaded_pixels;
  }

#ifdef PRINT_TIMINGS
  uint64_t end = get_time_value();
  printf(
      "%7.3fms draw(%s, %d): %d pixels in %d rows (avg %f pixels/row, "
      "%fns/pixel)\n",
      double(end - start) / (1000. * 1000.),
      ctx->programs[ctx->current_program].impl->get_name(), instancecount,
      ctx->shaded_pixels, ctx->shaded_rows,
      double(ctx->shaded_pixels) / ctx->shaded_rows,
      double(end - start) / max(ctx->shaded_pixels, 1));
#endif
}
2797 
// No-op outside of timing builds: rendering happens synchronously on the CPU,
// so there is nothing to flush or wait for.
void Finish() {
#ifdef PRINT_TIMINGS
  printf("Finish\n");
#endif
}
2803 
MakeCurrent(Context * c)2804 void MakeCurrent(Context* c) {
2805   if (ctx == c) {
2806     return;
2807   }
2808   ctx = c;
2809   setup_program(ctx ? ctx->current_program : 0);
2810 }
2811 
CreateContext()2812 Context* CreateContext() { return new Context; }
2813 
ReferenceContext(Context * c)2814 void ReferenceContext(Context* c) {
2815   if (!c) {
2816     return;
2817   }
2818   ++c->references;
2819 }
2820 
DestroyContext(Context * c)2821 void DestroyContext(Context* c) {
2822   if (!c) {
2823     return;
2824   }
2825   assert(c->references > 0);
2826   --c->references;
2827   if (c->references > 0) {
2828     return;
2829   }
2830   if (ctx == c) {
2831     MakeCurrent(nullptr);
2832   }
2833   delete c;
2834 }
2835 
ReportMemory(Context * ctx,size_t (* size_of_op)(void *))2836 size_t ReportMemory(Context* ctx, size_t (*size_of_op)(void*)) {
2837   size_t size = 0;
2838   if (ctx) {
2839     for (auto& t : ctx->textures) {
2840       if (t && t->should_free()) {
2841         size += size_of_op(t->buf);
2842       }
2843     }
2844   }
2845   return size;
2846 }
2847 }  // extern "C"
2848