1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 #include <stdlib.h>
6 #include <stdint.h>
7 #include <string.h>
8 #include <assert.h>
9 #include <stdio.h>
10 #include <math.h>
11 
12 #ifdef __MACH__
13 #  include <mach/mach.h>
14 #  include <mach/mach_time.h>
15 #else
16 #  include <time.h>
17 #endif
18 
19 #ifdef NDEBUG
20 #  define debugf(...)
21 #else
22 #  define debugf(...) printf(__VA_ARGS__)
23 #endif
24 
25 // #define PRINT_TIMINGS
26 
27 #ifdef _WIN32
28 #  define ALWAYS_INLINE __forceinline
29 #  define NO_INLINE __declspec(noinline)
30 
// Including Windows.h brings a huge amount of namespace pollution so just
// define a couple of things manually
33 typedef int BOOL;
34 #  define WINAPI __stdcall
35 #  define DECLSPEC_IMPORT __declspec(dllimport)
36 #  define WINBASEAPI DECLSPEC_IMPORT
37 typedef unsigned long DWORD;
38 typedef long LONG;
39 typedef __int64 LONGLONG;
40 #  define DUMMYSTRUCTNAME
41 
42 typedef union _LARGE_INTEGER {
43   struct {
44     DWORD LowPart;
45     LONG HighPart;
46   } DUMMYSTRUCTNAME;
47   struct {
48     DWORD LowPart;
49     LONG HighPart;
50   } u;
51   LONGLONG QuadPart;
52 } LARGE_INTEGER;
53 extern "C" {
54 WINBASEAPI BOOL WINAPI
55 QueryPerformanceCounter(LARGE_INTEGER* lpPerformanceCount);
56 
57 WINBASEAPI BOOL WINAPI QueryPerformanceFrequency(LARGE_INTEGER* lpFrequency);
58 }
59 
60 #else
61 // GCC is slower when dealing with always_inline, especially in debug builds.
62 // When using Clang, use always_inline more aggressively.
63 #  if defined(__clang__) || defined(NDEBUG)
64 #    define ALWAYS_INLINE __attribute__((always_inline)) inline
65 #  else
66 #    define ALWAYS_INLINE inline
67 #  endif
68 #  define NO_INLINE __attribute__((noinline))
69 #endif
70 
71 // Some functions may cause excessive binary bloat if inlined in debug or with
72 // GCC builds, so use PREFER_INLINE on these instead of ALWAYS_INLINE.
73 #if defined(__clang__) && defined(NDEBUG)
74 #  define PREFER_INLINE ALWAYS_INLINE
75 #else
76 #  define PREFER_INLINE inline
77 #endif
78 
79 #define UNREACHABLE __builtin_unreachable()
80 
81 #define UNUSED [[maybe_unused]]
82 
83 #define FALLTHROUGH [[fallthrough]]
84 
85 #ifdef MOZILLA_CLIENT
86 #  define IMPLICIT __attribute__((annotate("moz_implicit")))
87 #else
88 #  define IMPLICIT
89 #endif
90 
91 #include "gl_defs.h"
92 #include "glsl.h"
93 #include "program.h"
94 #include "texture.h"
95 
96 using namespace glsl;
97 
98 typedef ivec2_scalar IntPoint;
99 
// Axis-aligned integer rectangle covering the half-open span
// [x0, x1) x [y0, y1).
struct IntRect {
  int x0;
  int y0;
  int x1;
  int y1;

  IntRect() : x0(0), y0(0), x1(0), y1(0) {}
  IntRect(int x0, int y0, int x1, int y1) : x0(x0), y0(y0), x1(x1), y1(y1) {}
  IntRect(IntPoint origin, IntPoint size)
      : x0(origin.x),
        y0(origin.y),
        x1(origin.x + size.x),
        y1(origin.y + size.y) {}

  int width() const { return x1 - x0; }
  int height() const { return y1 - y0; }
  // Empty if either dimension is zero or inverted.
  bool is_empty() const { return width() <= 0 || height() <= 0; }

  IntPoint origin() const { return IntPoint(x0, y0); }

  bool same_size(const IntRect& o) const {
    return width() == o.width() && height() == o.height();
  }

  // Whether o lies entirely inside this rect.
  bool contains(const IntRect& o) const {
    return o.x0 >= x0 && o.y0 >= y0 && o.x1 <= x1 && o.y1 <= y1;
  }

  // Clip this rect to its overlap with o, in place. Disjoint rects yield
  // inverted bounds, which read back as is_empty().
  IntRect& intersect(const IntRect& o) {
    x0 = max(x0, o.x0);
    y0 = max(y0, o.y0);
    x1 = min(x1, o.x1);
    y1 = min(y1, o.y1);
    return *this;
  }

  // Like intersect(), but returns the clipped copy without modifying this.
  IntRect intersection(const IntRect& o) {
    IntRect result = *this;
    result.intersect(o);
    return result;
  }

  // Scale from source-space to dest-space, optionally rounding inward.
  // x1/y1 always truncate toward zero; roundIn additionally rounds x0/y0
  // upward so the result never extends past the scaled source rect.
  IntRect& scale(int srcWidth, int srcHeight, int dstWidth, int dstHeight,
                 bool roundIn = false) {
    x0 = (x0 * dstWidth + (roundIn ? srcWidth - 1 : 0)) / srcWidth;
    y0 = (y0 * dstHeight + (roundIn ? srcHeight - 1 : 0)) / srcHeight;
    x1 = (x1 * dstWidth) / srcWidth;
    y1 = (y1 * dstHeight) / srcHeight;
    return *this;
  }

  // Flip the rect's Y coords around inflection point at Y=offset
  void invert_y(int offset) {
    y0 = offset - y0;
    y1 = offset - y1;
    // Reflection reverses the relative order of y0/y1; swap them back so
    // the bounds stay ordered.
    swap(y0, y1);
  }

  // Translate the rect by the given offset, in place.
  IntRect& offset(const IntPoint& o) {
    x0 += o.x;
    y0 += o.y;
    x1 += o.x;
    y1 += o.y;
    return *this;
  }

  IntRect operator+(const IntPoint& o) const {
    return IntRect(*this).offset(o);
  }
  IntRect operator-(const IntPoint& o) const {
    return IntRect(*this).offset(-o);
  }
};
174 
175 typedef vec2_scalar Point2D;
176 typedef vec4_scalar Point3D;
177 
// A half-open range of integers [start, end).
struct IntRange {
  int start;
  int end;

  // Number of elements spanned by the range.
  int len() const { return end - start; }

  // Clip this range against another, yielding the overlapping subrange.
  IntRange intersect(IntRange r) const {
    IntRange clipped = *this;
    if (r.start > clipped.start) clipped.start = r.start;
    if (r.end < clipped.end) clipped.end = r.end;
    return clipped;
  }
};
188 
189 struct FloatRange {
190   float start;
191   float end;
192 
clipFloatRange193   float clip(float x) const { return clamp(x, start, end); }
194 
clipFloatRange195   FloatRange clip(FloatRange r) const { return {clip(r.start), clip(r.end)}; }
196 
mergeFloatRange197   FloatRange merge(FloatRange r) const {
198     return {min(start, r.start), max(end, r.end)};
199   }
200 
roundFloatRange201   IntRange round() const {
202     return {int(floor(start + 0.5f)), int(floor(end + 0.5f))};
203   }
204 
round_outFloatRange205   IntRange round_out() const { return {int(floor(start)), int(ceil(end))}; }
206 };
207 
208 template <typename P>
x_range(P p0,P p1)209 static inline FloatRange x_range(P p0, P p1) {
210   return {min(p0.x, p1.x), max(p0.x, p1.x)};
211 }
212 
// State for a single vertex attribute, mirroring glVertexAttribPointer and
// related calls.
struct VertexAttrib {
  size_t size = 0;  // in bytes
  GLenum type = 0;  // component type (e.g. GL_FLOAT)
  bool normalized = false;
  GLsizei stride = 0;
  GLuint offset = 0;
  bool enabled = false;
  GLuint divisor = 0;     // instance divisor; 0 advances per vertex
  int vertex_array = 0;   // id of the VAO this attribute belongs to
  int vertex_buffer = 0;  // id of the buffer the data is sourced from
  char* buf = nullptr;  // XXX: this can easily dangle
  size_t buf_size = 0;  // this will let us bounds check
};
226 
bytes_for_internal_format(GLenum internal_format)227 static int bytes_for_internal_format(GLenum internal_format) {
228   switch (internal_format) {
229     case GL_RGBA32F:
230       return 4 * 4;
231     case GL_RGBA32I:
232       return 4 * 4;
233     case GL_RGBA8:
234     case GL_BGRA8:
235     case GL_RGBA:
236       return 4;
237     case GL_R8:
238     case GL_RED:
239       return 1;
240     case GL_RG8:
241     case GL_RG:
242       return 2;
243     case GL_DEPTH_COMPONENT:
244     case GL_DEPTH_COMPONENT16:
245     case GL_DEPTH_COMPONENT24:
246     case GL_DEPTH_COMPONENT32:
247       return 4;
248     case GL_RGB_RAW_422_APPLE:
249       return 2;
250     case GL_R16:
251       return 2;
252     default:
253       debugf("internal format: %x\n", internal_format);
254       assert(0);
255       return 0;
256   }
257 }
258 
aligned_stride(int row_bytes)259 static inline int aligned_stride(int row_bytes) { return (row_bytes + 3) & ~3; }
260 
gl_format_to_texture_format(int type)261 static TextureFormat gl_format_to_texture_format(int type) {
262   switch (type) {
263     case GL_RGBA32F:
264       return TextureFormat::RGBA32F;
265     case GL_RGBA32I:
266       return TextureFormat::RGBA32I;
267     case GL_RGBA8:
268       return TextureFormat::RGBA8;
269     case GL_R8:
270       return TextureFormat::R8;
271     case GL_RG8:
272       return TextureFormat::RG8;
273     case GL_R16:
274       return TextureFormat::R16;
275     case GL_RGB_RAW_422_APPLE:
276       return TextureFormat::YUV422;
277     default:
278       assert(0);
279       return TextureFormat::RGBA8;
280   }
281 }
282 
// A GL query object; holds the most recent query result value (used for
// the GL_TIME_ELAPSED and GL_SAMPLES_PASSED targets tracked in Context).
struct Query {
  uint64_t value = 0;
};
286 
287 struct Buffer {
288   char* buf = nullptr;
289   size_t size = 0;
290   size_t capacity = 0;
291 
allocateBuffer292   bool allocate(size_t new_size) {
293     // If the size remains unchanged, don't allocate anything.
294     if (new_size == size) {
295       return false;
296     }
297     // If the new size is within the existing capacity of the buffer, just
298     // reuse the existing buffer.
299     if (new_size <= capacity) {
300       size = new_size;
301       return true;
302     }
303     // Otherwise we need to reallocate the buffer to hold up to the requested
304     // larger size.
305     char* new_buf = (char*)realloc(buf, new_size);
306     assert(new_buf);
307     if (!new_buf) {
308       // If we fail, null out the buffer rather than leave around the old
309       // allocation state.
310       cleanup();
311       return false;
312     }
313     // The reallocation succeeded, so install the buffer.
314     buf = new_buf;
315     size = new_size;
316     capacity = new_size;
317     return true;
318   }
319 
cleanupBuffer320   void cleanup() {
321     if (buf) {
322       free(buf);
323       buf = nullptr;
324       size = 0;
325       capacity = 0;
326     }
327   }
328 
~BufferBuffer329   ~Buffer() { cleanup(); }
330 };
331 
// A GL framebuffer object: ids of its color and depth attachments
// (0 = no attachment).
struct Framebuffer {
  GLuint color_attachment = 0;
  GLuint depth_attachment = 0;
};
336 
// A GL renderbuffer object, backed internally by a texture.
struct Renderbuffer {
  GLuint texture = 0;  // id of the backing texture object

  // Cleanup hook invoked by ObjectStore::erase when this object is deleted.
  void on_erase();
};
342 
gl_filter_to_texture_filter(int type)343 TextureFilter gl_filter_to_texture_filter(int type) {
344   switch (type) {
345     case GL_NEAREST:
346       return TextureFilter::NEAREST;
347     case GL_NEAREST_MIPMAP_LINEAR:
348       return TextureFilter::NEAREST;
349     case GL_NEAREST_MIPMAP_NEAREST:
350       return TextureFilter::NEAREST;
351     case GL_LINEAR:
352       return TextureFilter::LINEAR;
353     case GL_LINEAR_MIPMAP_LINEAR:
354       return TextureFilter::LINEAR;
355     case GL_LINEAR_MIPMAP_NEAREST:
356       return TextureFilter::LINEAR;
357     default:
358       assert(0);
359       return TextureFilter::NEAREST;
360   }
361 }
362 
// 2D texture state and its backing pixel storage. The backing buffer is
// either allocated internally by SWGL (SHOULD_FREE flag set) or supplied
// externally via set_buffer().
struct Texture {
  GLenum internal_format = 0;
  int width = 0;
  int height = 0;
  char* buf = nullptr;      // backing pixel data; may be null
  size_t buf_size = 0;      // bytes allocated internally; 0 for external bufs
  uint32_t buf_stride = 0;  // bytes between the starts of successive rows
  uint8_t buf_bpp = 0;      // bytes per pixel for internal_format
  GLenum min_filter = GL_NEAREST;
  GLenum mag_filter = GL_LINEAR;
  // The number of active locks on this texture. If this texture has any active
  // locks, we need to disallow modifying or destroying the texture as it may
  // be accessed by other threads where modifications could lead to races.
  int32_t locked = 0;
  // When used as an attachment of a framebuffer, rendering to the texture
  // behaves as if it is located at the given offset such that the offset is
  // subtracted from all transformed vertexes after the viewport is applied.
  IntPoint offset;

  enum FLAGS {
    // If the buffer is internally-allocated by SWGL
    SHOULD_FREE = 1 << 1,
    // If the buffer has been cleared to initialize it. Currently this is only
    // utilized by depth buffers which need to know when depth runs have reset
    // to a valid row state. When unset, the depth runs may contain garbage.
    CLEARED = 1 << 2,
  };
  int flags = SHOULD_FREE;
  bool should_free() const { return bool(flags & SHOULD_FREE); }
  bool cleared() const { return bool(flags & CLEARED); }

  // Set or clear a single bit of the FLAGS bitfield.
  void set_flag(int flag, bool val) {
    if (val) {
      flags |= flag;
    } else {
      flags &= ~flag;
    }
  }
  void set_should_free(bool val) {
    // buf must be null before SHOULD_FREE can be safely toggled. Otherwise, we
    // might mistakenly realloc an externally allocated buffer as if it were an
    // internally allocated one.
    assert(!buf);
    set_flag(SHOULD_FREE, val);
  }
  void set_cleared(bool val) { set_flag(CLEARED, val); }

  // Delayed-clearing state. When a clear of an FB is requested, we don't
  // immediately clear each row, as the rows may be subsequently overwritten
  // by draw calls, allowing us to skip the work of clearing the affected rows
  // either fully or partially. Instead, we keep a bit vector of rows that need
  // to be cleared later and save the value they need to be cleared with so
  // that we can clear these rows individually when they are touched by draws.
  // This currently only works for 2D textures, but not on texture arrays.
  int delay_clear = 0;               // number of rows still pending a clear
  uint32_t clear_val = 0;            // value pending rows are cleared with
  uint32_t* cleared_rows = nullptr;  // bit vector: 1 bit per row, 1 = cleared

  // Depth-run bookkeeping for depth textures; defined later in this file.
  void init_depth_runs(uint32_t z);
  void fill_depth_runs(uint32_t z, const IntRect& scissor);

  // Mark every row as needing a delayed clear to val. Bits for rows beyond
  // the end of the texture are pre-set so they never count as pending.
  void enable_delayed_clear(uint32_t val) {
    delay_clear = height;
    clear_val = val;
    if (!cleared_rows) {
      cleared_rows = new uint32_t[(height + 31) / 32];
    }
    memset(cleared_rows, 0, ((height + 31) / 32) * sizeof(uint32_t));
    if (height & 31) {
      cleared_rows[height / 32] = ~0U << (height & 31);
    }
  }

  // Drop any pending delayed clear without performing it.
  void disable_delayed_clear() {
    if (cleared_rows) {
      delete[] cleared_rows;
      cleared_rows = nullptr;
      delay_clear = 0;
    }
  }

  int bpp() const { return buf_bpp; }
  // Refresh buf_bpp from the current internal_format.
  void set_bpp() { buf_bpp = bytes_for_internal_format(internal_format); }

  size_t stride() const { return buf_stride; }
  // Refresh buf_stride from the current bpp and width.
  void set_stride() { buf_stride = aligned_stride(buf_bpp * width); }

  // Set an external backing buffer of this texture.
  void set_buffer(void* new_buf, size_t new_stride) {
    assert(!should_free());
    // Ensure that the supplied stride is at least as big as the row data and
    // is aligned to the smaller of either the BPP or word-size. We need to at
    // least be able to sample data from within a row and sample whole pixels
    // of smaller formats without risking unaligned access.
    set_bpp();
    set_stride();
    assert(new_stride >= size_t(bpp() * width) &&
           new_stride % min(bpp(), sizeof(uint32_t)) == 0);

    buf = (char*)new_buf;
    buf_size = 0;
    buf_stride = new_stride;
  }

  // (Re)allocate the internal backing buffer if necessary. Returns true only
  // if the allocation actually changed. min_width/min_height allow reserving
  // more space than the current dimensions to limit future reallocations.
  bool allocate(bool force = false, int min_width = 0, int min_height = 0) {
    assert(!locked);  // Locked textures shouldn't be reallocated
    // If we get here, some GL API call that invalidates the texture was used.
    // Mark the buffer as not-cleared to signal this.
    set_cleared(false);
    // Check if there is either no buffer currently or if we forced validation
    // of the buffer size because some dimension might have changed.
    if ((!buf || force) && should_free()) {
      // Initialize the buffer's BPP and stride, since they may have changed.
      set_bpp();
      set_stride();
      // Compute new size based on the maximum potential stride, rather than
      // the current stride, to hopefully avoid reallocations when size would
      // otherwise change too much...
      size_t max_stride = max(buf_stride, aligned_stride(buf_bpp * min_width));
      size_t size = max_stride * max(height, min_height);
      if ((!buf && size > 0) || size > buf_size) {
        // Allocate with a SIMD register-sized tail of padding at the end so we
        // can safely read or write past the end of the texture with SIMD ops.
        // Currently only the flat Z-buffer texture needs this padding due to
        // full-register loads and stores in check_depth and discard_depth. In
        // case some code in the future accidentally uses a linear filter on a
        // texture with less than 2 pixels per row, we also add this padding
        // just to be safe. All other texture types and use-cases should be
        // safe to omit padding.
        size_t padding =
            internal_format == GL_DEPTH_COMPONENT24 || max(width, min_width) < 2
                ? sizeof(Float)
                : 0;
        char* new_buf = (char*)realloc(buf, size + padding);
        assert(new_buf);
        if (new_buf) {
          // Successfully reallocated the buffer, so go ahead and set it.
          buf = new_buf;
          buf_size = size;
          return true;
        }
        // Allocation failed, so ensure we don't leave stale buffer state.
        cleanup();
      }
    }
    // Nothing changed...
    return false;
  }

  // Release the backing buffer (only if internally owned) and reset all
  // buffer-related state, including any pending delayed clear.
  void cleanup() {
    assert(!locked);  // Locked textures shouldn't be destroyed
    if (buf) {
      // If we need to toggle SHOULD_FREE state, ensure that buf is nulled out,
      // regardless of whether we internally allocated it. This will prevent us
      // from wrongly treating buf as having been internally allocated for when
      // we go to realloc if it actually was externally allocated.
      if (should_free()) {
        free(buf);
      }
      buf = nullptr;
      buf_size = 0;
      buf_bpp = 0;
      buf_stride = 0;
    }
    disable_delayed_clear();
  }

  ~Texture() { cleanup(); }

  // The texture's dimensions as a rect based at the origin.
  IntRect bounds() const { return IntRect{0, 0, width, height}; }
  // The bounds translated by the framebuffer attachment offset.
  IntRect offset_bounds() const { return bounds() + offset; }

  // Find the valid sampling bounds relative to the requested region
  IntRect sample_bounds(const IntRect& req, bool invertY = false) const {
    IntRect bb = bounds().intersect(req) - req.origin();
    if (invertY) bb.invert_y(req.height());
    return bb;
  }

  // Get a pointer for sampling at the given offset
  char* sample_ptr(int x, int y) const {
    return buf + y * stride() + x * bpp();
  }

  // Get a pointer for sampling the requested region and limit to the provided
  // sampling bounds
  char* sample_ptr(const IntRect& req, const IntRect& bounds,
                   bool invertY = false) const {
    // Offset the sample pointer by the clamped bounds
    int x = req.x0 + bounds.x0;
    // Invert the Y offset if necessary
    int y = invertY ? req.y1 - 1 - bounds.y0 : req.y0 + bounds.y0;
    return sample_ptr(x, y);
  }
};
558 
559 // The last vertex attribute is reserved as a null attribute in case a vertex
560 // attribute is used without being set.
561 #define MAX_ATTRIBS 17
562 #define NULL_ATTRIB 16
// Vertex array object (VAO) state: the attribute bindings plus the element
// array buffer binding.
struct VertexArray {
  VertexAttrib attribs[MAX_ATTRIBS];
  // Highest attribute index in use; -1 when none. NOTE(review): appears to
  // be maintained by validate() -- confirm where attribs are enabled.
  int max_attrib = -1;
  // The GL spec defines element array buffer binding to be part of VAO state.
  GLuint element_array_buffer_binding = 0;

  // Revalidate cached attribute state; defined later in this file.
  void validate();
};
571 
// A GL shader object: records the shader stage and the loader callback used
// to locate its compiled-in program implementation (see program.h).
struct Shader {
  GLenum type = 0;  // shader stage enum (vertex or fragment)
  ProgramLoader loader = nullptr;
};
576 
// A linked GL program and cached pointers to its per-stage implementations.
struct Program {
  ProgramImpl* impl = nullptr;
  // NOTE(review): vert_impl/frag_impl look like views owned by impl, since
  // only impl is deleted below -- confirm where they are assigned.
  VertexShaderImpl* vert_impl = nullptr;
  FragmentShaderImpl* frag_impl = nullptr;
  // Presumably marks deferred deletion while the program is still in use;
  // verify against the glDeleteProgram implementation.
  bool deleted = false;

  ~Program() { delete impl; }
};
585 
586 // clang-format off
587 // Fully-expand GL defines while ignoring more than 4 suffixes
588 #define CONCAT_KEY(prefix, x, y, z, w, ...) prefix##x##y##z##w
589 // Generate a blend key enum symbol
590 #define BLEND_KEY(...) CONCAT_KEY(BLEND_, __VA_ARGS__, 0, 0, 0)
591 #define MASK_BLEND_KEY(...) CONCAT_KEY(MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
592 #define AA_BLEND_KEY(...) CONCAT_KEY(AA_BLEND_, __VA_ARGS__, 0, 0, 0)
593 #define AA_MASK_BLEND_KEY(...) CONCAT_KEY(AA_MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
594 
595 // Utility macro to easily generate similar code for all implemented blend modes
596 #define FOR_EACH_BLEND_KEY(macro)                                              \
597   macro(GL_ONE, GL_ZERO, 0, 0)                                                 \
598   macro(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA)  \
599   macro(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, 0, 0)                                  \
600   macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, 0, 0)                                 \
601   macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, GL_ZERO, GL_ONE)                      \
602   macro(GL_ZERO, GL_ONE_MINUS_SRC_ALPHA, 0, 0)                                 \
603   macro(GL_ZERO, GL_SRC_COLOR, 0, 0)                                           \
604   macro(GL_ONE, GL_ONE, 0, 0)                                                  \
605   macro(GL_ONE, GL_ONE, GL_ONE, GL_ONE_MINUS_SRC_ALPHA)                        \
606   macro(GL_ONE_MINUS_DST_ALPHA, GL_ONE, GL_ZERO, GL_ONE)                       \
607   macro(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_COLOR, 0, 0)                       \
608   macro(GL_ONE, GL_ONE_MINUS_SRC1_COLOR, 0, 0)                                 \
609   macro(GL_MIN, 0, 0, 0)                                                       \
610   macro(GL_MAX, 0, 0, 0)                                                       \
611   macro(GL_MULTIPLY_KHR, 0, 0, 0)                                              \
612   macro(GL_SCREEN_KHR, 0, 0, 0)                                                \
613   macro(GL_OVERLAY_KHR, 0, 0, 0)                                               \
614   macro(GL_DARKEN_KHR, 0, 0, 0)                                                \
615   macro(GL_LIGHTEN_KHR, 0, 0, 0)                                               \
616   macro(GL_COLORDODGE_KHR, 0, 0, 0)                                            \
617   macro(GL_COLORBURN_KHR, 0, 0, 0)                                             \
618   macro(GL_HARDLIGHT_KHR, 0, 0, 0)                                             \
619   macro(GL_SOFTLIGHT_KHR, 0, 0, 0)                                             \
620   macro(GL_DIFFERENCE_KHR, 0, 0, 0)                                            \
621   macro(GL_EXCLUSION_KHR, 0, 0, 0)                                             \
622   macro(GL_HSL_HUE_KHR, 0, 0, 0)                                               \
623   macro(GL_HSL_SATURATION_KHR, 0, 0, 0)                                        \
624   macro(GL_HSL_COLOR_KHR, 0, 0, 0)                                             \
625   macro(GL_HSL_LUMINOSITY_KHR, 0, 0, 0)                                        \
626   macro(SWGL_BLEND_DROP_SHADOW, 0, 0, 0)                                       \
627   macro(SWGL_BLEND_SUBPIXEL_TEXT, 0, 0, 0)
628 
629 #define DEFINE_BLEND_KEY(...) BLEND_KEY(__VA_ARGS__),
630 #define DEFINE_MASK_BLEND_KEY(...) MASK_BLEND_KEY(__VA_ARGS__),
631 #define DEFINE_AA_BLEND_KEY(...) AA_BLEND_KEY(__VA_ARGS__),
632 #define DEFINE_AA_MASK_BLEND_KEY(...) AA_MASK_BLEND_KEY(__VA_ARGS__),
// One enum symbol per (blend mode x variant) combination. The four
// FOR_EACH_BLEND_KEY expansions generate the normal, masked, anti-aliased,
// and masked anti-aliased key ranges; the *_NONE aliases name the trivial
// GL_ONE/GL_ZERO (no blending) entry of each range.
enum BlendKey : uint8_t {
  FOR_EACH_BLEND_KEY(DEFINE_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_MASK_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_AA_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_AA_MASK_BLEND_KEY)
  BLEND_KEY_NONE = BLEND_KEY(GL_ONE, GL_ZERO),
  MASK_BLEND_KEY_NONE = MASK_BLEND_KEY(GL_ONE, GL_ZERO),
  AA_BLEND_KEY_NONE = AA_BLEND_KEY(GL_ONE, GL_ZERO),
  AA_MASK_BLEND_KEY_NONE = AA_MASK_BLEND_KEY(GL_ONE, GL_ZERO),
};
643 // clang-format on
644 
645 const size_t MAX_TEXTURE_UNITS = 16;
646 
// Clear `binding` if it currently refers to object `n`. Returns whether the
// binding was actually cleared, so callers can react to dropped references.
template <typename T>
static inline bool unlink(T& binding, T n) {
  if (binding != n) {
    return false;
  }
  binding = 0;
  return true;
}
655 
// Generic id -> object store for GL object types. Ids are array indices;
// id 0 is reserved as the null object. Stored objects are heap-allocated
// copies owned by the store.
template <typename O>
struct ObjectStore {
  O** objects = nullptr;
  size_t size = 0;  // number of allocated slots in the objects array
  // reserve object 0 as null
  size_t first_free = 1;
  // Fallback object returned by operator[] when a slot can't be allocated.
  O invalid;

  ~ObjectStore() {
    if (objects) {
      for (size_t i = 0; i < size; i++) delete objects[i];
      free(objects);
    }
  }

  // Grow the slot array (by 1.5x steps from a floor of 8) until index i is
  // valid, zeroing the new slots. Returns false on allocation failure.
  bool grow(size_t i) {
    size_t new_size = size ? size : 8;
    while (new_size <= i) new_size += new_size / 2;
    O** new_objects = (O**)realloc(objects, new_size * sizeof(O*));
    assert(new_objects);
    if (!new_objects) return false;
    while (size < new_size) new_objects[size++] = nullptr;
    objects = new_objects;
    return true;
  }

  // Place a copy of o at id i, unless that slot is already occupied.
  void insert(size_t i, const O& o) {
    if (i >= size && !grow(i)) return;
    if (!objects[i]) objects[i] = new O(o);
  }

  // Find the lowest unoccupied id (never 0), caching the scan position so
  // repeated allocations don't rescan from the start.
  size_t next_free() {
    size_t i = first_free;
    while (i < size && objects[i]) i++;
    first_free = i;
    return i;
  }

  // Allocate a fresh id holding a copy of o and return the id.
  size_t insert(const O& o = O()) {
    size_t i = next_free();
    insert(i, o);
    return i;
  }

  // Access object i, default-constructing it on demand. Returns the shared
  // `invalid` object if the slot could not be allocated.
  O& operator[](size_t i) {
    insert(i, O());
    return i < size ? *objects[i] : invalid;
  }

  // Look up object i without creating it; null if absent or out of range.
  O* find(size_t i) const { return i < size ? objects[i] : nullptr; }

  // Overload-resolution trick: the second overload is preferred for types
  // declaring an on_erase() member (the decltype parameter SFINAEs it away
  // otherwise), letting such objects release resources when erased. All
  // other types fall through to the variadic no-op.
  template <typename T>
  void on_erase(T*, ...) {}
  template <typename T>
  void on_erase(T* o, decltype(&T::on_erase)) {
    o->on_erase();
  }

  // Delete object i and recycle its id. Returns whether it existed.
  bool erase(size_t i) {
    if (i < size && objects[i]) {
      on_erase(objects[i], nullptr);
      delete objects[i];
      objects[i] = nullptr;
      if (i < first_free) first_free = i;
      return true;
    }
    return false;
  }

  // Iteration over raw slots; entries may be null.
  O** begin() const { return objects; }
  O** end() const { return &objects[size]; }
};
728 
// All state belonging to one SWGL context: the stores for every GL object
// type plus the mutable state manipulated through the GL API (blend, depth,
// scissor, clear values, bindings, etc.).
struct Context {
  // External reference count on the context.
  int32_t references = 1;

  ObjectStore<Query> queries;
  ObjectStore<Buffer> buffers;
  ObjectStore<Texture> textures;
  ObjectStore<VertexArray> vertex_arrays;
  ObjectStore<Framebuffer> framebuffers;
  ObjectStore<Renderbuffer> renderbuffers;
  ObjectStore<Shader> shaders;
  ObjectStore<Program> programs;

  IntRect viewport = {0, 0, 0, 0};

  // Blend state (glBlendFunc/glBlendEquation/glBlendColor). blend_key caches
  // the enum for the currently selected blend configuration.
  bool blend = false;
  GLenum blendfunc_srgb = GL_ONE;
  GLenum blendfunc_drgb = GL_ZERO;
  GLenum blendfunc_sa = GL_ONE;
  GLenum blendfunc_da = GL_ZERO;
  GLenum blend_equation = GL_FUNC_ADD;
  V8<uint16_t> blendcolor = 0;
  BlendKey blend_key = BLEND_KEY_NONE;

  // Depth state (glEnable(GL_DEPTH_TEST)/glDepthMask/glDepthFunc).
  bool depthtest = false;
  bool depthmask = true;
  GLenum depthfunc = GL_LESS;

  bool scissortest = false;
  IntRect scissor = {0, 0, 0, 0};

  GLfloat clearcolor[4] = {0, 0, 0, 0};
  GLdouble cleardepth = 1;

  int unpack_row_length = 0;

  // Shading statistics counters; presumably updated by the rasterizer code
  // elsewhere in this file.
  int shaded_rows = 0;
  int shaded_pixels = 0;

  // Per-texture-unit binding state for the two supported texture targets.
  struct TextureUnit {
    GLuint texture_2d_binding = 0;
    GLuint texture_rectangle_binding = 0;

    // Drop any binding in this unit that refers to texture n.
    void unlink(GLuint n) {
      ::unlink(texture_2d_binding, n);
      ::unlink(texture_rectangle_binding, n);
    }
  };
  TextureUnit texture_units[MAX_TEXTURE_UNITS];
  int active_texture_unit = 0;

  GLuint current_program = 0;

  GLuint current_vertex_array = 0;
  bool validate_vertex_array = true;

  GLuint pixel_pack_buffer_binding = 0;
  GLuint pixel_unpack_buffer_binding = 0;
  GLuint array_buffer_binding = 0;
  GLuint time_elapsed_query = 0;
  GLuint samples_passed_query = 0;
  GLuint renderbuffer_binding = 0;
  GLuint draw_framebuffer_binding = 0;
  GLuint read_framebuffer_binding = 0;
  GLuint unknown_binding = 0;

  // Map a GL binding target to the slot that stores it, so bind calls can
  // read or update any target uniformly. Unknown targets assert in debug
  // builds and return a dummy slot.
  GLuint& get_binding(GLenum name) {
    switch (name) {
      case GL_PIXEL_PACK_BUFFER:
        return pixel_pack_buffer_binding;
      case GL_PIXEL_UNPACK_BUFFER:
        return pixel_unpack_buffer_binding;
      case GL_ARRAY_BUFFER:
        return array_buffer_binding;
      case GL_ELEMENT_ARRAY_BUFFER:
        // Element array binding is part of the current VAO's state.
        return vertex_arrays[current_vertex_array].element_array_buffer_binding;
      case GL_TEXTURE_2D:
        return texture_units[active_texture_unit].texture_2d_binding;
      case GL_TEXTURE_RECTANGLE:
        return texture_units[active_texture_unit].texture_rectangle_binding;
      case GL_TIME_ELAPSED:
        return time_elapsed_query;
      case GL_SAMPLES_PASSED:
        return samples_passed_query;
      case GL_RENDERBUFFER:
        return renderbuffer_binding;
      case GL_DRAW_FRAMEBUFFER:
        return draw_framebuffer_binding;
      case GL_READ_FRAMEBUFFER:
        return read_framebuffer_binding;
      default:
        debugf("unknown binding %x\n", name);
        assert(false);
        return unknown_binding;
    }
  }

  // Resolve the texture bound at the given unit for each sampler type.
  Texture& get_texture(sampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(isampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(sampler2DRect, int unit) {
    return textures[texture_units[unit].texture_rectangle_binding];
  }

  // Clip bb against the scissor rect (translated into bb's space by origin)
  // when scissor testing is enabled.
  IntRect apply_scissor(IntRect bb,
                        const IntPoint& origin = IntPoint(0, 0)) const {
    return scissortest ? bb.intersect(scissor - origin) : bb;
  }

  // Clip a texture's bounds against the scissor, accounting for the
  // texture's attachment offset.
  IntRect apply_scissor(const Texture& t) const {
    return apply_scissor(t.bounds(), t.offset);
  }
};
846 static Context* ctx = nullptr;
847 static VertexShaderImpl* vertex_shader = nullptr;
848 static FragmentShaderImpl* fragment_shader = nullptr;
849 static BlendKey blend_key = BLEND_KEY_NONE;
850 
851 static void prepare_texture(Texture& t, const IntRect* skip = nullptr);
852 
853 template <typename S>
init_filter(S * s,Texture & t)854 static inline void init_filter(S* s, Texture& t) {
855   // If the width is not at least 2 pixels, then we can't safely sample the end
856   // of the row with a linear filter. In that case, just punt to using nearest
857   // filtering instead.
858   s->filter = t.width >= 2 ? gl_filter_to_texture_filter(t.mag_filter)
859                            : TextureFilter::NEAREST;
860 }
861 
862 template <typename S>
init_sampler(S * s,Texture & t)863 static inline void init_sampler(S* s, Texture& t) {
864   prepare_texture(t);
865   s->width = t.width;
866   s->height = t.height;
867   s->stride = t.stride();
868   int bpp = t.bpp();
869   if (bpp >= 4)
870     s->stride /= 4;
871   else if (bpp == 2)
872     s->stride /= 2;
873   else
874     assert(bpp == 1);
875   // Use uint32_t* for easier sampling, but need to cast to uint8_t* or
876   // uint16_t* for formats with bpp < 4.
877   s->buf = (uint32_t*)t.buf;
878   s->format = gl_format_to_texture_format(t.internal_format);
879 }
880 
// Point a sampler at a dummy buffer when its texture has no data.
template <typename S>
static inline void null_sampler(S* s) {
  // For null texture data, just make the sampler provide a 1x1 buffer that is
  // transparent black. Ensure buffer holds at least a SIMD vector of zero data
  // for SIMD padding of unaligned loads.
  static const uint32_t zeroBuf[sizeof(Float) / sizeof(uint32_t)] = {0};
  s->width = 1;
  s->height = 1;
  s->stride = s->width;
  s->buf = (uint32_t*)zeroBuf;
  s->format = TextureFormat::RGBA8;
}

// With only a single 1x1 dummy texel available, nearest filtering suffices.
template <typename S>
static inline void null_filter(S* s) {
  s->filter = TextureFilter::NEAREST;
}
898 
899 template <typename S>
lookup_sampler(S * s,int texture)900 S* lookup_sampler(S* s, int texture) {
901   Texture& t = ctx->get_texture(s, texture);
902   if (!t.buf) {
903     null_sampler(s);
904     null_filter(s);
905   } else {
906     init_sampler(s, t);
907     init_filter(s, t);
908   }
909   return s;
910 }
911 
912 template <typename S>
lookup_isampler(S * s,int texture)913 S* lookup_isampler(S* s, int texture) {
914   Texture& t = ctx->get_texture(s, texture);
915   if (!t.buf) {
916     null_sampler(s);
917   } else {
918     init_sampler(s, t);
919   }
920   return s;
921 }
922 
bytes_per_type(GLenum type)923 int bytes_per_type(GLenum type) {
924   switch (type) {
925     case GL_INT:
926       return 4;
927     case GL_FLOAT:
928       return 4;
929     case GL_UNSIGNED_SHORT:
930       return 2;
931     case GL_UNSIGNED_BYTE:
932       return 1;
933     default:
934       assert(0);
935       return 0;
936   }
937 }
938 
// Widen a vertex attribute stored with narrower components of type C into
// the shader's scalar type S, converting each stored component and leaving
// the remaining components zero.
template <typename S, typename C>
static inline S expand_attrib(const char* buf, size_t size, bool normalized) {
  typedef typename ElementType<S>::ty elem_type;
  S scalar = {0};
  const C* src = reinterpret_cast<const C*>(buf);
  if (normalized) {
    // Normalized unsigned integers map [0, 2^bits - 1] onto [0.0, 1.0].
    const float scale = 1.0f / ((1 << (8 * sizeof(C))) - 1);
    for (size_t i = 0; i < size / sizeof(C); i++) {
      put_nth_component(scalar, i, elem_type(src[i]) * scale);
    }
  } else {
    for (size_t i = 0; i < size / sizeof(C); i++) {
      put_nth_component(scalar, i, elem_type(src[i]));
    }
  }
  return scalar;
}

// Load one attribute value of scalar type S from a vertex buffer, widening
// or converting the stored representation when it is smaller than S.
template <typename S>
static inline S load_attrib_scalar(VertexAttrib& va, const char* src) {
  // The stored attribute is at least as large as S: read it directly.
  if (sizeof(S) <= va.size) {
    return *reinterpret_cast<const S*>(src);
  }
  if (va.type == GL_UNSIGNED_SHORT) {
    return expand_attrib<S, uint16_t>(src, va.size, va.normalized);
  }
  if (va.type == GL_UNSIGNED_BYTE) {
    return expand_attrib<S, uint8_t>(src, va.size, va.normalized);
  }
  // Same component width but fewer components than S: copy what is stored
  // and keep the rest zeroed.
  assert(sizeof(typename ElementType<S>::ty) == bytes_per_type(va.type));
  S scalar = {0};
  memcpy(&scalar, src, va.size);
  return scalar;
}
973 
// Load a vertex attribute for up to 4 SIMD lanes of vertices, replicating or
// reordering the per-vertex scalars to match WR's primitive winding.
template <typename T>
void load_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
                 int count) {
  typedef decltype(force_scalar(attrib)) scalar_type;
  if (!va.enabled) {
    // Disabled attributes read as zero.
    attrib = T(scalar_type{0});
  } else if (va.divisor != 0) {
    // Instanced attribute: every lane shares this instance's single value.
    char* src = (char*)va.buf + va.stride * instance + va.offset;
    assert(src + va.size <= va.buf + va.buf_size);
    attrib = T(load_attrib_scalar<scalar_type>(va, src));
  } else {
    // Specialized for WR's primitive vertex order/winding.
    if (!count) return;
    assert(count >= 2 && count <= 4);
    char* src = (char*)va.buf + va.stride * start + va.offset;
    switch (count) {
      case 2: {
        // Lines must be indexed at offsets 0, 1.
        // Line vertexes fill vertex shader SIMD lanes as 0, 1, 1, 0.
        scalar_type lanes[2] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride)};
        attrib = (T){lanes[0], lanes[1], lanes[1], lanes[0]};
        break;
      }
      case 3: {
        // Triangles must be indexed at offsets 0, 1, 2.
        // Triangle vertexes fill vertex shader SIMD lanes as 0, 1, 2, 2.
        scalar_type lanes[3] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride),
            load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        attrib = (T){lanes[0], lanes[1], lanes[2], lanes[2]};
        break;
      }
      default:
        // Quads must be successive triangles indexed at offsets 0, 1, 2, 2,
        // 1, 3. Quad vertexes fill vertex shader SIMD lanes as 0, 1, 3, 2, so
        // that the points form a convex path that can be traversed by the
        // rasterizer.
        attrib = (T){load_attrib_scalar<scalar_type>(va, src),
                     load_attrib_scalar<scalar_type>(va, src + va.stride),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 3),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        break;
    }
  }
}
1022 
// Load a flat (non-interpolated) attribute: a single scalar value shared by
// every lane of the primitive.
template <typename T>
void load_flat_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
                      int count) {
  typedef decltype(force_scalar(attrib)) scalar_type;
  if (!va.enabled) {
    // Disabled attributes read as zero.
    attrib = T{0};
    return;
  }
  char* src = nullptr;
  if (va.divisor != 0) {
    // Instanced: take the value for this instance.
    src = (char*)va.buf + va.stride * instance + va.offset;
  } else {
    if (!count) return;
    // Flat attributes take their value from the first vertex.
    src = (char*)va.buf + va.stride * start + va.offset;
  }
  assert(src + va.size <= va.buf + va.buf_size);
  attrib = T(load_attrib_scalar<scalar_type>(va, src));
}
1041 
// Cache the given program's vertex and fragment shader implementations in
// the globals used by the draw path; program 0 clears them.
void setup_program(GLuint program) {
  if (!program) {
    vertex_shader = nullptr;
    fragment_shader = nullptr;
    return;
  }
  Program& p = ctx->programs[program];
  // LinkProgram must have populated the implementations before use.
  assert(p.impl);
  assert(p.vert_impl);
  assert(p.frag_impl);
  vertex_shader = p.vert_impl;
  fragment_shader = p.frag_impl;
}
1055 
1056 extern ProgramLoader load_shader(const char* name);
1057 
1058 extern "C" {
1059 
// Make the given program current, finishing deferred deletion of the
// previously current program if it was marked deleted while in use.
void UseProgram(GLuint program) {
  if (ctx->current_program && program != ctx->current_program) {
    auto* p = ctx->programs.find(ctx->current_program);
    if (p && p->deleted) {
      // DeleteProgram deferred this erase while the program was current.
      ctx->programs.erase(ctx->current_program);
    }
  }
  ctx->current_program = program;
  setup_program(program);
}

void SetViewport(GLint x, GLint y, GLsizei width, GLsizei height) {
  ctx->viewport = IntRect{x, y, x + width, y + height};
}

// Enable a capability; caps other than blend/depth/scissor are ignored.
void Enable(GLenum cap) {
  switch (cap) {
    case GL_BLEND:
      ctx->blend = true;
      break;
    case GL_DEPTH_TEST:
      ctx->depthtest = true;
      break;
    case GL_SCISSOR_TEST:
      ctx->scissortest = true;
      break;
  }
}

// Disable a capability; caps other than blend/depth/scissor are ignored.
void Disable(GLenum cap) {
  switch (cap) {
    case GL_BLEND:
      ctx->blend = false;
      break;
    case GL_DEPTH_TEST:
      ctx->depthtest = false;
      break;
    case GL_SCISSOR_TEST:
      ctx->scissortest = false;
      break;
  }
}

// This implementation does not record errors, so always report success.
GLenum GetError() { return GL_NO_ERROR; }
1104 
// Extensions advertised via GetStringi; GL_NUM_EXTENSIONS in GetIntegerv is
// derived from this array's length, so the two stay in sync automatically.
static const char* const extensions[] = {
    "GL_ARB_blend_func_extended",
    "GL_ARB_clear_texture",
    "GL_ARB_copy_image",
    "GL_ARB_draw_instanced",
    "GL_ARB_explicit_attrib_location",
    "GL_ARB_instanced_arrays",
    "GL_ARB_invalidate_subdata",
    "GL_ARB_texture_storage",
    "GL_EXT_timer_query",
    "GL_KHR_blend_equation_advanced",
    "GL_KHR_blend_equation_advanced_coherent",
    "GL_APPLE_rgb_422",
};
1119 
// Report implementation limits and current bindings. Unhandled pnames assert
// in debug builds and leave params untouched in release builds.
void GetIntegerv(GLenum pname, GLint* params) {
  assert(params);
  switch (pname) {
    case GL_MAX_TEXTURE_UNITS:
    case GL_MAX_TEXTURE_IMAGE_UNITS:
      params[0] = MAX_TEXTURE_UNITS;
      break;
    case GL_MAX_TEXTURE_SIZE:
      params[0] = 1 << 15;
      break;
    case GL_MAX_ARRAY_TEXTURE_LAYERS:
      // Array textures are not supported.
      params[0] = 0;
      break;
    case GL_READ_FRAMEBUFFER_BINDING:
      params[0] = ctx->read_framebuffer_binding;
      break;
    case GL_DRAW_FRAMEBUFFER_BINDING:
      params[0] = ctx->draw_framebuffer_binding;
      break;
    case GL_PIXEL_PACK_BUFFER_BINDING:
      params[0] = ctx->pixel_pack_buffer_binding;
      break;
    case GL_PIXEL_UNPACK_BUFFER_BINDING:
      params[0] = ctx->pixel_unpack_buffer_binding;
      break;
    case GL_NUM_EXTENSIONS:
      params[0] = sizeof(extensions) / sizeof(extensions[0]);
      break;
    case GL_MAJOR_VERSION:
      params[0] = 3;
      break;
    case GL_MINOR_VERSION:
      params[0] = 2;
      break;
    case GL_MIN_PROGRAM_TEXEL_OFFSET:
      params[0] = 0;
      break;
    case GL_MAX_PROGRAM_TEXEL_OFFSET:
      params[0] = MAX_TEXEL_OFFSET;
      break;
    default:
      debugf("unhandled glGetIntegerv parameter %x\n", pname);
      assert(false);
  }
}

// Only the depth write mask is queryable as a boolean.
void GetBooleanv(GLenum pname, GLboolean* params) {
  assert(params);
  switch (pname) {
    case GL_DEPTH_WRITEMASK:
      params[0] = ctx->depthmask;
      break;
    default:
      debugf("unhandled glGetBooleanv parameter %x\n", pname);
      assert(false);
  }
}

// Return fixed identification strings describing this GL 3.2 software
// implementation.
const char* GetString(GLenum name) {
  switch (name) {
    case GL_VENDOR:
      return "Mozilla Gfx";
    case GL_RENDERER:
      return "Software WebRender";
    case GL_VERSION:
      return "3.2";
    case GL_SHADING_LANGUAGE_VERSION:
      return "1.50";
    default:
      debugf("unhandled glGetString parameter %x\n", name);
      assert(false);
      return nullptr;
  }
}

// Return the indexed extension string, or null when the index is out of
// range.
const char* GetStringi(GLenum name, GLuint index) {
  switch (name) {
    case GL_EXTENSIONS:
      if (index >= sizeof(extensions) / sizeof(extensions[0])) {
        return nullptr;
      }
      return extensions[index];
    default:
      debugf("unhandled glGetStringi parameter %x\n", name);
      assert(false);
      return nullptr;
  }
}
1208 
// Canonicalize an alpha blend func against its color counterpart: when the
// alpha func is the COLOR/ALPHA twin of the rgb func, rewrite it to match the
// rgb func so equivalent separate/non-separate states produce one blend key.
GLenum remap_blendfunc(GLenum rgb, GLenum a) {
  switch (a) {
    case GL_SRC_ALPHA:
      if (rgb == GL_SRC_COLOR) a = GL_SRC_COLOR;
      break;
    case GL_ONE_MINUS_SRC_ALPHA:
      if (rgb == GL_ONE_MINUS_SRC_COLOR) a = GL_ONE_MINUS_SRC_COLOR;
      break;
    case GL_DST_ALPHA:
      if (rgb == GL_DST_COLOR) a = GL_DST_COLOR;
      break;
    case GL_ONE_MINUS_DST_ALPHA:
      if (rgb == GL_ONE_MINUS_DST_COLOR) a = GL_ONE_MINUS_DST_COLOR;
      break;
    case GL_CONSTANT_ALPHA:
      if (rgb == GL_CONSTANT_COLOR) a = GL_CONSTANT_COLOR;
      break;
    case GL_ONE_MINUS_CONSTANT_ALPHA:
      if (rgb == GL_ONE_MINUS_CONSTANT_COLOR) a = GL_ONE_MINUS_CONSTANT_COLOR;
      break;
    case GL_SRC_COLOR:
      if (rgb == GL_SRC_ALPHA) a = GL_SRC_ALPHA;
      break;
    case GL_ONE_MINUS_SRC_COLOR:
      if (rgb == GL_ONE_MINUS_SRC_ALPHA) a = GL_ONE_MINUS_SRC_ALPHA;
      break;
    case GL_DST_COLOR:
      if (rgb == GL_DST_ALPHA) a = GL_DST_ALPHA;
      break;
    case GL_ONE_MINUS_DST_COLOR:
      if (rgb == GL_ONE_MINUS_DST_ALPHA) a = GL_ONE_MINUS_DST_ALPHA;
      break;
    case GL_CONSTANT_COLOR:
      if (rgb == GL_CONSTANT_ALPHA) a = GL_CONSTANT_ALPHA;
      break;
    case GL_ONE_MINUS_CONSTANT_COLOR:
      if (rgb == GL_ONE_MINUS_CONSTANT_ALPHA) a = GL_ONE_MINUS_CONSTANT_ALPHA;
      break;
    case GL_SRC1_ALPHA:
      if (rgb == GL_SRC1_COLOR) a = GL_SRC1_COLOR;
      break;
    case GL_ONE_MINUS_SRC1_ALPHA:
      if (rgb == GL_ONE_MINUS_SRC1_COLOR) a = GL_ONE_MINUS_SRC1_COLOR;
      break;
    case GL_SRC1_COLOR:
      if (rgb == GL_SRC1_ALPHA) a = GL_SRC1_ALPHA;
      break;
    case GL_ONE_MINUS_SRC1_COLOR:
      if (rgb == GL_ONE_MINUS_SRC1_ALPHA) a = GL_ONE_MINUS_SRC1_ALPHA;
      break;
  }
  return a;
}
1262 
1263 // Generate a hashed blend key based on blend func and equation state. This
1264 // allows all the blend state to be processed down to a blend key that can be
1265 // dealt with inside a single switch statement.
hash_blend_key()1266 static void hash_blend_key() {
1267   GLenum srgb = ctx->blendfunc_srgb;
1268   GLenum drgb = ctx->blendfunc_drgb;
1269   GLenum sa = ctx->blendfunc_sa;
1270   GLenum da = ctx->blendfunc_da;
1271   GLenum equation = ctx->blend_equation;
1272 #define HASH_BLEND_KEY(x, y, z, w) ((x << 4) | (y) | (z << 24) | (w << 20))
1273   // Basic non-separate blend funcs used the two argument form
1274   int hash = HASH_BLEND_KEY(srgb, drgb, 0, 0);
1275   // Separate alpha blend funcs use the 4 argument hash
1276   if (srgb != sa || drgb != da) hash |= HASH_BLEND_KEY(0, 0, sa, da);
1277   // Any other blend equation than the default func_add ignores the func and
1278   // instead generates a one-argument hash based on the equation
1279   if (equation != GL_FUNC_ADD) hash = HASH_BLEND_KEY(equation, 0, 0, 0);
1280   switch (hash) {
1281 #define MAP_BLEND_KEY(...)                   \
1282   case HASH_BLEND_KEY(__VA_ARGS__):          \
1283     ctx->blend_key = BLEND_KEY(__VA_ARGS__); \
1284     break;
1285     FOR_EACH_BLEND_KEY(MAP_BLEND_KEY)
1286     default:
1287       debugf("blendfunc: %x, %x, separate: %x, %x, equation: %x\n", srgb, drgb,
1288              sa, da, equation);
1289       assert(false);
1290       break;
1291   }
1292 }
1293 
// Set separate RGB/alpha blend funcs and regenerate the blend key.
void BlendFunc(GLenum srgb, GLenum drgb, GLenum sa, GLenum da) {
  ctx->blendfunc_srgb = srgb;
  ctx->blendfunc_drgb = drgb;
  // Canonicalize the alpha funcs against the color funcs so equivalent
  // separate/non-separate state hashes to the same blend key.
  sa = remap_blendfunc(srgb, sa);
  da = remap_blendfunc(drgb, da);
  ctx->blendfunc_sa = sa;
  ctx->blendfunc_da = da;

  hash_blend_key();
}

// Store the constant blend color as packed 16-bit channels in b, g, r, a
// lane order, replicated across the vector.
void BlendColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
  I32 c = round_pixel((Float){b, g, r, a});
  ctx->blendcolor = CONVERT(c, U16).xyzwxyzw;
}

void BlendEquation(GLenum mode) {
  assert(mode == GL_FUNC_ADD || mode == GL_MIN || mode == GL_MAX ||
         (mode >= GL_MULTIPLY_KHR && mode <= GL_HSL_LUMINOSITY_KHR));
  // Only rehash the blend key when the equation actually changes.
  if (mode != ctx->blend_equation) {
    ctx->blend_equation = mode;
    hash_blend_key();
  }
}

void DepthMask(GLboolean flag) { ctx->depthmask = flag; }

// Only GL_LESS and GL_LEQUAL depth comparisons are supported.
void DepthFunc(GLenum func) {
  switch (func) {
    case GL_LESS:
    case GL_LEQUAL:
      break;
    default:
      assert(false);
  }
  ctx->depthfunc = func;
}

void SetScissor(GLint x, GLint y, GLsizei width, GLsizei height) {
  ctx->scissor = IntRect{x, y, x + width, y + height};
}

void ClearColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
  ctx->clearcolor[0] = r;
  ctx->clearcolor[1] = g;
  ctx->clearcolor[2] = b;
  ctx->clearcolor[3] = a;
}

void ClearDepth(GLdouble depth) { ctx->cleardepth = depth; }

void ActiveTexture(GLenum texture) {
  assert(texture >= GL_TEXTURE0);
  assert(texture < GL_TEXTURE0 + MAX_TEXTURE_UNITS);
  // Clamp in release builds so an out-of-range unit cannot index out of
  // bounds.
  ctx->active_texture_unit =
      clamp(int(texture - GL_TEXTURE0), 0, int(MAX_TEXTURE_UNITS - 1));
}
1351 
// Allocate n query objects and return their names.
void GenQueries(GLsizei n, GLuint* result) {
  for (int i = 0; i < n; i++) {
    Query q;
    result[i] = ctx->queries.insert(q);
  }
}

// Delete a query, clearing any active query bindings that referenced it.
void DeleteQuery(GLuint n) {
  if (n && ctx->queries.erase(n)) {
    unlink(ctx->time_elapsed_query, n);
    unlink(ctx->samples_passed_query, n);
  }
}

// Allocate n buffer objects and return their names.
void GenBuffers(int n, GLuint* result) {
  for (int i = 0; i < n; i++) {
    Buffer b;
    result[i] = ctx->buffers.insert(b);
  }
}

// Delete a buffer, clearing any buffer bindings that referenced it.
void DeleteBuffer(GLuint n) {
  if (n && ctx->buffers.erase(n)) {
    unlink(ctx->pixel_pack_buffer_binding, n);
    unlink(ctx->pixel_unpack_buffer_binding, n);
    unlink(ctx->array_buffer_binding, n);
  }
}

// Allocate n vertex array objects and return their names.
void GenVertexArrays(int n, GLuint* result) {
  for (int i = 0; i < n; i++) {
    VertexArray v;
    result[i] = ctx->vertex_arrays.insert(v);
  }
}

// Delete a vertex array, clearing the current VAO binding if it matched.
void DeleteVertexArray(GLuint n) {
  if (n && ctx->vertex_arrays.erase(n)) {
    unlink(ctx->current_vertex_array, n);
  }
}
1393 
// Allocate a shader object of the given type.
GLuint CreateShader(GLenum type) {
  Shader s;
  s.type = type;
  return ctx->shaders.insert(s);
}

// Associate a precompiled shader loader, found by name, with the shader
// object; used in place of compiling GLSL source text.
void ShaderSourceByName(GLuint shader, char* name) {
  Shader& s = ctx->shaders[shader];
  s.loader = load_shader(name);
  if (!s.loader) {
    debugf("unknown shader %s\n", name);
  }
}
1407 
AttachShader(GLuint program,GLuint shader)1408 void AttachShader(GLuint program, GLuint shader) {
1409   Program& p = ctx->programs[program];
1410   Shader& s = ctx->shaders[shader];
1411   if (s.type == GL_VERTEX_SHADER) {
1412     if (!p.impl && s.loader) p.impl = s.loader();
1413   } else if (s.type == GL_FRAGMENT_SHADER) {
1414     if (!p.impl && s.loader) p.impl = s.loader();
1415   } else {
1416     assert(0);
1417   }
1418 }
1419 
void DeleteShader(GLuint n) {
  if (n) ctx->shaders.erase(n);
}

GLuint CreateProgram() {
  Program p;
  return ctx->programs.insert(p);
}

// Delete a program. If it is currently in use, only mark it deleted; the
// actual erase is deferred until a different program is made current (see
// UseProgram).
void DeleteProgram(GLuint n) {
  if (!n) return;
  if (ctx->current_program == n) {
    if (auto* p = ctx->programs.find(n)) {
      p->deleted = true;
    }
  } else {
    ctx->programs.erase(n);
  }
}

// Resolve the program's vertex and fragment shader implementations from its
// combined impl.
void LinkProgram(GLuint program) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return;
  }
  // The interpolant outputs must fit the fixed-size buffer the rasterizer
  // uses.
  assert(p.impl->interpolants_size() <= sizeof(Interpolants));
  if (!p.vert_impl) p.vert_impl = p.impl->get_vertex_shader();
  if (!p.frag_impl) p.frag_impl = p.impl->get_fragment_shader();
}

// A program "links" successfully iff its implementation was loaded.
GLint GetLinkStatus(GLuint program) {
  if (auto* p = ctx->programs.find(program)) {
    return p->impl ? 1 : 0;
  }
  return 0;
}
1457 
// Forward an attribute binding to the program implementation; no-op without
// an impl (release builds).
void BindAttribLocation(GLuint program, GLuint index, char* name) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return;
  }
  p.impl->bind_attrib(name, index);
}

// Return the attribute's location, or -1 when the program has no impl.
GLint GetAttribLocation(GLuint program, char* name) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return -1;
  }
  return p.impl->get_attrib(name);
}

// Return the uniform's location, or -1 when the program has no impl.
GLint GetUniformLocation(GLuint program, char* name) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return -1;
  }
  GLint loc = p.impl->get_uniform(name);
  // debugf("location: %d\n", loc);
  return loc;
}
1486 
// Read a monotonic clock for GL_TIME_ELAPSED queries. Returns nanoseconds on
// Windows and POSIX; on Mac it returns raw mach ticks (unscaled — callers
// only compare deltas from the same clock).
static uint64_t get_time_value() {
#ifdef __MACH__
  return mach_absolute_time();
#elif defined(_WIN32)
  LARGE_INTEGER time;
  // Cache the performance counter frequency; it is fixed at boot.
  static bool have_frequency = false;
  static LARGE_INTEGER frequency;
  if (!have_frequency) {
    QueryPerformanceFrequency(&frequency);
    have_frequency = true;
  }
  QueryPerformanceCounter(&time);
  // Scale the raw counter into nanoseconds.
  return time.QuadPart * 1000000000ULL / frequency.QuadPart;
#else
  return ({
    struct timespec tp;
    clock_gettime(CLOCK_MONOTONIC, &tp);
    tp.tv_sec * 1000000000ULL + tp.tv_nsec;
  });
#endif
}
1508 
// Begin a query on the given target, initializing its accumulator.
void BeginQuery(GLenum target, GLuint id) {
  ctx->get_binding(target) = id;
  Query& q = ctx->queries[id];
  switch (target) {
    case GL_SAMPLES_PASSED:
      q.value = 0;
      break;
    case GL_TIME_ELAPSED:
      // Record the start time; EndQuery turns this into an elapsed delta.
      q.value = get_time_value();
      break;
    default:
      debugf("unknown query target %x for query %d\n", target, id);
      assert(false);
  }
}

// End the active query on the given target and clear its binding.
void EndQuery(GLenum target) {
  Query& q = ctx->queries[ctx->get_binding(target)];
  switch (target) {
    case GL_SAMPLES_PASSED:
      // Sample counts are accumulated elsewhere during rendering.
      break;
    case GL_TIME_ELAPSED:
      q.value = get_time_value() - q.value;
      break;
    default:
      debugf("unknown query target %x\n", target);
      assert(false);
  }
  ctx->get_binding(target) = 0;
}

// Return the stored query value; only GL_QUERY_RESULT is supported.
void GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64* params) {
  Query& q = ctx->queries[id];
  switch (pname) {
    case GL_QUERY_RESULT:
      assert(params);
      params[0] = q.value;
      break;
    default:
      assert(false);
  }
}
1551 
// Bind a vertex array, deferring revalidation of attribute state to the next
// draw when the binding actually changes.
void BindVertexArray(GLuint vertex_array) {
  if (vertex_array != ctx->current_vertex_array) {
    ctx->validate_vertex_array = true;
  }
  ctx->current_vertex_array = vertex_array;
}

void BindTexture(GLenum target, GLuint texture) {
  ctx->get_binding(target) = texture;
}

void BindBuffer(GLenum target, GLuint buffer) {
  ctx->get_binding(target) = buffer;
}

// Bind a framebuffer; GL_FRAMEBUFFER aliases both the read and draw
// bindings.
void BindFramebuffer(GLenum target, GLuint fb) {
  if (target == GL_FRAMEBUFFER) {
    ctx->read_framebuffer_binding = fb;
    ctx->draw_framebuffer_binding = fb;
  } else {
    assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
    ctx->get_binding(target) = fb;
  }
}

void BindRenderbuffer(GLenum target, GLuint rb) {
  ctx->get_binding(target) = rb;
}

// Only an unpack alignment of 1 and GL_UNPACK_ROW_LENGTH are supported; all
// other pixel-store parameters are silently ignored.
void PixelStorei(GLenum name, GLint param) {
  if (name == GL_UNPACK_ALIGNMENT) {
    assert(param == 1);
  } else if (name == GL_UNPACK_ROW_LENGTH) {
    ctx->unpack_row_length = param;
  }
}
1588 
// Map unsized/legacy internal formats onto the sized formats this
// implementation stores internally; other formats pass through unchanged.
static GLenum remap_internal_format(GLenum format) {
  switch (format) {
    case GL_DEPTH_COMPONENT:
      return GL_DEPTH_COMPONENT24;
    case GL_RGBA:
      return GL_RGBA8;
    case GL_RED:
      return GL_R8;
    case GL_RG:
      return GL_RG8;
    case GL_RGB_422_APPLE:
      return GL_RGB_RAW_422_APPLE;
    default:
      return format;
  }
}
1605 
1606 }  // extern "C"
1607 
format_requires_conversion(GLenum external_format,GLenum internal_format)1608 static bool format_requires_conversion(GLenum external_format,
1609                                        GLenum internal_format) {
1610   switch (external_format) {
1611     case GL_RGBA:
1612       return internal_format == GL_RGBA8;
1613     default:
1614       return false;
1615   }
1616 }
1617 
// Copy a row of pixels while swapping the red and blue channels (BGRA8 <->
// RGBA8), handling 4 pixels per iteration with SIMD and finishing the
// remainder with scalar code.
static inline void copy_bgra8_to_rgba8(uint32_t* dest, const uint32_t* src,
                                       int width) {
  for (; width >= 4; width -= 4, dest += 4, src += 4) {
    U32 p = unaligned_load<U32>(src);
    // Isolate R and B, then swap them by shifting 16 bits in each direction;
    // G and A pass through via the 0xFF00FF00 mask.
    U32 rb = p & 0x00FF00FF;
    unaligned_store(dest, (p & 0xFF00FF00) | (rb << 16) | (rb >> 16));
  }
  for (; width > 0; width--, dest++, src++) {
    uint32_t p = *src;
    uint32_t rb = p & 0x00FF00FF;
    *dest = (p & 0xFF00FF00) | (rb << 16) | (rb >> 16);
  }
}
1631 
// Copy a width x height block of pixels between buffers with the given byte
// strides, converting from the external format to the internal format when
// required; otherwise rows are copied verbatim.
static void convert_copy(GLenum external_format, GLenum internal_format,
                         uint8_t* dst_buf, size_t dst_stride,
                         const uint8_t* src_buf, size_t src_stride,
                         size_t width, size_t height) {
  switch (external_format) {
    case GL_RGBA:
      if (internal_format == GL_RGBA8) {
        // GL_RGBA uploads need their red/blue channels swapped to match the
        // internal RGBA8 layout.
        for (; height; height--) {
          copy_bgra8_to_rgba8((uint32_t*)dst_buf, (const uint32_t*)src_buf,
                              width);
          dst_buf += dst_stride;
          src_buf += src_stride;
        }
        return;
      }
      break;
    default:
      break;
  }
  // No conversion required: straight row-by-row copy.
  size_t row_bytes = width * bytes_for_internal_format(internal_format);
  for (; height; height--) {
    memcpy(dst_buf, src_buf, row_bytes);
    dst_buf += dst_stride;
    src_buf += src_stride;
  }
}
1658 
// (Re)allocate a texture's storage for the given format and dimensions,
// optionally adopting an externally supplied pixel buffer with the given
// stride. min_width/min_height allow over-allocation for later growth.
static void set_tex_storage(Texture& t, GLenum external_format, GLsizei width,
                            GLsizei height, void* buf = nullptr,
                            GLsizei stride = 0, GLsizei min_width = 0,
                            GLsizei min_height = 0) {
  GLenum internal_format = remap_internal_format(external_format);
  bool changed = false;
  if (t.width != width || t.height != height ||
      t.internal_format != internal_format) {
    changed = true;
    t.internal_format = internal_format;
    t.width = width;
    t.height = height;
  }
  // If we are changed from an internally managed buffer to an externally
  // supplied one or vice versa, ensure that we clean up old buffer state.
  // However, if we have to convert the data from a non-native format, then
  // always treat it as internally managed since we will need to copy to an
  // internally managed native format buffer.
  bool should_free = buf == nullptr || format_requires_conversion(
                                           external_format, internal_format);
  if (t.should_free() != should_free) {
    changed = true;
    t.cleanup();
    t.set_should_free(should_free);
  }
  // If now an external buffer, explicitly set it...
  if (!should_free) {
    t.set_buffer(buf, stride);
  }
  // New storage starts with no pending clear; allocate (or reallocate) the
  // backing buffer if anything changed.
  t.disable_delayed_clear();
  t.allocate(changed, min_width, min_height);
  // If we have a buffer that needs format conversion, then do that now.
  if (buf && should_free) {
    convert_copy(external_format, internal_format, (uint8_t*)t.buf, t.stride(),
                 (const uint8_t*)buf, stride, width, height);
  }
}
1696 
1697 extern "C" {
1698 
// Allocate immutable-style storage for the bound texture. Only a single mip
// level is supported.
void TexStorage2D(GLenum target, GLint levels, GLenum internal_format,
                  GLsizei width, GLsizei height) {
  assert(levels == 1);
  Texture& t = ctx->textures[ctx->get_binding(target)];
  set_tex_storage(t, internal_format, width, height);
}

// Choose the sized internal format used to store data supplied with the
// given external format and component type; asserts on unsupported combos.
GLenum internal_format_for_data(GLenum format, GLenum ty) {
  if (format == GL_RED && ty == GL_UNSIGNED_BYTE) {
    return GL_R8;
  } else if ((format == GL_RGBA || format == GL_BGRA) &&
             (ty == GL_UNSIGNED_BYTE || ty == GL_UNSIGNED_INT_8_8_8_8_REV)) {
    return GL_RGBA8;
  } else if (format == GL_RGBA && ty == GL_FLOAT) {
    return GL_RGBA32F;
  } else if (format == GL_RGBA_INTEGER && ty == GL_INT) {
    return GL_RGBA32I;
  } else if (format == GL_RG && ty == GL_UNSIGNED_BYTE) {
    return GL_RG8;
  } else if (format == GL_RGB_422_APPLE &&
             ty == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
    return GL_RGB_RAW_422_APPLE;
  } else if (format == GL_RED && ty == GL_UNSIGNED_SHORT) {
    return GL_R16;
  } else {
    debugf("unknown internal format for format %x, type %x\n", format, ty);
    assert(false);
    return 0;
  }
}
1729 
// Return the bound pixel pack buffer, or null when none is bound.
static Buffer* get_pixel_pack_buffer() {
  return ctx->pixel_pack_buffer_binding
             ? &ctx->buffers[ctx->pixel_pack_buffer_binding]
             : nullptr;
}

// When a pixel pack buffer is bound, `data` is an offset into that buffer;
// otherwise it is a client-memory pointer passed through unchanged.
static void* get_pixel_pack_buffer_data(void* data) {
  if (Buffer* b = get_pixel_pack_buffer()) {
    return b->buf ? b->buf + (size_t)data : nullptr;
  }
  return data;
}

// Return the bound pixel unpack buffer, or null when none is bound.
static Buffer* get_pixel_unpack_buffer() {
  return ctx->pixel_unpack_buffer_binding
             ? &ctx->buffers[ctx->pixel_unpack_buffer_binding]
             : nullptr;
}

// When a pixel unpack buffer is bound, `data` is an offset into that buffer;
// otherwise it is a client-memory pointer passed through unchanged.
static void* get_pixel_unpack_buffer_data(void* data) {
  if (Buffer* b = get_pixel_unpack_buffer()) {
    return b->buf ? b->buf + (size_t)data : nullptr;
  }
  return data;
}
1755 
// Upload a sub-rectangle of pixel data into the bound texture, converting
// the external format if necessary. Only mip level 0 is supported.
void TexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset,
                   GLsizei width, GLsizei height, GLenum format, GLenum ty,
                   void* data) {
  if (level != 0) {
    assert(false);
    return;
  }
  // Resolve a bound unpack buffer offset into an actual pointer.
  data = get_pixel_unpack_buffer_data(data);
  if (!data) return;
  Texture& t = ctx->textures[ctx->get_binding(target)];
  // The upload fully overwrites this rect, so a delayed clear may skip it.
  IntRect skip = {xoffset, yoffset, xoffset + width, yoffset + height};
  prepare_texture(t, &skip);
  assert(xoffset + width <= t.width);
  assert(yoffset + height <= t.height);
  assert(ctx->unpack_row_length == 0 || ctx->unpack_row_length >= width);
  GLsizei row_length =
      ctx->unpack_row_length != 0 ? ctx->unpack_row_length : width;
  assert(t.internal_format == internal_format_for_data(format, ty));
  // Source pitch uses the external format's bpp when conversion is needed,
  // or the texture's own bpp for a straight copy.
  int src_bpp = format_requires_conversion(format, t.internal_format)
                    ? bytes_for_internal_format(format)
                    : t.bpp();
  if (!src_bpp || !t.buf) return;
  convert_copy(format, t.internal_format,
               (uint8_t*)t.sample_ptr(xoffset, yoffset), t.stride(),
               (const uint8_t*)data, row_length * src_bpp, width, height);
}
1782 
// Allocates storage for the texture bound to target and uploads the initial
// image, implemented as TexStorage2D followed by TexSubImage2D. Only mip
// level 0 and a zero border are supported.
void TexImage2D(GLenum target, GLint level, GLint internal_format,
                GLsizei width, GLsizei height, GLint border, GLenum format,
                GLenum ty, void* data) {
  if (level != 0) {
    // Mipmaps are not supported.
    assert(false);
    return;
  }
  assert(border == 0);
  TexStorage2D(target, 1, internal_format, width, height);
  TexSubImage2D(target, 0, 0, 0, width, height, format, ty, data);
}
1794 
// Intentionally a no-op: this rasterizer does not implement mipmapping.
void GenerateMipmap(UNUSED GLenum target) {
  // TODO: support mipmaps
}
1798 
SetTextureParameter(GLuint texid,GLenum pname,GLint param)1799 void SetTextureParameter(GLuint texid, GLenum pname, GLint param) {
1800   Texture& t = ctx->textures[texid];
1801   switch (pname) {
1802     case GL_TEXTURE_WRAP_S:
1803       assert(param == GL_CLAMP_TO_EDGE);
1804       break;
1805     case GL_TEXTURE_WRAP_T:
1806       assert(param == GL_CLAMP_TO_EDGE);
1807       break;
1808     case GL_TEXTURE_MIN_FILTER:
1809       t.min_filter = param;
1810       break;
1811     case GL_TEXTURE_MAG_FILTER:
1812       t.mag_filter = param;
1813       break;
1814     default:
1815       break;
1816   }
1817 }
1818 
// Applies a sampler parameter to the texture currently bound to target.
void TexParameteri(GLenum target, GLenum pname, GLint param) {
  SetTextureParameter(ctx->get_binding(target), pname, param);
}
1822 
GenTextures(int n,GLuint * result)1823 void GenTextures(int n, GLuint* result) {
1824   for (int i = 0; i < n; i++) {
1825     Texture t;
1826     result[i] = ctx->textures.insert(t);
1827   }
1828 }
1829 
DeleteTexture(GLuint n)1830 void DeleteTexture(GLuint n) {
1831   if (n && ctx->textures.erase(n)) {
1832     for (size_t i = 0; i < MAX_TEXTURE_UNITS; i++) {
1833       ctx->texture_units[i].unlink(n);
1834     }
1835   }
1836 }
1837 
GenRenderbuffers(int n,GLuint * result)1838 void GenRenderbuffers(int n, GLuint* result) {
1839   for (int i = 0; i < n; i++) {
1840     Renderbuffer r;
1841     result[i] = ctx->renderbuffers.insert(r);
1842   }
1843 }
1844 
// Called when a renderbuffer is erased: detaches its backing texture from
// every framebuffer attachment that references it, then deletes the texture.
void Renderbuffer::on_erase() {
  for (auto* fb : ctx->framebuffers) {
    if (fb) {
      unlink(fb->color_attachment, texture);
      unlink(fb->depth_attachment, texture);
    }
  }
  DeleteTexture(texture);
}
1854 
DeleteRenderbuffer(GLuint n)1855 void DeleteRenderbuffer(GLuint n) {
1856   if (n && ctx->renderbuffers.erase(n)) {
1857     unlink(ctx->renderbuffer_binding, n);
1858   }
1859 }
1860 
GenFramebuffers(int n,GLuint * result)1861 void GenFramebuffers(int n, GLuint* result) {
1862   for (int i = 0; i < n; i++) {
1863     Framebuffer f;
1864     result[i] = ctx->framebuffers.insert(f);
1865   }
1866 }
1867 
DeleteFramebuffer(GLuint n)1868 void DeleteFramebuffer(GLuint n) {
1869   if (n && ctx->framebuffers.erase(n)) {
1870     unlink(ctx->read_framebuffer_binding, n);
1871     unlink(ctx->draw_framebuffer_binding, n);
1872   }
1873 }
1874 
// Allocates storage for the renderbuffer bound to target. Renderbuffers are
// backed by ordinary textures here, so this lazily creates the backing
// texture and sets its storage. All depth formats are normalized to 24-bit.
void RenderbufferStorage(GLenum target, GLenum internal_format, GLsizei width,
                         GLsizei height) {
  // Just refer a renderbuffer to a texture to simplify things for now...
  Renderbuffer& r = ctx->renderbuffers[ctx->get_binding(target)];
  if (!r.texture) {
    GenTextures(1, &r.texture);
  }
  switch (internal_format) {
    case GL_DEPTH_COMPONENT:
    case GL_DEPTH_COMPONENT16:
    case GL_DEPTH_COMPONENT24:
    case GL_DEPTH_COMPONENT32:
      // Force depth format to 24 bits...
      internal_format = GL_DEPTH_COMPONENT24;
      break;
  }
  set_tex_storage(ctx->textures[r.texture], internal_format, width, height);
}
1893 
VertexAttribPointer(GLuint index,GLint size,GLenum type,bool normalized,GLsizei stride,GLuint offset)1894 void VertexAttribPointer(GLuint index, GLint size, GLenum type, bool normalized,
1895                          GLsizei stride, GLuint offset) {
1896   // debugf("cva: %d\n", ctx->current_vertex_array);
1897   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1898   if (index >= NULL_ATTRIB) {
1899     assert(0);
1900     return;
1901   }
1902   VertexAttrib& va = v.attribs[index];
1903   va.size = size * bytes_per_type(type);
1904   va.type = type;
1905   va.normalized = normalized;
1906   va.stride = stride;
1907   va.offset = offset;
1908   // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding];
1909   va.vertex_buffer = ctx->array_buffer_binding;
1910   va.vertex_array = ctx->current_vertex_array;
1911   ctx->validate_vertex_array = true;
1912 }
1913 
VertexAttribIPointer(GLuint index,GLint size,GLenum type,GLsizei stride,GLuint offset)1914 void VertexAttribIPointer(GLuint index, GLint size, GLenum type, GLsizei stride,
1915                           GLuint offset) {
1916   // debugf("cva: %d\n", ctx->current_vertex_array);
1917   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1918   if (index >= NULL_ATTRIB) {
1919     assert(0);
1920     return;
1921   }
1922   VertexAttrib& va = v.attribs[index];
1923   va.size = size * bytes_per_type(type);
1924   va.type = type;
1925   va.normalized = false;
1926   va.stride = stride;
1927   va.offset = offset;
1928   // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding];
1929   va.vertex_buffer = ctx->array_buffer_binding;
1930   va.vertex_array = ctx->current_vertex_array;
1931   ctx->validate_vertex_array = true;
1932 }
1933 
EnableVertexAttribArray(GLuint index)1934 void EnableVertexAttribArray(GLuint index) {
1935   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1936   if (index >= NULL_ATTRIB) {
1937     assert(0);
1938     return;
1939   }
1940   VertexAttrib& va = v.attribs[index];
1941   if (!va.enabled) {
1942     ctx->validate_vertex_array = true;
1943   }
1944   va.enabled = true;
1945   v.max_attrib = max(v.max_attrib, (int)index);
1946 }
1947 
DisableVertexAttribArray(GLuint index)1948 void DisableVertexAttribArray(GLuint index) {
1949   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1950   if (index >= NULL_ATTRIB) {
1951     assert(0);
1952     return;
1953   }
1954   VertexAttrib& va = v.attribs[index];
1955   if (va.enabled) {
1956     ctx->validate_vertex_array = true;
1957   }
1958   va.enabled = false;
1959 }
1960 
VertexAttribDivisor(GLuint index,GLuint divisor)1961 void VertexAttribDivisor(GLuint index, GLuint divisor) {
1962   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1963   // Only support divisor being 0 (per-vertex) or 1 (per-instance).
1964   if (index >= NULL_ATTRIB || divisor > 1) {
1965     assert(0);
1966     return;
1967   }
1968   VertexAttrib& va = v.attribs[index];
1969   va.divisor = divisor;
1970 }
1971 
// (Re)allocates storage for the buffer bound to target and optionally copies
// initial data into it. The usage hint is ignored by this implementation.
void BufferData(GLenum target, GLsizeiptr size, void* data,
                UNUSED GLenum usage) {
  Buffer& b = ctx->buffers[ctx->get_binding(target)];
  if (b.allocate(size)) {
    // Storage moved, so any vertex array referencing it must revalidate.
    ctx->validate_vertex_array = true;
  }
  if (data && b.buf && size <= b.size) {
    memcpy(b.buf, data, size);
  }
}
1982 
// Copies data into a subrange of the buffer bound to target. Out-of-range
// writes are asserted in debug builds and ignored in release builds.
void BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
                   void* data) {
  Buffer& b = ctx->buffers[ctx->get_binding(target)];
  assert(offset + size <= b.size);
  if (data && b.buf && offset + size <= b.size) {
    memcpy(&b.buf[offset], data, size);
  }
}
1991 
MapBuffer(GLenum target,UNUSED GLbitfield access)1992 void* MapBuffer(GLenum target, UNUSED GLbitfield access) {
1993   Buffer& b = ctx->buffers[ctx->get_binding(target)];
1994   return b.buf;
1995 }
1996 
MapBufferRange(GLenum target,GLintptr offset,GLsizeiptr length,UNUSED GLbitfield access)1997 void* MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length,
1998                      UNUSED GLbitfield access) {
1999   Buffer& b = ctx->buffers[ctx->get_binding(target)];
2000   if (b.buf && offset >= 0 && length > 0 && offset + length <= b.size) {
2001     return b.buf + offset;
2002   }
2003   return nullptr;
2004 }
2005 
UnmapBuffer(GLenum target)2006 GLboolean UnmapBuffer(GLenum target) {
2007   Buffer& b = ctx->buffers[ctx->get_binding(target)];
2008   return b.buf != nullptr;
2009 }
2010 
Uniform1i(GLint location,GLint V0)2011 void Uniform1i(GLint location, GLint V0) {
2012   // debugf("tex: %d\n", (int)ctx->textures.size);
2013   if (vertex_shader) {
2014     vertex_shader->set_uniform_1i(location, V0);
2015   }
2016 }
Uniform4fv(GLint location,GLsizei count,const GLfloat * v)2017 void Uniform4fv(GLint location, GLsizei count, const GLfloat* v) {
2018   assert(count == 1);
2019   if (vertex_shader) {
2020     vertex_shader->set_uniform_4fv(location, v);
2021   }
2022 }
UniformMatrix4fv(GLint location,GLsizei count,GLboolean transpose,const GLfloat * value)2023 void UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
2024                       const GLfloat* value) {
2025   assert(count == 1);
2026   assert(!transpose);
2027   if (vertex_shader) {
2028     vertex_shader->set_uniform_matrix4fv(location, value);
2029   }
2030 }
2031 
FramebufferTexture2D(GLenum target,GLenum attachment,GLenum textarget,GLuint texture,GLint level)2032 void FramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget,
2033                           GLuint texture, GLint level) {
2034   assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
2035   assert(textarget == GL_TEXTURE_2D || textarget == GL_TEXTURE_RECTANGLE);
2036   assert(level == 0);
2037   Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
2038   if (attachment == GL_COLOR_ATTACHMENT0) {
2039     fb.color_attachment = texture;
2040   } else if (attachment == GL_DEPTH_ATTACHMENT) {
2041     fb.depth_attachment = texture;
2042   } else {
2043     assert(0);
2044   }
2045 }
2046 
FramebufferRenderbuffer(GLenum target,GLenum attachment,GLenum renderbuffertarget,GLuint renderbuffer)2047 void FramebufferRenderbuffer(GLenum target, GLenum attachment,
2048                              GLenum renderbuffertarget, GLuint renderbuffer) {
2049   assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
2050   assert(renderbuffertarget == GL_RENDERBUFFER);
2051   Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
2052   Renderbuffer& rb = ctx->renderbuffers[renderbuffer];
2053   if (attachment == GL_COLOR_ATTACHMENT0) {
2054     fb.color_attachment = rb.texture;
2055   } else if (attachment == GL_DEPTH_ATTACHMENT) {
2056     fb.depth_attachment = rb.texture;
2057   } else {
2058     assert(0);
2059   }
2060 }
2061 
2062 }  // extern "C"
2063 
// Looks up the framebuffer bound to target, mapping GL_FRAMEBUFFER to the
// draw framebuffer. When fallback is set and the binding resolves to no
// framebuffer, returns the default framebuffer (id 0) instead of null.
static inline Framebuffer* get_framebuffer(GLenum target,
                                           bool fallback = false) {
  if (target == GL_FRAMEBUFFER) {
    target = GL_DRAW_FRAMEBUFFER;
  }
  Framebuffer* fb = ctx->framebuffers.find(ctx->get_binding(target));
  if (fallback && !fb) {
    // If the specified framebuffer isn't found and a fallback is requested,
    // use the default framebuffer.
    fb = &ctx->framebuffers[0];
  }
  return fb;
}
2077 
// Fills the first n elements of dst with val.
template <typename T>
static inline void fill_n(T* dst, size_t n, T val) {
  for (size_t i = 0; i < n; i++) {
    dst[i] = val;
  }
}
2082 
#if USE_SSE2
// x86 specialization: use `rep stosl` to fill n dwords starting at dst.
// The inline asm clobbers dst (EDI/RDI) and n (ECX/RCX) as the string
// instruction advances them, with EAX holding the fill value.
template <>
inline void fill_n<uint32_t>(uint32_t* dst, size_t n, uint32_t val) {
  __asm__ __volatile__("rep stosl\n"
                       : "+D"(dst), "+c"(n)
                       : "a"(val)
                       : "memory", "cc");
}
#endif
2092 
// Broadcasts an 8-bit clear value into every byte of a 32-bit chunk.
static inline uint32_t clear_chunk(uint8_t value) {
  uint32_t chunk = value;
  chunk |= chunk << 8;
  chunk |= chunk << 16;
  return chunk;
}
2096 
// Broadcasts a 16-bit clear value into both halves of a 32-bit chunk.
static inline uint32_t clear_chunk(uint16_t value) {
  return uint32_t(value) * 0x00010001U;
}
2100 
clear_chunk(uint32_t value)2101 static inline uint32_t clear_chunk(uint32_t value) { return value; }
2102 
// Fills len elements of buf with value, writing 32-bit aligned chunks where
// possible for speed. chunk must be value broadcast across 32 bits (see
// clear_chunk). For T == uint32_t the alignment pre/post loops compile away.
template <typename T>
static inline void clear_row(T* buf, size_t len, T value, uint32_t chunk) {
  const size_t N = sizeof(uint32_t) / sizeof(T);
  // fill any leading unaligned values
  if (N > 1) {
    size_t align = (-(intptr_t)buf & (sizeof(uint32_t) - 1)) / sizeof(T);
    if (align <= len) {
      fill_n(buf, align, value);
      len -= align;
      buf += align;
    }
  }
  // fill as many aligned chunks as possible
  fill_n((uint32_t*)buf, len / N, chunk);
  // fill any remaining values
  if (N > 1) {
    fill_n(buf + (len & ~(N - 1)), len & (N - 1), value);
  }
}
2122 
// Clears rect bb of texture t to value, optionally skipping the horizontal
// span [skip_start, skip_end) on every row. T must match the texture's
// bytes-per-pixel.
template <typename T>
static void clear_buffer(Texture& t, T value, IntRect bb, int skip_start = 0,
                         int skip_end = 0) {
  if (!t.buf) return;
  // Clamp the skip span into the rect; an empty span disables skipping.
  skip_start = max(skip_start, bb.x0);
  skip_end = max(skip_end, skip_start);
  assert(sizeof(T) == t.bpp());
  size_t stride = t.stride();
  // When clearing multiple full-width rows, collapse them into a single large
  // "row" to avoid redundant setup from clearing each row individually. Note
  // that we can only safely do this if the stride is tightly packed.
  if (bb.width() == t.width && bb.height() > 1 && skip_start >= skip_end &&
      (t.should_free() || stride == t.width * sizeof(T))) {
    bb.x1 += (stride / sizeof(T)) * (bb.height() - 1);
    bb.y1 = bb.y0 + 1;
  }
  T* buf = (T*)t.sample_ptr(bb.x0, bb.y0);
  uint32_t chunk = clear_chunk(value);
  for (int rows = bb.height(); rows > 0; rows--) {
    // Clear the portions of the row on either side of the skip span.
    if (bb.x0 < skip_start) {
      clear_row(buf, skip_start - bb.x0, value, chunk);
    }
    if (skip_end < bb.x1) {
      clear_row(buf + (skip_end - bb.x0), bb.x1 - skip_end, value, chunk);
    }
    buf += stride / sizeof(T);
  }
}
2151 
// Clears a single row y of texture t to its delayed clear value, leaving the
// span [skip_start, skip_end) untouched (it is about to be overwritten).
template <typename T>
static inline void force_clear_row(Texture& t, int y, int skip_start = 0,
                                   int skip_end = 0) {
  assert(t.buf != nullptr);
  assert(sizeof(T) == t.bpp());
  assert(skip_start <= skip_end);
  T* buf = (T*)t.sample_ptr(0, y);
  uint32_t chunk = clear_chunk((T)t.clear_val);
  if (skip_start > 0) {
    clear_row<T>(buf, skip_start, t.clear_val, chunk);
  }
  if (skip_end < t.width) {
    clear_row<T>(buf + skip_end, t.width - skip_end, t.clear_val, chunk);
  }
}
2167 
// Resolves a pending delayed clear on texture t by clearing every row whose
// bit is still unset in t.cleared_rows. If skip is given, the clear is
// restricted to rows intersecting it and the skip's horizontal span is left
// untouched on those rows (the caller is about to overwrite it).
template <typename T>
static void force_clear(Texture& t, const IntRect* skip = nullptr) {
  if (!t.delay_clear || !t.cleared_rows) {
    return;
  }
  int y0 = 0;
  int y1 = t.height;
  int skip_start = 0;
  int skip_end = 0;
  if (skip) {
    y0 = clamp(skip->y0, 0, t.height);
    y1 = clamp(skip->y1, y0, t.height);
    skip_start = clamp(skip->x0, 0, t.width);
    skip_end = clamp(skip->x1, skip_start, t.width);
    // If the skip rect covers the whole texture, nothing needs clearing.
    if (skip_start <= 0 && skip_end >= t.width && y0 <= 0 && y1 >= t.height) {
      t.disable_delayed_clear();
      return;
    }
  }
  // cleared_rows is a bitmask with one bit per row, 32 rows per word; a set
  // bit means the row has already been cleared (or written).
  int num_masks = (y1 + 31) / 32;
  uint32_t* rows = t.cleared_rows;
  for (int i = y0 / 32; i < num_masks; i++) {
    uint32_t mask = rows[i];
    if (mask != ~0U) {
      rows[i] = ~0U;
      int start = i * 32;
      // Walk runs of zero bits (uncleared rows) separated by runs of one
      // bits, clearing each zero run as a single multi-row buffer clear.
      while (mask) {
        int count = __builtin_ctz(mask);
        if (count > 0) {
          clear_buffer<T>(t, t.clear_val,
                          IntRect{0, start, t.width, start + count}, skip_start,
                          skip_end);
          t.delay_clear -= count;
          start += count;
          mask >>= count;
        }
        count = __builtin_ctz(mask + 1);
        start += count;
        mask >>= count;
      }
      // Clear any trailing zero run at the top of this mask word.
      int count = (i + 1) * 32 - start;
      if (count > 0) {
        clear_buffer<T>(t, t.clear_val,
                        IntRect{0, start, t.width, start + count}, skip_start,
                        skip_end);
        t.delay_clear -= count;
      }
    }
  }
  if (t.delay_clear <= 0) t.disable_delayed_clear();
}
2219 
// Ensures texture t's contents are valid for reading or partial writing by
// resolving any pending delayed clear, dispatched on the pixel size of the
// texture's internal format. skip optionally marks a rect that is about to
// be fully overwritten and so need not be cleared.
static void prepare_texture(Texture& t, const IntRect* skip) {
  if (t.delay_clear) {
    switch (t.internal_format) {
      case GL_RGBA8:
        force_clear<uint32_t>(t, skip);
        break;
      case GL_R8:
        force_clear<uint8_t>(t, skip);
        break;
      case GL_RG8:
        force_clear<uint16_t>(t, skip);
        break;
      default:
        assert(false);
        break;
    }
  }
}
2238 
// Setup a clear on a texture. This may either force an immediate clear or
// potentially punt to a delayed clear, if applicable.
template <typename T>
static void request_clear(Texture& t, T value, const IntRect& scissor) {
  // If the clear would require a scissor, force clear anything outside
  // the scissor, and then immediately clear anything inside the scissor.
  if (!scissor.contains(t.offset_bounds())) {
    // Translate the scissor into the texture's local coordinate space.
    IntRect skip = scissor - t.offset;
    force_clear<T>(t, &skip);
    clear_buffer<T>(t, value, skip.intersection(t.bounds()));
  } else {
    // Do delayed clear for 2D texture without scissor.
    t.enable_delayed_clear(value);
  }
}
2254 
// Convenience overload that applies the context's scissor state: clears
// within the scissor rect when scissoring is enabled, otherwise clears the
// whole texture.
template <typename T>
static inline void request_clear(Texture& t, T value) {
  // If scissoring is enabled, use the scissor rect. Otherwise, just scissor to
  // the entire texture bounds.
  request_clear(t, value, ctx->scissortest ? ctx->scissor : t.offset_bounds());
}
2261 
2262 extern "C" {
2263 
// Points the default framebuffer (id 0) at an externally supplied BGRA
// destination buffer of the given dimensions/stride, creating its color and
// depth attachments on first use. (x, y) offsets both attachments so window
// coordinates map onto the buffer.
void InitDefaultFramebuffer(int x, int y, int width, int height, int stride,
                            void* buf) {
  Framebuffer& fb = ctx->framebuffers[0];
  if (!fb.color_attachment) {
    GenTextures(1, &fb.color_attachment);
  }
  // If the dimensions or buffer properties changed, we need to reallocate
  // the underlying storage for the color buffer texture.
  Texture& colortex = ctx->textures[fb.color_attachment];
  set_tex_storage(colortex, GL_RGBA8, width, height, buf, stride);
  colortex.offset = IntPoint(x, y);
  if (!fb.depth_attachment) {
    GenTextures(1, &fb.depth_attachment);
  }
  // Ensure dimensions of the depth buffer match the color buffer.
  Texture& depthtex = ctx->textures[fb.depth_attachment];
  set_tex_storage(depthtex, GL_DEPTH_COMPONENT24, width, height);
  depthtex.offset = IntPoint(x, y);
}
2283 
GetColorBuffer(GLuint fbo,GLboolean flush,int32_t * width,int32_t * height,int32_t * stride)2284 void* GetColorBuffer(GLuint fbo, GLboolean flush, int32_t* width,
2285                      int32_t* height, int32_t* stride) {
2286   Framebuffer* fb = ctx->framebuffers.find(fbo);
2287   if (!fb || !fb->color_attachment) {
2288     return nullptr;
2289   }
2290   Texture& colortex = ctx->textures[fb->color_attachment];
2291   if (flush) {
2292     prepare_texture(colortex);
2293   }
2294   assert(colortex.offset == IntPoint(0, 0));
2295   if (width) {
2296     *width = colortex.width;
2297   }
2298   if (height) {
2299     *height = colortex.height;
2300   }
2301   if (stride) {
2302     *stride = colortex.stride();
2303   }
2304   return colortex.buf ? colortex.sample_ptr(0, 0) : nullptr;
2305 }
2306 
ResolveFramebuffer(GLuint fbo)2307 void ResolveFramebuffer(GLuint fbo) {
2308   Framebuffer* fb = ctx->framebuffers.find(fbo);
2309   if (!fb || !fb->color_attachment) {
2310     return;
2311   }
2312   Texture& colortex = ctx->textures[fb->color_attachment];
2313   prepare_texture(colortex);
2314 }
2315 
// Points texture texid at externally owned storage with the given format,
// dimensions, and stride. min_width/min_height allow over-allocation hints.
void SetTextureBuffer(GLuint texid, GLenum internal_format, GLsizei width,
                      GLsizei height, GLsizei stride, void* buf,
                      GLsizei min_width, GLsizei min_height) {
  Texture& t = ctx->textures[texid];
  set_tex_storage(t, internal_format, width, height, buf, stride, min_width,
                  min_height);
}
2323 
CheckFramebufferStatus(GLenum target)2324 GLenum CheckFramebufferStatus(GLenum target) {
2325   Framebuffer* fb = get_framebuffer(target);
2326   if (!fb || !fb->color_attachment) {
2327     return GL_FRAMEBUFFER_UNSUPPORTED;
2328   }
2329   return GL_FRAMEBUFFER_COMPLETE;
2330 }
2331 
// Clears a subrectangle of texture `texture` to the value pointed to by
// data, interpreted according to format/type. Depth textures are cleared by
// filling depth runs; color textures are converted to a packed 32-bit value
// and routed through request_clear. Only level 0 and a single 2D layer
// (zoffset == 0, depth == 1) are supported.
void ClearTexSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset,
                      GLint zoffset, GLsizei width, GLsizei height,
                      GLsizei depth, GLenum format, GLenum type,
                      const void* data) {
  if (level != 0) {
    // Mipmaps are not supported.
    assert(false);
    return;
  }
  Texture& t = ctx->textures[texture];
  assert(!t.locked);
  if (width <= 0 || height <= 0 || depth <= 0) {
    return;
  }
  assert(zoffset == 0 && depth == 1);
  IntRect scissor = {xoffset, yoffset, xoffset + width, yoffset + height};
  if (t.internal_format == GL_DEPTH_COMPONENT24) {
    // Depth path: convert the clear depth to a 24-bit fixed-point value.
    uint32_t value = 0xFFFFFF;
    switch (format) {
      case GL_DEPTH_COMPONENT:
        switch (type) {
          case GL_DOUBLE:
            value = uint32_t(*(const GLdouble*)data * 0xFFFFFF);
            break;
          case GL_FLOAT:
            value = uint32_t(*(const GLfloat*)data * 0xFFFFFF);
            break;
          default:
            assert(false);
            break;
        }
        break;
      default:
        assert(false);
        break;
    }
    if (t.cleared() && !scissor.contains(t.offset_bounds())) {
      // If we need to scissor the clear and the depth buffer was already
      // initialized, then just fill runs for that scissor area.
      t.fill_depth_runs(value, scissor);
    } else {
      // Otherwise, the buffer is either uninitialized or the clear would
      // encompass the entire buffer. If uninitialized, we can safely fill
      // the entire buffer with any value and thus ignore any scissoring.
      t.init_depth_runs(value);
    }
    return;
  }

  // Color path: pack the clear color into a 32-bit RGBA value, defaulting
  // any channels the source format omits to opaque black.
  uint32_t color = 0xFF000000;
  switch (type) {
    case GL_FLOAT: {
      // Each case falls through to fill the channels the format provides.
      const GLfloat* f = (const GLfloat*)data;
      Float v = {0.0f, 0.0f, 0.0f, 1.0f};
      switch (format) {
        case GL_RGBA:
          v.w = f[3];  // alpha
          FALLTHROUGH;
        case GL_RGB:
          v.z = f[2];  // blue
          FALLTHROUGH;
        case GL_RG:
          v.y = f[1];  // green
          FALLTHROUGH;
        case GL_RED:
          v.x = f[0];  // red
          break;
        default:
          assert(false);
          break;
      }
      color = bit_cast<uint32_t>(CONVERT(round_pixel(v), U8));
      break;
    }
    case GL_UNSIGNED_BYTE: {
      const GLubyte* b = (const GLubyte*)data;
      switch (format) {
        case GL_RGBA:
          color = (color & ~0xFF000000) | (uint32_t(b[3]) << 24);  // alpha
          FALLTHROUGH;
        case GL_RGB:
          color = (color & ~0x00FF0000) | (uint32_t(b[2]) << 16);  // blue
          FALLTHROUGH;
        case GL_RG:
          color = (color & ~0x0000FF00) | (uint32_t(b[1]) << 8);  // green
          FALLTHROUGH;
        case GL_RED:
          color = (color & ~0x000000FF) | uint32_t(b[0]);  // red
          break;
        default:
          assert(false);
          break;
      }
      break;
    }
    default:
      assert(false);
      break;
  }

  // Dispatch on the texture's storage format, truncating or swizzling the
  // packed color to match.
  switch (t.internal_format) {
    case GL_RGBA8:
      // Clear color needs to swizzle to BGRA.
      request_clear<uint32_t>(t,
                              (color & 0xFF00FF00) |
                                  ((color << 16) & 0xFF0000) |
                                  ((color >> 16) & 0xFF),
                              scissor);
      break;
    case GL_R8:
      request_clear<uint8_t>(t, uint8_t(color & 0xFF), scissor);
      break;
    case GL_RG8:
      request_clear<uint16_t>(t, uint16_t(color & 0xFFFF), scissor);
      break;
    default:
      assert(false);
      break;
  }
}
2451 
// Clears the entire texture by delegating to ClearTexSubImage over the
// texture's full (offset) bounds.
void ClearTexImage(GLuint texture, GLint level, GLenum format, GLenum type,
                   const void* data) {
  Texture& t = ctx->textures[texture];
  IntRect scissor = t.offset_bounds();
  ClearTexSubImage(texture, level, scissor.x0, scissor.y0, 0, scissor.width(),
                   scissor.height(), 1, format, type, data);
}
2459 
// Implements glClear for the draw framebuffer: clears the color and/or depth
// attachments (as selected by mask) to the context's clear color/depth,
// honoring the scissor rect when scissor testing is enabled.
void Clear(GLbitfield mask) {
  Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
  if ((mask & GL_COLOR_BUFFER_BIT) && fb.color_attachment) {
    Texture& t = ctx->textures[fb.color_attachment];
    IntRect scissor = ctx->scissortest
                          ? ctx->scissor.intersection(t.offset_bounds())
                          : t.offset_bounds();
    ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
                     scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
                     ctx->clearcolor);
  }
  if ((mask & GL_DEPTH_BUFFER_BIT) && fb.depth_attachment) {
    Texture& t = ctx->textures[fb.depth_attachment];
    IntRect scissor = ctx->scissortest
                          ? ctx->scissor.intersection(t.offset_bounds())
                          : t.offset_bounds();
    ClearTexSubImage(fb.depth_attachment, 0, scissor.x0, scissor.y0, 0,
                     scissor.width(), scissor.height(), 1, GL_DEPTH_COMPONENT,
                     GL_DOUBLE, &ctx->cleardepth);
  }
}
2481 
// Clears a rectangle of framebuffer fbo's color attachment to the given RGBA
// color, clipped to the attachment's bounds.
void ClearColorRect(GLuint fbo, GLint xoffset, GLint yoffset, GLsizei width,
                    GLsizei height, GLfloat r, GLfloat g, GLfloat b,
                    GLfloat a) {
  GLfloat color[] = {r, g, b, a};
  Framebuffer& fb = ctx->framebuffers[fbo];
  Texture& t = ctx->textures[fb.color_attachment];
  IntRect scissor =
      IntRect{xoffset, yoffset, xoffset + width, yoffset + height}.intersection(
          t.offset_bounds());
  ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
                   scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
                   color);
}
2495 
// Marks the listed attachments of the framebuffer bound to target as no
// longer needing their current contents: the depth buffer drops its cleared
// state and the color buffer cancels any pending delayed clear. Unrecognized
// attachments are ignored.
void InvalidateFramebuffer(GLenum target, GLsizei num_attachments,
                           const GLenum* attachments) {
  Framebuffer* fb = get_framebuffer(target);
  if (!fb || num_attachments <= 0 || !attachments) {
    return;
  }
  for (GLsizei i = 0; i < num_attachments; i++) {
    switch (attachments[i]) {
      case GL_DEPTH_ATTACHMENT: {
        Texture& t = ctx->textures[fb->depth_attachment];
        t.set_cleared(false);
        break;
      }
      case GL_COLOR_ATTACHMENT0: {
        Texture& t = ctx->textures[fb->color_attachment];
        t.disable_delayed_clear();
        break;
      }
    }
  }
}
2517 
// Reads a rectangle of pixels from the read framebuffer's color attachment
// into data (or into the bound pixel pack buffer at offset data). The
// requested rect is clipped to the texture bounds; clipped-off rows/columns
// leave the corresponding destination bytes untouched.
void ReadPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format,
                GLenum type, void* data) {
  // Resolve the destination through the pixel pack buffer, if bound.
  data = get_pixel_pack_buffer_data(data);
  if (!data) return;
  Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
  if (!fb) return;
  assert(format == GL_RED || format == GL_RGBA || format == GL_RGBA_INTEGER ||
         format == GL_BGRA || format == GL_RG);
  Texture& t = ctx->textures[fb->color_attachment];
  if (!t.buf) return;
  // Resolve any pending delayed clear before reading.
  prepare_texture(t);
  // Translate window coordinates into the texture's local space.
  x -= t.offset.x;
  y -= t.offset.y;
  assert(x >= 0 && y >= 0);
  assert(x + width <= t.width);
  assert(y + height <= t.height);
  if (internal_format_for_data(format, type) != t.internal_format) {
    debugf("mismatched format for read pixels: %x vs %x\n", t.internal_format,
           internal_format_for_data(format, type));
    assert(false);
    return;
  }
  // Only support readback conversions that are reversible
  assert(!format_requires_conversion(format, t.internal_format) ||
         bytes_for_internal_format(format) == t.bpp());
  uint8_t* dest = (uint8_t*)data;
  size_t destStride = width * t.bpp();
  // Clip the requested rect to the texture, advancing the destination
  // pointer past any clipped leading rows/columns.
  if (y < 0) {
    dest += -y * destStride;
    height += y;
    y = 0;
  }
  if (y + height > t.height) {
    height = t.height - y;
  }
  if (x < 0) {
    dest += -x * t.bpp();
    width += x;
    x = 0;
  }
  if (x + width > t.width) {
    width = t.width - x;
  }
  if (width <= 0 || height <= 0) {
    return;
  }
  convert_copy(format, t.internal_format, dest, destStride,
               (const uint8_t*)t.sample_ptr(x, y), t.stride(), width, height);
}
2569 
// Copies a rectangle of pixels from one texture to another. Renderbuffer
// names are resolved to their backing textures. Only level 0, matching
// internal formats, and a single 2D layer are supported; source and
// destination rects must lie fully within their textures.
void CopyImageSubData(GLuint srcName, GLenum srcTarget, UNUSED GLint srcLevel,
                      GLint srcX, GLint srcY, GLint srcZ, GLuint dstName,
                      GLenum dstTarget, UNUSED GLint dstLevel, GLint dstX,
                      GLint dstY, GLint dstZ, GLsizei srcWidth,
                      GLsizei srcHeight, GLsizei srcDepth) {
  assert(srcLevel == 0 && dstLevel == 0);
  assert(srcZ == 0 && srcDepth == 1 && dstZ == 0);
  if (srcTarget == GL_RENDERBUFFER) {
    Renderbuffer& rb = ctx->renderbuffers[srcName];
    srcName = rb.texture;
  }
  if (dstTarget == GL_RENDERBUFFER) {
    Renderbuffer& rb = ctx->renderbuffers[dstName];
    dstName = rb.texture;
  }
  Texture& srctex = ctx->textures[srcName];
  if (!srctex.buf) return;
  // Resolve pending clears: fully on the source, and on the destination
  // everywhere except the rect we are about to overwrite.
  prepare_texture(srctex);
  Texture& dsttex = ctx->textures[dstName];
  if (!dsttex.buf) return;
  assert(!dsttex.locked);
  IntRect skip = {dstX, dstY, dstX + srcWidth, dstY + srcHeight};
  prepare_texture(dsttex, &skip);
  assert(srctex.internal_format == dsttex.internal_format);
  assert(srcWidth >= 0);
  assert(srcHeight >= 0);
  assert(srcX + srcWidth <= srctex.width);
  assert(srcY + srcHeight <= srctex.height);
  assert(dstX + srcWidth <= dsttex.width);
  assert(dstY + srcHeight <= dsttex.height);
  // Row-by-row memcpy; formats match, so no conversion is needed.
  int bpp = srctex.bpp();
  int src_stride = srctex.stride();
  int dest_stride = dsttex.stride();
  char* dest = dsttex.sample_ptr(dstX, dstY);
  char* src = srctex.sample_ptr(srcX, srcY);
  for (int y = 0; y < srcHeight; y++) {
    memcpy(dest, src, srcWidth * bpp);
    dest += dest_stride;
    src += src_stride;
  }
}
2611 
CopyTexSubImage2D(GLenum target,UNUSED GLint level,GLint xoffset,GLint yoffset,GLint x,GLint y,GLsizei width,GLsizei height)2612 void CopyTexSubImage2D(GLenum target, UNUSED GLint level, GLint xoffset,
2613                        GLint yoffset, GLint x, GLint y, GLsizei width,
2614                        GLsizei height) {
2615   assert(level == 0);
2616   Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
2617   if (!fb) return;
2618   CopyImageSubData(fb->color_attachment, GL_TEXTURE_2D, 0, x, y, 0,
2619                    ctx->get_binding(target), GL_TEXTURE_2D, 0, xoffset, yoffset,
2620                    0, width, height, 1);
2621 }
2622 
2623 }  // extern "C"
2624 
2625 #include "blend.h"
2626 #include "composite.h"
2627 #include "swgl_ext.h"
2628 
2629 #pragma GCC diagnostic push
2630 #pragma GCC diagnostic ignored "-Wuninitialized"
2631 #pragma GCC diagnostic ignored "-Wunused-function"
2632 #pragma GCC diagnostic ignored "-Wunused-parameter"
2633 #pragma GCC diagnostic ignored "-Wunused-variable"
2634 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
2635 #ifdef __clang__
2636 #  pragma GCC diagnostic ignored "-Wunused-private-field"
2637 #else
2638 #  pragma GCC diagnostic ignored "-Wunused-but-set-variable"
2639 #endif
2640 #include "load_shader.h"
2641 #pragma GCC diagnostic pop
2642 
2643 #include "rasterize.h"
2644 
validate()2645 void VertexArray::validate() {
2646   int last_enabled = -1;
2647   for (int i = 0; i <= max_attrib; i++) {
2648     VertexAttrib& attr = attribs[i];
2649     if (attr.enabled) {
2650       // VertexArray &v = ctx->vertex_arrays[attr.vertex_array];
2651       Buffer& vertex_buf = ctx->buffers[attr.vertex_buffer];
2652       attr.buf = vertex_buf.buf;
2653       attr.buf_size = vertex_buf.size;
2654       // debugf("%d %x %d %d %d %d\n", i, attr.type, attr.size, attr.stride,
2655       // attr.offset, attr.divisor);
2656       last_enabled = i;
2657     }
2658   }
2659   max_attrib = last_enabled;
2660 }
2661 
2662 extern "C" {
2663 
// Draw `instancecount` instances of `count` indexed vertices from the current
// vertex array into the draw framebuffer. Supports GL_UNSIGNED_SHORT and
// GL_UNSIGNED_INT element types (triangles only), plus a non-standard
// GL_NONE element type that behaves like DrawArrays (lines or triangles).
void DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
                           GLintptr offset, GLsizei instancecount) {
  // Bail out early on degenerate parameters or if either shader is missing.
  if (offset < 0 || count <= 0 || instancecount <= 0 || !vertex_shader ||
      !fragment_shader) {
    return;
  }

  // NOTE(review): the `true` argument presumably forces creation of a default
  // framebuffer so the dereference is safe — confirm against get_framebuffer.
  Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
  if (!fb.color_attachment) {
    return;
  }
  Texture& colortex = ctx->textures[fb.color_attachment];
  if (!colortex.buf) {
    return;
  }
  assert(!colortex.locked);
  assert(colortex.internal_format == GL_RGBA8 ||
         colortex.internal_format == GL_R8);
  // When the depth test is off, index 0 is used instead of the depth
  // attachment; presumably texture 0 has no buf, so the depthtex.buf check
  // below disables depth processing — verify texture 0 is a dummy entry.
  Texture& depthtex = ctx->textures[ctx->depthtest ? fb.depth_attachment : 0];
  if (depthtex.buf) {
    // Depth buffer, when present, must match the color buffer's geometry.
    assert(depthtex.internal_format == GL_DEPTH_COMPONENT24);
    assert(colortex.width == depthtex.width &&
           colortex.height == depthtex.height);
    assert(colortex.offset == depthtex.offset);
  }

  // debugf("current_vertex_array %d\n", ctx->current_vertex_array);
  // debugf("indices size: %d\n", indices_buf.size);
  // Revalidate the vertex array's cached attribute pointers if anything
  // changed since the last draw.
  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
  if (ctx->validate_vertex_array) {
    ctx->validate_vertex_array = false;
    v.validate();
  }

#ifdef PRINT_TIMINGS
  uint64_t start = get_time_value();
#endif

  // Reset per-draw statistics accumulated by the rasterizer.
  ctx->shaded_rows = 0;
  ctx->shaded_pixels = 0;

  vertex_shader->init_batch();

  switch (type) {
    case GL_UNSIGNED_SHORT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint16_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_UNSIGNED_INT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint32_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_NONE:
      // Non-standard GL extension - if element type is GL_NONE, then we don't
      // use any element buffer and behave as if DrawArrays was called instead.
      for (GLsizei instance = 0; instance < instancecount; instance++) {
        switch (mode) {
          case GL_LINES:
            // Each line primitive consumes 2 consecutive vertices.
            for (GLsizei i = 0; i + 2 <= count; i += 2) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 2);
              draw_quad(2, colortex, depthtex);
            }
            break;
          case GL_TRIANGLES:
            // Each triangle primitive consumes 3 consecutive vertices.
            for (GLsizei i = 0; i + 3 <= count; i += 3) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 3);
              draw_quad(3, colortex, depthtex);
            }
            break;
          default:
            assert(false);
            break;
        }
      }
      break;
    default:
      assert(false);
      break;
  }

  // Accumulate occlusion-query results if a samples-passed query is active.
  if (ctx->samples_passed_query) {
    Query& q = ctx->queries[ctx->samples_passed_query];
    q.value += ctx->shaded_pixels;
  }

#ifdef PRINT_TIMINGS
  uint64_t end = get_time_value();
  printf(
      "%7.3fms draw(%s, %d): %d pixels in %d rows (avg %f pixels/row, "
      "%fns/pixel)\n",
      double(end - start) / (1000. * 1000.),
      ctx->programs[ctx->current_program].impl->get_name(), instancecount,
      ctx->shaded_pixels, ctx->shaded_rows,
      double(ctx->shaded_pixels) / ctx->shaded_rows,
      double(end - start) / max(ctx->shaded_pixels, 1));
#endif
}
2763 
// Finish is a no-op here aside from an optional timing marker; there is no
// queued work in this code path to wait on.
void Finish() {
#ifdef PRINT_TIMINGS
  printf("Finish\n");
#endif
}
2769 
MakeCurrent(Context * c)2770 void MakeCurrent(Context* c) {
2771   if (ctx == c) {
2772     return;
2773   }
2774   ctx = c;
2775   setup_program(ctx ? ctx->current_program : 0);
2776 }
2777 
CreateContext()2778 Context* CreateContext() { return new Context; }
2779 
ReferenceContext(Context * c)2780 void ReferenceContext(Context* c) {
2781   if (!c) {
2782     return;
2783   }
2784   ++c->references;
2785 }
2786 
DestroyContext(Context * c)2787 void DestroyContext(Context* c) {
2788   if (!c) {
2789     return;
2790   }
2791   assert(c->references > 0);
2792   --c->references;
2793   if (c->references > 0) {
2794     return;
2795   }
2796   if (ctx == c) {
2797     MakeCurrent(nullptr);
2798   }
2799   delete c;
2800 }
2801 
ReportMemory(Context * ctx,size_t (* size_of_op)(void *))2802 size_t ReportMemory(Context* ctx, size_t (*size_of_op)(void*)) {
2803   size_t size = 0;
2804   if (ctx) {
2805     for (auto& t : ctx->textures) {
2806       if (t && t->should_free()) {
2807         size += size_of_op(t->buf);
2808       }
2809     }
2810   }
2811   return size;
2812 }
2813 }  // extern "C"
2814