1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include <stdlib.h>
6 #include <stdint.h>
7 #include <string.h>
8 #include <assert.h>
9 #include <stdio.h>
10 #include <math.h>
11
12 #ifdef __MACH__
13 # include <mach/mach.h>
14 # include <mach/mach_time.h>
15 #else
16 # include <time.h>
17 #endif
18
19 #ifdef NDEBUG
20 # define debugf(...)
21 #else
22 # define debugf(...) printf(__VA_ARGS__)
23 #endif
24
25 // #define PRINT_TIMINGS
26
27 #ifdef _WIN32
28 # define ALWAYS_INLINE __forceinline
29 # define NO_INLINE __declspec(noinline)
30
31 // Including Windows.h brings a huge amount of namespace polution so just
32 // define a couple of things manually
33 typedef int BOOL;
34 # define WINAPI __stdcall
35 # define DECLSPEC_IMPORT __declspec(dllimport)
36 # define WINBASEAPI DECLSPEC_IMPORT
37 typedef unsigned long DWORD;
38 typedef long LONG;
39 typedef __int64 LONGLONG;
40 # define DUMMYSTRUCTNAME
41
// Minimal re-declaration of LARGE_INTEGER, layout-matching the winnt.h
// definition, so the QueryPerformance* declarations below can be used without
// including Windows.h.
typedef union _LARGE_INTEGER {
  struct {
    DWORD LowPart;
    LONG HighPart;
  } DUMMYSTRUCTNAME;
  struct {
    DWORD LowPart;
    LONG HighPart;
  } u;
  LONGLONG QuadPart;
} LARGE_INTEGER;
// High-resolution timer entry points, declared manually for the same reason
// (normally provided by Windows.h).
extern "C" {
WINBASEAPI BOOL WINAPI
QueryPerformanceCounter(LARGE_INTEGER* lpPerformanceCount);

WINBASEAPI BOOL WINAPI QueryPerformanceFrequency(LARGE_INTEGER* lpFrequency);
}
59
60 #else
61 // GCC is slower when dealing with always_inline, especially in debug builds.
62 // When using Clang, use always_inline more aggressively.
63 # if defined(__clang__) || defined(NDEBUG)
64 # define ALWAYS_INLINE __attribute__((always_inline)) inline
65 # else
66 # define ALWAYS_INLINE inline
67 # endif
68 # define NO_INLINE __attribute__((noinline))
69 #endif
70
71 // Some functions may cause excessive binary bloat if inlined in debug or with
72 // GCC builds, so use PREFER_INLINE on these instead of ALWAYS_INLINE.
73 #if defined(__clang__) && defined(NDEBUG)
74 # define PREFER_INLINE ALWAYS_INLINE
75 #else
76 # define PREFER_INLINE inline
77 #endif
78
79 #define UNREACHABLE __builtin_unreachable()
80
81 #define UNUSED [[maybe_unused]]
82
83 #define FALLTHROUGH [[fallthrough]]
84
85 #ifdef MOZILLA_CLIENT
86 # define IMPLICIT __attribute__((annotate("moz_implicit")))
87 #else
88 # define IMPLICIT
89 #endif
90
91 #include "gl_defs.h"
92 #include "glsl.h"
93 #include "program.h"
94 #include "texture.h"
95
96 using namespace glsl;
97
98 typedef ivec2_scalar IntPoint;
99
100 struct IntRect {
101 int x0;
102 int y0;
103 int x1;
104 int y1;
105
IntRectIntRect106 IntRect() : x0(0), y0(0), x1(0), y1(0) {}
IntRectIntRect107 IntRect(int x0, int y0, int x1, int y1) : x0(x0), y0(y0), x1(x1), y1(y1) {}
IntRectIntRect108 IntRect(IntPoint origin, IntPoint size)
109 : x0(origin.x),
110 y0(origin.y),
111 x1(origin.x + size.x),
112 y1(origin.y + size.y) {}
113
widthIntRect114 int width() const { return x1 - x0; }
heightIntRect115 int height() const { return y1 - y0; }
is_emptyIntRect116 bool is_empty() const { return width() <= 0 || height() <= 0; }
117
originIntRect118 IntPoint origin() const { return IntPoint(x0, y0); }
119
same_sizeIntRect120 bool same_size(const IntRect& o) const {
121 return width() == o.width() && height() == o.height();
122 }
123
containsIntRect124 bool contains(const IntRect& o) const {
125 return o.x0 >= x0 && o.y0 >= y0 && o.x1 <= x1 && o.y1 <= y1;
126 }
127
intersectIntRect128 IntRect& intersect(const IntRect& o) {
129 x0 = max(x0, o.x0);
130 y0 = max(y0, o.y0);
131 x1 = min(x1, o.x1);
132 y1 = min(y1, o.y1);
133 return *this;
134 }
135
intersectionIntRect136 IntRect intersection(const IntRect& o) {
137 IntRect result = *this;
138 result.intersect(o);
139 return result;
140 }
141
142 // Scale from source-space to dest-space, optionally rounding inward
scaleIntRect143 IntRect& scale(int srcWidth, int srcHeight, int dstWidth, int dstHeight,
144 bool roundIn = false) {
145 x0 = (x0 * dstWidth + (roundIn ? srcWidth - 1 : 0)) / srcWidth;
146 y0 = (y0 * dstHeight + (roundIn ? srcHeight - 1 : 0)) / srcHeight;
147 x1 = (x1 * dstWidth) / srcWidth;
148 y1 = (y1 * dstHeight) / srcHeight;
149 return *this;
150 }
151
152 // Flip the rect's Y coords around inflection point at Y=offset
invert_yIntRect153 void invert_y(int offset) {
154 y0 = offset - y0;
155 y1 = offset - y1;
156 swap(y0, y1);
157 }
158
offsetIntRect159 IntRect& offset(const IntPoint& o) {
160 x0 += o.x;
161 y0 += o.y;
162 x1 += o.x;
163 y1 += o.y;
164 return *this;
165 }
166
operator +IntRect167 IntRect operator+(const IntPoint& o) const {
168 return IntRect(*this).offset(o);
169 }
operator -IntRect170 IntRect operator-(const IntPoint& o) const {
171 return IntRect(*this).offset(-o);
172 }
173 };
174
175 typedef vec2_scalar Point2D;
176 typedef vec4_scalar Point3D;
177
// An integer interval; len() treats `end` as exclusive.
struct IntRange {
  int start;
  int end;

  int len() const { return end - start; }

  // Overlap with r; may come out inverted (len() < 0) when disjoint.
  IntRange intersect(IntRange r) const {
    return {max(start, r.start), min(end, r.end)};
  }
};
188
// A float interval with helpers for clipping and integer rounding.
struct FloatRange {
  float start;
  float end;

  // Clamp a scalar into [start, end].
  float clip(float x) const { return clamp(x, start, end); }

  // Clamp both endpoints of r into this range.
  FloatRange clip(FloatRange r) const { return {clip(r.start), clip(r.end)}; }

  // Smallest range containing both this and r.
  FloatRange merge(FloatRange r) const {
    return {min(start, r.start), max(end, r.end)};
  }

  // Round both endpoints to the nearest integer (halves biased upward).
  IntRange round() const {
    return {int(floor(start + 0.5f)), int(floor(end + 0.5f))};
  }

  // Round outward to the smallest enclosing integer range.
  IntRange round_out() const { return {int(floor(start)), int(ceil(end))}; }
};
207
// Ordered range spanned by the X coordinates of two points.
template <typename P>
static inline FloatRange x_range(P p0, P p1) {
  return {min(p0.x, p1.x), max(p0.x, p1.x)};
}
212
// Client state for one vertex attribute binding, as configured via the
// glVertexAttribPointer family of calls.
struct VertexAttrib {
  size_t size = 0;  // in bytes
  GLenum type = 0;          // component data type (e.g. GL_FLOAT)
  bool normalized = false;  // scale integer data to [0, 1] on load
  GLsizei stride = 0;       // byte distance between consecutive elements
  GLuint offset = 0;        // byte offset of the first element in the buffer
  bool enabled = false;     // whether the attribute array is enabled
  GLuint divisor = 0;       // 0 = per-vertex; otherwise per-instance step
  int vertex_array = 0;     // owning vertex array object name
  int vertex_buffer = 0;    // buffer object this attribute sources from
  char* buf = nullptr;  // XXX: this can easily dangle
  size_t buf_size = 0;  // this will let us bounds check
};
226
bytes_for_internal_format(GLenum internal_format)227 static int bytes_for_internal_format(GLenum internal_format) {
228 switch (internal_format) {
229 case GL_RGBA32F:
230 return 4 * 4;
231 case GL_RGBA32I:
232 return 4 * 4;
233 case GL_RGBA8:
234 case GL_BGRA8:
235 case GL_RGBA:
236 return 4;
237 case GL_R8:
238 case GL_RED:
239 return 1;
240 case GL_RG8:
241 case GL_RG:
242 return 2;
243 case GL_DEPTH_COMPONENT:
244 case GL_DEPTH_COMPONENT16:
245 case GL_DEPTH_COMPONENT24:
246 case GL_DEPTH_COMPONENT32:
247 return 4;
248 case GL_RGB_RAW_422_APPLE:
249 return 2;
250 case GL_R16:
251 return 2;
252 default:
253 debugf("internal format: %x\n", internal_format);
254 assert(0);
255 return 0;
256 }
257 }
258
// Pad a row length in bytes up to the next multiple of 4.
static inline int aligned_stride(int row_bytes) {
  int remainder = row_bytes & 3;
  return remainder ? row_bytes + (4 - remainder) : row_bytes;
}
260
gl_format_to_texture_format(int type)261 static TextureFormat gl_format_to_texture_format(int type) {
262 switch (type) {
263 case GL_RGBA32F:
264 return TextureFormat::RGBA32F;
265 case GL_RGBA32I:
266 return TextureFormat::RGBA32I;
267 case GL_RGBA8:
268 return TextureFormat::RGBA8;
269 case GL_R8:
270 return TextureFormat::R8;
271 case GL_RG8:
272 return TextureFormat::RG8;
273 case GL_R16:
274 return TextureFormat::R16;
275 case GL_RGB_RAW_422_APPLE:
276 return TextureFormat::YUV422;
277 default:
278 assert(0);
279 return TextureFormat::RGBA8;
280 }
281 }
282
// Backing state for a GL query object; holds the raw result value once the
// query completes.
struct Query {
  uint64_t value = 0;
};
286
// A growable byte buffer backing GL buffer objects. `capacity` tracks the
// actual allocation so shrinking within it avoids a realloc.
struct Buffer {
  char* buf = nullptr;
  size_t size = 0;
  size_t capacity = 0;

  // Resize the buffer to new_size bytes. Returns true when the contents may
  // have changed (resized or reallocated), false when the size is unchanged
  // or allocation failed.
  bool allocate(size_t new_size) {
    // Same size as before: nothing to do.
    if (new_size == size) {
      return false;
    }
    // Fits in what we've already got: just adjust the logical size.
    if (new_size <= capacity) {
      size = new_size;
      return true;
    }
    // Need more room than the current allocation provides.
    char* grown = (char*)realloc(buf, new_size);
    assert(grown);
    if (!grown) {
      // On failure, drop the old allocation state entirely rather than
      // leaving a stale buffer behind.
      cleanup();
      return false;
    }
    buf = grown;
    size = new_size;
    capacity = new_size;
    return true;
  }

  // Release the allocation and reset all bookkeeping.
  void cleanup() {
    if (!buf) {
      return;
    }
    free(buf);
    buf = nullptr;
    size = 0;
    capacity = 0;
  }

  ~Buffer() { cleanup(); }
};
331
// A framebuffer object is just a pair of attached texture names; 0 means the
// attachment point is unbound.
struct Framebuffer {
  GLuint color_attachment = 0;
  GLuint depth_attachment = 0;
};
336
// Renderbuffers are backed by a texture object; this records its name.
struct Renderbuffer {
  GLuint texture = 0;

  // Cleanup hook invoked by ObjectStore::erase before deletion (see the
  // SFINAE on_erase dispatch in ObjectStore); defined later in the file.
  void on_erase();
};
342
gl_filter_to_texture_filter(int type)343 TextureFilter gl_filter_to_texture_filter(int type) {
344 switch (type) {
345 case GL_NEAREST:
346 return TextureFilter::NEAREST;
347 case GL_NEAREST_MIPMAP_LINEAR:
348 return TextureFilter::NEAREST;
349 case GL_NEAREST_MIPMAP_NEAREST:
350 return TextureFilter::NEAREST;
351 case GL_LINEAR:
352 return TextureFilter::LINEAR;
353 case GL_LINEAR_MIPMAP_LINEAR:
354 return TextureFilter::LINEAR;
355 case GL_LINEAR_MIPMAP_NEAREST:
356 return TextureFilter::LINEAR;
357 default:
358 assert(0);
359 return TextureFilter::NEAREST;
360 }
361 }
362
// CPU-side storage and state for a GL texture object. The backing buffer is
// either allocated internally by SWGL (SHOULD_FREE set) or supplied
// externally via set_buffer().
struct Texture {
  GLenum internal_format = 0;
  int width = 0;
  int height = 0;
  char* buf = nullptr;      // pixel storage; ownership depends on SHOULD_FREE
  size_t buf_size = 0;      // internally-allocated byte size; 0 when external
  uint32_t buf_stride = 0;  // bytes per row (see set_stride)
  uint8_t buf_bpp = 0;      // bytes per pixel (see set_bpp)
  GLenum min_filter = GL_NEAREST;
  GLenum mag_filter = GL_LINEAR;
  // The number of active locks on this texture. If this texture has any active
  // locks, we need to disallow modifying or destroying the texture as it may
  // be accessed by other threads where modifications could lead to races.
  int32_t locked = 0;
  // When used as an attachment of a framebuffer, rendering to the texture
  // behaves as if it is located at the given offset such that the offset is
  // subtracted from all transformed vertexes after the viewport is applied.
  IntPoint offset;

  enum FLAGS {
    // If the buffer is internally-allocated by SWGL
    SHOULD_FREE = 1 << 1,
    // If the buffer has been cleared to initialize it. Currently this is only
    // utilized by depth buffers which need to know when depth runs have reset
    // to a valid row state. When unset, the depth runs may contain garbage.
    CLEARED = 1 << 2,
  };
  int flags = SHOULD_FREE;
  bool should_free() const { return bool(flags & SHOULD_FREE); }
  bool cleared() const { return bool(flags & CLEARED); }

  // Set or clear a single bit in `flags`.
  void set_flag(int flag, bool val) {
    if (val) {
      flags |= flag;
    } else {
      flags &= ~flag;
    }
  }
  void set_should_free(bool val) {
    // buf must be null before SHOULD_FREE can be safely toggled. Otherwise, we
    // might accidentally mistakenly realloc an externally allocated buffer as
    // if it were an internally allocated one.
    assert(!buf);
    set_flag(SHOULD_FREE, val);
  }
  void set_cleared(bool val) { set_flag(CLEARED, val); }

  // Delayed-clearing state. When a clear of an FB is requested, we don't
  // immediately clear each row, as the rows may be subsequently overwritten
  // by draw calls, allowing us to skip the work of clearing the affected rows
  // either fully or partially. Instead, we keep a bit vector of rows that need
  // to be cleared later and save the value they need to be cleared with so
  // that we can clear these rows individually when they are touched by draws.
  // This currently only works for 2D textures, but not on texture arrays.
  int delay_clear = 0;               // number of rows still pending a clear
  uint32_t clear_val = 0;            // value the pending rows clear with
  uint32_t* cleared_rows = nullptr;  // one bit of clear state per row

  // Depth-run bookkeeping for depth textures; defined later in the file.
  void init_depth_runs(uint32_t z);
  void fill_depth_runs(uint32_t z, const IntRect& scissor);

  // Arm delayed clearing of all rows with the given value. Bits for the
  // rows past `height` in the final word are pre-set so they are never
  // treated as pending. NOTE(review): cleared_rows is only allocated once;
  // if `height` later grows the bit vector looks under-sized — confirm that
  // resizes recreate this state.
  void enable_delayed_clear(uint32_t val) {
    delay_clear = height;
    clear_val = val;
    if (!cleared_rows) {
      cleared_rows = new uint32_t[(height + 31) / 32];
    }
    memset(cleared_rows, 0, ((height + 31) / 32) * sizeof(uint32_t));
    if (height & 31) {
      cleared_rows[height / 32] = ~0U << (height & 31);
    }
  }

  // Drop all pending delayed-clear state.
  void disable_delayed_clear() {
    if (cleared_rows) {
      delete[] cleared_rows;
      cleared_rows = nullptr;
      delay_clear = 0;
    }
  }

  int bpp() const { return buf_bpp; }
  // Derive the bytes-per-pixel from the current internal format.
  void set_bpp() { buf_bpp = bytes_for_internal_format(internal_format); }

  size_t stride() const { return buf_stride; }
  // Recompute the row stride from bpp and width, padded to 4-byte alignment.
  void set_stride() { buf_stride = aligned_stride(buf_bpp * width); }

  // Set an external backing buffer of this texture.
  void set_buffer(void* new_buf, size_t new_stride) {
    assert(!should_free());
    // Ensure that the supplied stride is at least as big as the row data and
    // is aligned to the smaller of either the BPP or word-size. We need to at
    // least be able to sample data from within a row and sample whole pixels
    // of smaller formats without risking unaligned access.
    set_bpp();
    set_stride();
    assert(new_stride >= size_t(bpp() * width) &&
           new_stride % min(bpp(), sizeof(uint32_t)) == 0);

    buf = (char*)new_buf;
    buf_size = 0;  // zero size marks the buffer as externally owned
    buf_stride = new_stride;
  }

  // (Re)allocate the internal backing buffer if needed, returning true if a
  // (re)allocation happened. `force` revalidates the size even when a buffer
  // already exists; min_width/min_height reserve capacity beyond the current
  // dimensions. Externally-owned buffers are never (re)allocated here.
  bool allocate(bool force = false, int min_width = 0, int min_height = 0) {
    assert(!locked);  // Locked textures shouldn't be reallocated
    // If we get here, some GL API call that invalidates the texture was used.
    // Mark the buffer as not-cleared to signal this.
    set_cleared(false);
    // Check if there is either no buffer currently or if we forced validation
    // of the buffer size because some dimension might have changed.
    if ((!buf || force) && should_free()) {
      // Initialize the buffer's BPP and stride, since they may have changed.
      set_bpp();
      set_stride();
      // Compute new size based on the maximum potential stride, rather than
      // the current stride, to hopefully avoid reallocations when size would
      // otherwise change too much...
      size_t max_stride = max(buf_stride, aligned_stride(buf_bpp * min_width));
      size_t size = max_stride * max(height, min_height);
      if ((!buf && size > 0) || size > buf_size) {
        // Allocate with a SIMD register-sized tail of padding at the end so we
        // can safely read or write past the end of the texture with SIMD ops.
        // Currently only the flat Z-buffer texture needs this padding due to
        // full-register loads and stores in check_depth and discard_depth. In
        // case some code in the future accidentally uses a linear filter on a
        // texture with less than 2 pixels per row, we also add this padding
        // just to be safe. All other texture types and use-cases should be
        // safe to omit padding.
        size_t padding =
            internal_format == GL_DEPTH_COMPONENT24 || max(width, min_width) < 2
                ? sizeof(Float)
                : 0;
        char* new_buf = (char*)realloc(buf, size + padding);
        assert(new_buf);
        if (new_buf) {
          // Successfully reallocated the buffer, so go ahead and set it.
          buf = new_buf;
          buf_size = size;
          return true;
        }
        // Allocation failed, so ensure we don't leave stale buffer state.
        cleanup();
      }
    }
    // Nothing changed...
    return false;
  }

  // Free internally-owned storage and reset buffer bookkeeping. Externally
  // owned buffers are only forgotten, not freed.
  void cleanup() {
    assert(!locked);  // Locked textures shouldn't be destroyed
    if (buf) {
      // If we need to toggle SHOULD_FREE state, ensure that buf is nulled out,
      // regardless of whether we internally allocated it. This will prevent us
      // from wrongly treating buf as having been internally allocated for when
      // we go to realloc if it actually was externally allocted.
      if (should_free()) {
        free(buf);
      }
      buf = nullptr;
      buf_size = 0;
      buf_bpp = 0;
      buf_stride = 0;
    }
    disable_delayed_clear();
  }

  ~Texture() { cleanup(); }

  IntRect bounds() const { return IntRect{0, 0, width, height}; }
  // Bounds translated into framebuffer-attachment offset space.
  IntRect offset_bounds() const { return bounds() + offset; }

  // Find the valid sampling bounds relative to the requested region
  IntRect sample_bounds(const IntRect& req, bool invertY = false) const {
    IntRect bb = bounds().intersect(req) - req.origin();
    if (invertY) bb.invert_y(req.height());
    return bb;
  }

  // Get a pointer for sampling at the given offset
  char* sample_ptr(int x, int y) const {
    return buf + y * stride() + x * bpp();
  }

  // Get a pointer for sampling the requested region and limit to the provided
  // sampling bounds
  char* sample_ptr(const IntRect& req, const IntRect& bounds,
                   bool invertY = false) const {
    // Offset the sample pointer by the clamped bounds
    int x = req.x0 + bounds.x0;
    // Invert the Y offset if necessary
    int y = invertY ? req.y1 - 1 - bounds.y0 : req.y0 + bounds.y0;
    return sample_ptr(x, y);
  }
};
558
559 // The last vertex attribute is reserved as a null attribute in case a vertex
560 // attribute is used without being set.
561 #define MAX_ATTRIBS 17
562 #define NULL_ATTRIB 16
// State for a vertex array object: per-slot attribute bindings plus the
// element array buffer binding, which the GL spec makes part of VAO state.
struct VertexArray {
  VertexAttrib attribs[MAX_ATTRIBS];
  // Highest attribute slot in use; -1 when none. NOTE(review): maintained by
  // validate() (defined elsewhere) — confirm exact semantics there.
  int max_attrib = -1;
  // The GL spec defines element array buffer binding to be part of VAO state.
  GLuint element_array_buffer_binding = 0;

  // Revalidates attribute state after changes; defined later in the file.
  void validate();
};
571
// A shader object records only its stage and the loader function used to
// instantiate the corresponding precompiled program implementation.
struct Shader {
  GLenum type = 0;
  ProgramLoader loader = nullptr;
};
576
// A linked program and its per-stage entry points.
struct Program {
  ProgramImpl* impl = nullptr;
  // Cached stage interfaces. NOTE(review): only `impl` is deleted in the
  // destructor, so these appear to be owned by impl — confirm against the
  // ProgramImpl definition before changing lifetime handling.
  VertexShaderImpl* vert_impl = nullptr;
  FragmentShaderImpl* frag_impl = nullptr;
  // Marked when deletion was requested; presumably the object is reaped
  // elsewhere once it is no longer in use — verify at call sites.
  bool deleted = false;

  ~Program() { delete impl; }
};
585
586 // clang-format off
587 // Fully-expand GL defines while ignoring more than 4 suffixes
588 #define CONCAT_KEY(prefix, x, y, z, w, ...) prefix##x##y##z##w
589 // Generate a blend key enum symbol
590 #define BLEND_KEY(...) CONCAT_KEY(BLEND_, __VA_ARGS__, 0, 0, 0)
591 #define MASK_BLEND_KEY(...) CONCAT_KEY(MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
592 #define AA_BLEND_KEY(...) CONCAT_KEY(AA_BLEND_, __VA_ARGS__, 0, 0, 0)
593 #define AA_MASK_BLEND_KEY(...) CONCAT_KEY(AA_MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
594
595 // Utility macro to easily generate similar code for all implemented blend modes
596 #define FOR_EACH_BLEND_KEY(macro) \
597 macro(GL_ONE, GL_ZERO, 0, 0) \
598 macro(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA) \
599 macro(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, 0, 0) \
600 macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, 0, 0) \
601 macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, GL_ZERO, GL_ONE) \
602 macro(GL_ZERO, GL_ONE_MINUS_SRC_ALPHA, 0, 0) \
603 macro(GL_ZERO, GL_SRC_COLOR, 0, 0) \
604 macro(GL_ONE, GL_ONE, 0, 0) \
605 macro(GL_ONE, GL_ONE, GL_ONE, GL_ONE_MINUS_SRC_ALPHA) \
606 macro(GL_ONE_MINUS_DST_ALPHA, GL_ONE, GL_ZERO, GL_ONE) \
607 macro(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_COLOR, 0, 0) \
608 macro(GL_ONE, GL_ONE_MINUS_SRC1_COLOR, 0, 0) \
609 macro(GL_MIN, 0, 0, 0) \
610 macro(GL_MAX, 0, 0, 0) \
611 macro(GL_MULTIPLY_KHR, 0, 0, 0) \
612 macro(GL_SCREEN_KHR, 0, 0, 0) \
613 macro(GL_OVERLAY_KHR, 0, 0, 0) \
614 macro(GL_DARKEN_KHR, 0, 0, 0) \
615 macro(GL_LIGHTEN_KHR, 0, 0, 0) \
616 macro(GL_COLORDODGE_KHR, 0, 0, 0) \
617 macro(GL_COLORBURN_KHR, 0, 0, 0) \
618 macro(GL_HARDLIGHT_KHR, 0, 0, 0) \
619 macro(GL_SOFTLIGHT_KHR, 0, 0, 0) \
620 macro(GL_DIFFERENCE_KHR, 0, 0, 0) \
621 macro(GL_EXCLUSION_KHR, 0, 0, 0) \
622 macro(GL_HSL_HUE_KHR, 0, 0, 0) \
623 macro(GL_HSL_SATURATION_KHR, 0, 0, 0) \
624 macro(GL_HSL_COLOR_KHR, 0, 0, 0) \
625 macro(GL_HSL_LUMINOSITY_KHR, 0, 0, 0) \
626 macro(SWGL_BLEND_DROP_SHADOW, 0, 0, 0) \
627 macro(SWGL_BLEND_SUBPIXEL_TEXT, 0, 0, 0)
628
629 #define DEFINE_BLEND_KEY(...) BLEND_KEY(__VA_ARGS__),
630 #define DEFINE_MASK_BLEND_KEY(...) MASK_BLEND_KEY(__VA_ARGS__),
631 #define DEFINE_AA_BLEND_KEY(...) AA_BLEND_KEY(__VA_ARGS__),
632 #define DEFINE_AA_MASK_BLEND_KEY(...) AA_MASK_BLEND_KEY(__VA_ARGS__),
633 enum BlendKey : uint8_t {
634 FOR_EACH_BLEND_KEY(DEFINE_BLEND_KEY)
635 FOR_EACH_BLEND_KEY(DEFINE_MASK_BLEND_KEY)
636 FOR_EACH_BLEND_KEY(DEFINE_AA_BLEND_KEY)
637 FOR_EACH_BLEND_KEY(DEFINE_AA_MASK_BLEND_KEY)
638 BLEND_KEY_NONE = BLEND_KEY(GL_ONE, GL_ZERO),
639 MASK_BLEND_KEY_NONE = MASK_BLEND_KEY(GL_ONE, GL_ZERO),
640 AA_BLEND_KEY_NONE = AA_BLEND_KEY(GL_ONE, GL_ZERO),
641 AA_MASK_BLEND_KEY_NONE = AA_MASK_BLEND_KEY(GL_ONE, GL_ZERO),
642 };
643 // clang-format on
644
645 const size_t MAX_TEXTURE_UNITS = 16;
646
// If `binding` currently refers to object name `n`, reset it to 0 (unbound)
// and report that a change was made.
template <typename T>
static inline bool unlink(T& binding, T n) {
  if (binding != n) {
    return false;
  }
  binding = 0;
  return true;
}
655
// A sparse store of GL objects addressed by integer object name. Slots hold
// heap-allocated objects; name 0 is reserved as the null object so unbound
// names are distinguishable.
template <typename O>
struct ObjectStore {
  O** objects = nullptr;  // slot table; null entries are free slots
  size_t size = 0;        // number of slots in the table
  // reserve object 0 as null
  size_t first_free = 1;
  // Fallback object returned by operator[] when a slot cannot be allocated.
  O invalid;

  ~ObjectStore() {
    if (objects) {
      for (size_t i = 0; i < size; i++) delete objects[i];
      free(objects);
    }
  }

  // Grow the slot table (by 1.5x steps) until index i is valid; new slots are
  // nulled. Returns false if the reallocation failed.
  bool grow(size_t i) {
    size_t new_size = size ? size : 8;
    while (new_size <= i) new_size += new_size / 2;
    O** new_objects = (O**)realloc(objects, new_size * sizeof(O*));
    assert(new_objects);
    if (!new_objects) return false;
    while (size < new_size) new_objects[size++] = nullptr;
    objects = new_objects;
    return true;
  }

  // Copy-construct o into slot i if the slot is empty; an occupied slot is
  // left untouched. Allocation failure is silently ignored — callers then
  // observe `invalid` through operator[].
  void insert(size_t i, const O& o) {
    if (i >= size && !grow(i)) return;
    if (!objects[i]) objects[i] = new O(o);
  }

  // Find the lowest unoccupied slot at or after first_free, caching it.
  size_t next_free() {
    size_t i = first_free;
    while (i < size && objects[i]) i++;
    first_free = i;
    return i;
  }

  // Insert o at the next free slot and return its name.
  size_t insert(const O& o = O()) {
    size_t i = next_free();
    insert(i, o);
    return i;
  }

  // Look up slot i, default-constructing it on demand. Returns `invalid` if
  // the slot could not be created.
  O& operator[](size_t i) {
    insert(i, O());
    return i < size ? *objects[i] : invalid;
  }

  O* find(size_t i) const { return i < size ? objects[i] : nullptr; }

  // SFINAE dispatch: if O declares an on_erase() hook, call it before the
  // object is deleted; otherwise this is a no-op.
  template <typename T>
  void on_erase(T*, ...) {}
  template <typename T>
  void on_erase(T* o, decltype(&T::on_erase)) {
    o->on_erase();
  }

  // Delete the object in slot i, if any, running its on_erase hook first.
  bool erase(size_t i) {
    if (i < size && objects[i]) {
      on_erase(objects[i], nullptr);
      delete objects[i];
      objects[i] = nullptr;
      if (i < first_free) first_free = i;
      return true;
    }
    return false;
  }

  O** begin() const { return objects; }
  // Use pointer arithmetic rather than &objects[size]: when the store is
  // empty (objects == nullptr, size == 0), &objects[size] dereferences a null
  // pointer, which is undefined behavior; nullptr + 0 is well-defined.
  O** end() const { return objects + size; }
};
728
// The full GL state machine for one software context: object stores for every
// GL object type plus all currently bound state that draw calls consume.
struct Context {
  int32_t references = 1;  // reference count; managed by the context lifetime
                           // entry points elsewhere in the file

  ObjectStore<Query> queries;
  ObjectStore<Buffer> buffers;
  ObjectStore<Texture> textures;
  ObjectStore<VertexArray> vertex_arrays;
  ObjectStore<Framebuffer> framebuffers;
  ObjectStore<Renderbuffer> renderbuffers;
  ObjectStore<Shader> shaders;
  ObjectStore<Program> programs;

  IntRect viewport = {0, 0, 0, 0};

  // Blend state (glBlendFunc / glBlendEquation / glBlendColor).
  bool blend = false;
  GLenum blendfunc_srgb = GL_ONE;
  GLenum blendfunc_drgb = GL_ZERO;
  GLenum blendfunc_sa = GL_ONE;
  GLenum blendfunc_da = GL_ZERO;
  GLenum blend_equation = GL_FUNC_ADD;
  V8<uint16_t> blendcolor = 0;
  BlendKey blend_key = BLEND_KEY_NONE;  // resolved key into the blend pipeline

  // Depth-test state.
  bool depthtest = false;
  bool depthmask = true;
  GLenum depthfunc = GL_LESS;

  // Scissor-test state.
  bool scissortest = false;
  IntRect scissor = {0, 0, 0, 0};

  // Clear values used by glClear.
  GLfloat clearcolor[4] = {0, 0, 0, 0};
  GLdouble cleardepth = 1;

  int unpack_row_length = 0;  // GL_UNPACK_ROW_LENGTH pixel-store setting

  // Shading statistics accumulated by the rasterizer.
  int shaded_rows = 0;
  int shaded_pixels = 0;

  // Per-unit texture bindings for the two supported targets.
  struct TextureUnit {
    GLuint texture_2d_binding = 0;
    GLuint texture_rectangle_binding = 0;

    // Clear any binding in this unit that refers to texture name n.
    void unlink(GLuint n) {
      ::unlink(texture_2d_binding, n);
      ::unlink(texture_rectangle_binding, n);
    }
  };
  TextureUnit texture_units[MAX_TEXTURE_UNITS];
  int active_texture_unit = 0;

  GLuint current_program = 0;

  GLuint current_vertex_array = 0;
  // NOTE(review): appears to request revalidation of VAO state before the
  // next draw — confirm at the draw entry points.
  bool validate_vertex_array = true;

  GLuint pixel_pack_buffer_binding = 0;
  GLuint pixel_unpack_buffer_binding = 0;
  GLuint array_buffer_binding = 0;
  GLuint time_elapsed_query = 0;
  GLuint samples_passed_query = 0;
  GLuint renderbuffer_binding = 0;
  GLuint draw_framebuffer_binding = 0;
  GLuint read_framebuffer_binding = 0;
  GLuint unknown_binding = 0;  // dummy slot returned for unknown targets

  // Map a GL binding-point enum to the context field that stores the binding.
  // Unknown targets assert in debug builds and return the dummy slot.
  GLuint& get_binding(GLenum name) {
    switch (name) {
      case GL_PIXEL_PACK_BUFFER:
        return pixel_pack_buffer_binding;
      case GL_PIXEL_UNPACK_BUFFER:
        return pixel_unpack_buffer_binding;
      case GL_ARRAY_BUFFER:
        return array_buffer_binding;
      case GL_ELEMENT_ARRAY_BUFFER:
        // Element array binding lives in the current VAO, per the GL spec.
        return vertex_arrays[current_vertex_array].element_array_buffer_binding;
      case GL_TEXTURE_2D:
        return texture_units[active_texture_unit].texture_2d_binding;
      case GL_TEXTURE_RECTANGLE:
        return texture_units[active_texture_unit].texture_rectangle_binding;
      case GL_TIME_ELAPSED:
        return time_elapsed_query;
      case GL_SAMPLES_PASSED:
        return samples_passed_query;
      case GL_RENDERBUFFER:
        return renderbuffer_binding;
      case GL_DRAW_FRAMEBUFFER:
        return draw_framebuffer_binding;
      case GL_READ_FRAMEBUFFER:
        return read_framebuffer_binding;
      default:
        debugf("unknown binding %x\n", name);
        assert(false);
        return unknown_binding;
    }
  }

  // Look up the texture bound to the given unit for each sampler flavor.
  Texture& get_texture(sampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(isampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(sampler2DRect, int unit) {
    return textures[texture_units[unit].texture_rectangle_binding];
  }

  // Clip a bounding box against the scissor rect (translated into the box's
  // local space by `origin`) when scissor testing is enabled.
  IntRect apply_scissor(IntRect bb,
                        const IntPoint& origin = IntPoint(0, 0)) const {
    return scissortest ? bb.intersect(scissor - origin) : bb;
  }

  // Scissor a texture's bounds, honoring its attachment offset.
  IntRect apply_scissor(const Texture& t) const {
    return apply_scissor(t.bounds(), t.offset);
  }
};
846 static Context* ctx = nullptr;
847 static VertexShaderImpl* vertex_shader = nullptr;
848 static FragmentShaderImpl* fragment_shader = nullptr;
849 static BlendKey blend_key = BLEND_KEY_NONE;
850
851 static void prepare_texture(Texture& t, const IntRect* skip = nullptr);
852
853 template <typename S>
init_filter(S * s,Texture & t)854 static inline void init_filter(S* s, Texture& t) {
855 // If the width is not at least 2 pixels, then we can't safely sample the end
856 // of the row with a linear filter. In that case, just punt to using nearest
857 // filtering instead.
858 s->filter = t.width >= 2 ? gl_filter_to_texture_filter(t.mag_filter)
859 : TextureFilter::NEAREST;
860 }
861
862 template <typename S>
init_sampler(S * s,Texture & t)863 static inline void init_sampler(S* s, Texture& t) {
864 prepare_texture(t);
865 s->width = t.width;
866 s->height = t.height;
867 s->stride = t.stride();
868 int bpp = t.bpp();
869 if (bpp >= 4)
870 s->stride /= 4;
871 else if (bpp == 2)
872 s->stride /= 2;
873 else
874 assert(bpp == 1);
875 // Use uint32_t* for easier sampling, but need to cast to uint8_t* or
876 // uint16_t* for formats with bpp < 4.
877 s->buf = (uint32_t*)t.buf;
878 s->format = gl_format_to_texture_format(t.internal_format);
879 }
880
881 template <typename S>
null_sampler(S * s)882 static inline void null_sampler(S* s) {
883 // For null texture data, just make the sampler provide a 1x1 buffer that is
884 // transparent black. Ensure buffer holds at least a SIMD vector of zero data
885 // for SIMD padding of unaligned loads.
886 static const uint32_t zeroBuf[sizeof(Float) / sizeof(uint32_t)] = {0};
887 s->width = 1;
888 s->height = 1;
889 s->stride = s->width;
890 s->buf = (uint32_t*)zeroBuf;
891 s->format = TextureFormat::RGBA8;
892 }
893
// Fallback filter for null textures; NEAREST never reads neighboring texels,
// so it is safe on the 1x1 null buffer.
template <typename S>
static inline void null_filter(S* s) {
  s->filter = TextureFilter::NEAREST;
}
898
899 template <typename S>
lookup_sampler(S * s,int texture)900 S* lookup_sampler(S* s, int texture) {
901 Texture& t = ctx->get_texture(s, texture);
902 if (!t.buf) {
903 null_sampler(s);
904 null_filter(s);
905 } else {
906 init_sampler(s, t);
907 init_filter(s, t);
908 }
909 return s;
910 }
911
912 template <typename S>
lookup_isampler(S * s,int texture)913 S* lookup_isampler(S* s, int texture) {
914 Texture& t = ctx->get_texture(s, texture);
915 if (!t.buf) {
916 null_sampler(s);
917 } else {
918 init_sampler(s, t);
919 }
920 return s;
921 }
922
bytes_per_type(GLenum type)923 int bytes_per_type(GLenum type) {
924 switch (type) {
925 case GL_INT:
926 return 4;
927 case GL_FLOAT:
928 return 4;
929 case GL_UNSIGNED_SHORT:
930 return 2;
931 case GL_UNSIGNED_BYTE:
932 return 1;
933 default:
934 assert(0);
935 return 0;
936 }
937 }
938
// Widen a packed attribute of component type C into scalar type S, optionally
// normalizing unsigned integer data into [0, 1]. `size` is the attribute's
// byte size, so size / sizeof(C) components are copied.
template <typename S, typename C>
static inline S expand_attrib(const char* buf, size_t size, bool normalized) {
  typedef typename ElementType<S>::ty elem_type;
  S scalar = {0};
  const C* src = reinterpret_cast<const C*>(buf);
  if (normalized) {
    // Scale by 1/(2^bits - 1) so the max representable C maps to 1.0.
    // NOTE: the shift is only valid while C is narrower than int
    // (uint8_t/uint16_t, the types actually instantiated below).
    const float scale = 1.0f / ((1 << (8 * sizeof(C))) - 1);
    for (size_t i = 0; i < size / sizeof(C); i++) {
      put_nth_component(scalar, i, elem_type(src[i]) * scale);
    }
  } else {
    for (size_t i = 0; i < size / sizeof(C); i++) {
      put_nth_component(scalar, i, elem_type(src[i]));
    }
  }
  return scalar;
}
956
// Load one attribute element from src into scalar type S. If the stored
// element is at least as large as S it is read directly; otherwise narrower
// unsigned integer data is widened (and optionally normalized), and
// same-width data is copied component-wise with the remainder zeroed.
template <typename S>
static inline S load_attrib_scalar(VertexAttrib& va, const char* src) {
  if (sizeof(S) <= va.size) {
    // Enough source bytes to read the whole scalar directly.
    return *reinterpret_cast<const S*>(src);
  }
  if (va.type == GL_UNSIGNED_SHORT) {
    return expand_attrib<S, uint16_t>(src, va.size, va.normalized);
  }
  if (va.type == GL_UNSIGNED_BYTE) {
    return expand_attrib<S, uint8_t>(src, va.size, va.normalized);
  }
  // Components match S's element width: copy what the attribute provides and
  // leave the remaining components zeroed.
  assert(sizeof(typename ElementType<S>::ty) == bytes_per_type(va.type));
  S scalar = {0};
  memcpy(&scalar, src, va.size);
  return scalar;
}
973
// Load a varying attribute for one primitive into SIMD vector T, one vertex
// per lane. `count` is the primitive's vertex count (2 = line, 3 = triangle,
// 4 = quad); lanes are packed in WebRender's expected order/winding. If
// count is 0 the attribute is left unmodified.
template <typename T>
void load_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
                 int count) {
  typedef decltype(force_scalar(attrib)) scalar_type;
  if (!va.enabled) {
    // Disabled attribute arrays read as zero in every lane.
    attrib = T(scalar_type{0});
  } else if (va.divisor != 0) {
    // Instanced attribute: one element per instance, broadcast to all lanes.
    char* src = (char*)va.buf + va.stride * instance + va.offset;
    assert(src + va.size <= va.buf + va.buf_size);
    attrib = T(load_attrib_scalar<scalar_type>(va, src));
  } else {
    // Specialized for WR's primitive vertex order/winding.
    if (!count) return;
    assert(count >= 2 && count <= 4);
    char* src = (char*)va.buf + va.stride * start + va.offset;
    switch (count) {
      case 2: {
        // Lines must be indexed at offsets 0, 1.
        // Line vertexes fill vertex shader SIMD lanes as 0, 1, 1, 0.
        scalar_type lanes[2] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride)};
        attrib = (T){lanes[0], lanes[1], lanes[1], lanes[0]};
        break;
      }
      case 3: {
        // Triangles must be indexed at offsets 0, 1, 2.
        // Triangle vertexes fill vertex shader SIMD lanes as 0, 1, 2, 2.
        scalar_type lanes[3] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride),
            load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        attrib = (T){lanes[0], lanes[1], lanes[2], lanes[2]};
        break;
      }
      default:
        // Quads must be successive triangles indexed at offsets 0, 1, 2, 2,
        // 1, 3. Quad vertexes fill vertex shader SIMD lanes as 0, 1, 3, 2, so
        // that the points form a convex path that can be traversed by the
        // rasterizer.
        attrib = (T){load_attrib_scalar<scalar_type>(va, src),
                     load_attrib_scalar<scalar_type>(va, src + va.stride),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 3),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        break;
    }
  }
}
1022
1023 template <typename T>
load_flat_attrib(T & attrib,VertexAttrib & va,uint32_t start,int instance,int count)1024 void load_flat_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
1025 int count) {
1026 typedef decltype(force_scalar(attrib)) scalar_type;
1027 if (!va.enabled) {
1028 attrib = T{0};
1029 return;
1030 }
1031 char* src = nullptr;
1032 if (va.divisor != 0) {
1033 src = (char*)va.buf + va.stride * instance + va.offset;
1034 } else {
1035 if (!count) return;
1036 src = (char*)va.buf + va.stride * start + va.offset;
1037 }
1038 assert(src + va.size <= va.buf + va.buf_size);
1039 attrib = T(load_attrib_scalar<scalar_type>(va, src));
1040 }
1041
// Binds the vertex and fragment shader implementations of the given program
// into the global shader slots; program 0 unbinds both stages.
void setup_program(GLuint program) {
  if (!program) {
    vertex_shader = nullptr;
    fragment_shader = nullptr;
    return;
  }
  Program& p = ctx->programs[program];
  // The program must have been linked before use.
  assert(p.impl);
  assert(p.vert_impl);
  assert(p.frag_impl);
  vertex_shader = p.vert_impl;
  fragment_shader = p.frag_impl;
}
1055
1056 extern ProgramLoader load_shader(const char* name);
1057
1058 extern "C" {
1059
UseProgram(GLuint program)1060 void UseProgram(GLuint program) {
1061 if (ctx->current_program && program != ctx->current_program) {
1062 auto* p = ctx->programs.find(ctx->current_program);
1063 if (p && p->deleted) {
1064 ctx->programs.erase(ctx->current_program);
1065 }
1066 }
1067 ctx->current_program = program;
1068 setup_program(program);
1069 }
1070
// Sets the viewport rectangle; stored as inclusive-left/top to
// exclusive-right/bottom bounds.
void SetViewport(GLint x, GLint y, GLsizei width, GLsizei height) {
  ctx->viewport = IntRect{x, y, x + width, y + height};
}
1074
Enable(GLenum cap)1075 void Enable(GLenum cap) {
1076 switch (cap) {
1077 case GL_BLEND:
1078 ctx->blend = true;
1079 break;
1080 case GL_DEPTH_TEST:
1081 ctx->depthtest = true;
1082 break;
1083 case GL_SCISSOR_TEST:
1084 ctx->scissortest = true;
1085 break;
1086 }
1087 }
1088
Disable(GLenum cap)1089 void Disable(GLenum cap) {
1090 switch (cap) {
1091 case GL_BLEND:
1092 ctx->blend = false;
1093 break;
1094 case GL_DEPTH_TEST:
1095 ctx->depthtest = false;
1096 break;
1097 case GL_SCISSOR_TEST:
1098 ctx->scissortest = false;
1099 break;
1100 }
1101 }
1102
GetError()1103 GLenum GetError() { return GL_NO_ERROR; }
1104
// Extensions advertised to callers; the count reported for GL_NUM_EXTENSIONS
// and the strings returned by GetStringi(GL_EXTENSIONS) both derive from
// this array.
static const char* const extensions[] = {
    "GL_ARB_blend_func_extended",
    "GL_ARB_clear_texture",
    "GL_ARB_copy_image",
    "GL_ARB_draw_instanced",
    "GL_ARB_explicit_attrib_location",
    "GL_ARB_instanced_arrays",
    "GL_ARB_invalidate_subdata",
    "GL_ARB_texture_storage",
    "GL_EXT_timer_query",
    "GL_KHR_blend_equation_advanced",
    "GL_KHR_blend_equation_advanced_coherent",
    "GL_APPLE_rgb_422",
};
1119
// Reports integer implementation limits and binding state. Unhandled pnames
// assert in debug builds and leave params untouched in release builds.
void GetIntegerv(GLenum pname, GLint* params) {
  assert(params);
  switch (pname) {
    case GL_MAX_TEXTURE_UNITS:
    case GL_MAX_TEXTURE_IMAGE_UNITS:
      params[0] = MAX_TEXTURE_UNITS;
      break;
    case GL_MAX_TEXTURE_SIZE:
      // 32K maximum texture dimension.
      params[0] = 1 << 15;
      break;
    case GL_MAX_ARRAY_TEXTURE_LAYERS:
      // Zero layers: array textures are not provided.
      params[0] = 0;
      break;
    case GL_READ_FRAMEBUFFER_BINDING:
      params[0] = ctx->read_framebuffer_binding;
      break;
    case GL_DRAW_FRAMEBUFFER_BINDING:
      params[0] = ctx->draw_framebuffer_binding;
      break;
    case GL_PIXEL_PACK_BUFFER_BINDING:
      params[0] = ctx->pixel_pack_buffer_binding;
      break;
    case GL_PIXEL_UNPACK_BUFFER_BINDING:
      params[0] = ctx->pixel_unpack_buffer_binding;
      break;
    case GL_NUM_EXTENSIONS:
      params[0] = sizeof(extensions) / sizeof(extensions[0]);
      break;
    case GL_MAJOR_VERSION:
      params[0] = 3;
      break;
    case GL_MINOR_VERSION:
      params[0] = 2;
      break;
    case GL_MIN_PROGRAM_TEXEL_OFFSET:
      params[0] = 0;
      break;
    case GL_MAX_PROGRAM_TEXEL_OFFSET:
      params[0] = MAX_TEXEL_OFFSET;
      break;
    default:
      debugf("unhandled glGetIntegerv parameter %x\n", pname);
      assert(false);
  }
}
1165
GetBooleanv(GLenum pname,GLboolean * params)1166 void GetBooleanv(GLenum pname, GLboolean* params) {
1167 assert(params);
1168 switch (pname) {
1169 case GL_DEPTH_WRITEMASK:
1170 params[0] = ctx->depthmask;
1171 break;
1172 default:
1173 debugf("unhandled glGetBooleanv parameter %x\n", pname);
1174 assert(false);
1175 }
1176 }
1177
// Returns identification strings for this software implementation; unknown
// names assert in debug builds and return nullptr.
const char* GetString(GLenum name) {
  switch (name) {
    case GL_VENDOR:
      return "Mozilla Gfx";
    case GL_RENDERER:
      return "Software WebRender";
    case GL_VERSION:
      return "3.2";
    case GL_SHADING_LANGUAGE_VERSION:
      return "1.50";
    default:
      debugf("unhandled glGetString parameter %x\n", name);
      assert(false);
      return nullptr;
  }
}
1194
GetStringi(GLenum name,GLuint index)1195 const char* GetStringi(GLenum name, GLuint index) {
1196 switch (name) {
1197 case GL_EXTENSIONS:
1198 if (index >= sizeof(extensions) / sizeof(extensions[0])) {
1199 return nullptr;
1200 }
1201 return extensions[index];
1202 default:
1203 debugf("unhandled glGetStringi parameter %x\n", name);
1204 assert(false);
1205 return nullptr;
1206 }
1207 }
1208
// Canonicalizes an alpha blend factor against its RGB counterpart: when the
// alpha factor is the color/alpha sibling of the RGB factor, it is rewritten
// to match the RGB enum so that equivalent separate blend funcs collapse to
// the same blend key in hash_blend_key().
GLenum remap_blendfunc(GLenum rgb, GLenum a) {
  switch (a) {
    case GL_SRC_ALPHA:
      if (rgb == GL_SRC_COLOR) a = GL_SRC_COLOR;
      break;
    case GL_ONE_MINUS_SRC_ALPHA:
      if (rgb == GL_ONE_MINUS_SRC_COLOR) a = GL_ONE_MINUS_SRC_COLOR;
      break;
    case GL_DST_ALPHA:
      if (rgb == GL_DST_COLOR) a = GL_DST_COLOR;
      break;
    case GL_ONE_MINUS_DST_ALPHA:
      if (rgb == GL_ONE_MINUS_DST_COLOR) a = GL_ONE_MINUS_DST_COLOR;
      break;
    case GL_CONSTANT_ALPHA:
      if (rgb == GL_CONSTANT_COLOR) a = GL_CONSTANT_COLOR;
      break;
    case GL_ONE_MINUS_CONSTANT_ALPHA:
      if (rgb == GL_ONE_MINUS_CONSTANT_COLOR) a = GL_ONE_MINUS_CONSTANT_COLOR;
      break;
    case GL_SRC_COLOR:
      if (rgb == GL_SRC_ALPHA) a = GL_SRC_ALPHA;
      break;
    case GL_ONE_MINUS_SRC_COLOR:
      if (rgb == GL_ONE_MINUS_SRC_ALPHA) a = GL_ONE_MINUS_SRC_ALPHA;
      break;
    case GL_DST_COLOR:
      if (rgb == GL_DST_ALPHA) a = GL_DST_ALPHA;
      break;
    case GL_ONE_MINUS_DST_COLOR:
      if (rgb == GL_ONE_MINUS_DST_ALPHA) a = GL_ONE_MINUS_DST_ALPHA;
      break;
    case GL_CONSTANT_COLOR:
      if (rgb == GL_CONSTANT_ALPHA) a = GL_CONSTANT_ALPHA;
      break;
    case GL_ONE_MINUS_CONSTANT_COLOR:
      if (rgb == GL_ONE_MINUS_CONSTANT_ALPHA) a = GL_ONE_MINUS_CONSTANT_ALPHA;
      break;
    case GL_SRC1_ALPHA:
      if (rgb == GL_SRC1_COLOR) a = GL_SRC1_COLOR;
      break;
    case GL_ONE_MINUS_SRC1_ALPHA:
      if (rgb == GL_ONE_MINUS_SRC1_COLOR) a = GL_ONE_MINUS_SRC1_COLOR;
      break;
    case GL_SRC1_COLOR:
      if (rgb == GL_SRC1_ALPHA) a = GL_SRC1_ALPHA;
      break;
    case GL_ONE_MINUS_SRC1_COLOR:
      if (rgb == GL_ONE_MINUS_SRC1_ALPHA) a = GL_ONE_MINUS_SRC1_ALPHA;
      break;
  }
  return a;
}
1262
1263 // Generate a hashed blend key based on blend func and equation state. This
1264 // allows all the blend state to be processed down to a blend key that can be
1265 // dealt with inside a single switch statement.
static void hash_blend_key() {
  GLenum srgb = ctx->blendfunc_srgb;
  GLenum drgb = ctx->blendfunc_drgb;
  GLenum sa = ctx->blendfunc_sa;
  GLenum da = ctx->blendfunc_da;
  GLenum equation = ctx->blend_equation;
#define HASH_BLEND_KEY(x, y, z, w) ((x << 4) | (y) | (z << 24) | (w << 20))
  // Basic non-separate blend funcs used the two argument form
  int hash = HASH_BLEND_KEY(srgb, drgb, 0, 0);
  // Separate alpha blend funcs use the 4 argument hash
  if (srgb != sa || drgb != da) hash |= HASH_BLEND_KEY(0, 0, sa, da);
  // Any other blend equation than the default func_add ignores the func and
  // instead generates a one-argument hash based on the equation
  if (equation != GL_FUNC_ADD) hash = HASH_BLEND_KEY(equation, 0, 0, 0);
  switch (hash) {
#define MAP_BLEND_KEY(...) \
  case HASH_BLEND_KEY(__VA_ARGS__): \
    ctx->blend_key = BLEND_KEY(__VA_ARGS__); \
    break;
    FOR_EACH_BLEND_KEY(MAP_BLEND_KEY)
    default:
      // Unsupported combination: blend_key is left unchanged (and we assert
      // in debug builds).
      debugf("blendfunc: %x, %x, separate: %x, %x, equation: %x\n", srgb, drgb,
             sa, da, equation);
      assert(false);
      break;
  }
}
1293
BlendFunc(GLenum srgb,GLenum drgb,GLenum sa,GLenum da)1294 void BlendFunc(GLenum srgb, GLenum drgb, GLenum sa, GLenum da) {
1295 ctx->blendfunc_srgb = srgb;
1296 ctx->blendfunc_drgb = drgb;
1297 sa = remap_blendfunc(srgb, sa);
1298 da = remap_blendfunc(drgb, da);
1299 ctx->blendfunc_sa = sa;
1300 ctx->blendfunc_da = da;
1301
1302 hash_blend_key();
1303 }
1304
// Stores the constant blend color, pre-converted to packed 16-bit channels.
// Channels are ordered b, g, r, a here to match the framebuffer layout, and
// the 4 channels are duplicated across both halves of the vector.
void BlendColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
  I32 c = round_pixel((Float){b, g, r, a});
  ctx->blendcolor = CONVERT(c, U16).xyzwxyzw;
}
1309
BlendEquation(GLenum mode)1310 void BlendEquation(GLenum mode) {
1311 assert(mode == GL_FUNC_ADD || mode == GL_MIN || mode == GL_MAX ||
1312 (mode >= GL_MULTIPLY_KHR && mode <= GL_HSL_LUMINOSITY_KHR));
1313 if (mode != ctx->blend_equation) {
1314 ctx->blend_equation = mode;
1315 hash_blend_key();
1316 }
1317 }
1318
DepthMask(GLboolean flag)1319 void DepthMask(GLboolean flag) { ctx->depthmask = flag; }
1320
DepthFunc(GLenum func)1321 void DepthFunc(GLenum func) {
1322 switch (func) {
1323 case GL_LESS:
1324 case GL_LEQUAL:
1325 break;
1326 default:
1327 assert(false);
1328 }
1329 ctx->depthfunc = func;
1330 }
1331
// Sets the scissor rectangle; stored as inclusive-left/top to
// exclusive-right/bottom bounds.
void SetScissor(GLint x, GLint y, GLsizei width, GLsizei height) {
  ctx->scissor = IntRect{x, y, x + width, y + height};
}
1335
// Records the RGBA clear color used by subsequent color clears.
void ClearColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
  ctx->clearcolor[0] = r;
  ctx->clearcolor[1] = g;
  ctx->clearcolor[2] = b;
  ctx->clearcolor[3] = a;
}
1342
ClearDepth(GLdouble depth)1343 void ClearDepth(GLdouble depth) { ctx->cleardepth = depth; }
1344
// Selects the active texture unit. The clamp keeps release builds in range
// even when the debug asserts are compiled out.
void ActiveTexture(GLenum texture) {
  assert(texture >= GL_TEXTURE0);
  assert(texture < GL_TEXTURE0 + MAX_TEXTURE_UNITS);
  ctx->active_texture_unit =
      clamp(int(texture - GL_TEXTURE0), 0, int(MAX_TEXTURE_UNITS - 1));
}
1351
GenQueries(GLsizei n,GLuint * result)1352 void GenQueries(GLsizei n, GLuint* result) {
1353 for (int i = 0; i < n; i++) {
1354 Query q;
1355 result[i] = ctx->queries.insert(q);
1356 }
1357 }
1358
DeleteQuery(GLuint n)1359 void DeleteQuery(GLuint n) {
1360 if (n && ctx->queries.erase(n)) {
1361 unlink(ctx->time_elapsed_query, n);
1362 unlink(ctx->samples_passed_query, n);
1363 }
1364 }
1365
GenBuffers(int n,GLuint * result)1366 void GenBuffers(int n, GLuint* result) {
1367 for (int i = 0; i < n; i++) {
1368 Buffer b;
1369 result[i] = ctx->buffers.insert(b);
1370 }
1371 }
1372
DeleteBuffer(GLuint n)1373 void DeleteBuffer(GLuint n) {
1374 if (n && ctx->buffers.erase(n)) {
1375 unlink(ctx->pixel_pack_buffer_binding, n);
1376 unlink(ctx->pixel_unpack_buffer_binding, n);
1377 unlink(ctx->array_buffer_binding, n);
1378 }
1379 }
1380
GenVertexArrays(int n,GLuint * result)1381 void GenVertexArrays(int n, GLuint* result) {
1382 for (int i = 0; i < n; i++) {
1383 VertexArray v;
1384 result[i] = ctx->vertex_arrays.insert(v);
1385 }
1386 }
1387
DeleteVertexArray(GLuint n)1388 void DeleteVertexArray(GLuint n) {
1389 if (n && ctx->vertex_arrays.erase(n)) {
1390 unlink(ctx->current_vertex_array, n);
1391 }
1392 }
1393
// Creates a shader object of the given type and returns its handle.
GLuint CreateShader(GLenum type) {
  Shader s;
  s.type = type;
  return ctx->shaders.insert(s);
}
1399
// Associates a shader with a precompiled implementation looked up by name
// (this implementation does not compile GLSL source at runtime).
void ShaderSourceByName(GLuint shader, char* name) {
  Shader& s = ctx->shaders[shader];
  s.loader = load_shader(name);
  if (!s.loader) {
    debugf("unknown shader %s\n", name);
  }
}
1407
AttachShader(GLuint program,GLuint shader)1408 void AttachShader(GLuint program, GLuint shader) {
1409 Program& p = ctx->programs[program];
1410 Shader& s = ctx->shaders[shader];
1411 if (s.type == GL_VERTEX_SHADER) {
1412 if (!p.impl && s.loader) p.impl = s.loader();
1413 } else if (s.type == GL_FRAGMENT_SHADER) {
1414 if (!p.impl && s.loader) p.impl = s.loader();
1415 } else {
1416 assert(0);
1417 }
1418 }
1419
// Destroys a shader object; handle 0 is ignored.
void DeleteShader(GLuint n) {
  if (n) ctx->shaders.erase(n);
}
1423
// Creates an empty program object and returns its handle.
GLuint CreateProgram() {
  Program p;
  return ctx->programs.insert(p);
}
1428
DeleteProgram(GLuint n)1429 void DeleteProgram(GLuint n) {
1430 if (!n) return;
1431 if (ctx->current_program == n) {
1432 if (auto* p = ctx->programs.find(n)) {
1433 p->deleted = true;
1434 }
1435 } else {
1436 ctx->programs.erase(n);
1437 }
1438 }
1439
// "Links" a program by instantiating its vertex and fragment shader entry
// points from the attached implementation.
void LinkProgram(GLuint program) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return;
  }
  // The shader's interpolant outputs must fit the fixed-size Interpolants
  // buffer shared between the vertex and fragment stages.
  assert(p.impl->interpolants_size() <= sizeof(Interpolants));
  if (!p.vert_impl) p.vert_impl = p.impl->get_vertex_shader();
  if (!p.frag_impl) p.frag_impl = p.impl->get_fragment_shader();
}
1450
GetLinkStatus(GLuint program)1451 GLint GetLinkStatus(GLuint program) {
1452 if (auto* p = ctx->programs.find(program)) {
1453 return p->impl ? 1 : 0;
1454 }
1455 return 0;
1456 }
1457
// Binds a named vertex attribute to an explicit location index on the
// program's implementation.
void BindAttribLocation(GLuint program, GLuint index, char* name) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return;
  }
  p.impl->bind_attrib(name, index);
}
1466
GetAttribLocation(GLuint program,char * name)1467 GLint GetAttribLocation(GLuint program, char* name) {
1468 Program& p = ctx->programs[program];
1469 assert(p.impl);
1470 if (!p.impl) {
1471 return -1;
1472 }
1473 return p.impl->get_attrib(name);
1474 }
1475
GetUniformLocation(GLuint program,char * name)1476 GLint GetUniformLocation(GLuint program, char* name) {
1477 Program& p = ctx->programs[program];
1478 assert(p.impl);
1479 if (!p.impl) {
1480 return -1;
1481 }
1482 GLint loc = p.impl->get_uniform(name);
1483 // debugf("location: %d\n", loc);
1484 return loc;
1485 }
1486
// Returns a monotonic timestamp for GL_TIME_ELAPSED queries. Units are
// nanoseconds on Windows and POSIX; on Mach the raw mach_absolute_time()
// ticks are returned unconverted.
static uint64_t get_time_value() {
#ifdef __MACH__
  return mach_absolute_time();
#elif defined(_WIN32)
  LARGE_INTEGER time;
  // The performance counter frequency is fixed at boot, so query it once
  // and cache it.
  static bool have_frequency = false;
  static LARGE_INTEGER frequency;
  if (!have_frequency) {
    QueryPerformanceFrequency(&frequency);
    have_frequency = true;
  }
  QueryPerformanceCounter(&time);
  return time.QuadPart * 1000000000ULL / frequency.QuadPart;
#else
  return ({
    struct timespec tp;
    clock_gettime(CLOCK_MONOTONIC, &tp);
    tp.tv_sec * 1000000000ULL + tp.tv_nsec;
  });
#endif
}
1508
// Starts a query: occlusion queries reset their sample counter, while timer
// queries record the start timestamp (EndQuery later converts it to the
// elapsed delta).
void BeginQuery(GLenum target, GLuint id) {
  ctx->get_binding(target) = id;
  Query& q = ctx->queries[id];
  switch (target) {
    case GL_SAMPLES_PASSED:
      q.value = 0;
      break;
    case GL_TIME_ELAPSED:
      q.value = get_time_value();
      break;
    default:
      debugf("unknown query target %x for query %d\n", target, id);
      assert(false);
  }
}
1524
// Ends the query bound to target. Timer queries convert their stored start
// timestamp into an elapsed delta; occlusion counts are already accumulated.
void EndQuery(GLenum target) {
  Query& q = ctx->queries[ctx->get_binding(target)];
  switch (target) {
    case GL_SAMPLES_PASSED:
      break;
    case GL_TIME_ELAPSED:
      q.value = get_time_value() - q.value;
      break;
    default:
      debugf("unknown query target %x\n", target);
      assert(false);
  }
  // The query is no longer active on this target.
  ctx->get_binding(target) = 0;
}
1539
GetQueryObjectui64v(GLuint id,GLenum pname,GLuint64 * params)1540 void GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64* params) {
1541 Query& q = ctx->queries[id];
1542 switch (pname) {
1543 case GL_QUERY_RESULT:
1544 assert(params);
1545 params[0] = q.value;
1546 break;
1547 default:
1548 assert(false);
1549 }
1550 }
1551
BindVertexArray(GLuint vertex_array)1552 void BindVertexArray(GLuint vertex_array) {
1553 if (vertex_array != ctx->current_vertex_array) {
1554 ctx->validate_vertex_array = true;
1555 }
1556 ctx->current_vertex_array = vertex_array;
1557 }
1558
// Binds a texture to the given target on the active texture unit.
void BindTexture(GLenum target, GLuint texture) {
  ctx->get_binding(target) = texture;
}
1562
// Binds a buffer object to the given target.
void BindBuffer(GLenum target, GLuint buffer) {
  ctx->get_binding(target) = buffer;
}
1566
BindFramebuffer(GLenum target,GLuint fb)1567 void BindFramebuffer(GLenum target, GLuint fb) {
1568 if (target == GL_FRAMEBUFFER) {
1569 ctx->read_framebuffer_binding = fb;
1570 ctx->draw_framebuffer_binding = fb;
1571 } else {
1572 assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
1573 ctx->get_binding(target) = fb;
1574 }
1575 }
1576
// Binds a renderbuffer object to the given target.
void BindRenderbuffer(GLenum target, GLuint rb) {
  ctx->get_binding(target) = rb;
}
1580
PixelStorei(GLenum name,GLint param)1581 void PixelStorei(GLenum name, GLint param) {
1582 if (name == GL_UNPACK_ALIGNMENT) {
1583 assert(param == 1);
1584 } else if (name == GL_UNPACK_ROW_LENGTH) {
1585 ctx->unpack_row_length = param;
1586 }
1587 }
1588
// Maps unsized/legacy internal formats onto the sized formats this
// implementation actually stores; sized formats pass through unchanged.
static GLenum remap_internal_format(GLenum format) {
  switch (format) {
    case GL_DEPTH_COMPONENT:
      return GL_DEPTH_COMPONENT24;
    case GL_RGBA:
      return GL_RGBA8;
    case GL_RED:
      return GL_R8;
    case GL_RG:
      return GL_RG8;
    case GL_RGB_422_APPLE:
      return GL_RGB_RAW_422_APPLE;
    default:
      return format;
  }
}
1605
1606 } // extern "C"
1607
format_requires_conversion(GLenum external_format,GLenum internal_format)1608 static bool format_requires_conversion(GLenum external_format,
1609 GLenum internal_format) {
1610 switch (external_format) {
1611 case GL_RGBA:
1612 return internal_format == GL_RGBA8;
1613 default:
1614 return false;
1615 }
1616 }
1617
// Copies one row of 32-bit pixels while swapping the red and blue channels
// (the same transform maps BGRA<->RGBA in either direction).
static inline void copy_bgra8_to_rgba8(uint32_t* dest, const uint32_t* src,
                                       int width) {
  // Vectorized path: process 4 pixels at a time. Swap R and B by rotating
  // the masked R/B lanes 16 bits while keeping G/A in place.
  for (; width >= 4; width -= 4, dest += 4, src += 4) {
    U32 p = unaligned_load<U32>(src);
    U32 rb = p & 0x00FF00FF;
    unaligned_store(dest, (p & 0xFF00FF00) | (rb << 16) | (rb >> 16));
  }
  // Scalar tail for the remaining 0-3 pixels.
  for (; width > 0; width--, dest++, src++) {
    uint32_t p = *src;
    uint32_t rb = p & 0x00FF00FF;
    *dest = (p & 0xFF00FF00) | (rb << 16) | (rb >> 16);
  }
}
1631
convert_copy(GLenum external_format,GLenum internal_format,uint8_t * dst_buf,size_t dst_stride,const uint8_t * src_buf,size_t src_stride,size_t width,size_t height)1632 static void convert_copy(GLenum external_format, GLenum internal_format,
1633 uint8_t* dst_buf, size_t dst_stride,
1634 const uint8_t* src_buf, size_t src_stride,
1635 size_t width, size_t height) {
1636 switch (external_format) {
1637 case GL_RGBA:
1638 if (internal_format == GL_RGBA8) {
1639 for (; height; height--) {
1640 copy_bgra8_to_rgba8((uint32_t*)dst_buf, (const uint32_t*)src_buf,
1641 width);
1642 dst_buf += dst_stride;
1643 src_buf += src_stride;
1644 }
1645 return;
1646 }
1647 break;
1648 default:
1649 break;
1650 }
1651 size_t row_bytes = width * bytes_for_internal_format(internal_format);
1652 for (; height; height--) {
1653 memcpy(dst_buf, src_buf, row_bytes);
1654 dst_buf += dst_stride;
1655 src_buf += src_stride;
1656 }
1657 }
1658
// (Re)allocates a texture's storage for the given format and dimensions.
// When buf is supplied, the texture either references it directly (native
// formats) or copies/converts from it into internally managed storage.
// min_width/min_height let callers reserve extra slack in the allocation.
static void set_tex_storage(Texture& t, GLenum external_format, GLsizei width,
                            GLsizei height, void* buf = nullptr,
                            GLsizei stride = 0, GLsizei min_width = 0,
                            GLsizei min_height = 0) {
  GLenum internal_format = remap_internal_format(external_format);
  bool changed = false;
  if (t.width != width || t.height != height ||
      t.internal_format != internal_format) {
    changed = true;
    t.internal_format = internal_format;
    t.width = width;
    t.height = height;
  }
  // If we are changed from an internally managed buffer to an externally
  // supplied one or vice versa, ensure that we clean up old buffer state.
  // However, if we have to convert the data from a non-native format, then
  // always treat it as internally managed since we will need to copy to an
  // internally managed native format buffer.
  bool should_free = buf == nullptr || format_requires_conversion(
                                           external_format, internal_format);
  if (t.should_free() != should_free) {
    changed = true;
    t.cleanup();
    t.set_should_free(should_free);
  }
  // If now an external buffer, explicitly set it...
  if (!should_free) {
    t.set_buffer(buf, stride);
  }
  // New storage contents are undefined; any pending lazy clear is stale.
  t.disable_delayed_clear();
  t.allocate(changed, min_width, min_height);
  // If we have a buffer that needs format conversion, then do that now.
  if (buf && should_free) {
    convert_copy(external_format, internal_format, (uint8_t*)t.buf, t.stride(),
                 (const uint8_t*)buf, stride, width, height);
  }
}
1696
1697 extern "C" {
1698
// Allocates immutable-style storage for the bound texture; only a single
// mip level is supported.
void TexStorage2D(GLenum target, GLint levels, GLenum internal_format,
                  GLsizei width, GLsizei height) {
  assert(levels == 1);
  Texture& t = ctx->textures[ctx->get_binding(target)];
  set_tex_storage(t, internal_format, width, height);
}
1705
// Derives the sized internal format implied by an upload's (format, type)
// pair; unsupported combinations assert in debug builds and return 0.
GLenum internal_format_for_data(GLenum format, GLenum ty) {
  if (format == GL_RED && ty == GL_UNSIGNED_BYTE) {
    return GL_R8;
  } else if ((format == GL_RGBA || format == GL_BGRA) &&
             (ty == GL_UNSIGNED_BYTE || ty == GL_UNSIGNED_INT_8_8_8_8_REV)) {
    return GL_RGBA8;
  } else if (format == GL_RGBA && ty == GL_FLOAT) {
    return GL_RGBA32F;
  } else if (format == GL_RGBA_INTEGER && ty == GL_INT) {
    return GL_RGBA32I;
  } else if (format == GL_RG && ty == GL_UNSIGNED_BYTE) {
    return GL_RG8;
  } else if (format == GL_RGB_422_APPLE &&
             ty == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
    return GL_RGB_RAW_422_APPLE;
  } else if (format == GL_RED && ty == GL_UNSIGNED_SHORT) {
    return GL_R16;
  } else {
    debugf("unknown internal format for format %x, type %x\n", format, ty);
    assert(false);
    return 0;
  }
}
1729
get_pixel_pack_buffer()1730 static Buffer* get_pixel_pack_buffer() {
1731 return ctx->pixel_pack_buffer_binding
1732 ? &ctx->buffers[ctx->pixel_pack_buffer_binding]
1733 : nullptr;
1734 }
1735
get_pixel_pack_buffer_data(void * data)1736 static void* get_pixel_pack_buffer_data(void* data) {
1737 if (Buffer* b = get_pixel_pack_buffer()) {
1738 return b->buf ? b->buf + (size_t)data : nullptr;
1739 }
1740 return data;
1741 }
1742
get_pixel_unpack_buffer()1743 static Buffer* get_pixel_unpack_buffer() {
1744 return ctx->pixel_unpack_buffer_binding
1745 ? &ctx->buffers[ctx->pixel_unpack_buffer_binding]
1746 : nullptr;
1747 }
1748
get_pixel_unpack_buffer_data(void * data)1749 static void* get_pixel_unpack_buffer_data(void* data) {
1750 if (Buffer* b = get_pixel_unpack_buffer()) {
1751 return b->buf ? b->buf + (size_t)data : nullptr;
1752 }
1753 return data;
1754 }
1755
// Uploads a sub-rectangle of pixel data into the bound texture, converting
// channel order if the source format requires it. Only mip level 0 is
// supported.
void TexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset,
                   GLsizei width, GLsizei height, GLenum format, GLenum ty,
                   void* data) {
  if (level != 0) {
    assert(false);
    return;
  }
  data = get_pixel_unpack_buffer_data(data);
  if (!data) return;
  Texture& t = ctx->textures[ctx->get_binding(target)];
  // The uploaded region will be fully overwritten, so any pending lazy
  // clear can skip it.
  IntRect skip = {xoffset, yoffset, xoffset + width, yoffset + height};
  prepare_texture(t, &skip);
  assert(xoffset + width <= t.width);
  assert(yoffset + height <= t.height);
  assert(ctx->unpack_row_length == 0 || ctx->unpack_row_length >= width);
  // Source rows may be padded out to GL_UNPACK_ROW_LENGTH pixels.
  GLsizei row_length =
      ctx->unpack_row_length != 0 ? ctx->unpack_row_length : width;
  assert(t.internal_format == internal_format_for_data(format, ty));
  // When converting, the source pixel size is that of the external format,
  // not the texture's native bpp.
  int src_bpp = format_requires_conversion(format, t.internal_format)
                    ? bytes_for_internal_format(format)
                    : t.bpp();
  if (!src_bpp || !t.buf) return;
  convert_copy(format, t.internal_format,
               (uint8_t*)t.sample_ptr(xoffset, yoffset), t.stride(),
               (const uint8_t*)data, row_length * src_bpp, width, height);
}
1782
// Allocates storage and uploads initial contents in one call, implemented in
// terms of TexStorage2D and TexSubImage2D. Only mip level 0 and a zero
// border are supported.
void TexImage2D(GLenum target, GLint level, GLint internal_format,
                GLsizei width, GLsizei height, GLint border, GLenum format,
                GLenum ty, void* data) {
  if (level != 0) {
    assert(false);
    return;
  }
  assert(border == 0);
  TexStorage2D(target, 1, internal_format, width, height);
  TexSubImage2D(target, 0, 0, 0, width, height, format, ty, data);
}
1794
// No-op: mipmaps are not implemented in this renderer.
void GenerateMipmap(UNUSED GLenum target) {
  // TODO: support mipmaps
}
1798
// Applies a sampler parameter directly to a texture by id. Only min/mag
// filters are stored; wrap modes must be CLAMP_TO_EDGE, and any other
// parameter is silently ignored.
void SetTextureParameter(GLuint texid, GLenum pname, GLint param) {
  Texture& t = ctx->textures[texid];
  switch (pname) {
    case GL_TEXTURE_WRAP_S:
      assert(param == GL_CLAMP_TO_EDGE);
      break;
    case GL_TEXTURE_WRAP_T:
      assert(param == GL_CLAMP_TO_EDGE);
      break;
    case GL_TEXTURE_MIN_FILTER:
      t.min_filter = param;
      break;
    case GL_TEXTURE_MAG_FILTER:
      t.mag_filter = param;
      break;
    default:
      break;
  }
}
1818
// Applies a sampler parameter to the texture bound to target.
void TexParameteri(GLenum target, GLenum pname, GLint param) {
  SetTextureParameter(ctx->get_binding(target), pname, param);
}
1822
GenTextures(int n,GLuint * result)1823 void GenTextures(int n, GLuint* result) {
1824 for (int i = 0; i < n; i++) {
1825 Texture t;
1826 result[i] = ctx->textures.insert(t);
1827 }
1828 }
1829
DeleteTexture(GLuint n)1830 void DeleteTexture(GLuint n) {
1831 if (n && ctx->textures.erase(n)) {
1832 for (size_t i = 0; i < MAX_TEXTURE_UNITS; i++) {
1833 ctx->texture_units[i].unlink(n);
1834 }
1835 }
1836 }
1837
GenRenderbuffers(int n,GLuint * result)1838 void GenRenderbuffers(int n, GLuint* result) {
1839 for (int i = 0; i < n; i++) {
1840 Renderbuffer r;
1841 result[i] = ctx->renderbuffers.insert(r);
1842 }
1843 }
1844
// Called when a renderbuffer is destroyed: detaches its backing texture from
// every framebuffer attachment that references it, then deletes the texture
// (renderbuffers are backed by textures in this implementation).
void Renderbuffer::on_erase() {
  for (auto* fb : ctx->framebuffers) {
    if (fb) {
      unlink(fb->color_attachment, texture);
      unlink(fb->depth_attachment, texture);
    }
  }
  DeleteTexture(texture);
}
1854
DeleteRenderbuffer(GLuint n)1855 void DeleteRenderbuffer(GLuint n) {
1856 if (n && ctx->renderbuffers.erase(n)) {
1857 unlink(ctx->renderbuffer_binding, n);
1858 }
1859 }
1860
GenFramebuffers(int n,GLuint * result)1861 void GenFramebuffers(int n, GLuint* result) {
1862 for (int i = 0; i < n; i++) {
1863 Framebuffer f;
1864 result[i] = ctx->framebuffers.insert(f);
1865 }
1866 }
1867
DeleteFramebuffer(GLuint n)1868 void DeleteFramebuffer(GLuint n) {
1869 if (n && ctx->framebuffers.erase(n)) {
1870 unlink(ctx->read_framebuffer_binding, n);
1871 unlink(ctx->draw_framebuffer_binding, n);
1872 }
1873 }
1874
// Allocates storage for the bound renderbuffer by backing it with a texture.
// All depth formats are coerced to 24-bit depth.
void RenderbufferStorage(GLenum target, GLenum internal_format, GLsizei width,
                         GLsizei height) {
  // Just refer a renderbuffer to a texture to simplify things for now...
  Renderbuffer& r = ctx->renderbuffers[ctx->get_binding(target)];
  if (!r.texture) {
    GenTextures(1, &r.texture);
  }
  switch (internal_format) {
    case GL_DEPTH_COMPONENT:
    case GL_DEPTH_COMPONENT16:
    case GL_DEPTH_COMPONENT24:
    case GL_DEPTH_COMPONENT32:
      // Force depth format to 24 bits...
      internal_format = GL_DEPTH_COMPONENT24;
      break;
  }
  set_tex_storage(ctx->textures[r.texture], internal_format, width, height);
}
1893
// Describes the layout of a (possibly normalized) vertex attribute in the
// currently bound array buffer. Note that va.size is stored in bytes, not
// component count.
void VertexAttribPointer(GLuint index, GLint size, GLenum type, bool normalized,
                         GLsizei stride, GLuint offset) {
  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
  if (index >= NULL_ATTRIB) {
    assert(0);
    return;
  }
  VertexAttrib& va = v.attribs[index];
  va.size = size * bytes_per_type(type);
  va.type = type;
  va.normalized = normalized;
  va.stride = stride;
  va.offset = offset;
  // The attribute captures the buffer bound at specification time, per GL
  // vertex array semantics.
  va.vertex_buffer = ctx->array_buffer_binding;
  va.vertex_array = ctx->current_vertex_array;
  ctx->validate_vertex_array = true;
}
1913
// Integer variant of VertexAttribPointer: identical layout handling, but the
// attribute is never normalized. va.size is stored in bytes.
void VertexAttribIPointer(GLuint index, GLint size, GLenum type, GLsizei stride,
                          GLuint offset) {
  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
  if (index >= NULL_ATTRIB) {
    assert(0);
    return;
  }
  VertexAttrib& va = v.attribs[index];
  va.size = size * bytes_per_type(type);
  va.type = type;
  va.normalized = false;
  va.stride = stride;
  va.offset = offset;
  // The attribute captures the buffer bound at specification time, per GL
  // vertex array semantics.
  va.vertex_buffer = ctx->array_buffer_binding;
  va.vertex_array = ctx->current_vertex_array;
  ctx->validate_vertex_array = true;
}
1933
EnableVertexAttribArray(GLuint index)1934 void EnableVertexAttribArray(GLuint index) {
1935 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1936 if (index >= NULL_ATTRIB) {
1937 assert(0);
1938 return;
1939 }
1940 VertexAttrib& va = v.attribs[index];
1941 if (!va.enabled) {
1942 ctx->validate_vertex_array = true;
1943 }
1944 va.enabled = true;
1945 v.max_attrib = max(v.max_attrib, (int)index);
1946 }
1947
DisableVertexAttribArray(GLuint index)1948 void DisableVertexAttribArray(GLuint index) {
1949 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1950 if (index >= NULL_ATTRIB) {
1951 assert(0);
1952 return;
1953 }
1954 VertexAttrib& va = v.attribs[index];
1955 if (va.enabled) {
1956 ctx->validate_vertex_array = true;
1957 }
1958 va.enabled = false;
1959 }
1960
VertexAttribDivisor(GLuint index,GLuint divisor)1961 void VertexAttribDivisor(GLuint index, GLuint divisor) {
1962 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1963 // Only support divisor being 0 (per-vertex) or 1 (per-instance).
1964 if (index >= NULL_ATTRIB || divisor > 1) {
1965 assert(0);
1966 return;
1967 }
1968 VertexAttrib& va = v.attribs[index];
1969 va.divisor = divisor;
1970 }
1971
BufferData(GLenum target,GLsizeiptr size,void * data,UNUSED GLenum usage)1972 void BufferData(GLenum target, GLsizeiptr size, void* data,
1973 UNUSED GLenum usage) {
1974 Buffer& b = ctx->buffers[ctx->get_binding(target)];
1975 if (b.allocate(size)) {
1976 ctx->validate_vertex_array = true;
1977 }
1978 if (data && b.buf && size <= b.size) {
1979 memcpy(b.buf, data, size);
1980 }
1981 }
1982
BufferSubData(GLenum target,GLintptr offset,GLsizeiptr size,void * data)1983 void BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
1984 void* data) {
1985 Buffer& b = ctx->buffers[ctx->get_binding(target)];
1986 assert(offset + size <= b.size);
1987 if (data && b.buf && offset + size <= b.size) {
1988 memcpy(&b.buf[offset], data, size);
1989 }
1990 }
1991
MapBuffer(GLenum target,UNUSED GLbitfield access)1992 void* MapBuffer(GLenum target, UNUSED GLbitfield access) {
1993 Buffer& b = ctx->buffers[ctx->get_binding(target)];
1994 return b.buf;
1995 }
1996
MapBufferRange(GLenum target,GLintptr offset,GLsizeiptr length,UNUSED GLbitfield access)1997 void* MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length,
1998 UNUSED GLbitfield access) {
1999 Buffer& b = ctx->buffers[ctx->get_binding(target)];
2000 if (b.buf && offset >= 0 && length > 0 && offset + length <= b.size) {
2001 return b.buf + offset;
2002 }
2003 return nullptr;
2004 }
2005
UnmapBuffer(GLenum target)2006 GLboolean UnmapBuffer(GLenum target) {
2007 Buffer& b = ctx->buffers[ctx->get_binding(target)];
2008 return b.buf != nullptr;
2009 }
2010
Uniform1i(GLint location,GLint V0)2011 void Uniform1i(GLint location, GLint V0) {
2012 // debugf("tex: %d\n", (int)ctx->textures.size);
2013 if (vertex_shader) {
2014 vertex_shader->set_uniform_1i(location, V0);
2015 }
2016 }
Uniform4fv(GLint location,GLsizei count,const GLfloat * v)2017 void Uniform4fv(GLint location, GLsizei count, const GLfloat* v) {
2018 assert(count == 1);
2019 if (vertex_shader) {
2020 vertex_shader->set_uniform_4fv(location, v);
2021 }
2022 }
UniformMatrix4fv(GLint location,GLsizei count,GLboolean transpose,const GLfloat * value)2023 void UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
2024 const GLfloat* value) {
2025 assert(count == 1);
2026 assert(!transpose);
2027 if (vertex_shader) {
2028 vertex_shader->set_uniform_matrix4fv(location, value);
2029 }
2030 }
2031
FramebufferTexture2D(GLenum target,GLenum attachment,GLenum textarget,GLuint texture,GLint level)2032 void FramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget,
2033 GLuint texture, GLint level) {
2034 assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
2035 assert(textarget == GL_TEXTURE_2D || textarget == GL_TEXTURE_RECTANGLE);
2036 assert(level == 0);
2037 Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
2038 if (attachment == GL_COLOR_ATTACHMENT0) {
2039 fb.color_attachment = texture;
2040 } else if (attachment == GL_DEPTH_ATTACHMENT) {
2041 fb.depth_attachment = texture;
2042 } else {
2043 assert(0);
2044 }
2045 }
2046
FramebufferRenderbuffer(GLenum target,GLenum attachment,GLenum renderbuffertarget,GLuint renderbuffer)2047 void FramebufferRenderbuffer(GLenum target, GLenum attachment,
2048 GLenum renderbuffertarget, GLuint renderbuffer) {
2049 assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
2050 assert(renderbuffertarget == GL_RENDERBUFFER);
2051 Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
2052 Renderbuffer& rb = ctx->renderbuffers[renderbuffer];
2053 if (attachment == GL_COLOR_ATTACHMENT0) {
2054 fb.color_attachment = rb.texture;
2055 } else if (attachment == GL_DEPTH_ATTACHMENT) {
2056 fb.depth_attachment = rb.texture;
2057 } else {
2058 assert(0);
2059 }
2060 }
2061
2062 } // extern "C"
2063
get_framebuffer(GLenum target,bool fallback=false)2064 static inline Framebuffer* get_framebuffer(GLenum target,
2065 bool fallback = false) {
2066 if (target == GL_FRAMEBUFFER) {
2067 target = GL_DRAW_FRAMEBUFFER;
2068 }
2069 Framebuffer* fb = ctx->framebuffers.find(ctx->get_binding(target));
2070 if (fallback && !fb) {
2071 // If the specified framebuffer isn't found and a fallback is requested,
2072 // use the default framebuffer.
2073 fb = &ctx->framebuffers[0];
2074 }
2075 return fb;
2076 }
2077
// Fills n consecutive elements starting at dst with val.
template <typename T>
static inline void fill_n(T* dst, size_t n, T val) {
  T* end = dst + n;
  while (dst != end) {
    *dst++ = val;
  }
}
2082
#if USE_SSE2
// Specialization for 32-bit stores on x86: use the "rep stosl" string-store
// instruction to fill the range, which handles the large fills done during
// buffer clears with a single instruction.
template <>
inline void fill_n<uint32_t>(uint32_t* dst, size_t n, uint32_t val) {
  // EDI/RDI = destination, ECX/RCX = count, EAX = value. dst and n are
  // declared read-write ("+") since stosl advances/decrements them.
  __asm__ __volatile__("rep stosl\n"
                       : "+D"(dst), "+c"(n)
                       : "a"(val)
                       : "memory", "cc");
}
#endif
2092
// Broadcasts an 8-bit clear value into all four bytes of a 32-bit chunk.
static inline uint32_t clear_chunk(uint8_t value) {
  uint32_t v = value;
  return (v << 24) | (v << 16) | (v << 8) | v;
}
2096
// Duplicates a 16-bit clear value into both halves of a 32-bit chunk.
static inline uint32_t clear_chunk(uint16_t value) {
  uint32_t v = value;
  return (v << 16) | v;
}
2100
clear_chunk(uint32_t value)2101 static inline uint32_t clear_chunk(uint32_t value) { return value; }
2102
2103 template <typename T>
clear_row(T * buf,size_t len,T value,uint32_t chunk)2104 static inline void clear_row(T* buf, size_t len, T value, uint32_t chunk) {
2105 const size_t N = sizeof(uint32_t) / sizeof(T);
2106 // fill any leading unaligned values
2107 if (N > 1) {
2108 size_t align = (-(intptr_t)buf & (sizeof(uint32_t) - 1)) / sizeof(T);
2109 if (align <= len) {
2110 fill_n(buf, align, value);
2111 len -= align;
2112 buf += align;
2113 }
2114 }
2115 // fill as many aligned chunks as possible
2116 fill_n((uint32_t*)buf, len / N, chunk);
2117 // fill any remaining values
2118 if (N > 1) {
2119 fill_n(buf + (len & ~(N - 1)), len & (N - 1), value);
2120 }
2121 }
2122
2123 template <typename T>
clear_buffer(Texture & t,T value,IntRect bb,int skip_start=0,int skip_end=0)2124 static void clear_buffer(Texture& t, T value, IntRect bb, int skip_start = 0,
2125 int skip_end = 0) {
2126 if (!t.buf) return;
2127 skip_start = max(skip_start, bb.x0);
2128 skip_end = max(skip_end, skip_start);
2129 assert(sizeof(T) == t.bpp());
2130 size_t stride = t.stride();
2131 // When clearing multiple full-width rows, collapse them into a single large
2132 // "row" to avoid redundant setup from clearing each row individually. Note
2133 // that we can only safely do this if the stride is tightly packed.
2134 if (bb.width() == t.width && bb.height() > 1 && skip_start >= skip_end &&
2135 (t.should_free() || stride == t.width * sizeof(T))) {
2136 bb.x1 += (stride / sizeof(T)) * (bb.height() - 1);
2137 bb.y1 = bb.y0 + 1;
2138 }
2139 T* buf = (T*)t.sample_ptr(bb.x0, bb.y0);
2140 uint32_t chunk = clear_chunk(value);
2141 for (int rows = bb.height(); rows > 0; rows--) {
2142 if (bb.x0 < skip_start) {
2143 clear_row(buf, skip_start - bb.x0, value, chunk);
2144 }
2145 if (skip_end < bb.x1) {
2146 clear_row(buf + (skip_end - bb.x0), bb.x1 - skip_end, value, chunk);
2147 }
2148 buf += stride / sizeof(T);
2149 }
2150 }
2151
2152 template <typename T>
force_clear_row(Texture & t,int y,int skip_start=0,int skip_end=0)2153 static inline void force_clear_row(Texture& t, int y, int skip_start = 0,
2154 int skip_end = 0) {
2155 assert(t.buf != nullptr);
2156 assert(sizeof(T) == t.bpp());
2157 assert(skip_start <= skip_end);
2158 T* buf = (T*)t.sample_ptr(0, y);
2159 uint32_t chunk = clear_chunk((T)t.clear_val);
2160 if (skip_start > 0) {
2161 clear_row<T>(buf, skip_start, t.clear_val, chunk);
2162 }
2163 if (skip_end < t.width) {
2164 clear_row<T>(buf + skip_end, t.width - skip_end, t.clear_val, chunk);
2165 }
2166 }
2167
2168 template <typename T>
force_clear(Texture & t,const IntRect * skip=nullptr)2169 static void force_clear(Texture& t, const IntRect* skip = nullptr) {
2170 if (!t.delay_clear || !t.cleared_rows) {
2171 return;
2172 }
2173 int y0 = 0;
2174 int y1 = t.height;
2175 int skip_start = 0;
2176 int skip_end = 0;
2177 if (skip) {
2178 y0 = clamp(skip->y0, 0, t.height);
2179 y1 = clamp(skip->y1, y0, t.height);
2180 skip_start = clamp(skip->x0, 0, t.width);
2181 skip_end = clamp(skip->x1, skip_start, t.width);
2182 if (skip_start <= 0 && skip_end >= t.width && y0 <= 0 && y1 >= t.height) {
2183 t.disable_delayed_clear();
2184 return;
2185 }
2186 }
2187 int num_masks = (y1 + 31) / 32;
2188 uint32_t* rows = t.cleared_rows;
2189 for (int i = y0 / 32; i < num_masks; i++) {
2190 uint32_t mask = rows[i];
2191 if (mask != ~0U) {
2192 rows[i] = ~0U;
2193 int start = i * 32;
2194 while (mask) {
2195 int count = __builtin_ctz(mask);
2196 if (count > 0) {
2197 clear_buffer<T>(t, t.clear_val,
2198 IntRect{0, start, t.width, start + count}, skip_start,
2199 skip_end);
2200 t.delay_clear -= count;
2201 start += count;
2202 mask >>= count;
2203 }
2204 count = __builtin_ctz(mask + 1);
2205 start += count;
2206 mask >>= count;
2207 }
2208 int count = (i + 1) * 32 - start;
2209 if (count > 0) {
2210 clear_buffer<T>(t, t.clear_val,
2211 IntRect{0, start, t.width, start + count}, skip_start,
2212 skip_end);
2213 t.delay_clear -= count;
2214 }
2215 }
2216 }
2217 if (t.delay_clear <= 0) t.disable_delayed_clear();
2218 }
2219
prepare_texture(Texture & t,const IntRect * skip)2220 static void prepare_texture(Texture& t, const IntRect* skip) {
2221 if (t.delay_clear) {
2222 switch (t.internal_format) {
2223 case GL_RGBA8:
2224 force_clear<uint32_t>(t, skip);
2225 break;
2226 case GL_R8:
2227 force_clear<uint8_t>(t, skip);
2228 break;
2229 case GL_RG8:
2230 force_clear<uint16_t>(t, skip);
2231 break;
2232 default:
2233 assert(false);
2234 break;
2235 }
2236 }
2237 }
2238
2239 // Setup a clear on a texture. This may either force an immediate clear or
2240 // potentially punt to a delayed clear, if applicable.
2241 template <typename T>
request_clear(Texture & t,T value,const IntRect & scissor)2242 static void request_clear(Texture& t, T value, const IntRect& scissor) {
2243 // If the clear would require a scissor, force clear anything outside
2244 // the scissor, and then immediately clear anything inside the scissor.
2245 if (!scissor.contains(t.offset_bounds())) {
2246 IntRect skip = scissor - t.offset;
2247 force_clear<T>(t, &skip);
2248 clear_buffer<T>(t, value, skip.intersection(t.bounds()));
2249 } else {
2250 // Do delayed clear for 2D texture without scissor.
2251 t.enable_delayed_clear(value);
2252 }
2253 }
2254
2255 template <typename T>
request_clear(Texture & t,T value)2256 static inline void request_clear(Texture& t, T value) {
2257 // If scissoring is enabled, use the scissor rect. Otherwise, just scissor to
2258 // the entire texture bounds.
2259 request_clear(t, value, ctx->scissortest ? ctx->scissor : t.offset_bounds());
2260 }
2261
2262 extern "C" {
2263
InitDefaultFramebuffer(int x,int y,int width,int height,int stride,void * buf)2264 void InitDefaultFramebuffer(int x, int y, int width, int height, int stride,
2265 void* buf) {
2266 Framebuffer& fb = ctx->framebuffers[0];
2267 if (!fb.color_attachment) {
2268 GenTextures(1, &fb.color_attachment);
2269 }
2270 // If the dimensions or buffer properties changed, we need to reallocate
2271 // the underlying storage for the color buffer texture.
2272 Texture& colortex = ctx->textures[fb.color_attachment];
2273 set_tex_storage(colortex, GL_RGBA8, width, height, buf, stride);
2274 colortex.offset = IntPoint(x, y);
2275 if (!fb.depth_attachment) {
2276 GenTextures(1, &fb.depth_attachment);
2277 }
2278 // Ensure dimensions of the depth buffer match the color buffer.
2279 Texture& depthtex = ctx->textures[fb.depth_attachment];
2280 set_tex_storage(depthtex, GL_DEPTH_COMPONENT24, width, height);
2281 depthtex.offset = IntPoint(x, y);
2282 }
2283
GetColorBuffer(GLuint fbo,GLboolean flush,int32_t * width,int32_t * height,int32_t * stride)2284 void* GetColorBuffer(GLuint fbo, GLboolean flush, int32_t* width,
2285 int32_t* height, int32_t* stride) {
2286 Framebuffer* fb = ctx->framebuffers.find(fbo);
2287 if (!fb || !fb->color_attachment) {
2288 return nullptr;
2289 }
2290 Texture& colortex = ctx->textures[fb->color_attachment];
2291 if (flush) {
2292 prepare_texture(colortex);
2293 }
2294 assert(colortex.offset == IntPoint(0, 0));
2295 if (width) {
2296 *width = colortex.width;
2297 }
2298 if (height) {
2299 *height = colortex.height;
2300 }
2301 if (stride) {
2302 *stride = colortex.stride();
2303 }
2304 return colortex.buf ? colortex.sample_ptr(0, 0) : nullptr;
2305 }
2306
ResolveFramebuffer(GLuint fbo)2307 void ResolveFramebuffer(GLuint fbo) {
2308 Framebuffer* fb = ctx->framebuffers.find(fbo);
2309 if (!fb || !fb->color_attachment) {
2310 return;
2311 }
2312 Texture& colortex = ctx->textures[fb->color_attachment];
2313 prepare_texture(colortex);
2314 }
2315
SetTextureBuffer(GLuint texid,GLenum internal_format,GLsizei width,GLsizei height,GLsizei stride,void * buf,GLsizei min_width,GLsizei min_height)2316 void SetTextureBuffer(GLuint texid, GLenum internal_format, GLsizei width,
2317 GLsizei height, GLsizei stride, void* buf,
2318 GLsizei min_width, GLsizei min_height) {
2319 Texture& t = ctx->textures[texid];
2320 set_tex_storage(t, internal_format, width, height, buf, stride, min_width,
2321 min_height);
2322 }
2323
CheckFramebufferStatus(GLenum target)2324 GLenum CheckFramebufferStatus(GLenum target) {
2325 Framebuffer* fb = get_framebuffer(target);
2326 if (!fb || !fb->color_attachment) {
2327 return GL_FRAMEBUFFER_UNSUPPORTED;
2328 }
2329 return GL_FRAMEBUFFER_COMPLETE;
2330 }
2331
// Clears a subregion of a texture with a clear value given in the specified
// format/type, converting it to the texture's internal format. Depth textures
// are cleared via depth runs; color textures go through request_clear, which
// may defer the clear. Only level 0 and a single 2D layer are supported.
void ClearTexSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset,
                      GLint zoffset, GLsizei width, GLsizei height,
                      GLsizei depth, GLenum format, GLenum type,
                      const void* data) {
  if (level != 0) {
    assert(false);
    return;
  }
  Texture& t = ctx->textures[texture];
  assert(!t.locked);
  if (width <= 0 || height <= 0 || depth <= 0) {
    return;
  }
  assert(zoffset == 0 && depth == 1);
  IntRect scissor = {xoffset, yoffset, xoffset + width, yoffset + height};
  if (t.internal_format == GL_DEPTH_COMPONENT24) {
    // Depth clear: scale the incoming depth value to 24-bit fixed point.
    uint32_t value = 0xFFFFFF;
    switch (format) {
      case GL_DEPTH_COMPONENT:
        switch (type) {
          case GL_DOUBLE:
            value = uint32_t(*(const GLdouble*)data * 0xFFFFFF);
            break;
          case GL_FLOAT:
            value = uint32_t(*(const GLfloat*)data * 0xFFFFFF);
            break;
          default:
            assert(false);
            break;
        }
        break;
      default:
        assert(false);
        break;
    }
    if (t.cleared() && !scissor.contains(t.offset_bounds())) {
      // If we need to scissor the clear and the depth buffer was already
      // initialized, then just fill runs for that scissor area.
      t.fill_depth_runs(value, scissor);
    } else {
      // Otherwise, the buffer is either uninitialized or the clear would
      // encompass the entire buffer. If uninitialized, we can safely fill
      // the entire buffer with any value and thus ignore any scissoring.
      t.init_depth_runs(value);
    }
    return;
  }

  // Color clear: pack the clear color into a 32-bit value, defaulting any
  // components not present in `format` (alpha defaults to opaque).
  uint32_t color = 0xFF000000;
  switch (type) {
    case GL_FLOAT: {
      // Fill components from the source, falling through so narrower formats
      // pick up only their leading components.
      const GLfloat* f = (const GLfloat*)data;
      Float v = {0.0f, 0.0f, 0.0f, 1.0f};
      switch (format) {
        case GL_RGBA:
          v.w = f[3];  // alpha
          FALLTHROUGH;
        case GL_RGB:
          v.z = f[2];  // blue
          FALLTHROUGH;
        case GL_RG:
          v.y = f[1];  // green
          FALLTHROUGH;
        case GL_RED:
          v.x = f[0];  // red
          break;
        default:
          assert(false);
          break;
      }
      // Round and pack the float components into 8-bit channels.
      color = bit_cast<uint32_t>(CONVERT(round_pixel(v), U8));
      break;
    }
    case GL_UNSIGNED_BYTE: {
      const GLubyte* b = (const GLubyte*)data;
      switch (format) {
        case GL_RGBA:
          color = (color & ~0xFF000000) | (uint32_t(b[3]) << 24);  // alpha
          FALLTHROUGH;
        case GL_RGB:
          color = (color & ~0x00FF0000) | (uint32_t(b[2]) << 16);  // blue
          FALLTHROUGH;
        case GL_RG:
          color = (color & ~0x0000FF00) | (uint32_t(b[1]) << 8);  // green
          FALLTHROUGH;
        case GL_RED:
          color = (color & ~0x000000FF) | uint32_t(b[0]);  // red
          break;
        default:
          assert(false);
          break;
      }
      break;
    }
    default:
      assert(false);
      break;
  }

  // Dispatch the clear based on the destination's internal format.
  switch (t.internal_format) {
    case GL_RGBA8:
      // Clear color needs to swizzle to BGRA.
      request_clear<uint32_t>(t,
                              (color & 0xFF00FF00) |
                                  ((color << 16) & 0xFF0000) |
                                  ((color >> 16) & 0xFF),
                              scissor);
      break;
    case GL_R8:
      request_clear<uint8_t>(t, uint8_t(color & 0xFF), scissor);
      break;
    case GL_RG8:
      request_clear<uint16_t>(t, uint16_t(color & 0xFFFF), scissor);
      break;
    default:
      assert(false);
      break;
  }
}
2451
ClearTexImage(GLuint texture,GLint level,GLenum format,GLenum type,const void * data)2452 void ClearTexImage(GLuint texture, GLint level, GLenum format, GLenum type,
2453 const void* data) {
2454 Texture& t = ctx->textures[texture];
2455 IntRect scissor = t.offset_bounds();
2456 ClearTexSubImage(texture, level, scissor.x0, scissor.y0, 0, scissor.width(),
2457 scissor.height(), 1, format, type, data);
2458 }
2459
Clear(GLbitfield mask)2460 void Clear(GLbitfield mask) {
2461 Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
2462 if ((mask & GL_COLOR_BUFFER_BIT) && fb.color_attachment) {
2463 Texture& t = ctx->textures[fb.color_attachment];
2464 IntRect scissor = ctx->scissortest
2465 ? ctx->scissor.intersection(t.offset_bounds())
2466 : t.offset_bounds();
2467 ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
2468 scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
2469 ctx->clearcolor);
2470 }
2471 if ((mask & GL_DEPTH_BUFFER_BIT) && fb.depth_attachment) {
2472 Texture& t = ctx->textures[fb.depth_attachment];
2473 IntRect scissor = ctx->scissortest
2474 ? ctx->scissor.intersection(t.offset_bounds())
2475 : t.offset_bounds();
2476 ClearTexSubImage(fb.depth_attachment, 0, scissor.x0, scissor.y0, 0,
2477 scissor.width(), scissor.height(), 1, GL_DEPTH_COMPONENT,
2478 GL_DOUBLE, &ctx->cleardepth);
2479 }
2480 }
2481
ClearColorRect(GLuint fbo,GLint xoffset,GLint yoffset,GLsizei width,GLsizei height,GLfloat r,GLfloat g,GLfloat b,GLfloat a)2482 void ClearColorRect(GLuint fbo, GLint xoffset, GLint yoffset, GLsizei width,
2483 GLsizei height, GLfloat r, GLfloat g, GLfloat b,
2484 GLfloat a) {
2485 GLfloat color[] = {r, g, b, a};
2486 Framebuffer& fb = ctx->framebuffers[fbo];
2487 Texture& t = ctx->textures[fb.color_attachment];
2488 IntRect scissor =
2489 IntRect{xoffset, yoffset, xoffset + width, yoffset + height}.intersection(
2490 t.offset_bounds());
2491 ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
2492 scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
2493 color);
2494 }
2495
InvalidateFramebuffer(GLenum target,GLsizei num_attachments,const GLenum * attachments)2496 void InvalidateFramebuffer(GLenum target, GLsizei num_attachments,
2497 const GLenum* attachments) {
2498 Framebuffer* fb = get_framebuffer(target);
2499 if (!fb || num_attachments <= 0 || !attachments) {
2500 return;
2501 }
2502 for (GLsizei i = 0; i < num_attachments; i++) {
2503 switch (attachments[i]) {
2504 case GL_DEPTH_ATTACHMENT: {
2505 Texture& t = ctx->textures[fb->depth_attachment];
2506 t.set_cleared(false);
2507 break;
2508 }
2509 case GL_COLOR_ATTACHMENT0: {
2510 Texture& t = ctx->textures[fb->color_attachment];
2511 t.disable_delayed_clear();
2512 break;
2513 }
2514 }
2515 }
2516 }
2517
// Reads back a rectangle of pixels from the read framebuffer's color
// attachment into data (or the bound pixel pack buffer). The requested rect
// is clipped to the texture; destination rows are tightly packed at
// width * bpp. Format/type must match the texture's internal format.
void ReadPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format,
                GLenum type, void* data) {
  data = get_pixel_pack_buffer_data(data);
  if (!data) return;
  Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
  if (!fb) return;
  assert(format == GL_RED || format == GL_RGBA || format == GL_RGBA_INTEGER ||
         format == GL_BGRA || format == GL_RG);
  Texture& t = ctx->textures[fb->color_attachment];
  if (!t.buf) return;
  // Flush any pending delayed clear before reading the contents.
  prepare_texture(t);
  // debugf("read pixels %d, %d, %d, %d from fb %d with format %x\n", x, y,
  //        width, height, ctx->read_framebuffer_binding, t.internal_format);
  // Translate from framebuffer space into texture space.
  x -= t.offset.x;
  y -= t.offset.y;
  assert(x >= 0 && y >= 0);
  assert(x + width <= t.width);
  assert(y + height <= t.height);
  if (internal_format_for_data(format, type) != t.internal_format) {
    debugf("mismatched format for read pixels: %x vs %x\n", t.internal_format,
           internal_format_for_data(format, type));
    assert(false);
    return;
  }
  // Only support readback conversions that are reversible
  assert(!format_requires_conversion(format, t.internal_format) ||
         bytes_for_internal_format(format) == t.bpp());
  uint8_t* dest = (uint8_t*)data;
  size_t destStride = width * t.bpp();
  // Clip the rect to the texture bounds, advancing dest so that the
  // in-bounds portion lands at the correct place in the output buffer.
  if (y < 0) {
    dest += -y * destStride;
    height += y;
    y = 0;
  }
  if (y + height > t.height) {
    height = t.height - y;
  }
  if (x < 0) {
    dest += -x * t.bpp();
    width += x;
    x = 0;
  }
  if (x + width > t.width) {
    width = t.width - x;
  }
  if (width <= 0 || height <= 0) {
    return;
  }
  // Copy row by row, converting between formats if needed.
  convert_copy(format, t.internal_format, dest, destStride,
               (const uint8_t*)t.sample_ptr(x, y), t.stride(), width, height);
}
2569
// Copies a 2D region between two textures (or renderbuffers, which resolve
// to their backing textures). Only level 0, depth 1, same-format copies are
// supported; source and destination regions must not exceed either texture.
void CopyImageSubData(GLuint srcName, GLenum srcTarget, UNUSED GLint srcLevel,
                      GLint srcX, GLint srcY, GLint srcZ, GLuint dstName,
                      GLenum dstTarget, UNUSED GLint dstLevel, GLint dstX,
                      GLint dstY, GLint dstZ, GLsizei srcWidth,
                      GLsizei srcHeight, GLsizei srcDepth) {
  assert(srcLevel == 0 && dstLevel == 0);
  assert(srcZ == 0 && srcDepth == 1 && dstZ == 0);
  // Resolve renderbuffer names to their backing textures.
  if (srcTarget == GL_RENDERBUFFER) {
    Renderbuffer& rb = ctx->renderbuffers[srcName];
    srcName = rb.texture;
  }
  if (dstTarget == GL_RENDERBUFFER) {
    Renderbuffer& rb = ctx->renderbuffers[dstName];
    dstName = rb.texture;
  }
  Texture& srctex = ctx->textures[srcName];
  if (!srctex.buf) return;
  // Flush any pending delayed clear on the source before reading it.
  prepare_texture(srctex);
  Texture& dsttex = ctx->textures[dstName];
  if (!dsttex.buf) return;
  assert(!dsttex.locked);
  // The destination region is about to be overwritten, so any pending clear
  // may skip it.
  IntRect skip = {dstX, dstY, dstX + srcWidth, dstY + srcHeight};
  prepare_texture(dsttex, &skip);
  assert(srctex.internal_format == dsttex.internal_format);
  assert(srcWidth >= 0);
  assert(srcHeight >= 0);
  assert(srcX + srcWidth <= srctex.width);
  assert(srcY + srcHeight <= srctex.height);
  assert(dstX + srcWidth <= dsttex.width);
  assert(dstY + srcHeight <= dsttex.height);
  int bpp = srctex.bpp();
  int src_stride = srctex.stride();
  int dest_stride = dsttex.stride();
  char* dest = dsttex.sample_ptr(dstX, dstY);
  char* src = srctex.sample_ptr(srcX, srcY);
  // Copy row by row to honor each texture's stride.
  for (int y = 0; y < srcHeight; y++) {
    memcpy(dest, src, srcWidth * bpp);
    dest += dest_stride;
    src += src_stride;
  }
}
2611
CopyTexSubImage2D(GLenum target,UNUSED GLint level,GLint xoffset,GLint yoffset,GLint x,GLint y,GLsizei width,GLsizei height)2612 void CopyTexSubImage2D(GLenum target, UNUSED GLint level, GLint xoffset,
2613 GLint yoffset, GLint x, GLint y, GLsizei width,
2614 GLsizei height) {
2615 assert(level == 0);
2616 Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
2617 if (!fb) return;
2618 CopyImageSubData(fb->color_attachment, GL_TEXTURE_2D, 0, x, y, 0,
2619 ctx->get_binding(target), GL_TEXTURE_2D, 0, xoffset, yoffset,
2620 0, width, height, 1);
2621 }
2622
2623 } // extern "C"
2624
2625 #include "blend.h"
2626 #include "composite.h"
2627 #include "swgl_ext.h"
2628
2629 #pragma GCC diagnostic push
2630 #pragma GCC diagnostic ignored "-Wuninitialized"
2631 #pragma GCC diagnostic ignored "-Wunused-function"
2632 #pragma GCC diagnostic ignored "-Wunused-parameter"
2633 #pragma GCC diagnostic ignored "-Wunused-variable"
2634 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
2635 #ifdef __clang__
2636 # pragma GCC diagnostic ignored "-Wunused-private-field"
2637 #else
2638 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
2639 #endif
2640 #include "load_shader.h"
2641 #pragma GCC diagnostic pop
2642
2643 #include "rasterize.h"
2644
validate()2645 void VertexArray::validate() {
2646 int last_enabled = -1;
2647 for (int i = 0; i <= max_attrib; i++) {
2648 VertexAttrib& attr = attribs[i];
2649 if (attr.enabled) {
2650 // VertexArray &v = ctx->vertex_arrays[attr.vertex_array];
2651 Buffer& vertex_buf = ctx->buffers[attr.vertex_buffer];
2652 attr.buf = vertex_buf.buf;
2653 attr.buf_size = vertex_buf.size;
2654 // debugf("%d %x %d %d %d %d\n", i, attr.type, attr.size, attr.stride,
2655 // attr.offset, attr.divisor);
2656 last_enabled = i;
2657 }
2658 }
2659 max_attrib = last_enabled;
2660 }
2661
2662 extern "C" {
2663
// Draws `instancecount` instances of `count` indexed vertices from the
// current vertex array into the draw framebuffer. Supports 16/32-bit indices
// (triangles only), plus a non-standard GL_NONE index type that behaves like
// DrawArrays with `offset` as the first vertex.
void DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
                           GLintptr offset, GLsizei instancecount) {
  if (offset < 0 || count <= 0 || instancecount <= 0 || !vertex_shader ||
      !fragment_shader) {
    return;
  }

  Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
  if (!fb.color_attachment) {
    return;
  }
  Texture& colortex = ctx->textures[fb.color_attachment];
  if (!colortex.buf) {
    return;
  }
  assert(!colortex.locked);
  assert(colortex.internal_format == GL_RGBA8 ||
         colortex.internal_format == GL_R8);
  // Texture id 0 serves as a null depth texture when depth testing is off.
  Texture& depthtex = ctx->textures[ctx->depthtest ? fb.depth_attachment : 0];
  if (depthtex.buf) {
    assert(depthtex.internal_format == GL_DEPTH_COMPONENT24);
    assert(colortex.width == depthtex.width &&
           colortex.height == depthtex.height);
    assert(colortex.offset == depthtex.offset);
  }

  // Refresh cached attribute buffer pointers if anything changed since the
  // last draw.
  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
  if (ctx->validate_vertex_array) {
    ctx->validate_vertex_array = false;
    v.validate();
  }

#ifdef PRINT_TIMINGS
  uint64_t start = get_time_value();
#endif

  // Reset per-draw statistics used for queries and timing output.
  ctx->shaded_rows = 0;
  ctx->shaded_pixels = 0;

  vertex_shader->init_batch();

  switch (type) {
    case GL_UNSIGNED_SHORT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint16_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_UNSIGNED_INT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint32_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_NONE:
      // Non-standard GL extension - if element type is GL_NONE, then we don't
      // use any element buffer and behave as if DrawArrays was called instead.
      for (GLsizei instance = 0; instance < instancecount; instance++) {
        switch (mode) {
          case GL_LINES:
            for (GLsizei i = 0; i + 2 <= count; i += 2) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 2);
              draw_quad(2, colortex, depthtex);
            }
            break;
          case GL_TRIANGLES:
            for (GLsizei i = 0; i + 3 <= count; i += 3) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 3);
              draw_quad(3, colortex, depthtex);
            }
            break;
          default:
            assert(false);
            break;
        }
      }
      break;
    default:
      assert(false);
      break;
  }

  // Accumulate shaded pixel counts into any active occlusion query.
  if (ctx->samples_passed_query) {
    Query& q = ctx->queries[ctx->samples_passed_query];
    q.value += ctx->shaded_pixels;
  }

#ifdef PRINT_TIMINGS
  uint64_t end = get_time_value();
  printf(
      "%7.3fms draw(%s, %d): %d pixels in %d rows (avg %f pixels/row, "
      "%fns/pixel)\n",
      double(end - start) / (1000. * 1000.),
      ctx->programs[ctx->current_program].impl->get_name(), instancecount,
      ctx->shaded_pixels, ctx->shaded_rows,
      double(ctx->shaded_pixels) / ctx->shaded_rows,
      double(end - start) / max(ctx->shaded_pixels, 1));
#endif
}
2763
// Finish has no work to do here beyond optional timing output; there is no
// queued GPU work to wait on in this implementation.
void Finish() {
#ifdef PRINT_TIMINGS
  printf("Finish\n");
#endif
}
2769
MakeCurrent(Context * c)2770 void MakeCurrent(Context* c) {
2771 if (ctx == c) {
2772 return;
2773 }
2774 ctx = c;
2775 setup_program(ctx ? ctx->current_program : 0);
2776 }
2777
CreateContext()2778 Context* CreateContext() { return new Context; }
2779
ReferenceContext(Context * c)2780 void ReferenceContext(Context* c) {
2781 if (!c) {
2782 return;
2783 }
2784 ++c->references;
2785 }
2786
DestroyContext(Context * c)2787 void DestroyContext(Context* c) {
2788 if (!c) {
2789 return;
2790 }
2791 assert(c->references > 0);
2792 --c->references;
2793 if (c->references > 0) {
2794 return;
2795 }
2796 if (ctx == c) {
2797 MakeCurrent(nullptr);
2798 }
2799 delete c;
2800 }
2801
ReportMemory(Context * ctx,size_t (* size_of_op)(void *))2802 size_t ReportMemory(Context* ctx, size_t (*size_of_op)(void*)) {
2803 size_t size = 0;
2804 if (ctx) {
2805 for (auto& t : ctx->textures) {
2806 if (t && t->should_free()) {
2807 size += size_of_op(t->buf);
2808 }
2809 }
2810 }
2811 return size;
2812 }
2813 } // extern "C"
2814