1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include <stdlib.h>
6 #include <stdint.h>
7 #include <string.h>
8 #include <assert.h>
9 #include <stdio.h>
10 #include <math.h>
11
12 #ifdef __MACH__
13 # include <mach/mach.h>
14 # include <mach/mach_time.h>
15 #else
16 # include <time.h>
17 #endif
18
19 #ifdef NDEBUG
20 # define debugf(...)
21 #else
22 # define debugf(...) printf(__VA_ARGS__)
23 #endif
24
25 // #define PRINT_TIMINGS
26
27 #ifdef _WIN32
28 # define ALWAYS_INLINE __forceinline
29 # define NO_INLINE __declspec(noinline)
30
// Including Windows.h brings a huge amount of namespace pollution so just
// define a couple of things manually
33 typedef int BOOL;
34 # define WINAPI __stdcall
35 # define DECLSPEC_IMPORT __declspec(dllimport)
36 # define WINBASEAPI DECLSPEC_IMPORT
37 typedef unsigned long DWORD;
38 typedef long LONG;
39 typedef __int64 LONGLONG;
40 # define DUMMYSTRUCTNAME
41
42 typedef union _LARGE_INTEGER {
43 struct {
44 DWORD LowPart;
45 LONG HighPart;
46 } DUMMYSTRUCTNAME;
47 struct {
48 DWORD LowPart;
49 LONG HighPart;
50 } u;
51 LONGLONG QuadPart;
52 } LARGE_INTEGER;
53 extern "C" {
54 WINBASEAPI BOOL WINAPI
55 QueryPerformanceCounter(LARGE_INTEGER* lpPerformanceCount);
56
57 WINBASEAPI BOOL WINAPI QueryPerformanceFrequency(LARGE_INTEGER* lpFrequency);
58 }
59
60 #else
61 // GCC is slower when dealing with always_inline, especially in debug builds.
62 // When using Clang, use always_inline more aggressively.
63 # if defined(__clang__) || defined(NDEBUG)
64 # define ALWAYS_INLINE __attribute__((always_inline)) inline
65 # else
66 # define ALWAYS_INLINE inline
67 # endif
68 # define NO_INLINE __attribute__((noinline))
69 #endif
70
71 // Some functions may cause excessive binary bloat if inlined in debug or with
72 // GCC builds, so use PREFER_INLINE on these instead of ALWAYS_INLINE.
73 #if defined(__clang__) && defined(NDEBUG)
74 # define PREFER_INLINE ALWAYS_INLINE
75 #else
76 # define PREFER_INLINE inline
77 #endif
78
79 #define UNREACHABLE __builtin_unreachable()
80
81 #define UNUSED [[maybe_unused]]
82
83 #define FALLTHROUGH [[fallthrough]]
84
85 #if defined(MOZILLA_CLIENT) && defined(MOZ_CLANG_PLUGIN)
86 # define IMPLICIT __attribute__((annotate("moz_implicit")))
87 #else
88 # define IMPLICIT
89 #endif
90
91 #include "gl_defs.h"
92 #include "glsl.h"
93 #include "program.h"
94 #include "texture.h"
95
96 using namespace glsl;
97
98 typedef ivec2_scalar IntPoint;
99
100 struct IntRect {
101 int x0;
102 int y0;
103 int x1;
104 int y1;
105
IntRectIntRect106 IntRect() : x0(0), y0(0), x1(0), y1(0) {}
IntRectIntRect107 IntRect(int x0, int y0, int x1, int y1) : x0(x0), y0(y0), x1(x1), y1(y1) {}
IntRectIntRect108 IntRect(IntPoint origin, IntPoint size)
109 : x0(origin.x),
110 y0(origin.y),
111 x1(origin.x + size.x),
112 y1(origin.y + size.y) {}
113
widthIntRect114 int width() const { return x1 - x0; }
heightIntRect115 int height() const { return y1 - y0; }
is_emptyIntRect116 bool is_empty() const { return width() <= 0 || height() <= 0; }
117
originIntRect118 IntPoint origin() const { return IntPoint(x0, y0); }
119
same_sizeIntRect120 bool same_size(const IntRect& o) const {
121 return width() == o.width() && height() == o.height();
122 }
123
containsIntRect124 bool contains(const IntRect& o) const {
125 return o.x0 >= x0 && o.y0 >= y0 && o.x1 <= x1 && o.y1 <= y1;
126 }
127
intersectIntRect128 IntRect& intersect(const IntRect& o) {
129 x0 = max(x0, o.x0);
130 y0 = max(y0, o.y0);
131 x1 = min(x1, o.x1);
132 y1 = min(y1, o.y1);
133 return *this;
134 }
135
intersectionIntRect136 IntRect intersection(const IntRect& o) {
137 IntRect result = *this;
138 result.intersect(o);
139 return result;
140 }
141
142 // Scale from source-space to dest-space, optionally rounding inward
scaleIntRect143 IntRect& scale(int srcWidth, int srcHeight, int dstWidth, int dstHeight,
144 bool roundIn = false) {
145 x0 = (x0 * dstWidth + (roundIn ? srcWidth - 1 : 0)) / srcWidth;
146 y0 = (y0 * dstHeight + (roundIn ? srcHeight - 1 : 0)) / srcHeight;
147 x1 = (x1 * dstWidth) / srcWidth;
148 y1 = (y1 * dstHeight) / srcHeight;
149 return *this;
150 }
151
152 // Flip the rect's Y coords around inflection point at Y=offset
invert_yIntRect153 void invert_y(int offset) {
154 y0 = offset - y0;
155 y1 = offset - y1;
156 swap(y0, y1);
157 }
158
offsetIntRect159 IntRect& offset(const IntPoint& o) {
160 x0 += o.x;
161 y0 += o.y;
162 x1 += o.x;
163 y1 += o.y;
164 return *this;
165 }
166
operator +IntRect167 IntRect operator+(const IntPoint& o) const {
168 return IntRect(*this).offset(o);
169 }
operator -IntRect170 IntRect operator-(const IntPoint& o) const {
171 return IntRect(*this).offset(-o);
172 }
173 };
174
175 typedef vec2_scalar Point2D;
176 typedef vec4_scalar Point3D;
177
// An integer span [start, end) used for row and column ranges.
struct IntRange {
  int start;
  int end;

  // Number of elements covered by the span.
  int len() const { return end - start; }

  // Return the overlap of this span with r (may be empty/inverted).
  IntRange intersect(IntRange r) const {
    int lo = start > r.start ? start : r.start;
    int hi = end < r.end ? end : r.end;
    return {lo, hi};
  }
};
188
189 struct FloatRange {
190 float start;
191 float end;
192
clipFloatRange193 float clip(float x) const { return clamp(x, start, end); }
194
clipFloatRange195 FloatRange clip(FloatRange r) const { return {clip(r.start), clip(r.end)}; }
196
mergeFloatRange197 FloatRange merge(FloatRange r) const {
198 return {min(start, r.start), max(end, r.end)};
199 }
200
roundFloatRange201 IntRange round() const {
202 return {int(floor(start + 0.5f)), int(floor(end + 0.5f))};
203 }
204
round_outFloatRange205 IntRange round_out() const { return {int(floor(start)), int(ceil(end))}; }
206 };
207
// Return the horizontal extent covered by the two points' X coordinates,
// ordered as {min x, max x}.
template <typename P>
static inline FloatRange x_range(P p0, P p1) {
  return {min(p0.x, p1.x), max(p0.x, p1.x)};
}
212
// State for one GL vertex attribute binding: the layout description set via
// glVertexAttribPointer plus a raw pointer into the bound buffer's storage.
struct VertexAttrib {
  size_t size = 0;  // in bytes
  GLenum type = 0;
  bool normalized = false;
  GLsizei stride = 0;
  GLuint offset = 0;
  bool enabled = false;
  // Instancing divisor: 0 means per-vertex data, non-zero means per-instance.
  GLuint divisor = 0;
  int vertex_array = 0;
  int vertex_buffer = 0;
  char* buf = nullptr;  // XXX: this can easily dangle
  size_t buf_size = 0;  // this will let us bounds check

  // Mark the buffer as invalid so we don't accidentally use stale data.
  void disable() {
    enabled = false;
    buf = nullptr;
    buf_size = 0;
  }
};
233
bytes_for_internal_format(GLenum internal_format)234 static int bytes_for_internal_format(GLenum internal_format) {
235 switch (internal_format) {
236 case GL_RGBA32F:
237 return 4 * 4;
238 case GL_RGBA32I:
239 return 4 * 4;
240 case GL_RGBA8:
241 case GL_BGRA8:
242 case GL_RGBA:
243 return 4;
244 case GL_R8:
245 case GL_RED:
246 return 1;
247 case GL_RG8:
248 case GL_RG:
249 return 2;
250 case GL_DEPTH_COMPONENT:
251 case GL_DEPTH_COMPONENT16:
252 case GL_DEPTH_COMPONENT24:
253 case GL_DEPTH_COMPONENT32:
254 return 4;
255 case GL_RGB_RAW_422_APPLE:
256 return 2;
257 case GL_R16:
258 return 2;
259 case GL_RG16:
260 return 4;
261 default:
262 debugf("internal format: %x\n", internal_format);
263 assert(0);
264 return 0;
265 }
266 }
267
aligned_stride(int row_bytes)268 static inline int aligned_stride(int row_bytes) { return (row_bytes + 3) & ~3; }
269
gl_format_to_texture_format(int type)270 static TextureFormat gl_format_to_texture_format(int type) {
271 switch (type) {
272 case GL_RGBA32F:
273 return TextureFormat::RGBA32F;
274 case GL_RGBA32I:
275 return TextureFormat::RGBA32I;
276 case GL_RGBA8:
277 return TextureFormat::RGBA8;
278 case GL_R8:
279 return TextureFormat::R8;
280 case GL_RG8:
281 return TextureFormat::RG8;
282 case GL_R16:
283 return TextureFormat::R16;
284 case GL_RG16:
285 return TextureFormat::RG16;
286 case GL_RGB_RAW_422_APPLE:
287 return TextureFormat::YUV422;
288 default:
289 assert(0);
290 return TextureFormat::RGBA8;
291 }
292 }
293
// A GL query object; stores the most recent result value for whatever query
// target it was bound to (e.g. GL_TIME_ELAPSED or GL_SAMPLES_PASSED).
struct Query {
  uint64_t value = 0;
};
297
// Backing store for a GL buffer object (vertex, index, or pixel data).
struct Buffer {
  char* buf = nullptr;
  size_t size = 0;      // current logical size in bytes
  size_t capacity = 0;  // allocated size in bytes (>= size)

  // Resize the buffer to hold new_size bytes. Shrinking merely adjusts the
  // logical size so existing capacity is reused; growth reallocates.
  // Returns true if re-allocation succeeded, false otherwise...
  bool allocate(size_t new_size) {
    if (new_size == size) {
      // Nothing changed; avoid touching the allocation at all.
      return true;
    }
    if (new_size > capacity) {
      // The request exceeds our capacity, so grow the allocation.
      char* grown = (char*)realloc(buf, new_size);
      assert(grown);
      if (!grown) {
        // On failure, drop the old allocation state entirely rather than
        // leave a stale pointer behind.
        cleanup();
        return false;
      }
      buf = grown;
      capacity = new_size;
    }
    size = new_size;
    return true;
  }

  // Release the backing store and reset all bookkeeping.
  void cleanup() {
    if (buf) {
      free(buf);
      buf = nullptr;
      size = 0;
      capacity = 0;
    }
  }

  ~Buffer() { cleanup(); }
};
343
// A framebuffer object pairing a color attachment texture with an optional
// depth attachment texture; 0 means no attachment.
struct Framebuffer {
  GLuint color_attachment = 0;
  GLuint depth_attachment = 0;
};
348
// A renderbuffer object backed by a texture. on_erase() is invoked by
// ObjectStore::erase when this object is destroyed (definition elsewhere).
struct Renderbuffer {
  GLuint texture = 0;

  void on_erase();
};
354
gl_filter_to_texture_filter(int type)355 TextureFilter gl_filter_to_texture_filter(int type) {
356 switch (type) {
357 case GL_NEAREST:
358 return TextureFilter::NEAREST;
359 case GL_NEAREST_MIPMAP_LINEAR:
360 return TextureFilter::NEAREST;
361 case GL_NEAREST_MIPMAP_NEAREST:
362 return TextureFilter::NEAREST;
363 case GL_LINEAR:
364 return TextureFilter::LINEAR;
365 case GL_LINEAR_MIPMAP_LINEAR:
366 return TextureFilter::LINEAR;
367 case GL_LINEAR_MIPMAP_NEAREST:
368 return TextureFilter::LINEAR;
369 default:
370 assert(0);
371 return TextureFilter::NEAREST;
372 }
373 }
374
// Backing store and metadata for a GL texture object. The pixel buffer may
// either be allocated internally (flags & SHOULD_FREE) or supplied
// externally via set_buffer().
struct Texture {
  GLenum internal_format = 0;
  int width = 0;
  int height = 0;
  char* buf = nullptr;
  size_t buf_size = 0;
  uint32_t buf_stride = 0;  // bytes per row
  uint8_t buf_bpp = 0;      // bytes per pixel
  GLenum min_filter = GL_NEAREST;
  GLenum mag_filter = GL_LINEAR;
  // The number of active locks on this texture. If this texture has any active
  // locks, we need to disallow modifying or destroying the texture as it may
  // be accessed by other threads where modifications could lead to races.
  int32_t locked = 0;
  // When used as an attachment of a framebuffer, rendering to the texture
  // behaves as if it is located at the given offset such that the offset is
  // subtracted from all transformed vertexes after the viewport is applied.
  IntPoint offset;

  enum FLAGS {
    // If the buffer is internally-allocated by SWGL
    SHOULD_FREE = 1 << 1,
    // If the buffer has been cleared to initialize it. Currently this is only
    // utilized by depth buffers which need to know when depth runs have reset
    // to a valid row state. When unset, the depth runs may contain garbage.
    CLEARED = 1 << 2,
  };
  int flags = SHOULD_FREE;
  bool should_free() const { return bool(flags & SHOULD_FREE); }
  bool cleared() const { return bool(flags & CLEARED); }

  // Set or clear a single flag bit.
  void set_flag(int flag, bool val) {
    if (val) {
      flags |= flag;
    } else {
      flags &= ~flag;
    }
  }
  void set_should_free(bool val) {
    // buf must be null before SHOULD_FREE can be safely toggled. Otherwise, we
    // might accidentally realloc an externally allocated buffer as if it were
    // an internally allocated one.
    assert(!buf);
    set_flag(SHOULD_FREE, val);
  }
  void set_cleared(bool val) { set_flag(CLEARED, val); }

  // Delayed-clearing state. When a clear of an FB is requested, we don't
  // immediately clear each row, as the rows may be subsequently overwritten
  // by draw calls, allowing us to skip the work of clearing the affected rows
  // either fully or partially. Instead, we keep a bit vector of rows that need
  // to be cleared later and save the value they need to be cleared with so
  // that we can clear these rows individually when they are touched by draws.
  // This currently only works for 2D textures, but not on texture arrays.
  int delay_clear = 0;       // number of rows still pending a clear
  uint32_t clear_val = 0;    // value the pending rows must be cleared to
  uint32_t* cleared_rows = nullptr;  // bit per row; 1 = already cleared

  void init_depth_runs(uint32_t z);
  void fill_depth_runs(uint32_t z, const IntRect& scissor);

  // Arm delayed clearing: mark every row as needing a clear to val.
  void enable_delayed_clear(uint32_t val) {
    delay_clear = height;
    clear_val = val;
    if (!cleared_rows) {
      cleared_rows = new uint32_t[(height + 31) / 32];
    }
    memset(cleared_rows, 0, ((height + 31) / 32) * sizeof(uint32_t));
    if (height & 31) {
      // Pre-mark the unused pad bits past the last valid row as cleared so
      // they are never processed.
      cleared_rows[height / 32] = ~0U << (height & 31);
    }
  }

  // Drop any pending delayed-clear state.
  void disable_delayed_clear() {
    if (cleared_rows) {
      delete[] cleared_rows;
      cleared_rows = nullptr;
      delay_clear = 0;
    }
  }

  int bpp() const { return buf_bpp; }
  void set_bpp() { buf_bpp = bytes_for_internal_format(internal_format); }

  size_t stride() const { return buf_stride; }
  void set_stride() { buf_stride = aligned_stride(buf_bpp * width); }

  // Set an external backing buffer of this texture.
  void set_buffer(void* new_buf, size_t new_stride) {
    assert(!should_free());
    // Ensure that the supplied stride is at least as big as the row data and
    // is aligned to the smaller of either the BPP or word-size. We need to at
    // least be able to sample data from within a row and sample whole pixels
    // of smaller formats without risking unaligned access.
    set_bpp();
    set_stride();
    assert(new_stride >= size_t(bpp() * width) &&
           new_stride % min(bpp(), sizeof(uint32_t)) == 0);

    buf = (char*)new_buf;
    buf_size = 0;
    buf_stride = new_stride;
  }

  // (Re)allocate the internal backing buffer if needed; min_width/min_height
  // allow reserving extra room up front. Only SHOULD_FREE textures allocate.
  // Returns true if re-allocation succeeded, false otherwise...
  bool allocate(bool force = false, int min_width = 0, int min_height = 0) {
    assert(!locked);  // Locked textures shouldn't be reallocated
    // If we get here, some GL API call that invalidates the texture was used.
    // Mark the buffer as not-cleared to signal this.
    set_cleared(false);
    // Check if there is either no buffer currently or if we forced validation
    // of the buffer size because some dimension might have changed.
    if ((!buf || force) && should_free()) {
      // Initialize the buffer's BPP and stride, since they may have changed.
      set_bpp();
      set_stride();
      // Compute new size based on the maximum potential stride, rather than
      // the current stride, to hopefully avoid reallocations when size would
      // otherwise change too much...
      size_t max_stride = max(buf_stride, aligned_stride(buf_bpp * min_width));
      size_t size = max_stride * max(height, min_height);
      if ((!buf && size > 0) || size > buf_size) {
        // Allocate with a SIMD register-sized tail of padding at the end so we
        // can safely read or write past the end of the texture with SIMD ops.
        // Currently only the flat Z-buffer texture needs this padding due to
        // full-register loads and stores in check_depth and discard_depth. In
        // case some code in the future accidentally uses a linear filter on a
        // texture with less than 2 pixels per row, we also add this padding
        // just to be safe. All other texture types and use-cases should be
        // safe to omit padding.
        size_t padding =
            internal_format == GL_DEPTH_COMPONENT24 || max(width, min_width) < 2
                ? sizeof(Float)
                : 0;
        char* new_buf = (char*)realloc(buf, size + padding);
        assert(new_buf);
        if (new_buf) {
          // Successfully reallocated the buffer, so go ahead and set it.
          buf = new_buf;
          buf_size = size;
          return true;
        }
        // Allocation failed, so ensure we don't leave stale buffer state.
        cleanup();
        return false;
      }
    }
    // Nothing changed...
    return true;
  }

  // Free the backing buffer (if owned) and reset all buffer bookkeeping.
  void cleanup() {
    assert(!locked);  // Locked textures shouldn't be destroyed
    if (buf) {
      // If we need to toggle SHOULD_FREE state, ensure that buf is nulled out,
      // regardless of whether we internally allocated it. This will prevent us
      // from wrongly treating buf as having been internally allocated for when
      // we go to realloc if it actually was externally allocated.
      if (should_free()) {
        free(buf);
      }
      buf = nullptr;
      buf_size = 0;
      buf_bpp = 0;
      buf_stride = 0;
    }
    disable_delayed_clear();
  }

  ~Texture() { cleanup(); }

  IntRect bounds() const { return IntRect{0, 0, width, height}; }
  IntRect offset_bounds() const { return bounds() + offset; }

  // Find the valid sampling bounds relative to the requested region
  IntRect sample_bounds(const IntRect& req, bool invertY = false) const {
    IntRect bb = bounds().intersect(req) - req.origin();
    if (invertY) bb.invert_y(req.height());
    return bb;
  }

  // Get a pointer for sampling at the given offset
  char* sample_ptr(int x, int y) const {
    return buf + y * stride() + x * bpp();
  }

  // Get a pointer for sampling the requested region and limit to the provided
  // sampling bounds
  char* sample_ptr(const IntRect& req, const IntRect& bounds,
                   bool invertY = false) const {
    // Offset the sample pointer by the clamped bounds
    int x = req.x0 + bounds.x0;
    // Invert the Y offset if necessary
    int y = invertY ? req.y1 - 1 - bounds.y0 : req.y0 + bounds.y0;
    return sample_ptr(x, y);
  }
};
572
573 // The last vertex attribute is reserved as a null attribute in case a vertex
574 // attribute is used without being set.
575 #define MAX_ATTRIBS 17
576 #define NULL_ATTRIB 16
// Vertex array object (VAO) state: the full set of attribute bindings plus
// the element array buffer binding.
struct VertexArray {
  VertexAttrib attribs[MAX_ATTRIBS];
  // Highest attribute index in use; -1 when none. (Maintained by callers —
  // see validate().)
  int max_attrib = -1;
  // The GL spec defines element array buffer binding to be part of VAO state.
  GLuint element_array_buffer_binding = 0;

  void validate();
};
585
// A GL shader object. SWGL shaders are precompiled, so this records only the
// shader type and the ProgramLoader used to resolve the implementation.
struct Shader {
  GLenum type = 0;
  ProgramLoader loader = nullptr;
};
590
// A linked GL program, holding the compiled program implementation and its
// vertex/fragment shader entry points.
struct Program {
  ProgramImpl* impl = nullptr;
  VertexShaderImpl* vert_impl = nullptr;
  FragmentShaderImpl* frag_impl = nullptr;
  // Set when glDeleteProgram is requested while the program is still in use.
  // NOTE(review): presumably deletion is deferred until unbound — confirm
  // against the callers that read this flag.
  bool deleted = false;

  // NOTE(review): only impl is deleted here; vert_impl/frag_impl appear to be
  // owned by impl — confirm against ProgramImpl's contract in program.h.
  ~Program() { delete impl; }
};
599
600 // clang-format off
601 // Fully-expand GL defines while ignoring more than 4 suffixes
602 #define CONCAT_KEY(prefix, x, y, z, w, ...) prefix##x##y##z##w
603 // Generate a blend key enum symbol
604 #define BLEND_KEY(...) CONCAT_KEY(BLEND_, __VA_ARGS__, 0, 0, 0)
605 #define MASK_BLEND_KEY(...) CONCAT_KEY(MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
606 #define AA_BLEND_KEY(...) CONCAT_KEY(AA_BLEND_, __VA_ARGS__, 0, 0, 0)
607 #define AA_MASK_BLEND_KEY(...) CONCAT_KEY(AA_MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
608
609 // Utility macro to easily generate similar code for all implemented blend modes
610 #define FOR_EACH_BLEND_KEY(macro) \
611 macro(GL_ONE, GL_ZERO, 0, 0) \
612 macro(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA) \
613 macro(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, 0, 0) \
614 macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, 0, 0) \
615 macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, GL_ZERO, GL_ONE) \
616 macro(GL_ZERO, GL_ONE_MINUS_SRC_ALPHA, 0, 0) \
617 macro(GL_ZERO, GL_SRC_COLOR, 0, 0) \
618 macro(GL_ONE, GL_ONE, 0, 0) \
619 macro(GL_ONE, GL_ONE, GL_ONE, GL_ONE_MINUS_SRC_ALPHA) \
620 macro(GL_ONE_MINUS_DST_ALPHA, GL_ONE, GL_ZERO, GL_ONE) \
621 macro(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_COLOR, 0, 0) \
622 macro(GL_ONE, GL_ONE_MINUS_SRC1_COLOR, 0, 0) \
623 macro(GL_MIN, 0, 0, 0) \
624 macro(GL_MAX, 0, 0, 0) \
625 macro(GL_MULTIPLY_KHR, 0, 0, 0) \
626 macro(GL_SCREEN_KHR, 0, 0, 0) \
627 macro(GL_OVERLAY_KHR, 0, 0, 0) \
628 macro(GL_DARKEN_KHR, 0, 0, 0) \
629 macro(GL_LIGHTEN_KHR, 0, 0, 0) \
630 macro(GL_COLORDODGE_KHR, 0, 0, 0) \
631 macro(GL_COLORBURN_KHR, 0, 0, 0) \
632 macro(GL_HARDLIGHT_KHR, 0, 0, 0) \
633 macro(GL_SOFTLIGHT_KHR, 0, 0, 0) \
634 macro(GL_DIFFERENCE_KHR, 0, 0, 0) \
635 macro(GL_EXCLUSION_KHR, 0, 0, 0) \
636 macro(GL_HSL_HUE_KHR, 0, 0, 0) \
637 macro(GL_HSL_SATURATION_KHR, 0, 0, 0) \
638 macro(GL_HSL_COLOR_KHR, 0, 0, 0) \
639 macro(GL_HSL_LUMINOSITY_KHR, 0, 0, 0) \
640 macro(SWGL_BLEND_DROP_SHADOW, 0, 0, 0) \
641 macro(SWGL_BLEND_SUBPIXEL_TEXT, 0, 0, 0)
642
#define DEFINE_BLEND_KEY(...) BLEND_KEY(__VA_ARGS__),
#define DEFINE_MASK_BLEND_KEY(...) MASK_BLEND_KEY(__VA_ARGS__),
#define DEFINE_AA_BLEND_KEY(...) AA_BLEND_KEY(__VA_ARGS__),
#define DEFINE_AA_MASK_BLEND_KEY(...) AA_MASK_BLEND_KEY(__VA_ARGS__),
// Enumerate every supported blend mode in four variants — plain, masked,
// anti-aliased, and anti-aliased+masked. The *_NONE aliases name the
// GL_ONE/GL_ZERO (no blending) entry of each variant.
enum BlendKey : uint8_t {
  FOR_EACH_BLEND_KEY(DEFINE_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_MASK_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_AA_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_AA_MASK_BLEND_KEY)
  BLEND_KEY_NONE = BLEND_KEY(GL_ONE, GL_ZERO),
  MASK_BLEND_KEY_NONE = MASK_BLEND_KEY(GL_ONE, GL_ZERO),
  AA_BLEND_KEY_NONE = AA_BLEND_KEY(GL_ONE, GL_ZERO),
  AA_MASK_BLEND_KEY_NONE = AA_MASK_BLEND_KEY(GL_ONE, GL_ZERO),
};
657 // clang-format on
658
659 const size_t MAX_TEXTURE_UNITS = 16;
660
// If the binding currently refers to object n, reset it to the null object.
// Returns whether the binding was cleared.
template <typename T>
static inline bool unlink(T& binding, T n) {
  if (binding != n) {
    return false;
  }
  binding = 0;
  return true;
}
669
// Generic sparse store mapping GL object names (indices) to heap-allocated
// objects of type O. Name 0 is reserved as the null object.
template <typename O>
struct ObjectStore {
  O** objects = nullptr;
  size_t size = 0;
  // reserve object 0 as null
  size_t first_free = 1;
  // Fallback object returned by operator[] for out-of-range accesses.
  O invalid;

  ~ObjectStore() {
    if (objects) {
      for (size_t i = 0; i < size; i++) delete objects[i];
      free(objects);
    }
  }

  // Grow the pointer table (by ~1.5x steps) until index i is addressable.
  // Returns false if the reallocation failed.
  bool grow(size_t i) {
    size_t new_size = size ? size : 8;
    while (new_size <= i) new_size += new_size / 2;
    O** new_objects = (O**)realloc(objects, new_size * sizeof(O*));
    assert(new_objects);
    if (!new_objects) return false;
    while (size < new_size) new_objects[size++] = nullptr;
    objects = new_objects;
    return true;
  }

  // Insert a copy of o at index i, unless that slot is already occupied.
  void insert(size_t i, const O& o) {
    if (i >= size && !grow(i)) return;
    if (!objects[i]) objects[i] = new O(o);
  }

  // Find the lowest unoccupied index at or above first_free, caching it.
  size_t next_free() {
    size_t i = first_free;
    while (i < size && objects[i]) i++;
    first_free = i;
    return i;
  }

  // Allocate a new object at the first free index and return that index.
  size_t insert(const O& o = O()) {
    size_t i = next_free();
    insert(i, o);
    return i;
  }

  // Access the object at index i, default-constructing it on demand. If the
  // slot could not be allocated, returns the shared invalid object instead.
  O& operator[](size_t i) {
    insert(i, O());
    return i < size ? *objects[i] : invalid;
  }

  O* find(size_t i) const { return i < size ? objects[i] : nullptr; }

  // SFINAE dispatch: the second overload is selected only when T declares an
  // on_erase() member, giving objects a destruction hook.
  template <typename T>
  void on_erase(T*, ...) {}
  template <typename T>
  void on_erase(T* o, decltype(&T::on_erase)) {
    o->on_erase();
  }

  // Delete the object at index i. Returns true if an object was removed.
  bool erase(size_t i) {
    if (i < size && objects[i]) {
      on_erase(objects[i], nullptr);
      delete objects[i];
      objects[i] = nullptr;
      if (i < first_free) first_free = i;
      return true;
    }
    return false;
  }

  O** begin() const { return objects; }
  O** end() const { return &objects[size]; }
};
742
// All mutable state for one SWGL context: the object stores for each GL
// object type, the current bindings, and fixed-function pipeline state.
struct Context {
  // Reference count for shared ownership of the context.
  int32_t references = 1;

  ObjectStore<Query> queries;
  ObjectStore<Buffer> buffers;
  ObjectStore<Texture> textures;
  ObjectStore<VertexArray> vertex_arrays;
  ObjectStore<Framebuffer> framebuffers;
  ObjectStore<Renderbuffer> renderbuffers;
  ObjectStore<Shader> shaders;
  ObjectStore<Program> programs;

  GLenum last_error = GL_NO_ERROR;

  IntRect viewport = {0, 0, 0, 0};

  // Blend state (glBlendFunc/glBlendEquation/glBlendColor).
  bool blend = false;
  GLenum blendfunc_srgb = GL_ONE;
  GLenum blendfunc_drgb = GL_ZERO;
  GLenum blendfunc_sa = GL_ONE;
  GLenum blendfunc_da = GL_ZERO;
  GLenum blend_equation = GL_FUNC_ADD;
  V8<uint16_t> blendcolor = 0;
  BlendKey blend_key = BLEND_KEY_NONE;

  // Depth test state.
  bool depthtest = false;
  bool depthmask = true;
  GLenum depthfunc = GL_LESS;

  // Scissor state.
  bool scissortest = false;
  IntRect scissor = {0, 0, 0, 0};

  // Clear values for the color and depth buffers.
  GLfloat clearcolor[4] = {0, 0, 0, 0};
  GLdouble cleardepth = 1;

  int unpack_row_length = 0;

  // Statistics accumulated during rasterization.
  int shaded_rows = 0;
  int shaded_pixels = 0;

  struct TextureUnit {
    GLuint texture_2d_binding = 0;
    GLuint texture_rectangle_binding = 0;

    // Clear any binding in this unit that refers to texture n.
    void unlink(GLuint n) {
      ::unlink(texture_2d_binding, n);
      ::unlink(texture_rectangle_binding, n);
    }
  };
  TextureUnit texture_units[MAX_TEXTURE_UNITS];
  int active_texture_unit = 0;

  GLuint current_program = 0;

  GLuint current_vertex_array = 0;
  bool validate_vertex_array = true;

  GLuint pixel_pack_buffer_binding = 0;
  GLuint pixel_unpack_buffer_binding = 0;
  GLuint array_buffer_binding = 0;
  GLuint time_elapsed_query = 0;
  GLuint samples_passed_query = 0;
  GLuint renderbuffer_binding = 0;
  GLuint draw_framebuffer_binding = 0;
  GLuint read_framebuffer_binding = 0;
  GLuint unknown_binding = 0;

  // Map a GL binding target to the context slot storing its binding.
  // Unknown targets assert in debug builds and share a dummy slot.
  GLuint& get_binding(GLenum name) {
    switch (name) {
      case GL_PIXEL_PACK_BUFFER:
        return pixel_pack_buffer_binding;
      case GL_PIXEL_UNPACK_BUFFER:
        return pixel_unpack_buffer_binding;
      case GL_ARRAY_BUFFER:
        return array_buffer_binding;
      case GL_ELEMENT_ARRAY_BUFFER:
        // The element array buffer binding lives in the current VAO's state.
        return vertex_arrays[current_vertex_array].element_array_buffer_binding;
      case GL_TEXTURE_2D:
        return texture_units[active_texture_unit].texture_2d_binding;
      case GL_TEXTURE_RECTANGLE:
        return texture_units[active_texture_unit].texture_rectangle_binding;
      case GL_TIME_ELAPSED:
        return time_elapsed_query;
      case GL_SAMPLES_PASSED:
        return samples_passed_query;
      case GL_RENDERBUFFER:
        return renderbuffer_binding;
      case GL_DRAW_FRAMEBUFFER:
        return draw_framebuffer_binding;
      case GL_READ_FRAMEBUFFER:
        return read_framebuffer_binding;
      default:
        debugf("unknown binding %x\n", name);
        assert(false);
        return unknown_binding;
    }
  }

  // Fetch the texture bound on the given unit for each sampler type.
  Texture& get_texture(sampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(isampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(sampler2DRect, int unit) {
    return textures[texture_units[unit].texture_rectangle_binding];
  }

  // Clip bb to the scissor rect (translated by origin) when scissoring is on.
  IntRect apply_scissor(IntRect bb,
                        const IntPoint& origin = IntPoint(0, 0)) const {
    return scissortest ? bb.intersect(scissor - origin) : bb;
  }

  IntRect apply_scissor(const Texture& t) const {
    return apply_scissor(t.bounds(), t.offset);
  }
};
862 static Context* ctx = nullptr;
863 static VertexShaderImpl* vertex_shader = nullptr;
864 static FragmentShaderImpl* fragment_shader = nullptr;
865 static BlendKey blend_key = BLEND_KEY_NONE;
866
867 static void prepare_texture(Texture& t, const IntRect* skip = nullptr);
868
869 template <typename S>
init_filter(S * s,Texture & t)870 static inline void init_filter(S* s, Texture& t) {
871 // If the width is not at least 2 pixels, then we can't safely sample the end
872 // of the row with a linear filter. In that case, just punt to using nearest
873 // filtering instead.
874 s->filter = t.width >= 2 ? gl_filter_to_texture_filter(t.mag_filter)
875 : TextureFilter::NEAREST;
876 }
877
878 template <typename S>
init_sampler(S * s,Texture & t)879 static inline void init_sampler(S* s, Texture& t) {
880 prepare_texture(t);
881 s->width = t.width;
882 s->height = t.height;
883 s->stride = t.stride();
884 int bpp = t.bpp();
885 if (bpp >= 4)
886 s->stride /= 4;
887 else if (bpp == 2)
888 s->stride /= 2;
889 else
890 assert(bpp == 1);
891 // Use uint32_t* for easier sampling, but need to cast to uint8_t* or
892 // uint16_t* for formats with bpp < 4.
893 s->buf = (uint32_t*)t.buf;
894 s->format = gl_format_to_texture_format(t.internal_format);
895 }
896
897 template <typename S>
null_sampler(S * s)898 static inline void null_sampler(S* s) {
899 // For null texture data, just make the sampler provide a 1x1 buffer that is
900 // transparent black. Ensure buffer holds at least a SIMD vector of zero data
901 // for SIMD padding of unaligned loads.
902 static const uint32_t zeroBuf[sizeof(Float) / sizeof(uint32_t)] = {0};
903 s->width = 1;
904 s->height = 1;
905 s->stride = s->width;
906 s->buf = (uint32_t*)zeroBuf;
907 s->format = TextureFormat::RGBA8;
908 }
909
// Default to nearest filtering when no texture data is bound.
template <typename S>
static inline void null_filter(S* s) {
  s->filter = TextureFilter::NEAREST;
}
914
915 template <typename S>
lookup_sampler(S * s,int texture)916 S* lookup_sampler(S* s, int texture) {
917 Texture& t = ctx->get_texture(s, texture);
918 if (!t.buf) {
919 null_sampler(s);
920 null_filter(s);
921 } else {
922 init_sampler(s, t);
923 init_filter(s, t);
924 }
925 return s;
926 }
927
928 template <typename S>
lookup_isampler(S * s,int texture)929 S* lookup_isampler(S* s, int texture) {
930 Texture& t = ctx->get_texture(s, texture);
931 if (!t.buf) {
932 null_sampler(s);
933 } else {
934 init_sampler(s, t);
935 }
936 return s;
937 }
938
bytes_per_type(GLenum type)939 int bytes_per_type(GLenum type) {
940 switch (type) {
941 case GL_INT:
942 return 4;
943 case GL_FLOAT:
944 return 4;
945 case GL_UNSIGNED_SHORT:
946 return 2;
947 case GL_UNSIGNED_BYTE:
948 return 1;
949 default:
950 assert(0);
951 return 0;
952 }
953 }
954
// Widen a packed attribute of C-typed components (C is uint8_t or uint16_t
// at the call sites) into scalar type S, optionally normalizing unsigned
// integer data into [0, 1]. Components not present in the source stay zero.
template <typename S, typename C>
static inline S expand_attrib(const char* buf, size_t size, bool normalized) {
  typedef typename ElementType<S>::ty elem_type;
  S scalar = {0};
  const C* src = reinterpret_cast<const C*>(buf);
  if (normalized) {
    // Scale factor mapping the maximum value of C (e.g. 255 for uint8_t)
    // onto 1.0.
    const float scale = 1.0f / ((1 << (8 * sizeof(C))) - 1);
    for (size_t i = 0; i < size / sizeof(C); i++) {
      put_nth_component(scalar, i, elem_type(src[i]) * scale);
    }
  } else {
    for (size_t i = 0; i < size / sizeof(C); i++) {
      put_nth_component(scalar, i, elem_type(src[i]));
    }
  }
  return scalar;
}
972
// Load a single attribute value from src, widening or converting the stored
// component type to scalar type S as needed by the attribute description.
template <typename S>
static inline S load_attrib_scalar(VertexAttrib& va, const char* src) {
  if (sizeof(S) <= va.size) {
    // The stored data is at least as large as S; reinterpret it directly.
    return *reinterpret_cast<const S*>(src);
  }
  if (va.type == GL_UNSIGNED_SHORT) {
    return expand_attrib<S, uint16_t>(src, va.size, va.normalized);
  }
  if (va.type == GL_UNSIGNED_BYTE) {
    return expand_attrib<S, uint8_t>(src, va.size, va.normalized);
  }
  // Otherwise the component types must match exactly; copy the bytes that
  // are present and leave the remaining components zeroed.
  assert(sizeof(typename ElementType<S>::ty) == bytes_per_type(va.type));
  S scalar = {0};
  memcpy(&scalar, src, va.size);
  return scalar;
}
989
// Load the attribute values for one primitive's vertexes into the SIMD lanes
// of attrib. Instanced attributes (divisor != 0) splat a single per-instance
// value; per-vertex attributes fill lanes in WR's primitive vertex order for
// lines, triangles, and quads.
template <typename T>
void load_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
                 int count) {
  typedef decltype(force_scalar(attrib)) scalar_type;
  // If no buffer is available, just use a zero default.
  if (!va.buf_size) {
    attrib = T(scalar_type{0});
  } else if (va.divisor != 0) {
    char* src = (char*)va.buf + va.stride * instance + va.offset;
    assert(src + va.size <= va.buf + va.buf_size);
    attrib = T(load_attrib_scalar<scalar_type>(va, src));
  } else {
    // Specialized for WR's primitive vertex order/winding.
    if (!count) return;
    assert(count >= 2 && count <= 4);
    char* src = (char*)va.buf + va.stride * start + va.offset;
    switch (count) {
      case 2: {
        // Lines must be indexed at offsets 0, 1.
        // Line vertexes fill vertex shader SIMD lanes as 0, 1, 1, 0.
        scalar_type lanes[2] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride)};
        attrib = (T){lanes[0], lanes[1], lanes[1], lanes[0]};
        break;
      }
      case 3: {
        // Triangles must be indexed at offsets 0, 1, 2.
        // Triangle vertexes fill vertex shader SIMD lanes as 0, 1, 2, 2.
        scalar_type lanes[3] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride),
            load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        attrib = (T){lanes[0], lanes[1], lanes[2], lanes[2]};
        break;
      }
      default:
        // Quads must be successive triangles indexed at offsets 0, 1, 2, 2,
        // 1, 3. Quad vertexes fill vertex shader SIMD lanes as 0, 1, 3, 2, so
        // that the points form a convex path that can be traversed by the
        // rasterizer.
        attrib = (T){load_attrib_scalar<scalar_type>(va, src),
                     load_attrib_scalar<scalar_type>(va, src + va.stride),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 3),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        break;
    }
  }
}
1039
// Loads a "flat" (non-interpolated) attribute: a single scalar is fetched —
// from the instance for per-instance attributes, otherwise from the
// primitive's first vertex — and broadcast across `attrib`.
template <typename T>
void load_flat_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
                      int count) {
  typedef decltype(force_scalar(attrib)) scalar_type;
  // If no buffer is available, just use a zero default.
  if (!va.buf_size) {
    attrib = T{0};
    return;
  }
  char* src = nullptr;
  if (va.divisor != 0) {
    // Per-instance attribute.
    src = (char*)va.buf + va.stride * instance + va.offset;
  } else {
    // Per-vertex attribute: take the value from the first vertex.
    if (!count) return;
    src = (char*)va.buf + va.stride * start + va.offset;
  }
  assert(src + va.size <= va.buf + va.buf_size);
  attrib = T(load_attrib_scalar<scalar_type>(va, src));
}
1059
setup_program(GLuint program)1060 void setup_program(GLuint program) {
1061 if (!program) {
1062 vertex_shader = nullptr;
1063 fragment_shader = nullptr;
1064 return;
1065 }
1066 Program& p = ctx->programs[program];
1067 assert(p.impl);
1068 assert(p.vert_impl);
1069 assert(p.frag_impl);
1070 vertex_shader = p.vert_impl;
1071 fragment_shader = p.frag_impl;
1072 }
1073
1074 extern ProgramLoader load_shader(const char* name);
1075
1076 extern "C" {
1077
UseProgram(GLuint program)1078 void UseProgram(GLuint program) {
1079 if (ctx->current_program && program != ctx->current_program) {
1080 auto* p = ctx->programs.find(ctx->current_program);
1081 if (p && p->deleted) {
1082 ctx->programs.erase(ctx->current_program);
1083 }
1084 }
1085 ctx->current_program = program;
1086 setup_program(program);
1087 }
1088
SetViewport(GLint x,GLint y,GLsizei width,GLsizei height)1089 void SetViewport(GLint x, GLint y, GLsizei width, GLsizei height) {
1090 ctx->viewport = IntRect{x, y, x + width, y + height};
1091 }
1092
Enable(GLenum cap)1093 void Enable(GLenum cap) {
1094 switch (cap) {
1095 case GL_BLEND:
1096 ctx->blend = true;
1097 break;
1098 case GL_DEPTH_TEST:
1099 ctx->depthtest = true;
1100 break;
1101 case GL_SCISSOR_TEST:
1102 ctx->scissortest = true;
1103 break;
1104 }
1105 }
1106
Disable(GLenum cap)1107 void Disable(GLenum cap) {
1108 switch (cap) {
1109 case GL_BLEND:
1110 ctx->blend = false;
1111 break;
1112 case GL_DEPTH_TEST:
1113 ctx->depthtest = false;
1114 break;
1115 case GL_SCISSOR_TEST:
1116 ctx->scissortest = false;
1117 break;
1118 }
1119 }
1120
1121 // Report the last error generated and clear the error status.
GetError()1122 GLenum GetError() {
1123 GLenum error = ctx->last_error;
1124 ctx->last_error = GL_NO_ERROR;
1125 return error;
1126 }
1127
1128 // Sets the error status to out-of-memory to indicate that a buffer
1129 // or texture re-allocation failed.
out_of_memory()1130 static void out_of_memory() { ctx->last_error = GL_OUT_OF_MEMORY; }
1131
// Extension strings advertised through GL_NUM_EXTENSIONS and
// GetStringi(GL_EXTENSIONS, index).
static const char* const extensions[] = {
    "GL_ARB_blend_func_extended",
    "GL_ARB_clear_texture",
    "GL_ARB_copy_image",
    "GL_ARB_draw_instanced",
    "GL_ARB_explicit_attrib_location",
    "GL_ARB_instanced_arrays",
    "GL_ARB_invalidate_subdata",
    "GL_ARB_texture_storage",
    "GL_EXT_timer_query",
    "GL_KHR_blend_equation_advanced",
    "GL_KHR_blend_equation_advanced_coherent",
    "GL_APPLE_rgb_422",
};
1146
// Queries integer implementation limits and binding state. Only the
// parameters this implementation's users need are handled; any other pname
// asserts in debug builds and leaves params untouched.
void GetIntegerv(GLenum pname, GLint* params) {
  assert(params);
  switch (pname) {
    case GL_MAX_TEXTURE_UNITS:
    case GL_MAX_TEXTURE_IMAGE_UNITS:
      params[0] = MAX_TEXTURE_UNITS;
      break;
    case GL_MAX_TEXTURE_SIZE:
      // 32768 — effectively unbounded for a software rasterizer.
      params[0] = 1 << 15;
      break;
    case GL_MAX_ARRAY_TEXTURE_LAYERS:
      // Array textures are not supported.
      params[0] = 0;
      break;
    case GL_READ_FRAMEBUFFER_BINDING:
      params[0] = ctx->read_framebuffer_binding;
      break;
    case GL_DRAW_FRAMEBUFFER_BINDING:
      params[0] = ctx->draw_framebuffer_binding;
      break;
    case GL_PIXEL_PACK_BUFFER_BINDING:
      params[0] = ctx->pixel_pack_buffer_binding;
      break;
    case GL_PIXEL_UNPACK_BUFFER_BINDING:
      params[0] = ctx->pixel_unpack_buffer_binding;
      break;
    case GL_NUM_EXTENSIONS:
      params[0] = sizeof(extensions) / sizeof(extensions[0]);
      break;
    case GL_MAJOR_VERSION:
      params[0] = 3;
      break;
    case GL_MINOR_VERSION:
      params[0] = 2;
      break;
    case GL_MIN_PROGRAM_TEXEL_OFFSET:
      params[0] = 0;
      break;
    case GL_MAX_PROGRAM_TEXEL_OFFSET:
      params[0] = MAX_TEXEL_OFFSET;
      break;
    default:
      debugf("unhandled glGetIntegerv parameter %x\n", pname);
      assert(false);
  }
}
1192
GetBooleanv(GLenum pname,GLboolean * params)1193 void GetBooleanv(GLenum pname, GLboolean* params) {
1194 assert(params);
1195 switch (pname) {
1196 case GL_DEPTH_WRITEMASK:
1197 params[0] = ctx->depthmask;
1198 break;
1199 default:
1200 debugf("unhandled glGetBooleanv parameter %x\n", pname);
1201 assert(false);
1202 }
1203 }
1204
GetString(GLenum name)1205 const char* GetString(GLenum name) {
1206 switch (name) {
1207 case GL_VENDOR:
1208 return "Mozilla Gfx";
1209 case GL_RENDERER:
1210 return "Software WebRender";
1211 case GL_VERSION:
1212 return "3.2";
1213 case GL_SHADING_LANGUAGE_VERSION:
1214 return "1.50";
1215 default:
1216 debugf("unhandled glGetString parameter %x\n", name);
1217 assert(false);
1218 return nullptr;
1219 }
1220 }
1221
GetStringi(GLenum name,GLuint index)1222 const char* GetStringi(GLenum name, GLuint index) {
1223 switch (name) {
1224 case GL_EXTENSIONS:
1225 if (index >= sizeof(extensions) / sizeof(extensions[0])) {
1226 return nullptr;
1227 }
1228 return extensions[index];
1229 default:
1230 debugf("unhandled glGetStringi parameter %x\n", name);
1231 assert(false);
1232 return nullptr;
1233 }
1234 }
1235
remap_blendfunc(GLenum rgb,GLenum a)1236 GLenum remap_blendfunc(GLenum rgb, GLenum a) {
1237 switch (a) {
1238 case GL_SRC_ALPHA:
1239 if (rgb == GL_SRC_COLOR) a = GL_SRC_COLOR;
1240 break;
1241 case GL_ONE_MINUS_SRC_ALPHA:
1242 if (rgb == GL_ONE_MINUS_SRC_COLOR) a = GL_ONE_MINUS_SRC_COLOR;
1243 break;
1244 case GL_DST_ALPHA:
1245 if (rgb == GL_DST_COLOR) a = GL_DST_COLOR;
1246 break;
1247 case GL_ONE_MINUS_DST_ALPHA:
1248 if (rgb == GL_ONE_MINUS_DST_COLOR) a = GL_ONE_MINUS_DST_COLOR;
1249 break;
1250 case GL_CONSTANT_ALPHA:
1251 if (rgb == GL_CONSTANT_COLOR) a = GL_CONSTANT_COLOR;
1252 break;
1253 case GL_ONE_MINUS_CONSTANT_ALPHA:
1254 if (rgb == GL_ONE_MINUS_CONSTANT_COLOR) a = GL_ONE_MINUS_CONSTANT_COLOR;
1255 break;
1256 case GL_SRC_COLOR:
1257 if (rgb == GL_SRC_ALPHA) a = GL_SRC_ALPHA;
1258 break;
1259 case GL_ONE_MINUS_SRC_COLOR:
1260 if (rgb == GL_ONE_MINUS_SRC_ALPHA) a = GL_ONE_MINUS_SRC_ALPHA;
1261 break;
1262 case GL_DST_COLOR:
1263 if (rgb == GL_DST_ALPHA) a = GL_DST_ALPHA;
1264 break;
1265 case GL_ONE_MINUS_DST_COLOR:
1266 if (rgb == GL_ONE_MINUS_DST_ALPHA) a = GL_ONE_MINUS_DST_ALPHA;
1267 break;
1268 case GL_CONSTANT_COLOR:
1269 if (rgb == GL_CONSTANT_ALPHA) a = GL_CONSTANT_ALPHA;
1270 break;
1271 case GL_ONE_MINUS_CONSTANT_COLOR:
1272 if (rgb == GL_ONE_MINUS_CONSTANT_ALPHA) a = GL_ONE_MINUS_CONSTANT_ALPHA;
1273 break;
1274 case GL_SRC1_ALPHA:
1275 if (rgb == GL_SRC1_COLOR) a = GL_SRC1_COLOR;
1276 break;
1277 case GL_ONE_MINUS_SRC1_ALPHA:
1278 if (rgb == GL_ONE_MINUS_SRC1_COLOR) a = GL_ONE_MINUS_SRC1_COLOR;
1279 break;
1280 case GL_SRC1_COLOR:
1281 if (rgb == GL_SRC1_ALPHA) a = GL_SRC1_ALPHA;
1282 break;
1283 case GL_ONE_MINUS_SRC1_COLOR:
1284 if (rgb == GL_ONE_MINUS_SRC1_ALPHA) a = GL_ONE_MINUS_SRC1_ALPHA;
1285 break;
1286 }
1287 return a;
1288 }
1289
1290 // Generate a hashed blend key based on blend func and equation state. This
1291 // allows all the blend state to be processed down to a blend key that can be
1292 // dealt with inside a single switch statement.
hash_blend_key()1293 static void hash_blend_key() {
1294 GLenum srgb = ctx->blendfunc_srgb;
1295 GLenum drgb = ctx->blendfunc_drgb;
1296 GLenum sa = ctx->blendfunc_sa;
1297 GLenum da = ctx->blendfunc_da;
1298 GLenum equation = ctx->blend_equation;
1299 #define HASH_BLEND_KEY(x, y, z, w) ((x << 4) | (y) | (z << 24) | (w << 20))
1300 // Basic non-separate blend funcs used the two argument form
1301 int hash = HASH_BLEND_KEY(srgb, drgb, 0, 0);
1302 // Separate alpha blend funcs use the 4 argument hash
1303 if (srgb != sa || drgb != da) hash |= HASH_BLEND_KEY(0, 0, sa, da);
1304 // Any other blend equation than the default func_add ignores the func and
1305 // instead generates a one-argument hash based on the equation
1306 if (equation != GL_FUNC_ADD) hash = HASH_BLEND_KEY(equation, 0, 0, 0);
1307 switch (hash) {
1308 #define MAP_BLEND_KEY(...) \
1309 case HASH_BLEND_KEY(__VA_ARGS__): \
1310 ctx->blend_key = BLEND_KEY(__VA_ARGS__); \
1311 break;
1312 FOR_EACH_BLEND_KEY(MAP_BLEND_KEY)
1313 default:
1314 debugf("blendfunc: %x, %x, separate: %x, %x, equation: %x\n", srgb, drgb,
1315 sa, da, equation);
1316 assert(false);
1317 break;
1318 }
1319 }
1320
BlendFunc(GLenum srgb,GLenum drgb,GLenum sa,GLenum da)1321 void BlendFunc(GLenum srgb, GLenum drgb, GLenum sa, GLenum da) {
1322 ctx->blendfunc_srgb = srgb;
1323 ctx->blendfunc_drgb = drgb;
1324 sa = remap_blendfunc(srgb, sa);
1325 da = remap_blendfunc(drgb, da);
1326 ctx->blendfunc_sa = sa;
1327 ctx->blendfunc_da = da;
1328
1329 hash_blend_key();
1330 }
1331
void BlendColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
  // Note the B, G, R, A lane order — presumably matching the internal pixel
  // layout used by the blend pipeline; confirm against the blend code before
  // changing.
  I32 c = round_pixel((Float){b, g, r, a});
  // Narrow to 16-bit lanes and replicate across two pixels' worth of lanes.
  ctx->blendcolor = CONVERT(c, U16).xyzwxyzw;
}
1336
BlendEquation(GLenum mode)1337 void BlendEquation(GLenum mode) {
1338 assert(mode == GL_FUNC_ADD || mode == GL_MIN || mode == GL_MAX ||
1339 (mode >= GL_MULTIPLY_KHR && mode <= GL_HSL_LUMINOSITY_KHR));
1340 if (mode != ctx->blend_equation) {
1341 ctx->blend_equation = mode;
1342 hash_blend_key();
1343 }
1344 }
1345
DepthMask(GLboolean flag)1346 void DepthMask(GLboolean flag) { ctx->depthmask = flag; }
1347
DepthFunc(GLenum func)1348 void DepthFunc(GLenum func) {
1349 switch (func) {
1350 case GL_LESS:
1351 case GL_LEQUAL:
1352 break;
1353 default:
1354 assert(false);
1355 }
1356 ctx->depthfunc = func;
1357 }
1358
SetScissor(GLint x,GLint y,GLsizei width,GLsizei height)1359 void SetScissor(GLint x, GLint y, GLsizei width, GLsizei height) {
1360 ctx->scissor = IntRect{x, y, x + width, y + height};
1361 }
1362
ClearColor(GLfloat r,GLfloat g,GLfloat b,GLfloat a)1363 void ClearColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
1364 ctx->clearcolor[0] = r;
1365 ctx->clearcolor[1] = g;
1366 ctx->clearcolor[2] = b;
1367 ctx->clearcolor[3] = a;
1368 }
1369
ClearDepth(GLdouble depth)1370 void ClearDepth(GLdouble depth) { ctx->cleardepth = depth; }
1371
ActiveTexture(GLenum texture)1372 void ActiveTexture(GLenum texture) {
1373 assert(texture >= GL_TEXTURE0);
1374 assert(texture < GL_TEXTURE0 + MAX_TEXTURE_UNITS);
1375 ctx->active_texture_unit =
1376 clamp(int(texture - GL_TEXTURE0), 0, int(MAX_TEXTURE_UNITS - 1));
1377 }
1378
GenQueries(GLsizei n,GLuint * result)1379 void GenQueries(GLsizei n, GLuint* result) {
1380 for (int i = 0; i < n; i++) {
1381 Query q;
1382 result[i] = ctx->queries.insert(q);
1383 }
1384 }
1385
DeleteQuery(GLuint n)1386 void DeleteQuery(GLuint n) {
1387 if (n && ctx->queries.erase(n)) {
1388 unlink(ctx->time_elapsed_query, n);
1389 unlink(ctx->samples_passed_query, n);
1390 }
1391 }
1392
GenBuffers(int n,GLuint * result)1393 void GenBuffers(int n, GLuint* result) {
1394 for (int i = 0; i < n; i++) {
1395 Buffer b;
1396 result[i] = ctx->buffers.insert(b);
1397 }
1398 }
1399
DeleteBuffer(GLuint n)1400 void DeleteBuffer(GLuint n) {
1401 if (n && ctx->buffers.erase(n)) {
1402 unlink(ctx->pixel_pack_buffer_binding, n);
1403 unlink(ctx->pixel_unpack_buffer_binding, n);
1404 unlink(ctx->array_buffer_binding, n);
1405 }
1406 }
1407
GenVertexArrays(int n,GLuint * result)1408 void GenVertexArrays(int n, GLuint* result) {
1409 for (int i = 0; i < n; i++) {
1410 VertexArray v;
1411 result[i] = ctx->vertex_arrays.insert(v);
1412 }
1413 }
1414
DeleteVertexArray(GLuint n)1415 void DeleteVertexArray(GLuint n) {
1416 if (n && ctx->vertex_arrays.erase(n)) {
1417 unlink(ctx->current_vertex_array, n);
1418 }
1419 }
1420
CreateShader(GLenum type)1421 GLuint CreateShader(GLenum type) {
1422 Shader s;
1423 s.type = type;
1424 return ctx->shaders.insert(s);
1425 }
1426
ShaderSourceByName(GLuint shader,char * name)1427 void ShaderSourceByName(GLuint shader, char* name) {
1428 Shader& s = ctx->shaders[shader];
1429 s.loader = load_shader(name);
1430 if (!s.loader) {
1431 debugf("unknown shader %s\n", name);
1432 }
1433 }
1434
AttachShader(GLuint program,GLuint shader)1435 void AttachShader(GLuint program, GLuint shader) {
1436 Program& p = ctx->programs[program];
1437 Shader& s = ctx->shaders[shader];
1438 if (s.type == GL_VERTEX_SHADER) {
1439 if (!p.impl && s.loader) p.impl = s.loader();
1440 } else if (s.type == GL_FRAGMENT_SHADER) {
1441 if (!p.impl && s.loader) p.impl = s.loader();
1442 } else {
1443 assert(0);
1444 }
1445 }
1446
DeleteShader(GLuint n)1447 void DeleteShader(GLuint n) {
1448 if (n) ctx->shaders.erase(n);
1449 }
1450
CreateProgram()1451 GLuint CreateProgram() {
1452 Program p;
1453 return ctx->programs.insert(p);
1454 }
1455
DeleteProgram(GLuint n)1456 void DeleteProgram(GLuint n) {
1457 if (!n) return;
1458 if (ctx->current_program == n) {
1459 if (auto* p = ctx->programs.find(n)) {
1460 p->deleted = true;
1461 }
1462 } else {
1463 ctx->programs.erase(n);
1464 }
1465 }
1466
LinkProgram(GLuint program)1467 void LinkProgram(GLuint program) {
1468 Program& p = ctx->programs[program];
1469 assert(p.impl);
1470 if (!p.impl) {
1471 return;
1472 }
1473 assert(p.impl->interpolants_size() <= sizeof(Interpolants));
1474 if (!p.vert_impl) p.vert_impl = p.impl->get_vertex_shader();
1475 if (!p.frag_impl) p.frag_impl = p.impl->get_fragment_shader();
1476 }
1477
GetLinkStatus(GLuint program)1478 GLint GetLinkStatus(GLuint program) {
1479 if (auto* p = ctx->programs.find(program)) {
1480 return p->impl ? 1 : 0;
1481 }
1482 return 0;
1483 }
1484
BindAttribLocation(GLuint program,GLuint index,char * name)1485 void BindAttribLocation(GLuint program, GLuint index, char* name) {
1486 Program& p = ctx->programs[program];
1487 assert(p.impl);
1488 if (!p.impl) {
1489 return;
1490 }
1491 p.impl->bind_attrib(name, index);
1492 }
1493
GetAttribLocation(GLuint program,char * name)1494 GLint GetAttribLocation(GLuint program, char* name) {
1495 Program& p = ctx->programs[program];
1496 assert(p.impl);
1497 if (!p.impl) {
1498 return -1;
1499 }
1500 return p.impl->get_attrib(name);
1501 }
1502
GetUniformLocation(GLuint program,char * name)1503 GLint GetUniformLocation(GLuint program, char* name) {
1504 Program& p = ctx->programs[program];
1505 assert(p.impl);
1506 if (!p.impl) {
1507 return -1;
1508 }
1509 GLint loc = p.impl->get_uniform(name);
1510 // debugf("location: %d\n", loc);
1511 return loc;
1512 }
1513
// Returns a monotonic timestamp in nanoseconds, used by GL_TIME_ELAPSED
// queries. NOTE(review): on Mach the raw mach_absolute_time tick count is
// returned without timebase conversion — presumably callers only need
// relative durations in consistent units; confirm before relying on ns.
static uint64_t get_time_value() {
#ifdef __MACH__
  return mach_absolute_time();
#elif defined(_WIN32)
  LARGE_INTEGER time;
  // The QPC frequency is fixed for the process lifetime, so query it once.
  static bool have_frequency = false;
  static LARGE_INTEGER frequency;
  if (!have_frequency) {
    QueryPerformanceFrequency(&frequency);
    have_frequency = true;
  }
  QueryPerformanceCounter(&time);
  // Scale counter ticks to nanoseconds.
  return time.QuadPart * 1000000000ULL / frequency.QuadPart;
#else
  // POSIX fallback: CLOCK_MONOTONIC in nanoseconds.
  return ({
    struct timespec tp;
    clock_gettime(CLOCK_MONOTONIC, &tp);
    tp.tv_sec * 1000000000ULL + tp.tv_nsec;
  });
#endif
}
1535
BeginQuery(GLenum target,GLuint id)1536 void BeginQuery(GLenum target, GLuint id) {
1537 ctx->get_binding(target) = id;
1538 Query& q = ctx->queries[id];
1539 switch (target) {
1540 case GL_SAMPLES_PASSED:
1541 q.value = 0;
1542 break;
1543 case GL_TIME_ELAPSED:
1544 q.value = get_time_value();
1545 break;
1546 default:
1547 debugf("unknown query target %x for query %d\n", target, id);
1548 assert(false);
1549 }
1550 }
1551
EndQuery(GLenum target)1552 void EndQuery(GLenum target) {
1553 Query& q = ctx->queries[ctx->get_binding(target)];
1554 switch (target) {
1555 case GL_SAMPLES_PASSED:
1556 break;
1557 case GL_TIME_ELAPSED:
1558 q.value = get_time_value() - q.value;
1559 break;
1560 default:
1561 debugf("unknown query target %x\n", target);
1562 assert(false);
1563 }
1564 ctx->get_binding(target) = 0;
1565 }
1566
GetQueryObjectui64v(GLuint id,GLenum pname,GLuint64 * params)1567 void GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64* params) {
1568 Query& q = ctx->queries[id];
1569 switch (pname) {
1570 case GL_QUERY_RESULT:
1571 assert(params);
1572 params[0] = q.value;
1573 break;
1574 default:
1575 assert(false);
1576 }
1577 }
1578
BindVertexArray(GLuint vertex_array)1579 void BindVertexArray(GLuint vertex_array) {
1580 if (vertex_array != ctx->current_vertex_array) {
1581 ctx->validate_vertex_array = true;
1582 }
1583 ctx->current_vertex_array = vertex_array;
1584 }
1585
BindTexture(GLenum target,GLuint texture)1586 void BindTexture(GLenum target, GLuint texture) {
1587 ctx->get_binding(target) = texture;
1588 }
1589
BindBuffer(GLenum target,GLuint buffer)1590 void BindBuffer(GLenum target, GLuint buffer) {
1591 ctx->get_binding(target) = buffer;
1592 }
1593
BindFramebuffer(GLenum target,GLuint fb)1594 void BindFramebuffer(GLenum target, GLuint fb) {
1595 if (target == GL_FRAMEBUFFER) {
1596 ctx->read_framebuffer_binding = fb;
1597 ctx->draw_framebuffer_binding = fb;
1598 } else {
1599 assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
1600 ctx->get_binding(target) = fb;
1601 }
1602 }
1603
BindRenderbuffer(GLenum target,GLuint rb)1604 void BindRenderbuffer(GLenum target, GLuint rb) {
1605 ctx->get_binding(target) = rb;
1606 }
1607
PixelStorei(GLenum name,GLint param)1608 void PixelStorei(GLenum name, GLint param) {
1609 if (name == GL_UNPACK_ALIGNMENT) {
1610 assert(param == 1);
1611 } else if (name == GL_UNPACK_ROW_LENGTH) {
1612 ctx->unpack_row_length = param;
1613 }
1614 }
1615
remap_internal_format(GLenum format)1616 static GLenum remap_internal_format(GLenum format) {
1617 switch (format) {
1618 case GL_DEPTH_COMPONENT:
1619 return GL_DEPTH_COMPONENT24;
1620 case GL_RGBA:
1621 return GL_RGBA8;
1622 case GL_RED:
1623 return GL_R8;
1624 case GL_RG:
1625 return GL_RG8;
1626 case GL_RGB_422_APPLE:
1627 return GL_RGB_RAW_422_APPLE;
1628 default:
1629 return format;
1630 }
1631 }
1632
1633 } // extern "C"
1634
format_requires_conversion(GLenum external_format,GLenum internal_format)1635 static bool format_requires_conversion(GLenum external_format,
1636 GLenum internal_format) {
1637 switch (external_format) {
1638 case GL_RGBA:
1639 return internal_format == GL_RGBA8;
1640 default:
1641 return false;
1642 }
1643 }
1644
// Copies `width` 32-bit pixels from src to dest, swapping the red and blue
// channels of each pixel along the way.
static inline void copy_bgra8_to_rgba8(uint32_t* dest, const uint32_t* src,
                                       int width) {
  // Vectorized main loop: 4 pixels per iteration.
  for (; width >= 4; width -= 4, dest += 4, src += 4) {
    U32 p = unaligned_load<U32>(src);
    // Isolate the R and B bytes, then swap them via paired 16-bit shifts.
    U32 rb = p & 0x00FF00FF;
    unaligned_store(dest, (p & 0xFF00FF00) | (rb << 16) | (rb >> 16));
  }
  // Scalar tail for the remaining 0-3 pixels.
  for (; width > 0; width--, dest++, src++) {
    uint32_t p = *src;
    uint32_t rb = p & 0x00FF00FF;
    *dest = (p & 0xFF00FF00) | (rb << 16) | (rb >> 16);
  }
}
1658
// Copies a width x height block of pixels between buffers with the given row
// strides, converting from the external format to the internal format when
// required (currently only the RGBA -> RGBA8 red/blue swap); every other
// combination is copied row-by-row unchanged.
static void convert_copy(GLenum external_format, GLenum internal_format,
                         uint8_t* dst_buf, size_t dst_stride,
                         const uint8_t* src_buf, size_t src_stride,
                         size_t width, size_t height) {
  switch (external_format) {
    case GL_RGBA:
      if (internal_format == GL_RGBA8) {
        // Channel-swapping copy, one row at a time.
        for (; height; height--) {
          copy_bgra8_to_rgba8((uint32_t*)dst_buf, (const uint32_t*)src_buf,
                              width);
          dst_buf += dst_stride;
          src_buf += src_stride;
        }
        return;
      }
      break;
    default:
      break;
  }
  // No conversion required: plain row-by-row memcpy.
  size_t row_bytes = width * bytes_for_internal_format(internal_format);
  for (; height; height--) {
    memcpy(dst_buf, src_buf, row_bytes);
    dst_buf += dst_stride;
    src_buf += src_stride;
  }
}
1685
// (Re)allocates storage for texture `t` with the given format and size. If
// `buf` is supplied, the texture may reference it as externally owned storage
// with the given `stride` — unless the data needs format conversion, in which
// case an internal buffer is allocated and the data is converted into it.
// min_width/min_height are passed through to the allocator. A failed
// allocation records GL_OUT_OF_MEMORY.
static void set_tex_storage(Texture& t, GLenum external_format, GLsizei width,
                            GLsizei height, void* buf = nullptr,
                            GLsizei stride = 0, GLsizei min_width = 0,
                            GLsizei min_height = 0) {
  GLenum internal_format = remap_internal_format(external_format);
  bool changed = false;
  if (t.width != width || t.height != height ||
      t.internal_format != internal_format) {
    changed = true;
    t.internal_format = internal_format;
    t.width = width;
    t.height = height;
  }
  // If we are changed from an internally managed buffer to an externally
  // supplied one or vice versa, ensure that we clean up old buffer state.
  // However, if we have to convert the data from a non-native format, then
  // always treat it as internally managed since we will need to copy to an
  // internally managed native format buffer.
  bool should_free = buf == nullptr || format_requires_conversion(
                                           external_format, internal_format);
  if (t.should_free() != should_free) {
    changed = true;
    t.cleanup();
    t.set_should_free(should_free);
  }
  // If now an external buffer, explicitly set it...
  if (!should_free) {
    t.set_buffer(buf, stride);
  }
  t.disable_delayed_clear();
  if (!t.allocate(changed, min_width, min_height)) {
    out_of_memory();
  }
  // If we have a buffer that needs format conversion, then do that now.
  if (buf && should_free) {
    convert_copy(external_format, internal_format, (uint8_t*)t.buf, t.stride(),
                 (const uint8_t*)buf, stride, width, height);
  }
}
1725
1726 extern "C" {
1727
TexStorage2D(GLenum target,GLint levels,GLenum internal_format,GLsizei width,GLsizei height)1728 void TexStorage2D(GLenum target, GLint levels, GLenum internal_format,
1729 GLsizei width, GLsizei height) {
1730 assert(levels == 1);
1731 Texture& t = ctx->textures[ctx->get_binding(target)];
1732 set_tex_storage(t, internal_format, width, height);
1733 }
1734
internal_format_for_data(GLenum format,GLenum ty)1735 GLenum internal_format_for_data(GLenum format, GLenum ty) {
1736 if (format == GL_RED && ty == GL_UNSIGNED_BYTE) {
1737 return GL_R8;
1738 } else if ((format == GL_RGBA || format == GL_BGRA) &&
1739 (ty == GL_UNSIGNED_BYTE || ty == GL_UNSIGNED_INT_8_8_8_8_REV)) {
1740 return GL_RGBA8;
1741 } else if (format == GL_RGBA && ty == GL_FLOAT) {
1742 return GL_RGBA32F;
1743 } else if (format == GL_RGBA_INTEGER && ty == GL_INT) {
1744 return GL_RGBA32I;
1745 } else if (format == GL_RG && ty == GL_UNSIGNED_BYTE) {
1746 return GL_RG8;
1747 } else if (format == GL_RGB_422_APPLE &&
1748 ty == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
1749 return GL_RGB_RAW_422_APPLE;
1750 } else if (format == GL_RED && ty == GL_UNSIGNED_SHORT) {
1751 return GL_R16;
1752 } else if (format == GL_RG && ty == GL_UNSIGNED_SHORT) {
1753 return GL_RG16;
1754 } else {
1755 debugf("unknown internal format for format %x, type %x\n", format, ty);
1756 assert(false);
1757 return 0;
1758 }
1759 }
1760
get_pixel_pack_buffer()1761 static Buffer* get_pixel_pack_buffer() {
1762 return ctx->pixel_pack_buffer_binding
1763 ? &ctx->buffers[ctx->pixel_pack_buffer_binding]
1764 : nullptr;
1765 }
1766
get_pixel_pack_buffer_data(void * data)1767 static void* get_pixel_pack_buffer_data(void* data) {
1768 if (Buffer* b = get_pixel_pack_buffer()) {
1769 return b->buf ? b->buf + (size_t)data : nullptr;
1770 }
1771 return data;
1772 }
1773
get_pixel_unpack_buffer()1774 static Buffer* get_pixel_unpack_buffer() {
1775 return ctx->pixel_unpack_buffer_binding
1776 ? &ctx->buffers[ctx->pixel_unpack_buffer_binding]
1777 : nullptr;
1778 }
1779
get_pixel_unpack_buffer_data(void * data)1780 static void* get_pixel_unpack_buffer_data(void* data) {
1781 if (Buffer* b = get_pixel_unpack_buffer()) {
1782 return b->buf ? b->buf + (size_t)data : nullptr;
1783 }
1784 return data;
1785 }
1786
// Uploads a sub-rectangle of pixel data into the texture bound to `target`.
// Only level 0 is supported. `data` is either a client pointer or, when a
// pixel-unpack buffer is bound, an offset into that buffer.
void TexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset,
                   GLsizei width, GLsizei height, GLenum format, GLenum ty,
                   void* data) {
  if (level != 0) {
    assert(false);
    return;
  }
  data = get_pixel_unpack_buffer_data(data);
  if (!data) return;
  Texture& t = ctx->textures[ctx->get_binding(target)];
  // Pass the destination rect to prepare_texture — named "skip", presumably
  // so preparation (delayed clears) can omit the region we overwrite anyway;
  // confirm against prepare_texture before relying on this.
  IntRect skip = {xoffset, yoffset, xoffset + width, yoffset + height};
  prepare_texture(t, &skip);
  assert(xoffset + width <= t.width);
  assert(yoffset + height <= t.height);
  assert(ctx->unpack_row_length == 0 || ctx->unpack_row_length >= width);
  // A row length of 0 means rows are tightly packed at `width` pixels.
  GLsizei row_length =
      ctx->unpack_row_length != 0 ? ctx->unpack_row_length : width;
  assert(t.internal_format == internal_format_for_data(format, ty));
  // When converting (e.g. an RGBA upload into RGBA8 storage), the source
  // bytes-per-pixel is derived from the external format rather than the
  // texture's native bpp.
  int src_bpp = format_requires_conversion(format, t.internal_format)
                    ? bytes_for_internal_format(format)
                    : t.bpp();
  if (!src_bpp || !t.buf) return;
  convert_copy(format, t.internal_format,
               (uint8_t*)t.sample_ptr(xoffset, yoffset), t.stride(),
               (const uint8_t*)data, row_length * src_bpp, width, height);
}
1813
TexImage2D(GLenum target,GLint level,GLint internal_format,GLsizei width,GLsizei height,GLint border,GLenum format,GLenum ty,void * data)1814 void TexImage2D(GLenum target, GLint level, GLint internal_format,
1815 GLsizei width, GLsizei height, GLint border, GLenum format,
1816 GLenum ty, void* data) {
1817 if (level != 0) {
1818 assert(false);
1819 return;
1820 }
1821 assert(border == 0);
1822 TexStorage2D(target, 1, internal_format, width, height);
1823 TexSubImage2D(target, 0, 0, 0, width, height, format, ty, data);
1824 }
1825
void GenerateMipmap(UNUSED GLenum target) {
  // Intentionally a no-op: mipmapped textures are not implemented.
  // TODO: support mipmaps
}
1829
SetTextureParameter(GLuint texid,GLenum pname,GLint param)1830 void SetTextureParameter(GLuint texid, GLenum pname, GLint param) {
1831 Texture& t = ctx->textures[texid];
1832 switch (pname) {
1833 case GL_TEXTURE_WRAP_S:
1834 assert(param == GL_CLAMP_TO_EDGE);
1835 break;
1836 case GL_TEXTURE_WRAP_T:
1837 assert(param == GL_CLAMP_TO_EDGE);
1838 break;
1839 case GL_TEXTURE_MIN_FILTER:
1840 t.min_filter = param;
1841 break;
1842 case GL_TEXTURE_MAG_FILTER:
1843 t.mag_filter = param;
1844 break;
1845 default:
1846 break;
1847 }
1848 }
1849
TexParameteri(GLenum target,GLenum pname,GLint param)1850 void TexParameteri(GLenum target, GLenum pname, GLint param) {
1851 SetTextureParameter(ctx->get_binding(target), pname, param);
1852 }
1853
GenTextures(int n,GLuint * result)1854 void GenTextures(int n, GLuint* result) {
1855 for (int i = 0; i < n; i++) {
1856 Texture t;
1857 result[i] = ctx->textures.insert(t);
1858 }
1859 }
1860
DeleteTexture(GLuint n)1861 void DeleteTexture(GLuint n) {
1862 if (n && ctx->textures.erase(n)) {
1863 for (size_t i = 0; i < MAX_TEXTURE_UNITS; i++) {
1864 ctx->texture_units[i].unlink(n);
1865 }
1866 }
1867 }
1868
GenRenderbuffers(int n,GLuint * result)1869 void GenRenderbuffers(int n, GLuint* result) {
1870 for (int i = 0; i < n; i++) {
1871 Renderbuffer r;
1872 result[i] = ctx->renderbuffers.insert(r);
1873 }
1874 }
1875
on_erase()1876 void Renderbuffer::on_erase() {
1877 for (auto* fb : ctx->framebuffers) {
1878 if (fb) {
1879 unlink(fb->color_attachment, texture);
1880 unlink(fb->depth_attachment, texture);
1881 }
1882 }
1883 DeleteTexture(texture);
1884 }
1885
DeleteRenderbuffer(GLuint n)1886 void DeleteRenderbuffer(GLuint n) {
1887 if (n && ctx->renderbuffers.erase(n)) {
1888 unlink(ctx->renderbuffer_binding, n);
1889 }
1890 }
1891
GenFramebuffers(int n,GLuint * result)1892 void GenFramebuffers(int n, GLuint* result) {
1893 for (int i = 0; i < n; i++) {
1894 Framebuffer f;
1895 result[i] = ctx->framebuffers.insert(f);
1896 }
1897 }
1898
DeleteFramebuffer(GLuint n)1899 void DeleteFramebuffer(GLuint n) {
1900 if (n && ctx->framebuffers.erase(n)) {
1901 unlink(ctx->read_framebuffer_binding, n);
1902 unlink(ctx->draw_framebuffer_binding, n);
1903 }
1904 }
1905
RenderbufferStorage(GLenum target,GLenum internal_format,GLsizei width,GLsizei height)1906 void RenderbufferStorage(GLenum target, GLenum internal_format, GLsizei width,
1907 GLsizei height) {
1908 // Just refer a renderbuffer to a texture to simplify things for now...
1909 Renderbuffer& r = ctx->renderbuffers[ctx->get_binding(target)];
1910 if (!r.texture) {
1911 GenTextures(1, &r.texture);
1912 }
1913 switch (internal_format) {
1914 case GL_DEPTH_COMPONENT:
1915 case GL_DEPTH_COMPONENT16:
1916 case GL_DEPTH_COMPONENT24:
1917 case GL_DEPTH_COMPONENT32:
1918 // Force depth format to 24 bits...
1919 internal_format = GL_DEPTH_COMPONENT24;
1920 break;
1921 }
1922 set_tex_storage(ctx->textures[r.texture], internal_format, width, height);
1923 }
1924
VertexAttribPointer(GLuint index,GLint size,GLenum type,bool normalized,GLsizei stride,GLuint offset)1925 void VertexAttribPointer(GLuint index, GLint size, GLenum type, bool normalized,
1926 GLsizei stride, GLuint offset) {
1927 // debugf("cva: %d\n", ctx->current_vertex_array);
1928 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1929 if (index >= NULL_ATTRIB) {
1930 assert(0);
1931 return;
1932 }
1933 VertexAttrib& va = v.attribs[index];
1934 va.size = size * bytes_per_type(type);
1935 va.type = type;
1936 va.normalized = normalized;
1937 va.stride = stride;
1938 va.offset = offset;
1939 // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding];
1940 va.vertex_buffer = ctx->array_buffer_binding;
1941 va.vertex_array = ctx->current_vertex_array;
1942 ctx->validate_vertex_array = true;
1943 }
1944
VertexAttribIPointer(GLuint index,GLint size,GLenum type,GLsizei stride,GLuint offset)1945 void VertexAttribIPointer(GLuint index, GLint size, GLenum type, GLsizei stride,
1946 GLuint offset) {
1947 // debugf("cva: %d\n", ctx->current_vertex_array);
1948 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1949 if (index >= NULL_ATTRIB) {
1950 assert(0);
1951 return;
1952 }
1953 VertexAttrib& va = v.attribs[index];
1954 va.size = size * bytes_per_type(type);
1955 va.type = type;
1956 va.normalized = false;
1957 va.stride = stride;
1958 va.offset = offset;
1959 // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding];
1960 va.vertex_buffer = ctx->array_buffer_binding;
1961 va.vertex_array = ctx->current_vertex_array;
1962 ctx->validate_vertex_array = true;
1963 }
1964
EnableVertexAttribArray(GLuint index)1965 void EnableVertexAttribArray(GLuint index) {
1966 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1967 if (index >= NULL_ATTRIB) {
1968 assert(0);
1969 return;
1970 }
1971 VertexAttrib& va = v.attribs[index];
1972 if (!va.enabled) {
1973 ctx->validate_vertex_array = true;
1974 }
1975 va.enabled = true;
1976 v.max_attrib = max(v.max_attrib, (int)index);
1977 }
1978
DisableVertexAttribArray(GLuint index)1979 void DisableVertexAttribArray(GLuint index) {
1980 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1981 if (index >= NULL_ATTRIB) {
1982 assert(0);
1983 return;
1984 }
1985 VertexAttrib& va = v.attribs[index];
1986 if (va.enabled) {
1987 ctx->validate_vertex_array = true;
1988 }
1989 va.disable();
1990 }
1991
VertexAttribDivisor(GLuint index,GLuint divisor)1992 void VertexAttribDivisor(GLuint index, GLuint divisor) {
1993 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1994 // Only support divisor being 0 (per-vertex) or 1 (per-instance).
1995 if (index >= NULL_ATTRIB || divisor > 1) {
1996 assert(0);
1997 return;
1998 }
1999 VertexAttrib& va = v.attribs[index];
2000 va.divisor = divisor;
2001 }
2002
BufferData(GLenum target,GLsizeiptr size,void * data,UNUSED GLenum usage)2003 void BufferData(GLenum target, GLsizeiptr size, void* data,
2004 UNUSED GLenum usage) {
2005 Buffer& b = ctx->buffers[ctx->get_binding(target)];
2006 if (size != b.size) {
2007 if (!b.allocate(size)) {
2008 out_of_memory();
2009 }
2010 ctx->validate_vertex_array = true;
2011 }
2012 if (data && b.buf && size <= b.size) {
2013 memcpy(b.buf, data, size);
2014 }
2015 }
2016
BufferSubData(GLenum target,GLintptr offset,GLsizeiptr size,void * data)2017 void BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
2018 void* data) {
2019 Buffer& b = ctx->buffers[ctx->get_binding(target)];
2020 assert(offset + size <= b.size);
2021 if (data && b.buf && offset + size <= b.size) {
2022 memcpy(&b.buf[offset], data, size);
2023 }
2024 }
2025
MapBuffer(GLenum target,UNUSED GLbitfield access)2026 void* MapBuffer(GLenum target, UNUSED GLbitfield access) {
2027 Buffer& b = ctx->buffers[ctx->get_binding(target)];
2028 return b.buf;
2029 }
2030
MapBufferRange(GLenum target,GLintptr offset,GLsizeiptr length,UNUSED GLbitfield access)2031 void* MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length,
2032 UNUSED GLbitfield access) {
2033 Buffer& b = ctx->buffers[ctx->get_binding(target)];
2034 if (b.buf && offset >= 0 && length > 0 && offset + length <= b.size) {
2035 return b.buf + offset;
2036 }
2037 return nullptr;
2038 }
2039
UnmapBuffer(GLenum target)2040 GLboolean UnmapBuffer(GLenum target) {
2041 Buffer& b = ctx->buffers[ctx->get_binding(target)];
2042 return b.buf != nullptr;
2043 }
2044
Uniform1i(GLint location,GLint V0)2045 void Uniform1i(GLint location, GLint V0) {
2046 // debugf("tex: %d\n", (int)ctx->textures.size);
2047 if (vertex_shader) {
2048 vertex_shader->set_uniform_1i(location, V0);
2049 }
2050 }
Uniform4fv(GLint location,GLsizei count,const GLfloat * v)2051 void Uniform4fv(GLint location, GLsizei count, const GLfloat* v) {
2052 assert(count == 1);
2053 if (vertex_shader) {
2054 vertex_shader->set_uniform_4fv(location, v);
2055 }
2056 }
UniformMatrix4fv(GLint location,GLsizei count,GLboolean transpose,const GLfloat * value)2057 void UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
2058 const GLfloat* value) {
2059 assert(count == 1);
2060 assert(!transpose);
2061 if (vertex_shader) {
2062 vertex_shader->set_uniform_matrix4fv(location, value);
2063 }
2064 }
2065
FramebufferTexture2D(GLenum target,GLenum attachment,GLenum textarget,GLuint texture,GLint level)2066 void FramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget,
2067 GLuint texture, GLint level) {
2068 assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
2069 assert(textarget == GL_TEXTURE_2D || textarget == GL_TEXTURE_RECTANGLE);
2070 assert(level == 0);
2071 Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
2072 if (attachment == GL_COLOR_ATTACHMENT0) {
2073 fb.color_attachment = texture;
2074 } else if (attachment == GL_DEPTH_ATTACHMENT) {
2075 fb.depth_attachment = texture;
2076 } else {
2077 assert(0);
2078 }
2079 }
2080
FramebufferRenderbuffer(GLenum target,GLenum attachment,GLenum renderbuffertarget,GLuint renderbuffer)2081 void FramebufferRenderbuffer(GLenum target, GLenum attachment,
2082 GLenum renderbuffertarget, GLuint renderbuffer) {
2083 assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
2084 assert(renderbuffertarget == GL_RENDERBUFFER);
2085 Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
2086 Renderbuffer& rb = ctx->renderbuffers[renderbuffer];
2087 if (attachment == GL_COLOR_ATTACHMENT0) {
2088 fb.color_attachment = rb.texture;
2089 } else if (attachment == GL_DEPTH_ATTACHMENT) {
2090 fb.depth_attachment = rb.texture;
2091 } else {
2092 assert(0);
2093 }
2094 }
2095
2096 } // extern "C"
2097
get_framebuffer(GLenum target,bool fallback=false)2098 static inline Framebuffer* get_framebuffer(GLenum target,
2099 bool fallback = false) {
2100 if (target == GL_FRAMEBUFFER) {
2101 target = GL_DRAW_FRAMEBUFFER;
2102 }
2103 Framebuffer* fb = ctx->framebuffers.find(ctx->get_binding(target));
2104 if (fallback && !fb) {
2105 // If the specified framebuffer isn't found and a fallback is requested,
2106 // use the default framebuffer.
2107 fb = &ctx->framebuffers[0];
2108 }
2109 return fb;
2110 }
2111
// Store n copies of val into dst. Generic scalar loop; a faster
// specialization may override this for particular element types.
template <typename T>
static inline void fill_n(T* dst, size_t n, T val) {
  T* const limit = dst + n;
  while (dst < limit) {
    *dst++ = val;
  }
}
2116
#if USE_SSE2
// x86 specialization of fill_n for 32-bit stores: "rep stosl" writes EAX
// (val) to [EDI] (dst) ECX (n) times. The "+D"/"+c" constraints pin dst/n
// to EDI/ECX and mark them clobbered; "memory" prevents the compiler from
// caching values across the fill.
template <>
inline void fill_n<uint32_t>(uint32_t* dst, size_t n, uint32_t val) {
  __asm__ __volatile__("rep stosl\n"
                       : "+D"(dst), "+c"(n)
                       : "a"(val)
                       : "memory", "cc");
}
#endif
2126
// Broadcast a clear value of a given texel size into a full 32-bit chunk so
// rows can be filled one word at a time.
static inline uint32_t clear_chunk(uint8_t value) {
  // Replicate the byte into all four byte lanes.
  return uint32_t(value) * 0x01010101U;
}

static inline uint32_t clear_chunk(uint16_t value) {
  // Replicate the 16-bit value into both halves.
  uint32_t v = value;
  return v | (v << 16);
}

static inline uint32_t clear_chunk(uint32_t value) {
  // Already a full chunk.
  return value;
}
2136
// Fill a row of len values of type T with value. The caller supplies chunk,
// the value pre-broadcast into 32 bits (see clear_chunk), so the bulk of the
// row can be filled one aligned word at a time, with any unaligned leading
// and trailing elements filled individually.
template <typename T>
static inline void clear_row(T* buf, size_t len, T value, uint32_t chunk) {
  // Number of T elements per 32-bit word (compile-time constant).
  const size_t N = sizeof(uint32_t) / sizeof(T);
  // fill any leading unaligned values
  if (N > 1) {
    // Elements needed to reach the next 32-bit boundary. If the row is
    // shorter than that, skip straight to the tail fill below.
    size_t align = (-(intptr_t)buf & (sizeof(uint32_t) - 1)) / sizeof(T);
    if (align <= len) {
      fill_n(buf, align, value);
      len -= align;
      buf += align;
    }
  }
  // fill as many aligned chunks as possible
  fill_n((uint32_t*)buf, len / N, chunk);
  // fill any remaining values
  if (N > 1) {
    fill_n(buf + (len & ~(N - 1)), len & (N - 1), value);
  }
}
2156
// Clear the rectangle bb of texture t to value, optionally leaving the
// horizontal span [skip_start, skip_end) of each row untouched. T must
// match the texture's bytes-per-pixel.
template <typename T>
static void clear_buffer(Texture& t, T value, IntRect bb, int skip_start = 0,
                         int skip_end = 0) {
  if (!t.buf) return;
  // Clamp the skip span into the rectangle and ensure start <= end; an
  // empty span (start >= end) means nothing is skipped.
  skip_start = max(skip_start, bb.x0);
  skip_end = max(skip_end, skip_start);
  assert(sizeof(T) == t.bpp());
  size_t stride = t.stride();
  // When clearing multiple full-width rows, collapse them into a single large
  // "row" to avoid redundant setup from clearing each row individually. Note
  // that we can only safely do this if the stride is tightly packed.
  if (bb.width() == t.width && bb.height() > 1 && skip_start >= skip_end &&
      (t.should_free() || stride == t.width * sizeof(T))) {
    bb.x1 += (stride / sizeof(T)) * (bb.height() - 1);
    bb.y1 = bb.y0 + 1;
  }
  T* buf = (T*)t.sample_ptr(bb.x0, bb.y0);
  uint32_t chunk = clear_chunk(value);
  for (int rows = bb.height(); rows > 0; rows--) {
    // Clear the portion of the row before the skip span...
    if (bb.x0 < skip_start) {
      clear_row(buf, skip_start - bb.x0, value, chunk);
    }
    // ...and the portion after it.
    if (skip_end < bb.x1) {
      clear_row(buf + (skip_end - bb.x0), bb.x1 - skip_end, value, chunk);
    }
    buf += stride / sizeof(T);
  }
}
2185
// Clear row y of texture t to its delayed-clear value, leaving the span
// [skip_start, skip_end) of the row untouched.
template <typename T>
static inline void force_clear_row(Texture& t, int y, int skip_start = 0,
                                   int skip_end = 0) {
  assert(t.buf != nullptr);
  assert(sizeof(T) == t.bpp());
  assert(skip_start <= skip_end);
  T* buf = (T*)t.sample_ptr(0, y);
  uint32_t chunk = clear_chunk((T)t.clear_val);
  // Clear everything left of the skip span...
  if (skip_start > 0) {
    clear_row<T>(buf, skip_start, t.clear_val, chunk);
  }
  // ...and everything right of it.
  if (skip_end < t.width) {
    clear_row<T>(buf + skip_end, t.width - skip_end, t.clear_val, chunk);
  }
}
2201
// Resolve a texture's delayed clear by clearing every row that has not yet
// been initialized. t.cleared_rows is a bitmask with one bit per row, 32
// rows per word; a set bit marks a row already written. If skip is given,
// the rectangle it covers is left untouched (its rows/columns are excluded
// from the clear).
template <typename T>
static void force_clear(Texture& t, const IntRect* skip = nullptr) {
  if (!t.delay_clear || !t.cleared_rows) {
    return;
  }
  int y0 = 0;
  int y1 = t.height;
  int skip_start = 0;
  int skip_end = 0;
  if (skip) {
    y0 = clamp(skip->y0, 0, t.height);
    y1 = clamp(skip->y1, y0, t.height);
    skip_start = clamp(skip->x0, 0, t.width);
    skip_end = clamp(skip->x1, skip_start, t.width);
    if (skip_start <= 0 && skip_end >= t.width && y0 <= 0 && y1 >= t.height) {
      // The skip rectangle covers the entire texture, so there is nothing
      // left to clear; drop the delayed clear entirely.
      t.disable_delayed_clear();
      return;
    }
  }
  int num_masks = (y1 + 31) / 32;
  uint32_t* rows = t.cleared_rows;
  for (int i = y0 / 32; i < num_masks; i++) {
    uint32_t mask = rows[i];
    if (mask != ~0U) {
      // Mark all 32 rows of this word initialized up front, then walk the
      // old mask, alternating between runs of zero bits (uninitialized
      // rows, which get cleared) and runs of one bits (already-initialized
      // rows, which are skipped).
      rows[i] = ~0U;
      int start = i * 32;
      while (mask) {
        // Length of the run of uninitialized rows at the current position.
        int count = __builtin_ctz(mask);
        if (count > 0) {
          clear_buffer<T>(t, t.clear_val,
                          IntRect{0, start, t.width, start + count}, skip_start,
                          skip_end);
          t.delay_clear -= count;
          start += count;
          mask >>= count;
        }
        // ctz(mask + 1) gives the length of the run of set bits, i.e. the
        // rows that are already initialized and can be skipped.
        count = __builtin_ctz(mask + 1);
        start += count;
        mask >>= count;
      }
      // Any rows left at the top of the word were zero bits shifted out of
      // the mask; clear them too.
      int count = (i + 1) * 32 - start;
      if (count > 0) {
        clear_buffer<T>(t, t.clear_val,
                        IntRect{0, start, t.width, start + count}, skip_start,
                        skip_end);
        t.delay_clear -= count;
      }
    }
  }
  // Once no uninitialized rows remain, drop the delayed-clear tracking.
  if (t.delay_clear <= 0) t.disable_delayed_clear();
}
2253
prepare_texture(Texture & t,const IntRect * skip)2254 static void prepare_texture(Texture& t, const IntRect* skip) {
2255 if (t.delay_clear) {
2256 switch (t.internal_format) {
2257 case GL_RGBA8:
2258 force_clear<uint32_t>(t, skip);
2259 break;
2260 case GL_R8:
2261 force_clear<uint8_t>(t, skip);
2262 break;
2263 case GL_RG8:
2264 force_clear<uint16_t>(t, skip);
2265 break;
2266 default:
2267 assert(false);
2268 break;
2269 }
2270 }
2271 }
2272
2273 // Setup a clear on a texture. This may either force an immediate clear or
2274 // potentially punt to a delayed clear, if applicable.
2275 template <typename T>
request_clear(Texture & t,T value,const IntRect & scissor)2276 static void request_clear(Texture& t, T value, const IntRect& scissor) {
2277 // If the clear would require a scissor, force clear anything outside
2278 // the scissor, and then immediately clear anything inside the scissor.
2279 if (!scissor.contains(t.offset_bounds())) {
2280 IntRect skip = scissor - t.offset;
2281 force_clear<T>(t, &skip);
2282 clear_buffer<T>(t, value, skip.intersection(t.bounds()));
2283 } else {
2284 // Do delayed clear for 2D texture without scissor.
2285 t.enable_delayed_clear(value);
2286 }
2287 }
2288
2289 template <typename T>
request_clear(Texture & t,T value)2290 static inline void request_clear(Texture& t, T value) {
2291 // If scissoring is enabled, use the scissor rect. Otherwise, just scissor to
2292 // the entire texture bounds.
2293 request_clear(t, value, ctx->scissortest ? ctx->scissor : t.offset_bounds());
2294 }
2295
2296 extern "C" {
2297
InitDefaultFramebuffer(int x,int y,int width,int height,int stride,void * buf)2298 void InitDefaultFramebuffer(int x, int y, int width, int height, int stride,
2299 void* buf) {
2300 Framebuffer& fb = ctx->framebuffers[0];
2301 if (!fb.color_attachment) {
2302 GenTextures(1, &fb.color_attachment);
2303 }
2304 // If the dimensions or buffer properties changed, we need to reallocate
2305 // the underlying storage for the color buffer texture.
2306 Texture& colortex = ctx->textures[fb.color_attachment];
2307 set_tex_storage(colortex, GL_RGBA8, width, height, buf, stride);
2308 colortex.offset = IntPoint(x, y);
2309 if (!fb.depth_attachment) {
2310 GenTextures(1, &fb.depth_attachment);
2311 }
2312 // Ensure dimensions of the depth buffer match the color buffer.
2313 Texture& depthtex = ctx->textures[fb.depth_attachment];
2314 set_tex_storage(depthtex, GL_DEPTH_COMPONENT24, width, height);
2315 depthtex.offset = IntPoint(x, y);
2316 }
2317
GetColorBuffer(GLuint fbo,GLboolean flush,int32_t * width,int32_t * height,int32_t * stride)2318 void* GetColorBuffer(GLuint fbo, GLboolean flush, int32_t* width,
2319 int32_t* height, int32_t* stride) {
2320 Framebuffer* fb = ctx->framebuffers.find(fbo);
2321 if (!fb || !fb->color_attachment) {
2322 return nullptr;
2323 }
2324 Texture& colortex = ctx->textures[fb->color_attachment];
2325 if (flush) {
2326 prepare_texture(colortex);
2327 }
2328 assert(colortex.offset == IntPoint(0, 0));
2329 if (width) {
2330 *width = colortex.width;
2331 }
2332 if (height) {
2333 *height = colortex.height;
2334 }
2335 if (stride) {
2336 *stride = colortex.stride();
2337 }
2338 return colortex.buf ? colortex.sample_ptr(0, 0) : nullptr;
2339 }
2340
ResolveFramebuffer(GLuint fbo)2341 void ResolveFramebuffer(GLuint fbo) {
2342 Framebuffer* fb = ctx->framebuffers.find(fbo);
2343 if (!fb || !fb->color_attachment) {
2344 return;
2345 }
2346 Texture& colortex = ctx->textures[fb->color_attachment];
2347 prepare_texture(colortex);
2348 }
2349
SetTextureBuffer(GLuint texid,GLenum internal_format,GLsizei width,GLsizei height,GLsizei stride,void * buf,GLsizei min_width,GLsizei min_height)2350 void SetTextureBuffer(GLuint texid, GLenum internal_format, GLsizei width,
2351 GLsizei height, GLsizei stride, void* buf,
2352 GLsizei min_width, GLsizei min_height) {
2353 Texture& t = ctx->textures[texid];
2354 set_tex_storage(t, internal_format, width, height, buf, stride, min_width,
2355 min_height);
2356 }
2357
CheckFramebufferStatus(GLenum target)2358 GLenum CheckFramebufferStatus(GLenum target) {
2359 Framebuffer* fb = get_framebuffer(target);
2360 if (!fb || !fb->color_attachment) {
2361 return GL_FRAMEBUFFER_UNSUPPORTED;
2362 }
2363 return GL_FRAMEBUFFER_COMPLETE;
2364 }
2365
// Clear a sub-rectangle of a texture to a depth or color value. Only level
// 0 and 2D clears (zoffset == 0, depth == 1) are supported. Depth textures
// are cleared via depth-run tracking; color textures are routed through
// request_clear, which may defer the clear.
void ClearTexSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset,
                      GLint zoffset, GLsizei width, GLsizei height,
                      GLsizei depth, GLenum format, GLenum type,
                      const void* data) {
  if (level != 0) {
    assert(false);
    return;
  }
  Texture& t = ctx->textures[texture];
  assert(!t.locked);
  // Nothing to do for an empty clear volume.
  if (width <= 0 || height <= 0 || depth <= 0) {
    return;
  }
  assert(zoffset == 0 && depth == 1);
  IntRect scissor = {xoffset, yoffset, xoffset + width, yoffset + height};
  if (t.internal_format == GL_DEPTH_COMPONENT24) {
    // Depth clear: convert the supplied float/double depth to 24-bit
    // fixed point. Only GL_DEPTH_COMPONENT with GL_FLOAT/GL_DOUBLE data
    // is accepted.
    uint32_t value = 0xFFFFFF;
    switch (format) {
      case GL_DEPTH_COMPONENT:
        switch (type) {
          case GL_DOUBLE:
            value = uint32_t(*(const GLdouble*)data * 0xFFFFFF);
            break;
          case GL_FLOAT:
            value = uint32_t(*(const GLfloat*)data * 0xFFFFFF);
            break;
          default:
            assert(false);
            break;
        }
        break;
      default:
        assert(false);
        break;
    }
    if (t.cleared() && !scissor.contains(t.offset_bounds())) {
      // If we need to scissor the clear and the depth buffer was already
      // initialized, then just fill runs for that scissor area.
      t.fill_depth_runs(value, scissor);
    } else {
      // Otherwise, the buffer is either uninitialized or the clear would
      // encompass the entire buffer. If uninitialized, we can safely fill
      // the entire buffer with any value and thus ignore any scissoring.
      t.init_depth_runs(value);
    }
    return;
  }

  // Color clear: pack the clear color into a 32-bit RGBA value (alpha
  // defaults to opaque), reading however many components the format
  // supplies. Only float and unsigned-byte source data are supported.
  uint32_t color = 0xFF000000;
  switch (type) {
    case GL_FLOAT: {
      const GLfloat* f = (const GLfloat*)data;
      Float v = {0.0f, 0.0f, 0.0f, 1.0f};
      // Each case falls through to pick up the remaining components.
      switch (format) {
        case GL_RGBA:
          v.w = f[3];  // alpha
          FALLTHROUGH;
        case GL_RGB:
          v.z = f[2];  // blue
          FALLTHROUGH;
        case GL_RG:
          v.y = f[1];  // green
          FALLTHROUGH;
        case GL_RED:
          v.x = f[0];  // red
          break;
        default:
          assert(false);
          break;
      }
      color = bit_cast<uint32_t>(CONVERT(round_pixel(v), U8));
      break;
    }
    case GL_UNSIGNED_BYTE: {
      const GLubyte* b = (const GLubyte*)data;
      switch (format) {
        case GL_RGBA:
          color = (color & ~0xFF000000) | (uint32_t(b[3]) << 24);  // alpha
          FALLTHROUGH;
        case GL_RGB:
          color = (color & ~0x00FF0000) | (uint32_t(b[2]) << 16);  // blue
          FALLTHROUGH;
        case GL_RG:
          color = (color & ~0x0000FF00) | (uint32_t(b[1]) << 8);  // green
          FALLTHROUGH;
        case GL_RED:
          color = (color & ~0x000000FF) | uint32_t(b[0]);  // red
          break;
        default:
          assert(false);
          break;
      }
      break;
    }
    default:
      assert(false);
      break;
  }

  // Dispatch on the texture's texel size, truncating the packed color to
  // the appropriate width.
  switch (t.internal_format) {
    case GL_RGBA8:
      // Clear color needs to swizzle to BGRA.
      request_clear<uint32_t>(t,
                              (color & 0xFF00FF00) |
                                  ((color << 16) & 0xFF0000) |
                                  ((color >> 16) & 0xFF),
                              scissor);
      break;
    case GL_R8:
      request_clear<uint8_t>(t, uint8_t(color & 0xFF), scissor);
      break;
    case GL_RG8:
      request_clear<uint16_t>(t, uint16_t(color & 0xFFFF), scissor);
      break;
    default:
      assert(false);
      break;
  }
}
2485
ClearTexImage(GLuint texture,GLint level,GLenum format,GLenum type,const void * data)2486 void ClearTexImage(GLuint texture, GLint level, GLenum format, GLenum type,
2487 const void* data) {
2488 Texture& t = ctx->textures[texture];
2489 IntRect scissor = t.offset_bounds();
2490 ClearTexSubImage(texture, level, scissor.x0, scissor.y0, 0, scissor.width(),
2491 scissor.height(), 1, format, type, data);
2492 }
2493
Clear(GLbitfield mask)2494 void Clear(GLbitfield mask) {
2495 Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
2496 if ((mask & GL_COLOR_BUFFER_BIT) && fb.color_attachment) {
2497 Texture& t = ctx->textures[fb.color_attachment];
2498 IntRect scissor = ctx->scissortest
2499 ? ctx->scissor.intersection(t.offset_bounds())
2500 : t.offset_bounds();
2501 ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
2502 scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
2503 ctx->clearcolor);
2504 }
2505 if ((mask & GL_DEPTH_BUFFER_BIT) && fb.depth_attachment) {
2506 Texture& t = ctx->textures[fb.depth_attachment];
2507 IntRect scissor = ctx->scissortest
2508 ? ctx->scissor.intersection(t.offset_bounds())
2509 : t.offset_bounds();
2510 ClearTexSubImage(fb.depth_attachment, 0, scissor.x0, scissor.y0, 0,
2511 scissor.width(), scissor.height(), 1, GL_DEPTH_COMPONENT,
2512 GL_DOUBLE, &ctx->cleardepth);
2513 }
2514 }
2515
ClearColorRect(GLuint fbo,GLint xoffset,GLint yoffset,GLsizei width,GLsizei height,GLfloat r,GLfloat g,GLfloat b,GLfloat a)2516 void ClearColorRect(GLuint fbo, GLint xoffset, GLint yoffset, GLsizei width,
2517 GLsizei height, GLfloat r, GLfloat g, GLfloat b,
2518 GLfloat a) {
2519 GLfloat color[] = {r, g, b, a};
2520 Framebuffer& fb = ctx->framebuffers[fbo];
2521 Texture& t = ctx->textures[fb.color_attachment];
2522 IntRect scissor =
2523 IntRect{xoffset, yoffset, xoffset + width, yoffset + height}.intersection(
2524 t.offset_bounds());
2525 ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
2526 scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
2527 color);
2528 }
2529
InvalidateFramebuffer(GLenum target,GLsizei num_attachments,const GLenum * attachments)2530 void InvalidateFramebuffer(GLenum target, GLsizei num_attachments,
2531 const GLenum* attachments) {
2532 Framebuffer* fb = get_framebuffer(target);
2533 if (!fb || num_attachments <= 0 || !attachments) {
2534 return;
2535 }
2536 for (GLsizei i = 0; i < num_attachments; i++) {
2537 switch (attachments[i]) {
2538 case GL_DEPTH_ATTACHMENT: {
2539 Texture& t = ctx->textures[fb->depth_attachment];
2540 t.set_cleared(false);
2541 break;
2542 }
2543 case GL_COLOR_ATTACHMENT0: {
2544 Texture& t = ctx->textures[fb->color_attachment];
2545 t.disable_delayed_clear();
2546 break;
2547 }
2548 }
2549 }
2550 }
2551
// Read back a rectangle of pixels from the read framebuffer's color
// attachment into data (or the bound pixel pack buffer). The rectangle is
// clipped against the texture; destination rows/columns corresponding to
// clipped-away source pixels are left untouched.
void ReadPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format,
                GLenum type, void* data) {
  // Redirect into the pixel pack buffer when one is bound.
  data = get_pixel_pack_buffer_data(data);
  if (!data) return;
  Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
  if (!fb) return;
  assert(format == GL_RED || format == GL_RGBA || format == GL_RGBA_INTEGER ||
         format == GL_BGRA || format == GL_RG);
  Texture& t = ctx->textures[fb->color_attachment];
  if (!t.buf) return;
  // Resolve any delayed clear so we read valid contents.
  prepare_texture(t);
  // debugf("read pixels %d, %d, %d, %d from fb %d with format %x\n", x, y,
  //        width, height, ctx->read_framebuffer_binding, t.internal_format);
  // Translate from window coordinates into the texture's local space.
  x -= t.offset.x;
  y -= t.offset.y;
  assert(x >= 0 && y >= 0);
  assert(x + width <= t.width);
  assert(y + height <= t.height);
  if (internal_format_for_data(format, type) != t.internal_format) {
    debugf("mismatched format for read pixels: %x vs %x\n", t.internal_format,
           internal_format_for_data(format, type));
    assert(false);
    return;
  }
  // Only support readback conversions that are reversible
  assert(!format_requires_conversion(format, t.internal_format) ||
         bytes_for_internal_format(format) == t.bpp());
  uint8_t* dest = (uint8_t*)data;
  size_t destStride = width * t.bpp();
  // Clip the requested rectangle against the texture on all four sides,
  // advancing the destination pointer past any clipped-away leading
  // rows/columns so surviving pixels land at the right place.
  if (y < 0) {
    dest += -y * destStride;
    height += y;
    y = 0;
  }
  if (y + height > t.height) {
    height = t.height - y;
  }
  if (x < 0) {
    dest += -x * t.bpp();
    width += x;
    x = 0;
  }
  if (x + width > t.width) {
    width = t.width - x;
  }
  if (width <= 0 || height <= 0) {
    return;
  }
  convert_copy(format, t.internal_format, dest, destStride,
               (const uint8_t*)t.sample_ptr(x, y), t.stride(), width, height);
}
2603
// Copy a 2D region between two textures or renderbuffers. Only level 0,
// depth-1 copies between same-format textures are supported; the copy is a
// straight row-by-row memcpy.
void CopyImageSubData(GLuint srcName, GLenum srcTarget, UNUSED GLint srcLevel,
                      GLint srcX, GLint srcY, GLint srcZ, GLuint dstName,
                      GLenum dstTarget, UNUSED GLint dstLevel, GLint dstX,
                      GLint dstY, GLint dstZ, GLsizei srcWidth,
                      GLsizei srcHeight, GLsizei srcDepth) {
  assert(srcLevel == 0 && dstLevel == 0);
  assert(srcZ == 0 && srcDepth == 1 && dstZ == 0);
  // Renderbuffers are backed by textures; resolve to the backing texture.
  if (srcTarget == GL_RENDERBUFFER) {
    Renderbuffer& rb = ctx->renderbuffers[srcName];
    srcName = rb.texture;
  }
  if (dstTarget == GL_RENDERBUFFER) {
    Renderbuffer& rb = ctx->renderbuffers[dstName];
    dstName = rb.texture;
  }
  Texture& srctex = ctx->textures[srcName];
  if (!srctex.buf) return;
  // Resolve any delayed clear on the source so valid data is copied.
  prepare_texture(srctex);
  Texture& dsttex = ctx->textures[dstName];
  if (!dsttex.buf) return;
  assert(!dsttex.locked);
  // Resolve the destination's delayed clear, but skip the region that this
  // copy is about to overwrite anyway.
  IntRect skip = {dstX, dstY, dstX + srcWidth, dstY + srcHeight};
  prepare_texture(dsttex, &skip);
  assert(srctex.internal_format == dsttex.internal_format);
  assert(srcWidth >= 0);
  assert(srcHeight >= 0);
  assert(srcX + srcWidth <= srctex.width);
  assert(srcY + srcHeight <= srctex.height);
  assert(dstX + srcWidth <= dsttex.width);
  assert(dstY + srcHeight <= dsttex.height);
  int bpp = srctex.bpp();
  int src_stride = srctex.stride();
  int dest_stride = dsttex.stride();
  char* dest = dsttex.sample_ptr(dstX, dstY);
  char* src = srctex.sample_ptr(srcX, srcY);
  // Copy row by row to respect each texture's stride.
  for (int y = 0; y < srcHeight; y++) {
    memcpy(dest, src, srcWidth * bpp);
    dest += dest_stride;
    src += src_stride;
  }
}
2645
CopyTexSubImage2D(GLenum target,UNUSED GLint level,GLint xoffset,GLint yoffset,GLint x,GLint y,GLsizei width,GLsizei height)2646 void CopyTexSubImage2D(GLenum target, UNUSED GLint level, GLint xoffset,
2647 GLint yoffset, GLint x, GLint y, GLsizei width,
2648 GLsizei height) {
2649 assert(level == 0);
2650 Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
2651 if (!fb) return;
2652 CopyImageSubData(fb->color_attachment, GL_TEXTURE_2D, 0, x, y, 0,
2653 ctx->get_binding(target), GL_TEXTURE_2D, 0, xoffset, yoffset,
2654 0, width, height, 1);
2655 }
2656
2657 } // extern "C"
2658
2659 #include "blend.h"
2660 #include "composite.h"
2661 #include "swgl_ext.h"
2662
2663 #pragma GCC diagnostic push
2664 #pragma GCC diagnostic ignored "-Wuninitialized"
2665 #pragma GCC diagnostic ignored "-Wunused-function"
2666 #pragma GCC diagnostic ignored "-Wunused-parameter"
2667 #pragma GCC diagnostic ignored "-Wunused-variable"
2668 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
2669 #ifdef __clang__
2670 # pragma GCC diagnostic ignored "-Wunused-private-field"
2671 #else
2672 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
2673 #endif
2674 #include "load_shader.h"
2675 #pragma GCC diagnostic pop
2676
2677 #include "rasterize.h"
2678
validate()2679 void VertexArray::validate() {
2680 int last_enabled = -1;
2681 for (int i = 0; i <= max_attrib; i++) {
2682 VertexAttrib& attr = attribs[i];
2683 if (attr.enabled) {
2684 // VertexArray &v = ctx->vertex_arrays[attr.vertex_array];
2685 Buffer& vertex_buf = ctx->buffers[attr.vertex_buffer];
2686 attr.buf = vertex_buf.buf;
2687 attr.buf_size = vertex_buf.size;
2688 // debugf("%d %x %d %d %d %d\n", i, attr.type, attr.size, attr.stride,
2689 // attr.offset, attr.divisor);
2690 last_enabled = i;
2691 }
2692 }
2693 max_attrib = last_enabled;
2694 }
2695
2696 extern "C" {
2697
// Draw indexed, instanced geometry into the draw framebuffer. Supports
// 16/32-bit element indices for triangles, plus a non-standard GL_NONE
// element type that behaves like DrawArrays (lines or triangles).
void DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
                           GLintptr offset, GLsizei instancecount) {
  // Nothing to draw without valid parameters and a bound program.
  if (offset < 0 || count <= 0 || instancecount <= 0 || !vertex_shader ||
      !fragment_shader) {
    return;
  }

  Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
  if (!fb.color_attachment) {
    return;
  }
  Texture& colortex = ctx->textures[fb.color_attachment];
  if (!colortex.buf) {
    return;
  }
  assert(!colortex.locked);
  assert(colortex.internal_format == GL_RGBA8 ||
         colortex.internal_format == GL_R8);
  // Texture 0 acts as a null depth texture when depth testing is disabled.
  Texture& depthtex = ctx->textures[ctx->depthtest ? fb.depth_attachment : 0];
  if (depthtex.buf) {
    assert(depthtex.internal_format == GL_DEPTH_COMPONENT24);
    assert(colortex.width == depthtex.width &&
           colortex.height == depthtex.height);
    assert(colortex.offset == depthtex.offset);
  }

  // Refresh cached vertex attrib state if anything changed since the last
  // draw.
  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
  if (ctx->validate_vertex_array) {
    ctx->validate_vertex_array = false;
    v.validate();
  }

#ifdef PRINT_TIMINGS
  uint64_t start = get_time_value();
#endif

  ctx->shaded_rows = 0;
  ctx->shaded_pixels = 0;

  vertex_shader->init_batch();

  switch (type) {
    case GL_UNSIGNED_SHORT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint16_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_UNSIGNED_INT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint32_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_NONE:
      // Non-standard GL extension - if element type is GL_NONE, then we don't
      // use any element buffer and behave as if DrawArrays was called instead.
      for (GLsizei instance = 0; instance < instancecount; instance++) {
        switch (mode) {
          case GL_LINES:
            for (GLsizei i = 0; i + 2 <= count; i += 2) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 2);
              draw_quad(2, colortex, depthtex);
            }
            break;
          case GL_TRIANGLES:
            for (GLsizei i = 0; i + 3 <= count; i += 3) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 3);
              draw_quad(3, colortex, depthtex);
            }
            break;
          default:
            assert(false);
            break;
        }
      }
      break;
    default:
      assert(false);
      break;
  }

  // Accumulate shaded pixels into any active occlusion-style query.
  if (ctx->samples_passed_query) {
    Query& q = ctx->queries[ctx->samples_passed_query];
    q.value += ctx->shaded_pixels;
  }

#ifdef PRINT_TIMINGS
  uint64_t end = get_time_value();
  printf(
      "%7.3fms draw(%s, %d): %d pixels in %d rows (avg %f pixels/row, "
      "%fns/pixel)\n",
      double(end - start) / (1000. * 1000.),
      ctx->programs[ctx->current_program].impl->get_name(), instancecount,
      ctx->shaded_pixels, ctx->shaded_rows,
      double(ctx->shaded_pixels) / ctx->shaded_rows,
      double(end - start) / max(ctx->shaded_pixels, 1));
#endif
}
2797
void Finish() {
  // Rendering is synchronous, so there is nothing to wait on here.
#ifdef PRINT_TIMINGS
  printf("Finish\n");
#endif
}
2803
MakeCurrent(Context * c)2804 void MakeCurrent(Context* c) {
2805 if (ctx == c) {
2806 return;
2807 }
2808 ctx = c;
2809 setup_program(ctx ? ctx->current_program : 0);
2810 }
2811
// Allocate a fresh rendering context. The caller owns the initial
// reference and releases it via DestroyContext.
Context* CreateContext() { return new Context; }
2813
ReferenceContext(Context * c)2814 void ReferenceContext(Context* c) {
2815 if (!c) {
2816 return;
2817 }
2818 ++c->references;
2819 }
2820
DestroyContext(Context * c)2821 void DestroyContext(Context* c) {
2822 if (!c) {
2823 return;
2824 }
2825 assert(c->references > 0);
2826 --c->references;
2827 if (c->references > 0) {
2828 return;
2829 }
2830 if (ctx == c) {
2831 MakeCurrent(nullptr);
2832 }
2833 delete c;
2834 }
2835
ReportMemory(Context * ctx,size_t (* size_of_op)(void *))2836 size_t ReportMemory(Context* ctx, size_t (*size_of_op)(void*)) {
2837 size_t size = 0;
2838 if (ctx) {
2839 for (auto& t : ctx->textures) {
2840 if (t && t->should_free()) {
2841 size += size_of_op(t->buf);
2842 }
2843 }
2844 }
2845 return size;
2846 }
2847 } // extern "C"
2848