1 #pragma once 2 3 #include <thread> 4 #include <unordered_map> 5 #include <vector> 6 #include <functional> 7 #include <set> 8 #include <string> 9 #include <mutex> 10 #include <condition_variable> 11 12 #include "Common/GPU/OpenGL/GLCommon.h" 13 #include "Common/Data/Convert/SmallDataConvert.h" 14 #include "Common/Log.h" 15 #include "GLQueueRunner.h" 16 17 class GLRInputLayout; 18 class GLPushBuffer; 19 20 namespace Draw { 21 class DrawContext; 22 } 23 24 constexpr int MAX_GL_TEXTURE_SLOTS = 8; 25 26 class GLRTexture { 27 public: 28 GLRTexture(int width, int height, int numMips); 29 ~GLRTexture(); 30 31 GLuint texture = 0; 32 uint16_t w; 33 uint16_t h; 34 35 // We don't trust OpenGL defaults - setting wildly off values ensures that we'll end up overwriting these parameters. 36 GLenum target = 0xFFFF; 37 GLenum wrapS = 0xFFFF; 38 GLenum wrapT = 0xFFFF; 39 GLenum magFilter = 0xFFFF; 40 GLenum minFilter = 0xFFFF; 41 uint8_t numMips = 0; 42 bool canWrap = true; 43 float anisotropy = -100000.0f; 44 float minLod = -1000.0f; 45 float maxLod = 1000.0f; 46 float lodBias = 0.0f; 47 }; 48 49 class GLRFramebuffer { 50 public: GLRFramebuffer(int _width,int _height,bool z_stencil)51 GLRFramebuffer(int _width, int _height, bool z_stencil) 52 : color_texture(_width, _height, 1), z_stencil_texture(_width, _height, 1), 53 width(_width), height(_height), z_stencil_(z_stencil) { 54 } 55 56 ~GLRFramebuffer(); 57 58 int numShadows = 1; // TODO: Support this. 59 60 GLuint handle = 0; 61 GLRTexture color_texture; 62 // Either z_stencil_texture, z_stencil_buffer, or (z_buffer and stencil_buffer) are set. 63 GLuint z_stencil_buffer = 0; 64 GLRTexture z_stencil_texture; 65 GLuint z_buffer = 0; 66 GLuint stencil_buffer = 0; 67 68 int width; 69 int height; 70 GLuint colorDepth = 0; 71 72 bool z_stencil_; 73 }; 74 75 // We need to create some custom heap-allocated types so we can forward things that need to be created on the GL thread, before 76 // they've actually been created. 77 78 class GLRShader { 79 public: ~GLRShader()80 ~GLRShader() { 81 if (shader) { 82 glDeleteShader(shader); 83 } 84 } 85 86 GLuint shader = 0; 87 bool valid = false; 88 // Warning: Won't know until a future frame. 89 bool failed = false; 90 std::string desc; 91 std::string code; 92 std::string error; 93 }; 94 95 class GLRProgram { 96 public: ~GLRProgram()97 ~GLRProgram() { 98 if (program) { 99 glDeleteProgram(program); 100 } 101 } 102 struct Semantic { 103 int location; 104 const char *attrib; 105 }; 106 107 struct UniformLocQuery { 108 GLint *dest; 109 const char *name; 110 }; 111 112 struct Initializer { 113 GLint *uniform; 114 int type; 115 int value; 116 }; 117 118 GLuint program = 0; 119 std::vector<Semantic> semantics_; 120 std::vector<UniformLocQuery> queries_; 121 std::vector<Initializer> initialize_; 122 123 struct UniformInfo { 124 int loc_; 125 }; 126 127 // Must ONLY be called from GLQueueRunner! 128 // Also it's pretty slow... GetUniformLoc(const char * name)129 int GetUniformLoc(const char *name) { 130 auto iter = uniformCache_.find(std::string(name)); 131 int loc = -1; 132 if (iter != uniformCache_.end()) { 133 loc = iter->second.loc_; 134 } else { 135 loc = glGetUniformLocation(program, name); 136 UniformInfo info; 137 info.loc_ = loc; 138 uniformCache_[name] = info; 139 } 140 return loc; 141 } 142 std::unordered_map<std::string, UniformInfo> uniformCache_; 143 }; 144 145 enum class GLBufferStrategy { 146 SUBDATA = 0, 147 148 MASK_FLUSH = 0x10, 149 MASK_INVALIDATE = 0x20, 150 151 // Map/unmap the buffer each frame. 152 FRAME_UNMAP = 1, 153 // Map/unmap and also invalidate the buffer on map. 154 INVALIDATE_UNMAP = MASK_INVALIDATE, 155 // Map/unmap and explicitly flushed changed ranges. 156 FLUSH_UNMAP = MASK_FLUSH, 157 // Map/unmap, invalidate on map, and explicit flush. 158 FLUSH_INVALIDATE_UNMAP = MASK_FLUSH | MASK_INVALIDATE, 159 }; 160 161 static inline int operator &(const GLBufferStrategy &lhs, const GLBufferStrategy &rhs) { 162 return (int)lhs & (int)rhs; 163 } 164 165 class GLRBuffer { 166 public: GLRBuffer(GLuint target,size_t size)167 GLRBuffer(GLuint target, size_t size) : target_(target), size_((int)size) {} ~GLRBuffer()168 ~GLRBuffer() { 169 if (buffer_) { 170 glDeleteBuffers(1, &buffer_); 171 } 172 } 173 174 void *Map(GLBufferStrategy strategy); 175 bool Unmap(); 176 Mapped()177 bool Mapped() const { 178 return mapped_; 179 } 180 181 GLuint buffer_ = 0; 182 GLuint target_; 183 int size_; 184 185 private: 186 bool mapped_ = false; 187 bool hasStorage_ = false; 188 }; 189 190 class GLRenderManager; 191 192 // Similar to VulkanPushBuffer but is currently less efficient - it collects all the data in 193 // RAM then does a big memcpy/buffer upload at the end of the frame. This is at least a lot 194 // faster than the hundreds of buffer uploads or memory array buffers we used before. 195 // On modern GL we could avoid the copy using glBufferStorage but not sure it's worth the 196 // trouble. 197 // We need to manage the lifetime of this together with the other resources so its destructor 198 // runs on the render thread. 199 class GLPushBuffer { 200 public: 201 friend class GLRenderManager; 202 203 struct BufInfo { 204 GLRBuffer *buffer = nullptr; 205 uint8_t *localMemory = nullptr; 206 uint8_t *deviceMemory = nullptr; 207 size_t flushOffset = 0; 208 }; 209 210 GLPushBuffer(GLRenderManager *render, GLuint target, size_t size); 211 ~GLPushBuffer(); 212 Reset()213 void Reset() { offset_ = 0; } 214 215 private: 216 // Needs context in case of defragment. Begin()217 void Begin() { 218 buf_ = 0; 219 offset_ = 0; 220 // Note: we must defrag because some buffers may be smaller than size_. 221 Defragment(); 222 Map(); 223 _dbg_assert_(writePtr_); 224 } 225 BeginNoReset()226 void BeginNoReset() { 227 Map(); 228 } 229 End()230 void End() { 231 Unmap(); 232 } 233 234 public: 235 void Map(); 236 void Unmap(); 237 IsReady()238 bool IsReady() const { 239 return writePtr_ != nullptr; 240 } 241 242 // When using the returned memory, make sure to bind the returned vkbuf. 243 // This will later allow for handling overflow correctly. Allocate(size_t numBytes,GLRBuffer ** vkbuf)244 size_t Allocate(size_t numBytes, GLRBuffer **vkbuf) { 245 size_t out = offset_; 246 if (offset_ + ((numBytes + 3) & ~3) >= size_) { 247 NextBuffer(numBytes); 248 out = offset_; 249 offset_ += (numBytes + 3) & ~3; 250 } else { 251 offset_ += (numBytes + 3) & ~3; // Round up to 4 bytes. 252 } 253 *vkbuf = buffers_[buf_].buffer; 254 return out; 255 } 256 257 // Returns the offset that should be used when binding this buffer to get this data. Push(const void * data,size_t size,GLRBuffer ** vkbuf)258 size_t Push(const void *data, size_t size, GLRBuffer **vkbuf) { 259 _dbg_assert_(writePtr_); 260 size_t off = Allocate(size, vkbuf); 261 memcpy(writePtr_ + off, data, size); 262 return off; 263 } 264 PushAligned(const void * data,size_t size,int align,GLRBuffer ** vkbuf)265 uint32_t PushAligned(const void *data, size_t size, int align, GLRBuffer **vkbuf) { 266 _dbg_assert_(writePtr_); 267 offset_ = (offset_ + align - 1) & ~(align - 1); 268 size_t off = Allocate(size, vkbuf); 269 memcpy(writePtr_ + off, data, size); 270 return (uint32_t)off; 271 } 272 GetOffset()273 size_t GetOffset() const { 274 return offset_; 275 } 276 277 // "Zero-copy" variant - you can write the data directly as you compute it. 278 // Recommended. Push(size_t size,uint32_t * bindOffset,GLRBuffer ** vkbuf)279 void *Push(size_t size, uint32_t *bindOffset, GLRBuffer **vkbuf) { 280 _dbg_assert_(writePtr_); 281 size_t off = Allocate(size, vkbuf); 282 *bindOffset = (uint32_t)off; 283 return writePtr_ + off; 284 } PushAligned(size_t size,uint32_t * bindOffset,GLRBuffer ** vkbuf,int align)285 void *PushAligned(size_t size, uint32_t *bindOffset, GLRBuffer **vkbuf, int align) { 286 _dbg_assert_(writePtr_); 287 offset_ = (offset_ + align - 1) & ~(align - 1); 288 size_t off = Allocate(size, vkbuf); 289 *bindOffset = (uint32_t)off; 290 return writePtr_ + off; 291 } 292 293 size_t GetTotalSize() const; 294 295 void Destroy(bool onRenderThread); 296 void Flush(); 297 298 protected: 299 void MapDevice(GLBufferStrategy strategy); 300 void UnmapDevice(); 301 302 private: 303 bool AddBuffer(); 304 void NextBuffer(size_t minSize); 305 void Defragment(); 306 307 GLRenderManager *render_; 308 std::vector<BufInfo> buffers_; 309 size_t buf_ = 0; 310 size_t offset_ = 0; 311 size_t size_ = 0; 312 uint8_t *writePtr_ = nullptr; 313 GLuint target_; 314 GLBufferStrategy strategy_ = GLBufferStrategy::SUBDATA; 315 }; 316 317 enum class GLRRunType { 318 END, 319 SYNC, 320 }; 321 322 class GLDeleter { 323 public: 324 void Perform(GLRenderManager *renderManager, bool skipGLCalls); 325 IsEmpty()326 bool IsEmpty() const { 327 return shaders.empty() && programs.empty() && buffers.empty() && textures.empty() && inputLayouts.empty() && framebuffers.empty() && pushBuffers.empty(); 328 } 329 330 void Take(GLDeleter &other); 331 332 std::vector<GLRShader *> shaders; 333 std::vector<GLRProgram *> programs; 334 std::vector<GLRBuffer *> buffers; 335 std::vector<GLRTexture *> textures; 336 std::vector<GLRInputLayout *> inputLayouts; 337 std::vector<GLRFramebuffer *> framebuffers; 338 std::vector<GLPushBuffer *> pushBuffers; 339 }; 340 341 class GLRInputLayout { 342 public: 343 struct Entry { 344 int location; 345 int count; 346 GLenum type; 347 GLboolean normalized; 348 int stride; 349 intptr_t offset; 350 }; 351 std::vector<Entry> entries; 352 int semanticsMask_ = 0; 353 }; 354 355 // Note: The GLRenderManager is created and destroyed on the render thread, and the latter 356 // happens after the emu thread has been destroyed. Therefore, it's safe to run wild deleting stuff 357 // directly in the destructor. 358 class GLRenderManager { 359 public: 360 GLRenderManager(); 361 ~GLRenderManager(); 362 SetErrorCallback(ErrorCallbackFn callback,void * userdata)363 void SetErrorCallback(ErrorCallbackFn callback, void *userdata) { 364 queueRunner_.SetErrorCallback(callback, userdata); 365 } 366 367 void ThreadStart(Draw::DrawContext *draw); 368 void ThreadEnd(); 369 bool ThreadFrame(); // Returns false to request exiting the loop. 370 371 // Makes sure that the GPU has caught up enough that we can start writing buffers of this frame again. 372 void BeginFrame(); 373 // Can run on a different thread! 374 void Finish(); 375 void Run(int frame); 376 377 // Zaps queued up commands. Use if you know there's a risk you've queued up stuff that has already been deleted. Can happen during in-game shutdown. 378 void Wipe(); 379 380 // Wait until no frames are pending. Use during shutdown before freeing pointers. 381 void WaitUntilQueueIdle(); 382 383 // Creation commands. These were not needed in Vulkan since there we can do that on the main thread. 384 // We pass in width/height here even though it's not strictly needed until we support glTextureStorage 385 // and then we'll also need formats and stuff. CreateTexture(GLenum target,int width,int height,int numMips)386 GLRTexture *CreateTexture(GLenum target, int width, int height, int numMips) { 387 GLRInitStep step{ GLRInitStepType::CREATE_TEXTURE }; 388 step.create_texture.texture = new GLRTexture(width, height, numMips); 389 step.create_texture.texture->target = target; 390 initSteps_.push_back(step); 391 return step.create_texture.texture; 392 } 393 CreateBuffer(GLuint target,size_t size,GLuint usage)394 GLRBuffer *CreateBuffer(GLuint target, size_t size, GLuint usage) { 395 GLRInitStep step{ GLRInitStepType::CREATE_BUFFER }; 396 step.create_buffer.buffer = new GLRBuffer(target, size); 397 step.create_buffer.size = (int)size; 398 step.create_buffer.usage = usage; 399 initSteps_.push_back(step); 400 return step.create_buffer.buffer; 401 } 402 CreateShader(GLuint stage,const std::string & code,const std::string & desc)403 GLRShader *CreateShader(GLuint stage, const std::string &code, const std::string &desc) { 404 GLRInitStep step{ GLRInitStepType::CREATE_SHADER }; 405 step.create_shader.shader = new GLRShader(); 406 step.create_shader.shader->desc = desc; 407 step.create_shader.stage = stage; 408 step.create_shader.code = new char[code.size() + 1]; 409 memcpy(step.create_shader.code, code.data(), code.size() + 1); 410 initSteps_.push_back(step); 411 return step.create_shader.shader; 412 } 413 CreateFramebuffer(int width,int height,bool z_stencil)414 GLRFramebuffer *CreateFramebuffer(int width, int height, bool z_stencil) { 415 GLRInitStep step{ GLRInitStepType::CREATE_FRAMEBUFFER }; 416 step.create_framebuffer.framebuffer = new GLRFramebuffer(width, height, z_stencil); 417 initSteps_.push_back(step); 418 return step.create_framebuffer.framebuffer; 419 } 420 421 // Can't replace uniform initializers with direct calls to SetUniform() etc because there might 422 // not be an active render pass. CreateProgram(std::vector<GLRShader * > shaders,std::vector<GLRProgram::Semantic> semantics,std::vector<GLRProgram::UniformLocQuery> queries,std::vector<GLRProgram::Initializer> initalizers,bool supportDualSource)423 GLRProgram *CreateProgram( 424 std::vector<GLRShader *> shaders, std::vector<GLRProgram::Semantic> semantics, std::vector<GLRProgram::UniformLocQuery> queries, 425 std::vector<GLRProgram::Initializer> initalizers, bool supportDualSource) { 426 GLRInitStep step{ GLRInitStepType::CREATE_PROGRAM }; 427 _assert_(shaders.size() <= ARRAY_SIZE(step.create_program.shaders)); 428 step.create_program.program = new GLRProgram(); 429 step.create_program.program->semantics_ = semantics; 430 step.create_program.program->queries_ = queries; 431 step.create_program.program->initialize_ = initalizers; 432 step.create_program.support_dual_source = supportDualSource; 433 _assert_msg_(shaders.size() > 0, "Can't create a program with zero shaders"); 434 for (size_t i = 0; i < shaders.size(); i++) { 435 step.create_program.shaders[i] = shaders[i]; 436 } 437 #ifdef _DEBUG 438 for (auto &iter : queries) { 439 _dbg_assert_(iter.name); 440 } 441 for (auto &sem : semantics) { 442 _dbg_assert_(sem.attrib); 443 } 444 #endif 445 step.create_program.num_shaders = (int)shaders.size(); 446 initSteps_.push_back(step); 447 return step.create_program.program; 448 } 449 CreateInputLayout(std::vector<GLRInputLayout::Entry> & entries)450 GLRInputLayout *CreateInputLayout(std::vector<GLRInputLayout::Entry> &entries) { 451 GLRInitStep step{ GLRInitStepType::CREATE_INPUT_LAYOUT }; 452 step.create_input_layout.inputLayout = new GLRInputLayout(); 453 step.create_input_layout.inputLayout->entries = entries; 454 for (auto &iter : step.create_input_layout.inputLayout->entries) { 455 step.create_input_layout.inputLayout->semanticsMask_ |= 1 << iter.location; 456 } 457 initSteps_.push_back(step); 458 return step.create_input_layout.inputLayout; 459 } 460 CreatePushBuffer(int frame,GLuint target,size_t size)461 GLPushBuffer *CreatePushBuffer(int frame, GLuint target, size_t size) { 462 GLPushBuffer *push = new GLPushBuffer(this, target, size); 463 RegisterPushBuffer(frame, push); 464 return push; 465 } 466 DeleteShader(GLRShader * shader)467 void DeleteShader(GLRShader *shader) { 468 deleter_.shaders.push_back(shader); 469 } DeleteProgram(GLRProgram * program)470 void DeleteProgram(GLRProgram *program) { 471 deleter_.programs.push_back(program); 472 } DeleteBuffer(GLRBuffer * buffer)473 void DeleteBuffer(GLRBuffer *buffer) { 474 deleter_.buffers.push_back(buffer); 475 } DeleteTexture(GLRTexture * texture)476 void DeleteTexture(GLRTexture *texture) { 477 deleter_.textures.push_back(texture); 478 } DeleteInputLayout(GLRInputLayout * inputLayout)479 void DeleteInputLayout(GLRInputLayout *inputLayout) { 480 deleter_.inputLayouts.push_back(inputLayout); 481 } DeleteFramebuffer(GLRFramebuffer * framebuffer)482 void DeleteFramebuffer(GLRFramebuffer *framebuffer) { 483 deleter_.framebuffers.push_back(framebuffer); 484 } DeletePushBuffer(GLPushBuffer * pushbuffer)485 void DeletePushBuffer(GLPushBuffer *pushbuffer) { 486 deleter_.pushBuffers.push_back(pushbuffer); 487 } 488 BeginPushBuffer(GLPushBuffer * pushbuffer)489 void BeginPushBuffer(GLPushBuffer *pushbuffer) { 490 pushbuffer->Begin(); 491 } 492 EndPushBuffer(GLPushBuffer * pushbuffer)493 void EndPushBuffer(GLPushBuffer *pushbuffer) { 494 pushbuffer->End(); 495 } 496 497 // This starts a new step (like a "render pass" in Vulkan). 498 // 499 // After a "CopyFramebuffer" or the other functions that start "steps", you need to call this beforce 500 // making any new render state changes or draw calls. 501 // 502 // The following state needs to be reset by the caller after calling this (and will thus not safely carry over from 503 // the previous one): 504 // * Viewport/Scissor 505 // * Depth/stencil 506 // * Blend 507 // * Raster state like primitive, culling, etc. 508 // 509 // It can be useful to use GetCurrentStepId() to figure out when you need to send all this state again, if you're 510 // not keeping track of your calls to this function on your own. 511 void BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag); 512 513 // Binds a framebuffer as a texture, for the following draws. 514 void BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit, int attachment); 515 516 bool CopyFramebufferToMemorySync(GLRFramebuffer *src, int aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag); 517 void CopyImageToMemorySync(GLRTexture *texture, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag); 518 519 void CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask, const char *tag); 520 void BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter, const char *tag); 521 522 // Takes ownership of data if deleteData = true. 523 void BufferSubdata(GLRBuffer *buffer, size_t offset, size_t size, uint8_t *data, bool deleteData = true) { 524 // TODO: Maybe should be a render command instead of an init command? When possible it's better as 525 // an init command, that's for sure. 526 GLRInitStep step{ GLRInitStepType::BUFFER_SUBDATA }; 527 _dbg_assert_(offset >= 0); 528 _dbg_assert_(offset <= buffer->size_ - size); 529 step.buffer_subdata.buffer = buffer; 530 step.buffer_subdata.offset = (int)offset; 531 step.buffer_subdata.size = (int)size; 532 step.buffer_subdata.data = data; 533 step.buffer_subdata.deleteData = deleteData; 534 initSteps_.push_back(step); 535 } 536 537 // Takes ownership over the data pointer and delete[]-s it. 538 void TextureImage(GLRTexture *texture, int level, int width, int height, Draw::DataFormat format, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW, bool linearFilter = false) { 539 GLRInitStep step{ GLRInitStepType::TEXTURE_IMAGE }; 540 step.texture_image.texture = texture; 541 step.texture_image.data = data; 542 step.texture_image.format = format; 543 step.texture_image.level = level; 544 step.texture_image.width = width; 545 step.texture_image.height = height; 546 step.texture_image.allocType = allocType; 547 step.texture_image.linearFilter = linearFilter; 548 initSteps_.push_back(step); 549 } 550 551 void TextureSubImage(GLRTexture *texture, int level, int x, int y, int width, int height, Draw::DataFormat format, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW) { 552 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 553 GLRRenderData _data{ GLRRenderCommand::TEXTURE_SUBIMAGE }; 554 _data.texture_subimage.texture = texture; 555 _data.texture_subimage.data = data; 556 _data.texture_subimage.format = format; 557 _data.texture_subimage.level = level; 558 _data.texture_subimage.x = x; 559 _data.texture_subimage.y = y; 560 _data.texture_subimage.width = width; 561 _data.texture_subimage.height = height; 562 _data.texture_subimage.allocType = allocType; 563 curRenderStep_->commands.push_back(_data); 564 } 565 FinalizeTexture(GLRTexture * texture,int maxLevels,bool genMips)566 void FinalizeTexture(GLRTexture *texture, int maxLevels, bool genMips) { 567 GLRInitStep step{ GLRInitStepType::TEXTURE_FINALIZE }; 568 step.texture_finalize.texture = texture; 569 step.texture_finalize.maxLevel = maxLevels; 570 step.texture_finalize.genMips = genMips; 571 initSteps_.push_back(step); 572 } 573 BindTexture(int slot,GLRTexture * tex)574 void BindTexture(int slot, GLRTexture *tex) { 575 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 576 _dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS); 577 GLRRenderData data{ GLRRenderCommand::BINDTEXTURE }; 578 data.texture.slot = slot; 579 data.texture.texture = tex; 580 curRenderStep_->commands.push_back(data); 581 } 582 BindProgram(GLRProgram * program)583 void BindProgram(GLRProgram *program) { 584 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 585 GLRRenderData data{ GLRRenderCommand::BINDPROGRAM }; 586 _dbg_assert_(program != nullptr); 587 data.program.program = program; 588 curRenderStep_->commands.push_back(data); 589 #ifdef _DEBUG 590 curProgram_ = program; 591 #endif 592 } 593 BindPixelPackBuffer(GLRBuffer * buffer)594 void BindPixelPackBuffer(GLRBuffer *buffer) { // Want to support an offset but can't in ES 2.0. We supply an offset when binding the buffers instead. 595 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 596 GLRRenderData data{ GLRRenderCommand::BIND_BUFFER }; 597 data.bind_buffer.buffer = buffer; 598 data.bind_buffer.target = GL_PIXEL_PACK_BUFFER; 599 curRenderStep_->commands.push_back(data); 600 } 601 BindIndexBuffer(GLRBuffer * buffer)602 void BindIndexBuffer(GLRBuffer *buffer) { // Want to support an offset but can't in ES 2.0. We supply an offset when binding the buffers instead. 603 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 604 GLRRenderData data{ GLRRenderCommand::BIND_BUFFER}; 605 data.bind_buffer.buffer = buffer; 606 data.bind_buffer.target = GL_ELEMENT_ARRAY_BUFFER; 607 curRenderStep_->commands.push_back(data); 608 } 609 BindVertexBuffer(GLRInputLayout * inputLayout,GLRBuffer * buffer,size_t offset)610 void BindVertexBuffer(GLRInputLayout *inputLayout, GLRBuffer *buffer, size_t offset) { 611 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 612 _dbg_assert_(inputLayout); 613 GLRRenderData data{ GLRRenderCommand::BIND_VERTEX_BUFFER }; 614 data.bindVertexBuffer.inputLayout = inputLayout; 615 data.bindVertexBuffer.offset = offset; 616 data.bindVertexBuffer.buffer = buffer; 617 curRenderStep_->commands.push_back(data); 618 } 619 SetDepth(bool enabled,bool write,GLenum func)620 void SetDepth(bool enabled, bool write, GLenum func) { 621 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 622 GLRRenderData data{ GLRRenderCommand::DEPTH }; 623 data.depth.enabled = enabled; 624 data.depth.write = write; 625 data.depth.func = func; 626 curRenderStep_->commands.push_back(data); 627 } 628 SetViewport(const GLRViewport & vp)629 void SetViewport(const GLRViewport &vp) { 630 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 631 GLRRenderData data{ GLRRenderCommand::VIEWPORT }; 632 data.viewport.vp = vp; 633 curRenderStep_->commands.push_back(data); 634 } 635 SetScissor(const GLRect2D & rc)636 void SetScissor(const GLRect2D &rc) { 637 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 638 GLRRenderData data{ GLRRenderCommand::SCISSOR }; 639 data.scissor.rc = rc; 640 curRenderStep_->commands.push_back(data); 641 } 642 SetUniformI(const GLint * loc,int count,const int * udata)643 void SetUniformI(const GLint *loc, int count, const int *udata) { 644 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 645 #ifdef _DEBUG 646 _dbg_assert_(curProgram_); 647 #endif 648 GLRRenderData data{ GLRRenderCommand::UNIFORM4I }; 649 data.uniform4.loc = loc; 650 data.uniform4.count = count; 651 memcpy(data.uniform4.v, udata, sizeof(int) * count); 652 curRenderStep_->commands.push_back(data); 653 } 654 SetUniformI1(const GLint * loc,int udata)655 void SetUniformI1(const GLint *loc, int udata) { 656 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 657 #ifdef _DEBUG 658 _dbg_assert_(curProgram_); 659 #endif 660 GLRRenderData data{ GLRRenderCommand::UNIFORM4I }; 661 data.uniform4.loc = loc; 662 data.uniform4.count = 1; 663 memcpy(data.uniform4.v, &udata, sizeof(udata)); 664 curRenderStep_->commands.push_back(data); 665 } 666 SetUniformUI(const GLint * loc,int count,const uint32_t * udata)667 void SetUniformUI(const GLint *loc, int count, const uint32_t *udata) { 668 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 669 #ifdef _DEBUG 670 _dbg_assert_(curProgram_); 671 #endif 672 GLRRenderData data{ GLRRenderCommand::UNIFORM4UI }; 673 data.uniform4.loc = loc; 674 data.uniform4.count = count; 675 memcpy(data.uniform4.v, udata, sizeof(uint32_t) * count); 676 curRenderStep_->commands.push_back(data); 677 } 678 SetUniformUI1(const GLint * loc,uint32_t udata)679 void SetUniformUI1(const GLint *loc, uint32_t udata) { 680 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 681 #ifdef _DEBUG 682 _dbg_assert_(curProgram_); 683 #endif 684 GLRRenderData data{ GLRRenderCommand::UNIFORM4UI }; 685 data.uniform4.loc = loc; 686 data.uniform4.count = 1; 687 memcpy(data.uniform4.v, &udata, sizeof(udata)); 688 curRenderStep_->commands.push_back(data); 689 } 690 SetUniformF(const GLint * loc,int count,const float * udata)691 void SetUniformF(const GLint *loc, int count, const float *udata) { 692 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 693 #ifdef _DEBUG 694 _dbg_assert_(curProgram_); 695 #endif 696 GLRRenderData data{ GLRRenderCommand::UNIFORM4F }; 697 data.uniform4.loc = loc; 698 data.uniform4.count = count; 699 memcpy(data.uniform4.v, udata, sizeof(float) * count); 700 curRenderStep_->commands.push_back(data); 701 } 702 SetUniformF1(const GLint * loc,const float udata)703 void SetUniformF1(const GLint *loc, const float udata) { 704 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 705 #ifdef _DEBUG 706 _dbg_assert_(curProgram_); 707 #endif 708 GLRRenderData data{ GLRRenderCommand::UNIFORM4F }; 709 data.uniform4.loc = loc; 710 data.uniform4.count = 1; 711 memcpy(data.uniform4.v, &udata, sizeof(float)); 712 curRenderStep_->commands.push_back(data); 713 } 714 SetUniformF(const char * name,int count,const float * udata)715 void SetUniformF(const char *name, int count, const float *udata) { 716 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 717 #ifdef _DEBUG 718 _dbg_assert_(curProgram_); 719 #endif 720 GLRRenderData data{ GLRRenderCommand::UNIFORM4F }; 721 data.uniform4.name = name; 722 data.uniform4.count = count; 723 memcpy(data.uniform4.v, udata, sizeof(float) * count); 724 curRenderStep_->commands.push_back(data); 725 } 726 SetUniformM4x4(const GLint * loc,const float * udata)727 void SetUniformM4x4(const GLint *loc, const float *udata) { 728 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 729 #ifdef _DEBUG 730 _dbg_assert_(curProgram_); 731 #endif 732 GLRRenderData data{ GLRRenderCommand::UNIFORMMATRIX }; 733 data.uniformMatrix4.loc = loc; 734 memcpy(data.uniformMatrix4.m, udata, sizeof(float) * 16); 735 curRenderStep_->commands.push_back(data); 736 } 737 SetUniformM4x4(const char * name,const float * udata)738 void SetUniformM4x4(const char *name, const float *udata) { 739 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 740 #ifdef _DEBUG 741 _dbg_assert_(curProgram_); 742 #endif 743 GLRRenderData data{ GLRRenderCommand::UNIFORMMATRIX }; 744 data.uniformMatrix4.name = name; 745 memcpy(data.uniformMatrix4.m, udata, sizeof(float) * 16); 746 curRenderStep_->commands.push_back(data); 747 } 748 SetBlendAndMask(int colorMask,bool blendEnabled,GLenum srcColor,GLenum dstColor,GLenum srcAlpha,GLenum dstAlpha,GLenum funcColor,GLenum funcAlpha)749 void SetBlendAndMask(int colorMask, bool blendEnabled, GLenum srcColor, GLenum dstColor, GLenum srcAlpha, GLenum dstAlpha, GLenum funcColor, GLenum funcAlpha) { 750 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 751 GLRRenderData data{ GLRRenderCommand::BLEND }; 752 data.blend.mask = colorMask; 753 data.blend.enabled = blendEnabled; 754 data.blend.srcColor = srcColor; 755 data.blend.dstColor = dstColor; 756 data.blend.srcAlpha = srcAlpha; 757 data.blend.dstAlpha = dstAlpha; 758 data.blend.funcColor = funcColor; 759 data.blend.funcAlpha = funcAlpha; 760 curRenderStep_->commands.push_back(data); 761 } 762 SetNoBlendAndMask(int colorMask)763 void SetNoBlendAndMask(int colorMask) { 764 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 765 GLRRenderData data{ GLRRenderCommand::BLEND }; 766 data.blend.mask = colorMask; 767 data.blend.enabled = false; 768 curRenderStep_->commands.push_back(data); 769 } 770 771 #ifndef USING_GLES2 SetLogicOp(bool enabled,GLenum logicOp)772 void SetLogicOp(bool enabled, GLenum logicOp) { 773 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 774 GLRRenderData data{ GLRRenderCommand::LOGICOP }; 775 data.logic.enabled = enabled; 776 data.logic.logicOp = logicOp; 777 curRenderStep_->commands.push_back(data); 778 } 779 #endif 780 SetStencilFunc(bool enabled,GLenum func,uint8_t refValue,uint8_t compareMask)781 void SetStencilFunc(bool enabled, GLenum func, uint8_t refValue, uint8_t compareMask) { 782 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 783 GLRRenderData data{ GLRRenderCommand::STENCILFUNC }; 784 data.stencilFunc.enabled = enabled; 785 data.stencilFunc.func = func; 786 data.stencilFunc.ref = refValue; 787 data.stencilFunc.compareMask = compareMask; 788 curRenderStep_->commands.push_back(data); 789 } 790 SetStencilOp(uint8_t writeMask,GLenum sFail,GLenum zFail,GLenum pass)791 void SetStencilOp(uint8_t writeMask, GLenum sFail, GLenum zFail, GLenum pass) { 792 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 793 GLRRenderData data{ GLRRenderCommand::STENCILOP }; 794 data.stencilOp.writeMask = writeMask; 795 data.stencilOp.sFail = sFail; 796 data.stencilOp.zFail = zFail; 797 data.stencilOp.pass = pass; 798 curRenderStep_->commands.push_back(data); 799 } 800 SetStencilDisabled()801 void SetStencilDisabled() { 802 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 803 GLRRenderData data; 804 data.cmd = GLRRenderCommand::STENCILFUNC; 805 data.stencilFunc.enabled = false; 806 curRenderStep_->commands.push_back(data); 807 } 808 SetBlendFactor(const float color[4])809 void SetBlendFactor(const float color[4]) { 810 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 811 GLRRenderData data{ GLRRenderCommand::BLENDCOLOR }; 812 CopyFloat4(data.blendColor.color, color); 813 curRenderStep_->commands.push_back(data); 814 } 815 SetRaster(GLboolean cullEnable,GLenum frontFace,GLenum cullFace,GLboolean ditherEnable,GLboolean depthClamp)816 void SetRaster(GLboolean cullEnable, GLenum frontFace, GLenum cullFace, GLboolean ditherEnable, GLboolean depthClamp) { 817 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 818 GLRRenderData data{ GLRRenderCommand::RASTER }; 819 data.raster.cullEnable = cullEnable; 820 data.raster.frontFace = frontFace; 821 data.raster.cullFace = cullFace; 822 data.raster.ditherEnable = ditherEnable; 823 data.raster.depthClampEnable = depthClamp; 824 curRenderStep_->commands.push_back(data); 825 } 826 827 // Modifies the current texture as per GL specs, not global state. SetTextureSampler(int slot,GLenum wrapS,GLenum wrapT,GLenum magFilter,GLenum minFilter,float anisotropy)828 void SetTextureSampler(int slot, GLenum wrapS, GLenum wrapT, GLenum magFilter, GLenum minFilter, float anisotropy) { 829 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 830 _dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS); 831 GLRRenderData data{ GLRRenderCommand::TEXTURESAMPLER }; 832 data.textureSampler.slot = slot; 833 data.textureSampler.wrapS = wrapS; 834 data.textureSampler.wrapT = wrapT; 835 data.textureSampler.magFilter = magFilter; 836 data.textureSampler.minFilter = minFilter; 837 data.textureSampler.anisotropy = anisotropy; 838 curRenderStep_->commands.push_back(data); 839 } 840 SetTextureLod(int slot,float minLod,float maxLod,float lodBias)841 void SetTextureLod(int slot, float minLod, float maxLod, float lodBias) { 842 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 843 _dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS); 844 GLRRenderData data{ GLRRenderCommand::TEXTURELOD}; 845 data.textureLod.slot = slot; 846 data.textureLod.minLod = minLod; 847 data.textureLod.maxLod = maxLod; 848 data.textureLod.lodBias = lodBias; 849 curRenderStep_->commands.push_back(data); 850 } 851 852 // If scissorW == 0, no scissor is applied (the whole render target is cleared). Clear(uint32_t clearColor,float clearZ,int clearStencil,int clearMask,int colorMask,int scissorX,int scissorY,int scissorW,int scissorH)853 void Clear(uint32_t clearColor, float clearZ, int clearStencil, int clearMask, int colorMask, int scissorX, int scissorY, int scissorW, int scissorH) { 854 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 855 if (!clearMask) 856 return; 857 GLRRenderData data{ GLRRenderCommand::CLEAR }; 858 data.clear.clearMask = clearMask; 859 data.clear.clearColor = clearColor; 860 data.clear.clearZ = clearZ; 861 data.clear.clearStencil = clearStencil; 862 data.clear.colorMask = colorMask; 863 data.clear.scissorX = scissorX; 864 data.clear.scissorY = scissorY; 865 data.clear.scissorW = scissorW; 866 data.clear.scissorH = scissorH; 867 curRenderStep_->commands.push_back(data); 868 } 869 Invalidate(int invalidateMask)870 void Invalidate(int invalidateMask) { 871 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 872 GLRRenderData data{ GLRRenderCommand::INVALIDATE }; 873 data.clear.clearMask = invalidateMask; 874 curRenderStep_->commands.push_back(data); 875 } 876 Draw(GLenum mode,int first,int count)877 void Draw(GLenum mode, int first, int count) { 878 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 879 GLRRenderData data{ GLRRenderCommand::DRAW }; 880 data.draw.mode = mode; 881 data.draw.first = first; 882 data.draw.count = count; 883 data.draw.buffer = 0; 884 curRenderStep_->commands.push_back(data); 885 curRenderStep_->render.numDraws++; 886 } 887 888 void DrawIndexed(GLenum mode, int count, GLenum indexType, void *indices, int instances = 1) { 889 _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); 890 GLRRenderData data{ GLRRenderCommand::DRAW_INDEXED }; 891 data.drawIndexed.mode = mode; 892 data.drawIndexed.count = count; 893 data.drawIndexed.indexType = indexType; 894 data.drawIndexed.instances = instances; 895 data.drawIndexed.indices = indices; 896 curRenderStep_->commands.push_back(data); 897 curRenderStep_->render.numDraws++; 898 } 899 900 enum { MAX_INFLIGHT_FRAMES = 3 }; 901 SetInflightFrames(int f)902 void SetInflightFrames(int f) { 903 newInflightFrames_ = f < 1 || f > MAX_INFLIGHT_FRAMES ? MAX_INFLIGHT_FRAMES : f; 904 } 905 GetCurFrame()906 int GetCurFrame() const { 907 return curFrame_; 908 } 909 Resize(int width,int height)910 void Resize(int width, int height) { 911 targetWidth_ = width; 912 targetHeight_ = height; 913 queueRunner_.Resize(width, height); 914 } 915 UnregisterPushBuffer(GLPushBuffer * buffer)916 void UnregisterPushBuffer(GLPushBuffer *buffer) { 917 int foundCount = 0; 918 for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) { 919 auto iter = frameData_[i].activePushBuffers.find(buffer); 920 if (iter != frameData_[i].activePushBuffers.end()) { 921 frameData_[i].activePushBuffers.erase(iter); 922 foundCount++; 923 } 924 } 925 _dbg_assert_(foundCount == 1); 926 } 927 SetSwapFunction(std::function<void ()> swapFunction)928 void SetSwapFunction(std::function<void()> swapFunction) { 929 swapFunction_ = swapFunction; 930 } 931 SetSwapIntervalFunction(std::function<void (int)> swapIntervalFunction)932 void SetSwapIntervalFunction(std::function<void(int)> swapIntervalFunction) { 933 swapIntervalFunction_ = swapIntervalFunction; 934 } 935 SwapInterval(int interval)936 void SwapInterval(int interval) { 937 if (interval != swapInterval_) { 938 swapInterval_ = interval; 939 swapIntervalChanged_ = true; 940 } 941 } 942 943 void StopThread(); 944 SawOutOfMemory()945 bool SawOutOfMemory() { 946 return queueRunner_.SawOutOfMemory(); 947 } 948 949 // Only supports a common subset. GetGLString(int name)950 std::string GetGLString(int name) const { 951 return queueRunner_.GetGLString(name); 952 } 953 954 // Used during Android-style ugly shutdown. No need to have a way to set it back because we'll be 955 // destroyed. SetSkipGLCalls()956 void SetSkipGLCalls() { 957 skipGLCalls_ = true; 958 } 959 960 // Gets a frame-unique ID of the current step being recorded. Can be used to figure out 961 // when the current step has changed, which means the caller will need to re-record its state. GetCurrentStepId()962 int GetCurrentStepId() const { 963 return renderStepOffset_ + (int)steps_.size(); 964 } 965 966 private: 967 void BeginSubmitFrame(int frame); 968 void EndSubmitFrame(int frame); 969 void Submit(int frame, bool triggerFence); 970 971 // Bad for performance but sometimes necessary for synchronous CPU readbacks (screenshots and whatnot). 972 void FlushSync(); 973 void EndSyncFrame(int frame); 974 975 // When using legacy functionality for push buffers (glBufferData), we need to flush them 976 // before actually making the glDraw* calls. It's best if the render manager handles that. RegisterPushBuffer(int frame,GLPushBuffer * buffer)977 void RegisterPushBuffer(int frame, GLPushBuffer *buffer) { 978 frameData_[frame].activePushBuffers.insert(buffer); 979 } 980 981 // Per-frame data, round-robin so we can overlap submission with execution of the previous frame. 982 struct FrameData { 983 std::mutex push_mutex; 984 std::condition_variable push_condVar; 985 986 std::mutex pull_mutex; 987 std::condition_variable pull_condVar; 988 989 bool readyForFence = true; 990 bool readyForRun = false; 991 bool readyForSubmit = false; 992 bool skipSwap = false; 993 GLRRunType type = GLRRunType::END; 994 995 // GLuint fence; For future AZDO stuff? 996 std::vector<GLRStep *> steps; 997 std::vector<GLRInitStep> initSteps; 998 999 // Swapchain. 1000 bool hasBegun = false; 1001 uint32_t curSwapchainImage = -1; 1002 1003 GLDeleter deleter; 1004 GLDeleter deleter_prev; 1005 std::set<GLPushBuffer *> activePushBuffers; 1006 }; 1007 1008 FrameData frameData_[MAX_INFLIGHT_FRAMES]; 1009 1010 // Submission time state 1011 bool insideFrame_ = false; 1012 // This is the offset within this frame, in case of a mid-frame sync. 1013 int renderStepOffset_ = 0; 1014 GLRStep *curRenderStep_ = nullptr; 1015 std::vector<GLRStep *> steps_; 1016 std::vector<GLRInitStep> initSteps_; 1017 1018 // Execution time state 1019 bool run_ = true; 1020 // Thread is managed elsewhere, and should call ThreadFrame. 1021 std::mutex mutex_; 1022 int threadInitFrame_ = 0; 1023 GLQueueRunner queueRunner_; 1024 1025 // Thread state 1026 int threadFrame_ = -1; 1027 1028 bool nextFrame = false; 1029 bool firstFrame = true; 1030 1031 GLDeleter deleter_; 1032 bool skipGLCalls_ = false; 1033 1034 int curFrame_ = 0; 1035 1036 std::function<void()> swapFunction_; 1037 std::function<void(int)> swapIntervalFunction_; 1038 GLBufferStrategy bufferStrategy_ = GLBufferStrategy::SUBDATA; 1039 1040 int inflightFrames_ = MAX_INFLIGHT_FRAMES; 1041 int newInflightFrames_ = -1; 1042 1043 int swapInterval_ = 0; 1044 bool swapIntervalChanged_ = true; 1045 1046 int targetWidth_ = 0; 1047 int targetHeight_ = 0; 1048 1049 #ifdef _DEBUG 1050 GLRProgram *curProgram_ = nullptr; 1051 #endif 1052 }; 1053