1 /* Copyright (c) 2020 Themaister 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining 4 * a copy of this software and associated documentation files (the 5 * "Software"), to deal in the Software without restriction, including 6 * without limitation the rights to use, copy, modify, merge, publish, 7 * distribute, sublicense, and/or sell copies of the Software, and to 8 * permit persons to whom the Software is furnished to do so, subject to 9 * the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be 12 * included in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 18 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 19 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 20 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #pragma once 24 25 #include <memory> 26 #include <thread> 27 #include <queue> 28 #include "device.hpp" 29 #include "video_interface.hpp" 30 #include "rdp_renderer.hpp" 31 #include "rdp_common.hpp" 32 #include "command_ring.hpp" 33 #include "worker_thread.hpp" 34 35 #ifndef GRANITE_VULKAN_MT 36 #error "Granite Vulkan backend must be built with multithreading support." 37 #endif 38 39 namespace RDP 40 { 41 struct RGBA 42 { 43 uint8_t r, g, b, a; 44 }; 45 46 enum CommandProcessorFlagBits 47 { 48 COMMAND_PROCESSOR_FLAG_HOST_VISIBLE_HIDDEN_RDRAM_BIT = 1 << 0, 49 COMMAND_PROCESSOR_FLAG_HOST_VISIBLE_TMEM_BIT = 1 << 1, 50 COMMAND_PROCESSOR_FLAG_UPSCALING_2X_BIT = 1 << 2, 51 COMMAND_PROCESSOR_FLAG_UPSCALING_4X_BIT = 1 << 3, 52 COMMAND_PROCESSOR_FLAG_UPSCALING_8X_BIT = 1 << 4 53 }; 54 using CommandProcessorFlags = uint32_t; 55 56 struct CoherencyCopy 57 { 58 size_t src_offset = 0; 59 size_t mask_offset = 0; 60 size_t dst_offset = 0; 61 size_t size = 0; 62 std::atomic_uint32_t *counter_base = nullptr; 63 unsigned counters = 0; 64 }; 65 66 struct CoherencyOperation 67 { 68 Vulkan::Fence fence; 69 uint64_t timeline_value = 0; 70 71 uint8_t *dst = nullptr; 72 const Vulkan::Buffer *src = nullptr; 73 std::vector<CoherencyCopy> copies; 74 std::atomic_uint32_t *unlock_cookie = nullptr; 75 }; 76 77 // These options control various behavior when upscaling to workaround glitches which arise naturally as part of upscaling. 78 struct Quirks 79 { QuirksRDP::Quirks80 inline Quirks() 81 { 82 u.options.native_resolution_tex_rect = true; 83 u.options.native_texture_lod = false; 84 } 85 set_native_resolution_tex_rectRDP::Quirks86 inline void set_native_resolution_tex_rect(bool enable) 87 { 88 u.options.native_resolution_tex_rect = enable; 89 } 90 set_native_texture_lodRDP::Quirks91 inline void set_native_texture_lod(bool enable) 92 { 93 u.options.native_texture_lod = enable; 94 } 95 96 union 97 { 98 struct Opts 99 { 100 // If true, force TEX_RECT and TEX_RECT_FLIP to render without upscaling. 101 // Works around bilinear filtering bugs in Cycle1/Cycle2 mode where game assumed 1:1 pixel transfer. 102 bool native_resolution_tex_rect; 103 104 // Forces LOD to be computed as 1x upscale. 105 // Fixes content which relies on LOD computation to select textures in clever ways. 106 bool native_texture_lod; 107 } options; 108 uint32_t words[1]; 109 } u; 110 }; 111 112 class CommandProcessor 113 { 114 public: 115 CommandProcessor(Vulkan::Device &device, 116 void *rdram_ptr, 117 size_t rdram_offset, 118 size_t rdram_size, 119 size_t hidden_rdram_size, 120 CommandProcessorFlags flags); 121 122 ~CommandProcessor(); 123 124 bool device_is_supported() const; 125 126 // Synchronization. 127 void flush(); 128 uint64_t signal_timeline(); 129 void wait_for_timeline(uint64_t index); 130 void idle(); 131 void begin_frame_context(); 132 133 // Queues up state and drawing commands. 134 void enqueue_command(unsigned num_words, const uint32_t *words); 135 void enqueue_command_direct(unsigned num_words, const uint32_t *words); 136 137 void set_quirks(const Quirks &quirks); 138 139 // Interact with memory. 140 void *begin_read_rdram(); 141 void end_write_rdram(); 142 void *begin_read_hidden_rdram(); 143 void end_write_hidden_rdram(); 144 size_t get_rdram_size() const; 145 size_t get_hidden_rdram_size() const; 146 void *get_tmem(); 147 148 // Sets VI register 149 void set_vi_register(VIRegister reg, uint32_t value); 150 151 Vulkan::ImageHandle scanout(const ScanoutOptions &opts = {}); 152 void scanout_sync(std::vector<RGBA> &colors, unsigned &width, unsigned &height); 153 154 private: 155 Vulkan::Device &device; 156 Vulkan::BufferHandle rdram; 157 Vulkan::BufferHandle hidden_rdram; 158 Vulkan::BufferHandle tmem; 159 size_t rdram_offset; 160 size_t rdram_size; 161 CommandProcessorFlags flags; 162 #ifndef PARALLEL_RDP_SHADER_DIR 163 std::unique_ptr<ShaderBank> shader_bank; 164 #endif 165 166 CommandRing ring; 167 168 VideoInterface vi; 169 Renderer renderer; 170 171 void clear_hidden_rdram(); 172 void clear_tmem(); 173 void clear_buffer(Vulkan::Buffer &buffer, uint32_t value); 174 void init_renderer(); 175 176 #define OP(x) void op_##x(const uint32_t *words) 177 OP(fill_triangle); OP(fill_z_buffer_triangle); OP(texture_triangle); OP(texture_z_buffer_triangle); 178 OP(shade_triangle); OP(shade_z_buffer_triangle); OP(shade_texture_triangle); OP(shade_texture_z_buffer_triangle); 179 OP(texture_rectangle); OP(texture_rectangle_flip); OP(sync_load); OP(sync_pipe); 180 OP(sync_tile); OP(sync_full); OP(set_key_gb); OP(set_key_r); 181 OP(set_convert); OP(set_scissor); OP(set_prim_depth); OP(set_other_modes); 182 OP(load_tlut); OP(set_tile_size); OP(load_block); 183 OP(load_tile); OP(set_tile); OP(fill_rectangle); OP(set_fill_color); 184 OP(set_fog_color); OP(set_blend_color); OP(set_prim_color); OP(set_env_color); 185 OP(set_combine); OP(set_texture_image); OP(set_mask_image); OP(set_color_image); 186 #undef OP 187 188 ScissorState scissor_state = {}; 189 StaticRasterizationState static_state = {}; 190 DepthBlendState depth_blend = {}; 191 192 struct 193 { 194 uint32_t addr; 195 uint32_t width; 196 TextureFormat fmt; 197 TextureSize size; 198 } texture_image = {}; 199 200 uint64_t timeline_value = 0; 201 uint64_t thread_timeline_value = 0; 202 203 struct FenceExecutor 204 { FenceExecutorRDP::CommandProcessor::FenceExecutor205 explicit inline FenceExecutor(Vulkan::Device *device_, uint64_t *ptr) 206 : device(device_), value(ptr) 207 { 208 } 209 210 Vulkan::Device *device; 211 uint64_t *value; 212 bool is_sentinel(const CoherencyOperation &work) const; 213 void perform_work(CoherencyOperation &work); 214 void notify_work_locked(const CoherencyOperation &work); 215 }; 216 WorkerThread<CoherencyOperation, FenceExecutor> timeline_worker; 217 218 uint8_t *host_rdram = nullptr; 219 bool measure_stall_time = false; 220 bool single_threaded_processing = false; 221 bool is_supported = false; 222 bool is_host_coherent = true; 223 bool timestamp = false; 224 225 friend class Renderer; 226 227 void enqueue_coherency_operation(CoherencyOperation &&op); 228 void drain_command_ring(); 229 void decode_triangle_setup(TriangleSetup &setup, const uint32_t *words) const; 230 231 Quirks quirks; 232 }; 233 } 234