/* Copyright (c) 2020 Themaister
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
22 
23 #pragma once
24 
25 #include <memory>
26 #include <thread>
27 #include <queue>
28 #include "device.hpp"
29 #include "video_interface.hpp"
30 #include "rdp_renderer.hpp"
31 #include "rdp_common.hpp"
32 #include "command_ring.hpp"
33 #include "worker_thread.hpp"
34 
35 #ifndef GRANITE_VULKAN_MT
36 #error "Granite Vulkan backend must be built with multithreading support."
37 #endif
38 
39 namespace RDP
40 {
// One colour sample with four 8-bit channels, laid out in r, g, b, a order.
struct RGBA
{
	uint8_t r;
	uint8_t g;
	uint8_t b;
	uint8_t a;
};
45 
// Individual option bits for CommandProcessorFlags. Each enumerator occupies
// a distinct bit so that several of them can be OR-ed together.
enum CommandProcessorFlagBits
{
	COMMAND_PROCESSOR_FLAG_HOST_VISIBLE_HIDDEN_RDRAM_BIT = 0x01,
	COMMAND_PROCESSOR_FLAG_HOST_VISIBLE_TMEM_BIT = 0x02,
	COMMAND_PROCESSOR_FLAG_UPSCALING_2X_BIT = 0x04,
	COMMAND_PROCESSOR_FLAG_UPSCALING_4X_BIT = 0x08,
	COMMAND_PROCESSOR_FLAG_UPSCALING_8X_BIT = 0x10
};

// Integer type holding a combination of CommandProcessorFlagBits values.
using CommandProcessorFlags = uint32_t;
55 
// Describes a single copy region; CoherencyOperation keeps a list of these.
// Every field defaults to zero / null.
// NOTE(review): units of the offsets and size are not visible in this header — confirm against the users.
struct CoherencyCopy
{
	// Offset into the copy source.
	size_t src_offset{0};
	// Offset of the mask data belonging to this copy.
	size_t mask_offset{0};
	// Offset into the copy destination.
	size_t dst_offset{0};
	// Extent of the copy.
	size_t size{0};
	// First of `counters` atomic counters associated with this copy (may be null).
	std::atomic_uint32_t *counter_base{nullptr};
	// Number of counters reachable from counter_base.
	unsigned counters{0};
};
65 
66 struct CoherencyOperation
67 {
68 	Vulkan::Fence fence;
69 	uint64_t timeline_value = 0;
70 
71 	uint8_t *dst = nullptr;
72 	const Vulkan::Buffer *src = nullptr;
73 	std::vector<CoherencyCopy> copies;
74 	std::atomic_uint32_t *unlock_cookie = nullptr;
75 };
76 
// These options control various behaviors when upscaling, to work around glitches
// which arise naturally as part of upscaling.
struct Quirks
{
	// Sets the defaults: native_resolution_tex_rect = true, native_texture_lod = false.
	// The raw word storage is cleared first. The option struct typically covers only
	// part of words[], so without the clear the remaining bytes would be indeterminate
	// and the word view would differ between otherwise identical Quirks objects.
	Quirks()
	{
		for (uint32_t &word : u.words)
			word = 0;

		u.options.native_resolution_tex_rect = true;
		u.options.native_texture_lod = false;
	}

	// Enables or disables the native_resolution_tex_rect option.
	void set_native_resolution_tex_rect(bool enable)
	{
		u.options.native_resolution_tex_rect = enable;
	}

	// Enables or disables the native_texture_lod option.
	void set_native_texture_lod(bool enable)
	{
		u.options.native_texture_lod = enable;
	}

	// The options and a raw word view share the same storage.
	union
	{
		struct Opts
		{
			// If true, force TEX_RECT and TEX_RECT_FLIP to render without upscaling.
			// Works around bilinear filtering bugs in Cycle1/Cycle2 mode where the game assumed 1:1 pixel transfer.
			bool native_resolution_tex_rect;

			// Forces LOD to be computed as 1x upscale.
			// Fixes content which relies on LOD computation to select textures in clever ways.
			bool native_texture_lod;
		} options;

		// Word-sized view of the option storage.
		uint32_t words[1];
	} u;
};
111 
112 class CommandProcessor
113 {
114 public:
115 	CommandProcessor(Vulkan::Device &device,
116 	                 void *rdram_ptr,
117 	                 size_t rdram_offset,
118 	                 size_t rdram_size,
119 	                 size_t hidden_rdram_size,
120 	                 CommandProcessorFlags flags);
121 
122 	~CommandProcessor();
123 
124 	bool device_is_supported() const;
125 
126 	// Synchronization.
127 	void flush();
128 	uint64_t signal_timeline();
129 	void wait_for_timeline(uint64_t index);
130 	void idle();
131 	void begin_frame_context();
132 
133 	// Queues up state and drawing commands.
134 	void enqueue_command(unsigned num_words, const uint32_t *words);
135 	void enqueue_command_direct(unsigned num_words, const uint32_t *words);
136 
137 	void set_quirks(const Quirks &quirks);
138 
139 	// Interact with memory.
140 	void *begin_read_rdram();
141 	void end_write_rdram();
142 	void *begin_read_hidden_rdram();
143 	void end_write_hidden_rdram();
144 	size_t get_rdram_size() const;
145 	size_t get_hidden_rdram_size() const;
146 	void *get_tmem();
147 
148 	// Sets VI register
149 	void set_vi_register(VIRegister reg, uint32_t value);
150 
151 	Vulkan::ImageHandle scanout(const ScanoutOptions &opts = {});
152 	void scanout_sync(std::vector<RGBA> &colors, unsigned &width, unsigned &height);
153 
154 private:
155 	Vulkan::Device &device;
156 	Vulkan::BufferHandle rdram;
157 	Vulkan::BufferHandle hidden_rdram;
158 	Vulkan::BufferHandle tmem;
159 	size_t rdram_offset;
160 	size_t rdram_size;
161 	CommandProcessorFlags flags;
162 #ifndef PARALLEL_RDP_SHADER_DIR
163 	std::unique_ptr<ShaderBank> shader_bank;
164 #endif
165 
166 	CommandRing ring;
167 
168 	VideoInterface vi;
169 	Renderer renderer;
170 
171 	void clear_hidden_rdram();
172 	void clear_tmem();
173 	void clear_buffer(Vulkan::Buffer &buffer, uint32_t value);
174 	void init_renderer();
175 
176 #define OP(x) void op_##x(const uint32_t *words)
177 	OP(fill_triangle); OP(fill_z_buffer_triangle); OP(texture_triangle); OP(texture_z_buffer_triangle);
178 	OP(shade_triangle); OP(shade_z_buffer_triangle); OP(shade_texture_triangle); OP(shade_texture_z_buffer_triangle);
179 	OP(texture_rectangle); OP(texture_rectangle_flip); OP(sync_load); OP(sync_pipe);
180 	OP(sync_tile); OP(sync_full); OP(set_key_gb); OP(set_key_r);
181 	OP(set_convert); OP(set_scissor); OP(set_prim_depth); OP(set_other_modes);
182 	OP(load_tlut); OP(set_tile_size); OP(load_block);
183 	OP(load_tile); OP(set_tile); OP(fill_rectangle); OP(set_fill_color);
184 	OP(set_fog_color); OP(set_blend_color); OP(set_prim_color); OP(set_env_color);
185 	OP(set_combine); OP(set_texture_image); OP(set_mask_image); OP(set_color_image);
186 #undef OP
187 
188 	ScissorState scissor_state = {};
189 	StaticRasterizationState static_state = {};
190 	DepthBlendState depth_blend = {};
191 
192 	struct
193 	{
194 		uint32_t addr;
195 		uint32_t width;
196 		TextureFormat fmt;
197 		TextureSize size;
198 	} texture_image = {};
199 
200 	uint64_t timeline_value = 0;
201 	uint64_t thread_timeline_value = 0;
202 
203 	struct FenceExecutor
204 	{
FenceExecutorRDP::CommandProcessor::FenceExecutor205 		explicit inline FenceExecutor(Vulkan::Device *device_, uint64_t *ptr)
206 			: device(device_), value(ptr)
207 		{
208 		}
209 
210 		Vulkan::Device *device;
211 		uint64_t *value;
212 		bool is_sentinel(const CoherencyOperation &work) const;
213 		void perform_work(CoherencyOperation &work);
214 		void notify_work_locked(const CoherencyOperation &work);
215 	};
216 	WorkerThread<CoherencyOperation, FenceExecutor> timeline_worker;
217 
218 	uint8_t *host_rdram = nullptr;
219 	bool measure_stall_time = false;
220 	bool single_threaded_processing = false;
221 	bool is_supported = false;
222 	bool is_host_coherent = true;
223 	bool timestamp = false;
224 
225 	friend class Renderer;
226 
227 	void enqueue_coherency_operation(CoherencyOperation &&op);
228 	void drain_command_ring();
229 	void decode_triangle_setup(TriangleSetup &setup, const uint32_t *words) const;
230 
231 	Quirks quirks;
232 };
233 }
234