1 // Copyright 2016 Dolphin Emulator Project
2 // Licensed under GPLv2+
3 // Refer to the license.txt file included.
4
5 #include "VideoBackends/Vulkan/StreamBuffer.h"
6
7 #include <algorithm>
8 #include <cstdint>
9 #include <functional>
10
11 #include "Common/Align.h"
12 #include "Common/Assert.h"
13 #include "Common/MsgHandler.h"
14
15 #include "VideoBackends/Vulkan/CommandBufferManager.h"
16 #include "VideoBackends/Vulkan/VulkanContext.h"
17
18 namespace Vulkan
19 {
StreamBuffer(VkBufferUsageFlags usage,u32 size)20 StreamBuffer::StreamBuffer(VkBufferUsageFlags usage, u32 size) : m_usage(usage), m_size(size)
21 {
22 }
23
~StreamBuffer()24 StreamBuffer::~StreamBuffer()
25 {
26 if (m_host_pointer)
27 vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory);
28
29 if (m_buffer != VK_NULL_HANDLE)
30 g_command_buffer_mgr->DeferBufferDestruction(m_buffer);
31 if (m_memory != VK_NULL_HANDLE)
32 g_command_buffer_mgr->DeferDeviceMemoryDestruction(m_memory);
33 }
34
Create(VkBufferUsageFlags usage,u32 size)35 std::unique_ptr<StreamBuffer> StreamBuffer::Create(VkBufferUsageFlags usage, u32 size)
36 {
37 std::unique_ptr<StreamBuffer> buffer = std::make_unique<StreamBuffer>(usage, size);
38 if (!buffer->AllocateBuffer())
39 return nullptr;
40
41 return buffer;
42 }
43
AllocateBuffer()44 bool StreamBuffer::AllocateBuffer()
45 {
46 // Create the buffer descriptor
47 VkBufferCreateInfo buffer_create_info = {
48 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType
49 nullptr, // const void* pNext
50 0, // VkBufferCreateFlags flags
51 static_cast<VkDeviceSize>(m_size), // VkDeviceSize size
52 m_usage, // VkBufferUsageFlags usage
53 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode
54 0, // uint32_t queueFamilyIndexCount
55 nullptr // const uint32_t* pQueueFamilyIndices
56 };
57
58 VkBuffer buffer = VK_NULL_HANDLE;
59 VkResult res =
60 vkCreateBuffer(g_vulkan_context->GetDevice(), &buffer_create_info, nullptr, &buffer);
61 if (res != VK_SUCCESS)
62 {
63 LOG_VULKAN_ERROR(res, "vkCreateBuffer failed: ");
64 return false;
65 }
66
67 // Get memory requirements (types etc) for this buffer
68 VkMemoryRequirements memory_requirements;
69 vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &memory_requirements);
70
71 // Aim for a coherent mapping if possible.
72 u32 memory_type_index = g_vulkan_context->GetUploadMemoryType(memory_requirements.memoryTypeBits,
73 &m_coherent_mapping);
74
75 // Allocate memory for backing this buffer
76 VkMemoryAllocateInfo memory_allocate_info = {
77 VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType
78 nullptr, // const void* pNext
79 memory_requirements.size, // VkDeviceSize allocationSize
80 memory_type_index // uint32_t memoryTypeIndex
81 };
82 VkDeviceMemory memory = VK_NULL_HANDLE;
83 res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_allocate_info, nullptr, &memory);
84 if (res != VK_SUCCESS)
85 {
86 LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: ");
87 vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr);
88 return false;
89 }
90
91 // Bind memory to buffer
92 res = vkBindBufferMemory(g_vulkan_context->GetDevice(), buffer, memory, 0);
93 if (res != VK_SUCCESS)
94 {
95 LOG_VULKAN_ERROR(res, "vkBindBufferMemory failed: ");
96 vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr);
97 vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr);
98 return false;
99 }
100
101 // Map this buffer into user-space
102 void* mapped_ptr = nullptr;
103 res = vkMapMemory(g_vulkan_context->GetDevice(), memory, 0, m_size, 0, &mapped_ptr);
104 if (res != VK_SUCCESS)
105 {
106 LOG_VULKAN_ERROR(res, "vkMapMemory failed: ");
107 vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr);
108 vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr);
109 return false;
110 }
111
112 // Unmap current host pointer (if there was a previous buffer)
113 if (m_host_pointer)
114 vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory);
115
116 // Destroy the backings for the buffer after the command buffer executes
117 if (m_buffer != VK_NULL_HANDLE)
118 g_command_buffer_mgr->DeferBufferDestruction(m_buffer);
119 if (m_memory != VK_NULL_HANDLE)
120 g_command_buffer_mgr->DeferDeviceMemoryDestruction(m_memory);
121
122 // Replace with the new buffer
123 m_buffer = buffer;
124 m_memory = memory;
125 m_host_pointer = reinterpret_cast<u8*>(mapped_ptr);
126 m_current_offset = 0;
127 m_current_gpu_position = 0;
128 m_tracked_fences.clear();
129 return true;
130 }
131
ReserveMemory(u32 num_bytes,u32 alignment)132 bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
133 {
134 const u32 required_bytes = num_bytes + alignment;
135
136 // Check for sane allocations
137 if (required_bytes > m_size)
138 {
139 PanicAlert("Attempting to allocate %u bytes from a %u byte stream buffer",
140 static_cast<uint32_t>(num_bytes), static_cast<uint32_t>(m_size));
141
142 return false;
143 }
144
145 // Is the GPU behind or up to date with our current offset?
146 UpdateCurrentFencePosition();
147 if (m_current_offset >= m_current_gpu_position)
148 {
149 const u32 remaining_bytes = m_size - m_current_offset;
150 if (required_bytes <= remaining_bytes)
151 {
152 // Place at the current position, after the GPU position.
153 m_current_offset = Common::AlignUp(m_current_offset, alignment);
154 m_last_allocation_size = num_bytes;
155 return true;
156 }
157
158 // Check for space at the start of the buffer
159 // We use < here because we don't want to have the case of m_current_offset ==
160 // m_current_gpu_position. That would mean the code above would assume the
161 // GPU has caught up to us, which it hasn't.
162 if (required_bytes < m_current_gpu_position)
163 {
164 // Reset offset to zero, since we're allocating behind the gpu now
165 m_current_offset = 0;
166 m_last_allocation_size = num_bytes;
167 return true;
168 }
169 }
170
171 // Is the GPU ahead of our current offset?
172 if (m_current_offset < m_current_gpu_position)
173 {
174 // We have from m_current_offset..m_current_gpu_position space to use.
175 const u32 remaining_bytes = m_current_gpu_position - m_current_offset;
176 if (required_bytes < remaining_bytes)
177 {
178 // Place at the current position, since this is still behind the GPU.
179 m_current_offset = Common::AlignUp(m_current_offset, alignment);
180 m_last_allocation_size = num_bytes;
181 return true;
182 }
183 }
184
185 // Can we find a fence to wait on that will give us enough memory?
186 if (WaitForClearSpace(required_bytes))
187 {
188 m_current_offset = Common::AlignUp(m_current_offset, alignment);
189 m_last_allocation_size = num_bytes;
190 return true;
191 }
192
193 // We tried everything we could, and still couldn't get anything. This means that too much space
194 // in the buffer is being used by the command buffer currently being recorded. Therefore, the
195 // only option is to execute it, and wait until it's done.
196 return false;
197 }
198
CommitMemory(u32 final_num_bytes)199 void StreamBuffer::CommitMemory(u32 final_num_bytes)
200 {
201 ASSERT((m_current_offset + final_num_bytes) <= m_size);
202 ASSERT(final_num_bytes <= m_last_allocation_size);
203
204 // For non-coherent mappings, flush the memory range
205 if (!m_coherent_mapping)
206 {
207 VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, nullptr, m_memory,
208 m_current_offset, final_num_bytes};
209 vkFlushMappedMemoryRanges(g_vulkan_context->GetDevice(), 1, &range);
210 }
211
212 m_current_offset += final_num_bytes;
213 }
214
UpdateCurrentFencePosition()215 void StreamBuffer::UpdateCurrentFencePosition()
216 {
217 // Don't create a tracking entry if the GPU is caught up with the buffer.
218 if (m_current_offset == m_current_gpu_position)
219 return;
220
221 // Has the offset changed since the last fence?
222 const u64 counter = g_command_buffer_mgr->GetCurrentFenceCounter();
223 if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter)
224 {
225 // Still haven't executed a command buffer, so just update the offset.
226 m_tracked_fences.back().second = m_current_offset;
227 return;
228 }
229
230 // New buffer, so update the GPU position while we're at it.
231 UpdateGPUPosition();
232 m_tracked_fences.emplace_back(counter, m_current_offset);
233 }
234
UpdateGPUPosition()235 void StreamBuffer::UpdateGPUPosition()
236 {
237 auto start = m_tracked_fences.begin();
238 auto end = start;
239
240 const u64 completed_counter = g_command_buffer_mgr->GetCompletedFenceCounter();
241 while (end != m_tracked_fences.end() && completed_counter >= end->first)
242 {
243 m_current_gpu_position = end->second;
244 ++end;
245 }
246
247 if (start != end)
248 m_tracked_fences.erase(start, end);
249 }
250
WaitForClearSpace(u32 num_bytes)251 bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
252 {
253 u32 new_offset = 0;
254 u32 new_gpu_position = 0;
255
256 auto iter = m_tracked_fences.begin();
257 for (; iter != m_tracked_fences.end(); ++iter)
258 {
259 // Would this fence bring us in line with the GPU?
260 // This is the "last resort" case, where a command buffer execution has been forced
261 // after no additional data has been written to it, so we can assume that after the
262 // fence has been signaled the entire buffer is now consumed.
263 u32 gpu_position = iter->second;
264 if (m_current_offset == gpu_position)
265 {
266 new_offset = 0;
267 new_gpu_position = 0;
268 break;
269 }
270
271 // Assuming that we wait for this fence, are we allocating in front of the GPU?
272 if (m_current_offset > gpu_position)
273 {
274 // This would suggest the GPU has now followed us and wrapped around, so we have from
275 // m_current_position..m_size free, as well as and 0..gpu_position.
276 const u32 remaining_space_after_offset = m_size - m_current_offset;
277 if (remaining_space_after_offset >= num_bytes)
278 {
279 // Switch to allocating in front of the GPU, using the remainder of the buffer.
280 new_offset = m_current_offset;
281 new_gpu_position = gpu_position;
282 break;
283 }
284
285 // We can wrap around to the start, behind the GPU, if there is enough space.
286 // We use > here because otherwise we'd end up lining up with the GPU, and then the
287 // allocator would assume that the GPU has consumed what we just wrote.
288 if (gpu_position > num_bytes)
289 {
290 new_offset = 0;
291 new_gpu_position = gpu_position;
292 break;
293 }
294 }
295 else
296 {
297 // We're currently allocating behind the GPU. This would give us between the current
298 // offset and the GPU position worth of space to work with. Again, > because we can't
299 // align the GPU position with the buffer offset.
300 u32 available_space_inbetween = gpu_position - m_current_offset;
301 if (available_space_inbetween > num_bytes)
302 {
303 // Leave the offset as-is, but update the GPU position.
304 new_offset = m_current_offset;
305 new_gpu_position = gpu_position;
306 break;
307 }
308 }
309 }
310
311 // Did any fences satisfy this condition?
312 // Has the command buffer been executed yet? If not, the caller should execute it.
313 if (iter == m_tracked_fences.end() ||
314 iter->first == g_command_buffer_mgr->GetCurrentFenceCounter())
315 {
316 return false;
317 }
318
319 // Wait until this fence is signaled. This will fire the callback, updating the GPU position.
320 g_command_buffer_mgr->WaitForFenceCounter(iter->first);
321 m_tracked_fences.erase(m_tracked_fences.begin(),
322 m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
323 m_current_offset = new_offset;
324 m_current_gpu_position = new_gpu_position;
325 return true;
326 }
327
328 } // namespace Vulkan
329