1 // Copyright 2016 Dolphin Emulator Project
2 // Licensed under GPLv2+
3 // Refer to the license.txt file included.
4 
5 #include "VideoBackends/Vulkan/StreamBuffer.h"
6 
7 #include <algorithm>
8 #include <cstdint>
9 #include <functional>
10 
11 #include "Common/Align.h"
12 #include "Common/Assert.h"
13 #include "Common/MsgHandler.h"
14 
15 #include "VideoBackends/Vulkan/CommandBufferManager.h"
16 #include "VideoBackends/Vulkan/VulkanContext.h"
17 
18 namespace Vulkan
19 {
StreamBuffer(VkBufferUsageFlags usage,u32 size)20 StreamBuffer::StreamBuffer(VkBufferUsageFlags usage, u32 size) : m_usage(usage), m_size(size)
21 {
22 }
23 
~StreamBuffer()24 StreamBuffer::~StreamBuffer()
25 {
26   if (m_host_pointer)
27     vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory);
28 
29   if (m_buffer != VK_NULL_HANDLE)
30     g_command_buffer_mgr->DeferBufferDestruction(m_buffer);
31   if (m_memory != VK_NULL_HANDLE)
32     g_command_buffer_mgr->DeferDeviceMemoryDestruction(m_memory);
33 }
34 
Create(VkBufferUsageFlags usage,u32 size)35 std::unique_ptr<StreamBuffer> StreamBuffer::Create(VkBufferUsageFlags usage, u32 size)
36 {
37   std::unique_ptr<StreamBuffer> buffer = std::make_unique<StreamBuffer>(usage, size);
38   if (!buffer->AllocateBuffer())
39     return nullptr;
40 
41   return buffer;
42 }
43 
AllocateBuffer()44 bool StreamBuffer::AllocateBuffer()
45 {
46   // Create the buffer descriptor
47   VkBufferCreateInfo buffer_create_info = {
48       VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,  // VkStructureType        sType
49       nullptr,                               // const void*            pNext
50       0,                                     // VkBufferCreateFlags    flags
51       static_cast<VkDeviceSize>(m_size),     // VkDeviceSize           size
52       m_usage,                               // VkBufferUsageFlags     usage
53       VK_SHARING_MODE_EXCLUSIVE,             // VkSharingMode          sharingMode
54       0,                                     // uint32_t               queueFamilyIndexCount
55       nullptr                                // const uint32_t*        pQueueFamilyIndices
56   };
57 
58   VkBuffer buffer = VK_NULL_HANDLE;
59   VkResult res =
60       vkCreateBuffer(g_vulkan_context->GetDevice(), &buffer_create_info, nullptr, &buffer);
61   if (res != VK_SUCCESS)
62   {
63     LOG_VULKAN_ERROR(res, "vkCreateBuffer failed: ");
64     return false;
65   }
66 
67   // Get memory requirements (types etc) for this buffer
68   VkMemoryRequirements memory_requirements;
69   vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &memory_requirements);
70 
71   // Aim for a coherent mapping if possible.
72   u32 memory_type_index = g_vulkan_context->GetUploadMemoryType(memory_requirements.memoryTypeBits,
73                                                                 &m_coherent_mapping);
74 
75   // Allocate memory for backing this buffer
76   VkMemoryAllocateInfo memory_allocate_info = {
77       VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,  // VkStructureType    sType
78       nullptr,                                 // const void*        pNext
79       memory_requirements.size,                // VkDeviceSize       allocationSize
80       memory_type_index                        // uint32_t           memoryTypeIndex
81   };
82   VkDeviceMemory memory = VK_NULL_HANDLE;
83   res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_allocate_info, nullptr, &memory);
84   if (res != VK_SUCCESS)
85   {
86     LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: ");
87     vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr);
88     return false;
89   }
90 
91   // Bind memory to buffer
92   res = vkBindBufferMemory(g_vulkan_context->GetDevice(), buffer, memory, 0);
93   if (res != VK_SUCCESS)
94   {
95     LOG_VULKAN_ERROR(res, "vkBindBufferMemory failed: ");
96     vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr);
97     vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr);
98     return false;
99   }
100 
101   // Map this buffer into user-space
102   void* mapped_ptr = nullptr;
103   res = vkMapMemory(g_vulkan_context->GetDevice(), memory, 0, m_size, 0, &mapped_ptr);
104   if (res != VK_SUCCESS)
105   {
106     LOG_VULKAN_ERROR(res, "vkMapMemory failed: ");
107     vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr);
108     vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr);
109     return false;
110   }
111 
112   // Unmap current host pointer (if there was a previous buffer)
113   if (m_host_pointer)
114     vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory);
115 
116   // Destroy the backings for the buffer after the command buffer executes
117   if (m_buffer != VK_NULL_HANDLE)
118     g_command_buffer_mgr->DeferBufferDestruction(m_buffer);
119   if (m_memory != VK_NULL_HANDLE)
120     g_command_buffer_mgr->DeferDeviceMemoryDestruction(m_memory);
121 
122   // Replace with the new buffer
123   m_buffer = buffer;
124   m_memory = memory;
125   m_host_pointer = reinterpret_cast<u8*>(mapped_ptr);
126   m_current_offset = 0;
127   m_current_gpu_position = 0;
128   m_tracked_fences.clear();
129   return true;
130 }
131 
ReserveMemory(u32 num_bytes,u32 alignment)132 bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
133 {
134   const u32 required_bytes = num_bytes + alignment;
135 
136   // Check for sane allocations
137   if (required_bytes > m_size)
138   {
139     PanicAlert("Attempting to allocate %u bytes from a %u byte stream buffer",
140                static_cast<uint32_t>(num_bytes), static_cast<uint32_t>(m_size));
141 
142     return false;
143   }
144 
145   // Is the GPU behind or up to date with our current offset?
146   UpdateCurrentFencePosition();
147   if (m_current_offset >= m_current_gpu_position)
148   {
149     const u32 remaining_bytes = m_size - m_current_offset;
150     if (required_bytes <= remaining_bytes)
151     {
152       // Place at the current position, after the GPU position.
153       m_current_offset = Common::AlignUp(m_current_offset, alignment);
154       m_last_allocation_size = num_bytes;
155       return true;
156     }
157 
158     // Check for space at the start of the buffer
159     // We use < here because we don't want to have the case of m_current_offset ==
160     // m_current_gpu_position. That would mean the code above would assume the
161     // GPU has caught up to us, which it hasn't.
162     if (required_bytes < m_current_gpu_position)
163     {
164       // Reset offset to zero, since we're allocating behind the gpu now
165       m_current_offset = 0;
166       m_last_allocation_size = num_bytes;
167       return true;
168     }
169   }
170 
171   // Is the GPU ahead of our current offset?
172   if (m_current_offset < m_current_gpu_position)
173   {
174     // We have from m_current_offset..m_current_gpu_position space to use.
175     const u32 remaining_bytes = m_current_gpu_position - m_current_offset;
176     if (required_bytes < remaining_bytes)
177     {
178       // Place at the current position, since this is still behind the GPU.
179       m_current_offset = Common::AlignUp(m_current_offset, alignment);
180       m_last_allocation_size = num_bytes;
181       return true;
182     }
183   }
184 
185   // Can we find a fence to wait on that will give us enough memory?
186   if (WaitForClearSpace(required_bytes))
187   {
188     m_current_offset = Common::AlignUp(m_current_offset, alignment);
189     m_last_allocation_size = num_bytes;
190     return true;
191   }
192 
193   // We tried everything we could, and still couldn't get anything. This means that too much space
194   // in the buffer is being used by the command buffer currently being recorded. Therefore, the
195   // only option is to execute it, and wait until it's done.
196   return false;
197 }
198 
CommitMemory(u32 final_num_bytes)199 void StreamBuffer::CommitMemory(u32 final_num_bytes)
200 {
201   ASSERT((m_current_offset + final_num_bytes) <= m_size);
202   ASSERT(final_num_bytes <= m_last_allocation_size);
203 
204   // For non-coherent mappings, flush the memory range
205   if (!m_coherent_mapping)
206   {
207     VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, nullptr, m_memory,
208                                  m_current_offset, final_num_bytes};
209     vkFlushMappedMemoryRanges(g_vulkan_context->GetDevice(), 1, &range);
210   }
211 
212   m_current_offset += final_num_bytes;
213 }
214 
UpdateCurrentFencePosition()215 void StreamBuffer::UpdateCurrentFencePosition()
216 {
217   // Don't create a tracking entry if the GPU is caught up with the buffer.
218   if (m_current_offset == m_current_gpu_position)
219     return;
220 
221   // Has the offset changed since the last fence?
222   const u64 counter = g_command_buffer_mgr->GetCurrentFenceCounter();
223   if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter)
224   {
225     // Still haven't executed a command buffer, so just update the offset.
226     m_tracked_fences.back().second = m_current_offset;
227     return;
228   }
229 
230   // New buffer, so update the GPU position while we're at it.
231   UpdateGPUPosition();
232   m_tracked_fences.emplace_back(counter, m_current_offset);
233 }
234 
UpdateGPUPosition()235 void StreamBuffer::UpdateGPUPosition()
236 {
237   auto start = m_tracked_fences.begin();
238   auto end = start;
239 
240   const u64 completed_counter = g_command_buffer_mgr->GetCompletedFenceCounter();
241   while (end != m_tracked_fences.end() && completed_counter >= end->first)
242   {
243     m_current_gpu_position = end->second;
244     ++end;
245   }
246 
247   if (start != end)
248     m_tracked_fences.erase(start, end);
249 }
250 
WaitForClearSpace(u32 num_bytes)251 bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
252 {
253   u32 new_offset = 0;
254   u32 new_gpu_position = 0;
255 
256   auto iter = m_tracked_fences.begin();
257   for (; iter != m_tracked_fences.end(); ++iter)
258   {
259     // Would this fence bring us in line with the GPU?
260     // This is the "last resort" case, where a command buffer execution has been forced
261     // after no additional data has been written to it, so we can assume that after the
262     // fence has been signaled the entire buffer is now consumed.
263     u32 gpu_position = iter->second;
264     if (m_current_offset == gpu_position)
265     {
266       new_offset = 0;
267       new_gpu_position = 0;
268       break;
269     }
270 
271     // Assuming that we wait for this fence, are we allocating in front of the GPU?
272     if (m_current_offset > gpu_position)
273     {
274       // This would suggest the GPU has now followed us and wrapped around, so we have from
275       // m_current_position..m_size free, as well as and 0..gpu_position.
276       const u32 remaining_space_after_offset = m_size - m_current_offset;
277       if (remaining_space_after_offset >= num_bytes)
278       {
279         // Switch to allocating in front of the GPU, using the remainder of the buffer.
280         new_offset = m_current_offset;
281         new_gpu_position = gpu_position;
282         break;
283       }
284 
285       // We can wrap around to the start, behind the GPU, if there is enough space.
286       // We use > here because otherwise we'd end up lining up with the GPU, and then the
287       // allocator would assume that the GPU has consumed what we just wrote.
288       if (gpu_position > num_bytes)
289       {
290         new_offset = 0;
291         new_gpu_position = gpu_position;
292         break;
293       }
294     }
295     else
296     {
297       // We're currently allocating behind the GPU. This would give us between the current
298       // offset and the GPU position worth of space to work with. Again, > because we can't
299       // align the GPU position with the buffer offset.
300       u32 available_space_inbetween = gpu_position - m_current_offset;
301       if (available_space_inbetween > num_bytes)
302       {
303         // Leave the offset as-is, but update the GPU position.
304         new_offset = m_current_offset;
305         new_gpu_position = gpu_position;
306         break;
307       }
308     }
309   }
310 
311   // Did any fences satisfy this condition?
312   // Has the command buffer been executed yet? If not, the caller should execute it.
313   if (iter == m_tracked_fences.end() ||
314       iter->first == g_command_buffer_mgr->GetCurrentFenceCounter())
315   {
316     return false;
317   }
318 
319   // Wait until this fence is signaled. This will fire the callback, updating the GPU position.
320   g_command_buffer_mgr->WaitForFenceCounter(iter->first);
321   m_tracked_fences.erase(m_tracked_fences.begin(),
322                          m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
323   m_current_offset = new_offset;
324   m_current_gpu_position = new_gpu_position;
325   return true;
326 }
327 
328 }  // namespace Vulkan
329