1 // Copyright 2019 yuzu Emulator Project
2 // Licensed under GPLv2 or any later version
3 // Refer to the license.txt file included.
4 
5 #include <algorithm>
6 #include <limits>
7 #include <optional>
8 #include <tuple>
9 #include <vector>
10 
11 #include "common/alignment.h"
12 #include "common/assert.h"
13 #include "video_core/renderer_vulkan/vk_device.h"
14 #include "video_core/renderer_vulkan/vk_scheduler.h"
15 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
16 #include "video_core/renderer_vulkan/wrapper.h"
17 
18 namespace Vulkan {
19 
20 namespace {
21 
22 constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
23 constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
24 
25 constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
26 
27 /// Find a memory type with the passed requirements
FindMemoryType(const VkPhysicalDeviceMemoryProperties & properties,VkMemoryPropertyFlags wanted,u32 filter=std::numeric_limits<u32>::max ())28 std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
29                                   VkMemoryPropertyFlags wanted,
30                                   u32 filter = std::numeric_limits<u32>::max()) {
31     for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
32         const auto flags = properties.memoryTypes[i].propertyFlags;
33         if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) {
34             return i;
35         }
36     }
37     return std::nullopt;
38 }
39 
40 /// Get the preferred host visible memory type.
GetMemoryType(const VkPhysicalDeviceMemoryProperties & properties,u32 filter=std::numeric_limits<u32>::max ())41 u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
42                   u32 filter = std::numeric_limits<u32>::max()) {
43     // Prefer device local host visible allocations. Both AMD and Nvidia now provide one.
44     // Otherwise search for a host visible allocation.
45     static constexpr auto HOST_MEMORY =
46         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
47     static constexpr auto DYNAMIC_MEMORY = HOST_MEMORY | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
48 
49     std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY);
50     if (!preferred_type) {
51         preferred_type = FindMemoryType(properties, HOST_MEMORY);
52         ASSERT_MSG(preferred_type, "No host visible and coherent memory type found");
53     }
54     return preferred_type.value_or(0);
55 }
56 
57 } // Anonymous namespace
58 
VKStreamBuffer(const VKDevice & device_,VKScheduler & scheduler_,VkBufferUsageFlags usage)59 VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_,
60                                VkBufferUsageFlags usage)
61     : device{device_}, scheduler{scheduler_} {
62     CreateBuffers(usage);
63     ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
64     ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
65 }
66 
// Defaulted out of line: no explicit teardown is done here, cleanup is left entirely to the
// destructors of the members.
VKStreamBuffer::~VKStreamBuffer() = default;
68 
Map(u64 size,u64 alignment)69 std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
70     ASSERT(size <= stream_buffer_size);
71     mapped_size = size;
72 
73     if (alignment > 0) {
74         offset = Common::AlignUp(offset, alignment);
75     }
76 
77     WaitPendingOperations(offset);
78 
79     bool invalidated = false;
80     if (offset + size > stream_buffer_size) {
81         // The buffer would overflow, save the amount of used watches and reset the state.
82         invalidation_mark = current_watch_cursor;
83         current_watch_cursor = 0;
84         offset = 0;
85 
86         // Swap watches and reset waiting cursors.
87         std::swap(previous_watches, current_watches);
88         wait_cursor = 0;
89         wait_bound = 0;
90 
91         // Ensure that we don't wait for uncommitted fences.
92         scheduler.Flush();
93 
94         invalidated = true;
95     }
96 
97     return {memory.Map(offset, size), offset, invalidated};
98 }
99 
Unmap(u64 size)100 void VKStreamBuffer::Unmap(u64 size) {
101     ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
102 
103     memory.Unmap();
104 
105     offset += size;
106 
107     if (current_watch_cursor + 1 >= current_watches.size()) {
108         // Ensure that there are enough watches.
109         ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
110     }
111     auto& watch = current_watches[current_watch_cursor++];
112     watch.upper_bound = offset;
113     watch.tick = scheduler.CurrentTick();
114 }
115 
CreateBuffers(VkBufferUsageFlags usage)116 void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
117     const auto memory_properties = device.GetPhysical().GetMemoryProperties();
118     const u32 preferred_type = GetMemoryType(memory_properties);
119     const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
120 
121     // Substract from the preferred heap size some bytes to avoid getting out of memory.
122     const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
123     // As per DXVK's example, using `heap_size / 2`
124     const VkDeviceSize allocable_size = heap_size / 2;
125     buffer = device.GetLogical().CreateBuffer({
126         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
127         .pNext = nullptr,
128         .flags = 0,
129         .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size),
130         .usage = usage,
131         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
132         .queueFamilyIndexCount = 0,
133         .pQueueFamilyIndices = nullptr,
134     });
135 
136     const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer);
137     const u32 required_flags = requirements.memoryTypeBits;
138     stream_buffer_size = static_cast<u64>(requirements.size);
139 
140     memory = device.GetLogical().AllocateMemory({
141         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
142         .pNext = nullptr,
143         .allocationSize = requirements.size,
144         .memoryTypeIndex = GetMemoryType(memory_properties, required_flags),
145     });
146     buffer.BindMemory(*memory, 0);
147 }
148 
ReserveWatches(std::vector<Watch> & watches,std::size_t grow_size)149 void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
150     watches.resize(watches.size() + grow_size);
151 }
152 
WaitPendingOperations(u64 requested_upper_bound)153 void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
154     if (!invalidation_mark) {
155         return;
156     }
157     while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
158         auto& watch = previous_watches[wait_cursor];
159         wait_bound = watch.upper_bound;
160         scheduler.Wait(watch.tick);
161         ++wait_cursor;
162     }
163 }
164 
165 } // namespace Vulkan
166