// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef NCNN_ALLOCATOR_H
#define NCNN_ALLOCATOR_H

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif

#include "platform.h"

#include <stdlib.h>

#if NCNN_VULKAN
#include <vulkan/vulkan.h>
#endif // NCNN_VULKAN

#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 26
#include <android/hardware_buffer.h>
#endif // __ANDROID_API__ >= 26
#endif // NCNN_PLATFORM_API

namespace ncnn {

#if __AVX__
// the alignment of all the allocated buffers
#define MALLOC_ALIGN 32
#else
// the alignment of all the allocated buffers
#define MALLOC_ALIGN 16
#endif
// Aligns a pointer to the specified number of bytes
// ptr Pointer to align
// n Alignment size that must be a power of two
template<typename _Tp>
static inline _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp))
{
    return (_Tp*)(((size_t)ptr + n - 1) & -n);
}
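
// Worked example (illustrative, not part of the header): with n = 16,
// alignPtr rounds an address up to the next 16-byte boundary:
//   unsigned char buf[64];
//   unsigned char* p = alignPtr(buf + 3, 16); // smallest 16-byte-aligned address >= buf + 3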

// Aligns a buffer size to the specified number of bytes
// The function returns the minimum number that is greater or equal to sz and is divisible by n
// sz Buffer size to align
// n Alignment size that must be a power of two
static inline size_t alignSize(size_t sz, int n)
{
    return (sz + n - 1) & -n;
}
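
// Worked example: alignSize(100, 16) == 112, the smallest multiple of 16
// that is >= 100; an already-aligned size passes through unchanged,
// e.g. alignSize(112, 16) == 112.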

static inline void* fastMalloc(size_t size)
{
#if _MSC_VER
    return _aligned_malloc(size, MALLOC_ALIGN);
#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
    void* ptr = 0;
    if (posix_memalign(&ptr, MALLOC_ALIGN, size))
        ptr = 0;
    return ptr;
#elif __ANDROID__ && __ANDROID_API__ < 17
    return memalign(MALLOC_ALIGN, size);
#else
    unsigned char* udata = (unsigned char*)malloc(size + sizeof(void*) + MALLOC_ALIGN);
    if (!udata)
        return 0;
    unsigned char** adata = alignPtr((unsigned char**)udata + 1, MALLOC_ALIGN);
    adata[-1] = udata;
    return adata;
#endif
}

static inline void fastFree(void* ptr)
{
    if (ptr)
    {
#if _MSC_VER
        _aligned_free(ptr);
#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
        free(ptr);
#elif __ANDROID__ && __ANDROID_API__ < 17
        free(ptr);
#else
        unsigned char* udata = ((unsigned char**)ptr)[-1];
        free(udata);
#endif
    }
}
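
// Usage sketch (illustrative): fastMalloc/fastFree must be used as a pair.
// On the portable fallback path the original malloc pointer is stashed just
// before the aligned address, so releasing with plain free() would corrupt the heap.
//   float* data = (float*)fastMalloc(224 * 224 * 3 * sizeof(float));
//   if (data)
//   {
//       // ... use the MALLOC_ALIGN-aligned buffer ...
//       fastFree(data);
//   }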

#if NCNN_THREADS
// exchange-add operation for atomic operations on reference counters
#if defined __riscv && !defined __riscv_atomic
// riscv target without A extension
static inline int NCNN_XADD(int* addr, int delta)
{
    int tmp = *addr;
    *addr += delta;
    return tmp;
}
#elif defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32)
// atomic increment on the linux version of the Intel(tm) compiler
#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
#elif defined __GNUC__
#if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
#ifdef __ATOMIC_ACQ_REL
#define NCNN_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
#else
#define NCNN_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
#endif
#else
#if defined __ATOMIC_ACQ_REL && !defined __clang__
// version for gcc >= 4.7
#define NCNN_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
#else
#define NCNN_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
#endif
#endif
#elif defined _MSC_VER && !defined RC_INVOKED
#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
#else
// thread-unsafe branch
static inline int NCNN_XADD(int* addr, int delta)
{
    int tmp = *addr;
    *addr += delta;
    return tmp;
}
#endif
#else  // NCNN_THREADS
static inline int NCNN_XADD(int* addr, int delta)
{
    int tmp = *addr;
    *addr += delta;
    return tmp;
}
#endif // NCNN_THREADS
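
// NCNN_XADD returns the value held *before* the addition, so a typical
// reference-count release (mirroring how ncnn::Mat uses it) looks like:
//   if (NCNN_XADD(&refcount, -1) == 1) // this call dropped the last reference
//       fastFree(data);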

class NCNN_EXPORT Allocator
{
public:
    virtual ~Allocator();
    virtual void* fastMalloc(size_t size) = 0;
    virtual void fastFree(void* ptr) = 0;
};
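
// A minimal custom allocator sketch (hypothetical; the only requirement is
// implementing the two pure virtuals above):
//   class MyAllocator : public Allocator
//   {
//   public:
//       virtual void* fastMalloc(size_t size) { return ncnn::fastMalloc(size); }
//       virtual void fastFree(void* ptr) { ncnn::fastFree(ptr); }
//   };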

class PoolAllocatorPrivate;
class NCNN_EXPORT PoolAllocator : public Allocator
{
public:
    PoolAllocator();
    ~PoolAllocator();

    // ratio range 0 ~ 1
    // default scr = 0.75
    void set_size_compare_ratio(float scr);

    // release all budgets immediately
    void clear();

    virtual void* fastMalloc(size_t size);
    virtual void fastFree(void* ptr);

private:
    PoolAllocator(const PoolAllocator&);
    PoolAllocator& operator=(const PoolAllocator&);

private:
    PoolAllocatorPrivate* const d;
};

class UnlockedPoolAllocatorPrivate;
class NCNN_EXPORT UnlockedPoolAllocator : public Allocator
{
public:
    UnlockedPoolAllocator();
    ~UnlockedPoolAllocator();

    // ratio range 0 ~ 1
    // default scr = 0.75
    void set_size_compare_ratio(float scr);

    // release all budgets immediately
    void clear();

    virtual void* fastMalloc(size_t size);
    virtual void fastFree(void* ptr);

private:
    UnlockedPoolAllocator(const UnlockedPoolAllocator&);
    UnlockedPoolAllocator& operator=(const UnlockedPoolAllocator&);

private:
    UnlockedPoolAllocatorPrivate* const d;
};
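
// Usage sketch (illustrative; blob_allocator/workspace_allocator are fields of
// Option in option.h): PoolAllocator locks internally and may be shared across
// threads, while UnlockedPoolAllocator skips the lock for single-threaded use.
//   ncnn::PoolAllocator blob_pool;
//   blob_pool.set_size_compare_ratio(0.5f); // reuse a cached buffer only when the request is >= 50% of its capacity
//   ncnn::UnlockedPoolAllocator workspace_pool;
//   net.opt.blob_allocator = &blob_pool;
//   net.opt.workspace_allocator = &workspace_pool;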

#if NCNN_VULKAN

class VulkanDevice;

class NCNN_EXPORT VkBufferMemory
{
public:
    VkBuffer buffer;

    // the base offset assigned by allocator
    size_t offset;
    size_t capacity;

    VkDeviceMemory memory;
    void* mapped_ptr;

    // buffer state, modified by command functions internally
    mutable VkAccessFlags access_flags;
    mutable VkPipelineStageFlags stage_flags;

    // initialized and modified by mat
    int refcount;
};

class NCNN_EXPORT VkImageMemory
{
public:
    VkImage image;
    VkImageView imageview;

    // underlying info assigned by allocator
    int width;
    int height;
    int depth;
    VkFormat format;

    VkDeviceMemory memory;
    void* mapped_ptr;

    // the base offset assigned by allocator
    size_t bind_offset;
    size_t bind_capacity;

    // image state, modified by command functions internally
    mutable VkAccessFlags access_flags;
    mutable VkImageLayout image_layout;
    mutable VkPipelineStageFlags stage_flags;

    // in-execution state, modified by command functions internally
    mutable int command_refcount;

    // initialized and modified by mat
    int refcount;
};

class NCNN_EXPORT VkAllocator
{
public:
    explicit VkAllocator(const VulkanDevice* _vkdev);
    virtual ~VkAllocator();

    virtual void clear();

    virtual VkBufferMemory* fastMalloc(size_t size) = 0;
    virtual void fastFree(VkBufferMemory* ptr) = 0;
    virtual int flush(VkBufferMemory* ptr);
    virtual int invalidate(VkBufferMemory* ptr);

    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0;
    virtual void fastFree(VkImageMemory* ptr) = 0;

public:
    const VulkanDevice* vkdev;
    uint32_t buffer_memory_type_index;
    uint32_t image_memory_type_index;
    uint32_t reserved_type_index;
    bool mappable;
    bool coherent;

protected:
    VkBuffer create_buffer(size_t size, VkBufferUsageFlags usage);
    VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index);
    VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer);

    VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage);
    VkImageView create_imageview(VkImage image, VkFormat format);
};

class VkBlobAllocatorPrivate;
class NCNN_EXPORT VkBlobAllocator : public VkAllocator
{
public:
    explicit VkBlobAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 16 * 1024 * 1024); // 16M
    virtual ~VkBlobAllocator();

public:
    // release all budgets immediately
    virtual void clear();

    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    VkBlobAllocator(const VkBlobAllocator&);
    VkBlobAllocator& operator=(const VkBlobAllocator&);

private:
    VkBlobAllocatorPrivate* const d;
};

class VkWeightAllocatorPrivate;
class NCNN_EXPORT VkWeightAllocator : public VkAllocator
{
public:
    explicit VkWeightAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 8 * 1024 * 1024); // 8M
    virtual ~VkWeightAllocator();

public:
    // release all blocks immediately
    virtual void clear();

public:
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    VkWeightAllocator(const VkWeightAllocator&);
    VkWeightAllocator& operator=(const VkWeightAllocator&);

private:
    VkWeightAllocatorPrivate* const d;
};

class VkStagingAllocatorPrivate;
class NCNN_EXPORT VkStagingAllocator : public VkAllocator
{
public:
    explicit VkStagingAllocator(const VulkanDevice* vkdev);
    virtual ~VkStagingAllocator();

public:
    // ratio range 0 ~ 1
    // default scr = 0.75
    void set_size_compare_ratio(float scr);

    // release all budgets immediately
    virtual void clear();

    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    VkStagingAllocator(const VkStagingAllocator&);
    VkStagingAllocator& operator=(const VkStagingAllocator&);

private:
    VkStagingAllocatorPrivate* const d;
};
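
// Usage sketch (illustrative; get_gpu_device() comes from gpu.h and the
// *_vkallocator fields from Option in option.h):
//   const ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device(0);
//   ncnn::VkBlobAllocator blob_vkallocator(vkdev);
//   ncnn::VkStagingAllocator staging_vkallocator(vkdev);
//   net.opt.blob_vkallocator = &blob_vkallocator;
//   net.opt.staging_vkallocator = &staging_vkallocator;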

class VkWeightStagingAllocatorPrivate;
class NCNN_EXPORT VkWeightStagingAllocator : public VkAllocator
{
public:
    explicit VkWeightStagingAllocator(const VulkanDevice* vkdev);
    virtual ~VkWeightStagingAllocator();

public:
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    VkWeightStagingAllocator(const VkWeightStagingAllocator&);
    VkWeightStagingAllocator& operator=(const VkWeightStagingAllocator&);

private:
    VkWeightStagingAllocatorPrivate* const d;
};

#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 26
class NCNN_EXPORT VkAndroidHardwareBufferImageAllocator : public VkAllocator
{
public:
    VkAndroidHardwareBufferImageAllocator(const VulkanDevice* _vkdev, AHardwareBuffer* _hb);
    virtual ~VkAndroidHardwareBufferImageAllocator();

public:
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    VkAndroidHardwareBufferImageAllocator(const VkAndroidHardwareBufferImageAllocator&);
    VkAndroidHardwareBufferImageAllocator& operator=(const VkAndroidHardwareBufferImageAllocator&);

public:
    int init();

    int width() const;
    int height() const;
    uint64_t external_format() const;

public:
    AHardwareBuffer* hb;
    AHardwareBuffer_Desc bufferDesc;
    VkAndroidHardwareBufferFormatPropertiesANDROID bufferFormatProperties;
    VkAndroidHardwareBufferPropertiesANDROID bufferProperties;
    VkSamplerYcbcrConversionKHR samplerYcbcrConversion;
};
#endif // __ANDROID_API__ >= 26
#endif // NCNN_PLATFORM_API

#endif // NCNN_VULKAN

} // namespace ncnn

#endif // NCNN_ALLOCATOR_H