1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #ifndef NCNN_ALLOCATOR_H
16 #define NCNN_ALLOCATOR_H
17
18 #ifdef _WIN32
19 #define WIN32_LEAN_AND_MEAN
20 #include <windows.h>
21 #endif
22
23 #include "platform.h"
24
25 #include <stdlib.h>
26
27 #if NCNN_VULKAN
28 #include <vulkan/vulkan.h>
29 #endif // NCNN_VULKAN
30
31 #if NCNN_PLATFORM_API
32 #if __ANDROID_API__ >= 26
33 #include <android/hardware_buffer.h>
34 #endif // __ANDROID_API__ >= 26
35 #endif // NCNN_PLATFORM_API
36
37 namespace ncnn {
38
39 #if __AVX__
40 // the alignment of all the allocated buffers
41 #define MALLOC_ALIGN 32
42 #else
43 // the alignment of all the allocated buffers
44 #define MALLOC_ALIGN 16
45 #endif
46
// Rounds a pointer address up to the next multiple of n.
// ptr  pointer to align
// n    alignment in bytes; must be a power of two
template<typename _Tp>
static inline _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp))
{
    const size_t addr = (size_t)ptr;
    const size_t mask = (size_t)n - 1;
    return (_Tp*)((addr + mask) & ~mask);
}
55
// Rounds a byte count up to the next multiple of n.
// Returns the smallest value >= sz that is divisible by n.
// sz  buffer size to align
// n   alignment in bytes; must be a power of two
static inline size_t alignSize(size_t sz, int n)
{
    const size_t mask = (size_t)n - 1;
    return (sz + mask) & ~mask;
}
64
// Allocates a buffer of at least `size` bytes whose address is aligned to
// MALLOC_ALIGN. The result must be released with fastFree(), never plain free().
// Returns 0 on allocation failure.
static inline void* fastMalloc(size_t size)
{
#if _MSC_VER
    // MSVC CRT provides aligned allocation directly
    return _aligned_malloc(size, MALLOC_ALIGN);
#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
    void* ptr = 0;
    // posix_memalign returns non-zero on failure; normalize the result to 0
    if (posix_memalign(&ptr, MALLOC_ALIGN, size))
        ptr = 0;
    return ptr;
#elif __ANDROID__ && __ANDROID_API__ < 17
    // old Android NDKs lack posix_memalign; memalign is the bionic equivalent
    return memalign(MALLOC_ALIGN, size);
#else
    // Generic fallback: over-allocate, align manually, and stash the original
    // malloc pointer in the slot just before the aligned address so that
    // fastFree can recover it.
    unsigned char* udata = (unsigned char*)malloc(size + sizeof(void*) + MALLOC_ALIGN);
    if (!udata)
        return 0;
    unsigned char** adata = alignPtr((unsigned char**)udata + 1, MALLOC_ALIGN);
    adata[-1] = udata;
    return adata;
#endif
}
85
// Releases a buffer obtained from fastMalloc(). Safe to call with a null pointer.
// The branch taken here must match the one used in fastMalloc for the same build.
static inline void fastFree(void* ptr)
{
    if (ptr)
    {
#if _MSC_VER
        // pairs with _aligned_malloc
        _aligned_free(ptr);
#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
        // posix_memalign memory is released with plain free
        free(ptr);
#elif __ANDROID__ && __ANDROID_API__ < 17
        // bionic memalign memory can be released with free
        free(ptr);
#else
        // recover the original malloc pointer stored just before the aligned block
        unsigned char* udata = ((unsigned char**)ptr)[-1];
        free(udata);
#endif
    }
}
102
#if NCNN_THREADS
// exchange-add operation for atomic operations on reference counters
// NCNN_XADD(addr, delta) performs { int tmp = *addr; *addr += delta; return tmp; }
// atomically on supported toolchains.
#if defined __riscv && !defined __riscv_atomic
// riscv target without A extension
// NOTE(review): this load/add/store is not atomic — presumably acceptable on
// such targets; confirm they are effectively single-threaded here.
static inline int NCNN_XADD(int* addr, int delta)
{
    int tmp = *addr;
    *addr += delta;
    return tmp;
}
#elif defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32)
// atomic increment on the linux version of the Intel(tm) compiler
#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
#elif defined __GNUC__
#if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
#ifdef __ATOMIC_ACQ_REL
// clang C11 atomic builtin with acquire-release ordering
#define NCNN_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
#else
// 4 is the numeric value of memory_order_acq_rel when the macro is unavailable
#define NCNN_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
#endif
#else
#if defined __ATOMIC_ACQ_REL && !defined __clang__
// version for gcc >= 4.7
#define NCNN_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
#else
// legacy gcc __sync builtin (full barrier) for older toolchains
#define NCNN_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
#endif
#endif
#elif defined _MSC_VER && !defined RC_INVOKED
// MSVC interlocked intrinsic; long is 32-bit on windows, matching int
#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
#else
// thread-unsafe branch
// unknown toolchain: fall back to a plain non-atomic implementation
static inline int NCNN_XADD(int* addr, int delta)
{
    int tmp = *addr;
    *addr += delta;
    return tmp;
}
#endif
#else // NCNN_THREADS
// single-threaded build: no atomicity required
static inline int NCNN_XADD(int* addr, int delta)
{
    int tmp = *addr;
    *addr += delta;
    return tmp;
}
#endif // NCNN_THREADS
150
// Abstract interface for CPU memory allocators, mirroring the free functions
// fastMalloc/fastFree above. Implementations may pool or otherwise customize
// allocation strategy.
class NCNN_EXPORT Allocator
{
public:
    virtual ~Allocator();
    // allocate at least `size` bytes; returns 0 on failure
    virtual void* fastMalloc(size_t size) = 0;
    // release a pointer previously returned by this allocator's fastMalloc
    virtual void fastFree(void* ptr) = 0;
};
158
class PoolAllocatorPrivate;
// Pooling allocator: freed buffers are kept and reused for later requests of a
// similar size. Internals live in PoolAllocatorPrivate (pimpl). NOTE(review):
// the "Unlocked" variant below suggests this one is mutex-guarded — confirm in
// the implementation.
class NCNN_EXPORT PoolAllocator : public Allocator
{
public:
    PoolAllocator();
    ~PoolAllocator();

    // ratio range 0 ~ 1
    // default cr = 0.75
    // controls how close a pooled buffer's size must be to a request for reuse
    void set_size_compare_ratio(float scr);

    // release all budgets immediately
    void clear();

    virtual void* fastMalloc(size_t size);
    virtual void fastFree(void* ptr);

private:
    // non-copyable
    PoolAllocator(const PoolAllocator&);
    PoolAllocator& operator=(const PoolAllocator&);

private:
    PoolAllocatorPrivate* const d;
};
183
class UnlockedPoolAllocatorPrivate;
// Pooling allocator without internal locking — per its name, intended for
// single-threaded use (confirm thread-safety expectations in the
// implementation). Otherwise behaves like PoolAllocator.
class NCNN_EXPORT UnlockedPoolAllocator : public Allocator
{
public:
    UnlockedPoolAllocator();
    ~UnlockedPoolAllocator();

    // ratio range 0 ~ 1
    // default cr = 0.75
    // controls how close a pooled buffer's size must be to a request for reuse
    void set_size_compare_ratio(float scr);

    // release all budgets immediately
    void clear();

    virtual void* fastMalloc(size_t size);
    virtual void fastFree(void* ptr);

private:
    // non-copyable
    UnlockedPoolAllocator(const UnlockedPoolAllocator&);
    UnlockedPoolAllocator& operator=(const UnlockedPoolAllocator&);

private:
    UnlockedPoolAllocatorPrivate* const d;
};
208
#if NCNN_VULKAN

class VulkanDevice;

// Bookkeeping record for a vulkan buffer (sub-)allocation handed out by a
// VkAllocator.
class NCNN_EXPORT VkBufferMemory
{
public:
    // the vulkan buffer object this allocation lives in
    VkBuffer buffer;

    // the base offset assigned by allocator
    size_t offset;
    size_t capacity;

    // backing device memory; mapped_ptr is a host-side mapping
    // (presumably null when the memory is not mappable — confirm in impl)
    VkDeviceMemory memory;
    void* mapped_ptr;

    // buffer state, modified by command functions internally
    mutable VkAccessFlags access_flags;
    mutable VkPipelineStageFlags stage_flags;

    // initialize and modified by mat
    int refcount;
};
232
// Bookkeeping record for a vulkan image (and its view) created by a
// VkAllocator.
class NCNN_EXPORT VkImageMemory
{
public:
    VkImage image;
    VkImageView imageview;

    // underlying info assigned by allocator
    int width;
    int height;
    int depth;
    VkFormat format;

    // backing device memory; mapped_ptr is a host-side mapping
    // (presumably null when the memory is not mappable — confirm in impl)
    VkDeviceMemory memory;
    void* mapped_ptr;

    // the base offset assigned by allocator
    size_t bind_offset;
    size_t bind_capacity;

    // image state, modified by command functions internally
    mutable VkAccessFlags access_flags;
    mutable VkImageLayout image_layout;
    mutable VkPipelineStageFlags stage_flags;

    // in-execution state, modified by command functions internally
    mutable int command_refcount;

    // initialize and modified by mat
    int refcount;
};
263
// Abstract base class for all vulkan device-memory allocators. Subclasses
// implement the buffer and image fastMalloc/fastFree pairs; shared helpers for
// creating raw vulkan objects are provided in the protected section.
class NCNN_EXPORT VkAllocator
{
public:
    explicit VkAllocator(const VulkanDevice* _vkdev);
    virtual ~VkAllocator();

    // release any cached resources (default behavior defined in the .cpp)
    virtual void clear();

    // buffer allocation interface
    virtual VkBufferMemory* fastMalloc(size_t size) = 0;
    virtual void fastFree(VkBufferMemory* ptr) = 0;
    // flush/invalidate a host-visible allocation's mapped range
    // (return-value convention defined in the implementation)
    virtual int flush(VkBufferMemory* ptr);
    virtual int invalidate(VkBufferMemory* ptr);

    // image allocation interface
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0;
    virtual void fastFree(VkImageMemory* ptr) = 0;

public:
    const VulkanDevice* vkdev;
    // selected vulkan memory type indices for buffer / image allocations
    uint32_t buffer_memory_type_index;
    uint32_t image_memory_type_index;
    uint32_t reserved_type_index;
    // properties of the selected memory type: host-mappable / host-coherent
    bool mappable;
    bool coherent;

protected:
    // helpers shared by subclasses for creating raw vulkan objects
    VkBuffer create_buffer(size_t size, VkBufferUsageFlags usage);
    VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index);
    VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer);

    VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage);
    VkImageView create_imageview(VkImage image, VkFormat format);
};
296
class VkBlobAllocatorPrivate;
// Device allocator for intermediate blob tensors. Takes a preferred block
// size (default 16M), suggesting block-based sub-allocation — see the
// implementation for the actual strategy.
class NCNN_EXPORT VkBlobAllocator : public VkAllocator
{
public:
    explicit VkBlobAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 16 * 1024 * 1024); // 16M
    virtual ~VkBlobAllocator();

public:
    // release all budgets immediately
    virtual void clear();

    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    // non-copyable
    VkBlobAllocator(const VkBlobAllocator&);
    VkBlobAllocator& operator=(const VkBlobAllocator&);

private:
    VkBlobAllocatorPrivate* const d;
};
320
class VkWeightAllocatorPrivate;
// Device allocator for model weight data. Takes a preferred block size
// (default 8M), suggesting block-based sub-allocation — see the implementation
// for the actual strategy.
class NCNN_EXPORT VkWeightAllocator : public VkAllocator
{
public:
    explicit VkWeightAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 8 * 1024 * 1024); // 8M
    virtual ~VkWeightAllocator();

public:
    // release all blocks immediately
    virtual void clear();

public:
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    // non-copyable
    VkWeightAllocator(const VkWeightAllocator&);
    VkWeightAllocator& operator=(const VkWeightAllocator&);

private:
    VkWeightAllocatorPrivate* const d;
};
345
class VkStagingAllocatorPrivate;
// Allocator for staging buffers used in host<->device transfers. Exposes the
// same size-compare-ratio pooling knob as the CPU pool allocators.
class NCNN_EXPORT VkStagingAllocator : public VkAllocator
{
public:
    explicit VkStagingAllocator(const VulkanDevice* vkdev);
    virtual ~VkStagingAllocator();

public:
    // ratio range 0 ~ 1
    // default cr = 0.75
    // controls how close a pooled buffer's size must be to a request for reuse
    void set_size_compare_ratio(float scr);

    // release all budgets immediately
    virtual void clear();

    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    // non-copyable
    VkStagingAllocator(const VkStagingAllocator&);
    VkStagingAllocator& operator=(const VkStagingAllocator&);

private:
    VkStagingAllocatorPrivate* const d;
};
373
class VkWeightStagingAllocatorPrivate;
// Staging allocator dedicated to uploading model weights; unlike
// VkStagingAllocator it exposes no pooling knob.
class NCNN_EXPORT VkWeightStagingAllocator : public VkAllocator
{
public:
    explicit VkWeightStagingAllocator(const VulkanDevice* vkdev);
    virtual ~VkWeightStagingAllocator();

public:
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    // non-copyable
    VkWeightStagingAllocator(const VkWeightStagingAllocator&);
    VkWeightStagingAllocator& operator=(const VkWeightStagingAllocator&);

private:
    VkWeightStagingAllocatorPrivate* const d;
};
394
#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 26
// Allocator backed by an android AHardwareBuffer (requires API level >= 26),
// exposing the hardware buffer as a vulkan image — see the implementation for
// the import/conversion details.
class NCNN_EXPORT VkAndroidHardwareBufferImageAllocator : public VkAllocator
{
public:
    VkAndroidHardwareBufferImageAllocator(const VulkanDevice* _vkdev, AHardwareBuffer* _hb);
    virtual ~VkAndroidHardwareBufferImageAllocator();

public:
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);

private:
    // non-copyable
    VkAndroidHardwareBufferImageAllocator(const VkAndroidHardwareBufferImageAllocator&);
    VkAndroidHardwareBufferImageAllocator& operator=(const VkAndroidHardwareBufferImageAllocator&);

public:
    // queries the hardware buffer's description and format properties
    // (return-value convention defined in the implementation)
    int init();

    // dimensions and external format reported by the hardware buffer
    int width() const;
    int height() const;
    uint64_t external_format() const;

public:
    AHardwareBuffer* hb;
    AHardwareBuffer_Desc bufferDesc;
    VkAndroidHardwareBufferFormatPropertiesANDROID bufferFormatProperties;
    VkAndroidHardwareBufferPropertiesANDROID bufferProperties;
    VkSamplerYcbcrConversionKHR samplerYcbcrConversion;
};
#endif // __ANDROID_API__ >= 26
#endif // NCNN_PLATFORM_API
429
430 #endif // NCNN_VULKAN
431
432 } // namespace ncnn
433
434 #endif // NCNN_ALLOCATOR_H
435