1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #ifndef NCNN_MAT_H
16 #define NCNN_MAT_H
17
18 #include <stdlib.h>
19 #include <string.h>
20 #if __ARM_NEON
21 #include <arm_neon.h>
22 #endif
23 #if __AVX__
24 #include <immintrin.h>
25 #endif
26
27 #include "allocator.h"
28 #include "option.h"
29 #include "platform.h"
30
31 #if NCNN_VULKAN
32 #include <vulkan/vulkan.h>
33 #endif // NCNN_VULKAN
34
35 #if NCNN_PIXEL
36 #if NCNN_PLATFORM_API
37 #if __ANDROID_API__ >= 9
38 #include <android/bitmap.h>
39 #include <jni.h>
40 #endif // __ANDROID_API__ >= 9
41 #endif // NCNN_PLATFORM_API
42 #endif // NCNN_PIXEL
43
44 namespace ncnn {
45
46 #if NCNN_VULKAN
47 class VkMat;
48 class VkImageMat;
49 #endif // NCNN_VULKAN
50
51 // the three dimension matrix
// the three dimension matrix
// Reference-counted tensor container of rank 1..3 (w, h-w, c-h-w).
// Storage is either owned (allocated through Allocator, freed when the shared
// refcount drops to zero) or external (wraps a user pointer, refcount is NULL
// and Mat never frees it).  Copy construction and assignment are shallow: the
// data pointer is shared and the refcount incremented; use clone() for a deep
// copy.
class NCNN_EXPORT Mat
{
public:
    // empty
    Mat();
    // vec
    Mat(int w, size_t elemsize = 4u, Allocator* allocator = 0);
    // image
    Mat(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0);
    // dim
    Mat(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0);
    // packed vec
    Mat(int w, size_t elemsize, int elempack, Allocator* allocator = 0);
    // packed image
    Mat(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0);
    // packed dim
    Mat(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0);
    // copy (shallow: shares data, refcount++)
    Mat(const Mat& m);
    // external vec (wraps user memory; refcount stays NULL, never freed here)
    Mat(int w, void* data, size_t elemsize = 4u, Allocator* allocator = 0);
    // external image
    Mat(int w, int h, void* data, size_t elemsize = 4u, Allocator* allocator = 0);
    // external dim
    Mat(int w, int h, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0);
    // external packed vec
    Mat(int w, void* data, size_t elemsize, int elempack, Allocator* allocator = 0);
    // external packed image
    Mat(int w, int h, void* data, size_t elemsize, int elempack, Allocator* allocator = 0);
    // external packed dim
    Mat(int w, int h, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0);
    // release
    ~Mat();
    // assign (shallow: shares data, refcount++)
    Mat& operator=(const Mat& m);
    // set all
    void fill(float v);
    void fill(int v);
#if __ARM_NEON
    void fill(float32x4_t _v);
    void fill(uint16x4_t _v);
    void fill(int32x4_t _v);
    void fill(int32x4_t _v0, int32x4_t _v1);
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    void fill(float16x4_t _v);
    void fill(float16x8_t _v);
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#endif // __ARM_NEON
#if __AVX__
    void fill(__m256 _v);
    void fill(__m128i _v);
#endif // __AVX__
    template<typename T>
    void fill(T v);
    // deep copy
    Mat clone(Allocator* allocator = 0) const;
    // deep copy from other mat, inplace
    void clone_from(const ncnn::Mat& mat, Allocator* allocator = 0);
    // reshape vec
    Mat reshape(int w, Allocator* allocator = 0) const;
    // reshape image
    Mat reshape(int w, int h, Allocator* allocator = 0) const;
    // reshape dim
    Mat reshape(int w, int h, int c, Allocator* allocator = 0) const;
    // allocate vec
    void create(int w, size_t elemsize = 4u, Allocator* allocator = 0);
    // allocate image
    void create(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0);
    // allocate dim
    void create(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0);
    // allocate packed vec
    void create(int w, size_t elemsize, int elempack, Allocator* allocator = 0);
    // allocate packed image
    void create(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0);
    // allocate packed dim
    void create(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0);
    // allocate like
    void create_like(const Mat& m, Allocator* allocator = 0);
#if NCNN_VULKAN
    // allocate like
    void create_like(const VkMat& m, Allocator* allocator = 0);
    // allocate like
    void create_like(const VkImageMat& im, Allocator* allocator = 0);
#endif // NCNN_VULKAN
    // refcount++
    void addref();
    // refcount--
    void release();

    bool empty() const;
    size_t total() const;

    // bits per element
    int elembits() const;

    // shape only
    Mat shape() const;

    // data reference
    Mat channel(int c);
    const Mat channel(int c) const;
    float* row(int y);
    const float* row(int y) const;
    template<typename T>
    T* row(int y);
    template<typename T>
    const T* row(int y) const;

    // range reference
    Mat channel_range(int c, int channels);
    const Mat channel_range(int c, int channels) const;
    Mat row_range(int y, int rows);
    const Mat row_range(int y, int rows) const;
    Mat range(int x, int n);
    const Mat range(int x, int n) const;

    // access raw data
    template<typename T>
    operator T*();
    template<typename T>
    operator const T*() const;

    // convenient access float vec element
    float& operator[](size_t i);
    const float& operator[](size_t i) const;

#if NCNN_PIXEL
    // Pixel type encoding: low 16 bits = source format, high 16 bits =
    // destination format for a conversion (0 means no conversion).
    enum PixelType
    {
        PIXEL_CONVERT_SHIFT = 16,
        PIXEL_FORMAT_MASK = 0x0000ffff,
        PIXEL_CONVERT_MASK = 0xffff0000,

        PIXEL_RGB = 1,
        PIXEL_BGR = 2,
        PIXEL_GRAY = 3,
        PIXEL_RGBA = 4,
        PIXEL_BGRA = 5,

        PIXEL_RGB2BGR = PIXEL_RGB | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
        PIXEL_RGB2GRAY = PIXEL_RGB | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
        PIXEL_RGB2RGBA = PIXEL_RGB | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT),
        PIXEL_RGB2BGRA = PIXEL_RGB | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT),

        PIXEL_BGR2RGB = PIXEL_BGR | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
        PIXEL_BGR2GRAY = PIXEL_BGR | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
        PIXEL_BGR2RGBA = PIXEL_BGR | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT),
        PIXEL_BGR2BGRA = PIXEL_BGR | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT),

        PIXEL_GRAY2RGB = PIXEL_GRAY | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
        PIXEL_GRAY2BGR = PIXEL_GRAY | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
        PIXEL_GRAY2RGBA = PIXEL_GRAY | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT),
        PIXEL_GRAY2BGRA = PIXEL_GRAY | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT),

        PIXEL_RGBA2RGB = PIXEL_RGBA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
        PIXEL_RGBA2BGR = PIXEL_RGBA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
        PIXEL_RGBA2GRAY = PIXEL_RGBA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
        PIXEL_RGBA2BGRA = PIXEL_RGBA | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT),

        PIXEL_BGRA2RGB = PIXEL_BGRA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
        PIXEL_BGRA2BGR = PIXEL_BGRA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
        PIXEL_BGRA2GRAY = PIXEL_BGRA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
        PIXEL_BGRA2RGBA = PIXEL_BGRA | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT),
    };
    // convenient construct from pixel data
    static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, Allocator* allocator = 0);
    // convenient construct from pixel data with stride(bytes-per-row) parameter
    static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, Allocator* allocator = 0);
    // convenient construct from pixel data and resize to specific size
    static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int target_width, int target_height, Allocator* allocator = 0);
    // convenient construct from pixel data and resize to specific size with stride(bytes-per-row) parameter
    static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, Allocator* allocator = 0);
    // convenient construct from pixel data roi
    static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0);
    // convenient construct from pixel data roi with stride(bytes-per-row) parameter
    static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0);
    // convenient construct from pixel data roi and resize to specific size
    static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0);
    // convenient construct from pixel data roi and resize to specific size with stride(bytes-per-row) parameter
    static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0);

    // convenient export to pixel data
    void to_pixels(unsigned char* pixels, int type) const;
    // convenient export to pixel data with stride(bytes-per-row) parameter
    void to_pixels(unsigned char* pixels, int type, int stride) const;
    // convenient export to pixel data and resize to specific size
    void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height) const;
    // convenient export to pixel data and resize to specific size with stride(bytes-per-row) parameter
    void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height, int target_stride) const;

#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 9
    // convenient construct from android Bitmap
    static Mat from_android_bitmap(JNIEnv* env, jobject bitmap, int type_to, Allocator* allocator = 0);
    // convenient construct from android Bitmap and resize to specific size
    static Mat from_android_bitmap_resize(JNIEnv* env, jobject bitmap, int type_to, int target_width, int target_height, Allocator* allocator = 0);
    // convenient construct from android Bitmap roi
    static Mat from_android_bitmap_roi(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0);
    // convenient construct from android Bitmap roi and resize to specific size
    static Mat from_android_bitmap_roi_resize(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0);
    // convenient export to android Bitmap and resize to the android Bitmap size
    void to_android_bitmap(JNIEnv* env, jobject bitmap, int type_from) const;
#endif // __ANDROID_API__ >= 9
#endif // NCNN_PLATFORM_API
#endif // NCNN_PIXEL

    // subtract channel-wise mean values, then multiply by normalize values, pass 0 to skip
    // (method name keeps the project's historical spelling for API compatibility)
    void substract_mean_normalize(const float* mean_vals, const float* norm_vals);

    // convenient construct from half precision floating point data
    static Mat from_float16(const unsigned short* data, int size);

    // pointer to the data
    void* data;

    // pointer to the reference counter
    // when points to user-allocated data, the pointer is NULL
    int* refcount;

    // element size in bytes
    // 4 = float32/int32
    // 2 = float16
    // 1 = int8/uint8
    // 0 = empty
    size_t elemsize;

    // packed count inside element
    // c/1-h-w-1 h/1-w-1 w/1-1 scalar
    // c/4-h-w-4 h/4-w-4 w/4-4 sse/neon
    // c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16
    int elempack;

    // the allocator
    Allocator* allocator;

    // the dimension rank
    int dims;

    int w;
    int h;
    int c;

    // per-channel stride in elements; presumably includes alignment padding
    // between channel planes — confirm against mat.cpp's create()
    size_t cstep;
};
296
297 #if NCNN_VULKAN
298
299 // the three dimension matrix, vulkan version
// the three dimension matrix, vulkan version
// Same shape/refcount model as Mat, but the payload lives in a Vulkan device
// buffer (VkBufferMemory*) managed by a VkAllocator.  Copy/assign are shallow
// and share the same device buffer.
class NCNN_EXPORT VkMat
{
public:
    // empty
    VkMat();
    // vec
    VkMat(int w, size_t elemsize, VkAllocator* allocator);
    // image
    VkMat(int w, int h, size_t elemsize, VkAllocator* allocator);
    // dim
    VkMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator);
    // packed vec
    VkMat(int w, size_t elemsize, int elempack, VkAllocator* allocator);
    // packed image
    VkMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator);
    // packed dim
    VkMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator);
    // copy (shallow: shares device buffer, refcount++)
    VkMat(const VkMat& m);
    // external vec (wraps caller-owned device memory; refcount stays NULL)
    VkMat(int w, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator);
    // external image
    VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator);
    // external dim
    VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator);
    // external packed vec
    VkMat(int w, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
    // external packed image
    VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
    // external packed dim
    VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
    // release
    ~VkMat();
    // assign (shallow: shares device buffer, refcount++)
    VkMat& operator=(const VkMat& m);
    // allocate vec
    void create(int w, size_t elemsize, VkAllocator* allocator);
    // allocate image
    void create(int w, int h, size_t elemsize, VkAllocator* allocator);
    // allocate dim
    void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator);
    // allocate packed vec
    void create(int w, size_t elemsize, int elempack, VkAllocator* allocator);
    // allocate packed image
    void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator);
    // allocate packed dim
    void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator);
    // allocate like
    void create_like(const Mat& m, VkAllocator* allocator);
    // allocate like
    void create_like(const VkMat& m, VkAllocator* allocator);
    // allocate like
    void create_like(const VkImageMat& im, VkAllocator* allocator);

    // mapped
    // host-visible view of the device buffer; validity depends on the
    // allocator's mapping — confirm against VkAllocator docs before use
    Mat mapped() const;
    void* mapped_ptr() const;

    // refcount++
    void addref();
    // refcount--
    void release();

    bool empty() const;
    size_t total() const;

    // bits per element
    int elembits() const;

    // shape only
    Mat shape() const;

    // low-level reference
    VkBuffer buffer() const;
    size_t buffer_offset() const;
    size_t buffer_capacity() const;

    // device buffer
    VkBufferMemory* data;

    // pointer to the reference counter
    // when points to user-allocated data, the pointer is NULL
    int* refcount;

    // element size in bytes
    // 4 = float32/int32
    // 2 = float16
    // 1 = int8/uint8
    // 0 = empty
    size_t elemsize;

    // packed count inside element
    // c/1-h-w-1 h/1-w-1 w/1-1 scalar
    // c/4-h-w-4 h/4-w-4 w/4-4 sse/neon
    // c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16
    int elempack;

    // the allocator
    VkAllocator* allocator;

    // the dimension rank
    int dims;

    int w;
    int h;
    int c;

    // per-channel stride in elements — mirrors Mat::cstep
    size_t cstep;
};
409
// Vulkan image-backed matrix: same shape/refcount model as VkMat, but the
// payload is a VkImage/VkImageView (VkImageMemory*) instead of a buffer.
// Note: unlike Mat/VkMat it has no cstep member.
class NCNN_EXPORT VkImageMat
{
public:
    // empty
    VkImageMat();
    // vec
    VkImageMat(int w, size_t elemsize, VkAllocator* allocator);
    // image
    VkImageMat(int w, int h, size_t elemsize, VkAllocator* allocator);
    // dim
    VkImageMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator);
    // packed vec
    VkImageMat(int w, size_t elemsize, int elempack, VkAllocator* allocator);
    // packed image
    VkImageMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator);
    // packed dim
    VkImageMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator);
    // copy (shallow: shares device image, refcount++)
    VkImageMat(const VkImageMat& m);
    // external vec (wraps caller-owned device image; refcount stays NULL)
    VkImageMat(int w, VkImageMemory* data, size_t elemsize, VkAllocator* allocator);
    // external image
    VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, VkAllocator* allocator);
    // external dim
    VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator);
    // external packed vec
    VkImageMat(int w, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
    // external packed image
    VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
    // external packed dim
    VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
    // release
    ~VkImageMat();
    // assign (shallow: shares device image, refcount++)
    VkImageMat& operator=(const VkImageMat& m);
    // allocate vec
    void create(int w, size_t elemsize, VkAllocator* allocator);
    // allocate image
    void create(int w, int h, size_t elemsize, VkAllocator* allocator);
    // allocate dim
    void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator);
    // allocate packed vec
    void create(int w, size_t elemsize, int elempack, VkAllocator* allocator);
    // allocate packed image
    void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator);
    // allocate packed dim
    void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator);
    // allocate like
    void create_like(const Mat& m, VkAllocator* allocator);
    // allocate like
    void create_like(const VkMat& m, VkAllocator* allocator);
    // allocate like
    void create_like(const VkImageMat& im, VkAllocator* allocator);

    // mapped
    // host-visible view of the image memory; validity depends on the
    // allocator's mapping — confirm against VkAllocator docs before use
    Mat mapped() const;
    void* mapped_ptr() const;

    // refcount++
    void addref();
    // refcount--
    void release();

    bool empty() const;
    size_t total() const;

    // bits per element
    int elembits() const;

    // shape only
    Mat shape() const;

    // low-level reference
    VkImage image() const;
    VkImageView imageview() const;

#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 26
    // convenient construct from android hardware buffer
    static VkImageMat from_android_hardware_buffer(VkAndroidHardwareBufferImageAllocator* allocator);
#endif // __ANDROID_API__ >= 26
#endif // NCNN_PLATFORM_API

    // device image
    VkImageMemory* data;

    // pointer to the reference counter
    // when points to user-allocated data, the pointer is NULL
    int* refcount;

    // element size in bytes
    // 4 = float32/int32
    // 2 = float16
    // 1 = int8/uint8
    // 0 = empty
    size_t elemsize;

    // packed count inside element
    // c/1-h-w-1 h/1-w-1 w/1-1 scalar
    // c/4-h-w-4 h/4-w-4 w/4-4 sse/neon
    // c/8-h-w-8 h/8-w-8 w/8-8 avx/fp16
    int elempack;

    // the allocator
    VkAllocator* allocator;

    // the dimension rank
    int dims;

    int w;
    int h;
    int c;
};
523
// type for vulkan specialization constant and push constant
// 32-bit scalar written as one member and handed to the shader as raw bits
union vk_specialization_type
{
    int i;
    float f;
    uint32_t u32;
};
// 32-bit push-constant entry (int or float), passed to the shader as raw bits
union vk_constant_type
{
    int i;
    float f;
};
536 #endif // NCNN_VULKAN
537
538 // misc function
539 #if NCNN_PIXEL
540 // convert yuv420sp(nv21) to rgb, the fast approximate version
541 NCNN_EXPORT void yuv420sp2rgb(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb);
542 // convert yuv420sp(nv12) to rgb, the fast approximate version
543 NCNN_EXPORT void yuv420sp2rgb_nv12(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb);
544 // convert yuv420sp(nv21) to rgb with half resize, the faster approximate version
545 NCNN_EXPORT void yuv420sp2rgb_half(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb);
546 // image pixel bilinear resize
547 NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
548 NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
549 NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
550 NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
551 // image pixel bilinear resize with stride(bytes-per-row) parameter
552 NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride);
553 NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride);
554 NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride);
555 NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride);
556 // image pixel bilinear resize, convenient wrapper for yuv420sp(nv21/nv12)
557 NCNN_EXPORT void resize_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
558 #endif // NCNN_PIXEL
559 #if NCNN_PIXEL_ROTATE
560 // type is the from type, 6 means rotating from 6 to 1
561 //
562 // 1 2 3 4 5 6 7 8
563 //
564 // 888888 888888 88 88 8888888888 88 88 8888888888
565 // 88 88 88 88 88 88 88 88 88 88 88 88
566 // 8888 8888 8888 8888 88 8888888888 8888888888 88
567 // 88 88 88 88
568 // 88 88 888888 888888
569 //
570 // ref http://sylvana.net/jpegcrop/exif_orientation.html
571 // image pixel kanna rotate
572 NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
573 NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
574 NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
575 NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
576 // image pixel kanna rotate with stride(bytes-per-row) parameter
577 NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type);
578 NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type);
579 NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type);
580 NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type);
581 // image pixel kanna rotate, convenient wrapper for yuv420sp(nv21/nv12)
582 NCNN_EXPORT void kanna_rotate_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
583 #endif // NCNN_PIXEL_ROTATE
584 #if NCNN_PIXEL_AFFINE
585 // resolve affine transform matrix from rotation angle, scale factor and x y offset
586 NCNN_EXPORT void get_rotation_matrix(float angle, float scale, float dx, float dy, float* tm);
587 // resolve affine transform matrix from two set of points, num_point must be >= 2
588 NCNN_EXPORT void get_affine_transform(const float* points_from, const float* points_to, int num_point, float* tm);
589 // resolve the inversion affine transform matrix
590 NCNN_EXPORT void invert_affine_transform(const float* tm, float* tm_inv);
591 // image pixel bilinear warpaffine inverse transform, set -233 for transparent border color, the color RGBA is little-endian encoded
592 NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
593 NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
594 NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
595 NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
596 // image pixel bilinear warpaffine inverse transform with stride(bytes-per-row) parameter, set -233 for transparent border color, the color RGBA is little-endian encoded
597 NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
598 NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
599 NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
600 NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
601 // image pixel bilinear warpaffine, convenient wrapper for yuv420sp(nv21/nv12), set -233 for transparent border color, the color YUV_ is little-endian encoded
602 NCNN_EXPORT void warpaffine_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
603 #endif // NCNN_PIXEL_AFFINE
604 #if NCNN_PIXEL_DRAWING
605 // draw rectangle, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded
606 NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
607 NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
608 NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
609 NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
610 // draw rectangle with stride(bytes-per-row) parameter, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded
611 NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
612 NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
613 NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
614 NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
615 // draw rectangle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled rectangle, the color YUV_ is little-endian encoded
616 NCNN_EXPORT void draw_rectangle_yuv420sp(unsigned char* yuv420sp, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
617 // draw circle, set thickness -1 for filled circle, the color RGBA is little-endian encoded
618 NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
619 NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
620 NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
621 NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
622 // draw circle with stride(bytes-per-row) parameter, set thickness -1 for filled circle, the color RGBA is little-endian encoded
623 NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
624 NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
625 NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
626 NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
627 // draw circle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled circle, the color YUV_ is little-endian encoded
628 NCNN_EXPORT void draw_circle_yuv420sp(unsigned char* yuv420sp, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
629 // draw line, the color RGBA is little-endian encoded
630 NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
631 NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
632 NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
633 NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
// draw line with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded
// the _c1/_c2/_c3/_c4 suffix is the number of interleaved channels per pixel
NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
// draw line, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded
NCNN_EXPORT void draw_line_yuv420sp(unsigned char* yuv420sp, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
// resolve text bounding box size for the given font pixel size
NCNN_EXPORT void get_text_drawing_size(const char* text, int fontpixelsize, int* w, int* h);
// draw ascii printables and newline, the color RGBA is little-endian encoded
NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
// draw ascii printables and newline with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded
NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
// draw ascii printables and newline, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded
NCNN_EXPORT void draw_text_yuv420sp(unsigned char* yuv420sp, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
#endif // NCNN_PIXEL_DRAWING

// type conversion
// convert float to half precision floating point (ieee-754 binary16 bit pattern)
NCNN_EXPORT unsigned short float32_to_float16(float value);
// convert half precision floating point to float
NCNN_EXPORT float float16_to_float32(unsigned short value);
662 // convert float to brain half
float32_to_bfloat16(float value)663 NCNN_EXPORT inline unsigned short float32_to_bfloat16(float value)
664 {
665 // 16 : 16
666 union
667 {
668 unsigned int u;
669 float f;
670 } tmp;
671 tmp.f = value;
672 return tmp.u >> 16;
673 }
674 // convert brain half to float
bfloat16_to_float32(unsigned short value)675 NCNN_EXPORT inline float bfloat16_to_float32(unsigned short value)
676 {
677 // 16 : 16
678 union
679 {
680 unsigned int u;
681 float f;
682 } tmp;
683 tmp.u = value << 16;
684 return tmp.f;
685 }
#if __ARM_NEON
// convert 4 floats to 4 bfloat16 values (narrowing shift keeps the high 16 bits)
NCNN_EXPORT inline uint16x4_t vcvt_bf16_f32(float32x4_t _v)
{
    return vshrn_n_u32(vreinterpretq_u32_f32(_v), 16);
}
// convert 4 bfloat16 values to 4 floats (widening shift zero-fills the low 16 bits)
NCNN_EXPORT inline float32x4_t vcvt_f32_bf16(uint16x4_t _v)
{
    return vreinterpretq_f32_u32(vshll_n_u16(_v, 16));
}
#endif // __ARM_NEON
696
697 // mat process
// border handling policy for copy_make_border
enum BorderType
{
    BORDER_CONSTANT = 0,      // pad with the constant value v
    BORDER_REPLICATE = 1,     // repeat the outermost edge pixel
    BORDER_TRANSPARENT = -233, // leave the destination content untouched
};
NCNN_EXPORT void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt = Option());
NCNN_EXPORT void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt = Option());
NCNN_EXPORT void resize_nearest(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option());
NCNN_EXPORT void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option());
NCNN_EXPORT void resize_bicubic(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option());
NCNN_EXPORT void convert_packing(const Mat& src, Mat& dst, int elempack, const Option& opt = Option());
NCNN_EXPORT void flatten(const Mat& src, Mat& dst, const Option& opt = Option());
NCNN_EXPORT void cast_float32_to_float16(const Mat& src, Mat& dst, const Option& opt = Option());
NCNN_EXPORT void cast_float16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option());
NCNN_EXPORT void cast_int8_to_float32(const Mat& src, Mat& dst, const Option& opt = Option());
NCNN_EXPORT void cast_float32_to_bfloat16(const Mat& src, Mat& dst, const Option& opt = Option());
NCNN_EXPORT void cast_bfloat16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option());
NCNN_EXPORT void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_data, const Option& opt = Option());
NCNN_EXPORT void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt = Option());
NCNN_EXPORT void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt = Option());
719
// default constructor: all fields zeroed, no allocation
inline Mat::Mat()
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
}

// allocating 1-dim constructor
inline Mat::Mat(int _w, size_t _elemsize, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _elemsize, _allocator);
}

// allocating 2-dim constructor
inline Mat::Mat(int _w, int _h, size_t _elemsize, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _h, _elemsize, _allocator);
}

// allocating 3-dim constructor
inline Mat::Mat(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _h, _c, _elemsize, _allocator);
}

// allocating packed 1-dim constructor
inline Mat::Mat(int _w, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _elemsize, _elempack, _allocator);
}

// allocating packed 2-dim constructor
inline Mat::Mat(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _h, _elemsize, _elempack, _allocator);
}

// allocating packed 3-dim constructor
inline Mat::Mat(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _h, _c, _elemsize, _elempack, _allocator);
}

// copy constructor: shares the underlying buffer and bumps the refcount
inline Mat::Mat(const Mat& m)
    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), c(m.c), cstep(m.cstep)
{
    addref();
}

// external-data constructors below wrap a caller-owned buffer:
// refcount stays null so the Mat never frees it

// external 1-dim
inline Mat::Mat(int _w, void* _data, size_t _elemsize, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), c(1)
{
    cstep = w;
}

// external 2-dim
inline Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), c(1)
{
    cstep = (size_t)w * h;
}

// external 3-dim; each channel is padded to a 16-byte boundary
inline Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), c(_c)
{
    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
}

// external packed 1-dim
inline Mat::Mat(int _w, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), c(1)
{
    cstep = w;
}

// external packed 2-dim
inline Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), c(1)
{
    cstep = (size_t)w * h;
}

// external packed 3-dim; each channel is padded to a 16-byte boundary
inline Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), c(_c)
{
    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
}

// destructor drops this reference and frees the buffer if it was the last one
inline Mat::~Mat()
{
    release();
}
807
// fill every element with the float value _v
// on arm the bulk is written 4 lanes per iteration with inline asm,
// the tail (size % 4) is handled by the scalar loop at the bottom
inline void Mat::fill(float _v)
{
    int size = (int)total();
    float* ptr = (float*)data;

#if __ARM_NEON
    int nn = size >> 2;             // number of 4-float vector stores
    int remain = size - (nn << 2);  // leftover scalar elements
#else
    int remain = size;
#endif // __ARM_NEON

#if __ARM_NEON
    float32x4_t _c = vdupq_n_f32(_v);
#if __aarch64__
    if (nn > 0)
    {
        asm volatile(
            "0: \n"
            "subs %w0, %w0, #1 \n"
            "st1 {%4.4s}, [%1], #16 \n"
            "bne 0b \n"
            : "=r"(nn), // %0
            "=r"(ptr) // %1
            : "0"(nn),
            "1"(ptr),
            "w"(_c) // %4
            : "cc", "memory");
    }
#else
    if (nn > 0)
    {
        asm volatile(
            "0: \n"
            "subs %0, #1 \n"
            "vst1.f32 {%e4-%f4}, [%1 :128]!\n"
            "bne 0b \n"
            : "=r"(nn), // %0
            "=r"(ptr) // %1
            : "0"(nn),
            "1"(ptr),
            "w"(_c) // %4
            : "cc", "memory");
    }
#endif // __aarch64__
#endif // __ARM_NEON
    // scalar tail (or the whole range when NEON is unavailable);
    // ptr already points past the vector-stored prefix
    for (; remain > 0; remain--)
    {
        *ptr++ = _v;
    }
}
859
// fill every element with the int value _v
// mirrors fill(float): NEON bulk stores of 4 lanes plus a scalar tail
inline void Mat::fill(int _v)
{
    int size = (int)total();
    int* ptr = (int*)data;

#if __ARM_NEON
    int nn = size >> 2;             // number of 4-int vector stores
    int remain = size - (nn << 2);  // leftover scalar elements
#else
    int remain = size;
#endif // __ARM_NEON

#if __ARM_NEON
    int32x4_t _c = vdupq_n_s32(_v);
#if __aarch64__
    if (nn > 0)
    {
        asm volatile(
            "0: \n"
            "subs %w0, %w0, #1 \n"
            "st1 {%4.4s}, [%1], #16 \n"
            "bne 0b \n"
            : "=r"(nn), // %0
            "=r"(ptr) // %1
            : "0"(nn),
            "1"(ptr),
            "w"(_c) // %4
            : "cc", "memory");
    }
#else
    if (nn > 0)
    {
        asm volatile(
            "0: \n"
            "subs %0, #1 \n"
            "vst1.s32 {%e4-%f4}, [%1 :128]!\n"
            "bne 0b \n"
            : "=r"(nn), // %0
            "=r"(ptr) // %1
            : "0"(nn),
            "1"(ptr),
            "w"(_c) // %4
            : "cc", "memory");
    }
#endif // __aarch64__
#endif // __ARM_NEON
    // scalar tail (or the whole range when NEON is unavailable)
    for (; remain > 0; remain--)
    {
        *ptr++ = _v;
    }
}
911
#if __ARM_NEON
// fill with a 4-lane float vector; total() counts packed elements,
// each iteration writes one 4-float element
inline void Mat::fill(float32x4_t _v)
{
    int size = (int)total();
    float* ptr = (float*)data;
    for (int i = 0; i < size; i++)
    {
        vst1q_f32(ptr, _v);
        ptr += 4;
    }
}

// fill with a 4-lane u16 vector (e.g. fp16/bf16 storage), 4 shorts per element
inline void Mat::fill(uint16x4_t _v)
{
    int size = (int)total();
    unsigned short* ptr = (unsigned short*)data;
    for (int i = 0; i < size; i++)
    {
        vst1_u16(ptr, _v);
        ptr += 4;
    }
}

// fill with a 4-lane s32 vector, 4 ints per element
inline void Mat::fill(int32x4_t _v)
{
    int size = (int)total();
    int* ptr = (int*)data;
    for (int i = 0; i < size; i++)
    {
        vst1q_s32(ptr, _v);
        ptr += 4;
    }
}

// fill with a pair of s32 vectors; each element gets _v0 then _v1
// (8 ints per element — presumably the elempack=8 layout, verify at call sites)
inline void Mat::fill(int32x4_t _v0, int32x4_t _v1)
{
    int size = (int)total();
    int* ptr = (int*)data;
    for (int i = 0; i < size; i++)
    {
        vst1q_s32(ptr, _v0);
        vst1q_s32(ptr + 4, _v1);
        ptr += 8;
    }
}
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// fill with a 4-lane fp16 vector, 4 halves per element
inline void Mat::fill(float16x4_t _v)
{
    int size = (int)total();
    __fp16* ptr = (__fp16*)data;
    for (int i = 0; i < size; i++)
    {
        vst1_f16(ptr, _v);
        ptr += 4;
    }
}

// fill with an 8-lane fp16 vector, 8 halves per element
inline void Mat::fill(float16x8_t _v)
{
    int size = (int)total();
    __fp16* ptr = (__fp16*)data;
    for (int i = 0; i < size; i++)
    {
        vst1q_f16(ptr, _v);
        ptr += 8;
    }
}
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#endif // __ARM_NEON
#if __AVX__
// fill with an 8-lane float vector (unaligned store), 8 floats per element
inline void Mat::fill(__m256 _v)
{
    int size = (int)total();
    float* ptr = (float*)data;
    for (int i = 0; i < size; i++)
    {
        _mm256_storeu_ps(ptr, _v);
        ptr += 8;
    }
}
// fill with a 128-bit integer vector, 8 shorts per element
// NOTE(review): aligned store — assumes data is 16-byte aligned, which holds
// for allocator-owned buffers; verify for external-data mats
inline void Mat::fill(__m128i _v)
{
    int size = (int)total();
    unsigned short* ptr = (unsigned short*)data;
    for (int i = 0; i < size; i++)
    {
        _mm_store_si128((__m128i*)ptr, _v);
        ptr += 8;
    }
}
#endif // __AVX__
1003
1004 template<typename T>
fill(T _v)1005 inline void Mat::fill(T _v)
1006 {
1007 int size = (int)total();
1008 T* ptr = (T*)data;
1009 for (int i = 0; i < size; i++)
1010 {
1011 ptr[i] = _v;
1012 }
1013 }
1014
// copy assignment: shares m's buffer
// m's refcount is bumped BEFORE release() so the buffer survives even when
// *this and m indirectly share the same storage
inline Mat& Mat::operator=(const Mat& m)
{
    if (this == &m)
        return *this;

    if (m.refcount)
        NCNN_XADD(m.refcount, 1);

    release();

    data = m.data;
    refcount = m.refcount;
    elemsize = m.elemsize;
    elempack = m.elempack;
    allocator = m.allocator;

    dims = m.dims;
    w = m.w;
    h = m.h;
    c = m.c;

    cstep = m.cstep;

    return *this;
}
1040
// bump the reference counter; no-op for external-data mats (refcount null)
inline void Mat::addref()
{
    if (refcount)
        NCNN_XADD(refcount, 1);
}

// drop one reference; NCNN_XADD returns the previous value, so 1 means this
// was the last owner and the buffer is freed. Always resets every field so
// the mat becomes empty afterwards.
inline void Mat::release()
{
    if (refcount && NCNN_XADD(refcount, -1) == 1)
    {
        if (allocator)
            allocator->fastFree(data);
        else
            fastFree(data);
    }

    data = 0;

    elemsize = 0;
    elempack = 0;

    dims = 0;
    w = 0;
    h = 0;
    c = 0;

    cstep = 0;

    refcount = 0;
}
1071
// true when no buffer is attached or the element count is zero
inline bool Mat::empty() const
{
    return data == 0 || total() == 0;
}

// total element count, including per-channel alignment padding (cstep)
inline size_t Mat::total() const
{
    return cstep * c;
}

// bits per scalar element; 0 when the mat is uninitialized (elempack == 0)
inline int Mat::elembits() const
{
    return elempack ? static_cast<int>(elemsize * 8) / elempack : 0;
}

// logical shape as an empty-data Mat header, with elempack folded back into
// the innermost-but-one dimension
inline Mat Mat::shape() const
{
    if (dims == 1)
        return Mat(w * elempack, (void*)0);
    if (dims == 2)
        return Mat(w, h * elempack, (void*)0);
    if (dims == 3)
        return Mat(w, h, c * elempack, (void*)0);

    return Mat();
}
1098
// non-owning 2-d view of channel _c (shares the buffer, refcount untouched)
inline Mat Mat::channel(int _c)
{
    return Mat(w, h, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
}

inline const Mat Mat::channel(int _c) const
{
    return Mat(w, h, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
}

// pointer to the start of row y, as float (rows are w elements, no padding)
inline float* Mat::row(int y)
{
    return (float*)((unsigned char*)data + (size_t)w * y * elemsize);
}

inline const float* Mat::row(int y) const
{
    return (const float*)((unsigned char*)data + (size_t)w * y * elemsize);
}

// pointer to the start of row y, reinterpreted as T
template<typename T>
inline T* Mat::row(int y)
{
    return (T*)((unsigned char*)data + (size_t)w * y * elemsize);
}

template<typename T>
inline const T* Mat::row(int y) const
{
    return (const T*)((unsigned char*)data + (size_t)w * y * elemsize);
}

// non-owning 3-d view of `channels` consecutive channels starting at _c
inline Mat Mat::channel_range(int _c, int channels)
{
    return Mat(w, h, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
}

inline const Mat Mat::channel_range(int _c, int channels) const
{
    return Mat(w, h, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
}

// non-owning 2-d view of `rows` consecutive rows starting at y
inline Mat Mat::row_range(int y, int rows)
{
    return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator);
}

inline const Mat Mat::row_range(int y, int rows) const
{
    return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator);
}

// non-owning 1-d view of n consecutive elements starting at x
inline Mat Mat::range(int x, int n)
{
    return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator);
}

inline const Mat Mat::range(int x, int n) const
{
    return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator);
}

// implicit raw-pointer access to the underlying buffer
template<typename T>
inline Mat::operator T*()
{
    return (T*)data;
}

template<typename T>
inline Mat::operator const T*() const
{
    return (const T*)data;
}

// flat float indexing; no bounds checking
inline float& Mat::operator[](size_t i)
{
    return ((float*)data)[i];
}

inline const float& Mat::operator[](size_t i) const
{
    return ((const float*)data)[i];
}
1182
1183 #if NCNN_VULKAN
1184
VkMat()1185 inline VkMat::VkMat()
1186 : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
1187 {
1188 }
1189
VkMat(int _w,size_t _elemsize,VkAllocator * _allocator)1190 inline VkMat::VkMat(int _w, size_t _elemsize, VkAllocator* _allocator)
1191 : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
1192 {
1193 create(_w, _elemsize, _allocator);
1194 }
1195
VkMat(int _w,int _h,size_t _elemsize,VkAllocator * _allocator)1196 inline VkMat::VkMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
1197 : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
1198 {
1199 create(_w, _h, _elemsize, _allocator);
1200 }
1201
VkMat(int _w,int _h,int _c,size_t _elemsize,VkAllocator * _allocator)1202 inline VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
1203 : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
1204 {
1205 create(_w, _h, _c, _elemsize, _allocator);
1206 }
1207
VkMat(int _w,size_t _elemsize,int _elempack,VkAllocator * _allocator)1208 inline VkMat::VkMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
1209 : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
1210 {
1211 create(_w, _elemsize, _elempack, _allocator);
1212 }
1213
VkMat(int _w,int _h,size_t _elemsize,int _elempack,VkAllocator * _allocator)1214 inline VkMat::VkMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
1215 : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
1216 {
1217 create(_w, _h, _elemsize, _elempack, _allocator);
1218 }
1219
VkMat(int _w,int _h,int _c,size_t _elemsize,int _elempack,VkAllocator * _allocator)1220 inline VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
1221 : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
1222 {
1223 create(_w, _h, _c, _elemsize, _elempack, _allocator);
1224 }
1225
VkMat(const VkMat & m)1226 inline VkMat::VkMat(const VkMat& m)
1227 : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), c(m.c)
1228 {
1229 addref();
1230
1231 cstep = m.cstep;
1232 }
1233
VkMat(int _w,VkBufferMemory * _data,size_t _elemsize,VkAllocator * _allocator)1234 inline VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
1235 : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), c(1)
1236 {
1237 cstep = w;
1238 }
1239
VkMat(int _w,int _h,VkBufferMemory * _data,size_t _elemsize,VkAllocator * _allocator)1240 inline VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
1241 : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), c(1)
1242 {
1243 cstep = w * h;
1244 }
1245
VkMat(int _w,int _h,int _c,VkBufferMemory * _data,size_t _elemsize,VkAllocator * _allocator)1246 inline VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
1247 : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), c(_c)
1248 {
1249 cstep = alignSize(w * h * elemsize, 16) / elemsize;
1250 }
1251
VkMat(int _w,VkBufferMemory * _data,size_t _elemsize,int _elempack,VkAllocator * _allocator)1252 inline VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
1253 : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), c(1)
1254 {
1255 cstep = w;
1256 }
1257
VkMat(int _w,int _h,VkBufferMemory * _data,size_t _elemsize,int _elempack,VkAllocator * _allocator)1258 inline VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
1259 : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), c(1)
1260 {
1261 cstep = w * h;
1262 }
1263
VkMat(int _w,int _h,int _c,VkBufferMemory * _data,size_t _elemsize,int _elempack,VkAllocator * _allocator)1264 inline VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
1265 : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), c(_c)
1266 {
1267 cstep = alignSize(w * h * elemsize, 16) / elemsize;
1268 }
1269
~VkMat()1270 inline VkMat::~VkMat()
1271 {
1272 release();
1273 }
1274
// copy assignment: shares m's buffer
// m's refcount is bumped BEFORE release() so shared storage survives
inline VkMat& VkMat::operator=(const VkMat& m)
{
    if (this == &m)
        return *this;

    if (m.refcount)
        NCNN_XADD(m.refcount, 1);

    release();

    data = m.data;
    refcount = m.refcount;
    elemsize = m.elemsize;
    elempack = m.elempack;
    allocator = m.allocator;

    dims = m.dims;
    w = m.w;
    h = m.h;
    c = m.c;

    cstep = m.cstep;

    return *this;
}
1300
// host-visible view of the gpu buffer as a non-owning Mat;
// returns an empty Mat when the allocator memory is not host-mappable
inline Mat VkMat::mapped() const
{
    if (!allocator->mappable)
        return Mat();

    if (dims == 1)
        return Mat(w, mapped_ptr(), elemsize, elempack, 0);

    if (dims == 2)
        return Mat(w, h, mapped_ptr(), elemsize, elempack, 0);

    if (dims == 3)
        return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0);

    return Mat();
}

// raw host pointer into the mapped allocation, adjusted by this buffer's
// offset; null when the memory is not host-mappable
inline void* VkMat::mapped_ptr() const
{
    if (!allocator->mappable)
        return 0;

    return (unsigned char*)data->mapped_ptr + data->offset;
}

// bump the reference counter; no-op for external-data mats (refcount null)
inline void VkMat::addref()
{
    if (refcount)
        NCNN_XADD(refcount, 1);
}

// drop one reference and free the buffer memory when this was the last
// owner; always resets every field so the mat becomes empty
inline void VkMat::release()
{
    if (refcount && NCNN_XADD(refcount, -1) == 1)
    {
        if (allocator && data)
        {
            allocator->fastFree(data);
        }
    }

    data = 0;

    elemsize = 0;
    elempack = 0;

    dims = 0;
    w = 0;
    h = 0;
    c = 0;

    cstep = 0;

    refcount = 0;
}
1356
empty()1357 inline bool VkMat::empty() const
1358 {
1359 return data == 0 || total() == 0;
1360 }
1361
total()1362 inline size_t VkMat::total() const
1363 {
1364 return cstep * c;
1365 }
1366
elembits()1367 inline int VkMat::elembits() const
1368 {
1369 return elempack ? elemsize * 8 / elempack : 0;
1370 }
1371
// logical shape as an empty-data Mat header, with elempack folded back in
inline Mat VkMat::shape() const
{
    if (dims == 1)
        return Mat(w * elempack, (void*)0);
    if (dims == 2)
        return Mat(w, h * elempack, (void*)0);
    if (dims == 3)
        return Mat(w, h, c * elempack, (void*)0);

    return Mat();
}

// underlying vulkan buffer handle
inline VkBuffer VkMat::buffer() const
{
    return data->buffer;
}

// byte offset of this mat's region inside the buffer
inline size_t VkMat::buffer_offset() const
{
    return data->offset;
}

// capacity in bytes of the backing allocation
inline size_t VkMat::buffer_capacity() const
{
    return data->capacity;
}
1398
// default constructor: all fields zeroed, no allocation
// (VkImageMat has no cstep: image layout is handled by the driver)
inline VkImageMat::VkImageMat()
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
}

// allocating 1-dim constructor
inline VkImageMat::VkImageMat(int _w, size_t _elemsize, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
    create(_w, _elemsize, _allocator);
}

// allocating 2-dim constructor
inline VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
    create(_w, _h, _elemsize, _allocator);
}

// allocating 3-dim constructor
inline VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
    create(_w, _h, _c, _elemsize, _allocator);
}

// allocating packed 1-dim constructor
inline VkImageMat::VkImageMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
    create(_w, _elemsize, _elempack, _allocator);
}

// allocating packed 2-dim constructor
inline VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
    create(_w, _h, _elemsize, _elempack, _allocator);
}

// allocating packed 3-dim constructor
inline VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
    create(_w, _h, _c, _elemsize, _elempack, _allocator);
}

// copy constructor: shares the underlying image and bumps the refcount
inline VkImageMat::VkImageMat(const VkImageMat& m)
    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), c(m.c)
{
    addref();
}

// external-data constructors below wrap caller-owned vulkan image memory:
// refcount stays null so the VkImageMat never frees it

inline VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), c(1)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), c(1)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), c(_c)
{
}

inline VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), c(1)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), c(1)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), c(_c)
{
}

// destructor drops this reference and frees the image if it was the last one
inline VkImageMat::~VkImageMat()
{
    release();
}
1480
// copy assignment: shares m's image
// m's refcount is bumped BEFORE release() so shared storage survives
inline VkImageMat& VkImageMat::operator=(const VkImageMat& m)
{
    if (this == &m)
        return *this;

    if (m.refcount)
        NCNN_XADD(m.refcount, 1);

    release();

    data = m.data;
    refcount = m.refcount;
    elemsize = m.elemsize;
    elempack = m.elempack;
    allocator = m.allocator;

    dims = m.dims;
    w = m.w;
    h = m.h;
    c = m.c;

    return *this;
}
1504
// host-visible view of the gpu image as a non-owning Mat;
// empty when the memory is not host-mappable or has no mapped pointer
// (unlike VkMat, image memory may be mappable yet unmapped)
inline Mat VkImageMat::mapped() const
{
    if (!allocator->mappable || !data->mapped_ptr)
        return Mat();

    if (dims == 1)
        return Mat(w, mapped_ptr(), elemsize, elempack, 0);

    if (dims == 2)
        return Mat(w, h, mapped_ptr(), elemsize, elempack, 0);

    if (dims == 3)
        return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0);

    return Mat();
}

// raw host pointer into the mapped allocation at this image's bind offset;
// null when not host-mappable or not mapped
inline void* VkImageMat::mapped_ptr() const
{
    if (!allocator->mappable || !data->mapped_ptr)
        return 0;

    return (unsigned char*)data->mapped_ptr + data->bind_offset;
}

// bump the reference counter; no-op for external-data mats (refcount null)
inline void VkImageMat::addref()
{
    if (refcount)
        NCNN_XADD(refcount, 1);
}

// drop one reference and free the image memory when this was the last
// owner; always resets every field so the mat becomes empty
inline void VkImageMat::release()
{
    if (refcount && NCNN_XADD(refcount, -1) == 1)
    {
        if (allocator && data)
        {
            allocator->fastFree(data);
        }
    }

    data = 0;

    elemsize = 0;
    elempack = 0;

    dims = 0;
    w = 0;
    h = 0;
    c = 0;

    refcount = 0;
}
1558
empty()1559 inline bool VkImageMat::empty() const
1560 {
1561 return data == 0 || total() == 0;
1562 }
1563
total()1564 inline size_t VkImageMat::total() const
1565 {
1566 return w * h * c;
1567 }
1568
elembits()1569 inline int VkImageMat::elembits() const
1570 {
1571 return elempack ? elemsize * 8 / elempack : 0;
1572 }
1573
// logical shape as an empty-data Mat header, with elempack folded back in
inline Mat VkImageMat::shape() const
{
    if (dims == 1)
        return Mat(w * elempack, (void*)0);
    if (dims == 2)
        return Mat(w, h * elempack, (void*)0);
    if (dims == 3)
        return Mat(w, h, c * elempack, (void*)0);

    return Mat();
}

// underlying vulkan image handle
inline VkImage VkImageMat::image() const
{
    return data->image;
}

// image view handle for descriptor binding
inline VkImageView VkImageMat::imageview() const
{
    return data->imageview;
}
1595
1596 #endif // NCNN_VULKAN
1597
1598 } // namespace ncnn
1599
1600 #endif // NCNN_MAT_H
1601