// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef NCNN_MAT_H
#define NCNN_MAT_H

#include <stdlib.h>
#include <string.h>
#if __ARM_NEON
#include <arm_neon.h>
#endif
#if __AVX__
#include <immintrin.h>
#endif

#include "allocator.h"
#include "option.h"
#include "platform.h"

#if NCNN_VULKAN
#include <vulkan/vulkan.h>
#endif // NCNN_VULKAN

#if NCNN_PIXEL
#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 9
#include <android/bitmap.h>
#include <jni.h>
#endif // __ANDROID_API__ >= 9
#endif // NCNN_PLATFORM_API
#endif // NCNN_PIXEL

namespace ncnn {

#if NCNN_VULKAN
class VkMat;
class VkImageMat;
#endif // NCNN_VULKAN

// the three-dimensional matrix
class NCNN_EXPORT Mat
{
public:
    // empty
    Mat();
    // vec
    Mat(int w, size_t elemsize = 4u, Allocator* allocator = 0);
    // image
    Mat(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0);
    // dim
    Mat(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0);
    // packed vec
    Mat(int w, size_t elemsize, int elempack, Allocator* allocator = 0);
    // packed image
    Mat(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0);
    // packed dim
    Mat(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0);
    // copy
    Mat(const Mat& m);
    // external vec
    Mat(int w, void* data, size_t elemsize = 4u, Allocator* allocator = 0);
    // external image
    Mat(int w, int h, void* data, size_t elemsize = 4u, Allocator* allocator = 0);
    // external dim
    Mat(int w, int h, int c, void* data, size_t elemsize = 4u, Allocator* allocator = 0);
    // external packed vec
    Mat(int w, void* data, size_t elemsize, int elempack, Allocator* allocator = 0);
    // external packed image
    Mat(int w, int h, void* data, size_t elemsize, int elempack, Allocator* allocator = 0);
    // external packed dim
    Mat(int w, int h, int c, void* data, size_t elemsize, int elempack, Allocator* allocator = 0);
    // release
    ~Mat();
    // assign
    Mat& operator=(const Mat& m);
    // set all
    void fill(float v);
    void fill(int v);
#if __ARM_NEON
    void fill(float32x4_t _v);
    void fill(uint16x4_t _v);
    void fill(int32x4_t _v);
    void fill(int32x4_t _v0, int32x4_t _v1);
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    void fill(float16x4_t _v);
    void fill(float16x8_t _v);
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#endif // __ARM_NEON
#if __AVX__
    void fill(__m256 _v);
    void fill(__m128i _v);
#endif // __AVX__
    template<typename T>
    void fill(T v);
    // deep copy
    Mat clone(Allocator* allocator = 0) const;
    // deep copy from other mat, inplace
    void clone_from(const ncnn::Mat& mat, Allocator* allocator = 0);
    // reshape vec
    Mat reshape(int w, Allocator* allocator = 0) const;
    // reshape image
    Mat reshape(int w, int h, Allocator* allocator = 0) const;
    // reshape dim
    Mat reshape(int w, int h, int c, Allocator* allocator = 0) const;
    // allocate vec
    void create(int w, size_t elemsize = 4u, Allocator* allocator = 0);
    // allocate image
    void create(int w, int h, size_t elemsize = 4u, Allocator* allocator = 0);
    // allocate dim
    void create(int w, int h, int c, size_t elemsize = 4u, Allocator* allocator = 0);
    // allocate packed vec
    void create(int w, size_t elemsize, int elempack, Allocator* allocator = 0);
    // allocate packed image
    void create(int w, int h, size_t elemsize, int elempack, Allocator* allocator = 0);
    // allocate packed dim
    void create(int w, int h, int c, size_t elemsize, int elempack, Allocator* allocator = 0);
    // allocate like
    void create_like(const Mat& m, Allocator* allocator = 0);
#if NCNN_VULKAN
    // allocate like
    void create_like(const VkMat& m, Allocator* allocator = 0);
    // allocate like
    void create_like(const VkImageMat& im, Allocator* allocator = 0);
#endif // NCNN_VULKAN
    // refcount++
    void addref();
    // refcount--
    void release();

    bool empty() const;
    size_t total() const;

    // bits per element
    int elembits() const;

    // shape only
    Mat shape() const;

    // data reference
    Mat channel(int c);
    const Mat channel(int c) const;
    float* row(int y);
    const float* row(int y) const;
    template<typename T>
    T* row(int y);
    template<typename T>
    const T* row(int y) const;

    // range reference
    Mat channel_range(int c, int channels);
    const Mat channel_range(int c, int channels) const;
    Mat row_range(int y, int rows);
    const Mat row_range(int y, int rows) const;
    Mat range(int x, int n);
    const Mat range(int x, int n) const;

    // access raw data
    template<typename T>
    operator T*();
    template<typename T>
    operator const T*() const;

    // convenient access float vec element
    float& operator[](size_t i);
    const float& operator[](size_t i) const;

#if NCNN_PIXEL
    enum PixelType
    {
        PIXEL_CONVERT_SHIFT = 16,
        PIXEL_FORMAT_MASK = 0x0000ffff,
        PIXEL_CONVERT_MASK = 0xffff0000,

        PIXEL_RGB = 1,
        PIXEL_BGR = 2,
        PIXEL_GRAY = 3,
        PIXEL_RGBA = 4,
        PIXEL_BGRA = 5,

        PIXEL_RGB2BGR = PIXEL_RGB | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
        PIXEL_RGB2GRAY = PIXEL_RGB | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
        PIXEL_RGB2RGBA = PIXEL_RGB | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT),
        PIXEL_RGB2BGRA = PIXEL_RGB | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT),

        PIXEL_BGR2RGB = PIXEL_BGR | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
        PIXEL_BGR2GRAY = PIXEL_BGR | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
        PIXEL_BGR2RGBA = PIXEL_BGR | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT),
        PIXEL_BGR2BGRA = PIXEL_BGR | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT),

        PIXEL_GRAY2RGB = PIXEL_GRAY | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
        PIXEL_GRAY2BGR = PIXEL_GRAY | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
        PIXEL_GRAY2RGBA = PIXEL_GRAY | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT),
        PIXEL_GRAY2BGRA = PIXEL_GRAY | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT),

        PIXEL_RGBA2RGB = PIXEL_RGBA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
        PIXEL_RGBA2BGR = PIXEL_RGBA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
        PIXEL_RGBA2GRAY = PIXEL_RGBA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
        PIXEL_RGBA2BGRA = PIXEL_RGBA | (PIXEL_BGRA << PIXEL_CONVERT_SHIFT),

        PIXEL_BGRA2RGB = PIXEL_BGRA | (PIXEL_RGB << PIXEL_CONVERT_SHIFT),
        PIXEL_BGRA2BGR = PIXEL_BGRA | (PIXEL_BGR << PIXEL_CONVERT_SHIFT),
        PIXEL_BGRA2GRAY = PIXEL_BGRA | (PIXEL_GRAY << PIXEL_CONVERT_SHIFT),
        PIXEL_BGRA2RGBA = PIXEL_BGRA | (PIXEL_RGBA << PIXEL_CONVERT_SHIFT),
    };
    // convenient construct from pixel data
    static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, Allocator* allocator = 0);
    // convenient construct from pixel data with stride(bytes-per-row) parameter
    static Mat from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, Allocator* allocator = 0);
    // convenient construct from pixel data and resize to specific size
    static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int target_width, int target_height, Allocator* allocator = 0);
    // convenient construct from pixel data and resize to specific size with stride(bytes-per-row) parameter
    static Mat from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, Allocator* allocator = 0);
    // convenient construct from pixel data roi
    static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0);
    // convenient construct from pixel data roi with stride(bytes-per-row) parameter
    static Mat from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0);
    // convenient construct from pixel data roi and resize to specific size
    static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0);
    // convenient construct from pixel data roi and resize to specific size with stride(bytes-per-row) parameter
    static Mat from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0);

    // convenient export to pixel data
    void to_pixels(unsigned char* pixels, int type) const;
    // convenient export to pixel data with stride(bytes-per-row) parameter
    void to_pixels(unsigned char* pixels, int type, int stride) const;
    // convenient export to pixel data and resize to specific size
    void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height) const;
    // convenient export to pixel data and resize to specific size with stride(bytes-per-row) parameter
    void to_pixels_resize(unsigned char* pixels, int type, int target_width, int target_height, int target_stride) const;
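
    // Usage sketch (illustrative, not part of the API): wrapping a packed RGB
    // buffer as a planar float Mat for inference input, then writing a result
    // buffer back out. The source buffer `rgb` (w*h*3 bytes), the 224x224
    // target size and the output buffer `rgb_out` (224*224*3 bytes) are
    // assumptions for the example.
    //
    //     ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb, ncnn::Mat::PIXEL_RGB, w, h, 224, 224);
    //     // ... run inference, post-process ...
    //     in.to_pixels(rgb_out, ncnn::Mat::PIXEL_RGB);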

#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 9
    // convenient construct from android Bitmap
    static Mat from_android_bitmap(JNIEnv* env, jobject bitmap, int type_to, Allocator* allocator = 0);
    // convenient construct from android Bitmap and resize to specific size
    static Mat from_android_bitmap_resize(JNIEnv* env, jobject bitmap, int type_to, int target_width, int target_height, Allocator* allocator = 0);
    // convenient construct from android Bitmap roi
    static Mat from_android_bitmap_roi(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, Allocator* allocator = 0);
    // convenient construct from android Bitmap roi and resize to specific size
    static Mat from_android_bitmap_roi_resize(JNIEnv* env, jobject bitmap, int type_to, int roix, int roiy, int roiw, int roih, int target_width, int target_height, Allocator* allocator = 0);
    // convenient export to android Bitmap and resize to the android Bitmap size
    void to_android_bitmap(JNIEnv* env, jobject bitmap, int type_from) const;
#endif // __ANDROID_API__ >= 9
#endif // NCNN_PLATFORM_API
#endif // NCNN_PIXEL

    // subtract channel-wise mean values, then multiply by normalize values, pass 0 to skip
    void substract_mean_normalize(const float* mean_vals, const float* norm_vals);
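
    // Usage sketch (illustrative): map 8-bit input in [0,255] to roughly [-1,1].
    // The per-channel constants below are an assumption for the example, not
    // something fixed by the API.
    //
    //     const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
    //     const float norm_vals[3] = {1 / 127.5f, 1 / 127.5f, 1 / 127.5f};
    //     in.substract_mean_normalize(mean_vals, norm_vals);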

    // convenient construct from half precision floating point data
    static Mat from_float16(const unsigned short* data, int size);

    // pointer to the data
    void* data;

    // pointer to the reference counter
    // when points to user-allocated data, the pointer is NULL
    int* refcount;

    // element size in bytes
    // 4 = float32/int32
    // 2 = float16
    // 1 = int8/uint8
    // 0 = empty
    size_t elemsize;

    // packed count inside element
    // c/1-h-w-1  h/1-w-1  w/1-1  scalar
    // c/4-h-w-4  h/4-w-4  w/4-4  sse/neon
    // c/8-h-w-8  h/8-w-8  w/8-8  avx/fp16
    int elempack;

    // the allocator
    Allocator* allocator;

    // the dimension rank
    int dims;

    int w;
    int h;
    int c;

    size_t cstep;
};
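
// Usage sketch (illustrative): creating, filling and reading a 3-dim float Mat.
// The 224x224x3 shape is an arbitrary example.
//
//     ncnn::Mat m(224, 224, 3);    // w, h, c with 4-byte (float) elements
//     m.fill(0.5f);                // set every element
//     ncnn::Mat c0 = m.channel(0); // reference into m, no copy
//     const float* row0 = c0.row(0);
//     float v = row0[0];           // 0.5f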

#if NCNN_VULKAN

// the three-dimensional matrix, vulkan version
class NCNN_EXPORT VkMat
{
public:
    // empty
    VkMat();
    // vec
    VkMat(int w, size_t elemsize, VkAllocator* allocator);
    // image
    VkMat(int w, int h, size_t elemsize, VkAllocator* allocator);
    // dim
    VkMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator);
    // packed vec
    VkMat(int w, size_t elemsize, int elempack, VkAllocator* allocator);
    // packed image
    VkMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator);
    // packed dim
    VkMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator);
    // copy
    VkMat(const VkMat& m);
    // external vec
    VkMat(int w, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator);
    // external image
    VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator);
    // external dim
    VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, VkAllocator* allocator);
    // external packed vec
    VkMat(int w, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
    // external packed image
    VkMat(int w, int h, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
    // external packed dim
    VkMat(int w, int h, int c, VkBufferMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
    // release
    ~VkMat();
    // assign
    VkMat& operator=(const VkMat& m);
    // allocate vec
    void create(int w, size_t elemsize, VkAllocator* allocator);
    // allocate image
    void create(int w, int h, size_t elemsize, VkAllocator* allocator);
    // allocate dim
    void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator);
    // allocate packed vec
    void create(int w, size_t elemsize, int elempack, VkAllocator* allocator);
    // allocate packed image
    void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator);
    // allocate packed dim
    void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator);
    // allocate like
    void create_like(const Mat& m, VkAllocator* allocator);
    // allocate like
    void create_like(const VkMat& m, VkAllocator* allocator);
    // allocate like
    void create_like(const VkImageMat& im, VkAllocator* allocator);

    // mapped
    Mat mapped() const;
    void* mapped_ptr() const;

    // refcount++
    void addref();
    // refcount--
    void release();

    bool empty() const;
    size_t total() const;

    // bits per element
    int elembits() const;

    // shape only
    Mat shape() const;

    // low-level reference
    VkBuffer buffer() const;
    size_t buffer_offset() const;
    size_t buffer_capacity() const;

    // device buffer
    VkBufferMemory* data;

    // pointer to the reference counter
    // when points to user-allocated data, the pointer is NULL
    int* refcount;

    // element size in bytes
    // 4 = float32/int32
    // 2 = float16
    // 1 = int8/uint8
    // 0 = empty
    size_t elemsize;

    // packed count inside element
    // c/1-h-w-1  h/1-w-1  w/1-1  scalar
    // c/4-h-w-4  h/4-w-4  w/4-4  sse/neon
    // c/8-h-w-8  h/8-w-8  w/8-8  avx/fp16
    int elempack;

    // the allocator
    VkAllocator* allocator;

    // the dimension rank
    int dims;

    int w;
    int h;
    int c;

    size_t cstep;
};
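
// Usage sketch (illustrative): staging host data through a mappable VkMat.
// Obtaining a suitable VkAllocator (for example a staging/blob allocator from
// a VulkanDevice) is assumed and not shown here; `staging_allocator` is a
// placeholder name.
//
//     ncnn::VkMat vkm(256, 4u, staging_allocator); // 1-dim, fp32 elements
//     if (!vkm.empty() && vkm.allocator->mappable)
//     {
//         ncnn::Mat view = vkm.mapped(); // cpu-visible view, no extra copy
//         view.fill(0.f);
//     }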

class NCNN_EXPORT VkImageMat
{
public:
    // empty
    VkImageMat();
    // vec
    VkImageMat(int w, size_t elemsize, VkAllocator* allocator);
    // image
    VkImageMat(int w, int h, size_t elemsize, VkAllocator* allocator);
    // dim
    VkImageMat(int w, int h, int c, size_t elemsize, VkAllocator* allocator);
    // packed vec
    VkImageMat(int w, size_t elemsize, int elempack, VkAllocator* allocator);
    // packed image
    VkImageMat(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator);
    // packed dim
    VkImageMat(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator);
    // copy
    VkImageMat(const VkImageMat& m);
    // external vec
    VkImageMat(int w, VkImageMemory* data, size_t elemsize, VkAllocator* allocator);
    // external image
    VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, VkAllocator* allocator);
    // external dim
    VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, VkAllocator* allocator);
    // external packed vec
    VkImageMat(int w, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
    // external packed image
    VkImageMat(int w, int h, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
    // external packed dim
    VkImageMat(int w, int h, int c, VkImageMemory* data, size_t elemsize, int elempack, VkAllocator* allocator);
    // release
    ~VkImageMat();
    // assign
    VkImageMat& operator=(const VkImageMat& m);
    // allocate vec
    void create(int w, size_t elemsize, VkAllocator* allocator);
    // allocate image
    void create(int w, int h, size_t elemsize, VkAllocator* allocator);
    // allocate dim
    void create(int w, int h, int c, size_t elemsize, VkAllocator* allocator);
    // allocate packed vec
    void create(int w, size_t elemsize, int elempack, VkAllocator* allocator);
    // allocate packed image
    void create(int w, int h, size_t elemsize, int elempack, VkAllocator* allocator);
    // allocate packed dim
    void create(int w, int h, int c, size_t elemsize, int elempack, VkAllocator* allocator);
    // allocate like
    void create_like(const Mat& m, VkAllocator* allocator);
    // allocate like
    void create_like(const VkMat& m, VkAllocator* allocator);
    // allocate like
    void create_like(const VkImageMat& im, VkAllocator* allocator);

    // mapped
    Mat mapped() const;
    void* mapped_ptr() const;

    // refcount++
    void addref();
    // refcount--
    void release();

    bool empty() const;
    size_t total() const;

    // bits per element
    int elembits() const;

    // shape only
    Mat shape() const;

    // low-level reference
    VkImage image() const;
    VkImageView imageview() const;

#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 26
    // convenient construct from android hardware buffer
    static VkImageMat from_android_hardware_buffer(VkAndroidHardwareBufferImageAllocator* allocator);
#endif // __ANDROID_API__ >= 26
#endif // NCNN_PLATFORM_API

    // device image
    VkImageMemory* data;

    // pointer to the reference counter
    // when points to user-allocated data, the pointer is NULL
    int* refcount;

    // element size in bytes
    // 4 = float32/int32
    // 2 = float16
    // 1 = int8/uint8
    // 0 = empty
    size_t elemsize;

    // packed count inside element
    // c/1-h-w-1  h/1-w-1  w/1-1  scalar
    // c/4-h-w-4  h/4-w-4  w/4-4  sse/neon
    // c/8-h-w-8  h/8-w-8  w/8-8  avx/fp16
    int elempack;

    // the allocator
    VkAllocator* allocator;

    // the dimension rank
    int dims;

    int w;
    int h;
    int c;
};

// type for vulkan specialization constant and push constant
union vk_specialization_type
{
    int i;
    float f;
    uint32_t u32;
};
union vk_constant_type
{
    int i;
    float f;
};
#endif // NCNN_VULKAN

// misc function
#if NCNN_PIXEL
// convert yuv420sp(nv21) to rgb, the fast approximate version
NCNN_EXPORT void yuv420sp2rgb(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb);
// convert yuv420sp(nv12) to rgb, the fast approximate version
NCNN_EXPORT void yuv420sp2rgb_nv12(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb);
// convert yuv420sp(nv21) to rgb with half resize, the faster approximate version
NCNN_EXPORT void yuv420sp2rgb_half(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb);
// image pixel bilinear resize
NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
// image pixel bilinear resize with stride(bytes-per-row) parameter
NCNN_EXPORT void resize_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride);
NCNN_EXPORT void resize_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride);
NCNN_EXPORT void resize_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride);
NCNN_EXPORT void resize_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride);
// image pixel bilinear resize, convenient wrapper for yuv420sp(nv21/nv12)
NCNN_EXPORT void resize_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h);
#endif // NCNN_PIXEL
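
// Usage sketch (illustrative): downscaling a packed RGB buffer with the c3
// variant. The source buffer `src` (srcw*srch*3 bytes) and the 320x240 target
// size are assumptions for the example.
//
//     std::vector<unsigned char> small(320 * 240 * 3);
//     ncnn::resize_bilinear_c3(src, srcw, srch, small.data(), 320, 240);
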
#if NCNN_PIXEL_ROTATE
// type is the from type, 6 means rotating from 6 to 1
//
//     1        2       3      4         5            6           7          8
//
//   888888  888888      88  88      8888888888  88                  88  8888888888
//   88          88      88  88      88  88      88  88          88  88      88  88
//   8888      8888    8888  8888    88          8888888888  8888888888          88
//   88          88      88  88
//   88          88  888888  888888
//
// ref http://sylvana.net/jpegcrop/exif_orientation.html
// image pixel kanna rotate
NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
// image pixel kanna rotate with stride(bytes-per-row) parameter
NCNN_EXPORT void kanna_rotate_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type);
NCNN_EXPORT void kanna_rotate_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type);
NCNN_EXPORT void kanna_rotate_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type);
NCNN_EXPORT void kanna_rotate_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, int type);
// image pixel kanna rotate, convenient wrapper for yuv420sp(nv21/nv12)
NCNN_EXPORT void kanna_rotate_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, int type);
#endif // NCNN_PIXEL_ROTATE
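
// Usage sketch (illustrative): correcting an EXIF orientation-6 image, a 90
// degree case where the destination width/height are the source height/width.
// The RGB buffer `src` (srcw*srch*3 bytes) is an assumption for the example.
//
//     std::vector<unsigned char> upright(srcw * srch * 3);
//     ncnn::kanna_rotate_c3(src, srcw, srch, upright.data(), srch, srcw, 6);
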
#if NCNN_PIXEL_AFFINE
// resolve affine transform matrix from rotation angle, scale factor and x y offset
NCNN_EXPORT void get_rotation_matrix(float angle, float scale, float dx, float dy, float* tm);
// resolve affine transform matrix from two sets of points, num_point must be >= 2
NCNN_EXPORT void get_affine_transform(const float* points_from, const float* points_to, int num_point, float* tm);
// resolve the inverse affine transform matrix
NCNN_EXPORT void invert_affine_transform(const float* tm, float* tm_inv);
// image pixel bilinear warpaffine inverse transform, set -233 for transparent border color, the color RGBA is little-endian encoded
NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
// image pixel bilinear warpaffine inverse transform with stride(bytes-per-row) parameter, set -233 for transparent border color, the color RGBA is little-endian encoded
NCNN_EXPORT void warpaffine_bilinear_c1(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
NCNN_EXPORT void warpaffine_bilinear_c2(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
NCNN_EXPORT void warpaffine_bilinear_c3(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
NCNN_EXPORT void warpaffine_bilinear_c4(const unsigned char* src, int srcw, int srch, int srcstride, unsigned char* dst, int w, int h, int stride, const float* tm, int type = 0, unsigned int v = 0);
// image pixel bilinear warpaffine, convenient wrapper for yuv420sp(nv21/nv12), set -233 for transparent border color, the color YUV_ is little-endian encoded
NCNN_EXPORT void warpaffine_bilinear_yuv420sp(const unsigned char* src, int srcw, int srch, unsigned char* dst, int w, int h, const float* tm, int type = 0, unsigned int v = 0);
#endif // NCNN_PIXEL_AFFINE
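
// Usage sketch (illustrative): rotating an RGB image around its center. The
// "inverse transform" wording above is taken to mean the matrix passed to
// warpaffine maps destination coordinates back to source, hence the explicit
// inversion; the buffers `src`/`dst`, the 30 degree angle and the unit scale
// are arbitrary example values.
//
//     float tm[6];
//     ncnn::get_rotation_matrix(30.f, 1.f, srcw * 0.5f, srch * 0.5f, tm);
//     float tm_inv[6];
//     ncnn::invert_affine_transform(tm, tm_inv);
//     ncnn::warpaffine_bilinear_c3(src, srcw, srch, dst, srcw, srch, tm_inv);
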
#if NCNN_PIXEL_DRAWING
// draw rectangle, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded
NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
// draw rectangle with stride(bytes-per-row) parameter, set thickness -1 for filled rectangle, the color RGBA is little-endian encoded
NCNN_EXPORT void draw_rectangle_c1(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
NCNN_EXPORT void draw_rectangle_c2(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
NCNN_EXPORT void draw_rectangle_c3(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
NCNN_EXPORT void draw_rectangle_c4(unsigned char* pixels, int w, int h, int stride, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
// draw rectangle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled rectangle, the color YUV_ is little-endian encoded
NCNN_EXPORT void draw_rectangle_yuv420sp(unsigned char* yuv420sp, int w, int h, int rx, int ry, int rw, int rh, unsigned int color, int thickness);
// draw circle, set thickness -1 for filled circle, the color RGBA is little-endian encoded
NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
// draw circle with stride(bytes-per-row) parameter, set thickness -1 for filled circle, the color RGBA is little-endian encoded
NCNN_EXPORT void draw_circle_c1(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
NCNN_EXPORT void draw_circle_c2(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
NCNN_EXPORT void draw_circle_c3(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
NCNN_EXPORT void draw_circle_c4(unsigned char* pixels, int w, int h, int stride, int cx, int cy, int radius, unsigned int color, int thickness);
// draw circle, convenient wrapper for yuv420sp(nv21/nv12), set thickness -1 for filled circle, the color YUV_ is little-endian encoded
NCNN_EXPORT void draw_circle_yuv420sp(unsigned char* yuv420sp, int w, int h, int cx, int cy, int radius, unsigned int color, int thickness);
// draw line, the color RGBA is little-endian encoded
NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
// draw line with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded
NCNN_EXPORT void draw_line_c1(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
NCNN_EXPORT void draw_line_c2(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
NCNN_EXPORT void draw_line_c3(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
NCNN_EXPORT void draw_line_c4(unsigned char* pixels, int w, int h, int stride, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
// draw line, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded
NCNN_EXPORT void draw_line_yuv420sp(unsigned char* yuv420sp, int w, int h, int x0, int y0, int x1, int y1, unsigned int color, int thickness);
// resolve text bounding box size
NCNN_EXPORT void get_text_drawing_size(const char* text, int fontpixelsize, int* w, int* h);
// draw ascii printables and newline, the color RGBA is little-endian encoded
NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
// draw ascii printables and newline with stride(bytes-per-row) parameter, the color RGBA is little-endian encoded
NCNN_EXPORT void draw_text_c1(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
NCNN_EXPORT void draw_text_c2(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
NCNN_EXPORT void draw_text_c3(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
NCNN_EXPORT void draw_text_c4(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color);
// draw ascii printables and newline, convenient wrapper for yuv420sp(nv21/nv12), the color YUV_ is little-endian encoded
NCNN_EXPORT void draw_text_yuv420sp(unsigned char* yuv420sp, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color);
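
// Usage sketch (illustrative): drawing an outlined box with a label on a
// packed RGB buffer. Following the little-endian note above, R sits in the
// lowest byte of `color` for RGB data; the coordinates, label text and font
// size are arbitrary example values.
//
//     unsigned int green = 255 << 8;
//     ncnn::draw_rectangle_c3(pixels, w, h, 32, 32, 100, 60, green, 2);
//     ncnn::draw_text_c3(pixels, w, h, "cat 0.98", 32, 12, 16, green);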
#endif // NCNN_PIXEL_DRAWING

// type conversion
// convert float to half precision floating point
NCNN_EXPORT unsigned short float32_to_float16(float value);
// convert half precision floating point to float
NCNN_EXPORT float float16_to_float32(unsigned short value);
// convert float to brain half
NCNN_EXPORT inline unsigned short float32_to_bfloat16(float value)
{
    // 16 : 16
    union
    {
        unsigned int u;
        float f;
    } tmp;
    tmp.f = value;
    return tmp.u >> 16;
}
// convert brain half to float
NCNN_EXPORT inline float bfloat16_to_float32(unsigned short value)
{
    // 16 : 16
    union
    {
        unsigned int u;
        float f;
    } tmp;
    tmp.u = value << 16;
    return tmp.f;
}
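
// Worked example: bfloat16 simply keeps the upper 16 bits of the IEEE-754
// float encoding, so 1.0f (0x3f800000) round-trips exactly while longer
// mantissas are truncated.
//
//     unsigned short b = ncnn::float32_to_bfloat16(1.0f); // 0x3f80
//     float f = ncnn::bfloat16_to_float32(b);             // 1.0f again
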
#if __ARM_NEON
NCNN_EXPORT inline uint16x4_t vcvt_bf16_f32(float32x4_t _v)
{
    return vshrn_n_u32(vreinterpretq_u32_f32(_v), 16);
}
NCNN_EXPORT inline float32x4_t vcvt_f32_bf16(uint16x4_t _v)
{
    return vreinterpretq_f32_u32(vshll_n_u16(_v, 16));
}
#endif // __ARM_NEON

// mat process
enum BorderType
{
    BORDER_CONSTANT = 0,
    BORDER_REPLICATE = 1,
    BORDER_TRANSPARENT = -233,
};
NCNN_EXPORT void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt = Option());
NCNN_EXPORT void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt = Option());
NCNN_EXPORT void resize_nearest(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option());
NCNN_EXPORT void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option());
NCNN_EXPORT void resize_bicubic(const Mat& src, Mat& dst, int w, int h, const Option& opt = Option());
NCNN_EXPORT void convert_packing(const Mat& src, Mat& dst, int elempack, const Option& opt = Option());
NCNN_EXPORT void flatten(const Mat& src, Mat& dst, const Option& opt = Option());
NCNN_EXPORT void cast_float32_to_float16(const Mat& src, Mat& dst, const Option& opt = Option());
NCNN_EXPORT void cast_float16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option());
NCNN_EXPORT void cast_int8_to_float32(const Mat& src, Mat& dst, const Option& opt = Option());
NCNN_EXPORT void cast_float32_to_bfloat16(const Mat& src, Mat& dst, const Option& opt = Option());
NCNN_EXPORT void cast_bfloat16_to_float32(const Mat& src, Mat& dst, const Option& opt = Option());
NCNN_EXPORT void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_data, const Option& opt = Option());
NCNN_EXPORT void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt = Option());
NCNN_EXPORT void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt = Option());
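
// Usage sketch (illustrative): padding a Mat and repacking it with the
// helpers above. The shapes, the 1-pixel border and the target elempack of 4
// are arbitrary example values.
//
//     ncnn::Mat padded;
//     ncnn::copy_make_border(in, padded, 1, 1, 1, 1, ncnn::BORDER_CONSTANT, 0.f);
//
//     ncnn::Mat packed4;
//     ncnn::convert_packing(padded, packed4, 4); // elempack 1 -> 4 where the shape allows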

inline Mat::Mat()
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
}

inline Mat::Mat(int _w, size_t _elemsize, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _elemsize, _allocator);
}

inline Mat::Mat(int _w, int _h, size_t _elemsize, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _h, _elemsize, _allocator);
}

inline Mat::Mat(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _h, _c, _elemsize, _allocator);
}

inline Mat::Mat(int _w, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _elemsize, _elempack, _allocator);
}

inline Mat::Mat(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _h, _elemsize, _elempack, _allocator);
}

inline Mat::Mat(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _h, _c, _elemsize, _elempack, _allocator);
}

inline Mat::Mat(const Mat& m)
    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), c(m.c), cstep(m.cstep)
{
    addref();
}

inline Mat::Mat(int _w, void* _data, size_t _elemsize, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), c(1)
{
    cstep = w;
}

inline Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), c(1)
{
    cstep = (size_t)w * h;
}

inline Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), c(_c)
{
    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
}

inline Mat::Mat(int _w, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), c(1)
{
    cstep = w;
}

inline Mat::Mat(int _w, int _h, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), c(1)
{
    cstep = (size_t)w * h;
}

inline Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize, int _elempack, Allocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), c(_c)
{
    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
}

inline Mat::~Mat()
{
    release();
}

inline void Mat::fill(float _v)
{
    int size = (int)total();
    float* ptr = (float*)data;

#if __ARM_NEON
    int nn = size >> 2;
    int remain = size - (nn << 2);
#else
    int remain = size;
#endif // __ARM_NEON

#if __ARM_NEON
    float32x4_t _c = vdupq_n_f32(_v);
#if __aarch64__
    if (nn > 0)
    {
        asm volatile(
            "0:                             \n"
            "subs       %w0, %w0, #1        \n"
            "st1        {%4.4s}, [%1], #16  \n"
            "bne        0b                  \n"
            : "=r"(nn), // %0
            "=r"(ptr) // %1
            : "0"(nn),
            "1"(ptr),
            "w"(_c) // %4
            : "cc", "memory");
    }
#else
    if (nn > 0)
    {
        asm volatile(
            "0:                             \n"
            "subs       %0, #1              \n"
            "vst1.f32   {%e4-%f4}, [%1 :128]!\n"
            "bne        0b                  \n"
            : "=r"(nn), // %0
            "=r"(ptr) // %1
            : "0"(nn),
            "1"(ptr),
            "w"(_c) // %4
            : "cc", "memory");
    }
#endif // __aarch64__
#endif // __ARM_NEON
    for (; remain > 0; remain--)
    {
        *ptr++ = _v;
    }
}

inline void Mat::fill(int _v)
{
    int size = (int)total();
    int* ptr = (int*)data;

#if __ARM_NEON
    int nn = size >> 2;
    int remain = size - (nn << 2);
#else
    int remain = size;
#endif // __ARM_NEON

#if __ARM_NEON
    int32x4_t _c = vdupq_n_s32(_v);
#if __aarch64__
    if (nn > 0)
    {
        asm volatile(
            "0:                             \n"
            "subs       %w0, %w0, #1        \n"
            "st1        {%4.4s}, [%1], #16  \n"
            "bne        0b                  \n"
            : "=r"(nn), // %0
            "=r"(ptr) // %1
            : "0"(nn),
            "1"(ptr),
            "w"(_c) // %4
            : "cc", "memory");
    }
#else
    if (nn > 0)
    {
        asm volatile(
            "0:                             \n"
            "subs       %0, #1              \n"
            "vst1.s32   {%e4-%f4}, [%1 :128]!\n"
            "bne        0b                  \n"
            : "=r"(nn), // %0
            "=r"(ptr) // %1
            : "0"(nn),
            "1"(ptr),
            "w"(_c) // %4
            : "cc", "memory");
    }
#endif // __aarch64__
#endif // __ARM_NEON
    for (; remain > 0; remain--)
    {
        *ptr++ = _v;
    }
}

#if __ARM_NEON
inline void Mat::fill(float32x4_t _v)
{
    int size = (int)total();
    float* ptr = (float*)data;
    for (int i = 0; i < size; i++)
    {
        vst1q_f32(ptr, _v);
        ptr += 4;
    }
}

inline void Mat::fill(uint16x4_t _v)
{
    int size = (int)total();
    unsigned short* ptr = (unsigned short*)data;
    for (int i = 0; i < size; i++)
    {
        vst1_u16(ptr, _v);
        ptr += 4;
    }
}

inline void Mat::fill(int32x4_t _v)
{
    int size = (int)total();
    int* ptr = (int*)data;
    for (int i = 0; i < size; i++)
    {
        vst1q_s32(ptr, _v);
        ptr += 4;
    }
}

inline void Mat::fill(int32x4_t _v0, int32x4_t _v1)
{
    int size = (int)total();
    int* ptr = (int*)data;
    for (int i = 0; i < size; i++)
    {
        vst1q_s32(ptr, _v0);
        vst1q_s32(ptr + 4, _v1);
        ptr += 8;
    }
}
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
inline void Mat::fill(float16x4_t _v)
{
    int size = (int)total();
    __fp16* ptr = (__fp16*)data;
    for (int i = 0; i < size; i++)
    {
        vst1_f16(ptr, _v);
        ptr += 4;
    }
}

inline void Mat::fill(float16x8_t _v)
{
    int size = (int)total();
    __fp16* ptr = (__fp16*)data;
    for (int i = 0; i < size; i++)
    {
        vst1q_f16(ptr, _v);
        ptr += 8;
    }
}
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#endif // __ARM_NEON
#if __AVX__
inline void Mat::fill(__m256 _v)
{
    int size = (int)total();
    float* ptr = (float*)data;
    for (int i = 0; i < size; i++)
    {
        _mm256_storeu_ps(ptr, _v);
        ptr += 8;
    }
}
inline void Mat::fill(__m128i _v)
{
    int size = (int)total();
    unsigned short* ptr = (unsigned short*)data;
    for (int i = 0; i < size; i++)
    {
        _mm_store_si128((__m128i*)ptr, _v);
        ptr += 8;
    }
}
#endif // __AVX__

template<typename T>
inline void Mat::fill(T _v)
{
    int size = (int)total();
    T* ptr = (T*)data;
    for (int i = 0; i < size; i++)
    {
        ptr[i] = _v;
    }
}

inline Mat& Mat::operator=(const Mat& m)
{
    if (this == &m)
        return *this;

    if (m.refcount)
        NCNN_XADD(m.refcount, 1);

    release();

    data = m.data;
    refcount = m.refcount;
    elemsize = m.elemsize;
    elempack = m.elempack;
    allocator = m.allocator;

    dims = m.dims;
    w = m.w;
    h = m.h;
    c = m.c;

    cstep = m.cstep;

    return *this;
}

inline void Mat::addref()
{
    if (refcount)
        NCNN_XADD(refcount, 1);
}

inline void Mat::release()
{
    if (refcount && NCNN_XADD(refcount, -1) == 1)
    {
        if (allocator)
            allocator->fastFree(data);
        else
            fastFree(data);
    }

    data = 0;

    elemsize = 0;
    elempack = 0;

    dims = 0;
    w = 0;
    h = 0;
    c = 0;

    cstep = 0;

    refcount = 0;
}

inline bool Mat::empty() const
{
    return data == 0 || total() == 0;
}

inline size_t Mat::total() const
{
    return cstep * c;
}

inline int Mat::elembits() const
{
    return elempack ? static_cast<int>(elemsize * 8) / elempack : 0;
}

inline Mat Mat::shape() const
{
    if (dims == 1)
        return Mat(w * elempack, (void*)0);
    if (dims == 2)
        return Mat(w, h * elempack, (void*)0);
    if (dims == 3)
        return Mat(w, h, c * elempack, (void*)0);

    return Mat();
}

inline Mat Mat::channel(int _c)
{
    return Mat(w, h, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
}

inline const Mat Mat::channel(int _c) const
{
    return Mat(w, h, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
}

inline float* Mat::row(int y)
{
    return (float*)((unsigned char*)data + (size_t)w * y * elemsize);
}

inline const float* Mat::row(int y) const
{
    return (const float*)((unsigned char*)data + (size_t)w * y * elemsize);
}

template<typename T>
inline T* Mat::row(int y)
{
    return (T*)((unsigned char*)data + (size_t)w * y * elemsize);
}

template<typename T>
inline const T* Mat::row(int y) const
{
    return (const T*)((unsigned char*)data + (size_t)w * y * elemsize);
}

inline Mat Mat::channel_range(int _c, int channels)
{
    return Mat(w, h, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
}

inline const Mat Mat::channel_range(int _c, int channels) const
{
    return Mat(w, h, channels, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator);
}

inline Mat Mat::row_range(int y, int rows)
{
    return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator);
}

inline const Mat Mat::row_range(int y, int rows) const
{
    return Mat(w, rows, (unsigned char*)data + (size_t)w * y * elemsize, elemsize, elempack, allocator);
}

inline Mat Mat::range(int x, int n)
{
    return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator);
}

inline const Mat Mat::range(int x, int n) const
{
    return Mat(n, (unsigned char*)data + x * elemsize, elemsize, elempack, allocator);
}

template<typename T>
inline Mat::operator T*()
{
    return (T*)data;
}

template<typename T>
inline Mat::operator const T*() const
{
    return (const T*)data;
}

inline float& Mat::operator[](size_t i)
{
    return ((float*)data)[i];
}

inline const float& Mat::operator[](size_t i) const
{
    return ((const float*)data)[i];
}

1183 #if NCNN_VULKAN
1184 
VkMat()1185 inline VkMat::VkMat()
1186     : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
1187 {
1188 }
1189 
VkMat(int _w,size_t _elemsize,VkAllocator * _allocator)1190 inline VkMat::VkMat(int _w, size_t _elemsize, VkAllocator* _allocator)
1191     : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
1192 {
1193     create(_w, _elemsize, _allocator);
1194 }
1195 
VkMat(int _w,int _h,size_t _elemsize,VkAllocator * _allocator)1196 inline VkMat::VkMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
1197     : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
1198 {
1199     create(_w, _h, _elemsize, _allocator);
1200 }

inline VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _h, _c, _elemsize, _allocator);
}

inline VkMat::VkMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _elemsize, _elempack, _allocator);
}

inline VkMat::VkMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _h, _elemsize, _elempack, _allocator);
}

inline VkMat::VkMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0), cstep(0)
{
    create(_w, _h, _c, _elemsize, _elempack, _allocator);
}

inline VkMat::VkMat(const VkMat& m)
    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), c(m.c)
{
    addref();

    cstep = m.cstep;
}

inline VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), c(1)
{
    cstep = w;
}

inline VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), c(1)
{
    cstep = w * h;
}

inline VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), c(_c)
{
    cstep = alignSize(w * h * elemsize, 16) / elemsize;
}

inline VkMat::VkMat(int _w, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), c(1)
{
    cstep = w;
}

inline VkMat::VkMat(int _w, int _h, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), c(1)
{
    cstep = w * h;
}

inline VkMat::VkMat(int _w, int _h, int _c, VkBufferMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), c(_c)
{
    cstep = alignSize(w * h * elemsize, 16) / elemsize;
}

inline VkMat::~VkMat()
{
    release();
}

inline VkMat& VkMat::operator=(const VkMat& m)
{
    if (this == &m)
        return *this;

    if (m.refcount)
        NCNN_XADD(m.refcount, 1);

    release();

    data = m.data;
    refcount = m.refcount;
    elemsize = m.elemsize;
    elempack = m.elempack;
    allocator = m.allocator;

    dims = m.dims;
    w = m.w;
    h = m.h;
    c = m.c;

    cstep = m.cstep;

    return *this;
}

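// mapped() wraps the device buffer's host-visible mapping in a plain Mat so the CPU can
// read or write it without an explicit copy; it returns an empty Mat when the allocator
// is not mappable. mapped_ptr() offsets the base mapping by this sub-allocation's offset.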
inline Mat VkMat::mapped() const
{
    if (!allocator->mappable)
        return Mat();

    if (dims == 1)
        return Mat(w, mapped_ptr(), elemsize, elempack, 0);

    if (dims == 2)
        return Mat(w, h, mapped_ptr(), elemsize, elempack, 0);

    if (dims == 3)
        return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0);

    return Mat();
}

inline void* VkMat::mapped_ptr() const
{
    if (!allocator->mappable)
        return 0;

    return (unsigned char*)data->mapped_ptr + data->offset;
}

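// addref() and release() implement intrusive reference counting shared between copies:
// the buffer is returned to its allocator via fastFree() only when the last owning VkMat
// is released. A VkMat wrapping external VkBufferMemory (refcount == 0) is never freed here.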
inline void VkMat::addref()
{
    if (refcount)
        NCNN_XADD(refcount, 1);
}

inline void VkMat::release()
{
    if (refcount && NCNN_XADD(refcount, -1) == 1)
    {
        if (allocator && data)
        {
            allocator->fastFree(data);
        }
    }

    data = 0;

    elemsize = 0;
    elempack = 0;

    dims = 0;
    w = 0;
    h = 0;
    c = 0;

    cstep = 0;

    refcount = 0;
}

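// empty() is true when no buffer is attached or the element count is zero. total() counts
// elements including per-channel padding (cstep packed elements per channel times c channels),
// and elembits() is the bit width of one scalar lane (elemsize * 8 / elempack).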
inline bool VkMat::empty() const
{
    return data == 0 || total() == 0;
}

inline size_t VkMat::total() const
{
    return cstep * c;
}

inline int VkMat::elembits() const
{
    return elempack ? elemsize * 8 / elempack : 0;
}

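// shape() reports the logical (unpacked) dimensions as a dummy Mat with a null data pointer:
// elempack is folded back into the last axis, so packed and unpacked layouts of the same
// blob report the same shape.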
inline Mat VkMat::shape() const
{
    if (dims == 1)
        return Mat(w * elempack, (void*)0);
    if (dims == 2)
        return Mat(w, h * elempack, (void*)0);
    if (dims == 3)
        return Mat(w, h, c * elempack, (void*)0);

    return Mat();
}

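// Accessors for the underlying Vulkan resource: buffer() is the VkBuffer handle,
// buffer_offset() the offset of this sub-allocation within it, and buffer_capacity()
// the capacity reserved for it.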
inline VkBuffer VkMat::buffer() const
{
    return data->buffer;
}

inline size_t VkMat::buffer_offset() const
{
    return data->offset;
}

inline size_t VkMat::buffer_capacity() const
{
    return data->capacity;
}

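// A minimal usage sketch (assuming a host-mappable VkAllocator obtained elsewhere; the
// name `staging_allocator` below is illustrative, not part of this header):
//
//   ncnn::VkMat buf(256, 4u, staging_allocator); // 1D fp32 device buffer
//   ncnn::Mat host = buf.mapped();               // zero-copy host view, empty if not mappable
//   if (!host.empty())
//       host.fill(0.f);                          // write through the mapping
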
inline VkImageMat::VkImageMat()
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
}

inline VkImageMat::VkImageMat(int _w, size_t _elemsize, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
    create(_w, _elemsize, _allocator);
}

inline VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
    create(_w, _h, _elemsize, _allocator);
}

inline VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
    create(_w, _h, _c, _elemsize, _allocator);
}

inline VkImageMat::VkImageMat(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
    create(_w, _elemsize, _elempack, _allocator);
}

inline VkImageMat::VkImageMat(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
    create(_w, _h, _elemsize, _elempack, _allocator);
}

inline VkImageMat::VkImageMat(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(0), refcount(0), elemsize(0), elempack(0), allocator(0), dims(0), w(0), h(0), c(0)
{
    create(_w, _h, _c, _elemsize, _elempack, _allocator);
}

inline VkImageMat::VkImageMat(const VkImageMat& m)
    : data(m.data), refcount(m.refcount), elemsize(m.elemsize), elempack(m.elempack), allocator(m.allocator), dims(m.dims), w(m.w), h(m.h), c(m.c)
{
    addref();
}

inline VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(1), w(_w), h(1), c(1)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(2), w(_w), h(_h), c(1)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(1), allocator(_allocator), dims(3), w(_w), h(_h), c(_c)
{
}

inline VkImageMat::VkImageMat(int _w, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(1), w(_w), h(1), c(1)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(2), w(_w), h(_h), c(1)
{
}

inline VkImageMat::VkImageMat(int _w, int _h, int _c, VkImageMemory* _data, size_t _elemsize, int _elempack, VkAllocator* _allocator)
    : data(_data), refcount(0), elemsize(_elemsize), elempack(_elempack), allocator(_allocator), dims(3), w(_w), h(_h), c(_c)
{
}

inline VkImageMat::~VkImageMat()
{
    release();
}

inline VkImageMat& VkImageMat::operator=(const VkImageMat& m)
{
    if (this == &m)
        return *this;

    if (m.refcount)
        NCNN_XADD(m.refcount, 1);

    release();

    data = m.data;
    refcount = m.refcount;
    elemsize = m.elemsize;
    elempack = m.elempack;
    allocator = m.allocator;

    dims = m.dims;
    w = m.w;
    h = m.h;
    c = m.c;

    return *this;
}

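// Unlike VkMat, image memory is only host-visible when both the allocator is mappable and
// the backing VkImageMemory actually carries a mapped pointer; otherwise mapped() returns
// an empty Mat and mapped_ptr() returns null. The mapping starts at the image's bind offset.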
inline Mat VkImageMat::mapped() const
{
    if (!allocator->mappable || !data->mapped_ptr)
        return Mat();

    if (dims == 1)
        return Mat(w, mapped_ptr(), elemsize, elempack, 0);

    if (dims == 2)
        return Mat(w, h, mapped_ptr(), elemsize, elempack, 0);

    if (dims == 3)
        return Mat(w, h, c, mapped_ptr(), elemsize, elempack, 0);

    return Mat();
}

inline void* VkImageMat::mapped_ptr() const
{
    if (!allocator->mappable || !data->mapped_ptr)
        return 0;

    return (unsigned char*)data->mapped_ptr + data->bind_offset;
}

inline void VkImageMat::addref()
{
    if (refcount)
        NCNN_XADD(refcount, 1);
}

inline void VkImageMat::release()
{
    if (refcount && NCNN_XADD(refcount, -1) == 1)
    {
        if (allocator && data)
        {
            allocator->fastFree(data);
        }
    }

    data = 0;

    elemsize = 0;
    elempack = 0;

    dims = 0;
    w = 0;
    h = 0;
    c = 0;

    refcount = 0;
}

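// total() for an image is simply w * h * c packed texels; unlike VkMat there is no cstep
// padding, and elembits() again reports the bit width of one scalar lane.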
inline bool VkImageMat::empty() const
{
    return data == 0 || total() == 0;
}

inline size_t VkImageMat::total() const
{
    return w * h * c;
}

inline int VkImageMat::elembits() const
{
    return elempack ? elemsize * 8 / elempack : 0;
}

inline Mat VkImageMat::shape() const
{
    if (dims == 1)
        return Mat(w * elempack, (void*)0);
    if (dims == 2)
        return Mat(w, h * elempack, (void*)0);
    if (dims == 3)
        return Mat(w, h, c * elempack, (void*)0);

    return Mat();
}

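// Accessors for the underlying Vulkan resources: image() is the VkImage handle and
// imageview() the VkImageView created for it.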
inline VkImage VkImageMat::image() const
{
    return data->image;
}

inline VkImageView VkImageMat::imageview() const
{
    return data->imageview;
}

#endif // NCNN_VULKAN

} // namespace ncnn

#endif // NCNN_MAT_H