//
//  CommonOptFunction.h
//  MNN
//
//  Created by MNN on 2018/07/16.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifndef CommonOptFunction_h
#define CommonOptFunction_h

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "core/Macro.h"

extern "C" {

void MNNReluWithSlope(float* dst, const float* src, size_t sizeQuad, float slope);

void MNNReluInt8(int8_t* dst, const int8_t* src, size_t size);

void MNNReluWithSlopeChannel(float* dst, const float* src, const float* slope, size_t sizeQuad, size_t depthQuad);

void MNNHardSwish(float* dst, const float* src, size_t size);

void MNNGelu(float* dst, const float* src, size_t size);

void MNNPackC4(float* dst, const float* src, size_t area, size_t depth);

void MNNPackC4Int16(int16_t* dst, const int16_t* src, size_t area, size_t depth);

void MNNPackC4Uint8(uint8_t* dst, const uint8_t* src, size_t area, size_t depth);

void MNNUnpackC4(float* dst, const float* src, size_t area, size_t depth);

void MNNUnpackC4Int16(int16_t* dst, const int16_t* src, size_t area, size_t depth);

void MNNUnpackC4Uint8(uint8_t* dst, const uint8_t* src, size_t area, size_t depth);
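
/*
 Usage sketch (illustrative, not part of this header's contract): a scalar
 reference for the NC4HW4 packing that MNNPackC4 is assumed to perform. src is
 channel-major (depth x area); dst groups channels into blocks of 4, with the
 tail block zero-padded. Verify the exact layout against the implementation.

     void PackC4Reference(float* dst, const float* src, size_t area, size_t depth) {
         size_t depthC4 = (depth + 3) / 4;
         memset(dst, 0, depthC4 * area * 4 * sizeof(float));
         for (size_t c = 0; c < depth; ++c) {
             for (size_t x = 0; x < area; ++x) {
                 dst[(c / 4) * area * 4 + x * 4 + (c % 4)] = src[c * area + x];
             }
         }
     }
*/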

void MNNScaleAndAddBias(float* dst, const float* src, const float* bias, const float* alpha, size_t planeNumber,
                        size_t biasNumber);
void MNNScaleAndAddBiasScalar(float* dst, const float* src, float bias, float alpha, size_t number);

void MNNUnpackTranspose(float* dst, const float* src, size_t area, size_t depth);
void MNNUnpackTransposeInt16(int16_t* dst, const int16_t* src, size_t area, size_t depth);
void MNNUnpackTransposeUint8(uint8_t* dst, const uint8_t* src, size_t area, size_t depth);

void MNNPackTranspose(float* dst, const float* src, size_t area, size_t depth);
void MNNPackTransposeInt16(int16_t* dst, const int16_t* src, size_t area, size_t depth);
void MNNPackTransposeUint8(uint8_t* dst, const uint8_t* src, size_t area, size_t depth);

void MNNCopyC4WithStride(const float* source, float* dest, size_t srcStride, size_t dstStride, size_t count);
void MNNAddC4WithStride(const float* source, float* dest, size_t srcStride, size_t dstStride, size_t count);

void MNNUInt8ToInt16WithOffsetC4Common(int16_t* dst, const uint8_t* src, size_t zeroPoint, size_t sizeQuad,
                                       size_t dstStride, size_t srcStride);
void MNNUInt8ToInt16WithOffsetC4Fast(int16_t* dst, const uint8_t* src, size_t zeroPoint, size_t sizeQuad,
                                     size_t depthQuad, size_t dstZStep, size_t srcZStep);
void MNNMaxFloat(float* input, float* maxBuffer, int32_t inputCountUnit);
void MNNMinFloat(float* input, float* minBuffer, int32_t inputCountUnit);
void MNNExpC8(float* dest, const float* source, const float* parameters, size_t countC8);
void MNNPowC8(float* dest, const float* source, const float* powfParam, size_t betaInt, size_t countC8);

void MNNExp(float* dst, const float* src, size_t dataSize);
void MNNSin(float* dst, const float* src, size_t dataSize);
void MNNTanh(float* dst, const float* src, size_t dataSize);
void MNNSigmoid(float* dst, const float* src, size_t dataSize);
void MNNSigmoidLowp(float* dst, const float* src, size_t dataSize);
void MNNReluWithSlopeCommon(float* dst, const float* src, size_t size, float slope);
void MNNHardSwishCommon(float* dst, const float* src, size_t size);
void MNNGeluCommon(float* dst, const float* src, size_t size);

// Get the pack sizes (eP, lP, hP) for MatMul's e, l and h dimensions; each pack number must be 1 or a multiple of 4.
void MNNGetMatMulPackMode(int* eP, int *lP, int* hP);

void MNNGetSparseMatMulPackMode(int* eP, int *lP, int* hP);
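
/*
 Usage sketch (illustrative): the pack sizes are queried once and then used to
 size the packed buffers consumed by the packed MatMul kernels below. The
 variable names are local to this example.

     int eP = 0, lP = 0, hP = 0;
     MNNGetMatMulPackMode(&eP, &lP, &hP);
     // e is tiled by eP, l by lP and h by hP when preparing A and B.
*/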

/**
 info layout:
   int number     = info[0];
   int eSrcStride = info[1];
   int eDstStride = info[2];
   int xStride    = info[3];

 el layout: number * 4 entries, four per source group:
   0: e
   1: l
   2: e-offset
   3: l-offset
 */
void MNNPackC4ForMatMul_A(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el);
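
/*
 Usage sketch (illustrative, based only on the layout documented above): with a
 single source group, info carries the group count and strides, and el carries
 four entries for that group. The exact stride semantics should be confirmed
 against the implementation.

     int32_t info[4] = {1, eSrcStride, eDstStride, xStride}; // number, strides
     int32_t el[4]   = {e, l, 0, 0};                         // e, l, e-offset, l-offset
     const float* sourceGroup[1] = {A};
     MNNPackC4ForMatMul_A(packedA, sourceGroup, info, el);
*/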

void MNNPackForMatMul_B(float* dest, const float* source, size_t h, size_t l, bool transpose);

// parameters: e, l, h, CStride, AStride, BStride
void MNNPackedMatMul(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias);
void MNNFunctionInit();
void MNNPackedMatMulRemain(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias);
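
/*
 Usage sketch (illustrative, following only the parameter order in the comment
 above: e, l, h, CStride, AStride, BStride). Whether the strides are expressed
 in elements or bytes, and whether nullptr is accepted for postParameters and
 bias, should be confirmed against the implementation.

     size_t parameter[6] = {e, l, h, cStride, aStride, bStride};
     MNNPackedMatMul(C, packedA, packedB, parameter, nullptr, nullptr);
*/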

void MNNPackForSparseMatMul_B(float* dest, unsigned int* NNZMap, int* dataOffsetMap, int sparseBlockOC, const float* source, size_t h, size_t l, const int eP, bool transpose);
void MNNPackedSparseMatMulEpx1(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, unsigned int* NNZMap, int* dataOffsetMap);

void MNNPackedSparseMatMulEpx4(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, unsigned int* NNZMap, int* dataOffsetMap);

int MNNGetC4DivNumber(int hP);

// C = clamp(alpha * A + beta * B, min, max)
// parameters: alpha, beta, min, max
void MNNAxByClamp(float* C, const float* A, const float* B, size_t width, size_t cStride, size_t aStride, size_t bStride, size_t height, const float* parameters);
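
/*
 Illustrative scalar reference for the formula documented above,
 C = clamp(alpha * A + beta * B, min, max), with parameters packed as
 {alpha, beta, min, max}. The real kernel additionally walks width and height
 with the given strides; this sketch only documents the per-element math.

     float AxByClampScalar(float a, float b, const float* p) {
         float v = p[0] * a + p[1] * b;
         if (v < p[2]) v = p[2];
         if (v > p[3]) v = p[3];
         return v;
     }
*/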

void MNNAxByClampBroadcastUnit(float* C, const float* A, const float* B, size_t width, size_t cStride, size_t aStride, size_t height, const float* parameters);

// dim: 4 elements: sizeDW, sizeDH, strideSW, strideDH
void MNNTranspose32Bit(int32_t* dstO, const int32_t* srcO, int32_t* dim); // not C4

void MNNVectorTop1Float(float* input, float* maxValue, int32_t* maxIndex, size_t inputCountUnit);
void MNNVectorTop1Int32(int32_t* input, int32_t* maxValue, int32_t* maxIndex, size_t inputCountUnit);
struct MatMulParam {
    int32_t e;
    int32_t l;
    int32_t h;
    int32_t numberThread;
    bool ATranspose;
    bool BTranspose;
};
void MNNComputeMatMulForE_1(const float* A, const float* B, float* C, const float* biasPtr, const MatMulParam* param, size_t tId);
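
/*
 Usage sketch (illustrative): MNNComputeMatMulForE_1 is assumed to handle the
 degenerate e == 1 case, with work split across numberThread workers indexed by
 tId; passing nullptr for biasPtr when no bias is applied is an assumption.

     MatMulParam param;
     param.e = 1;
     param.l = l;
     param.h = h;
     param.numberThread = threadNumber;
     param.ATranspose = false;
     param.BTranspose = false;
     for (int tId = 0; tId < threadNumber; ++tId) {
         MNNComputeMatMulForE_1(A, B, C, nullptr, &param, (size_t)tId);
     }
*/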

void MNNCopyC4Int16WithStride(const float* sourceF, float* destF, size_t srcStride, size_t dstStride, size_t count);
void MNNSourceTransformCommonF23(const float *source, float *dest, int unit, int iw, int pad, int su, int eu);
void MNNConvDwF23MulTransUnit(float **cacheLine, const float *weight, float *dest, size_t ow, const float* bias, const float* postParameter);
void MNNMultiAndDestTransformCommon23(float **cacheLine, const float *weight, float *dest, int cacheLineSize, int ow);
void MNNInt8ToInt16(int16_t* dest, const int8_t* source, size_t count);
}

typedef void(*MNNBinaryExecute)(void* outputRaw, const void* inputRaw0, const void* inputRaw1, int elementSize, int broadcastIndex);
typedef void(*MNNUnaryExecute)(void* outputRaw, const void* inputRaw, int elementSize);
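
/*
 Illustrative example of a function matching the MNNUnaryExecute signature,
 assuming the raw pointers refer to float data when the function is selected
 for the float backend:

     static void ExampleUnaryRelu(void* outputRaw, const void* inputRaw, int elementSize) {
         float* out      = (float*)outputRaw;
         const float* in = (const float*)inputRaw;
         for (int i = 0; i < elementSize; ++i) {
             out[i] = in[i] > 0.0f ? in[i] : 0.0f;
         }
     }
*/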

namespace MNN {
struct CoreFunctions {
    // CPU feature flags
    bool supportFp16arith = false;
    bool supportSDot = false;
    /**MatMul pack and compute functions*/
    void(*MNNGetMatMulPackMode)(int* eP, int *lP, int* hP);
    void(*MNNGetSparseMatMulPackMode)(int* eP, int *lP, int* hP);
    void(*MNNPackC4ForMatMul_A)(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el);
    void(*MNNPackForMatMul_B)(float* dest, const float* source, size_t h, size_t l, bool transpose);
    // parameters: e, l, h, CStride, AStride, BStride
    void(*MNNPackedMatMul)(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias);
    void(*MNNPackedMatMulRemain)(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias);
    void(*MNNComputeMatMulForH_1)(const float* A, const float* B, float* C, const float* biasPtr, const MatMulParam* param, size_t tId);
    void(*MNNComputeMatMulForE_1)(const float* A, const float* B, float* C, const float* biasPtr, const MatMulParam* param, size_t tId);

    // For atomic ops
    MNNBinaryExecute(*MNNSelectBinaryFunctionForFloat)(int opType);
    MNNUnaryExecute(*MNNSelectUnaryFunctionForFloat)(int opType, int precisionMode);

    // Sparse matrix multiply
    void(*MNNPackForSparseMatMul_B)(float* dest, unsigned int* NNZMap, int* dataOffsetMap, int sparseBlockOC, const float* source, size_t h, size_t l, const int eP, bool transpose);

    // B matrix is sparse
    void(*MNNPackedSparseMatMulEpx1)(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, unsigned int* NNZMap, int* dataOffsetMap);
    void(*MNNPackedSparseMatMulEpx4)(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, unsigned int* NNZMap, int* dataOffsetMap);

    /**Lowp backend setting*/
    void(*MNNFp32ToLowp)(const float* src, int16_t* dst, size_t size);
    void(*MNNLowpToFp32)(const int16_t* src, float* dst, size_t size);
    int bytes; // byte width of the backend's float type

    /**NC4HW4's functions*/
    int pack;
    void(*MNNPackCUnit)(float* dst, const float* src, size_t area, size_t depth);
    void(*MNNUnpackCUnit)(float* dst, const float* src, size_t area, size_t depth);
    void(*MNNPackCUnitTranspose)(float* dst, const float* src, size_t area, size_t depth);
    void(*MNNUnpackCUnitTranspose)(float* dst, const float* src, size_t area, size_t depth);

    void(*MNNConvRunForUnitDepthWise)(float* dst, const float* src, const float* weight, size_t fw, size_t fh,
                                        size_t weight_y_step, size_t dilateX_step, size_t dilateY_step);
    void(*MNNConvRunForLineDepthwise)(float* dst, const float* src, const float* weight, size_t width, size_t src_w_setup,
                                    size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step, size_t height,
                                    size_t srcHStep, size_t dstHStep);
    void(*MNNAxByClampBroadcastUnit)(float* C, const float* A, const float* B, size_t width, size_t cStride, size_t aStride, size_t height, const float* parameters);
    void(*MNNMultiAndDestTransformCommon23)(float **cacheLine, const float *weight, float *dest, int cacheLineSize, int ow, const float* bias, const float* post);
    void(*MNNSourceTransformCommonF23)(const float *source, float *dest, int unit, int iw, int pad, int su, int eu);
    void(*MNNConvDwF23MulTransUnit)(float **cacheLine, const float *weight, float *dest, size_t ow, const float* bias, const float* post);
    void(*MNNMatrixAdd)(float* C, const float* A, const float* B, size_t widthC4, size_t cStride, size_t aStride,
                      size_t bStride, size_t height);
    void(*MNNMatrixSub)(float* C, const float* A, const float* B, size_t widthC4, size_t cStride, size_t aStride,
                      size_t bStride, size_t height);
    void(*MNNStrassenMergeCFunction)(float* c11, float* c12, float* c21, float* c22, float* xAddr, size_t cStride, size_t eSub, size_t hSub);
    void(*MNNScaleAndAddBias)(float* dst, const float* src, const float* bias, const float* alpha, size_t planeNumber, size_t biasNumber);
    float penalty;

    void(*MNNCopyC4WithStride)(const float* source, float* dest, size_t srcStride, size_t dstStride, size_t count);
    void(*MNNAddC4WithStride)(const float* source, float* dest, size_t srcStride, size_t dstStride, size_t count);

    typedef void (*WinoTransFunc)(const float* srcBlock, float* dstStart, size_t srcStep, size_t dstStep);
    WinoTransFunc(*chooseWinoSourceTransform)(int k, int w);
    WinoTransFunc(*chooseWinoDestTransform)(int k, int h);

    void(*MNNDeconvRunForUnitDepthWise)(const float* dst, float* src, const float* weight, size_t fw, size_t fh,
                                      size_t weight_y_step, size_t dilateX_step, size_t dilateY_step);
    void(*MNNDeconvRunForLineDepthwise)(const float* dst, float* src, const float* weight, size_t width, size_t src_w_setup,
                                      size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step);
    void(*MNNReluWithSlopeChannel)(float* dst, const float* src, const float* slope, size_t sizeQuad, size_t depthQuad);
    void(*MNNPoolingAvg)(const void* channelInput, int inputWidth, int inputHeight, void *channelOutput,
                           int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth,
                           int strideHeight, int padWidth, int padHeight, int padType, int countType);
    void(*MNNPoolingMax)(const void* channelInput, int inputWidth, int inputHeight, void *channelOutput,
                           int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth,
                           int strideHeight, int padWidth, int padHeight, int padType, int countType);
};
void MNNCoreFunctionInit();
CoreFunctions* MNNGetCoreFunctions();
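
/*
 Usage sketch (illustrative): the function table is initialized once and then
 dispatched through, e.g. to query the channel pack unit and float byte width:

     MNNCoreFunctionInit();
     const CoreFunctions* core = MNNGetCoreFunctions();
     int eP = 0, lP = 0, hP = 0;
     core->MNNGetMatMulPackMode(&eP, &lP, &hP);
     int unit  = core->pack;   // channels per packed block (e.g. 4)
     int bytes = core->bytes;  // byte width of the backend's float type
*/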
} // namespace MNN

#endif /* CommonOptFunction_h */