1 // 2 // CommonOptFunction.h 3 // MNN 4 // 5 // Created by MNN on 2018/07/16. 6 // Copyright © 2018, Alibaba Group Holding Limited 7 // 8 9 #ifndef CommonOptFunction_h 10 #define CommonOptFunction_h 11 12 #include <stdint.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 #include "core/Macro.h" 17 18 extern "C" { 19 20 void MNNReluWithSlope(float* dst, const float* src, size_t sizeQuad, float slope); 21 22 void MNNReluInt8(int8_t* dst, const int8_t* src, size_t size); 23 24 void MNNReluWithSlopeChannel(float* dst, const float* src, const float* slope, size_t sizeQuad, size_t depthQuad); 25 26 void MNNHardSwish(float* dst, const float* src, size_t size); 27 28 void MNNGelu(float* dst, const float* src, size_t size); 29 30 void MNNPackC4(float* dst, const float* src, size_t area, size_t depth); 31 32 void MNNPackC4Int16(int16_t* dst, const int16_t* src, size_t area, size_t depth); 33 34 void MNNPackC4Uint8(uint8_t* dst, const uint8_t* src, size_t area, size_t depth); 35 36 void MNNUnpackC4(float* dst, const float* src, size_t area, size_t depth); 37 38 void MNNUnpackC4Int16(int16_t* dst, const int16_t* src, size_t area, size_t depth); 39 40 void MNNUnpackC4Uint8(uint8_t* dst, const uint8_t* src, size_t area, size_t depth); 41 42 void MNNScaleAndAddBias(float* dst, const float* src, const float* bias, const float* alpha, size_t planeNumber, 43 size_t biasNumber); 44 void MNNScaleAndAddBiasScalar(float* dst, const float* src, float bias, float alpha, size_t number); 45 46 void MNNUnpackTranspose(float* dst, const float* src, size_t area, size_t depth); 47 void MNNUnpackTransposeInt16(int16_t* dst, const int16_t* src, size_t area, size_t depth); 48 void MNNUnpackTransposeUint8(uint8_t* dst, const uint8_t* src, size_t area, size_t depth); 49 50 void MNNPackTranspose(float* dst, const float* src, size_t area, size_t depth); 51 void MNNPackTransposeInt16(int16_t* dst, const int16_t* src, size_t area, size_t depth); 52 void MNNPackTransposeUint8(uint8_t* dst, const uint8_t* src, size_t area, size_t depth); 53 54 void MNNCopyC4WithStride(const float* source, float* dest, size_t srcStride, size_t dstStride, size_t count); 55 void MNNAddC4WithStride(const float* source, float* dest, size_t srcStride, size_t dstStride, size_t count); 56 57 void MNNUInt8ToInt16WithOffsetC4Common(int16_t* dst, const uint8_t* src, size_t zeroPoint, size_t sizeQuad, 58 size_t dstStride, size_t srcStride); 59 void MNNUInt8ToInt16WithOffsetC4Fast(int16_t* dst, const uint8_t* src, size_t zeroPoint, size_t sizeQuad, 60 size_t depthQuad, size_t dstZStep, size_t srcZStep); 61 void MNNMaxFloat(float* input, float* maxBuffer, int32_t inputCountUnit); 62 void MNNMinFloat(float* input, float* maxBuffer, int32_t inputCountUnit); 63 void MNNExpC8(float* dest, const float* source, const float* parameters, size_t countC8); 64 void MNNPowC8(float* dest, const float* source, const float* powfParam, size_t betaInt, size_t countC8); 65 66 void MNNExp(float* dst, const float* src, size_t dataSize); 67 void MNNSin(float* dst, const float* src, size_t dataSize); 68 void MNNTanh(float* dst, const float* src, size_t dataSize); 69 void MNNSigmoid(float* dst, const float* src, size_t dataSize); 70 void MNNSigmoidLowp(float* dst, const float* src, size_t dataSize); 71 void MNNReluWithSlopeCommon(float* dst, const float* src, size_t size, float slope); 72 void MNNHardSwishCommon(float* dst, const float* src, size_t size); 73 void MNNGeluCommon(float* dst, const float* src, size_t size); 74 75 // Get Pack for MatMul's e , l , h , the pack number must be 1 or 4 * n 76 void MNNGetMatMulPackMode(int* eP, int *lP, int* hP); 77 78 void MNNGetSparseMatMulPackMode(int* eP, int *lP, int* hP); 79 80 /** 81 int number = info[0]; 82 int eSrcStride = info[1]; 83 int eDstStride = info[2]; 84 int xStride = info[3]; 85 86 el: number * 4 87 0: e 88 1: l 89 2: e-offset 90 3: l-offset 91 */ 92 void MNNPackC4ForMatMul_A(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el); 93 94 void MNNPackForMatMul_B(float* dest, const float* source, size_t h, size_t l, bool transpose); 95 96 // parameters: e, l, h, CStride, AStride, BStride 97 void MNNPackedMatMul(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias); 98 void MNNFunctionInit(); 99 void MNNPackedMatMulRemain(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias); 100 101 void MNNPackForSparseMatMul_B(float* dest, unsigned int* NNZMap, int* dataOffsetMap, int sparseBlockOC, const float* source, size_t h, size_t l, const int eP, bool transpose); 102 void MNNPackedSparseMatMulEpx1(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, unsigned int* NNZMap, int* dataOffsetMap); 103 104 void MNNPackedSparseMatMulEpx4(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, unsigned int* NNZMap, int* dataOffsetMap); 105 106 107 int MNNGetC4DivNumber(int hP); 108 109 // C = clamp(alpha * A + beta * B, min, max) 110 // paramters: alpha, beta, min, max 111 void MNNAxByClamp(float* C, const float* A, const float* B, size_t width, size_t cStride, size_t aStride, size_t bStride, size_t height, const float* parameters); 112 113 void MNNAxByClampBroadcastUnit(float* C, const float* A, const float* B, size_t width, size_t cStride, size_t aStride, size_t height, const float* parameters); 114 115 // dim: 4-element, sizeDW, sizeDH, strideSW, strideDH 116 void MNNTranspose32Bit(int32_t* dstO, const int32_t* srcO, int32_t* dim); // not C4 117 118 void MNNVectorTop1Float(float* input, float* maxValue, int32_t* maxIndex, size_t inputCountUnit); 119 void MNNVectorTop1Int32(int32_t* input, int32_t* maxValue, int32_t* maxIndex, size_t inputCountUnit); 120 struct MatMulParam { 121 int32_t e; 122 int32_t l; 123 int32_t h; 124 int32_t numberThread; 125 bool ATranspose; 126 bool BTranspose; 127 }; 128 void MNNComputeMatMulForE_1(const float* A, const float* B, float* C, const float* biasPtr, const MatMulParam* param, size_t tId); 129 130 void MNNCopyC4Int16WithStride(const float* sourceF, float* destF, size_t srcStride, size_t dstStride, size_t count); 131 void MNNSourceTransformCommonF23(const float *source, float *dest, int unit, int iw, int pad, int su, int eu); 132 void MNNConvDwF23MulTransUnit(float **cacheLine, const float *weigth, float *dest, size_t ow, const float* bias, const float* postParameter); 133 void MNNMultiAndDestTransformCommon23(float **cacheLine, const float *weigth, float *dest, int cacheLineSize, int ow); 134 void MNNInt8ToInt16(int16_t* dest, const int8_t* source, size_t count); 135 } 136 137 typedef void(*MNNBinaryExecute)(void* outputRaw, const void* inputRaw0, const void* inputRaw1, int elementSize, int broadcastIndex); 138 typedef void(*MNNUnaryExecute)(void* outputRaw, const void* inputRaw, int elementSize); 139 140 namespace MNN { 141 struct CoreFunctions { 142 // cpu feature 143 bool supportFp16arith = false; 144 bool supportSDot = false; 145 /**MatMul Pack and Functions*/ 146 void(*MNNGetMatMulPackMode)(int* eP, int *lP, int* hP); 147 void(*MNNGetSparseMatMulPackMode)(int* eP, int *lP, int* hP); 148 void(*MNNPackC4ForMatMul_A)(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el); 149 void(*MNNPackForMatMul_B)(float* dest, const float* source, size_t h, size_t l, bool transpose); 150 // parameters: e, l, h, CStride, AStride, BStride 151 void(*MNNPackedMatMul)(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias); 152 void(*MNNPackedMatMulRemain)(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias); 153 void(*MNNComputeMatMulForH_1)(const float* A, const float* B, float* C, const float* biasPtr, const MatMulParam* param, size_t tId); 154 void(*MNNComputeMatMulForE_1)(const float* A, const float* B, float* C, const float* biasPtr, const MatMulParam* param, size_t tId); 155 156 // For Atomic Op 157 MNNBinaryExecute(*MNNSelectBinaryFunctionForFloat)(int opType); 158 MNNUnaryExecute(*MNNSelectUnaryFunctionForFloat)(int opType, int precisionMode); 159 160 // sparse matrix multiply 161 void(*MNNPackForSparseMatMul_B)(float* dest, unsigned int* NNZMap, int* dataOffsetMap, int sparseBlockOC, const float* source, size_t h, size_t l, const int eP, bool transpose); 162 163 // B matrix is sparsed 164 void(*MNNPackedSparseMatMulEpx1)(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, unsigned int* NNZMap, int* dataOffsetMap); 165 void(*MNNPackedSparseMatMulEpx4)(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, unsigned int* NNZMap, int* dataOffsetMap); 166 167 /**Lowp Backend Setting*/ 168 void(*MNNFp32ToLowp)(const float* src, int16_t* dst, size_t size); 169 void(*MNNLowpToFp32)(const int16_t* src, float* dst, size_t size); 170 int bytes; // Byte for float 171 172 /**NC4HW4's Functions*/ 173 int pack; 174 void(*MNNPackCUnit)(float* dst, const float* src, size_t area, size_t depth); 175 void(*MNNUnpackCUnit)(float* dst, const float* src, size_t area, size_t depth); 176 void(*MNNPackCUnitTranspose)(float* dst, const float* src, size_t area, size_t depth); 177 void(*MNNUnpackCUnitTranspose)(float* dst, const float* src, size_t area, size_t depth); 178 179 void(*MNNConvRunForUnitDepthWise)(float* dst, const float* src, const float* weight, size_t fw, size_t fh, 180 size_t weight_y_step, size_t dilateX_step, size_t dilateY_step); 181 void(*MNNConvRunForLineDepthwise)(float* dst, const float* src, const float* weight, size_t width, size_t src_w_setup, 182 size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step, size_t height, 183 size_t srcHStep, size_t dstHStep); 184 void(*MNNAxByClampBroadcastUnit)(float* C, const float* A, const float* B, size_t width, size_t cStride, size_t aStride, size_t height, const float* parameters); 185 void(*MNNMultiAndDestTransformCommon23)(float **cacheLine, const float *weigth, float *dest, int cacheLineSize, int ow, const float* bias, const float* post); 186 void(*MNNSourceTransformCommonF23)(const float *source, float *dest, int unit, int iw, int pad, int su, int eu); 187 void(*MNNConvDwF23MulTransUnit)(float **cacheLine, const float *weigth, float *dest, size_t ow, const float* bias, const float* post); 188 void(*MNNMatrixAdd)(float* C, const float* A, const float* B, size_t widthC4, size_t cStride, size_t aStride, 189 size_t bStride, size_t height); 190 void(*MNNMatrixSub)(float* C, const float* A, const float* B, size_t widthC4, size_t cStride, size_t aStride, 191 size_t bStride, size_t height); 192 void(*MNNStrassenMergeCFunction)(float* c11, float* c12, float* c21, float* c22, float* xAddr, size_t cStride, size_t eSub, size_t hSub); 193 void(*MNNScaleAndAddBias)(float* dst, const float* src, const float* bias, const float* alpha, size_t planeNumber, size_t biasNumber); 194 float penalty; 195 196 void(*MNNCopyC4WithStride)(const float* source, float* dest, size_t srcStride, size_t dstStride, size_t count); 197 void(*MNNAddC4WithStride)(const float* source, float* dest, size_t srcStride, size_t dstStride, size_t count); 198 199 typedef void (*WinoTransFunc)(const float* srcBlock, float* dstStart, size_t srcStep, size_t dstStep); 200 WinoTransFunc(*chooseWinoSourceTransform)(int k, int w); 201 WinoTransFunc(*chooseWinoDestTransform)(int k, int h); 202 203 void(*MNNDeconvRunForUnitDepthWise)(const float* dst, float* src, const float* weight, size_t fw, size_t fh, 204 size_t weight_y_step, size_t dilateX_step, size_t dilateY_step); 205 void(*MNNDeconvRunForLineDepthwise)(const float* dst, float* src, const float* weight, size_t width, size_t src_w_setup, 206 size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step); 207 void(*MNNReluWithSlopeChannel)(float* dst, const float* src, const float* slope, size_t sizeQuad, size_t depthQuad); 208 void(*MNNPoolingAvg)(const void* channelInput, int inputWidth, int inputHeight, void *channelOutput, 209 int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth, 210 int strideHeight, int padWidth, int padHeight, int padType, int countType); 211 void(*MNNPoolingMax)(const void* channelInput, int inputWidth, int inputHeight, void *channelOutput, 212 int outputWidth, int outputHeight, int kernelWidth, int kernelHeight, int strideWidth, 213 int strideHeight, int padWidth, int padHeight, int padType, int countType); 214 215 }; 216 void MNNCoreFunctionInit(); 217 CoreFunctions* MNNGetCoreFunctions(); 218 }; 219 220 #endif /* CommonOptFunction_h */ 221