//
//  FunctionSummary.hpp
//  MNN
//
//  Created by MNN on 2019/08/25.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#if defined(_MSC_VER)
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
#include <MNN/MNNDefine.h>
#include <stdint.h>

#ifndef _MM_TRANSPOSE4_PS
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
    do {                                          \
        __m128 tmp3, tmp2, tmp1, tmp0;            \
        tmp0   = _mm_unpacklo_ps((row0), (row1)); \
        tmp2   = _mm_unpacklo_ps((row2), (row3)); \
        tmp1   = _mm_unpackhi_ps((row0), (row1)); \
        tmp3   = _mm_unpackhi_ps((row2), (row3)); \
        (row0) = _mm_movelh_ps(tmp0, tmp2);       \
        (row1) = _mm_movehl_ps(tmp2, tmp0);       \
        (row2) = _mm_movelh_ps(tmp1, tmp3);       \
        (row3) = _mm_movehl_ps(tmp3, tmp1);       \
    } while (0)
#endif
#include "backend/cpu/compute/Int8FunctionsOpt.h"
#include "backend/cpu/compute/CommonOptFunction.h"

// ========= CommonOptFunction.cpp ===========
extern "C" {

void _AVX_MNNAxByClampBroadcastUnit(float* C, const float* A, const float* B, size_t width, size_t cStride, size_t aStride, size_t height, const float* parameters);
void _AVX_MNNGemmFloatCommon_4(float* dst, const float* src, const float* weight, size_t src_depth_quad,
                               size_t dst_step, size_t dst_depth_quad, size_t width, size_t weight_depth_offset);
void _AVX_MNNGemmFloatUnit_4(float* dstOrigin, const float* src, const float* weight, size_t src_depth_quad,
                             size_t dst_step, size_t dst_depth_quad, size_t weight_depth_offset);
void _AVX_MNNMatrixAdd(float* C, const float* A, const float* B, size_t widthC4, size_t cStride, size_t aStride,
                       size_t bStride, size_t height);
void _AVX_MNNMatrixSub(float* C, const float* A, const float* B, size_t widthC4, size_t cStride, size_t aStride,
                       size_t bStride, size_t height);
void _AVX_MNNStrassenMergeCFunction(float* c11, float* c12, float* c21, float* c22, float* xAddr, size_t cStride,
                                    size_t eSub, size_t hSub);

void _AVX_MNNPackedMatMul(float* C, const float* A, const float* B, const size_t* parameter,
                          const float* postParameters, const float* bias);
void _AVX_MNNPackedMatMulRemain(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter,
                                const float* postParameters, const float* bias);
void _AVX_MNNPackC4ForMatMul_A(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el);

void _AVX_MNNConvRunForLineDepthwise(float* dst, const float* src, const float* weight, size_t width, size_t src_w_setup,
                                     size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step, size_t height,
                                     size_t srcHStep, size_t dstHStep);
void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst);
void _AVX_MNNGemmInt8AddBiasScale_16x4_Unit_Fast(int8_t* dst, const int8_t* src, const int8_t* weight, size_t src_depth_quad, size_t dst_step, size_t dst_depth_quad, const QuanPostTreatParameters* post, size_t realDst);

void _AVX_MNNExpC8(float* dest, const float* source, const float* parameters, size_t countC8);
void _AVX_MNNFloat2Int8(const float* src, int8_t* dst, size_t sizeQuad, const float* scalep, ssize_t minV, ssize_t maxV, ssize_t zeroPoint);
void _AVX_MNNInt8ScaleToFloat(float* dst, const int8_t* src, const float* scale, size_t sizeQuad, ssize_t zeroPoint);
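// Usage sketch (illustrative, not part of the original API surface): a
// quantize/dequantize round trip through the two routines above. This assumes
// the generic CPU convention that `sizeQuad` counts groups of 4 floats, that
// `scalep`/`scale` point to 4 per-lane scales, and that quantization computes
// clamp(round(x * scale) + zeroPoint); the AVX dispatch may pack data
// differently, so treat the layout here as an assumption.
//
//     float src[16] = {/* 16 input values */};
//     int8_t quant[16];
//     float roundTrip[16];
//     float scale[4]    = {2.0f, 2.0f, 2.0f, 2.0f}; // float -> int8 scales (assumed per-lane)
//     float invScale[4] = {0.5f, 0.5f, 0.5f, 0.5f}; // int8 -> float scales
//     _AVX_MNNFloat2Int8(src, quant, 4 /*sizeQuad*/, scale, -127, 127, 0 /*zeroPoint*/);
//     _AVX_MNNInt8ScaleToFloat(roundTrip, quant, invScale, 4 /*sizeQuad*/, 0 /*zeroPoint*/);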
void _AVX_MNNLineDepthWiseInt8AddBiasScaleUnit(int8_t* dstO, const int8_t* srcO, const int8_t* weightO, const QuanPostTreatParameters* parameters, size_t width, size_t src_w_step, size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step);
void _AVX_MNNComputeMatMulForE_1(const float* A, const float* B, float* C, const float* biasPtr, const MatMulParam* param, size_t tId);

void _AVX_MNNPackC4ForMatMul_A_BF16(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el);

void _AVX_MNNGetMatMulPackMode_BF16(int* eP, int* lP, int* hP);
void _AVX_MNNPackForMatMul_B_BF16(float* dest, const float* source, size_t h, size_t l, bool transpose);
void _AVX_MNNPackedSparseMatMul(float* C, const float* A, const float* B, unsigned int* NNZMap, int* dataOffsetMap, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias);
void _AVX_MNNReluWithSlopeChannel(float* dst, const float* src, const float* slope, size_t sizeQuad, size_t depthQuad);
void _AVX_MNNComputeMatMulForH_1(const float* A, const float* B, float* C, const float* biasPtr, const MatMulParam* param, size_t tId);

void _AVX_MNNPackCUnit(float* dst, const float* src, size_t area, size_t depth);
void _AVX_MNNUnpackCUnit(float* dst, const float* src, size_t area, size_t depth);
void _AVX_MNNPackCUnitTranspose(float* dst, const float* src, size_t area, size_t depth);
void _AVX_MNNUnpackCUnitTranspose(float* dst, const float* src, size_t area, size_t depth);
void _AVX_MNNPackForMatMul_B(float* dest, const float* source, size_t h, size_t l, bool transpose);
void _AVX_MNNConvRunForUnitDepthWise(float* dst, const float* src, const float* weight, size_t fw, size_t fh,
                                     size_t weight_y_step, size_t dilateX_step, size_t dilateY_step);
void _AVX_MNNMultiAndDestTransformCommon23(float** cacheLine, const float* weight, float* dest, int cacheLineSize, int ow, const float* bias, const float* parameter);
void _AVX_MNNSourceTransformCommonF23(const float* source, float* dest, int unit, int iw, int pad, int su, int eu);
void _AVX_MNNConvDwF23MulTransUnit(float** cacheLine, const float* weight, float* dest, size_t ow, const float* bias, const float* parameter);

void _AVX_ExtraInit(void* functions);
void _AVX_WinogradInit(void* functions);
void _AVX_MNNCopyC4WithStride(const float* source, float* dest, size_t srcStride, size_t dstStride, size_t count);
void _AVX_MNNAddC4WithStride(const float* source, float* dest, size_t srcStride, size_t dstStride, size_t count);
void _AVX_MNNScaleAndAddBias(float* dst, const float* src, const float* bias, const float* alpha, size_t planeNumber, size_t biasNumber);
void _AVX_MNNDeconvRunForUnitDepthWise(const float* dst, float* src, const float* weight, size_t fw, size_t fh,
                                       size_t weight_y_step, size_t dilateX_step, size_t dilateY_step);
void _AVX_MNNDeconvRunForLineDepthwise(const float* dst, float* src, const float* weight, size_t width, size_t src_w_setup,
                                       size_t fw, size_t fh, size_t dilateX_step, size_t dilateY_step);

void _AVX_MNNGelu(float* dst, const float* src, size_t size);
}
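// Usage sketch (illustrative, not part of the original header): the fallback
// _MM_TRANSPOSE4_PS defined above mirrors the classic xmmintrin.h macro,
// transposing a 4x4 float matrix held in four __m128 rows in place.
//
//     float m[16]; // row-major 4x4 matrix
//     __m128 r0 = _mm_loadu_ps(m + 0);
//     __m128 r1 = _mm_loadu_ps(m + 4);
//     __m128 r2 = _mm_loadu_ps(m + 8);
//     __m128 r3 = _mm_loadu_ps(m + 12);
//     _MM_TRANSPOSE4_PS(r0, r1, r2, r3); // r0..r3 now hold the original columns
//     _mm_storeu_ps(m + 0,  r0);
//     _mm_storeu_ps(m + 4,  r1);
//     _mm_storeu_ps(m + 8,  r2);
//     _mm_storeu_ps(m + 12, r3);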