//
//  MNNConvRunForUnitDepthWise.S
//  MNN
//
//  Created by MNN on 2019/02/04.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifdef __aarch64__

#include "MNNAsmGlobal.h"

.text
.align 5

asm_function MNNConvRunForUnitDepthWise
//----------------------------------------------------------------------------
// void MNNConvRunForUnitDepthWise(float* dst, const float* src,
//                                 const float* weight, size_t fw, size_t fh,
//                                 size_t weight_y_step, size_t dilate_x_step,
//                                 size_t dilate_y_step)
//
// Accumulates, over fh rows of fw taps each, the lane-wise product of one
// 4-float vector read from src and one 4-float vector read from weight,
// then stores the 4-float sum to dst. When fw or fh is zero the stored
// result is all zeros.
//
// In:    x0 = dst            x1 = src             x2 = weight
//        x3 = fw             x4 = fh              x5 = weight_y_step
//        x6 = dilate_x_step  x7 = dilate_y_step
//        The three step arguments are scaled by 4 below before being used as
//        byte offsets (4 = sizeof(float), going by the float* signature).
// Out:   4 floats written at [x0]
// Clobb: x1, x2, x4, x5, x6, x7, x9, v0, v1, v2, flags are left untouched
// Stack: none
//----------------------------------------------------------------------------

        movi    v0.4s, #0                   // accumulator = {0, 0, 0, 0}
        cbz     x3, UnitStore               // fw == 0 -> store zeros
        cbz     x4, UnitStore               // fh == 0 -> store zeros

        // Convert the three steps into byte offsets.
        lsl     x5, x5, #2                  // weight_y_step * 4
        lsl     x6, x6, #2                  // dilate_x_step * 4
        lsl     x7, x7, #2                  // dilate_y_step * 4

        // The inner loop post-increments src by dilate_x_step and weight by
        // 16 bytes on every tap, so the per-row steps are reduced by what a
        // full row has already advanced.
        msub    x7, x3, x6, x7              // dilate_y_step -= fw * dilate_x_step
        sub     x5, x5, x3, lsl #4          // weight_y_step -= fw * 16

UnitRowLoop:
        mov     x9, x3                      // x9 = taps remaining in this row

UnitColLoop:
        ld1     {v1.4s}, [x1], x6           // v1 = src[0..3];    src    += dilate_x_step
        ld1     {v2.4s}, [x2], #16          // v2 = weight[0..3]; weight += 16 bytes
        fmla    v0.4s, v1.4s, v2.4s         // acc += v1 * v2 (lane-wise)
        sub     x9, x9, #1
        cbnz    x9, UnitColLoop

        add     x1, x1, x7                  // move src to the start of the next row
        add     x2, x2, x5                  // move weight to the start of the next row
        sub     x4, x4, #1
        cbnz    x4, UnitRowLoop

UnitStore:
        st1     {v0.4s}, [x0]               // dst[0..3] = acc
        ret

#endif