//
//  MNNExpC8.S
//  MNN
//
//  Created by MNN on 2019/01/18.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifdef __aarch64__

#include "MNNAsmGlobal.h"
.text
.align 5

// MLA_TWO z0, z1, z2, z3
//   z1 = broadcast(z0) + z2 * z3
// z0 : scalar lane holding the additive constant (e.g. v1.s[2])
// z1 : destination vector, fully overwritten
// z2 : vector multiplicand
// z3 : vector or scalar-lane multiplier
// One Horner step of the polynomial evaluated in MNNExpC8. Clobbers only z1.
.macro MLA_TWO z0 z1 z2 z3
    dup \z1, \z0
    fmla \z1, \z2, \z3
.endm

//void MNNExpC8(float* dest, const float* source, const float* parameters, size_t countC8)
//
// Processes countC8 groups of 8 floats. For every input x, with p = parameters:
//   x    = min(max(x, -87), 87)
//   n    = trunc(-x * p[1])
//   t    = -x - n * p[0]
//   poly = p[2] + t*(p[3] + t*(p[4] + t*(p[5] + t*(p[6] + t*p[7]))))
//   out  = float whose bit pattern is bits(poly) + (n << 23)
// NOTE(review): this matches exp(-x) when parameters is
// {ln2, 1/ln2, 1, 1, 1/2, 1/6, 1/24, 1/120}; confirm against the caller.
//
// In:    x0 = dest, x1 = source, x2 = parameters (8 floats), x3 = countC8
// Out:   none (results stored through x0)
// Clobb: x0, x1, x3, v0-v4, v16-v23, flags (all caller-saved under AAPCS64)
// countC8 == 0 returns immediately without touching memory.
asm_function MNNExpC8

//x0: dest, x1:source, x2:parameters, x3:countC8

    // The loop below is bottom-tested: without this guard a zero count would
    // wrap the counter and run far past the end of both buffers.
    cbz x3, End

    ld1 {v0.4s, v1.4s}, [x2]        // v0 = p[0..3], v1 = p[4..7]
    movi v2.4s, #23                 // shift count used to reach the float exponent field
    movi v3.4s, #87
    scvtf v3.4s, v3.4s              // v3 = 87.0f  (upper clamp)
    fneg v4.4s, v3.4s               // v4 = -87.0f (lower clamp)

Loop:

    ld1 {v16.4s, v17.4s}, [x1], #32 // load 8 inputs, advance source by 32 bytes

    // Clamp inputs to [-87, 87]; presumably keeps the exponent adjustment in range.
    fmin v16.4s, v16.4s, v3.4s
    fmin v17.4s, v17.4s, v3.4s
    fmax v16.4s, v16.4s, v4.4s
    fmax v17.4s, v17.4s, v4.4s

    // v18, v19 = -x
    fneg v18.4s, v16.4s
    fneg v19.4s, v17.4s

    // v16, v17 = n = trunc(-x * p[1]) as int32; v20, v21 = n converted back to float
    fmul v16.4s, v18.4s, v0.s[1]
    fmul v17.4s, v19.4s, v0.s[1]
    fcvtzs v16.4s, v16.4s
    fcvtzs v17.4s, v17.4s
    scvtf v20.4s, v16.4s
    scvtf v21.4s, v17.4s

    //v18.4s, v19.4s: t = -x - n * p[0]
    fmls v18.4s, v20.4s, v0.s[0]
    fmls v19.4s, v21.4s, v0.s[0]

    // Horner evaluation, alternating v20/v21 and v22/v23 as accumulators.
    MLA_TWO v1.s[2], v20.4s, v18.4s, v1.s[3]    // p[6] + t * p[7]
    MLA_TWO v1.s[2], v21.4s, v19.4s, v1.s[3]
    MLA_TWO v1.s[1], v22.4s, v18.4s, v20.4s     // p[5] + t * acc
    MLA_TWO v1.s[1], v23.4s, v19.4s, v21.4s
    MLA_TWO v1.s[0], v20.4s, v18.4s, v22.4s     // p[4] + t * acc
    MLA_TWO v1.s[0], v21.4s, v19.4s, v23.4s
    MLA_TWO v0.s[3], v22.4s, v18.4s, v20.4s     // p[3] + t * acc
    MLA_TWO v0.s[3], v23.4s, v19.4s, v21.4s
    MLA_TWO v0.s[2], v20.4s, v18.4s, v22.4s     // p[2] + t * acc
    MLA_TWO v0.s[2], v21.4s, v19.4s, v23.4s

    //v20.4s, v21.4s is expRemain

    // Move n to bit 23 and add it to the raw bits of the polynomial result,
    // i.e. adjust the IEEE-754 exponent field by n.
    ushl v16.4s, v16.4s, v2.4s
    ushl v17.4s, v17.4s, v2.4s
    add v20.4s, v20.4s, v16.4s
    add v21.4s, v21.4s, v17.4s

    st1 {v20.4s, v21.4s}, [x0], #32 // store 8 results, advance dest by 32 bytes

    subs x3, x3, #1
    bne Loop

End:
    ret

#endif