1//
2//  MNNExpC8.S
3//  MNN
4//
5//  Created by MNN on 2019/01/18.
6//  Copyright © 2018, Alibaba Group Holding Limited
7//
8
9#ifdef __aarch64__
10
11#include "MNNAsmGlobal.h"
12.text
13.align 5
14
// void MNNExpC8(float* dest, const float* source, const float* parameters, size_t countC8)
//
// For each of the countC8 groups of 8 floats read from source, stores 8 floats
// to dest. With p = parameters[0..7], every element x is processed as:
//     t    = -max(min(x, 87.0), -87.0)
//     n    = (int)(t * p[1])                  (converted with round toward zero)
//     r    = t - (float)n * p[0]
//     poly = p[2] + r*(p[3] + r*(p[4] + r*(p[5] + r*(p[6] + r*p[7]))))
//     out  = bits(poly) + (n << 23)           (integer add on the float bit pattern)
// NOTE(review): this looks like exp(-x) with p[0] = ln2, p[1] = 1/ln2 and
// p[2..7] the polynomial coefficients -- confirm against the caller.
//
// In:     x0 = dest, x1 = source, x2 = parameters (8 floats), x3 = countC8
// Out:    nothing in registers; results are stored through x0
// Clobb:  x0, x1, x3, v0-v4, v16-v23, condition flags
// A countC8 of zero returns immediately without reading or writing memory.

// MLA_TWO z0, z1, z2, z3:  z1 = broadcast(z0) + z2 * z3
// z0 is a scalar lane; z1 is overwritten; z3 may be a full vector or a lane.
.macro MLA_TWO z0 z1 z2 z3
    dup \z1, \z0
    fmla \z1, \z2, \z3
.endm

asm_function MNNExpC8

    // The loop below is bottom-tested, so a zero count must be rejected here;
    // otherwise x3 would wrap around and the loop would run past both buffers.
    cbz x3, LoopEnd

    // Loop-invariant constants.
    ld1 {v0.4s, v1.4s}, [x2]            // v0 = p[0..3], v1 = p[4..7]
    movi v2.4s, #23                     // shift count used to reach the exponent field
    movi v3.4s, #87
    scvtf v3.4s, v3.4s                  // v3 =  87.0 (upper clamp)
    fneg v4.4s, v3.4s                   // v4 = -87.0 (lower clamp)

Loop:

    // x3 = groups remaining, always > 0 here.
    ld1 {v16.4s, v17.4s}, [x1], #32     // load 8 inputs, advance source

    // Clamp the inputs to [-87, 87].
    fmin v16.4s, v16.4s, v3.4s
    fmin v17.4s, v17.4s, v3.4s
    fmax v16.4s, v16.4s, v4.4s
    fmax v17.4s, v17.4s, v4.4s

    // t = -x
    fneg v18.4s, v16.4s
    fneg v19.4s, v17.4s

    // n = (int)(t * p[1]); v16/v17 keep n as integers, v20/v21 as floats.
    fmul v16.4s, v18.4s, v0.s[1]
    fmul v17.4s, v19.4s, v0.s[1]
    fcvtzs v16.4s, v16.4s
    fcvtzs v17.4s, v17.4s
    scvtf v20.4s, v16.4s
    scvtf v21.4s, v17.4s

    // r = t - n * p[0]   (v18, v19)
    fmls v18.4s, v20.4s, v0.s[0]
    fmls v19.4s, v21.4s, v0.s[0]

    // Nested multiply-add evaluation of the polynomial in r, starting from the
    // highest coefficient p[7] and ping-ponging between v20/v21 and v22/v23.
    MLA_TWO v1.s[2], v20.4s, v18.4s, v1.s[3]    // p[6] + r * p[7]
    MLA_TWO v1.s[2], v21.4s, v19.4s, v1.s[3]
    MLA_TWO v1.s[1], v22.4s, v18.4s, v20.4s     // p[5] + r * (...)
    MLA_TWO v1.s[1], v23.4s, v19.4s, v21.4s
    MLA_TWO v1.s[0], v20.4s, v18.4s, v22.4s     // p[4] + r * (...)
    MLA_TWO v1.s[0], v21.4s, v19.4s, v23.4s
    MLA_TWO v0.s[3], v22.4s, v18.4s, v20.4s     // p[3] + r * (...)
    MLA_TWO v0.s[3], v23.4s, v19.4s, v21.4s
    MLA_TWO v0.s[2], v20.4s, v18.4s, v22.4s     // p[2] + r * (...)
    MLA_TWO v0.s[2], v21.4s, v19.4s, v23.4s

    // v20, v21 now hold the polynomial value. Adding (n << 23) as an integer
    // adds n to the exponent field of each single-precision result.
    ushl v16.4s, v16.4s, v2.4s
    ushl v17.4s, v17.4s, v2.4s
    add v20.4s, v20.4s, v16.4s
    add v21.4s, v21.4s, v17.4s

    st1 {v20.4s, v21.4s}, [x0], #32     // store 8 results, advance dest

    subs x3, x3, #1
    bne Loop

LoopEnd:
    ret
78
79#endif
80
81