1//
2//  MNNAxByClampBroadcastUnit.S
3//  MNN
4//
5//  Created by MNN on 2020/06/20.
6//  Copyright © 2018, Alibaba Group Holding Limited
7//
8
9#ifdef __arm__
10#ifndef __aarch64__
11
12#include "MNNAsmGlobal.h"
13
14.text
15.align 5
16
asm_function MNNAxByClampBroadcastUnit
//void MNNAxByClampBroadcastUnit(float* C, const float* A, const float* B, size_t width, size_t cStride, size_t aStride, size_t height, const float* parameters)
//
// For every row y in [0, height) and every 4-float unit x in [0, width):
//     C[y][x] = min(max(A[y][x] + B[y] * parameters[1], parameters[2]), parameters[3])
// B supplies one 4-float unit per row; that unit is reused for all units of the row.
// width is counted in 4-float units, cStride / aStride are counted in floats.
// parameters[0] is loaded together with the other values but is not used here.
//
//Auto: r0: C, r1:A, r2:B, r3:width
//Loaded from the stack: r4:cStride, r5:aStride, r6:height, r7:parameters
//Scratch: r8 / r9 = start of the current C / A row, r11 = units left in the row, r12 = constant 4
//NEON: q0 = working unit, q3 = parameters, q13 = B unit of the row, q14 = lower bound, q15 = upper bound
push {r4-r11, lr}
// 9 registers were pushed (36 bytes), so the stack arguments start at sp + 36.
ldr r4, [sp, #36]
ldr r5, [sp, #40]
ldr r6, [sp, #44]
ldr r7, [sp, #48]

// Nothing to do for zero rows. The row loop below is bottom-tested, so without
// this check a zero height would wrap the counter and run for about 2^32 rows.
cmp r6, #0
beq End

vld1.32 {q3}, [r7] // q3 = parameters[0..3]
vdup.f32 q14, d7[0] // parameters[2] in all lanes: lower clamp bound
vdup.f32 q15, d7[1] // parameters[3] in all lanes: upper clamp bound
mov r12, #4 //sizeof(float)
mul r4, r12, r4 // cStride: floats -> bytes
mul r5, r12, r5 // aStride: floats -> bytes

LoopY:
// Remember where this row starts so the strides can be applied at EndLine.
mov r8, r0
mov r9, r1
vld1.32 {q13}, [r2]! // B unit for this row; B then advances by 4 floats

mov r11, r3

L1:
// Skip the unit loop entirely when width is zero.
cmp r11, #0
beq EndLine

L1Loop:
vld1.32 {q0}, [r1]!
vmla.f32 q0, q13, d6[1] // q0 = A + B * parameters[1]
vmax.f32 q0, q0, q14
vmin.f32 q0, q0, q15
vst1.32 {q0}, [r0]!
subs r11, r11, #1
bne L1Loop

EndLine:
// Next row = row start + stride (in bytes), independent of how far the unit loop advanced.
add r0, r8, r4
add r1, r9, r5

subs r6, r6, #1
bne LoopY

End:
pop {r4-r11, pc}
63
64#endif
65#endif
66