//
//  MNNAxByClampBroadcastUnit.S
//  MNN
//
//  Created by MNN on 2020/06/20.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifdef __arm__
#ifndef __aarch64__

#include "MNNAsmGlobal.h"

.text
.align 5

asm_function MNNAxByClampBroadcastUnit
//void MNNAxByClampBroadcastUnit(float* C, const float* A, const float* B, size_t width, size_t cStride, size_t aStride, size_t height, const float* parameters)
//
// For every row y in [0, height) and every 4-float unit x in [0, width):
//     C[y][x] = min(max(A[y][x] + B[y] * parameters[1], parameters[2]), parameters[3])
// One 4-float vector of B is consumed per row and reused for all units of that row.
// cStride and aStride are given in floats and converted to bytes below.
// parameters[0] is loaded but not used by this routine.
//
//Auto: r0: C, r1:A, r2:B, r3:width
//r4:cStride, r5:aStride, r6:height, r7:parameters
// Scratch: r8 = row start of C, r9 = row start of A, r11 = units left in row, r12 = sizeof(float)
// NEON:    q3 = parameters, q13 = current B vector, q14 = lower bound, q15 = upper bound

    // 9 registers are saved (36 bytes), so the stack-passed arguments start at sp + 36.
    push {r4-r11, lr}
    ldr r4, [sp, #36]       // cStride
    ldr r5, [sp, #40]       // aStride
    ldr r6, [sp, #44]       // height
    ldr r7, [sp, #48]       // parameters

    // The row loop is bottom-tested, so a zero height would wrap the counter
    // and run far past the buffers. Bail out before touching any memory.
    cmp r6, #0
    beq End

    vld1.32 {q3}, [r7]      // d6 = parameters[0..1], d7 = parameters[2..3]
    vdup.f32 q14, d7[0]     // broadcast lower clamp bound (parameters[2])
    vdup.f32 q15, d7[1]     // broadcast upper clamp bound (parameters[3])
    mov r12, #4             // sizeof(float)
    mul r4, r12, r4         // cStride in bytes
    mul r5, r12, r5         // aStride in bytes

LoopY:
    mov r8, r0              // remember row start of C
    mov r9, r1              // remember row start of A
    vld1.32 {q13}, [r2]!    // B vector for this row, B advances by 4 floats

    mov r11, r3             // units remaining in this row

L1:
    cmp r11, #0
    beq EndLine             // empty row: only advance the row pointers

L1Loop:
    vld1.32 {q0}, [r1]!
    vmla.f32 q0, q13, d6[1] // q0 = A + B * parameters[1]
    vmax.f32 q0, q0, q14
    vmin.f32 q0, q0, q15
    vst1.32 {q0}, [r0]!
    subs r11, r11, #1
    bne L1Loop

EndLine:
    add r0, r8, r4          // next row of C = row start + cStride bytes
    add r1, r9, r5          // next row of A = row start + aStride bytes

    subs r6, r6, #1
    bne LoopY

End:
    pop {r4-r11, pc}

#endif
#endif