/*
 * ARM NEON optimised Format Conversion Utils
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 * Copyright (c) 2015 Janne Grunau <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "libavutil/arm/asm.S"

/*
 * ff_int32_to_float_fmul_scalar_neon
 *
 * Converts signed 32-bit integers to single-precision floats and multiplies
 * every converted value by one common factor.
 *
 * NOTE(review): the register usage below presumably corresponds to the C
 * prototype
 *     void f(float *dst, const int32_t *src, float mul, int len);
 * confirm against the declaration on the C side.
 *
 * Register usage as seen in the code:
 *   r0      destination; 8 floats are stored per iteration, post-incremented
 *   r1      source; 8 int32 values are loaded per iteration, post-incremented
 *   VFP     lines: factor taken from d0[0], element count in r2
 *   NOVFP   lines: factor bits taken from r2, element count in r3
 *   (VFP/NOVFP are macros from asm.S; presumably they select between the
 *   hard-float and soft-float argument passing variants - TODO confirm.)
 *
 * Requirements implied by the code:
 *   - r0 and r1 are accessed with the :128 alignment qualifier, so both
 *     buffers need to be 16-byte aligned.
 *   - The loop is left only when the count reaches exactly zero after
 *     subtracting 8, and the first 8 elements are loaded before any check,
 *     so the count is expected to be a positive multiple of 8.
 *
 * Clobbers: q0-q3, q8-q10, the count register, flags.
 */
function ff_int32_to_float_fmul_scalar_neon, export=1
        @ Broadcast the factor into all four lanes of q0 and name the
        @ register that carries the element count.
VFP     vdup.32         q0,  d0[0]
VFP     len     .req    r2
NOVFP   vdup.32         q0,  r2
NOVFP   len     .req    r3

        @ Prologue of the software-pipelined loop: load and convert the
        @ first 8 values before entering it.
        vld1.32         {q1},[r1,:128]!
        vcvt.f32.s32    q3,  q1
        vld1.32         {q2},[r1,:128]!
        vcvt.f32.s32    q8,  q2
1:      subs            len, len, #8            @ 8 elements consumed per pass
        pld             [r1, #16]               @ prefetch hint for upcoming input
        vmul.f32        q9,  q3,  q0            @ scale the values converted earlier
        vmul.f32        q10, q8,  q0
        beq             2f                      @ nothing left to load: drain at 2:
        @ Start loading/converting the next 8 values before storing the
        @ current results, so the next pass finds q3/q8 ready.
        vld1.32         {q1},[r1,:128]!
        vcvt.f32.s32    q3,  q1
        vld1.32         {q2},[r1,:128]!
        vcvt.f32.s32    q8,  q2
        vst1.32         {q9}, [r0,:128]!
        vst1.32         {q10},[r0,:128]!
        b               1b
        @ Epilogue: store the final 8 results without loading further input.
2:      vst1.32         {q9}, [r0,:128]!
        vst1.32         {q10},[r0,:128]!
        bx              lr
        .unreq  len
endfunc

/*
 * ff_int32_to_float_fmul_array8_neon
 *
 * Converts signed 32-bit integers to single-precision floats in groups of
 * 8 elements; each group is multiplied by its own factor, read from an
 * array of floats.
 *
 * NOTE(review): the register usage below presumably corresponds to the C
 * prototype
 *     void f(ctx *c, float *dst, const int32_t *src, const float *mul,
 *            int len);
 * confirm against the declaration on the C side.
 *
 * Register usage as seen in the code:
 *   r0      incoming value is overwritten without being read; afterwards
 *           it holds the group counter
 *   r1      destination, accessed with the :128 qualifier (16-byte aligned)
 *   r2      source, accessed with the :128 qualifier (16-byte aligned)
 *   r3      pointer to the per-group factors (no alignment qualifier)
 *   [sp]    element count; it is shifted right by 3, so only whole groups
 *           of 8 are processed
 *
 * The counter starts at (count / 8) - 1. A count below 8 is not handled
 * specially (the main loop would still run), so the count is expected to
 * be at least 8.
 *
 * Clobbers: r0, q0-q3, q8-q10 (d20 is part of q10), flags; r1-r3 are
 * advanced by the main loop.
 */
function ff_int32_to_float_fmul_array8_neon, export=1
        ldr             r0,  [sp]               @ element count from the stack
        lsr             r0,  r0,  #3            @ number of groups of 8
        subs            r0,  r0,  #1
        beq             1f                      @ exactly one group: tail only
2:
        @ Main loop: two groups (16 elements) and two factors per pass.
        vld1.32         {q0-q1},   [r2,:128]!
        vld1.32         {q2-q3},   [r2,:128]!
        vld1.32         {d20},     [r3]!        @ factors for both groups
        subs            r0,  r0,  #2            @ flags are consumed by bgt/bxlt below
        vcvt.f32.s32    q0,  q0
        vcvt.f32.s32    q1,  q1
        vdup.32         q8,  d20[0]             @ factor of the first group
        vcvt.f32.s32    q2,  q2
        vcvt.f32.s32    q3,  q3
        vmul.f32        q0,  q0,  q8
        vdup.32         q9,  d20[1]             @ factor of the second group
        vmul.f32        q1,  q1,  q8
        vmul.f32        q2,  q2,  q9
        vmul.f32        q3,  q3,  q9
        vst1.32         {q0-q1},   [r1,:128]!
        vst1.32         {q2-q3},   [r1,:128]!
        bgt             2b                      @ counter > 0: more pairs to do
        it              lt                      @ IT block for Thumb-2 builds
        bxlt            lr                      @ counter < 0: all groups done
        @ counter == 0 falls through: exactly one group is left.
1:
        @ Tail: one group of 8 with a single factor replicated to all lanes.
        vld1.32         {q0-q1},   [r2,:128]
        vld1.32         {d16[],d17[]}, [r3]
        vcvt.f32.s32    q0,  q0
        vcvt.f32.s32    q1,  q1
        vmul.f32        q0,  q0,  q8
        vmul.f32        q1,  q1,  q8
        vst1.32         {q0-q1},   [r1,:128]
        bx              lr
endfunc