/*****************************************************************************
 * asm.S: arm utility macros
 *****************************************************************************
 * Copyright (C) 2013-2020 MulticoreWare, Inc
 *
 * Authors: Mans Rullgard <mans@mansr.com>
 *          David Conrad <lessen42@gmail.com>
 *          Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

.syntax unified

// Select the target architecture from the build configuration.
#if HAVE_NEON
        .arch           armv7-a
#elif HAVE_ARMV6T2
        .arch           armv6t2
#elif HAVE_ARMV6
        .arch           armv6
#endif

.fpu neon

// EXTERN_ASM: symbol-name prefix for exported functions (platforms such as
// Mach-O prepend an underscore to C symbols; ELF does not).
#ifdef PREFIX
#   define EXTERN_ASM _
#else
#   define EXTERN_ASM
#endif

// ELF: expands to nothing on ELF targets, and to the assembler comment
// character '@' elsewhere, so ELF-only directives vanish on other formats.
#ifdef __ELF__
#   define ELF
#else
#   define ELF @
#endif

// FUNC: same trick for assemblers that support .func/.endfunc.
#if HAVE_AS_FUNC
#   define FUNC
#else
#   define FUNC @
#endif

// Tag the object with EABI attribute 24 (ABI_align_needed).
.macro require8, val=1
ELF     .eabi_attribute 24, \val
.endm

// Tag the object with EABI attribute 25 (ABI_align_preserved).
.macro preserve8, val=1
ELF     .eabi_attribute 25, \val
.endm

// Open a function: emits the label (with EXTERN_ASM prefix when exported,
// plus .global/.hidden/.type bookkeeping) and defines a matching one-shot
// `endfunc` macro that closes it (.size/.endfunc) and purges itself.
.macro function name, export=1
    .macro endfunc
ELF     .size   \name, . - \name
FUNC    .endfunc
        .purgem endfunc
    .endm
        .align  2
.if \export == 1
        .global EXTERN_ASM\name
ELF     .hidden EXTERN_ASM\name
ELF     .type   EXTERN_ASM\name, %function
FUNC    .func   EXTERN_ASM\name
EXTERN_ASM\name:
.else
ELF     .hidden \name
ELF     .type   \name, %function
FUNC    .func   \name
\name:
.endif
.endm

// Load the address of \val into \rd: movw/movt pair when the target and
// build allow it (non-PIC, ARMv6T2+), otherwise a literal-pool load.
.macro movrel rd, val
#if HAVE_ARMV6T2 && !defined(PIC)
        movw            \rd, #:lower16:\val
        movt            \rd, #:upper16:\val
#else
        ldr             \rd, =\val
#endif
.endm

// Load the constant \val into \rd, skipping the movt when the value
// fits in 16 bits.
.macro movconst rd, val
#if HAVE_ARMV6T2
        movw            \rd, #:lower16:\val
.if \val >> 16
        movt            \rd, #:upper16:\val
.endif
#else
        ldr             \rd, =\val
#endif
.endm

#define GLUE(a, b) a ## b
#define JOIN(a, b) GLUE(a, b)
#define X(s) JOIN(EXTERN_ASM, s)

#define FENC_STRIDE 64
#define FDEC_STRIDE 32

// Horizontal add: optionally fold \b into \a, then pairwise-add down to a
// single sum in \dest.
.macro HORIZ_ADD dest, a, b
.ifnb \b
        vadd.u16        \a, \a, \b
.endif
        vpaddl.u16      \a, \a
        vpaddl.u32      \dest, \a
.endm

// sum = a + b, diff = a - b (16-bit lanes).
.macro SUMSUB_AB sum, diff, a, b
        vadd.s16        \sum,  \a, \b
        vsub.s16        \diff, \a, \b
.endm

.macro SUMSUB_ABCD s1, d1, s2, d2, a, b, c, d
        SUMSUB_AB       \s1, \d1, \a, \b
        SUMSUB_AB       \s2, \d2, \c, \d
.endm

// In-place absolute value of two registers (16-bit lanes).
.macro ABS2 a b
        vabs.s16        \a, \a
        vabs.s16        \b, \b
.endm

// dist = distance in elements (0 for vertical pass, 1/2 for horizontal passes)
// op   = sumsub/amax (sum and diff / maximum of absolutes)
// d1/2 = destination registers
// s1/2 = source registers
.macro HADAMARD dist, op, d1, d2, s1, s2
.if \dist == 1
        vtrn.16         \s1, \s2
.else
        vtrn.32         \s1, \s2
.endif
.ifc \op, sumsub
        SUMSUB_AB       \d1, \d2, \s1, \s2
.else
        vabs.s16        \s1, \s1
        vabs.s16        \s2, \s2
        vmax.s16        \d1, \s1, \s2
.endif
.endm

// 8x8 byte transpose via three rounds of interleaves (32/16/8-bit).
.macro TRANSPOSE8x8 r0 r1 r2 r3 r4 r5 r6 r7
        vtrn.32         \r0, \r4
        vtrn.32         \r1, \r5
        vtrn.32         \r2, \r6
        vtrn.32         \r3, \r7
        vtrn.16         \r0, \r2
        vtrn.16         \r1, \r3
        vtrn.16         \r4, \r6
        vtrn.16         \r5, \r7
        vtrn.8          \r0, \r1
        vtrn.8          \r2, \r3
        vtrn.8          \r4, \r5
        vtrn.8          \r6, \r7
.endm

// 4x4 byte transpose.
.macro TRANSPOSE4x4 r0 r1 r2 r3
        vtrn.16         \r0, \r2
        vtrn.16         \r1, \r3
        vtrn.8          \r0, \r1
        vtrn.8          \r2, \r3
.endm

// 4x4 transpose of 16-bit elements.
.macro TRANSPOSE4x4_16 r0, r1, r2, r3
        vtrn.32         \r0, \r2        // r0 = [21 20 01 00], r2 = [23 22 03 02]
        vtrn.32         \r1, \r3        // r1 = [31 30 11 10], r3 = [33 32 13 12]
        vtrn.16         \r0, \r1        // r0 = [30 20 10 00], r1 = [31 21 11 01]
        vtrn.16         \r2, \r3        // r2 = [32 22 12 02], r3 = [33 23 13 03]
.endm

// Two independent 4x4 16-bit transposes (A and B register sets).
.macro TRANSPOSE4x4x2_16 rA0, rA1, rA2, rA3, rB0, rB1, rB2, rB3
        vtrn.32         \rA0, \rA2      // r0 = [21 20 01 00], r2 = [23 22 03 02]
        vtrn.32         \rA1, \rA3      // r1 = [31 30 11 10], r3 = [33 32 13 12]
        vtrn.32         \rB0, \rB2
        vtrn.32         \rB1, \rB3
        vtrn.16         \rA0, \rA1      // r0 = [30 20 10 00], r1 = [31 21 11 01]
        vtrn.16         \rA2, \rA3      // r2 = [32 22 12 02], r3 = [33 23 13 03]
        vtrn.16         \rB0, \rB1
        vtrn.16         \rB2, \rB3
.endm