1/****************************************************************************
2 * checkasm-arm.S: assembly check tool
3 *****************************************************************************
4 * Copyright (C) 2013-2020 MulticoreWare, Inc
5 *
6 * Authors: Martin Storsjo <martin@martin.st>
7 *          Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
22 *
23 * This program is also available under a commercial proprietary license.
24 * For more information, contact us at license @ x265.com.
25 *****************************************************************************/
26
27#include "../common/arm/asm.S"
28
.section .rodata
.align 4
@ Eight 64-bit reference constants (64 bytes in total). The clobbercheck
@ wrappers load them into r4-r11 (first 32 bytes) and, in the neon variant,
@ into q4-q7 (all 64 bytes) before calling the function under test, then
@ compare the registers against the same table after it returns.
register_init:
.quad 0x21f86d66c8ca00ce
.quad 0x75b6ba21077c48ad
.quad 0xed56bb2dcb3c7736
.quad 0x8bda43d3fd1a7e06
.quad 0xb64a9c9e5d318408
.quad 0xdf9a54b303f1d3a3
.quad 0x4a75479abd64e097
.quad 0x249214109d5d1c88

@ Text handed to puts when a checked register no longer matches the table.
error_message:
.asciz "failed to preserve register"
43
.text

@ max number of args used by any x265 asm function.
#define MAX_ARGS 15

@ Bytes of arguments the called function reads from its stack: its first
@ four arguments travel in r0-r3, only the remaining ones are on the stack.
#define ARG_STACK 4*(MAX_ARGS - 4)

@ Stack argument area rounded up so that sp stays 8-byte aligned at the call,
@ as the procedure call standard requires at public interfaces.
#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed)

@ clobbercheck variant
@
@ Emits x265_checkasm_call_<variant>, a wrapper that calls a function and
@ verifies that it preserved r4-r11 (plus q4-q7 when variant is neon).
@
@ In:   r0      = address of the function to call
@       r1      = pointer to a word that is set to 0 when a register changed
@       r2, r3  = first two arguments for the function
@       [sp...] = remaining arguments for the function
@ Out:  r0, r1  = return value of the called function, passed through
@
@ On a mismatch the wrapper also prints error_message with puts.
.macro clobbercheck variant
@ Bytes pushed between the incoming stack arguments and sp once the prologue
@ is done: nine core registers plus the saved r1 (and q4-q7 for neon).
.equ pushed, 4*10
function x265_checkasm_call_\variant
    push        {r4-r11, lr}
.ifc \variant, neon
    vpush       {q4-q7}
.equ pushed, pushed + 16*4
.endif

    @ Fill the callee-saved registers with known reference values.
    movrel      r12, register_init
.ifc \variant, neon
    vldm        r12, {q4-q7}
.endif
    ldm         r12, {r4-r11}

    @ Keep the result pointer across the call.
    push        {r1}

    @ Build the stack argument area of the called function. Incoming stack
    @ words 0 and 1 become its r2 and r3, so copying starts at offset 8.
    sub         sp,  sp,  #ARG_STACK_A
.equ pos, 0
.rept MAX_ARGS-4
    ldr         r12, [sp, #ARG_STACK_A + pushed + 8 + pos]
    str         r12, [sp, #pos]
.equ pos, pos + 4
.endr

    @ Shift the arguments down by two registers and call the function.
    mov         r12, r0
    mov         r0,  r2
    mov         r1,  r3
    ldrd        r2,  r3,  [sp, #ARG_STACK_A + pushed]
    blx         r12
    add         sp,  sp,  #ARG_STACK_A
    pop         {r2}                    @ r2 = result pointer

    @ Save the return value. r2 is stored as well, purely as padding, so that
    @ sp is 8-byte aligned again if puts has to be called below.
    push        {r0-r2}
    movrel      r12, register_init
.ifc \variant, neon
    @ r3 = 32-bit fold of (q4-q7 xor reference values), zero when unchanged.
    vldm        r12, {q0-q3}
    veor        q0,  q0,  q4
    veor        q1,  q1,  q5
    veor        q2,  q2,  q6
    veor        q3,  q3,  q7
    vorr        q0,  q0,  q1
    vorr        q0,  q0,  q2
    vorr        q0,  q0,  q3
    vorr        d0,  d0,  d1
    vrev64.32   d1,  d0
    vorr        d0,  d0,  d1
    vmov.32     r3,  d0[0]
.else
    mov         r3,  #0
.endif

@ Accumulates into r3 any difference between the next two reference words
@ at r12 and the given register pair. Clobbers r0 and r1, advances r12 by 8.
.macro check_reg reg1, reg2
    ldrd        r0,  r1,  [r12], #8
    eor         r0,  r0, \reg1
    eor         r1,  r1, \reg2
    orr         r3,  r3, r0
    orr         r3,  r3, r1
.endm
    check_reg   r4,  r5
    check_reg   r6,  r7
    check_reg   r8,  r9
    check_reg   r10, r11
.purgem check_reg

    cmp         r3,  #0
    beq         0f

    @ A register was clobbered: clear the result word and report it.
    mov         r12, #0
    str         r12, [r2]
    movrel      r0, error_message
    bl          puts
0:
    @ Restore the return value (r2 is reloaded only to drop the padding).
    pop         {r0-r2}
.ifc \variant, neon
    vpop        {q4-q7}
.endif
    pop         {r4-r11, pc}
endfunc
.endm
131
@ Instantiate both wrappers: x265_checkasm_call_neon checks r4-r11 and q4-q7,
@ x265_checkasm_call_noneon checks r4-r11 only.
clobbercheck neon
clobbercheck noneon
134