1/****************************************************************************
2 * Assembly testing and benchmarking tool
3 * Copyright (c) 2015 Martin Storsjo
4 * Copyright (c) 2015 Janne Grunau
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
21 *****************************************************************************/
22
23#include "libavutil/aarch64/asm.S"
24
// Table of 18 arbitrary 64-bit patterns, 16-byte aligned.
// checkasm_checked_call loads quads 0-7 into d8-d15 and quads 8-17 into
// x19-x28 before calling the function under test, then compares the registers
// against this same table afterwards. It also reads pairs from here to fill
// x8-x17 with garbage.
const register_init, align=4
        // quads 0-7: reference values for d8-d15
        .quad 0x21f86d66c8ca00ce, 0x75b6ba21077c48ad
        .quad 0xed56bb2dcb3c7736, 0x8bda43d3fd1a7e06
        .quad 0xb64a9c9e5d318408, 0xdf9a54b303f1d3a3
        .quad 0x4a75479abd64e097, 0x249214109d5d1c88
        // quads 8-17: reference values for x19-x28
        .quad 0x1a1b2550a612b48c, 0x79445c159ce79064
        .quad 0x2eed899d5a28ddcd, 0x86b2536fcd8cf636
        .quad 0xb0856806085e7943, 0x3f2bf84fc0fcca4e
        .quad 0xacbd382dcf5b8de2, 0xd229e1f5b281303f
        .quad 0x71aeaff20b095fd9, 0xab63e2e11fa38ed9
endconst
45
46
// NUL-terminated failure messages passed in x0 to checkasm_fail_func by
// checkasm_checked_call. Both strings are emitted inside one const object;
// the second one is addressed through its own label.
const error_message_register
        // Reported when a callee-saved register differs from register_init.
        .asciz "failed to preserve register"
error_message_stack:
        // Reported when the stack canary no longer matches its reference.
        .asciz "stack clobbered"
endconst
52
53
// Upper bound on the number of arguments taken by any asm function.
#define MAX_ARGS 15

// Number of bytes to overwrite below sp: 8 bytes per argument, rounded up to
// a multiple of 16.
#define CLOBBER_STACK ((8*MAX_ARGS + 15) & ~15)

// checkasm_stack_clobber
// Overwrites the CLOBBER_STACK bytes directly below the entry sp with
// repeated copies of the x0/x1 pair, then restores sp to its entry value.
//   In:     x0, x1 = values written to the stack
//   Clobb:  x2, x3, condition flags
function checkasm_stack_clobber, export=1
        mov             x2,  #CLOBBER_STACK     // x2 = bytes still to fill
        mov             x3,  sp                 // remember sp for the exit path
1:
        stp             x0,  x1,  [sp, #-16]!   // push one 16-byte pair
        subs            x2,  x2,  #16
        b.gt            1b                      // repeat while x2 > 0
        mov             sp,  x3                 // drop everything pushed above
        ret
endfunc
69
// Bytes reserved for the outgoing stack arguments of the callee: MAX_ARGS - 8
// eight-byte slots rounded up to a multiple of 16, plus 16 for the stack
// canary reference.
// The grouping is spelled out with parentheses on purpose. GNU as gives the
// & operator higher precedence than +, while C-style expression evaluators do
// the opposite; without the inner parentheses the latter would compute
// (...) & (~15 + 16), which is 0.
#define ARG_STACK (((8*(MAX_ARGS - 8) + 15) & ~15) + 16)

// checkasm_checked_call
// Calls the function whose address is in x0 and checks that it preserved
// x19-x28 and d8-d15 and did not write past its stack arguments. On a
// mismatch, checkasm_fail_func is called with a message pointer in x0.
//   In:   x0                  = address of the function to call
//         [sp, #0]  .. #56    = values passed to the callee in x0-x7
//         [sp, #64] ..        = MAX_ARGS - 8 stack argument slots
//         [sp, #64 + 8*(MAX_ARGS-8)] = 32-bit number of stack argument slots
//                               the callee really uses
//   Out:  x0, x1              = values the callee left in x0, x1
// After the prologue the incoming sp equals x29 + 16, which is why all
// argument loads below are relative to [x29, #16].
function checkasm_checked_call, export=1
        // Save the frame record and every callee-saved register, since they
        // are about to be overwritten with the reference patterns.
        stp             x29, x30, [sp, #-16]!
        mov             x29, sp
        stp             x19, x20, [sp, #-16]!
        stp             x21, x22, [sp, #-16]!
        stp             x23, x24, [sp, #-16]!
        stp             x25, x26, [sp, #-16]!
        stp             x27, x28, [sp, #-16]!
        stp             d8,  d9,  [sp, #-16]!
        stp             d10, d11, [sp, #-16]!
        stp             d12, d13, [sp, #-16]!
        stp             d14, d15, [sp, #-16]!

        // Seed d8-d15 and x19-x28 with the known patterns from register_init.
        movrel          x9, register_init
        ldp             d8,  d9,  [x9], #16
        ldp             d10, d11, [x9], #16
        ldp             d12, d13, [x9], #16
        ldp             d14, d15, [x9], #16
        ldp             x19, x20, [x9], #16
        ldp             x21, x22, [x9], #16
        ldp             x23, x24, [x9], #16
        ldp             x25, x26, [x9], #16
        ldp             x27, x28, [x9], #16

        // Build the stack argument area of the callee at the new sp.
        sub             sp,  sp,  #ARG_STACK
.equ pos, 0
.rept MAX_ARGS-8
        // Skip the first 8 args, that are loaded into registers
        ldr             x9, [x29, #16 + 8*8 + pos]
        str             x9, [sp, #pos]
.equ pos, pos + 8
.endr

        // Fill x8-x17 with garbage. This does not have to be preserved,
        // but avoids relying on them having any particular value.
        // x9 is the table pointer and is overwritten last.
        movrel          x9, register_init
        ldp             x10, x11, [x9], #32
        ldp             x12, x13, [x9], #32
        ldp             x14, x15, [x9], #32
        ldp             x16, x17, [x9], #32
        ldp             x8,  x9,  [x9]

        // For stack overflows, the callee is free to overwrite the parameters
        // that were passed on the stack (if any), so we can only check after
        // that point. First figure out how many parameters the function
        // really took on the stack. The 32-bit load zero-extends into x2, so
        // x2 can be used as a 64-bit index right away.
        ldr             w2,  [x29, #16 + 8*8 + (MAX_ARGS-8)*8]
        // Load the first non-parameter value from the stack, that should be
        // left untouched by the function. Store a copy of it inverted, so that
        // e.g. overwriting everything with zero would be noticed.
        ldr             x2,  [sp, x2, lsl #3]
        mvn             x2,  x2
        str             x2,  [sp, #ARG_STACK-8]

        // Load the in-register arguments. The target address is moved out of
        // x0 first because x0 is about to receive the first argument.
        mov             x12, x0
        ldp             x0,  x1,  [x29, #16]
        ldp             x2,  x3,  [x29, #32]
        ldp             x4,  x5,  [x29, #48]
        ldp             x6,  x7,  [x29, #64]
        // Call the target function
        blr             x12

        // Load the number of stack parameters, stack canary and its reference
        ldr             w2,  [x29, #16 + 8*8 + (MAX_ARGS-8)*8]
        ldr             x2,  [sp, x2, lsl #3]
        ldr             x3,  [sp, #ARG_STACK-8]

        // Release the argument area and park the return values of the callee
        // on the stack; x0 and x1 are used as scratch below.
        add             sp,  sp,  #ARG_STACK
        stp             x0,  x1,  [sp, #-16]!

        // Undo the inversion of the reference and compare with the canary.
        mvn             x3,  x3
        cmp             x2,  x3
        b.ne            2f

        // Register check: x9 walks register_init, v3 collects every bit that
        // differs between a SIMD register and its reference value.
        movrel          x9, register_init
        movi            v3.8h,  #0

// Compares the low 64 bits of v<reg1> and v<reg2> with the next 16 bytes of
// the table at x9 and ORs any differing bits into v3. Clobbers v1, v2.
.macro check_reg_neon reg1, reg2
        ldr             q1,  [x9], #16
        uzp1            v2.2d,  v\reg1\().2d, v\reg2\().2d
        eor             v1.16b, v1.16b, v2.16b
        orr             v3.16b, v3.16b, v1.16b
.endm
        check_reg_neon  8,  9
        check_reg_neon  10, 11
        check_reg_neon  12, 13
        check_reg_neon  14, 15
        // Narrow the 128-bit mismatch mask to 64 bits. The narrowing
        // saturates, so a nonzero 16-bit lane stays nonzero.
        uqxtn           v3.8b,  v3.8h
        umov            x3,  v3.d[0]

// Compares \reg1 and \reg2 with the next two quads of the table at x9 and
// ORs any differing bits into x3. Clobbers x0, x1.
.macro check_reg reg1, reg2
        ldp             x0,  x1,  [x9], #16
        eor             x0,  x0,  \reg1
        eor             x1,  x1,  \reg2
        orr             x3,  x3,  x0
        orr             x3,  x3,  x1
.endm
        check_reg       x19, x20
        check_reg       x21, x22
        check_reg       x23, x24
        check_reg       x25, x26
        check_reg       x27, x28

        // x3 == 0 means every checked register matched its reference.
        cbz             x3,  0f

        movrel          x0, error_message_register
        b               1f
2:
        movrel          x0, error_message_stack
1:
        bl              X(checkasm_fail_func)
0:
        // Restore the return values of the callee, then the saved registers
        // in the reverse order of the prologue.
        ldp             x0,  x1,  [sp], #16
        ldp             d14, d15, [sp], #16
        ldp             d12, d13, [sp], #16
        ldp             d10, d11, [sp], #16
        ldp             d8,  d9,  [sp], #16
        ldp             x27, x28, [sp], #16
        ldp             x25, x26, [sp], #16
        ldp             x23, x24, [sp], #16
        ldp             x21, x22, [sp], #16
        ldp             x19, x20, [sp], #16
        ldp             x29, x30, [sp], #16
        ret
endfunc
199