1;*****************************************************************************
2;* checkasm-a.asm: assembly check tool
3;*****************************************************************************
4;* Copyright (C) 2003-2013 x264 project
5;* Copyright (C) 2013-2020 MulticoreWare, Inc
6;*
7;* Authors: Loren Merritt <lorenm@u.washington.edu>
8;*          Henrik Gramner <henrik@gramner.com>
9;*          Min Chen <chenm003@163.com>
10;*
11;* This program is free software; you can redistribute it and/or modify
12;* it under the terms of the GNU General Public License as published by
13;* the Free Software Foundation; either version 2 of the License, or
14;* (at your option) any later version.
15;*
16;* This program is distributed in the hope that it will be useful,
17;* but WITHOUT ANY WARRANTY; without even the implied warranty of
18;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19;* GNU General Public License for more details.
20;*
21;* You should have received a copy of the GNU General Public License
22;* along with this program; if not, write to the Free Software
23;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
24;*
25;* This program is also available under a commercial proprietary license.
26;* For more information, contact us at license @ x265.com.
27;*****************************************************************************
28
29%include "x86inc.asm"
30
31SECTION_RODATA
32
33error_message: db "failed to preserve register", 0
34
35%if ARCH_X86_64
36; just random numbers to reduce the chance of incidental match
37ALIGN 16
38x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
39x7:  dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
40x8:  dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
41x9:  dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
42x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
43x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
44x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
45x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
46x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
47x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
48n7:  dq 0x21f86d66c8ca00ce
49n8:  dq 0x75b6ba21077c48ad
50n9:  dq 0xed56bb2dcb3c7736
51n10: dq 0x8bda43d3fd1a7e06
52n11: dq 0xb64a9c9e5d318408
53n12: dq 0xdf9a54b303f1d3a3
54n13: dq 0x4a75479abd64e097
55n14: dq 0x249214109d5d1c88
56%endif
57
58SECTION .text
59
60cextern_naked puts
61
62; max number of args used by any x265 asm function.
63; (max_args % 4) must equal 3 for stack alignment
64%define max_args 15
65
66%if ARCH_X86_64
67
68;-----------------------------------------------------------------------------
69; void x265_checkasm_stack_clobber( uint64_t clobber, ... )
70;-----------------------------------------------------------------------------
71cglobal checkasm_stack_clobber, 1,2
72    ; Clobber the stack with junk below the stack pointer
73    %define argsize (max_args+6)*8
74    SUB  rsp, argsize
75    mov   r1, argsize-8
76.loop:
77    mov [rsp+r1], r0
78    sub   r1, 8
79    jge .loop
80    ADD  rsp, argsize
81    RET
82
83%if WIN64
84    %assign free_regs 7
85%else
86    %assign free_regs 9
87%endif
88
89;-----------------------------------------------------------------------------
90; intptr_t x265_checkasm_call( intptr_t (*func)(), int *ok, ... )
91;-----------------------------------------------------------------------------
92cglobal checkasm_call_float
93INIT_XMM
94cglobal checkasm_call, 2,15,16,max_args*8+8
95    mov  r6, r0
96    mov  [rsp+max_args*8], r1
97
98    ; All arguments have been pushed on the stack instead of registers in order to
99    ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
100    mov  r0, r6mp
101    mov  r1, r7mp
102    mov  r2, r8mp
103    mov  r3, r9mp
104%if UNIX64
105    mov  r4, r10mp
106    mov  r5, r11mp
107    %assign i 6
108    %rep max_args-6
109        mov  r9, [rsp+stack_offset+(i+1)*8]
110        mov  [rsp+(i-6)*8], r9
111        %assign i i+1
112    %endrep
113%else
114    %assign i 4
115    %rep max_args-4
116        mov  r9, [rsp+stack_offset+(i+7)*8]
117        mov  [rsp+i*8], r9
118        %assign i i+1
119    %endrep
120%endif
121
122%if WIN64
123    %assign i 6
124    %rep 16-6
125        mova m %+ i, [x %+ i]
126        %assign i i+1
127    %endrep
128%endif
129
130%assign i 14
131%rep 15-free_regs
132    mov  r %+ i, [n %+ i]
133    %assign i i-1
134%endrep
135    call r6
136%assign i 14
137%rep 15-free_regs
138    xor  r %+ i, [n %+ i]
139    or  r14, r %+ i
140    %assign i i-1
141%endrep
142
143%if WIN64
144    %assign i 6
145    %rep 16-6
146        pxor m %+ i, [x %+ i]
147        por  m6, m %+ i
148        %assign i i+1
149    %endrep
150    packsswb m6, m6
151    movq r5, m6
152    or  r14, r5
153%endif
154
155    jz .ok
156    mov  r9, rax
157    mov r10, rdx
158    lea  r0, [error_message]
159%if FORMAT_ELF
160    call puts wrt ..plt
161%else
162    call puts
163%endif
164    mov  r1, [rsp+max_args*8]
165    mov  dword [r1], 0
166    mov  rdx, r10
167    mov  rax, r9
168.ok:
169    RET
170
171%else
172
173; just random numbers to reduce the chance of incidental match
174%define n3 dword 0x6549315c
175%define n4 dword 0xe02f3e23
176%define n5 dword 0xb78d0d1d
177%define n6 dword 0x33627ba7
178
179;-----------------------------------------------------------------------------
180; intptr_t x265_checkasm_call( intptr_t (*func)(), int *ok, ... )
181;-----------------------------------------------------------------------------
182cglobal checkasm_call_float
183cglobal checkasm_call, 1,7
184    mov  r3, n3
185    mov  r4, n4
186    mov  r5, n5
187    mov  r6, n6
188%rep max_args
189    push dword [esp+24+max_args*4]
190%endrep
191    call r0
192    add  esp, max_args*4
193    xor  r3, n3
194    xor  r4, n4
195    xor  r5, n5
196    xor  r6, n6
197    or   r3, r4
198    or   r5, r6
199    or   r3, r5
200    jz .ok
201    mov  r3, eax
202    mov  r4, edx
203    lea  r1, [error_message]
204    push r1
205    call puts
206    add  esp, 4
207    mov  r1, r1m
208    mov  dword [r1], 0
209    mov  edx, r4
210    mov  eax, r3
211.ok:
212    REP_RET
213
214%endif ; ARCH_X86_64
215
216;-----------------------------------------------------------------------------
217; int x265_stack_pagealign( int (*func)(), int align )
218;-----------------------------------------------------------------------------
219cglobal stack_pagealign, 2,2
220    movsxdifnidn r1, r1d
221    push rbp
222    mov  rbp, rsp
223%if WIN64
224    sub  rsp, 32 ; shadow space
225%endif
226    and  rsp, ~0xfff
227    sub  rsp, r1
228    call r0
229    leave
230    RET
231
232