;*****************************************************************************
;* checkasm-a.asm: assembly check tool
;*****************************************************************************
;* Copyright (C) 2003-2013 x264 project
;* Copyright (C) 2013-2020 MulticoreWare, Inc
;*
;* Authors: Loren Merritt <lorenm@u.washington.edu>
;*          Henrik Gramner <henrik@gramner.com>
;*          Min Chen <chenm003@163.com>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
;*
;* This program is also available under a commercial proprietary license.
;* For more information, contact us at license @ x265.com.
;*****************************************************************************

%include "x86inc.asm"

SECTION_RODATA

; Text handed to puts when a register check fails.
error_message: db "failed to preserve register", 0

%if ARCH_X86_64
; Arbitrary bit patterns, picked only to make an accidental match unlikely.
;   x6..x15 : 16 bytes each, paired with m6..m15 (WIN64 only)
;   n7..n14 : 8 bytes each, paired with the same-numbered r registers
ALIGN 16
x6:  dq 0x1a1b2550a612b48c, 0x79445c159ce79064
x7:  dq 0x2eed899d5a28ddcd, 0x86b2536fcd8cf636
x8:  dq 0xb0856806085e7943, 0x3f2bf84fc0fcca4e
x9:  dq 0xacbd382dcf5b8de2, 0xd229e1f5b281303f
x10: dq 0x71aeaff20b095fd9, 0xab63e2e11fa38ed9
x11: dq 0x89b0c0765892729a, 0x77d410d5c42c882d
x12: dq 0xc45ea11a955d8dd5, 0x24b3c1d2a024048b
x13: dq 0x2e8ec680de14b47c, 0xdd7b8919edd42786
x14: dq 0x135ce6888fa02cbf, 0x11e53e2b2ac655ef
x15: dq 0x011ff554472a7a10, 0x6de8f4c914c334d5
n7:  dq 0x21f86d66c8ca00ce
n8:  dq 0x75b6ba21077c48ad
n9:  dq 0xed56bb2dcb3c7736
n10: dq 0x8bda43d3fd1a7e06
n11: dq 0xb64a9c9e5d318408
n12: dq 0xdf9a54b303f1d3a3
n13: dq 0x4a75479abd64e097
n14: dq 0x249214109d5d1c88
%endif

SECTION .text

cextern_naked puts

; Largest argument count taken by any x265 asm function.
; Stack alignment requires (max_args % 4) == 3.
%define max_args 15

%if ARCH_X86_64

;-----------------------------------------------------------------------------
; void x265_checkasm_stack_clobber( uint64_t clobber, ... )
;-----------------------------------------------------------------------------
cglobal checkasm_stack_clobber, 1,2
    ; Overwrite argsize bytes of stack below the stack pointer with the value
    ; in r0, walking from the highest 8-byte slot down to the lowest.
    %define argsize (max_args+6)*8
    SUB     rsp, argsize
    mov     r1, argsize-8               ; r1 = byte offset of the current slot
.fill:
    mov     [rsp+r1], r0
    sub     r1, 8
    jge     .fill                       ; stop once the offset goes negative
    ADD     rsp, argsize
    RET

; checkasm_call seeds registers r<free_regs>..r14 with known patterns before
; calling the function under test and verifies them afterwards.
%if WIN64
    %assign free_regs 7
%else
    %assign free_regs 9
%endif

;-----------------------------------------------------------------------------
; intptr_t x265_checkasm_call( intptr_t (*func)(), int *ok, ... )
;-----------------------------------------------------------------------------
; No instructions are written between the two cglobal lines below, so
; checkasm_call_float is presumably a second entry name for the same code.
cglobal checkasm_call_float
INIT_XMM
cglobal checkasm_call, 2,15,16,max_args*8+8
    ; The last 8 bytes of the local stack area hold the ok pointer until it
    ; is needed on the failure path; r6 carries func up to the call.
    mov     [rsp+max_args*8], r1
    mov     r6, r0

    ; Everything destined for func was passed on the stack rather than in
    ; registers, so that wrong assumptions about 32-bit ints being
    ; zero-extended to 64-bit can be detected.
    mov     r0, r6mp
    mov     r1, r7mp
    mov     r2, r8mp
    mov     r3, r9mp
%if UNIX64
    mov     r4, r10mp
    mov     r5, r11mp
    ; Copy max_args-6 further stack arguments to the bottom of the local
    ; area (starting at [rsp]), with r9 as the temporary.
    %assign arg_idx 6
    %rep max_args-6
        mov     r9, [rsp+stack_offset+(arg_idx+1)*8]
        mov     [rsp+(arg_idx-6)*8], r9
        %assign arg_idx arg_idx+1
    %endrep
%else
    ; Copy max_args-4 further stack arguments into local slots 4 and up
    ; (starting at [rsp+4*8]), with r9 as the temporary.
    %assign arg_idx 4
    %rep max_args-4
        mov     r9, [rsp+stack_offset+(arg_idx+7)*8]
        mov     [rsp+arg_idx*8], r9
        %assign arg_idx arg_idx+1
    %endrep
%endif

%if WIN64
    ; Seed m6..m15 with the x6..x15 patterns.
    %assign xmm_idx 6
    %rep 16-6
        mova    m %+ xmm_idx, [x %+ xmm_idx]
        %assign xmm_idx xmm_idx+1
    %endrep
%endif

    ; Seed r14 down to r<free_regs> with the matching n patterns.
%assign gpr_idx 14
%rep 15-free_regs
    mov     r %+ gpr_idx, [n %+ gpr_idx]
    %assign gpr_idx gpr_idx-1
%endrep
    call    r6
    ; XOR every seeded register with its pattern and OR the differences into
    ; r14. r14 is handled first (gpr_idx starts at 14), so it ends up zero
    ; only if all seeded registers still hold their patterns.
%assign gpr_idx 14
%rep 15-free_regs
    xor     r %+ gpr_idx, [n %+ gpr_idx]
    or      r14, r %+ gpr_idx
    %assign gpr_idx gpr_idx-1
%endrep

%if WIN64
    ; Same comparison for m6..m15, accumulated in m6, then narrowed with
    ; packsswb so movq can carry the result into r5 and merge it into r14.
    %assign xmm_idx 6
    %rep 16-6
        pxor    m %+ xmm_idx, [x %+ xmm_idx]
        por     m6, m %+ xmm_idx
        %assign xmm_idx xmm_idx+1
    %endrep
    packsswb m6, m6
    movq    r5, m6
    or      r14, r5
%endif

    jz      .done                       ; ZF is from the last `or r14, ...`
    ; A seeded register changed: print the message and store 0 to *ok.
    ; rax and rdx are parked in r9/r10 across the puts call and put back
    ; afterwards.
    mov     r10, rdx
    mov     r9, rax
    lea     r0, [error_message]
%if FORMAT_ELF
    call    puts wrt ..plt
%else
    call    puts
%endif
    mov     r1, [rsp+max_args*8]        ; ok pointer saved on entry
    mov     dword [r1], 0
    mov     rax, r9
    mov     rdx, r10
.done:
    RET

%else

; Arbitrary bit patterns, picked only to make an accidental match unlikely.
%define n3 dword 0x6549315c
%define n4 dword 0xe02f3e23
%define n5 dword 0xb78d0d1d
%define n6 dword 0x33627ba7

;-----------------------------------------------------------------------------
; intptr_t x265_checkasm_call( intptr_t (*func)(), int *ok, ... )
;-----------------------------------------------------------------------------
; No instructions are written between the two cglobal lines below, so
; checkasm_call_float is presumably a second entry name for the same code.
cglobal checkasm_call_float
cglobal checkasm_call, 1,7
    ; Seed r3..r6 with their patterns.
    mov     r3, n3
    mov     r4, n4
    mov     r5, n5
    mov     r6, n6
    ; Re-push max_args dwords. The displacement stays constant because each
    ; push lowers esp by 4, so successive pushes read successively lower
    ; addresses.
%rep max_args
    push    dword [esp+24+max_args*4]
%endrep
    call    r0
    add     esp, max_args*4             ; drop the re-pushed dwords
    ; XOR each seeded register with its pattern and fold all differences
    ; into r3; the final `or r3, r5` sets ZF for the branch below.
    xor     r3, n3
    xor     r4, n4
    or      r3, r4
    xor     r5, n5
    xor     r6, n6
    or      r5, r6
    or      r3, r5
    jz      .done
    ; A seeded register changed: print the message and store 0 to *ok.
    ; eax and edx are parked in r3/r4 across the puts call and put back
    ; afterwards.
    mov     r4, edx
    mov     r3, eax
    lea     r1, [error_message]
    push    r1
    call    puts
    add     esp, 4                      ; drop the puts argument
    mov     r1, r1m
    mov     dword [r1], 0
    mov     eax, r3
    mov     edx, r4
.done:
    REP_RET

%endif ; ARCH_X86_64

;-----------------------------------------------------------------------------
; int x265_stack_pagealign( int (*func)(), int align )
;-----------------------------------------------------------------------------
cglobal stack_pagealign, 2,2
    movsxdifnidn r1, r1d
    push    rbp
    mov     rbp, rsp                    ; kept so `leave` can restore rsp
%if WIN64
    sub     rsp, 32                     ; shadow space
%endif
    and     rsp, ~0xfff                 ; clear the low 12 bits of rsp
    sub     rsp, r1                     ; then move down by the align argument
    call    r0
    leave                               ; rsp = rbp, then pop rbp
    RET