#! /usr/bin/env perl
# Copyright 2005-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Generator for x86_64 CPU-identification and utility routines.
#
# Usage: <this script> [flavour] output
#
# Everything printed to STDOUT below is piped through x86_64-xlate.pl, which
# is looked up next to this script or in its perlasm/ subdirectory.  When
# only one argument is given and it contains a dot, it is taken to be the
# output file name and the flavour is left undefined.

$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 is selected either by flavour (nasm/masm/mingw64) or by a .asm
# output file name; it changes argument registers and scratch-slot choice.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Fail loudly if the translator cannot be started instead of silently
# printing into a dead pipe.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers holding the first four integer arguments for the selected ABI.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

# Routines emitted by the here-document below:
#
#   OPENSSL_atomic_add  - lock cmpxchg retry loop adding $arg2 to the 32-bit
#                         value at ($arg1); returns the new value
#                         sign-extended to 64 bits.
#   OPENSSL_rdtsc       - returns the rdtsc result as %rdx<<32 | %rax.
#   OPENSSL_ia32_cpuid  - declared with one argument, accessed through %rdi;
#                         stores the cpuid leaf-7 %ebx/%ecx words (after
#                         masking) at offsets 8 and 12 of that buffer and
#                         returns the adjusted leaf-1 flags packed as
#                         %ecx<<32 | %edx.
#   OPENSSL_cleanse     - writes zero to $arg2 bytes starting at $arg1.
#   CRYPTO_memcmp       - ORs together the XOR of every byte pair over $arg3
#                         bytes (no early exit); returns 0 when the buffers
#                         match and 1 otherwise.  Length 16 has a dedicated
#                         two-quadword path.
print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text

.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
.cfi_startproc
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.cfi_endproc
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
.cfi_startproc
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.cfi_endproc
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
.cfi_startproc
	mov	%rbx,%r8		# save %rbx
.cfi_register	%rbx,%r8

	xor	%eax,%eax
	mov	%rax,8(%rdi)		# clear extended feature flags
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	movd	%eax,%xmm0		# put aside processor id
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.LnotP4
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.LnotP4:
	cmp	\$6,%ah
	jne	.Lnotintel
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050670,%eax	# Knights Landing
	je	.Lknights
	cmp	\$0x00080650,%eax	# Knights Mill (according to sde)
	jne	.Lnotintel
.Lknights:
	and	\$0xfbffffff,%ecx	# clear XSAVE flag to mimic Silvermont

.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx

	cmp	\$7,%r11d
	jb	.Lno_extended_info
	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	bt	\$26,%r9d		# check XSAVE bit, cleared on Knights
	jc	.Lnotknights
	and	\$0xfff7ffff,%ebx	# clear ADCX/ADOX flag
.Lnotknights:
	movd	%xmm0,%eax		# restore processor id
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050650,%eax	# Skylake-X
	jne	.Lnotskylakex
	and	\$0xfffeffff,%ebx	# ~(1<<16)
					# suppress AVX512F flag on Skylake-X
.Lnotskylakex:
	mov	%ebx,8(%rdi)		# save extended feature flags
	mov	%ecx,12(%rdi)
.Lno_extended_info:

	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$0xe6,%eax		# isolate XMM, YMM and ZMM state support
	cmp	\$0xe6,%eax
	je	.Ldone
	andl	\$0x3fdeffff,8(%rdi)	# ~(1<<31|1<<30|1<<21|1<<16)
					# clear AVX512F+BW+VL+FIMA, all of
					# them are EVEX-encoded, which requires
					# ZMM state support even if one uses
					# only XMM and YMM :-(
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	mov	\$0x3fdeffdf,%eax	# ~(1<<31|1<<30|1<<21|1<<16|1<<5)
	and	%eax,8(%rdi)		# clear AVX2 and AVX512* bits
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
.cfi_restore	%rbx
	or	%r9,%rax
	ret
.cfi_endproc
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
.cfi_startproc
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2
	lea	8($arg1),$arg1
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.cfi_endproc
.size	OPENSSL_cleanse,.-OPENSSL_cleanse

.globl	CRYPTO_memcmp
.type	CRYPTO_memcmp,\@abi-omnipotent
.align	16
CRYPTO_memcmp:
.cfi_startproc
	xor	%rax,%rax
	xor	%r10,%r10
	cmp	\$0,$arg3
	je	.Lno_data
	cmp	\$16,$arg3
	jne	.Loop_cmp
	mov	($arg1),%r10
	mov	8($arg1),%r11
	mov	\$1,$arg3
	xor	($arg2),%r10
	xor	8($arg2),%r11
	or	%r11,%r10
	cmovnz	$arg3,%rax
	ret

.align	16
.Loop_cmp:
	mov	($arg1),%r10b
	lea	1($arg1),$arg1
	xor	($arg2),%r10b
	lea	1($arg2),$arg2
	or	%r10b,%al
	dec	$arg3
	jnz	.Loop_cmp
	neg	%rax
	shr	\$63,%rax
.Lno_data:
	ret
.cfi_endproc
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
___

# OPENSSL_wipe_cpu, non-Win64 variant: zeroes %xmm0-%xmm15 and %rcx, %rdx,
# %rsi, %rdi, %r8-%r11, then returns %rsp+8 in %rax.
print<<___ if (!$win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
.cfi_startproc
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.cfi_endproc
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# OPENSSL_wipe_cpu, Win64 variant: zeroes only %xmm0-%xmm5 and %rcx, %rdx,
# %r8-%r11, then returns %rsp+8 in %rax.
print<<___ if ($win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
{
# Register assignments shared by the two bus-instrumentation routines.
#
#   OPENSSL_instrument_bus  - for each of $arg2 32-bit slots starting at
#                             $arg1: flushes the slot's cache line, then
#                             lock-adds the rdtsc delta since the previous
#                             sample into it; returns $arg2.
#   OPENSSL_instrument_bus2 - same sampling, but advances to the next slot
#                             (and decrements the slot count) only when the
#                             delta differs from the previous one, for at
#                             most $arg3 samples; returns the number of
#                             slots consumed.
my $out="%r10";
my $cnt="%rcx";
my $max="%r11";
my $lasttick="%r8d";
my $lastdiff="%r9d";
# Scratch slot used to remember the initial slot count without moving %rsp.
# Win64 uses the caller-provided home space just above the return address;
# other ABIs use the red zone just below %rsp.  This must test $win64: the
# bareword `win64' is an always-true string and would select 8(%rsp) on
# every platform, i.e. a write into the caller's frame on non-Win64.
my $redzone=$win64?8:-8;

print<<___;
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
.cfi_startproc
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax
	mov	%edx,$lasttick
	mov	%eax,$lastdiff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)
	lea	4($out),$out
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.cfi_endproc
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
.cfi_startproc
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx
	setne	%dl
	sub	%rdx,$cnt	# conditional --$cnt
	lea	($out,%rdx,4),$out	# conditional ++$out
	jnz	.Loop2

.Ldone2:
	mov	$redzone(%rsp),%rax
	sub	$cnt,%rax
	ret
.cfi_endproc
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}

# Emit OPENSSL_ia32_<rdop>_bytes for the given instruction mnemonic.
#
# The generated routine fills the buffer at $arg1 with $arg2 bytes obtained
# from <rdop>, eight bytes at a time with a byte-wise tail.  Each draw is
# retried up to 8 times until the instruction sets the carry flag; if all
# retries fail the routine stops early.  It returns the number of bytes
# actually written in %rax and clears %r10 before returning.
sub gen_random {
my $rdop = shift;
print<<___;
.globl	OPENSSL_ia32_${rdop}_bytes
.type	OPENSSL_ia32_${rdop}_bytes,\@abi-omnipotent
.align	16
OPENSSL_ia32_${rdop}_bytes:
.cfi_startproc
	xor	%rax, %rax	# return value
	cmp	\$0,$arg2
	je	.Ldone_${rdop}_bytes

	mov	\$8,%r11
.Loop_${rdop}_bytes:
	${rdop}	%r10
	jc	.Lbreak_${rdop}_bytes
	dec	%r11
	jnz	.Loop_${rdop}_bytes
	jmp	.Ldone_${rdop}_bytes

.align	16
.Lbreak_${rdop}_bytes:
	cmp	\$8,$arg2
	jb	.Ltail_${rdop}_bytes
	mov	%r10,($arg1)
	lea	8($arg1),$arg1
	add	\$8,%rax
	sub	\$8,$arg2
	jz	.Ldone_${rdop}_bytes
	mov	\$8,%r11
	jmp	.Loop_${rdop}_bytes

.align	16
.Ltail_${rdop}_bytes:
	mov	%r10b,($arg1)
	lea	1($arg1),$arg1
	inc	%rax
	shr	\$8,%r10
	dec	$arg2
	jnz	.Ltail_${rdop}_bytes

.Ldone_${rdop}_bytes:
	xor	%r10,%r10	# Clear sensitive data from register
	ret
.cfi_endproc
.size	OPENSSL_ia32_${rdop}_bytes,.-OPENSSL_ia32_${rdop}_bytes
___
}
gen_random("rdrand");
gen_random("rdseed");

close STDOUT or die "error closing STDOUT: $!";	# flush