#! /usr/bin/env perl
# Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Locate the perlasm helper relative to this script and load it.  All the
# &mnemonic(...) calls below are provided by x86asm.pl; the leading '&' is
# required because many mnemonics (and, or, xor, cmp, pop, push, sub, ...)
# would otherwise be parsed as Perl builtins.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

# The last command-line argument names the output file; everything this
# script prints goes there.  Fail right away if the argument is missing or
# the file cannot be created, instead of generating into a dead handle.
$output = pop;
die "$0: no output file given\n" unless defined $output && length $output;
open OUT, '>', $output or die "can't open $output: $!";
*STDOUT=*OUT;

&asm_init($ARGV[0]);

# $sse2 enables the SSE2/CLFLUSH dependent code paths further down.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

# OPENSSL_ia32_cpuid(ptr)
#
# Probes the processor with CPUID and returns the (adjusted) feature words
# in %edx:%eax.  The 1st argument is a pointer; the dword at offset 8 from
# it is cleared on entry and later receives %ebx of CPUID leaf 7 when that
# leaf is available.  Both return registers are zero when CPUID itself is
# not supported.
&function_begin("OPENSSL_ia32_cpuid");
	&xor	("edx","edx");
	# Try to flip EFLAGS bit 21; if the flip sticks, CPUID is available.
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");
	&xor	("eax",1<<21);
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");
	&xor	("eax","eax");
	&mov	("esi",&wparam(0));
	&mov	(&DWP(8,"esi"),"eax");	# clear extended feature flags
	&bt	("ecx",21);
	&jnc	(&label("nocpuid"));
	&cpuid	();
	&mov	("edi","eax");		# max value for standard query level

	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&setne	(&LB("eax"));
	&mov	("ebp","eax");
	&cmp	("edx",0x49656e69);	# "ineI"
	&setne	(&LB("eax"));
	&or	("ebp","eax");
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&setne	(&LB("eax"));
	&or	("ebp","eax");		# 0 indicates Intel CPU
	&jz	(&label("intel"));

	&cmp	("ebx",0x68747541);	# "Auth"
	&setne	(&LB("eax"));
	&mov	("esi","eax");
	&cmp	("edx",0x69746E65);	# "enti"
	&setne	(&LB("eax"));
	&or	("esi","eax");
	&cmp	("ecx",0x444D4163);	# "cAMD"
	&setne	(&LB("eax"));
	&or	("esi","eax");		# 0 indicates AMD CPU
	&jnz	(&label("intel"));

	# AMD specific
	&mov	("eax",0x80000000);
	&cpuid	();
	&cmp	("eax",0x80000001);
	&jb	(&label("intel"));
	&mov	("esi","eax");
	&mov	("eax",0x80000001);
	&cpuid	();
	&or	("ebp","ecx");
	&and	("ebp",1<<11|1);	# isolate XOP bit
	&cmp	("esi",0x80000008);
	&jb	(&label("intel"));

	&mov	("eax",0x80000008);
	&cpuid	();
	&movz	("esi",&LB("ecx"));	# number of cores - 1
	&inc	("esi");		# number of cores

	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&bt	("edx",28);
	&jnc	(&label("generic"));
	&shr	("ebx",16);
	&and	("ebx",0xff);
	&cmp	("ebx","esi");
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit
	&jmp	(&label("generic"));

&set_label("intel");
	&cmp	("edi",4);
	&mov	("esi",-1);
	&jb	(&label("nocacheinfo"));

	&mov	("eax",4);
	&mov	("ecx",0);		# query L1D
	&cpuid	();
	&mov	("esi","eax");
	&shr	("esi",14);
	&and	("esi",0xfff);		# number of cores -1 per L1D

&set_label("nocacheinfo");
	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&and	("edx",0xbfefffff);	# force reserved bits #20, #30 to 0
	&cmp	("ebp",0);
	&jne	(&label("notintel"));
	&or	("edx",1<<30);		# set reserved bit#30 on Intel CPUs
	&and	(&HB("eax"),15);	# family ID
	&cmp	(&HB("eax"),15);	# P4?
	&jne	(&label("notintel"));
	&or	("edx",1<<20);		# set reserved bit#20 to engage RC4_CHAR
&set_label("notintel");
	&bt	("edx",28);		# test hyper-threading bit
	&jnc	(&label("generic"));
	&and	("edx",0xefffffff);
	&cmp	("esi",0);
	&je	(&label("generic"));

	&or	("edx",0x10000000);
	&shr	("ebx",16);
	&cmp	(&LB("ebx"),1);
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit if not

&set_label("generic");
	&and	("ebp",1<<11);		# isolate AMD XOP flag
	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
	&mov	("esi","edx");		# %ebp:%esi is copy of %ecx:%edx
	&or	("ebp","ecx");		# merge AMD XOP flag

	&cmp	("edi",7);
	&mov	("edi",&wparam(0));	# mov leaves the flags of the cmp intact
	&jb	(&label("no_extended_info"));
	&mov	("eax",7);
	&xor	("ecx","ecx");
	&cpuid	();
	&mov	(&DWP(8,"edi"),"ebx");	# save extended feature flag
&set_label("no_extended_info");

	&bt	("ebp",27);		# check OSXSAVE bit
	&jnc	(&label("clear_avx"));
	&xor	("ecx","ecx");
	&data_byte(0x0f,0x01,0xd0);	# xgetbv
	&and	("eax",6);
	&cmp	("eax",6);
	&je	(&label("done"));
	&cmp	("eax",2);
	&je	(&label("clear_avx"));
&set_label("clear_xmm");
	&and	("ebp",0xfdfffffd);	# clear AESNI and PCLMULQDQ bits
	&and	("esi",0xfeffffff);	# clear FXSR
&set_label("clear_avx");
	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
&set_label("done");
	&mov	("eax","esi");
	&mov	("edx","ebp");
&set_label("nocpuid");
&function_end("OPENSSL_ia32_cpuid");

&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc()
#
# Returns the time-stamp counter in %edx:%eax, or 0 when bit 4 of the first
# OPENSSL_ia32cap_P word is clear.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("notsc"));
	&rdtsc	();
&set_label("notsc");
	&ret	();
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# The value returned in %edx:%eax is the time-stamp counter difference
# measured across a single halt instruction, or 0 when any precondition
# (TSC present, privilege level 0, interrupts enabled) is not met.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	&rdtsc	();
	&push	("edx");		# park the starting tick on the stack
	&push	("eax");
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));	# 64-bit subtract: end - start
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.

# OPENSSL_far_spin(selector, offset)
#
# Loads the 1st argument into %ds, then counts loop iterations until the
# dword at the 2nd argument's offset changes.  The count is returned in
# %eax; 0 is returned right away when interrupts are disabled.
&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));
	&mov	("ecx",&DWP(8,"esp"));
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");
	&mov	("edx",&DWP(0,"ecx"));
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));
	&je	(&label("spin"));

	&data_word (0x1f909090);	# pop %ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu()
#
# Zeroes %eax/%edx, the eight %xmm registers (only when built with $sse2
# and the SSE2+FXSR capability bits are both set) and the x87/MMX register
# bank, then returns the address just above the return address in %eax.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));
	# NOTE(review): %ecx now holds the capability word itself, yet the bt
	# below uses %ecx as a memory address.  This looks like it was meant
	# to test the register (and the label suggests the x87 bit) - confirm
	# before relying on or changing it.
	&bt	(&DWP(0,"ecx"),1);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&and	("ecx",1<<26|1<<24);	# check SSE2 and FXSR bits
		&cmp	("ecx",1<<26|1<<24);
		&jne	(&label("no_sse2"));
		&pxor	("xmm0","xmm0");
		&pxor	("xmm1","xmm1");
		&pxor	("xmm2","xmm2");
		&pxor	("xmm3","xmm3");
		&pxor	("xmm4","xmm4");
		&pxor	("xmm5","xmm5");
		&pxor	("xmm6","xmm6");
		&pxor	("xmm7","xmm7");
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add(ptr, amount)
#
# Adds the 2nd argument to the dword the 1st argument points at using a
# lock cmpxchg retry loop and returns the resulting (new) value in %eax.
&function_begin_B("OPENSSL_atomic_add");
	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push	("ebx");
	&nop	();
	&mov	("eax",&DWP(0,"edx"));
&set_label("spin");
	&lea	("ebx",&DWP(0,"eax","ecx"));
	&nop	();
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne	(&label("spin"));
	&mov	("eax","ebx");	# OpenSSL expects the new value
	&pop	("ebx");
	&ret	();
&function_end_B("OPENSSL_atomic_add");

# OPENSSL_cleanse(ptr, len)
#
# Stores zero bytes over len bytes starting at ptr.  Lengths below 7 are
# handled byte by byte; longer buffers are first advanced byte-wise to a
# 4-byte boundary, then cleared a dword at a time, with any remaining
# bytes finished by the byte loop.
&function_begin_B("OPENSSL_cleanse");
	&mov	("edx",&wparam(0));
	&mov	("ecx",&wparam(1));
	&xor	("eax","eax");
	&cmp	("ecx",7);
	&jae	(&label("lot"));
	&cmp	("ecx",0);
	&je	(&label("ret"));
&set_label("little");
	&mov	(&BP(0,"edx"),"al");
	&sub	("ecx",1);
	&lea	("edx",&DWP(1,"edx"));	# lea keeps the flags set by sub
	&jnz	(&label("little"));
&set_label("ret");
	&ret	();

&set_label("lot",16);
	&test	("edx",3);
	&jz	(&label("aligned"));
	&mov	(&BP(0,"edx"),"al");
	&lea	("ecx",&DWP(-1,"ecx"));
	&lea	("edx",&DWP(1,"edx"));
	&jmp	(&label("lot"));
&set_label("aligned");
	&mov	(&DWP(0,"edx"),"eax");
	&lea	("ecx",&DWP(-4,"ecx"));
	&test	("ecx",-4);		# at least 4 bytes left?
	&lea	("edx",&DWP(4,"edx"));
	&jnz	(&label("aligned"));
	&cmp	("ecx",0);
	&jne	(&label("little"));
	&ret	();
&function_end_B("OPENSSL_cleanse");

# CRYPTO_memcmp(a, b, len)
#
# ORs together the byte-wise XOR of the two buffers over all len bytes,
# with no early exit, and returns 1 in %eax if any byte differed and 0
# otherwise (0 as well for len == 0).
&function_begin_B("CRYPTO_memcmp");
	&push	("esi");
	&push	("edi");
	&mov	("esi",&wparam(0));
	&mov	("edi",&wparam(1));
	&mov	("ecx",&wparam(2));
	&xor	("eax","eax");
	&xor	("edx","edx");
	&cmp	("ecx",0);
	&je	(&label("no_data"));
&set_label("loop");
	&mov	("dl",&BP(0,"esi"));
	&lea	("esi",&DWP(1,"esi"));
	&xor	("dl",&BP(0,"edi"));
	&lea	("edi",&DWP(1,"edi"));
	&or	("al","dl");
	&dec	("ecx");
	&jnz	(&label("loop"));
	&neg	("eax");		# non-zero accumulator -> sign bit set
	&shr	("eax",31);		# ... -> 1, zero stays 0
&set_label("no_data");
	&pop	("edi");
	&pop	("esi");
	&ret	();
&function_end_B("CRYPTO_memcmp");
{
# Register assignments shared by the two bus instrumentation routines.
my $lasttick = "esi";
my $lastdiff = "ebx";
my $out = "edi";
my $cnt = "ecx";
my $max = "ebp";

# OPENSSL_instrument_bus(out, cnt)
#
# For each of cnt dwords at out: read the time-stamp counter, flush the
# dword's cache line and lock-add the tick difference since the previous
# reading to it.  Returns cnt, or 0 when TSC or CLFLUSH is unavailable or
# the script was run without -DOPENSSL_IA32_SSE2.
&function_begin("OPENSSL_instrument_bus");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));

	# collect 1st tick
	&rdtsc	();
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);
	&jmp	(&label("loop"));

&set_label("loop",16);
	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff
	&lea	($out,&DWP(4,$out));	# ++$out
	&sub	($cnt,1);		# --$cnt
	&jnz	(&label("loop"));

	&mov	("eax",&wparam(1));
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus");

# OPENSSL_instrument_bus2(out, cnt, max)
#
# Like OPENSSL_instrument_bus, but the output pointer and cnt only advance
# when a tick difference is not equal to the previous one, and at most max
# probes are made.  Returns cnt minus the count still outstanding, or 0
# when TSC or CLFLUSH is unavailable or the script was run without
# -DOPENSSL_IA32_SSE2.
&function_begin("OPENSSL_instrument_bus2");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));
	&mov	($max,&wparam(2));

	&rdtsc	();			# collect 1st tick
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0

	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);

	&rdtsc	();			# collect 1st diff
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&jmp	(&label("loop2"));

&set_label("loop2",16);
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff

	&sub	($max,1);
	&jz	(&label("done2"));

	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&cmp	("eax",$lastdiff);
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&mov	("edx",0);
	&setne	("dl");
	&sub	($cnt,"edx");		# conditional --$cnt
	&lea	($out,&DWP(0,$out,"edx",4));	# conditional ++$out
	&jnz	(&label("loop2"));

&set_label("done2");
	&mov	("eax",&wparam(1));
	&sub	("eax",$cnt);
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus2");
}

# gen_random($rdop)
#
# Emits OPENSSL_ia32_${rdop}_bytes(buf, len), where $rdop is the mnemonic
# of the instruction used to obtain random words ("rdrand" or "rdseed"
# below).  The generated routine retries the instruction up to 8 times per
# word, stores whole dwords while at least 4 bytes remain and the tail byte
# by byte, and returns the number of bytes written in %eax (fewer than len
# if the instruction keeps failing).
sub gen_random {
my $rdop = shift;
&function_begin_B("OPENSSL_ia32_${rdop}_bytes");
	&push	("edi");
	&push	("ebx");
	&xor	("eax","eax");		# return value
	&mov	("edi",&wparam(0));
	&mov	("ebx",&wparam(1));

	&cmp	("ebx",0);
	&je	(&label("done"));

	&mov	("ecx",8);		# retry budget for one word
&set_label("loop");
	&${rdop}("edx");
	&jc	(&label("break"));	# carry set: edx holds a valid value
	&loop	(&label("loop"));
	&jmp	(&label("done"));

&set_label("break",16);
	&cmp	("ebx",4);
	&jb	(&label("tail"));
	&mov	(&DWP(0,"edi"),"edx");
	&lea	("edi",&DWP(4,"edi"));
	&add	("eax",4);
	&sub	("ebx",4);
	&jz	(&label("done"));
	&mov	("ecx",8);
	&jmp	(&label("loop"));

&set_label("tail",16);
	&mov	(&BP(0,"edi"),"dl");
	&lea	("edi",&DWP(1,"edi"));
	&inc	("eax");
	&shr	("edx",8);
	&dec	("ebx");
	&jnz	(&label("tail"));

&set_label("done");
	&xor	("edx","edx");		# Clear random value from registers
	&pop	("ebx");
	&pop	("edi");
	&ret	();
&function_end_B("OPENSSL_ia32_${rdop}_bytes");
}
&gen_random("rdrand");
&gen_random("rdseed");

&initseg("OPENSSL_cpuid_setup");

&hidden("OPENSSL_cpuid_setup");
&hidden("OPENSSL_ia32cap_P");

&asm_finish();

# Buffered write errors only surface here, so the check matters.
close STDOUT or die "error closing STDOUT: $!";