#!/usr/bin/env perl
#
# Generator script for the x86 CPU-identification helpers. Every
# &mnemonic(...) call below is expected to be provided by the code pulled
# in through x86asm.pl. $ARGV[0] is passed straight to asm_init
# (presumably it selects the output flavour -- confirm against x86asm.pl).

push(@INC,"perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],"x86cpuid");

# $sse2 stays a package variable, as it always was; it is switched on
# when any command-line argument contains -DOPENSSL_IA32_SSE2.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

# OPENSSL_ia32_cpuid
#
# Returns in eax the edx word produced by CPUID leaf 1, with two
# adjustments made below (bit 20 may be set, bit 28 may be cleared), and
# in edx the ecx word. edx is zeroed before anything else, so eax is 0
# when bit 21 of EFLAGS cannot be toggled and CPUID is therefore skipped.
&function_begin("OPENSSL_ia32_cpuid");
    &xor    ("edx","edx");
    # Try to flip bit 21 of EFLAGS and see whether the change sticks.
    &pushf  ();
    &pop    ("eax");
    &mov    ("ecx","eax");          # keep the original flags for comparison
    &xor    ("eax",1<<21);
    &push   ("eax");
    &popf   ();
    &pushf  ();
    &pop    ("eax");
    &xor    ("ecx","eax");          # ecx = bits that actually changed
    &bt     ("ecx",21);
    &jnc    (&label("done"));       # bit 21 did not toggle: no CPUID
    # Leaf 0: compare the vendor string. Each mismatch sets al to 1 and is
    # accumulated in ebp, so ebp ends up 0 only if all three words match.
    &xor    ("eax","eax");
    &cpuid  ();
    &xor    ("eax","eax");
    &cmp    ("ebx",0x756e6547);     # "Genu"
    &data_byte(0x0f,0x95,0xc0);     #&setne (&LB("eax"));
    &mov    ("ebp","eax");
    &cmp    ("edx",0x49656e69);     # "ineI"
    &data_byte(0x0f,0x95,0xc0);     #&setne (&LB("eax"));
    &or     ("ebp","eax");
    &cmp    ("ecx",0x6c65746e);     # "ntel"
    &data_byte(0x0f,0x95,0xc0);     #&setne (&LB("eax"));
    &or     ("ebp","eax");
    # Leaf 1: feature words land in edx/ecx, family information in eax.
    &mov    ("eax",1);
    &cpuid  ();
    &cmp    ("ebp",0);
    &jne    (&label("notP4"));      # vendor string did not match
    &and    ("eax",15<<8);          # family ID
    &cmp    ("eax",15<<8);          # P4?
    &jne    (&label("notP4"));
    &or     ("edx",1<<20);          # use reserved bit to engage RC4_CHAR
&set_label("notP4");
    &bt     ("edx",28);             # test hyper-threading bit
    &jnc    (&label("done"));
    &shr    ("ebx",16);
    &and    ("ebx",0xff);           # keep bits 16..23 of ebx only
    &cmp    ("ebx",1);              # see if cache is shared(*)
    &ja     (&label("done"));
    &and    ("edx",0xefffffff);     # clear hyper-threading bit if not
&set_label("done");
    &mov    ("eax","edx");
    &mov    ("edx","ecx");
&function_end("OPENSSL_ia32_cpuid");
# (*) on Core2 this value is set to 2 denoting the fact that L2
#     cache is shared between cores.
# The routines below test bits of OPENSSL_ia32cap_P, which is declared
# here as an external symbol.
&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc
#
# Returns the RDTSC result in edx:eax when bit 4 of OPENSSL_ia32cap_P is
# set; otherwise both registers stay zero.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    &xor    ("eax","eax");
    &xor    ("edx","edx");
    &picmeup("ecx","OPENSSL_ia32cap_P");
    &bt     (&DWP(0,"ecx"),4);
    &jnc    (&label("notsc"));
    &rdtsc  ();
&set_label("notsc");
    &ret    ();
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# On success edx:eax holds the difference between the RDTSC values read
# after and before the halt instruction; every bail-out path returns 0.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    &picmeup("ecx","OPENSSL_ia32cap_P");
    &bt     (&DWP(0,"ecx"),4);
    &jnc    (&label("nohalt"));     # no TSC

    &data_word(0x9058900e);         # push %cs; pop %eax
    &and    ("eax",3);
    &jnz    (&label("nohalt"));     # not enough privileges

    &pushf  ();
    # The terminating semicolon matters: without it this statement and the
    # next one fuse into a single bitwise-AND expression of two calls.
    &pop    ("eax");
    &bt     ("eax",9);
    &jnc    (&label("nohalt"));     # interrupts are disabled

    &rdtsc  ();
    &push   ("edx");                # park the starting counter on the stack
    &push   ("eax");
    &halt   ();
    &rdtsc  ();

    &sub    ("eax",&DWP(0,"esp"));  # 64-bit subtract: low word ...
    &sbb    ("edx",&DWP(4,"esp"));  # ... then high word with borrow
    &add    ("esp",8);              # drop the two parked words
    &ret    ();

&set_label("nohalt");
    &xor    ("eax","eax");
    &xor    ("edx","edx");
    &ret    ();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.
# OPENSSL_far_spin
#
# Takes two stack arguments: the first is loaded into %ds, the second is
# used as an address. Counts in eax how many loop iterations pass until
# the dword at that address changes. Returns 0 when bit 9 of the flags
# is clear.
&function_begin_B("OPENSSL_far_spin");
    &pushf  ();
    # Terminated explicitly so that it no longer fuses with the following
    # call into one bitwise-AND expression.
    &pop    ("eax");
    &bt     ("eax",9);
    &jnc    (&label("nospin"));     # interrupts are disabled

    &mov    ("eax",&DWP(4,"esp"));
    &mov    ("ecx",&DWP(8,"esp"));
    &data_word (0x90d88e1e);        # push %ds, mov %eax,%ds
    &xor    ("eax","eax");
    &mov    ("edx",&DWP(0,"ecx"));  # reference value to compare against
    &jmp    (&label("spin"));

    &align  (16);
&set_label("spin");
    &inc    ("eax");
    &cmp    ("edx",&DWP(0,"ecx"));
    &je     (&label("spin"));       # unchanged so far, keep counting

    &data_word (0x1f909090);        # pop %ds
    &ret    ();

&set_label("nospin");
    &xor    ("eax","eax");
    &xor    ("edx","edx");
    &ret    ();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu
#
# Zeroes eax and edx, clears the SSE2 registers (only when the script was
# run with -DOPENSSL_IA32_SSE2 and bit 26 of OPENSSL_ia32cap_P is set) and
# the fp/mm bank (when bit 1 is set), then returns esp+4 in eax.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    &xor    ("eax","eax");
    &xor    ("edx","edx");
    &picmeup("ecx","OPENSSL_ia32cap_P");
    # ecx is used as the address of the capability word from here on, the
    # same way OPENSSL_rdtsc and OPENSSL_instrument_halt use it. An extra
    # "mov ecx,[ecx]" used to sit here, which made the memory-operand bit
    # tests below dereference the capability bits as if they were a pointer.
    &bt     (&DWP(0,"ecx"),1);
    &jnc    (&label("no_x87"));
    if ($sse2) {
        &bt     (&DWP(0,"ecx"),26);
        &jnc    (&label("no_sse2"));
        &pxor   ("xmm0","xmm0");
        &pxor   ("xmm1","xmm1");
        &pxor   ("xmm2","xmm2");
        &pxor   ("xmm3","xmm3");
        &pxor   ("xmm4","xmm4");
        &pxor   ("xmm5","xmm5");
        &pxor   ("xmm6","xmm6");
        &pxor   ("xmm7","xmm7");
    &set_label("no_sse2");
    }
    # just a bunch of fldz to zap the fp/mm bank followed by finit...
    &data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
    &lea    ("eax",&DWP(4,"esp"));
    &ret    ();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add
#
# Adds the second argument to the dword addressed by the first one using
# a lock-prefixed cmpxchg retry loop, and returns the new value.
&function_begin_B("OPENSSL_atomic_add");
    &mov    ("edx",&DWP(4,"esp"));  # fetch the pointer, 1st arg
    &mov    ("ecx",&DWP(8,"esp"));  # fetch the increment, 2nd arg
    &push   ("ebx");
    &nop    ();
    &mov    ("eax",&DWP(0,"edx"));
&set_label("spin");
    &lea    ("ebx",&DWP(0,"eax","ecx"));    # candidate: current + increment
    &nop    ();
    &data_word(0x1ab10ff0);         # lock; cmpxchg %ebx,(%edx) # %eax is involved and is always reloaded
    &jne    (&label("spin"));
    &mov    ("eax","ebx");          # OpenSSL expects the new value
    &pop    ("ebx");
    &ret    ();
&function_end_B("OPENSSL_atomic_add");

# This function can become handy under Win32 in situations when
# we don't know which calling convention, __stdcall or __cdecl(*),
# indirect callee is using. In C it can be deployed as
#
#ifdef OPENSSL_CPUID_OBJ
#	type OPENSSL_indirect_call(void *f,...);
#	...
#	OPENSSL_indirect_call(func,[up to $max arguments]);
#endif
#
# (*)	it's designed to work even for __fastcall if number of
#	arguments is 1 or 2!
&function_begin_B("OPENSSL_indirect_call");
    {
    # Declared on its own: "my $i,$max=7" made only $i lexical and left
    # $max behind as a package global.
    my $max=7;                      # $max has to be chosen as 4*n-1
                                    # in order to preserve eventual
                                    # stack alignment
    &push   ("ebp");
    &mov    ("ebp","esp");
    &sub    ("esp",$max*4);
    # The first two arguments are also left in ecx and edx.
    &mov    ("ecx",&DWP(12,"ebp"));
    &mov    (&DWP(0,"esp"),"ecx");
    &mov    ("edx",&DWP(16,"ebp"));
    &mov    (&DWP(4,"esp"),"edx");
    for my $i (2 .. $max-1)
        {
        # Some copies will be redundant/bogus...
        &mov    ("eax",&DWP(12+$i*4,"ebp"));
        &mov    (&DWP(0+$i*4,"esp"),"eax");
        }
    &call_ptr   (&DWP(8,"ebp"));    # make the call...
    &mov    ("esp","ebp");          # ... and just restore the stack pointer
                                    # without paying attention to what we called,
                                    # (__cdecl *func) or (__stdcall *one).
    &pop    ("ebp");
    &ret    ();
    }
&function_end_B("OPENSSL_indirect_call");

&initseg("OPENSSL_cpuid_setup");

&asm_finish();