xref: /freebsd/crypto/openssl/crypto/x86cpuid.pl (revision aa0a1e58)
1#!/usr/bin/env perl
2
3push(@INC,"perlasm");
4require "x86asm.pl";
5
# Initialise the perlasm back-end with the first command-line argument
# and the string "x86cpuid".
&asm_init($ARGV[0],"x86cpuid");

# SSE2 code is generated only if -DOPENSSL_IA32_SSE2 appears somewhere
# among the command-line arguments.
$sse2=1 if (grep(/-DOPENSSL_IA32_SSE2/,@ARGV));
9
# OPENSSL_ia32_cpuid: probes for the CPUID instruction and, if it is
# available, returns the (slightly adjusted) edx feature word from
# CPUID leaf 1 in eax and the leaf-1 ecx word in edx.  If bit 21 of
# EFLAGS cannot be toggled, CPUID is never executed and eax is zero.
&function_begin("OPENSSL_ia32_cpuid");
	{
	my $ID_BIT=21;			# EFLAGS bit flipped to probe for CPUID
	my $HTT_BIT=28;			# hyper-threading bit in edx
	my $FAMILY_MASK=15<<8;		# family ID field of leaf-1 eax
	# registers holding the vendor string after CPUID leaf 0
	my @genuine_intel=(
		["ebx",0x756e6547],	# "Genu"
		["edx",0x49656e69],	# "ineI"
		["ecx",0x6c65746e],	# "ntel"
	);

	&xor	("edx","edx");

	# flip the probe bit in EFLAGS and see whether the change sticks
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");
	&xor	("eax",1<<$ID_BIT);
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");
	&bt	("ecx",$ID_BIT);
	&jnc	(&label("done"));	# bit did not change, skip CPUID

	# leaf 0: ebp ends up zero only if all three vendor words match
	&xor	("eax","eax");
	&cpuid	();
	&xor	("eax","eax");
	my $first=1;
	foreach my $part (@genuine_intel) {
		my ($reg,$magic)=@$part;
		&cmp	($reg,$magic);
		&data_byte(0x0f,0x95,0xc0);	#&setne	(&LB("eax"));
		if ($first)	{ &mov	("ebp","eax"); $first=0; }
		else		{ &or	("ebp","eax"); }
	}

	# leaf 1: feature words
	&mov	("eax",1);
	&cpuid	();
	&cmp	("ebp",0);
	&jne	(&label("notP4"));
	&and	("eax",$FAMILY_MASK);	# family ID
	&cmp	("eax",$FAMILY_MASK);	# P4?
	&jne	(&label("notP4"));
	&or	("edx",1<<20);		# use reserved bit to engage RC4_CHAR
&set_label("notP4");
	&bt	("edx",$HTT_BIT);	# test hyper-threading bit
	&jnc	(&label("done"));
	&shr	("ebx",16);
	&and	("ebx",0xff);
	&cmp	("ebx",1);		# see if cache is shared(*)
	&ja	(&label("done"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit if not
&set_label("done");
	&mov	("eax","edx");
	&mov	("edx","ecx");
	}
&function_end("OPENSSL_ia32_cpuid");
55# (*)	on Core2 this value is set to 2 denoting the fact that L2
56#	cache is shared between cores.
57
&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc: returns the time-stamp counter in edx:eax, or zero in
# both registers when bit 4 of OPENSSL_ia32cap_P is clear.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	{
	my $TSC_BIT=4;		# capability bit checked before using rdtsc

	# default result: edx:eax = 0
	foreach my $half ("eax","edx") { &xor($half,$half); }
	# presumably leaves the address of OPENSSL_ia32cap_P in ecx --
	# picmeup is defined in x86asm.pl, confirm there
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),$TSC_BIT);
	&jnc	(&label("notsc"));
	&rdtsc	();
&set_label("notsc");
	&ret	();
	}
&function_end_B("OPENSSL_rdtsc");
70
# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# OPENSSL_instrument_halt: reads the time-stamp counter, halts, reads
# the counter again and returns the difference in edx:eax.  Returns
# zero in edx:eax if bit 4 of OPENSSL_ia32cap_P is clear (no TSC), if
# the low two bits of %cs are non-zero, or if EFLAGS bit 9 is clear.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	&rdtsc	();
	&push	("edx");		# save the "before" reading on the stack
	&push	("eax");
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));	# edx:eax = after - before
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);		# drop the saved reading
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");
104
105# Essentially there is only one use for this function. Under DJGPP:
106#
107#	#include <go32.h>
108#	...
109#	i=OPENSSL_far_spin(_dos_ds,0x46c);
110#	...
111# to obtain the number of spins till closest timer interrupt.
112
# OPENSSL_far_spin(selector,offset): loads the first argument into %ds,
# then counts in eax how many loop iterations pass until the 32-bit
# value at the second argument's address changes.  Returns zero in
# edx:eax without spinning if EFLAGS bit 9 is clear.
&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));	# 1st arg, goes into %ds
	&mov	("ecx",&DWP(8,"esp"));	# 2nd arg, address to watch
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");		# spin counter
	&mov	("edx",&DWP(0,"ecx"));	# reference value
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));
	&je	(&label("spin"));	# keep going until the value changes

	&data_word (0x1f909090);	# pop	%ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");
140
# OPENSSL_wipe_cpu: zeroes eax and edx, clears xmm0-xmm7 (only when
# built with SSE2 support and capability bit 26 is set) and zaps the
# x87/MMX register bank (only when capability bit 1 is set).  Returns
# in eax the address of the stack slot right above the return address.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));	# ecx now holds the capability word itself,
					# so test the register, not memory at (%ecx)
	# NOTE(review): bit 1 guards the x87 path; confirm this is the
	# intended capability bit.
	&bt	("ecx",1);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&bt	("ecx",26);
		&jnc	(&label("no_sse2"));
		for my $n (0..7) { &pxor("xmm$n","xmm$n"); }
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");
167
# OPENSSL_atomic_add(ptr,increment): adds the increment to the 32-bit
# value at ptr with a lock cmpxchg retry loop and returns the new value
# in eax.
&function_begin_B("OPENSSL_atomic_add");
	{
	# The register assignment is fixed: the hand-encoded cmpxchg below
	# hard-codes %ebx and (%edx), and cmpxchg itself implies %eax.
	my ($cur,$sum,$inc,$ptr)=("eax","ebx","ecx","edx");

	&mov	($ptr,&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov	($inc,&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push	($sum);
	&nop	();
	&mov	($cur,&DWP(0,$ptr));	# current value
&set_label("spin");
	&lea	($sum,&DWP(0,$cur,$inc));	# candidate new value
	&nop	();
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne	(&label("spin"));	# value changed under us, retry
	&mov	($cur,$sum);	# OpenSSL expects the new value
	&pop	($sum);
	&ret	();
	}
&function_end_B("OPENSSL_atomic_add");
183
184# This function can become handy under Win32 in situations when
185# we don't know which calling convention, __stdcall or __cdecl(*),
186# indirect callee is using. In C it can be deployed as
187#
188#ifdef OPENSSL_CPUID_OBJ
189#	type OPENSSL_indirect_call(void *f,...);
190#	...
191#	OPENSSL_indirect_call(func,[up to $max arguments]);
192#endif
193#
194# (*)	it's designed to work even for __fastcall if number of
195#	arguments is 1 or 2!
# OPENSSL_indirect_call(f,...): copies $max argument slots that follow
# the function pointer into a fresh stack area, calls f and then
# restores esp from ebp, so it does not matter whether the callee
# popped its own arguments.  The first two arguments are also left in
# ecx and edx.
&function_begin_B("OPENSSL_indirect_call");
	{
	my $max=7;		# $max has to be chosen as 4*n-1
				# in order to preserve eventual
				# stack alignment
	&push	("ebp");
	&mov	("ebp","esp");
	&sub	("esp",$max*4);		# room for the copied arguments
	&mov	("ecx",&DWP(12,"ebp"));	# 1st argument after f
	&mov	(&DWP(0,"esp"),"ecx");
	&mov	("edx",&DWP(16,"ebp"));	# 2nd argument after f
	&mov	(&DWP(4,"esp"),"edx");
	for my $i (2..$max-1)
		{
		# Some copies will be redundant/bogus...
		&mov	("eax",&DWP(12+$i*4,"ebp"));
		&mov	(&DWP(0+$i*4,"esp"),"eax");
		}
	&call_ptr	(&DWP(8,"ebp"));# make the call...
	&mov	("esp","ebp");	# ... and just restore the stack pointer
				# without paying attention to what we called,
				# (__cdecl *func) or (__stdcall *one).
	&pop	("ebp");
	&ret	();
	}
&function_end_B("OPENSSL_indirect_call");
222
# Hand the name of the setup routine to initseg (presumably registers
# it to run at load time -- see x86asm.pl), then finish the module.
{
my $setup_hook="OPENSSL_cpuid_setup";
&initseg($setup_hook);
}

&asm_finish();
226