# xref: /freebsd/crypto/openssl/crypto/x86cpuid.pl (revision 10ff414c)
#! /usr/bin/env perl
# Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Generator set-up.
#
# Command line as used below: the first argument is handed to asm_init(),
# any argument containing -DOPENSSL_IA32_SSE2 enables the SSE2 code paths,
# and the LAST argument names the file the generated assembly is written to.

# Directory part of the script path (including the trailing separator), so
# that perlasm/x86asm.pl is found next to this script; empty when $0 has no
# directory component.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

# Everything printed to STDOUT from here on goes to the output file.  Fail
# loudly if it cannot be created instead of silently discarding the output.
$output = pop;
defined($output) or die "$0: missing output file argument\n";
open OUT, ">", $output or die "can't open $output: $!";
*STDOUT=*OUT;

&asm_init($ARGV[0]);

for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

# OPENSSL_ia32_cpuid(ptr)
#
# Emits a routine that probes the processor with CPUID and returns the
# adjusted leaf-1 feature words: EDX in %eax and ECX in %edx.  The dword
# at offset 8 from the pointer passed as first argument is zeroed up
# front and, when leaf 7 is available, receives that leaf's EBX word.
# If the CPUID instruction is not usable both return registers are 0.
#
# Register roles inside the routine:
#   edi - highest standard CPUID leaf, later the argument pointer
#   ebp - 0 on an Intel CPU, non-zero otherwise; from "generic" on it is
#         the working copy of the ECX feature word
#   esi - sharing/core count used by the hyper-threading fix-up; from
#         "generic" on it is the working copy of the EDX feature word
# function_begin/function_end come from x86asm.pl and presumably take
# care of saving and restoring these registers - confirm there.
&function_begin("OPENSSL_ia32_cpuid");
	&xor	("edx","edx");
	# Toggle bit 21 of EFLAGS and read the flags back; if the bit does
	# not stick, the CPUID instruction is treated as unavailable.
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");
	&xor	("eax",1<<21);
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");		# ecx = flag bits that actually changed
	&xor	("eax","eax");
	&mov	("esi",&wparam(0));
	&mov	(&DWP(8,"esi"),"eax");	# clear extended feature flags
	&bt	("ecx",21);
	&jnc	(&label("nocpuid"));
	&cpuid	();			# leaf 0 (eax was zeroed above)
	&mov	("edi","eax");		# max value for standard query level

	# Compare the vendor string against "GenuineIntel"; every mismatch
	# contributes a 1 to ebp.
	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&setne	(&LB("eax"));
	&mov	("ebp","eax");
	&cmp	("edx",0x49656e69);	# "ineI"
	&setne	(&LB("eax"));
	&or	("ebp","eax");
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&setne	(&LB("eax"));
	&or	("ebp","eax");		# 0 indicates Intel CPU
	&jz	(&label("intel"));

	# Same scheme for "AuthenticAMD", accumulated in esi.
	&cmp	("ebx",0x68747541);	# "Auth"
	&setne	(&LB("eax"));
	&mov	("esi","eax");
	&cmp	("edx",0x69746E65);	# "enti"
	&setne	(&LB("eax"));
	&or	("esi","eax");
	&cmp	("ecx",0x444D4163);	# "cAMD"
	&setne	(&LB("eax"));
	&or	("esi","eax");		# 0 indicates AMD CPU
	&jnz	(&label("intel"));

	# AMD specific
	&mov	("eax",0x80000000);
	&cpuid	();			# eax = highest extended leaf
	&cmp	("eax",0x80000001);
	&jb	(&label("intel"));
	&mov	("esi","eax");
	&mov	("eax",0x80000001);
	&cpuid	();
	&or	("ebp","ecx");
	# Bit 0 is kept alongside XOP so that ebp stays non-zero, i.e. keeps
	# reading as "not Intel" in the ebp test after "nocacheinfo".
	&and	("ebp",1<<11|1);	# isolate XOP bit
	&cmp	("esi",0x80000008);
	&jb	(&label("intel"));

	&mov	("eax",0x80000008);
	&cpuid	();
	&movz	("esi",&LB("ecx"));	# number of cores - 1
	&inc	("esi");		# number of cores

	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&bt	("edx",28);
	&jnc	(&label("generic"));
	# Compare bits 16..23 of leaf-1 EBX with the core count; keep the
	# hyper-threading bit only when that field exceeds the core count.
	&shr	("ebx",16);
	&and	("ebx",0xff);
	&cmp	("ebx","esi");
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit
	&jmp	(&label("generic"));

# Despite its name this label is also the path taken by non-Intel,
# non-AMD vendors and by AMD parts without the extended leaves; ebp
# tells the cases apart further down.
&set_label("intel");
	&cmp	("edi",4);
	&mov	("esi",-1);		# mov leaves the flags from cmp intact
	&jb	(&label("nocacheinfo"));

	&mov	("eax",4);
	&mov	("ecx",0);		# query L1D
	&cpuid	();
	&mov	("esi","eax");
	&shr	("esi",14);
	&and	("esi",0xfff);		# number of cores -1 per L1D

&set_label("nocacheinfo");
	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&and	("edx",0xbfefffff);	# force reserved bits #20, #30 to 0
	&cmp	("ebp",0);
	&jne	(&label("notintel"));
	&or	("edx",1<<30);		# set reserved bit#30 on Intel CPUs
	&and	(&HB("eax"),15);	# family ID
	&cmp	(&HB("eax"),15);	# P4?
	&jne	(&label("notintel"));
	&or	("edx",1<<20);		# set reserved bit#20 to engage RC4_CHAR
&set_label("notintel");
	&bt	("edx",28);		# test hyper-threading bit
	&jnc	(&label("generic"));
	# The bit is cleared first and only restored when the L1D sharing
	# count (esi) is non-zero and bits 16..23 of EBX are greater than 1.
	&and	("edx",0xefffffff);
	&cmp	("esi",0);
	&je	(&label("generic"));

	&or	("edx",0x10000000);
	&shr	("ebx",16);
	&cmp	(&LB("ebx"),1);
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit if not

&set_label("generic");
	&and	("ebp",1<<11);		# isolate AMD XOP flag
	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
	&mov	("esi","edx");		# %ebp:%esi is copy of %ecx:%edx
	&or	("ebp","ecx");		# merge AMD XOP flag

	&cmp	("edi",7);
	&mov	("edi",&wparam(0));	# flags from cmp survive the mov
	&jb	(&label("no_extended_info"));
	&mov	("eax",7);
	&xor	("ecx","ecx");
	&cpuid	();
	&mov	(&DWP(8,"edi"),"ebx");	# save extended feature flag
&set_label("no_extended_info");

	&bt	("ebp",27);		# check OSXSAVE bit
	&jnc	(&label("clear_avx"));
	&xor	("ecx","ecx");
	&data_byte(0x0f,0x01,0xd0);	# xgetbv
	# Bits 1 and 2 of the result decide what stays enabled: both set
	# keeps everything, only bit 1 drops the AVX group, anything else
	# additionally drops the bits cleared under "clear_xmm".
	&and	("eax",6);
	&cmp	("eax",6);
	&je	(&label("done"));
	&cmp	("eax",2);
	&je	(&label("clear_avx"));
&set_label("clear_xmm");
	&and	("ebp",0xfdfffffd);	# clear AESNI and PCLMULQDQ bits
	&and	("esi",0xfeffffff);	# clear FXSR
&set_label("clear_avx");
	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
&set_label("done");
	&mov	("eax","esi");
	&mov	("edx","ebp");
&set_label("nocpuid");
&function_end("OPENSSL_ia32_cpuid");

# The capability vector is defined outside this module.
&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc: returns the time-stamp counter in %edx:%eax when bit 4
# (TSC) of the first OPENSSL_ia32cap_P word is set, and 0 otherwise.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");	# ecx = address of the vector
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("notsc"));
	&rdtsc	();
&set_label("notsc");
	&ret	();
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# The routine returns, in %edx:%eax, the number of time-stamp ticks that
# elapsed across a HLT instruction.  It returns 0 without halting when
# there is no TSC, when the low two bits of %cs are non-zero, or when
# interrupts are disabled (flags bit 9 clear).
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	# Park the starting tick on the stack, halt, then subtract it from
	# the tick read after wake-up (64-bit subtraction via sub/sbb).
	&rdtsc	();
	&push	("edx");
	&push	("eax");
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);		# drop the saved tick
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.
#
# First stack argument: value loaded into %ds for the duration of the
# spin; second stack argument: offset of the dword to watch.  %eax
# returns the number of loop iterations until that dword changed, or 0
# (with %edx also 0) when interrupts are disabled.

&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));
	&mov	("ecx",&DWP(8,"esp"));
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");
	&mov	("edx",&DWP(0,"ecx"));	# reference value to compare against
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));
	&je	(&label("spin"));

	&data_word (0x1f909090);	# pop	%ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu: zeroes %eax and %edx, and, depending on the first
# OPENSSL_ia32cap_P word, the XMM registers (SSE2 builds only) and the
# x87/MMX register bank.  Returns in %eax the address just above the
# return address (lea 4(%esp)).
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));	# ecx = capability word (a value, no longer a pointer)
	# Test the bit in the register.  The previous code used ecx as a
	# memory operand here, i.e. dereferenced the capability value as if
	# it were an address.
	# NOTE(review): the bit index is carried over unchanged; confirm it
	# is the capability bit intended to gate the x87 wipe.
	&bt	("ecx",1);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&and	("ecx",1<<26|1<<24);	# check SSE2 and FXSR bits
		&cmp	("ecx",1<<26|1<<24);
		&jne	(&label("no_sse2"));
		# zero xmm0..xmm7
		for my $i (0..7) {
			&pxor	("xmm$i","xmm$i");
		}
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add(ptr, amount): atomically adds the second argument to
# the dword the first argument points at, retrying a locked
# compare-and-exchange until it succeeds, and returns the new value.
&function_begin_B("OPENSSL_atomic_add");
	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push	("ebx");
	&nop	();
	&mov	("eax",&DWP(0,"edx"));	# current value, the cmpxchg comparand
&set_label("spin");
	&lea	("ebx",&DWP(0,"eax","ecx"));	# ebx = candidate new value
	&nop	();
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne	(&label("spin"));
	&mov	("eax","ebx");	# OpenSSL expects the new value
	&pop	("ebx");
	&ret	();
&function_end_B("OPENSSL_atomic_add");

# OPENSSL_cleanse(ptr, len): overwrites len bytes at ptr with zeros.
# Fewer than 7 bytes are cleared one byte at a time.  Longer buffers are
# first advanced byte-wise to a 4-byte boundary, then cleared a dword at
# a time, and any remaining 1..3 bytes go through the byte loop again.
&function_begin_B("OPENSSL_cleanse");
	&mov	("edx",&wparam(0));
	&mov	("ecx",&wparam(1));
	&xor	("eax","eax");		# the fill value
	&cmp	("ecx",7);
	&jae	(&label("lot"));
	&cmp	("ecx",0);
	&je	(&label("ret"));
&set_label("little");
	&mov	(&BP(0,"edx"),"al");
	&sub	("ecx",1);
	&lea	("edx",&DWP(1,"edx"));	# lea keeps the flags set by sub
	&jnz	(&label("little"));
&set_label("ret");
	&ret	();

&set_label("lot",16);
	&test	("edx",3);
	&jz	(&label("aligned"));
	&mov	(&BP(0,"edx"),"al");
	&lea	("ecx",&DWP(-1,"ecx"));
	&lea	("edx",&DWP(1,"edx"));
	&jmp	(&label("lot"));
&set_label("aligned");
	&mov	(&DWP(0,"edx"),"eax");
	&lea	("ecx",&DWP(-4,"ecx"));
	&test	("ecx",-4);		# at least 4 bytes still to go?
	&lea	("edx",&DWP(4,"edx"));
	&jnz	(&label("aligned"));
	&cmp	("ecx",0);
	&jne	(&label("little"));
	&ret	();
&function_end_B("OPENSSL_cleanse");

# CRYPTO_memcmp(a, b, len): returns 0 in %eax when the two buffers hold
# the same len bytes (or len is 0) and 1 otherwise.  The loop has no
# early exit: the XOR of every byte pair is OR-ed into %al and the
# verdict is derived only after the last byte.
&function_begin_B("CRYPTO_memcmp");
	&push	("esi");
	&push	("edi");
	&mov	("esi",&wparam(0));
	&mov	("edi",&wparam(1));
	&mov	("ecx",&wparam(2));
	&xor	("eax","eax");
	&xor	("edx","edx");
	&cmp	("ecx",0);
	&je	(&label("no_data"));
&set_label("loop");
	&mov	("dl",&BP(0,"esi"));
	&lea	("esi",&DWP(1,"esi"));
	&xor	("dl",&BP(0,"edi"));
	&lea	("edi",&DWP(1,"edi"));
	&or	("al","dl");
	&dec	("ecx");
	&jnz	(&label("loop"));
	# Collapse "any difference" (eax != 0) to 1: negating a non-zero
	# value in 1..255 sets the top bit, which is then shifted down.
	&neg	("eax");
	&shr	("eax",31);
&set_label("no_data");
	&pop	("edi");
	&pop	("esi");
	&ret	();
&function_end_B("CRYPTO_memcmp");
# Bus instrumentation helpers.  Both routines sample the low 32 bits of
# the time-stamp counter around a clflush + locked add on the output
# array.  Without -DOPENSSL_IA32_SSE2, or when the capability word lacks
# the TSC (bit 4) or CLFLUSH (bit 19) flag, they simply return 0.
{
# Register assignment shared by the two routines below.
my $lasttick = "esi";
my $lastdiff = "ebx";
my $out = "edi";
my $cnt = "ecx";
my $max = "ebp";

# OPENSSL_instrument_bus(out, cnt): adds one tick difference to each of
# the cnt dwords at out and returns cnt.
&function_begin("OPENSSL_instrument_bus");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));

	# collect 1st tick
	&rdtsc	();
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);
	&jmp	(&label("loop"));

&set_label("loop",16);
	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff
	&lea	($out,&DWP(4,$out));	# ++$out
	&sub	($cnt,1);		# --$cnt
	&jnz	(&label("loop"));

	&mov	("eax",&wparam(1));
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus");

# OPENSSL_instrument_bus2(out, cnt, max): like the routine above, but the
# output position only advances (and cnt only drops) when a tick
# difference differs from the previous one.  Sampling stops when cnt
# reaches 0 or after max locked adds; the return value is the original
# cnt minus what is left of it.
&function_begin("OPENSSL_instrument_bus2");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));
	&mov	($max,&wparam(2));

	&rdtsc	();			# collect 1st tick
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0

	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);

	&rdtsc	();			# collect 1st diff
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&jmp	(&label("loop2"));

&set_label("loop2",16);
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff

	&sub	($max,1);
	&jz	(&label("done2"));

	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&cmp	("eax",$lastdiff);
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&mov	("edx",0);		# both movs keep the flags from cmp
	&setne	("dl");			# edx = 1 if diff changed, else 0
	&sub	($cnt,"edx");		# conditional --$cnt
	&lea	($out,&DWP(0,$out,"edx",4));	# conditional ++$out
	&jnz	(&label("loop2"));	# flags still those of the sub on $cnt

&set_label("done2");
	&mov	("eax",&wparam(1));
	&sub	("eax",$cnt);
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus2");
}

# gen_random($rdop)
#
# Emits OPENSSL_ia32_${rdop}_bytes(buf, len), where $rdop is the name of
# the instruction used to obtain a random dword ("rdrand" or "rdseed"
# below).  The generated routine fills buf with up to len bytes and
# returns the number of bytes actually written in %eax.  Each dword is
# attempted at most 8 times; if the instruction never reports success
# (carry flag) within those attempts the routine stops early.
sub gen_random {
my $rdop = shift;
&function_begin_B("OPENSSL_ia32_${rdop}_bytes");
	&push	("edi");
	&push	("ebx");
	&xor	("eax","eax");		# return value
	&mov	("edi",&wparam(0));
	&mov	("ebx",&wparam(1));

	&cmp	("ebx",0);
	&je	(&label("done"));

	&mov	("ecx",8);		# retry budget for one dword
&set_label("loop");
	&${rdop}("edx");
	&jc	(&label("break"));
	&loop	(&label("loop"));
	&jmp	(&label("done"));	# budget exhausted, give up

&set_label("break",16);
	&cmp	("ebx",4);
	&jb	(&label("tail"));
	&mov	(&DWP(0,"edi"),"edx");
	&lea	("edi",&DWP(4,"edi"));
	&add	("eax",4);
	&sub	("ebx",4);
	&jz	(&label("done"));
	&mov	("ecx",8);		# fresh budget for the next dword
	&jmp	(&label("loop"));

# Fewer than 4 bytes left: hand out the dword one byte at a time.
&set_label("tail",16);
	&mov	(&BP(0,"edi"),"dl");
	&lea	("edi",&DWP(1,"edi"));
	&inc	("eax");
	&shr	("edx",8);
	&dec	("ebx");
	&jnz	(&label("tail"));

&set_label("done");
	&xor	("edx","edx");		# Clear random value from registers
	&pop	("ebx");
	&pop	("edi");
	&ret	();
&function_end_B("OPENSSL_ia32_${rdop}_bytes");
}
&gen_random("rdrand");
&gen_random("rdseed");

# Module epilogue.  initseg(), hidden() and asm_finish() are provided by
# x86asm.pl; going by their names they register OPENSSL_cpuid_setup as
# an initializer, mark the two symbols as hidden and flush the generated
# assembly - confirm against x86asm.pl.
&initseg("OPENSSL_cpuid_setup");

&hidden("OPENSSL_cpuid_setup");
&hidden("OPENSSL_ia32cap_P");

&asm_finish();

# Buffered write errors only surface at close, so check it.
close STDOUT or die "error closing STDOUT: $!";
