1! Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
2!
3! Licensed under the Apache License 2.0 (the "License").  You may not use
4! this file except in compliance with the License.  You can obtain a copy
5! in the file LICENSE in the source distribution or at
6! https://www.openssl.org/source/license.html
7
! Select the 64-bit settings when one of the compiler macro pairs tested
! below reports a 64-bit SPARC target.
#if defined(__SUNPRO_C) && defined(__sparcv9)
# define ABI64  /* They've said -xarch=v9 at command line */
#elif defined(__GNUC__) && defined(__arch64__)
# define ABI64  /* They've said -m64 at command line */
#endif

! The two constants defined below are used throughout this file: one is
! the (negative) stack adjustment handed to every save instruction, the
! other is the offset added to %sp or %fp whenever one of them is turned
! into an address. The 64-bit branch also declares %g2 and %g3 as
! scratch registers to the assembler.
#ifdef ABI64
  .register	%g2,#scratch
  .register	%g3,#scratch
# define	FRAME	-192
# define	BIAS	2047
#else
# define	FRAME	-96
# define	BIAS	0
#endif
23
.text
.align	32
.global	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,#function
! OPENSSL_wipe_cpu: zero the integer and floating-point registers and
! hand back a stack pointer the caller can use for wiping the stack.
!
! Keep in mind that this does not excuse us from wiping the stack!
! This routine wipes registers, but not the backing store [which
! resides on the stack, toward lower addresses]. To facilitate for
! stack wiping I return pointer to the top of stack of the *caller*.
!
! Returns (in %o0 of the caller): frame pointer of this routine plus
! the stack bias.
! Not cleared: %g0, %g6, %g7, %o6 (%sp), %i6 (%fp) and %i7; the last
! three are still needed by the ret/restore pair at the end.
OPENSSL_wipe_cpu:
	save	%sp,FRAME,%sp
	nop
#ifdef __sun
#include <sys/trap.h>
	! On Solaris let the kernel do the register window cleaning.
	ta	ST_CLEAN_WINDOWS
#else
	! Elsewhere walk the register windows by hand, see below.
	call	.walk.reg.wins
#endif
	nop
	! Position-independent address of .zero: the delay slot loads the
	! distance from the call instruction to .zero into %o0 and the
	! helper adds %o7, which the call has set to its own address.
	call	.PIC.zero.up
	mov	.zero-(.-4),%o0
	! Both %f0 and %f1 are loaded from the first zero word; they are
	! the zero source for every floating-point move below.
	ld	[%o0],%f0
	ld	[%o0],%f1

	subcc	%g0,1,%o0
	! Following is V9 "rd %ccr,%o0" instruction. However! V8
	! specification says that it ("rd %asr2,%o0" in V8 terms) does
	! not cause illegal_instruction trap. It therefore can be used
	! to determine if the CPU the code is executing on is V8- or
	! V9-compliant, as V9 returns a distinct value of 0x99,
	! "negative" and "borrow" bits set in both %icc and %xcc.
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99
	bne	.v8		! not V9: skip the upper FP bank
	nop
			! Even though we do not use %fp register bank,
			! we wipe it as memcpy might have used it...
			! The moves are hand-encoded V9 instructions, one per
			! even register from %f62 down to %f32.
			.word	0xbfa00040	!fmovd	%f0,%f62
			.word	0xbba00040	!...
			.word	0xb7a00040
			.word	0xb3a00040
			.word	0xafa00040
			.word	0xaba00040
			.word	0xa7a00040
			.word	0xa3a00040
			.word	0x9fa00040
			.word	0x9ba00040
			.word	0x97a00040
			.word	0x93a00040
			.word	0x8fa00040
			.word	0x8ba00040
			.word	0x87a00040
			.word	0x83a00040	!fmovd	%f0,%f32
	! Common part: floating-point moves (right column) zero %f31
	! down to %f2, interleaved with integer clears (left column).
.v8:			fmovs	%f1,%f31
	clr	%o0
			fmovs	%f0,%f30
	clr	%o1
			fmovs	%f1,%f29
	clr	%o2
			fmovs	%f0,%f28
	clr	%o3
			fmovs	%f1,%f27
	clr	%o4
			fmovs	%f0,%f26
	clr	%o5
			fmovs	%f1,%f25
	clr	%o7
			fmovs	%f0,%f24
	clr	%l0
			fmovs	%f1,%f23
	clr	%l1
			fmovs	%f0,%f22
	clr	%l2
			fmovs	%f1,%f21
	clr	%l3
			fmovs	%f0,%f20
	clr	%l4
			fmovs	%f1,%f19
	clr	%l5
			fmovs	%f0,%f18
	clr	%l6
			fmovs	%f1,%f17
	clr	%l7
			fmovs	%f0,%f16
	clr	%i0
			fmovs	%f1,%f15
	clr	%i1
			fmovs	%f0,%f14
	clr	%i2
			fmovs	%f1,%f13
	clr	%i3
			fmovs	%f0,%f12
	clr	%i4
			fmovs	%f1,%f11
	clr	%i5
			fmovs	%f0,%f10
	clr	%g1
			fmovs	%f1,%f9
	clr	%g2
			fmovs	%f0,%f8
	clr	%g3
			fmovs	%f1,%f7
	clr	%g4
			fmovs	%f0,%f6
	clr	%g5
			fmovs	%f1,%f5
			fmovs	%f0,%f4
			fmovs	%f1,%f3
			fmovs	%f0,%f2

	add	%fp,BIAS,%i0	! return pointer to the top of stack of the caller

	ret
	restore

! Two zero words, the source of the floating-point zeroes above.
.zero:	.long	0x0,0x0
! Leaf helper: returns %o0 plus the address of the calling instruction
! (%o7), turning a call-relative offset into an absolute address.
.PIC.zero.up:
	retl
	add	%o0,%o7,%o0
#ifdef DEBUG
.global	walk_reg_wins
.type	walk_reg_wins,#function
walk_reg_wins:
#endif
! Recursive register window walker. Every invocation opens a new window
! with save, decides whether to recurse deeper, then clears the %o and
! %l registers of its window on the way back. The value returned is a
! counter incremented once per level ("used for debugging").
.walk.reg.wins:
	save	%sp,FRAME,%sp
	! Stop at once, without clearing anything, when %o7 of the fresh
	! window already equals our own return address in %i7.
	! NOTE(review): presumably this detects a window that an earlier
	! level of this recursion has already visited - confirm.
	cmp	%i7,%o7
	be	2f
	clr	%o0		! delay slot, executed on both paths
	cmp	%o7,0	! compiler never cleans %o7...
	be	1f	! could have been a leaf function...
	clr	%o1		! delay slot, executed on both paths
	call	.walk.reg.wins
	nop
1:	clr	%o2
	clr	%o3
	clr	%o4
	clr	%o5
	clr	%o7
	clr	%l0
	clr	%l1
	clr	%l2
	clr	%l3
	clr	%l4
	clr	%l5
	clr	%l6
	clr	%l7
	add	%o0,1,%i0	! used for debugging
2:	ret
	restore
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
174
! OPENSSL_atomic_add: atomically add an amount to a 32-bit word.
!
! In:	%o0 = address of the word, %o1 = amount to add
! Out:	%o0 = the new value, sign-extended from 32 bits
!
! 64-bit builds always use the compare-and-swap loop at the end. 32-bit
! builds probe the CPU first and fall back to a swap-based spin lock,
! which uses -1 as the "locked" marker, when the CPU is not V9.
.global	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,#function
.align	32
OPENSSL_atomic_add:
#ifndef ABI64
	subcc	%g0,1,%o2
	.word	0x95408000	!rd	%ccr,%o2, see comment above
	cmp	%o2,0x99
	be	.v9
	nop
	! V8 path: open a register window, since a yield function may
	! be called below.
	save	%sp,FRAME,%sp
	ba	.enter
	nop
#ifdef __sun
! Note that you do not have to link with libthread to call thr_yield,
! as libc provides a stub, which is overloaded the moment you link
! with *either* libpthread or libthread...
#define	YIELD_CPU	thr_yield
#else
! applies at least to Linux and FreeBSD... Feedback expected...
#define	YIELD_CPU	sched_yield
#endif
.spin:	call	YIELD_CPU
	nop
.enter:	ld	[%i0],%i2
	! NOTE(review): this pre-check compares against -4096 whereas the
	! marker written by the swap below is -1 - confirm the intent.
	cmp	%i2,-4096
	be	.spin
	mov	-1,%i2		! delay slot: prepare the lock marker
	swap	[%i0],%i2	! take the lock, fetch the old value
	cmp	%i2,-1
	be	.spin		! old value was the marker: somebody else holds it
	add	%i2,%i1,%i2	! delay slot: new value = old + amount
	stbar
	st	%i2,[%i0]	! publish the new value, which also releases the lock
	sra	%i2,%g0,%i0
	ret
	restore
.v9:
#endif
	! V9 path: retry compare-and-swap until the word did not change
	! between the read and the swap.
	ld	[%o0],%o2
1:	add	%o1,%o2,%o3
	.word	0xd7e2100a	!cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
	cmp	%o2,%o3
	bne	1b
	mov	%o3,%o2		! cas is always fetching to dest. register
	add	%o1,%o2,%o0	! OpenSSL expects the new value
	retl
	sra	%o0,%g0,%o0	! we return signed int, remember?
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
224
! _sparcv9_rdtick: read the %tick counter if the CPU is V9.
!
! Out on V9:	%o0 = value of %tick, %o1 = %o0 shifted right by 32
! Out otherwise:	%o0 = 0, %o1 = 0
.global	_sparcv9_rdtick
.type	_sparcv9_rdtick,#function
.align	32
_sparcv9_rdtick:
	! Same V8/V9 probe as used elsewhere in this file: set the
	! condition codes to a known state and read them back.
	subcc	%g0,1,%o0
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99
	bne	.Lrdtick_v8
	clr	%o0		! delay slot: runs on both paths
	.word	0x91410000	!rd	%tick,%o0
	retl
	.word	0x93323020	!srlx	%o0,32,%o1
.Lrdtick_v8:
	retl
	clr	%o1
.size	_sparcv9_rdtick,.-_sparcv9_rdtick
241
! _sparcv9_vis1_probe: execute two hand-encoded VIS1 instructions, a
! partial floating-point load from the stack area and an fxor that
! leaves %f0 zeroed.
! NOTE(review): presumably the caller arranges to catch the illegal
! instruction trap on CPUs without VIS1 - confirm against the caller.
.global	_sparcv9_vis1_probe
.align	8
_sparcv9_vis1_probe:
	add	%sp,BIAS+2,%o1	! address on the current stack to load from
	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
	retl
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
.type	_sparcv9_vis1_probe,#function
.size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe
251
! Probe and instrument VIS1 instruction. Output is number of cycles it
! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
! is slow (documented to be 6 cycles on T2) and the core is in-order
! single-issue, it should be possible to distinguish Tx reliably...
! Observed return values are:
!
!	UltraSPARC IIe		7
!	UltraSPARC III		7
!	UltraSPARC T1		24
!	SPARC T4		65(*)
!
! (*)	result has lesser to do with VIS instruction latencies, rdtick
!	appears that slow, but it does the trick in sense that FP and
!	VIS code paths are still slower than integer-only ones.
!
! Numbers for T2 and SPARC64 V-VII are more than welcomed.
!
! It would be possible to detect specifically US-T1 by instrumenting
! fmul8ulx16, which is emulated on T1 and as such accounts for quite
! a lot of %tick-s, couple of thousand on Linux...
!
! Out:	%o0 = smallest of the four measured intervals
.global	_sparcv9_vis1_instrument
.align	8
_sparcv9_vis1_instrument:
	! One pair of fxor first, then five %tick samples (into %o0
	! through %o4) with a pair of fxor between consecutive samples.
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x91410000	!rd	%tick,%o0
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x93410000	!rd	%tick,%o1
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x95410000	!rd	%tick,%o2
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x97410000	!rd	%tick,%o3
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x99410000	!rd	%tick,%o4

	! calculate intervals
	sub	%o1,%o0,%o0
	sub	%o2,%o1,%o1
	sub	%o3,%o2,%o2
	sub	%o4,%o3,%o3

	! find minimum value
	! Each annulled branch targets the instruction right after its
	! own delay slot, so the mov in the delay slot executes only
	! when %o0 is larger (unsigned) than the other interval.
	cmp	%o0,%o1
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o1,%o0
	cmp	%o0,%o2
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o2,%o0
	cmp	%o0,%o3
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o3,%o0

	retl
	nop
.type	_sparcv9_vis1_instrument,#function
.size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
312
! _sparcv9_vis2_probe: execute a single hand-encoded bshuffle in the
! delay slot of the return.
! NOTE(review): presumably used to detect VIS2 by trapping on CPUs
! that lack the instruction - confirm against the caller.
.global	_sparcv9_vis2_probe
.align	8
_sparcv9_vis2_probe:
	retl
	.word	0x81b00980	!bshuffle	%f0,%f0,%f0
.type	_sparcv9_vis2_probe,#function
.size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe
320
! _sparcv9_fmadd_probe: zero %f0 and %f2 with fxor, then execute a
! hand-encoded fused multiply-add on them in the delay slot of the
! return.
! NOTE(review): presumably used to detect the instruction by trapping
! on CPUs that lack it - confirm against the caller.
.global	_sparcv9_fmadd_probe
.align	8
_sparcv9_fmadd_probe:
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	retl
	.word	0x81b80440	!fmaddd	%f0,%f0,%f2,%f0
.type	_sparcv9_fmadd_probe,#function
.size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
330
! _sparcv9_rdcfr: read ancillary state register 26 into %o0 and return
! it; the hand-encoded read sits in the delay slot of the return.
.global	_sparcv9_rdcfr
.align	8
_sparcv9_rdcfr:
	retl
	.word	0x91468000	!rd	%asr26,%o0
.type	_sparcv9_rdcfr,#function
.size	_sparcv9_rdcfr,.-_sparcv9_rdcfr
338
! _sparcv9_vis3_probe: execute a single hand-encoded xmulx, with %g0
! as both sources and destination, in the delay slot of the return.
! NOTE(review): presumably used to detect VIS3 by trapping on CPUs
! that lack the instruction - confirm against the caller.
.global	_sparcv9_vis3_probe
.align	8
_sparcv9_vis3_probe:
	retl
	.word	0x81b022a0	!xmulx	%g0,%g0,%g0
.type	_sparcv9_vis3_probe,#function
.size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe
346
! _sparcv9_random: execute a single hand-encoded "random" instruction
! in the delay slot of the return; its destination register is %o0,
! which is therefore the value handed back to the caller.
.global	_sparcv9_random
.align	8
_sparcv9_random:
	retl
	.word	0x91b002a0	!random	%o0
! The symbol is typed like every other routine in this file, and its
! size is measured from its own label.
.type	_sparcv9_random,#function
.size	_sparcv9_random,.-_sparcv9_random
354
! _sparcv9_fjaesx_probe: execute a single hand-encoded faesencx
! instruction and return.
! NOTE(review): presumably used to detect the instruction by trapping
! on CPUs that lack it - confirm against the caller.
.global	_sparcv9_fjaesx_probe
.align	8
_sparcv9_fjaesx_probe:
	.word	0x81b09206	!faesencx %f2,%f6,%f0
	retl
	nop
! Typed as a function, like every other routine in this file.
.type	_sparcv9_fjaesx_probe,#function
.size	_sparcv9_fjaesx_probe,.-_sparcv9_fjaesx_probe
362
! OPENSSL_cleanse: overwrite a memory area with zero bytes.
!
! In:	%o0 = start address, %o1 = length in bytes
!
! Lengths up to 14 are written byte by byte. Longer areas are first
! aligned with byte stores and then filled with 8-byte stores (V9) or
! 4-byte stores (V8, 32-bit builds only); the remaining tail goes
! through the byte loop again.
.global	OPENSSL_cleanse
.align	32
OPENSSL_cleanse:
	cmp	%o1,14
	nop
#ifdef ABI64
	bgu	%xcc,.Lot
#else
	bgu	.Lot
#endif
	cmp	%o1,0		! delay slot: runs on both paths
	bne	.Little
	nop
	retl			! zero length: nothing to do
	nop

	! Byte loop: one store per iteration until the length is zero.
.Little:
	stb	%g0,[%o0]
	subcc	%o1,1,%o1
	bnz	.Little
	add	%o0,1,%o0
	retl
	nop
.align	32
.Lot:
#ifndef ABI64
	subcc	%g0,1,%g1
	! see above for explanation
	.word	0x83408000	!rd	%ccr,%g1
	cmp	%g1,0x99
	bne	.v8lot
	nop
#endif

	! V9: store single bytes until the address is 8-byte aligned.
.v9lot:	andcc	%o0,7,%g0
	bz	.v9aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v9lot
	add	%o0,1,%o0
	! The fill value below is the encoding of the nop instruction.
.align	16,0x01000000
.v9aligned:
	.word	0xc0720000	!stx	%g0,[%o0]
	sub	%o1,8,%o1
	andcc	%o1,-8,%g0	! at least 8 more bytes to go?
	! The branches below are hand-encoded with a fixed displacement
	! back to .v9aligned, so no instruction may be added or removed
	! between that label and the branch.
#ifdef ABI64
	.word	0x126ffffd	!bnz	%xcc,.v9aligned
#else
	.word	0x124ffffd	!bnz	%icc,.v9aligned
#endif
	add	%o0,8,%o0

	cmp	%o1,0
	bne	.Little		! finish the tail byte by byte
	nop
	retl
	nop
#ifndef ABI64
	! V8: store single bytes until the address is 4-byte aligned.
.v8lot:	andcc	%o0,3,%g0
	bz	.v8aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v8lot
	add	%o0,1,%o0
	nop
.v8aligned:
	st	%g0,[%o0]
	sub	%o1,4,%o1
	andcc	%o1,-4,%g0	! at least 4 more bytes to go?
	bnz	.v8aligned
	add	%o0,4,%o0

	cmp	%o1,0
	bne	.Little		! finish the tail byte by byte
	nop
	retl
	nop
#endif
.type	OPENSSL_cleanse,#function
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
445
! CRYPTO_memcmp: compare two memory areas without an early exit.
!
! In:	%o0, %o1 = addresses of the two areas, %o2 = length in bytes
! Out:	%o0 = 0 if all bytes are equal (or the length is zero),
!	      1 otherwise
!
! Every byte pair is always read; the differences are accumulated in
! %g1 and reduced to 0 or 1 only after the loop.
.global	CRYPTO_memcmp
.type	CRYPTO_memcmp,#function
.align	16
CRYPTO_memcmp:
	tst	%o2
#ifdef ABI64
	be,pn	%xcc,.Lcmp_done
#else
	be	.Lcmp_done
#endif
	clr	%g1		! delay slot: accumulator = 0 on both paths
	nop

.Lcmp_next:
	ldub	[%o0],%o3
	inc	%o0
	ldub	[%o1],%o4
	inc	%o1
	deccc	%o2		! one byte pair less to go
	xor	%o4,%o3,%o4	! non-zero if the two bytes differ
#ifdef ABI64
	bne	%xcc,.Lcmp_next
#else
	bne	.Lcmp_next
#endif
	or	%g1,%o4,%g1	! delay slot: fold the difference in

	! The accumulator is 0 or a value in 1..255; negating it and
	! taking bit 31 maps that to 0 or 1.
	neg	%g1
	srl	%g1,31,%g1
.Lcmp_done:
	retl
	mov	%g1,%o0
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
479
! _sparcv9_vis1_instrument_bus: fill an array of 32-bit words with
! %tick deltas measured around block load/store traffic.
!
! In:	%o0 = address of the output array, %o1 = number of entries
! Out:	%o0 = the entry count that was passed in
!
! For each entry the 64-byte line containing it is block-loaded and
! block-stored, then the time elapsed since the previous sample is
! added to the entry with a compare-and-swap.
.global	_sparcv9_vis1_instrument_bus
.align	8
_sparcv9_vis1_instrument_bus:
	mov	%o1,%o3					! save cnt
	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5					! lasttick = tick
	set	0,%g4					! diff

	! First pass over the first entry, with a diff of zero.
	andn	%o0,63,%g1				! 64-byte aligned address
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

.Loop:	.word	0x99410000	!rd	%tick,%o4
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick

	andn	%o0,63,%g1				! 64-byte aligned address
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4				! entry + diff
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
	subcc	%o1,1,%o1				! --$cnt
	bnz	.Loop
	add	%o0,4,%o0				! ++$out

	retl
	mov	%o3,%o0					! return the saved cnt
.type	_sparcv9_vis1_instrument_bus,#function
.size	_sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
517
! _sparcv9_vis1_instrument_bus2: like _sparcv9_vis1_instrument_bus, but
! the output pointer advances only when the measured delta differs
! from the previous one, and the number of probes is capped.
!
! In:	%o0 = address of the output array of 32-bit words
!	%o1 = number of entries, %o2 = maximum number of probes
! Out:	%o0 = the entry count passed in minus the count still remaining
!	      when the loop ended
.global	_sparcv9_vis1_instrument_bus2
.align	8
_sparcv9_vis1_instrument_bus2:
	mov	%o1,%o3					! save cnt
	sll	%o1,2,%o1				! cnt*=4

	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5					! lasttick = tick
	set	0,%g4					! diff

	! First pass over the first entry, with a diff of zero.
	andn	%o0,63,%g1				! 64-byte aligned address
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick
	mov	%g4,%g5					! lastdiff=diff
.Loop2:
	andn	%o0,63,%g1				! 64-byte aligned address
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4				! entry + diff
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	subcc	%o2,1,%o2				! --max
	bz	.Ldone2
	nop

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick
	cmp	%g4,%g5
	mov	%g4,%g5					! lastdiff=diff

	! Branch-free step: after the two instructions that follow the
	! rd, %g1 is 4 when diff differs from lastdiff and 0 when they
	! are equal, and that is the amount both cnt and out move by.
	.word	0x83408000	!rd	%ccr,%g1
	and	%g1,4,%g1				! isolate zero flag
	xor	%g1,4,%g1				! flip zero flag

	subcc	%o1,%g1,%o1				! conditional --$cnt
	bnz	.Loop2
	add	%o0,%g1,%o0				! conditional ++$out

.Ldone2:
	srl	%o1,2,%o1				! back from bytes to entries
	retl
	sub	%o3,%o1,%o0				! saved cnt - remaining cnt
.type	_sparcv9_vis1_instrument_bus2,#function
.size	_sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
575
! Fragment placed in the .init section: it calls OPENSSL_cpuid_setup,
! which is defined outside of this file.
.section	".init",#alloc,#execinstr
	call	OPENSSL_cpuid_setup
	nop
579