#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for Alpha.

# On 21264 performance is 33% better than code generated by the vendor
# compiler and 75% better than GCC [3.4]; in absolute terms it runs at
# 8.7 cycles per processed byte. Implementation features a vectorized
# byte swap, but not a vectorized Xupdate.

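# Alpha has neither a rotate nor a byte-swap instruction, so the 32-bit
# rotations are emulated with sll/srl pairs on 64-bit registers (results
# truncated back to 32 bits with zapnot), and the input byte swap is
# performed on two 32-bit words at a time with shift/zapnot/or sequences.
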
@X=(	"\$0",	"\$1",	"\$2",	"\$3",	"\$4",	"\$5",	"\$6",	"\$7",
	"\$8",	"\$9",	"\$10",	"\$11",	"\$12",	"\$13",	"\$14",	"\$15");
$ctx="a0";	# $16
$inp="a1";
$num="a2";
$A="a3";
$B="a4";	# 20
$C="a5";
$D="t8";
$E="t9";	@V=($A,$B,$C,$D,$E);
$t0="t10";	# 24
$t1="t11";
$t2="ra";
$t3="t12";
$K="AT";	# 28

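# Rounds 0..19: F(b,c,d) = (b AND c) OR ((NOT b) AND d), computed with
# and+bic below. Even-numbered rounds (i < 15) also fetch a pair of
# 32-bit message words from the unaligned input (ldq_u/extql/extqh)
# and byte-swap both at once; the following odd round consumes the
# second word of the pair.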
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i==0);
	ldq_u	@X[0],0+0($inp)
	ldq_u	@X[1],0+7($inp)
___
$code.=<<___ if (!($i&1) && $i<14);
	ldq_u	@X[$i+2],($i+2)*4+0($inp)
	ldq_u	@X[$i+3],($i+2)*4+7($inp)
___
$code.=<<___ if (!($i&1) && $i<15);
	extql	@X[$i],$inp,@X[$i]
	extqh	@X[$i+1],$inp,@X[$i+1]

	or	@X[$i+1],@X[$i],@X[$i]	# pair of 32-bit values are fetched

	srl	@X[$i],24,$t0		# vectorized byte swap
	srl	@X[$i],8,$t2

	sll	@X[$i],8,$t3
	sll	@X[$i],24,@X[$i]
	zapnot	$t0,0x11,$t0
	zapnot	$t2,0x22,$t2

	zapnot	@X[$i],0x88,@X[$i]
	or	$t0,$t2,$t0
	zapnot	$t3,0x44,$t3
	sll	$a,5,$t1

	or	@X[$i],$t0,@X[$i]
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a

	or	@X[$i],$t3,@X[$i]
	srl	$a,27,$t0
	bic	$d,$b,$t3
	sll	$b,30,$b

	extll	@X[$i],4,@X[$i+1]	# extract upper half
	or	$t2,$t3,$t2
	addl	@X[$i],$e,$e

	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	@X[$i],0xf,@X[$i]

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
$code.=<<___ if (($i&1) && $i<15);
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a

	srl	$a,27,$t0
	addl	@X[$i%16],$e,$e
	bic	$d,$b,$t3
	sll	$b,30,$b

	or	$t2,$t3,$t2
	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	@X[$i],0xf,@X[$i]

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
$code.=<<___ if ($i>=15);	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	zapnot	$a,0xf,$a
	addl	@X[$i%16],$e,$e
	bic	$d,$b,$t3
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	srl	$a,27,$t0
	addl	$t1,$e,$e
	or	$t2,$t3,$t2
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$b
	addl	$t0,$e,$e
	srl	@X[$j%16],31,$t1

	addl	$t2,$e,$e
	srl	$b,32,$t3
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	zapnot	@X[$i%16],0xf,@X[$i%16]
	or	$t1,@X[$j%16],@X[$j%16]
___
}

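# Rounds 20..39 and 60..79: F(b,c,d) = b XOR c XOR d. The last round
# (i == 79) also preloads the five context words needed for the final
# state update.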
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	srl	$b,2,$b
	addl	@X[$i%16],$e,$e
	xor	$d,$t2,$t2
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	srl	@X[$j%16],31,$t1
	addl	$t2,$e,$e
	srl	$a,27,$t0
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	addl	$t0,$e,$e
	or	$t1,@X[$j%16],@X[$j%16]
___
$code.=<<___ if ($i<77);
	zapnot	@X[$i%16],0xf,@X[$i%16]
___
$code.=<<___ if ($i==79);	# with context fetch
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	ldl	@X[0],0($ctx)

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	ldl	@X[1],4($ctx)

	srl	$b,2,$b
	addl	@X[$i%16],$e,$e
	xor	$d,$t2,$t2
	ldl	@X[2],8($ctx)

	srl	$a,27,$t0
	addl	$t2,$e,$e
	ldl	@X[3],12($ctx)

	or	$t3,$b,$b
	addl	$t0,$e,$e
	ldl	@X[4],16($ctx)
___
}

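# Rounds 40..59: F(b,c,d) = Maj(b,c,d) = (b AND c) OR (b AND d) OR
# (c AND d), accumulated with three and and two or instructions.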
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___;	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	srl	$a,27,$t0
	and	$b,$c,$t2
	and	$b,$d,$t3
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$b
	addl	$t1,$e,$e
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	srl	@X[$j%16],31,$t1
	addl	$t0,$e,$e
	or	$t2,$t3,$t2
	and	$c,$d,$t3

	or	$t2,$t3,$t2
	srl	$b,32,$t3
	addl	@X[$i%16],$e,$e
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	addl	$t2,$e,$e
	or	$t1,@X[$j%16],@X[$j%16]
	zapnot	@X[$i%16],0xf,@X[$i%16]
___
}

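# Procedure body: the prologue saves ra, s0-s5 and fp as declared by
# .mask, and $num (a count of 64-byte blocks) is turned into an end
# pointer for the block loop.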
$code=<<___;
#include <machine/asm.h>

.text

.set	noat
.set	noreorder
.globl	sha1_block_data_order
.align	5
.ent	sha1_block_data_order
sha1_block_data_order:
	lda	sp,-64(sp)
	stq	ra,0(sp)
	stq	s0,8(sp)
	stq	s1,16(sp)
	stq	s2,24(sp)
	stq	s3,32(sp)
	stq	s4,40(sp)
	stq	s5,48(sp)
	stq	fp,56(sp)
	.mask	0x0400fe00,-64
	.frame	sp,64,ra
	.prologue 0

	ldl	$A,0($ctx)
	ldl	$B,4($ctx)
	sll	$num,6,$num
	ldl	$C,8($ctx)
	ldl	$D,12($ctx)
	ldl	$E,16($ctx)
	addq	$inp,$num,$num

.Lloop:
	.set	noreorder
	ldah	$K,23170(zero)
	zapnot	$B,0xf,$B
	lda	$K,31129($K)	# K_00_19
___
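# Each ldah/lda pair materializes a 32-bit round constant: lda
# sign-extends its 16-bit operand, so the high half is pre-adjusted by
# one whenever bit 15 of the low half is set (e.g. 28378<<16 - 5215 ==
# 0x6ed9eba1, K_20_39).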
for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
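# unshift(@V,pop(@V)) here and in the loops below rotates the
# working-variable roles (a,b,c,d,e) -> (e,a,b,c,d), so no register
# moves are needed between rounds.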

$code.=<<___;
	ldah	$K,28378(zero)
	lda	$K,-5215($K)	# K_20_39
___
for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldah	$K,-28900(zero)
	lda	$K,-17188($K)	# K_40_59
___
for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldah	$K,-13725(zero)
	lda	$K,-15914($K)	# K_60_79
___
for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }

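# Fold the context words preloaded in round 79 back into the working
# variables, store the updated state, advance $inp and loop until it
# reaches the end pointer in $num.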
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stl	$A,0($ctx)
	stl	$B,4($ctx)
	addq	$inp,64,$inp
	stl	$C,8($ctx)
	stl	$D,12($ctx)
	stl	$E,16($ctx)
	cmpult	$inp,$num,$t1
	bne	$t1,.Lloop

	.set	noreorder
	ldq	ra,0(sp)
	ldq	s0,8(sp)
	ldq	s1,16(sp)
	ldq	s2,24(sp)
	ldq	s3,32(sp)
	ldq	s4,40(sp)
	ldq	s5,48(sp)
	ldq	fp,56(sp)
	lda	sp,64(sp)
	ret	(ra)
.end	sha1_block_data_order
.ascii	"SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___
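# Write the generated code to the file named by the first argument,
# if any, otherwise to stdout.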
$output=shift and open STDOUT,">$output";
print $code;
close STDOUT;